AMDGPUAsmParser.cpp
1//===- AMDGPUAsmParser.cpp - Parse SI asm to MCInst instructions ----------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#include "AMDKernelCodeT.h"
15#include "SIDefines.h"
16#include "SIInstrInfo.h"
17#include "SIRegisterInfo.h"
22#include "llvm/ADT/APFloat.h"
24#include "llvm/ADT/StringSet.h"
25#include "llvm/ADT/Twine.h"
28#include "llvm/MC/MCAsmInfo.h"
29#include "llvm/MC/MCContext.h"
30#include "llvm/MC/MCExpr.h"
31#include "llvm/MC/MCInst.h"
32#include "llvm/MC/MCInstrDesc.h"
37#include "llvm/MC/MCSymbol.h"
44#include <optional>
45
46using namespace llvm;
47using namespace llvm::AMDGPU;
48using namespace llvm::amdhsa;
49
50namespace {
51
52class AMDGPUAsmParser;
53
54enum RegisterKind { IS_UNKNOWN, IS_VGPR, IS_SGPR, IS_AGPR, IS_TTMP, IS_SPECIAL };
55
56//===----------------------------------------------------------------------===//
57// Operand
58//===----------------------------------------------------------------------===//
59
60class AMDGPUOperand : public MCParsedAsmOperand {
61 enum KindTy {
62 Token,
63 Immediate,
64 Register,
65 Expression
66 } Kind;
67
68 SMLoc StartLoc, EndLoc;
69 const AMDGPUAsmParser *AsmParser;
70
71public:
72 AMDGPUOperand(KindTy Kind_, const AMDGPUAsmParser *AsmParser_)
73 : Kind(Kind_), AsmParser(AsmParser_) {}
74
75 using Ptr = std::unique_ptr<AMDGPUOperand>;
76
77 struct Modifiers {
78 bool Abs = false;
79 bool Neg = false;
80 bool Sext = false;
81 bool Lit = false;
82
83 bool hasFPModifiers() const { return Abs || Neg; }
84 bool hasIntModifiers() const { return Sext; }
85 bool hasModifiers() const { return hasFPModifiers() || hasIntModifiers(); }
86
87 int64_t getFPModifiersOperand() const {
88 int64_t Operand = 0;
89 Operand |= Abs ? SISrcMods::ABS : 0u;
90 Operand |= Neg ? SISrcMods::NEG : 0u;
91 return Operand;
92 }
93
94 int64_t getIntModifiersOperand() const {
95 int64_t Operand = 0;
96 Operand |= Sext ? SISrcMods::SEXT : 0u;
97 return Operand;
98 }
99
100 int64_t getModifiersOperand() const {
101 assert(!(hasFPModifiers() && hasIntModifiers())
102 && "fp and int modifiers should not be used simultaneously");
103 if (hasFPModifiers())
104 return getFPModifiersOperand();
105 if (hasIntModifiers())
106 return getIntModifiersOperand();
107 return 0;
108 }
109
110 friend raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods);
111 };
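  // Illustrative example: an operand written as "-|v0|" parses with Abs and
  // Neg set, so getModifiersOperand() yields SISrcMods::ABS | SISrcMods::NEG
  // for the paired src_modifiers operand.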
112
113 enum ImmTy {
114 ImmTyNone,
115 ImmTyGDS,
116 ImmTyLDS,
117 ImmTyOffen,
118 ImmTyIdxen,
119 ImmTyAddr64,
120 ImmTyOffset,
121 ImmTyInstOffset,
122 ImmTyOffset0,
123 ImmTyOffset1,
124 ImmTySMEMOffsetMod,
125 ImmTyCPol,
126 ImmTyTFE,
127 ImmTyD16,
128 ImmTyClamp,
129 ImmTyOModSI,
130 ImmTySDWADstSel,
131 ImmTySDWASrc0Sel,
132 ImmTySDWASrc1Sel,
133 ImmTySDWADstUnused,
134 ImmTyDMask,
135 ImmTyDim,
136 ImmTyUNorm,
137 ImmTyDA,
138 ImmTyR128A16,
139 ImmTyA16,
140 ImmTyLWE,
141 ImmTyExpTgt,
142 ImmTyExpCompr,
143 ImmTyExpVM,
144 ImmTyFORMAT,
145 ImmTyHwreg,
146 ImmTyOff,
147 ImmTySendMsg,
148 ImmTyInterpSlot,
149 ImmTyInterpAttr,
150 ImmTyInterpAttrChan,
151 ImmTyOpSel,
152 ImmTyOpSelHi,
153 ImmTyNegLo,
154 ImmTyNegHi,
155 ImmTyIndexKey8bit,
156 ImmTyIndexKey16bit,
157 ImmTyDPP8,
158 ImmTyDppCtrl,
159 ImmTyDppRowMask,
160 ImmTyDppBankMask,
161 ImmTyDppBoundCtrl,
162 ImmTyDppFI,
163 ImmTySwizzle,
164 ImmTyGprIdxMode,
165 ImmTyHigh,
166 ImmTyBLGP,
167 ImmTyCBSZ,
168 ImmTyABID,
169 ImmTyEndpgm,
170 ImmTyWaitVDST,
171 ImmTyWaitEXP,
172 ImmTyWaitVAVDst,
173 ImmTyWaitVMVSrc,
174 ImmTyByteSel,
175 };
176
177 // Immediate operand kind.
178 // It helps to identify the location of an offending operand after an error.
179 // Note that regular literals and mandatory literals (KImm) must be handled
180 // differently. When looking for an offending operand, we should usually
181 // ignore mandatory literals because they are part of the instruction and
182 // cannot be changed. Report location of mandatory operands only for VOPD,
183 // when both OpX and OpY have a KImm and there are no other literals.
184 enum ImmKindTy {
185 ImmKindTyNone,
186 ImmKindTyLiteral,
187 ImmKindTyMandatoryLiteral,
188 ImmKindTyConst,
189 };
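  // Illustrative example: the literal in "v_mov_b32 v0, 0x1234" is tracked as
  // ImmKindTyLiteral, while the always-present constant K of v_fmaak_f32 /
  // v_madak_f32 is tracked as ImmKindTyMandatoryLiteral (KImm).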
190
191private:
192 struct TokOp {
193 const char *Data;
194 unsigned Length;
195 };
196
197 struct ImmOp {
198 int64_t Val;
199 ImmTy Type;
200 bool IsFPImm;
201 mutable ImmKindTy Kind;
202 Modifiers Mods;
203 };
204
205 struct RegOp {
206 unsigned RegNo;
207 Modifiers Mods;
208 };
209
210 union {
211 TokOp Tok;
212 ImmOp Imm;
213 RegOp Reg;
214 const MCExpr *Expr;
215 };
216
217public:
218 bool isToken() const override { return Kind == Token; }
219
220 bool isSymbolRefExpr() const {
221 return isExpr() && Expr && isa<MCSymbolRefExpr>(Expr);
222 }
223
224 bool isImm() const override {
225 return Kind == Immediate;
226 }
227
228 void setImmKindNone() const {
229 assert(isImm());
230 Imm.Kind = ImmKindTyNone;
231 }
232
233 void setImmKindLiteral() const {
234 assert(isImm());
235 Imm.Kind = ImmKindTyLiteral;
236 }
237
238 void setImmKindMandatoryLiteral() const {
239 assert(isImm());
240 Imm.Kind = ImmKindTyMandatoryLiteral;
241 }
242
243 void setImmKindConst() const {
244 assert(isImm());
245 Imm.Kind = ImmKindTyConst;
246 }
247
248 bool IsImmKindLiteral() const {
249 return isImm() && Imm.Kind == ImmKindTyLiteral;
250 }
251
252 bool IsImmKindMandatoryLiteral() const {
253 return isImm() && Imm.Kind == ImmKindTyMandatoryLiteral;
254 }
255
256 bool isImmKindConst() const {
257 return isImm() && Imm.Kind == ImmKindTyConst;
258 }
259
260 bool isInlinableImm(MVT type) const;
261 bool isLiteralImm(MVT type) const;
262
263 bool isRegKind() const {
264 return Kind == Register;
265 }
266
267 bool isReg() const override {
268 return isRegKind() && !hasModifiers();
269 }
270
271 bool isRegOrInline(unsigned RCID, MVT type) const {
272 return isRegClass(RCID) || isInlinableImm(type);
273 }
274
275 bool isRegOrImmWithInputMods(unsigned RCID, MVT type) const {
276 return isRegOrInline(RCID, type) || isLiteralImm(type);
277 }
278
279 bool isRegOrImmWithInt16InputMods() const {
280 return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i16);
281 }
282
283 bool isRegOrImmWithIntT16InputMods() const {
284 return isRegOrImmWithInputMods(AMDGPU::VS_16RegClassID, MVT::i16);
285 }
286
287 bool isRegOrImmWithInt32InputMods() const {
288 return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i32);
289 }
290
291 bool isRegOrInlineImmWithInt16InputMods() const {
292 return isRegOrInline(AMDGPU::VS_32RegClassID, MVT::i16);
293 }
294
295 bool isRegOrInlineImmWithInt32InputMods() const {
296 return isRegOrInline(AMDGPU::VS_32RegClassID, MVT::i32);
297 }
298
299 bool isRegOrImmWithInt64InputMods() const {
300 return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::i64);
301 }
302
303 bool isRegOrImmWithFP16InputMods() const {
304 return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f16);
305 }
306
307 bool isRegOrImmWithFPT16InputMods() const {
308 return isRegOrImmWithInputMods(AMDGPU::VS_16RegClassID, MVT::f16);
309 }
310
311 bool isRegOrImmWithFP32InputMods() const {
312 return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f32);
313 }
314
315 bool isRegOrImmWithFP64InputMods() const {
316 return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::f64);
317 }
318
319 template <bool IsFake16> bool isRegOrInlineImmWithFP16InputMods() const {
320 return isRegOrInline(
321 IsFake16 ? AMDGPU::VS_32RegClassID : AMDGPU::VS_16RegClassID, MVT::f16);
322 }
323
324 bool isRegOrInlineImmWithFP32InputMods() const {
325 return isRegOrInline(AMDGPU::VS_32RegClassID, MVT::f32);
326 }
327
328 bool isPackedFP16InputMods() const {
329 return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::v2f16);
330 }
331
332 bool isVReg() const {
333 return isRegClass(AMDGPU::VGPR_32RegClassID) ||
334 isRegClass(AMDGPU::VReg_64RegClassID) ||
335 isRegClass(AMDGPU::VReg_96RegClassID) ||
336 isRegClass(AMDGPU::VReg_128RegClassID) ||
337 isRegClass(AMDGPU::VReg_160RegClassID) ||
338 isRegClass(AMDGPU::VReg_192RegClassID) ||
339 isRegClass(AMDGPU::VReg_256RegClassID) ||
340 isRegClass(AMDGPU::VReg_512RegClassID) ||
341 isRegClass(AMDGPU::VReg_1024RegClassID);
342 }
343
344 bool isVReg32() const {
345 return isRegClass(AMDGPU::VGPR_32RegClassID);
346 }
347
348 bool isVReg32OrOff() const {
349 return isOff() || isVReg32();
350 }
351
352 bool isNull() const {
353 return isRegKind() && getReg() == AMDGPU::SGPR_NULL;
354 }
355
356 bool isVRegWithInputMods() const;
357 template <bool IsFake16> bool isT16VRegWithInputMods() const;
358
359 bool isSDWAOperand(MVT type) const;
360 bool isSDWAFP16Operand() const;
361 bool isSDWAFP32Operand() const;
362 bool isSDWAInt16Operand() const;
363 bool isSDWAInt32Operand() const;
364
365 bool isImmTy(ImmTy ImmT) const {
366 return isImm() && Imm.Type == ImmT;
367 }
368
369 template <ImmTy Ty> bool isImmTy() const { return isImmTy(Ty); }
370
371 bool isImmLiteral() const { return isImmTy(ImmTyNone); }
372
373 bool isImmModifier() const {
374 return isImm() && Imm.Type != ImmTyNone;
375 }
376
377 bool isOModSI() const { return isImmTy(ImmTyOModSI); }
378 bool isDim() const { return isImmTy(ImmTyDim); }
379 bool isR128A16() const { return isImmTy(ImmTyR128A16); }
380 bool isOff() const { return isImmTy(ImmTyOff); }
381 bool isExpTgt() const { return isImmTy(ImmTyExpTgt); }
382 bool isOffen() const { return isImmTy(ImmTyOffen); }
383 bool isIdxen() const { return isImmTy(ImmTyIdxen); }
384 bool isAddr64() const { return isImmTy(ImmTyAddr64); }
385 bool isSMEMOffsetMod() const { return isImmTy(ImmTySMEMOffsetMod); }
386 bool isFlatOffset() const { return isImmTy(ImmTyOffset) || isImmTy(ImmTyInstOffset); }
387 bool isGDS() const { return isImmTy(ImmTyGDS); }
388 bool isLDS() const { return isImmTy(ImmTyLDS); }
389 bool isCPol() const { return isImmTy(ImmTyCPol); }
390 bool isIndexKey8bit() const { return isImmTy(ImmTyIndexKey8bit); }
391 bool isIndexKey16bit() const { return isImmTy(ImmTyIndexKey16bit); }
392 bool isTFE() const { return isImmTy(ImmTyTFE); }
393 bool isFORMAT() const { return isImmTy(ImmTyFORMAT) && isUInt<7>(getImm()); }
394 bool isDppFI() const { return isImmTy(ImmTyDppFI); }
395 bool isSDWADstSel() const { return isImmTy(ImmTySDWADstSel); }
396 bool isSDWASrc0Sel() const { return isImmTy(ImmTySDWASrc0Sel); }
397 bool isSDWASrc1Sel() const { return isImmTy(ImmTySDWASrc1Sel); }
398 bool isSDWADstUnused() const { return isImmTy(ImmTySDWADstUnused); }
399 bool isInterpSlot() const { return isImmTy(ImmTyInterpSlot); }
400 bool isInterpAttr() const { return isImmTy(ImmTyInterpAttr); }
401 bool isInterpAttrChan() const { return isImmTy(ImmTyInterpAttrChan); }
402 bool isOpSel() const { return isImmTy(ImmTyOpSel); }
403 bool isOpSelHi() const { return isImmTy(ImmTyOpSelHi); }
404 bool isNegLo() const { return isImmTy(ImmTyNegLo); }
405 bool isNegHi() const { return isImmTy(ImmTyNegHi); }
406
407 bool isRegOrImm() const {
408 return isReg() || isImm();
409 }
410
411 bool isRegClass(unsigned RCID) const;
412
413 bool isInlineValue() const;
414
415 bool isRegOrInlineNoMods(unsigned RCID, MVT type) const {
416 return isRegOrInline(RCID, type) && !hasModifiers();
417 }
418
419 bool isSCSrcB16() const {
420 return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i16);
421 }
422
423 bool isSCSrcV2B16() const {
424 return isSCSrcB16();
425 }
426
427 bool isSCSrc_b32() const {
428 return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i32);
429 }
430
431 bool isSCSrc_b64() const {
432 return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::i64);
433 }
434
435 bool isBoolReg() const;
436
437 bool isSCSrcF16() const {
438 return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f16);
439 }
440
441 bool isSCSrcV2F16() const {
442 return isSCSrcF16();
443 }
444
445 bool isSCSrcF32() const {
446 return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f32);
447 }
448
449 bool isSCSrcF64() const {
450 return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::f64);
451 }
452
453 bool isSSrc_b32() const {
454 return isSCSrc_b32() || isLiteralImm(MVT::i32) || isExpr();
455 }
456
457 bool isSSrc_b16() const { return isSCSrcB16() || isLiteralImm(MVT::i16); }
458
459 bool isSSrcV2B16() const {
460 llvm_unreachable("cannot happen");
461 return isSSrc_b16();
462 }
463
464 bool isSSrc_b64() const {
465 // TODO: Find out how SALU supports extension of 32-bit literals to 64 bits.
466 // See isVSrc64().
467 return isSCSrc_b64() || isLiteralImm(MVT::i64);
468 }
469
470 bool isSSrc_f32() const {
471 return isSCSrc_b32() || isLiteralImm(MVT::f32) || isExpr();
472 }
473
474 bool isSSrcF64() const { return isSCSrc_b64() || isLiteralImm(MVT::f64); }
475
476 bool isSSrc_bf16() const { return isSCSrcB16() || isLiteralImm(MVT::bf16); }
477
478 bool isSSrc_f16() const { return isSCSrcB16() || isLiteralImm(MVT::f16); }
479
480 bool isSSrcV2F16() const {
481 llvm_unreachable("cannot happen");
482 return isSSrc_f16();
483 }
484
485 bool isSSrcV2FP32() const {
486 llvm_unreachable("cannot happen");
487 return isSSrc_f32();
488 }
489
490 bool isSCSrcV2FP32() const {
491 llvm_unreachable("cannot happen");
492 return isSCSrcF32();
493 }
494
495 bool isSSrcV2INT32() const {
496 llvm_unreachable("cannot happen");
497 return isSSrc_b32();
498 }
499
500 bool isSCSrcV2INT32() const {
501 llvm_unreachable("cannot happen");
502 return isSCSrc_b32();
503 }
504
505 bool isSSrcOrLds_b32() const {
506 return isRegOrInlineNoMods(AMDGPU::SRegOrLds_32RegClassID, MVT::i32) ||
507 isLiteralImm(MVT::i32) || isExpr();
508 }
509
510 bool isVCSrc_b32() const {
511 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i32);
512 }
513
514 bool isVCSrcB64() const {
515 return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::i64);
516 }
517
518 bool isVCSrcTB16() const {
519 return isRegOrInlineNoMods(AMDGPU::VS_16RegClassID, MVT::i16);
520 }
521
522 bool isVCSrcTB16_Lo128() const {
523 return isRegOrInlineNoMods(AMDGPU::VS_16_Lo128RegClassID, MVT::i16);
524 }
525
526 bool isVCSrcFake16B16_Lo128() const {
527 return isRegOrInlineNoMods(AMDGPU::VS_32_Lo128RegClassID, MVT::i16);
528 }
529
530 bool isVCSrc_b16() const {
531 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i16);
532 }
533
534 bool isVCSrc_v2b16() const { return isVCSrc_b16(); }
535
536 bool isVCSrc_f32() const {
537 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f32);
538 }
539
540 bool isVCSrcF64() const {
541 return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::f64);
542 }
543
544 bool isVCSrcTBF16() const {
545 return isRegOrInlineNoMods(AMDGPU::VS_16RegClassID, MVT::bf16);
546 }
547
548 bool isVCSrcTF16() const {
549 return isRegOrInlineNoMods(AMDGPU::VS_16RegClassID, MVT::f16);
550 }
551
552 bool isVCSrcTBF16_Lo128() const {
553 return isRegOrInlineNoMods(AMDGPU::VS_16_Lo128RegClassID, MVT::bf16);
554 }
555
556 bool isVCSrcTF16_Lo128() const {
557 return isRegOrInlineNoMods(AMDGPU::VS_16_Lo128RegClassID, MVT::f16);
558 }
559
560 bool isVCSrcFake16BF16_Lo128() const {
561 return isRegOrInlineNoMods(AMDGPU::VS_32_Lo128RegClassID, MVT::bf16);
562 }
563
564 bool isVCSrcFake16F16_Lo128() const {
565 return isRegOrInlineNoMods(AMDGPU::VS_32_Lo128RegClassID, MVT::f16);
566 }
567
568 bool isVCSrc_bf16() const {
569 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::bf16);
570 }
571
572 bool isVCSrc_f16() const {
573 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f16);
574 }
575
576 bool isVCSrc_v2bf16() const { return isVCSrc_bf16(); }
577
578 bool isVCSrc_v2f16() const { return isVCSrc_f16(); }
579
580 bool isVSrc_b32() const {
581 return isVCSrc_f32() || isLiteralImm(MVT::i32) || isExpr();
582 }
583
584 bool isVSrc_b64() const { return isVCSrcF64() || isLiteralImm(MVT::i64); }
585
586 bool isVSrcT_b16() const { return isVCSrcTB16() || isLiteralImm(MVT::i16); }
587
588 bool isVSrcT_b16_Lo128() const {
589 return isVCSrcTB16_Lo128() || isLiteralImm(MVT::i16);
590 }
591
592 bool isVSrcFake16_b16_Lo128() const {
593 return isVCSrcFake16B16_Lo128() || isLiteralImm(MVT::i16);
594 }
595
596 bool isVSrc_b16() const { return isVCSrc_b16() || isLiteralImm(MVT::i16); }
597
598 bool isVSrc_v2b16() const { return isVSrc_b16() || isLiteralImm(MVT::v2i16); }
599
600 bool isVCSrcV2FP32() const {
601 return isVCSrcF64();
602 }
603
604 bool isVSrc_v2f32() const { return isVSrc_f64() || isLiteralImm(MVT::v2f32); }
605
606 bool isVCSrcV2INT32() const {
607 return isVCSrcB64();
608 }
609
610 bool isVSrc_v2b32() const { return isVSrc_b64() || isLiteralImm(MVT::v2i32); }
611
612 bool isVSrc_f32() const {
613 return isVCSrc_f32() || isLiteralImm(MVT::f32) || isExpr();
614 }
615
616 bool isVSrc_f64() const { return isVCSrcF64() || isLiteralImm(MVT::f64); }
617
618 bool isVSrcT_bf16() const { return isVCSrcTBF16() || isLiteralImm(MVT::bf16); }
619
620 bool isVSrcT_f16() const { return isVCSrcTF16() || isLiteralImm(MVT::f16); }
621
622 bool isVSrcT_bf16_Lo128() const {
623 return isVCSrcTBF16_Lo128() || isLiteralImm(MVT::bf16);
624 }
625
626 bool isVSrcT_f16_Lo128() const {
627 return isVCSrcTF16_Lo128() || isLiteralImm(MVT::f16);
628 }
629
630 bool isVSrcFake16_bf16_Lo128() const {
631 return isVCSrcFake16BF16_Lo128() || isLiteralImm(MVT::bf16);
632 }
633
634 bool isVSrcFake16_f16_Lo128() const {
635 return isVCSrcFake16F16_Lo128() || isLiteralImm(MVT::f16);
636 }
637
638 bool isVSrc_bf16() const { return isVCSrc_bf16() || isLiteralImm(MVT::bf16); }
639
640 bool isVSrc_f16() const { return isVCSrc_f16() || isLiteralImm(MVT::f16); }
641
642 bool isVSrc_v2bf16() const {
643 return isVSrc_bf16() || isLiteralImm(MVT::v2bf16);
644 }
645
646 bool isVSrc_v2f16() const { return isVSrc_f16() || isLiteralImm(MVT::v2f16); }
647
648 bool isVISrcB32() const {
649 return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i32);
650 }
651
652 bool isVISrcB16() const {
653 return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i16);
654 }
655
656 bool isVISrcV2B16() const {
657 return isVISrcB16();
658 }
659
660 bool isVISrcF32() const {
661 return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f32);
662 }
663
664 bool isVISrcF16() const {
665 return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f16);
666 }
667
668 bool isVISrcV2F16() const {
669 return isVISrcF16() || isVISrcB32();
670 }
671
672 bool isVISrc_64_bf16() const {
673 return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::bf16);
674 }
675
676 bool isVISrc_64_f16() const {
677 return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::f16);
678 }
679
680 bool isVISrc_64_b32() const {
681 return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::i32);
682 }
683
684 bool isVISrc_64B64() const {
685 return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::i64);
686 }
687
688 bool isVISrc_64_f64() const {
689 return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::f64);
690 }
691
692 bool isVISrc_64V2FP32() const {
693 return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::f32);
694 }
695
696 bool isVISrc_64V2INT32() const {
697 return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::i32);
698 }
699
700 bool isVISrc_256_b32() const {
701 return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::i32);
702 }
703
704 bool isVISrc_256_f32() const {
705 return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::f32);
706 }
707
708 bool isVISrc_256B64() const {
709 return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::i64);
710 }
711
712 bool isVISrc_256_f64() const {
713 return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::f64);
714 }
715
716 bool isVISrc_128B16() const {
717 return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::i16);
718 }
719
720 bool isVISrc_128V2B16() const {
721 return isVISrc_128B16();
722 }
723
724 bool isVISrc_128_b32() const {
725 return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::i32);
726 }
727
728 bool isVISrc_128_f32() const {
729 return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::f32);
730 }
731
732 bool isVISrc_256V2FP32() const {
733 return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::f32);
734 }
735
736 bool isVISrc_256V2INT32() const {
737 return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::i32);
738 }
739
740 bool isVISrc_512_b32() const {
741 return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::i32);
742 }
743
744 bool isVISrc_512B16() const {
745 return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::i16);
746 }
747
748 bool isVISrc_512V2B16() const {
749 return isVISrc_512B16();
750 }
751
752 bool isVISrc_512_f32() const {
753 return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::f32);
754 }
755
756 bool isVISrc_512F16() const {
757 return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::f16);
758 }
759
760 bool isVISrc_512V2F16() const {
761 return isVISrc_512F16() || isVISrc_512_b32();
762 }
763
764 bool isVISrc_1024_b32() const {
765 return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::i32);
766 }
767
768 bool isVISrc_1024B16() const {
769 return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::i16);
770 }
771
772 bool isVISrc_1024V2B16() const {
773 return isVISrc_1024B16();
774 }
775
776 bool isVISrc_1024_f32() const {
777 return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::f32);
778 }
779
780 bool isVISrc_1024F16() const {
781 return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::f16);
782 }
783
784 bool isVISrc_1024V2F16() const {
785 return isVISrc_1024F16() || isVISrc_1024_b32();
786 }
787
788 bool isAISrcB32() const {
789 return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i32);
790 }
791
792 bool isAISrcB16() const {
793 return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i16);
794 }
795
796 bool isAISrcV2B16() const {
797 return isAISrcB16();
798 }
799
800 bool isAISrcF32() const {
801 return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f32);
802 }
803
804 bool isAISrcF16() const {
805 return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f16);
806 }
807
808 bool isAISrcV2F16() const {
809 return isAISrcF16() || isAISrcB32();
810 }
811
812 bool isAISrc_64B64() const {
813 return isRegOrInlineNoMods(AMDGPU::AReg_64RegClassID, MVT::i64);
814 }
815
816 bool isAISrc_64_f64() const {
817 return isRegOrInlineNoMods(AMDGPU::AReg_64RegClassID, MVT::f64);
818 }
819
820 bool isAISrc_128_b32() const {
821 return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i32);
822 }
823
824 bool isAISrc_128B16() const {
825 return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i16);
826 }
827
828 bool isAISrc_128V2B16() const {
829 return isAISrc_128B16();
830 }
831
832 bool isAISrc_128_f32() const {
833 return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f32);
834 }
835
836 bool isAISrc_128F16() const {
837 return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f16);
838 }
839
840 bool isAISrc_128V2F16() const {
841 return isAISrc_128F16() || isAISrc_128_b32();
842 }
843
844 bool isVISrc_128_bf16() const {
845 return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::bf16);
846 }
847
848 bool isVISrc_128_f16() const {
849 return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::f16);
850 }
851
852 bool isVISrc_128V2F16() const {
853 return isVISrc_128_f16() || isVISrc_128_b32();
854 }
855
856 bool isAISrc_256B64() const {
857 return isRegOrInlineNoMods(AMDGPU::AReg_256RegClassID, MVT::i64);
858 }
859
860 bool isAISrc_256_f64() const {
861 return isRegOrInlineNoMods(AMDGPU::AReg_256RegClassID, MVT::f64);
862 }
863
864 bool isAISrc_512_b32() const {
865 return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i32);
866 }
867
868 bool isAISrc_512B16() const {
869 return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i16);
870 }
871
872 bool isAISrc_512V2B16() const {
873 return isAISrc_512B16();
874 }
875
876 bool isAISrc_512_f32() const {
877 return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f32);
878 }
879
880 bool isAISrc_512F16() const {
881 return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f16);
882 }
883
884 bool isAISrc_512V2F16() const {
885 return isAISrc_512F16() || isAISrc_512_b32();
886 }
887
888 bool isAISrc_1024_b32() const {
889 return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i32);
890 }
891
892 bool isAISrc_1024B16() const {
893 return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i16);
894 }
895
896 bool isAISrc_1024V2B16() const {
897 return isAISrc_1024B16();
898 }
899
900 bool isAISrc_1024_f32() const {
901 return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f32);
902 }
903
904 bool isAISrc_1024F16() const {
905 return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f16);
906 }
907
908 bool isAISrc_1024V2F16() const {
909 return isAISrc_1024F16() || isAISrc_1024_b32();
910 }
911
912 bool isKImmFP32() const {
913 return isLiteralImm(MVT::f32);
914 }
915
916 bool isKImmFP16() const {
917 return isLiteralImm(MVT::f16);
918 }
919
920 bool isMem() const override {
921 return false;
922 }
923
924 bool isExpr() const {
925 return Kind == Expression;
926 }
927
928 bool isSOPPBrTarget() const { return isExpr() || isImm(); }
929
930 bool isSWaitCnt() const;
931 bool isDepCtr() const;
932 bool isSDelayALU() const;
933 bool isHwreg() const;
934 bool isSendMsg() const;
935 bool isSplitBarrier() const;
936 bool isSwizzle() const;
937 bool isSMRDOffset8() const;
938 bool isSMEMOffset() const;
939 bool isSMRDLiteralOffset() const;
940 bool isDPP8() const;
941 bool isDPPCtrl() const;
942 bool isBLGP() const;
943 bool isGPRIdxMode() const;
944 bool isS16Imm() const;
945 bool isU16Imm() const;
946 bool isEndpgm() const;
947
948 auto getPredicate(std::function<bool(const AMDGPUOperand &Op)> P) const {
949 return [=](){ return P(*this); };
950 }
951
952 StringRef getToken() const {
953 assert(isToken());
954 return StringRef(Tok.Data, Tok.Length);
955 }
956
957 int64_t getImm() const {
958 assert(isImm());
959 return Imm.Val;
960 }
961
962 void setImm(int64_t Val) {
963 assert(isImm());
964 Imm.Val = Val;
965 }
966
967 ImmTy getImmTy() const {
968 assert(isImm());
969 return Imm.Type;
970 }
971
972 MCRegister getReg() const override {
973 assert(isRegKind());
974 return Reg.RegNo;
975 }
976
977 SMLoc getStartLoc() const override {
978 return StartLoc;
979 }
980
981 SMLoc getEndLoc() const override {
982 return EndLoc;
983 }
984
985 SMRange getLocRange() const {
986 return SMRange(StartLoc, EndLoc);
987 }
988
989 Modifiers getModifiers() const {
990 assert(isRegKind() || isImmTy(ImmTyNone));
991 return isRegKind() ? Reg.Mods : Imm.Mods;
992 }
993
994 void setModifiers(Modifiers Mods) {
995 assert(isRegKind() || isImmTy(ImmTyNone));
996 if (isRegKind())
997 Reg.Mods = Mods;
998 else
999 Imm.Mods = Mods;
1000 }
1001
1002 bool hasModifiers() const {
1003 return getModifiers().hasModifiers();
1004 }
1005
1006 bool hasFPModifiers() const {
1007 return getModifiers().hasFPModifiers();
1008 }
1009
1010 bool hasIntModifiers() const {
1011 return getModifiers().hasIntModifiers();
1012 }
1013
1014 uint64_t applyInputFPModifiers(uint64_t Val, unsigned Size) const;
1015
1016 void addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers = true) const;
1017
1018 void addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const;
1019
1020 void addRegOperands(MCInst &Inst, unsigned N) const;
1021
1022 void addRegOrImmOperands(MCInst &Inst, unsigned N) const {
1023 if (isRegKind())
1024 addRegOperands(Inst, N);
1025 else
1026 addImmOperands(Inst, N);
1027 }
1028
1029 void addRegOrImmWithInputModsOperands(MCInst &Inst, unsigned N) const {
1030 Modifiers Mods = getModifiers();
1031 Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand()));
1032 if (isRegKind()) {
1033 addRegOperands(Inst, N);
1034 } else {
1035 addImmOperands(Inst, N, false);
1036 }
1037 }
1038
1039 void addRegOrImmWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
1040 assert(!hasIntModifiers());
1041 addRegOrImmWithInputModsOperands(Inst, N);
1042 }
1043
1044 void addRegOrImmWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
1045 assert(!hasFPModifiers());
1046 addRegOrImmWithInputModsOperands(Inst, N);
1047 }
1048
1049 void addRegWithInputModsOperands(MCInst &Inst, unsigned N) const {
1050 Modifiers Mods = getModifiers();
1051 Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand()));
1052 assert(isRegKind());
1053 addRegOperands(Inst, N);
1054 }
1055
1056 void addRegWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
1057 assert(!hasIntModifiers());
1058 addRegWithInputModsOperands(Inst, N);
1059 }
1060
1061 void addRegWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
1062 assert(!hasFPModifiers());
1063 addRegWithInputModsOperands(Inst, N);
1064 }
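  // Note: the addRegOrImmWithInputModsOperands/addRegWithInputModsOperands
  // helpers above emit the modifier bits as an immediate operand first and the
  // source operand second, matching the (src_modifiers, src) operand pairs of
  // VOP3-style instruction definitions.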
1065
1066 static void printImmTy(raw_ostream& OS, ImmTy Type) {
1067 // clang-format off
1068 switch (Type) {
1069 case ImmTyNone: OS << "None"; break;
1070 case ImmTyGDS: OS << "GDS"; break;
1071 case ImmTyLDS: OS << "LDS"; break;
1072 case ImmTyOffen: OS << "Offen"; break;
1073 case ImmTyIdxen: OS << "Idxen"; break;
1074 case ImmTyAddr64: OS << "Addr64"; break;
1075 case ImmTyOffset: OS << "Offset"; break;
1076 case ImmTyInstOffset: OS << "InstOffset"; break;
1077 case ImmTyOffset0: OS << "Offset0"; break;
1078 case ImmTyOffset1: OS << "Offset1"; break;
1079 case ImmTySMEMOffsetMod: OS << "SMEMOffsetMod"; break;
1080 case ImmTyCPol: OS << "CPol"; break;
1081 case ImmTyIndexKey8bit: OS << "index_key"; break;
1082 case ImmTyIndexKey16bit: OS << "index_key"; break;
1083 case ImmTyTFE: OS << "TFE"; break;
1084 case ImmTyD16: OS << "D16"; break;
1085 case ImmTyFORMAT: OS << "FORMAT"; break;
1086 case ImmTyClamp: OS << "Clamp"; break;
1087 case ImmTyOModSI: OS << "OModSI"; break;
1088 case ImmTyDPP8: OS << "DPP8"; break;
1089 case ImmTyDppCtrl: OS << "DppCtrl"; break;
1090 case ImmTyDppRowMask: OS << "DppRowMask"; break;
1091 case ImmTyDppBankMask: OS << "DppBankMask"; break;
1092 case ImmTyDppBoundCtrl: OS << "DppBoundCtrl"; break;
1093 case ImmTyDppFI: OS << "DppFI"; break;
1094 case ImmTySDWADstSel: OS << "SDWADstSel"; break;
1095 case ImmTySDWASrc0Sel: OS << "SDWASrc0Sel"; break;
1096 case ImmTySDWASrc1Sel: OS << "SDWASrc1Sel"; break;
1097 case ImmTySDWADstUnused: OS << "SDWADstUnused"; break;
1098 case ImmTyDMask: OS << "DMask"; break;
1099 case ImmTyDim: OS << "Dim"; break;
1100 case ImmTyUNorm: OS << "UNorm"; break;
1101 case ImmTyDA: OS << "DA"; break;
1102 case ImmTyR128A16: OS << "R128A16"; break;
1103 case ImmTyA16: OS << "A16"; break;
1104 case ImmTyLWE: OS << "LWE"; break;
1105 case ImmTyOff: OS << "Off"; break;
1106 case ImmTyExpTgt: OS << "ExpTgt"; break;
1107 case ImmTyExpCompr: OS << "ExpCompr"; break;
1108 case ImmTyExpVM: OS << "ExpVM"; break;
1109 case ImmTyHwreg: OS << "Hwreg"; break;
1110 case ImmTySendMsg: OS << "SendMsg"; break;
1111 case ImmTyInterpSlot: OS << "InterpSlot"; break;
1112 case ImmTyInterpAttr: OS << "InterpAttr"; break;
1113 case ImmTyInterpAttrChan: OS << "InterpAttrChan"; break;
1114 case ImmTyOpSel: OS << "OpSel"; break;
1115 case ImmTyOpSelHi: OS << "OpSelHi"; break;
1116 case ImmTyNegLo: OS << "NegLo"; break;
1117 case ImmTyNegHi: OS << "NegHi"; break;
1118 case ImmTySwizzle: OS << "Swizzle"; break;
1119 case ImmTyGprIdxMode: OS << "GprIdxMode"; break;
1120 case ImmTyHigh: OS << "High"; break;
1121 case ImmTyBLGP: OS << "BLGP"; break;
1122 case ImmTyCBSZ: OS << "CBSZ"; break;
1123 case ImmTyABID: OS << "ABID"; break;
1124 case ImmTyEndpgm: OS << "Endpgm"; break;
1125 case ImmTyWaitVDST: OS << "WaitVDST"; break;
1126 case ImmTyWaitEXP: OS << "WaitEXP"; break;
1127 case ImmTyWaitVAVDst: OS << "WaitVAVDst"; break;
1128 case ImmTyWaitVMVSrc: OS << "WaitVMVSrc"; break;
1129 case ImmTyByteSel: OS << "ByteSel"; break;
1130 }
1131 // clang-format on
1132 }
1133
1134 void print(raw_ostream &OS) const override {
1135 switch (Kind) {
1136 case Register:
1137 OS << "<register " << AMDGPUInstPrinter::getRegisterName(getReg())
1138 << " mods: " << Reg.Mods << '>';
1139 break;
1140 case Immediate:
1141 OS << '<' << getImm();
1142 if (getImmTy() != ImmTyNone) {
1143 OS << " type: "; printImmTy(OS, getImmTy());
1144 }
1145 OS << " mods: " << Imm.Mods << '>';
1146 break;
1147 case Token:
1148 OS << '\'' << getToken() << '\'';
1149 break;
1150 case Expression:
1151 OS << "<expr " << *Expr << '>';
1152 break;
1153 }
1154 }
1155
1156 static AMDGPUOperand::Ptr CreateImm(const AMDGPUAsmParser *AsmParser,
1157 int64_t Val, SMLoc Loc,
1158 ImmTy Type = ImmTyNone,
1159 bool IsFPImm = false) {
1160 auto Op = std::make_unique<AMDGPUOperand>(Immediate, AsmParser);
1161 Op->Imm.Val = Val;
1162 Op->Imm.IsFPImm = IsFPImm;
1163 Op->Imm.Kind = ImmKindTyNone;
1164 Op->Imm.Type = Type;
1165 Op->Imm.Mods = Modifiers();
1166 Op->StartLoc = Loc;
1167 Op->EndLoc = Loc;
1168 return Op;
1169 }
1170
1171 static AMDGPUOperand::Ptr CreateToken(const AMDGPUAsmParser *AsmParser,
1172 StringRef Str, SMLoc Loc,
1173 bool HasExplicitEncodingSize = true) {
1174 auto Res = std::make_unique<AMDGPUOperand>(Token, AsmParser);
1175 Res->Tok.Data = Str.data();
1176 Res->Tok.Length = Str.size();
1177 Res->StartLoc = Loc;
1178 Res->EndLoc = Loc;
1179 return Res;
1180 }
1181
1182 static AMDGPUOperand::Ptr CreateReg(const AMDGPUAsmParser *AsmParser,
1183 unsigned RegNo, SMLoc S,
1184 SMLoc E) {
1185 auto Op = std::make_unique<AMDGPUOperand>(Register, AsmParser);
1186 Op->Reg.RegNo = RegNo;
1187 Op->Reg.Mods = Modifiers();
1188 Op->StartLoc = S;
1189 Op->EndLoc = E;
1190 return Op;
1191 }
1192
1193 static AMDGPUOperand::Ptr CreateExpr(const AMDGPUAsmParser *AsmParser,
1194 const class MCExpr *Expr, SMLoc S) {
1195 auto Op = std::make_unique<AMDGPUOperand>(Expression, AsmParser);
1196 Op->Expr = Expr;
1197 Op->StartLoc = S;
1198 Op->EndLoc = S;
1199 return Op;
1200 }
1201};
1202
1203raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods) {
1204 OS << "abs:" << Mods.Abs << " neg: " << Mods.Neg << " sext:" << Mods.Sext;
1205 return OS;
1206}
1207
1208//===----------------------------------------------------------------------===//
1209// AsmParser
1210//===----------------------------------------------------------------------===//
1211
1212// Holds info related to the current kernel, e.g. count of SGPRs used.
1213// Kernel scope begins at .amdgpu_hsa_kernel directive, ends at next
1214// .amdgpu_hsa_kernel or at EOF.
1215class KernelScopeInfo {
1216 int SgprIndexUnusedMin = -1;
1217 int VgprIndexUnusedMin = -1;
1218 int AgprIndexUnusedMin = -1;
1219 MCContext *Ctx = nullptr;
1220 MCSubtargetInfo const *MSTI = nullptr;
1221
1222 void usesSgprAt(int i) {
1223 if (i >= SgprIndexUnusedMin) {
1224 SgprIndexUnusedMin = ++i;
1225 if (Ctx) {
1226 MCSymbol* const Sym =
1227 Ctx->getOrCreateSymbol(Twine(".kernel.sgpr_count"));
1228 Sym->setVariableValue(MCConstantExpr::create(SgprIndexUnusedMin, *Ctx));
1229 }
1230 }
1231 }
1232
1233 void usesVgprAt(int i) {
1234 if (i >= VgprIndexUnusedMin) {
1235 VgprIndexUnusedMin = ++i;
1236 if (Ctx) {
1237 MCSymbol* const Sym =
1238 Ctx->getOrCreateSymbol(Twine(".kernel.vgpr_count"));
1239 int totalVGPR = getTotalNumVGPRs(isGFX90A(*MSTI), AgprIndexUnusedMin,
1240 VgprIndexUnusedMin);
1241 Sym->setVariableValue(MCConstantExpr::create(totalVGPR, *Ctx));
1242 }
1243 }
1244 }
1245
1246 void usesAgprAt(int i) {
1247 // Instruction will error in AMDGPUAsmParser::MatchAndEmitInstruction
1248 if (!hasMAIInsts(*MSTI))
1249 return;
1250
1251 if (i >= AgprIndexUnusedMin) {
1252 AgprIndexUnusedMin = ++i;
1253 if (Ctx) {
1254 MCSymbol* const Sym =
1255 Ctx->getOrCreateSymbol(Twine(".kernel.agpr_count"));
1256 Sym->setVariableValue(MCConstantExpr::create(AgprIndexUnusedMin, *Ctx));
1257
1258 // Also update vgpr_count (dependent on agpr_count for gfx908/gfx90a)
1259 MCSymbol* const vSym =
1260 Ctx->getOrCreateSymbol(Twine(".kernel.vgpr_count"));
1261 int totalVGPR = getTotalNumVGPRs(isGFX90A(*MSTI), AgprIndexUnusedMin,
1262 VgprIndexUnusedMin);
1263 vSym->setVariableValue(MCConstantExpr::create(totalVGPR, *Ctx));
1264 }
1265 }
1266 }
1267
1268public:
1269 KernelScopeInfo() = default;
1270
1271 void initialize(MCContext &Context) {
1272 Ctx = &Context;
1273 MSTI = Ctx->getSubtargetInfo();
1274
1275 usesSgprAt(SgprIndexUnusedMin = -1);
1276 usesVgprAt(VgprIndexUnusedMin = -1);
1277 if (hasMAIInsts(*MSTI)) {
1278 usesAgprAt(AgprIndexUnusedMin = -1);
1279 }
1280 }
1281
1282 void usesRegister(RegisterKind RegKind, unsigned DwordRegIndex,
1283 unsigned RegWidth) {
1284 switch (RegKind) {
1285 case IS_SGPR:
1286 usesSgprAt(DwordRegIndex + divideCeil(RegWidth, 32) - 1);
1287 break;
1288 case IS_AGPR:
1289 usesAgprAt(DwordRegIndex + divideCeil(RegWidth, 32) - 1);
1290 break;
1291 case IS_VGPR:
1292 usesVgprAt(DwordRegIndex + divideCeil(RegWidth, 32) - 1);
1293 break;
1294 default:
1295 break;
1296 }
1297 }
1298};
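// Illustrative example: usesRegister(IS_SGPR, /*DwordRegIndex=*/4,
// /*RegWidth=*/64) covers s[4:5], so SgprIndexUnusedMin becomes 6 and the
// .kernel.sgpr_count symbol is raised to at least 6.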
1299
1300class AMDGPUAsmParser : public MCTargetAsmParser {
1301 MCAsmParser &Parser;
1302
1303 unsigned ForcedEncodingSize = 0;
1304 bool ForcedDPP = false;
1305 bool ForcedSDWA = false;
1306 KernelScopeInfo KernelScope;
1307
1308 /// @name Auto-generated Match Functions
1309 /// {
1310
1311#define GET_ASSEMBLER_HEADER
1312#include "AMDGPUGenAsmMatcher.inc"
1313
1314 /// }
1315
1316private:
1317 void createConstantSymbol(StringRef Id, int64_t Val);
1318
1319 bool ParseAsAbsoluteExpression(uint32_t &Ret);
1320 bool OutOfRangeError(SMRange Range);
1321 /// Calculate VGPR/SGPR blocks required for given target, reserved
1322 /// registers, and user-specified NextFreeXGPR values.
1323 ///
1324 /// \param Features [in] Target features, used for bug corrections.
1325 /// \param VCCUsed [in] Whether VCC special SGPR is reserved.
1326 /// \param FlatScrUsed [in] Whether FLAT_SCRATCH special SGPR is reserved.
1327 /// \param XNACKUsed [in] Whether XNACK_MASK special SGPR is reserved.
1328 /// \param EnableWavefrontSize32 [in] Value of ENABLE_WAVEFRONT_SIZE32 kernel
1329 /// descriptor field, if valid.
1330 /// \param NextFreeVGPR [in] Max VGPR number referenced, plus one.
1331 /// \param VGPRRange [in] Token range, used for VGPR diagnostics.
1332 /// \param NextFreeSGPR [in] Max SGPR number referenced, plus one.
1333 /// \param SGPRRange [in] Token range, used for SGPR diagnostics.
1334 /// \param VGPRBlocks [out] Result VGPR block count.
1335 /// \param SGPRBlocks [out] Result SGPR block count.
1336 bool calculateGPRBlocks(const FeatureBitset &Features, const MCExpr *VCCUsed,
1337 const MCExpr *FlatScrUsed, bool XNACKUsed,
1338 std::optional<bool> EnableWavefrontSize32,
1339 const MCExpr *NextFreeVGPR, SMRange VGPRRange,
1340 const MCExpr *NextFreeSGPR, SMRange SGPRRange,
1341 const MCExpr *&VGPRBlocks, const MCExpr *&SGPRBlocks);
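  // A sketch of the intended computation (granule sizes and workarounds are
  // subtarget-dependent): VGPRBlocks = alignTo(max(1, NextFreeVGPR), Granule)
  // / Granule - 1, with the VCC/FLAT_SCRATCH/XNACK reservations folded into
  // NextFreeSGPR before the analogous SGPR block computation.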
1342 bool ParseDirectiveAMDGCNTarget();
1343 bool ParseDirectiveAMDHSACodeObjectVersion();
1344 bool ParseDirectiveAMDHSAKernel();
1345 bool ParseAMDKernelCodeTValue(StringRef ID, AMDGPUMCKernelCodeT &Header);
1346 bool ParseDirectiveAMDKernelCodeT();
1347 // TODO: Possibly make subtargetHasRegister const.
1348 bool subtargetHasRegister(const MCRegisterInfo &MRI, unsigned RegNo);
1349 bool ParseDirectiveAMDGPUHsaKernel();
1350
1351 bool ParseDirectiveISAVersion();
1352 bool ParseDirectiveHSAMetadata();
1353 bool ParseDirectivePALMetadataBegin();
1354 bool ParseDirectivePALMetadata();
1355 bool ParseDirectiveAMDGPULDS();
1356
1357 /// Common code to parse out a block of text (typically YAML) between start and
1358 /// end directives.
1359 bool ParseToEndDirective(const char *AssemblerDirectiveBegin,
1360 const char *AssemblerDirectiveEnd,
1361 std::string &CollectString);
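  // Used, for example, to collect the YAML text between .amdgpu_metadata and
  // .end_amdgpu_metadata when parsing HSA metadata.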
1362
1363 bool AddNextRegisterToList(unsigned& Reg, unsigned& RegWidth,
1364 RegisterKind RegKind, unsigned Reg1, SMLoc Loc);
1365 bool ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
1366 unsigned &RegNum, unsigned &RegWidth,
1367 bool RestoreOnFailure = false);
1368 bool ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
1369 unsigned &RegNum, unsigned &RegWidth,
1370 SmallVectorImpl<AsmToken> &Tokens);
1371 unsigned ParseRegularReg(RegisterKind &RegKind, unsigned &RegNum,
1372 unsigned &RegWidth,
1373 SmallVectorImpl<AsmToken> &Tokens);
1374 unsigned ParseSpecialReg(RegisterKind &RegKind, unsigned &RegNum,
1375 unsigned &RegWidth,
1376 SmallVectorImpl<AsmToken> &Tokens);
1377 unsigned ParseRegList(RegisterKind &RegKind, unsigned &RegNum,
1378 unsigned &RegWidth, SmallVectorImpl<AsmToken> &Tokens);
1379 bool ParseRegRange(unsigned& Num, unsigned& Width);
1380 unsigned getRegularReg(RegisterKind RegKind, unsigned RegNum, unsigned SubReg,
1381 unsigned RegWidth, SMLoc Loc);
1382
1383 bool isRegister();
1384 bool isRegister(const AsmToken &Token, const AsmToken &NextToken) const;
1385 std::optional<StringRef> getGprCountSymbolName(RegisterKind RegKind);
1386 void initializeGprCountSymbol(RegisterKind RegKind);
1387 bool updateGprCountSymbols(RegisterKind RegKind, unsigned DwordRegIndex,
1388 unsigned RegWidth);
1389 void cvtMubufImpl(MCInst &Inst, const OperandVector &Operands,
1390 bool IsAtomic);
1391
1392public:
1393 enum OperandMode {
1394 OperandMode_Default,
1395 OperandMode_NSA,
1396 };
1397
1398 using OptionalImmIndexMap = std::map<AMDGPUOperand::ImmTy, unsigned>;
1399
1400 AMDGPUAsmParser(const MCSubtargetInfo &STI, MCAsmParser &_Parser,
1401 const MCInstrInfo &MII,
1402 const MCTargetOptions &Options)
1403 : MCTargetAsmParser(Options, STI, MII), Parser(_Parser) {
1405
1406 if (getFeatureBits().none()) {
1407 // Set default features.
1408 copySTI().ToggleFeature("southern-islands");
1409 }
1410
1411 FeatureBitset FB = getFeatureBits();
1412 if (!FB[AMDGPU::FeatureWavefrontSize64] &&
1413 !FB[AMDGPU::FeatureWavefrontSize32]) {
1414 // If there is no default wave size it must be a generation before gfx10;
1415 // these have FeatureWavefrontSize64 in their definition already. For
1416 // gfx10+, set wave32 as the default.
1417 copySTI().ToggleFeature(AMDGPU::FeatureWavefrontSize32);
1418 }
1419
1420 setAvailableFeatures(ComputeAvailableFeatures(getFeatureBits()));
1421
1422 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
1423 if (ISA.Major >= 6 && isHsaAbi(getSTI())) {
1424 createConstantSymbol(".amdgcn.gfx_generation_number", ISA.Major);
1425 createConstantSymbol(".amdgcn.gfx_generation_minor", ISA.Minor);
1426 createConstantSymbol(".amdgcn.gfx_generation_stepping", ISA.Stepping);
1427 } else {
1428 createConstantSymbol(".option.machine_version_major", ISA.Major);
1429 createConstantSymbol(".option.machine_version_minor", ISA.Minor);
1430 createConstantSymbol(".option.machine_version_stepping", ISA.Stepping);
1431 }
1432 if (ISA.Major >= 6 && isHsaAbi(getSTI())) {
1433 initializeGprCountSymbol(IS_VGPR);
1434 initializeGprCountSymbol(IS_SGPR);
1435 } else
1436 KernelScope.initialize(getContext());
1437
1438 for (auto [Symbol, Code] : AMDGPU::UCVersion::getGFXVersions())
1439 createConstantSymbol(Symbol, Code);
1440
1441 createConstantSymbol("UC_VERSION_W64_BIT", 0x2000);
1442 createConstantSymbol("UC_VERSION_W32_BIT", 0x4000);
1443 createConstantSymbol("UC_VERSION_MDP_BIT", 0x8000);
1444 }
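  // The symbols created above (e.g. .amdgcn.gfx_generation_number or
  // .option.machine_version_major) can later be referenced from assembly
  // expressions.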
1445
1446 bool hasMIMG_R128() const {
1447 return AMDGPU::hasMIMG_R128(getSTI());
1448 }
1449
1450 bool hasPackedD16() const {
1451 return AMDGPU::hasPackedD16(getSTI());
1452 }
1453
1454 bool hasA16() const { return AMDGPU::hasA16(getSTI()); }
1455
1456 bool hasG16() const { return AMDGPU::hasG16(getSTI()); }
1457
1458 bool hasGDS() const { return AMDGPU::hasGDS(getSTI()); }
1459
1460 bool isSI() const {
1461 return AMDGPU::isSI(getSTI());
1462 }
1463
1464 bool isCI() const {
1465 return AMDGPU::isCI(getSTI());
1466 }
1467
1468 bool isVI() const {
1469 return AMDGPU::isVI(getSTI());
1470 }
1471
1472 bool isGFX9() const {
1473 return AMDGPU::isGFX9(getSTI());
1474 }
1475
1476 // TODO: isGFX90A is also true for GFX940; this needs to be cleaned up.
1477 bool isGFX90A() const {
1478 return AMDGPU::isGFX90A(getSTI());
1479 }
1480
1481 bool isGFX940() const {
1482 return AMDGPU::isGFX940(getSTI());
1483 }
1484
1485 bool isGFX9Plus() const {
1486 return AMDGPU::isGFX9Plus(getSTI());
1487 }
1488
1489 bool isGFX10() const {
1490 return AMDGPU::isGFX10(getSTI());
1491 }
1492
1493 bool isGFX10Plus() const { return AMDGPU::isGFX10Plus(getSTI()); }
1494
1495 bool isGFX11() const {
1496 return AMDGPU::isGFX11(getSTI());
1497 }
1498
1499 bool isGFX11Plus() const {
1500 return AMDGPU::isGFX11Plus(getSTI());
1501 }
1502
1503 bool isGFX12() const { return AMDGPU::isGFX12(getSTI()); }
1504
1505 bool isGFX12Plus() const { return AMDGPU::isGFX12Plus(getSTI()); }
1506
1507 bool isGFX10_AEncoding() const { return AMDGPU::isGFX10_AEncoding(getSTI()); }
1508
1509 bool isGFX10_BEncoding() const {
1510 return AMDGPU::isGFX10_BEncoding(getSTI());
1511 }
1512
1513 bool hasInv2PiInlineImm() const {
1514 return getFeatureBits()[AMDGPU::FeatureInv2PiInlineImm];
1515 }
1516
1517 bool hasFlatOffsets() const {
1518 return getFeatureBits()[AMDGPU::FeatureFlatInstOffsets];
1519 }
1520
1521 bool hasArchitectedFlatScratch() const {
1522 return getFeatureBits()[AMDGPU::FeatureArchitectedFlatScratch];
1523 }
1524
1525 bool hasSGPR102_SGPR103() const {
1526 return !isVI() && !isGFX9();
1527 }
1528
1529 bool hasSGPR104_SGPR105() const { return isGFX10Plus(); }
1530
1531 bool hasIntClamp() const {
1532 return getFeatureBits()[AMDGPU::FeatureIntClamp];
1533 }
1534
1535 bool hasPartialNSAEncoding() const {
1536 return getFeatureBits()[AMDGPU::FeaturePartialNSAEncoding];
1537 }
1538
1539 unsigned getNSAMaxSize(bool HasSampler = false) const {
1540 return AMDGPU::getNSAMaxSize(getSTI(), HasSampler);
1541 }
1542
1543 unsigned getMaxNumUserSGPRs() const {
1544 return AMDGPU::getMaxNumUserSGPRs(getSTI());
1545 }
1546
1547 bool hasKernargPreload() const { return AMDGPU::hasKernargPreload(getSTI()); }
1548
1549 AMDGPUTargetStreamer &getTargetStreamer() {
1550 MCTargetStreamer &TS = *getParser().getStreamer().getTargetStreamer();
1551 return static_cast<AMDGPUTargetStreamer &>(TS);
1552 }
1553
1554 const MCRegisterInfo *getMRI() const {
1555 // We need this const_cast because for some reason getContext() is not const
1556 // in MCAsmParser.
1557 return const_cast<AMDGPUAsmParser*>(this)->getContext().getRegisterInfo();
1558 }
1559
1560 const MCInstrInfo *getMII() const {
1561 return &MII;
1562 }
1563
1564 const FeatureBitset &getFeatureBits() const {
1565 return getSTI().getFeatureBits();
1566 }
1567
1568 void setForcedEncodingSize(unsigned Size) { ForcedEncodingSize = Size; }
1569 void setForcedDPP(bool ForceDPP_) { ForcedDPP = ForceDPP_; }
1570 void setForcedSDWA(bool ForceSDWA_) { ForcedSDWA = ForceSDWA_; }
1571
1572 unsigned getForcedEncodingSize() const { return ForcedEncodingSize; }
1573 bool isForcedVOP3() const { return ForcedEncodingSize == 64; }
1574 bool isForcedDPP() const { return ForcedDPP; }
1575 bool isForcedSDWA() const { return ForcedSDWA; }
1576 ArrayRef<unsigned> getMatchedVariants() const;
1577 StringRef getMatchedVariantName() const;
1578
1579 std::unique_ptr<AMDGPUOperand> parseRegister(bool RestoreOnFailure = false);
1580 bool ParseRegister(MCRegister &RegNo, SMLoc &StartLoc, SMLoc &EndLoc,
1581 bool RestoreOnFailure);
1582 bool parseRegister(MCRegister &Reg, SMLoc &StartLoc, SMLoc &EndLoc) override;
1583 ParseStatus tryParseRegister(MCRegister &Reg, SMLoc &StartLoc,
1584 SMLoc &EndLoc) override;
1585 unsigned checkTargetMatchPredicate(MCInst &Inst) override;
1586 unsigned validateTargetOperandClass(MCParsedAsmOperand &Op,
1587 unsigned Kind) override;
1588 bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
1589 OperandVector &Operands, MCStreamer &Out,
1590 uint64_t &ErrorInfo,
1591 bool MatchingInlineAsm) override;
1592 bool ParseDirective(AsmToken DirectiveID) override;
1593 ParseStatus parseOperand(OperandVector &Operands, StringRef Mnemonic,
1594 OperandMode Mode = OperandMode_Default);
1595 StringRef parseMnemonicSuffix(StringRef Name);
1596 bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
1597 SMLoc NameLoc, OperandVector &Operands) override;
1598 //bool ProcessInstruction(MCInst &Inst);
1599
1601
1602 ParseStatus parseIntWithPrefix(const char *Prefix, int64_t &Int);
1603
1604 ParseStatus
1605 parseIntWithPrefix(const char *Prefix, OperandVector &Operands,
1606 AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
1607 std::function<bool(int64_t &)> ConvertResult = nullptr);
1608
1609 ParseStatus parseOperandArrayWithPrefix(
1610 const char *Prefix, OperandVector &Operands,
1611 AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
1612 bool (*ConvertResult)(int64_t &) = nullptr);
1613
1614 ParseStatus
1615 parseNamedBit(StringRef Name, OperandVector &Operands,
1616 AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone);
1617 unsigned getCPolKind(StringRef Id, StringRef Mnemo, bool &Disabling) const;
1619 ParseStatus parseScope(OperandVector &Operands, int64_t &Scope);
1620 ParseStatus parseTH(OperandVector &Operands, int64_t &TH);
1621 ParseStatus parseStringWithPrefix(StringRef Prefix, StringRef &Value,
1622 SMLoc &StringLoc);
1623 ParseStatus parseStringOrIntWithPrefix(OperandVector &Operands,
1624 StringRef Name,
1625 ArrayRef<const char *> Ids,
1626 int64_t &IntVal);
1627 ParseStatus parseStringOrIntWithPrefix(OperandVector &Operands,
1628 StringRef Name,
1629 ArrayRef<const char *> Ids,
1630 AMDGPUOperand::ImmTy Type);
1631
1632 bool isModifier();
1633 bool isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
1634 bool isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
1635 bool isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
1636 bool isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const;
1637 bool parseSP3NegModifier();
1638 ParseStatus parseImm(OperandVector &Operands, bool HasSP3AbsModifier = false,
1639 bool HasLit = false);
1641 ParseStatus parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod = false,
1642 bool HasLit = false);
1643 ParseStatus parseRegOrImmWithFPInputMods(OperandVector &Operands,
1644 bool AllowImm = true);
1645 ParseStatus parseRegOrImmWithIntInputMods(OperandVector &Operands,
1646 bool AllowImm = true);
1647 ParseStatus parseRegWithFPInputMods(OperandVector &Operands);
1648 ParseStatus parseRegWithIntInputMods(OperandVector &Operands);
1649 ParseStatus parseVReg32OrOff(OperandVector &Operands);
1650 ParseStatus tryParseIndexKey(OperandVector &Operands,
1651 AMDGPUOperand::ImmTy ImmTy);
1652 ParseStatus parseIndexKey8bit(OperandVector &Operands);
1653 ParseStatus parseIndexKey16bit(OperandVector &Operands);
1654
1655 ParseStatus parseDfmtNfmt(int64_t &Format);
1656 ParseStatus parseUfmt(int64_t &Format);
1657 ParseStatus parseSymbolicSplitFormat(StringRef FormatStr, SMLoc Loc,
1658 int64_t &Format);
1659 ParseStatus parseSymbolicUnifiedFormat(StringRef FormatStr, SMLoc Loc,
1660 int64_t &Format);
1661 ParseStatus parseFORMAT(OperandVector &Operands);
1662 ParseStatus parseSymbolicOrNumericFormat(int64_t &Format);
1663 ParseStatus parseNumericFormat(int64_t &Format);
1664 ParseStatus parseFlatOffset(OperandVector &Operands);
1665 ParseStatus parseR128A16(OperandVector &Operands);
1667 bool tryParseFmt(const char *Pref, int64_t MaxVal, int64_t &Val);
1668 bool matchDfmtNfmt(int64_t &Dfmt, int64_t &Nfmt, StringRef FormatStr, SMLoc Loc);
1669
1670 void cvtExp(MCInst &Inst, const OperandVector &Operands);
1671
1672 bool parseCnt(int64_t &IntVal);
1673 ParseStatus parseSWaitCnt(OperandVector &Operands);
1674
1675 bool parseDepCtr(int64_t &IntVal, unsigned &Mask);
1676 void depCtrError(SMLoc Loc, int ErrorId, StringRef DepCtrName);
1677 ParseStatus parseDepCtr(OperandVector &Operands);
1678
1679 bool parseDelay(int64_t &Delay);
1680 ParseStatus parseSDelayALU(OperandVector &Operands);
1681
1682 ParseStatus parseHwreg(OperandVector &Operands);
1683
1684private:
1685 struct OperandInfoTy {
1686 SMLoc Loc;
1687 int64_t Val;
1688 bool IsSymbolic = false;
1689 bool IsDefined = false;
1690
1691 OperandInfoTy(int64_t Val) : Val(Val) {}
1692 };
1693
1694 struct StructuredOpField : OperandInfoTy {
1695 StringLiteral Id;
1696 StringLiteral Desc;
1697 unsigned Width;
1698 bool IsDefined = false;
1699
1700 StructuredOpField(StringLiteral Id, StringLiteral Desc, unsigned Width,
1701 int64_t Default)
1702 : OperandInfoTy(Default), Id(Id), Desc(Desc), Width(Width) {}
1703 virtual ~StructuredOpField() = default;
1704
1705 bool Error(AMDGPUAsmParser &Parser, const Twine &Err) const {
1706 Parser.Error(Loc, "invalid " + Desc + ": " + Err);
1707 return false;
1708 }
1709
1710 virtual bool validate(AMDGPUAsmParser &Parser) const {
1711 if (IsSymbolic && Val == OPR_ID_UNSUPPORTED)
1712 return Error(Parser, "not supported on this GPU");
1713 if (!isUIntN(Width, Val))
1714 return Error(Parser, "only " + Twine(Width) + "-bit values are legal");
1715 return true;
1716 }
1717 };
1718
1719 ParseStatus parseStructuredOpFields(ArrayRef<StructuredOpField *> Fields);
1720 bool validateStructuredOpFields(ArrayRef<const StructuredOpField *> Fields);
1721
1722 bool parseSendMsgBody(OperandInfoTy &Msg, OperandInfoTy &Op, OperandInfoTy &Stream);
1723 bool validateSendMsg(const OperandInfoTy &Msg,
1724 const OperandInfoTy &Op,
1725 const OperandInfoTy &Stream);
1726
1727 ParseStatus parseHwregFunc(OperandInfoTy &HwReg, OperandInfoTy &Offset,
1728 OperandInfoTy &Width);
1729
1730 SMLoc getFlatOffsetLoc(const OperandVector &Operands) const;
1731 SMLoc getSMEMOffsetLoc(const OperandVector &Operands) const;
1732 SMLoc getBLGPLoc(const OperandVector &Operands) const;
1733
1734 SMLoc getOperandLoc(std::function<bool(const AMDGPUOperand&)> Test,
1735 const OperandVector &Operands) const;
1736 SMLoc getImmLoc(AMDGPUOperand::ImmTy Type, const OperandVector &Operands) const;
1737 SMLoc getRegLoc(unsigned Reg, const OperandVector &Operands) const;
1738 SMLoc getLitLoc(const OperandVector &Operands,
1739 bool SearchMandatoryLiterals = false) const;
1740 SMLoc getMandatoryLitLoc(const OperandVector &Operands) const;
1741 SMLoc getConstLoc(const OperandVector &Operands) const;
1742 SMLoc getInstLoc(const OperandVector &Operands) const;
1743
1744 bool validateInstruction(const MCInst &Inst, const SMLoc &IDLoc, const OperandVector &Operands);
1745 bool validateOffset(const MCInst &Inst, const OperandVector &Operands);
1746 bool validateFlatOffset(const MCInst &Inst, const OperandVector &Operands);
1747 bool validateSMEMOffset(const MCInst &Inst, const OperandVector &Operands);
1748 bool validateSOPLiteral(const MCInst &Inst) const;
1749 bool validateConstantBusLimitations(const MCInst &Inst, const OperandVector &Operands);
1750 bool validateVOPDRegBankConstraints(const MCInst &Inst,
1751 const OperandVector &Operands);
1752 bool validateIntClampSupported(const MCInst &Inst);
1753 bool validateMIMGAtomicDMask(const MCInst &Inst);
1754 bool validateMIMGGatherDMask(const MCInst &Inst);
1755 bool validateMovrels(const MCInst &Inst, const OperandVector &Operands);
1756 bool validateMIMGDataSize(const MCInst &Inst, const SMLoc &IDLoc);
1757 bool validateMIMGAddrSize(const MCInst &Inst, const SMLoc &IDLoc);
1758 bool validateMIMGD16(const MCInst &Inst);
1759 bool validateMIMGDim(const MCInst &Inst, const OperandVector &Operands);
1760 bool validateMIMGMSAA(const MCInst &Inst);
1761 bool validateOpSel(const MCInst &Inst);
1762 bool validateNeg(const MCInst &Inst, int OpName);
1763 bool validateDPP(const MCInst &Inst, const OperandVector &Operands);
1764 bool validateVccOperand(unsigned Reg) const;
1765 bool validateVOPLiteral(const MCInst &Inst, const OperandVector &Operands);
1766 bool validateMAIAccWrite(const MCInst &Inst, const OperandVector &Operands);
1767 bool validateMAISrc2(const MCInst &Inst, const OperandVector &Operands);
1768 bool validateMFMA(const MCInst &Inst, const OperandVector &Operands);
1769 bool validateAGPRLdSt(const MCInst &Inst) const;
1770 bool validateVGPRAlign(const MCInst &Inst) const;
1771 bool validateBLGP(const MCInst &Inst, const OperandVector &Operands);
1772 bool validateDS(const MCInst &Inst, const OperandVector &Operands);
1773 bool validateGWS(const MCInst &Inst, const OperandVector &Operands);
1774 bool validateDivScale(const MCInst &Inst);
1775 bool validateWaitCnt(const MCInst &Inst, const OperandVector &Operands);
1776 bool validateCoherencyBits(const MCInst &Inst, const OperandVector &Operands,
1777 const SMLoc &IDLoc);
1778 bool validateTHAndScopeBits(const MCInst &Inst, const OperandVector &Operands,
1779 const unsigned CPol);
1780 bool validateExeczVcczOperands(const OperandVector &Operands);
1781 bool validateTFE(const MCInst &Inst, const OperandVector &Operands);
1782 std::optional<StringRef> validateLdsDirect(const MCInst &Inst);
1783 unsigned getConstantBusLimit(unsigned Opcode) const;
1784 bool usesConstantBus(const MCInst &Inst, unsigned OpIdx);
1785 bool isInlineConstant(const MCInst &Inst, unsigned OpIdx) const;
1786 unsigned findImplicitSGPRReadInVOP(const MCInst &Inst) const;
1787
1788 bool isSupportedMnemo(StringRef Mnemo,
1789 const FeatureBitset &FBS);
1790 bool isSupportedMnemo(StringRef Mnemo,
1791 const FeatureBitset &FBS,
1792 ArrayRef<unsigned> Variants);
1793 bool checkUnsupportedInstruction(StringRef Name, const SMLoc &IDLoc);
1794
1795 bool isId(const StringRef Id) const;
1796 bool isId(const AsmToken &Token, const StringRef Id) const;
1797 bool isToken(const AsmToken::TokenKind Kind) const;
1798 StringRef getId() const;
1799 bool trySkipId(const StringRef Id);
1800 bool trySkipId(const StringRef Pref, const StringRef Id);
1801 bool trySkipId(const StringRef Id, const AsmToken::TokenKind Kind);
1802 bool trySkipToken(const AsmToken::TokenKind Kind);
1803 bool skipToken(const AsmToken::TokenKind Kind, const StringRef ErrMsg);
1804 bool parseString(StringRef &Val, const StringRef ErrMsg = "expected a string");
1805 bool parseId(StringRef &Val, const StringRef ErrMsg = "");
1806
1807 void peekTokens(MutableArrayRef<AsmToken> Tokens);
1808 AsmToken::TokenKind getTokenKind() const;
1809 bool parseExpr(int64_t &Imm, StringRef Expected = "");
1811 StringRef getTokenStr() const;
1812 AsmToken peekToken(bool ShouldSkipSpace = true);
1813 AsmToken getToken() const;
1814 SMLoc getLoc() const;
1815 void lex();
1816
1817public:
1818 void onBeginOfFile() override;
1819 bool parsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc) override;
1820
1821 ParseStatus parseCustomOperand(OperandVector &Operands, unsigned MCK);
1822
1823 ParseStatus parseExpTgt(OperandVector &Operands);
1824 ParseStatus parseSendMsg(OperandVector &Operands);
1825 ParseStatus parseInterpSlot(OperandVector &Operands);
1826 ParseStatus parseInterpAttr(OperandVector &Operands);
1827 ParseStatus parseSOPPBrTarget(OperandVector &Operands);
1828 ParseStatus parseBoolReg(OperandVector &Operands);
1829
1830 bool parseSwizzleOperand(int64_t &Op,
1831 const unsigned MinVal,
1832 const unsigned MaxVal,
1833 const StringRef ErrMsg,
1834 SMLoc &Loc);
1835 bool parseSwizzleOperands(const unsigned OpNum, int64_t* Op,
1836 const unsigned MinVal,
1837 const unsigned MaxVal,
1838 const StringRef ErrMsg);
1839 ParseStatus parseSwizzle(OperandVector &Operands);
1840 bool parseSwizzleOffset(int64_t &Imm);
1841 bool parseSwizzleMacro(int64_t &Imm);
1842 bool parseSwizzleQuadPerm(int64_t &Imm);
1843 bool parseSwizzleBitmaskPerm(int64_t &Imm);
1844 bool parseSwizzleBroadcast(int64_t &Imm);
1845 bool parseSwizzleSwap(int64_t &Imm);
1846 bool parseSwizzleReverse(int64_t &Imm);
1847
1848 ParseStatus parseGPRIdxMode(OperandVector &Operands);
1849 int64_t parseGPRIdxMacro();
1850
1851 void cvtMubuf(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false); }
1852 void cvtMubufAtomic(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, true); }
1853
1854 ParseStatus parseOModSI(OperandVector &Operands);
1855
1856 void cvtVOP3(MCInst &Inst, const OperandVector &Operands,
1857 OptionalImmIndexMap &OptionalIdx);
1858 void cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands);
1859 void cvtVOP3(MCInst &Inst, const OperandVector &Operands);
1860 void cvtVOP3P(MCInst &Inst, const OperandVector &Operands);
1861 void cvtSWMMAC(MCInst &Inst, const OperandVector &Operands);
1862
1863 void cvtVOPD(MCInst &Inst, const OperandVector &Operands);
1864 void cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands,
1865 OptionalImmIndexMap &OptionalIdx);
1866 void cvtVOP3P(MCInst &Inst, const OperandVector &Operands,
1867 OptionalImmIndexMap &OptionalIdx);
1868
1869 void cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands);
1870 void cvtVINTERP(MCInst &Inst, const OperandVector &Operands);
1871
1872 bool parseDimId(unsigned &Encoding);
1874 bool convertDppBoundCtrl(int64_t &BoundCtrl);
1876 ParseStatus parseDPPCtrl(OperandVector &Operands);
1877 bool isSupportedDPPCtrl(StringRef Ctrl, const OperandVector &Operands);
1878 int64_t parseDPPCtrlSel(StringRef Ctrl);
1879 int64_t parseDPPCtrlPerm();
1880 void cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8 = false);
1881 void cvtDPP8(MCInst &Inst, const OperandVector &Operands) {
1882 cvtDPP(Inst, Operands, true);
1883 }
1884 void cvtVOP3DPP(MCInst &Inst, const OperandVector &Operands,
1885 bool IsDPP8 = false);
1886 void cvtVOP3DPP8(MCInst &Inst, const OperandVector &Operands) {
1887 cvtVOP3DPP(Inst, Operands, true);
1888 }
1889
1890 ParseStatus parseSDWASel(OperandVector &Operands, StringRef Prefix,
1891 AMDGPUOperand::ImmTy Type);
1892 ParseStatus parseSDWADstUnused(OperandVector &Operands);
1893 void cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands);
1894 void cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands);
1895 void cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands);
1896 void cvtSdwaVOP2e(MCInst &Inst, const OperandVector &Operands);
1897 void cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands);
1898 void cvtSDWA(MCInst &Inst, const OperandVector &Operands,
1899 uint64_t BasicInstType,
1900 bool SkipDstVcc = false,
1901 bool SkipSrcVcc = false);
1902
1903 ParseStatus parseEndpgm(OperandVector &Operands);
1904
1906};
1907
1908} // end anonymous namespace
1909
1910// May be called with integer type with equivalent bitwidth.
1911static const fltSemantics *getFltSemantics(unsigned Size) {
1912 switch (Size) {
1913 case 4:
1914 return &APFloat::IEEEsingle();
1915 case 8:
1916 return &APFloat::IEEEdouble();
1917 case 2:
1918 return &APFloat::IEEEhalf();
1919 default:
1920 llvm_unreachable("unsupported fp type");
1921 }
1922}
1923
1924 static const fltSemantics *getFltSemantics(MVT VT) {
1925 return getFltSemantics(VT.getSizeInBits() / 8);
1926}
1927
1928 static const fltSemantics *getOpFltSemantics(uint8_t OperandType) {
1929 switch (OperandType) {
1930 // When a floating-point immediate is used as an operand of type i16, the
1931 // 32-bit representation of the constant, truncated to the 16 LSBs, should be used.
1951 return &APFloat::IEEEsingle();
1957 return &APFloat::IEEEdouble();
1966 return &APFloat::IEEEhalf();
1974 return &APFloat::BFloat();
1975 default:
1976 llvm_unreachable("unsupported fp type");
1977 }
1978}
1979
1980//===----------------------------------------------------------------------===//
1981// Operand
1982//===----------------------------------------------------------------------===//
1983
1984static bool canLosslesslyConvertToFPType(APFloat &FPLiteral, MVT VT) {
1985 bool Lost;
1986
1987 // Convert the literal to the target type's precision.
1988 APFloat::opStatus Status = FPLiteral.convert(*getFltSemantics(VT),
1989 APFloat::rmNearestTiesToEven,
1990 &Lost);
1991 // We allow precision loss but not overflow or underflow.
1992 if (Status != APFloat::opOK &&
1993 Lost &&
1994 ((Status & APFloat::opOverflow) != 0 ||
1995 (Status & APFloat::opUnderflow) != 0)) {
1996 return false;
1997 }
1998
1999 return true;
2000}
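// Note: precision loss alone is accepted here; only conversions that overflow or
// underflow the target format are rejected. For example, 0.1 rounds when converted
// to f16 but still counts as losslessly convertible for this purpose.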
2001
2002static bool isSafeTruncation(int64_t Val, unsigned Size) {
2003 return isUIntN(Size, Val) || isIntN(Size, Val);
2004}
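// For example, isSafeTruncation(65535, 16) and isSafeTruncation(-1, 16) both hold
// (the value fits as an unsigned or a signed 16-bit integer, respectively), while
// isSafeTruncation(65536, 16) does not.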
2005
2006static bool isInlineableLiteralOp16(int64_t Val, MVT VT, bool HasInv2Pi) {
2007 if (VT.getScalarType() == MVT::i16)
2008 return isInlinableLiteral32(Val, HasInv2Pi);
2009
2010 if (VT.getScalarType() == MVT::f16)
2011 return AMDGPU::isInlinableLiteralFP16(Val, HasInv2Pi);
2012
2013 assert(VT.getScalarType() == MVT::bf16);
2014
2015 return AMDGPU::isInlinableLiteralBF16(Val, HasInv2Pi);
2016}
2017
2018bool AMDGPUOperand::isInlinableImm(MVT type) const {
2019
2020 // This is a hack to enable named inline values like
2021 // shared_base with both 32-bit and 64-bit operands.
2022 // Note that these values are defined as
2023 // 32-bit operands only.
2024 if (isInlineValue()) {
2025 return true;
2026 }
2027
2028 if (!isImmTy(ImmTyNone)) {
2029 // Only plain immediates are inlinable (e.g. "clamp" attribute is not)
2030 return false;
2031 }
2032 // TODO: We should avoid using host float here. It would be better to
2033 // check the float bit values which is what a few other places do.
2034 // We've had bot failures before due to weird NaN support on mips hosts.
2035
2036 APInt Literal(64, Imm.Val);
2037
2038 if (Imm.IsFPImm) { // We got fp literal token
2039 if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand
2040 return AMDGPU::isInlinableLiteral64(Imm.Val,
2041 AsmParser->hasInv2PiInlineImm());
2042 }
2043
2044 APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val));
2045 if (!canLosslesslyConvertToFPType(FPLiteral, type))
2046 return false;
2047
2048 if (type.getScalarSizeInBits() == 16) {
2049 bool Lost = false;
2050 switch (type.getScalarType().SimpleTy) {
2051 default:
2052 llvm_unreachable("unknown 16-bit type");
2053 case MVT::bf16:
2054 FPLiteral.convert(APFloatBase::BFloat(), APFloat::rmNearestTiesToEven,
2055 &Lost);
2056 break;
2057 case MVT::f16:
2058 FPLiteral.convert(APFloatBase::IEEEhalf(), APFloat::rmNearestTiesToEven,
2059 &Lost);
2060 break;
2061 case MVT::i16:
2062 FPLiteral.convert(APFloatBase::IEEEsingle(),
2063 APFloat::rmNearestTiesToEven, &Lost);
2064 break;
2065 }
2066 // We need to use the 32-bit representation here because when a floating-point
2067 // inline constant is used as an i16 operand, its 32-bit representation
2068 // will be used. We need the 32-bit value to check whether
2069 // it is an FP inline constant.
2070 uint32_t ImmVal = FPLiteral.bitcastToAPInt().getZExtValue();
2071 return isInlineableLiteralOp16(ImmVal, type,
2072 AsmParser->hasInv2PiInlineImm());
2073 }
2074
2075 // Check if the single-precision literal is inlinable.
2076 return AMDGPU::isInlinableLiteral32(
2077 static_cast<int32_t>(FPLiteral.bitcastToAPInt().getZExtValue()),
2078 AsmParser->hasInv2PiInlineImm());
2079 }
2080
2081 // We got int literal token.
2082 if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand
2083 return AMDGPU::isInlinableLiteral64(Imm.Val,
2084 AsmParser->hasInv2PiInlineImm());
2085 }
2086
2087 if (!isSafeTruncation(Imm.Val, type.getScalarSizeInBits())) {
2088 return false;
2089 }
2090
2091 if (type.getScalarSizeInBits() == 16) {
2092 return isInlineableLiteralOp16(
2093 static_cast<int16_t>(Literal.getLoBits(16).getSExtValue()),
2094 type, AsmParser->hasInv2PiInlineImm());
2095 }
2096
2097 return AMDGPU::isInlinableLiteral32(
2098 static_cast<int32_t>(Literal.getLoBits(32).getZExtValue()),
2099 AsmParser->hasInv2PiInlineImm());
2100}
2101
2102bool AMDGPUOperand::isLiteralImm(MVT type) const {
2103 // Check that this immediate can be added as literal
2104 if (!isImmTy(ImmTyNone)) {
2105 return false;
2106 }
2107
2108 if (!Imm.IsFPImm) {
2109 // We got int literal token.
2110
2111 if (type == MVT::f64 && hasFPModifiers()) {
2112 // Cannot apply fp modifiers to int literals preserving the same semantics
2113 // for VOP1/2/C and VOP3 because of integer truncation. To avoid ambiguity,
2114 // disable these cases.
2115 return false;
2116 }
2117
2118 unsigned Size = type.getSizeInBits();
2119 if (Size == 64)
2120 Size = 32;
2121
2122 // FIXME: 64-bit operands can zero extend, sign extend, or pad zeroes for FP
2123 // types.
2124 return isSafeTruncation(Imm.Val, Size);
2125 }
2126
2127 // We got fp literal token
2128 if (type == MVT::f64) { // Expected 64-bit fp operand
2129 // We would set the low 32 bits of the literal to zeroes, but we accept such literals.
2130 return true;
2131 }
2132
2133 if (type == MVT::i64) { // Expected 64-bit int operand
2134 // We don't allow fp literals in 64-bit integer instructions. It is
2135 // unclear how we should encode them.
2136 return false;
2137 }
2138
2139 // We allow fp literals with f16x2 operands assuming that the specified
2140 // literal goes into the lower half and the upper half is zero. We also
2141 // require that the literal may be losslessly converted to f16.
2142 //
2143 // For i16x2 operands, we assume that the specified literal is encoded as a
2144 // single-precision float. This is pretty odd, but it matches SP3 and what
2145 // happens in hardware.
2146 MVT ExpectedType = (type == MVT::v2f16) ? MVT::f16
2147 : (type == MVT::v2i16) ? MVT::f32
2148 : (type == MVT::v2f32) ? MVT::f32
2149 : type;
2150
2151 APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val));
2152 return canLosslesslyConvertToFPType(FPLiteral, ExpectedType);
2153}
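// For example, a literal used with a v2f16 operand must convert to f16 without
// overflow or underflow, while for v2i16 (and v2f32) operands it is checked
// against f32, matching the encoding rules described above.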
2154
2155bool AMDGPUOperand::isRegClass(unsigned RCID) const {
2156 return isRegKind() && AsmParser->getMRI()->getRegClass(RCID).contains(getReg());
2157}
2158
2159bool AMDGPUOperand::isVRegWithInputMods() const {
2160 return isRegClass(AMDGPU::VGPR_32RegClassID) ||
2161 // GFX90A allows DPP on 64-bit operands.
2162 (isRegClass(AMDGPU::VReg_64RegClassID) &&
2163 AsmParser->getFeatureBits()[AMDGPU::FeatureDPALU_DPP]);
2164}
2165
2166template <bool IsFake16> bool AMDGPUOperand::isT16VRegWithInputMods() const {
2167 return isRegClass(IsFake16 ? AMDGPU::VGPR_32_Lo128RegClassID
2168 : AMDGPU::VGPR_16_Lo128RegClassID);
2169}
2170
2171bool AMDGPUOperand::isSDWAOperand(MVT type) const {
2172 if (AsmParser->isVI())
2173 return isVReg32();
2174 if (AsmParser->isGFX9Plus())
2175 return isRegClass(AMDGPU::VS_32RegClassID) || isInlinableImm(type);
2176 return false;
2177}
2178
2179bool AMDGPUOperand::isSDWAFP16Operand() const {
2180 return isSDWAOperand(MVT::f16);
2181}
2182
2183bool AMDGPUOperand::isSDWAFP32Operand() const {
2184 return isSDWAOperand(MVT::f32);
2185}
2186
2187bool AMDGPUOperand::isSDWAInt16Operand() const {
2188 return isSDWAOperand(MVT::i16);
2189}
2190
2191bool AMDGPUOperand::isSDWAInt32Operand() const {
2192 return isSDWAOperand(MVT::i32);
2193}
2194
2195bool AMDGPUOperand::isBoolReg() const {
2196 auto FB = AsmParser->getFeatureBits();
2197 return isReg() && ((FB[AMDGPU::FeatureWavefrontSize64] && isSCSrc_b64()) ||
2198 (FB[AMDGPU::FeatureWavefrontSize32] && isSCSrc_b32()));
2199}
2200
2201uint64_t AMDGPUOperand::applyInputFPModifiers(uint64_t Val, unsigned Size) const
2202{
2203 assert(isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers());
2204 assert(Size == 2 || Size == 4 || Size == 8);
2205
2206 const uint64_t FpSignMask = (1ULL << (Size * 8 - 1));
2207
2208 if (Imm.Mods.Abs) {
2209 Val &= ~FpSignMask;
2210 }
2211 if (Imm.Mods.Neg) {
2212 Val ^= FpSignMask;
2213 }
2214
2215 return Val;
2216}
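// For example, with Size == 4 the mask is 0x80000000: 'abs' clears the sign bit
// of the raw bit pattern and 'neg' flips it.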
2217
2218void AMDGPUOperand::addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers) const {
2219 if (isExpr()) {
2220 Inst.addOperand(MCOperand::createExpr(Expr));
2221 return;
2222 }
2223
2224 if (AMDGPU::isSISrcOperand(AsmParser->getMII()->get(Inst.getOpcode()),
2225 Inst.getNumOperands())) {
2226 addLiteralImmOperand(Inst, Imm.Val,
2227 ApplyModifiers &
2228 isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers());
2229 } else {
2230 assert(!isImmTy(ImmTyNone) || !hasModifiers());
2231 Inst.addOperand(MCOperand::createImm(Imm.Val));
2232 setImmKindNone();
2233 }
2234}
2235
2236void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const {
2237 const auto& InstDesc = AsmParser->getMII()->get(Inst.getOpcode());
2238 auto OpNum = Inst.getNumOperands();
2239 // Check that this operand accepts literals
2240 assert(AMDGPU::isSISrcOperand(InstDesc, OpNum));
2241
2242 if (ApplyModifiers) {
2243 assert(AMDGPU::isSISrcFPOperand(InstDesc, OpNum));
2244 const unsigned Size = Imm.IsFPImm ? sizeof(double) : getOperandSize(InstDesc, OpNum);
2245 Val = applyInputFPModifiers(Val, Size);
2246 }
2247
2248 APInt Literal(64, Val);
2249 uint8_t OpTy = InstDesc.operands()[OpNum].OperandType;
2250
2251 if (Imm.IsFPImm) { // We got fp literal token
2252 switch (OpTy) {
2258 if (AMDGPU::isInlinableLiteral64(Literal.getZExtValue(),
2259 AsmParser->hasInv2PiInlineImm())) {
2260 Inst.addOperand(MCOperand::createImm(Literal.getZExtValue()));
2261 setImmKindConst();
2262 return;
2263 }
2264
2265 // Non-inlineable
2266 if (AMDGPU::isSISrcFPOperand(InstDesc, OpNum)) { // Expected 64-bit fp operand
2267 // For fp operands we check if low 32 bits are zeros
2268 if (Literal.getLoBits(32) != 0) {
2269 const_cast<AMDGPUAsmParser *>(AsmParser)->Warning(Inst.getLoc(),
2270 "Can't encode literal as exact 64-bit floating-point operand. "
2271 "Low 32-bits will be set to zero");
2272 Val &= 0xffffffff00000000u;
2273 }
2274
2275 Inst.addOperand(MCOperand::createImm(Val));
2276 setImmKindLiteral();
2277 return;
2278 }
2279
2280 // We don't allow fp literals in 64-bit integer instructions. It is
2281 // unclear how we should encode them. This case should be checked earlier
2282 // in predicate methods (isLiteralImm())
2283 llvm_unreachable("fp literal in 64-bit integer instruction.");
2284
2292 if (AsmParser->hasInv2PiInlineImm() && Literal == 0x3fc45f306725feed) {
2293 // This is 1/(2*pi), which would otherwise be truncated to bf16 with a
2294 // loss of precision. The constant represents the idiomatic fp32 value of
2295 // 1/(2*pi) = 0.15915494, since bf16 is in fact fp32 with the low 16 bits
2296 // cleared. Prevent the rounding below.
2297 Inst.addOperand(MCOperand::createImm(0x3e22));
2298 setImmKindLiteral();
2299 return;
2300 }
2301 [[fallthrough]];
2302
2330 bool lost;
2331 APFloat FPLiteral(APFloat::IEEEdouble(), Literal);
2332 // Convert the literal to the operand's floating-point precision.
2333 FPLiteral.convert(*getOpFltSemantics(OpTy),
2334 APFloat::rmNearestTiesToEven, &lost);
2335 // We allow precision loss but not overflow or underflow. This should be
2336 // checked earlier in isLiteralImm()
2337
2338 uint64_t ImmVal = FPLiteral.bitcastToAPInt().getZExtValue();
2339 Inst.addOperand(MCOperand::createImm(ImmVal));
2340 if (OpTy == AMDGPU::OPERAND_KIMM32 || OpTy == AMDGPU::OPERAND_KIMM16) {
2341 setImmKindMandatoryLiteral();
2342 } else {
2343 setImmKindLiteral();
2344 }
2345 return;
2346 }
2347 default:
2348 llvm_unreachable("invalid operand size");
2349 }
2350
2351 return;
2352 }
2353
2354 // We got int literal token.
2355 // Only sign extend inline immediates.
2356 switch (OpTy) {
2372 if (isSafeTruncation(Val, 32) &&
2373 AMDGPU::isInlinableLiteral32(static_cast<int32_t>(Val),
2374 AsmParser->hasInv2PiInlineImm())) {
2376 setImmKindConst();
2377 return;
2378 }
2379
2380 Inst.addOperand(MCOperand::createImm(Val & 0xffffffff));
2381 setImmKindLiteral();
2382 return;
2383
2389 if (AMDGPU::isInlinableLiteral64(Val, AsmParser->hasInv2PiInlineImm())) {
2391 setImmKindConst();
2392 return;
2393 }
2394
2395 Val = AMDGPU::isSISrcFPOperand(InstDesc, OpNum) ? (uint64_t)Val << 32
2396 : Lo_32(Val);
2397
2399 setImmKindLiteral();
2400 return;
2401
2405 if (isSafeTruncation(Val, 16) &&
2406 AMDGPU::isInlinableIntLiteral(static_cast<int16_t>(Val))) {
2407 Inst.addOperand(MCOperand::createImm(Val & 0xffffffff));
2408 setImmKindConst();
2409 return;
2410 }
2411
2412 Inst.addOperand(MCOperand::createImm(Val & 0xffff));
2413 setImmKindLiteral();
2414 return;
2415
2420 if (isSafeTruncation(Val, 16) &&
2421 AMDGPU::isInlinableLiteralFP16(static_cast<int16_t>(Val),
2422 AsmParser->hasInv2PiInlineImm())) {
2424 setImmKindConst();
2425 return;
2426 }
2427
2428 Inst.addOperand(MCOperand::createImm(Val & 0xffff));
2429 setImmKindLiteral();
2430 return;
2431
2436 if (isSafeTruncation(Val, 16) &&
2437 AMDGPU::isInlinableLiteralBF16(static_cast<int16_t>(Val),
2438 AsmParser->hasInv2PiInlineImm())) {
2440 setImmKindConst();
2441 return;
2442 }
2443
2444 Inst.addOperand(MCOperand::createImm(Val & 0xffff));
2445 setImmKindLiteral();
2446 return;
2447
2450 assert(isSafeTruncation(Val, 16));
2451 assert(AMDGPU::isInlinableIntLiteral(static_cast<int16_t>(Val)));
2453 return;
2454 }
2457 assert(isSafeTruncation(Val, 16));
2458 assert(AMDGPU::isInlinableLiteralFP16(static_cast<int16_t>(Val),
2459 AsmParser->hasInv2PiInlineImm()));
2460
2462 return;
2463 }
2464
2467 assert(isSafeTruncation(Val, 16));
2468 assert(AMDGPU::isInlinableLiteralBF16(static_cast<int16_t>(Val),
2469 AsmParser->hasInv2PiInlineImm()));
2470
2472 return;
2473 }
2474
2476 Inst.addOperand(MCOperand::createImm(Literal.getLoBits(32).getZExtValue()));
2477 setImmKindMandatoryLiteral();
2478 return;
2480 Inst.addOperand(MCOperand::createImm(Literal.getLoBits(16).getZExtValue()));
2481 setImmKindMandatoryLiteral();
2482 return;
2483 default:
2484 llvm_unreachable("invalid operand size");
2485 }
2486}
2487
2488void AMDGPUOperand::addRegOperands(MCInst &Inst, unsigned N) const {
2489 Inst.addOperand(MCOperand::createReg(AMDGPU::getMCReg(getReg(), AsmParser->getSTI())));
2490}
2491
2492bool AMDGPUOperand::isInlineValue() const {
2493 return isRegKind() && ::isInlineValue(getReg());
2494}
2495
2496//===----------------------------------------------------------------------===//
2497// AsmParser
2498//===----------------------------------------------------------------------===//
2499
2500void AMDGPUAsmParser::createConstantSymbol(StringRef Id, int64_t Val) {
2501 // TODO: make these pre-defined variables read-only.
2502 // Currently there is no suitable machinery in the core llvm-mc for this.
2503 // MCSymbol::isRedefinable is intended for another purpose, and
2504 // AsmParser::parseDirectiveSet() cannot be specialized for a specific target.
2505 MCContext &Ctx = getContext();
2506 MCSymbol *Sym = Ctx.getOrCreateSymbol(Id);
2507 Sym->setVariableValue(MCConstantExpr::create(Val, Ctx));
2508}
2509
2510static int getRegClass(RegisterKind Is, unsigned RegWidth) {
2511 if (Is == IS_VGPR) {
2512 switch (RegWidth) {
2513 default: return -1;
2514 case 32:
2515 return AMDGPU::VGPR_32RegClassID;
2516 case 64:
2517 return AMDGPU::VReg_64RegClassID;
2518 case 96:
2519 return AMDGPU::VReg_96RegClassID;
2520 case 128:
2521 return AMDGPU::VReg_128RegClassID;
2522 case 160:
2523 return AMDGPU::VReg_160RegClassID;
2524 case 192:
2525 return AMDGPU::VReg_192RegClassID;
2526 case 224:
2527 return AMDGPU::VReg_224RegClassID;
2528 case 256:
2529 return AMDGPU::VReg_256RegClassID;
2530 case 288:
2531 return AMDGPU::VReg_288RegClassID;
2532 case 320:
2533 return AMDGPU::VReg_320RegClassID;
2534 case 352:
2535 return AMDGPU::VReg_352RegClassID;
2536 case 384:
2537 return AMDGPU::VReg_384RegClassID;
2538 case 512:
2539 return AMDGPU::VReg_512RegClassID;
2540 case 1024:
2541 return AMDGPU::VReg_1024RegClassID;
2542 }
2543 } else if (Is == IS_TTMP) {
2544 switch (RegWidth) {
2545 default: return -1;
2546 case 32:
2547 return AMDGPU::TTMP_32RegClassID;
2548 case 64:
2549 return AMDGPU::TTMP_64RegClassID;
2550 case 128:
2551 return AMDGPU::TTMP_128RegClassID;
2552 case 256:
2553 return AMDGPU::TTMP_256RegClassID;
2554 case 512:
2555 return AMDGPU::TTMP_512RegClassID;
2556 }
2557 } else if (Is == IS_SGPR) {
2558 switch (RegWidth) {
2559 default: return -1;
2560 case 32:
2561 return AMDGPU::SGPR_32RegClassID;
2562 case 64:
2563 return AMDGPU::SGPR_64RegClassID;
2564 case 96:
2565 return AMDGPU::SGPR_96RegClassID;
2566 case 128:
2567 return AMDGPU::SGPR_128RegClassID;
2568 case 160:
2569 return AMDGPU::SGPR_160RegClassID;
2570 case 192:
2571 return AMDGPU::SGPR_192RegClassID;
2572 case 224:
2573 return AMDGPU::SGPR_224RegClassID;
2574 case 256:
2575 return AMDGPU::SGPR_256RegClassID;
2576 case 288:
2577 return AMDGPU::SGPR_288RegClassID;
2578 case 320:
2579 return AMDGPU::SGPR_320RegClassID;
2580 case 352:
2581 return AMDGPU::SGPR_352RegClassID;
2582 case 384:
2583 return AMDGPU::SGPR_384RegClassID;
2584 case 512:
2585 return AMDGPU::SGPR_512RegClassID;
2586 }
2587 } else if (Is == IS_AGPR) {
2588 switch (RegWidth) {
2589 default: return -1;
2590 case 32:
2591 return AMDGPU::AGPR_32RegClassID;
2592 case 64:
2593 return AMDGPU::AReg_64RegClassID;
2594 case 96:
2595 return AMDGPU::AReg_96RegClassID;
2596 case 128:
2597 return AMDGPU::AReg_128RegClassID;
2598 case 160:
2599 return AMDGPU::AReg_160RegClassID;
2600 case 192:
2601 return AMDGPU::AReg_192RegClassID;
2602 case 224:
2603 return AMDGPU::AReg_224RegClassID;
2604 case 256:
2605 return AMDGPU::AReg_256RegClassID;
2606 case 288:
2607 return AMDGPU::AReg_288RegClassID;
2608 case 320:
2609 return AMDGPU::AReg_320RegClassID;
2610 case 352:
2611 return AMDGPU::AReg_352RegClassID;
2612 case 384:
2613 return AMDGPU::AReg_384RegClassID;
2614 case 512:
2615 return AMDGPU::AReg_512RegClassID;
2616 case 1024:
2617 return AMDGPU::AReg_1024RegClassID;
2618 }
2619 }
2620 return -1;
2621}
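// For example, getRegClass(IS_VGPR, 64) yields AMDGPU::VReg_64RegClassID, while an
// unsupported width (such as 96 for IS_TTMP) yields -1.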
2622
2623 static unsigned getSpecialRegForName(StringRef RegName) {
2624 return StringSwitch<unsigned>(RegName)
2625 .Case("exec", AMDGPU::EXEC)
2626 .Case("vcc", AMDGPU::VCC)
2627 .Case("flat_scratch", AMDGPU::FLAT_SCR)
2628 .Case("xnack_mask", AMDGPU::XNACK_MASK)
2629 .Case("shared_base", AMDGPU::SRC_SHARED_BASE)
2630 .Case("src_shared_base", AMDGPU::SRC_SHARED_BASE)
2631 .Case("shared_limit", AMDGPU::SRC_SHARED_LIMIT)
2632 .Case("src_shared_limit", AMDGPU::SRC_SHARED_LIMIT)
2633 .Case("private_base", AMDGPU::SRC_PRIVATE_BASE)
2634 .Case("src_private_base", AMDGPU::SRC_PRIVATE_BASE)
2635 .Case("private_limit", AMDGPU::SRC_PRIVATE_LIMIT)
2636 .Case("src_private_limit", AMDGPU::SRC_PRIVATE_LIMIT)
2637 .Case("pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID)
2638 .Case("src_pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID)
2639 .Case("lds_direct", AMDGPU::LDS_DIRECT)
2640 .Case("src_lds_direct", AMDGPU::LDS_DIRECT)
2641 .Case("m0", AMDGPU::M0)
2642 .Case("vccz", AMDGPU::SRC_VCCZ)
2643 .Case("src_vccz", AMDGPU::SRC_VCCZ)
2644 .Case("execz", AMDGPU::SRC_EXECZ)
2645 .Case("src_execz", AMDGPU::SRC_EXECZ)
2646 .Case("scc", AMDGPU::SRC_SCC)
2647 .Case("src_scc", AMDGPU::SRC_SCC)
2648 .Case("tba", AMDGPU::TBA)
2649 .Case("tma", AMDGPU::TMA)
2650 .Case("flat_scratch_lo", AMDGPU::FLAT_SCR_LO)
2651 .Case("flat_scratch_hi", AMDGPU::FLAT_SCR_HI)
2652 .Case("xnack_mask_lo", AMDGPU::XNACK_MASK_LO)
2653 .Case("xnack_mask_hi", AMDGPU::XNACK_MASK_HI)
2654 .Case("vcc_lo", AMDGPU::VCC_LO)
2655 .Case("vcc_hi", AMDGPU::VCC_HI)
2656 .Case("exec_lo", AMDGPU::EXEC_LO)
2657 .Case("exec_hi", AMDGPU::EXEC_HI)
2658 .Case("tma_lo", AMDGPU::TMA_LO)
2659 .Case("tma_hi", AMDGPU::TMA_HI)
2660 .Case("tba_lo", AMDGPU::TBA_LO)
2661 .Case("tba_hi", AMDGPU::TBA_HI)
2662 .Case("pc", AMDGPU::PC_REG)
2663 .Case("null", AMDGPU::SGPR_NULL)
2664 .Default(AMDGPU::NoRegister);
2665}
2666
2667bool AMDGPUAsmParser::ParseRegister(MCRegister &RegNo, SMLoc &StartLoc,
2668 SMLoc &EndLoc, bool RestoreOnFailure) {
2669 auto R = parseRegister();
2670 if (!R) return true;
2671 assert(R->isReg());
2672 RegNo = R->getReg();
2673 StartLoc = R->getStartLoc();
2674 EndLoc = R->getEndLoc();
2675 return false;
2676}
2677
2678bool AMDGPUAsmParser::parseRegister(MCRegister &Reg, SMLoc &StartLoc,
2679 SMLoc &EndLoc) {
2680 return ParseRegister(Reg, StartLoc, EndLoc, /*RestoreOnFailure=*/false);
2681}
2682
2683ParseStatus AMDGPUAsmParser::tryParseRegister(MCRegister &Reg, SMLoc &StartLoc,
2684 SMLoc &EndLoc) {
2685 bool Result = ParseRegister(Reg, StartLoc, EndLoc, /*RestoreOnFailure=*/true);
2686 bool PendingErrors = getParser().hasPendingError();
2687 getParser().clearPendingErrors();
2688 if (PendingErrors)
2689 return ParseStatus::Failure;
2690 if (Result)
2691 return ParseStatus::NoMatch;
2692 return ParseStatus::Success;
2693}
2694
2695bool AMDGPUAsmParser::AddNextRegisterToList(unsigned &Reg, unsigned &RegWidth,
2696 RegisterKind RegKind, unsigned Reg1,
2697 SMLoc Loc) {
2698 switch (RegKind) {
2699 case IS_SPECIAL:
2700 if (Reg == AMDGPU::EXEC_LO && Reg1 == AMDGPU::EXEC_HI) {
2701 Reg = AMDGPU::EXEC;
2702 RegWidth = 64;
2703 return true;
2704 }
2705 if (Reg == AMDGPU::FLAT_SCR_LO && Reg1 == AMDGPU::FLAT_SCR_HI) {
2706 Reg = AMDGPU::FLAT_SCR;
2707 RegWidth = 64;
2708 return true;
2709 }
2710 if (Reg == AMDGPU::XNACK_MASK_LO && Reg1 == AMDGPU::XNACK_MASK_HI) {
2711 Reg = AMDGPU::XNACK_MASK;
2712 RegWidth = 64;
2713 return true;
2714 }
2715 if (Reg == AMDGPU::VCC_LO && Reg1 == AMDGPU::VCC_HI) {
2716 Reg = AMDGPU::VCC;
2717 RegWidth = 64;
2718 return true;
2719 }
2720 if (Reg == AMDGPU::TBA_LO && Reg1 == AMDGPU::TBA_HI) {
2721 Reg = AMDGPU::TBA;
2722 RegWidth = 64;
2723 return true;
2724 }
2725 if (Reg == AMDGPU::TMA_LO && Reg1 == AMDGPU::TMA_HI) {
2726 Reg = AMDGPU::TMA;
2727 RegWidth = 64;
2728 return true;
2729 }
2730 Error(Loc, "register does not fit in the list");
2731 return false;
2732 case IS_VGPR:
2733 case IS_SGPR:
2734 case IS_AGPR:
2735 case IS_TTMP:
2736 if (Reg1 != Reg + RegWidth / 32) {
2737 Error(Loc, "registers in a list must have consecutive indices");
2738 return false;
2739 }
2740 RegWidth += 32;
2741 return true;
2742 default:
2743 llvm_unreachable("unexpected register kind");
2744 }
2745}
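// For example, while parsing [s0,s1,s2,s3] each call verifies that the new register
// immediately follows the span collected so far and then widens RegWidth by 32,
// ending at RegWidth == 128; special registers only combine into their predefined
// 64-bit pairs (EXEC, VCC, FLAT_SCR, ...).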
2746
2747struct RegInfo {
2748 StringLiteral Name;
2749 RegisterKind Kind;
2750};
2751
2752static constexpr RegInfo RegularRegisters[] = {
2753 {{"v"}, IS_VGPR},
2754 {{"s"}, IS_SGPR},
2755 {{"ttmp"}, IS_TTMP},
2756 {{"acc"}, IS_AGPR},
2757 {{"a"}, IS_AGPR},
2758};
2759
2760static bool isRegularReg(RegisterKind Kind) {
2761 return Kind == IS_VGPR ||
2762 Kind == IS_SGPR ||
2763 Kind == IS_TTMP ||
2764 Kind == IS_AGPR;
2765}
2766
2767 static const RegInfo* getRegularRegInfo(StringRef Str) {
2768 for (const RegInfo &Reg : RegularRegisters)
2769 if (Str.starts_with(Reg.Name))
2770 return &Reg;
2771 return nullptr;
2772}
2773
2774static bool getRegNum(StringRef Str, unsigned& Num) {
2775 return !Str.getAsInteger(10, Num);
2776}
2777
2778bool
2779AMDGPUAsmParser::isRegister(const AsmToken &Token,
2780 const AsmToken &NextToken) const {
2781
2782 // A list of consecutive registers: [s0,s1,s2,s3]
2783 if (Token.is(AsmToken::LBrac))
2784 return true;
2785
2786 if (!Token.is(AsmToken::Identifier))
2787 return false;
2788
2789 // A single register like s0 or a range of registers like s[0:1]
2790
2791 StringRef Str = Token.getString();
2792 const RegInfo *Reg = getRegularRegInfo(Str);
2793 if (Reg) {
2794 StringRef RegName = Reg->Name;
2795 StringRef RegSuffix = Str.substr(RegName.size());
2796 if (!RegSuffix.empty()) {
2797 RegSuffix.consume_back(".l");
2798 RegSuffix.consume_back(".h");
2799 unsigned Num;
2800 // A single register with an index: rXX
2801 if (getRegNum(RegSuffix, Num))
2802 return true;
2803 } else {
2804 // A range of registers: r[XX:YY].
2805 if (NextToken.is(AsmToken::LBrac))
2806 return true;
2807 }
2808 }
2809
2810 return getSpecialRegForName(Str) != AMDGPU::NoRegister;
2811}
2812
2813bool
2814AMDGPUAsmParser::isRegister()
2815{
2816 return isRegister(getToken(), peekToken());
2817}
2818
2819unsigned AMDGPUAsmParser::getRegularReg(RegisterKind RegKind, unsigned RegNum,
2820 unsigned SubReg, unsigned RegWidth,
2821 SMLoc Loc) {
2822 assert(isRegularReg(RegKind));
2823
2824 unsigned AlignSize = 1;
2825 if (RegKind == IS_SGPR || RegKind == IS_TTMP) {
2826 // SGPR and TTMP registers must be aligned.
2827 // Max required alignment is 4 dwords.
2828 AlignSize = std::min(llvm::bit_ceil(RegWidth / 32), 4u);
2829 }
2830
2831 if (RegNum % AlignSize != 0) {
2832 Error(Loc, "invalid register alignment");
2833 return AMDGPU::NoRegister;
2834 }
2835
2836 unsigned RegIdx = RegNum / AlignSize;
2837 int RCID = getRegClass(RegKind, RegWidth);
2838 if (RCID == -1) {
2839 Error(Loc, "invalid or unsupported register size");
2840 return AMDGPU::NoRegister;
2841 }
2842
2843 const MCRegisterInfo *TRI = getContext().getRegisterInfo();
2844 const MCRegisterClass RC = TRI->getRegClass(RCID);
2845 if (RegIdx >= RC.getNumRegs()) {
2846 Error(Loc, "register index is out of range");
2847 return AMDGPU::NoRegister;
2848 }
2849
2850 unsigned Reg = RC.getRegister(RegIdx);
2851
2852 if (SubReg) {
2853 Reg = TRI->getSubReg(Reg, SubReg);
2854
2855 // Currently all regular registers have their .l and .h subregisters, so
2856 // we should never need to generate an error here.
2857 assert(Reg && "Invalid subregister!");
2858 }
2859
2860 return Reg;
2861}
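// For example, s[2:3] maps to an SGPR_64 register, while s[1:2] fails the alignment
// check above because 64-bit SGPR/TTMP ranges must start at an even index.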
2862
2863bool AMDGPUAsmParser::ParseRegRange(unsigned &Num, unsigned &RegWidth) {
2864 int64_t RegLo, RegHi;
2865 if (!skipToken(AsmToken::LBrac, "missing register index"))
2866 return false;
2867
2868 SMLoc FirstIdxLoc = getLoc();
2869 SMLoc SecondIdxLoc;
2870
2871 if (!parseExpr(RegLo))
2872 return false;
2873
2874 if (trySkipToken(AsmToken::Colon)) {
2875 SecondIdxLoc = getLoc();
2876 if (!parseExpr(RegHi))
2877 return false;
2878 } else {
2879 RegHi = RegLo;
2880 }
2881
2882 if (!skipToken(AsmToken::RBrac, "expected a closing square bracket"))
2883 return false;
2884
2885 if (!isUInt<32>(RegLo)) {
2886 Error(FirstIdxLoc, "invalid register index");
2887 return false;
2888 }
2889
2890 if (!isUInt<32>(RegHi)) {
2891 Error(SecondIdxLoc, "invalid register index");
2892 return false;
2893 }
2894
2895 if (RegLo > RegHi) {
2896 Error(FirstIdxLoc, "first register index should not exceed second index");
2897 return false;
2898 }
2899
2900 Num = static_cast<unsigned>(RegLo);
2901 RegWidth = 32 * ((RegHi - RegLo) + 1);
2902 return true;
2903}
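// For example, '[0:3]' yields Num == 0 and RegWidth == 128, while a single index
// such as '[5]' yields RegWidth == 32.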
2904
2905unsigned AMDGPUAsmParser::ParseSpecialReg(RegisterKind &RegKind,
2906 unsigned &RegNum, unsigned &RegWidth,
2907 SmallVectorImpl<AsmToken> &Tokens) {
2908 assert(isToken(AsmToken::Identifier));
2909 unsigned Reg = getSpecialRegForName(getTokenStr());
2910 if (Reg) {
2911 RegNum = 0;
2912 RegWidth = 32;
2913 RegKind = IS_SPECIAL;
2914 Tokens.push_back(getToken());
2915 lex(); // skip register name
2916 }
2917 return Reg;
2918}
2919
2920unsigned AMDGPUAsmParser::ParseRegularReg(RegisterKind &RegKind,
2921 unsigned &RegNum, unsigned &RegWidth,
2922 SmallVectorImpl<AsmToken> &Tokens) {
2923 assert(isToken(AsmToken::Identifier));
2924 StringRef RegName = getTokenStr();
2925 auto Loc = getLoc();
2926
2927 const RegInfo *RI = getRegularRegInfo(RegName);
2928 if (!RI) {
2929 Error(Loc, "invalid register name");
2930 return AMDGPU::NoRegister;
2931 }
2932
2933 Tokens.push_back(getToken());
2934 lex(); // skip register name
2935
2936 RegKind = RI->Kind;
2937 StringRef RegSuffix = RegName.substr(RI->Name.size());
2938 unsigned SubReg = NoSubRegister;
2939 if (!RegSuffix.empty()) {
2940 if (RegSuffix.consume_back(".l"))
2941 SubReg = AMDGPU::lo16;
2942 else if (RegSuffix.consume_back(".h"))
2943 SubReg = AMDGPU::hi16;
2944
2945 // Single 32-bit register: vXX.
2946 if (!getRegNum(RegSuffix, RegNum)) {
2947 Error(Loc, "invalid register index");
2948 return AMDGPU::NoRegister;
2949 }
2950 RegWidth = 32;
2951 } else {
2952 // Range of registers: v[XX:YY]. ":YY" is optional.
2953 if (!ParseRegRange(RegNum, RegWidth))
2954 return AMDGPU::NoRegister;
2955 }
2956
2957 return getRegularReg(RegKind, RegNum, SubReg, RegWidth, Loc);
2958}
2959
2960unsigned AMDGPUAsmParser::ParseRegList(RegisterKind &RegKind, unsigned &RegNum,
2961 unsigned &RegWidth,
2962 SmallVectorImpl<AsmToken> &Tokens) {
2963 unsigned Reg = AMDGPU::NoRegister;
2964 auto ListLoc = getLoc();
2965
2966 if (!skipToken(AsmToken::LBrac,
2967 "expected a register or a list of registers")) {
2968 return AMDGPU::NoRegister;
2969 }
2970
2971 // List of consecutive registers, e.g.: [s0,s1,s2,s3]
2972
2973 auto Loc = getLoc();
2974 if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth))
2975 return AMDGPU::NoRegister;
2976 if (RegWidth != 32) {
2977 Error(Loc, "expected a single 32-bit register");
2978 return AMDGPU::NoRegister;
2979 }
2980
2981 for (; trySkipToken(AsmToken::Comma); ) {
2982 RegisterKind NextRegKind;
2983 unsigned NextReg, NextRegNum, NextRegWidth;
2984 Loc = getLoc();
2985
2986 if (!ParseAMDGPURegister(NextRegKind, NextReg,
2987 NextRegNum, NextRegWidth,
2988 Tokens)) {
2989 return AMDGPU::NoRegister;
2990 }
2991 if (NextRegWidth != 32) {
2992 Error(Loc, "expected a single 32-bit register");
2993 return AMDGPU::NoRegister;
2994 }
2995 if (NextRegKind != RegKind) {
2996 Error(Loc, "registers in a list must be of the same kind");
2997 return AMDGPU::NoRegister;
2998 }
2999 if (!AddNextRegisterToList(Reg, RegWidth, RegKind, NextReg, Loc))
3000 return AMDGPU::NoRegister;
3001 }
3002
3003 if (!skipToken(AsmToken::RBrac,
3004 "expected a comma or a closing square bracket")) {
3005 return AMDGPU::NoRegister;
3006 }
3007
3008 if (isRegularReg(RegKind))
3009 Reg = getRegularReg(RegKind, RegNum, NoSubRegister, RegWidth, ListLoc);
3010
3011 return Reg;
3012}
3013
3014bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
3015 unsigned &RegNum, unsigned &RegWidth,
3016 SmallVectorImpl<AsmToken> &Tokens) {
3017 auto Loc = getLoc();
3018 Reg = AMDGPU::NoRegister;
3019
3020 if (isToken(AsmToken::Identifier)) {
3021 Reg = ParseSpecialReg(RegKind, RegNum, RegWidth, Tokens);
3022 if (Reg == AMDGPU::NoRegister)
3023 Reg = ParseRegularReg(RegKind, RegNum, RegWidth, Tokens);
3024 } else {
3025 Reg = ParseRegList(RegKind, RegNum, RegWidth, Tokens);
3026 }
3027
3028 const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3029 if (Reg == AMDGPU::NoRegister) {
3030 assert(Parser.hasPendingError());
3031 return false;
3032 }
3033
3034 if (!subtargetHasRegister(*TRI, Reg)) {
3035 if (Reg == AMDGPU::SGPR_NULL) {
3036 Error(Loc, "'null' operand is not supported on this GPU");
3037 } else {
3038 Error(Loc, "register not available on this GPU");
3039 }
3040 return false;
3041 }
3042
3043 return true;
3044}
3045
3046bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
3047 unsigned &RegNum, unsigned &RegWidth,
3048 bool RestoreOnFailure /*=false*/) {
3049 Reg = AMDGPU::NoRegister;
3050
3051 SmallVector<AsmToken, 1> Tokens;
3052 if (ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth, Tokens)) {
3053 if (RestoreOnFailure) {
3054 while (!Tokens.empty()) {
3055 getLexer().UnLex(Tokens.pop_back_val());
3056 }
3057 }
3058 return true;
3059 }
3060 return false;
3061}
3062
3063std::optional<StringRef>
3064AMDGPUAsmParser::getGprCountSymbolName(RegisterKind RegKind) {
3065 switch (RegKind) {
3066 case IS_VGPR:
3067 return StringRef(".amdgcn.next_free_vgpr");
3068 case IS_SGPR:
3069 return StringRef(".amdgcn.next_free_sgpr");
3070 default:
3071 return std::nullopt;
3072 }
3073}
3074
3075void AMDGPUAsmParser::initializeGprCountSymbol(RegisterKind RegKind) {
3076 auto SymbolName = getGprCountSymbolName(RegKind);
3077 assert(SymbolName && "initializing invalid register kind");
3078 MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName);
3079 Sym->setVariableValue(MCConstantExpr::create(0, getContext()));
3080}
3081
3082bool AMDGPUAsmParser::updateGprCountSymbols(RegisterKind RegKind,
3083 unsigned DwordRegIndex,
3084 unsigned RegWidth) {
3085 // Symbols are only defined for GCN targets
3086 if (AMDGPU::getIsaVersion(getSTI().getCPU()).Major < 6)
3087 return true;
3088
3089 auto SymbolName = getGprCountSymbolName(RegKind);
3090 if (!SymbolName)
3091 return true;
3092 MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName);
3093
3094 int64_t NewMax = DwordRegIndex + divideCeil(RegWidth, 32) - 1;
3095 int64_t OldCount;
3096
3097 if (!Sym->isVariable())
3098 return !Error(getLoc(),
3099 ".amdgcn.next_free_{v,s}gpr symbols must be variable");
3100 if (!Sym->getVariableValue(false)->evaluateAsAbsolute(OldCount))
3101 return !Error(
3102 getLoc(),
3103 ".amdgcn.next_free_{v,s}gpr symbols must be absolute expressions");
3104
3105 if (OldCount <= NewMax)
3106 Sym->setVariableValue(MCConstantExpr::create(NewMax + 1, getContext()));
3107
3108 return true;
3109}
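// For example, a reference to v[4:7] (DwordRegIndex == 4, RegWidth == 128) raises
// .amdgcn.next_free_vgpr to at least 8.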
3110
3111std::unique_ptr<AMDGPUOperand>
3112AMDGPUAsmParser::parseRegister(bool RestoreOnFailure) {
3113 const auto &Tok = getToken();
3114 SMLoc StartLoc = Tok.getLoc();
3115 SMLoc EndLoc = Tok.getEndLoc();
3116 RegisterKind RegKind;
3117 unsigned Reg, RegNum, RegWidth;
3118
3119 if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth)) {
3120 return nullptr;
3121 }
3122 if (isHsaAbi(getSTI())) {
3123 if (!updateGprCountSymbols(RegKind, RegNum, RegWidth))
3124 return nullptr;
3125 } else
3126 KernelScope.usesRegister(RegKind, RegNum, RegWidth);
3127 return AMDGPUOperand::CreateReg(this, Reg, StartLoc, EndLoc);
3128}
3129
3130ParseStatus AMDGPUAsmParser::parseImm(OperandVector &Operands,
3131 bool HasSP3AbsModifier, bool HasLit) {
3132 // TODO: add syntactic sugar for 1/(2*PI)
3133
3134 if (isRegister())
3135 return ParseStatus::NoMatch;
3136 assert(!isModifier());
3137
3138 if (!HasLit) {
3139 HasLit = trySkipId("lit");
3140 if (HasLit) {
3141 if (!skipToken(AsmToken::LParen, "expected left paren after lit"))
3142 return ParseStatus::Failure;
3143 ParseStatus S = parseImm(Operands, HasSP3AbsModifier, HasLit);
3144 if (S.isSuccess() &&
3145 !skipToken(AsmToken::RParen, "expected closing parentheses"))
3146 return ParseStatus::Failure;
3147 return S;
3148 }
3149 }
3150
3151 const auto& Tok = getToken();
3152 const auto& NextTok = peekToken();
3153 bool IsReal = Tok.is(AsmToken::Real);
3154 SMLoc S = getLoc();
3155 bool Negate = false;
3156
3157 if (!IsReal && Tok.is(AsmToken::Minus) && NextTok.is(AsmToken::Real)) {
3158 lex();
3159 IsReal = true;
3160 Negate = true;
3161 }
3162
3163 AMDGPUOperand::Modifiers Mods;
3164 Mods.Lit = HasLit;
3165
3166 if (IsReal) {
3167 // Floating-point expressions are not supported.
3168 // Can only allow floating-point literals with an
3169 // optional sign.
3170
3171 StringRef Num = getTokenStr();
3172 lex();
3173
3174 APFloat RealVal(APFloat::IEEEdouble());
3175 auto roundMode = APFloat::rmNearestTiesToEven;
3176 if (errorToBool(RealVal.convertFromString(Num, roundMode).takeError()))
3177 return ParseStatus::Failure;
3178 if (Negate)
3179 RealVal.changeSign();
3180
3181 Operands.push_back(
3182 AMDGPUOperand::CreateImm(this, RealVal.bitcastToAPInt().getZExtValue(), S,
3183 AMDGPUOperand::ImmTyNone, true));
3184 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
3185 Op.setModifiers(Mods);
3186
3187 return ParseStatus::Success;
3188
3189 } else {
3190 int64_t IntVal;
3191 const MCExpr *Expr;
3192 SMLoc S = getLoc();
3193
3194 if (HasSP3AbsModifier) {
3195 // This is a workaround for handling expressions
3196 // as arguments of SP3 'abs' modifier, for example:
3197 // |1.0|
3198 // |-1|
3199 // |1+x|
3200 // This syntax is not compatible with syntax of standard
3201 // MC expressions (due to the trailing '|').
3202 SMLoc EndLoc;
3203 if (getParser().parsePrimaryExpr(Expr, EndLoc, nullptr))
3204 return ParseStatus::Failure;
3205 } else {
3206 if (Parser.parseExpression(Expr))
3207 return ParseStatus::Failure;
3208 }
3209
3210 if (Expr->evaluateAsAbsolute(IntVal)) {
3211 Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S));
3212 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
3213 Op.setModifiers(Mods);
3214 } else {
3215 if (HasLit)
3216 return ParseStatus::NoMatch;
3217 Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S));
3218 }
3219
3220 return ParseStatus::Success;
3221 }
3222
3223 return ParseStatus::NoMatch;
3224}
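// Note that 'lit(...)' only marks the operand so that it is encoded as a literal
// rather than an inline constant; the value inside the parentheses is parsed by the
// recursive call above.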
3225
3226ParseStatus AMDGPUAsmParser::parseReg(OperandVector &Operands) {
3227 if (!isRegister())
3228 return ParseStatus::NoMatch;
3229
3230 if (auto R = parseRegister()) {
3231 assert(R->isReg());
3232 Operands.push_back(std::move(R));
3233 return ParseStatus::Success;
3234 }
3235 return ParseStatus::Failure;
3236}
3237
3238ParseStatus AMDGPUAsmParser::parseRegOrImm(OperandVector &Operands,
3239 bool HasSP3AbsMod, bool HasLit) {
3240 ParseStatus Res = parseReg(Operands);
3241 if (!Res.isNoMatch())
3242 return Res;
3243 if (isModifier())
3244 return ParseStatus::NoMatch;
3245 return parseImm(Operands, HasSP3AbsMod, HasLit);
3246}
3247
3248bool
3249AMDGPUAsmParser::isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
3250 if (Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::LParen)) {
3251 const auto &str = Token.getString();
3252 return str == "abs" || str == "neg" || str == "sext";
3253 }
3254 return false;
3255}
3256
3257bool
3258AMDGPUAsmParser::isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const {
3259 return Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::Colon);
3260}
3261
3262bool
3263AMDGPUAsmParser::isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
3264 return isNamedOperandModifier(Token, NextToken) || Token.is(AsmToken::Pipe);
3265}
3266
3267bool
3268AMDGPUAsmParser::isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
3269 return isRegister(Token, NextToken) || isOperandModifier(Token, NextToken);
3270}
3271
3272// Check if this is an operand modifier or an opcode modifier
3273 // which may look like an expression but is not. We should
3274// avoid parsing these modifiers as expressions. Currently
3275// recognized sequences are:
3276// |...|
3277// abs(...)
3278// neg(...)
3279// sext(...)
3280// -reg
3281// -|...|
3282// -abs(...)
3283// name:...
3284//
3285bool
3286AMDGPUAsmParser::isModifier() {
3287
3288 AsmToken Tok = getToken();
3289 AsmToken NextToken[2];
3290 peekTokens(NextToken);
3291
3292 return isOperandModifier(Tok, NextToken[0]) ||
3293 (Tok.is(AsmToken::Minus) && isRegOrOperandModifier(NextToken[0], NextToken[1])) ||
3294 isOpcodeModifierWithVal(Tok, NextToken[0]);
3295}
3296
3297// Check if the current token is an SP3 'neg' modifier.
3298// Currently this modifier is allowed in the following context:
3299//
3300// 1. Before a register, e.g. "-v0", "-v[...]" or "-[v0,v1]".
3301// 2. Before an 'abs' modifier: -abs(...)
3302// 3. Before an SP3 'abs' modifier: -|...|
3303//
3304// In all other cases "-" is handled as a part
3305// of an expression that follows the sign.
3306//
3307// Note: When "-" is followed by an integer literal,
3308// this is interpreted as integer negation rather
3309// than a floating-point NEG modifier applied to N.
3310 // Besides being counter-intuitive, such use of the floating-point
3311 // NEG modifier would have resulted in different meanings
3312 // of integer literals used with VOP1/2/C and VOP3,
3313// for example:
3314// v_exp_f32_e32 v5, -1 // VOP1: src0 = 0xFFFFFFFF
3315// v_exp_f32_e64 v5, -1 // VOP3: src0 = 0x80000001
3316// Negative fp literals with preceding "-" are
3317// handled likewise for uniformity
3318//
3319bool
3320AMDGPUAsmParser::parseSP3NegModifier() {
3321
3322 AsmToken NextToken[2];
3323 peekTokens(NextToken);
3324
3325 if (isToken(AsmToken::Minus) &&
3326 (isRegister(NextToken[0], NextToken[1]) ||
3327 NextToken[0].is(AsmToken::Pipe) ||
3328 isId(NextToken[0], "abs"))) {
3329 lex();
3330 return true;
3331 }
3332
3333 return false;
3334}
3335
3336 ParseStatus
3337 AMDGPUAsmParser::parseRegOrImmWithFPInputMods(OperandVector &Operands,
3338 bool AllowImm) {
3339 bool Neg, SP3Neg;
3340 bool Abs, SP3Abs;
3341 bool Lit;
3342 SMLoc Loc;
3343
3344 // Disable ambiguous constructs like '--1' etc. Should use neg(-1) instead.
3345 if (isToken(AsmToken::Minus) && peekToken().is(AsmToken::Minus))
3346 return Error(getLoc(), "invalid syntax, expected 'neg' modifier");
3347
3348 SP3Neg = parseSP3NegModifier();
3349
3350 Loc = getLoc();
3351 Neg = trySkipId("neg");
3352 if (Neg && SP3Neg)
3353 return Error(Loc, "expected register or immediate");
3354 if (Neg && !skipToken(AsmToken::LParen, "expected left paren after neg"))
3355 return ParseStatus::Failure;
3356
3357 Abs = trySkipId("abs");
3358 if (Abs && !skipToken(AsmToken::LParen, "expected left paren after abs"))
3359 return ParseStatus::Failure;
3360
3361 Lit = trySkipId("lit");
3362 if (Lit && !skipToken(AsmToken::LParen, "expected left paren after lit"))
3363 return ParseStatus::Failure;
3364
3365 Loc = getLoc();
3366 SP3Abs = trySkipToken(AsmToken::Pipe);
3367 if (Abs && SP3Abs)
3368 return Error(Loc, "expected register or immediate");
3369
3370 ParseStatus Res;
3371 if (AllowImm) {
3372 Res = parseRegOrImm(Operands, SP3Abs, Lit);
3373 } else {
3374 Res = parseReg(Operands);
3375 }
3376 if (!Res.isSuccess())
3377 return (SP3Neg || Neg || SP3Abs || Abs || Lit) ? ParseStatus::Failure : Res;
3378
3379 if (Lit && !Operands.back()->isImm())
3380 Error(Loc, "expected immediate with lit modifier");
3381
3382 if (SP3Abs && !skipToken(AsmToken::Pipe, "expected vertical bar"))
3383 return ParseStatus::Failure;
3384 if (Abs && !skipToken(AsmToken::RParen, "expected closing parentheses"))
3385 return ParseStatus::Failure;
3386 if (Neg && !skipToken(AsmToken::RParen, "expected closing parentheses"))
3387 return ParseStatus::Failure;
3388 if (Lit && !skipToken(AsmToken::RParen, "expected closing parentheses"))
3389 return ParseStatus::Failure;
3390
3391 AMDGPUOperand::Modifiers Mods;
3392 Mods.Abs = Abs || SP3Abs;
3393 Mods.Neg = Neg || SP3Neg;
3394 Mods.Lit = Lit;
3395
3396 if (Mods.hasFPModifiers() || Lit) {
3397 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
3398 if (Op.isExpr())
3399 return Error(Op.getStartLoc(), "expected an absolute expression");
3400 Op.setModifiers(Mods);
3401 }
3402 return ParseStatus::Success;
3403}
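// Accepted spellings include neg(v0), abs(v0), -v0, |v0|, -|v0| and lit(1.0);
// combining the named and SP3 forms of the same modifier (e.g. 'neg(...)' together
// with a leading '-') is rejected above.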
3404
3405 ParseStatus
3406 AMDGPUAsmParser::parseRegOrImmWithIntInputMods(OperandVector &Operands,
3407 bool AllowImm) {
3408 bool Sext = trySkipId("sext");
3409 if (Sext && !skipToken(AsmToken::LParen, "expected left paren after sext"))
3410 return ParseStatus::Failure;
3411
3412 ParseStatus Res;
3413 if (AllowImm) {
3414 Res = parseRegOrImm(Operands);
3415 } else {
3416 Res = parseReg(Operands);
3417 }
3418 if (!Res.isSuccess())
3419 return Sext ? ParseStatus::Failure : Res;
3420
3421 if (Sext && !skipToken(AsmToken::RParen, "expected closing parentheses"))
3422 return ParseStatus::Failure;
3423
3424 AMDGPUOperand::Modifiers Mods;
3425 Mods.Sext = Sext;
3426
3427 if (Mods.hasIntModifiers()) {
3428 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
3429 if (Op.isExpr())
3430 return Error(Op.getStartLoc(), "expected an absolute expression");
3431 Op.setModifiers(Mods);
3432 }
3433
3434 return ParseStatus::Success;
3435}
3436
3437ParseStatus AMDGPUAsmParser::parseRegWithFPInputMods(OperandVector &Operands) {
3438 return parseRegOrImmWithFPInputMods(Operands, false);
3439}
3440
3441ParseStatus AMDGPUAsmParser::parseRegWithIntInputMods(OperandVector &Operands) {
3442 return parseRegOrImmWithIntInputMods(Operands, false);
3443}
3444
3445ParseStatus AMDGPUAsmParser::parseVReg32OrOff(OperandVector &Operands) {
3446 auto Loc = getLoc();
3447 if (trySkipId("off")) {
3448 Operands.push_back(AMDGPUOperand::CreateImm(this, 0, Loc,
3449 AMDGPUOperand::ImmTyOff, false));
3450 return ParseStatus::Success;
3451 }
3452
3453 if (!isRegister())
3454 return ParseStatus::NoMatch;
3455
3456 std::unique_ptr<AMDGPUOperand> Reg = parseRegister();
3457 if (Reg) {
3458 Operands.push_back(std::move(Reg));
3459 return ParseStatus::Success;
3460 }
3461
3462 return ParseStatus::Failure;
3463}
3464
3465unsigned AMDGPUAsmParser::checkTargetMatchPredicate(MCInst &Inst) {
3466 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
3467
3468 if ((getForcedEncodingSize() == 32 && (TSFlags & SIInstrFlags::VOP3)) ||
3469 (getForcedEncodingSize() == 64 && !(TSFlags & SIInstrFlags::VOP3)) ||
3470 (isForcedDPP() && !(TSFlags & SIInstrFlags::DPP)) ||
3471 (isForcedSDWA() && !(TSFlags & SIInstrFlags::SDWA)) )
3472 return Match_InvalidOperand;
3473
3474 if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi ||
3475 Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) {
3476 // v_mac_f32/16 allow only dst_sel == DWORD;
3477 auto OpNum =
3478 AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::dst_sel);
3479 const auto &Op = Inst.getOperand(OpNum);
3480 if (!Op.isImm() || Op.getImm() != AMDGPU::SDWA::SdwaSel::DWORD) {
3481 return Match_InvalidOperand;
3482 }
3483 }
3484
3485 return Match_Success;
3486}
3487
3488 static ArrayRef<unsigned> getAllVariants() {
3489 static const unsigned Variants[] = {
3493 };
3494
3495 return ArrayRef(Variants);
3496}
3497
3498// What asm variants we should check
3499ArrayRef<unsigned> AMDGPUAsmParser::getMatchedVariants() const {
3500 if (isForcedDPP() && isForcedVOP3()) {
3501 static const unsigned Variants[] = {AMDGPUAsmVariants::VOP3_DPP};
3502 return ArrayRef(Variants);
3503 }
3504 if (getForcedEncodingSize() == 32) {
3505 static const unsigned Variants[] = {AMDGPUAsmVariants::DEFAULT};
3506 return ArrayRef(Variants);
3507 }
3508
3509 if (isForcedVOP3()) {
3510 static const unsigned Variants[] = {AMDGPUAsmVariants::VOP3};
3511 return ArrayRef(Variants);
3512 }
3513
3514 if (isForcedSDWA()) {
3515 static const unsigned Variants[] = {AMDGPUAsmVariants::SDWA,
3516 AMDGPUAsmVariants::SDWA9};
3517 return ArrayRef(Variants);
3518 }
3519
3520 if (isForcedDPP()) {
3521 static const unsigned Variants[] = {AMDGPUAsmVariants::DPP};
3522 return ArrayRef(Variants);
3523 }
3524
3525 return getAllVariants();
3526}
3527
3528StringRef AMDGPUAsmParser::getMatchedVariantName() const {
3529 if (isForcedDPP() && isForcedVOP3())
3530 return "e64_dpp";
3531
3532 if (getForcedEncodingSize() == 32)
3533 return "e32";
3534
3535 if (isForcedVOP3())
3536 return "e64";
3537
3538 if (isForcedSDWA())
3539 return "sdwa";
3540
3541 if (isForcedDPP())
3542 return "dpp";
3543
3544 return "";
3545}
3546
3547unsigned AMDGPUAsmParser::findImplicitSGPRReadInVOP(const MCInst &Inst) const {
3548 const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
3549 for (MCPhysReg Reg : Desc.implicit_uses()) {
3550 switch (Reg) {
3551 case AMDGPU::FLAT_SCR:
3552 case AMDGPU::VCC:
3553 case AMDGPU::VCC_LO:
3554 case AMDGPU::VCC_HI:
3555 case AMDGPU::M0:
3556 return Reg;
3557 default:
3558 break;
3559 }
3560 }
3561 return AMDGPU::NoRegister;
3562}
3563
3564// NB: This code is correct only when used to check constant
3565// bus limitations because GFX7 support no f16 inline constants.
3566// Note that there are no cases when a GFX7 opcode violates
3567// constant bus limitations due to the use of an f16 constant.
3568bool AMDGPUAsmParser::isInlineConstant(const MCInst &Inst,
3569 unsigned OpIdx) const {
3570 const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
3571
3572 if (!AMDGPU::isSISrcOperand(Desc, OpIdx) ||
3573 AMDGPU::isKImmOperand(Desc, OpIdx)) {
3574 return false;
3575 }
3576
3577 const MCOperand &MO = Inst.getOperand(OpIdx);
3578
3579 int64_t Val = MO.getImm();
3580 auto OpSize = AMDGPU::getOperandSize(Desc, OpIdx);
3581
3582 switch (OpSize) { // expected operand size
3583 case 8:
3584 return AMDGPU::isInlinableLiteral64(Val, hasInv2PiInlineImm());
3585 case 4:
3586 return AMDGPU::isInlinableLiteral32(Val, hasInv2PiInlineImm());
3587 case 2: {
3588 const unsigned OperandType = Desc.operands()[OpIdx].OperandType;
3592 return AMDGPU::isInlinableLiteralI16(Val, hasInv2PiInlineImm());
3593
3598
3603
3608
3613 return AMDGPU::isInlinableLiteralFP16(Val, hasInv2PiInlineImm());
3614
3619 return AMDGPU::isInlinableLiteralBF16(Val, hasInv2PiInlineImm());
3620
3621 llvm_unreachable("invalid operand type");
3622 }
3623 default:
3624 llvm_unreachable("invalid operand size");
3625 }
3626}
3627
3628unsigned AMDGPUAsmParser::getConstantBusLimit(unsigned Opcode) const {
3629 if (!isGFX10Plus())
3630 return 1;
3631
3632 switch (Opcode) {
3633 // 64-bit shift instructions can use only one scalar value input
3634 case AMDGPU::V_LSHLREV_B64_e64:
3635 case AMDGPU::V_LSHLREV_B64_gfx10:
3636 case AMDGPU::V_LSHLREV_B64_e64_gfx11:
3637 case AMDGPU::V_LSHLREV_B64_e32_gfx12:
3638 case AMDGPU::V_LSHLREV_B64_e64_gfx12:
3639 case AMDGPU::V_LSHRREV_B64_e64:
3640 case AMDGPU::V_LSHRREV_B64_gfx10:
3641 case AMDGPU::V_LSHRREV_B64_e64_gfx11:
3642 case AMDGPU::V_LSHRREV_B64_e64_gfx12:
3643 case AMDGPU::V_ASHRREV_I64_e64:
3644 case AMDGPU::V_ASHRREV_I64_gfx10:
3645 case AMDGPU::V_ASHRREV_I64_e64_gfx11:
3646 case AMDGPU::V_ASHRREV_I64_e64_gfx12:
3647 case AMDGPU::V_LSHL_B64_e64:
3648 case AMDGPU::V_LSHR_B64_e64:
3649 case AMDGPU::V_ASHR_I64_e64:
3650 return 1;
3651 default:
3652 return 2;
3653 }
3654}
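// That is, pre-GFX10 targets and the 64-bit shifts listed above may use at most one
// scalar (SGPR or literal) source, while other GFX10+ opcodes may use two.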
3655
3656 constexpr unsigned MAX_SRC_OPERANDS_NUM = 6;
3657 using OperandIndices = SmallVector<int16_t, MAX_SRC_OPERANDS_NUM>;
3658
3659// Get regular operand indices in the same order as specified
3660// in the instruction (but append mandatory literals to the end).
3661 static OperandIndices getSrcOperandIndices(unsigned Opcode,
3662 bool AddMandatoryLiterals = false) {
3663
3664 int16_t ImmIdx =
3665 AddMandatoryLiterals ? getNamedOperandIdx(Opcode, OpName::imm) : -1;
3666
3667 if (isVOPD(Opcode)) {
3668 int16_t ImmDeferredIdx =
3669 AddMandatoryLiterals ? getNamedOperandIdx(Opcode, OpName::immDeferred)
3670 : -1;
3671
3672 return {getNamedOperandIdx(Opcode, OpName::src0X),
3673 getNamedOperandIdx(Opcode, OpName::vsrc1X),
3674 getNamedOperandIdx(Opcode, OpName::src0Y),
3675 getNamedOperandIdx(Opcode, OpName::vsrc1Y),
3676 ImmDeferredIdx,
3677 ImmIdx};
3678 }
3679
3680 return {getNamedOperandIdx(Opcode, OpName::src0),
3681 getNamedOperandIdx(Opcode, OpName::src1),
3682 getNamedOperandIdx(Opcode, OpName::src2), ImmIdx};
3683}
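// For VOPD instructions the result covers both components (src0X/vsrc1X and
// src0Y/vsrc1Y); otherwise it is src0..src2, with the mandatory literal indices
// appended when requested.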
3684
3685bool AMDGPUAsmParser::usesConstantBus(const MCInst &Inst, unsigned OpIdx) {
3686 const MCOperand &MO = Inst.getOperand(OpIdx);
3687 if (MO.isImm())
3688 return !isInlineConstant(Inst, OpIdx);
3689 if (MO.isReg()) {
3690 auto Reg = MO.getReg();
3691 if (!Reg)
3692 return false;
3693 const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3694 auto PReg = mc2PseudoReg(Reg);
3695 return isSGPR(PReg, TRI) && PReg != SGPR_NULL;
3696 }
3697 return true;
3698}
3699
3700// Based on the comment for `AMDGPUInstructionSelector::selectWritelane`:
3701// Writelane is special in that it can use SGPR and M0 (which would normally
3702// count as using the constant bus twice - but in this case it is allowed since
3703// the lane selector doesn't count as a use of the constant bus). However, it is
3704// still required to abide by the 1 SGPR rule.
3705static bool checkWriteLane(const MCInst &Inst) {
3706 const unsigned Opcode = Inst.getOpcode();
3707 if (Opcode != V_WRITELANE_B32_gfx6_gfx7 && Opcode != V_WRITELANE_B32_vi)
3708 return false;
3709 const MCOperand &LaneSelOp = Inst.getOperand(2);
3710 if (!LaneSelOp.isReg())
3711 return false;
3712 auto LaneSelReg = mc2PseudoReg(LaneSelOp.getReg());
3713 return LaneSelReg == M0 || LaneSelReg == M0_gfxpre11;
3714}
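// For example, 'v_writelane_b32 v1, s0, m0' on these targets: m0 as the lane
// selector is not counted against the constant bus, but the SGPR data operand still
// falls under the one-SGPR rule.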
3715
3716bool AMDGPUAsmParser::validateConstantBusLimitations(
3717 const MCInst &Inst, const OperandVector &Operands) {
3718 const unsigned Opcode = Inst.getOpcode();
3719 const MCInstrDesc &Desc = MII.get(Opcode);
3720 unsigned LastSGPR = AMDGPU::NoRegister;
3721 unsigned ConstantBusUseCount = 0;
3722 unsigned NumLiterals = 0;
3723 unsigned LiteralSize;
3724
3725 if (!(Desc.TSFlags &
3728 !isVOPD(Opcode))
3729 return true;
3730
3731 if (checkWriteLane(Inst))
3732 return true;
3733
3734 // Check special imm operands (used by madmk, etc)
3735 if (AMDGPU::hasNamedOperand(Opcode, AMDGPU::OpName::imm)) {
3736 ++NumLiterals;
3737 LiteralSize = 4;
3738 }
3739
3740 SmallDenseSet<unsigned> SGPRsUsed;
3741 unsigned SGPRUsed = findImplicitSGPRReadInVOP(Inst);
3742 if (SGPRUsed != AMDGPU::NoRegister) {
3743 SGPRsUsed.insert(SGPRUsed);
3744 ++ConstantBusUseCount;
3745 }
3746
3747 OperandIndices OpIndices = getSrcOperandIndices(Opcode);
3748
3749 for (int OpIdx : OpIndices) {
3750 if (OpIdx == -1)
3751 continue;
3752
3753 const MCOperand &MO = Inst.getOperand(OpIdx);
3754 if (usesConstantBus(Inst, OpIdx)) {
3755 if (MO.isReg()) {
3756 LastSGPR = mc2PseudoReg(MO.getReg());
3757 // Pairs of registers with a partial intersection like these
3758 // s0, s[0:1]
3759 // flat_scratch_lo, flat_scratch
3760 // flat_scratch_lo, flat_scratch_hi
3761 // are theoretically valid but they are disabled anyway.
3762 // Note that this code mimics SIInstrInfo::verifyInstruction
3763 if (SGPRsUsed.insert(LastSGPR).second) {
3764 ++ConstantBusUseCount;
3765 }
3766 } else { // Expression or a literal
3767
3768 if (Desc.operands()[OpIdx].OperandType == MCOI::OPERAND_IMMEDIATE)
3769 continue; // special operand like VINTERP attr_chan
3770
3771 // An instruction may use only one literal.
3772 // This has been validated in a previous step.
3773 // See validateVOPLiteral.
3774 // This literal may be used as more than one operand.
3775 // If all these operands are of the same size,
3776 // this literal counts as one scalar value.
3777 // Otherwise it counts as 2 scalar values.
3778 // See "GFX10 Shader Programming", section 3.6.2.3.
3779
3780 unsigned Size = AMDGPU::getOperandSize(Desc, OpIdx);
3781 if (Size < 4)
3782 Size = 4;
3783
3784 if (NumLiterals == 0) {
3785 NumLiterals = 1;
3786 LiteralSize = Size;
3787 } else if (LiteralSize != Size) {
3788 NumLiterals = 2;
3789 }
3790 }
3791 }
3792 }
3793 ConstantBusUseCount += NumLiterals;
3794
3795 if (ConstantBusUseCount <= getConstantBusLimit(Opcode))
3796 return true;
3797
3798 SMLoc LitLoc = getLitLoc(Operands);
3799 SMLoc RegLoc = getRegLoc(LastSGPR, Operands);
3800 SMLoc Loc = (LitLoc.getPointer() < RegLoc.getPointer()) ? RegLoc : LitLoc;
3801 Error(Loc, "invalid operand (violates constant bus restrictions)");
3802 return false;
3803}
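// Illustrative examples for the constant-bus check above (assuming a
// pre-GFX10 target, where getConstantBusLimit() is 1 for most VALU opcodes):
//   v_add_f32_e64 v0, s0, s1   ; two distinct SGPRs  -> rejected here
//   v_add_f32_e64 v0, s0, s0   ; one unique SGPR     -> accepted
//   v_add_f32_e64 v0, s0, 1.0  ; SGPR + inline const -> accepted
// On GFX10+ the limit is 2 for most opcodes, so the first case becomes legal.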
3804
3805bool AMDGPUAsmParser::validateVOPDRegBankConstraints(
3806 const MCInst &Inst, const OperandVector &Operands) {
3807
3808 const unsigned Opcode = Inst.getOpcode();
3809 if (!isVOPD(Opcode))
3810 return true;
3811
3812 const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3813
3814 auto getVRegIdx = [&](unsigned, unsigned OperandIdx) {
3815 const MCOperand &Opr = Inst.getOperand(OperandIdx);
3816 return (Opr.isReg() && !isSGPR(mc2PseudoReg(Opr.getReg()), TRI))
3817 ? Opr.getReg()
3818 : MCRegister();
3819 };
3820
3821 // On GFX12, if both OpX and OpY are V_MOV_B32 then OpY uses the SRC2 source-cache.
3822 bool SkipSrc = Opcode == AMDGPU::V_DUAL_MOV_B32_e32_X_MOV_B32_e32_gfx12;
3823
3824 const auto &InstInfo = getVOPDInstInfo(Opcode, &MII);
3825 auto InvalidCompOprIdx =
3826 InstInfo.getInvalidCompOperandIndex(getVRegIdx, SkipSrc);
3827 if (!InvalidCompOprIdx)
3828 return true;
3829
3830 auto CompOprIdx = *InvalidCompOprIdx;
3831 auto ParsedIdx =
3832 std::max(InstInfo[VOPD::X].getIndexInParsedOperands(CompOprIdx),
3833 InstInfo[VOPD::Y].getIndexInParsedOperands(CompOprIdx));
3834 assert(ParsedIdx > 0 && ParsedIdx < Operands.size());
3835
3836 auto Loc = ((AMDGPUOperand &)*Operands[ParsedIdx]).getStartLoc();
3837 if (CompOprIdx == VOPD::Component::DST) {
3838 Error(Loc, "one dst register must be even and the other odd");
3839 } else {
3840 auto CompSrcIdx = CompOprIdx - VOPD::Component::DST_NUM;
3841 Error(Loc, Twine("src") + Twine(CompSrcIdx) +
3842 " operands must use different VGPR banks");
3843 }
3844
3845 return false;
3846}
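// Rough illustration of the VOPD constraints above (hypothetical GFX11
// assembly):
//   v_dual_mov_b32 v0, v4 :: v_dual_mov_b32 v1, v5   ; even/odd dsts, src banks differ -> ok
//   v_dual_mov_b32 v0, v4 :: v_dual_mov_b32 v2, v5   ; both dsts even -> error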
3847
3848bool AMDGPUAsmParser::validateIntClampSupported(const MCInst &Inst) {
3849
3850 const unsigned Opc = Inst.getOpcode();
3851 const MCInstrDesc &Desc = MII.get(Opc);
3852
3853 if ((Desc.TSFlags & SIInstrFlags::IntClamp) != 0 && !hasIntClamp()) {
3854 int ClampIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp);
3855 assert(ClampIdx != -1);
3856 return Inst.getOperand(ClampIdx).getImm() == 0;
3857 }
3858
3859 return true;
3860}
3861
3862 constexpr uint64_t MIMGFlags =
3863 SIInstrFlags::MIMG | SIInstrFlags::VIMAGE | SIInstrFlags::VSAMPLE;
3864
3865bool AMDGPUAsmParser::validateMIMGDataSize(const MCInst &Inst,
3866 const SMLoc &IDLoc) {
3867
3868 const unsigned Opc = Inst.getOpcode();
3869 const MCInstrDesc &Desc = MII.get(Opc);
3870
3871 if ((Desc.TSFlags & MIMGFlags) == 0)
3872 return true;
3873
3874 int VDataIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdata);
3875 int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
3876 int TFEIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::tfe);
3877
3878 if (VDataIdx == -1 && isGFX10Plus()) // no return image_sample
3879 return true;
3880
3881 if ((DMaskIdx == -1 || TFEIdx == -1) && isGFX10_AEncoding()) // intersect_ray
3882 return true;
3883
3884 unsigned VDataSize = AMDGPU::getRegOperandSize(getMRI(), Desc, VDataIdx);
3885 unsigned TFESize = (TFEIdx != -1 && Inst.getOperand(TFEIdx).getImm()) ? 1 : 0;
3886 unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
3887 if (DMask == 0)
3888 DMask = 1;
3889
3890 bool IsPackedD16 = false;
3891 unsigned DataSize =
3892 (Desc.TSFlags & SIInstrFlags::Gather4) ? 4 : llvm::popcount(DMask);
3893 if (hasPackedD16()) {
3894 int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16);
3895 IsPackedD16 = D16Idx >= 0;
3896 if (IsPackedD16 && Inst.getOperand(D16Idx).getImm())
3897 DataSize = (DataSize + 1) / 2;
3898 }
3899
3900 if ((VDataSize / 4) == DataSize + TFESize)
3901 return true;
3902
3903 StringRef Modifiers;
3904 if (isGFX90A())
3905 Modifiers = IsPackedD16 ? "dmask and d16" : "dmask";
3906 else
3907 Modifiers = IsPackedD16 ? "dmask, d16 and tfe" : "dmask and tfe";
3908
3909 Error(IDLoc, Twine("image data size does not match ") + Modifiers);
3910 return false;
3911}
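// Illustrative example for the data-size check above (hypothetical GFX10
// assembly): dmask:0x7 enables three components, so without tfe the data
// operand must cover three 32-bit VGPRs:
//   image_load v[0:2], v[4:5], s[8:15] dmask:0x7 dim:SQ_RSRC_IMG_2D   ; ok
// Using v[0:3] instead would report "image data size does not match dmask ...".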
3912
3913bool AMDGPUAsmParser::validateMIMGAddrSize(const MCInst &Inst,
3914 const SMLoc &IDLoc) {
3915 const unsigned Opc = Inst.getOpcode();
3916 const MCInstrDesc &Desc = MII.get(Opc);
3917
3918 if ((Desc.TSFlags & MIMGFlags) == 0 || !isGFX10Plus())
3919 return true;
3920
3921 const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Opc);
3922
3923 const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode =
3924 AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode);
3925 int VAddr0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vaddr0);
3926 int RSrcOpName = (Desc.TSFlags & SIInstrFlags::MIMG) ? AMDGPU::OpName::srsrc
3927 : AMDGPU::OpName::rsrc;
3928 int SrsrcIdx = AMDGPU::getNamedOperandIdx(Opc, RSrcOpName);
3929 int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim);
3930 int A16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::a16);
3931
3932 assert(VAddr0Idx != -1);
3933 assert(SrsrcIdx != -1);
3934 assert(SrsrcIdx > VAddr0Idx);
3935
3936 bool IsA16 = (A16Idx != -1 && Inst.getOperand(A16Idx).getImm());
3937 if (BaseOpcode->BVH) {
3938 if (IsA16 == BaseOpcode->A16)
3939 return true;
3940 Error(IDLoc, "image address size does not match a16");
3941 return false;
3942 }
3943
3944 unsigned Dim = Inst.getOperand(DimIdx).getImm();
3945 const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByEncoding(Dim);
3946 bool IsNSA = SrsrcIdx - VAddr0Idx > 1;
3947 unsigned ActualAddrSize =
3948 IsNSA ? SrsrcIdx - VAddr0Idx
3949 : AMDGPU::getRegOperandSize(getMRI(), Desc, VAddr0Idx) / 4;
3950
3951 unsigned ExpectedAddrSize =
3952 AMDGPU::getAddrSizeMIMGOp(BaseOpcode, DimInfo, IsA16, hasG16());
3953
3954 if (IsNSA) {
3955 if (hasPartialNSAEncoding() &&
3956 ExpectedAddrSize >
3957 getNSAMaxSize(Desc.TSFlags & SIInstrFlags::VSAMPLE)) {
3958 int VAddrLastIdx = SrsrcIdx - 1;
3959 unsigned VAddrLastSize =
3960 AMDGPU::getRegOperandSize(getMRI(), Desc, VAddrLastIdx) / 4;
3961
3962 ActualAddrSize = VAddrLastIdx - VAddr0Idx + VAddrLastSize;
3963 }
3964 } else {
3965 if (ExpectedAddrSize > 12)
3966 ExpectedAddrSize = 16;
3967
3968 // Allow oversized 8 VGPR vaddr when only 5/6/7 VGPRs are required.
3969 // This provides backward compatibility for assembly created
3970 // before 160b/192b/224b types were directly supported.
3971 if (ActualAddrSize == 8 && (ExpectedAddrSize >= 5 && ExpectedAddrSize <= 7))
3972 return true;
3973 }
3974
3975 if (ActualAddrSize == ExpectedAddrSize)
3976 return true;
3977
3978 Error(IDLoc, "image address size does not match dim and a16");
3979 return false;
3980}
3981
3982bool AMDGPUAsmParser::validateMIMGAtomicDMask(const MCInst &Inst) {
3983
3984 const unsigned Opc = Inst.getOpcode();
3985 const MCInstrDesc &Desc = MII.get(Opc);
3986
3987 if ((Desc.TSFlags & MIMGFlags) == 0)
3988 return true;
3989 if (!Desc.mayLoad() || !Desc.mayStore())
3990 return true; // Not atomic
3991
3992 int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
3993 unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
3994
3995 // This is an incomplete check because image_atomic_cmpswap
3996 // may only use 0x3 and 0xf while other atomic operations
3997 // may use 0x1 and 0x3. However, these limitations are
3998 // verified when we check that dmask matches dst size.
3999 return DMask == 0x1 || DMask == 0x3 || DMask == 0xf;
4000}
4001
4002bool AMDGPUAsmParser::validateMIMGGatherDMask(const MCInst &Inst) {
4003
4004 const unsigned Opc = Inst.getOpcode();
4005 const MCInstrDesc &Desc = MII.get(Opc);
4006
4007 if ((Desc.TSFlags & SIInstrFlags::Gather4) == 0)
4008 return true;
4009
4010 int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
4011 unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
4012
4013 // GATHER4 instructions use dmask in a different fashion compared to
4014 // other MIMG instructions. The only useful DMASK values are
4015 // 1=red, 2=green, 4=blue, 8=alpha. (e.g. 1 returns
4016 // (red,red,red,red) etc.) The ISA document doesn't mention
4017 // this.
4018 return DMask == 0x1 || DMask == 0x2 || DMask == 0x4 || DMask == 0x8;
4019}
4020
4021bool AMDGPUAsmParser::validateMIMGDim(const MCInst &Inst,
4022 const OperandVector &Operands) {
4023 if (!isGFX10Plus())
4024 return true;
4025
4026 const unsigned Opc = Inst.getOpcode();
4027 const MCInstrDesc &Desc = MII.get(Opc);
4028
4029 if ((Desc.TSFlags & MIMGFlags) == 0)
4030 return true;
4031
4032 // image_bvh_intersect_ray instructions do not have dim
4033 if (AMDGPU::getMIMGBaseOpcode(Opc)->BVH)
4034 return true;
4035
4036 for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
4037 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
4038 if (Op.isDim())
4039 return true;
4040 }
4041 return false;
4042}
4043
4044bool AMDGPUAsmParser::validateMIMGMSAA(const MCInst &Inst) {
4045 const unsigned Opc = Inst.getOpcode();
4046 const MCInstrDesc &Desc = MII.get(Opc);
4047
4048 if ((Desc.TSFlags & MIMGFlags) == 0)
4049 return true;
4050
4051 const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Opc);
4052 const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode =
4053 AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode);
4054
4055 if (!BaseOpcode->MSAA)
4056 return true;
4057
4058 int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim);
4059 assert(DimIdx != -1);
4060
4061 unsigned Dim = Inst.getOperand(DimIdx).getImm();
4062 const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByEncoding(Dim);
4063
4064 return DimInfo->MSAA;
4065}
4066
4067static bool IsMovrelsSDWAOpcode(const unsigned Opcode)
4068{
4069 switch (Opcode) {
4070 case AMDGPU::V_MOVRELS_B32_sdwa_gfx10:
4071 case AMDGPU::V_MOVRELSD_B32_sdwa_gfx10:
4072 case AMDGPU::V_MOVRELSD_2_B32_sdwa_gfx10:
4073 return true;
4074 default:
4075 return false;
4076 }
4077}
4078
4079 // movrels* opcodes should only allow VGPRs as src0.
4080// This is specified in .td description for vop1/vop3,
4081// but sdwa is handled differently. See isSDWAOperand.
4082bool AMDGPUAsmParser::validateMovrels(const MCInst &Inst,
4083 const OperandVector &Operands) {
4084
4085 const unsigned Opc = Inst.getOpcode();
4086 const MCInstrDesc &Desc = MII.get(Opc);
4087
4088 if ((Desc.TSFlags & SIInstrFlags::SDWA) == 0 || !IsMovrelsSDWAOpcode(Opc))
4089 return true;
4090
4091 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
4092 assert(Src0Idx != -1);
4093
4094 SMLoc ErrLoc;
4095 const MCOperand &Src0 = Inst.getOperand(Src0Idx);
4096 if (Src0.isReg()) {
4097 auto Reg = mc2PseudoReg(Src0.getReg());
4098 const MCRegisterInfo *TRI = getContext().getRegisterInfo();
4099 if (!isSGPR(Reg, TRI))
4100 return true;
4101 ErrLoc = getRegLoc(Reg, Operands);
4102 } else {
4103 ErrLoc = getConstLoc(Operands);
4104 }
4105
4106 Error(ErrLoc, "source operand must be a VGPR");
4107 return false;
4108}
4109
4110bool AMDGPUAsmParser::validateMAIAccWrite(const MCInst &Inst,
4111 const OperandVector &Operands) {
4112
4113 const unsigned Opc = Inst.getOpcode();
4114
4115 if (Opc != AMDGPU::V_ACCVGPR_WRITE_B32_vi)
4116 return true;
4117
4118 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
4119 assert(Src0Idx != -1);
4120
4121 const MCOperand &Src0 = Inst.getOperand(Src0Idx);
4122 if (!Src0.isReg())
4123 return true;
4124
4125 auto Reg = mc2PseudoReg(Src0.getReg());
4126 const MCRegisterInfo *TRI = getContext().getRegisterInfo();
4127 if (!isGFX90A() && isSGPR(Reg, TRI)) {
4128 Error(getRegLoc(Reg, Operands),
4129 "source operand must be either a VGPR or an inline constant");
4130 return false;
4131 }
4132
4133 return true;
4134}
4135
4136bool AMDGPUAsmParser::validateMAISrc2(const MCInst &Inst,
4137 const OperandVector &Operands) {
4138 unsigned Opcode = Inst.getOpcode();
4139 const MCInstrDesc &Desc = MII.get(Opcode);
4140
4141 if (!(Desc.TSFlags & SIInstrFlags::IsMAI) ||
4142 !getFeatureBits()[FeatureMFMAInlineLiteralBug])
4143 return true;
4144
4145 const int Src2Idx = getNamedOperandIdx(Opcode, OpName::src2);
4146 if (Src2Idx == -1)
4147 return true;
4148
4149 if (Inst.getOperand(Src2Idx).isImm() && isInlineConstant(Inst, Src2Idx)) {
4150 Error(getConstLoc(Operands),
4151 "inline constants are not allowed for this operand");
4152 return false;
4153 }
4154
4155 return true;
4156}
4157
4158bool AMDGPUAsmParser::validateMFMA(const MCInst &Inst,
4159 const OperandVector &Operands) {
4160 const unsigned Opc = Inst.getOpcode();
4161 const MCInstrDesc &Desc = MII.get(Opc);
4162
4163 if ((Desc.TSFlags & SIInstrFlags::IsMAI) == 0)
4164 return true;
4165
4166 const int Src2Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2);
4167 if (Src2Idx == -1)
4168 return true;
4169
4170 const MCOperand &Src2 = Inst.getOperand(Src2Idx);
4171 if (!Src2.isReg())
4172 return true;
4173
4174 MCRegister Src2Reg = Src2.getReg();
4175 MCRegister DstReg = Inst.getOperand(0).getReg();
4176 if (Src2Reg == DstReg)
4177 return true;
4178
4179 const MCRegisterInfo *TRI = getContext().getRegisterInfo();
4180 if (TRI->getRegClass(Desc.operands()[0].RegClass).getSizeInBits() <= 128)
4181 return true;
4182
4183 if (TRI->regsOverlap(Src2Reg, DstReg)) {
4184 Error(getRegLoc(mc2PseudoReg(Src2Reg), Operands),
4185 "source 2 operand must not partially overlap with dst");
4186 return false;
4187 }
4188
4189 return true;
4190}
4191
4192bool AMDGPUAsmParser::validateDivScale(const MCInst &Inst) {
4193 switch (Inst.getOpcode()) {
4194 default:
4195 return true;
4196 case V_DIV_SCALE_F32_gfx6_gfx7:
4197 case V_DIV_SCALE_F32_vi:
4198 case V_DIV_SCALE_F32_gfx10:
4199 case V_DIV_SCALE_F64_gfx6_gfx7:
4200 case V_DIV_SCALE_F64_vi:
4201 case V_DIV_SCALE_F64_gfx10:
4202 break;
4203 }
4204
4205 // TODO: Check that src0 = src1 or src2.
4206
4207 for (auto Name : {AMDGPU::OpName::src0_modifiers,
4208 AMDGPU::OpName::src2_modifiers,
4209 AMDGPU::OpName::src2_modifiers}) {
4210 if (Inst.getOperand(AMDGPU::getNamedOperandIdx(Inst.getOpcode(), Name))
4211 .getImm() &
4212 SISrcMods::ABS) {
4213 return false;
4214 }
4215 }
4216
4217 return true;
4218}
4219
4220bool AMDGPUAsmParser::validateMIMGD16(const MCInst &Inst) {
4221
4222 const unsigned Opc = Inst.getOpcode();
4223 const MCInstrDesc &Desc = MII.get(Opc);
4224
4225 if ((Desc.TSFlags & MIMGFlags) == 0)
4226 return true;
4227
4228 int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16);
4229 if (D16Idx >= 0 && Inst.getOperand(D16Idx).getImm()) {
4230 if (isCI() || isSI())
4231 return false;
4232 }
4233
4234 return true;
4235}
4236
4237static bool IsRevOpcode(const unsigned Opcode)
4238{
4239 switch (Opcode) {
4240 case AMDGPU::V_SUBREV_F32_e32:
4241 case AMDGPU::V_SUBREV_F32_e64:
4242 case AMDGPU::V_SUBREV_F32_e32_gfx10:
4243 case AMDGPU::V_SUBREV_F32_e32_gfx6_gfx7:
4244 case AMDGPU::V_SUBREV_F32_e32_vi:
4245 case AMDGPU::V_SUBREV_F32_e64_gfx10:
4246 case AMDGPU::V_SUBREV_F32_e64_gfx6_gfx7:
4247 case AMDGPU::V_SUBREV_F32_e64_vi:
4248
4249 case AMDGPU::V_SUBREV_CO_U32_e32:
4250 case AMDGPU::V_SUBREV_CO_U32_e64:
4251 case AMDGPU::V_SUBREV_I32_e32_gfx6_gfx7:
4252 case AMDGPU::V_SUBREV_I32_e64_gfx6_gfx7:
4253
4254 case AMDGPU::V_SUBBREV_U32_e32:
4255 case AMDGPU::V_SUBBREV_U32_e64:
4256 case AMDGPU::V_SUBBREV_U32_e32_gfx6_gfx7:
4257 case AMDGPU::V_SUBBREV_U32_e32_vi:
4258 case AMDGPU::V_SUBBREV_U32_e64_gfx6_gfx7:
4259 case AMDGPU::V_SUBBREV_U32_e64_vi:
4260
4261 case AMDGPU::V_SUBREV_U32_e32:
4262 case AMDGPU::V_SUBREV_U32_e64:
4263 case AMDGPU::V_SUBREV_U32_e32_gfx9:
4264 case AMDGPU::V_SUBREV_U32_e32_vi:
4265 case AMDGPU::V_SUBREV_U32_e64_gfx9:
4266 case AMDGPU::V_SUBREV_U32_e64_vi:
4267
4268 case AMDGPU::V_SUBREV_F16_e32:
4269 case AMDGPU::V_SUBREV_F16_e64:
4270 case AMDGPU::V_SUBREV_F16_e32_gfx10:
4271 case AMDGPU::V_SUBREV_F16_e32_vi:
4272 case AMDGPU::V_SUBREV_F16_e64_gfx10:
4273 case AMDGPU::V_SUBREV_F16_e64_vi:
4274
4275 case AMDGPU::V_SUBREV_U16_e32:
4276 case AMDGPU::V_SUBREV_U16_e64:
4277 case AMDGPU::V_SUBREV_U16_e32_vi:
4278 case AMDGPU::V_SUBREV_U16_e64_vi:
4279
4280 case AMDGPU::V_SUBREV_CO_U32_e32_gfx9:
4281 case AMDGPU::V_SUBREV_CO_U32_e64_gfx10:
4282 case AMDGPU::V_SUBREV_CO_U32_e64_gfx9:
4283
4284 case AMDGPU::V_SUBBREV_CO_U32_e32_gfx9:
4285 case AMDGPU::V_SUBBREV_CO_U32_e64_gfx9:
4286
4287 case AMDGPU::V_SUBREV_NC_U32_e32_gfx10:
4288 case AMDGPU::V_SUBREV_NC_U32_e64_gfx10:
4289
4290 case AMDGPU::V_SUBREV_CO_CI_U32_e32_gfx10:
4291 case AMDGPU::V_SUBREV_CO_CI_U32_e64_gfx10:
4292
4293 case AMDGPU::V_LSHRREV_B32_e32:
4294 case AMDGPU::V_LSHRREV_B32_e64:
4295 case AMDGPU::V_LSHRREV_B32_e32_gfx6_gfx7:
4296 case AMDGPU::V_LSHRREV_B32_e64_gfx6_gfx7:
4297 case AMDGPU::V_LSHRREV_B32_e32_vi:
4298 case AMDGPU::V_LSHRREV_B32_e64_vi:
4299 case AMDGPU::V_LSHRREV_B32_e32_gfx10:
4300 case AMDGPU::V_LSHRREV_B32_e64_gfx10:
4301
4302 case AMDGPU::V_ASHRREV_I32_e32:
4303 case AMDGPU::V_ASHRREV_I32_e64:
4304 case AMDGPU::V_ASHRREV_I32_e32_gfx10:
4305 case AMDGPU::V_ASHRREV_I32_e32_gfx6_gfx7:
4306 case AMDGPU::V_ASHRREV_I32_e32_vi:
4307 case AMDGPU::V_ASHRREV_I32_e64_gfx10:
4308 case AMDGPU::V_ASHRREV_I32_e64_gfx6_gfx7:
4309 case AMDGPU::V_ASHRREV_I32_e64_vi:
4310
4311 case AMDGPU::V_LSHLREV_B32_e32:
4312 case AMDGPU::V_LSHLREV_B32_e64:
4313 case AMDGPU::V_LSHLREV_B32_e32_gfx10:
4314 case AMDGPU::V_LSHLREV_B32_e32_gfx6_gfx7:
4315 case AMDGPU::V_LSHLREV_B32_e32_vi:
4316 case AMDGPU::V_LSHLREV_B32_e64_gfx10:
4317 case AMDGPU::V_LSHLREV_B32_e64_gfx6_gfx7:
4318 case AMDGPU::V_LSHLREV_B32_e64_vi:
4319
4320 case AMDGPU::V_LSHLREV_B16_e32:
4321 case AMDGPU::V_LSHLREV_B16_e64:
4322 case AMDGPU::V_LSHLREV_B16_e32_vi:
4323 case AMDGPU::V_LSHLREV_B16_e64_vi:
4324 case AMDGPU::V_LSHLREV_B16_gfx10:
4325
4326 case AMDGPU::V_LSHRREV_B16_e32:
4327 case AMDGPU::V_LSHRREV_B16_e64:
4328 case AMDGPU::V_LSHRREV_B16_e32_vi:
4329 case AMDGPU::V_LSHRREV_B16_e64_vi:
4330 case AMDGPU::V_LSHRREV_B16_gfx10:
4331
4332 case AMDGPU::V_ASHRREV_I16_e32:
4333 case AMDGPU::V_ASHRREV_I16_e64:
4334 case AMDGPU::V_ASHRREV_I16_e32_vi:
4335 case AMDGPU::V_ASHRREV_I16_e64_vi:
4336 case AMDGPU::V_ASHRREV_I16_gfx10:
4337
4338 case AMDGPU::V_LSHLREV_B64_e64:
4339 case AMDGPU::V_LSHLREV_B64_gfx10:
4340 case AMDGPU::V_LSHLREV_B64_vi:
4341
4342 case AMDGPU::V_LSHRREV_B64_e64:
4343 case AMDGPU::V_LSHRREV_B64_gfx10:
4344 case AMDGPU::V_LSHRREV_B64_vi:
4345
4346 case AMDGPU::V_ASHRREV_I64_e64:
4347 case AMDGPU::V_ASHRREV_I64_gfx10:
4348 case AMDGPU::V_ASHRREV_I64_vi:
4349
4350 case AMDGPU::V_PK_LSHLREV_B16:
4351 case AMDGPU::V_PK_LSHLREV_B16_gfx10:
4352 case AMDGPU::V_PK_LSHLREV_B16_vi:
4353
4354 case AMDGPU::V_PK_LSHRREV_B16:
4355 case AMDGPU::V_PK_LSHRREV_B16_gfx10:
4356 case AMDGPU::V_PK_LSHRREV_B16_vi:
4357 case AMDGPU::V_PK_ASHRREV_I16:
4358 case AMDGPU::V_PK_ASHRREV_I16_gfx10:
4359 case AMDGPU::V_PK_ASHRREV_I16_vi:
4360 return true;
4361 default:
4362 return false;
4363 }
4364}
4365
4366std::optional<StringRef>
4367AMDGPUAsmParser::validateLdsDirect(const MCInst &Inst) {
4368
4369 using namespace SIInstrFlags;
4370 const unsigned Opcode = Inst.getOpcode();
4371 const MCInstrDesc &Desc = MII.get(Opcode);
4372
4373 // lds_direct register is defined so that it can be used
4374 // with 9-bit operands only. Ignore encodings which do not accept these.
4375 const auto Enc = VOP1 | VOP2 | VOP3 | VOPC | VOP3P | SIInstrFlags::SDWA;
4376 if ((Desc.TSFlags & Enc) == 0)
4377 return std::nullopt;
4378
4379 for (auto SrcName : {OpName::src0, OpName::src1, OpName::src2}) {
4380 auto SrcIdx = getNamedOperandIdx(Opcode, SrcName);
4381 if (SrcIdx == -1)
4382 break;
4383 const auto &Src = Inst.getOperand(SrcIdx);
4384 if (Src.isReg() && Src.getReg() == LDS_DIRECT) {
4385
4386 if (isGFX90A() || isGFX11Plus())
4387 return StringRef("lds_direct is not supported on this GPU");
4388
4389 if (IsRevOpcode(Opcode) || (Desc.TSFlags & SIInstrFlags::SDWA))
4390 return StringRef("lds_direct cannot be used with this instruction");
4391
4392 if (SrcName != OpName::src0)
4393 return StringRef("lds_direct may be used as src0 only");
4394 }
4395 }
4396
4397 return std::nullopt;
4398}
4399
4400SMLoc AMDGPUAsmParser::getFlatOffsetLoc(const OperandVector &Operands) const {
4401 for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
4402 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
4403 if (Op.isFlatOffset())
4404 return Op.getStartLoc();
4405 }
4406 return getLoc();
4407}
4408
4409bool AMDGPUAsmParser::validateOffset(const MCInst &Inst,
4410 const OperandVector &Operands) {
4411 auto Opcode = Inst.getOpcode();
4412 auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset);
4413 if (OpNum == -1)
4414 return true;
4415
4416 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
4417 if ((TSFlags & SIInstrFlags::FLAT))
4418 return validateFlatOffset(Inst, Operands);
4419
4420 if ((TSFlags & SIInstrFlags::SMRD))
4421 return validateSMEMOffset(Inst, Operands);
4422
4423 const auto &Op = Inst.getOperand(OpNum);
4424 if (isGFX12Plus() &&
4425 (TSFlags & (SIInstrFlags::MUBUF | SIInstrFlags::MTBUF))) {
4426 const unsigned OffsetSize = 24;
4427 if (!isIntN(OffsetSize, Op.getImm())) {
4428 Error(getFlatOffsetLoc(Operands),
4429 Twine("expected a ") + Twine(OffsetSize) + "-bit signed offset");
4430 return false;
4431 }
4432 } else {
4433 const unsigned OffsetSize = 16;
4434 if (!isUIntN(OffsetSize, Op.getImm())) {
4435 Error(getFlatOffsetLoc(Operands),
4436 Twine("expected a ") + Twine(OffsetSize) + "-bit unsigned offset");
4437 return false;
4438 }
4439 }
4440 return true;
4441}
4442
4443bool AMDGPUAsmParser::validateFlatOffset(const MCInst &Inst,
4444 const OperandVector &Operands) {
4445 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
4446 if ((TSFlags & SIInstrFlags::FLAT) == 0)
4447 return true;
4448
4449 auto Opcode = Inst.getOpcode();
4450 auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset);
4451 assert(OpNum != -1);
4452
4453 const auto &Op = Inst.getOperand(OpNum);
4454 if (!hasFlatOffsets() && Op.getImm() != 0) {
4455 Error(getFlatOffsetLoc(Operands),
4456 "flat offset modifier is not supported on this GPU");
4457 return false;
4458 }
4459
4460 // For pre-GFX12 FLAT instructions the offset must be positive;
4461 // MSB is ignored and forced to zero.
4462 unsigned OffsetSize = AMDGPU::getNumFlatOffsetBits(getSTI());
4463 bool AllowNegative =
4464 (TSFlags & (SIInstrFlags::FlatGlobal | SIInstrFlags::FlatScratch)) ||
4465 isGFX12Plus();
4466 if (!isIntN(OffsetSize, Op.getImm()) || (!AllowNegative && Op.getImm() < 0)) {
4467 Error(getFlatOffsetLoc(Operands),
4468 Twine("expected a ") +
4469 (AllowNegative ? Twine(OffsetSize) + "-bit signed offset"
4470 : Twine(OffsetSize - 1) + "-bit unsigned offset"));
4471 return false;
4472 }
4473
4474 return true;
4475}
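// Illustrative examples for the flat-offset check above (assuming a pre-GFX12
// target with flat offsets): global/scratch addressing accepts signed
// offsets, plain FLAT does not, e.g.
//   global_load_dword v0, v[2:3], off offset:-16   ; ok
//   flat_load_dword   v0, v[2:3]      offset:-16   ; rejected (unsigned offset expected)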
4476
4477SMLoc AMDGPUAsmParser::getSMEMOffsetLoc(const OperandVector &Operands) const {
4478 // Start with second operand because SMEM Offset cannot be dst or src0.
4479 for (unsigned i = 2, e = Operands.size(); i != e; ++i) {
4480 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
4481 if (Op.isSMEMOffset() || Op.isSMEMOffsetMod())
4482 return Op.getStartLoc();
4483 }
4484 return getLoc();
4485}
4486
4487bool AMDGPUAsmParser::validateSMEMOffset(const MCInst &Inst,
4488 const OperandVector &Operands) {
4489 if (isCI() || isSI())
4490 return true;
4491
4492 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
4493 if ((TSFlags & SIInstrFlags::SMRD) == 0)
4494 return true;
4495
4496 auto Opcode = Inst.getOpcode();
4497 auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset);
4498 if (OpNum == -1)
4499 return true;
4500
4501 const auto &Op = Inst.getOperand(OpNum);
4502 if (!Op.isImm())
4503 return true;
4504
4505 uint64_t Offset = Op.getImm();
4506 bool IsBuffer = AMDGPU::getSMEMIsBuffer(Opcode);
4507 if (AMDGPU::isLegalSMRDEncodedUnsignedOffset(getSTI(), Offset) ||
4508 AMDGPU::isLegalSMRDEncodedSignedOffset(getSTI(), Offset, IsBuffer))
4509 return true;
4510
4511 Error(getSMEMOffsetLoc(Operands),
4512 isGFX12Plus() ? "expected a 24-bit signed offset"
4513 : (isVI() || IsBuffer) ? "expected a 20-bit unsigned offset"
4514 : "expected a 21-bit signed offset");
4515
4516 return false;
4517}
4518
4519bool AMDGPUAsmParser::validateSOPLiteral(const MCInst &Inst) const {
4520 unsigned Opcode = Inst.getOpcode();
4521 const MCInstrDesc &Desc = MII.get(Opcode);
4522 if (!(Desc.TSFlags & (SIInstrFlags::SOP2 | SIInstrFlags::SOPC)))
4523 return true;
4524
4525 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
4526 const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
4527
4528 const int OpIndices[] = { Src0Idx, Src1Idx };
4529
4530 unsigned NumExprs = 0;
4531 unsigned NumLiterals = 0;
4532 uint32_t LiteralValue;
4533
4534 for (int OpIdx : OpIndices) {
4535 if (OpIdx == -1) break;
4536
4537 const MCOperand &MO = Inst.getOperand(OpIdx);
4538 // Exclude special imm operands (like that used by s_set_gpr_idx_on)
4539 if (AMDGPU::isSISrcOperand(Desc, OpIdx)) {
4540 if (MO.isImm() && !isInlineConstant(Inst, OpIdx)) {
4541 uint32_t Value = static_cast<uint32_t>(MO.getImm());
4542 if (NumLiterals == 0 || LiteralValue != Value) {
4543 LiteralValue = Value;
4544 ++NumLiterals;
4545 }
4546 } else if (MO.isExpr()) {
4547 ++NumExprs;
4548 }
4549 }
4550 }
4551
4552 return NumLiterals + NumExprs <= 1;
4553}
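// Illustrative examples for the SOP literal rule above:
//   s_add_u32 s0, 0x12345678, 0x12345678   ; same literal twice -> one unique literal, ok
//   s_add_u32 s0, 0x12345678, 0x9abcdef0   ; two unique literals -> rejected by the caller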
4554
4555bool AMDGPUAsmParser::validateOpSel(const MCInst &Inst) {
4556 const unsigned Opc = Inst.getOpcode();
4557 if (isPermlane16(Opc)) {
4558 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
4559 unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
4560
4561 if (OpSel & ~3)
4562 return false;
4563 }
4564
4565 uint64_t TSFlags = MII.get(Opc).TSFlags;
4566
4567 if (isGFX940() && (TSFlags & SIInstrFlags::IsDOT)) {
4568 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
4569 if (OpSelIdx != -1) {
4570 if (Inst.getOperand(OpSelIdx).getImm() != 0)
4571 return false;
4572 }
4573 int OpSelHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel_hi);
4574 if (OpSelHiIdx != -1) {
4575 if (Inst.getOperand(OpSelHiIdx).getImm() != -1)
4576 return false;
4577 }
4578 }
4579
4580 // op_sel[0:1] must be 0 for v_dot2_bf16_bf16 and v_dot2_f16_f16 (VOP3 Dot).
4581 if (isGFX11Plus() && (TSFlags & SIInstrFlags::IsDOT) &&
4582 (TSFlags & SIInstrFlags::VOP3) && !(TSFlags & SIInstrFlags::VOP3P)) {
4583 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
4584 unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
4585 if (OpSel & 3)
4586 return false;
4587 }
4588
4589 return true;
4590}
4591
4592bool AMDGPUAsmParser::validateNeg(const MCInst &Inst, int OpName) {
4593 assert(OpName == AMDGPU::OpName::neg_lo || OpName == AMDGPU::OpName::neg_hi);
4594
4595 const unsigned Opc = Inst.getOpcode();
4596 uint64_t TSFlags = MII.get(Opc).TSFlags;
4597
4598 // v_dot4 fp8/bf8 neg_lo/neg_hi not allowed on src0 and src1 (allowed on src2)
4599 // v_wmma iu4/iu8 neg_lo not allowed on src2 (allowed on src0, src1)
4600 // v_swmmac f16/bf16 neg_lo/neg_hi not allowed on src2 (allowed on src0, src1)
4601 // other wmma/swmmac instructions don't have neg_lo/neg_hi operand.
4602 if (!(TSFlags & SIInstrFlags::IsDOT) && !(TSFlags & SIInstrFlags::IsWMMA) &&
4603 !(TSFlags & SIInstrFlags::IsSWMMAC))
4604 return true;
4605
4606 int NegIdx = AMDGPU::getNamedOperandIdx(Opc, OpName);
4607 if (NegIdx == -1)
4608 return true;
4609
4610 unsigned Neg = Inst.getOperand(NegIdx).getImm();
4611
4612 // Some instructions have a neg_lo or neg_hi operand but allow the neg
4613 // modifier only on some of their src operands.
4614 // Conveniently, such instructions have no src_modifiers operand for the
4615 // src operands that don't allow neg, because those operands don't allow opsel either.
4616
4617 int SrcMods[3] = {AMDGPU::OpName::src0_modifiers,
4618 AMDGPU::OpName::src1_modifiers,
4619 AMDGPU::OpName::src2_modifiers};
4620
4621 for (unsigned i = 0; i < 3; ++i) {
4622 if (!AMDGPU::hasNamedOperand(Opc, SrcMods[i])) {
4623 if (Neg & (1 << i))
4624 return false;
4625 }
4626 }
4627
4628 return true;
4629}
4630
4631bool AMDGPUAsmParser::validateDPP(const MCInst &Inst,
4632 const OperandVector &Operands) {
4633 const unsigned Opc = Inst.getOpcode();
4634 int DppCtrlIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dpp_ctrl);
4635 if (DppCtrlIdx >= 0) {
4636 unsigned DppCtrl = Inst.getOperand(DppCtrlIdx).getImm();
4637
4638 if (!AMDGPU::isLegalDPALU_DPPControl(DppCtrl) &&
4639 AMDGPU::isDPALU_DPP(MII.get(Opc))) {
4640 // DP ALU DPP is supported for row_newbcast only on GFX9*
4641 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyDppCtrl, Operands);
4642 Error(S, "DP ALU dpp only supports row_newbcast");
4643 return false;
4644 }
4645 }
4646
4647 int Dpp8Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dpp8);
4648 bool IsDPP = DppCtrlIdx >= 0 || Dpp8Idx >= 0;
4649
4650 if (IsDPP && !hasDPPSrc1SGPR(getSTI())) {
4651 int Src1Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src1);
4652 if (Src1Idx >= 0) {
4653 const MCOperand &Src1 = Inst.getOperand(Src1Idx);
4654 const MCRegisterInfo *TRI = getContext().getRegisterInfo();
4655 if (Src1.isReg() && isSGPR(mc2PseudoReg(Src1.getReg()), TRI)) {
4656 auto Reg = mc2PseudoReg(Inst.getOperand(Src1Idx).getReg());
4657 SMLoc S = getRegLoc(Reg, Operands);
4658 Error(S, "invalid operand for instruction");
4659 return false;
4660 }
4661 if (Src1.isImm()) {
4662 Error(getInstLoc(Operands),
4663 "src1 immediate operand invalid for instruction");
4664 return false;
4665 }
4666 }
4667 }
4668
4669 return true;
4670}
4671
4672// Check if VCC register matches wavefront size
4673bool AMDGPUAsmParser::validateVccOperand(unsigned Reg) const {
4674 auto FB = getFeatureBits();
4675 return (FB[AMDGPU::FeatureWavefrontSize64] && Reg == AMDGPU::VCC) ||
4676 (FB[AMDGPU::FeatureWavefrontSize32] && Reg == AMDGPU::VCC_LO);
4677}
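// Roughly: wave64 code must use vcc and wave32 code must use vcc_lo where a
// carry operand is expected, e.g. (hypothetical GFX10 wave32 assembly)
//   v_add_co_u32 v0, vcc_lo, v1, v2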
4678
4679 // Only one unique literal can be used. A VOP3 literal is only allowed on GFX10+.
4680bool AMDGPUAsmParser::validateVOPLiteral(const MCInst &Inst,
4681 const OperandVector &Operands) {
4682 unsigned Opcode = Inst.getOpcode();
4683 const MCInstrDesc &Desc = MII.get(Opcode);
4684 bool HasMandatoryLiteral = getNamedOperandIdx(Opcode, OpName::imm) != -1;
4685 if (!(Desc.TSFlags & (SIInstrFlags::VOP3 | SIInstrFlags::VOP3P)) &&
4686 !HasMandatoryLiteral && !isVOPD(Opcode))
4687 return true;
4688
4689 OperandIndices OpIndices = getSrcOperandIndices(Opcode, HasMandatoryLiteral);
4690
4691 unsigned NumExprs = 0;
4692 unsigned NumLiterals = 0;
4693 uint64_t LiteralValue;
4694
4695 for (int OpIdx : OpIndices) {
4696 if (OpIdx == -1)
4697 continue;
4698
4699 const MCOperand &MO = Inst.getOperand(OpIdx);
4700 if (!MO.isImm() && !MO.isExpr())
4701 continue;
4702 if (!isSISrcOperand(Desc, OpIdx))
4703 continue;
4704
4705 if (MO.isImm() && !isInlineConstant(Inst, OpIdx)) {
4706 uint64_t Value = static_cast<uint64_t>(MO.getImm());
4707 bool IsFP64 = AMDGPU::isSISrcFPOperand(Desc, OpIdx) &&
4708 AMDGPU::getOperandSize(Desc.operands()[OpIdx]) == 8;
4709 bool IsValid32Op = AMDGPU::isValid32BitLiteral(Value, IsFP64);
4710
4711 if (!IsValid32Op && !isInt<32>(Value) && !isUInt<32>(Value)) {
4712 Error(getLitLoc(Operands), "invalid operand for instruction");
4713 return false;
4714 }
4715
4716 if (IsFP64 && IsValid32Op)
4717 Value = Hi_32(Value);
4718
4719 if (NumLiterals == 0 || LiteralValue != Value) {
4720 LiteralValue = Value;
4721 ++NumLiterals;
4722 }
4723 } else if (MO.isExpr()) {
4724 ++NumExprs;
4725 }
4726 }
4727 NumLiterals += NumExprs;
4728
4729 if (!NumLiterals)
4730 return true;
4731
4732 if (!HasMandatoryLiteral && !getFeatureBits()[FeatureVOP3Literal]) {
4733 Error(getLitLoc(Operands), "literal operands are not supported");
4734 return false;
4735 }
4736
4737 if (NumLiterals > 1) {
4738 Error(getLitLoc(Operands, true), "only one unique literal operand is allowed");
4739 return false;
4740 }
4741
4742 return true;
4743}
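// Illustrative examples for the literal rules above:
//   v_add_f32_e64 v0, 0x40490fdb, v1           ; VOP3 literal: rejected without
//                                              ; FeatureVOP3Literal (pre-GFX10)
//   v_fma_f32 v0, 0x3f000000, v1, 0x40000000   ; two unique literals -> rejected on any target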
4744
4745// Returns -1 if not a register, 0 if VGPR and 1 if AGPR.
4746static int IsAGPROperand(const MCInst &Inst, uint16_t NameIdx,
4747 const MCRegisterInfo *MRI) {
4748 int OpIdx = AMDGPU::getNamedOperandIdx(Inst.getOpcode(), NameIdx);
4749 if (OpIdx < 0)
4750 return -1;
4751
4752 const MCOperand &Op = Inst.getOperand(OpIdx);
4753 if (!Op.isReg())
4754 return -1;
4755
4756 unsigned Sub = MRI->getSubReg(Op.getReg(), AMDGPU::sub0);
4757 auto Reg = Sub ? Sub : Op.getReg();
4758 const MCRegisterClass &AGPR32 = MRI->getRegClass(AMDGPU::AGPR_32RegClassID);
4759 return AGPR32.contains(Reg) ? 1 : 0;
4760}
4761
4762bool AMDGPUAsmParser::validateAGPRLdSt(const MCInst &Inst) const {
4763 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
4764 if ((TSFlags & (SIInstrFlags::FLAT | SIInstrFlags::MUBUF |
4765 SIInstrFlags::MTBUF | SIInstrFlags::MIMG |
4766 SIInstrFlags::DS)) == 0)
4767 return true;
4768
4769 uint16_t DataNameIdx = (TSFlags & SIInstrFlags::DS) ? AMDGPU::OpName::data0
4770 : AMDGPU::OpName::vdata;
4771
4772 const MCRegisterInfo *MRI = getMRI();
4773 int DstAreg = IsAGPROperand(Inst, AMDGPU::OpName::vdst, MRI);
4774 int DataAreg = IsAGPROperand(Inst, DataNameIdx, MRI);
4775
4776 if ((TSFlags & SIInstrFlags::DS) && DataAreg >= 0) {
4777 int Data2Areg = IsAGPROperand(Inst, AMDGPU::OpName::data1, MRI);
4778 if (Data2Areg >= 0 && Data2Areg != DataAreg)
4779 return false;
4780 }
4781
4782 auto FB = getFeatureBits();
4783 if (FB[AMDGPU::FeatureGFX90AInsts]) {
4784 if (DataAreg < 0 || DstAreg < 0)
4785 return true;
4786 return DstAreg == DataAreg;
4787 }
4788
4789 return DstAreg < 1 && DataAreg < 1;
4790}
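// Rough illustration of the check above: on gfx90a (FeatureGFX90AInsts) data
// and dst operands may be all VGPRs or all AGPRs, e.g.
//   global_load_dword a0, v[2:3], off   ; AGPR dst -> ok on gfx90a
// while on targets without AGPR load/store support any AGPR here is rejected.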
4791
4792bool AMDGPUAsmParser::validateVGPRAlign(const MCInst &Inst) const {
4793 auto FB = getFeatureBits();
4794 if (!FB[AMDGPU::FeatureGFX90AInsts])
4795 return true;
4796
4797 const MCRegisterInfo *MRI = getMRI();
4798 const MCRegisterClass &VGPR32 = MRI->getRegClass(AMDGPU::VGPR_32RegClassID);
4799 const MCRegisterClass &AGPR32 = MRI->getRegClass(AMDGPU::AGPR_32RegClassID);
4800 for (unsigned I = 0, E = Inst.getNumOperands(); I != E; ++I) {
4801 const MCOperand &Op = Inst.getOperand(I);
4802 if (!Op.isReg())
4803 continue;
4804
4805 unsigned Sub = MRI->getSubReg(Op.getReg(), AMDGPU::sub0);
4806 if (!Sub)
4807 continue;
4808
4809 if (VGPR32.contains(Sub) && ((Sub - AMDGPU::VGPR0) & 1))
4810 return false;
4811 if (AGPR32.contains(Sub) && ((Sub - AMDGPU::AGPR0) & 1))
4812 return false;
4813 }
4814
4815 return true;
4816}
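// Illustrative example for the alignment rule above (assuming gfx90a):
//   global_load_dwordx2 v[2:3], v[4:5], off   ; even-aligned tuple -> ok
//   global_load_dwordx2 v[1:2], v[4:5], off   ; odd start -> rejected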
4817
4818SMLoc AMDGPUAsmParser::getBLGPLoc(const OperandVector &Operands) const {
4819 for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
4820 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
4821 if (Op.isBLGP())
4822 return Op.getStartLoc();
4823 }
4824 return SMLoc();
4825}
4826
4827bool AMDGPUAsmParser::validateBLGP(const MCInst &Inst,
4828 const OperandVector &Operands) {
4829 unsigned Opc = Inst.getOpcode();
4830 int BlgpIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::blgp);
4831 if (BlgpIdx == -1)
4832 return true;
4833 SMLoc BLGPLoc = getBLGPLoc(Operands);
4834 if (!BLGPLoc.isValid())
4835 return true;
4836 bool IsNeg = StringRef(BLGPLoc.getPointer()).starts_with("neg:");
4837 auto FB = getFeatureBits();
4838 bool UsesNeg = false;
4839 if (FB[AMDGPU::FeatureGFX940Insts]) {
4840 switch (Opc) {
4841 case AMDGPU::V_MFMA_F64_16X16X4F64_gfx940_acd:
4842 case AMDGPU::V_MFMA_F64_16X16X4F64_gfx940_vcd:
4843 case AMDGPU::V_MFMA_F64_4X4X4F64_gfx940_acd:
4844 case AMDGPU::V_MFMA_F64_4X4X4F64_gfx940_vcd:
4845 UsesNeg = true;
4846 }
4847 }
4848
4849 if (IsNeg == UsesNeg)
4850 return true;
4851
4852 Error(BLGPLoc,
4853 UsesNeg ? "invalid modifier: blgp is not supported"
4854 : "invalid modifier: neg is not supported");
4855
4856 return false;
4857}
4858
4859bool AMDGPUAsmParser::validateWaitCnt(const MCInst &Inst,
4860 const OperandVector &Operands) {
4861 if (!isGFX11Plus())
4862 return true;
4863
4864 unsigned Opc = Inst.getOpcode();
4865 if (Opc != AMDGPU::S_WAITCNT_EXPCNT_gfx11 &&
4866 Opc != AMDGPU::S_WAITCNT_LGKMCNT_gfx11 &&
4867 Opc != AMDGPU::S_WAITCNT_VMCNT_gfx11 &&
4868 Opc != AMDGPU::S_WAITCNT_VSCNT_gfx11)
4869 return true;
4870
4871 int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::sdst);
4872 assert(Src0Idx >= 0 && Inst.getOperand(Src0Idx).isReg());
4873 auto Reg = mc2PseudoReg(Inst.getOperand(Src0Idx).getReg());
4874 if (Reg == AMDGPU::SGPR_NULL)
4875 return true;
4876
4877 SMLoc RegLoc = getRegLoc(Reg, Operands);
4878 Error(RegLoc, "src0 must be null");
4879 return false;
4880}
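// Illustrative example for the check above (GFX11+):
//   s_waitcnt_vscnt null, 0x0   ; ok
//   s_waitcnt_vscnt s0, 0x0     ; rejected: "src0 must be null"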
4881
4882bool AMDGPUAsmParser::validateDS(const MCInst &Inst,
4883 const OperandVector &Operands) {
4884 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
4885 if ((TSFlags & SIInstrFlags::DS) == 0)
4886 return true;
4887 if (TSFlags & SIInstrFlags::GWS)
4888 return validateGWS(Inst, Operands);
4889 // Only validate GDS for non-GWS instructions.
4890 if (hasGDS())
4891 return true;
4892 int GDSIdx =
4893 AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::gds);
4894 if (GDSIdx < 0)
4895 return true;
4896 unsigned GDS = Inst.getOperand(GDSIdx).getImm();
4897 if (GDS) {
4898 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyGDS, Operands);
4899 Error(S, "gds modifier is not supported on this GPU");
4900 return false;
4901 }
4902 return true;
4903}
4904
4905// gfx90a has an undocumented limitation:
4906// DS_GWS opcodes must use even aligned registers.
4907bool AMDGPUAsmParser::validateGWS(const MCInst &Inst,
4908 const OperandVector &Operands) {
4909 if (!getFeatureBits()[AMDGPU::FeatureGFX90AInsts])
4910 return true;
4911
4912 int Opc = Inst.getOpcode();
4913 if (Opc != AMDGPU::DS_GWS_INIT_vi && Opc != AMDGPU::DS_GWS_BARRIER_vi &&
4914 Opc != AMDGPU::DS_GWS_SEMA_BR_vi)
4915 return true;
4916
4917 const MCRegisterInfo *MRI = getMRI();
4918 const MCRegisterClass &VGPR32 = MRI->getRegClass(AMDGPU::VGPR_32RegClassID);
4919 int Data0Pos =
4920 AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::data0);
4921 assert(Data0Pos != -1);
4922 auto Reg = Inst.getOperand(Data0Pos).getReg();
4923 auto RegIdx = Reg - (VGPR32.contains(Reg) ? AMDGPU::VGPR0 : AMDGPU::AGPR0);
4924 if (RegIdx & 1) {
4925 SMLoc RegLoc = getRegLoc(Reg, Operands);
4926 Error(RegLoc, "vgpr must be even aligned");
4927 return false;
4928 }
4929
4930 return true;
4931}
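// Illustrative example for the gfx90a GWS alignment rule above (hypothetical
// assembly):
//   ds_gws_init v2 gds   ; even data register -> ok
//   ds_gws_init v1 gds   ; odd data register  -> "vgpr must be even aligned"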
4932
4933bool AMDGPUAsmParser::validateCoherencyBits(const MCInst &Inst,
4934 const OperandVector &Operands,
4935 const SMLoc &IDLoc) {
4936 int CPolPos = AMDGPU::getNamedOperandIdx(Inst.getOpcode(),
4937 AMDGPU::OpName::cpol);
4938 if (CPolPos == -1)
4939 return true;
4940
4941 unsigned CPol = Inst.getOperand(CPolPos).getImm();
4942
4943 if (isGFX12Plus())
4944 return validateTHAndScopeBits(Inst, Operands, CPol);
4945
4946 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
4947 if (TSFlags & SIInstrFlags::SMRD) {
4948 if (CPol && (isSI() || isCI())) {
4949 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands);
4950 Error(S, "cache policy is not supported for SMRD instructions");
4951 return false;
4952 }
4953 if (CPol & ~(AMDGPU::CPol::GLC | AMDGPU::CPol::DLC)) {
4954 Error(IDLoc, "invalid cache policy for SMEM instruction");
4955 return false;
4956 }
4957 }
4958
4959 if (isGFX90A() && !isGFX940() && (CPol & CPol::SCC)) {
4960 const uint64_t AllowSCCModifier = SIInstrFlags::MUBUF |
4961 SIInstrFlags::MTBUF | SIInstrFlags::MIMG |
4962 SIInstrFlags::FLAT;
4963 if (!(TSFlags & AllowSCCModifier)) {
4964 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands);
4965 StringRef CStr(S.getPointer());
4966 S = SMLoc::getFromPointer(&CStr.data()[CStr.find("scc")]);
4967 Error(S,
4968 "scc modifier is not supported for this instruction on this GPU");
4969 return false;
4970 }
4971 }
4972
4973 if (!(TSFlags & (SIInstrFlags::IsAtomicRet | SIInstrFlags::IsAtomicNoRet)))
4974 return true;
4975
4976 if (TSFlags & SIInstrFlags::IsAtomicRet) {
4977 if (!(TSFlags & SIInstrFlags::MIMG) && !(CPol & CPol::GLC)) {
4978 Error(IDLoc, isGFX940() ? "instruction must use sc0"
4979 : "instruction must use glc");
4980 return false;
4981 }
4982 } else {
4983 if (CPol & CPol::GLC) {
4984 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands);
4985 StringRef CStr(S.getPointer());
4986 S = SMLoc::getFromPointer(
4987 &CStr.data()[CStr.find(isGFX940() ? "sc0" : "glc")]);
4988 Error(S, isGFX940() ? "instruction must not use sc0"
4989 : "instruction must not use glc");
4990 return false;
4991 }
4992 }
4993
4994 return true;
4995}
4996
4997bool AMDGPUAsmParser::validateTHAndScopeBits(const MCInst &Inst,
4998 const OperandVector &Operands,
4999 const unsigned CPol) {
5000 const unsigned TH = CPol & AMDGPU::CPol::TH;
5001 const unsigned Scope = CPol & AMDGPU::CPol::SCOPE;
5002
5003 const unsigned Opcode = Inst.getOpcode();
5004 const MCInstrDesc &TID = MII.get(Opcode);
5005
5006 auto PrintError = [&](StringRef Msg) {
5007 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands);
5008 Error(S, Msg);
5009 return false;
5010 };
5011
5012 if ((TID.TSFlags & SIInstrFlags::IsAtomicRet) &&
5013 (TID.TSFlags & (SIInstrFlags::FLAT | SIInstrFlags::MUBUF)) &&
5014 !(CPol & AMDGPU::CPol::TH_ATOMIC_RETURN))
5015 return PrintError("instruction must use th:TH_ATOMIC_RETURN");
5016
5017 if (TH == 0)
5018 return true;
5019
5020 if ((TID.TSFlags & SIInstrFlags::SMRD) &&
5021 ((TH == AMDGPU::CPol::TH_NT_RT) || (TH == AMDGPU::CPol::TH_RT_NT) ||
5022 (TH == AMDGPU::CPol::TH_NT_HT)))
5023 return PrintError("invalid th value for SMEM instruction");
5024
5025 if (TH == AMDGPU::CPol::TH_BYPASS) {
5026 if ((Scope != AMDGPU::CPol::SCOPE_SYS &&
5027 CPol & AMDGPU::CPol::TH_REAL_BYPASS) ||
5028 (Scope == AMDGPU::CPol::SCOPE_SYS &&
5029 !(CPol & AMDGPU::CPol::TH_REAL_BYPASS)))
5030 return PrintError("scope and th combination is not valid");
5031 }
5032
5033 bool IsStore = TID.mayStore();
5034 bool IsAtomic =
5035 TID.TSFlags & (SIInstrFlags::IsAtomicRet | SIInstrFlags::IsAtomicNoRet);
5036
5037 if (IsAtomic) {
5038 if (!(CPol & AMDGPU::CPol::TH_TYPE_ATOMIC))
5039 return PrintError("invalid th value for atomic instructions");
5040 } else if (IsStore) {
5041 if (!(CPol & AMDGPU::CPol::TH_TYPE_STORE))
5042 return PrintError("invalid th value for store instructions");
5043 } else {
5044 if (!(CPol & AMDGPU::CPol::TH_TYPE_LOAD))
5045 return PrintError("invalid th value for load instructions");
5046 }
5047
5048 return true;
5049}
5050
5051bool AMDGPUAsmParser::validateExeczVcczOperands(const OperandVector &Operands) {
5052 if (!isGFX11Plus())
5053 return true;
5054 for (auto &Operand : Operands) {
5055 if (!Operand->isReg())
5056 continue;
5057 unsigned Reg = Operand->getReg();
5058 if (Reg == SRC_EXECZ || Reg == SRC_VCCZ) {
5059 Error(getRegLoc(Reg, Operands),
5060 "execz and vccz are not supported on this GPU");
5061 return false;
5062 }
5063 }
5064 return true;
5065}
5066
5067bool AMDGPUAsmParser::validateTFE(const MCInst &Inst,
5068 const OperandVector &Operands) {
5069 const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
5070 if (Desc.mayStore() &&
5071 Desc.TSFlags & (SIInstrFlags::MUBUF | SIInstrFlags::MTBUF)) {
5072 SMLoc Loc = getImmLoc(AMDGPUOperand::ImmTyTFE, Operands);
5073 if (Loc != getInstLoc(Operands)) {
5074 Error(Loc, "TFE modifier has no meaning for store instructions");
5075 return false;
5076 }
5077 }
5078
5079 return true;
5080}
5081
5082bool AMDGPUAsmParser::validateInstruction(const MCInst &Inst,
5083 const SMLoc &IDLoc,
5084 const OperandVector &Operands) {
5085 if (auto ErrMsg = validateLdsDirect(Inst)) {
5086 Error(getRegLoc(LDS_DIRECT, Operands), *ErrMsg);
5087 return false;
5088 }
5089 if (!validateSOPLiteral(Inst)) {
5090 Error(getLitLoc(Operands),
5091 "only one unique literal operand is allowed");
5092 return false;
5093 }
5094 if (!validateVOPLiteral(Inst, Operands)) {
5095 return false;
5096 }
5097 if (!validateConstantBusLimitations(Inst, Operands)) {
5098 return false;
5099 }
5100 if (!validateVOPDRegBankConstraints(Inst, Operands)) {
5101 return false;
5102 }
5103 if (!validateIntClampSupported(Inst)) {
5104 Error(getImmLoc(AMDGPUOperand::ImmTyClamp, Operands),
5105 "integer clamping is not supported on this GPU");
5106 return false;
5107 }
5108 if (!validateOpSel(Inst)) {
5109 Error(getImmLoc(AMDGPUOperand::ImmTyOpSel, Operands),
5110 "invalid op_sel operand");
5111 return false;
5112 }
5113 if (!validateNeg(Inst, AMDGPU::OpName::neg_lo)) {
5114 Error(getImmLoc(AMDGPUOperand::ImmTyNegLo, Operands),
5115 "invalid neg_lo operand");
5116 return false;
5117 }
5118 if (!validateNeg(Inst, AMDGPU::OpName::neg_hi)) {
5119 Error(getImmLoc(AMDGPUOperand::ImmTyNegHi, Operands),
5120 "invalid neg_hi operand");
5121 return false;
5122 }
5123 if (!validateDPP(Inst, Operands)) {
5124 return false;
5125 }
5126 // For MUBUF/MTBUF d16 is a part of opcode, so there is nothing to validate.
5127 if (!validateMIMGD16(Inst)) {
5128 Error(getImmLoc(AMDGPUOperand::ImmTyD16, Operands),
5129 "d16 modifier is not supported on this GPU");
5130 return false;
5131 }
5132 if (!validateMIMGDim(Inst, Operands)) {
5133 Error(IDLoc, "missing dim operand");
5134 return false;
5135 }
5136 if (!validateMIMGMSAA(Inst)) {
5137 Error(getImmLoc(AMDGPUOperand::ImmTyDim, Operands),
5138 "invalid dim; must be MSAA type");
5139 return false;
5140 }
5141 if (!validateMIMGDataSize(Inst, IDLoc)) {
5142 return false;
5143 }
5144 if (!validateMIMGAddrSize(Inst, IDLoc))
5145 return false;
5146 if (!validateMIMGAtomicDMask(Inst)) {
5147 Error(getImmLoc(AMDGPUOperand::ImmTyDMask, Operands),
5148 "invalid atomic image dmask");
5149 return false;
5150 }
5151 if (!validateMIMGGatherDMask(Inst)) {
5152 Error(getImmLoc(AMDGPUOperand::ImmTyDMask, Operands),
5153 "invalid image_gather dmask: only one bit must be set");
5154 return false;
5155 }
5156 if (!validateMovrels(Inst, Operands)) {
5157 return false;
5158 }
5159 if (!validateOffset(Inst, Operands)) {
5160 return false;
5161 }
5162 if (!validateMAIAccWrite(Inst, Operands)) {
5163 return false;
5164 }
5165 if (!validateMAISrc2(Inst, Operands)) {
5166 return false;
5167 }
5168 if (!validateMFMA(Inst, Operands)) {
5169 return false;
5170 }
5171 if (!validateCoherencyBits(Inst, Operands, IDLoc)) {
5172 return false;
5173 }
5174
5175 if (!validateAGPRLdSt(Inst)) {
5176 Error(IDLoc, getFeatureBits()[AMDGPU::FeatureGFX90AInsts]
5177 ? "invalid register class: data and dst should be all VGPR or AGPR"
5178 : "invalid register class: agpr loads and stores not supported on this GPU"
5179 );
5180 return false;
5181 }
5182 if (!validateVGPRAlign(Inst)) {
5183 Error(IDLoc,
5184 "invalid register class: vgpr tuples must be 64 bit aligned");
5185 return false;
5186 }
5187 if (!validateDS(Inst, Operands)) {
5188 return false;
5189 }
5190
5191 if (!validateBLGP(Inst, Operands)) {
5192 return false;
5193 }
5194
5195 if (!validateDivScale(Inst)) {
5196 Error(IDLoc, "ABS not allowed in VOP3B instructions");
5197 return false;
5198 }
5199 if (!validateWaitCnt(Inst, Operands)) {
5200 return false;
5201 }
5202 if (!validateExeczVcczOperands(Operands)) {
5203 return false;
5204 }
5205 if (!validateTFE(Inst, Operands)) {
5206 return false;
5207 }
5208
5209 return true;
5210}
5211
5212 static std::string AMDGPUMnemonicSpellCheck(StringRef S,
5213 const FeatureBitset &FBS,
5214 unsigned VariantID = 0);