LLVM 19.0.0git
AMDGPUAsmParser.cpp
1//===- AMDGPUAsmParser.cpp - Parse SI asm to MCInst instructions ----------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#include "AMDKernelCodeT.h"
13#include "SIDefines.h"
14#include "SIInstrInfo.h"
15#include "SIRegisterInfo.h"
20#include "llvm/ADT/APFloat.h"
22#include "llvm/ADT/StringSet.h"
23#include "llvm/ADT/Twine.h"
26#include "llvm/MC/MCAsmInfo.h"
27#include "llvm/MC/MCContext.h"
28#include "llvm/MC/MCExpr.h"
29#include "llvm/MC/MCInst.h"
30#include "llvm/MC/MCInstrDesc.h"
35#include "llvm/MC/MCSymbol.h"
42#include <optional>
43
44using namespace llvm;
45using namespace llvm::AMDGPU;
46using namespace llvm::amdhsa;
47
48namespace {
49
50class AMDGPUAsmParser;
51
52enum RegisterKind { IS_UNKNOWN, IS_VGPR, IS_SGPR, IS_AGPR, IS_TTMP, IS_SPECIAL };
53
54//===----------------------------------------------------------------------===//
55// Operand
56//===----------------------------------------------------------------------===//
57
58class AMDGPUOperand : public MCParsedAsmOperand {
59 enum KindTy {
60 Token,
61 Immediate,
62 Register,
63 Expression
64 } Kind;
65
66 SMLoc StartLoc, EndLoc;
67 const AMDGPUAsmParser *AsmParser;
68
69public:
70 AMDGPUOperand(KindTy Kind_, const AMDGPUAsmParser *AsmParser_)
71 : Kind(Kind_), AsmParser(AsmParser_) {}
72
73 using Ptr = std::unique_ptr<AMDGPUOperand>;
74
75 struct Modifiers {
76 bool Abs = false;
77 bool Neg = false;
78 bool Sext = false;
79 bool Lit = false;
80
81 bool hasFPModifiers() const { return Abs || Neg; }
82 bool hasIntModifiers() const { return Sext; }
83 bool hasModifiers() const { return hasFPModifiers() || hasIntModifiers(); }
84
85 int64_t getFPModifiersOperand() const {
86 int64_t Operand = 0;
87 Operand |= Abs ? SISrcMods::ABS : 0u;
88 Operand |= Neg ? SISrcMods::NEG : 0u;
89 return Operand;
90 }
91
92 int64_t getIntModifiersOperand() const {
93 int64_t Operand = 0;
94 Operand |= Sext ? SISrcMods::SEXT : 0u;
95 return Operand;
96 }
97
98 int64_t getModifiersOperand() const {
99 assert(!(hasFPModifiers() && hasIntModifiers())
100 && "fp and int modifiers should not be used simultaneously");
101 if (hasFPModifiers()) {
102 return getFPModifiersOperand();
103 } else if (hasIntModifiers()) {
104 return getIntModifiersOperand();
105 } else {
106 return 0;
107 }
108 }
109
110 friend raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods);
111 };
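// Illustrative note (not part of the original source): for a source operand
// written as -|v0|, both Abs and Neg are set, so getModifiersOperand() returns
// SISrcMods::NEG | SISrcMods::ABS -- the value later emitted as the operand's
// src_modifiers immediate. For sext(v0), only Sext is set and
// getIntModifiersOperand() yields SISrcMods::SEXT.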
112
113 enum ImmTy {
114 ImmTyNone,
115 ImmTyGDS,
116 ImmTyLDS,
117 ImmTyOffen,
118 ImmTyIdxen,
119 ImmTyAddr64,
120 ImmTyOffset,
121 ImmTyInstOffset,
122 ImmTyOffset0,
123 ImmTyOffset1,
124 ImmTySMEMOffsetMod,
125 ImmTyCPol,
126 ImmTyTFE,
127 ImmTyD16,
128 ImmTyClampSI,
129 ImmTyOModSI,
130 ImmTySDWADstSel,
131 ImmTySDWASrc0Sel,
132 ImmTySDWASrc1Sel,
133 ImmTySDWADstUnused,
134 ImmTyDMask,
135 ImmTyDim,
136 ImmTyUNorm,
137 ImmTyDA,
138 ImmTyR128A16,
139 ImmTyA16,
140 ImmTyLWE,
141 ImmTyExpTgt,
142 ImmTyExpCompr,
143 ImmTyExpVM,
144 ImmTyFORMAT,
145 ImmTyHwreg,
146 ImmTyOff,
147 ImmTySendMsg,
148 ImmTyInterpSlot,
149 ImmTyInterpAttr,
150 ImmTyInterpAttrChan,
151 ImmTyOpSel,
152 ImmTyOpSelHi,
153 ImmTyNegLo,
154 ImmTyNegHi,
155 ImmTyIndexKey8bit,
156 ImmTyIndexKey16bit,
157 ImmTyDPP8,
158 ImmTyDppCtrl,
159 ImmTyDppRowMask,
160 ImmTyDppBankMask,
161 ImmTyDppBoundCtrl,
162 ImmTyDppFI,
163 ImmTySwizzle,
164 ImmTyGprIdxMode,
165 ImmTyHigh,
166 ImmTyBLGP,
167 ImmTyCBSZ,
168 ImmTyABID,
169 ImmTyEndpgm,
170 ImmTyWaitVDST,
171 ImmTyWaitEXP,
172 ImmTyWaitVAVDst,
173 ImmTyWaitVMVSrc,
174 };
175
176 // Immediate operand kind.
177 // It helps to identify the location of an offending operand after an error.
178 // Note that regular literals and mandatory literals (KImm) must be handled
179 // differently. When looking for an offending operand, we should usually
180 // ignore mandatory literals because they are part of the instruction and
181 // cannot be changed. Report location of mandatory operands only for VOPD,
182 // when both OpX and OpY have a KImm and there are no other literals.
183 enum ImmKindTy {
184 ImmKindTyNone,
185 ImmKindTyLiteral,
186 ImmKindTyMandatoryLiteral,
187 ImmKindTyConst,
188 };
189
190private:
191 struct TokOp {
192 const char *Data;
193 unsigned Length;
194 };
195
196 struct ImmOp {
197 int64_t Val;
198 ImmTy Type;
199 bool IsFPImm;
200 mutable ImmKindTy Kind;
201 Modifiers Mods;
202 };
203
204 struct RegOp {
205 unsigned RegNo;
206 Modifiers Mods;
207 };
208
209 union {
210 TokOp Tok;
211 ImmOp Imm;
212 RegOp Reg;
213 const MCExpr *Expr;
214 };
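// Note (added for clarity): exactly one union member is valid at a time,
// selected by Kind -- Tok for Token, Imm for Immediate, Reg for Register and
// Expr for Expression. The accessors below assert the matching kind before
// touching the payload.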
215
216public:
217 bool isToken() const override { return Kind == Token; }
218
219 bool isSymbolRefExpr() const {
220 return isExpr() && Expr && isa<MCSymbolRefExpr>(Expr);
221 }
222
223 bool isImm() const override {
224 return Kind == Immediate;
225 }
226
227 void setImmKindNone() const {
228 assert(isImm());
229 Imm.Kind = ImmKindTyNone;
230 }
231
232 void setImmKindLiteral() const {
233 assert(isImm());
234 Imm.Kind = ImmKindTyLiteral;
235 }
236
237 void setImmKindMandatoryLiteral() const {
238 assert(isImm());
239 Imm.Kind = ImmKindTyMandatoryLiteral;
240 }
241
242 void setImmKindConst() const {
243 assert(isImm());
244 Imm.Kind = ImmKindTyConst;
245 }
246
247 bool IsImmKindLiteral() const {
248 return isImm() && Imm.Kind == ImmKindTyLiteral;
249 }
250
251 bool IsImmKindMandatoryLiteral() const {
252 return isImm() && Imm.Kind == ImmKindTyMandatoryLiteral;
253 }
254
255 bool isImmKindConst() const {
256 return isImm() && Imm.Kind == ImmKindTyConst;
257 }
258
259 bool isInlinableImm(MVT type) const;
260 bool isLiteralImm(MVT type) const;
261
262 bool isRegKind() const {
263 return Kind == Register;
264 }
265
266 bool isReg() const override {
267 return isRegKind() && !hasModifiers();
268 }
269
270 bool isRegOrInline(unsigned RCID, MVT type) const {
271 return isRegClass(RCID) || isInlinableImm(type);
272 }
273
274 bool isRegOrImmWithInputMods(unsigned RCID, MVT type) const {
275 return isRegOrInline(RCID, type) || isLiteralImm(type);
276 }
277
278 bool isRegOrImmWithInt16InputMods() const {
279 return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i16);
280 }
281
282 bool isRegOrImmWithIntT16InputMods() const {
283 return isRegOrImmWithInputMods(AMDGPU::VS_16RegClassID, MVT::i16);
284 }
285
286 bool isRegOrImmWithInt32InputMods() const {
287 return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i32);
288 }
289
290 bool isRegOrInlineImmWithInt16InputMods() const {
291 return isRegOrInline(AMDGPU::VS_32RegClassID, MVT::i16);
292 }
293
294 bool isRegOrInlineImmWithInt32InputMods() const {
295 return isRegOrInline(AMDGPU::VS_32RegClassID, MVT::i32);
296 }
297
298 bool isRegOrImmWithInt64InputMods() const {
299 return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::i64);
300 }
301
302 bool isRegOrImmWithFP16InputMods() const {
303 return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f16);
304 }
305
306 bool isRegOrImmWithFPT16InputMods() const {
307 return isRegOrImmWithInputMods(AMDGPU::VS_16RegClassID, MVT::f16);
308 }
309
310 bool isRegOrImmWithFP32InputMods() const {
311 return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f32);
312 }
313
314 bool isRegOrImmWithFP64InputMods() const {
315 return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::f64);
316 }
317
318 template <bool IsFake16> bool isRegOrInlineImmWithFP16InputMods() const {
319 return isRegOrInline(
320 IsFake16 ? AMDGPU::VS_32RegClassID : AMDGPU::VS_16RegClassID, MVT::f16);
321 }
322
323 bool isRegOrInlineImmWithFP32InputMods() const {
324 return isRegOrInline(AMDGPU::VS_32RegClassID, MVT::f32);
325 }
326
327 bool isPackedFP16InputMods() const {
328 return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::v2f16);
329 }
330
331 bool isVReg() const {
332 return isRegClass(AMDGPU::VGPR_32RegClassID) ||
333 isRegClass(AMDGPU::VReg_64RegClassID) ||
334 isRegClass(AMDGPU::VReg_96RegClassID) ||
335 isRegClass(AMDGPU::VReg_128RegClassID) ||
336 isRegClass(AMDGPU::VReg_160RegClassID) ||
337 isRegClass(AMDGPU::VReg_192RegClassID) ||
338 isRegClass(AMDGPU::VReg_256RegClassID) ||
339 isRegClass(AMDGPU::VReg_512RegClassID) ||
340 isRegClass(AMDGPU::VReg_1024RegClassID);
341 }
342
343 bool isVReg32() const {
344 return isRegClass(AMDGPU::VGPR_32RegClassID);
345 }
346
347 bool isVReg32OrOff() const {
348 return isOff() || isVReg32();
349 }
350
351 bool isNull() const {
352 return isRegKind() && getReg() == AMDGPU::SGPR_NULL;
353 }
354
355 bool isVRegWithInputMods() const;
356 template <bool IsFake16> bool isT16VRegWithInputMods() const;
357
358 bool isSDWAOperand(MVT type) const;
359 bool isSDWAFP16Operand() const;
360 bool isSDWAFP32Operand() const;
361 bool isSDWAInt16Operand() const;
362 bool isSDWAInt32Operand() const;
363
364 bool isImmTy(ImmTy ImmT) const {
365 return isImm() && Imm.Type == ImmT;
366 }
367
368 template <ImmTy Ty> bool isImmTy() const { return isImmTy(Ty); }
369
370 bool isImmLiteral() const { return isImmTy(ImmTyNone); }
371
372 bool isImmModifier() const {
373 return isImm() && Imm.Type != ImmTyNone;
374 }
375
376 bool isOModSI() const { return isImmTy(ImmTyOModSI); }
377 bool isDMask() const { return isImmTy(ImmTyDMask); }
378 bool isDim() const { return isImmTy(ImmTyDim); }
379 bool isR128A16() const { return isImmTy(ImmTyR128A16); }
380 bool isOff() const { return isImmTy(ImmTyOff); }
381 bool isExpTgt() const { return isImmTy(ImmTyExpTgt); }
382 bool isOffen() const { return isImmTy(ImmTyOffen); }
383 bool isIdxen() const { return isImmTy(ImmTyIdxen); }
384 bool isAddr64() const { return isImmTy(ImmTyAddr64); }
385 bool isOffset() const { return isImmTy(ImmTyOffset); }
386 bool isOffset0() const { return isImmTy(ImmTyOffset0) && isUInt<8>(getImm()); }
387 bool isOffset1() const { return isImmTy(ImmTyOffset1) && isUInt<8>(getImm()); }
388 bool isSMEMOffsetMod() const { return isImmTy(ImmTySMEMOffsetMod); }
389 bool isFlatOffset() const { return isImmTy(ImmTyOffset) || isImmTy(ImmTyInstOffset); }
390 bool isGDS() const { return isImmTy(ImmTyGDS); }
391 bool isLDS() const { return isImmTy(ImmTyLDS); }
392 bool isCPol() const { return isImmTy(ImmTyCPol); }
393 bool isIndexKey8bit() const { return isImmTy(ImmTyIndexKey8bit); }
394 bool isIndexKey16bit() const { return isImmTy(ImmTyIndexKey16bit); }
395 bool isTFE() const { return isImmTy(ImmTyTFE); }
396 bool isFORMAT() const { return isImmTy(ImmTyFORMAT) && isUInt<7>(getImm()); }
397 bool isDppBankMask() const { return isImmTy(ImmTyDppBankMask); }
398 bool isDppRowMask() const { return isImmTy(ImmTyDppRowMask); }
399 bool isDppBoundCtrl() const { return isImmTy(ImmTyDppBoundCtrl); }
400 bool isDppFI() const { return isImmTy(ImmTyDppFI); }
401 bool isSDWADstSel() const { return isImmTy(ImmTySDWADstSel); }
402 bool isSDWASrc0Sel() const { return isImmTy(ImmTySDWASrc0Sel); }
403 bool isSDWASrc1Sel() const { return isImmTy(ImmTySDWASrc1Sel); }
404 bool isSDWADstUnused() const { return isImmTy(ImmTySDWADstUnused); }
405 bool isInterpSlot() const { return isImmTy(ImmTyInterpSlot); }
406 bool isInterpAttr() const { return isImmTy(ImmTyInterpAttr); }
407 bool isInterpAttrChan() const { return isImmTy(ImmTyInterpAttrChan); }
408 bool isOpSel() const { return isImmTy(ImmTyOpSel); }
409 bool isOpSelHi() const { return isImmTy(ImmTyOpSelHi); }
410 bool isNegLo() const { return isImmTy(ImmTyNegLo); }
411 bool isNegHi() const { return isImmTy(ImmTyNegHi); }
412
413 bool isRegOrImm() const {
414 return isReg() || isImm();
415 }
416
417 bool isRegClass(unsigned RCID) const;
418
419 bool isInlineValue() const;
420
421 bool isRegOrInlineNoMods(unsigned RCID, MVT type) const {
422 return isRegOrInline(RCID, type) && !hasModifiers();
423 }
424
425 bool isSCSrcB16() const {
426 return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i16);
427 }
428
429 bool isSCSrcV2B16() const {
430 return isSCSrcB16();
431 }
432
433 bool isSCSrc_b32() const {
434 return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i32);
435 }
436
437 bool isSCSrc_b64() const {
438 return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::i64);
439 }
440
441 bool isBoolReg() const;
442
443 bool isSCSrcF16() const {
444 return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f16);
445 }
446
447 bool isSCSrcV2F16() const {
448 return isSCSrcF16();
449 }
450
451 bool isSCSrcF32() const {
452 return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f32);
453 }
454
455 bool isSCSrcF64() const {
456 return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::f64);
457 }
458
459 bool isSSrc_b32() const {
460 return isSCSrc_b32() || isLiteralImm(MVT::i32) || isExpr();
461 }
462
463 bool isSSrc_b16() const { return isSCSrcB16() || isLiteralImm(MVT::i16); }
464
465 bool isSSrcV2B16() const {
466 llvm_unreachable("cannot happen");
467 return isSSrc_b16();
468 }
469
470 bool isSSrc_b64() const {
471 // TODO: Find out how SALU supports extension of 32-bit literals to 64 bits.
472 // See isVSrc64().
473 return isSCSrc_b64() || isLiteralImm(MVT::i64);
474 }
475
476 bool isSSrc_f32() const {
477 return isSCSrc_b32() || isLiteralImm(MVT::f32) || isExpr();
478 }
479
480 bool isSSrcF64() const { return isSCSrc_b64() || isLiteralImm(MVT::f64); }
481
482 bool isSSrc_bf16() const { return isSCSrcB16() || isLiteralImm(MVT::bf16); }
483
484 bool isSSrc_f16() const { return isSCSrcB16() || isLiteralImm(MVT::f16); }
485
486 bool isSSrcV2F16() const {
487 llvm_unreachable("cannot happen");
488 return isSSrc_f16();
489 }
490
491 bool isSSrcV2FP32() const {
492 llvm_unreachable("cannot happen");
493 return isSSrc_f32();
494 }
495
496 bool isSCSrcV2FP32() const {
497 llvm_unreachable("cannot happen");
498 return isSCSrcF32();
499 }
500
501 bool isSSrcV2INT32() const {
502 llvm_unreachable("cannot happen");
503 return isSSrc_b32();
504 }
505
506 bool isSCSrcV2INT32() const {
507 llvm_unreachable("cannot happen");
508 return isSCSrc_b32();
509 }
510
511 bool isSSrcOrLds_b32() const {
512 return isRegOrInlineNoMods(AMDGPU::SRegOrLds_32RegClassID, MVT::i32) ||
513 isLiteralImm(MVT::i32) || isExpr();
514 }
515
516 bool isVCSrc_b32() const {
517 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i32);
518 }
519
520 bool isVCSrcB64() const {
521 return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::i64);
522 }
523
524 bool isVCSrcTB16() const {
525 return isRegOrInlineNoMods(AMDGPU::VS_16RegClassID, MVT::i16);
526 }
527
528 bool isVCSrcTB16_Lo128() const {
529 return isRegOrInlineNoMods(AMDGPU::VS_16_Lo128RegClassID, MVT::i16);
530 }
531
532 bool isVCSrcFake16B16_Lo128() const {
533 return isRegOrInlineNoMods(AMDGPU::VS_32_Lo128RegClassID, MVT::i16);
534 }
535
536 bool isVCSrc_b16() const {
537 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i16);
538 }
539
540 bool isVCSrc_v2b16() const { return isVCSrc_b16(); }
541
542 bool isVCSrc_f32() const {
543 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f32);
544 }
545
546 bool isVCSrcF64() const {
547 return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::f64);
548 }
549
550 bool isVCSrcTBF16() const {
551 return isRegOrInlineNoMods(AMDGPU::VS_16RegClassID, MVT::bf16);
552 }
553
554 bool isVCSrcTF16() const {
555 return isRegOrInlineNoMods(AMDGPU::VS_16RegClassID, MVT::f16);
556 }
557
558 bool isVCSrcTBF16_Lo128() const {
559 return isRegOrInlineNoMods(AMDGPU::VS_16_Lo128RegClassID, MVT::bf16);
560 }
561
562 bool isVCSrcTF16_Lo128() const {
563 return isRegOrInlineNoMods(AMDGPU::VS_16_Lo128RegClassID, MVT::f16);
564 }
565
566 bool isVCSrcFake16BF16_Lo128() const {
567 return isRegOrInlineNoMods(AMDGPU::VS_32_Lo128RegClassID, MVT::bf16);
568 }
569
570 bool isVCSrcFake16F16_Lo128() const {
571 return isRegOrInlineNoMods(AMDGPU::VS_32_Lo128RegClassID, MVT::f16);
572 }
573
574 bool isVCSrc_bf16() const {
575 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::bf16);
576 }
577
578 bool isVCSrc_f16() const {
579 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f16);
580 }
581
582 bool isVCSrc_v2bf16() const { return isVCSrc_bf16(); }
583
584 bool isVCSrc_v2f16() const { return isVCSrc_f16(); }
585
586 bool isVSrc_b32() const {
587 return isVCSrc_f32() || isLiteralImm(MVT::i32) || isExpr();
588 }
589
590 bool isVSrc_b64() const { return isVCSrcF64() || isLiteralImm(MVT::i64); }
591
592 bool isVSrcT_b16() const { return isVCSrcTB16() || isLiteralImm(MVT::i16); }
593
594 bool isVSrcT_b16_Lo128() const {
595 return isVCSrcTB16_Lo128() || isLiteralImm(MVT::i16);
596 }
597
598 bool isVSrcFake16_b16_Lo128() const {
599 return isVCSrcFake16B16_Lo128() || isLiteralImm(MVT::i16);
600 }
601
602 bool isVSrc_b16() const { return isVCSrc_b16() || isLiteralImm(MVT::i16); }
603
604 bool isVSrc_v2b16() const { return isVSrc_b16() || isLiteralImm(MVT::v2i16); }
605
606 bool isVCSrcV2FP32() const {
607 return isVCSrcF64();
608 }
609
610 bool isVSrc_v2f32() const { return isVSrc_f64() || isLiteralImm(MVT::v2f32); }
611
612 bool isVCSrcV2INT32() const {
613 return isVCSrcB64();
614 }
615
616 bool isVSrc_v2b32() const { return isVSrc_b64() || isLiteralImm(MVT::v2i32); }
617
618 bool isVSrc_f32() const {
619 return isVCSrc_f32() || isLiteralImm(MVT::f32) || isExpr();
620 }
621
622 bool isVSrc_f64() const { return isVCSrcF64() || isLiteralImm(MVT::f64); }
623
624 bool isVSrcT_bf16() const { return isVCSrcTBF16() || isLiteralImm(MVT::bf16); }
625
626 bool isVSrcT_f16() const { return isVCSrcTF16() || isLiteralImm(MVT::f16); }
627
628 bool isVSrcT_bf16_Lo128() const {
629 return isVCSrcTBF16_Lo128() || isLiteralImm(MVT::bf16);
630 }
631
632 bool isVSrcT_f16_Lo128() const {
633 return isVCSrcTF16_Lo128() || isLiteralImm(MVT::f16);
634 }
635
636 bool isVSrcFake16_bf16_Lo128() const {
637 return isVCSrcFake16BF16_Lo128() || isLiteralImm(MVT::bf16);
638 }
639
640 bool isVSrcFake16_f16_Lo128() const {
641 return isVCSrcFake16F16_Lo128() || isLiteralImm(MVT::f16);
642 }
643
644 bool isVSrc_bf16() const { return isVCSrc_bf16() || isLiteralImm(MVT::bf16); }
645
646 bool isVSrc_f16() const { return isVCSrc_f16() || isLiteralImm(MVT::f16); }
647
648 bool isVSrc_v2bf16() const {
649 return isVSrc_bf16() || isLiteralImm(MVT::v2bf16);
650 }
651
652 bool isVSrc_v2f16() const { return isVSrc_f16() || isLiteralImm(MVT::v2f16); }
653
654 bool isVISrcB32() const {
655 return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i32);
656 }
657
658 bool isVISrcB16() const {
659 return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i16);
660 }
661
662 bool isVISrcV2B16() const {
663 return isVISrcB16();
664 }
665
666 bool isVISrcF32() const {
667 return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f32);
668 }
669
670 bool isVISrcF16() const {
671 return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f16);
672 }
673
674 bool isVISrcV2F16() const {
675 return isVISrcF16() || isVISrcB32();
676 }
677
678 bool isVISrc_64_bf16() const {
679 return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::bf16);
680 }
681
682 bool isVISrc_64_f16() const {
683 return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::f16);
684 }
685
686 bool isVISrc_64_b32() const {
687 return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::i32);
688 }
689
690 bool isVISrc_64B64() const {
691 return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::i64);
692 }
693
694 bool isVISrc_64_f64() const {
695 return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::f64);
696 }
697
698 bool isVISrc_64V2FP32() const {
699 return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::f32);
700 }
701
702 bool isVISrc_64V2INT32() const {
703 return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::i32);
704 }
705
706 bool isVISrc_256_b32() const {
707 return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::i32);
708 }
709
710 bool isVISrc_256_f32() const {
711 return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::f32);
712 }
713
714 bool isVISrc_256B64() const {
715 return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::i64);
716 }
717
718 bool isVISrc_256_f64() const {
719 return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::f64);
720 }
721
722 bool isVISrc_128B16() const {
723 return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::i16);
724 }
725
726 bool isVISrc_128V2B16() const {
727 return isVISrc_128B16();
728 }
729
730 bool isVISrc_128_b32() const {
731 return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::i32);
732 }
733
734 bool isVISrc_128_f32() const {
735 return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::f32);
736 }
737
738 bool isVISrc_256V2FP32() const {
739 return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::f32);
740 }
741
742 bool isVISrc_256V2INT32() const {
743 return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::i32);
744 }
745
746 bool isVISrc_512_b32() const {
747 return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::i32);
748 }
749
750 bool isVISrc_512B16() const {
751 return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::i16);
752 }
753
754 bool isVISrc_512V2B16() const {
755 return isVISrc_512B16();
756 }
757
758 bool isVISrc_512_f32() const {
759 return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::f32);
760 }
761
762 bool isVISrc_512F16() const {
763 return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::f16);
764 }
765
766 bool isVISrc_512V2F16() const {
767 return isVISrc_512F16() || isVISrc_512_b32();
768 }
769
770 bool isVISrc_1024_b32() const {
771 return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::i32);
772 }
773
774 bool isVISrc_1024B16() const {
775 return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::i16);
776 }
777
778 bool isVISrc_1024V2B16() const {
779 return isVISrc_1024B16();
780 }
781
782 bool isVISrc_1024_f32() const {
783 return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::f32);
784 }
785
786 bool isVISrc_1024F16() const {
787 return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::f16);
788 }
789
790 bool isVISrc_1024V2F16() const {
791 return isVISrc_1024F16() || isVISrc_1024_b32();
792 }
793
794 bool isAISrcB32() const {
795 return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i32);
796 }
797
798 bool isAISrcB16() const {
799 return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i16);
800 }
801
802 bool isAISrcV2B16() const {
803 return isAISrcB16();
804 }
805
806 bool isAISrcF32() const {
807 return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f32);
808 }
809
810 bool isAISrcF16() const {
811 return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f16);
812 }
813
814 bool isAISrcV2F16() const {
815 return isAISrcF16() || isAISrcB32();
816 }
817
818 bool isAISrc_64B64() const {
819 return isRegOrInlineNoMods(AMDGPU::AReg_64RegClassID, MVT::i64);
820 }
821
822 bool isAISrc_64_f64() const {
823 return isRegOrInlineNoMods(AMDGPU::AReg_64RegClassID, MVT::f64);
824 }
825
826 bool isAISrc_128_b32() const {
827 return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i32);
828 }
829
830 bool isAISrc_128B16() const {
831 return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i16);
832 }
833
834 bool isAISrc_128V2B16() const {
835 return isAISrc_128B16();
836 }
837
838 bool isAISrc_128_f32() const {
839 return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f32);
840 }
841
842 bool isAISrc_128F16() const {
843 return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f16);
844 }
845
846 bool isAISrc_128V2F16() const {
847 return isAISrc_128F16() || isAISrc_128_b32();
848 }
849
850 bool isVISrc_128_bf16() const {
851 return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::bf16);
852 }
853
854 bool isVISrc_128_f16() const {
855 return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::f16);
856 }
857
858 bool isVISrc_128V2F16() const {
859 return isVISrc_128_f16() || isVISrc_128_b32();
860 }
861
862 bool isAISrc_256B64() const {
863 return isRegOrInlineNoMods(AMDGPU::AReg_256RegClassID, MVT::i64);
864 }
865
866 bool isAISrc_256_f64() const {
867 return isRegOrInlineNoMods(AMDGPU::AReg_256RegClassID, MVT::f64);
868 }
869
870 bool isAISrc_512_b32() const {
871 return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i32);
872 }
873
874 bool isAISrc_512B16() const {
875 return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i16);
876 }
877
878 bool isAISrc_512V2B16() const {
879 return isAISrc_512B16();
880 }
881
882 bool isAISrc_512_f32() const {
883 return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f32);
884 }
885
886 bool isAISrc_512F16() const {
887 return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f16);
888 }
889
890 bool isAISrc_512V2F16() const {
891 return isAISrc_512F16() || isAISrc_512_b32();
892 }
893
894 bool isAISrc_1024_b32() const {
895 return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i32);
896 }
897
898 bool isAISrc_1024B16() const {
899 return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i16);
900 }
901
902 bool isAISrc_1024V2B16() const {
903 return isAISrc_1024B16();
904 }
905
906 bool isAISrc_1024_f32() const {
907 return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f32);
908 }
909
910 bool isAISrc_1024F16() const {
911 return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f16);
912 }
913
914 bool isAISrc_1024V2F16() const {
915 return isAISrc_1024F16() || isAISrc_1024_b32();
916 }
917
918 bool isKImmFP32() const {
919 return isLiteralImm(MVT::f32);
920 }
921
922 bool isKImmFP16() const {
923 return isLiteralImm(MVT::f16);
924 }
925
926 bool isMem() const override {
927 return false;
928 }
929
930 bool isExpr() const {
931 return Kind == Expression;
932 }
933
934 bool isSOPPBrTarget() const { return isExpr() || isImm(); }
935
936 bool isSWaitCnt() const;
937 bool isDepCtr() const;
938 bool isSDelayALU() const;
939 bool isHwreg() const;
940 bool isSendMsg() const;
941 bool isSplitBarrier() const;
942 bool isSwizzle() const;
943 bool isSMRDOffset8() const;
944 bool isSMEMOffset() const;
945 bool isSMRDLiteralOffset() const;
946 bool isDPP8() const;
947 bool isDPPCtrl() const;
948 bool isBLGP() const;
949 bool isCBSZ() const;
950 bool isABID() const;
951 bool isGPRIdxMode() const;
952 bool isS16Imm() const;
953 bool isU16Imm() const;
954 bool isEndpgm() const;
955 bool isWaitVDST() const;
956 bool isWaitEXP() const;
957 bool isWaitVAVDst() const;
958 bool isWaitVMVSrc() const;
959
960 auto getPredicate(std::function<bool(const AMDGPUOperand &Op)> P) const {
961 return std::bind(P, *this);
962 }
963
964 StringRef getToken() const {
965 assert(isToken());
966 return StringRef(Tok.Data, Tok.Length);
967 }
968
969 int64_t getImm() const {
970 assert(isImm());
971 return Imm.Val;
972 }
973
974 void setImm(int64_t Val) {
975 assert(isImm());
976 Imm.Val = Val;
977 }
978
979 ImmTy getImmTy() const {
980 assert(isImm());
981 return Imm.Type;
982 }
983
984 unsigned getReg() const override {
985 assert(isRegKind());
986 return Reg.RegNo;
987 }
988
989 SMLoc getStartLoc() const override {
990 return StartLoc;
991 }
992
993 SMLoc getEndLoc() const override {
994 return EndLoc;
995 }
996
997 SMRange getLocRange() const {
998 return SMRange(StartLoc, EndLoc);
999 }
1000
1001 Modifiers getModifiers() const {
1002 assert(isRegKind() || isImmTy(ImmTyNone));
1003 return isRegKind() ? Reg.Mods : Imm.Mods;
1004 }
1005
1006 void setModifiers(Modifiers Mods) {
1007 assert(isRegKind() || isImmTy(ImmTyNone));
1008 if (isRegKind())
1009 Reg.Mods = Mods;
1010 else
1011 Imm.Mods = Mods;
1012 }
1013
1014 bool hasModifiers() const {
1015 return getModifiers().hasModifiers();
1016 }
1017
1018 bool hasFPModifiers() const {
1019 return getModifiers().hasFPModifiers();
1020 }
1021
1022 bool hasIntModifiers() const {
1023 return getModifiers().hasIntModifiers();
1024 }
1025
1026 uint64_t applyInputFPModifiers(uint64_t Val, unsigned Size) const;
1027
1028 void addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers = true) const;
1029
1030 void addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const;
1031
1032 void addRegOperands(MCInst &Inst, unsigned N) const;
1033
1034 void addRegOrImmOperands(MCInst &Inst, unsigned N) const {
1035 if (isRegKind())
1036 addRegOperands(Inst, N);
1037 else
1038 addImmOperands(Inst, N);
1039 }
1040
1041 void addRegOrImmWithInputModsOperands(MCInst &Inst, unsigned N) const {
1042 Modifiers Mods = getModifiers();
1043 Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand()));
1044 if (isRegKind()) {
1045 addRegOperands(Inst, N);
1046 } else {
1047 addImmOperands(Inst, N, false);
1048 }
1049 }
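// Illustrative example (not in the original source): for an operand parsed
// from "-|v1|", addRegOrImmWithInputModsOperands() first appends an immediate
// operand holding SISrcMods::NEG | SISrcMods::ABS and then the register
// operand for v1, so each source with input modifiers contributes two MCInst
// operands (src_modifiers followed by src).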
1050
1051 void addRegOrImmWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
1052 assert(!hasIntModifiers());
1053 addRegOrImmWithInputModsOperands(Inst, N);
1054 }
1055
1056 void addRegOrImmWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
1057 assert(!hasFPModifiers());
1058 addRegOrImmWithInputModsOperands(Inst, N);
1059 }
1060
1061 void addRegWithInputModsOperands(MCInst &Inst, unsigned N) const {
1062 Modifiers Mods = getModifiers();
1063 Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand()));
1064 assert(isRegKind());
1065 addRegOperands(Inst, N);
1066 }
1067
1068 void addRegWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
1069 assert(!hasIntModifiers());
1070 addRegWithInputModsOperands(Inst, N);
1071 }
1072
1073 void addRegWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
1074 assert(!hasFPModifiers());
1075 addRegWithInputModsOperands(Inst, N);
1076 }
1077
1078 static void printImmTy(raw_ostream& OS, ImmTy Type) {
1079 // clang-format off
1080 switch (Type) {
1081 case ImmTyNone: OS << "None"; break;
1082 case ImmTyGDS: OS << "GDS"; break;
1083 case ImmTyLDS: OS << "LDS"; break;
1084 case ImmTyOffen: OS << "Offen"; break;
1085 case ImmTyIdxen: OS << "Idxen"; break;
1086 case ImmTyAddr64: OS << "Addr64"; break;
1087 case ImmTyOffset: OS << "Offset"; break;
1088 case ImmTyInstOffset: OS << "InstOffset"; break;
1089 case ImmTyOffset0: OS << "Offset0"; break;
1090 case ImmTyOffset1: OS << "Offset1"; break;
1091 case ImmTySMEMOffsetMod: OS << "SMEMOffsetMod"; break;
1092 case ImmTyCPol: OS << "CPol"; break;
1093 case ImmTyIndexKey8bit: OS << "index_key"; break;
1094 case ImmTyIndexKey16bit: OS << "index_key"; break;
1095 case ImmTyTFE: OS << "TFE"; break;
1096 case ImmTyD16: OS << "D16"; break;
1097 case ImmTyFORMAT: OS << "FORMAT"; break;
1098 case ImmTyClampSI: OS << "ClampSI"; break;
1099 case ImmTyOModSI: OS << "OModSI"; break;
1100 case ImmTyDPP8: OS << "DPP8"; break;
1101 case ImmTyDppCtrl: OS << "DppCtrl"; break;
1102 case ImmTyDppRowMask: OS << "DppRowMask"; break;
1103 case ImmTyDppBankMask: OS << "DppBankMask"; break;
1104 case ImmTyDppBoundCtrl: OS << "DppBoundCtrl"; break;
1105 case ImmTyDppFI: OS << "DppFI"; break;
1106 case ImmTySDWADstSel: OS << "SDWADstSel"; break;
1107 case ImmTySDWASrc0Sel: OS << "SDWASrc0Sel"; break;
1108 case ImmTySDWASrc1Sel: OS << "SDWASrc1Sel"; break;
1109 case ImmTySDWADstUnused: OS << "SDWADstUnused"; break;
1110 case ImmTyDMask: OS << "DMask"; break;
1111 case ImmTyDim: OS << "Dim"; break;
1112 case ImmTyUNorm: OS << "UNorm"; break;
1113 case ImmTyDA: OS << "DA"; break;
1114 case ImmTyR128A16: OS << "R128A16"; break;
1115 case ImmTyA16: OS << "A16"; break;
1116 case ImmTyLWE: OS << "LWE"; break;
1117 case ImmTyOff: OS << "Off"; break;
1118 case ImmTyExpTgt: OS << "ExpTgt"; break;
1119 case ImmTyExpCompr: OS << "ExpCompr"; break;
1120 case ImmTyExpVM: OS << "ExpVM"; break;
1121 case ImmTyHwreg: OS << "Hwreg"; break;
1122 case ImmTySendMsg: OS << "SendMsg"; break;
1123 case ImmTyInterpSlot: OS << "InterpSlot"; break;
1124 case ImmTyInterpAttr: OS << "InterpAttr"; break;
1125 case ImmTyInterpAttrChan: OS << "InterpAttrChan"; break;
1126 case ImmTyOpSel: OS << "OpSel"; break;
1127 case ImmTyOpSelHi: OS << "OpSelHi"; break;
1128 case ImmTyNegLo: OS << "NegLo"; break;
1129 case ImmTyNegHi: OS << "NegHi"; break;
1130 case ImmTySwizzle: OS << "Swizzle"; break;
1131 case ImmTyGprIdxMode: OS << "GprIdxMode"; break;
1132 case ImmTyHigh: OS << "High"; break;
1133 case ImmTyBLGP: OS << "BLGP"; break;
1134 case ImmTyCBSZ: OS << "CBSZ"; break;
1135 case ImmTyABID: OS << "ABID"; break;
1136 case ImmTyEndpgm: OS << "Endpgm"; break;
1137 case ImmTyWaitVDST: OS << "WaitVDST"; break;
1138 case ImmTyWaitEXP: OS << "WaitEXP"; break;
1139 case ImmTyWaitVAVDst: OS << "WaitVAVDst"; break;
1140 case ImmTyWaitVMVSrc: OS << "WaitVMVSrc"; break;
1141 }
1142 // clang-format on
1143 }
1144
1145 void print(raw_ostream &OS) const override {
1146 switch (Kind) {
1147 case Register:
1148 OS << "<register " << getReg() << " mods: " << Reg.Mods << '>';
1149 break;
1150 case Immediate:
1151 OS << '<' << getImm();
1152 if (getImmTy() != ImmTyNone) {
1153 OS << " type: "; printImmTy(OS, getImmTy());
1154 }
1155 OS << " mods: " << Imm.Mods << '>';
1156 break;
1157 case Token:
1158 OS << '\'' << getToken() << '\'';
1159 break;
1160 case Expression:
1161 OS << "<expr " << *Expr << '>';
1162 break;
1163 }
1164 }
1165
1166 static AMDGPUOperand::Ptr CreateImm(const AMDGPUAsmParser *AsmParser,
1167 int64_t Val, SMLoc Loc,
1168 ImmTy Type = ImmTyNone,
1169 bool IsFPImm = false) {
1170 auto Op = std::make_unique<AMDGPUOperand>(Immediate, AsmParser);
1171 Op->Imm.Val = Val;
1172 Op->Imm.IsFPImm = IsFPImm;
1173 Op->Imm.Kind = ImmKindTyNone;
1174 Op->Imm.Type = Type;
1175 Op->Imm.Mods = Modifiers();
1176 Op->StartLoc = Loc;
1177 Op->EndLoc = Loc;
1178 return Op;
1179 }
1180
1181 static AMDGPUOperand::Ptr CreateToken(const AMDGPUAsmParser *AsmParser,
1182 StringRef Str, SMLoc Loc,
1183 bool HasExplicitEncodingSize = true) {
1184 auto Res = std::make_unique<AMDGPUOperand>(Token, AsmParser);
1185 Res->Tok.Data = Str.data();
1186 Res->Tok.Length = Str.size();
1187 Res->StartLoc = Loc;
1188 Res->EndLoc = Loc;
1189 return Res;
1190 }
1191
1192 static AMDGPUOperand::Ptr CreateReg(const AMDGPUAsmParser *AsmParser,
1193 unsigned RegNo, SMLoc S,
1194 SMLoc E) {
1195 auto Op = std::make_unique<AMDGPUOperand>(Register, AsmParser);
1196 Op->Reg.RegNo = RegNo;
1197 Op->Reg.Mods = Modifiers();
1198 Op->StartLoc = S;
1199 Op->EndLoc = E;
1200 return Op;
1201 }
1202
1203 static AMDGPUOperand::Ptr CreateExpr(const AMDGPUAsmParser *AsmParser,
1204 const class MCExpr *Expr, SMLoc S) {
1205 auto Op = std::make_unique<AMDGPUOperand>(Expression, AsmParser);
1206 Op->Expr = Expr;
1207 Op->StartLoc = S;
1208 Op->EndLoc = S;
1209 return Op;
1210 }
1211};
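// Usage sketch (illustrative, not part of the original source): parser
// callbacks typically build operands with the factory helpers above, e.g.
//
//   Operands.push_back(AMDGPUOperand::CreateImm(this, Val, Loc,
//                                               AMDGPUOperand::ImmTyOffset));
//   Operands.push_back(AMDGPUOperand::CreateReg(this, RegNo, StartLoc, EndLoc));
//
// CreateImm defaults to ImmTyNone, which is what isImmLiteral() tests for
// plain literal operands.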
1212
1213raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods) {
1214 OS << "abs:" << Mods.Abs << " neg: " << Mods.Neg << " sext:" << Mods.Sext;
1215 return OS;
1216}
1217
1218//===----------------------------------------------------------------------===//
1219// AsmParser
1220//===----------------------------------------------------------------------===//
1221
1222// Holds info related to the current kernel, e.g. count of SGPRs used.
1223// Kernel scope begins at .amdgpu_hsa_kernel directive, ends at next
1224// .amdgpu_hsa_kernel or at EOF.
1225class KernelScopeInfo {
1226 int SgprIndexUnusedMin = -1;
1227 int VgprIndexUnusedMin = -1;
1228 int AgprIndexUnusedMin = -1;
1229 MCContext *Ctx = nullptr;
1230 MCSubtargetInfo const *MSTI = nullptr;
1231
1232 void usesSgprAt(int i) {
1233 if (i >= SgprIndexUnusedMin) {
1234 SgprIndexUnusedMin = ++i;
1235 if (Ctx) {
1236 MCSymbol* const Sym =
1237 Ctx->getOrCreateSymbol(Twine(".kernel.sgpr_count"));
1238 Sym->setVariableValue(MCConstantExpr::create(SgprIndexUnusedMin, *Ctx));
1239 }
1240 }
1241 }
1242
1243 void usesVgprAt(int i) {
1244 if (i >= VgprIndexUnusedMin) {
1245 VgprIndexUnusedMin = ++i;
1246 if (Ctx) {
1247 MCSymbol* const Sym =
1248 Ctx->getOrCreateSymbol(Twine(".kernel.vgpr_count"));
1249 int totalVGPR = getTotalNumVGPRs(isGFX90A(*MSTI), AgprIndexUnusedMin,
1250 VgprIndexUnusedMin);
1251 Sym->setVariableValue(MCConstantExpr::create(totalVGPR, *Ctx));
1252 }
1253 }
1254 }
1255
1256 void usesAgprAt(int i) {
1257 // Instruction will error in AMDGPUAsmParser::MatchAndEmitInstruction
1258 if (!hasMAIInsts(*MSTI))
1259 return;
1260
1261 if (i >= AgprIndexUnusedMin) {
1262 AgprIndexUnusedMin = ++i;
1263 if (Ctx) {
1264 MCSymbol* const Sym =
1265 Ctx->getOrCreateSymbol(Twine(".kernel.agpr_count"));
1266 Sym->setVariableValue(MCConstantExpr::create(AgprIndexUnusedMin, *Ctx));
1267
1268 // Also update vgpr_count (dependent on agpr_count for gfx908/gfx90a)
1269 MCSymbol* const vSym =
1270 Ctx->getOrCreateSymbol(Twine(".kernel.vgpr_count"));
1271 int totalVGPR = getTotalNumVGPRs(isGFX90A(*MSTI), AgprIndexUnusedMin,
1272 VgprIndexUnusedMin);
1273 vSym->setVariableValue(MCConstantExpr::create(totalVGPR, *Ctx));
1274 }
1275 }
1276 }
1277
1278public:
1279 KernelScopeInfo() = default;
1280
1281 void initialize(MCContext &Context) {
1282 Ctx = &Context;
1283 MSTI = Ctx->getSubtargetInfo();
1284
1285 usesSgprAt(SgprIndexUnusedMin = -1);
1286 usesVgprAt(VgprIndexUnusedMin = -1);
1287 if (hasMAIInsts(*MSTI)) {
1288 usesAgprAt(AgprIndexUnusedMin = -1);
1289 }
1290 }
1291
1292 void usesRegister(RegisterKind RegKind, unsigned DwordRegIndex,
1293 unsigned RegWidth) {
1294 switch (RegKind) {
1295 case IS_SGPR:
1296 usesSgprAt(DwordRegIndex + divideCeil(RegWidth, 32) - 1);
1297 break;
1298 case IS_AGPR:
1299 usesAgprAt(DwordRegIndex + divideCeil(RegWidth, 32) - 1);
1300 break;
1301 case IS_VGPR:
1302 usesVgprAt(DwordRegIndex + divideCeil(RegWidth, 32) - 1);
1303 break;
1304 default:
1305 break;
1306 }
1307 }
1308};
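// Worked example (illustrative, not part of the original source): after
// parsing the register range s[4:7] (RegWidth = 128 bits), usesRegister()
// calls usesSgprAt(4 + divideCeil(128, 32) - 1) = usesSgprAt(7), which
// advances SgprIndexUnusedMin to 8 and updates the .kernel.sgpr_count symbol
// accordingly.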
1309
1310class AMDGPUAsmParser : public MCTargetAsmParser {
1311 MCAsmParser &Parser;
1312
1313 unsigned ForcedEncodingSize = 0;
1314 bool ForcedDPP = false;
1315 bool ForcedSDWA = false;
1316 KernelScopeInfo KernelScope;
1317
1318 /// @name Auto-generated Match Functions
1319 /// {
1320
1321#define GET_ASSEMBLER_HEADER
1322#include "AMDGPUGenAsmMatcher.inc"
1323
1324 /// }
1325
1326private:
1327 bool ParseAsAbsoluteExpression(uint32_t &Ret);
1328 bool OutOfRangeError(SMRange Range);
1329 /// Calculate VGPR/SGPR blocks required for given target, reserved
1330 /// registers, and user-specified NextFreeXGPR values.
1331 ///
1332 /// \param Features [in] Target features, used for bug corrections.
1333 /// \param VCCUsed [in] Whether VCC special SGPR is reserved.
1334 /// \param FlatScrUsed [in] Whether FLAT_SCRATCH special SGPR is reserved.
1335 /// \param XNACKUsed [in] Whether XNACK_MASK special SGPR is reserved.
1336 /// \param EnableWavefrontSize32 [in] Value of ENABLE_WAVEFRONT_SIZE32 kernel
1337 /// descriptor field, if valid.
1338 /// \param NextFreeVGPR [in] Max VGPR number referenced, plus one.
1339 /// \param VGPRRange [in] Token range, used for VGPR diagnostics.
1340 /// \param NextFreeSGPR [in] Max SGPR number referenced, plus one.
1341 /// \param SGPRRange [in] Token range, used for SGPR diagnostics.
1342 /// \param VGPRBlocks [out] Result VGPR block count.
1343 /// \param SGPRBlocks [out] Result SGPR block count.
1344 bool calculateGPRBlocks(const FeatureBitset &Features, bool VCCUsed,
1345 bool FlatScrUsed, bool XNACKUsed,
1346 std::optional<bool> EnableWavefrontSize32,
1347 unsigned NextFreeVGPR, SMRange VGPRRange,
1348 unsigned NextFreeSGPR, SMRange SGPRRange,
1349 unsigned &VGPRBlocks, unsigned &SGPRBlocks);
1350 bool ParseDirectiveAMDGCNTarget();
1351 bool ParseDirectiveAMDHSACodeObjectVersion();
1352 bool ParseDirectiveAMDHSAKernel();
1353 bool ParseAMDKernelCodeTValue(StringRef ID, amd_kernel_code_t &Header);
1354 bool ParseDirectiveAMDKernelCodeT();
1355 // TODO: Possibly make subtargetHasRegister const.
1356 bool subtargetHasRegister(const MCRegisterInfo &MRI, unsigned RegNo);
1357 bool ParseDirectiveAMDGPUHsaKernel();
1358
1359 bool ParseDirectiveISAVersion();
1360 bool ParseDirectiveHSAMetadata();
1361 bool ParseDirectivePALMetadataBegin();
1362 bool ParseDirectivePALMetadata();
1363 bool ParseDirectiveAMDGPULDS();
1364
1365 /// Common code to parse out a block of text (typically YAML) between start and
1366 /// end directives.
1367 bool ParseToEndDirective(const char *AssemblerDirectiveBegin,
1368 const char *AssemblerDirectiveEnd,
1369 std::string &CollectString);
1370
1371 bool AddNextRegisterToList(unsigned& Reg, unsigned& RegWidth,
1372 RegisterKind RegKind, unsigned Reg1, SMLoc Loc);
1373 bool ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
1374 unsigned &RegNum, unsigned &RegWidth,
1375 bool RestoreOnFailure = false);
1376 bool ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
1378 unsigned &RegNum, unsigned &RegWidth, SmallVectorImpl<AsmToken> &Tokens);
1379 unsigned ParseRegularReg(RegisterKind &RegKind, unsigned &RegNum,
1380 unsigned &RegWidth,
1381 SmallVectorImpl<AsmToken> &Tokens);
1382 unsigned ParseSpecialReg(RegisterKind &RegKind, unsigned &RegNum,
1383 unsigned &RegWidth,
1384 SmallVectorImpl<AsmToken> &Tokens);
1385 unsigned ParseRegList(RegisterKind &RegKind, unsigned &RegNum,
1386 unsigned &RegWidth, SmallVectorImpl<AsmToken> &Tokens);
1387 bool ParseRegRange(unsigned& Num, unsigned& Width);
1388 unsigned getRegularReg(RegisterKind RegKind, unsigned RegNum, unsigned SubReg,
1389 unsigned RegWidth, SMLoc Loc);
1390
1391 bool isRegister();
1392 bool isRegister(const AsmToken &Token, const AsmToken &NextToken) const;
1393 std::optional<StringRef> getGprCountSymbolName(RegisterKind RegKind);
1394 void initializeGprCountSymbol(RegisterKind RegKind);
1395 bool updateGprCountSymbols(RegisterKind RegKind, unsigned DwordRegIndex,
1396 unsigned RegWidth);
1397 void cvtMubufImpl(MCInst &Inst, const OperandVector &Operands,
1398 bool IsAtomic);
1399
1400public:
1401 enum AMDGPUMatchResultTy {
1402 Match_PreferE32 = FIRST_TARGET_MATCH_RESULT_TY
1403 };
1404 enum OperandMode {
1405 OperandMode_Default,
1406 OperandMode_NSA,
1407 };
1408
1409 using OptionalImmIndexMap = std::map<AMDGPUOperand::ImmTy, unsigned>;
1410
1411 AMDGPUAsmParser(const MCSubtargetInfo &STI, MCAsmParser &_Parser,
1412 const MCInstrInfo &MII,
1413 const MCTargetOptions &Options)
1414 : MCTargetAsmParser(Options, STI, MII), Parser(_Parser) {
1416
1417 if (getFeatureBits().none()) {
1418 // Set default features.
1419 copySTI().ToggleFeature("southern-islands");
1420 }
1421
1422 setAvailableFeatures(ComputeAvailableFeatures(getFeatureBits()));
1423
1424 {
1425 // TODO: make those pre-defined variables read-only.
1426 // Currently there is no suitable machinery in core llvm-mc for this.
1427 // MCSymbol::isRedefinable is intended for another purpose, and
1428 // AsmParser::parseDirectiveSet() cannot be specialized for specific target.
1429 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
1430 MCContext &Ctx = getContext();
1431 if (ISA.Major >= 6 && isHsaAbi(getSTI())) {
1432 MCSymbol *Sym =
1433 Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_number"));
1434 Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx));
1435 Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_minor"));
1436 Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx));
1437 Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_stepping"));
1438 Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx));
1439 } else {
1440 MCSymbol *Sym =
1441 Ctx.getOrCreateSymbol(Twine(".option.machine_version_major"));
1442 Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx));
1443 Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_minor"));
1444 Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx));
1445 Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_stepping"));
1446 Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx));
1447 }
1448 if (ISA.Major >= 6 && isHsaAbi(getSTI())) {
1449 initializeGprCountSymbol(IS_VGPR);
1450 initializeGprCountSymbol(IS_SGPR);
1451 } else
1452 KernelScope.initialize(getContext());
1453 }
1454 }
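  // Example (illustrative, not part of the original source): for a gfx90a
  // target under the HSA ABI, ISA is {9, 0, 10}, so the constructor defines
  // .amdgcn.gfx_generation_number = 9, .amdgcn.gfx_generation_minor = 0 and
  // .amdgcn.gfx_generation_stepping = 10, which assembly code can reference
  // in expressions.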
1455
1456 bool hasMIMG_R128() const {
1457 return AMDGPU::hasMIMG_R128(getSTI());
1458 }
1459
1460 bool hasPackedD16() const {
1461 return AMDGPU::hasPackedD16(getSTI());
1462 }
1463
1464 bool hasA16() const { return AMDGPU::hasA16(getSTI()); }
1465
1466 bool hasG16() const { return AMDGPU::hasG16(getSTI()); }
1467
1468 bool hasGDS() const { return AMDGPU::hasGDS(getSTI()); }
1469
1470 bool isSI() const {
1471 return AMDGPU::isSI(getSTI());
1472 }
1473
1474 bool isCI() const {
1475 return AMDGPU::isCI(getSTI());
1476 }
1477
1478 bool isVI() const {
1479 return AMDGPU::isVI(getSTI());
1480 }
1481
1482 bool isGFX9() const {
1483 return AMDGPU::isGFX9(getSTI());
1484 }
1485
1486 // TODO: isGFX90A is also true for GFX940. We need to clean this up.
1487 bool isGFX90A() const {
1488 return AMDGPU::isGFX90A(getSTI());
1489 }
1490
1491 bool isGFX940() const {
1492 return AMDGPU::isGFX940(getSTI());
1493 }
1494
1495 bool isGFX9Plus() const {
1496 return AMDGPU::isGFX9Plus(getSTI());
1497 }
1498
1499 bool isGFX10() const {
1500 return AMDGPU::isGFX10(getSTI());
1501 }
1502
1503 bool isGFX10Plus() const { return AMDGPU::isGFX10Plus(getSTI()); }
1504
1505 bool isGFX11() const {
1506 return AMDGPU::isGFX11(getSTI());
1507 }
1508
1509 bool isGFX11Plus() const {
1510 return AMDGPU::isGFX11Plus(getSTI());
1511 }
1512
1513 bool isGFX12() const { return AMDGPU::isGFX12(getSTI()); }
1514
1515 bool isGFX12Plus() const { return AMDGPU::isGFX12Plus(getSTI()); }
1516
1517 bool isGFX10_AEncoding() const { return AMDGPU::isGFX10_AEncoding(getSTI()); }
1518
1519 bool isGFX10_BEncoding() const {
1520 return AMDGPU::isGFX10_BEncoding(getSTI());
1521 }
1522
1523 bool hasInv2PiInlineImm() const {
1524 return getFeatureBits()[AMDGPU::FeatureInv2PiInlineImm];
1525 }
1526
1527 bool hasFlatOffsets() const {
1528 return getFeatureBits()[AMDGPU::FeatureFlatInstOffsets];
1529 }
1530
1531 bool hasArchitectedFlatScratch() const {
1532 return getFeatureBits()[AMDGPU::FeatureArchitectedFlatScratch];
1533 }
1534
1535 bool hasSGPR102_SGPR103() const {
1536 return !isVI() && !isGFX9();
1537 }
1538
1539 bool hasSGPR104_SGPR105() const { return isGFX10Plus(); }
1540
1541 bool hasIntClamp() const {
1542 return getFeatureBits()[AMDGPU::FeatureIntClamp];
1543 }
1544
1545 bool hasPartialNSAEncoding() const {
1546 return getFeatureBits()[AMDGPU::FeaturePartialNSAEncoding];
1547 }
1548
1549 unsigned getNSAMaxSize(bool HasSampler = false) const {
1550 return AMDGPU::getNSAMaxSize(getSTI(), HasSampler);
1551 }
1552
1553 unsigned getMaxNumUserSGPRs() const {
1555 }
1556
1557 bool hasKernargPreload() const { return AMDGPU::hasKernargPreload(getSTI()); }
1558
1559 AMDGPUTargetStreamer &getTargetStreamer() {
1560 MCTargetStreamer &TS = *getParser().getStreamer().getTargetStreamer();
1561 return static_cast<AMDGPUTargetStreamer &>(TS);
1562 }
1563
1564 const MCRegisterInfo *getMRI() const {
1565 // We need this const_cast because for some reason getContext() is not const
1566 // in MCAsmParser.
1567 return const_cast<AMDGPUAsmParser*>(this)->getContext().getRegisterInfo();
1568 }
1569
1570 const MCInstrInfo *getMII() const {
1571 return &MII;
1572 }
1573
1574 const FeatureBitset &getFeatureBits() const {
1575 return getSTI().getFeatureBits();
1576 }
1577
1578 void setForcedEncodingSize(unsigned Size) { ForcedEncodingSize = Size; }
1579 void setForcedDPP(bool ForceDPP_) { ForcedDPP = ForceDPP_; }
1580 void setForcedSDWA(bool ForceSDWA_) { ForcedSDWA = ForceSDWA_; }
1581
1582 unsigned getForcedEncodingSize() const { return ForcedEncodingSize; }
1583 bool isForcedVOP3() const { return ForcedEncodingSize == 64; }
1584 bool isForcedDPP() const { return ForcedDPP; }
1585 bool isForcedSDWA() const { return ForcedSDWA; }
1586 ArrayRef<unsigned> getMatchedVariants() const;
1587 StringRef getMatchedVariantName() const;
1588
1589 std::unique_ptr<AMDGPUOperand> parseRegister(bool RestoreOnFailure = false);
1590 bool ParseRegister(MCRegister &RegNo, SMLoc &StartLoc, SMLoc &EndLoc,
1591 bool RestoreOnFailure);
1592 bool parseRegister(MCRegister &Reg, SMLoc &StartLoc, SMLoc &EndLoc) override;
1593 ParseStatus tryParseRegister(MCRegister &Reg, SMLoc &StartLoc,
1594 SMLoc &EndLoc) override;
1595 unsigned checkTargetMatchPredicate(MCInst &Inst) override;
1596 unsigned validateTargetOperandClass(MCParsedAsmOperand &Op,
1597 unsigned Kind) override;
1598 bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
1599 OperandVector &Operands, MCStreamer &Out,
1600 uint64_t &ErrorInfo,
1601 bool MatchingInlineAsm) override;
1602 bool ParseDirective(AsmToken DirectiveID) override;
1603 ParseStatus parseOperand(OperandVector &Operands, StringRef Mnemonic,
1604 OperandMode Mode = OperandMode_Default);
1605 StringRef parseMnemonicSuffix(StringRef Name);
1606 bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
1607 SMLoc NameLoc, OperandVector &Operands) override;
1608 //bool ProcessInstruction(MCInst &Inst);
1609
1611
1612 ParseStatus parseIntWithPrefix(const char *Prefix, int64_t &Int);
1613
1614 ParseStatus
1615 parseIntWithPrefix(const char *Prefix, OperandVector &Operands,
1616 AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
1617 std::function<bool(int64_t &)> ConvertResult = nullptr);
1618
1619 ParseStatus parseOperandArrayWithPrefix(
1620 const char *Prefix, OperandVector &Operands,
1621 AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
1622 bool (*ConvertResult)(int64_t &) = nullptr);
1623
1624 ParseStatus
1625 parseNamedBit(StringRef Name, OperandVector &Operands,
1626 AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone);
1627 unsigned getCPolKind(StringRef Id, StringRef Mnemo, bool &Disabling) const;
1628 ParseStatus parseCPol(OperandVector &Operands);
1629 ParseStatus parseScope(OperandVector &Operands, int64_t &Scope);
1630 ParseStatus parseTH(OperandVector &Operands, int64_t &TH);
1631 ParseStatus parseStringWithPrefix(StringRef Prefix, StringRef &Value,
1632 SMLoc &StringLoc);
1633
1634 bool isModifier();
1635 bool isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
1636 bool isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
1637 bool isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
1638 bool isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const;
1639 bool parseSP3NegModifier();
1640 ParseStatus parseImm(OperandVector &Operands, bool HasSP3AbsModifier = false,
1641 bool HasLit = false);
1642 ParseStatus parseReg(OperandVector &Operands);
1643 ParseStatus parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod = false,
1644 bool HasLit = false);
1645 ParseStatus parseRegOrImmWithFPInputMods(OperandVector &Operands,
1646 bool AllowImm = true);
1647 ParseStatus parseRegOrImmWithIntInputMods(OperandVector &Operands,
1648 bool AllowImm = true);
1649 ParseStatus parseRegWithFPInputMods(OperandVector &Operands);
1650 ParseStatus parseRegWithIntInputMods(OperandVector &Operands);
1651 ParseStatus parseVReg32OrOff(OperandVector &Operands);
1652 ParseStatus tryParseIndexKey(OperandVector &Operands,
1653 AMDGPUOperand::ImmTy ImmTy);
1654 ParseStatus parseIndexKey8bit(OperandVector &Operands);
1655 ParseStatus parseIndexKey16bit(OperandVector &Operands);
1656
1657 ParseStatus parseDfmtNfmt(int64_t &Format);
1658 ParseStatus parseUfmt(int64_t &Format);
1659 ParseStatus parseSymbolicSplitFormat(StringRef FormatStr, SMLoc Loc,
1660 int64_t &Format);
1661 ParseStatus parseSymbolicUnifiedFormat(StringRef FormatStr, SMLoc Loc,
1662 int64_t &Format);
1663 ParseStatus parseFORMAT(OperandVector &Operands);
1664 ParseStatus parseSymbolicOrNumericFormat(int64_t &Format);
1665 ParseStatus parseNumericFormat(int64_t &Format);
1666 ParseStatus parseFlatOffset(OperandVector &Operands);
1667 ParseStatus parseR128A16(OperandVector &Operands);
1669 bool tryParseFmt(const char *Pref, int64_t MaxVal, int64_t &Val);
1670 bool matchDfmtNfmt(int64_t &Dfmt, int64_t &Nfmt, StringRef FormatStr, SMLoc Loc);
1671
1672 void cvtExp(MCInst &Inst, const OperandVector &Operands);
1673
1674 bool parseCnt(int64_t &IntVal);
1675 ParseStatus parseSWaitCnt(OperandVector &Operands);
1676
1677 bool parseDepCtr(int64_t &IntVal, unsigned &Mask);
1678 void depCtrError(SMLoc Loc, int ErrorId, StringRef DepCtrName);
1679 ParseStatus parseDepCtr(OperandVector &Operands);
1680
1681 bool parseDelay(int64_t &Delay);
1682 ParseStatus parseSDelayALU(OperandVector &Operands);
1683
1684 ParseStatus parseHwreg(OperandVector &Operands);
1685
1686private:
1687 struct OperandInfoTy {
1688 SMLoc Loc;
1689 int64_t Val;
1690 bool IsSymbolic = false;
1691 bool IsDefined = false;
1692
1693 OperandInfoTy(int64_t Val) : Val(Val) {}
1694 };
1695
1696 struct StructuredOpField : OperandInfoTy {
1697 StringLiteral Id;
1698 StringLiteral Desc;
1699 unsigned Width;
1700 bool IsDefined = false;
1701
1702 StructuredOpField(StringLiteral Id, StringLiteral Desc, unsigned Width,
1703 int64_t Default)
1704 : OperandInfoTy(Default), Id(Id), Desc(Desc), Width(Width) {}
1705 virtual ~StructuredOpField() = default;
1706
1707 bool Error(AMDGPUAsmParser &Parser, const Twine &Err) const {
1708 Parser.Error(Loc, "invalid " + Desc + ": " + Err);
1709 return false;
1710 }
1711
1712 virtual bool validate(AMDGPUAsmParser &Parser) const {
1713 if (IsSymbolic && Val == OPR_ID_UNSUPPORTED)
1714 return Error(Parser, "not supported on this GPU");
1715 if (!isUIntN(Width, Val))
1716 return Error(Parser, "only " + Twine(Width) + "-bit values are legal");
1717 return true;
1718 }
1719 };
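  // Illustrative sketch (not part of the original source; the "message id"
  // field name is hypothetical): a concrete field such as
  // StructuredOpField("id", "message id", /*Width=*/8, /*Default=*/0) accepts
  // values 0..255; validate() rejects anything wider via isUIntN(8, Val) and
  // Error() then reports "invalid message id: only 8-bit values are legal".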
1720
1721 ParseStatus parseStructuredOpFields(ArrayRef<StructuredOpField *> Fields);
1722 bool validateStructuredOpFields(ArrayRef<const StructuredOpField *> Fields);
1723
1724 bool parseSendMsgBody(OperandInfoTy &Msg, OperandInfoTy &Op, OperandInfoTy &Stream);
1725 bool validateSendMsg(const OperandInfoTy &Msg,
1726 const OperandInfoTy &Op,
1727 const OperandInfoTy &Stream);
1728
1729 ParseStatus parseHwregFunc(OperandInfoTy &HwReg, OperandInfoTy &Offset,
1730 OperandInfoTy &Width);
1731
1732 SMLoc getFlatOffsetLoc(const OperandVector &Operands) const;
1733 SMLoc getSMEMOffsetLoc(const OperandVector &Operands) const;
1734 SMLoc getBLGPLoc(const OperandVector &Operands) const;
1735
1736 SMLoc getOperandLoc(std::function<bool(const AMDGPUOperand&)> Test,
1737 const OperandVector &Operands) const;
1738 SMLoc getImmLoc(AMDGPUOperand::ImmTy Type, const OperandVector &Operands) const;
1739 SMLoc getRegLoc(unsigned Reg, const OperandVector &Operands) const;
1740 SMLoc getLitLoc(const OperandVector &Operands,
1741 bool SearchMandatoryLiterals = false) const;
1742 SMLoc getMandatoryLitLoc(const OperandVector &Operands) const;
1743 SMLoc getConstLoc(const OperandVector &Operands) const;
1744 SMLoc getInstLoc(const OperandVector &Operands) const;
1745
1746 bool validateInstruction(const MCInst &Inst, const SMLoc &IDLoc, const OperandVector &Operands);
1747 bool validateOffset(const MCInst &Inst, const OperandVector &Operands);
1748 bool validateFlatOffset(const MCInst &Inst, const OperandVector &Operands);
1749 bool validateSMEMOffset(const MCInst &Inst, const OperandVector &Operands);
1750 bool validateSOPLiteral(const MCInst &Inst) const;
1751 bool validateConstantBusLimitations(const MCInst &Inst, const OperandVector &Operands);
1752 bool validateVOPDRegBankConstraints(const MCInst &Inst,
1753 const OperandVector &Operands);
1754 bool validateIntClampSupported(const MCInst &Inst);
1755 bool validateMIMGAtomicDMask(const MCInst &Inst);
1756 bool validateMIMGGatherDMask(const MCInst &Inst);
1757 bool validateMovrels(const MCInst &Inst, const OperandVector &Operands);
1758 bool validateMIMGDataSize(const MCInst &Inst, const SMLoc &IDLoc);
1759 bool validateMIMGAddrSize(const MCInst &Inst, const SMLoc &IDLoc);
1760 bool validateMIMGD16(const MCInst &Inst);
1761 bool validateMIMGMSAA(const MCInst &Inst);
1762 bool validateOpSel(const MCInst &Inst);
1763 bool validateNeg(const MCInst &Inst, int OpName);
1764 bool validateDPP(const MCInst &Inst, const OperandVector &Operands);
1765 bool validateVccOperand(unsigned Reg) const;
1766 bool validateVOPLiteral(const MCInst &Inst, const OperandVector &Operands);
1767 bool validateMAIAccWrite(const MCInst &Inst, const OperandVector &Operands);
1768 bool validateMAISrc2(const MCInst &Inst, const OperandVector &Operands);
1769 bool validateMFMA(const MCInst &Inst, const OperandVector &Operands);
1770 bool validateAGPRLdSt(const MCInst &Inst) const;
1771 bool validateVGPRAlign(const MCInst &Inst) const;
1772 bool validateBLGP(const MCInst &Inst, const OperandVector &Operands);
1773 bool validateDS(const MCInst &Inst, const OperandVector &Operands);
1774 bool validateGWS(const MCInst &Inst, const OperandVector &Operands);
1775 bool validateDivScale(const MCInst &Inst);
1776 bool validateWaitCnt(const MCInst &Inst, const OperandVector &Operands);
1777 bool validateCoherencyBits(const MCInst &Inst, const OperandVector &Operands,
1778 const SMLoc &IDLoc);
1779 bool validateTHAndScopeBits(const MCInst &Inst, const OperandVector &Operands,
1780 const unsigned CPol);
1781 bool validateExeczVcczOperands(const OperandVector &Operands);
1782 bool validateTFE(const MCInst &Inst, const OperandVector &Operands);
1783 std::optional<StringRef> validateLdsDirect(const MCInst &Inst);
1784 unsigned getConstantBusLimit(unsigned Opcode) const;
1785 bool usesConstantBus(const MCInst &Inst, unsigned OpIdx);
1786 bool isInlineConstant(const MCInst &Inst, unsigned OpIdx) const;
1787 unsigned findImplicitSGPRReadInVOP(const MCInst &Inst) const;
1788
1789 bool isSupportedMnemo(StringRef Mnemo,
1790 const FeatureBitset &FBS);
1791 bool isSupportedMnemo(StringRef Mnemo,
1792 const FeatureBitset &FBS,
1793 ArrayRef<unsigned> Variants);
1794 bool checkUnsupportedInstruction(StringRef Name, const SMLoc &IDLoc);
1795
1796 bool isId(const StringRef Id) const;
1797 bool isId(const AsmToken &Token, const StringRef Id) const;
1798 bool isToken(const AsmToken::TokenKind Kind) const;
1799 StringRef getId() const;
1800 bool trySkipId(const StringRef Id);
1801 bool trySkipId(const StringRef Pref, const StringRef Id);
1802 bool trySkipId(const StringRef Id, const AsmToken::TokenKind Kind);
1803 bool trySkipToken(const AsmToken::TokenKind Kind);
1804 bool skipToken(const AsmToken::TokenKind Kind, const StringRef ErrMsg);
1805 bool parseString(StringRef &Val, const StringRef ErrMsg = "expected a string");
1806 bool parseId(StringRef &Val, const StringRef ErrMsg = "");
1807
1808 void peekTokens(MutableArrayRef<AsmToken> Tokens);
1809 AsmToken::TokenKind getTokenKind() const;
1810 bool parseExpr(int64_t &Imm, StringRef Expected = "");
1811 bool parseExpr(OperandVector &Operands);
1812 StringRef getTokenStr() const;
1813 AsmToken peekToken(bool ShouldSkipSpace = true);
1814 AsmToken getToken() const;
1815 SMLoc getLoc() const;
1816 void lex();
1817
1818public:
1819 void onBeginOfFile() override;
1820 bool parsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc) override;
1821
1822 ParseStatus parseCustomOperand(OperandVector &Operands, unsigned MCK);
1823
1824 ParseStatus parseExpTgt(OperandVector &Operands);
1825 ParseStatus parseSendMsg(OperandVector &Operands);
1826 ParseStatus parseInterpSlot(OperandVector &Operands);
1827 ParseStatus parseInterpAttr(OperandVector &Operands);
1828 ParseStatus parseSOPPBrTarget(OperandVector &Operands);
1829 ParseStatus parseBoolReg(OperandVector &Operands);
1830
1831 bool parseSwizzleOperand(int64_t &Op,
1832 const unsigned MinVal,
1833 const unsigned MaxVal,
1834 const StringRef ErrMsg,
1835 SMLoc &Loc);
1836 bool parseSwizzleOperands(const unsigned OpNum, int64_t* Op,
1837 const unsigned MinVal,
1838 const unsigned MaxVal,
1839 const StringRef ErrMsg);
1840 ParseStatus parseSwizzle(OperandVector &Operands);
1841 bool parseSwizzleOffset(int64_t &Imm);
1842 bool parseSwizzleMacro(int64_t &Imm);
1843 bool parseSwizzleQuadPerm(int64_t &Imm);
1844 bool parseSwizzleBitmaskPerm(int64_t &Imm);
1845 bool parseSwizzleBroadcast(int64_t &Imm);
1846 bool parseSwizzleSwap(int64_t &Imm);
1847 bool parseSwizzleReverse(int64_t &Imm);
1848
1849 ParseStatus parseGPRIdxMode(OperandVector &Operands);
1850 int64_t parseGPRIdxMacro();
1851
1852 void cvtMubuf(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false); }
1853 void cvtMubufAtomic(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, true); }
1854
1855 ParseStatus parseOModSI(OperandVector &Operands);
1856
1857 void cvtVOP3(MCInst &Inst, const OperandVector &Operands,
1858 OptionalImmIndexMap &OptionalIdx);
1859 void cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands);
1860 void cvtVOP3(MCInst &Inst, const OperandVector &Operands);
1861 void cvtVOP3P(MCInst &Inst, const OperandVector &Operands);
1862 void cvtSWMMAC(MCInst &Inst, const OperandVector &Operands);
1863
1864 void cvtVOPD(MCInst &Inst, const OperandVector &Operands);
1865 void cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands,
1866 OptionalImmIndexMap &OptionalIdx);
1867 void cvtVOP3P(MCInst &Inst, const OperandVector &Operands,
1868 OptionalImmIndexMap &OptionalIdx);
1869
1870 void cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands);
1871 void cvtVINTERP(MCInst &Inst, const OperandVector &Operands);
1872
1873 bool parseDimId(unsigned &Encoding);
1875 bool convertDppBoundCtrl(int64_t &BoundCtrl);
1877 ParseStatus parseDPPCtrl(OperandVector &Operands);
1878 bool isSupportedDPPCtrl(StringRef Ctrl, const OperandVector &Operands);
1879 int64_t parseDPPCtrlSel(StringRef Ctrl);
1880 int64_t parseDPPCtrlPerm();
1881 void cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8 = false);
1882 void cvtDPP8(MCInst &Inst, const OperandVector &Operands) {
1883 cvtDPP(Inst, Operands, true);
1884 }
1885 void cvtVOP3DPP(MCInst &Inst, const OperandVector &Operands,
1886 bool IsDPP8 = false);
1887 void cvtVOP3DPP8(MCInst &Inst, const OperandVector &Operands) {
1888 cvtVOP3DPP(Inst, Operands, true);
1889 }
1890
1891 ParseStatus parseSDWASel(OperandVector &Operands, StringRef Prefix,
1892 AMDGPUOperand::ImmTy Type);
1893 ParseStatus parseSDWADstUnused(OperandVector &Operands);
1894 void cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands);
1895 void cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands);
1896 void cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands);
1897 void cvtSdwaVOP2e(MCInst &Inst, const OperandVector &Operands);
1898 void cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands);
1899 void cvtSDWA(MCInst &Inst, const OperandVector &Operands,
1900 uint64_t BasicInstType,
1901 bool SkipDstVcc = false,
1902 bool SkipSrcVcc = false);
1903
1904 ParseStatus parseEndpgm(OperandVector &Operands);
1905
1907};
1908
1909} // end anonymous namespace
1910
 1911 // May be called with an integer type of equivalent bitwidth.
1912static const fltSemantics *getFltSemantics(unsigned Size) {
1913 switch (Size) {
1914 case 4:
1915 return &APFloat::IEEEsingle();
1916 case 8:
1917 return &APFloat::IEEEdouble();
1918 case 2:
1919 return &APFloat::IEEEhalf();
1920 default:
1921 llvm_unreachable("unsupported fp type");
1922 }
1923}
1924
1925static const fltSemantics *getFltSemantics(MVT VT) {
 1926 return getFltSemantics(VT.getSizeInBits() / 8);
1927}
1928
1929static const fltSemantics *getOpFltSemantics(uint8_t OperandType) {
 1930 switch (OperandType) {
 1931 // When a floating-point immediate is used as an operand of type i16, the 32-bit
 1932 // representation of the constant truncated to the 16 LSBs should be used.
1952 return &APFloat::IEEEsingle();
1958 return &APFloat::IEEEdouble();
1967 return &APFloat::IEEEhalf();
1975 return &APFloat::BFloat();
1976 default:
1977 llvm_unreachable("unsupported fp type");
1978 }
1979}
1980
1981//===----------------------------------------------------------------------===//
1982// Operand
1983//===----------------------------------------------------------------------===//
1984
1985static bool canLosslesslyConvertToFPType(APFloat &FPLiteral, MVT VT) {
1986 bool Lost;
1987
 1988 // Convert the literal to the target floating-point type.
 1989 APFloat::opStatus Status = FPLiteral.convert(*getFltSemantics(VT),
 1990 APFloat::rmNearestTiesToEven,
 1991 &Lost);
 1992 // We allow precision loss but not overflow or underflow
1993 if (Status != APFloat::opOK &&
1994 Lost &&
1995 ((Status & APFloat::opOverflow) != 0 ||
1996 (Status & APFloat::opUnderflow) != 0)) {
1997 return false;
1998 }
1999
2000 return true;
2001}
2002
2003static bool isSafeTruncation(int64_t Val, unsigned Size) {
2004 return isUIntN(Size, Val) || isIntN(Size, Val);
2005}
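// For illustration: isSafeTruncation(-1, 16) and isSafeTruncation(0xFFFF, 16)
// both hold (the value fits as a signed or an unsigned 16-bit integer), while
// isSafeTruncation(0x1FFFF, 16) does not.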
2006
2007static bool isInlineableLiteralOp16(int64_t Val, MVT VT, bool HasInv2Pi) {
2008 if (VT.getScalarType() == MVT::i16)
2009 return isInlinableLiteral32(Val, HasInv2Pi);
2010
2011 if (VT.getScalarType() == MVT::f16)
2012 return AMDGPU::isInlinableLiteralFP16(Val, HasInv2Pi);
2013
2014 assert(VT.getScalarType() == MVT::bf16);
2015
2016 return AMDGPU::isInlinableLiteralBF16(Val, HasInv2Pi);
2017}
2018
2019bool AMDGPUOperand::isInlinableImm(MVT type) const {
2020
2021 // This is a hack to enable named inline values like
2022 // shared_base with both 32-bit and 64-bit operands.
2023 // Note that these values are defined as
2024 // 32-bit operands only.
2025 if (isInlineValue()) {
2026 return true;
2027 }
2028
2029 if (!isImmTy(ImmTyNone)) {
2030 // Only plain immediates are inlinable (e.g. "clamp" attribute is not)
2031 return false;
2032 }
2033 // TODO: We should avoid using host float here. It would be better to
2034 // check the float bit values which is what a few other places do.
2035 // We've had bot failures before due to weird NaN support on mips hosts.
2036
2037 APInt Literal(64, Imm.Val);
2038
2039 if (Imm.IsFPImm) { // We got fp literal token
2040 if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand
2042 AsmParser->hasInv2PiInlineImm());
2043 }
2044
2045 APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val));
2046 if (!canLosslesslyConvertToFPType(FPLiteral, type))
2047 return false;
2048
2049 if (type.getScalarSizeInBits() == 16) {
2050 bool Lost = false;
2051 switch (type.getScalarType().SimpleTy) {
2052 default:
2053 llvm_unreachable("unknown 16-bit type");
2054 case MVT::bf16:
2055 FPLiteral.convert(APFloatBase::BFloat(), APFloat::rmNearestTiesToEven,
2056 &Lost);
2057 break;
2058 case MVT::f16:
2059 FPLiteral.convert(APFloatBase::IEEEhalf(), APFloat::rmNearestTiesToEven,
2060 &Lost);
2061 break;
2062 case MVT::i16:
2063 FPLiteral.convert(APFloatBase::IEEEsingle(),
2064 APFloat::rmNearestTiesToEven, &Lost);
2065 break;
2066 }
 2067 // We need to use the 32-bit representation here because when a floating-point
 2068 // inline constant is used as an i16 operand, its 32-bit representation
 2069 // will be used. We will need the 32-bit value to check if
 2070 // it is an FP inline constant.
2071 uint32_t ImmVal = FPLiteral.bitcastToAPInt().getZExtValue();
2072 return isInlineableLiteralOp16(ImmVal, type,
2073 AsmParser->hasInv2PiInlineImm());
2074 }
2075
2076 // Check if single precision literal is inlinable
2078 static_cast<int32_t>(FPLiteral.bitcastToAPInt().getZExtValue()),
2079 AsmParser->hasInv2PiInlineImm());
2080 }
2081
2082 // We got int literal token.
2083 if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand
2085 AsmParser->hasInv2PiInlineImm());
2086 }
2087
2088 if (!isSafeTruncation(Imm.Val, type.getScalarSizeInBits())) {
2089 return false;
2090 }
2091
2092 if (type.getScalarSizeInBits() == 16) {
2094 static_cast<int16_t>(Literal.getLoBits(16).getSExtValue()),
2095 type, AsmParser->hasInv2PiInlineImm());
2096 }
2097
2099 static_cast<int32_t>(Literal.getLoBits(32).getZExtValue()),
2100 AsmParser->hasInv2PiInlineImm());
2101}
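// As a rough, non-exhaustive reference: inlinable values typically include
// small integers in the range -16..64 and common FP constants such as 0.5,
// 1.0, 2.0, 4.0, their negations, and 1/(2*pi) when the subtarget has the
// Inv2Pi inline immediate.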
2102
2103bool AMDGPUOperand::isLiteralImm(MVT type) const {
2104 // Check that this immediate can be added as literal
2105 if (!isImmTy(ImmTyNone)) {
2106 return false;
2107 }
2108
2109 if (!Imm.IsFPImm) {
2110 // We got int literal token.
2111
2112 if (type == MVT::f64 && hasFPModifiers()) {
2113 // Cannot apply fp modifiers to int literals preserving the same semantics
2114 // for VOP1/2/C and VOP3 because of integer truncation. To avoid ambiguity,
2115 // disable these cases.
2116 return false;
2117 }
2118
2119 unsigned Size = type.getSizeInBits();
2120 if (Size == 64)
2121 Size = 32;
2122
2123 // FIXME: 64-bit operands can zero extend, sign extend, or pad zeroes for FP
2124 // types.
2125 return isSafeTruncation(Imm.Val, Size);
2126 }
2127
2128 // We got fp literal token
2129 if (type == MVT::f64) { // Expected 64-bit fp operand
 2130 // The low 32 bits of the literal would be set to zeroes, but such literals are accepted.
2131 return true;
2132 }
2133
2134 if (type == MVT::i64) { // Expected 64-bit int operand
2135 // We don't allow fp literals in 64-bit integer instructions. It is
2136 // unclear how we should encode them.
2137 return false;
2138 }
2139
2140 // We allow fp literals with f16x2 operands assuming that the specified
2141 // literal goes into the lower half and the upper half is zero. We also
2142 // require that the literal may be losslessly converted to f16.
2143 //
2144 // For i16x2 operands, we assume that the specified literal is encoded as a
2145 // single-precision float. This is pretty odd, but it matches SP3 and what
2146 // happens in hardware.
2147 MVT ExpectedType = (type == MVT::v2f16) ? MVT::f16
2148 : (type == MVT::v2i16) ? MVT::f32
2149 : (type == MVT::v2f32) ? MVT::f32
2150 : type;
2151
2152 APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val));
2153 return canLosslesslyConvertToFPType(FPLiteral, ExpectedType);
2154}
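// For example, with a v2f16 operand a literal such as 1.5 is accepted and is
// presumably encoded as the 32-bit value 0x00003E00 (f16 1.5 in the low half,
// zero in the high half), matching the comment above.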
2155
2156bool AMDGPUOperand::isRegClass(unsigned RCID) const {
2157 return isRegKind() && AsmParser->getMRI()->getRegClass(RCID).contains(getReg());
2158}
2159
2160bool AMDGPUOperand::isVRegWithInputMods() const {
2161 return isRegClass(AMDGPU::VGPR_32RegClassID) ||
2162 // GFX90A allows DPP on 64-bit operands.
2163 (isRegClass(AMDGPU::VReg_64RegClassID) &&
2164 AsmParser->getFeatureBits()[AMDGPU::FeatureDPALU_DPP]);
2165}
2166
2167template <bool IsFake16> bool AMDGPUOperand::isT16VRegWithInputMods() const {
2168 return isRegClass(IsFake16 ? AMDGPU::VGPR_32_Lo128RegClassID
2169 : AMDGPU::VGPR_16_Lo128RegClassID);
2170}
2171
2172bool AMDGPUOperand::isSDWAOperand(MVT type) const {
2173 if (AsmParser->isVI())
2174 return isVReg32();
2175 else if (AsmParser->isGFX9Plus())
2176 return isRegClass(AMDGPU::VS_32RegClassID) || isInlinableImm(type);
2177 else
2178 return false;
2179}
2180
2181bool AMDGPUOperand::isSDWAFP16Operand() const {
2182 return isSDWAOperand(MVT::f16);
2183}
2184
2185bool AMDGPUOperand::isSDWAFP32Operand() const {
2186 return isSDWAOperand(MVT::f32);
2187}
2188
2189bool AMDGPUOperand::isSDWAInt16Operand() const {
2190 return isSDWAOperand(MVT::i16);
2191}
2192
2193bool AMDGPUOperand::isSDWAInt32Operand() const {
2194 return isSDWAOperand(MVT::i32);
2195}
2196
2197bool AMDGPUOperand::isBoolReg() const {
2198 auto FB = AsmParser->getFeatureBits();
2199 return isReg() && ((FB[AMDGPU::FeatureWavefrontSize64] && isSCSrc_b64()) ||
2200 (FB[AMDGPU::FeatureWavefrontSize32] && isSCSrc_b32()));
2201}
2202
2203uint64_t AMDGPUOperand::applyInputFPModifiers(uint64_t Val, unsigned Size) const
2204{
2205 assert(isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers());
2206 assert(Size == 2 || Size == 4 || Size == 8);
2207
2208 const uint64_t FpSignMask = (1ULL << (Size * 8 - 1));
2209
2210 if (Imm.Mods.Abs) {
2211 Val &= ~FpSignMask;
2212 }
2213 if (Imm.Mods.Neg) {
2214 Val ^= FpSignMask;
2215 }
2216
2217 return Val;
2218}
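// Illustration for Size == 4 (FpSignMask == 0x80000000): applying 'abs' to
// 0xBF800000 (-1.0f) clears the sign bit and yields 0x3F800000 (1.0f), while
// 'neg' flips the sign bit, turning 0x3F800000 into 0xBF800000.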
2219
2220void AMDGPUOperand::addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers) const {
2221 if (isExpr()) {
2223 return;
2224 }
2225
2226 if (AMDGPU::isSISrcOperand(AsmParser->getMII()->get(Inst.getOpcode()),
2227 Inst.getNumOperands())) {
2228 addLiteralImmOperand(Inst, Imm.Val,
2229 ApplyModifiers &
2230 isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers());
2231 } else {
2232 assert(!isImmTy(ImmTyNone) || !hasModifiers());
2234 setImmKindNone();
2235 }
2236}
2237
2238void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const {
2239 const auto& InstDesc = AsmParser->getMII()->get(Inst.getOpcode());
2240 auto OpNum = Inst.getNumOperands();
2241 // Check that this operand accepts literals
2242 assert(AMDGPU::isSISrcOperand(InstDesc, OpNum));
2243
2244 if (ApplyModifiers) {
2245 assert(AMDGPU::isSISrcFPOperand(InstDesc, OpNum));
2246 const unsigned Size = Imm.IsFPImm ? sizeof(double) : getOperandSize(InstDesc, OpNum);
2247 Val = applyInputFPModifiers(Val, Size);
2248 }
2249
2250 APInt Literal(64, Val);
2251 uint8_t OpTy = InstDesc.operands()[OpNum].OperandType;
2252
2253 if (Imm.IsFPImm) { // We got fp literal token
2254 switch (OpTy) {
2260 if (AMDGPU::isInlinableLiteral64(Literal.getZExtValue(),
2261 AsmParser->hasInv2PiInlineImm())) {
2262 Inst.addOperand(MCOperand::createImm(Literal.getZExtValue()));
2263 setImmKindConst();
2264 return;
2265 }
2266
2267 // Non-inlineable
2268 if (AMDGPU::isSISrcFPOperand(InstDesc, OpNum)) { // Expected 64-bit fp operand
2269 // For fp operands we check if low 32 bits are zeros
2270 if (Literal.getLoBits(32) != 0) {
2271 const_cast<AMDGPUAsmParser *>(AsmParser)->Warning(Inst.getLoc(),
2272 "Can't encode literal as exact 64-bit floating-point operand. "
2273 "Low 32-bits will be set to zero");
2274 Val &= 0xffffffff00000000u;
2275 }
2276
2278 setImmKindLiteral();
2279 return;
2280 }
2281
2282 // We don't allow fp literals in 64-bit integer instructions. It is
2283 // unclear how we should encode them. This case should be checked earlier
2284 // in predicate methods (isLiteralImm())
2285 llvm_unreachable("fp literal in 64-bit integer instruction.");
2286
2294 if (AsmParser->hasInv2PiInlineImm() && Literal == 0x3fc45f306725feed) {
 2295 // This is 1/(2*pi), which is going to be truncated to bf16 with a
 2296 // loss of precision. The constant represents the idiomatic fp32 value of
 2297 // 1/(2*pi) = 0.15915494, since bf16 is in fact fp32 with the low 16
 2298 // bits cleared. Prevent the rounding below.
2299 Inst.addOperand(MCOperand::createImm(0x3e22));
2300 setImmKindLiteral();
2301 return;
2302 }
2303 [[fallthrough]];
2304
2332 bool lost;
2333 APFloat FPLiteral(APFloat::IEEEdouble(), Literal);
 2334 // Convert the literal to the operand's floating-point type
2335 FPLiteral.convert(*getOpFltSemantics(OpTy),
2336 APFloat::rmNearestTiesToEven, &lost);
 2337 // We allow precision loss but not overflow or underflow. This should be
 2338 // checked earlier in isLiteralImm()
2339
2340 uint64_t ImmVal = FPLiteral.bitcastToAPInt().getZExtValue();
2341 Inst.addOperand(MCOperand::createImm(ImmVal));
2342 if (OpTy == AMDGPU::OPERAND_KIMM32 || OpTy == AMDGPU::OPERAND_KIMM16) {
2343 setImmKindMandatoryLiteral();
2344 } else {
2345 setImmKindLiteral();
2346 }
2347 return;
2348 }
2349 default:
2350 llvm_unreachable("invalid operand size");
2351 }
2352
2353 return;
2354 }
2355
2356 // We got int literal token.
2357 // Only sign extend inline immediates.
2358 switch (OpTy) {
2374 if (isSafeTruncation(Val, 32) &&
2375 AMDGPU::isInlinableLiteral32(static_cast<int32_t>(Val),
2376 AsmParser->hasInv2PiInlineImm())) {
2378 setImmKindConst();
2379 return;
2380 }
2381
2382 Inst.addOperand(MCOperand::createImm(Val & 0xffffffff));
2383 setImmKindLiteral();
2384 return;
2385
2391 if (AMDGPU::isInlinableLiteral64(Val, AsmParser->hasInv2PiInlineImm())) {
2393 setImmKindConst();
2394 return;
2395 }
2396
2397 Val = AMDGPU::isSISrcFPOperand(InstDesc, OpNum) ? (uint64_t)Val << 32
2398 : Lo_32(Val);
2399
2401 setImmKindLiteral();
2402 return;
2403
2407 if (isSafeTruncation(Val, 16) &&
2408 AMDGPU::isInlinableIntLiteral(static_cast<int16_t>(Val))) {
2409 Inst.addOperand(MCOperand::createImm(Val & 0xffffffff));
2410 setImmKindConst();
2411 return;
2412 }
2413
2414 Inst.addOperand(MCOperand::createImm(Val & 0xffff));
2415 setImmKindLiteral();
2416 return;
2417
2422 if (isSafeTruncation(Val, 16) &&
2423 AMDGPU::isInlinableLiteralFP16(static_cast<int16_t>(Val),
2424 AsmParser->hasInv2PiInlineImm())) {
2426 setImmKindConst();
2427 return;
2428 }
2429
2430 Inst.addOperand(MCOperand::createImm(Val & 0xffff));
2431 setImmKindLiteral();
2432 return;
2433
2438 if (isSafeTruncation(Val, 16) &&
2439 AMDGPU::isInlinableLiteralBF16(static_cast<int16_t>(Val),
2440 AsmParser->hasInv2PiInlineImm())) {
2442 setImmKindConst();
2443 return;
2444 }
2445
2446 Inst.addOperand(MCOperand::createImm(Val & 0xffff));
2447 setImmKindLiteral();
2448 return;
2449
2452 assert(isSafeTruncation(Val, 16));
2453 assert(AMDGPU::isInlinableIntLiteral(static_cast<int16_t>(Val)));
2455 return;
2456 }
2459 assert(isSafeTruncation(Val, 16));
2460 assert(AMDGPU::isInlinableLiteralFP16(static_cast<int16_t>(Val),
2461 AsmParser->hasInv2PiInlineImm()));
2462
2464 return;
2465 }
2466
2469 assert(isSafeTruncation(Val, 16));
2470 assert(AMDGPU::isInlinableLiteralBF16(static_cast<int16_t>(Val),
2471 AsmParser->hasInv2PiInlineImm()));
2472
2474 return;
2475 }
2476
2478 Inst.addOperand(MCOperand::createImm(Literal.getLoBits(32).getZExtValue()));
2479 setImmKindMandatoryLiteral();
2480 return;
2482 Inst.addOperand(MCOperand::createImm(Literal.getLoBits(16).getZExtValue()));
2483 setImmKindMandatoryLiteral();
2484 return;
2485 default:
2486 llvm_unreachable("invalid operand size");
2487 }
2488}
2489
2490void AMDGPUOperand::addRegOperands(MCInst &Inst, unsigned N) const {
2491 Inst.addOperand(MCOperand::createReg(AMDGPU::getMCReg(getReg(), AsmParser->getSTI())));
2492}
2493
2494bool AMDGPUOperand::isInlineValue() const {
2495 return isRegKind() && ::isInlineValue(getReg());
2496}
2497
2498//===----------------------------------------------------------------------===//
2499// AsmParser
2500//===----------------------------------------------------------------------===//
2501
2502static int getRegClass(RegisterKind Is, unsigned RegWidth) {
2503 if (Is == IS_VGPR) {
2504 switch (RegWidth) {
2505 default: return -1;
2506 case 32:
2507 return AMDGPU::VGPR_32RegClassID;
2508 case 64:
2509 return AMDGPU::VReg_64RegClassID;
2510 case 96:
2511 return AMDGPU::VReg_96RegClassID;
2512 case 128:
2513 return AMDGPU::VReg_128RegClassID;
2514 case 160:
2515 return AMDGPU::VReg_160RegClassID;
2516 case 192:
2517 return AMDGPU::VReg_192RegClassID;
2518 case 224:
2519 return AMDGPU::VReg_224RegClassID;
2520 case 256:
2521 return AMDGPU::VReg_256RegClassID;
2522 case 288:
2523 return AMDGPU::VReg_288RegClassID;
2524 case 320:
2525 return AMDGPU::VReg_320RegClassID;
2526 case 352:
2527 return AMDGPU::VReg_352RegClassID;
2528 case 384:
2529 return AMDGPU::VReg_384RegClassID;
2530 case 512:
2531 return AMDGPU::VReg_512RegClassID;
2532 case 1024:
2533 return AMDGPU::VReg_1024RegClassID;
2534 }
2535 } else if (Is == IS_TTMP) {
2536 switch (RegWidth) {
2537 default: return -1;
2538 case 32:
2539 return AMDGPU::TTMP_32RegClassID;
2540 case 64:
2541 return AMDGPU::TTMP_64RegClassID;
2542 case 128:
2543 return AMDGPU::TTMP_128RegClassID;
2544 case 256:
2545 return AMDGPU::TTMP_256RegClassID;
2546 case 512:
2547 return AMDGPU::TTMP_512RegClassID;
2548 }
2549 } else if (Is == IS_SGPR) {
2550 switch (RegWidth) {
2551 default: return -1;
2552 case 32:
2553 return AMDGPU::SGPR_32RegClassID;
2554 case 64:
2555 return AMDGPU::SGPR_64RegClassID;
2556 case 96:
2557 return AMDGPU::SGPR_96RegClassID;
2558 case 128:
2559 return AMDGPU::SGPR_128RegClassID;
2560 case 160:
2561 return AMDGPU::SGPR_160RegClassID;
2562 case 192:
2563 return AMDGPU::SGPR_192RegClassID;
2564 case 224:
2565 return AMDGPU::SGPR_224RegClassID;
2566 case 256:
2567 return AMDGPU::SGPR_256RegClassID;
2568 case 288:
2569 return AMDGPU::SGPR_288RegClassID;
2570 case 320:
2571 return AMDGPU::SGPR_320RegClassID;
2572 case 352:
2573 return AMDGPU::SGPR_352RegClassID;
2574 case 384:
2575 return AMDGPU::SGPR_384RegClassID;
2576 case 512:
2577 return AMDGPU::SGPR_512RegClassID;
2578 }
2579 } else if (Is == IS_AGPR) {
2580 switch (RegWidth) {
2581 default: return -1;
2582 case 32:
2583 return AMDGPU::AGPR_32RegClassID;
2584 case 64:
2585 return AMDGPU::AReg_64RegClassID;
2586 case 96:
2587 return AMDGPU::AReg_96RegClassID;
2588 case 128:
2589 return AMDGPU::AReg_128RegClassID;
2590 case 160:
2591 return AMDGPU::AReg_160RegClassID;
2592 case 192:
2593 return AMDGPU::AReg_192RegClassID;
2594 case 224:
2595 return AMDGPU::AReg_224RegClassID;
2596 case 256:
2597 return AMDGPU::AReg_256RegClassID;
2598 case 288:
2599 return AMDGPU::AReg_288RegClassID;
2600 case 320:
2601 return AMDGPU::AReg_320RegClassID;
2602 case 352:
2603 return AMDGPU::AReg_352RegClassID;
2604 case 384:
2605 return AMDGPU::AReg_384RegClassID;
2606 case 512:
2607 return AMDGPU::AReg_512RegClassID;
2608 case 1024:
2609 return AMDGPU::AReg_1024RegClassID;
2610 }
2611 }
2612 return -1;
2613}
2614
2617 .Case("exec", AMDGPU::EXEC)
2618 .Case("vcc", AMDGPU::VCC)
2619 .Case("flat_scratch", AMDGPU::FLAT_SCR)
2620 .Case("xnack_mask", AMDGPU::XNACK_MASK)
2621 .Case("shared_base", AMDGPU::SRC_SHARED_BASE)
2622 .Case("src_shared_base", AMDGPU::SRC_SHARED_BASE)
2623 .Case("shared_limit", AMDGPU::SRC_SHARED_LIMIT)
2624 .Case("src_shared_limit", AMDGPU::SRC_SHARED_LIMIT)
2625 .Case("private_base", AMDGPU::SRC_PRIVATE_BASE)
2626 .Case("src_private_base", AMDGPU::SRC_PRIVATE_BASE)
2627 .Case("private_limit", AMDGPU::SRC_PRIVATE_LIMIT)
2628 .Case("src_private_limit", AMDGPU::SRC_PRIVATE_LIMIT)
2629 .Case("pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID)
2630 .Case("src_pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID)
2631 .Case("lds_direct", AMDGPU::LDS_DIRECT)
2632 .Case("src_lds_direct", AMDGPU::LDS_DIRECT)
2633 .Case("m0", AMDGPU::M0)
2634 .Case("vccz", AMDGPU::SRC_VCCZ)
2635 .Case("src_vccz", AMDGPU::SRC_VCCZ)
2636 .Case("execz", AMDGPU::SRC_EXECZ)
2637 .Case("src_execz", AMDGPU::SRC_EXECZ)
2638 .Case("scc", AMDGPU::SRC_SCC)
2639 .Case("src_scc", AMDGPU::SRC_SCC)
2640 .Case("tba", AMDGPU::TBA)
2641 .Case("tma", AMDGPU::TMA)
2642 .Case("flat_scratch_lo", AMDGPU::FLAT_SCR_LO)
2643 .Case("flat_scratch_hi", AMDGPU::FLAT_SCR_HI)
2644 .Case("xnack_mask_lo", AMDGPU::XNACK_MASK_LO)
2645 .Case("xnack_mask_hi", AMDGPU::XNACK_MASK_HI)
2646 .Case("vcc_lo", AMDGPU::VCC_LO)
2647 .Case("vcc_hi", AMDGPU::VCC_HI)
2648 .Case("exec_lo", AMDGPU::EXEC_LO)
2649 .Case("exec_hi", AMDGPU::EXEC_HI)
2650 .Case("tma_lo", AMDGPU::TMA_LO)
2651 .Case("tma_hi", AMDGPU::TMA_HI)
2652 .Case("tba_lo", AMDGPU::TBA_LO)
2653 .Case("tba_hi", AMDGPU::TBA_HI)
2654 .Case("pc", AMDGPU::PC_REG)
2655 .Case("null", AMDGPU::SGPR_NULL)
2656 .Default(AMDGPU::NoRegister);
2657}
2658
2659bool AMDGPUAsmParser::ParseRegister(MCRegister &RegNo, SMLoc &StartLoc,
2660 SMLoc &EndLoc, bool RestoreOnFailure) {
2661 auto R = parseRegister();
2662 if (!R) return true;
2663 assert(R->isReg());
2664 RegNo = R->getReg();
2665 StartLoc = R->getStartLoc();
2666 EndLoc = R->getEndLoc();
2667 return false;
2668}
2669
2670bool AMDGPUAsmParser::parseRegister(MCRegister &Reg, SMLoc &StartLoc,
2671 SMLoc &EndLoc) {
2672 return ParseRegister(Reg, StartLoc, EndLoc, /*RestoreOnFailure=*/false);
2673}
2674
2675ParseStatus AMDGPUAsmParser::tryParseRegister(MCRegister &Reg, SMLoc &StartLoc,
2676 SMLoc &EndLoc) {
2677 bool Result = ParseRegister(Reg, StartLoc, EndLoc, /*RestoreOnFailure=*/true);
2678 bool PendingErrors = getParser().hasPendingError();
2679 getParser().clearPendingErrors();
2680 if (PendingErrors)
2681 return ParseStatus::Failure;
2682 if (Result)
2683 return ParseStatus::NoMatch;
2684 return ParseStatus::Success;
2685}
2686
2687bool AMDGPUAsmParser::AddNextRegisterToList(unsigned &Reg, unsigned &RegWidth,
2688 RegisterKind RegKind, unsigned Reg1,
2689 SMLoc Loc) {
2690 switch (RegKind) {
2691 case IS_SPECIAL:
2692 if (Reg == AMDGPU::EXEC_LO && Reg1 == AMDGPU::EXEC_HI) {
2693 Reg = AMDGPU::EXEC;
2694 RegWidth = 64;
2695 return true;
2696 }
2697 if (Reg == AMDGPU::FLAT_SCR_LO && Reg1 == AMDGPU::FLAT_SCR_HI) {
2698 Reg = AMDGPU::FLAT_SCR;
2699 RegWidth = 64;
2700 return true;
2701 }
2702 if (Reg == AMDGPU::XNACK_MASK_LO && Reg1 == AMDGPU::XNACK_MASK_HI) {
2703 Reg = AMDGPU::XNACK_MASK;
2704 RegWidth = 64;
2705 return true;
2706 }
2707 if (Reg == AMDGPU::VCC_LO && Reg1 == AMDGPU::VCC_HI) {
2708 Reg = AMDGPU::VCC;
2709 RegWidth = 64;
2710 return true;
2711 }
2712 if (Reg == AMDGPU::TBA_LO && Reg1 == AMDGPU::TBA_HI) {
2713 Reg = AMDGPU::TBA;
2714 RegWidth = 64;
2715 return true;
2716 }
2717 if (Reg == AMDGPU::TMA_LO && Reg1 == AMDGPU::TMA_HI) {
2718 Reg = AMDGPU::TMA;
2719 RegWidth = 64;
2720 return true;
2721 }
2722 Error(Loc, "register does not fit in the list");
2723 return false;
2724 case IS_VGPR:
2725 case IS_SGPR:
2726 case IS_AGPR:
2727 case IS_TTMP:
2728 if (Reg1 != Reg + RegWidth / 32) {
2729 Error(Loc, "registers in a list must have consecutive indices");
2730 return false;
2731 }
2732 RegWidth += 32;
2733 return true;
2734 default:
2735 llvm_unreachable("unexpected register kind");
2736 }
2737}
2738
2739struct RegInfo {
 2740 StringLiteral Name;
 2741 RegisterKind Kind;
2742};
2743
2744static constexpr RegInfo RegularRegisters[] = {
2745 {{"v"}, IS_VGPR},
2746 {{"s"}, IS_SGPR},
2747 {{"ttmp"}, IS_TTMP},
2748 {{"acc"}, IS_AGPR},
2749 {{"a"}, IS_AGPR},
2750};
2751
2752static bool isRegularReg(RegisterKind Kind) {
2753 return Kind == IS_VGPR ||
2754 Kind == IS_SGPR ||
2755 Kind == IS_TTMP ||
2756 Kind == IS_AGPR;
2757}
2758
2759static const RegInfo* getRegularRegInfo(StringRef Str) {
 2760 for (const RegInfo &Reg : RegularRegisters)
2761 if (Str.starts_with(Reg.Name))
2762 return &Reg;
2763 return nullptr;
2764}
2765
2766static bool getRegNum(StringRef Str, unsigned& Num) {
2767 return !Str.getAsInteger(10, Num);
2768}
2769
2770bool
2771AMDGPUAsmParser::isRegister(const AsmToken &Token,
2772 const AsmToken &NextToken) const {
2773
2774 // A list of consecutive registers: [s0,s1,s2,s3]
2775 if (Token.is(AsmToken::LBrac))
2776 return true;
2777
2778 if (!Token.is(AsmToken::Identifier))
2779 return false;
2780
2781 // A single register like s0 or a range of registers like s[0:1]
2782
2783 StringRef Str = Token.getString();
2784 const RegInfo *Reg = getRegularRegInfo(Str);
2785 if (Reg) {
2786 StringRef RegName = Reg->Name;
2787 StringRef RegSuffix = Str.substr(RegName.size());
2788 if (!RegSuffix.empty()) {
2789 RegSuffix.consume_back(".l");
2790 RegSuffix.consume_back(".h");
2791 unsigned Num;
2792 // A single register with an index: rXX
2793 if (getRegNum(RegSuffix, Num))
2794 return true;
2795 } else {
2796 // A range of registers: r[XX:YY].
2797 if (NextToken.is(AsmToken::LBrac))
2798 return true;
2799 }
2800 }
2801
2802 return getSpecialRegForName(Str) != AMDGPU::NoRegister;
2803}
2804
2805bool
2806AMDGPUAsmParser::isRegister()
2807{
2808 return isRegister(getToken(), peekToken());
2809}
2810
2811unsigned AMDGPUAsmParser::getRegularReg(RegisterKind RegKind, unsigned RegNum,
2812 unsigned SubReg, unsigned RegWidth,
2813 SMLoc Loc) {
2814 assert(isRegularReg(RegKind));
2815
2816 unsigned AlignSize = 1;
2817 if (RegKind == IS_SGPR || RegKind == IS_TTMP) {
2818 // SGPR and TTMP registers must be aligned.
2819 // Max required alignment is 4 dwords.
2820 AlignSize = std::min(llvm::bit_ceil(RegWidth / 32), 4u);
2821 }
2822
2823 if (RegNum % AlignSize != 0) {
2824 Error(Loc, "invalid register alignment");
2825 return AMDGPU::NoRegister;
2826 }
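// For example, a 64-bit SGPR tuple must start at an even index: s[2:3] is
// accepted here, while s[1:2] is rejected; 128-bit and wider tuples such as
// s[4:7] must start at a multiple of 4.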
2827
2828 unsigned RegIdx = RegNum / AlignSize;
2829 int RCID = getRegClass(RegKind, RegWidth);
2830 if (RCID == -1) {
2831 Error(Loc, "invalid or unsupported register size");
2832 return AMDGPU::NoRegister;
2833 }
2834
2835 const MCRegisterInfo *TRI = getContext().getRegisterInfo();
2836 const MCRegisterClass RC = TRI->getRegClass(RCID);
2837 if (RegIdx >= RC.getNumRegs()) {
2838 Error(Loc, "register index is out of range");
2839 return AMDGPU::NoRegister;
2840 }
2841
2842 unsigned Reg = RC.getRegister(RegIdx);
2843
2844 if (SubReg) {
2845 Reg = TRI->getSubReg(Reg, SubReg);
2846
2847 // Currently all regular registers have their .l and .h subregisters, so
2848 // we should never need to generate an error here.
2849 assert(Reg && "Invalid subregister!");
2850 }
2851
2852 return Reg;
2853}
2854
2855bool AMDGPUAsmParser::ParseRegRange(unsigned &Num, unsigned &RegWidth) {
2856 int64_t RegLo, RegHi;
2857 if (!skipToken(AsmToken::LBrac, "missing register index"))
2858 return false;
2859
2860 SMLoc FirstIdxLoc = getLoc();
2861 SMLoc SecondIdxLoc;
2862
2863 if (!parseExpr(RegLo))
2864 return false;
2865
2866 if (trySkipToken(AsmToken::Colon)) {
2867 SecondIdxLoc = getLoc();
2868 if (!parseExpr(RegHi))
2869 return false;
2870 } else {
2871 RegHi = RegLo;
2872 }
2873
2874 if (!skipToken(AsmToken::RBrac, "expected a closing square bracket"))
2875 return false;
2876
2877 if (!isUInt<32>(RegLo)) {
2878 Error(FirstIdxLoc, "invalid register index");
2879 return false;
2880 }
2881
2882 if (!isUInt<32>(RegHi)) {
2883 Error(SecondIdxLoc, "invalid register index");
2884 return false;
2885 }
2886
2887 if (RegLo > RegHi) {
2888 Error(FirstIdxLoc, "first register index should not exceed second index");
2889 return false;
2890 }
2891
2892 Num = static_cast<unsigned>(RegLo);
2893 RegWidth = 32 * ((RegHi - RegLo) + 1);
2894 return true;
2895}
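// For example, "[4:7]" (as in v[4:7]) yields Num == 4 and RegWidth == 128,
// while a single index such as "[5]" yields RegWidth == 32.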
2896
2897unsigned AMDGPUAsmParser::ParseSpecialReg(RegisterKind &RegKind,
2898 unsigned &RegNum, unsigned &RegWidth,
2899 SmallVectorImpl<AsmToken> &Tokens) {
2900 assert(isToken(AsmToken::Identifier));
2901 unsigned Reg = getSpecialRegForName(getTokenStr());
2902 if (Reg) {
2903 RegNum = 0;
2904 RegWidth = 32;
2905 RegKind = IS_SPECIAL;
2906 Tokens.push_back(getToken());
2907 lex(); // skip register name
2908 }
2909 return Reg;
2910}
2911
2912unsigned AMDGPUAsmParser::ParseRegularReg(RegisterKind &RegKind,
2913 unsigned &RegNum, unsigned &RegWidth,
2914 SmallVectorImpl<AsmToken> &Tokens) {
2915 assert(isToken(AsmToken::Identifier));
2916 StringRef RegName = getTokenStr();
2917 auto Loc = getLoc();
2918
2919 const RegInfo *RI = getRegularRegInfo(RegName);
2920 if (!RI) {
2921 Error(Loc, "invalid register name");
2922 return AMDGPU::NoRegister;
2923 }
2924
2925 Tokens.push_back(getToken());
2926 lex(); // skip register name
2927
2928 RegKind = RI->Kind;
2929 StringRef RegSuffix = RegName.substr(RI->Name.size());
2930 unsigned SubReg = NoSubRegister;
2931 if (!RegSuffix.empty()) {
 2932 // We don't know the opcode until we are done parsing, so we don't know if
 2933 // registers should be 16 or 32 bit. It is therefore mandatory to put .l or
 2934 // .h to correctly specify 16-bit registers. We also can't determine the class
 2935 // (VGPR_16_Lo128 or VGPR_16), so always parse them as VGPR_16.
2936 if (RegSuffix.consume_back(".l"))
2937 SubReg = AMDGPU::lo16;
2938 else if (RegSuffix.consume_back(".h"))
2939 SubReg = AMDGPU::hi16;
2940
2941 // Single 32-bit register: vXX.
2942 if (!getRegNum(RegSuffix, RegNum)) {
2943 Error(Loc, "invalid register index");
2944 return AMDGPU::NoRegister;
2945 }
2946 RegWidth = 32;
2947 } else {
2948 // Range of registers: v[XX:YY]. ":YY" is optional.
2949 if (!ParseRegRange(RegNum, RegWidth))
2950 return AMDGPU::NoRegister;
2951 }
2952
2953 return getRegularReg(RegKind, RegNum, SubReg, RegWidth, Loc);
2954}
2955
2956unsigned AMDGPUAsmParser::ParseRegList(RegisterKind &RegKind, unsigned &RegNum,
2957 unsigned &RegWidth,
2958 SmallVectorImpl<AsmToken> &Tokens) {
2959 unsigned Reg = AMDGPU::NoRegister;
2960 auto ListLoc = getLoc();
2961
2962 if (!skipToken(AsmToken::LBrac,
2963 "expected a register or a list of registers")) {
2964 return AMDGPU::NoRegister;
2965 }
2966
2967 // List of consecutive registers, e.g.: [s0,s1,s2,s3]
2968
2969 auto Loc = getLoc();
2970 if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth))
2971 return AMDGPU::NoRegister;
2972 if (RegWidth != 32) {
2973 Error(Loc, "expected a single 32-bit register");
2974 return AMDGPU::NoRegister;
2975 }
2976
2977 for (; trySkipToken(AsmToken::Comma); ) {
2978 RegisterKind NextRegKind;
2979 unsigned NextReg, NextRegNum, NextRegWidth;
2980 Loc = getLoc();
2981
2982 if (!ParseAMDGPURegister(NextRegKind, NextReg,
2983 NextRegNum, NextRegWidth,
2984 Tokens)) {
2985 return AMDGPU::NoRegister;
2986 }
2987 if (NextRegWidth != 32) {
2988 Error(Loc, "expected a single 32-bit register");
2989 return AMDGPU::NoRegister;
2990 }
2991 if (NextRegKind != RegKind) {
2992 Error(Loc, "registers in a list must be of the same kind");
2993 return AMDGPU::NoRegister;
2994 }
2995 if (!AddNextRegisterToList(Reg, RegWidth, RegKind, NextReg, Loc))
2996 return AMDGPU::NoRegister;
2997 }
2998
2999 if (!skipToken(AsmToken::RBrac,
3000 "expected a comma or a closing square bracket")) {
3001 return AMDGPU::NoRegister;
3002 }
3003
3004 if (isRegularReg(RegKind))
3005 Reg = getRegularReg(RegKind, RegNum, NoSubRegister, RegWidth, ListLoc);
3006
3007 return Reg;
3008}
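// For example, the list [s0, s1, s2, s3] is parsed into the same 128-bit SGPR
// tuple as s[0:3].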
3009
3010bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
3011 unsigned &RegNum, unsigned &RegWidth,
3012 SmallVectorImpl<AsmToken> &Tokens) {
3013 auto Loc = getLoc();
3014 Reg = AMDGPU::NoRegister;
3015
3016 if (isToken(AsmToken::Identifier)) {
3017 Reg = ParseSpecialReg(RegKind, RegNum, RegWidth, Tokens);
3018 if (Reg == AMDGPU::NoRegister)
3019 Reg = ParseRegularReg(RegKind, RegNum, RegWidth, Tokens);
3020 } else {
3021 Reg = ParseRegList(RegKind, RegNum, RegWidth, Tokens);
3022 }
3023
3024 const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3025 if (Reg == AMDGPU::NoRegister) {
3026 assert(Parser.hasPendingError());
3027 return false;
3028 }
3029
3030 if (!subtargetHasRegister(*TRI, Reg)) {
3031 if (Reg == AMDGPU::SGPR_NULL) {
3032 Error(Loc, "'null' operand is not supported on this GPU");
3033 } else {
3034 Error(Loc, "register not available on this GPU");
3035 }
3036 return false;
3037 }
3038
3039 return true;
3040}
3041
3042bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
3043 unsigned &RegNum, unsigned &RegWidth,
3044 bool RestoreOnFailure /*=false*/) {
3045 Reg = AMDGPU::NoRegister;
3046
 3047 SmallVector<AsmToken, 1> Tokens;
 3048 if (ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth, Tokens)) {
3049 if (RestoreOnFailure) {
3050 while (!Tokens.empty()) {
3051 getLexer().UnLex(Tokens.pop_back_val());
3052 }
3053 }
3054 return true;
3055 }
3056 return false;
3057}
3058
3059std::optional<StringRef>
3060AMDGPUAsmParser::getGprCountSymbolName(RegisterKind RegKind) {
3061 switch (RegKind) {
3062 case IS_VGPR:
3063 return StringRef(".amdgcn.next_free_vgpr");
3064 case IS_SGPR:
3065 return StringRef(".amdgcn.next_free_sgpr");
3066 default:
3067 return std::nullopt;
3068 }
3069}
3070
3071void AMDGPUAsmParser::initializeGprCountSymbol(RegisterKind RegKind) {
3072 auto SymbolName = getGprCountSymbolName(RegKind);
3073 assert(SymbolName && "initializing invalid register kind");
3074 MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName);
3075 Sym->setVariableValue(MCConstantExpr::create(0, getContext()));
3076}
3077
3078bool AMDGPUAsmParser::updateGprCountSymbols(RegisterKind RegKind,
3079 unsigned DwordRegIndex,
3080 unsigned RegWidth) {
3081 // Symbols are only defined for GCN targets
3082 if (AMDGPU::getIsaVersion(getSTI().getCPU()).Major < 6)
3083 return true;
3084
3085 auto SymbolName = getGprCountSymbolName(RegKind);
3086 if (!SymbolName)
3087 return true;
3088 MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName);
3089
3090 int64_t NewMax = DwordRegIndex + divideCeil(RegWidth, 32) - 1;
3091 int64_t OldCount;
3092
3093 if (!Sym->isVariable())
3094 return !Error(getLoc(),
3095 ".amdgcn.next_free_{v,s}gpr symbols must be variable");
3096 if (!Sym->getVariableValue(false)->evaluateAsAbsolute(OldCount))
3097 return !Error(
3098 getLoc(),
3099 ".amdgcn.next_free_{v,s}gpr symbols must be absolute expressions");
3100
3101 if (OldCount <= NewMax)
3102 Sym->setVariableValue(MCConstantExpr::create(NewMax + 1, getContext()));
3103
3104 return true;
3105}
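// For example, after parsing v[6:7] (DwordRegIndex == 6, RegWidth == 64),
// NewMax == 7 and .amdgcn.next_free_vgpr is raised to at least 8.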
3106
3107std::unique_ptr<AMDGPUOperand>
3108AMDGPUAsmParser::parseRegister(bool RestoreOnFailure) {
3109 const auto &Tok = getToken();
3110 SMLoc StartLoc = Tok.getLoc();
3111 SMLoc EndLoc = Tok.getEndLoc();
3112 RegisterKind RegKind;
3113 unsigned Reg, RegNum, RegWidth;
3114
3115 if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth)) {
3116 return nullptr;
3117 }
3118 if (isHsaAbi(getSTI())) {
3119 if (!updateGprCountSymbols(RegKind, RegNum, RegWidth))
3120 return nullptr;
3121 } else
3122 KernelScope.usesRegister(RegKind, RegNum, RegWidth);
3123 return AMDGPUOperand::CreateReg(this, Reg, StartLoc, EndLoc);
3124}
3125
3126ParseStatus AMDGPUAsmParser::parseImm(OperandVector &Operands,
3127 bool HasSP3AbsModifier, bool HasLit) {
3128 // TODO: add syntactic sugar for 1/(2*PI)
3129
3130 if (isRegister())
3131 return ParseStatus::NoMatch;
3132 assert(!isModifier());
3133
3134 if (!HasLit) {
3135 HasLit = trySkipId("lit");
3136 if (HasLit) {
3137 if (!skipToken(AsmToken::LParen, "expected left paren after lit"))
3138 return ParseStatus::Failure;
3139 ParseStatus S = parseImm(Operands, HasSP3AbsModifier, HasLit);
3140 if (S.isSuccess() &&
3141 !skipToken(AsmToken::RParen, "expected closing parentheses"))
3142 return ParseStatus::Failure;
3143 return S;
3144 }
3145 }
3146
3147 const auto& Tok = getToken();
3148 const auto& NextTok = peekToken();
3149 bool IsReal = Tok.is(AsmToken::Real);
3150 SMLoc S = getLoc();
3151 bool Negate = false;
3152
3153 if (!IsReal && Tok.is(AsmToken::Minus) && NextTok.is(AsmToken::Real)) {
3154 lex();
3155 IsReal = true;
3156 Negate = true;
3157 }
3158
3159 AMDGPUOperand::Modifiers Mods;
3160 Mods.Lit = HasLit;
3161
3162 if (IsReal) {
 3163 // Floating-point expressions are not supported.
 3164 // Only floating-point literals with an
 3165 // optional sign are allowed.
3166
3167 StringRef Num = getTokenStr();
3168 lex();
3169
3170 APFloat RealVal(APFloat::IEEEdouble());
3171 auto roundMode = APFloat::rmNearestTiesToEven;
3172 if (errorToBool(RealVal.convertFromString(Num, roundMode).takeError()))
3173 return ParseStatus::Failure;
3174 if (Negate)
3175 RealVal.changeSign();
3176
3177 Operands.push_back(
3178 AMDGPUOperand::CreateImm(this, RealVal.bitcastToAPInt().getZExtValue(), S,
3179 AMDGPUOperand::ImmTyNone, true));
3180 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
3181 Op.setModifiers(Mods);
3182
3183 return ParseStatus::Success;
3184
3185 } else {
3186 int64_t IntVal;
3187 const MCExpr *Expr;
3188 SMLoc S = getLoc();
3189
3190 if (HasSP3AbsModifier) {
3191 // This is a workaround for handling expressions
3192 // as arguments of SP3 'abs' modifier, for example:
3193 // |1.0|
3194 // |-1|
3195 // |1+x|
 3196 // This syntax is not compatible with the syntax of standard
 3197 // MC expressions (due to the trailing '|').
3198 SMLoc EndLoc;
3199 if (getParser().parsePrimaryExpr(Expr, EndLoc, nullptr))
3200 return ParseStatus::Failure;
3201 } else {
3202 if (Parser.parseExpression(Expr))
3203 return ParseStatus::Failure;
3204 }
3205
3206 if (Expr->evaluateAsAbsolute(IntVal)) {
3207 Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S));
3208 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
3209 Op.setModifiers(Mods);
3210 } else {
3211 if (HasLit)
3212 return ParseStatus::NoMatch;
3213 Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S));
3214 }
3215
3216 return ParseStatus::Success;
3217 }
3218
3219 return ParseStatus::NoMatch;
3220}
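// Usage note: an operand such as lit(1.0) is parsed with the Lit modifier set,
// which presumably forces the value to be encoded as a literal even when it
// could otherwise be an inline constant.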
3221
3222ParseStatus AMDGPUAsmParser::parseReg(OperandVector &Operands) {
3223 if (!isRegister())
3224 return ParseStatus::NoMatch;
3225
3226 if (auto R = parseRegister()) {
3227 assert(R->isReg());
3228 Operands.push_back(std::move(R));
3229 return ParseStatus::Success;
3230 }
3231 return ParseStatus::Failure;
3232}
3233
3234ParseStatus AMDGPUAsmParser::parseRegOrImm(OperandVector &Operands,
3235 bool HasSP3AbsMod, bool HasLit) {
3236 ParseStatus Res = parseReg(Operands);
3237 if (!Res.isNoMatch())
3238 return Res;
3239 if (isModifier())
3240 return ParseStatus::NoMatch;
3241 return parseImm(Operands, HasSP3AbsMod, HasLit);
3242}
3243
3244bool
3245AMDGPUAsmParser::isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
3246 if (Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::LParen)) {
3247 const auto &str = Token.getString();
3248 return str == "abs" || str == "neg" || str == "sext";
3249 }
3250 return false;
3251}
3252
3253bool
3254AMDGPUAsmParser::isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const {
3255 return Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::Colon);
3256}
3257
3258bool
3259AMDGPUAsmParser::isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
3260 return isNamedOperandModifier(Token, NextToken) || Token.is(AsmToken::Pipe);
3261}
3262
3263bool
3264AMDGPUAsmParser::isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
3265 return isRegister(Token, NextToken) || isOperandModifier(Token, NextToken);
3266}
3267
 3268 // Check if this is an operand modifier or an opcode modifier
 3269 // which may look like an expression but is not. We should
 3270 // avoid parsing these modifiers as expressions. Currently
 3271 // recognized sequences are:
3272// |...|
3273// abs(...)
3274// neg(...)
3275// sext(...)
3276// -reg
3277// -|...|
3278// -abs(...)
3279// name:...
3280//
3281bool
3282AMDGPUAsmParser::isModifier() {
3283
3284 AsmToken Tok = getToken();
3285 AsmToken NextToken[2];
3286 peekTokens(NextToken);
3287
3288 return isOperandModifier(Tok, NextToken[0]) ||
3289 (Tok.is(AsmToken::Minus) && isRegOrOperandModifier(NextToken[0], NextToken[1])) ||
3290 isOpcodeModifierWithVal(Tok, NextToken[0]);
3291}
3292
3293// Check if the current token is an SP3 'neg' modifier.
3294// Currently this modifier is allowed in the following context:
3295//
3296// 1. Before a register, e.g. "-v0", "-v[...]" or "-[v0,v1]".
3297// 2. Before an 'abs' modifier: -abs(...)
3298// 3. Before an SP3 'abs' modifier: -|...|
3299//
3300// In all other cases "-" is handled as a part
3301// of an expression that follows the sign.
3302//
 3303 // Note: When "-" is followed by an integer literal,
 3304 // it is interpreted as integer negation rather than a
 3305 // floating-point NEG modifier applied to that literal.
 3306 // Besides being counter-intuitive, such use of the floating-point
 3307 // NEG modifier would result in different meanings
 3308 // of integer literals used with VOP1/2/C and VOP3,
3309// for example:
3310// v_exp_f32_e32 v5, -1 // VOP1: src0 = 0xFFFFFFFF
3311// v_exp_f32_e64 v5, -1 // VOP3: src0 = 0x80000001
 3312 // Negative fp literals with a preceding "-" are
 3313 // handled likewise, for uniformity.
3314//
3315bool
3316AMDGPUAsmParser::parseSP3NegModifier() {
3317
3318 AsmToken NextToken[2];
3319 peekTokens(NextToken);
3320
3321 if (isToken(AsmToken::Minus) &&
3322 (isRegister(NextToken[0], NextToken[1]) ||
3323 NextToken[0].is(AsmToken::Pipe) ||
3324 isId(NextToken[0], "abs"))) {
3325 lex();
3326 return true;
3327 }
3328
3329 return false;
3330}
3331
3332ParseStatus
3333AMDGPUAsmParser::parseRegOrImmWithFPInputMods(OperandVector &Operands,
3334 bool AllowImm) {
3335 bool Neg, SP3Neg;
3336 bool Abs, SP3Abs;
3337 bool Lit;
3338 SMLoc Loc;
3339
3340 // Disable ambiguous constructs like '--1' etc. Should use neg(-1) instead.
3341 if (isToken(AsmToken::Minus) && peekToken().is(AsmToken::Minus))
3342 return Error(getLoc(), "invalid syntax, expected 'neg' modifier");
3343
3344 SP3Neg = parseSP3NegModifier();
3345
3346 Loc = getLoc();
3347 Neg = trySkipId("neg");
3348 if (Neg && SP3Neg)
3349 return Error(Loc, "expected register or immediate");
3350 if (Neg && !skipToken(AsmToken::LParen, "expected left paren after neg"))
3351 return ParseStatus::Failure;
3352
3353 Abs = trySkipId("abs");
3354 if (Abs && !skipToken(AsmToken::LParen, "expected left paren after abs"))
3355 return ParseStatus::Failure;
3356
3357 Lit = trySkipId("lit");
3358 if (Lit && !skipToken(AsmToken::LParen, "expected left paren after lit"))
3359 return ParseStatus::Failure;
3360
3361 Loc = getLoc();
3362 SP3Abs = trySkipToken(AsmToken::Pipe);
3363 if (Abs && SP3Abs)
3364 return Error(Loc, "expected register or immediate");
3365
3366 ParseStatus Res;
3367 if (AllowImm) {
3368 Res = parseRegOrImm(Operands, SP3Abs, Lit);
3369 } else {
3370 Res = parseReg(Operands);
3371 }
3372 if (!Res.isSuccess())
3373 return (SP3Neg || Neg || SP3Abs || Abs || Lit) ? ParseStatus::Failure : Res;
3374
3375 if (Lit && !Operands.back()->isImm())
3376 Error(Loc, "expected immediate with lit modifier");
3377
3378 if (SP3Abs && !skipToken(AsmToken::Pipe, "expected vertical bar"))
3379 return ParseStatus::Failure;
3380 if (Abs && !skipToken(AsmToken::RParen, "expected closing parentheses"))
3381 return ParseStatus::Failure;
3382 if (Neg && !skipToken(AsmToken::RParen, "expected closing parentheses"))
3383 return ParseStatus::Failure;
3384 if (Lit && !skipToken(AsmToken::RParen, "expected closing parentheses"))
3385 return ParseStatus::Failure;
3386
3387 AMDGPUOperand::Modifiers Mods;
3388 Mods.Abs = Abs || SP3Abs;
3389 Mods.Neg = Neg || SP3Neg;
3390 Mods.Lit = Lit;
3391
3392 if (Mods.hasFPModifiers() || Lit) {
3393 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
3394 if (Op.isExpr())
3395 return Error(Op.getStartLoc(), "expected an absolute expression");
3396 Op.setModifiers(Mods);
3397 }
3398 return ParseStatus::Success;
3399}
3400
3401ParseStatus
3402AMDGPUAsmParser::parseRegOrImmWithIntInputMods(OperandVector &Operands,
3403 bool AllowImm) {
3404 bool Sext = trySkipId("sext");
3405 if (Sext && !skipToken(AsmToken::LParen, "expected left paren after sext"))
3406 return ParseStatus::Failure;
3407
3408 ParseStatus Res;
3409 if (AllowImm) {
3410 Res = parseRegOrImm(Operands);
3411 } else {
3412 Res = parseReg(Operands);
3413 }
3414 if (!Res.isSuccess())
3415 return Sext ? ParseStatus::Failure : Res;
3416
3417 if (Sext && !skipToken(AsmToken::RParen, "expected closing parentheses"))
3418 return ParseStatus::Failure;
3419
3420 AMDGPUOperand::Modifiers Mods;
3421 Mods.Sext = Sext;
3422
3423 if (Mods.hasIntModifiers()) {
3424 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
3425 if (Op.isExpr())
3426 return Error(Op.getStartLoc(), "expected an absolute expression");
3427 Op.setModifiers(Mods);
3428 }
3429
3430 return ParseStatus::Success;
3431}
3432
3433ParseStatus AMDGPUAsmParser::parseRegWithFPInputMods(OperandVector &Operands) {
3434 return parseRegOrImmWithFPInputMods(Operands, false);
3435}
3436
3437ParseStatus AMDGPUAsmParser::parseRegWithIntInputMods(OperandVector &Operands) {
3438 return parseRegOrImmWithIntInputMods(Operands, false);
3439}
3440
3441ParseStatus AMDGPUAsmParser::parseVReg32OrOff(OperandVector &Operands) {
3442 auto Loc = getLoc();
3443 if (trySkipId("off")) {
3444 Operands.push_back(AMDGPUOperand::CreateImm(this, 0, Loc,
3445 AMDGPUOperand::ImmTyOff, false));
3446 return ParseStatus::Success;
3447 }
3448
3449 if (!isRegister())
3450 return ParseStatus::NoMatch;
3451
3452 std::unique_ptr<AMDGPUOperand> Reg = parseRegister();
3453 if (Reg) {
3454 Operands.push_back(std::move(Reg));
3455 return ParseStatus::Success;
3456 }
3457
3458 return ParseStatus::Failure;
3459}
3460
3461unsigned AMDGPUAsmParser::checkTargetMatchPredicate(MCInst &Inst) {
3462 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
3463
3464 if ((getForcedEncodingSize() == 32 && (TSFlags & SIInstrFlags::VOP3)) ||
3465 (getForcedEncodingSize() == 64 && !(TSFlags & SIInstrFlags::VOP3)) ||
3466 (isForcedDPP() && !(TSFlags & SIInstrFlags::DPP)) ||
3467 (isForcedSDWA() && !(TSFlags & SIInstrFlags::SDWA)) )
3468 return Match_InvalidOperand;
3469
3470 if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi ||
3471 Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) {
3472 // v_mac_f32/16 allow only dst_sel == DWORD;
3473 auto OpNum =
3474 AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::dst_sel);
3475 const auto &Op = Inst.getOperand(OpNum);
3476 if (!Op.isImm() || Op.getImm() != AMDGPU::SDWA::SdwaSel::DWORD) {
3477 return Match_InvalidOperand;
3478 }
3479 }
3480
3481 return Match_Success;
3482}
3483
3484static ArrayRef<unsigned> getAllVariants() {
 3485 static const unsigned Variants[] = {
3489 };
3490
3491 return ArrayRef(Variants);
3492}
3493
3494// What asm variants we should check
3495ArrayRef<unsigned> AMDGPUAsmParser::getMatchedVariants() const {
3496 if (isForcedDPP() && isForcedVOP3()) {
3497 static const unsigned Variants[] = {AMDGPUAsmVariants::VOP3_DPP};
3498 return ArrayRef(Variants);
3499 }
3500 if (getForcedEncodingSize() == 32) {
3501 static const unsigned Variants[] = {AMDGPUAsmVariants::DEFAULT};
3502 return ArrayRef(Variants);
3503 }
3504
3505 if (isForcedVOP3()) {
3506 static const unsigned Variants[] = {AMDGPUAsmVariants::VOP3};
3507 return ArrayRef(Variants);
3508 }
3509
3510 if (isForcedSDWA()) {
3511 static const unsigned Variants[] = {AMDGPUAsmVariants::SDWA,
 3512 AMDGPUAsmVariants::SDWA9};
 3513 return ArrayRef(Variants);
3514 }
3515
3516 if (isForcedDPP()) {
3517 static const unsigned Variants[] = {AMDGPUAsmVariants::DPP};
3518 return ArrayRef(Variants);
3519 }
3520
3521 return getAllVariants();
3522}
3523
3524StringRef AMDGPUAsmParser::getMatchedVariantName() const {
3525 if (isForcedDPP() && isForcedVOP3())
3526 return "e64_dpp";
3527
3528 if (getForcedEncodingSize() == 32)
3529 return "e32";
3530
3531 if (isForcedVOP3())
3532 return "e64";
3533
3534 if (isForcedSDWA())
3535 return "sdwa";
3536
3537 if (isForcedDPP())
3538 return "dpp";
3539
3540 return "";
3541}
3542
3543unsigned AMDGPUAsmParser::findImplicitSGPRReadInVOP(const MCInst &Inst) const {
3544 const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
3545 for (MCPhysReg Reg : Desc.implicit_uses()) {
3546 switch (Reg) {
3547 case AMDGPU::FLAT_SCR:
3548 case AMDGPU::VCC:
3549 case AMDGPU::VCC_LO:
3550 case AMDGPU::VCC_HI:
3551 case AMDGPU::M0:
3552 return Reg;
3553 default:
3554 break;
3555 }
3556 }
3557 return AMDGPU::NoRegister;
3558}
3559
 3560 // NB: This code is correct only when used to check constant
 3561 // bus limitations because GFX7 has no f16 inline constants.
 3562 // Note that there are no cases in which a GFX7 opcode violates
 3563 // constant bus limitations due to the use of an f16 constant.
3564bool AMDGPUAsmParser::isInlineConstant(const MCInst &Inst,
3565 unsigned OpIdx) const {
3566 const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
3567
3568 if (!AMDGPU::isSISrcOperand(Desc, OpIdx) ||
3569 AMDGPU::isKImmOperand(Desc, OpIdx)) {
3570 return false;
3571 }
3572
3573 const MCOperand &MO = Inst.getOperand(OpIdx);
3574
3575 int64_t Val = MO.getImm();
3576 auto OpSize = AMDGPU::getOperandSize(Desc, OpIdx);
3577
3578 switch (OpSize) { // expected operand size
3579 case 8:
3580 return AMDGPU::isInlinableLiteral64(Val, hasInv2PiInlineImm());
3581 case 4:
3582 return AMDGPU::isInlinableLiteral32(Val, hasInv2PiInlineImm());
3583 case 2: {
3584 const unsigned OperandType = Desc.operands()[OpIdx].OperandType;
3588 return AMDGPU::isInlinableLiteralI16(Val, hasInv2PiInlineImm());
3589
3594
3599
3604
3609 return AMDGPU::isInlinableLiteralFP16(Val, hasInv2PiInlineImm());
3610
3615 return AMDGPU::isInlinableLiteralBF16(Val, hasInv2PiInlineImm());
3616
3617 llvm_unreachable("invalid operand type");
3618 }
3619 default:
3620 llvm_unreachable("invalid operand size");
3621 }
3622}
3623
3624unsigned AMDGPUAsmParser::getConstantBusLimit(unsigned Opcode) const {
3625 if (!isGFX10Plus())
3626 return 1;
3627
3628 switch (Opcode) {
3629 // 64-bit shift instructions can use only one scalar value input
3630 case AMDGPU::V_LSHLREV_B64_e64:
3631 case AMDGPU::V_LSHLREV_B64_gfx10:
3632 case AMDGPU::V_LSHLREV_B64_e64_gfx11:
3633 case AMDGPU::V_LSHLREV_B64_e32_gfx12:
3634 case AMDGPU::V_LSHLREV_B64_e64_gfx12:
3635 case AMDGPU::V_LSHRREV_B64_e64:
3636 case AMDGPU::V_LSHRREV_B64_gfx10:
3637 case AMDGPU::V_LSHRREV_B64_e64_gfx11:
3638 case AMDGPU::V_LSHRREV_B64_e64_gfx12:
3639 case AMDGPU::V_ASHRREV_I64_e64:
3640 case AMDGPU::V_ASHRREV_I64_gfx10:
3641 case AMDGPU::V_ASHRREV_I64_e64_gfx11:
3642 case AMDGPU::V_ASHRREV_I64_e64_gfx12:
3643 case AMDGPU::V_LSHL_B64_e64:
3644 case AMDGPU::V_LSHR_B64_e64:
3645 case AMDGPU::V_ASHR_I64_e64:
3646 return 1;
3647 default:
3648 return 2;
3649 }
3650}
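// For example, on GFX10+ a 64-bit shift such as v_lshlrev_b64 may read only
// one scalar (SGPR or literal) source, while most other VALU instructions may
// read two; on earlier targets the limit is always one.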
3651
3652constexpr unsigned MAX_SRC_OPERANDS_NUM = 6;
3653using OperandIndices = SmallVector<int16_t, MAX_SRC_OPERANDS_NUM>;
 3654
3655// Get regular operand indices in the same order as specified
3656// in the instruction (but append mandatory literals to the end).
3657static OperandIndices getSrcOperandIndices(unsigned Opcode,
 3658 bool AddMandatoryLiterals = false) {
3659
3660 int16_t ImmIdx =
3661 AddMandatoryLiterals ? getNamedOperandIdx(Opcode, OpName::imm) : -1;
3662
3663 if (isVOPD(Opcode)) {
3664 int16_t ImmDeferredIdx =
3665 AddMandatoryLiterals ? getNamedOperandIdx(Opcode, OpName::immDeferred)
3666 : -1;
3667
3668 return {getNamedOperandIdx(Opcode, OpName::src0X),
3669 getNamedOperandIdx(Opcode, OpName::vsrc1X),
3670 getNamedOperandIdx(Opcode, OpName::src0Y),
3671 getNamedOperandIdx(Opcode, OpName::vsrc1Y),
3672 ImmDeferredIdx,
3673 ImmIdx};
3674 }
3675
3676 return {getNamedOperandIdx(Opcode, OpName::src0),
3677 getNamedOperandIdx(Opcode, OpName::src1),
3678 getNamedOperandIdx(Opcode, OpName::src2), ImmIdx};
3679}
3680
3681bool AMDGPUAsmParser::usesConstantBus(const MCInst &Inst, unsigned OpIdx) {
3682 const MCOperand &MO = Inst.getOperand(OpIdx);
3683 if (MO.isImm()) {
3684 return !isInlineConstant(Inst, OpIdx);
3685 } else if (MO.isReg()) {
3686 auto Reg = MO.getReg();
3687 if (!Reg) {
3688 return false;
3689 }
3690 const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3691 auto PReg = mc2PseudoReg(Reg);
3692 return isSGPR(PReg, TRI) && PReg != SGPR_NULL;
3693 } else {
3694 return true;
3695 }
3696}
3697
3698// Based on the comment for `AMDGPUInstructionSelector::selectWritelane`:
3699// Writelane is special in that it can use SGPR and M0 (which would normally
3700// count as using the constant bus twice - but in this case it is allowed since
3701// the lane selector doesn't count as a use of the constant bus). However, it is
3702// still required to abide by the 1 SGPR rule.
3703static bool checkWriteLane(const MCInst &Inst) {
3704 const unsigned Opcode = Inst.getOpcode();
3705 if (Opcode != V_WRITELANE_B32_gfx6_gfx7 && Opcode != V_WRITELANE_B32_vi)
3706 return false;
3707 const MCOperand &LaneSelOp = Inst.getOperand(2);
3708 if (!LaneSelOp.isReg())
3709 return false;
3710 auto LaneSelReg = mc2PseudoReg(LaneSelOp.getReg());
3711 return LaneSelReg == M0 || LaneSelReg == M0_gfxpre11;
3712}
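// Illustration: for "v_writelane_b32 v1, s2, m0" on the affected targets this
// returns true, so the constant bus check below is skipped even though both an
// SGPR and M0 appear as sources.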
3713
3714bool AMDGPUAsmParser::validateConstantBusLimitations(
3715 const MCInst &Inst, const OperandVector &Operands) {
3716 const unsigned Opcode = Inst.getOpcode();
3717 const MCInstrDesc &Desc = MII.get(Opcode);
3718 unsigned LastSGPR = AMDGPU::NoRegister;
3719 unsigned ConstantBusUseCount = 0;
3720 unsigned NumLiterals = 0;
3721 unsigned LiteralSize;
3722
3723 if (!(Desc.TSFlags &
3726 !isVOPD(Opcode))
3727 return true;
3728
3729 if (checkWriteLane(Inst))
3730 return true;
3731
3732 // Check special imm operands (used by madmk, etc)
3733 if (AMDGPU::hasNamedOperand(Opcode, AMDGPU::OpName::imm)) {
3734 ++NumLiterals;
3735 LiteralSize = 4;
3736 }
3737
3738 SmallDenseSet<unsigned> SGPRsUsed;
3739 unsigned SGPRUsed = findImplicitSGPRReadInVOP(Inst);
3740 if (SGPRUsed != AMDGPU::NoRegister) {
3741 SGPRsUsed.insert(SGPRUsed);
3742 ++ConstantBusUseCount;
3743 }
3744
3745 OperandIndices OpIndices = getSrcOperandIndices(Opcode);
3746
3747 for (int OpIdx : OpIndices) {
3748 if (OpIdx == -1)
3749 continue;
3750
3751 const MCOperand &MO = Inst.getOperand(OpIdx);
3752 if (usesConstantBus(Inst, OpIdx)) {
3753 if (MO.isReg()) {
3754 LastSGPR = mc2PseudoReg(MO.getReg());
3755 // Pairs of registers with partial intersections like these
3756 // s0, s[0:1]
3757 // flat_scratch_lo, flat_scratch
3758 // flat_scratch_lo, flat_scratch_hi
3759 // are theoretically valid but they are disabled anyway.
3760 // Note that this code mimics SIInstrInfo::verifyInstruction
3761 if (SGPRsUsed.insert(LastSGPR).second) {
3762 ++ConstantBusUseCount;
3763 }
3764 } else { // Expression or a literal
3765
3766 if (Desc.operands()[OpIdx].OperandType == MCOI::OPERAND_IMMEDIATE)
3767 continue; // special operand like VINTERP attr_chan
3768
3769 // An instruction may use only one literal.
3770 // This has been validated on the previous step.
3771 // See validateVOPLiteral.
3772 // This literal may be used as more than one operand.
3773 // If all these operands are of the same size,
3774 // this literal counts as one scalar value.
3775 // Otherwise it counts as 2 scalar values.
3776 // See "GFX10 Shader Programming", section 3.6.2.3.
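// Illustrative note (not from the source): the same 32-bit literal reused for
// two 32-bit source operands counts once toward the limit, whereas reusing the
// same bits for operands of different sizes (e.g. a 32-bit and a 64-bit source)
// counts as two scalar values.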
3777
3778 unsigned Size = AMDGPU::getOperandSize(Desc, OpIdx);
3779 if (Size < 4)
3780 Size = 4;
3781
3782 if (NumLiterals == 0) {
3783 NumLiterals = 1;
3784 LiteralSize = Size;
3785 } else if (LiteralSize != Size) {
3786 NumLiterals = 2;
3787 }
3788 }
3789 }
3790 }
3791 ConstantBusUseCount += NumLiterals;
3792
3793 if (ConstantBusUseCount <= getConstantBusLimit(Opcode))
3794 return true;
3795
3796 SMLoc LitLoc = getLitLoc(Operands);
3797 SMLoc RegLoc = getRegLoc(LastSGPR, Operands);
3798 SMLoc Loc = (LitLoc.getPointer() < RegLoc.getPointer()) ? RegLoc : LitLoc;
3799 Error(Loc, "invalid operand (violates constant bus restrictions)");
3800 return false;
3801}
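// Hedged example (illustrative only): on a target whose constant bus limit is
// 1, "v_add_f32_e64 v0, s0, s1" reads two distinct SGPRs over the constant bus
// and is rejected with "invalid operand (violates constant bus restrictions)".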
3802
3803bool AMDGPUAsmParser::validateVOPDRegBankConstraints(
3804 const MCInst &Inst, const OperandVector &Operands) {
3805
3806 const unsigned Opcode = Inst.getOpcode();
3807 if (!isVOPD(Opcode))
3808 return true;
3809
3810 const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3811
3812 auto getVRegIdx = [&](unsigned, unsigned OperandIdx) {
3813 const MCOperand &Opr = Inst.getOperand(OperandIdx);
3814 return (Opr.isReg() && !isSGPR(mc2PseudoReg(Opr.getReg()), TRI))
3815 ? Opr.getReg()
3816 : MCRegister();
3817 };
3818
3819 // On GFX12, if both OpX and OpY are V_MOV_B32 then OpY uses the SRC2 source cache.
3820 bool SkipSrc = Opcode == AMDGPU::V_DUAL_MOV_B32_e32_X_MOV_B32_e32_gfx12;
3821
3822 const auto &InstInfo = getVOPDInstInfo(Opcode, &MII);
3823 auto InvalidCompOprIdx =
3824 InstInfo.getInvalidCompOperandIndex(getVRegIdx, SkipSrc);
3825 if (!InvalidCompOprIdx)
3826 return true;
3827
3828 auto CompOprIdx = *InvalidCompOprIdx;
3829 auto ParsedIdx =
3830 std::max(InstInfo[VOPD::X].getIndexInParsedOperands(CompOprIdx),
3831 InstInfo[VOPD::Y].getIndexInParsedOperands(CompOprIdx));
3832 assert(ParsedIdx > 0 && ParsedIdx < Operands.size());
3833
3834 auto Loc = ((AMDGPUOperand &)*Operands[ParsedIdx]).getStartLoc();
3835 if (CompOprIdx == VOPD::Component::DST) {
3836 Error(Loc, "one dst register must be even and the other odd");
3837 } else {
3838 auto CompSrcIdx = CompOprIdx - VOPD::Component::DST_NUM;
3839 Error(Loc, Twine("src") + Twine(CompSrcIdx) +
3840 " operands must use different VGPR banks");
3841 }
3842
3843 return false;
3844}
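// Illustrative example (assumed syntax, not from the source): in a VOPD pair
// like "v_dual_mov_b32 v0, v2 :: v_dual_mov_b32 v1, v3", the destinations v0
// and v1 satisfy the even/odd rule, and corresponding X/Y sources must come
// from different VGPR banks.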
3845
3846bool AMDGPUAsmParser::validateIntClampSupported(const MCInst &Inst) {
3847
3848 const unsigned Opc = Inst.getOpcode();
3849 const MCInstrDesc &Desc = MII.get(Opc);
3850
3851 if ((Desc.TSFlags & SIInstrFlags::IntClamp) != 0 && !hasIntClamp()) {
3852 int ClampIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp);
3853 assert(ClampIdx != -1);
3854 return Inst.getOperand(ClampIdx).getImm() == 0;
3855 }
3856
3857 return true;
3858}
3859
3860constexpr uint64_t MIMGFlags =
3861 SIInstrFlags::MIMG | SIInstrFlags::VIMAGE | SIInstrFlags::VSAMPLE;
3862
3863bool AMDGPUAsmParser::validateMIMGDataSize(const MCInst &Inst,
3864 const SMLoc &IDLoc) {
3865
3866 const unsigned Opc = Inst.getOpcode();
3867 const MCInstrDesc &Desc = MII.get(Opc);
3868
3869 if ((Desc.TSFlags & MIMGFlags) == 0)
3870 return true;
3871
3872 int VDataIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdata);
3873 int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
3874 int TFEIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::tfe);
3875
3876 assert(VDataIdx != -1);
3877
3878 if ((DMaskIdx == -1 || TFEIdx == -1) && isGFX10_AEncoding()) // intersect_ray
3879 return true;
3880
3881 unsigned VDataSize = AMDGPU::getRegOperandSize(getMRI(), Desc, VDataIdx);
3882 unsigned TFESize = (TFEIdx != -1 && Inst.getOperand(TFEIdx).getImm()) ? 1 : 0;
3883 unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
3884 if (DMask == 0)
3885 DMask = 1;
3886
3887 bool IsPackedD16 = false;
3888 unsigned DataSize =
3889 (Desc.TSFlags & SIInstrFlags::Gather4) ? 4 : llvm::popcount(DMask);
3890 if (hasPackedD16()) {
3891 int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16);
3892 IsPackedD16 = D16Idx >= 0;
3893 if (IsPackedD16 && Inst.getOperand(D16Idx).getImm())
3894 DataSize = (DataSize + 1) / 2;
3895 }
3896
3897 if ((VDataSize / 4) == DataSize + TFESize)
3898 return true;
3899
3900 StringRef Modifiers;
3901 if (isGFX90A())
3902 Modifiers = IsPackedD16 ? "dmask and d16" : "dmask";
3903 else
3904 Modifiers = IsPackedD16 ? "dmask, d16 and tfe" : "dmask and tfe";
3905
3906 Error(IDLoc, Twine("image data size does not match ") + Modifiers);
3907 return false;
3908}
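// Worked example (illustrative): with dmask = 0b0111 the operation returns 3
// components, so vdata is expected to be 3 dwords wide (2 with packed d16,
// rounded up), plus one extra dword when tfe is set.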
3909
3910bool AMDGPUAsmParser::validateMIMGAddrSize(const MCInst &Inst,
3911 const SMLoc &IDLoc) {
3912 const unsigned Opc = Inst.getOpcode();
3913 const MCInstrDesc &Desc = MII.get(Opc);
3914
3915 if ((Desc.TSFlags & MIMGFlags) == 0 || !isGFX10Plus())
3916 return true;
3917
3918 const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Opc);
3919
3920 const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode =
3921 AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode);
3922 int VAddr0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vaddr0);
3923 int RSrcOpName = Desc.TSFlags & SIInstrFlags::MIMG ? AMDGPU::OpName::srsrc
3924 : AMDGPU::OpName::rsrc;
3925 int SrsrcIdx = AMDGPU::getNamedOperandIdx(Opc, RSrcOpName);
3926 int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim);
3927 int A16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::a16);
3928
3929 assert(VAddr0Idx != -1);
3930 assert(SrsrcIdx != -1);
3931 assert(SrsrcIdx > VAddr0Idx);
3932
3933 bool IsA16 = (A16Idx != -1 && Inst.getOperand(A16Idx).getImm());
3934 if (BaseOpcode->BVH) {
3935 if (IsA16 == BaseOpcode->A16)
3936 return true;
3937 Error(IDLoc, "image address size does not match a16");
3938 return false;
3939 }
3940
3941 unsigned Dim = Inst.getOperand(DimIdx).getImm();
3942 const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByEncoding(Dim);
3943 bool IsNSA = SrsrcIdx - VAddr0Idx > 1;
3944 unsigned ActualAddrSize =
3945 IsNSA ? SrsrcIdx - VAddr0Idx
3946 : AMDGPU::getRegOperandSize(getMRI(), Desc, VAddr0Idx) / 4;
3947
3948 unsigned ExpectedAddrSize =
3949 AMDGPU::getAddrSizeMIMGOp(BaseOpcode, DimInfo, IsA16, hasG16());
3950
3951 if (IsNSA) {
3952 if (hasPartialNSAEncoding() &&
3953 ExpectedAddrSize >
3954 getNSAMaxSize(Desc.TSFlags & SIInstrFlags::VSAMPLE)) {
3955 int VAddrLastIdx = SrsrcIdx - 1;
3956 unsigned VAddrLastSize =
3957 AMDGPU::getRegOperandSize(getMRI(), Desc, VAddrLastIdx) / 4;
3958
3959 ActualAddrSize = VAddrLastIdx - VAddr0Idx + VAddrLastSize;
3960 }
3961 } else {
3962 if (ExpectedAddrSize > 12)
3963 ExpectedAddrSize = 16;
3964
3965 // Allow oversized 8 VGPR vaddr when only 5/6/7 VGPRs are required.
3966 // This provides backward compatibility for assembly created
3967 // before 160b/192b/224b types were directly supported.
3968 if (ActualAddrSize == 8 && (ExpectedAddrSize >= 5 && ExpectedAddrSize <= 7))
3969 return true;
3970 }
3971
3972 if (ActualAddrSize == ExpectedAddrSize)
3973 return true;
3974
3975 Error(IDLoc, "image address size does not match dim and a16");
3976 return false;
3977}
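// Illustrative note (not from the source): for NSA encodings the address size
// is the number of vaddr operands preceding srsrc, while for contiguous
// encodings it is the dword width of the vaddr0 tuple; either value must match
// the size implied by dim and a16.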
3978
3979bool AMDGPUAsmParser::validateMIMGAtomicDMask(const MCInst &Inst) {
3980
3981 const unsigned Opc = Inst.getOpcode();
3982 const MCInstrDesc &Desc = MII.get(Opc);
3983
3984 if ((Desc.TSFlags & MIMGFlags) == 0)
3985 return true;
3986 if (!Desc.mayLoad() || !Desc.mayStore())
3987 return true; // Not atomic
3988
3989 int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
3990 unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
3991
3992 // This is an incomplete check because image_atomic_cmpswap
3993 // may only use 0x3 and 0xf while other atomic operations
3994 // may use 0x1 and 0x3. However these limitations are
3995 // verified when we check that dmask matches dst size.
3996 return DMask == 0x1 || DMask == 0x3 || DMask == 0xf;
3997}
3998
3999bool AMDGPUAsmParser::validateMIMGGatherDMask(const MCInst &Inst) {
4000
4001 const unsigned Opc = Inst.getOpcode();
4002 const MCInstrDesc &Desc = MII.get(Opc);
4003
4004 if ((Desc.TSFlags & SIInstrFlags::Gather4) == 0)
4005 return true;
4006
4007 int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
4008 unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
4009
4010 // GATHER4 instructions use dmask in a different fashion compared to
4011 // other MIMG instructions. The only useful DMASK values are
4012 // 1=red, 2=green, 4=blue, 8=alpha. (e.g. 1 returns
4013 // (red,red,red,red) etc.) The ISA document doesn't mention
4014 // this.
4015 return DMask == 0x1 || DMask == 0x2 || DMask == 0x4 || DMask == 0x8;
4016}
4017
4018bool AMDGPUAsmParser::validateMIMGMSAA(const MCInst &Inst) {
4019 const unsigned Opc = Inst.getOpcode();
4020 const MCInstrDesc &Desc = MII.get(Opc);
4021
4022 if ((Desc.TSFlags & MIMGFlags) == 0)
4023 return true;
4024
4025 const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Opc);
4026 const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode =
4027 AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode);
4028
4029 if (!BaseOpcode->MSAA)
4030 return true;
4031
4032 int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim);
4033 assert(DimIdx != -1);
4034
4035 unsigned Dim = Inst.getOperand(DimIdx).getImm();
4036 const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByEncoding(Dim);
4037
4038 return DimInfo->MSAA;
4039}
4040
4041static bool IsMovrelsSDWAOpcode(const unsigned Opcode)
4042{
4043 switch (Opcode) {
4044 case AMDGPU::V_MOVRELS_B32_sdwa_gfx10:
4045 case AMDGPU::V_MOVRELSD_B32_sdwa_gfx10:
4046 case AMDGPU::V_MOVRELSD_2_B32_sdwa_gfx10:
4047 return true;
4048 default:
4049 return false;
4050 }
4051}
4052
4053// movrels* opcodes should only allow VGPRs as src0.
4054// This is specified in .td description for vop1/vop3,
4055// but sdwa is handled differently. See isSDWAOperand.
4056bool AMDGPUAsmParser::validateMovrels(const MCInst &Inst,
4057 const OperandVector &Operands) {
4058
4059 const unsigned Opc = Inst.getOpcode();
4060 const MCInstrDesc &Desc = MII.get(Opc);
4061
4062 if ((Desc.TSFlags & SIInstrFlags::SDWA) == 0 || !IsMovrelsSDWAOpcode(Opc))
4063 return true;
4064
4065 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
4066 assert(Src0Idx != -1);
4067
4068 SMLoc ErrLoc;
4069 const MCOperand &Src0 = Inst.getOperand(Src0Idx);
4070 if (Src0.isReg()) {
4071 auto Reg = mc2PseudoReg(Src0.getReg());
4072 const MCRegisterInfo *TRI = getContext().getRegisterInfo();
4073 if (!isSGPR(Reg, TRI))
4074 return true;
4075 ErrLoc = getRegLoc(Reg, Operands);
4076 } else {
4077 ErrLoc = getConstLoc(Operands);
4078 }
4079
4080 Error(ErrLoc, "source operand must be a VGPR");
4081 return false;
4082}
4083
4084bool AMDGPUAsmParser::validateMAIAccWrite(const MCInst &Inst,
4085 const OperandVector &Operands) {
4086
4087 const unsigned Opc = Inst.getOpcode();
4088
4089 if (Opc != AMDGPU::V_ACCVGPR_WRITE_B32_vi)
4090 return true;
4091
4092 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
4093 assert(Src0Idx != -1);
4094
4095 const MCOperand &Src0 = Inst.getOperand(Src0Idx);
4096 if (!Src0.isReg())
4097 return true;
4098
4099 auto Reg = mc2PseudoReg(Src0.getReg());
4100 const MCRegisterInfo *TRI = getContext().getRegisterInfo();
4101 if (!isGFX90A() && isSGPR(Reg, TRI)) {
4102 Error(getRegLoc(Reg, Operands),
4103 "source operand must be either a VGPR or an inline constant");
4104 return false;
4105 }
4106
4107 return true;
4108}
4109
4110bool AMDGPUAsmParser::validateMAISrc2(const MCInst &Inst,
4111 const OperandVector &Operands) {
4112 unsigned Opcode = Inst.getOpcode();
4113 const MCInstrDesc &Desc = MII.get(Opcode);
4114
4115 if (!(Desc.TSFlags & SIInstrFlags::IsMAI) ||
4116 !getFeatureBits()[FeatureMFMAInlineLiteralBug])
4117 return true;
4118
4119 const int Src2Idx = getNamedOperandIdx(Opcode, OpName::src2);
4120 if (Src2Idx == -1)
4121 return true;
4122
4123 if (Inst.getOperand(Src2Idx).isImm() && isInlineConstant(Inst, Src2Idx)) {
4124 Error(getConstLoc(Operands),
4125 "inline constants are not allowed for this operand");
4126 return false;
4127 }
4128
4129 return true;
4130}
4131
4132bool AMDGPUAsmParser::validateMFMA(const MCInst &Inst,
4133 const OperandVector &Operands) {
4134 const unsigned Opc = Inst.getOpcode();
4135 const MCInstrDesc &Desc = MII.get(Opc);
4136
4137 if ((Desc.TSFlags & SIInstrFlags::IsMAI) == 0)
4138 return true;
4139
4140 const int Src2Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2);
4141 if (Src2Idx == -1)
4142 return true;
4143
4144 const MCOperand &Src2 = Inst.getOperand(Src2Idx);
4145 if (!Src2.isReg())
4146 return true;
4147
4148 MCRegister Src2Reg = Src2.getReg();
4149 MCRegister DstReg = Inst.getOperand(0).getReg();
4150 if (Src2Reg == DstReg)
4151 return true;
4152
4153 const MCRegisterInfo *TRI = getContext().getRegisterInfo();
4154 if (TRI->getRegClass(Desc.operands()[0].RegClass).getSizeInBits() <= 128)
4155 return true;
4156
4157 if (TRI->regsOverlap(Src2Reg, DstReg)) {
4158 Error(getRegLoc(mc2PseudoReg(Src2Reg), Operands),
4159 "source 2 operand must not partially overlap with dst");
4160 return false;
4161 }
4162
4163 return true;
4164}
4165
4166bool AMDGPUAsmParser::validateDivScale(const MCInst &Inst) {
4167 switch (Inst.getOpcode()) {
4168 default:
4169 return true;
4170 case V_DIV_SCALE_F32_gfx6_gfx7:
4171 case V_DIV_SCALE_F32_vi:
4172 case V_DIV_SCALE_F32_gfx10:
4173 case V_DIV_SCALE_F64_gfx6_gfx7:
4174 case V_DIV_SCALE_F64_vi:
4175 case V_DIV_SCALE_F64_gfx10:
4176 break;
4177 }
4178
4179 // TODO: Check that src0 = src1 or src2.
4180
4181 for (auto Name : {AMDGPU::OpName::src0_modifiers,
4182 AMDGPU::OpName::src2_modifiers,
4183 AMDGPU::OpName::src2_modifiers}) {
4184 if (Inst.getOperand(AMDGPU::getNamedOperandIdx(Inst.getOpcode(), Name))
4185 .getImm() &
4186 SISrcMods::ABS) {
4187 return false;
4188 }
4189 }
4190
4191 return true;
4192}
4193
4194bool AMDGPUAsmParser::validateMIMGD16(const MCInst &Inst) {
4195
4196 const unsigned Opc = Inst.getOpcode();
4197 const MCInstrDesc &Desc = MII.get(Opc);
4198
4199 if ((Desc.TSFlags & MIMGFlags) == 0)
4200 return true;
4201
4202 int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16);
4203 if (D16Idx >= 0 && Inst.getOperand(D16Idx).getImm()) {
4204 if (isCI() || isSI())
4205 return false;
4206 }
4207
4208 return true;
4209}
4210
4211static bool IsRevOpcode(const unsigned Opcode)
4212{
4213 switch (Opcode) {
4214 case AMDGPU::V_SUBREV_F32_e32:
4215 case AMDGPU::V_SUBREV_F32_e64:
4216 case AMDGPU::V_SUBREV_F32_e32_gfx10:
4217 case AMDGPU::V_SUBREV_F32_e32_gfx6_gfx7:
4218 case AMDGPU::V_SUBREV_F32_e32_vi:
4219 case AMDGPU::V_SUBREV_F32_e64_gfx10:
4220 case AMDGPU::V_SUBREV_F32_e64_gfx6_gfx7:
4221 case AMDGPU::V_SUBREV_F32_e64_vi:
4222
4223 case AMDGPU::V_SUBREV_CO_U32_e32:
4224 case AMDGPU::V_SUBREV_CO_U32_e64:
4225 case AMDGPU::V_SUBREV_I32_e32_gfx6_gfx7:
4226 case AMDGPU::V_SUBREV_I32_e64_gfx6_gfx7:
4227
4228 case AMDGPU::V_SUBBREV_U32_e32:
4229 case AMDGPU::V_SUBBREV_U32_e64:
4230 case AMDGPU::V_SUBBREV_U32_e32_gfx6_gfx7:
4231 case AMDGPU::V_SUBBREV_U32_e32_vi:
4232 case AMDGPU::V_SUBBREV_U32_e64_gfx6_gfx7:
4233 case AMDGPU::V_SUBBREV_U32_e64_vi:
4234
4235 case AMDGPU::V_SUBREV_U32_e32:
4236 case AMDGPU::V_SUBREV_U32_e64:
4237 case AMDGPU::V_SUBREV_U32_e32_gfx9:
4238 case AMDGPU::V_SUBREV_U32_e32_vi:
4239 case AMDGPU::V_SUBREV_U32_e64_gfx9:
4240 case AMDGPU::V_SUBREV_U32_e64_vi:
4241
4242 case AMDGPU::V_SUBREV_F16_e32:
4243 case AMDGPU::V_SUBREV_F16_e64:
4244 case AMDGPU::V_SUBREV_F16_e32_gfx10:
4245 case AMDGPU::V_SUBREV_F16_e32_vi:
4246 case AMDGPU::V_SUBREV_F16_e64_gfx10:
4247 case AMDGPU::V_SUBREV_F16_e64_vi:
4248
4249 case AMDGPU::V_SUBREV_U16_e32:
4250 case AMDGPU::V_SUBREV_U16_e64:
4251 case AMDGPU::V_SUBREV_U16_e32_vi:
4252 case AMDGPU::V_SUBREV_U16_e64_vi:
4253
4254 case AMDGPU::V_SUBREV_CO_U32_e32_gfx9:
4255 case AMDGPU::V_SUBREV_CO_U32_e64_gfx10:
4256 case AMDGPU::V_SUBREV_CO_U32_e64_gfx9:
4257
4258 case AMDGPU::V_SUBBREV_CO_U32_e32_gfx9:
4259 case AMDGPU::V_SUBBREV_CO_U32_e64_gfx9:
4260
4261 case AMDGPU::V_SUBREV_NC_U32_e32_gfx10:
4262 case AMDGPU::V_SUBREV_NC_U32_e64_gfx10:
4263
4264 case AMDGPU::V_SUBREV_CO_CI_U32_e32_gfx10:
4265 case AMDGPU::V_SUBREV_CO_CI_U32_e64_gfx10:
4266
4267 case AMDGPU::V_LSHRREV_B32_e32:
4268 case AMDGPU::V_LSHRREV_B32_e64:
4269 case AMDGPU::V_LSHRREV_B32_e32_gfx6_gfx7:
4270 case AMDGPU::V_LSHRREV_B32_e64_gfx6_gfx7:
4271 case AMDGPU::V_LSHRREV_B32_e32_vi:
4272 case AMDGPU::V_LSHRREV_B32_e64_vi:
4273 case AMDGPU::V_LSHRREV_B32_e32_gfx10:
4274 case AMDGPU::V_LSHRREV_B32_e64_gfx10:
4275
4276 case AMDGPU::V_ASHRREV_I32_e32:
4277 case AMDGPU::V_ASHRREV_I32_e64:
4278 case AMDGPU::V_ASHRREV_I32_e32_gfx10:
4279 case AMDGPU::V_ASHRREV_I32_e32_gfx6_gfx7:
4280 case AMDGPU::V_ASHRREV_I32_e32_vi:
4281 case AMDGPU::V_ASHRREV_I32_e64_gfx10:
4282 case AMDGPU::V_ASHRREV_I32_e64_gfx6_gfx7:
4283 case AMDGPU::V_ASHRREV_I32_e64_vi:
4284
4285 case AMDGPU::V_LSHLREV_B32_e32:
4286 case AMDGPU::V_LSHLREV_B32_e64:
4287 case AMDGPU::V_LSHLREV_B32_e32_gfx10:
4288 case AMDGPU::V_LSHLREV_B32_e32_gfx6_gfx7:
4289 case AMDGPU::V_LSHLREV_B32_e32_vi:
4290 case AMDGPU::V_LSHLREV_B32_e64_gfx10:
4291 case AMDGPU::V_LSHLREV_B32_e64_gfx6_gfx7:
4292 case AMDGPU::V_LSHLREV_B32_e64_vi:
4293
4294 case AMDGPU::V_LSHLREV_B16_e32:
4295 case AMDGPU::V_LSHLREV_B16_e64:
4296 case AMDGPU::V_LSHLREV_B16_e32_vi:
4297 case AMDGPU::V_LSHLREV_B16_e64_vi:
4298 case AMDGPU::V_LSHLREV_B16_gfx10:
4299
4300 case AMDGPU::V_LSHRREV_B16_e32:
4301 case AMDGPU::V_LSHRREV_B16_e64:
4302 case AMDGPU::V_LSHRREV_B16_e32_vi:
4303 case AMDGPU::V_LSHRREV_B16_e64_vi:
4304 case AMDGPU::V_LSHRREV_B16_gfx10:
4305
4306 case AMDGPU::V_ASHRREV_I16_e32:
4307 case AMDGPU::V_ASHRREV_I16_e64:
4308 case AMDGPU::V_ASHRREV_I16_e32_vi:
4309 case AMDGPU::V_ASHRREV_I16_e64_vi:
4310 case AMDGPU::V_ASHRREV_I16_gfx10:
4311
4312 case AMDGPU::V_LSHLREV_B64_e64:
4313 case AMDGPU::V_LSHLREV_B64_gfx10:
4314 case AMDGPU::V_LSHLREV_B64_vi:
4315
4316 case AMDGPU::V_LSHRREV_B64_e64:
4317 case AMDGPU::V_LSHRREV_B64_gfx10:
4318 case AMDGPU::V_LSHRREV_B64_vi:
4319
4320 case AMDGPU::V_ASHRREV_I64_e64:
4321 case AMDGPU::V_ASHRREV_I64_gfx10:
4322 case AMDGPU::V_ASHRREV_I64_vi:
4323
4324 case AMDGPU::V_PK_LSHLREV_B16:
4325 case AMDGPU::V_PK_LSHLREV_B16_gfx10:
4326 case AMDGPU::V_PK_LSHLREV_B16_vi:
4327
4328 case AMDGPU::V_PK_LSHRREV_B16:
4329 case AMDGPU::V_PK_LSHRREV_B16_gfx10:
4330 case AMDGPU::V_PK_LSHRREV_B16_vi:
4331 case AMDGPU::V_PK_ASHRREV_I16:
4332 case AMDGPU::V_PK_ASHRREV_I16_gfx10:
4333 case AMDGPU::V_PK_ASHRREV_I16_vi:
4334 return true;
4335 default:
4336 return false;
4337 }
4338}
4339
4340std::optional<StringRef>
4341AMDGPUAsmParser::validateLdsDirect(const MCInst &Inst) {
4342
4343 using namespace SIInstrFlags;
4344 const unsigned Opcode = Inst.getOpcode();
4345 const MCInstrDesc &Desc = MII.get(Opcode);
4346
4347 // lds_direct register is defined so that it can be used
4348 // with 9-bit operands only. Ignore encodings which do not accept these.
4349 const auto Enc = VOP1 | VOP2 | VOP3 | VOPC | VOP3P | SIInstrFlags::SDWA;
4350 if ((Desc.TSFlags & Enc) == 0)
4351 return std::nullopt;
4352
4353 for (auto SrcName : {OpName::src0, OpName::src1, OpName::src2}) {
4354 auto SrcIdx = getNamedOperandIdx(Opcode, SrcName);
4355 if (SrcIdx == -1)
4356 break;
4357 const auto &Src = Inst.getOperand(SrcIdx);
4358 if (Src.isReg() && Src.getReg() == LDS_DIRECT) {
4359
4360 if (isGFX90A() || isGFX11Plus())
4361 return StringRef("lds_direct is not supported on this GPU");
4362
4363 if (IsRevOpcode(Opcode) || (Desc.TSFlags & SIInstrFlags::SDWA))
4364 return StringRef("lds_direct cannot be used with this instruction");
4365
4366 if (SrcName != OpName::src0)
4367 return StringRef("lds_direct may be used as src0 only");
4368 }
4369 }
4370
4371 return std::nullopt;
4372}
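// Illustrative examples (not from the source): "v_mov_b32 v0, lds_direct" is
// accepted on GPUs that still support lds_direct, whereas using it as src1 or
// src2, or with an SDWA or "rev" opcode, is diagnosed by the checks above.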
4373
4374SMLoc AMDGPUAsmParser::getFlatOffsetLoc(const OperandVector &Operands) const {
4375 for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
4376 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
4377 if (Op.isFlatOffset())
4378 return Op.getStartLoc();
4379 }
4380 return getLoc();
4381}
4382
4383bool AMDGPUAsmParser::validateOffset(const MCInst &Inst,
4384 const OperandVector &Operands) {
4385 auto Opcode = Inst.getOpcode();
4386 auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset);
4387 if (OpNum == -1)
4388 return true;
4389
4390 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
4391 if ((TSFlags & SIInstrFlags::FLAT))
4392 return validateFlatOffset(Inst, Operands);
4393
4394 if ((TSFlags & SIInstrFlags::SMRD))
4395 return validateSMEMOffset(Inst, Operands);
4396
4397 const auto &Op = Inst.getOperand(OpNum);
4398 if (isGFX12Plus() &&
4399 (TSFlags & (SIInstrFlags::MUBUF | SIInstrFlags::MTBUF))) {
4400 const unsigned OffsetSize = 24;
4401 if (!isIntN(OffsetSize, Op.getImm())) {
4402 Error(getFlatOffsetLoc(Operands),
4403 Twine("expected a ") + Twine(OffsetSize) + "-bit signed offset");
4404 return false;
4405 }
4406 } else {
4407 const unsigned OffsetSize = 16;
4408 if (!isUIntN(OffsetSize, Op.getImm())) {
4409 Error(getFlatOffsetLoc(Operands),
4410 Twine("expected a ") + Twine(OffsetSize) + "-bit unsigned offset");
4411 return false;
4412 }
4413 }
4414 return true;
4415}
4416
4417bool AMDGPUAsmParser::validateFlatOffset(const MCInst &Inst,
4418 const OperandVector &Operands) {
4419 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
4420 if ((TSFlags & SIInstrFlags::FLAT) == 0)
4421 return true;
4422
4423 auto Opcode = Inst.getOpcode();
4424 auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset);
4425 assert(OpNum != -1);
4426
4427 const auto &Op = Inst.getOperand(OpNum);
4428 if (!hasFlatOffsets() && Op.getImm() != 0) {
4429 Error(getFlatOffsetLoc(Operands),
4430 "flat offset modifier is not supported on this GPU");
4431 return false;
4432 }
4433
4434 // For pre-GFX12 FLAT instructions the offset must be positive;
4435 // MSB is ignored and forced to zero.
4436 unsigned OffsetSize = AMDGPU::getNumFlatOffsetBits(getSTI());
4437 bool AllowNegative =
4438 (TSFlags & (SIInstrFlags::FlatGlobal | SIInstrFlags::FlatScratch)) ||
4439 isGFX12Plus();
4440 if (!isIntN(OffsetSize, Op.getImm()) || (!AllowNegative && Op.getImm() < 0)) {
4441 Error(getFlatOffsetLoc(Operands),
4442 Twine("expected a ") +
4443 (AllowNegative ? Twine(OffsetSize) + "-bit signed offset"
4444 : Twine(OffsetSize - 1) + "-bit unsigned offset"));
4445 return false;
4446 }
4447
4448 return true;
4449}
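// Worked example (illustrative): if getNumFlatOffsetBits() reports 13 and
// negative offsets are not allowed, the accepted range is 0..4095 (a 12-bit
// unsigned offset); when negative offsets are allowed it is -4096..4095.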
4450
4451SMLoc AMDGPUAsmParser::getSMEMOffsetLoc(const OperandVector &Operands) const {
4452 // Start with second operand because SMEM Offset cannot be dst or src0.
4453 for (unsigned i = 2, e = Operands.size(); i != e; ++i) {
4454 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
4455 if (Op.isSMEMOffset() || Op.isSMEMOffsetMod())
4456 return Op.getStartLoc();
4457 }
4458 return getLoc();
4459}
4460
4461bool AMDGPUAsmParser::validateSMEMOffset(const MCInst &Inst,
4462 const OperandVector &Operands) {
4463 if (isCI() || isSI())
4464 return true;
4465
4466 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
4467 if ((TSFlags & SIInstrFlags::SMRD) == 0)
4468 return true;
4469
4470 auto Opcode = Inst.getOpcode();
4471 auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset);
4472 if (OpNum == -1)
4473 return true;
4474
4475 const auto &Op = Inst.getOperand(OpNum);
4476 if (!Op.isImm())
4477 return true;
4478
4479 uint64_t Offset = Op.getImm();
4480 bool IsBuffer = AMDGPU::getSMEMIsBuffer(Opcode);
4481 if (AMDGPU::isLegalSMRDEncodedUnsignedOffset(getSTI(), Offset) ||
4482 AMDGPU::isLegalSMRDEncodedSignedOffset(getSTI(), Offset, IsBuffer))
4483 return true;
4484
4485 Error(getSMEMOffsetLoc(Operands),
4486 isGFX12Plus() ? "expected a 24-bit signed offset"
4487 : (isVI() || IsBuffer) ? "expected a 20-bit unsigned offset"
4488 : "expected a 21-bit signed offset");
4489
4490 return false;
4491}
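// Hedged summary of the diagnostics above: GFX12+ accepts a 24-bit signed
// offset, VI and buffer forms a 20-bit unsigned offset, and other targets a
// 21-bit signed offset.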
4492
4493bool AMDGPUAsmParser::validateSOPLiteral(const MCInst &Inst) const {
4494 unsigned Opcode = Inst.getOpcode();
4495 const MCInstrDesc &Desc = MII.get(Opcode);
4496 if (!(Desc.TSFlags & (SIInstrFlags::SOP2 | SIInstrFlags::SOPC)))
4497 return true;
4498
4499 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
4500 const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
4501
4502 const int OpIndices[] = { Src0Idx, Src1Idx };
4503
4504 unsigned NumExprs = 0;
4505 unsigned NumLiterals = 0;
4506 uint32_t LiteralValue;
4507
4508 for (int OpIdx : OpIndices) {
4509 if (OpIdx == -1) break;
4510
4511 const MCOperand &MO = Inst.getOperand(OpIdx);
4512 // Exclude special imm operands (like that used by s_set_gpr_idx_on)
4513 if (AMDGPU::isSISrcOperand(Desc, OpIdx)) {
4514 if (MO.isImm() && !isInlineConstant(Inst, OpIdx)) {
4515 uint32_t Value = static_cast<uint32_t>(MO.getImm());
4516 if (NumLiterals == 0 || LiteralValue != Value) {
4517 LiteralValue = Value;
4518 ++NumLiterals;
4519 }
4520 } else if (MO.isExpr()) {
4521 ++NumExprs;
4522 }
4523 }
4524 }
4525
4526 return NumLiterals + NumExprs <= 1;
4527}
4528
4529bool AMDGPUAsmParser::validateOpSel(const MCInst &Inst) {
4530 const unsigned Opc = Inst.getOpcode();
4531 if (isPermlane16(Opc)) {
4532 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
4533 unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
4534
4535 if (OpSel & ~3)
4536 return false;
4537 }
4538
4539 uint64_t TSFlags = MII.get(Opc).TSFlags;
4540
4541 if (isGFX940() && (TSFlags & SIInstrFlags::IsDOT)) {
4542 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
4543 if (OpSelIdx != -1) {
4544 if (Inst.getOperand(OpSelIdx).getImm() != 0)
4545 return false;
4546 }
4547 int OpSelHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel_hi);
4548 if (OpSelHiIdx != -1) {
4549 if (Inst.getOperand(OpSelHiIdx).getImm() != -1)
4550 return false;
4551 }
4552 }
4553
4554 // op_sel[0:1] must be 0 for v_dot2_bf16_bf16 and v_dot2_f16_f16 (VOP3 Dot).
4555 if (isGFX11Plus() && (TSFlags & SIInstrFlags::IsDOT) &&
4556 (TSFlags & SIInstrFlags::VOP3) && !(TSFlags & SIInstrFlags::VOP3P)) {
4557 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
4558 unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
4559 if (OpSel & 3)
4560 return false;
4561 }
4562
4563 return true;
4564}
4565
4566bool AMDGPUAsmParser::validateNeg(const MCInst &Inst, int OpName) {
4567 assert(OpName == AMDGPU::OpName::neg_lo || OpName == AMDGPU::OpName::neg_hi);
4568
4569 const unsigned Opc = Inst.getOpcode();
4570 uint64_t TSFlags = MII.get(Opc).TSFlags;
4571
4572 // v_dot4 fp8/bf8 neg_lo/neg_hi not allowed on src0 and src1 (allowed on src2)
4573 // v_wmma iu4/iu8 neg_lo not allowed on src2 (allowed on src0, src1)
4574 // v_swmmac f16/bf16 neg_lo/neg_hi not allowed on src2 (allowed on src0, src1)
4575 // other wmma/swmmac instructions don't have neg_lo/neg_hi operand.
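// Illustrative example (assumed syntax): an iu8 v_wmma written with
// neg_lo:[0,0,1] is rejected because neg is not permitted on its src2, while
// neg_lo:[1,1,0] on src0/src1 is accepted.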
4576 if (!(TSFlags & SIInstrFlags::IsDOT) && !(TSFlags & SIInstrFlags::IsWMMA) &&
4577 !(TSFlags & SIInstrFlags::IsSWMMAC))
4578 return true;
4579
4580 int NegIdx = AMDGPU::getNamedOperandIdx(Opc, OpName);
4581 if (NegIdx == -1)
4582 return true;
4583
4584 unsigned Neg = Inst.getOperand(NegIdx).getImm();
4585
4586 // Some instructions have a neg_lo or neg_hi operand, yet the neg modifier is
4587 // allowed only on certain src operands and not on others.
4588 // Conveniently, such instructions don't have a src_modifiers operand for the
4589 // src operands that don't allow neg, because those also don't allow opsel.
4590
4591 int SrcMods[3] = {AMDGPU::OpName::src0_modifiers,
4592 AMDGPU::OpName::src1_modifiers,
4593 AMDGPU::OpName::src2_modifiers};
4594
4595 for (unsigned i = 0; i < 3; ++i) {
4596 if (!AMDGPU::hasNamedOperand(Opc, SrcMods[i])) {
4597 if (Neg & (1 << i))
4598 return false;
4599 }
4600 }
4601
4602 return true;
4603}
4604
4605bool AMDGPUAsmParser::validateDPP(const MCInst &Inst,
4606 const OperandVector &Operands) {
4607 const unsigned Opc = Inst.getOpcode();
4608 int DppCtrlIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dpp_ctrl);
4609 if (DppCtrlIdx >= 0) {
4610 unsigned DppCtrl = Inst.getOperand(DppCtrlIdx).getImm();
4611
4612 if (!AMDGPU::isLegalDPALU_DPPControl(DppCtrl) &&
4613 AMDGPU::isDPALU_DPP(MII.get(Opc))) {
4614 // DP ALU DPP is supported for row_newbcast only on GFX9*
4615 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyDppCtrl, Operands);
4616 Error(S, "DP ALU dpp only supports row_newbcast");
4617 return false;
4618 }
4619 }
4620
4621 int Dpp8Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dpp8);
4622 bool IsDPP = DppCtrlIdx >= 0 || Dpp8Idx >= 0;
4623
4624 if (IsDPP && !hasDPPSrc1SGPR(getSTI())) {
4625 int Src1Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src1);
4626 if (Src1Idx >= 0) {
4627 const MCOperand &Src1 = Inst.getOperand(Src1Idx);
4628 const MCRegisterInfo *TRI = getContext().getRegisterInfo();
4629 if (Src1.isImm() ||
4630 (Src1.isReg() && isSGPR(mc2PseudoReg(Src1.getReg()), TRI))) {
4631 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[Src1Idx]);
4632 Error(Op.getStartLoc(), "invalid operand for instruction");
4633 return false;
4634 }
4635 }
4636 }
4637
4638 return true;
4639}
4640
4641// Check if VCC register matches wavefront size
4642bool AMDGPUAsmParser::validateVccOperand(unsigned Reg) const {
4643 auto FB = getFeatureBits();
4644 return (FB[AMDGPU::FeatureWavefrontSize64] && Reg == AMDGPU::VCC) ||
4645 (FB[AMDGPU::FeatureWavefrontSize32] && Reg == AMDGPU::VCC_LO);
4646}
4647
4648// One unique literal can be used. VOP3 literal is only allowed in GFX10+
4649bool AMDGPUAsmParser::validateVOPLiteral(const MCInst &Inst,
4650 const OperandVector &Operands) {
4651 unsigned Opcode = Inst.getOpcode();
4652 const MCInstrDesc &Desc = MII.get(Opcode);
4653 bool HasMandatoryLiteral = getNamedOperandIdx(Opcode, OpName::imm) != -1;
4654 if (!(Desc.TSFlags & (SIInstrFlags::VOP3 | SIInstrFlags::VOP3P)) &&
4655 !HasMandatoryLiteral && !isVOPD(Opcode))
4656 return true;
4657
4658 OperandIndices OpIndices = getSrcOperandIndices(Opcode, HasMandatoryLiteral);
4659
4660 unsigned NumExprs = 0;
4661 unsigned NumLiterals = 0;
4662 uint64_t LiteralValue;
4663
4664 for (int OpIdx : OpIndices) {
4665 if (OpIdx == -1)
4666 continue;
4667
4668 const MCOperand &MO = Inst.getOperand(OpIdx);
4669 if (!MO.isImm() && !MO.isExpr())
4670 continue;
4671 if (!isSISrcOperand(Desc, OpIdx))
4672 continue;
4673
4674 if (MO.isImm() && !isInlineConstant(Inst, OpIdx)) {
4675 uint64_t Value = static_cast<uint64_t>(MO.getImm());
4676 bool IsFP64 = AMDGPU::isSISrcFPOperand(Desc, OpIdx) &&
4677 AMDGPU::getOperandSize(Desc.operands()[OpIdx]) == 8;
4678 bool IsValid32Op = AMDGPU::isValid32BitLiteral(Value, IsFP64);
4679
4680 if (!IsValid32Op && !isInt<32>(Value) && !isUInt<32>(Value)) {
4681 Error(getLitLoc(Operands), "invalid operand for instruction");
4682 return false;
4683 }
4684
4685 if (IsFP64 && IsValid32Op)
4686 Value = Hi_32(Value);
4687
4688 if (NumLiterals == 0 || LiteralValue != Value) {
4689 LiteralValue = Value;
4690 ++NumLiterals;
4691 }
4692 } else if (MO.isExpr()) {
4693 ++NumExprs;
4694 }
4695 }
4696 NumLiterals += NumExprs;
4697
4698 if (!NumLiterals)
4699 return true;
4700
4701 if (!HasMandatoryLiteral && !getFeatureBits()[FeatureVOP3Literal]) {
4702 Error(getLitLoc(Operands), "literal operands are not supported");
4703 return false;
4704 }
4705
4706 if (NumLiterals > 1) {
4707 Error(getLitLoc(Operands, true), "only one unique literal operand is allowed");
4708 return false;
4709 }
4710
4711 return true;
4712}
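// Hedged example (illustrative only): "v_fma_f32 v0, 0x3f000000, v1, 0x40000000"
// carries two distinct literal values and is rejected, whereas repeating the
// same literal value in both source slots counts as a single literal.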
4713
4714// Returns -1 if not a register, 0 if VGPR and 1 if AGPR.
4715static int IsAGPROperand(const MCInst &Inst, uint16_t NameIdx,
4716 const MCRegisterInfo *MRI) {
4717 int OpIdx = AMDGPU::getNamedOperandIdx(Inst.getOpcode(), NameIdx);
4718 if (OpIdx < 0)
4719 return -1;
4720
4721 const MCOperand &Op = Inst.getOperand(OpIdx);
4722 if (!Op.isReg())
4723 return -1;
4724
4725 unsigned Sub = MRI->getSubReg(Op.getReg(), AMDGPU::sub0);
4726 auto Reg = Sub ? Sub : Op.getReg();
4727 const MCRegisterClass &AGPR32 = MRI->getRegClass(AMDGPU::AGPR_32RegClassID);
4728 return AGPR32.contains(Reg) ? 1 : 0;
4729}
4730
4731bool AMDGPUAsmParser::validateAGPRLdSt(const MCInst &Inst) const {
4732 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
4733 if ((TSFlags & (SIInstrFlags::FLAT | SIInstrFlags::MUBUF |
4734 SIInstrFlags::MTBUF | SIInstrFlags::MIMG |
4735 SIInstrFlags::DS)) == 0)
4736 return true;
4737
4738 uint16_t DataNameIdx = (TSFlags & SIInstrFlags::DS) ? AMDGPU::OpName::data0
4739 : AMDGPU::OpName::vdata;
4740
4741 const MCRegisterInfo *MRI = getMRI();
4742 int DstAreg = IsAGPROperand(Inst, AMDGPU::OpName::vdst, MRI);
4743 int DataAreg = IsAGPROperand(Inst, DataNameIdx, MRI);
4744
4745 if ((TSFlags & SIInstrFlags::DS) && DataAreg >= 0) {
4746 int Data2Areg = IsAGPROperand(Inst, AMDGPU::OpName::data1, MRI);
4747 if (Data2Areg >= 0 && Data2Areg != DataAreg)
4748 return false;
4749 }
4750
4751 auto FB = getFeatureBits();
4752 if (FB[AMDGPU::FeatureGFX90AInsts]) {
4753 if (DataAreg < 0 || DstAreg < 0)
4754 return true;
4755 return DstAreg == DataAreg;
4756 }
4757
4758 return DstAreg < 1 && DataAreg < 1;
4759}
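// Hedged note (not from the source): on gfx90a a memory instruction may use
// either all-VGPR or all-AGPR data/dst operands, but not a mixture; on targets
// without gfx90a instructions, AGPR data/dst operands are rejected entirely.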
4760
4761bool AMDGPUAsmParser::validateVGPRAlign(const MCInst &Inst) const {
4762 auto FB = getFeatureBits();
4763 if (!FB[AMDGPU::FeatureGFX90AInsts])
4764 return true;
4765
4766 const MCRegisterInfo *MRI = getMRI();
4767 const MCRegisterClass &VGPR32 = MRI->getRegClass(AMDGPU::VGPR_32RegClassID);
4768 const MCRegisterClass &AGPR32 = MRI->getRegClass(AMDGPU::AGPR_32RegClassID);
4769 for (unsigned I = 0, E = Inst.getNumOperands(); I != E; ++I) {
4770 const MCOperand &Op = Inst.getOperand(I);
4771 if (!Op.isReg())
4772 continue;
4773
4774 unsigned Sub = MRI->getSubReg(Op.getReg(), AMDGPU::sub0);
4775 if (!Sub)
4776 continue;
4777
4778 if (VGPR32.contains(Sub) && ((Sub - AMDGPU::VGPR0) & 1))
4779 return false;
4780 if (AGPR32.contains(Sub) && ((Sub - AMDGPU::AGPR0) & 1))
4781 return false;
4782 }
4783
4784 return true;
4785}
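// Worked example (illustrative): on gfx90a a 64-bit register tuple such as
// v[3:4] starts at an odd register and is rejected, while v[2:3] satisfies the
// required 64-bit alignment.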
4786
4787SMLoc AMDGPUAsmParser::getBLGPLoc(const OperandVector &Operands) const {
4788 for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
4789 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
4790 if (Op.isBLGP())
4791 return Op.getStartLoc();
4792 }
4793 return SMLoc();
4794}
4795
4796bool AMDGPUAsmParser::validateBLGP(const MCInst &Inst,
4797 const OperandVector &Operands) {
4798 unsigned Opc = Inst.getOpcode();
4799 int BlgpIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::blgp);
4800 if (BlgpIdx == -1)
4801 return true;
4802 SMLoc BLGPLoc = getBLGPLoc(Operands);
4803 if (!BLGPLoc.isValid())
4804 return true;
4805 bool IsNeg = StringRef(BLGPLoc.getPointer()).starts_with("neg:");
4806 auto FB = getFeatureBits();
4807 bool UsesNeg = false;
4808 if (FB[AMDGPU::FeatureGFX940Insts]) {
4809 switch (Opc) {
4810 case AMDGPU::V_MFMA_F64_16X16X4F64_gfx940_acd:
4811 case AMDGPU::V_MFMA_F64_16X16X4F64_gfx940_vcd:
4812 case AMDGPU::V_MFMA_F64_4X4X4F64_gfx940_acd:
4813 case AMDGPU::V_MFMA_F64_4X4X4F64_gfx940_vcd:
4814 UsesNeg = true;
4815 }
4816 }
4817
4818 if (IsNeg == UsesNeg)
4819 return true;
4820
4821 Error(BLGPLoc,
4822 UsesNeg ? "invalid modifier: blgp is not supported"
4823 : "invalid modifier: neg is not supported");
4824
4825 return false;
4826}
4827
4828bool AMDGPUAsmParser::validateWaitCnt(const MCInst &Inst,
4829 const OperandVector &Operands) {
4830 if (!isGFX11Plus())
4831 return true;
4832
4833 unsigned Opc = Inst.getOpcode();
4834 if (Opc != AMDGPU::S_WAITCNT_EXPCNT_gfx11 &&
4835 Opc != AMDGPU::S_WAITCNT_LGKMCNT_gfx11 &&
4836 Opc != AMDGPU::S_WAITCNT_VMCNT_gfx11 &&
4837 Opc != AMDGPU::S_WAITCNT_VSCNT_gfx11)
4838 return true;
4839
4840 int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::sdst);
4841 assert(Src0Idx >= 0 && Inst.getOperand(Src0Idx).isReg());
4842 auto Reg = mc2PseudoReg(Inst.getOperand(Src0Idx).getReg());
4843 if (Reg == AMDGPU::SGPR_NULL)
4844 return true;
4845
4846 SMLoc RegLoc = getRegLoc(Reg, Operands);
4847 Error(RegLoc, "src0 must be null");
4848 return false;
4849}
4850
4851bool AMDGPUAsmParser::validateDS(const MCInst &Inst,
4852 const OperandVector &Operands) {
4853 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
4854 if ((TSFlags & SIInstrFlags::DS) == 0)
4855 return true;
4856 if (TSFlags & SIInstrFlags::GWS)
4857 return validateGWS(Inst, Operands);
4858 // Only validate GDS for non-GWS instructions.
4859 if (hasGDS())
4860 return true;
4861 int GDSIdx =
4862 AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::gds);
4863 if (GDSIdx < 0)
4864 return true;
4865 unsigned GDS = Inst.getOperand(GDSIdx).getImm();
4866 if (GDS) {
4867 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyGDS, Operands);
4868 Error(S, "gds modifier is not supported on this GPU");
4869 return false;
4870 }
4871 return true;
4872}
4873
4874// gfx90a has an undocumented limitation:
4875// DS_GWS opcodes must use even aligned registers.
4876bool AMDGPUAsmParser::validateGWS(const MCInst &Inst,
4877 const OperandVector &Operands) {
4878 if (!getFeatureBits()[AMDGPU::FeatureGFX90AInsts])
4879 return true;
4880
4881 int Opc = Inst.getOpcode();
4882 if (Opc != AMDGPU::DS_GWS_INIT_vi && Opc != AMDGPU::DS_GWS_BARRIER_vi &&
4883 Opc != AMDGPU::DS_GWS_SEMA_BR_vi)
4884 return true;
4885
4886 const MCRegisterInfo *MRI = getMRI();
4887 const MCRegisterClass &VGPR32 = MRI->getRegClass(AMDGPU::VGPR_32RegClassID);
4888 int Data0Pos =
4889 AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::data0);
4890 assert(Data0Pos != -1);
4891 auto Reg = Inst.getOperand(Data0Pos).getReg();
4892 auto RegIdx = Reg - (VGPR32.contains(Reg) ? AMDGPU::VGPR0 : AMDGPU::AGPR0);
4893 if (RegIdx & 1) {
4894 SMLoc RegLoc = getRegLoc(Reg, Operands);
4895 Error(RegLoc, "vgpr must be even aligned");
4896 return false;
4897 }
4898
4899 return true;
4900}
4901
4902bool AMDGPUAsmParser::validateCoherencyBits(const MCInst &Inst,
4903 const OperandVector &Operands,
4904 const SMLoc &IDLoc) {
4905 int CPolPos = AMDGPU::getNamedOperandIdx(Inst.getOpcode(),
4906 AMDGPU::OpName::cpol);
4907 if (CPolPos == -1)
4908 return true;
4909
4910 unsigned CPol = Inst.getOperand(CPolPos).getImm();
4911
4912 if (isGFX12Plus())
4913 return validateTHAndScopeBits(Inst, Operands, CPol);
4914
4915 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
4916 if (TSFlags & SIInstrFlags::SMRD) {
4917 if (CPol && (isSI() || isCI())) {
4918 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands);
4919 Error(S, "cache policy is not supported for SMRD instructions");
4920 return false;
4921 }
4922 if (CPol & ~(AMDGPU::CPol::GLC | AMDGPU::CPol::DLC)) {
4923 Error(IDLoc, "invalid cache policy for SMEM instruction");
4924 return false;
4925 }
4926 }
4927
4928 if (isGFX90A() && !isGFX940() && (CPol & CPol::SCC)) {
4929 const uint64_t AllowSCCModifier = SIInstrFlags::MUBUF |
4930 SIInstrFlags::MTBUF | SIInstrFlags::MIMG |
4931 SIInstrFlags::FLAT;
4932 if (!(TSFlags & AllowSCCModifier)) {
4933 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands);
4934 StringRef CStr(S.getPointer());
4935 S = SMLoc::getFromPointer(&CStr.data()[CStr.find("scc")]);
4936 Error(S,
4937 "scc modifier is not supported for this instruction on this GPU");
4938 return false;
4939 }
4940 }
4941
4942 if (!(TSFlags & (SIInstrFlags::IsAtomicRet | SIInstrFlags::IsAtomicNoRet)))
4943 return true;
4944
4945 if (TSFlags & SIInstrFlags::IsAtomicRet) {
4946 if (!(TSFlags & SIInstrFlags::MIMG) && !(CPol & CPol::GLC)) {
4947 Error(IDLoc, isGFX940() ? "instruction must use sc0"
4948 : "instruction must use glc");
4949 return false;
4950 }
4951 } else {
4952 if (CPol & CPol::GLC) {
4953 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands);
4954 StringRef CStr(S.getPointer());
4955 S = SMLoc::getFromPointer(
4956 &CStr.data()[CStr.find(isGFX940() ? "sc0" : "glc")]);
4957 Error(S, isGFX940() ? "instruction must not use sc0"
4958 : "instruction must not use glc");
4959 return false;
4960 }
4961 }
4962
4963 return true;
4964}
4965
4966bool AMDGPUAsmParser::validateTHAndScopeBits(const MCInst &Inst,
4967 const OperandVector &Operands,
4968 const unsigned CPol) {
4969 const unsigned TH = CPol & AMDGPU::CPol::TH;
4970 const unsigned Scope = CPol & AMDGPU::CPol::SCOPE;
4971
4972 const unsigned Opcode = Inst.getOpcode();
4973 const MCInstrDesc &TID = MII.get(Opcode);
4974
4975 auto PrintError = [&](StringRef Msg) {
4976 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands);
4977 Error(S, Msg);
4978 return false;
4979 };
4980
4981 if ((TID.TSFlags & SIInstrFlags::IsAtomicRet) &&
4984 return PrintError("instruction must use th:TH_ATOMIC_RETURN");
4985
4986 if (TH == 0)
4987 return true;
4988
4989 if ((TID.TSFlags & SIInstrFlags::SMRD) &&
4990 ((TH == AMDGPU::CPol::TH_NT_RT) || (TH == AMDGPU::CPol::TH_RT_NT) ||
4991 (TH == AMDGPU::CPol::TH_NT_HT)))
4992 return PrintError("invalid th value for SMEM instruction");
4993
4994 if (TH == AMDGPU::CPol::TH_BYPASS) {
4995 if ((Scope != AMDGPU::CPol::SCOPE_SYS &&
4996 CPol & AMDGPU::CPol::TH_REAL_BYPASS) ||
4997 (Scope == AMDGPU::CPol::SCOPE_SYS &&
4998 !(CPol & AMDGPU::CPol::TH_REAL_BYPASS)))
4999 return PrintError("scope and th combination is not valid");
5000 }
5001
5002 bool IsStore = TID.mayStore();
5003 bool IsAtomic =
5004 TID.TSFlags & (SIInstrFlags::IsAtomicRet | SIInstrFlags::IsAtomicNoRet);
5005
5006 if (IsAtomic) {
5007 if (!(CPol & AMDGPU::CPol::TH_TYPE_ATOMIC))
5008 return PrintError("invalid th value for atomic instructions");
5009 } else if (IsStore) {
5010 if (!(CPol & AMDGPU::CPol::TH_TYPE_STORE))
5011 return PrintError("invalid th value for store instructions");
5012 } else {
5013 if (!(CPol & AMDGPU::CPol::TH_TYPE_LOAD))
5014 return PrintError("invalid th value for load instructions");
5015 }
5016
5017 return true;
5018}
5019
5020bool AMDGPUAsmParser::validateExeczVcczOperands(const OperandVector &Operands) {
5021 if (!isGFX11Plus())
5022 return true;
5023 for (auto &Operand : Operands) {
5024 if (!Operand->isReg())
5025 continue;
5026 unsigned Reg = Operand->getReg();
5027 if (Reg == SRC_EXECZ || Reg == SRC_VCCZ) {
5028 Error(getRegLoc(Reg, Operands),
5029 "execz and vccz are not supported on this GPU");
5030 return false;
5031 }
5032 }
5033 return true;
5034}
5035
5036bool AMDGPUAsmParser::validateTFE(const MCInst &Inst,
5037 const OperandVector &Operands) {
5038 const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
5039 if (Desc.mayStore() &&
5040 (Desc.TSFlags & (SIInstrFlags::MUBUF | SIInstrFlags::MTBUF))) {
5041 SMLoc Loc = getImmLoc(AMDGPUOperand::ImmTyTFE, Operands);
5042 if (Loc != getInstLoc(Operands)) {
5043 Error(Loc, "TFE modifier has no meaning for store instructions");
5044 return false;
5045 }
5046 }
5047
5048 return true;
5049}
5050
5051bool AMDGPUAsmParser::validateInstruction(const MCInst &Inst,
5052 const SMLoc &IDLoc,
5053 const OperandVector &Operands) {
5054 if (auto ErrMsg = validateLdsDirect(Inst)) {
5055 Error(getRegLoc(LDS_DIRECT, Operands), *ErrMsg);
5056 return false;
5057 }
5058 if (!validateSOPLiteral(Inst)) {
5059 Error(getLitLoc(Operands),
5060 "only one unique literal operand is allowed");
5061 return false;
5062 }
5063 if (!validateVOPLiteral(Inst, Operands)) {
5064 return false;
5065 }
5066 if (!validateConstantBusLimitations(Inst, Operands)) {
5067 return false;
5068 }
5069 if (!validateVOPDRegBankConstraints(Inst, Operands)) {
5070 return false;
5071 }
5072 if (!validateIntClampSupported(Inst)) {
5073 Error(getImmLoc(AMDGPUOperand::ImmTyClampSI, Operands),
5074 "integer clamping is not supported on this GPU");
5075 return false;
5076 }
5077 if (!validateOpSel(Inst)) {
5078 Error(getImmLoc(AMDGPUOperand::ImmTyOpSel, Operands),
5079 "invalid op_sel operand");
5080 return false;
5081 }
5082 if (!validateNeg(Inst, AMDGPU::OpName::neg_lo)) {
5083 Error(getImmLoc(AMDGPUOperand::ImmTyNegLo, Operands),
5084 "invalid neg_lo operand");
5085 return false;
5086 }
5087 if (!validateNeg(Inst, AMDGPU::OpName::neg_hi)) {
5088 Error(getImmLoc(AMDGPUOperand::ImmTyNegHi, Operands),
5089 "invalid neg_hi operand");
5090 return false;
5091 }
5092 if (!validateDPP(Inst, Operands)) {
5093 return false;
5094 }
5095 // For MUBUF/MTBUF d16 is a part of opcode, so there is nothing to validate.
5096 if (!validateMIMGD16(Inst)) {
5097 Error(getImmLoc(AMDGPUOperand::ImmTyD16, Operands),
5098 "d16 modifier is not supported on this GPU");
5099 return false;
5100 }
5101 if (!validateMIMGMSAA(Inst)) {
5102 Error(getImmLoc(AMDGPUOperand::ImmTyDim, Operands),
5103 "invalid dim; must be MSAA type");
5104 return false;
5105 }
5106 if (!validateMIMGDataSize(Inst, IDLoc)) {
5107 return false;
5108 }
5109 if (!validateMIMGAddrSize(Inst, IDLoc))
5110 return false;
5111 if (!validateMIMGAtomicDMask(Inst)) {
5112 Error(getImmLoc(AMDGPUOperand::ImmTyDMask, Operands),
5113 "invalid atomic image dmask");
5114 return false;
5115 }
5116 if (!validateMIMGGatherDMask(Inst)) {
5117 Error(getImmLoc(AMDGPUOperand::ImmTyDMask, Operands),
5118 "invalid image_gather dmask: only one bit must be set");
5119 return false;
5120 }
5121 if (!validateMovrels(Inst, Operands)) {
5122 return false;
5123 }
5124 if (!validateOffset(Inst, Operands)) {
5125 return false;
5126 }
5127 if (!validateMAIAccWrite(Inst, Operands)) {
5128 return false;
5129 }
5130 if (!validateMAISrc2(Inst, Operands)) {
5131 return false;
5132 }
5133 if (!validateMFMA(Inst, Operands)) {
5134 return false;
5135 }
5136 if (!validateCoherencyBits(Inst, Operands, IDLoc)) {
5137 return false;
5138 }
5139
5140 if (!validateAGPRLdSt(Inst)) {
5141 Error(IDLoc, getFeatureBits()[AMDGPU::FeatureGFX90AInsts]
5142 ? "invalid register class: data and dst should be all VGPR or AGPR"
5143 : "invalid register class: agpr loads and stores not supported on this GPU"
5144 );
5145 return false;
5146 }
5147 if (!validateVGPRAlign(Inst)) {
5148 Error(IDLoc,
5149 "invalid register class: vgpr tuples must be 64 bit aligned");
5150 return false;
5151 }
5152 if (!validateDS(Inst, Operands)) {
5153 return false;
5154 }
5155
5156 if (!validateBLGP(Inst, Operands)) {
5157 return false;
5158 }
5159
5160 if (!validateDivScale(Inst)) {
5161 Error(IDLoc, "ABS not allowed in VOP3B instructions");
5162 return false;
5163 }
5164 if (!validateWaitCnt(Inst, Operands)) {
5165 return false;
5166 }
5167 if (!validateExeczVcczOperands(Operands)) {
5168 return false;
5169 }
5170 if (!validateTFE(Inst, Operands)) {
5171 return false;
5172 }
5173
5174 return true;
5175}
5176
5177static std::string AMDGPUMnemonicSpellCheck(StringRef S,
5178 const FeatureBitset &FBS,
5179 unsigned VariantID = 0);
5180
5181static bool AMDGPUCheckMnemonic(StringRef Mnemonic,
5182 const FeatureBitset &AvailableFeatures,
5183 unsigned VariantID);
5184
5185bool AMDGPUAsmParser::isSupportedMnemo(StringRef Mnemo,
5186 const FeatureBitset &FBS) {
5187 return isSupportedMnemo(Mnemo, FBS, getAllVariants());
5188}
5189
5190bool AMDGPUAsmParser::isSupportedMnemo(StringRef Mnemo,
5191 const FeatureBitset &FBS,
5192 ArrayRef<unsigned> Variants) {
5193 for (auto Variant : Variants) {
5194 if (AMDGPUCheckMnemonic(Mnemo, FBS, Variant))
5195 return true;
5196 }
5197
5198 return false;
5199}
5200
5201bool AMDGPUAsmParser::checkUnsupportedInstruction(StringRef Mnemo,
5202 const SMLoc &IDLoc) {
5203 FeatureBitset FBS = ComputeAvailableFeatures(getFeatureBits());
5204
5205 // Check if requested instruction variant is supported.
5206 if (isSupportedMnemo(Mnemo, FBS, getMatchedVariants()))
5207 return false;
5208
5209 // This instruction is not supported.
5210 // Clear any other pending errors because they are no longer relevant.
5211 getParser().clearPendingErrors();
5212
5213 // Requested instruction variant is not supported.
5214 // Check if any other variants are supported.
5215 StringRef VariantName = getMatchedVariantName();
5216 if (!VariantName.empty() && isSupportedMnemo(Mnemo, FBS)) {
5217 return Error(IDLoc,
5218 Twine(VariantName,
5219 " variant of this instruction is not supported"));
5220 }
5221
5222 // Check if this instruction may be used with a different wavesize.
5223 if (isGFX10Plus() && getFeatureBits()[AMDGPU::FeatureWavefrontSize64] &&
5224 !getFeatureBits()[AMDGPU::FeatureWavefrontSize32]) {
5225
5226 FeatureBitset FeaturesWS32 = getFeatureBits();
5227 FeaturesWS32.flip(AMDGPU::FeatureWavefrontSize64)
5228 .flip(AMDGPU::FeatureWavefrontSize32);
5229 FeatureBitset AvailableFeaturesWS32 =
5230 ComputeAvailableFeatures(FeaturesWS32);
5231
5232 if (isSupportedMnemo(Mnemo, AvailableFeaturesWS32, getMatchedVariants()))
5233 return Error(IDLoc, "instruction requires wavesize=32");
5234 }
5235
5236 // Finally check if this instruction is supported on any other GPU.
5237 if (isSupportedMnemo(Mnemo, FeatureBitset().set())) {
5238 return Error(IDLoc, "instruction not supported on this GPU");
5239 }
5240
5241 // Instruction not supported on any GPU. Probably a typo.
5242 std::string Suggestion = AMDGPUMnemonicSpellCheck(Mnemo, FBS);
5243 return Error(IDLoc, "invalid instruction" + Suggestion);
5244}
5245
5246static bool isInvalidVOPDY(const OperandVector &Operands,
5247 uint64_t InvalidOprIdx) {
5248 assert(InvalidOprIdx < Operands.size());
5249 const auto &Op = ((AMDGPUOperand &)*Operands[InvalidOprIdx]);
5250 if (Op.isToken() && InvalidOprIdx > 1) {
5251 const auto &PrevOp = ((AMDGPUOperand &)*Operands[InvalidOprIdx - 1]);
5252 return PrevOp.isToken() && PrevOp.getToken() == "::";
5253 }
5254 return false;
5255}
5256
5257bool AMDGPUAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
5258 OperandVector &Operands,
5259 MCStreamer &Out,
5260 uint64_t &ErrorInfo,
5261 bool MatchingInlineAsm) {
5262 MCInst Inst;
5263 unsigned Result = Match_Success;
5264 for (auto Variant : getMatchedVariants()) {
5265 uint64_t EI;
5266 auto R = MatchInstructionImpl(Operands, Inst, EI, MatchingInlineAsm,
5267 Variant);
5268 // We order match statuses from least to most specific and keep the most
5269 // specific status as the result:
5270 // Match_MnemonicFail < Match_InvalidOperand < Match_MissingFeature < Match_PreferE32
5271 if ((R == Match_Success) ||
5272 (R == Match_PreferE32) ||
5273 (R == Match_MissingFeature && Result != Match_PreferE32) ||
5274 (R == Match_InvalidOperand && Result != Match_MissingFeature
5275 && Result != Match_PreferE32) ||
5276 (R == Match_MnemonicFail && Result != Match_InvalidOperand
5277 && Result != Match_MissingFeature
5278 && Result != Match_PreferE32)) {
5279 Result = R;
5280 ErrorInfo = EI;
5281 }
5282 if (R == Match_Success)
5283 break;
5284 }
5285
5286 if (Result == Match_Success) {
5287 if (!validateInstruction(Inst, IDLoc, Operands)) {
5288 return true;
5289 }
5290 Inst.setLoc(IDLoc);
5291 Out.emitInstruction(Inst, getSTI());
5292 return false;
5293 }
5294
5295 StringRef Mnemo = ((AMDGPUOperand &)*Operands[0]).getToken();
5296 if (checkUnsupportedInstruction(Mnemo, IDLoc)) {
5297 return true;
5298 }
5299
5300 switch (Result) {
5301 default: break;
5302 case Match_MissingFeature:
5303 // It has been verified that the specified instruction
5304 // mnemonic is valid. A match was found but it requires
5305 // features which are not supported on this GPU.
5306 return Error(IDLoc, "operands are not valid for this GPU or mode");
5307
5308 case Match_InvalidOperand: {
5309 SMLoc ErrorLoc = IDLoc;
5310 if (ErrorInfo != ~0ULL) {
5311 if (ErrorInfo >= Operands.size()) {
5312 return Error(IDLoc, "too few operands for instruction");
5313 }
5314 ErrorLoc = ((AMDGPUOperand &)*Operands[ErrorInfo]).getStartLoc();
5315 if (ErrorLoc == SMLoc())
5316 ErrorLoc = IDLoc;
5317
5318 if (isInvalidVOPDY(Operands, ErrorInfo))
5319 return Error(ErrorLoc, "invalid VOPDY instruction");
5320 }
5321 return Error(ErrorLoc, "invalid operand for instruction");
5322 }
5323
5324 case Match_PreferE32:
5325 return Error(IDLoc, "internal error: instruction without _e64 suffix "
5326 "should be encoded as e32");
5327 case Match_MnemonicFail:
5328 llvm_unreachable("Invalid instructions should have been handled already");
5329 }
5330 llvm_unreachable("Implement any new match types added!");
5331}
5332
5333bool AMDGPUAsmParser::ParseAsAbsoluteExpression(uint32_t &Ret) {
5334 int64_t Tmp = -1;
5335 if (!isToken(AsmToken::Integer) && !isToken(AsmToken::Identifier)) {
5336 return true;
5337 }
5338 if (getParser().parseAbsoluteExpression(Tmp)) {
5339 return true;
5340 }
5341 Ret = static_cast<uint32_t>(Tmp);
5342 return false;
5343}
5344
5345bool AMDGPUAsmParser::ParseDirectiveAMDGCNTarget() {
5346 if (getSTI().getTargetTriple().getArch() != Triple::amdgcn)
5347 return TokError("directive only supported for amdgcn architecture");
5348
5349 std::string TargetIDDirective;
5350 SMLoc TargetStart = getTok().getLoc();
5351 if (getParser().parseEscapedString(TargetIDDirective))
5352 return true;
5353
5354 SMRange TargetRange = SMRange(TargetStart, getTok().getLoc());
5355 if (getTargetStreamer().getTargetID()->toString() != TargetIDDirective)
5356 return getParser().Error(TargetRange.Start,
5357 (Twine(".amdgcn_target directive's target id ") +
5358 Twine(TargetIDDirective) +
5359 Twine(" does not match the specified target id ") +
5360 Twine(getTargetStreamer().getTargetID()->toString())).str());
5361
5362 return false;
5363}
5364
5365bool AMDGPUAsmParser::OutOfRangeError(SMRange Range) {
5366 return Error(Range.Start, "value out of range", Range);
5367}
5368
5369bool AMDGPUAsmParser::calculateGPRBlocks(
5370 const FeatureBitset &Features, bool VCCUsed, bool FlatScrUsed,
5371 bool XNACKUsed, std::optional<bool> EnableWavefrontSize32,
5372 unsigned NextFreeVGPR, SMRange VGPRRange, unsigned NextFreeSGPR,
5373 SMRange SGPRRange, unsigned &VGPRBlocks, unsigned &SGPRBlocks) {
5374 // TODO(scott.linder): These calculations are duplicated from
5375 // AMDGPUAsmPrinter::getSIProgramInfo and could be unified.
5376 IsaVersion Version = getIsaVersion(getSTI().getCPU());
5377
5378 unsigned NumVGPRs = NextFreeVGPR;
5379 unsigned NumSGPRs = NextFreeSGPR;
5380
5381 if (Version.Major >= 10)
5382 NumSGPRs = 0;
5383 else {
5384 unsigned MaxAddressableNumSGPRs =
5385 IsaInfo::getAddressableNumSGPRs(&getSTI());
5386
5387 if (Version.Major >= 8 && !Features.test(FeatureSGPRInitBug) &&
5388 NumSGPRs > MaxAddressableNumSGPRs)
5389 return OutOfRangeError(SGPRRange);
5390
5391 NumSGPRs +=
5392 IsaInfo::getNumExtraSGPRs(&getSTI(), VCCUsed, FlatScrUsed, XNACKUsed);
5393
5394 if ((Version.Major <= 7 || Features.test(FeatureSGPRInitBug)) &&
5395 NumSGPRs > MaxAddressableNumSGPRs)
5396 return OutOfRangeError(SGPRRange);
5397
5398 if (Features.test(FeatureSGPRInitBug))
5399 NumSGPRs = IsaInfo::FIXED_NUM_SGPRS_FOR_INIT_BUG;
5400 }
5401
5402 VGPRBlocks = IsaInfo::getEncodedNumVGPRBlocks(&getSTI(), NumVGPRs,
5403 EnableWavefrontSize32);
5404 SGPRBlocks = IsaInfo::getNumSGPRBlocks(&getSTI(), NumSGPRs);
5405
5406 return false;
5407}
5408
5409bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() {
5410 if (getSTI().getTargetTriple().getArch() != Triple::amdgcn)
5411 return TokError("directive only supported for amdgcn architecture");
5412
5413 if (!isHsaAbi(getSTI()))
5414 return TokError("directive only supported for amdhsa OS");
5415
5416 StringRef KernelName;
5417 if (getParser().parseIdentifier(KernelName))
5418 return true;
5419
5420 kernel_descriptor_t KD = getDefaultAmdhsaKernelDescriptor(&getSTI());
5421
5422 StringSet<> Seen;
5423
5424 IsaVersion IVersion = getIsaVersion(getSTI().getCPU());
5425
5426 SMRange VGPRRange;
5427 uint64_t NextFreeVGPR = 0;
5428 uint64_t AccumOffset = 0;
5429 uint64_t SharedVGPRCount = 0;
5430 uint64_t PreloadLength = 0;
5431 uint64_t PreloadOffset = 0;
5432 SMRange SGPRRange;
5433 uint64_t NextFreeSGPR = 0;
5434
5435 // Count the number of user SGPRs implied from the enabled feature bits.
5436 unsigned ImpliedUserSGPRCount = 0;
5437
5438 // Track if the asm explicitly contains the directive for the user SGPR
5439 // count.
5440 std::optional<unsigned> ExplicitUserSGPRCount;
5441 bool ReserveVCC = true;
5442 bool ReserveFlatScr = true;
5443 std::optional<bool> EnableWavefrontSize32;
5444
5445 while (true) {
5446 while (trySkipToken(AsmToken::EndOfStatement));
5447
5448 StringRef ID;
5449 SMRange IDRange = getTok().getLocRange();
5450 if (!parseId(ID, "expected .amdhsa_ directive or .end_amdhsa_kernel"))
5451 return true;
5452
5453 if (ID == ".end_amdhsa_kernel")
5454 break;
5455
5456 if (!Seen.insert(ID).second)
5457 return TokError(".amdhsa_ directives cannot be repeated");
5458
5459 SMLoc ValStart = getLoc();
5460 int64_t IVal;
5461 if (getParser().parseAbsoluteExpression(IVal))
5462 return true;
5463 SMLoc ValEnd = getLoc();
5464 SMRange ValRange = SMRange(ValStart, ValEnd);
5465
5466 if (IVal < 0)
5467 return OutOfRangeError(ValRange);
5468
5469 uint64_t Val = IVal;
5470
5471#define PARSE_BITS_ENTRY(FIELD, ENTRY, VALUE, RANGE) \
5472 if (!isUInt<ENTRY##_WIDTH>(VALUE)) \
5473 return OutOfRangeError(RANGE); \
5474 AMDHSA_BITS_SET(FIELD, ENTRY, VALUE);
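// Hedged usage note (not from the source): an invocation such as
// PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X, Val, ValRange)
// first range-checks Val against the field width macro (ENTRY##_WIDTH) and
// then packs it into the descriptor word with AMDHSA_BITS_SET.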
5475
5476 if (ID == ".amdhsa_group_segment_fixed_size") {
5477 if (!isUInt<sizeof(KD.group_segment_fixed_size) * CHAR_BIT>(Val))
5478 return OutOfRangeError(ValRange);
5479 KD.group_segment_fixed_size = Val;
5480 } else if (ID == ".amdhsa_private_segment_fixed_size") {
5481 if (!isUInt<sizeof(KD.private_segment_fixed_size) * CHAR_BIT>(Val))
5482 return OutOfRangeError(ValRange);
5483 KD.private_segment_fixed_size = Val;
5484 } else if (ID == ".amdhsa_kernarg_size") {
5485 if (!isUInt<sizeof(KD.kernarg_size) * CHAR_BIT>(Val))
5486 return OutOfRangeError(ValRange);
5487 KD.kernarg_size = Val;
5488 } else if (ID == ".amdhsa_user_sgpr_count") {
5489 ExplicitUserSGPRCount = Val;
5490 } else if (ID == ".amdhsa_user_sgpr_private_segment_buffer") {
5491 if (hasArchitectedFlatScratch())
5492 return Error(IDRange.Start,
5493 "directive is not supported with architected flat scratch",
5494 IDRange);
5495 PARSE_BITS_ENTRY(KD.kernel_code_properties,
5496 KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER,
5497 Val, ValRange);
5498 if (Val)
5499 ImpliedUserSGPRCount += 4;
5500 } else if (ID == ".amdhsa_user_sgpr_kernarg_preload_length") {
5501 if (!hasKernargPreload())
5502 return Error(IDRange.Start, "directive requires gfx90a+", IDRange);
5503
5504 if (Val > getMaxNumUserSGPRs())
5505 return OutOfRangeError(ValRange);
5506 PARSE_BITS_ENTRY(KD.kernarg_preload, KERNARG_PRELOAD_SPEC_LENGTH, Val,
5507 ValRange);
5508 if (Val) {
5509 ImpliedUserSGPRCount += Val;
5510 PreloadLength = Val;
5511 }
5512 } else if (ID == ".amdhsa_user_sgpr_kernarg_preload_offset") {
5513 if (!hasKernargPreload())
5514 return Error(IDRange.Start, "directive requires gfx90a+", IDRange);
5515
5516 if (Val >= 1024)
5517 return OutOfRangeError(ValRange);
5518 PARSE_BITS_ENTRY(KD.kernarg_preload, KERNARG_PRELOAD_SPEC_OFFSET, Val,
5519 ValRange);
5520 if (Val)
5521 PreloadOffset = Val;
5522 } else if (ID == ".amdhsa_user_sgpr_dispatch_ptr") {
5523 PARSE_BITS_ENTRY(KD.kernel_code_properties,
5524 KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR, Val,
5525 ValRange);
5526 if (Val)
5527 ImpliedUserSGPRCount += 2;
5528 } else if (ID == ".amdhsa_user_sgpr_queue_ptr") {
5529 PARSE_BITS_ENTRY(KD.kernel_code_properties,
5530 KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR, Val,
5531 ValRange);
5532 if (Val)
5533 ImpliedUserSGPRCount += 2;
5534 } else if (ID == ".amdhsa_user_sgpr_kernarg_segment_ptr") {
5535 PARSE_BITS_ENTRY(KD.kernel_code_properties,
5536 KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR,
5537 Val, ValRange);
5538 if (Val)
5539 ImpliedUserSGPRCount += 2;
5540 } else if (ID == ".amdhsa_user_sgpr_dispatch_id") {
5541 PARSE_BITS_ENTRY(KD.kernel_code_properties,
5542 KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID, Val,
5543 ValRange);
5544 if (Val)
5545 ImpliedUserSGPRCount += 2;
5546 } else if (ID == ".amdhsa_user_sgpr_flat_scratch_init") {
5547 if (hasArchitectedFlatScratch())
5548 return Error(IDRange.Start,
5549 "directive is not supported with architected flat scratch",
5550 IDRange);
5551 PARSE_BITS_ENTRY(KD.kernel_code_properties,
5552 KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT, Val,
5553 ValRange);
5554 if (Val)
5555 ImpliedUserSGPRCount += 2;
5556 } else if (ID == ".amdhsa_user_sgpr_private_segment_size") {
5557 PARSE_BITS_ENTRY(KD.kernel_code_properties,
5558 KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE,
5559 Val, ValRange);
5560 if (Val)
5561 ImpliedUserSGPRCount += 1;
5562 } else if (ID == ".amdhsa_wavefront_size32") {
5563 if (IVersion.Major < 10)
5564 return Error(IDRange.Start, "directive requires gfx10+", IDRange);
5565 EnableWavefrontSize32 = Val;
5566 PARSE_BITS_ENTRY(KD.kernel_code_properties,
5567 KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32,
5568 Val, ValRange);
5569 } else if (ID == ".amdhsa_uses_dynamic_stack") {
5570 PARSE_BITS_ENTRY(KD.kernel_code_properties,
5571 KERNEL_CODE_PROPERTY_USES_DYNAMIC_STACK, Val, ValRange);
5572 } else if (ID == ".amdhsa_system_sgpr_private_segment_wavefront_offset") {
5573 if (hasArchitectedFlatScratch())
5574 return Error(IDRange.Start,
5575 "directive is not supported with architected flat scratch",
5576 IDRange);
5577 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
5578 COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT, Val, ValRange);
5579 } else if (ID == ".amdhsa_enable_private_segment") {
5580 if (!hasArchitectedFlatScratch())
5581 return Error(
5582 IDRange.Start,
5583 "directive is not supported without architected flat scratch",
5584 IDRange);
5585 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
5586 COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT, Val, ValRange);
5587 } else if (ID == ".amdhsa_system_sgpr_workgroup_id_x") {
5588 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
5589 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X, Val,
5590 ValRange);
5591 } else if (ID == ".amdhsa_system_sgpr_workgroup_id_y") {
5592 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
5593 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Y, Val,
5594 ValRange);
5595 } else if (ID == ".amdhsa_system_sgpr_workgroup_id_z") {
5596 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
5597 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Z, Val,
5598 ValRange);
5599 } else if (ID == ".amdhsa_system_sgpr_workgroup_info") {
5600 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
5601 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_INFO, Val,
5602 ValRange);
5603 } else if (ID == ".amdhsa_system_vgpr_workitem_id") {
5604 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
5605 COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID, Val,
5606 ValRange);
5607 } else if (ID == ".amdhsa_next_free_vgpr") {
5608 VGPRRange = ValRange;
5609 NextFreeVGPR = Val;
5610 } else if (ID == ".amdhsa_next_free_sgpr") {
5611 SGPRRange = ValRange;
5612 NextFreeSGPR = Val;
5613 } else if (ID == ".amdhsa_accum_offset") {
5614 if (!isGFX90A())
5615 return Error(IDRange.Start, "directive requires gfx90a+", IDRange);
5616 AccumOffset = Val;
5617 } else if (ID == ".amdhsa_reserve_vcc") {
5618 if (!isUInt<1>(Val))
5619 return OutOfRangeError(ValRange);
5620 ReserveVCC = Val;
5621 } else if (ID == ".amdhsa_reserve_flat_scratch") {
5622 if (IVersion.Major < 7)
5623 return Error(IDRange.Start, "directive requires gfx7+", IDRange);
5624 if (hasArchitectedFlatScratch())
5625 return Error(IDRange.Start,
5626 "directive is not supported with architected flat scratch",
5627 IDRange);
5628 if (!isUInt<1>(Val))
5629 return OutOfRangeError(ValRange);
5630 ReserveFlatScr = Val;
5631 } else if (ID == ".amdhsa_reserve_xnack_mask") {
5632 if (IVersion.Major < 8)
5633 return Error(IDRange.Start, "directive requires gfx8+", IDRange);
5634 if (!isUInt<1>(Val))
5635 return OutOfRangeError(ValRange);
5636 if (Val != getTargetStreamer().getTargetID()->isXnackOnOrAny())
5637 return getParser().Error(IDRange.Start, ".amdhsa_reserve_xnack_mask does not match target id",
5638 IDRange);
5639 } else if (ID == ".amdhsa_float_round_mode_32") {
5640 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
5641 COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32, Val, ValRange);
5642 } else if (ID == ".amdhsa_float_round_mode_16_64") {
5643 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
5644 COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_16_64, Val, ValRange);
5645 } else if (ID == ".amdhsa_float_denorm_mode_32") {
5646 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
5647 COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_32, Val, ValRange);
5648 } else if (ID == ".amdhsa_float_denorm_mode_16_64") {
5649 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
5650 COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64, Val,
5651 ValRange);
5652 } else if (ID == ".amdhsa_dx10_clamp") {
5653 if (IVersion.Major >= 12)
5654 return Error(IDRange.Start, "directive unsupported on gfx12+", IDRange);
5655 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
5656 COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_DX10_CLAMP, Val,
5657 ValRange);
5658 } else if (ID == ".amdhsa_ieee_mode") {
5659 if (IVersion.Major >= 12)
5660 return Error(IDRange.Start, "directive unsupported on gfx12+", IDRange);
5661 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
5662 COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_IEEE_MODE, Val,
5663 ValRange);
5664 } else if (ID == ".amdhsa_fp16_overflow") {
5665 if (IVersion.Major < 9)
5666 return Error(IDRange.Start, "directive requires gfx9+", IDRange);
5667 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_GFX9_PLUS_FP16_OVFL, Val,
5668 ValRange);
5669 } else if (ID == ".amdhsa_tg_split") {
5670 if (!isGFX90A())
5671 return Error(IDRange.Start, "directive requires gfx90a+", IDRange);
5672 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc3, COMPUTE_PGM_RSRC3_GFX90A_TG_SPLIT, Val,
5673 ValRange);
5674 } else if (ID == ".amdhsa_workgroup_processor_mode") {
5675 if (IVersion.Major < 10)
5676 return Error(IDRange.Start, "directive requires gfx10+", IDRange);
5677 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_GFX10_PLUS_WGP_MODE, Val,
5678 ValRange);
5679 } else if (ID == ".amdhsa_memory_ordered") {
5680 if (IVersion.Major < 10)
5681 return Error(IDRange.Start, "directive requires gfx10+", IDRange);
5682 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_GFX10_PLUS_MEM_ORDERED, Val,
5683 ValRange);
5684 } else if (ID == ".amdhsa_forward_progress") {
5685 if (IVersion.Major < 10)
5686 return Error(IDRange.Start, "directive requires gfx10+", IDRange);
5687 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_GFX10_PLUS_FWD_PROGRESS, Val,
5688 ValRange);
5689 } else if (ID == ".amdhsa_shared_vgpr_count") {
5690 if (IVersion.Major < 10 || IVersion.Major >= 12)
5691 return Error(IDRange.Start, "directive requires gfx10 or gfx11",
5692 IDRange);
5693 SharedVGPRCount = Val;
5694 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc3,
5695 COMPUTE_PGM_RSRC3_GFX10_GFX11_SHARED_VGPR_COUNT, Val,
5696 ValRange);
5697 } else if (ID == ".amdhsa_exception_fp_ieee_invalid_op") {
5698 PARSE_BITS_ENTRY(
5699 KD.compute_pgm_rsrc2,
5700 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION, Val,
5701 ValRange);
5702 } else if (ID == ".amdhsa_exception_fp_denorm_src") {
5703 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
5704 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_FP_DENORMAL_SOURCE,
5705 Val, ValRange);
5706 } else if (ID == ".amdhsa_exception_fp_ieee_div_zero") {
5707 PARSE_BITS_ENTRY(
5708 KD.compute_pgm_rsrc2,
5709 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO, Val,
5710 ValRange);
5711 } else if (ID == ".amdhsa_exception_fp_ieee_overflow") {
5712 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
5713 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW,
5714 Val, ValRange);
5715 } else if (ID == ".amdhsa_exception_fp_ieee_underflow") {
5716 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
5717 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW,
5718 Val, ValRange);
5719 } else if (ID == ".amdhsa_exception_fp_ieee_inexact") {
5720 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
5721 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INEXACT,
5722 Val, ValRange);
5723 } else if (ID == ".amdhsa_exception_int_div_zero") {
5724 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
5725 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO,
5726 Val, ValRange);
5727 } else if (ID == ".amdhsa_round_robin_scheduling") {
5728 if (IVersion.Major < 12)
5729 return Error(IDRange.Start, "directive requires gfx12+", IDRange);
5730 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
5731 COMPUTE_PGM_RSRC1_GFX12_PLUS_ENABLE_WG_RR_EN, Val,
5732 ValRange);
5733 } else {
5734 return Error(IDRange.Start, "unknown .amdhsa_kernel directive", IDRange);
5735 }
5736
5737#undef PARSE_BITS_ENTRY
5738 }
5739
5740 if (!Seen.contains(".amdhsa_next_free_vgpr"))
5741 return TokError(".amdhsa_next_free_vgpr directive is required");
5742
5743 if (!Seen.contains(".amdhsa_next_free_sgpr"))
5744 return TokError(".amdhsa_next_free_sgpr directive is required");
5745
5746 unsigned VGPRBlocks;
5747 unsigned SGPRBlocks;
5748 if (calculateGPRBlocks(getFeatureBits(), ReserveVCC, ReserveFlatScr,
5749 getTargetStreamer().getTargetID()->isXnackOnOrAny(),
5750 EnableWavefrontSize32, NextFreeVGPR,
5751 VGPRRange, NextFreeSGPR, SGPRRange, VGPRBlocks,
5752 SGPRBlocks))
5753 return true;
5754
5755 if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT_WIDTH>(
5756 VGPRBlocks))
5757 return OutOfRangeError(VGPRRange);
5758 AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
5759 COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT, VGPRBlocks);
5760
5761 if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT_WIDTH>(
5762 SGPRBlocks))
5763 return OutOfRangeError(SGPRRange);
5764 AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
5765 COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT,
5766 SGPRBlocks);
5767
5768 if (ExplicitUserSGPRCount && ImpliedUserSGPRCount > *ExplicitUserSGPRCount)
5769 return TokError(".amdhsa_user_sgpr_count smaller than implied by "
5770 "enabled user SGPRs");
5771
5772 unsigned UserSGPRCount =
5773 ExplicitUserSGPRCount ? *ExplicitUserSGPRCount : ImpliedUserSGPRCount;
5774
5775 if (!isUInt<COMPUTE_PGM_RSRC2_USER_SGPR_COUNT_WIDTH>(UserSGPRCount))
5776 return TokError("too many user SGPRs enabled");
5777 AMDHSA_BITS_SET(KD.compute_pgm_rsrc2, COMPUTE_PGM_RSRC2_USER_SGPR_COUNT,
5778 UserSGPRCount);
5779
5780 if (PreloadLength && KD.kernarg_size &&
5781 (PreloadLength * 4 + PreloadOffset * 4 > KD.kernarg_size))
5782 return TokError("kernarg preload length + offset is larger than the "
5783 "kernarg segment size");
5784
5785 if (isGFX90A()) {
5786 if (!Seen.contains(".amdhsa_accum_offset"))
5787 return TokError(".amdhsa_accum_offset directive is required");
5788 if (AccumOffset < 4 || AccumOffset > 256 || (AccumOffset & 3))
5789 return TokError("accum_offset should be in range [4..256] in "
5790 "increments of 4");
5791 if (AccumOffset > alignTo(std::max((uint64_t)1, NextFreeVGPR), 4))
5792 return TokError("accum_offset exceeds total VGPR allocation");
5793 AMDHSA_BITS_SET(KD.compute_pgm_rsrc3, COMPUTE_PGM_RSRC3_GFX90A_ACCUM_OFFSET,
5794 (AccumOffset / 4 - 1));
5795 }
5796
5797 if (IVersion.Major >= 10 && IVersion.Major < 12) {
5798 // SharedVGPRCount < 16 checked by PARSE_BITS_ENTRY
5799 if (SharedVGPRCount && EnableWavefrontSize32 && *EnableWavefrontSize32) {
5800 return TokError("shared_vgpr_count directive not valid on "
5801 "wavefront size 32");
5802 }
5803 if (SharedVGPRCount * 2 + VGPRBlocks > 63) {
5804 return TokError("shared_vgpr_count*2 + "
5805 "compute_pgm_rsrc1.GRANULATED_WORKITEM_VGPR_COUNT cannot "
5806 "exceed 63");
5807 }
5808 }
5809
5810 getTargetStreamer().EmitAmdhsaKernelDescriptor(getSTI(), KernelName, KD,
5811 NextFreeVGPR, NextFreeSGPR,
5812 ReserveVCC, ReserveFlatScr);
5813 return false;
5814}
5815
5816 bool AMDGPUAsmParser::ParseDirectiveAMDHSACodeObjectVersion() {
5817 uint32_t Version;
5818 if (ParseAsAbsoluteExpression(Version))
5819 return true;
5820
5821 getTargetStreamer().EmitDirectiveAMDHSACodeObjectVersion(Version);
5822 return false;
5823}
5824
5825bool AMDGPUAsmParser::ParseAMDKernelCodeTValue(StringRef ID,
5826 amd_kernel_code_t &Header) {
5827 // max_scratch_backing_memory_byte_size is deprecated. Ignore it while parsing
5828 // assembly for backwards compatibility.
5829 if (ID == "max_scratch_backing_memory_byte_size") {
5830 Parser.eatToEndOfStatement();
5831 return false;
5832 }
5833
5834 SmallString<40> ErrStr;
5835 raw_svector_ostream Err(ErrStr);
5836 if (!parseAmdKernelCodeField(ID, getParser(), Header, Err)) {
5837 return TokError(Err.str());
5838 }
5839 Lex();
5840
5841 if (ID == "enable_dx10_clamp") {
5842 if (G_00B848_DX10_CLAMP(Header.compute_pgm_resource_registers) &&
5843 isGFX12Plus())
5844 return TokError("enable_dx10_clamp=1 is not allowed on GFX12+");
5845 }
5846
5847 if (ID == "enable_ieee_mode") {
5848 if (G_00B848_IEEE_MODE(Header.compute_pgm_resource_registers) &&
5849 isGFX12Plus())
5850 return TokError("enable_ieee_mode=1 is not allowed on GFX12+");
5851 }
5852
5853 if (ID == "enable_wavefront_size32") {
5854 if (Header.code_properties & AMD_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32) {
5855 if (!isGFX10Plus())
5856 return TokError("enable_wavefront_size32=1 is only allowed on GFX10+");
5857 if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32])
5858 return TokError("enable_wavefront_size32=1 requires +WavefrontSize32");
5859 } else {
5860 if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64])
5861 return TokError("enable_wavefront_size32=0 requires +WavefrontSize64");
5862 }
5863 }
5864
5865 if (ID == "wavefront_size") {
5866 if (Header.wavefront_size == 5) {
5867 if (!isGFX10Plus())
5868 return TokError("wavefront_size=5 is only allowed on GFX10+");
5869 if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32])
5870 return TokError("wavefront_size=5 requires +WavefrontSize32");
5871 } else if (Header.wavefront_size == 6) {
5872 if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64])
5873 return TokError("wavefront_size=6 requires +WavefrontSize64");
5874 }
5875 }
5876
5877 if (ID == "enable_wgp_mode") {
5878 if (G_00B848_WGP_MODE(Header.compute_pgm_resource_registers) &&
5879 !isGFX10Plus())
5880 return TokError("enable_wgp_mode=1 is only allowed on GFX10+");
5881 }
5882
5883 if (ID == "enable_mem_ordered") {
5884 if (G_00B848_MEM_ORDERED(Header.compute_pgm_resource_registers) &&
5885 !isGFX10Plus())
5886 return TokError("enable_mem_ordered=1 is only allowed on GFX10+");
5887 }
5888
5889 if (ID == "enable_fwd_progress") {
5890 if (G_00B848_FWD_PROGRESS(Header.compute_pgm_resource_registers) &&
5891 !isGFX10Plus())
5892 return TokError("enable_fwd_progress=1 is only allowed on GFX10+");
5893 }
5894
5895 return false;
5896}
5897
5898bool AMDGPUAsmParser::ParseDirectiveAMDKernelCodeT() {
5899 amd_kernel_code_t Header;
5900 AMDGPU::initDefaultAMDKernelCodeT(Header, &getSTI());
5901
5902 while (true) {
5903 // Lex EndOfStatement. This is in a while loop, because lexing a comment
5904 // will set the current token to EndOfStatement.
5905 while(trySkipToken(AsmToken::EndOfStatement));
5906
5907 StringRef ID;
5908 if (!parseId(ID, "expected value identifier or .end_amd_kernel_code_t"))
5909 return true;
5910
5911 if (ID == ".end_amd_kernel_code_t")
5912 break;
5913
5914 if (ParseAMDKernelCodeTValue(ID, Header))
5915 return true;
5916 }
5917
5918 getTargetStreamer().EmitAMDKernelCodeT(Header);
5919
5920 return false;
5921}
5922
5923bool AMDGPUAsmParser::ParseDirectiveAMDGPUHsaKernel() {
5924 StringRef KernelName;
5925 if (!parseId(KernelName, "expected symbol name"))
5926 return true;
5927
5928 getTargetStreamer().EmitAMDGPUSymbolType(KernelName,
5929 ELF::STT_AMDGPU_HSA_KERNEL);
5930
5931 KernelScope.initialize(getContext());
5932 return false;
5933}
5934
5935bool AMDGPUAsmParser::ParseDirectiveISAVersion() {
5936 if (getSTI().getTargetTriple().getArch() != Triple::amdgcn) {
5937 return Error(getLoc(),
5938 ".amd_amdgpu_isa directive is not available on non-amdgcn "
5939 "architectures");
5940 }
5941
5942 auto TargetIDDirective = getLexer().getTok().getStringContents();
5943 if (getTargetStreamer().getTargetID()->toString() != TargetIDDirective)
5944 return Error(getParser().getTok().getLoc(), "target id must match options");
5945
5946 getTargetStreamer().EmitISAVersion();
5947 Lex();
5948
5949 return false;
5950}
5951
5952bool AMDGPUAsmParser::ParseDirectiveHSAMetadata() {
5953 assert(isHsaAbi(getSTI()));
5954
5955 std::string HSAMetadataString;
5956 if (ParseToEndDirective(HSAMD::V3::AssemblerDirectiveBegin,
5957 HSAMD::V3::AssemblerDirectiveEnd, HSAMetadataString))
5958 return true;
5959
5960 if (!getTargetStreamer().EmitHSAMetadataV3(HSAMetadataString))
5961 return Error(getLoc(), "invalid HSA metadata");
5962
5963 return false;
5964}
5965
5966/// Common code to parse out a block of text (typically YAML) between start and
5967/// end directives.
5968bool AMDGPUAsmParser::ParseToEndDirective(const char *AssemblerDirectiveBegin,
5969 const char *AssemblerDirectiveEnd,
5970 std::string &CollectString) {
5971
5972 raw_string_ostream CollectStream(CollectString);
5973
5974 getLexer().setSkipSpace(false);
5975
5976 bool FoundEnd = false;
5977 while (!isToken(AsmToken::Eof)) {
5978 while (isToken(AsmToken::Space)) {
5979 CollectStream << getTokenStr();
5980 Lex();
5981 }
5982
5983 if (trySkipId(AssemblerDirectiveEnd)) {
5984 FoundEnd = true;
5985 break;
5986 }
5987
5988 CollectStream << Parser.parseStringToEndOfStatement()
5989 << getContext().getAsmInfo()->getSeparatorString();
5990
5991 Parser.eatToEndOfStatement();
5992 }
5993
5994 getLexer().setSkipSpace(true);
5995
5996 if (isToken(AsmToken::Eof) && !FoundEnd) {
5997 return TokError(Twine("expected directive ") +
5998 Twine(AssemblerDirectiveEnd) + Twine(" not found"));
5999 }
6000
6001 CollectStream.flush();
6002 return false;
6003}
6004
6005/// Parse the assembler directive for new MsgPack-format PAL metadata.
6006bool AMDGPUAsmParser::ParseDirectivePALMetadataBegin() {
6007 std::string String;
6008 if (ParseToEndDirective(AMDGPU::PALMD::AssemblerDirectiveBegin,
6009 AMDGPU::PALMD::AssemblerDirectiveEnd, String))
6010 return true;
6011
6012 auto PALMetadata = getTargetStreamer().getPALMetadata();
6013 if (!PALMetadata->setFromString(String))
6014 return Error(getLoc(), "invalid PAL metadata");
6015 return false;
6016}
6017
6018/// Parse the assembler directive for old linear-format PAL metadata.
6019bool AMDGPUAsmParser::ParseDirectivePALMetadata() {
6020 if (getSTI().getTargetTriple().getOS() != Triple::AMDPAL) {
6021 return Error(getLoc(),
6022 (Twine(PALMD::AssemblerDirective) + Twine(" directive is "
6023 "not available on non-amdpal OSes")).str());
6024 }
6025
6026 auto PALMetadata = getTargetStreamer().getPALMetadata();
6027 PALMetadata->setLegacy();
6028 for (;;) {
6029 uint32_t Key, Value;
6030 if (ParseAsAbsoluteExpression(Key)) {
6031 return TokError(Twine("invalid value in ") +
6032 Twine(PALMD::AssemblerDirective));
6033 }
6034 if (!trySkipToken(AsmToken::Comma)) {
6035 return TokError(Twine("expected an even number of values in ") +
6036 Twine(PALMD::AssemblerDirective));
6037 }
6038 if (ParseAsAbsoluteExpression(Value)) {
6039 return TokError(Twine("invalid value in ") +
6040 Twine(PALMD::AssemblerDirective));
6041 }
6042 PALMetadata->setRegister(Key, Value);
6043 if (!trySkipToken(AsmToken::Comma))
6044 break;
6045 }
6046 return false;
6047}
6048
6049/// ParseDirectiveAMDGPULDS
6050/// ::= .amdgpu_lds identifier ',' size_expression [',' align_expression]
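/// e.g. (illustrative, names and values are arbitrary):
///   .amdgpu_lds my_lds_var, 4096, 16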
6051bool AMDGPUAsmParser::ParseDirectiveAMDGPULDS() {
6052 if (getParser().checkForValidSection())
6053 return true;
6054
6055 StringRef Name;
6056 SMLoc NameLoc = getLoc();
6057 if (getParser().parseIdentifier(Name))
6058 return TokError("expected identifier in directive");
6059
6060 MCSymbol *Symbol = getContext().getOrCreateSymbol(Name);
6061 if (getParser().parseComma())
6062 return true;
6063
6064 unsigned LocalMemorySize = AMDGPU::IsaInfo::getLocalMemorySize(&getSTI());
6065
6066 int64_t Size;
6067 SMLoc SizeLoc = getLoc();
6068 if (getParser().parseAbsoluteExpression(Size))
6069 return true;
6070 if (Size < 0)
6071 return Error(SizeLoc, "size must be non-negative");
6072 if (Size > LocalMemorySize)
6073 return Error(SizeLoc, "size is too large");
6074
6075 int64_t Alignment = 4;
6076 if (trySkipToken(AsmToken::Comma)) {
6077 SMLoc AlignLoc = getLoc();
6078 if (getParser().parseAbsoluteExpression(Alignment))
6079 return true;
6080 if (Alignment < 0 || !isPowerOf2_64(Alignment))
6081 return Error(AlignLoc, "alignment must be a power of two");
6082
6083 // Alignment larger than the size of LDS is possible in theory, as long
6084 // as the linker manages to place the symbol at address 0, but we do want
6085 // to make sure the alignment fits nicely into a 32-bit integer.
6086 if (Alignment >= 1u << 31)
6087 return Error(AlignLoc, "alignment is too large");
6088 }
6089
6090 if (parseEOL())
6091 return true;
6092
6093 Symbol->redefineIfPossible();
6094 if (!Symbol->isUndefined())
6095 return Error(NameLoc, "invalid symbol redefinition");
6096
6097 getTargetStreamer().emitAMDGPULDS(Symbol, Size, Align(Alignment));
6098 return false;
6099}
6100
6101bool AMDGPUAsmParser::ParseDirective(AsmToken DirectiveID) {
6102 StringRef IDVal = DirectiveID.getString();
6103
6104 if (isHsaAbi(getSTI())) {
6105 if (IDVal == ".amdhsa_kernel")
6106 return ParseDirectiveAMDHSAKernel();
6107
6108 if (IDVal == ".amdhsa_code_object_version")
6109 return ParseDirectiveAMDHSACodeObjectVersion();
6110
6111 // TODO: Restructure/combine with PAL metadata directive.
6112 if (IDVal == HSAMD::V3::AssemblerDirectiveBegin)
6113 return ParseDirectiveHSAMetadata();
6114 } else {
6115 if (IDVal == ".amd_kernel_code_t")
6116 return ParseDirectiveAMDKernelCodeT();
6117
6118 if (IDVal == ".amdgpu_hsa_kernel")
6119 return ParseDirectiveAMDGPUHsaKernel();
6120
6121 if (IDVal == ".amd_amdgpu_isa")
6122 return ParseDirectiveISAVersion();
6123
6124 if (IDVal == HSAMD::AssemblerDirectiveBegin) {
6125 return Error(getLoc(), (Twine(HSAMD::AssemblerDirectiveBegin) +
6126 Twine(" directive is "
6127 "not available on non-amdhsa OSes"))
6128 .str());
6129 }
6130 }
6131
6132 if (IDVal == ".amdgcn_target")
6133 return ParseDirectiveAMDGCNTarget();
6134
6135 if (IDVal == ".amdgpu_lds")
6136 return ParseDirectiveAMDGPULDS();
6137
6138 if (IDVal == PALMD::AssemblerDirectiveBegin)
6139 return ParseDirectivePALMetadataBegin();
6140
6141 if (IDVal == PALMD::AssemblerDirective)
6142 return ParseDirectivePALMetadata();
6143
6144 return true;
6145}
6146
6147bool AMDGPUAsmParser::subtargetHasRegister(const MCRegisterInfo &MRI,
6148 unsigned RegNo) {
6149
6150 if (MRI.regsOverlap(AMDGPU::TTMP12_TTMP13_TTMP14_TTMP15, RegNo))
6151 return isGFX9Plus();
6152
6153 // GFX10+ has 2 more SGPRs 104 and 105.
6154 if (MRI.regsOverlap(AMDGPU::SGPR104_SGPR105, RegNo))
6155 return hasSGPR104_SGPR105();
6156
6157 switch (RegNo) {
6158 case AMDGPU::SRC_SHARED_BASE_LO:
6159 case AMDGPU::SRC_SHARED_BASE:
6160 case AMDGPU::SRC_SHARED_LIMIT_LO:
6161 case AMDGPU::SRC_SHARED_LIMIT:
6162 case AMDGPU::SRC_PRIVATE_BASE_LO:
6163 case AMDGPU::SRC_PRIVATE_BASE:
6164 case AMDGPU::SRC_PRIVATE_LIMIT_LO:
6165 case AMDGPU::SRC_PRIVATE_LIMIT:
6166 return isGFX9Plus();
6167 case AMDGPU::SRC_POPS_EXITING_WAVE_ID:
6168 return isGFX9Plus() && !isGFX11Plus();
6169 case AMDGPU::TBA:
6170 case AMDGPU::TBA_LO:
6171 case AMDGPU::TBA_HI:
6172 case AMDGPU::TMA:
6173 case AMDGPU::TMA_LO:
6174 case AMDGPU::TMA_HI:
6175 return !isGFX9Plus();
6176 case AMDGPU::XNACK_MASK:
6177 case AMDGPU::XNACK_MASK_LO:
6178 case AMDGPU::XNACK_MASK_HI:
6179 return (isVI() || isGFX9()) && getTargetStreamer().getTargetID()->isXnackSupported();
6180 case AMDGPU::SGPR_NULL:
6181 return isGFX10Plus();
6182 default:
6183 break;
6184 }
6185
6186 if (isCI())
6187 return true;
6188
6189 if (isSI() || isGFX10Plus()) {
6190 // No flat_scr on SI.
6191 // On GFX10Plus flat scratch is not a valid register operand and can only be
6192 // accessed with s_setreg/s_getreg.
6193 switch (RegNo) {
6194 case AMDGPU::FLAT_SCR:
6195 case AMDGPU::FLAT_SCR_LO:
6196 case AMDGPU::FLAT_SCR_HI:
6197 return false;
6198 default:
6199 return true;
6200 }
6201 }
6202
6203 // VI only has 102 SGPRs, so make sure we aren't trying to use the 2 more that
6204 // SI/CI have.
6205 if (MRI.regsOverlap(AMDGPU::SGPR102_SGPR103, RegNo))
6206 return hasSGPR102_SGPR103();
6207
6208 return true;
6209}
6210
6211ParseStatus AMDGPUAsmParser::parseOperand(OperandVector &Operands,
6212 StringRef Mnemonic,
6213 OperandMode Mode) {
6214 ParseStatus Res = parseVOPD(Operands);
6215 if (Res.isSuccess() || Res.isFailure() || isToken(AsmToken::EndOfStatement))
6216 return Res;
6217
6218 // Try to parse with a custom parser
6219 Res = MatchOperandParserImpl(Operands, Mnemonic);
6220
6221 // If we successfully parsed the operand or if there was an error parsing,
6222 // we are done.
6223 //
6224 // If we are parsing after we reach EndOfStatement then this means we
6225 // are appending default values to the Operands list. This is only done
6226 // by custom parser, so we shouldn't continue on to the generic parsing.
6227 if (Res.isSuccess() || Res.isFailure() || isToken(AsmToken::EndOfStatement))
6228 return Res;
6229
6230 SMLoc RBraceLoc;
6231 SMLoc LBraceLoc = getLoc();
6232 if (Mode == OperandMode_NSA && trySkipToken(AsmToken::LBrac)) {
6233 unsigned Prefix = Operands.size();
6234
6235 for (;;) {
6236 auto Loc = getLoc();
6237 Res = parseReg(Operands);
6238 if (Res.isNoMatch())
6239 Error(Loc, "expected a register");
6240 if (!Res.isSuccess())
6241 return ParseStatus::Failure;
6242
6243 RBraceLoc = getLoc();
6244 if (trySkipToken(AsmToken::RBrac))
6245 break;
6246
6247 if (!skipToken(AsmToken::Comma,
6248 "expected a comma or a closing square bracket"))
6249 return ParseStatus::Failure;
6250 }
6251
6252 if (Operands.size() - Prefix > 1) {
6253 Operands.insert(Operands.begin() + Prefix,
6254 AMDGPUOperand::CreateToken(this, "[", LBraceLoc));
6255 Operands.push_back(AMDGPUOperand::CreateToken(this, "]", RBraceLoc));
6256 }
6257
6258 return ParseStatus::Success;
6259 }
6260
6261 return parseRegOrImm(Operands);
6262}
6263
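// Illustrative examples of the suffix handling below: "v_add_f32_e64" forces
// the 64-bit encoding and is matched as "v_add_f32"; "_e32", "_dpp" and
// "_sdwa" similarly strip the suffix and record the forced variant.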
6264StringRef AMDGPUAsmParser::parseMnemonicSuffix(StringRef Name) {
6265 // Clear any forced encodings from the previous instruction.
6266 setForcedEncodingSize(0);
6267 setForcedDPP(false);
6268 setForcedSDWA(false);
6269
6270 if (Name.ends_with("_e64_dpp")) {
6271 setForcedDPP(true);
6272 setForcedEncodingSize(64);
6273 return Name.substr(0, Name.size() - 8);
6274 } else if (Name.ends_with("_e64")) {
6275 setForcedEncodingSize(64);
6276 return Name.substr(0, Name.size() - 4);
6277 } else if (Name.ends_with("_e32")) {
6278 setForcedEncodingSize(32);
6279 return Name.substr(0, Name.size() - 4);
6280 } else if (Name.ends_with("_dpp")) {
6281 setForcedDPP(true);
6282 return Name.substr(0, Name.size() - 4);
6283 } else if (Name.ends_with("_sdwa")) {
6284 setForcedSDWA(true);
6285 return Name.substr(0, Name.size() - 5);
6286 }
6287 return Name;
6288}
6289
6290static void applyMnemonicAliases(StringRef &Mnemonic,
6291 const FeatureBitset &Features,
6292 unsigned VariantID);
6293
6294 bool AMDGPUAsmParser::ParseInstruction(ParseInstructionInfo &Info,
6295 StringRef Name,
6296 SMLoc NameLoc, OperandVector &Operands) {
6297 // Add the instruction mnemonic
6298 Name = parseMnemonicSuffix(Name);
6299
6300 // If the target architecture uses MnemonicAlias, call it here to parse
6301 // operands correctly.
6302 applyMnemonicAliases(Name, getAvailableFeatures(), 0);
6303
6304 Operands.push_back(AMDGPUOperand::CreateToken(this, Name, NameLoc));
6305
6306 bool IsMIMG = Name.starts_with("image_");
6307
6308 while (!trySkipToken(AsmToken::EndOfStatement)) {
6309 OperandMode Mode = OperandMode_Default;
6310 if (IsMIMG && isGFX10Plus() && Operands.size() == 2)
6311 Mode = OperandMode_NSA;
6312 ParseStatus Res = parseOperand(Operands, Name, Mode);
6313
6314 if (!Res.isSuccess()) {
6315 checkUnsupportedInstruction(Name, NameLoc);
6316 if (!Parser.hasPendingError()) {
6317 // FIXME: use real operand location rather than the current location.
6318 StringRef Msg = Res.isFailure() ? "failed parsing operand."
6319 : "not a valid operand.";
6320 Error(getLoc(), Msg);
6321 }
6322 while (!trySkipToken(AsmToken::EndOfStatement)) {
6323 lex();
6324 }
6325 return true;
6326 }
6327
6328 // Eat the comma or space if there is one.
6329 trySkipToken(AsmToken::Comma);
6330 }
6331
6332 return false;
6333}
6334
6335//===----------------------------------------------------------------------===//
6336// Utility functions
6337//===----------------------------------------------------------------------===//
6338
6339 ParseStatus AMDGPUAsmParser::parseTokenOp(StringRef Name,
6340 OperandVector &Operands) {
6341 SMLoc S = getLoc();
6342 if (!trySkipId(Name))
6343 return ParseStatus::NoMatch;
6344
6345 Operands.push_back(AMDGPUOperand::CreateToken(this, Name, S));
6346 return ParseStatus::Success;
6347}
6348
6349ParseStatus AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix,
6350 int64_t &IntVal) {
6351
6352 if (!trySkipId(Prefix, AsmToken::Colon))
6353 return ParseStatus::NoMatch;
6354
6355 return parseExpr(IntVal) ? ParseStatus::Success : ParseStatus::Failure;
6356}
6357
6358ParseStatus AMDGPUAsmParser::parseIntWithPrefix(
6359 const char *Prefix, OperandVector &Operands, AMDGPUOperand::ImmTy ImmTy,
6360 std::function<bool(int64_t &)> ConvertResult) {
6361 SMLoc S = getLoc();
6362 int64_t Value = 0;
6363
6364 ParseStatus Res = parseIntWithPrefix(Prefix, Value);
6365 if (!Res.isSuccess())
6366 return Res;
6367
6368 if (ConvertResult && !ConvertResult(Value)) {
6369 Error(S, "invalid " + StringRef(Prefix) + " value.");
6370 }
6371
6372 Operands.push_back(AMDGPUOperand::CreateImm(this, Value, S, ImmTy));
6373 return ParseStatus::Success;
6374}
6375
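// Illustrative source form accepted by the array parser below: e.g.
// "neg:[0,1,1,0]" (as used for BLGP further down); each element must be 0 or 1
// and at most four elements are accepted.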
6376ParseStatus AMDGPUAsmParser::parseOperandArrayWithPrefix(
6377 const char *Prefix, OperandVector &Operands, AMDGPUOperand::ImmTy ImmTy,
6378 bool (*ConvertResult)(int64_t &)) {
6379 SMLoc S = getLoc();
6380 if (!trySkipId(Prefix, AsmToken::Colon))
6381 return ParseStatus::NoMatch;
6382
6383 if (!skipToken(AsmToken::LBrac, "expected a left square bracket"))
6384 return ParseStatus::Failure;
6385
6386 unsigned Val = 0;
6387 const unsigned MaxSize = 4;
6388
6389 // FIXME: How to verify the number of elements matches the number of src
6390 // operands?
6391 for (int I = 0; ; ++I) {
6392 int64_t Op;
6393 SMLoc Loc = getLoc();
6394 if (!parseExpr(Op))
6395 return ParseStatus::Failure;
6396
6397 if (Op != 0 && Op != 1)
6398 return Error(Loc, "invalid " + StringRef(Prefix) + " value.");
6399
6400 Val |= (Op << I);
6401
6402 if (trySkipToken(AsmToken::RBrac))
6403 break;
6404
6405 if (I + 1 == MaxSize)
6406 return Error(getLoc(), "expected a closing square bracket");
6407
6408 if (!skipToken(AsmToken::Comma, "expected a comma"))
6409 return ParseStatus::Failure;
6410 }
6411
6412 Operands.push_back(AMDGPUOperand::CreateImm(this, Val, S, ImmTy));
6413 return ParseStatus::Success;
6414}
6415
6416 ParseStatus AMDGPUAsmParser::parseNamedBit(StringRef Name,
6417 OperandVector &Operands,
6418 AMDGPUOperand::ImmTy ImmTy) {
6419 int64_t Bit;
6420 SMLoc S = getLoc();
6421
6422 if (trySkipId(Name)) {
6423 Bit = 1;
6424 } else if (trySkipId("no", Name)) {
6425 Bit = 0;
6426 } else {
6427 return ParseStatus::NoMatch;
6428 }
6429
6430 if (Name == "r128" && !hasMIMG_R128())
6431 return Error(S, "r128 modifier is not supported on this GPU");
6432 if (Name == "a16" && !hasA16())
6433 return Error(S, "a16 modifier is not supported on this GPU");
6434
6435 if (isGFX9() && ImmTy == AMDGPUOperand::ImmTyA16)
6436 ImmTy = AMDGPUOperand::ImmTyR128A16;
6437
6438 Operands.push_back(AMDGPUOperand::CreateImm(this, Bit, S, ImmTy));
6439 return ParseStatus::Success;
6440}
6441
6442unsigned AMDGPUAsmParser::getCPolKind(StringRef Id, StringRef Mnemo,
6443 bool &Disabling) const {
6444 Disabling = Id.consume_front("no");
6445
6446 if (isGFX940() && !Mnemo.starts_with("s_")) {
6447 return StringSwitch<unsigned>(Id)
6448 .Case("nt", AMDGPU::CPol::NT)
6449 .Case("sc0", AMDGPU::CPol::SC0)
6450 .Case("sc1", AMDGPU::CPol::SC1)
6451 .Default(0);
6452 }
6453
6454 return StringSwitch<unsigned>(Id)
6455 .Case("dlc", AMDGPU::CPol::DLC)
6456 .Case("glc", AMDGPU::CPol::GLC)
6457 .Case("scc", AMDGPU::CPol::SCC)
6458 .Case("slc", AMDGPU::CPol::SLC)
6459 .Default(0);
6460}
6461
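// Illustrative cache-policy syntax handled below: pre-GFX12 instructions take
// bare modifiers such as "glc slc dlc" (or "noglc" etc. to clear a bit),
// GFX940 uses "nt sc0 sc1", and GFX12+ uses "th:..." / "scope:..." pairs,
// e.g. "th:TH_LOAD_NT scope:SCOPE_SYS".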
6462ParseStatus AMDGPUAsmParser::parseCPol(OperandVector &Operands) {
6463 if (isGFX12Plus()) {
6464 SMLoc StringLoc = getLoc();
6465
6466 int64_t CPolVal = 0;
6467 ParseStatus ResTH = ParseStatus::NoMatch;
6468 ParseStatus ResScope = ParseStatus::NoMatch;
6469
6470 for (;;) {
6471 if (ResTH.isNoMatch()) {
6472 int64_t TH;
6473 ResTH = parseTH(Operands, TH);
6474 if (ResTH.isFailure())
6475 return ResTH;
6476 if (ResTH.isSuccess()) {
6477 CPolVal |= TH;
6478 continue;
6479 }
6480 }
6481
6482 if (ResScope.isNoMatch()) {
6483 int64_t Scope;
6484 ResScope = parseScope(Operands, Scope);
6485 if (ResScope.isFailure())
6486 return ResScope;
6487 if (ResScope.isSuccess()) {
6488 CPolVal |= Scope;
6489 continue;
6490 }
6491 }
6492
6493 break;
6494 }
6495
6496 if (ResTH.isNoMatch() && ResScope.isNoMatch())
6497 return ParseStatus::NoMatch;
6498
6499 Operands.push_back(AMDGPUOperand::CreateImm(this, CPolVal, StringLoc,
6500 AMDGPUOperand::ImmTyCPol));
6501 return ParseStatus::Success;
6502 }
6503
6504 StringRef Mnemo = ((AMDGPUOperand &)*Operands[0]).getToken();
6505 SMLoc OpLoc = getLoc();
6506 unsigned Enabled = 0, Seen = 0;
6507 for (;;) {
6508 SMLoc S = getLoc();
6509 bool Disabling;
6510 unsigned CPol = getCPolKind(getId(), Mnemo, Disabling);
6511 if (!CPol)
6512 break;
6513
6514 lex();
6515
6516 if (!isGFX10Plus() && CPol == AMDGPU::CPol::DLC)
6517 return Error(S, "dlc modifier is not supported on this GPU");
6518
6519 if (!isGFX90A() && CPol == AMDGPU::CPol::SCC)
6520 return Error(S, "scc modifier is not supported on this GPU");
6521
6522 if (Seen & CPol)
6523 return Error(S, "duplicate cache policy modifier");
6524
6525 if (!Disabling)
6526 Enabled |= CPol;
6527
6528 Seen |= CPol;
6529 }
6530
6531 if (!Seen)
6532 return ParseStatus::NoMatch;
6533
6534 Operands.push_back(
6535 AMDGPUOperand::CreateImm(this, Enabled, OpLoc, AMDGPUOperand::ImmTyCPol));
6536 return ParseStatus::Success;
6537}
6538
6539ParseStatus AMDGPUAsmParser::parseScope(OperandVector &Operands,
6540 int64_t &Scope) {
6541 Scope = AMDGPU::CPol::SCOPE_CU; // default;
6542
6543 StringRef Value;
6544 SMLoc StringLoc;
6545 ParseStatus Res;
6546
6547 Res = parseStringWithPrefix("scope", Value, StringLoc);
6548 if (!Res.isSuccess())
6549 return Res;
6550
6551 Scope = StringSwitch<int64_t>(Value)
6552 .Case("SCOPE_CU", AMDGPU::CPol::SCOPE_CU)
6553 .Case("SCOPE_SE", AMDGPU::CPol::SCOPE_SE)
6554 .Case("SCOPE_DEV", AMDGPU::CPol::SCOPE_DEV)
6555 .Case("SCOPE_SYS", AMDGPU::CPol::SCOPE_SYS)
6556 .Default(0xffffffff);
6557
6558 if (Scope == 0xffffffff)
6559 return Error(StringLoc, "invalid scope value");
6560
6561 return ParseStatus::Success;
6562}
6563
6564ParseStatus AMDGPUAsmParser::parseTH(OperandVector &Operands, int64_t &TH) {
6565 TH = AMDGPU::CPol::TH_RT; // default
6566
6567 StringRef Value;
6568 SMLoc StringLoc;
6569 ParseStatus Res = parseStringWithPrefix("th", Value, StringLoc);
6570 if (!Res.isSuccess())
6571 return Res;
6572
6573 if (Value == "TH_DEFAULT")
6574 TH = AMDGPU::CPol::TH_RT;
6575 else if (Value == "TH_STORE_LU" || Value == "TH_LOAD_RT_WB" ||
6576 Value == "TH_LOAD_NT_WB") {
6577 return Error(StringLoc, "invalid th value");
6578 } else if (Value.consume_front("TH_ATOMIC_")) {
6579 TH = AMDGPU::CPol::TH_TYPE_ATOMIC;
6580 } else if (Value.consume_front("TH_LOAD_")) {
6581 TH = AMDGPU::CPol::TH_TYPE_LOAD;
6582 } else if (Value.consume_front("TH_STORE_")) {
6583 TH = AMDGPU::CPol::TH_TYPE_STORE;
6584 } else {
6585 return Error(StringLoc, "invalid th value");
6586 }
6587
6588 if (Value == "BYPASS")
6589 TH |= AMDGPU::CPol::TH_REAL_BYPASS;
6590
6591 if (TH != 0) {
6592 if (TH & AMDGPU::CPol::TH_TYPE_ATOMIC)
6593 TH |= StringSwitch<int64_t>(Value)
6594 .Case("RETURN", AMDGPU::CPol::TH_ATOMIC_RETURN)
6595 .Case("RT", AMDGPU::CPol::TH_RT)
6596 .Case("RT_RETURN", AMDGPU::CPol::TH_ATOMIC_RETURN)
6597 .Case("NT", AMDGPU::CPol::TH_ATOMIC_NT)
6598 .Case("NT_RETURN", AMDGPU::CPol::TH_ATOMIC_NT |
6599 AMDGPU::CPol::TH_ATOMIC_RETURN)
6600 .Case("CASCADE_RT", AMDGPU::CPol::TH_ATOMIC_CASCADE)
6601 .Case("CASCADE_NT", AMDGPU::CPol::TH_ATOMIC_CASCADE |
6602 AMDGPU::CPol::TH_ATOMIC_NT)
6603 .Default(0xffffffff);
6604 else
6605 TH |= StringSwitch<int64_t>(Value)
6606 .Case("RT", AMDGPU::CPol::TH_RT)
6607 .Case("NT", AMDGPU::CPol::TH_NT)
6608 .Case("HT", AMDGPU::CPol::TH_HT)
6609 .Case("LU", AMDGPU::CPol::TH_LU)
6610 .Case("RT_WB", AMDGPU::CPol::TH_RT_WB)
6611 .Case("NT_RT", AMDGPU::CPol::TH_NT_RT)
6612 .Case("RT_NT", AMDGPU::CPol::TH_RT_NT)
6613 .Case("NT_HT", AMDGPU::CPol::TH_NT_HT)
6614 .Case("NT_WB", AMDGPU::CPol::TH_NT_WB)
6615 .Case("BYPASS", AMDGPU::CPol::TH_BYPASS)
6616 .Default(0xffffffff);
6617 }
6618
6619 if (TH == 0xffffffff)
6620 return Error(StringLoc, "invalid th value");
6621
6622 return ParseStatus::Success;
6623}
6624
6625 static void addOptionalImmOperand(
6626 MCInst& Inst, const OperandVector& Operands,
6627 AMDGPUAsmParser::OptionalImmIndexMap& OptionalIdx,
6628 AMDGPUOperand::ImmTy ImmT,
6629 int64_t Default = 0) {
6630 auto i = OptionalIdx.find(ImmT);
6631 if (i != OptionalIdx.end()) {
6632 unsigned Idx = i->second;
6633 ((AMDGPUOperand &)*Operands[Idx]).addImmOperands(Inst, 1);
6634 } else {
6635 Inst.addOperand(MCOperand::createImm(Default));
6636 }
6637}
6638
6639 ParseStatus AMDGPUAsmParser::parseStringWithPrefix(StringRef Prefix,
6640 StringRef &Value,
6641 SMLoc &StringLoc) {
6642 if (!trySkipId(Prefix, AsmToken::Colon))
6643 return ParseStatus::NoMatch;
6644
6645 StringLoc = getLoc();
6646 return parseId(Value, "expected an identifier") ? ParseStatus::Success
6647 : ParseStatus::Failure;
6648}
6649
6650//===----------------------------------------------------------------------===//
6651// MTBUF format
6652//===----------------------------------------------------------------------===//
6653
6654bool AMDGPUAsmParser::tryParseFmt(const char *Pref,
6655 int64_t MaxVal,
6656 int64_t &Fmt) {
6657 int64_t Val;
6658 SMLoc Loc = getLoc();
6659
6660 auto Res = parseIntWithPrefix(Pref, Val);
6661 if (Res.isFailure())
6662 return false;
6663 if (Res.isNoMatch())
6664 return true;
6665
6666 if (Val < 0 || Val > MaxVal) {
6667 Error(Loc, Twine("out of range ", StringRef(Pref)));
6668 return false;
6669 }
6670
6671 Fmt = Val;
6672 return true;
6673}
6674
6675ParseStatus AMDGPUAsmParser::tryParseIndexKey(OperandVector &Operands,
6676 AMDGPUOperand::ImmTy ImmTy) {
6677 const char *Pref = "index_key";
6678 int64_t ImmVal = 0;
6679 SMLoc Loc = getLoc();
6680 auto Res = parseIntWithPrefix(Pref, ImmVal);
6681 if (!Res.isSuccess())
6682 return Res;
6683
6684 if (ImmTy == AMDGPUOperand::ImmTyIndexKey16bit && (ImmVal < 0 || ImmVal > 1))
6685 return Error(Loc, Twine("out of range ", StringRef(Pref)));
6686
6687 if (ImmTy == AMDGPUOperand::ImmTyIndexKey8bit && (ImmVal < 0 || ImmVal > 3))
6688 return Error(Loc, Twine("out of range ", StringRef(Pref)));
6689
6690 Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc, ImmTy));
6691 return ParseStatus::Success;
6692}
6693
6694ParseStatus AMDGPUAsmParser::parseIndexKey8bit(OperandVector &Operands) {
6695 return tryParseIndexKey(Operands, AMDGPUOperand::ImmTyIndexKey8bit);
6696}
6697
6698ParseStatus AMDGPUAsmParser::parseIndexKey16bit(OperandVector &Operands) {
6699 return tryParseIndexKey(Operands, AMDGPUOperand::ImmTyIndexKey16bit);
6700}
6701
6702// dfmt and nfmt (in a tbuffer instruction) are parsed as one to allow their
6703// values to live in a joint format operand in the MCInst encoding.
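// Illustrative syntax: "dfmt:15, nfmt:2" or, in the other order, "nfmt:2, dfmt:15";
// either field may be omitted and the defaults are substituted below.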
6704ParseStatus AMDGPUAsmParser::parseDfmtNfmt(int64_t &Format) {
6705 using namespace llvm::AMDGPU::MTBUFFormat;
6706
6707 int64_t Dfmt = DFMT_UNDEF;
6708 int64_t Nfmt = NFMT_UNDEF;
6709
6710 // dfmt and nfmt can appear in either order, and each is optional.
6711 for (int I = 0; I < 2; ++I) {
6712 if (Dfmt == DFMT_UNDEF && !tryParseFmt("dfmt", DFMT_MAX, Dfmt))
6713 return ParseStatus::Failure;
6714
6715 if (Nfmt == NFMT_UNDEF && !tryParseFmt("nfmt", NFMT_MAX, Nfmt))
6716 return ParseStatus::Failure;
6717
6718 // Skip optional comma between dfmt/nfmt
6719 // but guard against 2 commas following each other.
6720 if ((Dfmt == DFMT_UNDEF) != (Nfmt == NFMT_UNDEF) &&
6721 !peekToken().is(AsmToken::Comma)) {
6722 trySkipToken(AsmToken::Comma);
6723 }
6724 }
6725
6726 if (Dfmt == DFMT_UNDEF && Nfmt == NFMT_UNDEF)
6727 return ParseStatus::NoMatch;
6728
6729 Dfmt = (Dfmt == DFMT_UNDEF) ? DFMT_DEFAULT : Dfmt;
6730 Nfmt = (Nfmt == NFMT_UNDEF) ? NFMT_DEFAULT : Nfmt;
6731
6732 Format = encodeDfmtNfmt(Dfmt, Nfmt);
6733 return ParseStatus::Success;
6734}
6735
6736ParseStatus AMDGPUAsmParser::parseUfmt(int64_t &Format) {
6737 using namespace llvm::AMDGPU::MTBUFFormat;
6738
6739 int64_t Fmt = UFMT_UNDEF;
6740
6741 if (!tryParseFmt("format", UFMT_MAX, Fmt))
6742 return ParseStatus::Failure;
6743
6744 if (Fmt == UFMT_UNDEF)
6745 return ParseStatus::NoMatch;
6746
6747 Format = Fmt;
6748 return ParseStatus::Success;
6749}
6750
6751bool AMDGPUAsmParser::matchDfmtNfmt(int64_t &Dfmt,
6752 int64_t &Nfmt,
6753 StringRef FormatStr,
6754 SMLoc Loc) {
6755 using namespace llvm::AMDGPU::MTBUFFormat;
6756 int64_t Format;
6757
6758 Format = getDfmt(FormatStr);
6759 if (Format != DFMT_UNDEF) {
6760 Dfmt = Format;
6761 return true;
6762 }
6763
6764 Format = getNfmt(FormatStr, getSTI());
6765 if (Format != NFMT_UNDEF) {
6766 Nfmt = Format;
6767 return true;
6768 }
6769
6770 Error(Loc, "unsupported format");
6771 return false;
6772}
6773
6774ParseStatus AMDGPUAsmParser::parseSymbolicSplitFormat(StringRef FormatStr,
6775 SMLoc FormatLoc,
6776 int64_t &Format) {
6777 using namespace llvm::AMDGPU::MTBUFFormat;
6778
6779 int64_t Dfmt = DFMT_UNDEF;
6780 int64_t Nfmt = NFMT_UNDEF;
6781 if (!matchDfmtNfmt(Dfmt, Nfmt, FormatStr, FormatLoc))
6782 return ParseStatus::Failure;
6783
6784 if (trySkipToken(AsmToken::Comma)) {
6785 StringRef Str;
6786 SMLoc Loc = getLoc();
6787 if (!parseId(Str, "expected a format string") ||
6788 !matchDfmtNfmt(Dfmt, Nfmt, Str, Loc))
6789 return ParseStatus::Failure;
6790 if (Dfmt == DFMT_UNDEF)
6791 return Error(Loc, "duplicate numeric format");
6792 if (Nfmt == NFMT_UNDEF)
6793 return Error(Loc, "duplicate data format");
6794 }
6795
6796 Dfmt = (Dfmt == DFMT_UNDEF) ? DFMT_DEFAULT : Dfmt;
6797 Nfmt = (Nfmt == NFMT_UNDEF) ? NFMT_DEFAULT : Nfmt;
6798
6799 if (isGFX10Plus()) {
6800 auto Ufmt = convertDfmtNfmt2Ufmt(Dfmt, Nfmt, getSTI());
6801 if (Ufmt == UFMT_UNDEF)
6802 return Error(FormatLoc, "unsupported format");
6803 Format = Ufmt;
6804 } else {
6805 Format = encodeDfmtNfmt(Dfmt, Nfmt);
6806 }
6807
6808 return ParseStatus::Success;
6809}
6810
6811ParseStatus AMDGPUAsmParser::parseSymbolicUnifiedFormat(StringRef FormatStr,
6812 SMLoc Loc,
6813 int64_t &Format) {
6814 using namespace llvm::AMDGPU::MTBUFFormat;
6815
6816 auto Id = getUnifiedFormat(FormatStr, getSTI());
6817 if (Id == UFMT_UNDEF)
6818 return ParseStatus::NoMatch;
6819
6820 if (!isGFX10Plus())
6821 return Error(Loc, "unified format is not supported on this GPU");
6822
6823 Format = Id;
6824 return ParseStatus::Success;
6825}
6826
6827ParseStatus AMDGPUAsmParser::parseNumericFormat(int64_t &Format) {
6828 using namespace llvm::AMDGPU::MTBUFFormat;
6829 SMLoc Loc = getLoc();
6830
6831 if (!parseExpr(Format))
6832 return ParseStatus::Failure;
6833 if (!isValidFormatEncoding(Format, getSTI()))
6834 return Error(Loc, "out of range format");
6835
6836 return ParseStatus::Success;
6837}
6838
6839ParseStatus AMDGPUAsmParser::parseSymbolicOrNumericFormat(int64_t &Format) {
6840 using namespace llvm::AMDGPU::MTBUFFormat;
6841
6842 if (!trySkipId("format", AsmToken::Colon))
6843 return ParseStatus::NoMatch;
6844
6845 if (trySkipToken(AsmToken::LBrac)) {
6846 StringRef FormatStr;
6847 SMLoc Loc = getLoc();
6848 if (!parseId(FormatStr, "expected a format string"))
6849 return ParseStatus::Failure;
6850
6851 auto Res = parseSymbolicUnifiedFormat(FormatStr, Loc, Format);
6852 if (Res.isNoMatch())
6853 Res = parseSymbolicSplitFormat(FormatStr, Loc, Format);
6854 if (!Res.isSuccess())
6855 return Res;
6856
6857 if (!skipToken(AsmToken::RBrac, "expected a closing square bracket"))
6858 return ParseStatus::Failure;
6859
6860 return ParseStatus::Success;
6861 }
6862
6863 return parseNumericFormat(Format);
6864}
6865
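// Illustrative tbuffer format operands handled below (the symbolic names are
// examples and depend on the target): a numeric "format:22", a unified GFX10+
// name such as "format:[BUF_FMT_32_FLOAT]", or a split pair like
// "format:[BUF_DATA_FORMAT_32, BUF_NUM_FORMAT_FLOAT]".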
6866ParseStatus AMDGPUAsmParser::parseFORMAT(OperandVector &Operands) {
6867 using namespace llvm::AMDGPU::MTBUFFormat;
6868
6869 int64_t Format = getDefaultFormatEncoding(getSTI());
6870 ParseStatus Res;
6871 SMLoc Loc = getLoc();
6872
6873 // Parse legacy format syntax.
6874 Res = isGFX10Plus() ? parseUfmt(Format) : parseDfmtNfmt(Format);
6875 if (Res.isFailure())
6876 return Res;
6877
6878 bool FormatFound = Res.isSuccess();
6879
6880 Operands.push_back(
6881 AMDGPUOperand::CreateImm(this, Format, Loc, AMDGPUOperand::ImmTyFORMAT));
6882
6883 if (FormatFound)
6884 trySkipToken(AsmToken::Comma);
6885
6886 if (isToken(AsmToken::EndOfStatement)) {
6887 // We are expecting an soffset operand,
6888 // but let the matcher handle the error.
6889 return ParseStatus::Success;
6890 }
6891
6892 // Parse soffset.
6893 Res = parseRegOrImm(Operands);
6894 if (!Res.isSuccess())
6895 return Res;
6896
6897 trySkipToken(AsmToken::Comma);
6898
6899 if (!FormatFound) {
6900 Res = parseSymbolicOrNumericFormat(Format);
6901 if (Res.isFailure())
6902 return Res;
6903 if (Res.isSuccess()) {
6904 auto Size = Operands.size();
6905 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands[Size - 2]);
6906 assert(Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyFORMAT);
6907 Op.setImm(Format);
6908 }
6909 return ParseStatus::Success;
6910 }
6911
6912 if (isId("format") && peekToken().is(AsmToken::Colon))
6913 return Error(getLoc(), "duplicate format");
6914 return ParseStatus::Success;
6915}
6916
6917ParseStatus AMDGPUAsmParser::parseFlatOffset(OperandVector &Operands) {
6918 ParseStatus Res =
6919 parseIntWithPrefix("offset", Operands, AMDGPUOperand::ImmTyOffset);
6920 if (Res.isNoMatch()) {
6921 Res = parseIntWithPrefix("inst_offset", Operands,
6922 AMDGPUOperand::ImmTyInstOffset);
6923 }
6924 return Res;
6925}
6926
6927ParseStatus AMDGPUAsmParser::parseR128A16(OperandVector &Operands) {
6928 ParseStatus Res =
6929 parseNamedBit("r128", Operands, AMDGPUOperand::ImmTyR128A16);
6930 if (Res.isNoMatch())
6931 Res = parseNamedBit("a16", Operands, AMDGPUOperand::ImmTyA16);
6932 return Res;
6933}
6934
6935ParseStatus AMDGPUAsmParser::parseBLGP(OperandVector &Operands) {
6936 ParseStatus Res =
6937 parseIntWithPrefix("blgp", Operands, AMDGPUOperand::ImmTyBLGP);
6938 if (Res.isNoMatch()) {
6939 Res =
6940 parseOperandArrayWithPrefix("neg", Operands, AMDGPUOperand::ImmTyBLGP);
6941 }
6942 return Res;
6943}
6944
6945//===----------------------------------------------------------------------===//
6946// Exp
6947//===----------------------------------------------------------------------===//
6948
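// Illustrative export syntax fed into this conversion (operands are examples
// only): "exp mrt0 v0, v1, v2, v3 done" or "exp pos0 v4, off, off, v7 compr";
// 'off' stands for an unused source and 'done'/'row_en' are bare tokens.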
6949void AMDGPUAsmParser::cvtExp(MCInst &Inst, const OperandVector &Operands) {
6950 OptionalImmIndexMap OptionalIdx;
6951
6952 unsigned OperandIdx[4];
6953 unsigned EnMask = 0;
6954 int SrcIdx = 0;
6955
6956 for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
6957 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
6958
6959 // Add the register arguments
6960 if (Op.isReg()) {
6961 assert(SrcIdx < 4);
6962 OperandIdx[SrcIdx] = Inst.size();
6963 Op.addRegOperands(Inst, 1);
6964 ++SrcIdx;
6965 continue;
6966 }
6967
6968 if (Op.isOff()) {
6969 assert(SrcIdx < 4);
6970 OperandIdx[SrcIdx] = Inst.size();
6971 Inst.addOperand(MCOperand::createReg(AMDGPU::NoRegister));
6972 ++SrcIdx;
6973 continue;
6974 }
6975
6976 if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyExpTgt) {
6977 Op.addImmOperands(Inst, 1);
6978 continue;
6979 }
6980
6981 if (Op.isToken() && (Op.getToken() == "done" || Op.getToken() == "row_en"))
6982 continue;
6983
6984 // Handle optional arguments
6985 OptionalIdx[Op.getImmTy()] = i;
6986 }
6987
6988 assert(SrcIdx == 4);
6989
6990 bool Compr = false;
6991 if (OptionalIdx.find(AMDGPUOperand::ImmTyExpCompr) != OptionalIdx.end()) {
6992 Compr = true;
6993 Inst.getOperand(OperandIdx[1]) = Inst.getOperand(OperandIdx[2]);
6994 Inst.getOperand(OperandIdx[2]).setReg(AMDGPU::NoRegister);
6995 Inst.getOperand(OperandIdx[3]).setReg(AMDGPU::NoRegister);
6996 }
6997
6998 for (auto i = 0; i < SrcIdx; ++i) {
6999 if (Inst.getOperand(OperandIdx[i]).getReg() != AMDGPU::NoRegister) {
7000 EnMask |= Compr? (0x3 << i * 2) : (0x1 << i);
7001 }
7002 }
7003
7004 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpVM);
7005 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpCompr);
7006
7007 Inst.addOperand(MCOperand::createImm(EnMask));
7008}
7009
7010//===----------------------------------------------------------------------===//
7011// s_waitcnt
7012//===----------------------------------------------------------------------===//
7013
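// Illustrative s_waitcnt forms handled here:
// "s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)", a single counter such as
// "s_waitcnt lgkmcnt(1)", or a raw absolute expression like "s_waitcnt 0".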
7014 static bool
7015 encodeCnt(
7016 const AMDGPU::IsaVersion ISA,
7017 int64_t &IntVal,
7018 int64_t CntVal,
7019 bool Saturate,
7020 unsigned (*encode)(const IsaVersion &Version, unsigned, unsigned),
7021 unsigned (*decode)(const IsaVersion &Version, unsigned))
7022{
7023 bool Failed = false;
7024
7025 IntVal = encode(ISA, IntVal, CntVal);
7026 if (CntVal != decode(ISA, IntVal)) {
7027 if (Saturate) {
7028 IntVal = encode(ISA, IntVal, -1);
7029 } else {
7030 Failed = true;
7031 }
7032 }
7033 return Failed;
7034}
7035
7036bool AMDGPUAsmParser::parseCnt(int64_t &IntVal) {
7037
7038 SMLoc CntLoc = getLoc();
7039 StringRef CntName = getTokenStr();
7040
7041 if (!skipToken(AsmToken::Identifier, "expected a counter name") ||
7042 !skipToken(AsmToken::LParen, "expected a left parenthesis"))
7043 return false;
7044
7045 int64_t CntVal;
7046 SMLoc ValLoc = getLoc();
7047 if (!parseExpr(CntVal))
7048 return false;
7049
7050 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
7051
7052 bool Failed = true;
7053 bool Sat = CntName.ends_with("_sat");
7054
7055 if (CntName == "vmcnt" || CntName == "vmcnt_sat") {
7056 Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeVmcnt, decodeVmcnt);
7057 } else if (CntName == "expcnt" || CntName == "expcnt_sat") {
7058 Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeExpcnt, decodeExpcnt);
7059 } else if (CntName == "lgkmcnt" || CntName == "lgkmcnt_sat") {
7060 Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeLgkmcnt, decodeLgkmcnt);
7061 } else {
7062 Error(CntLoc, "invalid counter name " + CntName);
7063 return false;
7064 }
7065
7066 if (Failed) {
7067 Error(ValLoc, "too large value for " + CntName);
7068 return false;
7069 }
7070
7071 if (!skipToken(AsmToken::RParen, "expected a closing parenthesis"))
7072 return false;
7073
7074 if (trySkipToken(AsmToken::Amp) || trySkipToken(AsmToken::Comma)) {
7075 if (isToken(AsmToken::EndOfStatement)) {
7076 Error(getLoc(), "expected a counter name");
7077 return false;
7078 }
7079 }
7080
7081 return true;
7082}
7083
7084 ParseStatus AMDGPUAsmParser::parseSWaitCnt(OperandVector &Operands) {
7085 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
7086 int64_t Waitcnt = getWaitcntBitMask(ISA);
7087 SMLoc S = getLoc();
7088
7089 if (isToken(AsmToken::Identifier) && peekToken().is(AsmToken::LParen)) {
7090 while (!isToken(AsmToken::EndOfStatement)) {
7091 if (!parseCnt(Waitcnt))
7092 return ParseStatus::Failure;
7093 }
7094 } else {
7095 if (!parseExpr(Waitcnt))
7096 return ParseStatus::Failure;
7097 }
7098
7099 Operands.push_back(AMDGPUOperand::CreateImm(this, Waitcnt, S));
7100 return ParseStatus::Success;
7101}
7102
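// Illustrative s_delay_alu syntax parsed below:
// "s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)",
// or a plain integer expression.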
7103bool AMDGPUAsmParser::parseDelay(int64_t &Delay) {
7104 SMLoc FieldLoc = getLoc();
7105 StringRef FieldName = getTokenStr();
7106 if (!skipToken(AsmToken::Identifier, "expected a field name") ||
7107 !skipToken(AsmToken::LParen, "expected a left parenthesis"))
7108 return false;
7109
7110 SMLoc ValueLoc = getLoc();
7111 StringRef ValueName = getTokenStr();
7112 if (!skipToken(AsmToken::Identifier, "expected a value name") ||
7113 !skipToken(AsmToken::RParen, "expected a right parenthesis"))
7114 return false;
7115
7116 unsigned Shift;
7117 if (FieldName == "instid0") {
7118 Shift = 0;
7119 } else if (FieldName == "instskip") {
7120 Shift = 4;
7121 } else if (FieldName == "instid1") {
7122 Shift = 7;
7123 } else {
7124 Error(FieldLoc, "invalid field name " + FieldName);
7125 return false;
7126 }
7127
7128 int Value;
7129 if (Shift == 4) {
7130 // Parse values for instskip.
7131 Value = StringSwitch<int>(ValueName)
7132 .Case("SAME", 0)
7133 .Case("NEXT", 1)
7134 .Case("SKIP_1", 2)
7135 .Case("SKIP_2", 3)
7136 .Case("SKIP_3", 4)
7137 .Case("SKIP_4", 5)
7138 .Default(-1);
7139 } else {
7140 // Parse values for instid0 and instid1.
7141 Value = StringSwitch<int>(ValueName)
7142 .Case("NO_DEP", 0)
7143 .Case("VALU_DEP_1", 1)
7144 .Case("VALU_DEP_2", 2)
7145 .Case("VALU_DEP_3", 3)
7146 .Case("VALU_DEP_4", 4)
7147 .Case("TRANS32_DEP_1", 5)
7148 .Case("TRANS32_DEP_2", 6)
7149 .Case("TRANS32_DEP_3", 7)
7150 .Case("FMA_ACCUM_CYCLE_1", 8)
7151 .Case("SALU_CYCLE_1", 9)
7152 .Case("SALU_CYCLE_2", 10)
7153 .Case("SALU_CYCLE_3", 11)
7154 .Default(-1);
7155 }
7156 if (Value < 0) {
7157 Error(ValueLoc, "invalid value name " + ValueName);
7158 return false;
7159 }
7160
7161 Delay |= Value << Shift;
7162 return true;
7163}
7164
7165ParseStatus AMDGPUAsmParser::parseSDelayALU(OperandVector &Operands) {
7166 int64_t Delay = 0;
7167 SMLoc S = getLoc();
7168
7169 if (isToken(AsmToken::Identifier) && peekToken().is(AsmToken::LParen)) {
7170 do {
7171 if (!parseDelay(Delay))
7172 return ParseStatus::Failure;
7173 } while (trySkipToken(AsmToken::Pipe));
7174 } else {
7175 if (!parseExpr(Delay))
7176 return ParseStatus::Failure;
7177 }
7178
7179 Operands.push_back(AMDGPUOperand::CreateImm(this, Delay, S));
7180 return ParseStatus::Success;
7181}
7182
7183bool
7184AMDGPUOperand::isSWaitCnt() const {
7185 return isImm();
7186}
7187
7188bool AMDGPUOperand::isSDelayALU() const { return isImm(); }
7189
7190//===----------------------------------------------------------------------===//
7191// DepCtr
7192//===----------------------------------------------------------------------===//
7193
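// Illustrative syntax (the counter names are examples and are resolved by
// encodeDepCtr for the current target):
// "s_waitcnt_depctr depctr_va_vdst(0) depctr_sa_sdst(0)", or a plain integer
// expression.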
7194void AMDGPUAsmParser::depCtrError(SMLoc Loc, int ErrorId,
7195 StringRef DepCtrName) {
7196 switch (ErrorId) {
7197 case OPR_ID_UNKNOWN:
7198 Error(Loc, Twine("invalid counter name ", DepCtrName));
7199 return;
7200 case OPR_ID_UNSUPPORTED:
7201 Error(Loc, Twine(DepCtrName, " is not supported on this GPU"));
7202 return;
7203 case OPR_ID_DUPLICATE:
7204 Error(Loc, Twine("duplicate counter name ", DepCtrName));
7205 return;
7206 case OPR_VAL_INVALID:
7207 Error(Loc, Twine("invalid value for ", DepCtrName));
7208 return;
7209 default:
7210 assert(false);
7211 }
7212}
7213
7214bool AMDGPUAsmParser::parseDepCtr(int64_t &DepCtr, unsigned &UsedOprMask) {
7215
7216 using namespace llvm::AMDGPU::DepCtr;
7217
7218 SMLoc DepCtrLoc = getLoc();
7219 StringRef DepCtrName = getTokenStr();
7220
7221 if (!skipToken(AsmToken::Identifier, "expected a counter name") ||
7222 !skipToken(AsmToken::LParen, "expected a left parenthesis"))
7223 return false;
7224
7225 int64_t ExprVal;
7226 if (!parseExpr(ExprVal))
7227 return false;
7228
7229 unsigned PrevOprMask = UsedOprMask;
7230 int CntVal = encodeDepCtr(DepCtrName, ExprVal, UsedOprMask, getSTI());
7231
7232 if (CntVal < 0) {
7233 depCtrError(DepCtrLoc, CntVal, DepCtrName);
7234 return false;
7235 }
7236
7237 if (!skipToken(AsmToken::RParen, "expected a closing parenthesis"))
7238 return false;
7239
7240 if (trySkipToken(AsmToken::Amp) || trySkipToken(AsmToken::Comma)) {
7241 if (isToken(AsmToken::EndOfStatement)) {
7242 Error(getLoc(), "expected a counter name");
7243 return false;
7244 }
7245 }
7246
7247 unsigned CntValMask = PrevOprMask ^ UsedOprMask;
7248 DepCtr = (DepCtr & ~CntValMask) | CntVal;
7249 return true;
7250}
7251
7252ParseStatus AMDGPUAsmParser::parseDepCtr(OperandVector &Operands) {
7253 using namespace llvm::AMDGPU::DepCtr;
7254
7255 int64_t DepCtr = getDefaultDepCtrEncoding(getSTI());
7256 SMLoc Loc = getLoc();
7257
7258 if (isToken(AsmToken::Identifier) && peekToken().is(AsmToken::LParen)) {
7259 unsigned UsedOprMask = 0;
7260 while (!isToken(AsmToken::EndOfStatement)) {
7261 if (!parseDepCtr(DepCtr, UsedOprMask))
7262 return ParseStatus::Failure;
7263 }
7264 } else {
7265 if (!parseExpr(DepCtr))
7266 return ParseStatus::Failure;
7267 }
7268
7269 Operands.push_back(AMDGPUOperand::CreateImm(this, DepCtr, Loc));
7270 return ParseStatus::Success;
7271}
7272
7273bool AMDGPUOperand::isDepCtr() const { return isS16Imm(); }
7274
7275//===----------------------------------------------------------------------===//
7276// hwreg
7277//===----------------------------------------------------------------------===//
7278
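// Illustrative hwreg operand forms (the register names are examples):
// "hwreg(HW_REG_MODE)", "hwreg(HW_REG_MODE, 0, 3)", a numeric id such as
// "hwreg(5, 1, 2)", or a bare 16-bit immediate.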
7279ParseStatus AMDGPUAsmParser::parseHwregFunc(OperandInfoTy &HwReg,
7280 OperandInfoTy &Offset,
7281 OperandInfoTy &Width) {
7282 using namespace llvm::AMDGPU::Hwreg;
7283
7284 if (!trySkipId("hwreg", AsmToken::LParen))
7285 return ParseStatus::NoMatch;
7286
7287 // The register may be specified by name or using a numeric code
7288 HwReg.Loc = getLoc();
7289 if (isToken(AsmToken::Identifier) &&
7290 (HwReg.Val = getHwregId(getTokenStr(), getSTI())) != OPR_ID_UNKNOWN) {
7291 HwReg.IsSymbolic = true;
7292 lex(); // skip register name
7293 } else if (!parseExpr(HwReg.Val, "a register name")) {
7294 return ParseStatus::Failure;
7295 }
7296
7297 if (trySkipToken(AsmToken::RParen))
7298 return ParseStatus::Success;
7299
7300 // parse optional params
7301 if (!skipToken(AsmToken::Comma, "expected a comma or a closing parenthesis"))
7302 return ParseStatus::Failure;
7303
7304 Offset.Loc = getLoc();
7305 if (!parseExpr(Offset.Val))
7306 return ParseStatus::Failure;
7307
7308 if (!skipToken(AsmToken::Comma, "expected a comma"))
7309 return ParseStatus::Failure;
7310
7311 Width.Loc = getLoc();
7312 if (!parseExpr(Width.Val) ||
7313 !skipToken(AsmToken::RParen, "expected a closing parenthesis"))
7314 return ParseStatus::Failure;
7315
7316 return ParseStatus::Success;
7317}
7318
7319ParseStatus AMDGPUAsmParser::parseHwreg(OperandVector &Operands) {
7320 using namespace llvm::AMDGPU::Hwreg;
7321
7322 int64_t ImmVal = 0;
7323 SMLoc Loc = getLoc();
7324
7325 StructuredOpField HwReg("id", "hardware register", HwregId::Width,
7326 HwregId::Default);
7327 StructuredOpField Offset("offset", "bit offset", HwregOffset::Width,
7328 HwregOffset::Default);
7329 struct : StructuredOpField {
7330 using StructuredOpField::StructuredOpField;
7331 bool validate(AMDGPUAsmParser &Parser) const override {
7332 if (!isUIntN(Width, Val - 1))
7333 return Error(Parser, "only values from 1 to 32 are legal");
7334 return true;
7335 }
7336 } Width("size", "bitfield width", HwregSize::Width, HwregSize::Default);
7337 ParseStatus Res = parseStructuredOpFields({&HwReg, &Offset, &Width});
7338
7339 if (Res.isNoMatch())
7340 Res = parseHwregFunc(HwReg, Offset, Width);
7341
7342 if (Res.isSuccess()) {
7343 if (!validateStructuredOpFields({&HwReg, &Offset, &Width}))
7344 return ParseStatus::Failure;
7345 ImmVal = HwregEncoding::encode(HwReg.Val, Offset.Val, Width.Val);
7346 }
7347
7348 if (Res.isNoMatch() &&
7349 parseExpr(ImmVal, "a hwreg macro, structured immediate"))
7350 Res = ParseStatus::Success;
7351 
7352 if (!Res.isSuccess())
7353 return ParseStatus::Failure;
7354
7355 if (!isUInt<16>(ImmVal))
7356 return Error(Loc, "invalid immediate: only 16-bit values are legal");
7357 Operands.push_back(
7358 AMDGPUOperand::CreateImm(this, ImmVal, Loc, AMDGPUOperand::ImmTyHwreg));
7359 return ParseStatus::Success;
7360}
7361
7362bool AMDGPUOperand::isHwreg() const {
7363 return isImmTy(ImmTyHwreg);
7364}
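// For reference, operand forms accepted by parseHwreg above (register names
// and values are illustrative; the structured field names match the fields above):
//   s_getreg_b32 s2, hwreg(HW_REG_MODE)                // whole register
//   s_getreg_b32 s2, hwreg(HW_REG_MODE, 0, 4)          // bit offset 0, width 4
//   s_setreg_b32 hwreg(1, 0, 32), s2                   // numeric register id
//   s_getreg_b32 s2, {id: 1, offset: 0, size: 32}      // structured form
//   s_getreg_b32 s2, 0x1                               // raw 16-bit immediate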
7365
7366//===----------------------------------------------------------------------===//
7367// sendmsg
7368//===----------------------------------------------------------------------===//
7369
7370bool
7371AMDGPUAsmParser::parseSendMsgBody(OperandInfoTy &Msg,
7372 OperandInfoTy &Op,
7373 OperandInfoTy &Stream) {
7374 using namespace llvm::AMDGPU::SendMsg;
7375
7376 Msg.Loc = getLoc();
7377 if (isToken(AsmToken::Identifier) &&
7378 (Msg.Val = getMsgId(getTokenStr(), getSTI())) != OPR_ID_UNKNOWN) {
7379 Msg.IsSymbolic = true;
7380 lex(); // skip message name
7381 } else if (!parseExpr(Msg.Val, "a message name")) {
7382 return false;
7383 }
7384
7385 if (trySkipToken(AsmToken::Comma)) {
7386 Op.IsDefined = true;
7387 Op.Loc = getLoc();
7388 if (isToken(AsmToken::Identifier) &&
7389 (Op.Val = getMsgOpId(Msg.Val, getTokenStr())) >= 0) {
7390 lex(); // skip operation name
7391 } else if (!parseExpr(Op.Val, "an operation name")) {
7392 return false;
7393 }
7394
7395 if (trySkipToken(AsmToken::Comma)) {
7396 Stream.IsDefined = true;
7397 Stream.Loc = getLoc();
7398 if (!parseExpr(Stream.Val))
7399 return false;
7400 }
7401 }
7402
7403 return skipToken(AsmToken::RParen, "expected a closing parenthesis");
7404}
7405
7406bool
7407AMDGPUAsmParser::validateSendMsg(const OperandInfoTy &Msg,
7408 const OperandInfoTy &Op,
7409 const OperandInfoTy &Stream) {
7410 using namespace llvm::AMDGPU::SendMsg;
7411
7412 // Validation strictness depends on whether the message is specified
7413 // in a symbolic or in a numeric form. In the latter case,
7414 // only the possibility of encoding is checked.
7415 bool Strict = Msg.IsSymbolic;
7416
7417 if (Strict) {
7418 if (Msg.Val == OPR_ID_UNSUPPORTED) {
7419 Error(Msg.Loc, "specified message id is not supported on this GPU");
7420 return false;
7421 }
7422 } else {
7423 if (!isValidMsgId(Msg.Val, getSTI())) {
7424 Error(Msg.Loc, "invalid message id");
7425 return false;
7426 }
7427 }
7428 if (Strict && (msgRequiresOp(Msg.Val, getSTI()) != Op.IsDefined)) {
7429 if (Op.IsDefined) {
7430 Error(Op.Loc, "message does not support operations");
7431 } else {
7432 Error(Msg.Loc, "missing message operation");
7433 }
7434 return false;
7435 }
7436 if (!isValidMsgOp(Msg.Val, Op.Val, getSTI(), Strict)) {
7437 Error(Op.Loc, "invalid operation id");
7438 return false;
7439 }
7440 if (Strict && !msgSupportsStream(Msg.Val, Op.Val, getSTI()) &&
7441 Stream.IsDefined) {
7442 Error(Stream.Loc, "message operation does not support streams");
7443 return false;
7444 }
7445 if (!isValidMsgStream(Msg.Val, Op.Val, Stream.Val, getSTI(), Strict)) {
7446 Error(Stream.Loc, "invalid message stream id");
7447 return false;
7448 }
7449 return true;
7450}
7451
7452ParseStatus AMDGPUAsmParser::parseSendMsg(OperandVector &Operands) {
7453 using namespace llvm::AMDGPU::SendMsg;
7454
7455 int64_t ImmVal = 0;
7456 SMLoc Loc = getLoc();
7457
7458 if (trySkipId("sendmsg", AsmToken::LParen)) {
7459 OperandInfoTy Msg(OPR_ID_UNKNOWN);
7460 OperandInfoTy Op(OP_NONE_);
7461 OperandInfoTy Stream(STREAM_ID_NONE_);
7462 if (parseSendMsgBody(Msg, Op, Stream) &&
7463 validateSendMsg(Msg, Op, Stream)) {
7464 ImmVal = encodeMsg(Msg.Val, Op.Val, Stream.Val);
7465 } else {
7466 return ParseStatus::Failure;
7467 }
7468 } else if (parseExpr(ImmVal, "a sendmsg macro")) {
7469 if (ImmVal < 0 || !isUInt<16>(ImmVal))
7470 return Error(Loc, "invalid immediate: only 16-bit values are legal");
7471 } else {
7472 return ParseStatus::Failure;
7473 }
7474
7475 Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc, AMDGPUOperand::ImmTySendMsg));
7476 return ParseStatus::Success;
7477}
7478
7479bool AMDGPUOperand::isSendMsg() const {
7480 return isImmTy(ImmTySendMsg);
7481}
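// For reference, operand forms accepted by parseSendMsg above (message and
// operation names are illustrative and GPU-dependent):
//   s_sendmsg sendmsg(MSG_INTERRUPT)
//   s_sendmsg sendmsg(MSG_GS, GS_OP_EMIT, 0)   // message, operation, stream id
//   s_sendmsg 0x1                              // raw 16-bit immediate form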
7482
7483//===----------------------------------------------------------------------===//
7484// v_interp
7485//===----------------------------------------------------------------------===//
7486
7487ParseStatus AMDGPUAsmParser::parseInterpSlot(OperandVector &Operands) {
7488 StringRef Str;
7489 SMLoc S = getLoc();
7490
7491 if (!parseId(Str))
7492 return ParseStatus::NoMatch;
7493
7494 int Slot = StringSwitch<int>(Str)
7495 .Case("p10", 0)
7496 .Case("p20", 1)
7497 .Case("p0", 2)
7498 .Default(-1);
7499
7500 if (Slot == -1)
7501 return Error(S, "invalid interpolation slot");
7502
7503 Operands.push_back(AMDGPUOperand::CreateImm(this, Slot, S,
7504 AMDGPUOperand::ImmTyInterpSlot));
7505 return ParseStatus::Success;
7506}
7507
7508ParseStatus AMDGPUAsmParser::parseInterpAttr(OperandVector &Operands) {
7509 StringRef Str;
7510 SMLoc S = getLoc();
7511
7512 if (!parseId(Str))
7513 return ParseStatus::NoMatch;
7514
7515 if (!Str.starts_with("attr"))
7516 return Error(S, "invalid interpolation attribute");
7517
7518 StringRef Chan = Str.take_back(2);
7519 int AttrChan = StringSwitch<int>(Chan)
7520 .Case(".x", 0)
7521 .Case(".y", 1)
7522 .Case(".z", 2)
7523 .Case(".w", 3)
7524 .Default(-1);
7525 if (AttrChan == -1)
7526 return Error(S, "invalid or missing interpolation attribute channel");
7527
7528 Str = Str.drop_back(2).drop_front(4);
7529
7530 uint8_t Attr;
7531 if (Str.getAsInteger(10, Attr))
7532 return Error(S, "invalid or missing interpolation attribute number");
7533
7534 if (Attr > 32)
7535 return Error(S, "out of bounds interpolation attribute number");
7536
7537 SMLoc SChan = SMLoc::getFromPointer(Chan.data());
7538
7539 Operands.push_back(AMDGPUOperand::CreateImm(this, Attr, S,
7540 AMDGPUOperand::ImmTyInterpAttr));
7541 Operands.push_back(AMDGPUOperand::CreateImm(
7542 this, AttrChan, SChan, AMDGPUOperand::ImmTyInterpAttrChan));
7543 return ParseStatus::Success;
7544}
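// For reference, a typical use of the slot and attribute operands parsed above
// (illustrative): v_interp_p1_f32 v0, v1, attr0.x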
7545
7546//===----------------------------------------------------------------------===//
7547// exp
7548//===----------------------------------------------------------------------===//
7549
7550ParseStatus AMDGPUAsmParser::parseExpTgt(OperandVector &Operands) {
7551 using namespace llvm::AMDGPU::Exp;
7552
7553 StringRef Str;
7554 SMLoc S = getLoc();
7555
7556 if (!parseId(Str))
7557 return ParseStatus::NoMatch;
7558
7559 unsigned Id = getTgtId(Str);
7560 if (Id == ET_INVALID || !isSupportedTgtId(Id, getSTI()))
7561 return Error(S, (Id == ET_INVALID)
7562 ? "invalid exp target"
7563 : "exp target is not supported on this GPU");
7564
7565 Operands.push_back(AMDGPUOperand::CreateImm(this, Id, S,
7566 AMDGPUOperand::ImmTyExpTgt));
7567 return ParseStatus::Success;
7568}
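// For reference, typical uses of the exp target operand parsed above
// (illustrative; availability of each target is checked by isSupportedTgtId):
//   exp mrt0 v0, v1, v2, v3 done vm
//   exp pos0 v4, v5, v6, v7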
7569
7570//===----------------------------------------------------------------------===//
7571// parser helpers
7572//===----------------------------------------------------------------------===//
7573
7574bool
7575AMDGPUAsmParser::isId(const AsmToken &Token, const StringRef Id) const {
7576 return Token.is(AsmToken::Identifier) && Token.getString() == Id;
7577}
7578
7579bool
7580AMDGPUAsmParser::isId(const StringRef Id) const {
7581 return isId(getToken(), Id);
7582}
7583
7584bool
7585AMDGPUAsmParser::isToken(const AsmToken::TokenKind Kind) const {
7586 return getTokenKind() == Kind;
7587}
7588
7589StringRef AMDGPUAsmParser::getId() const {
7590 return isToken(AsmToken::Identifier) ? getTokenStr() : StringRef();
7591}
7592
7593bool
7594AMDGPUAsmParser::trySkipId(const StringRef Id) {
7595 if (isId(Id)) {
7596 lex();
7597 return true;
7598 }
7599 return false;
7600}
7601
7602bool
7603AMDGPUAsmParser::trySkipId(const StringRef Pref, const StringRef Id) {
7604 if (isToken(AsmToken::Identifier)) {
7605 StringRef Tok = getTokenStr();
7606 if (Tok.starts_with(Pref) && Tok.drop_front(Pref.size()) == Id) {
7607 lex();
7608 return true;
7609 }
7610 }
7611 return false;
7612}
7613
7614bool
7615AMDGPUAsmParser::trySkipId(const StringRef Id, const AsmToken::TokenKind Kind) {
7616 if (isId(Id) && peekToken().is(Kind)) {
7617 lex();
7618 lex();
7619 return true;
7620 }
7621 return false;
7622}
7623
7624bool
7625AMDGPUAsmParser::trySkipToken(const AsmToken::TokenKind Kind) {
7626 if (isToken(Kind)) {
7627 lex();
7628 return true;
7629 }
7630 return false;
7631}
7632
7633bool
7634AMDGPUAsmParser::skipToken(const AsmToken::TokenKind Kind,
7635 const StringRef ErrMsg) {
7636 if (!trySkipToken(Kind)) {
7637 Error(getLoc(), ErrMsg);
7638 return false;
7639 }
7640 return true;
7641}
7642
7643bool
7644AMDGPUAsmParser::parseExpr(int64_t &Imm, StringRef Expected) {
7645 SMLoc S = getLoc();
7646
7647 const MCExpr *Expr;
7648 if (Parser.parseExpression(Expr))
7649 return false;
7650
7651 if (Expr->evaluateAsAbsolute(Imm))
7652 return true;
7653
7654 if (Expected.empty()) {
7655 Error(S, "expected absolute expression");
7656 } else {
7657 Error(S, Twine("expected ", Expected) +
7658 Twine(" or an absolute expression"));
7659 }
7660 return false;
7661}
7662
7663bool
7664AMDGPUAsmParser::parseExpr(OperandVector &Operands) {
7665 SMLoc S = getLoc();
7666
7667 const MCExpr *Expr;
7668 if (Parser.parseExpression(Expr))
7669 return false;
7670
7671 int64_t IntVal;
7672 if (Expr->evaluateAsAbsolute(IntVal)) {
7673 Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S));
7674 } else {
7675 Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S));
7676 }
7677 return true;
7678}
7679
7680bool
7681AMDGPUAsmParser::parseString(StringRef &Val, const StringRef ErrMsg) {
7682 if (isToken(AsmToken::String)) {
7683 Val = getToken().getStringContents();
7684 lex();
7685 return true;
7686 } else {
7687 Error(getLoc(), ErrMsg);
7688 return false;
7689 }
7690}
7691
7692bool
7693AMDGPUAsmParser::parseId(StringRef &Val, const StringRef ErrMsg) {
7694 if (isToken(AsmToken::Identifier)) {
7695 Val = getTokenStr();
7696 lex();
7697 return true;
7698 } else {
7699 if (!ErrMsg.empty())
7700 Error(getLoc(), ErrMsg);
7701 return false;
7702 }
7703}
7704
7705 AsmToken
7706 AMDGPUAsmParser::getToken() const {
7707 return Parser.getTok();
7708}
7709
7710AsmToken AMDGPUAsmParser::peekToken(bool ShouldSkipSpace) {
7711 return isToken(AsmToken::EndOfStatement)
7712 ? getToken()
7713 : getLexer().peekTok(ShouldSkipSpace);
7714}
7715
7716void
7717AMDGPUAsmParser::peekTokens(MutableArrayRef<AsmToken> Tokens) {
7718 auto TokCount = getLexer().peekTokens(Tokens);
7719
7720 for (auto Idx = TokCount; Idx < Tokens.size(); ++Idx)
7721 Tokens[Idx] = AsmToken(AsmToken::Error, "");
7722}
7723
7724 AsmToken::TokenKind
7725 AMDGPUAsmParser::getTokenKind() const {
7726 return getLexer().getKind();
7727}
7728
7729SMLoc
7730AMDGPUAsmParser::getLoc() const {
7731 return getToken().getLoc();
7732}
7733
7734 StringRef
7735 AMDGPUAsmParser::getTokenStr() const {
7736 return getToken().getString();
7737}
7738
7739void
7740AMDGPUAsmParser::lex() {
7741 Parser.Lex();
7742}
7743
7744SMLoc AMDGPUAsmParser::getInstLoc(const OperandVector &Operands) const {
7745 return ((AMDGPUOperand &)*Operands[0]).getStartLoc();
7746}
7747
7748SMLoc
7749AMDGPUAsmParser::getOperandLoc(std::function<bool(const AMDGPUOperand&)> Test,
7750 const OperandVector &Operands) const {
7751 for (unsigned i = Operands.size() - 1; i > 0; --i) {
7752 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
7753 if (Test(Op))
7754 return Op.getStartLoc();
7755 }
7756 return getInstLoc(Operands);
7757}
7758
7759SMLoc
7760AMDGPUAsmParser::getImmLoc(AMDGPUOperand::ImmTy Type,
7761 const OperandVector &Operands) const {
7762 auto Test = [=](const AMDGPUOperand& Op) { return Op.isImmTy(Type); };
7763 return getOperandLoc(Test, Operands);
7764}
7765
7766SMLoc
7767AMDGPUAsmParser::getRegLoc(unsigned Reg,
7768 const OperandVector &Operands) const {
7769 auto Test = [=](const AMDGPUOperand& Op) {
7770 return Op.isRegKind() && Op.getReg() == Reg;
7771 };
7772 return getOperandLoc(Test, Operands);
7773}
7774
7775SMLoc AMDGPUAsmParser::getLitLoc(const OperandVector &Operands,
7776 bool SearchMandatoryLiterals) const {
7777 auto Test = [](const AMDGPUOperand& Op) {
7778 return Op.IsImmKindLiteral() || Op.isExpr();
7779 };
7780 SMLoc Loc = getOperandLoc(Test, Operands);
7781 if (SearchMandatoryLiterals && Loc == getInstLoc(Operands))
7782 Loc = getMandatoryLitLoc(Operands);
7783 return Loc;
7784}
7785
7786SMLoc AMDGPUAsmParser::getMandatoryLitLoc(const OperandVector &Operands) const {
7787 auto Test = [](const AMDGPUOperand &Op) {
7788 return Op.IsImmKindMandatoryLiteral();
7789 };
7790 return getOperandLoc(Test, Operands);
7791}
7792
7793SMLoc
7794AMDGPUAsmParser::getConstLoc(const OperandVector &Operands) const {
7795 auto Test = [](const AMDGPUOperand& Op) {
7796 return Op.isImmKindConst();
7797 };
7798 return getOperandLoc(Test, Operands);
7799}
7800
7801 ParseStatus
7802 AMDGPUAsmParser::parseStructuredOpFields(ArrayRef<StructuredOpField *> Fields) {
7803 if (!trySkipToken(AsmToken::LCurly))
7804 return ParseStatus::NoMatch;
7805
7806 bool First = true;
7807 while (!trySkipToken(AsmToken::RCurly)) {
7808 if (!First &&
7809 !skipToken(AsmToken::Comma, "comma or closing brace expected"))
7810 return ParseStatus::Failure;
7811
7812 StringRef Id = getTokenStr();
7813 SMLoc IdLoc = getLoc();
7814 if (!skipToken(AsmToken::Identifier, "field name expected") ||
7815 !skipToken(AsmToken::Colon, "colon expected"))
7816 return ParseStatus::Failure;
7817
7818 auto I =
7819 find_if(Fields, [Id](StructuredOpField *F) { return F->Id == Id; });
7820 if (I == Fields.end())
7821 return Error(IdLoc, "unknown field");
7822 if ((*I)->IsDefined)
7823 return Error(IdLoc, "duplicate field");
7824
7825 // TODO: Support symbolic values.
7826 (*I)->Loc = getLoc();
7827 if (!parseExpr((*I)->Val))
7828 return ParseStatus::Failure;
7829 (*I)->IsDefined = true;
7830
7831 First = false;
7832 }
7833 return ParseStatus::Success;
7834}
7835
7836bool AMDGPUAsmParser::validateStructuredOpFields(
7837 ArrayRef<const StructuredOpField *> Fields) {
7838 return all_of(Fields, [this](const StructuredOpField *F) {
7839 return F->validate(*this);
7840 });
7841}
7842
7843//===----------------------------------------------------------------------===//
7844// swizzle
7845//===----------------------------------------------------------------------===//
7846
7848static unsigned
7849encodeBitmaskPerm(const unsigned AndMask,
7850 const unsigned OrMask,
7851 const unsigned XorMask) {
7852 using namespace llvm::AMDGPU::Swizzle;
7853
7854 return BITMASK_PERM_ENC |
7855 (AndMask << BITMASK_AND_SHIFT) |
7856 (OrMask << BITMASK_OR_SHIFT) |
7857 (XorMask << BITMASK_XOR_SHIFT);
7858}
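// Roughly, a bitmask-perm swizzle makes each lane read from lane
// ((lane & AndMask) | OrMask) ^ XorMask within its group of 32 lanes; the
// helper above only builds the encoding, the semantics are the hardware's.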
7859
7860bool
7861AMDGPUAsmParser::parseSwizzleOperand(int64_t &Op,
7862 const unsigned MinVal,
7863 const unsigned MaxVal,
7864 const StringRef ErrMsg,
7865 SMLoc &Loc) {
7866 if (!skipToken(AsmToken::Comma, "expected a comma")) {
7867 return false;
7868 }
7869 Loc = getLoc();
7870 if (!parseExpr(Op)) {
7871 return false;
7872 }
7873 if (Op < MinVal || Op > MaxVal) {
7874 Error(Loc, ErrMsg);
7875 return false;
7876 }
7877
7878 return true;
7879}
7880
7881bool
7882AMDGPUAsmParser::parseSwizzleOperands(const unsigned OpNum, int64_t* Op,
7883 const unsigned MinVal,
7884 const unsigned MaxVal,
7885 const StringRef ErrMsg) {
7886 SMLoc Loc;
7887 for (unsigned i = 0; i < OpNum; ++i) {
7888 if (!parseSwizzleOperand(Op[i], MinVal, MaxVal, ErrMsg, Loc))
7889 return false;
7890 }
7891
7892 return true;
7893}
7894
7895bool
7896AMDGPUAsmParser::parseSwizzleQuadPerm(int64_t &Imm) {
7897 using namespace llvm::AMDGPU::Swizzle;
7898
7899 int64_t Lane[LANE_NUM];
7900 if (parseSwizzleOperands(LANE_NUM, Lane, 0, LANE_MAX,
7901 "expected a 2-bit lane id")) {
7902 Imm = QUAD_PERM_ENC;
7903 for (unsigned I = 0; I < LANE_NUM; ++I) {
7904 Imm |= Lane[I] << (LANE_SHIFT * I);
7905 }
7906 return true;
7907 }
7908 return false;
7909}
7910
7911bool
7912AMDGPUAsmParser::parseSwizzleBroadcast(int64_t &Imm) {
7913 using namespace llvm::AMDGPU::Swizzle;
7914
7915 SMLoc Loc;
7916 int64_t GroupSize;
7917 int64_t LaneIdx;
7918
7919 if (!parseSwizzleOperand(GroupSize,
7920 2, 32,
7921 "group size must be in the interval [2,32]",
7922 Loc)) {
7923 return false;
7924 }
7925 if (!isPowerOf2_64(GroupSize)) {
7926 Error(Loc, "group size must be a power of two");
7927 return false;
7928 }
7929 if (parseSwizzleOperand(LaneIdx,
7930 0, GroupSize - 1,
7931 "lane id must be in the interval [0,group size - 1]",
7932 Loc)) {
7933 Imm = encodeBitmaskPerm(BITMASK_MAX - GroupSize + 1, LaneIdx, 0);
7934 return true;
7935 }
7936 return false;
7937}
7938
7939bool
7940AMDGPUAsmParser::parseSwizzleReverse(int64_t &Imm) {
7941 using namespace llvm::AMDGPU::Swizzle;
7942
7943 SMLoc Loc;
7944 int64_t GroupSize;
7945
7946 if (!parseSwizzleOperand(GroupSize,
7947 2, 32,
7948 "group size must be in the interval [2,32]",
7949 Loc)) {
7950 return false;
7951 }
7952 if (!isPowerOf2_64(GroupSize)) {
7953 Error(Loc, "group size must be a power of two");
7954 return false;
7955 }
7956
7957 Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize - 1);
7958 return true;
7959}
7960
7961bool
7962AMDGPUAsmParser::parseSwizzleSwap(int64_t &Imm) {
7963 using namespace llvm::AMDGPU::Swizzle;
7964
7965 SMLoc Loc;
7966 int64_t GroupSize;
7967
7968 if (!parseSwizzleOperand(GroupSize,
7969 1, 16,
7970 "group size must be in the interval [1,16]",
7971 Loc)) {
7972 return false;
7973 }
7974 if (!isPowerOf2_64(GroupSize)) {
7975 Error(Loc, "group size must be a power of two");
7976 return false;
7977 }
7978
7979 Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize);
7980 return true;
7981}
7982
7983bool
7984AMDGPUAsmParser::parseSwizzleBitmaskPerm(int64_t &Imm) {
7985 using namespace llvm::AMDGPU::Swizzle;
7986
7987 if (!skipToken(AsmToken::Comma, "expected a comma")) {
7988 return false;
7989 }
7990
7991 StringRef Ctl;
7992 SMLoc StrLoc = getLoc();
7993 if (!parseString(Ctl)) {
7994 return false;
7995 }
7996 if (Ctl.size() != BITMASK_WIDTH) {
7997 Error(StrLoc, "expected a 5-character mask");
7998 return false;
7999 }
8000
8001 unsigned AndMask = 0;
8002 unsigned OrMask = 0;
8003 unsigned XorMask = 0;
8004
8005 for (size_t i = 0; i < Ctl.size(); ++i) {
8006 unsigned Mask = 1 << (BITMASK_WIDTH - 1 - i);
8007 switch(Ctl[i]) {
8008 default:
8009 Error(StrLoc, "invalid mask");
8010 return false;
8011 case '0':
8012 break;
8013 case '1':
8014 OrMask |= Mask;
8015 break;
8016 case 'p':
8017 AndMask |= Mask;
8018 break;
8019 case 'i':
8020 AndMask |= Mask;
8021 XorMask |= Mask;
8022 break;
8023 }
8024 }
8025
8026 Imm = encodeBitmaskPerm(AndMask, OrMask, XorMask);
8027 return true;
8028}
8029
8030bool
8031AMDGPUAsmParser::parseSwizzleOffset(int64_t &Imm) {
8032
8033 SMLoc OffsetLoc = getLoc();
8034
8035 if (!parseExpr(Imm, "a swizzle macro")) {
8036 return false;
8037 }
8038 if (!isUInt<16>(Imm)) {
8039 Error(OffsetLoc, "expected a 16-bit offset");
8040 return false;
8041 }
8042 return true;
8043}
8044
8045bool
8046AMDGPUAsmParser::parseSwizzleMacro(int64_t &Imm) {
8047 using namespace llvm::AMDGPU::Swizzle;
8048
8049 if (skipToken(AsmToken::LParen, "expected a left parenthesis")) {
8050
8051 SMLoc ModeLoc = getLoc();
8052 bool Ok = false;
8053
8054 if (trySkipId(IdSymbolic[ID_QUAD_PERM])) {
8055 Ok = parseSwizzleQuadPerm(Imm);
8056 } else if (trySkipId(IdSymbolic[ID_BITMASK_PERM])) {
8057 Ok = parseSwizzleBitmaskPerm(Imm);
8058 } else if (trySkipId(IdSymbolic[ID_BROADCAST])) {
8059 Ok = parseSwizzleBroadcast(Imm);
8060 } else if (trySkipId(IdSymbolic[ID_SWAP])) {
8061 Ok = parseSwizzleSwap(Imm);
8062 } else if (trySkipId(IdSymbolic[ID_REVERSE])) {
8063 Ok = parseSwizzleReverse(Imm);
8064 } else {
8065 Error(ModeLoc, "expected a swizzle mode");
8066 }
8067
8068 return Ok && skipToken(AsmToken::RParen, "expected a closing parenthesis");
8069 }
8070
8071 return false;
8072}
8073
8074ParseStatus AMDGPUAsmParser::parseSwizzle(OperandVector &Operands) {
8075 SMLoc S = getLoc();
8076 int64_t Imm = 0;
8077
8078 if (trySkipId("offset")) {
8079
8080 bool Ok = false;
8081 if (skipToken(AsmToken::Colon, "expected a colon")) {
8082 if (trySkipId("swizzle")) {
8083 Ok = parseSwizzleMacro(Imm);
8084 } else {
8085 Ok = parseSwizzleOffset(Imm);
8086 }
8087 }
8088
8089 Operands.push_back(AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTySwizzle));
8090
8091 return Ok ? ParseStatus::Success : ParseStatus::Failure;
8092 }
8093 return ParseStatus::NoMatch;
8094}
8095
8096bool
8097AMDGPUOperand::isSwizzle() const {
8098 return isImmTy(ImmTySwizzle);
8099}
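// For reference, offset forms accepted by parseSwizzle above (illustrative):
//   ds_swizzle_b32 v8, v2 offset:swizzle(QUAD_PERM, 0, 1, 2, 3)
//   ds_swizzle_b32 v8, v2 offset:swizzle(BITMASK_PERM, "01pip")
//   ds_swizzle_b32 v8, v2 offset:swizzle(BROADCAST, 4, 1)
//   ds_swizzle_b32 v8, v2 offset:swizzle(SWAP, 2)
//   ds_swizzle_b32 v8, v2 offset:swizzle(REVERSE, 8)
//   ds_swizzle_b32 v8, v2 offset:0x8000          // raw 16-bit offset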
8100
8101//===----------------------------------------------------------------------===//
8102// VGPR Index Mode
8103//===----------------------------------------------------------------------===//
8104
8105int64_t AMDGPUAsmParser::parseGPRIdxMacro() {
8106
8107 using namespace llvm::AMDGPU::VGPRIndexMode;
8108
8109 if (trySkipToken(AsmToken::RParen)) {
8110 return OFF;
8111 }
8112
8113 int64_t Imm = 0;
8114
8115 while (true) {
8116 unsigned Mode = 0;
8117 SMLoc S = getLoc();
8118
8119 for (unsigned ModeId = ID_MIN; ModeId <= ID_MAX; ++ModeId) {
8120 if (trySkipId(IdSymbolic[ModeId])) {
8121 Mode = 1 << ModeId;
8122 break;
8123 }
8124 }
8125
8126 if (Mode == 0) {
8127 Error(S, (Imm == 0)?
8128 "expected a VGPR index mode or a closing parenthesis" :
8129 "expected a VGPR index mode");
8130 return UNDEF;
8131 }
8132
8133 if (Imm & Mode) {
8134 Error(S, "duplicate VGPR index mode");
8135 return UNDEF;
8136 }
8137 Imm |= Mode;
8138
8139 if (trySkipToken(AsmToken::RParen))
8140 break;
8141 if (!skipToken(AsmToken::Comma,
8142 "expected a comma or a closing parenthesis"))
8143 return UNDEF;
8144 }
8145
8146 return Imm;
8147}
8148
8149ParseStatus AMDGPUAsmParser::parseGPRIdxMode(OperandVector &Operands) {
8150
8151 using namespace llvm::AMDGPU::VGPRIndexMode;
8152
8153 int64_t Imm = 0;
8154 SMLoc S = getLoc();
8155
8156 if (trySkipId("gpr_idx", AsmToken::LParen)) {
8157 Imm = parseGPRIdxMacro();
8158 if (Imm == UNDEF)
8159 return ParseStatus::Failure;
8160 } else {
8161 if (getParser().parseAbsoluteExpression(Imm))
8162 return ParseStatus::Failure;
8163 if (Imm < 0 || !isUInt<4>(Imm))
8164 return Error(S, "invalid immediate: only 4-bit values are legal");
8165 }
8166
8167 Operands.push_back(
8168 AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyGprIdxMode));
8169 return ParseStatus::Success;
8170}
8171
8172bool AMDGPUOperand::isGPRIdxMode() const {
8173 return isImmTy(ImmTyGprIdxMode);
8174}
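// For reference, forms accepted by parseGPRIdxMode above (illustrative; mode
// names come from VGPRIndexMode::IdSymbolic):
//   s_set_gpr_idx_on s0, gpr_idx(SRC0, DST)
//   s_set_gpr_idx_on s0, 3                     // raw 4-bit immediate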
8175
8176//===----------------------------------------------------------------------===//
8177// sopp branch targets
8178//===----------------------------------------------------------------------===//
8179
8180ParseStatus AMDGPUAsmParser::parseSOPPBrTarget(OperandVector &Operands) {
8181
8182 // Make sure we are not parsing something
8183 // that looks like a label or an expression but is not.
8184 // This will improve error messages.
8185 if (isRegister() || isModifier())
8186 return ParseStatus::NoMatch;
8187
8188 if (!parseExpr(Operands))
8189 return ParseStatus::Failure;
8190
8191 AMDGPUOperand &Opr = ((AMDGPUOperand &)*Operands[Operands.size() - 1]);
8192 assert(Opr.isImm() || Opr.isExpr());
8193 SMLoc Loc = Opr.getStartLoc();
8194
8195 // Currently we do not support arbitrary expressions as branch targets.
8196 // Only labels and absolute expressions are accepted.
8197 if (Opr.isExpr() && !Opr.isSymbolRefExpr()) {
8198 Error(Loc, "expected an absolute expression or a label");
8199 } else if (Opr.isImm() && !Opr.isS16Imm()) {
8200 Error(Loc, "expected a 16-bit signed jump offset");
8201 }
8202
8203 return ParseStatus::Success;
8204}
8205
8206//===----------------------------------------------------------------------===//
8207// Boolean holding registers
8208//===----------------------------------------------------------------------===//
8209
8210ParseStatus AMDGPUAsmParser::parseBoolReg(OperandVector &Operands) {
8211 return parseReg(Operands);
8212}
8213
8214//===----------------------------------------------------------------------===//
8215// mubuf
8216//===----------------------------------------------------------------------===//
8217
8218void AMDGPUAsmParser::cvtMubufImpl(MCInst &Inst,
8219 const OperandVector &Operands,
8220 bool IsAtomic) {
8221 OptionalImmIndexMap OptionalIdx;
8222 unsigned FirstOperandIdx = 1;
8223 bool IsAtomicReturn = false;
8224
8225 if (IsAtomic) {
8226 IsAtomicReturn = MII.get(Inst.getOpcode()).TSFlags &
8227 SIInstrFlags::IsAtomicRet;
8228 }
8229
8230 for (unsigned i = FirstOperandIdx, e = Operands.size(); i != e; ++i) {
8231 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
8232
8233 // Add the register arguments
8234 if (Op.isReg()) {
8235 Op.addRegOperands(Inst, 1);
8236 // Insert a tied src for atomic return dst.
8237 // This cannot be postponed as subsequent calls to
8238 // addImmOperands rely on correct number of MC operands.
8239 if (IsAtomicReturn && i == FirstOperandIdx)
8240 Op.addRegOperands(Inst, 1);
8241 continue;
8242 }
8243
8244 // Handle the case where soffset is an immediate
8245 if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) {
8246 Op.addImmOperands(Inst, 1);
8247 continue;
8248 }
8249
8250 // Handle tokens like 'offen' which are sometimes hard-coded into the
8251 // asm string. There are no MCInst operands for these.
8252 if (Op.isToken()) {
8253 continue;
8254 }
8255 assert(Op.isImm());
8256
8257 // Handle optional arguments
8258 OptionalIdx[Op.getImmTy()] = i;
8259 }
8260
8261 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset);
8262 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyCPol, 0);
8263}
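// For reference, a typical MUBUF instruction whose parsed operands are
// converted by cvtMubufImpl above (illustrative):
//   buffer_load_dword v0, v1, s[4:7], s1 offen offset:16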
8264
8265//===----------------------------------------------------------------------===//
8266// smrd
8267//===----------------------------------------------------------------------===//
8268
8269bool AMDGPUOperand::isSMRDOffset8() const {
8270 return isImmLiteral() && isUInt<8>(getImm());
8271}
8272
8273bool AMDGPUOperand::isSMEMOffset() const {
8274 // Offset range is checked later by validator.
8275 return isImmLiteral();
8276}
8277
8278bool AMDGPUOperand::isSMRDLiteralOffset() const {
8279 // 32-bit literals are only supported on CI and we only want to use them
8280 // when the offset is > 8-bits.
8281 return isImmLiteral() && !isUInt<8>(getImm()) && isUInt<32>(getImm());
8282}
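// For reference (illustrative): on CI, s_load_dword s0, s[2:3], 0x1ff uses the
// 32-bit literal offset form tested above; offsets that fit in 8 bits use the
// 8-bit encoding instead.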
8283
8284//===----------------------------------------------------------------------===//
8285// vop3
8286//===----------------------------------------------------------------------===//
8287
8288static bool ConvertOmodMul(int64_t &Mul) {
8289 if (Mul != 1 && Mul != 2 && Mul != 4)
8290 return false;
8291
8292 Mul >>= 1;
8293 return true;
8294}
8295
8296static bool ConvertOmodDiv(int64_t &Div) {
8297 if (Div == 1) {
8298 Div = 0;
8299 return true;
8300 }
8301
8302 if (Div == 2) {
8303 Div = 3;
8304 return true;
8305 }
8306
8307 return false;
8308}
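// Mapping implemented by the two helpers above (omod field encoding):
//   mul:1 -> 0 (no modifier), mul:2 -> 1, mul:4 -> 2, div:2 -> 3.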
8309
8310// For pre-gfx11 targets, both bound_ctrl:0 and bound_ctrl:1 are encoded as 1.
8311// This is intentional and ensures compatibility with sp3.
8312// See bug 35397 for details.
8313bool AMDGPUAsmParser::convertDppBoundCtrl(int64_t &BoundCtrl) {
8314 if (BoundCtrl == 0 || BoundCtrl == 1) {
8315 if (!isGFX11Plus())
8316 BoundCtrl = 1;
8317 return true;
8318 }
8319 return false;
8320}
8321
8322void AMDGPUAsmParser::onBeginOfFile() {
8323 if (!getParser().getStreamer().getTargetStreamer() ||
8324 getSTI().getTargetTriple().getArch() == Triple::r600)
8325 return;
8326
8327 if (!getTargetStreamer().getTargetID())
8328 getTargetStreamer().initializeTargetID(getSTI(),
8329 getSTI().getFeatureString());
8330
8331 if (isHsaAbi(getSTI()))
8332 getTargetStreamer().EmitDirectiveAMDGCNTarget();
8333}
8334
8335/// Parse AMDGPU specific expressions.
8336///
8337/// expr ::= or(expr, ...) |
8338/// max(expr, ...)
8339///
8340bool AMDGPUAsmParser::parsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc) {
8341 using AGVK = AMDGPUVariadicMCExpr::VariadicKind;
8342 
8343 if (isToken(AsmToken::Identifier)) {
8344 StringRef TokenId = getTokenStr();
8345 AGVK VK = StringSwitch<AGVK>(TokenId)
8346 .Case("max", AGVK::AGVK_Max)
8347 .Case("or", AGVK::AGVK_Or)
8348 .Default(AGVK::AGVK_None);
8349
8350 if (VK != AGVK::AGVK_None && peekToken().is(AsmToken::LParen)) {
8351 SmallVector<const MCExpr *, 4> Exprs;
8352 uint64_t CommaCount = 0;
8353 lex(); // Eat 'max'/'or'
8354 lex(); // Eat '('
8355 while (true) {
8356 if (trySkipToken(AsmToken::RParen)) {
8357 if (Exprs.empty()) {
8358 Error(getToken().getLoc(),
8359 "empty " + Twine(TokenId) + " expression");
8360 return true;
8361 }
8362 if (CommaCount + 1 != Exprs.size()) {
8363 Error(getToken().getLoc(),
8364 "mismatch of commas in " + Twine(TokenId) + " expression");
8365 return true;
8366 }
8367 Res = AMDGPUVariadicMCExpr::create(VK, Exprs, getContext());
8368 return false;
8369 }
8370 const MCExpr *Expr;
8371 if (getParser().parseExpression(Expr, EndLoc))
8372 return true;
8373 Exprs.push_back(Expr);
8374 bool LastTokenWasComma = trySkipToken(AsmToken::Comma);
8375 if (LastTokenWasComma)
8376 CommaCount++;
8377 if (!LastTokenWasComma && !isToken(AsmToken::RParen)) {
8378 Error(getToken().getLoc(),
8379 "unexpected token in " + Twine(TokenId) + " expression");
8380 return true;
8381 }
8382 }
8383 }
8384 }
8385 return getParser().parsePrimaryExpr(Res, EndLoc, nullptr);
8386}
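// For reference, these variadic expressions may appear wherever an MC
// expression is parsed (symbol names below are illustrative):
//   .set total_vgpr, max(callee_a.vgpr, callee_b.vgpr)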
8387
8388ParseStatus AMDGPUAsmParser::parseOModSI(OperandVector &Operands) {
8389 StringRef Name = getTokenStr();
8390 if (Name == "mul") {
8391 return parseIntWithPrefix("mul", Operands,
8392 AMDGPUOperand::ImmTyOModSI, ConvertOmodMul);
8393 }
8394
8395 if (Name == "div") {
8396 return parseIntWithPrefix("div", Operands,
8397 AMDGPUOperand::ImmTyOModSI, ConvertOmodDiv);
8398 }
8399
8400 return ParseStatus::NoMatch;
8401}
8402
8403// Determines which bit DST_OP_SEL occupies in the op_sel operand according to
8404// the number of src operands present, then copies that bit into src0_modifiers.
8405static void cvtVOP3DstOpSelOnly(MCInst &Inst, const MCRegisterInfo &MRI) {
8406 int Opc = Inst.getOpcode();
8407 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
8408 if (OpSelIdx == -1)
8409 return;
8410
8411 int SrcNum;
8412 const int Ops[] = { AMDGPU::OpName::src0,
8413 AMDGPU::OpName::src1,
8414 AMDGPU::OpName::src2 };
8415 for (SrcNum = 0; SrcNum < 3 && AMDGPU::hasNamedOperand(Opc, Ops[SrcNum]);
8416 ++SrcNum)
8417 ;
8418 assert(SrcNum > 0);
8419
8420 unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
8421
8422 int DstIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst);
8423 if (DstIdx == -1)
8424 return;
8425
8426 const MCOperand &DstOp = Inst.getOperand(DstIdx);
8427 int ModIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers);
8428 uint32_t ModVal = Inst.getOperand(ModIdx).getImm();
8429 if (DstOp.isReg() &&
8430 MRI.getRegClass(AMDGPU::VGPR_16RegClassID).contains(DstOp.getReg())) {
8431 if (AMDGPU::isHi(DstOp.getReg(), MRI))
8432 ModVal |= SISrcMods::DST_OP_SEL;
8433 } else {
8434 if ((OpSel & (1 << SrcNum)) != 0)
8435 ModVal |= SISrcMods::DST_OP_SEL;
8436 }
8437 Inst.getOperand(ModIdx).setImm(ModVal);
8438}
8439
8440void AMDGPUAsmParser::cvtVOP3OpSel(MCInst &Inst,
8441 const OperandVector &Operands) {
8442 cvtVOP3P(Inst, Operands);
8443 cvtVOP3DstOpSelOnly(Inst, *getMRI());
8444}
8445
8446void AMDGPUAsmParser::cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands,
8447 OptionalImmIndexMap &OptionalIdx) {
8448 cvtVOP3P(Inst, Operands, OptionalIdx);
8449 cvtVOP3DstOpSelOnly(Inst, *getMRI());
8450}
8451
8452static bool isRegOrImmWithInputMods(const MCInstrDesc &Desc, unsigned OpNum) {
8453 return
8454 // 1. This operand is input modifiers
8455 Desc.operands()[OpNum].OperandType == AMDGPU::OPERAND_INPUT_MODS
8456 // 2. This is not last operand
8457 && Desc.NumOperands > (OpNum + 1)
8458 // 3. Next operand is register class
8459 && Desc.operands()[OpNum + 1].RegClass != -1
8460 // 4. Next register is not tied to any other operand
8461 && Desc.getOperandConstraint(OpNum + 1,
8462 MCOI::OperandConstraint::TIED_TO) == -1;
8463}
8464
8465void AMDGPUAsmParser::cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands)
8466{
8467 OptionalImmIndexMap OptionalIdx;
8468 unsigned Opc = Inst.getOpcode();
8469
8470 unsigned I = 1;
8471 const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
8472 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
8473 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
8474 }
8475
8476 for (unsigned E = Operands.size(); I != E; ++I) {
8477 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
8478 if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
8479 Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
8480 } else if (Op.isInterpSlot() || Op.isInterpAttr() ||
8481 Op.isInterpAttrChan()) {
8482 Inst.addOperand(MCOperand::createImm(Op.getImm()));
8483 } else if (Op.isImmModifier()) {
8484 OptionalIdx[Op.getImmTy()] = I;
8485 } else {
8486 llvm_unreachable("unhandled operand type");
8487 }
8488 }
8489
8490 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::high))
8491 addOptionalImmOperand(Inst, Operands, OptionalIdx,
8492 AMDGPUOperand::ImmTyHigh);
8493
8494 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::clamp))
8495 addOptionalImmOperand(Inst, Operands, OptionalIdx,
8496 AMDGPUOperand::ImmTyClampSI);
8497
8498 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::omod))
8499 addOptionalImmOperand(Inst, Operands, OptionalIdx,
8500 AMDGPUOperand::ImmTyOModSI);
8501}
8502
8503void AMDGPUAsmParser::cvtVINTERP(MCInst &Inst, const OperandVector &Operands)
8504{
8505 OptionalImmIndexMap OptionalIdx;
8506 unsigned Opc = Inst.getOpcode();
8507
8508 unsigned I = 1;
8509 const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
8510 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
8511 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
8512 }
8513
8514 for (unsigned E = Operands.size(); I != E; ++I) {
8515 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
8516 if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
8517 Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
8518 } else if (Op.isImmModifier()) {
8519 OptionalIdx[Op.getImmTy()] = I;
8520 } else {
8521 llvm_unreachable("unhandled operand type");
8522 }
8523 }
8524
8525 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI);
8526
8527 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
8528 if (OpSelIdx != -1)
8529 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOpSel);
8530
8531 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyWaitEXP);
8532
8533 if (OpSelIdx == -1)
8534 return;
8535
8536 const int Ops[] = { AMDGPU::OpName::src0,
8537 AMDGPU::OpName::src1,
8538 AMDGPU::OpName::src2 };
8539 const int ModOps[] = { AMDGPU::OpName::src0_modifiers,
8540 AMDGPU::OpName::src1_modifiers,
8541 AMDGPU::OpName::src2_modifiers };
8542
8543 unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
8544
8545 for (int J = 0; J < 3; ++J) {
8546 int OpIdx = AMDGPU::getNamedOperandIdx(Opc, Ops[J]);
8547 if (OpIdx == -1)
8548 break;
8549
8550 int ModIdx = AMDGPU::getNamedOperandIdx(Opc, ModOps[J]);
8551 uint32_t ModVal = Inst.getOperand(ModIdx).getImm();
8552
8553 if ((OpSel & (1 << J)) != 0)
8554 ModVal |= SISrcMods::OP_SEL_0;
8555 if (ModOps[J] == AMDGPU::OpName::src0_modifiers &&
8556 (OpSel & (1 << 3)) != 0)
8557 ModVal |= SISrcMods::DST_OP_SEL;
8558
8559 Inst.getOperand(ModIdx).setImm(ModVal);
8560 }
8561}
8562
8563void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands,
8564 OptionalImmIndexMap &OptionalIdx) {
8565 unsigned Opc = Inst.getOpcode();
8566
8567 unsigned I = 1;
8568 const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
8569 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
8570 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
8571 }
8572
8573 for (unsigned E = Operands.size(); I != E; ++I) {
8574 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
8575 if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
8576 Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
8577 } else if (Op.isImmModifier()) {
8578 OptionalIdx[Op.getImmTy()] = I;
8579 } else if (Op.isRegOrImm()) {
8580 Op.addRegOrImmOperands(Inst, 1);
8581 } else {
8582 llvm_unreachable("unhandled operand type");
8583 }
8584 }
8585
8586 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::clamp))
8587 addOptionalImmOperand(Inst, Operands, OptionalIdx,
8588 AMDGPUOperand::ImmTyClampSI);
8589
8590 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::omod))
8591 addOptionalImmOperand(Inst, Operands, OptionalIdx,
8592 AMDGPUOperand::ImmTyOModSI);
8593
8594 // Special case v_mac_{f16, f32} and v_fmac_{f16, f32} (gfx906/gfx10+):
8595 // they have a src2 register operand that is tied to the dst operand.
8596 // The assembler does not allow modifiers for this operand, so src2_modifiers
8597 // should be 0.
8598 if (isMAC(Opc)) {
8599 auto it = Inst.begin();
8600 std::advance(it, AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2_modifiers));
8601 it = Inst.insert(it, MCOperand::createImm(0)); // no modifiers for src2
8602 ++it;
8603 // Copy the operand to ensure it's not invalidated when Inst grows.
8604 Inst.insert(it, MCOperand(Inst.getOperand(0))); // src2 = dst
8605 }
8606}
8607
8608void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands) {
8609 OptionalImmIndexMap OptionalIdx;
8610 cvtVOP3(Inst, Operands, OptionalIdx);
8611}
8612
8613void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst, const OperandVector &Operands,
8614 OptionalImmIndexMap &OptIdx) {
8615 const int Opc = Inst.getOpcode();
8616 const MCInstrDesc &Desc = MII.get(Opc);
8617
8618 const bool IsPacked = (Desc.TSFlags & SIInstrFlags::IsPacked) != 0;
8619
8620 if (Opc == AMDGPU::V_CVT_SR_BF8_F32_vi ||
8621 Opc == AMDGPU::V_CVT_SR_FP8_F32_vi ||
8622 Opc == AMDGPU::V_CVT_SR_BF8_F32_e64_gfx12 ||
8623 Opc == AMDGPU::V_CVT_SR_FP8_F32_e64_gfx12) {
8624 Inst.addOperand(MCOperand::createImm(0)); // Placeholder for src2_mods
8625 Inst.addOperand(Inst.getOperand(0));
8626 }
8627
8628 // Adding vdst_in operand is already covered for these DPP instructions in
8629 // cvtVOP3DPP.
8630 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::vdst_in) &&
8631 !(Opc == AMDGPU::V_CVT_PK_BF8_F32_e64_dpp_gfx12 ||
8632 Opc == AMDGPU::V_CVT_PK_FP8_F32_e64_dpp_gfx12 ||
8633 Opc == AMDGPU::V_CVT_PK_BF8_F32_e64_dpp8_gfx12 ||
8634 Opc == AMDGPU::V_CVT_PK_FP8_F32_e64_dpp8_gfx12)) {
8635 assert(!IsPacked);
8636 Inst.addOperand(Inst.getOperand(0));
8637 }
8638
8639 // FIXME: This is messy. Parse the modifiers as if it was a normal VOP3
8640 // instruction, and then figure out where to actually put the modifiers
8641
8642 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
8643 if (OpSelIdx != -1) {
8644 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSel);
8645 }
8646
8647 int OpSelHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel_hi);
8648 if (OpSelHiIdx != -1) {
8649 int DefaultVal = IsPacked ? -1 : 0;
8650 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSelHi,
8651 DefaultVal);
8652 }
8653
8654 int NegLoIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_lo);
8655 if (NegLoIdx != -1)
8656 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegLo);
8657
8658 int NegHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_hi);
8659 if (NegHiIdx != -1)
8660 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegHi);
8661
8662 const int Ops[] = { AMDGPU::OpName::src0,
8663 AMDGPU::OpName::src1,
8664 AMDGPU::OpName::src2 };
8665 const int ModOps[] = { AMDGPU::OpName::src0_modifiers,
8666 AMDGPU::OpName::src1_modifiers,
8667 AMDGPU::OpName::src2_modifiers };
8668
8669 unsigned OpSel = 0;
8670 unsigned OpSelHi = 0;
8671 unsigned NegLo = 0;
8672 unsigned NegHi = 0;
8673
8674 if (OpSelIdx != -1)
8675 OpSel = Inst.getOperand(OpSelIdx).getImm();
8676
8677 if (OpSelHiIdx != -1)
8678 OpSelHi = Inst.getOperand(OpSelHiIdx).getImm();
8679
8680 if (NegLoIdx != -1)
8681 NegLo = Inst.getOperand(NegLoIdx).getImm();
8682
8683 if (NegHiIdx != -1)
8684 NegHi = Inst.getOperand(NegHiIdx).getImm();
8685
8686 for (int J = 0; J < 3; ++J) {
8687 int OpIdx = AMDGPU::getNamedOperandIdx(Opc, Ops[J]);
8688 if (OpIdx == -1)
8689 break;
8690
8691 int ModIdx = AMDGPU::getNamedOperandIdx(Opc, ModOps[J]);
8692
8693 if (ModIdx == -1)
8694 continue;
8695
8696 uint32_t ModVal = 0;
8697
8698 const MCOperand &SrcOp = Inst.getOperand(OpIdx);
8699 if (SrcOp.isReg() && getMRI()
8700 ->getRegClass(AMDGPU::VGPR_16RegClassID)
8701 .contains(SrcOp.getReg())) {
8702 bool VGPRSuffixIsHi = AMDGPU::isHi(SrcOp.getReg(), *getMRI());
8703 if (VGPRSuffixIsHi)
8704 ModVal |= SISrcMods::OP_SEL_0;
8705 } else {
8706 if ((OpSel & (1 << J)) != 0)
8707 ModVal |= SISrcMods::OP_SEL_0;
8708 }
8709
8710 if ((OpSelHi & (1 << J)) != 0)
8711 ModVal |= SISrcMods::OP_SEL_1;
8712
8713 if ((NegLo & (1 << J)) != 0)
8714 ModVal |= SISrcMods::NEG;
8715
8716 if ((NegHi & (1 << J)) != 0)
8717 ModVal |= SISrcMods::NEG_HI;
8718
8719 Inst.getOperand(ModIdx).setImm(Inst.getOperand(ModIdx).getImm() | ModVal);
8720 }
8721}
8722
8723void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst, const OperandVector &Operands) {
8724 OptionalImmIndexMap OptIdx;
8725 cvtVOP3(Inst, Operands, OptIdx);
8726 cvtVOP3P(Inst, Operands, OptIdx);
8727}
8728
8729 static void addSrcModifiersAndSrc(MCInst &Inst, const OperandVector &Operands,
8730 unsigned i, unsigned Opc, unsigned OpName) {
8731 if (AMDGPU::getNamedOperandIdx(Opc, OpName) != -1)
8732 ((AMDGPUOperand &)*Operands[i]).addRegOrImmWithFPInputModsOperands(Inst, 2);
8733 else
8734 ((AMDGPUOperand &)*Operands[i]).addRegOperands(Inst, 1);
8735}
8736
8737void AMDGPUAsmParser::cvtSWMMAC(MCInst &Inst, const OperandVector &Operands) {
8738 unsigned Opc = Inst.getOpcode();
8739
8740 ((AMDGPUOperand &)*Operands[1]).addRegOperands(Inst, 1);
8741 addSrcModifiersAndSrc(Inst, Operands, 2, Opc, AMDGPU::OpName::src0_modifiers);
8742 addSrcModifiersAndSrc(Inst, Operands, 3, Opc, AMDGPU::OpName::src1_modifiers);
8743 ((AMDGPUOperand &)*Operands[1]).addRegOperands(Inst, 1); // srcTiedDef
8744 ((AMDGPUOperand &)*Operands[4]).addRegOperands(Inst, 1); // src2
8745
8746 OptionalImmIndexMap OptIdx;
8747 for (unsigned i = 5; i < Operands.size(); ++i) {
8748 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
8749 OptIdx[Op.getImmTy()] = i;
8750 }
8751
8752 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::index_key_8bit))
8753 addOptionalImmOperand(Inst, Operands, OptIdx,
8754 AMDGPUOperand::ImmTyIndexKey8bit);
8755
8756 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::index_key_16bit))
8757 addOptionalImmOperand(Inst, Operands, OptIdx,
8758 AMDGPUOperand::ImmTyIndexKey16bit);
8759
8760 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::clamp))
8761 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyClampSI);
8762
8763 cvtVOP3P(Inst, Operands, OptIdx);
8764}
8765
8766//===----------------------------------------------------------------------===//
8767// VOPD
8768//===----------------------------------------------------------------------===//
8769
8770ParseStatus AMDGPUAsmParser::parseVOPD(OperandVector &Operands) {
8771 if (!hasVOPD(getSTI()))
8772 return ParseStatus::NoMatch;
8773
8774 if (isToken(AsmToken::Colon) && peekToken(false).is(AsmToken::Colon)) {
8775 SMLoc S = getLoc();
8776 lex();
8777 lex();
8778 Operands.push_back(AMDGPUOperand::CreateToken(this, "::", S));
8779 SMLoc OpYLoc = getLoc();
8780 StringRef OpYName;
8781 if (isToken(AsmToken::Identifier) && !Parser.parseIdentifier(OpYName)) {
8782 Operands.push_back(AMDGPUOperand::CreateToken(this, OpYName, OpYLoc));
8783 return ParseStatus::Success;
8784 }
8785 return Error(OpYLoc, "expected a VOPDY instruction after ::");
8786 }
8787 return ParseStatus::NoMatch;
8788}
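// For reference, the dual-issue syntax recognized above (GFX11+; illustrative):
//   v_dual_mov_b32 v0, v1 :: v_dual_add_f32 v2, v3, v4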
8789
8790// Create VOPD MCInst operands using parsed assembler operands.
8791void AMDGPUAsmParser::cvtVOPD(MCInst &Inst, const OperandVector &Operands) {
8792 auto addOp = [&](uint16_t ParsedOprIdx) { // NOLINT:function pointer
8793 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[ParsedOprIdx]);
8794 if (Op.isReg()) {
8795 Op.addRegOperands(Inst, 1);
8796 return;
8797 }
8798 if (Op.isImm()) {
8799 Op.addImmOperands(Inst, 1);
8800 return;
8801 }
8802 llvm_unreachable("Unhandled operand type in cvtVOPD");
8803 };
8804
8805 const auto &InstInfo = getVOPDInstInfo(Inst.getOpcode(), &MII);
8806
8807 // MCInst operands are ordered as follows:
8808 // dstX, dstY, src0X [, other OpX operands], src0Y [, other OpY operands]
8809
8810 for (auto CompIdx : VOPD::COMPONENTS) {
8811 addOp(InstInfo[CompIdx].getIndexOfDstInParsedOperands());
8812 }
8813
8814 for (auto CompIdx : VOPD::COMPONENTS) {
8815 const auto &CInfo = InstInfo[CompIdx];
8816 auto CompSrcOperandsNum = InstInfo[CompIdx].getCompParsedSrcOperandsNum();
8817 for (unsigned CompSrcIdx = 0; CompSrcIdx < CompSrcOperandsNum; ++CompSrcIdx)
8818 addOp(CInfo.getIndexOfSrcInParsedOperands(CompSrcIdx));
8819 if (CInfo.hasSrc2Acc())
8820 addOp(CInfo.getIndexOfDstInParsedOperands());
8821 }
8822}
8823
8824//===----------------------------------------------------------------------===//
8825// dpp
8826//===----------------------------------------------------------------------===//
8827
8828bool AMDGPUOperand::isDPP8() const {
8829 return isImmTy(ImmTyDPP8);
8830}
8831
8832bool AMDGPUOperand::isDPPCtrl() const {
8833 using namespace AMDGPU::DPP;
8834
8835 bool result = isImm() && getImmTy() == ImmTyDppCtrl && isUInt<9>(getImm());
8836 if (result) {
8837 int64_t Imm = getImm();
8838 return (Imm >= DppCtrl::QUAD_PERM_FIRST && Imm <= DppCtrl::QUAD_PERM_LAST) ||
8839 (Imm >= DppCtrl::ROW_SHL_FIRST && Imm <= DppCtrl::ROW_SHL_LAST) ||
8840 (Imm >= DppCtrl::ROW_SHR_FIRST && Imm <= DppCtrl::ROW_SHR_LAST) ||
8841 (Imm >= DppCtrl::ROW_ROR_FIRST && Imm <= DppCtrl::ROW_ROR_LAST) ||
8842 (Imm == DppCtrl::WAVE_SHL1) ||
8843 (Imm == DppCtrl::WAVE_ROL1) ||
8844 (Imm == DppCtrl::WAVE_SHR1) ||
8845 (Imm == DppCtrl::WAVE_ROR1) ||
8846 (Imm == DppCtrl::ROW_MIRROR) ||
8847 (Imm == DppCtrl::ROW_HALF_MIRROR) ||
8848 (Imm == DppCtrl::BCAST15) ||
8849 (Imm == DppCtrl::BCAST31) ||
8850 (Imm >= DppCtrl::ROW_SHARE_FIRST && Imm <= DppCtrl::ROW_SHARE_LAST) ||
8851 (Imm >= DppCtrl::ROW_XMASK_FIRST && Imm <= DppCtrl::ROW_XMASK_LAST);
8852 }
8853 return false;
8854}
8855
8856//===----------------------------------------------------------------------===//
8857// mAI
8858//===----------------------------------------------------------------------===//
8859
8860bool AMDGPUOperand::isBLGP() const {
8861 return isImm() && getImmTy() == ImmTyBLGP && isUInt<3>(getImm());
8862}
8863
8864bool AMDGPUOperand::isCBSZ() const {
8865 return isImm() && getImmTy() == ImmTyCBSZ && isUInt<3>(getImm());
8866}
8867
8868bool AMDGPUOperand::isABID() const {
8869 return isImm() && getImmTy() == ImmTyABID && isUInt<4>(getImm());
8870}
8871
8872bool AMDGPUOperand::isS16Imm() const {
8873 return isImmLiteral() && (isInt<16>(getImm()) || isUInt<16>(getImm()));
8874}
8875
8876bool AMDGPUOperand::isU16Imm() const {
8877 return isImmLiteral() && isUInt<16>(getImm());
8878}
8879
8880//===----------------------------------------------------------------------===//
8881// dim
8882//===----------------------------------------------------------------------===//
8883
8884bool AMDGPUAsmParser::parseDimId(unsigned &Encoding) {
8885 // We want to allow "dim:1D" etc.,
8886 // but the initial 1 is tokenized as an integer.
8887 std::string Token;
8888 if (isToken(AsmToken::Integer)) {
8889 SMLoc Loc = getToken().getEndLoc();
8890 Token = std::string(getTokenStr());
8891 lex();
8892 if (getLoc() != Loc)
8893 return false;
8894 }
8895
8896 StringRef Suffix;
8897 if (!parseId(Suffix))
8898 return false;
8899 Token += Suffix;
8900
8901 StringRef DimId = Token;
8902 if (DimId.starts_with("SQ_RSRC_IMG_"))
8903 DimId = DimId.drop_front(12);
8904
8905 const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByAsmSuffix(DimId);
8906 if (!DimInfo)
8907 return false;
8908
8909 Encoding = DimInfo->Encoding;
8910 return true;
8911}
8912
8913ParseStatus AMDGPUAsmParser::parseDim(OperandVector &Operands) {
8914 if (!isGFX10Plus())
8915 return ParseStatus::NoMatch;
8916
8917 SMLoc S = getLoc();
8918
8919 if (!trySkipId("dim", AsmToken::Colon))
8920 return ParseStatus::NoMatch;
8921
8922 unsigned Encoding;
8923 SMLoc Loc = getLoc();
8924 if (!parseDimId(Encoding))
8925 return Error(Loc, "invalid dim value");
8926
8927 Operands.push_back(AMDGPUOperand::CreateImm(this, Encoding, S,
8928 AMDGPUOperand::ImmTyDim));
8929 return ParseStatus::Success;
8930}
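// For reference, forms accepted by parseDim above (illustrative):
//   image_load v[0:3], v0, s[0:7] dmask:0xf dim:SQ_RSRC_IMG_1D
//   image_load v[0:3], [v0, v1], s[0:7] dmask:0xf dim:2D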
8931
8932//===----------------------------------------------------------------------===//
8933// dpp
8934//===----------------------------------------------------------------------===//
8935
8936ParseStatus AMDGPUAsmParser::parseDPP8(OperandVector &Operands) {
8937 SMLoc S = getLoc();
8938
8939 if (!isGFX10Plus() || !trySkipId("dpp8", AsmToken::Colon))
8940 return ParseStatus::NoMatch;
8941
8942 // dpp8:[%d,%d,%d,%d,%d,%d,%d,%d]
8943
8944 int64_t Sels[8];
8945
8946 if (!skipToken(AsmToken::LBrac, "expected an opening square bracket"))
8947 return ParseStatus::Failure;
8948
8949 for (size_t i = 0; i < 8; ++i) {
8950 if (i > 0 && !skipToken(AsmToken::Comma, "expected a comma"))
8951 return ParseStatus::Failure;
8952
8953 SMLoc Loc = getLoc();
8954 if (getParser().parseAbsoluteExpression(Sels[i]))
8955 return ParseStatus::Failure;
8956 if (0 > Sels[i] || 7 < Sels[i])
8957 return Error(Loc, "expected a 3-bit value");
8958 }
8959
8960 if (!skipToken(AsmToken::RBrac, "expected a closing square bracket"))
8961 return ParseStatus::Failure;
8962
8963 unsigned DPP8 = 0;
8964 for (size_t i = 0; i < 8; ++i)
8965 DPP8 |= (Sels[i] << (i * 3));
8966
8967 Operands.push_back(AMDGPUOperand::CreateImm(this, DPP8, S, AMDGPUOperand::ImmTyDPP8));
8968 return ParseStatus::Success;
8969}
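// For reference (illustrative): each select is a 3-bit source lane index,
// packed LSB-first by the loop above, e.g.
//   v_mov_b32_dpp v0, v1 dpp8:[7,6,5,4,3,2,1,0]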
8970
8971bool
8972AMDGPUAsmParser::isSupportedDPPCtrl(StringRef Ctrl,
8973 const OperandVector &Operands) {
8974 if (Ctrl == "row_newbcast")
8975 return isGFX90A();
8976
8977 if (Ctrl == "row_share" ||
8978 Ctrl == "row_xmask")
8979 return isGFX10Plus();
8980
8981 if (Ctrl == "wave_shl" ||
8982 Ctrl == "wave_shr" ||
8983 Ctrl == "wave_rol" ||
8984 Ctrl == "wave_ror" ||
8985 Ctrl == "row_bcast")
8986 return isVI() || isGFX9();
8987
8988 return Ctrl == "row_mirror" ||
8989 Ctrl == "row_half_mirror" ||
8990 Ctrl == "quad_perm" ||
8991 Ctrl == "row_shl" ||
8992 Ctrl == "row_shr" ||
8993 Ctrl == "row_ror";
8994}
8995
8996int64_t
8997AMDGPUAsmParser::parseDPPCtrlPerm() {
8998 // quad_perm:[%d,%d,%d,%d]
8999
9000 if (!skipToken(AsmToken::LBrac, "expected an opening square bracket"))
9001 return -1;
9002
9003 int64_t Val = 0;
9004 for (int i = 0; i < 4; ++i) {
9005 if (i > 0 && !skipToken(AsmToken::Comma, "expected a comma"))
9006 return -1;
9007
9008 int64_t Temp;
9009 SMLoc Loc = getLoc();
9010 if (getParser().parseAbsoluteExpression(Temp))
9011 return -1;
9012 if (Temp < 0 || Temp > 3) {
9013 Error(Loc, "expected a 2-bit value");
9014 return -1;
9015 }
9016
9017 Val += (Temp << i * 2);
9018 }
9019
9020 if (!skipToken(AsmToken::RBrac, "expected a closing square bracket"))
9021 return -1;
9022
9023 return Val;
9024}
9025
9026int64_t
9027AMDGPUAsmParser::parseDPPCtrlSel(StringRef Ctrl) {
9028 using namespace AMDGPU::DPP;
9029
9030 // sel:%d
9031
9032 int64_t Val;
9033 SMLoc Loc = getLoc();
9034
9035 if (getParser().parseAbsoluteExpression(Val))
9036 return -1;
9037
9038 struct DppCtrlCheck {
9039 int64_t Ctrl;
9040 int Lo;
9041 int Hi;
9042 };
9043
9044 DppCtrlCheck Check = StringSwitch<DppCtrlCheck>(Ctrl)
9045 .Case("wave_shl", {DppCtrl::WAVE_SHL1, 1, 1})
9046 .Case("wave_rol", {DppCtrl::WAVE_ROL1, 1, 1})
9047 .Case("wave_shr", {DppCtrl::WAVE_SHR1, 1, 1})
9048 .Case("wave_ror", {DppCtrl::WAVE_ROR1, 1, 1})
9049 .Case("row_shl", {DppCtrl::ROW_SHL0, 1, 15})
9050 .Case("row_shr", {DppCtrl::ROW_SHR0, 1, 15})
9051 .Case("row_ror", {DppCtrl::ROW_ROR0, 1, 15})
9052 .Case("row_share", {DppCtrl::ROW_SHARE_FIRST, 0, 15})
9053 .Case("row_xmask", {DppCtrl::ROW_XMASK_FIRST, 0, 15})
9054 .Case("row_newbcast", {DppCtrl::ROW_NEWBCAST_FIRST, 0, 15})
9055 .Default({-1, 0, 0});
9056
9057 bool Valid;
9058 if (Check.Ctrl == -1) {
9059 Valid = (Ctrl == "row_bcast" && (Val == 15 || Val == 31));
9060 Val = (Val == 15)? DppCtrl::BCAST15 : DppCtrl::BCAST31;
9061 } else {
9062 Valid = Check.Lo <= Val && Val <= Check.Hi;
9063 Val = (Check.Lo == Check.Hi) ? Check.Ctrl : (Check.Ctrl | Val);
9064 }
9065
9066 if (!Valid) {
9067 Error(Loc, Twine("invalid ", Ctrl) + Twine(" value"));
9068 return -1;
9069 }
9070
9071 return Val;
9072}
9073
9074ParseStatus AMDGPUAsmParser::parseDPPCtrl(OperandVector &Operands) {
9075 using namespace AMDGPU::DPP;
9076
9077 if (!isToken(AsmToken::Identifier) ||
9078 !isSupportedDPPCtrl(getTokenStr(), Operands))
9079 return ParseStatus::NoMatch;
9080
9081 SMLoc S = getLoc();
9082 int64_t Val = -1;
9083 StringRef Ctrl;
9084 
9085 parseId(Ctrl);
9086
9087 if (Ctrl == "row_mirror") {
9088 Val = DppCtrl::ROW_MIRROR;
9089 } else if (Ctrl == "row_half_mirror") {
9090 Val = DppCtrl::ROW_HALF_MIRROR;
9091 } else {
9092 if (skipToken(AsmToken::Colon, "expected a colon")) {
9093 if (Ctrl == "quad_perm") {
9094 Val = parseDPPCtrlPerm();
9095 } else {
9096 Val = parseDPPCtrlSel(Ctrl);
9097 }
9098 }
9099 }
9100
9101 if (Val == -1)
9102 return ParseStatus::Failure;
9103
9104 Operands.push_back(
9105 AMDGPUOperand::CreateImm(this, Val, S, AMDGPUOperand::ImmTyDppCtrl));
9106 return ParseStatus::Success;
9107}
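// For reference, dpp controls accepted above (illustrative; availability is
// gated by isSupportedDPPCtrl):
//   v_mov_b32_dpp v0, v1 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf
//   v_mov_b32_dpp v0, v1 row_shl:1 bound_ctrl:0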
9108
9109void AMDGPUAsmParser::cvtVOP3DPP(MCInst &Inst, const OperandVector &Operands,
9110 bool IsDPP8) {
9111 OptionalImmIndexMap OptionalIdx;
9112 unsigned Opc = Inst.getOpcode();
9113 const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
9114
9115 // MAC instructions are special because they have an 'old'
9116 // operand which is not tied to dst (but is assumed to be).
9117 // They also have a dummy, unused src2_modifiers operand.
9118 int OldIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::old);
9119 int Src2ModIdx =
9120 AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2_modifiers);
9121 bool IsMAC = OldIdx != -1 && Src2ModIdx != -1 &&
9122 Desc.getOperandConstraint(OldIdx, MCOI::TIED_TO) == -1;
9123
9124 unsigned I = 1;
9125 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
9126 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
9127 }
9128
9129 int Fi = 0;
9130 for (unsigned E = Operands.size(); I != E; ++I) {
9131
9132 if (IsMAC) {
9133 int NumOperands = Inst.getNumOperands();
9134 if (OldIdx == NumOperands) {
9135 // Handle old operand
9136 constexpr int DST_IDX = 0;
9137 Inst.addOperand(Inst.getOperand(DST_IDX));
9138 } else if (Src2ModIdx == NumOperands) {
9139 // Add unused dummy src2_modifiers
9140 Inst.addOperand(MCOperand::createImm(0));
9141 }
9142 }
9143
9144 int VdstInIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst_in);
9145 if (VdstInIdx == static_cast<int>(Inst.getNumOperands())) {
9146 Inst.addOperand(Inst.getOperand(0));
9147 }
9148
9149 bool IsVOP3CvtSrDpp = Opc == AMDGPU::V_CVT_SR_BF8_F32_e64_dpp8_gfx12 ||
9150 Opc == AMDGPU::V_CVT_SR_FP8_F32_e64_dpp8_gfx12 ||
9151 Opc == AMDGPU::V_CVT_SR_BF8_F32_e64_dpp_gfx12 ||
9152 Opc == AMDGPU::V_CVT_SR_FP8_F32_e64_dpp_gfx12;
9153 if (IsVOP3CvtSrDpp) {
9154 if (Src2ModIdx == static_cast<int>(Inst.getNumOperands())) {
9155 Inst.addOperand(MCOperand::createImm(0));
9156 Inst.addOperand(MCOperand::createReg(0));
9157 }
9158 }
9159
9160 auto TiedTo = Desc.getOperandConstraint(Inst.getNumOperands(),
9161 MCOI::TIED_TO);
9162 if (TiedTo != -1) {
9163 assert((unsigned)TiedTo < Inst.getNumOperands());
9164 // handle tied old or src2 for MAC instructions
9165 Inst.addOperand(Inst.getOperand(TiedTo));
9166 }
9167 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
9168 // Add the register arguments
9169 if (IsDPP8 && Op.isDppFI()) {
9170 Fi = Op.getImm();
9171 } else if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
9172 Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
9173 } else if (Op.isReg()) {
9174 Op.addRegOperands(Inst, 1);
9175 } else if (Op.isImm() &&
9176 Desc.operands()[Inst.getNumOperands()].RegClass != -1) {
9177 assert(!Op.IsImmKindLiteral() && "Cannot use literal with DPP");
9178 Op.addImmOperands(Inst, 1);
9179 } else if (Op.isImm()) {
9180 OptionalIdx[Op.getImmTy()] = I;
9181 } else {
9182 llvm_unreachable("unhandled operand type");
9183 }
9184 }
9185 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::clamp))
9186 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI);
9187
9188 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::omod))
9189 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI);
9190
9191 if (Desc.TSFlags & SIInstrFlags::VOP3P)
9192 cvtVOP3P(Inst, Operands, OptionalIdx);
9193 else if (Desc.TSFlags & SIInstrFlags::VOP3)
9194 cvtVOP3OpSel(Inst, Operands, OptionalIdx);
9195 else if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::op_sel)) {
9196 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOpSel);
9197 }
9198
9199 if (IsDPP8) {
9200 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDPP8);
9201 using namespace llvm::AMDGPU::DPP;
9202 Inst.addOperand(MCOperand::createImm(Fi? DPP8_FI_1 : DPP8_FI_0));
9203 } else {
9204 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppCtrl, 0xe4);
9205 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppRowMask, 0xf);
9206 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBankMask, 0xf);
9207 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBoundCtrl);
9208
9209 if (AMDGPU::hasNamedOperand(Inst.getOpcode(), AMDGPU::OpName::fi))
9210 addOptionalImmOperand(Inst, Operands, OptionalIdx,
9211 AMDGPUOperand::ImmTyDppFI);
9212 }
9213}
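// Illustrative only (mnemonic spelling is an assumption): for a VOP3 DPP input such as
//   v_add_f32_e64_dpp v0, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf
// the converter above fills any omitted fields with the defaults used here:
// dpp_ctrl 0xe4 (the identity quad_perm), row_mask/bank_mask 0xf, and bound_ctrl 0.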
9214
9215void AMDGPUAsmParser::cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8) {
9216 OptionalImmIndexMap OptionalIdx;
9217
9218 unsigned I = 1;
9219 const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
9220 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
9221 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
9222 }
9223
9224 int Fi = 0;
9225 for (unsigned E = Operands.size(); I != E; ++I) {
9226 auto TiedTo = Desc.getOperandConstraint(Inst.getNumOperands(),
9227 MCOI::TIED_TO);
9228 if (TiedTo != -1) {
9229 assert((unsigned)TiedTo < Inst.getNumOperands());
9230 // handle tied old or src2 for MAC instructions
9231 Inst.addOperand(Inst.getOperand(TiedTo));
9232 }
9233 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
9234 // Add the register arguments
9235 if (Op.isReg() && validateVccOperand(Op.getReg())) {
9236 // VOP2b (v_add_u32, v_sub_u32 ...) dpp uses the "vcc" token.
9237 // Skip it.
9238 continue;
9239 }
9240
9241 if (IsDPP8) {
9242 if (Op.isDPP8()) {
9243 Op.addImmOperands(Inst, 1);
9244 } else if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
9245 Op.addRegWithFPInputModsOperands(Inst, 2);
9246 } else if (Op.isDppFI()) {
9247 Fi = Op.getImm();
9248 } else if (Op.isReg()) {
9249 Op.addRegOperands(Inst, 1);
9250 } else {
9251 llvm_unreachable("Invalid operand type");
9252 }
9253 } else {
9254 if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
9255 Op.addRegWithFPInputModsOperands(Inst, 2);
9256 } else if (Op.isReg()) {
9257 Op.addRegOperands(Inst, 1);
9258 } else if (Op.isDPPCtrl()) {
9259 Op.addImmOperands(Inst, 1);
9260 } else if (Op.isImm()) {
9261 // Handle optional arguments
9262 OptionalIdx[Op.getImmTy()] = I;
9263 } else {
9264 llvm_unreachable("Invalid operand type");
9265 }
9266 }
9267 }
9268
9269 if (IsDPP8) {
9270 using namespace llvm::AMDGPU::DPP;
9271 Inst.addOperand(MCOperand::createImm(Fi? DPP8_FI_1 : DPP8_FI_0));
9272 } else {
9273 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppRowMask, 0xf);
9274 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBankMask, 0xf);
9275 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBoundCtrl);
9276 if (AMDGPU::hasNamedOperand(Inst.getOpcode(), AMDGPU::OpName::fi)) {
9277 addOptionalImmOperand(Inst, Operands, OptionalIdx,
9278 AMDGPUOperand::ImmTyDppFI);
9279 }
9280 }
9281}
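// Illustrative only (mnemonic spelling is an assumption): the DPP8 path above consumes
// the lane-select list and the optional fetch-inactive flag, e.g.
//   v_mov_b32_dpp v0, v1 dpp8:[7,6,5,4,3,2,1,0] fi:1
// while the classic DPP path collects dpp_ctrl plus the optional masks, e.g.
//   v_mov_b32_dpp v0, v1 row_shl:1 row_mask:0xf bank_mask:0xf bound_ctrl:0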
9282
9283//===----------------------------------------------------------------------===//
9284// sdwa
9285//===----------------------------------------------------------------------===//
9286
9287ParseStatus AMDGPUAsmParser::parseSDWASel(OperandVector &Operands,
9288 StringRef Prefix,
9289 AMDGPUOperand::ImmTy Type) {
9290 using namespace llvm::AMDGPU::SDWA;
9291
9292 SMLoc S = getLoc();
9293 StringRef Value;
9294
9295 SMLoc StringLoc;
9296 ParseStatus Res = parseStringWithPrefix(Prefix, Value, StringLoc);
9297 if (!Res.isSuccess())
9298 return Res;
9299
9300 int64_t Int;
9301 Int = StringSwitch<int64_t>(Value)
9302 .Case("BYTE_0", SdwaSel::BYTE_0)
9303 .Case("BYTE_1", SdwaSel::BYTE_1)
9304 .Case("BYTE_2", SdwaSel::BYTE_2)
9305 .Case("BYTE_3", SdwaSel::BYTE_3)
9306 .Case("WORD_0", SdwaSel::WORD_0)
9307 .Case("WORD_1", SdwaSel::WORD_1)
9308 .Case("DWORD", SdwaSel::DWORD)
9309 .Default(0xffffffff);
9310
9311 if (Int == 0xffffffff)
9312 return Error(StringLoc, "invalid " + Twine(Prefix) + " value");
9313
9314 Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, Type));
9315 return ParseStatus::Success;
9316}
9317
9318ParseStatus AMDGPUAsmParser::parseSDWADstUnused(OperandVector &Operands) {
9319 using namespace llvm::AMDGPU::SDWA;
9320
9321 SMLoc S = getLoc();
9322 StringRef Value;
9323
9324 SMLoc StringLoc;
9325 ParseStatus Res = parseStringWithPrefix("dst_unused", Value, StringLoc);
9326 if (!Res.isSuccess())
9327 return Res;
9328
9329 int64_t Int;
9330 Int = StringSwitch<int64_t>(Value)
9331 .Case("UNUSED_PAD", DstUnused::UNUSED_PAD)
9332 .Case("UNUSED_SEXT", DstUnused::UNUSED_SEXT)
9333 .Case("UNUSED_PRESERVE", DstUnused::UNUSED_PRESERVE)
9334 .Default(0xffffffff);
9335
9336 if (Int == 0xffffffff)
9337 return Error(StringLoc, "invalid dst_unused value");
9338
9339 Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, AMDGPUOperand::ImmTySDWADstUnused));
9340 return ParseStatus::Success;
9341}
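// Illustrative only (mnemonic spelling is an assumption): the selectors parsed by the
// two routines above appear in SDWA assembly as, e.g.
//   v_mov_b32_sdwa v1, v2 dst_sel:BYTE_0 dst_unused:UNUSED_PRESERVE src0_sel:WORD_1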
9342
9343void AMDGPUAsmParser::cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands) {
9344 cvtSDWA(Inst, Operands, SIInstrFlags::VOP1);
9345}
9346
9347void AMDGPUAsmParser::cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands) {
9348 cvtSDWA(Inst, Operands, SIInstrFlags::VOP2);
9349}
9350
9351void AMDGPUAsmParser::cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands) {
9352 cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, true, true);
9353}
9354
9355void AMDGPUAsmParser::cvtSdwaVOP2e(MCInst &Inst, const OperandVector &Operands) {
9356 cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, false, true);
9357}
9358
9359void AMDGPUAsmParser::cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands) {
9360 cvtSDWA(Inst, Operands, SIInstrFlags::VOPC, isVI());
9361}
9362
9363void AMDGPUAsmParser::cvtSDWA(MCInst &Inst, const OperandVector &Operands,
9364 uint64_t BasicInstType,
9365 bool SkipDstVcc,
9366 bool SkipSrcVcc) {
9367 using namespace llvm::AMDGPU::SDWA;
9368
9369 OptionalImmIndexMap OptionalIdx;
9370 bool SkipVcc = SkipDstVcc || SkipSrcVcc;
9371 bool SkippedVcc = false;
9372
9373 unsigned I = 1;
9374 const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
9375 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
9376 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
9377 }
9378
9379 for (unsigned E = Operands.size(); I != E; ++I) {
9380 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
9381 if (SkipVcc && !SkippedVcc && Op.isReg() &&
9382 (Op.getReg() == AMDGPU::VCC || Op.getReg() == AMDGPU::VCC_LO)) {
9383 // VOP2b (v_add_u32, v_sub_u32 ...) sdwa uses the "vcc" token as dst.
9384 // Skip it if it's the 2nd (e.g. v_add_i32_sdwa v1, vcc, v2, v3)
9385 // or the 4th (v_addc_u32_sdwa v1, vcc, v2, v3, vcc) operand.
9386 // Skip VCC only if we didn't skip it on the previous iteration.
9387 // Note that src0 and src1 occupy 2 slots each because of modifiers.
9388 if (BasicInstType == SIInstrFlags::VOP2 &&
9389 ((SkipDstVcc && Inst.getNumOperands() == 1) ||
9390 (SkipSrcVcc && Inst.getNumOperands() == 5))) {
9391 SkippedVcc = true;
9392 continue;
9393 } else if (BasicInstType == SIInstrFlags::VOPC &&
9394 Inst.getNumOperands() == 0) {
9395 SkippedVcc = true;
9396 continue;
9397 }
9398 }
9399 if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
9400 Op.addRegOrImmWithInputModsOperands(Inst, 2);
9401 } else if (Op.isImm()) {
9402 // Handle optional arguments
9403 OptionalIdx[Op.getImmTy()] = I;
9404 } else {
9405 llvm_unreachable("Invalid operand type");
9406 }
9407 SkippedVcc = false;
9408 }
9409
9410 const unsigned Opc = Inst.getOpcode();
9411 if (Opc != AMDGPU::V_NOP_sdwa_gfx10 && Opc != AMDGPU::V_NOP_sdwa_gfx9 &&
9412 Opc != AMDGPU::V_NOP_sdwa_vi) {
9413 // v_nop_sdwa_vi/gfx9/gfx10 has no optional sdwa arguments
9414 switch (BasicInstType) {
9415 case SIInstrFlags::VOP1:
9416 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::clamp))
9417 addOptionalImmOperand(Inst, Operands, OptionalIdx,
9418 AMDGPUOperand::ImmTyClampSI, 0);
9419
9420 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::omod))
9421 addOptionalImmOperand(Inst, Operands, OptionalIdx,
9422 AMDGPUOperand::ImmTyOModSI, 0);
9423
9424 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::dst_sel))
9425 addOptionalImmOperand(Inst, Operands, OptionalIdx,
9426 AMDGPUOperand::ImmTySDWADstSel, SdwaSel::DWORD);
9427
9428 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::dst_unused))
9429 addOptionalImmOperand(Inst, Operands, OptionalIdx,
9430 AMDGPUOperand::ImmTySDWADstUnused,
9431 DstUnused::UNUSED_PRESERVE);
9432
9433 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySDWASrc0Sel, SdwaSel::DWORD);
9434 break;
9435
9436 case SIInstrFlags::VOP2:
9437 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
9438
9439 if (AMDGPU::hasNamedOperand(Inst.getOpcode(), AMDGPU::OpName::omod))
9440 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0);
9441
9442 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySDWADstSel, SdwaSel::DWORD);
9443 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySDWADstUnused, DstUnused::UNUSED_PRESERVE);
9444 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySDWASrc0Sel, SdwaSel::DWORD);
9445 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySDWASrc1Sel, SdwaSel::DWORD);
9446 break;
9447
9448 case SIInstrFlags::VOPC:
9449 if (AMDGPU::hasNamedOperand(Inst.getOpcode(), AMDGPU::OpName::clamp))
9450 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
9451 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySDWASrc0Sel, SdwaSel::DWORD);
9452 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySDWASrc1Sel, SdwaSel::DWORD);
9453 break;
9454
9455 default:
9456 llvm_unreachable("Invalid instruction type. Only VOP1, VOP2 and VOPC allowed");
9457 }
9458 }
9459
9460 // Special case for v_mac_{f16, f32}:
9461 // it has a src2 register operand that is tied to the dst operand.
9462 if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi ||
9463 Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) {
9464 auto it = Inst.begin();
9465 std::advance(
9466 it, AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::src2));
9467 Inst.insert(it, Inst.getOperand(0)); // src2 = dst
9468 }
9469}
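// Illustrative only, reusing the forms named in the comment above: on VI-style targets
// an input such as
//   v_addc_u32_sdwa v1, vcc, v2, v3, vcc dst_sel:DWORD src0_sel:DWORD src1_sel:DWORD
// has its explicit "vcc" tokens skipped here, and any selectors left unwritten default
// to DWORD (and dst_unused to UNUSED_PRESERVE) as added above; the exact modifier
// spelling is an assumption.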
9470
9471/// Force static initialization.
9472extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUAsmParser() {
9473 RegisterMCAsmParser<AMDGPUAsmParser> A(getTheR600Target());
9474 RegisterMCAsmParser<AMDGPUAsmParser> B(getTheGCNTarget());
9475}
9476
9477#define GET_REGISTER_MATCHER
9478#define GET_MATCHER_IMPLEMENTATION
9479#define GET_MNEMONIC_SPELL_CHECKER
9480#define GET_MNEMONIC_CHECKER
9481#include "AMDGPUGenAsmMatcher.inc"
9482
9483ParseStatus AMDGPUAsmParser::parseCustomOperand(OperandVector &Operands,
9484 unsigned MCK) {
9485 switch (MCK) {
9486 case MCK_addr64:
9487 return parseTokenOp("addr64", Operands);
9488 case MCK_done:
9489 return parseTokenOp("done", Operands);
9490 case MCK_idxen:
9491 return parseTokenOp("idxen", Operands);
9492 case MCK_lds:
9493 return parseTokenOp("lds", Operands);
9494 case MCK_offen:
9495 return parseTokenOp("offen", Operands);
9496 case MCK_off:
9497 return parseTokenOp("off", Operands);
9498 case MCK_row_95_en:
9499 return parseTokenOp("row_en", Operands);
9500 case MCK_gds:
9501 return parseNamedBit("gds", Operands, AMDGPUOperand::ImmTyGDS);
9502 case MCK_tfe:
9503 return parseNamedBit("tfe", Operands, AMDGPUOperand::ImmTyTFE);
9504 }
9505 return tryCustomParseOperand(Operands, MCK);
9506}
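// Illustrative only (instruction and register choices are assumptions): these tokens
// appear as bare words in the assembly, e.g.
//   buffer_load_dword v0, v1, s[4:7], s2 offen
//   ds_add_u32 v0, v1 gds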
9507
9508// This function should be defined after the auto-generated include so that the
9509// MatchClassKind enum is available.
9510unsigned AMDGPUAsmParser::validateTargetOperandClass(MCParsedAsmOperand &Op,
9511 unsigned Kind) {
9512 // Tokens like "glc" would be parsed as immediate operands in ParseOperand().
9513 // But MatchInstructionImpl() expects to see a token and fails to validate the
9514 // operand. This method checks whether we were given an immediate operand but
9515 // are expected to produce the corresponding token.
9516 AMDGPUOperand &Operand = (AMDGPUOperand&)Op;
9517 switch (Kind) {
9518 case MCK_addr64:
9519 return Operand.isAddr64() ? Match_Success : Match_InvalidOperand;
9520 case MCK_gds:
9521 return Operand.isGDS() ? Match_Success : Match_InvalidOperand;
9522 case MCK_lds:
9523 return Operand.isLDS() ? Match_Success : Match_InvalidOperand;
9524 case MCK_idxen:
9525 return Operand.isIdxen() ? Match_Success : Match_InvalidOperand;
9526 case MCK_offen:
9527 return Operand.isOffen() ? Match_Success : Match_InvalidOperand;
9528 case MCK_tfe:
9529 return Operand.isTFE() ? Match_Success : Match_InvalidOperand;
9530 case MCK_SSrc_b32:
9531 // When operands have expression values, they will return true for isToken,
9532 // because it is not possible to distinguish between a token and an
9533 // expression at parse time. MatchInstructionImpl() will always try to
9534 // match an operand as a token, when isToken returns true, and when the
9535 // name of the expression is not a valid token, the match will fail,
9536 // so we need to handle it here.
9537 return Operand.isSSrc_b32() ? Match_Success : Match_InvalidOperand;
9538 case MCK_SSrc_f32:
9539 return Operand.isSSrc_f32() ? Match_Success : Match_InvalidOperand;
9540 case MCK_SOPPBrTarget:
9541 return Operand.isSOPPBrTarget() ? Match_Success : Match_InvalidOperand;
9542 case MCK_VReg32OrOff:
9543 return Operand.isVReg32OrOff() ? Match_Success : Match_InvalidOperand;
9544 case MCK_InterpSlot:
9545 return Operand.isInterpSlot() ? Match_Success : Match_InvalidOperand;
9546 case MCK_InterpAttr:
9547 return Operand.isInterpAttr() ? Match_Success : Match_InvalidOperand;
9548 case MCK_InterpAttrChan:
9549 return Operand.isInterpAttrChan() ? Match_Success : Match_InvalidOperand;
9550 case MCK_SReg_64:
9551 case MCK_SReg_64_XEXEC:
9552 // Null is defined as a 32-bit register but
9553 // it should also be enabled with 64-bit operands.
9554 // The following code enables it for SReg_64 operands
9555 // used as source and destination. Remaining source
9556 // operands are handled in isInlinableImm.
9557 return Operand.isNull() ? Match_Success : Match_InvalidOperand;
9558 default:
9559 return Match_InvalidOperand;
9560 }
9561}
9562
9563//===----------------------------------------------------------------------===//
9564// endpgm
9565//===----------------------------------------------------------------------===//
9566
9567ParseStatus AMDGPUAsmParser::parseEndpgm(OperandVector &Operands) {
9568 SMLoc S = getLoc();
9569 int64_t Imm = 0;
9570
9571 if (!parseExpr(Imm)) {
9572 // The operand is optional; if not present, default to 0.
9573 Imm = 0;
9574 }
9575
9576 if (!isUInt<16>(Imm))
9577 return Error(S, "expected a 16-bit value");
9578
9579 Operands.push_back(
9580 AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyEndpgm));
9581 return ParseStatus::Success;
9582}
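// Illustrative only: s_endpgm takes an optional 16-bit immediate on targets where it is
// encodable, so both
//   s_endpgm
//   s_endpgm 0x1
// are accepted; when the operand is omitted it defaults to 0 as parsed above.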
9583
9584bool AMDGPUOperand::isEndpgm() const { return isImmTy(ImmTyEndpgm); }
9585
9586//===----------------------------------------------------------------------===//
9587// LDSDIR
9588//===----------------------------------------------------------------------===//
9589
9590bool AMDGPUOperand::isWaitVDST() const {
9591 return isImmTy(ImmTyWaitVDST) && isUInt<4>(getImm());
9592}
9593
9594bool AMDGPUOperand::isWaitVAVDst() const {
9595 return isImmTy(ImmTyWaitVAVDst) && isUInt<4>(getImm());
9596}
9597
9598bool AMDGPUOperand::isWaitVMVSrc() const {
9599 return isImmTy(ImmTyWaitVMVSrc) && isUInt<1>(getImm());
9600}
9601
9602//===----------------------------------------------------------------------===//
9603// VINTERP
9604//===----------------------------------------------------------------------===//
9605
9606bool AMDGPUOperand::isWaitEXP() const {
9607 return isImmTy(ImmTyWaitEXP) && isUInt<3>(getImm());
9608}
9609
9610//===----------------------------------------------------------------------===//
9611// Split Barrier
9612//===----------------------------------------------------------------------===//
9613
9614bool AMDGPUOperand::isSplitBarrier() const { return isInlinableImm(MVT::i32); }