AMDGPUAsmParser.cpp
1 //===- AMDGPUAsmParser.cpp - Parse SI asm to MCInst instructions ----------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "AMDKernelCodeT.h"
12 #include "SIDefines.h"
13 #include "SIInstrInfo.h"
14 #include "SIRegisterInfo.h"
16 #include "Utils/AMDGPUAsmUtils.h"
17 #include "Utils/AMDGPUBaseInfo.h"
19 #include "llvm/ADT/APFloat.h"
21 #include "llvm/ADT/StringSet.h"
22 #include "llvm/ADT/Twine.h"
23 #include "llvm/BinaryFormat/ELF.h"
24 #include "llvm/MC/MCAsmInfo.h"
25 #include "llvm/MC/MCContext.h"
26 #include "llvm/MC/MCExpr.h"
27 #include "llvm/MC/MCInst.h"
28 #include "llvm/MC/MCInstrDesc.h"
33 #include "llvm/MC/MCSymbol.h"
34 #include "llvm/MC/TargetRegistry.h"
37 #include "llvm/Support/Casting.h"
41 #include <optional>
42 
43 using namespace llvm;
44 using namespace llvm::AMDGPU;
45 using namespace llvm::amdhsa;
46 
47 namespace {
48 
49 class AMDGPUAsmParser;
50 
51 enum RegisterKind { IS_UNKNOWN, IS_VGPR, IS_SGPR, IS_AGPR, IS_TTMP, IS_SPECIAL };
52 
53 //===----------------------------------------------------------------------===//
54 // Operand
55 //===----------------------------------------------------------------------===//
56 
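// A parsed assembly operand: a token, an immediate, a register, or an
// expression, optionally carrying input modifiers (abs/neg/sext).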
57 class AMDGPUOperand : public MCParsedAsmOperand {
58  enum KindTy {
59  Token,
60  Immediate,
61  Register,
62  Expression,
63  } Kind;
64 
65  SMLoc StartLoc, EndLoc;
66  const AMDGPUAsmParser *AsmParser;
67 
68 public:
69  AMDGPUOperand(KindTy Kind_, const AMDGPUAsmParser *AsmParser_)
70  : Kind(Kind_), AsmParser(AsmParser_) {}
71 
72  using Ptr = std::unique_ptr<AMDGPUOperand>;
73 
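 // Input modifiers attached to a source operand, e.g. -v0, |v0| or sext(v0).
 // getModifiersOperand() converts them to the SISrcMods encoding expected by
 // the corresponding *_modifiers MCInst operand.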
74  struct Modifiers {
75  bool Abs = false;
76  bool Neg = false;
77  bool Sext = false;
78 
79  bool hasFPModifiers() const { return Abs || Neg; }
80  bool hasIntModifiers() const { return Sext; }
81  bool hasModifiers() const { return hasFPModifiers() || hasIntModifiers(); }
82 
83  int64_t getFPModifiersOperand() const {
84  int64_t Operand = 0;
85  Operand |= Abs ? SISrcMods::ABS : 0u;
86  Operand |= Neg ? SISrcMods::NEG : 0u;
87  return Operand;
88  }
89 
90  int64_t getIntModifiersOperand() const {
91  int64_t Operand = 0;
92  Operand |= Sext ? SISrcMods::SEXT : 0u;
93  return Operand;
94  }
95 
96  int64_t getModifiersOperand() const {
97  assert(!(hasFPModifiers() && hasIntModifiers())
98  && "fp and int modifiers should not be used simultaneously");
99  if (hasFPModifiers()) {
100  return getFPModifiersOperand();
101  } else if (hasIntModifiers()) {
102  return getIntModifiersOperand();
103  } else {
104  return 0;
105  }
106  }
107 
108  friend raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods);
109  };
110 
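 // Type of a named immediate operand. This distinguishes the many optional
 // modifiers (offsets, DPP/SDWA controls, MIMG flags, etc.) so that the
 // matcher and diagnostics can tell them apart.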
111  enum ImmTy {
112  ImmTyNone,
113  ImmTyGDS,
114  ImmTyLDS,
115  ImmTyOffen,
116  ImmTyIdxen,
117  ImmTyAddr64,
118  ImmTyOffset,
119  ImmTyInstOffset,
120  ImmTyOffset0,
121  ImmTyOffset1,
122  ImmTyCPol,
123  ImmTySWZ,
124  ImmTyTFE,
125  ImmTyD16,
126  ImmTyClampSI,
127  ImmTyOModSI,
128  ImmTySdwaDstSel,
129  ImmTySdwaSrc0Sel,
130  ImmTySdwaSrc1Sel,
131  ImmTySdwaDstUnused,
132  ImmTyDMask,
133  ImmTyDim,
134  ImmTyUNorm,
135  ImmTyDA,
136  ImmTyR128A16,
137  ImmTyA16,
138  ImmTyLWE,
139  ImmTyExpTgt,
140  ImmTyExpCompr,
141  ImmTyExpVM,
142  ImmTyFORMAT,
143  ImmTyHwreg,
144  ImmTyOff,
145  ImmTySendMsg,
146  ImmTyInterpSlot,
147  ImmTyInterpAttr,
148  ImmTyAttrChan,
149  ImmTyOpSel,
150  ImmTyOpSelHi,
151  ImmTyNegLo,
152  ImmTyNegHi,
153  ImmTyDPP8,
154  ImmTyDppCtrl,
155  ImmTyDppRowMask,
156  ImmTyDppBankMask,
157  ImmTyDppBoundCtrl,
158  ImmTyDppFi,
159  ImmTySwizzle,
160  ImmTyGprIdxMode,
161  ImmTyHigh,
162  ImmTyBLGP,
163  ImmTyCBSZ,
164  ImmTyABID,
165  ImmTyEndpgm,
166  ImmTyWaitVDST,
167  ImmTyWaitEXP,
168  };
169 
170  // Immediate operand kind.
171  // It helps to identify the location of an offending operand after an error.
172  // Note that regular literals and mandatory literals (KImm) must be handled
173  // differently. When looking for an offending operand, we should usually
174  // ignore mandatory literals because they are part of the instruction and
175  // cannot be changed. Report location of mandatory operands only for VOPD,
176  // when both OpX and OpY have a KImm and there are no other literals.
177  enum ImmKindTy {
178  ImmKindTyNone,
179  ImmKindTyLiteral,
180  ImmKindTyMandatoryLiteral,
181  ImmKindTyConst,
182  };
183 
184 private:
185  struct TokOp {
186  const char *Data;
187  unsigned Length;
188  };
189 
190  struct ImmOp {
191  int64_t Val;
192  ImmTy Type;
193  bool IsFPImm;
194  mutable ImmKindTy Kind;
195  Modifiers Mods;
196  };
197 
198  struct RegOp {
199  unsigned RegNo;
200  Modifiers Mods;
201  };
202 
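 // Operand payload; exactly one member is active, selected by Kind.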
203  union {
204  TokOp Tok;
205  ImmOp Imm;
206  RegOp Reg;
207  const MCExpr *Expr;
208  };
209 
210 public:
211  bool isToken() const override { return Kind == Token; }
212 
213  bool isSymbolRefExpr() const {
214  return isExpr() && Expr && isa<MCSymbolRefExpr>(Expr);
215  }
216 
217  bool isImm() const override {
218  return Kind == Immediate;
219  }
220 
221  void setImmKindNone() const {
222  assert(isImm());
223  Imm.Kind = ImmKindTyNone;
224  }
225 
226  void setImmKindLiteral() const {
227  assert(isImm());
228  Imm.Kind = ImmKindTyLiteral;
229  }
230 
231  void setImmKindMandatoryLiteral() const {
232  assert(isImm());
233  Imm.Kind = ImmKindTyMandatoryLiteral;
234  }
235 
236  void setImmKindConst() const {
237  assert(isImm());
238  Imm.Kind = ImmKindTyConst;
239  }
240 
241  bool IsImmKindLiteral() const {
242  return isImm() && Imm.Kind == ImmKindTyLiteral;
243  }
244 
245  bool IsImmKindMandatoryLiteral() const {
246  return isImm() && Imm.Kind == ImmKindTyMandatoryLiteral;
247  }
248 
249  bool isImmKindConst() const {
250  return isImm() && Imm.Kind == ImmKindTyConst;
251  }
252 
253  bool isInlinableImm(MVT type) const;
254  bool isLiteralImm(MVT type) const;
255 
256  bool isRegKind() const {
257  return Kind == Register;
258  }
259 
260  bool isReg() const override {
261  return isRegKind() && !hasModifiers();
262  }
263 
264  bool isRegOrInline(unsigned RCID, MVT type) const {
265  return isRegClass(RCID) || isInlinableImm(type);
266  }
267 
268  bool isRegOrImmWithInputMods(unsigned RCID, MVT type) const {
269  return isRegOrInline(RCID, type) || isLiteralImm(type);
270  }
271 
272  bool isRegOrImmWithInt16InputMods() const {
273  return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i16);
274  }
275 
276  bool isRegOrImmWithInt32InputMods() const {
277  return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i32);
278  }
279 
280  bool isRegOrInlineImmWithInt16InputMods() const {
281  return isRegOrInline(AMDGPU::VS_32RegClassID, MVT::i16);
282  }
283 
284  bool isRegOrInlineImmWithInt32InputMods() const {
285  return isRegOrInline(AMDGPU::VS_32RegClassID, MVT::i32);
286  }
287 
288  bool isRegOrImmWithInt64InputMods() const {
289  return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::i64);
290  }
291 
292  bool isRegOrImmWithFP16InputMods() const {
293  return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f16);
294  }
295 
296  bool isRegOrImmWithFP32InputMods() const {
297  return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f32);
298  }
299 
300  bool isRegOrImmWithFP64InputMods() const {
301  return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::f64);
302  }
303 
304  bool isRegOrInlineImmWithFP16InputMods() const {
305  return isRegOrInline(AMDGPU::VS_32RegClassID, MVT::f16);
306  }
307 
308  bool isRegOrInlineImmWithFP32InputMods() const {
309  return isRegOrInline(AMDGPU::VS_32RegClassID, MVT::f32);
310  }
311 
312 
313  bool isVReg() const {
314  return isRegClass(AMDGPU::VGPR_32RegClassID) ||
315  isRegClass(AMDGPU::VReg_64RegClassID) ||
316  isRegClass(AMDGPU::VReg_96RegClassID) ||
317  isRegClass(AMDGPU::VReg_128RegClassID) ||
318  isRegClass(AMDGPU::VReg_160RegClassID) ||
319  isRegClass(AMDGPU::VReg_192RegClassID) ||
320  isRegClass(AMDGPU::VReg_256RegClassID) ||
321  isRegClass(AMDGPU::VReg_512RegClassID) ||
322  isRegClass(AMDGPU::VReg_1024RegClassID);
323  }
324 
325  bool isVReg32() const {
326  return isRegClass(AMDGPU::VGPR_32RegClassID);
327  }
328 
329  bool isVReg32OrOff() const {
330  return isOff() || isVReg32();
331  }
332 
333  bool isNull() const {
334  return isRegKind() && getReg() == AMDGPU::SGPR_NULL;
335  }
336 
337  bool isVRegWithInputMods() const;
338  bool isT16VRegWithInputMods() const;
339 
340  bool isSDWAOperand(MVT type) const;
341  bool isSDWAFP16Operand() const;
342  bool isSDWAFP32Operand() const;
343  bool isSDWAInt16Operand() const;
344  bool isSDWAInt32Operand() const;
345 
346  bool isImmTy(ImmTy ImmT) const {
347  return isImm() && Imm.Type == ImmT;
348  }
349 
350  bool isImmModifier() const {
351  return isImm() && Imm.Type != ImmTyNone;
352  }
353 
354  bool isClampSI() const { return isImmTy(ImmTyClampSI); }
355  bool isOModSI() const { return isImmTy(ImmTyOModSI); }
356  bool isDMask() const { return isImmTy(ImmTyDMask); }
357  bool isDim() const { return isImmTy(ImmTyDim); }
358  bool isUNorm() const { return isImmTy(ImmTyUNorm); }
359  bool isDA() const { return isImmTy(ImmTyDA); }
360  bool isR128A16() const { return isImmTy(ImmTyR128A16); }
361  bool isGFX10A16() const { return isImmTy(ImmTyA16); }
362  bool isLWE() const { return isImmTy(ImmTyLWE); }
363  bool isOff() const { return isImmTy(ImmTyOff); }
364  bool isExpTgt() const { return isImmTy(ImmTyExpTgt); }
365  bool isExpVM() const { return isImmTy(ImmTyExpVM); }
366  bool isExpCompr() const { return isImmTy(ImmTyExpCompr); }
367  bool isOffen() const { return isImmTy(ImmTyOffen); }
368  bool isIdxen() const { return isImmTy(ImmTyIdxen); }
369  bool isAddr64() const { return isImmTy(ImmTyAddr64); }
370  bool isOffset() const { return isImmTy(ImmTyOffset) && isUInt<16>(getImm()); }
371  bool isOffset0() const { return isImmTy(ImmTyOffset0) && isUInt<8>(getImm()); }
372  bool isOffset1() const { return isImmTy(ImmTyOffset1) && isUInt<8>(getImm()); }
373 
374  bool isFlatOffset() const { return isImmTy(ImmTyOffset) || isImmTy(ImmTyInstOffset); }
375  bool isGDS() const { return isImmTy(ImmTyGDS); }
376  bool isLDS() const { return isImmTy(ImmTyLDS); }
377  bool isCPol() const { return isImmTy(ImmTyCPol); }
378  bool isSWZ() const { return isImmTy(ImmTySWZ); }
379  bool isTFE() const { return isImmTy(ImmTyTFE); }
380  bool isD16() const { return isImmTy(ImmTyD16); }
381  bool isFORMAT() const { return isImmTy(ImmTyFORMAT) && isUInt<7>(getImm()); }
382  bool isBankMask() const { return isImmTy(ImmTyDppBankMask); }
383  bool isRowMask() const { return isImmTy(ImmTyDppRowMask); }
384  bool isBoundCtrl() const { return isImmTy(ImmTyDppBoundCtrl); }
385  bool isFI() const { return isImmTy(ImmTyDppFi); }
386  bool isSDWADstSel() const { return isImmTy(ImmTySdwaDstSel); }
387  bool isSDWASrc0Sel() const { return isImmTy(ImmTySdwaSrc0Sel); }
388  bool isSDWASrc1Sel() const { return isImmTy(ImmTySdwaSrc1Sel); }
389  bool isSDWADstUnused() const { return isImmTy(ImmTySdwaDstUnused); }
390  bool isInterpSlot() const { return isImmTy(ImmTyInterpSlot); }
391  bool isInterpAttr() const { return isImmTy(ImmTyInterpAttr); }
392  bool isAttrChan() const { return isImmTy(ImmTyAttrChan); }
393  bool isOpSel() const { return isImmTy(ImmTyOpSel); }
394  bool isOpSelHi() const { return isImmTy(ImmTyOpSelHi); }
395  bool isNegLo() const { return isImmTy(ImmTyNegLo); }
396  bool isNegHi() const { return isImmTy(ImmTyNegHi); }
397  bool isHigh() const { return isImmTy(ImmTyHigh); }
398 
399  bool isMod() const {
400  return isClampSI() || isOModSI();
401  }
402 
403  bool isRegOrImm() const {
404  return isReg() || isImm();
405  }
406 
407  bool isRegClass(unsigned RCID) const;
408 
409  bool isInlineValue() const;
410 
411  bool isRegOrInlineNoMods(unsigned RCID, MVT type) const {
412  return isRegOrInline(RCID, type) && !hasModifiers();
413  }
414 
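 // The predicates below correspond to the source operand classes used by the
 // generated matcher. The 'C' variants accept only registers and inline
 // constants; the plain variants additionally accept literal immediates (and,
 // for some 32-bit forms, expressions).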
415  bool isSCSrcB16() const {
416  return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i16);
417  }
418 
419  bool isSCSrcV2B16() const {
420  return isSCSrcB16();
421  }
422 
423  bool isSCSrcB32() const {
424  return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i32);
425  }
426 
427  bool isSCSrcB64() const {
428  return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::i64);
429  }
430 
431  bool isBoolReg() const;
432 
433  bool isSCSrcF16() const {
434  return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f16);
435  }
436 
437  bool isSCSrcV2F16() const {
438  return isSCSrcF16();
439  }
440 
441  bool isSCSrcF32() const {
442  return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f32);
443  }
444 
445  bool isSCSrcF64() const {
446  return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::f64);
447  }
448 
449  bool isSSrcB32() const {
450  return isSCSrcB32() || isLiteralImm(MVT::i32) || isExpr();
451  }
452 
453  bool isSSrcB16() const {
454  return isSCSrcB16() || isLiteralImm(MVT::i16);
455  }
456 
457  bool isSSrcV2B16() const {
458  llvm_unreachable("cannot happen");
459  return isSSrcB16();
460  }
461 
462  bool isSSrcB64() const {
463  // TODO: Find out how SALU supports extension of 32-bit literals to 64 bits.
464  // See isVSrc64().
465  return isSCSrcB64() || isLiteralImm(MVT::i64);
466  }
467 
468  bool isSSrcF32() const {
469  return isSCSrcB32() || isLiteralImm(MVT::f32) || isExpr();
470  }
471 
472  bool isSSrcF64() const {
473  return isSCSrcB64() || isLiteralImm(MVT::f64);
474  }
475 
476  bool isSSrcF16() const {
477  return isSCSrcB16() || isLiteralImm(MVT::f16);
478  }
479 
480  bool isSSrcV2F16() const {
481  llvm_unreachable("cannot happen");
482  return isSSrcF16();
483  }
484 
485  bool isSSrcV2FP32() const {
486  llvm_unreachable("cannot happen");
487  return isSSrcF32();
488  }
489 
490  bool isSCSrcV2FP32() const {
491  llvm_unreachable("cannot happen");
492  return isSCSrcF32();
493  }
494 
495  bool isSSrcV2INT32() const {
496  llvm_unreachable("cannot happen");
497  return isSSrcB32();
498  }
499 
500  bool isSCSrcV2INT32() const {
501  llvm_unreachable("cannot happen");
502  return isSCSrcB32();
503  }
504 
505  bool isSSrcOrLdsB32() const {
506  return isRegOrInlineNoMods(AMDGPU::SRegOrLds_32RegClassID, MVT::i32) ||
507  isLiteralImm(MVT::i32) || isExpr();
508  }
509 
510  bool isVCSrcB32() const {
511  return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i32);
512  }
513 
514  bool isVCSrcB64() const {
515  return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::i64);
516  }
517 
518  bool isVCSrcTB16_Lo128() const {
519  return isRegOrInlineNoMods(AMDGPU::VS_32_Lo128RegClassID, MVT::i16);
520  }
521 
522  bool isVCSrcB16() const {
523  return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i16);
524  }
525 
526  bool isVCSrcV2B16() const {
527  return isVCSrcB16();
528  }
529 
530  bool isVCSrcF32() const {
531  return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f32);
532  }
533 
534  bool isVCSrcF64() const {
535  return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::f64);
536  }
537 
538  bool isVCSrcTF16_Lo128() const {
539  return isRegOrInlineNoMods(AMDGPU::VS_32_Lo128RegClassID, MVT::f16);
540  }
541 
542  bool isVCSrcF16() const {
543  return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f16);
544  }
545 
546  bool isVCSrcV2F16() const {
547  return isVCSrcF16();
548  }
549 
550  bool isVSrcB32() const {
551  return isVCSrcF32() || isLiteralImm(MVT::i32) || isExpr();
552  }
553 
554  bool isVSrcB64() const {
555  return isVCSrcF64() || isLiteralImm(MVT::i64);
556  }
557 
558  bool isVSrcTB16_Lo128() const {
559  return isVCSrcTB16_Lo128() || isLiteralImm(MVT::i16);
560  }
561 
562  bool isVSrcB16() const {
563  return isVCSrcB16() || isLiteralImm(MVT::i16);
564  }
565 
566  bool isVSrcV2B16() const {
567  return isVSrcB16() || isLiteralImm(MVT::v2i16);
568  }
569 
570  bool isVCSrcV2FP32() const {
571  return isVCSrcF64();
572  }
573 
574  bool isVSrcV2FP32() const {
575  return isVSrcF64() || isLiteralImm(MVT::v2f32);
576  }
577 
578  bool isVCSrcV2INT32() const {
579  return isVCSrcB64();
580  }
581 
582  bool isVSrcV2INT32() const {
583  return isVSrcB64() || isLiteralImm(MVT::v2i32);
584  }
585 
586  bool isVSrcF32() const {
587  return isVCSrcF32() || isLiteralImm(MVT::f32) || isExpr();
588  }
589 
590  bool isVSrcF64() const {
591  return isVCSrcF64() || isLiteralImm(MVT::f64);
592  }
593 
594  bool isVSrcTF16_Lo128() const {
595  return isVCSrcTF16_Lo128() || isLiteralImm(MVT::f16);
596  }
597 
598  bool isVSrcF16() const {
599  return isVCSrcF16() || isLiteralImm(MVT::f16);
600  }
601 
602  bool isVSrcV2F16() const {
603  return isVSrcF16() || isLiteralImm(MVT::v2f16);
604  }
605 
606  bool isVISrcB32() const {
607  return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i32);
608  }
609 
610  bool isVISrcB16() const {
611  return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i16);
612  }
613 
614  bool isVISrcV2B16() const {
615  return isVISrcB16();
616  }
617 
618  bool isVISrcF32() const {
619  return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f32);
620  }
621 
622  bool isVISrcF16() const {
623  return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f16);
624  }
625 
626  bool isVISrcV2F16() const {
627  return isVISrcF16() || isVISrcB32();
628  }
629 
630  bool isVISrc_64B64() const {
631  return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::i64);
632  }
633 
634  bool isVISrc_64F64() const {
635  return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::f64);
636  }
637 
638  bool isVISrc_64V2FP32() const {
639  return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::f32);
640  }
641 
642  bool isVISrc_64V2INT32() const {
643  return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::i32);
644  }
645 
646  bool isVISrc_256B64() const {
647  return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::i64);
648  }
649 
650  bool isVISrc_256F64() const {
651  return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::f64);
652  }
653 
654  bool isVISrc_128B16() const {
655  return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::i16);
656  }
657 
658  bool isVISrc_128V2B16() const {
659  return isVISrc_128B16();
660  }
661 
662  bool isVISrc_128B32() const {
663  return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::i32);
664  }
665 
666  bool isVISrc_128F32() const {
667  return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::f32);
668  }
669 
670  bool isVISrc_256V2FP32() const {
671  return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::f32);
672  }
673 
674  bool isVISrc_256V2INT32() const {
675  return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::i32);
676  }
677 
678  bool isVISrc_512B32() const {
679  return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::i32);
680  }
681 
682  bool isVISrc_512B16() const {
683  return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::i16);
684  }
685 
686  bool isVISrc_512V2B16() const {
687  return isVISrc_512B16();
688  }
689 
690  bool isVISrc_512F32() const {
691  return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::f32);
692  }
693 
694  bool isVISrc_512F16() const {
695  return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::f16);
696  }
697 
698  bool isVISrc_512V2F16() const {
699  return isVISrc_512F16() || isVISrc_512B32();
700  }
701 
702  bool isVISrc_1024B32() const {
703  return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::i32);
704  }
705 
706  bool isVISrc_1024B16() const {
707  return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::i16);
708  }
709 
710  bool isVISrc_1024V2B16() const {
711  return isVISrc_1024B16();
712  }
713 
714  bool isVISrc_1024F32() const {
715  return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::f32);
716  }
717 
718  bool isVISrc_1024F16() const {
719  return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::f16);
720  }
721 
722  bool isVISrc_1024V2F16() const {
723  return isVISrc_1024F16() || isVISrc_1024B32();
724  }
725 
726  bool isAISrcB32() const {
727  return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i32);
728  }
729 
730  bool isAISrcB16() const {
731  return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i16);
732  }
733 
734  bool isAISrcV2B16() const {
735  return isAISrcB16();
736  }
737 
738  bool isAISrcF32() const {
739  return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f32);
740  }
741 
742  bool isAISrcF16() const {
743  return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f16);
744  }
745 
746  bool isAISrcV2F16() const {
747  return isAISrcF16() || isAISrcB32();
748  }
749 
750  bool isAISrc_64B64() const {
751  return isRegOrInlineNoMods(AMDGPU::AReg_64RegClassID, MVT::i64);
752  }
753 
754  bool isAISrc_64F64() const {
755  return isRegOrInlineNoMods(AMDGPU::AReg_64RegClassID, MVT::f64);
756  }
757 
758  bool isAISrc_128B32() const {
759  return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i32);
760  }
761 
762  bool isAISrc_128B16() const {
763  return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i16);
764  }
765 
766  bool isAISrc_128V2B16() const {
767  return isAISrc_128B16();
768  }
769 
770  bool isAISrc_128F32() const {
771  return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f32);
772  }
773 
774  bool isAISrc_128F16() const {
775  return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f16);
776  }
777 
778  bool isAISrc_128V2F16() const {
779  return isAISrc_128F16() || isAISrc_128B32();
780  }
781 
782  bool isVISrc_128F16() const {
783  return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::f16);
784  }
785 
786  bool isVISrc_128V2F16() const {
787  return isVISrc_128F16() || isVISrc_128B32();
788  }
789 
790  bool isAISrc_256B64() const {
791  return isRegOrInlineNoMods(AMDGPU::AReg_256RegClassID, MVT::i64);
792  }
793 
794  bool isAISrc_256F64() const {
795  return isRegOrInlineNoMods(AMDGPU::AReg_256RegClassID, MVT::f64);
796  }
797 
798  bool isAISrc_512B32() const {
799  return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i32);
800  }
801 
802  bool isAISrc_512B16() const {
803  return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i16);
804  }
805 
806  bool isAISrc_512V2B16() const {
807  return isAISrc_512B16();
808  }
809 
810  bool isAISrc_512F32() const {
811  return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f32);
812  }
813 
814  bool isAISrc_512F16() const {
815  return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f16);
816  }
817 
818  bool isAISrc_512V2F16() const {
819  return isAISrc_512F16() || isAISrc_512B32();
820  }
821 
822  bool isAISrc_1024B32() const {
823  return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i32);
824  }
825 
826  bool isAISrc_1024B16() const {
827  return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i16);
828  }
829 
830  bool isAISrc_1024V2B16() const {
831  return isAISrc_1024B16();
832  }
833 
834  bool isAISrc_1024F32() const {
835  return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f32);
836  }
837 
838  bool isAISrc_1024F16() const {
839  return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f16);
840  }
841 
842  bool isAISrc_1024V2F16() const {
843  return isAISrc_1024F16() || isAISrc_1024B32();
844  }
845 
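 // KImm operands are mandatory 16-/32-bit literals encoded directly in the
 // instruction (see ImmKindTyMandatoryLiteral above).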
846  bool isKImmFP32() const {
847  return isLiteralImm(MVT::f32);
848  }
849 
850  bool isKImmFP16() const {
851  return isLiteralImm(MVT::f16);
852  }
853 
854  bool isMem() const override {
855  return false;
856  }
857 
858  bool isExpr() const {
859  return Kind == Expression;
860  }
861 
862  bool isSoppBrTarget() const {
863  return isExpr() || isImm();
864  }
865 
866  bool isSWaitCnt() const;
867  bool isDepCtr() const;
868  bool isSDelayAlu() const;
869  bool isHwreg() const;
870  bool isSendMsg() const;
871  bool isSwizzle() const;
872  bool isSMRDOffset8() const;
873  bool isSMEMOffset() const;
874  bool isSMRDLiteralOffset() const;
875  bool isDPP8() const;
876  bool isDPPCtrl() const;
877  bool isBLGP() const;
878  bool isCBSZ() const;
879  bool isABID() const;
880  bool isGPRIdxMode() const;
881  bool isS16Imm() const;
882  bool isU16Imm() const;
883  bool isEndpgm() const;
884  bool isWaitVDST() const;
885  bool isWaitEXP() const;
886 
887  StringRef getToken() const {
888  assert(isToken());
889  return StringRef(Tok.Data, Tok.Length);
890  }
891 
892  int64_t getImm() const {
893  assert(isImm());
894  return Imm.Val;
895  }
896 
897  void setImm(int64_t Val) {
898  assert(isImm());
899  Imm.Val = Val;
900  }
901 
902  ImmTy getImmTy() const {
903  assert(isImm());
904  return Imm.Type;
905  }
906 
907  unsigned getReg() const override {
908  assert(isRegKind());
909  return Reg.RegNo;
910  }
911 
912  SMLoc getStartLoc() const override {
913  return StartLoc;
914  }
915 
916  SMLoc getEndLoc() const override {
917  return EndLoc;
918  }
919 
920  SMRange getLocRange() const {
921  return SMRange(StartLoc, EndLoc);
922  }
923 
924  Modifiers getModifiers() const {
925  assert(isRegKind() || isImmTy(ImmTyNone));
926  return isRegKind() ? Reg.Mods : Imm.Mods;
927  }
928 
929  void setModifiers(Modifiers Mods) {
930  assert(isRegKind() || isImmTy(ImmTyNone));
931  if (isRegKind())
932  Reg.Mods = Mods;
933  else
934  Imm.Mods = Mods;
935  }
936 
937  bool hasModifiers() const {
938  return getModifiers().hasModifiers();
939  }
940 
941  bool hasFPModifiers() const {
942  return getModifiers().hasFPModifiers();
943  }
944 
945  bool hasIntModifiers() const {
946  return getModifiers().hasIntModifiers();
947  }
948 
949  uint64_t applyInputFPModifiers(uint64_t Val, unsigned Size) const;
950 
951  void addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers = true) const;
952 
953  void addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const;
954 
955  template <unsigned Bitwidth>
956  void addKImmFPOperands(MCInst &Inst, unsigned N) const;
957 
958  void addKImmFP16Operands(MCInst &Inst, unsigned N) const {
959  addKImmFPOperands<16>(Inst, N);
960  }
961 
962  void addKImmFP32Operands(MCInst &Inst, unsigned N) const {
963  addKImmFPOperands<32>(Inst, N);
964  }
965 
966  void addRegOperands(MCInst &Inst, unsigned N) const;
967 
968  void addBoolRegOperands(MCInst &Inst, unsigned N) const {
969  addRegOperands(Inst, N);
970  }
971 
972  void addRegOrImmOperands(MCInst &Inst, unsigned N) const {
973  if (isRegKind())
974  addRegOperands(Inst, N);
975  else if (isExpr())
976  Inst.addOperand(MCOperand::createExpr(Expr));
977  else
978  addImmOperands(Inst, N);
979  }
980 
981  void addRegOrImmWithInputModsOperands(MCInst &Inst, unsigned N) const {
982  Modifiers Mods = getModifiers();
983  Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand()));
984  if (isRegKind()) {
985  addRegOperands(Inst, N);
986  } else {
987  addImmOperands(Inst, N, false);
988  }
989  }
990 
991  void addRegOrImmWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
992  assert(!hasIntModifiers());
993  addRegOrImmWithInputModsOperands(Inst, N);
994  }
995 
996  void addRegOrImmWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
997  assert(!hasFPModifiers());
998  addRegOrImmWithInputModsOperands(Inst, N);
999  }
1000 
1001  void addRegWithInputModsOperands(MCInst &Inst, unsigned N) const {
1002  Modifiers Mods = getModifiers();
1003  Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand()));
1004  assert(isRegKind());
1005  addRegOperands(Inst, N);
1006  }
1007 
1008  void addRegWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
1009  assert(!hasIntModifiers());
1010  addRegWithInputModsOperands(Inst, N);
1011  }
1012 
1013  void addRegWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
1014  assert(!hasFPModifiers());
1015  addRegWithInputModsOperands(Inst, N);
1016  }
1017 
1018  void addSoppBrTargetOperands(MCInst &Inst, unsigned N) const {
1019  if (isImm())
1020  addImmOperands(Inst, N);
1021  else {
1022  assert(isExpr());
1023  Inst.addOperand(MCOperand::createExpr(Expr));
1024  }
1025  }
1026 
1027  static void printImmTy(raw_ostream& OS, ImmTy Type) {
1028  switch (Type) {
1029  case ImmTyNone: OS << "None"; break;
1030  case ImmTyGDS: OS << "GDS"; break;
1031  case ImmTyLDS: OS << "LDS"; break;
1032  case ImmTyOffen: OS << "Offen"; break;
1033  case ImmTyIdxen: OS << "Idxen"; break;
1034  case ImmTyAddr64: OS << "Addr64"; break;
1035  case ImmTyOffset: OS << "Offset"; break;
1036  case ImmTyInstOffset: OS << "InstOffset"; break;
1037  case ImmTyOffset0: OS << "Offset0"; break;
1038  case ImmTyOffset1: OS << "Offset1"; break;
1039  case ImmTyCPol: OS << "CPol"; break;
1040  case ImmTySWZ: OS << "SWZ"; break;
1041  case ImmTyTFE: OS << "TFE"; break;
1042  case ImmTyD16: OS << "D16"; break;
1043  case ImmTyFORMAT: OS << "FORMAT"; break;
1044  case ImmTyClampSI: OS << "ClampSI"; break;
1045  case ImmTyOModSI: OS << "OModSI"; break;
1046  case ImmTyDPP8: OS << "DPP8"; break;
1047  case ImmTyDppCtrl: OS << "DppCtrl"; break;
1048  case ImmTyDppRowMask: OS << "DppRowMask"; break;
1049  case ImmTyDppBankMask: OS << "DppBankMask"; break;
1050  case ImmTyDppBoundCtrl: OS << "DppBoundCtrl"; break;
1051  case ImmTyDppFi: OS << "FI"; break;
1052  case ImmTySdwaDstSel: OS << "SdwaDstSel"; break;
1053  case ImmTySdwaSrc0Sel: OS << "SdwaSrc0Sel"; break;
1054  case ImmTySdwaSrc1Sel: OS << "SdwaSrc1Sel"; break;
1055  case ImmTySdwaDstUnused: OS << "SdwaDstUnused"; break;
1056  case ImmTyDMask: OS << "DMask"; break;
1057  case ImmTyDim: OS << "Dim"; break;
1058  case ImmTyUNorm: OS << "UNorm"; break;
1059  case ImmTyDA: OS << "DA"; break;
1060  case ImmTyR128A16: OS << "R128A16"; break;
1061  case ImmTyA16: OS << "A16"; break;
1062  case ImmTyLWE: OS << "LWE"; break;
1063  case ImmTyOff: OS << "Off"; break;
1064  case ImmTyExpTgt: OS << "ExpTgt"; break;
1065  case ImmTyExpCompr: OS << "ExpCompr"; break;
1066  case ImmTyExpVM: OS << "ExpVM"; break;
1067  case ImmTyHwreg: OS << "Hwreg"; break;
1068  case ImmTySendMsg: OS << "SendMsg"; break;
1069  case ImmTyInterpSlot: OS << "InterpSlot"; break;
1070  case ImmTyInterpAttr: OS << "InterpAttr"; break;
1071  case ImmTyAttrChan: OS << "AttrChan"; break;
1072  case ImmTyOpSel: OS << "OpSel"; break;
1073  case ImmTyOpSelHi: OS << "OpSelHi"; break;
1074  case ImmTyNegLo: OS << "NegLo"; break;
1075  case ImmTyNegHi: OS << "NegHi"; break;
1076  case ImmTySwizzle: OS << "Swizzle"; break;
1077  case ImmTyGprIdxMode: OS << "GprIdxMode"; break;
1078  case ImmTyHigh: OS << "High"; break;
1079  case ImmTyBLGP: OS << "BLGP"; break;
1080  case ImmTyCBSZ: OS << "CBSZ"; break;
1081  case ImmTyABID: OS << "ABID"; break;
1082  case ImmTyEndpgm: OS << "Endpgm"; break;
1083  case ImmTyWaitVDST: OS << "WaitVDST"; break;
1084  case ImmTyWaitEXP: OS << "WaitEXP"; break;
1085  }
1086  }
1087 
1088  void print(raw_ostream &OS) const override {
1089  switch (Kind) {
1090  case Register:
1091  OS << "<register " << getReg() << " mods: " << Reg.Mods << '>';
1092  break;
1093  case Immediate:
1094  OS << '<' << getImm();
1095  if (getImmTy() != ImmTyNone) {
1096  OS << " type: "; printImmTy(OS, getImmTy());
1097  }
1098  OS << " mods: " << Imm.Mods << '>';
1099  break;
1100  case Token:
1101  OS << '\'' << getToken() << '\'';
1102  break;
1103  case Expression:
1104  OS << "<expr " << *Expr << '>';
1105  break;
1106  }
1107  }
1108 
1109  static AMDGPUOperand::Ptr CreateImm(const AMDGPUAsmParser *AsmParser,
1110  int64_t Val, SMLoc Loc,
1111  ImmTy Type = ImmTyNone,
1112  bool IsFPImm = false) {
1113  auto Op = std::make_unique<AMDGPUOperand>(Immediate, AsmParser);
1114  Op->Imm.Val = Val;
1115  Op->Imm.IsFPImm = IsFPImm;
1116  Op->Imm.Kind = ImmKindTyNone;
1117  Op->Imm.Type = Type;
1118  Op->Imm.Mods = Modifiers();
1119  Op->StartLoc = Loc;
1120  Op->EndLoc = Loc;
1121  return Op;
1122  }
1123 
1124  static AMDGPUOperand::Ptr CreateToken(const AMDGPUAsmParser *AsmParser,
1125  StringRef Str, SMLoc Loc,
1126  bool HasExplicitEncodingSize = true) {
1127  auto Res = std::make_unique<AMDGPUOperand>(Token, AsmParser);
1128  Res->Tok.Data = Str.data();
1129  Res->Tok.Length = Str.size();
1130  Res->StartLoc = Loc;
1131  Res->EndLoc = Loc;
1132  return Res;
1133  }
1134 
1135  static AMDGPUOperand::Ptr CreateReg(const AMDGPUAsmParser *AsmParser,
1136  unsigned RegNo, SMLoc S,
1137  SMLoc E) {
1138  auto Op = std::make_unique<AMDGPUOperand>(Register, AsmParser);
1139  Op->Reg.RegNo = RegNo;
1140  Op->Reg.Mods = Modifiers();
1141  Op->StartLoc = S;
1142  Op->EndLoc = E;
1143  return Op;
1144  }
1145 
1146  static AMDGPUOperand::Ptr CreateExpr(const AMDGPUAsmParser *AsmParser,
1147  const class MCExpr *Expr, SMLoc S) {
1148  auto Op = std::make_unique<AMDGPUOperand>(Expression, AsmParser);
1149  Op->Expr = Expr;
1150  Op->StartLoc = S;
1151  Op->EndLoc = S;
1152  return Op;
1153  }
1154 };
1155 
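// Printer for operand modifiers, used by AMDGPUOperand::print().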
1156 raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods) {
1157  OS << "abs:" << Mods.Abs << " neg: " << Mods.Neg << " sext:" << Mods.Sext;
1158  return OS;
1159 }
1160 
1161 //===----------------------------------------------------------------------===//
1162 // AsmParser
1163 //===----------------------------------------------------------------------===//
1164 
1165 // Holds info related to the current kernel, e.g. count of SGPRs used.
1166 // Kernel scope begins at .amdgpu_hsa_kernel directive, ends at next
1167 // .amdgpu_hsa_kernel or at EOF.
1168 class KernelScopeInfo {
1169  int SgprIndexUnusedMin = -1;
1170  int VgprIndexUnusedMin = -1;
1171  int AgprIndexUnusedMin = -1;
1172  MCContext *Ctx = nullptr;
1173  MCSubtargetInfo const *MSTI = nullptr;
1174 
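 // Record that SGPR #i is used and keep the .kernel.sgpr_count symbol
 // up to date.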
1175  void usesSgprAt(int i) {
1176  if (i >= SgprIndexUnusedMin) {
1177  SgprIndexUnusedMin = ++i;
1178  if (Ctx) {
1179  MCSymbol* const Sym =
1180  Ctx->getOrCreateSymbol(Twine(".kernel.sgpr_count"));
1181  Sym->setVariableValue(MCConstantExpr::create(SgprIndexUnusedMin, *Ctx));
1182  }
1183  }
1184  }
1185 
1186  void usesVgprAt(int i) {
1187  if (i >= VgprIndexUnusedMin) {
1188  VgprIndexUnusedMin = ++i;
1189  if (Ctx) {
1190  MCSymbol* const Sym =
1191  Ctx->getOrCreateSymbol(Twine(".kernel.vgpr_count"));
1192  int totalVGPR = getTotalNumVGPRs(isGFX90A(*MSTI), AgprIndexUnusedMin,
1193  VgprIndexUnusedMin);
1194  Sym->setVariableValue(MCConstantExpr::create(totalVGPR, *Ctx));
1195  }
1196  }
1197  }
1198 
1199  void usesAgprAt(int i) {
1200  // Instruction will error in AMDGPUAsmParser::MatchAndEmitInstruction
1201  if (!hasMAIInsts(*MSTI))
1202  return;
1203 
1204  if (i >= AgprIndexUnusedMin) {
1205  AgprIndexUnusedMin = ++i;
1206  if (Ctx) {
1207  MCSymbol* const Sym =
1208  Ctx->getOrCreateSymbol(Twine(".kernel.agpr_count"));
1209  Sym->setVariableValue(MCConstantExpr::create(AgprIndexUnusedMin, *Ctx));
1210 
1211  // Also update vgpr_count (dependent on agpr_count for gfx908/gfx90a)
1212  MCSymbol* const vSym =
1213  Ctx->getOrCreateSymbol(Twine(".kernel.vgpr_count"));
1214  int totalVGPR = getTotalNumVGPRs(isGFX90A(*MSTI), AgprIndexUnusedMin,
1215  VgprIndexUnusedMin);
1216  vSym->setVariableValue(MCConstantExpr::create(totalVGPR, *Ctx));
1217  }
1218  }
1219  }
1220 
1221 public:
1222  KernelScopeInfo() = default;
1223 
1224  void initialize(MCContext &Context) {
1225  Ctx = &Context;
1226  MSTI = Ctx->getSubtargetInfo();
1227 
1228  usesSgprAt(SgprIndexUnusedMin = -1);
1229  usesVgprAt(VgprIndexUnusedMin = -1);
1230  if (hasMAIInsts(*MSTI)) {
1231  usesAgprAt(AgprIndexUnusedMin = -1);
1232  }
1233  }
1234 
1235  void usesRegister(RegisterKind RegKind, unsigned DwordRegIndex,
1236  unsigned RegWidth) {
1237  switch (RegKind) {
1238  case IS_SGPR:
1239  usesSgprAt(DwordRegIndex + divideCeil(RegWidth, 32) - 1);
1240  break;
1241  case IS_AGPR:
1242  usesAgprAt(DwordRegIndex + divideCeil(RegWidth, 32) - 1);
1243  break;
1244  case IS_VGPR:
1245  usesVgprAt(DwordRegIndex + divideCeil(RegWidth, 32) - 1);
1246  break;
1247  default:
1248  break;
1249  }
1250  }
1251 };
1252 
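// Target-specific assembly parser for AMDGPU. Parses instructions and
// directives, performs semantic checks, and builds the resulting MCInsts.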
1253 class AMDGPUAsmParser : public MCTargetAsmParser {
1254  MCAsmParser &Parser;
1255 
1256  unsigned ForcedEncodingSize = 0;
1257  bool ForcedDPP = false;
1258  bool ForcedSDWA = false;
1259  KernelScopeInfo KernelScope;
1260  unsigned CPolSeen;
1261 
1262  /// @name Auto-generated Match Functions
1263  /// {
1264 
1265 #define GET_ASSEMBLER_HEADER
1266 #include "AMDGPUGenAsmMatcher.inc"
1267 
1268  /// }
1269 
1270 private:
1271  bool ParseAsAbsoluteExpression(uint32_t &Ret);
1272  bool OutOfRangeError(SMRange Range);
1273  /// Calculate VGPR/SGPR blocks required for given target, reserved
1274  /// registers, and user-specified NextFreeXGPR values.
1275  ///
1276  /// \param Features [in] Target features, used for bug corrections.
1277  /// \param VCCUsed [in] Whether VCC special SGPR is reserved.
1278  /// \param FlatScrUsed [in] Whether FLAT_SCRATCH special SGPR is reserved.
1279  /// \param XNACKUsed [in] Whether XNACK_MASK special SGPR is reserved.
1280  /// \param EnableWavefrontSize32 [in] Value of ENABLE_WAVEFRONT_SIZE32 kernel
1281  /// descriptor field, if valid.
1282  /// \param NextFreeVGPR [in] Max VGPR number referenced, plus one.
1283  /// \param VGPRRange [in] Token range, used for VGPR diagnostics.
1284  /// \param NextFreeSGPR [in] Max SGPR number referenced, plus one.
1285  /// \param SGPRRange [in] Token range, used for SGPR diagnostics.
1286  /// \param VGPRBlocks [out] Result VGPR block count.
1287  /// \param SGPRBlocks [out] Result SGPR block count.
1288  bool calculateGPRBlocks(const FeatureBitset &Features, bool VCCUsed,
1289  bool FlatScrUsed, bool XNACKUsed,
1290  Optional<bool> EnableWavefrontSize32, unsigned NextFreeVGPR,
1291  SMRange VGPRRange, unsigned NextFreeSGPR,
1292  SMRange SGPRRange, unsigned &VGPRBlocks,
1293  unsigned &SGPRBlocks);
1294  bool ParseDirectiveAMDGCNTarget();
1295  bool ParseDirectiveAMDHSAKernel();
1296  bool ParseDirectiveMajorMinor(uint32_t &Major, uint32_t &Minor);
1297  bool ParseDirectiveHSACodeObjectVersion();
1298  bool ParseDirectiveHSACodeObjectISA();
1299  bool ParseAMDKernelCodeTValue(StringRef ID, amd_kernel_code_t &Header);
1300  bool ParseDirectiveAMDKernelCodeT();
1301  // TODO: Possibly make subtargetHasRegister const.
1302  bool subtargetHasRegister(const MCRegisterInfo &MRI, unsigned RegNo);
1303  bool ParseDirectiveAMDGPUHsaKernel();
1304 
1305  bool ParseDirectiveISAVersion();
1306  bool ParseDirectiveHSAMetadata();
1307  bool ParseDirectivePALMetadataBegin();
1308  bool ParseDirectivePALMetadata();
1309  bool ParseDirectiveAMDGPULDS();
1310 
1311  /// Common code to parse out a block of text (typically YAML) between start and
1312  /// end directives.
1313  bool ParseToEndDirective(const char *AssemblerDirectiveBegin,
1314  const char *AssemblerDirectiveEnd,
1315  std::string &CollectString);
1316 
1317  bool AddNextRegisterToList(unsigned& Reg, unsigned& RegWidth,
1318  RegisterKind RegKind, unsigned Reg1, SMLoc Loc);
1319  bool ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
1320  unsigned &RegNum, unsigned &RegWidth,
1321  bool RestoreOnFailure = false);
1322  bool ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
1323  unsigned &RegNum, unsigned &RegWidth,
1324  SmallVectorImpl<AsmToken> &Tokens);
1325  unsigned ParseRegularReg(RegisterKind &RegKind, unsigned &RegNum,
1326  unsigned &RegWidth,
1327  SmallVectorImpl<AsmToken> &Tokens);
1328  unsigned ParseSpecialReg(RegisterKind &RegKind, unsigned &RegNum,
1329  unsigned &RegWidth,
1330  SmallVectorImpl<AsmToken> &Tokens);
1331  unsigned ParseRegList(RegisterKind &RegKind, unsigned &RegNum,
1332  unsigned &RegWidth, SmallVectorImpl<AsmToken> &Tokens);
1333  bool ParseRegRange(unsigned& Num, unsigned& Width);
1334  unsigned getRegularReg(RegisterKind RegKind,
1335  unsigned RegNum,
1336  unsigned RegWidth,
1337  SMLoc Loc);
1338 
1339  bool isRegister();
1340  bool isRegister(const AsmToken &Token, const AsmToken &NextToken) const;
1341  Optional<StringRef> getGprCountSymbolName(RegisterKind RegKind);
1342  void initializeGprCountSymbol(RegisterKind RegKind);
1343  bool updateGprCountSymbols(RegisterKind RegKind, unsigned DwordRegIndex,
1344  unsigned RegWidth);
1345  void cvtMubufImpl(MCInst &Inst, const OperandVector &Operands,
1346  bool IsAtomic);
1347  void cvtDSImpl(MCInst &Inst, const OperandVector &Operands,
1348  bool IsGdsHardcoded);
1349 
1350 public:
1351  enum AMDGPUMatchResultTy {
1352  Match_PreferE32 = FIRST_TARGET_MATCH_RESULT_TY
1353  };
1354  enum OperandMode {
1355  OperandMode_Default,
1356  OperandMode_NSA,
1357  };
1358 
1359  using OptionalImmIndexMap = std::map<AMDGPUOperand::ImmTy, unsigned>;
1360 
1361  AMDGPUAsmParser(const MCSubtargetInfo &STI, MCAsmParser &_Parser,
1362  const MCInstrInfo &MII,
1363  const MCTargetOptions &Options)
1364  : MCTargetAsmParser(Options, STI, MII), Parser(_Parser) {
1365  MCAsmParserExtension::Initialize(Parser);
1366 
1367  if (getFeatureBits().none()) {
1368  // Set default features.
1369  copySTI().ToggleFeature("southern-islands");
1370  }
1371 
1372  setAvailableFeatures(ComputeAvailableFeatures(getFeatureBits()));
1373 
1374  {
1375  // TODO: make these pre-defined variables read-only.
1376  // Currently there is no suitable machinery in core llvm-mc for this.
1377  // MCSymbol::isRedefinable is intended for another purpose, and
1378  // AsmParser::parseDirectiveSet() cannot be specialized for a specific target.
1379  AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
1380  MCContext &Ctx = getContext();
1381  if (ISA.Major >= 6 && isHsaAbiVersion3AndAbove(&getSTI())) {
1382  MCSymbol *Sym =
1383  Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_number"));
1384  Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx));
1385  Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_minor"));
1386  Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx));
1387  Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_stepping"));
1388  Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx));
1389  } else {
1390  MCSymbol *Sym =
1391  Ctx.getOrCreateSymbol(Twine(".option.machine_version_major"));
1392  Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx));
1393  Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_minor"));
1394  Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx));
1395  Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_stepping"));
1396  Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx));
1397  }
1398  if (ISA.Major >= 6 && isHsaAbiVersion3AndAbove(&getSTI())) {
1399  initializeGprCountSymbol(IS_VGPR);
1400  initializeGprCountSymbol(IS_SGPR);
1401  } else
1402  KernelScope.initialize(getContext());
1403  }
1404  }
1405 
1406  bool hasMIMG_R128() const {
1407  return AMDGPU::hasMIMG_R128(getSTI());
1408  }
1409 
1410  bool hasPackedD16() const {
1411  return AMDGPU::hasPackedD16(getSTI());
1412  }
1413 
1414  bool hasGFX10A16() const {
1415  return AMDGPU::hasGFX10A16(getSTI());
1416  }
1417 
1418  bool hasG16() const { return AMDGPU::hasG16(getSTI()); }
1419 
1420  bool isSI() const {
1421  return AMDGPU::isSI(getSTI());
1422  }
1423 
1424  bool isCI() const {
1425  return AMDGPU::isCI(getSTI());
1426  }
1427 
1428  bool isVI() const {
1429  return AMDGPU::isVI(getSTI());
1430  }
1431 
1432  bool isGFX9() const {
1433  return AMDGPU::isGFX9(getSTI());
1434  }
1435 
1436  // TODO: isGFX90A is also true for GFX940. We need to clean this up.
1437  bool isGFX90A() const {
1438  return AMDGPU::isGFX90A(getSTI());
1439  }
1440 
1441  bool isGFX940() const {
1442  return AMDGPU::isGFX940(getSTI());
1443  }
1444 
1445  bool isGFX9Plus() const {
1446  return AMDGPU::isGFX9Plus(getSTI());
1447  }
1448 
1449  bool isGFX10() const {
1450  return AMDGPU::isGFX10(getSTI());
1451  }
1452 
1453  bool isGFX10Plus() const { return AMDGPU::isGFX10Plus(getSTI()); }
1454 
1455  bool isGFX11() const {
1456  return AMDGPU::isGFX11(getSTI());
1457  }
1458 
1459  bool isGFX11Plus() const {
1460  return AMDGPU::isGFX11Plus(getSTI());
1461  }
1462 
1463  bool isGFX10_AEncoding() const { return AMDGPU::isGFX10_AEncoding(getSTI()); }
1464 
1465  bool isGFX10_BEncoding() const {
1466  return AMDGPU::isGFX10_BEncoding(getSTI());
1467  }
1468 
1469  bool hasInv2PiInlineImm() const {
1470  return getFeatureBits()[AMDGPU::FeatureInv2PiInlineImm];
1471  }
1472 
1473  bool hasFlatOffsets() const {
1474  return getFeatureBits()[AMDGPU::FeatureFlatInstOffsets];
1475  }
1476 
1477  bool hasArchitectedFlatScratch() const {
1478  return getFeatureBits()[AMDGPU::FeatureArchitectedFlatScratch];
1479  }
1480 
1481  bool hasSGPR102_SGPR103() const {
1482  return !isVI() && !isGFX9();
1483  }
1484 
1485  bool hasSGPR104_SGPR105() const { return isGFX10Plus(); }
1486 
1487  bool hasIntClamp() const {
1488  return getFeatureBits()[AMDGPU::FeatureIntClamp];
1489  }
1490 
1491  AMDGPUTargetStreamer &getTargetStreamer() {
1492  MCTargetStreamer &TS = *getParser().getStreamer().getTargetStreamer();
1493  return static_cast<AMDGPUTargetStreamer &>(TS);
1494  }
1495 
1496  const MCRegisterInfo *getMRI() const {
1497  // We need this const_cast because for some reason getContext() is not const
1498  // in MCAsmParser.
1499  return const_cast<AMDGPUAsmParser*>(this)->getContext().getRegisterInfo();
1500  }
1501 
1502  const MCInstrInfo *getMII() const {
1503  return &MII;
1504  }
1505 
1506  const FeatureBitset &getFeatureBits() const {
1507  return getSTI().getFeatureBits();
1508  }
1509 
1510  void setForcedEncodingSize(unsigned Size) { ForcedEncodingSize = Size; }
1511  void setForcedDPP(bool ForceDPP_) { ForcedDPP = ForceDPP_; }
1512  void setForcedSDWA(bool ForceSDWA_) { ForcedSDWA = ForceSDWA_; }
1513 
1514  unsigned getForcedEncodingSize() const { return ForcedEncodingSize; }
1515  bool isForcedVOP3() const { return ForcedEncodingSize == 64; }
1516  bool isForcedDPP() const { return ForcedDPP; }
1517  bool isForcedSDWA() const { return ForcedSDWA; }
1518  ArrayRef<unsigned> getMatchedVariants() const;
1519  StringRef getMatchedVariantName() const;
1520 
1521  std::unique_ptr<AMDGPUOperand> parseRegister(bool RestoreOnFailure = false);
1522  bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc,
1523  bool RestoreOnFailure);
1524  bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) override;
1525  OperandMatchResultTy tryParseRegister(unsigned &RegNo, SMLoc &StartLoc,
1526  SMLoc &EndLoc) override;
1527  unsigned checkTargetMatchPredicate(MCInst &Inst) override;
1528  unsigned validateTargetOperandClass(MCParsedAsmOperand &Op,
1529  unsigned Kind) override;
1530  bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
1531  OperandVector &Operands, MCStreamer &Out,
1532  uint64_t &ErrorInfo,
1533  bool MatchingInlineAsm) override;
1534  bool ParseDirective(AsmToken DirectiveID) override;
1535  OperandMatchResultTy parseOperand(OperandVector &Operands, StringRef Mnemonic,
1536  OperandMode Mode = OperandMode_Default);
1537  StringRef parseMnemonicSuffix(StringRef Name);
1538  bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
1539  SMLoc NameLoc, OperandVector &Operands) override;
1540  //bool ProcessInstruction(MCInst &Inst);
1541 
1542  OperandMatchResultTy parseTokenOp(StringRef Name, OperandVector &Operands);
1543 
1544  OperandMatchResultTy parseIntWithPrefix(const char *Prefix, int64_t &Int);
1545 
1546  OperandMatchResultTy
1547  parseIntWithPrefix(const char *Prefix, OperandVector &Operands,
1548  AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
1549  bool (*ConvertResult)(int64_t &) = nullptr);
1550 
1551  OperandMatchResultTy
1552  parseOperandArrayWithPrefix(const char *Prefix,
1553  OperandVector &Operands,
1554  AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
1555  bool (*ConvertResult)(int64_t&) = nullptr);
1556 
1557  OperandMatchResultTy
1558  parseNamedBit(StringRef Name, OperandVector &Operands,
1559  AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone);
1560  OperandMatchResultTy parseCPol(OperandVector &Operands);
1561  OperandMatchResultTy parseStringWithPrefix(StringRef Prefix,
1562  StringRef &Value,
1563  SMLoc &StringLoc);
1564 
1565  bool isModifier();
1566  bool isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
1567  bool isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
1568  bool isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
1569  bool isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const;
1570  bool parseSP3NegModifier();
1571  OperandMatchResultTy parseImm(OperandVector &Operands, bool HasSP3AbsModifier = false);
1572  OperandMatchResultTy parseReg(OperandVector &Operands);
1573  OperandMatchResultTy parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod = false);
1574  OperandMatchResultTy parseRegOrImmWithFPInputMods(OperandVector &Operands, bool AllowImm = true);
1575  OperandMatchResultTy parseRegOrImmWithIntInputMods(OperandVector &Operands, bool AllowImm = true);
1576  OperandMatchResultTy parseRegWithFPInputMods(OperandVector &Operands);
1577  OperandMatchResultTy parseRegWithIntInputMods(OperandVector &Operands);
1578  OperandMatchResultTy parseVReg32OrOff(OperandVector &Operands);
1579  OperandMatchResultTy parseDfmtNfmt(int64_t &Format);
1580  OperandMatchResultTy parseUfmt(int64_t &Format);
1581  OperandMatchResultTy parseSymbolicSplitFormat(StringRef FormatStr, SMLoc Loc, int64_t &Format);
1582  OperandMatchResultTy parseSymbolicUnifiedFormat(StringRef FormatStr, SMLoc Loc, int64_t &Format);
1583  OperandMatchResultTy parseFORMAT(OperandVector &Operands);
1584  OperandMatchResultTy parseSymbolicOrNumericFormat(int64_t &Format);
1585  OperandMatchResultTy parseNumericFormat(int64_t &Format);
1586  bool tryParseFmt(const char *Pref, int64_t MaxVal, int64_t &Val);
1587  bool matchDfmtNfmt(int64_t &Dfmt, int64_t &Nfmt, StringRef FormatStr, SMLoc Loc);
1588 
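 // cvt* hooks convert parsed operands into the final MCInst layout,
 // filling in optional or implied operands where needed.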
1589  void cvtDSOffset01(MCInst &Inst, const OperandVector &Operands);
1590  void cvtDS(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, false); }
1591  void cvtDSGds(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, true); }
1592  void cvtExp(MCInst &Inst, const OperandVector &Operands);
1593 
1594  bool parseCnt(int64_t &IntVal);
1595  OperandMatchResultTy parseSWaitCntOps(OperandVector &Operands);
1596 
1597  bool parseDepCtr(int64_t &IntVal, unsigned &Mask);
1598  void depCtrError(SMLoc Loc, int ErrorId, StringRef DepCtrName);
1599  OperandMatchResultTy parseDepCtrOps(OperandVector &Operands);
1600 
1601  bool parseDelay(int64_t &Delay);
1602  OperandMatchResultTy parseSDelayAluOps(OperandVector &Operands);
1603 
1604  OperandMatchResultTy parseHwreg(OperandVector &Operands);
1605 
1606 private:
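 // Describes one field of a composite operand (e.g. a hwreg or sendmsg
 // field): its value, source location, and whether it was spelled
 // symbolically or defined at all.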
1607  struct OperandInfoTy {
1608  SMLoc Loc;
1609  int64_t Id;
1610  bool IsSymbolic = false;
1611  bool IsDefined = false;
1612 
1613  OperandInfoTy(int64_t Id_) : Id(Id_) {}
1614  };
1615 
1616  bool parseSendMsgBody(OperandInfoTy &Msg, OperandInfoTy &Op, OperandInfoTy &Stream);
1617  bool validateSendMsg(const OperandInfoTy &Msg,
1618  const OperandInfoTy &Op,
1619  const OperandInfoTy &Stream);
1620 
1621  bool parseHwregBody(OperandInfoTy &HwReg,
1622  OperandInfoTy &Offset,
1623  OperandInfoTy &Width);
1624  bool validateHwreg(const OperandInfoTy &HwReg,
1625  const OperandInfoTy &Offset,
1626  const OperandInfoTy &Width);
1627 
1628  SMLoc getFlatOffsetLoc(const OperandVector &Operands) const;
1629  SMLoc getSMEMOffsetLoc(const OperandVector &Operands) const;
1630  SMLoc getBLGPLoc(const OperandVector &Operands) const;
1631 
1632  SMLoc getOperandLoc(std::function<bool(const AMDGPUOperand&)> Test,
1633  const OperandVector &Operands) const;
1634  SMLoc getImmLoc(AMDGPUOperand::ImmTy Type, const OperandVector &Operands) const;
1635  SMLoc getRegLoc(unsigned Reg, const OperandVector &Operands) const;
1636  SMLoc getLitLoc(const OperandVector &Operands,
1637  bool SearchMandatoryLiterals = false) const;
1638  SMLoc getMandatoryLitLoc(const OperandVector &Operands) const;
1639  SMLoc getConstLoc(const OperandVector &Operands) const;
1640  SMLoc getInstLoc(const OperandVector &Operands) const;
1641 
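 // Semantic checks run on the matched MCInst before it is emitted.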
1642  bool validateInstruction(const MCInst &Inst, const SMLoc &IDLoc, const OperandVector &Operands);
1643  bool validateFlatOffset(const MCInst &Inst, const OperandVector &Operands);
1644  bool validateSMEMOffset(const MCInst &Inst, const OperandVector &Operands);
1645  bool validateSOPLiteral(const MCInst &Inst) const;
1646  bool validateConstantBusLimitations(const MCInst &Inst, const OperandVector &Operands);
1647  bool validateVOPDRegBankConstraints(const MCInst &Inst,
1648  const OperandVector &Operands);
1649  bool validateIntClampSupported(const MCInst &Inst);
1650  bool validateMIMGAtomicDMask(const MCInst &Inst);
1651  bool validateMIMGGatherDMask(const MCInst &Inst);
1652  bool validateMovrels(const MCInst &Inst, const OperandVector &Operands);
1653  bool validateMIMGDataSize(const MCInst &Inst, const SMLoc &IDLoc);
1654  bool validateMIMGAddrSize(const MCInst &Inst);
1655  bool validateMIMGD16(const MCInst &Inst);
1656  bool validateMIMGMSAA(const MCInst &Inst);
1657  bool validateOpSel(const MCInst &Inst);
1658  bool validateDPP(const MCInst &Inst, const OperandVector &Operands);
1659  bool validateVccOperand(unsigned Reg) const;
1660  bool validateVOPLiteral(const MCInst &Inst, const OperandVector &Operands);
1661  bool validateMAIAccWrite(const MCInst &Inst, const OperandVector &Operands);
1662  bool validateMAISrc2(const MCInst &Inst, const OperandVector &Operands);
1663  bool validateMFMA(const MCInst &Inst, const OperandVector &Operands);
1664  bool validateAGPRLdSt(const MCInst &Inst) const;
1665  bool validateVGPRAlign(const MCInst &Inst) const;
1666  bool validateBLGP(const MCInst &Inst, const OperandVector &Operands);
1667  bool validateGWS(const MCInst &Inst, const OperandVector &Operands);
1668  bool validateDivScale(const MCInst &Inst);
1669  bool validateWaitCnt(const MCInst &Inst, const OperandVector &Operands);
1670  bool validateCoherencyBits(const MCInst &Inst, const OperandVector &Operands,
1671  const SMLoc &IDLoc);
1672  bool validateExeczVcczOperands(const OperandVector &Operands);
1673  bool validateTFE(const MCInst &Inst, const OperandVector &Operands);
1674  Optional<StringRef> validateLdsDirect(const MCInst &Inst);
1675  unsigned getConstantBusLimit(unsigned Opcode) const;
1676  bool usesConstantBus(const MCInst &Inst, unsigned OpIdx);
1677  bool isInlineConstant(const MCInst &Inst, unsigned OpIdx) const;
1678  unsigned findImplicitSGPRReadInVOP(const MCInst &Inst) const;
1679 
1680  bool isSupportedMnemo(StringRef Mnemo,
1681  const FeatureBitset &FBS);
1682  bool isSupportedMnemo(StringRef Mnemo,
1683  const FeatureBitset &FBS,
1684  ArrayRef<unsigned> Variants);
1685  bool checkUnsupportedInstruction(StringRef Name, const SMLoc &IDLoc);
1686 
1687  bool isId(const StringRef Id) const;
1688  bool isId(const AsmToken &Token, const StringRef Id) const;
1689  bool isToken(const AsmToken::TokenKind Kind) const;
1690  bool trySkipId(const StringRef Id);
1691  bool trySkipId(const StringRef Pref, const StringRef Id);
1692  bool trySkipId(const StringRef Id, const AsmToken::TokenKind Kind);
1693  bool trySkipToken(const AsmToken::TokenKind Kind);
1694  bool skipToken(const AsmToken::TokenKind Kind, const StringRef ErrMsg);
1695  bool parseString(StringRef &Val, const StringRef ErrMsg = "expected a string");
1696  bool parseId(StringRef &Val, const StringRef ErrMsg = "");
1697 
1698  void peekTokens(MutableArrayRef<AsmToken> Tokens);
1699  AsmToken::TokenKind getTokenKind() const;
1700  bool parseExpr(int64_t &Imm, StringRef Expected = "");
1701  bool parseExpr(OperandVector &Operands);
1702  StringRef getTokenStr() const;
1703  AsmToken peekToken(bool ShouldSkipSpace = true);
1704  AsmToken getToken() const;
1705  SMLoc getLoc() const;
1706  void lex();
1707 
1708 public:
1709  void onBeginOfFile() override;
1710 
1711  OperandMatchResultTy parseCustomOperand(OperandVector &Operands,
1712  unsigned MCK);
1713 
1714  OperandMatchResultTy parseExpTgt(OperandVector &Operands);
1715  OperandMatchResultTy parseSendMsgOp(OperandVector &Operands);
1716  OperandMatchResultTy parseInterpSlot(OperandVector &Operands);
1717  OperandMatchResultTy parseInterpAttr(OperandVector &Operands);
1718  OperandMatchResultTy parseSOppBrTarget(OperandVector &Operands);
1719  OperandMatchResultTy parseBoolReg(OperandVector &Operands);
1720 
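 // Helpers for parsing swizzle operands and the swizzle() macro syntax.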
1721  bool parseSwizzleOperand(int64_t &Op,
1722  const unsigned MinVal,
1723  const unsigned MaxVal,
1724  const StringRef ErrMsg,
1725  SMLoc &Loc);
1726  bool parseSwizzleOperands(const unsigned OpNum, int64_t* Op,
1727  const unsigned MinVal,
1728  const unsigned MaxVal,
1729  const StringRef ErrMsg);
1730  OperandMatchResultTy parseSwizzleOp(OperandVector &Operands);
1731  bool parseSwizzleOffset(int64_t &Imm);
1732  bool parseSwizzleMacro(int64_t &Imm);
1733  bool parseSwizzleQuadPerm(int64_t &Imm);
1734  bool parseSwizzleBitmaskPerm(int64_t &Imm);
1735  bool parseSwizzleBroadcast(int64_t &Imm);
1736  bool parseSwizzleSwap(int64_t &Imm);
1737  bool parseSwizzleReverse(int64_t &Imm);
1738 
1739  OperandMatchResultTy parseGPRIdxMode(OperandVector &Operands);
1740  int64_t parseGPRIdxMacro();
1741 
1742  void cvtMubuf(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false); }
1743  void cvtMubufAtomic(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, true); }
1744  void cvtMtbuf(MCInst &Inst, const OperandVector &Operands);
1745 
1746  AMDGPUOperand::Ptr defaultCPol() const;
1747 
1748  AMDGPUOperand::Ptr defaultSMRDOffset8() const;
1749  AMDGPUOperand::Ptr defaultSMEMOffset() const;
1750  AMDGPUOperand::Ptr defaultSMRDLiteralOffset() const;
1751  AMDGPUOperand::Ptr defaultFlatOffset() const;
1752 
1753  OperandMatchResultTy parseOModOperand(OperandVector &Operands);
1754 
1755  void cvtVOP3(MCInst &Inst, const OperandVector &Operands,
1756  OptionalImmIndexMap &OptionalIdx);
1757  void cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands);
1758  void cvtVOP3(MCInst &Inst, const OperandVector &Operands);
1759  void cvtVOP3P(MCInst &Inst, const OperandVector &Operands);
1760  void cvtVOPD(MCInst &Inst, const OperandVector &Operands);
1761  void cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands,
1762  OptionalImmIndexMap &OptionalIdx);
1763  void cvtVOP3P(MCInst &Inst, const OperandVector &Operands,
1764  OptionalImmIndexMap &OptionalIdx);
1765 
1766  void cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands);
1767  void cvtVINTERP(MCInst &Inst, const OperandVector &Operands);
1768 
1769  void cvtMIMG(MCInst &Inst, const OperandVector &Operands,
1770  bool IsAtomic = false);
1771  void cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands);
1772  void cvtIntersectRay(MCInst &Inst, const OperandVector &Operands);
1773 
1774  void cvtSMEMAtomic(MCInst &Inst, const OperandVector &Operands);
1775 
1776  bool parseDimId(unsigned &Encoding);
1777  OperandMatchResultTy parseDim(OperandVector &Operands);
1778  OperandMatchResultTy parseDPP8(OperandVector &Operands);
1779  OperandMatchResultTy parseDPPCtrl(OperandVector &Operands);
1780  bool isSupportedDPPCtrl(StringRef Ctrl, const OperandVector &Operands);
1781  int64_t parseDPPCtrlSel(StringRef Ctrl);
1782  int64_t parseDPPCtrlPerm();
1783  AMDGPUOperand::Ptr defaultRowMask() const;
1784  AMDGPUOperand::Ptr defaultBankMask() const;
1785  AMDGPUOperand::Ptr defaultBoundCtrl() const;
1786  AMDGPUOperand::Ptr defaultFI() const;
1787  void cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8 = false);
1788  void cvtDPP8(MCInst &Inst, const OperandVector &Operands) {
1789  cvtDPP(Inst, Operands, true);
1790  }
1791  void cvtVOP3DPP(MCInst &Inst, const OperandVector &Operands,
1792  bool IsDPP8 = false);
1793  void cvtVOP3DPP8(MCInst &Inst, const OperandVector &Operands) {
1794  cvtVOP3DPP(Inst, Operands, true);
1795  }
1796 
1797  OperandMatchResultTy parseSDWASel(OperandVector &Operands, StringRef Prefix,
1798  AMDGPUOperand::ImmTy Type);
1799  OperandMatchResultTy parseSDWADstUnused(OperandVector &Operands);
1800  void cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands);
1801  void cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands);
1802  void cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands);
1803  void cvtSdwaVOP2e(MCInst &Inst, const OperandVector &Operands);
1804  void cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands);
1805  void cvtSDWA(MCInst &Inst, const OperandVector &Operands,
1806  uint64_t BasicInstType,
1807  bool SkipDstVcc = false,
1808  bool SkipSrcVcc = false);
1809 
1810  AMDGPUOperand::Ptr defaultBLGP() const;
1811  AMDGPUOperand::Ptr defaultCBSZ() const;
1812  AMDGPUOperand::Ptr defaultABID() const;
1813 
1814  OperandMatchResultTy parseEndpgmOp(OperandVector &Operands);
1815  AMDGPUOperand::Ptr defaultEndpgmImmOperands() const;
1816 
1817  AMDGPUOperand::Ptr defaultWaitVDST() const;
1818  AMDGPUOperand::Ptr defaultWaitEXP() const;
1819  OperandMatchResultTy parseVOPD(OperandVector &Operands);
1820 };
1821 
1822 } // end anonymous namespace
1823 
1824 // May be called with an integer type of equivalent bitwidth.
1825 static const fltSemantics *getFltSemantics(unsigned Size) {
1826  switch (Size) {
1827  case 4:
1828  return &APFloat::IEEEsingle();
1829  case 8:
1830  return &APFloat::IEEEdouble();
1831  case 2:
1832  return &APFloat::IEEEhalf();
1833  default:
1834  llvm_unreachable("unsupported fp type");
1835  }
1836 }
1837 
1838 static const fltSemantics *getFltSemantics(MVT VT) {
1839  return getFltSemantics(VT.getSizeInBits() / 8);
1840 }
1841 
1842 static const fltSemantics *getOpFltSemantics(uint8_t OperandType) {
1843  switch (OperandType) {
1844  case AMDGPU::OPERAND_REG_IMM_INT32:
1845  case AMDGPU::OPERAND_REG_IMM_FP32:
1846  case AMDGPU::OPERAND_REG_IMM_FP32_DEFERRED:
1847  case AMDGPU::OPERAND_REG_INLINE_C_INT32:
1848  case AMDGPU::OPERAND_REG_INLINE_C_FP32:
1849  case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
1850  case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
1851  case AMDGPU::OPERAND_REG_INLINE_C_V2FP32:
1852  case AMDGPU::OPERAND_REG_IMM_V2FP32:
1853  case AMDGPU::OPERAND_REG_INLINE_C_V2INT32:
1854  case AMDGPU::OPERAND_REG_IMM_V2INT32:
1855  case AMDGPU::OPERAND_KIMM32:
1856  return &APFloat::IEEEsingle();
1857  case AMDGPU::OPERAND_REG_IMM_INT64:
1858  case AMDGPU::OPERAND_REG_IMM_FP64:
1859  case AMDGPU::OPERAND_REG_INLINE_C_INT64:
1860  case AMDGPU::OPERAND_REG_INLINE_C_FP64:
1861  case AMDGPU::OPERAND_REG_INLINE_AC_FP64:
1862  return &APFloat::IEEEdouble();
1863  case AMDGPU::OPERAND_REG_IMM_INT16:
1864  case AMDGPU::OPERAND_REG_IMM_FP16:
1865  case AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED:
1866  case AMDGPU::OPERAND_REG_INLINE_C_INT16:
1867  case AMDGPU::OPERAND_REG_INLINE_C_FP16:
1868  case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
1869  case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
1870  case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
1871  case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
1872  case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
1873  case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16:
1874  case AMDGPU::OPERAND_REG_IMM_V2INT16:
1875  case AMDGPU::OPERAND_REG_IMM_V2FP16:
1876  case AMDGPU::OPERAND_KIMM16:
1877  return &APFloat::IEEEhalf();
1878  default:
1879  llvm_unreachable("unsupported fp type");
1880  }
1881 }
1882 
1883 //===----------------------------------------------------------------------===//
1884 // Operand
1885 //===----------------------------------------------------------------------===//
1886 
1887 static bool canLosslesslyConvertToFPType(APFloat &FPLiteral, MVT VT) {
1888  bool Lost;
1889 
1890  // Convert literal to the operand's floating-point type
1891  APFloat::opStatus Status = FPLiteral.convert(*getFltSemantics(VT),
1892  APFloat::rmNearestTiesToEven,
1893  &Lost);
1894  // We allow precision loss but not overflow or underflow
1895  if (Status != APFloat::opOK &&
1896  Lost &&
1897  ((Status & APFloat::opOverflow) != 0 ||
1898  (Status & APFloat::opUnderflow) != 0)) {
1899  return false;
1900  }
1901 
1902  return true;
1903 }
1904 
1905 static bool isSafeTruncation(int64_t Val, unsigned Size) {
1906  return isUIntN(Size, Val) || isIntN(Size, Val);
1907 }
1908 
1909 static bool isInlineableLiteralOp16(int64_t Val, MVT VT, bool HasInv2Pi) {
1910  if (VT.getScalarType() == MVT::i16) {
1911  // FP immediate values are broken.
1912  return isInlinableIntLiteral(Val);
1913  }
1914 
1915  // f16/v2f16 operands work correctly for all values.
1916  return AMDGPU::isInlinableLiteral16(Val, HasInv2Pi);
1917 }
1918 
1919 bool AMDGPUOperand::isInlinableImm(MVT type) const {
1920 
1921  // This is a hack to enable named inline values like
1922  // shared_base with both 32-bit and 64-bit operands.
1923  // Note that these values are defined as
1924  // 32-bit operands only.
1925  if (isInlineValue()) {
1926  return true;
1927  }
1928 
1929  if (!isImmTy(ImmTyNone)) {
1930  // Only plain immediates are inlinable (e.g. "clamp" attribute is not)
1931  return false;
1932  }
1933  // TODO: We should avoid using host float here. It would be better to
1934  // check the float bit values which is what a few other places do.
1935  // We've had bot failures before due to weird NaN support on mips hosts.
1936 
1937  APInt Literal(64, Imm.Val);
1938 
1939  if (Imm.IsFPImm) { // We got fp literal token
1940  if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand
1941  return AMDGPU::isInlinableLiteral64(Imm.Val,
1942  AsmParser->hasInv2PiInlineImm());
1943  }
1944 
1945  APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val));
1946  if (!canLosslesslyConvertToFPType(FPLiteral, type))
1947  return false;
1948 
1949  if (type.getScalarSizeInBits() == 16) {
1950  return isInlineableLiteralOp16(
1951  static_cast<int16_t>(FPLiteral.bitcastToAPInt().getZExtValue()),
1952  type, AsmParser->hasInv2PiInlineImm());
1953  }
1954 
1955  // Check if single precision literal is inlinable
1956  return AMDGPU::isInlinableLiteral32(
1957  static_cast<int32_t>(FPLiteral.bitcastToAPInt().getZExtValue()),
1958  AsmParser->hasInv2PiInlineImm());
1959  }
1960 
1961  // We got int literal token.
1962  if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand
1963  return AMDGPU::isInlinableLiteral64(Imm.Val,
1964  AsmParser->hasInv2PiInlineImm());
1965  }
1966 
1967  if (!isSafeTruncation(Imm.Val, type.getScalarSizeInBits())) {
1968  return false;
1969  }
1970 
1971  if (type.getScalarSizeInBits() == 16) {
1972  return isInlineableLiteralOp16(
1973  static_cast<int16_t>(Literal.getLoBits(16).getSExtValue()),
1974  type, AsmParser->hasInv2PiInlineImm());
1975  }
1976 
1978  static_cast<int32_t>(Literal.getLoBits(32).getZExtValue()),
1979  AsmParser->hasInv2PiInlineImm());
1980 }
1981 
1982 bool AMDGPUOperand::isLiteralImm(MVT type) const {
1983  // Check that this immediate can be added as literal
1984  if (!isImmTy(ImmTyNone)) {
1985  return false;
1986  }
1987 
1988  if (!Imm.IsFPImm) {
1989  // We got int literal token.
1990 
1991  if (type == MVT::f64 && hasFPModifiers()) {
1992  // Cannot apply fp modifiers to int literals while preserving the same semantics
1993  // for VOP1/2/C and VOP3, because of integer truncation. To avoid ambiguity,
1994  // disable these cases.
1995  return false;
1996  }
1997 
1998  unsigned Size = type.getSizeInBits();
1999  if (Size == 64)
2000  Size = 32;
2001 
2002  // FIXME: 64-bit operands can zero extend, sign extend, or pad zeroes for FP
2003  // types.
2004  return isSafeTruncation(Imm.Val, Size);
2005  }
2006 
2007  // We got fp literal token
2008  if (type == MVT::f64) { // Expected 64-bit fp operand
2009  // We would set the low 32 bits of the literal to zeroes, but we accept such literals
2010  return true;
2011  }
2012 
2013  if (type == MVT::i64) { // Expected 64-bit int operand
2014  // We don't allow fp literals in 64-bit integer instructions. It is
2015  // unclear how we should encode them.
2016  return false;
2017  }
2018 
2019  // We allow fp literals with f16x2 operands assuming that the specified
2020  // literal goes into the lower half and the upper half is zero. We also
2021  // require that the literal may be losslessly converted to f16.
2022  MVT ExpectedType = (type == MVT::v2f16)? MVT::f16 :
2023  (type == MVT::v2i16)? MVT::i16 :
2024  (type == MVT::v2f32)? MVT::f32 : type;
2025 
2026  APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val));
2027  return canLosslesslyConvertToFPType(FPLiteral, ExpectedType);
2028 }
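 
// A minimal illustration of the classification above (assuming standard
// inline-constant rules; the exact set depends on the subtarget):
//   v_add_f32 v0, 0.5, v1    // 0.5 is an inline constant (isInlinableImm)
//   v_add_f32 v0, 1.25, v1   // not inlinable, but isLiteralImm: encoded as a
//                            // 32-bit literal
//   v_add_f32 v0, 64, v1     // inline integer constant (-16..64 are inlinable)
//   v_add_f32 v0, 65, v1     // requires a 32-bit literal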
2029 
2030 bool AMDGPUOperand::isRegClass(unsigned RCID) const {
2031  return isRegKind() && AsmParser->getMRI()->getRegClass(RCID).contains(getReg());
2032 }
2033 
2034 bool AMDGPUOperand::isVRegWithInputMods() const {
2035  return isRegClass(AMDGPU::VGPR_32RegClassID) ||
2036  // GFX90A allows DPP on 64-bit operands.
2037  (isRegClass(AMDGPU::VReg_64RegClassID) &&
2038  AsmParser->getFeatureBits()[AMDGPU::Feature64BitDPP]);
2039 }
2040 
2041 bool AMDGPUOperand::isT16VRegWithInputMods() const {
2042  return isRegClass(AMDGPU::VGPR_32_Lo128RegClassID);
2043 }
2044 
2045 bool AMDGPUOperand::isSDWAOperand(MVT type) const {
2046  if (AsmParser->isVI())
2047  return isVReg32();
2048  else if (AsmParser->isGFX9Plus())
2049  return isRegClass(AMDGPU::VS_32RegClassID) || isInlinableImm(type);
2050  else
2051  return false;
2052 }
2053 
2054 bool AMDGPUOperand::isSDWAFP16Operand() const {
2055  return isSDWAOperand(MVT::f16);
2056 }
2057 
2058 bool AMDGPUOperand::isSDWAFP32Operand() const {
2059  return isSDWAOperand(MVT::f32);
2060 }
2061 
2062 bool AMDGPUOperand::isSDWAInt16Operand() const {
2063  return isSDWAOperand(MVT::i16);
2064 }
2065 
2066 bool AMDGPUOperand::isSDWAInt32Operand() const {
2067  return isSDWAOperand(MVT::i32);
2068 }
2069 
2070 bool AMDGPUOperand::isBoolReg() const {
2071  auto FB = AsmParser->getFeatureBits();
2072  return isReg() && ((FB[AMDGPU::FeatureWavefrontSize64] && isSCSrcB64()) ||
2073  (FB[AMDGPU::FeatureWavefrontSize32] && isSCSrcB32()));
2074 }
2075 
2076 uint64_t AMDGPUOperand::applyInputFPModifiers(uint64_t Val, unsigned Size) const
2077 {
2078  assert(isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers());
2079  assert(Size == 2 || Size == 4 || Size == 8);
2080 
2081  const uint64_t FpSignMask = (1ULL << (Size * 8 - 1));
2082 
2083  if (Imm.Mods.Abs) {
2084  Val &= ~FpSignMask;
2085  }
2086  if (Imm.Mods.Neg) {
2087  Val ^= FpSignMask;
2088  }
2089 
2090  return Val;
2091 }
2092 
2093 void AMDGPUOperand::addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers) const {
2094  if (AMDGPU::isSISrcOperand(AsmParser->getMII()->get(Inst.getOpcode()),
2095  Inst.getNumOperands())) {
2096  addLiteralImmOperand(Inst, Imm.Val,
2097  ApplyModifiers &
2098  isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers());
2099  } else {
2100  assert(!isImmTy(ImmTyNone) || !hasModifiers());
2101  Inst.addOperand(MCOperand::createImm(Imm.Val));
2102  setImmKindNone();
2103  }
2104 }
2105 
2106 void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const {
2107  const auto& InstDesc = AsmParser->getMII()->get(Inst.getOpcode());
2108  auto OpNum = Inst.getNumOperands();
2109  // Check that this operand accepts literals
2110  assert(AMDGPU::isSISrcOperand(InstDesc, OpNum));
2111 
2112  if (ApplyModifiers) {
2113  assert(AMDGPU::isSISrcFPOperand(InstDesc, OpNum));
2114  const unsigned Size = Imm.IsFPImm ? sizeof(double) : getOperandSize(InstDesc, OpNum);
2115  Val = applyInputFPModifiers(Val, Size);
2116  }
2117 
2118  APInt Literal(64, Val);
2119  uint8_t OpTy = InstDesc.OpInfo[OpNum].OperandType;
2120 
2121  if (Imm.IsFPImm) { // We got fp literal token
2122  switch (OpTy) {
2123  case AMDGPU::OPERAND_REG_IMM_INT64:
2124  case AMDGPU::OPERAND_REG_IMM_FP64:
2125  case AMDGPU::OPERAND_REG_INLINE_C_INT64:
2126  case AMDGPU::OPERAND_REG_INLINE_C_FP64:
2127  case AMDGPU::OPERAND_REG_INLINE_AC_FP64:
2128  if (AMDGPU::isInlinableLiteral64(Literal.getZExtValue(),
2129  AsmParser->hasInv2PiInlineImm())) {
2130  Inst.addOperand(MCOperand::createImm(Literal.getZExtValue()));
2131  setImmKindConst();
2132  return;
2133  }
2134 
2135  // Non-inlineable
2136  if (AMDGPU::isSISrcFPOperand(InstDesc, OpNum)) { // Expected 64-bit fp operand
2137  // For fp operands we check if low 32 bits are zeros
2138  if (Literal.getLoBits(32) != 0) {
2139  const_cast<AMDGPUAsmParser *>(AsmParser)->Warning(Inst.getLoc(),
2140  "Can't encode literal as exact 64-bit floating-point operand. "
2141  "Low 32-bits will be set to zero");
2142  }
2143 
2144  Inst.addOperand(MCOperand::createImm(Literal.lshr(32).getZExtValue()));
2145  setImmKindLiteral();
2146  return;
2147  }
2148 
2149  // We don't allow fp literals in 64-bit integer instructions. It is
2150  // unclear how we should encode them. This case should be checked earlier
2151  // in predicate methods (isLiteralImm())
2152  llvm_unreachable("fp literal in 64-bit integer instruction.");
2153 
2179  case AMDGPU::OPERAND_KIMM16: {
2180  bool lost;
2181  APFloat FPLiteral(APFloat::IEEEdouble(), Literal);
2182  // Convert literal to the operand's floating-point type
2183  FPLiteral.convert(*getOpFltSemantics(OpTy),
2184  APFloat::rmNearestTiesToEven, &lost);
2185  // We allow precision loss but not overflow or underflow. This should be
2186  // checked earlier in isLiteralImm()
2187 
2188  uint64_t ImmVal = FPLiteral.bitcastToAPInt().getZExtValue();
2189  Inst.addOperand(MCOperand::createImm(ImmVal));
2190  if (OpTy == AMDGPU::OPERAND_KIMM32 || OpTy == AMDGPU::OPERAND_KIMM16) {
2191  setImmKindMandatoryLiteral();
2192  } else {
2193  setImmKindLiteral();
2194  }
2195  return;
2196  }
2197  default:
2198  llvm_unreachable("invalid operand size");
2199  }
2200 
2201  return;
2202  }
2203 
2204  // We got int literal token.
2205  // Only sign extend inline immediates.
2206  switch (OpTy) {
2220  if (isSafeTruncation(Val, 32) &&
2221  AMDGPU::isInlinableLiteral32(static_cast<int32_t>(Val),
2222  AsmParser->hasInv2PiInlineImm())) {
2223  Inst.addOperand(MCOperand::createImm(Val));
2224  setImmKindConst();
2225  return;
2226  }
2227 
2228  Inst.addOperand(MCOperand::createImm(Val & 0xffffffff));
2229  setImmKindLiteral();
2230  return;
2231 
2232  case AMDGPU::OPERAND_REG_IMM_INT64:
2233  case AMDGPU::OPERAND_REG_IMM_FP64:
2234  case AMDGPU::OPERAND_REG_INLINE_C_INT64:
2235  case AMDGPU::OPERAND_REG_INLINE_C_FP64:
2236  case AMDGPU::OPERAND_REG_INLINE_AC_FP64:
2237  if (AMDGPU::isInlinableLiteral64(Val, AsmParser->hasInv2PiInlineImm())) {
2238  Inst.addOperand(MCOperand::createImm(Val));
2239  setImmKindConst();
2240  return;
2241  }
2242 
2243  Inst.addOperand(MCOperand::createImm(Lo_32(Val)));
2244  setImmKindLiteral();
2245  return;
2246 
2247  case AMDGPU::OPERAND_REG_IMM_INT16:
2248  case AMDGPU::OPERAND_REG_IMM_FP16:
2249  case AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED:
2250  case AMDGPU::OPERAND_REG_INLINE_C_INT16:
2251  case AMDGPU::OPERAND_REG_INLINE_C_FP16:
2252  case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
2253  case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
2254  if (isSafeTruncation(Val, 16) &&
2255  AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val),
2256  AsmParser->hasInv2PiInlineImm())) {
2257  Inst.addOperand(MCOperand::createImm(Val));
2258  setImmKindConst();
2259  return;
2260  }
2261 
2262  Inst.addOperand(MCOperand::createImm(Val & 0xffff));
2263  setImmKindLiteral();
2264  return;
2265 
2266  case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
2267  case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
2268  case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
2269  case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16: {
2270  assert(isSafeTruncation(Val, 16));
2271  assert(AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val),
2272  AsmParser->hasInv2PiInlineImm()));
2273 
2274  Inst.addOperand(MCOperand::createImm(Val));
2275  return;
2276  }
2277  case AMDGPU::OPERAND_KIMM32:
2278  Inst.addOperand(MCOperand::createImm(Literal.getLoBits(32).getZExtValue()));
2279  setImmKindMandatoryLiteral();
2280  return;
2281  case AMDGPU::OPERAND_KIMM16:
2282  Inst.addOperand(MCOperand::createImm(Literal.getLoBits(16).getZExtValue()));
2283  setImmKindMandatoryLiteral();
2284  return;
2285  default:
2286  llvm_unreachable("invalid operand size");
2287  }
2288 }
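 
// For illustration of the 64-bit fp literal handling above: 3.0 encodes as
// 0x4008000000000000, is not an inline constant, and only its high 32 bits
// (0x40080000) can be carried in the literal; that is fine because the low
// 32 bits are zero, whereas a value such as 3.1 has non-zero low bits and
// therefore triggers the "Low 32-bits will be set to zero" warning.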
2289 
2290 template <unsigned Bitwidth>
2291 void AMDGPUOperand::addKImmFPOperands(MCInst &Inst, unsigned N) const {
2292  APInt Literal(64, Imm.Val);
2293  setImmKindMandatoryLiteral();
2294 
2295  if (!Imm.IsFPImm) {
2296  // We got int literal token.
2297  Inst.addOperand(MCOperand::createImm(Literal.getLoBits(Bitwidth).getZExtValue()));
2298  return;
2299  }
2300 
2301  bool Lost;
2302  APFloat FPLiteral(APFloat::IEEEdouble(), Literal);
2303  FPLiteral.convert(*getFltSemantics(Bitwidth / 8),
2304  APFloat::rmNearestTiesToEven, &Lost);
2305  Inst.addOperand(MCOperand::createImm(FPLiteral.bitcastToAPInt().getZExtValue()));
2306 }
2307 
2308 void AMDGPUOperand::addRegOperands(MCInst &Inst, unsigned N) const {
2309  Inst.addOperand(MCOperand::createReg(AMDGPU::getMCReg(getReg(), AsmParser->getSTI())));
2310 }
2311 
2312 static bool isInlineValue(unsigned Reg) {
2313  switch (Reg) {
2314  case AMDGPU::SRC_SHARED_BASE_LO:
2315  case AMDGPU::SRC_SHARED_BASE:
2316  case AMDGPU::SRC_SHARED_LIMIT_LO:
2317  case AMDGPU::SRC_SHARED_LIMIT:
2318  case AMDGPU::SRC_PRIVATE_BASE_LO:
2319  case AMDGPU::SRC_PRIVATE_BASE:
2320  case AMDGPU::SRC_PRIVATE_LIMIT_LO:
2321  case AMDGPU::SRC_PRIVATE_LIMIT:
2322  case AMDGPU::SRC_POPS_EXITING_WAVE_ID:
2323  return true;
2324  case AMDGPU::SRC_VCCZ:
2325  case AMDGPU::SRC_EXECZ:
2326  case AMDGPU::SRC_SCC:
2327  return true;
2328  case AMDGPU::SGPR_NULL:
2329  return true;
2330  default:
2331  return false;
2332  }
2333 }
2334 
2335 bool AMDGPUOperand::isInlineValue() const {
2336  return isRegKind() && ::isInlineValue(getReg());
2337 }
2338 
2339 //===----------------------------------------------------------------------===//
2340 // AsmParser
2341 //===----------------------------------------------------------------------===//
2342 
2343 static int getRegClass(RegisterKind Is, unsigned RegWidth) {
2344  if (Is == IS_VGPR) {
2345  switch (RegWidth) {
2346  default: return -1;
2347  case 32:
2348  return AMDGPU::VGPR_32RegClassID;
2349  case 64:
2350  return AMDGPU::VReg_64RegClassID;
2351  case 96:
2352  return AMDGPU::VReg_96RegClassID;
2353  case 128:
2354  return AMDGPU::VReg_128RegClassID;
2355  case 160:
2356  return AMDGPU::VReg_160RegClassID;
2357  case 192:
2358  return AMDGPU::VReg_192RegClassID;
2359  case 224:
2360  return AMDGPU::VReg_224RegClassID;
2361  case 256:
2362  return AMDGPU::VReg_256RegClassID;
2363  case 288:
2364  return AMDGPU::VReg_288RegClassID;
2365  case 320:
2366  return AMDGPU::VReg_320RegClassID;
2367  case 352:
2368  return AMDGPU::VReg_352RegClassID;
2369  case 384:
2370  return AMDGPU::VReg_384RegClassID;
2371  case 512:
2372  return AMDGPU::VReg_512RegClassID;
2373  case 1024:
2374  return AMDGPU::VReg_1024RegClassID;
2375  }
2376  } else if (Is == IS_TTMP) {
2377  switch (RegWidth) {
2378  default: return -1;
2379  case 32:
2380  return AMDGPU::TTMP_32RegClassID;
2381  case 64:
2382  return AMDGPU::TTMP_64RegClassID;
2383  case 128:
2384  return AMDGPU::TTMP_128RegClassID;
2385  case 256:
2386  return AMDGPU::TTMP_256RegClassID;
2387  case 512:
2388  return AMDGPU::TTMP_512RegClassID;
2389  }
2390  } else if (Is == IS_SGPR) {
2391  switch (RegWidth) {
2392  default: return -1;
2393  case 32:
2394  return AMDGPU::SGPR_32RegClassID;
2395  case 64:
2396  return AMDGPU::SGPR_64RegClassID;
2397  case 96:
2398  return AMDGPU::SGPR_96RegClassID;
2399  case 128:
2400  return AMDGPU::SGPR_128RegClassID;
2401  case 160:
2402  return AMDGPU::SGPR_160RegClassID;
2403  case 192:
2404  return AMDGPU::SGPR_192RegClassID;
2405  case 224:
2406  return AMDGPU::SGPR_224RegClassID;
2407  case 256:
2408  return AMDGPU::SGPR_256RegClassID;
2409  case 288:
2410  return AMDGPU::SGPR_288RegClassID;
2411  case 320:
2412  return AMDGPU::SGPR_320RegClassID;
2413  case 352:
2414  return AMDGPU::SGPR_352RegClassID;
2415  case 384:
2416  return AMDGPU::SGPR_384RegClassID;
2417  case 512:
2418  return AMDGPU::SGPR_512RegClassID;
2419  }
2420  } else if (Is == IS_AGPR) {
2421  switch (RegWidth) {
2422  default: return -1;
2423  case 32:
2424  return AMDGPU::AGPR_32RegClassID;
2425  case 64:
2426  return AMDGPU::AReg_64RegClassID;
2427  case 96:
2428  return AMDGPU::AReg_96RegClassID;
2429  case 128:
2430  return AMDGPU::AReg_128RegClassID;
2431  case 160:
2432  return AMDGPU::AReg_160RegClassID;
2433  case 192:
2434  return AMDGPU::AReg_192RegClassID;
2435  case 224:
2436  return AMDGPU::AReg_224RegClassID;
2437  case 256:
2438  return AMDGPU::AReg_256RegClassID;
2439  case 288:
2440  return AMDGPU::AReg_288RegClassID;
2441  case 320:
2442  return AMDGPU::AReg_320RegClassID;
2443  case 352:
2444  return AMDGPU::AReg_352RegClassID;
2445  case 384:
2446  return AMDGPU::AReg_384RegClassID;
2447  case 512:
2448  return AMDGPU::AReg_512RegClassID;
2449  case 1024:
2450  return AMDGPU::AReg_1024RegClassID;
2451  }
2452  }
2453  return -1;
2454 }
2455 
2456 static unsigned getSpecialRegForName(StringRef RegName) {
2457  return StringSwitch<unsigned>(RegName)
2458  .Case("exec", AMDGPU::EXEC)
2459  .Case("vcc", AMDGPU::VCC)
2460  .Case("flat_scratch", AMDGPU::FLAT_SCR)
2461  .Case("xnack_mask", AMDGPU::XNACK_MASK)
2462  .Case("shared_base", AMDGPU::SRC_SHARED_BASE)
2463  .Case("src_shared_base", AMDGPU::SRC_SHARED_BASE)
2464  .Case("shared_limit", AMDGPU::SRC_SHARED_LIMIT)
2465  .Case("src_shared_limit", AMDGPU::SRC_SHARED_LIMIT)
2466  .Case("private_base", AMDGPU::SRC_PRIVATE_BASE)
2467  .Case("src_private_base", AMDGPU::SRC_PRIVATE_BASE)
2468  .Case("private_limit", AMDGPU::SRC_PRIVATE_LIMIT)
2469  .Case("src_private_limit", AMDGPU::SRC_PRIVATE_LIMIT)
2470  .Case("pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID)
2471  .Case("src_pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID)
2472  .Case("lds_direct", AMDGPU::LDS_DIRECT)
2473  .Case("src_lds_direct", AMDGPU::LDS_DIRECT)
2474  .Case("m0", AMDGPU::M0)
2475  .Case("vccz", AMDGPU::SRC_VCCZ)
2476  .Case("src_vccz", AMDGPU::SRC_VCCZ)
2477  .Case("execz", AMDGPU::SRC_EXECZ)
2478  .Case("src_execz", AMDGPU::SRC_EXECZ)
2479  .Case("scc", AMDGPU::SRC_SCC)
2480  .Case("src_scc", AMDGPU::SRC_SCC)
2481  .Case("tba", AMDGPU::TBA)
2482  .Case("tma", AMDGPU::TMA)
2483  .Case("flat_scratch_lo", AMDGPU::FLAT_SCR_LO)
2484  .Case("flat_scratch_hi", AMDGPU::FLAT_SCR_HI)
2485  .Case("xnack_mask_lo", AMDGPU::XNACK_MASK_LO)
2486  .Case("xnack_mask_hi", AMDGPU::XNACK_MASK_HI)
2487  .Case("vcc_lo", AMDGPU::VCC_LO)
2488  .Case("vcc_hi", AMDGPU::VCC_HI)
2489  .Case("exec_lo", AMDGPU::EXEC_LO)
2490  .Case("exec_hi", AMDGPU::EXEC_HI)
2491  .Case("tma_lo", AMDGPU::TMA_LO)
2492  .Case("tma_hi", AMDGPU::TMA_HI)
2493  .Case("tba_lo", AMDGPU::TBA_LO)
2494  .Case("tba_hi", AMDGPU::TBA_HI)
2495  .Case("pc", AMDGPU::PC_REG)
2496  .Case("null", AMDGPU::SGPR_NULL)
2497  .Default(AMDGPU::NoRegister);
2498 }
2499 
2500 bool AMDGPUAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc,
2501  SMLoc &EndLoc, bool RestoreOnFailure) {
2502  auto R = parseRegister();
2503  if (!R) return true;
2504  assert(R->isReg());
2505  RegNo = R->getReg();
2506  StartLoc = R->getStartLoc();
2507  EndLoc = R->getEndLoc();
2508  return false;
2509 }
2510 
2511 bool AMDGPUAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc,
2512  SMLoc &EndLoc) {
2513  return ParseRegister(RegNo, StartLoc, EndLoc, /*RestoreOnFailure=*/false);
2514 }
2515 
2516 OperandMatchResultTy AMDGPUAsmParser::tryParseRegister(unsigned &RegNo,
2517  SMLoc &StartLoc,
2518  SMLoc &EndLoc) {
2519  bool Result =
2520  ParseRegister(RegNo, StartLoc, EndLoc, /*RestoreOnFailure=*/true);
2521  bool PendingErrors = getParser().hasPendingError();
2522  getParser().clearPendingErrors();
2523  if (PendingErrors)
2524  return MatchOperand_ParseFail;
2525  if (Result)
2526  return MatchOperand_NoMatch;
2527  return MatchOperand_Success;
2528 }
2529 
2530 bool AMDGPUAsmParser::AddNextRegisterToList(unsigned &Reg, unsigned &RegWidth,
2531  RegisterKind RegKind, unsigned Reg1,
2532  SMLoc Loc) {
2533  switch (RegKind) {
2534  case IS_SPECIAL:
2535  if (Reg == AMDGPU::EXEC_LO && Reg1 == AMDGPU::EXEC_HI) {
2536  Reg = AMDGPU::EXEC;
2537  RegWidth = 64;
2538  return true;
2539  }
2540  if (Reg == AMDGPU::FLAT_SCR_LO && Reg1 == AMDGPU::FLAT_SCR_HI) {
2541  Reg = AMDGPU::FLAT_SCR;
2542  RegWidth = 64;
2543  return true;
2544  }
2545  if (Reg == AMDGPU::XNACK_MASK_LO && Reg1 == AMDGPU::XNACK_MASK_HI) {
2546  Reg = AMDGPU::XNACK_MASK;
2547  RegWidth = 64;
2548  return true;
2549  }
2550  if (Reg == AMDGPU::VCC_LO && Reg1 == AMDGPU::VCC_HI) {
2551  Reg = AMDGPU::VCC;
2552  RegWidth = 64;
2553  return true;
2554  }
2555  if (Reg == AMDGPU::TBA_LO && Reg1 == AMDGPU::TBA_HI) {
2556  Reg = AMDGPU::TBA;
2557  RegWidth = 64;
2558  return true;
2559  }
2560  if (Reg == AMDGPU::TMA_LO && Reg1 == AMDGPU::TMA_HI) {
2561  Reg = AMDGPU::TMA;
2562  RegWidth = 64;
2563  return true;
2564  }
2565  Error(Loc, "register does not fit in the list");
2566  return false;
2567  case IS_VGPR:
2568  case IS_SGPR:
2569  case IS_AGPR:
2570  case IS_TTMP:
2571  if (Reg1 != Reg + RegWidth / 32) {
2572  Error(Loc, "registers in a list must have consecutive indices");
2573  return false;
2574  }
2575  RegWidth += 32;
2576  return true;
2577  default:
2578  llvm_unreachable("unexpected register kind");
2579  }
2580 }
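 
// For illustration, register lists accepted or rejected by the code above:
//   [s0, s1, s2, s3]    // merged into s[0:3]; RegWidth grows from 32 to 128
//   [s0, s2]            // "registers in a list must have consecutive indices"
//   [exec_lo, exec_hi]  // special pair merged into exec (RegWidth = 64)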
2581 
2582 struct RegInfo {
2583  StringLiteral Name;
2584  RegisterKind Kind;
2585 };
2586 
2587 static constexpr RegInfo RegularRegisters[] = {
2588  {{"v"}, IS_VGPR},
2589  {{"s"}, IS_SGPR},
2590  {{"ttmp"}, IS_TTMP},
2591  {{"acc"}, IS_AGPR},
2592  {{"a"}, IS_AGPR},
2593 };
2594 
2595 static bool isRegularReg(RegisterKind Kind) {
2596  return Kind == IS_VGPR ||
2597  Kind == IS_SGPR ||
2598  Kind == IS_TTMP ||
2599  Kind == IS_AGPR;
2600 }
2601 
2602 static const RegInfo* getRegularRegInfo(StringRef Str) {
2603  for (const RegInfo &Reg : RegularRegisters)
2604  if (Str.startswith(Reg.Name))
2605  return &Reg;
2606  return nullptr;
2607 }
2608 
2609 static bool getRegNum(StringRef Str, unsigned& Num) {
2610  return !Str.getAsInteger(10, Num);
2611 }
2612 
2613 bool
2614 AMDGPUAsmParser::isRegister(const AsmToken &Token,
2615  const AsmToken &NextToken) const {
2616 
2617  // A list of consecutive registers: [s0,s1,s2,s3]
2618  if (Token.is(AsmToken::LBrac))
2619  return true;
2620 
2621  if (!Token.is(AsmToken::Identifier))
2622  return false;
2623 
2624  // A single register like s0 or a range of registers like s[0:1]
2625 
2626  StringRef Str = Token.getString();
2627  const RegInfo *Reg = getRegularRegInfo(Str);
2628  if (Reg) {
2629  StringRef RegName = Reg->Name;
2630  StringRef RegSuffix = Str.substr(RegName.size());
2631  if (!RegSuffix.empty()) {
2632  unsigned Num;
2633  // A single register with an index: rXX
2634  if (getRegNum(RegSuffix, Num))
2635  return true;
2636  } else {
2637  // A range of registers: r[XX:YY].
2638  if (NextToken.is(AsmToken::LBrac))
2639  return true;
2640  }
2641  }
2642 
2643  return getSpecialRegForName(Str) != AMDGPU::NoRegister;
2644 }
2645 
2646 bool
2647 AMDGPUAsmParser::isRegister()
2648 {
2649  return isRegister(getToken(), peekToken());
2650 }
2651 
2652 unsigned
2653 AMDGPUAsmParser::getRegularReg(RegisterKind RegKind,
2654  unsigned RegNum,
2655  unsigned RegWidth,
2656  SMLoc Loc) {
2657 
2658  assert(isRegularReg(RegKind));
2659 
2660  unsigned AlignSize = 1;
2661  if (RegKind == IS_SGPR || RegKind == IS_TTMP) {
2662  // SGPR and TTMP registers must be aligned.
2663  // Max required alignment is 4 dwords.
2664  AlignSize = std::min(RegWidth / 32, 4u);
2665  }
2666 
2667  if (RegNum % AlignSize != 0) {
2668  Error(Loc, "invalid register alignment");
2669  return AMDGPU::NoRegister;
2670  }
2671 
2672  unsigned RegIdx = RegNum / AlignSize;
2673  int RCID = getRegClass(RegKind, RegWidth);
2674  if (RCID == -1) {
2675  Error(Loc, "invalid or unsupported register size");
2676  return AMDGPU::NoRegister;
2677  }
2678 
2679  const MCRegisterInfo *TRI = getContext().getRegisterInfo();
2680  const MCRegisterClass RC = TRI->getRegClass(RCID);
2681  if (RegIdx >= RC.getNumRegs()) {
2682  Error(Loc, "register index is out of range");
2683  return AMDGPU::NoRegister;
2684  }
2685 
2686  return RC.getRegister(RegIdx);
2687 }
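 
// For illustration of the alignment rule above (it applies to SGPRs and TTMPs
// only):
//   s[2:3]   // RegWidth = 64, AlignSize = 2, index 2 is aligned -> accepted
//   s[1:2]   // index 1 is not a multiple of 2 -> "invalid register alignment"
//   v[1:2]   // VGPRs and AGPRs have no alignment requirement -> accepted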
2688 
2689 bool AMDGPUAsmParser::ParseRegRange(unsigned &Num, unsigned &RegWidth) {
2690  int64_t RegLo, RegHi;
2691  if (!skipToken(AsmToken::LBrac, "missing register index"))
2692  return false;
2693 
2694  SMLoc FirstIdxLoc = getLoc();
2695  SMLoc SecondIdxLoc;
2696 
2697  if (!parseExpr(RegLo))
2698  return false;
2699 
2700  if (trySkipToken(AsmToken::Colon)) {
2701  SecondIdxLoc = getLoc();
2702  if (!parseExpr(RegHi))
2703  return false;
2704  } else {
2705  RegHi = RegLo;
2706  }
2707 
2708  if (!skipToken(AsmToken::RBrac, "expected a closing square bracket"))
2709  return false;
2710 
2711  if (!isUInt<32>(RegLo)) {
2712  Error(FirstIdxLoc, "invalid register index");
2713  return false;
2714  }
2715 
2716  if (!isUInt<32>(RegHi)) {
2717  Error(SecondIdxLoc, "invalid register index");
2718  return false;
2719  }
2720 
2721  if (RegLo > RegHi) {
2722  Error(FirstIdxLoc, "first register index should not exceed second index");
2723  return false;
2724  }
2725 
2726  Num = static_cast<unsigned>(RegLo);
2727  RegWidth = 32 * ((RegHi - RegLo) + 1);
2728  return true;
2729 }
2730 
2731 unsigned AMDGPUAsmParser::ParseSpecialReg(RegisterKind &RegKind,
2732  unsigned &RegNum, unsigned &RegWidth,
2733  SmallVectorImpl<AsmToken> &Tokens) {
2734  assert(isToken(AsmToken::Identifier));
2735  unsigned Reg = getSpecialRegForName(getTokenStr());
2736  if (Reg) {
2737  RegNum = 0;
2738  RegWidth = 32;
2739  RegKind = IS_SPECIAL;
2740  Tokens.push_back(getToken());
2741  lex(); // skip register name
2742  }
2743  return Reg;
2744 }
2745 
2746 unsigned AMDGPUAsmParser::ParseRegularReg(RegisterKind &RegKind,
2747  unsigned &RegNum, unsigned &RegWidth,
2748  SmallVectorImpl<AsmToken> &Tokens) {
2749  assert(isToken(AsmToken::Identifier));
2750  StringRef RegName = getTokenStr();
2751  auto Loc = getLoc();
2752 
2753  const RegInfo *RI = getRegularRegInfo(RegName);
2754  if (!RI) {
2755  Error(Loc, "invalid register name");
2756  return AMDGPU::NoRegister;
2757  }
2758 
2759  Tokens.push_back(getToken());
2760  lex(); // skip register name
2761 
2762  RegKind = RI->Kind;
2763  StringRef RegSuffix = RegName.substr(RI->Name.size());
2764  if (!RegSuffix.empty()) {
2765  // Single 32-bit register: vXX.
2766  if (!getRegNum(RegSuffix, RegNum)) {
2767  Error(Loc, "invalid register index");
2768  return AMDGPU::NoRegister;
2769  }
2770  RegWidth = 32;
2771  } else {
2772  // Range of registers: v[XX:YY]. ":YY" is optional.
2773  if (!ParseRegRange(RegNum, RegWidth))
2774  return AMDGPU::NoRegister;
2775  }
2776 
2777  return getRegularReg(RegKind, RegNum, RegWidth, Loc);
2778 }
2779 
2780 unsigned AMDGPUAsmParser::ParseRegList(RegisterKind &RegKind, unsigned &RegNum,
2781  unsigned &RegWidth,
2782  SmallVectorImpl<AsmToken> &Tokens) {
2783  unsigned Reg = AMDGPU::NoRegister;
2784  auto ListLoc = getLoc();
2785 
2786  if (!skipToken(AsmToken::LBrac,
2787  "expected a register or a list of registers")) {
2788  return AMDGPU::NoRegister;
2789  }
2790 
2791  // List of consecutive registers, e.g.: [s0,s1,s2,s3]
2792 
2793  auto Loc = getLoc();
2794  if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth))
2795  return AMDGPU::NoRegister;
2796  if (RegWidth != 32) {
2797  Error(Loc, "expected a single 32-bit register");
2798  return AMDGPU::NoRegister;
2799  }
2800 
2801  for (; trySkipToken(AsmToken::Comma); ) {
2802  RegisterKind NextRegKind;
2803  unsigned NextReg, NextRegNum, NextRegWidth;
2804  Loc = getLoc();
2805 
2806  if (!ParseAMDGPURegister(NextRegKind, NextReg,
2807  NextRegNum, NextRegWidth,
2808  Tokens)) {
2809  return AMDGPU::NoRegister;
2810  }
2811  if (NextRegWidth != 32) {
2812  Error(Loc, "expected a single 32-bit register");
2813  return AMDGPU::NoRegister;
2814  }
2815  if (NextRegKind != RegKind) {
2816  Error(Loc, "registers in a list must be of the same kind");
2817  return AMDGPU::NoRegister;
2818  }
2819  if (!AddNextRegisterToList(Reg, RegWidth, RegKind, NextReg, Loc))
2820  return AMDGPU::NoRegister;
2821  }
2822 
2823  if (!skipToken(AsmToken::RBrac,
2824  "expected a comma or a closing square bracket")) {
2825  return AMDGPU::NoRegister;
2826  }
2827 
2828  if (isRegularReg(RegKind))
2829  Reg = getRegularReg(RegKind, RegNum, RegWidth, ListLoc);
2830 
2831  return Reg;
2832 }
2833 
2834 bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
2835  unsigned &RegNum, unsigned &RegWidth,
2836  SmallVectorImpl<AsmToken> &Tokens) {
2837  auto Loc = getLoc();
2838  Reg = AMDGPU::NoRegister;
2839 
2840  if (isToken(AsmToken::Identifier)) {
2841  Reg = ParseSpecialReg(RegKind, RegNum, RegWidth, Tokens);
2842  if (Reg == AMDGPU::NoRegister)
2843  Reg = ParseRegularReg(RegKind, RegNum, RegWidth, Tokens);
2844  } else {
2845  Reg = ParseRegList(RegKind, RegNum, RegWidth, Tokens);
2846  }
2847 
2848  const MCRegisterInfo *TRI = getContext().getRegisterInfo();
2849  if (Reg == AMDGPU::NoRegister) {
2850  assert(Parser.hasPendingError());
2851  return false;
2852  }
2853 
2854  if (!subtargetHasRegister(*TRI, Reg)) {
2855  if (Reg == AMDGPU::SGPR_NULL) {
2856  Error(Loc, "'null' operand is not supported on this GPU");
2857  } else {
2858  Error(Loc, "register not available on this GPU");
2859  }
2860  return false;
2861  }
2862 
2863  return true;
2864 }
2865 
2866 bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
2867  unsigned &RegNum, unsigned &RegWidth,
2868  bool RestoreOnFailure /*=false*/) {
2869  Reg = AMDGPU::NoRegister;
2870 
2871  SmallVector<AsmToken, 1> Tokens;
2872  if (ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth, Tokens)) {
2873  if (RestoreOnFailure) {
2874  while (!Tokens.empty()) {
2875  getLexer().UnLex(Tokens.pop_back_val());
2876  }
2877  }
2878  return true;
2879  }
2880  return false;
2881 }
2882 
2883 Optional<StringRef>
2884 AMDGPUAsmParser::getGprCountSymbolName(RegisterKind RegKind) {
2885  switch (RegKind) {
2886  case IS_VGPR:
2887  return StringRef(".amdgcn.next_free_vgpr");
2888  case IS_SGPR:
2889  return StringRef(".amdgcn.next_free_sgpr");
2890  default:
2891  return None;
2892  }
2893 }
2894 
2895 void AMDGPUAsmParser::initializeGprCountSymbol(RegisterKind RegKind) {
2896  auto SymbolName = getGprCountSymbolName(RegKind);
2897  assert(SymbolName && "initializing invalid register kind");
2898  MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName);
2899  Sym->setVariableValue(MCConstantExpr::create(0, getContext()));
2900 }
2901 
2902 bool AMDGPUAsmParser::updateGprCountSymbols(RegisterKind RegKind,
2903  unsigned DwordRegIndex,
2904  unsigned RegWidth) {
2905  // Symbols are only defined for GCN targets
2906  if (AMDGPU::getIsaVersion(getSTI().getCPU()).Major < 6)
2907  return true;
2908 
2909  auto SymbolName = getGprCountSymbolName(RegKind);
2910  if (!SymbolName)
2911  return true;
2912  MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName);
2913 
2914  int64_t NewMax = DwordRegIndex + divideCeil(RegWidth, 32) - 1;
2915  int64_t OldCount;
2916 
2917  if (!Sym->isVariable())
2918  return !Error(getLoc(),
2919  ".amdgcn.next_free_{v,s}gpr symbols must be variable");
2920  if (!Sym->getVariableValue(false)->evaluateAsAbsolute(OldCount))
2921  return !Error(
2922  getLoc(),
2923  ".amdgcn.next_free_{v,s}gpr symbols must be absolute expressions");
2924 
2925  if (OldCount <= NewMax)
2926  Sym->setVariableValue(MCConstantExpr::create(NewMax + 1, getContext()));
2927 
2928  return true;
2929 }
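 
// For illustration (assuming code object v3+, where these symbols are used):
// after parsing "v_mov_b32 v7, s5", .amdgcn.next_free_vgpr is raised to at
// least 8 and .amdgcn.next_free_sgpr to at least 6, i.e. one past the highest
// register index seen so far.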
2930 
2931 std::unique_ptr<AMDGPUOperand>
2932 AMDGPUAsmParser::parseRegister(bool RestoreOnFailure) {
2933  const auto &Tok = getToken();
2934  SMLoc StartLoc = Tok.getLoc();
2935  SMLoc EndLoc = Tok.getEndLoc();
2936  RegisterKind RegKind;
2937  unsigned Reg, RegNum, RegWidth;
2938 
2939  if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth)) {
2940  return nullptr;
2941  }
2942  if (isHsaAbiVersion3AndAbove(&getSTI())) {
2943  if (!updateGprCountSymbols(RegKind, RegNum, RegWidth))
2944  return nullptr;
2945  } else
2946  KernelScope.usesRegister(RegKind, RegNum, RegWidth);
2947  return AMDGPUOperand::CreateReg(this, Reg, StartLoc, EndLoc);
2948 }
2949 
2950 OperandMatchResultTy
2951 AMDGPUAsmParser::parseImm(OperandVector &Operands, bool HasSP3AbsModifier) {
2952  // TODO: add syntactic sugar for 1/(2*PI)
2953 
2954  if (isRegister())
2955  return MatchOperand_NoMatch;
2956  assert(!isModifier());
2957 
2958  const auto& Tok = getToken();
2959  const auto& NextTok = peekToken();
2960  bool IsReal = Tok.is(AsmToken::Real);
2961  SMLoc S = getLoc();
2962  bool Negate = false;
2963 
2964  if (!IsReal && Tok.is(AsmToken::Minus) && NextTok.is(AsmToken::Real)) {
2965  lex();
2966  IsReal = true;
2967  Negate = true;
2968  }
2969 
2970  if (IsReal) {
2971  // Floating-point expressions are not supported.
2972  // Can only allow floating-point literals with an
2973  // optional sign.
2974 
2975  StringRef Num = getTokenStr();
2976  lex();
2977 
2978  APFloat RealVal(APFloat::IEEEdouble());
2979  auto roundMode = APFloat::rmNearestTiesToEven;
2980  if (errorToBool(RealVal.convertFromString(Num, roundMode).takeError())) {
2981  return MatchOperand_ParseFail;
2982  }
2983  if (Negate)
2984  RealVal.changeSign();
2985 
2986  Operands.push_back(
2987  AMDGPUOperand::CreateImm(this, RealVal.bitcastToAPInt().getZExtValue(), S,
2988  AMDGPUOperand::ImmTyNone, true));
2989 
2990  return MatchOperand_Success;
2991 
2992  } else {
2993  int64_t IntVal;
2994  const MCExpr *Expr;
2995  SMLoc S = getLoc();
2996 
2997  if (HasSP3AbsModifier) {
2998  // This is a workaround for handling expressions
2999  // as arguments of SP3 'abs' modifier, for example:
3000  // |1.0|
3001  // |-1|
3002  // |1+x|
3003  // This syntax is not compatible with syntax of standard
3004  // MC expressions (due to the trailing '|').
3005  SMLoc EndLoc;
3006  if (getParser().parsePrimaryExpr(Expr, EndLoc, nullptr))
3007  return MatchOperand_ParseFail;
3008  } else {
3009  if (Parser.parseExpression(Expr))
3010  return MatchOperand_ParseFail;
3011  }
3012 
3013  if (Expr->evaluateAsAbsolute(IntVal)) {
3014  Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S));
3015  } else {
3016  Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S));
3017  }
3018 
3019  return MatchOperand_Success;
3020  }
3021 
3022  return MatchOperand_NoMatch;
3023 }
3024 
3025 OperandMatchResultTy
3026 AMDGPUAsmParser::parseReg(OperandVector &Operands) {
3027  if (!isRegister())
3028  return MatchOperand_NoMatch;
3029 
3030  if (auto R = parseRegister()) {
3031  assert(R->isReg());
3032  Operands.push_back(std::move(R));
3033  return MatchOperand_Success;
3034  }
3035  return MatchOperand_ParseFail;
3036 }
3037 
3038 OperandMatchResultTy
3039 AMDGPUAsmParser::parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod) {
3040  auto res = parseReg(Operands);
3041  if (res != MatchOperand_NoMatch) {
3042  return res;
3043  } else if (isModifier()) {
3044  return MatchOperand_NoMatch;
3045  } else {
3046  return parseImm(Operands, HasSP3AbsMod);
3047  }
3048 }
3049 
3050 bool
3051 AMDGPUAsmParser::isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
3052  if (Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::LParen)) {
3053  const auto &str = Token.getString();
3054  return str == "abs" || str == "neg" || str == "sext";
3055  }
3056  return false;
3057 }
3058 
3059 bool
3060 AMDGPUAsmParser::isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const {
3061  return Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::Colon);
3062 }
3063 
3064 bool
3065 AMDGPUAsmParser::isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
3066  return isNamedOperandModifier(Token, NextToken) || Token.is(AsmToken::Pipe);
3067 }
3068 
3069 bool
3070 AMDGPUAsmParser::isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
3071  return isRegister(Token, NextToken) || isOperandModifier(Token, NextToken);
3072 }
3073 
3074 // Check if this is an operand modifier or an opcode modifier
3075 // which may look like an expression but it is not. We should
3076 // avoid parsing these modifiers as expressions. Currently
3077 // recognized sequences are:
3078 // |...|
3079 // abs(...)
3080 // neg(...)
3081 // sext(...)
3082 // -reg
3083 // -|...|
3084 // -abs(...)
3085 // name:...
3086 //
3087 bool
3088 AMDGPUAsmParser::isModifier() {
3089 
3090  AsmToken Tok = getToken();
3091  AsmToken NextToken[2];
3092  peekTokens(NextToken);
3093 
3094  return isOperandModifier(Tok, NextToken[0]) ||
3095  (Tok.is(AsmToken::Minus) && isRegOrOperandModifier(NextToken[0], NextToken[1])) ||
3096  isOpcodeModifierWithVal(Tok, NextToken[0]);
3097 }
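 
// For illustration, operands that isModifier() recognizes so that they are not
// handed to the expression parser:
//   |v0|   abs(v1)   neg(2.0)   sext(v2)   -v3   -|v4|   row_shl:1
// whereas "-1", "1+x" or "x|y" are parsed as ordinary expressions.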
3098 
3099 // Check if the current token is an SP3 'neg' modifier.
3100 // Currently this modifier is allowed in the following context:
3101 //
3102 // 1. Before a register, e.g. "-v0", "-v[...]" or "-[v0,v1]".
3103 // 2. Before an 'abs' modifier: -abs(...)
3104 // 3. Before an SP3 'abs' modifier: -|...|
3105 //
3106 // In all other cases "-" is handled as a part
3107 // of an expression that follows the sign.
3108 //
3109 // Note: When "-" is followed by an integer literal,
3110 // it is interpreted as integer negation rather than
3111 // a floating-point NEG modifier applied to that literal.
3112 // Besides being counter-intuitive, such use of a floating-point
3113 // NEG modifier would have resulted in different meaning
3114 // of integer literals used with VOP1/2/C and VOP3,
3115 // for example:
3116 // v_exp_f32_e32 v5, -1 // VOP1: src0 = 0xFFFFFFFF
3117 // v_exp_f32_e64 v5, -1 // VOP3: src0 = 0x80000001
3118 // Negative fp literals with preceding "-" are
3119 // handled likewise for uniformity
3120 //
3121 bool
3122 AMDGPUAsmParser::parseSP3NegModifier() {
3123 
3124  AsmToken NextToken[2];
3125  peekTokens(NextToken);
3126 
3127  if (isToken(AsmToken::Minus) &&
3128  (isRegister(NextToken[0], NextToken[1]) ||
3129  NextToken[0].is(AsmToken::Pipe) ||
3130  isId(NextToken[0], "abs"))) {
3131  lex();
3132  return true;
3133  }
3134 
3135  return false;
3136 }
3137 
3138 OperandMatchResultTy
3139 AMDGPUAsmParser::parseRegOrImmWithFPInputMods(OperandVector &Operands,
3140  bool AllowImm) {
3141  bool Neg, SP3Neg;
3142  bool Abs, SP3Abs;
3143  SMLoc Loc;
3144 
3145  // Disable ambiguous constructs like '--1' etc. Should use neg(-1) instead.
3146  if (isToken(AsmToken::Minus) && peekToken().is(AsmToken::Minus)) {
3147  Error(getLoc(), "invalid syntax, expected 'neg' modifier");
3148  return MatchOperand_ParseFail;
3149  }
3150 
3151  SP3Neg = parseSP3NegModifier();
3152 
3153  Loc = getLoc();
3154  Neg = trySkipId("neg");
3155  if (Neg && SP3Neg) {
3156  Error(Loc, "expected register or immediate");
3157  return MatchOperand_ParseFail;
3158  }
3159  if (Neg && !skipToken(AsmToken::LParen, "expected left paren after neg"))
3160  return MatchOperand_ParseFail;
3161 
3162  Abs = trySkipId("abs");
3163  if (Abs && !skipToken(AsmToken::LParen, "expected left paren after abs"))
3164  return MatchOperand_ParseFail;
3165 
3166  Loc = getLoc();
3167  SP3Abs = trySkipToken(AsmToken::Pipe);
3168  if (Abs && SP3Abs) {
3169  Error(Loc, "expected register or immediate");
3170  return MatchOperand_ParseFail;
3171  }
3172 
3173  OperandMatchResultTy Res;
3174  if (AllowImm) {
3175  Res = parseRegOrImm(Operands, SP3Abs);
3176  } else {
3177  Res = parseReg(Operands);
3178  }
3179  if (Res != MatchOperand_Success) {
3180  return (SP3Neg || Neg || SP3Abs || Abs)? MatchOperand_ParseFail : Res;
3181  }
3182 
3183  if (SP3Abs && !skipToken(AsmToken::Pipe, "expected vertical bar"))
3184  return MatchOperand_ParseFail;
3185  if (Abs && !skipToken(AsmToken::RParen, "expected closing parentheses"))
3186  return MatchOperand_ParseFail;
3187  if (Neg && !skipToken(AsmToken::RParen, "expected closing parentheses"))
3188  return MatchOperand_ParseFail;
3189 
3190  AMDGPUOperand::Modifiers Mods;
3191  Mods.Abs = Abs || SP3Abs;
3192  Mods.Neg = Neg || SP3Neg;
3193 
3194  if (Mods.hasFPModifiers()) {
3195  AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
3196  if (Op.isExpr()) {
3197  Error(Op.getStartLoc(), "expected an absolute expression");
3198  return MatchOperand_ParseFail;
3199  }
3200  Op.setModifiers(Mods);
3201  }
3202  return MatchOperand_Success;
3203 }
3204 
3205 OperandMatchResultTy
3206 AMDGPUAsmParser::parseRegOrImmWithIntInputMods(OperandVector &Operands,
3207  bool AllowImm) {
3208  bool Sext = trySkipId("sext");
3209  if (Sext && !skipToken(AsmToken::LParen, "expected left paren after sext"))
3210  return MatchOperand_ParseFail;
3211 
3212  OperandMatchResultTy Res;
3213  if (AllowImm) {
3214  Res = parseRegOrImm(Operands);
3215  } else {
3216  Res = parseReg(Operands);
3217  }
3218  if (Res != MatchOperand_Success) {
3219  return Sext? MatchOperand_ParseFail : Res;
3220  }
3221 
3222  if (Sext && !skipToken(AsmToken::RParen, "expected closing parentheses"))
3223  return MatchOperand_ParseFail;
3224 
3225  AMDGPUOperand::Modifiers Mods;
3226  Mods.Sext = Sext;
3227 
3228  if (Mods.hasIntModifiers()) {
3229  AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
3230  if (Op.isExpr()) {
3231  Error(Op.getStartLoc(), "expected an absolute expression");
3232  return MatchOperand_ParseFail;
3233  }
3234  Op.setModifiers(Mods);
3235  }
3236 
3237  return MatchOperand_Success;
3238 }
3239 
3240 OperandMatchResultTy
3241 AMDGPUAsmParser::parseRegWithFPInputMods(OperandVector &Operands) {
3242  return parseRegOrImmWithFPInputMods(Operands, false);
3243 }
3244 
3245 OperandMatchResultTy
3246 AMDGPUAsmParser::parseRegWithIntInputMods(OperandVector &Operands) {
3247  return parseRegOrImmWithIntInputMods(Operands, false);
3248 }
3249 
3250 OperandMatchResultTy AMDGPUAsmParser::parseVReg32OrOff(OperandVector &Operands) {
3251  auto Loc = getLoc();
3252  if (trySkipId("off")) {
3253  Operands.push_back(AMDGPUOperand::CreateImm(this, 0, Loc,
3254  AMDGPUOperand::ImmTyOff, false));
3255  return MatchOperand_Success;
3256  }
3257 
3258  if (!isRegister())
3259  return MatchOperand_NoMatch;
3260 
3261  std::unique_ptr<AMDGPUOperand> Reg = parseRegister();
3262  if (Reg) {
3263  Operands.push_back(std::move(Reg));
3264  return MatchOperand_Success;
3265  }
3266 
3267  return MatchOperand_ParseFail;
3268 
3269 }
3270 
3271 unsigned AMDGPUAsmParser::checkTargetMatchPredicate(MCInst &Inst) {
3272  uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
3273 
3274  if ((getForcedEncodingSize() == 32 && (TSFlags & SIInstrFlags::VOP3)) ||
3275  (getForcedEncodingSize() == 64 && !(TSFlags & SIInstrFlags::VOP3)) ||
3276  (isForcedDPP() && !(TSFlags & SIInstrFlags::DPP)) ||
3277  (isForcedSDWA() && !(TSFlags & SIInstrFlags::SDWA)) )
3278  return Match_InvalidOperand;
3279 
3280  if ((TSFlags & SIInstrFlags::VOP3) &&
3281  (TSFlags & SIInstrFlags::VOPAsmPrefer32Bit) &&
3282  getForcedEncodingSize() != 64)
3283  return Match_PreferE32;
3284 
3285  if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi ||
3286  Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) {
3287  // v_mac_f32/16 allow only dst_sel == DWORD;
3288  auto OpNum =
3289  AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::dst_sel);
3290  const auto &Op = Inst.getOperand(OpNum);
3291  if (!Op.isImm() || Op.getImm() != AMDGPU::SDWA::SdwaSel::DWORD) {
3292  return Match_InvalidOperand;
3293  }
3294  }
3295 
3296  return Match_Success;
3297 }
3298 
3299 static ArrayRef<unsigned> getAllVariants() {
3300  static const unsigned Variants[] = {
3301  AMDGPUAsmVariants::DEFAULT, AMDGPUAsmVariants::VOP3,
3302  AMDGPUAsmVariants::SDWA, AMDGPUAsmVariants::SDWA9,
3303  AMDGPUAsmVariants::DPP, AMDGPUAsmVariants::VOP3_DPP
3304  };
3305 
3306  return makeArrayRef(Variants);
3307 }
3308 
3309 // What asm variants we should check
3310 ArrayRef<unsigned> AMDGPUAsmParser::getMatchedVariants() const {
3311  if (isForcedDPP() && isForcedVOP3()) {
3312  static const unsigned Variants[] = {AMDGPUAsmVariants::VOP3_DPP};
3313  return makeArrayRef(Variants);
3314  }
3315  if (getForcedEncodingSize() == 32) {
3316  static const unsigned Variants[] = {AMDGPUAsmVariants::DEFAULT};
3317  return makeArrayRef(Variants);
3318  }
3319 
3320  if (isForcedVOP3()) {
3321  static const unsigned Variants[] = {AMDGPUAsmVariants::VOP3};
3322  return makeArrayRef(Variants);
3323  }
3324 
3325  if (isForcedSDWA()) {
3326  static const unsigned Variants[] = {AMDGPUAsmVariants::SDWA,
3327  AMDGPUAsmVariants::SDWA9};
3328  return makeArrayRef(Variants);
3329  }
3330 
3331  if (isForcedDPP()) {
3332  static const unsigned Variants[] = {AMDGPUAsmVariants::DPP};
3333  return makeArrayRef(Variants);
3334  }
3335 
3336  return getAllVariants();
3337 }
3338 
3339 StringRef AMDGPUAsmParser::getMatchedVariantName() const {
3340  if (isForcedDPP() && isForcedVOP3())
3341  return "e64_dpp";
3342 
3343  if (getForcedEncodingSize() == 32)
3344  return "e32";
3345 
3346  if (isForcedVOP3())
3347  return "e64";
3348 
3349  if (isForcedSDWA())
3350  return "sdwa";
3351 
3352  if (isForcedDPP())
3353  return "dpp";
3354 
3355  return "";
3356 }
3357 
3358 unsigned AMDGPUAsmParser::findImplicitSGPRReadInVOP(const MCInst &Inst) const {
3359  const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
3360  const unsigned Num = Desc.getNumImplicitUses();
3361  for (unsigned i = 0; i < Num; ++i) {
3362  unsigned Reg = Desc.ImplicitUses[i];
3363  switch (Reg) {
3364  case AMDGPU::FLAT_SCR:
3365  case AMDGPU::VCC:
3366  case AMDGPU::VCC_LO:
3367  case AMDGPU::VCC_HI:
3368  case AMDGPU::M0:
3369  return Reg;
3370  default:
3371  break;
3372  }
3373  }
3374  return AMDGPU::NoRegister;
3375 }
3376 
3377 // NB: This code is correct only when used to check constant
3379 // bus limitations because GFX7 supports no f16 inline constants.
3379 // Note that there are no cases when a GFX7 opcode violates
3380 // constant bus limitations due to the use of an f16 constant.
3381 bool AMDGPUAsmParser::isInlineConstant(const MCInst &Inst,
3382  unsigned OpIdx) const {
3383  const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
3384 
3385  if (!AMDGPU::isSISrcOperand(Desc, OpIdx)) {
3386  return false;
3387  }
3388 
3389  const MCOperand &MO = Inst.getOperand(OpIdx);
3390 
3391  int64_t Val = MO.getImm();
3392  auto OpSize = AMDGPU::getOperandSize(Desc, OpIdx);
3393 
3394  switch (OpSize) { // expected operand size
3395  case 8:
3396  return AMDGPU::isInlinableLiteral64(Val, hasInv2PiInlineImm());
3397  case 4:
3398  return AMDGPU::isInlinableLiteral32(Val, hasInv2PiInlineImm());
3399  case 2: {
3400  const unsigned OperandType = Desc.OpInfo[OpIdx].OperandType;
3401  if (OperandType == AMDGPU::OPERAND_REG_IMM_INT16 ||
3402  OperandType == AMDGPU::OPERAND_REG_INLINE_C_INT16 ||
3403  OperandType == AMDGPU::OPERAND_REG_INLINE_AC_INT16)
3404  return AMDGPU::isInlinableIntLiteral(Val);
3405 
3406  if (OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2INT16 ||
3407  OperandType == AMDGPU::OPERAND_REG_INLINE_AC_V2INT16 ||
3408  OperandType == AMDGPU::OPERAND_REG_IMM_V2INT16)
3409  return AMDGPU::isInlinableIntLiteralV216(Val);
3410 
3411  if (OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2FP16 ||
3412  OperandType == AMDGPU::OPERAND_REG_INLINE_AC_V2FP16 ||
3413  OperandType == AMDGPU::OPERAND_REG_IMM_V2FP16)
3414  return AMDGPU::isInlinableLiteralV216(Val, hasInv2PiInlineImm());
3415 
3416  return AMDGPU::isInlinableLiteral16(Val, hasInv2PiInlineImm());
3417  }
3418  default:
3419  llvm_unreachable("invalid operand size");
3420  }
3421 }
3422 
3423 unsigned AMDGPUAsmParser::getConstantBusLimit(unsigned Opcode) const {
3424  if (!isGFX10Plus())
3425  return 1;
3426 
3427  switch (Opcode) {
3428  // 64-bit shift instructions can use only one scalar value input
3429  case AMDGPU::V_LSHLREV_B64_e64:
3430  case AMDGPU::V_LSHLREV_B64_gfx10:
3431  case AMDGPU::V_LSHLREV_B64_e64_gfx11:
3432  case AMDGPU::V_LSHRREV_B64_e64:
3433  case AMDGPU::V_LSHRREV_B64_gfx10:
3434  case AMDGPU::V_LSHRREV_B64_e64_gfx11:
3435  case AMDGPU::V_ASHRREV_I64_e64:
3436  case AMDGPU::V_ASHRREV_I64_gfx10:
3437  case AMDGPU::V_ASHRREV_I64_e64_gfx11:
3438  case AMDGPU::V_LSHL_B64_e64:
3439  case AMDGPU::V_LSHR_B64_e64:
3440  case AMDGPU::V_ASHR_I64_e64:
3441  return 1;
3442  default:
3443  return 2;
3444  }
3445 }
3446 
3447 constexpr unsigned MAX_SRC_OPERANDS_NUM = 6;
3448 using OperandIndices = SmallVector<int16_t, MAX_SRC_OPERANDS_NUM>;
3449 
3450 // Get regular operand indices in the same order as specified
3451 // in the instruction (but append mandatory literals to the end).
3452 static OperandIndices getSrcOperandIndices(unsigned Opcode,
3453  bool AddMandatoryLiterals = false) {
3454 
3455  int16_t ImmIdx =
3456  AddMandatoryLiterals ? getNamedOperandIdx(Opcode, OpName::imm) : -1;
3457 
3458  if (isVOPD(Opcode)) {
3459  int16_t ImmDeferredIdx =
3460  AddMandatoryLiterals ? getNamedOperandIdx(Opcode, OpName::immDeferred)
3461  : -1;
3462 
3463  return {getNamedOperandIdx(Opcode, OpName::src0X),
3464  getNamedOperandIdx(Opcode, OpName::vsrc1X),
3465  getNamedOperandIdx(Opcode, OpName::src0Y),
3466  getNamedOperandIdx(Opcode, OpName::vsrc1Y),
3467  ImmDeferredIdx,
3468  ImmIdx};
3469  }
3470 
3471  return {getNamedOperandIdx(Opcode, OpName::src0),
3472  getNamedOperandIdx(Opcode, OpName::src1),
3473  getNamedOperandIdx(Opcode, OpName::src2), ImmIdx};
3474 }
3475 
3476 bool AMDGPUAsmParser::usesConstantBus(const MCInst &Inst, unsigned OpIdx) {
3477  const MCOperand &MO = Inst.getOperand(OpIdx);
3478  if (MO.isImm()) {
3479  return !isInlineConstant(Inst, OpIdx);
3480  } else if (MO.isReg()) {
3481  auto Reg = MO.getReg();
3482  const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3483  auto PReg = mc2PseudoReg(Reg);
3484  return isSGPR(PReg, TRI) && PReg != SGPR_NULL;
3485  } else {
3486  return true;
3487  }
3488 }
3489 
3490 bool AMDGPUAsmParser::validateConstantBusLimitations(
3491  const MCInst &Inst, const OperandVector &Operands) {
3492  const unsigned Opcode = Inst.getOpcode();
3493  const MCInstrDesc &Desc = MII.get(Opcode);
3494  unsigned LastSGPR = AMDGPU::NoRegister;
3495  unsigned ConstantBusUseCount = 0;
3496  unsigned NumLiterals = 0;
3497  unsigned LiteralSize;
3498 
3499  if (!(Desc.TSFlags &
3500  (SIInstrFlags::VOPC | SIInstrFlags::VOP1 | SIInstrFlags::VOP2 |
3501  SIInstrFlags::VOP3 | SIInstrFlags::VOP3P | SIInstrFlags::SDWA)) &&
3502  !isVOPD(Opcode))
3503  return true;
3504 
3505  // Check special imm operands (used by madmk, etc)
3506  if (AMDGPU::hasNamedOperand(Opcode, AMDGPU::OpName::imm)) {
3507  ++NumLiterals;
3508  LiteralSize = 4;
3509  }
3510 
3511  SmallDenseSet<unsigned> SGPRsUsed;
3512  unsigned SGPRUsed = findImplicitSGPRReadInVOP(Inst);
3513  if (SGPRUsed != AMDGPU::NoRegister) {
3514  SGPRsUsed.insert(SGPRUsed);
3515  ++ConstantBusUseCount;
3516  }
3517 
3518  OperandIndices OpIndices = getSrcOperandIndices(Opcode);
3519 
3520  for (int OpIdx : OpIndices) {
3521  if (OpIdx == -1)
3522  continue;
3523 
3524  const MCOperand &MO = Inst.getOperand(OpIdx);
3525  if (usesConstantBus(Inst, OpIdx)) {
3526  if (MO.isReg()) {
3527  LastSGPR = mc2PseudoReg(MO.getReg());
3528  // Pairs of registers with a partial intersection, like these:
3529  // s0, s[0:1]
3530  // flat_scratch_lo, flat_scratch
3531  // flat_scratch_lo, flat_scratch_hi
3532  // are theoretically valid but they are disabled anyway.
3533  // Note that this code mimics SIInstrInfo::verifyInstruction
3534  if (SGPRsUsed.insert(LastSGPR).second) {
3535  ++ConstantBusUseCount;
3536  }
3537  } else { // Expression or a literal
3538 
3539  if (Desc.OpInfo[OpIdx].OperandType == MCOI::OPERAND_IMMEDIATE)
3540  continue; // special operand like VINTERP attr_chan
3541 
3542  // An instruction may use only one literal.
3543  // This has been validated on the previous step.
3544  // See validateVOPLiteral.
3545  // This literal may be used as more than one operand.
3546  // If all these operands are of the same size,
3547  // this literal counts as one scalar value.
3548  // Otherwise it counts as 2 scalar values.
3549  // See "GFX10 Shader Programming", section 3.6.2.3.
3550 
3551  unsigned Size = AMDGPU::getOperandSize(Desc, OpIdx);
3552  if (Size < 4)
3553  Size = 4;
3554 
3555  if (NumLiterals == 0) {
3556  NumLiterals = 1;
3557  LiteralSize = Size;
3558  } else if (LiteralSize != Size) {
3559  NumLiterals = 2;
3560  }
3561  }
3562  }
3563  }
3564  ConstantBusUseCount += NumLiterals;
3565 
3566  if (ConstantBusUseCount <= getConstantBusLimit(Opcode))
3567  return true;
3568 
3569  SMLoc LitLoc = getLitLoc(Operands);
3570  SMLoc RegLoc = getRegLoc(LastSGPR, Operands);
3571  SMLoc Loc = (LitLoc.getPointer() < RegLoc.getPointer()) ? RegLoc : LitLoc;
3572  Error(Loc, "invalid operand (violates constant bus restrictions)");
3573  return false;
3574 }
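// Illustrative examples of the constant bus rule enforced above (assuming a
// pre-GFX10 target whose VALU constant bus limit is 1; register numbers are
// chosen for illustration only):
//   v_add_f32_e64 v0, s1, s2   ; reads two distinct SGPRs  -> rejected
//   v_add_f32_e64 v0, s1, s1   ; the same SGPR counts once -> accepted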
3575 
3576 bool AMDGPUAsmParser::validateVOPDRegBankConstraints(
3577  const MCInst &Inst, const OperandVector &Operands) {
3578 
3579  const unsigned Opcode = Inst.getOpcode();
3580  if (!isVOPD(Opcode))
3581  return true;
3582 
3583  const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3584 
3585  auto getVRegIdx = [&](unsigned, unsigned OperandIdx) {
3586  const MCOperand &Opr = Inst.getOperand(OperandIdx);
3587  return (Opr.isReg() && !isSGPR(mc2PseudoReg(Opr.getReg()), TRI))
3588  ? Opr.getReg()
3589  : MCRegister();
3590  };
3591 
3592  const auto &InstInfo = getVOPDInstInfo(Opcode, &MII);
3593  auto InvalidCompOprIdx = InstInfo.getInvalidCompOperandIndex(getVRegIdx);
3594  if (!InvalidCompOprIdx)
3595  return true;
3596 
3597  auto CompOprIdx = *InvalidCompOprIdx;
3598  auto ParsedIdx =
3599  std::max(InstInfo[VOPD::X].getIndexInParsedOperands(CompOprIdx),
3600  InstInfo[VOPD::Y].getIndexInParsedOperands(CompOprIdx));
3601  assert(ParsedIdx > 0 && ParsedIdx < Operands.size());
3602 
3603  auto Loc = ((AMDGPUOperand &)*Operands[ParsedIdx]).getStartLoc();
3604  if (CompOprIdx == VOPD::Component::DST) {
3605  Error(Loc, "one dst register must be even and the other odd");
3606  } else {
3607  auto CompSrcIdx = CompOprIdx - VOPD::Component::DST_NUM;
3608  Error(Loc, Twine("src") + Twine(CompSrcIdx) +
3609  " operands must use different VGPR banks");
3610  }
3611 
3612  return false;
3613 }
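// Illustrative examples of the VOPD constraint reported above (GFX11
// dual-issue syntax; register numbers chosen for illustration only):
//   v_dual_mov_b32 v0, v2 :: v_dual_mov_b32 v1, v3   ; dst registers v0/v1, one even and one odd -> accepted
//   v_dual_mov_b32 v0, v2 :: v_dual_mov_b32 v4, v3   ; dst registers v0/v4, both even             -> rejected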
3614 
3615 bool AMDGPUAsmParser::validateIntClampSupported(const MCInst &Inst) {
3616 
3617  const unsigned Opc = Inst.getOpcode();
3618  const MCInstrDesc &Desc = MII.get(Opc);
3619 
3620  if ((Desc.TSFlags & SIInstrFlags::IntClamp) != 0 && !hasIntClamp()) {
3621  int ClampIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp);
3622  assert(ClampIdx != -1);
3623  return Inst.getOperand(ClampIdx).getImm() == 0;
3624  }
3625 
3626  return true;
3627 }
3628 
3629 bool AMDGPUAsmParser::validateMIMGDataSize(const MCInst &Inst,
3630  const SMLoc &IDLoc) {
3631 
3632  const unsigned Opc = Inst.getOpcode();
3633  const MCInstrDesc &Desc = MII.get(Opc);
3634 
3635  if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
3636  return true;
3637 
3638  int VDataIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdata);
3639  int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
3640  int TFEIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::tfe);
3641 
3642  assert(VDataIdx != -1);
3643 
3644  if ((DMaskIdx == -1 || TFEIdx == -1) && isGFX10_AEncoding()) // intersect_ray
3645  return true;
3646 
3647  unsigned VDataSize = AMDGPU::getRegOperandSize(getMRI(), Desc, VDataIdx);
3648  unsigned TFESize = (TFEIdx != -1 && Inst.getOperand(TFEIdx).getImm()) ? 1 : 0;
3649  unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
3650  if (DMask == 0)
3651  DMask = 1;
3652 
3653  bool IsPackedD16 = false;
3654  unsigned DataSize =
3655  (Desc.TSFlags & SIInstrFlags::Gather4) ? 4 : countPopulation(DMask);
3656  if (hasPackedD16()) {
3657  int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16);
3658  IsPackedD16 = D16Idx >= 0;
3659  if (IsPackedD16 && Inst.getOperand(D16Idx).getImm())
3660  DataSize = (DataSize + 1) / 2;
3661  }
3662 
3663  if ((VDataSize / 4) == DataSize + TFESize)
3664  return true;
3665 
3666  StringRef Modifiers;
3667  if (isGFX90A())
3668  Modifiers = IsPackedD16 ? "dmask and d16" : "dmask";
3669  else
3670  Modifiers = IsPackedD16 ? "dmask, d16 and tfe" : "dmask and tfe";
3671 
3672  Error(IDLoc, Twine("image data size does not match ") + Modifiers);
3673  return false;
3674 }
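// Illustrative example of the dmask/data-size check above: dmask:0x7 selects
// three components, so vdata must supply three dwords, plus one when tfe is
// set (operands abridged):
//   image_load v[0:2], v[4:5], s[0:7] dmask:0x7 unorm       ; accepted
//   image_load v[0:3], v[4:5], s[0:7] dmask:0x7 unorm tfe   ; accepted (3 + 1)
//   image_load v[0:1], v[4:5], s[0:7] dmask:0x7 unorm       ; rejected: data size mismatch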
3675 
3676 bool AMDGPUAsmParser::validateMIMGAddrSize(const MCInst &Inst) {
3677  const unsigned Opc = Inst.getOpcode();
3678  const MCInstrDesc &Desc = MII.get(Opc);
3679 
3680  if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0 || !isGFX10Plus())
3681  return true;
3682 
3683  const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Opc);
3684 
3685  const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode =
3686  AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode);
3687  int VAddr0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vaddr0);
3688  int SrsrcIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::srsrc);
3689  int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim);
3690  int A16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::a16);
3691 
3692  assert(VAddr0Idx != -1);
3693  assert(SrsrcIdx != -1);
3694  assert(SrsrcIdx > VAddr0Idx);
3695 
3696  if (DimIdx == -1)
3697  return true; // intersect_ray
3698 
3699  unsigned Dim = Inst.getOperand(DimIdx).getImm();
3700  const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByEncoding(Dim);
3701  bool IsNSA = SrsrcIdx - VAddr0Idx > 1;
3702  unsigned ActualAddrSize =
3703  IsNSA ? SrsrcIdx - VAddr0Idx
3704  : AMDGPU::getRegOperandSize(getMRI(), Desc, VAddr0Idx) / 4;
3705  bool IsA16 = (A16Idx != -1 && Inst.getOperand(A16Idx).getImm());
3706 
3707  unsigned ExpectedAddrSize =
3708  AMDGPU::getAddrSizeMIMGOp(BaseOpcode, DimInfo, IsA16, hasG16());
3709 
3710  if (!IsNSA) {
3711  if (ExpectedAddrSize > 12)
3712  ExpectedAddrSize = 16;
3713 
3714  // Allow oversized 8 VGPR vaddr when only 5/6/7 VGPRs are required.
3715  // This provides backward compatibility for assembly created
3716  // before 160b/192b/224b types were directly supported.
3717  if (ActualAddrSize == 8 && (ExpectedAddrSize >= 5 && ExpectedAddrSize <= 7))
3718  return true;
3719  }
3720 
3721  return ActualAddrSize == ExpectedAddrSize;
3722 }
3723 
3724 bool AMDGPUAsmParser::validateMIMGAtomicDMask(const MCInst &Inst) {
3725 
3726  const unsigned Opc = Inst.getOpcode();
3727  const MCInstrDesc &Desc = MII.get(Opc);
3728 
3729  if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
3730  return true;
3731  if (!Desc.mayLoad() || !Desc.mayStore())
3732  return true; // Not atomic
3733 
3734  int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
3735  unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
3736 
3737  // This is an incomplete check because image_atomic_cmpswap
3738  // may only use 0x3 and 0xf while other atomic operations
3739  // may use 0x1 and 0x3. However these limitations are
3740  // verified when we check that dmask matches dst size.
3741  return DMask == 0x1 || DMask == 0x3 || DMask == 0xf;
3742 }
3743 
3744 bool AMDGPUAsmParser::validateMIMGGatherDMask(const MCInst &Inst) {
3745 
3746  const unsigned Opc = Inst.getOpcode();
3747  const MCInstrDesc &Desc = MII.get(Opc);
3748 
3749  if ((Desc.TSFlags & SIInstrFlags::Gather4) == 0)
3750  return true;
3751 
3752  int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
3753  unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
3754 
3755  // GATHER4 instructions use dmask in a different fashion compared to
3756  // other MIMG instructions. The only useful DMASK values are
3757  // 1=red, 2=green, 4=blue, 8=alpha. (e.g. 1 returns
3758  // (red,red,red,red) etc.) The ISA document doesn't mention
3759  // this.
3760  return DMask == 0x1 || DMask == 0x2 || DMask == 0x4 || DMask == 0x8;
3761 }
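// Illustrative example: gather4 returns one channel for all four texels, so
// exactly one dmask bit may be set (operands abridged):
//   image_gather4 v[0:3], v[4:5], s[0:7], s[8:11] dmask:0x1   ; red channel  -> accepted
//   image_gather4 v[0:3], v[4:5], s[0:7], s[8:11] dmask:0x3   ; two bits set -> rejected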
3762 
3763 bool AMDGPUAsmParser::validateMIMGMSAA(const MCInst &Inst) {
3764  const unsigned Opc = Inst.getOpcode();
3765  const MCInstrDesc &Desc = MII.get(Opc);
3766 
3767  if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
3768  return true;
3769 
3770  const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Opc);
3771  const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode =
3772  AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode);
3773 
3774  if (!BaseOpcode->MSAA)
3775  return true;
3776 
3777  int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim);
3778  assert(DimIdx != -1);
3779 
3780  unsigned Dim = Inst.getOperand(DimIdx).getImm();
3781  const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByEncoding(Dim);
3782 
3783  return DimInfo->MSAA;
3784 }
3785 
3786 static bool IsMovrelsSDWAOpcode(const unsigned Opcode)
3787 {
3788  switch (Opcode) {
3789  case AMDGPU::V_MOVRELS_B32_sdwa_gfx10:
3790  case AMDGPU::V_MOVRELSD_B32_sdwa_gfx10:
3791  case AMDGPU::V_MOVRELSD_2_B32_sdwa_gfx10:
3792  return true;
3793  default:
3794  return false;
3795  }
3796 }
3797 
3798 // movrels* opcodes should only allow VGPRS as src0.
3799 // This is specified in .td description for vop1/vop3,
3800 // but sdwa is handled differently. See isSDWAOperand.
3801 bool AMDGPUAsmParser::validateMovrels(const MCInst &Inst,
3802  const OperandVector &Operands) {
3803 
3804  const unsigned Opc = Inst.getOpcode();
3805  const MCInstrDesc &Desc = MII.get(Opc);
3806 
3807  if ((Desc.TSFlags & SIInstrFlags::SDWA) == 0 || !IsMovrelsSDWAOpcode(Opc))
3808  return true;
3809 
3810  const int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
3811  assert(Src0Idx != -1);
3812 
3813  SMLoc ErrLoc;
3814  const MCOperand &Src0 = Inst.getOperand(Src0Idx);
3815  if (Src0.isReg()) {
3816  auto Reg = mc2PseudoReg(Src0.getReg());
3817  const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3818  if (!isSGPR(Reg, TRI))
3819  return true;
3820  ErrLoc = getRegLoc(Reg, Operands);
3821  } else {
3822  ErrLoc = getConstLoc(Operands);
3823  }
3824 
3825  Error(ErrLoc, "source operand must be a VGPR");
3826  return false;
3827 }
3828 
3829 bool AMDGPUAsmParser::validateMAIAccWrite(const MCInst &Inst,
3830  const OperandVector &Operands) {
3831 
3832  const unsigned Opc = Inst.getOpcode();
3833 
3834  if (Opc != AMDGPU::V_ACCVGPR_WRITE_B32_vi)
3835  return true;
3836 
3837  const int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
3838  assert(Src0Idx != -1);
3839 
3840  const MCOperand &Src0 = Inst.getOperand(Src0Idx);
3841  if (!Src0.isReg())
3842  return true;
3843 
3844  auto Reg = mc2PseudoReg(Src0.getReg());
3845  const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3846  if (!isGFX90A() && isSGPR(Reg, TRI)) {
3847  Error(getRegLoc(Reg, Operands),
3848  "source operand must be either a VGPR or an inline constant");
3849  return false;
3850  }
3851 
3852  return true;
3853 }
3854 
3855 bool AMDGPUAsmParser::validateMAISrc2(const MCInst &Inst,
3856  const OperandVector &Operands) {
3857  unsigned Opcode = Inst.getOpcode();
3858  const MCInstrDesc &Desc = MII.get(Opcode);
3859 
3860  if (!(Desc.TSFlags & SIInstrFlags::IsMAI) ||
3861  !getFeatureBits()[FeatureMFMAInlineLiteralBug])
3862  return true;
3863 
3864  const int Src2Idx = getNamedOperandIdx(Opcode, OpName::src2);
3865  if (Src2Idx == -1)
3866  return true;
3867 
3868  if (Inst.getOperand(Src2Idx).isImm() && isInlineConstant(Inst, Src2Idx)) {
3869  Error(getConstLoc(Operands),
3870  "inline constants are not allowed for this operand");
3871  return false;
3872  }
3873 
3874  return true;
3875 }
3876 
3877 bool AMDGPUAsmParser::validateMFMA(const MCInst &Inst,
3878  const OperandVector &Operands) {
3879  const unsigned Opc = Inst.getOpcode();
3880  const MCInstrDesc &Desc = MII.get(Opc);
3881 
3882  if ((Desc.TSFlags & SIInstrFlags::IsMAI) == 0)
3883  return true;
3884 
3885  const int Src2Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2);
3886  if (Src2Idx == -1)
3887  return true;
3888 
3889  const MCOperand &Src2 = Inst.getOperand(Src2Idx);
3890  if (!Src2.isReg())
3891  return true;
3892 
3893  MCRegister Src2Reg = Src2.getReg();
3894  MCRegister DstReg = Inst.getOperand(0).getReg();
3895  if (Src2Reg == DstReg)
3896  return true;
3897 
3898  const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3899  if (TRI->getRegClass(Desc.OpInfo[0].RegClass).getSizeInBits() <= 128)
3900  return true;
3901 
3902  if (TRI->regsOverlap(Src2Reg, DstReg)) {
3903  Error(getRegLoc(mc2PseudoReg(Src2Reg), Operands),
3904  "source 2 operand must not partially overlap with dst");
3905  return false;
3906  }
3907 
3908  return true;
3909 }
3910 
3911 bool AMDGPUAsmParser::validateDivScale(const MCInst &Inst) {
3912  switch (Inst.getOpcode()) {
3913  default:
3914  return true;
3915  case V_DIV_SCALE_F32_gfx6_gfx7:
3916  case V_DIV_SCALE_F32_vi:
3917  case V_DIV_SCALE_F32_gfx10:
3918  case V_DIV_SCALE_F64_gfx6_gfx7:
3919  case V_DIV_SCALE_F64_vi:
3920  case V_DIV_SCALE_F64_gfx10:
3921  break;
3922  }
3923 
3924  // TODO: Check that src0 = src1 or src2.
3925 
3926  for (auto Name : {AMDGPU::OpName::src0_modifiers,
3927  AMDGPU::OpName::src2_modifiers,
3928  AMDGPU::OpName::src2_modifiers}) {
3929  if (Inst.getOperand(AMDGPU::getNamedOperandIdx(Inst.getOpcode(), Name))
3930  .getImm() &
3931  SISrcMods::ABS) {
3932  return false;
3933  }
3934  }
3935 
3936  return true;
3937 }
3938 
3939 bool AMDGPUAsmParser::validateMIMGD16(const MCInst &Inst) {
3940 
3941  const unsigned Opc = Inst.getOpcode();
3942  const MCInstrDesc &Desc = MII.get(Opc);
3943 
3944  if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
3945  return true;
3946 
3947  int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16);
3948  if (D16Idx >= 0 && Inst.getOperand(D16Idx).getImm()) {
3949  if (isCI() || isSI())
3950  return false;
3951  }
3952 
3953  return true;
3954 }
3955 
3956 static bool IsRevOpcode(const unsigned Opcode)
3957 {
3958  switch (Opcode) {
3959  case AMDGPU::V_SUBREV_F32_e32:
3960  case AMDGPU::V_SUBREV_F32_e64:
3961  case AMDGPU::V_SUBREV_F32_e32_gfx10:
3962  case AMDGPU::V_SUBREV_F32_e32_gfx6_gfx7:
3963  case AMDGPU::V_SUBREV_F32_e32_vi:
3964  case AMDGPU::V_SUBREV_F32_e64_gfx10:
3965  case AMDGPU::V_SUBREV_F32_e64_gfx6_gfx7:
3966  case AMDGPU::V_SUBREV_F32_e64_vi:
3967 
3968  case AMDGPU::V_SUBREV_CO_U32_e32:
3969  case AMDGPU::V_SUBREV_CO_U32_e64:
3970  case AMDGPU::V_SUBREV_I32_e32_gfx6_gfx7:
3971  case AMDGPU::V_SUBREV_I32_e64_gfx6_gfx7:
3972 
3973  case AMDGPU::V_SUBBREV_U32_e32:
3974  case AMDGPU::V_SUBBREV_U32_e64:
3975  case AMDGPU::V_SUBBREV_U32_e32_gfx6_gfx7:
3976  case AMDGPU::V_SUBBREV_U32_e32_vi:
3977  case AMDGPU::V_SUBBREV_U32_e64_gfx6_gfx7:
3978  case AMDGPU::V_SUBBREV_U32_e64_vi:
3979 
3980  case AMDGPU::V_SUBREV_U32_e32:
3981  case AMDGPU::V_SUBREV_U32_e64:
3982  case AMDGPU::V_SUBREV_U32_e32_gfx9:
3983  case AMDGPU::V_SUBREV_U32_e32_vi:
3984  case AMDGPU::V_SUBREV_U32_e64_gfx9:
3985  case AMDGPU::V_SUBREV_U32_e64_vi:
3986 
3987  case AMDGPU::V_SUBREV_F16_e32:
3988  case AMDGPU::V_SUBREV_F16_e64:
3989  case AMDGPU::V_SUBREV_F16_e32_gfx10:
3990  case AMDGPU::V_SUBREV_F16_e32_vi:
3991  case AMDGPU::V_SUBREV_F16_e64_gfx10:
3992  case AMDGPU::V_SUBREV_F16_e64_vi:
3993 
3994  case AMDGPU::V_SUBREV_U16_e32:
3995  case AMDGPU::V_SUBREV_U16_e64:
3996  case AMDGPU::V_SUBREV_U16_e32_vi:
3997  case AMDGPU::V_SUBREV_U16_e64_vi:
3998 
3999  case AMDGPU::V_SUBREV_CO_U32_e32_gfx9:
4000  case AMDGPU::V_SUBREV_CO_U32_e64_gfx10:
4001  case AMDGPU::V_SUBREV_CO_U32_e64_gfx9:
4002 
4003  case AMDGPU::V_SUBBREV_CO_U32_e32_gfx9:
4004  case AMDGPU::V_SUBBREV_CO_U32_e64_gfx9:
4005 
4006  case AMDGPU::V_SUBREV_NC_U32_e32_gfx10:
4007  case AMDGPU::V_SUBREV_NC_U32_e64_gfx10:
4008 
4009  case AMDGPU::V_SUBREV_CO_CI_U32_e32_gfx10:
4010  case AMDGPU::V_SUBREV_CO_CI_U32_e64_gfx10:
4011 
4012  case AMDGPU::V_LSHRREV_B32_e32:
4013  case AMDGPU::V_LSHRREV_B32_e64:
4014  case AMDGPU::V_LSHRREV_B32_e32_gfx6_gfx7:
4015  case AMDGPU::V_LSHRREV_B32_e64_gfx6_gfx7:
4016  case AMDGPU::V_LSHRREV_B32_e32_vi:
4017  case AMDGPU::V_LSHRREV_B32_e64_vi:
4018  case AMDGPU::V_LSHRREV_B32_e32_gfx10:
4019  case AMDGPU::V_LSHRREV_B32_e64_gfx10:
4020 
4021  case AMDGPU::V_ASHRREV_I32_e32:
4022  case AMDGPU::V_ASHRREV_I32_e64:
4023  case AMDGPU::V_ASHRREV_I32_e32_gfx10:
4024  case AMDGPU::V_ASHRREV_I32_e32_gfx6_gfx7:
4025  case AMDGPU::V_ASHRREV_I32_e32_vi:
4026  case AMDGPU::V_ASHRREV_I32_e64_gfx10:
4027  case AMDGPU::V_ASHRREV_I32_e64_gfx6_gfx7:
4028  case AMDGPU::V_ASHRREV_I32_e64_vi:
4029 
4030  case AMDGPU::V_LSHLREV_B32_e32:
4031  case AMDGPU::V_LSHLREV_B32_e64:
4032  case AMDGPU::V_LSHLREV_B32_e32_gfx10:
4033  case AMDGPU::V_LSHLREV_B32_e32_gfx6_gfx7:
4034  case AMDGPU::V_LSHLREV_B32_e32_vi:
4035  case AMDGPU::V_LSHLREV_B32_e64_gfx10:
4036  case AMDGPU::V_LSHLREV_B32_e64_gfx6_gfx7:
4037  case AMDGPU::V_LSHLREV_B32_e64_vi:
4038 
4039  case AMDGPU::V_LSHLREV_B16_e32:
4040  case AMDGPU::V_LSHLREV_B16_e64:
4041  case AMDGPU::V_LSHLREV_B16_e32_vi:
4042  case AMDGPU::V_LSHLREV_B16_e64_vi:
4043  case AMDGPU::V_LSHLREV_B16_gfx10:
4044 
4045  case AMDGPU::V_LSHRREV_B16_e32:
4046  case AMDGPU::V_LSHRREV_B16_e64:
4047  case AMDGPU::V_LSHRREV_B16_e32_vi:
4048  case AMDGPU::V_LSHRREV_B16_e64_vi:
4049  case AMDGPU::V_LSHRREV_B16_gfx10:
4050 
4051  case AMDGPU::V_ASHRREV_I16_e32:
4052  case AMDGPU::V_ASHRREV_I16_e64:
4053  case AMDGPU::V_ASHRREV_I16_e32_vi:
4054  case AMDGPU::V_ASHRREV_I16_e64_vi:
4055  case AMDGPU::V_ASHRREV_I16_gfx10:
4056 
4057  case AMDGPU::V_LSHLREV_B64_e64:
4058  case AMDGPU::V_LSHLREV_B64_gfx10:
4059  case AMDGPU::V_LSHLREV_B64_vi:
4060 
4061  case AMDGPU::V_LSHRREV_B64_e64:
4062  case AMDGPU::V_LSHRREV_B64_gfx10:
4063  case AMDGPU::V_LSHRREV_B64_vi:
4064 
4065  case AMDGPU::V_ASHRREV_I64_e64:
4066  case AMDGPU::V_ASHRREV_I64_gfx10:
4067  case AMDGPU::V_ASHRREV_I64_vi:
4068 
4069  case AMDGPU::V_PK_LSHLREV_B16:
4070  case AMDGPU::V_PK_LSHLREV_B16_gfx10:
4071  case AMDGPU::V_PK_LSHLREV_B16_vi:
4072 
4073  case AMDGPU::V_PK_LSHRREV_B16:
4074  case AMDGPU::V_PK_LSHRREV_B16_gfx10:
4075  case AMDGPU::V_PK_LSHRREV_B16_vi:
4076  case AMDGPU::V_PK_ASHRREV_I16:
4077  case AMDGPU::V_PK_ASHRREV_I16_gfx10:
4078  case AMDGPU::V_PK_ASHRREV_I16_vi:
4079  return true;
4080  default:
4081  return false;
4082  }
4083 }
4084 
4085 Optional<StringRef> AMDGPUAsmParser::validateLdsDirect(const MCInst &Inst) {
4086 
4087  using namespace SIInstrFlags;
4088  const unsigned Opcode = Inst.getOpcode();
4089  const MCInstrDesc &Desc = MII.get(Opcode);
4090 
4091  // lds_direct register is defined so that it can be used
4092  // with 9-bit operands only. Ignore encodings which do not accept these.
4093  const auto Enc = VOP1 | VOP2 | VOP3 | VOPC | VOP3P | SIInstrFlags::SDWA;
4094  if ((Desc.TSFlags & Enc) == 0)
4095  return None;
4096 
4097  for (auto SrcName : {OpName::src0, OpName::src1, OpName::src2}) {
4098  auto SrcIdx = getNamedOperandIdx(Opcode, SrcName);
4099  if (SrcIdx == -1)
4100  break;
4101  const auto &Src = Inst.getOperand(SrcIdx);
4102  if (Src.isReg() && Src.getReg() == LDS_DIRECT) {
4103 
4104  if (isGFX90A() || isGFX11Plus())
4105  return StringRef("lds_direct is not supported on this GPU");
4106 
4107  if (IsRevOpcode(Opcode) || (Desc.TSFlags & SIInstrFlags::SDWA))
4108  return StringRef("lds_direct cannot be used with this instruction");
4109 
4110  if (SrcName != OpName::src0)
4111  return StringRef("lds_direct may be used as src0 only");
4112  }
4113  }
4114 
4115  return None;
4116 }
4117 
4118 SMLoc AMDGPUAsmParser::getFlatOffsetLoc(const OperandVector &Operands) const {
4119  for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
4120  AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
4121  if (Op.isFlatOffset())
4122  return Op.getStartLoc();
4123  }
4124  return getLoc();
4125 }
4126 
4127 bool AMDGPUAsmParser::validateFlatOffset(const MCInst &Inst,
4128  const OperandVector &Operands) {
4129  uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
4130  if ((TSFlags & SIInstrFlags::FLAT) == 0)
4131  return true;
4132 
4133  auto Opcode = Inst.getOpcode();
4134  auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset);
4135  assert(OpNum != -1);
4136 
4137  const auto &Op = Inst.getOperand(OpNum);
4138  if (!hasFlatOffsets() && Op.getImm() != 0) {
4139  Error(getFlatOffsetLoc(Operands),
4140  "flat offset modifier is not supported on this GPU");
4141  return false;
4142  }
4143 
4144  // For FLAT segment the offset must be positive;
4145  // MSB is ignored and forced to zero.
4146  if (TSFlags & (SIInstrFlags::FlatGlobal | SIInstrFlags::FlatScratch)) {
4147  unsigned OffsetSize = AMDGPU::getNumFlatOffsetBits(getSTI(), true);
4148  if (!isIntN(OffsetSize, Op.getImm())) {
4149  Error(getFlatOffsetLoc(Operands),
4150  Twine("expected a ") + Twine(OffsetSize) + "-bit signed offset");
4151  return false;
4152  }
4153  } else {
4154  unsigned OffsetSize = AMDGPU::getNumFlatOffsetBits(getSTI(), false);
4155  if (!isUIntN(OffsetSize, Op.getImm())) {
4156  Error(getFlatOffsetLoc(Operands),
4157  Twine("expected a ") + Twine(OffsetSize) + "-bit unsigned offset");
4158  return false;
4159  }
4160  }
4161 
4162  return true;
4163 }
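// Illustrative examples of the offset checks above (field widths come from
// getNumFlatOffsetBits and differ between subtargets):
//   global_load_dword v0, v[2:3], off offset:-16   ; global segment allows a signed offset if it fits
//   flat_load_dword   v0, v[2:3]      offset:-16   ; plain FLAT offsets must be non-negative -> rejected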
4164 
4165 SMLoc AMDGPUAsmParser::getSMEMOffsetLoc(const OperandVector &Operands) const {
4166  // Start with second operand because SMEM Offset cannot be dst or src0.
4167  for (unsigned i = 2, e = Operands.size(); i != e; ++i) {
4168  AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
4169  if (Op.isSMEMOffset())
4170  return Op.getStartLoc();
4171  }
4172  return getLoc();
4173 }
4174 
4175 bool AMDGPUAsmParser::validateSMEMOffset(const MCInst &Inst,
4176  const OperandVector &Operands) {
4177  if (isCI() || isSI())
4178  return true;
4179 
4180  uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
4181  if ((TSFlags & SIInstrFlags::SMRD) == 0)
4182  return true;
4183 
4184  auto Opcode = Inst.getOpcode();
4185  auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset);
4186  if (OpNum == -1)
4187  return true;
4188 
4189  const auto &Op = Inst.getOperand(OpNum);
4190  if (!Op.isImm())
4191  return true;
4192 
4193  uint64_t Offset = Op.getImm();
4194  bool IsBuffer = AMDGPU::getSMEMIsBuffer(Opcode);
4195  if (AMDGPU::isLegalSMRDEncodedUnsignedOffset(getSTI(), Offset) ||
4196  AMDGPU::isLegalSMRDEncodedSignedOffset(getSTI(), Offset, IsBuffer))
4197  return true;
4198 
4199  Error(getSMEMOffsetLoc(Operands),
4200  (isVI() || IsBuffer) ? "expected a 20-bit unsigned offset" :
4201  "expected a 21-bit signed offset");
4202 
4203  return false;
4204 }
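// Illustrative examples matching the error messages above:
//   s_load_dword s0, s[2:3], 0xfffff   ; fits the 20-bit unsigned field -> accepted on VI
//   s_load_dword s0, s[2:3], -0x8      ; negative offsets need the 21-bit signed form (GFX9+, non-buffer)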
4205 
4206 bool AMDGPUAsmParser::validateSOPLiteral(const MCInst &Inst) const {
4207  unsigned Opcode = Inst.getOpcode();
4208  const MCInstrDesc &Desc = MII.get(Opcode);
4209  if (!(Desc.TSFlags & (SIInstrFlags::SOP2 | SIInstrFlags::SOPC)))
4210  return true;
4211 
4212  const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
4213  const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
4214 
4215  const int OpIndices[] = { Src0Idx, Src1Idx };
4216 
4217  unsigned NumExprs = 0;
4218  unsigned NumLiterals = 0;
4219  uint32_t LiteralValue;
4220 
4221  for (int OpIdx : OpIndices) {
4222  if (OpIdx == -1) break;
4223 
4224  const MCOperand &MO = Inst.getOperand(OpIdx);
4225  // Exclude special imm operands (like that used by s_set_gpr_idx_on)
4226  if (AMDGPU::isSISrcOperand(Desc, OpIdx)) {
4227  if (MO.isImm() && !isInlineConstant(Inst, OpIdx)) {
4228  uint32_t Value = static_cast<uint32_t>(MO.getImm());
4229  if (NumLiterals == 0 || LiteralValue != Value) {
4230  LiteralValue = Value;
4231  ++NumLiterals;
4232  }
4233  } else if (MO.isExpr()) {
4234  ++NumExprs;
4235  }
4236  }
4237  }
4238 
4239  return NumLiterals + NumExprs <= 1;
4240 }
4241 
4242 bool AMDGPUAsmParser::validateOpSel(const MCInst &Inst) {
4243  const unsigned Opc = Inst.getOpcode();
4244  if (isPermlane16(Opc)) {
4245  int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
4246  unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
4247 
4248  if (OpSel & ~3)
4249  return false;
4250  }
4251 
4252  uint64_t TSFlags = MII.get(Opc).TSFlags;
4253 
4254  if (isGFX940() && (TSFlags & SIInstrFlags::IsDOT)) {
4255  int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
4256  if (OpSelIdx != -1) {
4257  if (Inst.getOperand(OpSelIdx).getImm() != 0)
4258  return false;
4259  }
4260  int OpSelHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel_hi);
4261  if (OpSelHiIdx != -1) {
4262  if (Inst.getOperand(OpSelHiIdx).getImm() != -1)
4263  return false;
4264  }
4265  }
4266 
4267  // op_sel[0:1] must be 0 for v_dot2_bf16_bf16 and v_dot2_f16_f16 (VOP3 Dot).
4268  if (isGFX11Plus() && (TSFlags & SIInstrFlags::IsDOT) &&
4269  (TSFlags & SIInstrFlags::VOP3) && !(TSFlags & SIInstrFlags::VOP3P)) {
4270  int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
4271  unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
4272  if (OpSel & 3)
4273  return false;
4274  }
4275 
4276  return true;
4277 }
4278 
4279 bool AMDGPUAsmParser::validateDPP(const MCInst &Inst,
4280  const OperandVector &Operands) {
4281  const unsigned Opc = Inst.getOpcode();
4282  int DppCtrlIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dpp_ctrl);
4283  if (DppCtrlIdx < 0)
4284  return true;
4285  unsigned DppCtrl = Inst.getOperand(DppCtrlIdx).getImm();
4286 
4287  if (!AMDGPU::isLegal64BitDPPControl(DppCtrl)) {
4288  // DPP64 is supported for row_newbcast only.
4289  int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
4290  if (Src0Idx >= 0 &&
4291  getMRI()->getSubReg(Inst.getOperand(Src0Idx).getReg(), AMDGPU::sub1)) {
4292  SMLoc S = getImmLoc(AMDGPUOperand::ImmTyDppCtrl, Operands);
4293  Error(S, "64 bit dpp only supports row_newbcast");
4294  return false;
4295  }
4296  }
4297 
4298  return true;
4299 }
4300 
4301 // Check if VCC register matches wavefront size
4302 bool AMDGPUAsmParser::validateVccOperand(unsigned Reg) const {
4303  auto FB = getFeatureBits();
4304  return (FB[AMDGPU::FeatureWavefrontSize64] && Reg == AMDGPU::VCC) ||
4305  (FB[AMDGPU::FeatureWavefrontSize32] && Reg == AMDGPU::VCC_LO);
4306 }
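// Illustrative example of the wavefront-size check above: carry/condition
// operands are spelled vcc in wave64 mode and vcc_lo in wave32 mode, e.g.
//   v_cndmask_b32_e32 v0, v1, v2, vcc      ; wave64
//   v_cndmask_b32_e32 v0, v1, v2, vcc_lo   ; wave32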
4307 
4308 // One unique literal can be used. VOP3 literal is only allowed in GFX10+
4309 bool AMDGPUAsmParser::validateVOPLiteral(const MCInst &Inst,
4310  const OperandVector &Operands) {
4311  unsigned Opcode = Inst.getOpcode();
4312  const MCInstrDesc &Desc = MII.get(Opcode);
4313  bool HasMandatoryLiteral = getNamedOperandIdx(Opcode, OpName::imm) != -1;
4314  if (!(Desc.TSFlags & (SIInstrFlags::VOP3 | SIInstrFlags::VOP3P)) &&
4315  !HasMandatoryLiteral && !isVOPD(Opcode))
4316  return true;
4317 
4318  OperandIndices OpIndices = getSrcOperandIndices(Opcode, HasMandatoryLiteral);
4319 
4320  unsigned NumExprs = 0;
4321  unsigned NumLiterals = 0;
4322  uint32_t LiteralValue;
4323 
4324  for (int OpIdx : OpIndices) {
4325  if (OpIdx == -1)
4326  continue;
4327 
4328  const MCOperand &MO = Inst.getOperand(OpIdx);
4329  if (!MO.isImm() && !MO.isExpr())
4330  continue;
4331  if (!isSISrcOperand(Desc, OpIdx))
4332  continue;
4333 
4334  if (MO.isImm() && !isInlineConstant(Inst, OpIdx)) {
4335  uint32_t Value = static_cast<uint32_t>(MO.getImm());
4336  if (NumLiterals == 0 || LiteralValue != Value) {
4337  LiteralValue = Value;
4338  ++NumLiterals;
4339  }
4340  } else if (MO.isExpr()) {
4341  ++NumExprs;
4342  }
4343  }
4344  NumLiterals += NumExprs;
4345 
4346  if (!NumLiterals)
4347  return true;
4348 
4349  if (!HasMandatoryLiteral && !getFeatureBits()[FeatureVOP3Literal]) {
4350  Error(getLitLoc(Operands), "literal operands are not supported");
4351  return false;
4352  }
4353 
4354  if (NumLiterals > 1) {
4355  Error(getLitLoc(Operands, true), "only one unique literal operand is allowed");
4356  return false;
4357  }
4358 
4359  return true;
4360 }
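// Illustrative examples of the unique-literal rule (GFX10+ VOP3, where
// FeatureVOP3Literal permits a literal at all):
//   v_fma_f32 v0, 0x42, v1, 0x42   ; the same literal reused counts once -> accepted
//   v_fma_f32 v0, 0x42, v1, 0x43   ; two different literals              -> rejected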
4361 
4362 // Returns -1 if not a register, 0 if VGPR and 1 if AGPR.
4363 static int IsAGPROperand(const MCInst &Inst, uint16_t NameIdx,
4364  const MCRegisterInfo *MRI) {
4365  int OpIdx = AMDGPU::getNamedOperandIdx(Inst.getOpcode(), NameIdx);
4366  if (OpIdx < 0)
4367  return -1;
4368 
4369  const MCOperand &Op = Inst.getOperand(OpIdx);
4370  if (!Op.isReg())
4371  return -1;
4372 
4373  unsigned Sub = MRI->getSubReg(Op.getReg(), AMDGPU::sub0);
4374  auto Reg = Sub ? Sub : Op.getReg();
4375  const MCRegisterClass &AGPR32 = MRI->getRegClass(AMDGPU::AGPR_32RegClassID);
4376  return AGPR32.contains(Reg) ? 1 : 0;
4377 }
4378 
4379 bool AMDGPUAsmParser::validateAGPRLdSt(const MCInst &Inst) const {
4380  uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
4381  if ((TSFlags & (SIInstrFlags::FLAT | SIInstrFlags::MUBUF |
4382  SIInstrFlags::MTBUF | SIInstrFlags::MIMG |
4383  SIInstrFlags::DS)) == 0)
4384  return true;
4385 
4386  uint16_t DataNameIdx = (TSFlags & SIInstrFlags::DS) ? AMDGPU::OpName::data0
4387  : AMDGPU::OpName::vdata;
4388 
4389  const MCRegisterInfo *MRI = getMRI();
4390  int DstAreg = IsAGPROperand(Inst, AMDGPU::OpName::vdst, MRI);
4391  int DataAreg = IsAGPROperand(Inst, DataNameIdx, MRI);
4392 
4393  if ((TSFlags & SIInstrFlags::DS) && DataAreg >= 0) {
4394  int Data2Areg = IsAGPROperand(Inst, AMDGPU::OpName::data1, MRI);
4395  if (Data2Areg >= 0 && Data2Areg != DataAreg)
4396  return false;
4397  }
4398 
4399  auto FB = getFeatureBits();
4400  if (FB[AMDGPU::FeatureGFX90AInsts]) {
4401  if (DataAreg < 0 || DstAreg < 0)
4402  return true;
4403  return DstAreg == DataAreg;
4404  }
4405 
4406  return DstAreg < 1 && DataAreg < 1;
4407 }
4408 
4409 bool AMDGPUAsmParser::validateVGPRAlign(const MCInst &Inst) const {
4410  auto FB = getFeatureBits();
4411  if (!FB[AMDGPU::FeatureGFX90AInsts])
4412  return true;
4413 
4414  const MCRegisterInfo *MRI = getMRI();
4415  const MCRegisterClass &VGPR32 = MRI->getRegClass(AMDGPU::VGPR_32RegClassID);
4416  const MCRegisterClass &AGPR32 = MRI->getRegClass(AMDGPU::AGPR_32RegClassID);
4417  for (unsigned I = 0, E = Inst.getNumOperands(); I != E; ++I) {
4418  const MCOperand &Op = Inst.getOperand(I);
4419  if (!Op.isReg())
4420  continue;
4421 
4422  unsigned Sub = MRI->getSubReg(Op.getReg(), AMDGPU::sub0);
4423  if (!Sub)
4424  continue;
4425 
4426  if (VGPR32.contains(Sub) && ((Sub - AMDGPU::VGPR0) & 1))
4427  return false;
4428  if (AGPR32.contains(Sub) && ((Sub - AMDGPU::AGPR0) & 1))
4429  return false;
4430  }
4431 
4432  return true;
4433 }
4434 
4435 SMLoc AMDGPUAsmParser::getBLGPLoc(const OperandVector &Operands) const {
4436  for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
4437  AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
4438  if (Op.isBLGP())
4439  return Op.getStartLoc();
4440  }
4441  return SMLoc();
4442 }
4443 
4444 bool AMDGPUAsmParser::validateBLGP(const MCInst &Inst,
4445  const OperandVector &Operands) {
4446  unsigned Opc = Inst.getOpcode();
4447  int BlgpIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::blgp);
4448  if (BlgpIdx == -1)
4449  return true;
4450  SMLoc BLGPLoc = getBLGPLoc(Operands);
4451  if (!BLGPLoc.isValid())
4452  return true;
4453  bool IsNeg = StringRef(BLGPLoc.getPointer()).startswith("neg:");
4454  auto FB = getFeatureBits();
4455  bool UsesNeg = false;
4456  if (FB[AMDGPU::FeatureGFX940Insts]) {
4457  switch (Opc) {
4458  case AMDGPU::V_MFMA_F64_16X16X4F64_gfx940_acd:
4459  case AMDGPU::V_MFMA_F64_16X16X4F64_gfx940_vcd:
4460  case AMDGPU::V_MFMA_F64_4X4X4F64_gfx940_acd:
4461  case AMDGPU::V_MFMA_F64_4X4X4F64_gfx940_vcd:
4462  UsesNeg = true;
4463  }
4464  }
4465 
4466  if (IsNeg == UsesNeg)
4467  return true;
4468 
4469  Error(BLGPLoc,
4470  UsesNeg ? "invalid modifier: blgp is not supported"
4471  : "invalid modifier: neg is not supported");
4472 
4473  return false;
4474 }
4475 
4476 bool AMDGPUAsmParser::validateWaitCnt(const MCInst &Inst,
4477  const OperandVector &Operands) {
4478  if (!isGFX11Plus())
4479  return true;
4480 
4481  unsigned Opc = Inst.getOpcode();
4482  if (Opc != AMDGPU::S_WAITCNT_EXPCNT_gfx11 &&
4483  Opc != AMDGPU::S_WAITCNT_LGKMCNT_gfx11 &&
4484  Opc != AMDGPU::S_WAITCNT_VMCNT_gfx11 &&
4485  Opc != AMDGPU::S_WAITCNT_VSCNT_gfx11)
4486  return true;
4487 
4488  int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::sdst);
4489  assert(Src0Idx >= 0 && Inst.getOperand(Src0Idx).isReg());
4490  auto Reg = mc2PseudoReg(Inst.getOperand(Src0Idx).getReg());
4491  if (Reg == AMDGPU::SGPR_NULL)
4492  return true;
4493 
4494  SMLoc RegLoc = getRegLoc(Reg, Operands);
4495  Error(RegLoc, "src0 must be null");
4496  return false;
4497 }
4498 
4499 // gfx90a has an undocumented limitation:
4500 // DS_GWS opcodes must use even aligned registers.
4501 bool AMDGPUAsmParser::validateGWS(const MCInst &Inst,
4502  const OperandVector &Operands) {
4503  if (!getFeatureBits()[AMDGPU::FeatureGFX90AInsts])
4504  return true;
4505 
4506  int Opc = Inst.getOpcode();
4507  if (Opc != AMDGPU::DS_GWS_INIT_vi && Opc != AMDGPU::DS_GWS_BARRIER_vi &&
4508  Opc != AMDGPU::DS_GWS_SEMA_BR_vi)
4509  return true;
4510 
4511  const MCRegisterInfo *MRI = getMRI();
4512  const MCRegisterClass &VGPR32 = MRI->getRegClass(AMDGPU::VGPR_32RegClassID);
4513  int Data0Pos =
4514  AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::data0);
4515  assert(Data0Pos != -1);
4516  auto Reg = Inst.getOperand(Data0Pos).getReg();
4517  auto RegIdx = Reg - (VGPR32.contains(Reg) ? AMDGPU::VGPR0 : AMDGPU::AGPR0);
4518  if (RegIdx & 1) {
4519  SMLoc RegLoc = getRegLoc(Reg, Operands);
4520  Error(RegLoc, "vgpr must be even aligned");
4521  return false;
4522  }
4523 
4524  return true;
4525 }
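// Illustrative example of the gfx90a alignment rule above (register numbers
// chosen for illustration only):
//   ds_gws_init v2 gds   ; even-numbered data register -> accepted
//   ds_gws_init v3 gds   ; odd-numbered data register  -> rejected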
4526 
4527 bool AMDGPUAsmParser::validateCoherencyBits(const MCInst &Inst,
4528  const OperandVector &Operands,
4529  const SMLoc &IDLoc) {
4530  int CPolPos = AMDGPU::getNamedOperandIdx(Inst.getOpcode(),
4531  AMDGPU::OpName::cpol);
4532  if (CPolPos == -1)
4533  return true;
4534 
4535  unsigned CPol = Inst.getOperand(CPolPos).getImm();
4536 
4537  uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
4538  if (TSFlags & SIInstrFlags::SMRD) {
4539  if (CPol && (isSI() || isCI())) {
4540  SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands);
4541  Error(S, "cache policy is not supported for SMRD instructions");
4542  return false;
4543  }
4544  if (CPol & ~(CPol::GLC | CPol::DLC)) {
4545  Error(IDLoc, "invalid cache policy for SMEM instruction");
4546  return false;
4547  }
4548  }
4549 
4550  if (isGFX90A() && !isGFX940() && (CPol & CPol::SCC)) {
4551  SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands);
4552  StringRef CStr(S.getPointer());
4553  S = SMLoc::getFromPointer(&CStr.data()[CStr.find("scc")]);
4554  Error(S, "scc is not supported on this GPU");
4555  return false;
4556  }
4557 
4558  if (!(TSFlags & (SIInstrFlags::IsAtomicNoRet | SIInstrFlags::IsAtomicRet)))
4559  return true;
4560 
4561  if (TSFlags & SIInstrFlags::IsAtomicRet) {
4562  if (!(TSFlags & SIInstrFlags::MIMG) && !(CPol & CPol::GLC)) {
4563  Error(IDLoc, isGFX940() ? "instruction must use sc0"
4564  : "instruction must use glc");
4565  return false;
4566  }
4567  } else {
4568  if (CPol & CPol::GLC) {
4569  SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands);
4570  StringRef CStr(S.getPointer());
4571  S = SMLoc::getFromPointer(
4572  &CStr.data()[CStr.find(isGFX940() ? "sc0" : "glc")]);
4573  Error(S, isGFX940() ? "instruction must not use sc0"
4574  : "instruction must not use glc");
4575  return false;
4576  }
4577  }
4578 
4579  return true;
4580 }
4581 
4582 bool AMDGPUAsmParser::validateExeczVcczOperands(const OperandVector &Operands) {
4583  if (!isGFX11Plus())
4584  return true;
4585  for (auto &Operand : Operands) {
4586  if (!Operand->isReg())
4587  continue;
4588  unsigned Reg = Operand->getReg();
4589  if (Reg == SRC_EXECZ || Reg == SRC_VCCZ) {
4590  Error(getRegLoc(Reg, Operands),
4591  "execz and vccz are not supported on this GPU");
4592  return false;
4593  }
4594  }
4595  return true;
4596 }
4597 
4598 bool AMDGPUAsmParser::validateTFE(const MCInst &Inst,
4599  const OperandVector &Operands) {
4600  const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
4601  if (Desc.mayStore() &&
4602  (Desc.TSFlags & (SIInstrFlags::MUBUF | SIInstrFlags::MTBUF))) {
4603  SMLoc Loc = getImmLoc(AMDGPUOperand::ImmTyTFE, Operands);
4604  if (Loc != getInstLoc(Operands)) {
4605  Error(Loc, "TFE modifier has no meaning for store instructions");
4606  return false;
4607  }
4608  }
4609 
4610  return true;
4611 }
4612 
4613 bool AMDGPUAsmParser::validateInstruction(const MCInst &Inst,
4614  const SMLoc &IDLoc,
4615  const OperandVector &Operands) {
4616  if (auto ErrMsg = validateLdsDirect(Inst)) {
4617  Error(getRegLoc(LDS_DIRECT, Operands), *ErrMsg);
4618  return false;
4619  }
4620  if (!validateSOPLiteral(Inst)) {
4621  Error(getLitLoc(Operands),
4622  "only one unique literal operand is allowed");
4623  return false;
4624  }
4625  if (!validateVOPLiteral(Inst, Operands)) {
4626  return false;
4627  }
4628  if (!validateConstantBusLimitations(Inst, Operands)) {
4629  return false;
4630  }
4631  if (!validateVOPDRegBankConstraints(Inst, Operands)) {
4632  return false;
4633  }
4634  if (!validateIntClampSupported(Inst)) {
4635  Error(getImmLoc(AMDGPUOperand::ImmTyClampSI, Operands),
4636  "integer clamping is not supported on this GPU");
4637  return false;
4638  }
4639  if (!validateOpSel(Inst)) {
4640  Error(getImmLoc(AMDGPUOperand::ImmTyOpSel, Operands),
4641  "invalid op_sel operand");
4642  return false;
4643  }
4644  if (!validateDPP(Inst, Operands)) {
4645  return false;
4646  }
4647  // For MUBUF/MTBUF d16 is a part of opcode, so there is nothing to validate.
4648  if (!validateMIMGD16(Inst)) {
4649  Error(getImmLoc(AMDGPUOperand::ImmTyD16, Operands),
4650  "d16 modifier is not supported on this GPU");
4651  return false;
4652  }
4653  if (!validateMIMGMSAA(Inst)) {
4654  Error(getImmLoc(AMDGPUOperand::ImmTyDim, Operands),
4655  "invalid dim; must be MSAA type");
4656  return false;
4657  }
4658  if (!validateMIMGDataSize(Inst, IDLoc)) {
4659  return false;
4660  }
4661  if (!validateMIMGAddrSize(Inst)) {
4662  Error(IDLoc,
4663  "image address size does not match dim and a16");
4664  return false;
4665  }
4666  if (!validateMIMGAtomicDMask(Inst)) {
4667  Error(getImmLoc(AMDGPUOperand::ImmTyDMask, Operands),
4668  "invalid atomic image dmask");
4669  return false;
4670  }
4671  if (!validateMIMGGatherDMask(Inst)) {
4672  Error(getImmLoc(AMDGPUOperand::ImmTyDMask, Operands),
4673  "invalid image_gather dmask: only one bit must be set");
4674  return false;
4675  }
4676  if (!validateMovrels(Inst, Operands)) {
4677  return false;
4678  }
4679  if (!validateFlatOffset(Inst, Operands)) {
4680  return false;
4681  }
4682  if (!validateSMEMOffset(Inst, Operands)) {
4683  return false;
4684  }
4685  if (!validateMAIAccWrite(Inst, Operands)) {
4686  return false;
4687  }
4688  if (!validateMAISrc2(Inst, Operands)) {
4689  return false;
4690  }
4691  if (!validateMFMA(Inst, Operands)) {
4692  return false;
4693  }
4694  if (!validateCoherencyBits(Inst, Operands, IDLoc)) {
4695  return false;
4696  }
4697 
4698  if (!validateAGPRLdSt(Inst)) {
4699  Error(IDLoc, getFeatureBits()[AMDGPU::FeatureGFX90AInsts]
4700  ? "invalid register class: data and dst should be all VGPR or AGPR"
4701  : "invalid register class: agpr loads and stores not supported on this GPU"
4702  );
4703  return false;
4704  }
4705  if (!validateVGPRAlign(Inst)) {
4706  Error(IDLoc,
4707  "invalid register class: vgpr tuples must be 64 bit aligned");
4708  return false;
4709  }
4710  if (!validateGWS(Inst, Operands)) {
4711  return false;
4712  }
4713 
4714  if (!validateBLGP(Inst, Operands)) {
4715  return false;
4716  }
4717 
4718  if (!validateDivScale(Inst)) {
4719  Error(IDLoc, "ABS not allowed in VOP3B instructions");
4720  return false;
4721  }
4722  if (!validateWaitCnt(Inst, Operands)) {
4723  return false;
4724  }
4725  if (!validateExeczVcczOperands(Operands)) {
4726  return false;
4727  }
4728  if (!validateTFE(Inst, Operands)) {
4729  return false;
4730  }
4731 
4732  return true;
4733 }
4734 
4735 static std::string AMDGPUMnemonicSpellCheck(StringRef S,
4736  const FeatureBitset &FBS,
4737  unsigned VariantID = 0);
4738 
4739 static bool AMDGPUCheckMnemonic(StringRef Mnemonic,
4740  const FeatureBitset &AvailableFeatures,
4741  unsigned VariantID);
4742 
4743 bool AMDGPUAsmParser::isSupportedMnemo(StringRef Mnemo,
4744  const FeatureBitset &FBS) {
4745  return isSupportedMnemo(Mnemo, FBS, getAllVariants());
4746 }
4747 
4748 bool AMDGPUAsmParser::isSupportedMnemo(StringRef Mnemo,
4749  const FeatureBitset &FBS,
4750  ArrayRef<unsigned> Variants) {
4751  for (auto Variant : Variants) {
4752  if (AMDGPUCheckMnemonic(Mnemo, FBS, Variant))
4753  return true;
4754  }
4755 
4756  return false;
4757 }
4758 
4759 bool AMDGPUAsmParser::checkUnsupportedInstruction(StringRef Mnemo,
4760  const SMLoc &IDLoc) {
4761  FeatureBitset FBS = ComputeAvailableFeatures(getFeatureBits());
4762 
4763  // Check if requested instruction variant is supported.
4764  if (isSupportedMnemo(Mnemo, FBS, getMatchedVariants()))
4765  return false;
4766 
4767  // This instruction is not supported.
4768  // Clear any other pending errors because they are no longer relevant.
4769  getParser().clearPendingErrors();
4770 
4771  // Requested instruction variant is not supported.
4772  // Check if any other variants are supported.
4773  StringRef VariantName = getMatchedVariantName();
4774  if (!VariantName.empty() && isSupportedMnemo(Mnemo, FBS)) {
4775  return Error(IDLoc,
4776  Twine(VariantName,
4777  " variant of this instruction is not supported"));
4778  }
4779 
4780  // Check if this instruction may be used with a different wavesize.
4781  if (isGFX10Plus() && getFeatureBits()[AMDGPU::FeatureWavefrontSize64] &&
4782  !getFeatureBits()[AMDGPU::FeatureWavefrontSize32]) {
4783 
4784  FeatureBitset FeaturesWS32 = getFeatureBits();
4785  FeaturesWS32.flip(AMDGPU::FeatureWavefrontSize64)
4786  .flip(AMDGPU::FeatureWavefrontSize32);
4787  FeatureBitset AvailableFeaturesWS32 =
4788  ComputeAvailableFeatures(FeaturesWS32);
4789 
4790  if (isSupportedMnemo(Mnemo, AvailableFeaturesWS32, getMatchedVariants()))
4791  return Error(IDLoc, "instruction requires wavesize=32");
4792  }
4793 
4794  // Finally check if this instruction is supported on any other GPU.
4795  if (isSupportedMnemo(Mnemo, FeatureBitset().set())) {
4796  return Error(IDLoc, "instruction not supported on this GPU");
4797  }
4798 
4799  // Instruction not supported on any GPU. Probably a typo.
4800  std::string Suggestion = AMDGPUMnemonicSpellCheck(Mnemo, FBS);
4801  return Error(IDLoc, "invalid instruction" + Suggestion);
4802 }
4803 
4804 static bool isInvalidVOPDY(const OperandVector &Operands,
4805  uint64_t InvalidOprIdx) {
4806  assert(InvalidOprIdx < Operands.size());
4807  const auto &Op = ((AMDGPUOperand &)*Operands[InvalidOprIdx]);
4808  if (Op.isToken() && InvalidOprIdx > 1) {
4809  const auto &PrevOp = ((AMDGPUOperand &)*Operands[InvalidOprIdx - 1]);
4810  return PrevOp.isToken() && PrevOp.getToken() == "::";
4811  }
4812  return false;
4813 }
4814 
4815 bool AMDGPUAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
4816  OperandVector &Operands,
4817  MCStreamer &Out,
4818  uint64_t &ErrorInfo,
4819  bool MatchingInlineAsm) {
4820  MCInst Inst;
4821  unsigned Result = Match_Success;
4822  for (auto Variant : getMatchedVariants()) {
4823  uint64_t EI;
4824  auto R = MatchInstructionImpl(Operands, Inst, EI, MatchingInlineAsm,
4825  Variant);
4826  // We order match statuses from least to most specific. We use the most
4827  // specific status as the result:
4828  // Match_MnemonicFail < Match_InvalidOperand < Match_MissingFeature < Match_PreferE32
4829  if ((R == Match_Success) ||
4830  (R == Match_PreferE32) ||
4831  (R == Match_MissingFeature && Result != Match_PreferE32) ||
4832  (R == Match_InvalidOperand && Result != Match_MissingFeature
4833  && Result != Match_PreferE32) ||
4834  (R == Match_MnemonicFail && Result != Match_InvalidOperand
4835  && Result != Match_MissingFeature
4836  && Result != Match_PreferE32)) {
4837  Result = R;
4838  ErrorInfo = EI;
4839  }
4840  if (R == Match_Success)
4841  break;
4842  }
4843 
4844  if (Result == Match_Success) {
4845  if (!validateInstruction(Inst, IDLoc, Operands)) {
4846  return true;
4847  }
4848  Inst.setLoc(IDLoc);
4849  Out.emitInstruction(Inst, getSTI());
4850  return false;
4851  }
4852 
4853  StringRef Mnemo = ((AMDGPUOperand &)*Operands[0]).getToken();
4854  if (checkUnsupportedInstruction(Mnemo, IDLoc)) {
4855  return true;
4856  }
4857 
4858  switch (Result) {
4859  default: break;
4860  case Match_MissingFeature:
4861  // It has been verified that the specified instruction
4862  // mnemonic is valid. A match was found but it requires
4863  // features which are not supported on this GPU.
4864  return Error(IDLoc, "operands are not valid for this GPU or mode");
4865 
4866  case Match_InvalidOperand: {
4867  SMLoc ErrorLoc = IDLoc;
4868  if (ErrorInfo != ~0ULL) {
4869  if (ErrorInfo >= Operands.size()) {
4870  return Error(IDLoc, "too few operands for instruction");
4871  }
4872  ErrorLoc = ((AMDGPUOperand &)*Operands[ErrorInfo]).getStartLoc();
4873  if (ErrorLoc == SMLoc())
4874  ErrorLoc = IDLoc;
4875 
4876  if (isInvalidVOPDY(Operands, ErrorInfo)) {
4877  return Error(ErrorLoc, "invalid VOPDY instruction");
4878  }
4879  return Error(ErrorLoc, "invalid operand for instruction");
4880  }
4881 
4882  case Match_PreferE32:
4883  return Error(IDLoc, "internal error: instruction without _e64 suffix "
4884  "should be encoded as e32");
4885  case Match_MnemonicFail:
4886  llvm_unreachable("Invalid instructions should have been handled already");
4887  }
4888  llvm_unreachable("Implement any new match types added!");
4889 }
4890 
4891 bool AMDGPUAsmParser::ParseAsAbsoluteExpression(uint32_t &Ret) {
4892  int64_t Tmp = -1;
4893  if (!isToken(AsmToken::Integer) && !isToken(AsmToken::Identifier)) {
4894  return true;
4895  }
4896  if (getParser().parseAbsoluteExpression(Tmp)) {
4897  return true;
4898  }
4899  Ret = static_cast<uint32_t>(Tmp);
4900  return false;
4901 }
4902 
4903 bool AMDGPUAsmParser::ParseDirectiveMajorMinor(uint32_t &Major,
4904  uint32_t &Minor) {
4905  if (ParseAsAbsoluteExpression(Major))
4906  return TokError("invalid major version");
4907 
4908  if (!trySkipToken(AsmToken::Comma))
4909  return TokError("minor version number required, comma expected");
4910 
4911  if (ParseAsAbsoluteExpression(Minor))
4912  return TokError("invalid minor version");
4913 
4914  return false;
4915 }
4916 
4917 bool AMDGPUAsmParser::ParseDirectiveAMDGCNTarget() {
4918  if (getSTI().getTargetTriple().getArch() != Triple::amdgcn)
4919  return TokError("directive only supported for amdgcn architecture");
4920 
4921  std::string TargetIDDirective;
4922  SMLoc TargetStart = getTok().getLoc();
4923  if (getParser().parseEscapedString(TargetIDDirective))
4924  return true;
4925 
4926  SMRange TargetRange = SMRange(TargetStart, getTok().getLoc());
4927  if (getTargetStreamer().getTargetID()->toString() != TargetIDDirective)
4928  return getParser().Error(TargetRange.Start,
4929  (Twine(".amdgcn_target directive's target id ") +
4930  Twine(TargetIDDirective) +
4931  Twine(" does not match the specified target id ") +
4932  Twine(getTargetStreamer().getTargetID()->toString())).str());
4933 
4934  return false;
4935 }
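// Illustrative usage: the directive's string must match the target id the
// assembler was configured for, e.g.
//   .amdgcn_target "amdgcn-amd-amdhsa--gfx90a"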
4936 
4937 bool AMDGPUAsmParser::OutOfRangeError(SMRange Range) {
4938  return Error(Range.Start, "value out of range", Range);
4939 }
4940 
4941 bool AMDGPUAsmParser::calculateGPRBlocks(
4942  const FeatureBitset &Features, bool VCCUsed, bool FlatScrUsed,
4943  bool XNACKUsed, Optional<bool> EnableWavefrontSize32, unsigned NextFreeVGPR,
4944  SMRange VGPRRange, unsigned NextFreeSGPR, SMRange SGPRRange,
4945  unsigned &VGPRBlocks, unsigned &SGPRBlocks) {
4946  // TODO(scott.linder): These calculations are duplicated from
4947  // AMDGPUAsmPrinter::getSIProgramInfo and could be unified.
4948  IsaVersion Version = getIsaVersion(getSTI().getCPU());
4949 
4950  unsigned NumVGPRs = NextFreeVGPR;
4951  unsigned NumSGPRs = NextFreeSGPR;
4952 
4953  if (Version.Major >= 10)
4954  NumSGPRs = 0;
4955  else {
4956  unsigned MaxAddressableNumSGPRs =
4957  IsaInfo::getAddressableNumSGPRs(&getSTI());
4958 
4959  if (Version.Major >= 8 && !Features.test(FeatureSGPRInitBug) &&
4960  NumSGPRs > MaxAddressableNumSGPRs)
4961  return OutOfRangeError(SGPRRange);
4962 
4963  NumSGPRs +=
4964  IsaInfo::getNumExtraSGPRs(&getSTI(), VCCUsed, FlatScrUsed, XNACKUsed);
4965 
4966  if ((Version.Major <= 7 || Features.test(FeatureSGPRInitBug)) &&
4967  NumSGPRs > MaxAddressableNumSGPRs)
4968  return OutOfRangeError(SGPRRange);
4969 
4970  if (Features.test(FeatureSGPRInitBug))
4971  NumSGPRs = IsaInfo::FIXED_NUM_SGPRS_FOR_INIT_BUG;
4972  }
4973 
4974  VGPRBlocks =
4975  IsaInfo::getNumVGPRBlocks(&getSTI(), NumVGPRs, EnableWavefrontSize32);
4976  SGPRBlocks = IsaInfo::getNumSGPRBlocks(&getSTI(), NumSGPRs);
4977 
4978  return false;
4979 }
4980 
4981 bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() {
4982  if (getSTI().getTargetTriple().getArch() != Triple::amdgcn)
4983  return TokError("directive only supported for amdgcn architecture");
4984 
4985  if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA)
4986  return TokError("directive only supported for amdhsa OS");
4987 
4988  StringRef KernelName;
4989  if (getParser().parseIdentifier(KernelName))
4990  return true;
4991 
4992  kernel_descriptor_t KD = getDefaultAmdhsaKernelDescriptor(&getSTI());
4993 
4994  StringSet<> Seen;
4995 
4996  IsaVersion IVersion = getIsaVersion(getSTI().getCPU());
4997 
4998  SMRange VGPRRange;
4999  uint64_t NextFreeVGPR = 0;
5000  uint64_t AccumOffset = 0;
5001  uint64_t SharedVGPRCount = 0;
5002  SMRange SGPRRange;
5003  uint64_t NextFreeSGPR = 0;
5004 
5005  // Count the number of user SGPRs implied from the enabled feature bits.
5006  unsigned ImpliedUserSGPRCount = 0;
5007 
5008  // Track if the asm explicitly contains the directive for the user SGPR
5009  // count.
5010  std::optional<unsigned> ExplicitUserSGPRCount;
5011  bool ReserveVCC = true;
5012  bool ReserveFlatScr = true;
5013  Optional<bool> EnableWavefrontSize32;
5014 
5015  while (true) {
5016  while (trySkipToken(AsmToken::EndOfStatement));
5017 
5018  StringRef ID;
5019  SMRange IDRange = getTok().getLocRange();
5020  if (!parseId(ID, "expected .amdhsa_ directive or .end_amdhsa_kernel"))
5021  return true;
5022 
5023  if (ID == ".end_amdhsa_kernel")
5024  break;
5025 
5026  if (!Seen.insert(ID).second)
5027  return TokError(".amdhsa_ directives cannot be repeated");
5028 
5029  SMLoc ValStart = getLoc();
5030  int64_t IVal;
5031  if (getParser().parseAbsoluteExpression(IVal))
5032  return true;
5033  SMLoc ValEnd = getLoc();
5034  SMRange ValRange = SMRange(ValStart, ValEnd);
5035 
5036  if (IVal < 0)
5037  return OutOfRangeError(ValRange);
5038 
5039  uint64_t Val = IVal;
5040 
5041 #define PARSE_BITS_ENTRY(FIELD, ENTRY, VALUE, RANGE) \
5042  if (!isUInt<ENTRY##_WIDTH>(VALUE)) \
5043  return OutOfRangeError(RANGE); \
5044  AMDHSA_BITS_SET(FIELD, ENTRY, VALUE);
5045 
5046  if (ID == ".amdhsa_group_segment_fixed_size") {
5047  if (!isUInt<sizeof(KD.group_segment_fixed_size) * CHAR_BIT>(Val))
5048  return OutOfRangeError(ValRange);
5049  KD.group_segment_fixed_size = Val;
5050  } else if (ID == ".amdhsa_private_segment_fixed_size") {
5051  if (!isUInt<sizeof(KD.private_segment_fixed_size) * CHAR_BIT>(Val))
5052  return OutOfRangeError(ValRange);
5053  KD.private_segment_fixed_size = Val;
5054  } else if (ID == ".amdhsa_kernarg_size") {
5055  if (!isUInt<sizeof(KD.kernarg_size) * CHAR_BIT>(Val))
5056  return OutOfRangeError(ValRange);
5057  KD.kernarg_size = Val;
5058  } else if (ID == ".amdhsa_user_sgpr_count") {
5059  ExplicitUserSGPRCount = Val;
5060  } else if (ID == ".amdhsa_user_sgpr_private_segment_buffer") {
5061  if (hasArchitectedFlatScratch())
5062  return Error(IDRange.Start,
5063  "directive is not supported with architected flat scratch",
5064  IDRange);
5065  PARSE_BITS_ENTRY(KD.kernel_code_properties,
5066  KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER,
5067  Val, ValRange);
5068  if (Val)
5069  ImpliedUserSGPRCount += 4;
5070  } else if (ID == ".amdhsa_user_sgpr_dispatch_ptr") {
5071  PARSE_BITS_ENTRY(KD.kernel_code_properties,
5072  KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR, Val,
5073  ValRange);
5074  if (Val)
5075  ImpliedUserSGPRCount += 2;
5076  } else if (ID == ".amdhsa_user_sgpr_queue_ptr") {
5077  PARSE_BITS_ENTRY(KD.kernel_code_properties,
5078  KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR, Val,
5079  ValRange);
5080  if (Val)
5081  ImpliedUserSGPRCount += 2;
5082  } else if (ID == ".amdhsa_user_sgpr_kernarg_segment_ptr") {
5083  PARSE_BITS_ENTRY(KD.kernel_code_properties,
5084  KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR,
5085  Val, ValRange);
5086  if (Val)
5087  ImpliedUserSGPRCount += 2;
5088  } else if (ID == ".amdhsa_user_sgpr_dispatch_id") {
5089  PARSE_BITS_ENTRY(KD.kernel_code_properties,
5090  KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID, Val,
5091  ValRange);
5092  if (Val)
5093  ImpliedUserSGPRCount += 2;
5094  } else if (ID == ".amdhsa_user_sgpr_flat_scratch_init") {
5095  if (hasArchitectedFlatScratch())
5096  return Error(IDRange.Start,
5097  "directive is not supported with architected flat scratch",
5098  IDRange);
5099  PARSE_BITS_ENTRY(KD.kernel_code_properties,
5100  KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT, Val,
5101  ValRange);
5102  if (Val)
5103  ImpliedUserSGPRCount += 2;
5104  } else if (ID == ".amdhsa_user_sgpr_private_segment_size") {
5105  PARSE_BITS_ENTRY(KD.kernel_code_properties,
5106  KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE,
5107  Val, ValRange);
5108  if (Val)
5109  ImpliedUserSGPRCount += 1;
5110  } else if (ID == ".amdhsa_wavefront_size32") {
5111  if (IVersion.Major < 10)
5112  return Error(IDRange.Start, "directive requires gfx10+", IDRange);
5113  EnableWavefrontSize32 = Val;
5114  PARSE_BITS_ENTRY(KD.kernel_code_properties,
5115  KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32,
5116  Val, ValRange);
5117  } else if (ID == ".amdhsa_uses_dynamic_stack") {
5118  PARSE_BITS_ENTRY(KD.kernel_code_properties,
5119  KERNEL_CODE_PROPERTY_USES_DYNAMIC_STACK, Val, ValRange);
5120  } else if (ID == ".amdhsa_system_sgpr_private_segment_wavefront_offset") {
5121  if (hasArchitectedFlatScratch())
5122  return Error(IDRange.Start,
5123  "directive is not supported with architected flat scratch",
5124  IDRange);
5125  PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
5126  COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT, Val, ValRange);
5127  } else if (ID == ".amdhsa_enable_private_segment") {
5128  if (!hasArchitectedFlatScratch())
5129  return Error(
5130  IDRange.Start,
5131  "directive is not supported without architected flat scratch",
5132  IDRange);
5133  PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
5134  COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT, Val, ValRange);
5135  } else if (ID == ".amdhsa_system_sgpr_workgroup_id_x") {
5136  PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
5137  COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X, Val,
5138  ValRange);
5139  } else if (ID == ".amdhsa_system_sgpr_workgroup_id_y") {
5140  PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
5141  COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Y, Val,
5142  ValRange);
5143  } else if (ID == ".amdhsa_system_sgpr_workgroup_id_z") {
5144  PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
5145  COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Z, Val,
5146  ValRange);
5147  } else if (ID == ".amdhsa_system_sgpr_workgroup_info") {
5148  PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
5149  COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_INFO, Val,
5150  ValRange);
5151  } else if (ID == ".amdhsa_system_vgpr_workitem_id") {
5152  PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
5153  COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID, Val,
5154  ValRange);
5155  } else if (ID == ".amdhsa_next_free_vgpr") {
5156  VGPRRange = ValRange;
5157  NextFreeVGPR = Val;
5158  } else if (ID == ".amdhsa_next_free_sgpr") {
5159  SGPRRange = ValRange;
5160  NextFreeSGPR = Val;
5161  } else if (ID == ".amdhsa_accum_offset") {
5162  if (!isGFX90A())
5163  return Error(IDRange.Start, "directive requires gfx90a+", IDRange);
5164  AccumOffset = Val;
5165  } else if (ID == ".amdhsa_reserve_vcc") {
5166  if (!isUInt<1>(Val))
5167  return OutOfRangeError(ValRange);
5168  ReserveVCC = Val;
5169  } else if (ID == ".amdhsa_reserve_flat_scratch") {
5170  if (IVersion.Major < 7)
5171  return Error(IDRange.Start, "directive requires gfx7+", IDRange);
5172  if (hasArchitectedFlatScratch())
5173  return Error(IDRange.Start,
5174  "directive is not supported with architected flat scratch",
5175  IDRange);
5176  if (!isUInt<1>(Val))
5177  return OutOfRangeError(ValRange);
5178  ReserveFlatScr = Val;
5179  } else if (ID == ".amdhsa_reserve_xnack_mask") {
5180  if (IVersion.Major < 8)
5181  return Error(IDRange.Start, "directive requires gfx8+", IDRange);
5182  if (!isUInt<1>(Val))
5183  return OutOfRangeError(ValRange);
5184  if (Val != getTargetStreamer().getTargetID()->isXnackOnOrAny())
5185  return getParser().Error(IDRange.Start, ".amdhsa_reserve_xnack_mask does not match target id",
5186  IDRange);
5187  } else if (ID == ".amdhsa_float_round_mode_32") {
5188  PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
5189  COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32, Val, ValRange);
5190  } else if (ID == ".amdhsa_float_round_mode_16_64") {
5191  PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
5192  COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_16_64, Val, ValRange);
5193  } else if (ID == ".amdhsa_float_denorm_mode_32") {
5194  PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
5195  COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_32, Val, ValRange);
5196  } else if (ID == ".amdhsa_float_denorm_mode_16_64") {
5197  PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
5198  COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64, Val,
5199  ValRange);
5200  } else if (ID == ".amdhsa_dx10_clamp") {
5201  PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
5202  COMPUTE_PGM_RSRC1_ENABLE_DX10_CLAMP, Val, ValRange);
5203  } else if (ID == ".amdhsa_ieee_mode") {
5204  PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_ENABLE_IEEE_MODE,
5205  Val, ValRange);
5206  } else if (ID == ".amdhsa_fp16_overflow") {
5207  if (IVersion.Major < 9)
5208  return Error(IDRange.Start, "directive requires gfx9+", IDRange);
5209  PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_FP16_OVFL, Val,
5210  ValRange);
5211  } else if (ID == ".amdhsa_tg_split") {
5212  if (!isGFX90A())
5213  return Error(IDRange.Start, "directive requires gfx90a+", IDRange);
5214  PARSE_BITS_ENTRY(KD.compute_pgm_rsrc3, COMPUTE_PGM_RSRC3_GFX90A_TG_SPLIT, Val,
5215  ValRange);
5216  } else if (ID == ".amdhsa_workgroup_processor_mode") {
5217  if (IVersion.Major < 10)
5218  return Error(IDRange.Start, "directive requires gfx10+", IDRange);
5219  PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_WGP_MODE, Val,
5220  ValRange);
5221  } else if (ID == ".amdhsa_memory_ordered") {
5222  if (IVersion.Major < 10)
5223  return Error(IDRange.Start, "directive requires gfx10+", IDRange);
5224  PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_MEM_ORDERED, Val,
5225  ValRange);
5226  } else if (ID == ".amdhsa_forward_progress") {
5227  if (IVersion.Major < 10)
5228  return Error(IDRange.Start, "directive requires gfx10+", IDRange);
5229  PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_FWD_PROGRESS, Val,
5230  ValRange);
5231  } else if (ID == ".amdhsa_shared_vgpr_count") {
5232  if (IVersion.Major < 10)
5233  return Error(IDRange.Start, "directive requires gfx10+", IDRange);
5234  SharedVGPRCount = Val;
5235  PARSE_BITS_ENTRY(KD.compute_pgm_rsrc3,
5236  COMPUTE_PGM_RSRC3_GFX10_PLUS_SHARED_VGPR_COUNT, Val,
5237  ValRange);
5238  } else if (ID == ".amdhsa_exception_fp_ieee_invalid_op") {
5239  PARSE_BITS_ENTRY(
5240  KD.compute_pgm_rsrc2,
5241  COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION, Val,
5242  ValRange);
5243  } else if (ID == ".amdhsa_exception_fp_denorm_src") {
5244  PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
5245  COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_FP_DENORMAL_SOURCE,
5246  Val, ValRange);
5247  } else if (ID == ".amdhsa_exception_fp_ieee_div_zero") {
5248  PARSE_BITS_ENTRY(
5249  KD.compute_pgm_rsrc2,
5250  COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO, Val,
5251  ValRange);
5252  } else if (ID == ".amdhsa_exception_fp_ieee_overflow") {
5253  PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
5254  COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW,
5255  Val, ValRange);
5256  } else if (ID == ".amdhsa_exception_fp_ieee_underflow") {
5257  PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
5258  COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW,
5259  Val, ValRange);
5260  } else if (ID == ".amdhsa_exception_fp_ieee_inexact") {
5261  PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
5262  COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INEXACT,
5263  Val, ValRange);
5264  } else if (ID == ".amdhsa_exception_int_div_zero") {
5265  PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
5266  COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO,
5267  Val, ValRange);
5268  } else {
5269  return Error(IDRange.Start, "unknown .amdhsa_kernel directive", IDRange);
5270  }
5271 
5272 #undef PARSE_BITS_ENTRY
5273  }
5274 
5275  if (Seen.find(".amdhsa_next_free_vgpr") == Seen.end())
5276  return TokError(".amdhsa_next_free_vgpr directive is required");
5277 
5278  if (Seen.find(".amdhsa_next_free_sgpr") == Seen.end())
5279  return TokError(".amdhsa_next_free_sgpr directive is required");
5280 
5281  unsigned VGPRBlocks;
5282  unsigned SGPRBlocks;
5283  if (calculateGPRBlocks(getFeatureBits(), ReserveVCC, ReserveFlatScr,
5284  getTargetStreamer().getTargetID()->isXnackOnOrAny(),
5285  EnableWavefrontSize32, NextFreeVGPR,
5286  VGPRRange, NextFreeSGPR, SGPRRange, VGPRBlocks,
5287  SGPRBlocks))
5288  return true;
5289 
5290  if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT_WIDTH>(
5291  VGPRBlocks))
5292  return OutOfRangeError(VGPRRange);
5293  AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
5294  COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT, VGPRBlocks);
5295 
5296  if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT_WIDTH>(
5297  SGPRBlocks))
5298  return OutOfRangeError(SGPRRange);
5299  AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
5300  COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT,
5301  SGPRBlocks);
5302 
5303  if (ExplicitUserSGPRCount && ImpliedUserSGPRCount > *ExplicitUserSGPRCount)
5304  return TokError("amdgpu_user_sgpr_count smaller than implied by "
5305  "enabled user SGPRs");
5306 
5307  unsigned UserSGPRCount =
5308  ExplicitUserSGPRCount ? *ExplicitUserSGPRCount : ImpliedUserSGPRCount;
5309 
5310  if (!isUInt<COMPUTE_PGM_RSRC2_USER_SGPR_COUNT_WIDTH>(UserSGPRCount))
5311  return TokError("too many user SGPRs enabled");
5312  AMDHSA_BITS_SET(KD.compute_pgm_rsrc2, COMPUTE_PGM_RSRC2_USER_SGPR_COUNT,
5313  UserSGPRCount);
5314 
5315  if (isGFX90A()) {
5316  if (Seen.find(".amdhsa_accum_offset") == Seen.end())
5317  return TokError(".amdhsa_accum_offset directive is required");
5318  if (AccumOffset < 4 || AccumOffset > 256 || (AccumOffset & 3))
5319  return TokError("accum_offset should be in range [4..256] in "
5320  "increments of 4");
5321  if (AccumOffset > alignTo(std::max((uint64_t)1, NextFreeVGPR), 4))
5322  return TokError("accum_offset exceeds total VGPR allocation");
5323  AMDHSA_BITS_SET(KD.compute_pgm_rsrc3, COMPUTE_PGM_RSRC3_GFX90A_ACCUM_OFFSET,
5324  (AccumOffset / 4 - 1));
5325  }
5326 
5327  if (IVersion.Major == 10) {
5328  // SharedVGPRCount < 16 checked by PARSE_BITS_ENTRY
5329  if (SharedVGPRCount && EnableWavefrontSize32) {
5330  return TokError("shared_vgpr_count directive not valid on "
5331  "wavefront size 32");
5332  }
5333  if (SharedVGPRCount * 2 + VGPRBlocks > 63) {
5334  return TokError("shared_vgpr_count*2 + "
5335  "compute_pgm_rsrc1.GRANULATED_WORKITEM_VGPR_COUNT cannot "
5336  "exceed 63\n");
5337  }
5338  }
5339 
5340  getTargetStreamer().EmitAmdhsaKernelDescriptor(
5341  getSTI(), KernelName, KD, NextFreeVGPR, NextFreeSGPR, ReserveVCC,
5342  ReserveFlatScr);
5343  return false;
5344 }
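// A minimal usage sketch (not part of this file): the shape of the directive
// block that ParseDirectiveAMDHSAKernel consumes. The kernel name and values
// are hypothetical; per the checks above, only .amdhsa_next_free_vgpr and
// .amdhsa_next_free_sgpr are mandatory.
//
//   .amdhsa_kernel my_kernel
//     .amdhsa_next_free_vgpr 8
//     .amdhsa_next_free_sgpr 16
//     .amdhsa_reserve_vcc 0
//   .end_amdhsa_kernel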
5345 
5346 bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectVersion() {
5347  uint32_t Major;
5348  uint32_t Minor;
5349 
5350  if (ParseDirectiveMajorMinor(Major, Minor))
5351  return true;
5352 
5353  getTargetStreamer().EmitDirectiveHSACodeObjectVersion(Major, Minor);
5354  return false;
5355 }
5356 
5357 bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectISA() {
5358  uint32_t Major;
5359  uint32_t Minor;
5360  uint32_t Stepping;
5361  StringRef VendorName;
5362  StringRef ArchName;
5363 
5364  // If this directive has no arguments, then use the ISA version for the
5365  // targeted GPU.
5366  if (isToken(AsmToken::EndOfStatement)) {
5367  AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
5368  getTargetStreamer().EmitDirectiveHSACodeObjectISAV2(ISA.Major, ISA.Minor,
5369  ISA.Stepping,
5370  "AMD", "AMDGPU");
5371  return false;
5372  }
5373 
5374  if (ParseDirectiveMajorMinor(Major, Minor))
5375  return true;
5376 
5377  if (!trySkipToken(AsmToken::Comma))
5378  return TokError("stepping version number required, comma expected");
5379 
5380  if (ParseAsAbsoluteExpression(Stepping))
5381  return TokError("invalid stepping version");
5382 
5383  if (!trySkipToken(AsmToken::Comma))
5384  return TokError("vendor name required, comma expected");
5385 
5386  if (!parseString(VendorName, "invalid vendor name"))
5387  return true;
5388 
5389  if (!trySkipToken(AsmToken::Comma))
5390  return TokError("arch name required, comma expected");
5391 
5392  if (!parseString(ArchName, "invalid arch name"))
5393  return true;
5394 
5395  getTargetStreamer().EmitDirectiveHSACodeObjectISAV2(Major, Minor, Stepping,
5396  VendorName, ArchName);
5397  return false;
5398 }
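// Usage sketch (illustrative values): with no arguments the directive emits
// the ISA version of the targeted GPU; otherwise it takes major, minor,
// stepping, vendor and arch, in that order.
//
//   .hsa_code_object_isa
//   .hsa_code_object_isa 8,0,1,"AMD","AMDGPU"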
5399 
5400 bool AMDGPUAsmParser::ParseAMDKernelCodeTValue(StringRef ID,
5401  amd_kernel_code_t &Header) {
5402  // max_scratch_backing_memory_byte_size is deprecated. Ignore it while parsing
5403  // assembly for backwards compatibility.
5404  if (ID == "max_scratch_backing_memory_byte_size") {
5405  Parser.eatToEndOfStatement();
5406  return false;
5407  }
5408 
5409  SmallString<40> ErrStr;
5410  raw_svector_ostream Err(ErrStr);
5411  if (!parseAmdKernelCodeField(ID, getParser(), Header, Err)) {
5412  return TokError(Err.str());
5413  }
5414  Lex();
5415 
5416  if (ID == "enable_wavefront_size32") {
5417  if (Header.code_properties & AMD_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32) {
5418  if (!isGFX10Plus())
5419  return TokError("enable_wavefront_size32=1 is only allowed on GFX10+");
5420  if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32])
5421  return TokError("enable_wavefront_size32=1 requires +WavefrontSize32");
5422  } else {
5423  if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64])
5424  return TokError("enable_wavefront_size32=0 requires +WavefrontSize64");
5425  }
5426  }
5427 
5428  if (ID == "wavefront_size") {
5429  if (Header.wavefront_size == 5) {
5430  if (!isGFX10Plus())
5431  return TokError("wavefront_size=5 is only allowed on GFX10+");
5432  if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32])
5433  return TokError("wavefront_size=5 requires +WavefrontSize32");
5434  } else if (Header.wavefront_size == 6) {
5435  if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64])
5436  return TokError("wavefront_size=6 requires +WavefrontSize64");
5437  }
5438  }
5439 
5440  if (ID == "enable_wgp_mode") {
5441  if (G_00B848_WGP_MODE(Header.compute_pgm_resource_registers) &&
5442  !isGFX10Plus())
5443  return TokError("enable_wgp_mode=1 is only allowed on GFX10+");
5444  }
5445 
5446  if (ID == "enable_mem_ordered") {
5447  if (G_00B848_MEM_ORDERED(Header.compute_pgm_resource_registers) &&
5448  !isGFX10Plus())
5449  return TokError("enable_mem_ordered=1 is only allowed on GFX10+");
5450  }
5451 
5452  if (ID == "enable_fwd_progress") {
5453  if (G_00B848_FWD_PROGRESS(Header.compute_pgm_resource_registers) &&
5454  !isGFX10Plus())
5455  return TokError("enable_fwd_progress=1 is only allowed on GFX10+");
5456  }
5457 
5458  return false;
5459 }
5460 
5461 bool AMDGPUAsmParser::ParseDirectiveAMDKernelCodeT() {
5462  amd_kernel_code_t Header;
5463  AMDGPU::initDefaultAMDKernelCodeT(Header, &getSTI());
5464 
5465  while (true) {
5466  // Lex EndOfStatement. This is in a while loop, because lexing a comment
5467  // will set the current token to EndOfStatement.
5468  while(trySkipToken(AsmToken::EndOfStatement));
5469 
5470  StringRef ID;
5471  if (!parseId(ID, "expected value identifier or .end_amd_kernel_code_t"))
5472  return true;
5473 
5474  if (ID == ".end_amd_kernel_code_t")
5475  break;
5476 
5477  if (ParseAMDKernelCodeTValue(ID, Header))
5478  return true;
5479  }
5480 
5481  getTargetStreamer().EmitAMDKernelCodeT(Header);
5482 
5483  return false;
5484 }
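// Usage sketch (field values are hypothetical): the directive introduces a
// list of "name = value" fields and is terminated by .end_amd_kernel_code_t.
//
//   .amd_kernel_code_t
//     wavefront_size = 6
//     enable_wgp_mode = 0
//   .end_amd_kernel_code_t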
5485 
5486 bool AMDGPUAsmParser::ParseDirectiveAMDGPUHsaKernel() {
5487  StringRef KernelName;
5488  if (!parseId(KernelName, "expected symbol name"))
5489  return true;
5490 
5491  getTargetStreamer().EmitAMDGPUSymbolType(KernelName,
5492  ELF::STT_AMDGPU_HSA_KERNEL);
5493 
5494  KernelScope.initialize(getContext());
5495  return false;
5496 }
5497 
5498 bool AMDGPUAsmParser::ParseDirectiveISAVersion() {
5499  if (getSTI().getTargetTriple().getArch() != Triple::amdgcn) {
5500  return Error(getLoc(),
5501  ".amd_amdgpu_isa directive is not available on non-amdgcn "
5502  "architectures");
5503  }
5504 
5505  auto TargetIDDirective = getLexer().getTok().getStringContents();
5506  if (getTargetStreamer().getTargetID()->toString() != TargetIDDirective)
5507  return Error(getParser().getTok().getLoc(), "target id must match options");
5508 
5509  getTargetStreamer().EmitISAVersion();
5510  Lex();
5511 
5512  return false;
5513 }
5514 
5515 bool AMDGPUAsmParser::ParseDirectiveHSAMetadata() {
5516  const char *AssemblerDirectiveBegin;
5517  const char *AssemblerDirectiveEnd;
5518  std::tie(AssemblerDirectiveBegin, AssemblerDirectiveEnd) =
5519  isHsaAbiVersion3AndAbove(&getSTI())
5520  ? std::make_tuple(HSAMD::V3::AssemblerDirectiveBegin,
5521  HSAMD::V3::AssemblerDirectiveEnd)
5522  : std::make_tuple(HSAMD::AssemblerDirectiveBegin,
5523  HSAMD::AssemblerDirectiveEnd);
5524 
5525  if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA) {
5526  return Error(getLoc(),
5527  (Twine(AssemblerDirectiveBegin) + Twine(" directive is "
5528  "not available on non-amdhsa OSes")).str());
5529  }
5530 
5531  std::string HSAMetadataString;
5532  if (ParseToEndDirective(AssemblerDirectiveBegin, AssemblerDirectiveEnd,
5533  HSAMetadataString))
5534  return true;
5535 
5536  if (isHsaAbiVersion3AndAbove(&getSTI())) {
5537  if (!getTargetStreamer().EmitHSAMetadataV3(HSAMetadataString))
5538  return Error(getLoc(), "invalid HSA metadata");
5539  } else {
5540  if (!getTargetStreamer().EmitHSAMetadataV2(HSAMetadataString))
5541  return Error(getLoc(), "invalid HSA metadata");
5542  }
5543 
5544  return false;
5545 }
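// Usage sketch (illustrative): the metadata is a YAML blob collected verbatim
// between the begin/end directives chosen above. The spelling below assumes
// the code-object-v3+ directive names; the YAML content is hypothetical.
//
//   .amdgpu_metadata
//   amdhsa.version:
//     - 1
//     - 0
//   .end_amdgpu_metadata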
5546 
5547 /// Common code to parse out a block of text (typically YAML) between start and
5548 /// end directives.
5549 bool AMDGPUAsmParser::ParseToEndDirective(const char *AssemblerDirectiveBegin,
5550  const char *AssemblerDirectiveEnd,
5551  std::string &CollectString) {
5552 
5553  raw_string_ostream CollectStream(CollectString);
5554 
5555  getLexer().setSkipSpace(false);
5556 
5557  bool FoundEnd = false;
5558  while (!isToken(AsmToken::Eof)) {
5559  while (isToken(AsmToken::Space)) {
5560  CollectStream << getTokenStr();
5561  Lex();
5562  }
5563 
5564  if (trySkipId(AssemblerDirectiveEnd)) {
5565  FoundEnd = true;
5566  break;
5567  }
5568 
5569  CollectStream << Parser.parseStringToEndOfStatement()
5570  << getContext().getAsmInfo()->getSeparatorString();
5571 
5572  Parser.eatToEndOfStatement();
5573  }
5574 
5575  getLexer().setSkipSpace(true);
5576 
5577  if (isToken(AsmToken::Eof) && !FoundEnd) {
5578  return TokError(Twine("expected directive ") +
5579  Twine(AssemblerDirectiveEnd) + Twine(" not found"));
5580  }
5581 
5582  CollectStream.flush();
5583  return false;
5584 }
5585 
5586 /// Parse the assembler directive for new MsgPack-format PAL metadata.
5587 bool AMDGPUAsmParser::ParseDirectivePALMetadataBegin() {
5588  std::string String;
5589  if (ParseToEndDirective(AMDGPU::PALMD::AssemblerDirectiveBegin,
5590  AMDGPU::PALMD::AssemblerDirectiveEnd, String))
5591  return true;
5592 
5593  auto PALMetadata = getTargetStreamer().getPALMetadata();
5594  if (!PALMetadata->setFromString(String))
5595  return Error(getLoc(), "invalid PAL metadata");
5596  return false;
5597 }
5598 
5599 /// Parse the assembler directive for old linear-format PAL metadata.
5600 bool AMDGPUAsmParser::ParseDirectivePALMetadata() {
5601  if (getSTI().getTargetTriple().getOS() != Triple::AMDPAL) {
5602  return Error(getLoc(),
5603  (Twine(PALMD::AssemblerDirective) + Twine(" directive is "
5604  "not available on non-amdpal OSes")).str());
5605  }
5606 
5607  auto PALMetadata = getTargetStreamer().getPALMetadata();
5608  PALMetadata->setLegacy();
5609  for (;;) {
5610  uint32_t Key, Value;
5611  if (ParseAsAbsoluteExpression(Key)) {
5612  return TokError(Twine("invalid value in ") +
5613  Twine(PALMD::AssemblerDirective));
5614  }
5615  if (!trySkipToken(AsmToken::Comma)) {
5616  return TokError(Twine("expected an even number of values in ") +
5617  Twine(PALMD::AssemblerDirective));
5618  }
5619  if (ParseAsAbsoluteExpression(Value)) {
5620  return TokError(Twine("invalid value in ") +
5621  Twine(PALMD::AssemblerDirective));
5622  }
5623  PALMetadata->setRegister(Key, Value);
5624  if (!trySkipToken(AsmToken::Comma))
5625  break;
5626  }
5627  return false;
5628 }
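// Usage sketch (assumption, for illustration only): the legacy form takes an
// even number of comma-separated integers interpreted as register/value pairs.
// The directive spelling and values below are not taken from this file.
//
//   .amd_amdgpu_pal_metadata 0x2c0a, 0x0, 0x2c0b, 0x42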
5629 
5630 /// ParseDirectiveAMDGPULDS
5631 /// ::= .amdgpu_lds identifier ',' size_expression [',' align_expression]
5632 bool AMDGPUAsmParser::ParseDirectiveAMDGPULDS() {
5633  if (getParser().checkForValidSection())
5634  return true;
5635 
5636  StringRef Name;
5637  SMLoc NameLoc = getLoc();
5638  if (getParser().parseIdentifier(Name))
5639  return TokError("expected identifier in directive");
5640 
5641  MCSymbol *Symbol = getContext().getOrCreateSymbol(Name);
5642  if (parseToken(AsmToken::Comma, "expected ','"))
5643  return true;
5644 
5645  unsigned LocalMemorySize = AMDGPU::IsaInfo::getLocalMemorySize(&getSTI());
5646 
5647  int64_t Size;
5648  SMLoc SizeLoc = getLoc();
5649  if (getParser().parseAbsoluteExpression(Size))
5650  return true;
5651  if (Size < 0)
5652  return Error(SizeLoc, "size must be non-negative");
5653  if (Size > LocalMemorySize)
5654  return Error(SizeLoc, "size is too large");
5655 
5656  int64_t Alignment = 4;
5657  if (trySkipToken(AsmToken::Comma)) {
5658  SMLoc AlignLoc = getLoc();
5659  if (getParser().parseAbsoluteExpression(Alignment))
5660  return true;
5661  if (Alignment < 0 || !isPowerOf2_64(Alignment))
5662  return Error(AlignLoc, "alignment must be a power of two");
5663 
5664  // Alignment larger than the size of LDS is possible in theory, as long
5665  // as the linker manages to place the symbol at address 0, but we do want
5666  // to make sure the alignment fits nicely into a 32-bit integer.
5667  if (Alignment >= 1u << 31)
5668  return Error(AlignLoc, "alignment is too large");
5669  }
5670 
5671  if (parseEOL())
5672  return true;
5673 
5674  Symbol->redefineIfPossible();
5675  if (!Symbol->isUndefined())
5676  return Error(NameLoc, "invalid symbol redefinition");
5677 
5678  getTargetStreamer().emitAMDGPULDS(Symbol, Size, Align(Alignment));
5679  return false;
5680 }
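// Usage sketch matching the grammar above (symbol name and sizes are
// hypothetical); the alignment operand is optional and defaults to 4:
//
//   .amdgpu_lds my_lds_buffer, 4096, 16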
5681 
5682 bool AMDGPUAsmParser::ParseDirective(AsmToken DirectiveID) {
5683  StringRef IDVal = DirectiveID.getString();
5684 
5685  if (isHsaAbiVersion3AndAbove(&getSTI())) {
5686  if (IDVal == ".amdhsa_kernel")
5687  return ParseDirectiveAMDHSAKernel();
5688 
5689  // TODO: Restructure/combine with PAL metadata directive.
5690  if (IDVal == HSAMD::V3::AssemblerDirectiveBegin)
5691  return ParseDirectiveHSAMetadata();
5692  } else {
5693  if (IDVal == ".hsa_code_object_version")
5694  return ParseDirectiveHSACodeObjectVersion();
5695 
5696  if (IDVal == ".hsa_code_object_isa")
5697  return ParseDirectiveHSACodeObjectISA();
5698 
5699  if (IDVal == ".amd_kernel_code_t")
5700  return ParseDirectiveAMDKernelCodeT();
5701 
5702  if (IDVal == ".amdgpu_hsa_kernel")
5703  return ParseDirectiveAMDGPUHsaKernel();
5704 
5705  if (IDVal == ".amd_amdgpu_isa")
5706  return ParseDirectiveISAVersion();
5707 
5708  if (IDVal == HSAMD::AssemblerDirectiveBegin)
5709  return ParseDirectiveHSAMetadata();
5710  }
5711 
5712  if (IDVal == ".amdgcn_target")
5713  return ParseDirectiveAMDGCNTarget();
5714 
5715  if (IDVal == ".amdgpu_lds")
5716  return ParseDirectiveAMDGPULDS();
5717 
5718  if (IDVal == PALMD::AssemblerDirectiveBegin)
5719  return ParseDirectivePALMetadataBegin();
5720 
5721  if (IDVal == PALMD::AssemblerDirective)
5722  return ParseDirectivePALMetadata();
5723 
5724  return true;
5725 }
5726 
5727 bool AMDGPUAsmParser::subtargetHasRegister(const MCRegisterInfo &MRI,
5728  unsigned RegNo) {
5729 
5730  if (MRI.regsOverlap(AMDGPU::TTMP12_TTMP13_TTMP14_TTMP15, RegNo))
5731  return isGFX9Plus();
5732 
5733  // GFX10+ has 2 more SGPRs 104 and 105.
5734  if (MRI.regsOverlap(AMDGPU::SGPR104_SGPR105, RegNo))
5735  return hasSGPR104_SGPR105();
5736 
5737  switch (RegNo) {
5738  case AMDGPU::SRC_SHARED_BASE_LO:
5739  case AMDGPU::SRC_SHARED_BASE:
5740  case AMDGPU::SRC_SHARED_LIMIT_LO:
5741  case AMDGPU::SRC_SHARED_LIMIT:
5742  case AMDGPU::SRC_PRIVATE_BASE_LO:
5743  case AMDGPU::SRC_PRIVATE_BASE:
5744  case AMDGPU::SRC_PRIVATE_LIMIT_LO:
5745  case AMDGPU::SRC_PRIVATE_LIMIT:
5746  return isGFX9Plus();
5747  case AMDGPU::SRC_POPS_EXITING_WAVE_ID:
5748  return isGFX9Plus() && !isGFX11Plus();
5749  case AMDGPU::TBA:
5750  case AMDGPU::TBA_LO:
5751  case AMDGPU::TBA_HI:
5752  case AMDGPU::TMA:
5753  case AMDGPU::TMA_LO:
5754  case AMDGPU::TMA_HI:
5755  return !isGFX9Plus();
5756  case AMDGPU::XNACK_MASK:
5757  case AMDGPU::XNACK_MASK_LO:
5758  case AMDGPU::XNACK_MASK_HI:
5759  return (isVI() || isGFX9()) && getTargetStreamer().getTargetID()->isXnackSupported();
5760  case AMDGPU::SGPR_NULL:
5761  return isGFX10Plus();
5762  default:
5763  break;
5764  }
5765 
5766  if (isCI())
5767  return true;
5768 
5769  if (isSI() || isGFX10Plus()) {
5770  // No flat_scr on SI.
5771  // On GFX10Plus flat scratch is not a valid register operand and can only be
5772  // accessed with s_setreg/s_getreg.
5773  switch (RegNo) {
5774  case AMDGPU::FLAT_SCR:
5775  case AMDGPU::FLAT_SCR_LO:
5776  case AMDGPU::FLAT_SCR_HI:
5777  return false;
5778  default:
5779  return true;
5780  }
5781  }
5782 
5783  // VI only has 102 SGPRs, so make sure we aren't trying to use the 2 more that
5784  // SI/CI have.
5785  if (MRI.regsOverlap(AMDGPU::SGPR102_SGPR103, RegNo))
5786  return hasSGPR102_SGPR103();
5787 
5788  return true;
5789 }
5790 
5791 OperandMatchResultTy
5792 AMDGPUAsmParser::parseOperand(OperandVector &Operands, StringRef Mnemonic,
5793  OperandMode Mode) {
5794  OperandMatchResultTy ResTy = parseVOPD(Operands);
5795  if (ResTy == MatchOperand_Success || ResTy == MatchOperand_ParseFail ||
5796  isToken(AsmToken::EndOfStatement))
5797  return ResTy;
5798 
5799  // Try to parse with a custom parser
5800  ResTy = MatchOperandParserImpl(Operands, Mnemonic);
5801 
5802  // If we successfully parsed the operand, or if there was an error parsing,
5803  // we are done.
5804  //
5805  // If we are parsing after we reach EndOfStatement then this means we
5806  // are appending default values to the Operands list. This is only done
5807  // by custom parsers, so we shouldn't continue on to the generic parsing.
5808  if (ResTy == MatchOperand_Success || ResTy == MatchOperand_ParseFail ||
5809  isToken(AsmToken::EndOfStatement))
5810  return ResTy;
5811 
5812  SMLoc RBraceLoc;
5813  SMLoc LBraceLoc = getLoc();
5814  if (Mode == OperandMode_NSA && trySkipToken(AsmToken::LBrac)) {
5815  unsigned Prefix = Operands.size();
5816 
5817  for (;;) {
5818  auto Loc = getLoc();
5819  ResTy = parseReg(Operands);
5820  if (ResTy == MatchOperand_NoMatch)
5821  Error(Loc, "expected a register");
5822  if (ResTy != MatchOperand_Success)
5823  return MatchOperand_ParseFail;
5824 
5825  RBraceLoc = getLoc();
5826  if (trySkipToken(AsmToken::RBrac))
5827  break;
5828 
5829  if (!skipToken(AsmToken::Comma,
5830  "expected a comma or a closing square bracket")) {
5831  return MatchOperand_ParseFail;
5832  }
5833  }
5834 
5835  if (Operands.size() - Prefix > 1) {
5836  Operands.insert(Operands.begin() + Prefix,
5837  AMDGPUOperand::CreateToken(this, "[", LBraceLoc));
5838  Operands.push_back(AMDGPUOperand::CreateToken(this, "]", RBraceLoc));
5839  }
5840 
5841  return MatchOperand_Success;
5842  }
5843 
5844  return parseRegOrImm(Operands);
5845 }
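// Sketch of the NSA form handled above (operands hypothetical): in
// OperandMode_NSA a bracketed, non-sequential vector address such as
// [v4, v6, v7] is parsed register by register, and "[" / "]" tokens are
// re-inserted only when more than one register was read.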
5846 
5847 StringRef AMDGPUAsmParser::parseMnemonicSuffix(StringRef Name) {
5848  // Clear any forced encodings from the previous instruction.
5849  setForcedEncodingSize(0);
5850  setForcedDPP(false);
5851  setForcedSDWA(false);
5852 
5853  if (Name.endswith("_e64_dpp")) {
5854  setForcedDPP(true);
5855  setForcedEncodingSize(64);
5856  return Name.substr(0, Name.size() - 8);
5857  } else if (Name.endswith("_e64")) {
5858  setForcedEncodingSize(64);
5859  return Name.substr(0, Name.size() - 4);
5860  } else if (Name.endswith("_e32")) {
5861  setForcedEncodingSize(32);
5862  return Name.substr(0, Name.size() - 4);
5863  } else if (Name.endswith("_dpp")) {
5864  setForcedDPP(true);
5865  return Name.substr(0, Name.size() - 4);
5866  } else if (Name.endswith("_sdwa")) {
5867  setForcedSDWA(true);
5868  return Name.substr(0, Name.size() - 5);
5869  }
5870  return Name;
5871 }
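// Examples of the suffix handling above (the base mnemonics are illustrative):
// the suffix is stripped and recorded as a forced encoding or variant.
//
//   v_add_f32_e32   -> "v_add_f32",  forced 32-bit encoding
//   v_add_f32_e64   -> "v_add_f32",  forced 64-bit encoding
//   v_mov_b32_dpp   -> "v_mov_b32",  forced DPP
//   v_mov_b32_sdwa  -> "v_mov_b32",  forced SDWA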
5872 
5873 static void applyMnemonicAliases(StringRef &Mnemonic,
5874  const FeatureBitset &Features,
5875  unsigned VariantID);
5876 
5877 bool AMDGPUAsmParser::ParseInstruction(ParseInstructionInfo &Info,
5878  StringRef Name,
5879  SMLoc NameLoc, OperandVector &Operands) {
5880  // Add the instruction mnemonic
5881  Name = parseMnemonicSuffix(Name);
5882 
5883  // If the target architecture uses MnemonicAlias, call it here to parse
5884  // operands correctly.
5885  applyMnemonicAliases(Name, getAvailableFeatures(), 0);
5886 
5887  Operands.push_back(AMDGPUOperand::CreateToken(this, Name, NameLoc));
5888 
5889  bool IsMIMG = Name.startswith("image_");
5890