AMDGPUAsmParser.cpp (LLVM 15.0.0git)
1 //===- AMDGPUAsmParser.cpp - Parse SI asm to MCInst instructions ----------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "AMDKernelCodeT.h"
#include "MCTargetDesc/AMDGPUTargetStreamer.h"
12 #include "SIDefines.h"
13 #include "SIInstrInfo.h"
14 #include "SIRegisterInfo.h"
16 #include "Utils/AMDGPUAsmUtils.h"
17 #include "Utils/AMDGPUBaseInfo.h"
19 #include "llvm/ADT/APFloat.h"
21 #include "llvm/ADT/StringSet.h"
22 #include "llvm/ADT/Twine.h"
23 #include "llvm/BinaryFormat/ELF.h"
24 #include "llvm/MC/MCAsmInfo.h"
25 #include "llvm/MC/MCContext.h"
26 #include "llvm/MC/MCExpr.h"
27 #include "llvm/MC/MCInst.h"
#include "llvm/MC/MCParser/MCAsmParser.h"
#include "llvm/MC/MCParser/MCParsedAsmOperand.h"
#include "llvm/MC/MCParser/MCTargetAsmParser.h"
#include "llvm/MC/MCSubtargetInfo.h"
32 #include "llvm/MC/MCSymbol.h"
33 #include "llvm/MC/TargetRegistry.h"
#include "llvm/Support/AMDHSAKernelDescriptor.h"
36 #include "llvm/Support/Casting.h"
#include "llvm/Support/MachineValueType.h"
#include "llvm/Support/MathExtras.h"
40 
41 using namespace llvm;
42 using namespace llvm::AMDGPU;
43 using namespace llvm::amdhsa;
44 
45 namespace {
46 
47 class AMDGPUAsmParser;
48 
49 enum RegisterKind { IS_UNKNOWN, IS_VGPR, IS_SGPR, IS_AGPR, IS_TTMP, IS_SPECIAL };
50 
51 //===----------------------------------------------------------------------===//
52 // Operand
53 //===----------------------------------------------------------------------===//
54 
55 class AMDGPUOperand : public MCParsedAsmOperand {
56  enum KindTy {
57  Token,
58  Immediate,
59  Register,
60  Expression,
61  } Kind;
62 
63  SMLoc StartLoc, EndLoc;
64  const AMDGPUAsmParser *AsmParser;
65 
66 public:
67  AMDGPUOperand(KindTy Kind_, const AMDGPUAsmParser *AsmParser_)
68  : Kind(Kind_), AsmParser(AsmParser_) {}
69 
70  using Ptr = std::unique_ptr<AMDGPUOperand>;
71 
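// Source-operand modifiers parsed around a register or immediate: 'abs'/'|...|'
// and 'neg'/'-' are floating-point modifiers, 'sext(...)' is an integer modifier.
// They are folded into the instruction's source-modifier immediate (see
// addRegOrImmWithInputModsOperands below) using the SISrcMods encoding.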
72  struct Modifiers {
73  bool Abs = false;
74  bool Neg = false;
75  bool Sext = false;
76 
77  bool hasFPModifiers() const { return Abs || Neg; }
78  bool hasIntModifiers() const { return Sext; }
79  bool hasModifiers() const { return hasFPModifiers() || hasIntModifiers(); }
80 
81  int64_t getFPModifiersOperand() const {
82  int64_t Operand = 0;
83  Operand |= Abs ? SISrcMods::ABS : 0u;
84  Operand |= Neg ? SISrcMods::NEG : 0u;
85  return Operand;
86  }
87 
88  int64_t getIntModifiersOperand() const {
89  int64_t Operand = 0;
90  Operand |= Sext ? SISrcMods::SEXT : 0u;
91  return Operand;
92  }
93 
94  int64_t getModifiersOperand() const {
95  assert(!(hasFPModifiers() && hasIntModifiers())
96  && "fp and int modifiers should not be used simultaneously");
97  if (hasFPModifiers()) {
98  return getFPModifiersOperand();
99  } else if (hasIntModifiers()) {
100  return getIntModifiersOperand();
101  } else {
102  return 0;
103  }
104  }
105 
106  friend raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods);
107  };
108 
109  enum ImmTy {
110  ImmTyNone,
111  ImmTyGDS,
112  ImmTyLDS,
113  ImmTyOffen,
114  ImmTyIdxen,
115  ImmTyAddr64,
116  ImmTyOffset,
117  ImmTyInstOffset,
118  ImmTyOffset0,
119  ImmTyOffset1,
120  ImmTyCPol,
121  ImmTySWZ,
122  ImmTyTFE,
123  ImmTyD16,
124  ImmTyClampSI,
125  ImmTyOModSI,
126  ImmTyDPP8,
127  ImmTyDppCtrl,
128  ImmTyDppRowMask,
129  ImmTyDppBankMask,
130  ImmTyDppBoundCtrl,
131  ImmTyDppFi,
132  ImmTySdwaDstSel,
133  ImmTySdwaSrc0Sel,
134  ImmTySdwaSrc1Sel,
135  ImmTySdwaDstUnused,
136  ImmTyDMask,
137  ImmTyDim,
138  ImmTyUNorm,
139  ImmTyDA,
140  ImmTyR128A16,
141  ImmTyA16,
142  ImmTyLWE,
143  ImmTyExpTgt,
144  ImmTyExpCompr,
145  ImmTyExpVM,
146  ImmTyFORMAT,
147  ImmTyHwreg,
148  ImmTyOff,
149  ImmTySendMsg,
150  ImmTyInterpSlot,
151  ImmTyInterpAttr,
152  ImmTyAttrChan,
153  ImmTyOpSel,
154  ImmTyOpSelHi,
155  ImmTyNegLo,
156  ImmTyNegHi,
157  ImmTySwizzle,
158  ImmTyGprIdxMode,
159  ImmTyHigh,
160  ImmTyBLGP,
161  ImmTyCBSZ,
162  ImmTyABID,
163  ImmTyEndpgm,
164  ImmTyWaitVDST,
165  };
166 
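// Records how an immediate ended up being encoded (inline constant vs. literal);
// see setImmKindLiteral()/setImmKindConst() below. The field is mutable because
// it is updated while operands are added to the MCInst.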
167  enum ImmKindTy {
168  ImmKindTyNone,
169  ImmKindTyLiteral,
170  ImmKindTyConst,
171  };
172 
173 private:
174  struct TokOp {
175  const char *Data;
176  unsigned Length;
177  };
178 
179  struct ImmOp {
180  int64_t Val;
181  ImmTy Type;
182  bool IsFPImm;
183  mutable ImmKindTy Kind;
184  Modifiers Mods;
185  };
186 
187  struct RegOp {
188  unsigned RegNo;
189  Modifiers Mods;
190  };
191 
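// Operand payload; the active member is selected by Kind.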
192  union {
193  TokOp Tok;
194  ImmOp Imm;
195  RegOp Reg;
196  const MCExpr *Expr;
197  };
198 
199 public:
200  bool isToken() const override {
201  if (Kind == Token)
202  return true;
203 
204  // When parsing operands, we can't always tell if something was meant to be
205  // a token, like 'gds', or an expression that references a global variable.
206  // In this case, we assume the string is an expression, and if we need to
207  // interpret it as a token, we treat the symbol name as the token.
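// (For example, the trailing 'gds' in a DS instruction such as
// "ds_add_u32 v1, v2 gds" may reach us as a reference to a symbol named
// 'gds'; getToken() then returns the symbol's name.)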
208  return isSymbolRefExpr();
209  }
210 
211  bool isSymbolRefExpr() const {
212  return isExpr() && Expr && isa<MCSymbolRefExpr>(Expr);
213  }
214 
215  bool isImm() const override {
216  return Kind == Immediate;
217  }
218 
219  void setImmKindNone() const {
220  assert(isImm());
221  Imm.Kind = ImmKindTyNone;
222  }
223 
224  void setImmKindLiteral() const {
225  assert(isImm());
226  Imm.Kind = ImmKindTyLiteral;
227  }
228 
229  void setImmKindConst() const {
230  assert(isImm());
231  Imm.Kind = ImmKindTyConst;
232  }
233 
234  bool IsImmKindLiteral() const {
235  return isImm() && Imm.Kind == ImmKindTyLiteral;
236  }
237 
238  bool isImmKindConst() const {
239  return isImm() && Imm.Kind == ImmKindTyConst;
240  }
241 
242  bool isInlinableImm(MVT type) const;
243  bool isLiteralImm(MVT type) const;
244 
245  bool isRegKind() const {
246  return Kind == Register;
247  }
248 
249  bool isReg() const override {
250  return isRegKind() && !hasModifiers();
251  }
252 
253  bool isRegOrInline(unsigned RCID, MVT type) const {
254  return isRegClass(RCID) || isInlinableImm(type);
255  }
256 
257  bool isRegOrImmWithInputMods(unsigned RCID, MVT type) const {
258  return isRegOrInline(RCID, type) || isLiteralImm(type);
259  }
260 
261  bool isRegOrImmWithInt16InputMods() const {
262  return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i16);
263  }
264 
265  bool isRegOrImmWithInt32InputMods() const {
266  return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i32);
267  }
268 
269  bool isRegOrImmWithInt64InputMods() const {
270  return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::i64);
271  }
272 
273  bool isRegOrImmWithFP16InputMods() const {
274  return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f16);
275  }
276 
277  bool isRegOrImmWithFP32InputMods() const {
278  return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f32);
279  }
280 
281  bool isRegOrImmWithFP64InputMods() const {
282  return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::f64);
283  }
284 
285  bool isVReg() const {
286  return isRegClass(AMDGPU::VGPR_32RegClassID) ||
287  isRegClass(AMDGPU::VReg_64RegClassID) ||
288  isRegClass(AMDGPU::VReg_96RegClassID) ||
289  isRegClass(AMDGPU::VReg_128RegClassID) ||
290  isRegClass(AMDGPU::VReg_160RegClassID) ||
291  isRegClass(AMDGPU::VReg_192RegClassID) ||
292  isRegClass(AMDGPU::VReg_256RegClassID) ||
293  isRegClass(AMDGPU::VReg_512RegClassID) ||
294  isRegClass(AMDGPU::VReg_1024RegClassID);
295  }
296 
297  bool isVReg32() const {
298  return isRegClass(AMDGPU::VGPR_32RegClassID);
299  }
300 
301  bool isVReg32OrOff() const {
302  return isOff() || isVReg32();
303  }
304 
305  bool isNull() const {
306  return isRegKind() && getReg() == AMDGPU::SGPR_NULL;
307  }
308 
309  bool isVRegWithInputMods() const;
310 
311  bool isSDWAOperand(MVT type) const;
312  bool isSDWAFP16Operand() const;
313  bool isSDWAFP32Operand() const;
314  bool isSDWAInt16Operand() const;
315  bool isSDWAInt32Operand() const;
316 
317  bool isImmTy(ImmTy ImmT) const {
318  return isImm() && Imm.Type == ImmT;
319  }
320 
321  bool isImmModifier() const {
322  return isImm() && Imm.Type != ImmTyNone;
323  }
324 
325  bool isClampSI() const { return isImmTy(ImmTyClampSI); }
326  bool isOModSI() const { return isImmTy(ImmTyOModSI); }
327  bool isDMask() const { return isImmTy(ImmTyDMask); }
328  bool isDim() const { return isImmTy(ImmTyDim); }
329  bool isUNorm() const { return isImmTy(ImmTyUNorm); }
330  bool isDA() const { return isImmTy(ImmTyDA); }
331  bool isR128A16() const { return isImmTy(ImmTyR128A16); }
332  bool isGFX10A16() const { return isImmTy(ImmTyA16); }
333  bool isLWE() const { return isImmTy(ImmTyLWE); }
334  bool isOff() const { return isImmTy(ImmTyOff); }
335  bool isExpTgt() const { return isImmTy(ImmTyExpTgt); }
336  bool isExpVM() const { return isImmTy(ImmTyExpVM); }
337  bool isExpCompr() const { return isImmTy(ImmTyExpCompr); }
338  bool isOffen() const { return isImmTy(ImmTyOffen); }
339  bool isIdxen() const { return isImmTy(ImmTyIdxen); }
340  bool isAddr64() const { return isImmTy(ImmTyAddr64); }
341  bool isOffset() const { return isImmTy(ImmTyOffset) && isUInt<16>(getImm()); }
342  bool isOffset0() const { return isImmTy(ImmTyOffset0) && isUInt<8>(getImm()); }
343  bool isOffset1() const { return isImmTy(ImmTyOffset1) && isUInt<8>(getImm()); }
344 
345  bool isFlatOffset() const { return isImmTy(ImmTyOffset) || isImmTy(ImmTyInstOffset); }
346  bool isGDS() const { return isImmTy(ImmTyGDS); }
347  bool isLDS() const { return isImmTy(ImmTyLDS); }
348  bool isCPol() const { return isImmTy(ImmTyCPol); }
349  bool isSWZ() const { return isImmTy(ImmTySWZ); }
350  bool isTFE() const { return isImmTy(ImmTyTFE); }
351  bool isD16() const { return isImmTy(ImmTyD16); }
352  bool isFORMAT() const { return isImmTy(ImmTyFORMAT) && isUInt<7>(getImm()); }
353  bool isBankMask() const { return isImmTy(ImmTyDppBankMask); }
354  bool isRowMask() const { return isImmTy(ImmTyDppRowMask); }
355  bool isBoundCtrl() const { return isImmTy(ImmTyDppBoundCtrl); }
356  bool isFI() const { return isImmTy(ImmTyDppFi); }
357  bool isSDWADstSel() const { return isImmTy(ImmTySdwaDstSel); }
358  bool isSDWASrc0Sel() const { return isImmTy(ImmTySdwaSrc0Sel); }
359  bool isSDWASrc1Sel() const { return isImmTy(ImmTySdwaSrc1Sel); }
360  bool isSDWADstUnused() const { return isImmTy(ImmTySdwaDstUnused); }
361  bool isInterpSlot() const { return isImmTy(ImmTyInterpSlot); }
362  bool isInterpAttr() const { return isImmTy(ImmTyInterpAttr); }
363  bool isAttrChan() const { return isImmTy(ImmTyAttrChan); }
364  bool isOpSel() const { return isImmTy(ImmTyOpSel); }
365  bool isOpSelHi() const { return isImmTy(ImmTyOpSelHi); }
366  bool isNegLo() const { return isImmTy(ImmTyNegLo); }
367  bool isNegHi() const { return isImmTy(ImmTyNegHi); }
368  bool isHigh() const { return isImmTy(ImmTyHigh); }
369 
370  bool isMod() const {
371  return isClampSI() || isOModSI();
372  }
373 
374  bool isRegOrImm() const {
375  return isReg() || isImm();
376  }
377 
378  bool isRegClass(unsigned RCID) const;
379 
380  bool isInlineValue() const;
381 
382  bool isRegOrInlineNoMods(unsigned RCID, MVT type) const {
383  return isRegOrInline(RCID, type) && !hasModifiers();
384  }
385 
386  bool isSCSrcB16() const {
387  return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i16);
388  }
389 
390  bool isSCSrcV2B16() const {
391  return isSCSrcB16();
392  }
393 
394  bool isSCSrcB32() const {
395  return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i32);
396  }
397 
398  bool isSCSrcB64() const {
399  return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::i64);
400  }
401 
402  bool isBoolReg() const;
403 
404  bool isSCSrcF16() const {
405  return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f16);
406  }
407 
408  bool isSCSrcV2F16() const {
409  return isSCSrcF16();
410  }
411 
412  bool isSCSrcF32() const {
413  return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f32);
414  }
415 
416  bool isSCSrcF64() const {
417  return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::f64);
418  }
419 
420  bool isSSrcB32() const {
421  return isSCSrcB32() || isLiteralImm(MVT::i32) || isExpr();
422  }
423 
424  bool isSSrcB16() const {
425  return isSCSrcB16() || isLiteralImm(MVT::i16);
426  }
427 
428  bool isSSrcV2B16() const {
429  llvm_unreachable("cannot happen");
430  return isSSrcB16();
431  }
432 
433  bool isSSrcB64() const {
434  // TODO: Find out how SALU supports extension of 32-bit literals to 64 bits.
435  // See isVSrc64().
436  return isSCSrcB64() || isLiteralImm(MVT::i64);
437  }
438 
439  bool isSSrcF32() const {
440  return isSCSrcB32() || isLiteralImm(MVT::f32) || isExpr();
441  }
442 
443  bool isSSrcF64() const {
444  return isSCSrcB64() || isLiteralImm(MVT::f64);
445  }
446 
447  bool isSSrcF16() const {
448  return isSCSrcB16() || isLiteralImm(MVT::f16);
449  }
450 
451  bool isSSrcV2F16() const {
452  llvm_unreachable("cannot happen");
453  return isSSrcF16();
454  }
455 
456  bool isSSrcV2FP32() const {
457  llvm_unreachable("cannot happen");
458  return isSSrcF32();
459  }
460 
461  bool isSCSrcV2FP32() const {
462  llvm_unreachable("cannot happen");
463  return isSCSrcF32();
464  }
465 
466  bool isSSrcV2INT32() const {
467  llvm_unreachable("cannot happen");
468  return isSSrcB32();
469  }
470 
471  bool isSCSrcV2INT32() const {
472  llvm_unreachable("cannot happen");
473  return isSCSrcB32();
474  }
475 
476  bool isSSrcOrLdsB32() const {
477  return isRegOrInlineNoMods(AMDGPU::SRegOrLds_32RegClassID, MVT::i32) ||
478  isLiteralImm(MVT::i32) || isExpr();
479  }
480 
481  bool isVCSrcB32() const {
482  return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i32);
483  }
484 
485  bool isVCSrcB64() const {
486  return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::i64);
487  }
488 
489  bool isVCSrcB16() const {
490  return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i16);
491  }
492 
493  bool isVCSrcV2B16() const {
494  return isVCSrcB16();
495  }
496 
497  bool isVCSrcF32() const {
498  return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f32);
499  }
500 
501  bool isVCSrcF64() const {
502  return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::f64);
503  }
504 
505  bool isVCSrcF16() const {
506  return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f16);
507  }
508 
509  bool isVCSrcV2F16() const {
510  return isVCSrcF16();
511  }
512 
513  bool isVSrcB32() const {
514  return isVCSrcF32() || isLiteralImm(MVT::i32) || isExpr();
515  }
516 
517  bool isVSrcB64() const {
518  return isVCSrcF64() || isLiteralImm(MVT::i64);
519  }
520 
521  bool isVSrcB16() const {
522  return isVCSrcB16() || isLiteralImm(MVT::i16);
523  }
524 
525  bool isVSrcV2B16() const {
526  return isVSrcB16() || isLiteralImm(MVT::v2i16);
527  }
528 
529  bool isVCSrcV2FP32() const {
530  return isVCSrcF64();
531  }
532 
533  bool isVSrcV2FP32() const {
534  return isVSrcF64() || isLiteralImm(MVT::v2f32);
535  }
536 
537  bool isVCSrcV2INT32() const {
538  return isVCSrcB64();
539  }
540 
541  bool isVSrcV2INT32() const {
542  return isVSrcB64() || isLiteralImm(MVT::v2i32);
543  }
544 
545  bool isVSrcF32() const {
546  return isVCSrcF32() || isLiteralImm(MVT::f32) || isExpr();
547  }
548 
549  bool isVSrcF64() const {
550  return isVCSrcF64() || isLiteralImm(MVT::f64);
551  }
552 
553  bool isVSrcF16() const {
554  return isVCSrcF16() || isLiteralImm(MVT::f16);
555  }
556 
557  bool isVSrcV2F16() const {
558  return isVSrcF16() || isLiteralImm(MVT::v2f16);
559  }
560 
561  bool isVISrcB32() const {
562  return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i32);
563  }
564 
565  bool isVISrcB16() const {
566  return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i16);
567  }
568 
569  bool isVISrcV2B16() const {
570  return isVISrcB16();
571  }
572 
573  bool isVISrcF32() const {
574  return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f32);
575  }
576 
577  bool isVISrcF16() const {
578  return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f16);
579  }
580 
581  bool isVISrcV2F16() const {
582  return isVISrcF16() || isVISrcB32();
583  }
584 
585  bool isVISrc_64B64() const {
586  return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::i64);
587  }
588 
589  bool isVISrc_64F64() const {
590  return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::f64);
591  }
592 
593  bool isVISrc_64V2FP32() const {
594  return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::f32);
595  }
596 
597  bool isVISrc_64V2INT32() const {
598  return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::i32);
599  }
600 
601  bool isVISrc_256B64() const {
602  return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::i64);
603  }
604 
605  bool isVISrc_256F64() const {
606  return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::f64);
607  }
608 
609  bool isVISrc_128B16() const {
610  return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::i16);
611  }
612 
613  bool isVISrc_128V2B16() const {
614  return isVISrc_128B16();
615  }
616 
617  bool isVISrc_128B32() const {
618  return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::i32);
619  }
620 
621  bool isVISrc_128F32() const {
622  return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::f32);
623  }
624 
625  bool isVISrc_256V2FP32() const {
626  return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::f32);
627  }
628 
629  bool isVISrc_256V2INT32() const {
630  return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::i32);
631  }
632 
633  bool isVISrc_512B32() const {
634  return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::i32);
635  }
636 
637  bool isVISrc_512B16() const {
638  return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::i16);
639  }
640 
641  bool isVISrc_512V2B16() const {
642  return isVISrc_512B16();
643  }
644 
645  bool isVISrc_512F32() const {
646  return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::f32);
647  }
648 
649  bool isVISrc_512F16() const {
650  return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::f16);
651  }
652 
653  bool isVISrc_512V2F16() const {
654  return isVISrc_512F16() || isVISrc_512B32();
655  }
656 
657  bool isVISrc_1024B32() const {
658  return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::i32);
659  }
660 
661  bool isVISrc_1024B16() const {
662  return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::i16);
663  }
664 
665  bool isVISrc_1024V2B16() const {
666  return isVISrc_1024B16();
667  }
668 
669  bool isVISrc_1024F32() const {
670  return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::f32);
671  }
672 
673  bool isVISrc_1024F16() const {
674  return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::f16);
675  }
676 
677  bool isVISrc_1024V2F16() const {
678  return isVISrc_1024F16() || isVISrc_1024B32();
679  }
680 
681  bool isAISrcB32() const {
682  return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i32);
683  }
684 
685  bool isAISrcB16() const {
686  return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i16);
687  }
688 
689  bool isAISrcV2B16() const {
690  return isAISrcB16();
691  }
692 
693  bool isAISrcF32() const {
694  return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f32);
695  }
696 
697  bool isAISrcF16() const {
698  return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f16);
699  }
700 
701  bool isAISrcV2F16() const {
702  return isAISrcF16() || isAISrcB32();
703  }
704 
705  bool isAISrc_64B64() const {
706  return isRegOrInlineNoMods(AMDGPU::AReg_64RegClassID, MVT::i64);
707  }
708 
709  bool isAISrc_64F64() const {
710  return isRegOrInlineNoMods(AMDGPU::AReg_64RegClassID, MVT::f64);
711  }
712 
713  bool isAISrc_128B32() const {
714  return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i32);
715  }
716 
717  bool isAISrc_128B16() const {
718  return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i16);
719  }
720 
721  bool isAISrc_128V2B16() const {
722  return isAISrc_128B16();
723  }
724 
725  bool isAISrc_128F32() const {
726  return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f32);
727  }
728 
729  bool isAISrc_128F16() const {
730  return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f16);
731  }
732 
733  bool isAISrc_128V2F16() const {
734  return isAISrc_128F16() || isAISrc_128B32();
735  }
736 
737  bool isVISrc_128F16() const {
738  return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::f16);
739  }
740 
741  bool isVISrc_128V2F16() const {
742  return isVISrc_128F16() || isVISrc_128B32();
743  }
744 
745  bool isAISrc_256B64() const {
746  return isRegOrInlineNoMods(AMDGPU::AReg_256RegClassID, MVT::i64);
747  }
748 
749  bool isAISrc_256F64() const {
750  return isRegOrInlineNoMods(AMDGPU::AReg_256RegClassID, MVT::f64);
751  }
752 
753  bool isAISrc_512B32() const {
754  return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i32);
755  }
756 
757  bool isAISrc_512B16() const {
758  return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i16);
759  }
760 
761  bool isAISrc_512V2B16() const {
762  return isAISrc_512B16();
763  }
764 
765  bool isAISrc_512F32() const {
766  return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f32);
767  }
768 
769  bool isAISrc_512F16() const {
770  return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f16);
771  }
772 
773  bool isAISrc_512V2F16() const {
774  return isAISrc_512F16() || isAISrc_512B32();
775  }
776 
777  bool isAISrc_1024B32() const {
778  return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i32);
779  }
780 
781  bool isAISrc_1024B16() const {
782  return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i16);
783  }
784 
785  bool isAISrc_1024V2B16() const {
786  return isAISrc_1024B16();
787  }
788 
789  bool isAISrc_1024F32() const {
790  return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f32);
791  }
792 
793  bool isAISrc_1024F16() const {
794  return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f16);
795  }
796 
797  bool isAISrc_1024V2F16() const {
798  return isAISrc_1024F16() || isAISrc_1024B32();
799  }
800 
801  bool isKImmFP32() const {
802  return isLiteralImm(MVT::f32);
803  }
804 
805  bool isKImmFP16() const {
806  return isLiteralImm(MVT::f16);
807  }
808 
809  bool isMem() const override {
810  return false;
811  }
812 
813  bool isExpr() const {
814  return Kind == Expression;
815  }
816 
817  bool isSoppBrTarget() const {
818  return isExpr() || isImm();
819  }
820 
821  bool isSWaitCnt() const;
822  bool isDepCtr() const;
823  bool isSDelayAlu() const;
824  bool isHwreg() const;
825  bool isSendMsg() const;
826  bool isSwizzle() const;
827  bool isSMRDOffset8() const;
828  bool isSMEMOffset() const;
829  bool isSMRDLiteralOffset() const;
830  bool isDPP8() const;
831  bool isDPPCtrl() const;
832  bool isBLGP() const;
833  bool isCBSZ() const;
834  bool isABID() const;
835  bool isGPRIdxMode() const;
836  bool isS16Imm() const;
837  bool isU16Imm() const;
838  bool isEndpgm() const;
839  bool isWaitVDST() const;
840 
841  StringRef getExpressionAsToken() const {
842  assert(isExpr());
843  const MCSymbolRefExpr *S = cast<MCSymbolRefExpr>(Expr);
844  return S->getSymbol().getName();
845  }
846 
847  StringRef getToken() const {
848  assert(isToken());
849 
850  if (Kind == Expression)
851  return getExpressionAsToken();
852 
853  return StringRef(Tok.Data, Tok.Length);
854  }
855 
856  int64_t getImm() const {
857  assert(isImm());
858  return Imm.Val;
859  }
860 
861  void setImm(int64_t Val) {
862  assert(isImm());
863  Imm.Val = Val;
864  }
865 
866  ImmTy getImmTy() const {
867  assert(isImm());
868  return Imm.Type;
869  }
870 
871  unsigned getReg() const override {
872  assert(isRegKind());
873  return Reg.RegNo;
874  }
875 
876  SMLoc getStartLoc() const override {
877  return StartLoc;
878  }
879 
880  SMLoc getEndLoc() const override {
881  return EndLoc;
882  }
883 
884  SMRange getLocRange() const {
885  return SMRange(StartLoc, EndLoc);
886  }
887 
888  Modifiers getModifiers() const {
889  assert(isRegKind() || isImmTy(ImmTyNone));
890  return isRegKind() ? Reg.Mods : Imm.Mods;
891  }
892 
893  void setModifiers(Modifiers Mods) {
894  assert(isRegKind() || isImmTy(ImmTyNone));
895  if (isRegKind())
896  Reg.Mods = Mods;
897  else
898  Imm.Mods = Mods;
899  }
900 
901  bool hasModifiers() const {
902  return getModifiers().hasModifiers();
903  }
904 
905  bool hasFPModifiers() const {
906  return getModifiers().hasFPModifiers();
907  }
908 
909  bool hasIntModifiers() const {
910  return getModifiers().hasIntModifiers();
911  }
912 
913  uint64_t applyInputFPModifiers(uint64_t Val, unsigned Size) const;
914 
915  void addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers = true) const;
916 
917  void addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const;
918 
919  template <unsigned Bitwidth>
920  void addKImmFPOperands(MCInst &Inst, unsigned N) const;
921 
922  void addKImmFP16Operands(MCInst &Inst, unsigned N) const {
923  addKImmFPOperands<16>(Inst, N);
924  }
925 
926  void addKImmFP32Operands(MCInst &Inst, unsigned N) const {
927  addKImmFPOperands<32>(Inst, N);
928  }
929 
930  void addRegOperands(MCInst &Inst, unsigned N) const;
931 
932  void addBoolRegOperands(MCInst &Inst, unsigned N) const {
933  addRegOperands(Inst, N);
934  }
935 
936  void addRegOrImmOperands(MCInst &Inst, unsigned N) const {
937  if (isRegKind())
938  addRegOperands(Inst, N);
939  else if (isExpr())
940  Inst.addOperand(MCOperand::createExpr(Expr));
941  else
942  addImmOperands(Inst, N);
943  }
944 
945  void addRegOrImmWithInputModsOperands(MCInst &Inst, unsigned N) const {
946  Modifiers Mods = getModifiers();
947  Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand()));
948  if (isRegKind()) {
949  addRegOperands(Inst, N);
950  } else {
951  addImmOperands(Inst, N, false);
952  }
953  }
954 
955  void addRegOrImmWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
956  assert(!hasIntModifiers());
957  addRegOrImmWithInputModsOperands(Inst, N);
958  }
959 
960  void addRegOrImmWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
961  assert(!hasFPModifiers());
962  addRegOrImmWithInputModsOperands(Inst, N);
963  }
964 
965  void addRegWithInputModsOperands(MCInst &Inst, unsigned N) const {
966  Modifiers Mods = getModifiers();
967  Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand()));
968  assert(isRegKind());
969  addRegOperands(Inst, N);
970  }
971 
972  void addRegWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
973  assert(!hasIntModifiers());
974  addRegWithInputModsOperands(Inst, N);
975  }
976 
977  void addRegWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
978  assert(!hasFPModifiers());
979  addRegWithInputModsOperands(Inst, N);
980  }
981 
982  void addSoppBrTargetOperands(MCInst &Inst, unsigned N) const {
983  if (isImm())
984  addImmOperands(Inst, N);
985  else {
986  assert(isExpr());
987  Inst.addOperand(MCOperand::createExpr(Expr));
988  }
989  }
990 
991  static void printImmTy(raw_ostream& OS, ImmTy Type) {
992  switch (Type) {
993  case ImmTyNone: OS << "None"; break;
994  case ImmTyGDS: OS << "GDS"; break;
995  case ImmTyLDS: OS << "LDS"; break;
996  case ImmTyOffen: OS << "Offen"; break;
997  case ImmTyIdxen: OS << "Idxen"; break;
998  case ImmTyAddr64: OS << "Addr64"; break;
999  case ImmTyOffset: OS << "Offset"; break;
1000  case ImmTyInstOffset: OS << "InstOffset"; break;
1001  case ImmTyOffset0: OS << "Offset0"; break;
1002  case ImmTyOffset1: OS << "Offset1"; break;
1003  case ImmTyCPol: OS << "CPol"; break;
1004  case ImmTySWZ: OS << "SWZ"; break;
1005  case ImmTyTFE: OS << "TFE"; break;
1006  case ImmTyD16: OS << "D16"; break;
1007  case ImmTyFORMAT: OS << "FORMAT"; break;
1008  case ImmTyClampSI: OS << "ClampSI"; break;
1009  case ImmTyOModSI: OS << "OModSI"; break;
1010  case ImmTyDPP8: OS << "DPP8"; break;
1011  case ImmTyDppCtrl: OS << "DppCtrl"; break;
1012  case ImmTyDppRowMask: OS << "DppRowMask"; break;
1013  case ImmTyDppBankMask: OS << "DppBankMask"; break;
1014  case ImmTyDppBoundCtrl: OS << "DppBoundCtrl"; break;
1015  case ImmTyDppFi: OS << "FI"; break;
1016  case ImmTySdwaDstSel: OS << "SdwaDstSel"; break;
1017  case ImmTySdwaSrc0Sel: OS << "SdwaSrc0Sel"; break;
1018  case ImmTySdwaSrc1Sel: OS << "SdwaSrc1Sel"; break;
1019  case ImmTySdwaDstUnused: OS << "SdwaDstUnused"; break;
1020  case ImmTyDMask: OS << "DMask"; break;
1021  case ImmTyDim: OS << "Dim"; break;
1022  case ImmTyUNorm: OS << "UNorm"; break;
1023  case ImmTyDA: OS << "DA"; break;
1024  case ImmTyR128A16: OS << "R128A16"; break;
1025  case ImmTyA16: OS << "A16"; break;
1026  case ImmTyLWE: OS << "LWE"; break;
1027  case ImmTyOff: OS << "Off"; break;
1028  case ImmTyExpTgt: OS << "ExpTgt"; break;
1029  case ImmTyExpCompr: OS << "ExpCompr"; break;
1030  case ImmTyExpVM: OS << "ExpVM"; break;
1031  case ImmTyHwreg: OS << "Hwreg"; break;
1032  case ImmTySendMsg: OS << "SendMsg"; break;
1033  case ImmTyInterpSlot: OS << "InterpSlot"; break;
1034  case ImmTyInterpAttr: OS << "InterpAttr"; break;
1035  case ImmTyAttrChan: OS << "AttrChan"; break;
1036  case ImmTyOpSel: OS << "OpSel"; break;
1037  case ImmTyOpSelHi: OS << "OpSelHi"; break;
1038  case ImmTyNegLo: OS << "NegLo"; break;
1039  case ImmTyNegHi: OS << "NegHi"; break;
1040  case ImmTySwizzle: OS << "Swizzle"; break;
1041  case ImmTyGprIdxMode: OS << "GprIdxMode"; break;
1042  case ImmTyHigh: OS << "High"; break;
1043  case ImmTyBLGP: OS << "BLGP"; break;
1044  case ImmTyCBSZ: OS << "CBSZ"; break;
1045  case ImmTyABID: OS << "ABID"; break;
1046  case ImmTyEndpgm: OS << "Endpgm"; break;
1047  case ImmTyWaitVDST: OS << "WaitVDST"; break;
1048  }
1049  }
1050 
1051  void print(raw_ostream &OS) const override {
1052  switch (Kind) {
1053  case Register:
1054  OS << "<register " << getReg() << " mods: " << Reg.Mods << '>';
1055  break;
1056  case Immediate:
1057  OS << '<' << getImm();
1058  if (getImmTy() != ImmTyNone) {
1059  OS << " type: "; printImmTy(OS, getImmTy());
1060  }
1061  OS << " mods: " << Imm.Mods << '>';
1062  break;
1063  case Token:
1064  OS << '\'' << getToken() << '\'';
1065  break;
1066  case Expression:
1067  OS << "<expr " << *Expr << '>';
1068  break;
1069  }
1070  }
1071 
1072  static AMDGPUOperand::Ptr CreateImm(const AMDGPUAsmParser *AsmParser,
1073  int64_t Val, SMLoc Loc,
1074  ImmTy Type = ImmTyNone,
1075  bool IsFPImm = false) {
1076  auto Op = std::make_unique<AMDGPUOperand>(Immediate, AsmParser);
1077  Op->Imm.Val = Val;
1078  Op->Imm.IsFPImm = IsFPImm;
1079  Op->Imm.Kind = ImmKindTyNone;
1080  Op->Imm.Type = Type;
1081  Op->Imm.Mods = Modifiers();
1082  Op->StartLoc = Loc;
1083  Op->EndLoc = Loc;
1084  return Op;
1085  }
1086 
1087  static AMDGPUOperand::Ptr CreateToken(const AMDGPUAsmParser *AsmParser,
1088  StringRef Str, SMLoc Loc,
1089  bool HasExplicitEncodingSize = true) {
1090  auto Res = std::make_unique<AMDGPUOperand>(Token, AsmParser);
1091  Res->Tok.Data = Str.data();
1092  Res->Tok.Length = Str.size();
1093  Res->StartLoc = Loc;
1094  Res->EndLoc = Loc;
1095  return Res;
1096  }
1097 
1098  static AMDGPUOperand::Ptr CreateReg(const AMDGPUAsmParser *AsmParser,
1099  unsigned RegNo, SMLoc S,
1100  SMLoc E) {
1101  auto Op = std::make_unique<AMDGPUOperand>(Register, AsmParser);
1102  Op->Reg.RegNo = RegNo;
1103  Op->Reg.Mods = Modifiers();
1104  Op->StartLoc = S;
1105  Op->EndLoc = E;
1106  return Op;
1107  }
1108 
1109  static AMDGPUOperand::Ptr CreateExpr(const AMDGPUAsmParser *AsmParser,
1110  const class MCExpr *Expr, SMLoc S) {
1111  auto Op = std::make_unique<AMDGPUOperand>(Expression, AsmParser);
1112  Op->Expr = Expr;
1113  Op->StartLoc = S;
1114  Op->EndLoc = S;
1115  return Op;
1116  }
1117 };
1118 
1119 raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods) {
1120  OS << "abs:" << Mods.Abs << " neg: " << Mods.Neg << " sext:" << Mods.Sext;
1121  return OS;
1122 }
1123 
1124 //===----------------------------------------------------------------------===//
1125 // AsmParser
1126 //===----------------------------------------------------------------------===//
1127 
1128 // Holds info related to the current kernel, e.g. count of SGPRs used.
1129 // Kernel scope begins at .amdgpu_hsa_kernel directive, ends at next
1130 // .amdgpu_hsa_kernel or at EOF.
1131 class KernelScopeInfo {
1132  int SgprIndexUnusedMin = -1;
1133  int VgprIndexUnusedMin = -1;
1134  int AgprIndexUnusedMin = -1;
1135  MCContext *Ctx = nullptr;
1136  MCSubtargetInfo const *MSTI = nullptr;
1137 
1138  void usesSgprAt(int i) {
1139  if (i >= SgprIndexUnusedMin) {
1140  SgprIndexUnusedMin = ++i;
1141  if (Ctx) {
1142  MCSymbol* const Sym =
1143  Ctx->getOrCreateSymbol(Twine(".kernel.sgpr_count"));
1144  Sym->setVariableValue(MCConstantExpr::create(SgprIndexUnusedMin, *Ctx));
1145  }
1146  }
1147  }
1148 
1149  void usesVgprAt(int i) {
1150  if (i >= VgprIndexUnusedMin) {
1151  VgprIndexUnusedMin = ++i;
1152  if (Ctx) {
1153  MCSymbol* const Sym =
1154  Ctx->getOrCreateSymbol(Twine(".kernel.vgpr_count"));
1155  int totalVGPR = getTotalNumVGPRs(isGFX90A(*MSTI), AgprIndexUnusedMin,
1156  VgprIndexUnusedMin);
1157  Sym->setVariableValue(MCConstantExpr::create(totalVGPR, *Ctx));
1158  }
1159  }
1160  }
1161 
1162  void usesAgprAt(int i) {
1163  // The instruction will be rejected later, in AMDGPUAsmParser::MatchAndEmitInstruction.
1164  if (!hasMAIInsts(*MSTI))
1165  return;
1166 
1167  if (i >= AgprIndexUnusedMin) {
1168  AgprIndexUnusedMin = ++i;
1169  if (Ctx) {
1170  MCSymbol* const Sym =
1171  Ctx->getOrCreateSymbol(Twine(".kernel.agpr_count"));
1172  Sym->setVariableValue(MCConstantExpr::create(AgprIndexUnusedMin, *Ctx));
1173 
1174  // Also update vgpr_count (dependent on agpr_count for gfx908/gfx90a)
1175  MCSymbol* const vSym =
1176  Ctx->getOrCreateSymbol(Twine(".kernel.vgpr_count"));
1177  int totalVGPR = getTotalNumVGPRs(isGFX90A(*MSTI), AgprIndexUnusedMin,
1178  VgprIndexUnusedMin);
1179  vSym->setVariableValue(MCConstantExpr::create(totalVGPR, *Ctx));
1180  }
1181  }
1182  }
1183 
1184 public:
1185  KernelScopeInfo() = default;
1186 
1187  void initialize(MCContext &Context) {
1188  Ctx = &Context;
1189  MSTI = Ctx->getSubtargetInfo();
1190 
1191  usesSgprAt(SgprIndexUnusedMin = -1);
1192  usesVgprAt(VgprIndexUnusedMin = -1);
1193  if (hasMAIInsts(*MSTI)) {
1194  usesAgprAt(AgprIndexUnusedMin = -1);
1195  }
1196  }
1197 
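// Record a use of the given register range. For example, a reference to s[4:7]
// (DwordRegIndex = 4, RegWidth = 128 bits) calls usesSgprAt(4 + 4 - 1), which
// bumps SgprIndexUnusedMin to 8 and updates the .kernel.sgpr_count symbol.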
1198  void usesRegister(RegisterKind RegKind, unsigned DwordRegIndex,
1199  unsigned RegWidth) {
1200  switch (RegKind) {
1201  case IS_SGPR:
1202  usesSgprAt(DwordRegIndex + divideCeil(RegWidth, 32) - 1);
1203  break;
1204  case IS_AGPR:
1205  usesAgprAt(DwordRegIndex + divideCeil(RegWidth, 32) - 1);
1206  break;
1207  case IS_VGPR:
1208  usesVgprAt(DwordRegIndex + divideCeil(RegWidth, 32) - 1);
1209  break;
1210  default:
1211  break;
1212  }
1213  }
1214 };
1215 
1216 class AMDGPUAsmParser : public MCTargetAsmParser {
1217  MCAsmParser &Parser;
1218 
1219  // Maximum number of extra operands parsed after the first optional operand.
1220  // This may be necessary to skip hardcoded mandatory operands.
1221  static const unsigned MAX_OPR_LOOKAHEAD = 8;
1222 
1223  unsigned ForcedEncodingSize = 0;
1224  bool ForcedDPP = false;
1225  bool ForcedSDWA = false;
1226  KernelScopeInfo KernelScope;
1227  unsigned CPolSeen;
1228 
1229  /// @name Auto-generated Match Functions
1230  /// {
1231 
1232 #define GET_ASSEMBLER_HEADER
1233 #include "AMDGPUGenAsmMatcher.inc"
1234 
1235  /// }
1236 
1237 private:
1238  bool ParseAsAbsoluteExpression(uint32_t &Ret);
1239  bool OutOfRangeError(SMRange Range);
1240  /// Calculate VGPR/SGPR blocks required for the given target, reserved
1241  /// registers, and user-specified NextFreeXGPR values.
1242  ///
1243  /// \param Features [in] Target features, used for bug corrections.
1244  /// \param VCCUsed [in] Whether VCC special SGPR is reserved.
1245  /// \param FlatScrUsed [in] Whether FLAT_SCRATCH special SGPR is reserved.
1246  /// \param XNACKUsed [in] Whether XNACK_MASK special SGPR is reserved.
1247  /// \param EnableWavefrontSize32 [in] Value of ENABLE_WAVEFRONT_SIZE32 kernel
1248  /// descriptor field, if valid.
1249  /// \param NextFreeVGPR [in] Max VGPR number referenced, plus one.
1250  /// \param VGPRRange [in] Token range, used for VGPR diagnostics.
1251  /// \param NextFreeSGPR [in] Max SGPR number referenced, plus one.
1252  /// \param SGPRRange [in] Token range, used for SGPR diagnostics.
1253  /// \param VGPRBlocks [out] Result VGPR block count.
1254  /// \param SGPRBlocks [out] Result SGPR block count.
1255  bool calculateGPRBlocks(const FeatureBitset &Features, bool VCCUsed,
1256  bool FlatScrUsed, bool XNACKUsed,
1257  Optional<bool> EnableWavefrontSize32, unsigned NextFreeVGPR,
1258  SMRange VGPRRange, unsigned NextFreeSGPR,
1259  SMRange SGPRRange, unsigned &VGPRBlocks,
1260  unsigned &SGPRBlocks);
1261  bool ParseDirectiveAMDGCNTarget();
1262  bool ParseDirectiveAMDHSAKernel();
1263  bool ParseDirectiveMajorMinor(uint32_t &Major, uint32_t &Minor);
1264  bool ParseDirectiveHSACodeObjectVersion();
1265  bool ParseDirectiveHSACodeObjectISA();
1266  bool ParseAMDKernelCodeTValue(StringRef ID, amd_kernel_code_t &Header);
1267  bool ParseDirectiveAMDKernelCodeT();
1268  // TODO: Possibly make subtargetHasRegister const.
1269  bool subtargetHasRegister(const MCRegisterInfo &MRI, unsigned RegNo);
1270  bool ParseDirectiveAMDGPUHsaKernel();
1271 
1272  bool ParseDirectiveISAVersion();
1273  bool ParseDirectiveHSAMetadata();
1274  bool ParseDirectivePALMetadataBegin();
1275  bool ParseDirectivePALMetadata();
1276  bool ParseDirectiveAMDGPULDS();
1277 
1278  /// Common code to parse out a block of text (typically YAML) between start and
1279  /// end directives.
1280  bool ParseToEndDirective(const char *AssemblerDirectiveBegin,
1281  const char *AssemblerDirectiveEnd,
1282  std::string &CollectString);
1283 
1284  bool AddNextRegisterToList(unsigned& Reg, unsigned& RegWidth,
1285  RegisterKind RegKind, unsigned Reg1, SMLoc Loc);
1286  bool ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
1287  unsigned &RegNum, unsigned &RegWidth,
1288  bool RestoreOnFailure = false);
1289  bool ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
1290  unsigned &RegNum, unsigned &RegWidth,
1291  SmallVectorImpl<AsmToken> &Tokens);
1292  unsigned ParseRegularReg(RegisterKind &RegKind, unsigned &RegNum,
1293  unsigned &RegWidth,
1294  SmallVectorImpl<AsmToken> &Tokens);
1295  unsigned ParseSpecialReg(RegisterKind &RegKind, unsigned &RegNum,
1296  unsigned &RegWidth,
1297  SmallVectorImpl<AsmToken> &Tokens);
1298  unsigned ParseRegList(RegisterKind &RegKind, unsigned &RegNum,
1299  unsigned &RegWidth, SmallVectorImpl<AsmToken> &Tokens);
1300  bool ParseRegRange(unsigned& Num, unsigned& Width);
1301  unsigned getRegularReg(RegisterKind RegKind,
1302  unsigned RegNum,
1303  unsigned RegWidth,
1304  SMLoc Loc);
1305 
1306  bool isRegister();
1307  bool isRegister(const AsmToken &Token, const AsmToken &NextToken) const;
1308  Optional<StringRef> getGprCountSymbolName(RegisterKind RegKind);
1309  void initializeGprCountSymbol(RegisterKind RegKind);
1310  bool updateGprCountSymbols(RegisterKind RegKind, unsigned DwordRegIndex,
1311  unsigned RegWidth);
1312  void cvtMubufImpl(MCInst &Inst, const OperandVector &Operands,
1313  bool IsAtomic, bool IsLds = false);
1314  void cvtDSImpl(MCInst &Inst, const OperandVector &Operands,
1315  bool IsGdsHardcoded);
1316 
1317 public:
1318  enum AMDGPUMatchResultTy {
1319  Match_PreferE32 = FIRST_TARGET_MATCH_RESULT_TY
1320  };
1321  enum OperandMode {
1322  OperandMode_Default,
1323  OperandMode_NSA,
1324  };
1325 
1326  using OptionalImmIndexMap = std::map<AMDGPUOperand::ImmTy, unsigned>;
1327 
1328  AMDGPUAsmParser(const MCSubtargetInfo &STI, MCAsmParser &_Parser,
1329  const MCInstrInfo &MII,
1330  const MCTargetOptions &Options)
1331  : MCTargetAsmParser(Options, STI, MII), Parser(_Parser) {
1332  MCAsmParserExtension::Initialize(Parser);
1333 
1334  if (getFeatureBits().none()) {
1335  // Set default features.
1336  copySTI().ToggleFeature("southern-islands");
1337  }
1338 
1339  setAvailableFeatures(ComputeAvailableFeatures(getFeatureBits()));
1340 
1341  {
1342  // TODO: make those pre-defined variables read-only.
1343  // Currently there is no suitable machinery in core llvm-mc for this.
1344  // MCSymbol::isRedefinable is intended for another purpose, and
1345  // AsmParser::parseDirectiveSet() cannot be specialized for a specific target.
1346  AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
1347  MCContext &Ctx = getContext();
1348  if (ISA.Major >= 6 && isHsaAbiVersion3AndAbove(&getSTI())) {
1349  MCSymbol *Sym =
1350  Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_number"));
1351  Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx));
1352  Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_minor"));
1353  Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx));
1354  Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_stepping"));
1355  Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx));
1356  } else {
1357  MCSymbol *Sym =
1358  Ctx.getOrCreateSymbol(Twine(".option.machine_version_major"));
1359  Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx));
1360  Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_minor"));
1361  Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx));
1362  Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_stepping"));
1363  Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx));
1364  }
1365  if (ISA.Major >= 6 && isHsaAbiVersion3AndAbove(&getSTI())) {
1366  initializeGprCountSymbol(IS_VGPR);
1367  initializeGprCountSymbol(IS_SGPR);
1368  } else
1369  KernelScope.initialize(getContext());
1370  }
1371  }
1372 
1373  bool hasMIMG_R128() const {
1374  return AMDGPU::hasMIMG_R128(getSTI());
1375  }
1376 
1377  bool hasPackedD16() const {
1378  return AMDGPU::hasPackedD16(getSTI());
1379  }
1380 
1381  bool hasGFX10A16() const {
1382  return AMDGPU::hasGFX10A16(getSTI());
1383  }
1384 
1385  bool hasG16() const { return AMDGPU::hasG16(getSTI()); }
1386 
1387  bool isSI() const {
1388  return AMDGPU::isSI(getSTI());
1389  }
1390 
1391  bool isCI() const {
1392  return AMDGPU::isCI(getSTI());
1393  }
1394 
1395  bool isVI() const {
1396  return AMDGPU::isVI(getSTI());
1397  }
1398 
1399  bool isGFX9() const {
1400  return AMDGPU::isGFX9(getSTI());
1401  }
1402 
1403  // TODO: isGFX90A is also true for GFX940. We need to clean this up.
1404  bool isGFX90A() const {
1405  return AMDGPU::isGFX90A(getSTI());
1406  }
1407 
1408  bool isGFX940() const {
1409  return AMDGPU::isGFX940(getSTI());
1410  }
1411 
1412  bool isGFX9Plus() const {
1413  return AMDGPU::isGFX9Plus(getSTI());
1414  }
1415 
1416  bool isGFX10() const {
1417  return AMDGPU::isGFX10(getSTI());
1418  }
1419 
1420  bool isGFX10Plus() const { return AMDGPU::isGFX10Plus(getSTI()); }
1421 
1422  bool isGFX11() const {
1423  return AMDGPU::isGFX11(getSTI());
1424  }
1425 
1426  bool isGFX11Plus() const {
1427  return AMDGPU::isGFX11Plus(getSTI());
1428  }
1429 
1430  bool isGFX10_BEncoding() const {
1431  return AMDGPU::isGFX10_BEncoding(getSTI());
1432  }
1433 
1434  bool hasInv2PiInlineImm() const {
1435  return getFeatureBits()[AMDGPU::FeatureInv2PiInlineImm];
1436  }
1437 
1438  bool hasFlatOffsets() const {
1439  return getFeatureBits()[AMDGPU::FeatureFlatInstOffsets];
1440  }
1441 
1442  bool hasArchitectedFlatScratch() const {
1443  return getFeatureBits()[AMDGPU::FeatureArchitectedFlatScratch];
1444  }
1445 
1446  bool hasSGPR102_SGPR103() const {
1447  return !isVI() && !isGFX9();
1448  }
1449 
1450  bool hasSGPR104_SGPR105() const { return isGFX10Plus(); }
1451 
1452  bool hasIntClamp() const {
1453  return getFeatureBits()[AMDGPU::FeatureIntClamp];
1454  }
1455 
1456  AMDGPUTargetStreamer &getTargetStreamer() {
1457  MCTargetStreamer &TS = *getParser().getStreamer().getTargetStreamer();
1458  return static_cast<AMDGPUTargetStreamer &>(TS);
1459  }
1460 
1461  const MCRegisterInfo *getMRI() const {
1462  // We need this const_cast because for some reason getContext() is not const
1463  // in MCAsmParser.
1464  return const_cast<AMDGPUAsmParser*>(this)->getContext().getRegisterInfo();
1465  }
1466 
1467  const MCInstrInfo *getMII() const {
1468  return &MII;
1469  }
1470 
1471  const FeatureBitset &getFeatureBits() const {
1472  return getSTI().getFeatureBits();
1473  }
1474 
1475  void setForcedEncodingSize(unsigned Size) { ForcedEncodingSize = Size; }
1476  void setForcedDPP(bool ForceDPP_) { ForcedDPP = ForceDPP_; }
1477  void setForcedSDWA(bool ForceSDWA_) { ForcedSDWA = ForceSDWA_; }
1478 
1479  unsigned getForcedEncodingSize() const { return ForcedEncodingSize; }
1480  bool isForcedVOP3() const { return ForcedEncodingSize == 64; }
1481  bool isForcedDPP() const { return ForcedDPP; }
1482  bool isForcedSDWA() const { return ForcedSDWA; }
1483  ArrayRef<unsigned> getMatchedVariants() const;
1484  StringRef getMatchedVariantName() const;
1485 
1486  std::unique_ptr<AMDGPUOperand> parseRegister(bool RestoreOnFailure = false);
1487  bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc,
1488  bool RestoreOnFailure);
1489  bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) override;
1490  OperandMatchResultTy tryParseRegister(unsigned &RegNo, SMLoc &StartLoc,
1491  SMLoc &EndLoc) override;
1492  unsigned checkTargetMatchPredicate(MCInst &Inst) override;
1493  unsigned validateTargetOperandClass(MCParsedAsmOperand &Op,
1494  unsigned Kind) override;
1495  bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
1496  OperandVector &Operands, MCStreamer &Out,
1497  uint64_t &ErrorInfo,
1498  bool MatchingInlineAsm) override;
1499  bool ParseDirective(AsmToken DirectiveID) override;
1500  OperandMatchResultTy parseOperand(OperandVector &Operands, StringRef Mnemonic,
1501  OperandMode Mode = OperandMode_Default);
1502  StringRef parseMnemonicSuffix(StringRef Name);
1503  bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
1504  SMLoc NameLoc, OperandVector &Operands) override;
1505  //bool ProcessInstruction(MCInst &Inst);
1506 
1507  OperandMatchResultTy parseIntWithPrefix(const char *Prefix, int64_t &Int);
1508 
1509  OperandMatchResultTy
1510  parseIntWithPrefix(const char *Prefix, OperandVector &Operands,
1511  AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
1512  bool (*ConvertResult)(int64_t &) = nullptr);
1513 
1514  OperandMatchResultTy
1515  parseOperandArrayWithPrefix(const char *Prefix,
1516  OperandVector &Operands,
1517  AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
1518  bool (*ConvertResult)(int64_t&) = nullptr);
1519 
1520  OperandMatchResultTy
1521  parseNamedBit(StringRef Name, OperandVector &Operands,
1522  AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone);
1523  OperandMatchResultTy parseCPol(OperandVector &Operands);
1524  OperandMatchResultTy parseStringWithPrefix(StringRef Prefix,
1525  StringRef &Value,
1526  SMLoc &StringLoc);
1527 
1528  bool isModifier();
1529  bool isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
1530  bool isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
1531  bool isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
1532  bool isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const;
1533  bool parseSP3NegModifier();
1534  OperandMatchResultTy parseImm(OperandVector &Operands, bool HasSP3AbsModifier = false);
1535  OperandMatchResultTy parseReg(OperandVector &Operands);
1536  OperandMatchResultTy parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod = false);
1537  OperandMatchResultTy parseRegOrImmWithFPInputMods(OperandVector &Operands, bool AllowImm = true);
1538  OperandMatchResultTy parseRegOrImmWithIntInputMods(OperandVector &Operands, bool AllowImm = true);
1539  OperandMatchResultTy parseRegWithFPInputMods(OperandVector &Operands);
1540  OperandMatchResultTy parseRegWithIntInputMods(OperandVector &Operands);
1541  OperandMatchResultTy parseVReg32OrOff(OperandVector &Operands);
1542  OperandMatchResultTy parseDfmtNfmt(int64_t &Format);
1543  OperandMatchResultTy parseUfmt(int64_t &Format);
1544  OperandMatchResultTy parseSymbolicSplitFormat(StringRef FormatStr, SMLoc Loc, int64_t &Format);
1545  OperandMatchResultTy parseSymbolicUnifiedFormat(StringRef FormatStr, SMLoc Loc, int64_t &Format);
1546  OperandMatchResultTy parseFORMAT(OperandVector &Operands);
1547  OperandMatchResultTy parseSymbolicOrNumericFormat(int64_t &Format);
1548  OperandMatchResultTy parseNumericFormat(int64_t &Format);
1549  bool tryParseFmt(const char *Pref, int64_t MaxVal, int64_t &Val);
1550  bool matchDfmtNfmt(int64_t &Dfmt, int64_t &Nfmt, StringRef FormatStr, SMLoc Loc);
1551 
1552  void cvtDSOffset01(MCInst &Inst, const OperandVector &Operands);
1553  void cvtDS(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, false); }
1554  void cvtDSGds(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, true); }
1555  void cvtExp(MCInst &Inst, const OperandVector &Operands);
1556 
1557  bool parseCnt(int64_t &IntVal);
1558  OperandMatchResultTy parseSWaitCntOps(OperandVector &Operands);
1559 
1560  bool parseDepCtr(int64_t &IntVal, unsigned &Mask);
1561  void depCtrError(SMLoc Loc, int ErrorId, StringRef DepCtrName);
1562  OperandMatchResultTy parseDepCtrOps(OperandVector &Operands);
1563 
1564  bool parseDelay(int64_t &Delay);
1565  OperandMatchResultTy parseSDelayAluOps(OperandVector &Operands);
1566 
1567  OperandMatchResultTy parseHwreg(OperandVector &Operands);
1568 
1569 private:
1570  struct OperandInfoTy {
1571  SMLoc Loc;
1572  int64_t Id;
1573  bool IsSymbolic = false;
1574  bool IsDefined = false;
1575 
1576  OperandInfoTy(int64_t Id_) : Id(Id_) {}
1577  };
1578 
1579  bool parseSendMsgBody(OperandInfoTy &Msg, OperandInfoTy &Op, OperandInfoTy &Stream);
1580  bool validateSendMsg(const OperandInfoTy &Msg,
1581  const OperandInfoTy &Op,
1582  const OperandInfoTy &Stream);
1583 
1584  bool parseHwregBody(OperandInfoTy &HwReg,
1585  OperandInfoTy &Offset,
1586  OperandInfoTy &Width);
1587  bool validateHwreg(const OperandInfoTy &HwReg,
1588  const OperandInfoTy &Offset,
1589  const OperandInfoTy &Width);
1590 
1591  SMLoc getFlatOffsetLoc(const OperandVector &Operands) const;
1592  SMLoc getSMEMOffsetLoc(const OperandVector &Operands) const;
1593  SMLoc getBLGPLoc(const OperandVector &Operands) const;
1594 
1595  SMLoc getOperandLoc(std::function<bool(const AMDGPUOperand&)> Test,
1596  const OperandVector &Operands) const;
1597  SMLoc getImmLoc(AMDGPUOperand::ImmTy Type, const OperandVector &Operands) const;
1598  SMLoc getRegLoc(unsigned Reg, const OperandVector &Operands) const;
1599  SMLoc getLitLoc(const OperandVector &Operands) const;
1600  SMLoc getConstLoc(const OperandVector &Operands) const;
1601 
1602  bool validateInstruction(const MCInst &Inst, const SMLoc &IDLoc, const OperandVector &Operands);
1603  bool validateFlatOffset(const MCInst &Inst, const OperandVector &Operands);
1604  bool validateSMEMOffset(const MCInst &Inst, const OperandVector &Operands);
1605  bool validateSOPLiteral(const MCInst &Inst) const;
1606  bool validateConstantBusLimitations(const MCInst &Inst, const OperandVector &Operands);
1607  bool validateEarlyClobberLimitations(const MCInst &Inst, const OperandVector &Operands);
1608  bool validateIntClampSupported(const MCInst &Inst);
1609  bool validateMIMGAtomicDMask(const MCInst &Inst);
1610  bool validateMIMGGatherDMask(const MCInst &Inst);
1611  bool validateMovrels(const MCInst &Inst, const OperandVector &Operands);
1612  Optional<StringRef> validateMIMGDataSize(const MCInst &Inst);
1613  bool validateMIMGAddrSize(const MCInst &Inst);
1614  bool validateMIMGD16(const MCInst &Inst);
1615  bool validateMIMGDim(const MCInst &Inst);
1616  bool validateMIMGMSAA(const MCInst &Inst);
1617  bool validateOpSel(const MCInst &Inst);
1618  bool validateDPP(const MCInst &Inst, const OperandVector &Operands);
1619  bool validateVccOperand(unsigned Reg) const;
1620  bool validateVOPLiteral(const MCInst &Inst, const OperandVector &Operands);
1621  bool validateMAIAccWrite(const MCInst &Inst, const OperandVector &Operands);
1622  bool validateMFMA(const MCInst &Inst, const OperandVector &Operands);
1623  bool validateAGPRLdSt(const MCInst &Inst) const;
1624  bool validateVGPRAlign(const MCInst &Inst) const;
1625  bool validateBLGP(const MCInst &Inst, const OperandVector &Operands);
1626  bool validateGWS(const MCInst &Inst, const OperandVector &Operands);
1627  bool validateDivScale(const MCInst &Inst);
1628  bool validateCoherencyBits(const MCInst &Inst, const OperandVector &Operands,
1629  const SMLoc &IDLoc);
1630  bool validateFlatLdsDMA(const MCInst &Inst, const OperandVector &Operands,
1631  const SMLoc &IDLoc);
1632  Optional<StringRef> validateLdsDirect(const MCInst &Inst);
1633  unsigned getConstantBusLimit(unsigned Opcode) const;
1634  bool usesConstantBus(const MCInst &Inst, unsigned OpIdx);
1635  bool isInlineConstant(const MCInst &Inst, unsigned OpIdx) const;
1636  unsigned findImplicitSGPRReadInVOP(const MCInst &Inst) const;
1637 
1638  bool isSupportedMnemo(StringRef Mnemo,
1639  const FeatureBitset &FBS);
1640  bool isSupportedMnemo(StringRef Mnemo,
1641  const FeatureBitset &FBS,
1642  ArrayRef<unsigned> Variants);
1643  bool checkUnsupportedInstruction(StringRef Name, const SMLoc &IDLoc);
1644 
1645  bool isId(const StringRef Id) const;
1646  bool isId(const AsmToken &Token, const StringRef Id) const;
1647  bool isToken(const AsmToken::TokenKind Kind) const;
1648  bool trySkipId(const StringRef Id);
1649  bool trySkipId(const StringRef Pref, const StringRef Id);
1650  bool trySkipId(const StringRef Id, const AsmToken::TokenKind Kind);
1651  bool trySkipToken(const AsmToken::TokenKind Kind);
1652  bool skipToken(const AsmToken::TokenKind Kind, const StringRef ErrMsg);
1653  bool parseString(StringRef &Val, const StringRef ErrMsg = "expected a string");
1654  bool parseId(StringRef &Val, const StringRef ErrMsg = "");
1655 
1656  void peekTokens(MutableArrayRef<AsmToken> Tokens);
1657  AsmToken::TokenKind getTokenKind() const;
1658  bool parseExpr(int64_t &Imm, StringRef Expected = "");
1659  bool parseExpr(OperandVector &Operands);
1660  StringRef getTokenStr() const;
1661  AsmToken peekToken();
1662  AsmToken getToken() const;
1663  SMLoc getLoc() const;
1664  void lex();
1665 
1666 public:
1667  void onBeginOfFile() override;
1668 
1669  OperandMatchResultTy parseOptionalOperand(OperandVector &Operands);
1670  OperandMatchResultTy parseOptionalOpr(OperandVector &Operands);
1671 
1672  OperandMatchResultTy parseExpTgt(OperandVector &Operands);
1673  OperandMatchResultTy parseSendMsgOp(OperandVector &Operands);
1674  OperandMatchResultTy parseInterpSlot(OperandVector &Operands);
1675  OperandMatchResultTy parseInterpAttr(OperandVector &Operands);
1676  OperandMatchResultTy parseSOppBrTarget(OperandVector &Operands);
1677  OperandMatchResultTy parseBoolReg(OperandVector &Operands);
1678 
1679  bool parseSwizzleOperand(int64_t &Op,
1680  const unsigned MinVal,
1681  const unsigned MaxVal,
1682  const StringRef ErrMsg,
1683  SMLoc &Loc);
1684  bool parseSwizzleOperands(const unsigned OpNum, int64_t* Op,
1685  const unsigned MinVal,
1686  const unsigned MaxVal,
1687  const StringRef ErrMsg);
1688  OperandMatchResultTy parseSwizzleOp(OperandVector &Operands);
1689  bool parseSwizzleOffset(int64_t &Imm);
1690  bool parseSwizzleMacro(int64_t &Imm);
1691  bool parseSwizzleQuadPerm(int64_t &Imm);
1692  bool parseSwizzleBitmaskPerm(int64_t &Imm);
1693  bool parseSwizzleBroadcast(int64_t &Imm);
1694  bool parseSwizzleSwap(int64_t &Imm);
1695  bool parseSwizzleReverse(int64_t &Imm);
1696 
1697  OperandMatchResultTy parseGPRIdxMode(OperandVector &Operands);
1698  int64_t parseGPRIdxMacro();
1699 
1700  void cvtMubuf(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false); }
1701  void cvtMubufAtomic(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, true); }
1702  void cvtMubufLds(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false, true); }
1703  void cvtMtbuf(MCInst &Inst, const OperandVector &Operands);
1704 
1705  AMDGPUOperand::Ptr defaultCPol() const;
1706 
1707  AMDGPUOperand::Ptr defaultSMRDOffset8() const;
1708  AMDGPUOperand::Ptr defaultSMEMOffset() const;
1709  AMDGPUOperand::Ptr defaultSMRDLiteralOffset() const;
1710  AMDGPUOperand::Ptr defaultFlatOffset() const;
1711 
1712  OperandMatchResultTy parseOModOperand(OperandVector &Operands);
1713 
1714  void cvtVOP3(MCInst &Inst, const OperandVector &Operands,
1715  OptionalImmIndexMap &OptionalIdx);
1716  void cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands);
1717  void cvtVOP3(MCInst &Inst, const OperandVector &Operands);
1718  void cvtVOP3P(MCInst &Inst, const OperandVector &Operands);
1719  void cvtVOP3P(MCInst &Inst, const OperandVector &Operands,
1720  OptionalImmIndexMap &OptionalIdx);
1721 
1722  void cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands);
1723 
1724  void cvtMIMG(MCInst &Inst, const OperandVector &Operands,
1725  bool IsAtomic = false);
1726  void cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands);
1727  void cvtIntersectRay(MCInst &Inst, const OperandVector &Operands);
1728 
1729  void cvtSMEMAtomic(MCInst &Inst, const OperandVector &Operands);
1730 
1731  bool parseDimId(unsigned &Encoding);
1732  OperandMatchResultTy parseDim(OperandVector &Operands);
1733  OperandMatchResultTy parseDPP8(OperandVector &Operands);
1734  OperandMatchResultTy parseDPPCtrl(OperandVector &Operands);
1735  bool isSupportedDPPCtrl(StringRef Ctrl, const OperandVector &Operands);
1736  int64_t parseDPPCtrlSel(StringRef Ctrl);
1737  int64_t parseDPPCtrlPerm();
1738  AMDGPUOperand::Ptr defaultRowMask() const;
1739  AMDGPUOperand::Ptr defaultBankMask() const;
1740  AMDGPUOperand::Ptr defaultBoundCtrl() const;
1741  AMDGPUOperand::Ptr defaultFI() const;
1742  void cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8 = false);
1743  void cvtDPP8(MCInst &Inst, const OperandVector &Operands) { cvtDPP(Inst, Operands, true); }
1744 
1745  OperandMatchResultTy parseSDWASel(OperandVector &Operands, StringRef Prefix,
1746  AMDGPUOperand::ImmTy Type);
1747  OperandMatchResultTy parseSDWADstUnused(OperandVector &Operands);
1748  void cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands);
1749  void cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands);
1750  void cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands);
1751  void cvtSdwaVOP2e(MCInst &Inst, const OperandVector &Operands);
1752  void cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands);
1753  void cvtSDWA(MCInst &Inst, const OperandVector &Operands,
1754  uint64_t BasicInstType,
1755  bool SkipDstVcc = false,
1756  bool SkipSrcVcc = false);
1757 
1758  AMDGPUOperand::Ptr defaultBLGP() const;
1759  AMDGPUOperand::Ptr defaultCBSZ() const;
1760  AMDGPUOperand::Ptr defaultABID() const;
1761 
1762  OperandMatchResultTy parseEndpgmOp(OperandVector &Operands);
1763  AMDGPUOperand::Ptr defaultEndpgmImmOperands() const;
1764 
1765  OperandMatchResultTy parseWaitVDST(OperandVector &Operands);
1766  AMDGPUOperand::Ptr defaultWaitVDST() const;
1767 };
1768 
1769 struct OptionalOperand {
1770  const char *Name;
1771  AMDGPUOperand::ImmTy Type;
1772  bool IsBit;
1773  bool (*ConvertResult)(int64_t&);
1774 };
1775 
1776 } // end anonymous namespace
1777 
1778 // May be called with an integer type of equivalent bitwidth.
1779 static const fltSemantics *getFltSemantics(unsigned Size) {
1780  switch (Size) {
1781  case 4:
1782  return &APFloat::IEEEsingle();
1783  case 8:
1784  return &APFloat::IEEEdouble();
1785  case 2:
1786  return &APFloat::IEEEhalf();
1787  default:
1788  llvm_unreachable("unsupported fp type");
1789  }
1790 }
1791 
1792 static const fltSemantics *getFltSemantics(MVT VT) {
1793  return getFltSemantics(VT.getSizeInBits() / 8);
1794 }
1795 
1796 static const fltSemantics *getOpFltSemantics(uint8_t OperandType) {
1797  switch (OperandType) {
1798  case AMDGPU::OPERAND_REG_IMM_INT32:
1799  case AMDGPU::OPERAND_REG_IMM_FP32:
1800  case AMDGPU::OPERAND_REG_IMM_FP32_DEFERRED:
1801  case AMDGPU::OPERAND_REG_INLINE_C_INT32:
1802  case AMDGPU::OPERAND_REG_INLINE_C_FP32:
1803  case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
1804  case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
1805  case AMDGPU::OPERAND_REG_INLINE_C_V2FP32:
1806  case AMDGPU::OPERAND_REG_IMM_V2FP32:
1807  case AMDGPU::OPERAND_REG_INLINE_C_V2INT32:
1808  case AMDGPU::OPERAND_REG_IMM_V2INT32:
1809  case AMDGPU::OPERAND_KIMM32:
1810  return &APFloat::IEEEsingle();
1811  case AMDGPU::OPERAND_REG_IMM_INT64:
1812  case AMDGPU::OPERAND_REG_IMM_FP64:
1813  case AMDGPU::OPERAND_REG_INLINE_C_INT64:
1814  case AMDGPU::OPERAND_REG_INLINE_C_FP64:
1815  case AMDGPU::OPERAND_REG_INLINE_AC_FP64:
1816  return &APFloat::IEEEdouble();
1817  case AMDGPU::OPERAND_REG_IMM_INT16:
1818  case AMDGPU::OPERAND_REG_IMM_FP16:
1819  case AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED:
1820  case AMDGPU::OPERAND_REG_INLINE_C_INT16:
1821  case AMDGPU::OPERAND_REG_INLINE_C_FP16:
1822  case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
1823  case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
1824  case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
1825  case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
1826  case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
1827  case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16:
1828  case AMDGPU::OPERAND_REG_IMM_V2INT16:
1829  case AMDGPU::OPERAND_REG_IMM_V2FP16:
1830  case AMDGPU::OPERAND_KIMM16:
1831  return &APFloat::IEEEhalf();
1832  default:
1833  llvm_unreachable("unsupported fp type");
1834  }
1835 }
1836 
1837 //===----------------------------------------------------------------------===//
1838 // Operand
1839 //===----------------------------------------------------------------------===//
1840 
1841 static bool canLosslesslyConvertToFPType(APFloat &FPLiteral, MVT VT) {
1842  bool Lost;
1843 
1844  // Convert the literal to the operand's floating-point type
1845  APFloat::opStatus Status = FPLiteral.convert(*getFltSemantics(VT),
1846  APFloat::rmNearestTiesToEven,
1847  &Lost);
1848  // We allow precision loss but not overflow or underflow
1849  if (Status != APFloat::opOK &&
1850  Lost &&
1851  ((Status & APFloat::opOverflow) != 0 ||
1852  (Status & APFloat::opUnderflow) != 0)) {
1853  return false;
1854  }
1855 
1856  return true;
1857 }
1858 
1859 static bool isSafeTruncation(int64_t Val, unsigned Size) {
1860  return isUIntN(Size, Val) || isIntN(Size, Val);
1861 }
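// For example, with Size == 16 both 0xFFFF (fits as unsigned) and -1 (fits as
// signed) are safe truncations, whereas 0x10000 and -40000 are not.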
1862 
1863 static bool isInlineableLiteralOp16(int64_t Val, MVT VT, bool HasInv2Pi) {
1864  if (VT.getScalarType() == MVT::i16) {
1865  // FP immediate values are broken.
1866  return isInlinableIntLiteral(Val);
1867  }
1868 
1869  // f16/v2f16 operands work correctly for all values.
1870  return AMDGPU::isInlinableLiteral16(Val, HasInv2Pi);
1871 }
1872 
1873 bool AMDGPUOperand::isInlinableImm(MVT type) const {
1874 
1875  // This is a hack to enable named inline values like
1876  // shared_base with both 32-bit and 64-bit operands.
1877  // Note that these values are defined as
1878  // 32-bit operands only.
1879  if (isInlineValue()) {
1880  return true;
1881  }
1882 
1883  if (!isImmTy(ImmTyNone)) {
1884  // Only plain immediates are inlinable (e.g. "clamp" attribute is not)
1885  return false;
1886  }
1887  // TODO: We should avoid using host float here. It would be better to
1888  // check the float bit values which is what a few other places do.
1889  // We've had bot failures before due to weird NaN support on mips hosts.
1890 
1891  APInt Literal(64, Imm.Val);
1892 
1893  if (Imm.IsFPImm) { // We got fp literal token
1894  if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand
1895  return AMDGPU::isInlinableLiteral64(Imm.Val,
1896  AsmParser->hasInv2PiInlineImm());
1897  }
1898 
1899  APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val));
1900  if (!canLosslesslyConvertToFPType(FPLiteral, type))
1901  return false;
1902 
1903  if (type.getScalarSizeInBits() == 16) {
1904  return isInlineableLiteralOp16(
1905  static_cast<int16_t>(FPLiteral.bitcastToAPInt().getZExtValue()),
1906  type, AsmParser->hasInv2PiInlineImm());
1907  }
1908 
1909  // Check if single precision literal is inlinable
 1910  return AMDGPU::isInlinableLiteral32(
1911  static_cast<int32_t>(FPLiteral.bitcastToAPInt().getZExtValue()),
1912  AsmParser->hasInv2PiInlineImm());
1913  }
1914 
1915  // We got int literal token.
1916  if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand
1917  return AMDGPU::isInlinableLiteral64(Imm.Val,
1918  AsmParser->hasInv2PiInlineImm());
1919  }
1920 
1921  if (!isSafeTruncation(Imm.Val, type.getScalarSizeInBits())) {
1922  return false;
1923  }
1924 
1925  if (type.getScalarSizeInBits() == 16) {
1926  return isInlineableLiteralOp16(
1927  static_cast<int16_t>(Literal.getLoBits(16).getSExtValue()),
1928  type, AsmParser->hasInv2PiInlineImm());
1929  }
1930 
 1931  return AMDGPU::isInlinableLiteral32(
1932  static_cast<int32_t>(Literal.getLoBits(32).getZExtValue()),
1933  AsmParser->hasInv2PiInlineImm());
1934 }
1935 
1936 bool AMDGPUOperand::isLiteralImm(MVT type) const {
1937  // Check that this immediate can be added as literal
1938  if (!isImmTy(ImmTyNone)) {
1939  return false;
1940  }
1941 
1942  if (!Imm.IsFPImm) {
1943  // We got int literal token.
1944 
1945  if (type == MVT::f64 && hasFPModifiers()) {
 1946  // FP modifiers cannot be applied to int literals while preserving the same
 1947  // semantics for VOP1/2/C and VOP3 because of integer truncation. To avoid
 1948  // ambiguity, disable these cases.
1949  return false;
1950  }
1951 
1952  unsigned Size = type.getSizeInBits();
1953  if (Size == 64)
1954  Size = 32;
1955 
1956  // FIXME: 64-bit operands can zero extend, sign extend, or pad zeroes for FP
1957  // types.
1958  return isSafeTruncation(Imm.Val, Size);
1959  }
1960 
1961  // We got fp literal token
1962  if (type == MVT::f64) { // Expected 64-bit fp operand
 1963  // The low 32 bits of the literal would be set to zeroes, but we accept such literals
1964  return true;
1965  }
1966 
1967  if (type == MVT::i64) { // Expected 64-bit int operand
1968  // We don't allow fp literals in 64-bit integer instructions. It is
1969  // unclear how we should encode them.
1970  return false;
1971  }
1972 
1973  // We allow fp literals with f16x2 operands assuming that the specified
1974  // literal goes into the lower half and the upper half is zero. We also
1975  // require that the literal may be losslessly converted to f16.
1976  MVT ExpectedType = (type == MVT::v2f16)? MVT::f16 :
1977  (type == MVT::v2i16)? MVT::i16 :
1978  (type == MVT::v2f32)? MVT::f32 : type;
1979 
1980  APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val));
1981  return canLosslesslyConvertToFPType(FPLiteral, ExpectedType);
1982 }
1983 
1984 bool AMDGPUOperand::isRegClass(unsigned RCID) const {
1985  return isRegKind() && AsmParser->getMRI()->getRegClass(RCID).contains(getReg());
1986 }
1987 
1988 bool AMDGPUOperand::isVRegWithInputMods() const {
1989  return isRegClass(AMDGPU::VGPR_32RegClassID) ||
1990  // GFX90A allows DPP on 64-bit operands.
1991  (isRegClass(AMDGPU::VReg_64RegClassID) &&
1992  AsmParser->getFeatureBits()[AMDGPU::Feature64BitDPP]);
1993 }
1994 
1995 bool AMDGPUOperand::isSDWAOperand(MVT type) const {
1996  if (AsmParser->isVI())
1997  return isVReg32();
1998  else if (AsmParser->isGFX9Plus())
1999  return isRegClass(AMDGPU::VS_32RegClassID) || isInlinableImm(type);
2000  else
2001  return false;
2002 }
2003 
2004 bool AMDGPUOperand::isSDWAFP16Operand() const {
2005  return isSDWAOperand(MVT::f16);
2006 }
2007 
2008 bool AMDGPUOperand::isSDWAFP32Operand() const {
2009  return isSDWAOperand(MVT::f32);
2010 }
2011 
2012 bool AMDGPUOperand::isSDWAInt16Operand() const {
2013  return isSDWAOperand(MVT::i16);
2014 }
2015 
2016 bool AMDGPUOperand::isSDWAInt32Operand() const {
2017  return isSDWAOperand(MVT::i32);
2018 }
2019 
2020 bool AMDGPUOperand::isBoolReg() const {
2021  auto FB = AsmParser->getFeatureBits();
2022  return isReg() && ((FB[AMDGPU::FeatureWavefrontSize64] && isSCSrcB64()) ||
2023  (FB[AMDGPU::FeatureWavefrontSize32] && isSCSrcB32()));
2024 }
2025 
2026 uint64_t AMDGPUOperand::applyInputFPModifiers(uint64_t Val, unsigned Size) const
2027 {
2028  assert(isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers());
2029  assert(Size == 2 || Size == 4 || Size == 8);
2030 
2031  const uint64_t FpSignMask = (1ULL << (Size * 8 - 1));
2032 
2033  if (Imm.Mods.Abs) {
2034  Val &= ~FpSignMask;
2035  }
2036  if (Imm.Mods.Neg) {
2037  Val ^= FpSignMask;
2038  }
2039 
2040  return Val;
2041 }
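// For a 4-byte operand FpSignMask is 0x80000000, so the 'abs' modifier clears
// bit 31 and 'neg' toggles it; the 2- and 8-byte cases act the same way on
// bits 15 and 63 respectively.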
2042 
2043 void AMDGPUOperand::addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers) const {
2044  if (AMDGPU::isSISrcOperand(AsmParser->getMII()->get(Inst.getOpcode()),
2045  Inst.getNumOperands())) {
2046  addLiteralImmOperand(Inst, Imm.Val,
2047  ApplyModifiers &
2048  isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers());
2049  } else {
2050  assert(!isImmTy(ImmTyNone) || !hasModifiers());
2051  Inst.addOperand(MCOperand::createImm(Imm.Val));
2052  setImmKindNone();
2053  }
2054 }
2055 
2056 void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const {
2057  const auto& InstDesc = AsmParser->getMII()->get(Inst.getOpcode());
2058  auto OpNum = Inst.getNumOperands();
2059  // Check that this operand accepts literals
2060  assert(AMDGPU::isSISrcOperand(InstDesc, OpNum));
2061 
2062  if (ApplyModifiers) {
2063  assert(AMDGPU::isSISrcFPOperand(InstDesc, OpNum));
2064  const unsigned Size = Imm.IsFPImm ? sizeof(double) : getOperandSize(InstDesc, OpNum);
2065  Val = applyInputFPModifiers(Val, Size);
2066  }
2067 
2068  APInt Literal(64, Val);
2069  uint8_t OpTy = InstDesc.OpInfo[OpNum].OperandType;
2070 
2071  if (Imm.IsFPImm) { // We got fp literal token
2072  switch (OpTy) {
 2073  case AMDGPU::OPERAND_REG_IMM_INT64:
 2074  case AMDGPU::OPERAND_REG_IMM_FP64:
 2075  case AMDGPU::OPERAND_REG_INLINE_C_INT64:
 2076  case AMDGPU::OPERAND_REG_INLINE_C_FP64:
 2077  case AMDGPU::OPERAND_REG_INLINE_AC_FP64:
2078  if (AMDGPU::isInlinableLiteral64(Literal.getZExtValue(),
2079  AsmParser->hasInv2PiInlineImm())) {
2080  Inst.addOperand(MCOperand::createImm(Literal.getZExtValue()));
2081  setImmKindConst();
2082  return;
2083  }
2084 
2085  // Non-inlineable
2086  if (AMDGPU::isSISrcFPOperand(InstDesc, OpNum)) { // Expected 64-bit fp operand
2087  // For fp operands we check if low 32 bits are zeros
2088  if (Literal.getLoBits(32) != 0) {
2089  const_cast<AMDGPUAsmParser *>(AsmParser)->Warning(Inst.getLoc(),
2090  "Can't encode literal as exact 64-bit floating-point operand. "
2091  "Low 32-bits will be set to zero");
2092  }
2093 
2094  Inst.addOperand(MCOperand::createImm(Literal.lshr(32).getZExtValue()));
2095  setImmKindLiteral();
2096  return;
2097  }
2098 
2099  // We don't allow fp literals in 64-bit integer instructions. It is
2100  // unclear how we should encode them. This case should be checked earlier
2101  // in predicate methods (isLiteralImm())
2102  llvm_unreachable("fp literal in 64-bit integer instruction.");
2103 
2129  case AMDGPU::OPERAND_KIMM16: {
2130  bool lost;
2131  APFloat FPLiteral(APFloat::IEEEdouble(), Literal);
 2132  // Convert the literal to the operand's floating-point type
2133  FPLiteral.convert(*getOpFltSemantics(OpTy),
 2134  APFloat::rmNearestTiesToEven, &lost);
 2135  // We allow precision loss but not overflow or underflow. This should be
2136  // checked earlier in isLiteralImm()
2137 
2138  uint64_t ImmVal = FPLiteral.bitcastToAPInt().getZExtValue();
2139  Inst.addOperand(MCOperand::createImm(ImmVal));
2140  setImmKindLiteral();
2141  return;
2142  }
2143  default:
2144  llvm_unreachable("invalid operand size");
2145  }
2146 
2147  return;
2148  }
2149 
2150  // We got int literal token.
2151  // Only sign extend inline immediates.
2152  switch (OpTy) {
2166  if (isSafeTruncation(Val, 32) &&
2167  AMDGPU::isInlinableLiteral32(static_cast<int32_t>(Val),
2168  AsmParser->hasInv2PiInlineImm())) {
2169  Inst.addOperand(MCOperand::createImm(Val));
2170  setImmKindConst();
2171  return;
2172  }
2173 
2174  Inst.addOperand(MCOperand::createImm(Val & 0xffffffff));
2175  setImmKindLiteral();
2176  return;
2177 
 2178  case AMDGPU::OPERAND_REG_IMM_INT64:
 2179  case AMDGPU::OPERAND_REG_IMM_FP64:
 2180  case AMDGPU::OPERAND_REG_INLINE_C_INT64:
 2181  case AMDGPU::OPERAND_REG_INLINE_C_FP64:
 2182  case AMDGPU::OPERAND_REG_INLINE_AC_FP64:
2183  if (AMDGPU::isInlinableLiteral64(Val, AsmParser->hasInv2PiInlineImm())) {
2184  Inst.addOperand(MCOperand::createImm(Val));
2185  setImmKindConst();
2186  return;
2187  }
2188 
 2189  Inst.addOperand(MCOperand::createImm(Lo_32(Val)));
2190  setImmKindLiteral();
2191  return;
2192 
2200  if (isSafeTruncation(Val, 16) &&
2201  AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val),
2202  AsmParser->hasInv2PiInlineImm())) {
2203  Inst.addOperand(MCOperand::createImm(Val));
2204  setImmKindConst();
2205  return;
2206  }
2207 
2208  Inst.addOperand(MCOperand::createImm(Val & 0xffff));
2209  setImmKindLiteral();
2210  return;
2211 
2216  assert(isSafeTruncation(Val, 16));
2217  assert(AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val),
2218  AsmParser->hasInv2PiInlineImm()));
2219 
2220  Inst.addOperand(MCOperand::createImm(Val));
2221  return;
2222  }
 2223  case AMDGPU::OPERAND_KIMM32:
2224  Inst.addOperand(MCOperand::createImm(Literal.getLoBits(32).getZExtValue()));
2225  setImmKindNone();
2226  return;
 2227  case AMDGPU::OPERAND_KIMM16:
2228  Inst.addOperand(MCOperand::createImm(Literal.getLoBits(16).getZExtValue()));
2229  setImmKindNone();
2230  return;
2231  default:
2232  llvm_unreachable("invalid operand size");
2233  }
2234 }
2235 
2236 template <unsigned Bitwidth>
2237 void AMDGPUOperand::addKImmFPOperands(MCInst &Inst, unsigned N) const {
2238  APInt Literal(64, Imm.Val);
2239  setImmKindNone();
2240 
2241  if (!Imm.IsFPImm) {
2242  // We got int literal token.
2243  Inst.addOperand(MCOperand::createImm(Literal.getLoBits(Bitwidth).getZExtValue()));
2244  return;
2245  }
2246 
2247  bool Lost;
2248  APFloat FPLiteral(APFloat::IEEEdouble(), Literal);
2249  FPLiteral.convert(*getFltSemantics(Bitwidth / 8),
 2250  APFloat::rmNearestTiesToEven, &Lost);
2251  Inst.addOperand(MCOperand::createImm(FPLiteral.bitcastToAPInt().getZExtValue()));
2252 }
2253 
2254 void AMDGPUOperand::addRegOperands(MCInst &Inst, unsigned N) const {
2255  Inst.addOperand(MCOperand::createReg(AMDGPU::getMCReg(getReg(), AsmParser->getSTI())));
2256 }
2257 
2258 static bool isInlineValue(unsigned Reg) {
2259  switch (Reg) {
2260  case AMDGPU::SRC_SHARED_BASE:
2261  case AMDGPU::SRC_SHARED_LIMIT:
2262  case AMDGPU::SRC_PRIVATE_BASE:
2263  case AMDGPU::SRC_PRIVATE_LIMIT:
2264  case AMDGPU::SRC_POPS_EXITING_WAVE_ID:
2265  return true;
2266  case AMDGPU::SRC_VCCZ:
2267  case AMDGPU::SRC_EXECZ:
2268  case AMDGPU::SRC_SCC:
2269  return true;
2270  case AMDGPU::SGPR_NULL:
2271  return true;
2272  default:
2273  return false;
2274  }
2275 }
2276 
2277 bool AMDGPUOperand::isInlineValue() const {
2278  return isRegKind() && ::isInlineValue(getReg());
2279 }
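// Named inline values such as src_shared_base or src_scc can therefore be used
// directly as operands, e.g. "s_mov_b32 s0, src_shared_base" (a hypothetical
// example; actual availability depends on the subtarget).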
2280 
2281 //===----------------------------------------------------------------------===//
2282 // AsmParser
2283 //===----------------------------------------------------------------------===//
2284 
2285 static int getRegClass(RegisterKind Is, unsigned RegWidth) {
2286  if (Is == IS_VGPR) {
2287  switch (RegWidth) {
2288  default: return -1;
2289  case 32:
2290  return AMDGPU::VGPR_32RegClassID;
2291  case 64:
2292  return AMDGPU::VReg_64RegClassID;
2293  case 96:
2294  return AMDGPU::VReg_96RegClassID;
2295  case 128:
2296  return AMDGPU::VReg_128RegClassID;
2297  case 160:
2298  return AMDGPU::VReg_160RegClassID;
2299  case 192:
2300  return AMDGPU::VReg_192RegClassID;
2301  case 224:
2302  return AMDGPU::VReg_224RegClassID;
2303  case 256:
2304  return AMDGPU::VReg_256RegClassID;
2305  case 512:
2306  return AMDGPU::VReg_512RegClassID;
2307  case 1024:
2308  return AMDGPU::VReg_1024RegClassID;
2309  }
2310  } else if (Is == IS_TTMP) {
2311  switch (RegWidth) {
2312  default: return -1;
2313  case 32:
2314  return AMDGPU::TTMP_32RegClassID;
2315  case 64:
2316  return AMDGPU::TTMP_64RegClassID;
2317  case 128:
2318  return AMDGPU::TTMP_128RegClassID;
2319  case 256:
2320  return AMDGPU::TTMP_256RegClassID;
2321  case 512:
2322  return AMDGPU::TTMP_512RegClassID;
2323  }
2324  } else if (Is == IS_SGPR) {
2325  switch (RegWidth) {
2326  default: return -1;
2327  case 32:
2328  return AMDGPU::SGPR_32RegClassID;
2329  case 64:
2330  return AMDGPU::SGPR_64RegClassID;
2331  case 96:
2332  return AMDGPU::SGPR_96RegClassID;
2333  case 128:
2334  return AMDGPU::SGPR_128RegClassID;
2335  case 160:
2336  return AMDGPU::SGPR_160RegClassID;
2337  case 192:
2338  return AMDGPU::SGPR_192RegClassID;
2339  case 224:
2340  return AMDGPU::SGPR_224RegClassID;
2341  case 256:
2342  return AMDGPU::SGPR_256RegClassID;
2343  case 512:
2344  return AMDGPU::SGPR_512RegClassID;
2345  }
2346  } else if (Is == IS_AGPR) {
2347  switch (RegWidth) {
2348  default: return -1;
2349  case 32:
2350  return AMDGPU::AGPR_32RegClassID;
2351  case 64:
2352  return AMDGPU::AReg_64RegClassID;
2353  case 96:
2354  return AMDGPU::AReg_96RegClassID;
2355  case 128:
2356  return AMDGPU::AReg_128RegClassID;
2357  case 160:
2358  return AMDGPU::AReg_160RegClassID;
2359  case 192:
2360  return AMDGPU::AReg_192RegClassID;
2361  case 224:
2362  return AMDGPU::AReg_224RegClassID;
2363  case 256:
2364  return AMDGPU::AReg_256RegClassID;
2365  case 512:
2366  return AMDGPU::AReg_512RegClassID;
2367  case 1024:
2368  return AMDGPU::AReg_1024RegClassID;
2369  }
2370  }
2371  return -1;
2372 }
2373 
 2374 static unsigned getSpecialRegForName(StringRef RegName) {
 2375  return StringSwitch<unsigned>(RegName)
2376  .Case("exec", AMDGPU::EXEC)
2377  .Case("vcc", AMDGPU::VCC)
2378  .Case("flat_scratch", AMDGPU::FLAT_SCR)
2379  .Case("xnack_mask", AMDGPU::XNACK_MASK)
2380  .Case("shared_base", AMDGPU::SRC_SHARED_BASE)
2381  .Case("src_shared_base", AMDGPU::SRC_SHARED_BASE)
2382  .Case("shared_limit", AMDGPU::SRC_SHARED_LIMIT)
2383  .Case("src_shared_limit", AMDGPU::SRC_SHARED_LIMIT)
2384  .Case("private_base", AMDGPU::SRC_PRIVATE_BASE)
2385  .Case("src_private_base", AMDGPU::SRC_PRIVATE_BASE)
2386  .Case("private_limit", AMDGPU::SRC_PRIVATE_LIMIT)
2387  .Case("src_private_limit", AMDGPU::SRC_PRIVATE_LIMIT)
2388  .Case("pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID)
2389  .Case("src_pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID)
2390  .Case("lds_direct", AMDGPU::LDS_DIRECT)
2391  .Case("src_lds_direct", AMDGPU::LDS_DIRECT)
2392  .Case("m0", AMDGPU::M0)
2393  .Case("vccz", AMDGPU::SRC_VCCZ)
2394  .Case("src_vccz", AMDGPU::SRC_VCCZ)
2395  .Case("execz", AMDGPU::SRC_EXECZ)
2396  .Case("src_execz", AMDGPU::SRC_EXECZ)
2397  .Case("scc", AMDGPU::SRC_SCC)
2398  .Case("src_scc", AMDGPU::SRC_SCC)
2399  .Case("tba", AMDGPU::TBA)
2400  .Case("tma", AMDGPU::TMA)
2401  .Case("flat_scratch_lo", AMDGPU::FLAT_SCR_LO)
2402  .Case("flat_scratch_hi", AMDGPU::FLAT_SCR_HI)
2403  .Case("xnack_mask_lo", AMDGPU::XNACK_MASK_LO)
2404  .Case("xnack_mask_hi", AMDGPU::XNACK_MASK_HI)
2405  .Case("vcc_lo", AMDGPU::VCC_LO)
2406  .Case("vcc_hi", AMDGPU::VCC_HI)
2407  .Case("exec_lo", AMDGPU::EXEC_LO)
2408  .Case("exec_hi", AMDGPU::EXEC_HI)
2409  .Case("tma_lo", AMDGPU::TMA_LO)
2410  .Case("tma_hi", AMDGPU::TMA_HI)
2411  .Case("tba_lo", AMDGPU::TBA_LO)
2412  .Case("tba_hi", AMDGPU::TBA_HI)
2413  .Case("pc", AMDGPU::PC_REG)
2414  .Case("null", AMDGPU::SGPR_NULL)
2415  .Default(AMDGPU::NoRegister);
2416 }
2417 
2418 bool AMDGPUAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc,
2419  SMLoc &EndLoc, bool RestoreOnFailure) {
2420  auto R = parseRegister();
2421  if (!R) return true;
2422  assert(R->isReg());
2423  RegNo = R->getReg();
2424  StartLoc = R->getStartLoc();
2425  EndLoc = R->getEndLoc();
2426  return false;
2427 }
2428 
2429 bool AMDGPUAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc,
2430  SMLoc &EndLoc) {
2431  return ParseRegister(RegNo, StartLoc, EndLoc, /*RestoreOnFailure=*/false);
2432 }
2433 
2434 OperandMatchResultTy AMDGPUAsmParser::tryParseRegister(unsigned &RegNo,
2435  SMLoc &StartLoc,
2436  SMLoc &EndLoc) {
2437  bool Result =
2438  ParseRegister(RegNo, StartLoc, EndLoc, /*RestoreOnFailure=*/true);
2439  bool PendingErrors = getParser().hasPendingError();
2440  getParser().clearPendingErrors();
2441  if (PendingErrors)
2442  return MatchOperand_ParseFail;
2443  if (Result)
2444  return MatchOperand_NoMatch;
2445  return MatchOperand_Success;
2446 }
2447 
2448 bool AMDGPUAsmParser::AddNextRegisterToList(unsigned &Reg, unsigned &RegWidth,
2449  RegisterKind RegKind, unsigned Reg1,
2450  SMLoc Loc) {
2451  switch (RegKind) {
2452  case IS_SPECIAL:
2453  if (Reg == AMDGPU::EXEC_LO && Reg1 == AMDGPU::EXEC_HI) {
2454  Reg = AMDGPU::EXEC;
2455  RegWidth = 64;
2456  return true;
2457  }
2458  if (Reg == AMDGPU::FLAT_SCR_LO && Reg1 == AMDGPU::FLAT_SCR_HI) {
2459  Reg = AMDGPU::FLAT_SCR;
2460  RegWidth = 64;
2461  return true;
2462  }
2463  if (Reg == AMDGPU::XNACK_MASK_LO && Reg1 == AMDGPU::XNACK_MASK_HI) {
2464  Reg = AMDGPU::XNACK_MASK;
2465  RegWidth = 64;
2466  return true;
2467  }
2468  if (Reg == AMDGPU::VCC_LO && Reg1 == AMDGPU::VCC_HI) {
2469  Reg = AMDGPU::VCC;
2470  RegWidth = 64;
2471  return true;
2472  }
2473  if (Reg == AMDGPU::TBA_LO && Reg1 == AMDGPU::TBA_HI) {
2474  Reg = AMDGPU::TBA;
2475  RegWidth = 64;
2476  return true;
2477  }
2478  if (Reg == AMDGPU::TMA_LO && Reg1 == AMDGPU::TMA_HI) {
2479  Reg = AMDGPU::TMA;
2480  RegWidth = 64;
2481  return true;
2482  }
2483  Error(Loc, "register does not fit in the list");
2484  return false;
2485  case IS_VGPR:
2486  case IS_SGPR:
2487  case IS_AGPR:
2488  case IS_TTMP:
2489  if (Reg1 != Reg + RegWidth / 32) {
2490  Error(Loc, "registers in a list must have consecutive indices");
2491  return false;
2492  }
2493  RegWidth += 32;
2494  return true;
2495  default:
2496  llvm_unreachable("unexpected register kind");
2497  }
2498 }
2499 
2500 struct RegInfo {
 2501  StringLiteral Name;
2502  RegisterKind Kind;
2503 };
2504 
2505 static constexpr RegInfo RegularRegisters[] = {
2506  {{"v"}, IS_VGPR},
2507  {{"s"}, IS_SGPR},
2508  {{"ttmp"}, IS_TTMP},
2509  {{"acc"}, IS_AGPR},
2510  {{"a"}, IS_AGPR},
2511 };
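// These prefixes cover the regular register syntax accepted below, e.g. "v0",
// "s[0:1]", "ttmp[4:7]", "a[0:3]" or "acc0" (the "acc" spelling is an alias
// for AGPRs).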
2512 
2513 static bool isRegularReg(RegisterKind Kind) {
2514  return Kind == IS_VGPR ||
2515  Kind == IS_SGPR ||
2516  Kind == IS_TTMP ||
2517  Kind == IS_AGPR;
2518 }
2519 
2520 static const RegInfo* getRegularRegInfo(StringRef Str) {
2521  for (const RegInfo &Reg : RegularRegisters)
2522  if (Str.startswith(Reg.Name))
2523  return &Reg;
2524  return nullptr;
2525 }
2526 
2527 static bool getRegNum(StringRef Str, unsigned& Num) {
2528  return !Str.getAsInteger(10, Num);
2529 }
2530 
2531 bool
2532 AMDGPUAsmParser::isRegister(const AsmToken &Token,
2533  const AsmToken &NextToken) const {
2534 
2535  // A list of consecutive registers: [s0,s1,s2,s3]
2536  if (Token.is(AsmToken::LBrac))
2537  return true;
2538 
2539  if (!Token.is(AsmToken::Identifier))
2540  return false;
2541 
2542  // A single register like s0 or a range of registers like s[0:1]
2543 
2544  StringRef Str = Token.getString();
2545  const RegInfo *Reg = getRegularRegInfo(Str);
2546  if (Reg) {
2547  StringRef RegName = Reg->Name;
2548  StringRef RegSuffix = Str.substr(RegName.size());
2549  if (!RegSuffix.empty()) {
2550  unsigned Num;
2551  // A single register with an index: rXX
2552  if (getRegNum(RegSuffix, Num))
2553  return true;
2554  } else {
2555  // A range of registers: r[XX:YY].
2556  if (NextToken.is(AsmToken::LBrac))
2557  return true;
2558  }
2559  }
2560 
2561  return getSpecialRegForName(Str) != AMDGPU::NoRegister;
2562 }
2563 
2564 bool
2565 AMDGPUAsmParser::isRegister()
2566 {
2567  return isRegister(getToken(), peekToken());
2568 }
2569 
2570 unsigned
2571 AMDGPUAsmParser::getRegularReg(RegisterKind RegKind,
2572  unsigned RegNum,
2573  unsigned RegWidth,
2574  SMLoc Loc) {
2575 
2576  assert(isRegularReg(RegKind));
2577 
2578  unsigned AlignSize = 1;
2579  if (RegKind == IS_SGPR || RegKind == IS_TTMP) {
2580  // SGPR and TTMP registers must be aligned.
2581  // Max required alignment is 4 dwords.
2582  AlignSize = std::min(RegWidth / 32, 4u);
2583  }
2584 
2585  if (RegNum % AlignSize != 0) {
2586  Error(Loc, "invalid register alignment");
2587  return AMDGPU::NoRegister;
2588  }
2589 
2590  unsigned RegIdx = RegNum / AlignSize;
2591  int RCID = getRegClass(RegKind, RegWidth);
2592  if (RCID == -1) {
2593  Error(Loc, "invalid or unsupported register size");
2594  return AMDGPU::NoRegister;
2595  }
2596 
2597  const MCRegisterInfo *TRI = getContext().getRegisterInfo();
2598  const MCRegisterClass RC = TRI->getRegClass(RCID);
2599  if (RegIdx >= RC.getNumRegs()) {
2600  Error(Loc, "register index is out of range");
2601  return AMDGPU::NoRegister;
2602  }
2603 
2604  return RC.getRegister(RegIdx);
2605 }
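// Example of the alignment rule above: s[2:3] is valid (a 2-dword SGPR tuple
// only needs 2-dword alignment), while s[2:5] is rejected with "invalid
// register alignment" because a 4-dword tuple must start at a multiple of 4.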
2606 
2607 bool AMDGPUAsmParser::ParseRegRange(unsigned &Num, unsigned &RegWidth) {
2608  int64_t RegLo, RegHi;
2609  if (!skipToken(AsmToken::LBrac, "missing register index"))
2610  return false;
2611 
2612  SMLoc FirstIdxLoc = getLoc();
2613  SMLoc SecondIdxLoc;
2614 
2615  if (!parseExpr(RegLo))
2616  return false;
2617 
2618  if (trySkipToken(AsmToken::Colon)) {
2619  SecondIdxLoc = getLoc();
2620  if (!parseExpr(RegHi))
2621  return false;
2622  } else {
2623  RegHi = RegLo;
2624  }
2625 
2626  if (!skipToken(AsmToken::RBrac, "expected a closing square bracket"))
2627  return false;
2628 
2629  if (!isUInt<32>(RegLo)) {
2630  Error(FirstIdxLoc, "invalid register index");
2631  return false;
2632  }
2633 
2634  if (!isUInt<32>(RegHi)) {
2635  Error(SecondIdxLoc, "invalid register index");
2636  return false;
2637  }
2638 
2639  if (RegLo > RegHi) {
2640  Error(FirstIdxLoc, "first register index should not exceed second index");
2641  return false;
2642  }
2643 
2644  Num = static_cast<unsigned>(RegLo);
2645  RegWidth = 32 * ((RegHi - RegLo) + 1);
2646  return true;
2647 }
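// For example, "v[8:11]" parses as Num = 8 with RegWidth = 128, and a single
// index such as "v[5]" parses as Num = 5 with RegWidth = 32.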
2648 
2649 unsigned AMDGPUAsmParser::ParseSpecialReg(RegisterKind &RegKind,
2650  unsigned &RegNum, unsigned &RegWidth,
2651  SmallVectorImpl<AsmToken> &Tokens) {
2652  assert(isToken(AsmToken::Identifier));
2653  unsigned Reg = getSpecialRegForName(getTokenStr());
2654  if (Reg) {
2655  RegNum = 0;
2656  RegWidth = 32;
2657  RegKind = IS_SPECIAL;
2658  Tokens.push_back(getToken());
2659  lex(); // skip register name
2660  }
2661  return Reg;
2662 }
2663 
2664 unsigned AMDGPUAsmParser::ParseRegularReg(RegisterKind &RegKind,
2665  unsigned &RegNum, unsigned &RegWidth,
2666  SmallVectorImpl<AsmToken> &Tokens) {
2667  assert(isToken(AsmToken::Identifier));
2668  StringRef RegName = getTokenStr();
2669  auto Loc = getLoc();
2670 
2671  const RegInfo *RI = getRegularRegInfo(RegName);
2672  if (!RI) {
2673  Error(Loc, "invalid register name");
2674  return AMDGPU::NoRegister;
2675  }
2676 
2677  Tokens.push_back(getToken());
2678  lex(); // skip register name
2679 
2680  RegKind = RI->Kind;
2681  StringRef RegSuffix = RegName.substr(RI->Name.size());
2682  if (!RegSuffix.empty()) {
2683  // Single 32-bit register: vXX.
2684  if (!getRegNum(RegSuffix, RegNum)) {
2685  Error(Loc, "invalid register index");
2686  return AMDGPU::NoRegister;
2687  }
2688  RegWidth = 32;
2689  } else {
2690  // Range of registers: v[XX:YY]. ":YY" is optional.
2691  if (!ParseRegRange(RegNum, RegWidth))
2692  return AMDGPU::NoRegister;
2693  }
2694 
2695  return getRegularReg(RegKind, RegNum, RegWidth, Loc);
2696 }
2697 
2698 unsigned AMDGPUAsmParser::ParseRegList(RegisterKind &RegKind, unsigned &RegNum,
2699  unsigned &RegWidth,
2700  SmallVectorImpl<AsmToken> &Tokens) {
2701  unsigned Reg = AMDGPU::NoRegister;
2702  auto ListLoc = getLoc();
2703 
2704  if (!skipToken(AsmToken::LBrac,
2705  "expected a register or a list of registers")) {
2706  return AMDGPU::NoRegister;
2707  }
2708 
2709  // List of consecutive registers, e.g.: [s0,s1,s2,s3]
2710 
2711  auto Loc = getLoc();
2712  if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth))
2713  return AMDGPU::NoRegister;
2714  if (RegWidth != 32) {
2715  Error(Loc, "expected a single 32-bit register");
2716  return AMDGPU::NoRegister;
2717  }
2718 
2719  for (; trySkipToken(AsmToken::Comma); ) {
2720  RegisterKind NextRegKind;
2721  unsigned NextReg, NextRegNum, NextRegWidth;
2722  Loc = getLoc();
2723 
2724  if (!ParseAMDGPURegister(NextRegKind, NextReg,
2725  NextRegNum, NextRegWidth,
2726  Tokens)) {
2727  return AMDGPU::NoRegister;
2728  }
2729  if (NextRegWidth != 32) {
2730  Error(Loc, "expected a single 32-bit register");
2731  return AMDGPU::NoRegister;
2732  }
2733  if (NextRegKind != RegKind) {
2734  Error(Loc, "registers in a list must be of the same kind");
2735  return AMDGPU::NoRegister;
2736  }
2737  if (!AddNextRegisterToList(Reg, RegWidth, RegKind, NextReg, Loc))
2738  return AMDGPU::NoRegister;
2739  }
2740 
2741  if (!skipToken(AsmToken::RBrac,
2742  "expected a comma or a closing square bracket")) {
2743  return AMDGPU::NoRegister;
2744  }
2745 
2746  if (isRegularReg(RegKind))
2747  Reg = getRegularReg(RegKind, RegNum, RegWidth, ListLoc);
2748 
2749  return Reg;
2750 }
2751 
2752 bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
2753  unsigned &RegNum, unsigned &RegWidth,
2754  SmallVectorImpl<AsmToken> &Tokens) {
2755  auto Loc = getLoc();
2756  Reg = AMDGPU::NoRegister;
2757 
2758  if (isToken(AsmToken::Identifier)) {
2759  Reg = ParseSpecialReg(RegKind, RegNum, RegWidth, Tokens);
2760  if (Reg == AMDGPU::NoRegister)
2761  Reg = ParseRegularReg(RegKind, RegNum, RegWidth, Tokens);
2762  } else {
2763  Reg = ParseRegList(RegKind, RegNum, RegWidth, Tokens);
2764  }
2765 
2766  const MCRegisterInfo *TRI = getContext().getRegisterInfo();
2767  if (Reg == AMDGPU::NoRegister) {
2768  assert(Parser.hasPendingError());
2769  return false;
2770  }
2771 
2772  if (!subtargetHasRegister(*TRI, Reg)) {
2773  if (Reg == AMDGPU::SGPR_NULL) {
2774  Error(Loc, "'null' operand is not supported on this GPU");
2775  } else {
2776  Error(Loc, "register not available on this GPU");
2777  }
2778  return false;
2779  }
2780 
2781  return true;
2782 }
2783 
2784 bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
2785  unsigned &RegNum, unsigned &RegWidth,
2786  bool RestoreOnFailure /*=false*/) {
2787  Reg = AMDGPU::NoRegister;
2788 
2789  SmallVector<AsmToken, 1> Tokens;
2790  if (ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth, Tokens)) {
2791  if (RestoreOnFailure) {
2792  while (!Tokens.empty()) {
2793  getLexer().UnLex(Tokens.pop_back_val());
2794  }
2795  }
2796  return true;
2797  }
2798  return false;
2799 }
2800 
 2801 Optional<StringRef>
2802 AMDGPUAsmParser::getGprCountSymbolName(RegisterKind RegKind) {
2803  switch (RegKind) {
2804  case IS_VGPR:
2805  return StringRef(".amdgcn.next_free_vgpr");
2806  case IS_SGPR:
2807  return StringRef(".amdgcn.next_free_sgpr");
2808  default:
2809  return None;
2810  }
2811 }
2812 
2813 void AMDGPUAsmParser::initializeGprCountSymbol(RegisterKind RegKind) {
2814  auto SymbolName = getGprCountSymbolName(RegKind);
2815  assert(SymbolName && "initializing invalid register kind");
2816  MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName);
2817  Sym->setVariableValue(MCConstantExpr::create(0, getContext()));
2818 }
2819 
2820 bool AMDGPUAsmParser::updateGprCountSymbols(RegisterKind RegKind,
2821  unsigned DwordRegIndex,
2822  unsigned RegWidth) {
2823  // Symbols are only defined for GCN targets
2824  if (AMDGPU::getIsaVersion(getSTI().getCPU()).Major < 6)
2825  return true;
2826 
2827  auto SymbolName = getGprCountSymbolName(RegKind);
2828  if (!SymbolName)
2829  return true;
2830  MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName);
2831 
2832  int64_t NewMax = DwordRegIndex + divideCeil(RegWidth, 32) - 1;
2833  int64_t OldCount;
2834 
2835  if (!Sym->isVariable())
2836  return !Error(getLoc(),
2837  ".amdgcn.next_free_{v,s}gpr symbols must be variable");
2838  if (!Sym->getVariableValue(false)->evaluateAsAbsolute(OldCount))
2839  return !Error(
2840  getLoc(),
2841  ".amdgcn.next_free_{v,s}gpr symbols must be absolute expressions");
2842 
2843  if (OldCount <= NewMax)
2844  Sym->setVariableValue(MCConstantExpr::create(NewMax + 1, getContext()));
2845 
2846  return true;
2847 }
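// For instance, a kernel that references v7 bumps .amdgcn.next_free_vgpr to at
// least 8, since NewMax = 7 for a 32-bit wide use of register index 7.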
2848 
2849 std::unique_ptr<AMDGPUOperand>
2850 AMDGPUAsmParser::parseRegister(bool RestoreOnFailure) {
2851  const auto &Tok = getToken();
2852  SMLoc StartLoc = Tok.getLoc();
2853  SMLoc EndLoc = Tok.getEndLoc();
2854  RegisterKind RegKind;
2855  unsigned Reg, RegNum, RegWidth;
2856 
2857  if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth)) {
2858  return nullptr;
2859  }
2860  if (isHsaAbiVersion3AndAbove(&getSTI())) {
2861  if (!updateGprCountSymbols(RegKind, RegNum, RegWidth))
2862  return nullptr;
2863  } else
2864  KernelScope.usesRegister(RegKind, RegNum, RegWidth);
2865  return AMDGPUOperand::CreateReg(this, Reg, StartLoc, EndLoc);
2866 }
2867 
 2868 OperandMatchResultTy
2869 AMDGPUAsmParser::parseImm(OperandVector &Operands, bool HasSP3AbsModifier) {
2870  // TODO: add syntactic sugar for 1/(2*PI)
2871 
2872  assert(!isRegister());
2873  assert(!isModifier());
2874 
2875  const auto& Tok = getToken();
2876  const auto& NextTok = peekToken();
2877  bool IsReal = Tok.is(AsmToken::Real);
2878  SMLoc S = getLoc();
2879  bool Negate = false;
2880 
2881  if (!IsReal && Tok.is(AsmToken::Minus) && NextTok.is(AsmToken::Real)) {
2882  lex();
2883  IsReal = true;
2884  Negate = true;
2885  }
2886 
2887  if (IsReal) {
2888  // Floating-point expressions are not supported.
 2889  // Only floating-point literals with an
 2890  // optional sign are allowed.
2891 
2892  StringRef Num = getTokenStr();
2893  lex();
2894 
2895  APFloat RealVal(APFloat::IEEEdouble());
2896  auto roundMode = APFloat::rmNearestTiesToEven;
2897  if (errorToBool(RealVal.convertFromString(Num, roundMode).takeError())) {
2898  return MatchOperand_ParseFail;
2899  }
2900  if (Negate)
2901  RealVal.changeSign();
2902 
2903  Operands.push_back(
2904  AMDGPUOperand::CreateImm(this, RealVal.bitcastToAPInt().getZExtValue(), S,
2905  AMDGPUOperand::ImmTyNone, true));
2906 
2907  return MatchOperand_Success;
2908 
2909  } else {
2910  int64_t IntVal;
2911  const MCExpr *Expr;
2912  SMLoc S = getLoc();
2913 
2914  if (HasSP3AbsModifier) {
2915  // This is a workaround for handling expressions
2916  // as arguments of SP3 'abs' modifier, for example:
2917  // |1.0|
2918  // |-1|
2919  // |1+x|
 2920  // This syntax is not compatible with the syntax of standard
2921  // MC expressions (due to the trailing '|').
2922  SMLoc EndLoc;
2923  if (getParser().parsePrimaryExpr(Expr, EndLoc, nullptr))
2924  return MatchOperand_ParseFail;
2925  } else {
2926  if (Parser.parseExpression(Expr))
2927  return MatchOperand_ParseFail;
2928  }
2929 
2930  if (Expr->evaluateAsAbsolute(IntVal)) {
2931  Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S));
2932  } else {
2933  Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S));
2934  }
2935 
2936  return MatchOperand_Success;
2937  }
2938 
2939  return MatchOperand_NoMatch;
2940 }
2941 
 2942 OperandMatchResultTy
2943 AMDGPUAsmParser::parseReg(OperandVector &Operands) {
2944  if (!isRegister())
2945  return MatchOperand_NoMatch;
2946 
2947  if (auto R = parseRegister()) {
2948  assert(R->isReg());
2949  Operands.push_back(std::move(R));
2950  return MatchOperand_Success;
2951  }
2952  return MatchOperand_ParseFail;
2953 }
2954 
 2955 OperandMatchResultTy
2956 AMDGPUAsmParser::parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod) {
2957  auto res = parseReg(Operands);
2958  if (res != MatchOperand_NoMatch) {
2959  return res;
2960  } else if (isModifier()) {
2961  return MatchOperand_NoMatch;
2962  } else {
2963  return parseImm(Operands, HasSP3AbsMod);
2964  }
2965 }
2966 
2967 bool
2968 AMDGPUAsmParser::isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
2969  if (Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::LParen)) {
2970  const auto &str = Token.getString();
2971  return str == "abs" || str == "neg" || str == "sext";
2972  }
2973  return false;
2974 }
2975 
2976 bool
2977 AMDGPUAsmParser::isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const {
2978  return Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::Colon);
2979 }
2980 
2981 bool
2982 AMDGPUAsmParser::isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
2983  return isNamedOperandModifier(Token, NextToken) || Token.is(AsmToken::Pipe);
2984 }
2985 
2986 bool
2987 AMDGPUAsmParser::isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
2988  return isRegister(Token, NextToken) || isOperandModifier(Token, NextToken);
2989 }
2990 
2991 // Check if this is an operand modifier or an opcode modifier
 2992 // which may look like an expression but is not. We should
2993 // avoid parsing these modifiers as expressions. Currently
2994 // recognized sequences are:
2995 // |...|
2996 // abs(...)
2997 // neg(...)
2998 // sext(...)
2999 // -reg
3000 // -|...|
3001 // -abs(...)
3002 // name:...
3003 // Note that simple opcode modifiers like 'gds' may be parsed as
3004 // expressions; this is a special case. See getExpressionAsToken.
3005 //
3006 bool
3007 AMDGPUAsmParser::isModifier() {
3008 
3009  AsmToken Tok = getToken();
3010  AsmToken NextToken[2];
3011  peekTokens(NextToken);
3012 
3013  return isOperandModifier(Tok, NextToken[0]) ||
3014  (Tok.is(AsmToken::Minus) && isRegOrOperandModifier(NextToken[0], NextToken[1])) ||
3015  isOpcodeModifierWithVal(Tok, NextToken[0]);
3016 }
3017 
3018 // Check if the current token is an SP3 'neg' modifier.
3019 // Currently this modifier is allowed in the following context:
3020 //
3021 // 1. Before a register, e.g. "-v0", "-v[...]" or "-[v0,v1]".
3022 // 2. Before an 'abs' modifier: -abs(...)
3023 // 3. Before an SP3 'abs' modifier: -|...|
3024 //
3025 // In all other cases "-" is handled as a part
3026 // of an expression that follows the sign.
3027 //
 3028 // Note: When "-" is followed by an integer literal,
 3029 // it is interpreted as integer negation rather than
 3030 // a floating-point NEG modifier applied to the literal.
 3031 // Besides being counter-intuitive, using the floating-point
 3032 // NEG modifier here would give integer literals different
 3033 // meanings with VOP1/2/C and VOP3,
3034 // for example:
3035 // v_exp_f32_e32 v5, -1 // VOP1: src0 = 0xFFFFFFFF
3036 // v_exp_f32_e64 v5, -1 // VOP3: src0 = 0x80000001
3037 // Negative fp literals with preceding "-" are
3038 // handled likewise for uniformity
3039 //
3040 bool
3041 AMDGPUAsmParser::parseSP3NegModifier() {
3042 
3043  AsmToken NextToken[2];
3044  peekTokens(NextToken);
3045 
3046  if (isToken(AsmToken::Minus) &&
3047  (isRegister(NextToken[0], NextToken[1]) ||
3048  NextToken[0].is(AsmToken::Pipe) ||
3049  isId(NextToken[0], "abs"))) {
3050  lex();
3051  return true;
3052  }
3053 
3054  return false;
3055 }
3056 
 3057 OperandMatchResultTy
3058 AMDGPUAsmParser::parseRegOrImmWithFPInputMods(OperandVector &Operands,
3059  bool AllowImm) {
3060  bool Neg, SP3Neg;
3061  bool Abs, SP3Abs;
3062  SMLoc Loc;
3063 
3064  // Disable ambiguous constructs like '--1' etc. Should use neg(-1) instead.
3065  if (isToken(AsmToken::Minus) && peekToken().is(AsmToken::Minus)) {
3066  Error(getLoc(), "invalid syntax, expected 'neg' modifier");
3067  return MatchOperand_ParseFail;
3068  }
3069 
3070  SP3Neg = parseSP3NegModifier();
3071 
3072  Loc = getLoc();
3073  Neg = trySkipId("neg");
3074  if (Neg && SP3Neg) {
3075  Error(Loc, "expected register or immediate");
3076  return MatchOperand_ParseFail;
3077  }
3078  if (Neg && !skipToken(AsmToken::LParen, "expected left paren after neg"))
3079  return MatchOperand_ParseFail;
3080 
3081  Abs = trySkipId("abs");
3082  if (Abs && !skipToken(AsmToken::LParen, "expected left paren after abs"))
3083  return MatchOperand_ParseFail;
3084 
3085  Loc = getLoc();
3086  SP3Abs = trySkipToken(AsmToken::Pipe);
3087  if (Abs && SP3Abs) {
3088  Error(Loc, "expected register or immediate");
3089  return MatchOperand_ParseFail;
3090  }
3091 
 3092  OperandMatchResultTy Res;
3093  if (AllowImm) {
3094  Res = parseRegOrImm(Operands, SP3Abs);
3095  } else {
3096  Res = parseReg(Operands);
3097  }
3098  if (Res != MatchOperand_Success) {
3099  return (SP3Neg || Neg || SP3Abs || Abs)? MatchOperand_ParseFail : Res;
3100  }
3101 
3102  if (SP3Abs && !skipToken(AsmToken::Pipe, "expected vertical bar"))
3103  return MatchOperand_ParseFail;
3104  if (Abs && !skipToken(AsmToken::RParen, "expected closing parentheses"))
3105  return MatchOperand_ParseFail;
3106  if (Neg && !skipToken(AsmToken::RParen, "expected closing parentheses"))
3107  return MatchOperand_ParseFail;
3108 
3109  AMDGPUOperand::Modifiers Mods;
3110  Mods.Abs = Abs || SP3Abs;
3111  Mods.Neg = Neg || SP3Neg;
3112 
3113  if (Mods.hasFPModifiers()) {
3114  AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
3115  if (Op.isExpr()) {
3116  Error(Op.getStartLoc(), "expected an absolute expression");
3117  return MatchOperand_ParseFail;
3118  }
3119  Op.setModifiers(Mods);
3120  }
3121  return MatchOperand_Success;
3122 }
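// Accepted spellings for FP input modifiers therefore include "abs(v1)",
// "|v2|", "-v3", "-|v4|" and "neg(v5)"; the ambiguous "--1" form is rejected
// with a request to use neg() instead.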
3123 
 3124 OperandMatchResultTy
3125 AMDGPUAsmParser::parseRegOrImmWithIntInputMods(OperandVector &Operands,
3126  bool AllowImm) {
3127  bool Sext = trySkipId("sext");
3128  if (Sext && !skipToken(AsmToken::LParen, "expected left paren after sext"))
3129  return MatchOperand_ParseFail;
3130 
 3131  OperandMatchResultTy Res;
3132  if (AllowImm) {
3133  Res = parseRegOrImm(Operands);
3134  } else {
3135  Res = parseReg(Operands);
3136  }
3137  if (Res != MatchOperand_Success) {
3138  return Sext? MatchOperand_ParseFail : Res;
3139  }
3140 
3141  if (Sext && !skipToken(AsmToken::RParen, "expected closing parentheses"))
3142  return MatchOperand_ParseFail;
3143 
3144  AMDGPUOperand::Modifiers Mods;
3145  Mods.Sext = Sext;
3146 
3147  if (Mods.hasIntModifiers()) {
3148  AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
3149  if (Op.isExpr()) {
3150  Error(Op.getStartLoc(), "expected an absolute expression");
3151  return MatchOperand_ParseFail;
3152  }
3153  Op.setModifiers(Mods);
3154  }
3155 
3156  return MatchOperand_Success;
3157 }
3158 
 3159 OperandMatchResultTy
3160 AMDGPUAsmParser::parseRegWithFPInputMods(OperandVector &Operands) {
3161  return parseRegOrImmWithFPInputMods(Operands, false);
3162 }
3163 
 3164 OperandMatchResultTy
3165 AMDGPUAsmParser::parseRegWithIntInputMods(OperandVector &Operands) {
3166  return parseRegOrImmWithIntInputMods(Operands, false);
3167 }
3168 
3169 OperandMatchResultTy AMDGPUAsmParser::parseVReg32OrOff(OperandVector &Operands) {
3170  auto Loc = getLoc();
3171  if (trySkipId("off")) {
3172  Operands.push_back(AMDGPUOperand::CreateImm(this, 0, Loc,
3173  AMDGPUOperand::ImmTyOff, false));
3174  return MatchOperand_Success;
3175  }
3176 
3177  if (!isRegister())
3178  return MatchOperand_NoMatch;
3179 
3180  std::unique_ptr<AMDGPUOperand> Reg = parseRegister();
3181  if (Reg) {
3182  Operands.push_back(std::move(Reg));
3183  return MatchOperand_Success;
3184  }
3185 
3186  return MatchOperand_ParseFail;
3187 
3188 }
3189 
3190 unsigned AMDGPUAsmParser::checkTargetMatchPredicate(MCInst &Inst) {
3191  uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
3192 
3193  if ((getForcedEncodingSize() == 32 && (TSFlags & SIInstrFlags::VOP3)) ||
3194  (getForcedEncodingSize() == 64 && !(TSFlags & SIInstrFlags::VOP3)) ||
3195  (isForcedDPP() && !(TSFlags & SIInstrFlags::DPP)) ||
3196  (isForcedSDWA() && !(TSFlags & SIInstrFlags::SDWA)) )
3197  return Match_InvalidOperand;
3198 
3199  if ((TSFlags & SIInstrFlags::VOP3) &&
3200  (TSFlags & SIInstrFlags::VOPAsmPrefer32Bit) &&
3201  getForcedEncodingSize() != 64)
3202  return Match_PreferE32;
3203 
3204  if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi ||
3205  Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) {
3206  // v_mac_f32/16 allow only dst_sel == DWORD;
3207  auto OpNum =
3208  AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::dst_sel);
3209  const auto &Op = Inst.getOperand(OpNum);
3210  if (!Op.isImm() || Op.getImm() != AMDGPU::SDWA::SdwaSel::DWORD) {
3211  return Match_InvalidOperand;
3212  }
3213  }
3214 
3215  return Match_Success;
3216 }
3217 
 3218 static ArrayRef<unsigned> getAllVariants() {
3219  static const unsigned Variants[] = {
 3220  AMDGPUAsmVariants::DEFAULT, AMDGPUAsmVariants::VOP3,
 3221  AMDGPUAsmVariants::SDWA, AMDGPUAsmVariants::SDWA9, AMDGPUAsmVariants::DPP
3222  };
3223 
3224  return makeArrayRef(Variants);
3225 }
3226 
3227 // What asm variants we should check
3228 ArrayRef<unsigned> AMDGPUAsmParser::getMatchedVariants() const {
3229  if (getForcedEncodingSize() == 32) {
3230  static const unsigned Variants[] = {AMDGPUAsmVariants::DEFAULT};
3231  return makeArrayRef(Variants);
3232  }
3233 
3234  if (isForcedVOP3()) {
3235  static const unsigned Variants[] = {AMDGPUAsmVariants::VOP3};
3236  return makeArrayRef(Variants);
3237  }
3238 
3239  if (isForcedSDWA()) {
3240  static const unsigned Variants[] = {AMDGPUAsmVariants::SDWA,
 3241  AMDGPUAsmVariants::SDWA9};
3242  return makeArrayRef(Variants);
3243  }
3244 
3245  if (isForcedDPP()) {
3246  static const unsigned Variants[] = {AMDGPUAsmVariants::DPP};
3247  return makeArrayRef(Variants);
3248  }
3249 
3250  return getAllVariants();
3251 }
3252 
3253 StringRef AMDGPUAsmParser::getMatchedVariantName() const {
3254  if (getForcedEncodingSize() == 32)
3255  return "e32";
3256 
3257  if (isForcedVOP3())
3258  return "e64";
3259 
3260  if (isForcedSDWA())
3261  return "sdwa";
3262 
3263  if (isForcedDPP())
3264  return "dpp";
3265 
3266  return "";
3267 }
3268 
3269 unsigned AMDGPUAsmParser::findImplicitSGPRReadInVOP(const MCInst &Inst) const {
3270  const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
3271  const unsigned Num = Desc.getNumImplicitUses();
3272  for (unsigned i = 0; i < Num; ++i) {
3273  unsigned Reg = Desc.ImplicitUses[i];
3274  switch (Reg) {
3275  case AMDGPU::FLAT_SCR:
3276  case AMDGPU::VCC:
3277  case AMDGPU::VCC_LO:
3278  case AMDGPU::VCC_HI:
3279  case AMDGPU::M0:
3280  return Reg;
3281  default:
3282  break;
3283  }
3284  }
3285  return AMDGPU::NoRegister;
3286 }
3287 
3288 // NB: This code is correct only when used to check constant
 3289 // bus limitations because GFX7 supports no f16 inline constants.
3290 // Note that there are no cases when a GFX7 opcode violates
3291 // constant bus limitations due to the use of an f16 constant.
3292 bool AMDGPUAsmParser::isInlineConstant(const MCInst &Inst,
3293  unsigned OpIdx) const {
3294  const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
3295 
3296  if (!AMDGPU::isSISrcOperand(Desc, OpIdx)) {
3297  return false;
3298  }
3299 
3300  const MCOperand &MO = Inst.getOperand(OpIdx);
3301 
3302  int64_t Val = MO.getImm();
3303  auto OpSize = AMDGPU::getOperandSize(Desc, OpIdx);
3304 
3305  switch (OpSize) { // expected operand size
3306  case 8:
3307  return AMDGPU::isInlinableLiteral64(Val, hasInv2PiInlineImm());
3308  case 4:
3309  return AMDGPU::isInlinableLiteral32(Val, hasInv2PiInlineImm());
3310  case 2: {
3311  const unsigned OperandType = Desc.OpInfo[OpIdx].OperandType;
3315  return AMDGPU::isInlinableIntLiteral(Val);
3316 
3321 
3325  return AMDGPU::isInlinableLiteralV216(Val, hasInv2PiInlineImm());
3326 
3327  return AMDGPU::isInlinableLiteral16(Val, hasInv2PiInlineImm());
3328  }
3329  default:
3330  llvm_unreachable("invalid operand size");
3331  }
3332 }
3333 
3334 unsigned AMDGPUAsmParser::getConstantBusLimit(unsigned Opcode) const {
3335  if (!isGFX10Plus())
3336  return 1;
3337 
3338  switch (Opcode) {
3339  // 64-bit shift instructions can use only one scalar value input
3340  case AMDGPU::V_LSHLREV_B64_e64:
3341  case AMDGPU::V_LSHLREV_B64_gfx10:
3342  case AMDGPU::V_LSHRREV_B64_e64:
3343  case AMDGPU::V_LSHRREV_B64_gfx10:
3344  case AMDGPU::V_ASHRREV_I64_e64:
3345  case AMDGPU::V_ASHRREV_I64_gfx10:
3346  case AMDGPU::V_LSHL_B64_e64:
3347  case AMDGPU::V_LSHR_B64_e64:
3348  case AMDGPU::V_ASHR_I64_e64:
3349  return 1;
3350  default:
3351  return 2;
3352  }
3353 }
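// As an illustration (not taken from this file): on pre-GFX10 targets a VOP3
// instruction such as "v_add_f32_e64 v0, s1, s2" exceeds the single constant
// bus slot because it reads two different SGPRs, whereas GFX10+ generally
// allows two such reads.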
3354 
3355 bool AMDGPUAsmParser::usesConstantBus(const MCInst &Inst, unsigned OpIdx) {
3356  const MCOperand &MO = Inst.getOperand(OpIdx);
3357  if (MO.isImm()) {
3358  return !isInlineConstant(Inst, OpIdx);
3359  } else if (MO.isReg()) {
3360  auto Reg = MO.getReg();
3361  const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3362  auto PReg = mc2PseudoReg(Reg);
3363  return isSGPR(PReg, TRI) && PReg != SGPR_NULL;
3364  } else {
3365  return true;
3366  }
3367 }
3368 
3369 bool
3370 AMDGPUAsmParser::validateConstantBusLimitations(const MCInst &Inst,
3371  const OperandVector &Operands) {
3372  const unsigned Opcode = Inst.getOpcode();
3373  const MCInstrDesc &Desc = MII.get(Opcode);
3374  unsigned LastSGPR = AMDGPU::NoRegister;
3375  unsigned ConstantBusUseCount = 0;
3376  unsigned NumLiterals = 0;
3377  unsigned LiteralSize;
3378 
3379  if (Desc.TSFlags &
 3380  (SIInstrFlags::VOPC |
 3381  SIInstrFlags::VOP1 | SIInstrFlags::VOP2 |
 3382  SIInstrFlags::VOP3 | SIInstrFlags::VOP3P |
3383  SIInstrFlags::SDWA)) {
3384  // Check special imm operands (used by madmk, etc)
3385  if (AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::imm) != -1) {
3386  ++NumLiterals;
3387  LiteralSize = 4;
3388  }
3389 
3390  SmallDenseSet<unsigned> SGPRsUsed;
3391  unsigned SGPRUsed = findImplicitSGPRReadInVOP(Inst);
3392  if (SGPRUsed != AMDGPU::NoRegister) {
3393  SGPRsUsed.insert(SGPRUsed);
3394  ++ConstantBusUseCount;
3395  }
3396 
3397  const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
3398  const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
3399  const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);
3400 
3401  const int OpIndices[] = { Src0Idx, Src1Idx, Src2Idx };
3402 
3403  for (int OpIdx : OpIndices) {
3404  if (OpIdx == -1) break;
3405 
3406  const MCOperand &MO = Inst.getOperand(OpIdx);
3407  if (usesConstantBus(Inst, OpIdx)) {
3408  if (MO.isReg()) {
3409  LastSGPR = mc2PseudoReg(MO.getReg());
 3410  // Pairs of registers with partial intersections like these:
3411  // s0, s[0:1]
3412  // flat_scratch_lo, flat_scratch
3413  // flat_scratch_lo, flat_scratch_hi
3414  // are theoretically valid but they are disabled anyway.
3415  // Note that this code mimics SIInstrInfo::verifyInstruction
3416  if (!SGPRsUsed.count(LastSGPR)) {
3417  SGPRsUsed.insert(LastSGPR);
3418  ++ConstantBusUseCount;
3419  }
3420  } else { // Expression or a literal
3421 
3422  if (Desc.OpInfo[OpIdx].OperandType == MCOI::OPERAND_IMMEDIATE)
3423  continue; // special operand like VINTERP attr_chan
3424 
3425  // An instruction may use only one literal.
3426  // This has been validated on the previous step.
3427  // See validateVOPLiteral.
3428  // This literal may be used as more than one operand.
3429  // If all these operands are of the same size,
3430  // this literal counts as one scalar value.
3431  // Otherwise it counts as 2 scalar values.
3432  // See "GFX10 Shader Programming", section 3.6.2.3.
3433 
3434  unsigned Size = AMDGPU::getOperandSize(Desc, OpIdx);
3435  if (Size < 4) Size = 4;
3436 
3437  if (NumLiterals == 0) {
3438  NumLiterals = 1;
3439  LiteralSize = Size;
3440  } else if (LiteralSize != Size) {
3441  NumLiterals = 2;
3442  }
3443  }
3444  }
3445  }
3446  }
3447  ConstantBusUseCount += NumLiterals;
3448 
3449  if (ConstantBusUseCount <= getConstantBusLimit(Opcode))
3450  return true;
3451 
3452  SMLoc LitLoc = getLitLoc(Operands);
3453  SMLoc RegLoc = getRegLoc(LastSGPR, Operands);
3454  SMLoc Loc = (LitLoc.getPointer() < RegLoc.getPointer()) ? RegLoc : LitLoc;
3455  Error(Loc, "invalid operand (violates constant bus restrictions)");
3456  return false;
3457 }
3458 
3459 bool
3460 AMDGPUAsmParser::validateEarlyClobberLimitations(const MCInst &Inst,
3461  const OperandVector &Operands) {
3462  const unsigned Opcode = Inst.getOpcode();
3463  const MCInstrDesc &Desc = MII.get(Opcode);
3464 
3465  const int DstIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::vdst);
3466  if (DstIdx == -1 ||
3467  Desc.getOperandConstraint(DstIdx, MCOI::EARLY_CLOBBER) == -1) {
3468  return true;
3469  }
3470 
3471  const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3472 
3473  const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
3474  const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
3475  const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);
3476 
3477  assert(DstIdx != -1);
3478  const MCOperand &Dst = Inst.getOperand(DstIdx);
3479  assert(Dst.isReg());
3480 
3481  const int SrcIndices[] = { Src0Idx, Src1Idx, Src2Idx };
3482 
3483  for (int SrcIdx : SrcIndices) {
3484  if (SrcIdx == -1) break;
3485  const MCOperand &Src = Inst.getOperand(SrcIdx);
3486  if (Src.isReg()) {
3487  if (TRI->regsOverlap(Dst.getReg(), Src.getReg())) {
3488  const unsigned SrcReg = mc2PseudoReg(Src.getReg());
3489  Error(getRegLoc(SrcReg, Operands),
3490  "destination must be different than all sources");
3491  return false;
3492  }
3493  }
3494  }
3495 
3496  return true;
3497 }
3498 
3499 bool AMDGPUAsmParser::validateIntClampSupported(const MCInst &Inst) {
3500 
3501  const unsigned Opc = Inst.getOpcode();
3502  const MCInstrDesc &Desc = MII.get(Opc);
3503 
3504  if ((Desc.TSFlags & SIInstrFlags::IntClamp) != 0 && !hasIntClamp()) {
 3505  int ClampIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp);
3506  assert(ClampIdx != -1);
3507  return Inst.getOperand(ClampIdx).getImm() == 0;
3508  }
3509 
3510  return true;
3511 }
3512 
3513 Optional<StringRef> AMDGPUAsmParser::validateMIMGDataSize(const MCInst &Inst) {
3514 
3515  const unsigned Opc = Inst.getOpcode();
3516  const MCInstrDesc &Desc = MII.get(Opc);
3517 
3518  if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
3519  return None;
3520 
3521  int VDataIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdata);
3522  int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
3523  int TFEIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::tfe);
3524 
3525  assert(VDataIdx != -1);
3526 
3527  if (DMaskIdx == -1 || TFEIdx == -1) // intersect_ray
3528  return None;
3529 
3530  unsigned VDataSize = AMDGPU::getRegOperandSize(getMRI(), Desc, VDataIdx);
3531  unsigned TFESize = (TFEIdx != -1 && Inst.getOperand(TFEIdx).getImm()) ? 1 : 0;
3532  unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
3533  if (DMask == 0)
3534  DMask = 1;
3535 
3536  bool isPackedD16 = false;
3537  unsigned DataSize =
3538  (Desc.TSFlags & SIInstrFlags::Gather4) ? 4 : countPopulation(DMask);
3539  if (hasPackedD16()) {
3540  int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16);
3541  isPackedD16 = D16Idx >= 0;
3542  if (isPackedD16 && Inst.getOperand(D16Idx).getImm())
3543  DataSize = (DataSize + 1) / 2;
3544  }
3545 
3546  if ((VDataSize / 4) == DataSize + TFESize)
3547  return None;
3548 
3549  return StringRef(isPackedD16
3550  ? "image data size does not match dmask, d16 and tfe"
3551  : "image data size does not match dmask and tfe");
3552 }
3553 
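// On GFX10+, check that the number of address registers (NSA operands or a
// single vaddr tuple) matches what the dim, a16 and g16 settings require.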
3554 bool AMDGPUAsmParser::validateMIMGAddrSize(const MCInst &Inst) {
3555  const unsigned Opc = Inst.getOpcode();
3556  const MCInstrDesc &Desc = MII.get(Opc);
3557 
3558  if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0 || !isGFX10Plus())
3559  return true;
3560 
3561  const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Opc);
3562 
3563  const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode =
3564  AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode);
3565  int VAddr0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vaddr0);
3566  int SrsrcIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::srsrc);
3567  int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim);
3568  int A16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::a16);
3569 
3570  assert(VAddr0Idx != -1);
3571  assert(SrsrcIdx != -1);
3572  assert(SrsrcIdx > VAddr0Idx);
3573 
3574  if (DimIdx == -1)
3575  return true; // intersect_ray
3576 
3577  unsigned Dim = Inst.getOperand(DimIdx).getImm();
3578  const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByEncoding(Dim);
3579  bool IsNSA = SrsrcIdx - VAddr0Idx > 1;
3580  unsigned ActualAddrSize =
3581  IsNSA ? SrsrcIdx - VAddr0Idx
3582  : AMDGPU::getRegOperandSize(getMRI(), Desc, VAddr0Idx) / 4;
3583  bool IsA16 = (A16Idx != -1 && Inst.getOperand(A16Idx).getImm());
3584 
3585  unsigned ExpectedAddrSize =
3586  AMDGPU::getAddrSizeMIMGOp(BaseOpcode, DimInfo, IsA16, hasG16());
3587 
3588  if (!IsNSA) {
3589  if (ExpectedAddrSize > 8)
3590  ExpectedAddrSize = 16;
3591 
3592  // Allow oversized 8 VGPR vaddr when only 5/6/7 VGPRs are required.
3593  // This provides backward compatibility for assembly created
3594  // before 160b/192b/224b types were directly supported.
3595  if (ActualAddrSize == 8 && (ExpectedAddrSize >= 5 && ExpectedAddrSize <= 7))
3596  return true;
3597  }
3598 
3599  return ActualAddrSize == ExpectedAddrSize;
3600 }
3601 
3602 bool AMDGPUAsmParser::validateMIMGAtomicDMask(const MCInst &Inst) {
3603 
3604  const unsigned Opc = Inst.getOpcode();
3605  const MCInstrDesc &Desc = MII.get(Opc);
3606 
3607  if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
3608  return true;
3609  if (!Desc.mayLoad() || !Desc.mayStore())
3610  return true; // Not atomic
3611 
3612  int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
3613  unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
3614 
3615  // This is an incomplete check because image_atomic_cmpswap
3616  // may only use 0x3 and 0xf while other atomic operations
3617  // may use 0x1 and 0x3. However these limitations are
3618  // verified when we check that dmask matches dst size.
3619  return DMask == 0x1 || DMask == 0x3 || DMask == 0xf;
3620 }
3621 
3622 bool AMDGPUAsmParser::validateMIMGGatherDMask(const MCInst &Inst) {
3623 
3624  const unsigned Opc = Inst.getOpcode();
3625  const MCInstrDesc &Desc = MII.get(Opc);
3626 
3627  if ((Desc.TSFlags & SIInstrFlags::Gather4) == 0)
3628  return true;
3629 
3630  int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
3631  unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
3632 
3633  // GATHER4 instructions use dmask in a different fashion compared to
3634  // other MIMG instructions. The only useful DMASK values are
3635  // 1=red, 2=green, 4=blue, 8=alpha. (e.g. 1 returns
3636  // (red,red,red,red) etc.) The ISA document doesn't mention
3637  // this.
3638  return DMask == 0x1 || DMask == 0x2 || DMask == 0x4 || DMask == 0x8;
3639 }
3640 
3641 bool AMDGPUAsmParser::validateMIMGMSAA(const MCInst &Inst) {
3642  const unsigned Opc = Inst.getOpcode();
3643  const MCInstrDesc &Desc = MII.get(Opc);
3644 
3645  if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
3646  return true;
3647 
3648  const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Opc);
3649  const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode =
3650  AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode);
3651 
3652  if (!BaseOpcode->MSAA)
3653  return true;
3654 
3655  int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim);
3656  assert(DimIdx != -1);
3657 
3658  unsigned Dim = Inst.getOperand(DimIdx).getImm();
3659  const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByEncoding(Dim);
3660 
3661  return DimInfo->MSAA;
3662 }
3663 
3664 static bool IsMovrelsSDWAOpcode(const unsigned Opcode)
3665 {
3666  switch (Opcode) {
3667  case AMDGPU::V_MOVRELS_B32_sdwa_gfx10:
3668  case AMDGPU::V_MOVRELSD_B32_sdwa_gfx10:
3669  case AMDGPU::V_MOVRELSD_2_B32_sdwa_gfx10:
3670  return true;
3671  default:
3672  return false;
3673  }
3674 }
3675 
3676 // movrels* opcodes should only allow VGPRs as src0.
3677 // This is specified in .td description for vop1/vop3,
3678 // but sdwa is handled differently. See isSDWAOperand.
3679 bool AMDGPUAsmParser::validateMovrels(const MCInst &Inst,
3680  const OperandVector &Operands) {
3681 
3682  const unsigned Opc = Inst.getOpcode();
3683  const MCInstrDesc &Desc = MII.get(Opc);
3684 
3685  if ((Desc.TSFlags & SIInstrFlags::SDWA) == 0 || !IsMovrelsSDWAOpcode(Opc))
3686  return true;
3687 
3688  const int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
3689  assert(Src0Idx != -1);
3690 
3691  SMLoc ErrLoc;
3692  const MCOperand &Src0 = Inst.getOperand(Src0Idx);
3693  if (Src0.isReg()) {
3694  auto Reg = mc2PseudoReg(Src0.getReg());
3695  const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3696  if (!isSGPR(Reg, TRI))
3697  return true;
3698  ErrLoc = getRegLoc(Reg, Operands);
3699  } else {
3700  ErrLoc = getConstLoc(Operands);
3701  }
3702 
3703  Error(ErrLoc, "source operand must be a VGPR");
3704  return false;
3705 }
3706 
3707 bool AMDGPUAsmParser::validateMAIAccWrite(const MCInst &Inst,
3708  const OperandVector &Operands) {
3709 
3710  const unsigned Opc = Inst.getOpcode();
3711 
3712  if (Opc != AMDGPU::V_ACCVGPR_WRITE_B32_vi)
3713  return true;
3714 
3715  const int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
3716  assert(Src0Idx != -1);
3717 
3718  const MCOperand &Src0 = Inst.getOperand(Src0Idx);
3719  if (!Src0.isReg())
3720  return true;
3721 
3722  auto Reg = mc2PseudoReg(Src0.getReg());
3723  const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3724  if (!isGFX90A() && isSGPR(Reg, TRI)) {
3725  Error(getRegLoc(Reg, Operands),
3726  "source operand must be either a VGPR or an inline constant");
3727  return false;
3728  }
3729 
3730  return true;
3731 }
3732 
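// An MFMA result wider than 128 bits must not partially overlap src2;
// the two registers must be either identical or fully disjoint.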
3733 bool AMDGPUAsmParser::validateMFMA(const MCInst &Inst,
3734  const OperandVector &Operands) {
3735  const unsigned Opc = Inst.getOpcode();
3736  const MCInstrDesc &Desc = MII.get(Opc);
3737 
3738  if ((Desc.TSFlags & SIInstrFlags::IsMAI) == 0)
3739  return true;
3740 
3741  const int Src2Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2);
3742  if (Src2Idx == -1)
3743  return true;
3744 
3745  const MCOperand &Src2 = Inst.getOperand(Src2Idx);
3746  if (!Src2.isReg())
3747  return true;
3748 
3749  MCRegister Src2Reg = Src2.getReg();
3750  MCRegister DstReg = Inst.getOperand(0).getReg();
3751  if (Src2Reg == DstReg)
3752  return true;
3753 
3754  const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3755  if (TRI->getRegClass(Desc.OpInfo[0].RegClass).getSizeInBits() <= 128)
3756  return true;
3757 
3758  if (TRI->regsOverlap(Src2Reg, DstReg)) {
3759  Error(getRegLoc(mc2PseudoReg(Src2Reg), Operands),
3760  "source 2 operand must not partially overlap with dst");
3761  return false;
3762  }
3763 
3764  return true;
3765 }
3766 
3767 bool AMDGPUAsmParser::validateDivScale(const MCInst &Inst) {
3768  switch (Inst.getOpcode()) {
3769  default:
3770  return true;
3771  case V_DIV_SCALE_F32_gfx6_gfx7:
3772  case V_DIV_SCALE_F32_vi:
3773  case V_DIV_SCALE_F32_gfx10:
3774  case V_DIV_SCALE_F64_gfx6_gfx7:
3775  case V_DIV_SCALE_F64_vi:
3776  case V_DIV_SCALE_F64_gfx10:
3777  break;
3778  }
3779 
3780  // TODO: Check that src0 = src1 or src2.
3781 
3782  for (auto Name : {AMDGPU::OpName::src0_modifiers,
3783  AMDGPU::OpName::src2_modifiers,
3784  AMDGPU::OpName::src2_modifiers}) {
3785  if (Inst.getOperand(AMDGPU::getNamedOperandIdx(Inst.getOpcode(), Name))
3786  .getImm() &
3787  SISrcMods::ABS) {
3788  return false;
3789  }
3790  }
3791 
3792  return true;
3793 }
3794 
3795 bool AMDGPUAsmParser::validateMIMGD16(const MCInst &Inst) {
3796 
3797  const unsigned Opc = Inst.getOpcode();
3798  const MCInstrDesc &Desc = MII.get(Opc);
3799 
3800  if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
3801  return true;
3802 
3803  int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16);
3804  if (D16Idx >= 0 && Inst.getOperand(D16Idx).getImm()) {
3805  if (isCI() || isSI())
3806  return false;
3807  }
3808 
3809  return true;
3810 }
3811 
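// The MIMG dim operand is a 3-bit encoding; values outside 0..7 are rejected.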
3812 bool AMDGPUAsmParser::validateMIMGDim(const MCInst &Inst) {
3813  const unsigned Opc = Inst.getOpcode();
3814  const MCInstrDesc &Desc = MII.get(Opc);
3815 
3816  if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
3817  return true;
3818 
3819  int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim);
3820  if (DimIdx < 0)
3821  return true;
3822 
3823  long Imm = Inst.getOperand(DimIdx).getImm();
3824  if (Imm < 0 || Imm >= 8)
3825  return false;
3826 
3827  return true;
3828 }
3829 
3830 static bool IsRevOpcode(const unsigned Opcode)
3831 {
3832  switch (Opcode) {
3833  case AMDGPU::V_SUBREV_F32_e32:
3834  case AMDGPU::V_SUBREV_F32_e64:
3835  case AMDGPU::V_SUBREV_F32_e32_gfx10:
3836  case AMDGPU::V_SUBREV_F32_e32_gfx6_gfx7:
3837  case AMDGPU::V_SUBREV_F32_e32_vi:
3838  case AMDGPU::V_SUBREV_F32_e64_gfx10:
3839  case AMDGPU::V_SUBREV_F32_e64_gfx6_gfx7:
3840  case AMDGPU::V_SUBREV_F32_e64_vi:
3841 
3842  case AMDGPU::V_SUBREV_CO_U32_e32:
3843  case AMDGPU::V_SUBREV_CO_U32_e64:
3844  case AMDGPU::V_SUBREV_I32_e32_gfx6_gfx7:
3845  case AMDGPU::V_SUBREV_I32_e64_gfx6_gfx7:
3846 
3847  case AMDGPU::V_SUBBREV_U32_e32:
3848  case AMDGPU::V_SUBBREV_U32_e64:
3849  case AMDGPU::V_SUBBREV_U32_e32_gfx6_gfx7:
3850  case AMDGPU::V_SUBBREV_U32_e32_vi:
3851  case AMDGPU::V_SUBBREV_U32_e64_gfx6_gfx7:
3852  case AMDGPU::V_SUBBREV_U32_e64_vi:
3853 
3854  case AMDGPU::V_SUBREV_U32_e32:
3855  case AMDGPU::V_SUBREV_U32_e64:
3856  case AMDGPU::V_SUBREV_U32_e32_gfx9:
3857  case AMDGPU::V_SUBREV_U32_e32_vi:
3858  case AMDGPU::V_SUBREV_U32_e64_gfx9:
3859  case AMDGPU::V_SUBREV_U32_e64_vi:
3860 
3861  case AMDGPU::V_SUBREV_F16_e32:
3862  case AMDGPU::V_SUBREV_F16_e64:
3863  case AMDGPU::V_SUBREV_F16_e32_gfx10:
3864  case AMDGPU::V_SUBREV_F16_e32_vi:
3865  case AMDGPU::V_SUBREV_F16_e64_gfx10:
3866  case AMDGPU::V_SUBREV_F16_e64_vi:
3867 
3868  case AMDGPU::V_SUBREV_U16_e32:
3869  case AMDGPU::V_SUBREV_U16_e64:
3870  case AMDGPU::V_SUBREV_U16_e32_vi:
3871  case AMDGPU::V_SUBREV_U16_e64_vi:
3872 
3873  case AMDGPU::V_SUBREV_CO_U32_e32_gfx9:
3874  case AMDGPU::V_SUBREV_CO_U32_e64_gfx10:
3875  case AMDGPU::V_SUBREV_CO_U32_e64_gfx9:
3876 
3877  case AMDGPU::V_SUBBREV_CO_U32_e32_gfx9:
3878  case AMDGPU::V_SUBBREV_CO_U32_e64_gfx9:
3879 
3880  case AMDGPU::V_SUBREV_NC_U32_e32_gfx10:
3881  case AMDGPU::V_SUBREV_NC_U32_e64_gfx10:
3882 
3883  case AMDGPU::V_SUBREV_CO_CI_U32_e32_gfx10:
3884  case AMDGPU::V_SUBREV_CO_CI_U32_e64_gfx10:
3885 
3886  case AMDGPU::V_LSHRREV_B32_e32:
3887  case AMDGPU::V_LSHRREV_B32_e64:
3888  case AMDGPU::V_LSHRREV_B32_e32_gfx6_gfx7:
3889  case AMDGPU::V_LSHRREV_B32_e64_gfx6_gfx7:
3890  case AMDGPU::V_LSHRREV_B32_e32_vi:
3891  case AMDGPU::V_LSHRREV_B32_e64_vi:
3892  case AMDGPU::V_LSHRREV_B32_e32_gfx10:
3893  case AMDGPU::V_LSHRREV_B32_e64_gfx10:
3894 
3895  case AMDGPU::V_ASHRREV_I32_e32:
3896  case AMDGPU::V_ASHRREV_I32_e64:
3897  case AMDGPU::V_ASHRREV_I32_e32_gfx10:
3898  case AMDGPU::V_ASHRREV_I32_e32_gfx6_gfx7:
3899  case AMDGPU::V_ASHRREV_I32_e32_vi:
3900  case AMDGPU::V_ASHRREV_I32_e64_gfx10:
3901  case AMDGPU::V_ASHRREV_I32_e64_gfx6_gfx7:
3902  case AMDGPU::V_ASHRREV_I32_e64_vi:
3903 
3904  case AMDGPU::V_LSHLREV_B32_e32:
3905  case AMDGPU::V_LSHLREV_B32_e64:
3906  case AMDGPU::V_LSHLREV_B32_e32_gfx10:
3907  case AMDGPU::V_LSHLREV_B32_e32_gfx6_gfx7:
3908  case AMDGPU::V_LSHLREV_B32_e32_vi:
3909  case AMDGPU::V_LSHLREV_B32_e64_gfx10:
3910  case AMDGPU::V_LSHLREV_B32_e64_gfx6_gfx7:
3911  case AMDGPU::V_LSHLREV_B32_e64_vi:
3912 
3913  case AMDGPU::V_LSHLREV_B16_e32:
3914  case AMDGPU::V_LSHLREV_B16_e64:
3915  case AMDGPU::V_LSHLREV_B16_e32_vi:
3916  case AMDGPU::V_LSHLREV_B16_e64_vi:
3917  case AMDGPU::V_LSHLREV_B16_gfx10:
3918 
3919  case AMDGPU::V_LSHRREV_B16_e32:
3920  case AMDGPU::V_LSHRREV_B16_e64:
3921  case AMDGPU::V_LSHRREV_B16_e32_vi:
3922  case AMDGPU::V_LSHRREV_B16_e64_vi:
3923  case AMDGPU::V_LSHRREV_B16_gfx10:
3924 
3925  case AMDGPU::V_ASHRREV_I16_e32:
3926  case AMDGPU::V_ASHRREV_I16_e64:
3927  case AMDGPU::V_ASHRREV_I16_e32_vi:
3928  case AMDGPU::V_ASHRREV_I16_e64_vi:
3929  case AMDGPU::V_ASHRREV_I16_gfx10:
3930 
3931  case AMDGPU::V_LSHLREV_B64_e64:
3932  case AMDGPU::V_LSHLREV_B64_gfx10:
3933  case AMDGPU::V_LSHLREV_B64_vi:
3934 
3935  case AMDGPU::V_LSHRREV_B64_e64:
3936  case AMDGPU::V_LSHRREV_B64_gfx10:
3937  case AMDGPU::V_LSHRREV_B64_vi:
3938 
3939  case AMDGPU::V_ASHRREV_I64_e64:
3940  case AMDGPU::V_ASHRREV_I64_gfx10:
3941  case AMDGPU::V_ASHRREV_I64_vi:
3942 
3943  case AMDGPU::V_PK_LSHLREV_B16:
3944  case AMDGPU::V_PK_LSHLREV_B16_gfx10:
3945  case AMDGPU::V_PK_LSHLREV_B16_vi:
3946 
3947  case AMDGPU::V_PK_LSHRREV_B16:
3948  case AMDGPU::V_PK_LSHRREV_B16_gfx10:
3949  case AMDGPU::V_PK_LSHRREV_B16_vi:
3950  case AMDGPU::V_PK_ASHRREV_I16:
3951  case AMDGPU::V_PK_ASHRREV_I16_gfx10:
3952  case AMDGPU::V_PK_ASHRREV_I16_vi:
3953  return true;
3954  default:
3955  return false;
3956  }
3957 }
3958 
3959 Optional<StringRef> AMDGPUAsmParser::validateLdsDirect(const MCInst &Inst) {
3960 
3961  using namespace SIInstrFlags;
3962  const unsigned Opcode = Inst.getOpcode();
3963  const MCInstrDesc &Desc = MII.get(Opcode);
3964 
3965  // lds_direct register is defined so that it can be used
3966  // with 9-bit operands only. Ignore encodings which do not accept these.
3967  const auto Enc = VOP1 | VOP2 | VOP3 | VOPC | VOP3P | SIInstrFlags::SDWA;
3968  if ((Desc.TSFlags & Enc) == 0)
3969  return None;
3970 
3971  for (auto SrcName : {OpName::src0, OpName::src1, OpName::src2}) {
3972  auto SrcIdx = getNamedOperandIdx(Opcode, SrcName);
3973  if (SrcIdx == -1)
3974  break;
3975  const auto &Src = Inst.getOperand(SrcIdx);
3976  if (Src.isReg() && Src.getReg() == LDS_DIRECT) {
3977 
3978  if (isGFX90A() || isGFX11Plus())
3979  return StringRef("lds_direct is not supported on this GPU");
3980 
3981  if (IsRevOpcode(Opcode) || (Desc.TSFlags & SIInstrFlags::SDWA))
3982  return StringRef("lds_direct cannot be used with this instruction");
3983 
3984  if (SrcName != OpName::src0)
3985  return StringRef("lds_direct may be used as src0 only");
3986  }
3987  }
3988 
3989  return None;
3990 }
3991 
3992 SMLoc AMDGPUAsmParser::getFlatOffsetLoc(const OperandVector &Operands) const {
3993  for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
3994  AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
3995  if (Op.isFlatOffset())
3996  return Op.getStartLoc();
3997  }
3998  return getLoc();
3999 }
4000 
4001 bool AMDGPUAsmParser::validateFlatOffset(const MCInst &Inst,
4002  const OperandVector &Operands) {
4003  uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
4004  if ((TSFlags & SIInstrFlags::FLAT) == 0)
4005  return true;
4006 
4007  auto Opcode = Inst.getOpcode();
4008  auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset);
4009  assert(OpNum != -1);
4010 
4011  const auto &Op = Inst.getOperand(OpNum);
4012  if (!hasFlatOffsets() && Op.getImm() != 0) {
4013  Error(getFlatOffsetLoc(Operands),
4014  "flat offset modifier is not supported on this GPU");
4015  return false;
4016  }
4017 
4018  // For FLAT segment the offset must be positive;
4019  // MSB is ignored and forced to zero.
4020  if (TSFlags & (SIInstrFlags::FlatGlobal | SIInstrFlags::FlatScratch)) {
4021  unsigned OffsetSize = AMDGPU::getNumFlatOffsetBits(getSTI(), true);
4022  if (!isIntN(OffsetSize, Op.getImm())) {
4023  Error(getFlatOffsetLoc(Operands),
4024  Twine("expected a ") + Twine(OffsetSize) + "-bit signed offset");
4025  return false;
4026  }
4027  } else {
4028  unsigned OffsetSize = AMDGPU::getNumFlatOffsetBits(getSTI(), false);
4029  if (!isUIntN(OffsetSize, Op.getImm())) {
4030  Error(getFlatOffsetLoc(Operands),
4031  Twine("expected a ") + Twine(OffsetSize) + "-bit unsigned offset");
4032  return false;
4033  }
4034  }
4035 
4036  return true;
4037 }
4038 
4039 SMLoc AMDGPUAsmParser::getSMEMOffsetLoc(const OperandVector &Operands) const {
4040  // Start with second operand because SMEM Offset cannot be dst or src0.
4041  for (unsigned i = 2, e = Operands.size(); i != e; ++i) {
4042  AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
4043  if (Op.isSMEMOffset())
4044  return Op.getStartLoc();
4045  }
4046  return getLoc();
4047 }
4048 
4049 bool AMDGPUAsmParser::validateSMEMOffset(const MCInst &Inst,
4050  const OperandVector &Operands) {
4051  if (isCI() || isSI())
4052  return true;
4053 
4054  uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
4055  if ((TSFlags & SIInstrFlags::SMRD) == 0)
4056  return true;
4057 
4058  auto Opcode = Inst.getOpcode();
4059  auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset);
4060  if (OpNum == -1)
4061  return true;
4062 
4063  const auto &Op = Inst.getOperand(OpNum);
4064  if (!Op.isImm())
4065  return true;
4066 
4067  uint64_t Offset = Op.getImm();
4068  bool IsBuffer = AMDGPU::getSMEMIsBuffer(Opcode);
4069  if (AMDGPU::isLegalSMRDEncodedUnsignedOffset(getSTI(), Offset) ||
4070  AMDGPU::isLegalSMRDEncodedSignedOffset(getSTI(), Offset, IsBuffer))
4071  return true;
4072 
4073  Error(getSMEMOffsetLoc(Operands),
4074  (isVI() || IsBuffer) ? "expected a 20-bit unsigned offset" :
4075  "expected a 21-bit signed offset");
4076 
4077  return false;
4078 }
4079 
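// SOP2/SOPC encodings have room for at most one unique 32-bit literal; both
// sources may reference the same literal value.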
4080 bool AMDGPUAsmParser::validateSOPLiteral(const MCInst &Inst) const {
4081  unsigned Opcode = Inst.getOpcode();
4082  const MCInstrDesc &Desc = MII.get(Opcode);
4083  if (!(Desc.TSFlags & (SIInstrFlags::SOP2 | SIInstrFlags::SOPC)))
4084  return true;
4085 
4086  const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
4087  const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
4088 
4089  const int OpIndices[] = { Src0Idx, Src1Idx };
4090 
4091  unsigned NumExprs = 0;
4092  unsigned NumLiterals = 0;
4093  uint32_t LiteralValue;
4094 
4095  for (int OpIdx : OpIndices) {
4096  if (OpIdx == -1) break;
4097 
4098  const MCOperand &MO = Inst.getOperand(OpIdx);
4099  // Exclude special imm operands (like that used by s_set_gpr_idx_on)
4100  if (AMDGPU::isSISrcOperand(Desc, OpIdx)) {
4101  if (MO.isImm() && !isInlineConstant(Inst, OpIdx)) {
4102  uint32_t Value = static_cast<uint32_t>(MO.getImm());
4103  if (NumLiterals == 0 || LiteralValue != Value) {
4104  LiteralValue = Value;
4105  ++NumLiterals;
4106  }
4107  } else if (MO.isExpr()) {
4108  ++NumExprs;
4109  }
4110  }
4111  }
4112 
4113  return NumLiterals + NumExprs <= 1;
4114 }
4115 
4116 bool AMDGPUAsmParser::validateOpSel(const MCInst &Inst) {
4117  const unsigned Opc = Inst.getOpcode();
4118  if (Opc == AMDGPU::V_PERMLANE16_B32_gfx10 ||
4119  Opc == AMDGPU::V_PERMLANEX16_B32_gfx10) {
4120  int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
4121  unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
4122 
4123  if (OpSel & ~3)
4124  return false;
4125  }
4126 
4127  if (isGFX940() && (MII.get(Opc).TSFlags & SIInstrFlags::IsDOT)) {
4128  int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
4129  if (OpSelIdx != -1) {
4130  if (Inst.getOperand(OpSelIdx).getImm() != 0)
4131  return false;
4132  }
4133  int OpSelHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel_hi);
4134  if (OpSelHiIdx != -1) {
4135  if (Inst.getOperand(OpSelHiIdx).getImm() != -1)
4136  return false;
4137  }
4138  }
4139 
4140  return true;
4141 }
4142 
4143 bool AMDGPUAsmParser::validateDPP(const MCInst &Inst,
4144  const OperandVector &Operands) {
4145  const unsigned Opc = Inst.getOpcode();
4146  int DppCtrlIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dpp_ctrl);
4147  if (DppCtrlIdx < 0)
4148  return true;
4149  unsigned DppCtrl = Inst.getOperand(DppCtrlIdx).getImm();
4150 
4151  if (!AMDGPU::isLegal64BitDPPControl(DppCtrl)) {
4152  // DPP64 is supported for row_newbcast only.
4153  int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
4154  if (Src0Idx >= 0 &&
4155  getMRI()->getSubReg(Inst.getOperand(Src0Idx).getReg(), AMDGPU::sub1)) {
4156  SMLoc S = getImmLoc(AMDGPUOperand::ImmTyDppCtrl, Operands);
4157  Error(S, "64 bit dpp only supports row_newbcast");
4158  return false;
4159  }
4160  }
4161 
4162  return true;
4163 }
4164 
4165 // Check if VCC register matches wavefront size
4166 bool AMDGPUAsmParser::validateVccOperand(unsigned Reg) const {
4167  auto FB = getFeatureBits();
4168  return (FB[AMDGPU::FeatureWavefrontSize64] && Reg == AMDGPU::VCC) ||
4169  (FB[AMDGPU::FeatureWavefrontSize32] && Reg == AMDGPU::VCC_LO);
4170 }
4171 
4172 // Only one unique literal can be used. A VOP3 literal is only allowed on GFX10+.
4173 bool AMDGPUAsmParser::validateVOPLiteral(const MCInst &Inst,
4174  const OperandVector &Operands) {
4175  unsigned Opcode = Inst.getOpcode();
4176  const MCInstrDesc &Desc = MII.get(Opcode);
4177  const int ImmIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::imm);
4178  if (!(Desc.TSFlags & (SIInstrFlags::VOP3 | SIInstrFlags::VOP3P)) &&
4179  ImmIdx == -1)
4180  return true;
4181 
4182  const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
4183  const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
4184  const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);
4185 
4186  const int OpIndices[] = {Src0Idx, Src1Idx, Src2Idx, ImmIdx};
4187 
4188  unsigned NumExprs = 0;
4189  unsigned NumLiterals = 0;
4190  uint32_t LiteralValue;
4191 
4192  for (int OpIdx : OpIndices) {
4193  if (OpIdx == -1)
4194  continue;
4195 
4196  const MCOperand &MO = Inst.getOperand(OpIdx);
4197  if (!MO.isImm() && !MO.isExpr())
4198  continue;
4199  if (!AMDGPU::isSISrcOperand(Desc, OpIdx))
4200  continue;
4201 
4202  if (OpIdx == Src2Idx && (Desc.TSFlags & SIInstrFlags::IsMAI) &&
4203  getFeatureBits()[AMDGPU::FeatureMFMAInlineLiteralBug]) {
4204  Error(getConstLoc(Operands),
4205  "inline constants are not allowed for this operand");
4206  return false;
4207  }
4208 
4209  if (MO.isImm() && !isInlineConstant(Inst, OpIdx)) {
4210  uint32_t Value = static_cast<uint32_t>(MO.getImm());
4211  if (NumLiterals == 0 || LiteralValue != Value) {
4212  LiteralValue = Value;
4213  ++NumLiterals;
4214  }
4215  } else if (MO.isExpr()) {
4216  ++NumExprs;
4217  }
4218  }
4219  NumLiterals += NumExprs;
4220 
4221  if (!NumLiterals)
4222  return true;
4223 
4224  if (ImmIdx == -1 && !getFeatureBits()[AMDGPU::FeatureVOP3Literal]) {
4225  Error(getLitLoc(Operands), "literal operands are not supported");
4226  return false;
4227  }
4228 
4229  if (NumLiterals > 1) {
4230  Error(getLitLoc(Operands), "only one literal operand is allowed");
4231  return false;
4232  }
4233 
4234  return true;
4235 }
4236 
4237 // Returns -1 if not a register, 0 if VGPR and 1 if AGPR.
4238 static int IsAGPROperand(const MCInst &Inst, uint16_t NameIdx,
4239  const MCRegisterInfo *MRI) {
4240  int OpIdx = AMDGPU::getNamedOperandIdx(Inst.getOpcode(), NameIdx);
4241  if (OpIdx < 0)
4242  return -1;
4243 
4244  const MCOperand &Op = Inst.getOperand(OpIdx);
4245  if (!Op.isReg())
4246  return -1;
4247 
4248  unsigned Sub = MRI->getSubReg(Op.getReg(), AMDGPU::sub0);
4249  auto Reg = Sub ? Sub : Op.getReg();
4250  const MCRegisterClass &AGPR32 = MRI->getRegClass(AMDGPU::AGPR_32RegClassID);
4251  return AGPR32.contains(Reg) ? 1 : 0;
4252 }
4253 
4254 bool AMDGPUAsmParser::validateAGPRLdSt(const MCInst &Inst) const {
4255  uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
4256  if ((TSFlags & (SIInstrFlags::FLAT | SIInstrFlags::MUBUF |
4257  SIInstrFlags::MTBUF | SIInstrFlags::MIMG |
4258  SIInstrFlags::DS)) == 0)
4259  return true;
4260 
4261  uint16_t DataNameIdx = (TSFlags & SIInstrFlags::DS) ? AMDGPU::OpName::data0
4262  : AMDGPU::OpName::vdata;
4263 
4264  const MCRegisterInfo *MRI = getMRI();
4265  int DstAreg = IsAGPROperand(Inst, AMDGPU::OpName::vdst, MRI);
4266  int DataAreg = IsAGPROperand(Inst, DataNameIdx, MRI);
4267 
4268  if ((TSFlags & SIInstrFlags::DS) && DataAreg >= 0) {
4269  int Data2Areg = IsAGPROperand(Inst, AMDGPU::OpName::data1, MRI);
4270  if (Data2Areg >= 0 && Data2Areg != DataAreg)
4271  return false;
4272  }
4273 
4274  auto FB = getFeatureBits();
4275  if (FB[AMDGPU::FeatureGFX90AInsts]) {
4276  if (DataAreg < 0 || DstAreg < 0)
4277  return true;
4278  return DstAreg == DataAreg;
4279  }
4280 
4281  return DstAreg < 1 && DataAreg < 1;
4282 }
4283 
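// On gfx90a, VGPR and AGPR tuples must start at an even register index.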
4284 bool AMDGPUAsmParser::validateVGPRAlign(const MCInst &Inst) const {
4285  auto FB = getFeatureBits();
4286  if (!FB[AMDGPU::FeatureGFX90AInsts])
4287  return true;
4288 
4289  const MCRegisterInfo *MRI = getMRI();
4290  const MCRegisterClass &VGPR32 = MRI->getRegClass(AMDGPU::VGPR_32RegClassID);
4291  const MCRegisterClass &AGPR32 = MRI->getRegClass(AMDGPU::AGPR_32RegClassID);
4292  for (unsigned I = 0, E = Inst.getNumOperands(); I != E; ++I) {
4293  const MCOperand &Op = Inst.getOperand(I);
4294  if (!Op.isReg())
4295  continue;
4296 
4297  unsigned Sub = MRI->getSubReg(Op.getReg(), AMDGPU::sub0);
4298  if (!Sub)
4299  continue;
4300 
4301  if (VGPR32.contains(Sub) && ((Sub - AMDGPU::VGPR0) & 1))
4302  return false;
4303  if (AGPR32.contains(Sub) && ((Sub - AMDGPU::AGPR0) & 1))
4304  return false;
4305  }
4306 
4307  return true;
4308 }
4309 
4310 SMLoc AMDGPUAsmParser::getBLGPLoc(const OperandVector &Operands) const {
4311  for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
4312  AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
4313  if (Op.isBLGP())
4314  return Op.getStartLoc();
4315  }
4316  return SMLoc();
4317 }
4318 
4319 bool AMDGPUAsmParser::validateBLGP(const MCInst &Inst,
4320  const OperandVector &Operands) {
4321  unsigned Opc = Inst.getOpcode();
4322  int BlgpIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::blgp);
4323  if (BlgpIdx == -1)
4324  return true;
4325  SMLoc BLGPLoc = getBLGPLoc(Operands);
4326  if (!BLGPLoc.isValid())
4327  return true;
4328  bool IsNeg = StringRef(BLGPLoc.getPointer()).startswith("neg:");
4329  auto FB = getFeatureBits();
4330  bool UsesNeg = false;
4331  if (FB[AMDGPU::FeatureGFX940Insts]) {
4332  switch (Opc) {
4333  case AMDGPU::V_MFMA_F64_16X16X4F64_gfx940_acd:
4334  case AMDGPU::V_MFMA_F64_16X16X4F64_gfx940_vcd:
4335  case AMDGPU::V_MFMA_F64_4X4X4F64_gfx940_acd:
4336  case AMDGPU::V_MFMA_F64_4X4X4F64_gfx940_vcd:
4337  UsesNeg = true;
4338  }
4339  }
4340 
4341  if (IsNeg == UsesNeg)
4342  return true;
4343 
4344  Error(BLGPLoc,
4345  UsesNeg ? "invalid modifier: blgp is not supported"
4346  : "invalid modifier: neg is not supported");
4347 
4348  return false;
4349 }
4350 
4351 // gfx90a has an undocumented limitation:
4352 // DS_GWS opcodes must use even aligned registers.
4353 bool AMDGPUAsmParser::validateGWS(const MCInst &Inst,
4354  const OperandVector &Operands) {
4355  if (!getFeatureBits()[AMDGPU::FeatureGFX90AInsts])
4356  return true;
4357 
4358  int Opc = Inst.getOpcode();
4359  if (Opc != AMDGPU::DS_GWS_INIT_vi && Opc != AMDGPU::DS_GWS_BARRIER_vi &&
4360  Opc != AMDGPU::DS_GWS_SEMA_BR_vi)
4361  return true;
4362 
4363  const MCRegisterInfo *MRI = getMRI();
4364  const MCRegisterClass &VGPR32 = MRI->getRegClass(AMDGPU::VGPR_32RegClassID);
4365  int Data0Pos =
4366  AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::data0);
4367  assert(Data0Pos != -1);
4368  auto Reg = Inst.getOperand(Data0Pos).getReg();
4369  auto RegIdx = Reg - (VGPR32.contains(Reg) ? AMDGPU::VGPR0 : AMDGPU::AGPR0);
4370  if (RegIdx & 1) {
4371  SMLoc RegLoc = getRegLoc(Reg, Operands);
4372  Error(RegLoc, "vgpr must be even aligned");
4373  return false;
4374  }
4375 
4376  return true;
4377 }
4378 
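// Validate the cache-policy (cpol) operand: SMEM accepts only a limited set of
// policies, scc is rejected where it is not supported, and atomics that return
// a value must use glc (sc0 on gfx940) while non-returning atomics must not.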
4379 bool AMDGPUAsmParser::validateCoherencyBits(const MCInst &Inst,
4380  const OperandVector &Operands,
4381  const SMLoc &IDLoc) {
4382  int CPolPos = AMDGPU::getNamedOperandIdx(Inst.getOpcode(),
4383  AMDGPU::OpName::cpol);
4384  if (CPolPos == -1)
4385  return true;
4386 
4387  unsigned CPol = Inst.getOperand(CPolPos).getImm();
4388 
4389  uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
4390  if (TSFlags & SIInstrFlags::SMRD) {
4391  if (CPol && (isSI() || isCI())) {
4392  SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands);
4393  Error(S, "cache policy is not supported for SMRD instructions");
4394  return false;
4395  }
4396  if (CPol & ~(CPol::GLC | CPol::DLC)) {
4397  Error(IDLoc, "invalid cache policy for SMEM instruction");
4398  return false;
4399  }
4400  }
4401 
4402  if (isGFX90A() && !isGFX940() && (CPol & CPol::SCC)) {
4403  SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands);
4404  StringRef CStr(S.getPointer());
4405  S = SMLoc::getFromPointer(&CStr.data()[CStr.find("scc")]);
4406  Error(S, "scc is not supported on this GPU");
4407  return false;
4408  }
4409 
4410  if (!(TSFlags & (SIInstrFlags::IsAtomicRet | SIInstrFlags::IsAtomicNoRet)))
4411  return true;
4412 
4413  if (TSFlags & SIInstrFlags::IsAtomicRet) {
4414  if (!(TSFlags & SIInstrFlags::MIMG) && !(CPol & CPol::GLC)) {
4415  Error(IDLoc, isGFX940() ? "instruction must use sc0"
4416  : "instruction must use glc");
4417  return false;
4418  }
4419  } else {
4420  if (CPol & CPol::GLC) {
4421  SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands);
4422  StringRef CStr(S.getPointer());
4423  S = SMLoc::getFromPointer(
4424  &CStr.data()[CStr.find(isGFX940() ? "sc0" : "glc")]);
4425  Error(S, isGFX940() ? "instruction must not use sc0"
4426  : "instruction must not use glc");
4427  return false;
4428  }
4429  }
4430 
4431  return true;
4432 }
4433 
4434 bool AMDGPUAsmParser::validateFlatLdsDMA(const MCInst &Inst,
4435  const OperandVector &Operands,
4436  const SMLoc &IDLoc) {
4437  if (isGFX940())
4438  return true;
4439 
4440  uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
4441  if ((TSFlags & (SIInstrFlags::VALU | SIInstrFlags::FLAT)) !=
4442  (SIInstrFlags::VALU | SIInstrFlags::FLAT))
4443  return true;
4444  // This is FLAT LDS DMA.
4445 
4446  SMLoc S = getImmLoc(AMDGPUOperand::ImmTyLDS, Operands);
4447  StringRef CStr(S.getPointer());
4448  if (!CStr.startswith("lds")) {
4449  // This is an incorrectly selected LDS DMA version of a FLAT load opcode.
4450  // The LDS version should have an 'lds' modifier, but it follows optional
4451  // operands, so its absence is ignored by the matcher.
4452  Error(IDLoc, "invalid operands for instruction");
4453  return false;
4454  }
4455 
4456  return true;
4457 }
4458 
4459 bool AMDGPUAsmParser::validateInstruction(const MCInst &Inst,
4460  const SMLoc &IDLoc,
4461  const OperandVector &Operands) {
4462  if (auto ErrMsg = validateLdsDirect(Inst)) {
4463  Error(getRegLoc(LDS_DIRECT, Operands), *ErrMsg);
4464  return false;
4465  }
4466  if (!validateSOPLiteral(Inst)) {
4467  Error(getLitLoc(Operands),
4468  "only one literal operand is allowed");
4469  return false;
4470  }
4471  if (!validateVOPLiteral(Inst, Operands)) {
4472  return false;
4473  }
4474  if (!validateConstantBusLimitations(Inst, Operands)) {
4475  return false;
4476  }
4477  if (!validateEarlyClobberLimitations(Inst, Operands)) {
4478  return false;
4479  }
4480  if (!validateIntClampSupported(Inst)) {
4481  Error(getImmLoc(AMDGPUOperand::ImmTyClampSI, Operands),
4482  "integer clamping is not supported on this GPU");
4483  return false;
4484  }
4485  if (!validateOpSel(Inst)) {
4486  Error(getImmLoc(AMDGPUOperand::ImmTyOpSel, Operands),
4487  "invalid op_sel operand");
4488  return false;
4489  }
4490  if (!validateDPP(Inst, Operands)) {
4491  return false;
4492  }
4493  // For MUBUF/MTBUF d16 is a part of opcode, so there is nothing to validate.
4494  if (!validateMIMGD16(Inst)) {
4495  Error(getImmLoc(AMDGPUOperand::ImmTyD16, Operands),
4496  "d16 modifier is not supported on this GPU");
4497  return false;
4498  }
4499  if (!validateMIMGDim(Inst)) {
4500  Error(IDLoc, "dim modifier is required on this GPU");
4501  return false;
4502  }
4503  if (!validateMIMGMSAA(Inst)) {
4504  Error(getImmLoc(AMDGPUOperand::ImmTyDim, Operands),
4505  "invalid dim; must be MSAA type");
4506  return false;
4507  }
4508  if (auto ErrMsg = validateMIMGDataSize(Inst)) {
4509  Error(IDLoc, *ErrMsg);
4510  return false;
4511  }
4512  if (!validateMIMGAddrSize(Inst)) {
4513  Error(IDLoc,
4514  "image address size does not match dim and a16");
4515  return false;
4516  }
4517  if (!validateMIMGAtomicDMask(Inst)) {
4518  Error(getImmLoc(AMDGPUOperand::ImmTyDMask, Operands),
4519  "invalid atomic image dmask");
4520  return false;
4521  }
4522  if (!validateMIMGGatherDMask(Inst)) {
4523  Error(getImmLoc(AMDGPUOperand::ImmTyDMask, Operands),
4524  "invalid image_gather dmask: only one bit must be set");
4525  return false;
4526  }
4527  if (!validateMovrels(Inst, Operands)) {
4528  return false;
4529  }
4530  if (!validateFlatOffset(Inst, Operands)) {
4531  return false;
4532  }
4533  if (!validateSMEMOffset(Inst, Operands)) {
4534  return false;
4535  }
4536  if (!validateMAIAccWrite(Inst, Operands)) {
4537  return false;
4538  }
4539  if (!validateMFMA(Inst, Operands)) {
4540  return false;
4541  }
4542  if (!validateCoherencyBits(Inst, Operands, IDLoc)) {
4543  return false;
4544  }
4545 
4546  if (!validateAGPRLdSt(Inst)) {
4547  Error(IDLoc, getFeatureBits()[AMDGPU::FeatureGFX90AInsts]
4548  ? "invalid register class: data and dst should be all VGPR or AGPR"
4549  : "invalid register class: agpr loads and stores not supported on this GPU"
4550  );
4551  return false;
4552  }
4553  if (!validateVGPRAlign(Inst)) {
4554  Error(IDLoc,
4555  "invalid register class: vgpr tuples must be 64 bit aligned");
4556  return false;
4557  }
4558  if (!validateGWS(Inst, Operands)) {
4559  return false;
4560  }
4561 
4562  if (!validateBLGP(Inst, Operands)) {
4563  return false;
4564  }
4565 
4566  if (!validateDivScale(Inst)) {
4567  Error(IDLoc, "ABS not allowed in VOP3B instructions");
4568  return false;
4569  }
4570  if (!validateCoherencyBits(Inst, Operands, IDLoc)) {
4571  return false;
4572  }
4573 
4574  if (!validateFlatLdsDMA(Inst, Operands, IDLoc)) {
4575  return false;
4576  }
4577 
4578  return true;
4579 }
4580 
4581 static std::string AMDGPUMnemonicSpellCheck(StringRef S,
4582  const FeatureBitset &FBS,
4583  unsigned VariantID = 0);
4584 
4585 static bool AMDGPUCheckMnemonic(StringRef Mnemonic,
4586  const FeatureBitset &AvailableFeatures,
4587  unsigned VariantID);
4588 
4589 bool AMDGPUAsmParser::isSupportedMnemo(StringRef Mnemo,
4590  const FeatureBitset &FBS) {
4591  return isSupportedMnemo(Mnemo, FBS, getAllVariants());
4592 }
4593 
4594 bool AMDGPUAsmParser::isSupportedMnemo(StringRef Mnemo,
4595  const FeatureBitset &FBS,
4596  ArrayRef<unsigned> Variants) {
4597  for (auto Variant : Variants) {
4598  if (AMDGPUCheckMnemonic(Mnemo, FBS, Variant))
4599  return true;
4600  }
4601 
4602  return false;
4603 }
4604 
4605 bool AMDGPUAsmParser::checkUnsupportedInstruction(StringRef Mnemo,
4606  const SMLoc &IDLoc) {
4607  FeatureBitset FBS = ComputeAvailableFeatures(getSTI().getFeatureBits());
4608 
4609  // Check if requested instruction variant is supported.
4610  if (isSupportedMnemo(Mnemo, FBS, getMatchedVariants()))
4611  return false;
4612 
4613  // This instruction is not supported.
4614  // Clear any other pending errors because they are no longer relevant.
4615  getParser().clearPendingErrors();
4616 
4617  // Requested instruction variant is not supported.
4618  // Check if any other variants are supported.
4619  StringRef VariantName = getMatchedVariantName();
4620  if (!VariantName.empty() && isSupportedMnemo(Mnemo, FBS)) {
4621  return Error(IDLoc,
4622  Twine(VariantName,
4623  " variant of this instruction is not supported"));
4624  }
4625 
4626  // Finally check if this instruction is supported on any other GPU.
4627  if (isSupportedMnemo(Mnemo, FeatureBitset().set())) {
4628  return Error(IDLoc, "instruction not supported on this GPU");
4629  }
4630 
4631  // Instruction not supported on any GPU. Probably a typo.
4632  std::string Suggestion = AMDGPUMnemonicSpellCheck(Mnemo, FBS);
4633  return Error(IDLoc, "invalid instruction" + Suggestion);
4634 }
4635 
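// Try to match the parsed operands against each available assembler variant,
// keeping the most specific failure status for diagnostics; on a successful
// match the instruction is validated and then emitted.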
4636 bool AMDGPUAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
4637  OperandVector &Operands,
4638  MCStreamer &Out,
4639  uint64_t &ErrorInfo,
4640  bool MatchingInlineAsm) {
4641  MCInst Inst;
4642  unsigned Result = Match_Success;
4643  for (auto Variant : getMatchedVariants()) {
4644  uint64_t EI;
4645  auto R = MatchInstructionImpl(Operands, Inst, EI, MatchingInlineAsm,
4646  Variant);
4647  // We order match statuses from least to most specific and use the most
4648  // specific status as the result:
4649  // Match_MnemonicFail < Match_InvalidOperand < Match_MissingFeature < Match_PreferE32
4650  if ((R == Match_Success) ||
4651  (R == Match_PreferE32) ||
4652  (R == Match_MissingFeature && Result != Match_PreferE32) ||
4653  (R == Match_InvalidOperand && Result != Match_MissingFeature
4654  && Result != Match_PreferE32) ||
4655  (R == Match_MnemonicFail && Result != Match_InvalidOperand
4656  && Result != Match_MissingFeature
4657  && Result != Match_PreferE32)) {
4658  Result = R;
4659  ErrorInfo = EI;
4660  }
4661  if (R == Match_Success)
4662  break;
4663  }
4664 
4665  if (Result == Match_Success) {
4666  if (!validateInstruction(Inst, IDLoc, Operands)) {
4667  return true;
4668  }
4669  Inst.setLoc(IDLoc);
4670  Out.emitInstruction(Inst, getSTI());
4671  return false;
4672  }
4673 
4674  StringRef Mnemo = ((AMDGPUOperand &)*Operands[0]).getToken();
4675  if (checkUnsupportedInstruction(Mnemo, IDLoc)) {
4676  return true;
4677  }
4678 
4679  switch (Result) {
4680  default: break;
4681  case Match_MissingFeature:
4682  // It has been verified that the specified instruction
4683  // mnemonic is valid. A match was found but it requires
4684  // features which are not supported on this GPU.
4685  return Error(IDLoc, "operands are not valid for this GPU or mode");
4686 
4687  case Match_InvalidOperand: {
4688  SMLoc ErrorLoc = IDLoc;
4689  if (ErrorInfo != ~0ULL) {
4690  if (ErrorInfo >= Operands.size()) {
4691  return Error(IDLoc, "too few operands for instruction");
4692  }
4693  ErrorLoc = ((AMDGPUOperand &)*Operands[ErrorInfo]).getStartLoc();
4694  if (ErrorLoc == SMLoc())
4695  ErrorLoc = IDLoc;
4696  }
4697  return Error(ErrorLoc, "invalid operand for instruction");
4698  }
4699 
4700  case Match_PreferE32:
4701  return Error(IDLoc, "internal error: instruction without _e64 suffix "
4702  "should be encoded as e32");
4703  case Match_MnemonicFail:
4704  llvm_unreachable("Invalid instructions should have been handled already");
4705  }
4706  llvm_unreachable("Implement any new match types added!");
4707 }
4708 
4709 bool AMDGPUAsmParser::ParseAsAbsoluteExpression(uint32_t &Ret) {
4710  int64_t Tmp = -1;
4711  if (!isToken(AsmToken::Integer) && !isToken(AsmToken::Identifier)) {
4712  return true;
4713  }
4714  if (getParser().parseAbsoluteExpression(Tmp)) {
4715  return true;
4716  }
4717  Ret = static_cast<uint32_t>(Tmp);
4718  return false;
4719 }
4720 
4721 bool AMDGPUAsmParser::ParseDirectiveMajorMinor(uint32_t &Major,
4722  uint32_t &Minor) {
4723  if (ParseAsAbsoluteExpression(Major))
4724  return TokError("invalid major version");
4725 
4726  if (!trySkipToken(AsmToken::Comma))
4727  return TokError("minor version number required, comma expected");
4728 
4729  if (ParseAsAbsoluteExpression(Minor))
4730  return TokError("invalid minor version");
4731 
4732  return false;
4733 }
4734 
4735 bool AMDGPUAsmParser::ParseDirectiveAMDGCNTarget() {
4736  if (getSTI().getTargetTriple().getArch() != Triple::amdgcn)
4737  return TokError("directive only supported for amdgcn architecture");
4738 
4739  std::string TargetIDDirective;
4740  SMLoc TargetStart = getTok().getLoc();
4741  if (getParser().parseEscapedString(TargetIDDirective))
4742  return true;
4743 
4744  SMRange TargetRange = SMRange(TargetStart, getTok().getLoc());
4745  if (getTargetStreamer().getTargetID()->toString() != TargetIDDirective)
4746  return getParser().Error(TargetRange.Start,
4747  (Twine(".amdgcn_target directive's target id ") +
4748  Twine(TargetIDDirective) +
4749  Twine(" does not match the specified target id ") +
4750  Twine(getTargetStreamer().getTargetID()->toString())).str());
4751 
4752  return false;
4753 }
4754 
4755 bool AMDGPUAsmParser::OutOfRangeError(SMRange Range) {
4756  return Error(Range.Start, "value out of range", Range);
4757 }
4758 
4759 bool AMDGPUAsmParser::calculateGPRBlocks(
4760  const FeatureBitset &Features, bool VCCUsed, bool FlatScrUsed,
4761  bool XNACKUsed, Optional<bool> EnableWavefrontSize32, unsigned NextFreeVGPR,
4762  SMRange VGPRRange, unsigned NextFreeSGPR, SMRange SGPRRange,
4763  unsigned &VGPRBlocks, unsigned &SGPRBlocks) {
4764  // TODO(scott.linder): These calculations are duplicated from
4765  // AMDGPUAsmPrinter::getSIProgramInfo and could be unified.
4766  IsaVersion Version = getIsaVersion(getSTI().getCPU());
4767 
4768  unsigned NumVGPRs = NextFreeVGPR;
4769  unsigned NumSGPRs = NextFreeSGPR;
4770 
4771  if (Version.Major >= 10)
4772  NumSGPRs = 0;
4773  else {
4774  unsigned MaxAddressableNumSGPRs =
4775  IsaInfo::getAddressableNumSGPRs(&getSTI());
4776 
4777  if (Version.Major >= 8 && !Features.test(FeatureSGPRInitBug) &&
4778  NumSGPRs > MaxAddressableNumSGPRs)
4779  return OutOfRangeError(SGPRRange);
4780 
4781  NumSGPRs +=
4782  IsaInfo::getNumExtraSGPRs(&getSTI(), VCCUsed, FlatScrUsed, XNACKUsed);
4783 
4784  if ((Version.Major <= 7 || Features.test(FeatureSGPRInitBug)) &&
4785  NumSGPRs > MaxAddressableNumSGPRs)
4786  return OutOfRangeError(SGPRRange);
4787 
4788  if (Features.test(FeatureSGPRInitBug))
4789  NumSGPRs = IsaInfo::FIXED_NUM_SGPRS_FOR_INIT_BUG;
4790  }
4791 
4792  VGPRBlocks =
4793  IsaInfo::getNumVGPRBlocks(&getSTI(), NumVGPRs, EnableWavefrontSize32);
4794  SGPRBlocks = IsaInfo::getNumSGPRBlocks(&getSTI(), NumSGPRs);
4795 
4796  return false;
4797 }
4798 
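// Parse a .amdhsa_kernel ... .end_amdhsa_kernel block, validate the register
// budgets it declares, and emit the resulting kernel descriptor.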
4799 bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() {
4800  if (getSTI().getTargetTriple().getArch() != Triple::amdgcn)
4801  return TokError("directive only supported for amdgcn architecture");
4802 
4803  if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA)
4804  return TokError("directive only supported for amdhsa OS");
4805 
4806  StringRef KernelName;
4807  if (getParser().parseIdentifier(KernelName))
4808  return true;
4809 
4810  kernel_descriptor_t KD = getDefaultAmdhsaKernelDescriptor(&getSTI());
4811 
4812  StringSet<> Seen;
4813 
4814  IsaVersion IVersion = getIsaVersion(getSTI().getCPU());
4815 
4816  SMRange VGPRRange;
4817  uint64_t NextFreeVGPR = 0;
4818  uint64_t AccumOffset = 0;
4819  uint64_t SharedVGPRCount = 0;
4820  SMRange SGPRRange;
4821  uint64_t NextFreeSGPR = 0;
4822 
4823  // Count the number of user SGPRs implied from the enabled feature bits.
4824  unsigned ImpliedUserSGPRCount = 0;
4825 
4826  // Track if the asm explicitly contains the directive for the user SGPR
4827  // count.
4828  Optional<unsigned> ExplicitUserSGPRCount;
4829  bool ReserveVCC = true;
4830  bool ReserveFlatScr = true;
4831  Optional<bool> EnableWavefrontSize32;
4832 
4833  while (true) {
4834  while (trySkipToken(AsmToken::EndOfStatement));
4835 
4836  StringRef ID;
4837  SMRange IDRange = getTok().getLocRange();
4838  if (!parseId(ID, "expected .amdhsa_ directive or .end_amdhsa_kernel"))
4839  return true;
4840 
4841  if (ID == ".end_amdhsa_kernel")
4842  break;
4843 
4844  if (Seen.find(ID) != Seen.end())
4845  return TokError(".amdhsa_ directives cannot be repeated");
4846  Seen.insert(ID);
4847 
4848  SMLoc ValStart = getLoc();
4849  int64_t IVal;
4850  if (getParser().parseAbsoluteExpression(IVal))
4851  return true;
4852  SMLoc ValEnd = getLoc();
4853  SMRange ValRange = SMRange(ValStart, ValEnd);
4854 
4855  if (IVal < 0)
4856  return OutOfRangeError(ValRange);
4857 
4858  uint64_t Val = IVal;
4859 
4860 #define PARSE_BITS_ENTRY(FIELD, ENTRY, VALUE, RANGE) \
4861  if (!isUInt<ENTRY##_WIDTH>(VALUE)) \
4862  return OutOfRangeError(RANGE); \
4863  AMDHSA_BITS_SET(FIELD, ENTRY, VALUE);
4864 
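// Each .amdhsa_* directive below range-checks its value against the width of
// the corresponding descriptor field before packing it with AMDHSA_BITS_SET.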
4865  if (ID == ".amdhsa_group_segment_fixed_size") {
4866  if (!isUInt<sizeof(KD.group_segment_fixed_size) * CHAR_BIT>(Val))
4867  return OutOfRangeError(ValRange);
4868  KD.group_segment_fixed_size = Val;
4869  } else if (ID == ".amdhsa_private_segment_fixed_size") {
4870  if (!isUInt<sizeof(KD.private_segment_fixed_size) * CHAR_BIT>(Val))
4871  return OutOfRangeError(ValRange);
4872  KD.private_segment_fixed_size = Val;
4873  } else if (ID == ".amdhsa_kernarg_size") {
4874  if (!isUInt<sizeof(KD.kernarg_size) * CHAR_BIT>(Val))
4875  return OutOfRangeError(ValRange);
4876  KD.kernarg_size = Val;
4877  } else if (ID == ".amdhsa_user_sgpr_count") {
4878  ExplicitUserSGPRCount = Val;
4879  } else if (ID == ".amdhsa_user_sgpr_private_segment_buffer") {
4880  if (hasArchitectedFlatScratch())
4881  return Error(IDRange.Start,
4882  "directive is not supported with architected flat scratch",
4883  IDRange);
4884  PARSE_BITS_ENTRY(KD.kernel_code_properties,
4885  KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER,
4886  Val, ValRange);
4887  if (Val)
4888  ImpliedUserSGPRCount += 4;
4889  } else if (ID == ".amdhsa_user_sgpr_dispatch_ptr") {
4890  PARSE_BITS_ENTRY(KD.kernel_code_properties,
4891  KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR, Val,
4892  ValRange);
4893  if (Val)
4894  ImpliedUserSGPRCount += 2;
4895  } else if (ID == ".amdhsa_user_sgpr_queue_ptr") {
4896  PARSE_BITS_ENTRY(KD.kernel_code_properties,
4897  KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR, Val,
4898  ValRange);
4899  if (Val)
4900  ImpliedUserSGPRCount += 2;
4901  } else if (ID == ".amdhsa_user_sgpr_kernarg_segment_ptr") {
4902  PARSE_BITS_ENTRY(KD.kernel_code_properties,
4903  KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR,
4904  Val, ValRange);
4905  if (Val)
4906  ImpliedUserSGPRCount += 2;
4907  } else if (ID == ".amdhsa_user_sgpr_dispatch_id") {
4908  PARSE_BITS_ENTRY(KD.kernel_code_properties,
4909  KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID, Val,
4910  ValRange);
4911  if (Val)
4912  ImpliedUserSGPRCount += 2;
4913  } else if (ID == ".amdhsa_user_sgpr_flat_scratch_init") {
4914  if (hasArchitectedFlatScratch())
4915  return Error(IDRange.Start,
4916  "directive is not supported with architected flat scratch",
4917  IDRange);
4918  PARSE_BITS_ENTRY(KD.kernel_code_properties,
4919  KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT, Val,
4920  ValRange);
4921  if (Val)
4922  ImpliedUserSGPRCount += 2;
4923  } else if (ID == ".amdhsa_user_sgpr_private_segment_size") {
4924  PARSE_BITS_ENTRY(KD.kernel_code_properties,
4925  KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE,
4926  Val, ValRange);
4927  if (Val)
4928  ImpliedUserSGPRCount += 1;
4929  } else if (ID == ".amdhsa_wavefront_size32") {
4930  if (IVersion.Major < 10)
4931  return Error(IDRange.Start, "directive requires gfx10+", IDRange);
4932  EnableWavefrontSize32 = Val;
4933  PARSE_BITS_ENTRY(KD.kernel_code_properties,
4934  KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32,
4935  Val, ValRange);
4936  } else if (ID == ".amdhsa_system_sgpr_private_segment_wavefront_offset") {
4937  if (hasArchitectedFlatScratch())
4938  return Error(IDRange.Start,
4939  "directive is not supported with architected flat scratch",
4940  IDRange);
4941  PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4942  COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT, Val, ValRange);
4943  } else if (ID == ".amdhsa_enable_private_segment") {
4944  if (!hasArchitectedFlatScratch())
4945  return Error(
4946  IDRange.Start,
4947  "directive is not supported without architected flat scratch",
4948  IDRange);
4949  PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4950  COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT, Val, ValRange);
4951  } else if (ID == ".amdhsa_system_sgpr_workgroup_id_x") {
4952  PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4953  COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X, Val,
4954  ValRange);
4955  } else if (ID == ".amdhsa_system_sgpr_workgroup_id_y") {
4956  PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4957  COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Y, Val,
4958  ValRange);
4959  } else if (ID == ".amdhsa_system_sgpr_workgroup_id_z") {
4960  PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4961  COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Z, Val,
4962  ValRange);
4963  } else if (ID == ".amdhsa_system_sgpr_workgroup_info") {
4964  PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4965  COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_INFO, Val,
4966  ValRange);
4967  } else if (ID == ".amdhsa_system_vgpr_workitem_id") {
4968  PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4969  COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID, Val,
4970  ValRange);
4971  } else if (ID == ".amdhsa_next_free_vgpr") {
4972  VGPRRange = ValRange;
4973  NextFreeVGPR = Val;
4974  } else if (ID == ".amdhsa_next_free_sgpr") {
4975  SGPRRange = ValRange;
4976  NextFreeSGPR = Val;
4977  } else if (ID == ".amdhsa_accum_offset") {
4978  if (!isGFX90A())
4979  return Error(IDRange.Start, "directive requires gfx90a+", IDRange);
4980  AccumOffset = Val;
4981  } else if (ID == ".amdhsa_reserve_vcc") {
4982  if (!isUInt<1>(Val))
4983  return OutOfRangeError(ValRange);
4984  ReserveVCC = Val;
4985  } else if (ID == ".amdhsa_reserve_flat_scratch") {
4986  if (IVersion.Major < 7)
4987  return Error(IDRange.Start, "directive requires gfx7+", IDRange);
4988  if (hasArchitectedFlatScratch())
4989  return Error(IDRange.Start,
4990  "directive is not supported with architected flat scratch",
4991  IDRange);
4992  if (!isUInt<1>(Val))
4993  return OutOfRangeError(ValRange);
4994  ReserveFlatScr = Val;
4995  } else if (ID == ".amdhsa_reserve_xnack_mask") {
4996  if (IVersion.Major < 8)
4997  return Error(IDRange.Start, "directive requires gfx8+", IDRange);
4998  if (!isUInt<1>(Val))
4999  return OutOfRangeError(ValRange);
5000  if (Val != getTargetStreamer().getTargetID()->isXnackOnOrAny())
5001  return getParser().Error(IDRange.Start, ".amdhsa_reserve_xnack_mask does not match target id",
5002  IDRange);
5003  } else if (ID == ".amdhsa_float_round_mode_32") {
5004  PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
5005  COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32, Val, ValRange);
5006  } else if (ID == ".amdhsa_float_round_mode_16_64") {
5007  PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
5008  COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_16_64, Val, ValRange);
5009  } else if (ID == ".amdhsa_float_denorm_mode_32") {
5010  PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
5011  COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_32, Val, ValRange);
5012  } else if (ID == ".amdhsa_float_denorm_mode_16_64") {
5013  PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
5014  COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64, Val,
5015  ValRange);
5016  } else if (ID == ".amdhsa_dx10_clamp") {
5017  PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
5018  COMPUTE_PGM_RSRC1_ENABLE_DX10_CLAMP, Val, ValRange);
5019  } else if (ID == ".amdhsa_ieee_mode") {
5020  PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_ENABLE_IEEE_MODE,
5021  Val, ValRange);
5022  } else if (ID == ".amdhsa_fp16_overflow") {
5023  if (IVersion.Major < 9)
5024  return Error(IDRange.Start, "directive requires gfx9+", IDRange);
5025  PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_FP16_OVFL, Val,
5026  ValRange);
5027  } else if (ID == ".amdhsa_tg_split") {
5028  if (!isGFX90A())
5029  return Error(IDRange.Start, "directive requires gfx90a+", IDRange);
5030  PARSE_BITS_ENTRY(KD.compute_pgm_rsrc3, COMPUTE_PGM_RSRC3_GFX90A_TG_SPLIT, Val,
5031  ValRange);
5032  } else if (ID == ".amdhsa_workgroup_processor_mode") {
5033  if (IVersion.Major < 10)
5034  return Error(IDRange.Start, "directive requires gfx10+", IDRange);
5035  PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_WGP_MODE, Val,
5036  ValRange);
5037  } else if (ID == ".amdhsa_memory_ordered") {
5038  if (IVersion.Major < 10)
5039  return Error(IDRange.Start, "directive requires gfx10+", IDRange);
5040  PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_MEM_ORDERED, Val,
5041  ValRange);
5042  } else if (ID == ".amdhsa_forward_progress") {
5043  if (IVersion.Major < 10)
5044  return Error(IDRange.Start, "directive requires gfx10+", IDRange);
5045  PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_FWD_PROGRESS, Val,
5046  ValRange);
5047  } else if (ID == ".amdhsa_shared_vgpr_count") {
5048  if (IVersion.Major < 10)
5049  return Error(IDRange.Start, "directive requires gfx10+", IDRange);
5050  SharedVGPRCount = Val;
5051  PARSE_BITS_ENTRY(KD.compute_pgm_rsrc3,
5052  COMPUTE_PGM_RSRC3_GFX10_SHARED_VGPR_COUNT, Val,
5053  ValRange);
5054  } else if (ID == ".amdhsa_exception_fp_ieee_invalid_op") {
5055  PARSE_BITS_ENTRY(
5056  KD.compute_pgm_rsrc2,
5057  COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION, Val,
5058  ValRange);
5059  } else if (ID == ".amdhsa_exception_fp_denorm_src") {
5060  PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
5061  COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_FP_DENORMAL_SOURCE,
5062  Val, ValRange);
5063  } else if (ID == ".amdhsa_exception_fp_ieee_div_zero") {
5064  PARSE_BITS_ENTRY(
5065  KD.compute_pgm_rsrc2,
5066  COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO, Val,
5067  ValRange);
5068  } else if (ID == ".amdhsa_exception_fp_ieee_overflow") {
5069  PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
5070  COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW,
5071  Val, ValRange);
5072  } else if (ID == ".amdhsa_exception_fp_ieee_underflow") {
5073  PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
5074  COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW,
5075  Val, ValRange);
5076  } else if (ID == ".amdhsa_exception_fp_ieee_inexact") {
5077  PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
5078  COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INEXACT,
5079  Val, ValRange);
5080  } else if (ID == ".amdhsa_exception_int_div_zero") {
5081  PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
5082  COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO,
5083  Val, ValRange);
5084  } else {
5085  return Error(IDRange.Start, "unknown .amdhsa_kernel directive", IDRange);
5086  }
5087 
5088 #undef PARSE_BITS_ENTRY
5089  }
5090 
5091  if (Seen.find(".amdhsa_next_free_vgpr") == Seen.end())
5092  return TokError(".amdhsa_next_free_vgpr directive is required");
5093 
5094  if (Seen.find(".amdhsa_next_free_sgpr") == Seen.end())
5095  return TokError(".amdhsa_next_free_sgpr directive is required");
5096 
5097  unsigned VGPRBlocks;
5098  unsigned SGPRBlocks;
5099  if (calculateGPRBlocks(getFeatureBits(), ReserveVCC, ReserveFlatScr,
5100  getTargetStreamer().getTargetID()->isXnackOnOrAny(),
5101  EnableWavefrontSize32, NextFreeVGPR,
5102  VGPRRange, NextFreeSGPR, SGPRRange, VGPRBlocks,
5103  SGPRBlocks))
5104  return true;
5105 
5106  if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT_WIDTH>(
5107  VGPRBlocks))
5108  return OutOfRangeError(VGPRRange);
5109  AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
5110  COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT, VGPRBlocks);
5111 
5112  if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT_WIDTH>(
5113  SGPRBlocks))
5114  return OutOfRangeError(SGPRRange);
5115  AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
5116  COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT,
5117  SGPRBlocks);
5118 
5119  if (ExplicitUserSGPRCount && ImpliedUserSGPRCount > *ExplicitUserSGPRCount)
5120  return TokError("amdgpu_user_sgpr_count smaller than implied by "
5121  "enabled user SGPRs");
5122 
5123  unsigned UserSGPRCount =
5124  ExplicitUserSGPRCount ? *ExplicitUserSGPRCount : ImpliedUserSGPRCount;
5125 
5126  if (!isUInt<COMPUTE_PGM_RSRC2_USER_SGPR_COUNT_WIDTH>(UserSGPRCount))
5127  return TokError("too many user SGPRs enabled");
5128  AMDHSA_BITS_SET(KD.compute_pgm_rsrc2, COMPUTE_PGM_RSRC2_USER_SGPR_COUNT,
5129  UserSGPRCount);
5130 
5131  if (isGFX90A()) {
5132  if (Seen.find(".amdhsa_accum_offset") == Seen.end())
5133  return TokError(".amdhsa_accum_offset directive is required");
5134  if (AccumOffset < 4 || AccumOffset > 256 || (AccumOffset & 3))
5135  return TokError("accum_offset should be in range [4..256] in "
5136  "increments of 4");
5137  if (AccumOffset > alignTo(std::max((uint64_t)1, NextFreeVGPR), 4))
5138  return TokError("accum_offset exceeds total VGPR allocation");
5139  AMDHSA_BITS_SET(KD.compute_pgm_rsrc3, COMPUTE_PGM_RSRC3_GFX90A_ACCUM_OFFSET,
5140  (AccumOffset / 4 - 1));
5141  }
5142 
5143  if (IVersion.Major == 10) {
5144  // SharedVGPRCount < 16 checked by PARSE_BITS_ENTRY
5145  if (SharedVGPRCount && EnableWavefrontSize32) {
5146  return TokError("shared_vgpr_count directive not valid on "
5147  "wavefront size 32");
5148  }
5149  if (SharedVGPRCount * 2 + VGPRBlocks > 63) {
5150  return TokError("shared_vgpr_count*2 + "
5151  "compute_pgm_rsrc1.GRANULATED_WORKITEM_VGPR_COUNT cannot "
5152  "exceed 63\n");
5153  }
5154  }
5155 
5156  getTargetStreamer().EmitAmdhsaKernelDescriptor(
5157  getSTI(), KernelName, KD, NextFreeVGPR, NextFreeSGPR, ReserveVCC,
5158  ReserveFlatScr);
5159  return false;
5160 }
5161 
5162 bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectVersion() {
5163  uint32_t Major;
5164  uint32_t Minor;
5165 
5166  if (ParseDirectiveMajorMinor(Major, Minor))
5167  return true;
5168 
5169  getTargetStreamer().EmitDirectiveHSACodeObjectVersion(Major, Minor);
5170  return false;
5171 }
5172 
5173 bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectISA() {
5174  uint32_t Major;
5175  uint32_t Minor;
5176  uint32_t Stepping;
5177  StringRef VendorName;
5178  StringRef ArchName;
5179 
5180  // If this directive has no arguments, then use the ISA version for the
5181  // targeted GPU.
5182  if (isToken(AsmToken::EndOfStatement)) {
5183  AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
5184  getTargetStreamer().EmitDirectiveHSACodeObjectISAV2(ISA.Major, ISA.Minor,
5185  ISA.Stepping,
5186  "AMD", "AMDGPU");
5187  return false;
5188  }
5189 
5190  if (ParseDirectiveMajorMinor(Major, Minor))
5191  return true;
5192 
5193  if (!trySkipToken(AsmToken::Comma))
5194  return TokError("stepping version number required, comma expected");
5195 
5196  if (ParseAsAbsoluteExpression(Stepping))
5197  return TokError("invalid stepping version");
5198 
5199  if (!trySkipToken(AsmToken::Comma))
5200  return TokError("vendor name required, comma expected");
5201 
5202  if (!parseString(VendorName, "invalid vendor name"))
5203  return true;
5204 
5205  if (!trySkipToken(AsmToken::Comma))
5206  return TokError("arch name required, comma expected");
5207 
5208  if (!parseString(ArchName, "invalid arch name"))
5209  return true;
5210 
5211  getTargetStreamer().EmitDirectiveHSACodeObjectISAV2(Major, Minor, Stepping,
5212  VendorName, ArchName);
5213  return false;
5214 }
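// Illustrative usage of the directive parsed above. With no arguments the ISA
// version of the targeted GPU is emitted; otherwise all five fields are
// required (the values below are placeholders):
//   .hsa_code_object_isa
//   .hsa_code_object_isa 9,0,0,"AMD","AMDGPU"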
5215 
5216 bool AMDGPUAsmParser::ParseAMDKernelCodeTValue(StringRef ID,
5217  amd_kernel_code_t &Header) {
5218  // max_scratch_backing_memory_byte_size is deprecated. Ignore it while parsing
5219  // assembly for backwards compatibility.
5220  if (ID == "max_scratch_backing_memory_byte_size") {
5221  Parser.eatToEndOfStatement();
5222  return false;
5223  }
5224 
5225  SmallString<40> ErrStr;
5226  raw_svector_ostream Err(ErrStr);
5227  if (!parseAmdKernelCodeField(ID, getParser(), Header, Err)) {
5228  return TokError(Err.str());
5229  }
5230  Lex();
5231 
5232  if (ID == "enable_wavefront_size32") {
5233  if (Header.code_properties & AMD_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32) {
5234  if (!isGFX10Plus())
5235  return TokError("enable_wavefront_size32=1 is only allowed on GFX10+");
5236  if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32])
5237  return TokError("enable_wavefront_size32=1 requires +WavefrontSize32");
5238  } else {
5239  if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64])
5240  return TokError("enable_wavefront_size32=0 requires +WavefrontSize64");
5241  }
5242  }
5243 
5244  if (ID == "wavefront_size") {
5245  if (Header.wavefront_size == 5) {
5246  if (!isGFX10Plus())
5247  return TokError("wavefront_size=5 is only allowed on GFX10+");
5248  if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32])
5249  return TokError("wavefront_size=5 requires +WavefrontSize32");
5250  } else if (Header.wavefront_size == 6) {
5251  if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64])
5252  return TokError("wavefront_size=6 requires +WavefrontSize64");
5253  }
5254  }
5255 
5256  if (ID == "enable_wgp_mode") {
5257  if (G_00B848_WGP_MODE(Header.compute_pgm_resource_registers) &&
5258  !isGFX10Plus())
5259  return TokError("enable_wgp_mode=1 is only allowed on GFX10+");
5260  }
5261 
5262  if (ID == "enable_mem_ordered") {
5263  if (G_00B848_MEM_ORDERED(Header.compute_pgm_resource_registers) &&
5264  !isGFX10Plus())
5265  return TokError("enable_mem_ordered=1 is only allowed on GFX10+");
5266  }
5267 
5268  if (ID == "enable_fwd_progress") {
5269  if (G_00B848_FWD_PROGRESS(Header.compute_pgm_resource_registers) &&
5270  !isGFX10Plus())
5271  return TokError("enable_fwd_progress=1 is only allowed on GFX10+");
5272  }
5273 
5274  return false;
5275 }
5276 
5277 bool AMDGPUAsmParser::ParseDirectiveAMDKernelCodeT() {
5278  amd_kernel_code_t Header;
5279  AMDGPU::initDefaultAMDKernelCodeT(Header, &getSTI());
5280 
5281  while (true) {
5282  // Lex EndOfStatement. This is in a while loop, because lexing a comment
5283  // will set the current token to EndOfStatement.
5284  while(trySkipToken(AsmToken::EndOfStatement));
5285 
5286  StringRef ID;
5287  if (!parseId(ID, "expected value identifier or .end_amd_kernel_code_t"))
5288  return true;
5289 
5290  if (ID == ".end_amd_kernel_code_t")
5291  break;
5292 
5293  if (ParseAMDKernelCodeTValue(ID, Header))
5294  return true;
5295  }
5296 
5297  getTargetStreamer().EmitAMDKernelCodeT(Header);
5298 
5299  return false;
5300 }
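// Illustrative usage of the directive parsed above: a sequence of
// "name = value" assignments for amd_kernel_code_t fields, terminated by
// .end_amd_kernel_code_t. The field names and values below are placeholders
// chosen from fields validated in ParseAMDKernelCodeTValue:
//   .amd_kernel_code_t
//     wavefront_size = 6
//     enable_wgp_mode = 0
//   .end_amd_kernel_code_t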
5301 
5302 bool AMDGPUAsmParser::ParseDirectiveAMDGPUHsaKernel() {
5303  StringRef KernelName;
5304  if (!parseId(KernelName, "expected symbol name"))
5305  return true;
5306 
5307  getTargetStreamer().EmitAMDGPUSymbolType(KernelName,
5308  ELF::STT_AMDGPU_HSA_KERNEL);
5309 
5310  KernelScope.initialize(getContext());
5311  return false;
5312 }
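// Illustrative usage of the directive parsed above (the kernel name is a
// placeholder):
//   .amdgpu_hsa_kernel my_kernel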
5313 
5314 bool AMDGPUAsmParser::ParseDirectiveISAVersion() {
5315  if (getSTI().getTargetTriple().getArch() != Triple::amdgcn) {
5316  return Error(getLoc(),
5317  ".amd_amdgpu_isa directive is not available on non-amdgcn "
5318  "architectures");
5319  }
5320 
5321  auto TargetIDDirective = getLexer().getTok().getStringContents();
5322  if (getTargetStreamer().getTargetID()->toString() != TargetIDDirective)
5323  return Error(getParser().getTok().getLoc(), "target id must match options");
5324 
5325  getTargetStreamer().EmitISAVersion();
5326  Lex();
5327 
5328  return false;
5329 }
5330 
5331 bool AMDGPUAsmParser::ParseDirectiveHSAMetadata() {
5332  const char *AssemblerDirectiveBegin;
5333  const char *AssemblerDirectiveEnd;
5334  std::tie(AssemblerDirectiveBegin, AssemblerDirectiveEnd) =
5335  isHsaAbiVersion3AndAbove(&getSTI())
5336  ? std::make_tuple(HSAMD::V3::AssemblerDirectiveBegin,
5337  HSAMD::V3::AssemblerDirectiveEnd)
5338  : std::make_tuple(HSAMD::AssemblerDirectiveBegin,
5339  HSAMD::AssemblerDirectiveEnd);
5340 
5341  if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA) {
5342  return Error(getLoc(),
5343  (Twine(AssemblerDirectiveBegin) + Twine(" directive is "
5344  "not available on non-amdhsa OSes")).str());
5345  }
5346 
5347  std::string HSAMetadataString;
5348  if (ParseToEndDirective(AssemblerDirectiveBegin, AssemblerDirectiveEnd,
5349  HSAMetadataString))
5350  return true;
5351 
5352  if (isHsaAbiVersion3AndAbove(&getSTI())) {
5353  if (!getTargetStreamer().EmitHSAMetadataV3(HSAMetadataString))
5354  return Error(getLoc(), "invalid HSA metadata");
5355  } else {
5356  if (!getTargetStreamer().EmitHSAMetadataV2(HSAMetadataString))
5357  return Error(getLoc(), "invalid HSA metadata");
5358  }
5359 
5360  return false;
5361 }
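// Illustrative usage, assuming the code object V3+ directive names: the text
// between the begin and end markers (typically YAML) is collected verbatim by
// ParseToEndDirective below. The metadata contents are placeholders:
//   .amdgpu_metadata
//     amdhsa.version: [ 1, 0 ]
//   .end_amdgpu_metadata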
5362 
5363 /// Common code to parse out a block of text (typically YAML) between start and
5364 /// end directives.
5365 bool AMDGPUAsmParser::ParseToEndDirective(const char *AssemblerDirectiveBegin,
5366  const char *AssemblerDirectiveEnd,
5367  std::string &CollectString) {
5368 
5369  raw_string_ostream CollectStream(CollectString);
5370 
5371  getLexer().setSkipSpace(false);
5372 
5373  bool FoundEnd = false;
5374  while (!isToken(AsmToken::Eof)) {
5375  while (isToken(AsmToken::Space)) {
5376  CollectStream << getTokenStr();
5377  Lex();
5378  }
5379 
5380  if (trySkipId(AssemblerDirectiveEnd)) {
5381  FoundEnd = true;
5382  break;
5383  }
5384 
5385  CollectStream << Parser.parseStringToEndOfStatement()
5386  << getContext().getAsmInfo()->getSeparatorString();
5387 
5388  Parser.eatToEndOfStatement();
5389  }
5390 
5391  getLexer().setSkipSpace(true);
5392 
5393  if (isToken(AsmToken::Eof) && !FoundEnd) {
5394  return TokError(Twine("expected directive ") +
5395  Twine(AssemblerDirectiveEnd) + Twine(" not found"));
5396  }
5397 
5398  CollectStream.flush();
5399  return false;
5400 }
5401 
5402 /// Parse the assembler directive for new MsgPack-format PAL metadata.
5403 bool AMDGPUAsmParser::ParseDirectivePALMetadataBegin() {
5404  std::string String;
5405  if (ParseToEndDirective(AMDGPU::PALMD::AssemblerDirectiveBegin,
5406  AMDGPU::PALMD::AssemblerDirectiveEnd, String))
5407  return true;
5408 
5409  auto PALMetadata = getTargetStreamer().getPALMetadata();
5410  if (!PALMetadata->setFromString(String))
5411  return Error(getLoc(), "invalid PAL metadata");
5412  return false;
5413 }
5414 
5415 /// Parse the assembler directive for old linear-format PAL metadata.
5416 bool AMDGPUAsmParser::ParseDirectivePALMetadata() {
5417  if (getSTI().getTargetTriple().getOS() != Triple::AMDPAL) {
5418  return Error(getLoc(),
5419  (Twine(PALMD::AssemblerDirective) + Twine(" directive is "
5420  "not available on non-amdpal OSes")).str());
5421  }
5422 
5423  auto PALMetadata = getTargetStreamer().getPALMetadata();
5424  PALMetadata->setLegacy();
5425  for (;;) {
5426  uint32_t Key, Value;
5427  if (ParseAsAbsoluteExpression(Key)) {
5428  return TokError(Twine("invalid value in ") +
5429  Twine(PALMD::AssemblerDirective));
5430  }
5431  if (!trySkipToken(AsmToken::Comma)) {
5432  return TokError(Twine("expected an even number of values in ") +
5433  Twine(PALMD::AssemblerDirective));
5434  }
5435  if (ParseAsAbsoluteExpression(Value)) {
5436  return TokError(Twine("invalid value in ") +
5437  Twine(PALMD::AssemblerDirective));
5438  }
5439  PALMetadata->setRegister(Key, Value);
5440  if (!trySkipToken(AsmToken::Comma))
5441  break;
5442  }
5443  return false;
5444 }
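// Illustrative usage, assuming the legacy directive name spelled by
// PALMD::AssemblerDirective: an even number of comma-separated values,
// interpreted as register/value pairs (the numbers below are placeholders):
//   .amd_amdgpu_pal_metadata 0x2C0A, 0x0, 0x2C0B, 0x0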
5445 
5446 /// ParseDirectiveAMDGPULDS
5447 /// ::= .amdgpu_lds identifier ',' size_expression [',' align_expression]
5448 bool AMDGPUAsmParser::ParseDirectiveAMDGPULDS() {
5449  if (getParser().checkForValidSection())
5450  return true;
5451 
5452  StringRef Name;
5453  SMLoc NameLoc = getLoc();
5454  if (getParser().parseIdentifier(Name))
5455  return TokError("expected identifier in directive");
5456 
5457  MCSymbol *Symbol = getContext().getOrCreateSymbol(Name);
5458  if (parseToken(AsmToken::Comma, "expected ','"))
5459  return true;
5460 
5461  unsigned LocalMemorySize = AMDGPU::IsaInfo::getLocalMemorySize(&getSTI());
5462 
5463  int64_t Size;
5464  SMLoc SizeLoc = getLoc();
5465  if (getParser().parseAbsoluteExpression(Size))
5466  return true;
5467  if (Size < 0)
5468  return Error(SizeLoc, "size must be non-negative");
5469  if (Size > LocalMemorySize)
5470  return Error(SizeLoc, "size is too large");
5471 
5472  int64_t Alignment = 4;
5473  if (trySkipToken(AsmToken::Comma)) {
5474  SMLoc AlignLoc = getLoc();
5475  if (getParser().parseAbsoluteExpression(Alignment))
5476  return true;
5477  if (Alignment < 0 || !isPowerOf2_64(Alignment))
5478  return Error(AlignLoc, "alignment must be a power of two");
5479 
5480  // Alignment larger than the size of LDS is possible in theory, as long
5481  // as the linker manages to place the symbol at address 0, but we do want
5482  // to make sure the alignment fits nicely into a 32-bit integer.
5483  if (Alignment >= 1u << 31)
5484  return Error(AlignLoc, "alignment is too large");
5485  }
5486 
5487  if (parseToken(AsmToken::EndOfStatement,
5488  "unexpected token in '.amdgpu_lds' directive"))
5489  return true;
5490 
5491  Symbol->redefineIfPossible();
5492  if (!Symbol->isUndefined())
5493  return Error(NameLoc, "invalid symbol redefinition");
5494 
5495  getTargetStreamer().emitAMDGPULDS(Symbol, Size, Align(Alignment));
5496  return false;
5497 }
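// Illustrative usage, following the grammar above (symbol name, size and
// alignment are placeholders): the size must fit in local memory and the
// optional alignment must be a power of two smaller than 2^31:
//   .amdgpu_lds my_lds_symbol, 4096, 16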
5498 
5499 bool AMDGPUAsmParser::ParseDirective(AsmToken DirectiveID) {
5500  StringRef IDVal = DirectiveID.getString();
5501 
5502  if (isHsaAbiVersion3AndAbove(&getSTI())) {
5503  if (IDVal == ".amdhsa_kernel")
5504  return ParseDirectiveAMDHSAKernel();
5505 
5506  // TODO: Restructure/combine with PAL metadata directive.
5507  if (IDVal == AMDGPU::HSAMD::V3::AssemblerDirectiveBegin)
5508  return ParseDirectiveHSAMetadata();
5509  } else {
5510  if (IDVal == ".hsa_code_object_version")
5511  return ParseDirectiveHSACodeObjectVersion();
5512 
5513  if (IDVal == ".hsa_code_object_isa")
5514  return ParseDirectiveHSACodeObjectISA();
5515 
5516  if (IDVal == ".amd_kernel_code_t")
5517  return ParseDirectiveAMDKernelCodeT();
5518 
5519  if (IDVal == ".amdgpu_hsa_kernel")
5520  return ParseDirectiveAMDGPUHsaKernel();
5521 
5522  if (IDVal == ".amd_amdgpu_isa")
5523  return ParseDirectiveISAVersion();
5524 
5524 
5525  if (IDVal == HSAMD::AssemblerDirectiveBegin)
5526  return ParseDirectiveHSAMetadata();
5527  }
5528 
5529  if (IDVal == ".amdgcn_target")
5530  return ParseDirectiveAMDGCNTarget();
5531 
5532  if (IDVal == ".amdgpu_lds")
5533  return ParseDirectiveAMDGPULDS();
5534 
5535  if (IDVal == PALMD::AssemblerDirectiveBegin)
5536  return ParseDirectivePALMetadataBegin();
5537 
5538  if (IDVal == PALMD::AssemblerDirective)
5539  return ParseDirectivePALMetadata();
5540 
5541  return true;
5542 }
5543 
5544 bool AMDGPUAsmParser::subtargetHasRegister(const MCRegisterInfo &MRI,
5545  unsigned RegNo) {
5546 
5547  if (MRI.regsOverlap(AMDGPU::TTMP12_TTMP13_TTMP14_TTMP15, RegNo))
5548  return isGFX9Plus();
5549 
5550  // GFX10 has 2 more SGPRs: 104 and 105.
5551  if (MRI.regsOverlap(AMDGPU::SGPR104_SGPR105, RegNo))
5552  return hasSGPR104_SGPR105();
5553 
5554  switch (RegNo) {
5555  case AMDGPU::SRC_SHARED_BASE:
5556  case AMDGPU::SRC_SHARED_LIMIT:
5557  case AMDGPU::SRC_PRIVATE_BASE:
5558  case AMDGPU::SRC_PRIVATE_LIMIT:
5559  case AMDGPU::SRC_POPS_EXITING_WAVE_ID:
5560  return isGFX9Plus();
5561  case AMDGPU::TBA:
5562  case AMDGPU::TBA_LO:
5563  case AMDGPU::TBA_HI:
5564  case AMDGPU::TMA:
5565  case AMDGPU::TMA_LO:
5566  case AMDGPU::TMA_HI:
5567  return !isGFX9Plus();
5568  case AMDGPU::XNACK_MASK:
5569  case AMDGPU::XNACK_MASK_LO:
5570  case AMDGPU::XNACK_MASK_HI:
5571  return (isVI() || isGFX9()) && getTargetStreamer().getTargetID()->isXnackSupported();
5572  case AMDGPU::SGPR_NULL:
5573  return isGFX10Plus();
5574  default:
5575  break;
5576  }
5577 
5578  if (isCI())
5579  return true;
5580 
5581  if (isSI() || isGFX10Plus()) {
5582  // No flat_scr on SI.
5583  // On GFX10 flat scratch is not a valid register operand and can only be
5584  // accessed with s_setreg/s_getreg.
5585  switch (RegNo) {
5586  case AMDGPU::FLAT_SCR:
5587  case AMDGPU::FLAT_SCR_LO:
5588  case AMDGPU::FLAT_SCR_HI:
5589  return false;
5590  default:
5591  return true;
5592  }
5593  }
5594 
5595  // VI only has 102 SGPRs, so make sure we aren't trying to use the 2 more that
5596  // SI/CI have.
5597  if (MRI.regsOverlap(AMDGPU::SGPR102_SGPR103, RegNo))
5598  return hasSGPR102_SGPR103();
5599 
5600  return true;
5601 }
5602 
5603 OperandMatchResultTy
5604 AMDGPUAsmParser::parseOperand(OperandVector &Operands, StringRef Mnemonic,
5605  OperandMode Mode) {
5606  // Try to parse with a custom parser
5607  OperandMatchResultTy ResTy = MatchOperandParserImpl(Operands, Mnemonic);
5608 
5609  // If we successfully parsed the operand or if there was an error parsing,
5610  // we are done.
5611  //
5612  // If we are parsing after we reach EndOfStatement then this means we
5613  // are appending default values to the Operands list. This is only done
5614  // by custom parser, so we shouldn't continue on to the generic parsing.
5615  if (ResTy == MatchOperand_Success || ResTy == MatchOperand_ParseFail ||
5616  isToken(AsmToken::EndOfStatement))
5617  return ResTy;
5618 
5619  SMLoc RBraceLoc;
5620  SMLoc LBraceLoc = getLoc();
5621  if (Mode == OperandMode_NSA && trySkipToken(AsmToken::LBrac)) {
5622  unsigned Prefix = Operands.size();
5623 
5624  for (;;) {
5625  auto Loc = getLoc();
5626  ResTy = parseReg(Operands);
5627  if (ResTy == MatchOperand_NoMatch)
5628  Error(Loc, "expected a register");
5629  if (ResTy != MatchOperand_Success)
5630  return MatchOperand_ParseFail;
5631 
5632  RBraceLoc = getLoc();
5633  if (trySkipToken(AsmToken::RBrac))
5634  break;
5635 
5636  if (!skipToken(AsmToken::Comma,
5637  "expected a comma or a closing square bracket")) {
5638  return MatchOperand_ParseFail;
5639  }
5640  }
5641 
5642  if (Operands.size() - Prefix > 1) {
5643  Operands.insert(Operands.begin() + Prefix,
5644  AMDGPUOperand::CreateToken(this, "[", LBraceLoc));
5645  Operands.push_back(AMDGPUOperand::CreateToken(this, "]", RBraceLoc));
5646  }
5647 
5648  return MatchOperand_Success;
5649  }
5650 
5651  return parseRegOrImm(Operands);
5652 }
5653 
5654 StringRef AMDGPUAsmParser::parseMnemonicSuffix(StringRef Name) {
5655  // Clear any forced encodings from the previous instruction.
5656  setForcedEncodingSize(0);
5657  setForcedDPP(false);
5658  setForcedSDWA(false);
5659 
5660  if (Name.endswith("_e64")) {
5661  setForcedEncodingSize(64);
5662  return Name.substr(0, Name.size() - 4);
5663  } else if (Name.endswith("_e32")) {
5664  setForcedEncodingSize(32);
5665  return Name.substr(0, Name.size() - 4);
5666  } else if (Name.endswith("_dpp")) {
5667  setForcedDPP(true);
5668  return Name.substr(0, Name.size() - 4);
5669  } else if (Name.endswith("_sdwa")) {
5670  setForcedSDWA(true);
5671  return Name.substr(0, Name.size() - 5);
5672  }
5673  return Name;
5674 }
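// For example, "v_add_f32_e64" is parsed as mnemonic "v_add_f32" with a forced
// 64-bit (VOP3) encoding, and "v_mov_b32_sdwa" forces the SDWA form of
// "v_mov_b32"; the specific opcodes here are illustrative only.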
5675 
5676 static void applyMnemonicAliases(StringRef &Mnemonic,
5677  const FeatureBitset &Features,
5678  unsigned VariantID);
5679 
5680 bool AMDGPUAsmParser::ParseInstruction(ParseInstructionInfo &Info,
5681  StringRef Name,
5682  SMLoc NameLoc, OperandVector &Operands) {
5683  // Add the instruction mnemonic
5684  Name = parseMnemonicSuffix(Name);
5685 
5686  // If the target architecture uses MnemonicAlias, call it here to parse
5687  // operands correctly.
5688  applyMnemonicAliases(Name, getAvailableFeatures(), 0);
5689 
5690  Operands.push_back(AMDGPUOperand::CreateToken(this, Name, NameLoc));
5691 
5692  bool IsMIMG = Name.startswith("image_");
5693 
5694  while (!trySkipToken(AsmToken::EndOfStatement)) {
5695  OperandMode Mode = OperandMode_Default;
5696  if (IsMIMG && isGFX10Plus() && Operands.size() == 2)
5697  Mode = OperandMode_NSA;
5698  CPolSeen = 0;
5699  OperandMatchResultTy Res = parseOperand(Operands, Name, Mode);
5700 
5701  if (Res != MatchOperand_Success) {
5702  checkUnsupportedInstruction(Name, NameLoc);
5703  if (!Parser.hasPendingError()) {
5704  // FIXME: use real operand location rather than the current location.
5705  StringRef Msg =
5706  (Res == MatchOperand_ParseFail) ? "failed parsing operand." :
5707  "not a valid operand.";
5708  Error(getLoc(), Msg);
5709  }
5710  while (!trySkipToken(AsmToken::EndOfStatement)) {
5711  lex();
5712  }
5713  return true;
5714  }
5715 
5716  // Eat the comma or space if there is one.
5717  trySkipToken(AsmToken::Comma);
5718  }
5719 
5720  return false;
5721 }
5722 
5723 //===----------------------------------------------------------------------===//
5724 // Utility functions
5725 //===----------------------------------------------------------------------===//
5726 
5727 OperandMatchResultTy
5728 AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, int64_t &IntVal) {
5729 
5730  if (!trySkipId(Prefix, AsmToken::Colon))
5731  return MatchOperand_NoMatch;
5732 
5733  return parseExpr(IntVal) ? MatchOperand_Success : MatchOperand_ParseFail;
5734 }
5735 
5736 OperandMatchResultTy
5737 AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, OperandVector &Operands,
5738  AMDGPUOperand::ImmTy ImmTy,
5739  bool (*ConvertResult)(int64_t&)) {
5740  SMLoc S = getLoc();
5741  int64_t Value = 0;
5742 
5743  OperandMatchResultTy Res = parseIntWithPrefix(Prefix, Value);
5744  if (Res != MatchOperand_Success)
5745  return Res;
5746 
5747  if (ConvertResult && !ConvertResult(Value)) {
5748  Error(S, "invalid " + StringRef(Prefix) + " value.");
5749  }
5750 
5751  Operands.push_back(AMDGPUOperand::CreateImm(this, Value, S, ImmTy));
5752  return MatchOperand_Success;
5753 }
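// For example, an operand written as "offset:16" would be handled by this
// helper with Prefix == "offset", yielding an immediate operand of value 16.
// The concrete prefix names accepted depend on the instruction being parsed.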
5754 
5755 OperandMatchResultTy
5756 AMDGPUAsmParser::parseOperandArrayWithPrefix(const char *Prefix,
5757  OperandVector &Operands,
5758  AMDGPUOperand::ImmTy ImmTy,
5759  bool (*ConvertResult)(int64_t&)) {
5760  SMLoc S = getLoc();
5761  if (!trySkipId(Prefix, AsmToken::Colon))
5762  return MatchOperand_NoMatch;
5763 
5764  if (!skipToken(AsmToken::LBrac, "expected a left square bracket"))
5765  return MatchOperand_ParseFail;
5766 
5767  unsigned Val = 0;
5768  const unsigned MaxSize = 4;
5769 
5770  // FIXME: How to verify the number of elements matches the number of src
5771  // operands?
5772  for (int I = 0; ; ++I) {
5773  int64_t Op;
5774  SMLoc Loc = getLoc();
5775  if (!parseExpr(Op))
5776  return MatchOperand_ParseFail;
5777 
5778  if (Op != 0 && Op != 1) {
5779  Error(Loc, "invalid " + StringRef(Prefix) + " value.");
5780  return MatchOperand_ParseFail;
5781  }
5782 
5783  Val |= (Op << I);
5784 
5785  if (trySkipToken(AsmToken::RBrac))
5786  break;
5787 
5788  if (I + 1 == MaxSize) {
5789  Error(getLoc(), "expected a closing square bracket");
5790  return MatchOperand_ParseFail;
5791  }
5792 
5793  if (!skipToken(AsmToken::Comma, "expected a comma"))
5794  return MatchOperand_ParseFail;
5795  }
5796 
5797  Operands.push_back(AMDGPUOperand::CreateImm(this, Val, S, ImmTy));
5798  return MatchOperand_Success;
5799 }
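// For example, a modifier written as "op_sel:[0,1]" would be parsed by this
// helper into a bitmask immediate with bit I set when element I is 1; at most
// MaxSize (4) elements are accepted. The "op_sel" prefix is illustrative.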
5800 
5801 OperandMatchResultTy
5802 AMDGPUAsmParser::parseNamedBit(StringRef Name, OperandVector &Operands,
5803  AMDGPUOperand::ImmTy ImmTy) {
5804  int64_t Bit;
5805  SMLoc S = getLoc();
5806 
5807  if (trySkipId(Name)) {
5808  Bit = 1;
5809  } else if (trySkipId("no", Name)) {
5810  Bit = 0;
5811  } else {
5812  return MatchOperand_NoMatch;
5813  }
5814 
5815  if (Name == "r128" && !hasMIMG_R128()) {
5816  Error(S, "r128 modifier is not supported on this GPU");
5817  return MatchOperand_ParseFail;
5818  }
5819  if (Name == "a16" && !isGFX9() && !hasGFX10A16()) {
5820  Error(S, "a16 modifier is not supported on this GPU");
5821  return MatchOperand_ParseFail;
5822  }
5823 
5824  if (isGFX9() && ImmTy == AMDGPUOperand::ImmTyA16)
5825  ImmTy = AMDGPUOperand::ImmTyR128A16;
5826 
5827  Operands.push_back(AMDGPUOperand::CreateImm(this, Bit, S, ImmTy));
5828  return MatchOperand_Success;
5829 }
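// For example, "a16" sets the named bit to 1 and "noa16" clears it; on GFX9
// the a16 operand is recorded as the combined r128/a16 immediate, as handled
// above.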
5830 
5831 OperandMatchResultTy
5832 AMDGPUAsmParser::parseCPol(OperandVector &Operands) {
5833  unsigned CPolOn = 0;
5834  unsigned CPolOff = 0;
5835  SMLoc S = getLoc();
5836 
5837  StringRef Mnemo = ((AMDGPUOperand &)*Operands[0]).getToken();
5838  if (isGFX940() && !Mnemo.startswith("s_")) {
5839  if (trySkipId("sc0"))
5840  CPolOn = AMDGPU::CPol::SC0;
5841  else if (trySkipId("nosc0"))
5842  CPolOff = AMDGPU::CPol::SC0;
5843  else if (trySkipId("nt"))
5844  CPolOn = AMDGPU::CPol::NT;
5845  else if (trySkipId("nont"))
5846  CPolOff = AMDGPU::CPol::NT;
5847  else if (trySkipId("sc1"))
5848  CPolOn = AMDGPU::CPol::SC1;
5849  else if (trySkipId("nosc1"))
5850  CPolOff = AMDGPU::CPol::SC1;
5851  else
5852  return MatchOperand_NoMatch;
5853  }
5854  else if (trySkipId("glc"))
5855  CPolOn = AMDGPU::CPol::GLC;
5856  else if (trySkipId("noglc"))
5857  CPolOff = AMDGPU::CPol::GLC;
5858  else if (trySkipId("slc"))
5859  CPolOn = AMDGPU::CPol::SLC;
5860  else if (trySkipId("noslc"))
5861  CPolOff = AMDGPU::CPol::SLC;
5862  else if (trySkipId("dlc"))
5863  CPolOn = AMDGPU::CPol::DLC;
5864  else if (trySkipId("nodlc"))
5865  CPolOff = AMDGPU::CPol::DLC;
5866  else if (trySkipId("scc"))
5867  CPolOn = AMDGPU::CPol::SCC;
5868  else if (trySkipId("noscc"))
5869  CPolOff = AMDGPU::CPol::SCC;
5870  else
5871  return MatchOperand_NoMatch;
5872 
5873  if (!isGFX10Plus() && ((CPolOn | CPolOff) & AMDGPU::CPol::DLC)) {
5874  Error(S, "dlc modifier is not supported on this GPU");
5875  return MatchOperand_ParseFail;
5876  }
5877 
5878  if (!isGFX90A() && ((CPolOn | CPolOff) & AMDGPU::CPol::SCC)) {
5879  Error(S, "scc modifier is not supported on this GPU");
5880  return MatchOperand_ParseFail;
5881  }
5882 
5883  if (CPolSeen & (CPolOn | CPolOff)) {
5884  Error(S, "duplicate cache policy modifier"