1 //===- AMDGPUAsmParser.cpp - Parse SI asm to MCInst instructions ----------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "AMDKernelCodeT.h"
12 #include "SIDefines.h"
13 #include "SIInstrInfo.h"
14 #include "SIRegisterInfo.h"
16 #include "Utils/AMDGPUAsmUtils.h"
17 #include "Utils/AMDGPUBaseInfo.h"
19 #include "llvm/ADT/APFloat.h"
21 #include "llvm/ADT/StringSet.h"
22 #include "llvm/ADT/Twine.h"
23 #include "llvm/MC/MCAsmInfo.h"
24 #include "llvm/MC/MCContext.h"
25 #include "llvm/MC/MCExpr.h"
26 #include "llvm/MC/MCInst.h"
30 #include "llvm/MC/MCSymbol.h"
31 #include "llvm/MC/TargetRegistry.h"
34 #include "llvm/Support/Casting.h"
37 
38 using namespace llvm;
39 using namespace llvm::AMDGPU;
40 using namespace llvm::amdhsa;
41 
42 namespace {
43 
44 class AMDGPUAsmParser;
45 
46 enum RegisterKind { IS_UNKNOWN, IS_VGPR, IS_SGPR, IS_AGPR, IS_TTMP, IS_SPECIAL };
47 
48 //===----------------------------------------------------------------------===//
49 // Operand
50 //===----------------------------------------------------------------------===//
51 
52 class AMDGPUOperand : public MCParsedAsmOperand {
53  enum KindTy {
54  Token,
55  Immediate,
56  Register,
 57  Expression,
 58  } Kind;
59 
60  SMLoc StartLoc, EndLoc;
61  const AMDGPUAsmParser *AsmParser;
62 
63 public:
64  AMDGPUOperand(KindTy Kind_, const AMDGPUAsmParser *AsmParser_)
65  : MCParsedAsmOperand(), Kind(Kind_), AsmParser(AsmParser_) {}
66 
67  using Ptr = std::unique_ptr<AMDGPUOperand>;
68 
69  struct Modifiers {
70  bool Abs = false;
71  bool Neg = false;
72  bool Sext = false;
73 
74  bool hasFPModifiers() const { return Abs || Neg; }
75  bool hasIntModifiers() const { return Sext; }
76  bool hasModifiers() const { return hasFPModifiers() || hasIntModifiers(); }
77 
78  int64_t getFPModifiersOperand() const {
79  int64_t Operand = 0;
80  Operand |= Abs ? SISrcMods::ABS : 0u;
81  Operand |= Neg ? SISrcMods::NEG : 0u;
82  return Operand;
83  }
84 
85  int64_t getIntModifiersOperand() const {
86  int64_t Operand = 0;
87  Operand |= Sext ? SISrcMods::SEXT : 0u;
88  return Operand;
89  }
90 
91  int64_t getModifiersOperand() const {
92  assert(!(hasFPModifiers() && hasIntModifiers())
93  && "fp and int modifiers should not be used simultaneously");
94  if (hasFPModifiers()) {
95  return getFPModifiersOperand();
96  } else if (hasIntModifiers()) {
97  return getIntModifiersOperand();
98  } else {
99  return 0;
100  }
101  }
102 
103  friend raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods);
104  };
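  // Illustrative sketch, not part of the original source: how a parsed source
  // modifier is packed into the immediate that precedes a VOP3/SDWA source
  // operand. Bit names come from SISrcMods in SIDefines.h (included above).
  //
  //   AMDGPUOperand::Modifiers M;
  //   M.Abs = true;                          // e.g. parsed "-|v0|"
  //   M.Neg = true;
  //   int64_t Enc = M.getModifiersOperand(); // SISrcMods::ABS | SISrcMods::NEG
  //   // FP (abs/neg) and integer (sext) modifiers are mutually exclusive,
  //   // so M.Sext must remain false here, as the assert above enforces.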
105 
106  enum ImmTy {
107  ImmTyNone,
108  ImmTyGDS,
109  ImmTyLDS,
110  ImmTyOffen,
111  ImmTyIdxen,
112  ImmTyAddr64,
113  ImmTyOffset,
114  ImmTyInstOffset,
115  ImmTyOffset0,
116  ImmTyOffset1,
117  ImmTyCPol,
118  ImmTySWZ,
119  ImmTyTFE,
120  ImmTyD16,
121  ImmTyClampSI,
122  ImmTyOModSI,
123  ImmTyDPP8,
124  ImmTyDppCtrl,
125  ImmTyDppRowMask,
126  ImmTyDppBankMask,
127  ImmTyDppBoundCtrl,
128  ImmTyDppFi,
129  ImmTySdwaDstSel,
130  ImmTySdwaSrc0Sel,
131  ImmTySdwaSrc1Sel,
132  ImmTySdwaDstUnused,
133  ImmTyDMask,
134  ImmTyDim,
135  ImmTyUNorm,
136  ImmTyDA,
137  ImmTyR128A16,
138  ImmTyA16,
139  ImmTyLWE,
140  ImmTyExpTgt,
141  ImmTyExpCompr,
142  ImmTyExpVM,
143  ImmTyFORMAT,
144  ImmTyHwreg,
145  ImmTyOff,
146  ImmTySendMsg,
147  ImmTyInterpSlot,
148  ImmTyInterpAttr,
149  ImmTyAttrChan,
150  ImmTyOpSel,
151  ImmTyOpSelHi,
152  ImmTyNegLo,
153  ImmTyNegHi,
154  ImmTySwizzle,
155  ImmTyGprIdxMode,
156  ImmTyHigh,
157  ImmTyBLGP,
158  ImmTyCBSZ,
159  ImmTyABID,
160  ImmTyEndpgm,
161  };
162 
163  enum ImmKindTy {
164  ImmKindTyNone,
165  ImmKindTyLiteral,
166  ImmKindTyConst,
167  };
168 
169 private:
170  struct TokOp {
171  const char *Data;
172  unsigned Length;
173  };
174 
175  struct ImmOp {
176  int64_t Val;
177  ImmTy Type;
178  bool IsFPImm;
179  mutable ImmKindTy Kind;
180  Modifiers Mods;
181  };
182 
183  struct RegOp {
184  unsigned RegNo;
185  Modifiers Mods;
186  };
187 
188  union {
189  TokOp Tok;
190  ImmOp Imm;
191  RegOp Reg;
192  const MCExpr *Expr;
193  };
194 
195 public:
196  bool isToken() const override {
197  if (Kind == Token)
198  return true;
199 
200  // When parsing operands, we can't always tell if something was meant to be
201  // a token, like 'gds', or an expression that references a global variable.
202  // In this case, we assume the string is an expression, and if we need to
 203  // interpret it as a token, then we treat the symbol name as the token.
204  return isSymbolRefExpr();
205  }
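  // Illustrative sketch, not part of the original source: in an instruction
  // like "ds_add_u32 v1, v2 gds", the trailing "gds" may have been parsed as
  // a symbol-reference expression rather than a token; the override above
  // still reports it as a token so the matcher can recover the spelling via
  // getExpressionAsToken().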
206 
207  bool isSymbolRefExpr() const {
208  return isExpr() && Expr && isa<MCSymbolRefExpr>(Expr);
209  }
210 
211  bool isImm() const override {
212  return Kind == Immediate;
213  }
214 
215  void setImmKindNone() const {
216  assert(isImm());
217  Imm.Kind = ImmKindTyNone;
218  }
219 
220  void setImmKindLiteral() const {
221  assert(isImm());
222  Imm.Kind = ImmKindTyLiteral;
223  }
224 
225  void setImmKindConst() const {
226  assert(isImm());
227  Imm.Kind = ImmKindTyConst;
228  }
229 
230  bool IsImmKindLiteral() const {
231  return isImm() && Imm.Kind == ImmKindTyLiteral;
232  }
233 
234  bool isImmKindConst() const {
235  return isImm() && Imm.Kind == ImmKindTyConst;
236  }
237 
238  bool isInlinableImm(MVT type) const;
239  bool isLiteralImm(MVT type) const;
240 
241  bool isRegKind() const {
242  return Kind == Register;
243  }
244 
245  bool isReg() const override {
246  return isRegKind() && !hasModifiers();
247  }
248 
249  bool isRegOrImmWithInputMods(unsigned RCID, MVT type) const {
250  return isRegClass(RCID) || isInlinableImm(type) || isLiteralImm(type);
251  }
252 
253  bool isRegOrImmWithInt16InputMods() const {
254  return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i16);
255  }
256 
257  bool isRegOrImmWithInt32InputMods() const {
258  return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i32);
259  }
260 
261  bool isRegOrImmWithInt64InputMods() const {
262  return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::i64);
263  }
264 
265  bool isRegOrImmWithFP16InputMods() const {
266  return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f16);
267  }
268 
269  bool isRegOrImmWithFP32InputMods() const {
270  return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f32);
271  }
272 
273  bool isRegOrImmWithFP64InputMods() const {
274  return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::f64);
275  }
276 
277  bool isVReg() const {
278  return isRegClass(AMDGPU::VGPR_32RegClassID) ||
279  isRegClass(AMDGPU::VReg_64RegClassID) ||
280  isRegClass(AMDGPU::VReg_96RegClassID) ||
281  isRegClass(AMDGPU::VReg_128RegClassID) ||
282  isRegClass(AMDGPU::VReg_160RegClassID) ||
283  isRegClass(AMDGPU::VReg_192RegClassID) ||
284  isRegClass(AMDGPU::VReg_256RegClassID) ||
285  isRegClass(AMDGPU::VReg_512RegClassID) ||
286  isRegClass(AMDGPU::VReg_1024RegClassID);
287  }
288 
289  bool isVReg32() const {
290  return isRegClass(AMDGPU::VGPR_32RegClassID);
291  }
292 
293  bool isVReg32OrOff() const {
294  return isOff() || isVReg32();
295  }
296 
297  bool isNull() const {
298  return isRegKind() && getReg() == AMDGPU::SGPR_NULL;
299  }
300 
301  bool isVRegWithInputMods() const;
302 
303  bool isSDWAOperand(MVT type) const;
304  bool isSDWAFP16Operand() const;
305  bool isSDWAFP32Operand() const;
306  bool isSDWAInt16Operand() const;
307  bool isSDWAInt32Operand() const;
308 
309  bool isImmTy(ImmTy ImmT) const {
310  return isImm() && Imm.Type == ImmT;
311  }
312 
313  bool isImmModifier() const {
314  return isImm() && Imm.Type != ImmTyNone;
315  }
316 
317  bool isClampSI() const { return isImmTy(ImmTyClampSI); }
318  bool isOModSI() const { return isImmTy(ImmTyOModSI); }
319  bool isDMask() const { return isImmTy(ImmTyDMask); }
320  bool isDim() const { return isImmTy(ImmTyDim); }
321  bool isUNorm() const { return isImmTy(ImmTyUNorm); }
322  bool isDA() const { return isImmTy(ImmTyDA); }
323  bool isR128A16() const { return isImmTy(ImmTyR128A16); }
324  bool isGFX10A16() const { return isImmTy(ImmTyA16); }
325  bool isLWE() const { return isImmTy(ImmTyLWE); }
326  bool isOff() const { return isImmTy(ImmTyOff); }
327  bool isExpTgt() const { return isImmTy(ImmTyExpTgt); }
328  bool isExpVM() const { return isImmTy(ImmTyExpVM); }
329  bool isExpCompr() const { return isImmTy(ImmTyExpCompr); }
330  bool isOffen() const { return isImmTy(ImmTyOffen); }
331  bool isIdxen() const { return isImmTy(ImmTyIdxen); }
332  bool isAddr64() const { return isImmTy(ImmTyAddr64); }
333  bool isOffset() const { return isImmTy(ImmTyOffset) && isUInt<16>(getImm()); }
334  bool isOffset0() const { return isImmTy(ImmTyOffset0) && isUInt<8>(getImm()); }
335  bool isOffset1() const { return isImmTy(ImmTyOffset1) && isUInt<8>(getImm()); }
336 
337  bool isFlatOffset() const { return isImmTy(ImmTyOffset) || isImmTy(ImmTyInstOffset); }
338  bool isGDS() const { return isImmTy(ImmTyGDS); }
339  bool isLDS() const { return isImmTy(ImmTyLDS); }
340  bool isCPol() const { return isImmTy(ImmTyCPol); }
341  bool isSWZ() const { return isImmTy(ImmTySWZ); }
342  bool isTFE() const { return isImmTy(ImmTyTFE); }
343  bool isD16() const { return isImmTy(ImmTyD16); }
344  bool isFORMAT() const { return isImmTy(ImmTyFORMAT) && isUInt<7>(getImm()); }
345  bool isBankMask() const { return isImmTy(ImmTyDppBankMask); }
346  bool isRowMask() const { return isImmTy(ImmTyDppRowMask); }
347  bool isBoundCtrl() const { return isImmTy(ImmTyDppBoundCtrl); }
348  bool isFI() const { return isImmTy(ImmTyDppFi); }
349  bool isSDWADstSel() const { return isImmTy(ImmTySdwaDstSel); }
350  bool isSDWASrc0Sel() const { return isImmTy(ImmTySdwaSrc0Sel); }
351  bool isSDWASrc1Sel() const { return isImmTy(ImmTySdwaSrc1Sel); }
352  bool isSDWADstUnused() const { return isImmTy(ImmTySdwaDstUnused); }
353  bool isInterpSlot() const { return isImmTy(ImmTyInterpSlot); }
354  bool isInterpAttr() const { return isImmTy(ImmTyInterpAttr); }
355  bool isAttrChan() const { return isImmTy(ImmTyAttrChan); }
356  bool isOpSel() const { return isImmTy(ImmTyOpSel); }
357  bool isOpSelHi() const { return isImmTy(ImmTyOpSelHi); }
358  bool isNegLo() const { return isImmTy(ImmTyNegLo); }
359  bool isNegHi() const { return isImmTy(ImmTyNegHi); }
360  bool isHigh() const { return isImmTy(ImmTyHigh); }
361 
362  bool isMod() const {
363  return isClampSI() || isOModSI();
364  }
365 
366  bool isRegOrImm() const {
367  return isReg() || isImm();
368  }
369 
370  bool isRegClass(unsigned RCID) const;
371 
372  bool isInlineValue() const;
373 
374  bool isRegOrInlineNoMods(unsigned RCID, MVT type) const {
375  return (isRegClass(RCID) || isInlinableImm(type)) && !hasModifiers();
376  }
377 
378  bool isSCSrcB16() const {
379  return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i16);
380  }
381 
382  bool isSCSrcV2B16() const {
383  return isSCSrcB16();
384  }
385 
386  bool isSCSrcB32() const {
387  return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i32);
388  }
389 
390  bool isSCSrcB64() const {
391  return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::i64);
392  }
393 
394  bool isBoolReg() const;
395 
396  bool isSCSrcF16() const {
397  return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f16);
398  }
399 
400  bool isSCSrcV2F16() const {
401  return isSCSrcF16();
402  }
403 
404  bool isSCSrcF32() const {
405  return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f32);
406  }
407 
408  bool isSCSrcF64() const {
409  return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::f64);
410  }
411 
412  bool isSSrcB32() const {
413  return isSCSrcB32() || isLiteralImm(MVT::i32) || isExpr();
414  }
415 
416  bool isSSrcB16() const {
417  return isSCSrcB16() || isLiteralImm(MVT::i16);
418  }
419 
420  bool isSSrcV2B16() const {
421  llvm_unreachable("cannot happen");
422  return isSSrcB16();
423  }
424 
425  bool isSSrcB64() const {
426  // TODO: Find out how SALU supports extension of 32-bit literals to 64 bits.
427  // See isVSrc64().
428  return isSCSrcB64() || isLiteralImm(MVT::i64);
429  }
430 
431  bool isSSrcF32() const {
432  return isSCSrcB32() || isLiteralImm(MVT::f32) || isExpr();
433  }
434 
435  bool isSSrcF64() const {
436  return isSCSrcB64() || isLiteralImm(MVT::f64);
437  }
438 
439  bool isSSrcF16() const {
440  return isSCSrcB16() || isLiteralImm(MVT::f16);
441  }
442 
443  bool isSSrcV2F16() const {
444  llvm_unreachable("cannot happen");
445  return isSSrcF16();
446  }
447 
448  bool isSSrcV2FP32() const {
449  llvm_unreachable("cannot happen");
450  return isSSrcF32();
451  }
452 
453  bool isSCSrcV2FP32() const {
454  llvm_unreachable("cannot happen");
455  return isSCSrcF32();
456  }
457 
458  bool isSSrcV2INT32() const {
459  llvm_unreachable("cannot happen");
460  return isSSrcB32();
461  }
462 
463  bool isSCSrcV2INT32() const {
464  llvm_unreachable("cannot happen");
465  return isSCSrcB32();
466  }
467 
468  bool isSSrcOrLdsB32() const {
469  return isRegOrInlineNoMods(AMDGPU::SRegOrLds_32RegClassID, MVT::i32) ||
470  isLiteralImm(MVT::i32) || isExpr();
471  }
472 
473  bool isVCSrcB32() const {
474  return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i32);
475  }
476 
477  bool isVCSrcB64() const {
478  return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::i64);
479  }
480 
481  bool isVCSrcB16() const {
482  return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i16);
483  }
484 
485  bool isVCSrcV2B16() const {
486  return isVCSrcB16();
487  }
488 
489  bool isVCSrcF32() const {
490  return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f32);
491  }
492 
493  bool isVCSrcF64() const {
494  return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::f64);
495  }
496 
497  bool isVCSrcF16() const {
498  return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f16);
499  }
500 
501  bool isVCSrcV2F16() const {
502  return isVCSrcF16();
503  }
504 
505  bool isVSrcB32() const {
506  return isVCSrcF32() || isLiteralImm(MVT::i32) || isExpr();
507  }
508 
509  bool isVSrcB64() const {
510  return isVCSrcF64() || isLiteralImm(MVT::i64);
511  }
512 
513  bool isVSrcB16() const {
514  return isVCSrcB16() || isLiteralImm(MVT::i16);
515  }
516 
517  bool isVSrcV2B16() const {
518  return isVSrcB16() || isLiteralImm(MVT::v2i16);
519  }
520 
521  bool isVCSrcV2FP32() const {
522  return isVCSrcF64();
523  }
524 
525  bool isVSrcV2FP32() const {
526  return isVSrcF64() || isLiteralImm(MVT::v2f32);
527  }
528 
529  bool isVCSrcV2INT32() const {
530  return isVCSrcB64();
531  }
532 
533  bool isVSrcV2INT32() const {
534  return isVSrcB64() || isLiteralImm(MVT::v2i32);
535  }
536 
537  bool isVSrcF32() const {
538  return isVCSrcF32() || isLiteralImm(MVT::f32) || isExpr();
539  }
540 
541  bool isVSrcF64() const {
542  return isVCSrcF64() || isLiteralImm(MVT::f64);
543  }
544 
545  bool isVSrcF16() const {
546  return isVCSrcF16() || isLiteralImm(MVT::f16);
547  }
548 
549  bool isVSrcV2F16() const {
550  return isVSrcF16() || isLiteralImm(MVT::v2f16);
551  }
552 
553  bool isVISrcB32() const {
554  return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i32);
555  }
556 
557  bool isVISrcB16() const {
558  return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i16);
559  }
560 
561  bool isVISrcV2B16() const {
562  return isVISrcB16();
563  }
564 
565  bool isVISrcF32() const {
566  return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f32);
567  }
568 
569  bool isVISrcF16() const {
570  return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f16);
571  }
572 
573  bool isVISrcV2F16() const {
574  return isVISrcF16() || isVISrcB32();
575  }
576 
577  bool isVISrc_64B64() const {
578  return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::i64);
579  }
580 
581  bool isVISrc_64F64() const {
582  return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::f64);
583  }
584 
585  bool isVISrc_64V2FP32() const {
586  return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::f32);
587  }
588 
589  bool isVISrc_64V2INT32() const {
590  return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::i32);
591  }
592 
593  bool isVISrc_256B64() const {
594  return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::i64);
595  }
596 
597  bool isVISrc_256F64() const {
598  return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::f64);
599  }
600 
601  bool isVISrc_128B16() const {
602  return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::i16);
603  }
604 
605  bool isVISrc_128V2B16() const {
606  return isVISrc_128B16();
607  }
608 
609  bool isVISrc_128B32() const {
610  return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::i32);
611  }
612 
613  bool isVISrc_128F32() const {
614  return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::f32);
615  }
616 
617  bool isVISrc_256V2FP32() const {
618  return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::f32);
619  }
620 
621  bool isVISrc_256V2INT32() const {
622  return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::i32);
623  }
624 
625  bool isVISrc_512B32() const {
626  return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::i32);
627  }
628 
629  bool isVISrc_512B16() const {
630  return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::i16);
631  }
632 
633  bool isVISrc_512V2B16() const {
634  return isVISrc_512B16();
635  }
636 
637  bool isVISrc_512F32() const {
638  return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::f32);
639  }
640 
641  bool isVISrc_512F16() const {
642  return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::f16);
643  }
644 
645  bool isVISrc_512V2F16() const {
646  return isVISrc_512F16() || isVISrc_512B32();
647  }
648 
649  bool isVISrc_1024B32() const {
650  return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::i32);
651  }
652 
653  bool isVISrc_1024B16() const {
654  return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::i16);
655  }
656 
657  bool isVISrc_1024V2B16() const {
658  return isVISrc_1024B16();
659  }
660 
661  bool isVISrc_1024F32() const {
662  return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::f32);
663  }
664 
665  bool isVISrc_1024F16() const {
666  return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::f16);
667  }
668 
669  bool isVISrc_1024V2F16() const {
670  return isVISrc_1024F16() || isVISrc_1024B32();
671  }
672 
673  bool isAISrcB32() const {
674  return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i32);
675  }
676 
677  bool isAISrcB16() const {
678  return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i16);
679  }
680 
681  bool isAISrcV2B16() const {
682  return isAISrcB16();
683  }
684 
685  bool isAISrcF32() const {
686  return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f32);
687  }
688 
689  bool isAISrcF16() const {
690  return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f16);
691  }
692 
693  bool isAISrcV2F16() const {
694  return isAISrcF16() || isAISrcB32();
695  }
696 
697  bool isAISrc_64B64() const {
698  return isRegOrInlineNoMods(AMDGPU::AReg_64RegClassID, MVT::i64);
699  }
700 
701  bool isAISrc_64F64() const {
702  return isRegOrInlineNoMods(AMDGPU::AReg_64RegClassID, MVT::f64);
703  }
704 
705  bool isAISrc_128B32() const {
706  return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i32);
707  }
708 
709  bool isAISrc_128B16() const {
710  return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i16);
711  }
712 
713  bool isAISrc_128V2B16() const {
714  return isAISrc_128B16();
715  }
716 
717  bool isAISrc_128F32() const {
718  return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f32);
719  }
720 
721  bool isAISrc_128F16() const {
722  return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f16);
723  }
724 
725  bool isAISrc_128V2F16() const {
726  return isAISrc_128F16() || isAISrc_128B32();
727  }
728 
729  bool isVISrc_128F16() const {
730  return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::f16);
731  }
732 
733  bool isVISrc_128V2F16() const {
734  return isVISrc_128F16() || isVISrc_128B32();
735  }
736 
737  bool isAISrc_256B64() const {
738  return isRegOrInlineNoMods(AMDGPU::AReg_256RegClassID, MVT::i64);
739  }
740 
741  bool isAISrc_256F64() const {
742  return isRegOrInlineNoMods(AMDGPU::AReg_256RegClassID, MVT::f64);
743  }
744 
745  bool isAISrc_512B32() const {
746  return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i32);
747  }
748 
749  bool isAISrc_512B16() const {
750  return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i16);
751  }
752 
753  bool isAISrc_512V2B16() const {
754  return isAISrc_512B16();
755  }
756 
757  bool isAISrc_512F32() const {
758  return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f32);
759  }
760 
761  bool isAISrc_512F16() const {
762  return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f16);
763  }
764 
765  bool isAISrc_512V2F16() const {
766  return isAISrc_512F16() || isAISrc_512B32();
767  }
768 
769  bool isAISrc_1024B32() const {
770  return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i32);
771  }
772 
773  bool isAISrc_1024B16() const {
774  return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i16);
775  }
776 
777  bool isAISrc_1024V2B16() const {
778  return isAISrc_1024B16();
779  }
780 
781  bool isAISrc_1024F32() const {
782  return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f32);
783  }
784 
785  bool isAISrc_1024F16() const {
786  return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f16);
787  }
788 
789  bool isAISrc_1024V2F16() const {
790  return isAISrc_1024F16() || isAISrc_1024B32();
791  }
792 
793  bool isKImmFP32() const {
794  return isLiteralImm(MVT::f32);
795  }
796 
797  bool isKImmFP16() const {
798  return isLiteralImm(MVT::f16);
799  }
800 
801  bool isMem() const override {
802  return false;
803  }
804 
805  bool isExpr() const {
806  return Kind == Expression;
807  }
808 
809  bool isSoppBrTarget() const {
810  return isExpr() || isImm();
811  }
812 
813  bool isSWaitCnt() const;
814  bool isHwreg() const;
815  bool isSendMsg() const;
816  bool isSwizzle() const;
817  bool isSMRDOffset8() const;
818  bool isSMEMOffset() const;
819  bool isSMRDLiteralOffset() const;
820  bool isDPP8() const;
821  bool isDPPCtrl() const;
822  bool isBLGP() const;
823  bool isCBSZ() const;
824  bool isABID() const;
825  bool isGPRIdxMode() const;
826  bool isS16Imm() const;
827  bool isU16Imm() const;
828  bool isEndpgm() const;
829 
830  StringRef getExpressionAsToken() const {
831  assert(isExpr());
832  const MCSymbolRefExpr *S = cast<MCSymbolRefExpr>(Expr);
833  return S->getSymbol().getName();
834  }
835 
836  StringRef getToken() const {
837  assert(isToken());
838 
839  if (Kind == Expression)
840  return getExpressionAsToken();
841 
842  return StringRef(Tok.Data, Tok.Length);
843  }
844 
845  int64_t getImm() const {
846  assert(isImm());
847  return Imm.Val;
848  }
849 
850  void setImm(int64_t Val) {
851  assert(isImm());
852  Imm.Val = Val;
853  }
854 
855  ImmTy getImmTy() const {
856  assert(isImm());
857  return Imm.Type;
858  }
859 
860  unsigned getReg() const override {
861  assert(isRegKind());
862  return Reg.RegNo;
863  }
864 
865  SMLoc getStartLoc() const override {
866  return StartLoc;
867  }
868 
869  SMLoc getEndLoc() const override {
870  return EndLoc;
871  }
872 
873  SMRange getLocRange() const {
874  return SMRange(StartLoc, EndLoc);
875  }
876 
877  Modifiers getModifiers() const {
878  assert(isRegKind() || isImmTy(ImmTyNone));
879  return isRegKind() ? Reg.Mods : Imm.Mods;
880  }
881 
882  void setModifiers(Modifiers Mods) {
883  assert(isRegKind() || isImmTy(ImmTyNone));
884  if (isRegKind())
885  Reg.Mods = Mods;
886  else
887  Imm.Mods = Mods;
888  }
889 
890  bool hasModifiers() const {
891  return getModifiers().hasModifiers();
892  }
893 
894  bool hasFPModifiers() const {
895  return getModifiers().hasFPModifiers();
896  }
897 
898  bool hasIntModifiers() const {
899  return getModifiers().hasIntModifiers();
900  }
901 
902  uint64_t applyInputFPModifiers(uint64_t Val, unsigned Size) const;
903 
904  void addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers = true) const;
905 
906  void addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const;
907 
908  template <unsigned Bitwidth>
909  void addKImmFPOperands(MCInst &Inst, unsigned N) const;
910 
911  void addKImmFP16Operands(MCInst &Inst, unsigned N) const {
912  addKImmFPOperands<16>(Inst, N);
913  }
914 
915  void addKImmFP32Operands(MCInst &Inst, unsigned N) const {
916  addKImmFPOperands<32>(Inst, N);
917  }
918 
919  void addRegOperands(MCInst &Inst, unsigned N) const;
920 
921  void addBoolRegOperands(MCInst &Inst, unsigned N) const {
922  addRegOperands(Inst, N);
923  }
924 
925  void addRegOrImmOperands(MCInst &Inst, unsigned N) const {
926  if (isRegKind())
927  addRegOperands(Inst, N);
928  else if (isExpr())
929  Inst.addOperand(MCOperand::createExpr(Expr));
930  else
931  addImmOperands(Inst, N);
932  }
933 
934  void addRegOrImmWithInputModsOperands(MCInst &Inst, unsigned N) const {
935  Modifiers Mods = getModifiers();
936  Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand()));
937  if (isRegKind()) {
938  addRegOperands(Inst, N);
939  } else {
940  addImmOperands(Inst, N, false);
941  }
942  }
943 
944  void addRegOrImmWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
945  assert(!hasIntModifiers());
946  addRegOrImmWithInputModsOperands(Inst, N);
947  }
948 
949  void addRegOrImmWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
950  assert(!hasFPModifiers());
951  addRegOrImmWithInputModsOperands(Inst, N);
952  }
953 
954  void addRegWithInputModsOperands(MCInst &Inst, unsigned N) const {
955  Modifiers Mods = getModifiers();
956  Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand()));
957  assert(isRegKind());
958  addRegOperands(Inst, N);
959  }
960 
961  void addRegWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
962  assert(!hasIntModifiers());
963  addRegWithInputModsOperands(Inst, N);
964  }
965 
966  void addRegWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
967  assert(!hasFPModifiers());
968  addRegWithInputModsOperands(Inst, N);
969  }
970 
971  void addSoppBrTargetOperands(MCInst &Inst, unsigned N) const {
972  if (isImm())
973  addImmOperands(Inst, N);
974  else {
975  assert(isExpr());
976  Inst.addOperand(MCOperand::createExpr(Expr));
977  }
978  }
979 
980  static void printImmTy(raw_ostream& OS, ImmTy Type) {
981  switch (Type) {
982  case ImmTyNone: OS << "None"; break;
983  case ImmTyGDS: OS << "GDS"; break;
984  case ImmTyLDS: OS << "LDS"; break;
985  case ImmTyOffen: OS << "Offen"; break;
986  case ImmTyIdxen: OS << "Idxen"; break;
987  case ImmTyAddr64: OS << "Addr64"; break;
988  case ImmTyOffset: OS << "Offset"; break;
989  case ImmTyInstOffset: OS << "InstOffset"; break;
990  case ImmTyOffset0: OS << "Offset0"; break;
991  case ImmTyOffset1: OS << "Offset1"; break;
992  case ImmTyCPol: OS << "CPol"; break;
993  case ImmTySWZ: OS << "SWZ"; break;
994  case ImmTyTFE: OS << "TFE"; break;
995  case ImmTyD16: OS << "D16"; break;
996  case ImmTyFORMAT: OS << "FORMAT"; break;
997  case ImmTyClampSI: OS << "ClampSI"; break;
998  case ImmTyOModSI: OS << "OModSI"; break;
999  case ImmTyDPP8: OS << "DPP8"; break;
1000  case ImmTyDppCtrl: OS << "DppCtrl"; break;
1001  case ImmTyDppRowMask: OS << "DppRowMask"; break;
1002  case ImmTyDppBankMask: OS << "DppBankMask"; break;
1003  case ImmTyDppBoundCtrl: OS << "DppBoundCtrl"; break;
1004  case ImmTyDppFi: OS << "FI"; break;
1005  case ImmTySdwaDstSel: OS << "SdwaDstSel"; break;
1006  case ImmTySdwaSrc0Sel: OS << "SdwaSrc0Sel"; break;
1007  case ImmTySdwaSrc1Sel: OS << "SdwaSrc1Sel"; break;
1008  case ImmTySdwaDstUnused: OS << "SdwaDstUnused"; break;
1009  case ImmTyDMask: OS << "DMask"; break;
1010  case ImmTyDim: OS << "Dim"; break;
1011  case ImmTyUNorm: OS << "UNorm"; break;
1012  case ImmTyDA: OS << "DA"; break;
1013  case ImmTyR128A16: OS << "R128A16"; break;
1014  case ImmTyA16: OS << "A16"; break;
1015  case ImmTyLWE: OS << "LWE"; break;
1016  case ImmTyOff: OS << "Off"; break;
1017  case ImmTyExpTgt: OS << "ExpTgt"; break;
1018  case ImmTyExpCompr: OS << "ExpCompr"; break;
1019  case ImmTyExpVM: OS << "ExpVM"; break;
1020  case ImmTyHwreg: OS << "Hwreg"; break;
1021  case ImmTySendMsg: OS << "SendMsg"; break;
1022  case ImmTyInterpSlot: OS << "InterpSlot"; break;
1023  case ImmTyInterpAttr: OS << "InterpAttr"; break;
1024  case ImmTyAttrChan: OS << "AttrChan"; break;
1025  case ImmTyOpSel: OS << "OpSel"; break;
1026  case ImmTyOpSelHi: OS << "OpSelHi"; break;
1027  case ImmTyNegLo: OS << "NegLo"; break;
1028  case ImmTyNegHi: OS << "NegHi"; break;
1029  case ImmTySwizzle: OS << "Swizzle"; break;
1030  case ImmTyGprIdxMode: OS << "GprIdxMode"; break;
1031  case ImmTyHigh: OS << "High"; break;
1032  case ImmTyBLGP: OS << "BLGP"; break;
1033  case ImmTyCBSZ: OS << "CBSZ"; break;
1034  case ImmTyABID: OS << "ABID"; break;
1035  case ImmTyEndpgm: OS << "Endpgm"; break;
1036  }
1037  }
1038 
1039  void print(raw_ostream &OS) const override {
1040  switch (Kind) {
1041  case Register:
1042  OS << "<register " << getReg() << " mods: " << Reg.Mods << '>';
1043  break;
1044  case Immediate:
1045  OS << '<' << getImm();
1046  if (getImmTy() != ImmTyNone) {
1047  OS << " type: "; printImmTy(OS, getImmTy());
1048  }
1049  OS << " mods: " << Imm.Mods << '>';
1050  break;
1051  case Token:
1052  OS << '\'' << getToken() << '\'';
1053  break;
1054  case Expression:
1055  OS << "<expr " << *Expr << '>';
1056  break;
1057  }
1058  }
1059 
1060  static AMDGPUOperand::Ptr CreateImm(const AMDGPUAsmParser *AsmParser,
1061  int64_t Val, SMLoc Loc,
1062  ImmTy Type = ImmTyNone,
1063  bool IsFPImm = false) {
1064  auto Op = std::make_unique<AMDGPUOperand>(Immediate, AsmParser);
1065  Op->Imm.Val = Val;
1066  Op->Imm.IsFPImm = IsFPImm;
1067  Op->Imm.Kind = ImmKindTyNone;
1068  Op->Imm.Type = Type;
1069  Op->Imm.Mods = Modifiers();
1070  Op->StartLoc = Loc;
1071  Op->EndLoc = Loc;
1072  return Op;
1073  }
1074 
1075  static AMDGPUOperand::Ptr CreateToken(const AMDGPUAsmParser *AsmParser,
1076  StringRef Str, SMLoc Loc,
1077  bool HasExplicitEncodingSize = true) {
1078  auto Res = std::make_unique<AMDGPUOperand>(Token, AsmParser);
1079  Res->Tok.Data = Str.data();
1080  Res->Tok.Length = Str.size();
1081  Res->StartLoc = Loc;
1082  Res->EndLoc = Loc;
1083  return Res;
1084  }
1085 
1086  static AMDGPUOperand::Ptr CreateReg(const AMDGPUAsmParser *AsmParser,
1087  unsigned RegNo, SMLoc S,
1088  SMLoc E) {
1089  auto Op = std::make_unique<AMDGPUOperand>(Register, AsmParser);
1090  Op->Reg.RegNo = RegNo;
1091  Op->Reg.Mods = Modifiers();
1092  Op->StartLoc = S;
1093  Op->EndLoc = E;
1094  return Op;
1095  }
1096 
1097  static AMDGPUOperand::Ptr CreateExpr(const AMDGPUAsmParser *AsmParser,
1098  const class MCExpr *Expr, SMLoc S) {
1099  auto Op = std::make_unique<AMDGPUOperand>(Expression, AsmParser);
1100  Op->Expr = Expr;
1101  Op->StartLoc = S;
1102  Op->EndLoc = S;
1103  return Op;
1104  }
1105 };
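// Illustrative sketch, not part of the original source: how the factory
// methods above are typically used while parsing "v_mov_b32 v1, 0x42".
// The source locations (NameLoc, S, E) are assumed to come from the lexer.
//
//   Operands.push_back(AMDGPUOperand::CreateToken(this, "v_mov_b32", NameLoc));
//   Operands.push_back(AMDGPUOperand::CreateReg(this, AMDGPU::VGPR1, S, E));
//   Operands.push_back(AMDGPUOperand::CreateImm(this, 0x42, S));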
1106 
1107 raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods) {
1108  OS << "abs:" << Mods.Abs << " neg:" << Mods.Neg << " sext:" << Mods.Sext;
1109  return OS;
1110 }
1111 
1112 //===----------------------------------------------------------------------===//
1113 // AsmParser
1114 //===----------------------------------------------------------------------===//
1115 
1116 // Holds info related to the current kernel, e.g. the count of SGPRs used.
1117 // A kernel scope begins at an .amdgpu_hsa_kernel directive and ends at the
1118 // next .amdgpu_hsa_kernel directive or at EOF.
1119 class KernelScopeInfo {
1120  int SgprIndexUnusedMin = -1;
1121  int VgprIndexUnusedMin = -1;
1122  MCContext *Ctx = nullptr;
1123 
1124  void usesSgprAt(int i) {
1125  if (i >= SgprIndexUnusedMin) {
1126  SgprIndexUnusedMin = ++i;
1127  if (Ctx) {
1128  MCSymbol * const Sym = Ctx->getOrCreateSymbol(Twine(".kernel.sgpr_count"));
1129  Sym->setVariableValue(MCConstantExpr::create(SgprIndexUnusedMin, *Ctx));
1130  }
1131  }
1132  }
1133 
1134  void usesVgprAt(int i) {
1135  if (i >= VgprIndexUnusedMin) {
1136  VgprIndexUnusedMin = ++i;
1137  if (Ctx) {
1138  MCSymbol * const Sym = Ctx->getOrCreateSymbol(Twine(".kernel.vgpr_count"));
1139  Sym->setVariableValue(MCConstantExpr::create(VgprIndexUnusedMin, *Ctx));
1140  }
1141  }
1142  }
1143 
1144 public:
1145  KernelScopeInfo() = default;
1146 
1147  void initialize(MCContext &Context) {
1148  Ctx = &Context;
1149  usesSgprAt(SgprIndexUnusedMin = -1);
1150  usesVgprAt(VgprIndexUnusedMin = -1);
1151  }
1152 
1153  void usesRegister(RegisterKind RegKind, unsigned DwordRegIndex, unsigned RegWidth) {
1154  switch (RegKind) {
1155  case IS_SGPR: usesSgprAt(DwordRegIndex + RegWidth - 1); break;
1156  case IS_AGPR: // fall through
1157  case IS_VGPR: usesVgprAt(DwordRegIndex + RegWidth - 1); break;
1158  default: break;
1159  }
1160  }
1161 };
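// Illustrative sketch, not part of the original source: after parsing the
// register list "v[4:7]" inside an .amdgpu_hsa_kernel scope, the parser calls
//
//   KernelScope.usesRegister(IS_VGPR, /*DwordRegIndex=*/4, /*RegWidth=*/4);
//
// which raises the ".kernel.vgpr_count" symbol to 8, i.e. one past the
// highest VGPR index used so far.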
1162 
1163 class AMDGPUAsmParser : public MCTargetAsmParser {
1164  MCAsmParser &Parser;
1165 
1166  // Number of extra operands parsed after the first optional operand.
1167  // This may be necessary to skip hardcoded mandatory operands.
1168  static const unsigned MAX_OPR_LOOKAHEAD = 8;
1169 
1170  unsigned ForcedEncodingSize = 0;
1171  bool ForcedDPP = false;
1172  bool ForcedSDWA = false;
1173  KernelScopeInfo KernelScope;
1174  unsigned CPolSeen;
1175 
1176  /// @name Auto-generated Match Functions
1177  /// {
1178 
1179 #define GET_ASSEMBLER_HEADER
1180 #include "AMDGPUGenAsmMatcher.inc"
1181 
1182  /// }
1183 
1184 private:
1185  bool ParseAsAbsoluteExpression(uint32_t &Ret);
1186  bool OutOfRangeError(SMRange Range);
1187  /// Calculate VGPR/SGPR blocks required for the given target, reserved
1188  /// registers, and user-specified NextFreeXGPR values.
1189  ///
1190  /// \param Features [in] Target features, used for bug corrections.
1191  /// \param VCCUsed [in] Whether VCC special SGPR is reserved.
1192  /// \param FlatScrUsed [in] Whether FLAT_SCRATCH special SGPR is reserved.
1193  /// \param XNACKUsed [in] Whether XNACK_MASK special SGPR is reserved.
1194  /// \param EnableWavefrontSize32 [in] Value of ENABLE_WAVEFRONT_SIZE32 kernel
1195  /// descriptor field, if valid.
1196  /// \param NextFreeVGPR [in] Max VGPR number referenced, plus one.
1197  /// \param VGPRRange [in] Token range, used for VGPR diagnostics.
1198  /// \param NextFreeSGPR [in] Max SGPR number referenced, plus one.
1199  /// \param SGPRRange [in] Token range, used for SGPR diagnostics.
1200  /// \param VGPRBlocks [out] Result VGPR block count.
1201  /// \param SGPRBlocks [out] Result SGPR block count.
1202  bool calculateGPRBlocks(const FeatureBitset &Features, bool VCCUsed,
1203  bool FlatScrUsed, bool XNACKUsed,
1204  Optional<bool> EnableWavefrontSize32, unsigned NextFreeVGPR,
1205  SMRange VGPRRange, unsigned NextFreeSGPR,
1206  SMRange SGPRRange, unsigned &VGPRBlocks,
1207  unsigned &SGPRBlocks);
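  // Illustrative sketch, not part of the original source: a typical call from
  // the .amdhsa_kernel directive handler. NextFreeVGPR/NextFreeSGPR, the two
  // SMRange values, and the *Used flags are assumed to have been collected
  // while parsing the descriptor fields; the function returns true on error
  // after emitting its own diagnostic.
  //
  //   unsigned VGPRBlocks, SGPRBlocks;
  //   if (calculateGPRBlocks(getFeatureBits(), VCCUsed, FlatScrUsed, XNACKUsed,
  //                          EnableWavefrontSize32, NextFreeVGPR, VGPRRange,
  //                          NextFreeSGPR, SGPRRange, VGPRBlocks, SGPRBlocks))
  //     return true;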
1208  bool ParseDirectiveAMDGCNTarget();
1209  bool ParseDirectiveAMDHSAKernel();
1210  bool ParseDirectiveMajorMinor(uint32_t &Major, uint32_t &Minor);
1211  bool ParseDirectiveHSACodeObjectVersion();
1212  bool ParseDirectiveHSACodeObjectISA();
1213  bool ParseAMDKernelCodeTValue(StringRef ID, amd_kernel_code_t &Header);
1214  bool ParseDirectiveAMDKernelCodeT();
1215  // TODO: Possibly make subtargetHasRegister const.
1216  bool subtargetHasRegister(const MCRegisterInfo &MRI, unsigned RegNo);
1217  bool ParseDirectiveAMDGPUHsaKernel();
1218 
1219  bool ParseDirectiveISAVersion();
1220  bool ParseDirectiveHSAMetadata();
1221  bool ParseDirectivePALMetadataBegin();
1222  bool ParseDirectivePALMetadata();
1223  bool ParseDirectiveAMDGPULDS();
1224 
1225  /// Common code to parse out a block of text (typically YAML) between start and
1226  /// end directives.
1227  bool ParseToEndDirective(const char *AssemblerDirectiveBegin,
1228  const char *AssemblerDirectiveEnd,
1229  std::string &CollectString);
1230 
1231  bool AddNextRegisterToList(unsigned& Reg, unsigned& RegWidth,
1232  RegisterKind RegKind, unsigned Reg1, SMLoc Loc);
1233  bool ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
1234  unsigned &RegNum, unsigned &RegWidth,
1235  bool RestoreOnFailure = false);
1236  bool ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
1237  unsigned &RegNum, unsigned &RegWidth,
1238  SmallVectorImpl<AsmToken> &Tokens);
1239  unsigned ParseRegularReg(RegisterKind &RegKind, unsigned &RegNum,
1240  unsigned &RegWidth,
1241  SmallVectorImpl<AsmToken> &Tokens);
1242  unsigned ParseSpecialReg(RegisterKind &RegKind, unsigned &RegNum,
1243  unsigned &RegWidth,
1244  SmallVectorImpl<AsmToken> &Tokens);
1245  unsigned ParseRegList(RegisterKind &RegKind, unsigned &RegNum,
1246  unsigned &RegWidth, SmallVectorImpl<AsmToken> &Tokens);
1247  bool ParseRegRange(unsigned& Num, unsigned& Width);
1248  unsigned getRegularReg(RegisterKind RegKind,
1249  unsigned RegNum,
1250  unsigned RegWidth,
1251  SMLoc Loc);
1252 
1253  bool isRegister();
1254  bool isRegister(const AsmToken &Token, const AsmToken &NextToken) const;
1255  Optional<StringRef> getGprCountSymbolName(RegisterKind RegKind);
1256  void initializeGprCountSymbol(RegisterKind RegKind);
1257  bool updateGprCountSymbols(RegisterKind RegKind, unsigned DwordRegIndex,
1258  unsigned RegWidth);
1259  void cvtMubufImpl(MCInst &Inst, const OperandVector &Operands,
1260  bool IsAtomic, bool IsLds = false);
1261  void cvtDSImpl(MCInst &Inst, const OperandVector &Operands,
1262  bool IsGdsHardcoded);
1263 
1264 public:
1265  enum AMDGPUMatchResultTy {
1266  Match_PreferE32 = FIRST_TARGET_MATCH_RESULT_TY
1267  };
1268  enum OperandMode {
1269  OperandMode_Default,
1270  OperandMode_NSA,
1271  };
1272 
1273  using OptionalImmIndexMap = std::map<AMDGPUOperand::ImmTy, unsigned>;
1274 
1275  AMDGPUAsmParser(const MCSubtargetInfo &STI, MCAsmParser &_Parser,
1276  const MCInstrInfo &MII,
1277  const MCTargetOptions &Options)
1278  : MCTargetAsmParser(Options, STI, MII), Parser(_Parser) {
1279  MCAsmParserExtension::Initialize(Parser);
1280 
1281  if (getFeatureBits().none()) {
1282  // Set default features.
1283  copySTI().ToggleFeature("southern-islands");
1284  }
1285 
1286  setAvailableFeatures(ComputeAvailableFeatures(getFeatureBits()));
1287 
1288  {
1289  // TODO: make those pre-defined variables read-only.
1290  // Currently there is no suitable machinery in core llvm-mc for this.
1291  // MCSymbol::isRedefinable is intended for another purpose, and
1292  // AsmParser::parseDirectiveSet() cannot be specialized for a specific target.
1293  AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
1294  MCContext &Ctx = getContext();
1295  if (ISA.Major >= 6 && isHsaAbiVersion3Or4(&getSTI())) {
1296  MCSymbol *Sym =
1297  Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_number"));
1298  Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx));
1299  Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_minor"));
1300  Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx));
1301  Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_stepping"));
1302  Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx));
1303  } else {
1304  MCSymbol *Sym =
1305  Ctx.getOrCreateSymbol(Twine(".option.machine_version_major"));
1306  Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx));
1307  Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_minor"));
1308  Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx));
1309  Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_stepping"));
1310  Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx));
1311  }
1312  if (ISA.Major >= 6 && isHsaAbiVersion3Or4(&getSTI())) {
1313  initializeGprCountSymbol(IS_VGPR);
1314  initializeGprCountSymbol(IS_SGPR);
1315  } else
1316  KernelScope.initialize(getContext());
1317  }
1318  }
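  // Illustrative sketch, not part of the original source: with -mcpu=gfx900
  // and a code object v3+ ABI, the constructor above pre-defines
  //   .amdgcn.gfx_generation_number   = 9
  //   .amdgcn.gfx_generation_minor    = 0
  //   .amdgcn.gfx_generation_stepping = 0
  // so that hand-written assembly can test them with .if directives.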
1319 
1320  bool hasMIMG_R128() const {
1321  return AMDGPU::hasMIMG_R128(getSTI());
1322  }
1323 
1324  bool hasPackedD16() const {
1325  return AMDGPU::hasPackedD16(getSTI());
1326  }
1327 
1328  bool hasGFX10A16() const {
1329  return AMDGPU::hasGFX10A16(getSTI());
1330  }
1331 
1332  bool hasG16() const { return AMDGPU::hasG16(getSTI()); }
1333 
1334  bool isSI() const {
1335  return AMDGPU::isSI(getSTI());
1336  }
1337 
1338  bool isCI() const {
1339  return AMDGPU::isCI(getSTI());
1340  }
1341 
1342  bool isVI() const {
1343  return AMDGPU::isVI(getSTI());
1344  }
1345 
1346  bool isGFX9() const {
1347  return AMDGPU::isGFX9(getSTI());
1348  }
1349 
1350  bool isGFX90A() const {
1351  return AMDGPU::isGFX90A(getSTI());
1352  }
1353 
1354  bool isGFX9Plus() const {
1355  return AMDGPU::isGFX9Plus(getSTI());
1356  }
1357 
1358  bool isGFX10() const {
1359  return AMDGPU::isGFX10(getSTI());
1360  }
1361 
1362  bool isGFX10Plus() const { return AMDGPU::isGFX10Plus(getSTI()); }
1363 
1364  bool isGFX10_BEncoding() const {
1365  return AMDGPU::isGFX10_BEncoding(getSTI());
1366  }
1367 
1368  bool hasInv2PiInlineImm() const {
1369  return getFeatureBits()[AMDGPU::FeatureInv2PiInlineImm];
1370  }
1371 
1372  bool hasFlatOffsets() const {
1373  return getFeatureBits()[AMDGPU::FeatureFlatInstOffsets];
1374  }
1375 
1376  bool hasArchitectedFlatScratch() const {
1377  return getFeatureBits()[AMDGPU::FeatureArchitectedFlatScratch];
1378  }
1379 
1380  bool hasSGPR102_SGPR103() const {
1381  return !isVI() && !isGFX9();
1382  }
1383 
1384  bool hasSGPR104_SGPR105() const { return isGFX10Plus(); }
1385 
1386  bool hasIntClamp() const {
1387  return getFeatureBits()[AMDGPU::FeatureIntClamp];
1388  }
1389 
1390  AMDGPUTargetStreamer &getTargetStreamer() {
1391  MCTargetStreamer &TS = *getParser().getStreamer().getTargetStreamer();
1392  return static_cast<AMDGPUTargetStreamer &>(TS);
1393  }
1394 
1395  const MCRegisterInfo *getMRI() const {
1396  // We need this const_cast because for some reason getContext() is not const
1397  // in MCAsmParser.
1398  return const_cast<AMDGPUAsmParser*>(this)->getContext().getRegisterInfo();
1399  }
1400 
1401  const MCInstrInfo *getMII() const {
1402  return &MII;
1403  }
1404 
1405  const FeatureBitset &getFeatureBits() const {
1406  return getSTI().getFeatureBits();
1407  }
1408 
1409  void setForcedEncodingSize(unsigned Size) { ForcedEncodingSize = Size; }
1410  void setForcedDPP(bool ForceDPP_) { ForcedDPP = ForceDPP_; }
1411  void setForcedSDWA(bool ForceSDWA_) { ForcedSDWA = ForceSDWA_; }
1412 
1413  unsigned getForcedEncodingSize() const { return ForcedEncodingSize; }
1414  bool isForcedVOP3() const { return ForcedEncodingSize == 64; }
1415  bool isForcedDPP() const { return ForcedDPP; }
1416  bool isForcedSDWA() const { return ForcedSDWA; }
1417  ArrayRef<unsigned> getMatchedVariants() const;
1418  StringRef getMatchedVariantName() const;
1419 
1420  std::unique_ptr<AMDGPUOperand> parseRegister(bool RestoreOnFailure = false);
1421  bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc,
1422  bool RestoreOnFailure);
1423  bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) override;
1424  OperandMatchResultTy tryParseRegister(unsigned &RegNo, SMLoc &StartLoc,
1425  SMLoc &EndLoc) override;
1426  unsigned checkTargetMatchPredicate(MCInst &Inst) override;
1427  unsigned validateTargetOperandClass(MCParsedAsmOperand &Op,
1428  unsigned Kind) override;
1429  bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
1430  OperandVector &Operands, MCStreamer &Out,
1431  uint64_t &ErrorInfo,
1432  bool MatchingInlineAsm) override;
1433  bool ParseDirective(AsmToken DirectiveID) override;
1434  OperandMatchResultTy parseOperand(OperandVector &Operands, StringRef Mnemonic,
1435  OperandMode Mode = OperandMode_Default);
1436  StringRef parseMnemonicSuffix(StringRef Name);
1437  bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
1438  SMLoc NameLoc, OperandVector &Operands) override;
1439  //bool ProcessInstruction(MCInst &Inst);
1440 
1441  OperandMatchResultTy parseIntWithPrefix(const char *Prefix, int64_t &Int);
1442 
1443  OperandMatchResultTy
1444  parseIntWithPrefix(const char *Prefix, OperandVector &Operands,
1445  AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
1446  bool (*ConvertResult)(int64_t &) = nullptr);
1447 
1448  OperandMatchResultTy
1449  parseOperandArrayWithPrefix(const char *Prefix,
1450  OperandVector &Operands,
1451  AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
1452  bool (*ConvertResult)(int64_t&) = nullptr);
1453 
1454  OperandMatchResultTy
1455  parseNamedBit(StringRef Name, OperandVector &Operands,
1456  AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone);
1457  OperandMatchResultTy parseCPol(OperandVector &Operands);
1458  OperandMatchResultTy parseStringWithPrefix(StringRef Prefix,
1459  StringRef &Value,
1460  SMLoc &StringLoc);
1461 
1462  bool isModifier();
1463  bool isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
1464  bool isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
1465  bool isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
1466  bool isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const;
1467  bool parseSP3NegModifier();
1468  OperandMatchResultTy parseImm(OperandVector &Operands, bool HasSP3AbsModifier = false);
1469  OperandMatchResultTy parseReg(OperandVector &Operands);
1470  OperandMatchResultTy parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod = false);
1471  OperandMatchResultTy parseRegOrImmWithFPInputMods(OperandVector &Operands, bool AllowImm = true);
1472  OperandMatchResultTy parseRegOrImmWithIntInputMods(OperandVector &Operands, bool AllowImm = true);
1473  OperandMatchResultTy parseRegWithFPInputMods(OperandVector &Operands);
1474  OperandMatchResultTy parseRegWithIntInputMods(OperandVector &Operands);
1475  OperandMatchResultTy parseVReg32OrOff(OperandVector &Operands);
1476  OperandMatchResultTy parseDfmtNfmt(int64_t &Format);
1477  OperandMatchResultTy parseUfmt(int64_t &Format);
1478  OperandMatchResultTy parseSymbolicSplitFormat(StringRef FormatStr, SMLoc Loc, int64_t &Format);
1479  OperandMatchResultTy parseSymbolicUnifiedFormat(StringRef FormatStr, SMLoc Loc, int64_t &Format);
1480  OperandMatchResultTy parseFORMAT(OperandVector &Operands);
1481  OperandMatchResultTy parseSymbolicOrNumericFormat(int64_t &Format);
1482  OperandMatchResultTy parseNumericFormat(int64_t &Format);
1483  bool tryParseFmt(const char *Pref, int64_t MaxVal, int64_t &Val);
1484  bool matchDfmtNfmt(int64_t &Dfmt, int64_t &Nfmt, StringRef FormatStr, SMLoc Loc);
1485 
1486  void cvtDSOffset01(MCInst &Inst, const OperandVector &Operands);
1487  void cvtDS(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, false); }
1488  void cvtDSGds(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, true); }
1489  void cvtExp(MCInst &Inst, const OperandVector &Operands);
1490 
1491  bool parseCnt(int64_t &IntVal);
1492  OperandMatchResultTy parseSWaitCntOps(OperandVector &Operands);
1493  OperandMatchResultTy parseHwreg(OperandVector &Operands);
1494 
1495 private:
1496  struct OperandInfoTy {
1497  SMLoc Loc;
1498  int64_t Id;
1499  bool IsSymbolic = false;
1500  bool IsDefined = false;
1501 
1502  OperandInfoTy(int64_t Id_) : Id(Id_) {}
1503  };
1504 
1505  bool parseSendMsgBody(OperandInfoTy &Msg, OperandInfoTy &Op, OperandInfoTy &Stream);
1506  bool validateSendMsg(const OperandInfoTy &Msg,
1507  const OperandInfoTy &Op,
1508  const OperandInfoTy &Stream);
1509 
1510  bool parseHwregBody(OperandInfoTy &HwReg,
1511  OperandInfoTy &Offset,
1512  OperandInfoTy &Width);
1513  bool validateHwreg(const OperandInfoTy &HwReg,
1514  const OperandInfoTy &Offset,
1515  const OperandInfoTy &Width);
1516 
1517  SMLoc getFlatOffsetLoc(const OperandVector &Operands) const;
1518  SMLoc getSMEMOffsetLoc(const OperandVector &Operands) const;
1519 
1520  SMLoc getOperandLoc(std::function<bool(const AMDGPUOperand&)> Test,
1521  const OperandVector &Operands) const;
1522  SMLoc getImmLoc(AMDGPUOperand::ImmTy Type, const OperandVector &Operands) const;
1523  SMLoc getRegLoc(unsigned Reg, const OperandVector &Operands) const;
1524  SMLoc getLitLoc(const OperandVector &Operands) const;
1525  SMLoc getConstLoc(const OperandVector &Operands) const;
1526 
1527  bool validateInstruction(const MCInst &Inst, const SMLoc &IDLoc, const OperandVector &Operands);
1528  bool validateFlatOffset(const MCInst &Inst, const OperandVector &Operands);
1529  bool validateSMEMOffset(const MCInst &Inst, const OperandVector &Operands);
1530  bool validateSOPLiteral(const MCInst &Inst) const;
1531  bool validateConstantBusLimitations(const MCInst &Inst, const OperandVector &Operands);
1532  bool validateEarlyClobberLimitations(const MCInst &Inst, const OperandVector &Operands);
1533  bool validateIntClampSupported(const MCInst &Inst);
1534  bool validateMIMGAtomicDMask(const MCInst &Inst);
1535  bool validateMIMGGatherDMask(const MCInst &Inst);
1536  bool validateMovrels(const MCInst &Inst, const OperandVector &Operands);
1537  bool validateMIMGDataSize(const MCInst &Inst);
1538  bool validateMIMGAddrSize(const MCInst &Inst);
1539  bool validateMIMGD16(const MCInst &Inst);
1540  bool validateMIMGDim(const MCInst &Inst);
1541  bool validateMIMGMSAA(const MCInst &Inst);
1542  bool validateOpSel(const MCInst &Inst);
1543  bool validateDPP(const MCInst &Inst, const OperandVector &Operands);
1544  bool validateVccOperand(unsigned Reg) const;
1545  bool validateVOPLiteral(const MCInst &Inst, const OperandVector &Operands);
1546  bool validateMAIAccWrite(const MCInst &Inst, const OperandVector &Operands);
1547  bool validateAGPRLdSt(const MCInst &Inst) const;
1548  bool validateVGPRAlign(const MCInst &Inst) const;
1549  bool validateGWS(const MCInst &Inst, const OperandVector &Operands);
1550  bool validateDivScale(const MCInst &Inst);
1551  bool validateCoherencyBits(const MCInst &Inst, const OperandVector &Operands,
1552  const SMLoc &IDLoc);
1553  Optional<StringRef> validateLdsDirect(const MCInst &Inst);
1554  unsigned getConstantBusLimit(unsigned Opcode) const;
1555  bool usesConstantBus(const MCInst &Inst, unsigned OpIdx);
1556  bool isInlineConstant(const MCInst &Inst, unsigned OpIdx) const;
1557  unsigned findImplicitSGPRReadInVOP(const MCInst &Inst) const;
1558 
1559  bool isSupportedMnemo(StringRef Mnemo,
1560  const FeatureBitset &FBS);
1561  bool isSupportedMnemo(StringRef Mnemo,
1562  const FeatureBitset &FBS,
1563  ArrayRef<unsigned> Variants);
1564  bool checkUnsupportedInstruction(StringRef Name, const SMLoc &IDLoc);
1565 
1566  bool isId(const StringRef Id) const;
1567  bool isId(const AsmToken &Token, const StringRef Id) const;
1568  bool isToken(const AsmToken::TokenKind Kind) const;
1569  bool trySkipId(const StringRef Id);
1570  bool trySkipId(const StringRef Pref, const StringRef Id);
1571  bool trySkipId(const StringRef Id, const AsmToken::TokenKind Kind);
1572  bool trySkipToken(const AsmToken::TokenKind Kind);
1573  bool skipToken(const AsmToken::TokenKind Kind, const StringRef ErrMsg);
1574  bool parseString(StringRef &Val, const StringRef ErrMsg = "expected a string");
1575  bool parseId(StringRef &Val, const StringRef ErrMsg = "");
1576 
1577  void peekTokens(MutableArrayRef<AsmToken> Tokens);
1578  AsmToken::TokenKind getTokenKind() const;
1579  bool parseExpr(int64_t &Imm, StringRef Expected = "");
1580  bool parseExpr(OperandVector &Operands);
1581  StringRef getTokenStr() const;
1582  AsmToken peekToken();
1583  AsmToken getToken() const;
1584  SMLoc getLoc() const;
1585  void lex();
1586 
1587 public:
1588  void onBeginOfFile() override;
1589 
1590  OperandMatchResultTy parseOptionalOperand(OperandVector &Operands);
1591  OperandMatchResultTy parseOptionalOpr(OperandVector &Operands);
1592 
1593  OperandMatchResultTy parseExpTgt(OperandVector &Operands);
1594  OperandMatchResultTy parseSendMsgOp(OperandVector &Operands);
1595  OperandMatchResultTy parseInterpSlot(OperandVector &Operands);
1596  OperandMatchResultTy parseInterpAttr(OperandVector &Operands);
1597  OperandMatchResultTy parseSOppBrTarget(OperandVector &Operands);
1598  OperandMatchResultTy parseBoolReg(OperandVector &Operands);
1599 
1600  bool parseSwizzleOperand(int64_t &Op,
1601  const unsigned MinVal,
1602  const unsigned MaxVal,
1603  const StringRef ErrMsg,
1604  SMLoc &Loc);
1605  bool parseSwizzleOperands(const unsigned OpNum, int64_t* Op,
1606  const unsigned MinVal,
1607  const unsigned MaxVal,
1608  const StringRef ErrMsg);
1609  OperandMatchResultTy parseSwizzleOp(OperandVector &Operands);
1610  bool parseSwizzleOffset(int64_t &Imm);
1611  bool parseSwizzleMacro(int64_t &Imm);
1612  bool parseSwizzleQuadPerm(int64_t &Imm);
1613  bool parseSwizzleBitmaskPerm(int64_t &Imm);
1614  bool parseSwizzleBroadcast(int64_t &Imm);
1615  bool parseSwizzleSwap(int64_t &Imm);
1616  bool parseSwizzleReverse(int64_t &Imm);
1617 
1618  OperandMatchResultTy parseGPRIdxMode(OperandVector &Operands);
1619  int64_t parseGPRIdxMacro();
1620 
1621  void cvtMubuf(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false); }
1622  void cvtMubufAtomic(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, true); }
1623  void cvtMubufLds(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false, true); }
1624  void cvtMtbuf(MCInst &Inst, const OperandVector &Operands);
1625 
1626  AMDGPUOperand::Ptr defaultCPol() const;
1627 
1628  AMDGPUOperand::Ptr defaultSMRDOffset8() const;
1629  AMDGPUOperand::Ptr defaultSMEMOffset() const;
1630  AMDGPUOperand::Ptr defaultSMRDLiteralOffset() const;
1631  AMDGPUOperand::Ptr defaultFlatOffset() const;
1632 
1633  OperandMatchResultTy parseOModOperand(OperandVector &Operands);
1634 
1635  void cvtVOP3(MCInst &Inst, const OperandVector &Operands,
1636  OptionalImmIndexMap &OptionalIdx);
1637  void cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands);
1638  void cvtVOP3(MCInst &Inst, const OperandVector &Operands);
1639  void cvtVOP3P(MCInst &Inst, const OperandVector &Operands);
1640  void cvtVOP3P(MCInst &Inst, const OperandVector &Operands,
1641  OptionalImmIndexMap &OptionalIdx);
1642 
1643  void cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands);
1644 
1645  void cvtMIMG(MCInst &Inst, const OperandVector &Operands,
1646  bool IsAtomic = false);
1647  void cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands);
1648  void cvtIntersectRay(MCInst &Inst, const OperandVector &Operands);
1649 
1650  void cvtSMEMAtomic(MCInst &Inst, const OperandVector &Operands);
1651 
1652  bool parseDimId(unsigned &Encoding);
1653  OperandMatchResultTy parseDim(OperandVector &Operands);
1654  OperandMatchResultTy parseDPP8(OperandVector &Operands);
1655  OperandMatchResultTy parseDPPCtrl(OperandVector &Operands);
1656  bool isSupportedDPPCtrl(StringRef Ctrl, const OperandVector &Operands);
1657  int64_t parseDPPCtrlSel(StringRef Ctrl);
1658  int64_t parseDPPCtrlPerm();
1659  AMDGPUOperand::Ptr defaultRowMask() const;
1660  AMDGPUOperand::Ptr defaultBankMask() const;
1661  AMDGPUOperand::Ptr defaultBoundCtrl() const;
1662  AMDGPUOperand::Ptr defaultFI() const;
1663  void cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8 = false);
1664  void cvtDPP8(MCInst &Inst, const OperandVector &Operands) { cvtDPP(Inst, Operands, true); }
1665 
1666  OperandMatchResultTy parseSDWASel(OperandVector &Operands, StringRef Prefix,
1667  AMDGPUOperand::ImmTy Type);
1668  OperandMatchResultTy parseSDWADstUnused(OperandVector &Operands);
1669  void cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands);
1670  void cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands);
1671  void cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands);
1672  void cvtSdwaVOP2e(MCInst &Inst, const OperandVector &Operands);
1673  void cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands);
1674  void cvtSDWA(MCInst &Inst, const OperandVector &Operands,
1675  uint64_t BasicInstType,
1676  bool SkipDstVcc = false,
1677  bool SkipSrcVcc = false);
1678 
1679  AMDGPUOperand::Ptr defaultBLGP() const;
1680  AMDGPUOperand::Ptr defaultCBSZ() const;
1681  AMDGPUOperand::Ptr defaultABID() const;
1682 
1683  OperandMatchResultTy parseEndpgmOp(OperandVector &Operands);
1684  AMDGPUOperand::Ptr defaultEndpgmImmOperands() const;
1685 };
1686 
1687 struct OptionalOperand {
1688  const char *Name;
1689  AMDGPUOperand::ImmTy Type;
1690  bool IsBit;
1691  bool (*ConvertResult)(int64_t&);
1692 };
1693 
1694 } // end anonymous namespace
1695 
1696 // May be called with integer type with equivalent bitwidth.
1697 static const fltSemantics *getFltSemantics(unsigned Size) {
1698  switch (Size) {
1699  case 4:
1700  return &APFloat::IEEEsingle();
1701  case 8:
1702  return &APFloat::IEEEdouble();
1703  case 2:
1704  return &APFloat::IEEEhalf();
1705  default:
1706  llvm_unreachable("unsupported fp type");
1707  }
1708 }
1709 
1710 static const fltSemantics *getFltSemantics(MVT VT) {
1711  return getFltSemantics(VT.getSizeInBits() / 8);
1712 }
1713 
1714 static const fltSemantics *getOpFltSemantics(uint8_t OperandType) {
1715  switch (OperandType) {
1716  case AMDGPU::OPERAND_REG_IMM_INT32:
1717  case AMDGPU::OPERAND_REG_IMM_FP32:
1718  case AMDGPU::OPERAND_REG_IMM_FP32_DEFERRED:
1719  case AMDGPU::OPERAND_REG_INLINE_C_INT32:
1720  case AMDGPU::OPERAND_REG_INLINE_C_FP32:
1721  case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
1722  case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
1723  case AMDGPU::OPERAND_REG_INLINE_C_V2FP32:
1724  case AMDGPU::OPERAND_REG_IMM_V2FP32:
1725  case AMDGPU::OPERAND_REG_INLINE_C_V2INT32:
1726  case AMDGPU::OPERAND_REG_IMM_V2INT32:
1727  case AMDGPU::OPERAND_KIMM32:
1728  return &APFloat::IEEEsingle();
1729  case AMDGPU::OPERAND_REG_IMM_INT64:
1730  case AMDGPU::OPERAND_REG_IMM_FP64:
1731  case AMDGPU::OPERAND_REG_INLINE_C_INT64:
1732  case AMDGPU::OPERAND_REG_INLINE_C_FP64:
1733  case AMDGPU::OPERAND_REG_INLINE_AC_FP64:
1734  return &APFloat::IEEEdouble();
1735  case AMDGPU::OPERAND_REG_IMM_INT16:
1736  case AMDGPU::OPERAND_REG_IMM_FP16:
1737  case AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED:
1738  case AMDGPU::OPERAND_REG_INLINE_C_INT16:
1739  case AMDGPU::OPERAND_REG_INLINE_C_FP16:
1740  case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
1741  case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
1742  case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
1743  case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
1744  case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
1745  case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16:
1746  case AMDGPU::OPERAND_REG_IMM_V2INT16:
1747  case AMDGPU::OPERAND_REG_IMM_V2FP16:
1748  case AMDGPU::OPERAND_KIMM16:
1749  return &APFloat::IEEEhalf();
1750  default:
1751  llvm_unreachable("unsupported fp type");
1752  }
1753 }
1754 
1755 //===----------------------------------------------------------------------===//
1756 // Operand
1757 //===----------------------------------------------------------------------===//
1758 
1759 static bool canLosslesslyConvertToFPType(APFloat &FPLiteral, MVT VT) {
1760  bool Lost;
1761 
1762  // Convert the literal to the semantics of the target type
1763  APFloat::opStatus Status = FPLiteral.convert(*getFltSemantics(VT),
1764  APFloat::rmNearestTiesToEven,
1765  &Lost);
1766  // We allow precision loss but not overflow or underflow
1767  if (Status != APFloat::opOK &&
1768  Lost &&
1769  ((Status & APFloat::opOverflow) != 0 ||
1770  (Status & APFloat::opUnderflow) != 0)) {
1771  return false;
1772  }
1773 
1774  return true;
1775 }
1776 
1777 static bool isSafeTruncation(int64_t Val, unsigned Size) {
1778  return isUIntN(Size, Val) || isIntN(Size, Val);
1779 }
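 // For example, isSafeTruncation(0xFFFF, 16) holds (the value fits as an
 // unsigned 16-bit integer) and isSafeTruncation(-1, 16) holds (it fits as
 // a signed one), while isSafeTruncation(0x1FFFF, 16) does not.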
1780 
1781 static bool isInlineableLiteralOp16(int64_t Val, MVT VT, bool HasInv2Pi) {
1782  if (VT.getScalarType() == MVT::i16) {
1783  // FP immediate values are broken.
1784  return isInlinableIntLiteral(Val);
1785  }
1786 
1787  // f16/v2f16 operands work correctly for all values.
1788  return AMDGPU::isInlinableLiteral16(Val, HasInv2Pi);
1789 }
1790 
1791 bool AMDGPUOperand::isInlinableImm(MVT type) const {
1792 
1793  // This is a hack to enable named inline values like
1794  // shared_base with both 32-bit and 64-bit operands.
1795  // Note that these values are defined as
1796  // 32-bit operands only.
1797  if (isInlineValue()) {
1798  return true;
1799  }
1800 
1801  if (!isImmTy(ImmTyNone)) {
1802  // Only plain immediates are inlinable (e.g. "clamp" attribute is not)
1803  return false;
1804  }
1805  // TODO: We should avoid using host float here. It would be better to
1806  // check the float bit values which is what a few other places do.
1807  // We've had bot failures before due to weird NaN support on mips hosts.
1808 
1809  APInt Literal(64, Imm.Val);
1810 
1811  if (Imm.IsFPImm) { // We got fp literal token
1812  if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand
1813  return AMDGPU::isInlinableLiteral64(Imm.Val,
1814  AsmParser->hasInv2PiInlineImm());
1815  }
1816 
1817  APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val));
1818  if (!canLosslesslyConvertToFPType(FPLiteral, type))
1819  return false;
1820 
1821  if (type.getScalarSizeInBits() == 16) {
1822  return isInlineableLiteralOp16(
1823  static_cast<int16_t>(FPLiteral.bitcastToAPInt().getZExtValue()),
1824  type, AsmParser->hasInv2PiInlineImm());
1825  }
1826 
1827  // Check if single precision literal is inlinable
 1828  return AMDGPU::isInlinableLiteral32(
1829  static_cast<int32_t>(FPLiteral.bitcastToAPInt().getZExtValue()),
1830  AsmParser->hasInv2PiInlineImm());
1831  }
1832 
1833  // We got int literal token.
1834  if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand
1835  return AMDGPU::isInlinableLiteral64(Imm.Val,
1836  AsmParser->hasInv2PiInlineImm());
1837  }
1838 
1839  if (!isSafeTruncation(Imm.Val, type.getScalarSizeInBits())) {
1840  return false;
1841  }
1842 
1843  if (type.getScalarSizeInBits() == 16) {
1844  return isInlineableLiteralOp16(
1845  static_cast<int16_t>(Literal.getLoBits(16).getSExtValue()),
1846  type, AsmParser->hasInv2PiInlineImm());
1847  }
1848 
 1849  return AMDGPU::isInlinableLiteral32(
1850  static_cast<int32_t>(Literal.getLoBits(32).getZExtValue()),
1851  AsmParser->hasInv2PiInlineImm());
1852 }
1853 
1854 bool AMDGPUOperand::isLiteralImm(MVT type) const {
1855  // Check that this immediate can be added as literal
1856  if (!isImmTy(ImmTyNone)) {
1857  return false;
1858  }
1859 
1860  if (!Imm.IsFPImm) {
1861  // We got int literal token.
1862 
1863  if (type == MVT::f64 && hasFPModifiers()) {
1864  // Cannot apply fp modifiers to int literals preserving the same semantics
1865  // for VOP1/2/C and VOP3 because of integer truncation. To avoid ambiguity,
1866  // disable these cases.
1867  return false;
1868  }
1869 
1870  unsigned Size = type.getSizeInBits();
1871  if (Size == 64)
1872  Size = 32;
1873 
1874  // FIXME: 64-bit operands can zero extend, sign extend, or pad zeroes for FP
1875  // types.
1876  return isSafeTruncation(Imm.Val, Size);
1877  }
1878 
1879  // We got fp literal token
1880  if (type == MVT::f64) { // Expected 64-bit fp operand
1881  // The low 32 bits of the literal would be set to zero, but we accept such literals
1882  return true;
1883  }
1884 
1885  if (type == MVT::i64) { // Expected 64-bit int operand
1886  // We don't allow fp literals in 64-bit integer instructions. It is
1887  // unclear how we should encode them.
1888  return false;
1889  }
1890 
1891  // We allow fp literals with f16x2 operands assuming that the specified
1892  // literal goes into the lower half and the upper half is zero. We also
1893  // require that the literal can be losslessly converted to f16.
1894  MVT ExpectedType = (type == MVT::v2f16)? MVT::f16 :
1895  (type == MVT::v2i16)? MVT::i16 :
1896  (type == MVT::v2f32)? MVT::f32 : type;
1897 
1898  APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val));
1899  return canLosslesslyConvertToFPType(FPLiteral, ExpectedType);
1900 }
1901 
1902 bool AMDGPUOperand::isRegClass(unsigned RCID) const {
1903  return isRegKind() && AsmParser->getMRI()->getRegClass(RCID).contains(getReg());
1904 }
1905 
1906 bool AMDGPUOperand::isVRegWithInputMods() const {
1907  return isRegClass(AMDGPU::VGPR_32RegClassID) ||
1908  // GFX90A allows DPP on 64-bit operands.
1909  (isRegClass(AMDGPU::VReg_64RegClassID) &&
1910  AsmParser->getFeatureBits()[AMDGPU::Feature64BitDPP]);
1911 }
1912 
1913 bool AMDGPUOperand::isSDWAOperand(MVT type) const {
1914  if (AsmParser->isVI())
1915  return isVReg32();
1916  else if (AsmParser->isGFX9Plus())
1917  return isRegClass(AMDGPU::VS_32RegClassID) || isInlinableImm(type);
1918  else
1919  return false;
1920 }
1921 
1922 bool AMDGPUOperand::isSDWAFP16Operand() const {
1923  return isSDWAOperand(MVT::f16);
1924 }
1925 
1926 bool AMDGPUOperand::isSDWAFP32Operand() const {
1927  return isSDWAOperand(MVT::f32);
1928 }
1929 
1930 bool AMDGPUOperand::isSDWAInt16Operand() const {
1931  return isSDWAOperand(MVT::i16);
1932 }
1933 
1934 bool AMDGPUOperand::isSDWAInt32Operand() const {
1935  return isSDWAOperand(MVT::i32);
1936 }
1937 
1938 bool AMDGPUOperand::isBoolReg() const {
1939  auto FB = AsmParser->getFeatureBits();
1940  return isReg() && ((FB[AMDGPU::FeatureWavefrontSize64] && isSCSrcB64()) ||
1941  (FB[AMDGPU::FeatureWavefrontSize32] && isSCSrcB32()));
1942 }
1943 
1944 uint64_t AMDGPUOperand::applyInputFPModifiers(uint64_t Val, unsigned Size) const
1945 {
1946  assert(isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers());
1947  assert(Size == 2 || Size == 4 || Size == 8);
1948 
1949  const uint64_t FpSignMask = (1ULL << (Size * 8 - 1));
1950 
1951  if (Imm.Mods.Abs) {
1952  Val &= ~FpSignMask;
1953  }
1954  if (Imm.Mods.Neg) {
1955  Val ^= FpSignMask;
1956  }
1957 
1958  return Val;
1959 }
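 // For example, with Size == 4 the sign mask is bit 31, so applying 'abs'
 // to the literal 0xBF800000 (-1.0f) clears that bit and yields 0x3F800000
 // (+1.0f); applying 'neg' on top of that would flip it back.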
1960 
1961 void AMDGPUOperand::addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers) const {
1962  if (AMDGPU::isSISrcOperand(AsmParser->getMII()->get(Inst.getOpcode()),
1963  Inst.getNumOperands())) {
1964  addLiteralImmOperand(Inst, Imm.Val,
1965  ApplyModifiers &
1966  isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers());
1967  } else {
1968  assert(!isImmTy(ImmTyNone) || !hasModifiers());
1969  Inst.addOperand(MCOperand::createImm(Imm.Val));
1970  setImmKindNone();
1971  }
1972 }
1973 
1974 void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const {
1975  const auto& InstDesc = AsmParser->getMII()->get(Inst.getOpcode());
1976  auto OpNum = Inst.getNumOperands();
1977  // Check that this operand accepts literals
1978  assert(AMDGPU::isSISrcOperand(InstDesc, OpNum));
1979 
1980  if (ApplyModifiers) {
1981  assert(AMDGPU::isSISrcFPOperand(InstDesc, OpNum));
1982  const unsigned Size = Imm.IsFPImm ? sizeof(double) : getOperandSize(InstDesc, OpNum);
1983  Val = applyInputFPModifiers(Val, Size);
1984  }
1985 
1986  APInt Literal(64, Val);
1987  uint8_t OpTy = InstDesc.OpInfo[OpNum].OperandType;
1988 
1989  if (Imm.IsFPImm) { // We got fp literal token
1990  switch (OpTy) {
 1991  case AMDGPU::OPERAND_REG_IMM_INT64:
 1992  case AMDGPU::OPERAND_REG_IMM_FP64:
 1993  case AMDGPU::OPERAND_REG_INLINE_C_INT64:
 1994  case AMDGPU::OPERAND_REG_INLINE_C_FP64:
 1995  case AMDGPU::OPERAND_REG_INLINE_AC_FP64:
1996  if (AMDGPU::isInlinableLiteral64(Literal.getZExtValue(),
1997  AsmParser->hasInv2PiInlineImm())) {
1998  Inst.addOperand(MCOperand::createImm(Literal.getZExtValue()));
1999  setImmKindConst();
2000  return;
2001  }
2002 
2003  // Non-inlineable
2004  if (AMDGPU::isSISrcFPOperand(InstDesc, OpNum)) { // Expected 64-bit fp operand
2005  // For fp operands we check if low 32 bits are zeros
2006  if (Literal.getLoBits(32) != 0) {
2007  const_cast<AMDGPUAsmParser *>(AsmParser)->Warning(Inst.getLoc(),
2008  "Can't encode literal as exact 64-bit floating-point operand. "
2009  "Low 32-bits will be set to zero");
2010  }
2011 
2012  Inst.addOperand(MCOperand::createImm(Literal.lshr(32).getZExtValue()));
2013  setImmKindLiteral();
2014  return;
2015  }
2016 
2017  // We don't allow fp literals in 64-bit integer instructions. It is
2018  // unclear how we should encode them. This case should be checked earlier
2019  // in predicate methods (isLiteralImm())
2020  llvm_unreachable("fp literal in 64-bit integer instruction.");
2021 
2047  case AMDGPU::OPERAND_KIMM16: {
2048  bool lost;
2049  APFloat FPLiteral(APFloat::IEEEdouble(), Literal);
2050  // Convert literal to the operand's floating-point semantics
2051  FPLiteral.convert(*getOpFltSemantics(OpTy),
 2052  APFloat::rmNearestTiesToEven, &lost);
2053  // We allow precision loss but not overflow or underflow. This should be
2054  // checked earlier in isLiteralImm()
2055 
2056  uint64_t ImmVal = FPLiteral.bitcastToAPInt().getZExtValue();
2057  Inst.addOperand(MCOperand::createImm(ImmVal));
2058  setImmKindLiteral();
2059  return;
2060  }
2061  default:
2062  llvm_unreachable("invalid operand size");
2063  }
2064 
2065  return;
2066  }
2067 
2068  // We got int literal token.
2069  // Only sign extend inline immediates.
2070  switch (OpTy) {
2084  if (isSafeTruncation(Val, 32) &&
2085  AMDGPU::isInlinableLiteral32(static_cast<int32_t>(Val),
2086  AsmParser->hasInv2PiInlineImm())) {
2087  Inst.addOperand(MCOperand::createImm(Val));
2088  setImmKindConst();
2089  return;
2090  }
2091 
2092  Inst.addOperand(MCOperand::createImm(Val & 0xffffffff));
2093  setImmKindLiteral();
2094  return;
2095 
 2096  case AMDGPU::OPERAND_REG_IMM_INT64:
 2097  case AMDGPU::OPERAND_REG_IMM_FP64:
 2098  case AMDGPU::OPERAND_REG_INLINE_C_INT64:
 2099  case AMDGPU::OPERAND_REG_INLINE_C_FP64:
 2100  case AMDGPU::OPERAND_REG_INLINE_AC_FP64:
2101  if (AMDGPU::isInlinableLiteral64(Val, AsmParser->hasInv2PiInlineImm())) {
2102  Inst.addOperand(MCOperand::createImm(Val));
2103  setImmKindConst();
2104  return;
2105  }
2106 
 2107  Inst.addOperand(MCOperand::createImm(Lo_32(Val)));
2108  setImmKindLiteral();
2109  return;
2110 
 2111  case AMDGPU::OPERAND_REG_IMM_INT16:
 2112  case AMDGPU::OPERAND_REG_IMM_FP16:
 2113  case AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED:
 2114  case AMDGPU::OPERAND_REG_INLINE_C_INT16:
 2115  case AMDGPU::OPERAND_REG_INLINE_C_FP16:
 2116  case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
 2117  case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
2118  if (isSafeTruncation(Val, 16) &&
2119  AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val),
2120  AsmParser->hasInv2PiInlineImm())) {
2121  Inst.addOperand(MCOperand::createImm(Val));
2122  setImmKindConst();
2123  return;
2124  }
2125 
2126  Inst.addOperand(MCOperand::createImm(Val & 0xffff));
2127  setImmKindLiteral();
2128  return;
2129 
2134  assert(isSafeTruncation(Val, 16));
2135  assert(AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val),
2136  AsmParser->hasInv2PiInlineImm()));
2137 
2138  Inst.addOperand(MCOperand::createImm(Val));
2139  return;
2140  }
 2141  case AMDGPU::OPERAND_KIMM32:
2142  Inst.addOperand(MCOperand::createImm(Literal.getLoBits(32).getZExtValue()));
2143  setImmKindNone();
2144  return;
 2145  case AMDGPU::OPERAND_KIMM16:
2146  Inst.addOperand(MCOperand::createImm(Literal.getLoBits(16).getZExtValue()));
2147  setImmKindNone();
2148  return;
2149  default:
2150  llvm_unreachable("invalid operand size");
2151  }
2152 }
2153 
2154 template <unsigned Bitwidth>
2155 void AMDGPUOperand::addKImmFPOperands(MCInst &Inst, unsigned N) const {
2156  APInt Literal(64, Imm.Val);
2157  setImmKindNone();
2158 
2159  if (!Imm.IsFPImm) {
2160  // We got int literal token.
2161  Inst.addOperand(MCOperand::createImm(Literal.getLoBits(Bitwidth).getZExtValue()));
2162  return;
2163  }
2164 
2165  bool Lost;
2166  APFloat FPLiteral(APFloat::IEEEdouble(), Literal);
2167  FPLiteral.convert(*getFltSemantics(Bitwidth / 8),
 2168  APFloat::rmNearestTiesToEven, &Lost);
2169  Inst.addOperand(MCOperand::createImm(FPLiteral.bitcastToAPInt().getZExtValue()));
2170 }
2171 
2172 void AMDGPUOperand::addRegOperands(MCInst &Inst, unsigned N) const {
2173  Inst.addOperand(MCOperand::createReg(AMDGPU::getMCReg(getReg(), AsmParser->getSTI())));
2174 }
2175 
2176 static bool isInlineValue(unsigned Reg) {
2177  switch (Reg) {
2178  case AMDGPU::SRC_SHARED_BASE:
2179  case AMDGPU::SRC_SHARED_LIMIT:
2180  case AMDGPU::SRC_PRIVATE_BASE:
2181  case AMDGPU::SRC_PRIVATE_LIMIT:
2182  case AMDGPU::SRC_POPS_EXITING_WAVE_ID:
2183  return true;
2184  case AMDGPU::SRC_VCCZ:
2185  case AMDGPU::SRC_EXECZ:
2186  case AMDGPU::SRC_SCC:
2187  return true;
2188  case AMDGPU::SGPR_NULL:
2189  return true;
2190  default:
2191  return false;
2192  }
2193 }
2194 
2195 bool AMDGPUOperand::isInlineValue() const {
2196  return isRegKind() && ::isInlineValue(getReg());
2197 }
2198 
2199 //===----------------------------------------------------------------------===//
2200 // AsmParser
2201 //===----------------------------------------------------------------------===//
2202 
2203 static int getRegClass(RegisterKind Is, unsigned RegWidth) {
2204  if (Is == IS_VGPR) {
2205  switch (RegWidth) {
2206  default: return -1;
2207  case 1: return AMDGPU::VGPR_32RegClassID;
2208  case 2: return AMDGPU::VReg_64RegClassID;
2209  case 3: return AMDGPU::VReg_96RegClassID;
2210  case 4: return AMDGPU::VReg_128RegClassID;
2211  case 5: return AMDGPU::VReg_160RegClassID;
2212  case 6: return AMDGPU::VReg_192RegClassID;
2213  case 7: return AMDGPU::VReg_224RegClassID;
2214  case 8: return AMDGPU::VReg_256RegClassID;
2215  case 16: return AMDGPU::VReg_512RegClassID;
2216  case 32: return AMDGPU::VReg_1024RegClassID;
2217  }
2218  } else if (Is == IS_TTMP) {
2219  switch (RegWidth) {
2220  default: return -1;
2221  case 1: return AMDGPU::TTMP_32RegClassID;
2222  case 2: return AMDGPU::TTMP_64RegClassID;
2223  case 4: return AMDGPU::TTMP_128RegClassID;
2224  case 8: return AMDGPU::TTMP_256RegClassID;
2225  case 16: return AMDGPU::TTMP_512RegClassID;
2226  }
2227  } else if (Is == IS_SGPR) {
2228  switch (RegWidth) {
2229  default: return -1;
2230  case 1: return AMDGPU::SGPR_32RegClassID;
2231  case 2: return AMDGPU::SGPR_64RegClassID;
2232  case 3: return AMDGPU::SGPR_96RegClassID;
2233  case 4: return AMDGPU::SGPR_128RegClassID;
2234  case 5: return AMDGPU::SGPR_160RegClassID;
2235  case 6: return AMDGPU::SGPR_192RegClassID;
2236  case 7: return AMDGPU::SGPR_224RegClassID;
2237  case 8: return AMDGPU::SGPR_256RegClassID;
2238  case 16: return AMDGPU::SGPR_512RegClassID;
2239  }
2240  } else if (Is == IS_AGPR) {
2241  switch (RegWidth) {
2242  default: return -1;
2243  case 1: return AMDGPU::AGPR_32RegClassID;
2244  case 2: return AMDGPU::AReg_64RegClassID;
2245  case 3: return AMDGPU::AReg_96RegClassID;
2246  case 4: return AMDGPU::AReg_128RegClassID;
2247  case 5: return AMDGPU::AReg_160RegClassID;
2248  case 6: return AMDGPU::AReg_192RegClassID;
2249  case 7: return AMDGPU::AReg_224RegClassID;
2250  case 8: return AMDGPU::AReg_256RegClassID;
2251  case 16: return AMDGPU::AReg_512RegClassID;
2252  case 32: return AMDGPU::AReg_1024RegClassID;
2253  }
2254  }
2255  return -1;
2256 }
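 // For example, a 4-dword VGPR tuple such as v[4:7] (RegWidth == 4) maps to
 // AMDGPU::VReg_128RegClassID, while the same width maps SGPRs to
 // AMDGPU::SGPR_128RegClassID and AGPRs to AMDGPU::AReg_128RegClassID.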
2257 
 2258 static unsigned getSpecialRegForName(StringRef RegName) {
 2259  return StringSwitch<unsigned>(RegName)
2260  .Case("exec", AMDGPU::EXEC)
2261  .Case("vcc", AMDGPU::VCC)
2262  .Case("flat_scratch", AMDGPU::FLAT_SCR)
2263  .Case("xnack_mask", AMDGPU::XNACK_MASK)
2264  .Case("shared_base", AMDGPU::SRC_SHARED_BASE)
2265  .Case("src_shared_base", AMDGPU::SRC_SHARED_BASE)
2266  .Case("shared_limit", AMDGPU::SRC_SHARED_LIMIT)
2267  .Case("src_shared_limit", AMDGPU::SRC_SHARED_LIMIT)
2268  .Case("private_base", AMDGPU::SRC_PRIVATE_BASE)
2269  .Case("src_private_base", AMDGPU::SRC_PRIVATE_BASE)
2270  .Case("private_limit", AMDGPU::SRC_PRIVATE_LIMIT)
2271  .Case("src_private_limit", AMDGPU::SRC_PRIVATE_LIMIT)
2272  .Case("pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID)
2273  .Case("src_pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID)
2274  .Case("lds_direct", AMDGPU::LDS_DIRECT)
2275  .Case("src_lds_direct", AMDGPU::LDS_DIRECT)
2276  .Case("m0", AMDGPU::M0)
2277  .Case("vccz", AMDGPU::SRC_VCCZ)
2278  .Case("src_vccz", AMDGPU::SRC_VCCZ)
2279  .Case("execz", AMDGPU::SRC_EXECZ)
2280  .Case("src_execz", AMDGPU::SRC_EXECZ)
2281  .Case("scc", AMDGPU::SRC_SCC)
2282  .Case("src_scc", AMDGPU::SRC_SCC)
2283  .Case("tba", AMDGPU::TBA)
2284  .Case("tma", AMDGPU::TMA)
2285  .Case("flat_scratch_lo", AMDGPU::FLAT_SCR_LO)
2286  .Case("flat_scratch_hi", AMDGPU::FLAT_SCR_HI)
2287  .Case("xnack_mask_lo", AMDGPU::XNACK_MASK_LO)
2288  .Case("xnack_mask_hi", AMDGPU::XNACK_MASK_HI)
2289  .Case("vcc_lo", AMDGPU::VCC_LO)
2290  .Case("vcc_hi", AMDGPU::VCC_HI)
2291  .Case("exec_lo", AMDGPU::EXEC_LO)
2292  .Case("exec_hi", AMDGPU::EXEC_HI)
2293  .Case("tma_lo", AMDGPU::TMA_LO)
2294  .Case("tma_hi", AMDGPU::TMA_HI)
2295  .Case("tba_lo", AMDGPU::TBA_LO)
2296  .Case("tba_hi", AMDGPU::TBA_HI)
2297  .Case("pc", AMDGPU::PC_REG)
2298  .Case("null", AMDGPU::SGPR_NULL)
2299  .Default(AMDGPU::NoRegister);
2300 }
2301 
2302 bool AMDGPUAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc,
2303  SMLoc &EndLoc, bool RestoreOnFailure) {
2304  auto R = parseRegister();
2305  if (!R) return true;
2306  assert(R->isReg());
2307  RegNo = R->getReg();
2308  StartLoc = R->getStartLoc();
2309  EndLoc = R->getEndLoc();
2310  return false;
2311 }
2312 
2313 bool AMDGPUAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc,
2314  SMLoc &EndLoc) {
2315  return ParseRegister(RegNo, StartLoc, EndLoc, /*RestoreOnFailure=*/false);
2316 }
2317 
2318 OperandMatchResultTy AMDGPUAsmParser::tryParseRegister(unsigned &RegNo,
2319  SMLoc &StartLoc,
2320  SMLoc &EndLoc) {
2321  bool Result =
2322  ParseRegister(RegNo, StartLoc, EndLoc, /*RestoreOnFailure=*/true);
2323  bool PendingErrors = getParser().hasPendingError();
2324  getParser().clearPendingErrors();
2325  if (PendingErrors)
2326  return MatchOperand_ParseFail;
2327  if (Result)
2328  return MatchOperand_NoMatch;
2329  return MatchOperand_Success;
2330 }
2331 
2332 bool AMDGPUAsmParser::AddNextRegisterToList(unsigned &Reg, unsigned &RegWidth,
2333  RegisterKind RegKind, unsigned Reg1,
2334  SMLoc Loc) {
2335  switch (RegKind) {
2336  case IS_SPECIAL:
2337  if (Reg == AMDGPU::EXEC_LO && Reg1 == AMDGPU::EXEC_HI) {
2338  Reg = AMDGPU::EXEC;
2339  RegWidth = 2;
2340  return true;
2341  }
2342  if (Reg == AMDGPU::FLAT_SCR_LO && Reg1 == AMDGPU::FLAT_SCR_HI) {
2343  Reg = AMDGPU::FLAT_SCR;
2344  RegWidth = 2;
2345  return true;
2346  }
2347  if (Reg == AMDGPU::XNACK_MASK_LO && Reg1 == AMDGPU::XNACK_MASK_HI) {
2348  Reg = AMDGPU::XNACK_MASK;
2349  RegWidth = 2;
2350  return true;
2351  }
2352  if (Reg == AMDGPU::VCC_LO && Reg1 == AMDGPU::VCC_HI) {
2353  Reg = AMDGPU::VCC;
2354  RegWidth = 2;
2355  return true;
2356  }
2357  if (Reg == AMDGPU::TBA_LO && Reg1 == AMDGPU::TBA_HI) {
2358  Reg = AMDGPU::TBA;
2359  RegWidth = 2;
2360  return true;
2361  }
2362  if (Reg == AMDGPU::TMA_LO && Reg1 == AMDGPU::TMA_HI) {
2363  Reg = AMDGPU::TMA;
2364  RegWidth = 2;
2365  return true;
2366  }
2367  Error(Loc, "register does not fit in the list");
2368  return false;
2369  case IS_VGPR:
2370  case IS_SGPR:
2371  case IS_AGPR:
2372  case IS_TTMP:
2373  if (Reg1 != Reg + RegWidth) {
2374  Error(Loc, "registers in a list must have consecutive indices");
2375  return false;
2376  }
2377  RegWidth++;
2378  return true;
2379  default:
2380  llvm_unreachable("unexpected register kind");
2381  }
2382 }
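 // For example, parsing the list [s0,s1,s2,s3] grows RegWidth from 1 to 4
 // one register at a time, while [exec_lo,exec_hi] is folded into the
 // special register EXEC with RegWidth == 2.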
2383 
2384 struct RegInfo {
 2385  StringLiteral Name;
2386  RegisterKind Kind;
2387 };
2388 
2389 static constexpr RegInfo RegularRegisters[] = {
2390  {{"v"}, IS_VGPR},
2391  {{"s"}, IS_SGPR},
2392  {{"ttmp"}, IS_TTMP},
2393  {{"acc"}, IS_AGPR},
2394  {{"a"}, IS_AGPR},
2395 };
2396 
2397 static bool isRegularReg(RegisterKind Kind) {
2398  return Kind == IS_VGPR ||
2399  Kind == IS_SGPR ||
2400  Kind == IS_TTMP ||
2401  Kind == IS_AGPR;
2402 }
2403 
2404 static const RegInfo* getRegularRegInfo(StringRef Str) {
2405  for (const RegInfo &Reg : RegularRegisters)
2406  if (Str.startswith(Reg.Name))
2407  return &Reg;
2408  return nullptr;
2409 }
2410 
2411 static bool getRegNum(StringRef Str, unsigned& Num) {
2412  return !Str.getAsInteger(10, Num);
2413 }
2414 
2415 bool
2416 AMDGPUAsmParser::isRegister(const AsmToken &Token,
2417  const AsmToken &NextToken) const {
2418 
2419  // A list of consecutive registers: [s0,s1,s2,s3]
2420  if (Token.is(AsmToken::LBrac))
2421  return true;
2422 
2423  if (!Token.is(AsmToken::Identifier))
2424  return false;
2425 
2426  // A single register like s0 or a range of registers like s[0:1]
2427 
2428  StringRef Str = Token.getString();
2429  const RegInfo *Reg = getRegularRegInfo(Str);
2430  if (Reg) {
2431  StringRef RegName = Reg->Name;
2432  StringRef RegSuffix = Str.substr(RegName.size());
2433  if (!RegSuffix.empty()) {
2434  unsigned Num;
2435  // A single register with an index: rXX
2436  if (getRegNum(RegSuffix, Num))
2437  return true;
2438  } else {
2439  // A range of registers: r[XX:YY].
2440  if (NextToken.is(AsmToken::LBrac))
2441  return true;
2442  }
2443  }
2444 
2445  return getSpecialRegForName(Str) != AMDGPU::NoRegister;
2446 }
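 // For example, "v0", "s[0:1]", "[v0,v1]" and "vcc" are all recognized as
 // registers here, while an identifier such as "neg" or a bare integer
 // literal is not.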
2447 
2448 bool
2449 AMDGPUAsmParser::isRegister()
2450 {
2451  return isRegister(getToken(), peekToken());
2452 }
2453 
2454 unsigned
2455 AMDGPUAsmParser::getRegularReg(RegisterKind RegKind,
2456  unsigned RegNum,
2457  unsigned RegWidth,
2458  SMLoc Loc) {
2459 
2460  assert(isRegularReg(RegKind));
2461 
2462  unsigned AlignSize = 1;
2463  if (RegKind == IS_SGPR || RegKind == IS_TTMP) {
2464  // SGPR and TTMP registers must be aligned.
2465  // Max required alignment is 4 dwords.
2466  AlignSize = std::min(RegWidth, 4u);
2467  }
2468 
2469  if (RegNum % AlignSize != 0) {
2470  Error(Loc, "invalid register alignment");
2471  return AMDGPU::NoRegister;
2472  }
2473 
2474  unsigned RegIdx = RegNum / AlignSize;
2475  int RCID = getRegClass(RegKind, RegWidth);
2476  if (RCID == -1) {
2477  Error(Loc, "invalid or unsupported register size");
2478  return AMDGPU::NoRegister;
2479  }
2480 
2481  const MCRegisterInfo *TRI = getContext().getRegisterInfo();
2482  const MCRegisterClass RC = TRI->getRegClass(RCID);
2483  if (RegIdx >= RC.getNumRegs()) {
2484  Error(Loc, "register index is out of range");
2485  return AMDGPU::NoRegister;
2486  }
2487 
2488  return RC.getRegister(RegIdx);
2489 }
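 // For example, s[2:3] (RegNum == 2, RegWidth == 2) is accepted, while
 // s[1:2] is rejected with "invalid register alignment" because 64-bit
 // SGPR tuples must start at an even register index.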
2490 
2491 bool
2492 AMDGPUAsmParser::ParseRegRange(unsigned& Num, unsigned& Width) {
2493  int64_t RegLo, RegHi;
2494  if (!skipToken(AsmToken::LBrac, "missing register index"))
2495  return false;
2496 
2497  SMLoc FirstIdxLoc = getLoc();
2498  SMLoc SecondIdxLoc;
2499 
2500  if (!parseExpr(RegLo))
2501  return false;
2502 
2503  if (trySkipToken(AsmToken::Colon)) {
2504  SecondIdxLoc = getLoc();
2505  if (!parseExpr(RegHi))
2506  return false;
2507  } else {
2508  RegHi = RegLo;
2509  }
2510 
2511  if (!skipToken(AsmToken::RBrac, "expected a closing square bracket"))
2512  return false;
2513 
2514  if (!isUInt<32>(RegLo)) {
2515  Error(FirstIdxLoc, "invalid register index");
2516  return false;
2517  }
2518 
2519  if (!isUInt<32>(RegHi)) {
2520  Error(SecondIdxLoc, "invalid register index");
2521  return false;
2522  }
2523 
2524  if (RegLo > RegHi) {
2525  Error(FirstIdxLoc, "first register index should not exceed second index");
2526  return false;
2527  }
2528 
2529  Num = static_cast<unsigned>(RegLo);
2530  Width = (RegHi - RegLo) + 1;
2531  return true;
2532 }
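 // For example, the suffix "[0:3]" yields Num == 0 and Width == 4, and
 // "[5]" (no colon) yields Num == 5 and Width == 1.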
2533 
2534 unsigned AMDGPUAsmParser::ParseSpecialReg(RegisterKind &RegKind,
2535  unsigned &RegNum, unsigned &RegWidth,
2536  SmallVectorImpl<AsmToken> &Tokens) {
2537  assert(isToken(AsmToken::Identifier));
2538  unsigned Reg = getSpecialRegForName(getTokenStr());
2539  if (Reg) {
2540  RegNum = 0;
2541  RegWidth = 1;
2542  RegKind = IS_SPECIAL;
2543  Tokens.push_back(getToken());
2544  lex(); // skip register name
2545  }
2546  return Reg;
2547 }
2548 
2549 unsigned AMDGPUAsmParser::ParseRegularReg(RegisterKind &RegKind,
2550  unsigned &RegNum, unsigned &RegWidth,
2551  SmallVectorImpl<AsmToken> &Tokens) {
2552  assert(isToken(AsmToken::Identifier));
2553  StringRef RegName = getTokenStr();
2554  auto Loc = getLoc();
2555 
2556  const RegInfo *RI = getRegularRegInfo(RegName);
2557  if (!RI) {
2558  Error(Loc, "invalid register name");
2559  return AMDGPU::NoRegister;
2560  }
2561 
2562  Tokens.push_back(getToken());
2563  lex(); // skip register name
2564 
2565  RegKind = RI->Kind;
2566  StringRef RegSuffix = RegName.substr(RI->Name.size());
2567  if (!RegSuffix.empty()) {
2568  // Single 32-bit register: vXX.
2569  if (!getRegNum(RegSuffix, RegNum)) {
2570  Error(Loc, "invalid register index");
2571  return AMDGPU::NoRegister;
2572  }
2573  RegWidth = 1;
2574  } else {
2575  // Range of registers: v[XX:YY]. ":YY" is optional.
2576  if (!ParseRegRange(RegNum, RegWidth))
2577  return AMDGPU::NoRegister;
2578  }
2579 
2580  return getRegularReg(RegKind, RegNum, RegWidth, Loc);
2581 }
2582 
2583 unsigned AMDGPUAsmParser::ParseRegList(RegisterKind &RegKind, unsigned &RegNum,
2584  unsigned &RegWidth,
2585  SmallVectorImpl<AsmToken> &Tokens) {
2586  unsigned Reg = AMDGPU::NoRegister;
2587  auto ListLoc = getLoc();
2588 
2589  if (!skipToken(AsmToken::LBrac,
2590  "expected a register or a list of registers")) {
2591  return AMDGPU::NoRegister;
2592  }
2593 
2594  // List of consecutive registers, e.g.: [s0,s1,s2,s3]
2595 
2596  auto Loc = getLoc();
2597  if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth))
2598  return AMDGPU::NoRegister;
2599  if (RegWidth != 1) {
2600  Error(Loc, "expected a single 32-bit register");
2601  return AMDGPU::NoRegister;
2602  }
2603 
2604  for (; trySkipToken(AsmToken::Comma); ) {
2605  RegisterKind NextRegKind;
2606  unsigned NextReg, NextRegNum, NextRegWidth;
2607  Loc = getLoc();
2608 
2609  if (!ParseAMDGPURegister(NextRegKind, NextReg,
2610  NextRegNum, NextRegWidth,
2611  Tokens)) {
2612  return AMDGPU::NoRegister;
2613  }
2614  if (NextRegWidth != 1) {
2615  Error(Loc, "expected a single 32-bit register");
2616  return AMDGPU::NoRegister;
2617  }
2618  if (NextRegKind != RegKind) {
2619  Error(Loc, "registers in a list must be of the same kind");
2620  return AMDGPU::NoRegister;
2621  }
2622  if (!AddNextRegisterToList(Reg, RegWidth, RegKind, NextReg, Loc))
2623  return AMDGPU::NoRegister;
2624  }
2625 
2626  if (!skipToken(AsmToken::RBrac,
2627  "expected a comma or a closing square bracket")) {
2628  return AMDGPU::NoRegister;
2629  }
2630 
2631  if (isRegularReg(RegKind))
2632  Reg = getRegularReg(RegKind, RegNum, RegWidth, ListLoc);
2633 
2634  return Reg;
2635 }
2636 
2637 bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
2638  unsigned &RegNum, unsigned &RegWidth,
2639  SmallVectorImpl<AsmToken> &Tokens) {
2640  auto Loc = getLoc();
2641  Reg = AMDGPU::NoRegister;
2642 
2643  if (isToken(AsmToken::Identifier)) {
2644  Reg = ParseSpecialReg(RegKind, RegNum, RegWidth, Tokens);
2645  if (Reg == AMDGPU::NoRegister)
2646  Reg = ParseRegularReg(RegKind, RegNum, RegWidth, Tokens);
2647  } else {
2648  Reg = ParseRegList(RegKind, RegNum, RegWidth, Tokens);
2649  }
2650 
2651  const MCRegisterInfo *TRI = getContext().getRegisterInfo();
2652  if (Reg == AMDGPU::NoRegister) {
2653  assert(Parser.hasPendingError());
2654  return false;
2655  }
2656 
2657  if (!subtargetHasRegister(*TRI, Reg)) {
2658  if (Reg == AMDGPU::SGPR_NULL) {
2659  Error(Loc, "'null' operand is not supported on this GPU");
2660  } else {
2661  Error(Loc, "register not available on this GPU");
2662  }
2663  return false;
2664  }
2665 
2666  return true;
2667 }
2668 
2669 bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
2670  unsigned &RegNum, unsigned &RegWidth,
2671  bool RestoreOnFailure /*=false*/) {
2672  Reg = AMDGPU::NoRegister;
2673 
2674  SmallVector<AsmToken, 1> Tokens;
2675  if (ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth, Tokens)) {
2676  if (RestoreOnFailure) {
2677  while (!Tokens.empty()) {
2678  getLexer().UnLex(Tokens.pop_back_val());
2679  }
2680  }
2681  return true;
2682  }
2683  return false;
2684 }
2685 
 2686 Optional<StringRef>
2687 AMDGPUAsmParser::getGprCountSymbolName(RegisterKind RegKind) {
2688  switch (RegKind) {
2689  case IS_VGPR:
2690  return StringRef(".amdgcn.next_free_vgpr");
2691  case IS_SGPR:
2692  return StringRef(".amdgcn.next_free_sgpr");
2693  default:
2694  return None;
2695  }
2696 }
2697 
2698 void AMDGPUAsmParser::initializeGprCountSymbol(RegisterKind RegKind) {
2699  auto SymbolName = getGprCountSymbolName(RegKind);
2700  assert(SymbolName && "initializing invalid register kind");
2701  MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName);
2702  Sym->setVariableValue(MCConstantExpr::create(0, getContext()));
2703 }
2704 
2705 bool AMDGPUAsmParser::updateGprCountSymbols(RegisterKind RegKind,
2706  unsigned DwordRegIndex,
2707  unsigned RegWidth) {
2708  // Symbols are only defined for GCN targets
2709  if (AMDGPU::getIsaVersion(getSTI().getCPU()).Major < 6)
2710  return true;
2711 
2712  auto SymbolName = getGprCountSymbolName(RegKind);
2713  if (!SymbolName)
2714  return true;
2715  MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName);
2716 
2717  int64_t NewMax = DwordRegIndex + RegWidth - 1;
2718  int64_t OldCount;
2719 
2720  if (!Sym->isVariable())
2721  return !Error(getLoc(),
2722  ".amdgcn.next_free_{v,s}gpr symbols must be variable");
2723  if (!Sym->getVariableValue(false)->evaluateAsAbsolute(OldCount))
2724  return !Error(
2725  getLoc(),
2726  ".amdgcn.next_free_{v,s}gpr symbols must be absolute expressions");
2727 
2728  if (OldCount <= NewMax)
2729  Sym->setVariableValue(MCConstantExpr::create(NewMax + 1, getContext()));
2730 
2731  return true;
2732 }
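 // For example, after parsing v[4:7] (DwordRegIndex == 4, RegWidth == 4)
 // the .amdgcn.next_free_vgpr symbol is raised to at least 8.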
2733 
2734 std::unique_ptr<AMDGPUOperand>
2735 AMDGPUAsmParser::parseRegister(bool RestoreOnFailure) {
2736  const auto &Tok = getToken();
2737  SMLoc StartLoc = Tok.getLoc();
2738  SMLoc EndLoc = Tok.getEndLoc();
2739  RegisterKind RegKind;
2740  unsigned Reg, RegNum, RegWidth;
2741 
2742  if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth)) {
2743  return nullptr;
2744  }
2745  if (isHsaAbiVersion3Or4(&getSTI())) {
2746  if (!updateGprCountSymbols(RegKind, RegNum, RegWidth))
2747  return nullptr;
2748  } else
2749  KernelScope.usesRegister(RegKind, RegNum, RegWidth);
2750  return AMDGPUOperand::CreateReg(this, Reg, StartLoc, EndLoc);
2751 }
2752 
 2753 OperandMatchResultTy
2754 AMDGPUAsmParser::parseImm(OperandVector &Operands, bool HasSP3AbsModifier) {
2755  // TODO: add syntactic sugar for 1/(2*PI)
2756 
2757  assert(!isRegister());
2758  assert(!isModifier());
2759 
2760  const auto& Tok = getToken();
2761  const auto& NextTok = peekToken();
2762  bool IsReal = Tok.is(AsmToken::Real);
2763  SMLoc S = getLoc();
2764  bool Negate = false;
2765 
2766  if (!IsReal && Tok.is(AsmToken::Minus) && NextTok.is(AsmToken::Real)) {
2767  lex();
2768  IsReal = true;
2769  Negate = true;
2770  }
2771 
2772  if (IsReal) {
2773  // Floating-point expressions are not supported.
2774  // Can only allow floating-point literals with an
2775  // optional sign.
2776 
2777  StringRef Num = getTokenStr();
2778  lex();
2779 
2780  APFloat RealVal(APFloat::IEEEdouble());
2781  auto roundMode = APFloat::rmNearestTiesToEven;
2782  if (errorToBool(RealVal.convertFromString(Num, roundMode).takeError())) {
2783  return MatchOperand_ParseFail;
2784  }
2785  if (Negate)
2786  RealVal.changeSign();
2787 
2788  Operands.push_back(
2789  AMDGPUOperand::CreateImm(this, RealVal.bitcastToAPInt().getZExtValue(), S,
2790  AMDGPUOperand::ImmTyNone, true));
2791 
2792  return MatchOperand_Success;
2793 
2794  } else {
2795  int64_t IntVal;
2796  const MCExpr *Expr;
2797  SMLoc S = getLoc();
2798 
2799  if (HasSP3AbsModifier) {
2800  // This is a workaround for handling expressions
2801  // as arguments of SP3 'abs' modifier, for example:
2802  // |1.0|
2803  // |-1|
2804  // |1+x|
2805  // This syntax is not compatible with the syntax of standard
2806  // MC expressions (due to the trailing '|').
2807  SMLoc EndLoc;
2808  if (getParser().parsePrimaryExpr(Expr, EndLoc, nullptr))
2809  return MatchOperand_ParseFail;
2810  } else {
2811  if (Parser.parseExpression(Expr))
2812  return MatchOperand_ParseFail;
2813  }
2814 
2815  if (Expr->evaluateAsAbsolute(IntVal)) {
2816  Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S));
2817  } else {
2818  Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S));
2819  }
2820 
2821  return MatchOperand_Success;
2822  }
2823 
2824  return MatchOperand_NoMatch;
2825 }
2826 
 2827 OperandMatchResultTy
2828 AMDGPUAsmParser::parseReg(OperandVector &Operands) {
2829  if (!isRegister())
2830  return MatchOperand_NoMatch;
2831 
2832  if (auto R = parseRegister()) {
2833  assert(R->isReg());
2834  Operands.push_back(std::move(R));
2835  return MatchOperand_Success;
2836  }
2837  return MatchOperand_ParseFail;
2838 }
2839 
 2840 OperandMatchResultTy
2841 AMDGPUAsmParser::parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod) {
2842  auto res = parseReg(Operands);
2843  if (res != MatchOperand_NoMatch) {
2844  return res;
2845  } else if (isModifier()) {
2846  return MatchOperand_NoMatch;
2847  } else {
2848  return parseImm(Operands, HasSP3AbsMod);
2849  }
2850 }
2851 
2852 bool
2853 AMDGPUAsmParser::isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
2854  if (Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::LParen)) {
2855  const auto &str = Token.getString();
2856  return str == "abs" || str == "neg" || str == "sext";
2857  }
2858  return false;
2859 }
2860 
2861 bool
2862 AMDGPUAsmParser::isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const {
2863  return Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::Colon);
2864 }
2865 
2866 bool
2867 AMDGPUAsmParser::isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
2868  return isNamedOperandModifier(Token, NextToken) || Token.is(AsmToken::Pipe);
2869 }
2870 
2871 bool
2872 AMDGPUAsmParser::isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
2873  return isRegister(Token, NextToken) || isOperandModifier(Token, NextToken);
2874 }
2875 
2876 // Check if this is an operand modifier or an opcode modifier
2877 // which may look like an expression but it is not. We should
2878 // avoid parsing these modifiers as expressions. Currently
2879 // recognized sequences are:
2880 // |...|
2881 // abs(...)
2882 // neg(...)
2883 // sext(...)
2884 // -reg
2885 // -|...|
2886 // -abs(...)
2887 // name:...
2888 // Note that simple opcode modifiers like 'gds' may be parsed as
2889 // expressions; this is a special case. See getExpressionAsToken.
2890 //
2891 bool
2892 AMDGPUAsmParser::isModifier() {
2893 
2894  AsmToken Tok = getToken();
2895  AsmToken NextToken[2];
2896  peekTokens(NextToken);
2897 
2898  return isOperandModifier(Tok, NextToken[0]) ||
2899  (Tok.is(AsmToken::Minus) && isRegOrOperandModifier(NextToken[0], NextToken[1])) ||
2900  isOpcodeModifierWithVal(Tok, NextToken[0]);
2901 }
2902 
2903 // Check if the current token is an SP3 'neg' modifier.
2904 // Currently this modifier is allowed in the following context:
2905 //
2906 // 1. Before a register, e.g. "-v0", "-v[...]" or "-[v0,v1]".
2907 // 2. Before an 'abs' modifier: -abs(...)
2908 // 3. Before an SP3 'abs' modifier: -|...|
2909 //
2910 // In all other cases "-" is handled as a part
2911 // of an expression that follows the sign.
2912 //
2913 // Note: When "-" is followed by an integer literal,
2914 // this is interpreted as integer negation rather
2915 // than a floating-point NEG modifier applied to N.
2916  // Besides being counter-intuitive, such use of a floating-point
2917 // NEG modifier would have resulted in different meaning
2918 // of integer literals used with VOP1/2/C and VOP3,
2919 // for example:
2920 // v_exp_f32_e32 v5, -1 // VOP1: src0 = 0xFFFFFFFF
2921 // v_exp_f32_e64 v5, -1 // VOP3: src0 = 0x80000001
2922 // Negative fp literals with preceding "-" are
2923  // handled likewise for uniformity
2924 //
2925 bool
2926 AMDGPUAsmParser::parseSP3NegModifier() {
2927 
2928  AsmToken NextToken[2];
2929  peekTokens(NextToken);
2930 
2931  if (isToken(AsmToken::Minus) &&
2932  (isRegister(NextToken[0], NextToken[1]) ||
2933  NextToken[0].is(AsmToken::Pipe) ||
2934  isId(NextToken[0], "abs"))) {
2935  lex();
2936  return true;
2937  }
2938 
2939  return false;
2940 }
2941 
 2942 OperandMatchResultTy
2943 AMDGPUAsmParser::parseRegOrImmWithFPInputMods(OperandVector &Operands,
2944  bool AllowImm) {
2945  bool Neg, SP3Neg;
2946  bool Abs, SP3Abs;
2947  SMLoc Loc;
2948 
2949  // Disable ambiguous constructs like '--1' etc. Should use neg(-1) instead.
2950  if (isToken(AsmToken::Minus) && peekToken().is(AsmToken::Minus)) {
2951  Error(getLoc(), "invalid syntax, expected 'neg' modifier");
2952  return MatchOperand_ParseFail;
2953  }
2954 
2955  SP3Neg = parseSP3NegModifier();
2956 
2957  Loc = getLoc();
2958  Neg = trySkipId("neg");
2959  if (Neg && SP3Neg) {
2960  Error(Loc, "expected register or immediate");
2961  return MatchOperand_ParseFail;
2962  }
2963  if (Neg && !skipToken(AsmToken::LParen, "expected left paren after neg"))
2964  return MatchOperand_ParseFail;
2965 
2966  Abs = trySkipId("abs");
2967  if (Abs && !skipToken(AsmToken::LParen, "expected left paren after abs"))
2968  return MatchOperand_ParseFail;
2969 
2970  Loc = getLoc();
2971  SP3Abs = trySkipToken(AsmToken::Pipe);
2972  if (Abs && SP3Abs) {
2973  Error(Loc, "expected register or immediate");
2974  return MatchOperand_ParseFail;
2975  }
2976 
 2977  OperandMatchResultTy Res;
2978  if (AllowImm) {
2979  Res = parseRegOrImm(Operands, SP3Abs);
2980  } else {
2981  Res = parseReg(Operands);
2982  }
2983  if (Res != MatchOperand_Success) {
2984  return (SP3Neg || Neg || SP3Abs || Abs)? MatchOperand_ParseFail : Res;
2985  }
2986 
2987  if (SP3Abs && !skipToken(AsmToken::Pipe, "expected vertical bar"))
2988  return MatchOperand_ParseFail;
2989  if (Abs && !skipToken(AsmToken::RParen, "expected closing parentheses"))
2990  return MatchOperand_ParseFail;
2991  if (Neg && !skipToken(AsmToken::RParen, "expected closing parentheses"))
2992  return MatchOperand_ParseFail;
2993 
2994  AMDGPUOperand::Modifiers Mods;
2995  Mods.Abs = Abs || SP3Abs;
2996  Mods.Neg = Neg || SP3Neg;
2997 
2998  if (Mods.hasFPModifiers()) {
2999  AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
3000  if (Op.isExpr()) {
3001  Error(Op.getStartLoc(), "expected an absolute expression");
3002  return MatchOperand_ParseFail;
3003  }
3004  Op.setModifiers(Mods);
3005  }
3006  return MatchOperand_Success;
3007 }
3008 
 3009 OperandMatchResultTy
3010 AMDGPUAsmParser::parseRegOrImmWithIntInputMods(OperandVector &Operands,
3011  bool AllowImm) {
3012  bool Sext = trySkipId("sext");
3013  if (Sext && !skipToken(AsmToken::LParen, "expected left paren after sext"))
3014  return MatchOperand_ParseFail;
3015 
 3016  OperandMatchResultTy Res;
3017  if (AllowImm) {
3018  Res = parseRegOrImm(Operands);
3019  } else {
3020  Res = parseReg(Operands);
3021  }
3022  if (Res != MatchOperand_Success) {
3023  return Sext? MatchOperand_ParseFail : Res;
3024  }
3025 
3026  if (Sext && !skipToken(AsmToken::RParen, "expected closing parentheses"))
3027  return MatchOperand_ParseFail;
3028 
3029  AMDGPUOperand::Modifiers Mods;
3030  Mods.Sext = Sext;
3031 
3032  if (Mods.hasIntModifiers()) {
3033  AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
3034  if (Op.isExpr()) {
3035  Error(Op.getStartLoc(), "expected an absolute expression");
3036  return MatchOperand_ParseFail;
3037  }
3038  Op.setModifiers(Mods);
3039  }
3040 
3041  return MatchOperand_Success;
3042 }
3043 
 3044 OperandMatchResultTy
3045 AMDGPUAsmParser::parseRegWithFPInputMods(OperandVector &Operands) {
3046  return parseRegOrImmWithFPInputMods(Operands, false);
3047 }
3048 
 3049 OperandMatchResultTy
3050 AMDGPUAsmParser::parseRegWithIntInputMods(OperandVector &Operands) {
3051  return parseRegOrImmWithIntInputMods(Operands, false);
3052 }
3053 
3054 OperandMatchResultTy AMDGPUAsmParser::parseVReg32OrOff(OperandVector &Operands) {
3055  auto Loc = getLoc();
3056  if (trySkipId("off")) {
3057  Operands.push_back(AMDGPUOperand::CreateImm(this, 0, Loc,
3058  AMDGPUOperand::ImmTyOff, false));
3059  return MatchOperand_Success;
3060  }
3061 
3062  if (!isRegister())
3063  return MatchOperand_NoMatch;
3064 
3065  std::unique_ptr<AMDGPUOperand> Reg = parseRegister();
3066  if (Reg) {
3067  Operands.push_back(std::move(Reg));
3068  return MatchOperand_Success;
3069  }
3070 
3071  return MatchOperand_ParseFail;
3072 
3073 }
3074 
3075 unsigned AMDGPUAsmParser::checkTargetMatchPredicate(MCInst &Inst) {
3076  uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
3077 
3078  if ((getForcedEncodingSize() == 32 && (TSFlags & SIInstrFlags::VOP3)) ||
3079  (getForcedEncodingSize() == 64 && !(TSFlags & SIInstrFlags::VOP3)) ||
3080  (isForcedDPP() && !(TSFlags & SIInstrFlags::DPP)) ||
3081  (isForcedSDWA() && !(TSFlags & SIInstrFlags::SDWA)) )
3082  return Match_InvalidOperand;
3083 
3084  if ((TSFlags & SIInstrFlags::VOP3) &&
3085  (TSFlags & SIInstrFlags::VOPAsmPrefer32Bit) &&
3086  getForcedEncodingSize() != 64)
3087  return Match_PreferE32;
3088 
3089  if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi ||
3090  Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) {
3091  // v_mac_f32/16 allow only dst_sel == DWORD;
3092  auto OpNum =
3093  AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::dst_sel);
3094  const auto &Op = Inst.getOperand(OpNum);
3095  if (!Op.isImm() || Op.getImm() != AMDGPU::SDWA::SdwaSel::DWORD) {
3096  return Match_InvalidOperand;
3097  }
3098  }
3099 
3100  return Match_Success;
3101 }
3102 
 3103 static ArrayRef<unsigned> getAllVariants() {
3104  static const unsigned Variants[] = {
 3105  AMDGPUAsmVariants::DEFAULT, AMDGPUAsmVariants::VOP3,
 3106  AMDGPUAsmVariants::SDWA, AMDGPUAsmVariants::SDWA9, AMDGPUAsmVariants::DPP
3107  };
3108 
3109  return makeArrayRef(Variants);
3110 }
3111 
3112 // What asm variants we should check
3113 ArrayRef<unsigned> AMDGPUAsmParser::getMatchedVariants() const {
3114  if (getForcedEncodingSize() == 32) {
3115  static const unsigned Variants[] = {AMDGPUAsmVariants::DEFAULT};
3116  return makeArrayRef(Variants);
3117  }
3118 
3119  if (isForcedVOP3()) {
3120  static const unsigned Variants[] = {AMDGPUAsmVariants::VOP3};
3121  return makeArrayRef(Variants);
3122  }
3123 
3124  if (isForcedSDWA()) {
3125  static const unsigned Variants[] = {AMDGPUAsmVariants::SDWA,
 3126  AMDGPUAsmVariants::SDWA9};
3127  return makeArrayRef(Variants);
3128  }
3129 
3130  if (isForcedDPP()) {
3131  static const unsigned Variants[] = {AMDGPUAsmVariants::DPP};
3132  return makeArrayRef(Variants);
3133  }
3134 
3135  return getAllVariants();
3136 }
3137 
3138 StringRef AMDGPUAsmParser::getMatchedVariantName() const {
3139  if (getForcedEncodingSize() == 32)
3140  return "e32";
3141 
3142  if (isForcedVOP3())
3143  return "e64";
3144 
3145  if (isForcedSDWA())
3146  return "sdwa";
3147 
3148  if (isForcedDPP())
3149  return "dpp";
3150 
3151  return "";
3152 }
3153 
3154 unsigned AMDGPUAsmParser::findImplicitSGPRReadInVOP(const MCInst &Inst) const {
3155  const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
3156  const unsigned Num = Desc.getNumImplicitUses();
3157  for (unsigned i = 0; i < Num; ++i) {
3158  unsigned Reg = Desc.ImplicitUses[i];
3159  switch (Reg) {
3160  case AMDGPU::FLAT_SCR:
3161  case AMDGPU::VCC:
3162  case AMDGPU::VCC_LO:
3163  case AMDGPU::VCC_HI:
3164  case AMDGPU::M0:
3165  return Reg;
3166  default:
3167  break;
3168  }
3169  }
3170  return AMDGPU::NoRegister;
3171 }
3172 
3173 // NB: This code is correct only when used to check constant
3174  // bus limitations because GFX7 supports no f16 inline constants.
3175 // Note that there are no cases when a GFX7 opcode violates
3176 // constant bus limitations due to the use of an f16 constant.
3177 bool AMDGPUAsmParser::isInlineConstant(const MCInst &Inst,
3178  unsigned OpIdx) const {
3179  const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
3180 
3181  if (!AMDGPU::isSISrcOperand(Desc, OpIdx)) {
3182  return false;
3183  }
3184 
3185  const MCOperand &MO = Inst.getOperand(OpIdx);
3186 
3187  int64_t Val = MO.getImm();
3188  auto OpSize = AMDGPU::getOperandSize(Desc, OpIdx);
3189 
3190  switch (OpSize) { // expected operand size
3191  case 8:
3192  return AMDGPU::isInlinableLiteral64(Val, hasInv2PiInlineImm());
3193  case 4:
3194  return AMDGPU::isInlinableLiteral32(Val, hasInv2PiInlineImm());
3195  case 2: {
3196  const unsigned OperandType = Desc.OpInfo[OpIdx].OperandType;
3200  return AMDGPU::isInlinableIntLiteral(Val);
3201 
3206 
3210  return AMDGPU::isInlinableLiteralV216(Val, hasInv2PiInlineImm());
3211 
3212  return AMDGPU::isInlinableLiteral16(Val, hasInv2PiInlineImm());
3213  }
3214  default:
3215  llvm_unreachable("invalid operand size");
3216  }
3217 }
3218 
3219 unsigned AMDGPUAsmParser::getConstantBusLimit(unsigned Opcode) const {
3220  if (!isGFX10Plus())
3221  return 1;
3222 
3223  switch (Opcode) {
3224  // 64-bit shift instructions can use only one scalar value input
3225  case AMDGPU::V_LSHLREV_B64_e64:
3226  case AMDGPU::V_LSHLREV_B64_gfx10:
3227  case AMDGPU::V_LSHRREV_B64_e64:
3228  case AMDGPU::V_LSHRREV_B64_gfx10:
3229  case AMDGPU::V_ASHRREV_I64_e64:
3230  case AMDGPU::V_ASHRREV_I64_gfx10:
3231  case AMDGPU::V_LSHL_B64_e64:
3232  case AMDGPU::V_LSHR_B64_e64:
3233  case AMDGPU::V_ASHR_I64_e64:
3234  return 1;
3235  default:
3236  return 2;
3237  }
3238 }
3239 
3240 bool AMDGPUAsmParser::usesConstantBus(const MCInst &Inst, unsigned OpIdx) {
3241  const MCOperand &MO = Inst.getOperand(OpIdx);
3242  if (MO.isImm()) {
3243  return !isInlineConstant(Inst, OpIdx);
3244  } else if (MO.isReg()) {
3245  auto Reg = MO.getReg();
3246  const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3247  auto PReg = mc2PseudoReg(Reg);
3248  return isSGPR(PReg, TRI) && PReg != SGPR_NULL;
3249  } else {
3250  return true;
3251  }
3252 }
3253 
3254 bool
3255 AMDGPUAsmParser::validateConstantBusLimitations(const MCInst &Inst,
3256  const OperandVector &Operands) {
3257  const unsigned Opcode = Inst.getOpcode();
3258  const MCInstrDesc &Desc = MII.get(Opcode);
3259  unsigned LastSGPR = AMDGPU::NoRegister;
3260  unsigned ConstantBusUseCount = 0;
3261  unsigned NumLiterals = 0;
3262  unsigned LiteralSize;
3263 
3264  if (Desc.TSFlags &
 3265  (SIInstrFlags::VOPC |
 3266  SIInstrFlags::VOP1 | SIInstrFlags::VOP2 |
 3267  SIInstrFlags::VOP3 | SIInstrFlags::VOP3P |
3268  SIInstrFlags::SDWA)) {
3269  // Check special imm operands (used by madmk, etc)
3270  if (AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::imm) != -1) {
3271  ++NumLiterals;
3272  LiteralSize = 4;
3273  }
3274 
3275  SmallDenseSet<unsigned> SGPRsUsed;
3276  unsigned SGPRUsed = findImplicitSGPRReadInVOP(Inst);
3277  if (SGPRUsed != AMDGPU::NoRegister) {
3278  SGPRsUsed.insert(SGPRUsed);
3279  ++ConstantBusUseCount;
3280  }
3281 
3282  const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
3283  const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
3284  const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);
3285 
3286  const int OpIndices[] = { Src0Idx, Src1Idx, Src2Idx };
3287 
3288  for (int OpIdx : OpIndices) {
3289  if (OpIdx == -1) break;
3290 
3291  const MCOperand &MO = Inst.getOperand(OpIdx);
3292  if (usesConstantBus(Inst, OpIdx)) {
3293  if (MO.isReg()) {
3294  LastSGPR = mc2PseudoReg(MO.getReg());
3295  // Pairs of registers with partial intersections like these
3296  // s0, s[0:1]
3297  // flat_scratch_lo, flat_scratch
3298  // flat_scratch_lo, flat_scratch_hi
3299  // are theoretically valid but they are disabled anyway.
3300  // Note that this code mimics SIInstrInfo::verifyInstruction
3301  if (!SGPRsUsed.count(LastSGPR)) {
3302  SGPRsUsed.insert(LastSGPR);
3303  ++ConstantBusUseCount;
3304  }
3305  } else { // Expression or a literal
3306 
3307  if (Desc.OpInfo[OpIdx].OperandType == MCOI::OPERAND_IMMEDIATE)
3308  continue; // special operand like VINTERP attr_chan
3309 
3310  // An instruction may use only one literal.
3311  // This has been validated on the previous step.
3312  // See validateVOPLiteral.
3313  // This literal may be used as more than one operand.
3314  // If all these operands are of the same size,
3315  // this literal counts as one scalar value.
3316  // Otherwise it counts as 2 scalar values.
3317  // See "GFX10 Shader Programming", section 3.6.2.3.
3318 
3319  unsigned Size = AMDGPU::getOperandSize(Desc, OpIdx);
3320  if (Size < 4) Size = 4;
3321 
3322  if (NumLiterals == 0) {
3323  NumLiterals = 1;
3324  LiteralSize = Size;
3325  } else if (LiteralSize != Size) {
3326  NumLiterals = 2;
3327  }
3328  }
3329  }
3330  }
3331  }
3332  ConstantBusUseCount += NumLiterals;
3333 
3334  if (ConstantBusUseCount <= getConstantBusLimit(Opcode))
3335  return true;
3336 
3337  SMLoc LitLoc = getLitLoc(Operands);
3338  SMLoc RegLoc = getRegLoc(LastSGPR, Operands);
3339  SMLoc Loc = (LitLoc.getPointer() < RegLoc.getPointer()) ? RegLoc : LitLoc;
3340  Error(Loc, "invalid operand (violates constant bus restrictions)");
3341  return false;
3342 }
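 // For example, "v_add_f32_e64 v0, s0, s1" reads two different SGPRs; it is
 // accepted on GFX10+ where the constant bus limit is 2, but rejected on
 // earlier targets where only one scalar value may be read.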
3343 
3344 bool
3345 AMDGPUAsmParser::validateEarlyClobberLimitations(const MCInst &Inst,
3346  const OperandVector &Operands) {
3347  const unsigned Opcode = Inst.getOpcode();
3348  const MCInstrDesc &Desc = MII.get(Opcode);
3349 
3350  const int DstIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::vdst);
3351  if (DstIdx == -1 ||
3352  Desc.getOperandConstraint(DstIdx, MCOI::EARLY_CLOBBER) == -1) {
3353  return true;
3354  }
3355 
3356  const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3357 
3358  const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
3359  const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
3360  const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);
3361 
3362  assert(DstIdx != -1);
3363  const MCOperand &Dst = Inst.getOperand(DstIdx);
3364  assert(Dst.isReg());
3365  const unsigned DstReg = mc2PseudoReg(Dst.getReg());
3366 
3367  const int SrcIndices[] = { Src0Idx, Src1Idx, Src2Idx };
3368 
3369  for (int SrcIdx : SrcIndices) {
3370  if (SrcIdx == -1) break;
3371  const MCOperand &Src = Inst.getOperand(SrcIdx);
3372  if (Src.isReg()) {
3373  const unsigned SrcReg = mc2PseudoReg(Src.getReg());
3374  if (isRegIntersect(DstReg, SrcReg, TRI)) {
3375  Error(getRegLoc(SrcReg, Operands),
3376  "destination must be different than all sources");
3377  return false;
3378  }
3379  }
3380  }
3381 
3382  return true;
3383 }
3384 
3385 bool AMDGPUAsmParser::validateIntClampSupported(const MCInst &Inst) {
3386 
3387  const unsigned Opc = Inst.getOpcode();
3388  const MCInstrDesc &Desc = MII.get(Opc);
3389 
3390  if ((Desc.TSFlags & SIInstrFlags::IntClamp) != 0 && !hasIntClamp()) {
3391  int ClampIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp);
3392  assert(ClampIdx != -1);
3393  return Inst.getOperand(ClampIdx).getImm() == 0;
3394  }
3395 
3396  return true;
3397 }
3398 
3399 bool AMDGPUAsmParser::validateMIMGDataSize(const MCInst &Inst) {
3400 
3401  const unsigned Opc = Inst.getOpcode();
3402  const MCInstrDesc &Desc = MII.get(Opc);
3403 
3404  if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
3405  return true;
3406 
3407  int VDataIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdata);
3408  int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
3409  int TFEIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::tfe);
3410 
3411  assert(VDataIdx != -1);
3412 
3413  if (DMaskIdx == -1 || TFEIdx == -1) // intersect_ray
3414  return true;
3415 
3416  unsigned VDataSize = AMDGPU::getRegOperandSize(getMRI(), Desc, VDataIdx);
3417  unsigned TFESize = (TFEIdx != -1 && Inst.getOperand(TFEIdx).getImm()) ? 1 : 0;
3418  unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
3419  if (DMask == 0)
3420  DMask = 1;
3421 
3422  unsigned DataSize =
3423  (Desc.TSFlags & SIInstrFlags::Gather4) ? 4 : countPopulation(DMask);
3424  if (hasPackedD16()) {
3425  int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16);
3426  if (D16Idx >= 0 && Inst.getOperand(D16Idx).getImm())
3427  DataSize = (DataSize + 1) / 2;
3428  }
3429 
3430  return (VDataSize / 4) == DataSize + TFESize;
3431 }
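 // For example, dmask = 0x7 selects three components, so vdata must be a
 // 3-dword register tuple (one extra dword if tfe is set); with packed d16
 // enabled, only 2 dwords are required for the same dmask.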
3432 
3433 bool AMDGPUAsmParser::validateMIMGAddrSize(const MCInst &Inst) {
3434  const unsigned Opc = Inst.getOpcode();
3435  const MCInstrDesc &Desc = MII.get(Opc);
3436 
3437  if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0 || !isGFX10Plus())
3438  return true;
3439 
 3440  const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Opc);
3441 
3442  const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode =
3443  AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode);
3444  int VAddr0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vaddr0);
3445  int SrsrcIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::srsrc);
3446  int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim);
3447  int A16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::a16);
3448 
3449  assert(VAddr0Idx != -1);
3450  assert(SrsrcIdx != -1);
3451  assert(SrsrcIdx > VAddr0Idx);
3452 
3453  if (DimIdx == -1)
3454  return true; // intersect_ray
3455 
3456  unsigned Dim = Inst.getOperand(DimIdx).getImm();
 3457  const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByEncoding(Dim);
3458  bool IsNSA = SrsrcIdx - VAddr0Idx > 1;
3459  unsigned ActualAddrSize =
3460  IsNSA ? SrsrcIdx - VAddr0Idx
3461  : AMDGPU::getRegOperandSize(getMRI(), Desc, VAddr0Idx) / 4;
3462  bool IsA16 = (A16Idx != -1 && Inst.getOperand(A16Idx).getImm());
3463 
3464  unsigned ExpectedAddrSize =
3465  AMDGPU::getAddrSizeMIMGOp(BaseOpcode, DimInfo, IsA16, hasG16());
3466 
3467  if (!IsNSA) {
3468  if (ExpectedAddrSize > 8)
3469  ExpectedAddrSize = 16;
3470 
3471  // Allow oversized 8 VGPR vaddr when only 5/6/7 VGPRs are required.
3472  // This provides backward compatibility for assembly created
3473  // before 160b/192b/224b types were directly supported.
3474  if (ActualAddrSize == 8 && (ExpectedAddrSize >= 5 && ExpectedAddrSize <= 7))
3475  return true;
3476  }
3477 
3478  return ActualAddrSize == ExpectedAddrSize;
3479 }
3480 
3481 bool AMDGPUAsmParser::validateMIMGAtomicDMask(const MCInst &Inst) {
3482 
3483  const unsigned Opc = Inst.getOpcode();
3484  const MCInstrDesc &Desc = MII.get(Opc);
3485 
3486  if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
3487  return true;
3488  if (!Desc.mayLoad() || !Desc.mayStore())
3489  return true; // Not atomic
3490 
3491  int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
3492  unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
3493 
3494  // This is an incomplete check because image_atomic_cmpswap
3495  // may only use 0x3 and 0xf while other atomic operations
3496  // may use 0x1 and 0x3. However these limitations are
3497  // verified when we check that dmask matches dst size.
3498  return DMask == 0x1 || DMask == 0x3 || DMask == 0xf;
3499 }
3500 
3501 bool AMDGPUAsmParser::validateMIMGGatherDMask(const MCInst &Inst) {
3502 
3503  const unsigned Opc = Inst.getOpcode();
3504  const MCInstrDesc &Desc = MII.get(Opc);
3505 
3506  if ((Desc.TSFlags & SIInstrFlags::Gather4) == 0)
3507  return true;
3508 
3509  int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
3510  unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
3511 
3512  // GATHER4 instructions use dmask in a different fashion compared to
3513  // other MIMG instructions. The only useful DMASK values are
3514  // 1=red, 2=green, 4=blue, 8=alpha. (e.g. 1 returns
3515  // (red,red,red,red) etc.) The ISA document doesn't mention
3516  // this.
3517  return DMask == 0x1 || DMask == 0x2 || DMask == 0x4 || DMask == 0x8;
3518 }
3519 
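// Illustrative example (editorial addition, not part of the original source;
// gfx10 syntax assumed): a gather4 dmask selects exactly one channel, e.g.
//   image_gather4 v[0:3], v[4:5], s[0:7], s[8:11] dmask:0x2 dim:SQ_RSRC_IMG_2D  ; green
// while dmask:0x3 would be rejected because more than one bit is set.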
3520 bool AMDGPUAsmParser::validateMIMGMSAA(const MCInst &Inst) {
3521  const unsigned Opc = Inst.getOpcode();
3522  const MCInstrDesc &Desc = MII.get(Opc);
3523 
3524  if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
3525  return true;
3526 
3527  const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Opc);
3528  const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode =
3529  AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode);
3530 
3531  if (!BaseOpcode->MSAA)
3532  return true;
3533 
3534  int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim);
3535  assert(DimIdx != -1);
3536 
3537  unsigned Dim = Inst.getOperand(DimIdx).getImm();
3538  const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByEncoding(Dim);
3539 
3540  return DimInfo->MSAA;
3541 }
3542 
3543 static bool IsMovrelsSDWAOpcode(const unsigned Opcode)
3544 {
3545  switch (Opcode) {
3546  case AMDGPU::V_MOVRELS_B32_sdwa_gfx10:
3547  case AMDGPU::V_MOVRELSD_B32_sdwa_gfx10:
3548  case AMDGPU::V_MOVRELSD_2_B32_sdwa_gfx10:
3549  return true;
3550  default:
3551  return false;
3552  }
3553 }
3554 
3555 // movrels* opcodes should only allow VGPRs as src0.
3556 // This is specified in the .td description for vop1/vop3,
3557 // but sdwa is handled differently. See isSDWAOperand.
3558 bool AMDGPUAsmParser::validateMovrels(const MCInst &Inst,
3559  const OperandVector &Operands) {
3560 
3561  const unsigned Opc = Inst.getOpcode();
3562  const MCInstrDesc &Desc = MII.get(Opc);
3563 
3564  if ((Desc.TSFlags & SIInstrFlags::SDWA) == 0 || !IsMovrelsSDWAOpcode(Opc))
3565  return true;
3566 
3567  const int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
3568  assert(Src0Idx != -1);
3569 
3570  SMLoc ErrLoc;
3571  const MCOperand &Src0 = Inst.getOperand(Src0Idx);
3572  if (Src0.isReg()) {
3573  auto Reg = mc2PseudoReg(Src0.getReg());
3574  const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3575  if (!isSGPR(Reg, TRI))
3576  return true;
3577  ErrLoc = getRegLoc(Reg, Operands);
3578  } else {
3579  ErrLoc = getConstLoc(Operands);
3580  }
3581 
3582  Error(ErrLoc, "source operand must be a VGPR");
3583  return false;
3584 }
3585 
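// Illustrative example (editorial addition, not part of the original source;
// gfx10 SDWA syntax assumed):
//   v_movrels_b32_sdwa v0, v1     ; accepted, src0 is a VGPR
//   v_movrels_b32_sdwa v0, s1     ; rejected: "source operand must be a VGPR"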
3586 bool AMDGPUAsmParser::validateMAIAccWrite(const MCInst &Inst,
3587  const OperandVector &Operands) {
3588 
3589  const unsigned Opc = Inst.getOpcode();
3590 
3591  if (Opc != AMDGPU::V_ACCVGPR_WRITE_B32_vi)
3592  return true;
3593 
3594  const int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
3595  assert(Src0Idx != -1);
3596 
3597  const MCOperand &Src0 = Inst.getOperand(Src0Idx);
3598  if (!Src0.isReg())
3599  return true;
3600 
3601  auto Reg = mc2PseudoReg(Src0.getReg());
3602  const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3603  if (isSGPR(Reg, TRI)) {
3604  Error(getRegLoc(Reg, Operands),
3605  "source operand must be either a VGPR or an inline constant");
3606  return false;
3607  }
3608 
3609  return true;
3610 }
3611 
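// Illustrative example (editorial addition, not part of the original source;
// gfx908 syntax assumed):
//   v_accvgpr_write_b32 a0, v1    ; accepted, VGPR source
//   v_accvgpr_write_b32 a0, 1     ; accepted, inline constant
//   v_accvgpr_write_b32 a0, s1    ; rejected, SGPR source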
3612 bool AMDGPUAsmParser::validateDivScale(const MCInst &Inst) {
3613  switch (Inst.getOpcode()) {
3614  default:
3615  return true;
3616  case V_DIV_SCALE_F32_gfx6_gfx7:
3617  case V_DIV_SCALE_F32_vi:
3618  case V_DIV_SCALE_F32_gfx10:
3619  case V_DIV_SCALE_F64_gfx6_gfx7:
3620  case V_DIV_SCALE_F64_vi:
3621  case V_DIV_SCALE_F64_gfx10:
3622  break;
3623  }
3624 
3625  // TODO: Check that src0 = src1 or src2.
3626 
3627  for (auto Name : {AMDGPU::OpName::src0_modifiers,
3628  AMDGPU::OpName::src1_modifiers,
3629  AMDGPU::OpName::src2_modifiers}) {
3630  if (Inst.getOperand(AMDGPU::getNamedOperandIdx(Inst.getOpcode(), Name))
3631  .getImm() &
3632  SISrcMods::ABS) {
3633  return false;
3634  }
3635  }
3636 
3637  return true;
3638 }
3639 
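// Illustrative example (editorial addition, not part of the original source;
// operand choices assumed): the loop above rejects the |...| (abs) modifier on
// any source of v_div_scale_*, e.g.
//   v_div_scale_f32 v0, vcc, v1, v1, v2     ; accepted
//   v_div_scale_f32 v0, vcc, |v1|, v1, v2   ; rejected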
3640 bool AMDGPUAsmParser::validateMIMGD16(const MCInst &Inst) {
3641 
3642  const unsigned Opc = Inst.getOpcode();
3643  const MCInstrDesc &Desc = MII.get(Opc);
3644 
3645  if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
3646  return true;
3647 
3648  int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16);
3649  if (D16Idx >= 0 && Inst.getOperand(D16Idx).getImm()) {
3650  if (isCI() || isSI())
3651  return false;
3652  }
3653 
3654  return true;
3655 }
3656 
3657 bool AMDGPUAsmParser::validateMIMGDim(const MCInst &Inst) {
3658  const unsigned Opc = Inst.getOpcode();
3659  const MCInstrDesc &Desc = MII.get(Opc);
3660 
3661  if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
3662  return true;
3663 
3664  int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim);
3665  if (DimIdx < 0)
3666  return true;
3667 
3668  long Imm = Inst.getOperand(DimIdx).getImm();
3669  if (Imm < 0 || Imm >= 8)
3670  return false;
3671 
3672  return true;
3673 }
3674 
3675 static bool IsRevOpcode(const unsigned Opcode)
3676 {
3677  switch (Opcode) {
3678  case AMDGPU::V_SUBREV_F32_e32:
3679  case AMDGPU::V_SUBREV_F32_e64:
3680  case AMDGPU::V_SUBREV_F32_e32_gfx10:
3681  case AMDGPU::V_SUBREV_F32_e32_gfx6_gfx7:
3682  case AMDGPU::V_SUBREV_F32_e32_vi:
3683  case AMDGPU::V_SUBREV_F32_e64_gfx10:
3684  case AMDGPU::V_SUBREV_F32_e64_gfx6_gfx7:
3685  case AMDGPU::V_SUBREV_F32_e64_vi:
3686 
3687  case AMDGPU::V_SUBREV_CO_U32_e32:
3688  case AMDGPU::V_SUBREV_CO_U32_e64:
3689  case AMDGPU::V_SUBREV_I32_e32_gfx6_gfx7:
3690  case AMDGPU::V_SUBREV_I32_e64_gfx6_gfx7:
3691 
3692  case AMDGPU::V_SUBBREV_U32_e32:
3693  case AMDGPU::V_SUBBREV_U32_e64:
3694  case AMDGPU::V_SUBBREV_U32_e32_gfx6_gfx7:
3695  case AMDGPU::V_SUBBREV_U32_e32_vi:
3696  case AMDGPU::V_SUBBREV_U32_e64_gfx6_gfx7:
3697  case AMDGPU::V_SUBBREV_U32_e64_vi:
3698 
3699  case AMDGPU::V_SUBREV_U32_e32:
3700  case AMDGPU::V_SUBREV_U32_e64:
3701  case AMDGPU::V_SUBREV_U32_e32_gfx9:
3702  case AMDGPU::V_SUBREV_U32_e32_vi:
3703  case AMDGPU::V_SUBREV_U32_e64_gfx9:
3704  case AMDGPU::V_SUBREV_U32_e64_vi:
3705 
3706  case AMDGPU::V_SUBREV_F16_e32:
3707  case AMDGPU::V_SUBREV_F16_e64:
3708  case AMDGPU::V_SUBREV_F16_e32_gfx10:
3709  case AMDGPU::V_SUBREV_F16_e32_vi:
3710  case AMDGPU::V_SUBREV_F16_e64_gfx10:
3711  case AMDGPU::V_SUBREV_F16_e64_vi:
3712 
3713  case AMDGPU::V_SUBREV_U16_e32:
3714  case AMDGPU::V_SUBREV_U16_e64:
3715  case AMDGPU::V_SUBREV_U16_e32_vi:
3716  case AMDGPU::V_SUBREV_U16_e64_vi:
3717 
3718  case AMDGPU::V_SUBREV_CO_U32_e32_gfx9:
3719  case AMDGPU::V_SUBREV_CO_U32_e64_gfx10:
3720  case AMDGPU::V_SUBREV_CO_U32_e64_gfx9:
3721 
3722  case AMDGPU::V_SUBBREV_CO_U32_e32_gfx9:
3723  case AMDGPU::V_SUBBREV_CO_U32_e64_gfx9:
3724 
3725  case AMDGPU::V_SUBREV_NC_U32_e32_gfx10:
3726  case AMDGPU::V_SUBREV_NC_U32_e64_gfx10:
3727 
3728  case AMDGPU::V_SUBREV_CO_CI_U32_e32_gfx10:
3729  case AMDGPU::V_SUBREV_CO_CI_U32_e64_gfx10:
3730 
3731  case AMDGPU::V_LSHRREV_B32_e32:
3732  case AMDGPU::V_LSHRREV_B32_e64:
3733  case AMDGPU::V_LSHRREV_B32_e32_gfx6_gfx7:
3734  case AMDGPU::V_LSHRREV_B32_e64_gfx6_gfx7:
3735  case AMDGPU::V_LSHRREV_B32_e32_vi:
3736  case AMDGPU::V_LSHRREV_B32_e64_vi:
3737  case AMDGPU::V_LSHRREV_B32_e32_gfx10:
3738  case AMDGPU::V_LSHRREV_B32_e64_gfx10:
3739 
3740  case AMDGPU::V_ASHRREV_I32_e32:
3741  case AMDGPU::V_ASHRREV_I32_e64:
3742  case AMDGPU::V_ASHRREV_I32_e32_gfx10:
3743  case AMDGPU::V_ASHRREV_I32_e32_gfx6_gfx7:
3744  case AMDGPU::V_ASHRREV_I32_e32_vi:
3745  case AMDGPU::V_ASHRREV_I32_e64_gfx10:
3746  case AMDGPU::V_ASHRREV_I32_e64_gfx6_gfx7:
3747  case AMDGPU::V_ASHRREV_I32_e64_vi:
3748 
3749  case AMDGPU::V_LSHLREV_B32_e32:
3750  case AMDGPU::V_LSHLREV_B32_e64:
3751  case AMDGPU::V_LSHLREV_B32_e32_gfx10:
3752  case AMDGPU::V_LSHLREV_B32_e32_gfx6_gfx7:
3753  case AMDGPU::V_LSHLREV_B32_e32_vi:
3754  case AMDGPU::V_LSHLREV_B32_e64_gfx10:
3755  case AMDGPU::V_LSHLREV_B32_e64_gfx6_gfx7:
3756  case AMDGPU::V_LSHLREV_B32_e64_vi:
3757 
3758  case AMDGPU::V_LSHLREV_B16_e32:
3759  case AMDGPU::V_LSHLREV_B16_e64:
3760  case AMDGPU::V_LSHLREV_B16_e32_vi:
3761  case AMDGPU::V_LSHLREV_B16_e64_vi:
3762  case AMDGPU::V_LSHLREV_B16_gfx10:
3763 
3764  case AMDGPU::V_LSHRREV_B16_e32:
3765  case AMDGPU::V_LSHRREV_B16_e64:
3766  case AMDGPU::V_LSHRREV_B16_e32_vi:
3767  case AMDGPU::V_LSHRREV_B16_e64_vi:
3768  case AMDGPU::V_LSHRREV_B16_gfx10:
3769 
3770  case AMDGPU::V_ASHRREV_I16_e32:
3771  case AMDGPU::V_ASHRREV_I16_e64:
3772  case AMDGPU::V_ASHRREV_I16_e32_vi:
3773  case AMDGPU::V_ASHRREV_I16_e64_vi:
3774  case AMDGPU::V_ASHRREV_I16_gfx10:
3775 
3776  case AMDGPU::V_LSHLREV_B64_e64:
3777  case AMDGPU::V_LSHLREV_B64_gfx10:
3778  case AMDGPU::V_LSHLREV_B64_vi:
3779 
3780  case AMDGPU::V_LSHRREV_B64_e64:
3781  case AMDGPU::V_LSHRREV_B64_gfx10:
3782  case AMDGPU::V_LSHRREV_B64_vi:
3783 
3784  case AMDGPU::V_ASHRREV_I64_e64:
3785  case AMDGPU::V_ASHRREV_I64_gfx10:
3786  case AMDGPU::V_ASHRREV_I64_vi:
3787 
3788  case AMDGPU::V_PK_LSHLREV_B16:
3789  case AMDGPU::V_PK_LSHLREV_B16_gfx10:
3790  case AMDGPU::V_PK_LSHLREV_B16_vi:
3791 
3792  case AMDGPU::V_PK_LSHRREV_B16:
3793  case AMDGPU::V_PK_LSHRREV_B16_gfx10:
3794  case AMDGPU::V_PK_LSHRREV_B16_vi:
3795  case AMDGPU::V_PK_ASHRREV_I16:
3796  case AMDGPU::V_PK_ASHRREV_I16_gfx10:
3797  case AMDGPU::V_PK_ASHRREV_I16_vi:
3798  return true;
3799  default:
3800  return false;
3801  }
3802 }
3803 
3804 Optional<StringRef> AMDGPUAsmParser::validateLdsDirect(const MCInst &Inst) {
3805 
3806  using namespace SIInstrFlags;
3807  const unsigned Opcode = Inst.getOpcode();
3808  const MCInstrDesc &Desc = MII.get(Opcode);
3809 
3810  // lds_direct register is defined so that it can be used
3811  // with 9-bit operands only. Ignore encodings which do not accept these.
3812  const auto Enc = VOP1 | VOP2 | VOP3 | VOPC | VOP3P | SIInstrFlags::SDWA;
3813  if ((Desc.TSFlags & Enc) == 0)
3814  return None;
3815 
3816  for (auto SrcName : {OpName::src0, OpName::src1, OpName::src2}) {
3817  auto SrcIdx = getNamedOperandIdx(Opcode, SrcName);
3818  if (SrcIdx == -1)
3819  break;
3820  const auto &Src = Inst.getOperand(SrcIdx);
3821  if (Src.isReg() && Src.getReg() == LDS_DIRECT) {
3822 
3823  if (isGFX90A())
3824  return StringRef("lds_direct is not supported on this GPU");
3825 
3826  if (IsRevOpcode(Opcode) || (Desc.TSFlags & SIInstrFlags::SDWA))
3827  return StringRef("lds_direct cannot be used with this instruction");
3828 
3829  if (SrcName != OpName::src0)
3830  return StringRef("lds_direct may be used as src0 only");
3831  }
3832  }
3833 
3834  return None;
3835 }
3836 
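// Illustrative example (editorial addition, not part of the original source;
// syntax assumed): lds_direct is only accepted as src0 of the listed VALU
// encodings, e.g.
//   v_mov_b32 v0, lds_direct        ; accepted
//   v_add_f32 v0, v1, lds_direct    ; rejected: may be used as src0 only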
3837 SMLoc AMDGPUAsmParser::getFlatOffsetLoc(const OperandVector &Operands) const {
3838  for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
3839  AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
3840  if (Op.isFlatOffset())
3841  return Op.getStartLoc();
3842  }
3843  return getLoc();
3844 }
3845 
3846 bool AMDGPUAsmParser::validateFlatOffset(const MCInst &Inst,
3847  const OperandVector &Operands) {
3848  uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
3849  if ((TSFlags & SIInstrFlags::FLAT) == 0)
3850  return true;
3851 
3852  auto Opcode = Inst.getOpcode();
3853  auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset);
3854  assert(OpNum != -1);
3855 
3856  const auto &Op = Inst.getOperand(OpNum);
3857  if (!hasFlatOffsets() && Op.getImm() != 0) {
3858  Error(getFlatOffsetLoc(Operands),
3859  "flat offset modifier is not supported on this GPU");
3860  return false;
3861  }
3862 
3863  // For FLAT segment the offset must be positive;
3864  // MSB is ignored and forced to zero.
3865  if (TSFlags & (SIInstrFlags::FlatGlobal | SIInstrFlags::FlatScratch)) {
3866  unsigned OffsetSize = AMDGPU::getNumFlatOffsetBits(getSTI(), true);
3867  if (!isIntN(OffsetSize, Op.getImm())) {
3868  Error(getFlatOffsetLoc(Operands),
3869  Twine("expected a ") + Twine(OffsetSize) + "-bit signed offset");
3870  return false;
3871  }
3872  } else {
3873  unsigned OffsetSize = AMDGPU::getNumFlatOffsetBits(getSTI(), false);
3874  if (!isUIntN(OffsetSize, Op.getImm())) {
3875  Error(getFlatOffsetLoc(Operands),
3876  Twine("expected a ") + Twine(OffsetSize) + "-bit unsigned offset");
3877  return false;
3878  }
3879  }
3880 
3881  return true;
3882 }
3883 
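// Illustrative example (editorial addition, not part of the original source;
// offsets assumed to be representable on the target): a negative offset such as
//   global_load_dword v0, v[2:3], off offset:-16
// is only legal where the segment offset is signed, while the unsigned branch
// above rejects it for instructions whose offset field is unsigned.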
3884 SMLoc AMDGPUAsmParser::getSMEMOffsetLoc(const OperandVector &Operands) const {
3885  // Start with second operand because SMEM Offset cannot be dst or src0.
3886  for (unsigned i = 2, e = Operands.size(); i != e; ++i) {
3887  AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
3888  if (Op.isSMEMOffset())
3889  return Op.getStartLoc();
3890  }
3891  return getLoc();
3892 }
3893 
3894 bool AMDGPUAsmParser::validateSMEMOffset(const MCInst &Inst,
3895  const OperandVector &Operands) {
3896  if (isCI() || isSI())
3897  return true;
3898 
3899  uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
3900  if ((TSFlags & SIInstrFlags::SMRD) == 0)
3901  return true;
3902 
3903  auto Opcode = Inst.getOpcode();
3904  auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset);
3905  if (OpNum == -1)
3906  return true;
3907 
3908  const auto &Op = Inst.getOperand(OpNum);
3909  if (!Op.isImm())
3910  return true;
3911 
3912  uint64_t Offset = Op.getImm();
3913  bool IsBuffer = AMDGPU::getSMEMIsBuffer(Opcode);
3914  if (AMDGPU::isLegalSMRDEncodedUnsignedOffset(getSTI(), Offset) ||
3915  AMDGPU::isLegalSMRDEncodedSignedOffset(getSTI(), Offset, IsBuffer))
3916  return true;
3917 
3918  Error(getSMEMOffsetLoc(Operands),
3919  (isVI() || IsBuffer) ? "expected a 20-bit unsigned offset" :
3920  "expected a 21-bit signed offset");
3921 
3922  return false;
3923 }
3924 
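// Illustrative example (editorial addition, not part of the original source):
//   s_load_dword s0, s[2:3], -0x10
// is accepted where a 21-bit signed offset is legal (e.g. gfx9 non-buffer SMEM)
// but rejected with "expected a 20-bit unsigned offset" on VI or for buffer forms.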
3925 bool AMDGPUAsmParser::validateSOPLiteral(const MCInst &Inst) const {
3926  unsigned Opcode = Inst.getOpcode();
3927  const MCInstrDesc &Desc = MII.get(Opcode);
3928  if (!(Desc.TSFlags & (SIInstrFlags::SOP2 | SIInstrFlags::SOPC)))
3929  return true;
3930 
3931  const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
3932  const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
3933 
3934  const int OpIndices[] = { Src0Idx, Src1Idx };
3935 
3936  unsigned NumExprs = 0;
3937  unsigned NumLiterals = 0;
3938  uint32_t LiteralValue;
3939 
3940  for (int OpIdx : OpIndices) {
3941  if (OpIdx == -1) break;
3942 
3943  const MCOperand &MO = Inst.getOperand(OpIdx);
3944  // Exclude special imm operands (like the one used by s_set_gpr_idx_on)
3945  if (AMDGPU::isSISrcOperand(Desc, OpIdx)) {
3946  if (MO.isImm() && !isInlineConstant(Inst, OpIdx)) {
3947  uint32_t Value = static_cast<uint32_t>(MO.getImm());
3948  if (NumLiterals == 0 || LiteralValue != Value) {
3949  LiteralValue = Value;
3950  ++NumLiterals;
3951  }
3952  } else if (MO.isExpr()) {
3953  ++NumExprs;
3954  }
3955  }
3956  }
3957 
3958  return NumLiterals + NumExprs <= 1;
3959 }
3960 
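// Illustrative example (editorial addition, not part of the original source):
// SOP2/SOPC can encode at most one unique 32-bit literal, e.g.
//   s_add_u32 s0, 0x12345678, 0x12345678   ; accepted, same literal counted once
//   s_add_u32 s0, 0x12345678, 0x87654321   ; rejected, two distinct literals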
3961 bool AMDGPUAsmParser::validateOpSel(const MCInst &Inst) {
3962  const unsigned Opc = Inst.getOpcode();
3963  if (Opc == AMDGPU::V_PERMLANE16_B32_gfx10 ||
3964  Opc == AMDGPU::V_PERMLANEX16_B32_gfx10) {
3965  int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
3966  unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
3967 
3968  if (OpSel & ~3)
3969  return false;
3970  }
3971  return true;
3972 }
3973 
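// Illustrative note (editorial addition, not part of the original source): for
// v_permlane16_b32 / v_permlanex16_b32 only the two low op_sel bits may be set;
// any higher bit makes validateOpSel fail with "invalid op_sel operand".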
3974 bool AMDGPUAsmParser::validateDPP(const MCInst &Inst,
3975  const OperandVector &Operands) {
3976  const unsigned Opc = Inst.getOpcode();
3977  int DppCtrlIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dpp_ctrl);
3978  if (DppCtrlIdx < 0)
3979  return true;
3980  unsigned DppCtrl = Inst.getOperand(DppCtrlIdx).getImm();
3981 
3983  // DPP64 is supported for row_newbcast only.
3984  int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
3985  if (Src0Idx >= 0 &&
3986  getMRI()->getSubReg(Inst.getOperand(Src0Idx).getReg(), AMDGPU::sub1)) {
3987  SMLoc S = getImmLoc(AMDGPUOperand::ImmTyDppCtrl, Operands);
3988  Error(S, "64 bit dpp only supports row_newbcast");
3989  return false;
3990  }
3991  }
3992 
3993  return true;
3994 }
3995 
3996 // Check if VCC register matches wavefront size
3997 bool AMDGPUAsmParser::validateVccOperand(unsigned Reg) const {
3998  auto FB = getFeatureBits();
3999  return (FB[AMDGPU::FeatureWavefrontSize64] && Reg == AMDGPU::VCC) ||
4000  (FB[AMDGPU::FeatureWavefrontSize32] && Reg == AMDGPU::VCC_LO);
4001 }
4002 
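// Illustrative example (editorial addition, not part of the original source;
// gfx10 syntax assumed): wave64 code must name the full vcc pair while wave32
// code must use vcc_lo, e.g.
//   v_add_co_ci_u32_e32 v0, vcc, v1, v2, vcc         ; wave64
//   v_add_co_ci_u32_e32 v0, vcc_lo, v1, v2, vcc_lo   ; wave32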
4003 // Only one unique literal can be used. A VOP3 literal is only allowed on GFX10+
4004 bool AMDGPUAsmParser::validateVOPLiteral(const MCInst &Inst,
4005  const OperandVector &Operands) {
4006  unsigned Opcode = Inst.getOpcode();
4007  const MCInstrDesc &Desc = MII.get(Opcode);
4008  const int ImmIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::imm);
4009  if (!(Desc.TSFlags & (SIInstrFlags::VOP3 | SIInstrFlags::VOP3P)) &&
4010  ImmIdx == -1)
4011  return true;
4012 
4013  const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
4014  const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
4015  const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);
4016 
4017  const int OpIndices[] = {Src0Idx, Src1Idx, Src2Idx, ImmIdx};
4018 
4019  unsigned NumExprs = 0;
4020  unsigned NumLiterals = 0;
4021  uint32_t LiteralValue;
4022 
4023  for (int OpIdx : OpIndices) {
4024  if (OpIdx == -1)
4025  continue;
4026 
4027  const MCOperand &MO = Inst.getOperand(OpIdx);
4028  if (!MO.isImm() && !MO.isExpr())
4029  continue;
4030  if (!AMDGPU::isSISrcOperand(Desc, OpIdx))
4031  continue;
4032 
4033  if (OpIdx == Src2Idx && (Desc.TSFlags & SIInstrFlags::IsMAI) &&
4034  getFeatureBits()[AMDGPU::FeatureMFMAInlineLiteralBug]) {
4035  Error(getConstLoc(Operands),
4036  "inline constants are not allowed for this operand");
4037  return false;
4038  }
4039 
4040  if (MO.isImm() && !isInlineConstant(Inst, OpIdx)) {
4041  uint32_t Value = static_cast<uint32_t>(MO.getImm());
4042  if (NumLiterals == 0 || LiteralValue != Value) {
4043  LiteralValue = Value;
4044  ++NumLiterals;
4045  }
4046  } else if (MO.isExpr()) {
4047  ++NumExprs;
4048  }
4049  }
4050  NumLiterals += NumExprs;
4051 
4052  if (!NumLiterals)
4053  return true;
4054 
4055  if (ImmIdx == -1 && !getFeatureBits()[AMDGPU::FeatureVOP3Literal]) {
4056  Error(getLitLoc(Operands), "literal operands are not supported");
4057  return false;
4058  }
4059 
4060  if (NumLiterals > 1) {
4061  Error(getLitLoc(Operands), "only one literal operand is allowed");
4062  return false;
4063  }
4064 
4065  return true;
4066 }
4067 
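// Illustrative example (editorial addition, not part of the original source):
//   v_add_f32_e64 v0, 0x3e800000, v1
// carries a VOP3 literal, which is accepted only with FeatureVOP3Literal
// (GFX10+); earlier targets report "literal operands are not supported".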
4068 // Returns -1 if not a register, 0 if VGPR and 1 if AGPR.
4069 static int IsAGPROperand(const MCInst &Inst, uint16_t NameIdx,
4070  const MCRegisterInfo *MRI) {
4071  int OpIdx = AMDGPU::getNamedOperandIdx(Inst.getOpcode(), NameIdx);
4072  if (OpIdx < 0)
4073  return -1;
4074 
4075  const MCOperand &Op = Inst.getOperand(OpIdx);
4076  if (!Op.isReg())
4077  return -1;
4078 
4079  unsigned Sub = MRI->getSubReg(Op.getReg(), AMDGPU::sub0);
4080  auto Reg = Sub ? Sub : Op.getReg();
4081  const MCRegisterClass &AGPR32 = MRI->getRegClass(AMDGPU::AGPR_32RegClassID);
4082  return AGPR32.contains(Reg) ? 1 : 0;
4083 }
4084 
4085 bool AMDGPUAsmParser::validateAGPRLdSt(const MCInst &Inst) const {
4086  uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
4087  if ((TSFlags & (SIInstrFlags::FLAT | SIInstrFlags::MUBUF |
4088  SIInstrFlags::MTBUF |
4089  SIInstrFlags::DS)) == 0)
4090  return true;
4091 
4092  uint16_t DataNameIdx = (TSFlags & SIInstrFlags::DS) ? AMDGPU::OpName::data0
4093  : AMDGPU::OpName::vdata;
4094 
4095  const MCRegisterInfo *MRI = getMRI();
4096  int DstAreg = IsAGPROperand(Inst, AMDGPU::OpName::vdst, MRI);
4097  int DataAreg = IsAGPROperand(Inst, DataNameIdx, MRI);
4098 
4099  if ((TSFlags & SIInstrFlags::DS) && DataAreg >= 0) {
4100  int Data2Areg = IsAGPROperand(Inst, AMDGPU::OpName::data1, MRI);
4101  if (Data2Areg >= 0 && Data2Areg != DataAreg)
4102  return false;
4103  }
4104 
4105  auto FB = getFeatureBits();
4106  if (FB[AMDGPU::FeatureGFX90AInsts]) {
4107  if (DataAreg < 0 || DstAreg < 0)
4108  return true;
4109  return DstAreg == DataAreg;
4110  }
4111 
4112  return DstAreg < 1 && DataAreg < 1;
4113 }
4114 
4115 bool AMDGPUAsmParser::validateVGPRAlign(const MCInst &Inst) const {
4116  auto FB = getFeatureBits();
4117  if (!FB[AMDGPU::FeatureGFX90AInsts])
4118  return true;
4119 
4120  const MCRegisterInfo *MRI = getMRI();
4121  const MCRegisterClass &VGPR32 = MRI->getRegClass(AMDGPU::VGPR_32RegClassID);
4122  const MCRegisterClass &AGPR32 = MRI->getRegClass(AMDGPU::AGPR_32RegClassID);
4123  for (unsigned I = 0, E = Inst.getNumOperands(); I != E; ++I) {
4124  const MCOperand &Op = Inst.getOperand(I);
4125  if (!Op.isReg())
4126  continue;
4127 
4128  unsigned Sub = MRI->getSubReg(Op.getReg(), AMDGPU::sub0);
4129  if (!Sub)
4130  continue;
4131 
4132  if (VGPR32.contains(Sub) && ((Sub - AMDGPU::VGPR0) & 1))
4133  return false;
4134  if (AGPR32.contains(Sub) && ((Sub - AMDGPU::AGPR0) & 1))
4135  return false;
4136  }
4137 
4138  return true;
4139 }
4140 
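// Illustrative example (editorial addition, not part of the original source;
// gfx90a syntax assumed): multi-dword VGPR/AGPR tuples must start on an even
// register, e.g.
//   global_load_dwordx2 v[2:3], v[0:1], off   ; accepted
//   global_load_dwordx2 v[3:4], v[0:1], off   ; rejected, odd-aligned tuple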
4141 // gfx90a has an undocumented limitation:
4142 // DS_GWS opcodes must use even aligned registers.
4143 bool AMDGPUAsmParser::validateGWS(const MCInst &Inst,
4144  const OperandVector &Operands) {
4145  if (!getFeatureBits()[AMDGPU::FeatureGFX90AInsts])
4146  return true;
4147 
4148  int Opc = Inst.getOpcode();
4149  if (Opc != AMDGPU::DS_GWS_INIT_vi && Opc != AMDGPU::DS_GWS_BARRIER_vi &&
4150  Opc != AMDGPU::DS_GWS_SEMA_BR_vi)
4151  return true;
4152 
4153  const MCRegisterInfo *MRI = getMRI();
4154  const MCRegisterClass &VGPR32 = MRI->getRegClass(AMDGPU::VGPR_32RegClassID);
4155  int Data0Pos =
4156  AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::data0);
4157  assert(Data0Pos != -1);
4158  auto Reg = Inst.getOperand(Data0Pos).getReg();
4159  auto RegIdx = Reg - (VGPR32.contains(Reg) ? AMDGPU::VGPR0 : AMDGPU::AGPR0);
4160  if (RegIdx & 1) {
4161  SMLoc RegLoc = getRegLoc(Reg, Operands);
4162  Error(RegLoc, "vgpr must be even aligned");
4163  return false;
4164  }
4165 
4166  return true;
4167 }
4168 
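// Illustrative example (editorial addition, not part of the original source;
// operand form assumed): on gfx90a the DS_GWS data operand must be even, e.g.
//   ds_gws_init v2 gds    ; accepted
//   ds_gws_init v3 gds    ; rejected: "vgpr must be even aligned"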
4169 bool AMDGPUAsmParser::validateCoherencyBits(const MCInst &Inst,
4170  const OperandVector &Operands,
4171  const SMLoc &IDLoc) {
4172  int CPolPos = AMDGPU::getNamedOperandIdx(Inst.getOpcode(),
4173  AMDGPU::OpName::cpol);
4174  if (CPolPos == -1)
4175  return true;
4176 
4177  unsigned CPol = Inst.getOperand(CPolPos).getImm();
4178 
4179  uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
4180  if ((TSFlags & (SIInstrFlags::SMRD)) &&
4181  (CPol & ~(CPol::GLC | CPol::DLC))) {
4182  Error(IDLoc, "invalid cache policy for SMRD instruction");
4183  return false;
4184  }
4185 
4186  if (isGFX90A() && (CPol & CPol::SCC)) {
4187  SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands);
4188  StringRef CStr(S.getPointer());
4189  S = SMLoc::getFromPointer(&CStr.data()[CStr.find("scc")]);
4190  Error(S, "scc is not supported on this GPU");
4191  return false;
4192  }
4193 
4194  if (!(TSFlags & (SIInstrFlags::IsAtomicNoRet | SIInstrFlags::IsAtomicRet)))
4195  return true;
4196 
4197  if (TSFlags & SIInstrFlags::IsAtomicRet) {
4198  if (!(TSFlags & SIInstrFlags::MIMG) && !(CPol & CPol::GLC)) {
4199  Error(IDLoc, "instruction must use glc");
4200  return false;
4201  }
4202  } else {
4203  if (CPol & CPol::GLC) {
4204  SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands);
4205  StringRef CStr(S.getPointer());
4206  S = SMLoc::getFromPointer(&CStr.data()[CStr.find("glc")]);
4207  Error(S, "instruction must not use glc");
4208  return false;
4209  }
4210  }
4211 
4212  return true;
4213 }
4214 
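// Illustrative example (editorial addition, not part of the original source;
// gfx9 syntax assumed): returning atomics must use glc, non-returning must not:
//   global_atomic_add v0, v[2:3], v4, off glc   ; returning form, glc required
//   global_atomic_add v[2:3], v4, off glc       ; non-returning form, glc rejected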
4215 bool AMDGPUAsmParser::validateInstruction(const MCInst &Inst,
4216  const SMLoc &IDLoc,
4217  const OperandVector &Operands) {
4218  if (auto ErrMsg = validateLdsDirect(Inst)) {
4219  Error(getRegLoc(LDS_DIRECT, Operands), *ErrMsg);
4220  return false;
4221  }
4222  if (!validateSOPLiteral(Inst)) {
4223  Error(getLitLoc(Operands),
4224  "only one literal operand is allowed");
4225  return false;
4226  }
4227  if (!validateVOPLiteral(Inst, Operands)) {
4228  return false;
4229  }
4230  if (!validateConstantBusLimitations(Inst, Operands)) {
4231  return false;
4232  }
4233  if (!validateEarlyClobberLimitations(Inst, Operands)) {
4234  return false;
4235  }
4236  if (!validateIntClampSupported(Inst)) {
4237  Error(getImmLoc(AMDGPUOperand::ImmTyClampSI, Operands),
4238  "integer clamping is not supported on this GPU");
4239  return false;
4240  }
4241  if (!validateOpSel(Inst)) {
4242  Error(getImmLoc(AMDGPUOperand::ImmTyOpSel, Operands),
4243  "invalid op_sel operand");
4244  return false;
4245  }
4246  if (!validateDPP(Inst, Operands)) {
4247  return false;
4248  }
4249  // For MUBUF/MTBUF, d16 is part of the opcode, so there is nothing to validate.
4250  if (!validateMIMGD16(Inst)) {
4251  Error(getImmLoc(AMDGPUOperand::ImmTyD16, Operands),
4252  "d16 modifier is not supported on this GPU");
4253  return false;
4254  }
4255  if (!validateMIMGDim(Inst)) {
4256  Error(IDLoc, "dim modifier is required on this GPU");
4257  return false;
4258  }
4259  if (!validateMIMGMSAA(Inst)) {
4260  Error(getImmLoc(AMDGPUOperand::ImmTyDim, Operands),
4261  "invalid dim; must be MSAA type");
4262  return false;
4263  }
4264  if (!validateMIMGDataSize(Inst)) {
4265  Error(IDLoc,
4266  "image data size does not match dmask and tfe");
4267  return false;
4268  }
4269  if (!validateMIMGAddrSize(Inst)) {
4270  Error(IDLoc,
4271  "image address size does not match dim and a16");
4272  return false;
4273  }
4274  if (!validateMIMGAtomicDMask(Inst)) {
4275  Error(getImmLoc(AMDGPUOperand::ImmTyDMask, Operands),
4276  "invalid atomic image dmask");
4277  return false;
4278  }
4279  if (!validateMIMGGatherDMask(Inst)) {
4280  Error(getImmLoc(AMDGPUOperand::ImmTyDMask, Operands),
4281  "invalid image_gather dmask: only one bit must be set");
4282  return false;
4283  }
4284  if (!validateMovrels(Inst, Operands)) {
4285  return false;
4286  }
4287  if (!validateFlatOffset(Inst, Operands)) {
4288  return false;
4289  }
4290  if (!validateSMEMOffset(Inst, Operands)) {
4291  return false;
4292  }
4293  if (!validateMAIAccWrite(Inst, Operands)) {
4294  return false;
4295  }
4296  if (!validateCoherencyBits(Inst, Operands, IDLoc)) {
4297  return false;
4298  }
4299 
4300  if (!validateAGPRLdSt(Inst)) {
4301  Error(IDLoc, getFeatureBits()[AMDGPU::FeatureGFX90AInsts]
4302  ? "invalid register class: data and dst should be all VGPR or AGPR"
4303  : "invalid register class: agpr loads and stores not supported on this GPU"
4304  );
4305  return false;
4306  }
4307  if (!validateVGPRAlign(Inst)) {
4308  Error(IDLoc,
4309  "invalid register class: vgpr tuples must be 64 bit aligned");
4310  return false;
4311  }
4312  if (!validateGWS(Inst, Operands)) {
4313  return false;
4314  }
4315 
4316  if (!validateDivScale(Inst)) {
4317  Error(IDLoc, "ABS not allowed in VOP3B instructions");
4318  return false;
4319  }
4320  if (!validateCoherencyBits(Inst, Operands, IDLoc)) {
4321  return false;
4322  }
4323 
4324  return true;
4325 }
4326 
4327 static std::string AMDGPUMnemonicSpellCheck(StringRef S,
4328  const FeatureBitset &FBS,
4329  unsigned VariantID = 0);
4330 
4331 static bool AMDGPUCheckMnemonic(StringRef Mnemonic,
4332  const FeatureBitset &AvailableFeatures,
4333  unsigned VariantID);
4334 
4335 bool AMDGPUAsmParser::isSupportedMnemo(StringRef Mnemo,
4336  const FeatureBitset &FBS) {
4337  return isSupportedMnemo(Mnemo, FBS, getAllVariants());
4338 }
4339 
4340 bool AMDGPUAsmParser::isSupportedMnemo(StringRef Mnemo,
4341  const FeatureBitset &FBS,
4342  ArrayRef<unsigned> Variants) {
4343  for (auto Variant : Variants) {
4344  if (AMDGPUCheckMnemonic(Mnemo, FBS, Variant))
4345  return true;
4346  }
4347 
4348  return false;
4349 }
4350 
4351 bool AMDGPUAsmParser::checkUnsupportedInstruction(StringRef Mnemo,
4352  const SMLoc &IDLoc) {
4353  FeatureBitset FBS = ComputeAvailableFeatures(getSTI().getFeatureBits());
4354 
4355  // Check if requested instruction variant is supported.
4356  if (isSupportedMnemo(Mnemo, FBS, getMatchedVariants()))
4357  return false;
4358 
4359  // This instruction is not supported.
4360  // Clear any other pending errors because they are no longer relevant.
4361  getParser().clearPendingErrors();
4362 
4363  // Requested instruction variant is not supported.
4364  // Check if any other variants are supported.
4365  StringRef VariantName = getMatchedVariantName();
4366  if (!VariantName.empty() && isSupportedMnemo(Mnemo, FBS)) {
4367  return Error(IDLoc,
4368  Twine(VariantName,
4369  " variant of this instruction is not supported"));
4370  }
4371 
4372  // Finally check if this instruction is supported on any other GPU.
4373  if (isSupportedMnemo(Mnemo, FeatureBitset().set())) {
4374  return Error(IDLoc, "instruction not supported on this GPU");
4375  }
4376 
4377  // Instruction not supported on any GPU. Probably a typo.
4378  std::string Suggestion = AMDGPUMnemonicSpellCheck(Mnemo, FBS);
4379  return Error(IDLoc, "invalid instruction" + Suggestion);
4380 }
4381 
4382 bool AMDGPUAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
4383  OperandVector &Operands,
4384  MCStreamer &Out,
4385  uint64_t &ErrorInfo,
4386  bool MatchingInlineAsm) {
4387  MCInst Inst;
4388  unsigned Result = Match_Success;
4389  for (auto Variant : getMatchedVariants()) {
4390  uint64_t EI;
4391  auto R = MatchInstructionImpl(Operands, Inst, EI, MatchingInlineAsm,
4392  Variant);
4393  // We order match statuses from least to most specific, and use the most
4394  // specific status as the result:
4395  // Match_MnemonicFail < Match_InvalidOperand < Match_MissingFeature < Match_PreferE32
4396  if ((R == Match_Success) ||
4397  (R == Match_PreferE32) ||
4398  (R == Match_MissingFeature && Result != Match_PreferE32) ||
4399  (R == Match_InvalidOperand && Result != Match_MissingFeature
4400  && Result != Match_PreferE32) ||
4401  (R == Match_MnemonicFail && Result != Match_InvalidOperand
4402  && Result != Match_MissingFeature
4403  && Result != Match_PreferE32)) {
4404  Result = R;
4405  ErrorInfo = EI;
4406  }
4407  if (R == Match_Success)
4408  break;
4409  }
4410 
4411  if (Result == Match_Success) {
4412  if (!validateInstruction(Inst, IDLoc, Operands)) {
4413  return true;
4414  }
4415  Inst.setLoc(IDLoc);
4416  Out.emitInstruction(Inst, getSTI());
4417  return false;
4418  }
4419 
4420  StringRef Mnemo = ((AMDGPUOperand &)*Operands[0]).getToken();
4421  if (checkUnsupportedInstruction(Mnemo, IDLoc)) {
4422  return true;
4423  }
4424 
4425  switch (Result) {
4426  default: break;
4427  case Match_MissingFeature:
4428  // It has been verified that the specified instruction
4429  // mnemonic is valid. A match was found but it requires
4430  // features which are not supported on this GPU.
4431  return Error(IDLoc, "operands are not valid for this GPU or mode");
4432 
4433  case Match_InvalidOperand: {
4434  SMLoc ErrorLoc = IDLoc;
4435  if (ErrorInfo != ~0ULL) {
4436  if (ErrorInfo >= Operands.size()) {
4437  return Error(IDLoc, "too few operands for instruction");
4438  }
4439  ErrorLoc = ((AMDGPUOperand &)*Operands[ErrorInfo]).getStartLoc();
4440  if (ErrorLoc == SMLoc())
4441  ErrorLoc = IDLoc;
4442  }
4443  return Error(ErrorLoc, "invalid operand for instruction");
4444  }
4445 
4446  case Match_PreferE32:
4447  return Error(IDLoc, "internal error: instruction without _e64 suffix "
4448  "should be encoded as e32");
4449  case Match_MnemonicFail:
4450  llvm_unreachable("Invalid instructions should have been handled already");
4451  }
4452  llvm_unreachable("Implement any new match types added!");
4453 }
4454 
4455 bool AMDGPUAsmParser::ParseAsAbsoluteExpression(uint32_t &Ret) {
4456  int64_t Tmp = -1;
4457  if (!isToken(AsmToken::Integer) && !isToken(AsmToken::Identifier)) {
4458  return true;
4459  }
4460  if (getParser().parseAbsoluteExpression(Tmp)) {
4461  return true;
4462  }
4463  Ret = static_cast<uint32_t>(Tmp);
4464  return false;
4465 }
4466 
4467 bool AMDGPUAsmParser::ParseDirectiveMajorMinor(uint32_t &Major,
4468  uint32_t &Minor) {
4469  if (ParseAsAbsoluteExpression(Major))
4470  return TokError("invalid major version");
4471 
4472  if (!trySkipToken(AsmToken::Comma))
4473  return TokError("minor version number required, comma expected");
4474 
4475  if (ParseAsAbsoluteExpression(Minor))
4476  return TokError("invalid minor version");
4477 
4478  return false;
4479 }
4480 
4481 bool AMDGPUAsmParser::ParseDirectiveAMDGCNTarget() {
4482  if (getSTI().getTargetTriple().getArch() != Triple::amdgcn)
4483  return TokError("directive only supported for amdgcn architecture");
4484 
4485  std::string TargetIDDirective;
4486  SMLoc TargetStart = getTok().getLoc();
4487  if (getParser().parseEscapedString(TargetIDDirective))
4488  return true;
4489 
4490  SMRange TargetRange = SMRange(TargetStart, getTok().getLoc());
4491  if (getTargetStreamer().getTargetID()->toString() != TargetIDDirective)
4492  return getParser().Error(TargetRange.Start,
4493  (Twine(".amdgcn_target directive's target id ") +
4494  Twine(TargetIDDirective) +
4495  Twine(" does not match the specified target id ") +
4496  Twine(getTargetStreamer().getTargetID()->toString())).str());
4497 
4498  return false;
4499 }
4500 
4501 bool AMDGPUAsmParser::OutOfRangeError(SMRange Range) {
4502  return Error(Range.Start, "value out of range", Range);
4503 }
4504 
4505 bool AMDGPUAsmParser::calculateGPRBlocks(
4506  const FeatureBitset &Features, bool VCCUsed, bool FlatScrUsed,
4507  bool XNACKUsed, Optional<bool> EnableWavefrontSize32, unsigned NextFreeVGPR,
4508  SMRange VGPRRange, unsigned NextFreeSGPR, SMRange SGPRRange,
4509  unsigned &VGPRBlocks, unsigned &SGPRBlocks) {
4510  // TODO(scott.linder): These calculations are duplicated from
4511  // AMDGPUAsmPrinter::getSIProgramInfo and could be unified.
4512  IsaVersion Version = getIsaVersion(getSTI().getCPU());
4513 
4514  unsigned NumVGPRs = NextFreeVGPR;
4515  unsigned NumSGPRs = NextFreeSGPR;
4516 
4517  if (Version.Major >= 10)
4518  NumSGPRs = 0;
4519  else {
4520  unsigned MaxAddressableNumSGPRs =
4521  IsaInfo::getAddressableNumSGPRs(&getSTI());
4522 
4523  if (Version.Major >= 8 && !Features.test(FeatureSGPRInitBug) &&
4524  NumSGPRs > MaxAddressableNumSGPRs)
4525  return OutOfRangeError(SGPRRange);
4526 
4527  NumSGPRs +=
4528  IsaInfo::getNumExtraSGPRs(&getSTI(), VCCUsed, FlatScrUsed, XNACKUsed);
4529 
4530  if ((Version.Major <= 7 || Features.test(FeatureSGPRInitBug)) &&
4531  NumSGPRs > MaxAddressableNumSGPRs)
4532  return OutOfRangeError(SGPRRange);
4533 
4534  if (Features.test(FeatureSGPRInitBug))
4536  }
4537 
4538  VGPRBlocks =
4539  IsaInfo::getNumVGPRBlocks(&getSTI(), NumVGPRs, EnableWavefrontSize32);
4540  SGPRBlocks = IsaInfo::getNumSGPRBlocks(&getSTI(), NumSGPRs);
4541 
4542  return false;
4543 }
4544 
4545 bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() {
4546  if (getSTI().getTargetTriple().getArch() != Triple::amdgcn)
4547  return TokError("directive only supported for amdgcn architecture");
4548 
4549  if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA)
4550  return TokError("directive only supported for amdhsa OS");
4551 
4552  StringRef KernelName;
4553  if (getParser().parseIdentifier(KernelName))
4554  return true;
4555 
4557 
4558  StringSet<> Seen;
4559 
4560  IsaVersion IVersion = getIsaVersion(getSTI().getCPU());
4561 
4562  SMRange VGPRRange;
4563  uint64_t NextFreeVGPR = 0;
4564  uint64_t AccumOffset = 0;
4565  SMRange SGPRRange;
4566  uint64_t NextFreeSGPR = 0;
4567  unsigned UserSGPRCount = 0;
4568  bool ReserveVCC = true;
4569  bool ReserveFlatScr = true;
4570  Optional<bool> EnableWavefrontSize32;
4571 
4572  while (true) {
4573  while (trySkipToken(AsmToken::EndOfStatement));
4574 
4575  StringRef ID;
4576  SMRange IDRange = getTok().getLocRange();
4577  if (!parseId(ID, "expected .amdhsa_ directive or .end_amdhsa_kernel"))
4578  return true;
4579 
4580  if (ID == ".end_amdhsa_kernel")
4581  break;
4582 
4583  if (Seen.find(ID) != Seen.end())
4584  return TokError(".amdhsa_ directives cannot be repeated");
4585  Seen.insert(ID);
4586 
4587  SMLoc ValStart = getLoc();
4588  int64_t IVal;
4589  if (getParser().parseAbsoluteExpression(IVal))
4590  return true;
4591  SMLoc ValEnd = getLoc();
4592  SMRange ValRange = SMRange(ValStart, ValEnd);
4593 
4594  if (IVal < 0)
4595  return OutOfRangeError(ValRange);
4596 
4597  uint64_t Val = IVal;
4598 
4599 #define PARSE_BITS_ENTRY(FIELD, ENTRY, VALUE, RANGE) \
4600  if (!isUInt<ENTRY##_WIDTH>(VALUE)) \
4601  return OutOfRangeError(RANGE); \
4602  AMDHSA_BITS_SET(FIELD, ENTRY, VALUE);
4603 
4604  if (ID == ".amdhsa_group_segment_fixed_size") {
4605  if (!isUInt<sizeof(KD.group_segment_fixed_size) * CHAR_BIT>(Val))
4606  return OutOfRangeError(ValRange);
4607  KD.group_segment_fixed_size = Val;
4608  } else if (ID == ".amdhsa_private_segment_fixed_size") {
4609  if (!isUInt<sizeof(KD.private_segment_fixed_size) * CHAR_BIT>(Val))
4610  return OutOfRangeError(ValRange);
4611  KD.private_segment_fixed_size = Val;
4612  } else if (ID == ".amdhsa_kernarg_size") {
4613  if (!isUInt<sizeof(KD.kernarg_size) * CHAR_BIT>(Val))
4614  return OutOfRangeError(ValRange);
4615  KD.kernarg_size = Val;
4616  } else if (ID == ".amdhsa_user_sgpr_private_segment_buffer") {
4618  return Error(IDRange.Start,
4619  "directive is not supported with architected flat scratch",
4620  IDRange);
4622  KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER,
4623  Val, ValRange);
4624  if (Val)
4625  UserSGPRCount += 4;
4626  } else if (ID == ".amdhsa_user_sgpr_dispatch_ptr") {
4628  KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR, Val,
4629  ValRange);
4630  if (Val)
4631  UserSGPRCount += 2;
4632  } else if (ID == ".amdhsa_user_sgpr_queue_ptr") {
4634  KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR, Val,
4635  ValRange);
4636  if (Val)
4637  UserSGPRCount += 2;
4638  } else if (ID == ".amdhsa_user_sgpr_kernarg_segment_ptr") {
4640  KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR,
4641  Val, ValRange);
4642  if (Val)
4643  UserSGPRCount += 2;
4644  } else if (ID == ".amdhsa_user_sgpr_dispatch_id") {
4646  KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID, Val,
4647  ValRange);
4648  if (Val)
4649  UserSGPRCount += 2;
4650  } else if (ID == ".amdhsa_user_sgpr_flat_scratch_init") {
4652  return Error(IDRange.Start,
4653  "directive is not supported with architected flat scratch",
4654  IDRange);
4656  KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT, Val,
4657  ValRange);
4658  if (Val)
4659  UserSGPRCount += 2;
4660  } else if (ID == ".amdhsa_user_sgpr_private_segment_size") {
4662  KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE,
4663  Val, ValRange);
4664  if (Val)
4665  UserSGPRCount += 1;
4666  } else if (ID == ".amdhsa_wavefront_size32") {
4667  if (IVersion.Major < 10)
4668  return Error(IDRange.Start, "directive requires gfx10+", IDRange);
4669  EnableWavefrontSize32 = Val;
4671  KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32,
4672  Val, ValRange);
4673  } else if (ID == ".amdhsa_system_sgpr_private_segment_wavefront_offset") {
4675  return Error(IDRange.Start,
4676  "directive is not supported with architected flat scratch",
4677  IDRange);
4679  COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT, Val, ValRange);
4680  } else if (ID == ".amdhsa_enable_private_segment") {
4682  return Error(
4683  IDRange.Start,
4684  "directive is not supported without architected flat scratch",
4685  IDRange);
4687  COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT, Val, ValRange);
4688  } else if (ID == ".amdhsa_system_sgpr_workgroup_id_x") {
4690  COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X, Val,
4691  ValRange);
4692  } else if (ID == ".amdhsa_system_sgpr_workgroup_id_y") {
4694  COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Y, Val,
4695  ValRange);
4696  } else if (ID == ".amdhsa_system_sgpr_workgroup_id_z") {
4698  COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Z, Val,
4699  ValRange);
4700  } else if (ID == ".amdhsa_system_sgpr_workgroup_info") {
4702  COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_INFO, Val,
4703  ValRange);
4704  } else if (ID == ".amdhsa_system_vgpr_workitem_id") {
4706  COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID, Val,
4707  ValRange);
4708  } else if (ID == ".amdhsa_next_free_vgpr") {
4709  VGPRRange = ValRange;
4710  NextFreeVGPR = Val;
4711  } else if (ID == ".amdhsa_next_free_sgpr") {
4712  SGPRRange = ValRange;
4713  NextFreeSGPR = Val;
4714  } else if (ID == ".amdhsa_accum_offset") {
4715  if (!isGFX90A())
4716  return Error(IDRange.Start, "directive requires gfx90a+", IDRange);
4717  AccumOffset = Val;
4718  } else if (ID == ".amdhsa_reserve_vcc") {
4719  if (!isUInt<1>(Val))
4720  return OutOfRangeError(ValRange);
4721  ReserveVCC = Val;
4722  } else if (ID == ".amdhsa_reserve_flat_scratch") {
4723  if (IVersion.Major < 7)
4724  return Error(IDRange.Start, "directive requires gfx7+", IDRange);
4726  return Error(IDRange.Start,
4727  "directive is not supported with architected flat scratch",
4728  IDRange);
4729  if (!isUInt<1>(Val))
4730  return OutOfRangeError(ValRange);
4731  ReserveFlatScr = Val;
4732  } else if (ID == ".amdhsa_reserve_xnack_mask") {
4733  if (IVersion.Major < 8)
4734  return Error(IDRange.Start, "directive requires gfx8+", IDRange);
4735  if (!isUInt<1>(Val))
4736  return OutOfRangeError(ValRange);
4737  if (Val != getTargetStreamer().getTargetID()->isXnackOnOrAny())
4738  return getParser().Error(IDRange.Start, ".amdhsa_reserve_xnack_mask does not match target id",
4739  IDRange);
4740  } else if (ID == ".amdhsa_float_round_mode_32") {
4742  COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32, Val, ValRange);
4743  } else if (ID == ".amdhsa_float_round_mode_16_64") {
4745  COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_16_64, Val, ValRange);
4746  } else if (ID == ".amdhsa_float_denorm_mode_32") {
4748  COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_32, Val, ValRange);
4749  } else if (ID == ".amdhsa_float_denorm_mode_16_64") {
4751  COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64, Val,
4752  ValRange);
4753  } else if (ID == ".amdhsa_dx10_clamp") {
4755  COMPUTE_PGM_RSRC1_ENABLE_DX10_CLAMP, Val, ValRange);
4756  } else if (ID == ".amdhsa_ieee_mode") {
4757  PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_ENABLE_IEEE_MODE,
4758  Val, ValRange);
4759  } else if (ID == ".amdhsa_fp16_overflow") {
4760  if (IVersion.Major < 9)
4761  return Error(IDRange.Start, "directive requires gfx9+", IDRange);
4762  PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_FP16_OVFL, Val,
4763  ValRange);
4764  } else if (ID == ".amdhsa_tg_split") {
4765  if (!isGFX90A())
4766  return Error(IDRange.Start, "directive requires gfx90a+", IDRange);
4767  PARSE_BITS_ENTRY(KD.compute_pgm_rsrc3, COMPUTE_PGM_RSRC3_GFX90A_TG_SPLIT, Val,
4768  ValRange);
4769  } else if (ID == ".amdhsa_workgroup_processor_mode") {
4770  if (IVersion.Major < 10)
4771  return Error(IDRange.Start, "directive requires gfx10+", IDRange);
4772  PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_WGP_MODE, Val,
4773  ValRange);
4774  } else if (ID == ".amdhsa_memory_ordered") {
4775  if (IVersion.Major < 10)
4776  return Error(IDRange.Start, "directive requires gfx10+", IDRange);
4777  PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_MEM_ORDERED, Val,
4778  ValRange);
4779  } else if (ID == ".amdhsa_forward_progress") {
4780  if (IVersion.Major < 10)
4781  return Error(IDRange.Start, "directive requires gfx10+", IDRange);
4782  PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_FWD_PROGRESS, Val,
4783  ValRange);
4784  } else if (ID == ".amdhsa_exception_fp_ieee_invalid_op") {
4786  KD.compute_pgm_rsrc2,
4787  COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION, Val,
4788  ValRange);
4789  } else if (ID == ".amdhsa_exception_fp_denorm_src") {
4791  COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_FP_DENORMAL_SOURCE,
4792  Val, ValRange);
4793  } else if (ID == ".amdhsa_exception_fp_ieee_div_zero") {
4795  KD.compute_pgm_rsrc2,
4796  COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO, Val,
4797  ValRange);
4798  } else if (ID == ".amdhsa_exception_fp_ieee_overflow") {
4800  COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW,
4801  Val, ValRange);
4802  } else if (ID == ".amdhsa_exception_fp_ieee_underflow") {
4804  COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW,
4805  Val, ValRange);
4806  } else if (ID == ".amdhsa_exception_fp_ieee_inexact") {
4808  COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INEXACT,
4809  Val, ValRange);
4810  } else if (ID == ".amdhsa_exception_int_div_zero") {
4812  COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO,
4813  Val, ValRange);
4814  } else {
4815  return Error(IDRange.Start, "unknown .amdhsa_kernel directive", IDRange);
4816  }
4817 
4818 #undef PARSE_BITS_ENTRY
4819  }
4820 
4821  if (Seen.find(".amdhsa_next_free_vgpr") == Seen.end())
4822  return TokError(".amdhsa_next_free_vgpr directive is required");
4823 
4824  if (Seen.find(".amdhsa_next_free_sgpr") == Seen.end())
4825  return TokError(".amdhsa_next_free_sgpr directive is required");
4826 
4827  unsigned VGPRBlocks;
4828  unsigned SGPRBlocks;
4829  if (calculateGPRBlocks(getFeatureBits(), ReserveVCC, ReserveFlatScr,
4830  getTargetStreamer().getTargetID()->isXnackOnOrAny(),
4831  EnableWavefrontSize32, NextFreeVGPR,
4832  VGPRRange, NextFreeSGPR, SGPRRange, VGPRBlocks,
4833  SGPRBlocks))
4834  return true;
4835 
4836  if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT_WIDTH>(
4837  VGPRBlocks))
4838  return OutOfRangeError(VGPRRange);
4839  AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
4840  COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT, VGPRBlocks);
4841 
4842  if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT_WIDTH>(
4843  SGPRBlocks))
4844  return OutOfRangeError(SGPRRange);
4845  AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
4846  COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT,
4847  SGPRBlocks);
4848 
4849  if (!isUInt<COMPUTE_PGM_RSRC2_USER_SGPR_COUNT_WIDTH>(UserSGPRCount))
4850  return TokError("too many user SGPRs enabled");
4851  AMDHSA_BITS_SET(KD.compute_pgm_rsrc2, COMPUTE_PGM_RSRC2_USER_SGPR_COUNT,
4852  UserSGPRCount);
4853 
4854  if (isGFX90A()) {
4855  if (Seen.find(".amdhsa_accum_offset") == Seen.end())
4856  return TokError(".amdhsa_accum_offset directive is required");
4857  if (AccumOffset < 4 || AccumOffset > 256 || (AccumOffset & 3))
4858  return TokError("accum_offset should be in range [4..256] in "
4859  "increments of 4");
4860  if (AccumOffset > alignTo(std::max((uint64_t)1, NextFreeVGPR), 4))
4861  return TokError("accum_offset exceeds total VGPR allocation");
4862  AMDHSA_BITS_SET(KD.compute_pgm_rsrc3, COMPUTE_PGM_RSRC3_GFX90A_ACCUM_OFFSET,
4863  (AccumOffset / 4 - 1));
4864  }
4865 
4866  getTargetStreamer().EmitAmdhsaKernelDescriptor(
4867  getSTI(), KernelName, KD, NextFreeVGPR, NextFreeSGPR, ReserveVCC,
4868  ReserveFlatScr);
4869  return false;
4870 }
4871 
4872 bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectVersion() {
4873  uint32_t Major;
4874  uint32_t Minor;
4875 
4876  if (ParseDirectiveMajorMinor(Major, Minor))
4877  return true;
4878 
4879  getTargetStreamer().EmitDirectiveHSACodeObjectVersion(Major, Minor);
4880  return false;
4881 }
4882 
4883 bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectISA() {
4884  uint32_t Major;
4885  uint32_t Minor;
4886  uint32_t Stepping;
4887  StringRef VendorName;
4888  StringRef ArchName;
4889 
4890  // If this directive has no arguments, then use the ISA version for the
4891  // targeted GPU.
4892  if (isToken(AsmToken::EndOfStatement)) {
4893  IsaVersion ISA = getIsaVersion(getSTI().getCPU());
4894  getTargetStreamer().EmitDirectiveHSACodeObjectISAV2(ISA.Major, ISA.Minor,
4895  ISA.Stepping,
4896  "AMD", "AMDGPU");
4897  return false;
4898  }
4899 
4900  if (ParseDirectiveMajorMinor(Major, Minor))
4901  return true;
4902 
4903  if (!trySkipToken(AsmToken::Comma))
4904  return TokError("stepping version number required, comma expected");
4905 
4906  if (ParseAsAbsoluteExpression(Stepping))
4907  return TokError("invalid stepping version");
4908 
4909  if (!trySkipToken(AsmToken::Comma))
4910  return TokError("vendor name required, comma expected");
4911 
4912  if (!parseString(VendorName, "invalid vendor name"))
4913  return true;
4914 
4915  if (!trySkipToken(AsmToken::Comma))
4916  return TokError("arch name required, comma expected");
4917 
4918  if (!parseString(ArchName, "invalid arch name"))
4919  return true;
4920 
4921  getTargetStreamer().EmitDirectiveHSACodeObjectISAV2(Major, Minor, Stepping,
4922  VendorName, ArchName);
4923  return false;
4924 }
4925 
4926 bool AMDGPUAsmParser::ParseAMDKernelCodeTValue(StringRef ID,
4927  amd_kernel_code_t &Header) {
4928  // max_scratch_backing_memory_byte_size is deprecated. Ignore it while parsing
4929  // assembly for backwards compatibility.
4930  if (ID == "max_scratch_backing_memory_byte_size") {
4931  Parser.eatToEndOfStatement();
4932  return false;
4933  }
4934 
4935  SmallString<40> ErrStr;
4936  raw_svector_ostream Err(ErrStr);
4937  if (!parseAmdKernelCodeField(ID, getParser(), Header, Err)) {
4938  return TokError(Err.str());
4939  }
4940  Lex();
4941 
4942  if (ID == "enable_wavefront_size32") {
4944  if (!isGFX10Plus())
4945  return TokError("enable_wavefront_size32=1 is only allowed on GFX10+");
4946  if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32])
4947  return TokError("enable_wavefront_size32=1 requires +WavefrontSize32");
4948  } else {
4949  if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64])
4950  return TokError("enable_wavefront_size32=0 requires +WavefrontSize64");
4951  }
4952  }
4953 
4954  if (ID == "wavefront_size") {
4955  if (Header.wavefront_size == 5) {
4956  if (!isGFX10Plus())
4957  return TokError("wavefront_size=5 is only allowed on GFX10+");
4958  if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32])
4959  return TokError("wavefront_size=5 requires +WavefrontSize32");
4960  } else if (Header.wavefront_size == 6) {
4961  if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64])
4962  return TokError("wavefront_size=6 requires +WavefrontSize64");
4963  }
4964  }
4965 
4966  if (ID == "enable_wgp_mode") {
4968  !isGFX10Plus())
4969  return TokError("enable_wgp_mode=1 is only allowed on GFX10+");
4970  }
4971 
4972  if (ID == "enable_mem_ordered") {
4974  !isGFX10Plus())
4975  return TokError("enable_mem_ordered=1 is only allowed on GFX10+");
4976  }
4977 
4978  if (ID == "enable_fwd_progress") {
4980  !isGFX10Plus())
4981  return TokError("enable_fwd_progress=1 is only allowed on GFX10+");
4982  }
4983 
4984  return false;
4985 }
4986 
4987 bool AMDGPUAsmParser::ParseDirectiveAMDKernelCodeT() {
4988  amd_kernel_code_t Header;
4989  AMDGPU::initDefaultAMDKernelCodeT(Header, &getSTI());
4990 
4991  while (true) {
4992  // Lex EndOfStatement. This is in a while loop, because lexing a comment
4993  // will set the current token to EndOfStatement.
4994  while(trySkipToken(AsmToken::EndOfStatement));
4995 
4996  StringRef ID;
4997  if (!parseId(ID, "expected value identifier or .end_amd_kernel_code_t"))
4998  return true;
4999 
5000  if (ID == ".end_amd_kernel_code_t")
5001  break;
5002 
5003  if (ParseAMDKernelCodeTValue(ID, Header))
5004  return true;
5005  }
5006 
5007  getTargetStreamer().EmitAMDKernelCodeT(Header);
5008 
5009  return false;
5010 }
5011 
5012 bool AMDGPUAsmParser::ParseDirectiveAMDGPUHsaKernel() {
5013  StringRef KernelName;
5014  if (!parseId(KernelName, "expected symbol name"))
5015  return true;
5016 
5017  getTargetStreamer().EmitAMDGPUSymbolType(KernelName,
5018  ELF::STT_AMDGPU_HSA_KERNEL);
5019 
5020  KernelScope.initialize(getContext());
5021  return false;
5022 }
5023 
5024 bool AMDGPUAsmParser::ParseDirectiveISAVersion() {
5025  if (getSTI().getTargetTriple().getArch() != Triple::amdgcn) {
5026  return Error(getLoc(),
5027  ".amd_amdgpu_isa directive is not available on non-amdgcn "
5028  "architectures");
5029  }
5030 
5031  auto TargetIDDirective = getLexer().getTok().getStringContents();
5032  if (getTargetStreamer().getTargetID()->toString() != TargetIDDirective)
5033  return Error(getParser().getTok().getLoc(), "target id must match options");
5034 
5035  getTargetStreamer().EmitISAVersion();
5036  Lex();
5037 
5038  return false;
5039 }
5040 
5041 bool AMDGPUAsmParser::ParseDirectiveHSAMetadata() {
5042  const char *AssemblerDirectiveBegin;
5043  const char *AssemblerDirectiveEnd;
5044  std::tie(AssemblerDirectiveBegin, AssemblerDirectiveEnd) =
5045  isHsaAbiVersion3Or4(&getSTI())
5046  ? std::make_tuple(HSAMD::V3::AssemblerDirectiveBegin,
5047  HSAMD::V3::AssemblerDirectiveEnd)
5048  : std::make_tuple(HSAMD::AssemblerDirectiveBegin,
5049  HSAMD::AssemblerDirectiveEnd);
5050 
5051  if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA) {
5052  return Error(getLoc(),
5053  (Twine(AssemblerDirectiveBegin) + Twine(" directive is "
5054  "not available on non-amdhsa OSes")).str());
5055  }
5056 
5057  std::string HSAMetadataString;
5058  if (ParseToEndDirective(AssemblerDirectiveBegin, AssemblerDirectiveEnd,
5059  HSAMetadataString))
5060  return true;
5061 
5062  if (isHsaAbiVersion3Or4(&getSTI())) {
5063  if (!getTargetStreamer().EmitHSAMetadataV3(HSAMetadataString))
5064  return Error(getLoc(), "invalid HSA metadata");
5065  } else {
5066  if (!getTargetStreamer().EmitHSAMetadataV2(HSAMetadataString))
5067  return Error(getLoc(), "invalid HSA metadata");
5068  }
5069 
5070  return false;
5071 }
5072 
5073 /// Common code to parse out a block of text (typically YAML) between start and
5074 /// end directives.
5075 bool AMDGPUAsmParser::ParseToEndDirective(const char *AssemblerDirectiveBegin,
5076  const char *AssemblerDirectiveEnd,
5077  std::string &CollectString) {
5078 
5079  raw_string_ostream CollectStream(CollectString);
5080 
5081  getLexer().setSkipSpace(false);
5082 
5083  bool FoundEnd = false;
5084  while (!isToken(AsmToken::Eof)) {
5085  while (isToken(AsmToken::Space)) {
5086  CollectStream << getTokenStr();
5087  Lex();
5088  }
5089 
5090  if (trySkipId(AssemblerDirectiveEnd)) {
5091  FoundEnd = true;
5092  break;
5093  }
5094 
5095  CollectStream << Parser.parseStringToEndOfStatement()
5096  << getContext().getAsmInfo()->getSeparatorString();
5097 
5098  Parser.eatToEndOfStatement();
5099  }
5100 
5101  getLexer().setSkipSpace(true);
5102 
5103  if (isToken(AsmToken::Eof) && !FoundEnd) {
5104  return TokError(Twine("expected directive ") +
5105  Twine(AssemblerDirectiveEnd) + Twine(" not found"));
5106  }
5107 
5108  CollectStream.flush();
5109  return false;
5110 }
5111 
5112 /// Parse the assembler directive for new MsgPack-format PAL metadata.
5113 bool AMDGPUAsmParser::ParseDirectivePALMetadataBegin() {
5114  std::string String;
5115  if (ParseToEndDirective(AMDGPU::PALMD::AssemblerDirectiveBegin,
5116  AMDGPU::PALMD::AssemblerDirectiveEnd, String))
5117  return true;
5118 
5119  auto PALMetadata = getTargetStreamer().getPALMetadata();
5120  if (!PALMetadata->setFromString(String))
5121  return Error(getLoc(), "invalid PAL metadata");
5122  return false;
5123 }
5124 
5125 /// Parse the assembler directive for old linear-format PAL metadata.
5126 bool AMDGPUAsmParser::ParseDirectivePALMetadata() {
5127  if (getSTI().getTargetTriple().getOS() != Triple::AMDPAL) {
5128  return Error(getLoc(),
5129  (Twine(PALMD::AssemblerDirective) + Twine(" directive is "
5130  "not available on non-amdpal OSes")).str());
5131  }
5132 
5133  auto PALMetadata = getTargetStreamer().getPALMetadata();
5134  PALMetadata->setLegacy();
5135  for (;;) {
5136  uint32_t Key, Value;
5137  if (ParseAsAbsoluteExpression(Key)) {
5138  return TokError(Twine("invalid value in ") +
5139  Twine(PALMD::AssemblerDirective));
5140  }
5141  if (!trySkipToken(AsmToken::Comma)) {
5142  return TokError(Twine("expected an even number of values in ") +
5143  Twine(PALMD::AssemblerDirective));
5144  }
5145  if (ParseAsAbsoluteExpression(Value)) {
5146  return TokError(Twine("invalid value in ") +
5147  Twine(PALMD::AssemblerDirective));
5148  }
5149  PALMetadata->setRegister(Key, Value);
5150  if (!trySkipToken(AsmToken::Comma))
5151  break;
5152  }
5153  return false;
5154 }
5155 
5156 /// ParseDirectiveAMDGPULDS
5157 /// ::= .amdgpu_lds identifier ',' size_expression [',' align_expression]
5158 bool AMDGPUAsmParser::ParseDirectiveAMDGPULDS() {
5159  if (getParser().checkForValidSection())
5160  return true;
5161 
5162  StringRef Name;
5163  SMLoc NameLoc = getLoc();
5164  if (getParser().parseIdentifier(Name))
5165  return TokError("expected identifier in directive");
5166 
5167  MCSymbol *Symbol = getContext().getOrCreateSymbol(Name);
5168  if (parseToken(AsmToken::Comma, "expected ','"))
5169  return true;
5170 
5171  unsigned LocalMemorySize = AMDGPU::IsaInfo::getLocalMemorySize(&getSTI());
5172 
5173  int64_t Size;
5174  SMLoc SizeLoc = getLoc();
5175  if (getParser().parseAbsoluteExpression(Size))
5176  return true;
5177  if (Size < 0)
5178  return Error(SizeLoc, "size must be non-negative");
5179  if (Size > LocalMemorySize)
5180  return Error(SizeLoc, "size is too large");
5181 
5182  int64_t Alignment = 4;
5183  if (trySkipToken(AsmToken::Comma)) {
5184  SMLoc AlignLoc = getLoc();
5185  if (getParser().parseAbsoluteExpression(Alignment))
5186  return true;
5187  if (Alignment < 0 || !isPowerOf2_64(Alignment))
5188  return Error(AlignLoc, "alignment must be a power of two");
5189 
5190  // Alignment larger than the size of LDS is possible in theory, as long
5191  // as the linker manages to place the symbol at address 0, but we do want
5192  // to make sure the alignment fits nicely into a 32-bit integer.
5193  if (Alignment >= 1u << 31)
5194  return Error(AlignLoc, "alignment is too large");
5195  }
5196 
5197  if (parseToken(AsmToken::EndOfStatement,
5198  "unexpected token in '.amdgpu_lds' directive"))
5199  return true;
5200 
5201  Symbol->redefineIfPossible();
5202  if (!Symbol->isUndefined())
5203  return Error(NameLoc, "invalid symbol redefinition");
5204 
5205  getTargetStreamer().emitAMDGPULDS(Symbol, Size, Align(Alignment));
5206  return false;
5207 }
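// Illustrative usage (the symbol name and operand values are hypothetical),
// following the grammar documented above:
//   .amdgpu_lds my_lds_block, 4096, 16
// This reserves 4096 bytes of LDS for my_lds_block with 16-byte alignment;
// omitting the third operand falls back to the default alignment of 4.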
5208 
5209 bool AMDGPUAsmParser::ParseDirective(AsmToken DirectiveID) {
5210  StringRef IDVal = DirectiveID.getString();
5211 
5212  if (isHsaAbiVersion3Or4(&getSTI())) {
5213  if (IDVal == ".amdhsa_kernel")
5214  return ParseDirectiveAMDHSAKernel();
5215 
5216  // TODO: Restructure/combine with PAL metadata directive.
5217  if (IDVal == HSAMD::V3::AssemblerDirectiveBegin)
5218  return ParseDirectiveHSAMetadata();
5219  } else {
5220  if (IDVal == ".hsa_code_object_version")
5221  return ParseDirectiveHSACodeObjectVersion();
5222 
5223  if (IDVal == ".hsa_code_object_isa")
5224  return ParseDirectiveHSACodeObjectISA();
5225 
5226  if (IDVal == ".amd_kernel_code_t")
5227  return ParseDirectiveAMDKernelCodeT();
5228 
5229  if (IDVal == ".amdgpu_hsa_kernel")
5230  return ParseDirectiveAMDGPUHsaKernel();
5231 
5232  if (IDVal == ".amd_amdgpu_isa")
5233  return ParseDirectiveISAVersion();
5234 
5235  if (IDVal == HSAMD::AssemblerDirectiveBegin)
5236  return ParseDirectiveHSAMetadata();
5237  }
5238 
5239  if (IDVal == ".amdgcn_target")
5240  return ParseDirectiveAMDGCNTarget();
5241 
5242  if (IDVal == ".amdgpu_lds")
5243  return ParseDirectiveAMDGPULDS();
5244 
5245  if (IDVal == PALMD::AssemblerDirectiveBegin)
5246  return ParseDirectivePALMetadataBegin();
5247 
5248  if (IDVal == PALMD::AssemblerDirective)
5249  return ParseDirectivePALMetadata();
5250 
5251  return true;
5252 }
5253 
5254 bool AMDGPUAsmParser::subtargetHasRegister(const MCRegisterInfo &MRI,
5255  unsigned RegNo) {
5256 
5257  for (MCRegAliasIterator R(AMDGPU::TTMP12_TTMP13_TTMP14_TTMP15, &MRI, true);
5258  R.isValid(); ++R) {
5259  if (*R == RegNo)
5260  return isGFX9Plus();
5261  }
5262 
5263  // GFX10 has 2 more SGPRs 104 and 105.
5264  for (MCRegAliasIterator R(AMDGPU::SGPR104_SGPR105, &MRI, true);
5265  R.isValid(); ++R) {
5266  if (*R == RegNo)
5267  return hasSGPR104_SGPR105();
5268  }
5269 
5270  switch (RegNo) {
5271  case AMDGPU::SRC_SHARED_BASE:
5272  case AMDGPU::SRC_SHARED_LIMIT:
5273  case AMDGPU::SRC_PRIVATE_BASE:
5274  case AMDGPU::SRC_PRIVATE_LIMIT:
5275  case AMDGPU::SRC_POPS_EXITING_WAVE_ID:
5276  return isGFX9Plus();
5277  case AMDGPU::TBA:
5278  case AMDGPU::TBA_LO:
5279  case AMDGPU::TBA_HI:
5280  case AMDGPU::TMA:
5281  case AMDGPU::TMA_LO:
5282  case AMDGPU::TMA_HI:
5283  return !isGFX9Plus();
5284  case AMDGPU::XNACK_MASK:
5285  case AMDGPU::XNACK_MASK_LO:
5286  case AMDGPU::XNACK_MASK_HI:
5287  return (isVI() || isGFX9()) && getTargetStreamer().getTargetID()->isXnackSupported();
5288  case AMDGPU::SGPR_NULL:
5289  return isGFX10Plus();
5290  default:
5291  break;
5292  }
5293 
5294  if (isCI())
5295  return true;
5296 
5297  if (isSI() || isGFX10Plus()) {
5298  // No flat_scr on SI.
5299  // On GFX10 flat scratch is not a valid register operand and can only be
5300  // accessed with s_setreg/s_getreg.
5301  switch (RegNo) {
5302  case AMDGPU::FLAT_SCR:
5303  case AMDGPU::FLAT_SCR_LO:
5304  case AMDGPU::FLAT_SCR_HI:
5305  return false;
5306  default:
5307  return true;
5308  }
5309  }
5310 
5311  // VI only has 102 SGPRs, so make sure we aren't trying to use the 2 more that
5312  // SI/CI have.
5313  for (MCRegAliasIterator R(AMDGPU::SGPR102_SGPR103, &MRI, true);
5314  R.isValid(); ++R) {
5315  if (*R == RegNo)
5316  return hasSGPR102_SGPR103();
5317  }
5318 
5319  return true;
5320 }
5321 
5322 OperandMatchResultTy
5323 AMDGPUAsmParser::parseOperand(OperandVector &Operands, StringRef Mnemonic,
5324  OperandMode Mode) {
5325  // Try to parse with a custom parser
5326  OperandMatchResultTy ResTy = MatchOperandParserImpl(Operands, Mnemonic);
5327 
5328  // If we successfully parsed the operand or if there was an error parsing,
5329  // we are done.
5330  //
5331  // If we are parsing after we reach EndOfStatement then this means we
5332  // are appending default values to the Operands list. This is only done
5333  // by custom parser, so we shouldn't continue on to the generic parsing.
5334  if (ResTy == MatchOperand_Success || ResTy == MatchOperand_ParseFail ||
5335  isToken(AsmToken::EndOfStatement))
5336  return ResTy;
5337 
5338  SMLoc RBraceLoc;
5339  SMLoc LBraceLoc = getLoc();
5340  if (Mode == OperandMode_NSA && trySkipToken(AsmToken::LBrac)) {
5341  unsigned Prefix = Operands.size();
5342 
5343  for (;;) {
5344  auto Loc = getLoc();
5345  ResTy = parseReg(Operands);
5346  if (ResTy == MatchOperand_NoMatch)
5347  Error(Loc, "expected a register");
5348  if (ResTy != MatchOperand_Success)
5349  return MatchOperand_ParseFail;
5350 
5351  RBraceLoc = getLoc();
5352  if (trySkipToken(AsmToken::RBrac))
5353  break;
5354 
5355  if (!skipToken(AsmToken::Comma,
5356  "expected a comma or a closing square bracket")) {
5357  return MatchOperand_ParseFail;
5358  }
5359  }
5360 
5361  if (Operands.size() - Prefix > 1) {
5362  Operands.insert(Operands.begin() + Prefix,
5363  AMDGPUOperand::CreateToken(this, "[", LBraceLoc));
5364  Operands.push_back(AMDGPUOperand::CreateToken(this, "]", RBraceLoc));
5365  }
5366 
5367  return MatchOperand_Success;
5368  }
5369 
5370  return parseRegOrImm(Operands);
5371 }
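// Illustrative NSA operand syntax (a sketch; the instruction and register
// names are hypothetical): on GFX10+ MIMG instructions the address may be a
// bracketed, non-sequential register list, which the loop above parses and
// re-wraps in "[" / "]" tokens, e.g.:
//   image_sample v[0:3], [v4, v9, v16], s[0:7], s[8:11] dmask:0xf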
5372 
5373 StringRef AMDGPUAsmParser::parseMnemonicSuffix(StringRef Name) {
5374  // Clear any forced encodings from the previous instruction.
5375  setForcedEncodingSize(0);
5376  setForcedDPP(false);
5377  setForcedSDWA(false);
5378 
5379  if (Name.endswith("_e64")) {
5380  setForcedEncodingSize(64);
5381  return Name.substr(0, Name.size() - 4);
5382  } else if (Name.endswith("_e32")) {
5383  setForcedEncodingSize(32);
5384  return Name.substr(0, Name.size() - 4);
5385  } else if (Name.endswith("_dpp")) {
5386  setForcedDPP(true);
5387  return Name.substr(0, Name.size() - 4);
5388  } else if (Name.endswith("_sdwa")) {
5389  setForcedSDWA(true);
5390  return Name.substr(0, Name.size() - 5);
5391  }
5392  return Name;
5393 }
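// Illustrative examples (the mnemonics are hypothetical): "v_add_f32_e64"
// forces the 64-bit encoding and is returned as "v_add_f32"; "v_mov_b32_sdwa"
// forces SDWA and is returned as "v_mov_b32". Names without a recognized
// suffix are returned unchanged.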
5394 
5395 bool AMDGPUAsmParser::ParseInstruction(ParseInstructionInfo &Info,
5396  StringRef Name,
5397  SMLoc NameLoc, OperandVector &Operands) {
5398  // Add the instruction mnemonic
5399  Name = parseMnemonicSuffix(Name);
5400  Operands.push_back(AMDGPUOperand::CreateToken(this, Name, NameLoc));
5401 
5402  bool IsMIMG = Name.startswith("image_");
5403 
5404  while (!trySkipToken(AsmToken::EndOfStatement)) {
5405  OperandMode Mode = OperandMode_Default;
5406  if (IsMIMG && isGFX10Plus() && Operands.size() == 2)
5407  Mode = OperandMode_NSA;
5408  CPolSeen = 0;
5409  OperandMatchResultTy Res = parseOperand(Operands, Name, Mode);
5410 
5411  if (Res != MatchOperand_Success) {
5412  checkUnsupportedInstruction(Name, NameLoc);
5413  if (!Parser.hasPendingError()) {
5414  // FIXME: use real operand location rather than the current location.
5415  StringRef Msg =
5416  (Res == MatchOperand_ParseFail) ? "failed parsing operand." :
5417  "not a valid operand.";
5418  Error(getLoc(), Msg);
5419  }
5420  while (!trySkipToken(AsmToken::EndOfStatement)) {
5421  lex();
5422  }
5423  return true;
5424  }
5425 
5426  // Eat the comma or space if there is one.
5427  trySkipToken(AsmToken::Comma);
5428  }
5429 
5430  return false;
5431 }
5432 
5433 //===----------------------------------------------------------------------===//
5434 // Utility functions
5435 //===----------------------------------------------------------------------===//
5436 
5437 OperandMatchResultTy
5438 AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, int64_t &IntVal) {
5439 
5440  if (!trySkipId(Prefix, AsmToken::Colon))
5441  return MatchOperand_NoMatch;
5442 
5443  return parseExpr(IntVal) ? MatchOperand_Success : MatchOperand_ParseFail;
5444 }
5445 
5446 OperandMatchResultTy
5447 AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, OperandVector &Operands,
5448  AMDGPUOperand::ImmTy ImmTy,
5449  bool (*ConvertResult)(int64_t&)) {
5450  SMLoc S = getLoc();
5451  int64_t Value = 0;
5452 
5453  OperandMatchResultTy Res = parseIntWithPrefix(Prefix, Value);
5454  if (Res != MatchOperand_Success)
5455  return Res;
5456 
5457  if (ConvertResult && !ConvertResult(Value)) {
5458  Error(S, "invalid " + StringRef(Prefix) + " value.");
5459  }
5460 
5461  Operands.push_back(AMDGPUOperand::CreateImm(this, Value, S, ImmTy));
5462  return MatchOperand_Success;
5463 }
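// Illustrative syntax (the prefix is whatever the caller passes; "offset" is
// a hypothetical example): the operand is written as "<prefix>:<expr>", such
// as "offset:4095". When ConvertResult is supplied it may rewrite or reject
// the parsed value before the immediate operand is created.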
5464 
5465 OperandMatchResultTy
5466 AMDGPUAsmParser::parseOperandArrayWithPrefix(const char *Prefix,
5467  OperandVector &Operands,
5468  AMDGPUOperand::ImmTy ImmTy,
5469  bool (*ConvertResult)(int64_t&)) {
5470  SMLoc S = getLoc();
5471  if (!trySkipId(Prefix, AsmToken::Colon))
5472  return MatchOperand_NoMatch;
5473 
5474  if (!skipToken(AsmToken::LBrac, "expected a left square bracket"))
5475  return MatchOperand_ParseFail;
5476 
5477  unsigned Val = 0;
5478  const unsigned MaxSize = 4;
5479 
5480  // FIXME: How to verify the number of elements matches the number of src
5481  // operands?
5482  for (int I = 0; ; ++I) {
5483  int64_t Op;
5484  SMLoc Loc = getLoc();
5485  if (!parseExpr(Op))
5486  return MatchOperand_ParseFail;
5487 
5488  if (Op != 0 && Op != 1) {
5489  Error(Loc, "invalid " + StringRef(Prefix) + " value.");
5490  return MatchOperand_ParseFail;
5491  }
5492 
5493  Val |= (Op << I);
5494 
5495  if (trySkipToken(AsmToken::RBrac))
5496  break;
5497 
5498  if (I + 1 == MaxSize) {
5499  Error(getLoc(), "expected a closing square bracket");
5500  return MatchOperand_ParseFail;
5501  }
5502 
5503  if (!skipToken(AsmToken::Comma, "expected a comma"))
5504  return MatchOperand_ParseFail;
5505  }
5506 
5507  Operands.push_back(AMDGPUOperand::CreateImm(this, Val, S, ImmTy));
5508  return MatchOperand_Success;
5509 }
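// Illustrative syntax (the prefix shown is hypothetical): the operand is a
// bracketed list of up to four 0/1 values packed into a bitmask, element I
// becoming bit I; e.g. "op_sel:[0,1,1,0]" yields the immediate 0b0110.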
5510 
5511 OperandMatchResultTy
5512 AMDGPUAsmParser::parseNamedBit(StringRef Name, OperandVector &Operands,
5513  AMDGPUOperand::ImmTy ImmTy) {
5514  int64_t Bit;
5515  SMLoc S = getLoc();
5516 
5517  if (trySkipId(Name)) {
5518  Bit = 1;
5519  } else if (trySkipId("no", Name)) {
5520  Bit = 0;
5521  } else {
5522  return MatchOperand_NoMatch;
5523  }
5524 
5525  if (Name == "r128" && !hasMIMG_R128()) {
5526  Error(S, "r128 modifier is not supported on this GPU");
5527  return MatchOperand_ParseFail;
5528  }
5529  if (Name == "a16" && !isGFX9() && !hasGFX10A16()) {
5530  Error(S, "a16 modifier is not supported on this GPU");
5531  return MatchOperand_ParseFail;
5532  }
5533 
5534  if (isGFX9() && ImmTy == AMDGPUOperand::ImmTyA16)
5535  ImmTy = AMDGPUOperand::ImmTyR128A16;
5536 
5537  Operands.push_back(AMDGPUOperand::CreateImm(this, Bit, S, ImmTy));
5538  return MatchOperand_Success;
5539 }
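// Illustrative syntax: a named bit is enabled by writing its name and
// disabled by prefixing it with "no"; for a hypothetical modifier "gds",
// writing "gds" sets the bit and "nogds" clears it. "r128" and "a16"
// additionally require hardware support, as checked above.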
5540 
5541 OperandMatchResultTy
5542 AMDGPUAsmParser::parseCPol(OperandVector &Operands) {
5543  unsigned CPolOn = 0;
5544  unsigned CPolOff = 0;
5545  SMLoc S = getLoc();
5546 
5547  if (trySkipId("glc"))
5548  CPolOn = AMDGPU::CPol::GLC;
5549  else if (trySkipId("noglc"))
5550  CPolOff = AMDGPU::CPol::GLC;
5551  else if (trySkipId("slc"))
5552  CPolOn = AMDGPU::CPol::SLC;
5553  else if (trySkipId("noslc"))
5554  CPolOff = AMDGPU::CPol::SLC;
5555  else if (trySkipId("dlc"))
5556  CPolOn = AMDGPU::CPol::DLC;
5557  else if (trySkipId("nodlc"))
5558  CPolOff = AMDGPU::CPol::DLC;
5559  else if (trySkipId("scc"))
5560  CPolOn = AMDGPU::CPol::SCC;
5561  else if (trySkipId("noscc"))
5562  CPolOff = AMDGPU::CPol::SCC;
5563  else
5564  return MatchOperand_NoMatch;
5565 
5566  if (!isGFX10Plus() && ((CPolOn | CPolOff) & AMDGPU::CPol::DLC)) {
5567  Error(S, "dlc modifier is not supported on this GPU");
5568  return MatchOperand_ParseFail;
5569  }
5570 
5571  if (!isGFX90A() && ((CPolOn | CPolOff) & AMDGPU::CPol::SCC)) {
5572  Error(S, "scc modifier is not supported on this GPU");
5573  return MatchOperand_ParseFail;
5574  }
5575 
5576  if (CPolSeen & (CPolOn | CPolOff)) {
5577  Error(S, "duplicate cache policy modifier");
5578  return MatchOperand_ParseFail;
5579  }
5580 
5581  CPolSeen |= (CPolOn | CPolOff);
5582 
5583  for (unsigned I = 1; I != Operands.size(); ++I) {
5584  AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
5585  if (Op.isCPol()) {
5586  Op.setImm((Op.getImm() | CPolOn) & ~CPolOff);
5587  return MatchOperand_Success;
5588  }
5589  }
5590 
5591  Operands.push_back(AMDGPUOperand::CreateImm(this, CPolOn, S,
5592  AMDGPUOperand::ImmTyCPol));
5593 
5594  return MatchOperand_Success;
5595 }
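// Illustrative syntax: cache-policy modifiers are the keywords handled above
// ("glc", "slc", "dlc", "scc") or their "no"-prefixed forms, and several may
// be combined on one instruction, e.g. "glc slc"; repeating a modifier is
// rejected as a duplicate via CPolSeen.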
5596 
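// Descriptive note for the helper below: if the optional operand of type ImmT
// was parsed (it has an entry in OptionalIdx), its immediate is encoded into
// Inst from Operands; otherwise the supplied Default is appended instead.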
5597 static void addOptionalImmOperand(
5598  MCInst& Inst, const OperandVector& Operands,
5599  AMDGPUAsmParser::OptionalImmIndexMap& OptionalIdx,
5600  AMDGPUOperand::ImmTy ImmT,
5601  int64_t Default = 0) {
5602  auto i = OptionalIdx.find(ImmT);
5603  if (i != OptionalIdx.end()) {
5604  unsigned Idx = i->second;
5605  ((AMDGPUOperand &)*Operands[Idx]).addImmOperands(Inst, 1);
5606  } else {
5607  Inst.addOperand(MCOperand::createImm(Default));
5608  }
5609 }
5610 
5611 OperandMatchResultTy
5612 AMDGPUAsmParser::parseStringWithPrefix(StringRef Prefix,
5613  StringRef &Value,
5614  SMLoc &StringLoc) {
5615  if (!trySkipId(Prefix, AsmToken::Colon))
5616  return MatchOperand_NoMatch;
5617 
5618  StringLoc = getLoc();
5619  return parseId(Value, "expected an identifier") ? MatchOperand_Success
5620  : MatchOperand_ParseFail;
5621 }
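// Illustrative syntax (the prefix and identifier are hypothetical): parses
// "<prefix>:<identifier>", e.g. "dim:SQ_RSRC_IMG_2D", returning the
// identifier text in Value and its location in StringLoc.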
5622 
5623 //===----------------------------------------------------------------------===//
5624 // MTBUF format
5625 //===----------------------------------------------------------------------===//
5626 
5627 bool AMDGPUAsmParser::tryParseFmt(const char *Pref,
5628  int64_t MaxVal,
5629  int64_t &Fmt) {
5630  int64_t Val;
5631  SMLoc Loc = getLoc();
5632 
5633  auto Res = parseIntWithPrefix(Pref, Val);
5634  if (Res == MatchOperand_ParseFail)
5635  return false;
5636  if (Res == MatchOperand_NoMatch)
5637  return true;
5638 
5639  if (Val < 0 || Val > MaxVal) {
5640  Error(Loc, Twine("out of range ", StringRef(Pref)));
5641  return false;
5642  }
5643 
5644  Fmt = Val;
5645  return true;
5646 }
5647 
5648 // dfmt and nfmt (in a tbuffer instruction) are parsed as one to allow their
5649 // values to live in a joint format operand in the MCInst encoding.
5650 OperandMatchResultTy
5651 AMDGPUAsmParser::parseDfmtNfmt(int64_t &Format) {
5652  using namespace llvm::AMDGPU::MTBUFFormat;
5653 
5654  int64_t Dfmt = DFMT_UNDEF;
5655  int64_t Nfmt = NFMT_UNDEF;
5656 
5657  // dfmt and nfmt can appear in either order, and each is optional.
5658  for (int I = 0; I < 2; ++I) {
5659  if (Dfmt == DFMT_UNDEF && !tryParseFmt("dfmt", DFMT_MAX, Dfmt))
5660  return MatchOperand_ParseFail;
5661 
5662  if (Nfmt == NFMT_UNDEF && !tryParseFmt("nfmt", NFMT_MAX, Nfmt)) {
5663  return MatchOperand_ParseFail;
5664  }
5665  // Skip optional comma between dfmt/nfmt
5666  // but guard against 2 commas following each other.
5667  if ((Dfmt == DFMT_UNDEF) != (Nfmt == NFMT_UNDEF) &&
5668  !peekToken().is(AsmToken::Comma)) {
5669  trySkipToken(AsmToken::Comma);
5670  }
5671  }
5672 
5673  if (Dfmt == DFMT_UNDEF && Nfmt == NFMT_UNDEF)
5674  return MatchOperand_NoMatch;
5675 
5676  Dfmt = (Dfmt == DFMT_UNDEF) ? DFMT_DEFAULT : Dfmt;
5677  Nfmt = (Nfmt == NFMT_UNDEF) ? NFMT_DEFAULT : Nfmt;
5678 
5679  Format = encodeDfmtNfmt(Dfmt, Nfmt);
5680  return MatchOperand_Success;
5681 }
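// Illustrative legacy syntax (the values are hypothetical): "dfmt:5, nfmt:2",
// "nfmt:2, dfmt:5", or just "dfmt:5". Whichever field is omitted takes its
// default, and the two fields are packed into a single format immediate by
// encodeDfmtNfmt.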
5682 
5683 OperandMatchResultTy
5684 AMDGPUAsmParser::parseUfmt(int64_t &Format) {
5685  using namespace llvm::AMDGPU::MTBUFFormat;
5686 
5687  int64_t Fmt = UFMT_UNDEF;
5688 
5689  if (!tryParseFmt("format", UFMT_MAX, Fmt))
5690  return MatchOperand_ParseFail;
5691 
5692  if (Fmt == UFMT_UNDEF)
5693  return MatchOperand_NoMatch;
5694 
5695  Format = Fmt;
5696  return MatchOperand_Success;
5697 }
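// Illustrative GFX10+ syntax (the value is hypothetical): a single unified
// format number such as "format:22", range-checked against UFMT_MAX.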
5698 
5699 bool AMDGPUAsmParser::matchDfmtNfmt(int64_t &Dfmt,
5700  int64_t &Nfmt,
5701  StringRef FormatStr,
5702  SMLoc Loc) {
5703  using namespace llvm::AMDGPU::MTBUFFormat;
5704  int64_t Format;
5705 
5706  Format = getDfmt(FormatStr);
5707  if (Format != DFMT_UNDEF) {
5708  Dfmt = Format;
5709  return true;
5710  }
5711 
5712  Format = getNfmt(FormatStr, getSTI());
5713  if (Format != NFMT_UNDEF) {
5714  Nfmt = Format;
5715  return true;
5716  }
5717 
5718  Error(Loc, "unsupported format");
5719  return false;
5720 }
5721 
5722 OperandMatchResultTy
5723 AMDGPUAsmParser::parseSymbolicSplitFormat(StringRef FormatStr,
5724  SMLoc FormatLoc,
5725  int64_t &Format) {
5726  using namespace llvm::AMDGPU::MTBUFFormat;
5727 
5728  int64_t Dfmt = DFMT_UNDEF;
5729  int64_t Nfmt = NFMT_UNDEF;
5730  if (!matchDfmtNfmt(Dfmt, Nfmt, FormatStr, FormatLoc))
5731  return MatchOperand_ParseFail;
5732 
5733  if (trySkipToken(AsmToken::Comma)) {
5734  StringRef Str;
5735  SMLoc Loc = getLoc();
5736  if (!parseId(Str, "expected a format string") ||
5737  !matchDfmtNfmt(Dfmt, Nfmt, Str, Loc)) {
5738  return MatchOperand_ParseFail;
5739  }
5740  if (Dfmt == DFMT_UNDEF) {
5741  Error(Loc, "duplicate numeric format");
5742  return MatchOperand_ParseFail;
5743  } else if (Nfmt == NFMT_UNDEF) {
5744  Error(Loc, "duplicate data format");
5745  return MatchOperand_ParseFail;
5746  }
5747  }
5748 
5749  Dfmt = (Dfmt == DFMT_UNDEF) ? DFMT_DEFAULT : Dfmt;
5750  Nfmt = (Nfmt == NFMT_UNDEF) ? NFMT_DEFAULT : Nfmt;
5751 
5752  if (isGFX10Plus()) {
5753  auto Ufmt = convertDfmtNfmt2Ufmt(Dfmt, Nfmt);
5754  if (Ufmt == UFMT_UNDEF) {
5755  Error(FormatLoc, "unsupported format");
5756  return MatchOperand_ParseFail;
5757  }
5758  Format = Ufmt;
5759  } else {
5760  Format = encodeDfmtNfmt(Dfmt, Nfmt);
5761  }
5762 
5763  return MatchOperand_Success;
5764 }
5765 
5766 OperandMatchResultTy
5767 AMDGPUAsmParser::parseSymbolicUnifiedFormat(StringRef FormatStr,
5768  SMLoc Loc,
5769  int64_t &Format) {
5770  using namespace llvm::AMDGPU::MTBUFFormat;
5771 
5772  auto Id = getUnifiedFormat(FormatStr);
5773  if (Id == UFMT_UNDEF)
5774  return MatchOperand_NoMatch;
5775 
5776  if (!isGFX10Plus()) {
5777  Error(Loc, "unified format is not supported on this GPU");
5778  return MatchOperand_ParseFail;
5779  }
5780 
5781  Format = Id;
5782  return MatchOperand_Success;
5783 }
5784 
5785 OperandMatchResultTy
5786 AMDGPUAsmParser::parseNumericFormat(int64_t &Format) {
5787  using namespace llvm::AMDGPU::MTBUFFormat;
5788  SMLoc Loc = getLoc();
5789 
5790  if (!parseExpr(Format))
5791  return MatchOperand_ParseFail;
5792  if (!isValidFormatEncoding(Format, getSTI())) {
5793  Error(Loc, "out of range format");
5794  return MatchOperand_ParseFail;
5795  }
5796 
5797  return MatchOperand_Success;
5798 }
5799 
5800 OperandMatchResultTy
5801 AMDGPUAsmParser::parseSymbolicOrNumericFormat(int64_t &Format) {
5802  using namespace llvm::AMDGPU::MTBUFFormat;
5803 
5804  if (!trySkipId("format", AsmToken::Colon))
5805  return MatchOperand_NoMatch;
5806 
5807  if (trySkipToken(AsmToken::LBrac)) {
5808  StringRef FormatStr;
5809  SMLoc Loc = getLoc();
5810  if (!parseId(FormatStr, "expected a format string"))
5811  return MatchOperand_ParseFail;
5812 
5813  auto Res = parseSymbolicUnifiedFormat(FormatStr, Loc, Format);
5814  if (Res == MatchOperand_NoMatch)
5815  Res = parseSymbolicSplitFormat(FormatStr, Loc, Format);
5816  if (Res != MatchOperand_Success)
5817  return Res;
5818 
5819  if (!skipToken(AsmToken::RBrac, "expected a closing square bracket"))
5820  return MatchOperand_ParseFail;
5821 
5822  return MatchOperand_Success;
5823  }
5824 
5825  return parseNumericFormat(Format);
5826 }
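// Illustrative syntax (the format names and values are hypothetical): either
// a symbolic form such as "format:[BUF_DATA_FORMAT_32, BUF_NUM_FORMAT_FLOAT]"
// (or a unified name like "format:[BUF_FMT_32_FLOAT]" on GFX10+), or a raw
// numeric form such as "format:77" handled by parseNumericFormat.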
5827 
5828 OperandMatchResultTy
5829 AMDGPUAsmParser::parseFORMAT(OperandVector &Operands) {
5830  using namespace llvm::AMDGPU::MTBUFFormat;
5831 
5832  int64_t Format = getDefaultFormatEncoding(getSTI());
5833  OperandMatchResultTy Res;
5834  SMLoc Loc = getLoc();
5835 
5836  // Parse legacy format syntax.
5837  Res = isGFX10Plus() ? parseUfmt(Format) : parseDfmtNfmt(Format);
5838  if (Res == MatchOperand_ParseFail)
5839  return Res;
5840 
5841  bool FormatFound = (Res == MatchOperand_Success);
5842 
5843  Operands.push_back(
5844  AMDGPUOperand::CreateImm(this, Format, Loc, AMDGPUOperand::ImmTyFORMAT));
5845 
5846  if (FormatFound)
5847  trySkipToken(AsmToken::Comma);
5848 
5849  if (isToken(AsmToken::EndOfStatement)) {
5850  // We are expecting an soffset operand,
5851  // but let the matcher handle the error.
5852  return MatchOperand_Success;
5853  }
5854 
5855  // Parse soffset.
5856  Res = parseRegOrImm(Operands);
5857  if (Res != MatchOperand_Success)
5858  return Res;
5859 
5860  trySkipToken(AsmToken::Comma);
5861 
5862  if (!FormatFound) {
5863  Res = parseSymbolicOrNumericFormat(Format);
5864  if (Res == MatchOperand_ParseFail)
5865  return Res;
5866  if (Res == MatchOperand_Success) {
5867  auto Size = Operands.size();
5868  AMDGPUOperand &