1 //===- AMDGPUAsmParser.cpp - Parse SI asm to MCInst instructions ----------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "AMDKernelCodeT.h"
12 #include "SIDefines.h"
13 #include "SIInstrInfo.h"
14 #include "SIRegisterInfo.h"
16 #include "Utils/AMDGPUAsmUtils.h"
17 #include "Utils/AMDGPUBaseInfo.h"
19 #include "llvm/ADT/APFloat.h"
21 #include "llvm/ADT/StringSet.h"
22 #include "llvm/ADT/Twine.h"
23 #include "llvm/MC/MCAsmInfo.h"
24 #include "llvm/MC/MCContext.h"
25 #include "llvm/MC/MCExpr.h"
26 #include "llvm/MC/MCInst.h"
30 #include "llvm/MC/MCSymbol.h"
33 #include "llvm/Support/Casting.h"
37 
38 using namespace llvm;
39 using namespace llvm::AMDGPU;
40 using namespace llvm::amdhsa;
41 
42 namespace {
43 
44 class AMDGPUAsmParser;
45 
46 enum RegisterKind { IS_UNKNOWN, IS_VGPR, IS_SGPR, IS_AGPR, IS_TTMP, IS_SPECIAL };
47 
48 //===----------------------------------------------------------------------===//
49 // Operand
50 //===----------------------------------------------------------------------===//
51 
52 class AMDGPUOperand : public MCParsedAsmOperand {
53  enum KindTy {
54  Token,
55  Immediate,
56  Register,
57  Expression,
58  } Kind;
59 
60  SMLoc StartLoc, EndLoc;
61  const AMDGPUAsmParser *AsmParser;
62 
63 public:
64  AMDGPUOperand(KindTy Kind_, const AMDGPUAsmParser *AsmParser_)
65  : MCParsedAsmOperand(), Kind(Kind_), AsmParser(AsmParser_) {}
66 
67  using Ptr = std::unique_ptr<AMDGPUOperand>;
68 
69  struct Modifiers {
70  bool Abs = false;
71  bool Neg = false;
72  bool Sext = false;
73 
74  bool hasFPModifiers() const { return Abs || Neg; }
75  bool hasIntModifiers() const { return Sext; }
76  bool hasModifiers() const { return hasFPModifiers() || hasIntModifiers(); }
77 
78  int64_t getFPModifiersOperand() const {
79  int64_t Operand = 0;
80  Operand |= Abs ? SISrcMods::ABS : 0u;
81  Operand |= Neg ? SISrcMods::NEG : 0u;
82  return Operand;
83  }
84 
85  int64_t getIntModifiersOperand() const {
86  int64_t Operand = 0;
87  Operand |= Sext ? SISrcMods::SEXT : 0u;
88  return Operand;
89  }
90 
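  // Packs the active modifiers into a single immediate; e.g. an operand
  // written as "-|v0|" has both Neg and Abs set and yields
  // SISrcMods::NEG | SISrcMods::ABS.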
91  int64_t getModifiersOperand() const {
92  assert(!(hasFPModifiers() && hasIntModifiers())
93  && "fp and int modifiers should not be used simultaneously");
94  if (hasFPModifiers()) {
95  return getFPModifiersOperand();
96  } else if (hasIntModifiers()) {
97  return getIntModifiersOperand();
98  } else {
99  return 0;
100  }
101  }
102 
103  friend raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods);
104  };
105 
106  enum ImmTy {
107  ImmTyNone,
108  ImmTyGDS,
109  ImmTyLDS,
110  ImmTyOffen,
111  ImmTyIdxen,
112  ImmTyAddr64,
113  ImmTyOffset,
114  ImmTyInstOffset,
115  ImmTyOffset0,
116  ImmTyOffset1,
117  ImmTyDLC,
118  ImmTySCCB,
119  ImmTyGLC,
120  ImmTySLC,
121  ImmTySWZ,
122  ImmTyTFE,
123  ImmTyD16,
124  ImmTyClampSI,
125  ImmTyOModSI,
126  ImmTyDPP8,
127  ImmTyDppCtrl,
128  ImmTyDppRowMask,
129  ImmTyDppBankMask,
130  ImmTyDppBoundCtrl,
131  ImmTyDppFi,
132  ImmTySdwaDstSel,
133  ImmTySdwaSrc0Sel,
134  ImmTySdwaSrc1Sel,
135  ImmTySdwaDstUnused,
136  ImmTyDMask,
137  ImmTyDim,
138  ImmTyUNorm,
139  ImmTyDA,
140  ImmTyR128A16,
141  ImmTyA16,
142  ImmTyLWE,
143  ImmTyExpTgt,
144  ImmTyExpCompr,
145  ImmTyExpVM,
146  ImmTyFORMAT,
147  ImmTyHwreg,
148  ImmTyOff,
149  ImmTySendMsg,
150  ImmTyInterpSlot,
151  ImmTyInterpAttr,
152  ImmTyAttrChan,
153  ImmTyOpSel,
154  ImmTyOpSelHi,
155  ImmTyNegLo,
156  ImmTyNegHi,
157  ImmTySwizzle,
158  ImmTyGprIdxMode,
159  ImmTyHigh,
160  ImmTyBLGP,
161  ImmTyCBSZ,
162  ImmTyABID,
163  ImmTyEndpgm,
164  };
165 
166  enum ImmKindTy {
167  ImmKindTyNone,
168  ImmKindTyLiteral,
169  ImmKindTyConst,
170  };
171 
172 private:
173  struct TokOp {
174  const char *Data;
175  unsigned Length;
176  };
177 
178  struct ImmOp {
179  int64_t Val;
180  ImmTy Type;
181  bool IsFPImm;
182  mutable ImmKindTy Kind;
183  Modifiers Mods;
184  };
185 
186  struct RegOp {
187  unsigned RegNo;
188  Modifiers Mods;
189  };
190 
191  union {
192  TokOp Tok;
193  ImmOp Imm;
194  RegOp Reg;
195  const MCExpr *Expr;
196  };
197 
198 public:
199  bool isToken() const override {
200  if (Kind == Token)
201  return true;
202 
203  // When parsing operands, we can't always tell if something was meant to be
204  // a token, like 'gds', or an expression that references a global variable.
205  // In this case, we assume the string is an expression, and if we need to
206  // interpret it as a token, then we treat the symbol name as the token.
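  // For example, 'gds' parsed as a symbol reference still reports itself as a
  // token here, and getToken() will return the referenced symbol's name.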
207  return isSymbolRefExpr();
208  }
209 
210  bool isSymbolRefExpr() const {
211  return isExpr() && Expr && isa<MCSymbolRefExpr>(Expr);
212  }
213 
214  bool isImm() const override {
215  return Kind == Immediate;
216  }
217 
218  void setImmKindNone() const {
219  assert(isImm());
220  Imm.Kind = ImmKindTyNone;
221  }
222 
223  void setImmKindLiteral() const {
224  assert(isImm());
225  Imm.Kind = ImmKindTyLiteral;
226  }
227 
228  void setImmKindConst() const {
229  assert(isImm());
230  Imm.Kind = ImmKindTyConst;
231  }
232 
233  bool IsImmKindLiteral() const {
234  return isImm() && Imm.Kind == ImmKindTyLiteral;
235  }
236 
237  bool isImmKindConst() const {
238  return isImm() && Imm.Kind == ImmKindTyConst;
239  }
240 
241  bool isInlinableImm(MVT type) const;
242  bool isLiteralImm(MVT type) const;
243 
244  bool isRegKind() const {
245  return Kind == Register;
246  }
247 
248  bool isReg() const override {
249  return isRegKind() && !hasModifiers();
250  }
251 
252  bool isRegOrImmWithInputMods(unsigned RCID, MVT type) const {
253  return isRegClass(RCID) || isInlinableImm(type) || isLiteralImm(type);
254  }
255 
256  bool isRegOrImmWithInt16InputMods() const {
257  return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i16);
258  }
259 
260  bool isRegOrImmWithInt32InputMods() const {
261  return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i32);
262  }
263 
264  bool isRegOrImmWithInt64InputMods() const {
265  return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::i64);
266  }
267 
268  bool isRegOrImmWithFP16InputMods() const {
269  return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f16);
270  }
271 
272  bool isRegOrImmWithFP32InputMods() const {
273  return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f32);
274  }
275 
276  bool isRegOrImmWithFP64InputMods() const {
277  return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::f64);
278  }
279 
280  bool isVReg() const {
281  return isRegClass(AMDGPU::VGPR_32RegClassID) ||
282  isRegClass(AMDGPU::VReg_64RegClassID) ||
283  isRegClass(AMDGPU::VReg_96RegClassID) ||
284  isRegClass(AMDGPU::VReg_128RegClassID) ||
285  isRegClass(AMDGPU::VReg_160RegClassID) ||
286  isRegClass(AMDGPU::VReg_192RegClassID) ||
287  isRegClass(AMDGPU::VReg_256RegClassID) ||
288  isRegClass(AMDGPU::VReg_512RegClassID) ||
289  isRegClass(AMDGPU::VReg_1024RegClassID);
290  }
291 
292  bool isVReg32() const {
293  return isRegClass(AMDGPU::VGPR_32RegClassID);
294  }
295 
296  bool isVReg32OrOff() const {
297  return isOff() || isVReg32();
298  }
299 
300  bool isNull() const {
301  return isRegKind() && getReg() == AMDGPU::SGPR_NULL;
302  }
303 
304  bool isVRegWithInputMods() const;
305 
306  bool isSDWAOperand(MVT type) const;
307  bool isSDWAFP16Operand() const;
308  bool isSDWAFP32Operand() const;
309  bool isSDWAInt16Operand() const;
310  bool isSDWAInt32Operand() const;
311 
312  bool isImmTy(ImmTy ImmT) const {
313  return isImm() && Imm.Type == ImmT;
314  }
315 
316  bool isImmModifier() const {
317  return isImm() && Imm.Type != ImmTyNone;
318  }
319 
320  bool isClampSI() const { return isImmTy(ImmTyClampSI); }
321  bool isOModSI() const { return isImmTy(ImmTyOModSI); }
322  bool isDMask() const { return isImmTy(ImmTyDMask); }
323  bool isDim() const { return isImmTy(ImmTyDim); }
324  bool isUNorm() const { return isImmTy(ImmTyUNorm); }
325  bool isDA() const { return isImmTy(ImmTyDA); }
326  bool isR128A16() const { return isImmTy(ImmTyR128A16); }
327  bool isGFX10A16() const { return isImmTy(ImmTyA16); }
328  bool isLWE() const { return isImmTy(ImmTyLWE); }
329  bool isOff() const { return isImmTy(ImmTyOff); }
330  bool isExpTgt() const { return isImmTy(ImmTyExpTgt); }
331  bool isExpVM() const { return isImmTy(ImmTyExpVM); }
332  bool isExpCompr() const { return isImmTy(ImmTyExpCompr); }
333  bool isOffen() const { return isImmTy(ImmTyOffen); }
334  bool isIdxen() const { return isImmTy(ImmTyIdxen); }
335  bool isAddr64() const { return isImmTy(ImmTyAddr64); }
336  bool isOffset() const { return isImmTy(ImmTyOffset) && isUInt<16>(getImm()); }
337  bool isOffset0() const { return isImmTy(ImmTyOffset0) && isUInt<8>(getImm()); }
338  bool isOffset1() const { return isImmTy(ImmTyOffset1) && isUInt<8>(getImm()); }
339 
340  bool isFlatOffset() const { return isImmTy(ImmTyOffset) || isImmTy(ImmTyInstOffset); }
341  bool isGDS() const { return isImmTy(ImmTyGDS); }
342  bool isLDS() const { return isImmTy(ImmTyLDS); }
343  bool isDLC() const { return isImmTy(ImmTyDLC); }
344  bool isSCCB() const { return isImmTy(ImmTySCCB); }
345  bool isGLC() const { return isImmTy(ImmTyGLC); }
346  // "GLC_1" is a MatchClass of the GLC_1 operand with the default and forced
347  // value of the GLC operand.
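  // (e.g. buffer atomic instructions that return the pre-op value require
  // glc to be set to 1).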
348  bool isGLC_1() const { return isImmTy(ImmTyGLC); }
349  bool isSLC() const { return isImmTy(ImmTySLC); }
350  bool isSWZ() const { return isImmTy(ImmTySWZ); }
351  bool isTFE() const { return isImmTy(ImmTyTFE); }
352  bool isD16() const { return isImmTy(ImmTyD16); }
353  bool isFORMAT() const { return isImmTy(ImmTyFORMAT) && isUInt<7>(getImm()); }
354  bool isBankMask() const { return isImmTy(ImmTyDppBankMask); }
355  bool isRowMask() const { return isImmTy(ImmTyDppRowMask); }
356  bool isBoundCtrl() const { return isImmTy(ImmTyDppBoundCtrl); }
357  bool isFI() const { return isImmTy(ImmTyDppFi); }
358  bool isSDWADstSel() const { return isImmTy(ImmTySdwaDstSel); }
359  bool isSDWASrc0Sel() const { return isImmTy(ImmTySdwaSrc0Sel); }
360  bool isSDWASrc1Sel() const { return isImmTy(ImmTySdwaSrc1Sel); }
361  bool isSDWADstUnused() const { return isImmTy(ImmTySdwaDstUnused); }
362  bool isInterpSlot() const { return isImmTy(ImmTyInterpSlot); }
363  bool isInterpAttr() const { return isImmTy(ImmTyInterpAttr); }
364  bool isAttrChan() const { return isImmTy(ImmTyAttrChan); }
365  bool isOpSel() const { return isImmTy(ImmTyOpSel); }
366  bool isOpSelHi() const { return isImmTy(ImmTyOpSelHi); }
367  bool isNegLo() const { return isImmTy(ImmTyNegLo); }
368  bool isNegHi() const { return isImmTy(ImmTyNegHi); }
369  bool isHigh() const { return isImmTy(ImmTyHigh); }
370 
371  bool isMod() const {
372  return isClampSI() || isOModSI();
373  }
374 
375  bool isRegOrImm() const {
376  return isReg() || isImm();
377  }
378 
379  bool isRegClass(unsigned RCID) const;
380 
381  bool isInlineValue() const;
382 
383  bool isRegOrInlineNoMods(unsigned RCID, MVT type) const {
384  return (isRegClass(RCID) || isInlinableImm(type)) && !hasModifiers();
385  }
386 
387  bool isSCSrcB16() const {
388  return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i16);
389  }
390 
391  bool isSCSrcV2B16() const {
392  return isSCSrcB16();
393  }
394 
395  bool isSCSrcB32() const {
396  return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i32);
397  }
398 
399  bool isSCSrcB64() const {
400  return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::i64);
401  }
402 
403  bool isBoolReg() const;
404 
405  bool isSCSrcF16() const {
406  return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f16);
407  }
408 
409  bool isSCSrcV2F16() const {
410  return isSCSrcF16();
411  }
412 
413  bool isSCSrcF32() const {
414  return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f32);
415  }
416 
417  bool isSCSrcF64() const {
418  return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::f64);
419  }
420 
421  bool isSSrcB32() const {
422  return isSCSrcB32() || isLiteralImm(MVT::i32) || isExpr();
423  }
424 
425  bool isSSrcB16() const {
426  return isSCSrcB16() || isLiteralImm(MVT::i16);
427  }
428 
429  bool isSSrcV2B16() const {
430  llvm_unreachable("cannot happen");
431  return isSSrcB16();
432  }
433 
434  bool isSSrcB64() const {
435  // TODO: Find out how SALU supports extension of 32-bit literals to 64 bits.
436  // See isVSrc64().
437  return isSCSrcB64() || isLiteralImm(MVT::i64);
438  }
439 
440  bool isSSrcF32() const {
441  return isSCSrcB32() || isLiteralImm(MVT::f32) || isExpr();
442  }
443 
444  bool isSSrcF64() const {
445  return isSCSrcB64() || isLiteralImm(MVT::f64);
446  }
447 
448  bool isSSrcF16() const {
449  return isSCSrcB16() || isLiteralImm(MVT::f16);
450  }
451 
452  bool isSSrcV2F16() const {
453  llvm_unreachable("cannot happen");
454  return isSSrcF16();
455  }
456 
457  bool isSSrcV2FP32() const {
458  llvm_unreachable("cannot happen");
459  return isSSrcF32();
460  }
461 
462  bool isSCSrcV2FP32() const {
463  llvm_unreachable("cannot happen");
464  return isSCSrcF32();
465  }
466 
467  bool isSSrcV2INT32() const {
468  llvm_unreachable("cannot happen");
469  return isSSrcB32();
470  }
471 
472  bool isSCSrcV2INT32() const {
473  llvm_unreachable("cannot happen");
474  return isSCSrcB32();
475  }
476 
477  bool isSSrcOrLdsB32() const {
478  return isRegOrInlineNoMods(AMDGPU::SRegOrLds_32RegClassID, MVT::i32) ||
479  isLiteralImm(MVT::i32) || isExpr();
480  }
481 
482  bool isVCSrcB32() const {
483  return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i32);
484  }
485 
486  bool isVCSrcB64() const {
487  return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::i64);
488  }
489 
490  bool isVCSrcB16() const {
491  return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i16);
492  }
493 
494  bool isVCSrcV2B16() const {
495  return isVCSrcB16();
496  }
497 
498  bool isVCSrcF32() const {
499  return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f32);
500  }
501 
502  bool isVCSrcF64() const {
503  return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::f64);
504  }
505 
506  bool isVCSrcF16() const {
507  return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f16);
508  }
509 
510  bool isVCSrcV2F16() const {
511  return isVCSrcF16();
512  }
513 
514  bool isVSrcB32() const {
515  return isVCSrcF32() || isLiteralImm(MVT::i32) || isExpr();
516  }
517 
518  bool isVSrcB64() const {
519  return isVCSrcF64() || isLiteralImm(MVT::i64);
520  }
521 
522  bool isVSrcB16() const {
523  return isVCSrcB16() || isLiteralImm(MVT::i16);
524  }
525 
526  bool isVSrcV2B16() const {
527  return isVSrcB16() || isLiteralImm(MVT::v2i16);
528  }
529 
530  bool isVCSrcV2FP32() const {
531  return isVCSrcF64();
532  }
533 
534  bool isVSrcV2FP32() const {
535  return isVSrcF64() || isLiteralImm(MVT::v2f32);
536  }
537 
538  bool isVCSrcV2INT32() const {
539  return isVCSrcB64();
540  }
541 
542  bool isVSrcV2INT32() const {
543  return isVSrcB64() || isLiteralImm(MVT::v2i32);
544  }
545 
546  bool isVSrcF32() const {
547  return isVCSrcF32() || isLiteralImm(MVT::f32) || isExpr();
548  }
549 
550  bool isVSrcF64() const {
551  return isVCSrcF64() || isLiteralImm(MVT::f64);
552  }
553 
554  bool isVSrcF16() const {
555  return isVCSrcF16() || isLiteralImm(MVT::f16);
556  }
557 
558  bool isVSrcV2F16() const {
559  return isVSrcF16() || isLiteralImm(MVT::v2f16);
560  }
561 
562  bool isVISrcB32() const {
563  return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i32);
564  }
565 
566  bool isVISrcB16() const {
567  return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i16);
568  }
569 
570  bool isVISrcV2B16() const {
571  return isVISrcB16();
572  }
573 
574  bool isVISrcF32() const {
575  return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f32);
576  }
577 
578  bool isVISrcF16() const {
579  return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f16);
580  }
581 
582  bool isVISrcV2F16() const {
583  return isVISrcF16() || isVISrcB32();
584  }
585 
586  bool isVISrc_64B64() const {
587  return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::i64);
588  }
589 
590  bool isVISrc_64F64() const {
591  return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::f64);
592  }
593 
594  bool isVISrc_64V2FP32() const {
595  return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::f32);
596  }
597 
598  bool isVISrc_64V2INT32() const {
599  return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::i32);
600  }
601 
602  bool isVISrc_256B64() const {
603  return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::i64);
604  }
605 
606  bool isVISrc_256F64() const {
607  return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::f64);
608  }
609 
610  bool isVISrc_128B16() const {
611  return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::i16);
612  }
613 
614  bool isVISrc_128V2B16() const {
615  return isVISrc_128B16();
616  }
617 
618  bool isVISrc_128B32() const {
619  return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::i32);
620  }
621 
622  bool isVISrc_128F32() const {
623  return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::f32);
624  }
625 
626  bool isVISrc_256V2FP32() const {
627  return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::f32);
628  }
629 
630  bool isVISrc_256V2INT32() const {
631  return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::i32);
632  }
633 
634  bool isVISrc_512B32() const {
635  return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::i32);
636  }
637 
638  bool isVISrc_512B16() const {
639  return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::i16);
640  }
641 
642  bool isVISrc_512V2B16() const {
643  return isVISrc_512B16();
644  }
645 
646  bool isVISrc_512F32() const {
647  return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::f32);
648  }
649 
650  bool isVISrc_512F16() const {
651  return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::f16);
652  }
653 
654  bool isVISrc_512V2F16() const {
655  return isVISrc_512F16() || isVISrc_512B32();
656  }
657 
658  bool isVISrc_1024B32() const {
659  return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::i32);
660  }
661 
662  bool isVISrc_1024B16() const {
663  return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::i16);
664  }
665 
666  bool isVISrc_1024V2B16() const {
667  return isVISrc_1024B16();
668  }
669 
670  bool isVISrc_1024F32() const {
671  return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::f32);
672  }
673 
674  bool isVISrc_1024F16() const {
675  return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::f16);
676  }
677 
678  bool isVISrc_1024V2F16() const {
679  return isVISrc_1024F16() || isVISrc_1024B32();
680  }
681 
682  bool isAISrcB32() const {
683  return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i32);
684  }
685 
686  bool isAISrcB16() const {
687  return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i16);
688  }
689 
690  bool isAISrcV2B16() const {
691  return isAISrcB16();
692  }
693 
694  bool isAISrcF32() const {
695  return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f32);
696  }
697 
698  bool isAISrcF16() const {
699  return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f16);
700  }
701 
702  bool isAISrcV2F16() const {
703  return isAISrcF16() || isAISrcB32();
704  }
705 
706  bool isAISrc_64B64() const {
707  return isRegOrInlineNoMods(AMDGPU::AReg_64RegClassID, MVT::i64);
708  }
709 
710  bool isAISrc_64F64() const {
711  return isRegOrInlineNoMods(AMDGPU::AReg_64RegClassID, MVT::f64);
712  }
713 
714  bool isAISrc_128B32() const {
715  return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i32);
716  }
717 
718  bool isAISrc_128B16() const {
719  return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i16);
720  }
721 
722  bool isAISrc_128V2B16() const {
723  return isAISrc_128B16();
724  }
725 
726  bool isAISrc_128F32() const {
727  return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f32);
728  }
729 
730  bool isAISrc_128F16() const {
731  return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f16);
732  }
733 
734  bool isAISrc_128V2F16() const {
735  return isAISrc_128F16() || isAISrc_128B32();
736  }
737 
738  bool isVISrc_128F16() const {
739  return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::f16);
740  }
741 
742  bool isVISrc_128V2F16() const {
743  return isVISrc_128F16() || isVISrc_128B32();
744  }
745 
746  bool isAISrc_256B64() const {
747  return isRegOrInlineNoMods(AMDGPU::AReg_256RegClassID, MVT::i64);
748  }
749 
750  bool isAISrc_256F64() const {
751  return isRegOrInlineNoMods(AMDGPU::AReg_256RegClassID, MVT::f64);
752  }
753 
754  bool isAISrc_512B32() const {
755  return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i32);
756  }
757 
758  bool isAISrc_512B16() const {
759  return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i16);
760  }
761 
762  bool isAISrc_512V2B16() const {
763  return isAISrc_512B16();
764  }
765 
766  bool isAISrc_512F32() const {
767  return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f32);
768  }
769 
770  bool isAISrc_512F16() const {
771  return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f16);
772  }
773 
774  bool isAISrc_512V2F16() const {
775  return isAISrc_512F16() || isAISrc_512B32();
776  }
777 
778  bool isAISrc_1024B32() const {
779  return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i32);
780  }
781 
782  bool isAISrc_1024B16() const {
783  return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i16);
784  }
785 
786  bool isAISrc_1024V2B16() const {
787  return isAISrc_1024B16();
788  }
789 
790  bool isAISrc_1024F32() const {
791  return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f32);
792  }
793 
794  bool isAISrc_1024F16() const {
795  return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f16);
796  }
797 
798  bool isAISrc_1024V2F16() const {
799  return isAISrc_1024F16() || isAISrc_1024B32();
800  }
801 
802  bool isKImmFP32() const {
803  return isLiteralImm(MVT::f32);
804  }
805 
806  bool isKImmFP16() const {
807  return isLiteralImm(MVT::f16);
808  }
809 
810  bool isMem() const override {
811  return false;
812  }
813 
814  bool isExpr() const {
815  return Kind == Expression;
816  }
817 
818  bool isSoppBrTarget() const {
819  return isExpr() || isImm();
820  }
821 
822  bool isSWaitCnt() const;
823  bool isHwreg() const;
824  bool isSendMsg() const;
825  bool isSwizzle() const;
826  bool isSMRDOffset8() const;
827  bool isSMEMOffset() const;
828  bool isSMRDLiteralOffset() const;
829  bool isDPP8() const;
830  bool isDPPCtrl() const;
831  bool isBLGP() const;
832  bool isCBSZ() const;
833  bool isABID() const;
834  bool isGPRIdxMode() const;
835  bool isS16Imm() const;
836  bool isU16Imm() const;
837  bool isEndpgm() const;
838 
839  StringRef getExpressionAsToken() const {
840  assert(isExpr());
841  const MCSymbolRefExpr *S = cast<MCSymbolRefExpr>(Expr);
842  return S->getSymbol().getName();
843  }
844 
845  StringRef getToken() const {
846  assert(isToken());
847 
848  if (Kind == Expression)
849  return getExpressionAsToken();
850 
851  return StringRef(Tok.Data, Tok.Length);
852  }
853 
854  int64_t getImm() const {
855  assert(isImm());
856  return Imm.Val;
857  }
858 
859  void setImm(int64_t Val) {
860  assert(isImm());
861  Imm.Val = Val;
862  }
863 
864  ImmTy getImmTy() const {
865  assert(isImm());
866  return Imm.Type;
867  }
868 
869  unsigned getReg() const override {
870  assert(isRegKind());
871  return Reg.RegNo;
872  }
873 
874  SMLoc getStartLoc() const override {
875  return StartLoc;
876  }
877 
878  SMLoc getEndLoc() const override {
879  return EndLoc;
880  }
881 
882  SMRange getLocRange() const {
883  return SMRange(StartLoc, EndLoc);
884  }
885 
886  Modifiers getModifiers() const {
887  assert(isRegKind() || isImmTy(ImmTyNone));
888  return isRegKind() ? Reg.Mods : Imm.Mods;
889  }
890 
891  void setModifiers(Modifiers Mods) {
892  assert(isRegKind() || isImmTy(ImmTyNone));
893  if (isRegKind())
894  Reg.Mods = Mods;
895  else
896  Imm.Mods = Mods;
897  }
898 
899  bool hasModifiers() const {
900  return getModifiers().hasModifiers();
901  }
902 
903  bool hasFPModifiers() const {
904  return getModifiers().hasFPModifiers();
905  }
906 
907  bool hasIntModifiers() const {
908  return getModifiers().hasIntModifiers();
909  }
910 
911  uint64_t applyInputFPModifiers(uint64_t Val, unsigned Size) const;
912 
913  void addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers = true) const;
914 
915  void addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const;
916 
917  template <unsigned Bitwidth>
918  void addKImmFPOperands(MCInst &Inst, unsigned N) const;
919 
920  void addKImmFP16Operands(MCInst &Inst, unsigned N) const {
921  addKImmFPOperands<16>(Inst, N);
922  }
923 
924  void addKImmFP32Operands(MCInst &Inst, unsigned N) const {
925  addKImmFPOperands<32>(Inst, N);
926  }
927 
928  void addRegOperands(MCInst &Inst, unsigned N) const;
929 
930  void addBoolRegOperands(MCInst &Inst, unsigned N) const {
931  addRegOperands(Inst, N);
932  }
933 
934  void addRegOrImmOperands(MCInst &Inst, unsigned N) const {
935  if (isRegKind())
936  addRegOperands(Inst, N);
937  else if (isExpr())
938  Inst.addOperand(MCOperand::createExpr(Expr));
939  else
940  addImmOperands(Inst, N);
941  }
942 
943  void addRegOrImmWithInputModsOperands(MCInst &Inst, unsigned N) const {
944  Modifiers Mods = getModifiers();
945  Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand()));
946  if (isRegKind()) {
947  addRegOperands(Inst, N);
948  } else {
949  addImmOperands(Inst, N, false);
950  }
951  }
952 
953  void addRegOrImmWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
954  assert(!hasIntModifiers());
955  addRegOrImmWithInputModsOperands(Inst, N);
956  }
957 
958  void addRegOrImmWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
959  assert(!hasFPModifiers());
960  addRegOrImmWithInputModsOperands(Inst, N);
961  }
962 
963  void addRegWithInputModsOperands(MCInst &Inst, unsigned N) const {
964  Modifiers Mods = getModifiers();
965  Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand()));
966  assert(isRegKind());
967  addRegOperands(Inst, N);
968  }
969 
970  void addRegWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
971  assert(!hasIntModifiers());
972  addRegWithInputModsOperands(Inst, N);
973  }
974 
975  void addRegWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
976  assert(!hasFPModifiers());
977  addRegWithInputModsOperands(Inst, N);
978  }
979 
980  void addSoppBrTargetOperands(MCInst &Inst, unsigned N) const {
981  if (isImm())
982  addImmOperands(Inst, N);
983  else {
984  assert(isExpr());
985  Inst.addOperand(MCOperand::createExpr(Expr));
986  }
987  }
988 
989  static void printImmTy(raw_ostream& OS, ImmTy Type) {
990  switch (Type) {
991  case ImmTyNone: OS << "None"; break;
992  case ImmTyGDS: OS << "GDS"; break;
993  case ImmTyLDS: OS << "LDS"; break;
994  case ImmTyOffen: OS << "Offen"; break;
995  case ImmTyIdxen: OS << "Idxen"; break;
996  case ImmTyAddr64: OS << "Addr64"; break;
997  case ImmTyOffset: OS << "Offset"; break;
998  case ImmTyInstOffset: OS << "InstOffset"; break;
999  case ImmTyOffset0: OS << "Offset0"; break;
1000  case ImmTyOffset1: OS << "Offset1"; break;
1001  case ImmTyDLC: OS << "DLC"; break;
1002  case ImmTySCCB: OS << "SCCB"; break;
1003  case ImmTyGLC: OS << "GLC"; break;
1004  case ImmTySLC: OS << "SLC"; break;
1005  case ImmTySWZ: OS << "SWZ"; break;
1006  case ImmTyTFE: OS << "TFE"; break;
1007  case ImmTyD16: OS << "D16"; break;
1008  case ImmTyFORMAT: OS << "FORMAT"; break;
1009  case ImmTyClampSI: OS << "ClampSI"; break;
1010  case ImmTyOModSI: OS << "OModSI"; break;
1011  case ImmTyDPP8: OS << "DPP8"; break;
1012  case ImmTyDppCtrl: OS << "DppCtrl"; break;
1013  case ImmTyDppRowMask: OS << "DppRowMask"; break;
1014  case ImmTyDppBankMask: OS << "DppBankMask"; break;
1015  case ImmTyDppBoundCtrl: OS << "DppBoundCtrl"; break;
1016  case ImmTyDppFi: OS << "FI"; break;
1017  case ImmTySdwaDstSel: OS << "SdwaDstSel"; break;
1018  case ImmTySdwaSrc0Sel: OS << "SdwaSrc0Sel"; break;
1019  case ImmTySdwaSrc1Sel: OS << "SdwaSrc1Sel"; break;
1020  case ImmTySdwaDstUnused: OS << "SdwaDstUnused"; break;
1021  case ImmTyDMask: OS << "DMask"; break;
1022  case ImmTyDim: OS << "Dim"; break;
1023  case ImmTyUNorm: OS << "UNorm"; break;
1024  case ImmTyDA: OS << "DA"; break;
1025  case ImmTyR128A16: OS << "R128A16"; break;
1026  case ImmTyA16: OS << "A16"; break;
1027  case ImmTyLWE: OS << "LWE"; break;
1028  case ImmTyOff: OS << "Off"; break;
1029  case ImmTyExpTgt: OS << "ExpTgt"; break;
1030  case ImmTyExpCompr: OS << "ExpCompr"; break;
1031  case ImmTyExpVM: OS << "ExpVM"; break;
1032  case ImmTyHwreg: OS << "Hwreg"; break;
1033  case ImmTySendMsg: OS << "SendMsg"; break;
1034  case ImmTyInterpSlot: OS << "InterpSlot"; break;
1035  case ImmTyInterpAttr: OS << "InterpAttr"; break;
1036  case ImmTyAttrChan: OS << "AttrChan"; break;
1037  case ImmTyOpSel: OS << "OpSel"; break;
1038  case ImmTyOpSelHi: OS << "OpSelHi"; break;
1039  case ImmTyNegLo: OS << "NegLo"; break;
1040  case ImmTyNegHi: OS << "NegHi"; break;
1041  case ImmTySwizzle: OS << "Swizzle"; break;
1042  case ImmTyGprIdxMode: OS << "GprIdxMode"; break;
1043  case ImmTyHigh: OS << "High"; break;
1044  case ImmTyBLGP: OS << "BLGP"; break;
1045  case ImmTyCBSZ: OS << "CBSZ"; break;
1046  case ImmTyABID: OS << "ABID"; break;
1047  case ImmTyEndpgm: OS << "Endpgm"; break;
1048  }
1049  }
1050 
1051  void print(raw_ostream &OS) const override {
1052  switch (Kind) {
1053  case Register:
1054  OS << "<register " << getReg() << " mods: " << Reg.Mods << '>';
1055  break;
1056  case Immediate:
1057  OS << '<' << getImm();
1058  if (getImmTy() != ImmTyNone) {
1059  OS << " type: "; printImmTy(OS, getImmTy());
1060  }
1061  OS << " mods: " << Imm.Mods << '>';
1062  break;
1063  case Token:
1064  OS << '\'' << getToken() << '\'';
1065  break;
1066  case Expression:
1067  OS << "<expr " << *Expr << '>';
1068  break;
1069  }
1070  }
1071 
1072  static AMDGPUOperand::Ptr CreateImm(const AMDGPUAsmParser *AsmParser,
1073  int64_t Val, SMLoc Loc,
1074  ImmTy Type = ImmTyNone,
1075  bool IsFPImm = false) {
1076  auto Op = std::make_unique<AMDGPUOperand>(Immediate, AsmParser);
1077  Op->Imm.Val = Val;
1078  Op->Imm.IsFPImm = IsFPImm;
1079  Op->Imm.Kind = ImmKindTyNone;
1080  Op->Imm.Type = Type;
1081  Op->Imm.Mods = Modifiers();
1082  Op->StartLoc = Loc;
1083  Op->EndLoc = Loc;
1084  return Op;
1085  }
1086 
1087  static AMDGPUOperand::Ptr CreateToken(const AMDGPUAsmParser *AsmParser,
1088  StringRef Str, SMLoc Loc,
1089  bool HasExplicitEncodingSize = true) {
1090  auto Res = std::make_unique<AMDGPUOperand>(Token, AsmParser);
1091  Res->Tok.Data = Str.data();
1092  Res->Tok.Length = Str.size();
1093  Res->StartLoc = Loc;
1094  Res->EndLoc = Loc;
1095  return Res;
1096  }
1097 
1098  static AMDGPUOperand::Ptr CreateReg(const AMDGPUAsmParser *AsmParser,
1099  unsigned RegNo, SMLoc S,
1100  SMLoc E) {
1101  auto Op = std::make_unique<AMDGPUOperand>(Register, AsmParser);
1102  Op->Reg.RegNo = RegNo;
1103  Op->Reg.Mods = Modifiers();
1104  Op->StartLoc = S;
1105  Op->EndLoc = E;
1106  return Op;
1107  }
1108 
1109  static AMDGPUOperand::Ptr CreateExpr(const AMDGPUAsmParser *AsmParser,
1110  const class MCExpr *Expr, SMLoc S) {
1111  auto Op = std::make_unique<AMDGPUOperand>(Expression, AsmParser);
1112  Op->Expr = Expr;
1113  Op->StartLoc = S;
1114  Op->EndLoc = S;
1115  return Op;
1116  }
1117 };
1118 
1119 raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods) {
1120  OS << "abs:" << Mods.Abs << " neg:" << Mods.Neg << " sext:" << Mods.Sext;
1121  return OS;
1122 }
1123 
1124 //===----------------------------------------------------------------------===//
1125 // AsmParser
1126 //===----------------------------------------------------------------------===//
1127 
1128 // Holds info related to the current kernel, e.g. count of SGPRs used.
1129 // Kernel scope begins at the .amdgpu_hsa_kernel directive and ends at the next
1130 // .amdgpu_hsa_kernel directive or at EOF.
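// For example, usesRegister(IS_VGPR, 5, 2) records a use of v[5:6] and raises
// the .kernel.vgpr_count symbol to 7.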
1131 class KernelScopeInfo {
1132  int SgprIndexUnusedMin = -1;
1133  int VgprIndexUnusedMin = -1;
1134  MCContext *Ctx = nullptr;
1135 
1136  void usesSgprAt(int i) {
1137  if (i >= SgprIndexUnusedMin) {
1138  SgprIndexUnusedMin = ++i;
1139  if (Ctx) {
1140  MCSymbol * const Sym = Ctx->getOrCreateSymbol(Twine(".kernel.sgpr_count"));
1141  Sym->setVariableValue(MCConstantExpr::create(SgprIndexUnusedMin, *Ctx));
1142  }
1143  }
1144  }
1145 
1146  void usesVgprAt(int i) {
1147  if (i >= VgprIndexUnusedMin) {
1148  VgprIndexUnusedMin = ++i;
1149  if (Ctx) {
1150  MCSymbol * const Sym = Ctx->getOrCreateSymbol(Twine(".kernel.vgpr_count"));
1151  Sym->setVariableValue(MCConstantExpr::create(VgprIndexUnusedMin, *Ctx));
1152  }
1153  }
1154  }
1155 
1156 public:
1157  KernelScopeInfo() = default;
1158 
1159  void initialize(MCContext &Context) {
1160  Ctx = &Context;
1161  usesSgprAt(SgprIndexUnusedMin = -1);
1162  usesVgprAt(VgprIndexUnusedMin = -1);
1163  }
1164 
1165  void usesRegister(RegisterKind RegKind, unsigned DwordRegIndex, unsigned RegWidth) {
1166  switch (RegKind) {
1167  case IS_SGPR: usesSgprAt(DwordRegIndex + RegWidth - 1); break;
1168  case IS_AGPR: // fall through
1169  case IS_VGPR: usesVgprAt(DwordRegIndex + RegWidth - 1); break;
1170  default: break;
1171  }
1172  }
1173 };
1174 
1175 class AMDGPUAsmParser : public MCTargetAsmParser {
1176  MCAsmParser &Parser;
1177 
1178  // Maximum number of extra operands parsed after the first optional operand.
1179  // This may be necessary to skip hardcoded mandatory operands.
1180  static const unsigned MAX_OPR_LOOKAHEAD = 8;
1181 
1182  unsigned ForcedEncodingSize = 0;
1183  bool ForcedDPP = false;
1184  bool ForcedSDWA = false;
1185  KernelScopeInfo KernelScope;
1186 
1187  /// @name Auto-generated Match Functions
1188  /// {
1189 
1190 #define GET_ASSEMBLER_HEADER
1191 #include "AMDGPUGenAsmMatcher.inc"
1192 
1193  /// }
1194 
1195 private:
1196  bool ParseAsAbsoluteExpression(uint32_t &Ret);
1197  bool OutOfRangeError(SMRange Range);
1198  /// Calculate VGPR/SGPR blocks required for the given target, reserved
1199  /// registers, and user-specified NextFreeXGPR values.
1200  ///
1201  /// \param Features [in] Target features, used for bug corrections.
1202  /// \param VCCUsed [in] Whether VCC special SGPR is reserved.
1203  /// \param FlatScrUsed [in] Whether FLAT_SCRATCH special SGPR is reserved.
1204  /// \param XNACKUsed [in] Whether XNACK_MASK special SGPR is reserved.
1205  /// \param EnableWavefrontSize32 [in] Value of ENABLE_WAVEFRONT_SIZE32 kernel
1206  /// descriptor field, if valid.
1207  /// \param NextFreeVGPR [in] Max VGPR number referenced, plus one.
1208  /// \param VGPRRange [in] Token range, used for VGPR diagnostics.
1209  /// \param NextFreeSGPR [in] Max SGPR number referenced, plus one.
1210  /// \param SGPRRange [in] Token range, used for SGPR diagnostics.
1211  /// \param VGPRBlocks [out] Result VGPR block count.
1212  /// \param SGPRBlocks [out] Result SGPR block count.
1213  bool calculateGPRBlocks(const FeatureBitset &Features, bool VCCUsed,
1214  bool FlatScrUsed, bool XNACKUsed,
1215  Optional<bool> EnableWavefrontSize32, unsigned NextFreeVGPR,
1216  SMRange VGPRRange, unsigned NextFreeSGPR,
1217  SMRange SGPRRange, unsigned &VGPRBlocks,
1218  unsigned &SGPRBlocks);
1219  bool ParseDirectiveAMDGCNTarget();
1220  bool ParseDirectiveAMDHSAKernel();
1221  bool ParseDirectiveMajorMinor(uint32_t &Major, uint32_t &Minor);
1222  bool ParseDirectiveHSACodeObjectVersion();
1223  bool ParseDirectiveHSACodeObjectISA();
1224  bool ParseAMDKernelCodeTValue(StringRef ID, amd_kernel_code_t &Header);
1225  bool ParseDirectiveAMDKernelCodeT();
1226  bool subtargetHasRegister(const MCRegisterInfo &MRI, unsigned RegNo) const;
1227  bool ParseDirectiveAMDGPUHsaKernel();
1228 
1229  bool ParseDirectiveISAVersion();
1230  bool ParseDirectiveHSAMetadata();
1231  bool ParseDirectivePALMetadataBegin();
1232  bool ParseDirectivePALMetadata();
1233  bool ParseDirectiveAMDGPULDS();
1234 
1235  /// Common code to parse out a block of text (typically YAML) between start and
1236  /// end directives.
1237  bool ParseToEndDirective(const char *AssemblerDirectiveBegin,
1238  const char *AssemblerDirectiveEnd,
1239  std::string &CollectString);
1240 
1241  bool AddNextRegisterToList(unsigned& Reg, unsigned& RegWidth,
1242  RegisterKind RegKind, unsigned Reg1, SMLoc Loc);
1243  bool ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
1244  unsigned &RegNum, unsigned &RegWidth,
1245  bool RestoreOnFailure = false);
1246  bool ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
1247  unsigned &RegNum, unsigned &RegWidth,
1248  SmallVectorImpl<AsmToken> &Tokens);
1249  unsigned ParseRegularReg(RegisterKind &RegKind, unsigned &RegNum,
1250  unsigned &RegWidth,
1251  SmallVectorImpl<AsmToken> &Tokens);
1252  unsigned ParseSpecialReg(RegisterKind &RegKind, unsigned &RegNum,
1253  unsigned &RegWidth,
1254  SmallVectorImpl<AsmToken> &Tokens);
1255  unsigned ParseRegList(RegisterKind &RegKind, unsigned &RegNum,
1256  unsigned &RegWidth, SmallVectorImpl<AsmToken> &Tokens);
1257  bool ParseRegRange(unsigned& Num, unsigned& Width);
1258  unsigned getRegularReg(RegisterKind RegKind,
1259  unsigned RegNum,
1260  unsigned RegWidth,
1261  SMLoc Loc);
1262 
1263  bool isRegister();
1264  bool isRegister(const AsmToken &Token, const AsmToken &NextToken) const;
1265  Optional<StringRef> getGprCountSymbolName(RegisterKind RegKind);
1266  void initializeGprCountSymbol(RegisterKind RegKind);
1267  bool updateGprCountSymbols(RegisterKind RegKind, unsigned DwordRegIndex,
1268  unsigned RegWidth);
1269  void cvtMubufImpl(MCInst &Inst, const OperandVector &Operands,
1270  bool IsAtomic, bool IsAtomicReturn, bool IsLds = false);
1271  void cvtDSImpl(MCInst &Inst, const OperandVector &Operands,
1272  bool IsGdsHardcoded);
1273 
1274 public:
1275  enum AMDGPUMatchResultTy {
1276  Match_PreferE32 = FIRST_TARGET_MATCH_RESULT_TY
1277  };
1278  enum OperandMode {
1279  OperandMode_Default,
1280  OperandMode_NSA,
1281  };
1282 
1283  using OptionalImmIndexMap = std::map<AMDGPUOperand::ImmTy, unsigned>;
1284 
1285  AMDGPUAsmParser(const MCSubtargetInfo &STI, MCAsmParser &_Parser,
1286  const MCInstrInfo &MII,
1287  const MCTargetOptions &Options)
1288  : MCTargetAsmParser(Options, STI, MII), Parser(_Parser) {
1289  MCAsmParserExtension::Initialize(Parser);
1290 
1291  if (getFeatureBits().none()) {
1292  // Set default features.
1293  copySTI().ToggleFeature("southern-islands");
1294  }
1295 
1296  setAvailableFeatures(ComputeAvailableFeatures(getFeatureBits()));
1297 
1298  {
1299  // TODO: make these pre-defined variables read-only.
1300  // Currently there is no suitable machinery in core llvm-mc for this.
1301  // MCSymbol::isRedefinable is intended for another purpose, and
1302  // AsmParser::parseDirectiveSet() cannot be specialized for a specific target.
1303  AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
1304  MCContext &Ctx = getContext();
1305  if (ISA.Major >= 6 && isHsaAbiVersion3(&getSTI())) {
1306  MCSymbol *Sym =
1307  Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_number"));
1308  Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx));
1309  Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_minor"));
1310  Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx));
1311  Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_stepping"));
1312  Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx));
1313  } else {
1314  MCSymbol *Sym =
1315  Ctx.getOrCreateSymbol(Twine(".option.machine_version_major"));
1316  Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx));
1317  Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_minor"));
1318  Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx));
1319  Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_stepping"));
1320  Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx));
1321  }
1322  if (ISA.Major >= 6 && isHsaAbiVersion3(&getSTI())) {
1323  initializeGprCountSymbol(IS_VGPR);
1324  initializeGprCountSymbol(IS_SGPR);
1325  } else
1326  KernelScope.initialize(getContext());
1327  }
1328  }
1329 
1330  bool hasXNACK() const {
1331  return AMDGPU::hasXNACK(getSTI());
1332  }
1333 
1334  bool hasMIMG_R128() const {
1335  return AMDGPU::hasMIMG_R128(getSTI());
1336  }
1337 
1338  bool hasPackedD16() const {
1339  return AMDGPU::hasPackedD16(getSTI());
1340  }
1341 
1342  bool hasGFX10A16() const {
1343  return AMDGPU::hasGFX10A16(getSTI());
1344  }
1345 
1346  bool isSI() const {
1347  return AMDGPU::isSI(getSTI());
1348  }
1349 
1350  bool isCI() const {
1351  return AMDGPU::isCI(getSTI());
1352  }
1353 
1354  bool isVI() const {
1355  return AMDGPU::isVI(getSTI());
1356  }
1357 
1358  bool isGFX9() const {
1359  return AMDGPU::isGFX9(getSTI());
1360  }
1361 
1362  bool isGFX90A() const {
1363  return AMDGPU::isGFX90A(getSTI());
1364  }
1365 
1366  bool isGFX9Plus() const {
1367  return AMDGPU::isGFX9Plus(getSTI());
1368  }
1369 
1370  bool isGFX10() const {
1371  return AMDGPU::isGFX10(getSTI());
1372  }
1373 
1374  bool isGFX10Plus() const { return AMDGPU::isGFX10Plus(getSTI()); }
1375 
1376  bool isGFX10_BEncoding() const {
1377  return AMDGPU::isGFX10_BEncoding(getSTI());
1378  }
1379 
1380  bool hasInv2PiInlineImm() const {
1381  return getFeatureBits()[AMDGPU::FeatureInv2PiInlineImm];
1382  }
1383 
1384  bool hasFlatOffsets() const {
1385  return getFeatureBits()[AMDGPU::FeatureFlatInstOffsets];
1386  }
1387 
1388  bool hasSGPR102_SGPR103() const {
1389  return !isVI() && !isGFX9();
1390  }
1391 
1392  bool hasSGPR104_SGPR105() const { return isGFX10Plus(); }
1393 
1394  bool hasIntClamp() const {
1395  return getFeatureBits()[AMDGPU::FeatureIntClamp];
1396  }
1397 
1398  AMDGPUTargetStreamer &getTargetStreamer() {
1399  MCTargetStreamer &TS = *getParser().getStreamer().getTargetStreamer();
1400  return static_cast<AMDGPUTargetStreamer &>(TS);
1401  }
1402 
1403  const MCRegisterInfo *getMRI() const {
1404  // We need this const_cast because for some reason getContext() is not const
1405  // in MCAsmParser.
1406  return const_cast<AMDGPUAsmParser*>(this)->getContext().getRegisterInfo();
1407  }
1408 
1409  const MCInstrInfo *getMII() const {
1410  return &MII;
1411  }
1412 
1413  const FeatureBitset &getFeatureBits() const {
1414  return getSTI().getFeatureBits();
1415  }
1416 
1417  void setForcedEncodingSize(unsigned Size) { ForcedEncodingSize = Size; }
1418  void setForcedDPP(bool ForceDPP_) { ForcedDPP = ForceDPP_; }
1419  void setForcedSDWA(bool ForceSDWA_) { ForcedSDWA = ForceSDWA_; }
1420 
1421  unsigned getForcedEncodingSize() const { return ForcedEncodingSize; }
1422  bool isForcedVOP3() const { return ForcedEncodingSize == 64; }
1423  bool isForcedDPP() const { return ForcedDPP; }
1424  bool isForcedSDWA() const { return ForcedSDWA; }
1425  ArrayRef<unsigned> getMatchedVariants() const;
1426  StringRef getMatchedVariantName() const;
1427 
1428  std::unique_ptr<AMDGPUOperand> parseRegister(bool RestoreOnFailure = false);
1429  bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc,
1430  bool RestoreOnFailure);
1431  bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) override;
1432  OperandMatchResultTy tryParseRegister(unsigned &RegNo, SMLoc &StartLoc,
1433  SMLoc &EndLoc) override;
1434  unsigned checkTargetMatchPredicate(MCInst &Inst) override;
1435  unsigned validateTargetOperandClass(MCParsedAsmOperand &Op,
1436  unsigned Kind) override;
1437  bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
1438  OperandVector &Operands, MCStreamer &Out,
1439  uint64_t &ErrorInfo,
1440  bool MatchingInlineAsm) override;
1441  bool ParseDirective(AsmToken DirectiveID) override;
1442  OperandMatchResultTy parseOperand(OperandVector &Operands, StringRef Mnemonic,
1443  OperandMode Mode = OperandMode_Default);
1444  StringRef parseMnemonicSuffix(StringRef Name);
1445  bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
1446  SMLoc NameLoc, OperandVector &Operands) override;
1447  //bool ProcessInstruction(MCInst &Inst);
1448 
1449  OperandMatchResultTy parseIntWithPrefix(const char *Prefix, int64_t &Int);
1450 
1451  OperandMatchResultTy
1452  parseIntWithPrefix(const char *Prefix, OperandVector &Operands,
1453  AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
1454  bool (*ConvertResult)(int64_t &) = nullptr);
1455 
1456  OperandMatchResultTy
1457  parseOperandArrayWithPrefix(const char *Prefix,
1458  OperandVector &Operands,
1459  AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
1460  bool (*ConvertResult)(int64_t&) = nullptr);
1461 
1462  OperandMatchResultTy
1463  parseNamedBit(StringRef Name, OperandVector &Operands,
1464  AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone);
1465  OperandMatchResultTy parseStringWithPrefix(StringRef Prefix,
1466  StringRef &Value,
1467  SMLoc &StringLoc);
1468 
1469  bool isModifier();
1470  bool isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
1471  bool isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
1472  bool isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
1473  bool isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const;
1474  bool parseSP3NegModifier();
1475  OperandMatchResultTy parseImm(OperandVector &Operands, bool HasSP3AbsModifier = false);
1476  OperandMatchResultTy parseReg(OperandVector &Operands);
1477  OperandMatchResultTy parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod = false);
1478  OperandMatchResultTy parseRegOrImmWithFPInputMods(OperandVector &Operands, bool AllowImm = true);
1479  OperandMatchResultTy parseRegOrImmWithIntInputMods(OperandVector &Operands, bool AllowImm = true);
1480  OperandMatchResultTy parseRegWithFPInputMods(OperandVector &Operands);
1481  OperandMatchResultTy parseRegWithIntInputMods(OperandVector &Operands);
1482  OperandMatchResultTy parseVReg32OrOff(OperandVector &Operands);
1483  OperandMatchResultTy parseDfmtNfmt(int64_t &Format);
1484  OperandMatchResultTy parseUfmt(int64_t &Format);
1485  OperandMatchResultTy parseSymbolicSplitFormat(StringRef FormatStr, SMLoc Loc, int64_t &Format);
1486  OperandMatchResultTy parseSymbolicUnifiedFormat(StringRef FormatStr, SMLoc Loc, int64_t &Format);
1487  OperandMatchResultTy parseFORMAT(OperandVector &Operands);
1488  OperandMatchResultTy parseSymbolicOrNumericFormat(int64_t &Format);
1489  OperandMatchResultTy parseNumericFormat(int64_t &Format);
1490  bool tryParseFmt(const char *Pref, int64_t MaxVal, int64_t &Val);
1491  bool matchDfmtNfmt(int64_t &Dfmt, int64_t &Nfmt, StringRef FormatStr, SMLoc Loc);
1492 
1493  void cvtDSOffset01(MCInst &Inst, const OperandVector &Operands);
1494  void cvtDS(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, false); }
1495  void cvtDSGds(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, true); }
1496  void cvtExp(MCInst &Inst, const OperandVector &Operands);
1497 
1498  bool parseCnt(int64_t &IntVal);
1499  OperandMatchResultTy parseSWaitCntOps(OperandVector &Operands);
1500  OperandMatchResultTy parseHwreg(OperandVector &Operands);
1501 
1502 private:
1503  struct OperandInfoTy {
1504  SMLoc Loc;
1505  int64_t Id;
1506  bool IsSymbolic = false;
1507  bool IsDefined = false;
1508 
1509  OperandInfoTy(int64_t Id_) : Id(Id_) {}
1510  };
1511 
1512  bool parseSendMsgBody(OperandInfoTy &Msg, OperandInfoTy &Op, OperandInfoTy &Stream);
1513  bool validateSendMsg(const OperandInfoTy &Msg,
1514  const OperandInfoTy &Op,
1515  const OperandInfoTy &Stream);
1516 
1517  bool parseHwregBody(OperandInfoTy &HwReg,
1518  OperandInfoTy &Offset,
1519  OperandInfoTy &Width);
1520  bool validateHwreg(const OperandInfoTy &HwReg,
1521  const OperandInfoTy &Offset,
1522  const OperandInfoTy &Width);
1523 
1524  SMLoc getFlatOffsetLoc(const OperandVector &Operands) const;
1525  SMLoc getSMEMOffsetLoc(const OperandVector &Operands) const;
1526 
1527  SMLoc getOperandLoc(std::function<bool(const AMDGPUOperand&)> Test,
1528  const OperandVector &Operands) const;
1529  SMLoc getImmLoc(AMDGPUOperand::ImmTy Type, const OperandVector &Operands) const;
1530  SMLoc getRegLoc(unsigned Reg, const OperandVector &Operands) const;
1531  SMLoc getLitLoc(const OperandVector &Operands) const;
1532  SMLoc getConstLoc(const OperandVector &Operands) const;
1533 
1534  bool validateInstruction(const MCInst &Inst, const SMLoc &IDLoc, const OperandVector &Operands);
1535  bool validateFlatOffset(const MCInst &Inst, const OperandVector &Operands);
1536  bool validateSMEMOffset(const MCInst &Inst, const OperandVector &Operands);
1537  bool validateSOPLiteral(const MCInst &Inst) const;
1538  bool validateConstantBusLimitations(const MCInst &Inst, const OperandVector &Operands);
1539  bool validateEarlyClobberLimitations(const MCInst &Inst, const OperandVector &Operands);
1540  bool validateIntClampSupported(const MCInst &Inst);
1541  bool validateMIMGAtomicDMask(const MCInst &Inst);
1542  bool validateMIMGGatherDMask(const MCInst &Inst);
1543  bool validateMovrels(const MCInst &Inst, const OperandVector &Operands);
1544  bool validateMIMGDataSize(const MCInst &Inst);
1545  bool validateMIMGAddrSize(const MCInst &Inst);
1546  bool validateMIMGD16(const MCInst &Inst);
1547  bool validateMIMGDim(const MCInst &Inst);
1548  bool validateLdsDirect(const MCInst &Inst);
1549  bool validateOpSel(const MCInst &Inst);
1550  bool validateVccOperand(unsigned Reg) const;
1551  bool validateVOP3Literal(const MCInst &Inst, const OperandVector &Operands);
1552  bool validateMAIAccWrite(const MCInst &Inst, const OperandVector &Operands);
1553  bool validateAGPRLdSt(const MCInst &Inst) const;
1554  bool validateVGPRAlign(const MCInst &Inst) const;
1555  bool validateDivScale(const MCInst &Inst);
1556  bool validateCoherencyBits(const MCInst &Inst, const OperandVector &Operands,
1557  const SMLoc &IDLoc);
1558  unsigned getConstantBusLimit(unsigned Opcode) const;
1559  bool usesConstantBus(const MCInst &Inst, unsigned OpIdx);
1560  bool isInlineConstant(const MCInst &Inst, unsigned OpIdx) const;
1561  unsigned findImplicitSGPRReadInVOP(const MCInst &Inst) const;
1562 
1563  bool isSupportedMnemo(StringRef Mnemo,
1564  const FeatureBitset &FBS);
1565  bool isSupportedMnemo(StringRef Mnemo,
1566  const FeatureBitset &FBS,
1567  ArrayRef<unsigned> Variants);
1568  bool checkUnsupportedInstruction(StringRef Name, const SMLoc &IDLoc);
1569 
1570  bool isId(const StringRef Id) const;
1571  bool isId(const AsmToken &Token, const StringRef Id) const;
1572  bool isToken(const AsmToken::TokenKind Kind) const;
1573  bool trySkipId(const StringRef Id);
1574  bool trySkipId(const StringRef Pref, const StringRef Id);
1575  bool trySkipId(const StringRef Id, const AsmToken::TokenKind Kind);
1576  bool trySkipToken(const AsmToken::TokenKind Kind);
1577  bool skipToken(const AsmToken::TokenKind Kind, const StringRef ErrMsg);
1578  bool parseString(StringRef &Val, const StringRef ErrMsg = "expected a string");
1579  bool parseId(StringRef &Val, const StringRef ErrMsg = "");
1580 
1581  void peekTokens(MutableArrayRef<AsmToken> Tokens);
1582  AsmToken::TokenKind getTokenKind() const;
1583  bool parseExpr(int64_t &Imm, StringRef Expected = "");
1584  bool parseExpr(OperandVector &Operands);
1585  StringRef getTokenStr() const;
1586  AsmToken peekToken();
1587  AsmToken getToken() const;
1588  SMLoc getLoc() const;
1589  void lex();
1590 
1591 public:
1592  OperandMatchResultTy parseOptionalOperand(OperandVector &Operands);
1593  OperandMatchResultTy parseOptionalOpr(OperandVector &Operands);
1594 
1595  OperandMatchResultTy parseExpTgt(OperandVector &Operands);
1596  OperandMatchResultTy parseSendMsgOp(OperandVector &Operands);
1597  OperandMatchResultTy parseInterpSlot(OperandVector &Operands);
1598  OperandMatchResultTy parseInterpAttr(OperandVector &Operands);
1599  OperandMatchResultTy parseSOppBrTarget(OperandVector &Operands);
1600  OperandMatchResultTy parseBoolReg(OperandVector &Operands);
1601 
1602  bool parseSwizzleOperand(int64_t &Op,
1603  const unsigned MinVal,
1604  const unsigned MaxVal,
1605  const StringRef ErrMsg,
1606  SMLoc &Loc);
1607  bool parseSwizzleOperands(const unsigned OpNum, int64_t* Op,
1608  const unsigned MinVal,
1609  const unsigned MaxVal,
1610  const StringRef ErrMsg);
1611  OperandMatchResultTy parseSwizzleOp(OperandVector &Operands);
1612  bool parseSwizzleOffset(int64_t &Imm);
1613  bool parseSwizzleMacro(int64_t &Imm);
1614  bool parseSwizzleQuadPerm(int64_t &Imm);
1615  bool parseSwizzleBitmaskPerm(int64_t &Imm);
1616  bool parseSwizzleBroadcast(int64_t &Imm);
1617  bool parseSwizzleSwap(int64_t &Imm);
1618  bool parseSwizzleReverse(int64_t &Imm);
1619 
1620  OperandMatchResultTy parseGPRIdxMode(OperandVector &Operands);
1621  int64_t parseGPRIdxMacro();
1622 
1623  void cvtMubuf(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false, false); }
1624  void cvtMubufAtomic(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, true, false); }
1625  void cvtMubufAtomicReturn(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, true, true); }
1626  void cvtMubufLds(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false, false, true); }
1627  void cvtMtbuf(MCInst &Inst, const OperandVector &Operands);
1628 
1629  AMDGPUOperand::Ptr defaultDLC() const;
1630  AMDGPUOperand::Ptr defaultSCCB() const;
1631  AMDGPUOperand::Ptr defaultGLC() const;
1632  AMDGPUOperand::Ptr defaultGLC_1() const;
1633  AMDGPUOperand::Ptr defaultSLC() const;
1634 
1635  AMDGPUOperand::Ptr defaultSMRDOffset8() const;
1636  AMDGPUOperand::Ptr defaultSMEMOffset() const;
1637  AMDGPUOperand::Ptr defaultSMRDLiteralOffset() const;
1638  AMDGPUOperand::Ptr defaultFlatOffset() const;
1639 
1640  OperandMatchResultTy parseOModOperand(OperandVector &Operands);
1641 
1642  void cvtVOP3(MCInst &Inst, const OperandVector &Operands,
1643  OptionalImmIndexMap &OptionalIdx);
1644  void cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands);
1645  void cvtVOP3(MCInst &Inst, const OperandVector &Operands);
1646  void cvtVOP3P(MCInst &Inst, const OperandVector &Operands);
1647 
1648  void cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands);
1649 
1650  void cvtMIMG(MCInst &Inst, const OperandVector &Operands,
1651  bool IsAtomic = false);
1652  void cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands);
1653  void cvtIntersectRay(MCInst &Inst, const OperandVector &Operands);
1654 
1655  bool parseDimId(unsigned &Encoding);
1656  OperandMatchResultTy parseDim(OperandVector &Operands);
1657  OperandMatchResultTy parseDPP8(OperandVector &Operands);
1658  OperandMatchResultTy parseDPPCtrl(OperandVector &Operands);
1659  bool isSupportedDPPCtrl(StringRef Ctrl, const OperandVector &Operands);
1660  int64_t parseDPPCtrlSel(StringRef Ctrl);
1661  int64_t parseDPPCtrlPerm();
1662  AMDGPUOperand::Ptr defaultRowMask() const;
1663  AMDGPUOperand::Ptr defaultBankMask() const;
1664  AMDGPUOperand::Ptr defaultBoundCtrl() const;
1665  AMDGPUOperand::Ptr defaultFI() const;
1666  void cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8 = false);
1667  void cvtDPP8(MCInst &Inst, const OperandVector &Operands) { cvtDPP(Inst, Operands, true); }
1668 
1669  OperandMatchResultTy parseSDWASel(OperandVector &Operands, StringRef Prefix,
1670  AMDGPUOperand::ImmTy Type);
1671  OperandMatchResultTy parseSDWADstUnused(OperandVector &Operands);
1672  void cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands);
1673  void cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands);
1674  void cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands);
1675  void cvtSdwaVOP2e(MCInst &Inst, const OperandVector &Operands);
1676  void cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands);
1677  void cvtSDWA(MCInst &Inst, const OperandVector &Operands,
1678  uint64_t BasicInstType,
1679  bool SkipDstVcc = false,
1680  bool SkipSrcVcc = false);
1681 
1682  AMDGPUOperand::Ptr defaultBLGP() const;
1683  AMDGPUOperand::Ptr defaultCBSZ() const;
1684  AMDGPUOperand::Ptr defaultABID() const;
1685 
1686  OperandMatchResultTy parseEndpgmOp(OperandVector &Operands);
1687  AMDGPUOperand::Ptr defaultEndpgmImmOperands() const;
1688 };
1689 
1690 struct OptionalOperand {
1691  const char *Name;
1692  AMDGPUOperand::ImmTy Type;
1693  bool IsBit;
1694  bool (*ConvertResult)(int64_t&);
1695 };
1696 
1697 } // end anonymous namespace
1698 
1699 // May be called with integer type with equivalent bitwidth.
1700 static const fltSemantics *getFltSemantics(unsigned Size) {
1701  switch (Size) {
1702  case 4:
1703  return &APFloat::IEEEsingle();
1704  case 8:
1705  return &APFloat::IEEEdouble();
1706  case 2:
1707  return &APFloat::IEEEhalf();
1708  default:
1709  llvm_unreachable("unsupported fp type");
1710  }
1711 }
1712 
1713 static const fltSemantics *getFltSemantics(MVT VT) {
1714  return getFltSemantics(VT.getSizeInBits() / 8);
1715 }
1716 
1717 static const fltSemantics *getOpFltSemantics(uint8_t OperandType) {
1718  switch (OperandType) {
1719  case AMDGPU::OPERAND_REG_IMM_INT32:
1720  case AMDGPU::OPERAND_REG_IMM_FP32:
1721  case AMDGPU::OPERAND_REG_INLINE_C_INT32:
1722  case AMDGPU::OPERAND_REG_INLINE_C_FP32:
1723  case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
1724  case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
1725  case AMDGPU::OPERAND_REG_INLINE_C_V2FP32:
1726  case AMDGPU::OPERAND_REG_IMM_V2FP32:
1727  case AMDGPU::OPERAND_REG_INLINE_C_V2INT32:
1728  case AMDGPU::OPERAND_REG_IMM_V2INT32:
1729  return &APFloat::IEEEsingle();
1730  case AMDGPU::OPERAND_REG_IMM_INT64:
1731  case AMDGPU::OPERAND_REG_IMM_FP64:
1732  case AMDGPU::OPERAND_REG_INLINE_C_INT64:
1733  case AMDGPU::OPERAND_REG_INLINE_C_FP64:
1734  case AMDGPU::OPERAND_REG_INLINE_AC_FP64:
1735  return &APFloat::IEEEdouble();
1736  case AMDGPU::OPERAND_REG_IMM_INT16:
1737  case AMDGPU::OPERAND_REG_IMM_FP16:
1738  case AMDGPU::OPERAND_REG_INLINE_C_INT16:
1739  case AMDGPU::OPERAND_REG_INLINE_C_FP16:
1740  case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
1741  case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
1742  case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
1743  case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
1744  case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
1745  case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16:
1746  case AMDGPU::OPERAND_REG_IMM_V2INT16:
1747  case AMDGPU::OPERAND_REG_IMM_V2FP16:
1748  return &APFloat::IEEEhalf();
1749  default:
1750  llvm_unreachable("unsupported fp type");
1751  }
1752 }
1753 
1754 //===----------------------------------------------------------------------===//
1755 // Operand
1756 //===----------------------------------------------------------------------===//
1757 
1758 static bool canLosslesslyConvertToFPType(APFloat &FPLiteral, MVT VT) {
1759  bool Lost;
1760 
1761  // Convert the literal to the required floating-point type
1762  APFloat::opStatus Status = FPLiteral.convert(*getFltSemantics(VT),
1763  APFloat::rmNearestTiesToEven,
1764  &Lost);
1765  // We allow precision loss but not overflow or underflow
1766  if (Status != APFloat::opOK &&
1767  Lost &&
1768  ((Status & APFloat::opOverflow) != 0 ||
1769  (Status & APFloat::opUnderflow) != 0)) {
1770  return false;
1771  }
1772 
1773  return true;
1774 }
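// For illustration: converting 0.1 to f16 only loses precision and is
// still accepted by the check above, while 1.0e10 overflows the f16
// range and is rejected.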
1775 
1776 static bool isSafeTruncation(int64_t Val, unsigned Size) {
1777  return isUIntN(Size, Val) || isIntN(Size, Val);
1778 }
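// For example, with Size == 16 both 0xFFFF (valid as unsigned) and -1
// (valid as signed) are safe truncations, while 0x1FFFF is neither.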
1779 
1780 static bool isInlineableLiteralOp16(int64_t Val, MVT VT, bool HasInv2Pi) {
1781  if (VT.getScalarType() == MVT::i16) {
1782  // FP immediate values are broken.
1783  return isInlinableIntLiteral(Val);
1784  }
1785 
1786  // f16/v2f16 operands work correctly for all values.
1787  return AMDGPU::isInlinableLiteral16(Val, HasInv2Pi);
1788 }
1789 
1790 bool AMDGPUOperand::isInlinableImm(MVT type) const {
1791 
1792  // This is a hack to enable named inline values like
1793  // shared_base with both 32-bit and 64-bit operands.
1794  // Note that these values are defined as
1795  // 32-bit operands only.
1796  if (isInlineValue()) {
1797  return true;
1798  }
1799 
1800  if (!isImmTy(ImmTyNone)) {
1801  // Only plain immediates are inlinable (e.g. "clamp" attribute is not)
1802  return false;
1803  }
1804  // TODO: We should avoid using host float here. It would be better to
1805  // check the float bit values which is what a few other places do.
1806  // We've had bot failures before due to weird NaN support on mips hosts.
1807 
1808  APInt Literal(64, Imm.Val);
1809 
1810  if (Imm.IsFPImm) { // We got fp literal token
1811  if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand
1812  return AMDGPU::isInlinableLiteral64(Imm.Val,
1813  AsmParser->hasInv2PiInlineImm());
1814  }
1815 
1816  APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val));
1817  if (!canLosslesslyConvertToFPType(FPLiteral, type))
1818  return false;
1819 
1820  if (type.getScalarSizeInBits() == 16) {
1821  return isInlineableLiteralOp16(
1822  static_cast<int16_t>(FPLiteral.bitcastToAPInt().getZExtValue()),
1823  type, AsmParser->hasInv2PiInlineImm());
1824  }
1825 
1826  // Check if single precision literal is inlinable
1827  return AMDGPU::isInlinableLiteral32(
1828  static_cast<int32_t>(FPLiteral.bitcastToAPInt().getZExtValue()),
1829  AsmParser->hasInv2PiInlineImm());
1830  }
1831 
1832  // We got int literal token.
1833  if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand
1834  return AMDGPU::isInlinableLiteral64(Imm.Val,
1835  AsmParser->hasInv2PiInlineImm());
1836  }
1837 
1838  if (!isSafeTruncation(Imm.Val, type.getScalarSizeInBits())) {
1839  return false;
1840  }
1841 
1842  if (type.getScalarSizeInBits() == 16) {
1843  return isInlineableLiteralOp16(
1844  static_cast<int16_t>(Literal.getLoBits(16).getSExtValue()),
1845  type, AsmParser->hasInv2PiInlineImm());
1846  }
1847 
1848  return AMDGPU::isInlinableLiteral32(
1849  static_cast<int32_t>(Literal.getLoBits(32).getZExtValue()),
1850  AsmParser->hasInv2PiInlineImm());
1851 }
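// Informally, this accepts values representable as hardware inline
// constants: small integers (-16..64) and selected fp values such as
// 0.5, 1.0, 2.0, 4.0 (and 1/(2*pi) where supported). For example,
// "v_add_f32 v0, 0.5, v1" needs no literal dword, whereas
// "v_add_f32 v0, 0.1234, v1" requires one.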
1852 
1853 bool AMDGPUOperand::isLiteralImm(MVT type) const {
1854  // Check that this immediate can be added as literal
1855  if (!isImmTy(ImmTyNone)) {
1856  return false;
1857  }
1858 
1859  if (!Imm.IsFPImm) {
1860  // We got int literal token.
1861 
1862  if (type == MVT::f64 && hasFPModifiers()) {
1863  // Cannot apply fp modifiers to int literals preserving the same semantics
1864  // for VOP1/2/C and VOP3 because of integer truncation. To avoid ambiguity,
1865  // disable these cases.
1866  return false;
1867  }
1868 
1869  unsigned Size = type.getSizeInBits();
1870  if (Size == 64)
1871  Size = 32;
1872 
1873  // FIXME: 64-bit operands can zero extend, sign extend, or pad zeroes for FP
1874  // types.
1875  return isSafeTruncation(Imm.Val, Size);
1876  }
1877 
1878  // We got fp literal token
1879  if (type == MVT::f64) { // Expected 64-bit fp operand
1880  // We would set the low 32 bits of the literal to zeroes, but such literals are accepted
1881  return true;
1882  }
1883 
1884  if (type == MVT::i64) { // Expected 64-bit int operand
1885  // We don't allow fp literals in 64-bit integer instructions. It is
1886  // unclear how we should encode them.
1887  return false;
1888  }
1889 
1890  // We allow fp literals with f16x2 operands assuming that the specified
1891  // literal goes into the lower half and the upper half is zero. We also
1892  // require that the literal may be losslessly converted to f16.
1893  MVT ExpectedType = (type == MVT::v2f16)? MVT::f16 :
1894  (type == MVT::v2i16)? MVT::i16 :
1895  (type == MVT::v2f32)? MVT::f32 : type;
1896 
1897  APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val));
1898  return canLosslesslyConvertToFPType(FPLiteral, ExpectedType);
1899 }
1900 
1901 bool AMDGPUOperand::isRegClass(unsigned RCID) const {
1902  return isRegKind() && AsmParser->getMRI()->getRegClass(RCID).contains(getReg());
1903 }
1904 
1905 bool AMDGPUOperand::isVRegWithInputMods() const {
1906  return isRegClass(AMDGPU::VGPR_32RegClassID) ||
1907  // GFX90A allows DPP on 64-bit operands.
1908  (isRegClass(AMDGPU::VReg_64RegClassID) &&
1909  AsmParser->getFeatureBits()[AMDGPU::Feature64BitDPP]);
1910 }
1911 
1912 bool AMDGPUOperand::isSDWAOperand(MVT type) const {
1913  if (AsmParser->isVI())
1914  return isVReg32();
1915  else if (AsmParser->isGFX9Plus())
1916  return isRegClass(AMDGPU::VS_32RegClassID) || isInlinableImm(type);
1917  else
1918  return false;
1919 }
1920 
1921 bool AMDGPUOperand::isSDWAFP16Operand() const {
1922  return isSDWAOperand(MVT::f16);
1923 }
1924 
1925 bool AMDGPUOperand::isSDWAFP32Operand() const {
1926  return isSDWAOperand(MVT::f32);
1927 }
1928 
1929 bool AMDGPUOperand::isSDWAInt16Operand() const {
1930  return isSDWAOperand(MVT::i16);
1931 }
1932 
1933 bool AMDGPUOperand::isSDWAInt32Operand() const {
1934  return isSDWAOperand(MVT::i32);
1935 }
1936 
1937 bool AMDGPUOperand::isBoolReg() const {
1938  return (AsmParser->getFeatureBits()[AMDGPU::FeatureWavefrontSize64] && isSCSrcB64()) ||
1939  (AsmParser->getFeatureBits()[AMDGPU::FeatureWavefrontSize32] && isSCSrcB32());
1940 }
1941 
1942 uint64_t AMDGPUOperand::applyInputFPModifiers(uint64_t Val, unsigned Size) const
1943 {
1944  assert(isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers());
1945  assert(Size == 2 || Size == 4 || Size == 8);
1946 
1947  const uint64_t FpSignMask = (1ULL << (Size * 8 - 1));
1948 
1949  if (Imm.Mods.Abs) {
1950  Val &= ~FpSignMask;
1951  }
1952  if (Imm.Mods.Neg) {
1953  Val ^= FpSignMask;
1954  }
1955 
1956  return Val;
1957 }
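// Example: for a 4-byte operand FpSignMask is 0x80000000, so "abs"
// clears bit 31 and "neg" flips it; applying abs then neg to
// 0xC0000000 (-2.0f) yields 0x40000000 (2.0f) and then 0xC0000000 again.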
1958 
1959 void AMDGPUOperand::addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers) const {
1960  if (AMDGPU::isSISrcOperand(AsmParser->getMII()->get(Inst.getOpcode()),
1961  Inst.getNumOperands())) {
1962  addLiteralImmOperand(Inst, Imm.Val,
1963  ApplyModifiers &
1964  isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers());
1965  } else {
1966  assert(!isImmTy(ImmTyNone) || !hasModifiers());
1967  Inst.addOperand(MCOperand::createImm(Imm.Val));
1968  setImmKindNone();
1969  }
1970 }
1971 
1972 void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const {
1973  const auto& InstDesc = AsmParser->getMII()->get(Inst.getOpcode());
1974  auto OpNum = Inst.getNumOperands();
1975  // Check that this operand accepts literals
1976  assert(AMDGPU::isSISrcOperand(InstDesc, OpNum));
1977 
1978  if (ApplyModifiers) {
1979  assert(AMDGPU::isSISrcFPOperand(InstDesc, OpNum));
1980  const unsigned Size = Imm.IsFPImm ? sizeof(double) : getOperandSize(InstDesc, OpNum);
1981  Val = applyInputFPModifiers(Val, Size);
1982  }
1983 
1984  APInt Literal(64, Val);
1985  uint8_t OpTy = InstDesc.OpInfo[OpNum].OperandType;
1986 
1987  if (Imm.IsFPImm) { // We got fp literal token
1988  switch (OpTy) {
1989  case AMDGPU::OPERAND_REG_IMM_INT64:
1990  case AMDGPU::OPERAND_REG_IMM_FP64:
1991  case AMDGPU::OPERAND_REG_INLINE_C_INT64:
1992  case AMDGPU::OPERAND_REG_INLINE_C_FP64:
1993  case AMDGPU::OPERAND_REG_INLINE_AC_FP64:
1994  if (AMDGPU::isInlinableLiteral64(Literal.getZExtValue(),
1995  AsmParser->hasInv2PiInlineImm())) {
1996  Inst.addOperand(MCOperand::createImm(Literal.getZExtValue()));
1997  setImmKindConst();
1998  return;
1999  }
2000 
2001  // Non-inlineable
2002  if (AMDGPU::isSISrcFPOperand(InstDesc, OpNum)) { // Expected 64-bit fp operand
2003  // For fp operands we check if low 32 bits are zeros
2004  if (Literal.getLoBits(32) != 0) {
2005  const_cast<AMDGPUAsmParser *>(AsmParser)->Warning(Inst.getLoc(),
2006  "Can't encode literal as exact 64-bit floating-point operand. "
2007  "Low 32-bits will be set to zero");
2008  }
2009 
2010  Inst.addOperand(MCOperand::createImm(Literal.lshr(32).getZExtValue()));
2011  setImmKindLiteral();
2012  return;
2013  }
2014 
2015  // We don't allow fp literals in 64-bit integer instructions. It is
2016  // unclear how we should encode them. This case should be checked earlier
2017  // in predicate methods (isLiteralImm())
2018  llvm_unreachable("fp literal in 64-bit integer instruction.");
2019 
2020  case AMDGPU::OPERAND_REG_IMM_INT32:
2021  case AMDGPU::OPERAND_REG_IMM_FP32:
2022  case AMDGPU::OPERAND_REG_INLINE_C_INT32:
2023  case AMDGPU::OPERAND_REG_INLINE_C_FP32:
2024  case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
2025  case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
2026  case AMDGPU::OPERAND_REG_INLINE_C_V2FP32:
2027  case AMDGPU::OPERAND_REG_IMM_V2FP32:
2028  case AMDGPU::OPERAND_REG_INLINE_C_V2INT32:
2029  case AMDGPU::OPERAND_REG_IMM_V2INT32:
2030  case AMDGPU::OPERAND_REG_IMM_INT16:
2031  case AMDGPU::OPERAND_REG_IMM_FP16:
2032  case AMDGPU::OPERAND_REG_INLINE_C_INT16:
2033  case AMDGPU::OPERAND_REG_INLINE_C_FP16:
2034  case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
2035  case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
2036  case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
2037  case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
2038  case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
2039  case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16:
2040  case AMDGPU::OPERAND_REG_IMM_V2INT16:
2041  case AMDGPU::OPERAND_REG_IMM_V2FP16: {
2042  bool lost;
2043  APFloat FPLiteral(APFloat::IEEEdouble(), Literal);
2044  // Convert the literal to the operand's floating-point type
2045  FPLiteral.convert(*getOpFltSemantics(OpTy),
2046  APFloat::rmNearestTiesToEven, &lost);
2047  // We allow precision loss but not overflow or underflow. This should be
2048  // checked earlier in isLiteralImm()
2049 
2050  uint64_t ImmVal = FPLiteral.bitcastToAPInt().getZExtValue();
2051  Inst.addOperand(MCOperand::createImm(ImmVal));
2052  setImmKindLiteral();
2053  return;
2054  }
2055  default:
2056  llvm_unreachable("invalid operand size");
2057  }
2058 
2059  return;
2060  }
2061 
2062  // We got int literal token.
2063  // Only sign extend inline immediates.
2064  switch (OpTy) {
2065  case AMDGPU::OPERAND_REG_IMM_INT32:
2066  case AMDGPU::OPERAND_REG_IMM_FP32:
2067  case AMDGPU::OPERAND_REG_INLINE_C_INT32:
2068  case AMDGPU::OPERAND_REG_INLINE_C_FP32:
2069  case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
2070  case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
2071  case AMDGPU::OPERAND_REG_IMM_V2INT16:
2072  case AMDGPU::OPERAND_REG_IMM_V2FP16:
2073  case AMDGPU::OPERAND_REG_IMM_V2FP32:
2074  case AMDGPU::OPERAND_REG_INLINE_C_V2FP32:
2075  case AMDGPU::OPERAND_REG_IMM_V2INT32:
2076  case AMDGPU::OPERAND_REG_INLINE_C_V2INT32:
2077  if (isSafeTruncation(Val, 32) &&
2078  AMDGPU::isInlinableLiteral32(static_cast<int32_t>(Val),
2079  AsmParser->hasInv2PiInlineImm())) {
2080  Inst.addOperand(MCOperand::createImm(Val));
2081  setImmKindConst();
2082  return;
2083  }
2084 
2085  Inst.addOperand(MCOperand::createImm(Val & 0xffffffff));
2086  setImmKindLiteral();
2087  return;
2088 
2089  case AMDGPU::OPERAND_REG_IMM_INT64:
2090  case AMDGPU::OPERAND_REG_IMM_FP64:
2091  case AMDGPU::OPERAND_REG_INLINE_C_INT64:
2092  case AMDGPU::OPERAND_REG_INLINE_C_FP64:
2093  case AMDGPU::OPERAND_REG_INLINE_AC_FP64:
2094  if (AMDGPU::isInlinableLiteral64(Val, AsmParser->hasInv2PiInlineImm())) {
2095  Inst.addOperand(MCOperand::createImm(Val));
2096  setImmKindConst();
2097  return;
2098  }
2099 
2100  Inst.addOperand(MCOperand::createImm(Lo_32(Val)));
2101  setImmKindLiteral();
2102  return;
2103 
2104  case AMDGPU::OPERAND_REG_IMM_INT16:
2105  case AMDGPU::OPERAND_REG_IMM_FP16:
2106  case AMDGPU::OPERAND_REG_INLINE_C_INT16:
2107  case AMDGPU::OPERAND_REG_INLINE_C_FP16:
2108  case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
2109  case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
2110  if (isSafeTruncation(Val, 16) &&
2111  AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val),
2112  AsmParser->hasInv2PiInlineImm())) {
2113  Inst.addOperand(MCOperand::createImm(Val));
2114  setImmKindConst();
2115  return;
2116  }
2117 
2118  Inst.addOperand(MCOperand::createImm(Val & 0xffff));
2119  setImmKindLiteral();
2120  return;
2121 
2122  case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
2123  case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
2124  case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
2125  case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16: {
2126  assert(isSafeTruncation(Val, 16));
2127  assert(AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val),
2128  AsmParser->hasInv2PiInlineImm()));
2129 
2130  Inst.addOperand(MCOperand::createImm(Val));
2131  return;
2132  }
2133  default:
2134  llvm_unreachable("invalid operand size");
2135  }
2136 }
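// E.g. a non-inlinable 64-bit fp literal such as 3.0
// (0x4008000000000000) is encoded by its high 32 bits (0x40080000);
// if the low 32 bits were non-zero, the warning above would fire and
// they would be dropped.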
2137 
2138 template <unsigned Bitwidth>
2139 void AMDGPUOperand::addKImmFPOperands(MCInst &Inst, unsigned N) const {
2140  APInt Literal(64, Imm.Val);
2141  setImmKindNone();
2142 
2143  if (!Imm.IsFPImm) {
2144  // We got int literal token.
2145  Inst.addOperand(MCOperand::createImm(Literal.getLoBits(Bitwidth).getZExtValue()));
2146  return;
2147  }
2148 
2149  bool Lost;
2150  APFloat FPLiteral(APFloat::IEEEdouble(), Literal);
2151  FPLiteral.convert(*getFltSemantics(Bitwidth / 8),
2152  APFloat::rmNearestTiesToEven, &Lost);
2153  Inst.addOperand(MCOperand::createImm(FPLiteral.bitcastToAPInt().getZExtValue()));
2154 }
2155 
2156 void AMDGPUOperand::addRegOperands(MCInst &Inst, unsigned N) const {
2157  Inst.addOperand(MCOperand::createReg(AMDGPU::getMCReg(getReg(), AsmParser->getSTI())));
2158 }
2159 
2160 static bool isInlineValue(unsigned Reg) {
2161  switch (Reg) {
2162  case AMDGPU::SRC_SHARED_BASE:
2163  case AMDGPU::SRC_SHARED_LIMIT:
2164  case AMDGPU::SRC_PRIVATE_BASE:
2165  case AMDGPU::SRC_PRIVATE_LIMIT:
2166  case AMDGPU::SRC_POPS_EXITING_WAVE_ID:
2167  return true;
2168  case AMDGPU::SRC_VCCZ:
2169  case AMDGPU::SRC_EXECZ:
2170  case AMDGPU::SRC_SCC:
2171  return true;
2172  case AMDGPU::SGPR_NULL:
2173  return true;
2174  default:
2175  return false;
2176  }
2177 }
2178 
2179 bool AMDGPUOperand::isInlineValue() const {
2180  return isRegKind() && ::isInlineValue(getReg());
2181 }
2182 
2183 //===----------------------------------------------------------------------===//
2184 // AsmParser
2185 //===----------------------------------------------------------------------===//
2186 
2187 static int getRegClass(RegisterKind Is, unsigned RegWidth) {
2188  if (Is == IS_VGPR) {
2189  switch (RegWidth) {
2190  default: return -1;
2191  case 1: return AMDGPU::VGPR_32RegClassID;
2192  case 2: return AMDGPU::VReg_64RegClassID;
2193  case 3: return AMDGPU::VReg_96RegClassID;
2194  case 4: return AMDGPU::VReg_128RegClassID;
2195  case 5: return AMDGPU::VReg_160RegClassID;
2196  case 6: return AMDGPU::VReg_192RegClassID;
2197  case 8: return AMDGPU::VReg_256RegClassID;
2198  case 16: return AMDGPU::VReg_512RegClassID;
2199  case 32: return AMDGPU::VReg_1024RegClassID;
2200  }
2201  } else if (Is == IS_TTMP) {
2202  switch (RegWidth) {
2203  default: return -1;
2204  case 1: return AMDGPU::TTMP_32RegClassID;
2205  case 2: return AMDGPU::TTMP_64RegClassID;
2206  case 4: return AMDGPU::TTMP_128RegClassID;
2207  case 8: return AMDGPU::TTMP_256RegClassID;
2208  case 16: return AMDGPU::TTMP_512RegClassID;
2209  }
2210  } else if (Is == IS_SGPR) {
2211  switch (RegWidth) {
2212  default: return -1;
2213  case 1: return AMDGPU::SGPR_32RegClassID;
2214  case 2: return AMDGPU::SGPR_64RegClassID;
2215  case 3: return AMDGPU::SGPR_96RegClassID;
2216  case 4: return AMDGPU::SGPR_128RegClassID;
2217  case 5: return AMDGPU::SGPR_160RegClassID;
2218  case 6: return AMDGPU::SGPR_192RegClassID;
2219  case 8: return AMDGPU::SGPR_256RegClassID;
2220  case 16: return AMDGPU::SGPR_512RegClassID;
2221  }
2222  } else if (Is == IS_AGPR) {
2223  switch (RegWidth) {
2224  default: return -1;
2225  case 1: return AMDGPU::AGPR_32RegClassID;
2226  case 2: return AMDGPU::AReg_64RegClassID;
2227  case 3: return AMDGPU::AReg_96RegClassID;
2228  case 4: return AMDGPU::AReg_128RegClassID;
2229  case 5: return AMDGPU::AReg_160RegClassID;
2230  case 6: return AMDGPU::AReg_192RegClassID;
2231  case 8: return AMDGPU::AReg_256RegClassID;
2232  case 16: return AMDGPU::AReg_512RegClassID;
2233  case 32: return AMDGPU::AReg_1024RegClassID;
2234  }
2235  }
2236  return -1;
2237 }
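// E.g. (IS_VGPR, RegWidth=2) selects VReg_64 and (IS_SGPR, RegWidth=4)
// selects SGPR_128; unsupported widths such as 7 dwords yield -1.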
2238 
2239 static unsigned getSpecialRegForName(StringRef Name) {
2240  return StringSwitch<unsigned>(Name)
2241  .Case("exec", AMDGPU::EXEC)
2242  .Case("vcc", AMDGPU::VCC)
2243  .Case("flat_scratch", AMDGPU::FLAT_SCR)
2244  .Case("xnack_mask", AMDGPU::XNACK_MASK)
2245  .Case("shared_base", AMDGPU::SRC_SHARED_BASE)
2246  .Case("src_shared_base", AMDGPU::SRC_SHARED_BASE)
2247  .Case("shared_limit", AMDGPU::SRC_SHARED_LIMIT)
2248  .Case("src_shared_limit", AMDGPU::SRC_SHARED_LIMIT)
2249  .Case("private_base", AMDGPU::SRC_PRIVATE_BASE)
2250  .Case("src_private_base", AMDGPU::SRC_PRIVATE_BASE)
2251  .Case("private_limit", AMDGPU::SRC_PRIVATE_LIMIT)
2252  .Case("src_private_limit", AMDGPU::SRC_PRIVATE_LIMIT)
2253  .Case("pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID)
2254  .Case("src_pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID)
2255  .Case("lds_direct", AMDGPU::LDS_DIRECT)
2256  .Case("src_lds_direct", AMDGPU::LDS_DIRECT)
2257  .Case("m0", AMDGPU::M0)
2258  .Case("vccz", AMDGPU::SRC_VCCZ)
2259  .Case("src_vccz", AMDGPU::SRC_VCCZ)
2260  .Case("execz", AMDGPU::SRC_EXECZ)
2261  .Case("src_execz", AMDGPU::SRC_EXECZ)
2262  .Case("scc", AMDGPU::SRC_SCC)
2263  .Case("src_scc", AMDGPU::SRC_SCC)
2264  .Case("tba", AMDGPU::TBA)
2265  .Case("tma", AMDGPU::TMA)
2266  .Case("flat_scratch_lo", AMDGPU::FLAT_SCR_LO)
2267  .Case("flat_scratch_hi", AMDGPU::FLAT_SCR_HI)
2268  .Case("xnack_mask_lo", AMDGPU::XNACK_MASK_LO)
2269  .Case("xnack_mask_hi", AMDGPU::XNACK_MASK_HI)
2270  .Case("vcc_lo", AMDGPU::VCC_LO)
2271  .Case("vcc_hi", AMDGPU::VCC_HI)
2272  .Case("exec_lo", AMDGPU::EXEC_LO)
2273  .Case("exec_hi", AMDGPU::EXEC_HI)
2274  .Case("tma_lo", AMDGPU::TMA_LO)
2275  .Case("tma_hi", AMDGPU::TMA_HI)
2276  .Case("tba_lo", AMDGPU::TBA_LO)
2277  .Case("tba_hi", AMDGPU::TBA_HI)
2278  .Case("pc", AMDGPU::PC_REG)
2279  .Case("null", AMDGPU::SGPR_NULL)
2280  .Default(AMDGPU::NoRegister);
2281 }
2282 
2283 bool AMDGPUAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc,
2284  SMLoc &EndLoc, bool RestoreOnFailure) {
2285  auto R = parseRegister();
2286  if (!R) return true;
2287  assert(R->isReg());
2288  RegNo = R->getReg();
2289  StartLoc = R->getStartLoc();
2290  EndLoc = R->getEndLoc();
2291  return false;
2292 }
2293 
2294 bool AMDGPUAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc,
2295  SMLoc &EndLoc) {
2296  return ParseRegister(RegNo, StartLoc, EndLoc, /*RestoreOnFailure=*/false);
2297 }
2298 
2299 OperandMatchResultTy AMDGPUAsmParser::tryParseRegister(unsigned &RegNo,
2300  SMLoc &StartLoc,
2301  SMLoc &EndLoc) {
2302  bool Result =
2303  ParseRegister(RegNo, StartLoc, EndLoc, /*RestoreOnFailure=*/true);
2304  bool PendingErrors = getParser().hasPendingError();
2305  getParser().clearPendingErrors();
2306  if (PendingErrors)
2307  return MatchOperand_ParseFail;
2308  if (Result)
2309  return MatchOperand_NoMatch;
2310  return MatchOperand_Success;
2311 }
2312 
2313 bool AMDGPUAsmParser::AddNextRegisterToList(unsigned &Reg, unsigned &RegWidth,
2314  RegisterKind RegKind, unsigned Reg1,
2315  SMLoc Loc) {
2316  switch (RegKind) {
2317  case IS_SPECIAL:
2318  if (Reg == AMDGPU::EXEC_LO && Reg1 == AMDGPU::EXEC_HI) {
2319  Reg = AMDGPU::EXEC;
2320  RegWidth = 2;
2321  return true;
2322  }
2323  if (Reg == AMDGPU::FLAT_SCR_LO && Reg1 == AMDGPU::FLAT_SCR_HI) {
2324  Reg = AMDGPU::FLAT_SCR;
2325  RegWidth = 2;
2326  return true;
2327  }
2328  if (Reg == AMDGPU::XNACK_MASK_LO && Reg1 == AMDGPU::XNACK_MASK_HI) {
2329  Reg = AMDGPU::XNACK_MASK;
2330  RegWidth = 2;
2331  return true;
2332  }
2333  if (Reg == AMDGPU::VCC_LO && Reg1 == AMDGPU::VCC_HI) {
2334  Reg = AMDGPU::VCC;
2335  RegWidth = 2;
2336  return true;
2337  }
2338  if (Reg == AMDGPU::TBA_LO && Reg1 == AMDGPU::TBA_HI) {
2339  Reg = AMDGPU::TBA;
2340  RegWidth = 2;
2341  return true;
2342  }
2343  if (Reg == AMDGPU::TMA_LO && Reg1 == AMDGPU::TMA_HI) {
2344  Reg = AMDGPU::TMA;
2345  RegWidth = 2;
2346  return true;
2347  }
2348  Error(Loc, "register does not fit in the list");
2349  return false;
2350  case IS_VGPR:
2351  case IS_SGPR:
2352  case IS_AGPR:
2353  case IS_TTMP:
2354  if (Reg1 != Reg + RegWidth) {
2355  Error(Loc, "registers in a list must have consecutive indices");
2356  return false;
2357  }
2358  RegWidth++;
2359  return true;
2360  default:
2361  llvm_unreachable("unexpected register kind");
2362  }
2363 }
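// E.g. the list [s0, s1, s2, s3] grows RegWidth from 1 to 4 as each
// consecutive SGPR is appended; [s0, s2] fails the consecutive-index
// check, and [exec_lo, exec_hi] folds into the special register exec.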
2364 
2365 struct RegInfo {
2366  StringLiteral Name;
2367  RegisterKind Kind;
2368 };
2369 
2370 static constexpr RegInfo RegularRegisters[] = {
2371  {{"v"}, IS_VGPR},
2372  {{"s"}, IS_SGPR},
2373  {{"ttmp"}, IS_TTMP},
2374  {{"acc"}, IS_AGPR},
2375  {{"a"}, IS_AGPR},
2376 };
2377 
2378 static bool isRegularReg(RegisterKind Kind) {
2379  return Kind == IS_VGPR ||
2380  Kind == IS_SGPR ||
2381  Kind == IS_TTMP ||
2382  Kind == IS_AGPR;
2383 }
2384 
2385 static const RegInfo* getRegularRegInfo(StringRef Str) {
2386  for (const RegInfo &Reg : RegularRegisters)
2387  if (Str.startswith(Reg.Name))
2388  return &Reg;
2389  return nullptr;
2390 }
2391 
2392 static bool getRegNum(StringRef Str, unsigned& Num) {
2393  return !Str.getAsInteger(10, Num);
2394 }
2395 
2396 bool
2397 AMDGPUAsmParser::isRegister(const AsmToken &Token,
2398  const AsmToken &NextToken) const {
2399 
2400  // A list of consecutive registers: [s0,s1,s2,s3]
2401  if (Token.is(AsmToken::LBrac))
2402  return true;
2403 
2404  if (!Token.is(AsmToken::Identifier))
2405  return false;
2406 
2407  // A single register like s0 or a range of registers like s[0:1]
2408 
2409  StringRef Str = Token.getString();
2410  const RegInfo *Reg = getRegularRegInfo(Str);
2411  if (Reg) {
2412  StringRef RegName = Reg->Name;
2413  StringRef RegSuffix = Str.substr(RegName.size());
2414  if (!RegSuffix.empty()) {
2415  unsigned Num;
2416  // A single register with an index: rXX
2417  if (getRegNum(RegSuffix, Num))
2418  return true;
2419  } else {
2420  // A range of registers: r[XX:YY].
2421  if (NextToken.is(AsmToken::LBrac))
2422  return true;
2423  }
2424  }
2425 
2426  return getSpecialRegForName(Str) != AMDGPU::NoRegister;
2427 }
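// Examples of register syntax recognized here:
//   v0, s15, a3, ttmp4      - single registers
//   s[2:3], v[4:7]          - register ranges
//   [v0, v1, v2, v3]        - lists of consecutive registers
//   vcc, exec, m0, null     - special register names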
2428 
2429 bool
2430 AMDGPUAsmParser::isRegister()
2431 {
2432  return isRegister(getToken(), peekToken());
2433 }
2434 
2435 unsigned
2436 AMDGPUAsmParser::getRegularReg(RegisterKind RegKind,
2437  unsigned RegNum,
2438  unsigned RegWidth,
2439  SMLoc Loc) {
2440 
2441  assert(isRegularReg(RegKind));
2442 
2443  unsigned AlignSize = 1;
2444  if (RegKind == IS_SGPR || RegKind == IS_TTMP) {
2445  // SGPR and TTMP registers must be aligned.
2446  // Max required alignment is 4 dwords.
2447  AlignSize = std::min(RegWidth, 4u);
2448  }
2449 
2450  if (RegNum % AlignSize != 0) {
2451  Error(Loc, "invalid register alignment");
2452  return AMDGPU::NoRegister;
2453  }
2454 
2455  unsigned RegIdx = RegNum / AlignSize;
2456  int RCID = getRegClass(RegKind, RegWidth);
2457  if (RCID == -1) {
2458  Error(Loc, "invalid or unsupported register size");
2459  return AMDGPU::NoRegister;
2460  }
2461 
2462  const MCRegisterInfo *TRI = getContext().getRegisterInfo();
2463  const MCRegisterClass RC = TRI->getRegClass(RCID);
2464  if (RegIdx >= RC.getNumRegs()) {
2465  Error(Loc, "register index is out of range");
2466  return AMDGPU::NoRegister;
2467  }
2468 
2469  return RC.getRegister(RegIdx);
2470 }
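// For instance, s[2:3] is accepted (index 2 is 2-dword aligned) while
// s[1:2] is rejected with "invalid register alignment"; VGPR ranges
// such as v[1:2] have no alignment requirement.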
2471 
2472 bool
2473 AMDGPUAsmParser::ParseRegRange(unsigned& Num, unsigned& Width) {
2474  int64_t RegLo, RegHi;
2475  if (!skipToken(AsmToken::LBrac, "missing register index"))
2476  return false;
2477 
2478  SMLoc FirstIdxLoc = getLoc();
2479  SMLoc SecondIdxLoc;
2480 
2481  if (!parseExpr(RegLo))
2482  return false;
2483 
2484  if (trySkipToken(AsmToken::Colon)) {
2485  SecondIdxLoc = getLoc();
2486  if (!parseExpr(RegHi))
2487  return false;
2488  } else {
2489  RegHi = RegLo;
2490  }
2491 
2492  if (!skipToken(AsmToken::RBrac, "expected a closing square bracket"))
2493  return false;
2494 
2495  if (!isUInt<32>(RegLo)) {
2496  Error(FirstIdxLoc, "invalid register index");
2497  return false;
2498  }
2499 
2500  if (!isUInt<32>(RegHi)) {
2501  Error(SecondIdxLoc, "invalid register index");
2502  return false;
2503  }
2504 
2505  if (RegLo > RegHi) {
2506  Error(FirstIdxLoc, "first register index should not exceed second index");
2507  return false;
2508  }
2509 
2510  Num = static_cast<unsigned>(RegLo);
2511  Width = (RegHi - RegLo) + 1;
2512  return true;
2513 }
2514 
2515 unsigned AMDGPUAsmParser::ParseSpecialReg(RegisterKind &RegKind,
2516  unsigned &RegNum, unsigned &RegWidth,
2517  SmallVectorImpl<AsmToken> &Tokens) {
2518  assert(isToken(AsmToken::Identifier));
2519  unsigned Reg = getSpecialRegForName(getTokenStr());
2520  if (Reg) {
2521  RegNum = 0;
2522  RegWidth = 1;
2523  RegKind = IS_SPECIAL;
2524  Tokens.push_back(getToken());
2525  lex(); // skip register name
2526  }
2527  return Reg;
2528 }
2529 
2530 unsigned AMDGPUAsmParser::ParseRegularReg(RegisterKind &RegKind,
2531  unsigned &RegNum, unsigned &RegWidth,
2532  SmallVectorImpl<AsmToken> &Tokens) {
2533  assert(isToken(AsmToken::Identifier));
2534  StringRef RegName = getTokenStr();
2535  auto Loc = getLoc();
2536 
2537  const RegInfo *RI = getRegularRegInfo(RegName);
2538  if (!RI) {
2539  Error(Loc, "invalid register name");
2540  return AMDGPU::NoRegister;
2541  }
2542 
2543  Tokens.push_back(getToken());
2544  lex(); // skip register name
2545 
2546  RegKind = RI->Kind;
2547  StringRef RegSuffix = RegName.substr(RI->Name.size());
2548  if (!RegSuffix.empty()) {
2549  // Single 32-bit register: vXX.
2550  if (!getRegNum(RegSuffix, RegNum)) {
2551  Error(Loc, "invalid register index");
2552  return AMDGPU::NoRegister;
2553  }
2554  RegWidth = 1;
2555  } else {
2556  // Range of registers: v[XX:YY]. ":YY" is optional.
2557  if (!ParseRegRange(RegNum, RegWidth))
2558  return AMDGPU::NoRegister;
2559  }
2560 
2561  return getRegularReg(RegKind, RegNum, RegWidth, Loc);
2562 }
2563 
2564 unsigned AMDGPUAsmParser::ParseRegList(RegisterKind &RegKind, unsigned &RegNum,
2565  unsigned &RegWidth,
2566  SmallVectorImpl<AsmToken> &Tokens) {
2567  unsigned Reg = AMDGPU::NoRegister;
2568  auto ListLoc = getLoc();
2569 
2570  if (!skipToken(AsmToken::LBrac,
2571  "expected a register or a list of registers")) {
2572  return AMDGPU::NoRegister;
2573  }
2574 
2575  // List of consecutive registers, e.g.: [s0,s1,s2,s3]
2576 
2577  auto Loc = getLoc();
2578  if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth))
2579  return AMDGPU::NoRegister;
2580  if (RegWidth != 1) {
2581  Error(Loc, "expected a single 32-bit register");
2582  return AMDGPU::NoRegister;
2583  }
2584 
2585  for (; trySkipToken(AsmToken::Comma); ) {
2586  RegisterKind NextRegKind;
2587  unsigned NextReg, NextRegNum, NextRegWidth;
2588  Loc = getLoc();
2589 
2590  if (!ParseAMDGPURegister(NextRegKind, NextReg,
2591  NextRegNum, NextRegWidth,
2592  Tokens)) {
2593  return AMDGPU::NoRegister;
2594  }
2595  if (NextRegWidth != 1) {
2596  Error(Loc, "expected a single 32-bit register");
2597  return AMDGPU::NoRegister;
2598  }
2599  if (NextRegKind != RegKind) {
2600  Error(Loc, "registers in a list must be of the same kind");
2601  return AMDGPU::NoRegister;
2602  }
2603  if (!AddNextRegisterToList(Reg, RegWidth, RegKind, NextReg, Loc))
2604  return AMDGPU::NoRegister;
2605  }
2606 
2607  if (!skipToken(AsmToken::RBrac,
2608  "expected a comma or a closing square bracket")) {
2609  return AMDGPU::NoRegister;
2610  }
2611 
2612  if (isRegularReg(RegKind))
2613  Reg = getRegularReg(RegKind, RegNum, RegWidth, ListLoc);
2614 
2615  return Reg;
2616 }
2617 
2618 bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
2619  unsigned &RegNum, unsigned &RegWidth,
2620  SmallVectorImpl<AsmToken> &Tokens) {
2621  auto Loc = getLoc();
2622  Reg = AMDGPU::NoRegister;
2623 
2624  if (isToken(AsmToken::Identifier)) {
2625  Reg = ParseSpecialReg(RegKind, RegNum, RegWidth, Tokens);
2626  if (Reg == AMDGPU::NoRegister)
2627  Reg = ParseRegularReg(RegKind, RegNum, RegWidth, Tokens);
2628  } else {
2629  Reg = ParseRegList(RegKind, RegNum, RegWidth, Tokens);
2630  }
2631 
2632  const MCRegisterInfo *TRI = getContext().getRegisterInfo();
2633  if (Reg == AMDGPU::NoRegister) {
2634  assert(Parser.hasPendingError());
2635  return false;
2636  }
2637 
2638  if (!subtargetHasRegister(*TRI, Reg)) {
2639  if (Reg == AMDGPU::SGPR_NULL) {
2640  Error(Loc, "'null' operand is not supported on this GPU");
2641  } else {
2642  Error(Loc, "register not available on this GPU");
2643  }
2644  return false;
2645  }
2646 
2647  return true;
2648 }
2649 
2650 bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
2651  unsigned &RegNum, unsigned &RegWidth,
2652  bool RestoreOnFailure /*=false*/) {
2653  Reg = AMDGPU::NoRegister;
2654 
2655  SmallVector<AsmToken, 1> Tokens;
2656  if (ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth, Tokens)) {
2657  if (RestoreOnFailure) {
2658  while (!Tokens.empty()) {
2659  getLexer().UnLex(Tokens.pop_back_val());
2660  }
2661  }
2662  return true;
2663  }
2664  return false;
2665 }
2666 
2667 Optional<StringRef>
2668 AMDGPUAsmParser::getGprCountSymbolName(RegisterKind RegKind) {
2669  switch (RegKind) {
2670  case IS_VGPR:
2671  return StringRef(".amdgcn.next_free_vgpr");
2672  case IS_SGPR:
2673  return StringRef(".amdgcn.next_free_sgpr");
2674  default:
2675  return None;
2676  }
2677 }
2678 
2679 void AMDGPUAsmParser::initializeGprCountSymbol(RegisterKind RegKind) {
2680  auto SymbolName = getGprCountSymbolName(RegKind);
2681  assert(SymbolName && "initializing invalid register kind");
2682  MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName);
2683  Sym->setVariableValue(MCConstantExpr::create(0, getContext()));
2684 }
2685 
2686 bool AMDGPUAsmParser::updateGprCountSymbols(RegisterKind RegKind,
2687  unsigned DwordRegIndex,
2688  unsigned RegWidth) {
2689  // Symbols are only defined for GCN targets
2690  if (AMDGPU::getIsaVersion(getSTI().getCPU()).Major < 6)
2691  return true;
2692 
2693  auto SymbolName = getGprCountSymbolName(RegKind);
2694  if (!SymbolName)
2695  return true;
2696  MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName);
2697 
2698  int64_t NewMax = DwordRegIndex + RegWidth - 1;
2699  int64_t OldCount;
2700 
2701  if (!Sym->isVariable())
2702  return !Error(getLoc(),
2703  ".amdgcn.next_free_{v,s}gpr symbols must be variable");
2704  if (!Sym->getVariableValue(false)->evaluateAsAbsolute(OldCount))
2705  return !Error(
2706  getLoc(),
2707  ".amdgcn.next_free_{v,s}gpr symbols must be absolute expressions");
2708 
2709  if (OldCount <= NewMax)
2710  Sym->setVariableValue(MCConstantExpr::create(NewMax + 1, getContext()));
2711 
2712  return true;
2713 }
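// E.g. after parsing "v_mov_b32 v7, s3" on a target that maintains
// these symbols, .amdgcn.next_free_vgpr is raised to at least 8 and
// .amdgcn.next_free_sgpr to at least 4, tracking the high-water mark
// of registers used so far.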
2714 
2715 std::unique_ptr<AMDGPUOperand>
2716 AMDGPUAsmParser::parseRegister(bool RestoreOnFailure) {
2717  const auto &Tok = getToken();
2718  SMLoc StartLoc = Tok.getLoc();
2719  SMLoc EndLoc = Tok.getEndLoc();
2720  RegisterKind RegKind;
2721  unsigned Reg, RegNum, RegWidth;
2722 
2723  if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth)) {
2724  return nullptr;
2725  }
2726  if (isHsaAbiVersion3(&getSTI())) {
2727  if (!updateGprCountSymbols(RegKind, RegNum, RegWidth))
2728  return nullptr;
2729  } else
2730  KernelScope.usesRegister(RegKind, RegNum, RegWidth);
2731  return AMDGPUOperand::CreateReg(this, Reg, StartLoc, EndLoc);
2732 }
2733 
2734 OperandMatchResultTy
2735 AMDGPUAsmParser::parseImm(OperandVector &Operands, bool HasSP3AbsModifier) {
2736  // TODO: add syntactic sugar for 1/(2*PI)
2737 
2738  assert(!isRegister());
2739  assert(!isModifier());
2740 
2741  const auto& Tok = getToken();
2742  const auto& NextTok = peekToken();
2743  bool IsReal = Tok.is(AsmToken::Real);
2744  SMLoc S = getLoc();
2745  bool Negate = false;
2746 
2747  if (!IsReal && Tok.is(AsmToken::Minus) && NextTok.is(AsmToken::Real)) {
2748  lex();
2749  IsReal = true;
2750  Negate = true;
2751  }
2752 
2753  if (IsReal) {
2754  // Floating-point expressions are not supported.
2755  // Can only allow floating-point literals with an
2756  // optional sign.
2757 
2758  StringRef Num = getTokenStr();
2759  lex();
2760 
2761  APFloat RealVal(APFloat::IEEEdouble());
2762  auto roundMode = APFloat::rmNearestTiesToEven;
2763  if (errorToBool(RealVal.convertFromString(Num, roundMode).takeError())) {
2764  return MatchOperand_ParseFail;
2765  }
2766  if (Negate)
2767  RealVal.changeSign();
2768 
2769  Operands.push_back(
2770  AMDGPUOperand::CreateImm(this, RealVal.bitcastToAPInt().getZExtValue(), S,
2771  AMDGPUOperand::ImmTyNone, true));
2772 
2773  return MatchOperand_Success;
2774 
2775  } else {
2776  int64_t IntVal;
2777  const MCExpr *Expr;
2778  SMLoc S = getLoc();
2779 
2780  if (HasSP3AbsModifier) {
2781  // This is a workaround for handling expressions
2782  // as arguments of SP3 'abs' modifier, for example:
2783  // |1.0|
2784  // |-1|
2785  // |1+x|
2786  // This syntax is not compatible with syntax of standard
2787  // MC expressions (due to the trailing '|').
2788  SMLoc EndLoc;
2789  if (getParser().parsePrimaryExpr(Expr, EndLoc, nullptr))
2790  return MatchOperand_ParseFail;
2791  } else {
2792  if (Parser.parseExpression(Expr))
2793  return MatchOperand_ParseFail;
2794  }
2795 
2796  if (Expr->evaluateAsAbsolute(IntVal)) {
2797  Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S));
2798  } else {
2799  Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S));
2800  }
2801 
2802  return MatchOperand_Success;
2803  }
2804 
2805  return MatchOperand_NoMatch;
2806 }
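// Examples of immediates handled here: "1.5" and "-2.0" (fp literals,
// stored as their IEEE double bit pattern) and "0x1f" or "2+2"
// (integer expressions, folded if absolute, kept as an MCExpr otherwise).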
2807 
2808 OperandMatchResultTy
2809 AMDGPUAsmParser::parseReg(OperandVector &Operands) {
2810  if (!isRegister())
2811  return MatchOperand_NoMatch;
2812 
2813  if (auto R = parseRegister()) {
2814  assert(R->isReg());
2815  Operands.push_back(std::move(R));
2816  return MatchOperand_Success;
2817  }
2818  return MatchOperand_ParseFail;
2819 }
2820 
2821 OperandMatchResultTy
2822 AMDGPUAsmParser::parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod) {
2823  auto res = parseReg(Operands);
2824  if (res != MatchOperand_NoMatch) {
2825  return res;
2826  } else if (isModifier()) {
2827  return MatchOperand_NoMatch;
2828  } else {
2829  return parseImm(Operands, HasSP3AbsMod);
2830  }
2831 }
2832 
2833 bool
2834 AMDGPUAsmParser::isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
2835  if (Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::LParen)) {
2836  const auto &str = Token.getString();
2837  return str == "abs" || str == "neg" || str == "sext";
2838  }
2839  return false;
2840 }
2841 
2842 bool
2843 AMDGPUAsmParser::isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const {
2844  return Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::Colon);
2845 }
2846 
2847 bool
2848 AMDGPUAsmParser::isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
2849  return isNamedOperandModifier(Token, NextToken) || Token.is(AsmToken::Pipe);
2850 }
2851 
2852 bool
2853 AMDGPUAsmParser::isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
2854  return isRegister(Token, NextToken) || isOperandModifier(Token, NextToken);
2855 }
2856 
2857 // Check if this is an operand modifier or an opcode modifier
2858 // which may look like an expression but it is not. We should
2859 // avoid parsing these modifiers as expressions. Currently
2860 // recognized sequences are:
2861 // |...|
2862 // abs(...)
2863 // neg(...)
2864 // sext(...)
2865 // -reg
2866 // -|...|
2867 // -abs(...)
2868 // name:...
2869 // Note that simple opcode modifiers like 'gds' may be parsed as
2870 // expressions; this is a special case. See getExpressionAsToken.
2871 //
2872 bool
2873 AMDGPUAsmParser::isModifier() {
2874 
2875  AsmToken Tok = getToken();
2876  AsmToken NextToken[2];
2877  peekTokens(NextToken);
2878 
2879  return isOperandModifier(Tok, NextToken[0]) ||
2880  (Tok.is(AsmToken::Minus) && isRegOrOperandModifier(NextToken[0], NextToken[1])) ||
2881  isOpcodeModifierWithVal(Tok, NextToken[0]);
2882 }
2883 
2884 // Check if the current token is an SP3 'neg' modifier.
2885 // Currently this modifier is allowed in the following context:
2886 //
2887 // 1. Before a register, e.g. "-v0", "-v[...]" or "-[v0,v1]".
2888 // 2. Before an 'abs' modifier: -abs(...)
2889 // 3. Before an SP3 'abs' modifier: -|...|
2890 //
2891 // In all other cases "-" is handled as a part
2892 // of an expression that follows the sign.
2893 //
2894 // Note: When "-" is followed by an integer literal,
2895 // this is interpreted as integer negation rather
2896 // than a floating-point NEG modifier applied to N.
2897  // Besides being counter-intuitive, such use of a floating-point
2898  // NEG modifier would have resulted in a different meaning
2899 // of integer literals used with VOP1/2/C and VOP3,
2900 // for example:
2901 // v_exp_f32_e32 v5, -1 // VOP1: src0 = 0xFFFFFFFF
2902 // v_exp_f32_e64 v5, -1 // VOP3: src0 = 0x80000001
2903 // Negative fp literals with preceding "-" are
2904  // handled likewise for uniformity.
2905 //
2906 bool
2907 AMDGPUAsmParser::parseSP3NegModifier() {
2908 
2909  AsmToken NextToken[2];
2910  peekTokens(NextToken);
2911 
2912  if (isToken(AsmToken::Minus) &&
2913  (isRegister(NextToken[0], NextToken[1]) ||
2914  NextToken[0].is(AsmToken::Pipe) ||
2915  isId(NextToken[0], "abs"))) {
2916  lex();
2917  return true;
2918  }
2919 
2920  return false;
2921 }
2922 
2923 OperandMatchResultTy
2924 AMDGPUAsmParser::parseRegOrImmWithFPInputMods(OperandVector &Operands,
2925  bool AllowImm) {
2926  bool Neg, SP3Neg;
2927  bool Abs, SP3Abs;
2928  SMLoc Loc;
2929 
2930  // Disable ambiguous constructs like '--1' etc. Should use neg(-1) instead.
2931  if (isToken(AsmToken::Minus) && peekToken().is(AsmToken::Minus)) {
2932  Error(getLoc(), "invalid syntax, expected 'neg' modifier");
2933  return MatchOperand_ParseFail;
2934  }
2935 
2936  SP3Neg = parseSP3NegModifier();
2937 
2938  Loc = getLoc();
2939  Neg = trySkipId("neg");
2940  if (Neg && SP3Neg) {
2941  Error(Loc, "expected register or immediate");
2942  return MatchOperand_ParseFail;
2943  }
2944  if (Neg && !skipToken(AsmToken::LParen, "expected left paren after neg"))
2945  return MatchOperand_ParseFail;
2946 
2947  Abs = trySkipId("abs");
2948  if (Abs && !skipToken(AsmToken::LParen, "expected left paren after abs"))
2949  return MatchOperand_ParseFail;
2950 
2951  Loc = getLoc();
2952  SP3Abs = trySkipToken(AsmToken::Pipe);
2953  if (Abs && SP3Abs) {
2954  Error(Loc, "expected register or immediate");
2955  return MatchOperand_ParseFail;
2956  }
2957 
2958  OperandMatchResultTy Res;
2959  if (AllowImm) {
2960  Res = parseRegOrImm(Operands, SP3Abs);
2961  } else {
2962  Res = parseReg(Operands);
2963  }
2964  if (Res != MatchOperand_Success) {
2965  return (SP3Neg || Neg || SP3Abs || Abs)? MatchOperand_ParseFail : Res;
2966  }
2967 
2968  if (SP3Abs && !skipToken(AsmToken::Pipe, "expected vertical bar"))
2969  return MatchOperand_ParseFail;
2970  if (Abs && !skipToken(AsmToken::RParen, "expected closing parentheses"))
2971  return MatchOperand_ParseFail;
2972  if (Neg && !skipToken(AsmToken::RParen, "expected closing parentheses"))
2973  return MatchOperand_ParseFail;
2974 
2975  AMDGPUOperand::Modifiers Mods;
2976  Mods.Abs = Abs || SP3Abs;
2977  Mods.Neg = Neg || SP3Neg;
2978 
2979  if (Mods.hasFPModifiers()) {
2980  AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
2981  if (Op.isExpr()) {
2982  Error(Op.getStartLoc(), "expected an absolute expression");
2983  return MatchOperand_ParseFail;
2984  }
2985  Op.setModifiers(Mods);
2986  }
2987  return MatchOperand_Success;
2988 }
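// Both named and SP3-style modifier spellings are accepted here, e.g.
//   v_add_f32 v0, neg(v1), abs(v2)
//   v_add_f32 v0, -v1, |v2|
// while doubled-up forms such as "--1" or "abs(|v1|)" are rejected by
// the checks above.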
2989 
2990 OperandMatchResultTy
2991 AMDGPUAsmParser::parseRegOrImmWithIntInputMods(OperandVector &Operands,
2992  bool AllowImm) {
2993  bool Sext = trySkipId("sext");
2994  if (Sext && !skipToken(AsmToken::LParen, "expected left paren after sext"))
2995  return MatchOperand_ParseFail;
2996 
2997  OperandMatchResultTy Res;
2998  if (AllowImm) {
2999  Res = parseRegOrImm(Operands);
3000  } else {
3001  Res = parseReg(Operands);
3002  }
3003  if (Res != MatchOperand_Success) {
3004  return Sext? MatchOperand_ParseFail : Res;
3005  }
3006 
3007  if (Sext && !skipToken(AsmToken::RParen, "expected closing parentheses"))
3008  return MatchOperand_ParseFail;
3009 
3010  AMDGPUOperand::Modifiers Mods;
3011  Mods.Sext = Sext;
3012 
3013  if (Mods.hasIntModifiers()) {
3014  AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
3015  if (Op.isExpr()) {
3016  Error(Op.getStartLoc(), "expected an absolute expression");
3017  return MatchOperand_ParseFail;
3018  }
3019  Op.setModifiers(Mods);
3020  }
3021 
3022  return MatchOperand_Success;
3023 }
3024 
3025 OperandMatchResultTy
3026 AMDGPUAsmParser::parseRegWithFPInputMods(OperandVector &Operands) {
3027  return parseRegOrImmWithFPInputMods(Operands, false);
3028 }
3029 
3030 OperandMatchResultTy
3031 AMDGPUAsmParser::parseRegWithIntInputMods(OperandVector &Operands) {
3032  return parseRegOrImmWithIntInputMods(Operands, false);
3033 }
3034 
3035 OperandMatchResultTy AMDGPUAsmParser::parseVReg32OrOff(OperandVector &Operands) {
3036  auto Loc = getLoc();
3037  if (trySkipId("off")) {
3038  Operands.push_back(AMDGPUOperand::CreateImm(this, 0, Loc,
3039  AMDGPUOperand::ImmTyOff, false));
3040  return MatchOperand_Success;
3041  }
3042 
3043  if (!isRegister())
3044  return MatchOperand_NoMatch;
3045 
3046  std::unique_ptr<AMDGPUOperand> Reg = parseRegister();
3047  if (Reg) {
3048  Operands.push_back(std::move(Reg));
3049  return MatchOperand_Success;
3050  }
3051 
3052  return MatchOperand_ParseFail;
3053 
3054 }
3055 
3056 unsigned AMDGPUAsmParser::checkTargetMatchPredicate(MCInst &Inst) {
3057  uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
3058 
3059  if ((getForcedEncodingSize() == 32 && (TSFlags & SIInstrFlags::VOP3)) ||
3060  (getForcedEncodingSize() == 64 && !(TSFlags & SIInstrFlags::VOP3)) ||
3061  (isForcedDPP() && !(TSFlags & SIInstrFlags::DPP)) ||
3062  (isForcedSDWA() && !(TSFlags & SIInstrFlags::SDWA)) )
3063  return Match_InvalidOperand;
3064 
3065  if ((TSFlags & SIInstrFlags::VOP3) &&
3066  (TSFlags & SIInstrFlags::VOPAsmPrefer32Bit) &&
3067  getForcedEncodingSize() != 64)
3068  return Match_PreferE32;
3069 
3070  if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi ||
3071  Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) {
3072  // v_mac_f32/16 allow only dst_sel == DWORD;
3073  auto OpNum =
3074  AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::dst_sel);
3075  const auto &Op = Inst.getOperand(OpNum);
3076  if (!Op.isImm() || Op.getImm() != AMDGPU::SDWA::SdwaSel::DWORD) {
3077  return Match_InvalidOperand;
3078  }
3079  }
3080 
3081  return Match_Success;
3082 }
3083 
3084 static ArrayRef<unsigned> getAllVariants() {
3085  static const unsigned Variants[] = {
3086  AMDGPUAsmVariants::DEFAULT, AMDGPUAsmVariants::VOP3,
3087  AMDGPUAsmVariants::SDWA, AMDGPUAsmVariants::SDWA9, AMDGPUAsmVariants::DPP
3088  };
3089 
3090  return makeArrayRef(Variants);
3091 }
3092 
3093 // What asm variants we should check
3094 ArrayRef<unsigned> AMDGPUAsmParser::getMatchedVariants() const {
3095  if (getForcedEncodingSize() == 32) {
3096  static const unsigned Variants[] = {AMDGPUAsmVariants::DEFAULT};
3097  return makeArrayRef(Variants);
3098  }
3099 
3100  if (isForcedVOP3()) {
3101  static const unsigned Variants[] = {AMDGPUAsmVariants::VOP3};
3102  return makeArrayRef(Variants);
3103  }
3104 
3105  if (isForcedSDWA()) {
3106  static const unsigned Variants[] = {AMDGPUAsmVariants::SDWA,
3107  AMDGPUAsmVariants::SDWA9};
3108  return makeArrayRef(Variants);
3109  }
3110 
3111  if (isForcedDPP()) {
3112  static const unsigned Variants[] = {AMDGPUAsmVariants::DPP};
3113  return makeArrayRef(Variants);
3114  }
3115 
3116  return getAllVariants();
3117 }
3118 
3119 StringRef AMDGPUAsmParser::getMatchedVariantName() const {
3120  if (getForcedEncodingSize() == 32)
3121  return "e32";
3122 
3123  if (isForcedVOP3())
3124  return "e64";
3125 
3126  if (isForcedSDWA())
3127  return "sdwa";
3128 
3129  if (isForcedDPP())
3130  return "dpp";
3131 
3132  return "";
3133 }
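// Informally, an explicit encoding suffix on the mnemonic (e.g.
// "v_add_f32_e64" vs "v_add_f32_e32") forces the corresponding
// variant, and the name returned here mirrors that choice in
// diagnostics.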
3134 
3135 unsigned AMDGPUAsmParser::findImplicitSGPRReadInVOP(const MCInst &Inst) const {
3136  const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
3137  const unsigned Num = Desc.getNumImplicitUses();
3138  for (unsigned i = 0; i < Num; ++i) {
3139  unsigned Reg = Desc.ImplicitUses[i];
3140  switch (Reg) {
3141  case AMDGPU::FLAT_SCR:
3142  case AMDGPU::VCC:
3143  case AMDGPU::VCC_LO:
3144  case AMDGPU::VCC_HI:
3145  case AMDGPU::M0:
3146  return Reg;
3147  default:
3148  break;
3149  }
3150  }
3151  return AMDGPU::NoRegister;
3152 }
3153 
3154 // NB: This code is correct only when used to check constant
3155 // bus limitations because GFX7 supports no f16 inline constants.
3156 // Note that there are no cases when a GFX7 opcode violates
3157 // constant bus limitations due to the use of an f16 constant.
3158 bool AMDGPUAsmParser::isInlineConstant(const MCInst &Inst,
3159  unsigned OpIdx) const {
3160  const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
3161 
3162  if (!AMDGPU::isSISrcOperand(Desc, OpIdx)) {
3163  return false;
3164  }
3165 
3166  const MCOperand &MO = Inst.getOperand(OpIdx);
3167 
3168  int64_t Val = MO.getImm();
3169  auto OpSize = AMDGPU::getOperandSize(Desc, OpIdx);
3170 
3171  switch (OpSize) { // expected operand size
3172  case 8:
3173  return AMDGPU::isInlinableLiteral64(Val, hasInv2PiInlineImm());
3174  case 4:
3175  return AMDGPU::isInlinableLiteral32(Val, hasInv2PiInlineImm());
3176  case 2: {
3177  const unsigned OperandType = Desc.OpInfo[OpIdx].OperandType;
3178  if (OperandType == AMDGPU::OPERAND_REG_IMM_INT16 ||
3179  OperandType == AMDGPU::OPERAND_REG_INLINE_C_INT16 ||
3180  OperandType == AMDGPU::OPERAND_REG_INLINE_AC_INT16)
3181  return AMDGPU::isInlinableIntLiteral(Val);
3182 
3183  if (OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2INT16 ||
3184  OperandType == AMDGPU::OPERAND_REG_INLINE_AC_V2INT16 ||
3185  OperandType == AMDGPU::OPERAND_REG_IMM_V2INT16)
3186  return AMDGPU::isInlinableIntLiteralV216(Val);
3187 
3188  if (OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2FP16 ||
3189  OperandType == AMDGPU::OPERAND_REG_INLINE_AC_V2FP16 ||
3190  OperandType == AMDGPU::OPERAND_REG_IMM_V2FP16)
3191  return AMDGPU::isInlinableLiteralV216(Val, hasInv2PiInlineImm());
3192 
3193  return AMDGPU::isInlinableLiteral16(Val, hasInv2PiInlineImm());
3194  }
3195  default:
3196  llvm_unreachable("invalid operand size");
3197  }
3198 }
3199 
3200 unsigned AMDGPUAsmParser::getConstantBusLimit(unsigned Opcode) const {
3201  if (!isGFX10Plus())
3202  return 1;
3203 
3204  switch (Opcode) {
3205  // 64-bit shift instructions can use only one scalar value input
3206  case AMDGPU::V_LSHLREV_B64_e64:
3207  case AMDGPU::V_LSHLREV_B64_gfx10:
3208  case AMDGPU::V_LSHRREV_B64_e64:
3209  case AMDGPU::V_LSHRREV_B64_gfx10:
3210  case AMDGPU::V_ASHRREV_I64_e64:
3211  case AMDGPU::V_ASHRREV_I64_gfx10:
3212  case AMDGPU::V_LSHL_B64_e64:
3213  case AMDGPU::V_LSHR_B64_e64:
3214  case AMDGPU::V_ASHR_I64_e64:
3215  return 1;
3216  default:
3217  return 2;
3218  }
3219 }
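// E.g. on GFX10 "v_add_f32_e64 v0, s1, s2" is accepted (two scalar
// operands are allowed), but the 64-bit shifts listed above are still
// limited to a single scalar value input.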
3220 
3221 bool AMDGPUAsmParser::usesConstantBus(const MCInst &Inst, unsigned OpIdx) {
3222  const MCOperand &MO = Inst.getOperand(OpIdx);
3223  if (MO.isImm()) {
3224  return !isInlineConstant(Inst, OpIdx);
3225  } else if (MO.isReg()) {
3226  auto Reg = MO.getReg();
3227  const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3228  auto PReg = mc2PseudoReg(Reg);
3229  return isSGPR(PReg, TRI) && PReg != SGPR_NULL;
3230  } else {
3231  return true;
3232  }
3233 }
3234 
3235 bool
3236 AMDGPUAsmParser::validateConstantBusLimitations(const MCInst &Inst,
3237  const OperandVector &Operands) {
3238  const unsigned Opcode = Inst.getOpcode();
3239  const MCInstrDesc &Desc = MII.get(Opcode);
3240  unsigned LastSGPR = AMDGPU::NoRegister;
3241  unsigned ConstantBusUseCount = 0;
3242  unsigned NumLiterals = 0;
3243  unsigned LiteralSize;
3244 
3245  if (Desc.TSFlags &
3246  (SIInstrFlags::VOPC |
3247  SIInstrFlags::VOP1 | SIInstrFlags::VOP2 |
3248  SIInstrFlags::VOP3 | SIInstrFlags::VOP3P |
3249  SIInstrFlags::SDWA)) {
3250  // Check special imm operands (used by madmk, etc)
3251  if (AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::imm) != -1) {
3252  ++ConstantBusUseCount;
3253  }
3254 
3255  SmallDenseSet<unsigned> SGPRsUsed;
3256  unsigned SGPRUsed = findImplicitSGPRReadInVOP(Inst);
3257  if (SGPRUsed != AMDGPU::NoRegister) {
3258  SGPRsUsed.insert(SGPRUsed);
3259  ++ConstantBusUseCount;
3260  }
3261 
3262  const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
3263  const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
3264  const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);
3265 
3266  const int OpIndices[] = { Src0Idx, Src1Idx, Src2Idx };
3267 
3268  for (int OpIdx : OpIndices) {
3269  if (OpIdx == -1) break;
3270 
3271  const MCOperand &MO = Inst.getOperand(OpIdx);
3272  if (usesConstantBus(Inst, OpIdx)) {
3273  if (MO.isReg()) {
3274  LastSGPR = mc2PseudoReg(MO.getReg());
3275  // Pairs of registers with a partial intersection like these
3276  // s0, s[0:1]
3277  // flat_scratch_lo, flat_scratch
3278  // flat_scratch_lo, flat_scratch_hi
3279  // are theoretically valid but they are disabled anyway.
3280  // Note that this code mimics SIInstrInfo::verifyInstruction
3281  if (!SGPRsUsed.count(LastSGPR)) {
3282  SGPRsUsed.insert(LastSGPR);
3283  ++ConstantBusUseCount;
3284  }
3285  } else { // Expression or a literal
3286 
3287  if (Desc.OpInfo[OpIdx].OperandType == MCOI::OPERAND_IMMEDIATE)
3288  continue; // special operand like VINTERP attr_chan
3289 
3290  // An instruction may use only one literal.
3291  // This has been validated on the previous step.
3292  // See validateVOP3Literal.
3293  // This literal may be used as more than one operand.
3294  // If all these operands are of the same size,
3295  // this literal counts as one scalar value.
3296  // Otherwise it counts as 2 scalar values.
3297  // See "GFX10 Shader Programming", section 3.6.2.3.
3298 
3299  unsigned Size = AMDGPU::getOperandSize(Desc, OpIdx);
3300  if (Size < 4) Size = 4;
3301 
3302  if (NumLiterals == 0) {
3303  NumLiterals = 1;
3304  LiteralSize = Size;
3305  } else if (LiteralSize != Size) {
3306  NumLiterals = 2;
3307  }
3308  }
3309  }
3310  }
3311  }
3312  ConstantBusUseCount += NumLiterals;
3313 
3314  if (ConstantBusUseCount <= getConstantBusLimit(Opcode))
3315  return true;
3316 
3317  SMLoc LitLoc = getLitLoc(Operands);
3318  SMLoc RegLoc = getRegLoc(LastSGPR, Operands);
3319  SMLoc Loc = (LitLoc.getPointer() < RegLoc.getPointer()) ? RegLoc : LitLoc;
3320  Error(Loc, "invalid operand (violates constant bus restrictions)");
3321  return false;
3322 }
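// For example, on targets with a single constant bus slot
// "v_add_f32_e64 v0, s0, s1" reads two different SGPRs and is rejected
// with the error above, while "v_add_f32_e64 v0, s0, s0" is accepted
// because a repeated SGPR is counted only once.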
3323 
3324 bool
3325 AMDGPUAsmParser::validateEarlyClobberLimitations(const MCInst &Inst,
3326  const OperandVector &Operands) {
3327  const unsigned Opcode = Inst.getOpcode();
3328  const MCInstrDesc &Desc = MII.get(Opcode);
3329 
3330  const int DstIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::vdst);
3331  if (DstIdx == -1 ||
3332  Desc.getOperandConstraint(DstIdx, MCOI::EARLY_CLOBBER) == -1) {
3333  return true;
3334  }
3335 
3336  const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3337 
3338  const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
3339  const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
3340  const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);
3341 
3342  assert(DstIdx != -1);
3343  const MCOperand &Dst = Inst.getOperand(DstIdx);
3344  assert(Dst.isReg());
3345  const unsigned DstReg = mc2PseudoReg(Dst.getReg());
3346 
3347  const int SrcIndices[] = { Src0Idx, Src1Idx, Src2Idx };
3348 
3349  for (int SrcIdx : SrcIndices) {
3350  if (SrcIdx == -1) break;
3351  const MCOperand &Src = Inst.getOperand(SrcIdx);
3352  if (Src.isReg()) {
3353  const unsigned SrcReg = mc2PseudoReg(Src.getReg());
3354  if (isRegIntersect(DstReg, SrcReg, TRI)) {
3355  Error(getRegLoc(SrcReg, Operands),
3356  "destination must be different than all sources");
3357  return false;
3358  }
3359  }
3360  }
3361 
3362  return true;
3363 }
3364 
3365 bool AMDGPUAsmParser::validateIntClampSupported(const MCInst &Inst) {
3366 
3367  const unsigned Opc = Inst.getOpcode();
3368  const MCInstrDesc &Desc = MII.get(Opc);
3369 
3370  if ((Desc.TSFlags & SIInstrFlags::IntClamp) != 0 && !hasIntClamp()) {
3371  int ClampIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp);
3372  assert(ClampIdx != -1);
3373  return Inst.getOperand(ClampIdx).getImm() == 0;
3374  }
3375 
3376  return true;
3377 }
3378 
3379 bool AMDGPUAsmParser::validateMIMGDataSize(const MCInst &Inst) {
3380 
3381  const unsigned Opc = Inst.getOpcode();
3382  const MCInstrDesc &Desc = MII.get(Opc);
3383 
3384  if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
3385  return true;
3386 
3387  int VDataIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdata);
3388  int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
3389  int TFEIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::tfe);
3390 
3391  assert(VDataIdx != -1);
3392 
3393  if (DMaskIdx == -1 || TFEIdx == -1) // intersect_ray
3394  return true;
3395 
3396  unsigned VDataSize = AMDGPU::getRegOperandSize(getMRI(), Desc, VDataIdx);
3397  unsigned TFESize = (TFEIdx != -1 && Inst.getOperand(TFEIdx).getImm()) ? 1 : 0;
3398  unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
3399  if (DMask == 0)
3400  DMask = 1;
3401 
3402  unsigned DataSize =
3403  (Desc.TSFlags & SIInstrFlags::Gather4) ? 4 : countPopulation(DMask);
3404  if (hasPackedD16()) {
3405  int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16);
3406  if (D16Idx >= 0 && Inst.getOperand(D16Idx).getImm())
3407  DataSize = (DataSize + 1) / 2;
3408  }
3409 
3410  return (VDataSize / 4) == DataSize + TFESize;
3411 }
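// Example: "image_load v[0:2], ... dmask:0x7" needs three data VGPRs
// (three bits set in dmask) plus one more if tfe is enabled; with d16
// on packed-d16 targets the requirement is halved and rounded up.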
3412 
3413 bool AMDGPUAsmParser::validateMIMGAddrSize(const MCInst &Inst) {
3414  const unsigned Opc = Inst.getOpcode();
3415  const MCInstrDesc &Desc = MII.get(Opc);
3416 
3417  if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0 || !isGFX10Plus())
3418  return true;
3419 
3420  const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Opc);
3421 
3422  const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode =
3423  AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode);
3424  int VAddr0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vaddr0);
3425  int SrsrcIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::srsrc);
3426  int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim);
3427 
3428  assert(VAddr0Idx != -1);
3429  assert(SrsrcIdx != -1);
3430  assert(SrsrcIdx > VAddr0Idx);
3431 
3432  if (DimIdx == -1)
3433  return true; // intersect_ray
3434 
3435  unsigned Dim = Inst.getOperand(DimIdx).getImm();
3436  const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByEncoding(Dim);
3437  bool IsNSA = SrsrcIdx - VAddr0Idx > 1;
3438  unsigned VAddrSize =
3439  IsNSA ? SrsrcIdx - VAddr0Idx
3440  : AMDGPU::getRegOperandSize(getMRI(), Desc, VAddr0Idx) / 4;
3441 
3442  unsigned AddrSize = BaseOpcode->NumExtraArgs +
3443  (BaseOpcode->Gradients ? DimInfo->NumGradients : 0) +
3444  (BaseOpcode->Coordinates ? DimInfo->NumCoords : 0) +
3445  (BaseOpcode->LodOrClampOrMip ? 1 : 0);
3446  if (!IsNSA) {
3447  if (AddrSize > 8)
3448  AddrSize = 16;
3449  else if (AddrSize > 4)
3450  AddrSize = 8;
3451  }
3452 
3453  return VAddrSize == AddrSize;
3454 }
3455 
3456 bool AMDGPUAsmParser::validateMIMGAtomicDMask(const MCInst &Inst) {
3457 
3458  const unsigned Opc = Inst.getOpcode();
3459  const MCInstrDesc &Desc = MII.get(Opc);
3460 
3461  if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
3462  return true;
3463  if (!Desc.mayLoad() || !Desc.mayStore())
3464  return true; // Not atomic
3465 
3466  int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
3467  unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
3468 
3469  // This is an incomplete check because image_atomic_cmpswap
3470  // may only use 0x3 and 0xf while other atomic operations
3471  // may use 0x1 and 0x3. However these limitations are
3472  // verified when we check that dmask matches dst size.
3473  return DMask == 0x1 || DMask == 0x3 || DMask == 0xf;
3474 }
3475 
3476 bool AMDGPUAsmParser::validateMIMGGatherDMask(const MCInst &Inst) {
3477 
3478  const unsigned Opc = Inst.getOpcode();
3479  const MCInstrDesc &Desc = MII.get(Opc);
3480 
3481  if ((Desc.TSFlags & SIInstrFlags::Gather4) == 0)
3482  return true;
3483 
3484  int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
3485  unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
3486 
3487  // GATHER4 instructions use dmask in a different fashion compared to
3488  // other MIMG instructions. The only useful DMASK values are
3489  // 1=red, 2=green, 4=blue, 8=alpha. (e.g. 1 returns
3490  // (red,red,red,red) etc.) The ISA document doesn't mention
3491  // this.
3492  return DMask == 0x1 || DMask == 0x2 || DMask == 0x4 || DMask == 0x8;
3493 }
3494 
3495 static bool IsMovrelsSDWAOpcode(const unsigned Opcode)
3496 {
3497  switch (Opcode) {
3498  case AMDGPU::V_MOVRELS_B32_sdwa_gfx10:
3499  case AMDGPU::V_MOVRELSD_B32_sdwa_gfx10:
3500  case AMDGPU::V_MOVRELSD_2_B32_sdwa_gfx10:
3501  return true;
3502  default:
3503  return false;
3504  }
3505 }
3506 
3507 // movrels* opcodes should only allow VGPRs as src0.
3508 // This is specified in the .td description for vop1/vop3,
3509 // but sdwa is handled differently. See isSDWAOperand.
3510 bool AMDGPUAsmParser::validateMovrels(const MCInst &Inst,
3511  const OperandVector &Operands) {
3512 
3513  const unsigned Opc = Inst.getOpcode();
3514  const MCInstrDesc &Desc = MII.get(Opc);
3515 
3516  if ((Desc.TSFlags & SIInstrFlags::SDWA) == 0 || !IsMovrelsSDWAOpcode(Opc))
3517  return true;
3518 
3519  const int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
3520  assert(Src0Idx != -1);
3521 
3522  SMLoc ErrLoc;
3523  const MCOperand &Src0 = Inst.getOperand(Src0Idx);
3524  if (Src0.isReg()) {
3525  auto Reg = mc2PseudoReg(Src0.getReg());
3526  const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3527  if (!isSGPR(Reg, TRI))
3528  return true;
3529  ErrLoc = getRegLoc(Reg, Operands);
3530  } else {
3531  ErrLoc = getConstLoc(Operands);
3532  }
3533 
3534  Error(ErrLoc, "source operand must be a VGPR");
3535  return false;
3536 }
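// For example, an SDWA form written along the lines of
//   v_movrels_b32_sdwa v0, s0
// names an SGPR source and is rejected here with
// "source operand must be a VGPR".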
3537 
3538 bool AMDGPUAsmParser::validateMAIAccWrite(const MCInst &Inst,
3539  const OperandVector &Operands) {
3540 
3541  const unsigned Opc = Inst.getOpcode();
3542 
3543  if (Opc != AMDGPU::V_ACCVGPR_WRITE_B32_vi)
3544  return true;
3545 
3546  const int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
3547  assert(Src0Idx != -1);
3548 
3549  const MCOperand &Src0 = Inst.getOperand(Src0Idx);
3550  if (!Src0.isReg())
3551  return true;
3552 
3553  auto Reg = mc2PseudoReg(Src0.getReg());
3554  const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3555  if (isSGPR(Reg, TRI)) {
3556  Error(getRegLoc(Reg, Operands),
3557  "source operand must be either a VGPR or an inline constant");
3558  return false;
3559  }
3560 
3561  return true;
3562 }
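// For example, v_accvgpr_write_b32 a0, s2 names an SGPR source and is
// rejected with the error above, while v_accvgpr_write_b32 a0, v2 (or an
// inline constant source) is accepted.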
3563 
3564 bool AMDGPUAsmParser::validateDivScale(const MCInst &Inst) {
3565  switch (Inst.getOpcode()) {
3566  default:
3567  return true;
3568  case V_DIV_SCALE_F32_gfx6_gfx7:
3569  case V_DIV_SCALE_F32_vi:
3570  case V_DIV_SCALE_F32_gfx10:
3571  case V_DIV_SCALE_F64_gfx6_gfx7:
3572  case V_DIV_SCALE_F64_vi:
3573  case V_DIV_SCALE_F64_gfx10:
3574  break;
3575  }
3576 
3577  // TODO: Check that src0 = src1 or src2.
3578 
3579  for (auto Name : {AMDGPU::OpName::src0_modifiers,
3580  AMDGPU::OpName::src1_modifiers,
3581  AMDGPU::OpName::src2_modifiers}) {
3582  if (Inst.getOperand(AMDGPU::getNamedOperandIdx(Inst.getOpcode(), Name))
3583  .getImm() &
3584  SISrcMods::ABS) {
3585  return false;
3586  }
3587  }
3588 
3589  return true;
3590 }
3591 
3592 bool AMDGPUAsmParser::validateMIMGD16(const MCInst &Inst) {
3593 
3594  const unsigned Opc = Inst.getOpcode();
3595  const MCInstrDesc &Desc = MII.get(Opc);
3596 
3597  if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
3598  return true;
3599 
3600  int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16);
3601  if (D16Idx >= 0 && Inst.getOperand(D16Idx).getImm()) {
3602  if (isCI() || isSI())
3603  return false;
3604  }
3605 
3606  return true;
3607 }
3608 
3609 bool AMDGPUAsmParser::validateMIMGDim(const MCInst &Inst) {
3610  const unsigned Opc = Inst.getOpcode();
3611  const MCInstrDesc &Desc = MII.get(Opc);
3612 
3613  if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
3614  return true;
3615 
3616  int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim);
3617  if (DimIdx < 0)
3618  return true;
3619 
3620  long Imm = Inst.getOperand(DimIdx).getImm();
3621  if (Imm < 0 || Imm >= 8)
3622  return false;
3623 
3624  return true;
3625 }
3626 
3627 static bool IsRevOpcode(const unsigned Opcode)
3628 {
3629  switch (Opcode) {
3630  case AMDGPU::V_SUBREV_F32_e32:
3631  case AMDGPU::V_SUBREV_F32_e64:
3632  case AMDGPU::V_SUBREV_F32_e32_gfx10:
3633  case AMDGPU::V_SUBREV_F32_e32_gfx6_gfx7:
3634  case AMDGPU::V_SUBREV_F32_e32_vi:
3635  case AMDGPU::V_SUBREV_F32_e64_gfx10:
3636  case AMDGPU::V_SUBREV_F32_e64_gfx6_gfx7:
3637  case AMDGPU::V_SUBREV_F32_e64_vi:
3638 
3639  case AMDGPU::V_SUBREV_CO_U32_e32:
3640  case AMDGPU::V_SUBREV_CO_U32_e64:
3641  case AMDGPU::V_SUBREV_I32_e32_gfx6_gfx7:
3642  case AMDGPU::V_SUBREV_I32_e64_gfx6_gfx7:
3643 
3644  case AMDGPU::V_SUBBREV_U32_e32:
3645  case AMDGPU::V_SUBBREV_U32_e64:
3646  case AMDGPU::V_SUBBREV_U32_e32_gfx6_gfx7:
3647  case AMDGPU::V_SUBBREV_U32_e32_vi:
3648  case AMDGPU::V_SUBBREV_U32_e64_gfx6_gfx7:
3649  case AMDGPU::V_SUBBREV_U32_e64_vi:
3650 
3651  case AMDGPU::V_SUBREV_U32_e32:
3652  case AMDGPU::V_SUBREV_U32_e64:
3653  case AMDGPU::V_SUBREV_U32_e32_gfx9:
3654  case AMDGPU::V_SUBREV_U32_e32_vi:
3655  case AMDGPU::V_SUBREV_U32_e64_gfx9:
3656  case AMDGPU::V_SUBREV_U32_e64_vi:
3657 
3658  case AMDGPU::V_SUBREV_F16_e32:
3659  case AMDGPU::V_SUBREV_F16_e64:
3660  case AMDGPU::V_SUBREV_F16_e32_gfx10:
3661  case AMDGPU::V_SUBREV_F16_e32_vi:
3662  case AMDGPU::V_SUBREV_F16_e64_gfx10:
3663  case AMDGPU::V_SUBREV_F16_e64_vi:
3664 
3665  case AMDGPU::V_SUBREV_U16_e32:
3666  case AMDGPU::V_SUBREV_U16_e64:
3667  case AMDGPU::V_SUBREV_U16_e32_vi:
3668  case AMDGPU::V_SUBREV_U16_e64_vi:
3669 
3670  case AMDGPU::V_SUBREV_CO_U32_e32_gfx9:
3671  case AMDGPU::V_SUBREV_CO_U32_e64_gfx10:
3672  case AMDGPU::V_SUBREV_CO_U32_e64_gfx9:
3673 
3674  case AMDGPU::V_SUBBREV_CO_U32_e32_gfx9:
3675  case AMDGPU::V_SUBBREV_CO_U32_e64_gfx9:
3676 
3677  case AMDGPU::V_SUBREV_NC_U32_e32_gfx10:
3678  case AMDGPU::V_SUBREV_NC_U32_e64_gfx10:
3679 
3680  case AMDGPU::V_SUBREV_CO_CI_U32_e32_gfx10:
3681  case AMDGPU::V_SUBREV_CO_CI_U32_e64_gfx10:
3682 
3683  case AMDGPU::V_LSHRREV_B32_e32:
3684  case AMDGPU::V_LSHRREV_B32_e64:
3685  case AMDGPU::V_LSHRREV_B32_e32_gfx6_gfx7:
3686  case AMDGPU::V_LSHRREV_B32_e64_gfx6_gfx7:
3687  case AMDGPU::V_LSHRREV_B32_e32_vi:
3688  case AMDGPU::V_LSHRREV_B32_e64_vi:
3689  case AMDGPU::V_LSHRREV_B32_e32_gfx10:
3690  case AMDGPU::V_LSHRREV_B32_e64_gfx10:
3691 
3692  case AMDGPU::V_ASHRREV_I32_e32:
3693  case AMDGPU::V_ASHRREV_I32_e64:
3694  case AMDGPU::V_ASHRREV_I32_e32_gfx10:
3695  case AMDGPU::V_ASHRREV_I32_e32_gfx6_gfx7:
3696  case AMDGPU::V_ASHRREV_I32_e32_vi:
3697  case AMDGPU::V_ASHRREV_I32_e64_gfx10:
3698  case AMDGPU::V_ASHRREV_I32_e64_gfx6_gfx7:
3699  case AMDGPU::V_ASHRREV_I32_e64_vi:
3700 
3701  case AMDGPU::V_LSHLREV_B32_e32:
3702  case AMDGPU::V_LSHLREV_B32_e64:
3703  case AMDGPU::V_LSHLREV_B32_e32_gfx10:
3704  case AMDGPU::V_LSHLREV_B32_e32_gfx6_gfx7:
3705  case AMDGPU::V_LSHLREV_B32_e32_vi:
3706  case AMDGPU::V_LSHLREV_B32_e64_gfx10:
3707  case AMDGPU::V_LSHLREV_B32_e64_gfx6_gfx7:
3708  case AMDGPU::V_LSHLREV_B32_e64_vi:
3709 
3710  case AMDGPU::V_LSHLREV_B16_e32:
3711  case AMDGPU::V_LSHLREV_B16_e64:
3712  case AMDGPU::V_LSHLREV_B16_e32_vi:
3713  case AMDGPU::V_LSHLREV_B16_e64_vi:
3714  case AMDGPU::V_LSHLREV_B16_gfx10:
3715 
3716  case AMDGPU::V_LSHRREV_B16_e32:
3717  case AMDGPU::V_LSHRREV_B16_e64:
3718  case AMDGPU::V_LSHRREV_B16_e32_vi:
3719  case AMDGPU::V_LSHRREV_B16_e64_vi:
3720  case AMDGPU::V_LSHRREV_B16_gfx10:
3721 
3722  case AMDGPU::V_ASHRREV_I16_e32:
3723  case AMDGPU::V_ASHRREV_I16_e64:
3724  case AMDGPU::V_ASHRREV_I16_e32_vi:
3725  case AMDGPU::V_ASHRREV_I16_e64_vi:
3726  case AMDGPU::V_ASHRREV_I16_gfx10:
3727 
3728  case AMDGPU::V_LSHLREV_B64_e64:
3729  case AMDGPU::V_LSHLREV_B64_gfx10:
3730  case AMDGPU::V_LSHLREV_B64_vi:
3731 
3732  case AMDGPU::V_LSHRREV_B64_e64:
3733  case AMDGPU::V_LSHRREV_B64_gfx10:
3734  case AMDGPU::V_LSHRREV_B64_vi:
3735 
3736  case AMDGPU::V_ASHRREV_I64_e64:
3737  case AMDGPU::V_ASHRREV_I64_gfx10:
3738  case AMDGPU::V_ASHRREV_I64_vi:
3739 
3740  case AMDGPU::V_PK_LSHLREV_B16:
3741  case AMDGPU::V_PK_LSHLREV_B16_gfx10:
3742  case AMDGPU::V_PK_LSHLREV_B16_vi:
3743 
3744  case AMDGPU::V_PK_LSHRREV_B16:
3745  case AMDGPU::V_PK_LSHRREV_B16_gfx10:
3746  case AMDGPU::V_PK_LSHRREV_B16_vi:
3747  case AMDGPU::V_PK_ASHRREV_I16:
3748  case AMDGPU::V_PK_ASHRREV_I16_gfx10:
3749  case AMDGPU::V_PK_ASHRREV_I16_vi:
3750  return true;
3751  default:
3752  return false;
3753  }
3754 }
3755 
3756 bool AMDGPUAsmParser::validateLdsDirect(const MCInst &Inst) {
3757 
3758  using namespace SIInstrFlags;
3759  const unsigned Opcode = Inst.getOpcode();
3760  const MCInstrDesc &Desc = MII.get(Opcode);
3761 
3762  // lds_direct register is defined so that it can be used
3763  // with 9-bit operands only. Ignore encodings which do not accept these.
3764  if ((Desc.TSFlags & (VOP1 | VOP2 | VOP3 | VOPC | VOP3P | SIInstrFlags::SDWA)) == 0)
3765  return true;
3766 
3767  const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
3768  const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
3769  const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);
3770 
3771  const int SrcIndices[] = { Src1Idx, Src2Idx };
3772 
3773  // lds_direct cannot be specified as either src1 or src2.
3774  for (int SrcIdx : SrcIndices) {
3775  if (SrcIdx == -1) break;
3776  const MCOperand &Src = Inst.getOperand(SrcIdx);
3777  if (Src.isReg() && Src.getReg() == LDS_DIRECT) {
3778  return false;
3779  }
3780  }
3781 
3782  if (Src0Idx == -1)
3783  return true;
3784 
3785  const MCOperand &Src = Inst.getOperand(Src0Idx);
3786  if (!Src.isReg() || Src.getReg() != LDS_DIRECT)
3787  return true;
3788 
3789  // lds_direct is specified as src0. Check additional limitations.
3790  return (Desc.TSFlags & SIInstrFlags::SDWA) == 0 && !IsRevOpcode(Opcode);
3791 }
3792 
3793 SMLoc AMDGPUAsmParser::getFlatOffsetLoc(const OperandVector &Operands) const {
3794  for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
3795  AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
3796  if (Op.isFlatOffset())
3797  return Op.getStartLoc();
3798  }
3799  return getLoc();
3800 }
3801 
3802 bool AMDGPUAsmParser::validateFlatOffset(const MCInst &Inst,
3803  const OperandVector &Operands) {
3804  uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
3805  if ((TSFlags & SIInstrFlags::FLAT) == 0)
3806  return true;
3807 
3808  auto Opcode = Inst.getOpcode();
3809  auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset);
3810  assert(OpNum != -1);
3811 
3812  const auto &Op = Inst.getOperand(OpNum);
3813  if (!hasFlatOffsets() && Op.getImm() != 0) {
3814  Error(getFlatOffsetLoc(Operands),
3815  "flat offset modifier is not supported on this GPU");
3816  return false;
3817  }
3818 
3819  // For FLAT segment the offset must be positive;
3820  // MSB is ignored and forced to zero.
3821  if (TSFlags & (SIInstrFlags::FlatGlobal | SIInstrFlags::FlatScratch)) {
3822  unsigned OffsetSize = AMDGPU::getNumFlatOffsetBits(getSTI(), true);
3823  if (!isIntN(OffsetSize, Op.getImm())) {
3824  Error(getFlatOffsetLoc(Operands),
3825  Twine("expected a ") + Twine(OffsetSize) + "-bit signed offset");
3826  return false;
3827  }
3828  } else {
3829  unsigned OffsetSize = AMDGPU::getNumFlatOffsetBits(getSTI(), false);
3830  if (!isUIntN(OffsetSize, Op.getImm())) {
3831  Error(getFlatOffsetLoc(Operands),
3832  Twine("expected a ") + Twine(OffsetSize) + "-bit unsigned offset");
3833  return false;
3834  }
3835  }
3836 
3837  return true;
3838 }
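// For example, on a subtarget without flat offsets an instruction such as
//   flat_load_dword v0, v[0:1] offset:8
// is rejected with "flat offset modifier is not supported on this GPU";
// on newer targets the offset only has to fit the signed or unsigned width
// reported by getNumFlatOffsetBits().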
3839 
3840 SMLoc AMDGPUAsmParser::getSMEMOffsetLoc(const OperandVector &Operands) const {
3841  // Start with second operand because SMEM Offset cannot be dst or src0.
3842  for (unsigned i = 2, e = Operands.size(); i != e; ++i) {
3843  AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
3844  if (Op.isSMEMOffset())
3845  return Op.getStartLoc();
3846  }
3847  return getLoc();
3848 }
3849 
3850 bool AMDGPUAsmParser::validateSMEMOffset(const MCInst &Inst,
3851  const OperandVector &Operands) {
3852  if (isCI() || isSI())
3853  return true;
3854 
3855  uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
3856  if ((TSFlags & SIInstrFlags::SMRD) == 0)
3857  return true;
3858 
3859  auto Opcode = Inst.getOpcode();
3860  auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset);
3861  if (OpNum == -1)
3862  return true;
3863 
3864  const auto &Op = Inst.getOperand(OpNum);
3865  if (!Op.isImm())
3866  return true;
3867 
3868  uint64_t Offset = Op.getImm();
3869  bool IsBuffer = AMDGPU::getSMEMIsBuffer(Opcode);
3870  if (AMDGPU::isLegalSMRDEncodedUnsignedOffset(getSTI(), Offset) ||
3871  AMDGPU::isLegalSMRDEncodedSignedOffset(getSTI(), Offset, IsBuffer))
3872  return true;
3873 
3874  Error(getSMEMOffsetLoc(Operands),
3875  (isVI() || IsBuffer) ? "expected a 20-bit unsigned offset" :
3876  "expected a 21-bit signed offset");
3877 
3878  return false;
3879 }
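// For example, a negative offset such as -16 encodes on gfx9+, where the
// field is a 21-bit signed value, but is rejected on VI, where it is a
// 20-bit unsigned value; anything outside both ranges produces one of the
// errors above.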
3880 
3881 bool AMDGPUAsmParser::validateSOPLiteral(const MCInst &Inst) const {
3882  unsigned Opcode = Inst.getOpcode();
3883  const MCInstrDesc &Desc = MII.get(Opcode);
3884  if (!(Desc.TSFlags & (SIInstrFlags::SOP2 | SIInstrFlags::SOPC)))
3885  return true;
3886 
3887  const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
3888  const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
3889 
3890  const int OpIndices[] = { Src0Idx, Src1Idx };
3891 
3892  unsigned NumExprs = 0;
3893  unsigned NumLiterals = 0;
3894  uint32_t LiteralValue;
3895 
3896  for (int OpIdx : OpIndices) {
3897  if (OpIdx == -1) break;
3898 
3899  const MCOperand &MO = Inst.getOperand(OpIdx);
3900  // Exclude special imm operands (like that used by s_set_gpr_idx_on)
3901  if (AMDGPU::isSISrcOperand(Desc, OpIdx)) {
3902  if (MO.isImm() && !isInlineConstant(Inst, OpIdx)) {
3903  uint32_t Value = static_cast<uint32_t>(MO.getImm());
3904  if (NumLiterals == 0 || LiteralValue != Value) {
3905  LiteralValue = Value;
3906  ++NumLiterals;
3907  }
3908  } else if (MO.isExpr()) {
3909  ++NumExprs;
3910  }
3911  }
3912  }
3913 
3914  return NumLiterals + NumExprs <= 1;
3915 }
3916 
3917 bool AMDGPUAsmParser::validateOpSel(const MCInst &Inst) {
3918  const unsigned Opc = Inst.getOpcode();
3919  if (Opc == AMDGPU::V_PERMLANE16_B32_gfx10 ||
3920  Opc == AMDGPU::V_PERMLANEX16_B32_gfx10) {
3921  int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
3922  unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
3923 
3924  if (OpSel & ~3)
3925  return false;
3926  }
3927  return true;
3928 }
3929 
3930 // Check if VCC register matches wavefront size
3931 bool AMDGPUAsmParser::validateVccOperand(unsigned Reg) const {
3932  auto FB = getFeatureBits();
3933  return (FB[AMDGPU::FeatureWavefrontSize64] && Reg == AMDGPU::VCC) ||
3934  (FB[AMDGPU::FeatureWavefrontSize32] && Reg == AMDGPU::VCC_LO);
3935 }
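// For example, with FeatureWavefrontSize32 a carry operand written as
// vcc_lo satisfies this check while plain vcc does not; in wave64 mode the
// opposite holds.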
3936 
3937 // VOP3 literal is only allowed in GFX10+ and only one can be used
3938 bool AMDGPUAsmParser::validateVOP3Literal(const MCInst &Inst,
3939  const OperandVector &Operands) {
3940  unsigned Opcode = Inst.getOpcode();
3941  const MCInstrDesc &Desc = MII.get(Opcode);
3942  if (!(Desc.TSFlags & (SIInstrFlags::VOP3 | SIInstrFlags::VOP3P)))
3943  return true;
3944 
3945  const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
3946  const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
3947  const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);
3948 
3949  const int OpIndices[] = { Src0Idx, Src1Idx, Src2Idx };
3950 
3951  unsigned NumExprs = 0;
3952  unsigned NumLiterals = 0;
3953  uint32_t LiteralValue;
3954 
3955  for (int OpIdx : OpIndices) {
3956  if (OpIdx == -1) break;
3957 
3958  const MCOperand &MO = Inst.getOperand(OpIdx);
3959  if (!MO.isImm() && !MO.isExpr())
3960  continue;
3961  if (!AMDGPU::isSISrcOperand(Desc, OpIdx))
3962  continue;
3963 
3964  if (OpIdx == Src2Idx && (Desc.TSFlags & SIInstrFlags::IsMAI) &&
3965  getFeatureBits()[AMDGPU::FeatureMFMAInlineLiteralBug]) {
3966  Error(getConstLoc(Operands),
3967  "inline constants are not allowed for this operand");
3968  return false;
3969  }
3970 
3971  if (MO.isImm() && !isInlineConstant(Inst, OpIdx)) {
3972  uint32_t Value = static_cast<uint32_t>(MO.getImm());
3973  if (NumLiterals == 0 || LiteralValue != Value) {
3974  LiteralValue = Value;
3975  ++NumLiterals;
3976  }
3977  } else if (MO.isExpr()) {
3978  ++NumExprs;
3979  }
3980  }
3981  NumLiterals += NumExprs;
3982 
3983  if (!NumLiterals)
3984  return true;
3985 
3986  if (!getFeatureBits()[AMDGPU::FeatureVOP3Literal]) {
3987  Error(getLitLoc(Operands), "literal operands are not supported");
3988  return false;
3989  }
3990 
3991  if (NumLiterals > 1) {
3992  Error(getLitLoc(Operands), "only one literal operand is allowed");
3993  return false;
3994  }
3995 
3996  return true;
3997 }
3998 
3999 // Returns -1 if not a register, 0 if VGPR and 1 if AGPR.
4000 static int IsAGPROperand(const MCInst &Inst, uint16_t NameIdx,
4001  const MCRegisterInfo *MRI) {
4002  int OpIdx = AMDGPU::getNamedOperandIdx(Inst.getOpcode(), NameIdx);
4003  if (OpIdx < 0)
4004  return -1;
4005 
4006  const MCOperand &Op = Inst.getOperand(OpIdx);
4007  if (!Op.isReg())
4008  return -1;
4009 
4010  unsigned Sub = MRI->getSubReg(Op.getReg(), AMDGPU::sub0);
4011  auto Reg = Sub ? Sub : Op.getReg();
4012  const MCRegisterClass &AGRP32 = MRI->getRegClass(AMDGPU::AGPR_32RegClassID);
4013  return AGRP32.contains(Reg) ? 1 : 0;
4014 }
4015 
4016 bool AMDGPUAsmParser::validateAGPRLdSt(const MCInst &Inst) const {
4017  uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
4018  if ((TSFlags & (SIInstrFlags::FLAT | SIInstrFlags::MUBUF |
4019  SIInstrFlags::MTBUF | SIInstrFlags::MIMG |
4020  SIInstrFlags::DS)) == 0)
4021  return true;
4022 
4023  uint16_t DataNameIdx = (TSFlags & SIInstrFlags::DS) ? AMDGPU::OpName::data0
4024  : AMDGPU::OpName::vdata;
4025 
4026  const MCRegisterInfo *MRI = getMRI();
4027  int DstAreg = IsAGPROperand(Inst, AMDGPU::OpName::vdst, MRI);
4028  int DataAreg = IsAGPROperand(Inst, DataNameIdx, MRI);
4029 
4030  if ((TSFlags & SIInstrFlags::DS) && DataAreg >= 0) {
4031  int Data2Areg = IsAGPROperand(Inst, AMDGPU::OpName::data1, MRI);
4032  if (Data2Areg >= 0 && Data2Areg != DataAreg)
4033  return false;
4034  }
4035 
4036  auto FB = getFeatureBits();
4037  if (FB[AMDGPU::FeatureGFX90AInsts]) {
4038  if (DataAreg < 0 || DstAreg < 0)
4039  return true;
4040  return DstAreg == DataAreg;
4041  }
4042 
4043  return DstAreg < 1 && DataAreg < 1;
4044 }
4045 
4046 bool AMDGPUAsmParser::validateVGPRAlign(const MCInst &Inst) const {
4047  auto FB = getFeatureBits();
4048  if (!FB[AMDGPU::FeatureGFX90AInsts])
4049  return true;
4050 
4051  const MCRegisterInfo *MRI = getMRI();
4052  const MCRegisterClass &VGRP32 = MRI->getRegClass(AMDGPU::VGPR_32RegClassID);
4053  const MCRegisterClass &AGRP32 = MRI->getRegClass(AMDGPU::AGPR_32RegClassID);
4054  for (unsigned I = 0, E = Inst.getNumOperands(); I != E; ++I) {
4055  const MCOperand &Op = Inst.getOperand(I);
4056  if (!Op.isReg())
4057  continue;
4058 
4059  unsigned Sub = MRI->getSubReg(Op.getReg(), AMDGPU::sub0);
4060  if (!Sub)
4061  continue;
4062 
4063  if (VGRP32.contains(Sub) && ((Sub - AMDGPU::VGPR0) & 1))
4064  return false;
4065  if (AGRP32.contains(Sub) && ((Sub - AMDGPU::AGPR0) & 1))
4066  return false;
4067  }
4068 
4069  return true;
4070 }
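// For example, on gfx90a a 64-bit tuple written as v[1:2] starts on an odd
// register and fails this check, while v[0:1] (and the analogous a[0:1])
// is accepted.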
4071 
4072 bool AMDGPUAsmParser::validateCoherencyBits(const MCInst &Inst,
4073  const OperandVector &Operands,
4074  const SMLoc &IDLoc) {
4075  int GLCPos = AMDGPU::getNamedOperandIdx(Inst.getOpcode(),
4076  AMDGPU::OpName::glc1);
4077  if (GLCPos != -1) {
4078  // -1 is set by GLC_1 default operand. In all cases "glc" must be present
4079  // in the asm string, and the default value means it is not present.
4080  if (Inst.getOperand(GLCPos).getImm() == -1) {
4081  Error(IDLoc, "instruction must use glc");
4082  return false;
4083  }
4084  }
4085 
4086  return true;
4087 }
4088 
4089 bool AMDGPUAsmParser::validateInstruction(const MCInst &Inst,
4090  const SMLoc &IDLoc,
4091  const OperandVector &Operands) {
4092  if (!validateLdsDirect(Inst)) {
4093  Error(getRegLoc(AMDGPU::LDS_DIRECT, Operands),
4094  "invalid use of lds_direct");
4095  return false;
4096  }
4097  if (!validateSOPLiteral(Inst)) {
4098  Error(getLitLoc(Operands),
4099  "only one literal operand is allowed");
4100  return false;
4101  }
4102  if (!validateVOP3Literal(Inst, Operands)) {
4103  return false;
4104  }
4105  if (!validateConstantBusLimitations(Inst, Operands)) {
4106  return false;
4107  }
4108  if (!validateEarlyClobberLimitations(Inst, Operands)) {
4109  return false;
4110  }
4111  if (!validateIntClampSupported(Inst)) {
4112  Error(getImmLoc(AMDGPUOperand::ImmTyClampSI, Operands),
4113  "integer clamping is not supported on this GPU");
4114  return false;
4115  }
4116  if (!validateOpSel(Inst)) {
4117  Error(getImmLoc(AMDGPUOperand::ImmTyOpSel, Operands),
4118  "invalid op_sel operand");
4119  return false;
4120  }
4121  // For MUBUF/MTBUF d16 is a part of opcode, so there is nothing to validate.
4122  if (!validateMIMGD16(Inst)) {
4123  Error(getImmLoc(AMDGPUOperand::ImmTyD16, Operands),
4124  "d16 modifier is not supported on this GPU");
4125  return false;
4126  }
4127  if (!validateMIMGDim(Inst)) {
4128  Error(IDLoc, "dim modifier is required on this GPU");
4129  return false;
4130  }
4131  if (!validateMIMGDataSize(Inst)) {
4132  Error(IDLoc,
4133  "image data size does not match dmask and tfe");
4134  return false;
4135  }
4136  if (!validateMIMGAddrSize(Inst)) {
4137  Error(IDLoc,
4138  "image address size does not match dim and a16");
4139  return false;
4140  }
4141  if (!validateMIMGAtomicDMask(Inst)) {
4142  Error(getImmLoc(AMDGPUOperand::ImmTyDMask, Operands),
4143  "invalid atomic image dmask");
4144  return false;
4145  }
4146  if (!validateMIMGGatherDMask(Inst)) {
4147  Error(getImmLoc(AMDGPUOperand::ImmTyDMask, Operands),
4148  "invalid image_gather dmask: only one bit must be set");
4149  return false;
4150  }
4151  if (!validateMovrels(Inst, Operands)) {
4152  return false;
4153  }
4154  if (!validateFlatOffset(Inst, Operands)) {
4155  return false;
4156  }
4157  if (!validateSMEMOffset(Inst, Operands)) {
4158  return false;
4159  }
4160  if (!validateMAIAccWrite(Inst, Operands)) {
4161  return false;
4162  }
4163  if (!validateCoherencyBits(Inst, Operands, IDLoc)) {
4164  return false;
4165  }
4166 
4167  if (!validateAGPRLdSt(Inst)) {
4168  Error(IDLoc, getFeatureBits()[AMDGPU::FeatureGFX90AInsts]
4169  ? "invalid register class: data and dst should be all VGPR or AGPR"
4170  : "invalid register class: agpr loads and stores not supported on this GPU"
4171  );
4172  return false;
4173  }
4174  if (!validateVGPRAlign(Inst)) {
4175  Error(IDLoc,
4176  "invalid register class: vgpr tuples must be 64 bit aligned");
4177  return false;
4178  }
4179 
4180  if (!validateDivScale(Inst)) {
4181  Error(IDLoc, "ABS not allowed in VOP3B instructions");
4182  return false;
4183  }
4184  if (!validateCoherencyBits(Inst, Operands, IDLoc)) {
4185  return false;
4186  }
4187 
4188  return true;
4189 }
4190 
4191 static std::string AMDGPUMnemonicSpellCheck(StringRef S,
4192  const FeatureBitset &FBS,
4193  unsigned VariantID = 0);
4194 
4195 static bool AMDGPUCheckMnemonic(StringRef Mnemonic,
4196  const FeatureBitset &AvailableFeatures,
4197  unsigned VariantID);
4198 
4199 bool AMDGPUAsmParser::isSupportedMnemo(StringRef Mnemo,
4200  const FeatureBitset &FBS) {
4201  return isSupportedMnemo(Mnemo, FBS, getAllVariants());
4202 }
4203 
4204 bool AMDGPUAsmParser::isSupportedMnemo(StringRef Mnemo,
4205  const FeatureBitset &FBS,
4206  ArrayRef<unsigned> Variants) {
4207  for (auto Variant : Variants) {
4208  if (AMDGPUCheckMnemonic(Mnemo, FBS, Variant))
4209  return true;
4210  }
4211 
4212  return false;
4213 }
4214 
4215 bool AMDGPUAsmParser::checkUnsupportedInstruction(StringRef Mnemo,
4216  const SMLoc &IDLoc) {
4217  FeatureBitset FBS = ComputeAvailableFeatures(getSTI().getFeatureBits());
4218 
4219  // Check if requested instruction variant is supported.
4220  if (isSupportedMnemo(Mnemo, FBS, getMatchedVariants()))
4221  return false;
4222 
4223  // This instruction is not supported.
4224  // Clear any other pending errors because they are no longer relevant.
4225  getParser().clearPendingErrors();
4226 
4227  // Requested instruction variant is not supported.
4228  // Check if any other variants are supported.
4229  StringRef VariantName = getMatchedVariantName();
4230  if (!VariantName.empty() && isSupportedMnemo(Mnemo, FBS)) {
4231  return Error(IDLoc,
4232  Twine(VariantName,
4233  " variant of this instruction is not supported"));
4234  }
4235 
4236  // Finally check if this instruction is supported on any other GPU.
4237  if (isSupportedMnemo(Mnemo, FeatureBitset().set())) {
4238  return Error(IDLoc, "instruction not supported on this GPU");
4239  }
4240 
4241  // Instruction not supported on any GPU. Probably a typo.
4242  std::string Suggestion = AMDGPUMnemonicSpellCheck(Mnemo, FBS);
4243  return Error(IDLoc, "invalid instruction" + Suggestion);
4244 }
4245 
4246 bool AMDGPUAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
4247  OperandVector &Operands,
4248  MCStreamer &Out,
4249  uint64_t &ErrorInfo,
4250  bool MatchingInlineAsm) {
4251  MCInst Inst;
4252  unsigned Result = Match_Success;
4253  for (auto Variant : getMatchedVariants()) {
4254  uint64_t EI;
4255  auto R = MatchInstructionImpl(Operands, Inst, EI, MatchingInlineAsm,
4256  Variant);
4257  // We order match statuses from least to most specific and use the most
4258  // specific status as the result:
4259  // Match_MnemonicFail < Match_InvalidOperand < Match_MissingFeature < Match_PreferE32
4260  if ((R == Match_Success) ||
4261  (R == Match_PreferE32) ||
4262  (R == Match_MissingFeature && Result != Match_PreferE32) ||
4263  (R == Match_InvalidOperand && Result != Match_MissingFeature
4264  && Result != Match_PreferE32) ||
4265  (R == Match_MnemonicFail && Result != Match_InvalidOperand
4266  && Result != Match_MissingFeature
4267  && Result != Match_PreferE32)) {
4268  Result = R;
4269  ErrorInfo = EI;
4270  }
4271  if (R == Match_Success)
4272  break;
4273  }
4274 
4275  if (Result == Match_Success) {
4276  if (!validateInstruction(Inst, IDLoc, Operands)) {
4277  return true;
4278  }
4279  Inst.setLoc(IDLoc);
4280  Out.emitInstruction(Inst, getSTI());
4281  return false;
4282  }
4283 
4284  StringRef Mnemo = ((AMDGPUOperand &)*Operands[0]).getToken();
4285  if (checkUnsupportedInstruction(Mnemo, IDLoc)) {
4286  return true;
4287  }
4288 
4289  switch (Result) {
4290  default: break;
4291  case Match_MissingFeature:
4292  // It has been verified that the specified instruction
4293  // mnemonic is valid. A match was found but it requires
4294  // features which are not supported on this GPU.
4295  return Error(IDLoc, "operands are not valid for this GPU or mode");
4296 
4297  case Match_InvalidOperand: {
4298  SMLoc ErrorLoc = IDLoc;
4299  if (ErrorInfo != ~0ULL) {
4300  if (ErrorInfo >= Operands.size()) {
4301  return Error(IDLoc, "too few operands for instruction");
4302  }
4303  ErrorLoc = ((AMDGPUOperand &)*Operands[ErrorInfo]).getStartLoc();
4304  if (ErrorLoc == SMLoc())
4305  ErrorLoc = IDLoc;
4306  }
4307  return Error(ErrorLoc, "invalid operand for instruction");
4308  }
4309 
4310  case Match_PreferE32:
4311  return Error(IDLoc, "internal error: instruction without _e64 suffix "
4312  "should be encoded as e32");
4313  case Match_MnemonicFail:
4314  llvm_unreachable("Invalid instructions should have been handled already");
4315  }
4316  llvm_unreachable("Implement any new match types added!");
4317 }
4318 
4319 bool AMDGPUAsmParser::ParseAsAbsoluteExpression(uint32_t &Ret) {
4320  int64_t Tmp = -1;
4321  if (!isToken(AsmToken::Integer) && !isToken(AsmToken::Identifier)) {
4322  return true;
4323  }
4324  if (getParser().parseAbsoluteExpression(Tmp)) {
4325  return true;
4326  }
4327  Ret = static_cast<uint32_t>(Tmp);
4328  return false;
4329 }
4330 
4331 bool AMDGPUAsmParser::ParseDirectiveMajorMinor(uint32_t &Major,
4332  uint32_t &Minor) {
4333  if (ParseAsAbsoluteExpression(Major))
4334  return TokError("invalid major version");
4335 
4336  if (!trySkipToken(AsmToken::Comma))
4337  return TokError("minor version number required, comma expected");
4338 
4339  if (ParseAsAbsoluteExpression(Minor))
4340  return TokError("invalid minor version");
4341 
4342  return false;
4343 }
4344 
4345 bool AMDGPUAsmParser::ParseDirectiveAMDGCNTarget() {
4346  if (getSTI().getTargetTriple().getArch() != Triple::amdgcn)
4347  return TokError("directive only supported for amdgcn architecture");
4348 
4349  std::string Target;
4350 
4351  SMLoc TargetStart = getLoc();
4352  if (getParser().parseEscapedString(Target))
4353  return true;
4354  SMRange TargetRange = SMRange(TargetStart, getLoc());
4355 
4356  std::string ExpectedTarget;
4357  raw_string_ostream ExpectedTargetOS(ExpectedTarget);
4358  IsaInfo::streamIsaVersion(&getSTI(), ExpectedTargetOS);
4359 
4360  if (Target != ExpectedTargetOS.str())
4361  return Error(TargetRange.Start, "target must match options", TargetRange);
4362 
4363  getTargetStreamer().EmitDirectiveAMDGCNTarget(Target);
4364  return false;
4365 }
4366 
4367 bool AMDGPUAsmParser::OutOfRangeError(SMRange Range) {
4368  return Error(Range.Start, "value out of range", Range);
4369 }
4370 
4371 bool AMDGPUAsmParser::calculateGPRBlocks(
4372  const FeatureBitset &Features, bool VCCUsed, bool FlatScrUsed,
4373  bool XNACKUsed, Optional<bool> EnableWavefrontSize32, unsigned NextFreeVGPR,
4374  SMRange VGPRRange, unsigned NextFreeSGPR, SMRange SGPRRange,
4375  unsigned &VGPRBlocks, unsigned &SGPRBlocks) {
4376  // TODO(scott.linder): These calculations are duplicated from
4377  // AMDGPUAsmPrinter::getSIProgramInfo and could be unified.
4378  IsaVersion Version = getIsaVersion(getSTI().getCPU());
4379 
4380  unsigned NumVGPRs = NextFreeVGPR;
4381  unsigned NumSGPRs = NextFreeSGPR;
4382 
4383  if (Version.Major >= 10)
4384  NumSGPRs = 0;
4385  else {
4386  unsigned MaxAddressableNumSGPRs =
4387  IsaInfo::getAddressableNumSGPRs(&getSTI());
4388 
4389  if (Version.Major >= 8 && !Features.test(FeatureSGPRInitBug) &&
4390  NumSGPRs > MaxAddressableNumSGPRs)
4391  return OutOfRangeError(SGPRRange);
4392 
4393  NumSGPRs +=
4394  IsaInfo::getNumExtraSGPRs(&getSTI(), VCCUsed, FlatScrUsed, XNACKUsed);
4395 
4396  if ((Version.Major <= 7 || Features.test(FeatureSGPRInitBug)) &&
4397  NumSGPRs > MaxAddressableNumSGPRs)
4398  return OutOfRangeError(SGPRRange);
4399 
4400  if (Features.test(FeatureSGPRInitBug))
4401  NumSGPRs = IsaInfo::FIXED_NUM_SGPRS_FOR_INIT_BUG;
4402  }
4403 
4404  VGPRBlocks =
4405  IsaInfo::getNumVGPRBlocks(&getSTI(), NumVGPRs, EnableWavefrontSize32);
4406  SGPRBlocks = IsaInfo::getNumSGPRBlocks(&getSTI(), NumSGPRs);
4407 
4408  return false;
4409 }
4410 
4411 bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() {
4412  if (getSTI().getTargetTriple().getArch() != Triple::amdgcn)
4413  return TokError("directive only supported for amdgcn architecture");
4414 
4415  if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA)
4416  return TokError("directive only supported for amdhsa OS");
4417 
4418  StringRef KernelName;
4419  if (getParser().parseIdentifier(KernelName))
4420  return true;
4421 
4421 
4422  kernel_descriptor_t KD = getDefaultAmdhsaKernelDescriptor(&getSTI());
4423 
4424  StringSet<> Seen;
4425 
4426  IsaVersion IVersion = getIsaVersion(getSTI().getCPU());
4427 
4428  SMRange VGPRRange;
4429  uint64_t NextFreeVGPR = 0;
4430  uint64_t AccumOffset = 0;
4431  SMRange SGPRRange;
4432  uint64_t NextFreeSGPR = 0;
4433  unsigned UserSGPRCount = 0;
4434  bool ReserveVCC = true;
4435  bool ReserveFlatScr = true;
4436  bool ReserveXNACK = hasXNACK();
4437  Optional<bool> EnableWavefrontSize32;
4438 
4439  while (true) {
4440  while (trySkipToken(AsmToken::EndOfStatement));
4441 
4442  StringRef ID;
4443  SMRange IDRange = getTok().getLocRange();
4444  if (!parseId(ID, "expected .amdhsa_ directive or .end_amdhsa_kernel"))
4445  return true;
4446 
4447  if (ID == ".end_amdhsa_kernel")
4448  break;
4449 
4450  if (Seen.find(ID) != Seen.end())
4451  return TokError(".amdhsa_ directives cannot be repeated");
4452  Seen.insert(ID);
4453 
4454  SMLoc ValStart = getLoc();
4455  int64_t IVal;
4456  if (getParser().parseAbsoluteExpression(IVal))
4457  return true;
4458  SMLoc ValEnd = getLoc();
4459  SMRange ValRange = SMRange(ValStart, ValEnd);
4460 
4461  if (IVal < 0)
4462  return OutOfRangeError(ValRange);
4463 
4464  uint64_t Val = IVal;
4465 
4466 #define PARSE_BITS_ENTRY(FIELD, ENTRY, VALUE, RANGE) \
4467  if (!isUInt<ENTRY##_WIDTH>(VALUE)) \
4468  return OutOfRangeError(RANGE); \
4469  AMDHSA_BITS_SET(FIELD, ENTRY, VALUE);
4470 
4471  if (ID == ".amdhsa_group_segment_fixed_size") {
4472  if (!isUInt<sizeof(KD.group_segment_fixed_size) * CHAR_BIT>(Val))
4473  return OutOfRangeError(ValRange);
4474  KD.group_segment_fixed_size = Val;
4475  } else if (ID == ".amdhsa_private_segment_fixed_size") {
4476  if (!isUInt<sizeof(KD.private_segment_fixed_size) * CHAR_BIT>(Val))
4477  return OutOfRangeError(ValRange);
4478  KD.private_segment_fixed_size = Val;
4479  } else if (ID == ".amdhsa_user_sgpr_private_segment_buffer") {
4480  PARSE_BITS_ENTRY(KD.kernel_code_properties,
4481  KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER,
4482  Val, ValRange);
4483  if (Val)
4484  UserSGPRCount += 4;
4485  } else if (ID == ".amdhsa_user_sgpr_dispatch_ptr") {
4486  PARSE_BITS_ENTRY(KD.kernel_code_properties,
4487  KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR, Val,
4488  ValRange);
4489  if (Val)
4490  UserSGPRCount += 2;
4491  } else if (ID == ".amdhsa_user_sgpr_queue_ptr") {
4492  PARSE_BITS_ENTRY(KD.kernel_code_properties,
4493  KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR, Val,
4494  ValRange);
4495  if (Val)
4496  UserSGPRCount += 2;
4497  } else if (ID == ".amdhsa_user_sgpr_kernarg_segment_ptr") {
4498  PARSE_BITS_ENTRY(KD.kernel_code_properties,
4499  KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR,
4500  Val, ValRange);
4501  if (Val)
4502  UserSGPRCount += 2;
4503  } else if (ID == ".amdhsa_user_sgpr_dispatch_id") {
4504  PARSE_BITS_ENTRY(KD.kernel_code_properties,
4505  KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID, Val,
4506  ValRange);
4507  if (Val)
4508  UserSGPRCount += 2;
4509  } else if (ID == ".amdhsa_user_sgpr_flat_scratch_init") {
4510  PARSE_BITS_ENTRY(KD.kernel_code_properties,
4511  KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT, Val,
4512  ValRange);
4513  if (Val)
4514  UserSGPRCount += 2;
4515  } else if (ID == ".amdhsa_user_sgpr_private_segment_size") {
4516  PARSE_BITS_ENTRY(KD.kernel_code_properties,
4517  KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE,
4518  Val, ValRange);
4519  if (Val)
4520  UserSGPRCount += 1;
4521  } else if (ID == ".amdhsa_wavefront_size32") {
4522  if (IVersion.Major < 10)
4523  return Error(IDRange.Start, "directive requires gfx10+", IDRange);
4524  EnableWavefrontSize32 = Val;
4525  PARSE_BITS_ENTRY(KD.kernel_code_properties,
4526  KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32,
4527  Val, ValRange);
4528  } else if (ID == ".amdhsa_system_sgpr_private_segment_wavefront_offset") {
4529  PARSE_BITS_ENTRY(
4530  KD.compute_pgm_rsrc2,
4531  COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT, Val,
4532  ValRange);
4533  } else if (ID == ".amdhsa_system_sgpr_workgroup_id_x") {
4534  PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4535  COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X, Val,
4536  ValRange);
4537  } else if (ID == ".amdhsa_system_sgpr_workgroup_id_y") {
4538  PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4539  COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Y, Val,
4540  ValRange);
4541  } else if (ID == ".amdhsa_system_sgpr_workgroup_id_z") {
4542  PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4543  COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Z, Val,
4544  ValRange);
4545  } else if (ID == ".amdhsa_system_sgpr_workgroup_info") {
4546  PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4547  COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_INFO, Val,
4548  ValRange);
4549  } else if (ID == ".amdhsa_system_vgpr_workitem_id") {
4550  PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4551  COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID, Val,
4552  ValRange);
4553  } else if (ID == ".amdhsa_next_free_vgpr") {
4554  VGPRRange = ValRange;
4555  NextFreeVGPR = Val;
4556  } else if (ID == ".amdhsa_next_free_sgpr") {
4557  SGPRRange = ValRange;
4558  NextFreeSGPR = Val;
4559  } else if (ID == ".amdhsa_accum_offset") {
4560  if (!isGFX90A())
4561  return Error(IDRange.Start, "directive requires gfx90a+", IDRange);
4562  AccumOffset = Val;
4563  } else if (ID == ".amdhsa_reserve_vcc") {
4564  if (!isUInt<1>(Val))
4565  return OutOfRangeError(ValRange);
4566  ReserveVCC = Val;
4567  } else if (ID == ".amdhsa_reserve_flat_scratch") {
4568  if (IVersion.Major < 7)
4569  return Error(IDRange.Start, "directive requires gfx7+", IDRange);
4570  if (!isUInt<1>(Val))
4571  return OutOfRangeError(ValRange);
4572  ReserveFlatScr = Val;
4573  } else if (ID == ".amdhsa_reserve_xnack_mask") {
4574  if (IVersion.Major < 8)
4575  return Error(IDRange.Start, "directive requires gfx8+", IDRange);
4576  if (!isUInt<1>(Val))
4577  return OutOfRangeError(ValRange);
4578  ReserveXNACK = Val;
4579  } else if (ID == ".amdhsa_float_round_mode_32") {
4580  PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
4581  COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32, Val, ValRange);
4582  } else if (ID == ".amdhsa_float_round_mode_16_64") {
4583  PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
4584  COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_16_64, Val, ValRange);
4585  } else if (ID == ".amdhsa_float_denorm_mode_32") {
4586  PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
4587  COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_32, Val, ValRange);
4588  } else if (ID == ".amdhsa_float_denorm_mode_16_64") {
4589  PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
4590  COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64, Val,
4591  ValRange);
4592  } else if (ID == ".amdhsa_dx10_clamp") {
4593  PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
4594  COMPUTE_PGM_RSRC1_ENABLE_DX10_CLAMP, Val, ValRange);
4595  } else if (ID == ".amdhsa_ieee_mode") {
4596  PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_ENABLE_IEEE_MODE,
4597  Val, ValRange);
4598  } else if (ID == ".amdhsa_fp16_overflow") {
4599  if (IVersion.Major < 9)
4600  return Error(IDRange.Start, "directive requires gfx9+", IDRange);
4601  PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_FP16_OVFL, Val,
4602  ValRange);
4603  } else if (ID == ".amdhsa_tg_split") {
4604  if (!isGFX90A())
4605  return Error(IDRange.Start, "directive requires gfx90a+", IDRange);
4606  PARSE_BITS_ENTRY(KD.compute_pgm_rsrc3, COMPUTE_PGM_RSRC3_GFX90A_TG_SPLIT, Val,
4607  ValRange);
4608  } else if (ID == ".amdhsa_workgroup_processor_mode") {
4609  if (IVersion.Major < 10)
4610  return Error(IDRange.Start, "directive requires gfx10+", IDRange);
4611  PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_WGP_MODE, Val,
4612  ValRange);
4613  } else if (ID == ".amdhsa_memory_ordered") {
4614  if (IVersion.Major < 10)
4615  return Error(IDRange.Start, "directive requires gfx10+", IDRange);
4616  PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_MEM_ORDERED, Val,
4617  ValRange);
4618  } else if (ID == ".amdhsa_forward_progress") {
4619  if (IVersion.Major < 10)
4620  return Error(IDRange.Start, "directive requires gfx10+", IDRange);
4621  PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_FWD_PROGRESS, Val,
4622  ValRange);
4623  } else if (ID == ".amdhsa_exception_fp_ieee_invalid_op") {
4624  PARSE_BITS_ENTRY(
4625  KD.compute_pgm_rsrc2,
4626  COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION, Val,
4627  ValRange);
4628  } else if (ID == ".amdhsa_exception_fp_denorm_src") {
4629  PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4630  COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_FP_DENORMAL_SOURCE,
4631  Val, ValRange);
4632  } else if (ID == ".amdhsa_exception_fp_ieee_div_zero") {
4633  PARSE_BITS_ENTRY(
4634  KD.compute_pgm_rsrc2,
4635  COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO, Val,
4636  ValRange);
4637  } else if (ID == ".amdhsa_exception_fp_ieee_overflow") {
4638  PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4639  COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW,
4640  Val, ValRange);
4641  } else if (ID == ".amdhsa_exception_fp_ieee_underflow") {
4642  PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4643  COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW,
4644  Val, ValRange);
4645  } else if (ID == ".amdhsa_exception_fp_ieee_inexact") {
4646  PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4647  COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INEXACT,
4648  Val, ValRange);
4649  } else if (ID == ".amdhsa_exception_int_div_zero") {
4650  PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4651  COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO,
4652  Val, ValRange);
4653  } else {
4654  return Error(IDRange.Start, "unknown .amdhsa_kernel directive", IDRange);
4655  }
4656 
4657 #undef PARSE_BITS_ENTRY
4658  }
4659 
4660  if (Seen.find(".amdhsa_next_free_vgpr") == Seen.end())
4661  return TokError(".amdhsa_next_free_vgpr directive is required");
4662 
4663  if (Seen.find(".amdhsa_next_free_sgpr") == Seen.end())
4664  return TokError(".amdhsa_next_free_sgpr directive is required");
4665 
4666  unsigned VGPRBlocks;
4667  unsigned SGPRBlocks;
4668  if (calculateGPRBlocks(getFeatureBits(), ReserveVCC, ReserveFlatScr,
4669  ReserveXNACK, EnableWavefrontSize32, NextFreeVGPR,
4670  VGPRRange, NextFreeSGPR, SGPRRange, VGPRBlocks,
4671  SGPRBlocks))
4672  return true;
4673 
4674  if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT_WIDTH>(
4675  VGPRBlocks))
4676  return OutOfRangeError(VGPRRange);
4677  AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
4678  COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT, VGPRBlocks);
4679 
4680  if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT_WIDTH>(
4681  SGPRBlocks))
4682  return OutOfRangeError(SGPRRange);
4683  AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
4684  COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT,
4685  SGPRBlocks);
4686 
4687  if (!isUInt<COMPUTE_PGM_RSRC2_USER_SGPR_COUNT_WIDTH>(UserSGPRCount))
4688  return TokError("too many user SGPRs enabled");
4689  AMDHSA_BITS_SET(KD.compute_pgm_rsrc2, COMPUTE_PGM_RSRC2_USER_SGPR_COUNT,
4690  UserSGPRCount);
4691 
4692  if (isGFX90A()) {
4693  if (Seen.find(".amdhsa_accum_offset") == Seen.end())
4694  return TokError(".amdhsa_accum_offset directive is required");
4695  if (AccumOffset < 4 || AccumOffset > 256 || (AccumOffset & 3))
4696  return TokError("accum_offset should be in range [4..256] in "
4697  "increments of 4");
4698  if (AccumOffset > alignTo(std::max((uint64_t)1, NextFreeVGPR), 4))
4699  return TokError("accum_offset exceeds total VGPR allocation");
4700  AMDHSA_BITS_SET(KD.compute_pgm_rsrc3, COMPUTE_PGM_RSRC3_GFX90A_ACCUM_OFFSET,
4701  (AccumOffset / 4 - 1));
4702  }
4703 
4704  getTargetStreamer().EmitAmdhsaKernelDescriptor(
4705  getSTI(), KernelName, KD, NextFreeVGPR, NextFreeSGPR, ReserveVCC,
4706  ReserveFlatScr, ReserveXNACK);
4707  return false;
4708 }
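// A minimal directive block accepted by this parser looks roughly like
// (kernel name chosen here purely for illustration):
//   .amdhsa_kernel my_kernel
//     .amdhsa_next_free_vgpr 8
//     .amdhsa_next_free_sgpr 16
//   .end_amdhsa_kernel
// .amdhsa_next_free_vgpr and .amdhsa_next_free_sgpr are mandatory, and on
// gfx90a .amdhsa_accum_offset must be given as well.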
4709 
4710 bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectVersion() {
4711  uint32_t Major;
4712  uint32_t Minor;
4713 
4714  if (ParseDirectiveMajorMinor(Major, Minor))
4715  return true;
4716 
4717  getTargetStreamer().EmitDirectiveHSACodeObjectVersion(Major, Minor);
4718  return false;
4719 }
4720 
4721 bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectISA() {
4722  uint32_t Major;
4723  uint32_t Minor;
4724  uint32_t Stepping;
4725  StringRef VendorName;
4726  StringRef ArchName;
4727 
4728  // If this directive has no arguments, then use the ISA version for the
4729  // targeted GPU.
4730  if (isToken(AsmToken::EndOfStatement)) {
4731  IsaVersion ISA = getIsaVersion(getSTI().getCPU());
4732  getTargetStreamer().EmitDirectiveHSACodeObjectISA(ISA.Major, ISA.Minor,
4733  ISA.Stepping,
4734  "AMD", "AMDGPU");
4735  return false;
4736  }
4737 
4738  if (ParseDirectiveMajorMinor(Major, Minor))
4739  return true;
4740 
4741  if (!trySkipToken(AsmToken::Comma))
4742  return TokError("stepping version number required, comma expected");
4743 
4744  if (ParseAsAbsoluteExpression(Stepping))
4745  return TokError("invalid stepping version");
4746 
4747  if (!trySkipToken(AsmToken::Comma))
4748  return TokError("vendor name required, comma expected");
4749 
4750  if (!parseString(VendorName, "invalid vendor name"))
4751  return true;
4752 
4753  if (!trySkipToken(AsmToken::Comma))
4754  return TokError("arch name required, comma expected");
4755 
4756  if (!parseString(ArchName, "invalid arch name"))
4757  return true;
4758 
4759  getTargetStreamer().EmitDirectiveHSACodeObjectISA(Major, Minor, Stepping,
4760  VendorName, ArchName);
4761  return false;
4762 }
4763 
4764 bool AMDGPUAsmParser::ParseAMDKernelCodeTValue(StringRef ID,
4765  amd_kernel_code_t &Header) {
4766  // max_scratch_backing_memory_byte_size is deprecated. Ignore it while parsing
4767  // assembly for backwards compatibility.
4768  if (ID == "max_scratch_backing_memory_byte_size") {
4769  Parser.eatToEndOfStatement();
4770  return false;
4771  }
4772 
4773  SmallString<40> ErrStr;
4774  raw_svector_ostream Err(ErrStr);
4775  if (!parseAmdKernelCodeField(ID, getParser(), Header, Err)) {
4776  return TokError(Err.str());
4777  }
4778  Lex();
4779 
4780  if (ID == "enable_wavefront_size32") {
4781  if (Header.code_properties & AMD_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32) {
4782  if (!isGFX10Plus())
4783  return TokError("enable_wavefront_size32=1 is only allowed on GFX10+");
4784  if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32])
4785  return TokError("enable_wavefront_size32=1 requires +WavefrontSize32");
4786  } else {
4787  if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64])
4788  return TokError("enable_wavefront_size32=0 requires +WavefrontSize64");
4789  }
4790  }
4791 
4792  if (ID == "wavefront_size") {
4793  if (Header.wavefront_size == 5) {
4794  if (!isGFX10Plus())
4795  return TokError("wavefront_size=5 is only allowed on GFX10+");
4796  if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32])
4797  return TokError("wavefront_size=5 requires +WavefrontSize32");
4798  } else if (Header.wavefront_size == 6) {
4799  if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64])
4800  return TokError("wavefront_size=6 requires +WavefrontSize64");
4801  }
4802  }
4803 
4804  if (ID == "enable_wgp_mode") {
4805  if (G_00B848_WGP_MODE(Header.compute_pgm_resource_registers) &&
4806  !isGFX10Plus())
4807  return TokError("enable_wgp_mode=1 is only allowed on GFX10+");
4808  }
4809 
4810  if (ID == "enable_mem_ordered") {
4811  if (G_00B848_MEM_ORDERED(Header.compute_pgm_resource_registers) &&
4812  !isGFX10Plus())
4813  return TokError("enable_mem_ordered=1 is only allowed on GFX10+");
4814  }
4815 
4816  if (ID == "enable_fwd_progress") {
4817  if (G_00B848_FWD_PROGRESS(Header.compute_pgm_resource_registers) &&
4818  !isGFX10Plus())
4819  return TokError("enable_fwd_progress=1 is only allowed on GFX10+");
4820  }
4821 
4822  return false;
4823 }
4824 
4825 bool AMDGPUAsmParser::ParseDirectiveAMDKernelCodeT() {
4826  amd_kernel_code_t Header;
4827  AMDGPU::initDefaultAMDKernelCodeT(Header, &getSTI());
4828 
4829  while (true) {
4830  // Lex EndOfStatement. This is in a while loop, because lexing a comment
4831  // will set the current token to EndOfStatement.
4832  while(trySkipToken(AsmToken::EndOfStatement));
4833 
4834  StringRef ID;
4835  if (!parseId(ID, "expected value identifier or .end_amd_kernel_code_t"))
4836  return true;
4837 
4838  if (ID == ".end_amd_kernel_code_t")
4839  break;
4840 
4841  if (ParseAMDKernelCodeTValue(ID, Header))
4842  return true;
4843  }
4844 
4845  getTargetStreamer().EmitAMDKernelCodeT(Header);
4846 
4847  return false;
4848 }
4849 
4850 bool AMDGPUAsmParser::ParseDirectiveAMDGPUHsaKernel() {
4851  StringRef KernelName;
4852  if (!parseId(KernelName, "expected symbol name"))
4853  return true;
4854 
4855  getTargetStreamer().EmitAMDGPUSymbolType(KernelName,
4856  ELF::STT_AMDGPU_HSA_KERNEL);
4857 
4858  KernelScope.initialize(getContext());
4859  return false;
4860 }
4861 
4862 bool AMDGPUAsmParser::ParseDirectiveISAVersion() {
4863  if (getSTI().getTargetTriple().getArch() != Triple::amdgcn) {
4864  return Error(getLoc(),
4865  ".amd_amdgpu_isa directive is not available on non-amdgcn "
4866  "architectures");
4867  }
4868 
4869  auto ISAVersionStringFromASM = getToken().getStringContents();
4870 
4871  std::string ISAVersionStringFromSTI;
4872  raw_string_ostream ISAVersionStreamFromSTI(ISAVersionStringFromSTI);
4873  IsaInfo::streamIsaVersion(&getSTI(), ISAVersionStreamFromSTI);
4874 
4875  if (ISAVersionStringFromASM != ISAVersionStreamFromSTI.str()) {
4876  return Error(getLoc(),
4877  ".amd_amdgpu_isa directive does not match triple and/or mcpu "
4878  "arguments specified through the command line");
4879  }
4880 
4881  getTargetStreamer().EmitISAVersion(ISAVersionStreamFromSTI.str());
4882  Lex();
4883 
4884  return false;
4885 }
4886 
4887 bool AMDGPUAsmParser::ParseDirectiveHSAMetadata() {
4888  const char *AssemblerDirectiveBegin;
4889  const char *AssemblerDirectiveEnd;
4890  std::tie(AssemblerDirectiveBegin, AssemblerDirectiveEnd) =
4891  isHsaAbiVersion3(&getSTI())
4892  ? std::make_tuple(HSAMD::V3::AssemblerDirectiveBegin,
4893  HSAMD::V3::AssemblerDirectiveEnd)
4894  : std::make_tuple(HSAMD::AssemblerDirectiveBegin,
4895  HSAMD::AssemblerDirectiveEnd);
4896 
4897  if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA) {
4898  return Error(getLoc(),
4899  (Twine(AssemblerDirectiveBegin) + Twine(" directive is "
4900  "not available on non-amdhsa OSes")).str());
4901  }
4902 
4903  std::string HSAMetadataString;
4904  if (ParseToEndDirective(AssemblerDirectiveBegin, AssemblerDirectiveEnd,
4905  HSAMetadataString))
4906  return true;
4907 
4908  if (isHsaAbiVersion3(&getSTI())) {
4909  if (!getTargetStreamer().EmitHSAMetadataV3(HSAMetadataString))
4910  return Error(getLoc(), "invalid HSA metadata");
4911  } else {
4912  if (!getTargetStreamer().EmitHSAMetadataV2(HSAMetadataString))
4913  return Error(getLoc(), "invalid HSA metadata");
4914  }
4915 
4916  return false;
4917 }
4918 
4919 /// Common code to parse out a block of text (typically YAML) between start and
4920 /// end directives.
4921 bool AMDGPUAsmParser::ParseToEndDirective(const char *AssemblerDirectiveBegin,
4922  const char *AssemblerDirectiveEnd,
4923  std::string &CollectString) {
4924 
4925  raw_string_ostream CollectStream(CollectString);
4926 
4927  getLexer().setSkipSpace(false);
4928 
4929  bool FoundEnd = false;
4930  while (!isToken(AsmToken::Eof)) {
4931  while (isToken(AsmToken::Space)) {
4932  CollectStream << getTokenStr();
4933  Lex();
4934  }
4935 
4936  if (trySkipId(AssemblerDirectiveEnd)) {
4937  FoundEnd = true;
4938  break;
4939  }
4940 
4941  CollectStream << Parser.parseStringToEndOfStatement()
4942  << getContext().getAsmInfo()->getSeparatorString();
4943 
4944  Parser.eatToEndOfStatement();
4945  }
4946 
4947  getLexer().setSkipSpace(true);
4948 
4949  if (isToken(AsmToken::Eof) && !FoundEnd) {
4950  return TokError(Twine("expected directive ") +
4951  Twine(AssemblerDirectiveEnd) + Twine(" not found"));
4952  }
4953 
4954  CollectStream.flush();
4955  return false;
4956 }
4957 
4958 /// Parse the assembler directive for new MsgPack-format PAL metadata.
4959 bool AMDGPUAsmParser::ParseDirectivePALMetadataBegin() {
4960  std::string String;
4961  if (ParseToEndDirective(AMDGPU::PALMD::AssemblerDirectiveBegin,
4962  AMDGPU::PALMD::AssemblerDirectiveEnd, String))
4963  return true;
4964 
4965  auto PALMetadata = getTargetStreamer().getPALMetadata();
4966  if (!PALMetadata->setFromString(String))
4967  return Error(getLoc(), "invalid PAL metadata");
4968  return false;
4969 }
4970 
4971 /// Parse the assembler directive for old linear-format PAL metadata.
4972 bool AMDGPUAsmParser::ParseDirectivePALMetadata() {
4973  if (getSTI().getTargetTriple().getOS() != Triple::AMDPAL) {
4974  return Error(getLoc(),
4975  (Twine(PALMD::AssemblerDirective) + Twine(" directive is "
4976  "not available on non-amdpal OSes")).str());
4977  }
4978 
4979  auto PALMetadata = getTargetStreamer().getPALMetadata();
4980  PALMetadata->setLegacy();
4981  for (;;) {
4982  uint32_t Key, Value;
4983  if (ParseAsAbsoluteExpression(Key)) {
4984  return TokError(Twine("invalid value in ") +
4985  Twine(PALMD::AssemblerDirective));
4986  }
4987  if (!trySkipToken(AsmToken::Comma)) {
4988  return TokError(Twine("expected an even number of values in ") +
4989  Twine(PALMD::AssemblerDirective));
4990  }
4991  if (ParseAsAbsoluteExpression(Value)) {
4992  return TokError(Twine("invalid value in ") +
4993  Twine(PALMD::AssemblerDirective));
4994  }
4995  PALMetadata->setRegister(Key, Value);
4996  if (!trySkipToken(AsmToken::Comma))
4997  break;
4998  }
4999  return false;
5000 }
5001 
5002 /// ParseDirectiveAMDGPULDS
5003 /// ::= .amdgpu_lds identifier ',' size_expression [',' align_expression]
5004 bool AMDGPUAsmParser::ParseDirectiveAMDGPULDS() {
5005  if (getParser().checkForValidSection())
5006  return true;
5007 
5008  StringRef Name;
5009  SMLoc NameLoc = getLoc();
5010  if (getParser().parseIdentifier(Name))
5011  return TokError("expected identifier in directive");
5012 
5013  MCSymbol *Symbol = getContext().getOrCreateSymbol(Name);
5014  if (parseToken(AsmToken::Comma, "expected ','"))
5015  return true;
5016 
5017  unsigned LocalMemorySize = AMDGPU::IsaInfo::getLocalMemorySize(&getSTI());
5018 
5019  int64_t Size;
5020  SMLoc SizeLoc = getLoc();
5021  if (getParser().parseAbsoluteExpression(Size))
5022  return true;
5023  if (Size < 0)
5024  return Error(SizeLoc, "size must be non-negative");
5025  if (Size > LocalMemorySize)
5026  return Error(SizeLoc, "size is too large");
5027 
5028  int64_t Alignment = 4;
5029  if (trySkipToken(AsmToken::Comma)) {
5030  SMLoc AlignLoc = getLoc();
5031  if (getParser().parseAbsoluteExpression(Alignment))
5032  return true;
5033  if (Alignment < 0 || !isPowerOf2_64(Alignment))
5034  return Error(AlignLoc, "alignment must be a power of two");
5035 
5036  // Alignment larger than the size of LDS is possible in theory, as long
5037  // as the linker manages to place the symbol at address 0, but we do want
5038  // to make sure the alignment fits nicely into a 32-bit integer.
5039  if (Alignment >= 1u << 31)
5040  return Error(AlignLoc, "alignment is too large");
5041  }
5042 
5043  if (parseToken(AsmToken::EndOfStatement,
5044  "unexpected token in '.amdgpu_lds' directive"))
5045  return true;
5046 
5047  Symbol->redefineIfPossible();
5048  if (!Symbol->isUndefined())
5049  return Error(NameLoc, "invalid symbol redefinition");
5050 
5051  getTargetStreamer().emitAMDGPULDS(Symbol, Size, Align(Alignment));
5052  return false;
5053 }
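// For example (symbol name chosen for illustration):
//   .amdgpu_lds my_lds_buffer, 4096, 16
// declares a 4096-byte LDS symbol with 16-byte alignment; when the optional
// alignment is omitted it defaults to 4.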
5054 
5055 bool AMDGPUAsmParser::ParseDirective(AsmToken DirectiveID) {
5056  StringRef IDVal = DirectiveID.getString();
5057 
5058  if (isHsaAbiVersion3(&getSTI())) {
5059  if (IDVal == ".amdgcn_target")
5060  return ParseDirectiveAMDGCNTarget();
5061 
5062  if (IDVal == ".amdhsa_kernel")
5063  return ParseDirectiveAMDHSAKernel();
5064 
5065  // TODO: Restructure/combine with PAL metadata directive.
5066  if (IDVal == HSAMD::V3::AssemblerDirectiveBegin)
5067  return ParseDirectiveHSAMetadata();
5068  } else {
5069  if (IDVal == ".hsa_code_object_version")
5070  return ParseDirectiveHSACodeObjectVersion();
5071 
5072  if (IDVal == ".hsa_code_object_isa")
5073  return ParseDirectiveHSACodeObjectISA();
5074 
5075  if (IDVal == ".amd_kernel_code_t")
5076  return ParseDirectiveAMDKernelCodeT();
5077 
5078  if (IDVal == ".amdgpu_hsa_kernel")
5079  return ParseDirectiveAMDGPUHsaKernel();
5080 
5081  if (IDVal == ".amd_amdgpu_isa")
5082  return ParseDirectiveISAVersion();
5083 
5084  if (IDVal == HSAMD::AssemblerDirectiveBegin)
5085  return ParseDirectiveHSAMetadata();
5086  }
5087 
5088  if (IDVal == ".amdgpu_lds")
5089  return ParseDirectiveAMDGPULDS();
5090 
5091  if (IDVal == PALMD::AssemblerDirectiveBegin)
5092  return ParseDirectivePALMetadataBegin();
5093 
5094  if (IDVal == PALMD::AssemblerDirective)
5095  return ParseDirectivePALMetadata();
5096 
5097  return true;
5098 }
5099 
5100 bool AMDGPUAsmParser::subtargetHasRegister(const MCRegisterInfo &MRI,
5101  unsigned RegNo) const {
5102 
5103  for (MCRegAliasIterator R(AMDGPU::TTMP12_TTMP13_TTMP14_TTMP15, &MRI, true);
5104  R.isValid(); ++R) {
5105  if (*R == RegNo)
5106  return isGFX9Plus();
5107  }
5108 
5109  // GFX10 has 2 more SGPRs: 104 and 105.
5110  for (MCRegAliasIterator R(AMDGPU::SGPR104_SGPR105, &MRI, true);
5111  R.isValid(); ++R) {
5112  if (*R == RegNo)
5113  return hasSGPR104_SGPR105();
5114  }
5115 
5116  switch (RegNo) {
5117  case AMDGPU::SRC_SHARED_BASE:
5118  case AMDGPU::SRC_SHARED_LIMIT:
5119  case AMDGPU::SRC_PRIVATE_BASE:
5120  case AMDGPU::SRC_PRIVATE_LIMIT:
5121  case AMDGPU::SRC_POPS_EXITING_WAVE_ID:
5122  return isGFX9Plus();
5123  case AMDGPU::TBA:
5124  case AMDGPU::TBA_LO:
5125  case AMDGPU::TBA_HI:
5126  case AMDGPU::TMA:
5127  case AMDGPU::TMA_LO:
5128  case AMDGPU::TMA_HI:
5129  return !isGFX9Plus();
5130  case AMDGPU::XNACK_MASK:
5131  case AMDGPU::XNACK_MASK_LO:
5132  case AMDGPU::XNACK_MASK_HI:
5133  return (isVI() || isGFX9()) && hasXNACK();
5134  case AMDGPU::SGPR_NULL:
5135  return isGFX10Plus();
5136  default:
5137  break;
5138  }
5139 
5140  if (isCI())
5141  return true;
5142 
5143  if (isSI() || isGFX10Plus()) {
5144  // No flat_scr on SI.
5145  // On GFX10 flat scratch is not a valid register operand and can only be
5146  // accessed with s_setreg/s_getreg.
5147  switch (RegNo) {
5148  case AMDGPU::FLAT_SCR:
5149  case AMDGPU::FLAT_SCR_LO:
5150  case AMDGPU::FLAT_SCR_HI:
5151  return false;
5152  default:
5153  return true;
5154  }
5155  }
5156 
5157  // VI only has 102 SGPRs, so make sure we aren't trying to use the 2 more that
5158  // SI/CI have.
5159  for (MCRegAliasIterator R(AMDGPU::SGPR102_SGPR103, &MRI, true);
5160  R.isValid(); ++R) {
5161  if (*R == RegNo)
5162  return hasSGPR102_SGPR103();
5163  }
5164 
5165  return true;
5166 }
5167 
5168 OperandMatchResultTy
5169 AMDGPUAsmParser::parseOperand(OperandVector &Operands, StringRef Mnemonic,
5170  OperandMode Mode) {
5171  // Try to parse with a custom parser
5172  OperandMatchResultTy ResTy = MatchOperandParserImpl(Operands, Mnemonic);
5173 
5174  // If we successfully parsed the operand or if there was an error parsing,
5175  // we are done.
5176  //
5177  // If we are parsing after we reach EndOfStatement then this means we
5178  // are appending default values to the Operands list. This is only done
5179  // by custom parser, so we shouldn't continue on to the generic parsing.
5180  if (ResTy == MatchOperand_Success || ResTy == MatchOperand_ParseFail ||
5181  isToken(AsmToken::EndOfStatement))
5182  return ResTy;
5183 
5184  SMLoc RBraceLoc;
5185  SMLoc LBraceLoc = getLoc();
5186  if (Mode == OperandMode_NSA && trySkipToken(AsmToken::LBrac)) {
5187  unsigned Prefix = Operands.size();
5188 
5189  for (;;) {
5190  auto Loc = getLoc();
5191  ResTy = parseReg(Operands);
5192  if (ResTy == MatchOperand_NoMatch)
5193  Error(Loc, "expected a register");
5194  if (ResTy != MatchOperand_Success)
5195  return MatchOperand_ParseFail;
5196 
5197  RBraceLoc = getLoc();
5198  if (trySkipToken(AsmToken::RBrac))
5199  break;
5200 
5201  if (!skipToken(AsmToken::Comma,
5202  "expected a comma or a closing square bracket")) {
5203  return MatchOperand_ParseFail;
5204  }
5205  }
5206 
5207  if (Operands.size() - Prefix > 1) {
5208  Operands.insert(Operands.begin() + Prefix,
5209  AMDGPUOperand::CreateToken(this, "[", LBraceLoc));
5210  Operands.push_back(AMDGPUOperand::CreateToken(this, "]", RBraceLoc));
5211  }
5212 
5213  return MatchOperand_Success;
5214  }
5215 
5216  return parseRegOrImm(Operands);
5217 }
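// Illustrative example (editor's note, not from the original source): with
// OperandMode_NSA the loop above accepts a bracketed, comma-separated register
// list, so a GFX10+ MIMG address can be written with non-sequential registers,
// e.g.
//   [v4, v6, v8]
// The "[" and "]" tokens are re-inserted as operands only when more than one
// register was parsed.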
5218 
5219 StringRef AMDGPUAsmParser::parseMnemonicSuffix(StringRef Name) {
5220  // Clear any forced encodings from the previous instruction.
5221  setForcedEncodingSize(0);
5222  setForcedDPP(false);
5223  setForcedSDWA(false);
5224 
5225  if (Name.endswith("_e64")) {
5226  setForcedEncodingSize(64);
5227  return Name.substr(0, Name.size() - 4);
5228  } else if (Name.endswith("_e32")) {
5229  setForcedEncodingSize(32);
5230  return Name.substr(0, Name.size() - 4);
5231  } else if (Name.endswith("_dpp")) {
5232  setForcedDPP(true);
5233  return Name.substr(0, Name.size() - 4);
5234  } else if (Name.endswith("_sdwa")) {
5235  setForcedSDWA(true);
5236  return Name.substr(0, Name.size() - 5);
5237  }
5238  return Name;
5239 }
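// Illustrative examples (editor's note): the suffix stripping above maps these
// spellings to a base mnemonic plus a forced encoding:
//   v_add_f32_e32  -> "v_add_f32", forced 32-bit encoding
//   v_add_f32_e64  -> "v_add_f32", forced 64-bit encoding
//   v_mov_b32_sdwa -> "v_mov_b32", forced SDWA
//   v_mov_b32_dpp  -> "v_mov_b32", forced DPP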
5240 
5241 bool AMDGPUAsmParser::ParseInstruction(ParseInstructionInfo &Info,
5242  StringRef Name,
5243  SMLoc NameLoc, OperandVector &Operands) {
5244  // Add the instruction mnemonic
5245  Name = parseMnemonicSuffix(Name);
5246  Operands.push_back(AMDGPUOperand::CreateToken(this, Name, NameLoc));
5247 
5248  bool IsMIMG = Name.startswith("image_");
5249 
5250  while (!trySkipToken(AsmToken::EndOfStatement)) {
5251  OperandMode Mode = OperandMode_Default;
5252  if (IsMIMG && isGFX10Plus() && Operands.size() == 2)
5253  Mode = OperandMode_NSA;
5254  OperandMatchResultTy Res = parseOperand(Operands, Name, Mode);
5255 
5256  if (Res != MatchOperand_Success) {
5257  checkUnsupportedInstruction(Name, NameLoc);
5258  if (!Parser.hasPendingError()) {
5259  // FIXME: use real operand location rather than the current location.
5260  StringRef Msg =
5261  (Res == MatchOperand_ParseFail) ? "failed parsing operand." :
5262  "not a valid operand.";
5263  Error(getLoc(), Msg);
5264  }
5265  while (!trySkipToken(AsmToken::EndOfStatement)) {
5266  lex();
5267  }
5268  return true;
5269  }
5270 
5271  // Eat the comma or space if there is one.
5272  trySkipToken(AsmToken::Comma);
5273  }
5274 
5275  return false;
5276 }
5277 
5278 //===----------------------------------------------------------------------===//
5279 // Utility functions
5280 //===----------------------------------------------------------------------===//
5281 
5282 OperandMatchResultTy
5283 AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, int64_t &IntVal) {
5284 
5285  if (!trySkipId(Prefix, AsmToken::Colon))
5286  return MatchOperand_NoMatch;
5287 
5288  return parseExpr(IntVal) ? MatchOperand_Success : MatchOperand_ParseFail;
5289 }
5290 
5291 OperandMatchResultTy
5292 AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, OperandVector &Operands,
5293  AMDGPUOperand::ImmTy ImmTy,
5294  bool (*ConvertResult)(int64_t&)) {
5295  SMLoc S = getLoc();
5296  int64_t Value = 0;
5297 
5298  OperandMatchResultTy Res = parseIntWithPrefix(Prefix, Value);
5299  if (Res != MatchOperand_Success)
5300  return Res;
5301 
5302  if (ConvertResult && !ConvertResult(Value)) {
5303  Error(S, "invalid " + StringRef(Prefix) + " value.");
5304  }
5305 
5306  Operands.push_back(AMDGPUOperand::CreateImm(this, Value, S, ImmTy));
5307  return MatchOperand_Success;
5308 }
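// Illustrative example (editor's sketch; assumes "offset" uses this parser):
// an operand written as
//   offset:4095
// is matched by the Prefix/colon check, its integer expression is parsed,
// optionally adjusted by ConvertResult, and pushed as an immediate operand.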
5309 
5310 OperandMatchResultTy
5311 AMDGPUAsmParser::parseOperandArrayWithPrefix(const char *Prefix,
5312  OperandVector &Operands,
5313  AMDGPUOperand::ImmTy ImmTy,
5314  bool (*ConvertResult)(int64_t&)) {
5315  SMLoc S = getLoc();
5316  if (!trySkipId(Prefix, AsmToken::Colon))
5317  return MatchOperand_NoMatch;
5318 
5319  if (!skipToken(AsmToken::LBrac, "expected a left square bracket"))
5320  return MatchOperand_ParseFail;
5321 
5322  unsigned Val = 0;
5323  const unsigned MaxSize = 4;
5324 
5325  // FIXME: How to verify the number of elements matches the number of src
5326  // operands?
5327  for (int I = 0; ; ++I) {
5328  int64_t Op;
5329  SMLoc Loc = getLoc();
5330  if (!parseExpr(Op))
5331  return MatchOperand_ParseFail;
5332 
5333  if (Op != 0 && Op != 1) {
5334  Error(Loc, "invalid " + StringRef(Prefix) + " value.");
5335  return MatchOperand_ParseFail;
5336  }
5337 
5338  Val |= (Op << I);
5339 
5340  if (trySkipToken(AsmToken::RBrac))
5341  break;
5342 
5343  if (I + 1 == MaxSize) {
5344  Error(getLoc(), "expected a closing square bracket");
5345  return MatchOperand_ParseFail;
5346  }
5347 
5348  if (!skipToken(AsmToken::Comma, "expected a comma"))
5349  return MatchOperand_ParseFail;
5350  }
5351 
5352  Operands.push_back(AMDGPUOperand::CreateImm(this, Val, S, ImmTy));
5353  return MatchOperand_Success;
5354 }
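// Illustrative example (editor's sketch; op_sel as the prefix is an
// assumption): a bit-array operand of at most MaxSize (4) elements, each 0 or
// 1, e.g.
//   op_sel:[1,0,0,1]
// is packed into Val with element I stored in bit I.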
5355 
5356 OperandMatchResultTy
5357 AMDGPUAsmParser::parseNamedBit(StringRef Name, OperandVector &Operands,
5358  AMDGPUOperand::ImmTy ImmTy) {
5359  int64_t Bit;
5360  SMLoc S = getLoc();
5361 
5362  if (trySkipId(Name)) {
5363  Bit = 1;
5364  } else if (trySkipId("no", Name)) {
5365  Bit = 0;
5366  } else {
5367  return MatchOperand_NoMatch;
5368  }
5369 
5370  if (Name == "r128" && !hasMIMG_R128()) {
5371  Error(S, "r128 modifier is not supported on this GPU");
5372  return MatchOperand_ParseFail;
5373  }
5374  if (Name == "a16" && !isGFX9() && !hasGFX10A16()) {
5375  Error(S, "a16 modifier is not supported on this GPU");
5376  return MatchOperand_ParseFail;
5377  }
5378  if (!isGFX10Plus() && ImmTy == AMDGPUOperand::ImmTyDLC) {
5379  Error(S, "dlc modifier is not supported on this GPU");
5380  return MatchOperand_ParseFail;
5381  }
5382  if (!isGFX90A() && ImmTy == AMDGPUOperand::ImmTySCCB)
5383  return MatchOperand_ParseFail;
5384 
5385  if (isGFX9() && ImmTy == AMDGPUOperand::ImmTyA16)
5386  ImmTy = AMDGPUOperand::ImmTyR128A16;
5387 
5388  Operands.push_back(AMDGPUOperand::CreateImm(this, Bit, S, ImmTy));
5389  return MatchOperand_Success;
5390 }
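// Illustrative example (editor's note): a named bit is either spelled out or
// prefixed with "no", e.g. "gds" yields an immediate of 1 and "nogds" yields
// 0; omitting the token entirely returns MatchOperand_NoMatch so a default is
// supplied later via addOptionalImmOperand.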
5391 
5392 static void addOptionalImmOperand(
5393  MCInst& Inst, const OperandVector& Operands,
5394  AMDGPUAsmParser::OptionalImmIndexMap& OptionalIdx,
5395  AMDGPUOperand::ImmTy ImmT,
5396  int64_t Default = 0) {
5397  auto i = OptionalIdx.find(ImmT);
5398  if (i != OptionalIdx.end()) {
5399  unsigned Idx = i->second;
5400  ((AMDGPUOperand &)*Operands[Idx]).addImmOperands(Inst, 1);
5401  } else {
5402  Inst.addOperand(MCOperand::createImm(Default));
5403  }
5404 }
5405 
5406 OperandMatchResultTy
5407 AMDGPUAsmParser::parseStringWithPrefix(StringRef Prefix,
5408  StringRef &Value,
5409  SMLoc &StringLoc) {
5410  if (!trySkipId(Prefix, AsmToken::Colon))
5411  return MatchOperand_NoMatch;
5412 
5413  StringLoc = getLoc();
5414  return parseId(Value, "expected an identifier") ? MatchOperand_Success
5415  : MatchOperand_ParseFail;
5416 }
5417 
5418 //===----------------------------------------------------------------------===//
5419 // MTBUF format
5420 //===----------------------------------------------------------------------===//
5421 
5422 bool AMDGPUAsmParser::tryParseFmt(const char *Pref,
5423  int64_t MaxVal,
5424  int64_t &Fmt) {
5425  int64_t Val;
5426  SMLoc Loc = getLoc();
5427 
5428  auto Res = parseIntWithPrefix(Pref, Val);
5429  if (Res == MatchOperand_ParseFail)
5430  return false;
5431  if (Res == MatchOperand_NoMatch)
5432  return true;
5433 
5434  if (Val < 0 || Val > MaxVal) {
5435  Error(Loc, Twine("out of range ", StringRef(Pref)));
5436  return false;
5437  }
5438 
5439  Fmt = Val;
5440  return true;
5441 }
5442 
5443 // dfmt and nfmt (in a tbuffer instruction) are parsed as one to allow their
5444 // values to live in a joint format operand in the MCInst encoding.
5445 OperandMatchResultTy
5446 AMDGPUAsmParser::parseDfmtNfmt(int64_t &Format) {
5447  using namespace llvm::AMDGPU::MTBUFFormat;
5448 
5449  int64_t Dfmt = DFMT_UNDEF;
5450  int64_t Nfmt = NFMT_UNDEF;
5451 
5452  // dfmt and nfmt can appear in either order, and each is optional.
5453  for (int I = 0; I < 2; ++I) {
5454  if (Dfmt == DFMT_UNDEF && !tryParseFmt("dfmt", DFMT_MAX, Dfmt))
5455  return MatchOperand_ParseFail;
5456 
5457  if (Nfmt == NFMT_UNDEF && !tryParseFmt("nfmt", NFMT_MAX, Nfmt)) {
5458  return MatchOperand_ParseFail;
5459  }
5460  // Skip optional comma between dfmt/nfmt
5461  // but guard against 2 commas following each other.
5462  if ((Dfmt == DFMT_UNDEF) != (Nfmt == NFMT_UNDEF) &&
5463  !peekToken().is(AsmToken::Comma)) {
5464  trySkipToken(AsmToken::Comma);
5465  }
5466  }
5467 
5468  if (Dfmt == DFMT_UNDEF && Nfmt == NFMT_UNDEF)
5469  return MatchOperand_NoMatch;
5470 
5471  Dfmt = (Dfmt == DFMT_UNDEF) ? DFMT_DEFAULT : Dfmt;
5472  Nfmt = (Nfmt == NFMT_UNDEF) ? NFMT_DEFAULT : Nfmt;
5473 
5474  Format = encodeDfmtNfmt(Dfmt, Nfmt);
5475  return MatchOperand_Success;
5476 }
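// Illustrative example (editor's sketch): on pre-GFX10 targets a tbuffer
// format may be given as numeric dfmt/nfmt fields in either order, with an
// optional separating comma, e.g.
//   dfmt:14, nfmt:7
//   nfmt:7 dfmt:14
// A missing field falls back to DFMT_DEFAULT / NFMT_DEFAULT before encoding.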
5477 
5478 OperandMatchResultTy
5479 AMDGPUAsmParser::parseUfmt(int64_t &Format) {
5480  using namespace llvm::AMDGPU::MTBUFFormat;
5481 
5482  int64_t Fmt = UFMT_UNDEF;
5483 
5484  if (!tryParseFmt("format", UFMT_MAX, Fmt))
5485  return MatchOperand_ParseFail;
5486 
5487  if (Fmt == UFMT_UNDEF)
5488  return MatchOperand_NoMatch;
5489 
5490  Format = Fmt;
5491  return MatchOperand_Success;
5492 }
5493 
5494 bool AMDGPUAsmParser::matchDfmtNfmt(int64_t &Dfmt,
5495  int64_t &Nfmt,
5496  StringRef FormatStr,
5497  SMLoc Loc) {
5498  using namespace llvm::AMDGPU::MTBUFFormat;
5499  int64_t Format;
5500 
5501  Format = getDfmt(FormatStr);
5502  if (Format != DFMT_UNDEF) {
5503  Dfmt = Format;
5504  return true;
5505  }
5506 
5507  Format = getNfmt(FormatStr, getSTI());
5508  if (Format != NFMT_UNDEF) {
5509  Nfmt = Format;
5510  return true;
5511  }
5512 
5513  Error(Loc, "unsupported format");
5514  return false;
5515 }
5516 
5517 OperandMatchResultTy
5518 AMDGPUAsmParser::parseSymbolicSplitFormat(StringRef FormatStr,
5519  SMLoc FormatLoc,
5520  int64_t &Format) {
5521  using namespace llvm::AMDGPU::MTBUFFormat;
5522 
5523  int64_t Dfmt = DFMT_UNDEF;
5524  int64_t Nfmt = NFMT_UNDEF;
5525  if (!matchDfmtNfmt(Dfmt, Nfmt, FormatStr, FormatLoc))
5526  return MatchOperand_ParseFail;
5527 
5528  if (trySkipToken(AsmToken::Comma)) {
5529  StringRef Str;
5530  SMLoc Loc = getLoc();
5531  if (!parseId(Str, "expected a format string") ||
5532  !matchDfmtNfmt(Dfmt, Nfmt, Str, Loc)) {
5533  return MatchOperand_ParseFail;
5534  }
5535  if (Dfmt == DFMT_UNDEF) {
5536  Error(Loc, "duplicate numeric format");
5537  return MatchOperand_ParseFail;
5538  } else if (Nfmt == NFMT_UNDEF) {
5539  Error(Loc, "duplicate data format");
5540  return MatchOperand_ParseFail;
5541  }
5542  }
5543 
5544  Dfmt = (Dfmt == DFMT_UNDEF) ? DFMT_DEFAULT : Dfmt;
5545  Nfmt = (Nfmt == NFMT_UNDEF) ? NFMT_DEFAULT : Nfmt;
5546 
5547  if (isGFX10Plus()) {
5548  auto Ufmt = convertDfmtNfmt2Ufmt(Dfmt, Nfmt);
5549  if (Ufmt == UFMT_UNDEF) {
5550  Error(FormatLoc, "unsupported format");
5551  return MatchOperand_ParseFail;
5552  }
5553  Format = Ufmt;
5554  } else {
5555  Format = encodeDfmtNfmt(Dfmt, Nfmt);
5556  }
5557 
5558  return MatchOperand_Success;
5559 }
5560 
5561 OperandMatchResultTy
5562 AMDGPUAsmParser::parseSymbolicUnifiedFormat(StringRef FormatStr,
5563  SMLoc Loc,
5564  int64_t &Format) {
5565  using namespace llvm::AMDGPU::MTBUFFormat;
5566 
5567  auto Id = getUnifiedFormat(FormatStr);
5568  if (Id == UFMT_UNDEF)
5569  return MatchOperand_NoMatch;
5570 
5571  if (!isGFX10Plus()) {
5572  Error(Loc, "unified format is not supported on this GPU");
5573  return MatchOperand_ParseFail;
5574  }
5575 
5576  Format = Id;
5577  return MatchOperand_Success;
5578 }
5579 
5580 OperandMatchResultTy
5581 AMDGPUAsmParser::parseNumericFormat(int64_t &Format) {
5582  using namespace llvm::AMDGPU::MTBUFFormat;
5583  SMLoc Loc = getLoc();
5584 
5585  if (!parseExpr(Format))
5586  return MatchOperand_ParseFail;
5587  if (!isValidFormatEncoding(Format, getSTI())) {
5588  Error(Loc, "out of range format");
5589  return MatchOperand_ParseFail;
5590  }
5591 
5592  return MatchOperand_Success;
5593 }
5594 
5595 OperandMatchResultTy
5596 AMDGPUAsmParser::parseSymbolicOrNumericFormat(int64_t &Format) {
5597  using namespace llvm::AMDGPU::MTBUFFormat;
5598 
5599  if (!trySkipId("format", AsmToken::Colon))
5600  return MatchOperand_NoMatch;
5601 
5602  if (trySkipToken(AsmToken::LBrac)) {
5603  StringRef FormatStr;
5604  SMLoc Loc = getLoc();
5605  if (!parseId(FormatStr, "expected a format string"))
5606  return MatchOperand_ParseFail;
5607 
5608  auto Res = parseSymbolicUnifiedFormat(FormatStr, Loc, Format);
5609  if (Res == MatchOperand_NoMatch)
5610  Res = parseSymbolicSplitFormat(FormatStr, Loc, Format);
5611  if (Res != MatchOperand_Success)
5612  return Res;
5613 
5614  if (!skipToken(AsmToken::RBrac, "expected a closing square bracket"))
5615  return MatchOperand_ParseFail;
5616 
5617  return MatchOperand_Success;
5618  }
5619 
5620  return parseNumericFormat(Format);
5621 }
5622 
5623 OperandMatchResultTy
5624 AMDGPUAsmParser::parseFORMAT(OperandVector &Operands) {
5625  using namespace llvm::AMDGPU::MTBUFFormat;
5626 
5627  int64_t Format = getDefaultFormatEncoding(getSTI());
5628  OperandMatchResultTy Res;
5629  SMLoc Loc = getLoc();
5630 
5631  // Parse legacy format syntax.
5632  Res = isGFX10Plus() ? parseUfmt(Format) : parseDfmtNfmt(Format);
5633  if (Res == MatchOperand_ParseFail)
5634  return Res;
5635 
5636  bool FormatFound = (Res == MatchOperand_Success);
5637 
5638  Operands.push_back(
5639  AMDGPUOperand::CreateImm(this, Format, Loc, AMDGPUOperand::ImmTyFORMAT));
5640 
5641  if (FormatFound)
5642  trySkipToken(AsmToken::Comma);
5643 
5644  if (isToken(AsmToken::EndOfStatement)) {
5645  // We are expecting an soffset operand,
5646  // but let matcher handle the error.
5647  return MatchOperand_Success;
5648  }
5649 
5650  // Parse soffset.
5651  Res = parseRegOrImm(Operands);
5652  if (Res != MatchOperand_Success)
5653  return Res;
5654 
5655  trySkipToken(AsmToken::Comma);
5656 
5657  if (!FormatFound) {
5658  Res = parseSymbolicOrNumericFormat(Format);
5659  if (Res == MatchOperand_ParseFail)
5660  return Res;
5661  if (Res == MatchOperand_Success) {
5662  auto Size = Operands.size();
5663  AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands[Size - 2]);
5664  assert(Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyFORMAT);
5665  Op.setImm(Format);
5666  }
5667  return MatchOperand_Success;
5668  }
5669 
5670  if (isId("format") && peekToken().is(AsmToken::Colon)) {
5671  Error(getLoc(), "duplicate format");
5672  return MatchOperand_ParseFail;
5673  }
5674  return MatchOperand_Success;
5675 }
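// Illustrative note (editor's sketch; the symbolic name is an assumption):
// parseFORMAT accepts the format modifier on either side of soffset, e.g.
// (operand fragments only)
//   ..., format:[BUF_FMT_32_FLOAT], s4
//   ..., s4, format:[BUF_FMT_32_FLOAT]
// In the second form the placeholder FORMAT immediate pushed earlier is
// updated in place via Op.setImm(Format), and a second "format:" is rejected
// as a duplicate.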
5676 
5677 //===----------------------------------------------------------------------===//
5678 // ds
5679 //===----------------------------------------------------------------------===//
5680 
5681 void AMDGPUAsmParser::cvtDSOffset01(MCInst &Inst,
5682  const OperandVector &Operands) {
5683  OptionalImmIndexMap OptionalIdx;
5684 
5685  for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
5686  AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
5687 
5688  // Add the register arguments
5689  if (Op.isReg()) {
5690  Op.addRegOperands(Inst, 1);
5691  continue;
5692  }
5693 
5694  // Handle optional arguments
5695  OptionalIdx[Op.getImmTy()] = i;
5696  }
5697 
5698  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset0);
5699  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset1);
5700  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGDS);
5701 
5702  Inst.addOperand(MCOperand::createReg(AMDGPU::M0)); // m0
5703 }
5704 
5705 void AMDGPUAsmParser::cvtDSImpl(MCInst &Inst, const OperandVector &Operands,
5706  bool IsGdsHardcoded) {
5707  OptionalImmIndexMap OptionalIdx;
5708 
5709  for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
5710  AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
5711 
5712  // Add the register arguments
5713  if (Op.isReg()) {
5714  Op.addRegOperands(Inst, 1);
5715  continue;
5716  }
5717 
5718  if (Op.isToken() && Op.getToken() == "gds") {
5719  IsGdsHardcoded = true;
5720  continue;
5721  }
5722 
5723  // Handle optional arguments
5724  OptionalIdx[Op.getImmTy()] = i;
5725  }
5726 
5727  AMDGPUOperand::ImmTy OffsetType =
5728  (Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_gfx10 ||
5729  Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_gfx6_gfx7 ||
5730  Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_vi) ? AMDGPUOperand::ImmTySwizzle :
5731  AMDGPUOperand::ImmTyOffset;
5732 
5733  addOptionalImmOperand(Inst, Operands, OptionalIdx, OffsetType);
5734 
5735  if (!IsGdsHardcoded) {
5736  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGDS);
5737  }
5738  Inst.addOperand(MCOperand::createReg(AMDGPU::M0)); // m0
5739 }
5740 
5741 void AMDGPUAsmParser::cvtExp(MCInst &Inst, const OperandVector &Operands) {
5742  OptionalImmIndexMap OptionalIdx;
5743 
5744  unsigned OperandIdx[4];
5745  unsigned EnMask = 0;
5746  int SrcIdx = 0;
5747 
5748  for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
5749  AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
5750 
5751  // Add the register arguments
5752  if (Op.isReg()) {
5753  assert(SrcIdx < 4);
5754  OperandIdx[SrcIdx] = Inst.size();
5755  Op.addRegOperands(Inst, 1);
5756  ++SrcIdx;
5757  continue;
5758  }
5759 
5760  if (Op.isOff()) {
5761  assert(SrcIdx < 4);
5762  OperandIdx[SrcIdx] = Inst.size();
5763  Inst.addOperand(MCOperand::createReg(AMDGPU::NoRegister));
5764  ++SrcIdx;
5765  continue;
5766  }
5767 
5768  if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyExpTgt) {
5769  Op.addImmOperands(Inst, 1);
5770  continue;
5771  }
5772 
5773  if (Op.isToken() && Op.getToken() == "done")
5774  continue;
5775 
5776  // Handle optional arguments
5777  OptionalIdx[Op.getImmTy()] = i;
5778  }
5779 
5780  assert(SrcIdx == 4);
5781 
5782  bool Compr = false;
5783  if (OptionalIdx.find(AMDGPUOperand::ImmTyExpCompr) != OptionalIdx.end()) {
5784  Compr = true;
5785  Inst.getOperand(OperandIdx[1]) = Inst.getOperand(OperandIdx[2]);
5786  Inst.getOperand(OperandIdx[2]).setReg(AMDGPU::NoRegister);
5787  Inst.getOperand(OperandIdx[3]).setReg(AMDGPU::NoRegister);
5788  }
5789 
5790  for (auto i = 0; i < SrcIdx; ++i) {
5791  if (Inst.getOperand(OperandIdx[i]).getReg() != AMDGPU::NoRegister) {
5792  EnMask |= Compr? (0x3 << i * 2) : (0x1 << i);
5793  }
5794  }
5795 
5796  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpVM);
5797  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpCompr);
5798 
5799  Inst.addOperand(MCOperand::createImm(EnMask));
5800 }
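// Illustrative example (editor's sketch): for an export such as
//   exp mrt0 v0, off, off, v1 done
// the "off" sources become NoRegister operands, so only bits 0 and 3 are set
// in the computed enable mask (EnMask = 0b1001).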
5801 
5802 //===----------------------------------------------------------------------===//
5803 // s_waitcnt
5804 //===----------------------------------------------------------------------===//
5805 
5806 static bool
5807 encodeCnt(
5808  const AMDGPU::IsaVersion ISA,
5809  int64_t &IntVal,
5810  int64_t CntVal,
5811  bool Saturate,
5812  unsigned (*encode)(const IsaVersion &Version, unsigned, unsigned),
5813  unsigned (*decode)(const IsaVersion &Version, unsigned))
5814 {
5815  bool Failed = false;
5816 
5817  IntVal = encode(ISA, IntVal, CntVal);
5818  if (CntVal != decode(ISA, IntVal)) {
5819  if (Saturate) {
5820  IntVal = encode(ISA, IntVal, -1);
5821  } else {
5822  Failed = true;
5823  }
5824  }
5825  return Failed;
5826 }
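// Illustrative example (editor's sketch; the field width is an assumption):
// if vmcnt were a 4-bit field, vmcnt(20) would fail the decode(encode(x))
// round-trip check; for "vmcnt_sat(20)" the value is re-encoded with -1
// (all ones), which clamps to the field maximum (15) instead of failing.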
5827 
5828 bool AMDGPUAsmParser::parseCnt(int64_t &IntVal) {
5829 
5830  SMLoc CntLoc = getLoc();
5831  StringRef CntName = getTokenStr();
5832 
5833  if (!skipToken(AsmToken::Identifier, "expected a counter name") ||
5834  !skipToken(AsmToken::LParen, "expected a left parenthesis"))
5835  return false;
5836 
5837  int64_t CntVal;
5838  SMLoc ValLoc = getLoc();
5839  if (!parseExpr(CntVal))
5840  return false;
5841 
5842  AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
5843 
5844  bool Failed = true;
5845  bool Sat = CntName.endswith("_sat");
5846 
5847  if (CntName == "vmcnt" || CntName == "vmcnt_sat") {
5848  Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeVmcnt, decodeVmcnt);
5849  } else if (CntName == "expcnt" || CntName == "expcnt_sat") {
5850  Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeExpcnt, decodeExpcnt);
5851  } else if (CntName == "lgkmcnt" || CntName == "lgkmcnt_sat") {
5852  Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeLgkmcnt, decodeLgkmcnt);
5853  } else {
5854  Error(CntLoc, "invalid counter name " + CntName);
5855  return false;
5856  }
5857 
5858  if (Failed) {
5859  Error(ValLoc, "too large value for " + CntName);
5860  return false;
5861  }
5862 
5863  if (!skipToken(AsmToken::RParen, "expected a closing parenthesis"))
5864  return false;
5865 
5866  if (trySkipToken(AsmToken::Amp) || trySkipToken(AsmToken::Comma)) {
5867  if (isToken(AsmToken::EndOfStatement)) {
5868  Error(getLoc(), "expected a counter name");
5869  return false;
5870  }
5871  }
5872 
5873  return true;
5874 }
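// Illustrative examples (editor's note): parseCnt accepts the counter names
// vmcnt, expcnt and lgkmcnt (plus their "_sat" saturating forms), separated
// by '&' or ',', e.g.
//   s_waitcnt vmcnt(0) & lgkmcnt(0)
//   s_waitcnt expcnt(0), vmcnt_sat(63)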
5875 
5876 OperandMatchResultTy
5877 AMDGPUAsmParser::parseSWaitCntOps(OperandVector &Operands) {
5878  AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
5879  int64_t Waitcnt = getWaitcntBitMask(ISA);
<