LLVM 10.0.0svn
AMDGPUAsmParser.cpp
1 //===- AMDGPUAsmParser.cpp - Parse SI asm to MCInst instructions ----------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "AMDGPU.h"
10 #include "AMDKernelCodeT.h"
13 #include "SIDefines.h"
14 #include "SIInstrInfo.h"
16 #include "Utils/AMDGPUAsmUtils.h"
17 #include "Utils/AMDGPUBaseInfo.h"
19 #include "llvm/ADT/APFloat.h"
20 #include "llvm/ADT/APInt.h"
21 #include "llvm/ADT/ArrayRef.h"
22 #include "llvm/ADT/STLExtras.h"
24 #include "llvm/ADT/SmallString.h"
25 #include "llvm/ADT/StringRef.h"
26 #include "llvm/ADT/StringSwitch.h"
27 #include "llvm/ADT/Twine.h"
28 #include "llvm/BinaryFormat/ELF.h"
29 #include "llvm/MC/MCAsmInfo.h"
30 #include "llvm/MC/MCContext.h"
31 #include "llvm/MC/MCExpr.h"
32 #include "llvm/MC/MCInst.h"
33 #include "llvm/MC/MCInstrDesc.h"
34 #include "llvm/MC/MCInstrInfo.h"
40 #include "llvm/MC/MCRegisterInfo.h"
41 #include "llvm/MC/MCStreamer.h"
43 #include "llvm/MC/MCSymbol.h"
46 #include "llvm/Support/Casting.h"
47 #include "llvm/Support/Compiler.h"
51 #include "llvm/Support/SMLoc.h"
55 #include <algorithm>
56 #include <cassert>
57 #include <cstdint>
58 #include <cstring>
59 #include <iterator>
60 #include <map>
61 #include <memory>
62 #include <string>
63 
64 using namespace llvm;
65 using namespace llvm::AMDGPU;
66 using namespace llvm::amdhsa;
67 
68 namespace {
69 
70 class AMDGPUAsmParser;
71 
72 enum RegisterKind { IS_UNKNOWN, IS_VGPR, IS_SGPR, IS_AGPR, IS_TTMP, IS_SPECIAL };
73 
74 //===----------------------------------------------------------------------===//
75 // Operand
76 //===----------------------------------------------------------------------===//
77 
78 class AMDGPUOperand : public MCParsedAsmOperand {
79  enum KindTy {
80  Token,
81  Immediate,
82  Register,
83  Expression
84  } Kind;
85 
86  SMLoc StartLoc, EndLoc;
87  const AMDGPUAsmParser *AsmParser;
88 
89 public:
90  AMDGPUOperand(KindTy Kind_, const AMDGPUAsmParser *AsmParser_)
91  : MCParsedAsmOperand(), Kind(Kind_), AsmParser(AsmParser_) {}
92 
93  using Ptr = std::unique_ptr<AMDGPUOperand>;
94 
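 // Source modifiers recorded during parsing. For example, a VOP3 source
 // written as "-|v0|" sets Neg and Abs, and getModifiersOperand() folds them
 // into the SISrcMods bits carried by the matching src*_modifiers operand.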
95  struct Modifiers {
96  bool Abs = false;
97  bool Neg = false;
98  bool Sext = false;
99 
100  bool hasFPModifiers() const { return Abs || Neg; }
101  bool hasIntModifiers() const { return Sext; }
102  bool hasModifiers() const { return hasFPModifiers() || hasIntModifiers(); }
103 
104  int64_t getFPModifiersOperand() const {
105  int64_t Operand = 0;
106  Operand |= Abs ? SISrcMods::ABS : 0u;
107  Operand |= Neg ? SISrcMods::NEG : 0u;
108  return Operand;
109  }
110 
111  int64_t getIntModifiersOperand() const {
112  int64_t Operand = 0;
113  Operand |= Sext ? SISrcMods::SEXT : 0u;
114  return Operand;
115  }
116 
117  int64_t getModifiersOperand() const {
118  assert(!(hasFPModifiers() && hasIntModifiers())
119  && "fp and int modifiers should not be used simultaneously");
120  if (hasFPModifiers()) {
121  return getFPModifiersOperand();
122  } else if (hasIntModifiers()) {
123  return getIntModifiersOperand();
124  } else {
125  return 0;
126  }
127  }
128 
129  friend raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods);
130  };
131 
132  enum ImmTy {
133  ImmTyNone,
134  ImmTyGDS,
135  ImmTyLDS,
136  ImmTyOffen,
137  ImmTyIdxen,
138  ImmTyAddr64,
139  ImmTyOffset,
140  ImmTyInstOffset,
141  ImmTyOffset0,
142  ImmTyOffset1,
143  ImmTyDLC,
144  ImmTyGLC,
145  ImmTySLC,
146  ImmTyTFE,
147  ImmTyD16,
148  ImmTyClampSI,
149  ImmTyOModSI,
150  ImmTyDPP8,
151  ImmTyDppCtrl,
152  ImmTyDppRowMask,
153  ImmTyDppBankMask,
154  ImmTyDppBoundCtrl,
155  ImmTyDppFi,
156  ImmTySdwaDstSel,
157  ImmTySdwaSrc0Sel,
158  ImmTySdwaSrc1Sel,
159  ImmTySdwaDstUnused,
160  ImmTyDMask,
161  ImmTyDim,
162  ImmTyUNorm,
163  ImmTyDA,
164  ImmTyR128A16,
165  ImmTyLWE,
166  ImmTyExpTgt,
167  ImmTyExpCompr,
168  ImmTyExpVM,
169  ImmTyFORMAT,
170  ImmTyHwreg,
171  ImmTyOff,
172  ImmTySendMsg,
173  ImmTyInterpSlot,
174  ImmTyInterpAttr,
175  ImmTyAttrChan,
176  ImmTyOpSel,
177  ImmTyOpSelHi,
178  ImmTyNegLo,
179  ImmTyNegHi,
180  ImmTySwizzle,
181  ImmTyGprIdxMode,
182  ImmTyHigh,
183  ImmTyBLGP,
184  ImmTyCBSZ,
185  ImmTyABID,
186  ImmTyEndpgm,
187  };
188 
189 private:
190  struct TokOp {
191  const char *Data;
192  unsigned Length;
193  };
194 
195  struct ImmOp {
196  int64_t Val;
197  ImmTy Type;
198  bool IsFPImm;
199  Modifiers Mods;
200  };
201 
202  struct RegOp {
203  unsigned RegNo;
204  Modifiers Mods;
205  };
206 
207  union {
208  TokOp Tok;
209  ImmOp Imm;
210  RegOp Reg;
211  const MCExpr *Expr;
212  };
213 
214 public:
215  bool isToken() const override {
216  if (Kind == Token)
217  return true;
218 
219  // When parsing operands, we can't always tell if something was meant to be
220  // a token, like 'gds', or an expression that references a global variable.
221  // In this case, we assume the string is an expression, and if we need to
222  // interpret it as a token, then we treat the symbol name as the token.
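223  // For example, a trailing 'gds' on a DS instruction may first be parsed
224  // as a reference to a symbol named "gds" rather than as the 'gds' modifier.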
223  return isSymbolRefExpr();
224  }
225 
226  bool isSymbolRefExpr() const {
227  return isExpr() && Expr && isa<MCSymbolRefExpr>(Expr);
228  }
229 
230  bool isImm() const override {
231  return Kind == Immediate;
232  }
233 
234  bool isInlinableImm(MVT type) const;
235  bool isLiteralImm(MVT type) const;
236 
237  bool isRegKind() const {
238  return Kind == Register;
239  }
240 
241  bool isReg() const override {
242  return isRegKind() && !hasModifiers();
243  }
244 
245  bool isRegOrImmWithInputMods(unsigned RCID, MVT type) const {
246  return isRegClass(RCID) || isInlinableImm(type) || isLiteralImm(type);
247  }
248 
249  bool isRegOrImmWithInt16InputMods() const {
250  return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i16);
251  }
252 
253  bool isRegOrImmWithInt32InputMods() const {
254  return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i32);
255  }
256 
257  bool isRegOrImmWithInt64InputMods() const {
258  return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::i64);
259  }
260 
261  bool isRegOrImmWithFP16InputMods() const {
262  return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f16);
263  }
264 
265  bool isRegOrImmWithFP32InputMods() const {
266  return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f32);
267  }
268 
269  bool isRegOrImmWithFP64InputMods() const {
270  return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::f64);
271  }
272 
273  bool isVReg() const {
274  return isRegClass(AMDGPU::VGPR_32RegClassID) ||
275  isRegClass(AMDGPU::VReg_64RegClassID) ||
276  isRegClass(AMDGPU::VReg_96RegClassID) ||
277  isRegClass(AMDGPU::VReg_128RegClassID) ||
278  isRegClass(AMDGPU::VReg_160RegClassID) ||
279  isRegClass(AMDGPU::VReg_256RegClassID) ||
280  isRegClass(AMDGPU::VReg_512RegClassID) ||
281  isRegClass(AMDGPU::VReg_1024RegClassID);
282  }
283 
284  bool isVReg32() const {
285  return isRegClass(AMDGPU::VGPR_32RegClassID);
286  }
287 
288  bool isVReg32OrOff() const {
289  return isOff() || isVReg32();
290  }
291 
292  bool isSDWAOperand(MVT type) const;
293  bool isSDWAFP16Operand() const;
294  bool isSDWAFP32Operand() const;
295  bool isSDWAInt16Operand() const;
296  bool isSDWAInt32Operand() const;
297 
298  bool isImmTy(ImmTy ImmT) const {
299  return isImm() && Imm.Type == ImmT;
300  }
301 
302  bool isImmModifier() const {
303  return isImm() && Imm.Type != ImmTyNone;
304  }
305 
306  bool isClampSI() const { return isImmTy(ImmTyClampSI); }
307  bool isOModSI() const { return isImmTy(ImmTyOModSI); }
308  bool isDMask() const { return isImmTy(ImmTyDMask); }
309  bool isDim() const { return isImmTy(ImmTyDim); }
310  bool isUNorm() const { return isImmTy(ImmTyUNorm); }
311  bool isDA() const { return isImmTy(ImmTyDA); }
312  bool isR128A16() const { return isImmTy(ImmTyR128A16); }
313  bool isLWE() const { return isImmTy(ImmTyLWE); }
314  bool isOff() const { return isImmTy(ImmTyOff); }
315  bool isExpTgt() const { return isImmTy(ImmTyExpTgt); }
316  bool isExpVM() const { return isImmTy(ImmTyExpVM); }
317  bool isExpCompr() const { return isImmTy(ImmTyExpCompr); }
318  bool isOffen() const { return isImmTy(ImmTyOffen); }
319  bool isIdxen() const { return isImmTy(ImmTyIdxen); }
320  bool isAddr64() const { return isImmTy(ImmTyAddr64); }
321  bool isOffset() const { return isImmTy(ImmTyOffset) && isUInt<16>(getImm()); }
322  bool isOffset0() const { return isImmTy(ImmTyOffset0) && isUInt<8>(getImm()); }
323  bool isOffset1() const { return isImmTy(ImmTyOffset1) && isUInt<8>(getImm()); }
324 
325  bool isFlatOffset() const { return isImmTy(ImmTyOffset) || isImmTy(ImmTyInstOffset); }
326  bool isGDS() const { return isImmTy(ImmTyGDS); }
327  bool isLDS() const { return isImmTy(ImmTyLDS); }
328  bool isDLC() const { return isImmTy(ImmTyDLC); }
329  bool isGLC() const { return isImmTy(ImmTyGLC); }
330  bool isSLC() const { return isImmTy(ImmTySLC); }
331  bool isTFE() const { return isImmTy(ImmTyTFE); }
332  bool isD16() const { return isImmTy(ImmTyD16); }
333  bool isFORMAT() const { return isImmTy(ImmTyFORMAT) && isUInt<8>(getImm()); }
334  bool isBankMask() const { return isImmTy(ImmTyDppBankMask); }
335  bool isRowMask() const { return isImmTy(ImmTyDppRowMask); }
336  bool isBoundCtrl() const { return isImmTy(ImmTyDppBoundCtrl); }
337  bool isFI() const { return isImmTy(ImmTyDppFi); }
338  bool isSDWADstSel() const { return isImmTy(ImmTySdwaDstSel); }
339  bool isSDWASrc0Sel() const { return isImmTy(ImmTySdwaSrc0Sel); }
340  bool isSDWASrc1Sel() const { return isImmTy(ImmTySdwaSrc1Sel); }
341  bool isSDWADstUnused() const { return isImmTy(ImmTySdwaDstUnused); }
342  bool isInterpSlot() const { return isImmTy(ImmTyInterpSlot); }
343  bool isInterpAttr() const { return isImmTy(ImmTyInterpAttr); }
344  bool isAttrChan() const { return isImmTy(ImmTyAttrChan); }
345  bool isOpSel() const { return isImmTy(ImmTyOpSel); }
346  bool isOpSelHi() const { return isImmTy(ImmTyOpSelHi); }
347  bool isNegLo() const { return isImmTy(ImmTyNegLo); }
348  bool isNegHi() const { return isImmTy(ImmTyNegHi); }
349  bool isHigh() const { return isImmTy(ImmTyHigh); }
350 
351  bool isMod() const {
352  return isClampSI() || isOModSI();
353  }
354 
355  bool isRegOrImm() const {
356  return isReg() || isImm();
357  }
358 
359  bool isRegClass(unsigned RCID) const;
360 
361  bool isInlineValue() const;
362 
363  bool isRegOrInlineNoMods(unsigned RCID, MVT type) const {
364  return (isRegClass(RCID) || isInlinableImm(type)) && !hasModifiers();
365  }
366 
367  bool isSCSrcB16() const {
368  return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i16);
369  }
370 
371  bool isSCSrcV2B16() const {
372  return isSCSrcB16();
373  }
374 
375  bool isSCSrcB32() const {
376  return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i32);
377  }
378 
379  bool isSCSrcB64() const {
380  return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::i64);
381  }
382 
383  bool isBoolReg() const;
384 
385  bool isSCSrcF16() const {
386  return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f16);
387  }
388 
389  bool isSCSrcV2F16() const {
390  return isSCSrcF16();
391  }
392 
393  bool isSCSrcF32() const {
394  return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f32);
395  }
396 
397  bool isSCSrcF64() const {
398  return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::f64);
399  }
400 
401  bool isSSrcB32() const {
402  return isSCSrcB32() || isLiteralImm(MVT::i32) || isExpr();
403  }
404 
405  bool isSSrcB16() const {
406  return isSCSrcB16() || isLiteralImm(MVT::i16);
407  }
408 
409  bool isSSrcV2B16() const {
410  llvm_unreachable("cannot happen");
411  return isSSrcB16();
412  }
413 
414  bool isSSrcB64() const {
415  // TODO: Find out how SALU supports extension of 32-bit literals to 64 bits.
416  // See isVSrc64().
417  return isSCSrcB64() || isLiteralImm(MVT::i64);
418  }
419 
420  bool isSSrcF32() const {
421  return isSCSrcB32() || isLiteralImm(MVT::f32) || isExpr();
422  }
423 
424  bool isSSrcF64() const {
425  return isSCSrcB64() || isLiteralImm(MVT::f64);
426  }
427 
428  bool isSSrcF16() const {
429  return isSCSrcB16() || isLiteralImm(MVT::f16);
430  }
431 
432  bool isSSrcV2F16() const {
433  llvm_unreachable("cannot happen");
434  return isSSrcF16();
435  }
436 
437  bool isSSrcOrLdsB32() const {
438  return isRegOrInlineNoMods(AMDGPU::SRegOrLds_32RegClassID, MVT::i32) ||
439  isLiteralImm(MVT::i32) || isExpr();
440  }
441 
442  bool isVCSrcB32() const {
443  return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i32);
444  }
445 
446  bool isVCSrcB64() const {
447  return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::i64);
448  }
449 
450  bool isVCSrcB16() const {
451  return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i16);
452  }
453 
454  bool isVCSrcV2B16() const {
455  return isVCSrcB16();
456  }
457 
458  bool isVCSrcF32() const {
459  return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f32);
460  }
461 
462  bool isVCSrcF64() const {
463  return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::f64);
464  }
465 
466  bool isVCSrcF16() const {
467  return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f16);
468  }
469 
470  bool isVCSrcV2F16() const {
471  return isVCSrcF16();
472  }
473 
474  bool isVSrcB32() const {
475  return isVCSrcF32() || isLiteralImm(MVT::i32) || isExpr();
476  }
477 
478  bool isVSrcB64() const {
479  return isVCSrcF64() || isLiteralImm(MVT::i64);
480  }
481 
482  bool isVSrcB16() const {
483  return isVCSrcF16() || isLiteralImm(MVT::i16);
484  }
485 
486  bool isVSrcV2B16() const {
487  return isVSrcB16() || isLiteralImm(MVT::v2i16);
488  }
489 
490  bool isVSrcF32() const {
491  return isVCSrcF32() || isLiteralImm(MVT::f32) || isExpr();
492  }
493 
494  bool isVSrcF64() const {
495  return isVCSrcF64() || isLiteralImm(MVT::f64);
496  }
497 
498  bool isVSrcF16() const {
499  return isVCSrcF16() || isLiteralImm(MVT::f16);
500  }
501 
502  bool isVSrcV2F16() const {
503  return isVSrcF16() || isLiteralImm(MVT::v2f16);
504  }
505 
506  bool isVISrcB32() const {
507  return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i32);
508  }
509 
510  bool isVISrcB16() const {
511  return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i16);
512  }
513 
514  bool isVISrcV2B16() const {
515  return isVISrcB16();
516  }
517 
518  bool isVISrcF32() const {
519  return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f32);
520  }
521 
522  bool isVISrcF16() const {
523  return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f16);
524  }
525 
526  bool isVISrcV2F16() const {
527  return isVISrcF16() || isVISrcB32();
528  }
529 
530  bool isAISrcB32() const {
531  return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i32);
532  }
533 
534  bool isAISrcB16() const {
535  return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i16);
536  }
537 
538  bool isAISrcV2B16() const {
539  return isAISrcB16();
540  }
541 
542  bool isAISrcF32() const {
543  return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f32);
544  }
545 
546  bool isAISrcF16() const {
547  return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f16);
548  }
549 
550  bool isAISrcV2F16() const {
551  return isAISrcF16() || isAISrcB32();
552  }
553 
554  bool isAISrc_128B32() const {
555  return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i32);
556  }
557 
558  bool isAISrc_128B16() const {
559  return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i16);
560  }
561 
562  bool isAISrc_128V2B16() const {
563  return isAISrc_128B16();
564  }
565 
566  bool isAISrc_128F32() const {
567  return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f32);
568  }
569 
570  bool isAISrc_128F16() const {
571  return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f16);
572  }
573 
574  bool isAISrc_128V2F16() const {
575  return isAISrc_128F16() || isAISrc_128B32();
576  }
577 
578  bool isAISrc_512B32() const {
579  return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i32);
580  }
581 
582  bool isAISrc_512B16() const {
583  return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i16);
584  }
585 
586  bool isAISrc_512V2B16() const {
587  return isAISrc_512B16();
588  }
589 
590  bool isAISrc_512F32() const {
591  return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f32);
592  }
593 
594  bool isAISrc_512F16() const {
595  return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f16);
596  }
597 
598  bool isAISrc_512V2F16() const {
599  return isAISrc_512F16() || isAISrc_512B32();
600  }
601 
602  bool isAISrc_1024B32() const {
603  return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i32);
604  }
605 
606  bool isAISrc_1024B16() const {
607  return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i16);
608  }
609 
610  bool isAISrc_1024V2B16() const {
611  return isAISrc_1024B16();
612  }
613 
614  bool isAISrc_1024F32() const {
615  return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f32);
616  }
617 
618  bool isAISrc_1024F16() const {
619  return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f16);
620  }
621 
622  bool isAISrc_1024V2F16() const {
623  return isAISrc_1024F16() || isAISrc_1024B32();
624  }
625 
626  bool isKImmFP32() const {
627  return isLiteralImm(MVT::f32);
628  }
629 
630  bool isKImmFP16() const {
631  return isLiteralImm(MVT::f16);
632  }
633 
634  bool isMem() const override {
635  return false;
636  }
637 
638  bool isExpr() const {
639  return Kind == Expression;
640  }
641 
642  bool isSoppBrTarget() const {
643  return isExpr() || isImm();
644  }
645 
646  bool isSWaitCnt() const;
647  bool isHwreg() const;
648  bool isSendMsg() const;
649  bool isSwizzle() const;
650  bool isSMRDOffset8() const;
651  bool isSMRDOffset20() const;
652  bool isSMRDLiteralOffset() const;
653  bool isDPP8() const;
654  bool isDPPCtrl() const;
655  bool isBLGP() const;
656  bool isCBSZ() const;
657  bool isABID() const;
658  bool isGPRIdxMode() const;
659  bool isS16Imm() const;
660  bool isU16Imm() const;
661  bool isEndpgm() const;
662 
663  StringRef getExpressionAsToken() const {
664  assert(isExpr());
665  const MCSymbolRefExpr *S = cast<MCSymbolRefExpr>(Expr);
666  return S->getSymbol().getName();
667  }
668 
669  StringRef getToken() const {
670  assert(isToken());
671 
672  if (Kind == Expression)
673  return getExpressionAsToken();
674 
675  return StringRef(Tok.Data, Tok.Length);
676  }
677 
678  int64_t getImm() const {
679  assert(isImm());
680  return Imm.Val;
681  }
682 
683  ImmTy getImmTy() const {
684  assert(isImm());
685  return Imm.Type;
686  }
687 
688  unsigned getReg() const override {
689  assert(isRegKind());
690  return Reg.RegNo;
691  }
692 
693  SMLoc getStartLoc() const override {
694  return StartLoc;
695  }
696 
697  SMLoc getEndLoc() const override {
698  return EndLoc;
699  }
700 
701  SMRange getLocRange() const {
702  return SMRange(StartLoc, EndLoc);
703  }
704 
705  Modifiers getModifiers() const {
706  assert(isRegKind() || isImmTy(ImmTyNone));
707  return isRegKind() ? Reg.Mods : Imm.Mods;
708  }
709 
710  void setModifiers(Modifiers Mods) {
711  assert(isRegKind() || isImmTy(ImmTyNone));
712  if (isRegKind())
713  Reg.Mods = Mods;
714  else
715  Imm.Mods = Mods;
716  }
717 
718  bool hasModifiers() const {
719  return getModifiers().hasModifiers();
720  }
721 
722  bool hasFPModifiers() const {
723  return getModifiers().hasFPModifiers();
724  }
725 
726  bool hasIntModifiers() const {
727  return getModifiers().hasIntModifiers();
728  }
729 
730  uint64_t applyInputFPModifiers(uint64_t Val, unsigned Size) const;
731 
732  void addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers = true) const;
733 
734  void addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const;
735 
736  template <unsigned Bitwidth>
737  void addKImmFPOperands(MCInst &Inst, unsigned N) const;
738 
739  void addKImmFP16Operands(MCInst &Inst, unsigned N) const {
740  addKImmFPOperands<16>(Inst, N);
741  }
742 
743  void addKImmFP32Operands(MCInst &Inst, unsigned N) const {
744  addKImmFPOperands<32>(Inst, N);
745  }
746 
747  void addRegOperands(MCInst &Inst, unsigned N) const;
748 
749  void addBoolRegOperands(MCInst &Inst, unsigned N) const {
750  addRegOperands(Inst, N);
751  }
752 
753  void addRegOrImmOperands(MCInst &Inst, unsigned N) const {
754  if (isRegKind())
755  addRegOperands(Inst, N);
756  else if (isExpr())
757  Inst.addOperand(MCOperand::createExpr(Expr));
758  else
759  addImmOperands(Inst, N);
760  }
761 
762  void addRegOrImmWithInputModsOperands(MCInst &Inst, unsigned N) const {
763  Modifiers Mods = getModifiers();
764  Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand()));
765  if (isRegKind()) {
766  addRegOperands(Inst, N);
767  } else {
768  addImmOperands(Inst, N, false);
769  }
770  }
771 
772  void addRegOrImmWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
773  assert(!hasIntModifiers());
774  addRegOrImmWithInputModsOperands(Inst, N);
775  }
776 
777  void addRegOrImmWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
778  assert(!hasFPModifiers());
779  addRegOrImmWithInputModsOperands(Inst, N);
780  }
781 
782  void addRegWithInputModsOperands(MCInst &Inst, unsigned N) const {
783  Modifiers Mods = getModifiers();
784  Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand()));
785  assert(isRegKind());
786  addRegOperands(Inst, N);
787  }
788 
789  void addRegWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
790  assert(!hasIntModifiers());
791  addRegWithInputModsOperands(Inst, N);
792  }
793 
794  void addRegWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
795  assert(!hasFPModifiers());
796  addRegWithInputModsOperands(Inst, N);
797  }
798 
799  void addSoppBrTargetOperands(MCInst &Inst, unsigned N) const {
800  if (isImm())
801  addImmOperands(Inst, N);
802  else {
803  assert(isExpr());
804  Inst.addOperand(MCOperand::createExpr(Expr));
805  }
806  }
807 
808  static void printImmTy(raw_ostream& OS, ImmTy Type) {
809  switch (Type) {
810  case ImmTyNone: OS << "None"; break;
811  case ImmTyGDS: OS << "GDS"; break;
812  case ImmTyLDS: OS << "LDS"; break;
813  case ImmTyOffen: OS << "Offen"; break;
814  case ImmTyIdxen: OS << "Idxen"; break;
815  case ImmTyAddr64: OS << "Addr64"; break;
816  case ImmTyOffset: OS << "Offset"; break;
817  case ImmTyInstOffset: OS << "InstOffset"; break;
818  case ImmTyOffset0: OS << "Offset0"; break;
819  case ImmTyOffset1: OS << "Offset1"; break;
820  case ImmTyDLC: OS << "DLC"; break;
821  case ImmTyGLC: OS << "GLC"; break;
822  case ImmTySLC: OS << "SLC"; break;
823  case ImmTyTFE: OS << "TFE"; break;
824  case ImmTyD16: OS << "D16"; break;
825  case ImmTyFORMAT: OS << "FORMAT"; break;
826  case ImmTyClampSI: OS << "ClampSI"; break;
827  case ImmTyOModSI: OS << "OModSI"; break;
828  case ImmTyDPP8: OS << "DPP8"; break;
829  case ImmTyDppCtrl: OS << "DppCtrl"; break;
830  case ImmTyDppRowMask: OS << "DppRowMask"; break;
831  case ImmTyDppBankMask: OS << "DppBankMask"; break;
832  case ImmTyDppBoundCtrl: OS << "DppBoundCtrl"; break;
833  case ImmTyDppFi: OS << "FI"; break;
834  case ImmTySdwaDstSel: OS << "SdwaDstSel"; break;
835  case ImmTySdwaSrc0Sel: OS << "SdwaSrc0Sel"; break;
836  case ImmTySdwaSrc1Sel: OS << "SdwaSrc1Sel"; break;
837  case ImmTySdwaDstUnused: OS << "SdwaDstUnused"; break;
838  case ImmTyDMask: OS << "DMask"; break;
839  case ImmTyDim: OS << "Dim"; break;
840  case ImmTyUNorm: OS << "UNorm"; break;
841  case ImmTyDA: OS << "DA"; break;
842  case ImmTyR128A16: OS << "R128A16"; break;
843  case ImmTyLWE: OS << "LWE"; break;
844  case ImmTyOff: OS << "Off"; break;
845  case ImmTyExpTgt: OS << "ExpTgt"; break;
846  case ImmTyExpCompr: OS << "ExpCompr"; break;
847  case ImmTyExpVM: OS << "ExpVM"; break;
848  case ImmTyHwreg: OS << "Hwreg"; break;
849  case ImmTySendMsg: OS << "SendMsg"; break;
850  case ImmTyInterpSlot: OS << "InterpSlot"; break;
851  case ImmTyInterpAttr: OS << "InterpAttr"; break;
852  case ImmTyAttrChan: OS << "AttrChan"; break;
853  case ImmTyOpSel: OS << "OpSel"; break;
854  case ImmTyOpSelHi: OS << "OpSelHi"; break;
855  case ImmTyNegLo: OS << "NegLo"; break;
856  case ImmTyNegHi: OS << "NegHi"; break;
857  case ImmTySwizzle: OS << "Swizzle"; break;
858  case ImmTyGprIdxMode: OS << "GprIdxMode"; break;
859  case ImmTyHigh: OS << "High"; break;
860  case ImmTyBLGP: OS << "BLGP"; break;
861  case ImmTyCBSZ: OS << "CBSZ"; break;
862  case ImmTyABID: OS << "ABID"; break;
863  case ImmTyEndpgm: OS << "Endpgm"; break;
864  }
865  }
866 
867  void print(raw_ostream &OS) const override {
868  switch (Kind) {
869  case Register:
870  OS << "<register " << getReg() << " mods: " << Reg.Mods << '>';
871  break;
872  case Immediate:
873  OS << '<' << getImm();
874  if (getImmTy() != ImmTyNone) {
875  OS << " type: "; printImmTy(OS, getImmTy());
876  }
877  OS << " mods: " << Imm.Mods << '>';
878  break;
879  case Token:
880  OS << '\'' << getToken() << '\'';
881  break;
882  case Expression:
883  OS << "<expr " << *Expr << '>';
884  break;
885  }
886  }
887 
888  static AMDGPUOperand::Ptr CreateImm(const AMDGPUAsmParser *AsmParser,
889  int64_t Val, SMLoc Loc,
890  ImmTy Type = ImmTyNone,
891  bool IsFPImm = false) {
892  auto Op = std::make_unique<AMDGPUOperand>(Immediate, AsmParser);
893  Op->Imm.Val = Val;
894  Op->Imm.IsFPImm = IsFPImm;
895  Op->Imm.Type = Type;
896  Op->Imm.Mods = Modifiers();
897  Op->StartLoc = Loc;
898  Op->EndLoc = Loc;
899  return Op;
900  }
901 
902  static AMDGPUOperand::Ptr CreateToken(const AMDGPUAsmParser *AsmParser,
903  StringRef Str, SMLoc Loc,
904  bool HasExplicitEncodingSize = true) {
905  auto Res = std::make_unique<AMDGPUOperand>(Token, AsmParser);
906  Res->Tok.Data = Str.data();
907  Res->Tok.Length = Str.size();
908  Res->StartLoc = Loc;
909  Res->EndLoc = Loc;
910  return Res;
911  }
912 
913  static AMDGPUOperand::Ptr CreateReg(const AMDGPUAsmParser *AsmParser,
914  unsigned RegNo, SMLoc S,
915  SMLoc E) {
916  auto Op = std::make_unique<AMDGPUOperand>(Register, AsmParser);
917  Op->Reg.RegNo = RegNo;
918  Op->Reg.Mods = Modifiers();
919  Op->StartLoc = S;
920  Op->EndLoc = E;
921  return Op;
922  }
923 
924  static AMDGPUOperand::Ptr CreateExpr(const AMDGPUAsmParser *AsmParser,
925  const class MCExpr *Expr, SMLoc S) {
926  auto Op = std::make_unique<AMDGPUOperand>(Expression, AsmParser);
927  Op->Expr = Expr;
928  Op->StartLoc = S;
929  Op->EndLoc = S;
930  return Op;
931  }
932 };
933 
934 raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods) {
935  OS << "abs:" << Mods.Abs << " neg: " << Mods.Neg << " sext:" << Mods.Sext;
936  return OS;
937 }
938 
939 //===----------------------------------------------------------------------===//
940 // AsmParser
941 //===----------------------------------------------------------------------===//
942 
943 // Holds info related to the current kernel, e.g. count of SGPRs used.
944 // Kernel scope begins at the .amdgpu_hsa_kernel directive and ends at the
945 // next .amdgpu_hsa_kernel directive or at end of file.
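// For example (illustrative):
//   .amdgpu_hsa_kernel kernel_a   ; scope of kernel_a begins
//   kernel_a:
//     v_mov_b32 v3, 0             ; bumps .kernel.vgpr_count to 4
//   .amdgpu_hsa_kernel kernel_b   ; scope of kernel_a ends, kernel_b begins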
946 class KernelScopeInfo {
947  int SgprIndexUnusedMin = -1;
948  int VgprIndexUnusedMin = -1;
949  MCContext *Ctx = nullptr;
950 
951  void usesSgprAt(int i) {
952  if (i >= SgprIndexUnusedMin) {
953  SgprIndexUnusedMin = ++i;
954  if (Ctx) {
955  MCSymbol * const Sym = Ctx->getOrCreateSymbol(Twine(".kernel.sgpr_count"));
956  Sym->setVariableValue(MCConstantExpr::create(SgprIndexUnusedMin, *Ctx));
957  }
958  }
959  }
960 
961  void usesVgprAt(int i) {
962  if (i >= VgprIndexUnusedMin) {
963  VgprIndexUnusedMin = ++i;
964  if (Ctx) {
965  MCSymbol * const Sym = Ctx->getOrCreateSymbol(Twine(".kernel.vgpr_count"));
966  Sym->setVariableValue(MCConstantExpr::create(VgprIndexUnusedMin, *Ctx));
967  }
968  }
969  }
970 
971 public:
972  KernelScopeInfo() = default;
973 
974  void initialize(MCContext &Context) {
975  Ctx = &Context;
976  usesSgprAt(SgprIndexUnusedMin = -1);
977  usesVgprAt(VgprIndexUnusedMin = -1);
978  }
979 
980  void usesRegister(RegisterKind RegKind, unsigned DwordRegIndex, unsigned RegWidth) {
981  switch (RegKind) {
982  case IS_SGPR: usesSgprAt(DwordRegIndex + RegWidth - 1); break;
983  case IS_AGPR: // fall through
984  case IS_VGPR: usesVgprAt(DwordRegIndex + RegWidth - 1); break;
985  default: break;
986  }
987  }
988 };
989 
990 class AMDGPUAsmParser : public MCTargetAsmParser {
991  MCAsmParser &Parser;
992 
993  // Number of extra operands parsed after the first optional operand.
994  // This may be necessary to skip hardcoded mandatory operands.
995  static const unsigned MAX_OPR_LOOKAHEAD = 8;
996 
997  unsigned ForcedEncodingSize = 0;
998  bool ForcedDPP = false;
999  bool ForcedSDWA = false;
1000  KernelScopeInfo KernelScope;
1001 
1002  /// @name Auto-generated Match Functions
1003  /// {
1004 
1005 #define GET_ASSEMBLER_HEADER
1006 #include "AMDGPUGenAsmMatcher.inc"
1007 
1008  /// }
1009 
1010 private:
1011  bool ParseAsAbsoluteExpression(uint32_t &Ret);
1012  bool OutOfRangeError(SMRange Range);
1013  /// Calculate VGPR/SGPR blocks required for given target, reserved
1014  /// registers, and user-specified NextFreeXGPR values.
1015  ///
1016  /// \param Features [in] Target features, used for bug corrections.
1017  /// \param VCCUsed [in] Whether VCC special SGPR is reserved.
1018  /// \param FlatScrUsed [in] Whether FLAT_SCRATCH special SGPR is reserved.
1019  /// \param XNACKUsed [in] Whether XNACK_MASK special SGPR is reserved.
1020  /// \param EnableWavefrontSize32 [in] Value of ENABLE_WAVEFRONT_SIZE32 kernel
1021  /// descriptor field, if valid.
1022  /// \param NextFreeVGPR [in] Max VGPR number referenced, plus one.
1023  /// \param VGPRRange [in] Token range, used for VGPR diagnostics.
1024  /// \param NextFreeSGPR [in] Max SGPR number referenced, plus one.
1025  /// \param SGPRRange [in] Token range, used for SGPR diagnostics.
1026  /// \param VGPRBlocks [out] Result VGPR block count.
1027  /// \param SGPRBlocks [out] Result SGPR block count.
1028  bool calculateGPRBlocks(const FeatureBitset &Features, bool VCCUsed,
1029  bool FlatScrUsed, bool XNACKUsed,
1030  Optional<bool> EnableWavefrontSize32, unsigned NextFreeVGPR,
1031  SMRange VGPRRange, unsigned NextFreeSGPR,
1032  SMRange SGPRRange, unsigned &VGPRBlocks,
1033  unsigned &SGPRBlocks);
1034  bool ParseDirectiveAMDGCNTarget();
1035  bool ParseDirectiveAMDHSAKernel();
1036  bool ParseDirectiveMajorMinor(uint32_t &Major, uint32_t &Minor);
1037  bool ParseDirectiveHSACodeObjectVersion();
1038  bool ParseDirectiveHSACodeObjectISA();
1039  bool ParseAMDKernelCodeTValue(StringRef ID, amd_kernel_code_t &Header);
1040  bool ParseDirectiveAMDKernelCodeT();
1041  bool subtargetHasRegister(const MCRegisterInfo &MRI, unsigned RegNo) const;
1042  bool ParseDirectiveAMDGPUHsaKernel();
1043 
1044  bool ParseDirectiveISAVersion();
1045  bool ParseDirectiveHSAMetadata();
1046  bool ParseDirectivePALMetadataBegin();
1047  bool ParseDirectivePALMetadata();
1048  bool ParseDirectiveAMDGPULDS();
1049 
1050  /// Common code to parse out a block of text (typically YAML) between start and
1051  /// end directives.
1052  bool ParseToEndDirective(const char *AssemblerDirectiveBegin,
1053  const char *AssemblerDirectiveEnd,
1054  std::string &CollectString);
1055 
1056  bool AddNextRegisterToList(unsigned& Reg, unsigned& RegWidth,
1057  RegisterKind RegKind, unsigned Reg1,
1058  unsigned RegNum);
1059  bool ParseAMDGPURegister(RegisterKind& RegKind, unsigned& Reg,
1060  unsigned& RegNum, unsigned& RegWidth,
1061  unsigned *DwordRegIndex);
1062  bool isRegister();
1063  bool isRegister(const AsmToken &Token, const AsmToken &NextToken) const;
1064  Optional<StringRef> getGprCountSymbolName(RegisterKind RegKind);
1065  void initializeGprCountSymbol(RegisterKind RegKind);
1066  bool updateGprCountSymbols(RegisterKind RegKind, unsigned DwordRegIndex,
1067  unsigned RegWidth);
1068  void cvtMubufImpl(MCInst &Inst, const OperandVector &Operands,
1069  bool IsAtomic, bool IsAtomicReturn, bool IsLds = false);
1070  void cvtDSImpl(MCInst &Inst, const OperandVector &Operands,
1071  bool IsGdsHardcoded);
1072 
1073 public:
1074  enum AMDGPUMatchResultTy {
1075  Match_PreferE32 = FIRST_TARGET_MATCH_RESULT_TY
1076  };
1077  enum OperandMode {
1078  OperandMode_Default,
1079  OperandMode_NSA,
1080  };
1081 
1082  using OptionalImmIndexMap = std::map<AMDGPUOperand::ImmTy, unsigned>;
1083 
1084  AMDGPUAsmParser(const MCSubtargetInfo &STI, MCAsmParser &_Parser,
1085  const MCInstrInfo &MII,
1086  const MCTargetOptions &Options)
1087  : MCTargetAsmParser(Options, STI, MII), Parser(_Parser) {
1088  MCAsmParserExtension::Initialize(Parser);
1089 
1090  if (getFeatureBits().none()) {
1091  // Set default features.
1092  copySTI().ToggleFeature("southern-islands");
1093  }
1094 
1095  setAvailableFeatures(ComputeAvailableFeatures(getFeatureBits()));
1096 
1097  {
1098  // TODO: make those pre-defined variables read-only.
1099  // Currently there is no suitable machinery in core llvm-mc for this.
1100  // MCSymbol::isRedefinable is intended for another purpose, and
1101  // AsmParser::parseDirectiveSet() cannot be specialized for a specific target.
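 // Once defined, assembly sources can test the target with these symbols,
 // e.g. something like: .if .amdgcn.gfx_generation_number >= 9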
1102  AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
1103  MCContext &Ctx = getContext();
1104  if (ISA.Major >= 6 && AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())) {
1105  MCSymbol *Sym =
1106  Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_number"));
1107  Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx));
1108  Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_minor"));
1109  Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx));
1110  Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_stepping"));
1111  Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx));
1112  } else {
1113  MCSymbol *Sym =
1114  Ctx.getOrCreateSymbol(Twine(".option.machine_version_major"));
1115  Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx));
1116  Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_minor"));
1117  Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx));
1118  Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_stepping"));
1119  Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx));
1120  }
1121  if (ISA.Major >= 6 && AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())) {
1122  initializeGprCountSymbol(IS_VGPR);
1123  initializeGprCountSymbol(IS_SGPR);
1124  } else
1125  KernelScope.initialize(getContext());
1126  }
1127  }
1128 
1129  bool hasXNACK() const {
1130  return AMDGPU::hasXNACK(getSTI());
1131  }
1132 
1133  bool hasMIMG_R128() const {
1134  return AMDGPU::hasMIMG_R128(getSTI());
1135  }
1136 
1137  bool hasPackedD16() const {
1138  return AMDGPU::hasPackedD16(getSTI());
1139  }
1140 
1141  bool isSI() const {
1142  return AMDGPU::isSI(getSTI());
1143  }
1144 
1145  bool isCI() const {
1146  return AMDGPU::isCI(getSTI());
1147  }
1148 
1149  bool isVI() const {
1150  return AMDGPU::isVI(getSTI());
1151  }
1152 
1153  bool isGFX9() const {
1154  return AMDGPU::isGFX9(getSTI());
1155  }
1156 
1157  bool isGFX10() const {
1158  return AMDGPU::isGFX10(getSTI());
1159  }
1160 
1161  bool hasInv2PiInlineImm() const {
1162  return getFeatureBits()[AMDGPU::FeatureInv2PiInlineImm];
1163  }
1164 
1165  bool hasFlatOffsets() const {
1166  return getFeatureBits()[AMDGPU::FeatureFlatInstOffsets];
1167  }
1168 
1169  bool hasSGPR102_SGPR103() const {
1170  return !isVI() && !isGFX9();
1171  }
1172 
1173  bool hasSGPR104_SGPR105() const {
1174  return isGFX10();
1175  }
1176 
1177  bool hasIntClamp() const {
1178  return getFeatureBits()[AMDGPU::FeatureIntClamp];
1179  }
1180 
1181  AMDGPUTargetStreamer &getTargetStreamer() {
1182  MCTargetStreamer &TS = *getParser().getStreamer().getTargetStreamer();
1183  return static_cast<AMDGPUTargetStreamer &>(TS);
1184  }
1185 
1186  const MCRegisterInfo *getMRI() const {
1187  // We need this const_cast because for some reason getContext() is not const
1188  // in MCAsmParser.
1189  return const_cast<AMDGPUAsmParser*>(this)->getContext().getRegisterInfo();
1190  }
1191 
1192  const MCInstrInfo *getMII() const {
1193  return &MII;
1194  }
1195 
1196  const FeatureBitset &getFeatureBits() const {
1197  return getSTI().getFeatureBits();
1198  }
1199 
1200  void setForcedEncodingSize(unsigned Size) { ForcedEncodingSize = Size; }
1201  void setForcedDPP(bool ForceDPP_) { ForcedDPP = ForceDPP_; }
1202  void setForcedSDWA(bool ForceSDWA_) { ForcedSDWA = ForceSDWA_; }
1203 
1204  unsigned getForcedEncodingSize() const { return ForcedEncodingSize; }
1205  bool isForcedVOP3() const { return ForcedEncodingSize == 64; }
1206  bool isForcedDPP() const { return ForcedDPP; }
1207  bool isForcedSDWA() const { return ForcedSDWA; }
1208  ArrayRef<unsigned> getMatchedVariants() const;
1209 
1210  std::unique_ptr<AMDGPUOperand> parseRegister();
1211  bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) override;
1212  unsigned checkTargetMatchPredicate(MCInst &Inst) override;
1213  unsigned validateTargetOperandClass(MCParsedAsmOperand &Op,
1214  unsigned Kind) override;
1215  bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
1216  OperandVector &Operands, MCStreamer &Out,
1217  uint64_t &ErrorInfo,
1218  bool MatchingInlineAsm) override;
1219  bool ParseDirective(AsmToken DirectiveID) override;
1220  OperandMatchResultTy parseOperand(OperandVector &Operands, StringRef Mnemonic,
1221  OperandMode Mode = OperandMode_Default);
1222  StringRef parseMnemonicSuffix(StringRef Name);
1223  bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
1224  SMLoc NameLoc, OperandVector &Operands) override;
1225  //bool ProcessInstruction(MCInst &Inst);
1226 
1227  OperandMatchResultTy parseIntWithPrefix(const char *Prefix, int64_t &Int);
1228 
1229  OperandMatchResultTy
1230  parseIntWithPrefix(const char *Prefix, OperandVector &Operands,
1231  AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
1232  bool (*ConvertResult)(int64_t &) = nullptr);
1233 
1234  OperandMatchResultTy
1235  parseOperandArrayWithPrefix(const char *Prefix,
1236  OperandVector &Operands,
1237  AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
1238  bool (*ConvertResult)(int64_t&) = nullptr);
1239 
1240  OperandMatchResultTy
1241  parseNamedBit(const char *Name, OperandVector &Operands,
1242  AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone);
1243  OperandMatchResultTy parseStringWithPrefix(StringRef Prefix,
1244  StringRef &Value);
1245 
1246  bool isModifier();
1247  bool isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
1248  bool isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
1249  bool isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
1250  bool isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const;
1251  bool parseSP3NegModifier();
1252  OperandMatchResultTy parseImm(OperandVector &Operands, bool HasSP3AbsModifier = false);
1253  OperandMatchResultTy parseReg(OperandVector &Operands);
1254  OperandMatchResultTy parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod = false);
1255  OperandMatchResultTy parseRegOrImmWithFPInputMods(OperandVector &Operands, bool AllowImm = true);
1256  OperandMatchResultTy parseRegOrImmWithIntInputMods(OperandVector &Operands, bool AllowImm = true);
1257  OperandMatchResultTy parseRegWithFPInputMods(OperandVector &Operands);
1258  OperandMatchResultTy parseRegWithIntInputMods(OperandVector &Operands);
1259  OperandMatchResultTy parseVReg32OrOff(OperandVector &Operands);
1260  OperandMatchResultTy parseDfmtNfmt(OperandVector &Operands);
1261 
1262  void cvtDSOffset01(MCInst &Inst, const OperandVector &Operands);
1263  void cvtDS(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, false); }
1264  void cvtDSGds(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, true); }
1265  void cvtExp(MCInst &Inst, const OperandVector &Operands);
1266 
1267  bool parseCnt(int64_t &IntVal);
1268  OperandMatchResultTy parseSWaitCntOps(OperandVector &Operands);
1269  OperandMatchResultTy parseHwreg(OperandVector &Operands);
1270 
1271 private:
1272  struct OperandInfoTy {
1273  int64_t Id;
1274  bool IsSymbolic = false;
1275  bool IsDefined = false;
1276 
1277  OperandInfoTy(int64_t Id_) : Id(Id_) {}
1278  };
1279 
1280  bool parseSendMsgBody(OperandInfoTy &Msg, OperandInfoTy &Op, OperandInfoTy &Stream);
1281  bool validateSendMsg(const OperandInfoTy &Msg,
1282  const OperandInfoTy &Op,
1283  const OperandInfoTy &Stream,
1284  const SMLoc Loc);
1285 
1286  bool parseHwregBody(OperandInfoTy &HwReg, int64_t &Offset, int64_t &Width);
1287  bool validateHwreg(const OperandInfoTy &HwReg,
1288  const int64_t Offset,
1289  const int64_t Width,
1290  const SMLoc Loc);
1291 
1292  void errorExpTgt();
1293  OperandMatchResultTy parseExpTgtImpl(StringRef Str, uint8_t &Val);
1294  SMLoc getFlatOffsetLoc(const OperandVector &Operands) const;
1295 
1296  bool validateInstruction(const MCInst &Inst, const SMLoc &IDLoc, const OperandVector &Operands);
1297  bool validateFlatOffset(const MCInst &Inst, const OperandVector &Operands);
1298  bool validateSOPLiteral(const MCInst &Inst) const;
1299  bool validateConstantBusLimitations(const MCInst &Inst);
1300  bool validateEarlyClobberLimitations(const MCInst &Inst);
1301  bool validateIntClampSupported(const MCInst &Inst);
1302  bool validateMIMGAtomicDMask(const MCInst &Inst);
1303  bool validateMIMGGatherDMask(const MCInst &Inst);
1304  bool validateMIMGDataSize(const MCInst &Inst);
1305  bool validateMIMGAddrSize(const MCInst &Inst);
1306  bool validateMIMGD16(const MCInst &Inst);
1307  bool validateMIMGDim(const MCInst &Inst);
1308  bool validateLdsDirect(const MCInst &Inst);
1309  bool validateOpSel(const MCInst &Inst);
1310  bool validateVccOperand(unsigned Reg) const;
1311  bool validateVOP3Literal(const MCInst &Inst) const;
1312  bool usesConstantBus(const MCInst &Inst, unsigned OpIdx);
1313  bool isInlineConstant(const MCInst &Inst, unsigned OpIdx) const;
1314  unsigned findImplicitSGPRReadInVOP(const MCInst &Inst) const;
1315 
1316  bool isId(const StringRef Id) const;
1317  bool isId(const AsmToken &Token, const StringRef Id) const;
1318  bool isToken(const AsmToken::TokenKind Kind) const;
1319  bool trySkipId(const StringRef Id);
1320  bool trySkipId(const StringRef Id, const AsmToken::TokenKind Kind);
1321  bool trySkipToken(const AsmToken::TokenKind Kind);
1322  bool skipToken(const AsmToken::TokenKind Kind, const StringRef ErrMsg);
1323  bool parseString(StringRef &Val, const StringRef ErrMsg = "expected a string");
1324  void peekTokens(MutableArrayRef<AsmToken> Tokens);
1325  AsmToken::TokenKind getTokenKind() const;
1326  bool parseExpr(int64_t &Imm);
1327  bool parseExpr(OperandVector &Operands);
1328  StringRef getTokenStr() const;
1329  AsmToken peekToken();
1330  AsmToken getToken() const;
1331  SMLoc getLoc() const;
1332  void lex();
1333 
1334 public:
1335  OperandMatchResultTy parseOptionalOperand(OperandVector &Operands);
1336  OperandMatchResultTy parseOptionalOpr(OperandVector &Operands);
1337 
1338  OperandMatchResultTy parseExpTgt(OperandVector &Operands);
1339  OperandMatchResultTy parseSendMsgOp(OperandVector &Operands);
1340  OperandMatchResultTy parseInterpSlot(OperandVector &Operands);
1341  OperandMatchResultTy parseInterpAttr(OperandVector &Operands);
1342  OperandMatchResultTy parseSOppBrTarget(OperandVector &Operands);
1343  OperandMatchResultTy parseBoolReg(OperandVector &Operands);
1344 
1345  bool parseSwizzleOperands(const unsigned OpNum, int64_t* Op,
1346  const unsigned MinVal,
1347  const unsigned MaxVal,
1348  const StringRef ErrMsg);
1349  OperandMatchResultTy parseSwizzleOp(OperandVector &Operands);
1350  bool parseSwizzleOffset(int64_t &Imm);
1351  bool parseSwizzleMacro(int64_t &Imm);
1352  bool parseSwizzleQuadPerm(int64_t &Imm);
1353  bool parseSwizzleBitmaskPerm(int64_t &Imm);
1354  bool parseSwizzleBroadcast(int64_t &Imm);
1355  bool parseSwizzleSwap(int64_t &Imm);
1356  bool parseSwizzleReverse(int64_t &Imm);
1357 
1358  OperandMatchResultTy parseGPRIdxMode(OperandVector &Operands);
1359  int64_t parseGPRIdxMacro();
1360 
1361  void cvtMubuf(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false, false); }
1362  void cvtMubufAtomic(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, true, false); }
1363  void cvtMubufAtomicReturn(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, true, true); }
1364  void cvtMubufLds(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false, false, true); }
1365  void cvtMtbuf(MCInst &Inst, const OperandVector &Operands);
1366 
1367  AMDGPUOperand::Ptr defaultDLC() const;
1368  AMDGPUOperand::Ptr defaultGLC() const;
1369  AMDGPUOperand::Ptr defaultSLC() const;
1370 
1371  AMDGPUOperand::Ptr defaultSMRDOffset8() const;
1372  AMDGPUOperand::Ptr defaultSMRDOffset20() const;
1373  AMDGPUOperand::Ptr defaultSMRDLiteralOffset() const;
1374  AMDGPUOperand::Ptr defaultFlatOffset() const;
1375 
1376  OperandMatchResultTy parseOModOperand(OperandVector &Operands);
1377 
1378  void cvtVOP3(MCInst &Inst, const OperandVector &Operands,
1379  OptionalImmIndexMap &OptionalIdx);
1380  void cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands);
1381  void cvtVOP3(MCInst &Inst, const OperandVector &Operands);
1382  void cvtVOP3P(MCInst &Inst, const OperandVector &Operands);
1383 
1384  void cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands);
1385 
1386  void cvtMIMG(MCInst &Inst, const OperandVector &Operands,
1387  bool IsAtomic = false);
1388  void cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands);
1389 
1390  OperandMatchResultTy parseDim(OperandVector &Operands);
1391  OperandMatchResultTy parseDPP8(OperandVector &Operands);
1392  OperandMatchResultTy parseDPPCtrl(OperandVector &Operands);
1393  AMDGPUOperand::Ptr defaultRowMask() const;
1394  AMDGPUOperand::Ptr defaultBankMask() const;
1395  AMDGPUOperand::Ptr defaultBoundCtrl() const;
1396  AMDGPUOperand::Ptr defaultFI() const;
1397  void cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8 = false);
1398  void cvtDPP8(MCInst &Inst, const OperandVector &Operands) { cvtDPP(Inst, Operands, true); }
1399 
1400  OperandMatchResultTy parseSDWASel(OperandVector &Operands, StringRef Prefix,
1401  AMDGPUOperand::ImmTy Type);
1402  OperandMatchResultTy parseSDWADstUnused(OperandVector &Operands);
1403  void cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands);
1404  void cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands);
1405  void cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands);
1406  void cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands);
1407  void cvtSDWA(MCInst &Inst, const OperandVector &Operands,
1408  uint64_t BasicInstType, bool skipVcc = false);
1409 
1410  AMDGPUOperand::Ptr defaultBLGP() const;
1411  AMDGPUOperand::Ptr defaultCBSZ() const;
1412  AMDGPUOperand::Ptr defaultABID() const;
1413 
1414  OperandMatchResultTy parseEndpgmOp(OperandVector &Operands);
1415  AMDGPUOperand::Ptr defaultEndpgmImmOperands() const;
1416 };
1417 
1418 struct OptionalOperand {
1419  const char *Name;
1420  AMDGPUOperand::ImmTy Type;
1421  bool IsBit;
1422  bool (*ConvertResult)(int64_t&);
1423 };
1424 
1425 } // end anonymous namespace
1426 
1427 // May be called with an integer type of equivalent bitwidth.
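// (e.g. an i32 operand maps to IEEEsingle and an i16 operand to IEEEhalf).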
1428 static const fltSemantics *getFltSemantics(unsigned Size) {
1429  switch (Size) {
1430  case 4:
1431  return &APFloat::IEEEsingle();
1432  case 8:
1433  return &APFloat::IEEEdouble();
1434  case 2:
1435  return &APFloat::IEEEhalf();
1436  default:
1437  llvm_unreachable("unsupported fp type");
1438  }
1439 }
1440 
1441 static const fltSemantics *getFltSemantics(MVT VT) {
1442  return getFltSemantics(VT.getSizeInBits() / 8);
1443 }
1444 
1445 static const fltSemantics *getOpFltSemantics(uint8_t OperandType) {
1446  switch (OperandType) {
1447  case AMDGPU::OPERAND_REG_IMM_INT32:
1448  case AMDGPU::OPERAND_REG_IMM_FP32:
1449  case AMDGPU::OPERAND_REG_INLINE_C_INT32:
1450  case AMDGPU::OPERAND_REG_INLINE_C_FP32:
1451  case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
1452  case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
1453  return &APFloat::IEEEsingle();
1454  case AMDGPU::OPERAND_REG_IMM_INT64:
1455  case AMDGPU::OPERAND_REG_IMM_FP64:
1456  case AMDGPU::OPERAND_REG_INLINE_C_INT64:
1457  case AMDGPU::OPERAND_REG_INLINE_C_FP64:
1458  return &APFloat::IEEEdouble();
1459  case AMDGPU::OPERAND_REG_IMM_INT16:
1460  case AMDGPU::OPERAND_REG_IMM_FP16:
1461  case AMDGPU::OPERAND_REG_INLINE_C_INT16:
1462  case AMDGPU::OPERAND_REG_INLINE_C_FP16:
1463  case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
1464  case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
1465  case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
1466  case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
1467  case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
1468  case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16:
1469  case AMDGPU::OPERAND_REG_IMM_V2INT16:
1470  case AMDGPU::OPERAND_REG_IMM_V2FP16:
1471  return &APFloat::IEEEhalf();
1472  default:
1473  llvm_unreachable("unsupported fp type");
1474  }
1475 }
1476 
1477 //===----------------------------------------------------------------------===//
1478 // Operand
1479 //===----------------------------------------------------------------------===//
1480 
1481 static bool canLosslesslyConvertToFPType(APFloat &FPLiteral, MVT VT) {
1482  bool Lost;
1483 
1484  // Convert the literal to the operand's floating-point semantics
1485  APFloat::opStatus Status = FPLiteral.convert(*getFltSemantics(VT),
1486  APFloat::rmNearestTiesToEven,
1487  &Lost);
1488  // We allow precision lost but not overflow or underflow
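 // (e.g. 0.1 merely loses precision when converted to f16 and is accepted,
 //  while 1.0e10 overflows f16 and is rejected).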
1489  if (Status != APFloat::opOK &&
1490  Lost &&
1491  ((Status & APFloat::opOverflow) != 0 ||
1492  (Status & APFloat::opUnderflow) != 0)) {
1493  return false;
1494  }
1495 
1496  return true;
1497 }
1498 
1499 static bool isSafeTruncation(int64_t Val, unsigned Size) {
1500  return isUIntN(Size, Val) || isIntN(Size, Val);
1501 }
1502 
1503 bool AMDGPUOperand::isInlinableImm(MVT type) const {
1504 
1505  // This is a hack to enable named inline values like
1506  // shared_base with both 32-bit and 64-bit operands.
1507  // Note that these values are defined as
1508  // 32-bit operands only.
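 // (e.g. "src_shared_base" is accepted for both 32-bit and 64-bit operands
 //  even though it is formally a 32-bit value).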
1509  if (isInlineValue()) {
1510  return true;
1511  }
1512 
1513  if (!isImmTy(ImmTyNone)) {
1514  // Only plain immediates are inlinable (e.g. "clamp" attribute is not)
1515  return false;
1516  }
1517  // TODO: We should avoid using host float here. It would be better to
1518  // check the float bit values which is what a few other places do.
1519  // We've had bot failures before due to weird NaN support on mips hosts.
1520 
1521  APInt Literal(64, Imm.Val);
1522 
1523  if (Imm.IsFPImm) { // We got fp literal token
1524  if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand
1525  return AMDGPU::isInlinableLiteral64(Imm.Val,
1526  AsmParser->hasInv2PiInlineImm());
1527  }
1528 
1529  APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val));
1530  if (!canLosslesslyConvertToFPType(FPLiteral, type))
1531  return false;
1532 
1533  if (type.getScalarSizeInBits() == 16) {
1534  return AMDGPU::isInlinableLiteral16(
1535  static_cast<int16_t>(FPLiteral.bitcastToAPInt().getZExtValue()),
1536  AsmParser->hasInv2PiInlineImm());
1537  }
1538 
1539  // Check if single precision literal is inlinable
1540  return AMDGPU::isInlinableLiteral32(
1541  static_cast<int32_t>(FPLiteral.bitcastToAPInt().getZExtValue()),
1542  AsmParser->hasInv2PiInlineImm());
1543  }
1544 
1545  // We got int literal token.
1546  if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand
1547  return AMDGPU::isInlinableLiteral64(Imm.Val,
1548  AsmParser->hasInv2PiInlineImm());
1549  }
1550 
1551  if (!isSafeTruncation(Imm.Val, type.getScalarSizeInBits())) {
1552  return false;
1553  }
1554 
1555  if (type.getScalarSizeInBits() == 16) {
1556  return AMDGPU::isInlinableLiteral16(
1557  static_cast<int16_t>(Literal.getLoBits(16).getSExtValue()),
1558  AsmParser->hasInv2PiInlineImm());
1559  }
1560  }
1561  return AMDGPU::isInlinableLiteral32(
1562  static_cast<int32_t>(Literal.getLoBits(32).getZExtValue()),
1563  AsmParser->hasInv2PiInlineImm());
1564 }
1565 
1566 bool AMDGPUOperand::isLiteralImm(MVT type) const {
1567  // Check that this immediate can be added as a literal
1568  if (!isImmTy(ImmTyNone)) {
1569  return false;
1570  }
1571 
1572  if (!Imm.IsFPImm) {
1573  // We got int literal token.
1574 
1575  if (type == MVT::f64 && hasFPModifiers()) {
1576  // Cannot apply fp modifiers to int literals preserving the same semantics
1577  // for VOP1/2/C and VOP3 because of integer truncation. To avoid ambiguity,
1578  // disable these cases.
1579  return false;
1580  }
1581 
1582  unsigned Size = type.getSizeInBits();
1583  if (Size == 64)
1584  Size = 32;
1585 
1586  // FIXME: 64-bit operands can zero extend, sign extend, or pad zeroes for FP
1587  // types.
1588  return isSafeTruncation(Imm.Val, Size);
1589  }
1590 
1591  // We got fp literal token
1592  if (type == MVT::f64) { // Expected 64-bit fp operand
1593  // We would set the low 32 bits of the literal to zeroes, but we accept such literals
1594  return true;
1595  }
1596 
1597  if (type == MVT::i64) { // Expected 64-bit int operand
1598  // We don't allow fp literals in 64-bit integer instructions. It is
1599  // unclear how we should encode them.
1600  return false;
1601  }
1602 
1603  // We allow fp literals with f16x2 operands assuming that the specified
1604  // literal goes into the lower half and the upper half is zero. We also
1605  // require that the literal can be losslessly converted to f16.
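 // For example, a v2f16 operand written as the literal 2.5 would be encoded
 // as 0x00004100: f16 2.5 in the low half and zero in the high half.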
1606  MVT ExpectedType = (type == MVT::v2f16)? MVT::f16 :
1607  (type == MVT::v2i16)? MVT::i16 : type;
1608 
1609  APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val));
1610  return canLosslesslyConvertToFPType(FPLiteral, ExpectedType);
1611 }
1612 
1613 bool AMDGPUOperand::isRegClass(unsigned RCID) const {
1614  return isRegKind() && AsmParser->getMRI()->getRegClass(RCID).contains(getReg());
1615 }
1616 
1617 bool AMDGPUOperand::isSDWAOperand(MVT type) const {
1618  if (AsmParser->isVI())
1619  return isVReg32();
1620  else if (AsmParser->isGFX9() || AsmParser->isGFX10())
1621  return isRegClass(AMDGPU::VS_32RegClassID) || isInlinableImm(type);
1622  else
1623  return false;
1624 }
1625 
1626 bool AMDGPUOperand::isSDWAFP16Operand() const {
1627  return isSDWAOperand(MVT::f16);
1628 }
1629 
1630 bool AMDGPUOperand::isSDWAFP32Operand() const {
1631  return isSDWAOperand(MVT::f32);
1632 }
1633 
1634 bool AMDGPUOperand::isSDWAInt16Operand() const {
1635  return isSDWAOperand(MVT::i16);
1636 }
1637 
1638 bool AMDGPUOperand::isSDWAInt32Operand() const {
1639  return isSDWAOperand(MVT::i32);
1640 }
1641 
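// A "bool" source holds a lane mask: with wave32 a single 32-bit SGPR such as
// s0, with wave64 a 64-bit SGPR pair such as s[0:1].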
1642 bool AMDGPUOperand::isBoolReg() const {
1643  return (AsmParser->getFeatureBits()[AMDGPU::FeatureWavefrontSize64] && isSCSrcB64()) ||
1644  (AsmParser->getFeatureBits()[AMDGPU::FeatureWavefrontSize32] && isSCSrcB32());
1645 }
1646 
1647 uint64_t AMDGPUOperand::applyInputFPModifiers(uint64_t Val, unsigned Size) const
1648 {
1649  assert(isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers());
1650  assert(Size == 2 || Size == 4 || Size == 8);
1651 
1652  const uint64_t FpSignMask = (1ULL << (Size * 8 - 1));
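 // e.g. for a 32-bit operand FpSignMask == 0x80000000: "abs" clears the sign
 // bit and "neg" flips it before the value is encoded.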
1653 
1654  if (Imm.Mods.Abs) {
1655  Val &= ~FpSignMask;
1656  }
1657  if (Imm.Mods.Neg) {
1658  Val ^= FpSignMask;
1659  }
1660 
1661  return Val;
1662 }
1663 
1664 void AMDGPUOperand::addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers) const {
1665  if (AMDGPU::isSISrcOperand(AsmParser->getMII()->get(Inst.getOpcode()),
1666  Inst.getNumOperands())) {
1667  addLiteralImmOperand(Inst, Imm.Val,
1668  ApplyModifiers &
1669  isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers());
1670  } else {
1671  assert(!isImmTy(ImmTyNone) || !hasModifiers());
1672  Inst.addOperand(MCOperand::createImm(Imm.Val));
1673  }
1674 }
1675 
1676 void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const {
1677  const auto& InstDesc = AsmParser->getMII()->get(Inst.getOpcode());
1678  auto OpNum = Inst.getNumOperands();
1679  // Check that this operand accepts literals
1680  assert(AMDGPU::isSISrcOperand(InstDesc, OpNum));
1681 
1682  if (ApplyModifiers) {
1683  assert(AMDGPU::isSISrcFPOperand(InstDesc, OpNum));
1684  const unsigned Size = Imm.IsFPImm ? sizeof(double) : getOperandSize(InstDesc, OpNum);
1685  Val = applyInputFPModifiers(Val, Size);
1686  }
1687 
1688  APInt Literal(64, Val);
1689  uint8_t OpTy = InstDesc.OpInfo[OpNum].OperandType;
1690 
1691  if (Imm.IsFPImm) { // We got fp literal token
1692  switch (OpTy) {
1693  case AMDGPU::OPERAND_REG_IMM_INT64:
1694  case AMDGPU::OPERAND_REG_IMM_FP64:
1695  case AMDGPU::OPERAND_REG_INLINE_C_INT64:
1696  case AMDGPU::OPERAND_REG_INLINE_C_FP64:
1697  if (AMDGPU::isInlinableLiteral64(Literal.getZExtValue(),
1698  AsmParser->hasInv2PiInlineImm())) {
1699  Inst.addOperand(MCOperand::createImm(Literal.getZExtValue()));
1700  return;
1701  }
1702 
1703  // Non-inlineable
1704  if (AMDGPU::isSISrcFPOperand(InstDesc, OpNum)) { // Expected 64-bit fp operand
1705  // For fp operands we check if low 32 bits are zeros
1706  if (Literal.getLoBits(32) != 0) {
1707  const_cast<AMDGPUAsmParser *>(AsmParser)->Warning(Inst.getLoc(),
1708  "Can't encode literal as exact 64-bit floating-point operand. "
1709  "Low 32-bits will be set to zero");
1710  }
1711 
1712  Inst.addOperand(MCOperand::createImm(Literal.lshr(32).getZExtValue()));
1713  return;
1714  }
1715 
1716  // We don't allow fp literals in 64-bit integer instructions. It is
1717  // unclear how we should encode them. This case should be checked earlier
1718  // in predicate methods (isLiteralImm())
1719  llvm_unreachable("fp literal in 64-bit integer instruction.");
1720 
1739  bool lost;
1740  APFloat FPLiteral(APFloat::IEEEdouble(), Literal);
1741  // Convert literal to single precision
1742  FPLiteral.convert(*getOpFltSemantics(OpTy),
1743                    APFloat::rmNearestTiesToEven, &lost);
1744  // We allow precision loss, but not overflow or underflow. This should be
1745  // checked earlier in isLiteralImm()
1746 
1747  uint64_t ImmVal = FPLiteral.bitcastToAPInt().getZExtValue();
1748  Inst.addOperand(MCOperand::createImm(ImmVal));
1749  return;
1750  }
1751  default:
1752  llvm_unreachable("invalid operand size");
1753  }
1754 
1755  return;
1756  }
1757 
1758  // We got int literal token.
1759  // Only sign extend inline immediates.
1760  switch (OpTy) {
1769  if (isSafeTruncation(Val, 32) &&
1770  AMDGPU::isInlinableLiteral32(static_cast<int32_t>(Val),
1771  AsmParser->hasInv2PiInlineImm())) {
1772  Inst.addOperand(MCOperand::createImm(Val));
1773  return;
1774  }
1775 
1776  Inst.addOperand(MCOperand::createImm(Val & 0xffffffff));
1777  return;
1778 
1779  case AMDGPU::OPERAND_REG_IMM_INT64:
1780  case AMDGPU::OPERAND_REG_IMM_FP64:
1781  case AMDGPU::OPERAND_REG_INLINE_C_INT64:
1782  case AMDGPU::OPERAND_REG_INLINE_C_FP64:
1783  if (AMDGPU::isInlinableLiteral64(Val, AsmParser->hasInv2PiInlineImm())) {
1784  Inst.addOperand(MCOperand::createImm(Val));
1785  return;
1786  }
1787 
1788  Inst.addOperand(MCOperand::createImm(Lo_32(Val)));
1789  return;
1790 
1797  if (isSafeTruncation(Val, 16) &&
1798  AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val),
1799  AsmParser->hasInv2PiInlineImm())) {
1800  Inst.addOperand(MCOperand::createImm(Val));
1801  return;
1802  }
1803 
1804  Inst.addOperand(MCOperand::createImm(Val & 0xffff));
1805  return;
1806 
1811  assert(isSafeTruncation(Val, 16));
1812  assert(AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val),
1813  AsmParser->hasInv2PiInlineImm()));
1814 
1815  Inst.addOperand(MCOperand::createImm(Val));
1816  return;
1817  }
1818  default:
1819  llvm_unreachable("invalid operand size");
1820  }
1821 }
1822 
1823 template <unsigned Bitwidth>
1824 void AMDGPUOperand::addKImmFPOperands(MCInst &Inst, unsigned N) const {
1825  APInt Literal(64, Imm.Val);
1826 
1827  if (!Imm.IsFPImm) {
1828  // We got int literal token.
1829  Inst.addOperand(MCOperand::createImm(Literal.getLoBits(Bitwidth).getZExtValue()));
1830  return;
1831  }
1832 
1833  bool Lost;
1834  APFloat FPLiteral(APFloat::IEEEdouble(), Literal);
1835  FPLiteral.convert(*getFltSemantics(Bitwidth / 8),
1836                    APFloat::rmNearestTiesToEven, &Lost);
1837  Inst.addOperand(MCOperand::createImm(FPLiteral.bitcastToAPInt().getZExtValue()));
1838 }
1839 
1840 void AMDGPUOperand::addRegOperands(MCInst &Inst, unsigned N) const {
1841  Inst.addOperand(MCOperand::createReg(AMDGPU::getMCReg(getReg(), AsmParser->getSTI())));
1842 }
1843 
1844 static bool isInlineValue(unsigned Reg) {
1845  switch (Reg) {
1846  case AMDGPU::SRC_SHARED_BASE:
1847  case AMDGPU::SRC_SHARED_LIMIT:
1848  case AMDGPU::SRC_PRIVATE_BASE:
1849  case AMDGPU::SRC_PRIVATE_LIMIT:
1850  case AMDGPU::SRC_POPS_EXITING_WAVE_ID:
1851  return true;
1852  case AMDGPU::SRC_VCCZ:
1853  case AMDGPU::SRC_EXECZ:
1854  case AMDGPU::SRC_SCC:
1855  return true;
1856  default:
1857  return false;
1858  }
1859 }
1860 
1861 bool AMDGPUOperand::isInlineValue() const {
1862  return isRegKind() && ::isInlineValue(getReg());
1863 }
1864 
1865 //===----------------------------------------------------------------------===//
1866 // AsmParser
1867 //===----------------------------------------------------------------------===//
1868 
1869 static int getRegClass(RegisterKind Is, unsigned RegWidth) {
1870  if (Is == IS_VGPR) {
1871  switch (RegWidth) {
1872  default: return -1;
1873  case 1: return AMDGPU::VGPR_32RegClassID;
1874  case 2: return AMDGPU::VReg_64RegClassID;
1875  case 3: return AMDGPU::VReg_96RegClassID;
1876  case 4: return AMDGPU::VReg_128RegClassID;
1877  case 5: return AMDGPU::VReg_160RegClassID;
1878  case 8: return AMDGPU::VReg_256RegClassID;
1879  case 16: return AMDGPU::VReg_512RegClassID;
1880  case 32: return AMDGPU::VReg_1024RegClassID;
1881  }
1882  } else if (Is == IS_TTMP) {
1883  switch (RegWidth) {
1884  default: return -1;
1885  case 1: return AMDGPU::TTMP_32RegClassID;
1886  case 2: return AMDGPU::TTMP_64RegClassID;
1887  case 4: return AMDGPU::TTMP_128RegClassID;
1888  case 8: return AMDGPU::TTMP_256RegClassID;
1889  case 16: return AMDGPU::TTMP_512RegClassID;
1890  }
1891  } else if (Is == IS_SGPR) {
1892  switch (RegWidth) {
1893  default: return -1;
1894  case 1: return AMDGPU::SGPR_32RegClassID;
1895  case 2: return AMDGPU::SGPR_64RegClassID;
1896  case 4: return AMDGPU::SGPR_128RegClassID;
1897  case 8: return AMDGPU::SGPR_256RegClassID;
1898  case 16: return AMDGPU::SGPR_512RegClassID;
1899  }
1900  } else if (Is == IS_AGPR) {
1901  switch (RegWidth) {
1902  default: return -1;
1903  case 1: return AMDGPU::AGPR_32RegClassID;
1904  case 2: return AMDGPU::AReg_64RegClassID;
1905  case 4: return AMDGPU::AReg_128RegClassID;
1906  case 16: return AMDGPU::AReg_512RegClassID;
1907  case 32: return AMDGPU::AReg_1024RegClassID;
1908  }
1909  }
1910  return -1;
1911 }
1912 
1913 static unsigned getSpecialRegForName(StringRef RegName) {
1914  return StringSwitch<unsigned>(RegName)
1915  .Case("exec", AMDGPU::EXEC)
1916  .Case("vcc", AMDGPU::VCC)
1917  .Case("flat_scratch", AMDGPU::FLAT_SCR)
1918  .Case("xnack_mask", AMDGPU::XNACK_MASK)
1919  .Case("shared_base", AMDGPU::SRC_SHARED_BASE)
1920  .Case("src_shared_base", AMDGPU::SRC_SHARED_BASE)
1921  .Case("shared_limit", AMDGPU::SRC_SHARED_LIMIT)
1922  .Case("src_shared_limit", AMDGPU::SRC_SHARED_LIMIT)
1923  .Case("private_base", AMDGPU::SRC_PRIVATE_BASE)
1924  .Case("src_private_base", AMDGPU::SRC_PRIVATE_BASE)
1925  .Case("private_limit", AMDGPU::SRC_PRIVATE_LIMIT)
1926  .Case("src_private_limit", AMDGPU::SRC_PRIVATE_LIMIT)
1927  .Case("pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID)
1928  .Case("src_pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID)
1929  .Case("lds_direct", AMDGPU::LDS_DIRECT)
1930  .Case("src_lds_direct", AMDGPU::LDS_DIRECT)
1931  .Case("m0", AMDGPU::M0)
1932  .Case("vccz", AMDGPU::SRC_VCCZ)
1933  .Case("src_vccz", AMDGPU::SRC_VCCZ)
1934  .Case("execz", AMDGPU::SRC_EXECZ)
1935  .Case("src_execz", AMDGPU::SRC_EXECZ)
1936  .Case("scc", AMDGPU::SRC_SCC)
1937  .Case("src_scc", AMDGPU::SRC_SCC)
1938  .Case("tba", AMDGPU::TBA)
1939  .Case("tma", AMDGPU::TMA)
1940  .Case("flat_scratch_lo", AMDGPU::FLAT_SCR_LO)
1941  .Case("flat_scratch_hi", AMDGPU::FLAT_SCR_HI)
1942  .Case("xnack_mask_lo", AMDGPU::XNACK_MASK_LO)
1943  .Case("xnack_mask_hi", AMDGPU::XNACK_MASK_HI)
1944  .Case("vcc_lo", AMDGPU::VCC_LO)
1945  .Case("vcc_hi", AMDGPU::VCC_HI)
1946  .Case("exec_lo", AMDGPU::EXEC_LO)
1947  .Case("exec_hi", AMDGPU::EXEC_HI)
1948  .Case("tma_lo", AMDGPU::TMA_LO)
1949  .Case("tma_hi", AMDGPU::TMA_HI)
1950  .Case("tba_lo", AMDGPU::TBA_LO)
1951  .Case("tba_hi", AMDGPU::TBA_HI)
1952  .Case("null", AMDGPU::SGPR_NULL)
1953  .Default(0);
1954 }
1955 
1956 bool AMDGPUAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc,
1957  SMLoc &EndLoc) {
1958  auto R = parseRegister();
1959  if (!R) return true;
1960  assert(R->isReg());
1961  RegNo = R->getReg();
1962  StartLoc = R->getStartLoc();
1963  EndLoc = R->getEndLoc();
1964  return false;
1965 }
1966 
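 // Try to extend the register range in Reg/RegWidth with the next parsed
 // register Reg1. Special register pairs such as exec_lo/exec_hi merge into
 // the corresponding 64-bit register; VGPR/SGPR/AGPR/TTMP registers must be
 // consecutive, as in [s0,s1,s2,s3].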
1967 bool AMDGPUAsmParser::AddNextRegisterToList(unsigned &Reg, unsigned &RegWidth,
1968  RegisterKind RegKind, unsigned Reg1,
1969  unsigned RegNum) {
1970  switch (RegKind) {
1971  case IS_SPECIAL:
1972  if (Reg == AMDGPU::EXEC_LO && Reg1 == AMDGPU::EXEC_HI) {
1973  Reg = AMDGPU::EXEC;
1974  RegWidth = 2;
1975  return true;
1976  }
1977  if (Reg == AMDGPU::FLAT_SCR_LO && Reg1 == AMDGPU::FLAT_SCR_HI) {
1978  Reg = AMDGPU::FLAT_SCR;
1979  RegWidth = 2;
1980  return true;
1981  }
1982  if (Reg == AMDGPU::XNACK_MASK_LO && Reg1 == AMDGPU::XNACK_MASK_HI) {
1983  Reg = AMDGPU::XNACK_MASK;
1984  RegWidth = 2;
1985  return true;
1986  }
1987  if (Reg == AMDGPU::VCC_LO && Reg1 == AMDGPU::VCC_HI) {
1988  Reg = AMDGPU::VCC;
1989  RegWidth = 2;
1990  return true;
1991  }
1992  if (Reg == AMDGPU::TBA_LO && Reg1 == AMDGPU::TBA_HI) {
1993  Reg = AMDGPU::TBA;
1994  RegWidth = 2;
1995  return true;
1996  }
1997  if (Reg == AMDGPU::TMA_LO && Reg1 == AMDGPU::TMA_HI) {
1998  Reg = AMDGPU::TMA;
1999  RegWidth = 2;
2000  return true;
2001  }
2002  return false;
2003  case IS_VGPR:
2004  case IS_SGPR:
2005  case IS_AGPR:
2006  case IS_TTMP:
2007  if (Reg1 != Reg + RegWidth) {
2008  return false;
2009  }
2010  RegWidth++;
2011  return true;
2012  default:
2013  llvm_unreachable("unexpected register kind");
2014  }
2015 }
2016 
2017 static const StringRef Registers[] = {
2018  { "v" },
2019  { "s" },
2020  { "ttmp" },
2021  { "acc" },
2022  { "a" },
2023 };
2024 
2025 bool
2026 AMDGPUAsmParser::isRegister(const AsmToken &Token,
2027  const AsmToken &NextToken) const {
2028 
2029  // A list of consecutive registers: [s0,s1,s2,s3]
2030  if (Token.is(AsmToken::LBrac))
2031  return true;
2032 
2033  if (!Token.is(AsmToken::Identifier))
2034  return false;
2035 
2036  // A single register like s0 or a range of registers like s[0:1]
2037 
2038  StringRef RegName = Token.getString();
2039 
2040  for (StringRef Reg : Registers) {
2041  if (RegName.startswith(Reg)) {
2042  if (Reg.size() < RegName.size()) {
2043  unsigned RegNum;
2044  // A single register with an index: rXX
2045  if (!RegName.substr(Reg.size()).getAsInteger(10, RegNum))
2046  return true;
2047  } else {
2048  // A range of registers: r[XX:YY].
2049  if (NextToken.is(AsmToken::LBrac))
2050  return true;
2051  }
2052  }
2053  }
2054 
2055  return getSpecialRegForName(RegName);
2056 }
2057 
2058 bool
2059 AMDGPUAsmParser::isRegister()
2060 {
2061  return isRegister(getToken(), peekToken());
2062 }
2063 
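 // Parse a register reference: a special register name (e.g. vcc), a single
 // register (v0), a register range (v[0:3]) or a list of consecutive
 // registers ([s0,s1,s2,s3]). On success, the register, its kind, its index
 // and its width in dwords are returned through the out parameters.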
2064 bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
2065  unsigned &RegNum, unsigned &RegWidth,
2066  unsigned *DwordRegIndex) {
2067  if (DwordRegIndex) { *DwordRegIndex = 0; }
2068  const MCRegisterInfo *TRI = getContext().getRegisterInfo();
2069  if (getLexer().is(AsmToken::Identifier)) {
2070  StringRef RegName = Parser.getTok().getString();
2071  if ((Reg = getSpecialRegForName(RegName))) {
2072  Parser.Lex();
2073  RegKind = IS_SPECIAL;
2074  } else {
2075  unsigned RegNumIndex = 0;
2076  if (RegName[0] == 'v') {
2077  RegNumIndex = 1;
2078  RegKind = IS_VGPR;
2079  } else if (RegName[0] == 's') {
2080  RegNumIndex = 1;
2081  RegKind = IS_SGPR;
2082  } else if (RegName[0] == 'a') {
2083  RegNumIndex = RegName.startswith("acc") ? 3 : 1;
2084  RegKind = IS_AGPR;
2085  } else if (RegName.startswith("ttmp")) {
2086  RegNumIndex = strlen("ttmp");
2087  RegKind = IS_TTMP;
2088  } else {
2089  return false;
2090  }
2091  if (RegName.size() > RegNumIndex) {
2092  // Single 32-bit register: vXX.
2093  if (RegName.substr(RegNumIndex).getAsInteger(10, RegNum))
2094  return false;
2095  Parser.Lex();
2096  RegWidth = 1;
2097  } else {
2098  // Range of registers: v[XX:YY]. ":YY" is optional.
2099  Parser.Lex();
2100  int64_t RegLo, RegHi;
2101  if (getLexer().isNot(AsmToken::LBrac))
2102  return false;
2103  Parser.Lex();
2104 
2105  if (getParser().parseAbsoluteExpression(RegLo))
2106  return false;
2107 
2108  const bool isRBrace = getLexer().is(AsmToken::RBrac);
2109  if (!isRBrace && getLexer().isNot(AsmToken::Colon))
2110  return false;
2111  Parser.Lex();
2112 
2113  if (isRBrace) {
2114  RegHi = RegLo;
2115  } else {
2116  if (getParser().parseAbsoluteExpression(RegHi))
2117  return false;
2118 
2119  if (getLexer().isNot(AsmToken::RBrac))
2120  return false;
2121  Parser.Lex();
2122  }
2123  RegNum = (unsigned) RegLo;
2124  RegWidth = (RegHi - RegLo) + 1;
2125  }
2126  }
2127  } else if (getLexer().is(AsmToken::LBrac)) {
2128  // List of consecutive registers: [s0,s1,s2,s3]
2129  Parser.Lex();
2130  if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth, nullptr))
2131  return false;
2132  if (RegWidth != 1)
2133  return false;
2134  RegisterKind RegKind1;
2135  unsigned Reg1, RegNum1, RegWidth1;
2136  do {
2137  if (getLexer().is(AsmToken::Comma)) {
2138  Parser.Lex();
2139  } else if (getLexer().is(AsmToken::RBrac)) {
2140  Parser.Lex();
2141  break;
2142  } else if (ParseAMDGPURegister(RegKind1, Reg1, RegNum1, RegWidth1, nullptr)) {
2143  if (RegWidth1 != 1) {
2144  return false;
2145  }
2146  if (RegKind1 != RegKind) {
2147  return false;
2148  }
2149  if (!AddNextRegisterToList(Reg, RegWidth, RegKind1, Reg1, RegNum1)) {
2150  return false;
2151  }
2152  } else {
2153  return false;
2154  }
2155  } while (true);
2156  } else {
2157  return false;
2158  }
2159  switch (RegKind) {
2160  case IS_SPECIAL:
2161  RegNum = 0;
2162  RegWidth = 1;
2163  break;
2164  case IS_VGPR:
2165  case IS_SGPR:
2166  case IS_AGPR:
2167  case IS_TTMP:
2168  {
2169  unsigned Size = 1;
2170  if (RegKind == IS_SGPR || RegKind == IS_TTMP) {
2171  // SGPR and TTMP registers must be aligned. Max required alignment is 4 dwords.
2172  Size = std::min(RegWidth, 4u);
2173  }
2174  if (RegNum % Size != 0)
2175  return false;
2176  if (DwordRegIndex) { *DwordRegIndex = RegNum; }
2177  RegNum = RegNum / Size;
2178  int RCID = getRegClass(RegKind, RegWidth);
2179  if (RCID == -1)
2180  return false;
2181  const MCRegisterClass RC = TRI->getRegClass(RCID);
2182  if (RegNum >= RC.getNumRegs())
2183  return false;
2184  Reg = RC.getRegister(RegNum);
2185  break;
2186  }
2187 
2188  default:
2189  llvm_unreachable("unexpected register kind");
2190  }
2191 
2192  if (!subtargetHasRegister(*TRI, Reg))
2193  return false;
2194  return true;
2195 }
2196 
2197 Optional<StringRef>
2198 AMDGPUAsmParser::getGprCountSymbolName(RegisterKind RegKind) {
2199  switch (RegKind) {
2200  case IS_VGPR:
2201  return StringRef(".amdgcn.next_free_vgpr");
2202  case IS_SGPR:
2203  return StringRef(".amdgcn.next_free_sgpr");
2204  default:
2205  return None;
2206  }
2207 }
2208 
2209 void AMDGPUAsmParser::initializeGprCountSymbol(RegisterKind RegKind) {
2210  auto SymbolName = getGprCountSymbolName(RegKind);
2211  assert(SymbolName && "initializing invalid register kind");
2212  MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName);
2213  Sym->setVariableValue(MCConstantExpr::create(0, getContext()));
2214 }
2215 
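 // Record register usage by raising the .amdgcn.next_free_{v,s}gpr variable
 // symbols to cover the highest register index seen so far.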
2216 bool AMDGPUAsmParser::updateGprCountSymbols(RegisterKind RegKind,
2217  unsigned DwordRegIndex,
2218  unsigned RegWidth) {
2219  // Symbols are only defined for GCN targets
2220  if (AMDGPU::getIsaVersion(getSTI().getCPU()).Major < 6)
2221  return true;
2222 
2223  auto SymbolName = getGprCountSymbolName(RegKind);
2224  if (!SymbolName)
2225  return true;
2226  MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName);
2227 
2228  int64_t NewMax = DwordRegIndex + RegWidth - 1;
2229  int64_t OldCount;
2230 
2231  if (!Sym->isVariable())
2232  return !Error(getParser().getTok().getLoc(),
2233  ".amdgcn.next_free_{v,s}gpr symbols must be variable");
2234  if (!Sym->getVariableValue(false)->evaluateAsAbsolute(OldCount))
2235  return !Error(
2236  getParser().getTok().getLoc(),
2237  ".amdgcn.next_free_{v,s}gpr symbols must be absolute expressions");
2238 
2239  if (OldCount <= NewMax)
2240  Sym->setVariableValue(MCConstantExpr::create(NewMax + 1, getContext()));
2241 
2242  return true;
2243 }
2244 
2245 std::unique_ptr<AMDGPUOperand> AMDGPUAsmParser::parseRegister() {
2246  const auto &Tok = Parser.getTok();
2247  SMLoc StartLoc = Tok.getLoc();
2248  SMLoc EndLoc = Tok.getEndLoc();
2249  RegisterKind RegKind;
2250  unsigned Reg, RegNum, RegWidth, DwordRegIndex;
2251 
2252  if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth, &DwordRegIndex)) {
2253  //FIXME: improve error messages (bug 41303).
2254  Error(StartLoc, "not a valid operand.");
2255  return nullptr;
2256  }
2257  if (AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())) {
2258  if (!updateGprCountSymbols(RegKind, DwordRegIndex, RegWidth))
2259  return nullptr;
2260  } else
2261  KernelScope.usesRegister(RegKind, DwordRegIndex, RegWidth);
2262  return AMDGPUOperand::CreateReg(this, Reg, StartLoc, EndLoc);
2263 }
2264 
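 // Parse an immediate operand: either a floating-point literal with an
 // optional leading '-', or an integer/absolute expression. Expressions that
 // cannot be evaluated yet are kept as MCExpr operands.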
2265 OperandMatchResultTy
2266 AMDGPUAsmParser::parseImm(OperandVector &Operands, bool HasSP3AbsModifier) {
2267  // TODO: add syntactic sugar for 1/(2*PI)
2268 
2269  assert(!isRegister());
2270  assert(!isModifier());
2271 
2272  const auto& Tok = getToken();
2273  const auto& NextTok = peekToken();
2274  bool IsReal = Tok.is(AsmToken::Real);
2275  SMLoc S = getLoc();
2276  bool Negate = false;
2277 
2278  if (!IsReal && Tok.is(AsmToken::Minus) && NextTok.is(AsmToken::Real)) {
2279  lex();
2280  IsReal = true;
2281  Negate = true;
2282  }
2283 
2284  if (IsReal) {
2285  // Floating-point expressions are not supported.
2286  // Can only allow floating-point literals with an
2287  // optional sign.
2288 
2289  StringRef Num = getTokenStr();
2290  lex();
2291 
2292  APFloat RealVal(APFloat::IEEEdouble());
2293  auto roundMode = APFloat::rmNearestTiesToEven;
2294  if (RealVal.convertFromString(Num, roundMode) == APFloat::opInvalidOp) {
2295  return MatchOperand_ParseFail;
2296  }
2297  if (Negate)
2298  RealVal.changeSign();
2299 
2300  Operands.push_back(
2301  AMDGPUOperand::CreateImm(this, RealVal.bitcastToAPInt().getZExtValue(), S,
2302  AMDGPUOperand::ImmTyNone, true));
2303 
2304  return MatchOperand_Success;
2305 
2306  } else {
2307  int64_t IntVal;
2308  const MCExpr *Expr;
2309  SMLoc S = getLoc();
2310 
2311  if (HasSP3AbsModifier) {
2312  // This is a workaround for handling expressions
2313  // as arguments of SP3 'abs' modifier, for example:
2314  // |1.0|
2315  // |-1|
2316  // |1+x|
2317  // This syntax is not compatible with syntax of standard
2318  // MC expressions (due to the trailing '|').
2319  SMLoc EndLoc;
2320  if (getParser().parsePrimaryExpr(Expr, EndLoc))
2321  return MatchOperand_ParseFail;
2322  } else {
2323  if (Parser.parseExpression(Expr))
2324  return MatchOperand_ParseFail;
2325  }
2326 
2327  if (Expr->evaluateAsAbsolute(IntVal)) {
2328  Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S));
2329  } else {
2330  Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S));
2331  }
2332 
2333  return MatchOperand_Success;
2334  }
2335 
2336  return MatchOperand_NoMatch;
2337 }
2338 
2339 OperandMatchResultTy
2340 AMDGPUAsmParser::parseReg(OperandVector &Operands) {
2341  if (!isRegister())
2342  return MatchOperand_NoMatch;
2343 
2344  if (auto R = parseRegister()) {
2345  assert(R->isReg());
2346  Operands.push_back(std::move(R));
2347  return MatchOperand_Success;
2348  }
2349  return MatchOperand_ParseFail;
2350 }
2351 
2352 OperandMatchResultTy
2353 AMDGPUAsmParser::parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod) {
2354  auto res = parseReg(Operands);
2355  if (res != MatchOperand_NoMatch) {
2356  return res;
2357  } else if (isModifier()) {
2358  return MatchOperand_NoMatch;
2359  } else {
2360  return parseImm(Operands, HasSP3AbsMod);
2361  }
2362 }
2363 
2364 bool
2365 AMDGPUAsmParser::isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
2366  if (Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::LParen)) {
2367  const auto &str = Token.getString();
2368  return str == "abs" || str == "neg" || str == "sext";
2369  }
2370  return false;
2371 }
2372 
2373 bool
2374 AMDGPUAsmParser::isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const {
2375  return Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::Colon);
2376 }
2377 
2378 bool
2379 AMDGPUAsmParser::isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
2380  return isNamedOperandModifier(Token, NextToken) || Token.is(AsmToken::Pipe);
2381 }
2382 
2383 bool
2384 AMDGPUAsmParser::isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
2385  return isRegister(Token, NextToken) || isOperandModifier(Token, NextToken);
2386 }
2387 
2388 // Check if this is an operand modifier or an opcode modifier
2389 // which may look like an expression but it is not. We should
2390 // avoid parsing these modifiers as expressions. Currently
2391 // recognized sequences are:
2392 // |...|
2393 // abs(...)
2394 // neg(...)
2395 // sext(...)
2396 // -reg
2397 // -|...|
2398 // -abs(...)
2399 // name:...
2400 // Note that simple opcode modifiers like 'gds' may be parsed as
2401 // expressions; this is a special case. See getExpressionAsToken.
2402 //
2403 bool
2404 AMDGPUAsmParser::isModifier() {
2405 
2406  AsmToken Tok = getToken();
2407  AsmToken NextToken[2];
2408  peekTokens(NextToken);
2409 
2410  return isOperandModifier(Tok, NextToken[0]) ||
2411  (Tok.is(AsmToken::Minus) && isRegOrOperandModifier(NextToken[0], NextToken[1])) ||
2412  isOpcodeModifierWithVal(Tok, NextToken[0]);
2413 }
2414 
2415 // Check if the current token is an SP3 'neg' modifier.
2416 // Currently this modifier is allowed in the following context:
2417 //
2418 // 1. Before a register, e.g. "-v0", "-v[...]" or "-[v0,v1]".
2419 // 2. Before an 'abs' modifier: -abs(...)
2420 // 3. Before an SP3 'abs' modifier: -|...|
2421 //
2422 // In all other cases "-" is handled as a part
2423 // of an expression that follows the sign.
2424 //
2425 // Note: When "-" is followed by an integer literal,
2426 // this is interpreted as integer negation rather
2427 // than a floating-point NEG modifier applied to N.
2428 // Besides being counter-intuitive, such use of the floating-point
2429 // NEG modifier would result in different meanings
2430 // of integer literals used with VOP1/2/C and VOP3,
2431 // for example:
2432 // v_exp_f32_e32 v5, -1 // VOP1: src0 = 0xFFFFFFFF
2433 // v_exp_f32_e64 v5, -1 // VOP3: src0 = 0x80000001
2434 // Negative fp literals with a preceding "-" are
2435 // handled likewise for uniformity.
2436 //
2437 bool
2438 AMDGPUAsmParser::parseSP3NegModifier() {
2439 
2440  AsmToken NextToken[2];
2441  peekTokens(NextToken);
2442 
2443  if (isToken(AsmToken::Minus) &&
2444  (isRegister(NextToken[0], NextToken[1]) ||
2445  NextToken[0].is(AsmToken::Pipe) ||
2446  isId(NextToken[0], "abs"))) {
2447  lex();
2448  return true;
2449  }
2450 
2451  return false;
2452 }
2453 
2454 OperandMatchResultTy
2455 AMDGPUAsmParser::parseRegOrImmWithFPInputMods(OperandVector &Operands,
2456  bool AllowImm) {
2457  bool Neg, SP3Neg;
2458  bool Abs, SP3Abs;
2459  SMLoc Loc;
2460 
2461  // Disable ambiguous constructs like '--1' etc. Should use neg(-1) instead.
2462  if (isToken(AsmToken::Minus) && peekToken().is(AsmToken::Minus)) {
2463  Error(getLoc(), "invalid syntax, expected 'neg' modifier");
2464  return MatchOperand_ParseFail;
2465  }
2466 
2467  SP3Neg = parseSP3NegModifier();
2468 
2469  Loc = getLoc();
2470  Neg = trySkipId("neg");
2471  if (Neg && SP3Neg) {
2472  Error(Loc, "expected register or immediate");
2473  return MatchOperand_ParseFail;
2474  }
2475  if (Neg && !skipToken(AsmToken::LParen, "expected left paren after neg"))
2476  return MatchOperand_ParseFail;
2477 
2478  Abs = trySkipId("abs");
2479  if (Abs && !skipToken(AsmToken::LParen, "expected left paren after abs"))
2480  return MatchOperand_ParseFail;
2481 
2482  Loc = getLoc();
2483  SP3Abs = trySkipToken(AsmToken::Pipe);
2484  if (Abs && SP3Abs) {
2485  Error(Loc, "expected register or immediate");
2486  return MatchOperand_ParseFail;
2487  }
2488 
2489  OperandMatchResultTy Res;
2490  if (AllowImm) {
2491  Res = parseRegOrImm(Operands, SP3Abs);
2492  } else {
2493  Res = parseReg(Operands);
2494  }
2495  if (Res != MatchOperand_Success) {
2496  return (SP3Neg || Neg || SP3Abs || Abs)? MatchOperand_ParseFail : Res;
2497  }
2498 
2499  if (SP3Abs && !skipToken(AsmToken::Pipe, "expected vertical bar"))
2500  return MatchOperand_ParseFail;
2501  if (Abs && !skipToken(AsmToken::RParen, "expected closing parentheses"))
2502  return MatchOperand_ParseFail;
2503  if (Neg && !skipToken(AsmToken::RParen, "expected closing parentheses"))
2504  return MatchOperand_ParseFail;
2505 
2506  AMDGPUOperand::Modifiers Mods;
2507  Mods.Abs = Abs || SP3Abs;
2508  Mods.Neg = Neg || SP3Neg;
2509 
2510  if (Mods.hasFPModifiers()) {
2511  AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
2512  if (Op.isExpr()) {
2513  Error(Op.getStartLoc(), "expected an absolute expression");
2514  return MatchOperand_ParseFail;
2515  }
2516  Op.setModifiers(Mods);
2517  }
2518  return MatchOperand_Success;
2519 }
2520 
2521 OperandMatchResultTy
2522 AMDGPUAsmParser::parseRegOrImmWithIntInputMods(OperandVector &Operands,
2523  bool AllowImm) {
2524  bool Sext = trySkipId("sext");
2525  if (Sext && !skipToken(AsmToken::LParen, "expected left paren after sext"))
2526  return MatchOperand_ParseFail;
2527 
2528  OperandMatchResultTy Res;
2529  if (AllowImm) {
2530  Res = parseRegOrImm(Operands);
2531  } else {
2532  Res = parseReg(Operands);
2533  }
2534  if (Res != MatchOperand_Success) {
2535  return Sext? MatchOperand_ParseFail : Res;
2536  }
2537 
2538  if (Sext && !skipToken(AsmToken::RParen, "expected closing parentheses"))
2539  return MatchOperand_ParseFail;
2540 
2541  AMDGPUOperand::Modifiers Mods;
2542  Mods.Sext = Sext;
2543 
2544  if (Mods.hasIntModifiers()) {
2545  AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
2546  if (Op.isExpr()) {
2547  Error(Op.getStartLoc(), "expected an absolute expression");
2548  return MatchOperand_ParseFail;
2549  }
2550  Op.setModifiers(Mods);
2551  }
2552 
2553  return MatchOperand_Success;
2554 }
2555 
2556 OperandMatchResultTy
2557 AMDGPUAsmParser::parseRegWithFPInputMods(OperandVector &Operands) {
2558  return parseRegOrImmWithFPInputMods(Operands, false);
2559 }
2560 
2561 OperandMatchResultTy
2562 AMDGPUAsmParser::parseRegWithIntInputMods(OperandVector &Operands) {
2563  return parseRegOrImmWithIntInputMods(Operands, false);
2564 }
2565 
2566 OperandMatchResultTy AMDGPUAsmParser::parseVReg32OrOff(OperandVector &Operands) {
2567  auto Loc = getLoc();
2568  if (trySkipId("off")) {
2569  Operands.push_back(AMDGPUOperand::CreateImm(this, 0, Loc,
2570  AMDGPUOperand::ImmTyOff, false));
2571  return MatchOperand_Success;
2572  }
2573 
2574  if (!isRegister())
2575  return MatchOperand_NoMatch;
2576 
2577  std::unique_ptr<AMDGPUOperand> Reg = parseRegister();
2578  if (Reg) {
2579  Operands.push_back(std::move(Reg));
2580  return MatchOperand_Success;
2581  }
2582 
2583  return MatchOperand_ParseFail;
2584 
2585 }
2586 
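 // Reject matcher results that contradict an encoding forced by the mnemonic
 // suffix (_e32, _e64, _dpp, _sdwa), and request the e32 form where an
 // instruction prefers the 32-bit encoding.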
2587 unsigned AMDGPUAsmParser::checkTargetMatchPredicate(MCInst &Inst) {
2588  uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
2589 
2590  if ((getForcedEncodingSize() == 32 && (TSFlags & SIInstrFlags::VOP3)) ||
2591  (getForcedEncodingSize() == 64 && !(TSFlags & SIInstrFlags::VOP3)) ||
2592  (isForcedDPP() && !(TSFlags & SIInstrFlags::DPP)) ||
2593  (isForcedSDWA() && !(TSFlags & SIInstrFlags::SDWA)) )
2594  return Match_InvalidOperand;
2595 
2596  if ((TSFlags & SIInstrFlags::VOP3) &&
2597  (TSFlags & SIInstrFlags::VOPAsmPrefer32Bit) &&
2598  getForcedEncodingSize() != 64)
2599  return Match_PreferE32;
2600 
2601  if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi ||
2602  Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) {
2603  // v_mac_f32/16 allow only dst_sel == DWORD;
2604  auto OpNum =
2605  AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::dst_sel);
2606  const auto &Op = Inst.getOperand(OpNum);
2607  if (!Op.isImm() || Op.getImm() != AMDGPU::SDWA::SdwaSel::DWORD) {
2608  return Match_InvalidOperand;
2609  }
2610  }
2611 
2612  return Match_Success;
2613 }
2614 
2615 // What asm variants we should check
2616 ArrayRef<unsigned> AMDGPUAsmParser::getMatchedVariants() const {
2617  if (getForcedEncodingSize() == 32) {
2618  static const unsigned Variants[] = {AMDGPUAsmVariants::DEFAULT};
2619  return makeArrayRef(Variants);
2620  }
2621 
2622  if (isForcedVOP3()) {
2623  static const unsigned Variants[] = {AMDGPUAsmVariants::VOP3};
2624  return makeArrayRef(Variants);
2625  }
2626 
2627  if (isForcedSDWA()) {
2628  static const unsigned Variants[] = {AMDGPUAsmVariants::SDWA,
2629  AMDGPUAsmVariants::SDWA9};
2630  return makeArrayRef(Variants);
2631  }
2632 
2633  if (isForcedDPP()) {
2634  static const unsigned Variants[] = {AMDGPUAsmVariants::DPP};
2635  return makeArrayRef(Variants);
2636  }
2637 
2638  static const unsigned Variants[] = {
2639  AMDGPUAsmVariants::DEFAULT, AMDGPUAsmVariants::VOP3,
2640  AMDGPUAsmVariants::SDWA, AMDGPUAsmVariants::SDWA9, AMDGPUAsmVariants::DPP
2641  };
2642 
2643  return makeArrayRef(Variants);
2644 }
2645 
2646 unsigned AMDGPUAsmParser::findImplicitSGPRReadInVOP(const MCInst &Inst) const {
2647  const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
2648  const unsigned Num = Desc.getNumImplicitUses();
2649  for (unsigned i = 0; i < Num; ++i) {
2650  unsigned Reg = Desc.ImplicitUses[i];
2651  switch (Reg) {
2652  case AMDGPU::FLAT_SCR:
2653  case AMDGPU::VCC:
2654  case AMDGPU::VCC_LO:
2655  case AMDGPU::VCC_HI:
2656  case AMDGPU::M0:
2657  case AMDGPU::SGPR_NULL:
2658  return Reg;
2659  default:
2660  break;
2661  }
2662  }
2663  return AMDGPU::NoRegister;
2664 }
2665 
2666 // NB: This code is correct only when used to check constant
2667 // bus limitations because GFX7 supports no f16 inline constants.
2668 // Note that there are no cases when a GFX7 opcode violates
2669 // constant bus limitations due to the use of an f16 constant.
2670 bool AMDGPUAsmParser::isInlineConstant(const MCInst &Inst,
2671  unsigned OpIdx) const {
2672  const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
2673 
2674  if (!AMDGPU::isSISrcOperand(Desc, OpIdx)) {
2675  return false;
2676  }
2677 
2678  const MCOperand &MO = Inst.getOperand(OpIdx);
2679 
2680  int64_t Val = MO.getImm();
2681  auto OpSize = AMDGPU::getOperandSize(Desc, OpIdx);
2682 
2683  switch (OpSize) { // expected operand size
2684  case 8:
2685  return AMDGPU::isInlinableLiteral64(Val, hasInv2PiInlineImm());
2686  case 4:
2687  return AMDGPU::isInlinableLiteral32(Val, hasInv2PiInlineImm());
2688  case 2: {
2689  const unsigned OperandType = Desc.OpInfo[OpIdx].OperandType;
2690  if (OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2INT16 ||
2691  OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2FP16 ||
2692  OperandType == AMDGPU::OPERAND_REG_INLINE_AC_V2INT16 ||
2693  OperandType == AMDGPU::OPERAND_REG_INLINE_AC_V2FP16 ||
2694  OperandType == AMDGPU::OPERAND_REG_IMM_V2INT16 ||
2695  OperandType == AMDGPU::OPERAND_REG_IMM_V2FP16) {
2696  return AMDGPU::isInlinableLiteralV216(Val, hasInv2PiInlineImm());
2697  } else {
2698  return AMDGPU::isInlinableLiteral16(Val, hasInv2PiInlineImm());
2699  }
2700  }
2701  default:
2702  llvm_unreachable("invalid operand size");
2703  }
2704 }
2705 
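 // An operand occupies the constant bus if it is an SGPR, an expression, or
 // a literal that is not an inline constant.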
2706 bool AMDGPUAsmParser::usesConstantBus(const MCInst &Inst, unsigned OpIdx) {
2707  const MCOperand &MO = Inst.getOperand(OpIdx);
2708  if (MO.isImm()) {
2709  return !isInlineConstant(Inst, OpIdx);
2710  }
2711  return !MO.isReg() ||
2712  isSGPR(mc2PseudoReg(MO.getReg()), getContext().getRegisterInfo());
2713 }
2714 
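 // VOP* instructions may read at most one scalar value over the constant bus
 // (two on GFX10): implicit SGPR reads, explicit SGPR sources and literal
 // constants all count against this limit.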
2715 bool AMDGPUAsmParser::validateConstantBusLimitations(const MCInst &Inst) {
2716  const unsigned Opcode = Inst.getOpcode();
2717  const MCInstrDesc &Desc = MII.get(Opcode);
2718  unsigned ConstantBusUseCount = 0;
2719  unsigned NumLiterals = 0;
2720  unsigned LiteralSize;
2721 
2722  if (Desc.TSFlags &
2723  (SIInstrFlags::VOPC |
2724  SIInstrFlags::VOP1 | SIInstrFlags::VOP2 |
2725  SIInstrFlags::VOP3 | SIInstrFlags::VOP3P |
2726  SIInstrFlags::SDWA)) {
2727  // Check special imm operands (used by madmk, etc)
2728  if (AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::imm) != -1) {
2729  ++ConstantBusUseCount;
2730  }
2731 
2732  SmallDenseSet<unsigned> SGPRsUsed;
2733  unsigned SGPRUsed = findImplicitSGPRReadInVOP(Inst);
2734  if (SGPRUsed != AMDGPU::NoRegister) {
2735  SGPRsUsed.insert(SGPRUsed);
2736  ++ConstantBusUseCount;
2737  }
2738 
2739  const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
2740  const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
2741  const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);
2742 
2743  const int OpIndices[] = { Src0Idx, Src1Idx, Src2Idx };
2744 
2745  for (int OpIdx : OpIndices) {
2746  if (OpIdx == -1) break;
2747 
2748  const MCOperand &MO = Inst.getOperand(OpIdx);
2749  if (usesConstantBus(Inst, OpIdx)) {
2750  if (MO.isReg()) {
2751  const unsigned Reg = mc2PseudoReg(MO.getReg());
2752  // Pairs of registers with partial intersections such as
2753  // s0, s[0:1]
2754  // flat_scratch_lo, flat_scratch
2755  // flat_scratch_lo, flat_scratch_hi
2756  // are theoretically valid but they are disabled anyway.
2757  // Note that this code mimics SIInstrInfo::verifyInstruction
2758  if (!SGPRsUsed.count(Reg)) {
2759  SGPRsUsed.insert(Reg);
2760  ++ConstantBusUseCount;
2761  }
2762  } else { // Expression or a literal
2763 
2764  if (Desc.OpInfo[OpIdx].OperandType == MCOI::OPERAND_IMMEDIATE)
2765  continue; // special operand like VINTERP attr_chan
2766 
2767  // An instruction may use only one literal.
2768  // This has been validated in the previous step.
2769  // See validateVOP3Literal.
2770  // This literal may be used as more than one operand.
2771  // If all these operands are of the same size,
2772  // this literal counts as one scalar value.
2773  // Otherwise it counts as 2 scalar values.
2774  // See "GFX10 Shader Programming", section 3.6.2.3.
2775 
2776  unsigned Size = AMDGPU::getOperandSize(Desc, OpIdx);
2777  if (Size < 4) Size = 4;
2778 
2779  if (NumLiterals == 0) {
2780  NumLiterals = 1;
2781  LiteralSize = Size;
2782  } else if (LiteralSize != Size) {
2783  NumLiterals = 2;
2784  }
2785  }
2786  }
2787  }
2788  }
2789  ConstantBusUseCount += NumLiterals;
2790 
2791  if (isGFX10())
2792  return ConstantBusUseCount <= 2;
2793 
2794  return ConstantBusUseCount <= 1;
2795 }
2796 
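 // If the destination operand is marked earlyclobber, it must not overlap
 // any of the source registers.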
2797 bool AMDGPUAsmParser::validateEarlyClobberLimitations(const MCInst &Inst) {
2798  const unsigned Opcode = Inst.getOpcode();
2799  const MCInstrDesc &Desc = MII.get(Opcode);
2800 
2801  const int DstIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::vdst);
2802  if (DstIdx == -1 ||
2803  Desc.getOperandConstraint(DstIdx, MCOI::EARLY_CLOBBER) == -1) {
2804  return true;
2805  }
2806 
2807  const MCRegisterInfo *TRI = getContext().getRegisterInfo();
2808 
2809  const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
2810  const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
2811  const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);
2812 
2813  assert(DstIdx != -1);
2814  const MCOperand &Dst = Inst.getOperand(DstIdx);
2815  assert(Dst.isReg());
2816  const unsigned DstReg = mc2PseudoReg(Dst.getReg());
2817 
2818  const int SrcIndices[] = { Src0Idx, Src1Idx, Src2Idx };
2819 
2820  for (int SrcIdx : SrcIndices) {
2821  if (SrcIdx == -1) break;
2822  const MCOperand &Src = Inst.getOperand(SrcIdx);
2823  if (Src.isReg()) {
2824  const unsigned SrcReg = mc2PseudoReg(Src.getReg());
2825  if (isRegIntersect(DstReg, SrcReg, TRI)) {
2826  return false;
2827  }
2828  }
2829  }
2830 
2831  return true;
2832 }
2833 
2834 bool AMDGPUAsmParser::validateIntClampSupported(const MCInst &Inst) {
2835 
2836  const unsigned Opc = Inst.getOpcode();
2837  const MCInstrDesc &Desc = MII.get(Opc);
2838 
2839  if ((Desc.TSFlags & SIInstrFlags::IntClamp) != 0 && !hasIntClamp()) {
2840  int ClampIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp);
2841  assert(ClampIdx != -1);
2842  return Inst.getOperand(ClampIdx).getImm() == 0;
2843  }
2844 
2845  return true;
2846 }
2847 
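 // The width of vdata must match the number of channels enabled by dmask
 // (plus one dword when tfe is set), halved when packed d16 is used.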
2848 bool AMDGPUAsmParser::validateMIMGDataSize(const MCInst &Inst) {
2849 
2850  const unsigned Opc = Inst.getOpcode();
2851  const MCInstrDesc &Desc = MII.get(Opc);
2852 
2853  if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
2854  return true;
2855 
2856  int VDataIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdata);
2857  int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
2858  int TFEIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::tfe);
2859 
2860  assert(VDataIdx != -1);
2861  assert(DMaskIdx != -1);
2862  assert(TFEIdx != -1);
2863 
2864  unsigned VDataSize = AMDGPU::getRegOperandSize(getMRI(), Desc, VDataIdx);
2865  unsigned TFESize = Inst.getOperand(TFEIdx).getImm()? 1 : 0;
2866  unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
2867  if (DMask == 0)
2868  DMask = 1;
2869 
2870  unsigned DataSize =
2871  (Desc.TSFlags & SIInstrFlags::Gather4) ? 4 : countPopulation(DMask);
2872  if (hasPackedD16()) {
2873  int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16);
2874  if (D16Idx >= 0 && Inst.getOperand(D16Idx).getImm())
2875  DataSize = (DataSize + 1) / 2;
2876  }
2877 
2878  return (VDataSize / 4) == DataSize + TFESize;
2879 }
2880 
2881 bool AMDGPUAsmParser::validateMIMGAddrSize(const MCInst &Inst) {
2882  const unsigned Opc = Inst.getOpcode();
2883  const MCInstrDesc &Desc = MII.get(Opc);
2884 
2885  if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0 || !isGFX10())
2886  return true;
2887 
2888  const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Opc);
2889  const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode =
2890  AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode);
2891  int VAddr0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vaddr0);
2892  int SrsrcIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::srsrc);
2893  int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim);
2894 
2895  assert(VAddr0Idx != -1);
2896  assert(SrsrcIdx != -1);
2897  assert(DimIdx != -1);
2898  assert(SrsrcIdx > VAddr0Idx);
2899 
2900  unsigned Dim = Inst.getOperand(DimIdx).getImm();
2901  const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByEncoding(Dim);
2902  bool IsNSA = SrsrcIdx - VAddr0Idx > 1;
2903  unsigned VAddrSize =
2904  IsNSA ? SrsrcIdx - VAddr0Idx
2905  : AMDGPU::getRegOperandSize(getMRI(), Desc, VAddr0Idx) / 4;
2906 
2907  unsigned AddrSize = BaseOpcode->NumExtraArgs +
2908  (BaseOpcode->Gradients ? DimInfo->NumGradients : 0) +
2909  (BaseOpcode->Coordinates ? DimInfo->NumCoords : 0) +
2910  (BaseOpcode->LodOrClampOrMip ? 1 : 0);
2911  if (!IsNSA) {
2912  if (AddrSize > 8)
2913  AddrSize = 16;
2914  else if (AddrSize > 4)
2915  AddrSize = 8;
2916  }
2917 
2918  return VAddrSize == AddrSize;
2919 }
2920 
2921 bool AMDGPUAsmParser::validateMIMGAtomicDMask(const MCInst &Inst) {
2922 
2923  const unsigned Opc = Inst.getOpcode();
2924  const MCInstrDesc &Desc = MII.get(Opc);
2925 
2926  if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
2927  return true;
2928  if (!Desc.mayLoad() || !Desc.mayStore())
2929  return true; // Not atomic
2930 
2931  int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
2932  unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
2933 
2934  // This is an incomplete check because image_atomic_cmpswap
2935  // may only use 0x3 and 0xf while other atomic operations
2936  // may use 0x1 and 0x3. However these limitations are
2937  // verified when we check that dmask matches dst size.
2938  return DMask == 0x1 || DMask == 0x3 || DMask == 0xf;
2939 }
2940 
2941 bool AMDGPUAsmParser::validateMIMGGatherDMask(const MCInst &Inst) {
2942 
2943  const unsigned Opc = Inst.getOpcode();
2944  const MCInstrDesc &Desc = MII.get(Opc);
2945 
2946  if ((Desc.TSFlags & SIInstrFlags::Gather4) == 0)
2947  return true;
2948 
2949  int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
2950  unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
2951 
2952  // GATHER4 instructions use dmask in a different fashion compared to
2953  // other MIMG instructions. The only useful DMASK values are
2954  // 1=red, 2=green, 4=blue, 8=alpha. (e.g. 1 returns
2955  // (red,red,red,red) etc.) The ISA document doesn't mention
2956  // this.
2957  return DMask == 0x1 || DMask == 0x2 || DMask == 0x4 || DMask == 0x8;
2958 }
2959 
2960 bool AMDGPUAsmParser::validateMIMGD16(const MCInst &Inst) {
2961 
2962  const unsigned Opc = Inst.getOpcode();
2963  const MCInstrDesc &Desc = MII.get(Opc);
2964 
2965  if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
2966  return true;
2967 
2968  int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16);
2969  if (D16Idx >= 0 && Inst.getOperand(D16Idx).getImm()) {
2970  if (isCI() || isSI())
2971  return false;
2972  }
2973 
2974  return true;
2975 }
2976 
2977 bool AMDGPUAsmParser::validateMIMGDim(const MCInst &Inst) {
2978  const unsigned Opc = Inst.getOpcode();
2979  const MCInstrDesc &Desc = MII.get(Opc);
2980 
2981  if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
2982  return true;
2983 
2984  int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim);
2985  if (DimIdx < 0)
2986  return true;
2987 
2988  long Imm = Inst.getOperand(DimIdx).getImm();
2989  if (Imm < 0 || Imm >= 8)
2990  return false;
2991 
2992  return true;
2993 }
2994 
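 // "rev" opcodes swap their first two sources, so the operand written as
 // src0 in assembly is encoded in the src1 slot. This matters for operands
 // such as lds_direct that are valid only as src0.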
2995 static bool IsRevOpcode(const unsigned Opcode)
2996 {
2997  switch (Opcode) {
2998  case AMDGPU::V_SUBREV_F32_e32:
2999  case AMDGPU::V_SUBREV_F32_e64:
3000  case AMDGPU::V_SUBREV_F32_e32_gfx10:
3001  case AMDGPU::V_SUBREV_F32_e32_gfx6_gfx7:
3002  case AMDGPU::V_SUBREV_F32_e32_vi:
3003  case AMDGPU::V_SUBREV_F32_e64_gfx10:
3004  case AMDGPU::V_SUBREV_F32_e64_gfx6_gfx7:
3005  case AMDGPU::V_SUBREV_F32_e64_vi:
3006 
3007  case AMDGPU::V_SUBREV_I32_e32:
3008  case AMDGPU::V_SUBREV_I32_e64:
3009  case AMDGPU::V_SUBREV_I32_e32_gfx6_gfx7:
3010  case AMDGPU::V_SUBREV_I32_e64_gfx6_gfx7:
3011 
3012  case AMDGPU::V_SUBBREV_U32_e32:
3013  case AMDGPU::V_SUBBREV_U32_e64:
3014  case AMDGPU::V_SUBBREV_U32_e32_gfx6_gfx7:
3015  case AMDGPU::V_SUBBREV_U32_e32_vi:
3016  case AMDGPU::V_SUBBREV_U32_e64_gfx6_gfx7:
3017  case AMDGPU::V_SUBBREV_U32_e64_vi:
3018 
3019  case AMDGPU::V_SUBREV_U32_e32:
3020  case AMDGPU::V_SUBREV_U32_e64:
3021  case AMDGPU::V_SUBREV_U32_e32_gfx9:
3022  case AMDGPU::V_SUBREV_U32_e32_vi:
3023  case AMDGPU::V_SUBREV_U32_e64_gfx9:
3024  case AMDGPU::V_SUBREV_U32_e64_vi:
3025 
3026  case AMDGPU::V_SUBREV_F16_e32:
3027  case AMDGPU::V_SUBREV_F16_e64:
3028  case AMDGPU::V_SUBREV_F16_e32_gfx10:
3029  case AMDGPU::V_SUBREV_F16_e32_vi:
3030  case AMDGPU::V_SUBREV_F16_e64_gfx10:
3031  case AMDGPU::V_SUBREV_F16_e64_vi:
3032 
3033  case AMDGPU::V_SUBREV_U16_e32:
3034  case AMDGPU::V_SUBREV_U16_e64:
3035  case AMDGPU::V_SUBREV_U16_e32_vi:
3036  case AMDGPU::V_SUBREV_U16_e64_vi:
3037 
3038  case AMDGPU::V_SUBREV_CO_U32_e32_gfx9:
3039  case AMDGPU::V_SUBREV_CO_U32_e64_gfx10:
3040  case AMDGPU::V_SUBREV_CO_U32_e64_gfx9:
3041 
3042  case AMDGPU::V_SUBBREV_CO_U32_e32_gfx9:
3043  case AMDGPU::V_SUBBREV_CO_U32_e64_gfx9:
3044 
3045  case AMDGPU::V_SUBREV_NC_U32_e32_gfx10:
3046  case AMDGPU::V_SUBREV_NC_U32_e64_gfx10:
3047 
3048  case AMDGPU::V_SUBREV_CO_CI_U32_e32_gfx10:
3049  case AMDGPU::V_SUBREV_CO_CI_U32_e64_gfx10:
3050 
3051  case AMDGPU::V_LSHRREV_B32_e32:
3052  case AMDGPU::V_LSHRREV_B32_e64:
3053  case AMDGPU::V_LSHRREV_B32_e32_gfx6_gfx7:
3054  case AMDGPU::V_LSHRREV_B32_e64_gfx6_gfx7:
3055  case AMDGPU::V_LSHRREV_B32_e32_vi:
3056  case AMDGPU::V_LSHRREV_B32_e64_vi:
3057  case AMDGPU::V_LSHRREV_B32_e32_gfx10:
3058  case AMDGPU::V_LSHRREV_B32_e64_gfx10:
3059 
3060  case AMDGPU::V_ASHRREV_I32_e32:
3061  case AMDGPU::V_ASHRREV_I32_e64:
3062  case AMDGPU::V_ASHRREV_I32_e32_gfx10:
3063  case AMDGPU::V_ASHRREV_I32_e32_gfx6_gfx7:
3064  case AMDGPU::V_ASHRREV_I32_e32_vi:
3065  case AMDGPU::V_ASHRREV_I32_e64_gfx10:
3066  case AMDGPU::V_ASHRREV_I32_e64_gfx6_gfx7:
3067  case AMDGPU::V_ASHRREV_I32_e64_vi:
3068 
3069  case AMDGPU::V_LSHLREV_B32_e32:
3070  case AMDGPU::V_LSHLREV_B32_e64:
3071  case AMDGPU::V_LSHLREV_B32_e32_gfx10:
3072  case AMDGPU::V_LSHLREV_B32_e32_gfx6_gfx7:
3073  case AMDGPU::V_LSHLREV_B32_e32_vi:
3074  case AMDGPU::V_LSHLREV_B32_e64_gfx10:
3075  case AMDGPU::V_LSHLREV_B32_e64_gfx6_gfx7:
3076  case AMDGPU::V_LSHLREV_B32_e64_vi:
3077 
3078  case AMDGPU::V_LSHLREV_B16_e32:
3079  case AMDGPU::V_LSHLREV_B16_e64:
3080  case AMDGPU::V_LSHLREV_B16_e32_vi:
3081  case AMDGPU::V_LSHLREV_B16_e64_vi:
3082  case AMDGPU::V_LSHLREV_B16_gfx10:
3083 
3084  case AMDGPU::V_LSHRREV_B16_e32:
3085  case AMDGPU::V_LSHRREV_B16_e64:
3086  case AMDGPU::V_LSHRREV_B16_e32_vi:
3087  case AMDGPU::V_LSHRREV_B16_e64_vi:
3088  case AMDGPU::V_LSHRREV_B16_gfx10:
3089 
3090  case AMDGPU::V_ASHRREV_I16_e32:
3091  case AMDGPU::V_ASHRREV_I16_e64:
3092  case AMDGPU::V_ASHRREV_I16_e32_vi:
3093  case AMDGPU::V_ASHRREV_I16_e64_vi:
3094  case AMDGPU::V_ASHRREV_I16_gfx10:
3095 
3096  case AMDGPU::V_LSHLREV_B64:
3097  case AMDGPU::V_LSHLREV_B64_gfx10:
3098  case AMDGPU::V_LSHLREV_B64_vi:
3099 
3100  case AMDGPU::V_LSHRREV_B64:
3101  case AMDGPU::V_LSHRREV_B64_gfx10:
3102  case AMDGPU::V_LSHRREV_B64_vi:
3103 
3104  case AMDGPU::V_ASHRREV_I64:
3105  case AMDGPU::V_ASHRREV_I64_gfx10:
3106  case AMDGPU::V_ASHRREV_I64_vi:
3107 
3108  case AMDGPU::V_PK_LSHLREV_B16:
3109  case AMDGPU::V_PK_LSHLREV_B16_gfx10:
3110  case AMDGPU::V_PK_LSHLREV_B16_vi:
3111 
3112  case AMDGPU::V_PK_LSHRREV_B16:
3113  case AMDGPU::V_PK_LSHRREV_B16_gfx10:
3114  case AMDGPU::V_PK_LSHRREV_B16_vi:
3115  case AMDGPU::V_PK_ASHRREV_I16:
3116  case AMDGPU::V_PK_ASHRREV_I16_gfx10:
3117  case AMDGPU::V_PK_ASHRREV_I16_vi:
3118  return true;
3119  default:
3120  return false;
3121  }
3122 }
3123 
3124 bool AMDGPUAsmParser::validateLdsDirect(const MCInst &Inst) {
3125 
3126  using namespace SIInstrFlags;
3127  const unsigned Opcode = Inst.getOpcode();
3128  const MCInstrDesc &Desc = MII.get(Opcode);
3129 
3130  // lds_direct register is defined so that it can be used
3131  // with 9-bit operands only. Ignore encodings which do not accept these.
3132  if ((Desc.TSFlags & (VOP1 | VOP2 | VOP3 | VOPC | VOP3P | SIInstrFlags::SDWA)) == 0)
3133  return true;
3134 
3135  const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
3136  const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
3137  const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);
3138 
3139  const int SrcIndices[] = { Src1Idx, Src2Idx };
3140 
3141  // lds_direct cannot be specified as either src1 or src2.
3142  for (int SrcIdx : SrcIndices) {
3143  if (SrcIdx == -1) break;
3144  const MCOperand &Src = Inst.getOperand(SrcIdx);
3145  if (Src.isReg() && Src.getReg() == LDS_DIRECT) {
3146  return false;
3147  }
3148  }
3149 
3150  if (Src0Idx == -1)
3151  return true;
3152 
3153  const MCOperand &Src = Inst.getOperand(Src0Idx);
3154  if (!Src.isReg() || Src.getReg() != LDS_DIRECT)
3155  return true;
3156 
3157  // lds_direct is specified as src0. Check additional limitations.
3158  return (Desc.TSFlags & SIInstrFlags::SDWA) == 0 && !IsRevOpcode(Opcode);
3159 }
3160 
3161 SMLoc AMDGPUAsmParser::getFlatOffsetLoc(const OperandVector &Operands) const {
3162  for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
3163  AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
3164  if (Op.isFlatOffset())
3165  return Op.getStartLoc();
3166  }
3167  return getLoc();
3168 }
3169 
3170 bool AMDGPUAsmParser::validateFlatOffset(const MCInst &Inst,
3171  const OperandVector &Operands) {
3172  uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
3173  if ((TSFlags & SIInstrFlags::FLAT) == 0)
3174  return true;
3175 
3176  auto Opcode = Inst.getOpcode();
3177  auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset);
3178  assert(OpNum != -1);
3179 
3180  const auto &Op = Inst.getOperand(OpNum);
3181  if (!hasFlatOffsets() && Op.getImm() != 0) {
3182  Error(getFlatOffsetLoc(Operands),
3183  "flat offset modifier is not supported on this GPU");
3184  return false;
3185  }
3186 
3187  // Address offset is 12-bit signed for GFX10, 13-bit for GFX9.
3188  // For FLAT segment the offset must be positive;
3189  // MSB is ignored and forced to zero.
3190  unsigned OffsetSize = isGFX9() ? 13 : 12;
3191  if (TSFlags & SIInstrFlags::IsNonFlatSeg) {
3192  if (!isIntN(OffsetSize, Op.getImm())) {
3193  Error(getFlatOffsetLoc(Operands),
3194  isGFX9() ? "expected a 13-bit signed offset" :
3195  "expected a 12-bit signed offset");
3196  return false;
3197  }
3198  } else {
3199  if (!isUIntN(OffsetSize - 1, Op.getImm())) {
3200  Error(getFlatOffsetLoc(Operands),
3201  isGFX9() ? "expected a 12-bit unsigned offset" :
3202  "expected an 11-bit unsigned offset");
3203  return false;
3204  }
3205  }
3206 
3207  return true;
3208 }
3209 
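 // SOP2/SOPC instructions can encode at most one literal; the same literal
 // value may, however, be used by both source operands.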
3210 bool AMDGPUAsmParser::validateSOPLiteral(const MCInst &Inst) const {
3211  unsigned Opcode = Inst.getOpcode();
3212  const MCInstrDesc &Desc = MII.get(Opcode);
3213  if (!(Desc.TSFlags & (SIInstrFlags::SOP2 | SIInstrFlags::SOPC)))
3214  return true;
3215 
3216  const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
3217  const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
3218 
3219  const int OpIndices[] = { Src0Idx, Src1Idx };
3220 
3221  unsigned NumLiterals = 0;
3222  uint32_t LiteralValue;
3223 
3224  for (int OpIdx : OpIndices) {
3225  if (OpIdx == -1) break;
3226 
3227  const MCOperand &MO = Inst.getOperand(OpIdx);
3228  if (MO.isImm() &&
3229  // Exclude special imm operands (like that used by s_set_gpr_idx_on)
3230  AMDGPU::isSISrcOperand(Desc, OpIdx) &&
3231  !isInlineConstant(Inst, OpIdx)) {
3232  uint32_t Value = static_cast<uint32_t>(MO.getImm());
3233  if (NumLiterals == 0 || LiteralValue != Value) {
3234  LiteralValue = Value;
3235  ++NumLiterals;
3236  }
3237  }
3238  }
3239 
3240  return NumLiterals <= 1;
3241 }
3242 
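 // v_permlane16/x16 accept only the two low-order op_sel bits; any higher
 // bit is rejected.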
3243 bool AMDGPUAsmParser::validateOpSel(const MCInst &Inst) {
3244  const unsigned Opc = Inst.getOpcode();
3245  if (Opc == AMDGPU::V_PERMLANE16_B32_gfx10 ||
3246  Opc == AMDGPU::V_PERMLANEX16_B32_gfx10) {
3247  int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
3248  unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
3249 
3250  if (OpSel & ~3)
3251  return false;
3252  }
3253  return true;
3254 }
3255 
3256 // Check if VCC register matches wavefront size
3257 bool AMDGPUAsmParser::validateVccOperand(unsigned Reg) const {
3258  auto FB = getFeatureBits();
3259  return (FB[AMDGPU::FeatureWavefrontSize64] && Reg == AMDGPU::VCC) ||
3260  (FB[AMDGPU::FeatureWavefrontSize32] && Reg == AMDGPU::VCC_LO);
3261 }
3262 
3263 // VOP3 literal is only allowed in GFX10+ and only one can be used
3264 bool AMDGPUAsmParser::validateVOP3Literal(const MCInst &Inst) const {
3265  unsigned Opcode = Inst.getOpcode();
3266  const MCInstrDesc &Desc = MII.get(Opcode);
3267  if (!(Desc.TSFlags & (SIInstrFlags::VOP3 | SIInstrFlags::VOP3P)))
3268  return true;
3269 
3270  const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
3271  const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
3272  const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);
3273 
3274  const int OpIndices[] = { Src0Idx, Src1Idx, Src2Idx };
3275 
3276  unsigned NumLiterals = 0;
3277  uint32_t LiteralValue;
3278 
3279  for (int OpIdx : OpIndices) {
3280  if (OpIdx == -1) break;
3281 
3282  const MCOperand &MO = Inst.getOperand(OpIdx);
3283  if (!MO.isImm() || !AMDGPU::isSISrcOperand(Desc, OpIdx))
3284  continue;
3285 
3286  if (!isInlineConstant(Inst, OpIdx)) {
3287  uint32_t Value = static_cast<uint32_t>(MO.getImm());
3288  if (NumLiterals == 0 || LiteralValue != Value) {
3289  LiteralValue = Value;
3290  ++NumLiterals;
3291  }
3292  }
3293  }
3294 
3295  return !NumLiterals ||
3296  (NumLiterals == 1 && getFeatureBits()[AMDGPU::FeatureVOP3Literal]);
3297 }
3298 
3299 bool AMDGPUAsmParser::validateInstruction(const MCInst &Inst,
3300  const SMLoc &IDLoc,
3301  const OperandVector &Operands) {
3302  if (!validateLdsDirect(Inst)) {
3303  Error(IDLoc,
3304  "invalid use of lds_direct");
3305  return false;
3306  }
3307  if (!validateSOPLiteral(Inst)) {
3308  Error(IDLoc,
3309  "only one literal operand is allowed");
3310  return false;
3311  }
3312  if (!validateVOP3Literal(Inst)) {
3313  Error(IDLoc,
3314  "invalid literal operand");
3315  return false;
3316  }
3317  if (!validateConstantBusLimitations(Inst)) {
3318  Error(IDLoc,
3319  "invalid operand (violates constant bus restrictions)");
3320  return false;
3321  }
3322  if (!validateEarlyClobberLimitations(Inst)) {
3323  Error(IDLoc,
3324  "destination must be different than all sources");
3325  return false;
3326  }
3327  if (!validateIntClampSupported(Inst)) {
3328  Error(IDLoc,
3329  "integer clamping is not supported on this GPU");
3330  return false;
3331  }
3332  if (!validateOpSel(Inst)) {
3333  Error(IDLoc,
3334  "invalid op_sel operand");
3335  return false;
3336  }
3337  // For MUBUF/MTBUF d16 is a part of opcode, so there is nothing to validate.
3338  if (!validateMIMGD16(Inst)) {
3339  Error(IDLoc,
3340  "d16 modifier is not supported on this GPU");
3341  return false;
3342  }
3343  if (!validateMIMGDim(Inst)) {
3344  Error(IDLoc, "dim modifier is required on this GPU");
3345  return false;
3346  }
3347  if (!validateMIMGDataSize(Inst)) {
3348  Error(IDLoc,
3349  "image data size does not match dmask and tfe");
3350  return false;
3351  }
3352  if (!validateMIMGAddrSize(Inst)) {
3353  Error(IDLoc,
3354  "image address size does not match dim and a16");
3355  return false;
3356  }
3357  if (!validateMIMGAtomicDMask(Inst)) {
3358  Error(IDLoc,
3359  "invalid atomic image dmask");
3360  return false;
3361  }
3362  if (!validateMIMGGatherDMask(Inst)) {
3363  Error(IDLoc,
3364  "invalid image_gather dmask: only one bit must be set");
3365  return false;
3366  }
3367  if (!validateFlatOffset(Inst, Operands)) {
3368  return false;
3369  }
3370 
3371  return true;
3372 }
3373 
3374 static std::string AMDGPUMnemonicSpellCheck(StringRef S,
3375  const FeatureBitset &FBS,
3376  unsigned VariantID = 0);
3377 
3378 bool AMDGPUAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
3379  OperandVector &Operands,
3380  MCStreamer &Out,
3381  uint64_t &ErrorInfo,
3382  bool MatchingInlineAsm) {
3383  MCInst Inst;
3384  unsigned Result = Match_Success;
3385  for (auto Variant : getMatchedVariants()) {
3386  uint64_t EI;
3387  auto R = MatchInstructionImpl(Operands, Inst, EI, MatchingInlineAsm,
3388  Variant);
3389  // We order match statuses from least to most specific. We use the most
3390  // specific status as the result:
3391  // Match_MnemonicFail < Match_InvalidOperand < Match_MissingFeature < Match_PreferE32
3392  if ((R == Match_Success) ||
3393  (R == Match_PreferE32) ||
3394  (R == Match_MissingFeature && Result != Match_PreferE32) ||
3395  (R == Match_InvalidOperand && Result != Match_MissingFeature
3396  && Result != Match_PreferE32) ||
3397  (R == Match_MnemonicFail && Result != Match_InvalidOperand
3398  && Result != Match_MissingFeature
3399  && Result != Match_PreferE32)) {
3400  Result = R;
3401  ErrorInfo = EI;
3402  }
3403  if (R == Match_Success)
3404  break;
3405  }
3406 
3407  switch (Result) {
3408  default: break;
3409  case Match_Success:
3410  if (!validateInstruction(Inst, IDLoc, Operands)) {
3411  return true;
3412  }
3413  Inst.setLoc(IDLoc);
3414  Out.EmitInstruction(Inst, getSTI());
3415  return false;
3416 
3417  case Match_MissingFeature:
3418  return Error(IDLoc, "instruction not supported on this GPU");
3419 
3420  case Match_MnemonicFail: {
3421  FeatureBitset FBS = ComputeAvailableFeatures(getSTI().getFeatureBits());
3422  std::string Suggestion = AMDGPUMnemonicSpellCheck(
3423  ((AMDGPUOperand &)*Operands[0]).getToken(), FBS);
3424  return Error(IDLoc, "invalid instruction" + Suggestion,
3425  ((AMDGPUOperand &)*Operands[0]).getLocRange());
3426  }
3427 
3428  case Match_InvalidOperand: {
3429  SMLoc ErrorLoc = IDLoc;
3430  if (ErrorInfo != ~0ULL) {
3431  if (ErrorInfo >= Operands.size()) {
3432  return Error(IDLoc, "too few operands for instruction");
3433  }
3434  ErrorLoc = ((AMDGPUOperand &)*Operands[ErrorInfo]).getStartLoc();
3435  if (ErrorLoc == SMLoc())
3436  ErrorLoc = IDLoc;
3437  }
3438  return Error(ErrorLoc, "invalid operand for instruction");
3439  }
3440 
3441  case Match_PreferE32:
3442  return Error(IDLoc, "internal error: instruction without _e64 suffix "
3443  "should be encoded as e32");
3444  }
3445  llvm_unreachable("Implement any new match types added!");
3446 }
3447 
3448 bool AMDGPUAsmParser::ParseAsAbsoluteExpression(uint32_t &Ret) {
3449  int64_t Tmp = -1;
3450  if (getLexer().isNot(AsmToken::Integer) && getLexer().isNot(AsmToken::Identifier)) {
3451  return true;
3452  }
3453  if (getParser().parseAbsoluteExpression(Tmp)) {
3454  return true;
3455  }
3456  Ret = static_cast<uint32_t>(Tmp);
3457  return false;
3458 }
3459 
3460 bool AMDGPUAsmParser::ParseDirectiveMajorMinor(uint32_t &Major,
3461  uint32_t &Minor) {
3462  if (ParseAsAbsoluteExpression(Major))
3463  return TokError("invalid major version");
3464 
3465  if (getLexer().isNot(AsmToken::Comma))
3466  return TokError("minor version number required, comma expected");
3467  Lex();
3468 
3469  if (ParseAsAbsoluteExpression(Minor))
3470  return TokError("invalid minor version");
3471 
3472  return false;
3473 }
3474 
3475 bool AMDGPUAsmParser::ParseDirectiveAMDGCNTarget() {
3476  if (getSTI().getTargetTriple().getArch() != Triple::amdgcn)
3477  return TokError("directive only supported for amdgcn architecture");
3478 
3479  std::string Target;
3480 
3481  SMLoc TargetStart = getTok().getLoc();
3482  if (getParser().parseEscapedString(Target))
3483  return true;
3484  SMRange TargetRange = SMRange(TargetStart, getTok().getLoc());
3485 
3486  std::string ExpectedTarget;
3487  raw_string_ostream ExpectedTargetOS(ExpectedTarget);
3488  IsaInfo::streamIsaVersion(&getSTI(), ExpectedTargetOS);
3489 
3490  if (Target != ExpectedTargetOS.str())
3491  return getParser().Error(TargetRange.Start, "target must match options",
3492  TargetRange);
3493 
3494  getTargetStreamer().EmitDirectiveAMDGCNTarget(Target);
3495  return false;
3496 }
3497 
3498 bool AMDGPUAsmParser::OutOfRangeError(SMRange Range) {
3499  return getParser().Error(Range.Start, "value out of range", Range);
3500 }
3501 
3502 bool AMDGPUAsmParser::calculateGPRBlocks(
3503  const FeatureBitset &Features, bool VCCUsed, bool FlatScrUsed,
3504  bool XNACKUsed, Optional<bool> EnableWavefrontSize32, unsigned NextFreeVGPR,
3505  SMRange VGPRRange, unsigned NextFreeSGPR, SMRange SGPRRange,
3506  unsigned &VGPRBlocks, unsigned &SGPRBlocks) {
3507  // TODO(scott.linder): These calculations are duplicated from
3508  // AMDGPUAsmPrinter::getSIProgramInfo and could be unified.
3509  IsaVersion Version = getIsaVersion(getSTI().getCPU());
3510 
3511  unsigned NumVGPRs = NextFreeVGPR;
3512  unsigned NumSGPRs = NextFreeSGPR;
3513 
3514  if (Version.Major >= 10)
3515  NumSGPRs = 0;
3516  else {
3517  unsigned MaxAddressableNumSGPRs =
3518  IsaInfo::getAddressableNumSGPRs(&getSTI());
3519 
3520  if (Version.Major >= 8 && !Features.test(FeatureSGPRInitBug) &&
3521  NumSGPRs > MaxAddressableNumSGPRs)
3522  return OutOfRangeError(SGPRRange);
3523 
3524  NumSGPRs +=
3525  IsaInfo::getNumExtraSGPRs(&getSTI(), VCCUsed, FlatScrUsed, XNACKUsed);
3526 
3527  if ((Version.Major <= 7 || Features.test(FeatureSGPRInitBug)) &&
3528  NumSGPRs > MaxAddressableNumSGPRs)
3529  return OutOfRangeError(SGPRRange);
3530 
3531  if (Features.test(FeatureSGPRInitBug))
3532  NumSGPRs = IsaInfo::FIXED_NUM_SGPRS_FOR_INIT_BUG;
3533  }
3534 
3535  VGPRBlocks =
3536  IsaInfo::getNumVGPRBlocks(&getSTI(), NumVGPRs, EnableWavefrontSize32);
3537  SGPRBlocks = IsaInfo::getNumSGPRBlocks(&getSTI(), NumSGPRs);
3538 
3539  return false;
3540 }
3541 
3542 bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() {
3543  if (getSTI().getTargetTriple().getArch() != Triple::amdgcn)
3544  return TokError("directive only supported for amdgcn architecture");
3545 
3546  if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA)
3547  return TokError("directive only supported for amdhsa OS");
3548 
3549  StringRef KernelName;
3550  if (getParser().parseIdentifier(KernelName))
3551  return true;
3552 
3553  kernel_descriptor_t KD = getDefaultAmdhsaKernelDescriptor(&getSTI());
3554 
3555  StringSet<> Seen;
3556 
3557  IsaVersion IVersion = getIsaVersion(getSTI().getCPU());
3558 
3559  SMRange VGPRRange;
3560  uint64_t NextFreeVGPR = 0;
3561  SMRange SGPRRange;
3562  uint64_t NextFreeSGPR = 0;
3563  unsigned UserSGPRCount = 0;
3564  bool ReserveVCC = true;
3565  bool ReserveFlatScr = true;
3566  bool ReserveXNACK = hasXNACK();
3567  Optional<bool> EnableWavefrontSize32;
3568 
3569  while (true) {
3570  while (getLexer().is(AsmToken::EndOfStatement))
3571  Lex();
3572 
3573  if (getLexer().isNot(AsmToken::Identifier))
3574  return TokError("expected .amdhsa_ directive or .end_amdhsa_kernel");
3575 
3576  StringRef ID = getTok().getIdentifier();
3577  SMRange IDRange = getTok().getLocRange();
3578  Lex();
3579 
3580  if (ID == ".end_amdhsa_kernel")
3581  break;
3582 
3583  if (Seen.find(ID) != Seen.end())
3584  return TokError(".amdhsa_ directives cannot be repeated");
3585  Seen.insert(ID);
3586 
3587  SMLoc ValStart = getTok().getLoc();
3588  int64_t IVal;
3589  if (getParser().parseAbsoluteExpression(IVal))
3590  return true;
3591  SMLoc ValEnd = getTok().getLoc();
3592  SMRange ValRange = SMRange(ValStart, ValEnd);
3593 
3594  if (IVal < 0)
3595  return OutOfRangeError(ValRange);
3596 
3597  uint64_t Val = IVal;
3598 
3599 #define PARSE_BITS_ENTRY(FIELD, ENTRY, VALUE, RANGE) \
3600  if (!isUInt<ENTRY##_WIDTH>(VALUE)) \
3601  return OutOfRangeError(RANGE); \
3602  AMDHSA_BITS_SET(FIELD, ENTRY, VALUE);
3603 
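  // Illustrative expansion (not part of the original source): for the
  // ".amdhsa_dx10_clamp" entry below, the macro becomes
  //   if (!isUInt<COMPUTE_PGM_RSRC1_ENABLE_DX10_CLAMP_WIDTH>(Val))
  //     return OutOfRangeError(ValRange);
  //   AMDHSA_BITS_SET(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_ENABLE_DX10_CLAMP, Val);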
3604  if (ID == ".amdhsa_group_segment_fixed_size") {
3605  if (!isUInt<sizeof(KD.group_segment_fixed_size) * CHAR_BIT>(Val))
3606  return OutOfRangeError(ValRange);
3607  KD.group_segment_fixed_size = Val;
3608  } else if (ID == ".amdhsa_private_segment_fixed_size") {
3609  if (!isUInt<sizeof(KD.private_segment_fixed_size) * CHAR_BIT>(Val))
3610  return OutOfRangeError(ValRange);
3611  KD.private_segment_fixed_size = Val;
3612  } else if (ID == ".amdhsa_user_sgpr_private_segment_buffer") {
3613  PARSE_BITS_ENTRY(KD.kernel_code_properties,
3614  KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER,
3615  Val, ValRange);
3616  UserSGPRCount += 4;
3617  } else if (ID == ".amdhsa_user_sgpr_dispatch_ptr") {
3618  PARSE_BITS_ENTRY(KD.kernel_code_properties,
3619  KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR, Val,
3620  ValRange);
3621  UserSGPRCount += 2;
3622  } else if (ID == ".amdhsa_user_sgpr_queue_ptr") {
3623  PARSE_BITS_ENTRY(KD.kernel_code_properties,
3624  KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR, Val,
3625  ValRange);
3626  UserSGPRCount += 2;
3627  } else if (ID == ".amdhsa_user_sgpr_kernarg_segment_ptr") {
3628  PARSE_BITS_ENTRY(KD.kernel_code_properties,
3629  KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR,
3630  Val, ValRange);
3631  UserSGPRCount += 2;
3632  } else if (ID == ".amdhsa_user_sgpr_dispatch_id") {
3633  PARSE_BITS_ENTRY(KD.kernel_code_properties,
3634  KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID, Val,
3635  ValRange);
3636  UserSGPRCount += 2;
3637  } else if (ID == ".amdhsa_user_sgpr_flat_scratch_init") {
3638  PARSE_BITS_ENTRY(KD.kernel_code_properties,
3639  KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT, Val,
3640  ValRange);
3641  UserSGPRCount += 2;
3642  } else if (ID == ".amdhsa_user_sgpr_private_segment_size") {
3643  PARSE_BITS_ENTRY(KD.kernel_code_properties,
3644  KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE,
3645  Val, ValRange);
3646  UserSGPRCount += 1;
3647  } else if (ID == ".amdhsa_wavefront_size32") {
3648  if (IVersion.Major < 10)
3649  return getParser().Error(IDRange.Start, "directive requires gfx10+",
3650  IDRange);
3651  EnableWavefrontSize32 = Val;
3652  PARSE_BITS_ENTRY(KD.kernel_code_properties,
3653  KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32,
3654  Val, ValRange);
3655  } else if (ID == ".amdhsa_system_sgpr_private_segment_wavefront_offset") {
3656  PARSE_BITS_ENTRY(
3657  KD.compute_pgm_rsrc2,
3658  COMPUTE_PGM_RSRC2_ENABLE_SGPR_PRIVATE_SEGMENT_WAVEFRONT_OFFSET, Val,
3659  ValRange);
3660  } else if (ID == ".amdhsa_system_sgpr_workgroup_id_x") {
3661  PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
3662  COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X, Val,
3663  ValRange);
3664  } else if (ID == ".amdhsa_system_sgpr_workgroup_id_y") {
3665  PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
3666  COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Y, Val,
3667  ValRange);
3668  } else if (ID == ".amdhsa_system_sgpr_workgroup_id_z") {
3669  PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
3670  COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Z, Val,
3671  ValRange);
3672  } else if (ID == ".amdhsa_system_sgpr_workgroup_info") {
3673  PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
3674  COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_INFO, Val,
3675  ValRange);
3676  } else if (ID == ".amdhsa_system_vgpr_workitem_id") {
3677  PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
3678  COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID, Val,
3679  ValRange);
3680  } else if (ID == ".amdhsa_next_free_vgpr") {
3681  VGPRRange = ValRange;
3682  NextFreeVGPR = Val;
3683  } else if (ID == ".amdhsa_next_free_sgpr") {
3684  SGPRRange = ValRange;
3685  NextFreeSGPR = Val;
3686  } else if (ID == ".amdhsa_reserve_vcc") {
3687  if (!isUInt<1>(Val))
3688  return OutOfRangeError(ValRange);
3689  ReserveVCC = Val;
3690  } else if (ID == ".amdhsa_reserve_flat_scratch") {
3691  if (IVersion.Major < 7)
3692  return getParser().Error(IDRange.Start, "directive requires gfx7+",
3693  IDRange);
3694  if (!isUInt<1>(Val))
3695  return OutOfRangeError(ValRange);
3696  ReserveFlatScr = Val;
3697  } else if (ID == ".amdhsa_reserve_xnack_mask") {
3698  if (IVersion.Major < 8)
3699  return getParser().Error(IDRange.Start, "directive requires gfx8+",
3700  IDRange);
3701  if (!isUInt<1>(Val))
3702  return OutOfRangeError(ValRange);
3703  ReserveXNACK = Val;
3704  } else if (ID == ".amdhsa_float_round_mode_32") {
3705  PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
3706  COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32, Val, ValRange);
3707  } else if (ID == ".amdhsa_float_round_mode_16_64") {
3708  PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
3709  COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_16_64, Val, ValRange);
3710  } else if (ID == ".amdhsa_float_denorm_mode_32") {
3711  PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
3712  COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_32, Val, ValRange);
3713  } else if (ID == ".amdhsa_float_denorm_mode_16_64") {
3714  PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
3715  COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64, Val,
3716  ValRange);
3717  } else if (ID == ".amdhsa_dx10_clamp") {
3718  PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
3719  COMPUTE_PGM_RSRC1_ENABLE_DX10_CLAMP, Val, ValRange);
3720  } else if (ID == ".amdhsa_ieee_mode") {
3721  PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_ENABLE_IEEE_MODE,
3722  Val, ValRange);
3723  } else if (ID == ".amdhsa_fp16_overflow") {
3724  if (IVersion.Major < 9)
3725  return getParser().Error(IDRange.Start, "directive requires gfx9+",
3726  IDRange);
3727  PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_FP16_OVFL, Val,
3728  ValRange);
3729  } else if (ID == ".amdhsa_workgroup_processor_mode") {
3730  if (IVersion.Major < 10)
3731  return getParser().Error(IDRange.Start, "directive requires gfx10+",
3732  IDRange);
3733  PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_WGP_MODE, Val,
3734  ValRange);
3735  } else if (ID == ".amdhsa_memory_ordered") {
3736  if (IVersion.Major < 10)
3737  return getParser().Error(IDRange.Start, "directive requires gfx10+",
3738  IDRange);
3739  PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_MEM_ORDERED, Val,
3740  ValRange);
3741  } else if (ID == ".amdhsa_forward_progress") {
3742  if (IVersion.Major < 10)
3743  return getParser().Error(IDRange.Start, "directive requires gfx10+",
3744  IDRange);
3745  PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_FWD_PROGRESS, Val,
3746  ValRange);
3747  } else if (ID == ".amdhsa_exception_fp_ieee_invalid_op") {
3748  PARSE_BITS_ENTRY(
3749  KD.compute_pgm_rsrc2,
3750  COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION, Val,
3751  ValRange);
3752  } else if (ID == ".amdhsa_exception_fp_denorm_src") {
3753  PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
3754  COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_FP_DENORMAL_SOURCE,
3755  Val, ValRange);
3756  } else if (ID == ".amdhsa_exception_fp_ieee_div_zero") {
3757  PARSE_BITS_ENTRY(
3758  KD.compute_pgm_rsrc2,
3759  COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO, Val,
3760  ValRange);
3761  } else if (ID == ".amdhsa_exception_fp_ieee_overflow") {
3762  PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
3763  COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW,
3764  Val, ValRange);
3765  } else if (ID == ".amdhsa_exception_fp_ieee_underflow") {
3766  PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
3767  COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW,
3768  Val, ValRange);
3769  } else if (ID == ".amdhsa_exception_fp_ieee_inexact") {
3770  PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
3771  COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INEXACT,
3772  Val, ValRange);
3773  } else if (ID == ".amdhsa_exception_int_div_zero") {
3774  PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
3775  COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO,
3776  Val, ValRange);
3777  } else {
3778  return getParser().Error(IDRange.Start,
3779  "unknown .amdhsa_kernel directive", IDRange);
3780  }
3781 
3782 #undef PARSE_BITS_ENTRY
3783  }
3784 
3785  if (Seen.find(".amdhsa_next_free_vgpr") == Seen.end())
3786  return TokError(".amdhsa_next_free_vgpr directive is required");
3787 
3788  if (Seen.find(".amdhsa_next_free_sgpr") == Seen.end())
3789  return TokError(".amdhsa_next_free_sgpr directive is required");
3790 
3791  unsigned VGPRBlocks;
3792  unsigned SGPRBlocks;
3793  if (calculateGPRBlocks(getFeatureBits(), ReserveVCC, ReserveFlatScr,
3794  ReserveXNACK, EnableWavefrontSize32, NextFreeVGPR,
3795  VGPRRange, NextFreeSGPR, SGPRRange, VGPRBlocks,
3796  SGPRBlocks))
3797  return true;
3798 
3799  if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT_WIDTH>(
3800  VGPRBlocks))
3801  return OutOfRangeError(VGPRRange);
3802  AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
3803  COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT, VGPRBlocks);
3804 
3805  if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT_WIDTH>(
3806  SGPRBlocks))
3807  return OutOfRangeError(SGPRRange);
3808  AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
3809  COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT,
3810  SGPRBlocks);
3811 
3812  if (!isUInt<COMPUTE_PGM_RSRC2_USER_SGPR_COUNT_WIDTH>(UserSGPRCount))
3813  return TokError("too many user SGPRs enabled");
3814  AMDHSA_BITS_SET(KD.compute_pgm_rsrc2, COMPUTE_PGM_RSRC2_USER_SGPR_COUNT,
3815  UserSGPRCount);
3816 
3817  getTargetStreamer().EmitAmdhsaKernelDescriptor(
3818  getSTI(), KernelName, KD, NextFreeVGPR, NextFreeSGPR, ReserveVCC,
3819  ReserveFlatScr, ReserveXNACK);
3820  return false;
3821 }
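// Minimal illustrative input accepted by ParseDirectiveAMDHSAKernel above
// (kernel name and values are arbitrary; only the two .amdhsa_next_free_*
// directives are mandatory):
//   .amdhsa_kernel my_kernel
//     .amdhsa_next_free_vgpr 8
//     .amdhsa_next_free_sgpr 16
//   .end_amdhsa_kernel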
3822 
3823 bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectVersion() {
3824  uint32_t Major;
3825  uint32_t Minor;
3826 
3827  if (ParseDirectiveMajorMinor(Major, Minor))
3828  return true;
3829 
3830  getTargetStreamer().EmitDirectiveHSACodeObjectVersion(Major, Minor);
3831  return false;
3832 }
3833 
3834 bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectISA() {
3835  uint32_t Major;
3836  uint32_t Minor;
3837  uint32_t Stepping;
3838  StringRef VendorName;
3839  StringRef ArchName;
3840 
3841  // If this directive has no arguments, then use the ISA version for the
3842  // targeted GPU.
3843  if (getLexer().is(AsmToken::EndOfStatement)) {
3844  AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
3845  getTargetStreamer().EmitDirectiveHSACodeObjectISA(ISA.Major, ISA.Minor,
3846  ISA.Stepping,
3847  "AMD", "AMDGPU");
3848  return false;
3849  }
3850 
3851  if (ParseDirectiveMajorMinor(Major, Minor))
3852  return true;
3853 
3854  if (getLexer().isNot(AsmToken::Comma))
3855  return TokError("stepping version number required, comma expected");
3856  Lex();
3857 
3858  if (ParseAsAbsoluteExpression(Stepping))
3859  return TokError("invalid stepping version");
3860 
3861  if (getLexer().isNot(AsmToken::Comma))
3862  return TokError("vendor name required, comma expected");
3863  Lex();
3864 
3865  if (getLexer().isNot(AsmToken::String))
3866  return TokError("invalid vendor name");
3867 
3868  VendorName = getLexer().getTok().getStringContents();
3869  Lex();
3870 
3871  if (getLexer().isNot(AsmToken::Comma))
3872  return TokError("arch name required, comma expected");
3873  Lex();
3874 
3875  if (getLexer().isNot(AsmToken::String))
3876  return TokError("invalid arch name");
3877 
3878  ArchName = getLexer().getTok().getStringContents();
3879  Lex();
3880 
3881  getTargetStreamer().EmitDirectiveHSACodeObjectISA(Major, Minor, Stepping,
3882  VendorName, ArchName);
3883  return false;
3884 }
3885 
3886 bool AMDGPUAsmParser::ParseAMDKernelCodeTValue(StringRef ID,
3887  amd_kernel_code_t &Header) {
3888  // max_scratch_backing_memory_byte_size is deprecated. Ignore it while parsing
3889  // assembly for backwards compatibility.
3890  if (ID == "max_scratch_backing_memory_byte_size") {
3891  Parser.eatToEndOfStatement();
3892  return false;
3893  }
3894 
3895  SmallString<40> ErrStr;
3896  raw_svector_ostream Err(ErrStr);
3897  if (!parseAmdKernelCodeField(ID, getParser(), Header, Err)) {
3898  return TokError(Err.str());
3899  }
3900  Lex();
3901 
3902  if (ID == "enable_wavefront_size32") {
3903  if (Header.code_properties & AMD_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32) {
3904  if (!isGFX10())
3905  return TokError("enable_wavefront_size32=1 is only allowed on GFX10+");
3906  if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32])
3907  return TokError("enable_wavefront_size32=1 requires +WavefrontSize32");
3908  } else {
3909  if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64])
3910  return TokError("enable_wavefront_size32=0 requires +WavefrontSize64");
3911  }
3912  }
3913 
3914  if (ID == "wavefront_size") {
3915  if (Header.wavefront_size == 5) {
3916  if (!isGFX10())
3917  return TokError("wavefront_size=5 is only allowed on GFX10+");
3918  if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32])
3919  return TokError("wavefront_size=5 requires +WavefrontSize32");
3920  } else if (Header.wavefront_size == 6) {
3921  if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64])
3922  return TokError("wavefront_size=6 requires +WavefrontSize64");
3923  }
3924  }
3925 
3926  if (ID == "enable_wgp_mode") {
3927  if (G_00B848_WGP_MODE(Header.compute_pgm_resource_registers) && !isGFX10())
3928  return TokError("enable_wgp_mode=1 is only allowed on GFX10+");
3929  }
3930 
3931  if (ID == "enable_mem_ordered") {
3932  if (G_00B848_MEM_ORDERED(Header.compute_pgm_resource_registers) && !isGFX10())
3933  return TokError("enable_mem_ordered=1 is only allowed on GFX10+");
3934  }
3935 
3936  if (ID == "enable_fwd_progress") {
3937  if (G_00B848_FWD_PROGRESS(Header.compute_pgm_resource_registers) && !isGFX10())
3938  return TokError("enable_fwd_progress=1 is only allowed on GFX10+");
3939  }
3940 
3941  return false;
3942 }
3943 
3944 bool AMDGPUAsmParser::ParseDirectiveAMDKernelCodeT() {
3945  amd_kernel_code_t Header;
3946  AMDGPU::initDefaultAMDKernelCodeT(Header, &getSTI());
3947 
3948  while (true) {
3949  // Lex EndOfStatement. This is in a while loop, because lexing a comment
3950  // will set the current token to EndOfStatement.
3951  while(getLexer().is(AsmToken::EndOfStatement))
3952  Lex();
3953 
3954  if (getLexer().isNot(AsmToken::Identifier))
3955  return TokError("expected value identifier or .end_amd_kernel_code_t");
3956 
3957  StringRef ID = getLexer().getTok().getIdentifier();
3958  Lex();
3959 
3960  if (ID == ".end_amd_kernel_code_t")
3961  break;
3962 
3963  if (ParseAMDKernelCodeTValue(ID, Header))
3964  return true;
3965  }
3966 
3967  getTargetStreamer().EmitAMDKernelCodeT(Header);
3968 
3969  return false;
3970 }
3971 
3972 bool AMDGPUAsmParser::ParseDirectiveAMDGPUHsaKernel() {
3973  if (getLexer().isNot(AsmToken::Identifier))
3974  return TokError("expected symbol name");
3975 
3976  StringRef KernelName = Parser.getTok().getString();
3977 
3978  getTargetStreamer().EmitAMDGPUSymbolType(KernelName,
3979  ELF::STT_AMDGPU_HSA_KERNEL);
3980  Lex();
3981  if (!AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI()))
3982  KernelScope.initialize(getContext());
3983  return false;
3984 }
3985 
3986 bool AMDGPUAsmParser::ParseDirectiveISAVersion() {
3987  if (getSTI().getTargetTriple().getArch() != Triple::amdgcn) {
3988  return Error(getParser().getTok().getLoc(),
3989  ".amd_amdgpu_isa directive is not available on non-amdgcn "
3990  "architectures");
3991  }
3992 
3993  auto ISAVersionStringFromASM = getLexer().getTok().getStringContents();
3994 
3995  std::string ISAVersionStringFromSTI;
3996  raw_string_ostream ISAVersionStreamFromSTI(ISAVersionStringFromSTI);
3997  IsaInfo::streamIsaVersion(&getSTI(), ISAVersionStreamFromSTI);
3998 
3999  if (ISAVersionStringFromASM != ISAVersionStreamFromSTI.str()) {
4000  return Error(getParser().getTok().getLoc(),
4001  ".amd_amdgpu_isa directive does not match triple and/or mcpu "
4002  "arguments specified through the command line");
4003  }
4004 
4005  getTargetStreamer().EmitISAVersion(ISAVersionStreamFromSTI.str());
4006  Lex();
4007 
4008  return false;
4009 }
4010 
4011 bool AMDGPUAsmParser::ParseDirectiveHSAMetadata() {
4012  const char *AssemblerDirectiveBegin;
4013  const char *AssemblerDirectiveEnd;
4014  std::tie(AssemblerDirectiveBegin, AssemblerDirectiveEnd) =
4015  IsaInfo::hasCodeObjectV3(&getSTI())
4016  ? std::make_tuple(HSAMD::V3::AssemblerDirectiveBegin,
4017  HSAMD::V3::AssemblerDirectiveEnd)
4018  : std::make_tuple(HSAMD::AssemblerDirectiveBegin,
4019  HSAMD::AssemblerDirectiveEnd);
4020 
4021  if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA) {
4022  return Error(getParser().getTok().getLoc(),
4023  (Twine(AssemblerDirectiveBegin) + Twine(" directive is "
4024  "not available on non-amdhsa OSes")).str());
4025  }
4026 
4027  std::string HSAMetadataString;
4028  if (ParseToEndDirective(AssemblerDirectiveBegin, AssemblerDirectiveEnd,
4029  HSAMetadataString))
4030  return true;
4031 
4032  if (IsaInfo::hasCodeObjectV3(&getSTI())) {
4033  if (!getTargetStreamer().EmitHSAMetadataV3(HSAMetadataString))
4034  return Error(getParser().getTok().getLoc(), "invalid HSA metadata");
4035  } else {
4036  if (!getTargetStreamer().EmitHSAMetadataV2(HSAMetadataString))
4037  return Error(getParser().getTok().getLoc(), "invalid HSA metadata");
4038  }
4039 
4040  return false;
4041 }
4042 
4043 /// Common code to parse out a block of text (typically YAML) between start and
4044 /// end directives.
4045 bool AMDGPUAsmParser::ParseToEndDirective(const char *AssemblerDirectiveBegin,
4046  const char *AssemblerDirectiveEnd,
4047  std::string &CollectString) {
4048 
4049  raw_string_ostream CollectStream(CollectString);
4050 
4051  getLexer().setSkipSpace(false);
4052 
4053  bool FoundEnd = false;
4054  while (!getLexer().is(AsmToken::Eof)) {
4055  while (getLexer().is(AsmToken::Space)) {
4056  CollectStream << getLexer().getTok().getString();
4057  Lex();
4058  }
4059 
4060  if (getLexer().is(AsmToken::Identifier)) {
4061  StringRef ID = getLexer().getTok().getIdentifier();
4062  if (ID == AssemblerDirectiveEnd) {
4063  Lex();
4064  FoundEnd = true;
4065  break;
4066  }
4067  }
4068 
4069  CollectStream << Parser.parseStringToEndOfStatement()
4070  << getContext().getAsmInfo()->getSeparatorString();
4071 
4072  Parser.eatToEndOfStatement();
4073  }
4074 
4075  getLexer().setSkipSpace(true);
4076 
4077  if (getLexer().is(AsmToken::Eof) && !FoundEnd) {
4078  return TokError(Twine("expected directive ") +
4079  Twine(AssemblerDirectiveEnd) + Twine(" not found"));
4080  }
4081 
4082  CollectStream.flush();
4083  return false;
4084 }
4085 
4086 /// Parse the assembler directive for new MsgPack-format PAL metadata.
4087 bool AMDGPUAsmParser::ParseDirectivePALMetadataBegin() {
4088  std::string String;
4089  if (ParseToEndDirective(AMDGPU::PALMD::AssemblerDirectiveBegin,
4090  AMDGPU::PALMD::AssemblerDirectiveEnd, String))
4091  return true;
4092 
4093  auto PALMetadata = getTargetStreamer().getPALMetadata();
4094  if (!PALMetadata->setFromString(String))
4095  return Error(getParser().getTok().getLoc(), "invalid PAL metadata");
4096  return false;
4097 }
4098 
4099 /// Parse the assembler directive for old linear-format PAL metadata.
4100 bool AMDGPUAsmParser::ParseDirectivePALMetadata() {
4101  if (getSTI().getTargetTriple().getOS() != Triple::AMDPAL) {
4102  return Error(getParser().getTok().getLoc(),
4103  (Twine(PALMD::AssemblerDirective) + Twine(" directive is "
4104  "not available on non-amdpal OSes")).str());
4105  }
4106 
4107  auto PALMetadata = getTargetStreamer().getPALMetadata();
4108  PALMetadata->setLegacy();
4109  for (;;) {
4110  uint32_t Key, Value;
4111  if (ParseAsAbsoluteExpression(Key)) {
4112  return TokError(Twine("invalid value in ") +
4113  Twine(PALMD::AssemblerDirective));
4114  }
4115  if (getLexer().isNot(AsmToken::Comma)) {
4116  return TokError(Twine("expected an even number of values in ") +
4117  Twine(PALMD::AssemblerDirective));
4118  }
4119  Lex();
4120  if (ParseAsAbsoluteExpression(Value)) {
4121  return TokError(Twine("invalid value in ") +
4122  Twine(PALMD::AssemblerDirective));
4123  }
4124  PALMetadata->setRegister(Key, Value);
4125  if (getLexer().isNot(AsmToken::Comma))
4126  break;
4127  Lex();
4128  }
4129  return false;
4130 }
4131 
4132 /// ParseDirectiveAMDGPULDS
4133 /// ::= .amdgpu_lds identifier ',' size_expression [',' align_expression]
4134 bool AMDGPUAsmParser::ParseDirectiveAMDGPULDS() {
4135  if (getParser().checkForValidSection())
4136  return true;
4137 
4138  StringRef Name;
4139  SMLoc NameLoc = getLexer().getLoc();
4140  if (getParser().parseIdentifier(Name))
4141  return TokError("expected identifier in directive");
4142 
4143  MCSymbol *Symbol = getContext().getOrCreateSymbol(Name);
4144  if (parseToken(AsmToken::Comma, "expected ','"))
4145  return true;
4146 
4147  unsigned LocalMemorySize = AMDGPU::IsaInfo::getLocalMemorySize(&getSTI());
4148 
4149  int64_t Size;
4150  SMLoc SizeLoc = getLexer().getLoc();
4151  if (getParser().parseAbsoluteExpression(Size))
4152  return true;
4153  if (Size < 0)
4154  return Error(SizeLoc, "size must be non-negative");
4155  if (Size > LocalMemorySize)
4156  return Error(SizeLoc, "size is too large");
4157 
4158  int64_t Align = 4;
4159  if (getLexer().is(AsmToken::Comma)) {
4160  Lex();
4161  SMLoc AlignLoc = getLexer().getLoc();
4162  if (getParser().parseAbsoluteExpression(Align))
4163  return true;
4164  if (Align < 0 || !isPowerOf2_64(Align))
4165  return Error(AlignLoc, "alignment must be a power of two");
4166 
4167  // Alignment larger than the size of LDS is possible in theory, as long
4168  // as the linker manages to place the symbol at address 0, but we do want
4169  // to make sure the alignment fits nicely into a 32-bit integer.
4170  if (Align >= 1u << 31)
4171  return Error(AlignLoc, "alignment is too large");
4172  }
4173 
4174  if (parseToken(AsmToken::EndOfStatement,
4175  "unexpected token in '.amdgpu_lds' directive"))
4176  return true;
4177 
4178  Symbol->redefineIfPossible();
4179  if (!Symbol->isUndefined())
4180  return Error(NameLoc, "invalid symbol redefinition");
4181 
4182  getTargetStreamer().emitAMDGPULDS(Symbol, Size, Align);
4183  return false;
4184 }
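// Illustrative use of the directive parsed above (identifier and values are
// arbitrary): ".amdgpu_lds my_lds_var, 512, 16" declares 512 bytes of LDS
// aligned to 16 bytes; omitting the third operand keeps the default align of 4.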
4185 
4186 bool AMDGPUAsmParser::ParseDirective(AsmToken DirectiveID) {
4187  StringRef IDVal = DirectiveID.getString();
4188 
4189  if (AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())) {
4190  if (IDVal == ".amdgcn_target")
4191  return ParseDirectiveAMDGCNTarget();
4192 
4193  if (IDVal == ".amdhsa_kernel")
4194  return ParseDirectiveAMDHSAKernel();
4195 
4196  // TODO: Restructure/combine with PAL metadata directive.
4197  if (IDVal == AMDGPU::HSAMD::V3::AssemblerDirectiveBegin)
4198  return ParseDirectiveHSAMetadata();
4199  } else {
4200  if (IDVal == ".hsa_code_object_version")
4201  return ParseDirectiveHSACodeObjectVersion();
4202 
4203  if (IDVal == ".hsa_code_object_isa")
4204  return ParseDirectiveHSACodeObjectISA();
4205 
4206  if (IDVal == ".amd_kernel_code_t")
4207  return ParseDirectiveAMDKernelCodeT();
4208 
4209  if (IDVal == ".amdgpu_hsa_kernel")
4210  return ParseDirectiveAMDGPUHsaKernel();
4211 
4212  if (IDVal == ".amd_amdgpu_isa")
4213  return ParseDirectiveISAVersion();
4214 
4216  return ParseDirectiveHSAMetadata();
4217  }
4218 
4219  if (IDVal == ".amdgpu_lds")
4220  return ParseDirectiveAMDGPULDS();
4221 
4222  if (IDVal == PALMD::AssemblerDirectiveBegin)
4223  return ParseDirectivePALMetadataBegin();
4224 
4225  if (IDVal == PALMD::AssemblerDirective)
4226  return ParseDirectivePALMetadata();
4227 
4228  return true;
4229 }
4230 
4231 bool AMDGPUAsmParser::subtargetHasRegister(const MCRegisterInfo &MRI,
4232  unsigned RegNo) const {
4233 
4234  for (MCRegAliasIterator R(AMDGPU::TTMP12_TTMP13_TTMP14_TTMP15, &MRI, true);
4235  R.isValid(); ++R) {
4236  if (*R == RegNo)
4237  return isGFX9() || isGFX10();
4238  }
4239 
4240  // GFX10 has 2 more SGPRs 104 and 105.
4241  for (MCRegAliasIterator R(AMDGPU::SGPR104_SGPR105, &MRI, true);
4242  R.isValid(); ++R) {
4243  if (*R == RegNo)
4244  return hasSGPR104_SGPR105();
4245  }
4246 
4247  switch (RegNo) {
4248  case AMDGPU::SRC_SHARED_BASE:
4249  case AMDGPU::SRC_SHARED_LIMIT:
4250  case AMDGPU::SRC_PRIVATE_BASE:
4251  case AMDGPU::SRC_PRIVATE_LIMIT:
4252  case AMDGPU::SRC_POPS_EXITING_WAVE_ID:
4253  return !isCI() && !isSI() && !isVI();
4254  case AMDGPU::TBA:
4255  case AMDGPU::TBA_LO:
4256  case AMDGPU::TBA_HI:
4257  case AMDGPU::TMA:
4258  case AMDGPU::TMA_LO:
4259  case AMDGPU::TMA_HI:
4260  return !isGFX9() && !isGFX10();
4261  case AMDGPU::XNACK_MASK:
4262  case AMDGPU::XNACK_MASK_LO:
4263  case AMDGPU::XNACK_MASK_HI:
4264  return !isCI() && !isSI() && !isGFX10() && hasXNACK();
4265  case AMDGPU::SGPR_NULL:
4266  return isGFX10();
4267  default:
4268  break;
4269  }
4270 
4271  if (isCI())
4272  return true;
4273 
4274  if (isSI() || isGFX10()) {
4275  // No flat_scr on SI.
4276  // On GFX10 flat scratch is not a valid register operand and can only be
4277  // accessed with s_setreg/s_getreg.
4278  switch (RegNo) {
4279  case AMDGPU::FLAT_SCR:
4280  case AMDGPU::FLAT_SCR_LO:
4281  case AMDGPU::FLAT_SCR_HI:
4282  return false;
4283  default:
4284  return true;
4285  }
4286  }
4287 
4288  // VI only has 102 SGPRs, so make sure we aren't trying to use the 2 more that
4289  // SI/CI have.
4290  for (MCRegAliasIterator R(AMDGPU::SGPR102_SGPR103, &MRI, true);
4291  R.isValid(); ++R) {
4292  if (*R == RegNo)
4293  return hasSGPR102_SGPR103();
4294  }
4295 
4296  return true;
4297 }
4298 
4299 OperandMatchResultTy
4300 AMDGPUAsmParser::parseOperand(OperandVector &Operands, StringRef Mnemonic,
4301  OperandMode Mode) {
4302  // Try to parse with a custom parser
4303  OperandMatchResultTy ResTy = MatchOperandParserImpl(Operands, Mnemonic);
4304 
4305  // If we successfully parsed the operand or if there was an error parsing,
4306  // we are done.
4307  //
4308  // If we are parsing after we reach EndOfStatement then this means we
4309  // are appending default values to the Operands list. This is only done
4310  // by custom parser, so we shouldn't continue on to the generic parsing.
4311  if (ResTy == MatchOperand_Success || ResTy == MatchOperand_ParseFail ||
4312  getLexer().is(AsmToken::EndOfStatement))
4313  return ResTy;
4314 
4315  if (Mode == OperandMode_NSA && getLexer().is(AsmToken::LBrac)) {
4316  unsigned Prefix = Operands.size();
4317  SMLoc LBraceLoc = getTok().getLoc();
4318  Parser.Lex(); // eat the '['
4319 
4320  for (;;) {
4321  ResTy = parseReg(Operands);
4322  if (ResTy != MatchOperand_Success)
4323  return ResTy;
4324 
4325  if (getLexer().is(AsmToken::RBrac))
4326  break;
4327 
4328  if (getLexer().isNot(AsmToken::Comma))
4329  return MatchOperand_ParseFail;
4330  Parser.Lex();
4331  }
4332 
4333  if (Operands.size() - Prefix > 1) {
4334  Operands.insert(Operands.begin() + Prefix,
4335  AMDGPUOperand::CreateToken(this, "[", LBraceLoc));
4336  Operands.push_back(AMDGPUOperand::CreateToken(this, "]",
4337  getTok().getLoc()));
4338  }
4339 
4340  Parser.Lex(); // eat the ']'
4341  return MatchOperand_Success;
4342  }
4343 
4344  return parseRegOrImm(Operands);
4345 }
4346 
4347 StringRef AMDGPUAsmParser::parseMnemonicSuffix(StringRef Name) {
4348  // Clear any forced encodings from the previous instruction.
4349  setForcedEncodingSize(0);
4350  setForcedDPP(false);
4351  setForcedSDWA(false);
4352 
4353  if (Name.endswith("_e64")) {
4354  setForcedEncodingSize(64);
4355  return Name.substr(0, Name.size() - 4);
4356  } else if (Name.endswith("_e32")) {
4357  setForcedEncodingSize(32);
4358  return Name.substr(0, Name.size() - 4);
4359  } else if (Name.endswith("_dpp")) {
4360  setForcedDPP(true);
4361  return Name.substr(0, Name.size() - 4);
4362  } else if (Name.endswith("_sdwa")) {
4363  setForcedSDWA(true);
4364  return Name.substr(0, Name.size() - 5);
4365  }
4366  return Name;
4367 }
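// For example (illustrative mnemonics): "v_add_f32_e64" is returned as
// "v_add_f32" with a forced 64-bit encoding, while "v_mov_b32_sdwa" strips
// "_sdwa" and sets the forced-SDWA flag instead.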
4368 
4369 bool AMDGPUAsmParser::ParseInstruction(ParseInstructionInfo &Info,
4370  StringRef Name,
4371  SMLoc NameLoc, OperandVector &Operands) {
4372  // Add the instruction mnemonic
4373  Name = parseMnemonicSuffix(Name);
4374  Operands.push_back(AMDGPUOperand::CreateToken(this, Name, NameLoc));
4375 
4376  bool IsMIMG = Name.startswith("image_");
4377 
4378  while (!getLexer().is(AsmToken::EndOfStatement)) {
4379  OperandMode Mode = OperandMode_Default;
4380  if (IsMIMG && isGFX10() && Operands.size() == 2)
4381  Mode = OperandMode_NSA;
4382  OperandMatchResultTy Res = parseOperand(Operands, Name, Mode);
4383 
4384  // Eat the comma or space if there is one.
4385  if (getLexer().is(AsmToken::Comma))
4386  Parser.Lex();
4387 
4388  switch (Res) {
4389  case MatchOperand_Success: break;
4390  case MatchOperand_ParseFail:
4391  // FIXME: use real operand location rather than the current location.
4392  Error(getLexer().getLoc(), "failed parsing operand.");
4393  while (!getLexer().is(AsmToken::EndOfStatement)) {
4394  Parser.Lex();
4395  }
4396  return true;
4397  case MatchOperand_NoMatch:
4398  // FIXME: use real operand location rather than the current location.
4399  Error(getLexer().getLoc(), "not a valid operand.");
4400  while (!getLexer().is(AsmToken::EndOfStatement)) {
4401  Parser.Lex();
4402  }
4403  return true;
4404  }
4405  }
4406 
4407  return false;
4408 }
4409 
4410 //===----------------------------------------------------------------------===//
4411 // Utility functions
4412 //===----------------------------------------------------------------------===//
4413 
4414 OperandMatchResultTy
4415 AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, int64_t &IntVal) {
4416 
4417  if (!trySkipId(Prefix, AsmToken::Colon))
4418  return MatchOperand_NoMatch;
4419 
4420  return parseExpr(IntVal) ? MatchOperand_Success : MatchOperand_ParseFail;
4421 }
4422 
4423 OperandMatchResultTy
4424 AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, OperandVector &Operands,
4425  AMDGPUOperand::ImmTy ImmTy,
4426  bool (*ConvertResult)(int64_t&)) {
4427  SMLoc S = getLoc();
4428  int64_t Value = 0;
4429 
4430  OperandMatchResultTy Res = parseIntWithPrefix(Prefix, Value);
4431  if (Res != MatchOperand_Success)
4432  return Res;
4433 
4434  if (ConvertResult && !ConvertResult(Value)) {
4435  Error(S, "invalid " + StringRef(Prefix) + " value.");
4436  }
4437 
4438  Operands.push_back(AMDGPUOperand::CreateImm(this, Value, S, ImmTy));
4439  return MatchOperand_Success;
4440 }
4441 
4442 OperandMatchResultTy
4443 AMDGPUAsmParser::parseOperandArrayWithPrefix(const char *Prefix,
4444  OperandVector &Operands,
4445  AMDGPUOperand::ImmTy ImmTy,
4446  bool (*ConvertResult)(int64_t&)) {
4447  SMLoc S = getLoc();
4448  if (!trySkipId(Prefix, AsmToken::Colon))
4449  return MatchOperand_NoMatch;
4450 
4451  if (!skipToken(AsmToken::LBrac, "expected a left square bracket"))
4452  return MatchOperand_ParseFail;
4453 
4454  unsigned Val = 0;
4455  const unsigned MaxSize = 4;
4456 
4457  // FIXME: How to verify the number of elements matches the number of src
4458  // operands?
4459  for (int I = 0; ; ++I) {
4460  int64_t Op;
4461  SMLoc Loc = getLoc();
4462  if (!parseExpr(Op))
4463  return MatchOperand_ParseFail;
4464 
4465  if (Op != 0 && Op != 1) {
4466  Error(Loc, "invalid " + StringRef(Prefix) + " value.");
4467  return MatchOperand_ParseFail;
4468  }
4469 
4470  Val |= (Op << I);
4471 
4472  if (trySkipToken(AsmToken::RBrac))
4473  break;
4474 
4475  if (I + 1 == MaxSize) {
4476  Error(getLoc(), "expected a closing square bracket");
4477  return MatchOperand_ParseFail;
4478  }
4479 
4480  if (!skipToken(AsmToken::Comma, "expected a comma"))
4481  return MatchOperand_ParseFail;
4482  }
4483 
4484  Operands.push_back(AMDGPUOperand::CreateImm(this, Val, S, ImmTy));
4485  return MatchOperand_Success;
4486 }
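// Illustrative operand accepted by this parser (assuming an "op_sel"-style
// prefix): "op_sel:[0,1,1,0]" yields Val = 0b0110, i.e. element I sets bit I.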
4487 
4488 OperandMatchResultTy
4489 AMDGPUAsmParser::parseNamedBit(const char *Name, OperandVector &Operands,
4490  AMDGPUOperand::ImmTy ImmTy) {
4491  int64_t Bit = 0;
4492  SMLoc S = Parser.getTok().getLoc();
4493 
4494  // We are at the end of the statement, and this is a default argument, so
4495  // use a default value.
4496  if (getLexer().isNot(AsmToken::EndOfStatement)) {
4497  switch(getLexer().getKind()) {
4498  case AsmToken::Identifier: {
4499  StringRef Tok = Parser.getTok().getString();
4500  if (Tok == Name) {
4501  if (Tok == "r128" && isGFX9())
4502  Error(S, "r128 modifier is not supported on this GPU");
4503  if (Tok == "a16" && !isGFX9() && !isGFX10())
4504  Error(S, "a16 modifier is not supported on this GPU");
4505  Bit = 1;
4506  Parser.Lex();
4507  } else if (Tok.startswith("no") && Tok.endswith(Name)) {
4508  Bit = 0;
4509  Parser.Lex();
4510  } else {
4511  return MatchOperand_NoMatch;
4512  }
4513  break;
4514  }
4515  default:
4516  return MatchOperand_NoMatch;
4517  }
4518  }
4519 
4520  if (!isGFX10() && ImmTy == AMDGPUOperand::ImmTyDLC)
4521  return MatchOperand_ParseFail;
4522 
4523  Operands.push_back(AMDGPUOperand::CreateImm(this, Bit, S, ImmTy));
4524  return MatchOperand_Success;
4525 }
4526 
4527 static void addOptionalImmOperand(
4528  MCInst& Inst, const OperandVector& Operands,
4529  AMDGPUAsmParser::OptionalImmIndexMap& OptionalIdx,
4530  AMDGPUOperand::ImmTy ImmT,
4531  int64_t Default = 0) {
4532  auto i = OptionalIdx.find(ImmT);
4533  if (i != OptionalIdx.end()) {
4534  unsigned Idx = i->second;
4535  ((AMDGPUOperand &)*Operands[Idx]).addImmOperands(Inst, 1);
4536  } else {
4537  Inst.addOperand(MCOperand::createImm(Default));
4538  }
4539 }
4540 
4541 OperandMatchResultTy
4542 AMDGPUAsmParser::parseStringWithPrefix(StringRef Prefix, StringRef &Value) {
4543  if (getLexer().isNot(AsmToken::Identifier)) {
4544  return MatchOperand_NoMatch;
4545  }
4546  StringRef Tok = Parser.getTok().getString();
4547  if (Tok != Prefix) {
4548  return MatchOperand_NoMatch;
4549  }
4550 
4551  Parser.Lex();
4552  if (getLexer().isNot(AsmToken::Colon)) {
4553  return MatchOperand_ParseFail;
4554  }
4555 
4556  Parser.Lex();
4557  if (getLexer().isNot(AsmToken::Identifier)) {
4558  return MatchOperand_ParseFail;
4559  }
4560 
4561  Value = Parser.getTok().getString();
4562  return MatchOperand_Success;
4563 }
4564 
4565 // dfmt and nfmt (in a tbuffer instruction) are parsed as one to allow their
4566 // values to live in a joint format operand in the MCInst encoding.
4567 OperandMatchResultTy
4568 AMDGPUAsmParser::parseDfmtNfmt(OperandVector &Operands) {
4569  SMLoc S = Parser.getTok().getLoc();
4570  int64_t Dfmt = 0, Nfmt = 0;
4571  // dfmt and nfmt can appear in either order, and each is optional.
4572  bool GotDfmt = false, GotNfmt = false;
4573  while (!GotDfmt || !GotNfmt) {
4574  if (!GotDfmt) {
4575  auto Res = parseIntWithPrefix("dfmt", Dfmt);
4576  if (Res != MatchOperand_NoMatch) {
4577  if (Res != MatchOperand_Success)
4578  return Res;
4579  if (Dfmt >= 16) {
4580  Error(Parser.getTok().getLoc(), "out of range dfmt");
4581  return MatchOperand_ParseFail;
4582  }
4583  GotDfmt = true;
4584  Parser.Lex();
4585  continue;
4586  }
4587  }
4588  if (!GotNfmt) {
4589  auto Res = parseIntWithPrefix("nfmt", Nfmt);
4590  if (Res != MatchOperand_NoMatch) {
4591  if (Res != MatchOperand_Success)
4592  return Res;
4593  if (Nfmt >= 8) {
4594  Error(Parser.getTok().getLoc(), "out of range nfmt");
4595  return MatchOperand_ParseFail;
4596  }
4597  GotNfmt = true;
4598  Parser.Lex();
4599  continue;
4600  }
4601  }
4602  break;
4603  }
4604  if (!GotDfmt && !GotNfmt)
4605  return MatchOperand_NoMatch;
4606  auto Format = Dfmt | Nfmt << 4;
4607  Operands.push_back(
4608  AMDGPUOperand::CreateImm(this, Format, S, AMDGPUOperand::ImmTyFORMAT));
4609  return MatchOperand_Success;
4610 }
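// Illustrative values: "dfmt:2, nfmt:5" (in either order) is folded into a
// single format immediate of 2 | (5 << 4) = 0x52.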
4611 
4612 //===----------------------------------------------------------------------===//
4613 // ds
4614 //===----------------------------------------------------------------------===//
4615 
4616 void AMDGPUAsmParser::cvtDSOffset01(MCInst &Inst,
4617  const OperandVector &Operands) {
4618  OptionalImmIndexMap OptionalIdx;
4619 
4620  for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
4621  AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
4622 
4623  // Add the register arguments
4624  if (Op.isReg()) {
4625  Op.addRegOperands(Inst, 1);
4626  continue;
4627  }
4628 
4629  // Handle optional arguments
4630  OptionalIdx[Op.getImmTy()] = i;
4631  }
4632 
4633  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset0);
4634  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset1);
4635  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGDS);
4636 
4637  Inst.addOperand(MCOperand::createReg(AMDGPU::M0)); // m0
4638 }
4639 
4640 void AMDGPUAsmParser::cvtDSImpl(MCInst &Inst, const OperandVector &Operands,
4641  bool IsGdsHardcoded) {
4642  OptionalImmIndexMap OptionalIdx;
4643 
4644  for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
4645  AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
4646 
4647  // Add the register arguments
4648  if (Op.isReg()) {
4649  Op.addRegOperands(Inst, 1);
4650  continue;
4651  }
4652 
4653  if (Op.isToken() && Op.getToken() == "gds") {
4654  IsGdsHardcoded = true;
4655  continue;
4656  }
4657 
4658  // Handle optional arguments
4659  OptionalIdx[Op.getImmTy()] = i;
4660  }
4661 
4662  AMDGPUOperand::ImmTy OffsetType =
4663  (Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_gfx10 ||
4664  Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_gfx6_gfx7 ||
4665  Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_vi) ? AMDGPUOperand::ImmTySwizzle :
4666  AMDGPUOperand::ImmTyOffset;
4667 
4668  addOptionalImmOperand(Inst, Operands, OptionalIdx, OffsetType);
4669 
4670  if (!IsGdsHardcoded) {
4671  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGDS);
4672  }
4673  Inst.addOperand(MCOperand::createReg(AMDGPU::M0)); // m0
4674 }
4675 
4676 void AMDGPUAsmParser::cvtExp(MCInst &Inst, const OperandVector &Operands) {
4677  OptionalImmIndexMap OptionalIdx;
4678 
4679  unsigned OperandIdx[4];
4680  unsigned EnMask = 0;
4681  int SrcIdx = 0;
4682 
4683  for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
4684  AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
4685 
4686  // Add the register arguments
4687  if (Op.isReg()) {
4688  assert(SrcIdx < 4);
4689  OperandIdx[SrcIdx] = Inst.size();
4690  Op.addRegOperands(Inst, 1);
4691  ++SrcIdx;
4692  continue;
4693  }
4694 
4695  if (Op.isOff()) {
4696  assert(SrcIdx < 4);
4697  OperandIdx[SrcIdx] = Inst.size();
4698  Inst.addOperand(MCOperand::createReg(AMDGPU::NoRegister));
4699  ++SrcIdx;
4700  continue;
4701  }
4702 
4703  if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyExpTgt) {
4704  Op.addImmOperands(Inst, 1);
4705  continue;
4706  }
4707 
4708  if (Op.isToken() && Op.getToken() == "done")
4709  continue;
4710 
4711  // Handle optional arguments
4712  OptionalIdx[Op.getImmTy()] = i;
4713  }
4714 
4715  assert(SrcIdx == 4);
4716 
4717  bool Compr = false;
4718  if (OptionalIdx.find(AMDGPUOperand::ImmTyExpCompr) != OptionalIdx.end()) {
4719  Compr = true;
4720  Inst.getOperand(OperandIdx[1]) = Inst.getOperand(OperandIdx[2]);
4721  Inst.getOperand(OperandIdx[2]).setReg(AMDGPU::NoRegister);
4722  Inst.getOperand(OperandIdx[3]).setReg(AMDGPU::NoRegister);
4723  }
4724 
4725  for (auto i = 0; i < SrcIdx; ++i) {
4726  if (Inst.getOperand(OperandIdx[i]).getReg() != AMDGPU::NoRegister) {
4727  EnMask |= Compr? (0x3 << i * 2) : (0x1 << i);
4728  }
4729  }
4730 
4731  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpVM);
4732  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpCompr);
4733 
4734  Inst.addOperand(MCOperand::createImm(EnMask));
4735 }
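// EnMask example (illustrative): with sources v0, off, v2, v3 and no compr,
// bits 0, 2 and 3 are set, giving EnMask = 0b1101; with compr, each live
// source contributes a 2-bit pair instead of a single bit.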
4736 
4737 //===----------------------------------------------------------------------===//
4738 // s_waitcnt
4739 //===----------------------------------------------------------------------===//
4740 
4741 static bool
4742 encodeCnt(
4743  const AMDGPU::IsaVersion ISA,
4744  int64_t &IntVal,
4745  int64_t CntVal,
4746  bool Saturate,
4747  unsigned (*encode)(const IsaVersion &Version, unsigned, unsigned),
4748  unsigned (*decode)(const IsaVersion &Version, unsigned))
4749 {
4750  bool Failed = false;
4751 
4752  IntVal = encode(ISA, IntVal, CntVal);
4753  if (CntVal != decode(ISA, IntVal)) {
4754  if (Saturate) {
4755  IntVal = encode(ISA, IntVal, -1);
4756  } else {
4757  Failed = true;
4758  }
4759  }
4760  return Failed;
4761 }
4762 
4763 bool AMDGPUAsmParser::parseCnt(int64_t &IntVal) {
4764 
4765  SMLoc CntLoc = getLoc();
4766  StringRef CntName = getTokenStr();
4767 
4768  if (!skipToken(AsmToken::Identifier, "expected a counter name") ||
4769  !skipToken(AsmToken::LParen, "expected a left parenthesis"))
4770  return false;
4771 
4772  int64_t CntVal;
4773  SMLoc ValLoc = getLoc();
4774  if (!parseExpr(CntVal))
4775  return false;
4776 
4777  AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
4778 
4779  bool Failed = true;
4780  bool Sat = CntName.endswith("_sat");
4781 
4782  if (CntName == "vmcnt" || CntName == "vmcnt_sat") {
4783  Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeVmcnt, decodeVmcnt);
4784  } else if (CntName == "expcnt" || CntName == "expcnt_sat") {
4785  Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeExpcnt, decodeExpcnt);
4786  } else if (CntName == "lgkmcnt" || CntName == "lgkmcnt_sat") {
4787  Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeLgkmcnt, decodeLgkmcnt);
4788  } else {
4789  Error(CntLoc, "invalid counter name " + CntName);
4790  return false;
4791  }
4792 
4793  if (Failed) {
4794  Error(ValLoc, "too large value for " + CntName);
4795  return false;
4796  }
4797 
4798  if (!skipToken(AsmToken::RParen, "expected a closing parenthesis"))
4799  return false;
4800 
4801  if (trySkipToken(AsmToken::Amp) || trySkipToken(AsmToken::Comma)) {
4802  if (isToken(AsmToken::EndOfStatement)) {
4803  Error(getLoc(), "expected a counter name");
4804  return false;
4805  }
4806  }
4807 
4808  return true;
4809 }
4810 
4811 OperandMatchResultTy
4812 AMDGPUAsmParser::parseSWaitCntOps(OperandVector &Operands) {
4813  AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
4814  int64_t Waitcnt = getWaitcntBitMask(ISA);
4815  SMLoc S = getLoc();
4816 
4817  // If parse failed, do not return error code
4818  // to avoid excessive error messages.
4819  if (isToken(AsmToken::Identifier) && peekToken().is(AsmToken::LParen)) {
4820  while (parseCnt(Waitcnt) && !isToken(AsmToken::EndOfStatement));
4821  } else {
4822  parseExpr(Waitcnt);
4823  }
4824 
4825  Operands.push_back(AMDGPUOperand::CreateImm(this, Waitcnt, S));
4826  return MatchOperand_Success;
4827 }
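// Illustrative inputs: "s_waitcnt vmcnt(0) & lgkmcnt(0)" (counters may be
// separated by spaces, '&' or ',') folds each counter into the full bitmask
// via parseCnt, while a bare integer such as "s_waitcnt 0" is taken as the
// raw encoded value.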
4828 
4829 bool
4830 AMDGPUOperand::isSWaitCnt() const {
4831  return isImm();
4832 }
4833 
4834 //===----------------------------------------------------------------------===//
4835 // hwreg
4836 //===----------------------------------------------------------------------===//
4837 
4838 bool
4839 AMDGPUAsmParser::parseHwregBody(OperandInfoTy &HwReg,
4840  int64_t &Offset,
4841  int64_t &Width) {
4842  using namespace llvm::AMDGPU::Hwreg;
4843 
4844  // The register may be specified by name or using a numeric code
4845  if (isToken(AsmToken::Identifier) &&
4846  (HwReg.Id = getHwregId(getTokenStr())) >= 0) {
4847  HwReg.IsSymbolic = true;
4848  lex(); // skip message name
4849  } else if (!parseExpr(HwReg.Id)) {
4850  return false;
4851  }
4852 
4853  if (trySkipToken(AsmToken::RParen))
4854  return true;
4855 
4856  // parse optional params
4857  return
4858  skipToken(AsmToken::Comma, "expected a comma or a closing parenthesis") &&
4859  parseExpr(Offset) &&
4860  skipToken(AsmToken::Comma, "expected a comma") &&
4861  parseExpr(Width) &&
4862  skipToken(AsmToken::RParen, "expected a closing parenthesis");
4863 }
4864 
4865 bool
4866 AMDGPUAsmParser::validateHwreg(const OperandInfoTy &HwReg,
4867  const int64_t Offset,
4868  const int64_t Width,
4869  const SMLoc Loc) {
4870 
4871  using namespace llvm::AMDGPU::Hwreg;
4872 
4873  if (HwReg.IsSymbolic && !isValidHwreg(HwReg.Id, getSTI())) {
4874  Error(Loc, "specified hardware register is not supported on this GPU");
4875  return false;
4876  } else if (!isValidHwreg(HwReg.Id)) {
4877  Error(Loc, "invalid code of hardware register: only 6-bit values are legal");
4878  return false;
4879  } else if (!isValidHwregOffset(Offset)) {
4880  Error(Loc, "invalid bit offset: only 5-bit values are legal");
4881  return false;
4882  } else if (!isValidHwregWidth(Width)) {
4883  Error(Loc, "invalid bitfield width: only values from 1 to 32 are legal");
4884  return false;
4885  }
4886  return true;
4887 }
4888 
4889 OperandMatchResultTy
4890 AMDGPUAsmParser::parseHwreg(OperandVector &Operands) {
4891  using namespace llvm::AMDGPU::Hwreg;
4892 
4893  int64_t ImmVal = 0;
4894  SMLoc Loc = getLoc();
4895 
4896  // If parse failed, do not return error code
4897  // to avoid excessive error messages.
4898  if (trySkipId("hwreg", AsmToken::LParen)) {
4899  OperandInfoTy HwReg(ID_UNKNOWN_);
4900  int64_t Offset = OFFSET_DEFAULT_;
4901  int64_t Width = WIDTH_DEFAULT_;
4902  if (parseHwregBody(HwReg, Offset, Width) &&
4903  validateHwreg(HwReg, Offset, Width, Loc)) {
4904  ImmVal = encodeHwreg(HwReg.Id, Offset, Width);
4905  }
4906  } else if (parseExpr(ImmVal)) {
4907  if (ImmVal < 0 || !isUInt<16>(ImmVal))
4908  Error(Loc, "invalid immediate: only 16-bit values are legal");
4909  }
4910 
4911  Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc, AMDGPUOperand::ImmTyHwreg));
4912  return MatchOperand_Success;
4913 }
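// Illustrative forms accepted above: "hwreg(HW_REG_TRAPSTS)" or
// "hwreg(HW_REG_TRAPSTS, 4, 2)" (register name, bit offset, field width), as
// well as a bare 16-bit immediate used as the already-encoded value.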
4914 
4915 bool AMDGPUOperand::isHwreg() const {
4916  return isImmTy(ImmTyHwreg);
4917 }
4918 
4919 //===----------------------------------------------------------------------===//
4920 // sendmsg
4921 //===----------------------------------------------------------------------===//
4922 
4923 bool
4924 AMDGPUAsmParser::parseSendMsgBody(OperandInfoTy &Msg,
4925  OperandInfoTy &Op,
4926  OperandInfoTy &Stream) {
4927  using namespace llvm::AMDGPU::SendMsg;
4928 
4929  if (isToken(AsmToken::Identifier) && (Msg.Id = getMsgId(getTokenStr())) >= 0) {
4930  Msg.IsSymbolic = true;
4931  lex(); // skip message name
4932  } else if (!parseExpr(Msg.Id)) {
4933  return false;
4934  }
4935 
4936  if (trySkipToken(AsmToken::Comma)) {
4937  Op.IsDefined = true;
4938  if (isToken(AsmToken::Identifier) &&
4939  (Op.Id = getMsgOpId(Msg.Id, getTokenStr())) >= 0) {
4940  lex(); // skip operation name
4941  } else if (!parseExpr(Op.Id)) {
4942  return false;
4943  }
4944 
4945  if (trySkipToken(AsmToken::Comma)) {
4946  Stream.IsDefined = true;
4947  if (!parseExpr(Stream.Id))
4948  return false;
4949  }
4950  }
4951 
4952  return skipToken(AsmToken::RParen, "expected a closing parenthesis");
4953 }
4954 
4955 bool
4956 AMDGPUAsmParser::validateSendMsg(const OperandInfoTy &Msg,
4957  const OperandInfoTy &Op,
4958  const OperandInfoTy &Stream,
4959  const SMLoc S) {
4960  using namespace llvm::AMDGPU::SendMsg;
4961 
4962  // Validation strictness depends on whether the message is specified
4963  // in a symbolic or in a numeric form. In the latter case
4964  // only the encoding possibility is checked.
4965  bool Strict = Msg.IsSymbolic;
4966 
4967  if (!isValidMsgId(Msg.Id, getSTI(), Strict)) {
4968  Error(S, "invalid message id");
4969  return false;
4970  } else if (Strict && (msgRequiresOp(Msg.Id) != Op.IsDefined)) {
4971  Error(S, Op.IsDefined ?
4972  "message does not support operations" :
4973  "missing message operation");
4974  return false;
4975  } else if (!isValidMsgOp(Msg.Id, Op.Id, Strict)) {
4976  Error(S, "invalid operation id");
4977  return false;
4978  } else if (Strict && !msgSupportsStream(Msg.Id, Op.Id) && Stream.IsDefined) {
4979  Error(S, "message operation does not support streams");
4980  return false;
4981  } else if (!isValidMsgStream(Msg.Id, Op.Id, Stream.Id, Strict)) {
4982  Error(S, "invalid message stream id");
4983  return false;
4984  }
4985  return true;
4986 }
4987 
4988 OperandMatchResultTy
4989 AMDGPUAsmParser::parseSendMsgOp(OperandVector &Operands) {
4990  using namespace llvm::AMDGPU::SendMsg;
4991 
4992  int64_t ImmVal = 0;
4993  SMLoc Loc = getLoc();
4994 
4995  // If parse failed, do not return error code
4996  // to avoid excessive error messages.
4997  if (trySkipId("sendmsg", AsmToken::LParen)) {
4998  OperandInfoTy Msg(ID_UNKNOWN_);
4999  OperandInfoTy Op(OP_NONE_);
5000  OperandInfoTy Stream(STREAM_ID_NONE_);
5001  if (parseSendMsgBody(Msg, Op, Stream) &&
5002  validateSendMsg(Msg, Op, Stream, Loc)) {
5003  ImmVal = encodeMsg(Msg.Id, Op.Id, Stream.Id);
5004  }
5005  } else if (parseExpr(ImmVal)) {
5006  if (ImmVal < 0 || !isUInt<16>(ImmVal))
5007  Error(Loc, "invalid immediate: only 16-bit values are legal");
5008  }
5009 
5010  Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc, AMDGPUOperand::ImmTySendMsg));
5011  return MatchOperand_Success;
5012 }
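// Illustrative forms: "sendmsg(MSG_INTERRUPT)", "sendmsg(MSG_GS, GS_OP_EMIT, 0)"
// (message, operation, stream id), or a bare 16-bit immediate.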
5013 
5014 bool AMDGPUOperand::isSendMsg() const {
5015  return isImmTy(ImmTySendMsg);
5016 }
5017 
5018 //===----------------------------------------------------------------------===//
5019 // v_interp
5020 //===----------------------------------------------------------------------===//
5021 
5022 OperandMatchResultTy AMDGPUAsmParser::parseInterpSlot(OperandVector &Operands) {
5023  if (getLexer().getKind() != AsmToken::Identifier)
5024  return MatchOperand_NoMatch;
5025 
5026  StringRef Str = Parser.getTok().getString();
5027  int Slot = StringSwitch<int>(Str)
5028  .Case("p10", 0)
5029  .Case("p20", 1)
5030  .Case("p0", 2)
5031  .Default(-1);
5032 
5033  SMLoc S = Parser.getTok().getLoc();
5034  if (Slot == -1)
5035  return MatchOperand_ParseFail;
5036 
5037  Parser.Lex();
5038  Operands.push_back(AMDGPUOperand::CreateImm(this, Slot, S,
5039  AMDGPUOperand::ImmTyInterpSlot));
5040  return MatchOperand_Success;
5041 }
5042 
5043 OperandMatchResultTy AMDGPUAsmParser::parseInterpAttr(OperandVector &Operands) {
5044  if (getLexer().getKind() != AsmToken::Identifier)
5045  return MatchOperand_NoMatch;
5046 
5047  StringRef Str = Parser.getTok().getString();
5048  if (!Str.startswith("attr"))
5049  return MatchOperand_NoMatch;
5050 
5051  StringRef Chan = Str.take_back(2);
5052  int AttrChan = StringSwitch<int>(Chan)
5053  .Case(".x", 0)
5054  .Case(".y", 1)
5055  .Case(".z", 2)
5056  .Case(".w", 3)
5057  .Default(-1);
5058  if (AttrChan == -1)
5059  return MatchOperand_ParseFail;
5060 
5061  Str = Str.drop_back(2).drop_front(4);
5062 
5063  uint8_t Attr;
5064  if (Str.getAsInteger(10, Attr))
5065  return MatchOperand_ParseFail;
5066 
5067  SMLoc S = Parser.getTok().getLoc();
5068  Parser.Lex();
5069  if (Attr > 63) {
5070  Error(S, "out of bounds attr");
5071  return MatchOperand_Success;
5072  }
5073 
5074  SMLoc SChan = SMLoc::getFromPointer(Chan.data());
5075 
5076  Operands.push_back(AMDGPUOperand::CreateImm(this, Attr, S,
5077  AMDGPUOperand::ImmTyInterpAttr));
5078  Operands.push_back(AMDGPUOperand::CreateImm(this, AttrChan, SChan,
5079  AMDGPUOperand::ImmTyAttrChan));
5080  return MatchOperand_Success;
5081 }
5082 
5083 //===----------------------------------------------------------------------===//
5084 // exp
5085 //===----------------------------------------------------------------------===//
5086 
5087 void AMDGPUAsmParser::errorExpTgt() {
5088  Error(Parser.getTok().getLoc(), "invalid exp target");
5089 }
5090 
5091 OperandMatchResultTy AMDGPUAsmParser::parseExpTgtImpl(StringRef Str,
5092  uint8_t &Val) {
5093  if (Str == "null") {
5094  Val = 9;
5095  return MatchOperand_Success;
5096  }
5097 
5098  if (Str.startswith("mrt")) {
5099  Str = Str.drop_front(3);
5100  if (Str == "z") { // == mrtz
5101  Val = 8;
5102  return MatchOperand_Success;
5103  }
5104 
5105  if (Str.getAsInteger(10, Val))
5106  return MatchOperand_ParseFail;
5107 
5108  if (Val > 7)
5109  errorExpTgt();
5110 
5111  return MatchOperand_Success;
5112  }
5113 
5114  if (Str.startswith("pos")) {
5115  Str = Str.drop_front(3);
5116  if (Str.getAsInteger(10, Val))
5117  return MatchOperand_ParseFail;
5118 
5119  if (Val > 4 || (Val == 4 && !isGFX10()))
5120  errorExpTgt();
5121 
5122  Val += 12;
5123  return MatchOperand_Success;
5124  }
5125 
5126  if (isGFX10() && Str == "prim") {
5127  Val = 20;
5128  return MatchOperand_Success;
5129  }
5130 
5131  if (Str.startswith("param")) {
5132  Str = Str.drop_front(5);
5133  if (Str.getAsInteger(10, Val))
5134  return MatchOperand_ParseFail;
5135 
5136  if (Val >= 32)
5137  errorExpTgt();
5138 
5139  Val += 32;
5140  return MatchOperand_Success;
5141  }
5142 
5143  if (Str.startswith("invalid_target_")) {
5144  Str = Str.drop_front(15);
5145  if (Str.getAsInteger(10, Val))
5146  return MatchOperand_ParseFail;
5147 
5148  errorExpTgt();
5149  return MatchOperand_Success;
5150  }
5151 
5152  return MatchOperand_NoMatch;
5153 }
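// Examples of the mapping above (illustrative): "mrt0" -> 0, "mrtz" -> 8,
// "null" -> 9, "pos0" -> 12, "prim" -> 20 (GFX10 only), "param3" -> 35.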
5154 
5155 OperandMatchResultTy AMDGPUAsmParser::parseExpTgt(OperandVector &Operands) {
5156  uint8_t Val;
5157  StringRef Str = Parser.getTok().getString();
5158 
5159  auto Res = parseExpTgtImpl(Str, Val);
5160  if (Res != MatchOperand_Success)
5161  return Res;
5162 
5163  SMLoc S = Parser.getTok().getLoc();
5164  Parser.Lex();
5165 
5166  Operands.push_back(AMDGPUOperand::CreateImm(this, Val, S,
5167  AMDGPUOperand::ImmTyExpTgt));
5168  return MatchOperand_Success;
5169 }
5170 
5171 //===----------------------------------------------------------------------===//
5172 // parser helpers
5173 //===----------------------------------------------------------------------===//
5174 
5175 bool
5176 AMDGPUAsmParser::isId(const AsmToken &Token, const StringRef Id) const {
5177  return Token.is(AsmToken::Identifier) && Token.getString() == Id;
5178 }
5179 
5180 bool
5181 AMDGPUAsmParser::isId(const StringRef Id) const {
5182  return isId(getToken(), Id);
5183 }
5184 
5185 bool
5186 AMDGPUAsmParser::isToken(const AsmToken::TokenKind Kind) const {
5187  return getTokenKind() == Kind;
5188 }
5189 
5190 bool
5191 AMDGPUAsmParser::trySkipId(const StringRef Id) {
5192  if (isId(Id)) {
5193  lex();
5194  return true;
5195  }
5196  return false;
5197 }
5198 
5199 bool
5200 AMDGPUAsmParser::trySkipId(const StringRef Id, const AsmToken::TokenKind Kind) {
5201  if (isId(Id) && peekToken().is(Kind)) {
5202  lex();
5203  lex();
5204  return true;
5205  }
5206  return false;
5207 }
5208 
5209 bool
5210 AMDGPUAsmParser::trySkipToken(const AsmToken::TokenKind Kind) {
5211  if (isToken(Kind)) {
5212  lex();
5213  return true;
5214  }
5215  return false;
5216 }
5217 
5218 bool
5219 AMDGPUAsmParser::skipToken(const AsmToken::TokenKind Kind,
5220  const StringRef ErrMsg) {
5221  if (!trySkipToken(Kind)) {
5222  Error(getLoc(), ErrMsg);
5223  return false;
5224  }
5225  return true;
5226 }
5227 
5228 bool
5229 AMDGPUAsmParser::parseExpr(int64_t &Imm) {
5230  return !getParser().parseAbsoluteExpression(Imm);
5231 }
5232 
5233 bool
5234 AMDGPUAsmParser::parseExpr(OperandVector &Operands) {
5235  SMLoc S = getLoc();
5236 
5237  const MCExpr *Expr;
5238  if (Parser.parseExpression(Expr))
5239  return false;
5240 
5241  int64_t IntVal;
5242  if (Expr->evaluateAsAbsolute(IntVal)) {
5243  Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S));
5244  } else {
5245  Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S));
5246  }
5247  return true;
5248 }
5249 
5250 bool
5251 AMDGPUAsmParser::parseString(StringRef &Val, const StringRef ErrMsg) {
5252  if (isToken(AsmToken::String)) {
5253  Val = getToken().getStringContents();
5254  lex();
5255  return true;
5256  } else {
5257  Error(getLoc(), ErrMsg);
5258  return false;
5259  }
5260 }
5261 
5262 AsmToken
5263 AMDGPUAsmParser::getToken() const {
5264  return Parser.getTok();
5265 }
5266 
5267 AsmToken
5268 AMDGPUAsmParser::peekToken() {
5269  return getLexer().peekTok();
5270 }
5271 
5272 void
5273 AMDGPUAsmParser::peekTokens(MutableArrayRef<AsmToken> Tokens) {
5274  auto TokCount = getLexer().peekTokens(Tokens);
5275 
5276  for (auto Idx = TokCount; Idx < Tokens.size(); ++Idx)
5277  Tokens[Idx] = AsmToken(AsmToken::Error, "");
5278 }
5279 
5280 AsmToken::TokenKind
5281 AMDGPUAsmParser::getTokenKind() const {
5282  return getLexer().getKind();
5283 }
5284 
5285 SMLoc
5286 AMDGPUAsmParser::getLoc() const {
5287  return getToken().getLoc();
5288 }
5289 
5290 StringRef
5291 AMDGPUAsmParser::getTokenStr() const {
5292  return getToken().getString();
5293 }
5294 
5295 void
5296 AMDGPUAsmParser::lex() {
5297  Parser.Lex();
5298 }
5299 
5300 //===----------------------------------------------------------------------===//
5301 // swizzle
5302 //===----------------------------------------------------------------------===//
5303 
5304 LLVM_READNONE
5305 static unsigned
5306 encodeBitmaskPerm(const unsigned AndMask,
5307  const unsigned OrMask,
5308  const unsigned XorMask) {
5309  using namespace llvm::AMDGPU::Swizzle;
5310 
5311  return BITMASK_PERM_ENC |
5312  (AndMask << BITMASK_AND_SHIFT) |
5313  (OrMask << BITMASK_OR_SHIFT) |
5314  (XorMask << BITMASK_XOR_SHIFT);
5315 }
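// Sketch of the ds_swizzle bitmask encoding produced above, assuming the
// usual field layout from SIDefines.h (and_mask in bits [4:0], or_mask in
// bits [9:5], xor_mask in bits [14:10]); each lane then reads from
//   src_lane = ((lane & AndMask) | OrMask) ^ XorMask
// within its wave.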
5316 
5317 bool
5318 AMDGPUAsmParser::parseSwizzleOperands(const unsigned OpNum, int64_t* Op,
5319  const unsigned MinVal,
5320  const unsigned MaxVal,
5321  const StringRef ErrMsg) {
5322  for (unsigned i = 0; i < OpNum; ++i) {
5323  if (!skipToken(AsmToken::Comma, "expected a comma")) {
5324  return false;
5325  }
5326  SMLoc ExprLoc = Parser.getTok().getLoc();
5327  if (!parseExpr(Op[i])) {
5328  return false;
5329  }
5330  if (Op[i] < MinVal || Op[i] > MaxVal) {
5331  Error(ExprLoc, ErrMsg);
5332  return false;
5333  }
5334  }
5335 
5336  return true;
5337 }
5338 
5339 bool
5340 AMDGPUAsmParser::parseSwizzleQuadPerm(int64_t &Imm) {
5341  using namespace llvm::AMDGPU::Swizzle;
5342 
5343  int64_t Lane[LANE_NUM];
5344  if (parseSwizzleOperands(LANE_NUM, Lane, 0, LANE_MAX,
5345  "expected a 2-bit lane id")) {
5346  Imm = QUAD_PERM_ENC;
5347  for (unsigned I = 0; I < LANE_NUM; ++I) {
5348  Imm |= Lane[I] << (LANE_SHIFT * I);
5349  }
5350  return true;
5351  }
5352  return false;
5353 }
5354 
5355 bool
5356 AMDGPUAsmParser::parseSwizzleBroadcast(int64_t &Imm) {
5357  using namespace llvm::AMDGPU::Swizzle;
5358 
5359  SMLoc S = Parser.getTok().getLoc();
5360  int64_t GroupSize;
5361  int64_t LaneIdx;
5362 
5363  if (!parseSwizzleOperands(1, &GroupSize,
5364  2, 32,
5365  "group size must be in the interval [2,32]")) {
5366  return false;
5367  }
5368  if (!isPowerOf2_64(GroupSize)) {
5369  Error(S, "group size must be a power of two");
5370  return false;
5371  }
5372  if (parseSwizzleOperands(1, &LaneIdx,
5373  0, GroupSize - 1,
5374  "lane id must be in the interval [0,group size - 1]")) {
5375  Imm = encodeBitmaskPerm(BITMASK_MAX - GroupSize + 1, LaneIdx, 0);
5376  return true;
5377  }
5378  return false;
5379 }
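// Worked example (values follow from the code above): swizzle(BROADCAST, 8, 3)
// gives GroupSize = 8 and LaneIdx = 3, so AndMask = BITMASK_MAX - 8 + 1 = 0x18
// and OrMask = 3 (assuming BITMASK_MAX == 0x1F): every lane in a group of 8
// reads from lane 3 of its own group.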
5380 
5381 bool
5382 AMDGPUAsmParser::parseSwizzleReverse(int64_t &Imm) {
5383  using namespace llvm::AMDGPU::Swizzle;
5384 
5385  SMLoc S = Parser.getTok().getLoc();
5386  int64_t GroupSize;
5387 
5388  if (!parseSwizzleOperands(1, &GroupSize,
5389  2, 32, "group size must be in the interval [2,32]")) {
5390  return false;
5391  }
5392  if (!isPowerOf2_64(GroupSize)) {
5393  Error(S, "group size must be a power of two");
5394  return false;
5395  }
5396 
5397  Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize - 1);
5398  return true;
5399 }
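// Worked example: swizzle(REVERSE, 8) yields XorMask = 7, i.e. each lane
// reads from (lane ^ 7), which reverses the lane order within every
// aligned group of 8 lanes.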
5400 
5401 bool
5402 AMDGPUAsmParser::parseSwizzleSwap(int64_t &Imm) {
5403  using namespace llvm::AMDGPU::Swizzle;
5404 
5405  SMLoc S = Parser.getTok().getLoc();
5406  int64_t GroupSize;
5407 
5408  if (!parseSwizzleOperands(1, &GroupSize,
5409  1, 16, "group size must be in the interval [1,16]")) {
5410  return false;
5411  }
5412  if (!isPowerOf2_64(GroupSize)) {
5413  Error(S, "group size must be a power of two");
5414  return false;
5415  }
5416 
5417  Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize);
5418  return true;
5419 }
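// Worked example: swizzle(SWAP, 4) yields XorMask = 4, i.e. each lane
// reads from (lane ^ 4), which swaps adjacent groups of 4 lanes.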
5420 
5421 bool
5422 AMDGPUAsmParser::parseSwizzleBitmaskPerm(int64_t &Imm) {
5423  using namespace llvm::AMDGPU::Swizzle;
5424 
5425  if (!skipToken(AsmToken::Comma, "expected a comma")) {
5426  return false;
5427  }
5428 
5429  StringRef Ctl;
5430  SMLoc StrLoc = Parser.getTok().getLoc();
5431  if (!parseString(Ctl)) {
5432  return false;
5433  }
5434  if (Ctl.size() != BITMASK_WIDTH) {
5435  Error(StrLoc, "expected a 5-character mask");
5436  return false;
5437  }
5438 
5439  unsigned AndMask = 0;
5440  unsigned OrMask = 0;
5441  unsigned XorMask = 0;
5442 
5443  for (size_t i = 0; i < Ctl.size(); ++i) {
5444  unsigned Mask = 1 << (BITMASK_WIDTH - 1 - i);
5445  switch(Ctl[i]) {
5446  default:
5447  Error(StrLoc, "invalid mask");
5448  return false;
5449  case '0':
5450  break;
5451  case '1':
5452  OrMask |= Mask;
5453  break;
5454  case 'p':
5455  AndMask |= Mask;
5456  break;
5457  case 'i':
5458  AndMask |= Mask;
5459  XorMask |= Mask;
5460  break;
5461  }
5462  }
5463 
5464  Imm = encodeBitmaskPerm(AndMask, OrMask, XorMask);
5465  return true;
5466 }
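// The leftmost character of the 5-character control string governs lane-id
// bit 4 and the rightmost governs bit 0: '0' forces the bit to 0, '1'
// forces it to 1, 'p' preserves it, and 'i' inverts it. For example,
// swizzle(BITMASK_PERM, "00p1i") produces AndMask = 0x05, OrMask = 0x02,
// XorMask = 0x01.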
5467 
5468 bool
5469 AMDGPUAsmParser::parseSwizzleOffset(int64_t &Imm) {
5470 
5471  SMLoc OffsetLoc = Parser.getTok().getLoc();
5472 
5473  if (!parseExpr(Imm)) {
5474  return false;
5475  }
5476  if (!isUInt<16>(Imm)) {
5477  Error(OffsetLoc, "expected a 16-bit offset");
5478  return false;
5479  }
5480  return true;
5481 }
5482 
5483 bool
5484 AMDGPUAsmParser::parseSwizzleMacro(int64_t &Imm) {
5485  using namespace llvm::AMDGPU::Swizzle;
5486 
5487  if (skipToken(AsmToken::LParen, "expected a left parenthesis")) {
5488 
5489  SMLoc ModeLoc = Parser.getTok().getLoc();
5490  bool Ok = false;
5491 
5492  if (trySkipId(IdSymbolic[ID_QUAD_PERM])) {
5493  Ok = parseSwizzleQuadPerm(Imm);
5494  } else if (trySkipId(IdSymbolic[ID_BITMASK_PERM])) {
5495  Ok = parseSwizzleBitmaskPerm(Imm);
5496  } else if (trySkipId(IdSymbolic[ID_BROADCAST])) {
5497  Ok = parseSwizzleBroadcast(Imm);
5498  } else if (trySkipId(IdSymbolic[ID_SWAP])) {
5499  Ok = parseSwizzleSwap(Imm);
5500  } else if (trySkipId(IdSymbolic[ID_REVERSE])) {
5501  Ok = parseSwizzleReverse(Imm);
5502  } else {
5503  Error(ModeLoc, "expected a swizzle mode");
5504  }
5505 
5506  return Ok && skipToken(AsmToken::RParen, "expected a closing parenthesis");
5507  }
5508 
5509  return false;
5510 }
5511 
5512 OperandMatchResultTy
5513 AMDGPUAsmParser::parseSwizzleOp(OperandVector &Operands) {
5514  SMLoc S = Parser.getTok().getLoc();
5515  int64_t Imm = 0;
5516 
5517  if (trySkipId("offset")) {
5518 
5519  bool Ok = false;
5520  if (skipToken(AsmToken::Colon, "expected a colon")) {
5521  if (trySkipId("swizzle")) {
5522  Ok = parseSwizzleMacro(Imm);
5523  } else {
5524  Ok = parseSwizzleOffset(Imm);
5525  }
5526  }
5527 
5528  Operands.push_back(AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTySwizzle));
5529 
5530  return Ok ? MatchOperand_Success : MatchOperand_ParseFail;
5531  } else {
5532  // Swizzle "offset" operand is optional.
5533  // If it is omitted, try parsing other optional operands.
5534  return parseOptionalOpr(Operands);
5535  }
5536 }
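// Typical forms accepted here (assumed syntax, as used with ds_swizzle_b32):
//   ds_swizzle_b32 v5, v1 offset:0xffff
//   ds_swizzle_b32 v5, v1 offset:swizzle(QUAD_PERM, 0, 1, 2, 3)
//   ds_swizzle_b32 v5, v1 offset:swizzle(BROADCAST, 2, 0)
// When no "offset" is present, control falls through to parseOptionalOpr().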
5537 
5538 bool
5539 AMDGPUOperand::isSwizzle() const {
5540  return isImmTy(ImmTySwizzle);
5541 }
5542 
5543 //===----------------------------------------------------------------------===//
5544 // VGPR Index Mode
5545 //===----------------------------------------------------------------------===//
5546 
5547 int64_t AMDGPUAsmParser::parseGPRIdxMacro() {
5548 
5549  using namespace llvm::AMDGPU::VGPRIndexMode;
5550 
5551  if (trySkipToken(AsmToken::RParen)) {
5552  return OFF;
5553  }
5554 
5555  int64_t Imm = 0;
5556 
5557  while (true) {
5558  unsigned Mode = 0;
5559  SMLoc S = Parser.getTok().getLoc();
5560 
5561  for (unsigned ModeId = ID_MIN; ModeId <= ID_MAX; ++ModeId) {
5562  if (trySkipId(IdSymbolic[ModeId])) {
5563  Mode = 1 << ModeId;
5564  break;
5565  }
5566  }
5567 
5568  if (Mode == 0) {
5569  Error(S, (Imm == 0)?
5570  "expected a VGPR index mode or a closing parenthesis" :
5571  "expected a VGPR index mode");
5572  break;
5573  }
5574 
5575  if (Imm & Mode) {
5576  Error(S, "duplicate VGPR index mode");
5577  break;
5578  }
5579  Imm |= Mode;
5580 
5581  if (trySkipToken(AsmToken::RParen))
5582  break;
5583  if (!skipToken(AsmToken::Comma,
5584  "expected a comma or a closing parenthesis"))
5585  break;
5586  }
5587 
5588  return Imm;
5589 }
5590 
5591 OperandMatchResultTy
5592 AMDGPUAsmParser::parseGPRIdxMode(OperandVector &Operands) {
5593 
5594  int64_t Imm = 0;
5595  SMLoc S = Parser.getTok().getLoc();
5596 
5597  if (getLexer().getKind() == AsmToken::Identifier &&
5598  Parser.getTok().getString() == "gpr_idx" &&
5599  getLexer().peekTok().is(AsmToken::LParen)) {
5600 
5601  Parser.Lex();
5602  Parser.Lex();
5603 
5604  // If parse failed, trigger an error but do not return error code
5605  // to avoid excessive error messages.
5606  Imm = parseGPRIdxMacro();
5607 
5608  } else {
5609  if (getParser().parseAbsoluteExpression(Imm))
5610  return MatchOperand_NoMatch;
5611  if (Imm < 0 || !isUInt<4>(Imm)) {
5612  Error(S, "invalid immediate: only 4-bit values are legal");
5613  }
5614  }
5615 
5616  Operands.push_back(
5617  AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyGprIdxMode));
5618  return MatchOperand_Success;
5619 }
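// Two input forms are accepted here (assumed syntax): a symbolic mode list,
// e.g. "s_set_gpr_idx_on s0, gpr_idx(SRC0, SRC1, DST)", where each name
// sets one bit of the mode mask, and a plain 4-bit immediate, e.g.
// "s_set_gpr_idx_on s0, 3". An empty gpr_idx() yields OFF.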
5620 
5621 bool AMDGPUOperand::isGPRIdxMode() const {
5622  return isImmTy(ImmTyGprIdxMode);
5623 }
5624 
5625 //===----------------------------------------------------------------------===//
5626 // sopp branch targets
5627 //===----------------------------------------------------------------------===//
5628 
5629 OperandMatchResultTy
5630 AMDGPUAsmParser::parseSOppBrTarget(OperandVector &Operands) {
5631 
5632  // Make sure we are not parsing something
5633  // that looks like a label or an expression but is not.
5634  // This will improve error messages.
5635  if (isRegister() || isModifier())
5636  return MatchOperand_NoMatch;
5637 
5638  if (parseExpr(Operands)) {
5639 
5640  AMDGPUOperand &Opr = ((AMDGPUOperand &)*Operands[Operands.size() - 1]);
5641  assert(Opr.isImm() || Opr.isExpr());
5642  SMLoc Loc = Opr.getStartLoc();
5643 
5644  // Currently we do not support arbitrary expressions as branch targets.
5645  // Only labels and absolute expressions are accepted.
5646  if (Opr.isExpr() && !Opr.isSymbolRefExpr()) {
5647  Error(Loc, "expected an absolute expression or a label");
5648  } else if (Opr.isImm() && !Opr.isS16Imm()) {
5649  Error(Loc, "expected a 16-bit signed jump offset");
5650  }
5651  }
5652 
5653  return MatchOperand_Success; // avoid excessive error messages
5654 }
5655 
5656 //===----------------------------------------------------------------------===//
5657 // Boolean holding registers
5658 //===----------------------------------------------------------------------===//
5659 
5660 OperandMatchResultTy
5661 AMDGPUAsmParser::parseBoolReg(OperandVector &Operands) {
5662  return parseReg(Operands);
5663 }
5664 
5665 //===----------------------------------------------------------------------===//
5666 // mubuf
5667 //===----------------------------------------------------------------------===//
5668 
5669 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultDLC() const {
5670  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDLC);
5671 }
5672 
5673 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultGLC() const {
5674  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyGLC);
5675 }
5676 
5677 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSLC() const {
5678  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTySLC);
5679 }
5680 
5681 void AMDGPUAsmParser::cvtMubufImpl(MCInst &Inst,
5682  const OperandVector &Operands,
5683  bool IsAtomic,
5684  bool IsAtomicReturn,
5685  bool IsLds) {
5686  bool IsLdsOpcode = IsLds;
5687  bool HasLdsModifier = false;
5688  OptionalImmIndexMap OptionalIdx;
5689  assert(IsAtomicReturn ? IsAtomic : true);
5690  unsigned FirstOperandIdx = 1;
5691 
5692  for (unsigned i = FirstOperandIdx, e = Operands.size(); i != e; ++i) {
5693  AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
5694 
5695  // Add the register arguments
5696  if (Op.isReg()) {
5697  Op.addRegOperands(Inst, 1);
5698  // Insert a tied src for atomic return dst.
5699  // This cannot be postponed as subsequent calls to
5700  // addImmOperands rely on correct number of MC operands.
5701  if (IsAtomicReturn && i == FirstOperandIdx)
5702  Op.addRegOperands(Inst, 1);
5703  continue;
5704  }
5705 
5706  // Handle the case where soffset is an immediate
5707  if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) {
5708  Op.addImmOperands(Inst, 1);
5709  continue;
5710  }
5711 
5712  HasLdsModifier |= Op.isLDS();
5713 
5714  // Handle tokens like 'offen' which are sometimes hard-coded into the
5715  // asm string. There are no MCInst operands for these.
5716  if (Op.isToken()) {
5717  continue;
5718  }
5719  assert(Op.isImm());
5720 
5721  // Handle optional arguments
5722  OptionalIdx[Op.getImmTy()] = i;
5723  }
5724 
5725  // This is a workaround for an llvm quirk which may result in an
5726  // incorrect instruction selection. Lds and non-lds versions of
5727  // MUBUF instructions are identical except that lds versions
5728  // have a mandatory 'lds' modifier. However, this modifier follows
5729  // optional modifiers, and the llvm asm matcher regards this 'lds'
5730  // modifier as an optional one. As a result, an lds version
5731  // of the opcode may be selected even if the instruction has no 'lds' modifier.
5732  if (IsLdsOpcode && !HasLdsModifier) {
5733  int NoLdsOpcode = AMDGPU::getMUBUFNoLdsInst(Inst.getOpcode());
5734  if (NoLdsOpcode != -1) { // Got lds version - correct it.
5735  Inst.setOpcode(NoLdsOpcode);
5736  IsLdsOpcode = false;
5737  }
5738  }
5739 
5740  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset);
5741  if (!IsAtomic) { // glc is hard-coded.
5742  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGLC);
5743  }
5744  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySLC);
5745 
5746  if (!IsLdsOpcode) { // tfe is not legal with lds opcodes
5747  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE);
5748  }
5749 
5750  if (isGFX10())
5751  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDLC);
5752 }
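// Illustration of the 'lds' workaround above (assumed syntax): for
// "buffer_load_dword v1, off, s[4:7], s0" the matcher may select the
// *_LDS_* opcode even though no 'lds' modifier was parsed; in that case
// getMUBUFNoLdsInst() remaps the opcode to the ordinary non-lds variant.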
5753 
5754 void AMDGPUAsmParser::cvtMtbuf(MCInst &Inst, const OperandVector &Operands) {
5755  OptionalImmIndexMap OptionalIdx;
5756 
5757  for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
5758  AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
5759 
5760  // Add the register arguments
5761  if (Op.isReg()) {
5762  Op.addRegOperands(Inst, 1);
5763  continue;
5764  }
5765 
5766  // Handle the case where soffset is an immediate
5767  if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) {
5768  Op.addImmOperands(Inst, 1);
5769  continue;
5770  }
5771 
5772  // Handle tokens like 'offen' which are sometimes hard-coded into the
5773  // asm string. There are no MCInst operands for these.
5774  if (Op.isToken()) {
5775  continue;
5776  }
5777  assert(Op.isImm());
5778 
5779  // Handle optional arguments
5780  OptionalIdx[Op.getImmTy()] = i;
5781  }
5782 
5783  addOptionalImmOperand(Inst, Operands, OptionalIdx,
5784  AMDGPUOperand::ImmTyOffset);
5785  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyFORMAT);
5786  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGLC);
5787  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySLC);
5788  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE);
5789 
5790  if (isGFX10())
5791  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDLC);
5792 }
5793 
5794 //===----------------------------------------------------------------------===//
5795 // mimg
5796 //===----------------------------------------------------------------------===//
5797 
5798 void AMDGPUAsmParser::cvtMIMG(MCInst &Inst, const OperandVector &Operands,
5799  bool IsAtomic) {
5800  unsigned I = 1;
5801  const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
5802  for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
5803  ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
5804  }
5805 
5806  if (IsAtomic) {
5807  // Add src, same as dst
5808  assert(Desc.getNumDefs() == 1);
5809  ((AMDGPUOperand &)*Operands[I - 1]).addRegOperands(Inst, 1);
5810  }
5811 
5812  OptionalImmIndexMap OptionalIdx;
5813 
5814  for (unsigned E = Operands.size(); I != E; ++I) {
5815  AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
5816 
5817  // Add the register arguments
5818  if (Op.isReg()) {
5819  Op.addRegOperands(Inst, 1);
5820  } else if (Op.isImmModifier()) {
5821  OptionalIdx[Op.getImmTy()] = I;
5822  } else if (!Op.isToken()) {
5823  llvm_unreachable("unexpected operand type");
5824  }
5825  }
5826 
5827  bool IsGFX10 = isGFX10();
5828 
5829  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDMask);
5830  if (IsGFX10)
5831  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDim, -1);
5832  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyUNorm);
5833  if (IsGFX10)
5834  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDLC);
5835  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGLC);
5836  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySLC);
5837  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyR128A16);
5838  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE);
5839  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyLWE);
5840  if (!IsGFX10)
5841  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDA);
5842  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyD16);
5843 }
5844 
5845 void AMDGPUAsmParser::cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands) {
5846  cvtMIMG(Inst, Operands, true);
5847 }
5848 
5849 //===----------------------------------------------------------------------===//
5850 // smrd
5851 //===----------------------------------------------------------------------===//
5852 
5853 bool AMDGPUOperand::isSMRDOffset8() const {
5854  return isImm() && isUInt<8>(getImm());
5855 }
5856 
5857 bool AMDGPUOperand::isSMRDOffset20() const {
5858  return isImm() && isUInt<20>(getImm());
5859 }
5860 
5861 bool AMDGPUOperand::isSMRDLiteralOffset() const {
5862  // 32-bit literals are only supported on CI and we only want to use them
5863  // when the offset is > 8 bits.
5864  return isImm() && !isUInt<8>(getImm()) && isUInt<32>(getImm());
5865 }
5866 
5867 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDOffset8() const {
5868  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
5869 }
5870 
5871 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDOffset20() const {
5872  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
5873 }
5874 
5875 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDLiteralOffset() const {
5876  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
5877 }
5878 
5879 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultFlatOffset() const {
5880  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
5881 }
5882 
5883 //===----------------------------------------------------------------------===//
5884 // vop3
5885 //===----------------------------------------------------------------------===//
5886 
5887 static bool ConvertOmodMul(int64_t &Mul) {
5888  if (Mul != 1 && Mul != 2 && Mul != 4)
5889  return false;
5890 
5891  Mul >>= 1;
5892  return true;
5893 }
5894 
5895 static bool ConvertOmodDiv(int64_t &Div) {
5896  if (Div == 1) {
5897  Div = 0;
5898  return true;
5899  }
5900 
5901  if (Div == 2) {
5902  Div = 3;
5903  return true;
5904  }
5905 
5906  return false;
5907 }
5908 
5909 static bool ConvertBoundCtrl(int64_t &BoundCtrl) {
5910  if (BoundCtrl == 0) {
5911  BoundCtrl = 1;
5912  return true;
5913  }
5914 
5915  if (BoundCtrl == -1) {
5916  BoundCtrl = 0;
5917  return true;
5918  }
5919 
5920  return false;
5921 }
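// These converters map assembler spellings to encoded field values (the
// mappings follow from the code above):
//   omod: mul:1 -> 0, mul:2 -> 1, mul:4 -> 2, div:2 -> 3 (div:1 -> 0)
//   bound_ctrl: bound_ctrl:0 -> 1, bound_ctrl:-1 -> 0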
5922 
5923 // Note: the order in this table matches the order of operands in AsmString.
5924 static const OptionalOperand AMDGPUOptionalOperandTable[] = {
5925  {"offen", AMDGPUOperand::ImmTyOffen, true, nullptr},
5926  {"idxen", AMDGPUOperand::ImmTyIdxen, true, nullptr},
5927  {"addr64", AMDGPUOperand::ImmTyAddr64, true, nullptr},
5928  {"offset0", AMDGPUOperand::ImmTyOffset0, false, nullptr},
5929  {"offset1", AMDGPUOperand::ImmTyOffset1, false, nullptr},
5930  {"gds", AMDGPUOperand::ImmTyGDS, true, nullptr},
5931  {"lds", AMDGPUOperand::ImmTyLDS, true, nullptr},
5932  {"offset", AMDGPUOperand::ImmTyOffset, false, nullptr},
5933  {"inst_offset", AMDGPUOperand::ImmTyInstOffset, false, nullptr},
5934  {"dlc", AMDGPUOperand::ImmTyDLC, true, nullptr},
5935  {"format", AMDGPUOperand::ImmTyFORMAT, false, nullptr},
5936  {"glc", AMDGPUOperand::ImmTyGLC, true, nullptr},
5937  {"slc", AMDGPUOperand::ImmTySLC, true, nullptr},
5938  {"tfe", AMDGPUOperand::ImmTyTFE, true, nullptr},
5939  {"d16", AMDGPUOperand::ImmTyD16, true, nullptr},
5940  {"high", AMDGPUOperand::ImmTyHigh, true, nullptr},
5941  {"clamp", AMDGPUOperand::ImmTyClampSI, true, nullptr},
5942  {"omod", AMDGPUOperand::ImmTyOModSI, false, ConvertOmodMul},
5943  {"unorm", AMDGPUOperand::ImmTyUNorm, true, nullptr},
5944  {"da", AMDGPUOperand::ImmTyDA, true, nullptr},
5945  {"r128", AMDGPUOperand::ImmTyR128A16, true, nullptr},
5946  {"a16", AMDGPUOperand::ImmTyR128A16, true, nullptr},
5947  {"lwe", AMDGPUOperand::ImmTyLWE, true, nullptr},
5948  {"d16", AMDGPUOperand::ImmTyD16, true, nullptr},
5949  {"dmask", AMDGPUOperand::ImmTyDMask, false, nullptr},
5950  {"dim", AMDGPUOperand::ImmTyDim, false, nullptr},
5951  {"row_mask", AMDGPUOperand::ImmTyDppRowMask, false, nullptr},
5952  {"bank_mask", AMDGPUOperand::ImmTyDppBankMask, false, nullptr},
5953  {"bound_ctrl", AMDGPUOperand::ImmTyDppBoundCtrl, false, ConvertBoundCtrl},
5954  {"fi", AMDGPUOperand::ImmTyDppFi, false, nullptr},
5955  {"dst_sel", AMDGPUOperand::ImmTySdwaDstSel, false, nullptr},
5956  {"src0_sel", AMDGPUOperand::ImmTySdwaSrc0Sel, false, nullptr},
5957  {"src1_sel", AMDGPUOperand::ImmTySdwaSrc1Sel, false, nullptr},
5958  {"dst_unused", AMDGPUOperand::ImmTySdwaDstUnused, false, nullptr},
5959  {"compr", AMDGPUOperand::ImmTyExpCompr, true, nullptr },
5960  {"vm", AMDGPUOperand::ImmTyExpVM, true, nullptr},
5961  {"op_sel", AMDGPUOperand::ImmTyOpSel, false, nullptr},
5962  {"op_sel_hi", AMDGPUOperand::ImmTyOpSelHi, false, nullptr},
5963  {"neg_lo", AMDGPUOperand::ImmTyNegLo, false, nullptr},
5964  {"neg_hi", AMDGPUOperand::ImmTyNegHi, false, nullptr},
5965  {"blgp", AMDGPUOperand::ImmTyBLGP, false, nullptr},
5966  {"cbsz", AMDGPUOperand::ImmTyCBSZ, false, nullptr},
5967  {"abid", AMDGPUOperand::ImmTyABID, false, nullptr}
5968 };
5969 
5970 OperandMatchResultTy AMDGPUAsmParser::parseOptionalOperand(OperandVector &Operands) {
5971  unsigned size = Operands.size();
5972  assert(size > 0);
5973 
5974  OperandMatchResultTy res = parseOptionalOpr(Operands);
5975 
5976  // This is a hack to enable hardcoded mandatory operands which follow
5977  // optional operands.
5978  //
5979  // The current design assumes that all operands after the first optional operand
5980  // are also optional. However, the implementation of some instructions violates
5981  // this rule (see e.g. flat/global atomics, which have hardcoded 'glc' operands).
5982  //
5983  // To alleviate this problem, we have to (implicitly) parse extra operands
5984  // to make sure the autogenerated parser of custom operands never hits hardcoded
5985  // mandatory operands.
5986 
5987  if (size == 1 || ((AMDGPUOperand &)*Operands[size - 1]).isRegKind()) {
5988 
5989  // We have parsed the first optional operand.
5990  // Parse as many operands as necessary to skip all mandatory operands.
5991 
5992  for (unsigned i = 0; i < MAX_OPR_LOOKAHEAD; ++i) {
5993  if (res != MatchOperand_Success ||
5994  getLexer().is(AsmToken::EndOfStatement)) break;
5995  if (getLexer().is(AsmToken::Comma)) Parser.Lex();
5996  res = parseOptionalOpr(Operands);
5997  }
5998  }
5999 
6000  return res;
6001 }
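// Example of the situation handled by the lookahead above (assumed syntax):
// in "flat_atomic_add v2, v[3:4], v5 offset:16 glc" the 'glc' token is a
// hardcoded mandatory operand that follows the optional 'offset', so extra
// optional operands are parsed here to keep the autogenerated matcher from
// tripping over it.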
6002 
6003 OperandMatchResultTy AMDGPUAsmParser::parseOptionalOpr(OperandVector &Operands) {
6004  OperandMatchResultTy res;
6005  for (const OptionalOperand &Op : AMDGPUOptionalOperandTable) {
6006  // try to parse any optional operand here
6007  if (Op.IsBit) {
6008  res = parseNamedBit(Op.Name, Operands, Op.Type);
6009  } else if (Op.Type == AMDGPUOperand::ImmTyOModSI) {
6010  res = parseOModOperand(Operands);
6011  } else if (Op.Type == AMDGPUOperand::ImmTySdwaDstSel ||
6012  Op.Type == AMDGPUOperand::ImmTySdwaSrc0Sel ||
6013  Op.Type == AMDGPUOperand::ImmTySdwaSrc1Sel) {
6014  res = parseSDWASel(Operands, Op.Name, Op.Type);
6015  } else if (Op.Type == AMDGPUOperand::ImmTySdwaDstUnused) {
6016  res = parseSDWADstUnused(Operands);
6017  } else if (Op.Type == AMDGPUOperand::ImmTyOpSel ||
6018  Op.Type == AMDGPUOperand::ImmTyOpSelHi ||
6019  Op.Type == AMDGPUOperand::ImmTyNegLo ||
6020  Op.Type == AMDGPUOperand::ImmTyNegHi) {
6021  res = parseOperandArrayWithPrefix(Op.Name, Operands, Op.Type,
6022  Op.ConvertResult);
6023  } else if (Op.Type == AMDGPUOperand::ImmTyDim) {
6024  res = parseDim(Operands);
6025  } else if (Op.Type == AMDGPUOperand::ImmTyFORMAT && !isGFX10()) {
6026  res = parseDfmtNfmt(Operands);
6027  } else {
6028  res = parseIntWithPrefix(Op.Name, Operands, Op.Type, Op.ConvertResult);
6029  }
6030  if (res != MatchOperand_NoMatch) {
6031  return res;
6032  }
6033  }
6034  return MatchOperand_NoMatch;
6035 }
6036 
6037 OperandMatchResultTy AMDGPUAsmParser::parseOModOperand(OperandVector &Operands) {
6038  StringRef Name = Parser.getTok().getString();
6039  if (Name == "mul") {
6040  return parseIntWithPrefix("mul", Operands,
6041  AMDGPUOperand::ImmTyOModSI, ConvertOmodMul);
6042  }
6043 
6044  if (Name == "div") {
6045  return parseIntWithPrefix("div", Operands,
6046  AMDGPUOperand::ImmTyOModSI, ConvertOmodDiv);
6047  }
6048 
6049  return MatchOperand_NoMatch;
6050 }
6051 
6052 void AMDGPUAsmParser::cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands) {
6053  cvtVOP3P(Inst, Operands);
6054 
6055  int Opc = Inst.getOpcode();
6056 
6057  int SrcNum;
6058  const int Ops[] = { AMDGPU::OpName::src0,
6059  AMDGPU::OpName::src1,
6060  AMDGPU::OpName::src2 };
6061  for (SrcNum = 0;
6062  SrcNum < 3 && AMDGPU::getNamedOperandIdx(Opc, Ops[SrcNum]) != -1;
6063  ++SrcNum);
6064  assert(SrcNum > 0);
6065 
6066  int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
6067  unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
6068 
6069  if ((OpSel & (1 << SrcNum)) != 0) {
6070  int ModIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers);
6071  uint32_t ModVal = Inst.getOperand(ModIdx).getImm();
6072  Inst.getOperand(ModIdx).setImm(ModVal | SISrcMods::DST_OP_SEL);
6073  }
6074 }
6075 
6076 static bool isRegOrImmWithInputMods(const MCInstrDesc &Desc, unsigned OpNum) {
6077  // 1. This operand has input modifiers
6078  return Desc.OpInfo[OpNum].OperandType == AMDGPU::OPERAND_INPUT_MODS
6079  // 2. This is not the last operand
6080  && Desc.NumOperands > (OpNum + 1)
6081  // 3. The next operand is a register class
6082  && Desc.OpInfo[OpNum + 1].RegClass != -1
6083  // 4. The next operand is not tied to any other operand
6084  && Desc.getOperandConstraint(OpNum + 1, MCOI::OperandConstraint::TIED_TO) == -1;
6085 }
6086 
6087 void AMDGPUAsmParser::cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands)
6088 {
6089  OptionalImmIndexMap OptionalIdx;
6090  unsigned Opc = Inst.getOpcode();
6091 
6092  unsigned I = 1;
6093  const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
6094  for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
6095  ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
6096  }
6097 
6098  for (unsigned E = Operands.size(); I != E; ++I) {
6099  AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
6100  if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
6101  Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
6102  } else if (Op.isInterpSlot() ||
6103  Op.isInterpAttr() ||
6104  Op.isAttrChan()) {
6105  Inst.addOperand(MCOperand::createImm(Op.getImm()));
6106  } else if (Op.isImmModifier()) {
6107  OptionalIdx[Op.getImmTy()] = I;
6108  } else {
6109  llvm_unreachable("unhandled operand type");
6110  }
6111  }
6112 
6113  if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::high) != -1) {
6114  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyHigh);
6115  }
6116 
6117  if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp) != -1) {
6118  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI);
6119  }
6120 
6121  if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod) != -1) {
6122  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI);
6123  }
6124 }
6125 
6126 void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands,
6127  OptionalImmIndexMap &OptionalIdx) {
6128  unsigned Opc = Inst.getOpcode();
6129 
6130  unsigned I = 1;
6131  const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
6132  for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
6133  ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
6134  }
6135 
6136  if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers) != -1) {
6137  // This instruction has src modifiers
6138  for (unsigned E = Operands.size(); I != E; ++I) {
6139  AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
6140  if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
6141  Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
6142  } else if (Op.isImmModifier()) {
6143  OptionalIdx[Op.getImmTy()] = I;
6144  } else if (Op.isRegOrImm()) {
6145  Op.addRegOrImmOperands(Inst, 1);
6146  } else {
6147  llvm_unreachable("unhandled operand type");
6148  }
6149  }
6150  } else {
6151  // No src modifiers
6152  for (unsigned E = Operands.size(); I != E; ++I) {
6153  AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
6154  if (Op.isMod()) {
6155  OptionalIdx[Op.getImmTy()] = I;
6156  } else {
6157  Op.addRegOrImmOperands(Inst, 1);
6158  }
6159  }
6160  }
6161 
6162  if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp) != -1) {
6163  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI);
6164  }
6165 
6166  if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod) != -1) {
6167  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI);
6168  }
6169 
6170  // Special case v_mac_{f16, f32} and v_fmac_{f16, f32} (gfx906/gfx10+):
6171  // these opcodes have a src2 register operand that is tied to the dst operand.
6172  // Modifiers are not allowed for this operand in the assembler, so src2_modifiers
6173  // must be 0.
6174  if (Opc == AMDGPU::V_MAC_F32_e64_gfx6_gfx7 ||
6175  Opc == AMDGPU::V_MAC_F32_e64_gfx10 ||
6176  Opc == AMDGPU::V_MAC_F32_e64_vi ||
6177  Opc == AMDGPU::V_MAC_F16_e64_vi ||
6178  Opc == AMDGPU::V_FMAC_F32_e64_gfx10 ||
6179  Opc == AMDGPU::V_FMAC_F32_e64_vi ||
6180  Opc == AMDGPU::V_FMAC_F16_e64_gfx10) {
6181  auto it = Inst.begin();
6182  std::advance(it, AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2_modifiers));
6183  it = Inst.insert(it, MCOperand::createImm(0)); // no modifiers for src2
6184  ++it;
6185  Inst.insert(it, Inst.getOperand(0)); // src2 = dst
6186  }
6187 }
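// Illustration of the v_mac/v_fmac special case above (assumed syntax): for
// "v_mac_f32_e64 v0, v1, v2" the source text supplies only dst, src0 and
// src1; the code above then inserts src2_modifiers = 0 and a copy of the
// dst operand as the tied src2.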
6188 
6189 void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands) {
6190  OptionalImmIndexMap OptionalIdx;
6191  cvtVOP3(Inst, Operands, OptionalIdx);
6192 }
6193 
6194 void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst,
6195  const OperandVector &Operands) {
6196  OptionalImmIndexMap OptIdx;
6197  const int Opc = Inst.getOpcode();
6198  const MCInstrDesc &Desc = MII.get(Opc);
6199 
6200  const bool IsPacked = (Desc.TSFlags & SIInstrFlags::IsPacked) != 0;
6201 
6202  cvtVOP3(Inst, Operands, OptIdx);
6203 
6204  if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst_in) != -1) {
6205  assert(!IsPacked);
6206  Inst.addOperand(Inst.getOperand(0));
6207  }
6208 
6209  // FIXME: This is messy. Parse the modifiers as if it was a normal VOP3
6210  // instruction, and then figure out where to actually put the modifiers
6211 
6212  addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSel);
6213 
6214  int OpSelHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel_hi);
6215  if (OpSelHiIdx != -1) {
6216  int DefaultVal = IsPacked ? -1 : 0;
6217  addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSelHi,
6218  DefaultVal);
6219  }
6220 
6221  int NegLoIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_lo);
6222  if (NegLoIdx != -1) {
6223  assert(IsPacked);
6224  addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegLo);
6225  addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegHi);
6226  }
6227 
6228  const int Ops[] = { AMDGPU::OpName::src0,
6229  AMDGPU::OpName::src1,
6230  AMDGPU::OpName::src2 };
6231  const int ModOps[] = { AMDGPU::OpName::src0_modifiers,
6232  AMDGPU::OpName::src1_modifiers,
6233  AMDGPU::OpName::src2_modifiers };