1 //===- AMDGPUAsmParser.cpp - Parse SI asm to MCInst instructions ----------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "AMDGPU.h"
10 #include "AMDKernelCodeT.h"
13 #include "SIDefines.h"
14 #include "SIInstrInfo.h"
16 #include "Utils/AMDGPUAsmUtils.h"
17 #include "Utils/AMDGPUBaseInfo.h"
19 #include "llvm/ADT/APFloat.h"
20 #include "llvm/ADT/APInt.h"
21 #include "llvm/ADT/ArrayRef.h"
22 #include "llvm/ADT/STLExtras.h"
24 #include "llvm/ADT/SmallString.h"
25 #include "llvm/ADT/StringRef.h"
26 #include "llvm/ADT/StringSwitch.h"
27 #include "llvm/ADT/Twine.h"
28 #include "llvm/BinaryFormat/ELF.h"
29 #include "llvm/MC/MCAsmInfo.h"
30 #include "llvm/MC/MCContext.h"
31 #include "llvm/MC/MCExpr.h"
32 #include "llvm/MC/MCInst.h"
33 #include "llvm/MC/MCInstrDesc.h"
34 #include "llvm/MC/MCInstrInfo.h"
40 #include "llvm/MC/MCRegisterInfo.h"
41 #include "llvm/MC/MCStreamer.h"
43 #include "llvm/MC/MCSymbol.h"
46 #include "llvm/Support/Casting.h"
47 #include "llvm/Support/Compiler.h"
51 #include "llvm/Support/SMLoc.h"
55 #include <algorithm>
56 #include <cassert>
57 #include <cstdint>
58 #include <cstring>
59 #include <iterator>
60 #include <map>
61 #include <memory>
62 #include <string>
63 
64 using namespace llvm;
65 using namespace llvm::AMDGPU;
66 using namespace llvm::amdhsa;
67 
68 namespace {
69 
70 class AMDGPUAsmParser;
71 
72 enum RegisterKind { IS_UNKNOWN, IS_VGPR, IS_SGPR, IS_AGPR, IS_TTMP, IS_SPECIAL };
73 
74 //===----------------------------------------------------------------------===//
75 // Operand
76 //===----------------------------------------------------------------------===//
77 
78 class AMDGPUOperand : public MCParsedAsmOperand {
79  enum KindTy {
80  Token,
81  Immediate,
82  Register,
83  Expression
84  } Kind;
85 
86  SMLoc StartLoc, EndLoc;
87  const AMDGPUAsmParser *AsmParser;
88 
89 public:
90  AMDGPUOperand(KindTy Kind_, const AMDGPUAsmParser *AsmParser_)
91  : MCParsedAsmOperand(), Kind(Kind_), AsmParser(AsmParser_) {}
92 
93  using Ptr = std::unique_ptr<AMDGPUOperand>;
94 
95  struct Modifiers {
96  bool Abs = false;
97  bool Neg = false;
98  bool Sext = false;
99 
100  bool hasFPModifiers() const { return Abs || Neg; }
101  bool hasIntModifiers() const { return Sext; }
102  bool hasModifiers() const { return hasFPModifiers() || hasIntModifiers(); }
103 
104  int64_t getFPModifiersOperand() const {
105  int64_t Operand = 0;
106  Operand |= Abs ? SISrcMods::ABS : 0u;
107  Operand |= Neg ? SISrcMods::NEG : 0u;
108  return Operand;
109  }
110 
111  int64_t getIntModifiersOperand() const {
112  int64_t Operand = 0;
113  Operand |= Sext ? SISrcMods::SEXT : 0u;
114  return Operand;
115  }
116 
117  int64_t getModifiersOperand() const {
118  assert(!(hasFPModifiers() && hasIntModifiers())
119  && "fp and int modifiers should not be used simultaneously");
120  if (hasFPModifiers()) {
121  return getFPModifiersOperand();
122  } else if (hasIntModifiers()) {
123  return getIntModifiersOperand();
124  } else {
125  return 0;
126  }
127  }
128 
129  friend raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods);
130  };
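// Illustrative sketch (not part of the original file): how parsed modifiers
// fold into the src_modifiers immediate that precedes a VOP3/SDWA source
// operand. The ABS/NEG/SEXT bit names come from SISrcMods in SIDefines.h.
//
//   AMDGPUOperand::Modifiers Mods;
//   Mods.Abs = true;                         // written as |v0| or abs(v0)
//   Mods.Neg = true;                         // written as -|v0|
//   int64_t SrcMods = Mods.getModifiersOperand();
//   // SrcMods == (SISrcMods::NEG | SISrcMods::ABS). A sext(v0) operand would
//   // set SISrcMods::SEXT instead; mixing fp and int modifiers asserts.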
131 
132  enum ImmTy {
133  ImmTyNone,
134  ImmTyGDS,
135  ImmTyLDS,
136  ImmTyOffen,
137  ImmTyIdxen,
138  ImmTyAddr64,
139  ImmTyOffset,
140  ImmTyInstOffset,
141  ImmTyOffset0,
142  ImmTyOffset1,
143  ImmTyDLC,
144  ImmTyGLC,
145  ImmTySLC,
146  ImmTyTFE,
147  ImmTyD16,
148  ImmTyClampSI,
149  ImmTyOModSI,
150  ImmTyDPP8,
151  ImmTyDppCtrl,
152  ImmTyDppRowMask,
153  ImmTyDppBankMask,
154  ImmTyDppBoundCtrl,
155  ImmTyDppFi,
156  ImmTySdwaDstSel,
157  ImmTySdwaSrc0Sel,
158  ImmTySdwaSrc1Sel,
159  ImmTySdwaDstUnused,
160  ImmTyDMask,
161  ImmTyDim,
162  ImmTyUNorm,
163  ImmTyDA,
164  ImmTyR128A16,
165  ImmTyLWE,
166  ImmTyExpTgt,
167  ImmTyExpCompr,
168  ImmTyExpVM,
169  ImmTyFORMAT,
170  ImmTyHwreg,
171  ImmTyOff,
172  ImmTySendMsg,
173  ImmTyInterpSlot,
174  ImmTyInterpAttr,
175  ImmTyAttrChan,
176  ImmTyOpSel,
177  ImmTyOpSelHi,
178  ImmTyNegLo,
179  ImmTyNegHi,
180  ImmTySwizzle,
181  ImmTyGprIdxMode,
182  ImmTyHigh,
183  ImmTyBLGP,
184  ImmTyCBSZ,
185  ImmTyABID,
186  ImmTyEndpgm,
187  };
188 
189 private:
190  struct TokOp {
191  const char *Data;
192  unsigned Length;
193  };
194 
195  struct ImmOp {
196  int64_t Val;
197  ImmTy Type;
198  bool IsFPImm;
199  Modifiers Mods;
200  };
201 
202  struct RegOp {
203  unsigned RegNo;
204  Modifiers Mods;
205  };
206 
207  union {
208  TokOp Tok;
209  ImmOp Imm;
210  RegOp Reg;
211  const MCExpr *Expr;
212  };
213 
214 public:
215  bool isToken() const override {
216  if (Kind == Token)
217  return true;
218 
219  // When parsing operands, we can't always tell if something was meant to be
220  // a token, like 'gds', or an expression that references a global variable.
221  // In this case, we assume the string is an expression, and if we need to
222  // interpret it as a token, then we treat the symbol name as the token.
223  return isSymbolRefExpr();
224  }
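// Illustrative note (not in the original source): for an input such as
// "ds_add_u32 v0, v1 gds", the trailing "gds" may be parsed as an expression
// referring to a symbol named "gds" rather than as a plain token. isToken()
// therefore also accepts a symbol-ref expression, and getToken() below hands
// back the symbol's name so the matcher can still treat it as the token.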
225 
226  bool isSymbolRefExpr() const {
227  return isExpr() && Expr && isa<MCSymbolRefExpr>(Expr);
228  }
229 
230  bool isImm() const override {
231  return Kind == Immediate;
232  }
233 
234  bool isInlinableImm(MVT type) const;
235  bool isLiteralImm(MVT type) const;
236 
237  bool isRegKind() const {
238  return Kind == Register;
239  }
240 
241  bool isReg() const override {
242  return isRegKind() && !hasModifiers();
243  }
244 
245  bool isRegOrImmWithInputMods(unsigned RCID, MVT type) const {
246  return isRegClass(RCID) || isInlinableImm(type) || isLiteralImm(type);
247  }
248 
249  bool isRegOrImmWithInt16InputMods() const {
250  return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i16);
251  }
252 
253  bool isRegOrImmWithInt32InputMods() const {
254  return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i32);
255  }
256 
257  bool isRegOrImmWithInt64InputMods() const {
258  return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::i64);
259  }
260 
261  bool isRegOrImmWithFP16InputMods() const {
262  return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f16);
263  }
264 
265  bool isRegOrImmWithFP32InputMods() const {
266  return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f32);
267  }
268 
269  bool isRegOrImmWithFP64InputMods() const {
270  return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::f64);
271  }
272 
273  bool isVReg() const {
274  return isRegClass(AMDGPU::VGPR_32RegClassID) ||
275  isRegClass(AMDGPU::VReg_64RegClassID) ||
276  isRegClass(AMDGPU::VReg_96RegClassID) ||
277  isRegClass(AMDGPU::VReg_128RegClassID) ||
278  isRegClass(AMDGPU::VReg_160RegClassID) ||
279  isRegClass(AMDGPU::VReg_256RegClassID) ||
280  isRegClass(AMDGPU::VReg_512RegClassID) ||
281  isRegClass(AMDGPU::VReg_1024RegClassID);
282  }
283 
284  bool isVReg32() const {
285  return isRegClass(AMDGPU::VGPR_32RegClassID);
286  }
287 
288  bool isVReg32OrOff() const {
289  return isOff() || isVReg32();
290  }
291 
292  bool isSDWAOperand(MVT type) const;
293  bool isSDWAFP16Operand() const;
294  bool isSDWAFP32Operand() const;
295  bool isSDWAInt16Operand() const;
296  bool isSDWAInt32Operand() const;
297 
298  bool isImmTy(ImmTy ImmT) const {
299  return isImm() && Imm.Type == ImmT;
300  }
301 
302  bool isImmModifier() const {
303  return isImm() && Imm.Type != ImmTyNone;
304  }
305 
306  bool isClampSI() const { return isImmTy(ImmTyClampSI); }
307  bool isOModSI() const { return isImmTy(ImmTyOModSI); }
308  bool isDMask() const { return isImmTy(ImmTyDMask); }
309  bool isDim() const { return isImmTy(ImmTyDim); }
310  bool isUNorm() const { return isImmTy(ImmTyUNorm); }
311  bool isDA() const { return isImmTy(ImmTyDA); }
312  bool isR128A16() const { return isImmTy(ImmTyR128A16); }
313  bool isLWE() const { return isImmTy(ImmTyLWE); }
314  bool isOff() const { return isImmTy(ImmTyOff); }
315  bool isExpTgt() const { return isImmTy(ImmTyExpTgt); }
316  bool isExpVM() const { return isImmTy(ImmTyExpVM); }
317  bool isExpCompr() const { return isImmTy(ImmTyExpCompr); }
318  bool isOffen() const { return isImmTy(ImmTyOffen); }
319  bool isIdxen() const { return isImmTy(ImmTyIdxen); }
320  bool isAddr64() const { return isImmTy(ImmTyAddr64); }
321  bool isOffset() const { return isImmTy(ImmTyOffset) && isUInt<16>(getImm()); }
322  bool isOffset0() const { return isImmTy(ImmTyOffset0) && isUInt<8>(getImm()); }
323  bool isOffset1() const { return isImmTy(ImmTyOffset1) && isUInt<8>(getImm()); }
324 
325  bool isFlatOffset() const { return isImmTy(ImmTyOffset) || isImmTy(ImmTyInstOffset); }
326  bool isGDS() const { return isImmTy(ImmTyGDS); }
327  bool isLDS() const { return isImmTy(ImmTyLDS); }
328  bool isDLC() const { return isImmTy(ImmTyDLC); }
329  bool isGLC() const { return isImmTy(ImmTyGLC); }
330  bool isSLC() const { return isImmTy(ImmTySLC); }
331  bool isTFE() const { return isImmTy(ImmTyTFE); }
332  bool isD16() const { return isImmTy(ImmTyD16); }
333  bool isFORMAT() const { return isImmTy(ImmTyFORMAT) && isUInt<8>(getImm()); }
334  bool isBankMask() const { return isImmTy(ImmTyDppBankMask); }
335  bool isRowMask() const { return isImmTy(ImmTyDppRowMask); }
336  bool isBoundCtrl() const { return isImmTy(ImmTyDppBoundCtrl); }
337  bool isFI() const { return isImmTy(ImmTyDppFi); }
338  bool isSDWADstSel() const { return isImmTy(ImmTySdwaDstSel); }
339  bool isSDWASrc0Sel() const { return isImmTy(ImmTySdwaSrc0Sel); }
340  bool isSDWASrc1Sel() const { return isImmTy(ImmTySdwaSrc1Sel); }
341  bool isSDWADstUnused() const { return isImmTy(ImmTySdwaDstUnused); }
342  bool isInterpSlot() const { return isImmTy(ImmTyInterpSlot); }
343  bool isInterpAttr() const { return isImmTy(ImmTyInterpAttr); }
344  bool isAttrChan() const { return isImmTy(ImmTyAttrChan); }
345  bool isOpSel() const { return isImmTy(ImmTyOpSel); }
346  bool isOpSelHi() const { return isImmTy(ImmTyOpSelHi); }
347  bool isNegLo() const { return isImmTy(ImmTyNegLo); }
348  bool isNegHi() const { return isImmTy(ImmTyNegHi); }
349  bool isHigh() const { return isImmTy(ImmTyHigh); }
350 
351  bool isMod() const {
352  return isClampSI() || isOModSI();
353  }
354 
355  bool isRegOrImm() const {
356  return isReg() || isImm();
357  }
358 
359  bool isRegClass(unsigned RCID) const;
360 
361  bool isInlineValue() const;
362 
363  bool isRegOrInlineNoMods(unsigned RCID, MVT type) const {
364  return (isRegClass(RCID) || isInlinableImm(type)) && !hasModifiers();
365  }
366 
367  bool isSCSrcB16() const {
368  return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i16);
369  }
370 
371  bool isSCSrcV2B16() const {
372  return isSCSrcB16();
373  }
374 
375  bool isSCSrcB32() const {
376  return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i32);
377  }
378 
379  bool isSCSrcB64() const {
380  return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::i64);
381  }
382 
383  bool isBoolReg() const;
384 
385  bool isSCSrcF16() const {
386  return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f16);
387  }
388 
389  bool isSCSrcV2F16() const {
390  return isSCSrcF16();
391  }
392 
393  bool isSCSrcF32() const {
394  return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f32);
395  }
396 
397  bool isSCSrcF64() const {
398  return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::f64);
399  }
400 
401  bool isSSrcB32() const {
402  return isSCSrcB32() || isLiteralImm(MVT::i32) || isExpr();
403  }
404 
405  bool isSSrcB16() const {
406  return isSCSrcB16() || isLiteralImm(MVT::i16);
407  }
408 
409  bool isSSrcV2B16() const {
410  llvm_unreachable("cannot happen");
411  return isSSrcB16();
412  }
413 
414  bool isSSrcB64() const {
415  // TODO: Find out how SALU supports extension of 32-bit literals to 64 bits.
416  // See isVSrc64().
417  return isSCSrcB64() || isLiteralImm(MVT::i64);
418  }
419 
420  bool isSSrcF32() const {
421  return isSCSrcB32() || isLiteralImm(MVT::f32) || isExpr();
422  }
423 
424  bool isSSrcF64() const {
425  return isSCSrcB64() || isLiteralImm(MVT::f64);
426  }
427 
428  bool isSSrcF16() const {
429  return isSCSrcB16() || isLiteralImm(MVT::f16);
430  }
431 
432  bool isSSrcV2F16() const {
433  llvm_unreachable("cannot happen");
434  return isSSrcF16();
435  }
436 
437  bool isSSrcOrLdsB32() const {
438  return isRegOrInlineNoMods(AMDGPU::SRegOrLds_32RegClassID, MVT::i32) ||
439  isLiteralImm(MVT::i32) || isExpr();
440  }
441 
442  bool isVCSrcB32() const {
443  return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i32);
444  }
445 
446  bool isVCSrcB64() const {
447  return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::i64);
448  }
449 
450  bool isVCSrcB16() const {
451  return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i16);
452  }
453 
454  bool isVCSrcV2B16() const {
455  return isVCSrcB16();
456  }
457 
458  bool isVCSrcF32() const {
459  return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f32);
460  }
461 
462  bool isVCSrcF64() const {
463  return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::f64);
464  }
465 
466  bool isVCSrcF16() const {
467  return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f16);
468  }
469 
470  bool isVCSrcV2F16() const {
471  return isVCSrcF16();
472  }
473 
474  bool isVSrcB32() const {
475  return isVCSrcF32() || isLiteralImm(MVT::i32) || isExpr();
476  }
477 
478  bool isVSrcB64() const {
479  return isVCSrcF64() || isLiteralImm(MVT::i64);
480  }
481 
482  bool isVSrcB16() const {
483  return isVCSrcF16() || isLiteralImm(MVT::i16);
484  }
485 
486  bool isVSrcV2B16() const {
487  return isVSrcB16() || isLiteralImm(MVT::v2i16);
488  }
489 
490  bool isVSrcF32() const {
491  return isVCSrcF32() || isLiteralImm(MVT::f32) || isExpr();
492  }
493 
494  bool isVSrcF64() const {
495  return isVCSrcF64() || isLiteralImm(MVT::f64);
496  }
497 
498  bool isVSrcF16() const {
499  return isVCSrcF16() || isLiteralImm(MVT::f16);
500  }
501 
502  bool isVSrcV2F16() const {
503  return isVSrcF16() || isLiteralImm(MVT::v2f16);
504  }
505 
506  bool isVISrcB32() const {
507  return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i32);
508  }
509 
510  bool isVISrcB16() const {
511  return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i16);
512  }
513 
514  bool isVISrcV2B16() const {
515  return isVISrcB16();
516  }
517 
518  bool isVISrcF32() const {
519  return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f32);
520  }
521 
522  bool isVISrcF16() const {
523  return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f16);
524  }
525 
526  bool isVISrcV2F16() const {
527  return isVISrcF16() || isVISrcB32();
528  }
529 
530  bool isAISrcB32() const {
531  return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i32);
532  }
533 
534  bool isAISrcB16() const {
535  return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i16);
536  }
537 
538  bool isAISrcV2B16() const {
539  return isAISrcB16();
540  }
541 
542  bool isAISrcF32() const {
543  return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f32);
544  }
545 
546  bool isAISrcF16() const {
547  return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f16);
548  }
549 
550  bool isAISrcV2F16() const {
551  return isAISrcF16() || isAISrcB32();
552  }
553 
554  bool isAISrc_128B32() const {
555  return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i32);
556  }
557 
558  bool isAISrc_128B16() const {
559  return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i16);
560  }
561 
562  bool isAISrc_128V2B16() const {
563  return isAISrc_128B16();
564  }
565 
566  bool isAISrc_128F32() const {
567  return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f32);
568  }
569 
570  bool isAISrc_128F16() const {
571  return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f16);
572  }
573 
574  bool isAISrc_128V2F16() const {
575  return isAISrc_128F16() || isAISrc_128B32();
576  }
577 
578  bool isAISrc_512B32() const {
579  return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i32);
580  }
581 
582  bool isAISrc_512B16() const {
583  return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i16);
584  }
585 
586  bool isAISrc_512V2B16() const {
587  return isAISrc_512B16();
588  }
589 
590  bool isAISrc_512F32() const {
591  return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f32);
592  }
593 
594  bool isAISrc_512F16() const {
595  return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f16);
596  }
597 
598  bool isAISrc_512V2F16() const {
599  return isAISrc_512F16() || isAISrc_512B32();
600  }
601 
602  bool isAISrc_1024B32() const {
603  return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i32);
604  }
605 
606  bool isAISrc_1024B16() const {
607  return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i16);
608  }
609 
610  bool isAISrc_1024V2B16() const {
611  return isAISrc_1024B16();
612  }
613 
614  bool isAISrc_1024F32() const {
615  return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f32);
616  }
617 
618  bool isAISrc_1024F16() const {
619  return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f16);
620  }
621 
622  bool isAISrc_1024V2F16() const {
623  return isAISrc_1024F16() || isAISrc_1024B32();
624  }
625 
626  bool isKImmFP32() const {
627  return isLiteralImm(MVT::f32);
628  }
629 
630  bool isKImmFP16() const {
631  return isLiteralImm(MVT::f16);
632  }
633 
634  bool isMem() const override {
635  return false;
636  }
637 
638  bool isExpr() const {
639  return Kind == Expression;
640  }
641 
642  bool isSoppBrTarget() const {
643  return isExpr() || isImm();
644  }
645 
646  bool isSWaitCnt() const;
647  bool isHwreg() const;
648  bool isSendMsg() const;
649  bool isSwizzle() const;
650  bool isSMRDOffset8() const;
651  bool isSMRDOffset20() const;
652  bool isSMRDLiteralOffset() const;
653  bool isDPP8() const;
654  bool isDPPCtrl() const;
655  bool isBLGP() const;
656  bool isCBSZ() const;
657  bool isABID() const;
658  bool isGPRIdxMode() const;
659  bool isS16Imm() const;
660  bool isU16Imm() const;
661  bool isEndpgm() const;
662 
663  StringRef getExpressionAsToken() const {
664  assert(isExpr());
665  const MCSymbolRefExpr *S = cast<MCSymbolRefExpr>(Expr);
666  return S->getSymbol().getName();
667  }
668 
669  StringRef getToken() const {
670  assert(isToken());
671 
672  if (Kind == Expression)
673  return getExpressionAsToken();
674 
675  return StringRef(Tok.Data, Tok.Length);
676  }
677 
678  int64_t getImm() const {
679  assert(isImm());
680  return Imm.Val;
681  }
682 
683  ImmTy getImmTy() const {
684  assert(isImm());
685  return Imm.Type;
686  }
687 
688  unsigned getReg() const override {
689  assert(isRegKind());
690  return Reg.RegNo;
691  }
692 
693  SMLoc getStartLoc() const override {
694  return StartLoc;
695  }
696 
697  SMLoc getEndLoc() const override {
698  return EndLoc;
699  }
700 
701  SMRange getLocRange() const {
702  return SMRange(StartLoc, EndLoc);
703  }
704 
705  Modifiers getModifiers() const {
706  assert(isRegKind() || isImmTy(ImmTyNone));
707  return isRegKind() ? Reg.Mods : Imm.Mods;
708  }
709 
710  void setModifiers(Modifiers Mods) {
711  assert(isRegKind() || isImmTy(ImmTyNone));
712  if (isRegKind())
713  Reg.Mods = Mods;
714  else
715  Imm.Mods = Mods;
716  }
717 
718  bool hasModifiers() const {
719  return getModifiers().hasModifiers();
720  }
721 
722  bool hasFPModifiers() const {
723  return getModifiers().hasFPModifiers();
724  }
725 
726  bool hasIntModifiers() const {
727  return getModifiers().hasIntModifiers();
728  }
729 
730  uint64_t applyInputFPModifiers(uint64_t Val, unsigned Size) const;
731 
732  void addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers = true) const;
733 
734  void addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const;
735 
736  template <unsigned Bitwidth>
737  void addKImmFPOperands(MCInst &Inst, unsigned N) const;
738 
739  void addKImmFP16Operands(MCInst &Inst, unsigned N) const {
740  addKImmFPOperands<16>(Inst, N);
741  }
742 
743  void addKImmFP32Operands(MCInst &Inst, unsigned N) const {
744  addKImmFPOperands<32>(Inst, N);
745  }
746 
747  void addRegOperands(MCInst &Inst, unsigned N) const;
748 
749  void addBoolRegOperands(MCInst &Inst, unsigned N) const {
750  addRegOperands(Inst, N);
751  }
752 
753  void addRegOrImmOperands(MCInst &Inst, unsigned N) const {
754  if (isRegKind())
755  addRegOperands(Inst, N);
756  else if (isExpr())
757  Inst.addOperand(MCOperand::createExpr(Expr));
758  else
759  addImmOperands(Inst, N);
760  }
761 
762  void addRegOrImmWithInputModsOperands(MCInst &Inst, unsigned N) const {
763  Modifiers Mods = getModifiers();
764  Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand()));
765  if (isRegKind()) {
766  addRegOperands(Inst, N);
767  } else {
768  addImmOperands(Inst, N, false);
769  }
770  }
771 
772  void addRegOrImmWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
773  assert(!hasIntModifiers());
774  addRegOrImmWithInputModsOperands(Inst, N);
775  }
776 
777  void addRegOrImmWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
778  assert(!hasFPModifiers());
779  addRegOrImmWithInputModsOperands(Inst, N);
780  }
781 
782  void addRegWithInputModsOperands(MCInst &Inst, unsigned N) const {
783  Modifiers Mods = getModifiers();
784  Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand()));
785  assert(isRegKind());
786  addRegOperands(Inst, N);
787  }
788 
789  void addRegWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
790  assert(!hasIntModifiers());
791  addRegWithInputModsOperands(Inst, N);
792  }
793 
794  void addRegWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
795  assert(!hasFPModifiers());
796  addRegWithInputModsOperands(Inst, N);
797  }
798 
799  void addSoppBrTargetOperands(MCInst &Inst, unsigned N) const {
800  if (isImm())
801  addImmOperands(Inst, N);
802  else {
803  assert(isExpr());
804  Inst.addOperand(MCOperand::createExpr(Expr));
805  }
806  }
807 
808  static void printImmTy(raw_ostream& OS, ImmTy Type) {
809  switch (Type) {
810  case ImmTyNone: OS << "None"; break;
811  case ImmTyGDS: OS << "GDS"; break;
812  case ImmTyLDS: OS << "LDS"; break;
813  case ImmTyOffen: OS << "Offen"; break;
814  case ImmTyIdxen: OS << "Idxen"; break;
815  case ImmTyAddr64: OS << "Addr64"; break;
816  case ImmTyOffset: OS << "Offset"; break;
817  case ImmTyInstOffset: OS << "InstOffset"; break;
818  case ImmTyOffset0: OS << "Offset0"; break;
819  case ImmTyOffset1: OS << "Offset1"; break;
820  case ImmTyDLC: OS << "DLC"; break;
821  case ImmTyGLC: OS << "GLC"; break;
822  case ImmTySLC: OS << "SLC"; break;
823  case ImmTyTFE: OS << "TFE"; break;
824  case ImmTyD16: OS << "D16"; break;
825  case ImmTyFORMAT: OS << "FORMAT"; break;
826  case ImmTyClampSI: OS << "ClampSI"; break;
827  case ImmTyOModSI: OS << "OModSI"; break;
828  case ImmTyDPP8: OS << "DPP8"; break;
829  case ImmTyDppCtrl: OS << "DppCtrl"; break;
830  case ImmTyDppRowMask: OS << "DppRowMask"; break;
831  case ImmTyDppBankMask: OS << "DppBankMask"; break;
832  case ImmTyDppBoundCtrl: OS << "DppBoundCtrl"; break;
833  case ImmTyDppFi: OS << "FI"; break;
834  case ImmTySdwaDstSel: OS << "SdwaDstSel"; break;
835  case ImmTySdwaSrc0Sel: OS << "SdwaSrc0Sel"; break;
836  case ImmTySdwaSrc1Sel: OS << "SdwaSrc1Sel"; break;
837  case ImmTySdwaDstUnused: OS << "SdwaDstUnused"; break;
838  case ImmTyDMask: OS << "DMask"; break;
839  case ImmTyDim: OS << "Dim"; break;
840  case ImmTyUNorm: OS << "UNorm"; break;
841  case ImmTyDA: OS << "DA"; break;
842  case ImmTyR128A16: OS << "R128A16"; break;
843  case ImmTyLWE: OS << "LWE"; break;
844  case ImmTyOff: OS << "Off"; break;
845  case ImmTyExpTgt: OS << "ExpTgt"; break;
846  case ImmTyExpCompr: OS << "ExpCompr"; break;
847  case ImmTyExpVM: OS << "ExpVM"; break;
848  case ImmTyHwreg: OS << "Hwreg"; break;
849  case ImmTySendMsg: OS << "SendMsg"; break;
850  case ImmTyInterpSlot: OS << "InterpSlot"; break;
851  case ImmTyInterpAttr: OS << "InterpAttr"; break;
852  case ImmTyAttrChan: OS << "AttrChan"; break;
853  case ImmTyOpSel: OS << "OpSel"; break;
854  case ImmTyOpSelHi: OS << "OpSelHi"; break;
855  case ImmTyNegLo: OS << "NegLo"; break;
856  case ImmTyNegHi: OS << "NegHi"; break;
857  case ImmTySwizzle: OS << "Swizzle"; break;
858  case ImmTyGprIdxMode: OS << "GprIdxMode"; break;
859  case ImmTyHigh: OS << "High"; break;
860  case ImmTyBLGP: OS << "BLGP"; break;
861  case ImmTyCBSZ: OS << "CBSZ"; break;
862  case ImmTyABID: OS << "ABID"; break;
863  case ImmTyEndpgm: OS << "Endpgm"; break;
864  }
865  }
866 
867  void print(raw_ostream &OS) const override {
868  switch (Kind) {
869  case Register:
870  OS << "<register " << getReg() << " mods: " << Reg.Mods << '>';
871  break;
872  case Immediate:
873  OS << '<' << getImm();
874  if (getImmTy() != ImmTyNone) {
875  OS << " type: "; printImmTy(OS, getImmTy());
876  }
877  OS << " mods: " << Imm.Mods << '>';
878  break;
879  case Token:
880  OS << '\'' << getToken() << '\'';
881  break;
882  case Expression:
883  OS << "<expr " << *Expr << '>';
884  break;
885  }
886  }
887 
888  static AMDGPUOperand::Ptr CreateImm(const AMDGPUAsmParser *AsmParser,
889  int64_t Val, SMLoc Loc,
890  ImmTy Type = ImmTyNone,
891  bool IsFPImm = false) {
892  auto Op = std::make_unique<AMDGPUOperand>(Immediate, AsmParser);
893  Op->Imm.Val = Val;
894  Op->Imm.IsFPImm = IsFPImm;
895  Op->Imm.Type = Type;
896  Op->Imm.Mods = Modifiers();
897  Op->StartLoc = Loc;
898  Op->EndLoc = Loc;
899  return Op;
900  }
901 
902  static AMDGPUOperand::Ptr CreateToken(const AMDGPUAsmParser *AsmParser,
903  StringRef Str, SMLoc Loc,
904  bool HasExplicitEncodingSize = true) {
905  auto Res = std::make_unique<AMDGPUOperand>(Token, AsmParser);
906  Res->Tok.Data = Str.data();
907  Res->Tok.Length = Str.size();
908  Res->StartLoc = Loc;
909  Res->EndLoc = Loc;
910  return Res;
911  }
912 
913  static AMDGPUOperand::Ptr CreateReg(const AMDGPUAsmParser *AsmParser,
914  unsigned RegNo, SMLoc S,
915  SMLoc E) {
916  auto Op = std::make_unique<AMDGPUOperand>(Register, AsmParser);
917  Op->Reg.RegNo = RegNo;
918  Op->Reg.Mods = Modifiers();
919  Op->StartLoc = S;
920  Op->EndLoc = E;
921  return Op;
922  }
923 
924  static AMDGPUOperand::Ptr CreateExpr(const AMDGPUAsmParser *AsmParser,
925  const class MCExpr *Expr, SMLoc S) {
926  auto Op = std::make_unique<AMDGPUOperand>(Expression, AsmParser);
927  Op->Expr = Expr;
928  Op->StartLoc = S;
929  Op->EndLoc = S;
930  return Op;
931  }
932 };
933 
934 raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods) {
935  OS << "abs:" << Mods.Abs << " neg: " << Mods.Neg << " sext:" << Mods.Sext;
936  return OS;
937 }
938 
939 //===----------------------------------------------------------------------===//
940 // AsmParser
941 //===----------------------------------------------------------------------===//
942 
943 // Holds info related to the current kernel, e.g. count of SGPRs used.
944 // Kernel scope begins at .amdgpu_hsa_kernel directive, ends at next
945 // .amdgpu_hsa_kernel or at EOF.
946 class KernelScopeInfo {
947  int SgprIndexUnusedMin = -1;
948  int VgprIndexUnusedMin = -1;
949  MCContext *Ctx = nullptr;
950 
951  void usesSgprAt(int i) {
952  if (i >= SgprIndexUnusedMin) {
953  SgprIndexUnusedMin = ++i;
954  if (Ctx) {
955  MCSymbol * const Sym = Ctx->getOrCreateSymbol(Twine(".kernel.sgpr_count"));
956  Sym->setVariableValue(MCConstantExpr::create(SgprIndexUnusedMin, *Ctx));
957  }
958  }
959  }
960 
961  void usesVgprAt(int i) {
962  if (i >= VgprIndexUnusedMin) {
963  VgprIndexUnusedMin = ++i;
964  if (Ctx) {
965  MCSymbol * const Sym = Ctx->getOrCreateSymbol(Twine(".kernel.vgpr_count"));
966  Sym->setVariableValue(MCConstantExpr::create(VgprIndexUnusedMin, *Ctx));
967  }
968  }
969  }
970 
971 public:
972  KernelScopeInfo() = default;
973 
974  void initialize(MCContext &Context) {
975  Ctx = &Context;
976  usesSgprAt(SgprIndexUnusedMin = -1);
977  usesVgprAt(VgprIndexUnusedMin = -1);
978  }
979 
980  void usesRegister(RegisterKind RegKind, unsigned DwordRegIndex, unsigned RegWidth) {
981  switch (RegKind) {
982  case IS_SGPR: usesSgprAt(DwordRegIndex + RegWidth - 1); break;
983  case IS_AGPR: // fall through
984  case IS_VGPR: usesVgprAt(DwordRegIndex + RegWidth - 1); break;
985  default: break;
986  }
987  }
988 };
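// Minimal usage sketch (illustrative, not part of the original file): after a
// register such as v[8:11] is parsed inside an .amdgpu_hsa_kernel scope, the
// parser reports it here and the count symbols advance past the highest
// register index used.
//
//   KernelScopeInfo KernelScope;
//   KernelScope.initialize(Ctx);
//   KernelScope.usesRegister(IS_VGPR, /*DwordRegIndex=*/8, /*RegWidth=*/4);
//   // .kernel.vgpr_count now evaluates to 12 (highest VGPR index 11, plus 1).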
989 
990 class AMDGPUAsmParser : public MCTargetAsmParser {
991  MCAsmParser &Parser;
992 
993  // Number of extra operands parsed after the first optional operand.
994  // This may be necessary to skip hardcoded mandatory operands.
995  static const unsigned MAX_OPR_LOOKAHEAD = 8;
996 
997  unsigned ForcedEncodingSize = 0;
998  bool ForcedDPP = false;
999  bool ForcedSDWA = false;
1000  KernelScopeInfo KernelScope;
1001 
1002  /// @name Auto-generated Match Functions
1003  /// {
1004 
1005 #define GET_ASSEMBLER_HEADER
1006 #include "AMDGPUGenAsmMatcher.inc"
1007 
1008  /// }
1009 
1010 private:
1011  bool ParseAsAbsoluteExpression(uint32_t &Ret);
1012  bool OutOfRangeError(SMRange Range);
1013  /// Calculate VGPR/SGPR blocks required for given target, reserved
1014  /// registers, and user-specified NextFreeXGPR values.
1015  ///
1016  /// \param Features [in] Target features, used for bug corrections.
1017  /// \param VCCUsed [in] Whether VCC special SGPR is reserved.
1018  /// \param FlatScrUsed [in] Whether FLAT_SCRATCH special SGPR is reserved.
1019  /// \param XNACKUsed [in] Whether XNACK_MASK special SGPR is reserved.
1020  /// \param EnableWavefrontSize32 [in] Value of ENABLE_WAVEFRONT_SIZE32 kernel
1021  /// descriptor field, if valid.
1022  /// \param NextFreeVGPR [in] Max VGPR number referenced, plus one.
1023  /// \param VGPRRange [in] Token range, used for VGPR diagnostics.
1024  /// \param NextFreeSGPR [in] Max SGPR number referenced, plus one.
1025  /// \param SGPRRange [in] Token range, used for SGPR diagnostics.
1026  /// \param VGPRBlocks [out] Result VGPR block count.
1027  /// \param SGPRBlocks [out] Result SGPR block count.
1028  bool calculateGPRBlocks(const FeatureBitset &Features, bool VCCUsed,
1029  bool FlatScrUsed, bool XNACKUsed,
1030  Optional<bool> EnableWavefrontSize32, unsigned NextFreeVGPR,
1031  SMRange VGPRRange, unsigned NextFreeSGPR,
1032  SMRange SGPRRange, unsigned &VGPRBlocks,
1033  unsigned &SGPRBlocks);
1034  bool ParseDirectiveAMDGCNTarget();
1035  bool ParseDirectiveAMDHSAKernel();
1036  bool ParseDirectiveMajorMinor(uint32_t &Major, uint32_t &Minor);
1037  bool ParseDirectiveHSACodeObjectVersion();
1038  bool ParseDirectiveHSACodeObjectISA();
1039  bool ParseAMDKernelCodeTValue(StringRef ID, amd_kernel_code_t &Header);
1040  bool ParseDirectiveAMDKernelCodeT();
1041  bool subtargetHasRegister(const MCRegisterInfo &MRI, unsigned RegNo) const;
1042  bool ParseDirectiveAMDGPUHsaKernel();
1043 
1044  bool ParseDirectiveISAVersion();
1045  bool ParseDirectiveHSAMetadata();
1046  bool ParseDirectivePALMetadataBegin();
1047  bool ParseDirectivePALMetadata();
1048  bool ParseDirectiveAMDGPULDS();
1049 
1050  /// Common code to parse out a block of text (typically YAML) between start and
1051  /// end directives.
1052  bool ParseToEndDirective(const char *AssemblerDirectiveBegin,
1053  const char *AssemblerDirectiveEnd,
1054  std::string &CollectString);
1055 
1056  bool AddNextRegisterToList(unsigned& Reg, unsigned& RegWidth,
1057  RegisterKind RegKind, unsigned Reg1,
1058  unsigned RegNum);
1059  bool ParseAMDGPURegister(RegisterKind& RegKind, unsigned& Reg,
1060  unsigned& RegNum, unsigned& RegWidth,
1061  unsigned *DwordRegIndex);
1062  bool isRegister();
1063  bool isRegister(const AsmToken &Token, const AsmToken &NextToken) const;
1064  Optional<StringRef> getGprCountSymbolName(RegisterKind RegKind);
1065  void initializeGprCountSymbol(RegisterKind RegKind);
1066  bool updateGprCountSymbols(RegisterKind RegKind, unsigned DwordRegIndex,
1067  unsigned RegWidth);
1068  void cvtMubufImpl(MCInst &Inst, const OperandVector &Operands,
1069  bool IsAtomic, bool IsAtomicReturn, bool IsLds = false);
1070  void cvtDSImpl(MCInst &Inst, const OperandVector &Operands,
1071  bool IsGdsHardcoded);
1072 
1073 public:
1074  enum AMDGPUMatchResultTy {
1075  Match_PreferE32 = FIRST_TARGET_MATCH_RESULT_TY
1076  };
1077  enum OperandMode {
1078  OperandMode_Default,
1079  OperandMode_NSA,
1080  };
1081 
1082  using OptionalImmIndexMap = std::map<AMDGPUOperand::ImmTy, unsigned>;
1083 
1084  AMDGPUAsmParser(const MCSubtargetInfo &STI, MCAsmParser &_Parser,
1085  const MCInstrInfo &MII,
1086  const MCTargetOptions &Options)
1087  : MCTargetAsmParser(Options, STI, MII), Parser(_Parser) {
1088  MCAsmParserExtension::Initialize(Parser);
1089 
1090  if (getFeatureBits().none()) {
1091  // Set default features.
1092  copySTI().ToggleFeature("southern-islands");
1093  }
1094 
1095  setAvailableFeatures(ComputeAvailableFeatures(getFeatureBits()));
1096 
1097  {
1098  // TODO: make those pre-defined variables read-only.
1099  // Currently there is no suitable machinery in core llvm-mc for this.
1100  // MCSymbol::isRedefinable is intended for another purpose, and
1101  // AsmParser::parseDirectiveSet() cannot be specialized for a specific target.
1102  AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
1103  MCContext &Ctx = getContext();
1104  if (ISA.Major >= 6 && AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())) {
1105  MCSymbol *Sym =
1106  Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_number"));
1107  Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx));
1108  Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_minor"));
1109  Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx));
1110  Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_stepping"));
1111  Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx));
1112  } else {
1113  MCSymbol *Sym =
1114  Ctx.getOrCreateSymbol(Twine(".option.machine_version_major"));
1115  Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx));
1116  Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_minor"));
1117  Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx));
1118  Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_stepping"));
1119  Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx));
1120  }
1121  if (ISA.Major >= 6 && AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())) {
1122  initializeGprCountSymbol(IS_VGPR);
1123  initializeGprCountSymbol(IS_SGPR);
1124  } else
1125  KernelScope.initialize(getContext());
1126  }
1127  }
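 // Illustrative note (not in the original source): the symbols defined above
 // are intended to be referenced from the assembly being parsed, e.g.
 //
 //   .if .amdgcn.gfx_generation_number >= 9
 //   s_nop 0
 //   .endif
 //
 // On targets without code object v3, the .option.machine_version_* symbols
 // play the same role.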
1128 
1129  bool hasXNACK() const {
1130  return AMDGPU::hasXNACK(getSTI());
1131  }
1132 
1133  bool hasMIMG_R128() const {
1134  return AMDGPU::hasMIMG_R128(getSTI());
1135  }
1136 
1137  bool hasPackedD16() const {
1138  return AMDGPU::hasPackedD16(getSTI());
1139  }
1140 
1141  bool isSI() const {
1142  return AMDGPU::isSI(getSTI());
1143  }
1144 
1145  bool isCI() const {
1146  return AMDGPU::isCI(getSTI());
1147  }
1148 
1149  bool isVI() const {
1150  return AMDGPU::isVI(getSTI());
1151  }
1152 
1153  bool isGFX9() const {
1154  return AMDGPU::isGFX9(getSTI());
1155  }
1156 
1157  bool isGFX10() const {
1158  return AMDGPU::isGFX10(getSTI());
1159  }
1160 
1161  bool hasInv2PiInlineImm() const {
1162  return getFeatureBits()[AMDGPU::FeatureInv2PiInlineImm];
1163  }
1164 
1165  bool hasFlatOffsets() const {
1166  return getFeatureBits()[AMDGPU::FeatureFlatInstOffsets];
1167  }
1168 
1169  bool hasSGPR102_SGPR103() const {
1170  return !isVI() && !isGFX9();
1171  }
1172 
1173  bool hasSGPR104_SGPR105() const {
1174  return isGFX10();
1175  }
1176 
1177  bool hasIntClamp() const {
1178  return getFeatureBits()[AMDGPU::FeatureIntClamp];
1179  }
1180 
1181  AMDGPUTargetStreamer &getTargetStreamer() {
1182  MCTargetStreamer &TS = *getParser().getStreamer().getTargetStreamer();
1183  return static_cast<AMDGPUTargetStreamer &>(TS);
1184  }
1185 
1186  const MCRegisterInfo *getMRI() const {
1187  // We need this const_cast because for some reason getContext() is not const
1188  // in MCAsmParser.
1189  return const_cast<AMDGPUAsmParser*>(this)->getContext().getRegisterInfo();
1190  }
1191 
1192  const MCInstrInfo *getMII() const {
1193  return &MII;
1194  }
1195 
1196  const FeatureBitset &getFeatureBits() const {
1197  return getSTI().getFeatureBits();
1198  }
1199 
1200  void setForcedEncodingSize(unsigned Size) { ForcedEncodingSize = Size; }
1201  void setForcedDPP(bool ForceDPP_) { ForcedDPP = ForceDPP_; }
1202  void setForcedSDWA(bool ForceSDWA_) { ForcedSDWA = ForceSDWA_; }
1203 
1204  unsigned getForcedEncodingSize() const { return ForcedEncodingSize; }
1205  bool isForcedVOP3() const { return ForcedEncodingSize == 64; }
1206  bool isForcedDPP() const { return ForcedDPP; }
1207  bool isForcedSDWA() const { return ForcedSDWA; }
1208  ArrayRef<unsigned> getMatchedVariants() const;
1209 
1210  std::unique_ptr<AMDGPUOperand> parseRegister();
1211  bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) override;
1212  unsigned checkTargetMatchPredicate(MCInst &Inst) override;
1213  unsigned validateTargetOperandClass(MCParsedAsmOperand &Op,
1214  unsigned Kind) override;
1215  bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
1216  OperandVector &Operands, MCStreamer &Out,
1217  uint64_t &ErrorInfo,
1218  bool MatchingInlineAsm) override;
1219  bool ParseDirective(AsmToken DirectiveID) override;
1220  OperandMatchResultTy parseOperand(OperandVector &Operands, StringRef Mnemonic,
1221  OperandMode Mode = OperandMode_Default);
1222  StringRef parseMnemonicSuffix(StringRef Name);
1223  bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
1224  SMLoc NameLoc, OperandVector &Operands) override;
1225  //bool ProcessInstruction(MCInst &Inst);
1226 
1227  OperandMatchResultTy parseIntWithPrefix(const char *Prefix, int64_t &Int);
1228 
1229  OperandMatchResultTy
1230  parseIntWithPrefix(const char *Prefix, OperandVector &Operands,
1231  AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
1232  bool (*ConvertResult)(int64_t &) = nullptr);
1233 
1234  OperandMatchResultTy
1235  parseOperandArrayWithPrefix(const char *Prefix,
1236  OperandVector &Operands,
1237  AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
1238  bool (*ConvertResult)(int64_t&) = nullptr);
1239 
1240  OperandMatchResultTy
1241  parseNamedBit(const char *Name, OperandVector &Operands,
1242  AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone);
1243  OperandMatchResultTy parseStringWithPrefix(StringRef Prefix,
1244  StringRef &Value);
1245 
1246  bool isModifier();
1247  bool isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
1248  bool isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
1249  bool isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
1250  bool isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const;
1251  bool parseSP3NegModifier();
1252  OperandMatchResultTy parseImm(OperandVector &Operands, bool HasSP3AbsModifier = false);
1253  OperandMatchResultTy parseReg(OperandVector &Operands);
1254  OperandMatchResultTy parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod = false);
1255  OperandMatchResultTy parseRegOrImmWithFPInputMods(OperandVector &Operands, bool AllowImm = true);
1256  OperandMatchResultTy parseRegOrImmWithIntInputMods(OperandVector &Operands, bool AllowImm = true);
1257  OperandMatchResultTy parseRegWithFPInputMods(OperandVector &Operands);
1258  OperandMatchResultTy parseRegWithIntInputMods(OperandVector &Operands);
1259  OperandMatchResultTy parseVReg32OrOff(OperandVector &Operands);
1260  OperandMatchResultTy parseDfmtNfmt(OperandVector &Operands);
1261 
1262  void cvtDSOffset01(MCInst &Inst, const OperandVector &Operands);
1263  void cvtDS(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, false); }
1264  void cvtDSGds(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, true); }
1265  void cvtExp(MCInst &Inst, const OperandVector &Operands);
1266 
1267  bool parseCnt(int64_t &IntVal);
1268  OperandMatchResultTy parseSWaitCntOps(OperandVector &Operands);
1269  OperandMatchResultTy parseHwreg(OperandVector &Operands);
1270 
1271 private:
1272  struct OperandInfoTy {
1273  int64_t Id;
1274  bool IsSymbolic = false;
1275  bool IsDefined = false;
1276 
1277  OperandInfoTy(int64_t Id_) : Id(Id_) {}
1278  };
1279 
1280  bool parseSendMsgBody(OperandInfoTy &Msg, OperandInfoTy &Op, OperandInfoTy &Stream);
1281  bool validateSendMsg(const OperandInfoTy &Msg,
1282  const OperandInfoTy &Op,
1283  const OperandInfoTy &Stream,
1284  const SMLoc Loc);
1285 
1286  bool parseHwregBody(OperandInfoTy &HwReg, int64_t &Offset, int64_t &Width);
1287  bool validateHwreg(const OperandInfoTy &HwReg,
1288  const int64_t Offset,
1289  const int64_t Width,
1290  const SMLoc Loc);
1291 
1292  void errorExpTgt();
1293  OperandMatchResultTy parseExpTgtImpl(StringRef Str, uint8_t &Val);
1294  SMLoc getFlatOffsetLoc(const OperandVector &Operands) const;
1295 
1296  bool validateInstruction(const MCInst &Inst, const SMLoc &IDLoc, const OperandVector &Operands);
1297  bool validateFlatOffset(const MCInst &Inst, const OperandVector &Operands);
1298  bool validateSOPLiteral(const MCInst &Inst) const;
1299  bool validateConstantBusLimitations(const MCInst &Inst);
1300  bool validateEarlyClobberLimitations(const MCInst &Inst);
1301  bool validateIntClampSupported(const MCInst &Inst);
1302  bool validateMIMGAtomicDMask(const MCInst &Inst);
1303  bool validateMIMGGatherDMask(const MCInst &Inst);
1304  bool validateMIMGDataSize(const MCInst &Inst);
1305  bool validateMIMGAddrSize(const MCInst &Inst);
1306  bool validateMIMGD16(const MCInst &Inst);
1307  bool validateMIMGDim(const MCInst &Inst);
1308  bool validateLdsDirect(const MCInst &Inst);
1309  bool validateOpSel(const MCInst &Inst);
1310  bool validateVccOperand(unsigned Reg) const;
1311  bool validateVOP3Literal(const MCInst &Inst) const;
1312  unsigned getConstantBusLimit(unsigned Opcode) const;
1313  bool usesConstantBus(const MCInst &Inst, unsigned OpIdx);
1314  bool isInlineConstant(const MCInst &Inst, unsigned OpIdx) const;
1315  unsigned findImplicitSGPRReadInVOP(const MCInst &Inst) const;
1316 
1317  bool isId(const StringRef Id) const;
1318  bool isId(const AsmToken &Token, const StringRef Id) const;
1319  bool isToken(const AsmToken::TokenKind Kind) const;
1320  bool trySkipId(const StringRef Id);
1321  bool trySkipId(const StringRef Id, const AsmToken::TokenKind Kind);
1322  bool trySkipToken(const AsmToken::TokenKind Kind);
1323  bool skipToken(const AsmToken::TokenKind Kind, const StringRef ErrMsg);
1324  bool parseString(StringRef &Val, const StringRef ErrMsg = "expected a string");
1325  void peekTokens(MutableArrayRef<AsmToken> Tokens);
1326  AsmToken::TokenKind getTokenKind() const;
1327  bool parseExpr(int64_t &Imm);
1328  bool parseExpr(OperandVector &Operands);
1329  StringRef getTokenStr() const;
1330  AsmToken peekToken();
1331  AsmToken getToken() const;
1332  SMLoc getLoc() const;
1333  void lex();
1334 
1335 public:
1336  OperandMatchResultTy parseOptionalOperand(OperandVector &Operands);
1337  OperandMatchResultTy parseOptionalOpr(OperandVector &Operands);
1338 
1339  OperandMatchResultTy parseExpTgt(OperandVector &Operands);
1340  OperandMatchResultTy parseSendMsgOp(OperandVector &Operands);
1341  OperandMatchResultTy parseInterpSlot(OperandVector &Operands);
1342  OperandMatchResultTy parseInterpAttr(OperandVector &Operands);
1343  OperandMatchResultTy parseSOppBrTarget(OperandVector &Operands);
1344  OperandMatchResultTy parseBoolReg(OperandVector &Operands);
1345 
1346  bool parseSwizzleOperands(const unsigned OpNum, int64_t* Op,
1347  const unsigned MinVal,
1348  const unsigned MaxVal,
1349  const StringRef ErrMsg);
1350  OperandMatchResultTy parseSwizzleOp(OperandVector &Operands);
1351  bool parseSwizzleOffset(int64_t &Imm);
1352  bool parseSwizzleMacro(int64_t &Imm);
1353  bool parseSwizzleQuadPerm(int64_t &Imm);
1354  bool parseSwizzleBitmaskPerm(int64_t &Imm);
1355  bool parseSwizzleBroadcast(int64_t &Imm);
1356  bool parseSwizzleSwap(int64_t &Imm);
1357  bool parseSwizzleReverse(int64_t &Imm);
1358 
1359  OperandMatchResultTy parseGPRIdxMode(OperandVector &Operands);
1360  int64_t parseGPRIdxMacro();
1361 
1362  void cvtMubuf(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false, false); }
1363  void cvtMubufAtomic(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, true, false); }
1364  void cvtMubufAtomicReturn(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, true, true); }
1365  void cvtMubufLds(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false, false, true); }
1366  void cvtMtbuf(MCInst &Inst, const OperandVector &Operands);
1367 
1368  AMDGPUOperand::Ptr defaultDLC() const;
1369  AMDGPUOperand::Ptr defaultGLC() const;
1370  AMDGPUOperand::Ptr defaultSLC() const;
1371 
1372  AMDGPUOperand::Ptr defaultSMRDOffset8() const;
1373  AMDGPUOperand::Ptr defaultSMRDOffset20() const;
1374  AMDGPUOperand::Ptr defaultSMRDLiteralOffset() const;
1375  AMDGPUOperand::Ptr defaultFlatOffset() const;
1376 
1377  OperandMatchResultTy parseOModOperand(OperandVector &Operands);
1378 
1379  void cvtVOP3(MCInst &Inst, const OperandVector &Operands,
1380  OptionalImmIndexMap &OptionalIdx);
1381  void cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands);
1382  void cvtVOP3(MCInst &Inst, const OperandVector &Operands);
1383  void cvtVOP3P(MCInst &Inst, const OperandVector &Operands);
1384 
1385  void cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands);
1386 
1387  void cvtMIMG(MCInst &Inst, const OperandVector &Operands,
1388  bool IsAtomic = false);
1389  void cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands);
1390 
1391  OperandMatchResultTy parseDim(OperandVector &Operands);
1392  OperandMatchResultTy parseDPP8(OperandVector &Operands);
1393  OperandMatchResultTy parseDPPCtrl(OperandVector &Operands);
1394  AMDGPUOperand::Ptr defaultRowMask() const;
1395  AMDGPUOperand::Ptr defaultBankMask() const;
1396  AMDGPUOperand::Ptr defaultBoundCtrl() const;
1397  AMDGPUOperand::Ptr defaultFI() const;
1398  void cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8 = false);
1399  void cvtDPP8(MCInst &Inst, const OperandVector &Operands) { cvtDPP(Inst, Operands, true); }
1400 
1401  OperandMatchResultTy parseSDWASel(OperandVector &Operands, StringRef Prefix,
1402  AMDGPUOperand::ImmTy Type);
1403  OperandMatchResultTy parseSDWADstUnused(OperandVector &Operands);
1404  void cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands);
1405  void cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands);
1406  void cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands);
1407  void cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands);
1408  void cvtSDWA(MCInst &Inst, const OperandVector &Operands,
1409  uint64_t BasicInstType, bool skipVcc = false);
1410 
1411  AMDGPUOperand::Ptr defaultBLGP() const;
1412  AMDGPUOperand::Ptr defaultCBSZ() const;
1413  AMDGPUOperand::Ptr defaultABID() const;
1414 
1415  OperandMatchResultTy parseEndpgmOp(OperandVector &Operands);
1416  AMDGPUOperand::Ptr defaultEndpgmImmOperands() const;
1417 };
1418 
1419 struct OptionalOperand {
1420  const char *Name;
1421  AMDGPUOperand::ImmTy Type;
1422  bool IsBit;
1423  bool (*ConvertResult)(int64_t&);
1424 };
1425 
1426 } // end anonymous namespace
1427 
1428 // May be called with integer type with equivalent bitwidth.
1429 static const fltSemantics *getFltSemantics(unsigned Size) {
1430  switch (Size) {
1431  case 4:
1432  return &APFloat::IEEEsingle();
1433  case 8:
1434  return &APFloat::IEEEdouble();
1435  case 2:
1436  return &APFloat::IEEEhalf();
1437  default:
1438  llvm_unreachable("unsupported fp type");
1439  }
1440 }
1441 
1442 static const fltSemantics *getFltSemantics(MVT VT) {
1443  return getFltSemantics(VT.getSizeInBits() / 8);
1444 }
1445 
1446 static const fltSemantics *getOpFltSemantics(uint8_t OperandType) {
1447  switch (OperandType) {
1448  case AMDGPU::OPERAND_REG_IMM_INT32:
1449  case AMDGPU::OPERAND_REG_IMM_FP32:
1450  case AMDGPU::OPERAND_REG_INLINE_C_INT32:
1451  case AMDGPU::OPERAND_REG_INLINE_C_FP32:
1452  case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
1453  case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
1454  return &APFloat::IEEEsingle();
1455  case AMDGPU::OPERAND_REG_IMM_INT64:
1456  case AMDGPU::OPERAND_REG_IMM_FP64:
1457  case AMDGPU::OPERAND_REG_INLINE_C_INT64:
1458  case AMDGPU::OPERAND_REG_INLINE_C_FP64:
1459  return &APFloat::IEEEdouble();
1460  case AMDGPU::OPERAND_REG_IMM_INT16:
1461  case AMDGPU::OPERAND_REG_IMM_FP16:
1462  case AMDGPU::OPERAND_REG_INLINE_C_INT16:
1463  case AMDGPU::OPERAND_REG_INLINE_C_FP16:
1464  case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
1465  case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
1466  case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
1467  case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
1468  case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
1469  case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16:
1470  case AMDGPU::OPERAND_REG_IMM_V2INT16:
1471  case AMDGPU::OPERAND_REG_IMM_V2FP16:
1472  return &APFloat::IEEEhalf();
1473  default:
1474  llvm_unreachable("unsupported fp type");
1475  }
1476 }
1477 
1478 //===----------------------------------------------------------------------===//
1479 // Operand
1480 //===----------------------------------------------------------------------===//
1481 
1482 static bool canLosslesslyConvertToFPType(APFloat &FPLiteral, MVT VT) {
1483  bool Lost;
1484 
1485  // Convert the literal to the operand's floating-point type
1486  APFloat::opStatus Status = FPLiteral.convert(*getFltSemantics(VT),
1487  APFloat::rmNearestTiesToEven,
1488  &Lost);
1489  // We allow precision loss but not overflow or underflow
1490  if (Status != APFloat::opOK &&
1491  Lost &&
1492  ((Status & APFloat::opOverflow) != 0 ||
1493  (Status & APFloat::opUnderflow) != 0)) {
1494  return false;
1495  }
1496 
1497  return true;
1498 }
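// Behaviour sketch (illustrative, not part of the original file): the helper
// tolerates rounding but rejects range errors when narrowing a literal.
//
//   APFloat A(0.1);                              // inexact in f16
//   canLosslesslyConvertToFPType(A, MVT::f16);   // true: precision loss only
//   APFloat B(1.0e10);                           // overflows f16 (max ~65504)
//   canLosslesslyConvertToFPType(B, MVT::f16);   // false: overflow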
1499 
1500 static bool isSafeTruncation(int64_t Val, unsigned Size) {
1501  return isUIntN(Size, Val) || isIntN(Size, Val);
1502 }
1503 
1504 bool AMDGPUOperand::isInlinableImm(MVT type) const {
1505 
1506  // This is a hack to enable named inline values like
1507  // shared_base with both 32-bit and 64-bit operands.
1508  // Note that these values are defined as
1509  // 32-bit operands only.
1510  if (isInlineValue()) {
1511  return true;
1512  }
1513 
1514  if (!isImmTy(ImmTyNone)) {
1515  // Only plain immediates are inlinable (e.g. "clamp" attribute is not)
1516  return false;
1517  }
1518  // TODO: We should avoid using host float here. It would be better to
1519  // check the float bit values which is what a few other places do.
1520  // We've had bot failures before due to weird NaN support on mips hosts.
1521 
1522  APInt Literal(64, Imm.Val);
1523 
1524  if (Imm.IsFPImm) { // We got fp literal token
1525  if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand
1526  return AMDGPU::isInlinableLiteral64(Imm.Val,
1527  AsmParser->hasInv2PiInlineImm());
1528  }
1529 
1530  APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val));
1531  if (!canLosslesslyConvertToFPType(FPLiteral, type))
1532  return false;
1533 
1534  if (type.getScalarSizeInBits() == 16) {
1535  return AMDGPU::isInlinableLiteral16(
1536  static_cast<int16_t>(FPLiteral.bitcastToAPInt().getZExtValue()),
1537  AsmParser->hasInv2PiInlineImm());
1538  }
1539 
1540  // Check if single precision literal is inlinable
1541  return AMDGPU::isInlinableLiteral32(
1542  static_cast<int32_t>(FPLiteral.bitcastToAPInt().getZExtValue()),
1543  AsmParser->hasInv2PiInlineImm());
1544  }
1545 
1546  // We got int literal token.
1547  if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand
1548  return AMDGPU::isInlinableLiteral64(Imm.Val,
1549  AsmParser->hasInv2PiInlineImm());
1550  }
1551 
1552  if (!isSafeTruncation(Imm.Val, type.getScalarSizeInBits())) {
1553  return false;
1554  }
1555 
1556  if (type.getScalarSizeInBits() == 16) {
1557  return AMDGPU::isInlinableLiteral16(
1558  static_cast<int16_t>(Literal.getLoBits(16).getSExtValue()),
1559  AsmParser->hasInv2PiInlineImm());
1560  }
1561 
1562  return AMDGPU::isInlinableLiteral32(
1563  static_cast<int32_t>(Literal.getLoBits(32).getZExtValue()),
1564  AsmParser->hasInv2PiInlineImm());
1565 }
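// Quick reference (illustrative, not in the original source): for a 32-bit
// operand the inline constants accepted above are the integers -16..64 and
// the fp values 0.0, +-0.5, +-1.0, +-2.0, +-4.0, plus 1/(2*pi) when the
// target reports hasInv2PiInlineImm(). Everything else must be emitted as a
// 32-bit literal, which isLiteralImm() below checks for.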
1566 
1567 bool AMDGPUOperand::isLiteralImm(MVT type) const {
1568  // Check that this immediate can be added as literal
1569  if (!isImmTy(ImmTyNone)) {
1570  return false;
1571  }
1572 
1573  if (!Imm.IsFPImm) {
1574  // We got int literal token.
1575 
1576  if (type == MVT::f64 && hasFPModifiers()) {
1577  // Cannot apply fp modifiers to int literals preserving the same semantics
1578  // for VOP1/2/C and VOP3 because of integer truncation. To avoid ambiguity,
1579  // disable these cases.
1580  return false;
1581  }
1582 
1583  unsigned Size = type.getSizeInBits();
1584  if (Size == 64)
1585  Size = 32;
1586 
1587  // FIXME: 64-bit operands can zero extend, sign extend, or pad zeroes for FP
1588  // types.
1589  return isSafeTruncation(Imm.Val, Size);
1590  }
1591 
1592  // We got fp literal token
1593  if (type == MVT::f64) { // Expected 64-bit fp operand
1594  // The low 32 bits of the literal will be set to zeroes, but we accept such literals
1595  return true;
1596  }
1597 
1598  if (type == MVT::i64) { // Expected 64-bit int operand
1599  // We don't allow fp literals in 64-bit integer instructions. It is
1600  // unclear how we should encode them.
1601  return false;
1602  }
1603 
1604  // We allow fp literals with f16x2 operands assuming that the specified
1605  // literal goes into the lower half and the upper half is zero. We also
1606  // require that the literal can be losslessly converted to f16.
1607  MVT ExpectedType = (type == MVT::v2f16)? MVT::f16 :
1608  (type == MVT::v2i16)? MVT::i16 : type;
1609 
1610  APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val));
1611  return canLosslesslyConvertToFPType(FPLiteral, ExpectedType);
1612 }
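// Example (illustrative, not in the original source): for a v2f16 operand a
// plain fp literal such as 3.0 is accepted; it lands in the low half of the
// packed value with the high half zeroed, since 3.0 converts to f16 without
// a range error. A literal like 1.0e10 is rejected because it overflows f16.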
1613 
1614 bool AMDGPUOperand::isRegClass(unsigned RCID) const {
1615  return isRegKind() && AsmParser->getMRI()->getRegClass(RCID).contains(getReg());
1616 }
1617 
1618 bool AMDGPUOperand::isSDWAOperand(MVT type) const {
1619  if (AsmParser->isVI())
1620  return isVReg32();
1621  else if (AsmParser->isGFX9() || AsmParser->isGFX10())
1622  return isRegClass(AMDGPU::VS_32RegClassID) || isInlinableImm(type);
1623  else
1624  return false;
1625 }
1626 
1627 bool AMDGPUOperand::isSDWAFP16Operand() const {
1628  return isSDWAOperand(MVT::f16);
1629 }
1630 
1631 bool AMDGPUOperand::isSDWAFP32Operand() const {
1632  return isSDWAOperand(MVT::f32);
1633 }
1634 
1635 bool AMDGPUOperand::isSDWAInt16Operand() const {
1636  return isSDWAOperand(MVT::i16);
1637 }
1638 
1639 bool AMDGPUOperand::isSDWAInt32Operand() const {
1640  return isSDWAOperand(MVT::i32);
1641 }
1642 
1643 bool AMDGPUOperand::isBoolReg() const {
1644  return (AsmParser->getFeatureBits()[AMDGPU::FeatureWavefrontSize64] && isSCSrcB64()) ||
1645  (AsmParser->getFeatureBits()[AMDGPU::FeatureWavefrontSize32] && isSCSrcB32());
1646 }
1647 
1648 uint64_t AMDGPUOperand::applyInputFPModifiers(uint64_t Val, unsigned Size) const
1649 {
1650  assert(isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers());
1651  assert(Size == 2 || Size == 4 || Size == 8);
1652 
1653  const uint64_t FpSignMask = (1ULL << (Size * 8 - 1));
1654 
1655  if (Imm.Mods.Abs) {
1656  Val &= ~FpSignMask;
1657  }
1658  if (Imm.Mods.Neg) {
1659  Val ^= FpSignMask;
1660  }
1661 
1662  return Val;
1663 }
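// Worked example (illustrative, not part of the original source): for a
// 32-bit operand FpSignMask is 0x80000000. With Abs set, -1.0f (0xBF800000)
// becomes 1.0f (0x3F800000); with Neg also set, the sign bit is then flipped
// back to give 0xBF800000, i.e. -|x| applied to the raw literal encoding.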
1664 
1665 void AMDGPUOperand::addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers) const {
1666  if (AMDGPU::isSISrcOperand(AsmParser->getMII()->get(Inst.getOpcode()),
1667  Inst.getNumOperands())) {
1668  addLiteralImmOperand(Inst, Imm.Val,
1669  ApplyModifiers &
1670  isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers());
1671  } else {
1672  assert(!isImmTy(ImmTyNone) || !hasModifiers());
1673  Inst.addOperand(MCOperand::createImm(Imm.Val));
1674  }
1675 }
1676 
1677 void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const {
1678  const auto& InstDesc = AsmParser->getMII()->get(Inst.getOpcode());
1679  auto OpNum = Inst.getNumOperands();
1680  // Check that this operand accepts literals
1681  assert(AMDGPU::isSISrcOperand(InstDesc, OpNum));
1682 
1683  if (ApplyModifiers) {
1684  assert(AMDGPU::isSISrcFPOperand(InstDesc, OpNum));
1685  const unsigned Size = Imm.IsFPImm ? sizeof(double) : getOperandSize(InstDesc, OpNum);
1686  Val = applyInputFPModifiers(Val, Size);
1687  }
1688 
1689  APInt Literal(64, Val);
1690  uint8_t OpTy = InstDesc.OpInfo[OpNum].OperandType;
1691 
1692  if (Imm.IsFPImm) { // We got fp literal token
1693  switch (OpTy) {
1694  case AMDGPU::OPERAND_REG_IMM_INT64:
1695  case AMDGPU::OPERAND_REG_IMM_FP64:
1696  case AMDGPU::OPERAND_REG_INLINE_C_INT64:
1697  case AMDGPU::OPERAND_REG_INLINE_C_FP64:
1698  if (AMDGPU::isInlinableLiteral64(Literal.getZExtValue(),
1699  AsmParser->hasInv2PiInlineImm())) {
1700  Inst.addOperand(MCOperand::createImm(Literal.getZExtValue()));
1701  return;
1702  }
1703 
1704  // Non-inlineable
1705  if (AMDGPU::isSISrcFPOperand(InstDesc, OpNum)) { // Expected 64-bit fp operand
1706  // For fp operands we check if low 32 bits are zeros
1707  if (Literal.getLoBits(32) != 0) {
1708  const_cast<AMDGPUAsmParser *>(AsmParser)->Warning(Inst.getLoc(),
1709  "Can't encode literal as exact 64-bit floating-point operand. "
1710  "Low 32-bits will be set to zero");
1711  }
1712 
1713  Inst.addOperand(MCOperand::createImm(Literal.lshr(32).getZExtValue()));
1714  return;
1715  }
1716 
1717  // We don't allow fp literals in 64-bit integer instructions. It is
1718  // unclear how we should encode them. This case should be checked earlier
1719  // in predicate methods (isLiteralImm())
1720  llvm_unreachable("fp literal in 64-bit integer instruction.");
1721 
1722  case AMDGPU::OPERAND_REG_IMM_INT32:
1723  case AMDGPU::OPERAND_REG_IMM_FP32:
1724  case AMDGPU::OPERAND_REG_INLINE_C_INT32:
1725  case AMDGPU::OPERAND_REG_INLINE_C_FP32:
1726  case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
1727  case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
1728  case AMDGPU::OPERAND_REG_IMM_INT16:
1729  case AMDGPU::OPERAND_REG_IMM_FP16:
1730  case AMDGPU::OPERAND_REG_INLINE_C_INT16:
1731  case AMDGPU::OPERAND_REG_INLINE_C_FP16:
1732  case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
1733  case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
1734  case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
1735  case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
1736  case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
1737  case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16:
1738  case AMDGPU::OPERAND_REG_IMM_V2INT16:
1739  case AMDGPU::OPERAND_REG_IMM_V2FP16: {
1740  bool lost;
1741  APFloat FPLiteral(APFloat::IEEEdouble(), Literal);
1742  // Convert literal to single precision
1743  FPLiteral.convert(*getOpFltSemantics(OpTy),
1744  APFloat::rmNearestTiesToEven, &lost);
1745  // We allow precision loss but not overflow or underflow. This should be
1746  // checked earlier in isLiteralImm()
1747 
1748  uint64_t ImmVal = FPLiteral.bitcastToAPInt().getZExtValue();
1749  Inst.addOperand(MCOperand::createImm(ImmVal));
1750  return;
1751  }
1752  default:
1753  llvm_unreachable("invalid operand size");
1754  }
1755 
1756  return;
1757  }
1758 
1759  // We got int literal token.
1760  // Only sign extend inline immediates.
1761  switch (OpTy) {
1762  case AMDGPU::OPERAND_REG_IMM_INT32:
1763  case AMDGPU::OPERAND_REG_IMM_FP32:
1764  case AMDGPU::OPERAND_REG_INLINE_C_INT32:
1765  case AMDGPU::OPERAND_REG_INLINE_C_FP32:
1766  case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
1767  case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
1768  case AMDGPU::OPERAND_REG_IMM_V2INT16:
1769  case AMDGPU::OPERAND_REG_IMM_V2FP16:
1770  if (isSafeTruncation(Val, 32) &&
1771  AMDGPU::isInlinableLiteral32(static_cast<int32_t>(Val),
1772  AsmParser->hasInv2PiInlineImm())) {
1773  Inst.addOperand(MCOperand::createImm(Val));
1774  return;
1775  }
1776 
1777  Inst.addOperand(MCOperand::createImm(Val & 0xffffffff));
1778  return;
1779 
1780  case AMDGPU::OPERAND_REG_IMM_INT64:
1781  case AMDGPU::OPERAND_REG_IMM_FP64:
1782  case AMDGPU::OPERAND_REG_INLINE_C_INT64:
1783  case AMDGPU::OPERAND_REG_INLINE_C_FP64:
1784  if (AMDGPU::isInlinableLiteral64(Val, AsmParser->hasInv2PiInlineImm())) {
1785  Inst.addOperand(MCOperand::createImm(Val));
1786  return;
1787  }
1788 
1789  Inst.addOperand(MCOperand::createImm(Lo_32(Val)));
1790  return;
1791 
1792  case AMDGPU::OPERAND_REG_IMM_INT16:
1793  case AMDGPU::OPERAND_REG_IMM_FP16:
1794  case AMDGPU::OPERAND_REG_INLINE_C_INT16:
1795  case AMDGPU::OPERAND_REG_INLINE_C_FP16:
1796  case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
1797  case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
1798  if (isSafeTruncation(Val, 16) &&
1799  AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val),
1800  AsmParser->hasInv2PiInlineImm())) {
1801  Inst.addOperand(MCOperand::createImm(Val));
1802  return;
1803  }
1804 
1805  Inst.addOperand(MCOperand::createImm(Val & 0xffff));
1806  return;
1807 
1808  case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
1809  case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
1810  case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
1811  case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16: {
1812  assert(isSafeTruncation(Val, 16));
1813  assert(AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val),
1814  AsmParser->hasInv2PiInlineImm()));
1815 
1816  Inst.addOperand(MCOperand::createImm(Val));
1817  return;
1818  }
1819  default:
1820  llvm_unreachable("invalid operand size");
1821  }
1822 }
1823 
1824 template <unsigned Bitwidth>
1825 void AMDGPUOperand::addKImmFPOperands(MCInst &Inst, unsigned N) const {
1826  APInt Literal(64, Imm.Val);
1827 
1828  if (!Imm.IsFPImm) {
1829  // We got int literal token.
1830  Inst.addOperand(MCOperand::createImm(Literal.getLoBits(Bitwidth).getZExtValue()));
1831  return;
1832  }
1833 
1834  bool Lost;
1835  APFloat FPLiteral(APFloat::IEEEdouble(), Literal);
1836  FPLiteral.convert(*getFltSemantics(Bitwidth / 8),
1837  APFloat::rmNearestTiesToEven, &Lost);
1838  Inst.addOperand(MCOperand::createImm(FPLiteral.bitcastToAPInt().getZExtValue()));
1839 }
1840 
1841 void AMDGPUOperand::addRegOperands(MCInst &Inst, unsigned N) const {
1842  Inst.addOperand(MCOperand::createReg(AMDGPU::getMCReg(getReg(), AsmParser->getSTI())));
1843 }
1844 
1845 static bool isInlineValue(unsigned Reg) {
1846  switch (Reg) {
1847  case AMDGPU::SRC_SHARED_BASE:
1848  case AMDGPU::SRC_SHARED_LIMIT:
1849  case AMDGPU::SRC_PRIVATE_BASE:
1850  case AMDGPU::SRC_PRIVATE_LIMIT:
1851  case AMDGPU::SRC_POPS_EXITING_WAVE_ID:
1852  return true;
1853  case AMDGPU::SRC_VCCZ:
1854  case AMDGPU::SRC_EXECZ:
1855  case AMDGPU::SRC_SCC:
1856  return true;
1857  case AMDGPU::SGPR_NULL:
1858  return true;
1859  default:
1860  return false;
1861  }
1862 }
1863 
1864 bool AMDGPUOperand::isInlineValue() const {
1865  return isRegKind() && ::isInlineValue(getReg());
1866 }
1867 
1868 //===----------------------------------------------------------------------===//
1869 // AsmParser
1870 //===----------------------------------------------------------------------===//
1871 
1872 static int getRegClass(RegisterKind Is, unsigned RegWidth) {
1873  if (Is == IS_VGPR) {
1874  switch (RegWidth) {
1875  default: return -1;
1876  case 1: return AMDGPU::VGPR_32RegClassID;
1877  case 2: return AMDGPU::VReg_64RegClassID;
1878  case 3: return AMDGPU::VReg_96RegClassID;
1879  case 4: return AMDGPU::VReg_128RegClassID;
1880  case 5: return AMDGPU::VReg_160RegClassID;
1881  case 8: return AMDGPU::VReg_256RegClassID;
1882  case 16: return AMDGPU::VReg_512RegClassID;
1883  case 32: return AMDGPU::VReg_1024RegClassID;
1884  }
1885  } else if (Is == IS_TTMP) {
1886  switch (RegWidth) {
1887  default: return -1;
1888  case 1: return AMDGPU::TTMP_32RegClassID;
1889  case 2: return AMDGPU::TTMP_64RegClassID;
1890  case 4: return AMDGPU::TTMP_128RegClassID;
1891  case 8: return AMDGPU::TTMP_256RegClassID;
1892  case 16: return AMDGPU::TTMP_512RegClassID;
1893  }
1894  } else if (Is == IS_SGPR) {
1895  switch (RegWidth) {
1896  default: return -1;
1897  case 1: return AMDGPU::SGPR_32RegClassID;
1898  case 2: return AMDGPU::SGPR_64RegClassID;
1899  case 4: return AMDGPU::SGPR_128RegClassID;
1900  case 8: return AMDGPU::SGPR_256RegClassID;
1901  case 16: return AMDGPU::SGPR_512RegClassID;
1902  }
1903  } else if (Is == IS_AGPR) {
1904  switch (RegWidth) {
1905  default: return -1;
1906  case 1: return AMDGPU::AGPR_32RegClassID;
1907  case 2: return AMDGPU::AReg_64RegClassID;
1908  case 4: return AMDGPU::AReg_128RegClassID;
1909  case 16: return AMDGPU::AReg_512RegClassID;
1910  case 32: return AMDGPU::AReg_1024RegClassID;
1911  }
1912  }
1913  return -1;
1914 }
1915 
1916 static unsigned getSpecialRegForName(StringRef RegName) {
1917  return StringSwitch<unsigned>(RegName)
1918  .Case("exec", AMDGPU::EXEC)
1919  .Case("vcc", AMDGPU::VCC)
1920  .Case("flat_scratch", AMDGPU::FLAT_SCR)
1921  .Case("xnack_mask", AMDGPU::XNACK_MASK)
1922  .Case("shared_base", AMDGPU::SRC_SHARED_BASE)
1923  .Case("src_shared_base", AMDGPU::SRC_SHARED_BASE)
1924  .Case("shared_limit", AMDGPU::SRC_SHARED_LIMIT)
1925  .Case("src_shared_limit", AMDGPU::SRC_SHARED_LIMIT)
1926  .Case("private_base", AMDGPU::SRC_PRIVATE_BASE)
1927  .Case("src_private_base", AMDGPU::SRC_PRIVATE_BASE)
1928  .Case("private_limit", AMDGPU::SRC_PRIVATE_LIMIT)
1929  .Case("src_private_limit", AMDGPU::SRC_PRIVATE_LIMIT)
1930  .Case("pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID)
1931  .Case("src_pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID)
1932  .Case("lds_direct", AMDGPU::LDS_DIRECT)
1933  .Case("src_lds_direct", AMDGPU::LDS_DIRECT)
1934  .Case("m0", AMDGPU::M0)
1935  .Case("vccz", AMDGPU::SRC_VCCZ)
1936  .Case("src_vccz", AMDGPU::SRC_VCCZ)
1937  .Case("execz", AMDGPU::SRC_EXECZ)
1938  .Case("src_execz", AMDGPU::SRC_EXECZ)
1939  .Case("scc", AMDGPU::SRC_SCC)
1940  .Case("src_scc", AMDGPU::SRC_SCC)
1941  .Case("tba", AMDGPU::TBA)
1942  .Case("tma", AMDGPU::TMA)
1943  .Case("flat_scratch_lo", AMDGPU::FLAT_SCR_LO)
1944  .Case("flat_scratch_hi", AMDGPU::FLAT_SCR_HI)
1945  .Case("xnack_mask_lo", AMDGPU::XNACK_MASK_LO)
1946  .Case("xnack_mask_hi", AMDGPU::XNACK_MASK_HI)
1947  .Case("vcc_lo", AMDGPU::VCC_LO)
1948  .Case("vcc_hi", AMDGPU::VCC_HI)
1949  .Case("exec_lo", AMDGPU::EXEC_LO)
1950  .Case("exec_hi", AMDGPU::EXEC_HI)
1951  .Case("tma_lo", AMDGPU::TMA_LO)
1952  .Case("tma_hi", AMDGPU::TMA_HI)
1953  .Case("tba_lo", AMDGPU::TBA_LO)
1954  .Case("tba_hi", AMDGPU::TBA_HI)
1955  .Case("null", AMDGPU::SGPR_NULL)
1956  .Default(0);
1957 }
1958 
1959 bool AMDGPUAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc,
1960  SMLoc &EndLoc) {
1961  auto R = parseRegister();
1962  if (!R) return true;
1963  assert(R->isReg());
1964  RegNo = R->getReg();
1965  StartLoc = R->getStartLoc();
1966  EndLoc = R->getEndLoc();
1967  return false;
1968 }
1969 
1970 bool AMDGPUAsmParser::AddNextRegisterToList(unsigned &Reg, unsigned &RegWidth,
1971  RegisterKind RegKind, unsigned Reg1,
1972  unsigned RegNum) {
1973  switch (RegKind) {
1974  case IS_SPECIAL:
1975  if (Reg == AMDGPU::EXEC_LO && Reg1 == AMDGPU::EXEC_HI) {
1976  Reg = AMDGPU::EXEC;
1977  RegWidth = 2;
1978  return true;
1979  }
1980  if (Reg == AMDGPU::FLAT_SCR_LO && Reg1 == AMDGPU::FLAT_SCR_HI) {
1981  Reg = AMDGPU::FLAT_SCR;
1982  RegWidth = 2;
1983  return true;
1984  }
1985  if (Reg == AMDGPU::XNACK_MASK_LO && Reg1 == AMDGPU::XNACK_MASK_HI) {
1986  Reg = AMDGPU::XNACK_MASK;
1987  RegWidth = 2;
1988  return true;
1989  }
1990  if (Reg == AMDGPU::VCC_LO && Reg1 == AMDGPU::VCC_HI) {
1991  Reg = AMDGPU::VCC;
1992  RegWidth = 2;
1993  return true;
1994  }
1995  if (Reg == AMDGPU::TBA_LO && Reg1 == AMDGPU::TBA_HI) {
1996  Reg = AMDGPU::TBA;
1997  RegWidth = 2;
1998  return true;
1999  }
2000  if (Reg == AMDGPU::TMA_LO && Reg1 == AMDGPU::TMA_HI) {
2001  Reg = AMDGPU::TMA;
2002  RegWidth = 2;
2003  return true;
2004  }
2005  return false;
2006  case IS_VGPR:
2007  case IS_SGPR:
2008  case IS_AGPR:
2009  case IS_TTMP:
2010  if (Reg1 != Reg + RegWidth) {
2011  return false;
2012  }
2013  RegWidth++;
2014  return true;
2015  default:
2016  llvm_unreachable("unexpected register kind");
2017  }
2018 }
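// Illustration (editorial, not in the original source): for IS_SPECIAL kinds
// only the known lo/hi pairs merge, e.g. vcc_lo followed by vcc_hi becomes
// vcc with RegWidth = 2. For VGPR/SGPR/AGPR/TTMP lists each new register must
// be exactly Reg + RegWidth, so [s0,s1,s2,s3] grows the width to 4, while a
// list such as [s0,s2] is rejected.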
2019 
2020 static constexpr StringLiteral Registers[] = {"v", "s", "ttmp", "acc", "a"};
2021 
2022 bool
2023 AMDGPUAsmParser::isRegister(const AsmToken &Token,
2024  const AsmToken &NextToken) const {
2025 
2026  // A list of consecutive registers: [s0,s1,s2,s3]
2027  if (Token.is(AsmToken::LBrac))
2028  return true;
2029 
2030  if (!Token.is(AsmToken::Identifier))
2031  return false;
2032 
2033  // A single register like s0 or a range of registers like s[0:1]
2034 
2035  StringRef RegName = Token.getString();
2036 
2037  for (StringRef Reg : Registers) {
2038  if (RegName.startswith(Reg)) {
2039  if (Reg.size() < RegName.size()) {
2040  unsigned RegNum;
2041  // A single register with an index: rXX
2042  if (!RegName.substr(Reg.size()).getAsInteger(10, RegNum))
2043  return true;
2044  } else {
2045  // A range of registers: r[XX:YY].
2046  if (NextToken.is(AsmToken::LBrac))
2047  return true;
2048  }
2049  }
2050  }
2051 
2052  return getSpecialRegForName(RegName);
2053 }
2054 
2055 bool
2056 AMDGPUAsmParser::isRegister()
2057 {
2058  return isRegister(getToken(), peekToken());
2059 }
2060 
2061 bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
2062  unsigned &RegNum, unsigned &RegWidth,
2063  unsigned *DwordRegIndex) {
2064  if (DwordRegIndex) { *DwordRegIndex = 0; }
2065  const MCRegisterInfo *TRI = getContext().getRegisterInfo();
2066  if (getLexer().is(AsmToken::Identifier)) {
2067  StringRef RegName = Parser.getTok().getString();
2068  if ((Reg = getSpecialRegForName(RegName))) {
2069  Parser.Lex();
2070  RegKind = IS_SPECIAL;
2071  } else {
2072  unsigned RegNumIndex = 0;
2073  if (RegName[0] == 'v') {
2074  RegNumIndex = 1;
2075  RegKind = IS_VGPR;
2076  } else if (RegName[0] == 's') {
2077  RegNumIndex = 1;
2078  RegKind = IS_SGPR;
2079  } else if (RegName[0] == 'a') {
2080  RegNumIndex = RegName.startswith("acc") ? 3 : 1;
2081  RegKind = IS_AGPR;
2082  } else if (RegName.startswith("ttmp")) {
2083  RegNumIndex = strlen("ttmp");
2084  RegKind = IS_TTMP;
2085  } else {
2086  return false;
2087  }
2088  if (RegName.size() > RegNumIndex) {
2089  // Single 32-bit register: vXX.
2090  if (RegName.substr(RegNumIndex).getAsInteger(10, RegNum))
2091  return false;
2092  Parser.Lex();
2093  RegWidth = 1;
2094  } else {
2095  // Range of registers: v[XX:YY]. ":YY" is optional.
2096  Parser.Lex();
2097  int64_t RegLo, RegHi;
2098  if (getLexer().isNot(AsmToken::LBrac))
2099  return false;
2100  Parser.Lex();
2101 
2102  if (getParser().parseAbsoluteExpression(RegLo))
2103  return false;
2104 
2105  const bool isRBrace = getLexer().is(AsmToken::RBrac);
2106  if (!isRBrace && getLexer().isNot(AsmToken::Colon))
2107  return false;
2108  Parser.Lex();
2109 
2110  if (isRBrace) {
2111  RegHi = RegLo;
2112  } else {
2113  if (getParser().parseAbsoluteExpression(RegHi))
2114  return false;
2115 
2116  if (getLexer().isNot(AsmToken::RBrac))
2117  return false;
2118  Parser.Lex();
2119  }
2120  RegNum = (unsigned) RegLo;
2121  RegWidth = (RegHi - RegLo) + 1;
2122  }
2123  }
2124  } else if (getLexer().is(AsmToken::LBrac)) {
2125  // List of consecutive registers: [s0,s1,s2,s3]
2126  Parser.Lex();
2127  if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth, nullptr))
2128  return false;
2129  if (RegWidth != 1)
2130  return false;
2131  RegisterKind RegKind1;
2132  unsigned Reg1, RegNum1, RegWidth1;
2133  do {
2134  if (getLexer().is(AsmToken::Comma)) {
2135  Parser.Lex();
2136  } else if (getLexer().is(AsmToken::RBrac)) {
2137  Parser.Lex();
2138  break;
2139  } else if (ParseAMDGPURegister(RegKind1, Reg1, RegNum1, RegWidth1, nullptr)) {
2140  if (RegWidth1 != 1) {
2141  return false;
2142  }
2143  if (RegKind1 != RegKind) {
2144  return false;
2145  }
2146  if (!AddNextRegisterToList(Reg, RegWidth, RegKind1, Reg1, RegNum1)) {
2147  return false;
2148  }
2149  } else {
2150  return false;
2151  }
2152  } while (true);
2153  } else {
2154  return false;
2155  }
2156  switch (RegKind) {
2157  case IS_SPECIAL:
2158  RegNum = 0;
2159  RegWidth = 1;
2160  break;
2161  case IS_VGPR:
2162  case IS_SGPR:
2163  case IS_AGPR:
2164  case IS_TTMP:
2165  {
2166  unsigned Size = 1;
2167  if (RegKind == IS_SGPR || RegKind == IS_TTMP) {
2168  // SGPR and TTMP registers must be aligned. Max required alignment is 4 dwords.
2169  Size = std::min(RegWidth, 4u);
2170  }
2171  if (RegNum % Size != 0)
2172  return false;
2173  if (DwordRegIndex) { *DwordRegIndex = RegNum; }
2174  RegNum = RegNum / Size;
2175  int RCID = getRegClass(RegKind, RegWidth);
2176  if (RCID == -1)
2177  return false;
2178  const MCRegisterClass RC = TRI->getRegClass(RCID);
2179  if (RegNum >= RC.getNumRegs())
2180  return false;
2181  Reg = RC.getRegister(RegNum);
2182  break;
2183  }
2184 
2185  default:
2186  llvm_unreachable("unexpected register kind");
2187  }
2188 
2189  if (!subtargetHasRegister(*TRI, Reg))
2190  return false;
2191  return true;
2192 }
2193 
2194 Optional<StringRef>
2195 AMDGPUAsmParser::getGprCountSymbolName(RegisterKind RegKind) {
2196  switch (RegKind) {
2197  case IS_VGPR:
2198  return StringRef(".amdgcn.next_free_vgpr");
2199  case IS_SGPR:
2200  return StringRef(".amdgcn.next_free_sgpr");
2201  default:
2202  return None;
2203  }
2204 }
2205 
2206 void AMDGPUAsmParser::initializeGprCountSymbol(RegisterKind RegKind) {
2207  auto SymbolName = getGprCountSymbolName(RegKind);
2208  assert(SymbolName && "initializing invalid register kind");
2209  MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName);
2210  Sym->setVariableValue(MCConstantExpr::create(0, getContext()));
2211 }
2212 
2213 bool AMDGPUAsmParser::updateGprCountSymbols(RegisterKind RegKind,
2214  unsigned DwordRegIndex,
2215  unsigned RegWidth) {
2216  // Symbols are only defined for GCN targets
2217  if (AMDGPU::getIsaVersion(getSTI().getCPU()).Major < 6)
2218  return true;
2219 
2220  auto SymbolName = getGprCountSymbolName(RegKind);
2221  if (!SymbolName)
2222  return true;
2223  MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName);
2224 
2225  int64_t NewMax = DwordRegIndex + RegWidth - 1;
2226  int64_t OldCount;
2227 
2228  if (!Sym->isVariable())
2229  return !Error(getParser().getTok().getLoc(),
2230  ".amdgcn.next_free_{v,s}gpr symbols must be variable");
2231  if (!Sym->getVariableValue(false)->evaluateAsAbsolute(OldCount))
2232  return !Error(
2233  getParser().getTok().getLoc(),
2234  ".amdgcn.next_free_{v,s}gpr symbols must be absolute expressions");
2235 
2236  if (OldCount <= NewMax)
2237  Sym->setVariableValue(MCConstantExpr::create(NewMax + 1, getContext()));
2238 
2239  return true;
2240 }
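// Editorial sketch of the bookkeeping above (not in the original source):
// .amdgcn.next_free_{v,s}gpr always holds one past the highest register index
// seen so far. Parsing v[6:7] gives NewMax = 7, so a previous value of 4 is
// raised to 8, while a previous value of 10 is left unchanged.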
2241 
2242 std::unique_ptr<AMDGPUOperand> AMDGPUAsmParser::parseRegister() {
2243  const auto &Tok = Parser.getTok();
2244  SMLoc StartLoc = Tok.getLoc();
2245  SMLoc EndLoc = Tok.getEndLoc();
2246  RegisterKind RegKind;
2247  unsigned Reg, RegNum, RegWidth, DwordRegIndex;
2248 
2249  if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth, &DwordRegIndex)) {
2250  //FIXME: improve error messages (bug 41303).
2251  Error(StartLoc, "not a valid operand.");
2252  return nullptr;
2253  }
2254  if (AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())) {
2255  if (!updateGprCountSymbols(RegKind, DwordRegIndex, RegWidth))
2256  return nullptr;
2257  } else
2258  KernelScope.usesRegister(RegKind, DwordRegIndex, RegWidth);
2259  return AMDGPUOperand::CreateReg(this, Reg, StartLoc, EndLoc);
2260 }
2261 
2262 OperandMatchResultTy
2263 AMDGPUAsmParser::parseImm(OperandVector &Operands, bool HasSP3AbsModifier) {
2264  // TODO: add syntactic sugar for 1/(2*PI)
2265 
2266  assert(!isRegister());
2267  assert(!isModifier());
2268 
2269  const auto& Tok = getToken();
2270  const auto& NextTok = peekToken();
2271  bool IsReal = Tok.is(AsmToken::Real);
2272  SMLoc S = getLoc();
2273  bool Negate = false;
2274 
2275  if (!IsReal && Tok.is(AsmToken::Minus) && NextTok.is(AsmToken::Real)) {
2276  lex();
2277  IsReal = true;
2278  Negate = true;
2279  }
2280 
2281  if (IsReal) {
2282  // Floating-point expressions are not supported.
2283  // Can only allow floating-point literals with an
2284  // optional sign.
2285 
2286  StringRef Num = getTokenStr();
2287  lex();
2288 
2289  APFloat RealVal(APFloat::IEEEdouble());
2290  auto roundMode = APFloat::rmNearestTiesToEven;
2291  if (RealVal.convertFromString(Num, roundMode) == APFloat::opInvalidOp) {
2292  return MatchOperand_ParseFail;
2293  }
2294  if (Negate)
2295  RealVal.changeSign();
2296 
2297  Operands.push_back(
2298  AMDGPUOperand::CreateImm(this, RealVal.bitcastToAPInt().getZExtValue(), S,
2299  AMDGPUOperand::ImmTyNone, true));
2300 
2301  return MatchOperand_Success;
2302 
2303  } else {
2304  int64_t IntVal;
2305  const MCExpr *Expr;
2306  SMLoc S = getLoc();
2307 
2308  if (HasSP3AbsModifier) {
2309  // This is a workaround for handling expressions
2310  // as arguments of SP3 'abs' modifier, for example:
2311  // |1.0|
2312  // |-1|
2313  // |1+x|
2314  // This syntax is not compatible with syntax of standard
2315  // MC expressions (due to the trailing '|').
2316  SMLoc EndLoc;
2317  if (getParser().parsePrimaryExpr(Expr, EndLoc))
2318  return MatchOperand_ParseFail;
2319  } else {
2320  if (Parser.parseExpression(Expr))
2321  return MatchOperand_ParseFail;
2322  }
2323 
2324  if (Expr->evaluateAsAbsolute(IntVal)) {
2325  Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S));
2326  } else {
2327  Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S));
2328  }
2329 
2330  return MatchOperand_Success;
2331  }
2332 
2333  return MatchOperand_NoMatch;
2334 }
2335 
2336 OperandMatchResultTy
2337 AMDGPUAsmParser::parseReg(OperandVector &Operands) {
2338  if (!isRegister())
2339  return MatchOperand_NoMatch;
2340 
2341  if (auto R = parseRegister()) {
2342  assert(R->isReg());
2343  Operands.push_back(std::move(R));
2344  return MatchOperand_Success;
2345  }
2346  return MatchOperand_ParseFail;
2347 }
2348 
2349 OperandMatchResultTy
2350 AMDGPUAsmParser::parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod) {
2351  auto res = parseReg(Operands);
2352  if (res != MatchOperand_NoMatch) {
2353  return res;
2354  } else if (isModifier()) {
2355  return MatchOperand_NoMatch;
2356  } else {
2357  return parseImm(Operands, HasSP3AbsMod);
2358  }
2359 }
2360 
2361 bool
2362 AMDGPUAsmParser::isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
2363  if (Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::LParen)) {
2364  const auto &str = Token.getString();
2365  return str == "abs" || str == "neg" || str == "sext";
2366  }
2367  return false;
2368 }
2369 
2370 bool
2371 AMDGPUAsmParser::isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const {
2372  return Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::Colon);
2373 }
2374 
2375 bool
2376 AMDGPUAsmParser::isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
2377  return isNamedOperandModifier(Token, NextToken) || Token.is(AsmToken::Pipe);
2378 }
2379 
2380 bool
2381 AMDGPUAsmParser::isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
2382  return isRegister(Token, NextToken) || isOperandModifier(Token, NextToken);
2383 }
2384 
2385 // Check if this is an operand modifier or an opcode modifier
2386 // which may look like an expression but is not. We should
2387 // avoid parsing these modifiers as expressions. Currently
2388 // recognized sequences are:
2389 // |...|
2390 // abs(...)
2391 // neg(...)
2392 // sext(...)
2393 // -reg
2394 // -|...|
2395 // -abs(...)
2396 // name:...
2397 // Note that simple opcode modifiers like 'gds' may be parsed as
2398 // expressions; this is a special case. See getExpressionAsToken.
2399 //
2400 bool
2401 AMDGPUAsmParser::isModifier() {
2402 
2403  AsmToken Tok = getToken();
2404  AsmToken NextToken[2];
2405  peekTokens(NextToken);
2406 
2407  return isOperandModifier(Tok, NextToken[0]) ||
2408  (Tok.is(AsmToken::Minus) && isRegOrOperandModifier(NextToken[0], NextToken[1])) ||
2409  isOpcodeModifierWithVal(Tok, NextToken[0]);
2410 }
2411 
2412 // Check if the current token is an SP3 'neg' modifier.
2413 // Currently this modifier is allowed in the following context:
2414 //
2415 // 1. Before a register, e.g. "-v0", "-v[...]" or "-[v0,v1]".
2416 // 2. Before an 'abs' modifier: -abs(...)
2417 // 3. Before an SP3 'abs' modifier: -|...|
2418 //
2419 // In all other cases "-" is handled as a part
2420 // of an expression that follows the sign.
2421 //
2422 // Note: When "-" is followed by an integer literal,
2423 // this is interpreted as integer negation rather
2424 // than a floating-point NEG modifier applied to N.
2425  // Besides being counter-intuitive, such use of a floating-point
2426  // NEG modifier would have resulted in different meanings
2427  // of integer literals used with VOP1/2/C and VOP3,
2428 // for example:
2429 // v_exp_f32_e32 v5, -1 // VOP1: src0 = 0xFFFFFFFF
2430 // v_exp_f32_e64 v5, -1 // VOP3: src0 = 0x80000001
2431 // Negative fp literals with preceding "-" are
2432 // handled likewise for uniformity.
2433 //
2434 bool
2435 AMDGPUAsmParser::parseSP3NegModifier() {
2436 
2437  AsmToken NextToken[2];
2438  peekTokens(NextToken);
2439 
2440  if (isToken(AsmToken::Minus) &&
2441  (isRegister(NextToken[0], NextToken[1]) ||
2442  NextToken[0].is(AsmToken::Pipe) ||
2443  isId(NextToken[0], "abs"))) {
2444  lex();
2445  return true;
2446  }
2447 
2448  return false;
2449 }
2450 
2451 OperandMatchResultTy
2452 AMDGPUAsmParser::parseRegOrImmWithFPInputMods(OperandVector &Operands,
2453  bool AllowImm) {
2454  bool Neg, SP3Neg;
2455  bool Abs, SP3Abs;
2456  SMLoc Loc;
2457 
2458  // Disable ambiguous constructs like '--1' etc. Should use neg(-1) instead.
2459  if (isToken(AsmToken::Minus) && peekToken().is(AsmToken::Minus)) {
2460  Error(getLoc(), "invalid syntax, expected 'neg' modifier");
2461  return MatchOperand_ParseFail;
2462  }
2463 
2464  SP3Neg = parseSP3NegModifier();
2465 
2466  Loc = getLoc();
2467  Neg = trySkipId("neg");
2468  if (Neg && SP3Neg) {
2469  Error(Loc, "expected register or immediate");
2470  return MatchOperand_ParseFail;
2471  }
2472  if (Neg && !skipToken(AsmToken::LParen, "expected left paren after neg"))
2473  return MatchOperand_ParseFail;
2474 
2475  Abs = trySkipId("abs");
2476  if (Abs && !skipToken(AsmToken::LParen, "expected left paren after abs"))
2477  return MatchOperand_ParseFail;
2478 
2479  Loc = getLoc();
2480  SP3Abs = trySkipToken(AsmToken::Pipe);
2481  if (Abs && SP3Abs) {
2482  Error(Loc, "expected register or immediate");
2483  return MatchOperand_ParseFail;
2484  }
2485 
2486  OperandMatchResultTy Res;
2487  if (AllowImm) {
2488  Res = parseRegOrImm(Operands, SP3Abs);
2489  } else {
2490  Res = parseReg(Operands);
2491  }
2492  if (Res != MatchOperand_Success) {
2493  return (SP3Neg || Neg || SP3Abs || Abs)? MatchOperand_ParseFail : Res;
2494  }
2495 
2496  if (SP3Abs && !skipToken(AsmToken::Pipe, "expected vertical bar"))
2497  return MatchOperand_ParseFail;
2498  if (Abs && !skipToken(AsmToken::RParen, "expected closing parentheses"))
2499  return MatchOperand_ParseFail;
2500  if (Neg && !skipToken(AsmToken::RParen, "expected closing parentheses"))
2501  return MatchOperand_ParseFail;
2502 
2503  AMDGPUOperand::Modifiers Mods;
2504  Mods.Abs = Abs || SP3Abs;
2505  Mods.Neg = Neg || SP3Neg;
2506 
2507  if (Mods.hasFPModifiers()) {
2508  AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
2509  if (Op.isExpr()) {
2510  Error(Op.getStartLoc(), "expected an absolute expression");
2511  return MatchOperand_ParseFail;
2512  }
2513  Op.setModifiers(Mods);
2514  }
2515  return MatchOperand_Success;
2516 }
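// Accepted spellings handled above include, for example, "abs(v0)", "|v0|",
// "neg(v1)", "-v1" and the combination "-|v0|"; a construct like "--1" is
// rejected with a request to use neg() instead. (Editorial examples, not
// present in the original source.)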
2517 
2518 OperandMatchResultTy
2519 AMDGPUAsmParser::parseRegOrImmWithIntInputMods(OperandVector &Operands,
2520  bool AllowImm) {
2521  bool Sext = trySkipId("sext");
2522  if (Sext && !skipToken(AsmToken::LParen, "expected left paren after sext"))
2523  return MatchOperand_ParseFail;
2524 
2525  OperandMatchResultTy Res;
2526  if (AllowImm) {
2527  Res = parseRegOrImm(Operands);
2528  } else {
2529  Res = parseReg(Operands);
2530  }
2531  if (Res != MatchOperand_Success) {
2532  return Sext? MatchOperand_ParseFail : Res;
2533  }
2534 
2535  if (Sext && !skipToken(AsmToken::RParen, "expected closing parentheses"))
2536  return MatchOperand_ParseFail;
2537 
2538  AMDGPUOperand::Modifiers Mods;
2539  Mods.Sext = Sext;
2540 
2541  if (Mods.hasIntModifiers()) {
2542  AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
2543  if (Op.isExpr()) {
2544  Error(Op.getStartLoc(), "expected an absolute expression");
2545  return MatchOperand_ParseFail;
2546  }
2547  Op.setModifiers(Mods);
2548  }
2549 
2550  return MatchOperand_Success;
2551 }
2552 
2553 OperandMatchResultTy
2554 AMDGPUAsmParser::parseRegWithFPInputMods(OperandVector &Operands) {
2555  return parseRegOrImmWithFPInputMods(Operands, false);
2556 }
2557 
2558 OperandMatchResultTy
2559 AMDGPUAsmParser::parseRegWithIntInputMods(OperandVector &Operands) {
2560  return parseRegOrImmWithIntInputMods(Operands, false);
2561 }
2562 
2563 OperandMatchResultTy AMDGPUAsmParser::parseVReg32OrOff(OperandVector &Operands) {
2564  auto Loc = getLoc();
2565  if (trySkipId("off")) {
2566  Operands.push_back(AMDGPUOperand::CreateImm(this, 0, Loc,
2567  AMDGPUOperand::ImmTyOff, false));
2568  return MatchOperand_Success;
2569  }
2570 
2571  if (!isRegister())
2572  return MatchOperand_NoMatch;
2573 
2574  std::unique_ptr<AMDGPUOperand> Reg = parseRegister();
2575  if (Reg) {
2576  Operands.push_back(std::move(Reg));
2577  return MatchOperand_Success;
2578  }
2579 
2580  return MatchOperand_ParseFail;
2581 
2582 }
2583 
2584 unsigned AMDGPUAsmParser::checkTargetMatchPredicate(MCInst &Inst) {
2585  uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
2586 
2587  if ((getForcedEncodingSize() == 32 && (TSFlags & SIInstrFlags::VOP3)) ||
2588  (getForcedEncodingSize() == 64 && !(TSFlags & SIInstrFlags::VOP3)) ||
2589  (isForcedDPP() && !(TSFlags & SIInstrFlags::DPP)) ||
2590  (isForcedSDWA() && !(TSFlags & SIInstrFlags::SDWA)) )
2591  return Match_InvalidOperand;
2592 
2593  if ((TSFlags & SIInstrFlags::VOP3) &&
2594  (TSFlags & SIInstrFlags::VOPAsmPrefer32Bit) &&
2595  getForcedEncodingSize() != 64)
2596  return Match_PreferE32;
2597 
2598  if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi ||
2599  Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) {
2600  // v_mac_f32/16 allow only dst_sel == DWORD;
2601  auto OpNum =
2602  AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::dst_sel);
2603  const auto &Op = Inst.getOperand(OpNum);
2604  if (!Op.isImm() || Op.getImm() != AMDGPU::SDWA::SdwaSel::DWORD) {
2605  return Match_InvalidOperand;
2606  }
2607  }
2608 
2609  return Match_Success;
2610 }
2611 
2612 // What asm variants we should check
2613 ArrayRef<unsigned> AMDGPUAsmParser::getMatchedVariants() const {
2614  if (getForcedEncodingSize() == 32) {
2615  static const unsigned Variants[] = {AMDGPUAsmVariants::DEFAULT};
2616  return makeArrayRef(Variants);
2617  }
2618 
2619  if (isForcedVOP3()) {
2620  static const unsigned Variants[] = {AMDGPUAsmVariants::VOP3};
2621  return makeArrayRef(Variants);
2622  }
2623 
2624  if (isForcedSDWA()) {
2625  static const unsigned Variants[] = {AMDGPUAsmVariants::SDWA,
2626  AMDGPUAsmVariants::SDWA9};
2627  return makeArrayRef(Variants);
2628  }
2629 
2630  if (isForcedDPP()) {
2631  static const unsigned Variants[] = {AMDGPUAsmVariants::DPP};
2632  return makeArrayRef(Variants);
2633  }
2634 
2635  static const unsigned Variants[] = {
2636  AMDGPUAsmVariants::DEFAULT, AMDGPUAsmVariants::VOP3,
2637  AMDGPUAsmVariants::SDWA, AMDGPUAsmVariants::SDWA9, AMDGPUAsmVariants::DPP
2638  };
2639 
2640  return makeArrayRef(Variants);
2641 }
2642 
2643 unsigned AMDGPUAsmParser::findImplicitSGPRReadInVOP(const MCInst &Inst) const {
2644  const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
2645  const unsigned Num = Desc.getNumImplicitUses();
2646  for (unsigned i = 0; i < Num; ++i) {
2647  unsigned Reg = Desc.ImplicitUses[i];
2648  switch (Reg) {
2649  case AMDGPU::FLAT_SCR:
2650  case AMDGPU::VCC:
2651  case AMDGPU::VCC_LO:
2652  case AMDGPU::VCC_HI:
2653  case AMDGPU::M0:
2654  return Reg;
2655  default:
2656  break;
2657  }
2658  }
2659  return AMDGPU::NoRegister;
2660 }
2661 
2662 // NB: This code is correct only when used to check constant
2663 // bus limitations because GFX7 supports no f16 inline constants.
2664 // Note that there are no cases when a GFX7 opcode violates
2665 // constant bus limitations due to the use of an f16 constant.
2666 bool AMDGPUAsmParser::isInlineConstant(const MCInst &Inst,
2667  unsigned OpIdx) const {
2668  const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
2669 
2670  if (!AMDGPU::isSISrcOperand(Desc, OpIdx)) {
2671  return false;
2672  }
2673 
2674  const MCOperand &MO = Inst.getOperand(OpIdx);
2675 
2676  int64_t Val = MO.getImm();
2677  auto OpSize = AMDGPU::getOperandSize(Desc, OpIdx);
2678 
2679  switch (OpSize) { // expected operand size
2680  case 8:
2681  return AMDGPU::isInlinableLiteral64(Val, hasInv2PiInlineImm());
2682  case 4:
2683  return AMDGPU::isInlinableLiteral32(Val, hasInv2PiInlineImm());
2684  case 2: {
2685  const unsigned OperandType = Desc.OpInfo[OpIdx].OperandType;
2686  if (OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2INT16 ||
2687  OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2FP16 ||
2688  OperandType == AMDGPU::OPERAND_REG_INLINE_AC_V2INT16 ||
2689  OperandType == AMDGPU::OPERAND_REG_INLINE_AC_V2FP16 ||
2690  OperandType == AMDGPU::OPERAND_REG_IMM_V2INT16 ||
2691  OperandType == AMDGPU::OPERAND_REG_IMM_V2FP16) {
2692  return AMDGPU::isInlinableLiteralV216(Val, hasInv2PiInlineImm());
2693  } else {
2694  return AMDGPU::isInlinableLiteral16(Val, hasInv2PiInlineImm());
2695  }
2696  }
2697  default:
2698  llvm_unreachable("invalid operand size");
2699  }
2700 }
2701 
2702 unsigned AMDGPUAsmParser::getConstantBusLimit(unsigned Opcode) const {
2703  if (!isGFX10())
2704  return 1;
2705 
2706  switch (Opcode) {
2707  // 64-bit shift instructions can use only one scalar value input
2708  case AMDGPU::V_LSHLREV_B64:
2709  case AMDGPU::V_LSHLREV_B64_gfx10:
2710  case AMDGPU::V_LSHL_B64:
2711  case AMDGPU::V_LSHRREV_B64:
2712  case AMDGPU::V_LSHRREV_B64_gfx10:
2713  case AMDGPU::V_LSHR_B64:
2714  case AMDGPU::V_ASHRREV_I64:
2715  case AMDGPU::V_ASHRREV_I64_gfx10:
2716  case AMDGPU::V_ASHR_I64:
2717  return 1;
2718  default:
2719  return 2;
2720  }
2721 }
2722 
2723 bool AMDGPUAsmParser::usesConstantBus(const MCInst &Inst, unsigned OpIdx) {
2724  const MCOperand &MO = Inst.getOperand(OpIdx);
2725  if (MO.isImm()) {
2726  return !isInlineConstant(Inst, OpIdx);
2727  } else if (MO.isReg()) {
2728  auto Reg = MO.getReg();
2729  const MCRegisterInfo *TRI = getContext().getRegisterInfo();
2730  return isSGPR(mc2PseudoReg(Reg), TRI) && Reg != SGPR_NULL;
2731  } else {
2732  return true;
2733  }
2734 }
2735 
2736 bool AMDGPUAsmParser::validateConstantBusLimitations(const MCInst &Inst) {
2737  const unsigned Opcode = Inst.getOpcode();
2738  const MCInstrDesc &Desc = MII.get(Opcode);
2739  unsigned ConstantBusUseCount = 0;
2740  unsigned NumLiterals = 0;
2741  unsigned LiteralSize;
2742 
2743  if (Desc.TSFlags &
2744  (SIInstrFlags::VOPC |
2745  SIInstrFlags::VOP1 | SIInstrFlags::VOP2 |
2746  SIInstrFlags::VOP3 | SIInstrFlags::VOP3P |
2747  SIInstrFlags::SDWA)) {
2748  // Check special imm operands (used by madmk, etc)
2749  if (AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::imm) != -1) {
2750  ++ConstantBusUseCount;
2751  }
2752 
2753  SmallDenseSet<unsigned> SGPRsUsed;
2754  unsigned SGPRUsed = findImplicitSGPRReadInVOP(Inst);
2755  if (SGPRUsed != AMDGPU::NoRegister) {
2756  SGPRsUsed.insert(SGPRUsed);
2757  ++ConstantBusUseCount;
2758  }
2759 
2760  const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
2761  const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
2762  const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);
2763 
2764  const int OpIndices[] = { Src0Idx, Src1Idx, Src2Idx };
2765 
2766  for (int OpIdx : OpIndices) {
2767  if (OpIdx == -1) break;
2768 
2769  const MCOperand &MO = Inst.getOperand(OpIdx);
2770  if (usesConstantBus(Inst, OpIdx)) {
2771  if (MO.isReg()) {
2772  const unsigned Reg = mc2PseudoReg(MO.getReg());
2773  // Pairs of registers with partial intersection like these
2774  // s0, s[0:1]
2775  // flat_scratch_lo, flat_scratch
2776  // flat_scratch_lo, flat_scratch_hi
2777  // are theoretically valid but they are disabled anyway.
2778  // Note that this code mimics SIInstrInfo::verifyInstruction
2779  if (!SGPRsUsed.count(Reg)) {
2780  SGPRsUsed.insert(Reg);
2781  ++ConstantBusUseCount;
2782  }
2783  } else { // Expression or a literal
2784 
2785  if (Desc.OpInfo[OpIdx].OperandType == MCOI::OPERAND_IMMEDIATE)
2786  continue; // special operand like VINTERP attr_chan
2787 
2788  // An instruction may use only one literal.
2789  // This has been validated in a previous step.
2790  // See validateVOP3Literal.
2791  // This literal may be used as more than one operand.
2792  // If all these operands are of the same size,
2793  // this literal counts as one scalar value.
2794  // Otherwise it counts as 2 scalar values.
2795  // See "GFX10 Shader Programming", section 3.6.2.3.
2796 
2797  unsigned Size = AMDGPU::getOperandSize(Desc, OpIdx);
2798  if (Size < 4) Size = 4;
2799 
2800  if (NumLiterals == 0) {
2801  NumLiterals = 1;
2802  LiteralSize = Size;
2803  } else if (LiteralSize != Size) {
2804  NumLiterals = 2;
2805  }
2806  }
2807  }
2808  }
2809  }
2810  ConstantBusUseCount += NumLiterals;
2811 
2812  return ConstantBusUseCount <= getConstantBusLimit(Opcode);
2813 }
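// Editorial example (not in the original source): duplicate SGPR reads are
// counted once via SGPRsUsed, so a VOP3 instruction reading s0 twice uses a
// single constant bus slot, whereas reading s0 and s1 uses two slots and is
// only accepted where getConstantBusLimit() allows 2 (GFX10, excluding the
// 64-bit shifts).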
2814 
2815 bool AMDGPUAsmParser::validateEarlyClobberLimitations(const MCInst &Inst) {
2816  const unsigned Opcode = Inst.getOpcode();
2817  const MCInstrDesc &Desc = MII.get(Opcode);
2818 
2819  const int DstIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::vdst);
2820  if (DstIdx == -1 ||
2821  Desc.getOperandConstraint(DstIdx, MCOI::EARLY_CLOBBER) == -1) {
2822  return true;
2823  }
2824 
2825  const MCRegisterInfo *TRI = getContext().getRegisterInfo();
2826 
2827  const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
2828  const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
2829  const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);
2830 
2831  assert(DstIdx != -1);
2832  const MCOperand &Dst = Inst.getOperand(DstIdx);
2833  assert(Dst.isReg());
2834  const unsigned DstReg = mc2PseudoReg(Dst.getReg());
2835 
2836  const int SrcIndices[] = { Src0Idx, Src1Idx, Src2Idx };
2837 
2838  for (int SrcIdx : SrcIndices) {
2839  if (SrcIdx == -1) break;
2840  const MCOperand &Src = Inst.getOperand(SrcIdx);
2841  if (Src.isReg()) {
2842  const unsigned SrcReg = mc2PseudoReg(Src.getReg());
2843  if (isRegIntersect(DstReg, SrcReg, TRI)) {
2844  return false;
2845  }
2846  }
2847  }
2848 
2849  return true;
2850 }
2851 
2852 bool AMDGPUAsmParser::validateIntClampSupported(const MCInst &Inst) {
2853 
2854  const unsigned Opc = Inst.getOpcode();
2855  const MCInstrDesc &Desc = MII.get(Opc);
2856 
2857  if ((Desc.TSFlags & SIInstrFlags::IntClamp) != 0 && !hasIntClamp()) {
2858  int ClampIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp);
2859  assert(ClampIdx != -1);
2860  return Inst.getOperand(ClampIdx).getImm() == 0;
2861  }
2862 
2863  return true;
2864 }
2865 
2866 bool AMDGPUAsmParser::validateMIMGDataSize(const MCInst &Inst) {
2867 
2868  const unsigned Opc = Inst.getOpcode();
2869  const MCInstrDesc &Desc = MII.get(Opc);
2870 
2871  if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
2872  return true;
2873 
2874  int VDataIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdata);
2875  int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
2876  int TFEIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::tfe);
2877 
2878  assert(VDataIdx != -1);
2879  assert(DMaskIdx != -1);
2880  assert(TFEIdx != -1);
2881 
2882  unsigned VDataSize = AMDGPU::getRegOperandSize(getMRI(), Desc, VDataIdx);
2883  unsigned TFESize = Inst.getOperand(TFEIdx).getImm()? 1 : 0;
2884  unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
2885  if (DMask == 0)
2886  DMask = 1;
2887 
2888  unsigned DataSize =
2889  (Desc.TSFlags & SIInstrFlags::Gather4) ? 4 : countPopulation(DMask);
2890  if (hasPackedD16()) {
2891  int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16);
2892  if (D16Idx >= 0 && Inst.getOperand(D16Idx).getImm())
2893  DataSize = (DataSize + 1) / 2;
2894  }
2895 
2896  return (VDataSize / 4) == DataSize + TFESize;
2897 }
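// Editorial example (not in the original source): dmask = 0b0111 selects
// three data components and tfe adds one more dword, so vdata must be a
// 4-dword register; with packed d16 the component count is halved (rounded
// up) before the comparison above.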
2898 
2899 bool AMDGPUAsmParser::validateMIMGAddrSize(const MCInst &Inst) {
2900  const unsigned Opc = Inst.getOpcode();
2901  const MCInstrDesc &Desc = MII.get(Opc);
2902 
2903  if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0 || !isGFX10())
2904  return true;
2905 
2906  const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Opc);
2907  const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode =
2908  AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode);
2909  int VAddr0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vaddr0);
2910  int SrsrcIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::srsrc);
2911  int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim);
2912 
2913  assert(VAddr0Idx != -1);
2914  assert(SrsrcIdx != -1);
2915  assert(DimIdx != -1);
2916  assert(SrsrcIdx > VAddr0Idx);
2917 
2918  unsigned Dim = Inst.getOperand(DimIdx).getImm();
2919  const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByEncoding(Dim);
2920  bool IsNSA = SrsrcIdx - VAddr0Idx > 1;
2921  unsigned VAddrSize =
2922  IsNSA ? SrsrcIdx - VAddr0Idx
2923  : AMDGPU::getRegOperandSize(getMRI(), Desc, VAddr0Idx) / 4;
2924 
2925  unsigned AddrSize = BaseOpcode->NumExtraArgs +
2926  (BaseOpcode->Gradients ? DimInfo->NumGradients : 0) +
2927  (BaseOpcode->Coordinates ? DimInfo->NumCoords : 0) +
2928  (BaseOpcode->LodOrClampOrMip ? 1 : 0);
2929  if (!IsNSA) {
2930  if (AddrSize > 8)
2931  AddrSize = 16;
2932  else if (AddrSize > 4)
2933  AddrSize = 8;
2934  }
2935 
2936  return VAddrSize == AddrSize;
2937 }
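// Editorial note (not in the original source): in NSA encodings each address
// dword is a separate operand, so VAddrSize is simply SrsrcIdx - VAddr0Idx;
// in non-NSA encodings a computed AddrSize of 5..8 requires an 8-dword vaddr
// register and anything above 8 requires 16 dwords.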
2938 
2939 bool AMDGPUAsmParser::validateMIMGAtomicDMask(const MCInst &Inst) {
2940 
2941  const unsigned Opc = Inst.getOpcode();
2942  const MCInstrDesc &Desc = MII.get(Opc);
2943 
2944  if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
2945  return true;
2946  if (!Desc.mayLoad() || !Desc.mayStore())
2947  return true; // Not atomic
2948 
2949  int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
2950  unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
2951 
2952  // This is an incomplete check because image_atomic_cmpswap
2953  // may only use 0x3 and 0xf while other atomic operations
2954  // may use 0x1 and 0x3. However these limitations are
2955  // verified when we check that dmask matches dst size.
2956  return DMask == 0x1 || DMask == 0x3 || DMask == 0xf;
2957 }
2958 
2959 bool AMDGPUAsmParser::validateMIMGGatherDMask(const MCInst &Inst) {
2960 
2961  const unsigned Opc = Inst.getOpcode();
2962  const MCInstrDesc &Desc = MII.get(Opc);
2963 
2964  if ((Desc.TSFlags & SIInstrFlags::Gather4) == 0)
2965  return true;
2966 
2967  int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
2968  unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
2969 
2970  // GATHER4 instructions use dmask in a different fashion compared to
2971  // other MIMG instructions. The only useful DMASK values are
2972  // 1=red, 2=green, 4=blue, 8=alpha. (e.g. 1 returns
2973  // (red,red,red,red) etc.) The ISA document doesn't mention
2974  // this.
2975  return DMask == 0x1 || DMask == 0x2 || DMask == 0x4 || DMask == 0x8;
2976 }
2977 
2978 bool AMDGPUAsmParser::validateMIMGD16(const MCInst &Inst) {
2979 
2980  const unsigned Opc = Inst.getOpcode();
2981  const MCInstrDesc &Desc = MII.get(Opc);
2982 
2983  if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
2984  return true;
2985 
2986  int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16);
2987  if (D16Idx >= 0 && Inst.getOperand(D16Idx).getImm()) {
2988  if (isCI() || isSI())
2989  return false;
2990  }
2991 
2992  return true;
2993 }
2994 
2995 bool AMDGPUAsmParser::validateMIMGDim(const MCInst &Inst) {
2996  const unsigned Opc = Inst.getOpcode();
2997  const MCInstrDesc &Desc = MII.get(Opc);
2998 
2999  if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
3000  return true;
3001 
3002  int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim);
3003  if (DimIdx < 0)
3004  return true;
3005 
3006  long Imm = Inst.getOperand(DimIdx).getImm();
3007  if (Imm < 0 || Imm >= 8)
3008  return false;
3009 
3010  return true;
3011 }
3012 
3013 static bool IsRevOpcode(const unsigned Opcode)
3014 {
3015  switch (Opcode) {
3016  case AMDGPU::V_SUBREV_F32_e32:
3017  case AMDGPU::V_SUBREV_F32_e64:
3018  case AMDGPU::V_SUBREV_F32_e32_gfx10:
3019  case AMDGPU::V_SUBREV_F32_e32_gfx6_gfx7:
3020  case AMDGPU::V_SUBREV_F32_e32_vi:
3021  case AMDGPU::V_SUBREV_F32_e64_gfx10:
3022  case AMDGPU::V_SUBREV_F32_e64_gfx6_gfx7:
3023  case AMDGPU::V_SUBREV_F32_e64_vi:
3024 
3025  case AMDGPU::V_SUBREV_I32_e32:
3026  case AMDGPU::V_SUBREV_I32_e64:
3027  case AMDGPU::V_SUBREV_I32_e32_gfx6_gfx7:
3028  case AMDGPU::V_SUBREV_I32_e64_gfx6_gfx7:
3029 
3030  case AMDGPU::V_SUBBREV_U32_e32:
3031  case AMDGPU::V_SUBBREV_U32_e64:
3032  case AMDGPU::V_SUBBREV_U32_e32_gfx6_gfx7:
3033  case AMDGPU::V_SUBBREV_U32_e32_vi:
3034  case AMDGPU::V_SUBBREV_U32_e64_gfx6_gfx7:
3035  case AMDGPU::V_SUBBREV_U32_e64_vi:
3036 
3037  case AMDGPU::V_SUBREV_U32_e32:
3038  case AMDGPU::V_SUBREV_U32_e64:
3039  case AMDGPU::V_SUBREV_U32_e32_gfx9:
3040  case AMDGPU::V_SUBREV_U32_e32_vi:
3041  case AMDGPU::V_SUBREV_U32_e64_gfx9:
3042  case AMDGPU::V_SUBREV_U32_e64_vi:
3043 
3044  case AMDGPU::V_SUBREV_F16_e32:
3045  case AMDGPU::V_SUBREV_F16_e64:
3046  case AMDGPU::V_SUBREV_F16_e32_gfx10:
3047  case AMDGPU::V_SUBREV_F16_e32_vi:
3048  case AMDGPU::V_SUBREV_F16_e64_gfx10:
3049  case AMDGPU::V_SUBREV_F16_e64_vi:
3050 
3051  case AMDGPU::V_SUBREV_U16_e32:
3052  case AMDGPU::V_SUBREV_U16_e64:
3053  case AMDGPU::V_SUBREV_U16_e32_vi:
3054  case AMDGPU::V_SUBREV_U16_e64_vi:
3055 
3056  case AMDGPU::V_SUBREV_CO_U32_e32_gfx9:
3057  case AMDGPU::V_SUBREV_CO_U32_e64_gfx10:
3058  case AMDGPU::V_SUBREV_CO_U32_e64_gfx9:
3059 
3060  case AMDGPU::V_SUBBREV_CO_U32_e32_gfx9:
3061  case AMDGPU::V_SUBBREV_CO_U32_e64_gfx9:
3062 
3063  case AMDGPU::V_SUBREV_NC_U32_e32_gfx10:
3064  case AMDGPU::V_SUBREV_NC_U32_e64_gfx10:
3065 
3066  case AMDGPU::V_SUBREV_CO_CI_U32_e32_gfx10:
3067  case AMDGPU::V_SUBREV_CO_CI_U32_e64_gfx10:
3068 
3069  case AMDGPU::V_LSHRREV_B32_e32:
3070  case AMDGPU::V_LSHRREV_B32_e64:
3071  case AMDGPU::V_LSHRREV_B32_e32_gfx6_gfx7:
3072  case AMDGPU::V_LSHRREV_B32_e64_gfx6_gfx7:
3073  case AMDGPU::V_LSHRREV_B32_e32_vi:
3074  case AMDGPU::V_LSHRREV_B32_e64_vi:
3075  case AMDGPU::V_LSHRREV_B32_e32_gfx10:
3076  case AMDGPU::V_LSHRREV_B32_e64_gfx10:
3077 
3078  case AMDGPU::V_ASHRREV_I32_e32:
3079  case AMDGPU::V_ASHRREV_I32_e64:
3080  case AMDGPU::V_ASHRREV_I32_e32_gfx10:
3081  case AMDGPU::V_ASHRREV_I32_e32_gfx6_gfx7:
3082  case AMDGPU::V_ASHRREV_I32_e32_vi:
3083  case AMDGPU::V_ASHRREV_I32_e64_gfx10:
3084  case AMDGPU::V_ASHRREV_I32_e64_gfx6_gfx7:
3085  case AMDGPU::V_ASHRREV_I32_e64_vi:
3086 
3087  case AMDGPU::V_LSHLREV_B32_e32:
3088  case AMDGPU::V_LSHLREV_B32_e64:
3089  case AMDGPU::V_LSHLREV_B32_e32_gfx10:
3090  case AMDGPU::V_LSHLREV_B32_e32_gfx6_gfx7:
3091  case AMDGPU::V_LSHLREV_B32_e32_vi:
3092  case AMDGPU::V_LSHLREV_B32_e64_gfx10:
3093  case AMDGPU::V_LSHLREV_B32_e64_gfx6_gfx7:
3094  case AMDGPU::V_LSHLREV_B32_e64_vi:
3095 
3096  case AMDGPU::V_LSHLREV_B16_e32:
3097  case AMDGPU::V_LSHLREV_B16_e64:
3098  case AMDGPU::V_LSHLREV_B16_e32_vi:
3099  case AMDGPU::V_LSHLREV_B16_e64_vi:
3100  case AMDGPU::V_LSHLREV_B16_gfx10:
3101 
3102  case AMDGPU::V_LSHRREV_B16_e32:
3103  case AMDGPU::V_LSHRREV_B16_e64:
3104  case AMDGPU::V_LSHRREV_B16_e32_vi:
3105  case AMDGPU::V_LSHRREV_B16_e64_vi:
3106  case AMDGPU::V_LSHRREV_B16_gfx10:
3107 
3108  case AMDGPU::V_ASHRREV_I16_e32:
3109  case AMDGPU::V_ASHRREV_I16_e64:
3110  case AMDGPU::V_ASHRREV_I16_e32_vi:
3111  case AMDGPU::V_ASHRREV_I16_e64_vi:
3112  case AMDGPU::V_ASHRREV_I16_gfx10:
3113 
3114  case AMDGPU::V_LSHLREV_B64:
3115  case AMDGPU::V_LSHLREV_B64_gfx10:
3116  case AMDGPU::V_LSHLREV_B64_vi:
3117 
3118  case AMDGPU::V_LSHRREV_B64:
3119  case AMDGPU::V_LSHRREV_B64_gfx10:
3120  case AMDGPU::V_LSHRREV_B64_vi:
3121 
3122  case AMDGPU::V_ASHRREV_I64:
3123  case AMDGPU::V_ASHRREV_I64_gfx10:
3124  case AMDGPU::V_ASHRREV_I64_vi:
3125 
3126  case AMDGPU::V_PK_LSHLREV_B16:
3127  case AMDGPU::V_PK_LSHLREV_B16_gfx10:
3128  case AMDGPU::V_PK_LSHLREV_B16_vi:
3129 
3130  case AMDGPU::V_PK_LSHRREV_B16:
3131  case AMDGPU::V_PK_LSHRREV_B16_gfx10:
3132  case AMDGPU::V_PK_LSHRREV_B16_vi:
3133  case AMDGPU::V_PK_ASHRREV_I16:
3134  case AMDGPU::V_PK_ASHRREV_I16_gfx10:
3135  case AMDGPU::V_PK_ASHRREV_I16_vi:
3136  return true;
3137  default:
3138  return false;
3139  }
3140 }
3141 
3142 bool AMDGPUAsmParser::validateLdsDirect(const MCInst &Inst) {
3143 
3144  using namespace SIInstrFlags;
3145  const unsigned Opcode = Inst.getOpcode();
3146  const MCInstrDesc &Desc = MII.get(Opcode);
3147 
3148  // lds_direct register is defined so that it can be used
3149  // with 9-bit operands only. Ignore encodings which do not accept these.
3150  if ((Desc.TSFlags & (VOP1 | VOP2 | VOP3 | VOPC | VOP3P | SIInstrFlags::SDWA)) == 0)
3151  return true;
3152 
3153  const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
3154  const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
3155  const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);
3156 
3157  const int SrcIndices[] = { Src1Idx, Src2Idx };
3158 
3159  // lds_direct cannot be specified as either src1 or src2.
3160  for (int SrcIdx : SrcIndices) {
3161  if (SrcIdx == -1) break;
3162  const MCOperand &Src = Inst.getOperand(SrcIdx);
3163  if (Src.isReg() && Src.getReg() == LDS_DIRECT) {
3164  return false;
3165  }
3166  }
3167 
3168  if (Src0Idx == -1)
3169  return true;
3170 
3171  const MCOperand &Src = Inst.getOperand(Src0Idx);
3172  if (!Src.isReg() || Src.getReg() != LDS_DIRECT)
3173  return true;
3174 
3175  // lds_direct is specified as src0. Check additional limitations.
3176  return (Desc.TSFlags & SIInstrFlags::SDWA) == 0 && !IsRevOpcode(Opcode);
3177 }
3178 
3179 SMLoc AMDGPUAsmParser::getFlatOffsetLoc(const OperandVector &Operands) const {
3180  for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
3181  AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
3182  if (Op.isFlatOffset())
3183  return Op.getStartLoc();
3184  }
3185  return getLoc();
3186 }
3187 
3188 bool AMDGPUAsmParser::validateFlatOffset(const MCInst &Inst,
3189  const OperandVector &Operands) {
3190  uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
3191  if ((TSFlags & SIInstrFlags::FLAT) == 0)
3192  return true;
3193 
3194  auto Opcode = Inst.getOpcode();
3195  auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset);
3196  assert(OpNum != -1);
3197 
3198  const auto &Op = Inst.getOperand(OpNum);
3199  if (!hasFlatOffsets() && Op.getImm() != 0) {
3200  Error(getFlatOffsetLoc(Operands),
3201  "flat offset modifier is not supported on this GPU");
3202  return false;
3203  }
3204 
3205  // Address offset is 12-bit signed for GFX10, 13-bit for GFX9.
3206  // For FLAT segment the offset must be positive;
3207  // MSB is ignored and forced to zero.
3208  unsigned OffsetSize = isGFX9() ? 13 : 12;
3209  if (TSFlags & SIInstrFlags::IsNonFlatSeg) {
3210  if (!isIntN(OffsetSize, Op.getImm())) {
3211  Error(getFlatOffsetLoc(Operands),
3212  isGFX9() ? "expected a 13-bit signed offset" :
3213  "expected a 12-bit signed offset");
3214  return false;
3215  }
3216  } else {
3217  if (!isUIntN(OffsetSize - 1, Op.getImm())) {
3218  Error(getFlatOffsetLoc(Operands),
3219  isGFX9() ? "expected a 12-bit unsigned offset" :
3220  "expected an 11-bit unsigned offset");
3221  return false;
3222  }
3223  }
3224 
3225  return true;
3226 }
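// Editorial example derived from the checks above (not in the original
// source): on GFX9 a global or scratch (non-FLAT-segment) access accepts a
// 13-bit signed offset, i.e. -4096..4095, while a FLAT-segment access is
// limited to the unsigned range 0..4095; on GFX10 the ranges shrink to
// -2048..2047 and 0..2047 respectively.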
3227 
3228 bool AMDGPUAsmParser::validateSOPLiteral(const MCInst &Inst) const {
3229  unsigned Opcode = Inst.getOpcode();
3230  const MCInstrDesc &Desc = MII.get(Opcode);
3231  if (!(Desc.TSFlags & (SIInstrFlags::SOP2 | SIInstrFlags::SOPC)))
3232  return true;
3233 
3234  const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
3235  const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
3236 
3237  const int OpIndices[] = { Src0Idx, Src1Idx };
3238 
3239  unsigned NumLiterals = 0;
3240  uint32_t LiteralValue;
3241 
3242  for (int OpIdx : OpIndices) {
3243  if (OpIdx == -1) break;
3244 
3245  const MCOperand &MO = Inst.getOperand(OpIdx);
3246  if (MO.isImm() &&
3247  // Exclude special imm operands (like that used by s_set_gpr_idx_on)
3248  AMDGPU::isSISrcOperand(Desc, OpIdx) &&
3249  !isInlineConstant(Inst, OpIdx)) {
3250  uint32_t Value = static_cast<uint32_t>(MO.getImm());
3251  if (NumLiterals == 0 || LiteralValue != Value) {
3252  LiteralValue = Value;
3253  ++NumLiterals;
3254  }
3255  }
3256  }
3257 
3258  return NumLiterals <= 1;
3259 }
3260 
3261 bool AMDGPUAsmParser::validateOpSel(const MCInst &Inst) {
3262  const unsigned Opc = Inst.getOpcode();
3263  if (Opc == AMDGPU::V_PERMLANE16_B32_gfx10 ||
3264  Opc == AMDGPU::V_PERMLANEX16_B32_gfx10) {
3265  int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
3266  unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
3267 
3268  if (OpSel & ~3)
3269  return false;
3270  }
3271  return true;
3272 }
3273 
3274 // Check if VCC register matches wavefront size
3275 bool AMDGPUAsmParser::validateVccOperand(unsigned Reg) const {
3276  auto FB = getFeatureBits();
3277  return (FB[AMDGPU::FeatureWavefrontSize64] && Reg == AMDGPU::VCC) ||
3278  (FB[AMDGPU::FeatureWavefrontSize32] && Reg == AMDGPU::VCC_LO);
3279 }
3280 
3281 // VOP3 literal is only allowed in GFX10+ and only one can be used
3282 bool AMDGPUAsmParser::validateVOP3Literal(const MCInst &Inst) const {
3283  unsigned Opcode = Inst.getOpcode();
3284  const MCInstrDesc &Desc = MII.get(Opcode);
3285  if (!(Desc.TSFlags & (SIInstrFlags::VOP3 | SIInstrFlags::VOP3P)))
3286  return true;
3287 
3288  const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
3289  const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
3290  const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);
3291 
3292  const int OpIndices[] = { Src0Idx, Src1Idx, Src2Idx };
3293 
3294  unsigned NumLiterals = 0;
3295  uint32_t LiteralValue;
3296 
3297  for (int OpIdx : OpIndices) {
3298  if (OpIdx == -1) break;
3299 
3300  const MCOperand &MO = Inst.getOperand(OpIdx);
3301  if (!MO.isImm() || !AMDGPU::isSISrcOperand(Desc, OpIdx))
3302  continue;
3303 
3304  if (OpIdx == Src2Idx && (Desc.TSFlags & SIInstrFlags::IsMAI) &&
3305  getFeatureBits()[AMDGPU::FeatureMFMAInlineLiteralBug])
3306  return false;
3307 
3308  if (!isInlineConstant(Inst, OpIdx)) {
3309  uint32_t Value = static_cast<uint32_t>(MO.getImm());
3310  if (NumLiterals == 0 || LiteralValue != Value) {
3311  LiteralValue = Value;
3312  ++NumLiterals;
3313  }
3314  }
3315  }
3316 
3317  return !NumLiterals ||
3318  (NumLiterals == 1 && getFeatureBits()[AMDGPU::FeatureVOP3Literal]);
3319 }
3320 
3321 bool AMDGPUAsmParser::validateInstruction(const MCInst &Inst,
3322  const SMLoc &IDLoc,
3323  const OperandVector &Operands) {
3324  if (!validateLdsDirect(Inst)) {
3325  Error(IDLoc,
3326  "invalid use of lds_direct");
3327  return false;
3328  }
3329  if (!validateSOPLiteral(Inst)) {
3330  Error(IDLoc,
3331  "only one literal operand is allowed");
3332  return false;
3333  }
3334  if (!validateVOP3Literal(Inst)) {
3335  Error(IDLoc,
3336  "invalid literal operand");
3337  return false;
3338  }
3339  if (!validateConstantBusLimitations(Inst)) {
3340  Error(IDLoc,
3341  "invalid operand (violates constant bus restrictions)");
3342  return false;
3343  }
3344  if (!validateEarlyClobberLimitations(Inst)) {
3345  Error(IDLoc,
3346  "destination must be different than all sources");
3347  return false;
3348  }
3349  if (!validateIntClampSupported(Inst)) {
3350  Error(IDLoc,
3351  "integer clamping is not supported on this GPU");
3352  return false;
3353  }
3354  if (!validateOpSel(Inst)) {
3355  Error(IDLoc,
3356  "invalid op_sel operand");
3357  return false;
3358  }
3359  // For MUBUF/MTBUF d16 is a part of opcode, so there is nothing to validate.
3360  if (!validateMIMGD16(Inst)) {
3361  Error(IDLoc,
3362  "d16 modifier is not supported on this GPU");
3363  return false;
3364  }
3365  if (!validateMIMGDim(Inst)) {
3366  Error(IDLoc, "dim modifier is required on this GPU");
3367  return false;
3368  }
3369  if (!validateMIMGDataSize(Inst)) {
3370  Error(IDLoc,
3371  "image data size does not match dmask and tfe");
3372  return false;
3373  }
3374  if (!validateMIMGAddrSize(Inst)) {
3375  Error(IDLoc,
3376  "image address size does not match dim and a16");
3377  return false;
3378  }
3379  if (!validateMIMGAtomicDMask(Inst)) {
3380  Error(IDLoc,
3381  "invalid atomic image dmask");
3382  return false;
3383  }
3384  if (!validateMIMGGatherDMask(Inst)) {
3385  Error(IDLoc,
3386  "invalid image_gather dmask: only one bit must be set");
3387  return false;
3388  }
3389  if (!validateFlatOffset(Inst, Operands)) {
3390  return false;
3391  }
3392 
3393  return true;
3394 }
3395 
3396 static std::string AMDGPUMnemonicSpellCheck(StringRef S,
3397  const FeatureBitset &FBS,
3398  unsigned VariantID = 0);
3399 
3400 bool AMDGPUAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
3401  OperandVector &Operands,
3402  MCStreamer &Out,
3403  uint64_t &ErrorInfo,
3404  bool MatchingInlineAsm) {
3405  MCInst Inst;
3406  unsigned Result = Match_Success;
3407  for (auto Variant : getMatchedVariants()) {
3408  uint64_t EI;
3409  auto R = MatchInstructionImpl(Operands, Inst, EI, MatchingInlineAsm,
3410  Variant);
3411  // We order match statuses from least to most specific and use the most
3412  // specific status as the result:
3413  // Match_MnemonicFail < Match_InvalidOperand < Match_MissingFeature < Match_PreferE32
3414  if ((R == Match_Success) ||
3415  (R == Match_PreferE32) ||
3416  (R == Match_MissingFeature && Result != Match_PreferE32) ||
3417  (R == Match_InvalidOperand && Result != Match_MissingFeature
3418  && Result != Match_PreferE32) ||
3419  (R == Match_MnemonicFail && Result != Match_InvalidOperand
3420  && Result != Match_MissingFeature
3421  && Result != Match_PreferE32)) {
3422  Result = R;
3423  ErrorInfo = EI;
3424  }
3425  if (R == Match_Success)
3426  break;
3427  }
3428 
3429  switch (Result) {
3430  default: break;
3431  case Match_Success:
3432  if (!validateInstruction(Inst, IDLoc, Operands)) {
3433  return true;
3434  }
3435  Inst.setLoc(IDLoc);
3436  Out.EmitInstruction(Inst, getSTI());
3437  return false;
3438 
3439  case Match_MissingFeature:
3440  return Error(IDLoc, "instruction not supported on this GPU");
3441 
3442  case Match_MnemonicFail: {
3443  FeatureBitset FBS = ComputeAvailableFeatures(getSTI().getFeatureBits());
3444  std::string Suggestion = AMDGPUMnemonicSpellCheck(
3445  ((AMDGPUOperand &)*Operands[0]).getToken(), FBS);
3446  return Error(IDLoc, "invalid instruction" + Suggestion,
3447  ((AMDGPUOperand &)*Operands[0]).getLocRange());
3448  }
3449 
3450  case Match_InvalidOperand: {
3451  SMLoc ErrorLoc = IDLoc;
3452  if (ErrorInfo != ~0ULL) {
3453  if (ErrorInfo >= Operands.size()) {
3454  return Error(IDLoc, "too few operands for instruction");
3455  }
3456  ErrorLoc = ((AMDGPUOperand &)*Operands[ErrorInfo]).getStartLoc();
3457  if (ErrorLoc == SMLoc())
3458  ErrorLoc = IDLoc;
3459  }
3460  return Error(ErrorLoc, "invalid operand for instruction");
3461  }
3462 
3463  case Match_PreferE32:
3464  return Error(IDLoc, "internal error: instruction without _e64 suffix "
3465  "should be encoded as e32");
3466  }
3467  llvm_unreachable("Implement any new match types added!");
3468 }
3469 
3470 bool AMDGPUAsmParser::ParseAsAbsoluteExpression(uint32_t &Ret) {
3471  int64_t Tmp = -1;
3472  if (getLexer().isNot(AsmToken::Integer) && getLexer().isNot(AsmToken::Identifier)) {
3473  return true;
3474  }
3475  if (getParser().parseAbsoluteExpression(Tmp)) {
3476  return true;
3477  }
3478  Ret = static_cast<uint32_t>(Tmp);
3479  return false;
3480 }
3481 
3482 bool AMDGPUAsmParser::ParseDirectiveMajorMinor(uint32_t &Major,
3483  uint32_t &Minor) {
3484  if (ParseAsAbsoluteExpression(Major))
3485  return TokError("invalid major version");
3486 
3487  if (getLexer().isNot(AsmToken::Comma))
3488  return TokError("minor version number required, comma expected");
3489  Lex();
3490 
3491  if (ParseAsAbsoluteExpression(Minor))
3492  return TokError("invalid minor version");
3493 
3494  return false;
3495 }
3496 
3497 bool AMDGPUAsmParser::ParseDirectiveAMDGCNTarget() {
3498  if (getSTI().getTargetTriple().getArch() != Triple::amdgcn)
3499  return TokError("directive only supported for amdgcn architecture");
3500 
3501  std::string Target;
3502 
3503  SMLoc TargetStart = getTok().getLoc();
3504  if (getParser().parseEscapedString(Target))
3505  return true;
3506  SMRange TargetRange = SMRange(TargetStart, getTok().getLoc());
3507 
3508  std::string ExpectedTarget;
3509  raw_string_ostream ExpectedTargetOS(ExpectedTarget);
3510  IsaInfo::streamIsaVersion(&getSTI(), ExpectedTargetOS);
3511 
3512  if (Target != ExpectedTargetOS.str())
3513  return getParser().Error(TargetRange.Start, "target must match options",
3514  TargetRange);
3515 
3516  getTargetStreamer().EmitDirectiveAMDGCNTarget(Target);
3517  return false;
3518 }
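// Illustrative usage (not from the original source): for a gfx906 HSA target the
// directive would typically be written as
//   .amdgcn_target "amdgcn-amd-amdhsa--gfx906"
// and the quoted string has to match what IsaInfo::streamIsaVersion emits for the
// current subtarget, otherwise the "target must match options" error above fires.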
3519 
3520 bool AMDGPUAsmParser::OutOfRangeError(SMRange Range) {
3521  return getParser().Error(Range.Start, "value out of range", Range);
3522 }
3523 
3524 bool AMDGPUAsmParser::calculateGPRBlocks(
3525  const FeatureBitset &Features, bool VCCUsed, bool FlatScrUsed,
3526  bool XNACKUsed, Optional<bool> EnableWavefrontSize32, unsigned NextFreeVGPR,
3527  SMRange VGPRRange, unsigned NextFreeSGPR, SMRange SGPRRange,
3528  unsigned &VGPRBlocks, unsigned &SGPRBlocks) {
3529  // TODO(scott.linder): These calculations are duplicated from
3530  // AMDGPUAsmPrinter::getSIProgramInfo and could be unified.
3531  IsaVersion Version = getIsaVersion(getSTI().getCPU());
3532 
3533  unsigned NumVGPRs = NextFreeVGPR;
3534  unsigned NumSGPRs = NextFreeSGPR;
3535 
3536  if (Version.Major >= 10)
3537  NumSGPRs = 0;
3538  else {
3539  unsigned MaxAddressableNumSGPRs =
3540  IsaInfo::getAddressableNumSGPRs(&getSTI());
3541 
3542  if (Version.Major >= 8 && !Features.test(FeatureSGPRInitBug) &&
3543  NumSGPRs > MaxAddressableNumSGPRs)
3544  return OutOfRangeError(SGPRRange);
3545 
3546  NumSGPRs +=
3547  IsaInfo::getNumExtraSGPRs(&getSTI(), VCCUsed, FlatScrUsed, XNACKUsed);
3548 
3549  if ((Version.Major <= 7 || Features.test(FeatureSGPRInitBug)) &&
3550  NumSGPRs > MaxAddressableNumSGPRs)
3551  return OutOfRangeError(SGPRRange);
3552 
3553  if (Features.test(FeatureSGPRInitBug))
3554  NumSGPRs = IsaInfo::FIXED_NUM_SGPRS_FOR_INIT_BUG;
3555  }
3556 
3557  VGPRBlocks =
3558  IsaInfo::getNumVGPRBlocks(&getSTI(), NumVGPRs, EnableWavefrontSize32);
3559  SGPRBlocks = IsaInfo::getNumSGPRBlocks(&getSTI(), NumSGPRs);
3560 
3561  return false;
3562 }
3563 
3564 bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() {
3565  if (getSTI().getTargetTriple().getArch() != Triple::amdgcn)
3566  return TokError("directive only supported for amdgcn architecture");
3567 
3568  if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA)
3569  return TokError("directive only supported for amdhsa OS");
3570 
3571  StringRef KernelName;
3572  if (getParser().parseIdentifier(KernelName))
3573  return true;
3574 
3575  kernel_descriptor_t KD = getDefaultAmdhsaKernelDescriptor(&getSTI());
3576 
3577  StringSet<> Seen;
3578 
3579  IsaVersion IVersion = getIsaVersion(getSTI().getCPU());
3580 
3581  SMRange VGPRRange;
3582  uint64_t NextFreeVGPR = 0;
3583  SMRange SGPRRange;
3584  uint64_t NextFreeSGPR = 0;
3585  unsigned UserSGPRCount = 0;
3586  bool ReserveVCC = true;
3587  bool ReserveFlatScr = true;
3588  bool ReserveXNACK = hasXNACK();
3589  Optional<bool> EnableWavefrontSize32;
3590 
3591  while (true) {
3592  while (getLexer().is(AsmToken::EndOfStatement))
3593  Lex();
3594 
3595  if (getLexer().isNot(AsmToken::Identifier))
3596  return TokError("expected .amdhsa_ directive or .end_amdhsa_kernel");
3597 
3598  StringRef ID = getTok().getIdentifier();
3599  SMRange IDRange = getTok().getLocRange();
3600  Lex();
3601 
3602  if (ID == ".end_amdhsa_kernel")
3603  break;
3604 
3605  if (Seen.find(ID) != Seen.end())
3606  return TokError(".amdhsa_ directives cannot be repeated");
3607  Seen.insert(ID);
3608 
3609  SMLoc ValStart = getTok().getLoc();
3610  int64_t IVal;
3611  if (getParser().parseAbsoluteExpression(IVal))
3612  return true;
3613  SMLoc ValEnd = getTok().getLoc();
3614  SMRange ValRange = SMRange(ValStart, ValEnd);
3615 
3616  if (IVal < 0)
3617  return OutOfRangeError(ValRange);
3618 
3619  uint64_t Val = IVal;
3620 
3621 #define PARSE_BITS_ENTRY(FIELD, ENTRY, VALUE, RANGE) \
3622  if (!isUInt<ENTRY##_WIDTH>(VALUE)) \
3623  return OutOfRangeError(RANGE); \
3624  AMDHSA_BITS_SET(FIELD, ENTRY, VALUE);
3625 
3626  if (ID == ".amdhsa_group_segment_fixed_size") {
3627  if (!isUInt<sizeof(KD.group_segment_fixed_size) * CHAR_BIT>(Val))
3628  return OutOfRangeError(ValRange);
3629  KD.group_segment_fixed_size = Val;
3630  } else if (ID == ".amdhsa_private_segment_fixed_size") {
3631  if (!isUInt<sizeof(KD.private_segment_fixed_size) * CHAR_BIT>(Val))
3632  return OutOfRangeError(ValRange);
3633  KD.private_segment_fixed_size = Val;
3634  } else if (ID == ".amdhsa_user_sgpr_private_segment_buffer") {
3635  PARSE_BITS_ENTRY(KD.kernel_code_properties,
3636  KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER,
3637  Val, ValRange);
3638  if (Val)
3639  UserSGPRCount += 4;
3640  } else if (ID == ".amdhsa_user_sgpr_dispatch_ptr") {
3641  PARSE_BITS_ENTRY(KD.kernel_code_properties,
3642  KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR, Val,
3643  ValRange);
3644  if (Val)
3645  UserSGPRCount += 2;
3646  } else if (ID == ".amdhsa_user_sgpr_queue_ptr") {
3647  PARSE_BITS_ENTRY(KD.kernel_code_properties,
3648  KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR, Val,
3649  ValRange);
3650  if (Val)
3651  UserSGPRCount += 2;
3652  } else if (ID == ".amdhsa_user_sgpr_kernarg_segment_ptr") {
3653  PARSE_BITS_ENTRY(KD.kernel_code_properties,
3654  KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR,
3655  Val, ValRange);
3656  if (Val)
3657  UserSGPRCount += 2;
3658  } else if (ID == ".amdhsa_user_sgpr_dispatch_id") {
3659  PARSE_BITS_ENTRY(KD.kernel_code_properties,
3660  KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID, Val,
3661  ValRange);
3662  if (Val)
3663  UserSGPRCount += 2;
3664  } else if (ID == ".amdhsa_user_sgpr_flat_scratch_init") {
3665  PARSE_BITS_ENTRY(KD.kernel_code_properties,
3666  KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT, Val,
3667  ValRange);
3668  if (Val)
3669  UserSGPRCount += 2;
3670  } else if (ID == ".amdhsa_user_sgpr_private_segment_size") {
3671  PARSE_BITS_ENTRY(KD.kernel_code_properties,
3672  KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE,
3673  Val, ValRange);
3674  if (Val)
3675  UserSGPRCount += 1;
3676  } else if (ID == ".amdhsa_wavefront_size32") {
3677  if (IVersion.Major < 10)
3678  return getParser().Error(IDRange.Start, "directive requires gfx10+",
3679  IDRange);
3680  EnableWavefrontSize32 = Val;
3681  PARSE_BITS_ENTRY(KD.kernel_code_properties,
3682  KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32,
3683  Val, ValRange);
3684  } else if (ID == ".amdhsa_system_sgpr_private_segment_wavefront_offset") {
3685  PARSE_BITS_ENTRY(
3686  KD.compute_pgm_rsrc2,
3687  COMPUTE_PGM_RSRC2_ENABLE_SGPR_PRIVATE_SEGMENT_WAVEFRONT_OFFSET, Val,
3688  ValRange);
3689  } else if (ID == ".amdhsa_system_sgpr_workgroup_id_x") {
3690  PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
3691  COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X, Val,
3692  ValRange);
3693  } else if (ID == ".amdhsa_system_sgpr_workgroup_id_y") {
3694  PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
3695  COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Y, Val,
3696  ValRange);
3697  } else if (ID == ".amdhsa_system_sgpr_workgroup_id_z") {
3698  PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
3699  COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Z, Val,
3700  ValRange);
3701  } else if (ID == ".amdhsa_system_sgpr_workgroup_info") {
3702  PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
3703  COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_INFO, Val,
3704  ValRange);
3705  } else if (ID == ".amdhsa_system_vgpr_workitem_id") {
3706  PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
3707  COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID, Val,
3708  ValRange);
3709  } else if (ID == ".amdhsa_next_free_vgpr") {
3710  VGPRRange = ValRange;
3711  NextFreeVGPR = Val;
3712  } else if (ID == ".amdhsa_next_free_sgpr") {
3713  SGPRRange = ValRange;
3714  NextFreeSGPR = Val;
3715  } else if (ID == ".amdhsa_reserve_vcc") {
3716  if (!isUInt<1>(Val))
3717  return OutOfRangeError(ValRange);
3718  ReserveVCC = Val;
3719  } else if (ID == ".amdhsa_reserve_flat_scratch") {
3720  if (IVersion.Major < 7)
3721  return getParser().Error(IDRange.Start, "directive requires gfx7+",
3722  IDRange);
3723  if (!isUInt<1>(Val))
3724  return OutOfRangeError(ValRange);
3725  ReserveFlatScr = Val;
3726  } else if (ID == ".amdhsa_reserve_xnack_mask") {
3727  if (IVersion.Major < 8)
3728  return getParser().Error(IDRange.Start, "directive requires gfx8+",
3729  IDRange);
3730  if (!isUInt<1>(Val))
3731  return OutOfRangeError(ValRange);
3732  ReserveXNACK = Val;
3733  } else if (ID == ".amdhsa_float_round_mode_32") {
3734  PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
3735  COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32, Val, ValRange);
3736  } else if (ID == ".amdhsa_float_round_mode_16_64") {
3737  PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
3738  COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_16_64, Val, ValRange);
3739  } else if (ID == ".amdhsa_float_denorm_mode_32") {
3740  PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
3741  COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_32, Val, ValRange);
3742  } else if (ID == ".amdhsa_float_denorm_mode_16_64") {
3743  PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
3744  COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64, Val,
3745  ValRange);
3746  } else if (ID == ".amdhsa_dx10_clamp") {
3747  PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
3748  COMPUTE_PGM_RSRC1_ENABLE_DX10_CLAMP, Val, ValRange);
3749  } else if (ID == ".amdhsa_ieee_mode") {
3750  PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_ENABLE_IEEE_MODE,
3751  Val, ValRange);
3752  } else if (ID == ".amdhsa_fp16_overflow") {
3753  if (IVersion.Major < 9)
3754  return getParser().Error(IDRange.Start, "directive requires gfx9+",
3755  IDRange);
3756  PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_FP16_OVFL, Val,
3757  ValRange);
3758  } else if (ID == ".amdhsa_workgroup_processor_mode") {
3759  if (IVersion.Major < 10)
3760  return getParser().Error(IDRange.Start, "directive requires gfx10+",
3761  IDRange);
3762  PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_WGP_MODE, Val,
3763  ValRange);
3764  } else if (ID == ".amdhsa_memory_ordered") {
3765  if (IVersion.Major < 10)
3766  return getParser().Error(IDRange.Start, "directive requires gfx10+",
3767  IDRange);
3768  PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_MEM_ORDERED, Val,
3769  ValRange);
3770  } else if (ID == ".amdhsa_forward_progress") {
3771  if (IVersion.Major < 10)
3772  return getParser().Error(IDRange.Start, "directive requires gfx10+",
3773  IDRange);
3774  PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_FWD_PROGRESS, Val,
3775  ValRange);
3776  } else if (ID == ".amdhsa_exception_fp_ieee_invalid_op") {
3777  PARSE_BITS_ENTRY(
3778  KD.compute_pgm_rsrc2,
3779  COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION, Val,
3780  ValRange);
3781  } else if (ID == ".amdhsa_exception_fp_denorm_src") {
3782  PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
3783  COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_FP_DENORMAL_SOURCE,
3784  Val, ValRange);
3785  } else if (ID == ".amdhsa_exception_fp_ieee_div_zero") {
3786  PARSE_BITS_ENTRY(
3787  KD.compute_pgm_rsrc2,
3788  COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO, Val,
3789  ValRange);
3790  } else if (ID == ".amdhsa_exception_fp_ieee_overflow") {
3791  PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
3792  COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW,
3793  Val, ValRange);
3794  } else if (ID == ".amdhsa_exception_fp_ieee_underflow") {
3795  PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
3796  COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW,
3797  Val, ValRange);
3798  } else if (ID == ".amdhsa_exception_fp_ieee_inexact") {
3799  PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
3800  COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INEXACT,
3801  Val, ValRange);
3802  } else if (ID == ".amdhsa_exception_int_div_zero") {
3803  PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
3804  COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO,
3805  Val, ValRange);
3806  } else {
3807  return getParser().Error(IDRange.Start,
3808  "unknown .amdhsa_kernel directive", IDRange);
3809  }
3810 
3811 #undef PARSE_BITS_ENTRY
3812  }
3813 
3814  if (Seen.find(".amdhsa_next_free_vgpr") == Seen.end())
3815  return TokError(".amdhsa_next_free_vgpr directive is required");
3816 
3817  if (Seen.find(".amdhsa_next_free_sgpr") == Seen.end())
3818  return TokError(".amdhsa_next_free_sgpr directive is required");
3819 
3820  unsigned VGPRBlocks;
3821  unsigned SGPRBlocks;
3822  if (calculateGPRBlocks(getFeatureBits(), ReserveVCC, ReserveFlatScr,
3823  ReserveXNACK, EnableWavefrontSize32, NextFreeVGPR,
3824  VGPRRange, NextFreeSGPR, SGPRRange, VGPRBlocks,
3825  SGPRBlocks))
3826  return true;
3827 
3828  if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT_WIDTH>(
3829  VGPRBlocks))
3830  return OutOfRangeError(VGPRRange);
3831  AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
3832  COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT, VGPRBlocks);
3833 
3834  if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT_WIDTH>(
3835  SGPRBlocks))
3836  return OutOfRangeError(SGPRRange);
3837  AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
3838  COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT,
3839  SGPRBlocks);
3840 
3841  if (!isUInt<COMPUTE_PGM_RSRC2_USER_SGPR_COUNT_WIDTH>(UserSGPRCount))
3842  return TokError("too many user SGPRs enabled");
3843  AMDHSA_BITS_SET(KD.compute_pgm_rsrc2, COMPUTE_PGM_RSRC2_USER_SGPR_COUNT,
3844  UserSGPRCount);
3845 
3846  getTargetStreamer().EmitAmdhsaKernelDescriptor(
3847  getSTI(), KernelName, KD, NextFreeVGPR, NextFreeSGPR, ReserveVCC,
3848  ReserveFlatScr, ReserveXNACK);
3849  return false;
3850 }
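// Illustrative usage (not from the original source): a minimal block accepted by
// this parser. Only the two "next_free" counts are mandatory; all other
// .amdhsa_* entries keep the defaults of the initial kernel descriptor.
//   .amdhsa_kernel minimal
//     .amdhsa_next_free_vgpr 4
//     .amdhsa_next_free_sgpr 8
//   .end_amdhsa_kernel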
3851 
3852 bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectVersion() {
3853  uint32_t Major;
3854  uint32_t Minor;
3855 
3856  if (ParseDirectiveMajorMinor(Major, Minor))
3857  return true;
3858 
3859  getTargetStreamer().EmitDirectiveHSACodeObjectVersion(Major, Minor);
3860  return false;
3861 }
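// Illustrative usage (not from the original source):
//   .hsa_code_object_version 2,1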
3862 
3863 bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectISA() {
3864  uint32_t Major;
3865  uint32_t Minor;
3866  uint32_t Stepping;
3867  StringRef VendorName;
3868  StringRef ArchName;
3869 
3870  // If this directive has no arguments, then use the ISA version for the
3871  // targeted GPU.
3872  if (getLexer().is(AsmToken::EndOfStatement)) {
3873  AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
3874  getTargetStreamer().EmitDirectiveHSACodeObjectISA(ISA.Major, ISA.Minor,
3875  ISA.Stepping,
3876  "AMD", "AMDGPU");
3877  return false;
3878  }
3879 
3880  if (ParseDirectiveMajorMinor(Major, Minor))
3881  return true;
3882 
3883  if (getLexer().isNot(AsmToken::Comma))
3884  return TokError("stepping version number required, comma expected");
3885  Lex();
3886 
3887  if (ParseAsAbsoluteExpression(Stepping))
3888  return TokError("invalid stepping version");
3889 
3890  if (getLexer().isNot(AsmToken::Comma))
3891  return TokError("vendor name required, comma expected");
3892  Lex();
3893 
3894  if (getLexer().isNot(AsmToken::String))
3895  return TokError("invalid vendor name");
3896 
3897  VendorName = getLexer().getTok().getStringContents();
3898  Lex();
3899 
3900  if (getLexer().isNot(AsmToken::Comma))
3901  return TokError("arch name required, comma expected");
3902  Lex();
3903 
3904  if (getLexer().isNot(AsmToken::String))
3905  return TokError("invalid arch name");
3906 
3907  ArchName = getLexer().getTok().getStringContents();
3908  Lex();
3909 
3910  getTargetStreamer().EmitDirectiveHSACodeObjectISA(Major, Minor, Stepping,
3911  VendorName, ArchName);
3912  return false;
3913 }
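// Illustrative usage (not from the original source): either spell the version and
// vendor/arch strings out, or give no arguments and let the parser supply the
// ISA version of the targeted GPU:
//   .hsa_code_object_isa 7,0,0,"AMD","AMDGPU"
//   .hsa_code_object_isa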
3914 
3915 bool AMDGPUAsmParser::ParseAMDKernelCodeTValue(StringRef ID,
3916  amd_kernel_code_t &Header) {
3917  // max_scratch_backing_memory_byte_size is deprecated. Ignore it while parsing
3918  // assembly for backwards compatibility.
3919  if (ID == "max_scratch_backing_memory_byte_size") {
3920  Parser.eatToEndOfStatement();
3921  return false;
3922  }
3923 
3924  SmallString<40> ErrStr;
3925  raw_svector_ostream Err(ErrStr);
3926  if (!parseAmdKernelCodeField(ID, getParser(), Header, Err)) {
3927  return TokError(Err.str());
3928  }
3929  Lex();
3930 
3931  if (ID == "enable_wavefront_size32") {
3932  if (Header.code_properties & AMD_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32) {
3933  if (!isGFX10())
3934  return TokError("enable_wavefront_size32=1 is only allowed on GFX10+");
3935  if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32])
3936  return TokError("enable_wavefront_size32=1 requires +WavefrontSize32");
3937  } else {
3938  if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64])
3939  return TokError("enable_wavefront_size32=0 requires +WavefrontSize64");
3940  }
3941  }
3942 
3943  if (ID == "wavefront_size") {
3944  if (Header.wavefront_size == 5) {
3945  if (!isGFX10())
3946  return TokError("wavefront_size=5 is only allowed on GFX10+");
3947  if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32])
3948  return TokError("wavefront_size=5 requires +WavefrontSize32");
3949  } else if (Header.wavefront_size == 6) {
3950  if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64])
3951  return TokError("wavefront_size=6 requires +WavefrontSize64");
3952  }
3953  }
3954 
3955  if (ID == "enable_wgp_mode") {
3956  if (G_00B848_WGP_MODE(Header.compute_pgm_resource_registers) && !isGFX10())
3957  return TokError("enable_wgp_mode=1 is only allowed on GFX10+");
3958  }
3959 
3960  if (ID == "enable_mem_ordered") {
3961  if (G_00B848_MEM_ORDERED(Header.compute_pgm_resource_registers) && !isGFX10())
3962  return TokError("enable_mem_ordered=1 is only allowed on GFX10+");
3963  }
3964 
3965  if (ID == "enable_fwd_progress") {
3966  if (G_00B848_FWD_PROGRESS(Header.compute_pgm_resource_registers) && !isGFX10())
3967  return TokError("enable_fwd_progress=1 is only allowed on GFX10+");
3968  }
3969 
3970  return false;
3971 }
3972 
3973 bool AMDGPUAsmParser::ParseDirectiveAMDKernelCodeT() {
3974  amd_kernel_code_t Header;
3975  AMDGPU::initDefaultAMDKernelCodeT(Header, &getSTI());
3976 
3977  while (true) {
3978  // Lex EndOfStatement. This is in a while loop, because lexing a comment
3979  // will set the current token to EndOfStatement.
3980  while(getLexer().is(AsmToken::EndOfStatement))
3981  Lex();
3982 
3983  if (getLexer().isNot(AsmToken::Identifier))
3984  return TokError("expected value identifier or .end_amd_kernel_code_t");
3985 
3986  StringRef ID = getLexer().getTok().getIdentifier();
3987  Lex();
3988 
3989  if (ID == ".end_amd_kernel_code_t")
3990  break;
3991 
3992  if (ParseAMDKernelCodeTValue(ID, Header))
3993  return true;
3994  }
3995 
3996  getTargetStreamer().EmitAMDKernelCodeT(Header);
3997 
3998  return false;
3999 }
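// Illustrative usage (not from the original source): any subset of
// amd_kernel_code_t fields may be listed as "name = value"; fields that are not
// listed keep the defaults from initDefaultAMDKernelCodeT.
//   .amd_kernel_code_t
//     enable_sgpr_kernarg_segment_ptr = 1
//     wavefront_size = 6
//   .end_amd_kernel_code_t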
4000 
4001 bool AMDGPUAsmParser::ParseDirectiveAMDGPUHsaKernel() {
4002  if (getLexer().isNot(AsmToken::Identifier))
4003  return TokError("expected symbol name");
4004 
4005  StringRef KernelName = Parser.getTok().getString();
4006 
4007  getTargetStreamer().EmitAMDGPUSymbolType(KernelName,
4008  ELF::STT_AMDGPU_HSA_KERNEL);
4009  Lex();
4010  if (!AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI()))
4011  KernelScope.initialize(getContext());
4012  return false;
4013 }
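// Illustrative usage (not from the original source):
//   .amdgpu_hsa_kernel hello_world
// marks the symbol hello_world as an HSA kernel entry point.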
4014 
4015 bool AMDGPUAsmParser::ParseDirectiveISAVersion() {
4016  if (getSTI().getTargetTriple().getArch() != Triple::amdgcn) {
4017  return Error(getParser().getTok().getLoc(),
4018  ".amd_amdgpu_isa directive is not available on non-amdgcn "
4019  "architectures");
4020  }
4021 
4022  auto ISAVersionStringFromASM = getLexer().getTok().getStringContents();
4023 
4024  std::string ISAVersionStringFromSTI;
4025  raw_string_ostream ISAVersionStreamFromSTI(ISAVersionStringFromSTI);
4026  IsaInfo::streamIsaVersion(&getSTI(), ISAVersionStreamFromSTI);
4027 
4028  if (ISAVersionStringFromASM != ISAVersionStreamFromSTI.str()) {
4029  return Error(getParser().getTok().getLoc(),
4030  ".amd_amdgpu_isa directive does not match triple and/or mcpu "
4031  "arguments specified through the command line");
4032  }
4033 
4034  getTargetStreamer().EmitISAVersion(ISAVersionStreamFromSTI.str());
4035  Lex();
4036 
4037  return false;
4038 }
4039 
4040 bool AMDGPUAsmParser::ParseDirectiveHSAMetadata() {
4041  const char *AssemblerDirectiveBegin;
4042  const char *AssemblerDirectiveEnd;
4043  std::tie(AssemblerDirectiveBegin, AssemblerDirectiveEnd) =
4044  IsaInfo::hasCodeObjectV3(&getSTI())
4045  ? std::make_tuple(HSAMD::V3::AssemblerDirectiveBegin,
4046  HSAMD::V3::AssemblerDirectiveEnd)
4047  : std::make_tuple(HSAMD::AssemblerDirectiveBegin,
4048  HSAMD::AssemblerDirectiveEnd);
4049 
4050  if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA) {
4051  return Error(getParser().getTok().getLoc(),
4052  (Twine(AssemblerDirectiveBegin) + Twine(" directive is "
4053  "not available on non-amdhsa OSes")).str());
4054  }
4055 
4056  std::string HSAMetadataString;
4057  if (ParseToEndDirective(AssemblerDirectiveBegin, AssemblerDirectiveEnd,
4058  HSAMetadataString))
4059  return true;
4060 
4061  if (IsaInfo::hasCodeObjectV3(&getSTI())) {
4062  if (!getTargetStreamer().EmitHSAMetadataV3(HSAMetadataString))
4063  return Error(getParser().getTok().getLoc(), "invalid HSA metadata");
4064  } else {
4065  if (!getTargetStreamer().EmitHSAMetadataV2(HSAMetadataString))
4066  return Error(getParser().getTok().getLoc(), "invalid HSA metadata");
4067  }
4068 
4069  return false;
4070 }
4071 
4072 /// Common code to parse out a block of text (typically YAML) between start and
4073 /// end directives.
4074 bool AMDGPUAsmParser::ParseToEndDirective(const char *AssemblerDirectiveBegin,
4075  const char *AssemblerDirectiveEnd,
4076  std::string &CollectString) {
4077 
4078  raw_string_ostream CollectStream(CollectString);
4079 
4080  getLexer().setSkipSpace(false);
4081 
4082  bool FoundEnd = false;
4083  while (!getLexer().is(AsmToken::Eof)) {
4084  while (getLexer().is(AsmToken::Space)) {
4085  CollectStream << getLexer().getTok().getString();
4086  Lex();
4087  }
4088 
4089  if (getLexer().is(AsmToken::Identifier)) {
4090  StringRef ID = getLexer().getTok().getIdentifier();
4091  if (ID == AssemblerDirectiveEnd) {
4092  Lex();
4093  FoundEnd = true;
4094  break;
4095  }
4096  }
4097 
4098  CollectStream << Parser.parseStringToEndOfStatement()
4099  << getContext().getAsmInfo()->getSeparatorString();
4100 
4101  Parser.eatToEndOfStatement();
4102  }
4103 
4104  getLexer().setSkipSpace(true);
4105 
4106  if (getLexer().is(AsmToken::Eof) && !FoundEnd) {
4107  return TokError(Twine("expected directive ") +
4108  Twine(AssemblerDirectiveEnd) + Twine(" not found"));
4109  }
4110 
4111  CollectStream.flush();
4112  return false;
4113 }
4114 
4115 /// Parse the assembler directive for new MsgPack-format PAL metadata.
4116 bool AMDGPUAsmParser::ParseDirectivePALMetadataBegin() {
4117  std::string String;
4118  if (ParseToEndDirective(AMDGPU::PALMD::AssemblerDirectiveBegin,
4119  AMDGPU::PALMD::AssemblerDirectiveEnd, String))
4120  return true;
4121 
4122  auto PALMetadata = getTargetStreamer().getPALMetadata();
4123  if (!PALMetadata->setFromString(String))
4124  return Error(getParser().getTok().getLoc(), "invalid PAL metadata");
4125  return false;
4126 }
4127 
4128 /// Parse the assembler directive for old linear-format PAL metadata.
4129 bool AMDGPUAsmParser::ParseDirectivePALMetadata() {
4130  if (getSTI().getTargetTriple().getOS() != Triple::AMDPAL) {
4131  return Error(getParser().getTok().getLoc(),
4132  (Twine(PALMD::AssemblerDirective) + Twine(" directive is "
4133  "not available on non-amdpal OSes")).str());
4134  }
4135 
4136  auto PALMetadata = getTargetStreamer().getPALMetadata();
4137  PALMetadata->setLegacy();
4138  for (;;) {
4139  uint32_t Key, Value;
4140  if (ParseAsAbsoluteExpression(Key)) {
4141  return TokError(Twine("invalid value in ") +
4142  Twine(PALMD::AssemblerDirective));
4143  }
4144  if (getLexer().isNot(AsmToken::Comma)) {
4145  return TokError(Twine("expected an even number of values in ") +
4146  Twine(PALMD::AssemblerDirective));
4147  }
4148  Lex();
4149  if (ParseAsAbsoluteExpression(Value)) {
4150  return TokError(Twine("invalid value in ") +
4151  Twine(PALMD::AssemblerDirective));
4152  }
4153  PALMetadata->setRegister(Key, Value);
4154  if (getLexer().isNot(AsmToken::Comma))
4155  break;
4156  Lex();
4157  }
4158  return false;
4159 }
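// Illustrative usage (not from the original source): the legacy form is a flat
// list of register/value pairs, so an even number of integers is expected:
//   .amdgpu_pal_metadata 0x12345678, 0xfedcba98, 0x2468ace0, 0xfdb97531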
4160 
4161 /// ParseDirectiveAMDGPULDS
4162 /// ::= .amdgpu_lds identifier ',' size_expression [',' align_expression]
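/// Illustrative usage (not from the original source), with a hypothetical
/// symbol name:
///   .amdgpu_lds my_lds_buffer, 4096, 16
/// reserves 4096 bytes of LDS for my_lds_buffer with 16-byte alignment.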
4163 bool AMDGPUAsmParser::ParseDirectiveAMDGPULDS() {
4164  if (getParser().checkForValidSection())
4165  return true;
4166 
4167  StringRef Name;
4168  SMLoc NameLoc = getLexer().getLoc();
4169  if (getParser().parseIdentifier(Name))
4170  return TokError("expected identifier in directive");
4171 
4172  MCSymbol *Symbol = getContext().getOrCreateSymbol(Name);
4173  if (parseToken(AsmToken::Comma, "expected ','"))
4174  return true;
4175 
4176  unsigned LocalMemorySize = AMDGPU::IsaInfo::getLocalMemorySize(&getSTI());
4177 
4178  int64_t Size;
4179  SMLoc SizeLoc = getLexer().getLoc();
4180  if (getParser().parseAbsoluteExpression(Size))
4181  return true;
4182  if (Size < 0)
4183  return Error(SizeLoc, "size must be non-negative");
4184  if (Size > LocalMemorySize)
4185  return Error(SizeLoc, "size is too large");
4186 
4187  int64_t Align = 4;
4188  if (getLexer().is(AsmToken::Comma)) {
4189  Lex();
4190  SMLoc AlignLoc = getLexer().getLoc();
4191  if (getParser().parseAbsoluteExpression(Align))
4192  return true;
4193  if (Align < 0 || !isPowerOf2_64(Align))
4194  return Error(AlignLoc, "alignment must be a power of two");
4195 
4196  // Alignment larger than the size of LDS is possible in theory, as long
4197  // as the linker manages to place the symbol at address 0, but we do want
4198  // to make sure the alignment fits nicely into a 32-bit integer.
4199  if (Align >= 1u << 31)
4200  return Error(AlignLoc, "alignment is too large");
4201  }
4202 
4203  if (parseToken(AsmToken::EndOfStatement,
4204  "unexpected token in '.amdgpu_lds' directive"))
4205  return true;
4206 
4207  Symbol->redefineIfPossible();
4208  if (!Symbol->isUndefined())
4209  return Error(NameLoc, "invalid symbol redefinition");
4210 
4211  getTargetStreamer().emitAMDGPULDS(Symbol, Size, Align);
4212  return false;
4213 }
4214 
4215 bool AMDGPUAsmParser::ParseDirective(AsmToken DirectiveID) {
4216  StringRef IDVal = DirectiveID.getString();
4217 
4218  if (AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())) {
4219  if (IDVal == ".amdgcn_target")
4220  return ParseDirectiveAMDGCNTarget();
4221 
4222  if (IDVal == ".amdhsa_kernel")
4223  return ParseDirectiveAMDHSAKernel();
4224 
4225  // TODO: Restructure/combine with PAL metadata directive.
4226  if (IDVal == AMDGPU::HSAMD::V3::AssemblerDirectiveBegin)
4227  return ParseDirectiveHSAMetadata();
4228  } else {
4229  if (IDVal == ".hsa_code_object_version")
4230  return ParseDirectiveHSACodeObjectVersion();
4231 
4232  if (IDVal == ".hsa_code_object_isa")
4233  return ParseDirectiveHSACodeObjectISA();
4234 
4235  if (IDVal == ".amd_kernel_code_t")
4236  return ParseDirectiveAMDKernelCodeT();
4237 
4238  if (IDVal == ".amdgpu_hsa_kernel")
4239  return ParseDirectiveAMDGPUHsaKernel();
4240 
4241  if (IDVal == ".amd_amdgpu_isa")
4242  return ParseDirectiveISAVersion();
4243 
4244  if (IDVal == AMDGPU::HSAMD::AssemblerDirectiveBegin)
4245  return ParseDirectiveHSAMetadata();
4246  }
4247 
4248  if (IDVal == ".amdgpu_lds")
4249  return ParseDirectiveAMDGPULDS();
4250 
4251  if (IDVal == PALMD::AssemblerDirectiveBegin)
4252  return ParseDirectivePALMetadataBegin();
4253 
4254  if (IDVal == PALMD::AssemblerDirective)
4255  return ParseDirectivePALMetadata();
4256 
4257  return true;
4258 }
4259 
4260 bool AMDGPUAsmParser::subtargetHasRegister(const MCRegisterInfo &MRI,
4261  unsigned RegNo) const {
4262 
4263  for (MCRegAliasIterator R(AMDGPU::TTMP12_TTMP13_TTMP14_TTMP15, &MRI, true);
4264  R.isValid(); ++R) {
4265  if (*R == RegNo)
4266  return isGFX9() || isGFX10();
4267  }
4268 
4269  // GFX10 has 2 more SGPRs 104 and 105.
4270  for (MCRegAliasIterator R(AMDGPU::SGPR104_SGPR105, &MRI, true);
4271  R.isValid(); ++R) {
4272  if (*R == RegNo)
4273  return hasSGPR104_SGPR105();
4274  }
4275 
4276  switch (RegNo) {
4277  case AMDGPU::SRC_SHARED_BASE:
4278  case AMDGPU::SRC_SHARED_LIMIT:
4279  case AMDGPU::SRC_PRIVATE_BASE:
4280  case AMDGPU::SRC_PRIVATE_LIMIT:
4281  case AMDGPU::SRC_POPS_EXITING_WAVE_ID:
4282  return !isCI() && !isSI() && !isVI();
4283  case AMDGPU::TBA:
4284  case AMDGPU::TBA_LO:
4285  case AMDGPU::TBA_HI:
4286  case AMDGPU::TMA:
4287  case AMDGPU::TMA_LO:
4288  case AMDGPU::TMA_HI:
4289  return !isGFX9() && !isGFX10();
4290  case AMDGPU::XNACK_MASK:
4291  case AMDGPU::XNACK_MASK_LO:
4292  case AMDGPU::XNACK_MASK_HI:
4293  return !isCI() && !isSI() && !isGFX10() && hasXNACK();
4294  case AMDGPU::SGPR_NULL:
4295  return isGFX10();
4296  default:
4297  break;
4298  }
4299 
4300  if (isCI())
4301  return true;
4302 
4303  if (isSI() || isGFX10()) {
4304  // No flat_scr on SI.
4305  // On GFX10 flat scratch is not a valid register operand and can only be
4306  // accessed with s_setreg/s_getreg.
4307  switch (RegNo) {
4308  case AMDGPU::FLAT_SCR:
4309  case AMDGPU::FLAT_SCR_LO:
4310  case AMDGPU::FLAT_SCR_HI:
4311  return false;
4312  default:
4313  return true;
4314  }
4315  }
4316 
4317  // VI only has 102 SGPRs, so make sure we aren't trying to use the 2 more that
4318  // SI/CI have.
4319  for (MCRegAliasIterator R(AMDGPU::SGPR102_SGPR103, &MRI, true);
4320  R.isValid(); ++R) {
4321  if (*R == RegNo)
4322  return hasSGPR102_SGPR103();
4323  }
4324 
4325  return true;
4326 }
4327 
4328 OperandMatchResultTy
4329 AMDGPUAsmParser::parseOperand(OperandVector &Operands, StringRef Mnemonic,
4330  OperandMode Mode) {
4331  // Try to parse with a custom parser
4332  OperandMatchResultTy ResTy = MatchOperandParserImpl(Operands, Mnemonic);
4333 
4334  // If we successfully parsed the operand or if there was an error parsing,
4335  // we are done.
4336  //
4337  // If we are parsing after we reach EndOfStatement then this means we
4338  // are appending default values to the Operands list. This is only done
4339  // by custom parser, so we shouldn't continue on to the generic parsing.
4340  if (ResTy == MatchOperand_Success || ResTy == MatchOperand_ParseFail ||
4341  getLexer().is(AsmToken::EndOfStatement))
4342  return ResTy;
4343 
4344  if (Mode == OperandMode_NSA && getLexer().is(AsmToken::LBrac)) {
4345  unsigned Prefix = Operands.size();
4346  SMLoc LBraceLoc = getTok().getLoc();
4347  Parser.Lex(); // eat the '['
4348 
4349  for (;;) {
4350  ResTy = parseReg(Operands);
4351  if (ResTy != MatchOperand_Success)
4352  return ResTy;
4353 
4354  if (getLexer().is(AsmToken::RBrac))
4355  break;
4356 
4357  if (getLexer().isNot(AsmToken::Comma))
4358  return MatchOperand_ParseFail;
4359  Parser.Lex();
4360  }
4361 
4362  if (Operands.size() - Prefix > 1) {
4363  Operands.insert(Operands.begin() + Prefix,
4364  AMDGPUOperand::CreateToken(this, "[", LBraceLoc));
4365  Operands.push_back(AMDGPUOperand::CreateToken(this, "]",
4366  getTok().getLoc()));
4367  }
4368 
4369  Parser.Lex(); // eat the ']'
4370  return MatchOperand_Success;
4371  }
4372 
4373  return parseRegOrImm(Operands);
4374 }
4375 
4376 StringRef AMDGPUAsmParser::parseMnemonicSuffix(StringRef Name) {
4377  // Clear any forced encodings from the previous instruction.
4378  setForcedEncodingSize(0);
4379  setForcedDPP(false);
4380  setForcedSDWA(false);
4381 
4382  if (Name.endswith("_e64")) {
4383  setForcedEncodingSize(64);
4384  return Name.substr(0, Name.size() - 4);
4385  } else if (Name.endswith("_e32")) {
4386  setForcedEncodingSize(32);
4387  return Name.substr(0, Name.size() - 4);
4388  } else if (Name.endswith("_dpp")) {
4389  setForcedDPP(true);
4390  return Name.substr(0, Name.size() - 4);
4391  } else if (Name.endswith("_sdwa")) {
4392  setForcedSDWA(true);
4393  return Name.substr(0, Name.size() - 5);
4394  }
4395  return Name;
4396 }
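// Illustrative examples (not from the original source): "v_add_f32_e64" forces
// the 64-bit (VOP3) encoding and is matched as "v_add_f32"; "_e32", "_dpp" and
// "_sdwa" likewise force the corresponding encodings before matching.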
4397 
4398 bool AMDGPUAsmParser::ParseInstruction(ParseInstructionInfo &Info,
4399  StringRef Name,
4400  SMLoc NameLoc, OperandVector &Operands) {
4401  // Add the instruction mnemonic
4402  Name = parseMnemonicSuffix(Name);
4403  Operands.push_back(AMDGPUOperand::CreateToken(this, Name, NameLoc));
4404 
4405  bool IsMIMG = Name.startswith("image_");
4406 
4407  while (!getLexer().is(AsmToken::EndOfStatement)) {
4408  OperandMode Mode = OperandMode_Default;
4409  if (IsMIMG && isGFX10() && Operands.size() == 2)
4410  Mode = OperandMode_NSA;
4411  OperandMatchResultTy Res = parseOperand(Operands, Name, Mode);
4412 
4413  // Eat the comma or space if there is one.
4414  if (getLexer().is(AsmToken::Comma))
4415  Parser.Lex();
4416 
4417  switch (Res) {
4418  case MatchOperand_Success: break;
4419  case MatchOperand_ParseFail:
4420  // FIXME: use real operand location rather than the current location.
4421  Error(getLexer().getLoc(), "failed parsing operand.");
4422  while (!getLexer().is(AsmToken::EndOfStatement)) {
4423  Parser.Lex();
4424  }
4425  return true;
4426  case MatchOperand_NoMatch:
4427  // FIXME: use real operand location rather than the current location.
4428  Error(getLexer().getLoc(), "not a valid operand.");
4429  while (!getLexer().is(AsmToken::EndOfStatement)) {
4430  Parser.Lex();
4431  }
4432  return true;
4433  }
4434  }
4435 
4436  return false;
4437 }
4438 
4439 //===----------------------------------------------------------------------===//
4440 // Utility functions
4441 //===----------------------------------------------------------------------===//
4442 
4443 OperandMatchResultTy
4444 AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, int64_t &IntVal) {
4445 
4446  if (!trySkipId(Prefix, AsmToken::Colon))
4447  return MatchOperand_NoMatch;
4448 
4449  return parseExpr(IntVal) ? MatchOperand_Success : MatchOperand_ParseFail;
4450 }
4451 
4452 OperandMatchResultTy
4453 AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, OperandVector &Operands,
4454  AMDGPUOperand::ImmTy ImmTy,
4455  bool (*ConvertResult)(int64_t&)) {
4456  SMLoc S = getLoc();
4457  int64_t Value = 0;
4458 
4459  OperandMatchResultTy Res = parseIntWithPrefix(Prefix, Value);
4460  if (Res != MatchOperand_Success)
4461  return Res;
4462 
4463  if (ConvertResult && !ConvertResult(Value)) {
4464  Error(S, "invalid " + StringRef(Prefix) + " value.");
4465  }
4466 
4467  Operands.push_back(AMDGPUOperand::CreateImm(this, Value, S, ImmTy));
4468  return MatchOperand_Success;
4469 }
4470 
4471 OperandMatchResultTy
4472 AMDGPUAsmParser::parseOperandArrayWithPrefix(const char *Prefix,
4473  OperandVector &Operands,
4474  AMDGPUOperand::ImmTy ImmTy,
4475  bool (*ConvertResult)(int64_t&)) {
4476  SMLoc S = getLoc();
4477  if (!trySkipId(Prefix, AsmToken::Colon))
4478  return MatchOperand_NoMatch;
4479 
4480  if (!skipToken(AsmToken::LBrac, "expected a left square bracket"))
4481  return MatchOperand_ParseFail;
4482 
4483  unsigned Val = 0;
4484  const unsigned MaxSize = 4;
4485 
4486  // FIXME: How to verify the number of elements matches the number of src
4487  // operands?
4488  for (int I = 0; ; ++I) {
4489  int64_t Op;
4490  SMLoc Loc = getLoc();
4491  if (!parseExpr(Op))
4492  return MatchOperand_ParseFail;
4493 
4494  if (Op != 0 && Op != 1) {
4495  Error(Loc, "invalid " + StringRef(Prefix) + " value.");
4496  return MatchOperand_ParseFail;
4497  }
4498 
4499  Val |= (Op << I);
4500 
4501  if (trySkipToken(AsmToken::RBrac))
4502  break;
4503 
4504  if (I + 1 == MaxSize) {
4505  Error(getLoc(), "expected a closing square bracket");
4506  return MatchOperand_ParseFail;
4507  }
4508 
4509  if (!skipToken(AsmToken::Comma, "expected a comma"))
4510  return MatchOperand_ParseFail;
4511  }
4512 
4513  Operands.push_back(AMDGPUOperand::CreateImm(this, Val, S, ImmTy));
4514  return MatchOperand_Success;
4515 }
4516 
4517 OperandMatchResultTy
4518 AMDGPUAsmParser::parseNamedBit(const char *Name, OperandVector &Operands,
4519  AMDGPUOperand::ImmTy ImmTy) {
4520  int64_t Bit = 0;
4521  SMLoc S = Parser.getTok().getLoc();
4522 
4523  // We are at the end of the statement, and this is a default argument, so
4524  // use a default value.
4525  if (getLexer().isNot(AsmToken::EndOfStatement)) {
4526  switch(getLexer().getKind()) {
4527  case AsmToken::Identifier: {
4528  StringRef Tok = Parser.getTok().getString();
4529  if (Tok == Name) {
4530  if (Tok == "r128" && isGFX9())
4531  Error(S, "r128 modifier is not supported on this GPU");
4532  if (Tok == "a16" && !isGFX9() && !isGFX10())
4533  Error(S, "a16 modifier is not supported on this GPU");
4534  Bit = 1;
4535  Parser.Lex();
4536  } else if (Tok.startswith("no") && Tok.endswith(Name)) {
4537  Bit = 0;
4538  Parser.Lex();
4539  } else {
4540  return MatchOperand_NoMatch;
4541  }
4542  break;
4543  }
4544  default:
4545  return MatchOperand_NoMatch;
4546  }
4547  }
4548 
4549  if (!isGFX10() && ImmTy == AMDGPUOperand::ImmTyDLC)
4550  return MatchOperand_ParseFail;
4551 
4552  Operands.push_back(AMDGPUOperand::CreateImm(this, Bit, S, ImmTy));
4553  return MatchOperand_Success;
4554 }
4555 
4556 static void addOptionalImmOperand(
4557  MCInst& Inst, const OperandVector& Operands,
4558  AMDGPUAsmParser::OptionalImmIndexMap& OptionalIdx,
4559  AMDGPUOperand::ImmTy ImmT,
4560  int64_t Default = 0) {
4561  auto i = OptionalIdx.find(ImmT);
4562  if (i != OptionalIdx.end()) {
4563  unsigned Idx = i->second;
4564  ((AMDGPUOperand &)*Operands[Idx]).addImmOperands(Inst, 1);
4565  } else {
4566  Inst.addOperand(MCOperand::createImm(Default));
4567  }
4568 }
4569 
4570 OperandMatchResultTy
4571 AMDGPUAsmParser::parseStringWithPrefix(StringRef Prefix, StringRef &Value) {
4572  if (getLexer().isNot(AsmToken::Identifier)) {
4573  return MatchOperand_NoMatch;
4574  }
4575  StringRef Tok = Parser.getTok().getString();
4576  if (Tok != Prefix) {
4577  return MatchOperand_NoMatch;
4578  }
4579 
4580  Parser.Lex();
4581  if (getLexer().isNot(AsmToken::Colon)) {
4582  return MatchOperand_ParseFail;
4583  }
4584 
4585  Parser.Lex();
4586  if (getLexer().isNot(AsmToken::Identifier)) {
4587  return MatchOperand_ParseFail;
4588  }
4589 
4590  Value = Parser.getTok().getString();
4591  return MatchOperand_Success;
4592 }
4593 
4594 // dfmt and nfmt (in a tbuffer instruction) are parsed as one to allow their
4595 // values to live in a joint format operand in the MCInst encoding.
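// Illustrative usage (not from the original source):
//   tbuffer_load_format_xyzw v[0:3], off, s[0:3], dfmt:15, nfmt:2, 0
// is encoded with a single format operand holding dfmt in bits [3:0] and nfmt in
// bits [6:4].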
4596 OperandMatchResultTy
4597 AMDGPUAsmParser::parseDfmtNfmt(OperandVector &Operands) {
4598  SMLoc S = Parser.getTok().getLoc();
4599  int64_t Dfmt = 0, Nfmt = 0;
4600  // dfmt and nfmt can appear in either order, and each is optional.
4601  bool GotDfmt = false, GotNfmt = false;
4602  while (!GotDfmt || !GotNfmt) {
4603  if (!GotDfmt) {
4604  auto Res = parseIntWithPrefix("dfmt", Dfmt);
4605  if (Res != MatchOperand_NoMatch) {
4606  if (Res != MatchOperand_Success)
4607  return Res;
4608  if (Dfmt >= 16) {
4609  Error(Parser.getTok().getLoc(), "out of range dfmt");
4610  return MatchOperand_ParseFail;
4611  }
4612  GotDfmt = true;
4613  Parser.Lex();
4614  continue;
4615  }
4616  }
4617  if (!GotNfmt) {
4618  auto Res = parseIntWithPrefix("nfmt", Nfmt);
4619  if (Res != MatchOperand_NoMatch) {
4620  if (Res != MatchOperand_Success)
4621  return Res;
4622  if (Nfmt >= 8) {
4623  Error(Parser.getTok().getLoc(), "out of range nfmt");
4624  return MatchOperand_ParseFail;
4625  }
4626  GotNfmt = true;
4627  Parser.Lex();
4628  continue;
4629  }
4630  }
4631  break;
4632  }
4633  if (!GotDfmt && !GotNfmt)
4634  return MatchOperand_NoMatch;
4635  auto Format = Dfmt | Nfmt << 4;
4636  Operands.push_back(
4637  AMDGPUOperand::CreateImm(this, Format, S, AMDGPUOperand::ImmTyFORMAT));
4638  return MatchOperand_Success;
4639 }
4640 
4641 //===----------------------------------------------------------------------===//
4642 // ds
4643 //===----------------------------------------------------------------------===//
4644 
4645 void AMDGPUAsmParser::cvtDSOffset01(MCInst &Inst,
4646  const OperandVector &Operands) {
4647  OptionalImmIndexMap OptionalIdx;
4648 
4649  for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
4650  AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
4651 
4652  // Add the register arguments
4653  if (Op.isReg()) {
4654  Op.addRegOperands(Inst, 1);
4655  continue;
4656  }
4657 
4658  // Handle optional arguments
4659  OptionalIdx[Op.getImmTy()] = i;
4660  }
4661 
4662  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset0);
4663  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset1);
4664  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGDS);
4665 
4666  Inst.addOperand(MCOperand::createReg(AMDGPU::M0)); // m0
4667 }
4668 
4669 void AMDGPUAsmParser::cvtDSImpl(MCInst &Inst, const OperandVector &Operands,
4670  bool IsGdsHardcoded) {
4671  OptionalImmIndexMap OptionalIdx;
4672 
4673  for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
4674  AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
4675 
4676  // Add the register arguments
4677  if (Op.isReg()) {
4678  Op.addRegOperands(Inst, 1);
4679  continue;
4680  }
4681 
4682  if (Op.isToken() && Op.getToken() == "gds") {
4683  IsGdsHardcoded = true;
4684  continue;
4685  }
4686 
4687  // Handle optional arguments
4688  OptionalIdx[Op.getImmTy()] = i;
4689  }
4690 
4691  AMDGPUOperand::ImmTy OffsetType =
4692  (Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_gfx10 ||
4693  Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_gfx6_gfx7 ||
4694  Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_vi) ? AMDGPUOperand::ImmTySwizzle :
4695  AMDGPUOperand::ImmTyOffset;
4696 
4697  addOptionalImmOperand(Inst, Operands, OptionalIdx, OffsetType);
4698 
4699  if (!IsGdsHardcoded) {
4700  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGDS);
4701  }
4702  Inst.addOperand(MCOperand::createReg(AMDGPU::M0)); // m0
4703 }
4704 
4705 void AMDGPUAsmParser::cvtExp(MCInst &Inst, const OperandVector &Operands) {
4706  OptionalImmIndexMap OptionalIdx;
4707 
4708  unsigned OperandIdx[4];
4709  unsigned EnMask = 0;
4710  int SrcIdx = 0;
4711 
4712  for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
4713  AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
4714 
4715  // Add the register arguments
4716  if (Op.isReg()) {
4717  assert(SrcIdx < 4);
4718  OperandIdx[SrcIdx] = Inst.size();
4719  Op.addRegOperands(Inst, 1);
4720  ++SrcIdx;
4721  continue;
4722  }
4723 
4724  if (Op.isOff()) {
4725  assert(SrcIdx < 4);
4726  OperandIdx[SrcIdx] = Inst.size();
4727  Inst.addOperand(MCOperand::createReg(AMDGPU::NoRegister));
4728  ++SrcIdx;
4729  continue;
4730  }
4731 
4732  if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyExpTgt) {
4733  Op.addImmOperands(Inst, 1);
4734  continue;
4735  }
4736 
4737  if (Op.isToken() && Op.getToken() == "done")
4738  continue;
4739 
4740  // Handle optional arguments
4741  OptionalIdx[Op.getImmTy()] = i;
4742  }
4743 
4744  assert(SrcIdx == 4);
4745 
4746  bool Compr = false;
4747  if (OptionalIdx.find(AMDGPUOperand::ImmTyExpCompr) != OptionalIdx.end()) {
4748  Compr = true;
4749  Inst.getOperand(OperandIdx[1]) = Inst.getOperand(OperandIdx[2]);
4750  Inst.getOperand(OperandIdx[2]).setReg(AMDGPU::NoRegister);
4751  Inst.getOperand(OperandIdx[3]).setReg(AMDGPU::NoRegister);
4752  }
4753 
4754  for (auto i = 0; i < SrcIdx; ++i) {
4755  if (Inst.getOperand(OperandIdx[i]).getReg() != AMDGPU::NoRegister) {
4756  EnMask |= Compr? (0x3 << i * 2) : (0x1 << i);
4757  }
4758  }
4759 
4760  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpVM);
4761  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpCompr);
4762 
4763  Inst.addOperand(MCOperand::createImm(EnMask));
4764 }
4765 
4766 //===----------------------------------------------------------------------===//
4767 // s_waitcnt
4768 //===----------------------------------------------------------------------===//
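// Illustrative usage (not from the original source): counters may be named, in
// any order and separated by '&' or ',', or the whole mask may be given as a raw
// immediate:
//   s_waitcnt vmcnt(0) & lgkmcnt(0)
//   s_waitcnt 0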
4769 
4770 static bool
4771 encodeCnt(
4772  const AMDGPU::IsaVersion ISA,
4773  int64_t &IntVal,
4774  int64_t CntVal,
4775  bool Saturate,
4776  unsigned (*encode)(const IsaVersion &Version, unsigned, unsigned),
4777  unsigned (*decode)(const IsaVersion &Version, unsigned))
4778 {
4779  bool Failed = false;
4780 
4781  IntVal = encode(ISA, IntVal, CntVal);
4782  if (CntVal != decode(ISA, IntVal)) {
4783  if (Saturate) {
4784  IntVal = encode(ISA, IntVal, -1);
4785  } else {
4786  Failed = true;
4787  }
4788  }
4789  return Failed;
4790 }
4791 
4792 bool AMDGPUAsmParser::parseCnt(int64_t &IntVal) {
4793 
4794  SMLoc CntLoc = getLoc();
4795  StringRef CntName = getTokenStr();
4796 
4797  if (!skipToken(AsmToken::Identifier, "expected a counter name") ||
4798  !skipToken(AsmToken::LParen, "expected a left parenthesis"))
4799  return false;
4800 
4801  int64_t CntVal;
4802  SMLoc ValLoc = getLoc();
4803  if (!parseExpr(CntVal))
4804  return false;
4805 
4806  AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
4807 
4808  bool Failed = true;
4809  bool Sat = CntName.endswith("_sat");
4810 
4811  if (CntName == "vmcnt" || CntName == "vmcnt_sat") {
4812  Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeVmcnt, decodeVmcnt);
4813  } else if (CntName == "expcnt" || CntName == "expcnt_sat") {
4814  Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeExpcnt, decodeExpcnt);
4815  } else if (CntName == "lgkmcnt" || CntName == "lgkmcnt_sat") {
4816  Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeLgkmcnt, decodeLgkmcnt);
4817  } else {
4818  Error(CntLoc, "invalid counter name " + CntName);
4819  return false;
4820  }
4821 
4822  if (Failed) {
4823  Error(ValLoc, "too large value for " + CntName);
4824  return false;
4825  }
4826 
4827  if (!skipToken(AsmToken::RParen, "expected a closing parenthesis"))
4828  return false;
4829 
4830  if (trySkipToken(AsmToken::Amp) || trySkipToken(AsmToken::Comma)) {
4831  if (isToken(AsmToken::EndOfStatement)) {
4832  Error(getLoc(), "expected a counter name");
4833  return false;
4834  }
4835  }
4836 
4837  return true;
4838 }
4839 
4840 OperandMatchResultTy
4841 AMDGPUAsmParser::parseSWaitCntOps(OperandVector &Operands) {
4842  AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
4843  int64_t Waitcnt = getWaitcntBitMask(ISA);
4844  SMLoc S = getLoc();
4845 
4846  // If parse failed, do not return error code
4847  // to avoid excessive error messages.
4848  if (isToken(AsmToken::Identifier) && peekToken().is(AsmToken::LParen)) {
4849  while (parseCnt(Waitcnt) && !isToken(AsmToken::EndOfStatement));
4850  } else {
4851  parseExpr(Waitcnt);
4852  }
4853 
4854  Operands.push_back(AMDGPUOperand::CreateImm(this, Waitcnt, S));
4855  return MatchOperand_Success;
4856 }
4857 
4858 bool
4859 AMDGPUOperand::isSWaitCnt() const {
4860  return isImm();
4861 }
4862 
4863 //===----------------------------------------------------------------------===//
4864 // hwreg
4865 //===----------------------------------------------------------------------===//
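// Illustrative usage (not from the original source): the register may be named
// or numeric, optionally followed by a bit offset and width:
//   s_getreg_b32 s2, hwreg(HW_REG_GPR_ALLOC)
//   s_setreg_b32 hwreg(HW_REG_TRAPSTS, 8, 16), s2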
4866 
4867 bool
4868 AMDGPUAsmParser::parseHwregBody(OperandInfoTy &HwReg,
4869  int64_t &Offset,
4870  int64_t &Width) {
4871  using namespace llvm::AMDGPU::Hwreg;
4872 
4873  // The register may be specified by name or using a numeric code
4874  if (isToken(AsmToken::Identifier) &&
4875  (HwReg.Id = getHwregId(getTokenStr())) >= 0) {
4876  HwReg.IsSymbolic = true;
4877  lex(); // skip message name
4878  } else if (!parseExpr(HwReg.Id)) {
4879  return false;
4880  }
4881 
4882  if (trySkipToken(AsmToken::RParen))
4883  return true;
4884 
4885  // parse optional params
4886  return
4887  skipToken(AsmToken::Comma, "expected a comma or a closing parenthesis") &&
4888  parseExpr(Offset) &&
4889  skipToken(AsmToken::Comma, "expected a comma") &&
4890  parseExpr(Width) &&
4891  skipToken(AsmToken::RParen, "expected a closing parenthesis");
4892 }
4893 
4894 bool
4895 AMDGPUAsmParser::validateHwreg(const OperandInfoTy &HwReg,
4896  const int64_t Offset,
4897  const int64_t Width,
4898  const SMLoc Loc) {
4899 
4900  using namespace llvm::AMDGPU::Hwreg;
4901 
4902  if (HwReg.IsSymbolic && !isValidHwreg(HwReg.Id, getSTI())) {
4903  Error(Loc, "specified hardware register is not supported on this GPU");
4904  return false;
4905  } else if (!isValidHwreg(HwReg.Id)) {
4906  Error(Loc, "invalid code of hardware register: only 6-bit values are legal");
4907  return false;
4908  } else if (!isValidHwregOffset(Offset)) {
4909  Error(Loc, "invalid bit offset: only 5-bit values are legal");
4910  return false;
4911  } else if (!isValidHwregWidth(Width)) {
4912  Error(Loc, "invalid bitfield width: only values from 1 to 32 are legal");
4913  return false;
4914  }
4915  return true;
4916 }
4917 
4918 OperandMatchResultTy
4919 AMDGPUAsmParser::parseHwreg(OperandVector &Operands) {
4920  using namespace llvm::AMDGPU::Hwreg;
4921 
4922  int64_t ImmVal = 0;
4923  SMLoc Loc = getLoc();
4924 
4925  // If parse failed, do not return error code
4926  // to avoid excessive error messages.
4927  if (trySkipId("hwreg", AsmToken::LParen)) {
4928  OperandInfoTy HwReg(ID_UNKNOWN_);
4929  int64_t Offset = OFFSET_DEFAULT_;
4930  int64_t Width = WIDTH_DEFAULT_;
4931  if (parseHwregBody(HwReg, Offset, Width) &&
4932  validateHwreg(HwReg, Offset, Width, Loc)) {
4933  ImmVal = encodeHwreg(HwReg.Id, Offset, Width);
4934  }
4935  } else if (parseExpr(ImmVal)) {
4936  if (ImmVal < 0 || !isUInt<16>(ImmVal))
4937  Error(Loc, "invalid immediate: only 16-bit values are legal");
4938  }
4939 
4940  Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc, AMDGPUOperand::ImmTyHwreg));
4941  return MatchOperand_Success;
4942 }
4943 
4944 bool AMDGPUOperand::isHwreg() const {
4945  return isImmTy(ImmTyHwreg);
4946 }
4947 
4948 //===----------------------------------------------------------------------===//
4949 // sendmsg
4950 //===----------------------------------------------------------------------===//
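// Illustrative usage (not from the original source): the message, operation and
// stream id may each be symbolic or numeric:
//   s_sendmsg sendmsg(MSG_INTERRUPT)
//   s_sendmsg sendmsg(MSG_GS, GS_OP_EMIT, 0)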
4951 
4952 bool
4953 AMDGPUAsmParser::parseSendMsgBody(OperandInfoTy &Msg,
4954  OperandInfoTy &Op,
4955  OperandInfoTy &Stream) {
4956  using namespace llvm::AMDGPU::SendMsg;
4957 
4958  if (isToken(AsmToken::Identifier) && (Msg.Id = getMsgId(getTokenStr())) >= 0) {
4959  Msg.IsSymbolic = true;
4960  lex(); // skip message name
4961  } else if (!parseExpr(Msg.Id)) {
4962  return false;
4963  }
4964 
4965  if (trySkipToken(AsmToken::Comma)) {
4966  Op.IsDefined = true;
4967  if (isToken(AsmToken::Identifier) &&
4968  (Op.Id = getMsgOpId(Msg.Id, getTokenStr())) >= 0) {
4969  lex(); // skip operation name
4970  } else if (!parseExpr(Op.Id)) {
4971  return false;
4972  }
4973 
4974  if (trySkipToken(AsmToken::Comma)) {
4975  Stream.IsDefined = true;
4976  if (!parseExpr(Stream.Id))
4977  return false;
4978  }
4979  }
4980 
4981  return skipToken(AsmToken::RParen, "expected a closing parenthesis");
4982 }
4983 
4984 bool
4985 AMDGPUAsmParser::validateSendMsg(const OperandInfoTy &Msg,
4986  const OperandInfoTy &Op,
4987  const OperandInfoTy &Stream,
4988  const SMLoc S) {
4989  using namespace llvm::AMDGPU::SendMsg;
4990 
4991  // Validation strictness depends on whether message is specified
4992  // in a symbolic or in a numeric form. In the latter case
4993  // only the possibility of encoding is checked.
4994  bool Strict = Msg.IsSymbolic;
4995 
4996  if (!isValidMsgId(Msg.Id, getSTI(), Strict)) {
4997  Error(S, "invalid message id");
4998  return false;
4999  } else if (Strict && (msgRequiresOp(Msg.Id) != Op.IsDefined)) {
5000  Error(S, Op.IsDefined ?
5001  "message does not support operations" :
5002  "missing message operation");
5003  return false;
5004  } else if (!isValidMsgOp(Msg.Id, Op.Id, Strict)) {
5005  Error(S, "invalid operation id");
5006  return false;
5007  } else if (Strict && !msgSupportsStream(Msg.Id, Op.Id) && Stream.IsDefined) {
5008  Error(S, "message operation does not support streams");
5009  return false;
5010  } else if (!isValidMsgStream(Msg.Id, Op.Id, Stream.Id, Strict)) {
5011  Error(S, "invalid message stream id");
5012  return false;
5013  }
5014  return true;
5015 }
5016 
5017 OperandMatchResultTy
5018 AMDGPUAsmParser::parseSendMsgOp(OperandVector &Operands) {
5019  using namespace llvm::AMDGPU::SendMsg;
5020 
5021  int64_t ImmVal = 0;
5022  SMLoc Loc = getLoc();
5023 
5024  // If parse failed, do not return error code
5025  // to avoid excessive error messages.
5026  if (trySkipId("sendmsg", AsmToken::LParen)) {
5027  OperandInfoTy Msg(ID_UNKNOWN_);
5028  OperandInfoTy Op(OP_NONE_);
5029  OperandInfoTy Stream(STREAM_ID_NONE_);
5030  if (parseSendMsgBody(Msg, Op, Stream) &&
5031  validateSendMsg(Msg, Op, Stream, Loc)) {
5032  ImmVal = encodeMsg(Msg.Id, Op.Id, Stream.Id);
5033  }
5034  } else if (parseExpr(ImmVal)) {
5035  if (ImmVal < 0 || !isUInt<16>(ImmVal))
5036  Error(Loc, "invalid immediate: only 16-bit values are legal");
5037  }
5038 
5039  Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc, AMDGPUOperand::ImmTySendMsg));
5040  return MatchOperand_Success;
5041 }
5042 
5043 bool AMDGPUOperand::isSendMsg() const {
5044  return isImmTy(ImmTySendMsg);
5045 }
5046 
5047 //===----------------------------------------------------------------------===//
5048 // v_interp
5049 //===----------------------------------------------------------------------===//
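// Illustrative usage (not from the original source): the slot tokens p10/p20/p0
// and the attribute/channel pair attrN.{x|y|z|w} are parsed by the helpers below:
//   v_interp_p1_f32 v0, v1, attr0.x
//   v_interp_mov_f32 v2, p10, attr3.y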
5050 
5051 OperandMatchResultTy AMDGPUAsmParser::parseInterpSlot(OperandVector &Operands) {
5052  if (getLexer().getKind() != AsmToken::Identifier)
5053  return MatchOperand_NoMatch;
5054 
5055  StringRef Str = Parser.getTok().getString();
5056  int Slot = StringSwitch<int>(Str)
5057  .Case("p10", 0)
5058  .Case("p20", 1)
5059  .Case("p0", 2)
5060  .Default(-1);
5061 
5062  SMLoc S = Parser.getTok().getLoc();
5063  if (Slot == -1)
5064  return MatchOperand_ParseFail;
5065 
5066  Parser.Lex();
5067  Operands.push_back(AMDGPUOperand::CreateImm(this, Slot, S,
5068  AMDGPUOperand::ImmTyInterpSlot));
5069  return MatchOperand_Success;
5070 }
5071 
5072 OperandMatchResultTy AMDGPUAsmParser::parseInterpAttr(OperandVector &Operands) {
5073  if (getLexer().getKind() != AsmToken::Identifier)
5074  return MatchOperand_NoMatch;
5075 
5076  StringRef Str = Parser.getTok().getString();
5077  if (!Str.startswith("attr"))
5078  return MatchOperand_NoMatch;
5079 
5080  StringRef Chan = Str.take_back(2);
5081  int AttrChan = StringSwitch<int>(Chan)
5082  .Case(".x", 0)
5083  .Case(".y", 1)
5084  .Case(".z", 2)
5085  .Case(".w", 3)
5086  .Default(-1);
5087  if (AttrChan == -1)
5088  return MatchOperand_ParseFail;
5089 
5090  Str = Str.drop_back(2).drop_front(4);
5091 
5092  uint8_t Attr;
5093  if (Str.getAsInteger(10, Attr))
5094  return MatchOperand_ParseFail;
5095 
5096  SMLoc S = Parser.getTok().getLoc();
5097  Parser.Lex();
5098  if (Attr > 63) {
5099  Error(S, "out of bounds attr");
5100  return MatchOperand_Success;
5101  }
5102 
5103  SMLoc SChan = SMLoc::getFromPointer(Chan.data());
5104 
5105  Operands.push_back(AMDGPUOperand::CreateImm(this, Attr, S,
5106  AMDGPUOperand::ImmTyInterpAttr));
5107  Operands.push_back(AMDGPUOperand::CreateImm(this, AttrChan, SChan,
5108  AMDGPUOperand::ImmTyAttrChan));
5109  return MatchOperand_Success;
5110 }
5111 
5112 //===----------------------------------------------------------------------===//
5113 // exp
5114 //===----------------------------------------------------------------------===//
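// Illustrative usage (not from the original source): accepted targets include
// mrt0..mrt7, mrtz, null, pos0..pos3 (pos4 and prim on GFX10) and
// param0..param31:
//   exp mrt0 v0, v0, v0, v0 done vm
//   exp pos0 v1, v2, v3, v4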
5115 
5116 void AMDGPUAsmParser::errorExpTgt() {
5117  Error(Parser.getTok().getLoc(), "invalid exp target");
5118 }
5119 
5120 OperandMatchResultTy AMDGPUAsmParser::parseExpTgtImpl(StringRef Str,
5121  uint8_t &Val) {
5122  if (Str == "null") {
5123  Val = 9;
5124  return MatchOperand_Success;
5125  }
5126 
5127  if (Str.startswith("mrt")) {
5128  Str = Str.drop_front(3);
5129  if (Str == "z") { // == mrtz
5130  Val = 8;
5131  return MatchOperand_Success;
5132  }
5133 
5134  if (Str.getAsInteger(10, Val))
5135  return MatchOperand_ParseFail;
5136 
5137  if (Val > 7)
5138  errorExpTgt();
5139 
5140  return MatchOperand_Success;
5141  }
5142 
5143  if (Str.startswith("pos")) {
5144  Str = Str.drop_front(3);
5145  if (Str.getAsInteger(10, Val))
5146  return MatchOperand_ParseFail;
5147 
5148  if (Val > 4 || (Val == 4 && !isGFX10()))
5149  errorExpTgt();
5150 
5151  Val += 12;
5152  return MatchOperand_Success;
5153  }
5154 
5155  if (isGFX10() && Str == "prim") {
5156  Val = 20;
5157  return MatchOperand_Success;
5158  }
5159 
5160  if (Str.startswith("param")) {
5161  Str = Str.drop_front(5);
5162  if (Str.getAsInteger(10, Val))
5163  return MatchOperand_ParseFail;
5164 
5165  if (Val >= 32)
5166  errorExpTgt();
5167 
5168  Val += 32;
5169  return MatchOperand_Success;
5170  }
5171 
5172  if (Str.startswith("invalid_target_")) {
5173  Str = Str.drop_front(15);
5174  if (Str.getAsInteger(10, Val))
5175  return MatchOperand_ParseFail;
5176 
5177  errorExpTgt();
5178  return MatchOperand_Success;
5179  }
5180 
5181  return MatchOperand_NoMatch;
5182 }
5183 
5184 OperandMatchResultTy AMDGPUAsmParser::parseExpTgt(OperandVector &Operands) {
5185  uint8_t Val;
5186  StringRef Str = Parser.getTok().getString();
5187 
5188  auto Res = parseExpTgtImpl(Str, Val);
5189  if (Res != MatchOperand_Success)
5190  return Res;
5191 
5192  SMLoc S = Parser.getTok().getLoc();
5193  Parser.Lex();
5194 
5195  Operands.push_back(AMDGPUOperand::CreateImm(this, Val, S,
5196  AMDGPUOperand::ImmTyExpTgt));
5197  return MatchOperand_Success;
5198 }
5199 
5200 //===----------------------------------------------------------------------===//
5201 // parser helpers
5202 //===----------------------------------------------------------------------===//
5203 
5204 bool
5205 AMDGPUAsmParser::isId(const AsmToken &Token, const StringRef Id) const {
5206  return Token.is(AsmToken::Identifier) && Token.getString() == Id;
5207 }
5208 
5209 bool
5210 AMDGPUAsmParser::isId(const StringRef Id) const {
5211  return isId(getToken(), Id);
5212 }
5213 
5214 bool
5215 AMDGPUAsmParser::isToken(const AsmToken::TokenKind Kind) const {
5216  return getTokenKind() == Kind;
5217 }
5218 
5219 bool
5220 AMDGPUAsmParser::trySkipId(const StringRef Id) {
5221  if (isId(Id)) {
5222  lex();
5223  return true;
5224  }
5225  return false;
5226 }
5227 
5228 bool
5229 AMDGPUAsmParser::trySkipId(const StringRef Id, const AsmToken::TokenKind Kind) {
5230  if (isId(Id) && peekToken().is(Kind)) {
5231  lex();
5232  lex();
5233  return true;
5234  }
5235  return false;
5236 }
5237 
5238 bool
5239 AMDGPUAsmParser::trySkipToken(const AsmToken::TokenKind Kind) {
5240  if (isToken(Kind)) {
5241  lex();
5242  return true;
5243  }
5244  return false;
5245 }
5246 
5247 bool
5248 AMDGPUAsmParser::skipToken(const AsmToken::TokenKind Kind,
5249  const StringRef ErrMsg) {
5250  if (!trySkipToken(Kind)) {
5251  Error(getLoc(), ErrMsg);
5252  return false;
5253  }
5254  return true;
5255 }
5256 
5257 bool
5258 AMDGPUAsmParser::parseExpr(int64_t &Imm) {
5259  return !getParser().parseAbsoluteExpression(Imm);
5260 }
5261 
5262 bool
5263 AMDGPUAsmParser::parseExpr(OperandVector &Operands) {
5264  SMLoc S = getLoc();
5265 
5266  const MCExpr *Expr;
5267  if (Parser.parseExpression(Expr))
5268  return false;
5269 
5270  int64_t IntVal;
5271  if (Expr->evaluateAsAbsolute(IntVal)) {
5272  Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S));
5273  } else {
5274  Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S));
5275  }
5276  return true;
5277 }
5278 
5279 bool
5280 AMDGPUAsmParser::parseString(StringRef &Val, const StringRef ErrMsg) {
5281  if (isToken(AsmToken::String)) {
5282  Val = getToken().getStringContents();
5283  lex();
5284  return true;
5285  } else {
5286  Error(getLoc(), ErrMsg);
5287  return false;
5288  }
5289 }
5290 
5291 AsmToken
5292 AMDGPUAsmParser::getToken() const {
5293  return Parser.getTok();
5294 }
5295 
5296 AsmToken
5297 AMDGPUAsmParser::peekToken() {
5298  return getLexer().peekTok();
5299 }
5300 
5301 void
5302 AMDGPUAsmParser::peekTokens(MutableArrayRef<AsmToken> Tokens) {
5303  auto TokCount = getLexer().peekTokens(Tokens);
5304 
5305  for (auto Idx = TokCount; Idx < Tokens.size(); ++Idx)
5306  Tokens[Idx] = AsmToken(AsmToken::Error, "");
5307 }
5308 
5309 AsmToken::TokenKind
5310 AMDGPUAsmParser::getTokenKind() const {
5311  return getLexer().getKind();
5312 }
5313 
5314 SMLoc
5315 AMDGPUAsmParser::getLoc() const {
5316  return getToken().getLoc();
5317 }
5318 
5319 StringRef
5320 AMDGPUAsmParser::getTokenStr() const {
5321  return getToken().getString();
5322 }
5323 
5324 void
5325 AMDGPUAsmParser::lex() {
5326  Parser.Lex();
5327 }
5328 
5329 //===----------------------------------------------------------------------===//
5330 // swizzle
5331 //===----------------------------------------------------------------------===//
5332 
5333 LLVM_READNONE
5334 static unsigned
5335 encodeBitmaskPerm(const unsigned AndMask,
5336  const unsigned OrMask,
5337  const unsigned XorMask) {
5338  using namespace llvm::AMDGPU::Swizzle;
5339 
5340  return BITMASK_PERM_ENC |
5341  (AndMask << BITMASK_AND_SHIFT) |
5342  (OrMask << BITMASK_OR_SHIFT) |
5343  (XorMask << BITMASK_XOR_SHIFT);
5344 }
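// A worked example of the helper above (a sketch based on the callers below):
// swizzle(SWAP, 4) is parsed by parseSwizzleSwap into
// encodeBitmaskPerm(BITMASK_MAX, 0, /*XorMask=*/4), i.e. each lane id is
// ANDed with the full mask, ORed with 0 and XORed with 4, which exchanges
// adjacent groups of 4 lanes.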
5345 
5346 bool
5347 AMDGPUAsmParser::parseSwizzleOperands(const unsigned OpNum, int64_t* Op,
5348  const unsigned MinVal,
5349  const unsigned MaxVal,
5350  const StringRef ErrMsg) {
5351  for (unsigned i = 0; i < OpNum; ++i) {
5352  if (!skipToken(AsmToken::Comma, "expected a comma")){
5353  return false;
5354  }
5355  SMLoc ExprLoc = Parser.getTok().getLoc();
5356  if (!parseExpr(Op[i])) {
5357  return false;
5358  }
5359  if (Op[i] < MinVal || Op[i] > MaxVal) {
5360  Error(ExprLoc, ErrMsg);
5361  return false;
5362  }
5363  }
5364 
5365  return true;
5366 }
5367 
5368 bool
5369 AMDGPUAsmParser::parseSwizzleQuadPerm(int64_t &Imm) {
5370  using namespace llvm::AMDGPU::Swizzle;
5371 
5372  int64_t Lane[LANE_NUM];
5373  if (parseSwizzleOperands(LANE_NUM, Lane, 0, LANE_MAX,
5374  "expected a 2-bit lane id")) {
5375  Imm = QUAD_PERM_ENC;
5376  for (unsigned I = 0; I < LANE_NUM; ++I) {
5377  Imm |= Lane[I] << (LANE_SHIFT * I);
5378  }
5379  return true;
5380  }
5381  return false;
5382 }
5383 
5384 bool
5385 AMDGPUAsmParser::parseSwizzleBroadcast(int64_t &Imm) {
5386  using namespace llvm::AMDGPU::Swizzle;
5387 
5388  SMLoc S = Parser.getTok().getLoc();
5389  int64_t GroupSize;
5390  int64_t LaneIdx;
5391 
5392  if (!parseSwizzleOperands(1, &GroupSize,
5393  2, 32,
5394  "group size must be in the interval [2,32]")) {
5395  return false;
5396  }
5397  if (!isPowerOf2_64(GroupSize)) {
5398  Error(S, "group size must be a power of two");
5399  return false;
5400  }
5401  if (parseSwizzleOperands(1, &LaneIdx,
5402  0, GroupSize - 1,
5403  "lane id must be in the interval [0,group size - 1]")) {
5404  Imm = encodeBitmaskPerm(BITMASK_MAX - GroupSize + 1, LaneIdx, 0);
5405  return true;
5406  }
5407  return false;
5408 }
5409 
5410 bool
5411 AMDGPUAsmParser::parseSwizzleReverse(int64_t &Imm) {
5412  using namespace llvm::AMDGPU::Swizzle;
5413 
5414  SMLoc S = Parser.getTok().getLoc();
5415  int64_t GroupSize;
5416 
5417  if (!parseSwizzleOperands(1, &GroupSize,
5418  2, 32, "group size must be in the interval [2,32]")) {
5419  return false;
5420  }
5421  if (!isPowerOf2_64(GroupSize)) {
5422  Error(S, "group size must be a power of two");
5423  return false;
5424  }
5425 
5426  Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize - 1);
5427  return true;
5428 }
5429 
5430 bool
5431 AMDGPUAsmParser::parseSwizzleSwap(int64_t &Imm) {
5432  using namespace llvm::AMDGPU::Swizzle;
5433 
5434  SMLoc S = Parser.getTok().getLoc();
5435  int64_t GroupSize;
5436 
5437  if (!parseSwizzleOperands(1, &GroupSize,
5438  1, 16, "group size must be in the interval [1,16]")) {
5439  return false;
5440  }
5441  if (!isPowerOf2_64(GroupSize)) {
5442  Error(S, "group size must be a power of two");
5443  return false;
5444  }
5445 
5446  Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize);
5447  return true;
5448 }
5449 
5450 bool
5451 AMDGPUAsmParser::parseSwizzleBitmaskPerm(int64_t &Imm) {
5452  using namespace llvm::AMDGPU::Swizzle;
5453 
5454  if (!skipToken(AsmToken::Comma, "expected a comma")) {
5455  return false;
5456  }
5457 
5458  StringRef Ctl;
5459  SMLoc StrLoc = Parser.getTok().getLoc();
5460  if (!parseString(Ctl)) {
5461  return false;
5462  }
5463  if (Ctl.size() != BITMASK_WIDTH) {
5464  Error(StrLoc, "expected a 5-character mask");
5465  return false;
5466  }
5467 
5468  unsigned AndMask = 0;
5469  unsigned OrMask = 0;
5470  unsigned XorMask = 0;
5471 
5472  for (size_t i = 0; i < Ctl.size(); ++i) {
5473  unsigned Mask = 1 << (BITMASK_WIDTH - 1 - i);
5474  switch(Ctl[i]) {
5475  default:
5476  Error(StrLoc, "invalid mask");
5477  return false;
5478  case '0':
5479  break;
5480  case '1':
5481  OrMask |= Mask;
5482  break;
5483  case 'p':
5484  AndMask |= Mask;
5485  break;
5486  case 'i':
5487  AndMask |= Mask;
5488  XorMask |= Mask;
5489  break;
5490  }
5491  }
5492 
5493  Imm = encodeBitmaskPerm(AndMask, OrMask, XorMask);
5494  return true;
5495 }
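// Example of the control string parsed above (derived from the switch): each
// of the 5 characters controls one bit of the lane id, most significant bit
// first:
//   '0' - force the bit to 0    '1' - force the bit to 1
//   'p' - preserve the bit      'i' - invert the bit
// e.g. swizzle(BITMASK_PERM, "01pip") clears bit 4, sets bit 3, preserves
// bits 2 and 0, and inverts bit 1.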
5496 
5497 bool
5498 AMDGPUAsmParser::parseSwizzleOffset(int64_t &Imm) {
5499 
5500  SMLoc OffsetLoc = Parser.getTok().getLoc();
5501 
5502  if (!parseExpr(Imm)) {
5503  return false;
5504  }
5505  if (!isUInt<16>(Imm)) {
5506  Error(OffsetLoc, "expected a 16-bit offset");
5507  return false;
5508  }
5509  return true;
5510 }
5511 
5512 bool
5513 AMDGPUAsmParser::parseSwizzleMacro(int64_t &Imm) {
5514  using namespace llvm::AMDGPU::Swizzle;
5515 
5516  if (skipToken(AsmToken::LParen, "expected a left parenthesis")) {
5517 
5518  SMLoc ModeLoc = Parser.getTok().getLoc();
5519  bool Ok = false;
5520 
5521  if (trySkipId(IdSymbolic[ID_QUAD_PERM])) {
5522  Ok = parseSwizzleQuadPerm(Imm);
5523  } else if (trySkipId(IdSymbolic[ID_BITMASK_PERM])) {
5524  Ok = parseSwizzleBitmaskPerm(Imm);
5525  } else if (trySkipId(IdSymbolic[ID_BROADCAST])) {
5526  Ok = parseSwizzleBroadcast(Imm);
5527  } else if (trySkipId(IdSymbolic[ID_SWAP])) {
5528  Ok = parseSwizzleSwap(Imm);
5529  } else if (trySkipId(IdSymbolic[ID_REVERSE])) {
5530  Ok = parseSwizzleReverse(Imm);
5531  } else {
5532  Error(ModeLoc, "expected a swizzle mode");
5533  }
5534 
5535  return Ok && skipToken(AsmToken::RParen, "expected a closing parenthesis");
5536  }
5537 
5538  return false;
5539 }
5540 
5541 OperandMatchResultTy
5542 AMDGPUAsmParser::parseSwizzleOp(OperandVector &Operands) {
5543  SMLoc S = Parser.getTok().getLoc();
5544  int64_t Imm = 0;
5545 
5546  if (trySkipId("offset")) {
5547 
5548  bool Ok = false;
5549  if (skipToken(AsmToken::Colon, "expected a colon")) {
5550  if (trySkipId("swizzle")) {
5551  Ok = parseSwizzleMacro(Imm);
5552  } else {
5553  Ok = parseSwizzleOffset(Imm);
5554  }
5555  }
5556 
5557  Operands.push_back(AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTySwizzle));
5558 
5559     return Ok? MatchOperand_Success : MatchOperand_ParseFail;
5560   } else {
5561  // Swizzle "offset" operand is optional.
5562  // If it is omitted, try parsing other optional operands.
5563  return parseOptionalOpr(Operands);
5564  }
5565 }
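// Illustrative uses of the swizzle operand parsed above (assuming the
// ds_swizzle_b32 syntax): the offset may be given either as a raw 16-bit
// value or via the swizzle() macro handled by parseSwizzleMacro:
//   ds_swizzle_b32 v5, v1 offset:swizzle(QUAD_PERM, 0, 1, 2, 3)
//   ds_swizzle_b32 v5, v1 offset:0xFFFF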
5566 
5567 bool
5568 AMDGPUOperand::isSwizzle() const {
5569  return isImmTy(ImmTySwizzle);
5570 }
5571 
5572 //===----------------------------------------------------------------------===//
5573 // VGPR Index Mode
5574 //===----------------------------------------------------------------------===//
5575 
5576 int64_t AMDGPUAsmParser::parseGPRIdxMacro() {
5577 
5578  using namespace llvm::AMDGPU::VGPRIndexMode;
5579 
5580  if (trySkipToken(AsmToken::RParen)) {
5581  return OFF;
5582  }
5583 
5584  int64_t Imm = 0;
5585 
5586  while (true) {
5587  unsigned Mode = 0;
5588  SMLoc S = Parser.getTok().getLoc();
5589 
5590  for (unsigned ModeId = ID_MIN; ModeId <= ID_MAX; ++ModeId) {
5591  if (trySkipId(IdSymbolic[ModeId])) {
5592  Mode = 1 << ModeId;
5593  break;
5594  }
5595  }
5596 
5597  if (Mode == 0) {
5598  Error(S, (Imm == 0)?
5599  "expected a VGPR index mode or a closing parenthesis" :
5600  "expected a VGPR index mode");
5601  break;
5602  }
5603 
5604  if (Imm & Mode) {
5605  Error(S, "duplicate VGPR index mode");
5606  break;
5607  }
5608  Imm |= Mode;
5609 
5610  if (trySkipToken(AsmToken::RParen))
5611  break;
5612  if (!skipToken(AsmToken::Comma,
5613  "expected a comma or a closing parenthesis"))
5614  break;
5615  }
5616 
5617  return Imm;
5618 }
5619 
5620 OperandMatchResultTy
5621 AMDGPUAsmParser::parseGPRIdxMode(OperandVector &Operands) {
5622 
5623  int64_t Imm = 0;
5624  SMLoc S = Parser.getTok().getLoc();
5625 
5626  if (getLexer().getKind() == AsmToken::Identifier &&
5627  Parser.getTok().getString() == "gpr_idx" &&
5628  getLexer().peekTok().is(AsmToken::LParen)) {
5629 
5630  Parser.Lex();
5631  Parser.Lex();
5632 
5633  // If parsing failed, emit an error but do not return an error code,
5634  // to avoid excessive error messages.
5635  Imm = parseGPRIdxMacro();
5636 
5637  } else {
5638  if (getParser().parseAbsoluteExpression(Imm))
5639  return MatchOperand_NoMatch;
5640  if (Imm < 0 || !isUInt<4>(Imm)) {
5641  Error(S, "invalid immediate: only 4-bit values are legal");
5642  }
5643  }
5644 
5645  Operands.push_back(
5646  AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyGprIdxMode));
5647  return MatchOperand_Success;
5648 }
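// Illustrative uses of the operand parsed above (assuming the
// SRC0/SRC1/SRC2/DST mode names provided by VGPRIndexMode::IdSymbolic);
// a plain 4-bit immediate is also accepted:
//   s_set_gpr_idx_on s2, gpr_idx(SRC0,DST)
//   s_set_gpr_idx_on s2, 3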
5649 
5650 bool AMDGPUOperand::isGPRIdxMode() const {
5651  return isImmTy(ImmTyGprIdxMode);
5652 }
5653 
5654 //===----------------------------------------------------------------------===//
5655 // sopp branch targets
5656 //===----------------------------------------------------------------------===//
5657 
5658 OperandMatchResultTy
5659 AMDGPUAsmParser::parseSOppBrTarget(OperandVector &Operands) {
5660 
5661  // Make sure we are not parsing something
5662  // that looks like a label or an expression but is not.
5663  // This will improve error messages.
5664  if (isRegister() || isModifier())
5665  return MatchOperand_NoMatch;
5666 
5667  if (parseExpr(Operands)) {
5668 
5669  AMDGPUOperand &Opr = ((AMDGPUOperand &)*Operands[Operands.size() - 1]);
5670  assert(Opr.isImm() || Opr.isExpr());
5671  SMLoc Loc = Opr.getStartLoc();
5672 
5673  // Currently we do not support arbitrary expressions as branch targets.
5674  // Only labels and absolute expressions are accepted.
5675  if (Opr.isExpr() && !Opr.isSymbolRefExpr()) {
5676  Error(Loc, "expected an absolute expression or a label");
5677  } else if (Opr.isImm() && !Opr.isS16Imm()) {
5678  Error(Loc, "expected a 16-bit signed jump offset");
5679  }
5680  }
5681 
5682  return MatchOperand_Success; // avoid excessive error messages
5683 }
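// Illustrative branch targets accepted above: a label or an absolute
// expression that fits in a signed 16-bit word offset, e.g.
//   s_branch loop_header
//   s_cbranch_scc0 4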
5684 
5685 //===----------------------------------------------------------------------===//
5686 // Boolean holding registers
5687 //===----------------------------------------------------------------------===//
5688 
5689 OperandMatchResultTy
5690 AMDGPUAsmParser::parseBoolReg(OperandVector &Operands) {
5691  return parseReg(Operands);
5692 }
5693 
5694 //===----------------------------------------------------------------------===//
5695 // mubuf
5696 //===----------------------------------------------------------------------===//
5697 
5698 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultDLC() const {
5699  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDLC);
5700 }
5701 
5702 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultGLC() const {
5703  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyGLC);
5704 }
5705 
5706 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSLC() const {
5707  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTySLC);
5708 }
5709 
5710 void AMDGPUAsmParser::cvtMubufImpl(MCInst &Inst,
5711  const OperandVector &Operands,
5712  bool IsAtomic,
5713  bool IsAtomicReturn,
5714  bool IsLds) {
5715  bool IsLdsOpcode = IsLds;
5716  bool HasLdsModifier = false;
5717  OptionalImmIndexMap OptionalIdx;
5718  assert(IsAtomicReturn ? IsAtomic : true);
5719  unsigned FirstOperandIdx = 1;
5720 
5721  for (unsigned i = FirstOperandIdx, e = Operands.size(); i != e; ++i) {
5722  AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
5723 
5724  // Add the register arguments
5725  if (Op.isReg()) {
5726  Op.addRegOperands(Inst, 1);
5727  // Insert a tied src for the atomic return dst.
5728  // This cannot be postponed as subsequent calls to
5729  // addImmOperands rely on the correct number of MC operands.
5730  if (IsAtomicReturn && i == FirstOperandIdx)
5731  Op.addRegOperands(Inst, 1);
5732  continue;
5733  }
5734 
5735  // Handle the case where soffset is an immediate
5736  if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) {
5737  Op.addImmOperands(Inst, 1);
5738  continue;
5739  }
5740 
5741  HasLdsModifier |= Op.isLDS();
5742 
5743  // Handle tokens like 'offen' which are sometimes hard-coded into the
5744  // asm string. There are no MCInst operands for these.
5745  if (Op.isToken()) {
5746  continue;
5747  }
5748  assert(Op.isImm());
5749 
5750  // Handle optional arguments
5751  OptionalIdx[Op.getImmTy()] = i;
5752  }
5753 
5754  // This is a workaround for an llvm quirk which may result in an
5755  // incorrect instruction selection. Lds and non-lds versions of
5756  // MUBUF instructions are identical except that lds versions
5757  // have a mandatory 'lds' modifier. However, this modifier follows
5758  // the optional modifiers and the llvm asm matcher regards this 'lds'
5759  // modifier as an optional one. As a result, an lds version
5760  // of an opcode may be selected even if it has no 'lds' modifier.
5761  if (IsLdsOpcode && !HasLdsModifier) {
5762  int NoLdsOpcode = AMDGPU::getMUBUFNoLdsInst(Inst.getOpcode());
5763  if (NoLdsOpcode != -1) { // Got lds version - correct it.
5764  Inst.setOpcode(NoLdsOpcode);
5765  IsLdsOpcode = false;
5766  }
5767  }
5768 
5769  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset);
5770  if (!IsAtomic) { // glc is hard-coded.
5771  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGLC);
5772  }
5773  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySLC);
5774 
5775  if (!IsLdsOpcode) { // tfe is not legal with lds opcodes
5776  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE);
5777  }
5778 
5779  if (isGFX10())
5780  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDLC);
5781 }
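// An illustrative MUBUF instruction with the optional modifiers handled
// above (a sketch of the syntax, not an exhaustive list):
//   buffer_load_dword v1, off, s[4:7], s1 offset:4 glc slc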
5782 
5783 void AMDGPUAsmParser::cvtMtbuf(MCInst &Inst, const OperandVector &Operands) {
5784  OptionalImmIndexMap OptionalIdx;
5785 
5786  for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
5787  AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
5788 
5789  // Add the register arguments
5790  if (Op.isReg()) {
5791  Op.addRegOperands(Inst, 1);
5792  continue;
5793  }
5794 
5795  // Handle the case where soffset is an immediate
5796  if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) {
5797  Op.addImmOperands(Inst, 1);
5798  continue;
5799  }
5800 
5801  // Handle tokens like 'offen' which are sometimes hard-coded into the
5802  // asm string. There are no MCInst operands for these.
5803  if (Op.isToken()) {
5804  continue;
5805  }
5806  assert(Op.isImm());
5807 
5808  // Handle optional arguments
5809  OptionalIdx[Op.getImmTy()] = i;
5810  }
5811 
5812  addOptionalImmOperand(Inst, Operands, OptionalIdx,
5813  AMDGPUOperand::ImmTyOffset);
5814  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyFORMAT);
5815  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGLC);
5816  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySLC);
5817  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE);
5818 
5819  if (isGFX10())
5820  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDLC);
5821 }
5822 
5823 //===----------------------------------------------------------------------===//
5824 // mimg
5825 //===----------------------------------------------------------------------===//
5826 
5827 void AMDGPUAsmParser::cvtMIMG(MCInst &Inst, const OperandVector &Operands,
5828  bool IsAtomic) {
5829  unsigned I = 1;
5830  const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
5831  for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
5832  ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
5833  }
5834 
5835  if (IsAtomic) {
5836  // Add src, same as dst
5837  assert(Desc.getNumDefs() == 1);
5838  ((AMDGPUOperand &)*Operands[I - 1]).addRegOperands(Inst, 1);
5839  }
5840 
5841  OptionalImmIndexMap OptionalIdx;
5842 
5843  for (unsigned E = Operands.size(); I != E; ++I) {
5844  AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
5845 
5846  // Add the register arguments
5847  if (Op.isReg()) {
5848  Op.addRegOperands(Inst, 1);
5849  } else if (Op.isImmModifier()) {
5850  OptionalIdx[Op.getImmTy()] = I;
5851  } else if (!Op.isToken()) {
5852  llvm_unreachable("unexpected operand type");
5853  }
5854  }
5855 
5856  bool IsGFX10 = isGFX10();
5857 
5858  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDMask);
5859  if (IsGFX10)
5860  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDim, -1);
5861  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyUNorm);
5862  if (IsGFX10)
5863  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDLC);
5864  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGLC);
5865  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySLC);
5866  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyR128A16);
5867  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE);
5868  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyLWE);
5869  if (!IsGFX10)
5870  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDA);
5871  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyD16);
5872 }
5873 
5874 void AMDGPUAsmParser::cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands) {
5875  cvtMIMG(Inst, Operands, true);
5876 }
5877 
5878 //===----------------------------------------------------------------------===//
5879 // smrd
5880 //===----------------------------------------------------------------------===//
5881 
5882 bool AMDGPUOperand::isSMRDOffset8() const {
5883  return isImm() && isUInt<8>(getImm());
5884 }
5885 
5886 bool AMDGPUOperand::isSMRDOffset20() const {
5887  return isImm() && isUInt<20>(getImm());
5888 }
5889 
5890 bool AMDGPUOperand::isSMRDLiteralOffset() const {
5891  // 32-bit literals are only supported on CI and we only want to use them
5892  // when the offset is > 8 bits.
5893  return isImm() && !isUInt<8>(getImm()) && isUInt<32>(getImm());
5894 }
5895 
5896 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDOffset8() const {
5897  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
5898 }
5899 
5900 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDOffset20() const {
5901  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
5902 }
5903 
5904 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDLiteralOffset() const {
5905  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
5906 }
5907 
5908 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultFlatOffset() const {
5909  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
5910 }
5911 
5912 //===----------------------------------------------------------------------===//
5913 // vop3
5914 //===----------------------------------------------------------------------===//
5915 
5916 static bool ConvertOmodMul(int64_t &Mul) {
5917  if (Mul != 1 && Mul != 2 && Mul != 4)
5918  return false;
5919 
5920  Mul >>= 1;
5921  return true;
5922 }
5923 
5924 static bool ConvertOmodDiv(int64_t &Div) {
5925  if (Div == 1) {
5926  Div = 0;
5927  return true;
5928  }
5929 
5930  if (Div == 2) {
5931  Div = 3;
5932  return true;
5933  }
5934 
5935  return false;
5936 }
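// The converters above map the omod assembly forms onto the output-modifier
// encoding used in the instruction:
//   mul:1 / div:1 -> 0,  mul:2 -> 1,  mul:4 -> 2,  div:2 -> 3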
5937 
5938 static bool ConvertBoundCtrl(int64_t &BoundCtrl) {
5939  if (BoundCtrl == 0) {
5940  BoundCtrl = 1;
5941  return true;
5942  }
5943 
5944  if (BoundCtrl == -1) {
5945  BoundCtrl = 0;
5946  return true;
5947  }
5948 
5949  return false;
5950 }
5951 
5952 // Note: the order in this table matches the order of operands in AsmString.
5953 static const OptionalOperand AMDGPUOptionalOperandTable[] = {
5954  {"offen", AMDGPUOperand::ImmTyOffen, true, nullptr},
5955  {"idxen", AMDGPUOperand::ImmTyIdxen, true, nullptr},
5956  {"addr64", AMDGPUOperand::ImmTyAddr64, true, nullptr},
5957  {"offset0", AMDGPUOperand::ImmTyOffset0, false, nullptr},
5958  {"offset1", AMDGPUOperand::ImmTyOffset1, false, nullptr},
5959  {"gds", AMDGPUOperand::ImmTyGDS, true, nullptr},
5960  {"lds", AMDGPUOperand::ImmTyLDS, true, nullptr},
5961  {"offset", AMDGPUOperand::ImmTyOffset, false, nullptr},
5962  {"inst_offset", AMDGPUOperand::ImmTyInstOffset, false, nullptr},
5963  {"dlc", AMDGPUOperand::ImmTyDLC, true, nullptr},
5964  {"format", AMDGPUOperand::ImmTyFORMAT, false, nullptr},
5965  {"glc", AMDGPUOperand::ImmTyGLC, true, nullptr},
5966  {"slc", AMDGPUOperand::ImmTySLC, true, nullptr},
5967  {"tfe", AMDGPUOperand::ImmTyTFE, true, nullptr},
5968  {"d16", AMDGPUOperand::ImmTyD16, true, nullptr},
5969  {"high", AMDGPUOperand::ImmTyHigh, true, nullptr},
5970  {"clamp", AMDGPUOperand::ImmTyClampSI, true, nullptr},
5971  {"omod", AMDGPUOperand::ImmTyOModSI, false, ConvertOmodMul},
5972  {"unorm", AMDGPUOperand::ImmTyUNorm, true, nullptr},
5973  {"da", AMDGPUOperand::ImmTyDA, true, nullptr},
5974  {"r128", AMDGPUOperand::ImmTyR128A16, true, nullptr},
5975  {"a16", AMDGPUOperand::ImmTyR128A16, true, nullptr},
5976  {"lwe", AMDGPUOperand::ImmTyLWE, true, nullptr},
5977  {"d16", AMDGPUOperand::ImmTyD16, true, nullptr},
5978  {"dmask", AMDGPUOperand::ImmTyDMask, false, nullptr},
5979  {"dim", AMDGPUOperand::ImmTyDim, false, nullptr},
5980  {"row_mask", AMDGPUOperand::ImmTyDppRowMask, false, nullptr},
5981  {"bank_mask", AMDGPUOperand::ImmTyDppBankMask, false, nullptr},
5982  {"bound_ctrl", AMDGPUOperand::ImmTyDppBoundCtrl, false, ConvertBoundCtrl},
5983  {"fi", AMDGPUOperand::ImmTyDppFi, false, nullptr},
5984  {"dst_sel", AMDGPUOperand::ImmTySdwaDstSel, false, nullptr},
5985  {"src0_sel", AMDGPUOperand::ImmTySdwaSrc0Sel, false, nullptr},
5986  {"src1_sel", AMDGPUOperand::ImmTySdwaSrc1Sel, false, nullptr},
5987  {"dst_unused", AMDGPUOperand::ImmTySdwaDstUnused, false, nullptr},
5988  {"compr", AMDGPUOperand::ImmTyExpCompr, true, nullptr },
5989  {"vm", AMDGPUOperand::ImmTyExpVM, true, nullptr},
5990  {"op_sel", AMDGPUOperand::ImmTyOpSel, false, nullptr},
5991  {"op_sel_hi", AMDGPUOperand::ImmTyOpSelHi, false, nullptr},
5992  {"neg_lo", AMDGPUOperand::ImmTyNegLo, false, nullptr},
5993  {"neg_hi", AMDGPUOperand::ImmTyNegHi, false, nullptr},
5994  {"blgp", AMDGPUOperand::ImmTyBLGP, false, nullptr},
5995  {"cbsz", AMDGPUOperand::ImmTyCBSZ, false, nullptr},
5996  {"abid", AMDGPUOperand::ImmTyABID, false, nullptr}
5997 };
5998 
5999 OperandMatchResultTy AMDGPUAsmParser::parseOptionalOperand(OperandVector &Operands) {
6000  unsigned size = Operands.size();
6001  assert(size > 0);
6002 
6003  OperandMatchResultTy res = parseOptionalOpr(Operands);
6004 
6005  // This is a hack to enable hardcoded mandatory operands which follow
6006  // optional operands.
6007  //
6008  // The current design assumes that all operands after the first optional
6009  // operand are also optional. However, the implementation of some instructions
6010  // violates this rule (see e.g. flat/global atomics, which have a hardcoded 'glc' operand).
6011  //
6012  // To alleviate this problem, we have to (implicitly) parse extra operands
6013  // to make sure the autogenerated parser of custom operands never hits
6014  // hardcoded mandatory operands.
6015 
6016  if (size == 1 || ((AMDGPUOperand &)*Operands[size - 1]).isRegKind()) {
6017 
6018  // We have parsed the first optional operand.
6019  // Parse as many operands as necessary to skip all mandatory operands.
6020 
6021  for (unsigned i = 0; i < MAX_OPR_LOOKAHEAD; ++i) {
6022  if (res != MatchOperand_Success ||
6023  getLexer().is(AsmToken::EndOfStatement)) break;
6024  if (getLexer().is(AsmToken::Comma)) Parser.Lex();
6025  res = parseOptionalOpr(Operands);
6026  }
6027  }
6028 
6029  return res;
6030 }
6031 
6032 OperandMatchResultTy AMDGPUAsmParser::parseOptionalOpr(OperandVector &Operands) {
6033  OperandMatchResultTy res;
6034  for (const OptionalOperand &Op : AMDGPUOptionalOperandTable) {
6035  // try to parse any optional operand here
6036  if (Op.IsBit) {
6037  res = parseNamedBit(Op.Name, Operands, Op.Type);
6038  } else if (Op.Type == AMDGPUOperand::ImmTyOModSI) {
6039  res = parseOModOperand(Operands);
6040  } else if (Op.Type == AMDGPUOperand::ImmTySdwaDstSel ||
6041  Op.Type == AMDGPUOperand::ImmTySdwaSrc0Sel ||
6042  Op.Type == AMDGPUOperand::ImmTySdwaSrc1Sel) {
6043  res = parseSDWASel(Operands, Op.Name, Op.Type);
6044  } else if (Op.Type == AMDGPUOperand::ImmTySdwaDstUnused) {
6045  res = parseSDWADstUnused(Operands);
6046  } else if (Op.Type == AMDGPUOperand::ImmTyOpSel ||
6047  Op.Type == AMDGPUOperand::ImmTyOpSelHi ||
6048  Op.Type == AMDGPUOperand::ImmTyNegLo ||
6049  Op.Type == AMDGPUOperand::ImmTyNegHi) {
6050  res = parseOperandArrayWithPrefix(Op.Name, Operands, Op.Type,
6051  Op.ConvertResult);
6052  } else if (Op.Type == AMDGPUOperand::ImmTyDim) {
6053  res = parseDim(Operands);
6054  } else if (Op.Type == AMDGPUOperand::ImmTyFORMAT && !isGFX10()) {
6055  res = parseDfmtNfmt(Operands);
6056  } else {
6057  res = parseIntWithPrefix(Op.Name, Operands, Op.Type, Op.ConvertResult);
6058  }
6059  if (res != MatchOperand_NoMatch) {
6060  return res;
6061  }
6062  }
6063  return MatchOperand_NoMatch;
6064 }
6065 
6066 OperandMatchResultTy AMDGPUAsmParser::parseOModOperand(OperandVector &Operands) {
6067  StringRef Name = Parser.getTok().getString();
6068  if (Name == "mul") {
6069  return parseIntWithPrefix("mul", Operands,
6070  AMDGPUOperand::ImmTyOModSI, ConvertOmodMul);
6071  }
6072 
6073  if (Name == "div") {
6074  return parseIntWithPrefix("div", Operands,
6075  AMDGPUOperand::ImmTyOModSI, ConvertOmodDiv);
6076  }
6077 
6078  return MatchOperand_NoMatch;
6079 }
6080 
6081 void AMDGPUAsmParser::cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands) {
6082  cvtVOP3P(Inst, Operands);
6083 
6084  int Opc = Inst.getOpcode();
6085 
6086  int SrcNum;
6087  const int Ops[] = { AMDGPU::OpName::src0,
6088  AMDGPU::OpName::src1,
6089  AMDGPU::OpName::src2 };
6090  for (SrcNum = 0;
6091  SrcNum < 3 && AMDGPU::getNamedOperandIdx(Opc, Ops[SrcNum]) != -1;
6092  ++SrcNum);
6093  assert(SrcNum > 0);
6094 
6095  int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
6096  unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
6097 
6098  if ((OpSel & (1 << SrcNum)) != 0) {
6099  int ModIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers);
6100  uint32_t ModVal = Inst.getOperand(ModIdx).getImm();
6101  Inst.getOperand(ModIdx).setImm(ModVal | SISrcMods::DST_OP_SEL);
6102  }
6103 }
6104 
6105 static bool isRegOrImmWithInputMods(const MCInstrDesc &Desc, unsigned OpNum) {
6106  // 1. This operand is input modifiers
6107  return Desc.OpInfo[OpNum].OperandType == AMDGPU::OPERAND_INPUT_MODS
6108  // 2. This is not last operand
6109  && Desc.NumOperands > (OpNum + 1)
6110  // 3. Next operand is register class
6111  && Desc.OpInfo[OpNum + 1].RegClass != -1
6112  // 4. Next register is not tied to any other operand
6113  && Desc.getOperandConstraint(OpNum + 1, MCOI::OperandConstraint::TIED_TO) == -1;
6114 }
6115 
6116 void AMDGPUAsmParser::cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands)
6117 {
6118  OptionalImmIndexMap OptionalIdx;
6119  unsigned Opc = Inst.getOpcode();
6120 
6121  unsigned I = 1;
6122  const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
6123  for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
6124  ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
6125  }
6126 
6127  for (unsigned E = Operands.size(); I != E; ++I) {
6128  AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
6129  if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
6130  Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
6131  } else if (Op.isInterpSlot() ||
6132  Op.isInterpAttr() ||
6133  Op.isAttrChan()) {
6134  Inst.addOperand(MCOperand::createImm(Op.getImm()));
6135  } else if (Op.isImmModifier()) {
6136  OptionalIdx[Op.getImmTy()] = I;
6137  } else {
6138  llvm_unreachable("unhandled operand type");
6139  }
6140  }
6141 
6142  if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::high) != -1) {
6143  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyHigh);
6144  }
6145 
6146  if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp) != -1) {
6147  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI);
6148  }
6149 
6150  if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod) != -1) {
6151  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI);
6152  }
6153 }
6154 
6155 void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands,
6156  OptionalImmIndexMap &OptionalIdx) {
6157  unsigned Opc = Inst.getOpcode();
6158 
6159  unsigned I = 1;
6160  const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
6161  for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
6162  ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
6163  }
6164 
6165  if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers) != -1) {
6166  // This instruction has src modifiers
6167  for (unsigned E = Operands.size(); I != E; ++I) {
6168  AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
6169  if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
6170  Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
6171  } else if (Op.isImmModifier()) {
6172  OptionalIdx[Op.getImmTy()] = I;
6173  } else if (Op.isRegOrImm()) {
6174  Op.addRegOrImmOperands(Inst, 1);
6175  } else {
6176  llvm_unreachable("unhandled operand type");
6177  }
6178  }
6179  } else {
6180  // No src modifiers
6181  for (unsigned E = Operands.size(); I != E; ++I) {
6182  AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
6183  if (Op.isMod()) {
6184  OptionalIdx[Op.getImmTy()] = I;
6185  } else {
6186  Op.addRegOrImmOperands(Inst, 1);
6187  }
6188  }
6189  }
6190 
6191  if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp) != -1) {
6192  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI);
6193  }
6194 
6195  if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod) != -1) {
6196  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI);
6197  }
6198 
6199  // Special case v_mac_{f16, f32} and v_fmac_{f16, f32} (gfx906/gfx10+):
6200  // these have a src2 register operand that is tied to the dst operand;
6201  // we don't allow modifiers for this operand in the assembler, so
6202  // src2_modifiers should be 0.
6203  if (Opc == AMDGPU::V_MAC_F32_e64_gfx6_gfx7 ||
6204  Opc == AMDGPU::V_MAC_F32_e64_gfx10 ||
6205  Opc == AMDGPU::V_MAC_F32_e64_vi ||
6206  Opc == AMDGPU::V_MAC_F16_e64_vi ||
6207  Opc == AMDGPU::V_FMAC_F32_e64_gfx10 ||
6208  Opc == AMDGPU::V_FMAC_F32_e64_vi ||
6209  Opc == AMDGPU::V_FMAC_F16_e64_gfx10) {
6210  auto it = Inst.begin();
6211  std::advance(it, AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2_modifiers));
6212  it = Inst.insert(it, MCOperand::createImm(0)); // no modifiers for src2
6213  ++it;
6214  Inst.insert(it, Inst.getOperand(0)); // src2 = dst
6215  }
6216 }
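// For the v_mac/v_fmac special case above, an input such as
//   v_mac_f32 v0, v1, v2
// provides operands for dst, src0 and src1 only; the code inserts a zero
// src2_modifiers operand and then copies operand 0 (the dst register) as the
// tied src2 operand.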
6217 
6218 void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands) {
6219  OptionalImmIndexMap OptionalIdx;
6220  cvtVOP3(Inst, Operands, OptionalIdx);
6221 }
6222 
6223 void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst,
6224  const OperandVector &Operands) {
6225  OptionalImmIndexMap OptIdx;
6226  const int Opc = Inst.getOpcode();
6227  const MCInstrDesc &Desc = MII.get(Opc);
6228 
6229  const bool IsPacked = (Desc.TSFlags & SIInstrFlags::IsPacked) != 0;
6230 
6231  cvtVOP3(Inst, Operands,