enum RegisterKind { IS_UNKNOWN, IS_VGPR, IS_SGPR, IS_AGPR, IS_TTMP, IS_SPECIAL };
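// Illustrative mapping (not exhaustive): "v..." registers parse as IS_VGPR,
// "s..." as IS_SGPR, "a..." as IS_AGPR, "ttmp..." as IS_TTMP, and named
// registers such as vcc, exec or m0 as IS_SPECIAL.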
  SMLoc StartLoc, EndLoc;
  const AMDGPUAsmParser *AsmParser;

  AMDGPUOperand(KindTy Kind_, const AMDGPUAsmParser *AsmParser_)
      : Kind(Kind_), AsmParser(AsmParser_) {}

  using Ptr = std::unique_ptr<AMDGPUOperand>;
  bool hasFPModifiers() const { return Abs || Neg; }
  bool hasIntModifiers() const { return Sext; }
  bool hasModifiers() const { return hasFPModifiers() || hasIntModifiers(); }

  int64_t getFPModifiersOperand() const {
    // ...
  }

  int64_t getIntModifiersOperand() const {
    // ...
  }

  int64_t getModifiersOperand() const {
    assert(!(hasFPModifiers() && hasIntModifiers()) &&
           "fp and int modifiers should not be used simultaneously");
    if (hasFPModifiers()) {
      return getFPModifiersOperand();
    } else if (hasIntModifiers()) {
      return getIntModifiersOperand();
    }
    // ...
  }
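  // A minimal sketch of the packing idea behind get*ModifiersOperand(): the
  // parsed abs/neg/sext flags collapse into a single immediate "modifiers"
  // operand. The bit positions below are made up for illustration; the real
  // values come from the SISrcMods enum in SIDefines.h.
  //
  //   inline int64_t encodeModsSketch(bool Abs, bool Neg, bool Sext) {
  //     assert(!((Abs || Neg) && Sext) &&
  //            "fp and int modifiers should not be used simultaneously");
  //     int64_t Operand = 0;
  //     if (Neg)  Operand |= 1; // hypothetical NEG bit
  //     if (Abs)  Operand |= 2; // hypothetical ABS bit
  //     if (Sext) Operand |= 4; // hypothetical SEXT bit
  //     return Operand;
  //   }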
  // ...
  ImmKindTyMandatoryLiteral,
  // ...

  mutable ImmKindTy Kind;
  bool isToken() const override { return Kind == Token; }

  bool isSymbolRefExpr() const {
    return isExpr() && Expr && isa<MCSymbolRefExpr>(Expr);
  }

  bool isImm() const override {
    return Kind == Immediate;
  }

  void setImmKindNone() const {
    Imm.Kind = ImmKindTyNone;
  }

  void setImmKindLiteral() const {
    Imm.Kind = ImmKindTyLiteral;
  }

  void setImmKindMandatoryLiteral() const {
    Imm.Kind = ImmKindTyMandatoryLiteral;
  }

  void setImmKindConst() const {
    Imm.Kind = ImmKindTyConst;
  }

  bool IsImmKindLiteral() const {
    return isImm() && Imm.Kind == ImmKindTyLiteral;
  }

  bool IsImmKindMandatoryLiteral() const {
    return isImm() && Imm.Kind == ImmKindTyMandatoryLiteral;
  }

  bool isImmKindConst() const {
    return isImm() && Imm.Kind == ImmKindTyConst;
  }

  bool isInlinableImm(MVT type) const;
  bool isLiteralImm(MVT type) const;
  bool isRegKind() const {
    // ...
  }

  bool isReg() const override {
    return isRegKind() && !hasModifiers();
  }

  bool isRegOrInline(unsigned RCID, MVT type) const {
    return isRegClass(RCID) || isInlinableImm(type);
  }

  bool isRegOrImm(unsigned RCID, MVT type) const {
    return isRegOrInline(RCID, type) || isLiteralImm(type);
  }
  bool isRegOrImmWithInt16InputMods() const {
    // ...
  }

  bool isRegOrImmWithIntT16InputMods() const {
    // ...
  }

  bool isRegOrImmWithInt32InputMods() const {
    // ...
  }

  bool isRegOrInlineImmWithInt16InputMods() const {
    return isRegOrInline(AMDGPU::VS_32RegClassID, MVT::i16);
  }

  bool isRegOrInlineImmWithInt32InputMods() const {
    return isRegOrInline(AMDGPU::VS_32RegClassID, MVT::i32);
  }

  bool isRegOrImmWithInt64InputMods() const {
    // ...
  }

  bool isRegOrImmWithFP16InputMods() const {
    // ...
  }

  bool isRegOrImmWithFPT16InputMods() const {
    // ...
  }

  bool isRegOrImmWithFP32InputMods() const {
    // ...
  }

  bool isRegOrImmWithFP64InputMods() const {
    // ...
  }

  template <bool IsFake16> bool isRegOrInlineImmWithFP16InputMods() const {
    return isRegOrInline(
        IsFake16 ? AMDGPU::VS_32RegClassID : AMDGPU::VS_16RegClassID, MVT::f16);
  }

  bool isRegOrInlineImmWithFP32InputMods() const {
    return isRegOrInline(AMDGPU::VS_32RegClassID, MVT::f32);
  }

  bool isPackedFP16InputMods() const {
    // ...
  }
  bool isVReg() const {
    return isRegClass(AMDGPU::VGPR_32RegClassID) ||
           isRegClass(AMDGPU::VReg_64RegClassID) ||
           isRegClass(AMDGPU::VReg_96RegClassID) ||
           isRegClass(AMDGPU::VReg_128RegClassID) ||
           isRegClass(AMDGPU::VReg_160RegClassID) ||
           isRegClass(AMDGPU::VReg_192RegClassID) ||
           isRegClass(AMDGPU::VReg_256RegClassID) ||
           isRegClass(AMDGPU::VReg_512RegClassID) ||
           isRegClass(AMDGPU::VReg_1024RegClassID);
  }

  bool isVReg32() const {
    return isRegClass(AMDGPU::VGPR_32RegClassID);
  }

  bool isVReg32OrOff() const {
    return isOff() || isVReg32();
  }

  bool isNull() const {
    return isRegKind() && getReg() == AMDGPU::SGPR_NULL;
  }

  bool isVRegWithInputMods() const;
  template <bool IsFake16> bool isT16VRegWithInputMods() const;

  bool isSDWAOperand(MVT type) const;
  bool isSDWAFP16Operand() const;
  bool isSDWAFP32Operand() const;
  bool isSDWAInt16Operand() const;
  bool isSDWAInt32Operand() const;
  bool isImmTy(ImmTy ImmT) const {
    // ...
  }

  template <ImmTy Ty> bool isImmTy() const { return isImmTy(Ty); }

  bool isImmLiteral() const { return isImmTy(ImmTyNone); }

  bool isImmModifier() const {
    return isImm() && Imm.Type != ImmTyNone;
  }

  bool isOModSI() const { return isImmTy(ImmTyOModSI); }
  bool isDMask() const { return isImmTy(ImmTyDMask); }
  bool isDim() const { return isImmTy(ImmTyDim); }
  bool isR128A16() const { return isImmTy(ImmTyR128A16); }
  bool isOff() const { return isImmTy(ImmTyOff); }
  bool isExpTgt() const { return isImmTy(ImmTyExpTgt); }
  bool isOffen() const { return isImmTy(ImmTyOffen); }
  bool isIdxen() const { return isImmTy(ImmTyIdxen); }
  bool isAddr64() const { return isImmTy(ImmTyAddr64); }
  bool isOffset() const { return isImmTy(ImmTyOffset); }
  bool isOffset0() const { return isImmTy(ImmTyOffset0) && isUInt<8>(getImm()); }
  bool isOffset1() const { return isImmTy(ImmTyOffset1) && isUInt<8>(getImm()); }
  bool isSMEMOffsetMod() const { return isImmTy(ImmTySMEMOffsetMod); }
  bool isFlatOffset() const { return isImmTy(ImmTyOffset) || isImmTy(ImmTyInstOffset); }
  bool isGDS() const { return isImmTy(ImmTyGDS); }
  bool isLDS() const { return isImmTy(ImmTyLDS); }
  bool isCPol() const { return isImmTy(ImmTyCPol); }
  bool isIndexKey8bit() const { return isImmTy(ImmTyIndexKey8bit); }
  bool isIndexKey16bit() const { return isImmTy(ImmTyIndexKey16bit); }
  bool isTFE() const { return isImmTy(ImmTyTFE); }
  bool isFORMAT() const { return isImmTy(ImmTyFORMAT) && isUInt<7>(getImm()); }
  bool isDppBankMask() const { return isImmTy(ImmTyDppBankMask); }
  bool isDppRowMask() const { return isImmTy(ImmTyDppRowMask); }
  bool isDppBoundCtrl() const { return isImmTy(ImmTyDppBoundCtrl); }
  bool isDppFI() const { return isImmTy(ImmTyDppFI); }
  bool isSDWADstSel() const { return isImmTy(ImmTySDWADstSel); }
  bool isSDWASrc0Sel() const { return isImmTy(ImmTySDWASrc0Sel); }
  bool isSDWASrc1Sel() const { return isImmTy(ImmTySDWASrc1Sel); }
  bool isSDWADstUnused() const { return isImmTy(ImmTySDWADstUnused); }
  bool isInterpSlot() const { return isImmTy(ImmTyInterpSlot); }
  bool isInterpAttr() const { return isImmTy(ImmTyInterpAttr); }
  bool isInterpAttrChan() const { return isImmTy(ImmTyInterpAttrChan); }
  bool isOpSel() const { return isImmTy(ImmTyOpSel); }
  bool isOpSelHi() const { return isImmTy(ImmTyOpSelHi); }
  bool isNegLo() const { return isImmTy(ImmTyNegLo); }
  bool isNegHi() const { return isImmTy(ImmTyNegHi); }
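  // For example, isOffset0() accepts "offset0:255" (fits isUInt<8>) but
  // rejects "offset0:256"; isFORMAT() likewise caps its value at 7 bits.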
  bool isRegOrImm() const {
    // ...
  }

  bool isRegClass(unsigned RCID) const;

  bool isRegOrInlineNoMods(unsigned RCID, MVT type) const {
    return isRegOrInline(RCID, type) && !hasModifiers();
  }

  bool isSCSrcB16() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i16);
  }

  bool isSCSrcV2B16() const {
    // ...
  }

  bool isSCSrc_b32() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i32);
  }

  bool isSCSrc_b64() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::i64);
  }

  bool isBoolReg() const;
  bool isSCSrcF16() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f16);
  }

  bool isSCSrcV2F16() const {
    // ...
  }

  bool isSCSrcF32() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f32);
  }

  bool isSCSrcF64() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::f64);
  }

  bool isSSrc_b32() const {
    return isSCSrc_b32() || isLiteralImm(MVT::i32) || isExpr();
  }

  bool isSSrc_b16() const { return isSCSrcB16() || isLiteralImm(MVT::i16); }

  bool isSSrcV2B16() const {
    // ...
  }

  bool isSSrc_b64() const {
    // ...
    return isSCSrc_b64() || isLiteralImm(MVT::i64);
  }

  bool isSSrc_f32() const {
    return isSCSrc_b32() || isLiteralImm(MVT::f32) || isExpr();
  }

  bool isSSrcF64() const { return isSCSrc_b64() || isLiteralImm(MVT::f64); }

  bool isSSrc_bf16() const { return isSCSrcB16() || isLiteralImm(MVT::bf16); }

  bool isSSrc_f16() const { return isSCSrcB16() || isLiteralImm(MVT::f16); }

  bool isSSrcV2F16() const {
    // ...
  }

  bool isSSrcV2FP32() const {
    // ...
  }

  bool isSCSrcV2FP32() const {
    // ...
  }

  bool isSSrcV2INT32() const {
    // ...
  }

  bool isSCSrcV2INT32() const {
    // ...
    return isSCSrc_b32();
  }

  bool isSSrcOrLds_b32() const {
    return isRegOrInlineNoMods(AMDGPU::SRegOrLds_32RegClassID, MVT::i32) ||
           isLiteralImm(MVT::i32) || isExpr();
  }
  bool isVCSrc_b32() const {
    return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i32);
  }

  bool isVCSrcB64() const {
    return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::i64);
  }

  bool isVCSrcTB16() const {
    return isRegOrInlineNoMods(AMDGPU::VS_16RegClassID, MVT::i16);
  }

  bool isVCSrcTB16_Lo128() const {
    return isRegOrInlineNoMods(AMDGPU::VS_16_Lo128RegClassID, MVT::i16);
  }

  bool isVCSrcFake16B16_Lo128() const {
    return isRegOrInlineNoMods(AMDGPU::VS_32_Lo128RegClassID, MVT::i16);
  }

  bool isVCSrc_b16() const {
    return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i16);
  }

  bool isVCSrc_v2b16() const { return isVCSrc_b16(); }

  bool isVCSrc_f32() const {
    return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f32);
  }

  bool isVCSrcF64() const {
    return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::f64);
  }

  bool isVCSrcTBF16() const {
    return isRegOrInlineNoMods(AMDGPU::VS_16RegClassID, MVT::bf16);
  }

  bool isVCSrcTF16() const {
    return isRegOrInlineNoMods(AMDGPU::VS_16RegClassID, MVT::f16);
  }

  bool isVCSrcTBF16_Lo128() const {
    return isRegOrInlineNoMods(AMDGPU::VS_16_Lo128RegClassID, MVT::bf16);
  }

  bool isVCSrcTF16_Lo128() const {
    return isRegOrInlineNoMods(AMDGPU::VS_16_Lo128RegClassID, MVT::f16);
  }

  bool isVCSrcFake16BF16_Lo128() const {
    return isRegOrInlineNoMods(AMDGPU::VS_32_Lo128RegClassID, MVT::bf16);
  }

  bool isVCSrcFake16F16_Lo128() const {
    return isRegOrInlineNoMods(AMDGPU::VS_32_Lo128RegClassID, MVT::f16);
  }

  bool isVCSrc_bf16() const {
    return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::bf16);
  }

  bool isVCSrc_f16() const {
    return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f16);
  }

  bool isVCSrc_v2bf16() const { return isVCSrc_bf16(); }

  bool isVCSrc_v2f16() const { return isVCSrc_f16(); }

  bool isVSrc_b32() const {
    return isVCSrc_f32() || isLiteralImm(MVT::i32) || isExpr();
  }

  bool isVSrc_b64() const { return isVCSrcB64() || isLiteralImm(MVT::i64); }

  bool isVSrcT_b16() const { return isVCSrcTB16() || isLiteralImm(MVT::i16); }

  bool isVSrcT_b16_Lo128() const {
    return isVCSrcTB16_Lo128() || isLiteralImm(MVT::i16);
  }

  bool isVSrcFake16_b16_Lo128() const {
    return isVCSrcFake16B16_Lo128() || isLiteralImm(MVT::i16);
  }

  bool isVSrc_b16() const { return isVCSrc_b16() || isLiteralImm(MVT::i16); }

  bool isVSrc_v2b16() const { return isVSrc_b16() || isLiteralImm(MVT::v2i16); }

  bool isVCSrcV2FP32() const {
    // ...
  }

  bool isVSrc_v2f32() const { return isVSrc_f64() || isLiteralImm(MVT::v2f32); }

  bool isVCSrcV2INT32() const {
    // ...
  }

  bool isVSrc_v2b32() const { return isVSrc_b64() || isLiteralImm(MVT::v2i32); }

  bool isVSrc_f32() const {
    return isVCSrc_f32() || isLiteralImm(MVT::f32) || isExpr();
  }

  bool isVSrc_f64() const { return isVCSrcF64() || isLiteralImm(MVT::f64); }

  bool isVSrcT_bf16() const { return isVCSrcTBF16() || isLiteralImm(MVT::bf16); }

  bool isVSrcT_f16() const { return isVCSrcTF16() || isLiteralImm(MVT::f16); }

  bool isVSrcT_bf16_Lo128() const {
    return isVCSrcTBF16_Lo128() || isLiteralImm(MVT::bf16);
  }

  bool isVSrcT_f16_Lo128() const {
    return isVCSrcTF16_Lo128() || isLiteralImm(MVT::f16);
  }

  bool isVSrcFake16_bf16_Lo128() const {
    return isVCSrcFake16BF16_Lo128() || isLiteralImm(MVT::bf16);
  }

  bool isVSrcFake16_f16_Lo128() const {
    return isVCSrcFake16F16_Lo128() || isLiteralImm(MVT::f16);
  }

  bool isVSrc_bf16() const { return isVCSrc_bf16() || isLiteralImm(MVT::bf16); }

  bool isVSrc_f16() const { return isVCSrc_f16() || isLiteralImm(MVT::f16); }

  bool isVSrc_v2bf16() const {
    return isVSrc_bf16() || isLiteralImm(MVT::v2bf16);
  }

  bool isVSrc_v2f16() const { return isVSrc_f16() || isLiteralImm(MVT::v2f16); }
  bool isVISrcB32() const {
    return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i32);
  }

  bool isVISrcB16() const {
    return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i16);
  }

  bool isVISrcV2B16() const {
    // ...
  }

  bool isVISrcF32() const {
    return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f32);
  }

  bool isVISrcF16() const {
    return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f16);
  }

  bool isVISrcV2F16() const {
    return isVISrcF16() || isVISrcB32();
  }

  bool isVISrc_64_bf16() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::bf16);
  }

  bool isVISrc_64_f16() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::f16);
  }

  bool isVISrc_64_b32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::i32);
  }

  bool isVISrc_64B64() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::i64);
  }

  bool isVISrc_64_f64() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::f64);
  }

  bool isVISrc_64V2FP32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::f32);
  }

  bool isVISrc_64V2INT32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::i32);
  }

  bool isVISrc_256_b32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::i32);
  }

  bool isVISrc_256_f32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::f32);
  }

  bool isVISrc_256B64() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::i64);
  }

  bool isVISrc_256_f64() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::f64);
  }

  bool isVISrc_128B16() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::i16);
  }

  bool isVISrc_128V2B16() const {
    return isVISrc_128B16();
  }

  bool isVISrc_128_b32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::i32);
  }

  bool isVISrc_128_f32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::f32);
  }

  bool isVISrc_256V2FP32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::f32);
  }

  bool isVISrc_256V2INT32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::i32);
  }

  bool isVISrc_512_b32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::i32);
  }

  bool isVISrc_512B16() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::i16);
  }

  bool isVISrc_512V2B16() const {
    return isVISrc_512B16();
  }

  bool isVISrc_512_f32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::f32);
  }

  bool isVISrc_512F16() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::f16);
  }

  bool isVISrc_512V2F16() const {
    return isVISrc_512F16() || isVISrc_512_b32();
  }

  bool isVISrc_1024_b32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::i32);
  }

  bool isVISrc_1024B16() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::i16);
  }

  bool isVISrc_1024V2B16() const {
    return isVISrc_1024B16();
  }

  bool isVISrc_1024_f32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::f32);
  }

  bool isVISrc_1024F16() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::f16);
  }

  bool isVISrc_1024V2F16() const {
    return isVISrc_1024F16() || isVISrc_1024_b32();
  }

  bool isAISrcB32() const {
    return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i32);
  }

  bool isAISrcB16() const {
    return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i16);
  }

  bool isAISrcV2B16() const {
    // ...
  }

  bool isAISrcF32() const {
    return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f32);
  }

  bool isAISrcF16() const {
    return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f16);
  }

  bool isAISrcV2F16() const {
    return isAISrcF16() || isAISrcB32();
  }

  bool isAISrc_64B64() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_64RegClassID, MVT::i64);
  }

  bool isAISrc_64_f64() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_64RegClassID, MVT::f64);
  }

  bool isAISrc_128_b32() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i32);
  }

  bool isAISrc_128B16() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i16);
  }

  bool isAISrc_128V2B16() const {
    return isAISrc_128B16();
  }

  bool isAISrc_128_f32() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f32);
  }

  bool isAISrc_128F16() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f16);
  }

  bool isAISrc_128V2F16() const {
    return isAISrc_128F16() || isAISrc_128_b32();
  }

  bool isVISrc_128_bf16() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::bf16);
  }

  bool isVISrc_128_f16() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::f16);
  }

  bool isVISrc_128V2F16() const {
    return isVISrc_128_f16() || isVISrc_128_b32();
  }

  bool isAISrc_256B64() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_256RegClassID, MVT::i64);
  }

  bool isAISrc_256_f64() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_256RegClassID, MVT::f64);
  }

  bool isAISrc_512_b32() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i32);
  }

  bool isAISrc_512B16() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i16);
  }

  bool isAISrc_512V2B16() const {
    return isAISrc_512B16();
  }

  bool isAISrc_512_f32() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f32);
  }

  bool isAISrc_512F16() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f16);
  }

  bool isAISrc_512V2F16() const {
    return isAISrc_512F16() || isAISrc_512_b32();
  }

  bool isAISrc_1024_b32() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i32);
  }

  bool isAISrc_1024B16() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i16);
  }

  bool isAISrc_1024V2B16() const {
    return isAISrc_1024B16();
  }

  bool isAISrc_1024_f32() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f32);
  }

  bool isAISrc_1024F16() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f16);
  }

  bool isAISrc_1024V2F16() const {
    return isAISrc_1024F16() || isAISrc_1024_b32();
  }
  bool isKImmFP32() const {
    return isLiteralImm(MVT::f32);
  }

  bool isKImmFP16() const {
    return isLiteralImm(MVT::f16);
  }

  bool isMem() const override {
    // ...
  }

  bool isExpr() const {
    // ...
  }

  bool isSOPPBrTarget() const { return isExpr() || isImm(); }

  bool isSWaitCnt() const;
  bool isDepCtr() const;
  bool isSDelayALU() const;
  bool isHwreg() const;
  bool isSendMsg() const;
  bool isSplitBarrier() const;
  bool isSwizzle() const;
  bool isSMRDOffset8() const;
  bool isSMEMOffset() const;
  bool isSMRDLiteralOffset() const;
  bool isDPPCtrl() const;
  bool isGPRIdxMode() const;
  bool isS16Imm() const;
  bool isU16Imm() const;
  bool isEndpgm() const;
  bool isWaitVDST() const;
  bool isWaitEXP() const;
  bool isWaitVAVDst() const;
  bool isWaitVMVSrc() const;

  auto getPredicate(std::function<bool(const AMDGPUOperand &Op)> P) const {
    return std::bind(P, *this);
  }
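  // Usage sketch for getPredicate(): std::bind(P, *this) captures a copy of
  // the operand and yields a nullary callable, so
  //   auto IsOff = Op.getPredicate(
  //       [](const AMDGPUOperand &O) { return O.isOff(); });
  //   bool Off = IsOff(); // evaluates the wrapped test on the bound operand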
  int64_t getImm() const {
    // ...
  }

  void setImm(int64_t Val) {
    // ...
  }

  ImmTy getImmTy() const {
    // ...
  }

  SMLoc getStartLoc() const override {
    // ...
  }

  SMLoc getEndLoc() const override {
    // ...
  }

  SMRange getLocRange() const {
    return SMRange(StartLoc, EndLoc);
  }

  Modifiers getModifiers() const {
    assert(isRegKind() || isImmTy(ImmTyNone));
    return isRegKind() ? Reg.Mods : Imm.Mods;
  }

  void setModifiers(Modifiers Mods) {
    assert(isRegKind() || isImmTy(ImmTyNone));
    // ...
  }

  bool hasModifiers() const {
    return getModifiers().hasModifiers();
  }

  bool hasFPModifiers() const {
    return getModifiers().hasFPModifiers();
  }

  bool hasIntModifiers() const {
    return getModifiers().hasIntModifiers();
  }
  void addImmOperands(MCInst &Inst, unsigned N,
                      bool ApplyModifiers = true) const;

  void addLiteralImmOperand(MCInst &Inst, int64_t Val,
                            bool ApplyModifiers) const;

  void addRegOperands(MCInst &Inst, unsigned N) const;

  void addRegOrImmOperands(MCInst &Inst, unsigned N) const {
    if (isRegKind())
      addRegOperands(Inst, N);
    else
      addImmOperands(Inst, N);
  }

  void addRegOrImmWithInputModsOperands(MCInst &Inst, unsigned N) const {
    Modifiers Mods = getModifiers();
    // ...
    if (isRegKind())
      addRegOperands(Inst, N);
    else
      addImmOperands(Inst, N, false);
  }

  void addRegOrImmWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
    assert(!hasIntModifiers());
    addRegOrImmWithInputModsOperands(Inst, N);
  }

  void addRegOrImmWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
    assert(!hasFPModifiers());
    addRegOrImmWithInputModsOperands(Inst, N);
  }

  void addRegWithInputModsOperands(MCInst &Inst, unsigned N) const {
    Modifiers Mods = getModifiers();
    // ...
    addRegOperands(Inst, N);
  }

  void addRegWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
    assert(!hasIntModifiers());
    addRegWithInputModsOperands(Inst, N);
  }

  void addRegWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
    assert(!hasFPModifiers());
    addRegWithInputModsOperands(Inst, N);
  }
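  // The FP/int entry points above differ only in their assertions: a caller
  // that matched floating-point modifier syntax ("abs"/"neg"/"|...|") must
  // not carry sext, and vice versa, mirroring getModifiersOperand()'s
  // invariant.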
  static void printImmTy(raw_ostream &OS, ImmTy Type) {
    switch (Type) {
    case ImmTyNone: OS << "None"; break;
    case ImmTyGDS: OS << "GDS"; break;
    case ImmTyLDS: OS << "LDS"; break;
    case ImmTyOffen: OS << "Offen"; break;
    case ImmTyIdxen: OS << "Idxen"; break;
    case ImmTyAddr64: OS << "Addr64"; break;
    case ImmTyOffset: OS << "Offset"; break;
    case ImmTyInstOffset: OS << "InstOffset"; break;
    case ImmTyOffset0: OS << "Offset0"; break;
    case ImmTyOffset1: OS << "Offset1"; break;
    case ImmTySMEMOffsetMod: OS << "SMEMOffsetMod"; break;
    case ImmTyCPol: OS << "CPol"; break;
    case ImmTyIndexKey8bit: OS << "index_key"; break;
    case ImmTyIndexKey16bit: OS << "index_key"; break;
    case ImmTyTFE: OS << "TFE"; break;
    case ImmTyD16: OS << "D16"; break;
    case ImmTyFORMAT: OS << "FORMAT"; break;
    case ImmTyClampSI: OS << "ClampSI"; break;
    case ImmTyOModSI: OS << "OModSI"; break;
    case ImmTyDPP8: OS << "DPP8"; break;
    case ImmTyDppCtrl: OS << "DppCtrl"; break;
    case ImmTyDppRowMask: OS << "DppRowMask"; break;
    case ImmTyDppBankMask: OS << "DppBankMask"; break;
    case ImmTyDppBoundCtrl: OS << "DppBoundCtrl"; break;
    case ImmTyDppFI: OS << "DppFI"; break;
    case ImmTySDWADstSel: OS << "SDWADstSel"; break;
    case ImmTySDWASrc0Sel: OS << "SDWASrc0Sel"; break;
    case ImmTySDWASrc1Sel: OS << "SDWASrc1Sel"; break;
    case ImmTySDWADstUnused: OS << "SDWADstUnused"; break;
    case ImmTyDMask: OS << "DMask"; break;
    case ImmTyDim: OS << "Dim"; break;
    case ImmTyUNorm: OS << "UNorm"; break;
    case ImmTyDA: OS << "DA"; break;
    case ImmTyR128A16: OS << "R128A16"; break;
    case ImmTyA16: OS << "A16"; break;
    case ImmTyLWE: OS << "LWE"; break;
    case ImmTyOff: OS << "Off"; break;
    case ImmTyExpTgt: OS << "ExpTgt"; break;
    case ImmTyExpCompr: OS << "ExpCompr"; break;
    case ImmTyExpVM: OS << "ExpVM"; break;
    case ImmTyHwreg: OS << "Hwreg"; break;
    case ImmTySendMsg: OS << "SendMsg"; break;
    case ImmTyInterpSlot: OS << "InterpSlot"; break;
    case ImmTyInterpAttr: OS << "InterpAttr"; break;
    case ImmTyInterpAttrChan: OS << "InterpAttrChan"; break;
    case ImmTyOpSel: OS << "OpSel"; break;
    case ImmTyOpSelHi: OS << "OpSelHi"; break;
    case ImmTyNegLo: OS << "NegLo"; break;
    case ImmTyNegHi: OS << "NegHi"; break;
    case ImmTySwizzle: OS << "Swizzle"; break;
    case ImmTyGprIdxMode: OS << "GprIdxMode"; break;
    case ImmTyHigh: OS << "High"; break;
    case ImmTyBLGP: OS << "BLGP"; break;
    case ImmTyCBSZ: OS << "CBSZ"; break;
    case ImmTyABID: OS << "ABID"; break;
    case ImmTyEndpgm: OS << "Endpgm"; break;
    case ImmTyWaitVDST: OS << "WaitVDST"; break;
    case ImmTyWaitEXP: OS << "WaitEXP"; break;
    case ImmTyWaitVAVDst: OS << "WaitVAVDst"; break;
    case ImmTyWaitVMVSrc: OS << "WaitVMVSrc"; break;
    }
  }
  void print(raw_ostream &OS) const override {
    switch (Kind) {
    case Register:
      OS << "<register " << getReg() << " mods: " << Reg.Mods << '>';
      break;
    case Immediate:
      OS << '<' << getImm();
      if (getImmTy() != ImmTyNone) {
        OS << " type: "; printImmTy(OS, getImmTy());
      }
      OS << " mods: " << Imm.Mods << '>';
      break;
    case Token:
      OS << '\'' << getToken() << '\'';
      break;
    case Expression:
      OS << "<expr " << *Expr << '>';
      break;
    }
  }
  static AMDGPUOperand::Ptr CreateImm(const AMDGPUAsmParser *AsmParser,
                                      int64_t Val, SMLoc Loc,
                                      ImmTy Type = ImmTyNone,
                                      bool IsFPImm = false) {
    auto Op = std::make_unique<AMDGPUOperand>(Immediate, AsmParser);
    // ...
    Op->Imm.IsFPImm = IsFPImm;
    Op->Imm.Kind = ImmKindTyNone;
    // ...
    Op->Imm.Mods = Modifiers();
    // ...
    return Op;
  }

  static AMDGPUOperand::Ptr CreateToken(const AMDGPUAsmParser *AsmParser,
                                        StringRef Str, SMLoc Loc,
                                        bool HasExplicitEncodingSize = true) {
    auto Res = std::make_unique<AMDGPUOperand>(Token, AsmParser);
    Res->Tok.Data = Str.data();
    Res->Tok.Length = Str.size();
    Res->StartLoc = Loc;
    // ...
    return Res;
  }

  static AMDGPUOperand::Ptr CreateReg(const AMDGPUAsmParser *AsmParser,
                                      unsigned RegNo, SMLoc S, SMLoc E) {
    auto Op = std::make_unique<AMDGPUOperand>(Register, AsmParser);
    Op->Reg.RegNo = RegNo;
    Op->Reg.Mods = Modifiers();
    // ...
    return Op;
  }

  static AMDGPUOperand::Ptr CreateExpr(const AMDGPUAsmParser *AsmParser,
                                       const MCExpr *Expr, SMLoc S) {
    auto Op = std::make_unique<AMDGPUOperand>(Expression, AsmParser);
    // ...
    return Op;
  }
raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods) {
  OS << "abs:" << Mods.Abs << " neg: " << Mods.Neg << " sext:" << Mods.Sext;
  return OS;
}
class KernelScopeInfo {
  int SgprIndexUnusedMin = -1;
  int VgprIndexUnusedMin = -1;
  int AgprIndexUnusedMin = -1;
  // ...

  void usesSgprAt(int i) {
    if (i >= SgprIndexUnusedMin) {
      SgprIndexUnusedMin = ++i;
      // ...
    }
  }

  void usesVgprAt(int i) {
    if (i >= VgprIndexUnusedMin) {
      VgprIndexUnusedMin = ++i;
      // ... (symbol update referencing VgprIndexUnusedMin)
    }
  }

  void usesAgprAt(int i) {
    // ...
    if (i >= AgprIndexUnusedMin) {
      AgprIndexUnusedMin = ++i;
      // ... (symbol update referencing VgprIndexUnusedMin)
    }
  }

public:
  KernelScopeInfo() = default;

  void initialize(MCContext &Context) {
    // ...
    usesSgprAt(SgprIndexUnusedMin = -1);
    usesVgprAt(VgprIndexUnusedMin = -1);
    // ...
    usesAgprAt(AgprIndexUnusedMin = -1);
  }

  void usesRegister(RegisterKind RegKind, unsigned DwordRegIndex,
                    unsigned RegWidth) {
    switch (RegKind) {
    case IS_SGPR:
      usesSgprAt(DwordRegIndex + divideCeil(RegWidth, 32) - 1);
      break;
    case IS_AGPR:
      usesAgprAt(DwordRegIndex + divideCeil(RegWidth, 32) - 1);
      break;
    case IS_VGPR:
      usesVgprAt(DwordRegIndex + divideCeil(RegWidth, 32) - 1);
      break;
    default:
      break;
    }
  }
};
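// Worked example for usesRegister(): "s[2:5]" has DwordRegIndex = 2 and
// RegWidth = 128, so the highest used index is 2 + divideCeil(128, 32) - 1.
static_assert(2 + (128 + 31) / 32 - 1 == 5,
              "s[2:5] marks s5 as the last used SGPR; the unused "
              "minimum becomes 6");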
  unsigned ForcedEncodingSize = 0;
  bool ForcedDPP = false;
  bool ForcedSDWA = false;
  KernelScopeInfo KernelScope;

  // ...

#define GET_ASSEMBLER_HEADER
#include "AMDGPUGenAsmMatcher.inc"
  bool ParseAsAbsoluteExpression(uint32_t &Ret);
  bool OutOfRangeError(SMRange Range);
  // ...
  bool calculateGPRBlocks(const FeatureBitset &Features, bool VCCUsed,
                          bool FlatScrUsed, bool XNACKUsed,
                          std::optional<bool> EnableWavefrontSize32,
                          unsigned NextFreeVGPR, SMRange VGPRRange,
                          unsigned NextFreeSGPR, SMRange SGPRRange,
                          unsigned &VGPRBlocks, unsigned &SGPRBlocks);
  bool ParseDirectiveAMDGCNTarget();
  bool ParseDirectiveAMDHSACodeObjectVersion();
  bool ParseDirectiveAMDHSAKernel();
  bool ParseDirectiveAMDKernelCodeT();
  bool ParseDirectiveAMDGPUHsaKernel();
  bool ParseDirectiveISAVersion();
  bool ParseDirectiveHSAMetadata();
  bool ParseDirectivePALMetadataBegin();
  bool ParseDirectivePALMetadata();
  bool ParseDirectiveAMDGPULDS();
  // ...
  bool ParseToEndDirective(const char *AssemblerDirectiveBegin,
                           const char *AssemblerDirectiveEnd,
                           std::string &CollectString);

  bool AddNextRegisterToList(unsigned &Reg, unsigned &RegWidth,
                             RegisterKind RegKind, unsigned Reg1, SMLoc Loc);
  bool ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
                           unsigned &RegNum, unsigned &RegWidth,
                           bool RestoreOnFailure = false);
  bool ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
                           unsigned &RegNum, unsigned &RegWidth,
                           SmallVectorImpl<AsmToken> &Tokens);
  unsigned ParseRegularReg(RegisterKind &RegKind, unsigned &RegNum,
                           unsigned &RegWidth,
                           SmallVectorImpl<AsmToken> &Tokens);
  unsigned ParseSpecialReg(RegisterKind &RegKind, unsigned &RegNum,
                           unsigned &RegWidth,
                           SmallVectorImpl<AsmToken> &Tokens);
  unsigned ParseRegList(RegisterKind &RegKind, unsigned &RegNum,
                        unsigned &RegWidth, SmallVectorImpl<AsmToken> &Tokens);
  bool ParseRegRange(unsigned &Num, unsigned &Width);
  unsigned getRegularReg(RegisterKind RegKind, unsigned RegNum, unsigned SubReg,
                         unsigned RegWidth, SMLoc Loc);
  // ...
  std::optional<StringRef> getGprCountSymbolName(RegisterKind RegKind);
  void initializeGprCountSymbol(RegisterKind RegKind);
  bool updateGprCountSymbols(RegisterKind RegKind, unsigned DwordRegIndex,
                             unsigned RegWidth);
  // ...
  enum AMDGPUMatchResultTy {
    Match_PreferE32 = FIRST_TARGET_MATCH_RESULT_TY
  };
  enum OperandMode {
    OperandMode_Default,
    // ...
  };

  using OptionalImmIndexMap = std::map<AMDGPUOperand::ImmTy, unsigned>;
    // ...
    if (getFeatureBits().none()) {
      // ...
    }
    // ...
    initializeGprCountSymbol(IS_VGPR);
    initializeGprCountSymbol(IS_SGPR);
    // ...

  bool hasInv2PiInlineImm() const {
    return getFeatureBits()[AMDGPU::FeatureInv2PiInlineImm];
  }

  bool hasFlatOffsets() const {
    return getFeatureBits()[AMDGPU::FeatureFlatInstOffsets];
  }

  bool hasArchitectedFlatScratch() const {
    return getFeatureBits()[AMDGPU::FeatureArchitectedFlatScratch];
  }

  bool hasSGPR102_SGPR103() const {
    // ...
  }

  bool hasSGPR104_SGPR105() const { return isGFX10Plus(); }

  bool hasIntClamp() const {
    return getFeatureBits()[AMDGPU::FeatureIntClamp];
  }

  bool hasPartialNSAEncoding() const {
    return getFeatureBits()[AMDGPU::FeaturePartialNSAEncoding];
  }
  void setForcedEncodingSize(unsigned Size) { ForcedEncodingSize = Size; }
  void setForcedDPP(bool ForceDPP_) { ForcedDPP = ForceDPP_; }
  void setForcedSDWA(bool ForceSDWA_) { ForcedSDWA = ForceSDWA_; }

  unsigned getForcedEncodingSize() const { return ForcedEncodingSize; }
  bool isForcedVOP3() const { return ForcedEncodingSize == 64; }
  bool isForcedDPP() const { return ForcedDPP; }
  bool isForcedSDWA() const { return ForcedSDWA; }
  // ...
  StringRef getMatchedVariantName() const;

  std::unique_ptr<AMDGPUOperand> parseRegister(bool RestoreOnFailure = false);
  bool ParseRegister(MCRegister &RegNo, SMLoc &StartLoc, SMLoc &EndLoc,
                     bool RestoreOnFailure);
  bool parseRegister(MCRegister &Reg, SMLoc &StartLoc,
                     SMLoc &EndLoc) override;
  unsigned validateTargetOperandClass(MCParsedAsmOperand &Op,
                                      unsigned Kind) override;
  bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
                               OperandVector &Operands, MCStreamer &Out,
                               uint64_t &ErrorInfo,
                               bool MatchingInlineAsm) override;
  // ...
  bool parseOperand(OperandVector &Operands, StringRef Mnemonic,
                    OperandMode Mode = OperandMode_Default);
  // ...
  ParseStatus parseIntWithPrefix(const char *Prefix, int64_t &Int);
  ParseStatus parseIntWithPrefix(
      const char *Prefix, OperandVector &Operands,
      AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
      std::function<bool(int64_t &)> ConvertResult = nullptr);
  ParseStatus parseOperandArrayWithPrefix(
      const char *Prefix, OperandVector &Operands,
      AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
      bool (*ConvertResult)(int64_t &) = nullptr);
  ParseStatus parseNamedBit(
      StringRef Name, OperandVector &Operands,
      AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone);
  bool isOperandModifier(const AsmToken &Token,
                         const AsmToken &NextToken) const;
  bool isRegOrOperandModifier(const AsmToken &Token,
                              const AsmToken &NextToken) const;
  bool isNamedOperandModifier(const AsmToken &Token,
                              const AsmToken &NextToken) const;
  bool isOpcodeModifierWithVal(const AsmToken &Token,
                               const AsmToken &NextToken) const;
  bool parseSP3NegModifier();
  ParseStatus parseImm(OperandVector &Operands, bool HasSP3AbsModifier = false,
                       bool HasLit = false);
  ParseStatus parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod = false,
                            bool HasLit = false);
  ParseStatus parseRegOrImmWithFPInputMods(OperandVector &Operands,
                                           bool AllowImm = true);
  ParseStatus parseRegOrImmWithIntInputMods(OperandVector &Operands,
                                            bool AllowImm = true);
  // ...
  ParseStatus parseStringWithPrefix(StringRef Prefix, OperandVector &Operands,
                                    AMDGPUOperand::ImmTy ImmTy);
  // ...
  ParseStatus parseSymbolicOrNumericFormat(int64_t &Format);
  // ...
  bool tryParseFmt(const char *Pref, int64_t MaxVal, int64_t &Val);
  bool matchDfmtNfmt(int64_t &Dfmt, int64_t &Nfmt, StringRef FormatStr,
                     SMLoc Loc);
  // ...
  bool parseCnt(int64_t &IntVal);
  // ...
  bool parseDepCtr(int64_t &IntVal, unsigned &Mask);
  // ...
  bool parseDelay(int64_t &Delay);
  struct OperandInfoTy {
    SMLoc Loc;
    int64_t Val;
    bool IsSymbolic = false;
    bool IsDefined = false;

    OperandInfoTy(int64_t Val) : Val(Val) {}
  };

  struct StructuredOpField : OperandInfoTy {
    StringLiteral Id;
    StringLiteral Desc;
    unsigned Width;
    bool IsDefined = false;
    // ...
    virtual ~StructuredOpField() = default;

    bool Error(AMDGPUAsmParser &Parser, const Twine &Err) const {
      Parser.Error(Loc, "invalid " + Desc + ": " + Err);
      return false;
    }

    virtual bool validate(AMDGPUAsmParser &Parser) const {
      // ...
      return Error(Parser, "not supported on this GPU");
      // ...
      return Error(Parser, "only " + Twine(Width) + "-bit values are legal");
      // ...
    }
  };
  bool parseSendMsgBody(OperandInfoTy &Msg, OperandInfoTy &Op,
                        OperandInfoTy &Stream);
  bool validateSendMsg(const OperandInfoTy &Msg,
                       const OperandInfoTy &Op,
                       const OperandInfoTy &Stream);
  // ...
  bool parseHwregBody(OperandInfoTy &HwReg, OperandInfoTy &Offset,
                      OperandInfoTy &Width);
  // ...
  SMLoc getOperandLoc(std::function<bool(const AMDGPUOperand &)> Test,
                      const OperandVector &Operands) const;
  // ...
  SMLoc getLitLoc(const OperandVector &Operands,
                  bool SearchMandatoryLiterals = false) const;
  bool validateSOPLiteral(const MCInst &Inst) const;
  // ...
  bool validateVOPDRegBankConstraints(const MCInst &Inst,
                                      const OperandVector &Operands);
  // ...
  bool validateIntClampSupported(const MCInst &Inst);
  bool validateMIMGAtomicDMask(const MCInst &Inst);
  bool validateMIMGGatherDMask(const MCInst &Inst);
  // ...
  bool validateMIMGDataSize(const MCInst &Inst, const SMLoc &IDLoc);
  bool validateMIMGAddrSize(const MCInst &Inst, const SMLoc &IDLoc);
  bool validateMIMGD16(const MCInst &Inst);
  bool validateMIMGMSAA(const MCInst &Inst);
  bool validateOpSel(const MCInst &Inst);
  // ...
  bool validateVccOperand(unsigned Reg) const;
  // ...
  bool validateAGPRLdSt(const MCInst &Inst) const;
  bool validateVGPRAlign(const MCInst &Inst) const;
  // ...
  bool validateDivScale(const MCInst &Inst);
  // ... (additional validators taking const SMLoc &IDLoc and
  //      const unsigned CPol parameters)
  std::optional<StringRef> validateLdsDirect(const MCInst &Inst);
  unsigned getConstantBusLimit(unsigned Opcode) const;
  bool usesConstantBus(const MCInst &Inst, unsigned OpIdx);
  bool isInlineConstant(const MCInst &Inst, unsigned OpIdx) const;
  unsigned findImplicitSGPRReadInVOP(const MCInst &Inst) const;
  // ...
  AsmToken peekToken(bool ShouldSkipSpace = true);
  // ...
  SMLoc getLoc() const;
  // ...
  void onBeginOfFile() override;
  bool parsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc) override;
  bool parseSwizzleOperand(int64_t &Op,
                           const unsigned MinVal,
                           const unsigned MaxVal,
                           const StringRef ErrMsg, SMLoc &Loc);
  bool parseSwizzleOperands(const unsigned OpNum, int64_t *Op,
                            const unsigned MinVal,
                            const unsigned MaxVal,
                            const StringRef ErrMsg);
  bool parseSwizzleOffset(int64_t &Imm);
  bool parseSwizzleMacro(int64_t &Imm);
  bool parseSwizzleQuadPerm(int64_t &Imm);
  bool parseSwizzleBitmaskPerm(int64_t &Imm);
  bool parseSwizzleBroadcast(int64_t &Imm);
  bool parseSwizzleSwap(int64_t &Imm);
  bool parseSwizzleReverse(int64_t &Imm);
  // ...
  int64_t parseGPRIdxMacro();
  // ... (cvt* conversion helpers, each taking
  //      OptionalImmIndexMap &OptionalIdx)
  bool parseDimId(unsigned &Encoding);
  bool convertDppBoundCtrl(int64_t &BoundCtrl);
  // ...
  int64_t parseDPPCtrlSel(StringRef Ctrl);
  int64_t parseDPPCtrlPerm();
  // ... (DPP conversion helpers; one takes bool IsDPP8 = false)
  // ... (SDWA helpers taking AMDGPUOperand::ImmTy Type and
  //      bool SkipDstVcc = false, bool SkipSrcVcc = false)
static const fltSemantics *getFltSemantics(unsigned Size) {
  switch (Size) {
  case 4:
    return &APFloat::IEEEsingle();
  case 8:
    return &APFloat::IEEEdouble();
  case 2:
    return &APFloat::IEEEhalf();
  default:
    llvm_unreachable("unsupported fp type");
  }
}

static const fltSemantics *getOpFltSemantics(uint8_t OperandType) {
  switch (OperandType) {
  // ... (32-bit operand types)
    return &APFloat::IEEEsingle();
  // ... (64-bit operand types)
    return &APFloat::IEEEdouble();
  // ... (16-bit operand types)
    return &APFloat::IEEEhalf();
  // ... (bf16 operand types)
    return &APFloat::BFloat();
  default:
    llvm_unreachable("unsupported fp type");
  }
}

  // ...
  //   Status = FPLiteral.convert(..., APFloat::rmNearestTiesToEven, ...);
  if (Status != APFloat::opOK &&
      // ...
      ((Status & APFloat::opOverflow) != 0 ||
       (Status & APFloat::opUnderflow) != 0)) {
    // ...
  }
bool AMDGPUOperand::isInlinableImm(MVT type) const {
  // ...
  if (!isImmTy(ImmTyNone)) {
    // Only plain immediates are inlinable (e.g. "clamp" attribute is not)
    return false;
  }
  // ...
  if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand
    // ... AsmParser->hasInv2PiInlineImm());
  }
  // ...
  //   FPLiteral.convert(..., APFloat::rmNearestTiesToEven, &Lost);
  // ...
  uint32_t ImmVal = FPLiteral.bitcastToAPInt().getZExtValue();
  // ... AsmParser->hasInv2PiInlineImm());
  // ...
  //   static_cast<int32_t>(FPLiteral.bitcastToAPInt().getZExtValue()),
  //   AsmParser->hasInv2PiInlineImm());
  // ...
  if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand
    // ... AsmParser->hasInv2PiInlineImm());
  }
  // ...
  //   static_cast<int16_t>(Literal.getLoBits(16).getSExtValue()),
  //   type, AsmParser->hasInv2PiInlineImm());
  // ...
  //   static_cast<int32_t>(Literal.getLoBits(32).getZExtValue()),
  //   AsmParser->hasInv2PiInlineImm());
}
bool AMDGPUOperand::isLiteralImm(MVT type) const {
  // ...
  if (!isImmTy(ImmTyNone)) {
    // ...
  }
  // ...
  if (type == MVT::f64 && hasFPModifiers()) {
    // ...
  }
  // ...
  if (type == MVT::f64) {
    // ...
  }
  if (type == MVT::i64) {
    // ...
  }
  // ...
  MVT ExpectedType = (type == MVT::v2f16) ? MVT::f16
                     : (type == MVT::v2i16) ? MVT::f32
                     : (type == MVT::v2f32) ? MVT::f32
                                            : type;
  // ...
}
bool AMDGPUOperand::isRegClass(unsigned RCID) const {
  return isRegKind() &&
         AsmParser->getMRI()->getRegClass(RCID).contains(getReg());
}

bool AMDGPUOperand::isVRegWithInputMods() const {
  return isRegClass(AMDGPU::VGPR_32RegClassID) ||
         // GFX90A allows DPP on 64-bit operands.
         (isRegClass(AMDGPU::VReg_64RegClassID) &&
          AsmParser->getFeatureBits()[AMDGPU::FeatureDPALU_DPP]);
}

template <bool IsFake16> bool AMDGPUOperand::isT16VRegWithInputMods() const {
  return isRegClass(IsFake16 ? AMDGPU::VGPR_32_Lo128RegClassID
                             : AMDGPU::VGPR_16_Lo128RegClassID);
}

bool AMDGPUOperand::isSDWAOperand(MVT type) const {
  if (AsmParser->isVI()) {
    // ...
  } else if (AsmParser->isGFX9Plus()) {
    return isRegClass(AMDGPU::VS_32RegClassID) || isInlinableImm(type);
  }
  // ...
}

bool AMDGPUOperand::isSDWAFP16Operand() const {
  return isSDWAOperand(MVT::f16);
}

bool AMDGPUOperand::isSDWAFP32Operand() const {
  return isSDWAOperand(MVT::f32);
}

bool AMDGPUOperand::isSDWAInt16Operand() const {
  return isSDWAOperand(MVT::i16);
}

bool AMDGPUOperand::isSDWAInt32Operand() const {
  return isSDWAOperand(MVT::i32);
}

bool AMDGPUOperand::isBoolReg() const {
  auto FB = AsmParser->getFeatureBits();
  return isReg() && ((FB[AMDGPU::FeatureWavefrontSize64] && isSCSrc_b64()) ||
                     (FB[AMDGPU::FeatureWavefrontSize32] && isSCSrc_b32()));
}

uint64_t AMDGPUOperand::applyInputFPModifiers(uint64_t Val,
                                              unsigned Size) const {
  assert(isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers());
  // ...
}
void AMDGPUOperand::addImmOperands(MCInst &Inst, unsigned N,
                                   bool ApplyModifiers) const {
  // ...
    addLiteralImmOperand(Inst, Imm.Val,
                         ApplyModifiers &&
                             isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers());
  // ...
    assert(!isImmTy(ImmTyNone) || !hasModifiers());
  // ...
}

void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val,
                                         bool ApplyModifiers) const {
  const auto &InstDesc = AsmParser->getMII()->get(Inst.getOpcode());
  // ...
  if (ApplyModifiers) {
    // ...
    Val = applyInputFPModifiers(Val, Size);
  }
  // ...
  uint8_t OpTy = InstDesc.operands()[OpNum].OperandType;
  // ...
  if (AMDGPU::isInlinableLiteral64(Literal.getZExtValue(),
                                   AsmParser->hasInv2PiInlineImm())) {
    // ...
  }
  // ...
  if (Literal.getLoBits(32) != 0) {
    const_cast<AMDGPUAsmParser *>(AsmParser)->Warning(
        Inst.getLoc(),
        "Can't encode literal as exact 64-bit floating-point operand. "
        "Low 32-bits will be set to zero");
    Val &= 0xffffffff00000000u;
  }
  // ...
  setImmKindLiteral();
  // ...
  // Special case: 1/(2*pi) can be encoded as an inline constant.
  if (AsmParser->hasInv2PiInlineImm() && Literal == 0x3fc45f306725feed) {
    // ...
    setImmKindLiteral();
  }
  // ...
  //   FPLiteral.convert(..., APFloat::rmNearestTiesToEven, &lost);
  // ...
  uint64_t ImmVal = FPLiteral.bitcastToAPInt().getZExtValue();
  // ...
  setImmKindMandatoryLiteral();
  // ...
  setImmKindLiteral();
  // ...
  // (The remaining operand-type cases follow the same pattern: values that
  //  fit an inline constant are checked via
  //  AMDGPU::isInlinableLiteral*(..., AsmParser->hasInv2PiInlineImm());
  //  everything else is recorded with setImmKindLiteral() or, for KIMM
  //  operands, setImmKindMandatoryLiteral().)
  setImmKindMandatoryLiteral();
}
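// Example of the 64-bit literal truncation above: 1.0 plus one ulp has
// nonzero low 32 bits and cannot be encoded exactly, so the parser warns and
// clears them.
static_assert((0x3FF0000000000001ull & 0xffffffff00000000ull) ==
                  0x3FF0000000000000ull,
              "low 32 bits of the 64-bit literal are zeroed");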
void AMDGPUOperand::addRegOperands(MCInst &Inst, unsigned N) const {
  // ...
}

bool AMDGPUOperand::isInlineValue() const {
  // ...
}

// ...

static int getRegClass(RegisterKind Is, unsigned RegWidth) {
  if (Is == IS_VGPR) {
    switch (RegWidth) {
    default: return -1;
    case 32:   return AMDGPU::VGPR_32RegClassID;
    case 64:   return AMDGPU::VReg_64RegClassID;
    case 96:   return AMDGPU::VReg_96RegClassID;
    case 128:  return AMDGPU::VReg_128RegClassID;
    case 160:  return AMDGPU::VReg_160RegClassID;
    case 192:  return AMDGPU::VReg_192RegClassID;
    case 224:  return AMDGPU::VReg_224RegClassID;
    case 256:  return AMDGPU::VReg_256RegClassID;
    case 288:  return AMDGPU::VReg_288RegClassID;
    case 320:  return AMDGPU::VReg_320RegClassID;
    case 352:  return AMDGPU::VReg_352RegClassID;
    case 384:  return AMDGPU::VReg_384RegClassID;
    case 512:  return AMDGPU::VReg_512RegClassID;
    case 1024: return AMDGPU::VReg_1024RegClassID;
    }
  } else if (Is == IS_TTMP) {
    switch (RegWidth) {
    default: return -1;
    case 32:  return AMDGPU::TTMP_32RegClassID;
    case 64:  return AMDGPU::TTMP_64RegClassID;
    case 128: return AMDGPU::TTMP_128RegClassID;
    case 256: return AMDGPU::TTMP_256RegClassID;
    case 512: return AMDGPU::TTMP_512RegClassID;
    }
  } else if (Is == IS_SGPR) {
    switch (RegWidth) {
    default: return -1;
    case 32:  return AMDGPU::SGPR_32RegClassID;
    case 64:  return AMDGPU::SGPR_64RegClassID;
    case 96:  return AMDGPU::SGPR_96RegClassID;
    case 128: return AMDGPU::SGPR_128RegClassID;
    case 160: return AMDGPU::SGPR_160RegClassID;
    case 192: return AMDGPU::SGPR_192RegClassID;
    case 224: return AMDGPU::SGPR_224RegClassID;
    case 256: return AMDGPU::SGPR_256RegClassID;
    case 288: return AMDGPU::SGPR_288RegClassID;
    case 320: return AMDGPU::SGPR_320RegClassID;
    case 352: return AMDGPU::SGPR_352RegClassID;
    case 384: return AMDGPU::SGPR_384RegClassID;
    case 512: return AMDGPU::SGPR_512RegClassID;
    }
  } else if (Is == IS_AGPR) {
    switch (RegWidth) {
    default: return -1;
    case 32:   return AMDGPU::AGPR_32RegClassID;
    case 64:   return AMDGPU::AReg_64RegClassID;
    case 96:   return AMDGPU::AReg_96RegClassID;
    case 128:  return AMDGPU::AReg_128RegClassID;
    case 160:  return AMDGPU::AReg_160RegClassID;
    case 192:  return AMDGPU::AReg_192RegClassID;
    case 224:  return AMDGPU::AReg_224RegClassID;
    case 256:  return AMDGPU::AReg_256RegClassID;
    case 288:  return AMDGPU::AReg_288RegClassID;
    case 320:  return AMDGPU::AReg_320RegClassID;
    case 352:  return AMDGPU::AReg_352RegClassID;
    case 384:  return AMDGPU::AReg_384RegClassID;
    case 512:  return AMDGPU::AReg_512RegClassID;
    case 1024: return AMDGPU::AReg_1024RegClassID;
    }
  }
  return -1;
}
static unsigned getSpecialRegForName(StringRef RegName) {
  return StringSwitch<unsigned>(RegName)
      .Case("exec", AMDGPU::EXEC)
      .Case("vcc", AMDGPU::VCC)
      .Case("flat_scratch", AMDGPU::FLAT_SCR)
      .Case("xnack_mask", AMDGPU::XNACK_MASK)
      .Case("shared_base", AMDGPU::SRC_SHARED_BASE)
      .Case("src_shared_base", AMDGPU::SRC_SHARED_BASE)
      .Case("shared_limit", AMDGPU::SRC_SHARED_LIMIT)
      .Case("src_shared_limit", AMDGPU::SRC_SHARED_LIMIT)
      .Case("private_base", AMDGPU::SRC_PRIVATE_BASE)
      .Case("src_private_base", AMDGPU::SRC_PRIVATE_BASE)
      .Case("private_limit", AMDGPU::SRC_PRIVATE_LIMIT)
      .Case("src_private_limit", AMDGPU::SRC_PRIVATE_LIMIT)
      .Case("pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID)
      .Case("src_pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID)
      .Case("lds_direct", AMDGPU::LDS_DIRECT)
      .Case("src_lds_direct", AMDGPU::LDS_DIRECT)
      .Case("m0", AMDGPU::M0)
      .Case("vccz", AMDGPU::SRC_VCCZ)
      .Case("src_vccz", AMDGPU::SRC_VCCZ)
      .Case("execz", AMDGPU::SRC_EXECZ)
      .Case("src_execz", AMDGPU::SRC_EXECZ)
      .Case("scc", AMDGPU::SRC_SCC)
      .Case("src_scc", AMDGPU::SRC_SCC)
      .Case("tba", AMDGPU::TBA)
      .Case("tma", AMDGPU::TMA)
      .Case("flat_scratch_lo", AMDGPU::FLAT_SCR_LO)
      .Case("flat_scratch_hi", AMDGPU::FLAT_SCR_HI)
      .Case("xnack_mask_lo", AMDGPU::XNACK_MASK_LO)
      .Case("xnack_mask_hi", AMDGPU::XNACK_MASK_HI)
      .Case("vcc_lo", AMDGPU::VCC_LO)
      .Case("vcc_hi", AMDGPU::VCC_HI)
      .Case("exec_lo", AMDGPU::EXEC_LO)
      .Case("exec_hi", AMDGPU::EXEC_HI)
      .Case("tma_lo", AMDGPU::TMA_LO)
      .Case("tma_hi", AMDGPU::TMA_HI)
      .Case("tba_lo", AMDGPU::TBA_LO)
      .Case("tba_hi", AMDGPU::TBA_HI)
      .Case("pc", AMDGPU::PC_REG)
      .Case("null", AMDGPU::SGPR_NULL)
      .Default(AMDGPU::NoRegister);
}
bool AMDGPUAsmParser::ParseRegister(MCRegister &RegNo, SMLoc &StartLoc,
                                    SMLoc &EndLoc, bool RestoreOnFailure) {
  auto R = parseRegister();
  if (!R)
    return true;
  RegNo = R->getReg();
  StartLoc = R->getStartLoc();
  EndLoc = R->getEndLoc();
  return false;
}

bool AMDGPUAsmParser::parseRegister(MCRegister &Reg, SMLoc &StartLoc,
                                    SMLoc &EndLoc) {
  return ParseRegister(Reg, StartLoc, EndLoc, false);
}

ParseStatus AMDGPUAsmParser::tryParseRegister(MCRegister &Reg, SMLoc &StartLoc,
                                              SMLoc &EndLoc) {
  bool Result = ParseRegister(Reg, StartLoc, EndLoc, true);
  bool PendingErrors = getParser().hasPendingError();
  getParser().clearPendingErrors();
  // ...
}
bool AMDGPUAsmParser::AddNextRegisterToList(unsigned &Reg, unsigned &RegWidth,
                                            RegisterKind RegKind, unsigned Reg1,
                                            SMLoc Loc) {
  switch (RegKind) {
  case IS_SPECIAL:
    if (Reg == AMDGPU::EXEC_LO && Reg1 == AMDGPU::EXEC_HI) {
      // ...
    }
    if (Reg == AMDGPU::FLAT_SCR_LO && Reg1 == AMDGPU::FLAT_SCR_HI) {
      Reg = AMDGPU::FLAT_SCR;
      // ...
    }
    if (Reg == AMDGPU::XNACK_MASK_LO && Reg1 == AMDGPU::XNACK_MASK_HI) {
      Reg = AMDGPU::XNACK_MASK;
      // ...
    }
    if (Reg == AMDGPU::VCC_LO && Reg1 == AMDGPU::VCC_HI) {
      // ...
    }
    if (Reg == AMDGPU::TBA_LO && Reg1 == AMDGPU::TBA_HI) {
      // ...
    }
    if (Reg == AMDGPU::TMA_LO && Reg1 == AMDGPU::TMA_HI) {
      // ...
    }
    Error(Loc, "register does not fit in the list");
    return false;
  case IS_VGPR:
  case IS_SGPR:
  case IS_AGPR:
  case IS_TTMP:
    if (Reg1 != Reg + RegWidth / 32) {
      Error(Loc, "registers in a list must have consecutive indices");
      return false;
    }
    RegWidth += 32;
    return true;
  default:
    llvm_unreachable("unexpected register kind");
  }
}

// ...
static constexpr RegInfo RegularRegisters[] = {
    // ...
    {{"ttmp"}, IS_TTMP},
    // ...
};

static bool isRegularReg(RegisterKind Kind) {
  return Kind == IS_VGPR ||
         Kind == IS_SGPR ||
         Kind == IS_TTMP ||
         Kind == IS_AGPR;
}

static const RegInfo *getRegularRegInfo(StringRef Str) {
  for (const RegInfo &Reg : RegularRegisters)
    if (Str.starts_with(Reg.Name))
      return &Reg;
  return nullptr;
}

static bool getRegNum(StringRef Str, unsigned &Num) {
  return !Str.getAsInteger(10, Num);
}
bool
AMDGPUAsmParser::isRegister(const AsmToken &Token,
                            const AsmToken &NextToken) const {
  // ...
  if (!RegSuffix.empty()) {
    // ...
  }
  // ...
}

bool
AMDGPUAsmParser::isRegister()
{
  return isRegister(getToken(), peekToken());
}
unsigned AMDGPUAsmParser::getRegularReg(RegisterKind RegKind, unsigned RegNum,
                                        unsigned SubReg, unsigned RegWidth,
                                        SMLoc Loc) {
  // ...
  unsigned AlignSize = 1;
  if (RegKind == IS_SGPR || RegKind == IS_TTMP) {
    // SGPR and TTMP registers must be aligned.
    // ...
  }

  if (RegNum % AlignSize != 0) {
    Error(Loc, "invalid register alignment");
    return AMDGPU::NoRegister;
  }

  unsigned RegIdx = RegNum / AlignSize;
  int RCID = getRegClass(RegKind, RegWidth);
  if (RCID == -1) {
    Error(Loc, "invalid or unsupported register size");
    return AMDGPU::NoRegister;
  }
  // ...
  if (RegIdx >= RC.getNumRegs()) {
    Error(Loc, "register index is out of range");
    return AMDGPU::NoRegister;
  }

  unsigned Reg = RC.getRegister(RegIdx);
  if (SubReg) {
    Reg = TRI->getSubReg(Reg, SubReg);
    assert(Reg && "Invalid subregister!");
  }

  return Reg;
}
bool AMDGPUAsmParser::ParseRegRange(unsigned &Num, unsigned &RegWidth) {
  int64_t RegLo, RegHi;
  // ...
  SMLoc FirstIdxLoc = getLoc();
  SMLoc SecondIdxLoc;

  if (!parseExpr(RegLo))
    return false;
  // ...
  SecondIdxLoc = getLoc();
  if (!parseExpr(RegHi))
    return false;
  // ...
  if (!isUInt<32>(RegLo)) {
    Error(FirstIdxLoc, "invalid register index");
    return false;
  }

  if (!isUInt<32>(RegHi)) {
    Error(SecondIdxLoc, "invalid register index");
    return false;
  }

  if (RegLo > RegHi) {
    Error(FirstIdxLoc, "first register index should not exceed second index");
    return false;
  }

  Num = static_cast<unsigned>(RegLo);
  RegWidth = 32 * ((RegHi - RegLo) + 1);
  return true;
}
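// Example: "s[4:7]" yields RegLo = 4 and RegHi = 7, so Num = 4 and the width
// is four 32-bit SGPRs.
static_assert(32 * ((7 - 4) + 1) == 128,
              "a four-register range spans 128 bits");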
unsigned AMDGPUAsmParser::ParseSpecialReg(RegisterKind &RegKind,
                                          unsigned &RegNum, unsigned &RegWidth,
                                          SmallVectorImpl<AsmToken> &Tokens) {
  // ...
  RegKind = IS_SPECIAL;
  // ...
}

unsigned AMDGPUAsmParser::ParseRegularReg(RegisterKind &RegKind,
                                          unsigned &RegNum, unsigned &RegWidth,
                                          SmallVectorImpl<AsmToken> &Tokens) {
  // ...
  auto Loc = getLoc();
  // ...
  const RegInfo *RI = getRegularRegInfo(RegName);
  if (!RI) {
    Error(Loc, "invalid register name");
    return AMDGPU::NoRegister;
  }
  // ...
  unsigned SubReg = NoSubRegister;
  if (!RegSuffix.empty()) {
    // Single 32-bit register, e.g. vXX.
    // ...
    if (!getRegNum(RegSuffix, RegNum)) {
      Error(Loc, "invalid register index");
      return AMDGPU::NoRegister;
    }
    RegWidth = 32;
  } else {
    // Range of registers, e.g. v[XX:YY].
    if (!ParseRegRange(RegNum, RegWidth))
      return AMDGPU::NoRegister;
  }

  return getRegularReg(RegKind, RegNum, SubReg, RegWidth, Loc);
}
unsigned AMDGPUAsmParser::ParseRegList(RegisterKind &RegKind, unsigned &RegNum,
                                       unsigned &RegWidth,
                                       SmallVectorImpl<AsmToken> &Tokens) {
  unsigned Reg = AMDGPU::NoRegister;
  auto ListLoc = getLoc();

  if (!skipToken(AsmToken::LBrac,
                 "expected a register or a list of registers")) {
    return AMDGPU::NoRegister;
  }

  // The first register in the list must be a single 32-bit register.
  auto Loc = getLoc();
  if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth))
    return AMDGPU::NoRegister;
  if (RegWidth != 32) {
    Error(Loc, "expected a single 32-bit register");
    return AMDGPU::NoRegister;
  }

  for (; trySkipToken(AsmToken::Comma); ) {
    RegisterKind NextRegKind;
    unsigned NextReg, NextRegNum, NextRegWidth;
    Loc = getLoc();

    if (!ParseAMDGPURegister(NextRegKind, NextReg,
                             NextRegNum, NextRegWidth,
                             Tokens)) {
      return AMDGPU::NoRegister;
    }
    if (NextRegWidth != 32) {
      Error(Loc, "expected a single 32-bit register");
      return AMDGPU::NoRegister;
    }
    if (NextRegKind != RegKind) {
      Error(Loc, "registers in a list must be of the same kind");
      return AMDGPU::NoRegister;
    }
    if (!AddNextRegisterToList(Reg, RegWidth, RegKind, NextReg, Loc))
      return AMDGPU::NoRegister;
  }

  if (!skipToken(AsmToken::RBrac,
                 "expected a comma or a closing square bracket")) {
    return AMDGPU::NoRegister;
  }

  if (isRegularReg(RegKind))
    Reg = getRegularReg(RegKind, RegNum, NoSubRegister, RegWidth, ListLoc);

  return Reg;
}
bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
                                          unsigned &RegNum, unsigned &RegWidth,
                                          SmallVectorImpl<AsmToken> &Tokens) {
  auto Loc = getLoc();
  Reg = AMDGPU::NoRegister;
  // ...
    Reg = ParseSpecialReg(RegKind, RegNum, RegWidth, Tokens);
    if (Reg == AMDGPU::NoRegister)
      Reg = ParseRegularReg(RegKind, RegNum, RegWidth, Tokens);
  // ...
    Reg = ParseRegList(RegKind, RegNum, RegWidth, Tokens);
  // ...
  if (Reg == AMDGPU::NoRegister) {
    assert(Parser.hasPendingError());
    return false;
  }

  if (!subtargetHasRegister(*TRI, Reg)) {
    if (Reg == AMDGPU::SGPR_NULL) {
      Error(Loc, "'null' operand is not supported on this GPU");
    } else {
      Error(Loc, "register not available on this GPU");
    }
    return false;
  }
  return true;
}

bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
                                          unsigned &RegNum, unsigned &RegWidth,
                                          bool RestoreOnFailure /*=false*/) {
  Reg = AMDGPU::NoRegister;
  // ...
  if (ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth, Tokens)) {
    if (RestoreOnFailure) {
      while (!Tokens.empty()) {
        // ...
      }
    }
  }
  // ...
}
std::optional<StringRef>
AMDGPUAsmParser::getGprCountSymbolName(RegisterKind RegKind) {
  switch (RegKind) {
  case IS_VGPR:
    return StringRef(".amdgcn.next_free_vgpr");
  case IS_SGPR:
    return StringRef(".amdgcn.next_free_sgpr");
  default:
    return std::nullopt;
  }
}

void AMDGPUAsmParser::initializeGprCountSymbol(RegisterKind RegKind) {
  auto SymbolName = getGprCountSymbolName(RegKind);
  assert(SymbolName && "initializing invalid register kind");
  MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName);
  // ...
}

bool AMDGPUAsmParser::updateGprCountSymbols(RegisterKind RegKind,
                                            unsigned DwordRegIndex,
                                            unsigned RegWidth) {
  // ...
  auto SymbolName = getGprCountSymbolName(RegKind);
  // ...
  MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName);

  int64_t NewMax = DwordRegIndex + divideCeil(RegWidth, 32) - 1;
  int64_t OldCount;
  // ...
  if (!Sym->isVariable())
    return !Error(getLoc(),
                  ".amdgcn.next_free_{v,s}gpr symbols must be variable");
  if (!Sym->getVariableValue(false)->evaluateAsAbsolute(OldCount))
    return !Error(
        getLoc(),
        ".amdgcn.next_free_{v,s}gpr symbols must be absolute expressions");

  if (OldCount <= NewMax)
    // ...
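// Example: after "v[8:11]" is parsed, NewMax = 8 + divideCeil(128, 32) - 1,
// so .amdgcn.next_free_vgpr is raised to cover v11 if its current value is
// lower.
static_assert(8 + (128 + 31) / 32 - 1 == 11,
              "v[8:11] requires the next-free-vgpr symbol to be at least 12");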
std::unique_ptr<AMDGPUOperand>
AMDGPUAsmParser::parseRegister(bool RestoreOnFailure) {
  const auto &Tok = getToken();
  SMLoc StartLoc = Tok.getLoc();
  SMLoc EndLoc = Tok.getEndLoc();
  RegisterKind RegKind;
  unsigned Reg, RegNum, RegWidth;

  if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth)) {
    return nullptr;
  }
  // ...
    if (!updateGprCountSymbols(RegKind, RegNum, RegWidth))
      return nullptr;
  // ...
    KernelScope.usesRegister(RegKind, RegNum, RegWidth);
  return AMDGPUOperand::CreateReg(this, Reg, StartLoc, EndLoc);
}
ParseStatus AMDGPUAsmParser::parseImm(OperandVector &Operands,
                                      bool HasSP3AbsModifier, bool HasLit) {
  // ...
  HasLit = trySkipId("lit");
  // ...
  const auto &Tok = getToken();
  const auto &NextTok = peekToken();
  // ...
  bool Negate = false;
  // ...
  AMDGPUOperand::Modifiers Mods;
  // ...

  // Floating-point literal:
  APFloat RealVal(APFloat::IEEEdouble());
  auto roundMode = APFloat::rmNearestTiesToEven;
  if (errorToBool(RealVal.convertFromString(Num, roundMode).takeError()))
    return ParseStatus::Failure;
  if (Negate)
    RealVal.changeSign();

  Operands.push_back(
      AMDGPUOperand::CreateImm(this, RealVal.bitcastToAPInt().getZExtValue(), S,
                               AMDGPUOperand::ImmTyNone, true));
  AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
  Op.setModifiers(Mods);
  // ...

  // Integer or symbolic expression:
  const MCExpr *Expr;
  int64_t IntVal;
  if (HasSP3AbsModifier) {
    // ...
    if (getParser().parsePrimaryExpr(Expr, EndLoc, nullptr))
      return ParseStatus::Failure;
  } else {
    if (Parser.parseExpression(Expr))
      return ParseStatus::Failure;
  }

  if (Expr->evaluateAsAbsolute(IntVal)) {
    Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S));
    AMDGPUOperand &ImmOp = static_cast<AMDGPUOperand &>(*Operands.back());
    ImmOp.setModifiers(Mods);
  } else {
    // ...
    Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S));
  }
  // ...
}
ParseStatus AMDGPUAsmParser::parseReg(OperandVector &Operands) {
  // ...
  if (auto R = parseRegister()) {
    // ...
  }
  // ...
}

ParseStatus AMDGPUAsmParser::parseRegOrImm(OperandVector &Operands,
                                           bool HasSP3AbsMod, bool HasLit) {
  // ...
  return parseImm(Operands, HasSP3AbsMod, HasLit);
}
bool
AMDGPUAsmParser::isNamedOperandModifier(const AsmToken &Token,
                                        const AsmToken &NextToken) const {
  // ...
  return str == "abs" || str == "neg" || str == "sext";
}

bool
AMDGPUAsmParser::isOpcodeModifierWithVal(const AsmToken &Token,
                                         const AsmToken &NextToken) const {
  // ...
}

bool
AMDGPUAsmParser::isOperandModifier(const AsmToken &Token,
                                   const AsmToken &NextToken) const {
  return isNamedOperandModifier(Token, NextToken) ||
         Token.is(AsmToken::Pipe);
}

bool
AMDGPUAsmParser::isRegOrOperandModifier(const AsmToken &Token,
                                        const AsmToken &NextToken) const {
  return isRegister(Token, NextToken) || isOperandModifier(Token, NextToken);
}

bool
AMDGPUAsmParser::isModifier() {
  // ...
  peekTokens(NextToken);
  // ...
  return isOperandModifier(Tok, NextToken[0]) ||
         (Tok.is(AsmToken::Minus) &&
          isRegOrOperandModifier(NextToken[0], NextToken[1])) ||
         isOpcodeModifierWithVal(Tok, NextToken[0]);
}
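// Taken together, these checks classify tokens such as "abs(", "neg(",
// "sext(", a leading '|' (SP3-style abs), or '-' followed by a register as
// operand modifiers rather than the start of a new operand.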
bool
AMDGPUAsmParser::parseSP3NegModifier() {
  // ...
  peekTokens(NextToken);
  // ...
  if (isToken(AsmToken::Minus) &&
      (isRegister(NextToken[0], NextToken[1]) ||
       NextToken[0].is(AsmToken::Pipe) ||
       isId(NextToken[0], "abs"))) {
    // ...
  }
  // ...
}

ParseStatus
AMDGPUAsmParser::parseRegOrImmWithFPInputMods(OperandVector &Operands,
                                              bool AllowImm) {
  // ...
  if (isToken(AsmToken::Minus) && peekToken().is(AsmToken::Minus))
    return Error(getLoc(), "invalid syntax, expected 'neg' modifier");

  SP3Neg = parseSP3NegModifier();

  Loc = getLoc();
  Neg = trySkipId("neg");
  if (Neg && SP3Neg)
    return Error(Loc, "expected register or immediate");
  // ...
  Abs = trySkipId("abs");
  // ...
  Lit = trySkipId("lit");
  // ...
  if (Abs && SP3Abs)
    return Error(Loc, "expected register or immediate");
  // ...
    Res = parseRegOrImm(Operands, SP3Abs, Lit);
  // ...
  if (Lit && !Operands.back()->isImm())
    Error(Loc, "expected immediate with lit modifier");

  if (SP3Abs && !skipToken(AsmToken::Pipe, "expected vertical bar"))
    return ParseStatus::Failure;
  // ...
  AMDGPUOperand::Modifiers Mods;
  Mods.Abs = Abs || SP3Abs;
  Mods.Neg = Neg || SP3Neg;
  // ...
  if (Mods.hasFPModifiers() || Lit) {
    AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
    if (Op.isExpr())
      return Error(Op.getStartLoc(), "expected an absolute expression");
    Op.setModifiers(Mods);
  }
  // ...
}
ParseStatus
AMDGPUAsmParser::parseRegOrImmWithIntInputMods(OperandVector &Operands,
                                               bool AllowImm) {
  bool Sext = trySkipId("sext");
  if (Sext && !skipToken(AsmToken::LParen, "expected left paren after sext"))
    return ParseStatus::Failure;
  // ...
  AMDGPUOperand::Modifiers Mods;
  // ...
  if (Mods.hasIntModifiers()) {
    AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
    if (Op.isExpr())
      return Error(Op.getStartLoc(), "expected an absolute expression");
    Op.setModifiers(Mods);
  }
  // ...
}

ParseStatus AMDGPUAsmParser::parseRegWithFPInputMods(OperandVector &Operands) {
  return parseRegOrImmWithFPInputMods(Operands, false);
}

ParseStatus AMDGPUAsmParser::parseRegWithIntInputMods(OperandVector &Operands) {
  return parseRegOrImmWithIntInputMods(Operands, false);
}
ParseStatus AMDGPUAsmParser::parseVReg32OrOff(OperandVector &Operands) {
  auto Loc = getLoc();
  if (trySkipId("off")) {
    Operands.push_back(AMDGPUOperand::CreateImm(this, 0, Loc,
                                                AMDGPUOperand::ImmTyOff, false));
    return ParseStatus::Success;
  }
  // ...
  std::unique_ptr<AMDGPUOperand> Reg = parseRegister();
  if (Reg) {
    Operands.push_back(std::move(Reg));
    return ParseStatus::Success;
  }
  // ...
}
unsigned AMDGPUAsmParser::checkTargetMatchPredicate(MCInst &Inst) {
  // ...
    return Match_InvalidOperand;
  // ...
  if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi ||
      Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) {
    // ...
    if (!Op.isImm() || Op.getImm() != AMDGPU::SDWA::SdwaSel::DWORD) {
      return Match_InvalidOperand;
    }
  }

  return Match_Success;
}

static const unsigned Variants[] = {
    // ...
};

// ...
  if (isForcedDPP() && isForcedVOP3()) {
    // ...
  }
  if (getForcedEncodingSize() == 32) {
    // ...
  }
  if (isForcedVOP3()) {
    // ...
  }
  if (isForcedSDWA()) {
    // ...
  }
  if (isForcedDPP()) {
    // ...
  }
  // ...

StringRef AMDGPUAsmParser::getMatchedVariantName() const {
  if (isForcedDPP() && isForcedVOP3())
    // ...
  if (getForcedEncodingSize() == 32)
    // ...
  // ...
}
unsigned AMDGPUAsmParser::findImplicitSGPRReadInVOP(const MCInst &Inst) const {
  // ...
    switch (Reg) {
    case AMDGPU::FLAT_SCR:
    // ...
    case AMDGPU::VCC_LO:
    case AMDGPU::VCC_HI:
      // ...
      return Reg;
    default:
      break;
    }
  // ...
  return AMDGPU::NoRegister;
}

bool AMDGPUAsmParser::isInlineConstant(const MCInst &Inst,
                                       unsigned OpIdx) const {
  // ...
  int64_t Val = MO.getImm();
  // ...
}
unsigned AMDGPUAsmParser::getConstantBusLimit(unsigned Opcode) const {
  if (!isGFX10Plus())
    return 1;

  switch (Opcode) {
  // 64-bit shift instructions can use only one scalar value input
  case AMDGPU::V_LSHLREV_B64_e64:
  case AMDGPU::V_LSHLREV_B64_gfx10:
  case AMDGPU::V_LSHLREV_B64_e64_gfx11:
  case AMDGPU::V_LSHLREV_B64_e32_gfx12:
  case AMDGPU::V_LSHLREV_B64_e64_gfx12:
  case AMDGPU::V_LSHRREV_B64_e64:
  case AMDGPU::V_LSHRREV_B64_gfx10:
  case AMDGPU::V_LSHRREV_B64_e64_gfx11:
  case AMDGPU::V_LSHRREV_B64_e64_gfx12:
  case AMDGPU::V_ASHRREV_I64_e64:
  case AMDGPU::V_ASHRREV_I64_gfx10:
  case AMDGPU::V_ASHRREV_I64_e64_gfx11:
  case AMDGPU::V_ASHRREV_I64_e64_gfx12:
  case AMDGPU::V_LSHL_B64_e64:
  case AMDGPU::V_LSHR_B64_e64:
  case AMDGPU::V_ASHR_I64_e64:
    return 1;
  default:
    return 2;
  }
}
static OperandIndices getSrcOperandIndices(unsigned Opcode,
                                           bool AddMandatoryLiterals = false) {
  // ...
  //   int16_t ImmDeferredIdx = ...;
  // ...
}

bool AMDGPUAsmParser::usesConstantBus(const MCInst &Inst, unsigned OpIdx) {
  const MCOperand &MO = Inst.getOperand(OpIdx);
  if (MO.isImm()) {
    return !isInlineConstant(Inst, OpIdx);
  } else if (MO.isReg()) {
    // ...
    return isSGPR(PReg, TRI) && PReg != SGPR_NULL;
  }
  // ...
}

static bool checkWriteLane(const MCInst &Inst) {
  const unsigned Opcode = Inst.getOpcode();
  if (Opcode != V_WRITELANE_B32_gfx6_gfx7 && Opcode != V_WRITELANE_B32_vi)
    return false;
  // ...
  if (!LaneSelOp.isReg())
    return false;
  // ...
  return LaneSelReg == M0 || LaneSelReg == M0_gfxpre11;
}
bool AMDGPUAsmParser::validateConstantBusLimitations(
    const MCInst &Inst, const OperandVector &Operands) {
  const unsigned Opcode = Inst.getOpcode();
  // ...
  unsigned LastSGPR = AMDGPU::NoRegister;
  unsigned ConstantBusUseCount = 0;
  unsigned NumLiterals = 0;
  unsigned LiteralSize;

  if (!(Desc.TSFlags &
        (SIInstrFlags::VOPC | SIInstrFlags::VOP1 | SIInstrFlags::VOP2 |
         SIInstrFlags::VOP3 | SIInstrFlags::VOP3P | SIInstrFlags::SDWA)))
    return true;
  // ...
  unsigned SGPRUsed = findImplicitSGPRReadInVOP(Inst);
  if (SGPRUsed != AMDGPU::NoRegister) {
    SGPRsUsed.insert(SGPRUsed);
    ++ConstantBusUseCount;
  }
  // ...
  for (int OpIdx : OpIndices) {
    // ...
    if (usesConstantBus(Inst, OpIdx)) {
      // ...
      if (SGPRsUsed.insert(LastSGPR).second) {
        ++ConstantBusUseCount;
      }
      // ...
    }
  }
  // ...
  // Literals consume the constant bus too:
  if (NumLiterals == 0) {
    // ...
  } else if (LiteralSize != Size) {
    // ...
  }
  // ...
  ConstantBusUseCount += NumLiterals;

  if (ConstantBusUseCount <= getConstantBusLimit(Opcode))
    return true;
  // ...
  Error(Loc, "invalid operand (violates constant bus restrictions)");
  return false;
}
bool AMDGPUAsmParser::validateVOPDRegBankConstraints(
    // ...
  const unsigned Opcode = Inst.getOpcode();
  // ...
  auto getVRegIdx = [&](unsigned, unsigned OperandIdx) {
    // ...
  };

  bool SkipSrc = Opcode == AMDGPU::V_DUAL_MOV_B32_e32_X_MOV_B32_e32_gfx12;

  auto InvalidCompOprIdx =
      InstInfo.getInvalidCompOperandIndex(getVRegIdx, SkipSrc);
  if (!InvalidCompOprIdx)
    // ...

  auto CompOprIdx = *InvalidCompOprIdx;
  // ...
      std::max(InstInfo[VOPD::X].getIndexInParsedOperands(CompOprIdx),
               InstInfo[VOPD::Y].getIndexInParsedOperands(CompOprIdx));
  // ...
  auto Loc = ((AMDGPUOperand &)*Operands[ParsedIdx]).getStartLoc();
  if (CompOprIdx == VOPD::Component::DST) {
    Error(Loc, "one dst register must be even and the other odd");
  } else {
    auto CompSrcIdx = CompOprIdx - VOPD::Component::DST_NUM;
    // ...
          " operands must use different VGPR banks");
  }
  // ...
}
bool AMDGPUAsmParser::validateIntClampSupported(const MCInst &Inst) {
  // ...
}

bool AMDGPUAsmParser::validateMIMGDataSize(const MCInst &Inst,
                                           const SMLoc &IDLoc) {
  // ...
  unsigned TFESize = (TFEIdx != -1 && Inst.getOperand(TFEIdx).getImm()) ? 1 : 0;
  // ...
  bool IsPackedD16 = false;
  // ...
    IsPackedD16 = D16Idx >= 0;
    // ...
      DataSize = (DataSize + 1) / 2;
  // ...
  if ((VDataSize / 4) == DataSize + TFESize)
    // ...

  Modifiers = IsPackedD16 ? "dmask and d16" : "dmask";
  // ...
  Modifiers = IsPackedD16 ? "dmask, d16 and tfe" : "dmask and tfe";

  Error(IDLoc, Twine("image data size does not match ") + Modifiers);
  // ...
}
bool AMDGPUAsmParser::validateMIMGAddrSize(const MCInst &Inst,
                                           const SMLoc &IDLoc) {
  // ...
                                 : AMDGPU::OpName::rsrc;
  // ...
  assert(SrsrcIdx > VAddr0Idx);

  if (BaseOpcode->BVH) {
    if (IsA16 == BaseOpcode->A16)
      // ...
    Error(IDLoc, "image address size does not match a16");
    // ...
  }

  bool IsNSA = SrsrcIdx - VAddr0Idx > 1;
  unsigned ActualAddrSize =
      IsNSA ? SrsrcIdx - VAddr0Idx
            // ...
  unsigned ExpectedAddrSize =
      // ...

  if (hasPartialNSAEncoding() &&
      // ...
    int VAddrLastIdx = SrsrcIdx - 1;
    unsigned VAddrLastSize =
        // ...
    ActualAddrSize = VAddrLastIdx - VAddr0Idx + VAddrLastSize;
  }
  // ...
    if (ExpectedAddrSize > 12)
      ExpectedAddrSize = 16;
    // ...
    if (ActualAddrSize == 8 && (ExpectedAddrSize >= 5 && ExpectedAddrSize <= 7))
      // ...

  if (ActualAddrSize == ExpectedAddrSize)
    // ...

  Error(IDLoc, "image address size does not match dim and a16");
  // ...
}
bool AMDGPUAsmParser::validateMIMGAtomicDMask(const MCInst &Inst) {
  // ...
  if (!Desc.mayLoad() || !Desc.mayStore())
    // ...
  return DMask == 0x1 || DMask == 0x3 || DMask == 0xf;
}

bool AMDGPUAsmParser::validateMIMGGatherDMask(const MCInst &Inst) {
  // ...
  return DMask == 0x1 || DMask == 0x2 || DMask == 0x4 || DMask == 0x8;
}

bool AMDGPUAsmParser::validateMIMGMSAA(const MCInst &Inst) {
  // ...
  if (!BaseOpcode->MSAA)
    // ...
  return DimInfo->MSAA;
}
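// Illustration (added example, not from the upstream source): gather4 returns
// one channel across four texels, so exactly one dmask bit may be set:
//   image_gather4 ... dmask:0x1   ; ok
//   image_gather4 ... dmask:0x3   ; error: only one bit must be set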
  // ...
  case AMDGPU::V_MOVRELS_B32_sdwa_gfx10:
  case AMDGPU::V_MOVRELSD_B32_sdwa_gfx10:
  case AMDGPU::V_MOVRELSD_2_B32_sdwa_gfx10:
  // ...

bool AMDGPUAsmParser::validateMovrels(const MCInst &Inst,
                                      // ...
  Error(ErrLoc, "source operand must be a VGPR");
  // ...
}

bool AMDGPUAsmParser::validateMAIAccWrite(const MCInst &Inst,
                                          // ...
  if (Opc != AMDGPU::V_ACCVGPR_WRITE_B32_vi)
    // ...
      "source operand must be either a VGPR or an inline constant");
  // ...
}

bool AMDGPUAsmParser::validateMAISrc2(const MCInst &Inst,
                                      // ...
      !getFeatureBits()[FeatureMFMAInlineLiteralBug])
    // ...
  if (Inst.getOperand(Src2Idx).isImm() && isInlineConstant(Inst, Src2Idx)) {
    // ...
        "inline constants are not allowed for this operand");
  }
  // ...
}
bool AMDGPUAsmParser::validateMFMA(const MCInst &Inst,
                                   // ...
  if (Src2Reg == DstReg)
    // ...

  if (TRI->getRegClass(Desc.operands()[0].RegClass).getSizeInBits() <= 128)
    // ...

  if (TRI->regsOverlap(Src2Reg, DstReg)) {
    // ...
        "source 2 operand must not partially overlap with dst");
  }
  // ...
}

bool AMDGPUAsmParser::validateDivScale(const MCInst &Inst) {
  // ...
  case V_DIV_SCALE_F32_gfx6_gfx7:
  case V_DIV_SCALE_F32_vi:
  case V_DIV_SCALE_F32_gfx10:
  case V_DIV_SCALE_F64_gfx6_gfx7:
  case V_DIV_SCALE_F64_vi:
  case V_DIV_SCALE_F64_gfx10:
  // ...

  // The listing duplicated src2_modifiers here; the loop must visit each
  // source modifier operand once, so the middle entry is src1_modifiers.
  for (auto Name : {AMDGPU::OpName::src0_modifiers,
                    AMDGPU::OpName::src1_modifiers,
                    AMDGPU::OpName::src2_modifiers}) {
    // ...
  }
  // ...
}
bool AMDGPUAsmParser::validateMIMGD16(const MCInst &Inst) {
  // ...
}

// ...
  case AMDGPU::V_SUBREV_F32_e32:
  case AMDGPU::V_SUBREV_F32_e64:
  case AMDGPU::V_SUBREV_F32_e32_gfx10:
  case AMDGPU::V_SUBREV_F32_e32_gfx6_gfx7:
  case AMDGPU::V_SUBREV_F32_e32_vi:
  case AMDGPU::V_SUBREV_F32_e64_gfx10:
  case AMDGPU::V_SUBREV_F32_e64_gfx6_gfx7:
  case AMDGPU::V_SUBREV_F32_e64_vi:

  case AMDGPU::V_SUBREV_CO_U32_e32:
  case AMDGPU::V_SUBREV_CO_U32_e64:
  case AMDGPU::V_SUBREV_I32_e32_gfx6_gfx7:
  case AMDGPU::V_SUBREV_I32_e64_gfx6_gfx7:

  case AMDGPU::V_SUBBREV_U32_e32:
  case AMDGPU::V_SUBBREV_U32_e64:
  case AMDGPU::V_SUBBREV_U32_e32_gfx6_gfx7:
  case AMDGPU::V_SUBBREV_U32_e32_vi:
  case AMDGPU::V_SUBBREV_U32_e64_gfx6_gfx7:
  case AMDGPU::V_SUBBREV_U32_e64_vi:

  case AMDGPU::V_SUBREV_U32_e32:
  case AMDGPU::V_SUBREV_U32_e64:
  case AMDGPU::V_SUBREV_U32_e32_gfx9:
  case AMDGPU::V_SUBREV_U32_e32_vi:
  case AMDGPU::V_SUBREV_U32_e64_gfx9:
  case AMDGPU::V_SUBREV_U32_e64_vi:

  case AMDGPU::V_SUBREV_F16_e32:
  case AMDGPU::V_SUBREV_F16_e64:
  case AMDGPU::V_SUBREV_F16_e32_gfx10:
  case AMDGPU::V_SUBREV_F16_e32_vi:
  case AMDGPU::V_SUBREV_F16_e64_gfx10:
  case AMDGPU::V_SUBREV_F16_e64_vi:

  case AMDGPU::V_SUBREV_U16_e32:
  case AMDGPU::V_SUBREV_U16_e64:
  case AMDGPU::V_SUBREV_U16_e32_vi:
  case AMDGPU::V_SUBREV_U16_e64_vi:

  case AMDGPU::V_SUBREV_CO_U32_e32_gfx9:
  case AMDGPU::V_SUBREV_CO_U32_e64_gfx10:
  case AMDGPU::V_SUBREV_CO_U32_e64_gfx9:

  case AMDGPU::V_SUBBREV_CO_U32_e32_gfx9:
  case AMDGPU::V_SUBBREV_CO_U32_e64_gfx9:

  case AMDGPU::V_SUBREV_NC_U32_e32_gfx10:
  case AMDGPU::V_SUBREV_NC_U32_e64_gfx10:

  case AMDGPU::V_SUBREV_CO_CI_U32_e32_gfx10:
  case AMDGPU::V_SUBREV_CO_CI_U32_e64_gfx10:

  case AMDGPU::V_LSHRREV_B32_e32:
  case AMDGPU::V_LSHRREV_B32_e64:
  case AMDGPU::V_LSHRREV_B32_e32_gfx6_gfx7:
  case AMDGPU::V_LSHRREV_B32_e64_gfx6_gfx7:
  case AMDGPU::V_LSHRREV_B32_e32_vi:
  case AMDGPU::V_LSHRREV_B32_e64_vi:
  case AMDGPU::V_LSHRREV_B32_e32_gfx10:
  case AMDGPU::V_LSHRREV_B32_e64_gfx10:

  case AMDGPU::V_ASHRREV_I32_e32:
  case AMDGPU::V_ASHRREV_I32_e64:
  case AMDGPU::V_ASHRREV_I32_e32_gfx10:
  case AMDGPU::V_ASHRREV_I32_e32_gfx6_gfx7:
  case AMDGPU::V_ASHRREV_I32_e32_vi:
  case AMDGPU::V_ASHRREV_I32_e64_gfx10:
  case AMDGPU::V_ASHRREV_I32_e64_gfx6_gfx7:
  case AMDGPU::V_ASHRREV_I32_e64_vi:

  case AMDGPU::V_LSHLREV_B32_e32:
  case AMDGPU::V_LSHLREV_B32_e64:
  case AMDGPU::V_LSHLREV_B32_e32_gfx10:
  case AMDGPU::V_LSHLREV_B32_e32_gfx6_gfx7:
  case AMDGPU::V_LSHLREV_B32_e32_vi:
  case AMDGPU::V_LSHLREV_B32_e64_gfx10:
  case AMDGPU::V_LSHLREV_B32_e64_gfx6_gfx7:
  case AMDGPU::V_LSHLREV_B32_e64_vi:

  case AMDGPU::V_LSHLREV_B16_e32:
  case AMDGPU::V_LSHLREV_B16_e64:
  case AMDGPU::V_LSHLREV_B16_e32_vi:
  case AMDGPU::V_LSHLREV_B16_e64_vi:
  case AMDGPU::V_LSHLREV_B16_gfx10:

  case AMDGPU::V_LSHRREV_B16_e32:
  case AMDGPU::V_LSHRREV_B16_e64:
  case AMDGPU::V_LSHRREV_B16_e32_vi:
  case AMDGPU::V_LSHRREV_B16_e64_vi:
  case AMDGPU::V_LSHRREV_B16_gfx10:

  case AMDGPU::V_ASHRREV_I16_e32:
  case AMDGPU::V_ASHRREV_I16_e64:
  case AMDGPU::V_ASHRREV_I16_e32_vi:
  case AMDGPU::V_ASHRREV_I16_e64_vi:
  case AMDGPU::V_ASHRREV_I16_gfx10:

  case AMDGPU::V_LSHLREV_B64_e64:
  case AMDGPU::V_LSHLREV_B64_gfx10:
  case AMDGPU::V_LSHLREV_B64_vi:

  case AMDGPU::V_LSHRREV_B64_e64:
  case AMDGPU::V_LSHRREV_B64_gfx10:
  case AMDGPU::V_LSHRREV_B64_vi:

  case AMDGPU::V_ASHRREV_I64_e64:
  case AMDGPU::V_ASHRREV_I64_gfx10:
  case AMDGPU::V_ASHRREV_I64_vi:

  case AMDGPU::V_PK_LSHLREV_B16:
  case AMDGPU::V_PK_LSHLREV_B16_gfx10:
  case AMDGPU::V_PK_LSHLREV_B16_vi:

  case AMDGPU::V_PK_LSHRREV_B16:
  case AMDGPU::V_PK_LSHRREV_B16_gfx10:
  case AMDGPU::V_PK_LSHRREV_B16_vi:
  case AMDGPU::V_PK_ASHRREV_I16:
  case AMDGPU::V_PK_ASHRREV_I16_gfx10:
  case AMDGPU::V_PK_ASHRREV_I16_vi:
  // ...
std::optional<StringRef>
AMDGPUAsmParser::validateLdsDirect(const MCInst &Inst) {
  using namespace SIInstrFlags;
  const unsigned Opcode = Inst.getOpcode();
  // ...
  if ((Desc.TSFlags & Enc) == 0)
    return std::nullopt;

  for (auto SrcName : {OpName::src0, OpName::src1, OpName::src2}) {
    // ...
    if (Src.isReg() && Src.getReg() == LDS_DIRECT) {
      // ...
        return StringRef("lds_direct is not supported on this GPU");
      // ...
        return StringRef("lds_direct cannot be used with this instruction");
      if (SrcName != OpName::src0)
        return StringRef("lds_direct may be used as src0 only");
    }
  }

  return std::nullopt;
}
// ...
  for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
    if (Op.isFlatOffset())
      return Op.getStartLoc();
  }
  // ...

bool AMDGPUAsmParser::validateOffset(const MCInst &Inst,
                                     // ...
    return validateFlatOffset(Inst, Operands);
  // ...
    return validateSMEMOffset(Inst, Operands);
  // ...
  {
    const unsigned OffsetSize = 24;
    if (!isIntN(OffsetSize, Op.getImm())) {
      // ...
            Twine("expected a ") + Twine(OffsetSize) + "-bit signed offset");
      // ...
    }
  }
  // ...
  {
    const unsigned OffsetSize = 16;
    if (!isUIntN(OffsetSize, Op.getImm())) {
      // ...
            Twine("expected a ") + Twine(OffsetSize) + "-bit unsigned offset");
      // ...
    }
  }
  // ...
}
bool AMDGPUAsmParser::validateFlatOffset(const MCInst &Inst,
                                         // ...
  if (!hasFlatOffsets() && Op.getImm() != 0) {
    // ...
        "flat offset modifier is not supported on this GPU");
    // ...
  }

  // ...
  bool AllowNegative =
      // ...
  if (!isIntN(OffsetSize, Op.getImm()) || (!AllowNegative && Op.getImm() < 0)) {
    // ...
          Twine("expected a ") +
          (AllowNegative ? Twine(OffsetSize) + "-bit signed offset"
                         : Twine(OffsetSize - 1) + "-bit unsigned offset"));
    // ...
  }
  // ...
}
// ...
  for (unsigned i = 2, e = Operands.size(); i != e; ++i) {
    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
    if (Op.isSMEMOffset() || Op.isSMEMOffsetMod())
      return Op.getStartLoc();
  }
  // ...

bool AMDGPUAsmParser::validateSMEMOffset(const MCInst &Inst,
                                         // ...
            : (isVI() || IsBuffer) ? "expected a 20-bit unsigned offset"
                                   : "expected a 21-bit signed offset");
  // ...
}
bool AMDGPUAsmParser::validateSOPLiteral(const MCInst &Inst) const {
  // ...
  const int OpIndices[] = { Src0Idx, Src1Idx };

  unsigned NumExprs = 0;
  unsigned NumLiterals = 0;
  // ...

  for (int OpIdx : OpIndices) {
    if (OpIdx == -1)
      break;
    // ...
    if (MO.isImm() && !isInlineConstant(Inst, OpIdx)) {
      // ...
      if (NumLiterals == 0 || LiteralValue != Value) {
        // ...
      }
    } else if (MO.isExpr()) {
      // ...
    }
  }

  return NumLiterals + NumExprs <= 1;
}

bool AMDGPUAsmParser::validateOpSel(const MCInst &Inst) {
  // ...
  if (OpSelIdx != -1) {
    // ...
  }
  if (OpSelHiIdx != -1) {
    // ...
  }
  // ...
}
bool AMDGPUAsmParser::validateNeg(const MCInst &Inst, int OpName) {
  // ...
  int SrcMods[3] = {AMDGPU::OpName::src0_modifiers,
                    AMDGPU::OpName::src1_modifiers,
                    AMDGPU::OpName::src2_modifiers};

  for (unsigned i = 0; i < 3; ++i) {
    // ...
  }
  // ...
}

bool AMDGPUAsmParser::validateDPP(const MCInst &Inst,
                                  // ...
  if (DppCtrlIdx >= 0) {
    // ...
      Error(S, "DP ALU dpp only supports row_newbcast");
    // ...
  }

  bool IsDPP = DppCtrlIdx >= 0 || Dpp8Idx >= 0;
  // ...
    Error(S, "invalid operand for instruction");
    // ...
      "src1 immediate operand invalid for instruction");
  // ...
}
bool AMDGPUAsmParser::validateVccOperand(unsigned Reg) const {
  auto FB = getFeatureBits();
  return (FB[AMDGPU::FeatureWavefrontSize64] && Reg == AMDGPU::VCC) ||
         (FB[AMDGPU::FeatureWavefrontSize32] && Reg == AMDGPU::VCC_LO);
}
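// Illustration (added example, not from the upstream source): the carry
// operand must match the selected wavefront size:
//   v_add_co_ci_u32_e32 v0, vcc, v1, v2, vcc        ; wave64
//   v_add_co_ci_u32_e32 v0, vcc_lo, v1, v2, vcc_lo  ; wave32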
bool AMDGPUAsmParser::validateVOPLiteral(const MCInst &Inst,
                                         // ...
      !HasMandatoryLiteral && !isVOPD(Opcode))
    // ...

  unsigned NumExprs = 0;
  unsigned NumLiterals = 0;
  // ...

  for (int OpIdx : OpIndices) {
    // ...
    if (MO.isImm() && !isInlineConstant(Inst, OpIdx)) {
      // ...
      if (!IsValid32Op && !isInt<32>(Value) && !isUInt<32>(Value)) {
        Error(getLitLoc(Operands), "invalid operand for instruction");
        // ...
      }

      if (IsFP64 && IsValid32Op)
        // ...

      if (NumLiterals == 0 || LiteralValue != Value) {
        // ...
      }
    } else if (MO.isExpr()) {
      // ...
    }
  }
  NumLiterals += NumExprs;
  // ...

  if (!HasMandatoryLiteral && !getFeatureBits()[FeatureVOP3Literal]) {
    Error(getLitLoc(Operands), "literal operands are not supported");
    // ...
  }

  if (NumLiterals > 1) {
    Error(getLitLoc(Operands, true),
          "only one unique literal operand is allowed");
    // ...
  }
  // ...
}
// ...
  unsigned Sub = MRI->getSubReg(Op.getReg(), AMDGPU::sub0);
  auto Reg = Sub ? Sub : Op.getReg();
  // ...
  return AGPR32.contains(Reg) ? 1 : 0;
// ...

bool AMDGPUAsmParser::validateAGPRLdSt(const MCInst &Inst) const {
  // ...
                     : AMDGPU::OpName::vdata;
  // ...
  if (Data2Areg >= 0 && Data2Areg != DataAreg)
    // ...

  auto FB = getFeatureBits();
  if (FB[AMDGPU::FeatureGFX90AInsts]) {
    if (DataAreg < 0 || DstAreg < 0)
      // ...
    return DstAreg == DataAreg;
  }

  return DstAreg < 1 && DataAreg < 1;
}

bool AMDGPUAsmParser::validateVGPRAlign(const MCInst &Inst) const {
  auto FB = getFeatureBits();
  if (!FB[AMDGPU::FeatureGFX90AInsts])
    // ...
    unsigned Sub = MRI->getSubReg(Op.getReg(), AMDGPU::sub0);
    // ...
    if (VGPR32.contains(Sub) && ((Sub - AMDGPU::VGPR0) & 1))
      // ...
    if (AGPR32.contains(Sub) && ((Sub - AMDGPU::AGPR0) & 1))
      // ...
}
// ...
  for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
    // ...
      return Op.getStartLoc();
  }
  // ...

bool AMDGPUAsmParser::validateBLGP(const MCInst &Inst,
                                   // ...
  auto FB = getFeatureBits();
  bool UsesNeg = false;
  if (FB[AMDGPU::FeatureGFX940Insts]) {
    // ...
    case AMDGPU::V_MFMA_F64_16X16X4F64_gfx940_acd:
    case AMDGPU::V_MFMA_F64_16X16X4F64_gfx940_vcd:
    case AMDGPU::V_MFMA_F64_4X4X4F64_gfx940_acd:
    case AMDGPU::V_MFMA_F64_4X4X4F64_gfx940_vcd:
    // ...
  }

  if (IsNeg == UsesNeg)
    // ...

  // ...
        UsesNeg ? "invalid modifier: blgp is not supported"
                : "invalid modifier: neg is not supported");
  // ...
}
bool AMDGPUAsmParser::validateWaitCnt(const MCInst &Inst,
                                      // ...
  if (Opc != AMDGPU::S_WAITCNT_EXPCNT_gfx11 &&
      Opc != AMDGPU::S_WAITCNT_LGKMCNT_gfx11 &&
      Opc != AMDGPU::S_WAITCNT_VMCNT_gfx11 &&
      Opc != AMDGPU::S_WAITCNT_VSCNT_gfx11)
    // ...

  if (Reg == AMDGPU::SGPR_NULL)
    // ...

  Error(RegLoc, "src0 must be null");
  // ...
}
bool AMDGPUAsmParser::validateDS(const MCInst &Inst,
                                 // ...
    return validateGWS(Inst, Operands);
  // ...
  Error(S, "gds modifier is not supported on this GPU");
  // ...
}

bool AMDGPUAsmParser::validateGWS(const MCInst &Inst,
                                  // ...
  if (!getFeatureBits()[AMDGPU::FeatureGFX90AInsts])
    // ...
  if (Opc != AMDGPU::DS_GWS_INIT_vi && Opc != AMDGPU::DS_GWS_BARRIER_vi &&
      Opc != AMDGPU::DS_GWS_SEMA_BR_vi)
    // ...

  auto RegIdx = Reg - (VGPR32.contains(Reg) ? AMDGPU::VGPR0 : AMDGPU::AGPR0);
  // ...
  Error(RegLoc, "vgpr must be even aligned");
  // ...
}
bool AMDGPUAsmParser::validateCoherencyBits(const MCInst &Inst,
                                            // ...
                                            const SMLoc &IDLoc) {
  // ...
                                        AMDGPU::OpName::cpol);
  // ...
    return validateTHAndScopeBits(Inst, Operands, CPol);
  // ...
    Error(S, "cache policy is not supported for SMRD instructions");
    // ...
    Error(IDLoc, "invalid cache policy for SMEM instruction");
    // ...

  if (!(TSFlags & AllowSCCModifier)) {
    // ...
          "scc modifier is not supported for this instruction on this GPU");
    // ...
  }
  // ...
              : "instruction must use glc");
    // ...
          &CStr.data()[CStr.find(isGFX940() ? "sc0" : "glc")]);
    // ...
              : "instruction must not use glc");
    // ...
}

bool AMDGPUAsmParser::validateTHAndScopeBits(const MCInst &Inst,
                                             // ...
                                             const unsigned CPol) {
  // ...
  const unsigned Opcode = Inst.getOpcode();
  // ...
      return PrintError("instruction must use th:TH_ATOMIC_RETURN");
  // ...
      return PrintError("invalid th value for SMEM instruction");
  // ...
      return PrintError("scope and th combination is not valid");
  // ...
      return PrintError("invalid th value for atomic instructions");
  } else if (IsStore) {
    // ...
      return PrintError("invalid th value for store instructions");
  } else {
    // ...
      return PrintError("invalid th value for load instructions");
  }
  // ...
}
// ...
    if (!Operand->isReg())
      // ...
    unsigned Reg = Operand->getReg();
    if (Reg == SRC_EXECZ || Reg == SRC_VCCZ) {
      // ...
          "execz and vccz are not supported on this GPU");
      // ...
    }
  // ...

bool AMDGPUAsmParser::validateTFE(const MCInst &Inst,
                                  // ...
  if (Desc.mayStore() &&
      // ...
    Error(Loc, "TFE modifier has no meaning for store instructions");
    // ...
  }
  // ...
}
bool AMDGPUAsmParser::validateInstruction(const MCInst &Inst,
                                          // ...
  if (auto ErrMsg = validateLdsDirect(Inst)) {
    // ...
  }
  if (!validateSOPLiteral(Inst)) {
    // ...
          "only one unique literal operand is allowed");
    // ...
  }
  if (!validateVOPLiteral(Inst, Operands)) {
    // ...
  }
  if (!validateConstantBusLimitations(Inst, Operands)) {
    // ...
  }
  if (!validateVOPDRegBankConstraints(Inst, Operands)) {
    // ...
  }
  if (!validateIntClampSupported(Inst)) {
    // ...
          "integer clamping is not supported on this GPU");
    // ...
  }
  if (!validateOpSel(Inst)) {
    // ...
          "invalid op_sel operand");
    // ...
  }
  if (!validateNeg(Inst, AMDGPU::OpName::neg_lo)) {
    // ...
          "invalid neg_lo operand");
    // ...
  }
  if (!validateNeg(Inst, AMDGPU::OpName::neg_hi)) {
    // ...
          "invalid neg_hi operand");
    // ...
  }
  if (!validateDPP(Inst, Operands)) {
    // ...
  }
  if (!validateMIMGD16(Inst)) {
    // ...
          "d16 modifier is not supported on this GPU");
    // ...
  }
  if (!validateMIMGMSAA(Inst)) {
    // ...
          "invalid dim; must be MSAA type");
    // ...
  }
  if (!validateMIMGDataSize(Inst, IDLoc)) {
    // ...
  }
  if (!validateMIMGAddrSize(Inst, IDLoc))
    // ...
  if (!validateMIMGAtomicDMask(Inst)) {
    // ...
          "invalid atomic image dmask");
    // ...
  }
  if (!validateMIMGGatherDMask(Inst)) {
    // ...
          "invalid image_gather dmask: only one bit must be set");
    // ...
  }
  if (!validateMovrels(Inst, Operands)) {
    // ...
  }
  if (!validateOffset(Inst, Operands)) {
    // ...
  }
  if (!validateMAIAccWrite(Inst, Operands)) {
    // ...
  }
  if (!validateMAISrc2(Inst, Operands)) {
    // ...
  }
  if (!validateMFMA(Inst, Operands)) {
    // ...
  }
  if (!validateCoherencyBits(Inst, Operands, IDLoc)) {
    // ...
  }
  if (!validateAGPRLdSt(Inst)) {
    Error(IDLoc, getFeatureBits()[AMDGPU::FeatureGFX90AInsts]
        ? "invalid register class: data and dst should be all VGPR or AGPR"
        : "invalid register class: agpr loads and stores not supported on this GPU");
    // ...
  }
  if (!validateVGPRAlign(Inst)) {
    // ...
          "invalid register class: vgpr tuples must be 64 bit aligned");
    // ...
  }
  // ...
  if (!validateBLGP(Inst, Operands)) {
    // ...
  }
  if (!validateDivScale(Inst)) {
    Error(IDLoc, "ABS not allowed in VOP3B instructions");
    // ...
  }
  if (!validateWaitCnt(Inst, Operands)) {
    // ...
  }
  if (!validateExeczVcczOperands(Operands)) {
    // ...
  }
  if (!validateTFE(Inst, Operands)) {
    // ...
  }
  // ...
}
// ...
                              unsigned VariantID = 0);
// ...
                              unsigned VariantID);

bool AMDGPUAsmParser::isSupportedMnemo(StringRef Mnemo,
                                       // ...

bool AMDGPUAsmParser::isSupportedMnemo(StringRef Mnemo,
                                       // ...
  for (auto Variant : Variants) {
    // ...
  }
  // ...

bool AMDGPUAsmParser::checkUnsupportedInstruction(StringRef Mnemo,
                                                  const SMLoc &IDLoc) {
  FeatureBitset FBS = ComputeAvailableFeatures(getFeatureBits());
  // ...
  if (isSupportedMnemo(Mnemo, FBS, getMatchedVariants()))
    // ...

  getParser().clearPendingErrors();
  // ...
  StringRef VariantName = getMatchedVariantName();
  if (!VariantName.empty() && isSupportedMnemo(Mnemo, FBS)) {
    // ...
        " variant of this instruction is not supported"));
    // ...
  }

  if (isGFX10Plus() && getFeatureBits()[AMDGPU::FeatureWavefrontSize64] &&
      !getFeatureBits()[AMDGPU::FeatureWavefrontSize32]) {
    // ...
    FeaturesWS32.flip(AMDGPU::FeatureWavefrontSize64)
        .flip(AMDGPU::FeatureWavefrontSize32);
    // ...
        ComputeAvailableFeatures(FeaturesWS32);
    // ...
    if (isSupportedMnemo(Mnemo, AvailableFeaturesWS32, getMatchedVariants()))
      return Error(IDLoc, "instruction requires wavesize=32");
    // ...
  }

  // ...
    return Error(IDLoc, "instruction not supported on this GPU");
  // ...
  return Error(IDLoc, "invalid instruction" + Suggestion);
}

// ...
  const auto &Op = ((AMDGPUOperand &)*Operands[InvalidOprIdx]);
  if (Op.isToken() && InvalidOprIdx > 1) {
    const auto &PrevOp = ((AMDGPUOperand &)*Operands[InvalidOprIdx - 1]);
    return PrevOp.isToken() && PrevOp.getToken() == "::";
  }
  // ...
bool AMDGPUAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
                                              // ...
                                              bool MatchingInlineAsm) {
  // ...
  unsigned Result = Match_Success;
  for (auto Variant : getMatchedVariants()) {
    // ...
    auto R = MatchInstructionImpl(Operands, Inst, EI, MatchingInlineAsm,
                                  // ...
    if ((R == Match_Success) ||
        (R == Match_PreferE32) ||
        (R == Match_MissingFeature && Result != Match_PreferE32) ||
        (R == Match_InvalidOperand && Result != Match_MissingFeature
                                   && Result != Match_PreferE32) ||
        (R == Match_MnemonicFail && Result != Match_InvalidOperand
                                 && Result != Match_MissingFeature
                                 && Result != Match_PreferE32)) {
      // ...
      if (R == Match_Success)
        // ...
    }
  }

  if (Result == Match_Success) {
    if (!validateInstruction(Inst, IDLoc, Operands)) {
      // ...
    }
    // ...
  }

  if (checkUnsupportedInstruction(Mnemo, IDLoc)) {
    // ...
  }

  // ...
  case Match_MissingFeature:
    // ...
    return Error(IDLoc, "operands are not valid for this GPU or mode");

  case Match_InvalidOperand: {
    SMLoc ErrorLoc = IDLoc;
    // ...
      return Error(IDLoc, "too few operands for instruction");
    // ...
    if (ErrorLoc == SMLoc())
      // ...
      return Error(ErrorLoc, "invalid VOPDY instruction");
    // ...
    return Error(ErrorLoc, "invalid operand for instruction");
  }

  case Match_PreferE32:
    return Error(IDLoc, "internal error: instruction without _e64 suffix "
                        "should be encoded as e32");
  case Match_MnemonicFail:
    // ...
}

bool AMDGPUAsmParser::ParseAsAbsoluteExpression(uint32_t &Ret) {
  // ...
  if (getParser().parseAbsoluteExpression(Tmp)) {
    // ...
  }
  // ...
}
bool AMDGPUAsmParser::ParseDirectiveAMDGCNTarget() {
  // ...
    return TokError("directive only supported for amdgcn architecture");

  std::string TargetIDDirective;
  SMLoc TargetStart = getTok().getLoc();
  if (getParser().parseEscapedString(TargetIDDirective))
    // ...

  if (getTargetStreamer().getTargetID()->toString() != TargetIDDirective)
    return getParser().Error(
        TargetRange.Start,
        (Twine(".amdgcn_target directive's target id ") +
         Twine(TargetIDDirective) +
         Twine(" does not match the specified target id ") +
         Twine(getTargetStreamer().getTargetID()->toString())).str());
  // ...
}

bool AMDGPUAsmParser::OutOfRangeError(SMRange Range) {
  return Error(Range.Start, "value out of range", Range);
}
bool AMDGPUAsmParser::calculateGPRBlocks(
    const FeatureBitset &Features, bool VCCUsed, bool FlatScrUsed,
    bool XNACKUsed, std::optional<bool> EnableWavefrontSize32,
    unsigned NextFreeVGPR, SMRange VGPRRange, unsigned NextFreeSGPR,
    SMRange SGPRRange, unsigned &VGPRBlocks, unsigned &SGPRBlocks) {
  // ...
  unsigned MaxAddressableNumSGPRs =
      // ...

  if (Version.Major >= 8 && !Features.test(FeatureSGPRInitBug) &&
      NumSGPRs > MaxAddressableNumSGPRs)
    return OutOfRangeError(SGPRRange);

  // ...
  if ((Version.Major <= 7 || Features.test(FeatureSGPRInitBug)) &&
      NumSGPRs > MaxAddressableNumSGPRs)
    return OutOfRangeError(SGPRRange);

  if (Features.test(FeatureSGPRInitBug))
    // ...

  // ...
                                  EnableWavefrontSize32);
  // ...
}
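// Illustration (added example, not from the upstream source): the block
// encoding stores (register count / granule) - 1. Assuming a VGPR
// allocation granule of 4, a kernel whose highest used VGPR is v11
// (NextFreeVGPR = 12) encodes as:
//   unsigned VGPRBlocks = alignTo(std::max(1u, 12u), 4) / 4 - 1;  // == 2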
bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() {
  // ...
    return TokError("directive only supported for amdgcn architecture");
  // ...
    return TokError("directive only supported for amdhsa OS");

  // ...
  if (getParser().parseIdentifier(KernelName))
    // ...
                                                    &getSTI(), getContext());
  // ...
  unsigned ImpliedUserSGPRCount = 0;
  // ...
  std::optional<unsigned> ExplicitUserSGPRCount;
  bool ReserveVCC = true;
  bool ReserveFlatScr = true;
  std::optional<bool> EnableWavefrontSize32;
  // ...
    SMRange IDRange = getTok().getLocRange();
    if (!parseId(ID, "expected .amdhsa_ directive or .end_amdhsa_kernel"))
      // ...

    if (ID == ".end_amdhsa_kernel")
      // ...
      return TokError(".amdhsa_ directives cannot be repeated");

    SMLoc ValStart = getLoc();
    // ...
    if (getParser().parseExpression(ExprVal))
      // ...
    SMLoc ValEnd = getLoc();
    // ...
    bool EvaluatableExpr;
    if ((EvaluatableExpr = ExprVal->evaluateAsAbsolute(IVal))) {
      // ...
        return OutOfRangeError(ValRange);
    }

#define PARSE_BITS_ENTRY(FIELD, ENTRY, VALUE, RANGE)                           \
  if (!isUInt<ENTRY##_WIDTH>(Val))                                             \
    return OutOfRangeError(RANGE);                                             \
  AMDGPU::MCKernelDescriptor::bits_set(FIELD, VALUE, ENTRY##_SHIFT, ENTRY,     \
  /* ... */

#define EXPR_RESOLVE_OR_ERROR(RESOLVED)                                        \
  /* ... */                                                                    \
    return Error(IDRange.Start, "directive should have resolvable expression", \
  /* ... */
    if (ID == ".amdhsa_group_segment_fixed_size") {
      // ...
        return OutOfRangeError(ValRange);
      // ...
    } else if (ID == ".amdhsa_private_segment_fixed_size") {
      // ...
        return OutOfRangeError(ValRange);
      // ...
    } else if (ID == ".amdhsa_kernarg_size") {
      // ...
        return OutOfRangeError(ValRange);
      // ...
    } else if (ID == ".amdhsa_user_sgpr_count") {
      // ...
      ExplicitUserSGPRCount = Val;
    } else if (ID == ".amdhsa_user_sgpr_private_segment_buffer") {
      // ...
                     "directive is not supported with architected flat scratch",
      // ...
                       KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER,
      // ...
        ImpliedUserSGPRCount += 4;
    } else if (ID == ".amdhsa_user_sgpr_kernarg_preload_length") {
      // ...
        return Error(IDRange.Start, "directive requires gfx90a+", IDRange);
      // ...
        return OutOfRangeError(ValRange);
      // ...
      ImpliedUserSGPRCount += Val;
      PreloadLength = Val;
    } else if (ID == ".amdhsa_user_sgpr_kernarg_preload_offset") {
      // ...
        return Error(IDRange.Start, "directive requires gfx90a+", IDRange);
      // ...
        return OutOfRangeError(ValRange);
      // ...
      PreloadOffset = Val;
    } else if (ID == ".amdhsa_user_sgpr_dispatch_ptr") {
      // ...
                       KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR, ExprVal,
      // ...
        ImpliedUserSGPRCount += 2;
    } else if (ID == ".amdhsa_user_sgpr_queue_ptr") {
      // ...
                       KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR, ExprVal,
      // ...
        ImpliedUserSGPRCount += 2;
    } else if (ID == ".amdhsa_user_sgpr_kernarg_segment_ptr") {
      // ...
                       KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR,
      // ...
        ImpliedUserSGPRCount += 2;
    } else if (ID == ".amdhsa_user_sgpr_dispatch_id") {
      // ...
                       KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID, ExprVal,
      // ...
        ImpliedUserSGPRCount += 2;
    } else if (ID == ".amdhsa_user_sgpr_flat_scratch_init") {
      // ...
                     "directive is not supported with architected flat scratch",
      // ...
                       KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT,
      // ...
        ImpliedUserSGPRCount += 2;
    } else if (ID == ".amdhsa_user_sgpr_private_segment_size") {
      // ...
                       KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE,
      // ...
        ImpliedUserSGPRCount += 1;
    } else if (ID == ".amdhsa_wavefront_size32") {
      if (IVersion.Major < 10)
        return Error(IDRange.Start, "directive requires gfx10+", IDRange);
      EnableWavefrontSize32 = Val;
      // ...
                       KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32, ExprVal,
      // ...
    } else if (ID == ".amdhsa_uses_dynamic_stack") {
      // ...
                       KERNEL_CODE_PROPERTY_USES_DYNAMIC_STACK, ExprVal,
      // ...
    } else if (ID == ".amdhsa_system_sgpr_private_segment_wavefront_offset") {
      // ...
                     "directive is not supported with architected flat scratch",
      // ...
                       COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT, ExprVal,
      // ...
    } else if (ID == ".amdhsa_enable_private_segment") {
      // ...
                     "directive is not supported without architected flat scratch",
      // ...
                       COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT, ExprVal,
      // ...
    } else if (ID == ".amdhsa_system_sgpr_workgroup_id_x") {
      // ...
                       COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X, ExprVal,
      // ...
    } else if (ID == ".amdhsa_system_sgpr_workgroup_id_y") {
      // ...
                       COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Y, ExprVal,
      // ...
    } else if (ID == ".amdhsa_system_sgpr_workgroup_id_z") {
      // ...
                       COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Z, ExprVal,
      // ...
    } else if (ID == ".amdhsa_system_sgpr_workgroup_info") {
      // ...
                       COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_INFO, ExprVal,
      // ...
    } else if (ID == ".amdhsa_system_vgpr_workitem_id") {
      // ...
                       COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID, ExprVal,
      // ...
    } else if (ID == ".amdhsa_next_free_vgpr") {
      VGPRRange = ValRange;
      // ...
    } else if (ID == ".amdhsa_next_free_sgpr") {
      SGPRRange = ValRange;
      // ...
    } else if (ID == ".amdhsa_accum_offset") {
      // ...
        return Error(IDRange.Start, "directive requires gfx90a+", IDRange);
      // ...
    } else if (ID == ".amdhsa_reserve_vcc") {
      if (!isUInt<1>(Val))
        return OutOfRangeError(ValRange);
      // ...
    } else if (ID == ".amdhsa_reserve_flat_scratch") {
      if (IVersion.Major < 7)
        return Error(IDRange.Start, "directive requires gfx7+", IDRange);
      // ...
                     "directive is not supported with architected flat scratch",
      // ...
      if (!isUInt<1>(Val))
        return OutOfRangeError(ValRange);
      ReserveFlatScr = Val;
    } else if (ID == ".amdhsa_reserve_xnack_mask") {
      if (IVersion.Major < 8)
        return Error(IDRange.Start, "directive requires gfx8+", IDRange);
      if (!isUInt<1>(Val))
        return OutOfRangeError(ValRange);
      if (Val != getTargetStreamer().getTargetID()->isXnackOnOrAny())
        return getParser().Error(
            IDRange.Start, ".amdhsa_reserve_xnack_mask does not match target id",
            // ...
    } else if (ID == ".amdhsa_float_round_mode_32") {
      // ...
                       COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32, ExprVal,
      // ...
    } else if (ID == ".amdhsa_float_round_mode_16_64") {
      // ...
                       COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_16_64, ExprVal,
      // ...
    } else if (ID == ".amdhsa_float_denorm_mode_32") {
      // ...
                       COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_32, ExprVal,
      // ...
    } else if (ID == ".amdhsa_float_denorm_mode_16_64") {
      // ...
                       COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64, ExprVal,
      // ...
    } else if (ID == ".amdhsa_dx10_clamp") {
      if (IVersion.Major >= 12)
        return Error(IDRange.Start, "directive unsupported on gfx12+", IDRange);
      // ...
                       COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_DX10_CLAMP, ExprVal,
      // ...
    } else if (ID == ".amdhsa_ieee_mode") {
      if (IVersion.Major >= 12)
        return Error(IDRange.Start, "directive unsupported on gfx12+", IDRange);
      // ...
                       COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_IEEE_MODE, ExprVal,
      // ...
    } else if (ID == ".amdhsa_fp16_overflow") {
      if (IVersion.Major < 9)
        return Error(IDRange.Start, "directive requires gfx9+", IDRange);
      // ...
                       COMPUTE_PGM_RSRC1_GFX9_PLUS_FP16_OVFL, ExprVal,
      // ...
    } else if (ID == ".amdhsa_tg_split") {
      // ...
        return Error(IDRange.Start, "directive requires gfx90a+", IDRange);
      // ...
    } else if (ID == ".amdhsa_workgroup_processor_mode") {
      if (IVersion.Major < 10)
        return Error(IDRange.Start, "directive requires gfx10+", IDRange);
      // ...
                       COMPUTE_PGM_RSRC1_GFX10_PLUS_WGP_MODE, ExprVal,
      // ...
    } else if (ID == ".amdhsa_memory_ordered") {
      if (IVersion.Major < 10)
        return Error(IDRange.Start, "directive requires gfx10+", IDRange);
      // ...
                       COMPUTE_PGM_RSRC1_GFX10_PLUS_MEM_ORDERED, ExprVal,
      // ...
    } else if (ID == ".amdhsa_forward_progress") {
      if (IVersion.Major < 10)
        return Error(IDRange.Start, "directive requires gfx10+", IDRange);
      // ...
                       COMPUTE_PGM_RSRC1_GFX10_PLUS_FWD_PROGRESS, ExprVal,
      // ...
    } else if (ID == ".amdhsa_shared_vgpr_count") {
      // ...
      if (IVersion.Major < 10 || IVersion.Major >= 12)
        return Error(IDRange.Start, "directive requires gfx10 or gfx11",
                     // ...
      SharedVGPRCount = Val;
      // ...
                       COMPUTE_PGM_RSRC3_GFX10_GFX11_SHARED_VGPR_COUNT, ExprVal,
      // ...
    } else if (ID == ".amdhsa_exception_fp_ieee_invalid_op") {
      // ...
          COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION,
      // ...
    } else if (ID == ".amdhsa_exception_fp_denorm_src") {
      // ...
                       COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_FP_DENORMAL_SOURCE,
      // ...
    } else if (ID == ".amdhsa_exception_fp_ieee_div_zero") {
      // ...
          COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO,
      // ...
    } else if (ID == ".amdhsa_exception_fp_ieee_overflow") {
      // ...
                       COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW,
      // ...
    } else if (ID == ".amdhsa_exception_fp_ieee_underflow") {
      // ...
                       COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW,
      // ...
    } else if (ID == ".amdhsa_exception_fp_ieee_inexact") {
      // ...
                       COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INEXACT,
      // ...
    } else if (ID == ".amdhsa_exception_int_div_zero") {
      // ...
                       COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO,
      // ...
    } else if (ID == ".amdhsa_round_robin_scheduling") {
      if (IVersion.Major < 12)
        return Error(IDRange.Start, "directive requires gfx12+", IDRange);
      // ...
                       COMPUTE_PGM_RSRC1_GFX12_PLUS_ENABLE_WG_RR_EN, ExprVal,
      // ...
    } else {
      return Error(IDRange.Start, "unknown .amdhsa_kernel directive", IDRange);
    }
#undef PARSE_BITS_ENTRY
  // ...

  if (!Seen.contains(".amdhsa_next_free_vgpr"))
    return TokError(".amdhsa_next_free_vgpr directive is required");

  if (!Seen.contains(".amdhsa_next_free_sgpr"))
    return TokError(".amdhsa_next_free_sgpr directive is required");

  unsigned VGPRBlocks;
  unsigned SGPRBlocks;
  if (calculateGPRBlocks(getFeatureBits(), ReserveVCC, ReserveFlatScr,
                         getTargetStreamer().getTargetID()->isXnackOnOrAny(),
                         EnableWavefrontSize32, NextFreeVGPR,
                         VGPRRange, NextFreeSGPR, SGPRRange, VGPRBlocks,
                         // ...

  if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT_WIDTH>(
          // ...
    return OutOfRangeError(VGPRRange);
  // ...
      COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT_SHIFT,
      COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT, getContext());

  if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT_WIDTH>(
          // ...
    return OutOfRangeError(SGPRRange);
  // ...
      COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT_SHIFT,
      COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT, getContext());

  if (ExplicitUserSGPRCount && ImpliedUserSGPRCount > *ExplicitUserSGPRCount)
    return TokError("amdgpu_user_sgpr_count smaller than implied by "
                    "enabled user SGPRs");

  unsigned UserSGPRCount =
      ExplicitUserSGPRCount ? *ExplicitUserSGPRCount : ImpliedUserSGPRCount;

  if (!isUInt<COMPUTE_PGM_RSRC2_USER_SGPR_COUNT_WIDTH>(UserSGPRCount))
    return TokError("too many user SGPRs enabled");
  // ...
      COMPUTE_PGM_RSRC2_USER_SGPR_COUNT_SHIFT,
      COMPUTE_PGM_RSRC2_USER_SGPR_COUNT, getContext());

  // ...
    return TokError("Kernarg size should be resolvable");

  if (PreloadLength && kernarg_size &&
      (PreloadLength * 4 + PreloadOffset * 4 > kernarg_size))
    return TokError("Kernarg preload length + offset is larger than the "
                    "kernarg segment size");

  // ...
  if (!Seen.contains(".amdhsa_accum_offset"))
    return TokError(".amdhsa_accum_offset directive is required");
  if (AccumOffset < 4 || AccumOffset > 256 || (AccumOffset & 3))
    return TokError("accum_offset should be in range [4..256] in "
                    // ...
    return TokError("accum_offset exceeds total VGPR allocation");
  // ...
      COMPUTE_PGM_RSRC3_GFX90A_ACCUM_OFFSET_SHIFT,
      COMPUTE_PGM_RSRC3_GFX90A_ACCUM_OFFSET, getContext());

  if (IVersion.Major >= 10 && IVersion.Major < 12) {
    // ...
    if (SharedVGPRCount && EnableWavefrontSize32 && *EnableWavefrontSize32) {
      return TokError("shared_vgpr_count directive not valid on "
                      "wavefront size 32");
    }
    if (SharedVGPRCount * 2 + VGPRBlocks > 63) {
      return TokError("shared_vgpr_count*2 + "
                      "compute_pgm_rsrc1.GRANULATED_WORKITEM_VGPR_COUNT cannot "
                      // ...
    }
  }

  getTargetStreamer().EmitAmdhsaKernelDescriptor(getSTI(), KernelName, KD,
                                                 NextFreeVGPR, NextFreeSGPR,
                                                 ReserveVCC, ReserveFlatScr);
  // ...
}
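// Illustration (added example): a minimal directive stream this parser
// accepts; .amdhsa_next_free_vgpr and .amdhsa_next_free_sgpr are required
// above, everything else takes its default:
//   .amdhsa_kernel my_kernel
//     .amdhsa_next_free_vgpr 8
//     .amdhsa_next_free_sgpr 16
//   .end_amdhsa_kernel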
bool AMDGPUAsmParser::ParseDirectiveAMDHSACodeObjectVersion() {
  // ...
  if (ParseAsAbsoluteExpression(Version))
    // ...
  getTargetStreamer().EmitDirectiveAMDHSACodeObjectVersion(Version);
  // ...
}
bool AMDGPUAsmParser::ParseAMDKernelCodeTValue(StringRef ID,
                                               // ...
  if (ID == "max_scratch_backing_memory_byte_size") {
    Parser.eatToEndOfStatement();
    // ...
  }
  // ...
    return TokError(Err.str());
  // ...

  if (ID == "enable_dx10_clamp") {
    // ...
      return TokError("enable_dx10_clamp=1 is not allowed on GFX12+");
  }

  if (ID == "enable_ieee_mode") {
    // ...
      return TokError("enable_ieee_mode=1 is not allowed on GFX12+");
  }

  if (ID == "enable_wavefront_size32") {
    // ...
      return TokError("enable_wavefront_size32=1 is only allowed on GFX10+");
    if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32])
      return TokError("enable_wavefront_size32=1 requires +WavefrontSize32");
    // ...
    if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64])
      return TokError("enable_wavefront_size32=0 requires +WavefrontSize64");
  }

  if (ID == "wavefront_size") {
    if (Header.wavefront_size == 5) {
      // ...
        return TokError("wavefront_size=5 is only allowed on GFX10+");
      if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32])
        return TokError("wavefront_size=5 requires +WavefrontSize32");
    } else if (Header.wavefront_size == 6) {
      if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64])
        return TokError("wavefront_size=6 requires +WavefrontSize64");
    }
  }

  if (ID == "enable_wgp_mode") {
    // ...
      return TokError("enable_wgp_mode=1 is only allowed on GFX10+");
  }

  if (ID == "enable_mem_ordered") {
    // ...
      return TokError("enable_mem_ordered=1 is only allowed on GFX10+");
  }

  if (ID == "enable_fwd_progress") {
    // ...
      return TokError("enable_fwd_progress=1 is only allowed on GFX10+");
  }
  // ...
}

bool AMDGPUAsmParser::ParseDirectiveAMDKernelCodeT() {
  // ...
    if (!parseId(ID, "expected value identifier or .end_amd_kernel_code_t"))
      // ...

    if (ID == ".end_amd_kernel_code_t")
      // ...

    if (ParseAMDKernelCodeTValue(ID, Header))
      // ...

  getTargetStreamer().EmitAMDKernelCodeT(Header);
  // ...
}
bool AMDGPUAsmParser::ParseDirectiveAMDGPUHsaKernel() {
  // ...
  if (!parseId(KernelName, "expected symbol name"))
    // ...

  getTargetStreamer().EmitAMDGPUSymbolType(KernelName,
                                           // ...
  KernelScope.initialize(getContext());
  // ...
}

bool AMDGPUAsmParser::ParseDirectiveISAVersion() {
  // ...
    return Error(getLoc(),
                 ".amd_amdgpu_isa directive is not available on non-amdgcn "
                 // ...

  auto TargetIDDirective = getLexer().getTok().getStringContents();
  if (getTargetStreamer().getTargetID()->toString() != TargetIDDirective)
    return Error(getParser().getTok().getLoc(), "target id must match options");

  getTargetStreamer().EmitISAVersion();
  // ...
}

bool AMDGPUAsmParser::ParseDirectiveHSAMetadata() {
  // ...
  std::string HSAMetadataString;
  // ...
  if (!getTargetStreamer().EmitHSAMetadataV3(HSAMetadataString))
    return Error(getLoc(), "invalid HSA metadata");
  // ...
}
bool AMDGPUAsmParser::ParseToEndDirective(const char *AssemblerDirectiveBegin,
                                          const char *AssemblerDirectiveEnd,
                                          std::string &CollectString) {
  // ...
  getLexer().setSkipSpace(false);
  // ...
  bool FoundEnd = false;
  // ...
      CollectStream << getTokenStr();
      // ...
      if (trySkipId(AssemblerDirectiveEnd)) {
        // ...
      }

      CollectStream << Parser.parseStringToEndOfStatement()
                    << getContext().getAsmInfo()->getSeparatorString();
      // ...
      Parser.eatToEndOfStatement();
  // ...

  getLexer().setSkipSpace(true);
  // ...
    return TokError(Twine("expected directive ") +
                    Twine(AssemblerDirectiveEnd) + Twine(" not found"));
  // ...
  CollectStream.flush();
  // ...
}

bool AMDGPUAsmParser::ParseDirectivePALMetadataBegin() {
  // ...
  auto PALMetadata = getTargetStreamer().getPALMetadata();
  if (!PALMetadata->setFromString(String))
    return Error(getLoc(), "invalid PAL metadata");
  // ...
}

bool AMDGPUAsmParser::ParseDirectivePALMetadata() {
  // ...
    return Error(getLoc(),
                 // ...
                 "not available on non-amdpal OSes")).str());

  auto PALMetadata = getTargetStreamer().getPALMetadata();
  PALMetadata->setLegacy();
  // ...
    if (ParseAsAbsoluteExpression(Key)) {
      return TokError(Twine("invalid value in ") +
                      // ...
    }
    // ...
      return TokError(Twine("expected an even number of values in ") +
                      // ...
    if (ParseAsAbsoluteExpression(Value)) {
      return TokError(Twine("invalid value in ") +
                      // ...
    }
    PALMetadata->setRegister(Key, Value);
  // ...
}
bool AMDGPUAsmParser::ParseDirectiveAMDGPULDS() {
  if (getParser().checkForValidSection())
    // ...

  SMLoc NameLoc = getLoc();
  if (getParser().parseIdentifier(Name))
    return TokError("expected identifier in directive");
  // ...
  if (getParser().parseComma())
    // ...

  SMLoc SizeLoc = getLoc();
  if (getParser().parseAbsoluteExpression(Size))
    // ...
    return Error(SizeLoc, "size must be non-negative");
  if (Size > LocalMemorySize)
    return Error(SizeLoc, "size is too large");

  int64_t Alignment = 4;
  // ...
    SMLoc AlignLoc = getLoc();
    if (getParser().parseAbsoluteExpression(Alignment))
      // ...
      return Error(AlignLoc, "alignment must be a power of two");
    // ...
    if (Alignment >= 1u << 31)
      return Error(AlignLoc, "alignment is too large");
  // ...

  Symbol->redefineIfPossible();
  if (!Symbol->isUndefined())
    return Error(NameLoc, "invalid symbol redefinition");

  getTargetStreamer().emitAMDGPULDS(Symbol, Size, Align(Alignment));
  // ...
}
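// Illustration (added example): the directive takes a symbol, a byte size,
// and an optional power-of-two alignment that defaults to 4:
//   .amdgpu_lds my_shared_buf, 4096, 16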
bool AMDGPUAsmParser::ParseDirective(AsmToken DirectiveID) {
  // ...
  if (IDVal == ".amdhsa_kernel")
    return ParseDirectiveAMDHSAKernel();

  if (IDVal == ".amdhsa_code_object_version")
    return ParseDirectiveAMDHSACodeObjectVersion();
  // ...
    return ParseDirectiveHSAMetadata();
  // ...
  if (IDVal == ".amd_kernel_code_t")
    return ParseDirectiveAMDKernelCodeT();

  if (IDVal == ".amdgpu_hsa_kernel")
    return ParseDirectiveAMDGPUHsaKernel();

  if (IDVal == ".amd_amdgpu_isa")
    return ParseDirectiveISAVersion();
  // ...
                     Twine(" directive is "
                           "not available on non-amdhsa OSes"))
  // ...

  if (IDVal == ".amdgcn_target")
    return ParseDirectiveAMDGCNTarget();

  if (IDVal == ".amdgpu_lds")
    return ParseDirectiveAMDGPULDS();
  // ...
    return ParseDirectivePALMetadataBegin();
  // ...
    return ParseDirectivePALMetadata();
  // ...
}
// ...
  if (MRI.regsOverlap(AMDGPU::TTMP12_TTMP13_TTMP14_TTMP15, RegNo))
    // ...
  if (MRI.regsOverlap(AMDGPU::SGPR104_SGPR105, RegNo))
    return hasSGPR104_SGPR105();
  // ...
  case AMDGPU::SRC_SHARED_BASE_LO:
  case AMDGPU::SRC_SHARED_BASE:
  case AMDGPU::SRC_SHARED_LIMIT_LO:
  case AMDGPU::SRC_SHARED_LIMIT:
  case AMDGPU::SRC_PRIVATE_BASE_LO:
  case AMDGPU::SRC_PRIVATE_BASE:
  case AMDGPU::SRC_PRIVATE_LIMIT_LO:
  case AMDGPU::SRC_PRIVATE_LIMIT:
    // ...
  case AMDGPU::SRC_POPS_EXITING_WAVE_ID:
    // ...
  case AMDGPU::TBA_LO:
  case AMDGPU::TBA_HI:
    // ...
  case AMDGPU::TMA_LO:
  case AMDGPU::TMA_HI:
    // ...
  case AMDGPU::XNACK_MASK:
  case AMDGPU::XNACK_MASK_LO:
  case AMDGPU::XNACK_MASK_HI:
    return (isVI() || isGFX9()) &&
           getTargetStreamer().getTargetID()->isXnackSupported();
  case AMDGPU::SGPR_NULL:
    // ...
  case AMDGPU::FLAT_SCR:
  case AMDGPU::FLAT_SCR_LO:
  case AMDGPU::FLAT_SCR_HI:
    // ...

  if (MRI.regsOverlap(AMDGPU::SGPR102_SGPR103, RegNo))
    return hasSGPR102_SGPR103();
  // ...
// ...
  Res = MatchOperandParserImpl(Operands, Mnemonic);
  // ...

  SMLoc LBraceLoc = getLoc();
  // ...
    auto Loc = getLoc();
    // ...
      Error(Loc, "expected a register");
      // ...
    RBraceLoc = getLoc();
    // ...
                   "expected a comma or a closing square bracket"))
      // ...

  if (Operands.size() - Prefix > 1) {
    // ...
                     AMDGPUOperand::CreateToken(this, "[", LBraceLoc));
    Operands.push_back(AMDGPUOperand::CreateToken(this, "]", RBraceLoc));
  }
  // ...
// ...
  setForcedEncodingSize(0);
  setForcedDPP(false);
  setForcedSDWA(false);

  if (Name.ends_with("_e64_dpp")) {
    // ...
    setForcedEncodingSize(64);
    return Name.substr(0, Name.size() - 8);
  } else if (Name.ends_with("_e64")) {
    setForcedEncodingSize(64);
    return Name.substr(0, Name.size() - 4);
  } else if (Name.ends_with("_e32")) {
    setForcedEncodingSize(32);
    return Name.substr(0, Name.size() - 4);
  } else if (Name.ends_with("_dpp")) {
    // ...
    return Name.substr(0, Name.size() - 4);
  } else if (Name.ends_with("_sdwa")) {
    setForcedSDWA(true);
    return Name.substr(0, Name.size() - 5);
  }
  // ...
// ...
                                      unsigned VariantID);
// ...
  Operands.push_back(AMDGPUOperand::CreateToken(this, Name, NameLoc));

  bool IsMIMG = Name.starts_with("image_");
  // ...
  OperandMode Mode = OperandMode_Default;
  // ...
    Mode = OperandMode_NSA;
  // ...
    checkUnsupportedInstruction(Name, NameLoc);
    if (!Parser.hasPendingError()) {
      // ...
              : "not a valid operand.";
      Error(getLoc(), Msg);
    }
  // ...

// ...
  if (!trySkipId(Name))
    // ...
  Operands.push_back(AMDGPUOperand::CreateToken(this, Name, S));
  // ...
ParseStatus AMDGPUAsmParser::parseIntWithPrefix(
    const char *Prefix,
    // ...
    std::function<bool(int64_t &)> ConvertResult) {
  // ...
  if (ConvertResult && !ConvertResult(Value)) {
    // ...
  }

  Operands.push_back(AMDGPUOperand::CreateImm(this, Value, S, ImmTy));
  // ...
}

ParseStatus AMDGPUAsmParser::parseOperandArrayWithPrefix(
    // ...
    bool (*ConvertResult)(int64_t &)) {
  // ...
  const unsigned MaxSize = 4;
  // ...
  for (int I = 0; ; ++I) {
    // ...
    SMLoc Loc = getLoc();
    // ...
    if (Op != 0 && Op != 1)
      // ...

    if (I + 1 == MaxSize)
      return Error(getLoc(), "expected a closing square bracket");
    // ...
  }

  Operands.push_back(AMDGPUOperand::CreateImm(this, Val, S, ImmTy));
  // ...
}
// ...
                                            AMDGPUOperand::ImmTy ImmTy) {
  // ...
  if (trySkipId(Name)) {
    // ...
  } else if (trySkipId("no", Name)) {
    // ...
  }
  // ...
    return Error(S, "r128 modifier is not supported on this GPU");
  // ...
    return Error(S, "a16 modifier is not supported on this GPU");

  if (isGFX9() && ImmTy == AMDGPUOperand::ImmTyA16)
    ImmTy = AMDGPUOperand::ImmTyR128A16;

  Operands.push_back(AMDGPUOperand::CreateImm(this, Bit, S, ImmTy));
  // ...
}

// ...
                                      bool &Disabling) const {
  Disabling = Id.consume_front("no");
  // ...
// ...
  SMLoc StringLoc = getLoc();
  // ...
  int64_t CPolVal = 0;
  // ...
      ResScope = parseScope(Operands, Scope);
      // ...

  Operands.push_back(AMDGPUOperand::CreateImm(this, CPolVal, StringLoc,
                                              AMDGPUOperand::ImmTyCPol));
  // ...

  SMLoc OpLoc = getLoc();
  unsigned Enabled = 0, Seen = 0;
  // ...
    unsigned CPol = getCPolKind(getId(), Mnemo, Disabling);
    // ...
      return Error(S, "dlc modifier is not supported on this GPU");
    // ...
      return Error(S, "scc modifier is not supported on this GPU");
    // ...
      return Error(S, "duplicate cache policy modifier");
  // ...
      AMDGPUOperand::CreateImm(this, Enabled, OpLoc, AMDGPUOperand::ImmTyCPol));
  // ...
// ...
  Res = parseStringWithPrefix("scope", Value, StringLoc);
  // ...
  if (Scope == 0xffffffff)
    return Error(StringLoc, "invalid scope value");
  // ...

  if (Value == "TH_DEFAULT")
    // ...
  else if (Value == "TH_STORE_LU" || Value == "TH_LOAD_RT_WB" ||
           Value == "TH_LOAD_NT_WB") {
    return Error(StringLoc, "invalid th value");
  } else if (Value.consume_front("TH_ATOMIC_")) {
    // ...
  } else if (Value.consume_front("TH_LOAD_")) {
    // ...
  } else if (Value.consume_front("TH_STORE_")) {
    // ...
  } else {
    return Error(StringLoc, "invalid th value");
  }

  if (Value == "BYPASS")
    // ...

  if (TH == 0xffffffff)
    return Error(StringLoc, "invalid th value");
  // ...
// ...
                              AMDGPUAsmParser::OptionalImmIndexMap &OptionalIdx,
                              AMDGPUOperand::ImmTy ImmT,
                              // ...
  auto i = OptionalIdx.find(ImmT);
  if (i != OptionalIdx.end()) {
    unsigned Idx = i->second;
    ((AMDGPUOperand &)*Operands[Idx]).addImmOperands(Inst, 1);
  }
  // ...

// ...
  StringLoc = getLoc();
  // ...

bool AMDGPUAsmParser::tryParseFmt(const char *Pref,
                                  // ...
  SMLoc Loc = getLoc();
  // ...
  auto Res = parseIntWithPrefix(Pref, Val);
  // ...
  if (Val < 0 || Val > MaxVal) {
    // ...
  }
  // ...

// ...
                                               AMDGPUOperand::ImmTy ImmTy) {
  const char *Pref = "index_key";
  // ...
  SMLoc Loc = getLoc();
  auto Res = parseIntWithPrefix(Pref, ImmVal);
  // ...
  if (ImmTy == AMDGPUOperand::ImmTyIndexKey16bit && (ImmVal < 0 || ImmVal > 1))
    // ...
  if (ImmTy == AMDGPUOperand::ImmTyIndexKey8bit && (ImmVal < 0 || ImmVal > 3))
    // ...

  Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc, ImmTy));
  // ...

// ...
  return tryParseIndexKey(Operands, AMDGPUOperand::ImmTyIndexKey8bit);
// ...
  return tryParseIndexKey(Operands, AMDGPUOperand::ImmTyIndexKey16bit);
ParseStatus AMDGPUAsmParser::parseDfmtNfmt(int64_t &Format) {
  // ...
  for (int I = 0; I < 2; ++I) {
    if (Dfmt == DFMT_UNDEF && !tryParseFmt("dfmt", DFMT_MAX, Dfmt))
      // ...
    if (Nfmt == NFMT_UNDEF && !tryParseFmt("nfmt", NFMT_MAX, Nfmt))
      // ...
  }

  if ((Dfmt == DFMT_UNDEF) != (Nfmt == NFMT_UNDEF) &&
      // ...

  if (Dfmt == DFMT_UNDEF && Nfmt == NFMT_UNDEF)
    // ...

  Dfmt = (Dfmt == DFMT_UNDEF) ? DFMT_DEFAULT : Dfmt;
  Nfmt = (Nfmt == NFMT_UNDEF) ? NFMT_DEFAULT : Nfmt;
  // ...
}

ParseStatus AMDGPUAsmParser::parseUfmt(int64_t &Format) {
  // ...
  if (!tryParseFmt("format", UFMT_MAX, Fmt))
    // ...
  if (Fmt == UFMT_UNDEF)
    // ...
}

bool AMDGPUAsmParser::matchDfmtNfmt(int64_t &Dfmt,
                                    // ...
  if (Format != DFMT_UNDEF) {
    // ...
  }
  if (Format != NFMT_UNDEF) {
    // ...
  }
  Error(Loc, "unsupported format");
  // ...
}

// ...
  if (!matchDfmtNfmt(Dfmt, Nfmt, FormatStr, FormatLoc))
    // ...

  SMLoc Loc = getLoc();
  if (!parseId(Str, "expected a format string") ||
      !matchDfmtNfmt(Dfmt, Nfmt, Str, Loc))
    // ...
  if (Dfmt == DFMT_UNDEF)
    return Error(Loc, "duplicate numeric format");
  if (Nfmt == NFMT_UNDEF)
    return Error(Loc, "duplicate data format");
  // ...

  Dfmt = (Dfmt == DFMT_UNDEF) ? DFMT_DEFAULT : Dfmt;
  Nfmt = (Nfmt == NFMT_UNDEF) ? NFMT_DEFAULT : Nfmt;
  // ...
  if (Ufmt == UFMT_UNDEF)
    return Error(FormatLoc, "unsupported format");
  // ...

// ...
  if (Id == UFMT_UNDEF)
    // ...
    return Error(Loc, "unified format is not supported on this GPU");
  // ...
ParseStatus AMDGPUAsmParser::parseNumericFormat(int64_t &Format) {
  // ...
  SMLoc Loc = getLoc();
  // ...
  if (!parseExpr(Format))
    // ...
    return Error(Loc, "out of range format");
  // ...
}

ParseStatus AMDGPUAsmParser::parseSymbolicOrNumericFormat(int64_t &Format) {
  // ...
  SMLoc Loc = getLoc();
  // ...
  if (!parseId(FormatStr, "expected a format string"))
    // ...

  auto Res = parseSymbolicUnifiedFormat(FormatStr, Loc, Format);
  // ...
    Res = parseSymbolicSplitFormat(FormatStr, Loc, Format);
  // ...
  return parseNumericFormat(Format);
}

// ...
  SMLoc Loc = getLoc();
  // ...
      AMDGPUOperand::CreateImm(this, Format, Loc, AMDGPUOperand::ImmTyFORMAT));
  // ...
    Res = parseSymbolicOrNumericFormat(Format);
  // ...
    AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands[Size - 2]);
    assert(Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyFORMAT);
    // ...
    return Error(getLoc(), "duplicate format");
  // ...
// ...
  parseIntWithPrefix("offset", Operands, AMDGPUOperand::ImmTyOffset);
  // ...
  Res = parseIntWithPrefix("inst_offset", Operands,
                           AMDGPUOperand::ImmTyInstOffset);
  // ...

// ...
  parseNamedBit("r128", Operands, AMDGPUOperand::ImmTyR128A16);
  // ...
  Res = parseNamedBit("a16", Operands, AMDGPUOperand::ImmTyA16);
  // ...

// ...
  parseIntWithPrefix("blgp", Operands, AMDGPUOperand::ImmTyBLGP);
  // ...
  parseOperandArrayWithPrefix("neg", Operands, AMDGPUOperand::ImmTyBLGP);
  // ...
// ...
  OptionalImmIndexMap OptionalIdx;
  // ...
  unsigned OperandIdx[4];
  unsigned EnMask = 0;
  // ...
  for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
    // ...
      OperandIdx[SrcIdx] = Inst.size();
      Op.addRegOperands(Inst, 1);
      // ...
      OperandIdx[SrcIdx] = Inst.size();
      // ...
    if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyExpTgt) {
      Op.addImmOperands(Inst, 1);
      // ...
    }
    if (Op.isToken() && (Op.getToken() == "done" || Op.getToken() == "row_en"))
      // ...

    OptionalIdx[Op.getImmTy()] = i;
  }
  // ...
  if (OptionalIdx.find(AMDGPUOperand::ImmTyExpCompr) != OptionalIdx.end()) {
    // ...
  }

  for (auto i = 0; i < SrcIdx; ++i) {
    // ...
      EnMask |= Compr ? (0x3 << i * 2) : (0x1 << i);
    // ...
  }
// ...
  IntVal = encode(ISA, IntVal, CntVal);
  if (CntVal != decode(ISA, IntVal)) {
    // ...
    IntVal = encode(ISA, IntVal, -1);
    // ...
  }
  // ...

bool AMDGPUAsmParser::parseCnt(int64_t &IntVal) {
  // ...
  SMLoc CntLoc = getLoc();
  // ...
  SMLoc ValLoc = getLoc();
  if (!parseExpr(CntVal))
    // ...

  if (CntName == "vmcnt" || CntName == "vmcnt_sat") {
    // ...
  } else if (CntName == "expcnt" || CntName == "expcnt_sat") {
    // ...
  } else if (CntName == "lgkmcnt" || CntName == "lgkmcnt_sat") {
    // ...
  } else {
    Error(CntLoc, "invalid counter name " + CntName);
    // ...
  }
  // ...
    Error(ValLoc, "too large value for " + CntName);
  // ...

// ...
  Error(getLoc(), "expected a counter name");
  // ...
bool AMDGPUAsmParser::parseDelay(int64_t &Delay) {
  SMLoc FieldLoc = getLoc();
  // ...
  SMLoc ValueLoc = getLoc();
  // ...

  if (FieldName == "instid0") {
    // ...
  } else if (FieldName == "instskip") {
    // ...
  } else if (FieldName == "instid1") {
    // ...
  } else {
    Error(FieldLoc, "invalid field name " + FieldName);
    // ...
  }
  // ...
          .Case("VALU_DEP_1", 1)
          .Case("VALU_DEP_2", 2)
          .Case("VALU_DEP_3", 3)
          .Case("VALU_DEP_4", 4)
          .Case("TRANS32_DEP_1", 5)
          .Case("TRANS32_DEP_2", 6)
          .Case("TRANS32_DEP_3", 7)
          .Case("FMA_ACCUM_CYCLE_1", 8)
          .Case("SALU_CYCLE_1", 9)
          .Case("SALU_CYCLE_2", 10)
          .Case("SALU_CYCLE_3", 11)
          // ...

  Delay |= Value << Shift;
  // ...
}

// ...
    if (!parseDelay(Delay))
      // ...
    if (!parseExpr(Delay))
      // ...

  Operands.push_back(AMDGPUOperand::CreateImm(this, Delay, S));
  // ...
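// Illustration (added example, not from the upstream source): the named
// fields combine with '|' into a single s_delay_alu immediate:
//   s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2)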
// ...
bool AMDGPUOperand::isSWaitCnt() const {
  // ...
}

bool AMDGPUOperand::isSDelayALU() const { return isImm(); }
void AMDGPUAsmParser::depCtrError(SMLoc Loc, int ErrorId,
                                  // ...
    Error(Loc, Twine("invalid counter name ", DepCtrName));
    // ...
    Error(Loc, Twine(DepCtrName, " is not supported on this GPU"));
    // ...
    Error(Loc, Twine("duplicate counter name ", DepCtrName));
    // ...
    Error(Loc, Twine("invalid value for ", DepCtrName));
    // ...
}

bool AMDGPUAsmParser::parseDepCtr(int64_t &DepCtr, unsigned &UsedOprMask) {
  // ...
  SMLoc DepCtrLoc = getLoc();
  // ...
    if (!parseExpr(ExprVal))
      // ...

    unsigned PrevOprMask = UsedOprMask;
    int CntVal = encodeDepCtr(DepCtrName, ExprVal, UsedOprMask, getSTI());
    // ...
      depCtrError(DepCtrLoc, CntVal, DepCtrName);
      // ...

  // ...
    Error(getLoc(), "expected a counter name");
    // ...

    unsigned CntValMask = PrevOprMask ^ UsedOprMask;
    DepCtr = (DepCtr & ~CntValMask) | CntVal;
    // ...

// ...
  SMLoc Loc = getLoc();
  // ...
  unsigned UsedOprMask = 0;
  // ...
    if (!parseDepCtr(DepCtr, UsedOprMask))
      // ...
    if (!parseExpr(DepCtr))
      // ...

  Operands.push_back(AMDGPUOperand::CreateImm(this, DepCtr, Loc));
  // ...

bool AMDGPUOperand::isDepCtr() const { return isS16Imm(); }
ParseStatus AMDGPUAsmParser::parseHwregFunc(OperandInfoTy &HwReg,
                                            // ...
                                            OperandInfoTy &Width) {
  // ...
  HwReg.Loc = getLoc();
  // ...
    HwReg.IsSymbolic = true;
    // ...
  } else if (!parseExpr(HwReg.Val, "a register name")) {
    // ...
  }

  if (!skipToken(AsmToken::Comma, "expected a comma or a closing parenthesis"))
    // ...

  if (!parseExpr(Offset.Val))
    // ...

  Width.Loc = getLoc();
  if (!parseExpr(Width.Val) ||
      // ...

// ...
  SMLoc Loc = getLoc();

  StructuredOpField HwReg("id", "hardware register", HwregId::Width,
                          // ...
  StructuredOpField Offset("offset", "bit offset", HwregOffset::Width,
                           HwregOffset::Default);
  struct : StructuredOpField {
    using StructuredOpField::StructuredOpField;
    bool validate(AMDGPUAsmParser &Parser) const override {
      // ...
        return Error(Parser, "only values from 1 to 32 are legal");
      // ...
    }
  } Width("size", "bitfield width", HwregSize::Width, HwregSize::Default);
  // ...
    Res = parseHwregFunc(HwReg, Offset, Width);
    // ...
    if (!validateStructuredOpFields({&HwReg, &Offset, &Width}))
      // ...
    ImmVal = HwregEncoding::encode(HwReg.Val, Offset.Val, Width.Val);
  // ...
      parseExpr(ImmVal, "a hwreg macro, structured immediate"))
    // ...

  if (!isUInt<16>(ImmVal))
    return Error(Loc, "invalid immediate: only 16-bit values are legal");
  // ...
      AMDGPUOperand::CreateImm(this, ImmVal, Loc, AMDGPUOperand::ImmTyHwreg));
  // ...

bool AMDGPUOperand::isHwreg() const {
  return isImmTy(ImmTyHwreg);
}
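// Illustration (added example; field layout as on pre-gfx12 targets):
// hwreg(id, offset, size) packs into the 16-bit immediate as
//   id | (offset << 6) | ((size - 1) << 11)
// so reading the whole MODE register is written as:
//   s_getreg_b32 s0, hwreg(HW_REG_MODE, 0, 32)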
// ...
AMDGPUAsmParser::parseSendMsgBody(OperandInfoTy &Msg,
                                  // ...
                                  OperandInfoTy &Stream) {
  // ...
    Msg.IsSymbolic = true;
    // ...
  } else if (!parseExpr(Msg.Val, "a message name")) {
    // ...
  }
  // ...
    Op.IsDefined = true;
    // ...
    } else if (!parseExpr(Op.Val, "an operation name")) {
      // ...
    }
    // ...
      Stream.IsDefined = true;
      Stream.Loc = getLoc();
      if (!parseExpr(Stream.Val))
        // ...
  // ...
}

// ...
AMDGPUAsmParser::validateSendMsg(const OperandInfoTy &Msg,
                                 const OperandInfoTy &Op,
                                 const OperandInfoTy &Stream) {
  // ...
    Error(Msg.Loc, "specified message id is not supported on this GPU");
    // ...
    Error(Msg.Loc, "invalid message id");
    // ...
    Error(Op.Loc, "message does not support operations");
    // ...
    Error(Msg.Loc, "missing message operation");
    // ...
    Error(Op.Loc, "invalid operation id");
    // ...
    Error(Stream.Loc, "message operation does not support streams");
    // ...
    Error(Stream.Loc, "invalid message stream id");
    // ...
}

// ...
  SMLoc Loc = getLoc();
  // ...
  OperandInfoTy Op(OP_NONE_);
  OperandInfoTy Stream(STREAM_ID_NONE_);
  if (parseSendMsgBody(Msg, Op, Stream) &&
      validateSendMsg(Msg, Op, Stream)) {
    // ...
  } else if (parseExpr(ImmVal, "a sendmsg macro")) {
    if (ImmVal < 0 || !isUInt<16>(ImmVal))
      return Error(Loc, "invalid immediate: only 16-bit values are legal");
    // ...
  }

  Operands.push_back(
      AMDGPUOperand::CreateImm(this, ImmVal, Loc, AMDGPUOperand::ImmTySendMsg));
  // ...

bool AMDGPUOperand::isSendMsg() const {
  return isImmTy(ImmTySendMsg);
}
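// Illustration (added example; pre-gfx11 encoding, stated as an assumption):
// sendmsg(msg, op, stream) packs as msg | (op << 4) | (stream << 8), e.g.
//   s_sendmsg sendmsg(MSG_GS, GS_OP_EMIT, 0)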
// ...
    return Error(S, "invalid interpolation slot");

  Operands.push_back(AMDGPUOperand::CreateImm(this, Slot, S,
                                              AMDGPUOperand::ImmTyInterpSlot));
  // ...

// ...
  if (!Str.starts_with("attr"))
    return Error(S, "invalid interpolation attribute");
  // ...
    return Error(S, "invalid or missing interpolation attribute channel");

  Str = Str.drop_back(2).drop_front(4);
  // ...
  if (Str.getAsInteger(10, Attr))
    return Error(S, "invalid or missing interpolation attribute number");
  // ...
    return Error(S, "out of bounds interpolation attribute number");
  // ...

  Operands.push_back(AMDGPUOperand::CreateImm(this, Attr, S,
                                              AMDGPUOperand::ImmTyInterpAttr));
  Operands.push_back(AMDGPUOperand::CreateImm(
      this, AttrChan, SChan, AMDGPUOperand::ImmTyInterpAttrChan));
  // ...

// ...
    return Error(S, (Id == ET_INVALID)
                        ? "invalid exp target"
                        : "exp target is not supported on this GPU");

  Operands.push_back(AMDGPUOperand::CreateImm(this, Id, S,
                                              AMDGPUOperand::ImmTyExpTgt));
  // ...
// ...
  return isId(getToken(), Id);
// ...
  return getTokenKind() == Kind;
// ...

StringRef AMDGPUAsmParser::getId() const {
  // ...
}

// ...
  if (isId(Id) && peekToken().is(Kind)) {
    // ...
  }
// ...
  if (isToken(Kind)) {
    // ...
  }
// ...
  if (!trySkipToken(Kind)) {
    Error(getLoc(), ErrMsg);
    // ...
  }
// ...

// ...
  if (Parser.parseExpression(Expr))
    // ...
  if (Expr->evaluateAsAbsolute(Imm))
    // ...
  Error(S, "expected absolute expression");
  // ...
        Twine(" or an absolute expression"));
// ...

// ...
  if (Parser.parseExpression(Expr))
    // ...
  if (Expr->evaluateAsAbsolute(IntVal)) {
    Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S));
  } else {
    Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S));
  }
// ...

// ...
  Val = getToken().getStringContents();
  // ...
  Error(getLoc(), ErrMsg);
// ...
  Val = getTokenStr();
  // ...
  if (!ErrMsg.empty())
    Error(getLoc(), ErrMsg);
// ...

// ...
AMDGPUAsmParser::getToken() const {
  return Parser.getTok();
}

AsmToken AMDGPUAsmParser::peekToken(bool ShouldSkipSpace) {
  // ...
                   : getLexer().peekTok(ShouldSkipSpace);
}

// ...
  auto TokCount = getLexer().peekTokens(Tokens);
  // ...

// ...
AMDGPUAsmParser::getTokenKind() const {
  // ...
}

// ...
AMDGPUAsmParser::getLoc() const {
  return getToken().getLoc();
}

// ...
AMDGPUAsmParser::getTokenStr() const {
  return getToken().getString();
}

// ...
AMDGPUAsmParser::lex() {
  // ...
}
SMLoc AMDGPUAsmParser::getInstLoc(const OperandVector &Operands) const {
  return ((AMDGPUOperand &)*Operands[0]).getStartLoc();
}

SMLoc
AMDGPUAsmParser::getOperandLoc(std::function<bool(const AMDGPUOperand&)> Test,
                               const OperandVector &Operands) const {
  for (unsigned i = Operands.size() - 1; i > 0; --i) {
    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
    if (Test(Op))
      return Op.getStartLoc();
  }
  return getInstLoc(Operands);
}

SMLoc AMDGPUAsmParser::getImmLoc(AMDGPUOperand::ImmTy Type,
                                 const OperandVector &Operands) const {
  auto Test = [=](const AMDGPUOperand& Op) { return Op.isImmTy(Type); };
  return getOperandLoc(Test, Operands);
}

SMLoc AMDGPUAsmParser::getRegLoc(unsigned Reg,
                                 const OperandVector &Operands) const {
  auto Test = [=](const AMDGPUOperand& Op) {
    return Op.isRegKind() && Op.getReg() == Reg;
  };
  return getOperandLoc(Test, Operands);
}

SMLoc AMDGPUAsmParser::getLitLoc(const OperandVector &Operands,
                                 bool SearchMandatoryLiterals) const {
  auto Test = [](const AMDGPUOperand& Op) {
    return Op.IsImmKindLiteral() || Op.isExpr();
  };
  SMLoc Loc = getOperandLoc(Test, Operands);
  if (SearchMandatoryLiterals && Loc == getInstLoc(Operands))
    Loc = getMandatoryLitLoc(Operands);
  return Loc;
}

SMLoc AMDGPUAsmParser::getMandatoryLitLoc(const OperandVector &Operands) const {
  auto Test = [](const AMDGPUOperand &Op) {
    return Op.IsImmKindMandatoryLiteral();
  };
  return getOperandLoc(Test, Operands);
}

SMLoc AMDGPUAsmParser::getConstLoc(const OperandVector &Operands) const {
  auto Test = [](const AMDGPUOperand& Op) {
    return Op.isImmKindConst();
  };
  return getOperandLoc(Test, Operands);
}
  // parseStructuredOpFields, parsing one "field:value" pair per iteration:
  SMLoc IdLoc = getLoc();
  StringRef Id;
  if (!parseId(Id))
    return ParseStatus::Failure;

  auto I = find_if(Fields, [Id](StructuredOpField *F) { return F->Id == Id; });
  if (I == Fields.end())
    return Error(IdLoc, "unknown field");
  if ((*I)->IsDefined)
    return Error(IdLoc, "duplicate field");

  // (colon skipping elided in this listing)
  (*I)->Loc = getLoc();
  if (!parseExpr((*I)->Val))
    return ParseStatus::Failure;
  (*I)->IsDefined = true;

bool AMDGPUAsmParser::validateStructuredOpFields(
    ArrayRef<const StructuredOpField *> Fields) {
  return all_of(Fields, [this](const StructuredOpField *F) {
    return F->validate(*this);
  });
}
static LLVM_READNONE unsigned encodeBitmaskPerm(const unsigned AndMask,
                                                const unsigned OrMask,
                                                const unsigned XorMask) {
  using namespace llvm::AMDGPU::Swizzle;

  return BITMASK_PERM_ENC |
         (AndMask << BITMASK_AND_SHIFT) |
         (OrMask  << BITMASK_OR_SHIFT)  |
         (XorMask << BITMASK_XOR_SHIFT);
}
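All bitmask-based swizzle modes below funnel into this encoder. A sketch with the shift amounts spelled out (the values 0/5/10 for the AND/OR/XOR fields are an assumption based on the 5-bit lane masks, and the helper name is illustrative):

#include <cstdint>

static unsigned encodeBitmaskPermSketch(unsigned AndMask, unsigned OrMask,
                                        unsigned XorMask) {
  // new_lane_id = ((old_lane_id & AndMask) | OrMask) ^ XorMask
  return (AndMask & 0x1F) | ((OrMask & 0x1F) << 5) | ((XorMask & 0x1F) << 10);
}

// swizzle(SWAP, 16) becomes lane_id XOR 16, i.e.
// encodeBitmaskPermSketch(0x1F, 0, 16).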
bool AMDGPUAsmParser::parseSwizzleOperand(int64_t &Op,
                                          const unsigned MinVal,
                                          const unsigned MaxVal,
                                          const StringRef ErrMsg,
                                          SMLoc &Loc) {
  if (!skipToken(AsmToken::Comma, "expected a comma"))
    return false;

  Loc = getLoc();
  if (!parseExpr(Op)) {
    return false;
  }
  if (Op < MinVal || Op > MaxVal) {
    Error(Loc, ErrMsg);
    return false;
  }

  return true;
}

bool AMDGPUAsmParser::parseSwizzleOperands(const unsigned OpNum, int64_t* Op,
                                           const unsigned MinVal,
                                           const unsigned MaxVal,
                                           const StringRef ErrMsg) {
  SMLoc Loc;
  for (unsigned i = 0; i < OpNum; ++i) {
    if (!parseSwizzleOperand(Op[i], MinVal, MaxVal, ErrMsg, Loc))
      return false;
  }

  return true;
}
bool AMDGPUAsmParser::parseSwizzleQuadPerm(int64_t &Imm) {
  using namespace llvm::AMDGPU::Swizzle;

  int64_t Lane[LANE_NUM];
  if (parseSwizzleOperands(LANE_NUM, Lane, 0, LANE_MAX,
                           "expected a 2-bit lane id")) {
    Imm = QUAD_PERM_ENC;
    for (unsigned I = 0; I < LANE_NUM; ++I)
      Imm |= Lane[I] << (LANE_SHIFT * I);
    return true;
  }
  return false;
}
bool AMDGPUAsmParser::parseSwizzleBroadcast(int64_t &Imm) {
  using namespace llvm::AMDGPU::Swizzle;

  SMLoc Loc;
  int64_t GroupSize;
  int64_t LaneIdx;

  if (!parseSwizzleOperand(GroupSize, 2, 32,
                           "group size must be in the interval [2,32]", Loc))
    return false;
  if (!isPowerOf2_64(GroupSize)) {
    Error(Loc, "group size must be a power of two");
    return false;
  }
  if (parseSwizzleOperand(LaneIdx, 0, GroupSize - 1,
                          "lane id must be in the interval [0,group size - 1]",
                          Loc)) {
    Imm = encodeBitmaskPerm(BITMASK_MAX - GroupSize + 1, LaneIdx, 0);
    return true;
  }
  return false;
}
bool AMDGPUAsmParser::parseSwizzleReverse(int64_t &Imm) {
  using namespace llvm::AMDGPU::Swizzle;

  SMLoc Loc;
  int64_t GroupSize;

  if (!parseSwizzleOperand(GroupSize, 2, 32,
                           "group size must be in the interval [2,32]", Loc))
    return false;
  if (!isPowerOf2_64(GroupSize)) {
    Error(Loc, "group size must be a power of two");
    return false;
  }

  Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize - 1);
  return true;
}
bool AMDGPUAsmParser::parseSwizzleSwap(int64_t &Imm) {
  using namespace llvm::AMDGPU::Swizzle;

  SMLoc Loc;
  int64_t GroupSize;

  if (!parseSwizzleOperand(GroupSize, 1, 16,
                           "group size must be in the interval [1,16]", Loc))
    return false;
  if (!isPowerOf2_64(GroupSize)) {
    Error(Loc, "group size must be a power of two");
    return false;
  }

  Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize);
  return true;
}
bool AMDGPUAsmParser::parseSwizzleBitmaskPerm(int64_t &Imm) {
  using namespace llvm::AMDGPU::Swizzle;

  if (!skipToken(AsmToken::Comma, "expected a comma"))
    return false;

  StringRef Ctl;
  SMLoc StrLoc = getLoc();
  if (!parseString(Ctl)) {
    return false;
  }
  if (Ctl.size() != BITMASK_WIDTH) {
    Error(StrLoc, "expected a 5-character mask");
    return false;
  }

  unsigned AndMask = 0;
  unsigned OrMask = 0;
  unsigned XorMask = 0;

  for (size_t i = 0; i < Ctl.size(); ++i) {
    unsigned Mask = 1 << (BITMASK_WIDTH - 1 - i);
    switch (Ctl[i]) {
    default:  Error(StrLoc, "invalid mask"); return false;
    case '0': break;                                       // force 0
    case '1': OrMask |= Mask; break;                       // force 1
    case 'p': AndMask |= Mask; break;                      // pass bit through
    case 'i': AndMask |= Mask; XorMask |= Mask; break;     // invert bit
    }
  }

  Imm = encodeBitmaskPerm(AndMask, OrMask, XorMask);
  return true;
}
bool AMDGPUAsmParser::parseSwizzleOffset(int64_t &Imm) {
  SMLoc OffsetLoc = getLoc();

  if (!parseExpr(Imm, "a swizzle macro")) {
    return false;
  }
  if (!isUInt<16>(Imm)) {
    Error(OffsetLoc, "expected a 16-bit offset");
    return false;
  }
  return true;
}
bool AMDGPUAsmParser::parseSwizzleMacro(int64_t &Imm) {
  using namespace llvm::AMDGPU::Swizzle;

  if (skipToken(AsmToken::LParen, "expected a left parentheses")) {
    SMLoc ModeLoc = getLoc();
    bool Ok = false;

    if (trySkipId(IdSymbolic[ID_QUAD_PERM])) {
      Ok = parseSwizzleQuadPerm(Imm);
    } else if (trySkipId(IdSymbolic[ID_BITMASK_PERM])) {
      Ok = parseSwizzleBitmaskPerm(Imm);
    } else if (trySkipId(IdSymbolic[ID_BROADCAST])) {
      Ok = parseSwizzleBroadcast(Imm);
    } else if (trySkipId(IdSymbolic[ID_SWAP])) {
      Ok = parseSwizzleSwap(Imm);
    } else if (trySkipId(IdSymbolic[ID_REVERSE])) {
      Ok = parseSwizzleReverse(Imm);
    } else {
      Error(ModeLoc, "expected a swizzle mode");
    }

    return Ok && skipToken(AsmToken::RParen, "expected a closing parentheses");
  }
  return false;
}

ParseStatus AMDGPUAsmParser::parseSwizzle(OperandVector &Operands) {
  SMLoc S = getLoc();
  int64_t Imm = 0;

  if (trySkipId("offset")) {
    bool Ok = false;
    if (skipToken(AsmToken::Colon, "expected a colon")) {
      if (trySkipId("swizzle"))
        Ok = parseSwizzleMacro(Imm);
      else
        Ok = parseSwizzleOffset(Imm);
    }

    Operands.push_back(
        AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTySwizzle));

    return Ok ? ParseStatus::Success : ParseStatus::Failure;
  }
  return ParseStatus::NoMatch;
}
bool AMDGPUOperand::isSwizzle() const {
  return isImmTy(ImmTySwizzle);
}
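Examples of the accepted ds_swizzle_b32 operand forms (a sketch; consult the ISA documentation for the authoritative grammar):

// ds_swizzle_b32 v0, v1 offset:swizzle(QUAD_PERM, 0, 1, 2, 3)
// ds_swizzle_b32 v0, v1 offset:swizzle(BITMASK_PERM, "01pip")
// ds_swizzle_b32 v0, v1 offset:swizzle(BROADCAST, 8, 0)
// ds_swizzle_b32 v0, v1 offset:swizzle(SWAP, 16)
// ds_swizzle_b32 v0, v1 offset:swizzle(REVERSE, 4)
// ds_swizzle_b32 v0, v1 offset:0xffff   // raw 16-bit offset, no macro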
int64_t AMDGPUAsmParser::parseGPRIdxMacro() {
  using namespace llvm::AMDGPU::VGPRIndexMode;

  int64_t Imm = 0;

  while (true) {
    int64_t Mode = 0;
    SMLoc S = getLoc();

    for (unsigned ModeId = ID_MIN; ModeId <= ID_MAX; ++ModeId) {
      if (trySkipId(IdSymbolic[ModeId])) {
        Mode = 1 << ModeId;
        break;
      }
    }

    if (Mode == 0) {
      Error(S, (Imm == 0) ?
               "expected a VGPR index mode or a closing parenthesis" :
               "expected a VGPR index mode");
      return UNDEF;
    }
    if (Imm & Mode) {
      Error(S, "duplicate VGPR index mode");
      return UNDEF;
    }
    Imm |= Mode;

    if (trySkipToken(AsmToken::RParen))
      break;
    if (!skipToken(AsmToken::Comma,
                   "expected a comma or a closing parenthesis"))
      return UNDEF;
  }

  return Imm;
}

ParseStatus AMDGPUAsmParser::parseGPRIdxMode(OperandVector &Operands) {
  using namespace llvm::AMDGPU::VGPRIndexMode;

  int64_t Imm = 0;
  SMLoc S = getLoc();

  if (trySkipId("gpr_idx", AsmToken::LParen)) {
    Imm = parseGPRIdxMacro();
    if (Imm == UNDEF)
      return ParseStatus::Failure;
  } else {
    if (getParser().parseAbsoluteExpression(Imm))
      return ParseStatus::Failure;
    if (Imm < 0 || !isUInt<4>(Imm))
      return Error(S, "invalid immediate: only 4-bit values are legal");
  }

  Operands.push_back(
      AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyGprIdxMode));
  return ParseStatus::Success;
}
bool AMDGPUOperand::isGPRIdxMode() const {
  return isImmTy(ImmTyGprIdxMode);
}
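The two accepted forms, for illustration (the symbolic names come from VGPRIndexMode::IdSymbolic; the exact spellings here are a sketch):

// s_set_gpr_idx_on s0, gpr_idx(SRC0,SRC1,SRC2,DST)  // symbolic macro form
// s_set_gpr_idx_on s0, 15                           // raw 4-bit immediate
// Each named mode sets one bit of the immediate; repeating a name triggers
// the "duplicate VGPR index mode" error above.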
ParseStatus AMDGPUAsmParser::parseSOPPBrTarget(OperandVector &Operands) {
  // Make sure we are not parsing something that looks like a label or an
  // expression but is not. This will improve error messages.
  if (isRegister() || isModifier())
    return ParseStatus::NoMatch;

  if (!parseExpr(Operands))
    return ParseStatus::Failure;

  AMDGPUOperand &Opr = ((AMDGPUOperand &)*Operands[Operands.size() - 1]);
  assert(Opr.isImm() || Opr.isExpr());
  SMLoc Loc = Opr.getStartLoc();

  // Currently we do not support arbitrary expressions as branch targets.
  // Only labels and absolute expressions are accepted.
  if (Opr.isExpr() && !Opr.isSymbolRefExpr()) {
    Error(Loc, "expected an absolute expression or a label");
  } else if (Opr.isImm() && !Opr.isS16Imm()) {
    Error(Loc, "expected a 16-bit signed jump offset");
  }

  return ParseStatus::Success;
}
void AMDGPUAsmParser::cvtMubufImpl(MCInst &Inst,
                                   const OperandVector &Operands,
                                   bool IsAtomic) {
  OptionalImmIndexMap OptionalIdx;
  unsigned FirstOperandIdx = 1;
  bool IsAtomicReturn = false;

  // (atomic-return detection elided in this listing)

  for (unsigned i = FirstOperandIdx, e = Operands.size(); i != e; ++i) {
    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);

    // Add the register arguments.
    if (Op.isReg()) {
      Op.addRegOperands(Inst, 1);
      // Insert a tied src for atomic return dst. This cannot be postponed
      // as subsequent calls to addImmOperands rely on the correct number
      // of MC operands.
      if (IsAtomicReturn && i == FirstOperandIdx)
        Op.addRegOperands(Inst, 1);
      continue;
    }

    // Handle the case where soffset is an immediate.
    if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) {
      Op.addImmOperands(Inst, 1);
      continue;
    }

    // Handle optional arguments.
    OptionalIdx[Op.getImmTy()] = i;
  }
}
bool AMDGPUOperand::isSMRDOffset8() const {
  return isImmLiteral() && isUInt<8>(getImm());
}

bool AMDGPUOperand::isSMEMOffset() const {
  // The offset range is checked later by the validator.
  return isImmLiteral();
}

bool AMDGPUOperand::isSMRDLiteralOffset() const {
  // 32-bit literals are only supported on CI and we only want to use them
  // when the offset is > 8-bits.
  return isImmLiteral() && !isUInt<8>(getImm()) && isUInt<32>(getImm());
}
bool AMDGPUAsmParser::convertDppBoundCtrl(int64_t &BoundCtrl) {
  if (BoundCtrl == 0 || BoundCtrl == 1) {
    // (normalization of the accepted value elided in this listing)
    return true;
  }
  return false;
}
void AMDGPUAsmParser::onBeginOfFile() {
  if (!getParser().getStreamer().getTargetStreamer() ||
      getSTI().getTargetTriple().getArch() == Triple::r600)
    return;

  if (!getTargetStreamer().getTargetID())
    getTargetStreamer().initializeTargetID(getSTI(),
                                           getSTI().getFeatureString());

  if (isHsaAbi(getSTI()))
    getTargetStreamer().EmitDirectiveAMDGCNTarget();
}
bool AMDGPUAsmParser::parsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc) {
  using AGVK = AMDGPUVariadicMCExpr::VariadicKind;

  if (isToken(AsmToken::Identifier)) {
    StringRef TokenId = getTokenStr();
    AGVK VK = StringSwitch<AGVK>(TokenId)
                  .Case("max", AGVK::AGVK_Max)
                  .Case("or", AGVK::AGVK_Or)
                  .Default(AGVK::AGVK_None);

    if (VK != AGVK::AGVK_None && peekToken().is(AsmToken::LParen)) {
      SmallVector<const MCExpr *, 4> Exprs;
      uint64_t CommaCount = 0;
      lex(); // Eat 'max'/'or'
      lex(); // Eat '('
      while (true) {
        if (trySkipToken(AsmToken::RParen)) {
          if (Exprs.empty()) {
            Error(getToken().getLoc(),
                  "empty " + Twine(TokenId) + " expression");
            return true;
          }
          if (CommaCount + 1 != Exprs.size()) {
            Error(getToken().getLoc(),
                  "mismatch of commas in " + Twine(TokenId) + " expression");
            return true;
          }
          Res = AMDGPUVariadicMCExpr::create(VK, Exprs, getContext());
          return false;
        }
        const MCExpr *Expr;
        if (getParser().parseExpression(Expr, EndLoc))
          return true;
        Exprs.push_back(Expr);
        bool LastTokenWasComma = trySkipToken(AsmToken::Comma);
        if (LastTokenWasComma)
          CommaCount++;
        if (!LastTokenWasComma && !isToken(AsmToken::RParen)) {
          Error(getToken().getLoc(),
                "unexpected token in " + Twine(TokenId) + " expression");
          return true;
        }
      }
    }
  }
  return getParser().parsePrimaryExpr(Res, EndLoc, nullptr);
}
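Example of the AMDGPU-specific variadic expressions accepted here (a sketch; the symbol names are hypothetical):

// .set total_sgprs, max(kernel_a.num_sgprs, kernel_b.num_sgprs, 32)
// .set any_feature, or(cond_x, cond_y)
// Operands become an AMDGPUVariadicMCExpr so they may stay unresolved until
// layout; an empty list or a stray comma produces the errors shown above.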
ParseStatus AMDGPUAsmParser::parseOModSI(OperandVector &Operands) {
  StringRef Name = getTokenStr();
  if (Name == "mul") {
    return parseIntWithPrefix("mul", Operands,
                              AMDGPUOperand::ImmTyOModSI, ConvertOmodMul);
  }
  if (Name == "div") {
    return parseIntWithPrefix("div", Operands,
                              AMDGPUOperand::ImmTyOModSI, ConvertOmodDiv);
  }
  return ParseStatus::NoMatch;
}
// cvtVOP3DstOpSelOnly: fold the destination op_sel bit into src0_modifiers.
  // Count the source operands present:
  const int Ops[] = { AMDGPU::OpName::src0,
                      AMDGPU::OpName::src1,
                      AMDGPU::OpName::src2 };
  // ... (operand index lookups elided in this listing) ...

  // True 16-bit destinations carry the op_sel bit in the register half;
  // otherwise bit SrcNum of the packed op_sel immediate is used.
  if (DstOp.isReg() &&
      MRI.getRegClass(AMDGPU::VGPR_16RegClassID).contains(DstOp.getReg())) {
    if (AMDGPU::isHi(DstOp.getReg(), MRI))
      ModVal |= SISrcMods::DST_OP_SEL;
  } else if ((OpSel & (1 << SrcNum)) != 0) {
    ModVal |= SISrcMods::DST_OP_SEL;
  }
void AMDGPUAsmParser::cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands,
                                   OptionalImmIndexMap &OptionalIdx) {
  cvtVOP3P(Inst, Operands, OptionalIdx);
  cvtVOP3DstOpSelOnly(Inst, *getMRI());
}
static bool isRegOrImmWithInputMods(const MCInstrDesc &Desc, unsigned OpNum) {
  return
      // 1. This operand is input modifiers
      Desc.operands()[OpNum].OperandType == AMDGPU::OPERAND_INPUT_MODS
      // 2. This is not last operand
      && Desc.NumOperands > (OpNum + 1)
      // 3. Next operand is register class
      && Desc.operands()[OpNum + 1].RegClass != -1
      // 4. Next register is not tied to any other operand
      && Desc.getOperandConstraint(OpNum + 1,
                                   MCOI::OperandConstraint::TIED_TO) == -1;
}
void AMDGPUAsmParser::cvtVOP3Interp(MCInst &Inst,
                                    const OperandVector &Operands) {
  OptionalImmIndexMap OptionalIdx;
  unsigned Opc = Inst.getOpcode();

  unsigned I = 1;
  const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
  for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
    ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
  }

  for (unsigned E = Operands.size(); I != E; ++I) {
    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
    if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
      Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
    } else if (Op.isInterpSlot() || Op.isInterpAttr() ||
               Op.isInterpAttrChan()) {
      Inst.addOperand(MCOperand::createImm(Op.getImm()));
    } else if (Op.isImmModifier()) {
      OptionalIdx[Op.getImmTy()] = I;
    } else {
      llvm_unreachable("unhandled operand type");
    }
  }

  if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::high))
    addOptionalImmOperand(Inst, Operands, OptionalIdx,
                          AMDGPUOperand::ImmTyHigh);

  if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::clamp))
    addOptionalImmOperand(Inst, Operands, OptionalIdx,
                          AMDGPUOperand::ImmTyClampSI);

  if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::omod))
    addOptionalImmOperand(Inst, Operands, OptionalIdx,
                          AMDGPUOperand::ImmTyOModSI);
}
void AMDGPUAsmParser::cvtVINTERP(MCInst &Inst, const OperandVector &Operands) {
  OptionalImmIndexMap OptionalIdx;
  unsigned Opc = Inst.getOpcode();

  unsigned I = 1;
  const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
  for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
    ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
  }

  for (unsigned E = Operands.size(); I != E; ++I) {
    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
    if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
      Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
    } else if (Op.isImmModifier()) {
      OptionalIdx[Op.getImmTy()] = I;
    } else {
      llvm_unreachable("unhandled operand type");
    }
  }

  // (clamp and wait_exp handling elided in this listing)

  int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);

  const int Ops[] = { AMDGPU::OpName::src0,
                      AMDGPU::OpName::src1,
                      AMDGPU::OpName::src2 };
  const int ModOps[] = { AMDGPU::OpName::src0_modifiers,
                         AMDGPU::OpName::src1_modifiers,
                         AMDGPU::OpName::src2_modifiers };

  unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();

  for (int J = 0; J < 3; ++J) {
    int OpIdx = AMDGPU::getNamedOperandIdx(Opc, Ops[J]);
    if (OpIdx == -1)
      break;

    int ModIdx = AMDGPU::getNamedOperandIdx(Opc, ModOps[J]);
    uint32_t ModVal = Inst.getOperand(ModIdx).getImm();

    if ((OpSel & (1 << J)) != 0)
      ModVal |= SISrcMods::OP_SEL_0;
    if (ModOps[J] == AMDGPU::OpName::src0_modifiers &&
        (OpSel & (1 << 3)) != 0)
      ModVal |= SISrcMods::DST_OP_SEL;

    Inst.getOperand(ModIdx).setImm(ModVal);
  }
}
void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands,
                              OptionalImmIndexMap &OptionalIdx) {
  unsigned Opc = Inst.getOpcode();

  unsigned I = 1;
  const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
  for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
    ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
  }

  for (unsigned E = Operands.size(); I != E; ++I) {
    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
    if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
      Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
    } else if (Op.isImmModifier()) {
      OptionalIdx[Op.getImmTy()] = I;
    } else if (Op.isRegOrImm()) {
      Op.addRegOrImmOperands(Inst, 1);
    } else {
      llvm_unreachable("unhandled operand type");
    }
  }

  if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::clamp))
    addOptionalImmOperand(Inst, Operands, OptionalIdx,
                          AMDGPUOperand::ImmTyClampSI);

  if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::omod))
    addOptionalImmOperand(Inst, Operands, OptionalIdx,
                          AMDGPUOperand::ImmTyOModSI);
  // Special case v_mac_{f16,f32} and v_fmac_{f16,f32}: src2 is tied to dst
  // and must not carry modifiers in the assembler, so a zeroed src2_modifiers
  // is inserted and dst is copied into src2. (The opcode check, a list of
  // v_mac/v_fmac opcodes, is elided in this listing.)
  if (IsMacOpcode) {
    auto it = Inst.begin();
    std::advance(
        it, AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2_modifiers));
    it = Inst.insert(it, MCOperand::createImm(0)); // no modifiers for src2
    ++it;
    Inst.insert(it, MCOperand(Inst.getOperand(0))); // src2 = dst
  }
}

void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands) {
  OptionalImmIndexMap OptionalIdx;
  cvtVOP3(Inst, Operands, OptionalIdx);
}
void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst, const OperandVector &Operands,
                               OptionalImmIndexMap &OptIdx) {
  const int Opc = Inst.getOpcode();
  const MCInstrDesc &Desc = MII.get(Opc);
  const bool IsPacked = (Desc.TSFlags & SIInstrFlags::IsPacked) != 0;

  if (Opc == AMDGPU::V_CVT_SR_BF8_F32_vi ||
      Opc == AMDGPU::V_CVT_SR_FP8_F32_vi ||
      Opc == AMDGPU::V_CVT_SR_BF8_F32_e64_gfx12 ||
      Opc == AMDGPU::V_CVT_SR_FP8_F32_e64_gfx12) {
    Inst.addOperand(MCOperand::createImm(0)); // Placeholder for src2_mods
    Inst.addOperand(Inst.getOperand(0));
  }

  // Adding a vdst_in operand is required for these DPP conversion opcodes,
  // but not for the PK variants listed below:
  if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::vdst_in) &&
      !(Opc == AMDGPU::V_CVT_PK_BF8_F32_e64_dpp_gfx12 ||
        Opc == AMDGPU::V_CVT_PK_FP8_F32_e64_dpp_gfx12 ||
        Opc == AMDGPU::V_CVT_PK_BF8_F32_e64_dpp8_gfx12 ||
        Opc == AMDGPU::V_CVT_PK_FP8_F32_e64_dpp8_gfx12)) {
    Inst.addOperand(Inst.getOperand(0));
  }

  int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
  if (OpSelIdx != -1) {
    addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSel);
  }

  int OpSelHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel_hi);
  if (OpSelHiIdx != -1) {
    addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSelHi,
                          IsPacked ? -1 : 0);
  }
  const int Ops[] = { AMDGPU::OpName::src0,
                      AMDGPU::OpName::src1,
                      AMDGPU::OpName::src2 };
  const int ModOps[] = { AMDGPU::OpName::src0_modifiers,
                         AMDGPU::OpName::src1_modifiers,
                         AMDGPU::OpName::src2_modifiers };

  unsigned OpSel = 0;
  unsigned OpSelHi = 0;
  unsigned NegLo = 0;
  unsigned NegHi = 0;

  if (OpSelIdx != -1)
    OpSel = Inst.getOperand(OpSelIdx).getImm();
  if (OpSelHiIdx != -1)
    OpSelHi = Inst.getOperand(OpSelHiIdx).getImm();

  // (neg_lo/neg_hi index lookups elided in this listing)

  for (int J = 0; J < 3; ++J) {
    int OpIdx = AMDGPU::getNamedOperandIdx(Opc, Ops[J]);
    if (OpIdx == -1)
      break;

    int ModIdx = AMDGPU::getNamedOperandIdx(Opc, ModOps[J]);
    if (ModIdx == -1)
      continue;

    uint32_t ModVal = 0;
    const MCOperand &SrcOp = Inst.getOperand(OpIdx);
    if (SrcOp.isReg() && getMRI()
                             ->getRegClass(AMDGPU::VGPR_16RegClassID)
                             .contains(SrcOp.getReg())) {
      // True 16-bit registers carry op_sel in the register half itself.
      if (AMDGPU::isHi(SrcOp.getReg(), *getMRI()))
        ModVal |= SISrcMods::OP_SEL_0;
    } else if ((OpSel & (1 << J)) != 0) {
      ModVal |= SISrcMods::OP_SEL_0;
    }

    if ((OpSelHi & (1 << J)) != 0)
      ModVal |= SISrcMods::OP_SEL_1;
    if ((NegLo & (1 << J)) != 0)
      ModVal |= SISrcMods::NEG;
    if ((NegHi & (1 << J)) != 0)
      ModVal |= SISrcMods::NEG_HI;

    Inst.getOperand(ModIdx).setImm(Inst.getOperand(ModIdx).getImm() | ModVal);
  }
}
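A minimal sketch of the per-source fold done by the loop above: bit J of each packed immediate is translated into a flag in source J's modifier word. The enum values are assumptions mirroring SISrcMods in SIDefines.h (where NEG_HI shares the encoding of ABS), and the names are illustrative:

#include <cstdint>

enum SrcModsSketch : uint32_t {
  NEG_SK      = 1u << 0, // neg / neg_lo
  NEG_HI_SK   = 1u << 1, // neg_hi (aliases ABS)
  OP_SEL_0_SK = 1u << 2, // op_sel
  OP_SEL_1_SK = 1u << 3, // op_sel_hi
};

static uint32_t foldVOP3PModsSketch(unsigned J, unsigned OpSel,
                                    unsigned OpSelHi, unsigned NegLo,
                                    unsigned NegHi) {
  uint32_t ModVal = 0;
  if (OpSel   & (1u << J)) ModVal |= OP_SEL_0_SK;
  if (OpSelHi & (1u << J)) ModVal |= OP_SEL_1_SK;
  if (NegLo   & (1u << J)) ModVal |= NEG_SK;
  if (NegHi   & (1u << J)) ModVal |= NEG_HI_SK;
  return ModVal;
}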
void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst, const OperandVector &Operands) {
  OptionalImmIndexMap OptIdx;
  cvtVOP3(Inst, Operands, OptIdx);
  cvtVOP3P(Inst, Operands, OptIdx);
}

static void addSrcModifiersAndSrc(MCInst &Inst, const OperandVector &Operands,
                                  unsigned i, unsigned Opc, unsigned OpName) {
  if (AMDGPU::hasNamedOperand(Opc, OpName))
    ((AMDGPUOperand &)*Operands[i]).addRegOrImmWithFPInputModsOperands(Inst, 2);
  else
    ((AMDGPUOperand &)*Operands[i]).addRegOperands(Inst, 1);
}
void AMDGPUAsmParser::cvtSWMMAC(MCInst &Inst, const OperandVector &Operands) {
  unsigned Opc = Inst.getOpcode();

  ((AMDGPUOperand &)*Operands[1]).addRegOperands(Inst, 1); // dst
  addSrcModifiersAndSrc(Inst, Operands, 2, Opc, AMDGPU::OpName::src0_modifiers);
  addSrcModifiersAndSrc(Inst, Operands, 3, Opc, AMDGPU::OpName::src1_modifiers);
  ((AMDGPUOperand &)*Operands[1]).addRegOperands(Inst, 1); // srcTiedDef
  ((AMDGPUOperand &)*Operands[4]).addRegOperands(Inst, 1); // src2 (index)

  OptionalImmIndexMap OptIdx;
  for (unsigned i = 5; i < Operands.size(); ++i) {
    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
    OptIdx[Op.getImmTy()] = i;
  }

  if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::index_key_8bit))
    addOptionalImmOperand(Inst, Operands, OptIdx,
                          AMDGPUOperand::ImmTyIndexKey8bit);
  if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::index_key_16bit))
    addOptionalImmOperand(Inst, Operands, OptIdx,
                          AMDGPUOperand::ImmTyIndexKey16bit);

  // (clamp handling and the final cvtVOP3P call elided in this listing)
}
  // parseVOPD: the "::" separator introduces the second (VOPDY) component.
  Operands.push_back(AMDGPUOperand::CreateToken(this, "::", S));
  SMLoc OpYLoc = getLoc();
  StringRef OpYName;
  if (isToken(AsmToken::Identifier) && !Parser.parseIdentifier(OpYName)) {
    Operands.push_back(AMDGPUOperand::CreateToken(this, OpYName, OpYLoc));
    return ParseStatus::Success;
  }
  return Error(OpYLoc, "expected a VOPDY instruction after ::");
}
// Create VOPD MCInst operands using parsed assembler operands.
void AMDGPUAsmParser::cvtVOPD(MCInst &Inst, const OperandVector &Operands) {
  auto addOp = [&](uint16_t ParsedOprIdx) {
    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[ParsedOprIdx]);
    if (Op.isReg()) {
      Op.addRegOperands(Inst, 1);
      return;
    }
    if (Op.isImm()) {
      Op.addImmOperands(Inst, 1);
      return;
    }
    llvm_unreachable("Unhandled operand type in cvtVOPD");
  };

  const auto &InstInfo = getVOPDInstInfo(Inst.getOpcode(), &MII);

  // MCInst operands are ordered as follows:
  //   dstX, dstY, src0X [, other OpX operands], src0Y [, other OpY operands]

  for (auto CompIdx : VOPD::COMPONENTS)
    addOp(InstInfo[CompIdx].getIndexOfDstInParsedOperands());

  for (auto CompIdx : VOPD::COMPONENTS) {
    const auto &CInfo = InstInfo[CompIdx];
    auto CompSrcOperandsNum = InstInfo[CompIdx].getCompParsedSrcOperandsNum();
    for (unsigned CompSrcIdx = 0; CompSrcIdx < CompSrcOperandsNum; ++CompSrcIdx)
      addOp(CInfo.getIndexOfSrcInParsedOperands(CompSrcIdx));
    if (CInfo.hasSrc2Acc())
      addOp(CInfo.getIndexOfDstInParsedOperands());
  }
}
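Example of the dual-issue syntax these two routines handle (a sketch):

// v_dual_mov_b32 v0, v4 :: v_dual_add_f32 v1, v2, v3
// parseVOPD turns "::" into a token operand and requires a VOPDY mnemonic
// after it; cvtVOPD then emits dstX, dstY, followed by the OpX sources and
// the OpY sources, in that order.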
bool AMDGPUOperand::isDPP8() const {
  return isImmTy(ImmTyDPP8);
}
bool AMDGPUOperand::isDPPCtrl() const {
  using namespace AMDGPU::DPP;

  bool result = isImm() && getImmTy() == ImmTyDppCtrl && isUInt<9>(getImm());
  if (result) {
    int64_t Imm = getImm();
    return (Imm >= DppCtrl::QUAD_PERM_FIRST && Imm <= DppCtrl::QUAD_PERM_LAST) ||
           (Imm >= DppCtrl::ROW_SHL_FIRST && Imm <= DppCtrl::ROW_SHL_LAST) ||
           (Imm >= DppCtrl::ROW_SHR_FIRST && Imm <= DppCtrl::ROW_SHR_LAST) ||
           (Imm >= DppCtrl::ROW_ROR_FIRST && Imm <= DppCtrl::ROW_ROR_LAST) ||
           (Imm == DppCtrl::WAVE_SHL1) ||
           (Imm == DppCtrl::WAVE_ROL1) ||
           (Imm == DppCtrl::WAVE_SHR1) ||
           (Imm == DppCtrl::WAVE_ROR1) ||
           (Imm == DppCtrl::ROW_MIRROR) ||
           (Imm == DppCtrl::ROW_HALF_MIRROR) ||
           (Imm == DppCtrl::BCAST15) ||
           (Imm == DppCtrl::BCAST31) ||
           (Imm >= DppCtrl::ROW_SHARE_FIRST && Imm <= DppCtrl::ROW_SHARE_LAST) ||
           (Imm >= DppCtrl::ROW_XMASK_FIRST && Imm <= DppCtrl::ROW_XMASK_LAST);
  }
  return false;
}
bool AMDGPUOperand::isBLGP() const {
  return isImm() && getImmTy() == ImmTyBLGP && isUInt<3>(getImm());
}

bool AMDGPUOperand::isCBSZ() const {
  return isImm() && getImmTy() == ImmTyCBSZ && isUInt<3>(getImm());
}

bool AMDGPUOperand::isABID() const {
  return isImm() && getImmTy() == ImmTyABID && isUInt<4>(getImm());
}

bool AMDGPUOperand::isS16Imm() const {
  return isImmLiteral() && (isInt<16>(getImm()) || isUInt<16>(getImm()));
}

bool AMDGPUOperand::isU16Imm() const {
  return isImmLiteral() && isUInt<16>(getImm());
}
bool AMDGPUAsmParser::parseDimId(unsigned &Encoding) {
  // We want to allow "dim:1D" etc., but the initial 1 is tokenized as an
  // integer.
  std::string Token;
  if (isToken(AsmToken::Integer)) {
    SMLoc Loc = getToken().getEndLoc();
    Token = std::string(getTokenStr());
    lex();
    if (getLoc() != Loc)
      return false;
  }

  StringRef Suffix;
  if (!parseId(Suffix))
    return false;
  Token += Suffix;

  // (lookup of the combined name in the MIMG dim table elided in this listing)
  return true;
}

ParseStatus AMDGPUAsmParser::parseDim(OperandVector &Operands) {
  SMLoc S = getLoc();

  if (!trySkipId("dim", AsmToken::Colon))
    return ParseStatus::NoMatch;

  unsigned Encoding;
  SMLoc Loc = getLoc();
  if (!parseDimId(Encoding))
    return Error(Loc, "invalid dim value");

  Operands.push_back(AMDGPUOperand::CreateImm(this, Encoding, S,
                                              AMDGPUOperand::ImmTyDim));
  return ParseStatus::Success;
}
  // parseDPP8: dpp8:[%d,%d,%d,%d,%d,%d,%d,%d]
  int64_t Sels[8];

  if (!skipToken(AsmToken::LBrac, "expected an opening square bracket"))
    return ParseStatus::Failure;

  for (size_t i = 0; i < 8; ++i) {
    if (i > 0 && !skipToken(AsmToken::Comma, "expected a comma"))
      return ParseStatus::Failure;

    SMLoc Loc = getLoc();
    if (getParser().parseAbsoluteExpression(Sels[i]))
      return ParseStatus::Failure;
    if (0 > Sels[i] || 7 < Sels[i])
      return Error(Loc, "expected a 3-bit value");
  }

  if (!skipToken(AsmToken::RBrac, "expected a closing square bracket"))
    return ParseStatus::Failure;

  unsigned DPP8 = 0;
  for (size_t i = 0; i < 8; ++i)
    DPP8 |= (Sels[i] << (i * 3));

  Operands.push_back(
      AMDGPUOperand::CreateImm(this, DPP8, S, AMDGPUOperand::ImmTyDPP8));
  return ParseStatus::Success;
}
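A minimal sketch of the dpp8 packing above: eight 3-bit lane selectors, with selector i stored at bit 3*i (the helper name is illustrative):

#include <cstdint>

static unsigned encodeDpp8Sketch(const int64_t (&Sels)[8]) {
  unsigned DPP8 = 0;
  for (unsigned i = 0; i < 8; ++i)
    DPP8 |= unsigned(Sels[i]) << (i * 3);
  return DPP8;
}

// The identity permutation dpp8:[0,1,2,3,4,5,6,7] encodes as 0xFAC688;
// dpp8:[7,6,5,4,3,2,1,0] reverses each group of eight lanes.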
bool AMDGPUAsmParser::isSupportedDPPCtrl(StringRef Ctrl,
                                         const OperandVector &Operands) {
  if (Ctrl == "row_newbcast")
    return isGFX90A();

  if (Ctrl == "row_share" ||
      Ctrl == "row_xmask")
    return isGFX10Plus();

  if (Ctrl == "wave_shl" ||
      Ctrl == "wave_shr" ||
      Ctrl == "wave_rol" ||
      Ctrl == "wave_ror" ||
      Ctrl == "row_bcast")
    return isVI() || isGFX9();

  return Ctrl == "row_mirror" ||
         Ctrl == "row_half_mirror" ||
         Ctrl == "quad_perm" ||
         Ctrl == "row_shl" ||
         Ctrl == "row_shr" ||
         Ctrl == "row_ror";
}
int64_t AMDGPUAsmParser::parseDPPCtrlPerm() {
  // quad_perm:[%d,%d,%d,%d]

  if (!skipToken(AsmToken::LBrac, "expected an opening square bracket"))
    return -1;

  int64_t Val = 0;
  for (int i = 0; i < 4; ++i) {
    if (i > 0 && !skipToken(AsmToken::Comma, "expected a comma"))
      return -1;

    int64_t Temp;
    SMLoc Loc = getLoc();
    if (getParser().parseAbsoluteExpression(Temp))
      return -1;
    if (Temp < 0 || Temp > 3) {
      Error(Loc, "expected a 2-bit value");
      return -1;
    }

    Val += (Temp << i * 2);
  }

  if (!skipToken(AsmToken::RBrac, "expected a closing square bracket"))
    return -1;

  return Val;
}
int64_t AMDGPUAsmParser::parseDPPCtrlSel(StringRef Ctrl) {
  using namespace AMDGPU::DPP;

  // sel:%d
  int64_t Val;
  SMLoc Loc = getLoc();

  if (getParser().parseAbsoluteExpression(Val))
    return -1;

  struct DppCtrlCheck {
    int64_t Ctrl;
    int Lo;
    int Hi;
  };

  DppCtrlCheck Check = StringSwitch<DppCtrlCheck>(Ctrl)
    .Case("wave_shl",     {DppCtrl::WAVE_SHL1,          1,  1})
    .Case("wave_rol",     {DppCtrl::WAVE_ROL1,          1,  1})
    .Case("wave_shr",     {DppCtrl::WAVE_SHR1,          1,  1})
    .Case("wave_ror",     {DppCtrl::WAVE_ROR1,          1,  1})
    .Case("row_shl",      {DppCtrl::ROW_SHL0,           1, 15})
    .Case("row_shr",      {DppCtrl::ROW_SHR0,           1, 15})
    .Case("row_ror",      {DppCtrl::ROW_ROR0,           1, 15})
    .Case("row_share",    {DppCtrl::ROW_SHARE_FIRST,    0, 15})
    .Case("row_xmask",    {DppCtrl::ROW_XMASK_FIRST,    0, 15})
    .Case("row_newbcast", {DppCtrl::ROW_NEWBCAST_FIRST, 0, 15})
    .Default({-1, 0, 0});

  bool Valid;
  if (Check.Ctrl == -1) {
    Valid = (Ctrl == "row_bcast" && (Val == 15 || Val == 31));
    Val = (Val == 15) ? DppCtrl::BCAST15 : DppCtrl::BCAST31;
  } else {
    Valid = Check.Lo <= Val && Val <= Check.Hi;
    Val = (Check.Lo == Check.Hi) ? Check.Ctrl : (Check.Ctrl | Val);
  }

  if (!Valid) {
    Error(Loc, Twine("invalid ", Ctrl) + Twine(" value"));
    Val = -1;
  }

  return Val;
}
ParseStatus AMDGPUAsmParser::parseDPPCtrl(OperandVector &Operands) {
  using namespace AMDGPU::DPP;

  if (!isToken(AsmToken::Identifier) ||
      !isSupportedDPPCtrl(getTokenStr(), Operands))
    return ParseStatus::NoMatch;

  SMLoc S = getLoc();
  int64_t Val = -1;
  StringRef Ctrl;

  parseId(Ctrl);

  if (Ctrl == "row_mirror") {
    Val = DppCtrl::ROW_MIRROR;
  } else if (Ctrl == "row_half_mirror") {
    Val = DppCtrl::ROW_HALF_MIRROR;
  } else if (skipToken(AsmToken::Colon, "expected a colon")) {
    if (Ctrl == "quad_perm") {
      Val = parseDPPCtrlPerm();
    } else {
      Val = parseDPPCtrlSel(Ctrl);
    }
  }

  if (Val == -1)
    return ParseStatus::Failure;

  Operands.push_back(
      AMDGPUOperand::CreateImm(this, Val, S, AMDGPUOperand::ImmTyDppCtrl));
  return ParseStatus::Success;
}
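Examples of the DPP controls this parser accepts (a sketch):

// v_mov_b32_dpp v0, v1 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf
// v_mov_b32_dpp v0, v1 row_shl:1
// v_mov_b32_dpp v0, v1 row_mirror
// v_mov_b32_dpp v0, v1 row_share:3    // GFX10+ only, per isSupportedDPPCtrl
// The "name:value" forms map onto the DppCtrl enum, e.g. row_shl:N becomes
// DppCtrl::ROW_SHL0 | N for N in [1,15].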
void AMDGPUAsmParser::cvtVOP3DPP(MCInst &Inst, const OperandVector &Operands,
                                 bool IsDPP8) {
  OptionalImmIndexMap OptionalIdx;
  unsigned Opc = Inst.getOpcode();
  const MCInstrDesc &Desc = MII.get(Inst.getOpcode());

  // MAC instructions are special: they have an 'old' operand that is not tied
  // to dst (but assumed to be), plus a dummy unused src2_modifiers.
  int OldIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::old);
  int Src2ModIdx =
      AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2_modifiers);
  bool IsMAC = OldIdx != -1 && Src2ModIdx != -1 &&
               Desc.getOperandConstraint(OldIdx, MCOI::TIED_TO) == -1;

  unsigned I = 1;
  for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
    ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
  }

  for (unsigned E = Operands.size(); I != E; ++I) {
    if (IsMAC) {
      int NumOperands = Inst.getNumOperands();
      if (OldIdx == NumOperands) {
        // Handle the 'old' operand: old = dst.
        constexpr int DST_IDX = 0;
        Inst.addOperand(Inst.getOperand(DST_IDX));
      } else if (Src2ModIdx == NumOperands) {
        // Add an unused dummy src2_modifiers.
        Inst.addOperand(MCOperand::createImm(0));
      }
    }

    bool IsVOP3CvtSrDpp = Opc == AMDGPU::V_CVT_SR_BF8_F32_e64_dpp8_gfx12 ||
                          Opc == AMDGPU::V_CVT_SR_FP8_F32_e64_dpp8_gfx12 ||
                          Opc == AMDGPU::V_CVT_SR_BF8_F32_e64_dpp_gfx12 ||
                          Opc == AMDGPU::V_CVT_SR_FP8_F32_e64_dpp_gfx12;
    if (IsVOP3CvtSrDpp) {
      if (Src2ModIdx == static_cast<int>(Inst.getNumOperands())) {
        Inst.addOperand(MCOperand::createImm(0));
        Inst.addOperand(MCOperand::createReg(0));
      }
    }

    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
    if (IsDPP8 && Op.isDppFI()) {
      // Handled after the loop (elided in this listing).
    } else if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
      Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
    } else if (Op.isReg()) {
      Op.addRegOperands(Inst, 1);
    } else if (Op.isImm() &&
               Desc.operands()[Inst.getNumOperands()].RegClass != -1) {
      assert(!Op.IsImmKindLiteral() && "Cannot use literal with DPP");
      Op.addImmOperands(Inst, 1);
    } else if (Op.isImm()) {
      OptionalIdx[Op.getImmTy()] = I;
    } else {
      llvm_unreachable("unhandled operand type");
    }
  }

  // (part of the optional-operand emission elided in this listing)
  if (Desc.TSFlags & SIInstrFlags::VOP3P)
    cvtVOP3P(Inst, Operands, OptionalIdx);
  else if (Desc.TSFlags & SIInstrFlags::VOP3)
    cvtVOP3OpSel(Inst, Operands, OptionalIdx);

  if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::fi))
    addOptionalImmOperand(Inst, Operands, OptionalIdx,
                          AMDGPUOperand::ImmTyDppFI);
}
void AMDGPUAsmParser::cvtDPP(MCInst &Inst, const OperandVector &Operands,
                             bool IsDPP8) {
  OptionalImmIndexMap OptionalIdx;

  unsigned I = 1;
  const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
  for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
    ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
  }

  int Fi = 0;
  for (unsigned E = Operands.size(); I != E; ++I) {
    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);

    // VOP2b (v_add_u32, v_sub_u32 ...) dpp uses a "vcc" token; skip it.
    if (Op.isReg() && validateVccOperand(Op.getReg())) {
      continue;
    }

    if (IsDPP8) {
      if (Op.isDPP8()) {
        Op.addImmOperands(Inst, 1);
      } else if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
        Op.addRegWithFPInputModsOperands(Inst, 2);
      } else if (Op.isDppFI()) {
        Fi = Op.getImm();
      } else if (Op.isReg()) {
        Op.addRegOperands(Inst, 1);
      } else {
        llvm_unreachable("Invalid operand type");
      }
    } else {
      if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
        Op.addRegWithFPInputModsOperands(Inst, 2);
      } else if (Op.isReg()) {
        Op.addRegOperands(Inst, 1);
      } else if (Op.isDPPCtrl()) {
        Op.addImmOperands(Inst, 1);
      } else if (Op.isImm()) {
        // Handle optional arguments.
        OptionalIdx[Op.getImmTy()] = I;
      } else {
        llvm_unreachable("Invalid operand type");
      }
    }
  }

  if (IsDPP8) {
    Inst.addOperand(MCOperand::createImm(Fi));
  } else if (AMDGPU::hasNamedOperand(Inst.getOpcode(), AMDGPU::OpName::fi)) {
    addOptionalImmOperand(Inst, Operands, OptionalIdx,
                          AMDGPUOperand::ImmTyDppFI);
  }
}
ParseStatus AMDGPUAsmParser::parseSDWASel(OperandVector &Operands,
                                          StringRef Prefix,
                                          AMDGPUOperand::ImmTy Type) {
  using namespace llvm::AMDGPU::SDWA;

  SMLoc S = getLoc();
  StringRef Value;
  SMLoc StringLoc;

  ParseStatus Res = parseStringWithPrefix(Prefix, Value, StringLoc);
  if (!Res.isSuccess())
    return Res;

  int64_t Int;
  Int = StringSwitch<int64_t>(Value)
            .Case("BYTE_0", SdwaSel::BYTE_0)
            .Case("BYTE_1", SdwaSel::BYTE_1)
            .Case("BYTE_2", SdwaSel::BYTE_2)
            .Case("BYTE_3", SdwaSel::BYTE_3)
            .Case("WORD_0", SdwaSel::WORD_0)
            .Case("WORD_1", SdwaSel::WORD_1)
            .Case("DWORD", SdwaSel::DWORD)
            .Default(0xffffffff);

  if (Int == 0xffffffff)
    return Error(StringLoc, "invalid " + Twine(Prefix) + " value");

  Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, Type));
  return ParseStatus::Success;
}
ParseStatus AMDGPUAsmParser::parseSDWADstUnused(OperandVector &Operands) {
  using namespace llvm::AMDGPU::SDWA;

  SMLoc S = getLoc();
  StringRef Value;
  SMLoc StringLoc;

  ParseStatus Res = parseStringWithPrefix("dst_unused", Value, StringLoc);
  if (!Res.isSuccess())
    return Res;

  int64_t Int;
  Int = StringSwitch<int64_t>(Value)
            .Case("UNUSED_PAD", DstUnused::UNUSED_PAD)
            .Case("UNUSED_SEXT", DstUnused::UNUSED_SEXT)
            .Case("UNUSED_PRESERVE", DstUnused::UNUSED_PRESERVE)
            .Default(0xffffffff);

  if (Int == 0xffffffff)
    return Error(StringLoc, "invalid dst_unused value");

  Operands.push_back(AMDGPUOperand::CreateImm(
      this, Int, S, AMDGPUOperand::ImmTySDWADstUnused));
  return ParseStatus::Success;
}
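Example of the SDWA selector syntax handled by the two parsers above (a sketch):

// v_add_f32_sdwa v0, v1, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
// Each selector name is mapped onto the SdwaSel/DstUnused enums through the
// StringSwitch tables; any other name yields the "invalid ... value" error.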
void AMDGPUAsmParser::cvtSDWA(MCInst &Inst, const OperandVector &Operands,
                              uint64_t BasicInstType,
                              bool SkipDstVcc, bool SkipSrcVcc) {
  using namespace llvm::AMDGPU::SDWA;

  OptionalImmIndexMap OptionalIdx;
  bool SkipVcc = SkipDstVcc || SkipSrcVcc;
  bool SkippedVcc = false;

  unsigned I = 1;
  const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
  for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
    ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
  }

  for (unsigned E = Operands.size(); I != E; ++I) {
    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
    if (SkipVcc && !SkippedVcc && Op.isReg() &&
        (Op.getReg() == AMDGPU::VCC || Op.getReg() == AMDGPU::VCC_LO)) {
      // VOP2b (v_add_u32, v_sub_u32 ...) sdwa uses a "vcc" token; skip it
      // here (the exact operand-position checks are elided in this listing).
      SkippedVcc = true;
      continue;
    }
    if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
      Op.addRegOrImmWithInputModsOperands(Inst, 2);
    } else if (Op.isImm()) {
      // Handle optional arguments.
      OptionalIdx[Op.getImmTy()] = I;
    } else {
      llvm_unreachable("Invalid operand type");
    }
    SkippedVcc = false;
  }

  const unsigned Opc = Inst.getOpcode();
  if (Opc != AMDGPU::V_NOP_sdwa_gfx10 && Opc != AMDGPU::V_NOP_sdwa_gfx9 &&
      Opc != AMDGPU::V_NOP_sdwa_vi) {
    // v_nop_sdwa has no optional sdwa arguments.
    switch (BasicInstType) {
    case SIInstrFlags::VOP1:
      addOptionalImmOperand(Inst, Operands, OptionalIdx,
                            AMDGPUOperand::ImmTyClampSI, 0);
      addOptionalImmOperand(Inst, Operands, OptionalIdx,
                            AMDGPUOperand::ImmTyOModSI, 0);
      addOptionalImmOperand(Inst, Operands, OptionalIdx,
                            AMDGPUOperand::ImmTySDWADstSel, SdwaSel::DWORD);
      addOptionalImmOperand(Inst, Operands, OptionalIdx,
                            AMDGPUOperand::ImmTySDWADstUnused,
                            DstUnused::UNUSED_PRESERVE);
      // (src0_sel handling and the VOP2/VOPC cases elided in this listing)
      break;
    default:
      llvm_unreachable(
          "Invalid instruction type. Only VOP1, VOP2 and VOPC allowed");
    }
  }
  // Special case v_mac_{f16, f32}: it has a src2 register operand tied to dst.
  if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi ||
      Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) {
    auto it = Inst.begin();
    std::advance(
        it, AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::src2));
    Inst.insert(it, Inst.getOperand(0)); // src2 = dst
  }
}
#define GET_REGISTER_MATCHER
#define GET_MATCHER_IMPLEMENTATION
#define GET_MNEMONIC_SPELL_CHECKER
#define GET_MNEMONIC_CHECKER
#include "AMDGPUGenAsmMatcher.inc"
ParseStatus AMDGPUAsmParser::parseCustomOperand(OperandVector &Operands,
                                                unsigned MCK) {
  // The MatchClassKind case labels below are reconstructed from the parsed
  // token names and may not match the generated enum spellings exactly.
  switch (MCK) {
  case MCK_addr64:
    return parseTokenOp("addr64", Operands);
  case MCK_done:
    return parseTokenOp("done", Operands);
  case MCK_idxen:
    return parseTokenOp("idxen", Operands);
  case MCK_lds:
    return parseTokenOp("lds", Operands);
  case MCK_offen:
    return parseTokenOp("offen", Operands);
  case MCK_off:
    return parseTokenOp("off", Operands);
  case MCK_row_95_en:
    return parseTokenOp("row_en", Operands);
  case MCK_gds:
    return parseNamedBit("gds", Operands, AMDGPUOperand::ImmTyGDS);
  case MCK_tfe:
    return parseNamedBit("tfe", Operands, AMDGPUOperand::ImmTyTFE);
  }
  return tryCustomParseOperand(Operands, MCK);
}
unsigned AMDGPUAsmParser::validateTargetOperandClass(MCParsedAsmOperand &Op,
                                                     unsigned Kind) {
  // Tokens like "glc" would be parsed as immediate operands in ParseOperand(),
  // but MatchInstructionImpl() expects a token; this method checks whether an
  // immediate operand matches the expected token kind.
  AMDGPUOperand &Operand = (AMDGPUOperand&)Op;
  switch (Kind) {
  case MCK_addr64:
    return Operand.isAddr64() ? Match_Success : Match_InvalidOperand;
  case MCK_gds:
    return Operand.isGDS() ? Match_Success : Match_InvalidOperand;
  case MCK_lds:
    return Operand.isLDS() ? Match_Success : Match_InvalidOperand;
  case MCK_idxen:
    return Operand.isIdxen() ? Match_Success : Match_InvalidOperand;
  case MCK_offen:
    return Operand.isOffen() ? Match_Success : Match_InvalidOperand;
  case MCK_tfe:
    return Operand.isTFE() ? Match_Success : Match_InvalidOperand;
  case MCK_SSrc_b32:
    // Operands with expression values return true for isToken, because a
    // token and an expression cannot be distinguished at parse time; handle
    // them here so the match does not fail on a valid expression.
    return Operand.isSSrc_b32() ? Match_Success : Match_InvalidOperand;
  case MCK_SSrc_f32:
    return Operand.isSSrc_f32() ? Match_Success : Match_InvalidOperand;
  case MCK_SOPPBrTarget:
    return Operand.isSOPPBrTarget() ? Match_Success : Match_InvalidOperand;
  case MCK_VReg32OrOff:
    return Operand.isVReg32OrOff() ? Match_Success : Match_InvalidOperand;
  case MCK_InterpSlot:
    return Operand.isInterpSlot() ? Match_Success : Match_InvalidOperand;
  case MCK_InterpAttr:
    return Operand.isInterpAttr() ? Match_Success : Match_InvalidOperand;
  case MCK_InterpAttrChan:
    return Operand.isInterpAttrChan() ? Match_Success : Match_InvalidOperand;
  case MCK_SReg_64:
  case MCK_SReg_64_XEXEC:
    // Null is defined as a 32-bit register but should also be usable with
    // 64-bit operands; enable it for SReg_64 here. Remaining source operands
    // are handled in isInlinableImm.
    return Operand.isNull() ? Match_Success : Match_InvalidOperand;
  default:
    return Match_InvalidOperand;
  }
}
ParseStatus AMDGPUAsmParser::parseEndpgm(OperandVector &Operands) {
  SMLoc S = getLoc();
  int64_t Imm = 0;

  if (!parseExpr(Imm)) {
    // The operand is optional; if not present, default to 0.
    Imm = 0;
  }

  if (!isUInt<16>(Imm))
    return Error(S, "expected a 16-bit value");

  Operands.push_back(
      AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyEndpgm));
  return ParseStatus::Success;
}

bool AMDGPUOperand::isEndpgm() const { return isImmTy(ImmTyEndpgm); }
bool AMDGPUOperand::isWaitVDST() const {
  return isImmTy(ImmTyWaitVDST) && isUInt<4>(getImm());
}

bool AMDGPUOperand::isWaitVAVDst() const {
  return isImmTy(ImmTyWaitVAVDst) && isUInt<4>(getImm());
}

bool AMDGPUOperand::isWaitVMVSrc() const {
  return isImmTy(ImmTyWaitVMVSrc) && isUInt<1>(getImm());
}

bool AMDGPUOperand::isWaitEXP() const {
  return isImmTy(ImmTyWaitEXP) && isUInt<3>(getImm());
}

bool AMDGPUOperand::isSplitBarrier() const { return isInlinableImm(MVT::i32); }