enum RegisterKind { IS_UNKNOWN, IS_VGPR, IS_SGPR, IS_AGPR, IS_TTMP, IS_SPECIAL };
SMLoc StartLoc, EndLoc;
const AMDGPUAsmParser *AsmParser;

AMDGPUOperand(KindTy Kind_, const AMDGPUAsmParser *AsmParser_)
    : Kind(Kind_), AsmParser(AsmParser_) {}

using Ptr = std::unique_ptr<AMDGPUOperand>;
bool hasFPModifiers() const { return Abs || Neg; }
bool hasIntModifiers() const { return Sext; }
bool hasModifiers() const { return hasFPModifiers() || hasIntModifiers(); }
int64_t getFPModifiersOperand() const {

int64_t getIntModifiersOperand() const {

int64_t getModifiersOperand() const {
  assert(!(hasFPModifiers() && hasIntModifiers()) &&
         "fp and int modifiers should not be used simultaneously");
  if (hasFPModifiers()) {
    return getFPModifiersOperand();
  } else if (hasIntModifiers()) {
    return getIntModifiersOperand();
ImmKindTyMandatoryLiteral,

mutable ImmKindTy Kind;
bool isToken() const override { return Kind == Token; }

bool isSymbolRefExpr() const {
  return isExpr() && Expr && isa<MCSymbolRefExpr>(Expr);
}

bool isImm() const override {
  return Kind == Immediate;
}

void setImmKindNone() const {
  Imm.Kind = ImmKindTyNone;
}

void setImmKindLiteral() const {
  Imm.Kind = ImmKindTyLiteral;
}

void setImmKindMandatoryLiteral() const {
  Imm.Kind = ImmKindTyMandatoryLiteral;
}

void setImmKindConst() const {
  Imm.Kind = ImmKindTyConst;
}

bool IsImmKindLiteral() const {
  return isImm() && Imm.Kind == ImmKindTyLiteral;
}

bool IsImmKindMandatoryLiteral() const {
  return isImm() && Imm.Kind == ImmKindTyMandatoryLiteral;
}

bool isImmKindConst() const {
  return isImm() && Imm.Kind == ImmKindTyConst;
}

bool isInlinableImm(MVT type) const;
bool isLiteralImm(MVT type) const;
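// An inlinable immediate is a constant the hardware can encode directly in
// the instruction word (small integers in [-16, 64] and a handful of FP
// values such as 0.0, +/-0.5, +/-1.0, +/-2.0, +/-4.0, plus 1/(2*pi) when
// FeatureInv2PiInlineImm is available). A literal immediate instead occupies
// an extra 32-bit literal dword following the instruction.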
bool isRegKind() const {

bool isReg() const override {
  return isRegKind() && !hasModifiers();
}

bool isRegOrInline(unsigned RCID, MVT type) const {
  return isRegClass(RCID) || isInlinableImm(type);
}

  return isRegOrInline(RCID, type) || isLiteralImm(type);
bool isRegOrImmWithInt16InputMods() const {
bool isRegOrImmWithIntT16InputMods() const {
bool isRegOrImmWithInt32InputMods() const {

bool isRegOrInlineImmWithInt16InputMods() const {
  return isRegOrInline(AMDGPU::VS_32RegClassID, MVT::i16);
}

bool isRegOrInlineImmWithInt32InputMods() const {
  return isRegOrInline(AMDGPU::VS_32RegClassID, MVT::i32);
}

bool isRegOrImmWithInt64InputMods() const {
bool isRegOrImmWithFP16InputMods() const {
bool isRegOrImmWithFPT16InputMods() const {
bool isRegOrImmWithFP32InputMods() const {
bool isRegOrImmWithFP64InputMods() const {

bool isRegOrInlineImmWithFP16InputMods() const {
  return isRegOrInline(AMDGPU::VS_32RegClassID, MVT::f16);
}

bool isRegOrInlineImmWithFP32InputMods() const {
  return isRegOrInline(AMDGPU::VS_32RegClassID, MVT::f32);
}
bool isVReg() const {
  return isRegClass(AMDGPU::VGPR_32RegClassID) ||
         isRegClass(AMDGPU::VReg_64RegClassID) ||
         isRegClass(AMDGPU::VReg_96RegClassID) ||
         isRegClass(AMDGPU::VReg_128RegClassID) ||
         isRegClass(AMDGPU::VReg_160RegClassID) ||
         isRegClass(AMDGPU::VReg_192RegClassID) ||
         isRegClass(AMDGPU::VReg_256RegClassID) ||
         isRegClass(AMDGPU::VReg_512RegClassID) ||
         isRegClass(AMDGPU::VReg_1024RegClassID);
}

bool isVReg32() const {
  return isRegClass(AMDGPU::VGPR_32RegClassID);
}

bool isVReg32OrOff() const {
  return isOff() || isVReg32();
}

bool isNull() const {
  return isRegKind() && getReg() == AMDGPU::SGPR_NULL;
}

bool isVRegWithInputMods() const;
bool isT16VRegWithInputMods() const;
bool isSDWAOperand(MVT type) const;
bool isSDWAFP16Operand() const;
bool isSDWAFP32Operand() const;
bool isSDWAInt16Operand() const;
bool isSDWAInt32Operand() const;
bool isImmTy(ImmTy ImmT) const {

template <ImmTy Ty> bool isImmTy() const { return isImmTy(Ty); }

bool isImmLiteral() const { return isImmTy(ImmTyNone); }

bool isImmModifier() const {
  return isImm() && Imm.Type != ImmTyNone;
}

bool isOModSI() const { return isImmTy(ImmTyOModSI); }
bool isDMask() const { return isImmTy(ImmTyDMask); }
bool isDim() const { return isImmTy(ImmTyDim); }
bool isR128A16() const { return isImmTy(ImmTyR128A16); }
bool isOff() const { return isImmTy(ImmTyOff); }
bool isExpTgt() const { return isImmTy(ImmTyExpTgt); }
bool isOffen() const { return isImmTy(ImmTyOffen); }
bool isIdxen() const { return isImmTy(ImmTyIdxen); }
bool isAddr64() const { return isImmTy(ImmTyAddr64); }
bool isOffset() const { return isImmTy(ImmTyOffset) && isUInt<16>(getImm()); }
bool isOffset0() const { return isImmTy(ImmTyOffset0) && isUInt<8>(getImm()); }
bool isOffset1() const { return isImmTy(ImmTyOffset1) && isUInt<8>(getImm()); }
bool isSMEMOffsetMod() const { return isImmTy(ImmTySMEMOffsetMod); }
bool isFlatOffset() const { return isImmTy(ImmTyOffset) || isImmTy(ImmTyInstOffset); }
bool isGDS() const { return isImmTy(ImmTyGDS); }
bool isLDS() const { return isImmTy(ImmTyLDS); }
bool isCPol() const { return isImmTy(ImmTyCPol); }
bool isTFE() const { return isImmTy(ImmTyTFE); }
bool isFORMAT() const { return isImmTy(ImmTyFORMAT) && isUInt<7>(getImm()); }
bool isDppBankMask() const { return isImmTy(ImmTyDppBankMask); }
bool isDppRowMask() const { return isImmTy(ImmTyDppRowMask); }
bool isDppBoundCtrl() const { return isImmTy(ImmTyDppBoundCtrl); }
bool isDppFI() const { return isImmTy(ImmTyDppFI); }
bool isSDWADstSel() const { return isImmTy(ImmTySDWADstSel); }
bool isSDWASrc0Sel() const { return isImmTy(ImmTySDWASrc0Sel); }
bool isSDWASrc1Sel() const { return isImmTy(ImmTySDWASrc1Sel); }
bool isSDWADstUnused() const { return isImmTy(ImmTySDWADstUnused); }
bool isInterpSlot() const { return isImmTy(ImmTyInterpSlot); }
bool isInterpAttr() const { return isImmTy(ImmTyInterpAttr); }
bool isInterpAttrChan() const { return isImmTy(ImmTyInterpAttrChan); }
bool isOpSel() const { return isImmTy(ImmTyOpSel); }
bool isOpSelHi() const { return isImmTy(ImmTyOpSelHi); }
bool isNegLo() const { return isImmTy(ImmTyNegLo); }
bool isNegHi() const { return isImmTy(ImmTyNegHi); }
bool isRegOrImm() const {

bool isRegClass(unsigned RCID) const;

bool isRegOrInlineNoMods(unsigned RCID, MVT type) const {
  return isRegOrInline(RCID, type) && !hasModifiers();
}
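// Rough naming convention for the source-operand predicates below, as can be
// read off their bodies: SCSrc* = SGPR or inline constant, SSrc* = SCSrc or a
// 32-bit literal, VCSrc* = VGPR/SGPR or inline constant, VSrc* = VCSrc or a
// literal, VISrc* = VGPR or inline constant, AISrc* = AGPR or inline
// constant. The B<N>/F<N> suffix gives the operand width and whether the
// immediate is matched as an integer or a floating-point value.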
bool isSCSrcB16() const {
  return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i16);
}

bool isSCSrcV2B16() const {

bool isSCSrcB32() const {
  return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i32);
}

bool isSCSrcB64() const {
  return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::i64);
}

bool isBoolReg() const;

bool isSCSrcF16() const {
  return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f16);
}

bool isSCSrcV2F16() const {

bool isSCSrcF32() const {
  return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f32);
}

bool isSCSrcF64() const {
  return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::f64);
}

bool isSSrcB32() const {
  return isSCSrcB32() || isLiteralImm(MVT::i32) || isExpr();
}

bool isSSrcB16() const {
  return isSCSrcB16() || isLiteralImm(MVT::i16);
}

bool isSSrcV2B16() const {

bool isSSrcB64() const {
  return isSCSrcB64() || isLiteralImm(MVT::i64);
}

bool isSSrcF32() const {
  return isSCSrcB32() || isLiteralImm(MVT::f32) || isExpr();
}

bool isSSrcF64() const {
  return isSCSrcB64() || isLiteralImm(MVT::f64);
}

bool isSSrcF16() const {
  return isSCSrcB16() || isLiteralImm(MVT::f16);
}

bool isSSrcV2F16() const {
bool isSSrcV2FP32() const {
bool isSCSrcV2FP32() const {
bool isSSrcV2INT32() const {
bool isSCSrcV2INT32() const {

bool isSSrcOrLdsB32() const {
  return isRegOrInlineNoMods(AMDGPU::SRegOrLds_32RegClassID, MVT::i32) ||
         isLiteralImm(MVT::i32) || isExpr();
}
bool isVCSrcB32() const {
  return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i32);
}

bool isVCSrcB64() const {
  return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::i64);
}

bool isVCSrcTB16() const {
  return isRegOrInlineNoMods(AMDGPU::VS_16RegClassID, MVT::i16);
}

bool isVCSrcTB16_Lo128() const {
  return isRegOrInlineNoMods(AMDGPU::VS_16_Lo128RegClassID, MVT::i16);
}

bool isVCSrcFake16B16_Lo128() const {
  return isRegOrInlineNoMods(AMDGPU::VS_32_Lo128RegClassID, MVT::i16);
}

bool isVCSrcB16() const {
  return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i16);
}

bool isVCSrcV2B16() const {

bool isVCSrcF32() const {
  return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f32);
}

bool isVCSrcF64() const {
  return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::f64);
}

bool isVCSrcTF16() const {
  return isRegOrInlineNoMods(AMDGPU::VS_16RegClassID, MVT::f16);
}

bool isVCSrcTF16_Lo128() const {
  return isRegOrInlineNoMods(AMDGPU::VS_16_Lo128RegClassID, MVT::f16);
}

bool isVCSrcFake16F16_Lo128() const {
  return isRegOrInlineNoMods(AMDGPU::VS_32_Lo128RegClassID, MVT::f16);
}

bool isVCSrcF16() const {
  return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f16);
}

bool isVCSrcV2F16() const {

bool isVSrcB32() const {
  return isVCSrcF32() || isLiteralImm(MVT::i32) || isExpr();
}

bool isVSrcB64() const {
  return isVCSrcF64() || isLiteralImm(MVT::i64);
}

bool isVSrcTB16() const { return isVCSrcTB16() || isLiteralImm(MVT::i16); }

bool isVSrcTB16_Lo128() const {
  return isVCSrcTB16_Lo128() || isLiteralImm(MVT::i16);
}

bool isVSrcFake16B16_Lo128() const {
  return isVCSrcFake16B16_Lo128() || isLiteralImm(MVT::i16);
}

bool isVSrcB16() const {
  return isVCSrcB16() || isLiteralImm(MVT::i16);
}

bool isVSrcV2B16() const {
  return isVSrcB16() || isLiteralImm(MVT::v2i16);
}

bool isVCSrcV2FP32() const {

bool isVSrcV2FP32() const {
  return isVSrcF64() || isLiteralImm(MVT::v2f32);
}

bool isVCSrcV2INT32() const {

bool isVSrcV2INT32() const {
  return isVSrcB64() || isLiteralImm(MVT::v2i32);
}

bool isVSrcF32() const {
  return isVCSrcF32() || isLiteralImm(MVT::f32) || isExpr();
}

bool isVSrcF64() const {
  return isVCSrcF64() || isLiteralImm(MVT::f64);
}

bool isVSrcTF16() const { return isVCSrcTF16() || isLiteralImm(MVT::f16); }

bool isVSrcTF16_Lo128() const {
  return isVCSrcTF16_Lo128() || isLiteralImm(MVT::f16);
}

bool isVSrcFake16F16_Lo128() const {
  return isVCSrcFake16F16_Lo128() || isLiteralImm(MVT::f16);
}

bool isVSrcF16() const {
  return isVCSrcF16() || isLiteralImm(MVT::f16);
}

bool isVSrcV2F16() const {
  return isVSrcF16() || isLiteralImm(MVT::v2f16);
}
bool isVISrcB32() const {
  return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i32);
}

bool isVISrcB16() const {
  return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i16);
}

bool isVISrcV2B16() const {

bool isVISrcF32() const {
  return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f32);
}

bool isVISrcF16() const {
  return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f16);
}

bool isVISrcV2F16() const {
  return isVISrcF16() || isVISrcB32();
}

bool isVISrc_64B64() const {
  return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::i64);
}

bool isVISrc_64F64() const {
  return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::f64);
}

bool isVISrc_64V2FP32() const {
  return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::f32);
}

bool isVISrc_64V2INT32() const {
  return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::i32);
}

bool isVISrc_256B64() const {
  return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::i64);
}

bool isVISrc_256F64() const {
  return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::f64);
}

bool isVISrc_128B16() const {
  return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::i16);
}

bool isVISrc_128V2B16() const {
  return isVISrc_128B16();
}

bool isVISrc_128B32() const {
  return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::i32);
}

bool isVISrc_128F32() const {
  return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::f32);
}

bool isVISrc_256V2FP32() const {
  return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::f32);
}

bool isVISrc_256V2INT32() const {
  return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::i32);
}

bool isVISrc_512B32() const {
  return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::i32);
}

bool isVISrc_512B16() const {
  return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::i16);
}

bool isVISrc_512V2B16() const {
  return isVISrc_512B16();
}

bool isVISrc_512F32() const {
  return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::f32);
}

bool isVISrc_512F16() const {
  return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::f16);
}

bool isVISrc_512V2F16() const {
  return isVISrc_512F16() || isVISrc_512B32();
}

bool isVISrc_1024B32() const {
  return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::i32);
}

bool isVISrc_1024B16() const {
  return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::i16);
}

bool isVISrc_1024V2B16() const {
  return isVISrc_1024B16();
}

bool isVISrc_1024F32() const {
  return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::f32);
}

bool isVISrc_1024F16() const {
  return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::f16);
}

bool isVISrc_1024V2F16() const {
  return isVISrc_1024F16() || isVISrc_1024B32();
}
bool isAISrcB32() const {
  return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i32);
}

bool isAISrcB16() const {
  return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i16);
}

bool isAISrcV2B16() const {

bool isAISrcF32() const {
  return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f32);
}

bool isAISrcF16() const {
  return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f16);
}

bool isAISrcV2F16() const {
  return isAISrcF16() || isAISrcB32();
}

bool isAISrc_64B64() const {
  return isRegOrInlineNoMods(AMDGPU::AReg_64RegClassID, MVT::i64);
}

bool isAISrc_64F64() const {
  return isRegOrInlineNoMods(AMDGPU::AReg_64RegClassID, MVT::f64);
}

bool isAISrc_128B32() const {
  return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i32);
}

bool isAISrc_128B16() const {
  return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i16);
}

bool isAISrc_128V2B16() const {
  return isAISrc_128B16();
}

bool isAISrc_128F32() const {
  return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f32);
}

bool isAISrc_128F16() const {
  return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f16);
}

bool isAISrc_128V2F16() const {
  return isAISrc_128F16() || isAISrc_128B32();
}

bool isVISrc_128F16() const {
  return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::f16);
}

bool isVISrc_128V2F16() const {
  return isVISrc_128F16() || isVISrc_128B32();
}

bool isAISrc_256B64() const {
  return isRegOrInlineNoMods(AMDGPU::AReg_256RegClassID, MVT::i64);
}

bool isAISrc_256F64() const {
  return isRegOrInlineNoMods(AMDGPU::AReg_256RegClassID, MVT::f64);
}

bool isAISrc_512B32() const {
  return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i32);
}

bool isAISrc_512B16() const {
  return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i16);
}

bool isAISrc_512V2B16() const {
  return isAISrc_512B16();
}

bool isAISrc_512F32() const {
  return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f32);
}

bool isAISrc_512F16() const {
  return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f16);
}

bool isAISrc_512V2F16() const {
  return isAISrc_512F16() || isAISrc_512B32();
}

bool isAISrc_1024B32() const {
  return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i32);
}

bool isAISrc_1024B16() const {
  return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i16);
}

bool isAISrc_1024V2B16() const {
  return isAISrc_1024B16();
}

bool isAISrc_1024F32() const {
  return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f32);
}

bool isAISrc_1024F16() const {
  return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f16);
}

bool isAISrc_1024V2F16() const {
  return isAISrc_1024F16() || isAISrc_1024B32();
}
bool isKImmFP32() const {
  return isLiteralImm(MVT::f32);
}

bool isKImmFP16() const {
  return isLiteralImm(MVT::f16);
}

bool isMem() const override {

bool isExpr() const {

bool isSOPPBrTarget() const { return isExpr() || isImm(); }
bool isSWaitCnt() const;
bool isDepCtr() const;
bool isSDelayALU() const;
bool isHwreg() const;
bool isSendMsg() const;
bool isSwizzle() const;
bool isSMRDOffset8() const;
bool isSMEMOffset() const;
bool isSMRDLiteralOffset() const;
bool isDPPCtrl() const;
bool isGPRIdxMode() const;
bool isS16Imm() const;
bool isU16Imm() const;
bool isEndpgm() const;
bool isWaitVDST() const;
bool isWaitEXP() const;
auto getPredicate(std::function<bool(const AMDGPUOperand &Op)> P) const {
  return std::bind(P, *this);
}
int64_t getImm() const {

void setImm(int64_t Val) {

ImmTy getImmTy() const {

unsigned getReg() const override {

SMLoc getStartLoc() const override {

SMLoc getEndLoc() const override {

  return SMRange(StartLoc, EndLoc);

Modifiers getModifiers() const {
  assert(isRegKind() || isImmTy(ImmTyNone));
  return isRegKind() ? Reg.Mods : Imm.Mods;
}

void setModifiers(Modifiers Mods) {
  assert(isRegKind() || isImmTy(ImmTyNone));

bool hasModifiers() const {
  return getModifiers().hasModifiers();
}

bool hasFPModifiers() const {
  return getModifiers().hasFPModifiers();
}

bool hasIntModifiers() const {
  return getModifiers().hasIntModifiers();
}
void addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers = true) const;

void addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const;

void addRegOperands(MCInst &Inst, unsigned N) const;

void addRegOrImmOperands(MCInst &Inst, unsigned N) const {
    addRegOperands(Inst, N);
    addImmOperands(Inst, N);

void addRegOrImmWithInputModsOperands(MCInst &Inst, unsigned N) const {
  Modifiers Mods = getModifiers();
    addRegOperands(Inst, N);
    addImmOperands(Inst, N, false);

void addRegOrImmWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
  assert(!hasIntModifiers());
  addRegOrImmWithInputModsOperands(Inst, N);
}

void addRegOrImmWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
  assert(!hasFPModifiers());
  addRegOrImmWithInputModsOperands(Inst, N);
}

void addRegWithInputModsOperands(MCInst &Inst, unsigned N) const {
  Modifiers Mods = getModifiers();
    addRegOperands(Inst, N);

void addRegWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
  assert(!hasIntModifiers());
  addRegWithInputModsOperands(Inst, N);
}

void addRegWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
  assert(!hasFPModifiers());
  addRegWithInputModsOperands(Inst, N);
}
case ImmTyNone: OS << "None"; break;
case ImmTyGDS: OS << "GDS"; break;
case ImmTyLDS: OS << "LDS"; break;
case ImmTyOffen: OS << "Offen"; break;
case ImmTyIdxen: OS << "Idxen"; break;
case ImmTyAddr64: OS << "Addr64"; break;
case ImmTyOffset: OS << "Offset"; break;
case ImmTyInstOffset: OS << "InstOffset"; break;
case ImmTyOffset0: OS << "Offset0"; break;
case ImmTyOffset1: OS << "Offset1"; break;
case ImmTySMEMOffsetMod: OS << "SMEMOffsetMod"; break;
case ImmTyCPol: OS << "CPol"; break;
case ImmTyTFE: OS << "TFE"; break;
case ImmTyD16: OS << "D16"; break;
case ImmTyFORMAT: OS << "FORMAT"; break;
case ImmTyClampSI: OS << "ClampSI"; break;
case ImmTyOModSI: OS << "OModSI"; break;
case ImmTyDPP8: OS << "DPP8"; break;
case ImmTyDppCtrl: OS << "DppCtrl"; break;
case ImmTyDppRowMask: OS << "DppRowMask"; break;
case ImmTyDppBankMask: OS << "DppBankMask"; break;
case ImmTyDppBoundCtrl: OS << "DppBoundCtrl"; break;
case ImmTyDppFI: OS << "DppFI"; break;
case ImmTySDWADstSel: OS << "SDWADstSel"; break;
case ImmTySDWASrc0Sel: OS << "SDWASrc0Sel"; break;
case ImmTySDWASrc1Sel: OS << "SDWASrc1Sel"; break;
case ImmTySDWADstUnused: OS << "SDWADstUnused"; break;
case ImmTyDMask: OS << "DMask"; break;
case ImmTyDim: OS << "Dim"; break;
case ImmTyUNorm: OS << "UNorm"; break;
case ImmTyDA: OS << "DA"; break;
case ImmTyR128A16: OS << "R128A16"; break;
case ImmTyA16: OS << "A16"; break;
case ImmTyLWE: OS << "LWE"; break;
case ImmTyOff: OS << "Off"; break;
case ImmTyExpTgt: OS << "ExpTgt"; break;
case ImmTyExpCompr: OS << "ExpCompr"; break;
case ImmTyExpVM: OS << "ExpVM"; break;
case ImmTyHwreg: OS << "Hwreg"; break;
case ImmTySendMsg: OS << "SendMsg"; break;
case ImmTyInterpSlot: OS << "InterpSlot"; break;
case ImmTyInterpAttr: OS << "InterpAttr"; break;
case ImmTyInterpAttrChan: OS << "InterpAttrChan"; break;
case ImmTyOpSel: OS << "OpSel"; break;
case ImmTyOpSelHi: OS << "OpSelHi"; break;
case ImmTyNegLo: OS << "NegLo"; break;
case ImmTyNegHi: OS << "NegHi"; break;
case ImmTySwizzle: OS << "Swizzle"; break;
case ImmTyGprIdxMode: OS << "GprIdxMode"; break;
case ImmTyHigh: OS << "High"; break;
case ImmTyBLGP: OS << "BLGP"; break;
case ImmTyCBSZ: OS << "CBSZ"; break;
case ImmTyABID: OS << "ABID"; break;
case ImmTyEndpgm: OS << "Endpgm"; break;
case ImmTyWaitVDST: OS << "WaitVDST"; break;
case ImmTyWaitEXP: OS << "WaitEXP"; break;
    OS << "<register " << getReg() << " mods: " << Reg.Mods << '>';

    OS << '<' << getImm();
    if (getImmTy() != ImmTyNone) {
      OS << " type: "; printImmTy(OS, getImmTy());
    OS << " mods: " << Imm.Mods << '>';

    OS << '\'' << getToken() << '\'';

    OS << "<expr " << *Expr << '>';
static AMDGPUOperand::Ptr CreateImm(const AMDGPUAsmParser *AsmParser,
                                    int64_t Val, SMLoc Loc,
                                    ImmTy Type = ImmTyNone,
                                    bool IsFPImm = false) {
  auto Op = std::make_unique<AMDGPUOperand>(Immediate, AsmParser);
  Op->Imm.IsFPImm = IsFPImm;
  Op->Imm.Kind = ImmKindTyNone;
  Op->Imm.Mods = Modifiers();

static AMDGPUOperand::Ptr CreateToken(const AMDGPUAsmParser *AsmParser,
                                      bool HasExplicitEncodingSize = true) {
  auto Res = std::make_unique<AMDGPUOperand>(Token, AsmParser);
  Res->Tok.Data = Str.data();
  Res->Tok.Length = Str.size();
  Res->StartLoc = Loc;

static AMDGPUOperand::Ptr CreateReg(const AMDGPUAsmParser *AsmParser,
                                    unsigned RegNo, SMLoc S,
  auto Op = std::make_unique<AMDGPUOperand>(Register, AsmParser);
  Op->Reg.RegNo = RegNo;
  Op->Reg.Mods = Modifiers();

static AMDGPUOperand::Ptr CreateExpr(const AMDGPUAsmParser *AsmParser,
  auto Op = std::make_unique<AMDGPUOperand>(Expression, AsmParser);

  OS << "abs:" << Mods.Abs << " neg: " << Mods.Neg << " sext:" << Mods.Sext;
class KernelScopeInfo {
  int SgprIndexUnusedMin = -1;
  int VgprIndexUnusedMin = -1;
  int AgprIndexUnusedMin = -1;

  void usesSgprAt(int i) {
    if (i >= SgprIndexUnusedMin) {
      SgprIndexUnusedMin = ++i;

  void usesVgprAt(int i) {
    if (i >= VgprIndexUnusedMin) {
      VgprIndexUnusedMin = ++i;
                           VgprIndexUnusedMin);

  void usesAgprAt(int i) {
    if (i >= AgprIndexUnusedMin) {
      AgprIndexUnusedMin = ++i;
                           VgprIndexUnusedMin);

  KernelScopeInfo() = default;

    usesSgprAt(SgprIndexUnusedMin = -1);
    usesVgprAt(VgprIndexUnusedMin = -1);
    usesAgprAt(AgprIndexUnusedMin = -1);

  void usesRegister(RegisterKind RegKind, unsigned DwordRegIndex,
                    unsigned RegWidth) {
      usesSgprAt(DwordRegIndex + divideCeil(RegWidth, 32) - 1);
      usesAgprAt(DwordRegIndex + divideCeil(RegWidth, 32) - 1);
      usesVgprAt(DwordRegIndex + divideCeil(RegWidth, 32) - 1);
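// KernelScopeInfo (above) keeps a running "highest register index used so
// far" for SGPRs, VGPRs and AGPRs within the current kernel so the assembler
// can report per-kernel register usage; usesRegister() widens the tracked
// range by divideCeil(RegWidth, 32) dwords starting at DwordRegIndex.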
unsigned ForcedEncodingSize = 0;
bool ForcedDPP = false;
bool ForcedSDWA = false;
KernelScopeInfo KernelScope;

#define GET_ASSEMBLER_HEADER
#include "AMDGPUGenAsmMatcher.inc"
bool ParseAsAbsoluteExpression(uint32_t &Ret);
bool OutOfRangeError(SMRange Range);

bool calculateGPRBlocks(const FeatureBitset &Features, bool VCCUsed,
                        bool FlatScrUsed, bool XNACKUsed,
                        std::optional<bool> EnableWavefrontSize32,
                        unsigned NextFreeVGPR, SMRange VGPRRange,
                        unsigned NextFreeSGPR, SMRange SGPRRange,
                        unsigned &VGPRBlocks, unsigned &SGPRBlocks);
bool ParseDirectiveAMDGCNTarget();
bool ParseDirectiveAMDHSAKernel();
bool ParseDirectiveHSACodeObjectVersion();
bool ParseDirectiveHSACodeObjectISA();
bool ParseDirectiveAMDKernelCodeT();
bool ParseDirectiveAMDGPUHsaKernel();
bool ParseDirectiveISAVersion();
bool ParseDirectiveHSAMetadata();
bool ParseDirectivePALMetadataBegin();
bool ParseDirectivePALMetadata();
bool ParseDirectiveAMDGPULDS();

bool ParseToEndDirective(const char *AssemblerDirectiveBegin,
                         const char *AssemblerDirectiveEnd,
                         std::string &CollectString);

bool AddNextRegisterToList(unsigned &Reg, unsigned &RegWidth,
                           RegisterKind RegKind, unsigned Reg1, SMLoc Loc);
bool ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
                         unsigned &RegNum, unsigned &RegWidth,
                         bool RestoreOnFailure = false);
bool ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
                         unsigned &RegNum, unsigned &RegWidth,
unsigned ParseRegularReg(RegisterKind &RegKind, unsigned &RegNum,
unsigned ParseSpecialReg(RegisterKind &RegKind, unsigned &RegNum,
unsigned ParseRegList(RegisterKind &RegKind, unsigned &RegNum,
bool ParseRegRange(unsigned &Num, unsigned &Width);
unsigned getRegularReg(RegisterKind RegKind,
std::optional<StringRef> getGprCountSymbolName(RegisterKind RegKind);
void initializeGprCountSymbol(RegisterKind RegKind);
bool updateGprCountSymbols(RegisterKind RegKind, unsigned DwordRegIndex,

enum AMDGPUMatchResultTy {
  Match_PreferE32 = FIRST_TARGET_MATCH_RESULT_TY

  OperandMode_Default,

using OptionalImmIndexMap = std::map<AMDGPUOperand::ImmTy, unsigned>;
    if (getFeatureBits().none()) {

      initializeGprCountSymbol(IS_VGPR);
      initializeGprCountSymbol(IS_SGPR);
bool hasInv2PiInlineImm() const {
  return getFeatureBits()[AMDGPU::FeatureInv2PiInlineImm];
}

bool hasFlatOffsets() const {
  return getFeatureBits()[AMDGPU::FeatureFlatInstOffsets];
}

  return getFeatureBits()[AMDGPU::FeatureArchitectedFlatScratch];

bool hasSGPR102_SGPR103() const {

bool hasSGPR104_SGPR105() const { return isGFX10Plus(); }

bool hasIntClamp() const {
  return getFeatureBits()[AMDGPU::FeatureIntClamp];
}

bool hasPartialNSAEncoding() const {
  return getFeatureBits()[AMDGPU::FeaturePartialNSAEncoding];
}
void setForcedEncodingSize(unsigned Size) { ForcedEncodingSize = Size; }
void setForcedDPP(bool ForceDPP_) { ForcedDPP = ForceDPP_; }
void setForcedSDWA(bool ForceSDWA_) { ForcedSDWA = ForceSDWA_; }

unsigned getForcedEncodingSize() const { return ForcedEncodingSize; }
bool isForcedVOP3() const { return ForcedEncodingSize == 64; }
bool isForcedDPP() const { return ForcedDPP; }
bool isForcedSDWA() const { return ForcedSDWA; }

StringRef getMatchedVariantName() const;
std::unique_ptr<AMDGPUOperand> parseRegister(bool RestoreOnFailure = false);
                   bool RestoreOnFailure);
                   SMLoc &EndLoc) override;
                   unsigned Kind) override;
                   bool MatchingInlineAsm) override;
                   OperandMode Mode = OperandMode_Default);

ParseStatus parseIntWithPrefix(const char *Prefix, int64_t &Int);
    AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
    std::function<bool(int64_t &)> ConvertResult = nullptr);
    AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
    bool (*ConvertResult)(int64_t &) = nullptr);
    AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone);
bool isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
bool isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
bool isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
bool isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const;
bool parseSP3NegModifier();
                   bool HasLit = false);
                   bool HasLit = false);
                   bool AllowImm = true);
                   bool AllowImm = true);
ParseStatus parseSymbolicOrNumericFormat(int64_t &Format);
bool tryParseFmt(const char *Pref, int64_t MaxVal, int64_t &Val);
bool matchDfmtNfmt(int64_t &Dfmt, int64_t &Nfmt, StringRef FormatStr, SMLoc Loc);
bool parseCnt(int64_t &IntVal);
bool parseDepCtr(int64_t &IntVal, unsigned &Mask);
bool parseDelay(int64_t &Delay);
struct OperandInfoTy {
  bool IsSymbolic = false;
  bool IsDefined = false;

  OperandInfoTy(int64_t Id_) : Id(Id_) {}

bool parseSendMsgBody(OperandInfoTy &Msg, OperandInfoTy &Op,
                      OperandInfoTy &Stream);
bool validateSendMsg(const OperandInfoTy &Msg,
                     const OperandInfoTy &Op,
                     const OperandInfoTy &Stream);

bool parseHwregBody(OperandInfoTy &HwReg,
                    OperandInfoTy &Width);
bool validateHwreg(const OperandInfoTy &HwReg,
                   const OperandInfoTy &Offset,
                   const OperandInfoTy &Width);
SMLoc getOperandLoc(std::function<bool(const AMDGPUOperand &)> Test,
                    bool SearchMandatoryLiterals = false) const;

bool validateSOPLiteral(const MCInst &Inst) const;
bool validateVOPDRegBankConstraints(const MCInst &Inst,
bool validateIntClampSupported(const MCInst &Inst);
bool validateMIMGAtomicDMask(const MCInst &Inst);
bool validateMIMGGatherDMask(const MCInst &Inst);
bool validateMIMGDataSize(const MCInst &Inst, const SMLoc &IDLoc);
bool validateMIMGAddrSize(const MCInst &Inst, const SMLoc &IDLoc);
bool validateMIMGD16(const MCInst &Inst);
bool validateMIMGMSAA(const MCInst &Inst);
bool validateOpSel(const MCInst &Inst);
bool validateVccOperand(unsigned Reg) const;
bool validateAGPRLdSt(const MCInst &Inst) const;
bool validateVGPRAlign(const MCInst &Inst) const;
bool validateDivScale(const MCInst &Inst);
                      const SMLoc &IDLoc);
std::optional<StringRef> validateLdsDirect(const MCInst &Inst);
unsigned getConstantBusLimit(unsigned Opcode) const;
bool usesConstantBus(const MCInst &Inst, unsigned OpIdx);
bool isInlineConstant(const MCInst &Inst, unsigned OpIdx) const;
unsigned findImplicitSGPRReadInVOP(const MCInst &Inst) const;
AsmToken peekToken(bool ShouldSkipSpace = true);
SMLoc getLoc() const;

bool parseSwizzleOperand(int64_t &Op,
                         const unsigned MinVal,
                         const unsigned MaxVal,
bool parseSwizzleOperands(const unsigned OpNum, int64_t *Op,
                          const unsigned MinVal,
                          const unsigned MaxVal,
bool parseSwizzleOffset(int64_t &Imm);
bool parseSwizzleMacro(int64_t &Imm);
bool parseSwizzleQuadPerm(int64_t &Imm);
bool parseSwizzleBitmaskPerm(int64_t &Imm);
bool parseSwizzleBroadcast(int64_t &Imm);
bool parseSwizzleSwap(int64_t &Imm);
bool parseSwizzleReverse(int64_t &Imm);

int64_t parseGPRIdxMacro();

    OptionalImmIndexMap &OptionalIdx);
    OptionalImmIndexMap &OptionalIdx);
    OptionalImmIndexMap &OptionalIdx);
bool parseDimId(unsigned &Encoding);
bool convertDppBoundCtrl(int64_t &BoundCtrl);
int64_t parseDPPCtrlSel(StringRef Ctrl);
int64_t parseDPPCtrlPerm();
                   bool IsDPP8 = false);
                   AMDGPUOperand::ImmTy Type);
                   bool SkipDstVcc = false,
                   bool SkipSrcVcc = false);

    return &APFloat::IEEEsingle();
    return &APFloat::IEEEdouble();
    return &APFloat::IEEEhalf();
    return &APFloat::IEEEsingle();
    return &APFloat::IEEEdouble();
    return &APFloat::IEEEhalf();
                          APFloat::rmNearestTiesToEven,
  if (Status != APFloat::opOK &&
      ((Status & APFloat::opOverflow) != 0 ||
       (Status & APFloat::opUnderflow) != 0)) {
bool AMDGPUOperand::isInlinableImm(MVT type) const {
  if (!isImmTy(ImmTyNone)) {

  if (type == MVT::f64 || type == MVT::i64) {
                       AsmParser->hasInv2PiInlineImm());

        static_cast<int16_t>(FPLiteral.bitcastToAPInt().getZExtValue()),
        type, AsmParser->hasInv2PiInlineImm());

        static_cast<int32_t>(FPLiteral.bitcastToAPInt().getZExtValue()),
        AsmParser->hasInv2PiInlineImm());

  if (type == MVT::f64 || type == MVT::i64) {
                       AsmParser->hasInv2PiInlineImm());

      static_cast<int16_t>(Literal.getLoBits(16).getSExtValue()),
      type, AsmParser->hasInv2PiInlineImm());

      static_cast<int32_t>(Literal.getLoBits(32).getZExtValue()),
      AsmParser->hasInv2PiInlineImm());

bool AMDGPUOperand::isLiteralImm(MVT type) const {
  if (!isImmTy(ImmTyNone)) {

  if (type == MVT::f64 && hasFPModifiers()) {

  if (type == MVT::f64) {

  if (type == MVT::i64) {

  MVT ExpectedType = (type == MVT::v2f16) ? MVT::f16
                   : (type == MVT::v2i16) ? MVT::i16
                   : (type == MVT::v2f32) ? MVT::f32
                                          : type;
bool AMDGPUOperand::isRegClass(unsigned RCID) const {
  return isRegKind() && AsmParser->getMRI()->getRegClass(RCID).contains(getReg());
}

bool AMDGPUOperand::isVRegWithInputMods() const {
  return isRegClass(AMDGPU::VGPR_32RegClassID) ||
         (isRegClass(AMDGPU::VReg_64RegClassID) &&
          AsmParser->getFeatureBits()[AMDGPU::FeatureDPALU_DPP]);
}

bool AMDGPUOperand::isT16VRegWithInputMods() const {
  return isRegClass(AMDGPU::VGPR_32_Lo128RegClassID);
}

bool AMDGPUOperand::isSDWAOperand(MVT type) const {
  if (AsmParser->isVI())
  else if (AsmParser->isGFX9Plus())
    return isRegClass(AMDGPU::VS_32RegClassID) || isInlinableImm(type);

bool AMDGPUOperand::isSDWAFP16Operand() const {
  return isSDWAOperand(MVT::f16);
}

bool AMDGPUOperand::isSDWAFP32Operand() const {
  return isSDWAOperand(MVT::f32);
}

bool AMDGPUOperand::isSDWAInt16Operand() const {
  return isSDWAOperand(MVT::i16);
}

bool AMDGPUOperand::isSDWAInt32Operand() const {
  return isSDWAOperand(MVT::i32);
}

bool AMDGPUOperand::isBoolReg() const {
  auto FB = AsmParser->getFeatureBits();
  return isReg() && ((FB[AMDGPU::FeatureWavefrontSize64] && isSCSrcB64()) ||
                     (FB[AMDGPU::FeatureWavefrontSize32] && isSCSrcB32()));
}
  assert(isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers());

void AMDGPUOperand::addImmOperands(MCInst &Inst, unsigned N,
                                   bool ApplyModifiers) const {
    addLiteralImmOperand(Inst, Imm.Val,
                         isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers());
    assert(!isImmTy(ImmTyNone) || !hasModifiers());

void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val,
                                         bool ApplyModifiers) const {
  const auto &InstDesc = AsmParser->getMII()->get(Inst.getOpcode());
  if (ApplyModifiers) {
    Val = applyInputFPModifiers(Val, Size);

  uint8_t OpTy = InstDesc.operands()[OpNum].OperandType;
                            AsmParser->hasInv2PiInlineImm())) {
      if (Literal.getLoBits(32) != 0) {
        const_cast<AMDGPUAsmParser *>(AsmParser)->Warning(
            Inst.getLoc(),
            "Can't encode literal as exact 64-bit floating-point operand. "
            "Low 32-bits will be set to zero");
        Val &= 0xffffffff00000000u;
      setImmKindLiteral();

                            APFloat::rmNearestTiesToEven, &lost);
      uint64_t ImmVal = FPLiteral.bitcastToAPInt().getZExtValue();
        setImmKindMandatoryLiteral();
        setImmKindLiteral();

                            AsmParser->hasInv2PiInlineImm())) {
      setImmKindLiteral();
      setImmKindLiteral();
                            AsmParser->hasInv2PiInlineImm())) {
      setImmKindLiteral();
                           AsmParser->hasInv2PiInlineImm()));
    setImmKindMandatoryLiteral();
    setImmKindMandatoryLiteral();
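// addLiteralImmOperand (above) reflects how literals are encoded: an
// instruction carries at most one extra 32-bit literal dword, so a 64-bit FP
// literal is represented through its high 32 bits (hence the warning when the
// low half would be lost), and anything expressible as an inline constant is
// emitted without a literal at all.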
void AMDGPUOperand::addRegOperands(MCInst &Inst, unsigned N) const {

bool AMDGPUOperand::isInlineValue() const {
  if (Is == IS_VGPR) {
      return AMDGPU::VGPR_32RegClassID;
      return AMDGPU::VReg_64RegClassID;
      return AMDGPU::VReg_96RegClassID;
      return AMDGPU::VReg_128RegClassID;
      return AMDGPU::VReg_160RegClassID;
      return AMDGPU::VReg_192RegClassID;
      return AMDGPU::VReg_224RegClassID;
      return AMDGPU::VReg_256RegClassID;
      return AMDGPU::VReg_288RegClassID;
      return AMDGPU::VReg_320RegClassID;
      return AMDGPU::VReg_352RegClassID;
      return AMDGPU::VReg_384RegClassID;
      return AMDGPU::VReg_512RegClassID;
      return AMDGPU::VReg_1024RegClassID;
  } else if (Is == IS_TTMP) {
      return AMDGPU::TTMP_32RegClassID;
      return AMDGPU::TTMP_64RegClassID;
      return AMDGPU::TTMP_128RegClassID;
      return AMDGPU::TTMP_256RegClassID;
      return AMDGPU::TTMP_512RegClassID;
  } else if (Is == IS_SGPR) {
      return AMDGPU::SGPR_32RegClassID;
      return AMDGPU::SGPR_64RegClassID;
      return AMDGPU::SGPR_96RegClassID;
      return AMDGPU::SGPR_128RegClassID;
      return AMDGPU::SGPR_160RegClassID;
      return AMDGPU::SGPR_192RegClassID;
      return AMDGPU::SGPR_224RegClassID;
      return AMDGPU::SGPR_256RegClassID;
      return AMDGPU::SGPR_288RegClassID;
      return AMDGPU::SGPR_320RegClassID;
      return AMDGPU::SGPR_352RegClassID;
      return AMDGPU::SGPR_384RegClassID;
      return AMDGPU::SGPR_512RegClassID;
  } else if (Is == IS_AGPR) {
      return AMDGPU::AGPR_32RegClassID;
      return AMDGPU::AReg_64RegClassID;
      return AMDGPU::AReg_96RegClassID;
      return AMDGPU::AReg_128RegClassID;
      return AMDGPU::AReg_160RegClassID;
      return AMDGPU::AReg_192RegClassID;
      return AMDGPU::AReg_224RegClassID;
      return AMDGPU::AReg_256RegClassID;
      return AMDGPU::AReg_288RegClassID;
      return AMDGPU::AReg_320RegClassID;
      return AMDGPU::AReg_352RegClassID;
      return AMDGPU::AReg_384RegClassID;
      return AMDGPU::AReg_512RegClassID;
      return AMDGPU::AReg_1024RegClassID;
      .Case("exec", AMDGPU::EXEC)
      .Case("vcc", AMDGPU::VCC)
      .Case("flat_scratch", AMDGPU::FLAT_SCR)
      .Case("xnack_mask", AMDGPU::XNACK_MASK)
      .Case("shared_base", AMDGPU::SRC_SHARED_BASE)
      .Case("src_shared_base", AMDGPU::SRC_SHARED_BASE)
      .Case("shared_limit", AMDGPU::SRC_SHARED_LIMIT)
      .Case("src_shared_limit", AMDGPU::SRC_SHARED_LIMIT)
      .Case("private_base", AMDGPU::SRC_PRIVATE_BASE)
      .Case("src_private_base", AMDGPU::SRC_PRIVATE_BASE)
      .Case("private_limit", AMDGPU::SRC_PRIVATE_LIMIT)
      .Case("src_private_limit", AMDGPU::SRC_PRIVATE_LIMIT)
      .Case("pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID)
      .Case("src_pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID)
      .Case("lds_direct", AMDGPU::LDS_DIRECT)
      .Case("src_lds_direct", AMDGPU::LDS_DIRECT)
      .Case("m0", AMDGPU::M0)
      .Case("vccz", AMDGPU::SRC_VCCZ)
      .Case("src_vccz", AMDGPU::SRC_VCCZ)
      .Case("execz", AMDGPU::SRC_EXECZ)
      .Case("src_execz", AMDGPU::SRC_EXECZ)
      .Case("scc", AMDGPU::SRC_SCC)
      .Case("src_scc", AMDGPU::SRC_SCC)
      .Case("tba", AMDGPU::TBA)
      .Case("tma", AMDGPU::TMA)
      .Case("flat_scratch_lo", AMDGPU::FLAT_SCR_LO)
      .Case("flat_scratch_hi", AMDGPU::FLAT_SCR_HI)
      .Case("xnack_mask_lo", AMDGPU::XNACK_MASK_LO)
      .Case("xnack_mask_hi", AMDGPU::XNACK_MASK_HI)
      .Case("vcc_lo", AMDGPU::VCC_LO)
      .Case("vcc_hi", AMDGPU::VCC_HI)
      .Case("exec_lo", AMDGPU::EXEC_LO)
      .Case("exec_hi", AMDGPU::EXEC_HI)
      .Case("tma_lo", AMDGPU::TMA_LO)
      .Case("tma_hi", AMDGPU::TMA_HI)
      .Case("tba_lo", AMDGPU::TBA_LO)
      .Case("tba_hi", AMDGPU::TBA_HI)
      .Case("pc", AMDGPU::PC_REG)
      .Case("null", AMDGPU::SGPR_NULL)
bool AMDGPUAsmParser::ParseRegister(MCRegister &RegNo, SMLoc &StartLoc,
                                    SMLoc &EndLoc, bool RestoreOnFailure) {
  auto R = parseRegister();
  if (!R)
    return true;
  RegNo = R->getReg();
  StartLoc = R->getStartLoc();
  EndLoc = R->getEndLoc();

  return ParseRegister(Reg, StartLoc, EndLoc, false);

  bool Result = ParseRegister(Reg, StartLoc, EndLoc, true);
  bool PendingErrors = getParser().hasPendingError();
  getParser().clearPendingErrors();
bool AMDGPUAsmParser::AddNextRegisterToList(unsigned &Reg, unsigned &RegWidth,
                                            RegisterKind RegKind, unsigned Reg1,
  if (Reg == AMDGPU::EXEC_LO && Reg1 == AMDGPU::EXEC_HI) {

  if (Reg == AMDGPU::FLAT_SCR_LO && Reg1 == AMDGPU::FLAT_SCR_HI) {
    Reg = AMDGPU::FLAT_SCR;

  if (Reg == AMDGPU::XNACK_MASK_LO && Reg1 == AMDGPU::XNACK_MASK_HI) {
    Reg = AMDGPU::XNACK_MASK;

  if (Reg == AMDGPU::VCC_LO && Reg1 == AMDGPU::VCC_HI) {

  if (Reg == AMDGPU::TBA_LO && Reg1 == AMDGPU::TBA_HI) {

  if (Reg == AMDGPU::TMA_LO && Reg1 == AMDGPU::TMA_HI) {

    Error(Loc, "register does not fit in the list");

    if (Reg1 != Reg + RegWidth / 32) {
      Error(Loc, "registers in a list must have consecutive indices");
    {{"ttmp"}, IS_TTMP},

  return Kind == IS_VGPR ||

    if (Str.startswith(Reg.Name))

  return !Str.getAsInteger(10, Num);

AMDGPUAsmParser::isRegister(const AsmToken &Token,
  if (!RegSuffix.empty()) {

AMDGPUAsmParser::isRegister()
  return isRegister(getToken(), peekToken());
AMDGPUAsmParser::getRegularReg(RegisterKind RegKind,
  unsigned AlignSize = 1;
  if (RegKind == IS_SGPR || RegKind == IS_TTMP) {
    AlignSize = std::min(RegWidth / 32, 4u);

  if (RegNum % AlignSize != 0) {
    Error(Loc, "invalid register alignment");
    return AMDGPU::NoRegister;

  unsigned RegIdx = RegNum / AlignSize;

    Error(Loc, "invalid or unsupported register size");
    return AMDGPU::NoRegister;

    Error(Loc, "register index is out of range");
    return AMDGPU::NoRegister;
bool AMDGPUAsmParser::ParseRegRange(unsigned &Num, unsigned &RegWidth) {
  int64_t RegLo, RegHi;

  SMLoc FirstIdxLoc = getLoc();
  if (!parseExpr(RegLo))

    SecondIdxLoc = getLoc();
    if (!parseExpr(RegHi))

  if (!isUInt<32>(RegLo)) {
    Error(FirstIdxLoc, "invalid register index");

  if (!isUInt<32>(RegHi)) {
    Error(SecondIdxLoc, "invalid register index");

  if (RegLo > RegHi) {
    Error(FirstIdxLoc, "first register index should not exceed second index");

  Num = static_cast<unsigned>(RegLo);
  RegWidth = 32 * ((RegHi - RegLo) + 1);
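// A register range names a span of consecutive 32-bit registers, e.g. (asm
// syntax, illustrative): s[0:1] is a 64-bit SGPR pair and v[4:7] a 128-bit
// VGPR tuple; RegWidth comes out as 32 * (number of registers in the range),
// as in "s_load_dwordx4 s[8:11], s[0:1], 0x0".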
unsigned AMDGPUAsmParser::ParseSpecialReg(RegisterKind &RegKind,
                                          unsigned &RegNum, unsigned &RegWidth,
    RegKind = IS_SPECIAL;

unsigned AMDGPUAsmParser::ParseRegularReg(RegisterKind &RegKind,
                                          unsigned &RegNum, unsigned &RegWidth,
  auto Loc = getLoc();

    Error(Loc, "invalid register name");
    return AMDGPU::NoRegister;

  if (!RegSuffix.empty()) {
      Error(Loc, "invalid register index");
      return AMDGPU::NoRegister;

    if (!ParseRegRange(RegNum, RegWidth))
      return AMDGPU::NoRegister;

  return getRegularReg(RegKind, RegNum, RegWidth, Loc);
unsigned AMDGPUAsmParser::ParseRegList(RegisterKind &RegKind, unsigned &RegNum,
  unsigned Reg = AMDGPU::NoRegister;
  auto ListLoc = getLoc();

                "expected a register or a list of registers")) {
    return AMDGPU::NoRegister;

  auto Loc = getLoc();
  if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth))
    return AMDGPU::NoRegister;
  if (RegWidth != 32) {
    Error(Loc, "expected a single 32-bit register");
    return AMDGPU::NoRegister;

    RegisterKind NextRegKind;
    unsigned NextReg, NextRegNum, NextRegWidth;

    if (!ParseAMDGPURegister(NextRegKind, NextReg,
                             NextRegNum, NextRegWidth,
      return AMDGPU::NoRegister;
    if (NextRegWidth != 32) {
      Error(Loc, "expected a single 32-bit register");
      return AMDGPU::NoRegister;
    if (NextRegKind != RegKind) {
      Error(Loc, "registers in a list must be of the same kind");
      return AMDGPU::NoRegister;
    if (!AddNextRegisterToList(Reg, RegWidth, RegKind, NextReg, Loc))
      return AMDGPU::NoRegister;

                "expected a comma or a closing square bracket")) {
    return AMDGPU::NoRegister;

  Reg = getRegularReg(RegKind, RegNum, RegWidth, ListLoc);
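// A bracketed register list such as [s0, s1, s2, s3] (illustrative syntax) is
// an alternative spelling of a range: each element must be a single 32-bit
// register of the same kind with consecutive indices, and the list is folded
// back into one wide register via getRegularReg().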
bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
                                          unsigned &RegNum, unsigned &RegWidth,
  auto Loc = getLoc();
  Reg = AMDGPU::NoRegister;

    Reg = ParseSpecialReg(RegKind, RegNum, RegWidth, Tokens);
    if (Reg == AMDGPU::NoRegister)
      Reg = ParseRegularReg(RegKind, RegNum, RegWidth, Tokens);
    Reg = ParseRegList(RegKind, RegNum, RegWidth, Tokens);

  if (Reg == AMDGPU::NoRegister) {

  if (!subtargetHasRegister(*TRI, Reg)) {
    if (Reg == AMDGPU::SGPR_NULL) {
      Error(Loc, "'null' operand is not supported on this GPU");

      Error(Loc, "register not available on this GPU");

bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
                                          unsigned &RegNum, unsigned &RegWidth,
                                          bool RestoreOnFailure) {
  Reg = AMDGPU::NoRegister;

  if (ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth, Tokens)) {
    if (RestoreOnFailure) {
      while (!Tokens.empty()) {
std::optional<StringRef>
AMDGPUAsmParser::getGprCountSymbolName(RegisterKind RegKind) {
    return StringRef(".amdgcn.next_free_vgpr");
    return StringRef(".amdgcn.next_free_sgpr");
    return std::nullopt;

void AMDGPUAsmParser::initializeGprCountSymbol(RegisterKind RegKind) {
  auto SymbolName = getGprCountSymbolName(RegKind);
  assert(SymbolName && "initializing invalid register kind");
  MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName);

bool AMDGPUAsmParser::updateGprCountSymbols(RegisterKind RegKind,
                                            unsigned DwordRegIndex,
                                            unsigned RegWidth) {
  auto SymbolName = getGprCountSymbolName(RegKind);
  MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName);
  int64_t NewMax = DwordRegIndex + divideCeil(RegWidth, 32) - 1;

  if (!Sym->isVariable())
    return !Error(getLoc(),
                  ".amdgcn.next_free_{v,s}gpr symbols must be variable");
  if (!Sym->getVariableValue(false)->evaluateAsAbsolute(OldCount))
        ".amdgcn.next_free_{v,s}gpr symbols must be absolute expressions");
  if (OldCount <= NewMax)
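// .amdgcn.next_free_vgpr / .amdgcn.next_free_sgpr are assembler-maintained
// symbols holding one past the highest register index referenced so far;
// hand-written assembly can use them (for example in .amdhsa_next_free_vgpr
// expressions) instead of counting registers by hand.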
std::unique_ptr<AMDGPUOperand>
AMDGPUAsmParser::parseRegister(bool RestoreOnFailure) {
  const auto &Tok = getToken();
  SMLoc StartLoc = Tok.getLoc();
  SMLoc EndLoc = Tok.getEndLoc();
  RegisterKind RegKind;
  unsigned Reg, RegNum, RegWidth;

  if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth)) {

    if (!updateGprCountSymbols(RegKind, RegNum, RegWidth))

  KernelScope.usesRegister(RegKind, RegNum, RegWidth);
  return AMDGPUOperand::CreateReg(this, Reg, StartLoc, EndLoc);
                          bool HasSP3AbsModifier, bool HasLit) {
  HasLit = trySkipId("lit");

  const auto &Tok = getToken();
  const auto &NextTok = peekToken();
  bool Negate = false;

  AMDGPUOperand::Modifiers Mods;

  APFloat RealVal(APFloat::IEEEdouble());
  auto roundMode = APFloat::rmNearestTiesToEven;
  if (errorToBool(RealVal.convertFromString(Num, roundMode).takeError()))

    RealVal.changeSign();

      AMDGPUOperand::CreateImm(this, RealVal.bitcastToAPInt().getZExtValue(), S,
                               AMDGPUOperand::ImmTyNone, true));
  AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
  Op.setModifiers(Mods);

  if (HasSP3AbsModifier) {

  if (getParser().parsePrimaryExpr(Expr, EndLoc, nullptr))

  if (Expr->evaluateAsAbsolute(IntVal)) {
    Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S));
    AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
    Op.setModifiers(Mods);

  Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S));
  if (auto R = parseRegister()) {

                              bool HasSP3AbsMod, bool HasLit) {
  return parseImm(Operands, HasSP3AbsMod, HasLit);

AMDGPUAsmParser::isNamedOperandModifier(const AsmToken &Token,
                                        const AsmToken &NextToken) const {
  return str == "abs" || str == "neg" || str == "sext";

AMDGPUAsmParser::isOpcodeModifierWithVal(const AsmToken &Token,
                                         const AsmToken &NextToken) const {

AMDGPUAsmParser::isOperandModifier(const AsmToken &Token,
                                   const AsmToken &NextToken) const {
  return isNamedOperandModifier(Token, NextToken) || Token.is(AsmToken::Pipe);

AMDGPUAsmParser::isRegOrOperandModifier(const AsmToken &Token,
                                        const AsmToken &NextToken) const {
  return isRegister(Token, NextToken) || isOperandModifier(Token, NextToken);

AMDGPUAsmParser::isModifier() {
  peekTokens(NextToken);

  return isOperandModifier(Tok, NextToken[0]) ||
         (Tok.is(AsmToken::Minus) &&
          isRegOrOperandModifier(NextToken[0], NextToken[1])) ||
         isOpcodeModifierWithVal(Tok, NextToken[0]);
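// Operand modifiers in the assembly syntax (illustrative): abs(v1) or |v1|
// for absolute value, neg(v1) or a leading minus for negation, and sext(v1)
// for sign extension. The SP3 spellings (|...| and unary minus) are what
// isModifier() above and parseSP3NegModifier() below have to disambiguate
// from expressions and register names.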
AMDGPUAsmParser::parseSP3NegModifier() {
  peekTokens(NextToken);
      (isRegister(NextToken[0], NextToken[1]) ||
       isId(NextToken[0], "abs"))) {

    return Error(getLoc(), "invalid syntax, expected 'neg' modifier");

  SP3Neg = parseSP3NegModifier();

  Neg = trySkipId("neg");
    return Error(Loc, "expected register or immediate");

  Abs = trySkipId("abs");

  Lit = trySkipId("lit");
    return Error(Loc, "expected register or immediate");

  Res = parseRegOrImm(Operands, SP3Abs, Lit);

  if (Lit && !Operands.back()->isImm())
    Error(Loc, "expected immediate with lit modifier");

  if (SP3Abs && !skipToken(AsmToken::Pipe, "expected vertical bar"))

  AMDGPUOperand::Modifiers Mods;
  Mods.Abs = Abs || SP3Abs;
  Mods.Neg = Neg || SP3Neg;

  if (Mods.hasFPModifiers() || Lit) {
    AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
      return Error(Op.getStartLoc(), "expected an absolute expression");
    Op.setModifiers(Mods);

  bool Sext = trySkipId("sext");
  if (Sext && !skipToken(AsmToken::LParen, "expected left paren after sext"))

  AMDGPUOperand::Modifiers Mods;
  if (Mods.hasIntModifiers()) {
    AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
      return Error(Op.getStartLoc(), "expected an absolute expression");
    Op.setModifiers(Mods);

  return parseRegOrImmWithFPInputMods(Operands, false);

  return parseRegOrImmWithIntInputMods(Operands, false);
  auto Loc = getLoc();
  if (trySkipId("off")) {
    Operands.push_back(AMDGPUOperand::CreateImm(this, 0, Loc,
                                                AMDGPUOperand::ImmTyOff, false));

  std::unique_ptr<AMDGPUOperand> Reg = parseRegister();
    Operands.push_back(std::move(Reg));
unsigned AMDGPUAsmParser::checkTargetMatchPredicate(MCInst &Inst) {
    return Match_InvalidOperand;

      getForcedEncodingSize() != 64)
    return Match_PreferE32;

  if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi ||
      Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) {
    if (!Op.isImm() || Op.getImm() != AMDGPU::SDWA::SdwaSel::DWORD) {
      return Match_InvalidOperand;

  return Match_Success;
  static const unsigned Variants[] = {

  if (isForcedDPP() && isForcedVOP3()) {
  if (getForcedEncodingSize() == 32) {
  if (isForcedVOP3()) {
  if (isForcedSDWA()) {
  if (isForcedDPP()) {

StringRef AMDGPUAsmParser::getMatchedVariantName() const {
  if (isForcedDPP() && isForcedVOP3())

  if (getForcedEncodingSize() == 32)
unsigned AMDGPUAsmParser::findImplicitSGPRReadInVOP(const MCInst &Inst) const {
  case AMDGPU::FLAT_SCR:
  case AMDGPU::VCC_LO:
  case AMDGPU::VCC_HI:

  return AMDGPU::NoRegister;
bool AMDGPUAsmParser::isInlineConstant(const MCInst &Inst,
                                       unsigned OpIdx) const {
  int64_t Val = MO.getImm();

unsigned AMDGPUAsmParser::getConstantBusLimit(unsigned Opcode) const {
  case AMDGPU::V_LSHLREV_B64_e64:
  case AMDGPU::V_LSHLREV_B64_gfx10:
  case AMDGPU::V_LSHLREV_B64_e64_gfx11:
  case AMDGPU::V_LSHRREV_B64_e64:
  case AMDGPU::V_LSHRREV_B64_gfx10:
  case AMDGPU::V_LSHRREV_B64_e64_gfx11:
  case AMDGPU::V_ASHRREV_I64_e64:
  case AMDGPU::V_ASHRREV_I64_gfx10:
  case AMDGPU::V_ASHRREV_I64_e64_gfx11:
  case AMDGPU::V_LSHL_B64_e64:
  case AMDGPU::V_LSHR_B64_e64:
  case AMDGPU::V_ASHR_I64_e64:

                               bool AddMandatoryLiterals = false) {
  int16_t ImmDeferredIdx =
bool AMDGPUAsmParser::usesConstantBus(const MCInst &Inst, unsigned OpIdx) {
    return !isInlineConstant(Inst, OpIdx);
  } else if (MO.isReg()) {
    return isSGPR(PReg, TRI) && PReg != SGPR_NULL;

bool AMDGPUAsmParser::validateConstantBusLimitations(
  unsigned LastSGPR = AMDGPU::NoRegister;
  unsigned ConstantBusUseCount = 0;
  unsigned NumLiterals = 0;
  unsigned LiteralSize;

  if (!(Desc.TSFlags &

  unsigned SGPRUsed = findImplicitSGPRReadInVOP(Inst);
  if (SGPRUsed != AMDGPU::NoRegister) {
    SGPRsUsed.insert(SGPRUsed);
    ++ConstantBusUseCount;

  for (int OpIdx : OpIndices) {
    if (usesConstantBus(Inst, OpIdx)) {
        if (SGPRsUsed.insert(LastSGPR).second) {
          ++ConstantBusUseCount;

      if (NumLiterals == 0) {
      } else if (LiteralSize != Size) {

  ConstantBusUseCount += NumLiterals;

  if (ConstantBusUseCount <= getConstantBusLimit(Opcode))

  Error(Loc, "invalid operand (violates constant bus restrictions)");
bool AMDGPUAsmParser::validateVOPDRegBankConstraints(
  auto getVRegIdx = [&](unsigned, unsigned OperandIdx) {

  auto InvalidCompOprIdx = InstInfo.getInvalidCompOperandIndex(getVRegIdx);
  if (!InvalidCompOprIdx)

  auto CompOprIdx = *InvalidCompOprIdx;
      std::max(InstInfo[VOPD::X].getIndexInParsedOperands(CompOprIdx),
               InstInfo[VOPD::Y].getIndexInParsedOperands(CompOprIdx));

  auto Loc = ((AMDGPUOperand &)*Operands[ParsedIdx]).getStartLoc();
  if (CompOprIdx == VOPD::Component::DST) {
    Error(Loc, "one dst register must be even and the other odd");
    auto CompSrcIdx = CompOprIdx - VOPD::Component::DST_NUM;
          " operands must use different VGPR banks");
bool AMDGPUAsmParser::validateIntClampSupported(const MCInst &Inst) {

bool AMDGPUAsmParser::validateMIMGDataSize(const MCInst &Inst,
                                           const SMLoc &IDLoc) {
  unsigned TFESize = (TFEIdx != -1 && Inst.getOperand(TFEIdx).getImm()) ? 1 : 0;

  bool IsPackedD16 = false;
    IsPackedD16 = D16Idx >= 0;
      DataSize = (DataSize + 1) / 2;

  if ((VDataSize / 4) == DataSize + TFESize)

    Modifiers = IsPackedD16 ? "dmask and d16" : "dmask";
    Modifiers = IsPackedD16 ? "dmask, d16 and tfe" : "dmask and tfe";
  Error(IDLoc, Twine("image data size does not match ") + Modifiers);
bool AMDGPUAsmParser::validateMIMGAddrSize(const MCInst &Inst,
                                           const SMLoc &IDLoc) {
  assert(SrsrcIdx > VAddr0Idx);

  if (BaseOpcode->BVH) {
    if (IsA16 == BaseOpcode->A16)
    Error(IDLoc, "image address size does not match a16");

  bool IsNSA = SrsrcIdx - VAddr0Idx > 1;
  unsigned ActualAddrSize =
      IsNSA ? SrsrcIdx - VAddr0Idx
  unsigned ExpectedAddrSize =

    if (hasPartialNSAEncoding() && ExpectedAddrSize > getNSAMaxSize()) {
      int VAddrLastIdx = SrsrcIdx - 1;
      unsigned VAddrLastSize =
      ActualAddrSize = VAddrLastIdx - VAddr0Idx + VAddrLastSize;

    if (ExpectedAddrSize > 12)
      ExpectedAddrSize = 16;

    if (ActualAddrSize == 8 && (ExpectedAddrSize >= 5 && ExpectedAddrSize <= 7))

  if (ActualAddrSize == ExpectedAddrSize)

  Error(IDLoc, "image address size does not match dim and a16");
bool AMDGPUAsmParser::validateMIMGAtomicDMask(const MCInst &Inst) {
  if (!Desc.mayLoad() || !Desc.mayStore())
  return DMask == 0x1 || DMask == 0x3 || DMask == 0xf;

bool AMDGPUAsmParser::validateMIMGGatherDMask(const MCInst &Inst) {
  return DMask == 0x1 || DMask == 0x2 || DMask == 0x4 || DMask == 0x8;

bool AMDGPUAsmParser::validateMIMGMSAA(const MCInst &Inst) {
  if (!BaseOpcode->MSAA)
  return DimInfo->MSAA;
  case AMDGPU::V_MOVRELS_B32_sdwa_gfx10:
  case AMDGPU::V_MOVRELSD_B32_sdwa_gfx10:
  case AMDGPU::V_MOVRELSD_2_B32_sdwa_gfx10:

bool AMDGPUAsmParser::validateMovrels(const MCInst &Inst,
  Error(ErrLoc, "source operand must be a VGPR");

bool AMDGPUAsmParser::validateMAIAccWrite(const MCInst &Inst,
  if (Opc != AMDGPU::V_ACCVGPR_WRITE_B32_vi)
        "source operand must be either a VGPR or an inline constant");

bool AMDGPUAsmParser::validateMAISrc2(const MCInst &Inst,
      !getFeatureBits()[FeatureMFMAInlineLiteralBug])
  if (Inst.getOperand(Src2Idx).isImm() && isInlineConstant(Inst, Src2Idx)) {
        "inline constants are not allowed for this operand");

bool AMDGPUAsmParser::validateMFMA(const MCInst &Inst,
  if (Src2Reg == DstReg)

  if (TRI->getRegClass(Desc.operands()[0].RegClass).getSizeInBits() <= 128)

  if (TRI->regsOverlap(Src2Reg, DstReg)) {
          "source 2 operand must not partially overlap with dst");
bool AMDGPUAsmParser::validateDivScale(const MCInst &Inst) {
  switch (Inst.getOpcode()) {
  default:
    return true;
  case V_DIV_SCALE_F32_gfx6_gfx7:
  case V_DIV_SCALE_F32_vi:
  case V_DIV_SCALE_F32_gfx10:
  case V_DIV_SCALE_F64_gfx6_gfx7:
  case V_DIV_SCALE_F64_vi:
  case V_DIV_SCALE_F64_gfx10:
    break;
  }

  for (auto Name : {AMDGPU::OpName::src0_modifiers,
                    AMDGPU::OpName::src2_modifiers,
                    AMDGPU::OpName::src2_modifiers}) {
    // ...
  }
  // ...
}
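// v_div_scale_* are VOP3B encodings; the loop above walks their *_modifiers
// operands, presumably looking for the ABS bit, and validateInstruction below
// reports a violation as "ABS not allowed in VOP3B instructions".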
bool AMDGPUAsmParser::validateMIMGD16(const MCInst &Inst) {
  // ...
}

static bool IsRevOpcode(const unsigned Opcode)
{
  switch (Opcode) {
  case AMDGPU::V_SUBREV_F32_e32:
  case AMDGPU::V_SUBREV_F32_e64:
  case AMDGPU::V_SUBREV_F32_e32_gfx10:
  case AMDGPU::V_SUBREV_F32_e32_gfx6_gfx7:
  case AMDGPU::V_SUBREV_F32_e32_vi:
  case AMDGPU::V_SUBREV_F32_e64_gfx10:
  case AMDGPU::V_SUBREV_F32_e64_gfx6_gfx7:
  case AMDGPU::V_SUBREV_F32_e64_vi:

  case AMDGPU::V_SUBREV_CO_U32_e32:
  case AMDGPU::V_SUBREV_CO_U32_e64:
  case AMDGPU::V_SUBREV_I32_e32_gfx6_gfx7:
  case AMDGPU::V_SUBREV_I32_e64_gfx6_gfx7:

  case AMDGPU::V_SUBBREV_U32_e32:
  case AMDGPU::V_SUBBREV_U32_e64:
  case AMDGPU::V_SUBBREV_U32_e32_gfx6_gfx7:
  case AMDGPU::V_SUBBREV_U32_e32_vi:
  case AMDGPU::V_SUBBREV_U32_e64_gfx6_gfx7:
  case AMDGPU::V_SUBBREV_U32_e64_vi:

  case AMDGPU::V_SUBREV_U32_e32:
  case AMDGPU::V_SUBREV_U32_e64:
  case AMDGPU::V_SUBREV_U32_e32_gfx9:
  case AMDGPU::V_SUBREV_U32_e32_vi:
  case AMDGPU::V_SUBREV_U32_e64_gfx9:
  case AMDGPU::V_SUBREV_U32_e64_vi:

  case AMDGPU::V_SUBREV_F16_e32:
  case AMDGPU::V_SUBREV_F16_e64:
  case AMDGPU::V_SUBREV_F16_e32_gfx10:
  case AMDGPU::V_SUBREV_F16_e32_vi:
  case AMDGPU::V_SUBREV_F16_e64_gfx10:
  case AMDGPU::V_SUBREV_F16_e64_vi:

  case AMDGPU::V_SUBREV_U16_e32:
  case AMDGPU::V_SUBREV_U16_e64:
  case AMDGPU::V_SUBREV_U16_e32_vi:
  case AMDGPU::V_SUBREV_U16_e64_vi:

  case AMDGPU::V_SUBREV_CO_U32_e32_gfx9:
  case AMDGPU::V_SUBREV_CO_U32_e64_gfx10:
  case AMDGPU::V_SUBREV_CO_U32_e64_gfx9:

  case AMDGPU::V_SUBBREV_CO_U32_e32_gfx9:
  case AMDGPU::V_SUBBREV_CO_U32_e64_gfx9:

  case AMDGPU::V_SUBREV_NC_U32_e32_gfx10:
  case AMDGPU::V_SUBREV_NC_U32_e64_gfx10:

  case AMDGPU::V_SUBREV_CO_CI_U32_e32_gfx10:
  case AMDGPU::V_SUBREV_CO_CI_U32_e64_gfx10:

  case AMDGPU::V_LSHRREV_B32_e32:
  case AMDGPU::V_LSHRREV_B32_e64:
  case AMDGPU::V_LSHRREV_B32_e32_gfx6_gfx7:
  case AMDGPU::V_LSHRREV_B32_e64_gfx6_gfx7:
  case AMDGPU::V_LSHRREV_B32_e32_vi:
  case AMDGPU::V_LSHRREV_B32_e64_vi:
  case AMDGPU::V_LSHRREV_B32_e32_gfx10:
  case AMDGPU::V_LSHRREV_B32_e64_gfx10:

  case AMDGPU::V_ASHRREV_I32_e32:
  case AMDGPU::V_ASHRREV_I32_e64:
  case AMDGPU::V_ASHRREV_I32_e32_gfx10:
  case AMDGPU::V_ASHRREV_I32_e32_gfx6_gfx7:
  case AMDGPU::V_ASHRREV_I32_e32_vi:
  case AMDGPU::V_ASHRREV_I32_e64_gfx10:
  case AMDGPU::V_ASHRREV_I32_e64_gfx6_gfx7:
  case AMDGPU::V_ASHRREV_I32_e64_vi:

  case AMDGPU::V_LSHLREV_B32_e32:
  case AMDGPU::V_LSHLREV_B32_e64:
  case AMDGPU::V_LSHLREV_B32_e32_gfx10:
  case AMDGPU::V_LSHLREV_B32_e32_gfx6_gfx7:
  case AMDGPU::V_LSHLREV_B32_e32_vi:
  case AMDGPU::V_LSHLREV_B32_e64_gfx10:
  case AMDGPU::V_LSHLREV_B32_e64_gfx6_gfx7:
  case AMDGPU::V_LSHLREV_B32_e64_vi:

  case AMDGPU::V_LSHLREV_B16_e32:
  case AMDGPU::V_LSHLREV_B16_e64:
  case AMDGPU::V_LSHLREV_B16_e32_vi:
  case AMDGPU::V_LSHLREV_B16_e64_vi:
  case AMDGPU::V_LSHLREV_B16_gfx10:

  case AMDGPU::V_LSHRREV_B16_e32:
  case AMDGPU::V_LSHRREV_B16_e64:
  case AMDGPU::V_LSHRREV_B16_e32_vi:
  case AMDGPU::V_LSHRREV_B16_e64_vi:
  case AMDGPU::V_LSHRREV_B16_gfx10:

  case AMDGPU::V_ASHRREV_I16_e32:
  case AMDGPU::V_ASHRREV_I16_e64:
  case AMDGPU::V_ASHRREV_I16_e32_vi:
  case AMDGPU::V_ASHRREV_I16_e64_vi:
  case AMDGPU::V_ASHRREV_I16_gfx10:

  case AMDGPU::V_LSHLREV_B64_e64:
  case AMDGPU::V_LSHLREV_B64_gfx10:
  case AMDGPU::V_LSHLREV_B64_vi:

  case AMDGPU::V_LSHRREV_B64_e64:
  case AMDGPU::V_LSHRREV_B64_gfx10:
  case AMDGPU::V_LSHRREV_B64_vi:

  case AMDGPU::V_ASHRREV_I64_e64:
  case AMDGPU::V_ASHRREV_I64_gfx10:
  case AMDGPU::V_ASHRREV_I64_vi:

  case AMDGPU::V_PK_LSHLREV_B16:
  case AMDGPU::V_PK_LSHLREV_B16_gfx10:
  case AMDGPU::V_PK_LSHLREV_B16_vi:

  case AMDGPU::V_PK_LSHRREV_B16:
  case AMDGPU::V_PK_LSHRREV_B16_gfx10:
  case AMDGPU::V_PK_LSHRREV_B16_vi:
  case AMDGPU::V_PK_ASHRREV_I16:
  case AMDGPU::V_PK_ASHRREV_I16_gfx10:
  case AMDGPU::V_PK_ASHRREV_I16_vi:
    return true;
  default:
    return false;
  }
}
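// "rev" opcodes take their first two sources in swapped order (for example,
// v_subrev_* computes src1 - src0 and the *rev shifts shift src1 by src0), so
// checks that are sensitive to operand position treat them specially.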
std::optional<StringRef>
AMDGPUAsmParser::validateLdsDirect(const MCInst &Inst) {
  using namespace SIInstrFlags;
  // ...
  if ((Desc.TSFlags & Enc) == 0)
    return std::nullopt;

  for (auto SrcName : {OpName::src0, OpName::src1, OpName::src2}) {
    // ...
    if (Src.isReg() && Src.getReg() == LDS_DIRECT) {
      if (/* ... */)
        return StringRef("lds_direct is not supported on this GPU");
      if (/* ... */)
        return StringRef("lds_direct cannot be used with this instruction");
      if (SrcName != OpName::src0)
        return StringRef("lds_direct may be used as src0 only");
    }
  }

  return std::nullopt;
}
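// lds_direct is a special VALU source that reads LDS directly.  The checks
// above restrict it to GPUs and encodings that actually support it and, where
// it is legal at all, to the src0 position only.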
SMLoc AMDGPUAsmParser::getFlatOffsetLoc(const OperandVector &Operands) const {
  for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
    if (Op.isFlatOffset())
      return Op.getStartLoc();
  }
  return getLoc();
}
bool AMDGPUAsmParser::validateFlatOffset(const MCInst &Inst,
                                         const OperandVector &Operands) {
  // ...
  if (!hasFlatOffsets() && Op.getImm() != 0) {
    Error(getFlatOffsetLoc(Operands),
          "flat offset modifier is not supported on this GPU");
    return false;
  }

  // ...
  bool AllowNegative = /* ... */;
  if (!isIntN(OffsetSize, Op.getImm()) || (!AllowNegative && Op.getImm() < 0)) {
    Error(getFlatOffsetLoc(Operands),
          Twine("expected a ") +
              (AllowNegative ? Twine(OffsetSize) + "-bit signed offset"
                             : Twine(OffsetSize - 1) + "-bit unsigned offset"));
    return false;
  }

  return true;
}
SMLoc AMDGPUAsmParser::getSMEMOffsetLoc(const OperandVector &Operands) const {
  // Start with the second operand because the SMEM offset cannot be dst or src0.
  for (unsigned i = 2, e = Operands.size(); i != e; ++i) {
    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
    if (Op.isSMEMOffset() || Op.isSMEMOffsetMod())
      return Op.getStartLoc();
  }
  return getLoc();
}
bool AMDGPUAsmParser::validateSMEMOffset(const MCInst &Inst,
                                         const OperandVector &Operands) {
  // ...
  Error(getSMEMOffsetLoc(Operands),
        (isVI() || IsBuffer) ? "expected a 20-bit unsigned offset"
                             : "expected a 21-bit signed offset");
  return false;
}
bool AMDGPUAsmParser::validateSOPLiteral(const MCInst &Inst) const {
  // ...
  const int OpIndices[] = { Src0Idx, Src1Idx };

  unsigned NumExprs = 0;
  unsigned NumLiterals = 0;
  uint32_t LiteralValue;

  for (int OpIdx : OpIndices) {
    if (OpIdx == -1) break;

    const MCOperand &MO = Inst.getOperand(OpIdx);
    if (MO.isImm() && !isInlineConstant(Inst, OpIdx)) {
      uint32_t Value = static_cast<uint32_t>(MO.getImm());
      if (NumLiterals == 0 || LiteralValue != Value) {
        LiteralValue = Value;
        ++NumLiterals;
      }
    } else if (MO.isExpr()) {
      ++NumExprs;
    }
  }

  return NumLiterals + NumExprs <= 1;
}
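// SOP instructions can encode at most one 32-bit literal, so the check counts
// *unique* literal values plus unresolved expressions.  For example
// (illustrative), "s_add_u32 s0, 0x12345678, 0x12345678" reuses one literal
// and is accepted, whereas two different non-inline constants are rejected.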
bool AMDGPUAsmParser::validateOpSel(const MCInst &Inst) {
  // ...
  if (OpSelIdx != -1) {
    // ...
  }
  if (OpSelHiIdx != -1) {
    // ...
  }
bool AMDGPUAsmParser::validateDPP(const MCInst &Inst,
                                  const OperandVector &Operands) {
  // ...
  if (DppCtrlIdx >= 0) {
    // ...
      Error(S, "DP ALU dpp only supports row_newbcast");
      return false;
  }

  bool IsDPP = DppCtrlIdx >= 0 || Dpp8Idx >= 0;
  // ...
    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[Src1Idx]);
    Error(Op.getStartLoc(), "invalid operand for instruction");
    return false;
  // ...
  return true;
}
bool AMDGPUAsmParser::validateVccOperand(unsigned Reg) const {
  auto FB = getFeatureBits();
  return (FB[AMDGPU::FeatureWavefrontSize64] && Reg == AMDGPU::VCC) ||
         (FB[AMDGPU::FeatureWavefrontSize32] && Reg == AMDGPU::VCC_LO);
}
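// In wave64 mode the implicit carry/condition register is the full 64-bit VCC
// pair; in wave32 mode only vcc_lo carries meaning, so that is the only
// spelling accepted there.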
bool AMDGPUAsmParser::validateVOPLiteral(const MCInst &Inst,
                                         const OperandVector &Operands) {
  // ...
  unsigned NumExprs = 0;
  unsigned NumLiterals = 0;

  for (int OpIdx : OpIndices) {
    // ...
    if (MO.isImm() && !isInlineConstant(Inst, OpIdx)) {
      // ...
      if (!IsValid32Op && !isInt<32>(Value) && !isUInt<32>(Value)) {
        Error(getLitLoc(Operands), "invalid operand for instruction");
        return false;
      }

      if (IsFP64 && IsValid32Op)
        // ...

      if (NumLiterals == 0 || LiteralValue != Value) {
        // ...
      }
    } else if (MO.isExpr()) {
      // ...
    }
  }
  NumLiterals += NumExprs;

  // ...
  if (!HasMandatoryLiteral && !getFeatureBits()[FeatureVOP3Literal]) {
    Error(getLitLoc(Operands), "literal operands are not supported");
    return false;
  }

  if (NumLiterals > 1) {
    Error(getLitLoc(Operands, true),
          "only one unique literal operand is allowed");
    return false;
  }
  return true;
}
  unsigned Sub = MRI->getSubReg(Op.getReg(), AMDGPU::sub0);
  auto Reg = Sub ? Sub : Op.getReg();
  // ...
  return AGPR32.contains(Reg) ? 1 : 0;
bool AMDGPUAsmParser::validateAGPRLdSt(const MCInst &Inst) const {
  // ...
                                        : AMDGPU::OpName::vdata;
  // ...
  if (Data2Areg >= 0 && Data2Areg != DataAreg)
    return false;

  auto FB = getFeatureBits();
  if (FB[AMDGPU::FeatureGFX90AInsts]) {
    if (DataAreg < 0 || DstAreg < 0)
      return true;
    return DstAreg == DataAreg;
  }

  return DstAreg < 1 && DataAreg < 1;
}
bool AMDGPUAsmParser::validateVGPRAlign(const MCInst &Inst) const {
  auto FB = getFeatureBits();
  if (!FB[AMDGPU::FeatureGFX90AInsts])
    return true;
  // ...
    unsigned Sub = MRI->getSubReg(Op.getReg(), AMDGPU::sub0);
    // ...
    if (VGPR32.contains(Sub) && ((Sub - AMDGPU::VGPR0) & 1))
      return false;
    if (AGPR32.contains(Sub) && ((Sub - AMDGPU::AGPR0) & 1))
      return false;
  // ...
  return true;
}
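// On gfx90a, multi-register VGPR/AGPR operands must start on an even register
// number (i.e. be 64-bit aligned); the corresponding diagnostic is emitted
// from validateInstruction as "invalid register class: vgpr tuples must be
// 64 bit aligned".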
  for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
    if (/* ... */)
      return Op.getStartLoc();
  }
bool AMDGPUAsmParser::validateBLGP(const MCInst &Inst,
                                   const OperandVector &Operands) {
  // ...
  auto FB = getFeatureBits();
  bool UsesNeg = false;
  if (FB[AMDGPU::FeatureGFX940Insts]) {
    switch (Opc) {
    case AMDGPU::V_MFMA_F64_16X16X4F64_gfx940_acd:
    case AMDGPU::V_MFMA_F64_16X16X4F64_gfx940_vcd:
    case AMDGPU::V_MFMA_F64_4X4X4F64_gfx940_acd:
    case AMDGPU::V_MFMA_F64_4X4X4F64_gfx940_vcd:
      UsesNeg = true;
    }
  }

  if (IsNeg == UsesNeg)
    return true;

  Error(/* ... */,
        UsesNeg ? "invalid modifier: blgp is not supported"
                : "invalid modifier: neg is not supported");
  return false;
}
bool AMDGPUAsmParser::validateWaitCnt(const MCInst &Inst,
                                      const OperandVector &Operands) {
  // ...
  unsigned Opc = Inst.getOpcode();
  if (Opc != AMDGPU::S_WAITCNT_EXPCNT_gfx11 &&
      Opc != AMDGPU::S_WAITCNT_LGKMCNT_gfx11 &&
      Opc != AMDGPU::S_WAITCNT_VMCNT_gfx11 &&
      Opc != AMDGPU::S_WAITCNT_VSCNT_gfx11)
    return true;
  // ...
  if (Reg == AMDGPU::SGPR_NULL)
    return true;

  Error(RegLoc, "src0 must be null");
  return false;
}
bool AMDGPUAsmParser::validateDS(const MCInst &Inst,
                                 const OperandVector &Operands) {
  // ...
    return validateGWS(Inst, Operands);
  // ...
    Error(S, "gds modifier is not supported on this GPU");
    return false;
  // ...
  return true;
}
bool AMDGPUAsmParser::validateGWS(const MCInst &Inst,
                                  const OperandVector &Operands) {
  if (!getFeatureBits()[AMDGPU::FeatureGFX90AInsts])
    return true;

  int Opc = Inst.getOpcode();
  if (Opc != AMDGPU::DS_GWS_INIT_vi && Opc != AMDGPU::DS_GWS_BARRIER_vi &&
      Opc != AMDGPU::DS_GWS_SEMA_BR_vi)
    return true;
  // ...
  auto RegIdx = Reg - (VGPR32.contains(Reg) ? AMDGPU::VGPR0 : AMDGPU::AGPR0);
  if (RegIdx & 1) {
    Error(RegLoc, "vgpr must be even aligned");
    return false;
  }

  return true;
}
bool AMDGPUAsmParser::validateCoherencyBits(const MCInst &Inst,
                                            const OperandVector &Operands,
                                            const SMLoc &IDLoc) {
  int CPolPos = AMDGPU::getNamedOperandIdx(Inst.getOpcode(),
                                           AMDGPU::OpName::cpol);
  // ...
    Error(S, "cache policy is not supported for SMRD instructions");
    return false;
  // ...
    Error(IDLoc, "invalid cache policy for SMEM instruction");
    return false;
  // ...
  if (!(TSFlags & AllowSCCModifier)) {
    // ...
    Error(S,
          "scc modifier is not supported for this instruction on this GPU");
    return false;
  }
  // ...
    Error(S, isGFX940() ? "instruction must use sc0"
                        : "instruction must use glc");
    return false;
  // ...
    S = SMLoc::getFromPointer(
        &CStr.data()[CStr.find(isGFX940() ? "sc0" : "glc")]);
    Error(S, isGFX940() ? "instruction must not use sc0"
                        : "instruction must not use glc");
    return false;
  // ...
  return true;
}
    if (!Operand->isReg())
      continue;
    unsigned Reg = Operand->getReg();
    if (Reg == SRC_EXECZ || Reg == SRC_VCCZ) {
      Error(/* ... */, "execz and vccz are not supported on this GPU");
      return false;
    }
bool AMDGPUAsmParser::validateTFE(const MCInst &Inst,
                                  const OperandVector &Operands) {
  // ...
  if (Desc.mayStore() &&
      /* ... */) {
    // ...
    Error(Loc, "TFE modifier has no meaning for store instructions");
    return false;
  }
  return true;
}
bool AMDGPUAsmParser::validateInstruction(const MCInst &Inst,
                                          const SMLoc &IDLoc,
                                          const OperandVector &Operands) {
  if (auto ErrMsg = validateLdsDirect(Inst)) {
    Error(/* ... */, *ErrMsg);
    return false;
  }
  if (!validateSOPLiteral(Inst)) {
    Error(/* ... */, "only one unique literal operand is allowed");
    return false;
  }
  if (!validateVOPLiteral(Inst, Operands))
    return false;
  if (!validateConstantBusLimitations(Inst, Operands))
    return false;
  if (!validateVOPDRegBankConstraints(Inst, Operands))
    return false;
  if (!validateIntClampSupported(Inst)) {
    Error(/* ... */, "integer clamping is not supported on this GPU");
    return false;
  }
  if (!validateOpSel(Inst)) {
    Error(/* ... */, "invalid op_sel operand");
    return false;
  }
  if (!validateDPP(Inst, Operands))
    return false;
  if (!validateMIMGD16(Inst)) {
    Error(/* ... */, "d16 modifier is not supported on this GPU");
    return false;
  }
  if (!validateMIMGMSAA(Inst)) {
    Error(/* ... */, "invalid dim; must be MSAA type");
    return false;
  }
  if (!validateMIMGDataSize(Inst, IDLoc))
    return false;
  if (!validateMIMGAddrSize(Inst, IDLoc))
    return false;
  if (!validateMIMGAtomicDMask(Inst)) {
    Error(/* ... */, "invalid atomic image dmask");
    return false;
  }
  if (!validateMIMGGatherDMask(Inst)) {
    Error(/* ... */, "invalid image_gather dmask: only one bit must be set");
    return false;
  }
  if (!validateMovrels(Inst, Operands))
    return false;
  if (!validateFlatOffset(Inst, Operands))
    return false;
  if (!validateSMEMOffset(Inst, Operands))
    return false;
  if (!validateMAIAccWrite(Inst, Operands))
    return false;
  if (!validateMAISrc2(Inst, Operands))
    return false;
  if (!validateMFMA(Inst, Operands))
    return false;
  if (!validateCoherencyBits(Inst, Operands, IDLoc))
    return false;
  if (!validateAGPRLdSt(Inst)) {
    Error(IDLoc,
          getFeatureBits()[AMDGPU::FeatureGFX90AInsts]
              ? "invalid register class: data and dst should be all VGPR or AGPR"
              : "invalid register class: agpr loads and stores not supported on this GPU");
    return false;
  }
  if (!validateVGPRAlign(Inst)) {
    Error(IDLoc, "invalid register class: vgpr tuples must be 64 bit aligned");
    return false;
  }
  if (!validateBLGP(Inst, Operands))
    return false;
  if (!validateDivScale(Inst)) {
    Error(IDLoc, "ABS not allowed in VOP3B instructions");
    return false;
  }
  if (!validateWaitCnt(Inst, Operands))
    return false;
  if (!validateExeczVcczOperands(Operands))
    return false;
  if (!validateTFE(Inst, Operands))
    return false;

  return true;
}
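// validateInstruction is the single entry point run after a successful
// encoding match (see MatchAndEmitInstruction below); the individual checks
// either report a targeted diagnostic themselves or let this dispatcher emit
// the generic message shown next to each call.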
static std::string AMDGPUMnemonicSpellCheck(StringRef S, const FeatureBitset &FBS,
                                            unsigned VariantID = 0);
static bool AMDGPUCheckMnemonic(StringRef Mnemo, const FeatureBitset &AvailableFeatures,
                                unsigned VariantID);
bool AMDGPUAsmParser::isSupportedMnemo(StringRef Mnemo, const FeatureBitset &FBS) {
  return isSupportedMnemo(Mnemo, FBS, getAllVariants());
}

bool AMDGPUAsmParser::isSupportedMnemo(StringRef Mnemo, const FeatureBitset &FBS,
                                       ArrayRef<unsigned> Variants) {
  for (auto Variant : Variants) {
    if (AMDGPUCheckMnemonic(Mnemo, FBS, Variant))
      return true;
  }
  return false;
}
bool AMDGPUAsmParser::checkUnsupportedInstruction(StringRef Mnemo,
                                                  const SMLoc &IDLoc) {
  FeatureBitset FBS = ComputeAvailableFeatures(getFeatureBits());

  // Check whether the requested instruction variant is supported as written.
  if (isSupportedMnemo(Mnemo, FBS, getMatchedVariants()))
    return false;

  // The instruction is not supported; clear any other pending errors because
  // they are no longer relevant.
  getParser().clearPendingErrors();

  // Check whether some other variant of the mnemonic would be supported.
  StringRef VariantName = getMatchedVariantName();
  if (!VariantName.empty() && isSupportedMnemo(Mnemo, FBS)) {
    return Error(IDLoc,
                 (Twine(VariantName) +
                  " variant of this instruction is not supported"));
  }

  // Check whether the instruction would be accepted with a different wavesize.
  if (isGFX10Plus() && getFeatureBits()[AMDGPU::FeatureWavefrontSize64] &&
      !getFeatureBits()[AMDGPU::FeatureWavefrontSize32]) {

    FeatureBitset FeaturesWS32 = getFeatureBits();
    FeaturesWS32.flip(AMDGPU::FeatureWavefrontSize64)
        .flip(AMDGPU::FeatureWavefrontSize32);
    FeatureBitset AvailableFeaturesWS32 =
        ComputeAvailableFeatures(FeaturesWS32);

    if (isSupportedMnemo(Mnemo, AvailableFeaturesWS32, getMatchedVariants()))
      return Error(IDLoc, "instruction requires wavesize=32");
  }

  // Check whether the instruction is supported on any other GPU.
  if (isSupportedMnemo(Mnemo, FBS))
    return Error(IDLoc, "instruction not supported on this GPU");

  // Not supported anywhere; it is probably a typo, so offer a suggestion.
  std::string Suggestion = AMDGPUMnemonicSpellCheck(Mnemo, FBS);
  return Error(IDLoc, "invalid instruction" + Suggestion);
}
static bool isInvalidVOPDY(const OperandVector &Operands,
                           uint64_t InvalidOprIdx) {
  const auto &Op = ((AMDGPUOperand &)*Operands[InvalidOprIdx]);
  if (Op.isToken() && InvalidOprIdx > 1) {
    const auto &PrevOp = ((AMDGPUOperand &)*Operands[InvalidOprIdx - 1]);
    return PrevOp.isToken() && PrevOp.getToken() == "::";
  }
  return false;
}
bool AMDGPUAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
                                              OperandVector &Operands,
                                              MCStreamer &Out,
                                              uint64_t &ErrorInfo,
                                              bool MatchingInlineAsm) {
  MCInst Inst;
  unsigned Result = Match_Success;
  for (auto Variant : getMatchedVariants()) {
    uint64_t EI;
    auto R = MatchInstructionImpl(Operands, Inst, EI, MatchingInlineAsm,
                                  Variant);
    // Keep the most specific match status seen so far:
    // MnemonicFail < InvalidOperand < MissingFeature < PreferE32, with
    // Success always winning.
    if ((R == Match_Success) ||
        (R == Match_PreferE32) ||
        (R == Match_MissingFeature && Result != Match_PreferE32) ||
        (R == Match_InvalidOperand && Result != Match_MissingFeature
                                   && Result != Match_PreferE32) ||
        (R == Match_MnemonicFail   && Result != Match_InvalidOperand
                                   && Result != Match_MissingFeature
                                   && Result != Match_PreferE32)) {
      Result = R;
      ErrorInfo = EI;
    }
    if (R == Match_Success)
      break;
  }

  if (Result == Match_Success) {
    if (!validateInstruction(Inst, IDLoc, Operands)) {
      return true;
    }
    // ...
    return false;
  }

  StringRef Mnemo = ((AMDGPUOperand &)*Operands[0]).getToken();
  if (checkUnsupportedInstruction(Mnemo, IDLoc)) {
    return true;
  }

  switch (Result) {
  default:
    break;
  case Match_MissingFeature:
    // The mnemonic is valid, but the matched form requires features that are
    // not enabled for this GPU or mode.
    return Error(IDLoc, "operands are not valid for this GPU or mode");

  case Match_InvalidOperand: {
    SMLoc ErrorLoc = IDLoc;
    if (ErrorInfo != ~0ULL) {
      if (ErrorInfo >= Operands.size())
        return Error(IDLoc, "too few operands for instruction");
      ErrorLoc = ((AMDGPUOperand &)*Operands[ErrorInfo]).getStartLoc();
      if (ErrorLoc == SMLoc())
        ErrorLoc = IDLoc;

      if (isInvalidVOPDY(Operands, ErrorInfo))
        return Error(ErrorLoc, "invalid VOPDY instruction");
    }
    return Error(ErrorLoc, "invalid operand for instruction");
  }

  case Match_PreferE32:
    return Error(IDLoc, "internal error: instruction without _e64 suffix "
                        "should be encoded as e32");
  case Match_MnemonicFail:
    // ...
    break;
  }
  // ...
}
bool AMDGPUAsmParser::ParseAsAbsoluteExpression(uint32_t &Ret) {
  int64_t Tmp = -1;
  if (getParser().parseAbsoluteExpression(Tmp))
    return true;
  Ret = static_cast<uint32_t>(Tmp);
  return false;
}
bool AMDGPUAsmParser::ParseDirectiveMajorMinor(uint32_t &Major,
                                               uint32_t &Minor) {
  if (ParseAsAbsoluteExpression(Major))
    return TokError("invalid major version");

  if (!trySkipToken(AsmToken::Comma))
    return TokError("minor version number required, comma expected");

  if (ParseAsAbsoluteExpression(Minor))
    return TokError("invalid minor version");

  return false;
}
bool AMDGPUAsmParser::ParseDirectiveAMDGCNTarget() {
  if (getSTI().getTargetTriple().getArch() != Triple::amdgcn)
    return TokError("directive only supported for amdgcn architecture");

  std::string TargetIDDirective;
  SMLoc TargetStart = getTok().getLoc();
  if (getParser().parseEscapedString(TargetIDDirective))
    return true;

  SMRange TargetRange = SMRange(TargetStart, getTok().getLoc());
  if (getTargetStreamer().getTargetID()->toString() != TargetIDDirective)
    return getParser().Error(TargetRange.Start,
        (Twine(".amdgcn_target directive's target id ") +
         Twine(TargetIDDirective) +
         Twine(" does not match the specified target id ") +
         Twine(getTargetStreamer().getTargetID()->toString())).str());

  return false;
}
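// For example (illustrative), a module assembled for gfx90a would carry:
//   .amdgcn_target "amdgcn-amd-amdhsa--gfx90a"
// and the directive is rejected if the quoted target id differs from the one
// the assembler was invoked with.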
bool AMDGPUAsmParser::OutOfRangeError(SMRange Range) {
  return Error(Range.Start, "value out of range", Range);
}
bool AMDGPUAsmParser::calculateGPRBlocks(
    const FeatureBitset &Features, bool VCCUsed, bool FlatScrUsed,
    bool XNACKUsed, std::optional<bool> EnableWavefrontSize32,
    unsigned NextFreeVGPR, SMRange VGPRRange, unsigned NextFreeSGPR,
    SMRange SGPRRange, unsigned &VGPRBlocks, unsigned &SGPRBlocks) {
  // ...
  unsigned NumSGPRs = NextFreeSGPR;
  // ...
    unsigned MaxAddressableNumSGPRs = /* ... */;

    if (Version.Major >= 8 && !Features.test(FeatureSGPRInitBug) &&
        NumSGPRs > MaxAddressableNumSGPRs)
      return OutOfRangeError(SGPRRange);
    // ...
    if ((Version.Major <= 7 || Features.test(FeatureSGPRInitBug)) &&
        NumSGPRs > MaxAddressableNumSGPRs)
      return OutOfRangeError(SGPRRange);

    if (Features.test(FeatureSGPRInitBug))
      NumSGPRs = /* fixed count that works around the SGPR init bug */;
  // ...
  return false;
}
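// The "blocks" written back through VGPRBlocks/SGPRBlocks are the register
// counts rounded up to the hardware allocation granule.  As a purely
// illustrative example: with a granule of 4 VGPRs, a kernel whose highest
// used VGPR is v36 (NextFreeVGPR = 37) occupies ceil(37 / 4) = 10 blocks.
// The exact granule depends on the target and on EnableWavefrontSize32.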
bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() {
  if (getSTI().getTargetTriple().getArch() != Triple::amdgcn)
    return TokError("directive only supported for amdgcn architecture");

  if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA)
    return TokError("directive only supported for amdhsa OS");

  StringRef KernelName;
  if (getParser().parseIdentifier(KernelName))
    return true;
  // ...

  // Count of user SGPRs implied by the enabled kernel-code properties.
  unsigned ImpliedUserSGPRCount = 0;

  // Track whether the asm explicitly sets the user SGPR count.
  std::optional<unsigned> ExplicitUserSGPRCount;
  bool ReserveVCC = true;
  bool ReserveFlatScr = true;
  std::optional<bool> EnableWavefrontSize32;

  while (true) {
    // ...
    StringRef ID;
    SMRange IDRange = getTok().getLocRange();
    if (!parseId(ID, "expected .amdhsa_ directive or .end_amdhsa_kernel"))
      return true;

    if (ID == ".end_amdhsa_kernel")
      break;

    if (/* directive already seen */)
      return TokError(".amdhsa_ directives cannot be repeated");

    SMLoc ValStart = getLoc();
    int64_t IVal;
    if (getParser().parseAbsoluteExpression(IVal))
      return true;
    SMLoc ValEnd = getLoc();
    SMRange ValRange = SMRange(ValStart, ValEnd);

    uint64_t Val = IVal;
    if (/* ... */)
      return OutOfRangeError(ValRange);
#define PARSE_BITS_ENTRY(FIELD, ENTRY, VALUE, RANGE)                           \
  if (!isUInt<ENTRY##_WIDTH>(VALUE))                                           \
    return OutOfRangeError(RANGE);                                             \
  AMDHSA_BITS_SET(FIELD, ENTRY, VALUE);
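// PARSE_BITS_ENTRY checks that VALUE fits in the ENTRY##_WIDTH-bit field and
// then packs it into FIELD via AMDHSA_BITS_SET.  A call such as (illustrative;
// the FIELD argument is presumed to be the matching kernel-descriptor member)
//   PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
//                    COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X, Val, ValRange);
// therefore expands to a width check followed by a masked shift-and-or into
// KD.compute_pgm_rsrc2.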
    if (ID == ".amdhsa_group_segment_fixed_size") {
      if (!isUInt<sizeof(kernel_descriptor_t::group_segment_fixed_size) *
                  CHAR_BIT>(Val))
        return OutOfRangeError(ValRange);
      KD.group_segment_fixed_size = Val;
    } else if (ID == ".amdhsa_private_segment_fixed_size") {
      if (!isUInt<sizeof(kernel_descriptor_t::private_segment_fixed_size) *
                  CHAR_BIT>(Val))
        return OutOfRangeError(ValRange);
      KD.private_segment_fixed_size = Val;
    } else if (ID == ".amdhsa_kernarg_size") {
      if (!isUInt<sizeof(kernel_descriptor_t::kernarg_size) * CHAR_BIT>(Val))
        return OutOfRangeError(ValRange);
      KD.kernarg_size = Val;
    } else if (ID == ".amdhsa_user_sgpr_count") {
      ExplicitUserSGPRCount = Val;
    } else if (ID == ".amdhsa_user_sgpr_private_segment_buffer") {
      if (hasArchitectedFlatScratch())
        return Error(IDRange.Start,
                     "directive is not supported with architected flat scratch",
                     IDRange);
      PARSE_BITS_ENTRY(KD.kernel_code_properties,
                       KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER,
                       Val, ValRange);
      if (Val)
        ImpliedUserSGPRCount += 4;
    } else if (ID == ".amdhsa_user_sgpr_kernarg_preload_length") {
      if (/* ... */)
        return Error(IDRange.Start, "directive requires gfx90a+", IDRange);
      if (/* ... */)
        return OutOfRangeError(ValRange);
      // ...
      if (Val) {
        ImpliedUserSGPRCount += Val;
        PreloadLength = Val;
      }
    } else if (ID == ".amdhsa_user_sgpr_kernarg_preload_offset") {
      if (/* ... */)
        return Error(IDRange.Start, "directive requires gfx90a+", IDRange);
      if (/* ... */)
        return OutOfRangeError(ValRange);
      // ...
      PreloadOffset = Val;
    } else if (ID == ".amdhsa_user_sgpr_dispatch_ptr") {
      PARSE_BITS_ENTRY(KD.kernel_code_properties,
                       KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR, Val,
                       ValRange);
      if (Val)
        ImpliedUserSGPRCount += 2;
    } else if (ID == ".amdhsa_user_sgpr_queue_ptr") {
      PARSE_BITS_ENTRY(KD.kernel_code_properties,
                       KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR, Val,
                       ValRange);
      if (Val)
        ImpliedUserSGPRCount += 2;
    } else if (ID == ".amdhsa_user_sgpr_kernarg_segment_ptr") {
      PARSE_BITS_ENTRY(KD.kernel_code_properties,
                       KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR,
                       Val, ValRange);
      if (Val)
        ImpliedUserSGPRCount += 2;
    } else if (ID == ".amdhsa_user_sgpr_dispatch_id") {
      PARSE_BITS_ENTRY(KD.kernel_code_properties,
                       KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID, Val,
                       ValRange);
      if (Val)
        ImpliedUserSGPRCount += 2;
    } else if (ID == ".amdhsa_user_sgpr_flat_scratch_init") {
      if (hasArchitectedFlatScratch())
        return Error(IDRange.Start,
                     "directive is not supported with architected flat scratch",
                     IDRange);
      PARSE_BITS_ENTRY(KD.kernel_code_properties,
                       KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT, Val,
                       ValRange);
      if (Val)
        ImpliedUserSGPRCount += 2;
    } else if (ID == ".amdhsa_user_sgpr_private_segment_size") {
      PARSE_BITS_ENTRY(KD.kernel_code_properties,
                       KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE,
                       Val, ValRange);
      if (Val)
        ImpliedUserSGPRCount += 1;
    } else if (ID == ".amdhsa_wavefront_size32") {
      if (IVersion.Major < 10)
        return Error(IDRange.Start, "directive requires gfx10+", IDRange);
      EnableWavefrontSize32 = Val;
      PARSE_BITS_ENTRY(KD.kernel_code_properties,
                       KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32,
                       Val, ValRange);
    } else if (ID == ".amdhsa_uses_dynamic_stack") {
      PARSE_BITS_ENTRY(KD.kernel_code_properties,
                       KERNEL_CODE_PROPERTY_USES_DYNAMIC_STACK, Val, ValRange);
    } else if (ID == ".amdhsa_system_sgpr_private_segment_wavefront_offset") {
      if (hasArchitectedFlatScratch())
        return Error(IDRange.Start,
                     "directive is not supported with architected flat scratch",
                     IDRange);
      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
                       COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT, Val, ValRange);
    } else if (ID == ".amdhsa_enable_private_segment") {
      if (!hasArchitectedFlatScratch())
        return Error(
            IDRange.Start,
            "directive is not supported without architected flat scratch",
            IDRange);
      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
                       COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT, Val, ValRange);
    } else if (ID == ".amdhsa_system_sgpr_workgroup_id_x") {
      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
                       COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X, Val,
                       ValRange);
    } else if (ID == ".amdhsa_system_sgpr_workgroup_id_y") {
      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
                       COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Y, Val,
                       ValRange);
    } else if (ID == ".amdhsa_system_sgpr_workgroup_id_z") {
      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
                       COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Z, Val,
                       ValRange);
    } else if (ID == ".amdhsa_system_sgpr_workgroup_info") {
      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
                       COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_INFO, Val,
                       ValRange);
    } else if (ID == ".amdhsa_system_vgpr_workitem_id") {
      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
                       COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID, Val,
                       ValRange);
    } else if (ID == ".amdhsa_next_free_vgpr") {
      VGPRRange = ValRange;
      NextFreeVGPR = Val;
    } else if (ID == ".amdhsa_next_free_sgpr") {
      SGPRRange = ValRange;
      NextFreeSGPR = Val;
    } else if (ID == ".amdhsa_accum_offset") {
      if (/* ... */)
        return Error(IDRange.Start, "directive requires gfx90a+", IDRange);
      AccumOffset = Val;
    } else if (ID == ".amdhsa_reserve_vcc") {
      if (!isUInt<1>(Val))
        return OutOfRangeError(ValRange);
      ReserveVCC = Val;
    } else if (ID == ".amdhsa_reserve_flat_scratch") {
      if (IVersion.Major < 7)
        return Error(IDRange.Start, "directive requires gfx7+", IDRange);
      if (hasArchitectedFlatScratch())
        return Error(IDRange.Start,
                     "directive is not supported with architected flat scratch",
                     IDRange);
      if (!isUInt<1>(Val))
        return OutOfRangeError(ValRange);
      ReserveFlatScr = Val;
    } else if (ID == ".amdhsa_reserve_xnack_mask") {
      if (IVersion.Major < 8)
        return Error(IDRange.Start, "directive requires gfx8+", IDRange);
      if (!isUInt<1>(Val))
        return OutOfRangeError(ValRange);
      if (Val != getTargetStreamer().getTargetID()->isXnackOnOrAny())