enum RegisterKind { IS_UNKNOWN, IS_VGPR, IS_SGPR, IS_AGPR, IS_TTMP, IS_SPECIAL };

  SMLoc StartLoc, EndLoc;
  const AMDGPUAsmParser *AsmParser;

  AMDGPUOperand(KindTy Kind_, const AMDGPUAsmParser *AsmParser_)
      : Kind(Kind_), AsmParser(AsmParser_) {}

  using Ptr = std::unique_ptr<AMDGPUOperand>;
    bool hasFPModifiers() const { return Abs || Neg; }
    bool hasIntModifiers() const { return Sext; }
    bool hasModifiers() const { return hasFPModifiers() || hasIntModifiers(); }

    int64_t getFPModifiersOperand() const {
      // ...

    int64_t getIntModifiersOperand() const {
      // ...

    int64_t getModifiersOperand() const {
      assert(!(hasFPModifiers() && hasIntModifiers()) &&
             "fp and int modifiers should not be used simultaneously");
      if (hasFPModifiers()) {
        return getFPModifiersOperand();
      } else if (hasIntModifiers()) {
        return getIntModifiersOperand();
      }
    ImmKindTyMandatoryLiteral,

    mutable ImmKindTy Kind;

  bool isToken() const override { return Kind == Token; }

  bool isSymbolRefExpr() const {
    return isExpr() && Expr && isa<MCSymbolRefExpr>(Expr);
  }

  bool isImm() const override {
    return Kind == Immediate;
  }

  void setImmKindNone() const { Imm.Kind = ImmKindTyNone; }

  void setImmKindLiteral() const { Imm.Kind = ImmKindTyLiteral; }

  void setImmKindMandatoryLiteral() const { Imm.Kind = ImmKindTyMandatoryLiteral; }

  void setImmKindConst() const { Imm.Kind = ImmKindTyConst; }

  bool IsImmKindLiteral() const {
    return isImm() && Imm.Kind == ImmKindTyLiteral;
  }

  bool IsImmKindMandatoryLiteral() const {
    return isImm() && Imm.Kind == ImmKindTyMandatoryLiteral;
  }

  bool isImmKindConst() const {
    return isImm() && Imm.Kind == ImmKindTyConst;
  }

  bool isInlinableImm(MVT type) const;
  bool isLiteralImm(MVT type) const;
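  // Illustrative note (added by the editor, not from the original file):
  // isInlinableImm() accepts values that fit the hardware's inline-constant
  // encodings (small integers in roughly the -16..64 range and selected
  // floats such as 0.5, 1.0, 2.0), while isLiteralImm() covers values that
  // must be emitted as an extra 32-bit literal dword. For example,
  // "v_add_f32 v0, 1.0, v1" can use an inline constant, whereas
  // "v_add_f32 v0, 1.5, v1" needs a literal.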
  bool isRegKind() const {

  bool isReg() const override {
    return isRegKind() && !hasModifiers();
  }

  bool isRegOrInline(unsigned RCID, MVT type) const {
    return isRegClass(RCID) || isInlinableImm(type);
  }

    return isRegOrInline(RCID, type) || isLiteralImm(type);

  bool isRegOrImmWithInt16InputMods() const {

  bool isRegOrImmWithInt32InputMods() const {

  bool isRegOrInlineImmWithInt16InputMods() const {
    return isRegOrInline(AMDGPU::VS_32RegClassID, MVT::i16);
  }

  bool isRegOrInlineImmWithInt32InputMods() const {
    return isRegOrInline(AMDGPU::VS_32RegClassID, MVT::i32);
  }

  bool isRegOrImmWithInt64InputMods() const {

  bool isRegOrImmWithFP16InputMods() const {

  bool isRegOrImmWithFP32InputMods() const {

  bool isRegOrImmWithFP64InputMods() const {

  bool isRegOrInlineImmWithFP16InputMods() const {
    return isRegOrInline(AMDGPU::VS_32RegClassID, MVT::f16);
  }

  bool isRegOrInlineImmWithFP32InputMods() const {
    return isRegOrInline(AMDGPU::VS_32RegClassID, MVT::f32);
  }

  bool isVReg() const {
    return isRegClass(AMDGPU::VGPR_32RegClassID) ||
           isRegClass(AMDGPU::VReg_64RegClassID) ||
           isRegClass(AMDGPU::VReg_96RegClassID) ||
           isRegClass(AMDGPU::VReg_128RegClassID) ||
           isRegClass(AMDGPU::VReg_160RegClassID) ||
           isRegClass(AMDGPU::VReg_192RegClassID) ||
           isRegClass(AMDGPU::VReg_256RegClassID) ||
           isRegClass(AMDGPU::VReg_512RegClassID) ||
           isRegClass(AMDGPU::VReg_1024RegClassID);
  }

  bool isVReg32() const {
    return isRegClass(AMDGPU::VGPR_32RegClassID);
  }

  bool isVReg32OrOff() const {
    return isOff() || isVReg32();
  }

  bool isNull() const {
    return isRegKind() && getReg() == AMDGPU::SGPR_NULL;
  }

  bool isVRegWithInputMods() const;
  bool isT16VRegWithInputMods() const;

  bool isSDWAOperand(MVT type) const;
  bool isSDWAFP16Operand() const;
  bool isSDWAFP32Operand() const;
  bool isSDWAInt16Operand() const;
  bool isSDWAInt32Operand() const;
  bool isImmTy(ImmTy ImmT) const {

  bool isImmLiteral() const { return isImmTy(ImmTyNone); }

  bool isImmModifier() const {
    return isImm() && Imm.Type != ImmTyNone;
  }

  bool isClampSI() const { return isImmTy(ImmTyClampSI); }
  bool isOModSI() const { return isImmTy(ImmTyOModSI); }
  bool isDMask() const { return isImmTy(ImmTyDMask); }
  bool isDim() const { return isImmTy(ImmTyDim); }
  bool isUNorm() const { return isImmTy(ImmTyUNorm); }
  bool isDA() const { return isImmTy(ImmTyDA); }
  bool isR128A16() const { return isImmTy(ImmTyR128A16); }
  bool isA16() const { return isImmTy(ImmTyA16); }
  bool isLWE() const { return isImmTy(ImmTyLWE); }
  bool isOff() const { return isImmTy(ImmTyOff); }
  bool isExpTgt() const { return isImmTy(ImmTyExpTgt); }
  bool isExpVM() const { return isImmTy(ImmTyExpVM); }
  bool isExpCompr() const { return isImmTy(ImmTyExpCompr); }
  bool isOffen() const { return isImmTy(ImmTyOffen); }
  bool isIdxen() const { return isImmTy(ImmTyIdxen); }
  bool isAddr64() const { return isImmTy(ImmTyAddr64); }
  bool isOffset() const { return isImmTy(ImmTyOffset) && isUInt<16>(getImm()); }
  bool isOffset0() const { return isImmTy(ImmTyOffset0) && isUInt<8>(getImm()); }
  bool isOffset1() const { return isImmTy(ImmTyOffset1) && isUInt<8>(getImm()); }
  bool isSMEMOffsetMod() const { return isImmTy(ImmTySMEMOffsetMod); }
  bool isFlatOffset() const { return isImmTy(ImmTyOffset) || isImmTy(ImmTyInstOffset); }
  bool isGDS() const { return isImmTy(ImmTyGDS); }
  bool isLDS() const { return isImmTy(ImmTyLDS); }
  bool isCPol() const { return isImmTy(ImmTyCPol); }
  bool isSWZ() const { return isImmTy(ImmTySWZ); }
  bool isTFE() const { return isImmTy(ImmTyTFE); }
  bool isD16() const { return isImmTy(ImmTyD16); }
  bool isFORMAT() const { return isImmTy(ImmTyFORMAT) && isUInt<7>(getImm()); }
  bool isBankMask() const { return isImmTy(ImmTyDppBankMask); }
  bool isRowMask() const { return isImmTy(ImmTyDppRowMask); }
  bool isDppBoundCtrl() const { return isImmTy(ImmTyDppBoundCtrl); }
  bool isFI() const { return isImmTy(ImmTyDppFi); }
  bool isSDWADstSel() const { return isImmTy(ImmTySdwaDstSel); }
  bool isSDWASrc0Sel() const { return isImmTy(ImmTySdwaSrc0Sel); }
  bool isSDWASrc1Sel() const { return isImmTy(ImmTySdwaSrc1Sel); }
  bool isSDWADstUnused() const { return isImmTy(ImmTySdwaDstUnused); }
  bool isInterpSlot() const { return isImmTy(ImmTyInterpSlot); }
  bool isInterpAttr() const { return isImmTy(ImmTyInterpAttr); }
  bool isAttrChan() const { return isImmTy(ImmTyAttrChan); }
  bool isOpSel() const { return isImmTy(ImmTyOpSel); }
  bool isOpSelHi() const { return isImmTy(ImmTyOpSelHi); }
  bool isNegLo() const { return isImmTy(ImmTyNegLo); }
  bool isNegHi() const { return isImmTy(ImmTyNegHi); }
  bool isHigh() const { return isImmTy(ImmTyHigh); }
  bool isRegOrImm() const {

  bool isRegClass(unsigned RCID) const;

  bool isRegOrInlineNoMods(unsigned RCID, MVT type) const {
    return isRegOrInline(RCID, type) && !hasModifiers();
  }
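  // Naming convention of the source-operand predicates below (summary added
  // by the editor, inferred from the definitions themselves):
  //   SCSrc*  - SGPR or inline constant, no modifiers;
  //   SSrc*   - SCSrc plus literals (and expressions for the 32-bit forms);
  //   VCSrc*  - VGPR/SGPR or inline constant, no modifiers;
  //   VSrc*   - VCSrc plus literals;
  //   VISrc*/AISrc* - VGPR-only/AGPR-only variants for wider register tuples.
  // The B/F/V2 suffixes name the expected machine value type (integer, float,
  // or packed pair).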
  bool isSCSrcB16() const { return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i16); }

  bool isSCSrcV2B16() const {

  bool isSCSrcB32() const { return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i32); }

  bool isSCSrcB64() const { return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::i64); }

  bool isBoolReg() const;

  bool isSCSrcF16() const { return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f16); }

  bool isSCSrcV2F16() const {

  bool isSCSrcF32() const { return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f32); }

  bool isSCSrcF64() const { return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::f64); }

  bool isSSrcB32() const { return isSCSrcB32() || isLiteralImm(MVT::i32) || isExpr(); }

  bool isSSrcB16() const { return isSCSrcB16() || isLiteralImm(MVT::i16); }

  bool isSSrcV2B16() const {

  bool isSSrcB64() const {
    return isSCSrcB64() || isLiteralImm(MVT::i64);
  }

  bool isSSrcF32() const { return isSCSrcB32() || isLiteralImm(MVT::f32) || isExpr(); }

  bool isSSrcF64() const { return isSCSrcB64() || isLiteralImm(MVT::f64); }

  bool isSSrcF16() const { return isSCSrcB16() || isLiteralImm(MVT::f16); }

  bool isSSrcV2F16() const {

  bool isSSrcV2FP32() const {

  bool isSCSrcV2FP32() const {

  bool isSSrcV2INT32() const {

  bool isSCSrcV2INT32() const {

  bool isSSrcOrLdsB32() const {
    return isRegOrInlineNoMods(AMDGPU::SRegOrLds_32RegClassID, MVT::i32) ||

  bool isVCSrcB32() const { return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i32); }

  bool isVCSrcB64() const { return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::i64); }

  bool isVCSrcTB16_Lo128() const { return isRegOrInlineNoMods(AMDGPU::VS_32_Lo128RegClassID, MVT::i16); }

  bool isVCSrcB16() const { return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i16); }

  bool isVCSrcV2B16() const {

  bool isVCSrcF32() const { return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f32); }

  bool isVCSrcF64() const { return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::f64); }

  bool isVCSrcTF16_Lo128() const { return isRegOrInlineNoMods(AMDGPU::VS_32_Lo128RegClassID, MVT::f16); }

  bool isVCSrcF16() const { return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f16); }

  bool isVCSrcV2F16() const {

  bool isVSrcB32() const { return isVCSrcF32() || isLiteralImm(MVT::i32) || isExpr(); }

  bool isVSrcB64() const { return isVCSrcF64() || isLiteralImm(MVT::i64); }

  bool isVSrcTB16_Lo128() const { return isVCSrcTB16_Lo128() || isLiteralImm(MVT::i16); }

  bool isVSrcB16() const { return isVCSrcB16() || isLiteralImm(MVT::i16); }

  bool isVSrcV2B16() const { return isVSrcB16() || isLiteralImm(MVT::v2i16); }

  bool isVCSrcV2FP32() const {

  bool isVSrcV2FP32() const { return isVSrcF64() || isLiteralImm(MVT::v2f32); }

  bool isVCSrcV2INT32() const {

  bool isVSrcV2INT32() const { return isVSrcB64() || isLiteralImm(MVT::v2i32); }

  bool isVSrcF32() const { return isVCSrcF32() || isLiteralImm(MVT::f32) || isExpr(); }

  bool isVSrcF64() const { return isVCSrcF64() || isLiteralImm(MVT::f64); }

  bool isVSrcTF16_Lo128() const { return isVCSrcTF16_Lo128() || isLiteralImm(MVT::f16); }

  bool isVSrcF16() const { return isVCSrcF16() || isLiteralImm(MVT::f16); }

  bool isVSrcV2F16() const { return isVSrcF16() || isLiteralImm(MVT::v2f16); }
  bool isVISrcB32() const { return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i32); }

  bool isVISrcB16() const { return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i16); }

  bool isVISrcV2B16() const {

  bool isVISrcF32() const { return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f32); }

  bool isVISrcF16() const { return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f16); }

  bool isVISrcV2F16() const { return isVISrcF16() || isVISrcB32(); }

  bool isVISrc_64B64() const { return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::i64); }

  bool isVISrc_64F64() const { return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::f64); }

  bool isVISrc_64V2FP32() const { return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::f32); }

  bool isVISrc_64V2INT32() const { return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::i32); }

  bool isVISrc_256B64() const { return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::i64); }

  bool isVISrc_256F64() const { return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::f64); }

  bool isVISrc_128B16() const { return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::i16); }

  bool isVISrc_128V2B16() const { return isVISrc_128B16(); }

  bool isVISrc_128B32() const { return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::i32); }

  bool isVISrc_128F32() const { return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::f32); }

  bool isVISrc_256V2FP32() const { return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::f32); }

  bool isVISrc_256V2INT32() const { return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::i32); }

  bool isVISrc_512B32() const { return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::i32); }

  bool isVISrc_512B16() const { return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::i16); }

  bool isVISrc_512V2B16() const { return isVISrc_512B16(); }

  bool isVISrc_512F32() const { return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::f32); }

  bool isVISrc_512F16() const { return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::f16); }

  bool isVISrc_512V2F16() const { return isVISrc_512F16() || isVISrc_512B32(); }

  bool isVISrc_1024B32() const { return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::i32); }

  bool isVISrc_1024B16() const { return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::i16); }

  bool isVISrc_1024V2B16() const { return isVISrc_1024B16(); }

  bool isVISrc_1024F32() const { return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::f32); }

  bool isVISrc_1024F16() const { return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::f16); }

  bool isVISrc_1024V2F16() const { return isVISrc_1024F16() || isVISrc_1024B32(); }

  bool isAISrcB32() const { return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i32); }

  bool isAISrcB16() const { return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i16); }

  bool isAISrcV2B16() const {

  bool isAISrcF32() const { return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f32); }

  bool isAISrcF16() const { return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f16); }

  bool isAISrcV2F16() const { return isAISrcF16() || isAISrcB32(); }

  bool isAISrc_64B64() const { return isRegOrInlineNoMods(AMDGPU::AReg_64RegClassID, MVT::i64); }

  bool isAISrc_64F64() const { return isRegOrInlineNoMods(AMDGPU::AReg_64RegClassID, MVT::f64); }

  bool isAISrc_128B32() const { return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i32); }

  bool isAISrc_128B16() const { return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i16); }

  bool isAISrc_128V2B16() const { return isAISrc_128B16(); }

  bool isAISrc_128F32() const { return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f32); }

  bool isAISrc_128F16() const { return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f16); }

  bool isAISrc_128V2F16() const { return isAISrc_128F16() || isAISrc_128B32(); }

  bool isVISrc_128F16() const { return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::f16); }

  bool isVISrc_128V2F16() const { return isVISrc_128F16() || isVISrc_128B32(); }

  bool isAISrc_256B64() const { return isRegOrInlineNoMods(AMDGPU::AReg_256RegClassID, MVT::i64); }

  bool isAISrc_256F64() const { return isRegOrInlineNoMods(AMDGPU::AReg_256RegClassID, MVT::f64); }

  bool isAISrc_512B32() const { return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i32); }

  bool isAISrc_512B16() const { return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i16); }

  bool isAISrc_512V2B16() const { return isAISrc_512B16(); }

  bool isAISrc_512F32() const { return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f32); }

  bool isAISrc_512F16() const { return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f16); }

  bool isAISrc_512V2F16() const { return isAISrc_512F16() || isAISrc_512B32(); }

  bool isAISrc_1024B32() const { return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i32); }

  bool isAISrc_1024B16() const { return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i16); }

  bool isAISrc_1024V2B16() const { return isAISrc_1024B16(); }

  bool isAISrc_1024F32() const { return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f32); }

  bool isAISrc_1024F16() const { return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f16); }

  bool isAISrc_1024V2F16() const { return isAISrc_1024F16() || isAISrc_1024B32(); }
  bool isKImmFP32() const {

  bool isKImmFP16() const {

  bool isMem() const override {

  bool isExpr() const {

  bool isSoppBrTarget() const {
    return isExpr() || isImm();
  }

  bool isSWaitCnt() const;
  bool isDepCtr() const;
  bool isSDelayAlu() const;
  bool isHwreg() const;
  bool isSendMsg() const;
  bool isSwizzle() const;
  bool isSMRDOffset8() const;
  bool isSMEMOffset() const;
  bool isSMRDLiteralOffset() const;
  bool isDPPCtrl() const;
  bool isGPRIdxMode() const;
  bool isS16Imm() const;
  bool isU16Imm() const;
  bool isEndpgm() const;
  bool isWaitVDST() const;
  bool isWaitEXP() const;
  int64_t getImm() const {

  void setImm(int64_t Val) {

  ImmTy getImmTy() const {

  unsigned getReg() const override {

    return SMRange(StartLoc, EndLoc);

  Modifiers getModifiers() const {
    assert(isRegKind() || isImmTy(ImmTyNone));
    return isRegKind() ? Reg.Mods : Imm.Mods;
  }

  void setModifiers(Modifiers Mods) {
    assert(isRegKind() || isImmTy(ImmTyNone));

  bool hasModifiers() const {
    return getModifiers().hasModifiers();
  }

  bool hasFPModifiers() const {
    return getModifiers().hasFPModifiers();
  }

  bool hasIntModifiers() const {
    return getModifiers().hasIntModifiers();
  }

  void addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers = true) const;

  void addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const;

  template <unsigned Bitwidth>
  void addKImmFPOperands(MCInst &Inst, unsigned N) const;

  void addKImmFP16Operands(MCInst &Inst, unsigned N) const {
    addKImmFPOperands<16>(Inst, N);
  }

  void addKImmFP32Operands(MCInst &Inst, unsigned N) const {
    addKImmFPOperands<32>(Inst, N);
  }

  void addRegOperands(MCInst &Inst, unsigned N) const;

  void addBoolRegOperands(MCInst &Inst, unsigned N) const {
    addRegOperands(Inst, N);
  }

  void addRegOrImmOperands(MCInst &Inst, unsigned N) const {
      addRegOperands(Inst, N);
      addImmOperands(Inst, N);

  void addRegOrImmWithInputModsOperands(MCInst &Inst, unsigned N) const {
    Modifiers Mods = getModifiers();
      addRegOperands(Inst, N);
      addImmOperands(Inst, N, false);

  void addRegOrImmWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
    assert(!hasIntModifiers());
    addRegOrImmWithInputModsOperands(Inst, N);
  }

  void addRegOrImmWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
    assert(!hasFPModifiers());
    addRegOrImmWithInputModsOperands(Inst, N);
  }

  void addRegWithInputModsOperands(MCInst &Inst, unsigned N) const {
    Modifiers Mods = getModifiers();
      addRegOperands(Inst, N);

  void addRegWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
    assert(!hasIntModifiers());
    addRegWithInputModsOperands(Inst, N);
  }

  void addRegWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
    assert(!hasFPModifiers());
    addRegWithInputModsOperands(Inst, N);
  }

  void addSoppBrTargetOperands(MCInst &Inst, unsigned N) const {
      addImmOperands(Inst, N);
    case ImmTyNone: OS << "None"; break;
    case ImmTyGDS: OS << "GDS"; break;
    case ImmTyLDS: OS << "LDS"; break;
    case ImmTyOffen: OS << "Offen"; break;
    case ImmTyIdxen: OS << "Idxen"; break;
    case ImmTyAddr64: OS << "Addr64"; break;
    case ImmTyOffset: OS << "Offset"; break;
    case ImmTyInstOffset: OS << "InstOffset"; break;
    case ImmTyOffset0: OS << "Offset0"; break;
    case ImmTyOffset1: OS << "Offset1"; break;
    case ImmTySMEMOffsetMod: OS << "SMEMOffsetMod"; break;
    case ImmTyCPol: OS << "CPol"; break;
    case ImmTySWZ: OS << "SWZ"; break;
    case ImmTyTFE: OS << "TFE"; break;
    case ImmTyD16: OS << "D16"; break;
    case ImmTyFORMAT: OS << "FORMAT"; break;
    case ImmTyClampSI: OS << "ClampSI"; break;
    case ImmTyOModSI: OS << "OModSI"; break;
    case ImmTyDPP8: OS << "DPP8"; break;
    case ImmTyDppCtrl: OS << "DppCtrl"; break;
    case ImmTyDppRowMask: OS << "DppRowMask"; break;
    case ImmTyDppBankMask: OS << "DppBankMask"; break;
    case ImmTyDppBoundCtrl: OS << "DppBoundCtrl"; break;
    case ImmTyDppFi: OS << "FI"; break;
    case ImmTySdwaDstSel: OS << "SdwaDstSel"; break;
    case ImmTySdwaSrc0Sel: OS << "SdwaSrc0Sel"; break;
    case ImmTySdwaSrc1Sel: OS << "SdwaSrc1Sel"; break;
    case ImmTySdwaDstUnused: OS << "SdwaDstUnused"; break;
    case ImmTyDMask: OS << "DMask"; break;
    case ImmTyDim: OS << "Dim"; break;
    case ImmTyUNorm: OS << "UNorm"; break;
    case ImmTyDA: OS << "DA"; break;
    case ImmTyR128A16: OS << "R128A16"; break;
    case ImmTyA16: OS << "A16"; break;
    case ImmTyLWE: OS << "LWE"; break;
    case ImmTyOff: OS << "Off"; break;
    case ImmTyExpTgt: OS << "ExpTgt"; break;
    case ImmTyExpCompr: OS << "ExpCompr"; break;
    case ImmTyExpVM: OS << "ExpVM"; break;
    case ImmTyHwreg: OS << "Hwreg"; break;
    case ImmTySendMsg: OS << "SendMsg"; break;
    case ImmTyInterpSlot: OS << "InterpSlot"; break;
    case ImmTyInterpAttr: OS << "InterpAttr"; break;
    case ImmTyAttrChan: OS << "AttrChan"; break;
    case ImmTyOpSel: OS << "OpSel"; break;
    case ImmTyOpSelHi: OS << "OpSelHi"; break;
    case ImmTyNegLo: OS << "NegLo"; break;
    case ImmTyNegHi: OS << "NegHi"; break;
    case ImmTySwizzle: OS << "Swizzle"; break;
    case ImmTyGprIdxMode: OS << "GprIdxMode"; break;
    case ImmTyHigh: OS << "High"; break;
    case ImmTyBLGP: OS << "BLGP"; break;
    case ImmTyCBSZ: OS << "CBSZ"; break;
    case ImmTyABID: OS << "ABID"; break;
    case ImmTyEndpgm: OS << "Endpgm"; break;
    case ImmTyWaitVDST: OS << "WaitVDST"; break;
    case ImmTyWaitEXP: OS << "WaitEXP"; break;
      OS << "<register " << getReg() << " mods: " << Reg.Mods << '>';

      OS << '<' << getImm();
      if (getImmTy() != ImmTyNone) {
        OS << " type: "; printImmTy(OS, getImmTy());
      }
      OS << " mods: " << Imm.Mods << '>';

      OS << '\'' << getToken() << '\'';

      OS << "<expr " << *Expr << '>';

  static AMDGPUOperand::Ptr CreateImm(const AMDGPUAsmParser *AsmParser,
                                      int64_t Val, SMLoc Loc,
                                      ImmTy Type = ImmTyNone,
                                      bool IsFPImm = false) {
    auto Op = std::make_unique<AMDGPUOperand>(Immediate, AsmParser);
    Op->Imm.IsFPImm = IsFPImm;
    Op->Imm.Kind = ImmKindTyNone;
    Op->Imm.Mods = Modifiers();

  static AMDGPUOperand::Ptr CreateToken(const AMDGPUAsmParser *AsmParser,
                                        bool HasExplicitEncodingSize = true) {
    auto Res = std::make_unique<AMDGPUOperand>(Token, AsmParser);
    Res->Tok.Data = Str.data();
    Res->Tok.Length = Str.size();
    Res->StartLoc = Loc;

  static AMDGPUOperand::Ptr CreateReg(const AMDGPUAsmParser *AsmParser,
                                      unsigned RegNo, SMLoc S,
    auto Op = std::make_unique<AMDGPUOperand>(Register, AsmParser);
    Op->Reg.RegNo = RegNo;
    Op->Reg.Mods = Modifiers();

  static AMDGPUOperand::Ptr CreateExpr(const AMDGPUAsmParser *AsmParser,
    auto Op = std::make_unique<AMDGPUOperand>(Expression, AsmParser);

  OS << "abs:" << Mods.Abs << " neg: " << Mods.Neg << " sext:" << Mods.Sext;
class KernelScopeInfo {
  int SgprIndexUnusedMin = -1;
  int VgprIndexUnusedMin = -1;
  int AgprIndexUnusedMin = -1;

  void usesSgprAt(int i) {
    if (i >= SgprIndexUnusedMin) {
      SgprIndexUnusedMin = ++i;

  void usesVgprAt(int i) {
    if (i >= VgprIndexUnusedMin) {
      VgprIndexUnusedMin = ++i;
                         VgprIndexUnusedMin);

  void usesAgprAt(int i) {
    if (i >= AgprIndexUnusedMin) {
      AgprIndexUnusedMin = ++i;
                         VgprIndexUnusedMin);

public:
  KernelScopeInfo() = default;

    usesSgprAt(SgprIndexUnusedMin = -1);
    usesVgprAt(VgprIndexUnusedMin = -1);
    usesAgprAt(AgprIndexUnusedMin = -1);

  void usesRegister(RegisterKind RegKind, unsigned DwordRegIndex,
                    unsigned RegWidth) {
      usesSgprAt(DwordRegIndex + divideCeil(RegWidth, 32) - 1);
      usesAgprAt(DwordRegIndex + divideCeil(RegWidth, 32) - 1);
      usesVgprAt(DwordRegIndex + divideCeil(RegWidth, 32) - 1);
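  // Usage sketch (editor's illustration, not part of the original file):
  // after an operand such as v[4:7] is parsed, the scope tracker is told
  // which dwords were touched so the per-kernel register counts can be
  // emitted later. With DwordRegIndex = 4 and RegWidth = 128,
  // divideCeil(128, 32) - 1 yields 3, so usesVgprAt(7) bumps
  // VgprIndexUnusedMin to 8.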
  unsigned ForcedEncodingSize = 0;
  bool ForcedDPP = false;
  bool ForcedSDWA = false;
  KernelScopeInfo KernelScope;

#define GET_ASSEMBLER_HEADER
#include "AMDGPUGenAsmMatcher.inc"

  bool ParseAsAbsoluteExpression(uint32_t &Ret);
  bool OutOfRangeError(SMRange Range);

  bool calculateGPRBlocks(const FeatureBitset &Features, bool VCCUsed,
                          bool FlatScrUsed, bool XNACKUsed,
                          std::optional<bool> EnableWavefrontSize32,
                          unsigned NextFreeVGPR, SMRange VGPRRange,
                          unsigned NextFreeSGPR, SMRange SGPRRange,
                          unsigned &VGPRBlocks, unsigned &SGPRBlocks);
  bool ParseDirectiveAMDGCNTarget();
  bool ParseDirectiveAMDHSAKernel();
  bool ParseDirectiveHSACodeObjectVersion();
  bool ParseDirectiveHSACodeObjectISA();
  bool ParseDirectiveAMDKernelCodeT();
  bool ParseDirectiveAMDGPUHsaKernel();
  bool ParseDirectiveISAVersion();
  bool ParseDirectiveHSAMetadata();
  bool ParseDirectivePALMetadataBegin();
  bool ParseDirectivePALMetadata();
  bool ParseDirectiveAMDGPULDS();

  bool ParseToEndDirective(const char *AssemblerDirectiveBegin,
                           const char *AssemblerDirectiveEnd,
                           std::string &CollectString);

  bool AddNextRegisterToList(unsigned &Reg, unsigned &RegWidth,
                             RegisterKind RegKind, unsigned Reg1, SMLoc Loc);
  bool ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
                           unsigned &RegNum, unsigned &RegWidth,
                           bool RestoreOnFailure = false);
  bool ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
                           unsigned &RegNum, unsigned &RegWidth,
  unsigned ParseRegularReg(RegisterKind &RegKind, unsigned &RegNum,
  unsigned ParseSpecialReg(RegisterKind &RegKind, unsigned &RegNum,
  unsigned ParseRegList(RegisterKind &RegKind, unsigned &RegNum,
  bool ParseRegRange(unsigned &Num, unsigned &Width);
  unsigned getRegularReg(RegisterKind RegKind,
  std::optional<StringRef> getGprCountSymbolName(RegisterKind RegKind);
  void initializeGprCountSymbol(RegisterKind RegKind);
  bool updateGprCountSymbols(RegisterKind RegKind, unsigned DwordRegIndex,
               bool IsGdsHardcoded);

  enum AMDGPUMatchResultTy {
    Match_PreferE32 = FIRST_TARGET_MATCH_RESULT_TY

    OperandMode_Default,

  using OptionalImmIndexMap = std::map<AMDGPUOperand::ImmTy, unsigned>;
    if (getFeatureBits().none()) {

      initializeGprCountSymbol(IS_VGPR);
      initializeGprCountSymbol(IS_SGPR);

  bool hasInv2PiInlineImm() const {
    return getFeatureBits()[AMDGPU::FeatureInv2PiInlineImm];
  }

  bool hasFlatOffsets() const {
    return getFeatureBits()[AMDGPU::FeatureFlatInstOffsets];
  }

    return getFeatureBits()[AMDGPU::FeatureArchitectedFlatScratch];

  bool hasSGPR102_SGPR103() const {

  bool hasSGPR104_SGPR105() const { return isGFX10Plus(); }

  bool hasIntClamp() const {
    return getFeatureBits()[AMDGPU::FeatureIntClamp];
  }

  bool hasPartialNSAEncoding() const {
    return getFeatureBits()[AMDGPU::FeaturePartialNSAEncoding];
  }

  void setForcedEncodingSize(unsigned Size) { ForcedEncodingSize = Size; }
  void setForcedDPP(bool ForceDPP_) { ForcedDPP = ForceDPP_; }
  void setForcedSDWA(bool ForceSDWA_) { ForcedSDWA = ForceSDWA_; }

  unsigned getForcedEncodingSize() const { return ForcedEncodingSize; }
  bool isForcedVOP3() const { return ForcedEncodingSize == 64; }
  bool isForcedDPP() const { return ForcedDPP; }
  bool isForcedSDWA() const { return ForcedSDWA; }

  StringRef getMatchedVariantName() const;

  std::unique_ptr<AMDGPUOperand> parseRegister(bool RestoreOnFailure = false);
                     bool RestoreOnFailure);
                     SMLoc &EndLoc) override;
                     SMLoc &EndLoc) override;
                                        unsigned Kind) override;
                               bool MatchingInlineAsm) override;
                    OperandMode Mode = OperandMode_Default);

                    AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
                    bool (*ConvertResult)(int64_t &) = nullptr);

  parseOperandArrayWithPrefix(const char *Prefix,
                              AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
                              bool (*ConvertResult)(int64_t &) = nullptr);

                  AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone);
  bool isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
  bool isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
  bool isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
  bool isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const;
  bool parseSP3NegModifier();

  bool tryParseFmt(const char *Pref, int64_t MaxVal, int64_t &Val);
  bool matchDfmtNfmt(int64_t &Dfmt, int64_t &Nfmt, StringRef FormatStr, SMLoc Loc);

  bool parseCnt(int64_t &IntVal);
  bool parseDepCtr(int64_t &IntVal, unsigned &Mask);
  bool parseDelay(int64_t &Delay);

  struct OperandInfoTy {
    bool IsSymbolic = false;
    bool IsDefined = false;

    OperandInfoTy(int64_t Id_) : Id(Id_) {}

  bool parseSendMsgBody(OperandInfoTy &Msg, OperandInfoTy &Op, OperandInfoTy &Stream);
  bool validateSendMsg(const OperandInfoTy &Msg,
                       const OperandInfoTy &Op,
                       const OperandInfoTy &Stream);

  bool parseHwregBody(OperandInfoTy &HwReg,
                      OperandInfoTy &Width);
  bool validateHwreg(const OperandInfoTy &HwReg,
                     const OperandInfoTy &Offset,
                     const OperandInfoTy &Width);

  SMLoc getOperandLoc(std::function<bool(const AMDGPUOperand &)> Test,
              bool SearchMandatoryLiterals = false) const;

  bool validateSOPLiteral(const MCInst &Inst) const;
  bool validateVOPDRegBankConstraints(const MCInst &Inst,
  bool validateIntClampSupported(const MCInst &Inst);
  bool validateMIMGAtomicDMask(const MCInst &Inst);
  bool validateMIMGGatherDMask(const MCInst &Inst);
  bool validateMIMGDataSize(const MCInst &Inst, const SMLoc &IDLoc);
  bool validateMIMGAddrSize(const MCInst &Inst);
  bool validateMIMGD16(const MCInst &Inst);
  bool validateMIMGMSAA(const MCInst &Inst);
  bool validateOpSel(const MCInst &Inst);
  bool validateVccOperand(unsigned Reg) const;
  bool validateAGPRLdSt(const MCInst &Inst) const;
  bool validateVGPRAlign(const MCInst &Inst) const;
  bool validateDivScale(const MCInst &Inst);
              const SMLoc &IDLoc);
  std::optional<StringRef> validateLdsDirect(const MCInst &Inst);
  unsigned getConstantBusLimit(unsigned Opcode) const;
  bool usesConstantBus(const MCInst &Inst, unsigned OpIdx);
  bool isInlineConstant(const MCInst &Inst, unsigned OpIdx) const;
  unsigned findImplicitSGPRReadInVOP(const MCInst &Inst) const;

  AsmToken peekToken(bool ShouldSkipSpace = true);
  SMLoc getLoc() const;

  bool parseSwizzleOperand(int64_t &Op,
                           const unsigned MinVal,
                           const unsigned MaxVal,
  bool parseSwizzleOperands(const unsigned OpNum, int64_t *Op,
                            const unsigned MinVal,
                            const unsigned MaxVal,
  bool parseSwizzleOffset(int64_t &Imm);
  bool parseSwizzleMacro(int64_t &Imm);
  bool parseSwizzleQuadPerm(int64_t &Imm);
  bool parseSwizzleBitmaskPerm(int64_t &Imm);
  bool parseSwizzleBroadcast(int64_t &Imm);
  bool parseSwizzleSwap(int64_t &Imm);
  bool parseSwizzleReverse(int64_t &Imm);

  int64_t parseGPRIdxMacro();

  AMDGPUOperand::Ptr defaultCPol() const;

  AMDGPUOperand::Ptr defaultSMRDOffset8() const;
  AMDGPUOperand::Ptr defaultSMEMOffset() const;
  AMDGPUOperand::Ptr defaultSMEMOffsetMod() const;
  AMDGPUOperand::Ptr defaultSMRDLiteralOffset() const;
  AMDGPUOperand::Ptr defaultFlatOffset() const;

               OptionalImmIndexMap &OptionalIdx);
               OptionalImmIndexMap &OptionalIdx);
               OptionalImmIndexMap &OptionalIdx);

               bool IsAtomic = false);

  bool parseDimId(unsigned &Encoding);

  int64_t parseDPPCtrlSel(StringRef Ctrl);
  int64_t parseDPPCtrlPerm();
  AMDGPUOperand::Ptr defaultRowMask() const;
  AMDGPUOperand::Ptr defaultBankMask() const;
  AMDGPUOperand::Ptr defaultDppBoundCtrl() const;
  AMDGPUOperand::Ptr defaultFI() const;
               bool IsDPP8 = false);
               AMDGPUOperand::ImmTy Type);
               bool SkipDstVcc = false,
               bool SkipSrcVcc = false);

  AMDGPUOperand::Ptr defaultBLGP() const;
  AMDGPUOperand::Ptr defaultCBSZ() const;
  AMDGPUOperand::Ptr defaultABID() const;

  AMDGPUOperand::Ptr defaultEndpgmImmOperands() const;

  AMDGPUOperand::Ptr defaultWaitVDST() const;
  AMDGPUOperand::Ptr defaultWaitEXP() const;
    return &APFloat::IEEEsingle();
    return &APFloat::IEEEdouble();
    return &APFloat::IEEEhalf();

  case AMDGPU::OPERAND_REG_IMM_INT32:
  case AMDGPU::OPERAND_REG_IMM_FP32:
  case AMDGPU::OPERAND_REG_IMM_FP32_DEFERRED:
  case AMDGPU::OPERAND_REG_INLINE_C_INT32:
  case AMDGPU::OPERAND_REG_INLINE_C_FP32:
  case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
  case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
  case AMDGPU::OPERAND_REG_INLINE_C_V2FP32:
  case AMDGPU::OPERAND_REG_IMM_V2FP32:
  case AMDGPU::OPERAND_REG_INLINE_C_V2INT32:
  case AMDGPU::OPERAND_REG_IMM_V2INT32:
  case AMDGPU::OPERAND_KIMM32:
    return &APFloat::IEEEsingle();
  case AMDGPU::OPERAND_REG_IMM_INT64:
  case AMDGPU::OPERAND_REG_IMM_FP64:
  case AMDGPU::OPERAND_REG_INLINE_C_INT64:
  case AMDGPU::OPERAND_REG_INLINE_C_FP64:
  case AMDGPU::OPERAND_REG_INLINE_AC_FP64:
    return &APFloat::IEEEdouble();
  case AMDGPU::OPERAND_REG_IMM_INT16:
  case AMDGPU::OPERAND_REG_IMM_FP16:
  case AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED:
  case AMDGPU::OPERAND_REG_INLINE_C_INT16:
  case AMDGPU::OPERAND_REG_INLINE_C_FP16:
  case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
  case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
  case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
  case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
  case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
  case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16:
  case AMDGPU::OPERAND_REG_IMM_V2INT16:
  case AMDGPU::OPERAND_REG_IMM_V2FP16:
  case AMDGPU::OPERAND_KIMM16:
    return &APFloat::IEEEhalf();
                          APFloat::rmNearestTiesToEven,

  if (Status != APFloat::opOK &&
      ((Status & APFloat::opOverflow) != 0 ||
       (Status & APFloat::opUnderflow) != 0)) {

  return AMDGPU::isInlinableLiteral16(Val, HasInv2Pi);

bool AMDGPUOperand::isInlinableImm(MVT type) const {

  if (!isImmTy(ImmTyNone)) {

      return AMDGPU::isInlinableLiteral64(Imm.Val,
                                          AsmParser->hasInv2PiInlineImm());

            static_cast<int16_t>(FPLiteral.bitcastToAPInt().getZExtValue()),
            type, AsmParser->hasInv2PiInlineImm());

      return AMDGPU::isInlinableLiteral32(
          static_cast<int32_t>(FPLiteral.bitcastToAPInt().getZExtValue()),
          AsmParser->hasInv2PiInlineImm());

    return AMDGPU::isInlinableLiteral64(Imm.Val,
                                        AsmParser->hasInv2PiInlineImm());

        static_cast<int16_t>(Literal.getLoBits(16).getSExtValue()),
        type, AsmParser->hasInv2PiInlineImm());

  return AMDGPU::isInlinableLiteral32(
      static_cast<int32_t>(Literal.getLoBits(32).getZExtValue()),
      AsmParser->hasInv2PiInlineImm());
bool AMDGPUOperand::isLiteralImm(MVT type) const {
  if (!isImmTy(ImmTyNone)) {

  if (type == MVT::f64 && hasFPModifiers()) {

bool AMDGPUOperand::isRegClass(unsigned RCID) const {
  return isRegKind() && AsmParser->getMRI()->getRegClass(RCID).contains(getReg());

bool AMDGPUOperand::isVRegWithInputMods() const {
  return isRegClass(AMDGPU::VGPR_32RegClassID) ||
         (isRegClass(AMDGPU::VReg_64RegClassID) &&
          AsmParser->getFeatureBits()[AMDGPU::Feature64BitDPP]);

bool AMDGPUOperand::isT16VRegWithInputMods() const {
  return isRegClass(AMDGPU::VGPR_32_Lo128RegClassID);

bool AMDGPUOperand::isSDWAOperand(MVT type) const {
  if (AsmParser->isVI())
  else if (AsmParser->isGFX9Plus())
    return isRegClass(AMDGPU::VS_32RegClassID) || isInlinableImm(type);

bool AMDGPUOperand::isSDWAFP16Operand() const {

bool AMDGPUOperand::isSDWAFP32Operand() const {

bool AMDGPUOperand::isSDWAInt16Operand() const {

bool AMDGPUOperand::isSDWAInt32Operand() const {

bool AMDGPUOperand::isBoolReg() const {
  auto FB = AsmParser->getFeatureBits();
  return isReg() && ((FB[AMDGPU::FeatureWavefrontSize64] && isSCSrcB64()) ||
                     (FB[AMDGPU::FeatureWavefrontSize32] && isSCSrcB32()));

  assert(isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers());

void AMDGPUOperand::addImmOperands(MCInst &Inst, unsigned N,
                                   bool ApplyModifiers) const {
  if (AMDGPU::isSISrcOperand(AsmParser->getMII()->get(Inst.getOpcode()),
      addLiteralImmOperand(Inst, Imm.Val,
                           isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers());
    assert(!isImmTy(ImmTyNone) || !hasModifiers());

void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val,
                                         bool ApplyModifiers) const {
  const auto &InstDesc = AsmParser->getMII()->get(Inst.getOpcode());

  assert(AMDGPU::isSISrcOperand(InstDesc, OpNum));

  if (ApplyModifiers) {
    assert(AMDGPU::isSISrcFPOperand(InstDesc, OpNum));
    Val = applyInputFPModifiers(Val, Size);

  uint8_t OpTy = InstDesc.operands()[OpNum].OperandType;
  case AMDGPU::OPERAND_REG_IMM_INT64:
  case AMDGPU::OPERAND_REG_IMM_FP64:
  case AMDGPU::OPERAND_REG_INLINE_C_INT64:
  case AMDGPU::OPERAND_REG_INLINE_C_FP64:
  case AMDGPU::OPERAND_REG_INLINE_AC_FP64:
    if (AMDGPU::isInlinableLiteral64(Literal.getZExtValue(),
                                     AsmParser->hasInv2PiInlineImm())) {

    if (AMDGPU::isSISrcFPOperand(InstDesc, OpNum)) {

      if (Literal.getLoBits(32) != 0) {
        const_cast<AMDGPUAsmParser *>(AsmParser)->Warning(
            Inst.getLoc(),
            "Can't encode literal as exact 64-bit floating-point operand. "
            "Low 32-bits will be set to zero");

      setImmKindLiteral();

  case AMDGPU::OPERAND_REG_IMM_INT32:
  case AMDGPU::OPERAND_REG_IMM_FP32:
  case AMDGPU::OPERAND_REG_IMM_FP32_DEFERRED:
  case AMDGPU::OPERAND_REG_INLINE_C_INT32:
  case AMDGPU::OPERAND_REG_INLINE_C_FP32:
  case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
  case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
  case AMDGPU::OPERAND_REG_IMM_INT16:
  case AMDGPU::OPERAND_REG_IMM_FP16:
  case AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED:
  case AMDGPU::OPERAND_REG_INLINE_C_INT16:
  case AMDGPU::OPERAND_REG_INLINE_C_FP16:
  case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
  case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
  case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
  case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
  case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
  case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16:
  case AMDGPU::OPERAND_REG_IMM_V2INT16:
  case AMDGPU::OPERAND_REG_IMM_V2FP16:
  case AMDGPU::OPERAND_REG_INLINE_C_V2FP32:
  case AMDGPU::OPERAND_REG_IMM_V2FP32:
  case AMDGPU::OPERAND_REG_INLINE_C_V2INT32:
  case AMDGPU::OPERAND_REG_IMM_V2INT32:
  case AMDGPU::OPERAND_KIMM32:
  case AMDGPU::OPERAND_KIMM16: {
                       APFloat::rmNearestTiesToEven, &lost);

    uint64_t ImmVal = FPLiteral.bitcastToAPInt().getZExtValue();

    if (OpTy == AMDGPU::OPERAND_KIMM32 || OpTy == AMDGPU::OPERAND_KIMM16) {
      setImmKindMandatoryLiteral();
      setImmKindLiteral();

  case AMDGPU::OPERAND_REG_IMM_INT32:
  case AMDGPU::OPERAND_REG_IMM_FP32:
  case AMDGPU::OPERAND_REG_IMM_FP32_DEFERRED:
  case AMDGPU::OPERAND_REG_INLINE_C_INT32:
  case AMDGPU::OPERAND_REG_INLINE_C_FP32:
  case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
  case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
  case AMDGPU::OPERAND_REG_IMM_V2INT16:
  case AMDGPU::OPERAND_REG_IMM_V2FP16:
  case AMDGPU::OPERAND_REG_IMM_V2FP32:
  case AMDGPU::OPERAND_REG_INLINE_C_V2FP32:
  case AMDGPU::OPERAND_REG_IMM_V2INT32:
  case AMDGPU::OPERAND_REG_INLINE_C_V2INT32:
        AMDGPU::isInlinableLiteral32(static_cast<int32_t>(Val),
                                     AsmParser->hasInv2PiInlineImm())) {
      setImmKindLiteral();

  case AMDGPU::OPERAND_REG_IMM_INT64:
  case AMDGPU::OPERAND_REG_IMM_FP64:
  case AMDGPU::OPERAND_REG_INLINE_C_INT64:
  case AMDGPU::OPERAND_REG_INLINE_C_FP64:
  case AMDGPU::OPERAND_REG_INLINE_AC_FP64:
    if (AMDGPU::isInlinableLiteral64(Val, AsmParser->hasInv2PiInlineImm())) {
      setImmKindLiteral();

  case AMDGPU::OPERAND_REG_IMM_INT16:
  case AMDGPU::OPERAND_REG_IMM_FP16:
  case AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED:
  case AMDGPU::OPERAND_REG_INLINE_C_INT16:
  case AMDGPU::OPERAND_REG_INLINE_C_FP16:
  case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
  case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
        AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val),
                                     AsmParser->hasInv2PiInlineImm())) {
      setImmKindLiteral();

  case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
  case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
  case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
  case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16: {
    assert(AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val),
                                        AsmParser->hasInv2PiInlineImm()));

  case AMDGPU::OPERAND_KIMM32:
    setImmKindMandatoryLiteral();
  case AMDGPU::OPERAND_KIMM16:
    setImmKindMandatoryLiteral();
template <unsigned Bitwidth>
void AMDGPUOperand::addKImmFPOperands(MCInst &Inst, unsigned N) const {
    setImmKindMandatoryLiteral();

                        APFloat::rmNearestTiesToEven, &Lost);

void AMDGPUOperand::addRegOperands(MCInst &Inst, unsigned N) const {

bool AMDGPUOperand::isInlineValue() const {

  if (Is == IS_VGPR) {
      return AMDGPU::VGPR_32RegClassID;
      return AMDGPU::VReg_64RegClassID;
      return AMDGPU::VReg_96RegClassID;
      return AMDGPU::VReg_128RegClassID;
      return AMDGPU::VReg_160RegClassID;
      return AMDGPU::VReg_192RegClassID;
      return AMDGPU::VReg_224RegClassID;
      return AMDGPU::VReg_256RegClassID;
      return AMDGPU::VReg_288RegClassID;
      return AMDGPU::VReg_320RegClassID;
      return AMDGPU::VReg_352RegClassID;
      return AMDGPU::VReg_384RegClassID;
      return AMDGPU::VReg_512RegClassID;
      return AMDGPU::VReg_1024RegClassID;
  } else if (Is == IS_TTMP) {
      return AMDGPU::TTMP_32RegClassID;
      return AMDGPU::TTMP_64RegClassID;
      return AMDGPU::TTMP_128RegClassID;
      return AMDGPU::TTMP_256RegClassID;
      return AMDGPU::TTMP_512RegClassID;
  } else if (Is == IS_SGPR) {
      return AMDGPU::SGPR_32RegClassID;
      return AMDGPU::SGPR_64RegClassID;
      return AMDGPU::SGPR_96RegClassID;
      return AMDGPU::SGPR_128RegClassID;
      return AMDGPU::SGPR_160RegClassID;
      return AMDGPU::SGPR_192RegClassID;
      return AMDGPU::SGPR_224RegClassID;
      return AMDGPU::SGPR_256RegClassID;
      return AMDGPU::SGPR_288RegClassID;
      return AMDGPU::SGPR_320RegClassID;
      return AMDGPU::SGPR_352RegClassID;
      return AMDGPU::SGPR_384RegClassID;
      return AMDGPU::SGPR_512RegClassID;
  } else if (Is == IS_AGPR) {
      return AMDGPU::AGPR_32RegClassID;
      return AMDGPU::AReg_64RegClassID;
      return AMDGPU::AReg_96RegClassID;
      return AMDGPU::AReg_128RegClassID;
      return AMDGPU::AReg_160RegClassID;
      return AMDGPU::AReg_192RegClassID;
      return AMDGPU::AReg_224RegClassID;
      return AMDGPU::AReg_256RegClassID;
      return AMDGPU::AReg_288RegClassID;
      return AMDGPU::AReg_320RegClassID;
      return AMDGPU::AReg_352RegClassID;
      return AMDGPU::AReg_384RegClassID;
      return AMDGPU::AReg_512RegClassID;
      return AMDGPU::AReg_1024RegClassID;
2438 return AMDGPU::AReg_1024RegClassID;
2446 .
Case(
"exec", AMDGPU::EXEC)
2447 .
Case(
"vcc", AMDGPU::VCC)
2448 .
Case(
"flat_scratch", AMDGPU::FLAT_SCR)
2449 .
Case(
"xnack_mask", AMDGPU::XNACK_MASK)
2450 .
Case(
"shared_base", AMDGPU::SRC_SHARED_BASE)
2451 .
Case(
"src_shared_base", AMDGPU::SRC_SHARED_BASE)
2452 .
Case(
"shared_limit", AMDGPU::SRC_SHARED_LIMIT)
2453 .
Case(
"src_shared_limit", AMDGPU::SRC_SHARED_LIMIT)
2454 .
Case(
"private_base", AMDGPU::SRC_PRIVATE_BASE)
2455 .
Case(
"src_private_base", AMDGPU::SRC_PRIVATE_BASE)
2456 .
Case(
"private_limit", AMDGPU::SRC_PRIVATE_LIMIT)
2457 .
Case(
"src_private_limit", AMDGPU::SRC_PRIVATE_LIMIT)
2458 .
Case(
"pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID)
2459 .
Case(
"src_pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID)
2460 .
Case(
"lds_direct", AMDGPU::LDS_DIRECT)
2461 .
Case(
"src_lds_direct", AMDGPU::LDS_DIRECT)
2462 .
Case(
"m0", AMDGPU::M0)
2463 .
Case(
"vccz", AMDGPU::SRC_VCCZ)
2464 .
Case(
"src_vccz", AMDGPU::SRC_VCCZ)
2465 .
Case(
"execz", AMDGPU::SRC_EXECZ)
2466 .
Case(
"src_execz", AMDGPU::SRC_EXECZ)
2467 .
Case(
"scc", AMDGPU::SRC_SCC)
2468 .
Case(
"src_scc", AMDGPU::SRC_SCC)
2469 .
Case(
"tba", AMDGPU::TBA)
2470 .
Case(
"tma", AMDGPU::TMA)
2471 .
Case(
"flat_scratch_lo", AMDGPU::FLAT_SCR_LO)
2472 .
Case(
"flat_scratch_hi", AMDGPU::FLAT_SCR_HI)
2473 .
Case(
"xnack_mask_lo", AMDGPU::XNACK_MASK_LO)
2474 .
Case(
"xnack_mask_hi", AMDGPU::XNACK_MASK_HI)
2475 .
Case(
"vcc_lo", AMDGPU::VCC_LO)
2476 .
Case(
"vcc_hi", AMDGPU::VCC_HI)
2477 .
Case(
"exec_lo", AMDGPU::EXEC_LO)
2478 .
Case(
"exec_hi", AMDGPU::EXEC_HI)
2479 .
Case(
"tma_lo", AMDGPU::TMA_LO)
2480 .
Case(
"tma_hi", AMDGPU::TMA_HI)
2481 .
Case(
"tba_lo", AMDGPU::TBA_LO)
2482 .
Case(
"tba_hi", AMDGPU::TBA_HI)
2483 .
Case(
"pc", AMDGPU::PC_REG)
2484 .
Case(
"null", AMDGPU::SGPR_NULL)
bool AMDGPUAsmParser::ParseRegister(MCRegister &RegNo, SMLoc &StartLoc,
                                    SMLoc &EndLoc, bool RestoreOnFailure) {
  auto R = parseRegister();
  if (!R)
    return true;
  RegNo = R->getReg();
  StartLoc = R->getStartLoc();
  EndLoc = R->getEndLoc();

bool AMDGPUAsmParser::parseRegister(MCRegister &RegNo, SMLoc &StartLoc,
  return ParseRegister(RegNo, StartLoc, EndLoc, false);

  ParseRegister(RegNo, StartLoc, EndLoc, true);
  bool PendingErrors = getParser().hasPendingError();
  getParser().clearPendingErrors();

bool AMDGPUAsmParser::AddNextRegisterToList(unsigned &Reg, unsigned &RegWidth,
                                            RegisterKind RegKind, unsigned Reg1,
                                            SMLoc Loc) {
    if (Reg == AMDGPU::EXEC_LO && Reg1 == AMDGPU::EXEC_HI) {

    if (Reg == AMDGPU::FLAT_SCR_LO && Reg1 == AMDGPU::FLAT_SCR_HI) {
      Reg = AMDGPU::FLAT_SCR;

    if (Reg == AMDGPU::XNACK_MASK_LO && Reg1 == AMDGPU::XNACK_MASK_HI) {
      Reg = AMDGPU::XNACK_MASK;

    if (Reg == AMDGPU::VCC_LO && Reg1 == AMDGPU::VCC_HI) {

    if (Reg == AMDGPU::TBA_LO && Reg1 == AMDGPU::TBA_HI) {

    if (Reg == AMDGPU::TMA_LO && Reg1 == AMDGPU::TMA_HI) {

    Error(Loc, "register does not fit in the list");

    if (Reg1 != Reg + RegWidth / 32) {
      Error(Loc, "registers in a list must have consecutive indices");

    {{"ttmp"}, IS_TTMP},

  return Kind == IS_VGPR ||

    if (Str.startswith(Reg.Name))

  return !Str.getAsInteger(10, Num);

AMDGPUAsmParser::isRegister(const AsmToken &Token,

  if (!RegSuffix.empty()) {

AMDGPUAsmParser::isRegister()
  return isRegister(getToken(), peekToken());

AMDGPUAsmParser::getRegularReg(RegisterKind RegKind,
  unsigned AlignSize = 1;
  if (RegKind == IS_SGPR || RegKind == IS_TTMP) {
    AlignSize = std::min(RegWidth / 32, 4u);

  if (RegNum % AlignSize != 0) {
    Error(Loc, "invalid register alignment");
    return AMDGPU::NoRegister;

  unsigned RegIdx = RegNum / AlignSize;

    Error(Loc, "invalid or unsupported register size");
    return AMDGPU::NoRegister;

    Error(Loc, "register index is out of range");
    return AMDGPU::NoRegister;

bool AMDGPUAsmParser::ParseRegRange(unsigned &Num, unsigned &RegWidth) {
  int64_t RegLo, RegHi;

  SMLoc FirstIdxLoc = getLoc();

  if (!parseExpr(RegLo))

    SecondIdxLoc = getLoc();
    if (!parseExpr(RegHi))

  if (!isUInt<32>(RegLo)) {
    Error(FirstIdxLoc, "invalid register index");

  if (!isUInt<32>(RegHi)) {
    Error(SecondIdxLoc, "invalid register index");

  if (RegLo > RegHi) {
    Error(FirstIdxLoc, "first register index should not exceed second index");

  Num = static_cast<unsigned>(RegLo);
  RegWidth = 32 * ((RegHi - RegLo) + 1);
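// Example (editor's note): for a range such as "s[4:7]", ParseRegRange
// produces Num = 4 and RegWidth = 32 * ((7 - 4) + 1) = 128 bits, i.e. a
// four-dword SGPR tuple.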
unsigned AMDGPUAsmParser::ParseSpecialReg(RegisterKind &RegKind,
                                          unsigned &RegNum, unsigned &RegWidth,
    RegKind = IS_SPECIAL;

unsigned AMDGPUAsmParser::ParseRegularReg(RegisterKind &RegKind,
                                          unsigned &RegNum, unsigned &RegWidth,
  auto Loc = getLoc();

    Error(Loc, "invalid register name");
    return AMDGPU::NoRegister;

  if (!RegSuffix.empty()) {
      Error(Loc, "invalid register index");
      return AMDGPU::NoRegister;

    if (!ParseRegRange(RegNum, RegWidth))
      return AMDGPU::NoRegister;

  return getRegularReg(RegKind, RegNum, RegWidth, Loc);

unsigned AMDGPUAsmParser::ParseRegList(RegisterKind &RegKind, unsigned &RegNum,
  unsigned Reg = AMDGPU::NoRegister;
  auto ListLoc = getLoc();

                 "expected a register or a list of registers")) {
    return AMDGPU::NoRegister;

  auto Loc = getLoc();
  if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth))
    return AMDGPU::NoRegister;
  if (RegWidth != 32) {
    Error(Loc, "expected a single 32-bit register");
    return AMDGPU::NoRegister;

    RegisterKind NextRegKind;
    unsigned NextReg, NextRegNum, NextRegWidth;

    if (!ParseAMDGPURegister(NextRegKind, NextReg,
                             NextRegNum, NextRegWidth,
      return AMDGPU::NoRegister;

    if (NextRegWidth != 32) {
      Error(Loc, "expected a single 32-bit register");
      return AMDGPU::NoRegister;

    if (NextRegKind != RegKind) {
      Error(Loc, "registers in a list must be of the same kind");
      return AMDGPU::NoRegister;

    if (!AddNextRegisterToList(Reg, RegWidth, RegKind, NextReg, Loc))
      return AMDGPU::NoRegister;

                 "expected a comma or a closing square bracket")) {
    return AMDGPU::NoRegister;

    Reg = getRegularReg(RegKind, RegNum, RegWidth, ListLoc);
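// Example (editor's note): a bracketed list such as "[s0, s1]" is parsed one
// 32-bit register at a time; AddNextRegisterToList() then requires matching
// kinds and consecutive indices, so "[s0, s1]" folds into the 64-bit pair
// while "[s0, s2]" is rejected.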
bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
                                          unsigned &RegNum, unsigned &RegWidth,
  auto Loc = getLoc();
  Reg = AMDGPU::NoRegister;

    Reg = ParseSpecialReg(RegKind, RegNum, RegWidth, Tokens);
    if (Reg == AMDGPU::NoRegister)
      Reg = ParseRegularReg(RegKind, RegNum, RegWidth, Tokens);
    Reg = ParseRegList(RegKind, RegNum, RegWidth, Tokens);

  if (Reg == AMDGPU::NoRegister) {

  if (!subtargetHasRegister(*TRI, Reg)) {
    if (Reg == AMDGPU::SGPR_NULL) {
      Error(Loc, "'null' operand is not supported on this GPU");
      Error(Loc, "register not available on this GPU");

bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
                                          unsigned &RegNum, unsigned &RegWidth,
                                          bool RestoreOnFailure) {
  Reg = AMDGPU::NoRegister;

  if (ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth, Tokens)) {
    if (RestoreOnFailure) {
      while (!Tokens.empty()) {

std::optional<StringRef>
AMDGPUAsmParser::getGprCountSymbolName(RegisterKind RegKind) {
    return StringRef(".amdgcn.next_free_vgpr");
    return StringRef(".amdgcn.next_free_sgpr");
  return std::nullopt;

void AMDGPUAsmParser::initializeGprCountSymbol(RegisterKind RegKind) {
  auto SymbolName = getGprCountSymbolName(RegKind);
  assert(SymbolName && "initializing invalid register kind");
  MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName);

bool AMDGPUAsmParser::updateGprCountSymbols(RegisterKind RegKind,
                                            unsigned DwordRegIndex,
                                            unsigned RegWidth) {
  if (AMDGPU::getIsaVersion(getSTI().getCPU()).Major < 6)

  auto SymbolName = getGprCountSymbolName(RegKind);
  MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName);

  int64_t NewMax = DwordRegIndex + divideCeil(RegWidth, 32) - 1;

    return !Error(getLoc(),
                  ".amdgcn.next_free_{v,s}gpr symbols must be variable");
           ".amdgcn.next_free_{v,s}gpr symbols must be absolute expressions");

  if (OldCount <= NewMax)
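// Example (editor's note): after parsing "s[10:11]", updateGprCountSymbols is
// called with DwordRegIndex = 10 and RegWidth = 64, so NewMax = 11; if the
// current value of .amdgcn.next_free_sgpr is not already past that point, the
// symbol is bumped so it keeps tracking the next free register number for the
// directives emitted later.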
std::unique_ptr<AMDGPUOperand>
AMDGPUAsmParser::parseRegister(bool RestoreOnFailure) {
  const auto &Tok = getToken();
  SMLoc StartLoc = Tok.getLoc();
  SMLoc EndLoc = Tok.getEndLoc();
  RegisterKind RegKind;
  unsigned Reg, RegNum, RegWidth;

  if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth)) {

    if (!updateGprCountSymbols(RegKind, RegNum, RegWidth))

  KernelScope.usesRegister(RegKind, RegNum, RegWidth);
  return AMDGPUOperand::CreateReg(this, Reg, StartLoc, EndLoc);

  const auto &Tok = getToken();
  const auto &NextTok = peekToken();

  bool Negate = false;

    APFloat RealVal(APFloat::IEEEdouble());
    auto roundMode = APFloat::rmNearestTiesToEven;
    if (errorToBool(RealVal.convertFromString(Num, roundMode).takeError())) {

      RealVal.changeSign();

        AMDGPUOperand::CreateImm(this, RealVal.bitcastToAPInt().getZExtValue(), S,
                                 AMDGPUOperand::ImmTyNone, true));

    if (HasSP3AbsModifier) {

      if (getParser().parsePrimaryExpr(Expr, EndLoc, nullptr))

    if (Expr->evaluateAsAbsolute(IntVal)) {
      Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S));
      Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S));

  if (auto R = parseRegister()) {

  } else if (isModifier()) {
    return parseImm(Operands, HasSP3AbsMod);

bool
AMDGPUAsmParser::isNamedOperandModifier(const AsmToken &Token,
                                        const AsmToken &NextToken) const {
  return str == "abs" || str == "neg" || str == "sext";

bool
AMDGPUAsmParser::isOpcodeModifierWithVal(const AsmToken &Token,
                                         const AsmToken &NextToken) const {

bool
AMDGPUAsmParser::isOperandModifier(const AsmToken &Token,
                                   const AsmToken &NextToken) const {
  return isNamedOperandModifier(Token, NextToken) || Token.is(AsmToken::Pipe);

bool
AMDGPUAsmParser::isRegOrOperandModifier(const AsmToken &Token,
                                        const AsmToken &NextToken) const {
  return isRegister(Token, NextToken) || isOperandModifier(Token, NextToken);

bool
AMDGPUAsmParser::isModifier() {

  peekTokens(NextToken);

  return isOperandModifier(Tok, NextToken[0]) ||
         (Tok.is(AsmToken::Minus) &&
          isRegOrOperandModifier(NextToken[0], NextToken[1])) ||
         isOpcodeModifierWithVal(Tok, NextToken[0]);
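// Example (editor's note): isModifier() lets the operand parser distinguish a
// modifier prefix from a plain operand, so "abs(v1)", "|v1|", "-v2" and
// "sext(s0)" are routed through the modifier path, while a minus followed by
// a number (e.g. "-5") is still parsed as a negative immediate.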
AMDGPUAsmParser::parseSP3NegModifier() {

  peekTokens(NextToken);

      (isRegister(NextToken[0], NextToken[1]) ||
       isId(NextToken[0], "abs"))) {

    Error(getLoc(), "invalid syntax, expected 'neg' modifier");

  SP3Neg = parseSP3NegModifier();

  Neg = trySkipId("neg");
  if (Neg && SP3Neg) {
    Error(Loc, "expected register or immediate");

  Abs = trySkipId("abs");

  if (Abs && SP3Abs) {
    Error(Loc, "expected register or immediate");

  Res = parseRegOrImm(Operands, SP3Abs);

  if (SP3Abs && !skipToken(AsmToken::Pipe, "expected vertical bar"))

  AMDGPUOperand::Modifiers Mods;
  Mods.Abs = Abs || SP3Abs;
  Mods.Neg = Neg || SP3Neg;

  if (Mods.hasFPModifiers()) {
    AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
      Error(Op.getStartLoc(), "expected an absolute expression");
    Op.setModifiers(Mods);

  bool Sext = trySkipId("sext");
  if (Sext && !skipToken(AsmToken::LParen, "expected left paren after sext"))

  AMDGPUOperand::Modifiers Mods;

  if (Mods.hasIntModifiers()) {
    AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
      Error(Op.getStartLoc(), "expected an absolute expression");
    Op.setModifiers(Mods);

  return parseRegOrImmWithFPInputMods(Operands, false);

  return parseRegOrImmWithIntInputMods(Operands, false);

  auto Loc = getLoc();
  if (trySkipId("off")) {
    Operands.push_back(AMDGPUOperand::CreateImm(this, 0, Loc,
                                                AMDGPUOperand::ImmTyOff, false));

  std::unique_ptr<AMDGPUOperand> Reg = parseRegister();
    Operands.push_back(std::move(Reg));
3259unsigned AMDGPUAsmParser::checkTargetMatchPredicate(
MCInst &Inst) {
3266 return Match_InvalidOperand;
3270 getForcedEncodingSize() != 64)
3271 return Match_PreferE32;
3273 if (Inst.
getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi ||
3274 Inst.
getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) {
3277 AMDGPU::getNamedOperandIdx(Inst.
getOpcode(), AMDGPU::OpName::dst_sel);
3279 if (!
Op.isImm() ||
Op.getImm() != AMDGPU::SDWA::SdwaSel::DWORD) {
3280 return Match_InvalidOperand;
3284 return Match_Success;
3288 static const unsigned Variants[] = {
3299 if (isForcedDPP() && isForcedVOP3()) {
3303 if (getForcedEncodingSize() == 32) {
3308 if (isForcedVOP3()) {
3313 if (isForcedSDWA()) {
3319 if (isForcedDPP()) {
3327StringRef AMDGPUAsmParser::getMatchedVariantName()
const {
3328 if (isForcedDPP() && isForcedVOP3())
3331 if (getForcedEncodingSize() == 32)
3346unsigned AMDGPUAsmParser::findImplicitSGPRReadInVOP(
const MCInst &Inst)
const {
3350 case AMDGPU::FLAT_SCR:
3352 case AMDGPU::VCC_LO:
3353 case AMDGPU::VCC_HI:
3360 return AMDGPU::NoRegister;
bool AMDGPUAsmParser::isInlineConstant(const MCInst &Inst,
                                       unsigned OpIdx) const {
  // ...
  if (!AMDGPU::isSISrcOperand(Desc, OpIdx) ||
      AMDGPU::isKImmOperand(Desc, OpIdx)) {
    // ...
  }
  // ...
  int64_t Val = MO.getImm();
  auto OpSize = AMDGPU::getOperandSize(Desc, OpIdx);
  // ...
    return AMDGPU::isInlinableLiteral64(Val, hasInv2PiInlineImm());
  // ...
    return AMDGPU::isInlinableLiteral32(Val, hasInv2PiInlineImm());
  // ...
    if (OperandType == AMDGPU::OPERAND_REG_IMM_INT16 ||
        OperandType == AMDGPU::OPERAND_REG_INLINE_C_INT16 ||
        OperandType == AMDGPU::OPERAND_REG_INLINE_AC_INT16)
      return AMDGPU::isInlinableIntLiteral(Val);

    if (OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2INT16 ||
        OperandType == AMDGPU::OPERAND_REG_INLINE_AC_V2INT16 ||
        // ...
      return AMDGPU::isInlinableIntLiteralV216(Val);

    if (OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2FP16 ||
        OperandType == AMDGPU::OPERAND_REG_INLINE_AC_V2FP16 ||
        // ...
      return AMDGPU::isInlinableLiteralV216(Val, hasInv2PiInlineImm());

    return AMDGPU::isInlinableLiteral16(Val, hasInv2PiInlineImm());
  // ...
}
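
// Illustrative sketch (not part of the parser): the 32-bit case of the
// inline-constant test above ultimately reduces to a small integer range plus
// a fixed set of FP bit patterns; 'HasInv2Pi' stands in for the
// hasInv2PiInlineImm() query. A simplified stand-alone version:
#include <cstdint>

static bool isInlinableLiteral32Sketch(int32_t Literal, bool HasInv2Pi) {
  // Integer inline constants cover -16 .. 64 (0.0 is included as integer 0).
  if (Literal >= -16 && Literal <= 64)
    return true;
  const uint32_t Val = static_cast<uint32_t>(Literal);
  if (Val == 0x3e22f983u)                              // 1/(2*pi)
    return HasInv2Pi;
  return Val == 0x3f000000u || Val == 0xbf000000u ||   // +/-0.5
         Val == 0x3f800000u || Val == 0xbf800000u ||   // +/-1.0
         Val == 0x40000000u || Val == 0xc0000000u ||   // +/-2.0
         Val == 0x40800000u || Val == 0xc0800000u;     // +/-4.0
}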
unsigned AMDGPUAsmParser::getConstantBusLimit(unsigned Opcode) const {
  // ...
  case AMDGPU::V_LSHLREV_B64_e64:
  case AMDGPU::V_LSHLREV_B64_gfx10:
  case AMDGPU::V_LSHLREV_B64_e64_gfx11:
  case AMDGPU::V_LSHRREV_B64_e64:
  case AMDGPU::V_LSHRREV_B64_gfx10:
  case AMDGPU::V_LSHRREV_B64_e64_gfx11:
  case AMDGPU::V_ASHRREV_I64_e64:
  case AMDGPU::V_ASHRREV_I64_gfx10:
  case AMDGPU::V_ASHRREV_I64_e64_gfx11:
  case AMDGPU::V_LSHL_B64_e64:
  case AMDGPU::V_LSHR_B64_e64:
  case AMDGPU::V_ASHR_I64_e64:
    // ...
}

// ...
                                  bool AddMandatoryLiterals = false) {
  // ...
  int16_t ImmDeferredIdx =
      // ...
bool AMDGPUAsmParser::usesConstantBus(const MCInst &Inst, unsigned OpIdx) {
  // ...
    return !isInlineConstant(Inst, OpIdx);
  } else if (MO.isReg()) {
    // ...
    return isSGPR(PReg, TRI) && PReg != SGPR_NULL;
  }
  // ...
}
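
// Illustrative sketch (not part of the parser): usesConstantBus above decides
// whether a single operand occupies a constant-bus slot, and
// validateConstantBusLimitations below counts those slots. Roughly, each
// distinct SGPR read and one shared literal slot count against the per-opcode
// limit. A simplified stand-alone model of that accounting:
#include <set>
#include <vector>

struct SrcOperandModel {
  bool IsSGPR;         // source reads an SGPR
  unsigned SGPRIndex;  // which SGPR (meaningful only if IsSGPR)
  bool IsLiteral;      // source is a non-inline literal
};

static bool fitsConstantBusSketch(const std::vector<SrcOperandModel> &Srcs,
                                  unsigned Limit) {
  std::set<unsigned> SGPRsUsed;
  unsigned Uses = 0, LiteralSlot = 0;
  for (const SrcOperandModel &S : Srcs) {
    if (S.IsSGPR && SGPRsUsed.insert(S.SGPRIndex).second)
      ++Uses;            // each distinct SGPR counts once
    else if (S.IsLiteral)
      LiteralSlot = 1;   // one literal slot; distinct literals are rejected elsewhere
  }
  return Uses + LiteralSlot <= Limit;
}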
bool AMDGPUAsmParser::validateConstantBusLimitations(
    // ...
  const unsigned Opcode = Inst.getOpcode();
  // ...
  unsigned LastSGPR = AMDGPU::NoRegister;
  unsigned ConstantBusUseCount = 0;
  unsigned NumLiterals = 0;
  unsigned LiteralSize;
  // ...
  if (AMDGPU::hasNamedOperand(Opcode, AMDGPU::OpName::imm)) {
    // ...
  }
  // ...
  unsigned SGPRUsed = findImplicitSGPRReadInVOP(Inst);
  if (SGPRUsed != AMDGPU::NoRegister) {
    SGPRsUsed.insert(SGPRUsed);
    ++ConstantBusUseCount;
  }
  // ...
  for (int OpIdx : OpIndices) {
    // ...
    if (usesConstantBus(Inst, OpIdx)) {
      // ...
        if (SGPRsUsed.insert(LastSGPR).second) {
          ++ConstantBusUseCount;
        }
      // ...
        unsigned Size = AMDGPU::getOperandSize(Desc, OpIdx);
        // ...
        if (NumLiterals == 0) {
          // ...
        } else if (LiteralSize != Size) {
          // ...
        }
      // ...
    }
  }
  // ...
  ConstantBusUseCount += NumLiterals;

  if (ConstantBusUseCount <= getConstantBusLimit(Opcode))
    // ...

  Error(Loc, "invalid operand (violates constant bus restrictions)");
  // ...
}
bool AMDGPUAsmParser::validateVOPDRegBankConstraints(
    // ...
  const unsigned Opcode = Inst.getOpcode();
  // ...
  auto getVRegIdx = [&](unsigned, unsigned OperandIdx) {
    // ...
  };
  // ...
  auto InvalidCompOprIdx = InstInfo.getInvalidCompOperandIndex(getVRegIdx);
  if (!InvalidCompOprIdx)
    // ...

  auto CompOprIdx = *InvalidCompOprIdx;
  // ...
      std::max(InstInfo[VOPD::X].getIndexInParsedOperands(CompOprIdx),
               InstInfo[VOPD::Y].getIndexInParsedOperands(CompOprIdx));
  // ...
  auto Loc = ((AMDGPUOperand &)*Operands[ParsedIdx]).getStartLoc();
  if (CompOprIdx == VOPD::Component::DST) {
    Error(Loc, "one dst register must be even and the other odd");
  } else {
    auto CompSrcIdx = CompOprIdx - VOPD::Component::DST_NUM;
    // ...
          " operands must use different VGPR banks");
  }
  // ...
}
bool AMDGPUAsmParser::validateIntClampSupported(const MCInst &Inst) {
  // ...
  int ClampIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp);
  // ...
}
bool AMDGPUAsmParser::validateMIMGDataSize(const MCInst &Inst,
                                           const SMLoc &IDLoc) {
  // ...
  int VDataIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdata);
  int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
  int TFEIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::tfe);
  // ...
  unsigned VDataSize = AMDGPU::getRegOperandSize(getMRI(), Desc, VDataIdx);
  unsigned TFESize = (TFEIdx != -1 && Inst.getOperand(TFEIdx).getImm()) ? 1 : 0;
  // ...
  bool IsPackedD16 = false;
  // ...
    int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16);
    IsPackedD16 = D16Idx >= 0;
    // ...
      DataSize = (DataSize + 1) / 2;
  // ...
  if ((VDataSize / 4) == DataSize + TFESize)
    // ...

  // ...
    Modifiers = IsPackedD16 ? "dmask and d16" : "dmask";
  // ...
    Modifiers = IsPackedD16 ? "dmask, d16 and tfe" : "dmask and tfe";

  Error(IDLoc, Twine("image data size does not match ") + Modifiers);
  // ...
}
bool AMDGPUAsmParser::validateMIMGAddrSize(const MCInst &Inst) {
  // ...
      AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode);
  int VAddr0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vaddr0);
  int SrsrcIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::srsrc);
  int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim);
  int A16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::a16);
  // ...
  assert(SrsrcIdx > VAddr0Idx);
  // ...
  bool IsNSA = SrsrcIdx - VAddr0Idx > 1;
  unsigned ActualAddrSize =
      IsNSA ? SrsrcIdx - VAddr0Idx
            : AMDGPU::getRegOperandSize(getMRI(), Desc, VAddr0Idx) / 4;
  // ...
  unsigned ExpectedAddrSize =
      AMDGPU::getAddrSizeMIMGOp(BaseOpcode, DimInfo, IsA16, hasG16());
  // ...
    if (hasPartialNSAEncoding() && ExpectedAddrSize > getNSAMaxSize()) {
      int VAddrLastIdx = SrsrcIdx - 1;
      unsigned VAddrLastSize =
          AMDGPU::getRegOperandSize(getMRI(), Desc, VAddrLastIdx) / 4;
      // ...
      return VAddrLastIdx - VAddr0Idx + VAddrLastSize == ExpectedAddrSize;
    }
  // ...
    if (ExpectedAddrSize > 12)
      ExpectedAddrSize = 16;
    // ...
    if (ActualAddrSize == 8 && (ExpectedAddrSize >= 5 && ExpectedAddrSize <= 7))
      // ...

  return ActualAddrSize == ExpectedAddrSize;
}
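
// Illustrative sketch (not part of the parser): the two MIMG size checks
// above, restated stand-alone. The data check expects popcount(dmask)
// registers, halved (rounded up) for packed D16 and plus one for TFE. The
// non-NSA address check allows the register tuple to be rounded up to the
// next legal tuple width, which is what the >12 -> 16 and 8-for-5..7 cases
// model.
#include <bitset>

static bool imageDataSizeMatchesSketch(unsigned VDataRegs, unsigned DMask,
                                       bool PackedD16, bool TFE) {
  unsigned DataRegs = std::bitset<4>(DMask & 0xf).count();
  if (PackedD16)
    DataRegs = (DataRegs + 1) / 2;
  return VDataRegs == DataRegs + (TFE ? 1u : 0u);
}

static bool contiguousAddrSizeOkSketch(unsigned ActualDwords,
                                       unsigned ExpectedDwords) {
  if (ExpectedDwords > 12)
    ExpectedDwords = 16;
  if (ActualDwords == 8 && ExpectedDwords >= 5 && ExpectedDwords <= 7)
    return true;
  return ActualDwords == ExpectedDwords;
}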
bool AMDGPUAsmParser::validateMIMGAtomicDMask(const MCInst &Inst) {
  // ...
  int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
  // ...
  return DMask == 0x1 || DMask == 0x3 || DMask == 0xf;
}

bool AMDGPUAsmParser::validateMIMGGatherDMask(const MCInst &Inst) {
  // ...
  int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
  // ...
  return DMask == 0x1 || DMask == 0x2 || DMask == 0x4 || DMask == 0x8;
}
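
// Illustrative sketch (not part of the parser): the dmask rules above, stated
// directly. Atomic image ops may access 1, 2 (cmpswap) or 4 components;
// gather4 returns one component per texel, so exactly one dmask bit may be
// set.
static bool atomicDMaskOkSketch(unsigned DMask) {
  return DMask == 0x1 || DMask == 0x3 || DMask == 0xf;
}
static bool gatherDMaskOkSketch(unsigned DMask) {
  return DMask == 0x1 || DMask == 0x2 || DMask == 0x4 || DMask == 0x8;
}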
bool AMDGPUAsmParser::validateMIMGMSAA(const MCInst &Inst) {
  // ...
      AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode);
  // ...
  if (!BaseOpcode->MSAA)
    // ...
  int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim);
  // ...
  return DimInfo->MSAA;
}
// ...
  case AMDGPU::V_MOVRELS_B32_sdwa_gfx10:
  case AMDGPU::V_MOVRELSD_B32_sdwa_gfx10:
  case AMDGPU::V_MOVRELSD_2_B32_sdwa_gfx10:
    // ...

bool AMDGPUAsmParser::validateMovrels(const MCInst &Inst,
                                      // ...
  const int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
  // ...
  Error(ErrLoc, "source operand must be a VGPR");
  // ...
}
bool AMDGPUAsmParser::validateMAIAccWrite(const MCInst &Inst,
                                          // ...
  if (Opc != AMDGPU::V_ACCVGPR_WRITE_B32_vi)
    // ...
  const int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
  // ...
        "source operand must be either a VGPR or an inline constant");
  // ...
}

bool AMDGPUAsmParser::validateMAISrc2(const MCInst &Inst,
                                      // ...
      !getFeatureBits()[FeatureMFMAInlineLiteralBug])
    // ...
  if (Inst.getOperand(Src2Idx).isImm() && isInlineConstant(Inst, Src2Idx)) {
    // ...
          "inline constants are not allowed for this operand");
    // ...
  }
  // ...
}
bool AMDGPUAsmParser::validateMFMA(const MCInst &Inst,
                                   // ...
  const int Src2Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2);
  // ...
  if (Src2Reg == DstReg)
    // ...
  if (TRI->getRegClass(Desc.operands()[0].RegClass).getSizeInBits() <= 128)
    // ...
  if (TRI->regsOverlap(Src2Reg, DstReg)) {
    // ...
          "source 2 operand must not partially overlap with dst");
    // ...
  }
  // ...
}
bool AMDGPUAsmParser::validateDivScale(const MCInst &Inst) {
  // ...
  case V_DIV_SCALE_F32_gfx6_gfx7:
  case V_DIV_SCALE_F32_vi:
  case V_DIV_SCALE_F32_gfx10:
  case V_DIV_SCALE_F64_gfx6_gfx7:
  case V_DIV_SCALE_F64_vi:
  case V_DIV_SCALE_F64_gfx10:
    // ...

  for (auto Name : {AMDGPU::OpName::src0_modifiers,
                    AMDGPU::OpName::src2_modifiers,
                    AMDGPU::OpName::src2_modifiers}) {
    // ...
  }
  // ...
}
bool AMDGPUAsmParser::validateMIMGD16(const MCInst &Inst) {
  // ...
  int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16);
  // ...
}
// ...
  case AMDGPU::V_SUBREV_F32_e32:
  case AMDGPU::V_SUBREV_F32_e64:
  case AMDGPU::V_SUBREV_F32_e32_gfx10:
  case AMDGPU::V_SUBREV_F32_e32_gfx6_gfx7:
  case AMDGPU::V_SUBREV_F32_e32_vi:
  case AMDGPU::V_SUBREV_F32_e64_gfx10:
  case AMDGPU::V_SUBREV_F32_e64_gfx6_gfx7:
  case AMDGPU::V_SUBREV_F32_e64_vi:

  case AMDGPU::V_SUBREV_CO_U32_e32:
  case AMDGPU::V_SUBREV_CO_U32_e64:
  case AMDGPU::V_SUBREV_I32_e32_gfx6_gfx7:
  case AMDGPU::V_SUBREV_I32_e64_gfx6_gfx7:

  case AMDGPU::V_SUBBREV_U32_e32:
  case AMDGPU::V_SUBBREV_U32_e64:
  case AMDGPU::V_SUBBREV_U32_e32_gfx6_gfx7:
  case AMDGPU::V_SUBBREV_U32_e32_vi:
  case AMDGPU::V_SUBBREV_U32_e64_gfx6_gfx7:
  case AMDGPU::V_SUBBREV_U32_e64_vi:

  case AMDGPU::V_SUBREV_U32_e32:
  case AMDGPU::V_SUBREV_U32_e64:
  case AMDGPU::V_SUBREV_U32_e32_gfx9:
  case AMDGPU::V_SUBREV_U32_e32_vi:
  case AMDGPU::V_SUBREV_U32_e64_gfx9:
  case AMDGPU::V_SUBREV_U32_e64_vi:

  case AMDGPU::V_SUBREV_F16_e32:
  case AMDGPU::V_SUBREV_F16_e64:
  case AMDGPU::V_SUBREV_F16_e32_gfx10:
  case AMDGPU::V_SUBREV_F16_e32_vi:
  case AMDGPU::V_SUBREV_F16_e64_gfx10:
  case AMDGPU::V_SUBREV_F16_e64_vi:

  case AMDGPU::V_SUBREV_U16_e32:
  case AMDGPU::V_SUBREV_U16_e64:
  case AMDGPU::V_SUBREV_U16_e32_vi:
  case AMDGPU::V_SUBREV_U16_e64_vi:

  case AMDGPU::V_SUBREV_CO_U32_e32_gfx9:
  case AMDGPU::V_SUBREV_CO_U32_e64_gfx10:
  case AMDGPU::V_SUBREV_CO_U32_e64_gfx9:

  case AMDGPU::V_SUBBREV_CO_U32_e32_gfx9:
  case AMDGPU::V_SUBBREV_CO_U32_e64_gfx9:

  case AMDGPU::V_SUBREV_NC_U32_e32_gfx10:
  case AMDGPU::V_SUBREV_NC_U32_e64_gfx10:

  case AMDGPU::V_SUBREV_CO_CI_U32_e32_gfx10:
  case AMDGPU::V_SUBREV_CO_CI_U32_e64_gfx10:

  case AMDGPU::V_LSHRREV_B32_e32:
  case AMDGPU::V_LSHRREV_B32_e64:
  case AMDGPU::V_LSHRREV_B32_e32_gfx6_gfx7:
  case AMDGPU::V_LSHRREV_B32_e64_gfx6_gfx7:
  case AMDGPU::V_LSHRREV_B32_e32_vi:
  case AMDGPU::V_LSHRREV_B32_e64_vi:
  case AMDGPU::V_LSHRREV_B32_e32_gfx10:
  case AMDGPU::V_LSHRREV_B32_e64_gfx10:

  case AMDGPU::V_ASHRREV_I32_e32:
  case AMDGPU::V_ASHRREV_I32_e64:
  case AMDGPU::V_ASHRREV_I32_e32_gfx10:
  case AMDGPU::V_ASHRREV_I32_e32_gfx6_gfx7:
  case AMDGPU::V_ASHRREV_I32_e32_vi:
  case AMDGPU::V_ASHRREV_I32_e64_gfx10:
  case AMDGPU::V_ASHRREV_I32_e64_gfx6_gfx7:
  case AMDGPU::V_ASHRREV_I32_e64_vi:

  case AMDGPU::V_LSHLREV_B32_e32:
  case AMDGPU::V_LSHLREV_B32_e64:
  case AMDGPU::V_LSHLREV_B32_e32_gfx10:
  case AMDGPU::V_LSHLREV_B32_e32_gfx6_gfx7:
  case AMDGPU::V_LSHLREV_B32_e32_vi:
  case AMDGPU::V_LSHLREV_B32_e64_gfx10:
  case AMDGPU::V_LSHLREV_B32_e64_gfx6_gfx7:
  case AMDGPU::V_LSHLREV_B32_e64_vi:

  case AMDGPU::V_LSHLREV_B16_e32:
  case AMDGPU::V_LSHLREV_B16_e64:
  case AMDGPU::V_LSHLREV_B16_e32_vi:
  case AMDGPU::V_LSHLREV_B16_e64_vi:
  case AMDGPU::V_LSHLREV_B16_gfx10:

  case AMDGPU::V_LSHRREV_B16_e32:
  case AMDGPU::V_LSHRREV_B16_e64:
  case AMDGPU::V_LSHRREV_B16_e32_vi:
  case AMDGPU::V_LSHRREV_B16_e64_vi:
  case AMDGPU::V_LSHRREV_B16_gfx10:

  case AMDGPU::V_ASHRREV_I16_e32:
  case AMDGPU::V_ASHRREV_I16_e64:
  case AMDGPU::V_ASHRREV_I16_e32_vi:
  case AMDGPU::V_ASHRREV_I16_e64_vi:
  case AMDGPU::V_ASHRREV_I16_gfx10:

  case AMDGPU::V_LSHLREV_B64_e64:
  case AMDGPU::V_LSHLREV_B64_gfx10:
  case AMDGPU::V_LSHLREV_B64_vi:

  case AMDGPU::V_LSHRREV_B64_e64:
  case AMDGPU::V_LSHRREV_B64_gfx10:
  case AMDGPU::V_LSHRREV_B64_vi:

  case AMDGPU::V_ASHRREV_I64_e64:
  case AMDGPU::V_ASHRREV_I64_gfx10:
  case AMDGPU::V_ASHRREV_I64_vi:

  case AMDGPU::V_PK_LSHLREV_B16:
  case AMDGPU::V_PK_LSHLREV_B16_gfx10:
  case AMDGPU::V_PK_LSHLREV_B16_vi:

  case AMDGPU::V_PK_LSHRREV_B16:
  case AMDGPU::V_PK_LSHRREV_B16_gfx10:
  case AMDGPU::V_PK_LSHRREV_B16_vi:
  case AMDGPU::V_PK_ASHRREV_I16:
  case AMDGPU::V_PK_ASHRREV_I16_gfx10:
  case AMDGPU::V_PK_ASHRREV_I16_vi:
    // ...
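
// Illustrative sketch (not part of the parser): the opcodes listed above are
// the "rev" forms, which consume their sources in swapped order; for example
// v_subrev_f32 computes src1 - src0, and v_lshlrev_b32 shifts src1 left by
// src0. A minimal model of that operand swap:
#include <cstdint>

static float subrevF32Sketch(float Src0, float Src1) { return Src1 - Src0; }
static uint32_t lshlrevB32Sketch(uint32_t Src0, uint32_t Src1) {
  return Src1 << (Src0 & 31);  // 32-bit shifts use only the low 5 bits
}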
std::optional<StringRef>
AMDGPUAsmParser::validateLdsDirect(const MCInst &Inst) {
  using namespace SIInstrFlags;
  const unsigned Opcode = Inst.getOpcode();
  // ...
  if ((Desc.TSFlags & Enc) == 0)
    return std::nullopt;

  for (auto SrcName : {OpName::src0, OpName::src1, OpName::src2}) {
    // ...
    if (Src.isReg() && Src.getReg() == LDS_DIRECT) {
      // ...
        return StringRef("lds_direct is not supported on this GPU");
      // ...
        return StringRef("lds_direct cannot be used with this instruction");
      if (SrcName != OpName::src0)
        return StringRef("lds_direct may be used as src0 only");
    }
  }

  return std::nullopt;
}
// ...
  for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
    if (Op.isFlatOffset())
      return Op.getStartLoc();
  }
  // ...
bool AMDGPUAsmParser::validateFlatOffset(const MCInst &Inst,
                                         // ...
  auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset);
  // ...
  if (!hasFlatOffsets() && Op.getImm() != 0) {
    // ...
          "flat offset modifier is not supported on this GPU");
    // ...
  }
  // ...
  unsigned OffsetSize = AMDGPU::getNumFlatOffsetBits(getSTI());
  bool AllowNegative =
      // ...
  if (!isIntN(OffsetSize, Op.getImm()) || (!AllowNegative && Op.getImm() < 0)) {
    // ...
          Twine("expected a ") +
              (AllowNegative ? Twine(OffsetSize) + "-bit signed offset"
                             : Twine(OffsetSize - 1) + "-bit unsigned offset"));
    // ...
  }
  // ...
}
// ...
  for (unsigned i = 2, e = Operands.size(); i != e; ++i) {
    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
    if (Op.isSMEMOffset() || Op.isSMEMOffsetMod())
      return Op.getStartLoc();
  }
  // ...
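
// Illustrative sketch (not part of the parser): the flat-offset range test
// above and the SMEM offset test below both boil down to "fits in an N-bit
// signed field" when negative offsets are allowed, or "fits in an (N-1)-bit
// unsigned field" when they are not. A stand-alone version (assumes
// 1 <= NumBits <= 63):
#include <cstdint>

static bool offsetFitsSketch(int64_t Offset, unsigned NumBits,
                             bool AllowNegative) {
  const int64_t Hi = (int64_t(1) << (NumBits - 1)) - 1;
  const int64_t Lo = AllowNegative ? -Hi - 1 : 0;
  return Offset >= Lo && Offset <= Hi;
}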
bool AMDGPUAsmParser::validateSMEMOffset(const MCInst &Inst,
                                         // ...
  auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset);
  // ...
  bool IsBuffer = AMDGPU::getSMEMIsBuffer(Opcode);
  if (AMDGPU::isLegalSMRDEncodedUnsignedOffset(getSTI(), Offset) ||
      AMDGPU::isLegalSMRDEncodedSignedOffset(getSTI(), Offset, IsBuffer))
    // ...
        (isVI() || IsBuffer) ? "expected a 20-bit unsigned offset"
                             : "expected a 21-bit signed offset");
  // ...
}
bool AMDGPUAsmParser::validateSOPLiteral(const MCInst &Inst) const {
  // ...
  const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
  const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
  // ...
  const int OpIndices[] = { Src0Idx, Src1Idx };

  unsigned NumExprs = 0;
  unsigned NumLiterals = 0;
  // ...
  for (int OpIdx : OpIndices) {
    if (OpIdx == -1)
      break;
    // ...
    if (AMDGPU::isSISrcOperand(Desc, OpIdx)) {
      if (MO.isImm() && !isInlineConstant(Inst, OpIdx)) {
        // ...
        if (NumLiterals == 0 || LiteralValue != Value) {
          // ...
        }
      } else if (MO.isExpr()) {
        // ...
      }
    }
  }
  // ...
  return NumLiterals + NumExprs <= 1;
}
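
// Illustrative sketch (not part of the parser): SOP instructions encode at
// most one 32-bit literal, so two non-inline immediates are only legal when
// their values are identical and can share that single literal slot.
#include <cstdint>
#include <optional>

static bool sopLiteralsOkSketch(std::optional<int64_t> Src0Literal,
                                std::optional<int64_t> Src1Literal) {
  if (Src0Literal && Src1Literal)
    return *Src0Literal == *Src1Literal;  // must share one literal
  return true;                            // zero or one literal is fine
}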
bool AMDGPUAsmParser::validateOpSel(const MCInst &Inst) {
  // ...
    int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
    // ...

  // ...
    int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
    if (OpSelIdx != -1) {
      // ...
    }
    int OpSelHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel_hi);
    if (OpSelHiIdx != -1) {
      // ...
    }

  // ...
    int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
    // ...
}
bool AMDGPUAsmParser::validateDPP(const MCInst &Inst,
                                  // ...
  int DppCtrlIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dpp_ctrl);
  // ...
    if (!AMDGPU::isLegal64BitDPPControl(DppCtrl)) {
      // ...
      int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
      // ...
      Error(S, "64 bit dpp only supports row_newbcast");
      // ...
    }
  // ...
}
bool AMDGPUAsmParser::validateVccOperand(unsigned Reg) const {
  auto FB = getFeatureBits();
  return (FB[AMDGPU::FeatureWavefrontSize64] && Reg == AMDGPU::VCC) ||
         (FB[AMDGPU::FeatureWavefrontSize32] && Reg == AMDGPU::VCC_LO);
}
bool AMDGPUAsmParser::validateVOPLiteral(const MCInst &Inst,
                                         // ...
      !HasMandatoryLiteral && !isVOPD(Opcode))
    // ...

  unsigned NumExprs = 0;
  unsigned NumLiterals = 0;
  // ...
  for (int OpIdx : OpIndices) {
    // ...
    if (MO.isImm() && !isInlineConstant(Inst, OpIdx)) {
      // ...
      if (NumLiterals == 0 || LiteralValue != Value) {
        // ...
      }
    } else if (MO.isExpr()) {
      // ...
    }
  }
  NumLiterals += NumExprs;
  // ...
  if (!HasMandatoryLiteral && !getFeatureBits()[FeatureVOP3Literal]) {
    Error(getLitLoc(Operands), "literal operands are not supported");
    // ...
  }

  if (NumLiterals > 1) {
    Error(getLitLoc(Operands, true),
          "only one unique literal operand is allowed");
    // ...
  }
  // ...
}
// ...
  int OpIdx = AMDGPU::getNamedOperandIdx(Inst.getOpcode(), NameIdx);
  // ...
  unsigned Sub = MRI->getSubReg(Op.getReg(), AMDGPU::sub0);
  auto Reg = Sub ? Sub : Op.getReg();
  // ...
  return AGPR32.contains(Reg) ? 1 : 0;
}
bool AMDGPUAsmParser::validateAGPRLdSt(const MCInst &Inst) const {
  // ...
                                      : AMDGPU::OpName::vdata;
  // ...
  if (Data2Areg >= 0 && Data2Areg != DataAreg)
    // ...

  auto FB = getFeatureBits();
  if (FB[AMDGPU::FeatureGFX90AInsts]) {
    if (DataAreg < 0 || DstAreg < 0)
      // ...
    return DstAreg == DataAreg;
  }

  return DstAreg < 1 && DataAreg < 1;
}
bool AMDGPUAsmParser::validateVGPRAlign(const MCInst &Inst) const {
  auto FB = getFeatureBits();
  if (!FB[AMDGPU::FeatureGFX90AInsts])
    // ...
    unsigned Sub = MRI->getSubReg(Op.getReg(), AMDGPU::sub0);
    // ...
    if (VGPR32.contains(Sub) && ((Sub - AMDGPU::VGPR0) & 1))
      // ...
    if (AGPR32.contains(Sub) && ((Sub - AMDGPU::AGPR0) & 1))
      // ...
  // ...
}
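
// Illustrative sketch (not part of the parser): on gfx90a, VGPR/AGPR register
// tuples must start on an even register index, which is what the
// (Sub - VGPR0) & 1 and (Sub - AGPR0) & 1 tests above enforce. Stated
// stand-alone:
static bool tupleAlignedSketch(unsigned FirstRegIndex, unsigned NumRegs) {
  return NumRegs < 2 || (FirstRegIndex & 1) == 0;
}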
// ...
  for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
    // ...
      return Op.getStartLoc();
  }
  // ...
bool AMDGPUAsmParser::validateBLGP(const MCInst &Inst,
                                   // ...
  int BlgpIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::blgp);
  // ...
  auto FB = getFeatureBits();
  bool UsesNeg = false;
  if (FB[AMDGPU::FeatureGFX940Insts]) {
    // ...
    case AMDGPU::V_MFMA_F64_16X16X4F64_gfx940_acd:
    case AMDGPU::V_MFMA_F64_16X16X4F64_gfx940_vcd:
    case AMDGPU::V_MFMA_F64_4X4X4F64_gfx940_acd:
    case AMDGPU::V_MFMA_F64_4X4X4F64_gfx940_vcd:
      // ...
  }

  if (IsNeg == UsesNeg)
    // ...

  // ...
        UsesNeg ? "invalid modifier: blgp is not supported"
                : "invalid modifier: neg is not supported");
  // ...
}
bool AMDGPUAsmParser::validateWaitCnt(const MCInst &Inst,
                                      // ...
  if (Opc != AMDGPU::S_WAITCNT_EXPCNT_gfx11 &&
      Opc != AMDGPU::S_WAITCNT_LGKMCNT_gfx11 &&
      Opc != AMDGPU::S_WAITCNT_VMCNT_gfx11 &&
      Opc != AMDGPU::S_WAITCNT_VSCNT_gfx11)
    // ...

  int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::sdst);
  // ...
  if (Reg == AMDGPU::SGPR_NULL)
    // ...

  Error(RegLoc, "src0 must be null");
  // ...
}
bool AMDGPUAsmParser::validateGWS(const MCInst &Inst,
                                  // ...
  if (!getFeatureBits()[AMDGPU::FeatureGFX90AInsts])
    // ...
  if (Opc != AMDGPU::DS_GWS_INIT_vi && Opc != AMDGPU::DS_GWS_BARRIER_vi &&
      Opc != AMDGPU::DS_GWS_SEMA_BR_vi)
    // ...
      AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::data0);
  // ...
  auto RegIdx = Reg - (VGPR32.contains(Reg) ? AMDGPU::VGPR0 : AMDGPU::AGPR0);
  // ...
  Error(RegLoc, "vgpr must be even aligned");
  // ...
}
bool AMDGPUAsmParser::validateCoherencyBits(const MCInst &Inst,
                                            // ...
                                            const SMLoc &IDLoc) {
  int CPolPos = AMDGPU::getNamedOperandIdx(Inst.getOpcode(),
                                           AMDGPU::OpName::cpol);
  // ...
    Error(S, "cache policy is not supported for SMRD instructions");
  // ...
    Error(IDLoc, "invalid cache policy for SMEM instruction");
  // ...
    Error(S, "scc is not supported on this GPU");
  // ...
            : "instruction must use glc");
  // ...
          &CStr.data()[CStr.find(isGFX940() ? "sc0" : "glc")]);
  // ...
            : "instruction must not use glc");
  // ...
}
// ...
    if (!Operand->isReg())
      // ...
    unsigned Reg = Operand->getReg();
    if (Reg == SRC_EXECZ || Reg == SRC_VCCZ) {
      // ...
            "execz and vccz are not supported on this GPU");
      // ...
    }
  // ...
bool AMDGPUAsmParser::validateTFE(const MCInst &Inst,
                                  // ...
      Error(Loc, "TFE modifier has no meaning for store instructions");
      // ...
}
bool AMDGPUAsmParser::validateInstruction(const MCInst &Inst,
                                          // ...
  if (auto ErrMsg = validateLdsDirect(Inst)) {
    // ...
  }
  if (!validateSOPLiteral(Inst)) {
    // ...
          "only one unique literal operand is allowed");
    // ...
  }
  if (!validateVOPLiteral(Inst, Operands)) {
    // ...
  }
  if (!validateConstantBusLimitations(Inst, Operands)) {
    // ...
  }
  if (!validateVOPDRegBankConstraints(Inst, Operands)) {
    // ...
  }
  if (!validateIntClampSupported(Inst)) {
    // ...
          "integer clamping is not supported on this GPU");
    // ...
  }
  if (!validateOpSel(Inst)) {
    // ...
          "invalid op_sel operand");
    // ...
  }
  if (!validateDPP(Inst, Operands)) {
    // ...
  }
  if (!validateMIMGD16(Inst)) {
    // ...
          "d16 modifier is not supported on this GPU");
    // ...
  }
  if (!validateMIMGMSAA(Inst)) {
    // ...
          "invalid dim; must be MSAA type");
    // ...
  }
  if (!validateMIMGDataSize(Inst, IDLoc)) {
    // ...
  }
  if (!validateMIMGAddrSize(Inst)) {
    // ...
          "image address size does not match dim and a16");
    // ...
  }
  if (!validateMIMGAtomicDMask(Inst)) {
    // ...
          "invalid atomic image dmask");
    // ...
  }
  if (!validateMIMGGatherDMask(Inst)) {
    // ...
          "invalid image_gather dmask: only one bit must be set");
    // ...
  }
  if (!validateMovrels(Inst, Operands)) {
    // ...
  }
  if (!validateFlatOffset(Inst, Operands)) {
    // ...
  }
  if (!validateSMEMOffset(Inst, Operands)) {
    // ...
  }
  if (!validateMAIAccWrite(Inst, Operands)) {
    // ...
  }
  if (!validateMAISrc2(Inst, Operands)) {
    // ...
  }
  if (!validateMFMA(Inst, Operands)) {
    // ...
  }
  if (!validateCoherencyBits(Inst, Operands, IDLoc)) {
    // ...
  }
  if (!validateAGPRLdSt(Inst)) {
    Error(IDLoc, getFeatureBits()[AMDGPU::FeatureGFX90AInsts]
        ? "invalid register class: data and dst should be all VGPR or AGPR"
        : "invalid register class: agpr loads and stores not supported on this GPU"
    );
    // ...
  }
  if (!validateVGPRAlign(Inst)) {
    // ...
          "invalid register class: vgpr tuples must be 64 bit aligned");
    // ...
  }
  if (!validateGWS(Inst, Operands)) {
    // ...
  }
  if (!validateBLGP(Inst, Operands)) {
    // ...
  }
  if (!validateDivScale(Inst)) {
    Error(IDLoc, "ABS not allowed in VOP3B instructions");
    // ...
  }
  if (!validateWaitCnt(Inst, Operands)) {
    // ...
  }
  if (!validateExeczVcczOperands(Operands)) {
    // ...
  }
  if (!validateTFE(Inst, Operands)) {
    // ...
  }
  // ...
}
// ...
                                unsigned VariantID = 0);

// ...
                                unsigned VariantID);

bool AMDGPUAsmParser::isSupportedMnemo(StringRef Mnemo,
                                       // ...

bool AMDGPUAsmParser::isSupportedMnemo(StringRef Mnemo,
                                       // ...
  for (auto Variant : Variants) {
    // ...
  }
  // ...
bool AMDGPUAsmParser::checkUnsupportedInstruction(StringRef Mnemo,
                                                  const SMLoc &IDLoc) {
  FeatureBitset FBS = ComputeAvailableFeatures(getFeatureBits());
  // ...
  if (isSupportedMnemo(Mnemo, FBS, getMatchedVariants()))
    // ...

  getParser().clearPendingErrors();
  // ...
  StringRef VariantName = getMatchedVariantName();
  if (!VariantName.empty() && isSupportedMnemo(Mnemo, FBS)) {
    // ...
        " variant of this instruction is not supported"));
  }
  // ...
  if (isGFX10Plus() && getFeatureBits()[AMDGPU::FeatureWavefrontSize64] &&
      !getFeatureBits()[AMDGPU::FeatureWavefrontSize32]) {
    // ...
    FeaturesWS32.flip(AMDGPU::FeatureWavefrontSize64)
        .flip(AMDGPU::FeatureWavefrontSize32);
    // ...
        ComputeAvailableFeatures(FeaturesWS32);
    // ...
    if (isSupportedMnemo(Mnemo, AvailableFeaturesWS32, getMatchedVariants()))
      return Error(IDLoc, "instruction requires wavesize=32");
  }
  // ...
    return Error(IDLoc, "instruction not supported on this GPU");
  // ...
  return Error(IDLoc, "invalid instruction" + Suggestion);
}
// ...
  const auto &Op = ((AMDGPUOperand &)*Operands[InvalidOprIdx]);
  if (Op.isToken() && InvalidOprIdx > 1) {
    const auto &PrevOp = ((AMDGPUOperand &)*Operands[InvalidOprIdx - 1]);
    return PrevOp.isToken() && PrevOp.getToken() == "::";
  }
  // ...
bool AMDGPUAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
                                              // ...
                                              bool MatchingInlineAsm) {
  // ...
  unsigned Result = Match_Success;
  for (auto Variant : getMatchedVariants()) {
    // ...
    auto R = MatchInstructionImpl(Operands, Inst, EI, MatchingInlineAsm,
                                  // ...
    if ((R == Match_Success) ||
        (R == Match_PreferE32) ||
        (R == Match_MissingFeature && Result != Match_PreferE32) ||
        (R == Match_InvalidOperand && Result != Match_MissingFeature
                                   && Result != Match_PreferE32) ||
        (R == Match_MnemonicFail   && Result != Match_InvalidOperand
                                   && Result != Match_MissingFeature
                                   && Result != Match_PreferE32)) {
      // ...
      if (R == Match_Success)
        // ...
    }
  }

  if (Result == Match_Success) {
    if (!validateInstruction(Inst, IDLoc, Operands)) {
      // ...
    }
    // ...
  }
  // ...
  if (checkUnsupportedInstruction(Mnemo, IDLoc)) {
    // ...
  }
  // ...
  case Match_MissingFeature:
    // ...
    return Error(IDLoc, "operands are not valid for this GPU or mode");

  case Match_InvalidOperand: {
    SMLoc ErrorLoc = IDLoc;
    // ...
      return Error(IDLoc, "too few operands for instruction");
    // ...
    if (ErrorLoc == SMLoc())
      // ...
      return Error(ErrorLoc, "invalid VOPDY instruction");
    // ...
    return Error(ErrorLoc, "invalid operand for instruction");
  }

  case Match_PreferE32:
    return Error(IDLoc, "internal error: instruction without _e64 suffix "
                        "should be encoded as e32");
  case Match_MnemonicFail:
    // ...
}
bool AMDGPUAsmParser::ParseAsAbsoluteExpression(uint32_t &Ret) {
  // ...
  if (getParser().parseAbsoluteExpression(Tmp)) {
    // ...
  }
  // ...
}

bool AMDGPUAsmParser::ParseDirectiveMajorMinor(uint32_t &Major,
                                               // ...
  if (ParseAsAbsoluteExpression(Major))
    return TokError("invalid major version");
  // ...
    return TokError("minor version number required, comma expected");

  if (ParseAsAbsoluteExpression(Minor))
    return TokError("invalid minor version");
  // ...
}
bool AMDGPUAsmParser::ParseDirectiveAMDGCNTarget() {
  // ...
    return TokError("directive only supported for amdgcn architecture");

  std::string TargetIDDirective;
  SMLoc TargetStart = getTok().getLoc();
  if (getParser().parseEscapedString(TargetIDDirective))
    // ...
  if (getTargetStreamer().getTargetID()->toString() != TargetIDDirective)
    return getParser().Error(TargetRange.Start,
        (Twine(".amdgcn_target directive's target id ") +
         Twine(TargetIDDirective) +
         Twine(" does not match the specified target id ") +
         Twine(getTargetStreamer().getTargetID()->toString())).str());
  // ...
}
bool AMDGPUAsmParser::OutOfRangeError(SMRange Range) {
  return Error(Range.Start, "value out of range", Range);
}
bool AMDGPUAsmParser::calculateGPRBlocks(
    const FeatureBitset &Features, bool VCCUsed, bool FlatScrUsed,
    bool XNACKUsed, std::optional<bool> EnableWavefrontSize32,
    unsigned NextFreeVGPR, SMRange VGPRRange, unsigned NextFreeSGPR,
    SMRange SGPRRange, unsigned &VGPRBlocks, unsigned &SGPRBlocks) {
  // ...
  unsigned MaxAddressableNumSGPRs =
      // ...
  if (Version.Major >= 8 && !Features.test(FeatureSGPRInitBug) &&
      NumSGPRs > MaxAddressableNumSGPRs)
    return OutOfRangeError(SGPRRange);
  // ...
  if ((Version.Major <= 7 || Features.test(FeatureSGPRInitBug)) &&
      NumSGPRs > MaxAddressableNumSGPRs)
    return OutOfRangeError(SGPRRange);

  if (Features.test(FeatureSGPRInitBug))
    // ...
  // ...
}
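
// Illustrative sketch (not part of the parser), assuming the usual encoding
// granules: the *_BLOCKS fields computed by calculateGPRBlocks store
// ceil(used / granule) - 1, where the SGPR granule is 8 and the VGPR granule
// is 4 (8 when a wave32-style allocation granule is in effect).
static unsigned gprBlocksSketch(unsigned NumUsedGPRs, unsigned Granule) {
  if (NumUsedGPRs == 0)
    NumUsedGPRs = 1;                          // at least one register is encoded
  return (NumUsedGPRs + Granule - 1) / Granule - 1;
}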
bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() {
  // ...
    return TokError("directive only supported for amdgcn architecture");
  // ...
    return TokError("directive only supported for amdhsa OS");
  // ...
  if (getParser().parseIdentifier(KernelName))
    // ...

  unsigned ImpliedUserSGPRCount = 0;
  // ...
  std::optional<unsigned> ExplicitUserSGPRCount;
  bool ReserveVCC = true;
  bool ReserveFlatScr = true;
  std::optional<bool> EnableWavefrontSize32;
  // ...
    SMRange IDRange = getTok().getLocRange();
    if (!parseId(ID, "expected .amdhsa_ directive or .end_amdhsa_kernel"))
      // ...

    if (ID == ".end_amdhsa_kernel")
      // ...
      return TokError(".amdhsa_ directives cannot be repeated");

    SMLoc ValStart = getLoc();
    // ...
    if (getParser().parseAbsoluteExpression(IVal))
      // ...
    SMLoc ValEnd = getLoc();
    // ...
      return OutOfRangeError(ValRange);
    // ...

#define PARSE_BITS_ENTRY(FIELD, ENTRY, VALUE, RANGE)                          \
  if (!isUInt<ENTRY##_WIDTH>(VALUE))                                          \
    return OutOfRangeError(RANGE);                                            \
  AMDHSA_BITS_SET(FIELD, ENTRY, VALUE);

    if (ID == ".amdhsa_group_segment_fixed_size") {
      // ...
        return OutOfRangeError(ValRange);
      // ...
    } else if (ID == ".amdhsa_private_segment_fixed_size") {
      // ...
        return OutOfRangeError(ValRange);
      // ...
    } else if (ID == ".amdhsa_kernarg_size") {
      // ...
        return OutOfRangeError(ValRange);
      // ...
    } else if (ID == ".amdhsa_user_sgpr_count") {
      ExplicitUserSGPRCount = Val;
    } else if (ID == ".amdhsa_user_sgpr_private_segment_buffer") {
      // ...
          "directive is not supported with architected flat scratch",
      // ...
          KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER,
      // ...
        ImpliedUserSGPRCount += 4;
    } else if (ID == ".amdhsa_user_sgpr_dispatch_ptr") {
      // ...
          KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR, Val,
      // ...
        ImpliedUserSGPRCount += 2;
    } else if (ID == ".amdhsa_user_sgpr_queue_ptr") {
      // ...
          KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR, Val,
      // ...
        ImpliedUserSGPRCount += 2;
    } else if (ID == ".amdhsa_user_sgpr_kernarg_segment_ptr") {
      // ...
          KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR,
      // ...
        ImpliedUserSGPRCount += 2;
    } else if (ID == ".amdhsa_user_sgpr_dispatch_id") {
      // ...
          KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID, Val,
      // ...
        ImpliedUserSGPRCount += 2;
    } else if (ID == ".amdhsa_user_sgpr_flat_scratch_init") {
      // ...
          "directive is not supported with architected flat scratch",
      // ...
          KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT, Val,
      // ...
        ImpliedUserSGPRCount += 2;
    } else if (ID == ".amdhsa_user_sgpr_private_segment_size") {
      // ...
          KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE,
      // ...
        ImpliedUserSGPRCount += 1;
    } else if (ID == ".amdhsa_wavefront_size32") {
      if (IVersion.Major < 10)
        return Error(IDRange.Start, "directive requires gfx10+", IDRange);
      EnableWavefrontSize32 = Val;
      // ...
          KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32,
      // ...
    } else if (ID == ".amdhsa_uses_dynamic_stack") {
      // ...
          KERNEL_CODE_PROPERTY_USES_DYNAMIC_STACK, Val, ValRange);
    } else if (ID == ".amdhsa_system_sgpr_private_segment_wavefront_offset") {
      // ...
          "directive is not supported with architected flat scratch",
      // ...
          COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT, Val, ValRange);
    } else if (ID == ".amdhsa_enable_private_segment") {
      // ...
          "directive is not supported without architected flat scratch",
      // ...
          COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT, Val, ValRange);
    } else if (ID == ".amdhsa_system_sgpr_workgroup_id_x") {
      // ...
          COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X, Val,
      // ...
    } else if (ID == ".amdhsa_system_sgpr_workgroup_id_y") {
      // ...
          COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Y, Val,
      // ...
    } else if (ID == ".amdhsa_system_sgpr_workgroup_id_z") {
      // ...
          COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Z, Val,
      // ...
    } else if (ID == ".amdhsa_system_sgpr_workgroup_info") {
      // ...
          COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_INFO, Val,
      // ...
    } else if (ID == ".amdhsa_system_vgpr_workitem_id") {
      // ...
          COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID, Val,
      // ...
    } else if (ID == ".amdhsa_next_free_vgpr") {
      VGPRRange = ValRange;
      // ...
    } else if (ID == ".amdhsa_next_free_sgpr") {
      SGPRRange = ValRange;
      // ...
    } else if (ID == ".amdhsa_accum_offset") {
      // ...
        return Error(IDRange.Start, "directive requires gfx90a+", IDRange);
      // ...
    } else if (ID == ".amdhsa_reserve_vcc") {
      if (!isUInt<1>(Val))
        return OutOfRangeError(ValRange);
      // ...
    } else if (ID == ".amdhsa_reserve_flat_scratch") {
      if (IVersion.Major < 7)
        return Error(IDRange.Start, "directive requires gfx7+", IDRange);
      // ...
          "directive is not supported with architected flat scratch",
      // ...
      if (!isUInt<1>(Val))
        return OutOfRangeError(ValRange);
      ReserveFlatScr = Val;
    } else if (ID == ".amdhsa_reserve_xnack_mask") {
      if (IVersion.Major < 8)
        return Error(IDRange.Start, "directive requires gfx8+", IDRange);
      if (!isUInt<1>(Val))
        return OutOfRangeError(ValRange);
      if (Val != getTargetStreamer().getTargetID()->isXnackOnOrAny())
        return getParser().Error(IDRange.Start,
            ".amdhsa_reserve_xnack_mask does not match target id",
            /* ... */);
    } else if (ID == ".amdhsa_float_round_mode_32") {
      // ...
          COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32, Val, ValRange);
    } else if (ID == ".amdhsa_float_round_mode_16_64") {
      // ...
          COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_16_64, Val, ValRange);
    } else if (ID == ".amdhsa_float_denorm_mode_32") {
      // ...
          COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_32, Val, ValRange);
    } else if (ID == ".amdhsa_float_denorm_mode_16_64") {
      // ...
          COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64, Val,
      // ...
    } else if (ID == ".amdhsa_dx10_clamp") {
      // ...
          COMPUTE_PGM_RSRC1_ENABLE_DX10_CLAMP, Val, ValRange);
    } else if (ID == ".amdhsa_ieee_mode") {
      // ...
    } else if (ID == ".amdhsa_fp16_overflow") {
      if (IVersion.Major < 9)
        return Error(IDRange.Start, "directive requires gfx9+", IDRange);
      // ...
    } else if (ID == ".amdhsa_tg_split") {
      // ...
        return Error(IDRange.Start, "directive requires gfx90a+", IDRange);
      // ...
    } else if (ID == ".amdhsa_workgroup_processor_mode") {
      if (IVersion.Major < 10)
        return Error(IDRange.Start, "directive requires gfx10+", IDRange);
      // ...
    } else if (ID == ".amdhsa_memory_ordered") {
      if (IVersion.Major < 10)
        return Error(IDRange.Start, "directive requires gfx10+", IDRange);
      // ...
    } else if (ID == ".amdhsa_forward_progress") {
      if (IVersion.Major < 10)
        return Error(IDRange.Start, "directive requires gfx10+", IDRange);
      // ...
    } else if (ID == ".amdhsa_shared_vgpr_count") {
      if (IVersion.Major < 10)
        return Error(IDRange.Start, "directive requires gfx10+", IDRange);
      SharedVGPRCount = Val;
      // ...
          COMPUTE_PGM_RSRC3_GFX10_PLUS_SHARED_VGPR_COUNT, Val,
      // ...
    } else if (ID == ".amdhsa_exception_fp_ieee_invalid_op") {
      // ...
          COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION, Val,
      // ...
    } else if (ID == ".amdhsa_exception_fp_denorm_src") {
      // ...
          COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_FP_DENORMAL_SOURCE,
      // ...
    } else if (ID == ".amdhsa_exception_fp_ieee_div_zero") {
      // ...
          COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO, Val,
      // ...
    } else if (ID == ".amdhsa_exception_fp_ieee_overflow") {
      // ...
          COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW,
      // ...
    } else if (ID == ".amdhsa_exception_fp_ieee_underflow") {
      // ...
          COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW,
      // ...
    } else if (ID == ".amdhsa_exception_fp_ieee_inexact") {
      // ...
          COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INEXACT,
      // ...
    } else if (ID == ".amdhsa_exception_int_div_zero") {
      // ...
          COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO,
      // ...
    } else {
      return Error(IDRange.Start, "unknown .amdhsa_kernel directive", IDRange);
    }
    // ...

#undef PARSE_BITS_ENTRY
  // ...

  if (!Seen.contains(".amdhsa_next_free_vgpr"))
    return TokError(".amdhsa_next_free_vgpr directive is required");

  if (!Seen.contains(".amdhsa_next_free_sgpr"))
    return TokError(".amdhsa_next_free_sgpr directive is required");

  unsigned VGPRBlocks;
  unsigned SGPRBlocks;
  if (calculateGPRBlocks(getFeatureBits(), ReserveVCC, Reserv