enum RegisterKind { IS_UNKNOWN, IS_VGPR, IS_SGPR, IS_AGPR, IS_TTMP, IS_SPECIAL };
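// RegisterKind records which register file a parsed register belongs to
// (VGPR/SGPR/AGPR/TTMP) or marks it as a special register such as EXEC or VCC;
// later code uses it to pick the matching register class and GPR-count symbol.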
  SMLoc StartLoc, EndLoc;
  const AMDGPUAsmParser *AsmParser;

  AMDGPUOperand(KindTy Kind_, const AMDGPUAsmParser *AsmParser_)
      : Kind(Kind_), AsmParser(AsmParser_) {}

  using Ptr = std::unique_ptr<AMDGPUOperand>;
    bool hasFPModifiers() const { return Abs || Neg; }
    bool hasIntModifiers() const { return Sext; }
    bool hasModifiers() const { return hasFPModifiers() || hasIntModifiers(); }

    int64_t getFPModifiersOperand() const {

    int64_t getIntModifiersOperand() const {

    int64_t getModifiersOperand() const {
      assert(!(hasFPModifiers() && hasIntModifiers()) &&
             "fp and int modifiers should not be used simultaneously");
      if (hasFPModifiers())
        return getFPModifiersOperand();
      if (hasIntModifiers())
        return getIntModifiersOperand();
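    // An operand carries either floating-point modifiers (abs/neg) or the
    // integer sign-extension modifier (sext), never both at once; the assert
    // above enforces this, and getModifiersOperand() encodes whichever kind is
    // present into the instruction's src-modifiers field.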
    ImmKindTyMandatoryLiteral,

  mutable ImmKindTy Kind;
  bool isToken() const override { return Kind == Token; }

  bool isSymbolRefExpr() const {
    return isExpr() && Expr && isa<MCSymbolRefExpr>(Expr);
  }

  bool isImm() const override {
    return Kind == Immediate;
  }

  void setImmKindNone() const {
    Imm.Kind = ImmKindTyNone;
  }

  void setImmKindLiteral() const {
    Imm.Kind = ImmKindTyLiteral;
  }

  void setImmKindMandatoryLiteral() const {
    Imm.Kind = ImmKindTyMandatoryLiteral;
  }

  void setImmKindConst() const {
    Imm.Kind = ImmKindTyConst;
  }

  bool IsImmKindLiteral() const {
    return isImm() && Imm.Kind == ImmKindTyLiteral;
  }

  bool IsImmKindMandatoryLiteral() const {
    return isImm() && Imm.Kind == ImmKindTyMandatoryLiteral;
  }

  bool isImmKindConst() const {
    return isImm() && Imm.Kind == ImmKindTyConst;
  }
  bool isInlinableImm(MVT type) const;
  bool isLiteralImm(MVT type) const;

  bool isRegKind() const {

  bool isReg() const override {
    return isRegKind() && !hasModifiers();
  }

  bool isRegOrInline(unsigned RCID, MVT type) const {
    return isRegClass(RCID) || isInlinableImm(type);
  }

    return isRegOrInline(RCID, type) || isLiteralImm(type);

  bool isRegOrImmWithInt16InputMods() const {
  template <bool IsFake16> bool isRegOrImmWithIntT16InputMods() const {
        IsFake16 ? AMDGPU::VS_32RegClassID : AMDGPU::VS_16RegClassID, MVT::i16);

  bool isRegOrImmWithInt32InputMods() const {

  bool isRegOrInlineImmWithInt16InputMods() const {
    return isRegOrInline(AMDGPU::VS_32RegClassID, MVT::i16);
  }

  template <bool IsFake16> bool isRegOrInlineImmWithIntT16InputMods() const {
    return isRegOrInline(
        IsFake16 ? AMDGPU::VS_32RegClassID : AMDGPU::VS_16RegClassID, MVT::i16);
  }

  bool isRegOrInlineImmWithInt32InputMods() const {
    return isRegOrInline(AMDGPU::VS_32RegClassID, MVT::i32);
  }

  bool isRegOrImmWithInt64InputMods() const {

  bool isRegOrImmWithFP16InputMods() const {

  template <bool IsFake16> bool isRegOrImmWithFPT16InputMods() const {
        IsFake16 ? AMDGPU::VS_32RegClassID : AMDGPU::VS_16RegClassID, MVT::f16);

  bool isRegOrImmWithFP32InputMods() const {

  bool isRegOrImmWithFP64InputMods() const {

  template <bool IsFake16> bool isRegOrInlineImmWithFP16InputMods() const {
    return isRegOrInline(
        IsFake16 ? AMDGPU::VS_32RegClassID : AMDGPU::VS_16RegClassID, MVT::f16);
  }

  bool isRegOrInlineImmWithFP32InputMods() const {
    return isRegOrInline(AMDGPU::VS_32RegClassID, MVT::f32);
  }

  bool isPackedFP16InputMods() const {

  bool isPackedFP32InputMods() const {
  bool isVReg() const {
    return isRegClass(AMDGPU::VGPR_32RegClassID) ||
           isRegClass(AMDGPU::VReg_64RegClassID) ||
           isRegClass(AMDGPU::VReg_96RegClassID) ||
           isRegClass(AMDGPU::VReg_128RegClassID) ||
           isRegClass(AMDGPU::VReg_160RegClassID) ||
           isRegClass(AMDGPU::VReg_192RegClassID) ||
           isRegClass(AMDGPU::VReg_256RegClassID) ||
           isRegClass(AMDGPU::VReg_512RegClassID) ||
           isRegClass(AMDGPU::VReg_1024RegClassID);
  }

  bool isVReg32() const {
    return isRegClass(AMDGPU::VGPR_32RegClassID);
  }

  bool isVReg32OrOff() const {
    return isOff() || isVReg32();
  }

  bool isNull() const {
    return isRegKind() && getReg() == AMDGPU::SGPR_NULL;
  }

  bool isVRegWithInputMods() const;
  template <bool IsFake16> bool isT16_Lo128VRegWithInputMods() const;
  template <bool IsFake16> bool isT16VRegWithInputMods() const;
  bool isSDWAOperand(MVT type) const;
  bool isSDWAFP16Operand() const;
  bool isSDWAFP32Operand() const;
  bool isSDWAInt16Operand() const;
  bool isSDWAInt32Operand() const;
  bool isImmTy(ImmTy ImmT) const {

  template <ImmTy Ty> bool isImmTy() const { return isImmTy(Ty); }

  bool isImmLiteral() const { return isImmTy(ImmTyNone); }

  bool isImmModifier() const {
    return isImm() && Imm.Type != ImmTyNone;
  }

  bool isOModSI() const { return isImmTy(ImmTyOModSI); }
  bool isDim() const { return isImmTy(ImmTyDim); }
  bool isR128A16() const { return isImmTy(ImmTyR128A16); }
  bool isOff() const { return isImmTy(ImmTyOff); }
  bool isExpTgt() const { return isImmTy(ImmTyExpTgt); }
  bool isOffen() const { return isImmTy(ImmTyOffen); }
  bool isIdxen() const { return isImmTy(ImmTyIdxen); }
  bool isAddr64() const { return isImmTy(ImmTyAddr64); }
  bool isSMEMOffsetMod() const { return isImmTy(ImmTySMEMOffsetMod); }
  bool isFlatOffset() const { return isImmTy(ImmTyOffset) || isImmTy(ImmTyInstOffset); }
  bool isGDS() const { return isImmTy(ImmTyGDS); }
  bool isLDS() const { return isImmTy(ImmTyLDS); }
  bool isCPol() const { return isImmTy(ImmTyCPol); }
  bool isIndexKey8bit() const { return isImmTy(ImmTyIndexKey8bit); }
  bool isIndexKey16bit() const { return isImmTy(ImmTyIndexKey16bit); }
  bool isTFE() const { return isImmTy(ImmTyTFE); }
  bool isFORMAT() const { return isImmTy(ImmTyFORMAT) && isUInt<7>(getImm()); }
  bool isDppFI() const { return isImmTy(ImmTyDppFI); }
  bool isSDWADstSel() const { return isImmTy(ImmTySDWADstSel); }
  bool isSDWASrc0Sel() const { return isImmTy(ImmTySDWASrc0Sel); }
  bool isSDWASrc1Sel() const { return isImmTy(ImmTySDWASrc1Sel); }
  bool isSDWADstUnused() const { return isImmTy(ImmTySDWADstUnused); }
  bool isInterpSlot() const { return isImmTy(ImmTyInterpSlot); }
  bool isInterpAttr() const { return isImmTy(ImmTyInterpAttr); }
  bool isInterpAttrChan() const { return isImmTy(ImmTyInterpAttrChan); }
  bool isOpSel() const { return isImmTy(ImmTyOpSel); }
  bool isOpSelHi() const { return isImmTy(ImmTyOpSelHi); }
  bool isNegLo() const { return isImmTy(ImmTyNegLo); }
  bool isNegHi() const { return isImmTy(ImmTyNegHi); }
  bool isBitOp3() const { return isImmTy(ImmTyBitOp3) && isUInt<8>(getImm()); }
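  // These isFoo() predicates are the hooks the TableGen-generated matcher
  // (AMDGPUGenAsmMatcher.inc, included further below) calls to decide whether
  // a parsed operand fits a given instruction operand class.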
  bool isRegOrImm() const {

  bool isRegClass(unsigned RCID) const;

  bool isRegOrInlineNoMods(unsigned RCID, MVT type) const {
    return isRegOrInline(RCID, type) && !hasModifiers();
  }

  bool isSCSrcB16() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i16);
  }

  bool isSCSrcV2B16() const {

  bool isSCSrc_b32() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i32);
  }

  bool isSCSrc_b64() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::i64);
  }

  bool isBoolReg() const;
  bool isSCSrcF16() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f16);
  }

  bool isSCSrcV2F16() const {

  bool isSCSrcF32() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f32);
  }

  bool isSCSrcF64() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::f64);
  }

  bool isSSrc_b32() const {
    return isSCSrc_b32() || isLiteralImm(MVT::i32) || isExpr();
  }

  bool isSSrc_b16() const { return isSCSrcB16() || isLiteralImm(MVT::i16); }

  bool isSSrcV2B16() const {

  bool isSSrc_b64() const {
    return isSCSrc_b64() || isLiteralImm(MVT::i64);
  }

  bool isSSrc_f32() const {
    return isSCSrc_b32() || isLiteralImm(MVT::f32) || isExpr();
  }

  bool isSSrcF64() const { return isSCSrc_b64() || isLiteralImm(MVT::f64); }

  bool isSSrc_bf16() const { return isSCSrcB16() || isLiteralImm(MVT::bf16); }

  bool isSSrc_f16() const { return isSCSrcB16() || isLiteralImm(MVT::f16); }

  bool isSSrcV2F16() const {

  bool isSSrcV2FP32() const {

  bool isSCSrcV2FP32() const {

  bool isSSrcV2INT32() const {

  bool isSCSrcV2INT32() const {
    return isSCSrc_b32();
  }

  bool isSSrcOrLds_b32() const {
    return isRegOrInlineNoMods(AMDGPU::SRegOrLds_32RegClassID, MVT::i32) ||
           isLiteralImm(MVT::i32) || isExpr();
  }
  bool isVCSrc_b32() const {
    return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i32);
  }

  bool isVCSrcB64() const {
    return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::i64);
  }

  bool isVCSrcT_b16() const {
    return isRegOrInlineNoMods(AMDGPU::VS_16RegClassID, MVT::i16);
  }

  bool isVCSrcTB16_Lo128() const {
    return isRegOrInlineNoMods(AMDGPU::VS_16_Lo128RegClassID, MVT::i16);
  }

  bool isVCSrcFake16B16_Lo128() const {
    return isRegOrInlineNoMods(AMDGPU::VS_32_Lo128RegClassID, MVT::i16);
  }

  bool isVCSrc_b16() const {
    return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i16);
  }

  bool isVCSrc_v2b16() const { return isVCSrc_b16(); }

  bool isVCSrc_f32() const {
    return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f32);
  }

  bool isVCSrcF64() const {
    return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::f64);
  }

  bool isVCSrcTBF16() const {
    return isRegOrInlineNoMods(AMDGPU::VS_16RegClassID, MVT::bf16);
  }

  bool isVCSrcT_f16() const {
    return isRegOrInlineNoMods(AMDGPU::VS_16RegClassID, MVT::f16);
  }

  bool isVCSrcT_bf16() const {
    return isRegOrInlineNoMods(AMDGPU::VS_16RegClassID, MVT::f16);
  }

  bool isVCSrcTBF16_Lo128() const {
    return isRegOrInlineNoMods(AMDGPU::VS_16_Lo128RegClassID, MVT::bf16);
  }

  bool isVCSrcTF16_Lo128() const {
    return isRegOrInlineNoMods(AMDGPU::VS_16_Lo128RegClassID, MVT::f16);
  }

  bool isVCSrcFake16BF16_Lo128() const {
    return isRegOrInlineNoMods(AMDGPU::VS_32_Lo128RegClassID, MVT::bf16);
  }

  bool isVCSrcFake16F16_Lo128() const {
    return isRegOrInlineNoMods(AMDGPU::VS_32_Lo128RegClassID, MVT::f16);
  }

  bool isVCSrc_bf16() const {
    return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::bf16);
  }

  bool isVCSrc_f16() const {
    return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f16);
  }

  bool isVCSrc_v2bf16() const { return isVCSrc_bf16(); }

  bool isVCSrc_v2f16() const { return isVCSrc_f16(); }
  bool isVSrc_b32() const {
    return isVCSrc_f32() || isLiteralImm(MVT::i32) || isExpr();
  }

  bool isVSrc_b64() const { return isVCSrcF64() || isLiteralImm(MVT::i64); }

  bool isVSrcT_b16() const { return isVCSrcT_b16() || isLiteralImm(MVT::i16); }

  bool isVSrcT_b16_Lo128() const {
    return isVCSrcTB16_Lo128() || isLiteralImm(MVT::i16);
  }

  bool isVSrcFake16_b16_Lo128() const {
    return isVCSrcFake16B16_Lo128() || isLiteralImm(MVT::i16);
  }

  bool isVSrc_b16() const { return isVCSrc_b16() || isLiteralImm(MVT::i16); }

  bool isVSrc_v2b16() const { return isVSrc_b16() || isLiteralImm(MVT::v2i16); }

  bool isVCSrcV2FP32() const {

  bool isVSrc_v2f32() const { return isVSrc_f64() || isLiteralImm(MVT::v2f32); }

  bool isVCSrcV2INT32() const {

  bool isVSrc_v2b32() const { return isVSrc_b64() || isLiteralImm(MVT::v2i32); }

  bool isVSrc_f32() const {
    return isVCSrc_f32() || isLiteralImm(MVT::f32) || isExpr();
  }

  bool isVSrc_f64() const { return isVCSrcF64() || isLiteralImm(MVT::f64); }

  bool isVSrcT_bf16() const { return isVCSrcTBF16() || isLiteralImm(MVT::bf16); }

  bool isVSrcT_f16() const { return isVCSrcT_f16() || isLiteralImm(MVT::f16); }

  bool isVSrcT_bf16_Lo128() const {
    return isVCSrcTBF16_Lo128() || isLiteralImm(MVT::bf16);
  }

  bool isVSrcT_f16_Lo128() const {
    return isVCSrcTF16_Lo128() || isLiteralImm(MVT::f16);
  }

  bool isVSrcFake16_bf16_Lo128() const {
    return isVCSrcFake16BF16_Lo128() || isLiteralImm(MVT::bf16);
  }

  bool isVSrcFake16_f16_Lo128() const {
    return isVCSrcFake16F16_Lo128() || isLiteralImm(MVT::f16);
  }

  bool isVSrc_bf16() const { return isVCSrc_bf16() || isLiteralImm(MVT::bf16); }

  bool isVSrc_f16() const { return isVCSrc_f16() || isLiteralImm(MVT::f16); }

  bool isVSrc_v2bf16() const {
    return isVSrc_bf16() || isLiteralImm(MVT::v2bf16);
  }

  bool isVSrc_v2f16() const { return isVSrc_f16() || isLiteralImm(MVT::v2f16); }
  bool isVISrcB32() const {
    return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i32);
  }

  bool isVISrcB16() const {
    return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i16);
  }

  bool isVISrcV2B16() const {

  bool isVISrcF32() const {
    return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f32);
  }

  bool isVISrcF16() const {
    return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f16);
  }

  bool isVISrcV2F16() const {
    return isVISrcF16() || isVISrcB32();
  }

  bool isVISrc_64_bf16() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::bf16);
  }

  bool isVISrc_64_f16() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::f16);
  }

  bool isVISrc_64_b32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::i32);
  }

  bool isVISrc_64B64() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::i64);
  }

  bool isVISrc_64_f64() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::f64);
  }

  bool isVISrc_64V2FP32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::f32);
  }

  bool isVISrc_64V2INT32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::i32);
  }

  bool isVISrc_256_b32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::i32);
  }

  bool isVISrc_256_f32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::f32);
  }

  bool isVISrc_256B64() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::i64);
  }

  bool isVISrc_256_f64() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::f64);
  }

  bool isVISrc_128B16() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::i16);
  }

  bool isVISrc_128V2B16() const {
    return isVISrc_128B16();
  }

  bool isVISrc_128_b32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::i32);
  }

  bool isVISrc_128_f32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::f32);
  }

  bool isVISrc_256V2FP32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::f32);
  }

  bool isVISrc_256V2INT32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::i32);
  }

  bool isVISrc_512_b32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::i32);
  }

  bool isVISrc_512B16() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::i16);
  }

  bool isVISrc_512V2B16() const {
    return isVISrc_512B16();
  }

  bool isVISrc_512_f32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::f32);
  }

  bool isVISrc_512F16() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::f16);
  }

  bool isVISrc_512V2F16() const {
    return isVISrc_512F16() || isVISrc_512_b32();
  }

  bool isVISrc_1024_b32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::i32);
  }

  bool isVISrc_1024B16() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::i16);
  }

  bool isVISrc_1024V2B16() const {
    return isVISrc_1024B16();
  }

  bool isVISrc_1024_f32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::f32);
  }

  bool isVISrc_1024F16() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::f16);
  }

  bool isVISrc_1024V2F16() const {
    return isVISrc_1024F16() || isVISrc_1024_b32();
  }
  bool isAISrcB32() const {
    return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i32);
  }

  bool isAISrcB16() const {
    return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i16);
  }

  bool isAISrcV2B16() const {

  bool isAISrcF32() const {
    return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f32);
  }

  bool isAISrcF16() const {
    return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f16);
  }

  bool isAISrcV2F16() const {
    return isAISrcF16() || isAISrcB32();
  }

  bool isAISrc_64B64() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_64RegClassID, MVT::i64);
  }

  bool isAISrc_64_f64() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_64RegClassID, MVT::f64);
  }

  bool isAISrc_128_b32() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i32);
  }

  bool isAISrc_128B16() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i16);
  }

  bool isAISrc_128V2B16() const {
    return isAISrc_128B16();
  }

  bool isAISrc_128_f32() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f32);
  }

  bool isAISrc_128F16() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f16);
  }

  bool isAISrc_128V2F16() const {
    return isAISrc_128F16() || isAISrc_128_b32();
  }

  bool isVISrc_128_bf16() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::bf16);
  }

  bool isVISrc_128_f16() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::f16);
  }

  bool isVISrc_128V2F16() const {
    return isVISrc_128_f16() || isVISrc_128_b32();
  }

  bool isAISrc_256B64() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_256RegClassID, MVT::i64);
  }

  bool isAISrc_256_f64() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_256RegClassID, MVT::f64);
  }

  bool isAISrc_512_b32() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i32);
  }

  bool isAISrc_512B16() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i16);
  }

  bool isAISrc_512V2B16() const {
    return isAISrc_512B16();
  }

  bool isAISrc_512_f32() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f32);
  }

  bool isAISrc_512F16() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f16);
  }

  bool isAISrc_512V2F16() const {
    return isAISrc_512F16() || isAISrc_512_b32();
  }

  bool isAISrc_1024_b32() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i32);
  }

  bool isAISrc_1024B16() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i16);
  }

  bool isAISrc_1024V2B16() const {
    return isAISrc_1024B16();
  }

  bool isAISrc_1024_f32() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f32);
  }

  bool isAISrc_1024F16() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f16);
  }

  bool isAISrc_1024V2F16() const {
    return isAISrc_1024F16() || isAISrc_1024_b32();
  }

  bool isKImmFP32() const {
    return isLiteralImm(MVT::f32);
  }

  bool isKImmFP16() const {
    return isLiteralImm(MVT::f16);
  }
  bool isMem() const override {

  bool isExpr() const {

  bool isSOPPBrTarget() const { return isExpr() || isImm(); }

  bool isSWaitCnt() const;
  bool isDepCtr() const;
  bool isSDelayALU() const;
  bool isHwreg() const;
  bool isSendMsg() const;
  bool isSplitBarrier() const;
  bool isSwizzle() const;
  bool isSMRDOffset8() const;
  bool isSMEMOffset() const;
  bool isSMRDLiteralOffset() const;
  bool isDPPCtrl() const;
  bool isGPRIdxMode() const;
  bool isS16Imm() const;
  bool isU16Imm() const;
  bool isEndpgm() const;

  auto getPredicate(std::function<bool(const AMDGPUOperand &Op)> P) const {
    return [=](){ return P(*this); };
  }

  int64_t getImm() const {
  void setImm(int64_t Val) {

  ImmTy getImmTy() const {

  SMLoc getStartLoc() const override {

  SMLoc getEndLoc() const override {

    return SMRange(StartLoc, EndLoc);

  Modifiers getModifiers() const {
    assert(isRegKind() || isImmTy(ImmTyNone));
    return isRegKind() ? Reg.Mods : Imm.Mods;
  }

  void setModifiers(Modifiers Mods) {
    assert(isRegKind() || isImmTy(ImmTyNone));

  bool hasModifiers() const {
    return getModifiers().hasModifiers();
  }

  bool hasFPModifiers() const {
    return getModifiers().hasFPModifiers();
  }

  bool hasIntModifiers() const {
    return getModifiers().hasIntModifiers();
  }

  void addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers = true) const;

  void addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const;

  void addRegOperands(MCInst &Inst, unsigned N) const;
  void addRegOrImmOperands(MCInst &Inst, unsigned N) const {
      addRegOperands(Inst, N);
      addImmOperands(Inst, N);

  void addRegOrImmWithInputModsOperands(MCInst &Inst, unsigned N) const {
    Modifiers Mods = getModifiers();
      addRegOperands(Inst, N);
      addImmOperands(Inst, N, false);

  void addRegOrImmWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
    assert(!hasIntModifiers());
    addRegOrImmWithInputModsOperands(Inst, N);
  }

  void addRegOrImmWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
    assert(!hasFPModifiers());
    addRegOrImmWithInputModsOperands(Inst, N);
  }

  void addRegWithInputModsOperands(MCInst &Inst, unsigned N) const {
    Modifiers Mods = getModifiers();
      addRegOperands(Inst, N);

  void addRegWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
    assert(!hasIntModifiers());
    addRegWithInputModsOperands(Inst, N);
  }

  void addRegWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
    assert(!hasFPModifiers());
    addRegWithInputModsOperands(Inst, N);
  }
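  // Debug printing: the switch below maps each ImmTy value to a readable name
  // used when dumping operands.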
  case ImmTyNone: OS << "None"; break;
  case ImmTyGDS: OS << "GDS"; break;
  case ImmTyLDS: OS << "LDS"; break;
  case ImmTyOffen: OS << "Offen"; break;
  case ImmTyIdxen: OS << "Idxen"; break;
  case ImmTyAddr64: OS << "Addr64"; break;
  case ImmTyOffset: OS << "Offset"; break;
  case ImmTyInstOffset: OS << "InstOffset"; break;
  case ImmTyOffset0: OS << "Offset0"; break;
  case ImmTyOffset1: OS << "Offset1"; break;
  case ImmTySMEMOffsetMod: OS << "SMEMOffsetMod"; break;
  case ImmTyCPol: OS << "CPol"; break;
  case ImmTyIndexKey8bit: OS << "index_key"; break;
  case ImmTyIndexKey16bit: OS << "index_key"; break;
  case ImmTyTFE: OS << "TFE"; break;
  case ImmTyD16: OS << "D16"; break;
  case ImmTyFORMAT: OS << "FORMAT"; break;
  case ImmTyClamp: OS << "Clamp"; break;
  case ImmTyOModSI: OS << "OModSI"; break;
  case ImmTyDPP8: OS << "DPP8"; break;
  case ImmTyDppCtrl: OS << "DppCtrl"; break;
  case ImmTyDppRowMask: OS << "DppRowMask"; break;
  case ImmTyDppBankMask: OS << "DppBankMask"; break;
  case ImmTyDppBoundCtrl: OS << "DppBoundCtrl"; break;
  case ImmTyDppFI: OS << "DppFI"; break;
  case ImmTySDWADstSel: OS << "SDWADstSel"; break;
  case ImmTySDWASrc0Sel: OS << "SDWASrc0Sel"; break;
  case ImmTySDWASrc1Sel: OS << "SDWASrc1Sel"; break;
  case ImmTySDWADstUnused: OS << "SDWADstUnused"; break;
  case ImmTyDMask: OS << "DMask"; break;
  case ImmTyDim: OS << "Dim"; break;
  case ImmTyUNorm: OS << "UNorm"; break;
  case ImmTyDA: OS << "DA"; break;
  case ImmTyR128A16: OS << "R128A16"; break;
  case ImmTyA16: OS << "A16"; break;
  case ImmTyLWE: OS << "LWE"; break;
  case ImmTyOff: OS << "Off"; break;
  case ImmTyExpTgt: OS << "ExpTgt"; break;
  case ImmTyExpCompr: OS << "ExpCompr"; break;
  case ImmTyExpVM: OS << "ExpVM"; break;
  case ImmTyHwreg: OS << "Hwreg"; break;
  case ImmTySendMsg: OS << "SendMsg"; break;
  case ImmTyInterpSlot: OS << "InterpSlot"; break;
  case ImmTyInterpAttr: OS << "InterpAttr"; break;
  case ImmTyInterpAttrChan: OS << "InterpAttrChan"; break;
  case ImmTyOpSel: OS << "OpSel"; break;
  case ImmTyOpSelHi: OS << "OpSelHi"; break;
  case ImmTyNegLo: OS << "NegLo"; break;
  case ImmTyNegHi: OS << "NegHi"; break;
  case ImmTySwizzle: OS << "Swizzle"; break;
  case ImmTyGprIdxMode: OS << "GprIdxMode"; break;
  case ImmTyHigh: OS << "High"; break;
  case ImmTyBLGP: OS << "BLGP"; break;
  case ImmTyCBSZ: OS << "CBSZ"; break;
  case ImmTyABID: OS << "ABID"; break;
  case ImmTyEndpgm: OS << "Endpgm"; break;
  case ImmTyWaitVDST: OS << "WaitVDST"; break;
  case ImmTyWaitEXP: OS << "WaitEXP"; break;
  case ImmTyWaitVAVDst: OS << "WaitVAVDst"; break;
  case ImmTyWaitVMVSrc: OS << "WaitVMVSrc"; break;
  case ImmTyByteSel: OS << "ByteSel"; break;
  case ImmTyBitOp3: OS << "BitOp3"; break;
         << " mods: " << Reg.Mods << '>';

    OS << '<' << getImm();
    if (getImmTy() != ImmTyNone) {
      OS << " type: "; printImmTy(OS, getImmTy());

    OS << " mods: " << Imm.Mods << '>';

    OS << '\'' << getToken() << '\'';

    OS << "<expr " << *Expr << '>';
  static AMDGPUOperand::Ptr CreateImm(const AMDGPUAsmParser *AsmParser,
                                      int64_t Val, SMLoc Loc,
                                      ImmTy Type = ImmTyNone,
                                      bool IsFPImm = false) {
    auto Op = std::make_unique<AMDGPUOperand>(Immediate, AsmParser);
    Op->Imm.IsFPImm = IsFPImm;
    Op->Imm.Kind = ImmKindTyNone;
    Op->Imm.Mods = Modifiers();

  static AMDGPUOperand::Ptr CreateToken(const AMDGPUAsmParser *AsmParser,
                                        bool HasExplicitEncodingSize = true) {
    auto Res = std::make_unique<AMDGPUOperand>(Token, AsmParser);
    Res->Tok.Data = Str.data();
    Res->Tok.Length = Str.size();
    Res->StartLoc = Loc;

  static AMDGPUOperand::Ptr CreateReg(const AMDGPUAsmParser *AsmParser,
    auto Op = std::make_unique<AMDGPUOperand>(Register, AsmParser);
    Op->Reg.RegNo = Reg;
    Op->Reg.Mods = Modifiers();

  static AMDGPUOperand::Ptr CreateExpr(const AMDGPUAsmParser *AsmParser,
    auto Op = std::make_unique<AMDGPUOperand>(Expression, AsmParser);

  OS << "abs:" << Mods.Abs << " neg: " << Mods.Neg << " sext:" << Mods.Sext;
class KernelScopeInfo {
  int SgprIndexUnusedMin = -1;
  int VgprIndexUnusedMin = -1;
  int AgprIndexUnusedMin = -1;

  void usesSgprAt(int i) {
    if (i >= SgprIndexUnusedMin) {
      SgprIndexUnusedMin = ++i;

  void usesVgprAt(int i) {
    if (i >= VgprIndexUnusedMin) {
      VgprIndexUnusedMin = ++i;
                VgprIndexUnusedMin);

  void usesAgprAt(int i) {
    if (i >= AgprIndexUnusedMin) {
      AgprIndexUnusedMin = ++i;
                VgprIndexUnusedMin);

  KernelScopeInfo() = default;

    usesSgprAt(SgprIndexUnusedMin = -1);
    usesVgprAt(VgprIndexUnusedMin = -1);
    usesAgprAt(AgprIndexUnusedMin = -1);

  void usesRegister(RegisterKind RegKind, unsigned DwordRegIndex,
                    unsigned RegWidth) {
      usesSgprAt(DwordRegIndex + divideCeil(RegWidth, 32) - 1);
      usesAgprAt(DwordRegIndex + divideCeil(RegWidth, 32) - 1);
      usesVgprAt(DwordRegIndex + divideCeil(RegWidth, 32) - 1);
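  // Worked example: parsing s[2:3] reaches usesRegister(IS_SGPR, 2, 64);
  // divideCeil(64, 32) - 1 == 1, so usesSgprAt(3) runs and SgprIndexUnusedMin
  // becomes 4, i.e. the next free SGPR index in this kernel scope.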
  unsigned ForcedEncodingSize = 0;
  bool ForcedDPP = false;
  bool ForcedSDWA = false;
  KernelScopeInfo KernelScope;

#define GET_ASSEMBLER_HEADER
#include "AMDGPUGenAsmMatcher.inc"
  void createConstantSymbol(StringRef Id, int64_t Val);

  bool ParseAsAbsoluteExpression(uint32_t &Ret);

                            const MCExpr *FlatScrUsed, bool XNACKUsed,
                            std::optional<bool> EnableWavefrontSize32,

  bool ParseDirectiveAMDGCNTarget();
  bool ParseDirectiveAMDHSACodeObjectVersion();
  bool ParseDirectiveAMDHSAKernel();
  bool ParseDirectiveAMDKernelCodeT();
  bool ParseDirectiveAMDGPUHsaKernel();
  bool ParseDirectiveISAVersion();
  bool ParseDirectiveHSAMetadata();
  bool ParseDirectivePALMetadataBegin();
  bool ParseDirectivePALMetadata();
  bool ParseDirectiveAMDGPULDS();

  bool ParseToEndDirective(const char *AssemblerDirectiveBegin,
                           const char *AssemblerDirectiveEnd,
                           std::string &CollectString);

  bool AddNextRegisterToList(MCRegister &Reg, unsigned &RegWidth,
  bool ParseAMDGPURegister(RegisterKind &RegKind, MCRegister &Reg,
                           unsigned &RegNum, unsigned &RegWidth,
                           bool RestoreOnFailure = false);
  bool ParseAMDGPURegister(RegisterKind &RegKind, MCRegister &Reg,
                           unsigned &RegNum, unsigned &RegWidth,
  MCRegister ParseRegularReg(RegisterKind &RegKind, unsigned &RegNum,
  MCRegister ParseSpecialReg(RegisterKind &RegKind, unsigned &RegNum,
  MCRegister ParseRegList(RegisterKind &RegKind, unsigned &RegNum,
  bool ParseRegRange(unsigned &Num, unsigned &Width);
  MCRegister getRegularReg(RegisterKind RegKind, unsigned RegNum,

  std::optional<StringRef> getGprCountSymbolName(RegisterKind RegKind);
  void initializeGprCountSymbol(RegisterKind RegKind);
  bool updateGprCountSymbols(RegisterKind RegKind, unsigned DwordRegIndex,

    OperandMode_Default,

  using OptionalImmIndexMap = std::map<AMDGPUOperand::ImmTy, unsigned>;
    if (getFeatureBits().none()) {

    if (!FB[AMDGPU::FeatureWavefrontSize64] &&
        !FB[AMDGPU::FeatureWavefrontSize32]) {

      createConstantSymbol(".amdgcn.gfx_generation_number", ISA.Major);
      createConstantSymbol(".amdgcn.gfx_generation_minor", ISA.Minor);
      createConstantSymbol(".amdgcn.gfx_generation_stepping", ISA.Stepping);

      createConstantSymbol(".option.machine_version_major", ISA.Major);
      createConstantSymbol(".option.machine_version_minor", ISA.Minor);
      createConstantSymbol(".option.machine_version_stepping", ISA.Stepping);

      initializeGprCountSymbol(IS_VGPR);
      initializeGprCountSymbol(IS_SGPR);

      createConstantSymbol(Symbol, Code);

    createConstantSymbol("UC_VERSION_W64_BIT", 0x2000);
    createConstantSymbol("UC_VERSION_W32_BIT", 0x4000);
    createConstantSymbol("UC_VERSION_MDP_BIT", 0x8000);
  bool hasInv2PiInlineImm() const {
    return getFeatureBits()[AMDGPU::FeatureInv2PiInlineImm];
  }

  bool hasFlatOffsets() const {
    return getFeatureBits()[AMDGPU::FeatureFlatInstOffsets];
  }

    return getFeatureBits()[AMDGPU::FeatureArchitectedFlatScratch];

  bool hasSGPR102_SGPR103() const {

  bool hasSGPR104_SGPR105() const { return isGFX10Plus(); }

  bool hasIntClamp() const {
    return getFeatureBits()[AMDGPU::FeatureIntClamp];
  }

  bool hasPartialNSAEncoding() const {
    return getFeatureBits()[AMDGPU::FeaturePartialNSAEncoding];
  }
  void setForcedEncodingSize(unsigned Size) { ForcedEncodingSize = Size; }
  void setForcedDPP(bool ForceDPP_) { ForcedDPP = ForceDPP_; }
  void setForcedSDWA(bool ForceSDWA_) { ForcedSDWA = ForceSDWA_; }

  unsigned getForcedEncodingSize() const { return ForcedEncodingSize; }
  bool isForcedVOP3() const { return ForcedEncodingSize == 64; }
  bool isForcedDPP() const { return ForcedDPP; }
  bool isForcedSDWA() const { return ForcedSDWA; }
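  // A mnemonic suffix such as _e64, _dpp or _sdwa forces a particular
  // encoding; the Forced* flags above steer which matcher variants are tried
  // (see getMatchedVariantName() below).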
  StringRef getMatchedVariantName() const;

  std::unique_ptr<AMDGPUOperand> parseRegister(bool RestoreOnFailure = false);
                     bool RestoreOnFailure);
                     SMLoc &EndLoc) override;
                     unsigned Kind) override;
                     bool MatchingInlineAsm) override;
                     OperandMode Mode = OperandMode_Default);
  ParseStatus parseIntWithPrefix(const char *Prefix, int64_t &Int);
                     AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
                     std::function<bool(int64_t &)> ConvertResult = nullptr);
                     AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
                     bool (*ConvertResult)(int64_t &) = nullptr);
                     AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone);
                     AMDGPUOperand::ImmTy Type);

  bool isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
  bool isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
  bool isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
  bool isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const;
  bool parseSP3NegModifier();
                     bool HasLit = false);
                     bool HasLit = false);
                     bool AllowImm = true);
                     bool AllowImm = true);
                     AMDGPUOperand::ImmTy ImmTy);

  ParseStatus parseSymbolicOrNumericFormat(int64_t &Format);

  bool tryParseFmt(const char *Pref, int64_t MaxVal, int64_t &Val);
  bool matchDfmtNfmt(int64_t &Dfmt, int64_t &Nfmt, StringRef FormatStr, SMLoc Loc);

  bool parseCnt(int64_t &IntVal);
  bool parseDepCtr(int64_t &IntVal, unsigned &Mask);
  bool parseDelay(int64_t &Delay);
  struct OperandInfoTy {
    bool IsSymbolic = false;
    bool IsDefined = false;

    OperandInfoTy(int64_t Val) : Val(Val) {}
  };

  struct StructuredOpField : OperandInfoTy {
    bool IsDefined = false;

    virtual ~StructuredOpField() = default;

    bool Error(AMDGPUAsmParser &Parser, const Twine &Err) const {
      Parser.Error(Loc, "invalid " + Desc + ": " + Err);

    virtual bool validate(AMDGPUAsmParser &Parser) const {
        return Error(Parser, "not supported on this GPU");
        return Error(Parser, "only " + Twine(Width) + "-bit values are legal");

  bool parseSendMsgBody(OperandInfoTy &Msg, OperandInfoTy &Op, OperandInfoTy &Stream);
  bool validateSendMsg(const OperandInfoTy &Msg,
                       const OperandInfoTy &Op,
                       const OperandInfoTy &Stream);
                       OperandInfoTy &Width);
  SMLoc getOperandLoc(std::function<bool(const AMDGPUOperand &)> Test,
                      bool SearchMandatoryLiterals = false) const;

  bool validateSOPLiteral(const MCInst &Inst) const;
  bool validateVOPDRegBankConstraints(const MCInst &Inst,
  bool validateIntClampSupported(const MCInst &Inst);
  bool validateMIMGAtomicDMask(const MCInst &Inst);
  bool validateMIMGGatherDMask(const MCInst &Inst);
  bool validateMIMGDataSize(const MCInst &Inst, const SMLoc &IDLoc);
  bool validateMIMGAddrSize(const MCInst &Inst, const SMLoc &IDLoc);
  bool validateMIMGD16(const MCInst &Inst);
  bool validateMIMGMSAA(const MCInst &Inst);
  bool validateOpSel(const MCInst &Inst);
  bool validateVccOperand(MCRegister Reg) const;
  bool validateAGPRLdSt(const MCInst &Inst) const;
  bool validateVGPRAlign(const MCInst &Inst) const;
  bool validateDivScale(const MCInst &Inst);
                       const SMLoc &IDLoc);
                       const unsigned CPol);
  std::optional<StringRef> validateLdsDirect(const MCInst &Inst);
  unsigned getConstantBusLimit(unsigned Opcode) const;
  bool usesConstantBus(const MCInst &Inst, unsigned OpIdx);
  bool isInlineConstant(const MCInst &Inst, unsigned OpIdx) const;
  unsigned findImplicitSGPRReadInVOP(const MCInst &Inst) const;
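  // After the matcher builds an MCInst, these validate* hooks enforce
  // target-specific rules the generic matcher cannot express: constant-bus
  // usage, MIMG data/address sizes, VGPR alignment, AGPR load/store rules, etc.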
  AsmToken peekToken(bool ShouldSkipSpace = true);
  SMLoc getLoc() const;

  void onBeginOfFile() override;
  bool parsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc) override;

  bool parseSwizzleOperand(int64_t &Op, const unsigned MinVal,
                           const unsigned MaxVal, const Twine &ErrMsg,
  bool parseSwizzleOperands(const unsigned OpNum, int64_t *Op,
                            const unsigned MinVal,
                            const unsigned MaxVal,
  bool parseSwizzleOffset(int64_t &Imm);
  bool parseSwizzleMacro(int64_t &Imm);
  bool parseSwizzleQuadPerm(int64_t &Imm);
  bool parseSwizzleBitmaskPerm(int64_t &Imm);
  bool parseSwizzleBroadcast(int64_t &Imm);
  bool parseSwizzleSwap(int64_t &Imm);
  bool parseSwizzleReverse(int64_t &Imm);
  bool parseSwizzleFFT(int64_t &Imm);
  bool parseSwizzleRotate(int64_t &Imm);

  int64_t parseGPRIdxMacro();

                  OptionalImmIndexMap &OptionalIdx);
                  OptionalImmIndexMap &OptionalIdx);
                  OptionalImmIndexMap &OptionalIdx);

  bool parseDimId(unsigned &Encoding);
  bool convertDppBoundCtrl(int64_t &BoundCtrl);
  int64_t parseDPPCtrlSel(StringRef Ctrl);
  int64_t parseDPPCtrlPerm();
                  bool IsDPP8 = false);
                  AMDGPUOperand::ImmTy Type);
                  bool SkipDstVcc = false,
                  bool SkipSrcVcc = false);
    return &APFloat::IEEEsingle();
    return &APFloat::IEEEdouble();
    return &APFloat::IEEEhalf();

    return &APFloat::IEEEsingle();
    return &APFloat::IEEEdouble();
    return &APFloat::IEEEhalf();
    return &APFloat::BFloat();

                           APFloat::rmNearestTiesToEven,
  if (Status != APFloat::opOK &&
      ((Status & APFloat::opOverflow) != 0 ||
       (Status & APFloat::opUnderflow) != 0)) {
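// Literal values are converted through APFloat with round-to-nearest-even; an
// opOverflow or opUnderflow status means the literal cannot be represented
// losslessly in the operand's floating-point format.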
bool AMDGPUOperand::isInlinableImm(MVT type) const {
  if (!isImmTy(ImmTyNone)) {

  if (type == MVT::f64 || type == MVT::i64) {
                           AsmParser->hasInv2PiInlineImm());

                        APFloat::rmNearestTiesToEven, &Lost);
    uint32_t ImmVal = FPLiteral.bitcastToAPInt().getZExtValue();
                        AsmParser->hasInv2PiInlineImm());

        static_cast<int32_t>(FPLiteral.bitcastToAPInt().getZExtValue()),
        AsmParser->hasInv2PiInlineImm());

  if (type == MVT::f64 || type == MVT::i64) {
                         AsmParser->hasInv2PiInlineImm());

      static_cast<int16_t>(Literal.getLoBits(16).getSExtValue()),
      type, AsmParser->hasInv2PiInlineImm());

      static_cast<int32_t>(Literal.getLoBits(32).getZExtValue()),
      AsmParser->hasInv2PiInlineImm());
bool AMDGPUOperand::isLiteralImm(MVT type) const {
  if (!isImmTy(ImmTyNone)) {

  if (type == MVT::f64 && hasFPModifiers()) {

  if (type == MVT::f64) {

  if (type == MVT::i64) {

  MVT ExpectedType = (type == MVT::v2f16) ? MVT::f16
                     : (type == MVT::v2i16) ? MVT::f32
                     : (type == MVT::v2f32) ? MVT::f32

bool AMDGPUOperand::isRegClass(unsigned RCID) const {
  return isRegKind() && AsmParser->getMRI()->getRegClass(RCID).contains(getReg());
}
bool AMDGPUOperand::isVRegWithInputMods() const {
  return isRegClass(AMDGPU::VGPR_32RegClassID) ||
         (isRegClass(AMDGPU::VReg_64RegClassID) &&
          AsmParser->getFeatureBits()[AMDGPU::FeatureDPALU_DPP]);
}

template <bool IsFake16>
bool AMDGPUOperand::isT16_Lo128VRegWithInputMods() const {
  return isRegClass(IsFake16 ? AMDGPU::VGPR_32_Lo128RegClassID
                             : AMDGPU::VGPR_16_Lo128RegClassID);
}

template <bool IsFake16> bool AMDGPUOperand::isT16VRegWithInputMods() const {
  return isRegClass(IsFake16 ? AMDGPU::VGPR_32RegClassID
                             : AMDGPU::VGPR_16RegClassID);
}

bool AMDGPUOperand::isSDWAOperand(MVT type) const {
  if (AsmParser->isVI())
  if (AsmParser->isGFX9Plus())
    return isRegClass(AMDGPU::VS_32RegClassID) || isInlinableImm(type);

bool AMDGPUOperand::isSDWAFP16Operand() const {
  return isSDWAOperand(MVT::f16);
}

bool AMDGPUOperand::isSDWAFP32Operand() const {
  return isSDWAOperand(MVT::f32);
}

bool AMDGPUOperand::isSDWAInt16Operand() const {
  return isSDWAOperand(MVT::i16);
}

bool AMDGPUOperand::isSDWAInt32Operand() const {
  return isSDWAOperand(MVT::i32);
}

bool AMDGPUOperand::isBoolReg() const {
  auto FB = AsmParser->getFeatureBits();
  return isReg() && ((FB[AMDGPU::FeatureWavefrontSize64] && isSCSrc_b64()) ||
                     (FB[AMDGPU::FeatureWavefrontSize32] && isSCSrc_b32()));
}
  assert(isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers());

void AMDGPUOperand::addImmOperands(MCInst &Inst, unsigned N,
                                   bool ApplyModifiers) const {
    addLiteralImmOperand(Inst, Imm.Val,
                         isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers());
    assert(!isImmTy(ImmTyNone) || !hasModifiers());

void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val,
                                         bool ApplyModifiers) const {
  const auto &InstDesc = AsmParser->getMII()->get(Inst.getOpcode());

  if (ApplyModifiers) {
    Val = applyInputFPModifiers(Val, Size);

  uint8_t OpTy = InstDesc.operands()[OpNum].OperandType;

                               AsmParser->hasInv2PiInlineImm())) {

    if (Literal.getLoBits(32) != 0) {
      const_cast<AMDGPUAsmParser *>(AsmParser)->Warning(
          Inst.getLoc(),
          "Can't encode literal as exact 64-bit floating-point operand. "
          "Low 32-bits will be set to zero");
      Val &= 0xffffffff00000000u;

    setImmKindLiteral();

    if (AsmParser->hasInv2PiInlineImm() && Literal == 0x3fc45f306725feed) {
      setImmKindLiteral();
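      // 0x3fc45f306725feed is the double bit pattern the parser special-cases
      // as 1/(2*pi): when FeatureInv2PiInlineImm is available it is encoded as
      // the inline 1/(2*pi) constant instead of being forced into a literal.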
                            APFloat::rmNearestTiesToEven, &lost);
      uint64_t ImmVal = FPLiteral.bitcastToAPInt().getZExtValue();
        setImmKindMandatoryLiteral();
        setImmKindLiteral();

                               AsmParser->hasInv2PiInlineImm())) {
      setImmKindLiteral();
      setImmKindLiteral();
      setImmKindLiteral();
                                AsmParser->hasInv2PiInlineImm())) {
      setImmKindLiteral();
                              AsmParser->hasInv2PiInlineImm())) {
      setImmKindLiteral();
                                    AsmParser->hasInv2PiInlineImm()));
                                    AsmParser->hasInv2PiInlineImm()));
    setImmKindMandatoryLiteral();
    setImmKindMandatoryLiteral();
void AMDGPUOperand::addRegOperands(MCInst &Inst, unsigned N) const {

bool AMDGPUOperand::isInlineValue() const {

void AMDGPUAsmParser::createConstantSymbol(StringRef Id, int64_t Val) {
  if (Is == IS_VGPR) {
      return AMDGPU::VGPR_32RegClassID;
      return AMDGPU::VReg_64RegClassID;
      return AMDGPU::VReg_96RegClassID;
      return AMDGPU::VReg_128RegClassID;
      return AMDGPU::VReg_160RegClassID;
      return AMDGPU::VReg_192RegClassID;
      return AMDGPU::VReg_224RegClassID;
      return AMDGPU::VReg_256RegClassID;
      return AMDGPU::VReg_288RegClassID;
      return AMDGPU::VReg_320RegClassID;
      return AMDGPU::VReg_352RegClassID;
      return AMDGPU::VReg_384RegClassID;
      return AMDGPU::VReg_512RegClassID;
      return AMDGPU::VReg_1024RegClassID;
  } else if (Is == IS_TTMP) {
      return AMDGPU::TTMP_32RegClassID;
      return AMDGPU::TTMP_64RegClassID;
      return AMDGPU::TTMP_128RegClassID;
      return AMDGPU::TTMP_256RegClassID;
      return AMDGPU::TTMP_512RegClassID;
  } else if (Is == IS_SGPR) {
      return AMDGPU::SGPR_32RegClassID;
      return AMDGPU::SGPR_64RegClassID;
      return AMDGPU::SGPR_96RegClassID;
      return AMDGPU::SGPR_128RegClassID;
      return AMDGPU::SGPR_160RegClassID;
      return AMDGPU::SGPR_192RegClassID;
      return AMDGPU::SGPR_224RegClassID;
      return AMDGPU::SGPR_256RegClassID;
      return AMDGPU::SGPR_288RegClassID;
      return AMDGPU::SGPR_320RegClassID;
      return AMDGPU::SGPR_352RegClassID;
      return AMDGPU::SGPR_384RegClassID;
      return AMDGPU::SGPR_512RegClassID;
  } else if (Is == IS_AGPR) {
      return AMDGPU::AGPR_32RegClassID;
      return AMDGPU::AReg_64RegClassID;
      return AMDGPU::AReg_96RegClassID;
      return AMDGPU::AReg_128RegClassID;
      return AMDGPU::AReg_160RegClassID;
      return AMDGPU::AReg_192RegClassID;
      return AMDGPU::AReg_224RegClassID;
      return AMDGPU::AReg_256RegClassID;
      return AMDGPU::AReg_288RegClassID;
      return AMDGPU::AReg_320RegClassID;
      return AMDGPU::AReg_352RegClassID;
      return AMDGPU::AReg_384RegClassID;
      return AMDGPU::AReg_512RegClassID;
      return AMDGPU::AReg_1024RegClassID;
      .Case("exec", AMDGPU::EXEC)
      .Case("vcc", AMDGPU::VCC)
      .Case("flat_scratch", AMDGPU::FLAT_SCR)
      .Case("xnack_mask", AMDGPU::XNACK_MASK)
      .Case("shared_base", AMDGPU::SRC_SHARED_BASE)
      .Case("src_shared_base", AMDGPU::SRC_SHARED_BASE)
      .Case("shared_limit", AMDGPU::SRC_SHARED_LIMIT)
      .Case("src_shared_limit", AMDGPU::SRC_SHARED_LIMIT)
      .Case("private_base", AMDGPU::SRC_PRIVATE_BASE)
      .Case("src_private_base", AMDGPU::SRC_PRIVATE_BASE)
      .Case("private_limit", AMDGPU::SRC_PRIVATE_LIMIT)
      .Case("src_private_limit", AMDGPU::SRC_PRIVATE_LIMIT)
      .Case("pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID)
      .Case("src_pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID)
      .Case("lds_direct", AMDGPU::LDS_DIRECT)
      .Case("src_lds_direct", AMDGPU::LDS_DIRECT)
      .Case("m0", AMDGPU::M0)
      .Case("vccz", AMDGPU::SRC_VCCZ)
      .Case("src_vccz", AMDGPU::SRC_VCCZ)
      .Case("execz", AMDGPU::SRC_EXECZ)
      .Case("src_execz", AMDGPU::SRC_EXECZ)
      .Case("scc", AMDGPU::SRC_SCC)
      .Case("src_scc", AMDGPU::SRC_SCC)
      .Case("tba", AMDGPU::TBA)
      .Case("tma", AMDGPU::TMA)
      .Case("flat_scratch_lo", AMDGPU::FLAT_SCR_LO)
      .Case("flat_scratch_hi", AMDGPU::FLAT_SCR_HI)
      .Case("xnack_mask_lo", AMDGPU::XNACK_MASK_LO)
      .Case("xnack_mask_hi", AMDGPU::XNACK_MASK_HI)
      .Case("vcc_lo", AMDGPU::VCC_LO)
      .Case("vcc_hi", AMDGPU::VCC_HI)
      .Case("exec_lo", AMDGPU::EXEC_LO)
      .Case("exec_hi", AMDGPU::EXEC_HI)
      .Case("tma_lo", AMDGPU::TMA_LO)
      .Case("tma_hi", AMDGPU::TMA_HI)
      .Case("tba_lo", AMDGPU::TBA_LO)
      .Case("tba_hi", AMDGPU::TBA_HI)
      .Case("pc", AMDGPU::PC_REG)
      .Case("null", AMDGPU::SGPR_NULL)
bool AMDGPUAsmParser::ParseRegister(MCRegister &RegNo, SMLoc &StartLoc,
                                    SMLoc &EndLoc, bool RestoreOnFailure) {
  auto R = parseRegister();
  if (!R)
    return true;
  RegNo = R->getReg();
  StartLoc = R->getStartLoc();
  EndLoc = R->getEndLoc();

  return ParseRegister(Reg, StartLoc, EndLoc, false);

  bool Result = ParseRegister(Reg, StartLoc, EndLoc, true);
  bool PendingErrors = getParser().hasPendingError();
  getParser().clearPendingErrors();
bool AMDGPUAsmParser::AddNextRegisterToList(MCRegister &Reg, unsigned &RegWidth,
                                            RegisterKind RegKind,
  if (Reg == AMDGPU::EXEC_LO && Reg1 == AMDGPU::EXEC_HI) {
  if (Reg == AMDGPU::FLAT_SCR_LO && Reg1 == AMDGPU::FLAT_SCR_HI) {
    Reg = AMDGPU::FLAT_SCR;
  if (Reg == AMDGPU::XNACK_MASK_LO && Reg1 == AMDGPU::XNACK_MASK_HI) {
    Reg = AMDGPU::XNACK_MASK;
  if (Reg == AMDGPU::VCC_LO && Reg1 == AMDGPU::VCC_HI) {
  if (Reg == AMDGPU::TBA_LO && Reg1 == AMDGPU::TBA_HI) {
  if (Reg == AMDGPU::TMA_LO && Reg1 == AMDGPU::TMA_HI) {
    Error(Loc, "register does not fit in the list");
  if (Reg1 != Reg + RegWidth / 32) {
    Error(Loc, "registers in a list must have consecutive indices");

    {{"ttmp"}, IS_TTMP},

  return Kind == IS_VGPR ||

  if (Str.starts_with(Reg.Name))

  return !Str.getAsInteger(10, Num);
AMDGPUAsmParser::isRegister(const AsmToken &Token,
  if (!RegSuffix.empty()) {

AMDGPUAsmParser::isRegister()
  return isRegister(getToken(), peekToken());

MCRegister AMDGPUAsmParser::getRegularReg(RegisterKind RegKind, unsigned RegNum,
                                          unsigned SubReg, unsigned RegWidth,
  unsigned AlignSize = 1;
  if (RegKind == IS_SGPR || RegKind == IS_TTMP) {

  if (RegNum % AlignSize != 0) {
    Error(Loc, "invalid register alignment");

  unsigned RegIdx = RegNum / AlignSize;
    Error(Loc, "invalid or unsupported register size");
    Error(Loc, "register index is out of range");

  assert(Reg && "Invalid subregister!");
bool AMDGPUAsmParser::ParseRegRange(unsigned &Num, unsigned &RegWidth) {
  int64_t RegLo, RegHi;

  SMLoc FirstIdxLoc = getLoc();
    SecondIdxLoc = getLoc();

  if (!isUInt<32>(RegLo)) {
    Error(FirstIdxLoc, "invalid register index");

  if (!isUInt<32>(RegHi)) {
    Error(SecondIdxLoc, "invalid register index");

  if (RegLo > RegHi) {
    Error(FirstIdxLoc, "first register index should not exceed second index");

  Num = static_cast<unsigned>(RegLo);
  RegWidth = 32 * ((RegHi - RegLo) + 1);
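  // Example: for an operand written v[4:7], RegLo is 4 and RegHi is 7, so
  // Num becomes 4 and RegWidth becomes 32 * ((7 - 4) + 1) = 128 bits.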
MCRegister AMDGPUAsmParser::ParseSpecialReg(RegisterKind &RegKind,
  RegKind = IS_SPECIAL;

MCRegister AMDGPUAsmParser::ParseRegularReg(RegisterKind &RegKind,
  auto Loc = getLoc();
    Error(Loc, "invalid register name");

  unsigned SubReg = NoSubRegister;
  if (!RegSuffix.empty()) {
      Error(Loc, "invalid register index");

    if (!ParseRegRange(RegNum, RegWidth))

  return getRegularReg(RegKind, RegNum, SubReg, RegWidth, Loc);
}

MCRegister AMDGPUAsmParser::ParseRegList(RegisterKind &RegKind,
                                         unsigned &RegNum, unsigned &RegWidth,
  auto ListLoc = getLoc();
                 "expected a register or a list of registers")) {

  auto Loc = getLoc();
  if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth))
  if (RegWidth != 32) {
    Error(Loc, "expected a single 32-bit register");

  RegisterKind NextRegKind;
  unsigned NextRegNum, NextRegWidth;
    if (!ParseAMDGPURegister(NextRegKind, NextReg,
                             NextRegNum, NextRegWidth,
    if (NextRegWidth != 32) {
      Error(Loc, "expected a single 32-bit register");
    if (NextRegKind != RegKind) {
      Error(Loc, "registers in a list must be of the same kind");
    if (!AddNextRegisterToList(Reg, RegWidth, RegKind, NextReg, Loc))
                 "expected a comma or a closing square bracket")) {

  Reg = getRegularReg(RegKind, RegNum, NoSubRegister, RegWidth, ListLoc);
bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind,
  auto Loc = getLoc();
    Reg = ParseSpecialReg(RegKind, RegNum, RegWidth, Tokens);
    Reg = ParseRegularReg(RegKind, RegNum, RegWidth, Tokens);
    Reg = ParseRegList(RegKind, RegNum, RegWidth, Tokens);
    assert(Parser.hasPendingError());

  if (!subtargetHasRegister(*TRI, Reg)) {
    if (Reg == AMDGPU::SGPR_NULL) {
      Error(Loc, "'null' operand is not supported on this GPU");
            " register not available on this GPU");

bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind,
                                          bool RestoreOnFailure) {
  if (ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth, Tokens)) {
    if (RestoreOnFailure) {
      while (!Tokens.empty()) {

std::optional<StringRef>
AMDGPUAsmParser::getGprCountSymbolName(RegisterKind RegKind) {
    return StringRef(".amdgcn.next_free_vgpr");
    return StringRef(".amdgcn.next_free_sgpr");
  return std::nullopt;
void AMDGPUAsmParser::initializeGprCountSymbol(RegisterKind RegKind) {
  auto SymbolName = getGprCountSymbolName(RegKind);
  assert(SymbolName && "initializing invalid register kind");
  MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName);

bool AMDGPUAsmParser::updateGprCountSymbols(RegisterKind RegKind,
                                            unsigned DwordRegIndex,
                                            unsigned RegWidth) {
  auto SymbolName = getGprCountSymbolName(RegKind);
  MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName);

  int64_t NewMax = DwordRegIndex + divideCeil(RegWidth, 32) - 1;

  if (!Sym->isVariable())
    return !Error(getLoc(),
                  ".amdgcn.next_free_{v,s}gpr symbols must be variable");
  if (!Sym->getVariableValue(false)->evaluateAsAbsolute(OldCount))
        ".amdgcn.next_free_{v,s}gpr symbols must be absolute expressions");
  if (OldCount <= NewMax)
std::unique_ptr<AMDGPUOperand>
AMDGPUAsmParser::parseRegister(bool RestoreOnFailure) {
  const auto &Tok = getToken();
  SMLoc StartLoc = Tok.getLoc();
  SMLoc EndLoc = Tok.getEndLoc();
  RegisterKind RegKind;
  unsigned RegNum, RegWidth;

  if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth)) {
    if (!updateGprCountSymbols(RegKind, RegNum, RegWidth))
  KernelScope.usesRegister(RegKind, RegNum, RegWidth);
  return AMDGPUOperand::CreateReg(this, Reg, StartLoc, EndLoc);
                                  bool HasSP3AbsModifier, bool HasLit) {
  HasLit = trySkipId("lit");

  const auto &Tok = getToken();
  const auto &NextTok = peekToken();

  bool Negate = false;

  AMDGPUOperand::Modifiers Mods;

    APFloat RealVal(APFloat::IEEEdouble());
    auto roundMode = APFloat::rmNearestTiesToEven;
    if (errorToBool(RealVal.convertFromString(Num, roundMode).takeError()))
      RealVal.changeSign();

        AMDGPUOperand::CreateImm(this, RealVal.bitcastToAPInt().getZExtValue(), S,
                                 AMDGPUOperand::ImmTyNone, true));
    AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
    Op.setModifiers(Mods);

    if (HasSP3AbsModifier) {
      if (getParser().parsePrimaryExpr(Expr, EndLoc, nullptr))
      if (Parser.parseExpression(Expr))

    if (Expr->evaluateAsAbsolute(IntVal)) {
      Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S));
      AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
      Op.setModifiers(Mods);
      Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S));
  if (auto R = parseRegister()) {

                                  bool HasSP3AbsMod, bool HasLit) {
    return parseImm(Operands, HasSP3AbsMod, HasLit);

AMDGPUAsmParser::isNamedOperandModifier(const AsmToken &Token,
                                        const AsmToken &NextToken) const {
  return str == "abs" || str == "neg" || str == "sext";

AMDGPUAsmParser::isOpcodeModifierWithVal(const AsmToken &Token,
                                         const AsmToken &NextToken) const {

AMDGPUAsmParser::isOperandModifier(const AsmToken &Token,
                                   const AsmToken &NextToken) const {
  return isNamedOperandModifier(Token, NextToken) || Token.is(AsmToken::Pipe);

AMDGPUAsmParser::isRegOrOperandModifier(const AsmToken &Token,
                                        const AsmToken &NextToken) const {
  return isRegister(Token, NextToken) || isOperandModifier(Token, NextToken);
AMDGPUAsmParser::isModifier() {
  peekTokens(NextToken);

  return isOperandModifier(Tok, NextToken[0]) ||
         (Tok.is(AsmToken::Minus) &&
          isRegOrOperandModifier(NextToken[0], NextToken[1])) ||
         isOpcodeModifierWithVal(Tok, NextToken[0]);

AMDGPUAsmParser::parseSP3NegModifier() {
  peekTokens(NextToken);
      (isRegister(NextToken[0], NextToken[1]) ||
       isId(NextToken[0], "abs"))) {

    return Error(getLoc(), "invalid syntax, expected 'neg' modifier");

  SP3Neg = parseSP3NegModifier();

  Neg = trySkipId("neg");
    return Error(Loc, "expected register or immediate");

  Abs = trySkipId("abs");

  Lit = trySkipId("lit");
    return Error(Loc, "expected register or immediate");

  Res = parseRegOrImm(Operands, SP3Abs, Lit);

  if (Lit && !Operands.back()->isImm())
    Error(Loc, "expected immediate with lit modifier");

  if (SP3Abs && !skipToken(AsmToken::Pipe, "expected vertical bar"))

  AMDGPUOperand::Modifiers Mods;
  Mods.Abs = Abs || SP3Abs;
  Mods.Neg = Neg || SP3Neg;

  if (Mods.hasFPModifiers() || Lit) {
    AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
      return Error(Op.getStartLoc(), "expected an absolute expression");
    Op.setModifiers(Mods);
  bool Sext = trySkipId("sext");
  if (Sext && !skipToken(AsmToken::LParen, "expected left paren after sext"))

  AMDGPUOperand::Modifiers Mods;

  if (Mods.hasIntModifiers()) {
    AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
      return Error(Op.getStartLoc(), "expected an absolute expression");
    Op.setModifiers(Mods);

  return parseRegOrImmWithFPInputMods(Operands, false);

  return parseRegOrImmWithIntInputMods(Operands, false);

  auto Loc = getLoc();
  if (trySkipId("off")) {
    Operands.push_back(AMDGPUOperand::CreateImm(this, 0, Loc,
                                                AMDGPUOperand::ImmTyOff, false));

  std::unique_ptr<AMDGPUOperand> Reg = parseRegister();
    Operands.push_back(std::move(Reg));
unsigned AMDGPUAsmParser::checkTargetMatchPredicate(MCInst &Inst) {
    return Match_InvalidOperand;

  if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi ||
      Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) {
    if (!Op.isImm() || Op.getImm() != AMDGPU::SDWA::SdwaSel::DWORD) {
      return Match_InvalidOperand;

  return Match_Success;

  static const unsigned Variants[] = {

  if (isForcedDPP() && isForcedVOP3()) {
  if (getForcedEncodingSize() == 32) {
  if (isForcedVOP3()) {
  if (isForcedSDWA()) {
  if (isForcedDPP()) {

StringRef AMDGPUAsmParser::getMatchedVariantName() const {
  if (isForcedDPP() && isForcedVOP3())
  if (getForcedEncodingSize() == 32)
unsigned AMDGPUAsmParser::findImplicitSGPRReadInVOP(const MCInst &Inst) const {
  case AMDGPU::FLAT_SCR:
  case AMDGPU::VCC_LO:
  case AMDGPU::VCC_HI:
  return AMDGPU::NoRegister;

bool AMDGPUAsmParser::isInlineConstant(const MCInst &Inst,
                                       unsigned OpIdx) const {
  int64_t Val = MO.getImm();
unsigned AMDGPUAsmParser::getConstantBusLimit(unsigned Opcode) const {
  case AMDGPU::V_LSHLREV_B64_e64:
  case AMDGPU::V_LSHLREV_B64_gfx10:
  case AMDGPU::V_LSHLREV_B64_e64_gfx11:
  case AMDGPU::V_LSHLREV_B64_e32_gfx12:
  case AMDGPU::V_LSHLREV_B64_e64_gfx12:
  case AMDGPU::V_LSHRREV_B64_e64:
  case AMDGPU::V_LSHRREV_B64_gfx10:
  case AMDGPU::V_LSHRREV_B64_e64_gfx11:
  case AMDGPU::V_LSHRREV_B64_e64_gfx12:
  case AMDGPU::V_ASHRREV_I64_e64:
  case AMDGPU::V_ASHRREV_I64_gfx10:
  case AMDGPU::V_ASHRREV_I64_e64_gfx11:
  case AMDGPU::V_ASHRREV_I64_e64_gfx12:
  case AMDGPU::V_LSHL_B64_e64:
  case AMDGPU::V_LSHR_B64_e64:
  case AMDGPU::V_ASHR_I64_e64:

                              bool AddMandatoryLiterals = false) {
  int16_t ImmDeferredIdx =
const MCInst &Inst,
unsigned OpIdx) {
3718 return !isInlineConstant(Inst, OpIdx);
3725 return isSGPR(PReg,
TRI) && PReg != SGPR_NULL;
3736 const unsigned Opcode = Inst.
getOpcode();
3737 if (Opcode != V_WRITELANE_B32_gfx6_gfx7 && Opcode != V_WRITELANE_B32_vi)
3740 if (!LaneSelOp.
isReg())
3743 return LaneSelReg ==
M0 || LaneSelReg == M0_gfxpre11;
3746bool AMDGPUAsmParser::validateConstantBusLimitations(
3748 const unsigned Opcode = Inst.
getOpcode();
3751 unsigned ConstantBusUseCount = 0;
3752 unsigned NumLiterals = 0;
3753 unsigned LiteralSize;
3755 if (!(
Desc.TSFlags &
3771 unsigned SGPRUsed = findImplicitSGPRReadInVOP(Inst);
3772 if (SGPRUsed != AMDGPU::NoRegister) {
3773 SGPRsUsed.
insert(SGPRUsed);
3774 ++ConstantBusUseCount;
3779 for (
int OpIdx : OpIndices) {
3784 if (usesConstantBus(Inst, OpIdx)) {
3793 if (SGPRsUsed.
insert(LastSGPR).second) {
3794 ++ConstantBusUseCount;
3814 if (NumLiterals == 0) {
3817 }
else if (LiteralSize !=
Size) {
3823 ConstantBusUseCount += NumLiterals;
3825 if (ConstantBusUseCount <= getConstantBusLimit(Opcode))
3831 Error(Loc,
"invalid operand (violates constant bus restrictions)");
bool AMDGPUAsmParser::validateVOPDRegBankConstraints(
    const MCInst &Inst, const OperandVector &Operands) {
  const unsigned Opcode = Inst.getOpcode();
  // ...
  auto getVRegIdx = [&](unsigned, unsigned OperandIdx) {
    // ...
  };

  bool SkipSrc = Opcode == AMDGPU::V_DUAL_MOV_B32_e32_X_MOV_B32_e32_gfx12;

  auto InvalidCompOprIdx =
      InstInfo.getInvalidCompOperandIndex(getVRegIdx, SkipSrc);
  if (!InvalidCompOprIdx)
    return true;

  auto CompOprIdx = *InvalidCompOprIdx;
  auto ParsedIdx =
      std::max(InstInfo[VOPD::X].getIndexInParsedOperands(CompOprIdx),
               InstInfo[VOPD::Y].getIndexInParsedOperands(CompOprIdx));
  // ...
  auto Loc = ((AMDGPUOperand &)*Operands[ParsedIdx]).getStartLoc();
  if (CompOprIdx == VOPD::Component::DST) {
    Error(Loc, "one dst register must be even and the other odd");
  } else {
    auto CompSrcIdx = CompOprIdx - VOPD::Component::DST_NUM;
    // ...
          " operands must use different VGPR banks");
  }
  return false;
}
bool AMDGPUAsmParser::validateIntClampSupported(const MCInst &Inst) {
  // ...
}

bool AMDGPUAsmParser::validateMIMGDataSize(const MCInst &Inst,
                                           const SMLoc &IDLoc) {
  // ...
  unsigned TFESize = (TFEIdx != -1 && Inst.getOperand(TFEIdx).getImm()) ? 1 : 0;
  // ...
  bool IsPackedD16 = false;
  // ...
    IsPackedD16 = D16Idx >= 0;
    // ...
      DataSize = (DataSize + 1) / 2;
  // ...
  if ((VDataSize / 4) == DataSize + TFESize)
    return true;
  // ...
    Modifiers = IsPackedD16 ? "dmask and d16" : "dmask";
  // ...
    Modifiers = IsPackedD16 ? "dmask, d16 and tfe" : "dmask and tfe";

  Error(IDLoc, Twine("image data size does not match ") + Modifiers);
  return false;
}
bool AMDGPUAsmParser::validateMIMGAddrSize(const MCInst &Inst,
                                           const SMLoc &IDLoc) {
  // ...
                        : AMDGPU::OpName::rsrc;
  // ...
  assert(SrsrcIdx > VAddr0Idx);
  // ...
  if (BaseOpcode->BVH) {
    if (IsA16 == BaseOpcode->A16)
      return true;
    Error(IDLoc, "image address size does not match a16");
    return false;
  }
  // ...
  bool IsNSA = SrsrcIdx - VAddr0Idx > 1;
  unsigned ActualAddrSize =
      IsNSA ? SrsrcIdx - VAddr0Idx
  // ...
  unsigned ExpectedAddrSize =
  // ...
  if (hasPartialNSAEncoding() &&
  // ...
    int VAddrLastIdx = SrsrcIdx - 1;
    unsigned VAddrLastSize =
    // ...
    ActualAddrSize = VAddrLastIdx - VAddr0Idx + VAddrLastSize;
  // ...
    if (ExpectedAddrSize > 12)
      ExpectedAddrSize = 16;
  // ...
    if (ActualAddrSize == 8 && (ExpectedAddrSize >= 5 && ExpectedAddrSize <= 7))
      return true;
  // ...
  if (ActualAddrSize == ExpectedAddrSize)
    return true;

  Error(IDLoc, "image address size does not match dim and a16");
  return false;
}
bool AMDGPUAsmParser::validateMIMGAtomicDMask(const MCInst &Inst) {
  // ...
  if (!Desc.mayLoad() || !Desc.mayStore())
    return true; // Not an atomic.
  // ...
  return DMask == 0x1 || DMask == 0x3 || DMask == 0xf;
}

bool AMDGPUAsmParser::validateMIMGGatherDMask(const MCInst &Inst) {
  // ...
  return DMask == 0x1 || DMask == 0x2 || DMask == 0x4 || DMask == 0x8;
}
bool AMDGPUAsmParser::validateMIMGDim(const MCInst &Inst,
                                      const OperandVector &Operands) {
  // ...
  for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
    // ...
  }
  // ...
}

bool AMDGPUAsmParser::validateMIMGMSAA(const MCInst &Inst) {
  // ...
  if (!BaseOpcode->MSAA)
    return true;
  // ...
  return DimInfo->MSAA;
}

// ...
  case AMDGPU::V_MOVRELS_B32_sdwa_gfx10:
  case AMDGPU::V_MOVRELSD_B32_sdwa_gfx10:
  case AMDGPU::V_MOVRELSD_2_B32_sdwa_gfx10:
  // ...
bool AMDGPUAsmParser::validateMovrels(const MCInst &Inst,
                                      const OperandVector &Operands) {
  // ...
  Error(ErrLoc, "source operand must be a VGPR");
  return false;
}

bool AMDGPUAsmParser::validateMAIAccWrite(const MCInst &Inst,
                                          const OperandVector &Operands) {
  // ...
  if (Opc != AMDGPU::V_ACCVGPR_WRITE_B32_vi)
    return true;
  // ...
        "source operand must be either a VGPR or an inline constant");
  // ...
}

bool AMDGPUAsmParser::validateMAISrc2(const MCInst &Inst,
                                      const OperandVector &Operands) {
  // ...
      !getFeatureBits()[FeatureMFMAInlineLiteralBug])
    return true;
  // ...
  if (Inst.getOperand(Src2Idx).isImm() && isInlineConstant(Inst, Src2Idx)) {
    // ...
          "inline constants are not allowed for this operand");
    return false;
  }
  // ...
}
bool AMDGPUAsmParser::validateMFMA(const MCInst &Inst,
                                   const OperandVector &Operands) {
  // ...
  if (BlgpIdx != -1) {
    // ...
          "wrong register tuple size for cbsz value " + Twine(CBSZ));
    // ...
          "wrong register tuple size for blgp value " + Twine(BLGP));
  }
  // ...
  if (Src2Reg == DstReg)
    return true;
  // ...
  if (TRI->getRegClass(Desc.operands()[0].RegClass).getSizeInBits() <= 128)
    return true;

  if (TRI->regsOverlap(Src2Reg, DstReg)) {
    // ...
          "source 2 operand must not partially overlap with dst");
    return false;
  }
  // ...
}
bool AMDGPUAsmParser::validateDivScale(const MCInst &Inst) {
  // ...
  case V_DIV_SCALE_F32_gfx6_gfx7:
  case V_DIV_SCALE_F32_vi:
  case V_DIV_SCALE_F32_gfx10:
  case V_DIV_SCALE_F64_gfx6_gfx7:
  case V_DIV_SCALE_F64_vi:
  case V_DIV_SCALE_F64_gfx10:
  // ...
  for (auto Name : {AMDGPU::OpName::src0_modifiers,
                    AMDGPU::OpName::src2_modifiers,
                    AMDGPU::OpName::src2_modifiers}) {
  // ...
bool AMDGPUAsmParser::validateMIMGD16(const MCInst &Inst) {
  // ...
}

static bool IsRevOpcode(const unsigned Opcode) {
  switch (Opcode) {
  case AMDGPU::V_SUBREV_F32_e32:
  case AMDGPU::V_SUBREV_F32_e64:
  case AMDGPU::V_SUBREV_F32_e32_gfx10:
  case AMDGPU::V_SUBREV_F32_e32_gfx6_gfx7:
  case AMDGPU::V_SUBREV_F32_e32_vi:
  case AMDGPU::V_SUBREV_F32_e64_gfx10:
  case AMDGPU::V_SUBREV_F32_e64_gfx6_gfx7:
  case AMDGPU::V_SUBREV_F32_e64_vi:

  case AMDGPU::V_SUBREV_CO_U32_e32:
  case AMDGPU::V_SUBREV_CO_U32_e64:
  case AMDGPU::V_SUBREV_I32_e32_gfx6_gfx7:
  case AMDGPU::V_SUBREV_I32_e64_gfx6_gfx7:

  case AMDGPU::V_SUBBREV_U32_e32:
  case AMDGPU::V_SUBBREV_U32_e64:
  case AMDGPU::V_SUBBREV_U32_e32_gfx6_gfx7:
  case AMDGPU::V_SUBBREV_U32_e32_vi:
  case AMDGPU::V_SUBBREV_U32_e64_gfx6_gfx7:
  case AMDGPU::V_SUBBREV_U32_e64_vi:

  case AMDGPU::V_SUBREV_U32_e32:
  case AMDGPU::V_SUBREV_U32_e64:
  case AMDGPU::V_SUBREV_U32_e32_gfx9:
  case AMDGPU::V_SUBREV_U32_e32_vi:
  case AMDGPU::V_SUBREV_U32_e64_gfx9:
  case AMDGPU::V_SUBREV_U32_e64_vi:

  case AMDGPU::V_SUBREV_F16_e32:
  case AMDGPU::V_SUBREV_F16_e64:
  case AMDGPU::V_SUBREV_F16_e32_gfx10:
  case AMDGPU::V_SUBREV_F16_e32_vi:
  case AMDGPU::V_SUBREV_F16_e64_gfx10:
  case AMDGPU::V_SUBREV_F16_e64_vi:

  case AMDGPU::V_SUBREV_U16_e32:
  case AMDGPU::V_SUBREV_U16_e64:
  case AMDGPU::V_SUBREV_U16_e32_vi:
  case AMDGPU::V_SUBREV_U16_e64_vi:

  case AMDGPU::V_SUBREV_CO_U32_e32_gfx9:
  case AMDGPU::V_SUBREV_CO_U32_e64_gfx10:
  case AMDGPU::V_SUBREV_CO_U32_e64_gfx9:

  case AMDGPU::V_SUBBREV_CO_U32_e32_gfx9:
  case AMDGPU::V_SUBBREV_CO_U32_e64_gfx9:

  case AMDGPU::V_SUBREV_NC_U32_e32_gfx10:
  case AMDGPU::V_SUBREV_NC_U32_e64_gfx10:

  case AMDGPU::V_SUBREV_CO_CI_U32_e32_gfx10:
  case AMDGPU::V_SUBREV_CO_CI_U32_e64_gfx10:

  case AMDGPU::V_LSHRREV_B32_e32:
  case AMDGPU::V_LSHRREV_B32_e64:
  case AMDGPU::V_LSHRREV_B32_e32_gfx6_gfx7:
  case AMDGPU::V_LSHRREV_B32_e64_gfx6_gfx7:
  case AMDGPU::V_LSHRREV_B32_e32_vi:
  case AMDGPU::V_LSHRREV_B32_e64_vi:
  case AMDGPU::V_LSHRREV_B32_e32_gfx10:
  case AMDGPU::V_LSHRREV_B32_e64_gfx10:

  case AMDGPU::V_ASHRREV_I32_e32:
  case AMDGPU::V_ASHRREV_I32_e64:
  case AMDGPU::V_ASHRREV_I32_e32_gfx10:
  case AMDGPU::V_ASHRREV_I32_e32_gfx6_gfx7:
  case AMDGPU::V_ASHRREV_I32_e32_vi:
  case AMDGPU::V_ASHRREV_I32_e64_gfx10:
  case AMDGPU::V_ASHRREV_I32_e64_gfx6_gfx7:
  case AMDGPU::V_ASHRREV_I32_e64_vi:

  case AMDGPU::V_LSHLREV_B32_e32:
  case AMDGPU::V_LSHLREV_B32_e64:
  case AMDGPU::V_LSHLREV_B32_e32_gfx10:
  case AMDGPU::V_LSHLREV_B32_e32_gfx6_gfx7:
  case AMDGPU::V_LSHLREV_B32_e32_vi:
  case AMDGPU::V_LSHLREV_B32_e64_gfx10:
  case AMDGPU::V_LSHLREV_B32_e64_gfx6_gfx7:
  case AMDGPU::V_LSHLREV_B32_e64_vi:

  case AMDGPU::V_LSHLREV_B16_e32:
  case AMDGPU::V_LSHLREV_B16_e64:
  case AMDGPU::V_LSHLREV_B16_e32_vi:
  case AMDGPU::V_LSHLREV_B16_e64_vi:
  case AMDGPU::V_LSHLREV_B16_gfx10:

  case AMDGPU::V_LSHRREV_B16_e32:
  case AMDGPU::V_LSHRREV_B16_e64:
  case AMDGPU::V_LSHRREV_B16_e32_vi:
  case AMDGPU::V_LSHRREV_B16_e64_vi:
  case AMDGPU::V_LSHRREV_B16_gfx10:

  case AMDGPU::V_ASHRREV_I16_e32:
  case AMDGPU::V_ASHRREV_I16_e64:
  case AMDGPU::V_ASHRREV_I16_e32_vi:
  case AMDGPU::V_ASHRREV_I16_e64_vi:
  case AMDGPU::V_ASHRREV_I16_gfx10:

  case AMDGPU::V_LSHLREV_B64_e64:
  case AMDGPU::V_LSHLREV_B64_gfx10:
  case AMDGPU::V_LSHLREV_B64_vi:

  case AMDGPU::V_LSHRREV_B64_e64:
  case AMDGPU::V_LSHRREV_B64_gfx10:
  case AMDGPU::V_LSHRREV_B64_vi:

  case AMDGPU::V_ASHRREV_I64_e64:
  case AMDGPU::V_ASHRREV_I64_gfx10:
  case AMDGPU::V_ASHRREV_I64_vi:

  case AMDGPU::V_PK_LSHLREV_B16:
  case AMDGPU::V_PK_LSHLREV_B16_gfx10:
  case AMDGPU::V_PK_LSHLREV_B16_vi:

  case AMDGPU::V_PK_LSHRREV_B16:
  case AMDGPU::V_PK_LSHRREV_B16_gfx10:
  case AMDGPU::V_PK_LSHRREV_B16_vi:
  case AMDGPU::V_PK_ASHRREV_I16:
  case AMDGPU::V_PK_ASHRREV_I16_gfx10:
  case AMDGPU::V_PK_ASHRREV_I16_vi:
    return true;
  default:
    return false;
  }
}
std::optional<StringRef>
AMDGPUAsmParser::validateLdsDirect(const MCInst &Inst) {
  using namespace SIInstrFlags;
  const unsigned Opcode = Inst.getOpcode();
  // ...
  if ((Desc.TSFlags & Enc) == 0)
    return std::nullopt;

  for (auto SrcName : {OpName::src0, OpName::src1, OpName::src2}) {
    // ...
    if (Src.isReg() && Src.getReg() == LDS_DIRECT) {
      // ...
        return StringRef("lds_direct is not supported on this GPU");
      // ...
        return StringRef("lds_direct cannot be used with this instruction");
      if (SrcName != OpName::src0)
        return StringRef("lds_direct may be used as src0 only");
    }
  }

  return std::nullopt;
}
// ...
  for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
    if (Op.isFlatOffset())
      return Op.getStartLoc();
  }
  // ...

bool AMDGPUAsmParser::validateOffset(const MCInst &Inst,
                                     const OperandVector &Operands) {
  // ...
    return validateFlatOffset(Inst, Operands);
  // ...
    return validateSMEMOffset(Inst, Operands);
  // ...
  {
    const unsigned OffsetSize = 24;
    if (!isIntN(OffsetSize, Op.getImm())) {
      // ...
            Twine("expected a ") + Twine(OffsetSize) + "-bit signed offset");
      return false;
    }
  }
  // ...
  {
    const unsigned OffsetSize = 16;
    if (!isUIntN(OffsetSize, Op.getImm())) {
      // ...
            Twine("expected a ") + Twine(OffsetSize) + "-bit unsigned offset");
      return false;
    }
  }
  // ...
bool AMDGPUAsmParser::validateFlatOffset(const MCInst &Inst,
                                         const OperandVector &Operands) {
  // ...
  if (!hasFlatOffsets() && Op.getImm() != 0) {
    // ...
          "flat offset modifier is not supported on this GPU");
    return false;
  }
  // ...
  bool AllowNegative =
  // ...
  if (!isIntN(OffsetSize, Op.getImm()) ||
      (!AllowNegative && Op.getImm() < 0)) {
    // ...
          Twine("expected a ") +
              (AllowNegative ? Twine(OffsetSize) + "-bit signed offset"
                             : Twine(OffsetSize - 1) + "-bit unsigned offset"));
    return false;
  }
  // ...
// ...
  for (unsigned i = 2, e = Operands.size(); i != e; ++i) {
    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
    if (Op.isSMEMOffset() || Op.isSMEMOffsetMod())
      return Op.getStartLoc();
  }
  // ...

bool AMDGPUAsmParser::validateSMEMOffset(const MCInst &Inst,
                                         const OperandVector &Operands) {
  // ...
        : (isVI() || IsBuffer) ? "expected a 20-bit unsigned offset"
                               : "expected a 21-bit signed offset");
  return false;
}
bool AMDGPUAsmParser::validateSOPLiteral(const MCInst &Inst) const {
  // ...
  const int OpIndices[] = { Src0Idx, Src1Idx };

  unsigned NumExprs = 0;
  unsigned NumLiterals = 0;
  // ...
  for (int OpIdx : OpIndices) {
    if (OpIdx == -1)
      break;
    // ...
    if (MO.isImm() && !isInlineConstant(Inst, OpIdx)) {
      // ...
      if (NumLiterals == 0 || LiteralValue != Value) {
        // ...
      }
    } else if (MO.isExpr()) {
      // ...
    }
  }

  return NumLiterals + NumExprs <= 1;
}
bool AMDGPUAsmParser::validateOpSel(const MCInst &Inst) {
  // ...
  if (OpSelIdx != -1) {
    // ...
  }
  if (OpSelHiIdx != -1) {
    // ...
  }
  // ...
}

bool AMDGPUAsmParser::validateNeg(const MCInst &Inst, int OpName) {
  // ...
  int SrcMods[3] = {AMDGPU::OpName::src0_modifiers,
                    AMDGPU::OpName::src1_modifiers,
                    AMDGPU::OpName::src2_modifiers};

  for (unsigned i = 0; i < 3; ++i) {
    // ...
  }
  // ...
}
bool AMDGPUAsmParser::validateDPP(const MCInst &Inst,
                                  const OperandVector &Operands) {
  // ...
  if (DppCtrlIdx >= 0) {
    // ...
      Error(S, "DP ALU dpp only supports row_newbcast");
      return false;
  }

  bool IsDPP = DppCtrlIdx >= 0 || Dpp8Idx >= 0;
  // ...
      Error(S, "invalid operand for instruction");
      // ...
            "src1 immediate operand invalid for instruction");
  // ...
}

bool AMDGPUAsmParser::validateVccOperand(MCRegister Reg) const {
  auto FB = getFeatureBits();
  return (FB[AMDGPU::FeatureWavefrontSize64] && Reg == AMDGPU::VCC) ||
         (FB[AMDGPU::FeatureWavefrontSize32] && Reg == AMDGPU::VCC_LO);
}
bool AMDGPUAsmParser::validateVOPLiteral(const MCInst &Inst,
                                         const OperandVector &Operands) {
  // ...
      !HasMandatoryLiteral && !isVOPD(Opcode))
    return true;
  // ...
  unsigned NumExprs = 0;
  unsigned NumLiterals = 0;
  // ...
  for (int OpIdx : OpIndices) {
    // ...
    if (MO.isImm() && !isInlineConstant(Inst, OpIdx)) {
      // ...
      if (!IsValid32Op && !isInt<32>(Value) && !isUInt<32>(Value)) {
        Error(getLitLoc(Operands), "invalid operand for instruction");
        return false;
      }
      // ...
      if (IsFP64 && IsValid32Op)
      // ...
      if (NumLiterals == 0 || LiteralValue != Value) {
        // ...
      }
    } else if (MO.isExpr()) {
      // ...
    }
  }
  NumLiterals += NumExprs;
  // ...
  if (!HasMandatoryLiteral && !getFeatureBits()[FeatureVOP3Literal]) {
    Error(getLitLoc(Operands), "literal operands are not supported");
    return false;
  }
  // ...
  if (NumLiterals > 1) {
    Error(getLitLoc(Operands, true),
          "only one unique literal operand is allowed");
    return false;
  }
  // ...
}
  // ...
  auto Reg = Sub ? Sub : Op.getReg();
  // ...
  return AGPR32.contains(Reg) ? 1 : 0;
}

bool AMDGPUAsmParser::validateAGPRLdSt(const MCInst &Inst) const {
  // ...
                       : AMDGPU::OpName::vdata;
  // ...
  if (Data2Areg >= 0 && Data2Areg != DataAreg)
    return false;
  // ...
  auto FB = getFeatureBits();
  if (FB[AMDGPU::FeatureGFX90AInsts]) {
    if (DataAreg < 0 || DstAreg < 0)
      return true;
    return DstAreg == DataAreg;
  }

  return DstAreg < 1 && DataAreg < 1;
}
bool AMDGPUAsmParser::validateVGPRAlign(const MCInst &Inst) const {
  auto FB = getFeatureBits();
  if (!FB[AMDGPU::FeatureGFX90AInsts])
    return true;
  // ...
    if (VGPR32.contains(Sub) && ((Sub - AMDGPU::VGPR0) & 1))
      return false;
    if (AGPR32.contains(Sub) && ((Sub - AMDGPU::AGPR0) & 1))
      return false;
  // ...
}

// ...
  for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
    // ...
      return Op.getStartLoc();
  }
  // ...
bool AMDGPUAsmParser::validateBLGP(const MCInst &Inst,
                                   const OperandVector &Operands) {
  // ...
  auto FB = getFeatureBits();
  bool UsesNeg = false;
  if (FB[AMDGPU::FeatureGFX940Insts]) {
    switch (Opc) {
    case AMDGPU::V_MFMA_F64_16X16X4F64_gfx940_acd:
    case AMDGPU::V_MFMA_F64_16X16X4F64_gfx940_vcd:
    case AMDGPU::V_MFMA_F64_4X4X4F64_gfx940_acd:
    case AMDGPU::V_MFMA_F64_4X4X4F64_gfx940_vcd:
      UsesNeg = true;
    }
  }

  if (IsNeg == UsesNeg)
    return true;
  // ...
        UsesNeg ? "invalid modifier: blgp is not supported"
                : "invalid modifier: neg is not supported");
  return false;
}
bool AMDGPUAsmParser::validateWaitCnt(const MCInst &Inst,
                                      const OperandVector &Operands) {
  // ...
  if (Opc != AMDGPU::S_WAITCNT_EXPCNT_gfx11 &&
      Opc != AMDGPU::S_WAITCNT_LGKMCNT_gfx11 &&
      Opc != AMDGPU::S_WAITCNT_VMCNT_gfx11 &&
      Opc != AMDGPU::S_WAITCNT_VSCNT_gfx11)
    return true;
  // ...
  if (Reg == AMDGPU::SGPR_NULL)
    return true;
  // ...
  Error(RegLoc, "src0 must be null");
  return false;
}

bool AMDGPUAsmParser::validateDS(const MCInst &Inst,
                                 const OperandVector &Operands) {
  // ...
    return validateGWS(Inst, Operands);
  // ...
  Error(S, "gds modifier is not supported on this GPU");
  return false;
}
bool AMDGPUAsmParser::validateGWS(const MCInst &Inst,
                                  const OperandVector &Operands) {
  if (!getFeatureBits()[AMDGPU::FeatureGFX90AInsts])
    return true;
  // ...
  if (Opc != AMDGPU::DS_GWS_INIT_vi && Opc != AMDGPU::DS_GWS_BARRIER_vi &&
      Opc != AMDGPU::DS_GWS_SEMA_BR_vi)
    return true;
  // ...
  auto RegIdx = Reg - (VGPR32.contains(Reg) ? AMDGPU::VGPR0 : AMDGPU::AGPR0);
  // ...
  Error(RegLoc, "vgpr must be even aligned");
  return false;
}
bool AMDGPUAsmParser::validateCoherencyBits(const MCInst &Inst,
                                            const OperandVector &Operands,
                                            const SMLoc &IDLoc) {
  // ...
                                        AMDGPU::OpName::cpol);
  // ...
    return validateTHAndScopeBits(Inst, Operands, CPol);
  // ...
    Error(S, "cache policy is not supported for SMRD instructions");
    return false;
  // ...
    Error(IDLoc, "invalid cache policy for SMEM instruction");
    return false;
  // ...
  if (!(TSFlags & AllowSCCModifier)) {
    // ...
          "scc modifier is not supported for this instruction on this GPU");
  }
  // ...
                 : "instruction must use glc");
  // ...
          &CStr.data()[CStr.find(isGFX940() ? "sc0" : "glc")]);
  // ...
                 : "instruction must not use glc");
  // ...
}
bool AMDGPUAsmParser::validateTHAndScopeBits(const MCInst &Inst,
                                             const OperandVector &Operands,
                                             const unsigned CPol) {
  // ...
  const unsigned Opcode = Inst.getOpcode();
  // ...
      return PrintError("instruction must use th:TH_ATOMIC_RETURN");
  // ...
      return PrintError("invalid th value for SMEM instruction");
  // ...
      return PrintError("scope and th combination is not valid");
  // ...
      return PrintError("invalid th value for atomic instructions");
  } else if (IsStore) {
    // ...
      return PrintError("invalid th value for store instructions");
  } else {
    // ...
      return PrintError("invalid th value for load instructions");
  }
  // ...
}
bool AMDGPUAsmParser::validateTFE(const MCInst &Inst,
                                  const OperandVector &Operands) {
  // ...
  if (Desc.mayStore() &&
  // ...
    Error(Loc, "TFE modifier has no meaning for store instructions");
    return false;
  // ...
}
bool AMDGPUAsmParser::validateInstruction(const MCInst &Inst,
                                          const SMLoc &IDLoc,
                                          const OperandVector &Operands) {
  if (auto ErrMsg = validateLdsDirect(Inst)) {
    Error(/*...*/ *ErrMsg);
    return false;
  }
  if (!validateSOPLiteral(Inst)) {
    Error(/*...*/ "only one unique literal operand is allowed");
    return false;
  }
  if (!validateVOPLiteral(Inst, Operands))
    return false;
  if (!validateConstantBusLimitations(Inst, Operands))
    return false;
  if (!validateVOPDRegBankConstraints(Inst, Operands))
    return false;
  if (!validateIntClampSupported(Inst)) {
    Error(/*...*/ "integer clamping is not supported on this GPU");
    return false;
  }
  if (!validateOpSel(Inst)) {
    Error(/*...*/ "invalid op_sel operand");
    return false;
  }
  if (!validateNeg(Inst, AMDGPU::OpName::neg_lo)) {
    Error(/*...*/ "invalid neg_lo operand");
    return false;
  }
  if (!validateNeg(Inst, AMDGPU::OpName::neg_hi)) {
    Error(/*...*/ "invalid neg_hi operand");
    return false;
  }
  if (!validateDPP(Inst, Operands))
    return false;
  if (!validateMIMGD16(Inst)) {
    Error(/*...*/ "d16 modifier is not supported on this GPU");
    return false;
  }
  if (!validateMIMGDim(Inst, Operands)) {
    Error(IDLoc, "missing dim operand");
    return false;
  }
  if (!validateMIMGMSAA(Inst)) {
    Error(/*...*/ "invalid dim; must be MSAA type");
    return false;
  }
  if (!validateMIMGDataSize(Inst, IDLoc))
    return false;
  if (!validateMIMGAddrSize(Inst, IDLoc))
    return false;
  if (!validateMIMGAtomicDMask(Inst)) {
    Error(/*...*/ "invalid atomic image dmask");
    return false;
  }
  if (!validateMIMGGatherDMask(Inst)) {
    Error(/*...*/ "invalid image_gather dmask: only one bit must be set");
    return false;
  }
  if (!validateMovrels(Inst, Operands))
    return false;
  if (!validateOffset(Inst, Operands))
    return false;
  if (!validateMAIAccWrite(Inst, Operands))
    return false;
  if (!validateMAISrc2(Inst, Operands))
    return false;
  if (!validateMFMA(Inst, Operands))
    return false;
  if (!validateCoherencyBits(Inst, Operands, IDLoc))
    return false;
  if (!validateAGPRLdSt(Inst)) {
    Error(IDLoc,
          getFeatureBits()[AMDGPU::FeatureGFX90AInsts]
              ? "invalid register class: data and dst should be all VGPR or AGPR"
              : "invalid register class: agpr loads and stores not supported on this GPU");
    return false;
  }
  if (!validateVGPRAlign(Inst)) {
    Error(/*...*/ "invalid register class: vgpr tuples must be 64 bit aligned");
    return false;
  }
  // ...
  if (!validateBLGP(Inst, Operands))
    return false;
  if (!validateDivScale(Inst)) {
    Error(IDLoc, "ABS not allowed in VOP3B instructions");
    return false;
  }
  if (!validateWaitCnt(Inst, Operands))
    return false;
  if (!validateTFE(Inst, Operands))
    return false;
  // ...
}
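// Added note (not from the original source): validateInstruction is the single
// funnel that every successfully matched MCInst passes through before it is
// emitted; each helper reports its own diagnostic (the /*...*/ placeholders
// above stand for the elided location arguments) and the first failure aborts
// the match, so an assembly line can only ever produce one of these errors.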
// ...
                                  unsigned VariantID = 0);
// ...
                                  unsigned VariantID);

bool AMDGPUAsmParser::isSupportedMnemo(StringRef Mnemo,
                                       const FeatureBitset &FBS) {
  // ...
}

bool AMDGPUAsmParser::isSupportedMnemo(StringRef Mnemo,
                                       const FeatureBitset &FBS,
                                       ArrayRef<unsigned> Variants) {
  for (auto Variant : Variants) {
    // ...
  }
  // ...
}

bool AMDGPUAsmParser::checkUnsupportedInstruction(StringRef Mnemo,
                                                  const SMLoc &IDLoc) {
  FeatureBitset FBS = ComputeAvailableFeatures(getFeatureBits());
  // ...
  if (isSupportedMnemo(Mnemo, FBS, getMatchedVariants()))
    return false;
  // ...
  getParser().clearPendingErrors();
  // ...
  StringRef VariantName = getMatchedVariantName();
  if (!VariantName.empty() && isSupportedMnemo(Mnemo, FBS)) {
    // ...
                     " variant of this instruction is not supported"));
  }
  // ...
  if (isGFX10Plus() && getFeatureBits()[AMDGPU::FeatureWavefrontSize64] &&
      !getFeatureBits()[AMDGPU::FeatureWavefrontSize32]) {
    // ...
    FeaturesWS32.flip(AMDGPU::FeatureWavefrontSize64)
        .flip(AMDGPU::FeatureWavefrontSize32);
    // ...
        ComputeAvailableFeatures(FeaturesWS32);
    // ...
    if (isSupportedMnemo(Mnemo, AvailableFeaturesWS32, getMatchedVariants()))
      return Error(IDLoc, "instruction requires wavesize=32");
  }
  // ...
    return Error(IDLoc, "instruction not supported on this GPU");
  // ...
  return Error(IDLoc, "invalid instruction" + Suggestion);
}
  // ...
  const auto &Op = ((AMDGPUOperand &)*Operands[InvalidOprIdx]);
  if (Op.isToken() && InvalidOprIdx > 1) {
    const auto &PrevOp = ((AMDGPUOperand &)*Operands[InvalidOprIdx - 1]);
    return PrevOp.isToken() && PrevOp.getToken() == "::";
  }
  // ...
bool AMDGPUAsmParser::matchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
                                              OperandVector &Operands,
                                              MCStreamer &Out,
                                              uint64_t &ErrorInfo,
                                              bool MatchingInlineAsm) {
  // ...
  unsigned Result = Match_Success;
  for (auto Variant : getMatchedVariants()) {
    // ...
    auto R = MatchInstructionImpl(Operands, Inst, EI, MatchingInlineAsm,
                                  Variant);
    // ...
    if (R == Match_Success || R == Match_MissingFeature ||
        (R == Match_InvalidOperand && Result != Match_MissingFeature) ||
        (R == Match_MnemonicFail && Result != Match_InvalidOperand &&
         Result != Match_MissingFeature)) {
      // ...
    }
    if (R == Match_Success)
      break;
  }

  if (Result == Match_Success) {
    if (!validateInstruction(Inst, IDLoc, Operands)) {
      return true;
    }
    // ...
  }
  // ...
  if (checkUnsupportedInstruction(Mnemo, IDLoc)) {
    return true;
  }
  // ...
  case Match_MissingFeature:
    // ...
    return Error(IDLoc, "operands are not valid for this GPU or mode");

  case Match_InvalidOperand: {
    SMLoc ErrorLoc = IDLoc;
    // ...
        return Error(IDLoc, "too few operands for instruction");
    // ...
      if (ErrorLoc == SMLoc())
    // ...
      return Error(ErrorLoc, "invalid VOPDY instruction");
    // ...
    return Error(ErrorLoc, "invalid operand for instruction");
  }

  case Match_MnemonicFail:
  // ...
}
bool AMDGPUAsmParser::ParseAsAbsoluteExpression(uint32_t &Ret) {
  // ...
  if (getParser().parseAbsoluteExpression(Tmp)) {
    // ...
  }
  // ...
}

bool AMDGPUAsmParser::ParseDirectiveAMDGCNTarget() {
  // ...
    return TokError("directive only supported for amdgcn architecture");

  std::string TargetIDDirective;
  SMLoc TargetStart = getTok().getLoc();
  if (getParser().parseEscapedString(TargetIDDirective))
    return true;
  // ...
  if (getTargetStreamer().getTargetID()->toString() != TargetIDDirective)
    return getParser().Error(
        TargetRange.Start,
        (Twine(".amdgcn_target directive's target id ") +
         Twine(TargetIDDirective) +
         Twine(" does not match the specified target id ") +
         Twine(getTargetStreamer().getTargetID()->toString())).str());
  // ...
}
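// Example (added, illustrative): the directive parsed above is typically
// written as
//   .amdgcn_target "amdgcn-amd-amdhsa--gfx90a"
// and must agree with the target id the assembler was configured with,
// otherwise the mismatch diagnostic above is emitted.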
bool AMDGPUAsmParser::calculateGPRBlocks(
    const FeatureBitset &Features, const MCExpr *VCCUsed,
    const MCExpr *FlatScrUsed, bool XNACKUsed,
    std::optional<bool> EnableWavefrontSize32, const MCExpr *NextFreeVGPR,
    SMRange VGPRRange, const MCExpr *NextFreeSGPR, SMRange SGPRRange,
    const MCExpr *&VGPRBlocks, const MCExpr *&SGPRBlocks) {
  // ...
  int64_t EvaluatedSGPRs;
  // ...
  unsigned MaxAddressableNumSGPRs =
  // ...
  if (NumSGPRs->evaluateAsAbsolute(EvaluatedSGPRs) && Version.Major >= 8 &&
      !Features.test(FeatureSGPRInitBug) &&
      static_cast<uint64_t>(EvaluatedSGPRs) > MaxAddressableNumSGPRs)
    return OutOfRangeError(SGPRRange);

  const MCExpr *ExtraSGPRs =
  // ...
  if (NumSGPRs->evaluateAsAbsolute(EvaluatedSGPRs) &&
      (Version.Major <= 7 || Features.test(FeatureSGPRInitBug)) &&
      static_cast<uint64_t>(EvaluatedSGPRs) > MaxAddressableNumSGPRs)
    return OutOfRangeError(SGPRRange);

  if (Features.test(FeatureSGPRInitBug))
  // ...
  auto GetNumGPRBlocks = [&Ctx](const MCExpr *NumGPR,
                                unsigned Granule) -> const MCExpr * {
    // ...
    const MCExpr *AlignToGPR =
    // ...
  };

  VGPRBlocks = GetNumGPRBlocks(
  // ...
}
bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() {
  // ...
    return TokError("directive only supported for amdgcn architecture");
  // ...
    return TokError("directive only supported for amdhsa OS");

  StringRef KernelName;
  if (getParser().parseIdentifier(KernelName))
    return true;
  // ...
                                                    &getSTI(), getContext());
  // ...
  const MCExpr *NextFreeVGPR = ZeroExpr;
  // ...
  const MCExpr *NextFreeSGPR = ZeroExpr;
  // ...
  unsigned ImpliedUserSGPRCount = 0;
  // ...
  std::optional<unsigned> ExplicitUserSGPRCount;
  const MCExpr *ReserveVCC = OneExpr;
  const MCExpr *ReserveFlatScr = OneExpr;
  std::optional<bool> EnableWavefrontSize32;

  while (true) {
    // ...
    SMRange IDRange = getTok().getLocRange();
    if (!parseId(ID, "expected .amdhsa_ directive or .end_amdhsa_kernel"))
      return true;

    if (ID == ".end_amdhsa_kernel")
      break;
    // ...
      return TokError(".amdhsa_ directives cannot be repeated");

    SMLoc ValStart = getLoc();
    const MCExpr *ExprVal;
    if (getParser().parseExpression(ExprVal))
      return true;
    SMLoc ValEnd = getLoc();
    // ...
    bool EvaluatableExpr;
    if ((EvaluatableExpr = ExprVal->evaluateAsAbsolute(IVal))) {
      if (IVal < 0)
        return OutOfRangeError(ValRange);
      // ...
    }

#define PARSE_BITS_ENTRY(FIELD, ENTRY, VALUE, RANGE)                           \
  if (!isUInt<ENTRY##_WIDTH>(Val))                                             \
    return OutOfRangeError(RANGE);                                             \
  AMDGPU::MCKernelDescriptor::bits_set(FIELD, VALUE, ENTRY##_SHIFT, ENTRY,     \
                                       /*...*/)

#define EXPR_RESOLVE_OR_ERROR(RESOLVED)                                        \
  if (!(RESOLVED))                                                             \
    return Error(IDRange.Start, "directive should have resolvable expression", \
                 IDRange);
    if (ID == ".amdhsa_group_segment_fixed_size") {
      // ...
        return OutOfRangeError(ValRange);
      // ...
    } else if (ID == ".amdhsa_private_segment_fixed_size") {
      // ...
        return OutOfRangeError(ValRange);
      // ...
    } else if (ID == ".amdhsa_kernarg_size") {
      // ...
        return OutOfRangeError(ValRange);
      // ...
    } else if (ID == ".amdhsa_user_sgpr_count") {
      ExplicitUserSGPRCount = Val;
    } else if (ID == ".amdhsa_user_sgpr_private_segment_buffer") {
      // ...
                     "directive is not supported with architected flat scratch",
      // ...
                       KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER,
      // ...
        ImpliedUserSGPRCount += 4;
    } else if (ID == ".amdhsa_user_sgpr_kernarg_preload_length") {
      // ...
        return Error(IDRange.Start, "directive requires gfx90a+", IDRange);
      // ...
        return OutOfRangeError(ValRange);
      // ...
      ImpliedUserSGPRCount += Val;
      PreloadLength = Val;
    } else if (ID == ".amdhsa_user_sgpr_kernarg_preload_offset") {
      // ...
        return Error(IDRange.Start, "directive requires gfx90a+", IDRange);
      // ...
        return OutOfRangeError(ValRange);
      // ...
      PreloadOffset = Val;
    } else if (ID == ".amdhsa_user_sgpr_dispatch_ptr") {
      // ...
                       KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR, ExprVal,
      // ...
        ImpliedUserSGPRCount += 2;
    } else if (ID == ".amdhsa_user_sgpr_queue_ptr") {
      // ...
                       KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR, ExprVal,
      // ...
        ImpliedUserSGPRCount += 2;
    } else if (ID == ".amdhsa_user_sgpr_kernarg_segment_ptr") {
      // ...
                       KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR,
      // ...
        ImpliedUserSGPRCount += 2;
    } else if (ID == ".amdhsa_user_sgpr_dispatch_id") {
      // ...
                       KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID, ExprVal,
      // ...
        ImpliedUserSGPRCount += 2;
    } else if (ID == ".amdhsa_user_sgpr_flat_scratch_init") {
      // ...
                     "directive is not supported with architected flat scratch",
      // ...
                       KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT,
      // ...
        ImpliedUserSGPRCount += 2;
    } else if (ID == ".amdhsa_user_sgpr_private_segment_size") {
      // ...
                       KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE,
      // ...
        ImpliedUserSGPRCount += 1;
    } else if (ID == ".amdhsa_wavefront_size32") {
      if (IVersion.Major < 10)
        return Error(IDRange.Start, "directive requires gfx10+", IDRange);
      EnableWavefrontSize32 = Val;
      // ...
                       KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32, ExprVal,
      // ...
    } else if (ID == ".amdhsa_uses_dynamic_stack") {
      // ...
                       KERNEL_CODE_PROPERTY_USES_DYNAMIC_STACK, ExprVal,
      // ...
    } else if (ID == ".amdhsa_system_sgpr_private_segment_wavefront_offset") {
      // ...
                     "directive is not supported with architected flat scratch",
      // ...
                       COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT, ExprVal,
      // ...
    } else if (ID == ".amdhsa_enable_private_segment") {
      // ...
                     "directive is not supported without architected flat scratch",
      // ...
                       COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT, ExprVal,
      // ...
    } else if (ID == ".amdhsa_system_sgpr_workgroup_id_x") {
      // ...
                       COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X, ExprVal,
      // ...
    } else if (ID == ".amdhsa_system_sgpr_workgroup_id_y") {
      // ...
                       COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Y, ExprVal,
      // ...
    } else if (ID == ".amdhsa_system_sgpr_workgroup_id_z") {
      // ...
                       COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Z, ExprVal,
      // ...
    } else if (ID == ".amdhsa_system_sgpr_workgroup_info") {
      // ...
                       COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_INFO, ExprVal,
      // ...
    } else if (ID == ".amdhsa_system_vgpr_workitem_id") {
      // ...
                       COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID, ExprVal,
      // ...
    } else if (ID == ".amdhsa_next_free_vgpr") {
      VGPRRange = ValRange;
      NextFreeVGPR = ExprVal;
    } else if (ID == ".amdhsa_next_free_sgpr") {
      SGPRRange = ValRange;
      NextFreeSGPR = ExprVal;
    } else if (ID == ".amdhsa_accum_offset") {
      // ...
        return Error(IDRange.Start, "directive requires gfx90a+", IDRange);
      AccumOffset = ExprVal;
    } else if (ID == ".amdhsa_reserve_vcc") {
      if (EvaluatableExpr && !isUInt<1>(Val))
        return OutOfRangeError(ValRange);
      ReserveVCC = ExprVal;
    } else if (ID == ".amdhsa_reserve_flat_scratch") {
      if (IVersion.Major < 7)
        return Error(IDRange.Start, "directive requires gfx7+", IDRange);
      // ...
                     "directive is not supported with architected flat scratch",
      // ...
      if (EvaluatableExpr && !isUInt<1>(Val))
        return OutOfRangeError(ValRange);
      ReserveFlatScr = ExprVal;
    } else if (ID == ".amdhsa_reserve_xnack_mask") {
      if (IVersion.Major < 8)
        return Error(IDRange.Start, "directive requires gfx8+", IDRange);
      if (!isUInt<1>(Val))
        return OutOfRangeError(ValRange);
      if (Val != getTargetStreamer().getTargetID()->isXnackOnOrAny())
        return getParser().Error(
            IDRange.Start, ".amdhsa_reserve_xnack_mask does not match target id",
            IDRange);
    } else if (ID == ".amdhsa_float_round_mode_32") {
      // ...
                       COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32, ExprVal,
      // ...
    } else if (ID == ".amdhsa_float_round_mode_16_64") {
      // ...
                       COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_16_64, ExprVal,
      // ...
    } else if (ID == ".amdhsa_float_denorm_mode_32") {
      // ...
                       COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_32, ExprVal,
      // ...
    } else if (ID == ".amdhsa_float_denorm_mode_16_64") {
      // ...
                       COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64, ExprVal,
      // ...
    } else if (ID == ".amdhsa_dx10_clamp") {
      if (IVersion.Major >= 12)
        return Error(IDRange.Start, "directive unsupported on gfx12+", IDRange);
      // ...
                       COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_DX10_CLAMP, ExprVal,
      // ...
    } else if (ID == ".amdhsa_ieee_mode") {
      if (IVersion.Major >= 12)
        return Error(IDRange.Start, "directive unsupported on gfx12+", IDRange);
      // ...
                       COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_IEEE_MODE, ExprVal,
      // ...
    } else if (ID == ".amdhsa_fp16_overflow") {
      if (IVersion.Major < 9)
        return Error(IDRange.Start, "directive requires gfx9+", IDRange);
      // ...
                       COMPUTE_PGM_RSRC1_GFX9_PLUS_FP16_OVFL, ExprVal,
      // ...
    } else if (ID == ".amdhsa_tg_split") {
      // ...
        return Error(IDRange.Start, "directive requires gfx90a+", IDRange);
      // ...
    } else if (ID == ".amdhsa_workgroup_processor_mode") {
      if (IVersion.Major < 10)
        return Error(IDRange.Start, "directive requires gfx10+", IDRange);
      // ...
                       COMPUTE_PGM_RSRC1_GFX10_PLUS_WGP_MODE, ExprVal,
      // ...
    } else if (ID == ".amdhsa_memory_ordered") {
      if (IVersion.Major < 10)
        return Error(IDRange.Start, "directive requires gfx10+", IDRange);
      // ...
                       COMPUTE_PGM_RSRC1_GFX10_PLUS_MEM_ORDERED, ExprVal,
      // ...
    } else if (ID == ".amdhsa_forward_progress") {
      if (IVersion.Major < 10)
        return Error(IDRange.Start, "directive requires gfx10+", IDRange);
      // ...
                       COMPUTE_PGM_RSRC1_GFX10_PLUS_FWD_PROGRESS, ExprVal,
      // ...
    } else if (ID == ".amdhsa_shared_vgpr_count") {
      // ...
      if (IVersion.Major < 10 || IVersion.Major >= 12)
        return Error(IDRange.Start, "directive requires gfx10 or gfx11",
                     IDRange);
      SharedVGPRCount = Val;
      // ...
                       COMPUTE_PGM_RSRC3_GFX10_GFX11_SHARED_VGPR_COUNT, ExprVal,
      // ...
    } else if (ID == ".amdhsa_exception_fp_ieee_invalid_op") {
      // ...
                       COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION,
      // ...
    } else if (ID == ".amdhsa_exception_fp_denorm_src") {
      // ...
                       COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_FP_DENORMAL_SOURCE,
      // ...
    } else if (ID == ".amdhsa_exception_fp_ieee_div_zero") {
      // ...
                       COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO,
      // ...
    } else if (ID == ".amdhsa_exception_fp_ieee_overflow") {
      // ...
                       COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW,
      // ...
    } else if (ID == ".amdhsa_exception_fp_ieee_underflow") {
      // ...
                       COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW,
      // ...
    } else if (ID == ".amdhsa_exception_fp_ieee_inexact") {
      // ...
                       COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INEXACT,
      // ...
    } else if (ID == ".amdhsa_exception_int_div_zero") {
      // ...
                       COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO,
      // ...
    } else if (ID == ".amdhsa_round_robin_scheduling") {
      if (IVersion.Major < 12)
        return Error(IDRange.Start, "directive requires gfx12+", IDRange);
      // ...
                       COMPUTE_PGM_RSRC1_GFX12_PLUS_ENABLE_WG_RR_EN, ExprVal,
      // ...
    } else {
      return Error(IDRange.Start, "unknown .amdhsa_kernel directive", IDRange);
    }
#undef PARSE_BITS_ENTRY
  // ...
  if (!Seen.contains(".amdhsa_next_free_vgpr"))
    return TokError(".amdhsa_next_free_vgpr directive is required");

  if (!Seen.contains(".amdhsa_next_free_sgpr"))
    return TokError(".amdhsa_next_free_sgpr directive is required");

  unsigned UserSGPRCount = ExplicitUserSGPRCount.value_or(ImpliedUserSGPRCount);
  // ...
  if (PreloadLength) {
    // ...
  }

  const MCExpr *VGPRBlocks;
  const MCExpr *SGPRBlocks;
  if (calculateGPRBlocks(getFeatureBits(), ReserveVCC, ReserveFlatScr,
                         getTargetStreamer().getTargetID()->isXnackOnOrAny(),
                         EnableWavefrontSize32, NextFreeVGPR,
                         VGPRRange, NextFreeSGPR, SGPRRange, VGPRBlocks,
                         SGPRBlocks))
    return true;
  // ...
  int64_t EvaluatedVGPRBlocks;
  bool VGPRBlocksEvaluatable =
      VGPRBlocks->evaluateAsAbsolute(EvaluatedVGPRBlocks);
  if (VGPRBlocksEvaluatable &&
      !isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT_WIDTH>(
          static_cast<uint64_t>(EvaluatedVGPRBlocks))) {
    return OutOfRangeError(VGPRRange);
  }
  // ...
                 COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT_SHIFT,
                 COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT, getContext());

  int64_t EvaluatedSGPRBlocks;
  if (SGPRBlocks->evaluateAsAbsolute(EvaluatedSGPRBlocks) &&
      !isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT_WIDTH>(
          static_cast<uint64_t>(EvaluatedSGPRBlocks)))
    return OutOfRangeError(SGPRRange);
  // ...
                 COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT_SHIFT,
                 COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT, getContext());

  if (ExplicitUserSGPRCount && ImpliedUserSGPRCount > *ExplicitUserSGPRCount)
    return TokError("amdgpu_user_sgpr_count smaller than than implied by "
                    "enabled user SGPRs");

  if (!isUInt<COMPUTE_PGM_RSRC2_USER_SGPR_COUNT_WIDTH>(UserSGPRCount))
    return TokError("too many user SGPRs enabled");
  // ...
                 COMPUTE_PGM_RSRC2_USER_SGPR_COUNT_SHIFT,
                 COMPUTE_PGM_RSRC2_USER_SGPR_COUNT, getContext());
  // ...
    return TokError("Kernarg size should be resolvable");

  if (PreloadLength && kernarg_size &&
      (PreloadLength * 4 + PreloadOffset * 4 > kernarg_size))
    return TokError("Kernarg preload length + offset is larger than the "
                    "kernarg segment size");
  // ...
    if (!Seen.contains(".amdhsa_accum_offset"))
      return TokError(".amdhsa_accum_offset directive is required");
    int64_t EvaluatedAccum;
    bool AccumEvaluatable = AccumOffset->evaluateAsAbsolute(EvaluatedAccum);
    uint64_t UEvaluatedAccum = EvaluatedAccum;
    if (AccumEvaluatable &&
        (UEvaluatedAccum < 4 || UEvaluatedAccum > 256 || (UEvaluatedAccum & 3)))
      return TokError("accum_offset should be in range [4..256] in "
                      /*...*/);
    int64_t EvaluatedNumVGPR;
    if (NextFreeVGPR->evaluateAsAbsolute(EvaluatedNumVGPR) &&
        // ...
      return TokError("accum_offset exceeds total VGPR allocation");
    // ...
                   COMPUTE_PGM_RSRC3_GFX90A_ACCUM_OFFSET_SHIFT,
                   COMPUTE_PGM_RSRC3_GFX90A_ACCUM_OFFSET,
    // ...

  if (IVersion.Major >= 10 && IVersion.Major < 12) {
    // ...
    if (SharedVGPRCount && EnableWavefrontSize32 && *EnableWavefrontSize32) {
      return TokError("shared_vgpr_count directive not valid on "
                      "wavefront size 32");
    }
    // ...
    if (VGPRBlocksEvaluatable &&
        (SharedVGPRCount * 2 + static_cast<uint64_t>(EvaluatedVGPRBlocks) >
         /*...*/))
      return TokError("shared_vgpr_count*2 + "
                      "compute_pgm_rsrc1.GRANULATED_WORKITEM_VGPR_COUNT cannot "
                      /*...*/);
  }

  getTargetStreamer().EmitAmdhsaKernelDescriptor(getSTI(), KernelName, KD,
                                                 NextFreeVGPR, NextFreeSGPR,
                                                 ReserveVCC, ReserveFlatScr);
  return false;
}
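// Example (added, illustrative) of the directive block handled above:
//   .amdhsa_kernel my_kernel
//     .amdhsa_next_free_vgpr 32
//     .amdhsa_next_free_sgpr 16
//     .amdhsa_user_sgpr_kernarg_segment_ptr 1
//   .end_amdhsa_kernel
// .amdhsa_next_free_vgpr and .amdhsa_next_free_sgpr are mandatory, as the
// Seen.contains() checks above enforce; everything else has a default.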
bool AMDGPUAsmParser::ParseDirectiveAMDHSACodeObjectVersion() {
  uint32_t Version;
  if (ParseAsAbsoluteExpression(Version))
    return true;

  getTargetStreamer().EmitDirectiveAMDHSACodeObjectVersion(Version);
  return false;
}
bool AMDGPUAsmParser::ParseAMDKernelCodeTValue(StringRef ID,
                                               AMDGPUMCKernelCodeT &C) {
  // ...
  if (ID == "max_scratch_backing_memory_byte_size") {
    Parser.eatToEndOfStatement();
    return false;
  }
  // ...
  if (!C.ParseKernelCodeT(ID, getParser(), Err)) {
    return TokError(Err.str());
  }
  // ...
  if (ID == "enable_wavefront_size32") {
    // ...
      return TokError("enable_wavefront_size32=1 is only allowed on GFX10+");
    if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32])
      return TokError("enable_wavefront_size32=1 requires +WavefrontSize32");
    // ...
    if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64])
      return TokError("enable_wavefront_size32=0 requires +WavefrontSize64");
  }
  // ...
  if (ID == "wavefront_size") {
    if (C.wavefront_size == 5) {
      // ...
        return TokError("wavefront_size=5 is only allowed on GFX10+");
      if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32])
        return TokError("wavefront_size=5 requires +WavefrontSize32");
    } else if (C.wavefront_size == 6) {
      if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64])
        return TokError("wavefront_size=6 requires +WavefrontSize64");
    }
  }
  // ...
}
bool AMDGPUAsmParser::ParseDirectiveAMDKernelCodeT() {
  // ...
    if (!parseId(ID, "expected value identifier or .end_amd_kernel_code_t"))
      return true;

    if (ID == ".end_amd_kernel_code_t")
      break;

    if (ParseAMDKernelCodeTValue(ID, KernelCode))
      return true;
  // ...
  KernelCode.validate(&getSTI(), getContext());
  getTargetStreamer().EmitAMDKernelCodeT(KernelCode);
  // ...
}

bool AMDGPUAsmParser::ParseDirectiveAMDGPUHsaKernel() {
  // ...
  if (!parseId(KernelName, "expected symbol name"))
    return true;

  getTargetStreamer().EmitAMDGPUSymbolType(KernelName,
  // ...
  KernelScope.initialize(getContext());
  return false;
}
bool AMDGPUAsmParser::ParseDirectiveISAVersion() {
  // ...
    return Error(getLoc(),
                 ".amd_amdgpu_isa directive is not available on non-amdgcn "
                 /*...*/);
  // ...
  auto TargetIDDirective = getLexer().getTok().getStringContents();
  if (getTargetStreamer().getTargetID()->toString() != TargetIDDirective)
    return Error(getParser().getTok().getLoc(), "target id must match options");

  getTargetStreamer().EmitISAVersion();
  // ...
}

bool AMDGPUAsmParser::ParseDirectiveHSAMetadata() {
  // ...
  std::string HSAMetadataString;
  // ...
  if (!getTargetStreamer().EmitHSAMetadataV3(HSAMetadataString))
    return Error(getLoc(), "invalid HSA metadata");
  // ...
}
bool AMDGPUAsmParser::ParseToEndDirective(const char *AssemblerDirectiveBegin,
                                          const char *AssemblerDirectiveEnd,
                                          std::string &CollectString) {
  // ...
  getLexer().setSkipSpace(false);

  bool FoundEnd = false;
  // ...
    CollectStream << getTokenStr();
    // ...
    if (trySkipId(AssemblerDirectiveEnd)) {
      // ...
    }

    CollectStream << Parser.parseStringToEndOfStatement()
                  << getContext().getAsmInfo()->getSeparatorString();

    Parser.eatToEndOfStatement();
  // ...
  getLexer().setSkipSpace(true);
  // ...
    return TokError(Twine("expected directive ") +
                    Twine(AssemblerDirectiveEnd) + Twine(" not found"));
  // ...
}
bool AMDGPUAsmParser::ParseDirectivePALMetadataBegin() {
  // ...
  auto *PALMetadata = getTargetStreamer().getPALMetadata();
  if (!PALMetadata->setFromString(String))
    return Error(getLoc(), "invalid PAL metadata");
  return false;
}

bool AMDGPUAsmParser::ParseDirectivePALMetadata() {
  // ...
    return Error(getLoc(),
                 /*...*/ "not available on non-amdpal OSes")).str());
  // ...
  auto *PALMetadata = getTargetStreamer().getPALMetadata();
  PALMetadata->setLegacy();
  // ...
    if (ParseAsAbsoluteExpression(Key)) {
      return TokError(Twine("invalid value in ") +
      // ...
    }
    // ...
      return TokError(Twine("expected an even number of values in ") +
      // ...
    if (ParseAsAbsoluteExpression(Value)) {
      return TokError(Twine("invalid value in ") +
      // ...
    }
    PALMetadata->setRegister(Key, Value);
  // ...
}
bool AMDGPUAsmParser::ParseDirectiveAMDGPULDS() {
  if (getParser().checkForValidSection())
    return true;
  // ...
  SMLoc NameLoc = getLoc();
  if (getParser().parseIdentifier(Name))
    return TokError("expected identifier in directive");
  // ...
  if (getParser().parseComma())
    return true;
  // ...
  SMLoc SizeLoc = getLoc();
  if (getParser().parseAbsoluteExpression(Size))
    return true;
  if (Size < 0)
    return Error(SizeLoc, "size must be non-negative");
  if (Size > LocalMemorySize)
    return Error(SizeLoc, "size is too large");

  int64_t Alignment = 4;
  // ...
    SMLoc AlignLoc = getLoc();
    if (getParser().parseAbsoluteExpression(Alignment))
      return true;
    if (Alignment < 0 || !isPowerOf2_64(Alignment))
      return Error(AlignLoc, "alignment must be a power of two");
    // ...
    if (Alignment >= 1u << 31)
      return Error(AlignLoc, "alignment is too large");
  // ...
  Symbol->redefineIfPossible();
  if (!Symbol->isUndefined())
    return Error(NameLoc, "invalid symbol redefinition");

  getTargetStreamer().emitAMDGPULDS(Symbol, Size, Align(Alignment));
  return false;
}
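// Example (added, illustrative): ".amdgpu_lds my_buffer, 4096, 16" declares an
// LDS symbol of 4096 bytes with 16-byte alignment.  The alignment argument is
// optional and defaults to 4, and must be a power of two, per the checks above.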
bool AMDGPUAsmParser::ParseDirective(AsmToken DirectiveID) {
  StringRef IDVal = DirectiveID.getString();
  // ...
    if (IDVal == ".amdhsa_kernel")
      return ParseDirectiveAMDHSAKernel();

    if (IDVal == ".amdhsa_code_object_version")
      return ParseDirectiveAMDHSACodeObjectVersion();
    // ...
      return ParseDirectiveHSAMetadata();
  // ...
    if (IDVal == ".amd_kernel_code_t")
      return ParseDirectiveAMDKernelCodeT();

    if (IDVal == ".amdgpu_hsa_kernel")
      return ParseDirectiveAMDGPUHsaKernel();

    if (IDVal == ".amd_amdgpu_isa")
      return ParseDirectiveISAVersion();
    // ...
                          Twine(" directive is "
                                "not available on non-amdhsa OSes"))
  // ...
  if (IDVal == ".amdgcn_target")
    return ParseDirectiveAMDGCNTarget();

  if (IDVal == ".amdgpu_lds")
    return ParseDirectiveAMDGPULDS();
  // ...
    return ParseDirectivePALMetadataBegin();
  // ...
    return ParseDirectivePALMetadata();
  // ...
}
  // ...
  if (MRI.regsOverlap(TTMP12_TTMP13_TTMP14_TTMP15, Reg))
  // ...
  if (MRI.regsOverlap(SGPR104_SGPR105, Reg))
    return hasSGPR104_SGPR105();
  // ...
  case SRC_SHARED_BASE_LO:
  case SRC_SHARED_BASE:
  case SRC_SHARED_LIMIT_LO:
  case SRC_SHARED_LIMIT:
  case SRC_PRIVATE_BASE_LO:
  case SRC_PRIVATE_BASE:
  case SRC_PRIVATE_LIMIT_LO:
  case SRC_PRIVATE_LIMIT:
  // ...
  case SRC_POPS_EXITING_WAVE_ID:
  // ...
    return (isVI() || isGFX9()) &&
           getTargetStreamer().getTargetID()->isXnackSupported();
  // ...
  if (MRI.regsOverlap(SGPR102_SGPR103, Reg))
    return hasSGPR102_SGPR103();
  // ...
  // ...
  Res = MatchOperandParserImpl(Operands, Mnemonic);
  // ...
  SMLoc LBraceLoc = getLoc();
  // ...
    auto Loc = getLoc();
    // ...
      Error(Loc, "expected a register");
    // ...
    RBraceLoc = getLoc();
    // ...
                   "expected a comma or a closing square bracket"))
  // ...
  if (Operands.size() - Prefix > 1) {
    Operands.insert(Operands.begin() + Prefix,
                    AMDGPUOperand::CreateToken(this, "[", LBraceLoc));
    Operands.push_back(AMDGPUOperand::CreateToken(this, "]", RBraceLoc));
  }
  setForcedEncodingSize(0);
  setForcedDPP(false);
  setForcedSDWA(false);

  if (Name.ends_with("_e64_dpp")) {
    setForcedDPP(true);
    setForcedEncodingSize(64);
    return Name.substr(0, Name.size() - 8);
  }
  if (Name.ends_with("_e64")) {
    setForcedEncodingSize(64);
    return Name.substr(0, Name.size() - 4);
  }
  if (Name.ends_with("_e32")) {
    setForcedEncodingSize(32);
    return Name.substr(0, Name.size() - 4);
  }
  if (Name.ends_with("_dpp")) {
    setForcedDPP(true);
    return Name.substr(0, Name.size() - 4);
  }
  if (Name.ends_with("_sdwa")) {
    setForcedSDWA(true);
    return Name.substr(0, Name.size() - 5);
  }
  return Name;
// ...
                                unsigned VariantID);
// ...
  Operands.push_back(AMDGPUOperand::CreateToken(this, Name, NameLoc));

  bool IsMIMG = Name.starts_with("image_");
  // ...
    OperandMode Mode = OperandMode_Default;
    // ...
      Mode = OperandMode_NSA;
    // ...
      checkUnsupportedInstruction(Name, NameLoc);
      if (!Parser.hasPendingError()) {
        // ...
                       : "not a valid operand.";
        Error(getLoc(), Msg);
      }
  // ...
  if (!trySkipId(Name))
    return ParseStatus::NoMatch;

  Operands.push_back(AMDGPUOperand::CreateToken(this, Name, S));
  return ParseStatus::Success;
ParseStatus AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix,
                                                int64_t &IntVal) {
  // ...
}

ParseStatus AMDGPUAsmParser::parseIntWithPrefix(
    const char *Prefix, OperandVector &Operands, AMDGPUOperand::ImmTy ImmTy,
    std::function<bool(int64_t &)> ConvertResult) {
  // ...
  if (ConvertResult && !ConvertResult(Value)) {
    // ...
  }

  Operands.push_back(AMDGPUOperand::CreateImm(this, Value, S, ImmTy));
  return ParseStatus::Success;
}

ParseStatus AMDGPUAsmParser::parseOperandArrayWithPrefix(
    const char *Prefix, OperandVector &Operands, AMDGPUOperand::ImmTy ImmTy,
    bool (*ConvertResult)(int64_t &)) {
  // ...
  const unsigned MaxSize = 4;
  // ...
  for (int I = 0; ; ++I) {
    // ...
    SMLoc Loc = getLoc();
    // ...
    if (Op != 0 && Op != 1)
      // ...
    if (I + 1 == MaxSize)
      return Error(getLoc(), "expected a closing square bracket");
    // ...
  }

  Operands.push_back(AMDGPUOperand::CreateImm(this, Val, S, ImmTy));
  return ParseStatus::Success;
}
                                           AMDGPUOperand::ImmTy ImmTy) {
  // ...
  if (trySkipId(Name)) {
    Bit = 1;
  } else if (trySkipId("no", Name)) {
    Bit = 0;
  }
  // ...
    return Error(S, "r128 modifier is not supported on this GPU");
  // ...
    return Error(S, "a16 modifier is not supported on this GPU");

  if (isGFX9() && ImmTy == AMDGPUOperand::ImmTyA16)
    ImmTy = AMDGPUOperand::ImmTyR128A16;

  Operands.push_back(AMDGPUOperand::CreateImm(this, Bit, S, ImmTy));
  return ParseStatus::Success;
}

unsigned AMDGPUAsmParser::getCPolKind(StringRef Id, StringRef Mnemo,
                                      bool &Disabling) const {
  Disabling = Id.consume_front("no");
  // ...
}
  // gfx12-style cpol parsing (th / scope modifiers).
  SMLoc StringLoc = getLoc();
  int64_t CPolVal = 0;
  // ...
  ResScope = parseScope(Operands, Scope);
  // ...
  Operands.push_back(AMDGPUOperand::CreateImm(this, CPolVal, StringLoc,
                                              AMDGPUOperand::ImmTyCPol));
  return ParseStatus::Success;

  // Pre-gfx12 cpol parsing (glc/slc/dlc/scc named bits).
  SMLoc OpLoc = getLoc();
  unsigned Enabled = 0, Seen = 0;
  // ...
    unsigned CPol = getCPolKind(getId(), Mnemo, Disabling);
    // ...
      return Error(S, "dlc modifier is not supported on this GPU");
    // ...
      return Error(S, "scc modifier is not supported on this GPU");
    // ...
      return Error(S, "duplicate cache policy modifier");
  // ...
  Operands.push_back(
      AMDGPUOperand::CreateImm(this, Enabled, OpLoc, AMDGPUOperand::ImmTyCPol));
  return ParseStatus::Success;

// ... (parseScope)
      Operands, "scope", {"SCOPE_CU", "SCOPE_SE", "SCOPE_DEV", "SCOPE_SYS"},
// ...

// ... (parseTH)
  if (Value == "TH_DEFAULT")
    // ...
  else if (Value == "TH_STORE_LU" || Value == "TH_LOAD_RT_WB" ||
           Value == "TH_LOAD_NT_WB") {
    return Error(StringLoc, "invalid th value");
  } else if (Value.consume_front("TH_ATOMIC_")) {
    // ...
  } else if (Value.consume_front("TH_LOAD_")) {
    // ...
  } else if (Value.consume_front("TH_STORE_")) {
    // ...
  } else {
    return Error(StringLoc, "invalid th value");
  }

  if (Value == "BYPASS")
    // ...
  if (TH == 0xffffffff)
    return Error(StringLoc, "invalid th value");
  // ...
                                 AMDGPUAsmParser::OptionalImmIndexMap &OptionalIdx,
                                 AMDGPUOperand::ImmTy ImmT,
  // ...
  auto i = OptionalIdx.find(ImmT);
  if (i != OptionalIdx.end()) {
    unsigned Idx = i->second;
    ((AMDGPUOperand &)*Operands[Idx]).addImmOperands(Inst, 1);
  }
  // ...
  StringLoc = getLoc();
  // ...

ParseStatus AMDGPUAsmParser::parseStringOrIntWithPrefix(
    OperandVector &Operands, StringRef Name, ArrayRef<const char *> Ids,
    int64_t &IntVal) {
  // ...
  SMLoc StringLoc = getLoc();
  // ...
    Value = getTokenStr();
    // ...
      if (Value == Ids[IntVal])
        // ...
    if (IntVal < 0 || IntVal >= (int64_t)Ids.size())
      // ...
}

ParseStatus AMDGPUAsmParser::parseStringOrIntWithPrefix(
    OperandVector &Operands, StringRef Name, ArrayRef<const char *> Ids,
    AMDGPUOperand::ImmTy Type) {
  // ...
  Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S, Type));
  return ParseStatus::Success;
}
bool AMDGPUAsmParser::tryParseFmt(const char *Pref, int64_t MaxVal,
                                  int64_t &Fmt) {
  // ...
  SMLoc Loc = getLoc();

  auto Res = parseIntWithPrefix(Pref, Val);
  // ...
  if (Val < 0 || Val > MaxVal) {
    // ...
  }
  // ...
}

ParseStatus AMDGPUAsmParser::tryParseIndexKey(OperandVector &Operands,
                                              AMDGPUOperand::ImmTy ImmTy) {
  const char *Pref = "index_key";
  // ...
  SMLoc Loc = getLoc();
  auto Res = parseIntWithPrefix(Pref, ImmVal);
  // ...
  if (ImmTy == AMDGPUOperand::ImmTyIndexKey16bit && (ImmVal < 0 || ImmVal > 1))
    // ...
  if (ImmTy == AMDGPUOperand::ImmTyIndexKey8bit && (ImmVal < 0 || ImmVal > 3))
    // ...
  Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc, ImmTy));
  return ParseStatus::Success;
}

// ...
  return tryParseIndexKey(Operands, AMDGPUOperand::ImmTyIndexKey8bit);
// ...
  return tryParseIndexKey(Operands, AMDGPUOperand::ImmTyIndexKey16bit);
ParseStatus AMDGPUAsmParser::parseDfmtNfmt(int64_t &Format) {
  // ...
  for (int I = 0; I < 2; ++I) {
    if (Dfmt == DFMT_UNDEF && !tryParseFmt("dfmt", DFMT_MAX, Dfmt))
      // ...
    if (Nfmt == NFMT_UNDEF && !tryParseFmt("nfmt", NFMT_MAX, Nfmt))
      // ...
    if ((Dfmt == DFMT_UNDEF) != (Nfmt == NFMT_UNDEF) &&
    // ...
  }

  if (Dfmt == DFMT_UNDEF && Nfmt == NFMT_UNDEF)
    return ParseStatus::NoMatch;

  Dfmt = (Dfmt == DFMT_UNDEF) ? DFMT_DEFAULT : Dfmt;
  Nfmt = (Nfmt == NFMT_UNDEF) ? NFMT_DEFAULT : Nfmt;
  // ...
}

ParseStatus AMDGPUAsmParser::parseUfmt(int64_t &Format) {
  // ...
  if (!tryParseFmt("format", UFMT_MAX, Fmt))
    // ...
  if (Fmt == UFMT_UNDEF)
    return ParseStatus::NoMatch;
  // ...
}

bool AMDGPUAsmParser::matchDfmtNfmt(int64_t &Dfmt, int64_t &Nfmt,
                                    StringRef FormatStr, SMLoc Loc) {
  // ...
  if (Format != DFMT_UNDEF) {
    // ...
  }
  if (Format != NFMT_UNDEF) {
    // ...
  }
  Error(Loc, "unsupported format");
  return false;
}

// ... (parseSymbolicSplitFormat)
  if (!matchDfmtNfmt(Dfmt, Nfmt, FormatStr, FormatLoc))
    return ParseStatus::Failure;
  // ...
    SMLoc Loc = getLoc();
    if (!parseId(Str, "expected a format string") ||
        !matchDfmtNfmt(Dfmt, Nfmt, Str, Loc))
      return ParseStatus::Failure;
    if (Dfmt == DFMT_UNDEF)
      return Error(Loc, "duplicate numeric format");
    if (Nfmt == NFMT_UNDEF)
      return Error(Loc, "duplicate data format");
  // ...
  Dfmt = (Dfmt == DFMT_UNDEF) ? DFMT_DEFAULT : Dfmt;
  Nfmt = (Nfmt == NFMT_UNDEF) ? NFMT_DEFAULT : Nfmt;
  // ...
    if (Ufmt == UFMT_UNDEF)
      return Error(FormatLoc, "unsupported format");
  // ...

// ... (parseSymbolicUnifiedFormat)
  if (Id == UFMT_UNDEF)
    return ParseStatus::NoMatch;
  // ...
    return Error(Loc, "unified format is not supported on this GPU");
ParseStatus AMDGPUAsmParser::parseNumericFormat(int64_t &Format) {
  // ...
  SMLoc Loc = getLoc();
  // ...
    return Error(Loc, "out of range format");
  // ...
}

ParseStatus AMDGPUAsmParser::parseSymbolicOrNumericFormat(int64_t &Format) {
  // ...
  SMLoc Loc = getLoc();
  if (!parseId(FormatStr, "expected a format string"))
    return ParseStatus::Failure;

  auto Res = parseSymbolicUnifiedFormat(FormatStr, Loc, Format);
  if (Res.isNoMatch())
    Res = parseSymbolicSplitFormat(FormatStr, Loc, Format);
  // ...
  return parseNumericFormat(Format);
}

// ... (parseFORMAT)
  SMLoc Loc = getLoc();
  // ...
  Operands.push_back(
      AMDGPUOperand::CreateImm(this, Format, Loc, AMDGPUOperand::ImmTyFORMAT));
  // ...
    Res = parseSymbolicOrNumericFormat(Format);
  // ...
    AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands[Size - 2]);
    assert(Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyFORMAT);
  // ...
    return Error(getLoc(), "duplicate format");
// ... (parseFlatOffset)
  parseIntWithPrefix("offset", Operands, AMDGPUOperand::ImmTyOffset);
  // ...
  Res = parseIntWithPrefix("inst_offset", Operands,
                           AMDGPUOperand::ImmTyInstOffset);
  // ...

// ... (parseR128A16)
  parseNamedBit("r128", Operands, AMDGPUOperand::ImmTyR128A16);
  // ...
  Res = parseNamedBit("a16", Operands, AMDGPUOperand::ImmTyA16);
  // ...

// ... (parseBLGP)
  parseIntWithPrefix("blgp", Operands, AMDGPUOperand::ImmTyBLGP);
  // ...
  parseOperandArrayWithPrefix("neg", Operands, AMDGPUOperand::ImmTyBLGP);
  // ...
  OptionalImmIndexMap OptionalIdx;

  unsigned OperandIdx[4];
  unsigned EnMask = 0;
  // ...
  for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
    // ...
      OperandIdx[SrcIdx] = Inst.size();
      Op.addRegOperands(Inst, 1);
    // ...
      OperandIdx[SrcIdx] = Inst.size();
    // ...
    if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyExpTgt) {
      Op.addImmOperands(Inst, 1);
      // ...
    }

    if (Op.isToken() && (Op.getToken() == "done" || Op.getToken() == "row_en"))
      // ...

    OptionalIdx[Op.getImmTy()] = i;
  }
  // ...
  if (OptionalIdx.find(AMDGPUOperand::ImmTyExpCompr) != OptionalIdx.end()) {
    // ...
  }

  for (auto i = 0; i < SrcIdx; ++i) {
    // ...
      EnMask |= Compr ? (0x3 << i * 2) : (0x1 << i);
  }
  // ...
  IntVal = encode(ISA, IntVal, CntVal);
  if (CntVal != decode(ISA, IntVal)) {
    // ...
    IntVal = encode(ISA, IntVal, -1);
  }
  // ...

bool AMDGPUAsmParser::parseCnt(int64_t &IntVal) {
  // ...
  SMLoc CntLoc = getLoc();
  // ...
  SMLoc ValLoc = getLoc();
  // ...
  if (CntName == "vmcnt" || CntName == "vmcnt_sat") {
    // ...
  } else if (CntName == "expcnt" || CntName == "expcnt_sat") {
    // ...
  } else if (CntName == "lgkmcnt" || CntName == "lgkmcnt_sat") {
    // ...
  } else {
    Error(CntLoc, "invalid counter name " + CntName);
    return false;
  }
  // ...
    Error(ValLoc, "too large value for " + CntName);
    return false;
  // ...
    Error(getLoc(), "expected a counter name");
  // ...
bool AMDGPUAsmParser::parseDelay(int64_t &Delay) {
  SMLoc FieldLoc = getLoc();
  // ...
  SMLoc ValueLoc = getLoc();
  // ...
  if (FieldName == "instid0") {
    // ...
  } else if (FieldName == "instskip") {
    // ...
  } else if (FieldName == "instid1") {
    // ...
  } else {
    Error(FieldLoc, "invalid field name " + FieldName);
    return false;
  }
  // ...
          .Case("VALU_DEP_1", 1)
          .Case("VALU_DEP_2", 2)
          .Case("VALU_DEP_3", 3)
          .Case("VALU_DEP_4", 4)
          .Case("TRANS32_DEP_1", 5)
          .Case("TRANS32_DEP_2", 6)
          .Case("TRANS32_DEP_3", 7)
          .Case("FMA_ACCUM_CYCLE_1", 8)
          .Case("SALU_CYCLE_1", 9)
          .Case("SALU_CYCLE_2", 10)
          .Case("SALU_CYCLE_3", 11)
  // ...
  Delay |= Value << Shift;
  return true;
}

// ... (parseSDelayALU)
    if (!parseDelay(Delay))
      return ParseStatus::Failure;
  // ...
  Operands.push_back(AMDGPUOperand::CreateImm(this, Delay, S));
  return ParseStatus::Success;

bool AMDGPUOperand::isSWaitCnt() const { return isImm(); }

bool AMDGPUOperand::isSDelayALU() const { return isImm(); }
void AMDGPUAsmParser::depCtrError(SMLoc Loc, int ErrorId,
                                  StringRef DepCtrName) {
  // ...
    Error(Loc, Twine("invalid counter name ", DepCtrName));
    // ...
    Error(Loc, Twine(DepCtrName, " is not supported on this GPU"));
    // ...
    Error(Loc, Twine("duplicate counter name ", DepCtrName));
    // ...
    Error(Loc, Twine("invalid value for ", DepCtrName));
    // ...
}

bool AMDGPUAsmParser::parseDepCtr(int64_t &DepCtr, unsigned &UsedOprMask) {
  // ...
  SMLoc DepCtrLoc = getLoc();
  // ...
  unsigned PrevOprMask = UsedOprMask;
  int CntVal = encodeDepCtr(DepCtrName, ExprVal, UsedOprMask, getSTI());
  // ...
    depCtrError(DepCtrLoc, CntVal, DepCtrName);
    return false;
  // ...
    Error(getLoc(), "expected a counter name");
    return false;
  // ...
  unsigned CntValMask = PrevOprMask ^ UsedOprMask;
  DepCtr = (DepCtr & ~CntValMask) | CntVal;
  return true;
}

// ... (parseDepCtr, operand form)
  SMLoc Loc = getLoc();
  // ...
  unsigned UsedOprMask = 0;
  // ...
    if (!parseDepCtr(DepCtr, UsedOprMask))
      return ParseStatus::Failure;
  // ...
  Operands.push_back(AMDGPUOperand::CreateImm(this, DepCtr, Loc));
  return ParseStatus::Success;

bool AMDGPUOperand::isDepCtr() const { return isS16Imm(); }
ParseStatus AMDGPUAsmParser::parseHwregFunc(OperandInfoTy &HwReg,
                                            OperandInfoTy &Offset,
                                            OperandInfoTy &Width) {
  // ...
  HwReg.Loc = getLoc();
  // ...
    HwReg.IsSymbolic = true;
    // ...
  } else if (!parseExpr(HwReg.Val, "a register name")) {
    return ParseStatus::Failure;
  }

  if (!skipToken(AsmToken::Comma, "expected a comma or a closing parenthesis"))
    return ParseStatus::Failure;
  // ...
  Width.Loc = getLoc();
  // ...
}

// ... (parseHwreg)
  SMLoc Loc = getLoc();

  StructuredOpField HwReg("id", "hardware register", HwregId::Width,
                          HwregId::Default);
  StructuredOpField Offset("offset", "bit offset", HwregOffset::Width,
                           HwregOffset::Default);
  struct : StructuredOpField {
    using StructuredOpField::StructuredOpField;
    bool validate(AMDGPUAsmParser &Parser) const override {
      // ...
        return Error(Parser, "only values from 1 to 32 are legal");
      // ...
    }
  } Width("size", "bitfield width", HwregSize::Width, HwregSize::Default);
  // ...
    Res = parseHwregFunc(HwReg, Offset, Width);
    // ...
    if (!validateStructuredOpFields({&HwReg, &Offset, &Width}))
      return ParseStatus::Failure;
    ImmVal = HwregEncoding::encode(HwReg.Val, Offset.Val, Width.Val);
  // ...
             parseExpr(ImmVal, "a hwreg macro, structured immediate"))
  // ...
  if (!isUInt<16>(ImmVal))
    return Error(Loc, "invalid immediate: only 16-bit values are legal");
  // ...
      AMDGPUOperand::CreateImm(this, ImmVal, Loc, AMDGPUOperand::ImmTyHwreg));
  return ParseStatus::Success;
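// Example (added, illustrative): s_getreg_b32 s0, hwreg(HW_REG_MODE, 0, 32)
// uses the hwreg(id, offset, size) form handled above; the size field must be
// between 1 and 32 ("only values from 1 to 32 are legal"), and a plain 16-bit
// immediate is also accepted via the parseExpr fallback.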
bool AMDGPUOperand::isHwreg() const {
  return isImmTy(ImmTyHwreg);
}

bool
AMDGPUAsmParser::parseSendMsgBody(OperandInfoTy &Msg,
                                  OperandInfoTy &Op,
                                  OperandInfoTy &Stream) {
  // ...
    Msg.IsSymbolic = true;
    // ...
  } else if (!parseExpr(Msg.Val, "a message name")) {
    return false;
  }
  // ...
    Op.IsDefined = true;
    // ...
        (Op.Val = getMsgOpId(Msg.Val, getTokenStr(), getSTI())) !=
    // ...
    } else if (!parseExpr(Op.Val, "an operation name")) {
      return false;
    }
    // ...
      Stream.IsDefined = true;
      Stream.Loc = getLoc();
    // ...
}

bool
AMDGPUAsmParser::validateSendMsg(const OperandInfoTy &Msg,
                                 const OperandInfoTy &Op,
                                 const OperandInfoTy &Stream) {
  // ...
  bool Strict = Msg.IsSymbolic;
  // ...
      Error(Msg.Loc, "specified message id is not supported on this GPU");
    // ...
      Error(Msg.Loc, "invalid message id");
    // ...
      Error(Op.Loc, "message does not support operations");
    // ...
      Error(Msg.Loc, "missing message operation");
    // ...
      Error(Op.Loc, "specified operation id is not supported on this GPU");
    // ...
      Error(Op.Loc, "invalid operation id");
    // ...
    Error(Stream.Loc, "message operation does not support streams");
    // ...
    Error(Stream.Loc, "invalid message stream id");
  // ...
}

// ... (parseSendMsg)
  SMLoc Loc = getLoc();
  // ...
    OperandInfoTy Op(OP_NONE_);
    OperandInfoTy Stream(STREAM_ID_NONE_);
    if (parseSendMsgBody(Msg, Op, Stream) &&
        validateSendMsg(Msg, Op, Stream)) {
      // ...
    }
  } else if (parseExpr(ImmVal, "a sendmsg macro")) {
    if (ImmVal < 0 || !isUInt<16>(ImmVal))
      return Error(Loc, "invalid immediate: only 16-bit values are legal");
  }
  // ...
  Operands.push_back(
      AMDGPUOperand::CreateImm(this, ImmVal, Loc, AMDGPUOperand::ImmTySendMsg));
  return ParseStatus::Success;
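// Example (added, illustrative): s_sendmsg sendmsg(MSG_GS, GS_OP_EMIT, 0)
// uses the sendmsg(msg[, op[, stream]]) form parsed above; validateSendMsg
// rejects operations or stream ids that the named message does not support,
// and a plain 16-bit immediate is accepted via the parseExpr fallback.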
7664bool AMDGPUOperand::isSendMsg()
const {
7665 return isImmTy(ImmTySendMsg);
    return Error(S, "invalid interpolation slot");
  Operands.push_back(AMDGPUOperand::CreateImm(this, Slot, S,
                                              AMDGPUOperand::ImmTyInterpSlot));

  if (!Str.starts_with("attr"))
    return Error(S, "invalid interpolation attribute");
    return Error(S, "invalid or missing interpolation attribute channel");
  Str = Str.drop_back(2).drop_front(4);
  if (Str.getAsInteger(10, Attr))
    return Error(S, "invalid or missing interpolation attribute number");
    return Error(S, "out of bounds interpolation attribute number");
  Operands.push_back(AMDGPUOperand::CreateImm(this, Attr, S,
                                              AMDGPUOperand::ImmTyInterpAttr));
  Operands.push_back(AMDGPUOperand::CreateImm(
      this, AttrChan, SChan, AMDGPUOperand::ImmTyInterpAttrChan));

    return Error(S, (Id == ET_INVALID)
                        ? "invalid exp target"
                        : "exp target is not supported on this GPU");
  Operands.push_back(AMDGPUOperand::CreateImm(this, Id, S,
                                              AMDGPUOperand::ImmTyExpTgt));
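// Illustrative only (assumed syntax, not part of this file): the exp target
// parsed above is a symbolic name such as mrt0..mrt7, mrtz, null, pos0.. or
// param0.., e.g. roughly:
//   exp mrt0 v0, v1, v2, v3 done vm
// Unknown names yield "invalid exp target"; names that exist but are not valid
// on the current subtarget yield the "not supported on this GPU" diagnostic.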
  return isId(getToken(), Id);
  return getTokenKind() == Kind;

StringRef AMDGPUAsmParser::getId() const {

  if (isId(Id) && peekToken().is(Kind)) {
  if (isToken(Kind)) {
  if (!trySkipToken(Kind)) {
    Error(getLoc(), ErrMsg);

  if (Parser.parseExpression(Expr))
  if (Expr->evaluateAsAbsolute(Imm))
  Error(S, "expected absolute expression");
          Twine(" or an absolute expression"));

  if (Parser.parseExpression(Expr))
  if (Expr->evaluateAsAbsolute(IntVal)) {
    Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S));
    Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S));

  Val = getToken().getStringContents();
    Error(getLoc(), ErrMsg);
  Val = getTokenStr();
  if (!ErrMsg.empty())
    Error(getLoc(), ErrMsg);

AMDGPUAsmParser::getToken() const {
  return Parser.getTok();

AsmToken AMDGPUAsmParser::peekToken(bool ShouldSkipSpace) {
      : getLexer().peekTok(ShouldSkipSpace);
  auto TokCount = getLexer().peekTokens(Tokens);

AMDGPUAsmParser::getTokenKind() const {
AMDGPUAsmParser::getLoc() const {
  return getToken().getLoc();
AMDGPUAsmParser::getTokenStr() const {
  return getToken().getString();
AMDGPUAsmParser::lex() {
  return ((AMDGPUOperand &)*Operands[0]).getStartLoc();

AMDGPUAsmParser::getOperandLoc(std::function<bool(const AMDGPUOperand&)> Test,
  for (unsigned i = Operands.size() - 1; i > 0; --i) {
    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
      return Op.getStartLoc();

AMDGPUAsmParser::getImmLoc(AMDGPUOperand::ImmTy Type,
  auto Test = [=](const AMDGPUOperand& Op) { return Op.isImmTy(Type); };

  auto Test = [=](const AMDGPUOperand& Op) {
    return Op.isRegKind() && Op.getReg() == Reg;

                            bool SearchMandatoryLiterals) const {
  auto Test = [](const AMDGPUOperand& Op) {
    return Op.IsImmKindLiteral() || Op.isExpr();
  if (SearchMandatoryLiterals && Loc == getInstLoc(Operands))
    Loc = getMandatoryLitLoc(Operands);

  auto Test = [](const AMDGPUOperand &Op) {
    return Op.IsImmKindMandatoryLiteral();

  auto Test = [](const AMDGPUOperand& Op) {
    return Op.isImmKindConst();
    SMLoc IdLoc = getLoc();
        find_if(Fields, [Id](StructuredOpField *F) { return F->Id == Id; });
    if (I == Fields.end())
      return Error(IdLoc, "unknown field");
    if ((*I)->IsDefined)
      return Error(IdLoc, "duplicate field");
    (*I)->Loc = getLoc();
    (*I)->IsDefined = true;

bool AMDGPUAsmParser::validateStructuredOpFields(
  return all_of(Fields, [this](const StructuredOpField *F) {
    return F->validate(*this);
                                           const unsigned OrMask,
                                           const unsigned XorMask) {
  return BITMASK_PERM_ENC |
         (AndMask << BITMASK_AND_SHIFT) |
         (OrMask << BITMASK_OR_SHIFT) |
         (XorMask << BITMASK_XOR_SHIFT);

bool AMDGPUAsmParser::parseSwizzleOperand(int64_t &Op, const unsigned MinVal,
                                          const unsigned MaxVal,
  if (Op < MinVal || Op > MaxVal) {

AMDGPUAsmParser::parseSwizzleOperands(const unsigned OpNum, int64_t* Op,
                                      const unsigned MinVal,
                                      const unsigned MaxVal,
  for (unsigned i = 0; i < OpNum; ++i) {
    if (!parseSwizzleOperand(Op[i], MinVal, MaxVal, ErrMsg, Loc))

AMDGPUAsmParser::parseSwizzleQuadPerm(int64_t &Imm) {
  if (parseSwizzleOperands(LANE_NUM, Lane, 0, LANE_MAX,
                           "expected a 2-bit lane id")) {

AMDGPUAsmParser::parseSwizzleBroadcast(int64_t &Imm) {
  if (!parseSwizzleOperand(GroupSize,
                           "group size must be in the interval [2,32]",
    Error(Loc, "group size must be a power of two");
  if (parseSwizzleOperand(LaneIdx,
                          "lane id must be in the interval [0,group size - 1]",

AMDGPUAsmParser::parseSwizzleReverse(int64_t &Imm) {
  if (!parseSwizzleOperand(GroupSize,
                           "group size must be in the interval [2,32]",
    Error(Loc, "group size must be a power of two");

AMDGPUAsmParser::parseSwizzleSwap(int64_t &Imm) {
  if (!parseSwizzleOperand(GroupSize,
                           "group size must be in the interval [1,16]",
    Error(Loc, "group size must be a power of two");
AMDGPUAsmParser::parseSwizzleBitmaskPerm(int64_t &Imm) {
  SMLoc StrLoc = getLoc();
  if (!parseString(Ctl)) {
  if (Ctl.size() != BITMASK_WIDTH) {
    Error(StrLoc, "expected a 5-character mask");

  unsigned AndMask = 0;
  unsigned OrMask = 0;
  unsigned XorMask = 0;

  for (size_t i = 0; i < Ctl.size(); ++i) {
      Error(StrLoc, "invalid mask");

bool AMDGPUAsmParser::parseSwizzleFFT(int64_t &Imm) {
    Error(getLoc(), "FFT mode swizzle not supported on this GPU");
  if (!parseSwizzleOperand(Swizzle, 0, FFT_SWIZZLE_MAX,
                           "FFT swizzle must be in the interval [0," +

bool AMDGPUAsmParser::parseSwizzleRotate(int64_t &Imm) {
    Error(getLoc(), "Rotate mode swizzle not supported on this GPU");
  if (!parseSwizzleOperand(Direction, 0, 1,
                           "direction must be 0 (left) or 1 (right)", Loc))
  if (!parseSwizzleOperand(
          RotateSize, 0, ROTATE_MAX_SIZE,
          "number of threads to rotate must be in the interval [0," +
        (RotateSize << ROTATE_SIZE_SHIFT);

AMDGPUAsmParser::parseSwizzleOffset(int64_t &Imm) {
  SMLoc OffsetLoc = getLoc();
  if (!parseExpr(Imm, "a swizzle macro")) {
  if (!isUInt<16>(Imm)) {
    Error(OffsetLoc, "expected a 16-bit offset");

AMDGPUAsmParser::parseSwizzleMacro(int64_t &Imm) {
  SMLoc ModeLoc = getLoc();

  if (trySkipId(IdSymbolic[ID_QUAD_PERM])) {
    Ok = parseSwizzleQuadPerm(Imm);
  } else if (trySkipId(IdSymbolic[ID_BITMASK_PERM])) {
    Ok = parseSwizzleBitmaskPerm(Imm);
  } else if (trySkipId(IdSymbolic[ID_BROADCAST])) {
    Ok = parseSwizzleBroadcast(Imm);
  } else if (trySkipId(IdSymbolic[ID_SWAP])) {
    Ok = parseSwizzleSwap(Imm);
  } else if (trySkipId(IdSymbolic[ID_REVERSE])) {
    Ok = parseSwizzleReverse(Imm);
  } else if (trySkipId(IdSymbolic[ID_FFT])) {
    Ok = parseSwizzleFFT(Imm);
  } else if (trySkipId(IdSymbolic[ID_ROTATE])) {
    Ok = parseSwizzleRotate(Imm);
    Error(ModeLoc, "expected a swizzle mode");

  return Ok && skipToken(AsmToken::RParen, "expected a closing parentheses");

  if (trySkipId("offset")) {
    if (trySkipId("swizzle")) {
      Ok = parseSwizzleMacro(Imm);
      Ok = parseSwizzleOffset(Imm);
  Operands.push_back(AMDGPUOperand::CreateImm(this, Imm, S,
                                              AMDGPUOperand::ImmTySwizzle));

AMDGPUOperand::isSwizzle() const {
  return isImmTy(ImmTySwizzle);
}
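// Illustrative only (assumed syntax, not part of this file): the swizzle(...)
// macro parsed above is the offset modifier of ds_swizzle_b32, e.g. roughly:
//   ds_swizzle_b32 v5, v1 offset:swizzle(QUAD_PERM, 0, 1, 2, 3)
//   ds_swizzle_b32 v5, v1 offset:swizzle(BITMASK_PERM, "01pip")
//   ds_swizzle_b32 v5, v1 offset:swizzle(SWAP, 16)
// A plain offset:<16-bit literal> form is handled by parseSwizzleOffset.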
int64_t AMDGPUAsmParser::parseGPRIdxMacro() {
  for (unsigned ModeId = ID_MIN; ModeId <= ID_MAX; ++ModeId) {
    if (trySkipId(IdSymbolic[ModeId])) {
      Error(S, (Imm == 0)?
               "expected a VGPR index mode or a closing parenthesis" :
               "expected a VGPR index mode");
      Error(S, "duplicate VGPR index mode");
                   "expected a comma or a closing parenthesis"))

    Imm = parseGPRIdxMacro();
    if (getParser().parseAbsoluteExpression(Imm))
    if (Imm < 0 || !isUInt<4>(Imm))
      return Error(S, "invalid immediate: only 4-bit values are legal");
      AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyGprIdxMode));

bool AMDGPUOperand::isGPRIdxMode() const {
  return isImmTy(ImmTyGprIdxMode);
}
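// Illustrative only (assumed syntax, not part of this file): the gpr_idx(...)
// macro parsed above supplies the VGPR indexing mode of s_set_gpr_idx_on,
// e.g. roughly:
//   s_set_gpr_idx_on s0, gpr_idx(SRC0, DST)
// A raw 4-bit immediate is accepted as an alternative spelling of the mode.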
  if (isRegister() || isModifier())

  assert(Opr.isImm() || Opr.isExpr());
  SMLoc Loc = Opr.getStartLoc();

  if (Opr.isExpr() && !Opr.isSymbolRefExpr()) {
    Error(Loc, "expected an absolute expression or a label");
  } else if (Opr.isImm() && !Opr.isS16Imm()) {
    Error(Loc, "expected a 16-bit signed jump offset");
void AMDGPUAsmParser::cvtMubufImpl(MCInst &Inst,
  OptionalImmIndexMap OptionalIdx;
  unsigned FirstOperandIdx = 1;
  bool IsAtomicReturn = false;

  for (unsigned i = FirstOperandIdx, e = Operands.size(); i != e; ++i) {
    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
      Op.addRegOperands(Inst, 1);
    if (IsAtomicReturn && i == FirstOperandIdx)
      Op.addRegOperands(Inst, 1);
    if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) {
      Op.addImmOperands(Inst, 1);
    OptionalIdx[Op.getImmTy()] = i;

bool AMDGPUOperand::isSMRDOffset8() const {
  return isImmLiteral() && isUInt<8>(getImm());
}

bool AMDGPUOperand::isSMEMOffset() const {
  return isImmLiteral();
}

bool AMDGPUOperand::isSMRDLiteralOffset() const {
  return isImmLiteral() && !isUInt<8>(getImm()) && isUInt<32>(getImm());
}
bool AMDGPUAsmParser::convertDppBoundCtrl(int64_t &BoundCtrl) {
  if (BoundCtrl == 0 || BoundCtrl == 1) {

void AMDGPUAsmParser::onBeginOfFile() {
  if (!getParser().getStreamer().getTargetStreamer() ||
  if (!getTargetStreamer().getTargetID())
    getTargetStreamer().initializeTargetID(getSTI(),
                                           getSTI().getFeatureString());
    getTargetStreamer().EmitDirectiveAMDGCNTarget();

bool AMDGPUAsmParser::parsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc) {
          .Case("max", AGVK::AGVK_Max)
          .Case("or", AGVK::AGVK_Or)
          .Case("extrasgprs", AGVK::AGVK_ExtraSGPRs)
          .Case("totalnumvgprs", AGVK::AGVK_TotalNumVGPRs)
          .Case("alignto", AGVK::AGVK_AlignTo)
          .Case("occupancy", AGVK::AGVK_Occupancy)
    if (Exprs.empty()) {
      Error(getToken().getLoc(),
            "empty " + Twine(TokenId) + " expression");
    if (CommaCount + 1 != Exprs.size()) {
      Error(getToken().getLoc(),
            "mismatch of commas in " + Twine(TokenId) + " expression");
      if (getParser().parseExpression(Expr, EndLoc))
      if (LastTokenWasComma)
        Error(getToken().getLoc(),
              "unexpected token in " + Twine(TokenId) + " expression");
  return getParser().parsePrimaryExpr(Res, EndLoc, nullptr);
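// Illustrative only (assumed usage, not part of this file): the identifiers
// matched above (max, or, extrasgprs, totalnumvgprs, alignto, occupancy) are
// parsed as AMDGPU-specific expression functions taking a parenthesized,
// comma-separated argument list, e.g. something along the lines of:
//   .set needed_vgprs, max(32, totalnumvgprs(agpr_count, vgpr_count))
// Each becomes an AMDGPUMCExpr node rather than being folded immediately.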
  if (Name == "mul") {
    return parseIntWithPrefix("mul", Operands,
  if (Name == "div") {
    return parseIntWithPrefix("div", Operands,

  const int Ops[] = { AMDGPU::OpName::src0,
                      AMDGPU::OpName::src1,
                      AMDGPU::OpName::src2 };
  if (DstOp.isReg() &&
      MRI.getRegClass(AMDGPU::VGPR_16RegClassID).contains(DstOp.getReg())) {
  if ((OpSel & (1 << SrcNum)) != 0)

void AMDGPUAsmParser::cvtVOP3OpSel(MCInst &Inst,
                                   OptionalImmIndexMap &OptionalIdx) {
  cvtVOP3P(Inst, Operands, OptionalIdx);

      && Desc.NumOperands > (OpNum + 1)
      && Desc.operands()[OpNum + 1].RegClass != -1
      && Desc.getOperandConstraint(OpNum + 1,
                                   MCOI::OperandConstraint::TIED_TO) == -1;
  OptionalImmIndexMap OptionalIdx;

  for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
    ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);

  for (unsigned E = Operands.size(); I != E; ++I) {
    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
      Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
    } else if (Op.isInterpSlot() || Op.isInterpAttr() ||
               Op.isInterpAttrChan()) {
    } else if (Op.isImmModifier()) {
      OptionalIdx[Op.getImmTy()] = I;

                        AMDGPUOperand::ImmTyHigh);
                        AMDGPUOperand::ImmTyClamp);
                        AMDGPUOperand::ImmTyOModSI);
  OptionalImmIndexMap OptionalIdx;

  for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
    ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);

  for (unsigned E = Operands.size(); I != E; ++I) {
    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
      Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
    } else if (Op.isImmModifier()) {
      OptionalIdx[Op.getImmTy()] = I;

  const int Ops[] = { AMDGPU::OpName::src0,
                      AMDGPU::OpName::src1,
                      AMDGPU::OpName::src2 };
  const int ModOps[] = { AMDGPU::OpName::src0_modifiers,
                         AMDGPU::OpName::src1_modifiers,
                         AMDGPU::OpName::src2_modifiers };

  for (int J = 0; J < 3; ++J) {
    if ((OpSel & (1 << J)) != 0)
    if (ModOps[J] == AMDGPU::OpName::src0_modifiers &&
        (OpSel & (1 << 3)) != 0)
                              OptionalImmIndexMap &OptionalIdx) {
  for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
    ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);

  for (unsigned E = Operands.size(); I != E; ++I) {
    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
      Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
    } else if (Op.isImmModifier()) {
      OptionalIdx[Op.getImmTy()] = I;
      Op.addRegOrImmOperands(Inst, 1);

                        AMDGPUOperand::ImmTyByteSel);
                        AMDGPUOperand::ImmTyClamp);
                        AMDGPUOperand::ImmTyOModSI);

  auto *it = Inst.begin();

  OptionalImmIndexMap OptionalIdx;
  cvtVOP3(Inst, Operands, OptionalIdx);
                              OptionalImmIndexMap &OptIdx) {
  if (Opc == AMDGPU::V_CVT_SCALEF32_PK_FP4_F16_vi ||
      Opc == AMDGPU::V_CVT_SCALEF32_PK_FP4_BF16_vi ||
      Opc == AMDGPU::V_CVT_SR_BF8_F32_vi ||
      Opc == AMDGPU::V_CVT_SR_FP8_F32_vi ||
      Opc == AMDGPU::V_CVT_SR_BF8_F32_gfx12_e64_gfx12 ||
      Opc == AMDGPU::V_CVT_SR_FP8_F32_gfx12_e64_gfx12) {

      !(Opc == AMDGPU::V_CVT_PK_BF8_F32_e64_dpp_gfx12 ||
        Opc == AMDGPU::V_CVT_PK_FP8_F32_e64_dpp_gfx12 ||
        Opc == AMDGPU::V_CVT_PK_BF8_F32_e64_dpp8_gfx12 ||
        Opc == AMDGPU::V_CVT_PK_FP8_F32_e64_dpp8_gfx12 ||
        Opc == AMDGPU::V_CVT_SR_FP8_F32_gfx12_e64_dpp_gfx12 ||
        Opc == AMDGPU::V_CVT_SR_FP8_F32_gfx12_e64_dpp8_gfx12 ||
        Opc == AMDGPU::V_CVT_SR_BF8_F32_gfx12_e64_dpp_gfx12 ||
        Opc == AMDGPU::V_CVT_SR_BF8_F32_gfx12_e64_dpp8_gfx12)) {

  if (BitOp3Idx != -1) {
  if (OpSelIdx != -1) {
  if (OpSelHiIdx != -1) {

  const int Ops[] = { AMDGPU::OpName::src0,
                      AMDGPU::OpName::src1,
                      AMDGPU::OpName::src2 };
  const int ModOps[] = { AMDGPU::OpName::src0_modifiers,
                         AMDGPU::OpName::src1_modifiers,
                         AMDGPU::OpName::src2_modifiers };

  unsigned OpSelHi = 0;
  if (OpSelHiIdx != -1)

  for (int J = 0; J < 3; ++J) {
    if (SrcOp.isReg() && getMRI()
    if ((OpSel & (1 << J)) != 0)
    if ((OpSelHi & (1 << J)) != 0)
    if ((NegLo & (1 << J)) != 0)
    if ((NegHi & (1 << J)) != 0)
  OptionalImmIndexMap OptIdx;

                                  unsigned i, unsigned Opc, unsigned OpName) {
    ((AMDGPUOperand &)*Operands[i]).addRegOrImmWithFPInputModsOperands(Inst, 2);
    ((AMDGPUOperand &)*Operands[i]).addRegOperands(Inst, 1);

  ((AMDGPUOperand &)*Operands[1]).addRegOperands(Inst, 1);

  ((AMDGPUOperand &)*Operands[1]).addRegOperands(Inst, 1);
  ((AMDGPUOperand &)*Operands[4]).addRegOperands(Inst, 1);

  OptionalImmIndexMap OptIdx;
  for (unsigned i = 5; i < Operands.size(); ++i) {
    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
    OptIdx[Op.getImmTy()] = i;

                        AMDGPUOperand::ImmTyIndexKey8bit);
                        AMDGPUOperand::ImmTyIndexKey16bit);

  Operands.push_back(AMDGPUOperand::CreateToken(this, "::", S));
  SMLoc OpYLoc = getLoc();
    Operands.push_back(AMDGPUOperand::CreateToken(this, OpYName, OpYLoc));
  return Error(OpYLoc, "expected a VOPDY instruction after ::");

  auto addOp = [&](uint16_t ParsedOprIdx) {
    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[ParsedOprIdx]);
      Op.addRegOperands(Inst, 1);
      Op.addImmOperands(Inst, 1);

    addOp(InstInfo[CompIdx].getIndexOfDstInParsedOperands());

    const auto &CInfo = InstInfo[CompIdx];
    auto CompSrcOperandsNum = InstInfo[CompIdx].getCompParsedSrcOperandsNum();
    for (unsigned CompSrcIdx = 0; CompSrcIdx < CompSrcOperandsNum; ++CompSrcIdx)
      addOp(CInfo.getIndexOfSrcInParsedOperands(CompSrcIdx));
    if (CInfo.hasSrc2Acc())
      addOp(CInfo.getIndexOfDstInParsedOperands());
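// Illustrative only (assumed syntax, not part of this file): the "::" token
// handled above separates the X and Y halves of a dual-issue VOPD
// instruction, e.g. roughly:
//   v_dual_mov_b32 v0, v1 :: v_dual_add_f32 v2, v3, v4
// If no VOPDY mnemonic follows "::", the parser emits the error above.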
bool AMDGPUOperand::isDPP8() const {
  return isImmTy(ImmTyDPP8);
}

bool AMDGPUOperand::isDPPCtrl() const {
  using namespace AMDGPU::DPP;

  bool result = isImm() && getImmTy() == ImmTyDppCtrl && isUInt<9>(getImm());
    int64_t Imm = getImm();
    return (Imm >= DppCtrl::QUAD_PERM_FIRST && Imm <= DppCtrl::QUAD_PERM_LAST) ||
           (Imm >= DppCtrl::ROW_SHL_FIRST && Imm <= DppCtrl::ROW_SHL_LAST) ||
           (Imm >= DppCtrl::ROW_SHR_FIRST && Imm <= DppCtrl::ROW_SHR_LAST) ||
           (Imm >= DppCtrl::ROW_ROR_FIRST && Imm <= DppCtrl::ROW_ROR_LAST) ||
           (Imm == DppCtrl::WAVE_SHL1) ||
           (Imm == DppCtrl::WAVE_ROL1) ||
           (Imm == DppCtrl::WAVE_SHR1) ||
           (Imm == DppCtrl::WAVE_ROR1) ||
           (Imm == DppCtrl::ROW_MIRROR) ||
           (Imm == DppCtrl::ROW_HALF_MIRROR) ||
           (Imm == DppCtrl::BCAST15) ||
           (Imm == DppCtrl::BCAST31) ||
           (Imm >= DppCtrl::ROW_SHARE_FIRST && Imm <= DppCtrl::ROW_SHARE_LAST) ||
           (Imm >= DppCtrl::ROW_XMASK_FIRST && Imm <= DppCtrl::ROW_XMASK_LAST);

bool AMDGPUOperand::isBLGP() const {
  return isImm() && getImmTy() == ImmTyBLGP && isUInt<3>(getImm());
}

bool AMDGPUOperand::isS16Imm() const {
  return isImmLiteral() && (isInt<16>(getImm()) || isUInt<16>(getImm()));
}

bool AMDGPUOperand::isU16Imm() const {
  return isImmLiteral() && isUInt<16>(getImm());
}
bool AMDGPUAsmParser::parseDimId(unsigned &Encoding) {
  SMLoc Loc = getToken().getEndLoc();
  Token = std::string(getTokenStr());
  if (getLoc() != Loc)
  if (!parseId(Suffix))

  SMLoc Loc = getLoc();
  if (!parseDimId(Encoding))
    return Error(Loc, "invalid dim value");

  Operands.push_back(AMDGPUOperand::CreateImm(this, Encoding, S,
                                              AMDGPUOperand::ImmTyDim));

  if (!skipToken(AsmToken::LBrac, "expected an opening square bracket"))

  for (size_t i = 0; i < 8; ++i) {
    SMLoc Loc = getLoc();
    if (getParser().parseAbsoluteExpression(Sels[i]))
    if (0 > Sels[i] || 7 < Sels[i])
      return Error(Loc, "expected a 3-bit value");

  for (size_t i = 0; i < 8; ++i)
    DPP8 |= (Sels[i] << (i * 3));

  Operands.push_back(AMDGPUOperand::CreateImm(this, DPP8, S,
                                              AMDGPUOperand::ImmTyDPP8));
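// Illustrative only (assumed syntax, not part of this file): the loop above
// reads the eight 3-bit lane selects of a dpp8 modifier and packs them into a
// 24-bit immediate, e.g. roughly:
//   v_mov_b32_dpp v0, v1 dpp8:[7,6,5,4,3,2,1,0]
// Each entry must be in [0,7], hence the "expected a 3-bit value" check.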
AMDGPUAsmParser::isSupportedDPPCtrl(StringRef Ctrl,
  if (Ctrl == "row_newbcast")
  if (Ctrl == "row_share" ||
      Ctrl == "row_xmask")
  if (Ctrl == "wave_shl" ||
      Ctrl == "wave_shr" ||
      Ctrl == "wave_rol" ||
      Ctrl == "wave_ror" ||
      Ctrl == "row_bcast")
  return Ctrl == "row_mirror" ||
         Ctrl == "row_half_mirror" ||
         Ctrl == "quad_perm" ||
         Ctrl == "row_shl" ||
         Ctrl == "row_shr" ||

AMDGPUAsmParser::parseDPPCtrlPerm() {
  if (!skipToken(AsmToken::LBrac, "expected an opening square bracket"))

  for (int i = 0; i < 4; ++i) {
    SMLoc Loc = getLoc();
    if (getParser().parseAbsoluteExpression(Temp))
    if (Temp < 0 || Temp > 3) {
      Error(Loc, "expected a 2-bit value");
    Val += (Temp << i * 2);

AMDGPUAsmParser::parseDPPCtrlSel(StringRef Ctrl) {
  using namespace AMDGPU::DPP;

  SMLoc Loc = getLoc();
  if (getParser().parseAbsoluteExpression(Val))

  struct DppCtrlCheck {
          .Case("wave_shl", {DppCtrl::WAVE_SHL1, 1, 1})
          .Case("wave_rol", {DppCtrl::WAVE_ROL1, 1, 1})
          .Case("wave_shr", {DppCtrl::WAVE_SHR1, 1, 1})
          .Case("wave_ror", {DppCtrl::WAVE_ROR1, 1, 1})
          .Case("row_shl", {DppCtrl::ROW_SHL0, 1, 15})
          .Case("row_shr", {DppCtrl::ROW_SHR0, 1, 15})
          .Case("row_ror", {DppCtrl::ROW_ROR0, 1, 15})
          .Case("row_share", {DppCtrl::ROW_SHARE_FIRST, 0, 15})
          .Case("row_xmask", {DppCtrl::ROW_XMASK_FIRST, 0, 15})
          .Case("row_newbcast", {DppCtrl::ROW_NEWBCAST_FIRST, 0, 15})
  if (Check.Ctrl == -1) {
    Valid = (Ctrl == "row_bcast" && (Val == 15 || Val == 31));
    Val = (Val == 15)? DppCtrl::BCAST15 : DppCtrl::BCAST31;

  using namespace AMDGPU::DPP;
      !isSupportedDPPCtrl(getTokenStr(), Operands))
  if (Ctrl == "row_mirror") {
    Val = DppCtrl::ROW_MIRROR;
  } else if (Ctrl == "row_half_mirror") {
    Val = DppCtrl::ROW_HALF_MIRROR;
    if (Ctrl == "quad_perm") {
      Val = parseDPPCtrlPerm();
      Val = parseDPPCtrlSel(Ctrl);
      AMDGPUOperand::CreateImm(this, Val, S, AMDGPUOperand::ImmTyDppCtrl));
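// Illustrative only (assumed syntax, not part of this file): the dpp_ctrl
// values assembled above come from modifiers such as the following, roughly:
//   v_add_f32_dpp v0, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf
//   v_mov_b32_dpp v0, v1 row_shl:1 bound_ctrl:0
// quad_perm takes four 2-bit selects; the row_/wave_ variants take a single
// immediate whose legal range is checked by parseDPPCtrlSel.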
  OptionalImmIndexMap OptionalIdx;

  bool IsMAC = OldIdx != -1 && Src2ModIdx != -1 &&

  for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
    ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);

  for (unsigned E = Operands.size(); I != E; ++I) {
    if (OldIdx == NumOperands) {
      constexpr int DST_IDX = 0;
    } else if (Src2ModIdx == NumOperands) {

    bool IsVOP3CvtSrDpp =
        Opc == AMDGPU::V_CVT_SR_BF8_F32_gfx12_e64_dpp8_gfx12 ||
        Opc == AMDGPU::V_CVT_SR_FP8_F32_gfx12_e64_dpp8_gfx12 ||
        Opc == AMDGPU::V_CVT_SR_BF8_F32_gfx12_e64_dpp_gfx12 ||
        Opc == AMDGPU::V_CVT_SR_FP8_F32_gfx12_e64_dpp_gfx12;
    if (IsVOP3CvtSrDpp) {

    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
    if (IsDPP8 && Op.isDppFI()) {
      Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
    } else if (Op.isReg()) {
      Op.addRegOperands(Inst, 1);
    } else if (Op.isImm() &&
      assert(!Op.IsImmKindLiteral() && "Cannot use literal with DPP");
      Op.addImmOperands(Inst, 1);
    } else if (Op.isImm()) {
      OptionalIdx[Op.getImmTy()] = I;

                        AMDGPUOperand::ImmTyByteSel);
                        AMDGPUOperand::ImmTyClamp);

    cvtVOP3P(Inst, Operands, OptionalIdx);
    cvtVOP3OpSel(Inst, Operands, OptionalIdx);

                        AMDGPUOperand::ImmTyDppFI);
  OptionalImmIndexMap OptionalIdx;

  for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
    ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);

  for (unsigned E = Operands.size(); I != E; ++I) {
    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
    if (Op.isReg() && validateVccOperand(Op.getReg())) {
      Op.addImmOperands(Inst, 1);
        Op.addRegWithFPInputModsOperands(Inst, 2);
      } else if (Op.isDppFI()) {
      } else if (Op.isReg()) {
        Op.addRegOperands(Inst, 1);
        Op.addRegWithFPInputModsOperands(Inst, 2);
      } else if (Op.isReg()) {
        Op.addRegOperands(Inst, 1);
      } else if (Op.isDPPCtrl()) {
        Op.addImmOperands(Inst, 1);
      } else if (Op.isImm()) {
        OptionalIdx[Op.getImmTy()] = I;

                        AMDGPUOperand::ImmTyDppFI);
                                      AMDGPUOperand::ImmTy Type) {
  return parseStringOrIntWithPrefix(
      {"BYTE_0", "BYTE_1", "BYTE_2", "BYTE_3", "WORD_0", "WORD_1", "DWORD"},

  return parseStringOrIntWithPrefix(
      Operands, "dst_unused", {"UNUSED_PAD", "UNUSED_SEXT", "UNUSED_PRESERVE"},
      AMDGPUOperand::ImmTySDWADstUnused);
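// Illustrative only (assumed syntax, not part of this file): the selector
// names listed above appear in SDWA modifiers, e.g. roughly:
//   v_add_f32_sdwa v0, v1, v2 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:BYTE_0 src1_sel:WORD_1
// parseStringOrIntWithPrefix also accepts the raw integer encoding of each
// selector in place of its symbolic name.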
  OptionalImmIndexMap OptionalIdx;
  bool SkipVcc = SkipDstVcc || SkipSrcVcc;
  bool SkippedVcc = false;

  for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
    ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);

  for (unsigned E = Operands.size(); I != E; ++I) {
    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
    if (SkipVcc && !SkippedVcc && Op.isReg() &&
        (Op.getReg() == AMDGPU::VCC || Op.getReg() == AMDGPU::VCC_LO)) {
      Op.addRegOrImmWithInputModsOperands(Inst, 2);
    } else if (Op.isImm()) {
      OptionalIdx[Op.getImmTy()] = I;

  if (Opc != AMDGPU::V_NOP_sdwa_gfx10 && Opc != AMDGPU::V_NOP_sdwa_gfx9 &&
      Opc != AMDGPU::V_NOP_sdwa_vi) {
    switch (BasicInstType) {
                          AMDGPUOperand::ImmTyClamp, 0);
                          AMDGPUOperand::ImmTyOModSI, 0);
                          AMDGPUOperand::ImmTySDWADstSel, SdwaSel::DWORD);
                          AMDGPUOperand::ImmTySDWADstUnused,
                          DstUnused::UNUSED_PRESERVE);
                          AMDGPUOperand::ImmTyClamp, 0);
                          AMDGPUOperand::ImmTyClamp, 0);
      llvm_unreachable("Invalid instruction type. Only VOP1, VOP2 and VOPC allowed");

  if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi ||
      Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) {
    auto *it = Inst.begin();
#define GET_REGISTER_MATCHER
#define GET_MATCHER_IMPLEMENTATION
#define GET_MNEMONIC_SPELL_CHECKER
#define GET_MNEMONIC_CHECKER
#include "AMDGPUGenAsmMatcher.inc"
  return parseTokenOp("addr64", Operands);
  return parseTokenOp("done", Operands);
  return parseTokenOp("idxen", Operands);
  return parseTokenOp("lds", Operands);
  return parseTokenOp("offen", Operands);
  return parseTokenOp("off", Operands);
  return parseTokenOp("row_en", Operands);
  return parseNamedBit("gds", Operands, AMDGPUOperand::ImmTyGDS);
  return parseNamedBit("tfe", Operands, AMDGPUOperand::ImmTyTFE);
  return tryCustomParseOperand(Operands, MCK);

  AMDGPUOperand &Operand = (AMDGPUOperand&)Op;
    return Operand.isAddr64() ? Match_Success : Match_InvalidOperand;
    return Operand.isGDS() ? Match_Success : Match_InvalidOperand;
    return Operand.isLDS() ? Match_Success : Match_InvalidOperand;
    return Operand.isIdxen() ? Match_Success : Match_InvalidOperand;
    return Operand.isOffen() ? Match_Success : Match_InvalidOperand;
    return Operand.isTFE() ? Match_Success : Match_InvalidOperand;
    return Operand.isSSrc_b32() ? Match_Success : Match_InvalidOperand;
    return Operand.isSSrc_f32() ? Match_Success : Match_InvalidOperand;
  case MCK_SOPPBrTarget:
    return Operand.isSOPPBrTarget() ? Match_Success : Match_InvalidOperand;
  case MCK_VReg32OrOff:
    return Operand.isVReg32OrOff() ? Match_Success : Match_InvalidOperand;
  case MCK_InterpSlot:
    return Operand.isInterpSlot() ? Match_Success : Match_InvalidOperand;
  case MCK_InterpAttr:
    return Operand.isInterpAttr() ? Match_Success : Match_InvalidOperand;
  case MCK_InterpAttrChan:
    return Operand.isInterpAttrChan() ? Match_Success : Match_InvalidOperand;
  case MCK_SReg_64_XEXEC:
    return Operand.isNull() ? Match_Success : Match_InvalidOperand;
    return Match_InvalidOperand;
  if (!isUInt<16>(Imm))
    return Error(S, "expected a 16-bit value");
      AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyEndpgm));

bool AMDGPUOperand::isEndpgm() const { return isImmTy(ImmTyEndpgm); }

bool AMDGPUOperand::isSplitBarrier() const { return isInlinableImm(MVT::i32); }