// Classification of a parsed register operand by register file.
enum RegisterKind {
  IS_UNKNOWN,
  IS_VGPR,
  IS_SGPR,
  IS_AGPR,
  IS_TTMP,
  IS_SPECIAL
};
67 SMLoc StartLoc, EndLoc;
68 const AMDGPUAsmParser *AsmParser;
71 AMDGPUOperand(KindTy Kind_,
const AMDGPUAsmParser *AsmParser_)
72 :
Kind(Kind_), AsmParser(AsmParser_) {}
74 using Ptr = std::unique_ptr<AMDGPUOperand>;
82 bool hasFPModifiers()
const {
return Abs || Neg; }
83 bool hasIntModifiers()
const {
return Sext; }
84 bool hasModifiers()
const {
return hasFPModifiers() || hasIntModifiers(); }
86 int64_t getFPModifiersOperand()
const {
93 int64_t getIntModifiersOperand()
const {
99 int64_t getModifiersOperand()
const {
100 assert(!(hasFPModifiers() && hasIntModifiers())
101 &&
"fp and int modifiers should not be used simultaneously");
102 if (hasFPModifiers()) {
103 return getFPModifiersOperand();
104 }
else if (hasIntModifiers()) {
105 return getIntModifiersOperand();
188 ImmKindTyMandatoryLiteral,
202 mutable ImmKindTy
Kind;
219 bool isToken()
const override {
return Kind == Token; }
221 bool isSymbolRefExpr()
const {
222 return isExpr() && Expr && isa<MCSymbolRefExpr>(Expr);
225 bool isImm()
const override {
226 return Kind == Immediate;
229 void setImmKindNone()
const {
231 Imm.Kind = ImmKindTyNone;
234 void setImmKindLiteral()
const {
236 Imm.Kind = ImmKindTyLiteral;
239 void setImmKindMandatoryLiteral()
const {
241 Imm.Kind = ImmKindTyMandatoryLiteral;
244 void setImmKindConst()
const {
246 Imm.Kind = ImmKindTyConst;
249 bool IsImmKindLiteral()
const {
250 return isImm() &&
Imm.Kind == ImmKindTyLiteral;
253 bool IsImmKindMandatoryLiteral()
const {
254 return isImm() &&
Imm.Kind == ImmKindTyMandatoryLiteral;
257 bool isImmKindConst()
const {
258 return isImm() &&
Imm.Kind == ImmKindTyConst;
261 bool isInlinableImm(
MVT type)
const;
262 bool isLiteralImm(
MVT type)
const;
264 bool isRegKind()
const {
268 bool isReg()
const override {
269 return isRegKind() && !hasModifiers();
272 bool isRegOrInline(
unsigned RCID,
MVT type)
const {
273 return isRegClass(RCID) || isInlinableImm(type);
277 return isRegOrInline(RCID, type) || isLiteralImm(type);
280 bool isRegOrImmWithInt16InputMods()
const {
284 bool isRegOrImmWithIntT16InputMods()
const {
288 bool isRegOrImmWithInt32InputMods()
const {
292 bool isRegOrInlineImmWithInt16InputMods()
const {
293 return isRegOrInline(AMDGPU::VS_32RegClassID, MVT::i16);
296 bool isRegOrInlineImmWithInt32InputMods()
const {
297 return isRegOrInline(AMDGPU::VS_32RegClassID, MVT::i32);
300 bool isRegOrImmWithInt64InputMods()
const {
304 bool isRegOrImmWithFP16InputMods()
const {
308 bool isRegOrImmWithFPT16InputMods()
const {
312 bool isRegOrImmWithFP32InputMods()
const {
316 bool isRegOrImmWithFP64InputMods()
const {
320 template <
bool IsFake16>
bool isRegOrInlineImmWithFP16InputMods()
const {
321 return isRegOrInline(
322 IsFake16 ? AMDGPU::VS_32RegClassID : AMDGPU::VS_16RegClassID, MVT::f16);
325 bool isRegOrInlineImmWithFP32InputMods()
const {
326 return isRegOrInline(AMDGPU::VS_32RegClassID, MVT::f32);
329 bool isPackedFP16InputMods()
const {
333 bool isVReg()
const {
334 return isRegClass(AMDGPU::VGPR_32RegClassID) ||
335 isRegClass(AMDGPU::VReg_64RegClassID) ||
336 isRegClass(AMDGPU::VReg_96RegClassID) ||
337 isRegClass(AMDGPU::VReg_128RegClassID) ||
338 isRegClass(AMDGPU::VReg_160RegClassID) ||
339 isRegClass(AMDGPU::VReg_192RegClassID) ||
340 isRegClass(AMDGPU::VReg_256RegClassID) ||
341 isRegClass(AMDGPU::VReg_512RegClassID) ||
342 isRegClass(AMDGPU::VReg_1024RegClassID);
345 bool isVReg32()
const {
346 return isRegClass(AMDGPU::VGPR_32RegClassID);
349 bool isVReg32OrOff()
const {
350 return isOff() || isVReg32();
353 bool isNull()
const {
354 return isRegKind() &&
getReg() == AMDGPU::SGPR_NULL;
357 bool isVRegWithInputMods()
const;
358 template <
bool IsFake16>
bool isT16VRegWithInputMods()
const;
360 bool isSDWAOperand(
MVT type)
const;
361 bool isSDWAFP16Operand()
const;
362 bool isSDWAFP32Operand()
const;
363 bool isSDWAInt16Operand()
const;
364 bool isSDWAInt32Operand()
const;
366 bool isImmTy(ImmTy ImmT)
const {
370 template <ImmTy Ty>
bool isImmTy()
const {
return isImmTy(Ty); }
372 bool isImmLiteral()
const {
return isImmTy(ImmTyNone); }
374 bool isImmModifier()
const {
375 return isImm() &&
Imm.Type != ImmTyNone;
378 bool isOModSI()
const {
return isImmTy(ImmTyOModSI); }
379 bool isDim()
const {
return isImmTy(ImmTyDim); }
380 bool isR128A16()
const {
return isImmTy(ImmTyR128A16); }
381 bool isOff()
const {
return isImmTy(ImmTyOff); }
382 bool isExpTgt()
const {
return isImmTy(ImmTyExpTgt); }
383 bool isOffen()
const {
return isImmTy(ImmTyOffen); }
384 bool isIdxen()
const {
return isImmTy(ImmTyIdxen); }
385 bool isAddr64()
const {
return isImmTy(ImmTyAddr64); }
386 bool isSMEMOffsetMod()
const {
return isImmTy(ImmTySMEMOffsetMod); }
387 bool isFlatOffset()
const {
return isImmTy(ImmTyOffset) || isImmTy(ImmTyInstOffset); }
388 bool isGDS()
const {
return isImmTy(ImmTyGDS); }
389 bool isLDS()
const {
return isImmTy(ImmTyLDS); }
390 bool isCPol()
const {
return isImmTy(ImmTyCPol); }
391 bool isIndexKey8bit()
const {
return isImmTy(ImmTyIndexKey8bit); }
392 bool isIndexKey16bit()
const {
return isImmTy(ImmTyIndexKey16bit); }
393 bool isTFE()
const {
return isImmTy(ImmTyTFE); }
394 bool isFORMAT()
const {
return isImmTy(ImmTyFORMAT) && isUInt<7>(getImm()); }
395 bool isDppFI()
const {
return isImmTy(ImmTyDppFI); }
396 bool isSDWADstSel()
const {
return isImmTy(ImmTySDWADstSel); }
397 bool isSDWASrc0Sel()
const {
return isImmTy(ImmTySDWASrc0Sel); }
398 bool isSDWASrc1Sel()
const {
return isImmTy(ImmTySDWASrc1Sel); }
399 bool isSDWADstUnused()
const {
return isImmTy(ImmTySDWADstUnused); }
400 bool isInterpSlot()
const {
return isImmTy(ImmTyInterpSlot); }
401 bool isInterpAttr()
const {
return isImmTy(ImmTyInterpAttr); }
402 bool isInterpAttrChan()
const {
return isImmTy(ImmTyInterpAttrChan); }
403 bool isOpSel()
const {
return isImmTy(ImmTyOpSel); }
404 bool isOpSelHi()
const {
return isImmTy(ImmTyOpSelHi); }
405 bool isNegLo()
const {
return isImmTy(ImmTyNegLo); }
406 bool isNegHi()
const {
return isImmTy(ImmTyNegHi); }
408 bool isRegOrImm()
const {
412 bool isRegClass(
unsigned RCID)
const;
416 bool isRegOrInlineNoMods(
unsigned RCID,
MVT type)
const {
417 return isRegOrInline(RCID, type) && !hasModifiers();
420 bool isSCSrcB16()
const {
421 return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i16);
424 bool isSCSrcV2B16()
const {
428 bool isSCSrc_b32()
const {
429 return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i32);
432 bool isSCSrc_b64()
const {
433 return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::i64);
436 bool isBoolReg()
const;
438 bool isSCSrcF16()
const {
439 return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f16);
442 bool isSCSrcV2F16()
const {
446 bool isSCSrcF32()
const {
447 return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f32);
450 bool isSCSrcF64()
const {
451 return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::f64);
454 bool isSSrc_b32()
const {
455 return isSCSrc_b32() || isLiteralImm(MVT::i32) || isExpr();
458 bool isSSrc_b16()
const {
return isSCSrcB16() || isLiteralImm(MVT::i16); }
460 bool isSSrcV2B16()
const {
465 bool isSSrc_b64()
const {
468 return isSCSrc_b64() || isLiteralImm(MVT::i64);
471 bool isSSrc_f32()
const {
472 return isSCSrc_b32() || isLiteralImm(MVT::f32) || isExpr();
475 bool isSSrcF64()
const {
return isSCSrc_b64() || isLiteralImm(MVT::f64); }
477 bool isSSrc_bf16()
const {
return isSCSrcB16() || isLiteralImm(MVT::bf16); }
479 bool isSSrc_f16()
const {
return isSCSrcB16() || isLiteralImm(MVT::f16); }
481 bool isSSrcV2F16()
const {
486 bool isSSrcV2FP32()
const {
491 bool isSCSrcV2FP32()
const {
496 bool isSSrcV2INT32()
const {
501 bool isSCSrcV2INT32()
const {
503 return isSCSrc_b32();
506 bool isSSrcOrLds_b32()
const {
507 return isRegOrInlineNoMods(AMDGPU::SRegOrLds_32RegClassID, MVT::i32) ||
508 isLiteralImm(MVT::i32) || isExpr();
511 bool isVCSrc_b32()
const {
512 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i32);
515 bool isVCSrcB64()
const {
516 return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::i64);
519 bool isVCSrcTB16()
const {
520 return isRegOrInlineNoMods(AMDGPU::VS_16RegClassID, MVT::i16);
523 bool isVCSrcTB16_Lo128()
const {
524 return isRegOrInlineNoMods(AMDGPU::VS_16_Lo128RegClassID, MVT::i16);
527 bool isVCSrcFake16B16_Lo128()
const {
528 return isRegOrInlineNoMods(AMDGPU::VS_32_Lo128RegClassID, MVT::i16);
531 bool isVCSrc_b16()
const {
532 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i16);
535 bool isVCSrc_v2b16()
const {
return isVCSrc_b16(); }
537 bool isVCSrc_f32()
const {
538 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f32);
541 bool isVCSrcF64()
const {
542 return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::f64);
545 bool isVCSrcTBF16()
const {
546 return isRegOrInlineNoMods(AMDGPU::VS_16RegClassID, MVT::bf16);
549 bool isVCSrcTF16()
const {
550 return isRegOrInlineNoMods(AMDGPU::VS_16RegClassID, MVT::f16);
553 bool isVCSrcTBF16_Lo128()
const {
554 return isRegOrInlineNoMods(AMDGPU::VS_16_Lo128RegClassID, MVT::bf16);
557 bool isVCSrcTF16_Lo128()
const {
558 return isRegOrInlineNoMods(AMDGPU::VS_16_Lo128RegClassID, MVT::f16);
561 bool isVCSrcFake16BF16_Lo128()
const {
562 return isRegOrInlineNoMods(AMDGPU::VS_32_Lo128RegClassID, MVT::bf16);
565 bool isVCSrcFake16F16_Lo128()
const {
566 return isRegOrInlineNoMods(AMDGPU::VS_32_Lo128RegClassID, MVT::f16);
569 bool isVCSrc_bf16()
const {
570 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::bf16);
573 bool isVCSrc_f16()
const {
574 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f16);
577 bool isVCSrc_v2bf16()
const {
return isVCSrc_bf16(); }
579 bool isVCSrc_v2f16()
const {
return isVCSrc_f16(); }
581 bool isVSrc_b32()
const {
582 return isVCSrc_f32() || isLiteralImm(MVT::i32) || isExpr();
585 bool isVSrc_b64()
const {
return isVCSrcF64() || isLiteralImm(MVT::i64); }
587 bool isVSrcT_b16()
const {
return isVCSrcTB16() || isLiteralImm(MVT::i16); }
589 bool isVSrcT_b16_Lo128()
const {
590 return isVCSrcTB16_Lo128() || isLiteralImm(MVT::i16);
593 bool isVSrcFake16_b16_Lo128()
const {
594 return isVCSrcFake16B16_Lo128() || isLiteralImm(MVT::i16);
597 bool isVSrc_b16()
const {
return isVCSrc_b16() || isLiteralImm(MVT::i16); }
599 bool isVSrc_v2b16()
const {
return isVSrc_b16() || isLiteralImm(MVT::v2i16); }
601 bool isVCSrcV2FP32()
const {
605 bool isVSrc_v2f32()
const {
return isVSrc_f64() || isLiteralImm(MVT::v2f32); }
607 bool isVCSrcV2INT32()
const {
611 bool isVSrc_v2b32()
const {
return isVSrc_b64() || isLiteralImm(MVT::v2i32); }
613 bool isVSrc_f32()
const {
614 return isVCSrc_f32() || isLiteralImm(MVT::f32) || isExpr();
617 bool isVSrc_f64()
const {
return isVCSrcF64() || isLiteralImm(MVT::f64); }
619 bool isVSrcT_bf16()
const {
return isVCSrcTBF16() || isLiteralImm(MVT::bf16); }
621 bool isVSrcT_f16()
const {
return isVCSrcTF16() || isLiteralImm(MVT::f16); }
623 bool isVSrcT_bf16_Lo128()
const {
624 return isVCSrcTBF16_Lo128() || isLiteralImm(MVT::bf16);
627 bool isVSrcT_f16_Lo128()
const {
628 return isVCSrcTF16_Lo128() || isLiteralImm(MVT::f16);
631 bool isVSrcFake16_bf16_Lo128()
const {
632 return isVCSrcFake16BF16_Lo128() || isLiteralImm(MVT::bf16);
635 bool isVSrcFake16_f16_Lo128()
const {
636 return isVCSrcFake16F16_Lo128() || isLiteralImm(MVT::f16);
639 bool isVSrc_bf16()
const {
return isVCSrc_bf16() || isLiteralImm(MVT::bf16); }
641 bool isVSrc_f16()
const {
return isVCSrc_f16() || isLiteralImm(MVT::f16); }
643 bool isVSrc_v2bf16()
const {
644 return isVSrc_bf16() || isLiteralImm(MVT::v2bf16);
647 bool isVSrc_v2f16()
const {
return isVSrc_f16() || isLiteralImm(MVT::v2f16); }
649 bool isVISrcB32()
const {
650 return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i32);
653 bool isVISrcB16()
const {
654 return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i16);
657 bool isVISrcV2B16()
const {
661 bool isVISrcF32()
const {
662 return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f32);
665 bool isVISrcF16()
const {
666 return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f16);
669 bool isVISrcV2F16()
const {
670 return isVISrcF16() || isVISrcB32();
673 bool isVISrc_64_bf16()
const {
674 return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::bf16);
677 bool isVISrc_64_f16()
const {
678 return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::f16);
681 bool isVISrc_64_b32()
const {
682 return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::i32);
685 bool isVISrc_64B64()
const {
686 return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::i64);
689 bool isVISrc_64_f64()
const {
690 return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::f64);
693 bool isVISrc_64V2FP32()
const {
694 return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::f32);
697 bool isVISrc_64V2INT32()
const {
698 return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::i32);
701 bool isVISrc_256_b32()
const {
702 return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::i32);
705 bool isVISrc_256_f32()
const {
706 return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::f32);
709 bool isVISrc_256B64()
const {
710 return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::i64);
713 bool isVISrc_256_f64()
const {
714 return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::f64);
717 bool isVISrc_128B16()
const {
718 return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::i16);
721 bool isVISrc_128V2B16()
const {
722 return isVISrc_128B16();
725 bool isVISrc_128_b32()
const {
726 return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::i32);
729 bool isVISrc_128_f32()
const {
730 return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::f32);
733 bool isVISrc_256V2FP32()
const {
734 return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::f32);
737 bool isVISrc_256V2INT32()
const {
738 return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::i32);
741 bool isVISrc_512_b32()
const {
742 return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::i32);
745 bool isVISrc_512B16()
const {
746 return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::i16);
749 bool isVISrc_512V2B16()
const {
750 return isVISrc_512B16();
753 bool isVISrc_512_f32()
const {
754 return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::f32);
757 bool isVISrc_512F16()
const {
758 return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::f16);
761 bool isVISrc_512V2F16()
const {
762 return isVISrc_512F16() || isVISrc_512_b32();
765 bool isVISrc_1024_b32()
const {
766 return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::i32);
769 bool isVISrc_1024B16()
const {
770 return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::i16);
773 bool isVISrc_1024V2B16()
const {
774 return isVISrc_1024B16();
777 bool isVISrc_1024_f32()
const {
778 return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::f32);
781 bool isVISrc_1024F16()
const {
782 return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::f16);
785 bool isVISrc_1024V2F16()
const {
786 return isVISrc_1024F16() || isVISrc_1024_b32();
789 bool isAISrcB32()
const {
790 return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i32);
793 bool isAISrcB16()
const {
794 return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i16);
797 bool isAISrcV2B16()
const {
801 bool isAISrcF32()
const {
802 return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f32);
805 bool isAISrcF16()
const {
806 return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f16);
809 bool isAISrcV2F16()
const {
810 return isAISrcF16() || isAISrcB32();
813 bool isAISrc_64B64()
const {
814 return isRegOrInlineNoMods(AMDGPU::AReg_64RegClassID, MVT::i64);
817 bool isAISrc_64_f64()
const {
818 return isRegOrInlineNoMods(AMDGPU::AReg_64RegClassID, MVT::f64);
821 bool isAISrc_128_b32()
const {
822 return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i32);
825 bool isAISrc_128B16()
const {
826 return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i16);
829 bool isAISrc_128V2B16()
const {
830 return isAISrc_128B16();
833 bool isAISrc_128_f32()
const {
834 return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f32);
837 bool isAISrc_128F16()
const {
838 return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f16);
841 bool isAISrc_128V2F16()
const {
842 return isAISrc_128F16() || isAISrc_128_b32();
845 bool isVISrc_128_bf16()
const {
846 return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::bf16);
849 bool isVISrc_128_f16()
const {
850 return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::f16);
853 bool isVISrc_128V2F16()
const {
854 return isVISrc_128_f16() || isVISrc_128_b32();
857 bool isAISrc_256B64()
const {
858 return isRegOrInlineNoMods(AMDGPU::AReg_256RegClassID, MVT::i64);
861 bool isAISrc_256_f64()
const {
862 return isRegOrInlineNoMods(AMDGPU::AReg_256RegClassID, MVT::f64);
865 bool isAISrc_512_b32()
const {
866 return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i32);
869 bool isAISrc_512B16()
const {
870 return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i16);
873 bool isAISrc_512V2B16()
const {
874 return isAISrc_512B16();
877 bool isAISrc_512_f32()
const {
878 return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f32);
881 bool isAISrc_512F16()
const {
882 return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f16);
885 bool isAISrc_512V2F16()
const {
886 return isAISrc_512F16() || isAISrc_512_b32();
889 bool isAISrc_1024_b32()
const {
890 return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i32);
893 bool isAISrc_1024B16()
const {
894 return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i16);
897 bool isAISrc_1024V2B16()
const {
898 return isAISrc_1024B16();
901 bool isAISrc_1024_f32()
const {
902 return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f32);
905 bool isAISrc_1024F16()
const {
906 return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f16);
909 bool isAISrc_1024V2F16()
const {
910 return isAISrc_1024F16() || isAISrc_1024_b32();
913 bool isKImmFP32()
const {
914 return isLiteralImm(MVT::f32);
917 bool isKImmFP16()
const {
918 return isLiteralImm(MVT::f16);
921 bool isMem()
const override {
925 bool isExpr()
const {
929 bool isSOPPBrTarget()
const {
return isExpr() ||
isImm(); }
931 bool isSWaitCnt()
const;
932 bool isDepCtr()
const;
933 bool isSDelayALU()
const;
934 bool isHwreg()
const;
935 bool isSendMsg()
const;
936 bool isSplitBarrier()
const;
937 bool isSwizzle()
const;
938 bool isSMRDOffset8()
const;
939 bool isSMEMOffset()
const;
940 bool isSMRDLiteralOffset()
const;
942 bool isDPPCtrl()
const;
944 bool isGPRIdxMode()
const;
945 bool isS16Imm()
const;
946 bool isU16Imm()
const;
947 bool isEndpgm()
const;
949 auto getPredicate(std::function<
bool(
const AMDGPUOperand &
Op)>
P)
const {
950 return std::bind(
P, *
this);
958 int64_t getImm()
const {
963 void setImm(int64_t Val) {
968 ImmTy getImmTy()
const {
978 SMLoc getStartLoc()
const override {
982 SMLoc getEndLoc()
const override {
987 return SMRange(StartLoc, EndLoc);
990 Modifiers getModifiers()
const {
991 assert(isRegKind() || isImmTy(ImmTyNone));
992 return isRegKind() ?
Reg.Mods :
Imm.Mods;
995 void setModifiers(Modifiers Mods) {
996 assert(isRegKind() || isImmTy(ImmTyNone));
1003 bool hasModifiers()
const {
1004 return getModifiers().hasModifiers();
1007 bool hasFPModifiers()
const {
1008 return getModifiers().hasFPModifiers();
1011 bool hasIntModifiers()
const {
1012 return getModifiers().hasIntModifiers();
1017 void addImmOperands(
MCInst &Inst,
unsigned N,
bool ApplyModifiers =
true)
const;
1019 void addLiteralImmOperand(
MCInst &Inst, int64_t Val,
bool ApplyModifiers)
const;
1021 void addRegOperands(
MCInst &Inst,
unsigned N)
const;
1023 void addRegOrImmOperands(
MCInst &Inst,
unsigned N)
const {
1025 addRegOperands(Inst,
N);
1027 addImmOperands(Inst,
N);
1030 void addRegOrImmWithInputModsOperands(
MCInst &Inst,
unsigned N)
const {
1031 Modifiers Mods = getModifiers();
1034 addRegOperands(Inst,
N);
1036 addImmOperands(Inst,
N,
false);
1040 void addRegOrImmWithFPInputModsOperands(
MCInst &Inst,
unsigned N)
const {
1041 assert(!hasIntModifiers());
1042 addRegOrImmWithInputModsOperands(Inst,
N);
1045 void addRegOrImmWithIntInputModsOperands(
MCInst &Inst,
unsigned N)
const {
1046 assert(!hasFPModifiers());
1047 addRegOrImmWithInputModsOperands(Inst,
N);
1050 void addRegWithInputModsOperands(
MCInst &Inst,
unsigned N)
const {
1051 Modifiers Mods = getModifiers();
1054 addRegOperands(Inst,
N);
1057 void addRegWithFPInputModsOperands(
MCInst &Inst,
unsigned N)
const {
1058 assert(!hasIntModifiers());
1059 addRegWithInputModsOperands(Inst,
N);
1062 void addRegWithIntInputModsOperands(
MCInst &Inst,
unsigned N)
const {
1063 assert(!hasFPModifiers());
1064 addRegWithInputModsOperands(Inst,
N);
1070 case ImmTyNone:
OS <<
"None";
break;
1071 case ImmTyGDS:
OS <<
"GDS";
break;
1072 case ImmTyLDS:
OS <<
"LDS";
break;
1073 case ImmTyOffen:
OS <<
"Offen";
break;
1074 case ImmTyIdxen:
OS <<
"Idxen";
break;
1075 case ImmTyAddr64:
OS <<
"Addr64";
break;
1076 case ImmTyOffset:
OS <<
"Offset";
break;
1077 case ImmTyInstOffset:
OS <<
"InstOffset";
break;
1078 case ImmTyOffset0:
OS <<
"Offset0";
break;
1079 case ImmTyOffset1:
OS <<
"Offset1";
break;
1080 case ImmTySMEMOffsetMod:
OS <<
"SMEMOffsetMod";
break;
1081 case ImmTyCPol:
OS <<
"CPol";
break;
1082 case ImmTyIndexKey8bit:
OS <<
"index_key";
break;
1083 case ImmTyIndexKey16bit:
OS <<
"index_key";
break;
1084 case ImmTyTFE:
OS <<
"TFE";
break;
1085 case ImmTyD16:
OS <<
"D16";
break;
1086 case ImmTyFORMAT:
OS <<
"FORMAT";
break;
1087 case ImmTyClampSI:
OS <<
"ClampSI";
break;
1088 case ImmTyOModSI:
OS <<
"OModSI";
break;
1089 case ImmTyDPP8:
OS <<
"DPP8";
break;
1090 case ImmTyDppCtrl:
OS <<
"DppCtrl";
break;
1091 case ImmTyDppRowMask:
OS <<
"DppRowMask";
break;
1092 case ImmTyDppBankMask:
OS <<
"DppBankMask";
break;
1093 case ImmTyDppBoundCtrl:
OS <<
"DppBoundCtrl";
break;
1094 case ImmTyDppFI:
OS <<
"DppFI";
break;
1095 case ImmTySDWADstSel:
OS <<
"SDWADstSel";
break;
1096 case ImmTySDWASrc0Sel:
OS <<
"SDWASrc0Sel";
break;
1097 case ImmTySDWASrc1Sel:
OS <<
"SDWASrc1Sel";
break;
1098 case ImmTySDWADstUnused:
OS <<
"SDWADstUnused";
break;
1099 case ImmTyDMask:
OS <<
"DMask";
break;
1100 case ImmTyDim:
OS <<
"Dim";
break;
1101 case ImmTyUNorm:
OS <<
"UNorm";
break;
1102 case ImmTyDA:
OS <<
"DA";
break;
1103 case ImmTyR128A16:
OS <<
"R128A16";
break;
1104 case ImmTyA16:
OS <<
"A16";
break;
1105 case ImmTyLWE:
OS <<
"LWE";
break;
1106 case ImmTyOff:
OS <<
"Off";
break;
1107 case ImmTyExpTgt:
OS <<
"ExpTgt";
break;
1108 case ImmTyExpCompr:
OS <<
"ExpCompr";
break;
1109 case ImmTyExpVM:
OS <<
"ExpVM";
break;
1110 case ImmTyHwreg:
OS <<
"Hwreg";
break;
1111 case ImmTySendMsg:
OS <<
"SendMsg";
break;
1112 case ImmTyInterpSlot:
OS <<
"InterpSlot";
break;
1113 case ImmTyInterpAttr:
OS <<
"InterpAttr";
break;
1114 case ImmTyInterpAttrChan:
OS <<
"InterpAttrChan";
break;
1115 case ImmTyOpSel:
OS <<
"OpSel";
break;
1116 case ImmTyOpSelHi:
OS <<
"OpSelHi";
break;
1117 case ImmTyNegLo:
OS <<
"NegLo";
break;
1118 case ImmTyNegHi:
OS <<
"NegHi";
break;
1119 case ImmTySwizzle:
OS <<
"Swizzle";
break;
1120 case ImmTyGprIdxMode:
OS <<
"GprIdxMode";
break;
1121 case ImmTyHigh:
OS <<
"High";
break;
1122 case ImmTyBLGP:
OS <<
"BLGP";
break;
1123 case ImmTyCBSZ:
OS <<
"CBSZ";
break;
1124 case ImmTyABID:
OS <<
"ABID";
break;
1125 case ImmTyEndpgm:
OS <<
"Endpgm";
break;
1126 case ImmTyWaitVDST:
OS <<
"WaitVDST";
break;
1127 case ImmTyWaitEXP:
OS <<
"WaitEXP";
break;
1128 case ImmTyWaitVAVDst:
OS <<
"WaitVAVDst";
break;
1129 case ImmTyWaitVMVSrc:
OS <<
"WaitVMVSrc";
break;
1130 case ImmTyByteSel:
OS <<
"ByteSel" ;
break;
1138 OS <<
"<register " <<
getReg() <<
" mods: " <<
Reg.Mods <<
'>';
1141 OS <<
'<' << getImm();
1142 if (getImmTy() != ImmTyNone) {
1143 OS <<
" type: "; printImmTy(
OS, getImmTy());
1145 OS <<
" mods: " <<
Imm.Mods <<
'>';
1148 OS <<
'\'' << getToken() <<
'\'';
1151 OS <<
"<expr " << *Expr <<
'>';
1156 static AMDGPUOperand::Ptr CreateImm(
const AMDGPUAsmParser *AsmParser,
1157 int64_t Val,
SMLoc Loc,
1158 ImmTy
Type = ImmTyNone,
1159 bool IsFPImm =
false) {
1160 auto Op = std::make_unique<AMDGPUOperand>(Immediate, AsmParser);
1162 Op->Imm.IsFPImm = IsFPImm;
1163 Op->Imm.Kind = ImmKindTyNone;
1165 Op->Imm.Mods = Modifiers();
1171 static AMDGPUOperand::Ptr CreateToken(
const AMDGPUAsmParser *AsmParser,
1173 bool HasExplicitEncodingSize =
true) {
1174 auto Res = std::make_unique<AMDGPUOperand>(Token, AsmParser);
1175 Res->Tok.Data = Str.data();
1176 Res->Tok.Length = Str.size();
1177 Res->StartLoc = Loc;
1182 static AMDGPUOperand::Ptr CreateReg(
const AMDGPUAsmParser *AsmParser,
1183 unsigned RegNo,
SMLoc S,
1185 auto Op = std::make_unique<AMDGPUOperand>(
Register, AsmParser);
1186 Op->Reg.RegNo = RegNo;
1187 Op->Reg.Mods = Modifiers();
1193 static AMDGPUOperand::Ptr CreateExpr(
const AMDGPUAsmParser *AsmParser,
1195 auto Op = std::make_unique<AMDGPUOperand>(
Expression, AsmParser);
1204 OS <<
"abs:" << Mods.Abs <<
" neg: " << Mods.Neg <<
" sext:" << Mods.Sext;
1215class KernelScopeInfo {
1216 int SgprIndexUnusedMin = -1;
1217 int VgprIndexUnusedMin = -1;
1218 int AgprIndexUnusedMin = -1;
1222 void usesSgprAt(
int i) {
1223 if (i >= SgprIndexUnusedMin) {
1224 SgprIndexUnusedMin = ++i;
1233 void usesVgprAt(
int i) {
1234 if (i >= VgprIndexUnusedMin) {
1235 VgprIndexUnusedMin = ++i;
1240 VgprIndexUnusedMin);
1246 void usesAgprAt(
int i) {
1251 if (i >= AgprIndexUnusedMin) {
1252 AgprIndexUnusedMin = ++i;
1262 VgprIndexUnusedMin);
1269 KernelScopeInfo() =
default;
1275 usesSgprAt(SgprIndexUnusedMin = -1);
1276 usesVgprAt(VgprIndexUnusedMin = -1);
1278 usesAgprAt(AgprIndexUnusedMin = -1);
1282 void usesRegister(RegisterKind RegKind,
unsigned DwordRegIndex,
1283 unsigned RegWidth) {
1286 usesSgprAt(DwordRegIndex +
divideCeil(RegWidth, 32) - 1);
1289 usesAgprAt(DwordRegIndex +
divideCeil(RegWidth, 32) - 1);
1292 usesVgprAt(DwordRegIndex +
divideCeil(RegWidth, 32) - 1);
1303 unsigned ForcedEncodingSize = 0;
1304 bool ForcedDPP =
false;
1305 bool ForcedSDWA =
false;
1306 KernelScopeInfo KernelScope;
1311#define GET_ASSEMBLER_HEADER
1312#include "AMDGPUGenAsmMatcher.inc"
1317 bool ParseAsAbsoluteExpression(
uint32_t &Ret);
1318 bool OutOfRangeError(
SMRange Range);
1334 bool calculateGPRBlocks(
const FeatureBitset &Features,
bool VCCUsed,
1335 bool FlatScrUsed,
bool XNACKUsed,
1336 std::optional<bool> EnableWavefrontSize32,
1337 unsigned NextFreeVGPR,
SMRange VGPRRange,
1338 unsigned NextFreeSGPR,
SMRange SGPRRange,
1339 unsigned &VGPRBlocks,
unsigned &SGPRBlocks);
1340 bool ParseDirectiveAMDGCNTarget();
1341 bool ParseDirectiveAMDHSACodeObjectVersion();
1342 bool ParseDirectiveAMDHSAKernel();
1344 bool ParseDirectiveAMDKernelCodeT();
1347 bool ParseDirectiveAMDGPUHsaKernel();
1349 bool ParseDirectiveISAVersion();
1350 bool ParseDirectiveHSAMetadata();
1351 bool ParseDirectivePALMetadataBegin();
1352 bool ParseDirectivePALMetadata();
1353 bool ParseDirectiveAMDGPULDS();
1357 bool ParseToEndDirective(
const char *AssemblerDirectiveBegin,
1358 const char *AssemblerDirectiveEnd,
1359 std::string &CollectString);
1361 bool AddNextRegisterToList(
unsigned& Reg,
unsigned& RegWidth,
1362 RegisterKind RegKind,
unsigned Reg1,
SMLoc Loc);
1363 bool ParseAMDGPURegister(RegisterKind &RegKind,
unsigned &Reg,
1364 unsigned &RegNum,
unsigned &RegWidth,
1365 bool RestoreOnFailure =
false);
1366 bool ParseAMDGPURegister(RegisterKind &RegKind,
unsigned &Reg,
1367 unsigned &RegNum,
unsigned &RegWidth,
1369 unsigned ParseRegularReg(RegisterKind &RegKind,
unsigned &RegNum,
1372 unsigned ParseSpecialReg(RegisterKind &RegKind,
unsigned &RegNum,
1375 unsigned ParseRegList(RegisterKind &RegKind,
unsigned &RegNum,
1377 bool ParseRegRange(
unsigned& Num,
unsigned& Width);
1378 unsigned getRegularReg(RegisterKind RegKind,
unsigned RegNum,
unsigned SubReg,
1379 unsigned RegWidth,
SMLoc Loc);
1383 std::optional<StringRef> getGprCountSymbolName(RegisterKind RegKind);
1384 void initializeGprCountSymbol(RegisterKind RegKind);
1385 bool updateGprCountSymbols(RegisterKind RegKind,
unsigned DwordRegIndex,
1391 enum AMDGPUMatchResultTy {
1392 Match_PreferE32 = FIRST_TARGET_MATCH_RESULT_TY
1395 OperandMode_Default,
1399 using OptionalImmIndexMap = std::map<AMDGPUOperand::ImmTy, unsigned>;
1407 if (getFeatureBits().
none()) {
1439 initializeGprCountSymbol(IS_VGPR);
1440 initializeGprCountSymbol(IS_SGPR);
1513 bool hasInv2PiInlineImm()
const {
1514 return getFeatureBits()[AMDGPU::FeatureInv2PiInlineImm];
1517 bool hasFlatOffsets()
const {
1518 return getFeatureBits()[AMDGPU::FeatureFlatInstOffsets];
1522 return getFeatureBits()[AMDGPU::FeatureArchitectedFlatScratch];
1525 bool hasSGPR102_SGPR103()
const {
1529 bool hasSGPR104_SGPR105()
const {
return isGFX10Plus(); }
1531 bool hasIntClamp()
const {
1532 return getFeatureBits()[AMDGPU::FeatureIntClamp];
1535 bool hasPartialNSAEncoding()
const {
1536 return getFeatureBits()[AMDGPU::FeaturePartialNSAEncoding];
1568 void setForcedEncodingSize(
unsigned Size) { ForcedEncodingSize =
Size; }
1569 void setForcedDPP(
bool ForceDPP_) { ForcedDPP = ForceDPP_; }
1570 void setForcedSDWA(
bool ForceSDWA_) { ForcedSDWA = ForceSDWA_; }
1572 unsigned getForcedEncodingSize()
const {
return ForcedEncodingSize; }
1573 bool isForcedVOP3()
const {
return ForcedEncodingSize == 64; }
1574 bool isForcedDPP()
const {
return ForcedDPP; }
1575 bool isForcedSDWA()
const {
return ForcedSDWA; }
1577 StringRef getMatchedVariantName()
const;
1579 std::unique_ptr<AMDGPUOperand>
parseRegister(
bool RestoreOnFailure =
false);
1581 bool RestoreOnFailure);
1584 SMLoc &EndLoc)
override;
1587 unsigned Kind)
override;
1591 bool MatchingInlineAsm)
override;
1594 OperandMode Mode = OperandMode_Default);
1602 ParseStatus parseIntWithPrefix(
const char *Prefix, int64_t &
Int);
1606 AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
1607 std::function<
bool(int64_t &)> ConvertResult =
nullptr);
1611 AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
1612 bool (*ConvertResult)(int64_t &) =
nullptr);
1616 AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone);
1625 bool isOperandModifier(
const AsmToken &Token,
const AsmToken &NextToken)
const;
1626 bool isRegOrOperandModifier(
const AsmToken &Token,
const AsmToken &NextToken)
const;
1627 bool isNamedOperandModifier(
const AsmToken &Token,
const AsmToken &NextToken)
const;
1628 bool isOpcodeModifierWithVal(
const AsmToken &Token,
const AsmToken &NextToken)
const;
1629 bool parseSP3NegModifier();
1631 bool HasLit =
false);
1634 bool HasLit =
false);
1636 bool AllowImm =
true);
1638 bool AllowImm =
true);
1643 AMDGPUOperand::ImmTy ImmTy);
1654 ParseStatus parseSymbolicOrNumericFormat(int64_t &Format);
1659 bool tryParseFmt(
const char *Pref, int64_t MaxVal, int64_t &Val);
1660 bool matchDfmtNfmt(int64_t &Dfmt, int64_t &Nfmt,
StringRef FormatStr,
SMLoc Loc);
1664 bool parseCnt(int64_t &IntVal);
1667 bool parseDepCtr(int64_t &IntVal,
unsigned &Mask);
1671 bool parseDelay(int64_t &Delay);
1677 struct OperandInfoTy {
1680 bool IsSymbolic =
false;
1681 bool IsDefined =
false;
1683 OperandInfoTy(int64_t Val) : Val(Val) {}
1686 struct StructuredOpField : OperandInfoTy {
1690 bool IsDefined =
false;
1695 virtual ~StructuredOpField() =
default;
1697 bool Error(AMDGPUAsmParser &Parser,
const Twine &Err)
const {
1698 Parser.Error(Loc,
"invalid " +
Desc +
": " + Err);
1702 virtual bool validate(AMDGPUAsmParser &Parser)
const {
1704 return Error(Parser,
"not supported on this GPU");
1706 return Error(Parser,
"only " +
Twine(Width) +
"-bit values are legal");
1714 bool parseSendMsgBody(OperandInfoTy &Msg, OperandInfoTy &
Op, OperandInfoTy &Stream);
1715 bool validateSendMsg(
const OperandInfoTy &Msg,
1716 const OperandInfoTy &
Op,
1717 const OperandInfoTy &Stream);
1720 OperandInfoTy &Width);
1726 SMLoc getOperandLoc(std::function<
bool(
const AMDGPUOperand&)>
Test,
1731 bool SearchMandatoryLiterals =
false)
const;
1740 bool validateSOPLiteral(
const MCInst &Inst)
const;
1742 bool validateVOPDRegBankConstraints(
const MCInst &Inst,
1744 bool validateIntClampSupported(
const MCInst &Inst);
1745 bool validateMIMGAtomicDMask(
const MCInst &Inst);
1746 bool validateMIMGGatherDMask(
const MCInst &Inst);
1748 bool validateMIMGDataSize(
const MCInst &Inst,
const SMLoc &IDLoc);
1749 bool validateMIMGAddrSize(
const MCInst &Inst,
const SMLoc &IDLoc);
1750 bool validateMIMGD16(
const MCInst &Inst);
1751 bool validateMIMGMSAA(
const MCInst &Inst);
1752 bool validateOpSel(
const MCInst &Inst);
1755 bool validateVccOperand(
unsigned Reg)
const;
1760 bool validateAGPRLdSt(
const MCInst &Inst)
const;
1761 bool validateVGPRAlign(
const MCInst &Inst)
const;
1765 bool validateDivScale(
const MCInst &Inst);
1768 const SMLoc &IDLoc);
1770 const unsigned CPol);
1773 std::optional<StringRef> validateLdsDirect(
const MCInst &Inst);
1774 unsigned getConstantBusLimit(
unsigned Opcode)
const;
1775 bool usesConstantBus(
const MCInst &Inst,
unsigned OpIdx);
1776 bool isInlineConstant(
const MCInst &Inst,
unsigned OpIdx)
const;
1777 unsigned findImplicitSGPRReadInVOP(
const MCInst &Inst)
const;
1803 AsmToken peekToken(
bool ShouldSkipSpace =
true);
1805 SMLoc getLoc()
const;
1809 void onBeginOfFile()
override;
1810 bool parsePrimaryExpr(
const MCExpr *&Res,
SMLoc &EndLoc)
override;
1821 bool parseSwizzleOperand(int64_t &
Op,
1822 const unsigned MinVal,
1823 const unsigned MaxVal,
1826 bool parseSwizzleOperands(
const unsigned OpNum, int64_t*
Op,
1827 const unsigned MinVal,
1828 const unsigned MaxVal,
1831 bool parseSwizzleOffset(int64_t &Imm);
1832 bool parseSwizzleMacro(int64_t &Imm);
1833 bool parseSwizzleQuadPerm(int64_t &Imm);
1834 bool parseSwizzleBitmaskPerm(int64_t &Imm);
1835 bool parseSwizzleBroadcast(int64_t &Imm);
1836 bool parseSwizzleSwap(int64_t &Imm);
1837 bool parseSwizzleReverse(int64_t &Imm);
1840 int64_t parseGPRIdxMacro();
1848 OptionalImmIndexMap &OptionalIdx);
1856 OptionalImmIndexMap &OptionalIdx);
1858 OptionalImmIndexMap &OptionalIdx);
1863 bool parseDimId(
unsigned &Encoding);
1865 bool convertDppBoundCtrl(int64_t &BoundCtrl);
1869 int64_t parseDPPCtrlSel(
StringRef Ctrl);
1870 int64_t parseDPPCtrlPerm();
1876 bool IsDPP8 =
false);
1882 AMDGPUOperand::ImmTy
Type);
1891 bool SkipDstVcc =
false,
1892 bool SkipSrcVcc =
false);
1905 return &APFloat::IEEEsingle();
1907 return &APFloat::IEEEdouble();
1909 return &APFloat::IEEEhalf();
1942 return &APFloat::IEEEsingle();
1948 return &APFloat::IEEEdouble();
1957 return &APFloat::IEEEhalf();
1965 return &APFloat::BFloat();
1980 APFloat::rmNearestTiesToEven,
1983 if (
Status != APFloat::opOK &&
1985 ((
Status & APFloat::opOverflow) != 0 ||
1986 (
Status & APFloat::opUnderflow) != 0)) {
2009bool AMDGPUOperand::isInlinableImm(
MVT type)
const {
2019 if (!isImmTy(ImmTyNone)) {
2030 if (type == MVT::f64 || type == MVT::i64) {
2032 AsmParser->hasInv2PiInlineImm());
2054 APFloat::rmNearestTiesToEven, &Lost);
2061 uint32_t ImmVal = FPLiteral.bitcastToAPInt().getZExtValue();
2063 AsmParser->hasInv2PiInlineImm());
2068 static_cast<int32_t
>(FPLiteral.bitcastToAPInt().getZExtValue()),
2069 AsmParser->hasInv2PiInlineImm());
2073 if (type == MVT::f64 || type == MVT::i64) {
2075 AsmParser->hasInv2PiInlineImm());
2084 static_cast<int16_t
>(
Literal.getLoBits(16).getSExtValue()),
2085 type, AsmParser->hasInv2PiInlineImm());
2089 static_cast<int32_t
>(
Literal.getLoBits(32).getZExtValue()),
2090 AsmParser->hasInv2PiInlineImm());
2093bool AMDGPUOperand::isLiteralImm(
MVT type)
const {
2095 if (!isImmTy(ImmTyNone)) {
2102 if (type == MVT::f64 && hasFPModifiers()) {
2119 if (type == MVT::f64) {
2124 if (type == MVT::i64) {
2137 MVT ExpectedType = (type == MVT::v2f16) ? MVT::f16
2138 : (type == MVT::v2i16) ? MVT::f32
2139 : (type == MVT::v2f32) ? MVT::f32
2146bool AMDGPUOperand::isRegClass(
unsigned RCID)
const {
2147 return isRegKind() && AsmParser->getMRI()->getRegClass(RCID).contains(
getReg());
2150bool AMDGPUOperand::isVRegWithInputMods()
const {
2151 return isRegClass(AMDGPU::VGPR_32RegClassID) ||
2153 (isRegClass(AMDGPU::VReg_64RegClassID) &&
2154 AsmParser->getFeatureBits()[AMDGPU::FeatureDPALU_DPP]);
2157template <
bool IsFake16>
bool AMDGPUOperand::isT16VRegWithInputMods()
const {
2158 return isRegClass(IsFake16 ? AMDGPU::VGPR_32_Lo128RegClassID
2159 : AMDGPU::VGPR_16_Lo128RegClassID);
2162bool AMDGPUOperand::isSDWAOperand(
MVT type)
const {
2163 if (AsmParser->isVI())
2165 else if (AsmParser->isGFX9Plus())
2166 return isRegClass(AMDGPU::VS_32RegClassID) || isInlinableImm(type);
2171bool AMDGPUOperand::isSDWAFP16Operand()
const {
2172 return isSDWAOperand(MVT::f16);
2175bool AMDGPUOperand::isSDWAFP32Operand()
const {
2176 return isSDWAOperand(MVT::f32);
2179bool AMDGPUOperand::isSDWAInt16Operand()
const {
2180 return isSDWAOperand(MVT::i16);
2183bool AMDGPUOperand::isSDWAInt32Operand()
const {
2184 return isSDWAOperand(MVT::i32);
2187bool AMDGPUOperand::isBoolReg()
const {
2188 auto FB = AsmParser->getFeatureBits();
2189 return isReg() && ((FB[AMDGPU::FeatureWavefrontSize64] && isSCSrc_b64()) ||
2190 (FB[AMDGPU::FeatureWavefrontSize32] && isSCSrc_b32()));
2195 assert(isImmTy(ImmTyNone) &&
Imm.Mods.hasFPModifiers());
2210void AMDGPUOperand::addImmOperands(
MCInst &Inst,
unsigned N,
bool ApplyModifiers)
const {
2218 addLiteralImmOperand(Inst,
Imm.Val,
2220 isImmTy(ImmTyNone) &&
Imm.Mods.hasFPModifiers());
2222 assert(!isImmTy(ImmTyNone) || !hasModifiers());
2228void AMDGPUOperand::addLiteralImmOperand(
MCInst &Inst, int64_t Val,
bool ApplyModifiers)
const {
2229 const auto& InstDesc = AsmParser->getMII()->get(Inst.
getOpcode());
2234 if (ApplyModifiers) {
2237 Val = applyInputFPModifiers(Val,
Size);
2241 uint8_t OpTy = InstDesc.operands()[OpNum].OperandType;
2251 AsmParser->hasInv2PiInlineImm())) {
2260 if (
Literal.getLoBits(32) != 0) {
2261 const_cast<AMDGPUAsmParser *
>(AsmParser)->
Warning(Inst.
getLoc(),
2262 "Can't encode literal as exact 64-bit floating-point operand. "
2263 "Low 32-bits will be set to zero");
2264 Val &= 0xffffffff00000000u;
2268 setImmKindLiteral();
2284 if (AsmParser->hasInv2PiInlineImm() &&
Literal == 0x3fc45f306725feed) {
2290 setImmKindLiteral();
2326 APFloat::rmNearestTiesToEven, &lost);
2330 uint64_t ImmVal = FPLiteral.bitcastToAPInt().getZExtValue();
2333 setImmKindMandatoryLiteral();
2335 setImmKindLiteral();
2366 AsmParser->hasInv2PiInlineImm())) {
2373 setImmKindLiteral();
2391 setImmKindLiteral();
2405 setImmKindLiteral();
2414 AsmParser->hasInv2PiInlineImm())) {
2421 setImmKindLiteral();
2430 AsmParser->hasInv2PiInlineImm())) {
2437 setImmKindLiteral();
2451 AsmParser->hasInv2PiInlineImm()));
2461 AsmParser->hasInv2PiInlineImm()));
2469 setImmKindMandatoryLiteral();
2473 setImmKindMandatoryLiteral();
2480void AMDGPUOperand::addRegOperands(
MCInst &Inst,
unsigned N)
const {
2484bool AMDGPUOperand::isInlineValue()
const {
2493 if (Is == IS_VGPR) {
2497 return AMDGPU::VGPR_32RegClassID;
2499 return AMDGPU::VReg_64RegClassID;
2501 return AMDGPU::VReg_96RegClassID;
2503 return AMDGPU::VReg_128RegClassID;
2505 return AMDGPU::VReg_160RegClassID;
2507 return AMDGPU::VReg_192RegClassID;
2509 return AMDGPU::VReg_224RegClassID;
2511 return AMDGPU::VReg_256RegClassID;
2513 return AMDGPU::VReg_288RegClassID;
2515 return AMDGPU::VReg_320RegClassID;
2517 return AMDGPU::VReg_352RegClassID;
2519 return AMDGPU::VReg_384RegClassID;
2521 return AMDGPU::VReg_512RegClassID;
2523 return AMDGPU::VReg_1024RegClassID;
2525 }
else if (Is == IS_TTMP) {
2529 return AMDGPU::TTMP_32RegClassID;
2531 return AMDGPU::TTMP_64RegClassID;
2533 return AMDGPU::TTMP_128RegClassID;
2535 return AMDGPU::TTMP_256RegClassID;
2537 return AMDGPU::TTMP_512RegClassID;
2539 }
else if (Is == IS_SGPR) {
2543 return AMDGPU::SGPR_32RegClassID;
2545 return AMDGPU::SGPR_64RegClassID;
2547 return AMDGPU::SGPR_96RegClassID;
2549 return AMDGPU::SGPR_128RegClassID;
2551 return AMDGPU::SGPR_160RegClassID;
2553 return AMDGPU::SGPR_192RegClassID;
2555 return AMDGPU::SGPR_224RegClassID;
2557 return AMDGPU::SGPR_256RegClassID;
2559 return AMDGPU::SGPR_288RegClassID;
2561 return AMDGPU::SGPR_320RegClassID;
2563 return AMDGPU::SGPR_352RegClassID;
2565 return AMDGPU::SGPR_384RegClassID;
2567 return AMDGPU::SGPR_512RegClassID;
2569 }
else if (Is == IS_AGPR) {
2573 return AMDGPU::AGPR_32RegClassID;
2575 return AMDGPU::AReg_64RegClassID;
2577 return AMDGPU::AReg_96RegClassID;
2579 return AMDGPU::AReg_128RegClassID;
2581 return AMDGPU::AReg_160RegClassID;
2583 return AMDGPU::AReg_192RegClassID;
2585 return AMDGPU::AReg_224RegClassID;
2587 return AMDGPU::AReg_256RegClassID;
2589 return AMDGPU::AReg_288RegClassID;
2591 return AMDGPU::AReg_320RegClassID;
2593 return AMDGPU::AReg_352RegClassID;
2595 return AMDGPU::AReg_384RegClassID;
2597 return AMDGPU::AReg_512RegClassID;
2599 return AMDGPU::AReg_1024RegClassID;
2607 .
Case(
"exec", AMDGPU::EXEC)
2608 .
Case(
"vcc", AMDGPU::VCC)
2609 .
Case(
"flat_scratch", AMDGPU::FLAT_SCR)
2610 .
Case(
"xnack_mask", AMDGPU::XNACK_MASK)
2611 .
Case(
"shared_base", AMDGPU::SRC_SHARED_BASE)
2612 .
Case(
"src_shared_base", AMDGPU::SRC_SHARED_BASE)
2613 .
Case(
"shared_limit", AMDGPU::SRC_SHARED_LIMIT)
2614 .
Case(
"src_shared_limit", AMDGPU::SRC_SHARED_LIMIT)
2615 .
Case(
"private_base", AMDGPU::SRC_PRIVATE_BASE)
2616 .
Case(
"src_private_base", AMDGPU::SRC_PRIVATE_BASE)
2617 .
Case(
"private_limit", AMDGPU::SRC_PRIVATE_LIMIT)
2618 .
Case(
"src_private_limit", AMDGPU::SRC_PRIVATE_LIMIT)
2619 .
Case(
"pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID)
2620 .
Case(
"src_pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID)
2621 .
Case(
"lds_direct", AMDGPU::LDS_DIRECT)
2622 .
Case(
"src_lds_direct", AMDGPU::LDS_DIRECT)
2623 .
Case(
"m0", AMDGPU::M0)
2624 .
Case(
"vccz", AMDGPU::SRC_VCCZ)
2625 .
Case(
"src_vccz", AMDGPU::SRC_VCCZ)
2626 .
Case(
"execz", AMDGPU::SRC_EXECZ)
2627 .
Case(
"src_execz", AMDGPU::SRC_EXECZ)
2628 .
Case(
"scc", AMDGPU::SRC_SCC)
2629 .
Case(
"src_scc", AMDGPU::SRC_SCC)
2630 .
Case(
"tba", AMDGPU::TBA)
2631 .
Case(
"tma", AMDGPU::TMA)
2632 .
Case(
"flat_scratch_lo", AMDGPU::FLAT_SCR_LO)
2633 .
Case(
"flat_scratch_hi", AMDGPU::FLAT_SCR_HI)
2634 .
Case(
"xnack_mask_lo", AMDGPU::XNACK_MASK_LO)
2635 .
Case(
"xnack_mask_hi", AMDGPU::XNACK_MASK_HI)
2636 .
Case(
"vcc_lo", AMDGPU::VCC_LO)
2637 .
Case(
"vcc_hi", AMDGPU::VCC_HI)
2638 .
Case(
"exec_lo", AMDGPU::EXEC_LO)
2639 .
Case(
"exec_hi", AMDGPU::EXEC_HI)
2640 .
Case(
"tma_lo", AMDGPU::TMA_LO)
2641 .
Case(
"tma_hi", AMDGPU::TMA_HI)
2642 .
Case(
"tba_lo", AMDGPU::TBA_LO)
2643 .
Case(
"tba_hi", AMDGPU::TBA_HI)
2644 .
Case(
"pc", AMDGPU::PC_REG)
2645 .
Case(
"null", AMDGPU::SGPR_NULL)
2649bool AMDGPUAsmParser::ParseRegister(
MCRegister &RegNo,
SMLoc &StartLoc,
2650 SMLoc &EndLoc,
bool RestoreOnFailure) {
2651 auto R = parseRegister();
2652 if (!R)
return true;
2654 RegNo =
R->getReg();
2655 StartLoc =
R->getStartLoc();
2656 EndLoc =
R->getEndLoc();
2662 return ParseRegister(Reg, StartLoc, EndLoc,
false);
2667 bool Result = ParseRegister(Reg, StartLoc, EndLoc,
true);
2668 bool PendingErrors = getParser().hasPendingError();
2669 getParser().clearPendingErrors();
2677bool AMDGPUAsmParser::AddNextRegisterToList(
unsigned &Reg,
unsigned &RegWidth,
2678 RegisterKind RegKind,
unsigned Reg1,
2682 if (Reg == AMDGPU::EXEC_LO && Reg1 == AMDGPU::EXEC_HI) {
2687 if (Reg == AMDGPU::FLAT_SCR_LO && Reg1 == AMDGPU::FLAT_SCR_HI) {
2688 Reg = AMDGPU::FLAT_SCR;
2692 if (Reg == AMDGPU::XNACK_MASK_LO && Reg1 == AMDGPU::XNACK_MASK_HI) {
2693 Reg = AMDGPU::XNACK_MASK;
2697 if (Reg == AMDGPU::VCC_LO && Reg1 == AMDGPU::VCC_HI) {
2702 if (Reg == AMDGPU::TBA_LO && Reg1 == AMDGPU::TBA_HI) {
2707 if (Reg == AMDGPU::TMA_LO && Reg1 == AMDGPU::TMA_HI) {
2712 Error(Loc,
"register does not fit in the list");
2718 if (Reg1 != Reg + RegWidth / 32) {
2719 Error(Loc,
"registers in a list must have consecutive indices");
2737 {{
"ttmp"}, IS_TTMP},
2743 return Kind == IS_VGPR ||
2751 if (Str.starts_with(Reg.Name))
2757 return !Str.getAsInteger(10, Num);
2761AMDGPUAsmParser::isRegister(
const AsmToken &Token,
2778 if (!RegSuffix.
empty()) {
2796AMDGPUAsmParser::isRegister()
2798 return isRegister(getToken(), peekToken());
2801unsigned AMDGPUAsmParser::getRegularReg(RegisterKind RegKind,
unsigned RegNum,
2802 unsigned SubReg,
unsigned RegWidth,
2806 unsigned AlignSize = 1;
2807 if (RegKind == IS_SGPR || RegKind == IS_TTMP) {
2813 if (RegNum % AlignSize != 0) {
2814 Error(Loc,
"invalid register alignment");
2815 return AMDGPU::NoRegister;
2818 unsigned RegIdx = RegNum / AlignSize;
2821 Error(Loc,
"invalid or unsupported register size");
2822 return AMDGPU::NoRegister;
2828 Error(Loc,
"register index is out of range");
2829 return AMDGPU::NoRegister;
2839 assert(Reg &&
"Invalid subregister!");
2845bool AMDGPUAsmParser::ParseRegRange(
unsigned &Num,
unsigned &RegWidth) {
2846 int64_t RegLo, RegHi;
2850 SMLoc FirstIdxLoc = getLoc();
2853 if (!parseExpr(RegLo))
2857 SecondIdxLoc = getLoc();
2858 if (!parseExpr(RegHi))
2867 if (!isUInt<32>(RegLo)) {
2868 Error(FirstIdxLoc,
"invalid register index");
2872 if (!isUInt<32>(RegHi)) {
2873 Error(SecondIdxLoc,
"invalid register index");
2877 if (RegLo > RegHi) {
2878 Error(FirstIdxLoc,
"first register index should not exceed second index");
2882 Num =
static_cast<unsigned>(RegLo);
2883 RegWidth = 32 * ((RegHi - RegLo) + 1);
2887unsigned AMDGPUAsmParser::ParseSpecialReg(RegisterKind &RegKind,
2888 unsigned &RegNum,
unsigned &RegWidth,
2895 RegKind = IS_SPECIAL;
2902unsigned AMDGPUAsmParser::ParseRegularReg(RegisterKind &RegKind,
2903 unsigned &RegNum,
unsigned &RegWidth,
2907 auto Loc = getLoc();
2911 Error(Loc,
"invalid register name");
2912 return AMDGPU::NoRegister;
2920 unsigned SubReg = NoSubRegister;
2921 if (!RegSuffix.
empty()) {
2933 Error(Loc,
"invalid register index");
2934 return AMDGPU::NoRegister;
2939 if (!ParseRegRange(RegNum, RegWidth))
2940 return AMDGPU::NoRegister;
2943 return getRegularReg(RegKind, RegNum,
SubReg, RegWidth, Loc);
2946unsigned AMDGPUAsmParser::ParseRegList(RegisterKind &RegKind,
unsigned &RegNum,
2949 unsigned Reg = AMDGPU::NoRegister;
2950 auto ListLoc = getLoc();
2953 "expected a register or a list of registers")) {
2954 return AMDGPU::NoRegister;
2959 auto Loc = getLoc();
2960 if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth))
2961 return AMDGPU::NoRegister;
2962 if (RegWidth != 32) {
2963 Error(Loc,
"expected a single 32-bit register");
2964 return AMDGPU::NoRegister;
2968 RegisterKind NextRegKind;
2969 unsigned NextReg, NextRegNum, NextRegWidth;
2972 if (!ParseAMDGPURegister(NextRegKind, NextReg,
2973 NextRegNum, NextRegWidth,
2975 return AMDGPU::NoRegister;
2977 if (NextRegWidth != 32) {
2978 Error(Loc,
"expected a single 32-bit register");
2979 return AMDGPU::NoRegister;
2981 if (NextRegKind != RegKind) {
2982 Error(Loc,
"registers in a list must be of the same kind");
2983 return AMDGPU::NoRegister;
2985 if (!AddNextRegisterToList(Reg, RegWidth, RegKind, NextReg, Loc))
2986 return AMDGPU::NoRegister;
2990 "expected a comma or a closing square bracket")) {
2991 return AMDGPU::NoRegister;
2995 Reg = getRegularReg(RegKind, RegNum, NoSubRegister, RegWidth, ListLoc);
3000bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind,
unsigned &Reg,
3001 unsigned &RegNum,
unsigned &RegWidth,
3003 auto Loc = getLoc();
3004 Reg = AMDGPU::NoRegister;
3007 Reg = ParseSpecialReg(RegKind, RegNum, RegWidth, Tokens);
3008 if (Reg == AMDGPU::NoRegister)
3009 Reg = ParseRegularReg(RegKind, RegNum, RegWidth, Tokens);
3011 Reg = ParseRegList(RegKind, RegNum, RegWidth, Tokens);
3015 if (Reg == AMDGPU::NoRegister) {
3016 assert(Parser.hasPendingError());
3020 if (!subtargetHasRegister(*
TRI, Reg)) {
3021 if (Reg == AMDGPU::SGPR_NULL) {
3022 Error(Loc,
"'null' operand is not supported on this GPU");
3024 Error(Loc,
"register not available on this GPU");
3032bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind,
unsigned &Reg,
3033 unsigned &RegNum,
unsigned &RegWidth,
3034 bool RestoreOnFailure ) {
3035 Reg = AMDGPU::NoRegister;
3038 if (ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth, Tokens)) {
3039 if (RestoreOnFailure) {
3040 while (!Tokens.
empty()) {
3049std::optional<StringRef>
3050AMDGPUAsmParser::getGprCountSymbolName(RegisterKind RegKind) {
3053 return StringRef(
".amdgcn.next_free_vgpr");
3055 return StringRef(
".amdgcn.next_free_sgpr");
3057 return std::nullopt;
3061void AMDGPUAsmParser::initializeGprCountSymbol(RegisterKind RegKind) {
3062 auto SymbolName = getGprCountSymbolName(RegKind);
3063 assert(SymbolName &&
"initializing invalid register kind");
3064 MCSymbol *
Sym = getContext().getOrCreateSymbol(*SymbolName);
3068bool AMDGPUAsmParser::updateGprCountSymbols(RegisterKind RegKind,
3069 unsigned DwordRegIndex,
3070 unsigned RegWidth) {
3075 auto SymbolName = getGprCountSymbolName(RegKind);
3078 MCSymbol *
Sym = getContext().getOrCreateSymbol(*SymbolName);
3080 int64_t NewMax = DwordRegIndex +
divideCeil(RegWidth, 32) - 1;
3083 if (!
Sym->isVariable())
3084 return !
Error(getLoc(),
3085 ".amdgcn.next_free_{v,s}gpr symbols must be variable");
3086 if (!
Sym->getVariableValue(
false)->evaluateAsAbsolute(OldCount))
3089 ".amdgcn.next_free_{v,s}gpr symbols must be absolute expressions");
3091 if (OldCount <= NewMax)
3097std::unique_ptr<AMDGPUOperand>
3098AMDGPUAsmParser::parseRegister(
bool RestoreOnFailure) {
3099 const auto &Tok = getToken();
3100 SMLoc StartLoc = Tok.getLoc();
3101 SMLoc EndLoc = Tok.getEndLoc();
3102 RegisterKind RegKind;
3103 unsigned Reg, RegNum, RegWidth;
3105 if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth)) {
3109 if (!updateGprCountSymbols(RegKind, RegNum, RegWidth))
3112 KernelScope.usesRegister(RegKind, RegNum, RegWidth);
3113 return AMDGPUOperand::CreateReg(
this, Reg, StartLoc, EndLoc);
3117 bool HasSP3AbsModifier,
bool HasLit) {
3125 HasLit = trySkipId(
"lit");
3137 const auto& Tok = getToken();
3138 const auto& NextTok = peekToken();
3141 bool Negate =
false;
3149 AMDGPUOperand::Modifiers Mods;
3160 APFloat RealVal(APFloat::IEEEdouble());
3161 auto roundMode = APFloat::rmNearestTiesToEven;
3162 if (
errorToBool(RealVal.convertFromString(Num, roundMode).takeError()))
3165 RealVal.changeSign();
3168 AMDGPUOperand::CreateImm(
this, RealVal.bitcastToAPInt().getZExtValue(), S,
3169 AMDGPUOperand::ImmTyNone,
true));
3170 AMDGPUOperand &
Op =
static_cast<AMDGPUOperand &
>(*
Operands.back());
3171 Op.setModifiers(Mods);
3180 if (HasSP3AbsModifier) {
3189 if (getParser().parsePrimaryExpr(Expr, EndLoc,
nullptr))
3192 if (Parser.parseExpression(Expr))
3196 if (Expr->evaluateAsAbsolute(IntVal)) {
3197 Operands.push_back(AMDGPUOperand::CreateImm(
this, IntVal, S));
3198 AMDGPUOperand &
Op =
static_cast<AMDGPUOperand &
>(*
Operands.back());
3199 Op.setModifiers(Mods);
3203 Operands.push_back(AMDGPUOperand::CreateExpr(
this, Expr, S));
3216 if (
auto R = parseRegister()) {
3225 bool HasSP3AbsMod,
bool HasLit) {
3231 return parseImm(
Operands, HasSP3AbsMod, HasLit);
3235AMDGPUAsmParser::isNamedOperandModifier(
const AsmToken &Token,
const AsmToken &NextToken)
const {
3238 return str ==
"abs" || str ==
"neg" || str ==
"sext";
3244AMDGPUAsmParser::isOpcodeModifierWithVal(
const AsmToken &Token,
const AsmToken &NextToken)
const {
3249AMDGPUAsmParser::isOperandModifier(
const AsmToken &Token,
const AsmToken &NextToken)
const {
3250 return isNamedOperandModifier(Token, NextToken) || Token.
is(
AsmToken::Pipe);
3254AMDGPUAsmParser::isRegOrOperandModifier(
const AsmToken &Token,
const AsmToken &NextToken)
const {
3255 return isRegister(Token, NextToken) || isOperandModifier(Token, NextToken);
3272AMDGPUAsmParser::isModifier() {
3276 peekTokens(NextToken);
3278 return isOperandModifier(Tok, NextToken[0]) ||
3279 (Tok.
is(
AsmToken::Minus) && isRegOrOperandModifier(NextToken[0], NextToken[1])) ||
3280 isOpcodeModifierWithVal(Tok, NextToken[0]);
3306AMDGPUAsmParser::parseSP3NegModifier() {
3309 peekTokens(NextToken);
3312 (isRegister(NextToken[0], NextToken[1]) ||
3314 isId(NextToken[0],
"abs"))) {
3332 return Error(getLoc(),
"invalid syntax, expected 'neg' modifier");
3334 SP3Neg = parseSP3NegModifier();
3337 Neg = trySkipId(
"neg");
3339 return Error(Loc,
"expected register or immediate");
3343 Abs = trySkipId(
"abs");
3347 Lit = trySkipId(
"lit");
3354 return Error(Loc,
"expected register or immediate");
3358 Res = parseRegOrImm(
Operands, SP3Abs, Lit);
3365 if (Lit && !
Operands.back()->isImm())
3366 Error(Loc,
"expected immediate with lit modifier");
3368 if (SP3Abs && !skipToken(
AsmToken::Pipe,
"expected vertical bar"))
3377 AMDGPUOperand::Modifiers Mods;
3378 Mods.Abs = Abs || SP3Abs;
3379 Mods.Neg = Neg || SP3Neg;
3382 if (Mods.hasFPModifiers() || Lit) {
3383 AMDGPUOperand &
Op =
static_cast<AMDGPUOperand &
>(*
Operands.back());
3385 return Error(
Op.getStartLoc(),
"expected an absolute expression");
3386 Op.setModifiers(Mods);
3394 bool Sext = trySkipId(
"sext");
3395 if (Sext && !skipToken(
AsmToken::LParen,
"expected left paren after sext"))
3410 AMDGPUOperand::Modifiers Mods;
3413 if (Mods.hasIntModifiers()) {
3414 AMDGPUOperand &
Op =
static_cast<AMDGPUOperand &
>(*
Operands.back());
3416 return Error(
Op.getStartLoc(),
"expected an absolute expression");
3417 Op.setModifiers(Mods);
3424 return parseRegOrImmWithFPInputMods(
Operands,
false);
3428 return parseRegOrImmWithIntInputMods(
Operands,
false);
3432 auto Loc = getLoc();
3433 if (trySkipId(
"off")) {
3434 Operands.push_back(AMDGPUOperand::CreateImm(
this, 0, Loc,
3435 AMDGPUOperand::ImmTyOff,
false));
3442 std::unique_ptr<AMDGPUOperand>
Reg = parseRegister();
3444 Operands.push_back(std::move(Reg));
3451unsigned AMDGPUAsmParser::checkTargetMatchPredicate(
MCInst &Inst) {
3458 return Match_InvalidOperand;
3460 if (Inst.
getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi ||
3461 Inst.
getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) {
3466 if (!
Op.isImm() ||
Op.getImm() != AMDGPU::SDWA::SdwaSel::DWORD) {
3467 return Match_InvalidOperand;
3471 return Match_Success;
3475 static const unsigned Variants[] = {
3486 if (isForcedDPP() && isForcedVOP3()) {
3490 if (getForcedEncodingSize() == 32) {
3495 if (isForcedVOP3()) {
3500 if (isForcedSDWA()) {
3506 if (isForcedDPP()) {
3514StringRef AMDGPUAsmParser::getMatchedVariantName()
const {
3515 if (isForcedDPP() && isForcedVOP3())
3518 if (getForcedEncodingSize() == 32)
3533unsigned AMDGPUAsmParser::findImplicitSGPRReadInVOP(
const MCInst &Inst)
const {
3537 case AMDGPU::FLAT_SCR:
3539 case AMDGPU::VCC_LO:
3540 case AMDGPU::VCC_HI:
3547 return AMDGPU::NoRegister;
3554bool AMDGPUAsmParser::isInlineConstant(
const MCInst &Inst,
3555 unsigned OpIdx)
const {
3565 int64_t Val = MO.
getImm();
3614unsigned AMDGPUAsmParser::getConstantBusLimit(
unsigned Opcode)
const {
3620 case AMDGPU::V_LSHLREV_B64_e64:
3621 case AMDGPU::V_LSHLREV_B64_gfx10:
3622 case AMDGPU::V_LSHLREV_B64_e64_gfx11:
3623 case AMDGPU::V_LSHLREV_B64_e32_gfx12:
3624 case AMDGPU::V_LSHLREV_B64_e64_gfx12:
3625 case AMDGPU::V_LSHRREV_B64_e64:
3626 case AMDGPU::V_LSHRREV_B64_gfx10:
3627 case AMDGPU::V_LSHRREV_B64_e64_gfx11:
3628 case AMDGPU::V_LSHRREV_B64_e64_gfx12:
3629 case AMDGPU::V_ASHRREV_I64_e64:
3630 case AMDGPU::V_ASHRREV_I64_gfx10:
3631 case AMDGPU::V_ASHRREV_I64_e64_gfx11:
3632 case AMDGPU::V_ASHRREV_I64_e64_gfx12:
3633 case AMDGPU::V_LSHL_B64_e64:
3634 case AMDGPU::V_LSHR_B64_e64:
3635 case AMDGPU::V_ASHR_I64_e64:
3648 bool AddMandatoryLiterals =
false) {
3654 int16_t ImmDeferredIdx =
3671bool AMDGPUAsmParser::usesConstantBus(
const MCInst &Inst,
unsigned OpIdx) {
3674 return !isInlineConstant(Inst, OpIdx);
3675 }
else if (MO.
isReg()) {
3682 return isSGPR(PReg,
TRI) && PReg != SGPR_NULL;
3694 const unsigned Opcode = Inst.
getOpcode();
3695 if (Opcode != V_WRITELANE_B32_gfx6_gfx7 && Opcode != V_WRITELANE_B32_vi)
3698 if (!LaneSelOp.
isReg())
3701 return LaneSelReg ==
M0 || LaneSelReg == M0_gfxpre11;
3704bool AMDGPUAsmParser::validateConstantBusLimitations(
3706 const unsigned Opcode = Inst.
getOpcode();
3708 unsigned LastSGPR = AMDGPU::NoRegister;
3709 unsigned ConstantBusUseCount = 0;
3710 unsigned NumLiterals = 0;
3711 unsigned LiteralSize;
3713 if (!(
Desc.TSFlags &
3729 unsigned SGPRUsed = findImplicitSGPRReadInVOP(Inst);
3730 if (SGPRUsed != AMDGPU::NoRegister) {
3731 SGPRsUsed.
insert(SGPRUsed);
3732 ++ConstantBusUseCount;
3737 for (
int OpIdx : OpIndices) {
3742 if (usesConstantBus(Inst, OpIdx)) {
3751 if (SGPRsUsed.
insert(LastSGPR).second) {
3752 ++ConstantBusUseCount;
3772 if (NumLiterals == 0) {
3775 }
else if (LiteralSize !=
Size) {
3781 ConstantBusUseCount += NumLiterals;
3783 if (ConstantBusUseCount <= getConstantBusLimit(Opcode))
3789 Error(Loc,
"invalid operand (violates constant bus restrictions)");
3793bool AMDGPUAsmParser::validateVOPDRegBankConstraints(
3796 const unsigned Opcode = Inst.
getOpcode();
3802 auto getVRegIdx = [&](
unsigned,
unsigned OperandIdx) {
3810 bool SkipSrc = Opcode == AMDGPU::V_DUAL_MOV_B32_e32_X_MOV_B32_e32_gfx12;
3813 auto InvalidCompOprIdx =
3814 InstInfo.getInvalidCompOperandIndex(getVRegIdx, SkipSrc);
3815 if (!InvalidCompOprIdx)
3818 auto CompOprIdx = *InvalidCompOprIdx;
3820 std::max(InstInfo[
VOPD::X].getIndexInParsedOperands(CompOprIdx),
3821 InstInfo[
VOPD::Y].getIndexInParsedOperands(CompOprIdx));
3824 auto Loc = ((AMDGPUOperand &)*
Operands[ParsedIdx]).getStartLoc();
3825 if (CompOprIdx == VOPD::Component::DST) {
3826 Error(Loc,
"one dst register must be even and the other odd");
3828 auto CompSrcIdx = CompOprIdx - VOPD::Component::DST_NUM;
3830 " operands must use different VGPR banks");
3836bool AMDGPUAsmParser::validateIntClampSupported(
const MCInst &Inst) {
3853bool AMDGPUAsmParser::validateMIMGDataSize(
const MCInst &Inst,
3854 const SMLoc &IDLoc) {
3872 unsigned TFESize = (TFEIdx != -1 && Inst.
getOperand(TFEIdx).
getImm()) ? 1 : 0;
3877 bool IsPackedD16 =
false;
3882 IsPackedD16 = D16Idx >= 0;
3884 DataSize = (DataSize + 1) / 2;
3887 if ((VDataSize / 4) == DataSize + TFESize)
3892 Modifiers = IsPackedD16 ?
"dmask and d16" :
"dmask";
3894 Modifiers = IsPackedD16 ?
"dmask, d16 and tfe" :
"dmask and tfe";
3896 Error(IDLoc,
Twine(
"image data size does not match ") + Modifiers);
3900bool AMDGPUAsmParser::validateMIMGAddrSize(
const MCInst &Inst,
3901 const SMLoc &IDLoc) {
3914 : AMDGPU::OpName::rsrc;
3921 assert(SrsrcIdx > VAddr0Idx);
3924 if (BaseOpcode->
BVH) {
3925 if (IsA16 == BaseOpcode->
A16)
3927 Error(IDLoc,
"image address size does not match a16");
3933 bool IsNSA = SrsrcIdx - VAddr0Idx > 1;
3934 unsigned ActualAddrSize =
3935 IsNSA ? SrsrcIdx - VAddr0Idx
3938 unsigned ExpectedAddrSize =
3942 if (hasPartialNSAEncoding() &&
3945 int VAddrLastIdx = SrsrcIdx - 1;
3946 unsigned VAddrLastSize =
3949 ActualAddrSize = VAddrLastIdx - VAddr0Idx + VAddrLastSize;
3952 if (ExpectedAddrSize > 12)
3953 ExpectedAddrSize = 16;
3958 if (ActualAddrSize == 8 && (ExpectedAddrSize >= 5 && ExpectedAddrSize <= 7))
3962 if (ActualAddrSize == ExpectedAddrSize)
3965 Error(IDLoc,
"image address size does not match dim and a16");
3969bool AMDGPUAsmParser::validateMIMGAtomicDMask(
const MCInst &Inst) {
3976 if (!
Desc.mayLoad() || !
Desc.mayStore())
3986 return DMask == 0x1 || DMask == 0x3 || DMask == 0xf;
3989bool AMDGPUAsmParser::validateMIMGGatherDMask(
const MCInst &Inst) {
4005 return DMask == 0x1 || DMask == 0x2 || DMask == 0x4 || DMask == 0x8;
4008bool AMDGPUAsmParser::validateMIMGMSAA(
const MCInst &Inst) {
4019 if (!BaseOpcode->
MSAA)
4028 return DimInfo->
MSAA;
4034 case AMDGPU::V_MOVRELS_B32_sdwa_gfx10:
4035 case AMDGPU::V_MOVRELSD_B32_sdwa_gfx10:
4036 case AMDGPU::V_MOVRELSD_2_B32_sdwa_gfx10:
4046bool AMDGPUAsmParser::validateMovrels(
const MCInst &Inst,
4070 Error(ErrLoc,
"source operand must be a VGPR");
4074bool AMDGPUAsmParser::validateMAIAccWrite(
const MCInst &Inst,
4079 if (Opc != AMDGPU::V_ACCVGPR_WRITE_B32_vi)
4093 "source operand must be either a VGPR or an inline constant");
4100bool AMDGPUAsmParser::validateMAISrc2(
const MCInst &Inst,
4106 !getFeatureBits()[FeatureMFMAInlineLiteralBug])
4113 if (Inst.
getOperand(Src2Idx).
isImm() && isInlineConstant(Inst, Src2Idx)) {
4115 "inline constants are not allowed for this operand");
4122bool AMDGPUAsmParser::validateMFMA(
const MCInst &Inst,
4140 if (Src2Reg == DstReg)
4144 if (
TRI->getRegClass(
Desc.operands()[0].RegClass).getSizeInBits() <= 128)
4147 if (
TRI->regsOverlap(Src2Reg, DstReg)) {
4149 "source 2 operand must not partially overlap with dst");
4156bool AMDGPUAsmParser::validateDivScale(
const MCInst &Inst) {
4160 case V_DIV_SCALE_F32_gfx6_gfx7:
4161 case V_DIV_SCALE_F32_vi:
4162 case V_DIV_SCALE_F32_gfx10:
4163 case V_DIV_SCALE_F64_gfx6_gfx7:
4164 case V_DIV_SCALE_F64_vi:
4165 case V_DIV_SCALE_F64_gfx10:
4171 for (
auto Name : {AMDGPU::OpName::src0_modifiers,
4172 AMDGPU::OpName::src2_modifiers,
4173 AMDGPU::OpName::src2_modifiers}) {
4184bool AMDGPUAsmParser::validateMIMGD16(
const MCInst &Inst) {
4204 case AMDGPU::V_SUBREV_F32_e32:
4205 case AMDGPU::V_SUBREV_F32_e64:
4206 case AMDGPU::V_SUBREV_F32_e32_gfx10:
4207 case AMDGPU::V_SUBREV_F32_e32_gfx6_gfx7:
4208 case AMDGPU::V_SUBREV_F32_e32_vi:
4209 case AMDGPU::V_SUBREV_F32_e64_gfx10:
4210 case AMDGPU::V_SUBREV_F32_e64_gfx6_gfx7:
4211 case AMDGPU::V_SUBREV_F32_e64_vi:
4213 case AMDGPU::V_SUBREV_CO_U32_e32:
4214 case AMDGPU::V_SUBREV_CO_U32_e64:
4215 case AMDGPU::V_SUBREV_I32_e32_gfx6_gfx7:
4216 case AMDGPU::V_SUBREV_I32_e64_gfx6_gfx7:
4218 case AMDGPU::V_SUBBREV_U32_e32:
4219 case AMDGPU::V_SUBBREV_U32_e64:
4220 case AMDGPU::V_SUBBREV_U32_e32_gfx6_gfx7:
4221 case AMDGPU::V_SUBBREV_U32_e32_vi:
4222 case AMDGPU::V_SUBBREV_U32_e64_gfx6_gfx7:
4223 case AMDGPU::V_SUBBREV_U32_e64_vi:
4225 case AMDGPU::V_SUBREV_U32_e32:
4226 case AMDGPU::V_SUBREV_U32_e64:
4227 case AMDGPU::V_SUBREV_U32_e32_gfx9:
4228 case AMDGPU::V_SUBREV_U32_e32_vi:
4229 case AMDGPU::V_SUBREV_U32_e64_gfx9:
4230 case AMDGPU::V_SUBREV_U32_e64_vi:
4232 case AMDGPU::V_SUBREV_F16_e32:
4233 case AMDGPU::V_SUBREV_F16_e64:
4234 case AMDGPU::V_SUBREV_F16_e32_gfx10:
4235 case AMDGPU::V_SUBREV_F16_e32_vi:
4236 case AMDGPU::V_SUBREV_F16_e64_gfx10:
4237 case AMDGPU::V_SUBREV_F16_e64_vi:
4239 case AMDGPU::V_SUBREV_U16_e32:
4240 case AMDGPU::V_SUBREV_U16_e64:
4241 case AMDGPU::V_SUBREV_U16_e32_vi:
4242 case AMDGPU::V_SUBREV_U16_e64_vi:
4244 case AMDGPU::V_SUBREV_CO_U32_e32_gfx9:
4245 case AMDGPU::V_SUBREV_CO_U32_e64_gfx10:
4246 case AMDGPU::V_SUBREV_CO_U32_e64_gfx9:
4248 case AMDGPU::V_SUBBREV_CO_U32_e32_gfx9:
4249 case AMDGPU::V_SUBBREV_CO_U32_e64_gfx9:
4251 case AMDGPU::V_SUBREV_NC_U32_e32_gfx10:
4252 case AMDGPU::V_SUBREV_NC_U32_e64_gfx10:
4254 case AMDGPU::V_SUBREV_CO_CI_U32_e32_gfx10:
4255 case AMDGPU::V_SUBREV_CO_CI_U32_e64_gfx10:
4257 case AMDGPU::V_LSHRREV_B32_e32:
4258 case AMDGPU::V_LSHRREV_B32_e64:
4259 case AMDGPU::V_LSHRREV_B32_e32_gfx6_gfx7:
4260 case AMDGPU::V_LSHRREV_B32_e64_gfx6_gfx7:
4261 case AMDGPU::V_LSHRREV_B32_e32_vi:
4262 case AMDGPU::V_LSHRREV_B32_e64_vi:
4263 case AMDGPU::V_LSHRREV_B32_e32_gfx10:
4264 case AMDGPU::V_LSHRREV_B32_e64_gfx10:
4266 case AMDGPU::V_ASHRREV_I32_e32:
4267 case AMDGPU::V_ASHRREV_I32_e64:
4268 case AMDGPU::V_ASHRREV_I32_e32_gfx10:
4269 case AMDGPU::V_ASHRREV_I32_e32_gfx6_gfx7:
4270 case AMDGPU::V_ASHRREV_I32_e32_vi:
4271 case AMDGPU::V_ASHRREV_I32_e64_gfx10:
4272 case AMDGPU::V_ASHRREV_I32_e64_gfx6_gfx7:
4273 case AMDGPU::V_ASHRREV_I32_e64_vi:
4275 case AMDGPU::V_LSHLREV_B32_e32:
4276 case AMDGPU::V_LSHLREV_B32_e64:
4277 case AMDGPU::V_LSHLREV_B32_e32_gfx10:
4278 case AMDGPU::V_LSHLREV_B32_e32_gfx6_gfx7:
4279 case AMDGPU::V_LSHLREV_B32_e32_vi:
4280 case AMDGPU::V_LSHLREV_B32_e64_gfx10:
4281 case AMDGPU::V_LSHLREV_B32_e64_gfx6_gfx7:
4282 case AMDGPU::V_LSHLREV_B32_e64_vi:
4284 case AMDGPU::V_LSHLREV_B16_e32:
4285 case AMDGPU::V_LSHLREV_B16_e64:
4286 case AMDGPU::V_LSHLREV_B16_e32_vi:
4287 case AMDGPU::V_LSHLREV_B16_e64_vi:
4288 case AMDGPU::V_LSHLREV_B16_gfx10:
4290 case AMDGPU::V_LSHRREV_B16_e32:
4291 case AMDGPU::V_LSHRREV_B16_e64:
4292 case AMDGPU::V_LSHRREV_B16_e32_vi:
4293 case AMDGPU::V_LSHRREV_B16_e64_vi:
4294 case AMDGPU::V_LSHRREV_B16_gfx10:
4296 case AMDGPU::V_ASHRREV_I16_e32:
4297 case AMDGPU::V_ASHRREV_I16_e64:
4298 case AMDGPU::V_ASHRREV_I16_e32_vi:
4299 case AMDGPU::V_ASHRREV_I16_e64_vi:
4300 case AMDGPU::V_ASHRREV_I16_gfx10:
4302 case AMDGPU::V_LSHLREV_B64_e64:
4303 case AMDGPU::V_LSHLREV_B64_gfx10:
4304 case AMDGPU::V_LSHLREV_B64_vi:
4306 case AMDGPU::V_LSHRREV_B64_e64:
4307 case AMDGPU::V_LSHRREV_B64_gfx10:
4308 case AMDGPU::V_LSHRREV_B64_vi:
4310 case AMDGPU::V_ASHRREV_I64_e64:
4311 case AMDGPU::V_ASHRREV_I64_gfx10:
4312 case AMDGPU::V_ASHRREV_I64_vi:
4314 case AMDGPU::V_PK_LSHLREV_B16:
4315 case AMDGPU::V_PK_LSHLREV_B16_gfx10:
4316 case AMDGPU::V_PK_LSHLREV_B16_vi:
4318 case AMDGPU::V_PK_LSHRREV_B16:
4319 case AMDGPU::V_PK_LSHRREV_B16_gfx10:
4320 case AMDGPU::V_PK_LSHRREV_B16_vi:
4321 case AMDGPU::V_PK_ASHRREV_I16:
4322 case AMDGPU::V_PK_ASHRREV_I16_gfx10:
4323 case AMDGPU::V_PK_ASHRREV_I16_vi:
// Checks whether the lds_direct special register operand is legal for Inst.
// Returns std::nullopt when valid, or a diagnostic string otherwise.
// NOTE(review): extraction dropped interior lines of this function (the MCInstrDesc
// lookup and the per-source operand fetch); code is left byte-identical.
4330std::optional<StringRef>
4331AMDGPUAsmParser::validateLdsDirect(
const MCInst &Inst) {
4333 using namespace SIInstrFlags;
4334 const unsigned Opcode = Inst.
getOpcode();
// Bail out early for encodings that cannot reference lds_direct at all.
4340 if ((
Desc.TSFlags & Enc) == 0)
4341 return std::nullopt;
// Scan the three possible source operands for an lds_direct register use.
4343 for (
auto SrcName : {OpName::src0, OpName::src1, OpName::src2}) {
4348 if (Src.isReg() && Src.getReg() == LDS_DIRECT) {
4351 return StringRef(
"lds_direct is not supported on this GPU");
4354 return StringRef(
"lds_direct cannot be used with this instruction");
// lds_direct is only ever permitted in the src0 position.
4356 if (SrcName != OpName::src0)
4357 return StringRef(
"lds_direct may be used as src0 only");
4361 return std::nullopt;
4365 for (
unsigned i = 1, e =
Operands.size(); i != e; ++i) {
4366 AMDGPUOperand &
Op = ((AMDGPUOperand &)*
Operands[i]);
4367 if (
Op.isFlatOffset())
4368 return Op.getStartLoc();
// Validates the immediate offset operand of Inst, dispatching to the FLAT or
// SMEM specific checks and otherwise range-checking the generic offset field.
// NOTE(review): interior lines (offset operand lookup, dispatch conditions)
// were dropped by extraction; code left byte-identical.
4373bool AMDGPUAsmParser::validateOffset(
const MCInst &Inst,
4382 return validateFlatOffset(Inst,
Operands);
4385 return validateSMEMOffset(Inst,
Operands);
// Generic signed-offset path: 24-bit signed immediate.
4390 const unsigned OffsetSize = 24;
4391 if (!
isIntN(OffsetSize,
Op.getImm())) {
4393 Twine(
"expected a ") +
Twine(OffsetSize) +
"-bit signed offset");
// Remaining path: 16-bit unsigned immediate.
4397 const unsigned OffsetSize = 16;
4398 if (!
isUIntN(OffsetSize,
Op.getImm())) {
4400 Twine(
"expected a ") +
Twine(OffsetSize) +
"-bit unsigned offset");
// Validates the offset modifier of FLAT-family instructions: rejects nonzero
// offsets on targets without flat-offset support and range-checks the value,
// signed or unsigned depending on whether negative offsets are allowed.
// NOTE(review): the OffsetSize computation and AllowNegative initializer were
// dropped by extraction; code left byte-identical.
4407bool AMDGPUAsmParser::validateFlatOffset(
const MCInst &Inst,
4418 if (!hasFlatOffsets() &&
Op.getImm() != 0) {
4420 "flat offset modifier is not supported on this GPU");
4427 bool AllowNegative =
// Signed range when negative offsets are permitted, otherwise one bit fewer
// of unsigned range (the sign bit is unavailable).
4430 if (!
isIntN(OffsetSize,
Op.getImm()) || (!AllowNegative &&
Op.getImm() < 0)) {
4432 Twine(
"expected a ") +
4433 (AllowNegative ?
Twine(OffsetSize) +
"-bit signed offset"
4434 :
Twine(OffsetSize - 1) +
"-bit unsigned offset"));
4443 for (
unsigned i = 2, e =
Operands.size(); i != e; ++i) {
4444 AMDGPUOperand &
Op = ((AMDGPUOperand &)*
Operands[i]);
4445 if (
Op.isSMEMOffset() ||
Op.isSMEMOffsetMod())
4446 return Op.getStartLoc();
// Validates the SMEM offset immediate; the diagnostic width depends on the
// target generation and on whether the access is a buffer access.
// NOTE(review): only the error-message tail of this function survived
// extraction; code left byte-identical.
4451bool AMDGPUAsmParser::validateSMEMOffset(
const MCInst &Inst,
4477 : (
isVI() || IsBuffer) ?
"expected a 20-bit unsigned offset"
4478 :
"expected a 21-bit signed offset");
// SOP instructions may encode at most one unique 32-bit literal; counts
// distinct literal immediates and relocatable expressions across src0/src1
// and returns false when more than one would be required.
// NOTE(review): operand-index setup and the literal-value reads were dropped
// by extraction; code left byte-identical.
4483bool AMDGPUAsmParser::validateSOPLiteral(
const MCInst &Inst)
const {
4492 const int OpIndices[] = { Src0Idx, Src1Idx };
4494 unsigned NumExprs = 0;
4495 unsigned NumLiterals = 0;
4498 for (
int OpIdx : OpIndices) {
4499 if (OpIdx == -1)
break;
// Inline constants do not consume the literal slot.
4504 if (MO.
isImm() && !isInlineConstant(Inst, OpIdx)) {
// Identical literal values share a single encoding slot.
4506 if (NumLiterals == 0 || LiteralValue !=
Value) {
4510 }
else if (MO.
isExpr()) {
4516 return NumLiterals + NumExprs <= 1;
// Validates the op_sel / op_sel_hi modifier operands of Inst.
// NOTE(review): nearly all of this function's body was dropped by extraction;
// only the modifier-index guards remain. Code left byte-identical.
4519bool AMDGPUAsmParser::validateOpSel(
const MCInst &Inst) {
4533 if (OpSelIdx != -1) {
4538 if (OpSelHiIdx != -1) {
// Validates a neg_lo/neg_hi modifier operand (selected by OpName) against the
// per-source modifier operands of Inst.
// NOTE(review): the per-source validation body inside the loop was dropped by
// extraction; code left byte-identical.
4556bool AMDGPUAsmParser::validateNeg(
const MCInst &Inst,
int OpName) {
// Modifier operand names for each of the three possible sources.
4581 int SrcMods[3] = {AMDGPU::OpName::src0_modifiers,
4582 AMDGPU::OpName::src1_modifiers,
4583 AMDGPU::OpName::src2_modifiers};
4585 for (
unsigned i = 0; i < 3; ++i) {
// Validates DPP (data-parallel primitive) usage: restricts DP ALU dpp control
// values and rejects operands that are illegal in dpp/dpp8 form.
// NOTE(review): the control-value decoding and operand checks between the
// surviving lines were dropped by extraction; code left byte-identical.
4595bool AMDGPUAsmParser::validateDPP(
const MCInst &Inst,
4599 if (DppCtrlIdx >= 0) {
4606 Error(S,
"DP ALU dpp only supports row_newbcast");
// Either encoding form (dpp or dpp8) marks the instruction as DPP.
4612 bool IsDPP = DppCtrlIdx >= 0 || Dpp8Idx >= 0;
4622 Error(S,
"invalid operand for instruction");
4627 "src1 immediate operand invalid for instruction");
// Returns true when Reg is the VCC register appropriate for the active
// wavefront size: full VCC in wave64 mode, VCC_LO in wave32 mode.
// NOTE(review): the closing brace was dropped by extraction.
4637bool AMDGPUAsmParser::validateVccOperand(
unsigned Reg)
const {
4638 auto FB = getFeatureBits();
4639 return (FB[AMDGPU::FeatureWavefrontSize64] && Reg == AMDGPU::VCC) ||
4640 (FB[AMDGPU::FeatureWavefrontSize32] &&
Reg == AMDGPU::VCC_LO);
// Validates literal operands on VOP instructions: 32-bit range of each
// literal, availability of VOP3 literals on the target, and the one-unique-
// literal encoding limit.
// NOTE(review): operand-index setup, the IsFP64/IsValid32Op computation and
// several early returns were dropped by extraction; code left byte-identical.
4644bool AMDGPUAsmParser::validateVOPLiteral(
const MCInst &Inst,
4650 !HasMandatoryLiteral && !
isVOPD(Opcode))
4655 unsigned NumExprs = 0;
4656 unsigned NumLiterals = 0;
4659 for (
int OpIdx : OpIndices) {
// Inline constants do not occupy the literal slot.
4669 if (MO.
isImm() && !isInlineConstant(Inst, OpIdx)) {
// A literal must fit in 32 bits (signed or unsigned) unless a wider
// form is explicitly valid for this operand.
4675 if (!IsValid32Op && !isInt<32>(
Value) && !isUInt<32>(
Value)) {
4676 Error(getLitLoc(
Operands),
"invalid operand for instruction");
4680 if (IsFP64 && IsValid32Op)
// Identical literal values share a single encoding slot.
4683 if (NumLiterals == 0 || LiteralValue !=
Value) {
4687 }
else if (MO.
isExpr()) {
4691 NumLiterals += NumExprs;
4696 if (!HasMandatoryLiteral && !getFeatureBits()[FeatureVOP3Literal]) {
4697 Error(getLitLoc(
Operands),
"literal operands are not supported");
4701 if (NumLiterals > 1) {
4702 Error(getLitLoc(
Operands,
true),
"only one unique literal operand is allowed");
4720 unsigned Sub =
MRI->getSubReg(
Op.getReg(), AMDGPU::sub0);
4721 auto Reg = Sub ? Sub :
Op.getReg();
4723 return AGPR32.
contains(Reg) ? 1 : 0;
// Validates AGPR usage by loads/stores: on gfx90a+ the destination and data
// operands must agree (all-VGPR or all-AGPR); on older targets AGPR
// loads/stores are not permitted at all.
// NOTE(review): the DstAreg/DataAreg/Data2Areg computations were dropped by
// extraction; code left byte-identical.
4726bool AMDGPUAsmParser::validateAGPRLdSt(
const MCInst &Inst)
const {
4734 : AMDGPU::OpName::vdata;
// data and data2 (if present) must be in the same register bank.
4742 if (Data2Areg >= 0 && Data2Areg != DataAreg)
4746 auto FB = getFeatureBits();
4747 if (FB[AMDGPU::FeatureGFX90AInsts]) {
4748 if (DataAreg < 0 || DstAreg < 0)
4750 return DstAreg == DataAreg;
// Pre-gfx90a: neither dst nor data may be an AGPR.
4753 return DstAreg < 1 && DataAreg < 1;
// On gfx90a+, VGPR/AGPR tuples must start on an even-numbered register
// (64-bit alignment); checks the first sub-register of each tuple operand.
// NOTE(review): the operand loop header and return statements were dropped by
// extraction; code left byte-identical.
4756bool AMDGPUAsmParser::validateVGPRAlign(
const MCInst &Inst)
const {
4757 auto FB = getFeatureBits();
// Alignment restriction only applies to gfx90a-class targets.
4758 if (!FB[AMDGPU::FeatureGFX90AInsts])
4769 unsigned Sub =
MRI->getSubReg(
Op.getReg(), AMDGPU::sub0);
// Odd first register => misaligned tuple, for both VGPR and AGPR banks.
4773 if (VGPR32.
contains(Sub) && ((Sub - AMDGPU::VGPR0) & 1))
4775 if (AGPR32.
contains(Sub) && ((Sub - AMDGPU::AGPR0) & 1))
4783 for (
unsigned i = 1, e =
Operands.size(); i != e; ++i) {
4784 AMDGPUOperand &
Op = ((AMDGPUOperand &)*
Operands[i]);
4786 return Op.getStartLoc();
// Validates the blgp MFMA modifier: on gfx940 a subset of F64 MFMA opcodes
// repurposes the field as neg, so the parser must have seen the matching
// spelling (neg vs blgp) for the opcode in use.
// NOTE(review): the switch header and the IsNeg computation were dropped by
// extraction; code left byte-identical.
4791bool AMDGPUAsmParser::validateBLGP(
const MCInst &Inst,
4801 auto FB = getFeatureBits();
4802 bool UsesNeg =
false;
4803 if (FB[AMDGPU::FeatureGFX940Insts]) {
// These F64 MFMA variants interpret the field as neg on gfx940.
4805 case AMDGPU::V_MFMA_F64_16X16X4F64_gfx940_acd:
4806 case AMDGPU::V_MFMA_F64_16X16X4F64_gfx940_vcd:
4807 case AMDGPU::V_MFMA_F64_4X4X4F64_gfx940_acd:
4808 case AMDGPU::V_MFMA_F64_4X4X4F64_gfx940_vcd:
4813 if (IsNeg == UsesNeg)
4817 UsesNeg ?
"invalid modifier: blgp is not supported"
4818 :
"invalid modifier: neg is not supported");
// For the gfx11 split s_waitcnt variants, src0 must be the null register;
// any other register is rejected.
// NOTE(review): the register/location fetch between the surviving lines was
// dropped by extraction; code left byte-identical.
4823bool AMDGPUAsmParser::validateWaitCnt(
const MCInst &Inst,
// Only the four gfx11 single-counter waitcnt opcodes carry this restriction.
4829 if (Opc != AMDGPU::S_WAITCNT_EXPCNT_gfx11 &&
4830 Opc != AMDGPU::S_WAITCNT_LGKMCNT_gfx11 &&
4831 Opc != AMDGPU::S_WAITCNT_VMCNT_gfx11 &&
4832 Opc != AMDGPU::S_WAITCNT_VSCNT_gfx11)
4838 if (Reg == AMDGPU::SGPR_NULL)
4842 Error(RegLoc,
"src0 must be null");
// Validates DS (LDS/GDS) instructions: GWS opcodes are delegated to
// validateGWS, and the gds modifier is rejected on targets without GDS.
// NOTE(review): the dispatch condition and gds-operand lookup were dropped by
// extraction; code left byte-identical.
4846bool AMDGPUAsmParser::validateDS(
const MCInst &Inst,
4852 return validateGWS(Inst,
Operands);
4863 Error(S,
"gds modifier is not supported on this GPU");
// gfx90a requires even-aligned data VGPRs/AGPRs for most GWS instructions;
// three opcodes (init, barrier, sema_br) are exempt.
// NOTE(review): the data-operand lookup and the alignment return were dropped
// by extraction; code left byte-identical.
4871bool AMDGPUAsmParser::validateGWS(
const MCInst &Inst,
// The restriction only exists on gfx90a-class targets.
4873 if (!getFeatureBits()[AMDGPU::FeatureGFX90AInsts])
4877 if (Opc != AMDGPU::DS_GWS_INIT_vi && Opc != AMDGPU::DS_GWS_BARRIER_vi &&
4878 Opc != AMDGPU::DS_GWS_SEMA_BR_vi)
// Index relative to the start of whichever bank (VGPR/AGPR) holds Reg.
4887 auto RegIdx =
Reg - (VGPR32.
contains(Reg) ? AMDGPU::VGPR0 : AMDGPU::AGPR0);
4890 Error(RegLoc,
"vgpr must be even aligned");
// Validates cache-policy (cpol) modifiers: delegates gfx12-style th/scope
// checks, then enforces per-encoding restrictions on glc/scc/sc0 bits.
// NOTE(review): the cpol operand fetch, TSFlags tests and several branch
// conditions were dropped by extraction; code left byte-identical.
4897bool AMDGPUAsmParser::validateCoherencyBits(
const MCInst &Inst,
4899 const SMLoc &IDLoc) {
4901 AMDGPU::OpName::cpol);
4908 return validateTHAndScopeBits(Inst,
Operands, CPol);
4914 Error(S,
"cache policy is not supported for SMRD instructions");
4918 Error(IDLoc,
"invalid cache policy for SMEM instruction");
4927 if (!(TSFlags & AllowSCCModifier)) {
4932 "scc modifier is not supported for this instruction on this GPU");
4943 :
"instruction must use glc");
// Point the diagnostic at the offending modifier token (sc0 on gfx940,
// glc elsewhere) within the operand string.
4951 &CStr.data()[CStr.find(
isGFX940() ?
"sc0" :
"glc")]);
4953 :
"instruction must not use glc");
// gfx12 cache-policy validation: checks the th (temporal hint) and scope
// fields of CPol against the instruction class (SMEM / atomic / store / load).
// NOTE(review): the PrintError helper definition and the th/scope decoding
// were dropped by extraction; code left byte-identical.
4961bool AMDGPUAsmParser::validateTHAndScopeBits(
const MCInst &Inst,
4963 const unsigned CPol) {
4967 const unsigned Opcode = Inst.
getOpcode();
4979 return PrintError(
"instruction must use th:TH_ATOMIC_RETURN");
4987 return PrintError(
"invalid th value for SMEM instruction");
4994 return PrintError(
"scope and th combination is not valid");
// Class-specific th legality: atomics, then stores, then loads.
5003 return PrintError(
"invalid th value for atomic instructions");
5004 }
else if (IsStore) {
5006 return PrintError(
"invalid th value for store instructions");
5009 return PrintError(
"invalid th value for load instructions");
5019 if (!Operand->isReg())
5021 unsigned Reg = Operand->getReg();
5022 if (Reg == SRC_EXECZ || Reg == SRC_VCCZ) {
5024 "execz and vccz are not supported on this GPU");
// The tfe (texture-fail-enable) modifier writes a status result and is
// meaningless on store instructions; reject that combination.
// NOTE(review): the tfe operand lookup was dropped by extraction; code left
// byte-identical.
5031bool AMDGPUAsmParser::validateTFE(
const MCInst &Inst,
5034 if (
Desc.mayStore() &&
5038 Error(Loc,
"TFE modifier has no meaning for store instructions");
// Top-level semantic validator: runs every per-feature check against the
// matched MCInst and emits a diagnostic (returning false) on the first
// failure. Checks that emit their own located diagnostics pass Operands or
// IDLoc through; the rest get a fixed message here.
// NOTE(review): most Error(...) location arguments and the return-false lines
// were dropped by extraction; code left byte-identical.
5046bool AMDGPUAsmParser::validateInstruction(
const MCInst &Inst,
5049 if (
auto ErrMsg = validateLdsDirect(Inst)) {
5053 if (!validateSOPLiteral(Inst)) {
5055 "only one unique literal operand is allowed");
5058 if (!validateVOPLiteral(Inst,
Operands)) {
5061 if (!validateConstantBusLimitations(Inst,
Operands)) {
5064 if (!validateVOPDRegBankConstraints(Inst,
Operands)) {
5067 if (!validateIntClampSupported(Inst)) {
5069 "integer clamping is not supported on this GPU");
5072 if (!validateOpSel(Inst)) {
5074 "invalid op_sel operand");
5077 if (!validateNeg(Inst, AMDGPU::OpName::neg_lo)) {
5079 "invalid neg_lo operand");
5082 if (!validateNeg(Inst, AMDGPU::OpName::neg_hi)) {
5084 "invalid neg_hi operand");
5087 if (!validateDPP(Inst,
Operands)) {
5091 if (!validateMIMGD16(Inst)) {
5093 "d16 modifier is not supported on this GPU");
5096 if (!validateMIMGMSAA(Inst)) {
5098 "invalid dim; must be MSAA type");
5101 if (!validateMIMGDataSize(Inst, IDLoc)) {
5104 if (!validateMIMGAddrSize(Inst, IDLoc))
5106 if (!validateMIMGAtomicDMask(Inst)) {
5108 "invalid atomic image dmask");
5111 if (!validateMIMGGatherDMask(Inst)) {
5113 "invalid image_gather dmask: only one bit must be set");
5116 if (!validateMovrels(Inst,
Operands)) {
5119 if (!validateOffset(Inst,
Operands)) {
5122 if (!validateMAIAccWrite(Inst,
Operands)) {
5125 if (!validateMAISrc2(Inst,
Operands)) {
5128 if (!validateMFMA(Inst,
Operands)) {
5131 if (!validateCoherencyBits(Inst,
Operands, IDLoc)) {
5135 if (!validateAGPRLdSt(Inst)) {
// Message depends on why AGPR use is illegal: bank mismatch on gfx90a+
// vs. no AGPR load/store support at all on older targets.
5136 Error(IDLoc, getFeatureBits()[AMDGPU::FeatureGFX90AInsts]
5137 ?
"invalid register class: data and dst should be all VGPR or AGPR"
5138 :
"invalid register class: agpr loads and stores not supported on this GPU"
5142 if (!validateVGPRAlign(Inst)) {
5144 "invalid register class: vgpr tuples must be 64 bit aligned");
5151 if (!validateBLGP(Inst,
Operands)) {
5155 if (!validateDivScale(Inst)) {
5156 Error(IDLoc,
"ABS not allowed in VOP3B instructions");
5159 if (!validateWaitCnt(Inst,
Operands)) {
5162 if (!validateExeczVcczOperands(
Operands)) {
5165 if (!validateTFE(Inst,
Operands)) {
5174 unsigned VariantID = 0);
5178 unsigned VariantID);
// Overload without an explicit variant list — presumably forwards to the
// variant-taking overload below; body elided by extraction. TODO confirm.
5180bool AMDGPUAsmParser::isSupportedMnemo(
StringRef Mnemo,
// Checks whether Mnemo is a known mnemonic in any of the given syntax
// variants under the given feature set.
// NOTE(review): the per-variant check inside the loop was dropped by
// extraction; code left byte-identical.
5185bool AMDGPUAsmParser::isSupportedMnemo(
StringRef Mnemo,
5188 for (
auto Variant : Variants) {
// Produces a targeted diagnostic for a mnemonic that failed to match: known
// but unsupported on this GPU, wrong syntax variant, wave32-only, or entirely
// unknown (with a spelling suggestion).
// NOTE(review): several returns and locals were dropped by extraction; code
// left byte-identical.
5196bool AMDGPUAsmParser::checkUnsupportedInstruction(
StringRef Mnemo,
5197 const SMLoc &IDLoc) {
5198 FeatureBitset FBS = ComputeAvailableFeatures(getFeatureBits());
// Mnemonic is fine for this target — the failure was elsewhere.
5201 if (isSupportedMnemo(Mnemo, FBS, getMatchedVariants()))
5206 getParser().clearPendingErrors();
5210 StringRef VariantName = getMatchedVariantName();
5211 if (!VariantName.
empty() && isSupportedMnemo(Mnemo, FBS)) {
5214 " variant of this instruction is not supported"));
// Retry feature match with wavefront size flipped to 32 to detect
// wave32-only instructions assembled in wave64 mode.
5218 if (
isGFX10Plus() && getFeatureBits()[AMDGPU::FeatureWavefrontSize64] &&
5219 !getFeatureBits()[AMDGPU::FeatureWavefrontSize32]) {
5222 FeaturesWS32.
flip(AMDGPU::FeatureWavefrontSize64)
5223 .
flip(AMDGPU::FeatureWavefrontSize32);
5225 ComputeAvailableFeatures(FeaturesWS32);
5227 if (isSupportedMnemo(Mnemo, AvailableFeaturesWS32, getMatchedVariants()))
5228 return Error(IDLoc,
"instruction requires wavesize=32");
5233 return Error(IDLoc,
"instruction not supported on this GPU");
5238 return Error(IDLoc,
"invalid instruction" + Suggestion);
5244 const auto &
Op = ((AMDGPUOperand &)*
Operands[InvalidOprIdx]);
5245 if (
Op.isToken() && InvalidOprIdx > 1) {
5246 const auto &PrevOp = ((AMDGPUOperand &)*
Operands[InvalidOprIdx - 1]);
5247 return PrevOp.isToken() && PrevOp.getToken() ==
"::";
// MCTargetAsmParser entry point: tries every syntax variant, keeps the best
// match result (ordered Success > PreferE32 > MissingFeature > InvalidOperand
// > MnemonicFail), validates a successful match, and emits diagnostics for
// each failure kind.
// NOTE(review): the emit path, the ErrorInfo handling and several breaks were
// dropped by extraction; code left byte-identical.
5252bool AMDGPUAsmParser::MatchAndEmitInstruction(
SMLoc IDLoc,
unsigned &Opcode,
5256 bool MatchingInlineAsm) {
5258 unsigned Result = Match_Success;
5259 for (
auto Variant : getMatchedVariants()) {
5261 auto R = MatchInstructionImpl(
Operands, Inst, EI, MatchingInlineAsm,
// Keep R only when it is strictly "better" than the best result so far.
5266 if ((R == Match_Success) ||
5267 (R == Match_PreferE32) ||
5268 (R == Match_MissingFeature && Result != Match_PreferE32) ||
5269 (R == Match_InvalidOperand && Result != Match_MissingFeature
5270 && Result != Match_PreferE32) ||
5271 (R == Match_MnemonicFail && Result != Match_InvalidOperand
5272 && Result != Match_MissingFeature
5273 && Result != Match_PreferE32)) {
5277 if (R == Match_Success)
5281 if (Result == Match_Success) {
5282 if (!validateInstruction(Inst, IDLoc,
Operands)) {
5291 if (checkUnsupportedInstruction(Mnemo, IDLoc)) {
5297 case Match_MissingFeature:
5301 return Error(IDLoc,
"operands are not valid for this GPU or mode");
5303 case Match_InvalidOperand: {
5304 SMLoc ErrorLoc = IDLoc;
5307 return Error(IDLoc,
"too few operands for instruction");
// Fall back to the instruction location when the operand has none.
5310 if (ErrorLoc ==
SMLoc())
5314 return Error(ErrorLoc,
"invalid VOPDY instruction");
5316 return Error(ErrorLoc,
"invalid operand for instruction");
5319 case Match_PreferE32:
5320 return Error(IDLoc,
"internal error: instruction without _e64 suffix "
5321 "should be encoded as e32");
5322 case Match_MnemonicFail:
// Parses an absolute integer expression into Ret via a 64-bit temporary.
// NOTE(review): the assignment to Ret and the returns were dropped by
// extraction; code left byte-identical.
5328bool AMDGPUAsmParser::ParseAsAbsoluteExpression(
uint32_t &Ret) {
5333 if (getParser().parseAbsoluteExpression(Tmp)) {
// Handles the .amdgcn_target directive: parses the quoted target-id string
// and requires it to match the target id the assembler was configured with.
// NOTE(review): arch check condition and streamer emit call were dropped by
// extraction; code left byte-identical.
5340bool AMDGPUAsmParser::ParseDirectiveAMDGCNTarget() {
5342 return TokError(
"directive only supported for amdgcn architecture");
5344 std::string TargetIDDirective;
5345 SMLoc TargetStart = getTok().getLoc();
5346 if (getParser().parseEscapedString(TargetIDDirective))
5350 if (getTargetStreamer().getTargetID()->
toString() != TargetIDDirective)
5351 return getParser().Error(TargetRange.
Start,
5352 (
Twine(
".amdgcn_target directive's target id ") +
5353 Twine(TargetIDDirective) +
5354 Twine(
" does not match the specified target id ") +
5355 Twine(getTargetStreamer().getTargetID()->
toString())).str());
// Emits a "value out of range" diagnostic over Range; always returns true
// (error) per the MCAsmParser convention.
5360bool AMDGPUAsmParser::OutOfRangeError(
SMRange Range) {
5361 return Error(
Range.Start,
"value out of range", Range);
// Computes the granulated VGPR/SGPR block counts for a kernel descriptor from
// the highest used registers, with range checks against the target's
// addressable SGPR limit (accounting for the SGPR-init hardware bug).
// NOTE(review): NumSGPRs derivation and the final block computations were
// dropped by extraction; code left byte-identical.
5364bool AMDGPUAsmParser::calculateGPRBlocks(
5365 const FeatureBitset &Features,
bool VCCUsed,
bool FlatScrUsed,
5366 bool XNACKUsed, std::optional<bool> EnableWavefrontSize32,
5367 unsigned NextFreeVGPR,
SMRange VGPRRange,
unsigned NextFreeSGPR,
5368 SMRange SGPRRange,
unsigned &VGPRBlocks,
unsigned &SGPRBlocks) {
5379 unsigned MaxAddressableNumSGPRs =
// gfx8+ without the init bug: check against the raw addressable limit.
5382 if (
Version.Major >= 8 && !Features.
test(FeatureSGPRInitBug) &&
5383 NumSGPRs > MaxAddressableNumSGPRs)
5384 return OutOfRangeError(SGPRRange);
// Older targets (or init-bug targets) re-check after adjustment.
5389 if ((
Version.Major <= 7 || Features.
test(FeatureSGPRInitBug)) &&
5390 NumSGPRs > MaxAddressableNumSGPRs)
5391 return OutOfRangeError(SGPRRange);
5393 if (Features.
test(FeatureSGPRInitBug))
5398 EnableWavefrontSize32);
5404bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() {
5406 return TokError(
"directive only supported for amdgcn architecture");
5409 return TokError(
"directive only supported for amdhsa OS");
5412 if (getParser().parseIdentifier(KernelName))
5417 &getSTI(), getContext());
5433 unsigned ImpliedUserSGPRCount = 0;
5437 std::optional<unsigned> ExplicitUserSGPRCount;
5438 bool ReserveVCC =
true;
5439 bool ReserveFlatScr =
true;
5440 std::optional<bool> EnableWavefrontSize32;
5446 SMRange IDRange = getTok().getLocRange();
5447 if (!parseId(
ID,
"expected .amdhsa_ directive or .end_amdhsa_kernel"))
5450 if (
ID ==
".end_amdhsa_kernel")
5454 return TokError(
".amdhsa_ directives cannot be repeated");
5456 SMLoc ValStart = getLoc();
5458 if (getParser().parseExpression(ExprVal))
5460 SMLoc ValEnd = getLoc();
5465 bool EvaluatableExpr;
5466 if ((EvaluatableExpr = ExprVal->evaluateAsAbsolute(IVal))) {
5468 return OutOfRangeError(ValRange);
5472#define PARSE_BITS_ENTRY(FIELD, ENTRY, VALUE, RANGE) \
5473 if (!isUInt<ENTRY##_WIDTH>(Val)) \
5474 return OutOfRangeError(RANGE); \
5475 AMDGPU::MCKernelDescriptor::bits_set(FIELD, VALUE, ENTRY##_SHIFT, ENTRY, \
5480#define EXPR_RESOLVE_OR_ERROR(RESOLVED) \
5482 return Error(IDRange.Start, "directive should have resolvable expression", \
5485 if (
ID ==
".amdhsa_group_segment_fixed_size") {
5488 return OutOfRangeError(ValRange);
5490 }
else if (
ID ==
".amdhsa_private_segment_fixed_size") {
5493 return OutOfRangeError(ValRange);
5495 }
else if (
ID ==
".amdhsa_kernarg_size") {
5497 return OutOfRangeError(ValRange);
5499 }
else if (
ID ==
".amdhsa_user_sgpr_count") {
5501 ExplicitUserSGPRCount = Val;
5502 }
else if (
ID ==
".amdhsa_user_sgpr_private_segment_buffer") {
5506 "directive is not supported with architected flat scratch",
5509 KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER,
5512 ImpliedUserSGPRCount += 4;
5513 }
else if (
ID ==
".amdhsa_user_sgpr_kernarg_preload_length") {
5516 return Error(IDRange.
Start,
"directive requires gfx90a+", IDRange);
5519 return OutOfRangeError(ValRange);
5523 ImpliedUserSGPRCount += Val;
5524 PreloadLength = Val;
5526 }
else if (
ID ==
".amdhsa_user_sgpr_kernarg_preload_offset") {
5529 return Error(IDRange.
Start,
"directive requires gfx90a+", IDRange);
5532 return OutOfRangeError(ValRange);
5536 PreloadOffset = Val;
5537 }
else if (
ID ==
".amdhsa_user_sgpr_dispatch_ptr") {
5540 KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR, ExprVal,
5543 ImpliedUserSGPRCount += 2;
5544 }
else if (
ID ==
".amdhsa_user_sgpr_queue_ptr") {
5547 KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR, ExprVal,
5550 ImpliedUserSGPRCount += 2;
5551 }
else if (
ID ==
".amdhsa_user_sgpr_kernarg_segment_ptr") {
5554 KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR,
5557 ImpliedUserSGPRCount += 2;
5558 }
else if (
ID ==
".amdhsa_user_sgpr_dispatch_id") {
5561 KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID, ExprVal,
5564 ImpliedUserSGPRCount += 2;
5565 }
else if (
ID ==
".amdhsa_user_sgpr_flat_scratch_init") {
5568 "directive is not supported with architected flat scratch",
5572 KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT,
5575 ImpliedUserSGPRCount += 2;
5576 }
else if (
ID ==
".amdhsa_user_sgpr_private_segment_size") {
5579 KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE,
5582 ImpliedUserSGPRCount += 1;
5583 }
else if (
ID ==
".amdhsa_wavefront_size32") {
5585 if (IVersion.
Major < 10)
5586 return Error(IDRange.
Start,
"directive requires gfx10+", IDRange);
5587 EnableWavefrontSize32 = Val;
5589 KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32, ExprVal,
5591 }
else if (
ID ==
".amdhsa_uses_dynamic_stack") {
5593 KERNEL_CODE_PROPERTY_USES_DYNAMIC_STACK, ExprVal,
5595 }
else if (
ID ==
".amdhsa_system_sgpr_private_segment_wavefront_offset") {
5598 "directive is not supported with architected flat scratch",
5601 COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT, ExprVal,
5603 }
else if (
ID ==
".amdhsa_enable_private_segment") {
5607 "directive is not supported without architected flat scratch",
5610 COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT, ExprVal,
5612 }
else if (
ID ==
".amdhsa_system_sgpr_workgroup_id_x") {
5614 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X, ExprVal,
5616 }
else if (
ID ==
".amdhsa_system_sgpr_workgroup_id_y") {
5618 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Y, ExprVal,
5620 }
else if (
ID ==
".amdhsa_system_sgpr_workgroup_id_z") {
5622 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Z, ExprVal,
5624 }
else if (
ID ==
".amdhsa_system_sgpr_workgroup_info") {
5626 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_INFO, ExprVal,
5628 }
else if (
ID ==
".amdhsa_system_vgpr_workitem_id") {
5630 COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID, ExprVal,
5632 }
else if (
ID ==
".amdhsa_next_free_vgpr") {
5634 VGPRRange = ValRange;
5636 }
else if (
ID ==
".amdhsa_next_free_sgpr") {
5638 SGPRRange = ValRange;
5640 }
else if (
ID ==
".amdhsa_accum_offset") {
5642 return Error(IDRange.
Start,
"directive requires gfx90a+", IDRange);
5645 }
else if (
ID ==
".amdhsa_reserve_vcc") {
5647 if (!isUInt<1>(Val))
5648 return OutOfRangeError(ValRange);
5650 }
else if (
ID ==
".amdhsa_reserve_flat_scratch") {
5652 if (IVersion.
Major < 7)
5653 return Error(IDRange.
Start,
"directive requires gfx7+", IDRange);
5656 "directive is not supported with architected flat scratch",
5658 if (!isUInt<1>(Val))
5659 return OutOfRangeError(ValRange);
5660 ReserveFlatScr = Val;
5661 }
else if (
ID ==
".amdhsa_reserve_xnack_mask") {
5662 if (IVersion.
Major < 8)
5663 return Error(IDRange.
Start,
"directive requires gfx8+", IDRange);
5664 if (!isUInt<1>(Val))
5665 return OutOfRangeError(ValRange);
5666 if (Val != getTargetStreamer().getTargetID()->isXnackOnOrAny())
5667 return getParser().Error(IDRange.
Start,
".amdhsa_reserve_xnack_mask does not match target id",
5669 }
else if (
ID ==
".amdhsa_float_round_mode_32") {
5671 COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32, ExprVal,
5673 }
else if (
ID ==
".amdhsa_float_round_mode_16_64") {
5675 COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_16_64, ExprVal,
5677 }
else if (
ID ==
".amdhsa_float_denorm_mode_32") {
5679 COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_32, ExprVal,
5681 }
else if (
ID ==
".amdhsa_float_denorm_mode_16_64") {
5683 COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64, ExprVal,
5685 }
else if (
ID ==
".amdhsa_dx10_clamp") {
5686 if (IVersion.
Major >= 12)
5687 return Error(IDRange.
Start,
"directive unsupported on gfx12+", IDRange);
5689 COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_DX10_CLAMP, ExprVal,
5691 }
else if (
ID ==
".amdhsa_ieee_mode") {
5692 if (IVersion.
Major >= 12)
5693 return Error(IDRange.
Start,
"directive unsupported on gfx12+", IDRange);
5695 COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_IEEE_MODE, ExprVal,
5697 }
else if (
ID ==
".amdhsa_fp16_overflow") {
5698 if (IVersion.
Major < 9)
5699 return Error(IDRange.
Start,
"directive requires gfx9+", IDRange);
5701 COMPUTE_PGM_RSRC1_GFX9_PLUS_FP16_OVFL, ExprVal,
5703 }
else if (
ID ==
".amdhsa_tg_split") {
5705 return Error(IDRange.
Start,
"directive requires gfx90a+", IDRange);
5708 }
else if (
ID ==
".amdhsa_workgroup_processor_mode") {
5709 if (IVersion.
Major < 10)
5710 return Error(IDRange.
Start,
"directive requires gfx10+", IDRange);
5712 COMPUTE_PGM_RSRC1_GFX10_PLUS_WGP_MODE, ExprVal,
5714 }
else if (
ID ==
".amdhsa_memory_ordered") {
5715 if (IVersion.
Major < 10)
5716 return Error(IDRange.
Start,
"directive requires gfx10+", IDRange);
5718 COMPUTE_PGM_RSRC1_GFX10_PLUS_MEM_ORDERED, ExprVal,
5720 }
else if (
ID ==
".amdhsa_forward_progress") {
5721 if (IVersion.
Major < 10)
5722 return Error(IDRange.
Start,
"directive requires gfx10+", IDRange);
5724 COMPUTE_PGM_RSRC1_GFX10_PLUS_FWD_PROGRESS, ExprVal,
5726 }
else if (
ID ==
".amdhsa_shared_vgpr_count") {
5728 if (IVersion.
Major < 10 || IVersion.
Major >= 12)
5729 return Error(IDRange.
Start,
"directive requires gfx10 or gfx11",
5731 SharedVGPRCount = Val;
5733 COMPUTE_PGM_RSRC3_GFX10_GFX11_SHARED_VGPR_COUNT, ExprVal,
5735 }
else if (
ID ==
".amdhsa_exception_fp_ieee_invalid_op") {
5738 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION,
5740 }
else if (
ID ==
".amdhsa_exception_fp_denorm_src") {
5742 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_FP_DENORMAL_SOURCE,
5744 }
else if (
ID ==
".amdhsa_exception_fp_ieee_div_zero") {
5747 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO,
5749 }
else if (
ID ==
".amdhsa_exception_fp_ieee_overflow") {
5751 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW,
5753 }
else if (
ID ==
".amdhsa_exception_fp_ieee_underflow") {
5755 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW,
5757 }
else if (
ID ==
".amdhsa_exception_fp_ieee_inexact") {
5759 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INEXACT,
5761 }
else if (
ID ==
".amdhsa_exception_int_div_zero") {
5763 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO,
5765 }
else if (
ID ==
".amdhsa_round_robin_scheduling") {
5766 if (IVersion.
Major < 12)
5767 return Error(IDRange.
Start,
"directive requires gfx12+", IDRange);
5769 COMPUTE_PGM_RSRC1_GFX12_PLUS_ENABLE_WG_RR_EN, ExprVal,
5772 return Error(IDRange.
Start,
"unknown .amdhsa_kernel directive", IDRange);
5775#undef PARSE_BITS_ENTRY
5778 if (!Seen.
contains(
".amdhsa_next_free_vgpr"))
5779 return TokError(
".amdhsa_next_free_vgpr directive is required");
5781 if (!Seen.
contains(
".amdhsa_next_free_sgpr"))
5782 return TokError(
".amdhsa_next_free_sgpr directive is required");
5784 unsigned VGPRBlocks;
5785 unsigned SGPRBlocks;
5786 if (calculateGPRBlocks(getFeatureBits(), ReserveVCC, ReserveFlatScr,
5787 getTargetStreamer().getTargetID()->isXnackOnOrAny(),
5788 EnableWavefrontSize32, NextFreeVGPR,
5789 VGPRRange, NextFreeSGPR, SGPRRange, VGPRBlocks,
5793 if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT_WIDTH>(
5795 return OutOfRangeError(VGPRRange);
5798 COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT_SHIFT,
5799 COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT, getContext());
5801 if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT_WIDTH>(
5803 return OutOfRangeError(SGPRRange);
5806 COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT_SHIFT,
5807 COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT, getContext());
5809 if (ExplicitUserSGPRCount && ImpliedUserSGPRCount > *ExplicitUserSGPRCount)
5810 return TokError(
"amdgpu_user_sgpr_count smaller than than implied by "
5811 "enabled user SGPRs");
5813 unsigned UserSGPRCount =
5814 ExplicitUserSGPRCount ? *ExplicitUserSGPRCount : ImpliedUserSGPRCount;
5816 if (!isUInt<COMPUTE_PGM_RSRC2_USER_SGPR_COUNT_WIDTH>(UserSGPRCount))
5817 return TokError(
"too many user SGPRs enabled");
5820 COMPUTE_PGM_RSRC2_USER_SGPR_COUNT_SHIFT,
5821 COMPUTE_PGM_RSRC2_USER_SGPR_COUNT, getContext());
5825 return TokError(
"Kernarg size should be resolvable");
5827 if (PreloadLength && kernarg_size &&
5828 (PreloadLength * 4 + PreloadOffset * 4 > kernarg_size))
5829 return TokError(
"Kernarg preload length + offset is larger than the "
5830 "kernarg segment size");
5833 if (!Seen.
contains(
".amdhsa_accum_offset"))
5834 return TokError(
".amdhsa_accum_offset directive is required");
5835 if (AccumOffset < 4 || AccumOffset > 256 || (AccumOffset & 3))
5836 return TokError(
"accum_offset should be in range [4..256] in "
5839 return TokError(
"accum_offset exceeds total VGPR allocation");
5843 COMPUTE_PGM_RSRC3_GFX90A_ACCUM_OFFSET_SHIFT,
5844 COMPUTE_PGM_RSRC3_GFX90A_ACCUM_OFFSET, getContext());
5847 if (IVersion.
Major >= 10 && IVersion.
Major < 12) {
5849 if (SharedVGPRCount && EnableWavefrontSize32 && *EnableWavefrontSize32) {
5850 return TokError(
"shared_vgpr_count directive not valid on "
5851 "wavefront size 32");
5853 if (SharedVGPRCount * 2 + VGPRBlocks > 63) {
5854 return TokError(
"shared_vgpr_count*2 + "
5855 "compute_pgm_rsrc1.GRANULATED_WORKITEM_VGPR_COUNT cannot "
5860 getTargetStreamer().EmitAmdhsaKernelDescriptor(getSTI(), KernelName, KD,
5861 NextFreeVGPR, NextFreeSGPR,
5862 ReserveVCC, ReserveFlatScr);
// Handles .amdhsa_code_object_version: parses the version number and
// forwards it to the target streamer.
5866bool AMDGPUAsmParser::ParseDirectiveAMDHSACodeObjectVersion() {
5868 if (ParseAsAbsoluteExpression(Version))
5871 getTargetStreamer().EmitDirectiveAMDHSACodeObjectVersion(Version);
// Parses one key=value entry of an .amd_kernel_code_t block into Header,
// cross-checking entries that depend on target generation or wavefront-size
// features. Returns true on error (TokError already emitted).
// NOTE(review): the generic field-parsing call and several condition heads
// were dropped by extraction; code left byte-identical.
5875bool AMDGPUAsmParser::ParseAMDKernelCodeTValue(
StringRef ID,
// Legacy field: accepted and ignored.
5879 if (
ID ==
"max_scratch_backing_memory_byte_size") {
5880 Parser.eatToEndOfStatement();
5887 return TokError(Err.str());
5891 if (
ID ==
"enable_dx10_clamp") {
5894 return TokError(
"enable_dx10_clamp=1 is not allowed on GFX12+");
5897 if (
ID ==
"enable_ieee_mode") {
5900 return TokError(
"enable_ieee_mode=1 is not allowed on GFX12+");
5903 if (
ID ==
"enable_wavefront_size32") {
5906 return TokError(
"enable_wavefront_size32=1 is only allowed on GFX10+");
5907 if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32])
5908 return TokError(
"enable_wavefront_size32=1 requires +WavefrontSize32");
5910 if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64])
5911 return TokError(
"enable_wavefront_size32=0 requires +WavefrontSize64");
// wavefront_size is log2: 5 => wave32, 6 => wave64.
5915 if (
ID ==
"wavefront_size") {
5916 if (Header.wavefront_size == 5) {
5918 return TokError(
"wavefront_size=5 is only allowed on GFX10+");
5919 if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32])
5920 return TokError(
"wavefront_size=5 requires +WavefrontSize32");
5921 }
else if (Header.wavefront_size == 6) {
5922 if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64])
5923 return TokError(
"wavefront_size=6 requires +WavefrontSize64");
5927 if (
ID ==
"enable_wgp_mode") {
5930 return TokError(
"enable_wgp_mode=1 is only allowed on GFX10+");
5933 if (
ID ==
"enable_mem_ordered") {
5936 return TokError(
"enable_mem_ordered=1 is only allowed on GFX10+");
5939 if (
ID ==
"enable_fwd_progress") {
5942 return TokError(
"enable_fwd_progress=1 is only allowed on GFX10+");
// Handles .amd_kernel_code_t ... .end_amd_kernel_code_t: reads key=value
// entries via ParseAMDKernelCodeTValue until the end marker, then emits the
// completed header through the target streamer.
// NOTE(review): the loop structure and Header initialization were dropped by
// extraction; code left byte-identical.
5948bool AMDGPUAsmParser::ParseDirectiveAMDKernelCodeT() {
5958 if (!parseId(
ID,
"expected value identifier or .end_amd_kernel_code_t"))
5961 if (
ID ==
".end_amd_kernel_code_t")
5964 if (ParseAMDKernelCodeTValue(
ID, Header))
5968 getTargetStreamer().EmitAMDKernelCodeT(Header);
// Handles .amdgpu_hsa_kernel: marks the named symbol as a kernel and opens a
// fresh kernel scope for subsequent directives.
5973bool AMDGPUAsmParser::ParseDirectiveAMDGPUHsaKernel() {
5975 if (!parseId(KernelName,
"expected symbol name"))
5978 getTargetStreamer().EmitAMDGPUSymbolType(KernelName,
5981 KernelScope.initialize(getContext());
// Handles .amd_amdgpu_isa: only valid on amdgcn; the quoted target id must
// match the configured one, after which the ISA version record is emitted.
5985bool AMDGPUAsmParser::ParseDirectiveISAVersion() {
5987 return Error(getLoc(),
5988 ".amd_amdgpu_isa directive is not available on non-amdgcn "
5992 auto TargetIDDirective = getLexer().getTok().getStringContents();
5993 if (getTargetStreamer().getTargetID()->
toString() != TargetIDDirective)
5994 return Error(getParser().getTok().getLoc(),
"target id must match options");
5996 getTargetStreamer().EmitISAVersion();
// Handles the HSA metadata begin/end directive pair: collects the enclosed
// text (presumably via ParseToEndDirective — the call was elided by
// extraction) and hands it to the streamer as V3 metadata.
6002bool AMDGPUAsmParser::ParseDirectiveHSAMetadata() {
6005 std::string HSAMetadataString;
6010 if (!getTargetStreamer().EmitHSAMetadataV3(HSAMetadataString))
6011 return Error(getLoc(),
"invalid HSA metadata");
// Collects raw token text into CollectString until the matching end
// directive is seen, temporarily disabling whitespace skipping so the text
// is captured verbatim. Errors if the end directive never appears.
// NOTE(review): the loop header and FoundEnd handling were dropped by
// extraction; code left byte-identical.
6018bool AMDGPUAsmParser::ParseToEndDirective(
const char *AssemblerDirectiveBegin,
6019 const char *AssemblerDirectiveEnd,
6020 std::string &CollectString) {
// Preserve whitespace exactly while collecting.
6024 getLexer().setSkipSpace(
false);
6026 bool FoundEnd =
false;
6029 CollectStream << getTokenStr();
6033 if (trySkipId(AssemblerDirectiveEnd)) {
6038 CollectStream << Parser.parseStringToEndOfStatement()
6039 << getContext().getAsmInfo()->getSeparatorString();
6041 Parser.eatToEndOfStatement();
// Restore normal lexing before reporting results.
6044 getLexer().setSkipSpace(
true);
6047 return TokError(
Twine(
"expected directive ") +
6048 Twine(AssemblerDirectiveEnd) +
Twine(
" not found"));
6051 CollectStream.flush();
6056bool AMDGPUAsmParser::ParseDirectivePALMetadataBegin() {
6062 auto PALMetadata = getTargetStreamer().getPALMetadata();
6063 if (!PALMetadata->setFromString(
String))
6064 return Error(getLoc(),
"invalid PAL metadata");
6069bool AMDGPUAsmParser::ParseDirectivePALMetadata() {
6071 return Error(getLoc(),
6073 "not available on non-amdpal OSes")).str());
6076 auto PALMetadata = getTargetStreamer().getPALMetadata();
6077 PALMetadata->setLegacy();
6080 if (ParseAsAbsoluteExpression(Key)) {
6081 return TokError(
Twine(
"invalid value in ") +
6085 return TokError(
Twine(
"expected an even number of values in ") +
6088 if (ParseAsAbsoluteExpression(
Value)) {
6089 return TokError(
Twine(
"invalid value in ") +
6092 PALMetadata->setRegister(Key,
Value);
6101bool AMDGPUAsmParser::ParseDirectiveAMDGPULDS() {
6102 if (getParser().checkForValidSection())
6106 SMLoc NameLoc = getLoc();
6107 if (getParser().parseIdentifier(
Name))
6108 return TokError(
"expected identifier in directive");
6111 if (getParser().parseComma())
6117 SMLoc SizeLoc = getLoc();
6118 if (getParser().parseAbsoluteExpression(
Size))
6121 return Error(SizeLoc,
"size must be non-negative");
6122 if (
Size > LocalMemorySize)
6123 return Error(SizeLoc,
"size is too large");
6125 int64_t Alignment = 4;
6127 SMLoc AlignLoc = getLoc();
6128 if (getParser().parseAbsoluteExpression(Alignment))
6131 return Error(AlignLoc,
"alignment must be a power of two");
6136 if (Alignment >= 1u << 31)
6137 return Error(AlignLoc,
"alignment is too large");
6143 Symbol->redefineIfPossible();
6144 if (!
Symbol->isUndefined())
6145 return Error(NameLoc,
"invalid symbol redefinition");
6147 getTargetStreamer().emitAMDGPULDS(Symbol,
Size,
Align(Alignment));
6151bool AMDGPUAsmParser::ParseDirective(
AsmToken DirectiveID) {
6155 if (IDVal ==
".amdhsa_kernel")
6156 return ParseDirectiveAMDHSAKernel();
6158 if (IDVal ==
".amdhsa_code_object_version")
6159 return ParseDirectiveAMDHSACodeObjectVersion();
6163 return ParseDirectiveHSAMetadata();
6165 if (IDVal ==
".amd_kernel_code_t")
6166 return ParseDirectiveAMDKernelCodeT();
6168 if (IDVal ==
".amdgpu_hsa_kernel")
6169 return ParseDirectiveAMDGPUHsaKernel();
6171 if (IDVal ==
".amd_amdgpu_isa")
6172 return ParseDirectiveISAVersion();
6176 Twine(
" directive is "
6177 "not available on non-amdhsa OSes"))
6182 if (IDVal ==
".amdgcn_target")
6183 return ParseDirectiveAMDGCNTarget();
6185 if (IDVal ==
".amdgpu_lds")
6186 return ParseDirectiveAMDGPULDS();
6189 return ParseDirectivePALMetadataBegin();
6192 return ParseDirectivePALMetadata();
6200 if (
MRI.regsOverlap(AMDGPU::TTMP12_TTMP13_TTMP14_TTMP15, RegNo))
6204 if (
MRI.regsOverlap(AMDGPU::SGPR104_SGPR105, RegNo))
6205 return hasSGPR104_SGPR105();
6208 case AMDGPU::SRC_SHARED_BASE_LO:
6209 case AMDGPU::SRC_SHARED_BASE:
6210 case AMDGPU::SRC_SHARED_LIMIT_LO:
6211 case AMDGPU::SRC_SHARED_LIMIT:
6212 case AMDGPU::SRC_PRIVATE_BASE_LO:
6213 case AMDGPU::SRC_PRIVATE_BASE:
6214 case AMDGPU::SRC_PRIVATE_LIMIT_LO:
6215 case AMDGPU::SRC_PRIVATE_LIMIT:
6217 case AMDGPU::SRC_POPS_EXITING_WAVE_ID:
6220 case AMDGPU::TBA_LO:
6221 case AMDGPU::TBA_HI:
6223 case AMDGPU::TMA_LO:
6224 case AMDGPU::TMA_HI:
6226 case AMDGPU::XNACK_MASK:
6227 case AMDGPU::XNACK_MASK_LO:
6228 case AMDGPU::XNACK_MASK_HI:
6229 return (
isVI() ||
isGFX9()) && getTargetStreamer().getTargetID()->isXnackSupported();
6230 case AMDGPU::SGPR_NULL:
6244 case AMDGPU::FLAT_SCR:
6245 case AMDGPU::FLAT_SCR_LO:
6246 case AMDGPU::FLAT_SCR_HI:
6255 if (
MRI.regsOverlap(AMDGPU::SGPR102_SGPR103, RegNo))
6256 return hasSGPR102_SGPR103();
6269 Res = MatchOperandParserImpl(
Operands, Mnemonic);
6281 SMLoc LBraceLoc = getLoc();
6286 auto Loc = getLoc();
6289 Error(Loc,
"expected a register");
6293 RBraceLoc = getLoc();
6298 "expected a comma or a closing square bracket"))
6302 if (
Operands.size() - Prefix > 1) {
6304 AMDGPUOperand::CreateToken(
this,
"[", LBraceLoc));
6305 Operands.push_back(AMDGPUOperand::CreateToken(
this,
"]", RBraceLoc));
6316 setForcedEncodingSize(0);
6317 setForcedDPP(
false);
6318 setForcedSDWA(
false);
6320 if (
Name.ends_with(
"_e64_dpp")) {
6322 setForcedEncodingSize(64);
6323 return Name.substr(0,
Name.size() - 8);
6324 }
else if (
Name.ends_with(
"_e64")) {
6325 setForcedEncodingSize(64);
6326 return Name.substr(0,
Name.size() - 4);
6327 }
else if (
Name.ends_with(
"_e32")) {
6328 setForcedEncodingSize(32);
6329 return Name.substr(0,
Name.size() - 4);
6330 }
else if (
Name.ends_with(
"_dpp")) {
6332 return Name.substr(0,
Name.size() - 4);
6333 }
else if (
Name.ends_with(
"_sdwa")) {
6334 setForcedSDWA(
true);
6335 return Name.substr(0,
Name.size() - 5);
6342 unsigned VariantID);
6354 Operands.push_back(AMDGPUOperand::CreateToken(
this,
Name, NameLoc));
6356 bool IsMIMG =
Name.starts_with(
"image_");
6359 OperandMode Mode = OperandMode_Default;
6361 Mode = OperandMode_NSA;
6365 checkUnsupportedInstruction(
Name, NameLoc);
6366 if (!Parser.hasPendingError()) {
6369 :
"not a valid operand.";
6370 Error(getLoc(), Msg);
6392 if (!trySkipId(
Name))
6395 Operands.push_back(AMDGPUOperand::CreateToken(
this,
Name, S));
6399ParseStatus AMDGPUAsmParser::parseIntWithPrefix(
const char *Prefix,
6410 std::function<
bool(int64_t &)> ConvertResult) {
6418 if (ConvertResult && !ConvertResult(
Value)) {
6422 Operands.push_back(AMDGPUOperand::CreateImm(
this,
Value, S, ImmTy));
6426ParseStatus AMDGPUAsmParser::parseOperandArrayWithPrefix(
6428 bool (*ConvertResult)(int64_t &)) {
6437 const unsigned MaxSize = 4;
6441 for (
int I = 0; ; ++
I) {
6443 SMLoc Loc = getLoc();
6447 if (
Op != 0 &&
Op != 1)
6455 if (
I + 1 == MaxSize)
6456 return Error(getLoc(),
"expected a closing square bracket");
6462 Operands.push_back(AMDGPUOperand::CreateImm(
this, Val, S, ImmTy));
6468 AMDGPUOperand::ImmTy ImmTy) {
6472 if (trySkipId(
Name)) {
6474 }
else if (trySkipId(
"no",
Name)) {
6481 return Error(S,
"r128 modifier is not supported on this GPU");
6483 return Error(S,
"a16 modifier is not supported on this GPU");
6485 if (
isGFX9() && ImmTy == AMDGPUOperand::ImmTyA16)
6486 ImmTy = AMDGPUOperand::ImmTyR128A16;
6488 Operands.push_back(AMDGPUOperand::CreateImm(
this, Bit, S, ImmTy));
6493 bool &Disabling)
const {
6494 Disabling =
Id.consume_front(
"no");
6514 SMLoc StringLoc = getLoc();
6516 int64_t CPolVal = 0;
6534 ResScope = parseScope(
Operands, Scope);
6549 Operands.push_back(AMDGPUOperand::CreateImm(
this, CPolVal, StringLoc,
6550 AMDGPUOperand::ImmTyCPol));
6555 SMLoc OpLoc = getLoc();
6556 unsigned Enabled = 0, Seen = 0;
6560 unsigned CPol = getCPolKind(getId(), Mnemo, Disabling);
6567 return Error(S,
"dlc modifier is not supported on this GPU");
6570 return Error(S,
"scc modifier is not supported on this GPU");
6573 return Error(S,
"duplicate cache policy modifier");
6585 AMDGPUOperand::CreateImm(
this,
Enabled, OpLoc, AMDGPUOperand::ImmTyCPol));
6597 Res = parseStringWithPrefix(
"scope",
Value, StringLoc);
6608 if (Scope == 0xffffffff)
6609 return Error(StringLoc,
"invalid scope value");
6623 if (
Value ==
"TH_DEFAULT")
6625 else if (
Value ==
"TH_STORE_LU" ||
Value ==
"TH_LOAD_RT_WB" ||
6626 Value ==
"TH_LOAD_NT_WB") {
6627 return Error(StringLoc,
"invalid th value");
6628 }
else if (
Value.consume_front(
"TH_ATOMIC_")) {
6630 }
else if (
Value.consume_front(
"TH_LOAD_")) {
6632 }
else if (
Value.consume_front(
"TH_STORE_")) {
6635 return Error(StringLoc,
"invalid th value");
6638 if (
Value ==
"BYPASS")
6669 if (TH == 0xffffffff)
6670 return Error(StringLoc,
"invalid th value");
6677 AMDGPUAsmParser::OptionalImmIndexMap& OptionalIdx,
6678 AMDGPUOperand::ImmTy ImmT,
6680 auto i = OptionalIdx.find(ImmT);
6681 if (i != OptionalIdx.end()) {
6682 unsigned Idx = i->second;
6683 ((AMDGPUOperand &)*
Operands[
Idx]).addImmOperands(Inst, 1);
6695 StringLoc = getLoc();
6704bool AMDGPUAsmParser::tryParseFmt(
const char *Pref,
6708 SMLoc Loc = getLoc();
6710 auto Res = parseIntWithPrefix(Pref, Val);
6716 if (Val < 0 || Val > MaxVal) {
6726 AMDGPUOperand::ImmTy ImmTy) {
6727 const char *Pref =
"index_key";
6729 SMLoc Loc = getLoc();
6730 auto Res = parseIntWithPrefix(Pref, ImmVal);
6734 if (ImmTy == AMDGPUOperand::ImmTyIndexKey16bit && (ImmVal < 0 || ImmVal > 1))
6737 if (ImmTy == AMDGPUOperand::ImmTyIndexKey8bit && (ImmVal < 0 || ImmVal > 3))
6740 Operands.push_back(AMDGPUOperand::CreateImm(
this, ImmVal, Loc, ImmTy));
6745 return tryParseIndexKey(
Operands, AMDGPUOperand::ImmTyIndexKey8bit);
6749 return tryParseIndexKey(
Operands, AMDGPUOperand::ImmTyIndexKey16bit);
6754ParseStatus AMDGPUAsmParser::parseDfmtNfmt(int64_t &Format) {
6761 for (
int I = 0;
I < 2; ++
I) {
6762 if (Dfmt == DFMT_UNDEF && !tryParseFmt(
"dfmt", DFMT_MAX, Dfmt))
6765 if (Nfmt == NFMT_UNDEF && !tryParseFmt(
"nfmt", NFMT_MAX, Nfmt))
6770 if ((Dfmt == DFMT_UNDEF) != (Nfmt == NFMT_UNDEF) &&
6776 if (Dfmt == DFMT_UNDEF && Nfmt == NFMT_UNDEF)
6779 Dfmt = (Dfmt ==
DFMT_UNDEF) ? DFMT_DEFAULT : Dfmt;
6780 Nfmt = (Nfmt ==
NFMT_UNDEF) ? NFMT_DEFAULT : Nfmt;
6786ParseStatus AMDGPUAsmParser::parseUfmt(int64_t &Format) {
6791 if (!tryParseFmt(
"format", UFMT_MAX, Fmt))
6794 if (Fmt == UFMT_UNDEF)
6801bool AMDGPUAsmParser::matchDfmtNfmt(int64_t &Dfmt,
6809 if (Format != DFMT_UNDEF) {
6815 if (Format != NFMT_UNDEF) {
6820 Error(Loc,
"unsupported format");
6831 if (!matchDfmtNfmt(Dfmt, Nfmt, FormatStr, FormatLoc))
6836 SMLoc Loc = getLoc();
6837 if (!parseId(Str,
"expected a format string") ||
6838 !matchDfmtNfmt(Dfmt, Nfmt, Str, Loc))
6840 if (Dfmt == DFMT_UNDEF)
6841 return Error(Loc,
"duplicate numeric format");
6842 if (Nfmt == NFMT_UNDEF)
6843 return Error(Loc,
"duplicate data format");
6846 Dfmt = (Dfmt ==
DFMT_UNDEF) ? DFMT_DEFAULT : Dfmt;
6847 Nfmt = (Nfmt ==
NFMT_UNDEF) ? NFMT_DEFAULT : Nfmt;
6851 if (Ufmt == UFMT_UNDEF)
6852 return Error(FormatLoc,
"unsupported format");
6867 if (Id == UFMT_UNDEF)
6871 return Error(Loc,
"unified format is not supported on this GPU");
6877ParseStatus AMDGPUAsmParser::parseNumericFormat(int64_t &Format) {
6879 SMLoc Loc = getLoc();
6881 if (!parseExpr(Format))
6884 return Error(Loc,
"out of range format");
6889ParseStatus AMDGPUAsmParser::parseSymbolicOrNumericFormat(int64_t &Format) {
6897 SMLoc Loc = getLoc();
6898 if (!parseId(FormatStr,
"expected a format string"))
6901 auto Res = parseSymbolicUnifiedFormat(FormatStr, Loc, Format);
6903 Res = parseSymbolicSplitFormat(FormatStr, Loc, Format);
6913 return parseNumericFormat(Format);
6921 SMLoc Loc = getLoc();
6931 AMDGPUOperand::CreateImm(
this, Format, Loc, AMDGPUOperand::ImmTyFORMAT));
6950 Res = parseSymbolicOrNumericFormat(Format);
6955 AMDGPUOperand &
Op =
static_cast<AMDGPUOperand &
>(*
Operands[
Size - 2]);
6956 assert(
Op.isImm() &&
Op.getImmTy() == AMDGPUOperand::ImmTyFORMAT);
6963 return Error(getLoc(),
"duplicate format");
6969 parseIntWithPrefix(
"offset",
Operands, AMDGPUOperand::ImmTyOffset);
6971 Res = parseIntWithPrefix(
"inst_offset",
Operands,
6972 AMDGPUOperand::ImmTyInstOffset);
6979 parseNamedBit(
"r128",
Operands, AMDGPUOperand::ImmTyR128A16);
6981 Res = parseNamedBit(
"a16",
Operands, AMDGPUOperand::ImmTyA16);
6987 parseIntWithPrefix(
"blgp",
Operands, AMDGPUOperand::ImmTyBLGP);
6990 parseOperandArrayWithPrefix(
"neg",
Operands, AMDGPUOperand::ImmTyBLGP);
7000 OptionalImmIndexMap OptionalIdx;
7002 unsigned OperandIdx[4];
7003 unsigned EnMask = 0;
7006 for (
unsigned i = 1, e =
Operands.size(); i != e; ++i) {
7007 AMDGPUOperand &
Op = ((AMDGPUOperand &)*
Operands[i]);
7012 OperandIdx[SrcIdx] = Inst.
size();
7013 Op.addRegOperands(Inst, 1);
7020 OperandIdx[SrcIdx] = Inst.
size();
7026 if (
Op.isImm() &&
Op.getImmTy() == AMDGPUOperand::ImmTyExpTgt) {
7027 Op.addImmOperands(Inst, 1);
7031 if (
Op.isToken() && (
Op.getToken() ==
"done" ||
Op.getToken() ==
"row_en"))
7035 OptionalIdx[
Op.getImmTy()] = i;
7041 if (OptionalIdx.find(AMDGPUOperand::ImmTyExpCompr) != OptionalIdx.end()) {
7048 for (
auto i = 0; i < SrcIdx; ++i) {
7050 EnMask |= Compr? (0x3 << i * 2) : (0x1 << i);
7075 IntVal =
encode(ISA, IntVal, CntVal);
7076 if (CntVal !=
decode(ISA, IntVal)) {
7078 IntVal =
encode(ISA, IntVal, -1);
7086bool AMDGPUAsmParser::parseCnt(int64_t &IntVal) {
7088 SMLoc CntLoc = getLoc();
7096 SMLoc ValLoc = getLoc();
7097 if (!parseExpr(CntVal))
7105 if (CntName ==
"vmcnt" || CntName ==
"vmcnt_sat") {
7107 }
else if (CntName ==
"expcnt" || CntName ==
"expcnt_sat") {
7109 }
else if (CntName ==
"lgkmcnt" || CntName ==
"lgkmcnt_sat") {
7112 Error(CntLoc,
"invalid counter name " + CntName);
7117 Error(ValLoc,
"too large value for " + CntName);
7126 Error(getLoc(),
"expected a counter name");
7153bool AMDGPUAsmParser::parseDelay(int64_t &Delay) {
7154 SMLoc FieldLoc = getLoc();
7160 SMLoc ValueLoc = getLoc();
7167 if (FieldName ==
"instid0") {
7169 }
else if (FieldName ==
"instskip") {
7171 }
else if (FieldName ==
"instid1") {
7174 Error(FieldLoc,
"invalid field name " + FieldName);
7193 .
Case(
"VALU_DEP_1", 1)
7194 .
Case(
"VALU_DEP_2", 2)
7195 .
Case(
"VALU_DEP_3", 3)
7196 .
Case(
"VALU_DEP_4", 4)
7197 .
Case(
"TRANS32_DEP_1", 5)
7198 .
Case(
"TRANS32_DEP_2", 6)
7199 .
Case(
"TRANS32_DEP_3", 7)
7200 .
Case(
"FMA_ACCUM_CYCLE_1", 8)
7201 .
Case(
"SALU_CYCLE_1", 9)
7202 .
Case(
"SALU_CYCLE_2", 10)
7203 .
Case(
"SALU_CYCLE_3", 11)
7211 Delay |=
Value << Shift;
7221 if (!parseDelay(Delay))
7225 if (!parseExpr(Delay))
7229 Operands.push_back(AMDGPUOperand::CreateImm(
this, Delay, S));
7234AMDGPUOperand::isSWaitCnt()
const {
7238bool AMDGPUOperand::isSDelayALU()
const {
return isImm(); }
7244void AMDGPUAsmParser::depCtrError(
SMLoc Loc,
int ErrorId,
7248 Error(Loc,
Twine(
"invalid counter name ", DepCtrName));
7251 Error(Loc,
Twine(DepCtrName,
" is not supported on this GPU"));
7254 Error(Loc,
Twine(
"duplicate counter name ", DepCtrName));
7257 Error(Loc,
Twine(
"invalid value for ", DepCtrName));
7264bool AMDGPUAsmParser::parseDepCtr(int64_t &DepCtr,
unsigned &UsedOprMask) {
7268 SMLoc DepCtrLoc = getLoc();
7276 if (!parseExpr(ExprVal))
7279 unsigned PrevOprMask = UsedOprMask;
7280 int CntVal =
encodeDepCtr(DepCtrName, ExprVal, UsedOprMask, getSTI());
7283 depCtrError(DepCtrLoc, CntVal, DepCtrName);
7292 Error(getLoc(),
"expected a counter name");
7297 unsigned CntValMask = PrevOprMask ^ UsedOprMask;
7298 DepCtr = (DepCtr & ~CntValMask) | CntVal;
7306 SMLoc Loc = getLoc();
7309 unsigned UsedOprMask = 0;
7311 if (!parseDepCtr(DepCtr, UsedOprMask))
7315 if (!parseExpr(DepCtr))
7319 Operands.push_back(AMDGPUOperand::CreateImm(
this, DepCtr, Loc));
7323bool AMDGPUOperand::isDepCtr()
const {
return isS16Imm(); }
7329ParseStatus AMDGPUAsmParser::parseHwregFunc(OperandInfoTy &HwReg,
7331 OperandInfoTy &Width) {
7338 HwReg.Loc = getLoc();
7341 HwReg.IsSymbolic =
true;
7343 }
else if (!parseExpr(HwReg.Val,
"a register name")) {
7351 if (!skipToken(
AsmToken::Comma,
"expected a comma or a closing parenthesis"))
7355 if (!parseExpr(
Offset.Val))
7361 Width.Loc = getLoc();
7362 if (!parseExpr(Width.Val) ||
7373 SMLoc Loc = getLoc();
7375 StructuredOpField HwReg(
"id",
"hardware register", HwregId::Width,
7377 StructuredOpField
Offset(
"offset",
"bit offset", HwregOffset::Width,
7378 HwregOffset::Default);
7379 struct : StructuredOpField {
7380 using StructuredOpField::StructuredOpField;
7381 bool validate(AMDGPUAsmParser &Parser)
const override {
7383 return Error(Parser,
"only values from 1 to 32 are legal");
7386 } Width(
"size",
"bitfield width", HwregSize::Width, HwregSize::Default);
7390 Res = parseHwregFunc(HwReg,
Offset, Width);
7393 if (!validateStructuredOpFields({&HwReg, &
Offset, &Width}))
7395 ImmVal = HwregEncoding::encode(HwReg.Val,
Offset.Val, Width.Val);
7399 parseExpr(ImmVal,
"a hwreg macro, structured immediate"))
7405 if (!isUInt<16>(ImmVal))
7406 return Error(Loc,
"invalid immediate: only 16-bit values are legal");
7408 AMDGPUOperand::CreateImm(
this, ImmVal, Loc, AMDGPUOperand::ImmTyHwreg));
7412bool AMDGPUOperand::isHwreg()
const {
7413 return isImmTy(ImmTyHwreg);
7421AMDGPUAsmParser::parseSendMsgBody(OperandInfoTy &Msg,
7423 OperandInfoTy &Stream) {
7429 Msg.IsSymbolic =
true;
7431 }
else if (!parseExpr(Msg.Val,
"a message name")) {
7436 Op.IsDefined =
true;
7439 (
Op.Val =
getMsgOpId(Msg.Val, getTokenStr(), getSTI())) !=
7442 }
else if (!parseExpr(
Op.Val,
"an operation name")) {
7447 Stream.IsDefined =
true;
7448 Stream.Loc = getLoc();
7449 if (!parseExpr(Stream.Val))
7458AMDGPUAsmParser::validateSendMsg(
const OperandInfoTy &Msg,
7459 const OperandInfoTy &
Op,
7460 const OperandInfoTy &Stream) {
7466 bool Strict = Msg.IsSymbolic;
7470 Error(Msg.Loc,
"specified message id is not supported on this GPU");
7475 Error(Msg.Loc,
"invalid message id");
7481 Error(
Op.Loc,
"message does not support operations");
7483 Error(Msg.Loc,
"missing message operation");
7489 Error(
Op.Loc,
"specified operation id is not supported on this GPU");
7491 Error(
Op.Loc,
"invalid operation id");
7496 Error(Stream.Loc,
"message operation does not support streams");
7500 Error(Stream.Loc,
"invalid message stream id");
7510 SMLoc Loc = getLoc();
7514 OperandInfoTy
Op(OP_NONE_);
7515 OperandInfoTy Stream(STREAM_ID_NONE_);
7516 if (parseSendMsgBody(Msg,
Op, Stream) &&
7517 validateSendMsg(Msg,
Op, Stream)) {
7522 }
else if (parseExpr(ImmVal,
"a sendmsg macro")) {
7523 if (ImmVal < 0 || !isUInt<16>(ImmVal))
7524 return Error(Loc,
"invalid immediate: only 16-bit values are legal");
7529 Operands.push_back(AMDGPUOperand::CreateImm(
this, ImmVal, Loc, AMDGPUOperand::ImmTySendMsg));
7533bool AMDGPUOperand::isSendMsg()
const {
7534 return isImmTy(ImmTySendMsg);
7555 return Error(S,
"invalid interpolation slot");
7557 Operands.push_back(AMDGPUOperand::CreateImm(
this, Slot, S,
7558 AMDGPUOperand::ImmTyInterpSlot));
7569 if (!Str.starts_with(
"attr"))
7570 return Error(S,
"invalid interpolation attribute");
7580 return Error(S,
"invalid or missing interpolation attribute channel");
7582 Str = Str.drop_back(2).drop_front(4);
7585 if (Str.getAsInteger(10, Attr))
7586 return Error(S,
"invalid or missing interpolation attribute number");
7589 return Error(S,
"out of bounds interpolation attribute number");
7593 Operands.push_back(AMDGPUOperand::CreateImm(
this, Attr, S,
7594 AMDGPUOperand::ImmTyInterpAttr));
7595 Operands.push_back(AMDGPUOperand::CreateImm(
7596 this, AttrChan, SChan, AMDGPUOperand::ImmTyInterpAttrChan));
7615 return Error(S, (
Id == ET_INVALID)
7616 ?
"invalid exp target"
7617 :
"exp target is not supported on this GPU");
7619 Operands.push_back(AMDGPUOperand::CreateImm(
this,
Id, S,
7620 AMDGPUOperand::ImmTyExpTgt));
7635 return isId(getToken(),
Id);
7640 return getTokenKind() ==
Kind;
7643StringRef AMDGPUAsmParser::getId()
const {
7670 if (isId(
Id) && peekToken().is(Kind)) {
7680 if (isToken(Kind)) {
7690 if (!trySkipToken(Kind)) {
7691 Error(getLoc(), ErrMsg);
7702 if (Parser.parseExpression(Expr))
7705 if (Expr->evaluateAsAbsolute(Imm))
7709 Error(S,
"expected absolute expression");
7712 Twine(
" or an absolute expression"));
7722 if (Parser.parseExpression(Expr))
7726 if (Expr->evaluateAsAbsolute(IntVal)) {
7727 Operands.push_back(AMDGPUOperand::CreateImm(
this, IntVal, S));
7729 Operands.push_back(AMDGPUOperand::CreateExpr(
this, Expr, S));
7737 Val = getToken().getStringContents();
7741 Error(getLoc(), ErrMsg);
7749 Val = getTokenStr();
7753 if (!ErrMsg.
empty())
7754 Error(getLoc(), ErrMsg);
7760AMDGPUAsmParser::getToken()
const {
7761 return Parser.getTok();
7764AsmToken AMDGPUAsmParser::peekToken(
bool ShouldSkipSpace) {
7767 : getLexer().peekTok(ShouldSkipSpace);
7772 auto TokCount = getLexer().peekTokens(Tokens);
7779AMDGPUAsmParser::getTokenKind()
const {
7784AMDGPUAsmParser::getLoc()
const {
7785 return getToken().getLoc();
7789AMDGPUAsmParser::getTokenStr()
const {
7790 return getToken().getString();
7794AMDGPUAsmParser::lex() {
7799 return ((AMDGPUOperand &)*
Operands[0]).getStartLoc();
7803AMDGPUAsmParser::getOperandLoc(std::function<
bool(
const AMDGPUOperand&)>
Test,
7805 for (
unsigned i =
Operands.size() - 1; i > 0; --i) {
7806 AMDGPUOperand &
Op = ((AMDGPUOperand &)*
Operands[i]);
7808 return Op.getStartLoc();
7814AMDGPUAsmParser::getImmLoc(AMDGPUOperand::ImmTy
Type,
7816 auto Test = [=](
const AMDGPUOperand&
Op) {
return Op.isImmTy(
Type); };
7821AMDGPUAsmParser::getRegLoc(
unsigned Reg,
7823 auto Test = [=](
const AMDGPUOperand&
Op) {
7824 return Op.isRegKind() &&
Op.getReg() ==
Reg;
7830 bool SearchMandatoryLiterals)
const {
7831 auto Test = [](
const AMDGPUOperand&
Op) {
7832 return Op.IsImmKindLiteral() ||
Op.isExpr();
7835 if (SearchMandatoryLiterals && Loc == getInstLoc(
Operands))
7836 Loc = getMandatoryLitLoc(
Operands);
7841 auto Test = [](
const AMDGPUOperand &
Op) {
7842 return Op.IsImmKindMandatoryLiteral();
7849 auto Test = [](
const AMDGPUOperand&
Op) {
7850 return Op.isImmKindConst();
7867 SMLoc IdLoc = getLoc();
7873 find_if(Fields, [
Id](StructuredOpField *
F) {
return F->Id ==
Id; });
7874 if (
I == Fields.
end())
7875 return Error(IdLoc,
"unknown field");
7876 if ((*I)->IsDefined)
7877 return Error(IdLoc,
"duplicate field");
7880 (*I)->Loc = getLoc();
7881 if (!parseExpr((*I)->Val))
7883 (*I)->IsDefined =
true;
7890bool AMDGPUAsmParser::validateStructuredOpFields(
7892 return all_of(Fields, [
this](
const StructuredOpField *
F) {
7893 return F->validate(*
this);
7904 const unsigned OrMask,
7905 const unsigned XorMask) {
7908 return BITMASK_PERM_ENC |
7909 (AndMask << BITMASK_AND_SHIFT) |
7910 (OrMask << BITMASK_OR_SHIFT) |
7911 (XorMask << BITMASK_XOR_SHIFT);
7915AMDGPUAsmParser::parseSwizzleOperand(int64_t &
Op,
7916 const unsigned MinVal,
7917 const unsigned MaxVal,
7924 if (!parseExpr(
Op)) {
7927 if (Op < MinVal || Op > MaxVal) {
7936AMDGPUAsmParser::parseSwizzleOperands(
const unsigned OpNum, int64_t*
Op,
7937 const unsigned MinVal,
7938 const unsigned MaxVal,
7941 for (
unsigned i = 0; i < OpNum; ++i) {
7942 if (!parseSwizzleOperand(
Op[i], MinVal, MaxVal, ErrMsg, Loc))
7950AMDGPUAsmParser::parseSwizzleQuadPerm(int64_t &Imm) {
7954 if (parseSwizzleOperands(LANE_NUM, Lane, 0, LANE_MAX,
7955 "expected a 2-bit lane id")) {
7966AMDGPUAsmParser::parseSwizzleBroadcast(int64_t &Imm) {
7973 if (!parseSwizzleOperand(GroupSize,
7975 "group size must be in the interval [2,32]",
7980 Error(Loc,
"group size must be a power of two");
7983 if (parseSwizzleOperand(LaneIdx,
7985 "lane id must be in the interval [0,group size - 1]",
7994AMDGPUAsmParser::parseSwizzleReverse(int64_t &Imm) {
8000 if (!parseSwizzleOperand(GroupSize,
8002 "group size must be in the interval [2,32]",
8007 Error(Loc,
"group size must be a power of two");
8016AMDGPUAsmParser::parseSwizzleSwap(int64_t &Imm) {
8022 if (!parseSwizzleOperand(GroupSize,
8024 "group size must be in the interval [1,16]",
8029 Error(Loc,
"group size must be a power of two");
8038AMDGPUAsmParser::parseSwizzleBitmaskPerm(int64_t &Imm) {
8046 SMLoc StrLoc = getLoc();
8047 if (!parseString(Ctl)) {
8050 if (Ctl.
size() != BITMASK_WIDTH) {
8051 Error(StrLoc,
"expected a 5-character mask");
8055 unsigned AndMask = 0;
8056 unsigned OrMask = 0;
8057 unsigned XorMask = 0;
8059 for (
size_t i = 0; i < Ctl.
size(); ++i) {
8063 Error(StrLoc,
"invalid mask");
8085AMDGPUAsmParser::parseSwizzleOffset(int64_t &Imm) {
8087 SMLoc OffsetLoc = getLoc();
8089 if (!parseExpr(Imm,
"a swizzle macro")) {
8092 if (!isUInt<16>(Imm)) {
8093 Error(OffsetLoc,
"expected a 16-bit offset");
8100AMDGPUAsmParser::parseSwizzleMacro(int64_t &Imm) {
8105 SMLoc ModeLoc = getLoc();
8108 if (trySkipId(IdSymbolic[ID_QUAD_PERM])) {
8109 Ok = parseSwizzleQuadPerm(Imm);
8110 }
else if (trySkipId(IdSymbolic[ID_BITMASK_PERM])) {
8111 Ok = parseSwizzleBitmaskPerm(Imm);
8112 }
else if (trySkipId(IdSymbolic[ID_BROADCAST])) {
8113 Ok = parseSwizzleBroadcast(Imm);
8114 }
else if (trySkipId(IdSymbolic[ID_SWAP])) {
8115 Ok = parseSwizzleSwap(Imm);
8116 }
else if (trySkipId(IdSymbolic[ID_REVERSE])) {
8117 Ok = parseSwizzleReverse(Imm);
8119 Error(ModeLoc,
"expected a swizzle mode");
8122 return Ok && skipToken(
AsmToken::RParen,
"expected a closing parentheses");
8132 if (trySkipId(
"offset")) {
8136 if (trySkipId(
"swizzle")) {
8137 Ok = parseSwizzleMacro(Imm);
8139 Ok = parseSwizzleOffset(Imm);
8143 Operands.push_back(AMDGPUOperand::CreateImm(
this, Imm, S, AMDGPUOperand::ImmTySwizzle));
8151AMDGPUOperand::isSwizzle()
const {
8152 return isImmTy(ImmTySwizzle);
8159int64_t AMDGPUAsmParser::parseGPRIdxMacro() {
8173 for (
unsigned ModeId = ID_MIN; ModeId <=
ID_MAX; ++ModeId) {
8174 if (trySkipId(IdSymbolic[ModeId])) {
8181 Error(S, (Imm == 0)?
8182 "expected a VGPR index mode or a closing parenthesis" :
8183 "expected a VGPR index mode");
8188 Error(S,
"duplicate VGPR index mode");
8196 "expected a comma or a closing parenthesis"))
8211 Imm = parseGPRIdxMacro();
8215 if (getParser().parseAbsoluteExpression(Imm))
8217 if (Imm < 0 || !isUInt<4>(Imm))
8218 return Error(S,
"invalid immediate: only 4-bit values are legal");
8222 AMDGPUOperand::CreateImm(
this, Imm, S, AMDGPUOperand::ImmTyGprIdxMode));
8226bool AMDGPUOperand::isGPRIdxMode()
const {
8227 return isImmTy(ImmTyGprIdxMode);
8239 if (isRegister() || isModifier())
8246 assert(Opr.isImm() || Opr.isExpr());
8247 SMLoc Loc = Opr.getStartLoc();
8251 if (Opr.isExpr() && !Opr.isSymbolRefExpr()) {
8252 Error(Loc,
"expected an absolute expression or a label");
8253 }
else if (Opr.isImm() && !Opr.isS16Imm()) {
8254 Error(Loc,
"expected a 16-bit signed jump offset");
8272void AMDGPUAsmParser::cvtMubufImpl(
MCInst &Inst,
8275 OptionalImmIndexMap OptionalIdx;
8276 unsigned FirstOperandIdx = 1;
8277 bool IsAtomicReturn =
false;
8284 for (
unsigned i = FirstOperandIdx, e =
Operands.size(); i != e; ++i) {
8285 AMDGPUOperand &
Op = ((AMDGPUOperand &)*
Operands[i]);
8289 Op.addRegOperands(Inst, 1);
8293 if (IsAtomicReturn && i == FirstOperandIdx)
8294 Op.addRegOperands(Inst, 1);
8299 if (
Op.isImm() &&
Op.getImmTy() == AMDGPUOperand::ImmTyNone) {
8300 Op.addImmOperands(Inst, 1);
8312 OptionalIdx[
Op.getImmTy()] = i;
8323bool AMDGPUOperand::isSMRDOffset8()
const {
8324 return isImmLiteral() && isUInt<8>(getImm());
8327bool AMDGPUOperand::isSMEMOffset()
const {
8329 return isImmLiteral();
8332bool AMDGPUOperand::isSMRDLiteralOffset()
const {
8335 return isImmLiteral() && !isUInt<8>(getImm()) && isUInt<32>(getImm());
8367bool AMDGPUAsmParser::convertDppBoundCtrl(int64_t &BoundCtrl) {
8368 if (BoundCtrl == 0 || BoundCtrl == 1) {
8376void AMDGPUAsmParser::onBeginOfFile() {
8377 if (!getParser().getStreamer().getTargetStreamer() ||
8381 if (!getTargetStreamer().getTargetID())
8382 getTargetStreamer().initializeTargetID(getSTI(),
8383 getSTI().getFeatureString());
8386 getTargetStreamer().EmitDirectiveAMDGCNTarget();
8394bool AMDGPUAsmParser::parsePrimaryExpr(
const MCExpr *&Res,
SMLoc &EndLoc) {
8400 .
Case(
"max", AGVK::AGVK_Max)
8401 .
Case(
"or", AGVK::AGVK_Or)
8411 if (Exprs.
empty()) {
8412 Error(getToken().getLoc(),
8413 "empty " +
Twine(TokenId) +
" expression");
8416 if (CommaCount + 1 != Exprs.
size()) {
8417 Error(getToken().getLoc(),
8418 "mismatch of commas in " +
Twine(TokenId) +
" expression");
8425 if (getParser().parseExpression(Expr, EndLoc))
8429 if (LastTokenWasComma)
8432 Error(getToken().getLoc(),
8433 "unexpected token in " +
Twine(TokenId) +
" expression");
8439 return getParser().parsePrimaryExpr(Res, EndLoc,
nullptr);
8444 if (
Name ==
"mul") {
8445 return parseIntWithPrefix(
"mul",
Operands,
8449 if (
Name ==
"div") {
8450 return parseIntWithPrefix(
"div",
Operands,
8466 const int Ops[] = { AMDGPU::OpName::src0,
8467 AMDGPU::OpName::src1,
8468 AMDGPU::OpName::src2 };
8483 if (
DstOp.isReg() &&
8484 MRI.getRegClass(AMDGPU::VGPR_16RegClassID).contains(
DstOp.
getReg())) {
8488 if ((OpSel & (1 << SrcNum)) != 0)
8494void AMDGPUAsmParser::cvtVOP3OpSel(
MCInst &Inst,
8501 OptionalImmIndexMap &OptionalIdx) {
8502 cvtVOP3P(Inst,
Operands, OptionalIdx);
8511 &&
Desc.NumOperands > (OpNum + 1)
8513 &&
Desc.operands()[OpNum + 1].RegClass != -1
8515 &&
Desc.getOperandConstraint(OpNum + 1,
8516 MCOI::OperandConstraint::TIED_TO) == -1;
8521 OptionalImmIndexMap OptionalIdx;
8526 for (
unsigned J = 0; J <
Desc.getNumDefs(); ++J) {
8527 ((AMDGPUOperand &)*
Operands[
I++]).addRegOperands(Inst, 1);
8530 for (
unsigned E =
Operands.size();
I != E; ++
I) {
8531 AMDGPUOperand &
Op = ((AMDGPUOperand &)*
Operands[
I]);
8533 Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
8534 }
else if (
Op.isInterpSlot() ||
Op.isInterpAttr() ||
8535 Op.isInterpAttrChan()) {
8537 }
else if (
Op.isImmModifier()) {
8538 OptionalIdx[
Op.getImmTy()] =
I;
8546 AMDGPUOperand::ImmTyHigh);
8550 AMDGPUOperand::ImmTyClampSI);
8554 AMDGPUOperand::ImmTyOModSI);
8559 OptionalImmIndexMap OptionalIdx;
8564 for (
unsigned J = 0; J <
Desc.getNumDefs(); ++J) {
8565 ((AMDGPUOperand &)*
Operands[
I++]).addRegOperands(Inst, 1);
8568 for (
unsigned E =
Operands.size();
I != E; ++
I) {
8569 AMDGPUOperand &
Op = ((AMDGPUOperand &)*
Operands[
I]);
8571 Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
8572 }
else if (
Op.isImmModifier()) {
8573 OptionalIdx[
Op.getImmTy()] =
I;
8590 const int Ops[] = { AMDGPU::OpName::src0,
8591 AMDGPU::OpName::src1,
8592 AMDGPU::OpName::src2 };
8593 const int ModOps[] = { AMDGPU::OpName::src0_modifiers,
8594 AMDGPU::OpName::src1_modifiers,
8595 AMDGPU::OpName::src2_modifiers };
8599 for (
int J = 0; J < 3; ++J) {
8607 if ((OpSel & (1 << J)) != 0)
8609 if (ModOps[J] == AMDGPU::OpName::src0_modifiers &&
8610 (OpSel & (1 << 3)) != 0)
8618 OptionalImmIndexMap &OptionalIdx) {
8623 for (
unsigned J = 0; J <
Desc.getNumDefs(); ++J) {
8624 ((AMDGPUOperand &)*
Operands[
I++]).addRegOperands(Inst, 1);
8627 for (
unsigned E =
Operands.size();
I != E; ++
I) {
8628 AMDGPUOperand &
Op = ((AMDGPUOperand &)*
Operands[
I]);
8630 Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
8631 }
else if (
Op.isImmModifier()) {
8632 OptionalIdx[
Op.getImmTy()] =
I;
8633 }
else if (
Op.isRegOrImm()) {
8634 Op.addRegOrImmOperands(Inst, 1);
8644 AMDGPUOperand::ImmTyByteSel);
8649 AMDGPUOperand::ImmTyClampSI);
8653 AMDGPUOperand::ImmTyOModSI);
8660 auto it = Inst.
begin();
8670 OptionalImmIndexMap OptionalIdx;
8671 cvtVOP3(Inst,
Operands, OptionalIdx);
8675 OptionalImmIndexMap &OptIdx) {
8681 if (Opc == AMDGPU::V_CVT_SR_BF8_F32_vi ||
8682 Opc == AMDGPU::V_CVT_SR_FP8_F32_vi ||
8683 Opc == AMDGPU::V_CVT_SR_BF8_F32_gfx12_e64_gfx12 ||
8684 Opc == AMDGPU::V_CVT_SR_FP8_F32_gfx12_e64_gfx12) {
8692 !(Opc == AMDGPU::V_CVT_PK_BF8_F32_e64_dpp_gfx12 ||
8693 Opc == AMDGPU::V_CVT_PK_FP8_F32_e64_dpp_gfx12 ||
8694 Opc == AMDGPU::V_CVT_PK_BF8_F32_e64_dpp8_gfx12 ||
8695 Opc == AMDGPU::V_CVT_PK_FP8_F32_e64_dpp8_gfx12 ||
8696 Opc == AMDGPU::V_CVT_SR_FP8_F32_gfx12_e64_dpp_gfx12 ||
8697 Opc == AMDGPU::V_CVT_SR_FP8_F32_gfx12_e64_dpp8_gfx12 ||
8698 Opc == AMDGPU::V_CVT_SR_BF8_F32_gfx12_e64_dpp_gfx12 ||
8699 Opc == AMDGPU::V_CVT_SR_BF8_F32_gfx12_e64_dpp8_gfx12)) {
8708 if (OpSelIdx != -1) {
8713 if (OpSelHiIdx != -1) {
8727 const int Ops[] = { AMDGPU::OpName::src0,
8728 AMDGPU::OpName::src1,
8729 AMDGPU::OpName::src2 };
8730 const int ModOps[] = { AMDGPU::OpName::src0_modifiers,
8731 AMDGPU::OpName::src1_modifiers,
8732 AMDGPU::OpName::src2_modifiers };
8735 unsigned OpSelHi = 0;
8742 if (OpSelHiIdx != -1)
8751 for (
int J = 0; J < 3; ++J) {
8764 if (
SrcOp.isReg() && getMRI()
8771 if ((OpSel & (1 << J)) != 0)
8775 if ((OpSelHi & (1 << J)) != 0)
8778 if ((NegLo & (1 << J)) != 0)
8781 if ((NegHi & (1 << J)) != 0)
8789 OptionalImmIndexMap OptIdx;
8795 unsigned i,
unsigned Opc,
unsigned OpName) {
8797 ((AMDGPUOperand &)*
Operands[i]).addRegOrImmWithFPInputModsOperands(Inst, 2);
8799 ((AMDGPUOperand &)*
Operands[i]).addRegOperands(Inst, 1);
8805 ((AMDGPUOperand &)*
Operands[1]).addRegOperands(Inst, 1);
8808 ((AMDGPUOperand &)*
Operands[1]).addRegOperands(Inst, 1);
8809 ((AMDGPUOperand &)*
Operands[4]).addRegOperands(Inst, 1);
8811 OptionalImmIndexMap OptIdx;
8812 for (
unsigned i = 5; i <
Operands.size(); ++i) {
8813 AMDGPUOperand &
Op = ((AMDGPUOperand &)*
Operands[i]);
8814 OptIdx[
Op.getImmTy()] = i;
8819 AMDGPUOperand::ImmTyIndexKey8bit);
8823 AMDGPUOperand::ImmTyIndexKey16bit);
8843 Operands.push_back(AMDGPUOperand::CreateToken(
this,
"::", S));
8844 SMLoc OpYLoc = getLoc();
8847 Operands.push_back(AMDGPUOperand::CreateToken(
this, OpYName, OpYLoc));
8850 return Error(OpYLoc,
"expected a VOPDY instruction after ::");
8857 auto addOp = [&](
uint16_t ParsedOprIdx) {
8858 AMDGPUOperand &
Op = ((AMDGPUOperand &)*
Operands[ParsedOprIdx]);
8860 Op.addRegOperands(Inst, 1);
8864 Op.addImmOperands(Inst, 1);
8876 addOp(InstInfo[CompIdx].getIndexOfDstInParsedOperands());
8880 const auto &CInfo = InstInfo[CompIdx];
8881 auto CompSrcOperandsNum = InstInfo[CompIdx].getCompParsedSrcOperandsNum();
8882 for (
unsigned CompSrcIdx = 0; CompSrcIdx < CompSrcOperandsNum; ++CompSrcIdx)
8883 addOp(CInfo.getIndexOfSrcInParsedOperands(CompSrcIdx));
8884 if (CInfo.hasSrc2Acc())
8885 addOp(CInfo.getIndexOfDstInParsedOperands());
8893bool AMDGPUOperand::isDPP8()
const {
8894 return isImmTy(ImmTyDPP8);
8897bool AMDGPUOperand::isDPPCtrl()
const {
8898 using namespace AMDGPU::DPP;
8900 bool result =
isImm() && getImmTy() == ImmTyDppCtrl && isUInt<9>(getImm());
8902 int64_t
Imm = getImm();
8903 return (Imm >= DppCtrl::QUAD_PERM_FIRST && Imm <= DppCtrl::QUAD_PERM_LAST) ||
8904 (
Imm >= DppCtrl::ROW_SHL_FIRST &&
Imm <= DppCtrl::ROW_SHL_LAST) ||
8905 (Imm >= DppCtrl::ROW_SHR_FIRST && Imm <= DppCtrl::ROW_SHR_LAST) ||
8906 (
Imm >= DppCtrl::ROW_ROR_FIRST &&
Imm <= DppCtrl::ROW_ROR_LAST) ||
8907 (Imm == DppCtrl::WAVE_SHL1) ||
8908 (
Imm == DppCtrl::WAVE_ROL1) ||
8909 (Imm == DppCtrl::WAVE_SHR1) ||
8910 (
Imm == DppCtrl::WAVE_ROR1) ||
8911 (Imm == DppCtrl::ROW_MIRROR) ||
8912 (
Imm == DppCtrl::ROW_HALF_MIRROR) ||
8913 (Imm == DppCtrl::BCAST15) ||
8914 (
Imm == DppCtrl::BCAST31) ||
8915 (Imm >= DppCtrl::ROW_SHARE_FIRST && Imm <= DppCtrl::ROW_SHARE_LAST) ||
8916 (
Imm >= DppCtrl::ROW_XMASK_FIRST &&
Imm <= DppCtrl::ROW_XMASK_LAST);
8925bool AMDGPUOperand::isBLGP()
const {
8926 return isImm() && getImmTy() == ImmTyBLGP && isUInt<3>(getImm());
8929bool AMDGPUOperand::isS16Imm()
const {
8930 return isImmLiteral() && (isInt<16>(getImm()) || isUInt<16>(getImm()));
8933bool AMDGPUOperand::isU16Imm()
const {
8934 return isImmLiteral() && isUInt<16>(getImm());
8941bool AMDGPUAsmParser::parseDimId(
unsigned &Encoding) {
8946 SMLoc Loc = getToken().getEndLoc();
8947 Token = std::string(getTokenStr());
8949 if (getLoc() != Loc)
8954 if (!parseId(Suffix))
8980 SMLoc Loc = getLoc();
8981 if (!parseDimId(Encoding))
8982 return Error(Loc,
"invalid dim value");
8984 Operands.push_back(AMDGPUOperand::CreateImm(
this, Encoding, S,
8985 AMDGPUOperand::ImmTyDim));
9003 if (!skipToken(
AsmToken::LBrac,
"expected an opening square bracket"))
9006 for (
size_t i = 0; i < 8; ++i) {
9010 SMLoc Loc = getLoc();
9011 if (getParser().parseAbsoluteExpression(Sels[i]))
9013 if (0 > Sels[i] || 7 < Sels[i])
9014 return Error(Loc,
"expected a 3-bit value");
9021 for (
size_t i = 0; i < 8; ++i)
9022 DPP8 |= (Sels[i] << (i * 3));
9024 Operands.push_back(AMDGPUOperand::CreateImm(
this, DPP8, S, AMDGPUOperand::ImmTyDPP8));
9029AMDGPUAsmParser::isSupportedDPPCtrl(
StringRef Ctrl,
9031 if (Ctrl ==
"row_newbcast")
9034 if (Ctrl ==
"row_share" ||
9035 Ctrl ==
"row_xmask")
9038 if (Ctrl ==
"wave_shl" ||
9039 Ctrl ==
"wave_shr" ||
9040 Ctrl ==
"wave_rol" ||
9041 Ctrl ==
"wave_ror" ||
9042 Ctrl ==
"row_bcast")
9045 return Ctrl ==
"row_mirror" ||
9046 Ctrl ==
"row_half_mirror" ||
9047 Ctrl ==
"quad_perm" ||
9048 Ctrl ==
"row_shl" ||
9049 Ctrl ==
"row_shr" ||
9054AMDGPUAsmParser::parseDPPCtrlPerm() {
9057 if (!skipToken(
AsmToken::LBrac,
"expected an opening square bracket"))
9061 for (
int i = 0; i < 4; ++i) {
9066 SMLoc Loc = getLoc();
9067 if (getParser().parseAbsoluteExpression(Temp))
9069 if (Temp < 0 || Temp > 3) {
9070 Error(Loc,
"expected a 2-bit value");
9074 Val += (Temp << i * 2);
9084AMDGPUAsmParser::parseDPPCtrlSel(
StringRef Ctrl) {
9085 using namespace AMDGPU::DPP;
9090 SMLoc Loc = getLoc();
9092 if (getParser().parseAbsoluteExpression(Val))
9095 struct DppCtrlCheck {
9102 .
Case(
"wave_shl", {DppCtrl::WAVE_SHL1, 1, 1})
9103 .Case(
"wave_rol", {DppCtrl::WAVE_ROL1, 1, 1})
9104 .Case(
"wave_shr", {DppCtrl::WAVE_SHR1, 1, 1})
9105 .Case(
"wave_ror", {DppCtrl::WAVE_ROR1, 1, 1})
9106 .Case(
"row_shl", {DppCtrl::ROW_SHL0, 1, 15})
9107 .Case(
"row_shr", {DppCtrl::ROW_SHR0, 1, 15})
9108 .Case(
"row_ror", {DppCtrl::ROW_ROR0, 1, 15})
9109 .Case(
"row_share", {DppCtrl::ROW_SHARE_FIRST, 0, 15})
9110 .Case(
"row_xmask", {DppCtrl::ROW_XMASK_FIRST, 0, 15})
9111 .Case(
"row_newbcast", {DppCtrl::ROW_NEWBCAST_FIRST, 0, 15})
9115 if (
Check.Ctrl == -1) {
9116 Valid = (
Ctrl ==
"row_bcast" && (Val == 15 || Val == 31));
9117 Val = (Val == 15)? DppCtrl::BCAST15 : DppCtrl::BCAST31;
9132 using namespace AMDGPU::DPP;
9135 !isSupportedDPPCtrl(getTokenStr(),
Operands))
9144 if (Ctrl ==
"row_mirror") {
9145 Val = DppCtrl::ROW_MIRROR;
9146 }
else if (Ctrl ==
"row_half_mirror") {
9147 Val = DppCtrl::ROW_HALF_MIRROR;
9150 if (Ctrl ==
"quad_perm") {
9151 Val = parseDPPCtrlPerm();
9153 Val = parseDPPCtrlSel(Ctrl);
9162 AMDGPUOperand::CreateImm(
this, Val, S, AMDGPUOperand::ImmTyDppCtrl));
9168 OptionalImmIndexMap OptionalIdx;
9178 bool IsMAC = OldIdx != -1 && Src2ModIdx != -1 &&
9182 for (
unsigned J = 0; J <
Desc.getNumDefs(); ++J) {
9183 ((AMDGPUOperand &)*
Operands[
I++]).addRegOperands(Inst, 1);
9187 for (
unsigned E =
Operands.size();
I != E; ++
I) {
9191 if (OldIdx == NumOperands) {
9193 constexpr int DST_IDX = 0;
9195 }
else if (Src2ModIdx == NumOperands) {
9206 bool IsVOP3CvtSrDpp =
9207 Opc == AMDGPU::V_CVT_SR_BF8_F32_gfx12_e64_dpp8_gfx12 ||
9208 Opc == AMDGPU::V_CVT_SR_FP8_F32_gfx12_e64_dpp8_gfx12 ||
9209 Opc == AMDGPU::V_CVT_SR_BF8_F32_gfx12_e64_dpp_gfx12 ||
9210 Opc == AMDGPU::V_CVT_SR_FP8_F32_gfx12_e64_dpp_gfx12;
9211 if (IsVOP3CvtSrDpp) {
9225 AMDGPUOperand &
Op = ((AMDGPUOperand &)*
Operands[
I]);
9227 if (IsDPP8 &&
Op.isDppFI()) {
9230 Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
9231 }
else if (
Op.isReg()) {
9232 Op.addRegOperands(Inst, 1);
9233 }
else if (
Op.isImm() &&
9235 assert(!
Op.IsImmKindLiteral() &&
"Cannot use literal with DPP");
9236 Op.addImmOperands(Inst, 1);
9237 }
else if (
Op.isImm()) {
9238 OptionalIdx[
Op.getImmTy()] =
I;
9246 AMDGPUOperand::ImmTyByteSel);
9255 cvtVOP3P(Inst,
Operands, OptionalIdx);
9257 cvtVOP3OpSel(Inst,
Operands, OptionalIdx);
9274 AMDGPUOperand::ImmTyDppFI);
9279 OptionalImmIndexMap OptionalIdx;
9283 for (
unsigned J = 0; J <
Desc.getNumDefs(); ++J) {
9284 ((AMDGPUOperand &)*
Operands[
I++]).addRegOperands(Inst, 1);
9288 for (
unsigned E =
Operands.size();
I != E; ++
I) {
9296 AMDGPUOperand &
Op = ((AMDGPUOperand &)*
Operands[
I]);
9298 if (
Op.isReg() && validateVccOperand(
Op.getReg())) {
9306 Op.addImmOperands(Inst, 1);
9308 Op.addRegWithFPInputModsOperands(Inst, 2);
9309 }
else if (
Op.isDppFI()) {
9311 }
else if (
Op.isReg()) {
9312 Op.addRegOperands(Inst, 1);
9318 Op.addRegWithFPInputModsOperands(Inst, 2);
9319 }
else if (
Op.isReg()) {
9320 Op.addRegOperands(Inst, 1);
9321 }
else if (
Op.isDPPCtrl()) {
9322 Op.addImmOperands(Inst, 1);
9323 }
else if (
Op.isImm()) {
9325 OptionalIdx[
Op.getImmTy()] =
I;
9341 AMDGPUOperand::ImmTyDppFI);
9352 AMDGPUOperand::ImmTy
Type) {
9365 .
Case(
"BYTE_0", SdwaSel::BYTE_0)
9366 .
Case(
"BYTE_1", SdwaSel::BYTE_1)
9367 .
Case(
"BYTE_2", SdwaSel::BYTE_2)
9368 .
Case(
"BYTE_3", SdwaSel::BYTE_3)
9369 .
Case(
"WORD_0", SdwaSel::WORD_0)
9370 .
Case(
"WORD_1", SdwaSel::WORD_1)
9371 .
Case(
"DWORD", SdwaSel::DWORD)
9374 if (
Int == 0xffffffff)
9375 return Error(StringLoc,
"invalid " +
Twine(Prefix) +
" value");
9394 .
Case(
"UNUSED_PAD", DstUnused::UNUSED_PAD)
9395 .
Case(
"UNUSED_SEXT", DstUnused::UNUSED_SEXT)
9396 .
Case(
"UNUSED_PRESERVE", DstUnused::UNUSED_PRESERVE)
9399 if (
Int == 0xffffffff)
9400 return Error(StringLoc,
"invalid dst_unused value");
9402 Operands.push_back(AMDGPUOperand::CreateImm(
this,
Int, S, AMDGPUOperand::ImmTySDWADstUnused));
9432 OptionalImmIndexMap OptionalIdx;
9433 bool SkipVcc = SkipDstVcc || SkipSrcVcc;
9434 bool SkippedVcc =
false;
9438 for (
unsigned J = 0; J <
Desc.getNumDefs(); ++J) {
9439 ((AMDGPUOperand &)*
Operands[
I++]).addRegOperands(Inst, 1);
9442 for (
unsigned E =
Operands.size();
I != E; ++
I) {
9443 AMDGPUOperand &
Op = ((AMDGPUOperand &)*
Operands[
I]);
9444 if (SkipVcc && !SkippedVcc &&
Op.isReg() &&
9445 (
Op.getReg() == AMDGPU::VCC ||
Op.getReg() == AMDGPU::VCC_LO)) {
9463 Op.addRegOrImmWithInputModsOperands(Inst, 2);
9464 }
else if (
Op.isImm()) {
9466 OptionalIdx[
Op.getImmTy()] =
I;
9474 if (Opc != AMDGPU::V_NOP_sdwa_gfx10 && Opc != AMDGPU::V_NOP_sdwa_gfx9 &&
9475 Opc != AMDGPU::V_NOP_sdwa_vi) {
9477 switch (BasicInstType) {
9481 AMDGPUOperand::ImmTyClampSI, 0);
9485 AMDGPUOperand::ImmTyOModSI, 0);
9489 AMDGPUOperand::ImmTySDWADstSel, SdwaSel::DWORD);
9493 AMDGPUOperand::ImmTySDWADstUnused,
9494 DstUnused::UNUSED_PRESERVE);
9519 llvm_unreachable(
"Invalid instruction type. Only VOP1, VOP2 and VOPC allowed");
9525 if (Inst.
getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi ||
9526 Inst.
getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) {
9527 auto it = Inst.
begin();
9540#define GET_REGISTER_MATCHER
9541#define GET_MATCHER_IMPLEMENTATION
9542#define GET_MNEMONIC_SPELL_CHECKER
9543#define GET_MNEMONIC_CHECKER
9544#include "AMDGPUGenAsmMatcher.inc"
9550 return parseTokenOp(
"addr64",
Operands);
9552 return parseTokenOp(
"done",
Operands);
9554 return parseTokenOp(
"idxen",
Operands);
9556 return parseTokenOp(
"lds",
Operands);
9558 return parseTokenOp(
"offen",
Operands);
9560 return parseTokenOp(
"off",
Operands);
9562 return parseTokenOp(
"row_en",
Operands);
9564 return parseNamedBit(
"gds",
Operands, AMDGPUOperand::ImmTyGDS);
9566 return parseNamedBit(
"tfe",
Operands, AMDGPUOperand::ImmTyTFE);
9568 return tryCustomParseOperand(
Operands, MCK);
9579 AMDGPUOperand &Operand = (AMDGPUOperand&)
Op;
9582 return Operand.isAddr64() ? Match_Success : Match_InvalidOperand;
9584 return Operand.isGDS() ? Match_Success : Match_InvalidOperand;
9586 return Operand.isLDS() ? Match_Success : Match_InvalidOperand;
9588 return Operand.isIdxen() ? Match_Success : Match_InvalidOperand;
9590 return Operand.isOffen() ? Match_Success : Match_InvalidOperand;
9592 return Operand.isTFE() ? Match_Success : Match_InvalidOperand;
9600 return Operand.isSSrc_b32() ? Match_Success : Match_InvalidOperand;
9602 return Operand.isSSrc_f32() ? Match_Success : Match_InvalidOperand;
9603 case MCK_SOPPBrTarget:
9604 return Operand.isSOPPBrTarget() ? Match_Success : Match_InvalidOperand;
9605 case MCK_VReg32OrOff:
9606 return Operand.isVReg32OrOff() ? Match_Success : Match_InvalidOperand;
9607 case MCK_InterpSlot:
9608 return Operand.isInterpSlot() ? Match_Success : Match_InvalidOperand;
9609 case MCK_InterpAttr:
9610 return Operand.isInterpAttr() ? Match_Success : Match_InvalidOperand;
9611 case MCK_InterpAttrChan:
9612 return Operand.isInterpAttrChan() ? Match_Success : Match_InvalidOperand;
9614 case MCK_SReg_64_XEXEC:
9620 return Operand.isNull() ? Match_Success : Match_InvalidOperand;
9622 return Match_InvalidOperand;
9634 if (!parseExpr(Imm)) {
9639 if (!isUInt<16>(Imm))
9640 return Error(S,
"expected a 16-bit value");
9643 AMDGPUOperand::CreateImm(
this, Imm, S, AMDGPUOperand::ImmTyEndpgm));
9647bool AMDGPUOperand::isEndpgm()
const {
return isImmTy(ImmTyEndpgm); }
9653bool AMDGPUOperand::isSplitBarrier()
const {
return isInlinableImm(MVT::i32); }
unsigned const MachineRegisterInfo * MRI
LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUAsmParser()
Force static initialization.
static bool checkWriteLane(const MCInst &Inst)
static bool getRegNum(StringRef Str, unsigned &Num)
static constexpr RegInfo RegularRegisters[]
static const RegInfo * getRegularRegInfo(StringRef Str)
static ArrayRef< unsigned > getAllVariants()
static OperandIndices getSrcOperandIndices(unsigned Opcode, bool AddMandatoryLiterals=false)
static bool IsMovrelsSDWAOpcode(const unsigned Opcode)
static const fltSemantics * getFltSemantics(unsigned Size)
static bool isRegularReg(RegisterKind Kind)
static bool ConvertOmodMul(int64_t &Mul)
#define PARSE_BITS_ENTRY(FIELD, ENTRY, VALUE, RANGE)
static bool isInlineableLiteralOp16(int64_t Val, MVT VT, bool HasInv2Pi)
static bool canLosslesslyConvertToFPType(APFloat &FPLiteral, MVT VT)
constexpr uint64_t MIMGFlags
static bool AMDGPUCheckMnemonic(StringRef Mnemonic, const FeatureBitset &AvailableFeatures, unsigned VariantID)
static void applyMnemonicAliases(StringRef &Mnemonic, const FeatureBitset &Features, unsigned VariantID)
constexpr unsigned MAX_SRC_OPERANDS_NUM
#define EXPR_RESOLVE_OR_ERROR(RESOLVED)
static bool ConvertOmodDiv(int64_t &Div)
static unsigned getSpecialRegForName(StringRef RegName)
static void addSrcModifiersAndSrc(MCInst &Inst, const OperandVector &Operands, unsigned i, unsigned Opc, unsigned OpName)
static bool IsRevOpcode(const unsigned Opcode)
static int getRegClass(RegisterKind Is, unsigned RegWidth)
static void addOptionalImmOperand(MCInst &Inst, const OperandVector &Operands, AMDGPUAsmParser::OptionalImmIndexMap &OptionalIdx, AMDGPUOperand::ImmTy ImmT, int64_t Default=0)
static bool encodeCnt(const AMDGPU::IsaVersion ISA, int64_t &IntVal, int64_t CntVal, bool Saturate, unsigned(*encode)(const IsaVersion &Version, unsigned, unsigned), unsigned(*decode)(const IsaVersion &Version, unsigned))
static void cvtVOP3DstOpSelOnly(MCInst &Inst, const MCRegisterInfo &MRI)
static bool isRegOrImmWithInputMods(const MCInstrDesc &Desc, unsigned OpNum)
static const fltSemantics * getOpFltSemantics(uint8_t OperandType)
static bool isInvalidVOPDY(const OperandVector &Operands, uint64_t InvalidOprIdx)
static std::string AMDGPUMnemonicSpellCheck(StringRef S, const FeatureBitset &FBS, unsigned VariantID=0)
static LLVM_READNONE unsigned encodeBitmaskPerm(const unsigned AndMask, const unsigned OrMask, const unsigned XorMask)
static bool isSafeTruncation(int64_t Val, unsigned Size)
static int IsAGPROperand(const MCInst &Inst, uint16_t NameIdx, const MCRegisterInfo *MRI)
AMDHSA kernel descriptor MCExpr struct for use in MC layer.
Provides AMDGPU specific target descriptions.
AMDHSA kernel descriptor definitions.
@ AMD_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32
This file declares a class to represent arbitrary precision floating point values and provide a varie...
static void print(raw_ostream &Out, object::Archive::Kind Kind, T Val)
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
Analysis containing CSE Info
#define LLVM_EXTERNAL_VISIBILITY
Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
static unsigned getOperandSize(MachineInstr &MI, unsigned Idx, MachineRegisterInfo &MRI)
static llvm::Expected< InlineInfo > decode(DataExtractor &Data, uint64_t &Offset, uint64_t BaseAddr)
Decode an InlineInfo in Data at the specified offset.
mir Rename Register Operands
unsigned const TargetRegisterInfo * TRI
static unsigned getReg(const MCDisassembler *D, unsigned RC, unsigned RegNo)
static bool isReg(const MCInst &MI, unsigned OpNo)
#define G_00B848_FWD_PROGRESS(x)
#define G_00B848_MEM_ORDERED(x)
#define G_00B848_IEEE_MODE(x)
#define G_00B848_DX10_CLAMP(x)
#define G_00B848_WGP_MODE(x)
Interface definition for SIInstrInfo.
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
Interface definition for SIRegisterInfo.
static bool isImm(const MachineOperand &MO, MachineRegisterInfo *MRI)
unsigned unsigned DefaultVal
This file implements the SmallBitVector class.
StringSet - A set-like wrapper for the StringMap.
static void initialize(TargetLibraryInfoImpl &TLI, const Triple &T, ArrayRef< StringLiteral > StandardNames)
Initialize the set of available library functions based on the specified target triple.
static bool contains(SmallPtrSetImpl< ConstantExpr * > &Cache, ConstantExpr *Expr, Constant *C)
support::ulittle16_t & Lo
support::ulittle16_t & Hi
static const AMDGPUVariadicMCExpr * create(VariadicKind Kind, ArrayRef< const MCExpr * > Args, MCContext &Ctx)
opStatus convert(const fltSemantics &ToSemantics, roundingMode RM, bool *losesInfo)
Class for arbitrary precision integers.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
size_t size() const
size - Get the array size.
Target independent representation for an assembler token.
StringRef getString() const
Get the string for the current token, this includes all characters (for example, the quotes on string...
bool is(TokenKind K) const
TokenKind getKind() const
This class represents an Operation in the Expression.
Base class for user error types.
Lightweight error class with error context and mandatory checking.
Tagged union holding either a T or a Error.
Class representing an expression and its matching format.
Container class for subtarget features.
constexpr bool test(unsigned I) const
constexpr FeatureBitset & flip(unsigned I)
virtual void Initialize(MCAsmParser &Parser)
Initialize the extension for parsing using the given Parser.
MCAsmParser & getParser()
Generic assembler parser interface, for use by target specific assembly parsers.
virtual MCStreamer & getStreamer()=0
Return the output streamer for the assembler.
static const MCConstantExpr * create(int64_t Value, MCContext &Ctx, bool PrintInHex=false, unsigned SizeInBytes=0)
Context object for machine code objects.
const MCRegisterInfo * getRegisterInfo() const
MCSymbol * getOrCreateSymbol(const Twine &Name)
Lookup the symbol inside with the specified Name.
const MCSubtargetInfo * getSubtargetInfo() const
Base class for the full range of assembler expressions which are needed for parsing.
Instances of this class represent a single low-level machine instruction.
unsigned getNumOperands() const
unsigned getOpcode() const
iterator insert(iterator I, const MCOperand &Op)
void addOperand(const MCOperand Op)
const MCOperand & getOperand(unsigned i) const
Describe properties that are true of each instruction in the target description file.
bool mayStore() const
Return true if this instruction could possibly modify memory.
Interface to description of machine instruction set.
const MCInstrDesc & get(unsigned Opcode) const
Return the machine instruction descriptor that corresponds to the specified instruction opcode.
Instances of this class represent operands of the MCInst class.
static MCOperand createReg(unsigned Reg)
static MCOperand createExpr(const MCExpr *Val)
void setReg(unsigned Reg)
Set the register number.
static MCOperand createImm(int64_t Val)
unsigned getReg() const
Returns the register number.
MCParsedAsmOperand - This abstract class represents a source-level assembly instruction operand.
virtual bool isReg() const =0
isReg - Is this a register operand?
virtual bool isMem() const =0
isMem - Is this a memory operand?
virtual MCRegister getReg() const =0
virtual bool isToken() const =0
isToken - Is this a token operand?
virtual bool isImm() const =0
isImm - Is this an immediate operand?
MCRegisterClass - Base class of TargetRegisterClass.
unsigned getNumRegs() const
getNumRegs - Return the number of registers in this class.
unsigned getRegister(unsigned i) const
getRegister - Return the specified register in the class.
bool contains(MCRegister Reg) const
contains - Return true if the specified register is included in this register class.
MCRegisterInfo base class - We assume that the target defines a static array of MCRegisterDesc object...
Wrapper class representing physical registers. Should be passed by value.
static constexpr unsigned NoRegister
Streaming machine code generation interface.
virtual void emitInstruction(const MCInst &Inst, const MCSubtargetInfo &STI)
Emit the given Instruction into the current section.
MCTargetStreamer * getTargetStreamer()
Generic base class for all target subtargets.
const FeatureBitset & getFeatureBits() const
FeatureBitset ToggleFeature(uint64_t FB)
Toggle a feature and return the re-computed feature bits.
MCSymbol - Instances of this class represent a symbol name in the MC file, and MCSymbols are created ...
void setVariableValue(const MCExpr *Value)
MCTargetAsmParser - Generic interface to target specific assembly parsers.
MCSubtargetInfo & copySTI()
Create a copy of STI and return a non-const reference to it.
virtual bool parseRegister(MCRegister &Reg, SMLoc &StartLoc, SMLoc &EndLoc)=0
virtual bool ParseDirective(AsmToken DirectiveID)
ParseDirective - Parse a target specific assembler directive This method is deprecated,...
virtual ParseStatus tryParseRegister(MCRegister &Reg, SMLoc &StartLoc, SMLoc &EndLoc)=0
tryParseRegister - parse one register if possible
void setAvailableFeatures(const FeatureBitset &Value)
const MCSubtargetInfo & getSTI() const
virtual unsigned validateTargetOperandClass(MCParsedAsmOperand &Op, unsigned Kind)
Allow a target to add special case operand matching for things that tblgen doesn't/can't handle effec...
virtual bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name, SMLoc NameLoc, OperandVector &Operands)=0
ParseInstruction - Parse one assembly instruction.
virtual unsigned checkTargetMatchPredicate(MCInst &Inst)
checkTargetMatchPredicate - Validate the instruction match against any complex target predicates not ...
virtual bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, OperandVector &Operands, MCStreamer &Out, uint64_t &ErrorInfo, bool MatchingInlineAsm)=0
MatchAndEmitInstruction - Recognize a series of operands of a parsed instruction as an actual MCInst ...
Target specific streamer interface.
uint64_t getScalarSizeInBits() const
TypeSize getSizeInBits() const
Returns the size of the specified MVT in bits.
MVT getScalarType() const
If this is a vector, return the element type, otherwise return this.
MutableArrayRef - Represent a mutable reference to an array (0 or more elements consecutively in memo...
Ternary parse status returned by various parse* methods.
constexpr bool isFailure() const
static constexpr StatusTy Failure
constexpr bool isSuccess() const
static constexpr StatusTy Success
static constexpr StatusTy NoMatch
constexpr bool isNoMatch() const
Wrapper class representing virtual and physical registers.
Represents a location in source code.
static SMLoc getFromPointer(const char *Ptr)
constexpr const char * getPointer() const
constexpr bool isValid() const
Represents a range in source code.
Implements a dense probed hash-table based set with some number of buckets stored inline.
SmallString - A SmallString is just a SmallVector with methods and accessors that make it work better...
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
A wrapper around a string literal that serves as a proxy for constructing global tables of StringRefs...
StringMapEntry - This is used to represent one value that is inserted into a StringMap.
StringRef - Represent a constant reference to a string, i.e.
bool consume_back(StringRef Suffix)
Returns true if this StringRef has the given suffix and removes that suffix.
bool starts_with(StringRef Prefix) const
Check if this string starts with the given Prefix.
constexpr bool empty() const
empty - Check if the string is empty.
StringRef drop_front(size_t N=1) const
Return a StringRef equal to 'this' but with the first N elements dropped.
constexpr size_t size() const
size - Get the string size.
constexpr const char * data() const
data - Get a pointer to the start of the string (which may not be null terminated).
bool ends_with(StringRef Suffix) const
Check if this string ends with the given Suffix.
StringSet - A wrapper for StringMap that provides set-like functionality.
bool contains(StringRef key) const
Check if the set contains the given key.
std::pair< typename Base::iterator, bool > insert(StringRef key)
A switch()-like statement whose cases are string literals.
StringSwitch & Case(StringLiteral S, T Value)
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
The instances of the Type class are immutable: once they are created, they are never changed.
LLVM Value Representation.
std::pair< iterator, bool > insert(const ValueT &V)
This class implements an extremely fast bulk output stream that can only output to a stream.
A raw_ostream that writes to an std::string.
A raw_ostream that writes to an SmallVector or SmallString.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
int encodeDepCtr(const StringRef Name, int64_t Val, unsigned &UsedOprMask, const MCSubtargetInfo &STI)
int getDefaultDepCtrEncoding(const MCSubtargetInfo &STI)
bool isSupportedTgtId(unsigned Id, const MCSubtargetInfo &STI)
unsigned getTgtId(const StringRef Name)
constexpr char NumVGPRs[]
Key for Kernel::CodeProps::Metadata::mNumVGPRs.
constexpr char NumSGPRs[]
Key for Kernel::CodeProps::Metadata::mNumSGPRs.
constexpr char SymbolName[]
Key for Kernel::Metadata::mSymbolName.
constexpr char AssemblerDirectiveBegin[]
HSA metadata beginning assembler directive.
constexpr char AssemblerDirectiveEnd[]
HSA metadata ending assembler directive.
constexpr char AssemblerDirectiveBegin[]
Old HSA metadata beginning assembler directive for V2.
int64_t getHwregId(StringRef Name, const MCSubtargetInfo &STI)
unsigned getNumExtraSGPRs(const MCSubtargetInfo *STI, bool VCCUsed, bool FlatScrUsed, bool XNACKUsed)
@ FIXED_NUM_SGPRS_FOR_INIT_BUG
unsigned getLocalMemorySize(const MCSubtargetInfo *STI)
unsigned getAddressableNumSGPRs(const MCSubtargetInfo *STI)
unsigned getNumSGPRBlocks(const MCSubtargetInfo *STI, unsigned NumSGPRs)
unsigned getEncodedNumVGPRBlocks(const MCSubtargetInfo *STI, unsigned NumVGPRs, std::optional< bool > EnableWavefrontSize32)
constexpr char AssemblerDirective[]
PAL metadata (old linear format) assembler directive.
constexpr char AssemblerDirectiveBegin[]
PAL metadata (new MsgPack format) beginning assembler directive.
constexpr char AssemblerDirectiveEnd[]
PAL metadata (new MsgPack format) ending assembler directive.
int64_t getMsgOpId(int64_t MsgId, StringRef Name, const MCSubtargetInfo &STI)
Map from a symbolic name for a sendmsg operation to the operation portion of the immediate encoding.
int64_t getMsgId(StringRef Name, const MCSubtargetInfo &STI)
Map from a symbolic name for a msg_id to the message portion of the immediate encoding.
uint64_t encodeMsg(uint64_t MsgId, uint64_t OpId, uint64_t StreamId)
bool msgSupportsStream(int64_t MsgId, int64_t OpId, const MCSubtargetInfo &STI)
bool isValidMsgId(int64_t MsgId, const MCSubtargetInfo &STI)
bool isValidMsgStream(int64_t MsgId, int64_t OpId, int64_t StreamId, const MCSubtargetInfo &STI, bool Strict)
bool msgRequiresOp(int64_t MsgId, const MCSubtargetInfo &STI)
bool isValidMsgOp(int64_t MsgId, int64_t OpId, const MCSubtargetInfo &STI, bool Strict)
constexpr unsigned COMPONENTS[]
bool isInlinableLiteralBF16(int16_t Literal, bool HasInv2Pi)
bool isGFX10_BEncoding(const MCSubtargetInfo &STI)
LLVM_READONLY const MIMGInfo * getMIMGInfo(unsigned Opc)
unsigned getRegOperandSize(const MCRegisterInfo *MRI, const MCInstrDesc &Desc, unsigned OpNo)
Get size of register operand.
bool isInlinableLiteralFP16(int16_t Literal, bool HasInv2Pi)
LLVM_READNONE bool isLegalDPALU_DPPControl(unsigned DC)
const int OPR_ID_UNSUPPORTED
bool isInlinableLiteralV2I16(uint32_t Literal)
int32_t getTotalNumVGPRs(bool has90AInsts, int32_t ArgNumAGPR, int32_t ArgNumVGPR)
bool isGFX10(const MCSubtargetInfo &STI)
LLVM_READONLY int16_t getNamedOperandIdx(uint16_t Opcode, uint16_t NamedIdx)
void initDefaultAMDKernelCodeT(amd_kernel_code_t &Header, const MCSubtargetInfo *STI)
bool isInlinableLiteralV2BF16(uint32_t Literal)
unsigned getMaxNumUserSGPRs(const MCSubtargetInfo &STI)
unsigned getNumFlatOffsetBits(const MCSubtargetInfo &ST)
For pre-GFX12 FLAT instructions the offset must be positive; MSB is ignored and forced to zero.
unsigned mc2PseudoReg(unsigned Reg)
Convert hardware register Reg to a pseudo register.
bool hasA16(const MCSubtargetInfo &STI)
bool isLegalSMRDEncodedSignedOffset(const MCSubtargetInfo &ST, int64_t EncodedOffset, bool IsBuffer)
bool isGFX12Plus(const MCSubtargetInfo &STI)
unsigned getNSAMaxSize(const MCSubtargetInfo &STI, bool HasSampler)
bool hasPackedD16(const MCSubtargetInfo &STI)
bool isGFX940(const MCSubtargetInfo &STI)
bool isInlinableLiteralV2F16(uint32_t Literal)
bool isHsaAbi(const MCSubtargetInfo &STI)
bool isGFX11(const MCSubtargetInfo &STI)
const int OPR_VAL_INVALID
bool getSMEMIsBuffer(unsigned Opc)
IsaVersion getIsaVersion(StringRef GPU)
bool isValid32BitLiteral(uint64_t Val, bool IsFP64)
bool isDPALU_DPP(const MCInstrDesc &OpDesc)
bool isSI(const MCSubtargetInfo &STI)
unsigned decodeLgkmcnt(const IsaVersion &Version, unsigned Waitcnt)
unsigned getWaitcntBitMask(const IsaVersion &Version)
bool isGFX9(const MCSubtargetInfo &STI)
bool isGFX10_AEncoding(const MCSubtargetInfo &STI)
bool isKImmOperand(const MCInstrDesc &Desc, unsigned OpNo)
Is this a KImm operand?
bool isGFX90A(const MCSubtargetInfo &STI)
LLVM_READONLY const MIMGDimInfo * getMIMGDimInfoByEncoding(uint8_t DimEnc)
bool isInlinableLiteral32(int32_t Literal, bool HasInv2Pi)
bool isGFX12(const MCSubtargetInfo &STI)
unsigned encodeExpcnt(const IsaVersion &Version, unsigned Waitcnt, unsigned Expcnt)
bool isSISrcOperand(const MCInstrDesc &Desc, unsigned OpNo)
Is this an AMDGPU specific source operand? These include registers, inline constants,...
bool hasMAIInsts(const MCSubtargetInfo &STI)
LLVM_READONLY bool hasNamedOperand(uint64_t Opcode, uint64_t NamedIdx)
LLVM_READNONE bool isInlinableIntLiteral(int64_t Literal)
Is this literal inlinable, and not one of the values intended for floating point values.
bool isSGPR(unsigned Reg, const MCRegisterInfo *TRI)
Is Reg - scalar register.
bool isHi(unsigned Reg, const MCRegisterInfo &MRI)
LLVM_READONLY const MIMGDimInfo * getMIMGDimInfoByAsmSuffix(StringRef AsmSuffix)
bool hasMIMG_R128(const MCSubtargetInfo &STI)
bool hasG16(const MCSubtargetInfo &STI)
unsigned getAddrSizeMIMGOp(const MIMGBaseOpcodeInfo *BaseOpcode, const MIMGDimInfo *Dim, bool IsA16, bool IsG16Supported)
bool hasArchitectedFlatScratch(const MCSubtargetInfo &STI)
bool isGFX11Plus(const MCSubtargetInfo &STI)
bool isInlineValue(unsigned Reg)
bool isSISrcFPOperand(const MCInstrDesc &Desc, unsigned OpNo)
Is this floating-point operand?
bool isGFX10Plus(const MCSubtargetInfo &STI)
unsigned getMCReg(unsigned Reg, const MCSubtargetInfo &STI)
If Reg is a pseudo reg, return the correct hardware register given STI otherwise return Reg.
@ OPERAND_KIMM32
Operand with 32-bit immediate that uses the constant bus.
@ OPERAND_REG_INLINE_C_V2INT32
@ OPERAND_REG_INLINE_C_FP64
@ OPERAND_REG_INLINE_C_BF16
@ OPERAND_REG_INLINE_C_V2BF16
@ OPERAND_REG_IMM_V2INT16
@ OPERAND_REG_INLINE_AC_V2FP16
@ OPERAND_REG_IMM_INT32
Operands with register or 32-bit immediate.
@ OPERAND_REG_IMM_BF16_DEFERRED
@ OPERAND_REG_INLINE_C_INT64
@ OPERAND_REG_INLINE_AC_BF16
@ OPERAND_REG_INLINE_C_INT16
Operands with register or inline constant.
@ OPERAND_REG_INLINE_AC_INT16
Operands with an AccVGPR register or inline constant.
@ OPERAND_REG_INLINE_C_V2FP16
@ OPERAND_REG_INLINE_AC_V2INT16
@ OPERAND_REG_INLINE_AC_FP16
@ OPERAND_REG_INLINE_AC_INT32
@ OPERAND_REG_INLINE_AC_FP32
@ OPERAND_REG_INLINE_AC_V2BF16
@ OPERAND_REG_IMM_V2INT32
@ OPERAND_REG_INLINE_C_FP32
@ OPERAND_REG_INLINE_C_INT32
@ OPERAND_REG_INLINE_C_V2INT16
@ OPERAND_REG_INLINE_AC_FP64
@ OPERAND_REG_INLINE_C_FP16
@ OPERAND_REG_INLINE_C_V2FP32
@ OPERAND_INLINE_SPLIT_BARRIER_INT32
@ OPERAND_REG_IMM_FP32_DEFERRED
@ OPERAND_REG_IMM_FP16_DEFERRED
bool hasGDS(const MCSubtargetInfo &STI)
bool isLegalSMRDEncodedUnsignedOffset(const MCSubtargetInfo &ST, int64_t EncodedOffset)
bool isGFX9Plus(const MCSubtargetInfo &STI)
bool hasDPPSrc1SGPR(const MCSubtargetInfo &STI)
const int OPR_ID_DUPLICATE
bool isVOPD(unsigned Opc)
VOPD::InstInfo getVOPDInstInfo(const MCInstrDesc &OpX, const MCInstrDesc &OpY)
unsigned encodeVmcnt(const IsaVersion &Version, unsigned Waitcnt, unsigned Vmcnt)
unsigned decodeExpcnt(const IsaVersion &Version, unsigned Waitcnt)
bool isVI(const MCSubtargetInfo &STI)
unsigned hasKernargPreload(const MCSubtargetInfo &STI)
LLVM_READNONE unsigned getOperandSize(const MCOperandInfo &OpInfo)
bool isCI(const MCSubtargetInfo &STI)
unsigned encodeLgkmcnt(const IsaVersion &Version, unsigned Waitcnt, unsigned Lgkmcnt)
LLVM_READONLY const MIMGBaseOpcodeInfo * getMIMGBaseOpcodeInfo(unsigned BaseOpcode)
unsigned decodeVmcnt(const IsaVersion &Version, unsigned Waitcnt)
bool isInlinableLiteralI16(int32_t Literal, bool HasInv2Pi)
bool hasVOPD(const MCSubtargetInfo &STI)
bool isInlinableLiteral64(int64_t Literal, bool HasInv2Pi)
Is this literal inlinable.
bool isPermlane16(unsigned Opc)
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
@ UNDEF
UNDEF - An undefined node.
Predicate getPredicate(unsigned Condition, unsigned Hint)
Return predicate consisting of specified condition and hint bits.
void validate(const Triple &TT, const FeatureBitset &FeatureBits)
Reg
All possible values of the reg field in the ModR/M byte.
std::optional< const char * > toString(const std::optional< DWARFFormValue > &V)
Take an optional DWARFFormValue and try to extract a string value from it.
Scope
Defines the scope in which this symbol should be visible: Default – Visible in the public interface o...
This is an optimization pass for GlobalISel generic memory operations.
bool errorToBool(Error Err)
Helper for converting an Error to a bool.
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
int popcount(T Value) noexcept
Count the number of set bits in a value.
unsigned encode(MaybeAlign A)
Returns a representation of the alignment that encodes undefined as 0.
uint64_t divideCeil(uint64_t Numerator, uint64_t Denominator)
Returns the integer ceil(Numerator / Denominator).
bool isUIntN(unsigned N, uint64_t x)
Checks if an unsigned integer fits into the given (dynamic) bit width.
static StringRef getCPU(StringRef CPU)
Processes a CPU name.
testing::Matcher< const detail::ErrorHolder & > Failed()
void PrintError(const Twine &Msg)
constexpr bool isPowerOf2_64(uint64_t Value)
Return true if the argument is a power of two > 0 (64 bit edition.)
T bit_ceil(T Value)
Returns the smallest integral power of two no smaller than Value if Value is nonzero.
Target & getTheR600Target()
The target for R600 GPUs.
constexpr uint32_t Hi_32(uint64_t Value)
Return the high 32 bits of a 64 bit value.
constexpr bool isUInt(uint64_t x)
Checks if an unsigned integer fits into the given bit width.
constexpr uint32_t Lo_32(uint64_t Value)
Return the low 32 bits of a 64 bit value.
@ First
Helpers to iterate all locations in the MemoryEffectsBase class.
Target & getTheGCNTarget()
The target for GCN GPUs.
bool isIntN(unsigned N, int64_t x)
Checks if a signed integer fits into the given (dynamic) bit width.
uint64_t alignTo(uint64_t Size, Align A)
Returns a multiple of A needed to store Size bytes.
DWARFExpression::Operation Op
raw_ostream & operator<<(raw_ostream &OS, const APFixedPoint &FX)
unsigned M0(unsigned Val)
auto find_if(R &&Range, UnaryPredicate P)
Provide wrappers to std::find_if which take ranges instead of having to pass begin/end explicitly.
bool parseAmdKernelCodeField(StringRef ID, MCAsmParser &Parser, amd_kernel_code_t &C, raw_ostream &Err)
AMD Kernel Code Object (amd_kernel_code_t).
Instruction set architecture version.
const MCExpr * compute_pgm_rsrc2
const MCExpr * kernarg_size
const MCExpr * kernarg_preload
const MCExpr * compute_pgm_rsrc3
const MCExpr * private_segment_fixed_size
const MCExpr * compute_pgm_rsrc1
static void bits_set(const MCExpr *&Dst, const MCExpr *Value, uint32_t Shift, uint32_t Mask, MCContext &Ctx)
const MCExpr * group_segment_fixed_size
static MCKernelDescriptor getDefaultAmdhsaKernelDescriptor(const MCSubtargetInfo *STI, MCContext &Ctx)
const MCExpr * kernel_code_properties
Represents the counter values to wait for in an s_waitcnt instruction.
static const fltSemantics & IEEEsingle() LLVM_READNONE
static const fltSemantics & IEEEhalf() LLVM_READNONE
static const fltSemantics & BFloat() LLVM_READNONE
opStatus
IEEE-754R 7: Default exception handling.
This struct is a compact representation of a valid (non-zero power of two) alignment.
Description of the encoding of one expression Op.
RegisterMCAsmParser - Helper template for registering a target specific assembly parser,...
uint32_t group_segment_fixed_size
uint32_t private_segment_fixed_size