enum RegisterKind { IS_UNKNOWN, IS_VGPR, IS_SGPR, IS_AGPR, IS_TTMP, IS_SPECIAL };
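// AMDGPUOperand describes a single parsed assembly operand: a token, an
// immediate, a register, or an expression (see the Kind checks below).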
  SMLoc StartLoc, EndLoc;
  const AMDGPUAsmParser *AsmParser;

  AMDGPUOperand(KindTy Kind_, const AMDGPUAsmParser *AsmParser_)
      : Kind(Kind_), AsmParser(AsmParser_) {}

  using Ptr = std::unique_ptr<AMDGPUOperand>;

  bool hasFPModifiers() const { return Abs || Neg; }
  bool hasIntModifiers() const { return Sext; }
  bool hasModifiers() const { return hasFPModifiers() || hasIntModifiers(); }
  int64_t getFPModifiersOperand() const {

  int64_t getIntModifiersOperand() const {

  int64_t getModifiersOperand() const {
    assert(!(hasFPModifiers() && hasIntModifiers()) &&
           "fp and int modifiers should not be used simultaneously");
    if (hasFPModifiers())
      return getFPModifiersOperand();
    if (hasIntModifiers())
      return getIntModifiersOperand();
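  // ImmKindTy records how an immediate operand was produced (plain, literal,
  // mandatory literal, or inline constant); the setImmKind*/isImmKind*
  // helpers below keep that tag in sync.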
  ImmKindTyMandatoryLiteral,

  mutable ImmKindTy Kind;

  bool isToken() const override { return Kind == Token; }

  bool isSymbolRefExpr() const {
    return isExpr() && Expr && isa<MCSymbolRefExpr>(Expr);
  }

  bool isImm() const override { return Kind == Immediate; }

  void setImmKindNone() const { Imm.Kind = ImmKindTyNone; }
  void setImmKindLiteral() const { Imm.Kind = ImmKindTyLiteral; }
  void setImmKindMandatoryLiteral() const { Imm.Kind = ImmKindTyMandatoryLiteral; }
  void setImmKindConst() const { Imm.Kind = ImmKindTyConst; }

  bool IsImmKindLiteral() const { return isImm() && Imm.Kind == ImmKindTyLiteral; }
  bool IsImmKindMandatoryLiteral() const {
    return isImm() && Imm.Kind == ImmKindTyMandatoryLiteral;
  }
  bool isImmKindConst() const { return isImm() && Imm.Kind == ImmKindTyConst; }
  bool isInlinableImm(MVT type) const;
  bool isLiteralImm(MVT type) const;

  bool isRegKind() const {

  bool isReg() const override { return isRegKind() && !hasModifiers(); }

  bool isRegOrInline(unsigned RCID, MVT type) const {
    return isRegClass(RCID) || isInlinableImm(type);
  }

    return isRegOrInline(RCID, type) || isLiteralImm(type);
  bool isRegOrImmWithInt16InputMods() const {

  bool isRegOrImmWithIntT16InputMods() const {

  bool isRegOrImmWithInt32InputMods() const {

  bool isRegOrInlineImmWithInt16InputMods() const {
    return isRegOrInline(AMDGPU::VS_32RegClassID, MVT::i16);
  }

  bool isRegOrInlineImmWithInt32InputMods() const {
    return isRegOrInline(AMDGPU::VS_32RegClassID, MVT::i32);
  }

  bool isRegOrImmWithInt64InputMods() const {

  bool isRegOrImmWithFP16InputMods() const {

  bool isRegOrImmWithFPT16InputMods() const {

  bool isRegOrImmWithFP32InputMods() const {

  bool isRegOrImmWithFP64InputMods() const {

  template <bool IsFake16> bool isRegOrInlineImmWithFP16InputMods() const {
    return isRegOrInline(
        IsFake16 ? AMDGPU::VS_32RegClassID : AMDGPU::VS_16RegClassID, MVT::f16);
  }

  bool isRegOrInlineImmWithFP32InputMods() const {
    return isRegOrInline(AMDGPU::VS_32RegClassID, MVT::f32);
  }

  bool isPackedFP16InputMods() const {
  bool isVReg() const {
    return isRegClass(AMDGPU::VGPR_32RegClassID) ||
           isRegClass(AMDGPU::VReg_64RegClassID) ||
           isRegClass(AMDGPU::VReg_96RegClassID) ||
           isRegClass(AMDGPU::VReg_128RegClassID) ||
           isRegClass(AMDGPU::VReg_160RegClassID) ||
           isRegClass(AMDGPU::VReg_192RegClassID) ||
           isRegClass(AMDGPU::VReg_256RegClassID) ||
           isRegClass(AMDGPU::VReg_512RegClassID) ||
           isRegClass(AMDGPU::VReg_1024RegClassID);
  }

  bool isVReg32() const { return isRegClass(AMDGPU::VGPR_32RegClassID); }

  bool isVReg32OrOff() const { return isOff() || isVReg32(); }

  bool isNull() const { return isRegKind() && getReg() == AMDGPU::SGPR_NULL; }

  bool isVRegWithInputMods() const;
  template <bool IsFake16> bool isT16VRegWithInputMods() const;

  bool isSDWAOperand(MVT type) const;
  bool isSDWAFP16Operand() const;
  bool isSDWAFP32Operand() const;
  bool isSDWAInt16Operand() const;
  bool isSDWAInt32Operand() const;
  bool isImmTy(ImmTy ImmT) const {

  template <ImmTy Ty> bool isImmTy() const { return isImmTy(Ty); }

  bool isImmLiteral() const { return isImmTy(ImmTyNone); }

  bool isImmModifier() const { return isImm() && Imm.Type != ImmTyNone; }

  bool isOModSI() const { return isImmTy(ImmTyOModSI); }
  bool isDim() const { return isImmTy(ImmTyDim); }
  bool isR128A16() const { return isImmTy(ImmTyR128A16); }
  bool isOff() const { return isImmTy(ImmTyOff); }
  bool isExpTgt() const { return isImmTy(ImmTyExpTgt); }
  bool isOffen() const { return isImmTy(ImmTyOffen); }
  bool isIdxen() const { return isImmTy(ImmTyIdxen); }
  bool isAddr64() const { return isImmTy(ImmTyAddr64); }
  bool isSMEMOffsetMod() const { return isImmTy(ImmTySMEMOffsetMod); }
  bool isFlatOffset() const { return isImmTy(ImmTyOffset) || isImmTy(ImmTyInstOffset); }
  bool isGDS() const { return isImmTy(ImmTyGDS); }
  bool isLDS() const { return isImmTy(ImmTyLDS); }
  bool isCPol() const { return isImmTy(ImmTyCPol); }
  bool isIndexKey8bit() const { return isImmTy(ImmTyIndexKey8bit); }
  bool isIndexKey16bit() const { return isImmTy(ImmTyIndexKey16bit); }
  bool isTFE() const { return isImmTy(ImmTyTFE); }
  bool isFORMAT() const { return isImmTy(ImmTyFORMAT) && isUInt<7>(getImm()); }
  bool isDppFI() const { return isImmTy(ImmTyDppFI); }
  bool isSDWADstSel() const { return isImmTy(ImmTySDWADstSel); }
  bool isSDWASrc0Sel() const { return isImmTy(ImmTySDWASrc0Sel); }
  bool isSDWASrc1Sel() const { return isImmTy(ImmTySDWASrc1Sel); }
  bool isSDWADstUnused() const { return isImmTy(ImmTySDWADstUnused); }
  bool isInterpSlot() const { return isImmTy(ImmTyInterpSlot); }
  bool isInterpAttr() const { return isImmTy(ImmTyInterpAttr); }
  bool isInterpAttrChan() const { return isImmTy(ImmTyInterpAttrChan); }
  bool isOpSel() const { return isImmTy(ImmTyOpSel); }
  bool isOpSelHi() const { return isImmTy(ImmTyOpSelHi); }
  bool isNegLo() const { return isImmTy(ImmTyNegLo); }
  bool isNegHi() const { return isImmTy(ImmTyNegHi); }
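  // The source-operand predicates below follow a naming convention:
  // SCSrc = SGPR or inline constant, SSrc = SCSrc plus literals/expressions,
  // VCSrc = VGPR/SGPR or inline constant, VSrc = VCSrc plus literals,
  // VISrc = VGPR register classes with inline constants, and AISrc is the
  // AGPR analogue; the suffix names the expected element type.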
  bool isRegOrImm() const {

  bool isRegClass(unsigned RCID) const;

  bool isRegOrInlineNoMods(unsigned RCID, MVT type) const {
    return isRegOrInline(RCID, type) && !hasModifiers();
  }

  bool isSCSrcB16() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i16);
  }

  bool isSCSrcV2B16() const {

  bool isSCSrc_b32() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i32);
  }

  bool isSCSrc_b64() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::i64);
  }

  bool isBoolReg() const;

  bool isSCSrcF16() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f16);
  }

  bool isSCSrcV2F16() const {

  bool isSCSrcF32() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f32);
  }

  bool isSCSrcF64() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::f64);
  }

  bool isSSrc_b32() const {
    return isSCSrc_b32() || isLiteralImm(MVT::i32) || isExpr();
  }

  bool isSSrc_b16() const { return isSCSrcB16() || isLiteralImm(MVT::i16); }

  bool isSSrcV2B16() const {

  bool isSSrc_b64() const {
    return isSCSrc_b64() || isLiteralImm(MVT::i64);
  }

  bool isSSrc_f32() const {
    return isSCSrc_b32() || isLiteralImm(MVT::f32) || isExpr();
  }

  bool isSSrcF64() const { return isSCSrc_b64() || isLiteralImm(MVT::f64); }

  bool isSSrc_bf16() const { return isSCSrcB16() || isLiteralImm(MVT::bf16); }

  bool isSSrc_f16() const { return isSCSrcB16() || isLiteralImm(MVT::f16); }

  bool isSSrcV2F16() const {

  bool isSSrcV2FP32() const {

  bool isSCSrcV2FP32() const {

  bool isSSrcV2INT32() const {

  bool isSCSrcV2INT32() const {
    return isSCSrc_b32();
  }

  bool isSSrcOrLds_b32() const {
    return isRegOrInlineNoMods(AMDGPU::SRegOrLds_32RegClassID, MVT::i32) ||
           isLiteralImm(MVT::i32) || isExpr();
  }
  bool isVCSrc_b32() const { return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i32); }
  bool isVCSrcB64() const { return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::i64); }
  bool isVCSrcTB16() const { return isRegOrInlineNoMods(AMDGPU::VS_16RegClassID, MVT::i16); }
  bool isVCSrcTB16_Lo128() const { return isRegOrInlineNoMods(AMDGPU::VS_16_Lo128RegClassID, MVT::i16); }
  bool isVCSrcFake16B16_Lo128() const { return isRegOrInlineNoMods(AMDGPU::VS_32_Lo128RegClassID, MVT::i16); }
  bool isVCSrc_b16() const { return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i16); }
  bool isVCSrc_v2b16() const { return isVCSrc_b16(); }
  bool isVCSrc_f32() const { return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f32); }
  bool isVCSrcF64() const { return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::f64); }
  bool isVCSrcTBF16() const { return isRegOrInlineNoMods(AMDGPU::VS_16RegClassID, MVT::bf16); }
  bool isVCSrcTF16() const { return isRegOrInlineNoMods(AMDGPU::VS_16RegClassID, MVT::f16); }
  bool isVCSrcTBF16_Lo128() const { return isRegOrInlineNoMods(AMDGPU::VS_16_Lo128RegClassID, MVT::bf16); }
  bool isVCSrcTF16_Lo128() const { return isRegOrInlineNoMods(AMDGPU::VS_16_Lo128RegClassID, MVT::f16); }
  bool isVCSrcFake16BF16_Lo128() const { return isRegOrInlineNoMods(AMDGPU::VS_32_Lo128RegClassID, MVT::bf16); }
  bool isVCSrcFake16F16_Lo128() const { return isRegOrInlineNoMods(AMDGPU::VS_32_Lo128RegClassID, MVT::f16); }
  bool isVCSrc_bf16() const { return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::bf16); }
  bool isVCSrc_f16() const { return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f16); }
  bool isVCSrc_v2bf16() const { return isVCSrc_bf16(); }
  bool isVCSrc_v2f16() const { return isVCSrc_f16(); }
  bool isVSrc_b32() const { return isVCSrc_f32() || isLiteralImm(MVT::i32) || isExpr(); }
  bool isVSrc_b64() const { return isVCSrcF64() || isLiteralImm(MVT::i64); }
  bool isVSrcT_b16() const { return isVCSrcTB16() || isLiteralImm(MVT::i16); }
  bool isVSrcT_b16_Lo128() const { return isVCSrcTB16_Lo128() || isLiteralImm(MVT::i16); }
  bool isVSrcFake16_b16_Lo128() const { return isVCSrcFake16B16_Lo128() || isLiteralImm(MVT::i16); }
  bool isVSrc_b16() const { return isVCSrc_b16() || isLiteralImm(MVT::i16); }
  bool isVSrc_v2b16() const { return isVSrc_b16() || isLiteralImm(MVT::v2i16); }

  bool isVCSrcV2FP32() const {

  bool isVSrc_v2f32() const { return isVSrc_f64() || isLiteralImm(MVT::v2f32); }

  bool isVCSrcV2INT32() const {

  bool isVSrc_v2b32() const { return isVSrc_b64() || isLiteralImm(MVT::v2i32); }

  bool isVSrc_f32() const { return isVCSrc_f32() || isLiteralImm(MVT::f32) || isExpr(); }
  bool isVSrc_f64() const { return isVCSrcF64() || isLiteralImm(MVT::f64); }
  bool isVSrcT_bf16() const { return isVCSrcTBF16() || isLiteralImm(MVT::bf16); }
  bool isVSrcT_f16() const { return isVCSrcTF16() || isLiteralImm(MVT::f16); }
  bool isVSrcT_bf16_Lo128() const { return isVCSrcTBF16_Lo128() || isLiteralImm(MVT::bf16); }
  bool isVSrcT_f16_Lo128() const { return isVCSrcTF16_Lo128() || isLiteralImm(MVT::f16); }
  bool isVSrcFake16_bf16_Lo128() const { return isVCSrcFake16BF16_Lo128() || isLiteralImm(MVT::bf16); }
  bool isVSrcFake16_f16_Lo128() const { return isVCSrcFake16F16_Lo128() || isLiteralImm(MVT::f16); }
  bool isVSrc_bf16() const { return isVCSrc_bf16() || isLiteralImm(MVT::bf16); }
  bool isVSrc_f16() const { return isVCSrc_f16() || isLiteralImm(MVT::f16); }
  bool isVSrc_v2bf16() const { return isVSrc_bf16() || isLiteralImm(MVT::v2bf16); }
  bool isVSrc_v2f16() const { return isVSrc_f16() || isLiteralImm(MVT::v2f16); }
  bool isVISrcB32() const { return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i32); }
  bool isVISrcB16() const { return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i16); }

  bool isVISrcV2B16() const {

  bool isVISrcF32() const { return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f32); }
  bool isVISrcF16() const { return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f16); }
  bool isVISrcV2F16() const { return isVISrcF16() || isVISrcB32(); }

  bool isVISrc_64_bf16() const { return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::bf16); }
  bool isVISrc_64_f16() const { return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::f16); }
  bool isVISrc_64_b32() const { return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::i32); }
  bool isVISrc_64B64() const { return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::i64); }
  bool isVISrc_64_f64() const { return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::f64); }
  bool isVISrc_64V2FP32() const { return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::f32); }
  bool isVISrc_64V2INT32() const { return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::i32); }
  bool isVISrc_256_b32() const { return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::i32); }
  bool isVISrc_256_f32() const { return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::f32); }
  bool isVISrc_256B64() const { return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::i64); }
  bool isVISrc_256_f64() const { return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::f64); }
  bool isVISrc_128B16() const { return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::i16); }
  bool isVISrc_128V2B16() const { return isVISrc_128B16(); }
  bool isVISrc_128_b32() const { return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::i32); }
  bool isVISrc_128_f32() const { return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::f32); }
  bool isVISrc_256V2FP32() const { return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::f32); }
  bool isVISrc_256V2INT32() const { return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::i32); }
  bool isVISrc_512_b32() const { return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::i32); }
  bool isVISrc_512B16() const { return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::i16); }
  bool isVISrc_512V2B16() const { return isVISrc_512B16(); }
  bool isVISrc_512_f32() const { return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::f32); }
  bool isVISrc_512F16() const { return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::f16); }
  bool isVISrc_512V2F16() const { return isVISrc_512F16() || isVISrc_512_b32(); }
  bool isVISrc_1024_b32() const { return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::i32); }
  bool isVISrc_1024B16() const { return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::i16); }
  bool isVISrc_1024V2B16() const { return isVISrc_1024B16(); }
  bool isVISrc_1024_f32() const { return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::f32); }
  bool isVISrc_1024F16() const { return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::f16); }
  bool isVISrc_1024V2F16() const { return isVISrc_1024F16() || isVISrc_1024_b32(); }
  bool isAISrcB32() const { return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i32); }
  bool isAISrcB16() const { return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i16); }

  bool isAISrcV2B16() const {

  bool isAISrcF32() const { return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f32); }
  bool isAISrcF16() const { return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f16); }
  bool isAISrcV2F16() const { return isAISrcF16() || isAISrcB32(); }
  bool isAISrc_64B64() const { return isRegOrInlineNoMods(AMDGPU::AReg_64RegClassID, MVT::i64); }
  bool isAISrc_64_f64() const { return isRegOrInlineNoMods(AMDGPU::AReg_64RegClassID, MVT::f64); }
  bool isAISrc_128_b32() const { return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i32); }
  bool isAISrc_128B16() const { return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i16); }
  bool isAISrc_128V2B16() const { return isAISrc_128B16(); }
  bool isAISrc_128_f32() const { return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f32); }
  bool isAISrc_128F16() const { return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f16); }
  bool isAISrc_128V2F16() const { return isAISrc_128F16() || isAISrc_128_b32(); }
  bool isVISrc_128_bf16() const { return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::bf16); }
  bool isVISrc_128_f16() const { return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::f16); }
  bool isVISrc_128V2F16() const { return isVISrc_128_f16() || isVISrc_128_b32(); }
  bool isAISrc_256B64() const { return isRegOrInlineNoMods(AMDGPU::AReg_256RegClassID, MVT::i64); }
  bool isAISrc_256_f64() const { return isRegOrInlineNoMods(AMDGPU::AReg_256RegClassID, MVT::f64); }
  bool isAISrc_512_b32() const { return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i32); }
  bool isAISrc_512B16() const { return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i16); }
  bool isAISrc_512V2B16() const { return isAISrc_512B16(); }
  bool isAISrc_512_f32() const { return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f32); }
  bool isAISrc_512F16() const { return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f16); }
  bool isAISrc_512V2F16() const { return isAISrc_512F16() || isAISrc_512_b32(); }
  bool isAISrc_1024_b32() const { return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i32); }
  bool isAISrc_1024B16() const { return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i16); }
  bool isAISrc_1024V2B16() const { return isAISrc_1024B16(); }
  bool isAISrc_1024_f32() const { return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f32); }
  bool isAISrc_1024F16() const { return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f16); }
  bool isAISrc_1024V2F16() const { return isAISrc_1024F16() || isAISrc_1024_b32(); }
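  // KImm operands accept only literal constants (no registers or inline
  // constants); the literal is encoded directly in the instruction word.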
  bool isKImmFP32() const { return isLiteralImm(MVT::f32); }
  bool isKImmFP16() const { return isLiteralImm(MVT::f16); }

  bool isMem() const override {

  bool isExpr() const {

  bool isSOPPBrTarget() const { return isExpr() || isImm(); }

  bool isSWaitCnt() const;
  bool isDepCtr() const;
  bool isSDelayALU() const;
  bool isHwreg() const;
  bool isSendMsg() const;
  bool isSplitBarrier() const;
  bool isSwizzle() const;
  bool isSMRDOffset8() const;
  bool isSMEMOffset() const;
  bool isSMRDLiteralOffset() const;
  bool isDPPCtrl() const;
  bool isGPRIdxMode() const;
  bool isS16Imm() const;
  bool isU16Imm() const;
  bool isEndpgm() const;

  auto getPredicate(std::function<bool(const AMDGPUOperand &Op)> P) const {
    return [=]() { return P(*this); };
  }
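  // getPredicate() binds a member predicate to this operand as a nullary
  // functor; a hypothetical use:
  //   auto IsImmediate = Op.getPredicate(
  //       [](const AMDGPUOperand &O) { return O.isImm(); });
  //   if (IsImmediate()) { /* ... */ }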
  int64_t getImm() const {

  void setImm(int64_t Val) {

  ImmTy getImmTy() const {

  SMLoc getStartLoc() const override {

  SMLoc getEndLoc() const override {

    return SMRange(StartLoc, EndLoc);

  Modifiers getModifiers() const {
    assert(isRegKind() || isImmTy(ImmTyNone));
    return isRegKind() ? Reg.Mods : Imm.Mods;
  }

  void setModifiers(Modifiers Mods) {
    assert(isRegKind() || isImmTy(ImmTyNone));

  bool hasModifiers() const { return getModifiers().hasModifiers(); }
  bool hasFPModifiers() const { return getModifiers().hasFPModifiers(); }
  bool hasIntModifiers() const { return getModifiers().hasIntModifiers(); }
  void addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers = true) const;

  void addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const;

  void addRegOperands(MCInst &Inst, unsigned N) const;

  void addRegOrImmOperands(MCInst &Inst, unsigned N) const {
    if (isRegKind())
      addRegOperands(Inst, N);
    else
      addImmOperands(Inst, N);
  }

  void addRegOrImmWithInputModsOperands(MCInst &Inst, unsigned N) const {
    Modifiers Mods = getModifiers();
      addRegOperands(Inst, N);
      addImmOperands(Inst, N, false);

  void addRegOrImmWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
    assert(!hasIntModifiers());
    addRegOrImmWithInputModsOperands(Inst, N);
  }

  void addRegOrImmWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
    assert(!hasFPModifiers());
    addRegOrImmWithInputModsOperands(Inst, N);
  }

  void addRegWithInputModsOperands(MCInst &Inst, unsigned N) const {
    Modifiers Mods = getModifiers();
    addRegOperands(Inst, N);

  void addRegWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
    assert(!hasIntModifiers());
    addRegWithInputModsOperands(Inst, N);
  }

  void addRegWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
    assert(!hasFPModifiers());
    addRegWithInputModsOperands(Inst, N);
  }
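  // The add*Operands helpers append this operand to an MCInst; the
  // *WithInputMods variants also carry the source modifiers, and the asserts
  // above enforce that FP modifiers (abs/neg) and the integer modifier (sext)
  // are never combined on one operand.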
    case ImmTyNone: OS << "None"; break;
    case ImmTyGDS: OS << "GDS"; break;
    case ImmTyLDS: OS << "LDS"; break;
    case ImmTyOffen: OS << "Offen"; break;
    case ImmTyIdxen: OS << "Idxen"; break;
    case ImmTyAddr64: OS << "Addr64"; break;
    case ImmTyOffset: OS << "Offset"; break;
    case ImmTyInstOffset: OS << "InstOffset"; break;
    case ImmTyOffset0: OS << "Offset0"; break;
    case ImmTyOffset1: OS << "Offset1"; break;
    case ImmTySMEMOffsetMod: OS << "SMEMOffsetMod"; break;
    case ImmTyCPol: OS << "CPol"; break;
    case ImmTyIndexKey8bit: OS << "index_key"; break;
    case ImmTyIndexKey16bit: OS << "index_key"; break;
    case ImmTyTFE: OS << "TFE"; break;
    case ImmTyD16: OS << "D16"; break;
    case ImmTyFORMAT: OS << "FORMAT"; break;
    case ImmTyClamp: OS << "Clamp"; break;
    case ImmTyOModSI: OS << "OModSI"; break;
    case ImmTyDPP8: OS << "DPP8"; break;
    case ImmTyDppCtrl: OS << "DppCtrl"; break;
    case ImmTyDppRowMask: OS << "DppRowMask"; break;
    case ImmTyDppBankMask: OS << "DppBankMask"; break;
    case ImmTyDppBoundCtrl: OS << "DppBoundCtrl"; break;
    case ImmTyDppFI: OS << "DppFI"; break;
    case ImmTySDWADstSel: OS << "SDWADstSel"; break;
    case ImmTySDWASrc0Sel: OS << "SDWASrc0Sel"; break;
    case ImmTySDWASrc1Sel: OS << "SDWASrc1Sel"; break;
    case ImmTySDWADstUnused: OS << "SDWADstUnused"; break;
    case ImmTyDMask: OS << "DMask"; break;
    case ImmTyDim: OS << "Dim"; break;
    case ImmTyUNorm: OS << "UNorm"; break;
    case ImmTyDA: OS << "DA"; break;
    case ImmTyR128A16: OS << "R128A16"; break;
    case ImmTyA16: OS << "A16"; break;
    case ImmTyLWE: OS << "LWE"; break;
    case ImmTyOff: OS << "Off"; break;
    case ImmTyExpTgt: OS << "ExpTgt"; break;
    case ImmTyExpCompr: OS << "ExpCompr"; break;
    case ImmTyExpVM: OS << "ExpVM"; break;
    case ImmTyHwreg: OS << "Hwreg"; break;
    case ImmTySendMsg: OS << "SendMsg"; break;
    case ImmTyInterpSlot: OS << "InterpSlot"; break;
    case ImmTyInterpAttr: OS << "InterpAttr"; break;
    case ImmTyInterpAttrChan: OS << "InterpAttrChan"; break;
    case ImmTyOpSel: OS << "OpSel"; break;
    case ImmTyOpSelHi: OS << "OpSelHi"; break;
    case ImmTyNegLo: OS << "NegLo"; break;
    case ImmTyNegHi: OS << "NegHi"; break;
    case ImmTySwizzle: OS << "Swizzle"; break;
    case ImmTyGprIdxMode: OS << "GprIdxMode"; break;
    case ImmTyHigh: OS << "High"; break;
    case ImmTyBLGP: OS << "BLGP"; break;
    case ImmTyCBSZ: OS << "CBSZ"; break;
    case ImmTyABID: OS << "ABID"; break;
    case ImmTyEndpgm: OS << "Endpgm"; break;
    case ImmTyWaitVDST: OS << "WaitVDST"; break;
    case ImmTyWaitEXP: OS << "WaitEXP"; break;
    case ImmTyWaitVAVDst: OS << "WaitVAVDst"; break;
    case ImmTyWaitVMVSrc: OS << "WaitVMVSrc"; break;
    case ImmTyByteSel: OS << "ByteSel"; break;
       << " mods: " << Reg.Mods << '>';

    OS << '<' << getImm();
    if (getImmTy() != ImmTyNone) {
      OS << " type: "; printImmTy(OS, getImmTy());
    OS << " mods: " << Imm.Mods << '>';

    OS << '\'' << getToken() << '\'';

    OS << "<expr " << *Expr << '>';
  static AMDGPUOperand::Ptr CreateImm(const AMDGPUAsmParser *AsmParser,
                                      int64_t Val, SMLoc Loc,
                                      ImmTy Type = ImmTyNone,
                                      bool IsFPImm = false) {
    auto Op = std::make_unique<AMDGPUOperand>(Immediate, AsmParser);
    Op->Imm.IsFPImm = IsFPImm;
    Op->Imm.Kind = ImmKindTyNone;
    Op->Imm.Mods = Modifiers();

  static AMDGPUOperand::Ptr CreateToken(const AMDGPUAsmParser *AsmParser,
                                        StringRef Str, SMLoc Loc,
                                        bool HasExplicitEncodingSize = true) {
    auto Res = std::make_unique<AMDGPUOperand>(Token, AsmParser);
    Res->Tok.Data = Str.data();
    Res->Tok.Length = Str.size();
    Res->StartLoc = Loc;

  static AMDGPUOperand::Ptr CreateReg(const AMDGPUAsmParser *AsmParser,
                                      unsigned RegNo, SMLoc S, SMLoc E) {
    auto Op = std::make_unique<AMDGPUOperand>(Register, AsmParser);
    Op->Reg.RegNo = RegNo;
    Op->Reg.Mods = Modifiers();

  static AMDGPUOperand::Ptr CreateExpr(const AMDGPUAsmParser *AsmParser,
                                       const MCExpr *Expr, SMLoc S) {
    auto Op = std::make_unique<AMDGPUOperand>(Expression, AsmParser);

  OS << "abs:" << Mods.Abs << " neg: " << Mods.Neg << " sext:" << Mods.Sext;
class KernelScopeInfo {
  int SgprIndexUnusedMin = -1;
  int VgprIndexUnusedMin = -1;
  int AgprIndexUnusedMin = -1;

  void usesSgprAt(int i) {
    if (i >= SgprIndexUnusedMin) {
      SgprIndexUnusedMin = ++i;

  void usesVgprAt(int i) {
    if (i >= VgprIndexUnusedMin) {
      VgprIndexUnusedMin = ++i;
                        VgprIndexUnusedMin);

  void usesAgprAt(int i) {
    if (i >= AgprIndexUnusedMin) {
      AgprIndexUnusedMin = ++i;
                        VgprIndexUnusedMin);

  KernelScopeInfo() = default;

    usesSgprAt(SgprIndexUnusedMin = -1);
    usesVgprAt(VgprIndexUnusedMin = -1);
    usesAgprAt(AgprIndexUnusedMin = -1);

  void usesRegister(RegisterKind RegKind, unsigned DwordRegIndex,
                    unsigned RegWidth) {
      usesSgprAt(DwordRegIndex + divideCeil(RegWidth, 32) - 1);
      usesAgprAt(DwordRegIndex + divideCeil(RegWidth, 32) - 1);
      usesVgprAt(DwordRegIndex + divideCeil(RegWidth, 32) - 1);

  unsigned ForcedEncodingSize = 0;
  bool ForcedDPP = false;
  bool ForcedSDWA = false;
  KernelScopeInfo KernelScope;

#define GET_ASSEMBLER_HEADER
#include "AMDGPUGenAsmMatcher.inc"
  void createConstantSymbol(StringRef Id, int64_t Val);

  bool ParseAsAbsoluteExpression(uint32_t &Ret);

                          const MCExpr *FlatScrUsed, bool XNACKUsed,
                          std::optional<bool> EnableWavefrontSize32,

  bool ParseDirectiveAMDGCNTarget();
  bool ParseDirectiveAMDHSACodeObjectVersion();
  bool ParseDirectiveAMDHSAKernel();
  bool ParseDirectiveAMDKernelCodeT();
  bool ParseDirectiveAMDGPUHsaKernel();
  bool ParseDirectiveISAVersion();
  bool ParseDirectiveHSAMetadata();
  bool ParseDirectivePALMetadataBegin();
  bool ParseDirectivePALMetadata();
  bool ParseDirectiveAMDGPULDS();

  bool ParseToEndDirective(const char *AssemblerDirectiveBegin,
                           const char *AssemblerDirectiveEnd,
                           std::string &CollectString);

  bool AddNextRegisterToList(unsigned &Reg, unsigned &RegWidth,
                             RegisterKind RegKind, unsigned Reg1, SMLoc Loc);
  bool ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
                           unsigned &RegNum, unsigned &RegWidth,
                           bool RestoreOnFailure = false);
  bool ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
                           unsigned &RegNum, unsigned &RegWidth,
  unsigned ParseRegularReg(RegisterKind &RegKind, unsigned &RegNum,
  unsigned ParseSpecialReg(RegisterKind &RegKind, unsigned &RegNum,
  unsigned ParseRegList(RegisterKind &RegKind, unsigned &RegNum,
  bool ParseRegRange(unsigned &Num, unsigned &Width);
  unsigned getRegularReg(RegisterKind RegKind, unsigned RegNum, unsigned SubReg,
                         unsigned RegWidth, SMLoc Loc);

  std::optional<StringRef> getGprCountSymbolName(RegisterKind RegKind);
  void initializeGprCountSymbol(RegisterKind RegKind);
  bool updateGprCountSymbols(RegisterKind RegKind, unsigned DwordRegIndex,

    OperandMode_Default,

  using OptionalImmIndexMap = std::map<AMDGPUOperand::ImmTy, unsigned>;
    if (getFeatureBits().none()) {

    if (!FB[AMDGPU::FeatureWavefrontSize64] &&
        !FB[AMDGPU::FeatureWavefrontSize32]) {

      createConstantSymbol(".amdgcn.gfx_generation_number", ISA.Major);
      createConstantSymbol(".amdgcn.gfx_generation_minor", ISA.Minor);
      createConstantSymbol(".amdgcn.gfx_generation_stepping", ISA.Stepping);

      createConstantSymbol(".option.machine_version_major", ISA.Major);
      createConstantSymbol(".option.machine_version_minor", ISA.Minor);
      createConstantSymbol(".option.machine_version_stepping", ISA.Stepping);

      initializeGprCountSymbol(IS_VGPR);
      initializeGprCountSymbol(IS_SGPR);

      createConstantSymbol(Symbol, Code);

    createConstantSymbol("UC_VERSION_W64_BIT", 0x2000);
    createConstantSymbol("UC_VERSION_W32_BIT", 0x4000);
    createConstantSymbol("UC_VERSION_MDP_BIT", 0x8000);
  bool hasInv2PiInlineImm() const {
    return getFeatureBits()[AMDGPU::FeatureInv2PiInlineImm];
  }

  bool hasFlatOffsets() const {
    return getFeatureBits()[AMDGPU::FeatureFlatInstOffsets];
  }

    return getFeatureBits()[AMDGPU::FeatureArchitectedFlatScratch];

  bool hasSGPR102_SGPR103() const {

  bool hasSGPR104_SGPR105() const { return isGFX10Plus(); }

  bool hasIntClamp() const { return getFeatureBits()[AMDGPU::FeatureIntClamp]; }

  bool hasPartialNSAEncoding() const {
    return getFeatureBits()[AMDGPU::FeaturePartialNSAEncoding];
  }

  void setForcedEncodingSize(unsigned Size) { ForcedEncodingSize = Size; }
  void setForcedDPP(bool ForceDPP_) { ForcedDPP = ForceDPP_; }
  void setForcedSDWA(bool ForceSDWA_) { ForcedSDWA = ForceSDWA_; }

  unsigned getForcedEncodingSize() const { return ForcedEncodingSize; }
  bool isForcedVOP3() const { return ForcedEncodingSize == 64; }
  bool isForcedDPP() const { return ForcedDPP; }
  bool isForcedSDWA() const { return ForcedSDWA; }
  StringRef getMatchedVariantName() const;

  std::unique_ptr<AMDGPUOperand> parseRegister(bool RestoreOnFailure = false);
                     bool RestoreOnFailure);
                     SMLoc &EndLoc) override;
                     unsigned Kind) override;
                     bool MatchingInlineAsm) override;
                     OperandMode Mode = OperandMode_Default);

  ParseStatus parseIntWithPrefix(const char *Prefix, int64_t &Int);
      AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
      std::function<bool(int64_t &)> ConvertResult = nullptr);
      AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
      bool (*ConvertResult)(int64_t &) = nullptr);
      AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone);
      AMDGPUOperand::ImmTy Type);

  bool isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
  bool isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
  bool isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
  bool isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const;
  bool parseSP3NegModifier();
                   bool HasLit = false);
                   bool HasLit = false);
                   bool AllowImm = true);
                   bool AllowImm = true);
                   AMDGPUOperand::ImmTy ImmTy);

  ParseStatus parseSymbolicOrNumericFormat(int64_t &Format);
  bool tryParseFmt(const char *Pref, int64_t MaxVal, int64_t &Val);
  bool matchDfmtNfmt(int64_t &Dfmt, int64_t &Nfmt, StringRef FormatStr, SMLoc Loc);

  bool parseCnt(int64_t &IntVal);
  bool parseDepCtr(int64_t &IntVal, unsigned &Mask);
  bool parseDelay(int64_t &Delay);
  struct OperandInfoTy {
    bool IsSymbolic = false;
    bool IsDefined = false;

    OperandInfoTy(int64_t Val) : Val(Val) {}

  struct StructuredOpField : OperandInfoTy {
    bool IsDefined = false;

    virtual ~StructuredOpField() = default;

    bool Error(AMDGPUAsmParser &Parser, const Twine &Err) const {
      Parser.Error(Loc, "invalid " + Desc + ": " + Err);

    virtual bool validate(AMDGPUAsmParser &Parser) const {
        return Error(Parser, "not supported on this GPU");
        return Error(Parser, "only " + Twine(Width) + "-bit values are legal");

  bool parseSendMsgBody(OperandInfoTy &Msg, OperandInfoTy &Op,
                        OperandInfoTy &Stream);
  bool validateSendMsg(const OperandInfoTy &Msg, const OperandInfoTy &Op,
                       const OperandInfoTy &Stream);
                      OperandInfoTy &Width);

  SMLoc getOperandLoc(std::function<bool(const AMDGPUOperand &)> Test,

                      bool SearchMandatoryLiterals = false) const;
  bool validateSOPLiteral(const MCInst &Inst) const;
  bool validateVOPDRegBankConstraints(const MCInst &Inst,
  bool validateIntClampSupported(const MCInst &Inst);
  bool validateMIMGAtomicDMask(const MCInst &Inst);
  bool validateMIMGGatherDMask(const MCInst &Inst);
  bool validateMIMGDataSize(const MCInst &Inst, const SMLoc &IDLoc);
  bool validateMIMGAddrSize(const MCInst &Inst, const SMLoc &IDLoc);
  bool validateMIMGD16(const MCInst &Inst);
  bool validateMIMGMSAA(const MCInst &Inst);
  bool validateOpSel(const MCInst &Inst);
  bool validateVccOperand(unsigned Reg) const;
  bool validateAGPRLdSt(const MCInst &Inst) const;
  bool validateVGPRAlign(const MCInst &Inst) const;
  bool validateDivScale(const MCInst &Inst);
                            const SMLoc &IDLoc);
                            const unsigned CPol);
  std::optional<StringRef> validateLdsDirect(const MCInst &Inst);
  unsigned getConstantBusLimit(unsigned Opcode) const;
  bool usesConstantBus(const MCInst &Inst, unsigned OpIdx);
  bool isInlineConstant(const MCInst &Inst, unsigned OpIdx) const;
  unsigned findImplicitSGPRReadInVOP(const MCInst &Inst) const;
  AsmToken peekToken(bool ShouldSkipSpace = true);
  SMLoc getLoc() const;

  void onBeginOfFile() override;
  bool parsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc) override;

  bool parseSwizzleOperand(int64_t &Op,
                           const unsigned MinVal,
                           const unsigned MaxVal,
  bool parseSwizzleOperands(const unsigned OpNum, int64_t *Op,
                            const unsigned MinVal,
                            const unsigned MaxVal,
  bool parseSwizzleOffset(int64_t &Imm);
  bool parseSwizzleMacro(int64_t &Imm);
  bool parseSwizzleQuadPerm(int64_t &Imm);
  bool parseSwizzleBitmaskPerm(int64_t &Imm);
  bool parseSwizzleBroadcast(int64_t &Imm);
  bool parseSwizzleSwap(int64_t &Imm);
  bool parseSwizzleReverse(int64_t &Imm);

  int64_t parseGPRIdxMacro();

               OptionalImmIndexMap &OptionalIdx);
               OptionalImmIndexMap &OptionalIdx);
               OptionalImmIndexMap &OptionalIdx);

  bool parseDimId(unsigned &Encoding);
  bool convertDppBoundCtrl(int64_t &BoundCtrl);
  int64_t parseDPPCtrlSel(StringRef Ctrl);
  int64_t parseDPPCtrlPerm();
               bool IsDPP8 = false);
               AMDGPUOperand::ImmTy Type);
               bool SkipDstVcc = false,
               bool SkipSrcVcc = false);
    return &APFloat::IEEEsingle();
    return &APFloat::IEEEdouble();
    return &APFloat::IEEEhalf();

    return &APFloat::IEEEsingle();
    return &APFloat::IEEEdouble();
    return &APFloat::IEEEhalf();
    return &APFloat::BFloat();

                               APFloat::rmNearestTiesToEven,
  if (Status != APFloat::opOK &&
      ((Status & APFloat::opOverflow) != 0 ||
       (Status & APFloat::opUnderflow) != 0)) {
bool AMDGPUOperand::isInlinableImm(MVT type) const {
  if (!isImmTy(ImmTyNone)) {

  if (type == MVT::f64 || type == MVT::i64) {
                                    AsmParser->hasInv2PiInlineImm());

                        APFloat::rmNearestTiesToEven, &Lost);
    uint32_t ImmVal = FPLiteral.bitcastToAPInt().getZExtValue();
                                 AsmParser->hasInv2PiInlineImm());
        static_cast<int32_t>(FPLiteral.bitcastToAPInt().getZExtValue()),
        AsmParser->hasInv2PiInlineImm());

  if (type == MVT::f64 || type == MVT::i64) {
                                    AsmParser->hasInv2PiInlineImm());
        static_cast<int16_t>(Literal.getLoBits(16).getSExtValue()),
        type, AsmParser->hasInv2PiInlineImm());
        static_cast<int32_t>(Literal.getLoBits(32).getZExtValue()),
        AsmParser->hasInv2PiInlineImm());
bool AMDGPUOperand::isLiteralImm(MVT type) const {
  if (!isImmTy(ImmTyNone)) {

  if (type == MVT::f64 && hasFPModifiers()) {

  if (type == MVT::f64) {

  if (type == MVT::i64) {

  MVT ExpectedType = (type == MVT::v2f16) ? MVT::f16
                     : (type == MVT::v2i16) ? MVT::f32
                     : (type == MVT::v2f32) ? MVT::f32

bool AMDGPUOperand::isRegClass(unsigned RCID) const {
  return isRegKind() && AsmParser->getMRI()->getRegClass(RCID).contains(getReg());
}

bool AMDGPUOperand::isVRegWithInputMods() const {
  return isRegClass(AMDGPU::VGPR_32RegClassID) ||
         (isRegClass(AMDGPU::VReg_64RegClassID) &&
          AsmParser->getFeatureBits()[AMDGPU::FeatureDPALU_DPP]);
}

template <bool IsFake16> bool AMDGPUOperand::isT16VRegWithInputMods() const {
  return isRegClass(IsFake16 ? AMDGPU::VGPR_32_Lo128RegClassID
                             : AMDGPU::VGPR_16_Lo128RegClassID);
}
bool AMDGPUOperand::isSDWAOperand(MVT type) const {
  if (AsmParser->isVI())

  if (AsmParser->isGFX9Plus())
    return isRegClass(AMDGPU::VS_32RegClassID) || isInlinableImm(type);

bool AMDGPUOperand::isSDWAFP16Operand() const { return isSDWAOperand(MVT::f16); }

bool AMDGPUOperand::isSDWAFP32Operand() const { return isSDWAOperand(MVT::f32); }

bool AMDGPUOperand::isSDWAInt16Operand() const { return isSDWAOperand(MVT::i16); }

bool AMDGPUOperand::isSDWAInt32Operand() const { return isSDWAOperand(MVT::i32); }

bool AMDGPUOperand::isBoolReg() const {
  auto FB = AsmParser->getFeatureBits();
  return isReg() && ((FB[AMDGPU::FeatureWavefrontSize64] && isSCSrc_b64()) ||
                     (FB[AMDGPU::FeatureWavefrontSize32] && isSCSrc_b32()));
}

  assert(isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers());
void AMDGPUOperand::addImmOperands(MCInst &Inst, unsigned N,
                                   bool ApplyModifiers) const {
    addLiteralImmOperand(Inst, Imm.Val,
                         isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers());

    assert(!isImmTy(ImmTyNone) || !hasModifiers());
void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val,
                                         bool ApplyModifiers) const {
  const auto &InstDesc = AsmParser->getMII()->get(Inst.getOpcode());

  if (ApplyModifiers) {
    Val = applyInputFPModifiers(Val, Size);

  uint8_t OpTy = InstDesc.operands()[OpNum].OperandType;

                          AsmParser->hasInv2PiInlineImm())) {
      if (Literal.getLoBits(32) != 0) {
        const_cast<AMDGPUAsmParser *>(AsmParser)->Warning(
            Inst.getLoc(),
            "Can't encode literal as exact 64-bit floating-point operand. "
            "Low 32-bits will be set to zero");
        Val &= 0xffffffff00000000u;
      setImmKindLiteral();

    if (AsmParser->hasInv2PiInlineImm() && Literal == 0x3fc45f306725feed) {
      setImmKindLiteral();

                        APFloat::rmNearestTiesToEven, &lost);
      uint64_t ImmVal = FPLiteral.bitcastToAPInt().getZExtValue();
        setImmKindMandatoryLiteral();
        setImmKindLiteral();

                           AsmParser->hasInv2PiInlineImm())) {
      setImmKindLiteral();
      setImmKindLiteral();
      setImmKindLiteral();
                           AsmParser->hasInv2PiInlineImm())) {
      setImmKindLiteral();
                           AsmParser->hasInv2PiInlineImm())) {
      setImmKindLiteral();
                           AsmParser->hasInv2PiInlineImm()));
                           AsmParser->hasInv2PiInlineImm()));
    setImmKindMandatoryLiteral();
    setImmKindMandatoryLiteral();
void AMDGPUOperand::addRegOperands(MCInst &Inst, unsigned N) const {

bool AMDGPUOperand::isInlineValue() const {

void AMDGPUAsmParser::createConstantSymbol(StringRef Id, int64_t Val) {
  if (Is == IS_VGPR) {
      return AMDGPU::VGPR_32RegClassID;
      return AMDGPU::VReg_64RegClassID;
      return AMDGPU::VReg_96RegClassID;
      return AMDGPU::VReg_128RegClassID;
      return AMDGPU::VReg_160RegClassID;
      return AMDGPU::VReg_192RegClassID;
      return AMDGPU::VReg_224RegClassID;
      return AMDGPU::VReg_256RegClassID;
      return AMDGPU::VReg_288RegClassID;
      return AMDGPU::VReg_320RegClassID;
      return AMDGPU::VReg_352RegClassID;
      return AMDGPU::VReg_384RegClassID;
      return AMDGPU::VReg_512RegClassID;
      return AMDGPU::VReg_1024RegClassID;
  } else if (Is == IS_TTMP) {
      return AMDGPU::TTMP_32RegClassID;
      return AMDGPU::TTMP_64RegClassID;
      return AMDGPU::TTMP_128RegClassID;
      return AMDGPU::TTMP_256RegClassID;
      return AMDGPU::TTMP_512RegClassID;
  } else if (Is == IS_SGPR) {
      return AMDGPU::SGPR_32RegClassID;
      return AMDGPU::SGPR_64RegClassID;
      return AMDGPU::SGPR_96RegClassID;
      return AMDGPU::SGPR_128RegClassID;
      return AMDGPU::SGPR_160RegClassID;
      return AMDGPU::SGPR_192RegClassID;
      return AMDGPU::SGPR_224RegClassID;
      return AMDGPU::SGPR_256RegClassID;
      return AMDGPU::SGPR_288RegClassID;
      return AMDGPU::SGPR_320RegClassID;
      return AMDGPU::SGPR_352RegClassID;
      return AMDGPU::SGPR_384RegClassID;
      return AMDGPU::SGPR_512RegClassID;
  } else if (Is == IS_AGPR) {
      return AMDGPU::AGPR_32RegClassID;
      return AMDGPU::AReg_64RegClassID;
      return AMDGPU::AReg_96RegClassID;
      return AMDGPU::AReg_128RegClassID;
      return AMDGPU::AReg_160RegClassID;
      return AMDGPU::AReg_192RegClassID;
      return AMDGPU::AReg_224RegClassID;
      return AMDGPU::AReg_256RegClassID;
      return AMDGPU::AReg_288RegClassID;
      return AMDGPU::AReg_320RegClassID;
      return AMDGPU::AReg_352RegClassID;
      return AMDGPU::AReg_384RegClassID;
      return AMDGPU::AReg_512RegClassID;
      return AMDGPU::AReg_1024RegClassID;
      .Case("exec", AMDGPU::EXEC)
      .Case("vcc", AMDGPU::VCC)
      .Case("flat_scratch", AMDGPU::FLAT_SCR)
      .Case("xnack_mask", AMDGPU::XNACK_MASK)
      .Case("shared_base", AMDGPU::SRC_SHARED_BASE)
      .Case("src_shared_base", AMDGPU::SRC_SHARED_BASE)
      .Case("shared_limit", AMDGPU::SRC_SHARED_LIMIT)
      .Case("src_shared_limit", AMDGPU::SRC_SHARED_LIMIT)
      .Case("private_base", AMDGPU::SRC_PRIVATE_BASE)
      .Case("src_private_base", AMDGPU::SRC_PRIVATE_BASE)
      .Case("private_limit", AMDGPU::SRC_PRIVATE_LIMIT)
      .Case("src_private_limit", AMDGPU::SRC_PRIVATE_LIMIT)
      .Case("pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID)
      .Case("src_pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID)
      .Case("lds_direct", AMDGPU::LDS_DIRECT)
      .Case("src_lds_direct", AMDGPU::LDS_DIRECT)
      .Case("m0", AMDGPU::M0)
      .Case("vccz", AMDGPU::SRC_VCCZ)
      .Case("src_vccz", AMDGPU::SRC_VCCZ)
      .Case("execz", AMDGPU::SRC_EXECZ)
      .Case("src_execz", AMDGPU::SRC_EXECZ)
      .Case("scc", AMDGPU::SRC_SCC)
      .Case("src_scc", AMDGPU::SRC_SCC)
      .Case("tba", AMDGPU::TBA)
      .Case("tma", AMDGPU::TMA)
      .Case("flat_scratch_lo", AMDGPU::FLAT_SCR_LO)
      .Case("flat_scratch_hi", AMDGPU::FLAT_SCR_HI)
      .Case("xnack_mask_lo", AMDGPU::XNACK_MASK_LO)
      .Case("xnack_mask_hi", AMDGPU::XNACK_MASK_HI)
      .Case("vcc_lo", AMDGPU::VCC_LO)
      .Case("vcc_hi", AMDGPU::VCC_HI)
      .Case("exec_lo", AMDGPU::EXEC_LO)
      .Case("exec_hi", AMDGPU::EXEC_HI)
      .Case("tma_lo", AMDGPU::TMA_LO)
      .Case("tma_hi", AMDGPU::TMA_HI)
      .Case("tba_lo", AMDGPU::TBA_LO)
      .Case("tba_hi", AMDGPU::TBA_HI)
      .Case("pc", AMDGPU::PC_REG)
      .Case("null", AMDGPU::SGPR_NULL)
bool AMDGPUAsmParser::ParseRegister(MCRegister &RegNo, SMLoc &StartLoc,
                                    SMLoc &EndLoc, bool RestoreOnFailure) {
  auto R = parseRegister();
  if (!R)
    return true;
  RegNo = R->getReg();
  StartLoc = R->getStartLoc();
  EndLoc = R->getEndLoc();

  return ParseRegister(Reg, StartLoc, EndLoc, false);

  bool Result = ParseRegister(Reg, StartLoc, EndLoc, true);
  bool PendingErrors = getParser().hasPendingError();
  getParser().clearPendingErrors();
bool AMDGPUAsmParser::AddNextRegisterToList(unsigned &Reg, unsigned &RegWidth,
                                            RegisterKind RegKind, unsigned Reg1,
                                            SMLoc Loc) {
  if (Reg == AMDGPU::EXEC_LO && Reg1 == AMDGPU::EXEC_HI) {
  if (Reg == AMDGPU::FLAT_SCR_LO && Reg1 == AMDGPU::FLAT_SCR_HI) {
    Reg = AMDGPU::FLAT_SCR;
  if (Reg == AMDGPU::XNACK_MASK_LO && Reg1 == AMDGPU::XNACK_MASK_HI) {
    Reg = AMDGPU::XNACK_MASK;
  if (Reg == AMDGPU::VCC_LO && Reg1 == AMDGPU::VCC_HI) {
  if (Reg == AMDGPU::TBA_LO && Reg1 == AMDGPU::TBA_HI) {
  if (Reg == AMDGPU::TMA_LO && Reg1 == AMDGPU::TMA_HI) {
    Error(Loc, "register does not fit in the list");

  if (Reg1 != Reg + RegWidth / 32) {
    Error(Loc, "registers in a list must have consecutive indices");

    {{"ttmp"}, IS_TTMP},

  return Kind == IS_VGPR ||
    if (Str.starts_with(Reg.Name))
  return !Str.getAsInteger(10, Num);
AMDGPUAsmParser::isRegister(const AsmToken &Token,

  if (!RegSuffix.empty()) {

AMDGPUAsmParser::isRegister()
  return isRegister(getToken(), peekToken());

unsigned AMDGPUAsmParser::getRegularReg(RegisterKind RegKind, unsigned RegNum,
                                        unsigned SubReg, unsigned RegWidth,
                                        SMLoc Loc) {
  unsigned AlignSize = 1;
  if (RegKind == IS_SGPR || RegKind == IS_TTMP) {

  if (RegNum % AlignSize != 0) {
    Error(Loc, "invalid register alignment");
    return AMDGPU::NoRegister;

  unsigned RegIdx = RegNum / AlignSize;
    Error(Loc, "invalid or unsupported register size");
    return AMDGPU::NoRegister;

    Error(Loc, "register index is out of range");
    return AMDGPU::NoRegister;

  assert(Reg && "Invalid subregister!");
bool AMDGPUAsmParser::ParseRegRange(unsigned &Num, unsigned &RegWidth) {
  int64_t RegLo, RegHi;

  SMLoc FirstIdxLoc = getLoc();
  SecondIdxLoc = getLoc();

  if (!isUInt<32>(RegLo)) {
    Error(FirstIdxLoc, "invalid register index");

  if (!isUInt<32>(RegHi)) {
    Error(SecondIdxLoc, "invalid register index");

  if (RegLo > RegHi) {
    Error(FirstIdxLoc, "first register index should not exceed second index");

  Num = static_cast<unsigned>(RegLo);
  RegWidth = 32 * ((RegHi - RegLo) + 1);
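  // Example: for "v[4:7]" this yields RegLo = 4 and RegHi = 7, so
  // RegWidth = 32 * ((7 - 4) + 1) = 128 bits (a four-dword register range).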
unsigned AMDGPUAsmParser::ParseSpecialReg(RegisterKind &RegKind,
                                          unsigned &RegNum, unsigned &RegWidth,
  RegKind = IS_SPECIAL;

unsigned AMDGPUAsmParser::ParseRegularReg(RegisterKind &RegKind,
                                          unsigned &RegNum, unsigned &RegWidth,
  auto Loc = getLoc();
    Error(Loc, "invalid register name");
    return AMDGPU::NoRegister;

  unsigned SubReg = NoSubRegister;
  if (!RegSuffix.empty()) {
      Error(Loc, "invalid register index");
      return AMDGPU::NoRegister;
    if (!ParseRegRange(RegNum, RegWidth))
      return AMDGPU::NoRegister;

  return getRegularReg(RegKind, RegNum, SubReg, RegWidth, Loc);
unsigned AMDGPUAsmParser::ParseRegList(RegisterKind &RegKind, unsigned &RegNum,
  unsigned Reg = AMDGPU::NoRegister;
  auto ListLoc = getLoc();

                "expected a register or a list of registers")) {
    return AMDGPU::NoRegister;

  auto Loc = getLoc();
  if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth))
    return AMDGPU::NoRegister;
  if (RegWidth != 32) {
    Error(Loc, "expected a single 32-bit register");
    return AMDGPU::NoRegister;

    RegisterKind NextRegKind;
    unsigned NextReg, NextRegNum, NextRegWidth;
    if (!ParseAMDGPURegister(NextRegKind, NextReg,
                             NextRegNum, NextRegWidth,
      return AMDGPU::NoRegister;
    if (NextRegWidth != 32) {
      Error(Loc, "expected a single 32-bit register");
      return AMDGPU::NoRegister;
    if (NextRegKind != RegKind) {
      Error(Loc, "registers in a list must be of the same kind");
      return AMDGPU::NoRegister;
    if (!AddNextRegisterToList(Reg, RegWidth, RegKind, NextReg, Loc))
      return AMDGPU::NoRegister;

                "expected a comma or a closing square bracket")) {
    return AMDGPU::NoRegister;

  Reg = getRegularReg(RegKind, RegNum, NoSubRegister, RegWidth, ListLoc);
bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
                                          unsigned &RegNum, unsigned &RegWidth,
  auto Loc = getLoc();
  Reg = AMDGPU::NoRegister;

    Reg = ParseSpecialReg(RegKind, RegNum, RegWidth, Tokens);
    if (Reg == AMDGPU::NoRegister)
      Reg = ParseRegularReg(RegKind, RegNum, RegWidth, Tokens);
    Reg = ParseRegList(RegKind, RegNum, RegWidth, Tokens);

  if (Reg == AMDGPU::NoRegister) {
    assert(Parser.hasPendingError());

  if (!subtargetHasRegister(*TRI, Reg)) {
    if (Reg == AMDGPU::SGPR_NULL) {
      Error(Loc, "'null' operand is not supported on this GPU");
          " register not available on this GPU");

bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
                                          unsigned &RegNum, unsigned &RegWidth,
                                          bool RestoreOnFailure) {
  Reg = AMDGPU::NoRegister;

  if (ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth, Tokens)) {
    if (RestoreOnFailure) {
      while (!Tokens.empty()) {
std::optional<StringRef>
AMDGPUAsmParser::getGprCountSymbolName(RegisterKind RegKind) {
    return StringRef(".amdgcn.next_free_vgpr");
    return StringRef(".amdgcn.next_free_sgpr");
  return std::nullopt;

void AMDGPUAsmParser::initializeGprCountSymbol(RegisterKind RegKind) {
  auto SymbolName = getGprCountSymbolName(RegKind);
  assert(SymbolName && "initializing invalid register kind");
  MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName);

bool AMDGPUAsmParser::updateGprCountSymbols(RegisterKind RegKind,
                                            unsigned DwordRegIndex,
                                            unsigned RegWidth) {
  auto SymbolName = getGprCountSymbolName(RegKind);
  MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName);
  int64_t NewMax = DwordRegIndex + divideCeil(RegWidth, 32) - 1;

  if (!Sym->isVariable())
    return !Error(getLoc(),
                  ".amdgcn.next_free_{v,s}gpr symbols must be variable");
  if (!Sym->getVariableValue(false)->evaluateAsAbsolute(OldCount))
                  ".amdgcn.next_free_{v,s}gpr symbols must be absolute expressions");
  if (OldCount <= NewMax)
std::unique_ptr<AMDGPUOperand>
AMDGPUAsmParser::parseRegister(bool RestoreOnFailure) {
  const auto &Tok = getToken();
  SMLoc StartLoc = Tok.getLoc();
  SMLoc EndLoc = Tok.getEndLoc();
  RegisterKind RegKind;
  unsigned Reg, RegNum, RegWidth;

  if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth)) {

    if (!updateGprCountSymbols(RegKind, RegNum, RegWidth))

  KernelScope.usesRegister(RegKind, RegNum, RegWidth);
  return AMDGPUOperand::CreateReg(this, Reg, StartLoc, EndLoc);
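// A register operand may be a named special register ("vcc", "exec", "m0"),
// a regular register or aligned range ("v0", "s[2:3]", "ttmp[4:7]"), or a
// bracketed list of single 32-bit registers; ParseAMDGPURegister dispatches
// to ParseSpecialReg/ParseRegularReg, or to ParseRegList for a bracketed list.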
                                 bool HasSP3AbsModifier, bool HasLit) {
  HasLit = trySkipId("lit");

  const auto &Tok = getToken();
  const auto &NextTok = peekToken();
  bool Negate = false;

  AMDGPUOperand::Modifiers Mods;

    APFloat RealVal(APFloat::IEEEdouble());
    auto roundMode = APFloat::rmNearestTiesToEven;
    if (errorToBool(RealVal.convertFromString(Num, roundMode).takeError()))

      RealVal.changeSign();

        AMDGPUOperand::CreateImm(this, RealVal.bitcastToAPInt().getZExtValue(),
                                 S, AMDGPUOperand::ImmTyNone, true));
    AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
    Op.setModifiers(Mods);

    if (HasSP3AbsModifier) {

      if (getParser().parsePrimaryExpr(Expr, EndLoc, nullptr))

      if (Parser.parseExpression(Expr))

    if (Expr->evaluateAsAbsolute(IntVal)) {
      Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S));
      AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
      Op.setModifiers(Mods);

      Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S));
  if (auto R = parseRegister()) {

                                 bool HasSP3AbsMod, bool HasLit) {
    return parseImm(Operands, HasSP3AbsMod, HasLit);

bool
AMDGPUAsmParser::isNamedOperandModifier(const AsmToken &Token,
                                        const AsmToken &NextToken) const {
  return str == "abs" || str == "neg" || str == "sext";

bool
AMDGPUAsmParser::isOpcodeModifierWithVal(const AsmToken &Token,
                                         const AsmToken &NextToken) const {

bool
AMDGPUAsmParser::isOperandModifier(const AsmToken &Token,
                                   const AsmToken &NextToken) const {
  return isNamedOperandModifier(Token, NextToken) || Token.is(AsmToken::Pipe);

bool
AMDGPUAsmParser::isRegOrOperandModifier(const AsmToken &Token,
                                        const AsmToken &NextToken) const {
  return isRegister(Token, NextToken) || isOperandModifier(Token, NextToken);

bool
AMDGPUAsmParser::isModifier() {
  peekTokens(NextToken);

  return isOperandModifier(Tok, NextToken[0]) ||
         (Tok.is(AsmToken::Minus) &&
          isRegOrOperandModifier(NextToken[0], NextToken[1])) ||
         isOpcodeModifierWithVal(Tok, NextToken[0]);
bool
AMDGPUAsmParser::parseSP3NegModifier() {
  peekTokens(NextToken);

      (isRegister(NextToken[0], NextToken[1]) ||
       isId(NextToken[0], "abs"))) {

    return Error(getLoc(), "invalid syntax, expected 'neg' modifier");

  SP3Neg = parseSP3NegModifier();

  Neg = trySkipId("neg");
    return Error(Loc, "expected register or immediate");

  Abs = trySkipId("abs");

  Lit = trySkipId("lit");
    return Error(Loc, "expected register or immediate");

  Res = parseRegOrImm(Operands, SP3Abs, Lit);

  if (Lit && !Operands.back()->isImm())
    Error(Loc, "expected immediate with lit modifier");

  if (SP3Abs && !skipToken(AsmToken::Pipe, "expected vertical bar"))

  AMDGPUOperand::Modifiers Mods;
  Mods.Abs = Abs || SP3Abs;
  Mods.Neg = Neg || SP3Neg;

  if (Mods.hasFPModifiers() || Lit) {
    AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
      return Error(Op.getStartLoc(), "expected an absolute expression");
    Op.setModifiers(Mods);

  bool Sext = trySkipId("sext");
  if (Sext && !skipToken(AsmToken::LParen, "expected left paren after sext"))

  AMDGPUOperand::Modifiers Mods;
  if (Mods.hasIntModifiers()) {
    AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
      return Error(Op.getStartLoc(), "expected an absolute expression");
    Op.setModifiers(Mods);

  return parseRegOrImmWithFPInputMods(Operands, false);

  return parseRegOrImmWithIntInputMods(Operands, false);
  auto Loc = getLoc();
  if (trySkipId("off")) {
    Operands.push_back(AMDGPUOperand::CreateImm(this, 0, Loc,
                                                AMDGPUOperand::ImmTyOff, false));

  std::unique_ptr<AMDGPUOperand> Reg = parseRegister();
    Operands.push_back(std::move(Reg));
unsigned AMDGPUAsmParser::checkTargetMatchPredicate(MCInst &Inst) {
    return Match_InvalidOperand;

  if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi ||
      Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) {
    if (!Op.isImm() || Op.getImm() != AMDGPU::SDWA::SdwaSel::DWORD) {
      return Match_InvalidOperand;

  return Match_Success;

  static const unsigned Variants[] = {

  if (isForcedDPP() && isForcedVOP3()) {
  if (getForcedEncodingSize() == 32) {
  if (isForcedVOP3()) {
  if (isForcedSDWA()) {
  if (isForcedDPP()) {

StringRef AMDGPUAsmParser::getMatchedVariantName() const {
  if (isForcedDPP() && isForcedVOP3())
  if (getForcedEncodingSize() == 32)

unsigned AMDGPUAsmParser::findImplicitSGPRReadInVOP(const MCInst &Inst) const {
  case AMDGPU::FLAT_SCR:
  case AMDGPU::VCC_LO:
  case AMDGPU::VCC_HI:
  return AMDGPU::NoRegister;
bool AMDGPUAsmParser::isInlineConstant(const MCInst &Inst,
                                       unsigned OpIdx) const {
  int64_t Val = MO.getImm();

unsigned AMDGPUAsmParser::getConstantBusLimit(unsigned Opcode) const {
  case AMDGPU::V_LSHLREV_B64_e64:
  case AMDGPU::V_LSHLREV_B64_gfx10:
  case AMDGPU::V_LSHLREV_B64_e64_gfx11:
  case AMDGPU::V_LSHLREV_B64_e32_gfx12:
  case AMDGPU::V_LSHLREV_B64_e64_gfx12:
  case AMDGPU::V_LSHRREV_B64_e64:
  case AMDGPU::V_LSHRREV_B64_gfx10:
  case AMDGPU::V_LSHRREV_B64_e64_gfx11:
  case AMDGPU::V_LSHRREV_B64_e64_gfx12:
  case AMDGPU::V_ASHRREV_I64_e64:
  case AMDGPU::V_ASHRREV_I64_gfx10:
  case AMDGPU::V_ASHRREV_I64_e64_gfx11:
  case AMDGPU::V_ASHRREV_I64_e64_gfx12:
  case AMDGPU::V_LSHL_B64_e64:
  case AMDGPU::V_LSHR_B64_e64:
  case AMDGPU::V_ASHR_I64_e64:

                               bool AddMandatoryLiterals = false) {
  int16_t ImmDeferredIdx =
3685bool AMDGPUAsmParser::usesConstantBus(
const MCInst &Inst,
unsigned OpIdx) {
3688 return !isInlineConstant(Inst, OpIdx);
3695 return isSGPR(PReg,
TRI) && PReg != SGPR_NULL;
3706 const unsigned Opcode = Inst.
getOpcode();
3707 if (Opcode != V_WRITELANE_B32_gfx6_gfx7 && Opcode != V_WRITELANE_B32_vi)
3710 if (!LaneSelOp.
isReg())
3713 return LaneSelReg ==
M0 || LaneSelReg == M0_gfxpre11;
3716bool AMDGPUAsmParser::validateConstantBusLimitations(
3718 const unsigned Opcode = Inst.
getOpcode();
3720 unsigned LastSGPR = AMDGPU::NoRegister;
3721 unsigned ConstantBusUseCount = 0;
3722 unsigned NumLiterals = 0;
3723 unsigned LiteralSize;
3725 if (!(
Desc.TSFlags &
3741 unsigned SGPRUsed = findImplicitSGPRReadInVOP(Inst);
3742 if (SGPRUsed != AMDGPU::NoRegister) {
3743 SGPRsUsed.
insert(SGPRUsed);
3744 ++ConstantBusUseCount;
3749 for (
int OpIdx : OpIndices) {
3754 if (usesConstantBus(Inst, OpIdx)) {
3763 if (SGPRsUsed.
insert(LastSGPR).second) {
3764 ++ConstantBusUseCount;
3784 if (NumLiterals == 0) {
3787         } else if (LiteralSize != Size) {
3793 ConstantBusUseCount += NumLiterals;
3795 if (ConstantBusUseCount <= getConstantBusLimit(Opcode))
3801   Error(Loc, "invalid operand (violates constant bus restrictions)");
3805 bool AMDGPUAsmParser::validateVOPDRegBankConstraints(
3808   const unsigned Opcode = Inst.getOpcode();
3814 auto getVRegIdx = [&](
unsigned,
unsigned OperandIdx) {
3822 bool SkipSrc = Opcode == AMDGPU::V_DUAL_MOV_B32_e32_X_MOV_B32_e32_gfx12;
3825 auto InvalidCompOprIdx =
3826 InstInfo.getInvalidCompOperandIndex(getVRegIdx, SkipSrc);
3827 if (!InvalidCompOprIdx)
3830 auto CompOprIdx = *InvalidCompOprIdx;
3832 std::max(InstInfo[
VOPD::X].getIndexInParsedOperands(CompOprIdx),
3833 InstInfo[
VOPD::Y].getIndexInParsedOperands(CompOprIdx));
3836 auto Loc = ((AMDGPUOperand &)*
Operands[ParsedIdx]).getStartLoc();
3837 if (CompOprIdx == VOPD::Component::DST) {
3838 Error(Loc,
"one dst register must be even and the other odd");
3840 auto CompSrcIdx = CompOprIdx - VOPD::Component::DST_NUM;
3842 " operands must use different VGPR banks");
3848 bool AMDGPUAsmParser::validateIntClampSupported(const MCInst &Inst) {
3865 bool AMDGPUAsmParser::validateMIMGDataSize(const MCInst &Inst,
3866                                            const SMLoc &IDLoc) {
3885   unsigned TFESize = (TFEIdx != -1 && Inst.getOperand(TFEIdx).getImm()) ? 1 : 0;
3890   bool IsPackedD16 = false;
3895     IsPackedD16 = D16Idx >= 0;
3897       DataSize = (DataSize + 1) / 2;
3900   if ((VDataSize / 4) == DataSize + TFESize)
3905     Modifiers = IsPackedD16 ? "dmask and d16" : "dmask";
3907     Modifiers = IsPackedD16 ? "dmask, d16 and tfe" : "dmask and tfe";
3909   Error(IDLoc, Twine("image data size does not match ") + Modifiers);
3913 bool AMDGPUAsmParser::validateMIMGAddrSize(const MCInst &Inst,
3914                                            const SMLoc &IDLoc) {
3927 : AMDGPU::OpName::rsrc;
3934 assert(SrsrcIdx > VAddr0Idx);
3937 if (BaseOpcode->
BVH) {
3938 if (IsA16 == BaseOpcode->
A16)
3940 Error(IDLoc,
"image address size does not match a16");
3946 bool IsNSA = SrsrcIdx - VAddr0Idx > 1;
3947 unsigned ActualAddrSize =
3948 IsNSA ? SrsrcIdx - VAddr0Idx
3951 unsigned ExpectedAddrSize =
3955 if (hasPartialNSAEncoding() &&
3958 int VAddrLastIdx = SrsrcIdx - 1;
3959 unsigned VAddrLastSize =
3962 ActualAddrSize = VAddrLastIdx - VAddr0Idx + VAddrLastSize;
3965 if (ExpectedAddrSize > 12)
3966 ExpectedAddrSize = 16;
3971 if (ActualAddrSize == 8 && (ExpectedAddrSize >= 5 && ExpectedAddrSize <= 7))
3975 if (ActualAddrSize == ExpectedAddrSize)
3978   Error(IDLoc, "image address size does not match dim and a16");
3982 bool AMDGPUAsmParser::validateMIMGAtomicDMask(const MCInst &Inst) {
3989   if (!Desc.mayLoad() || !Desc.mayStore())
3999   return DMask == 0x1 || DMask == 0x3 || DMask == 0xf;
4002 bool AMDGPUAsmParser::validateMIMGGatherDMask(const MCInst &Inst) {
4018 return DMask == 0x1 || DMask == 0x2 || DMask == 0x4 || DMask == 0x8;
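// Note (summary of the two checks above): atomic image operations only allow
// dmask values 0x1, 0x3 or 0xf (1, 2 or 4 contiguous components), while
// image_gather4 requires exactly one dmask bit to be set.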
4021 bool AMDGPUAsmParser::validateMIMGDim(const MCInst &Inst,
4036   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
4037     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
4044 bool AMDGPUAsmParser::validateMIMGMSAA(const MCInst &Inst) {
4055   if (!BaseOpcode->MSAA)
4064   return DimInfo->MSAA;
4070 case AMDGPU::V_MOVRELS_B32_sdwa_gfx10:
4071 case AMDGPU::V_MOVRELSD_B32_sdwa_gfx10:
4072 case AMDGPU::V_MOVRELSD_2_B32_sdwa_gfx10:
4082 bool AMDGPUAsmParser::validateMovrels(const MCInst &Inst,
4106   Error(ErrLoc, "source operand must be a VGPR");
4110 bool AMDGPUAsmParser::validateMAIAccWrite(const MCInst &Inst,
4115   if (Opc != AMDGPU::V_ACCVGPR_WRITE_B32_vi)
4129 "source operand must be either a VGPR or an inline constant");
4136bool AMDGPUAsmParser::validateMAISrc2(
const MCInst &Inst,
4142 !getFeatureBits()[FeatureMFMAInlineLiteralBug])
4149 if (Inst.
getOperand(Src2Idx).
isImm() && isInlineConstant(Inst, Src2Idx)) {
4151 "inline constants are not allowed for this operand");
4158 bool AMDGPUAsmParser::validateMFMA(const MCInst &Inst,
4176   if (Src2Reg == DstReg)
4180   if (TRI->getRegClass(Desc.operands()[0].RegClass).getSizeInBits() <= 128)
4183   if (TRI->regsOverlap(Src2Reg, DstReg)) {
4185 "source 2 operand must not partially overlap with dst");
4192 bool AMDGPUAsmParser::validateDivScale(const MCInst &Inst) {
4196 case V_DIV_SCALE_F32_gfx6_gfx7:
4197 case V_DIV_SCALE_F32_vi:
4198 case V_DIV_SCALE_F32_gfx10:
4199 case V_DIV_SCALE_F64_gfx6_gfx7:
4200 case V_DIV_SCALE_F64_vi:
4201 case V_DIV_SCALE_F64_gfx10:
4207   for (auto Name : {AMDGPU::OpName::src0_modifiers,
4208                     AMDGPU::OpName::src1_modifiers,
4209                     AMDGPU::OpName::src2_modifiers}) {
4220 bool AMDGPUAsmParser::validateMIMGD16(const MCInst &Inst) {
4240 case AMDGPU::V_SUBREV_F32_e32:
4241 case AMDGPU::V_SUBREV_F32_e64:
4242 case AMDGPU::V_SUBREV_F32_e32_gfx10:
4243 case AMDGPU::V_SUBREV_F32_e32_gfx6_gfx7:
4244 case AMDGPU::V_SUBREV_F32_e32_vi:
4245 case AMDGPU::V_SUBREV_F32_e64_gfx10:
4246 case AMDGPU::V_SUBREV_F32_e64_gfx6_gfx7:
4247 case AMDGPU::V_SUBREV_F32_e64_vi:
4249 case AMDGPU::V_SUBREV_CO_U32_e32:
4250 case AMDGPU::V_SUBREV_CO_U32_e64:
4251 case AMDGPU::V_SUBREV_I32_e32_gfx6_gfx7:
4252 case AMDGPU::V_SUBREV_I32_e64_gfx6_gfx7:
4254 case AMDGPU::V_SUBBREV_U32_e32:
4255 case AMDGPU::V_SUBBREV_U32_e64:
4256 case AMDGPU::V_SUBBREV_U32_e32_gfx6_gfx7:
4257 case AMDGPU::V_SUBBREV_U32_e32_vi:
4258 case AMDGPU::V_SUBBREV_U32_e64_gfx6_gfx7:
4259 case AMDGPU::V_SUBBREV_U32_e64_vi:
4261 case AMDGPU::V_SUBREV_U32_e32:
4262 case AMDGPU::V_SUBREV_U32_e64:
4263 case AMDGPU::V_SUBREV_U32_e32_gfx9:
4264 case AMDGPU::V_SUBREV_U32_e32_vi:
4265 case AMDGPU::V_SUBREV_U32_e64_gfx9:
4266 case AMDGPU::V_SUBREV_U32_e64_vi:
4268 case AMDGPU::V_SUBREV_F16_e32:
4269 case AMDGPU::V_SUBREV_F16_e64:
4270 case AMDGPU::V_SUBREV_F16_e32_gfx10:
4271 case AMDGPU::V_SUBREV_F16_e32_vi:
4272 case AMDGPU::V_SUBREV_F16_e64_gfx10:
4273 case AMDGPU::V_SUBREV_F16_e64_vi:
4275 case AMDGPU::V_SUBREV_U16_e32:
4276 case AMDGPU::V_SUBREV_U16_e64:
4277 case AMDGPU::V_SUBREV_U16_e32_vi:
4278 case AMDGPU::V_SUBREV_U16_e64_vi:
4280 case AMDGPU::V_SUBREV_CO_U32_e32_gfx9:
4281 case AMDGPU::V_SUBREV_CO_U32_e64_gfx10:
4282 case AMDGPU::V_SUBREV_CO_U32_e64_gfx9:
4284 case AMDGPU::V_SUBBREV_CO_U32_e32_gfx9:
4285 case AMDGPU::V_SUBBREV_CO_U32_e64_gfx9:
4287 case AMDGPU::V_SUBREV_NC_U32_e32_gfx10:
4288 case AMDGPU::V_SUBREV_NC_U32_e64_gfx10:
4290 case AMDGPU::V_SUBREV_CO_CI_U32_e32_gfx10:
4291 case AMDGPU::V_SUBREV_CO_CI_U32_e64_gfx10:
4293 case AMDGPU::V_LSHRREV_B32_e32:
4294 case AMDGPU::V_LSHRREV_B32_e64:
4295 case AMDGPU::V_LSHRREV_B32_e32_gfx6_gfx7:
4296 case AMDGPU::V_LSHRREV_B32_e64_gfx6_gfx7:
4297 case AMDGPU::V_LSHRREV_B32_e32_vi:
4298 case AMDGPU::V_LSHRREV_B32_e64_vi:
4299 case AMDGPU::V_LSHRREV_B32_e32_gfx10:
4300 case AMDGPU::V_LSHRREV_B32_e64_gfx10:
4302 case AMDGPU::V_ASHRREV_I32_e32:
4303 case AMDGPU::V_ASHRREV_I32_e64:
4304 case AMDGPU::V_ASHRREV_I32_e32_gfx10:
4305 case AMDGPU::V_ASHRREV_I32_e32_gfx6_gfx7:
4306 case AMDGPU::V_ASHRREV_I32_e32_vi:
4307 case AMDGPU::V_ASHRREV_I32_e64_gfx10:
4308 case AMDGPU::V_ASHRREV_I32_e64_gfx6_gfx7:
4309 case AMDGPU::V_ASHRREV_I32_e64_vi:
4311 case AMDGPU::V_LSHLREV_B32_e32:
4312 case AMDGPU::V_LSHLREV_B32_e64:
4313 case AMDGPU::V_LSHLREV_B32_e32_gfx10:
4314 case AMDGPU::V_LSHLREV_B32_e32_gfx6_gfx7:
4315 case AMDGPU::V_LSHLREV_B32_e32_vi:
4316 case AMDGPU::V_LSHLREV_B32_e64_gfx10:
4317 case AMDGPU::V_LSHLREV_B32_e64_gfx6_gfx7:
4318 case AMDGPU::V_LSHLREV_B32_e64_vi:
4320 case AMDGPU::V_LSHLREV_B16_e32:
4321 case AMDGPU::V_LSHLREV_B16_e64:
4322 case AMDGPU::V_LSHLREV_B16_e32_vi:
4323 case AMDGPU::V_LSHLREV_B16_e64_vi:
4324 case AMDGPU::V_LSHLREV_B16_gfx10:
4326 case AMDGPU::V_LSHRREV_B16_e32:
4327 case AMDGPU::V_LSHRREV_B16_e64:
4328 case AMDGPU::V_LSHRREV_B16_e32_vi:
4329 case AMDGPU::V_LSHRREV_B16_e64_vi:
4330 case AMDGPU::V_LSHRREV_B16_gfx10:
4332 case AMDGPU::V_ASHRREV_I16_e32:
4333 case AMDGPU::V_ASHRREV_I16_e64:
4334 case AMDGPU::V_ASHRREV_I16_e32_vi:
4335 case AMDGPU::V_ASHRREV_I16_e64_vi:
4336 case AMDGPU::V_ASHRREV_I16_gfx10:
4338 case AMDGPU::V_LSHLREV_B64_e64:
4339 case AMDGPU::V_LSHLREV_B64_gfx10:
4340 case AMDGPU::V_LSHLREV_B64_vi:
4342 case AMDGPU::V_LSHRREV_B64_e64:
4343 case AMDGPU::V_LSHRREV_B64_gfx10:
4344 case AMDGPU::V_LSHRREV_B64_vi:
4346 case AMDGPU::V_ASHRREV_I64_e64:
4347 case AMDGPU::V_ASHRREV_I64_gfx10:
4348 case AMDGPU::V_ASHRREV_I64_vi:
4350 case AMDGPU::V_PK_LSHLREV_B16:
4351 case AMDGPU::V_PK_LSHLREV_B16_gfx10:
4352 case AMDGPU::V_PK_LSHLREV_B16_vi:
4354 case AMDGPU::V_PK_LSHRREV_B16:
4355 case AMDGPU::V_PK_LSHRREV_B16_gfx10:
4356 case AMDGPU::V_PK_LSHRREV_B16_vi:
4357 case AMDGPU::V_PK_ASHRREV_I16:
4358 case AMDGPU::V_PK_ASHRREV_I16_gfx10:
4359 case AMDGPU::V_PK_ASHRREV_I16_vi:
4366 std::optional<StringRef>
4367 AMDGPUAsmParser::validateLdsDirect(const MCInst &Inst) {
4369   using namespace SIInstrFlags;
4370   const unsigned Opcode = Inst.getOpcode();
4376   if ((Desc.TSFlags & Enc) == 0)
4377     return std::nullopt;
4379   for (auto SrcName : {OpName::src0, OpName::src1, OpName::src2}) {
4384     if (Src.isReg() && Src.getReg() == LDS_DIRECT) {
4387         return StringRef("lds_direct is not supported on this GPU");
4390         return StringRef("lds_direct cannot be used with this instruction");
4392       if (SrcName != OpName::src0)
4393         return StringRef("lds_direct may be used as src0 only");
4397 return std::nullopt;
4401 for (
unsigned i = 1, e =
Operands.size(); i != e; ++i) {
4402 AMDGPUOperand &
Op = ((AMDGPUOperand &)*
Operands[i]);
4403 if (
Op.isFlatOffset())
4404 return Op.getStartLoc();
4409 bool AMDGPUAsmParser::validateOffset(const MCInst &Inst,
4418     return validateFlatOffset(Inst, Operands);
4421     return validateSMEMOffset(Inst, Operands);
4426     const unsigned OffsetSize = 24;
4427     if (!isIntN(OffsetSize, Op.getImm())) {
4429             Twine("expected a ") + Twine(OffsetSize) + "-bit signed offset");
4433     const unsigned OffsetSize = 16;
4434     if (!isUIntN(OffsetSize, Op.getImm())) {
4436             Twine("expected a ") + Twine(OffsetSize) + "-bit unsigned offset");
4443 bool AMDGPUAsmParser::validateFlatOffset(const MCInst &Inst,
4454   if (!hasFlatOffsets() && Op.getImm() != 0) {
4456           "flat offset modifier is not supported on this GPU");
4463   bool AllowNegative =
4466   if (!isIntN(OffsetSize, Op.getImm()) || (!AllowNegative && Op.getImm() < 0)) {
4468           Twine("expected a ") +
4469               (AllowNegative ? Twine(OffsetSize) + "-bit signed offset"
4470                              : Twine(OffsetSize - 1) + "-bit unsigned offset"));
4479 for (
unsigned i = 2, e =
Operands.size(); i != e; ++i) {
4480 AMDGPUOperand &
Op = ((AMDGPUOperand &)*
Operands[i]);
4481 if (
Op.isSMEMOffset() ||
Op.isSMEMOffsetMod())
4482 return Op.getStartLoc();
4487 bool AMDGPUAsmParser::validateSMEMOffset(const MCInst &Inst,
4513                : (isVI() || IsBuffer) ? "expected a 20-bit unsigned offset"
4514                                       : "expected a 21-bit signed offset");
4519 bool AMDGPUAsmParser::validateSOPLiteral(const MCInst &Inst) const {
4528 const int OpIndices[] = { Src0Idx, Src1Idx };
4530 unsigned NumExprs = 0;
4531 unsigned NumLiterals = 0;
4534   for (int OpIdx : OpIndices) {
4535     if (OpIdx == -1) break;
4540     if (MO.isImm() && !isInlineConstant(Inst, OpIdx)) {
4542       if (NumLiterals == 0 || LiteralValue != Value) {
4546     } else if (MO.isExpr()) {
4552 return NumLiterals + NumExprs <= 1;
4555 bool AMDGPUAsmParser::validateOpSel(const MCInst &Inst) {
4569 if (OpSelIdx != -1) {
4574 if (OpSelHiIdx != -1) {
4592 bool AMDGPUAsmParser::validateNeg(const MCInst &Inst, int OpName) {
4617   int SrcMods[3] = {AMDGPU::OpName::src0_modifiers,
4618                     AMDGPU::OpName::src1_modifiers,
4619                     AMDGPU::OpName::src2_modifiers};
4621   for (unsigned i = 0; i < 3; ++i) {
4631bool AMDGPUAsmParser::validateDPP(
const MCInst &Inst,
4635 if (DppCtrlIdx >= 0) {
4642 Error(S,
"DP ALU dpp only supports row_newbcast");
4648 bool IsDPP = DppCtrlIdx >= 0 || Dpp8Idx >= 0;
4658 Error(S,
"invalid operand for instruction");
4663 "src1 immediate operand invalid for instruction");
4673 bool AMDGPUAsmParser::validateVccOperand(unsigned Reg) const {
4674   auto FB = getFeatureBits();
4675   return (FB[AMDGPU::FeatureWavefrontSize64] && Reg == AMDGPU::VCC) ||
4676          (FB[AMDGPU::FeatureWavefrontSize32] && Reg == AMDGPU::VCC_LO);
4680 bool AMDGPUAsmParser::validateVOPLiteral(const MCInst &Inst,
4686       !HasMandatoryLiteral && !isVOPD(Opcode))
4691   unsigned NumExprs = 0;
4692   unsigned NumLiterals = 0;
4695   for (int OpIdx : OpIndices) {
4705     if (MO.isImm() && !isInlineConstant(Inst, OpIdx)) {
4711       if (!IsValid32Op && !isInt<32>(Value) && !isUInt<32>(Value)) {
4712         Error(getLitLoc(Operands), "invalid operand for instruction");
4716       if (IsFP64 && IsValid32Op)
4719       if (NumLiterals == 0 || LiteralValue != Value) {
4723     } else if (MO.isExpr()) {
4727   NumLiterals += NumExprs;
4732   if (!HasMandatoryLiteral && !getFeatureBits()[FeatureVOP3Literal]) {
4733     Error(getLitLoc(Operands), "literal operands are not supported");
4737   if (NumLiterals > 1) {
4738     Error(getLitLoc(Operands, true), "only one unique literal operand is allowed");
4756 unsigned Sub =
MRI->getSubReg(
Op.getReg(), AMDGPU::sub0);
4757 auto Reg = Sub ? Sub :
Op.getReg();
4759 return AGPR32.
contains(Reg) ? 1 : 0;
4762bool AMDGPUAsmParser::validateAGPRLdSt(
const MCInst &Inst)
const {
4770 : AMDGPU::OpName::vdata;
4778 if (Data2Areg >= 0 && Data2Areg != DataAreg)
4782 auto FB = getFeatureBits();
4783 if (FB[AMDGPU::FeatureGFX90AInsts]) {
4784 if (DataAreg < 0 || DstAreg < 0)
4786 return DstAreg == DataAreg;
4789 return DstAreg < 1 && DataAreg < 1;
4792 bool AMDGPUAsmParser::validateVGPRAlign(const MCInst &Inst) const {
4793   auto FB = getFeatureBits();
4794   if (!FB[AMDGPU::FeatureGFX90AInsts])
4805     unsigned Sub = MRI->getSubReg(Op.getReg(), AMDGPU::sub0);
4809     if (VGPR32.contains(Sub) && ((Sub - AMDGPU::VGPR0) & 1))
4811     if (AGPR32.contains(Sub) && ((Sub - AMDGPU::AGPR0) & 1))
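// Note (summary): on gfx90a (FeatureGFX90AInsts) register tuples must start on an
// even VGPR/AGPR, which is what the two parity checks on (Sub - VGPR0) and
// (Sub - AGPR0) above enforce.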
4819 for (
unsigned i = 1, e =
Operands.size(); i != e; ++i) {
4820 AMDGPUOperand &
Op = ((AMDGPUOperand &)*
Operands[i]);
4822 return Op.getStartLoc();
4827 bool AMDGPUAsmParser::validateBLGP(const MCInst &Inst,
4837   auto FB = getFeatureBits();
4838   bool UsesNeg = false;
4839   if (FB[AMDGPU::FeatureGFX940Insts]) {
4841     case AMDGPU::V_MFMA_F64_16X16X4F64_gfx940_acd:
4842     case AMDGPU::V_MFMA_F64_16X16X4F64_gfx940_vcd:
4843     case AMDGPU::V_MFMA_F64_4X4X4F64_gfx940_acd:
4844     case AMDGPU::V_MFMA_F64_4X4X4F64_gfx940_vcd:
4849   if (IsNeg == UsesNeg)
4853         UsesNeg ? "invalid modifier: blgp is not supported"
4854                 : "invalid modifier: neg is not supported");
4859bool AMDGPUAsmParser::validateWaitCnt(
const MCInst &Inst,
4865 if (Opc != AMDGPU::S_WAITCNT_EXPCNT_gfx11 &&
4866 Opc != AMDGPU::S_WAITCNT_LGKMCNT_gfx11 &&
4867 Opc != AMDGPU::S_WAITCNT_VMCNT_gfx11 &&
4868 Opc != AMDGPU::S_WAITCNT_VSCNT_gfx11)
4874 if (Reg == AMDGPU::SGPR_NULL)
4878 Error(RegLoc,
"src0 must be null");
4882bool AMDGPUAsmParser::validateDS(
const MCInst &Inst,
4888 return validateGWS(Inst,
Operands);
4899 Error(S,
"gds modifier is not supported on this GPU");
4907bool AMDGPUAsmParser::validateGWS(
const MCInst &Inst,
4909 if (!getFeatureBits()[AMDGPU::FeatureGFX90AInsts])
4913 if (Opc != AMDGPU::DS_GWS_INIT_vi && Opc != AMDGPU::DS_GWS_BARRIER_vi &&
4914 Opc != AMDGPU::DS_GWS_SEMA_BR_vi)
4923 auto RegIdx =
Reg - (VGPR32.
contains(Reg) ? AMDGPU::VGPR0 : AMDGPU::AGPR0);
4926 Error(RegLoc,
"vgpr must be even aligned");
4933bool AMDGPUAsmParser::validateCoherencyBits(
const MCInst &Inst,
4935 const SMLoc &IDLoc) {
4937 AMDGPU::OpName::cpol);
4944 return validateTHAndScopeBits(Inst,
Operands, CPol);
4950 Error(S,
"cache policy is not supported for SMRD instructions");
4954 Error(IDLoc,
"invalid cache policy for SMEM instruction");
4963 if (!(TSFlags & AllowSCCModifier)) {
4968 "scc modifier is not supported for this instruction on this GPU");
4979 :
"instruction must use glc");
4987 &CStr.data()[CStr.find(
isGFX940() ?
"sc0" :
"glc")]);
4989 :
"instruction must not use glc");
4997 bool AMDGPUAsmParser::validateTHAndScopeBits(const MCInst &Inst,
4999                                              const unsigned CPol) {
5003   const unsigned Opcode = Inst.getOpcode();
5015     return PrintError("instruction must use th:TH_ATOMIC_RETURN");
5023       return PrintError("invalid th value for SMEM instruction");
5030     return PrintError("scope and th combination is not valid");
5039       return PrintError("invalid th value for atomic instructions");
5040   } else if (IsStore) {
5042       return PrintError("invalid th value for store instructions");
5045     return PrintError("invalid th value for load instructions");
5051 bool AMDGPUAsmParser::validateTFE(const MCInst &Inst,
5054   if (Desc.mayStore() &&
5058     Error(Loc, "TFE modifier has no meaning for store instructions");
5066bool AMDGPUAsmParser::validateInstruction(
const MCInst &Inst,
5069 if (
auto ErrMsg = validateLdsDirect(Inst)) {
5073 if (!validateSOPLiteral(Inst)) {
5075 "only one unique literal operand is allowed");
5078 if (!validateVOPLiteral(Inst,
Operands)) {
5081 if (!validateConstantBusLimitations(Inst,
Operands)) {
5084 if (!validateVOPDRegBankConstraints(Inst,
Operands)) {
5087 if (!validateIntClampSupported(Inst)) {
5089 "integer clamping is not supported on this GPU");
5092 if (!validateOpSel(Inst)) {
5094 "invalid op_sel operand");
5097 if (!validateNeg(Inst, AMDGPU::OpName::neg_lo)) {
5099 "invalid neg_lo operand");
5102 if (!validateNeg(Inst, AMDGPU::OpName::neg_hi)) {
5104 "invalid neg_hi operand");
5107 if (!validateDPP(Inst,
Operands)) {
5111 if (!validateMIMGD16(Inst)) {
5113 "d16 modifier is not supported on this GPU");
5116 if (!validateMIMGDim(Inst,
Operands)) {
5117 Error(IDLoc,
"missing dim operand");
5120 if (!validateMIMGMSAA(Inst)) {
5122 "invalid dim; must be MSAA type");
5125 if (!validateMIMGDataSize(Inst, IDLoc)) {
5128 if (!validateMIMGAddrSize(Inst, IDLoc))
5130 if (!validateMIMGAtomicDMask(Inst)) {
5132 "invalid atomic image dmask");
5135 if (!validateMIMGGatherDMask(Inst)) {
5137 "invalid image_gather dmask: only one bit must be set");
5140 if (!validateMovrels(Inst,
Operands)) {
5143 if (!validateOffset(Inst,
Operands)) {
5146 if (!validateMAIAccWrite(Inst,
Operands)) {
5149 if (!validateMAISrc2(Inst,
Operands)) {
5152 if (!validateMFMA(Inst,
Operands)) {
5155 if (!validateCoherencyBits(Inst,
Operands, IDLoc)) {
5159 if (!validateAGPRLdSt(Inst)) {
5160 Error(IDLoc, getFeatureBits()[AMDGPU::FeatureGFX90AInsts]
5161 ?
"invalid register class: data and dst should be all VGPR or AGPR"
5162 :
"invalid register class: agpr loads and stores not supported on this GPU"
5166 if (!validateVGPRAlign(Inst)) {
5168 "invalid register class: vgpr tuples must be 64 bit aligned");
5175 if (!validateBLGP(Inst,
Operands)) {
5179 if (!validateDivScale(Inst)) {
5180 Error(IDLoc,
"ABS not allowed in VOP3B instructions");
5183 if (!validateWaitCnt(Inst,
Operands)) {
5186 if (!validateTFE(Inst,
Operands)) {
5195 unsigned VariantID = 0);
5199 unsigned VariantID);
5201bool AMDGPUAsmParser::isSupportedMnemo(
StringRef Mnemo,
5206bool AMDGPUAsmParser::isSupportedMnemo(
StringRef Mnemo,
5209 for (
auto Variant : Variants) {
5217bool AMDGPUAsmParser::checkUnsupportedInstruction(
StringRef Mnemo,
5218 const SMLoc &IDLoc) {
5219 FeatureBitset FBS = ComputeAvailableFeatures(getFeatureBits());
5222 if (isSupportedMnemo(Mnemo, FBS, getMatchedVariants()))
5227 getParser().clearPendingErrors();
5231 StringRef VariantName = getMatchedVariantName();
5232 if (!VariantName.
empty() && isSupportedMnemo(Mnemo, FBS)) {
5235 " variant of this instruction is not supported"));
5239 if (
isGFX10Plus() && getFeatureBits()[AMDGPU::FeatureWavefrontSize64] &&
5240 !getFeatureBits()[AMDGPU::FeatureWavefrontSize32]) {
5243 FeaturesWS32.
flip(AMDGPU::FeatureWavefrontSize64)
5244 .
flip(AMDGPU::FeatureWavefrontSize32);
5246 ComputeAvailableFeatures(FeaturesWS32);
5248 if (isSupportedMnemo(Mnemo, AvailableFeaturesWS32, getMatchedVariants()))
5249 return Error(IDLoc,
"instruction requires wavesize=32");
5254 return Error(IDLoc,
"instruction not supported on this GPU");
5259 return Error(IDLoc,
"invalid instruction" + Suggestion);
5265 const auto &
Op = ((AMDGPUOperand &)*
Operands[InvalidOprIdx]);
5266 if (
Op.isToken() && InvalidOprIdx > 1) {
5267 const auto &PrevOp = ((AMDGPUOperand &)*
Operands[InvalidOprIdx - 1]);
5268 return PrevOp.isToken() && PrevOp.getToken() ==
"::";
5273 bool AMDGPUAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
5277                                               bool MatchingInlineAsm) {
5279   unsigned Result = Match_Success;
5280   for (auto Variant : getMatchedVariants()) {
5282     auto R = MatchInstructionImpl(Operands, Inst, EI, MatchingInlineAsm,
5287     if (R == Match_Success || R == Match_MissingFeature ||
5288         (R == Match_InvalidOperand && Result != Match_MissingFeature) ||
5289         (R == Match_MnemonicFail && Result != Match_InvalidOperand &&
5290          Result != Match_MissingFeature)) {
5294     if (R == Match_Success)
5298   if (Result == Match_Success) {
5299     if (!validateInstruction(Inst, IDLoc, Operands)) {
5308 if (checkUnsupportedInstruction(Mnemo, IDLoc)) {
5314 case Match_MissingFeature:
5318 return Error(IDLoc,
"operands are not valid for this GPU or mode");
5320 case Match_InvalidOperand: {
5321 SMLoc ErrorLoc = IDLoc;
5324 return Error(IDLoc,
"too few operands for instruction");
5327 if (ErrorLoc ==
SMLoc())
5331 return Error(ErrorLoc,
"invalid VOPDY instruction");
5333 return Error(ErrorLoc,
"invalid operand for instruction");
5336 case Match_MnemonicFail:
5342bool AMDGPUAsmParser::ParseAsAbsoluteExpression(
uint32_t &Ret) {
5347 if (getParser().parseAbsoluteExpression(Tmp)) {
5354bool AMDGPUAsmParser::ParseDirectiveAMDGCNTarget() {
5356 return TokError(
"directive only supported for amdgcn architecture");
5358 std::string TargetIDDirective;
5359 SMLoc TargetStart = getTok().getLoc();
5360 if (getParser().parseEscapedString(TargetIDDirective))
5364   if (getTargetStreamer().getTargetID()->toString() != TargetIDDirective)
5365     return getParser().Error(TargetRange.Start,
5366         (Twine(".amdgcn_target directive's target id ") +
5367          Twine(TargetIDDirective) +
5368          Twine(" does not match the specified target id ") +
5369          Twine(getTargetStreamer().getTargetID()->toString())).str());
5378 bool AMDGPUAsmParser::calculateGPRBlocks(
5380     const MCExpr *FlatScrUsed, bool XNACKUsed,
5381     std::optional<bool> EnableWavefrontSize32, const MCExpr *NextFreeVGPR,
5383     const MCExpr *&VGPRBlocks, const MCExpr *&SGPRBlocks) {
5390 int64_t EvaluatedSGPRs;
5395 unsigned MaxAddressableNumSGPRs =
5398 if (
NumSGPRs->evaluateAsAbsolute(EvaluatedSGPRs) &&
Version.Major >= 8 &&
5399 !Features.
test(FeatureSGPRInitBug) &&
5400 static_cast<uint64_t>(EvaluatedSGPRs) > MaxAddressableNumSGPRs)
5401 return OutOfRangeError(SGPRRange);
5403 const MCExpr *ExtraSGPRs =
5407 if (
NumSGPRs->evaluateAsAbsolute(EvaluatedSGPRs) &&
5408 (
Version.Major <= 7 || Features.
test(FeatureSGPRInitBug)) &&
5409 static_cast<uint64_t>(EvaluatedSGPRs) > MaxAddressableNumSGPRs)
5410 return OutOfRangeError(SGPRRange);
5412 if (Features.
test(FeatureSGPRInitBug))
5419 auto GetNumGPRBlocks = [&Ctx](
const MCExpr *NumGPR,
5420 unsigned Granule) ->
const MCExpr * {
5424 const MCExpr *AlignToGPR =
5432 VGPRBlocks = GetNumGPRBlocks(
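// Illustrative sketch (assumption; helper name invented): the granulated GPR-count
// fields written into the kernel descriptor round the register count up to the
// allocation granule and store "blocks - 1", which is what GetNumGPRBlocks above
// computes symbolically with MCExprs.
static unsigned granulatedGPRBlocks(unsigned NumGPRs, unsigned Granule) {
  unsigned N = NumGPRs ? NumGPRs : 1;        // at least one register is allocated
  return (N + Granule - 1) / Granule - 1;    // round up to the granule, bias by -1
}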
5441 bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() {
5443     return TokError("directive only supported for amdgcn architecture");
5446     return TokError("directive only supported for amdhsa OS");
5449   if (getParser().parseIdentifier(KernelName))
5454                                                   &getSTI(), getContext());
5464 const MCExpr *NextFreeVGPR = ZeroExpr;
5470 const MCExpr *NextFreeSGPR = ZeroExpr;
5473 unsigned ImpliedUserSGPRCount = 0;
5477 std::optional<unsigned> ExplicitUserSGPRCount;
5478 const MCExpr *ReserveVCC = OneExpr;
5479 const MCExpr *ReserveFlatScr = OneExpr;
5480 std::optional<bool> EnableWavefrontSize32;
5486 SMRange IDRange = getTok().getLocRange();
5487 if (!parseId(
ID,
"expected .amdhsa_ directive or .end_amdhsa_kernel"))
5490 if (
ID ==
".end_amdhsa_kernel")
5494 return TokError(
".amdhsa_ directives cannot be repeated");
5496 SMLoc ValStart = getLoc();
5498 if (getParser().parseExpression(ExprVal))
5500 SMLoc ValEnd = getLoc();
5505 bool EvaluatableExpr;
5506 if ((EvaluatableExpr = ExprVal->evaluateAsAbsolute(IVal))) {
5508 return OutOfRangeError(ValRange);
5512#define PARSE_BITS_ENTRY(FIELD, ENTRY, VALUE, RANGE) \
5513 if (!isUInt<ENTRY##_WIDTH>(Val)) \
5514 return OutOfRangeError(RANGE); \
5515 AMDGPU::MCKernelDescriptor::bits_set(FIELD, VALUE, ENTRY##_SHIFT, ENTRY, \
5520#define EXPR_RESOLVE_OR_ERROR(RESOLVED) \
5522 return Error(IDRange.Start, "directive should have resolvable expression", \
5525 if (
ID ==
".amdhsa_group_segment_fixed_size") {
5528 return OutOfRangeError(ValRange);
5530 }
else if (
ID ==
".amdhsa_private_segment_fixed_size") {
5533 return OutOfRangeError(ValRange);
5535 }
else if (
ID ==
".amdhsa_kernarg_size") {
5537 return OutOfRangeError(ValRange);
5539 }
else if (
ID ==
".amdhsa_user_sgpr_count") {
5541 ExplicitUserSGPRCount = Val;
5542 }
else if (
ID ==
".amdhsa_user_sgpr_private_segment_buffer") {
5546 "directive is not supported with architected flat scratch",
5549 KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER,
5552 ImpliedUserSGPRCount += 4;
5553 }
else if (
ID ==
".amdhsa_user_sgpr_kernarg_preload_length") {
5556 return Error(IDRange.
Start,
"directive requires gfx90a+", IDRange);
5559 return OutOfRangeError(ValRange);
5563 ImpliedUserSGPRCount += Val;
5564 PreloadLength = Val;
5566 }
else if (
ID ==
".amdhsa_user_sgpr_kernarg_preload_offset") {
5569 return Error(IDRange.
Start,
"directive requires gfx90a+", IDRange);
5572 return OutOfRangeError(ValRange);
5576 PreloadOffset = Val;
5577 }
else if (
ID ==
".amdhsa_user_sgpr_dispatch_ptr") {
5580 KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR, ExprVal,
5583 ImpliedUserSGPRCount += 2;
5584 }
else if (
ID ==
".amdhsa_user_sgpr_queue_ptr") {
5587 KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR, ExprVal,
5590 ImpliedUserSGPRCount += 2;
5591 }
else if (
ID ==
".amdhsa_user_sgpr_kernarg_segment_ptr") {
5594 KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR,
5597 ImpliedUserSGPRCount += 2;
5598 }
else if (
ID ==
".amdhsa_user_sgpr_dispatch_id") {
5601 KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID, ExprVal,
5604 ImpliedUserSGPRCount += 2;
5605 }
else if (
ID ==
".amdhsa_user_sgpr_flat_scratch_init") {
5608 "directive is not supported with architected flat scratch",
5612 KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT,
5615 ImpliedUserSGPRCount += 2;
5616 }
else if (
ID ==
".amdhsa_user_sgpr_private_segment_size") {
5619 KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE,
5622 ImpliedUserSGPRCount += 1;
5623 }
else if (
ID ==
".amdhsa_wavefront_size32") {
5625 if (IVersion.
Major < 10)
5626 return Error(IDRange.
Start,
"directive requires gfx10+", IDRange);
5627 EnableWavefrontSize32 = Val;
5629 KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32, ExprVal,
5631 }
else if (
ID ==
".amdhsa_uses_dynamic_stack") {
5633 KERNEL_CODE_PROPERTY_USES_DYNAMIC_STACK, ExprVal,
5635 }
else if (
ID ==
".amdhsa_system_sgpr_private_segment_wavefront_offset") {
5638 "directive is not supported with architected flat scratch",
5641 COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT, ExprVal,
5643 }
else if (
ID ==
".amdhsa_enable_private_segment") {
5647 "directive is not supported without architected flat scratch",
5650 COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT, ExprVal,
5652 }
else if (
ID ==
".amdhsa_system_sgpr_workgroup_id_x") {
5654 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X, ExprVal,
5656 }
else if (
ID ==
".amdhsa_system_sgpr_workgroup_id_y") {
5658 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Y, ExprVal,
5660 }
else if (
ID ==
".amdhsa_system_sgpr_workgroup_id_z") {
5662 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Z, ExprVal,
5664 }
else if (
ID ==
".amdhsa_system_sgpr_workgroup_info") {
5666 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_INFO, ExprVal,
5668 }
else if (
ID ==
".amdhsa_system_vgpr_workitem_id") {
5670 COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID, ExprVal,
5672 }
else if (
ID ==
".amdhsa_next_free_vgpr") {
5673 VGPRRange = ValRange;
5674 NextFreeVGPR = ExprVal;
5675 }
else if (
ID ==
".amdhsa_next_free_sgpr") {
5676 SGPRRange = ValRange;
5677 NextFreeSGPR = ExprVal;
5678 }
else if (
ID ==
".amdhsa_accum_offset") {
5680 return Error(IDRange.
Start,
"directive requires gfx90a+", IDRange);
5681 AccumOffset = ExprVal;
5682 }
else if (
ID ==
".amdhsa_reserve_vcc") {
5683 if (EvaluatableExpr && !isUInt<1>(Val))
5684 return OutOfRangeError(ValRange);
5685 ReserveVCC = ExprVal;
5686 }
else if (
ID ==
".amdhsa_reserve_flat_scratch") {
5687 if (IVersion.
Major < 7)
5688 return Error(IDRange.
Start,
"directive requires gfx7+", IDRange);
5691 "directive is not supported with architected flat scratch",
5693 if (EvaluatableExpr && !isUInt<1>(Val))
5694 return OutOfRangeError(ValRange);
5695 ReserveFlatScr = ExprVal;
5696 }
else if (
ID ==
".amdhsa_reserve_xnack_mask") {
5697 if (IVersion.
Major < 8)
5698 return Error(IDRange.
Start,
"directive requires gfx8+", IDRange);
5699 if (!isUInt<1>(Val))
5700 return OutOfRangeError(ValRange);
5701 if (Val != getTargetStreamer().getTargetID()->isXnackOnOrAny())
5702 return getParser().Error(IDRange.
Start,
".amdhsa_reserve_xnack_mask does not match target id",
5704 }
else if (
ID ==
".amdhsa_float_round_mode_32") {
5706 COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32, ExprVal,
5708 }
else if (
ID ==
".amdhsa_float_round_mode_16_64") {
5710 COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_16_64, ExprVal,
5712 }
else if (
ID ==
".amdhsa_float_denorm_mode_32") {
5714 COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_32, ExprVal,
5716 }
else if (
ID ==
".amdhsa_float_denorm_mode_16_64") {
5718 COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64, ExprVal,
5720 }
else if (
ID ==
".amdhsa_dx10_clamp") {
5721 if (IVersion.
Major >= 12)
5722 return Error(IDRange.
Start,
"directive unsupported on gfx12+", IDRange);
5724 COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_DX10_CLAMP, ExprVal,
5726 }
else if (
ID ==
".amdhsa_ieee_mode") {
5727 if (IVersion.
Major >= 12)
5728 return Error(IDRange.
Start,
"directive unsupported on gfx12+", IDRange);
5730 COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_IEEE_MODE, ExprVal,
5732 }
else if (
ID ==
".amdhsa_fp16_overflow") {
5733 if (IVersion.
Major < 9)
5734 return Error(IDRange.
Start,
"directive requires gfx9+", IDRange);
5736 COMPUTE_PGM_RSRC1_GFX9_PLUS_FP16_OVFL, ExprVal,
5738 }
else if (
ID ==
".amdhsa_tg_split") {
5740 return Error(IDRange.
Start,
"directive requires gfx90a+", IDRange);
5743 }
else if (
ID ==
".amdhsa_workgroup_processor_mode") {
5744 if (IVersion.
Major < 10)
5745 return Error(IDRange.
Start,
"directive requires gfx10+", IDRange);
5747 COMPUTE_PGM_RSRC1_GFX10_PLUS_WGP_MODE, ExprVal,
5749 }
else if (
ID ==
".amdhsa_memory_ordered") {
5750 if (IVersion.
Major < 10)
5751 return Error(IDRange.
Start,
"directive requires gfx10+", IDRange);
5753 COMPUTE_PGM_RSRC1_GFX10_PLUS_MEM_ORDERED, ExprVal,
5755 }
else if (
ID ==
".amdhsa_forward_progress") {
5756 if (IVersion.
Major < 10)
5757 return Error(IDRange.
Start,
"directive requires gfx10+", IDRange);
5759 COMPUTE_PGM_RSRC1_GFX10_PLUS_FWD_PROGRESS, ExprVal,
5761 }
else if (
ID ==
".amdhsa_shared_vgpr_count") {
5763 if (IVersion.
Major < 10 || IVersion.
Major >= 12)
5764 return Error(IDRange.
Start,
"directive requires gfx10 or gfx11",
5766 SharedVGPRCount = Val;
5768 COMPUTE_PGM_RSRC3_GFX10_GFX11_SHARED_VGPR_COUNT, ExprVal,
5770 }
else if (
ID ==
".amdhsa_exception_fp_ieee_invalid_op") {
5773 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION,
5775 }
else if (
ID ==
".amdhsa_exception_fp_denorm_src") {
5777 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_FP_DENORMAL_SOURCE,
5779 }
else if (
ID ==
".amdhsa_exception_fp_ieee_div_zero") {
5782 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO,
5784 }
else if (
ID ==
".amdhsa_exception_fp_ieee_overflow") {
5786 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW,
5788 }
else if (
ID ==
".amdhsa_exception_fp_ieee_underflow") {
5790 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW,
5792 }
else if (
ID ==
".amdhsa_exception_fp_ieee_inexact") {
5794 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INEXACT,
5796 }
else if (
ID ==
".amdhsa_exception_int_div_zero") {
5798 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO,
5800 }
else if (
ID ==
".amdhsa_round_robin_scheduling") {
5801 if (IVersion.
Major < 12)
5802 return Error(IDRange.
Start,
"directive requires gfx12+", IDRange);
5804 COMPUTE_PGM_RSRC1_GFX12_PLUS_ENABLE_WG_RR_EN, ExprVal,
5807 return Error(IDRange.
Start,
"unknown .amdhsa_kernel directive", IDRange);
5810#undef PARSE_BITS_ENTRY
5813 if (!Seen.
contains(
".amdhsa_next_free_vgpr"))
5814 return TokError(
".amdhsa_next_free_vgpr directive is required");
5816 if (!Seen.
contains(
".amdhsa_next_free_sgpr"))
5817 return TokError(
".amdhsa_next_free_sgpr directive is required");
5819 const MCExpr *VGPRBlocks;
5820 const MCExpr *SGPRBlocks;
5821 if (calculateGPRBlocks(getFeatureBits(), ReserveVCC, ReserveFlatScr,
5822 getTargetStreamer().getTargetID()->isXnackOnOrAny(),
5823 EnableWavefrontSize32, NextFreeVGPR,
5824 VGPRRange, NextFreeSGPR, SGPRRange, VGPRBlocks,
5828 int64_t EvaluatedVGPRBlocks;
5829 bool VGPRBlocksEvaluatable =
5830 VGPRBlocks->evaluateAsAbsolute(EvaluatedVGPRBlocks);
5831 if (VGPRBlocksEvaluatable &&
5832 !isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT_WIDTH>(
5833 static_cast<uint64_t>(EvaluatedVGPRBlocks))) {
5834 return OutOfRangeError(VGPRRange);
5838 COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT_SHIFT,
5839 COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT, getContext());
5841 int64_t EvaluatedSGPRBlocks;
5842 if (SGPRBlocks->evaluateAsAbsolute(EvaluatedSGPRBlocks) &&
5843 !isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT_WIDTH>(
5844 static_cast<uint64_t>(EvaluatedSGPRBlocks)))
5845 return OutOfRangeError(SGPRRange);
5848 COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT_SHIFT,
5849 COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT, getContext());
5851 if (ExplicitUserSGPRCount && ImpliedUserSGPRCount > *ExplicitUserSGPRCount)
5852     return TokError("amdgpu_user_sgpr_count smaller than implied by "
5853 "enabled user SGPRs");
5855 unsigned UserSGPRCount =
5856 ExplicitUserSGPRCount ? *ExplicitUserSGPRCount : ImpliedUserSGPRCount;
5858 if (!isUInt<COMPUTE_PGM_RSRC2_USER_SGPR_COUNT_WIDTH>(UserSGPRCount))
5859 return TokError(
"too many user SGPRs enabled");
5862 COMPUTE_PGM_RSRC2_USER_SGPR_COUNT_SHIFT,
5863 COMPUTE_PGM_RSRC2_USER_SGPR_COUNT, getContext());
5867 return TokError(
"Kernarg size should be resolvable");
5869 if (PreloadLength && kernarg_size &&
5870 (PreloadLength * 4 + PreloadOffset * 4 > kernarg_size))
5871 return TokError(
"Kernarg preload length + offset is larger than the "
5872 "kernarg segment size");
5875 if (!Seen.
contains(
".amdhsa_accum_offset"))
5876 return TokError(
".amdhsa_accum_offset directive is required");
5877 int64_t EvaluatedAccum;
5878 bool AccumEvaluatable = AccumOffset->evaluateAsAbsolute(EvaluatedAccum);
5879 uint64_t UEvaluatedAccum = EvaluatedAccum;
5880 if (AccumEvaluatable &&
5881 (UEvaluatedAccum < 4 || UEvaluatedAccum > 256 || (UEvaluatedAccum & 3)))
5882 return TokError(
"accum_offset should be in range [4..256] in "
5885 int64_t EvaluatedNumVGPR;
5886 if (NextFreeVGPR->evaluateAsAbsolute(EvaluatedNumVGPR) &&
5890 return TokError(
"accum_offset exceeds total VGPR allocation");
5896 COMPUTE_PGM_RSRC3_GFX90A_ACCUM_OFFSET_SHIFT,
5897 COMPUTE_PGM_RSRC3_GFX90A_ACCUM_OFFSET,
5901 if (IVersion.
Major >= 10 && IVersion.
Major < 12) {
5903 if (SharedVGPRCount && EnableWavefrontSize32 && *EnableWavefrontSize32) {
5904 return TokError(
"shared_vgpr_count directive not valid on "
5905 "wavefront size 32");
5908 if (VGPRBlocksEvaluatable &&
5909 (SharedVGPRCount * 2 +
static_cast<uint64_t>(EvaluatedVGPRBlocks) >
5911 return TokError(
"shared_vgpr_count*2 + "
5912 "compute_pgm_rsrc1.GRANULATED_WORKITEM_VGPR_COUNT cannot "
5917 getTargetStreamer().EmitAmdhsaKernelDescriptor(getSTI(), KernelName, KD,
5918 NextFreeVGPR, NextFreeSGPR,
5919 ReserveVCC, ReserveFlatScr);
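// Usage sketch (assumption): a minimal block accepted by this directive parser.
// Only .amdhsa_next_free_vgpr and .amdhsa_next_free_sgpr are mandatory (and
// .amdhsa_accum_offset additionally on gfx90a); the other .amdhsa_* fields are
// optional.
//   .amdhsa_kernel my_kernel
//     .amdhsa_next_free_vgpr 8
//     .amdhsa_next_free_sgpr 16
//   .end_amdhsa_kernel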
5923bool AMDGPUAsmParser::ParseDirectiveAMDHSACodeObjectVersion() {
5925 if (ParseAsAbsoluteExpression(Version))
5928 getTargetStreamer().EmitDirectiveAMDHSACodeObjectVersion(Version);
5932bool AMDGPUAsmParser::ParseAMDKernelCodeTValue(
StringRef ID,
5936 if (
ID ==
"max_scratch_backing_memory_byte_size") {
5937 Parser.eatToEndOfStatement();
5943 if (!
C.ParseKernelCodeT(
ID, getParser(), Err)) {
5944 return TokError(Err.str());
5948 if (
ID ==
"enable_wavefront_size32") {
5951 return TokError(
"enable_wavefront_size32=1 is only allowed on GFX10+");
5952 if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32])
5953 return TokError(
"enable_wavefront_size32=1 requires +WavefrontSize32");
5955 if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64])
5956 return TokError(
"enable_wavefront_size32=0 requires +WavefrontSize64");
5960 if (
ID ==
"wavefront_size") {
5961 if (
C.wavefront_size == 5) {
5963 return TokError(
"wavefront_size=5 is only allowed on GFX10+");
5964 if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32])
5965 return TokError(
"wavefront_size=5 requires +WavefrontSize32");
5966 }
else if (
C.wavefront_size == 6) {
5967 if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64])
5968 return TokError(
"wavefront_size=6 requires +WavefrontSize64");
5975bool AMDGPUAsmParser::ParseDirectiveAMDKernelCodeT() {
5985 if (!parseId(
ID,
"expected value identifier or .end_amd_kernel_code_t"))
5988 if (
ID ==
".end_amd_kernel_code_t")
5991 if (ParseAMDKernelCodeTValue(
ID, KernelCode))
5995 KernelCode.
validate(&getSTI(), getContext());
5996 getTargetStreamer().EmitAMDKernelCodeT(KernelCode);
6001bool AMDGPUAsmParser::ParseDirectiveAMDGPUHsaKernel() {
6003 if (!parseId(KernelName,
"expected symbol name"))
6006 getTargetStreamer().EmitAMDGPUSymbolType(KernelName,
6009 KernelScope.initialize(getContext());
6013bool AMDGPUAsmParser::ParseDirectiveISAVersion() {
6015 return Error(getLoc(),
6016 ".amd_amdgpu_isa directive is not available on non-amdgcn "
6020 auto TargetIDDirective = getLexer().getTok().getStringContents();
6021 if (getTargetStreamer().getTargetID()->
toString() != TargetIDDirective)
6022 return Error(getParser().getTok().getLoc(),
"target id must match options");
6024 getTargetStreamer().EmitISAVersion();
6030bool AMDGPUAsmParser::ParseDirectiveHSAMetadata() {
6033 std::string HSAMetadataString;
6038 if (!getTargetStreamer().EmitHSAMetadataV3(HSAMetadataString))
6039 return Error(getLoc(),
"invalid HSA metadata");
6046bool AMDGPUAsmParser::ParseToEndDirective(
const char *AssemblerDirectiveBegin,
6047 const char *AssemblerDirectiveEnd,
6048 std::string &CollectString) {
6052 getLexer().setSkipSpace(
false);
6054 bool FoundEnd =
false;
6057 CollectStream << getTokenStr();
6061 if (trySkipId(AssemblerDirectiveEnd)) {
6066 CollectStream << Parser.parseStringToEndOfStatement()
6067 << getContext().getAsmInfo()->getSeparatorString();
6069 Parser.eatToEndOfStatement();
6072 getLexer().setSkipSpace(
true);
6075 return TokError(
Twine(
"expected directive ") +
6076 Twine(AssemblerDirectiveEnd) +
Twine(
" not found"));
6079 CollectStream.flush();
6084bool AMDGPUAsmParser::ParseDirectivePALMetadataBegin() {
6090 auto PALMetadata = getTargetStreamer().getPALMetadata();
6091 if (!PALMetadata->setFromString(
String))
6092 return Error(getLoc(),
"invalid PAL metadata");
6097bool AMDGPUAsmParser::ParseDirectivePALMetadata() {
6099 return Error(getLoc(),
6101 "not available on non-amdpal OSes")).str());
6104 auto PALMetadata = getTargetStreamer().getPALMetadata();
6105 PALMetadata->setLegacy();
6108 if (ParseAsAbsoluteExpression(Key)) {
6109 return TokError(
Twine(
"invalid value in ") +
6113 return TokError(
Twine(
"expected an even number of values in ") +
6116 if (ParseAsAbsoluteExpression(
Value)) {
6117 return TokError(
Twine(
"invalid value in ") +
6120 PALMetadata->setRegister(Key,
Value);
6129bool AMDGPUAsmParser::ParseDirectiveAMDGPULDS() {
6130 if (getParser().checkForValidSection())
6134 SMLoc NameLoc = getLoc();
6135 if (getParser().parseIdentifier(
Name))
6136 return TokError(
"expected identifier in directive");
6139 if (getParser().parseComma())
6145 SMLoc SizeLoc = getLoc();
6146 if (getParser().parseAbsoluteExpression(
Size))
6149 return Error(SizeLoc,
"size must be non-negative");
6150 if (
Size > LocalMemorySize)
6151 return Error(SizeLoc,
"size is too large");
6153 int64_t Alignment = 4;
6155 SMLoc AlignLoc = getLoc();
6156 if (getParser().parseAbsoluteExpression(Alignment))
6159 return Error(AlignLoc,
"alignment must be a power of two");
6164 if (Alignment >= 1u << 31)
6165 return Error(AlignLoc,
"alignment is too large");
6171 Symbol->redefineIfPossible();
6172 if (!
Symbol->isUndefined())
6173 return Error(NameLoc,
"invalid symbol redefinition");
6175   getTargetStreamer().emitAMDGPULDS(Symbol, Size, Align(Alignment));
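// Usage sketch (from the checks above): .amdgpu_lds <symbol>, <size>[, <alignment>]
// where size is a non-negative byte count no larger than the local memory size and
// the optional alignment (default 4) must be a power of two, e.g.
//   .amdgpu_lds my_lds_var, 64, 16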
6179bool AMDGPUAsmParser::ParseDirective(
AsmToken DirectiveID) {
6183 if (IDVal ==
".amdhsa_kernel")
6184 return ParseDirectiveAMDHSAKernel();
6186 if (IDVal ==
".amdhsa_code_object_version")
6187 return ParseDirectiveAMDHSACodeObjectVersion();
6191 return ParseDirectiveHSAMetadata();
6193 if (IDVal ==
".amd_kernel_code_t")
6194 return ParseDirectiveAMDKernelCodeT();
6196 if (IDVal ==
".amdgpu_hsa_kernel")
6197 return ParseDirectiveAMDGPUHsaKernel();
6199 if (IDVal ==
".amd_amdgpu_isa")
6200 return ParseDirectiveISAVersion();
6204 Twine(
" directive is "
6205 "not available on non-amdhsa OSes"))
6210 if (IDVal ==
".amdgcn_target")
6211 return ParseDirectiveAMDGCNTarget();
6213 if (IDVal ==
".amdgpu_lds")
6214 return ParseDirectiveAMDGPULDS();
6217 return ParseDirectivePALMetadataBegin();
6220 return ParseDirectivePALMetadata();
6227 if (
MRI.regsOverlap(TTMP12_TTMP13_TTMP14_TTMP15, RegNo))
6231 if (
MRI.regsOverlap(SGPR104_SGPR105, RegNo))
6232 return hasSGPR104_SGPR105();
6235 case SRC_SHARED_BASE_LO:
6236 case SRC_SHARED_BASE:
6237 case SRC_SHARED_LIMIT_LO:
6238 case SRC_SHARED_LIMIT:
6239 case SRC_PRIVATE_BASE_LO:
6240 case SRC_PRIVATE_BASE:
6241 case SRC_PRIVATE_LIMIT_LO:
6242 case SRC_PRIVATE_LIMIT:
6244 case SRC_POPS_EXITING_WAVE_ID:
6256 return (
isVI() ||
isGFX9()) && getTargetStreamer().getTargetID()->isXnackSupported();
6285 if (
MRI.regsOverlap(SGPR102_SGPR103, RegNo))
6286 return hasSGPR102_SGPR103();
6299 Res = MatchOperandParserImpl(
Operands, Mnemonic);
6311 SMLoc LBraceLoc = getLoc();
6316 auto Loc = getLoc();
6319 Error(Loc,
"expected a register");
6323 RBraceLoc = getLoc();
6328 "expected a comma or a closing square bracket"))
6332 if (
Operands.size() - Prefix > 1) {
6334 AMDGPUOperand::CreateToken(
this,
"[", LBraceLoc));
6335 Operands.push_back(AMDGPUOperand::CreateToken(
this,
"]", RBraceLoc));
6346 setForcedEncodingSize(0);
6347 setForcedDPP(
false);
6348 setForcedSDWA(
false);
6350 if (
Name.ends_with(
"_e64_dpp")) {
6352 setForcedEncodingSize(64);
6353 return Name.substr(0,
Name.size() - 8);
6355 if (
Name.ends_with(
"_e64")) {
6356 setForcedEncodingSize(64);
6357 return Name.substr(0,
Name.size() - 4);
6359 if (
Name.ends_with(
"_e32")) {
6360 setForcedEncodingSize(32);
6361 return Name.substr(0,
Name.size() - 4);
6363 if (
Name.ends_with(
"_dpp")) {
6365 return Name.substr(0,
Name.size() - 4);
6367 if (
Name.ends_with(
"_sdwa")) {
6368 setForcedSDWA(
true);
6369 return Name.substr(0,
Name.size() - 5);
6376 unsigned VariantID);
6388 Operands.push_back(AMDGPUOperand::CreateToken(
this,
Name, NameLoc));
6390 bool IsMIMG =
Name.starts_with(
"image_");
6393 OperandMode Mode = OperandMode_Default;
6395 Mode = OperandMode_NSA;
6399 checkUnsupportedInstruction(
Name, NameLoc);
6400 if (!Parser.hasPendingError()) {
6403 :
"not a valid operand.";
6404 Error(getLoc(), Msg);
6426 if (!trySkipId(
Name))
6429 Operands.push_back(AMDGPUOperand::CreateToken(
this,
Name, S));
6433ParseStatus AMDGPUAsmParser::parseIntWithPrefix(
const char *Prefix,
6444 std::function<
bool(int64_t &)> ConvertResult) {
6452   if (ConvertResult && !ConvertResult(Value)) {
6456   Operands.push_back(AMDGPUOperand::CreateImm(this, Value, S, ImmTy));
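// Usage note (assumption): this helper accepts operands of the form
// "<prefix>:<integer>" (for example "offset:16"), optionally post-processed by
// ConvertResult before the value is wrapped into an immediate of the requested ImmTy.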
6460ParseStatus AMDGPUAsmParser::parseOperandArrayWithPrefix(
6462 bool (*ConvertResult)(int64_t &)) {
6471 const unsigned MaxSize = 4;
6475 for (
int I = 0; ; ++
I) {
6477 SMLoc Loc = getLoc();
6481 if (
Op != 0 &&
Op != 1)
6489 if (
I + 1 == MaxSize)
6490 return Error(getLoc(),
"expected a closing square bracket");
6496 Operands.push_back(AMDGPUOperand::CreateImm(
this, Val, S, ImmTy));
6502 AMDGPUOperand::ImmTy ImmTy) {
6506 if (trySkipId(
Name)) {
6508 }
else if (trySkipId(
"no",
Name)) {
6515 return Error(S,
"r128 modifier is not supported on this GPU");
6517 return Error(S,
"a16 modifier is not supported on this GPU");
6519 if (
isGFX9() && ImmTy == AMDGPUOperand::ImmTyA16)
6520 ImmTy = AMDGPUOperand::ImmTyR128A16;
6522 Operands.push_back(AMDGPUOperand::CreateImm(
this, Bit, S, ImmTy));
6527 bool &Disabling)
const {
6528 Disabling =
Id.consume_front(
"no");
6548 SMLoc StringLoc = getLoc();
6550 int64_t CPolVal = 0;
6568 ResScope = parseScope(
Operands, Scope);
6583 Operands.push_back(AMDGPUOperand::CreateImm(
this, CPolVal, StringLoc,
6584 AMDGPUOperand::ImmTyCPol));
6589 SMLoc OpLoc = getLoc();
6590 unsigned Enabled = 0, Seen = 0;
6594 unsigned CPol = getCPolKind(getId(), Mnemo, Disabling);
6601 return Error(S,
"dlc modifier is not supported on this GPU");
6604 return Error(S,
"scc modifier is not supported on this GPU");
6607 return Error(S,
"duplicate cache policy modifier");
6619 AMDGPUOperand::CreateImm(
this,
Enabled, OpLoc, AMDGPUOperand::ImmTyCPol));
6629 Operands,
"scope", {
"SCOPE_CU",
"SCOPE_SE",
"SCOPE_DEV",
"SCOPE_SYS"},
6647 if (
Value ==
"TH_DEFAULT")
6649 else if (
Value ==
"TH_STORE_LU" ||
Value ==
"TH_LOAD_RT_WB" ||
6650 Value ==
"TH_LOAD_NT_WB") {
6651 return Error(StringLoc,
"invalid th value");
6652 }
else if (
Value.consume_front(
"TH_ATOMIC_")) {
6654 }
else if (
Value.consume_front(
"TH_LOAD_")) {
6656 }
else if (
Value.consume_front(
"TH_STORE_")) {
6659 return Error(StringLoc,
"invalid th value");
6662 if (
Value ==
"BYPASS")
6693 if (TH == 0xffffffff)
6694 return Error(StringLoc,
"invalid th value");
6701 AMDGPUAsmParser::OptionalImmIndexMap& OptionalIdx,
6702 AMDGPUOperand::ImmTy ImmT,
6704 auto i = OptionalIdx.find(ImmT);
6705 if (i != OptionalIdx.end()) {
6706 unsigned Idx = i->second;
6707 ((AMDGPUOperand &)*
Operands[
Idx]).addImmOperands(Inst, 1);
6719 StringLoc = getLoc();
6724ParseStatus AMDGPUAsmParser::parseStringOrIntWithPrefix(
6730 SMLoc StringLoc = getLoc();
6734 Value = getTokenStr();
6738 if (
Value == Ids[IntVal])
6743 if (IntVal < 0 || IntVal >= (int64_t)Ids.
size())
6749ParseStatus AMDGPUAsmParser::parseStringOrIntWithPrefix(
6751 AMDGPUOperand::ImmTy
Type) {
6757 Operands.push_back(AMDGPUOperand::CreateImm(
this, IntVal, S,
Type));
6766bool AMDGPUAsmParser::tryParseFmt(
const char *Pref,
6770 SMLoc Loc = getLoc();
6772 auto Res = parseIntWithPrefix(Pref, Val);
6778 if (Val < 0 || Val > MaxVal) {
6788 AMDGPUOperand::ImmTy ImmTy) {
6789 const char *Pref =
"index_key";
6791 SMLoc Loc = getLoc();
6792 auto Res = parseIntWithPrefix(Pref, ImmVal);
6796 if (ImmTy == AMDGPUOperand::ImmTyIndexKey16bit && (ImmVal < 0 || ImmVal > 1))
6799 if (ImmTy == AMDGPUOperand::ImmTyIndexKey8bit && (ImmVal < 0 || ImmVal > 3))
6802 Operands.push_back(AMDGPUOperand::CreateImm(
this, ImmVal, Loc, ImmTy));
6807 return tryParseIndexKey(
Operands, AMDGPUOperand::ImmTyIndexKey8bit);
6811 return tryParseIndexKey(
Operands, AMDGPUOperand::ImmTyIndexKey16bit);
6816ParseStatus AMDGPUAsmParser::parseDfmtNfmt(int64_t &Format) {
6823 for (
int I = 0;
I < 2; ++
I) {
6824 if (Dfmt == DFMT_UNDEF && !tryParseFmt(
"dfmt", DFMT_MAX, Dfmt))
6827 if (Nfmt == NFMT_UNDEF && !tryParseFmt(
"nfmt", NFMT_MAX, Nfmt))
6832 if ((Dfmt == DFMT_UNDEF) != (Nfmt == NFMT_UNDEF) &&
6838 if (Dfmt == DFMT_UNDEF && Nfmt == NFMT_UNDEF)
6841 Dfmt = (Dfmt ==
DFMT_UNDEF) ? DFMT_DEFAULT : Dfmt;
6842 Nfmt = (Nfmt ==
NFMT_UNDEF) ? NFMT_DEFAULT : Nfmt;
6848ParseStatus AMDGPUAsmParser::parseUfmt(int64_t &Format) {
6853 if (!tryParseFmt(
"format", UFMT_MAX, Fmt))
6856 if (Fmt == UFMT_UNDEF)
6863bool AMDGPUAsmParser::matchDfmtNfmt(int64_t &Dfmt,
6871 if (Format != DFMT_UNDEF) {
6877 if (Format != NFMT_UNDEF) {
6882 Error(Loc,
"unsupported format");
6893 if (!matchDfmtNfmt(Dfmt, Nfmt, FormatStr, FormatLoc))
6898 SMLoc Loc = getLoc();
6899 if (!parseId(Str,
"expected a format string") ||
6900 !matchDfmtNfmt(Dfmt, Nfmt, Str, Loc))
6902 if (Dfmt == DFMT_UNDEF)
6903 return Error(Loc,
"duplicate numeric format");
6904 if (Nfmt == NFMT_UNDEF)
6905 return Error(Loc,
"duplicate data format");
6908 Dfmt = (Dfmt ==
DFMT_UNDEF) ? DFMT_DEFAULT : Dfmt;
6909 Nfmt = (Nfmt ==
NFMT_UNDEF) ? NFMT_DEFAULT : Nfmt;
6913 if (Ufmt == UFMT_UNDEF)
6914 return Error(FormatLoc,
"unsupported format");
6929 if (Id == UFMT_UNDEF)
6933 return Error(Loc,
"unified format is not supported on this GPU");
6939ParseStatus AMDGPUAsmParser::parseNumericFormat(int64_t &Format) {
6941 SMLoc Loc = getLoc();
6946 return Error(Loc,
"out of range format");
6951ParseStatus AMDGPUAsmParser::parseSymbolicOrNumericFormat(int64_t &Format) {
6959 SMLoc Loc = getLoc();
6960 if (!parseId(FormatStr,
"expected a format string"))
6963 auto Res = parseSymbolicUnifiedFormat(FormatStr, Loc, Format);
6965 Res = parseSymbolicSplitFormat(FormatStr, Loc, Format);
6975 return parseNumericFormat(Format);
6983 SMLoc Loc = getLoc();
6993 AMDGPUOperand::CreateImm(
this, Format, Loc, AMDGPUOperand::ImmTyFORMAT));
7012 Res = parseSymbolicOrNumericFormat(Format);
7017 AMDGPUOperand &
Op =
static_cast<AMDGPUOperand &
>(*
Operands[
Size - 2]);
7018 assert(
Op.isImm() &&
Op.getImmTy() == AMDGPUOperand::ImmTyFORMAT);
7025 return Error(getLoc(),
"duplicate format");
7031 parseIntWithPrefix(
"offset",
Operands, AMDGPUOperand::ImmTyOffset);
7033 Res = parseIntWithPrefix(
"inst_offset",
Operands,
7034 AMDGPUOperand::ImmTyInstOffset);
7041 parseNamedBit(
"r128",
Operands, AMDGPUOperand::ImmTyR128A16);
7043 Res = parseNamedBit(
"a16",
Operands, AMDGPUOperand::ImmTyA16);
7049 parseIntWithPrefix(
"blgp",
Operands, AMDGPUOperand::ImmTyBLGP);
7052 parseOperandArrayWithPrefix(
"neg",
Operands, AMDGPUOperand::ImmTyBLGP);
7062 OptionalImmIndexMap OptionalIdx;
7064 unsigned OperandIdx[4];
7065 unsigned EnMask = 0;
7068 for (
unsigned i = 1, e =
Operands.size(); i != e; ++i) {
7069 AMDGPUOperand &
Op = ((AMDGPUOperand &)*
Operands[i]);
7074 OperandIdx[SrcIdx] = Inst.
size();
7075 Op.addRegOperands(Inst, 1);
7082 OperandIdx[SrcIdx] = Inst.
size();
7088 if (
Op.isImm() &&
Op.getImmTy() == AMDGPUOperand::ImmTyExpTgt) {
7089 Op.addImmOperands(Inst, 1);
7093 if (
Op.isToken() && (
Op.getToken() ==
"done" ||
Op.getToken() ==
"row_en"))
7097 OptionalIdx[
Op.getImmTy()] = i;
7103 if (OptionalIdx.find(AMDGPUOperand::ImmTyExpCompr) != OptionalIdx.end()) {
7110 for (
auto i = 0; i < SrcIdx; ++i) {
7112 EnMask |= Compr? (0x3 << i * 2) : (0x1 << i);
7137 IntVal =
encode(ISA, IntVal, CntVal);
7138 if (CntVal !=
decode(ISA, IntVal)) {
7140 IntVal =
encode(ISA, IntVal, -1);
7148bool AMDGPUAsmParser::parseCnt(int64_t &IntVal) {
7150 SMLoc CntLoc = getLoc();
7158 SMLoc ValLoc = getLoc();
7167   if (CntName == "vmcnt" || CntName == "vmcnt_sat") {
7169   } else if (CntName == "expcnt" || CntName == "expcnt_sat") {
7171   } else if (CntName == "lgkmcnt" || CntName == "lgkmcnt_sat") {
7174     Error(CntLoc, "invalid counter name " + CntName);
7179     Error(ValLoc, "too large value for " + CntName);
7188     Error(getLoc(), "expected a counter name");
7215 bool AMDGPUAsmParser::parseDelay(int64_t &Delay) {
7216   SMLoc FieldLoc = getLoc();
7222   SMLoc ValueLoc = getLoc();
7229   if (FieldName == "instid0") {
7231   } else if (FieldName == "instskip") {
7233   } else if (FieldName == "instid1") {
7236     Error(FieldLoc, "invalid field name " + FieldName);
7255       .Case("VALU_DEP_1", 1)
7256       .Case("VALU_DEP_2", 2)
7257       .Case("VALU_DEP_3", 3)
7258       .Case("VALU_DEP_4", 4)
7259       .Case("TRANS32_DEP_1", 5)
7260       .Case("TRANS32_DEP_2", 6)
7261       .Case("TRANS32_DEP_3", 7)
7262       .Case("FMA_ACCUM_CYCLE_1", 8)
7263       .Case("SALU_CYCLE_1", 9)
7264       .Case("SALU_CYCLE_2", 10)
7265       .Case("SALU_CYCLE_3", 11)
7273     Delay |= Value << Shift;
7283 if (!parseDelay(Delay))
7291   Operands.push_back(AMDGPUOperand::CreateImm(this, Delay, S));
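// Usage sketch (assumption): the structured form parsed above, e.g.
//   s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
// Each field name is looked up via the StringSwitch table and shifted into its slot
// of the combined delay immediate.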
7296AMDGPUOperand::isSWaitCnt()
const {
7300bool AMDGPUOperand::isSDelayALU()
const {
return isImm(); }
7306void AMDGPUAsmParser::depCtrError(
SMLoc Loc,
int ErrorId,
7310 Error(Loc,
Twine(
"invalid counter name ", DepCtrName));
7313 Error(Loc,
Twine(DepCtrName,
" is not supported on this GPU"));
7316 Error(Loc,
Twine(
"duplicate counter name ", DepCtrName));
7319 Error(Loc,
Twine(
"invalid value for ", DepCtrName));
7326bool AMDGPUAsmParser::parseDepCtr(int64_t &DepCtr,
unsigned &UsedOprMask) {
7330 SMLoc DepCtrLoc = getLoc();
7341 unsigned PrevOprMask = UsedOprMask;
7342 int CntVal =
encodeDepCtr(DepCtrName, ExprVal, UsedOprMask, getSTI());
7345 depCtrError(DepCtrLoc, CntVal, DepCtrName);
7354 Error(getLoc(),
"expected a counter name");
7359 unsigned CntValMask = PrevOprMask ^ UsedOprMask;
7360 DepCtr = (DepCtr & ~CntValMask) | CntVal;
7368 SMLoc Loc = getLoc();
7371 unsigned UsedOprMask = 0;
7373 if (!parseDepCtr(DepCtr, UsedOprMask))
7381 Operands.push_back(AMDGPUOperand::CreateImm(
this, DepCtr, Loc));
7385bool AMDGPUOperand::isDepCtr()
const {
return isS16Imm(); }
7391ParseStatus AMDGPUAsmParser::parseHwregFunc(OperandInfoTy &HwReg,
7393 OperandInfoTy &Width) {
7400 HwReg.Loc = getLoc();
7403 HwReg.IsSymbolic =
true;
7405 }
else if (!
parseExpr(HwReg.Val,
"a register name")) {
7413 if (!skipToken(
AsmToken::Comma,
"expected a comma or a closing parenthesis"))
7423 Width.Loc = getLoc();
7435 SMLoc Loc = getLoc();
7437 StructuredOpField HwReg(
"id",
"hardware register", HwregId::Width,
7439 StructuredOpField
Offset(
"offset",
"bit offset", HwregOffset::Width,
7440 HwregOffset::Default);
7441 struct : StructuredOpField {
7442 using StructuredOpField::StructuredOpField;
7443 bool validate(AMDGPUAsmParser &Parser)
const override {
7445 return Error(Parser,
"only values from 1 to 32 are legal");
7448 } Width(
"size",
"bitfield width", HwregSize::Width, HwregSize::Default);
7452 Res = parseHwregFunc(HwReg,
Offset, Width);
7455 if (!validateStructuredOpFields({&HwReg, &
Offset, &Width}))
7457 ImmVal = HwregEncoding::encode(HwReg.Val,
Offset.Val, Width.Val);
7461 parseExpr(ImmVal,
"a hwreg macro, structured immediate"))
7467 if (!isUInt<16>(ImmVal))
7468 return Error(Loc,
"invalid immediate: only 16-bit values are legal");
7470 AMDGPUOperand::CreateImm(
this, ImmVal, Loc, AMDGPUOperand::ImmTyHwreg));
7474 bool AMDGPUOperand::isHwreg() const {
7475   return isImmTy(ImmTyHwreg);
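// Illustrative sketch (assumption about HwregEncoding::encode; helper name
// invented): hwreg(id, offset, size) packs into the 16-bit s_getreg/s_setreg
// immediate as id in bits [5:0], bit offset in [10:6] and (size - 1) in [15:11].
static unsigned encodeHwregImm(unsigned Id, unsigned Offset, unsigned Size) {
  return (Id & 0x3f) | ((Offset & 0x1f) << 6) | (((Size - 1) & 0x1f) << 11);
}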
7483AMDGPUAsmParser::parseSendMsgBody(OperandInfoTy &Msg,
7485 OperandInfoTy &Stream) {
7491 Msg.IsSymbolic =
true;
7493 }
else if (!
parseExpr(Msg.Val,
"a message name")) {
7498 Op.IsDefined =
true;
7501 (
Op.Val =
getMsgOpId(Msg.Val, getTokenStr(), getSTI())) !=
7504 }
else if (!
parseExpr(
Op.Val,
"an operation name")) {
7509 Stream.IsDefined =
true;
7510 Stream.Loc = getLoc();
7520AMDGPUAsmParser::validateSendMsg(
const OperandInfoTy &Msg,
7521 const OperandInfoTy &
Op,
7522 const OperandInfoTy &Stream) {
7528 bool Strict = Msg.IsSymbolic;
7532 Error(Msg.Loc,
"specified message id is not supported on this GPU");
7537 Error(Msg.Loc,
"invalid message id");
7543 Error(
Op.Loc,
"message does not support operations");
7545 Error(Msg.Loc,
"missing message operation");
7551 Error(
Op.Loc,
"specified operation id is not supported on this GPU");
7553 Error(
Op.Loc,
"invalid operation id");
7558 Error(Stream.Loc,
"message operation does not support streams");
7562 Error(Stream.Loc,
"invalid message stream id");
7572 SMLoc Loc = getLoc();
7576 OperandInfoTy
Op(OP_NONE_);
7577 OperandInfoTy Stream(STREAM_ID_NONE_);
7578 if (parseSendMsgBody(Msg,
Op, Stream) &&
7579 validateSendMsg(Msg,
Op, Stream)) {
7584 }
else if (
parseExpr(ImmVal,
"a sendmsg macro")) {
7585 if (ImmVal < 0 || !isUInt<16>(ImmVal))
7586 return Error(Loc,
"invalid immediate: only 16-bit values are legal");
7591 Operands.push_back(AMDGPUOperand::CreateImm(
this, ImmVal, Loc, AMDGPUOperand::ImmTySendMsg));
7595 bool AMDGPUOperand::isSendMsg() const {
7596   return isImmTy(ImmTySendMsg);
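// Note (assumption): the operand form is sendmsg(<msg>[, <operation>[, <stream>]]),
// e.g.  s_sendmsg sendmsg(MSG_GS, GS_OP_EMIT, 0)
// The message id, operation id and stream id are packed into the 16-bit immediate;
// the exact bit layout differs between pre-gfx11 and gfx11 targets.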
    return Error(S, "invalid interpolation slot");
  Operands.push_back(AMDGPUOperand::CreateImm(this, Slot, S,
                                              AMDGPUOperand::ImmTyInterpSlot));

  if (!Str.starts_with("attr"))
    return Error(S, "invalid interpolation attribute");
    return Error(S, "invalid or missing interpolation attribute channel");
  Str = Str.drop_back(2).drop_front(4);
  if (Str.getAsInteger(10, Attr))
    return Error(S, "invalid or missing interpolation attribute number");
    return Error(S, "out of bounds interpolation attribute number");
  Operands.push_back(AMDGPUOperand::CreateImm(this, Attr, S,
                                              AMDGPUOperand::ImmTyInterpAttr));
  Operands.push_back(AMDGPUOperand::CreateImm(
      this, AttrChan, SChan, AMDGPUOperand::ImmTyInterpAttrChan));

    return Error(S, (Id == ET_INVALID)
                        ? "invalid exp target"
                        : "exp target is not supported on this GPU");
  Operands.push_back(AMDGPUOperand::CreateImm(this, Id, S,
                                              AMDGPUOperand::ImmTyExpTgt));
  return isId(getToken(), Id);

  return getTokenKind() == Kind;

StringRef AMDGPUAsmParser::getId() const {

  if (isId(Id) && peekToken().is(Kind)) {

  if (isToken(Kind)) {

  if (!trySkipToken(Kind)) {
    Error(getLoc(), ErrMsg);

  if (Parser.parseExpression(Expr))
  if (Expr->evaluateAsAbsolute(Imm))
  Error(S, "expected absolute expression");
  Twine(" or an absolute expression"));

  if (Parser.parseExpression(Expr))
  if (Expr->evaluateAsAbsolute(IntVal)) {
    Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S));
    Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S));

  Val = getToken().getStringContents();
  Error(getLoc(), ErrMsg);

  Val = getTokenStr();
  if (!ErrMsg.empty())
    Error(getLoc(), ErrMsg);

AMDGPUAsmParser::getToken() const {
  return Parser.getTok();

AsmToken AMDGPUAsmParser::peekToken(bool ShouldSkipSpace) {
  : getLexer().peekTok(ShouldSkipSpace);

  auto TokCount = getLexer().peekTokens(Tokens);

AMDGPUAsmParser::getTokenKind() const {

AMDGPUAsmParser::getLoc() const {
  return getToken().getLoc();

AMDGPUAsmParser::getTokenStr() const {
  return getToken().getString();

AMDGPUAsmParser::lex() {
  return ((AMDGPUOperand &)*Operands[0]).getStartLoc();

AMDGPUAsmParser::getOperandLoc(std::function<bool(const AMDGPUOperand&)> Test,
  for (unsigned i = Operands.size() - 1; i > 0; --i) {
    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
      return Op.getStartLoc();

AMDGPUAsmParser::getImmLoc(AMDGPUOperand::ImmTy Type,
  auto Test = [=](const AMDGPUOperand& Op) { return Op.isImmTy(Type); };

AMDGPUAsmParser::getRegLoc(unsigned Reg,
  auto Test = [=](const AMDGPUOperand& Op) {
    return Op.isRegKind() && Op.getReg() == Reg;

  bool SearchMandatoryLiterals) const {
  auto Test = [](const AMDGPUOperand& Op) {
    return Op.IsImmKindLiteral() || Op.isExpr();
  if (SearchMandatoryLiterals && Loc == getInstLoc(Operands))
    Loc = getMandatoryLitLoc(Operands);

  auto Test = [](const AMDGPUOperand &Op) {
    return Op.IsImmKindMandatoryLiteral();

  auto Test = [](const AMDGPUOperand& Op) {
    return Op.isImmKindConst();
  SMLoc IdLoc = getLoc();
  find_if(Fields, [Id](StructuredOpField *F) { return F->Id == Id; });
  if (I == Fields.end())
    return Error(IdLoc, "unknown field");
  if ((*I)->IsDefined)
    return Error(IdLoc, "duplicate field");
  (*I)->Loc = getLoc();
  (*I)->IsDefined = true;

bool AMDGPUAsmParser::validateStructuredOpFields(
  return all_of(Fields, [this](const StructuredOpField *F) {
    return F->validate(*this);
static LLVM_READNONE unsigned encodeBitmaskPerm(const unsigned AndMask,
                                                const unsigned OrMask,
                                                const unsigned XorMask) {
  return BITMASK_PERM_ENC |
         (AndMask << BITMASK_AND_SHIFT) |
         (OrMask << BITMASK_OR_SHIFT) |
         (XorMask << BITMASK_XOR_SHIFT);
}
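encodeBitmaskPerm combines the three 5-bit masks that parseSwizzleBitmaskPerm (further below) extracts from a 5-character control string. A minimal self-contained sketch of that string-to-mask decoding follows; it assumes the conventional meaning of the mask characters ('0' force to zero, '1' force to one, 'p' preserve, 'i' invert) and does not quote the parser's elided switch.

#include <string>

// Sketch: derive the three 5-bit masks from a 5-character control string.
static bool decodeBitmaskCtlSketch(const std::string &Ctl, unsigned &AndMask,
                                   unsigned &OrMask, unsigned &XorMask) {
  AndMask = OrMask = XorMask = 0;
  for (size_t i = 0; i < Ctl.size(); ++i) {
    unsigned Mask = 1u << (Ctl.size() - 1 - i); // leftmost character = MSB (assumed)
    switch (Ctl[i]) {
    case '0': break;                                   // bit forced to 0
    case '1': OrMask |= Mask; break;                   // bit forced to 1
    case 'p': AndMask |= Mask; break;                  // bit preserved
    case 'i': AndMask |= Mask; XorMask |= Mask; break; // bit inverted
    default: return false;                             // invalid mask character
    }
  }
  return true;
}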
AMDGPUAsmParser::parseSwizzleOperand(int64_t &Op,
                                     const unsigned MinVal,
                                     const unsigned MaxVal,
  if (Op < MinVal || Op > MaxVal) {

AMDGPUAsmParser::parseSwizzleOperands(const unsigned OpNum, int64_t* Op,
                                      const unsigned MinVal,
                                      const unsigned MaxVal,
  for (unsigned i = 0; i < OpNum; ++i) {
    if (!parseSwizzleOperand(Op[i], MinVal, MaxVal, ErrMsg, Loc))

AMDGPUAsmParser::parseSwizzleQuadPerm(int64_t &Imm) {
  if (parseSwizzleOperands(LANE_NUM, Lane, 0, LANE_MAX,
                           "expected a 2-bit lane id")) {

AMDGPUAsmParser::parseSwizzleBroadcast(int64_t &Imm) {
  if (!parseSwizzleOperand(GroupSize,
                           "group size must be in the interval [2,32]",
    Error(Loc, "group size must be a power of two");
  if (parseSwizzleOperand(LaneIdx,
                          "lane id must be in the interval [0,group size - 1]",

AMDGPUAsmParser::parseSwizzleReverse(int64_t &Imm) {
  if (!parseSwizzleOperand(GroupSize,
                           "group size must be in the interval [2,32]",
    Error(Loc, "group size must be a power of two");

AMDGPUAsmParser::parseSwizzleSwap(int64_t &Imm) {
  if (!parseSwizzleOperand(GroupSize,
                           "group size must be in the interval [1,16]",
    Error(Loc, "group size must be a power of two");
AMDGPUAsmParser::parseSwizzleBitmaskPerm(int64_t &Imm) {
  SMLoc StrLoc = getLoc();
  if (!parseString(Ctl)) {
  if (Ctl.size() != BITMASK_WIDTH) {
    Error(StrLoc, "expected a 5-character mask");

  unsigned AndMask = 0;
  unsigned OrMask = 0;
  unsigned XorMask = 0;

  for (size_t i = 0; i < Ctl.size(); ++i) {
    Error(StrLoc, "invalid mask");

AMDGPUAsmParser::parseSwizzleOffset(int64_t &Imm) {
  SMLoc OffsetLoc = getLoc();
  if (!parseExpr(Imm, "a swizzle macro")) {
  if (!isUInt<16>(Imm)) {
    Error(OffsetLoc, "expected a 16-bit offset");
AMDGPUAsmParser::parseSwizzleMacro(int64_t &Imm) {
  SMLoc ModeLoc = getLoc();

  if (trySkipId(IdSymbolic[ID_QUAD_PERM])) {
    Ok = parseSwizzleQuadPerm(Imm);
  } else if (trySkipId(IdSymbolic[ID_BITMASK_PERM])) {
    Ok = parseSwizzleBitmaskPerm(Imm);
  } else if (trySkipId(IdSymbolic[ID_BROADCAST])) {
    Ok = parseSwizzleBroadcast(Imm);
  } else if (trySkipId(IdSymbolic[ID_SWAP])) {
    Ok = parseSwizzleSwap(Imm);
  } else if (trySkipId(IdSymbolic[ID_REVERSE])) {
    Ok = parseSwizzleReverse(Imm);
    Error(ModeLoc, "expected a swizzle mode");

  return Ok && skipToken(AsmToken::RParen, "expected a closing parenthesis");
  if (trySkipId("offset")) {
  if (trySkipId("swizzle")) {
    Ok = parseSwizzleMacro(Imm);
    Ok = parseSwizzleOffset(Imm);
  Operands.push_back(AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTySwizzle));

AMDGPUOperand::isSwizzle() const {
  return isImmTy(ImmTySwizzle);
}

int64_t AMDGPUAsmParser::parseGPRIdxMacro() {
  for (unsigned ModeId = ID_MIN; ModeId <= ID_MAX; ++ModeId) {
    if (trySkipId(IdSymbolic[ModeId])) {
  Error(S, (Imm == 0)?
           "expected a VGPR index mode or a closing parenthesis" :
           "expected a VGPR index mode");
  Error(S, "duplicate VGPR index mode");
  "expected a comma or a closing parenthesis"))

  Imm = parseGPRIdxMacro();
  if (getParser().parseAbsoluteExpression(Imm))
  if (Imm < 0 || !isUInt<4>(Imm))
    return Error(S, "invalid immediate: only 4-bit values are legal");
  AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyGprIdxMode));

bool AMDGPUOperand::isGPRIdxMode() const {
  return isImmTy(ImmTyGprIdxMode);
}
  if (isRegister() || isModifier())

  assert(Opr.isImm() || Opr.isExpr());
  SMLoc Loc = Opr.getStartLoc();
  if (Opr.isExpr() && !Opr.isSymbolRefExpr()) {
    Error(Loc, "expected an absolute expression or a label");
  } else if (Opr.isImm() && !Opr.isS16Imm()) {
    Error(Loc, "expected a 16-bit signed jump offset");

void AMDGPUAsmParser::cvtMubufImpl(MCInst &Inst,
  OptionalImmIndexMap OptionalIdx;
  unsigned FirstOperandIdx = 1;
  bool IsAtomicReturn = false;

  for (unsigned i = FirstOperandIdx, e = Operands.size(); i != e; ++i) {
    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
      Op.addRegOperands(Inst, 1);
    if (IsAtomicReturn && i == FirstOperandIdx)
      Op.addRegOperands(Inst, 1);
    if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) {
      Op.addImmOperands(Inst, 1);
    OptionalIdx[Op.getImmTy()] = i;
bool AMDGPUOperand::isSMRDOffset8() const {
  return isImmLiteral() && isUInt<8>(getImm());
}

bool AMDGPUOperand::isSMEMOffset() const {
  return isImmLiteral();
}

bool AMDGPUOperand::isSMRDLiteralOffset() const {
  return isImmLiteral() && !isUInt<8>(getImm()) && isUInt<32>(getImm());
}

bool AMDGPUAsmParser::convertDppBoundCtrl(int64_t &BoundCtrl) {
  if (BoundCtrl == 0 || BoundCtrl == 1) {

void AMDGPUAsmParser::onBeginOfFile() {
  if (!getParser().getStreamer().getTargetStreamer() ||
  if (!getTargetStreamer().getTargetID())
    getTargetStreamer().initializeTargetID(getSTI(),
                                           getSTI().getFeatureString());
  getTargetStreamer().EmitDirectiveAMDGCNTarget();
bool AMDGPUAsmParser::parsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc) {
      .Case("max", AGVK::AGVK_Max)
      .Case("or", AGVK::AGVK_Or)
      .Case("extrasgprs", AGVK::AGVK_ExtraSGPRs)
      .Case("totalnumvgprs", AGVK::AGVK_TotalNumVGPRs)
      .Case("alignto", AGVK::AGVK_AlignTo)
      .Case("occupancy", AGVK::AGVK_Occupancy)

    if (Exprs.empty()) {
      Error(getToken().getLoc(),
            "empty " + Twine(TokenId) + " expression");
    if (CommaCount + 1 != Exprs.size()) {
      Error(getToken().getLoc(),
            "mismatch of commas in " + Twine(TokenId) + " expression");
    if (getParser().parseExpression(Expr, EndLoc))
    if (LastTokenWasComma)
      Error(getToken().getLoc(),
            "unexpected token in " + Twine(TokenId) + " expression");

  return getParser().parsePrimaryExpr(Res, EndLoc, nullptr);
  if (Name == "mul") {
    return parseIntWithPrefix("mul", Operands,
  if (Name == "div") {
    return parseIntWithPrefix("div", Operands,

  const int Ops[] = { AMDGPU::OpName::src0,
                      AMDGPU::OpName::src1,
                      AMDGPU::OpName::src2 };
  if (DstOp.isReg() &&
      MRI.getRegClass(AMDGPU::VGPR_16RegClassID).contains(DstOp.getReg())) {
    if ((OpSel & (1 << SrcNum)) != 0)

void AMDGPUAsmParser::cvtVOP3OpSel(MCInst &Inst,
                                   OptionalImmIndexMap &OptionalIdx) {
  cvtVOP3P(Inst, Operands, OptionalIdx);

  && Desc.NumOperands > (OpNum + 1)
  && Desc.operands()[OpNum + 1].RegClass != -1
  && Desc.getOperandConstraint(OpNum + 1,
                               MCOI::OperandConstraint::TIED_TO) == -1;
  OptionalImmIndexMap OptionalIdx;
  for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
    ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);

  for (unsigned E = Operands.size(); I != E; ++I) {
    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
      Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
    } else if (Op.isInterpSlot() || Op.isInterpAttr() ||
               Op.isInterpAttrChan()) {
    } else if (Op.isImmModifier()) {
      OptionalIdx[Op.getImmTy()] = I;

                        AMDGPUOperand::ImmTyHigh);
                        AMDGPUOperand::ImmTyClamp);
                        AMDGPUOperand::ImmTyOModSI);
  OptionalImmIndexMap OptionalIdx;
  for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
    ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);

  for (unsigned E = Operands.size(); I != E; ++I) {
    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
      Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
    } else if (Op.isImmModifier()) {
      OptionalIdx[Op.getImmTy()] = I;

  const int Ops[] = { AMDGPU::OpName::src0,
                      AMDGPU::OpName::src1,
                      AMDGPU::OpName::src2 };
  const int ModOps[] = { AMDGPU::OpName::src0_modifiers,
                         AMDGPU::OpName::src1_modifiers,
                         AMDGPU::OpName::src2_modifiers };

  for (int J = 0; J < 3; ++J) {
    if ((OpSel & (1 << J)) != 0)
    if (ModOps[J] == AMDGPU::OpName::src0_modifiers &&
        (OpSel & (1 << 3)) != 0)
                               OptionalImmIndexMap &OptionalIdx) {
  for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
    ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);

  for (unsigned E = Operands.size(); I != E; ++I) {
    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
      Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
    } else if (Op.isImmModifier()) {
      OptionalIdx[Op.getImmTy()] = I;
      Op.addRegOrImmOperands(Inst, 1);

                        AMDGPUOperand::ImmTyByteSel);
                        AMDGPUOperand::ImmTyClamp);
                        AMDGPUOperand::ImmTyOModSI);

  auto it = Inst.begin();

  OptionalImmIndexMap OptionalIdx;
  cvtVOP3(Inst, Operands, OptionalIdx);
                               OptionalImmIndexMap &OptIdx) {
  if (Opc == AMDGPU::V_CVT_SR_BF8_F32_vi ||
      Opc == AMDGPU::V_CVT_SR_FP8_F32_vi ||
      Opc == AMDGPU::V_CVT_SR_BF8_F32_gfx12_e64_gfx12 ||
      Opc == AMDGPU::V_CVT_SR_FP8_F32_gfx12_e64_gfx12) {
      !(Opc == AMDGPU::V_CVT_PK_BF8_F32_e64_dpp_gfx12 ||
        Opc == AMDGPU::V_CVT_PK_FP8_F32_e64_dpp_gfx12 ||
        Opc == AMDGPU::V_CVT_PK_BF8_F32_e64_dpp8_gfx12 ||
        Opc == AMDGPU::V_CVT_PK_FP8_F32_e64_dpp8_gfx12 ||
        Opc == AMDGPU::V_CVT_SR_FP8_F32_gfx12_e64_dpp_gfx12 ||
        Opc == AMDGPU::V_CVT_SR_FP8_F32_gfx12_e64_dpp8_gfx12 ||
        Opc == AMDGPU::V_CVT_SR_BF8_F32_gfx12_e64_dpp_gfx12 ||
        Opc == AMDGPU::V_CVT_SR_BF8_F32_gfx12_e64_dpp8_gfx12)) {

  if (OpSelIdx != -1) {
  if (OpSelHiIdx != -1) {

  const int Ops[] = { AMDGPU::OpName::src0,
                      AMDGPU::OpName::src1,
                      AMDGPU::OpName::src2 };
  const int ModOps[] = { AMDGPU::OpName::src0_modifiers,
                         AMDGPU::OpName::src1_modifiers,
                         AMDGPU::OpName::src2_modifiers };

  unsigned OpSelHi = 0;
  if (OpSelHiIdx != -1)

  for (int J = 0; J < 3; ++J) {
    if (SrcOp.isReg() && getMRI()
    if ((OpSel & (1 << J)) != 0)
    if ((OpSelHi & (1 << J)) != 0)
    if ((NegLo & (1 << J)) != 0)
    if ((NegHi & (1 << J)) != 0)
  OptionalImmIndexMap OptIdx;

                                unsigned i, unsigned Opc, unsigned OpName) {
    ((AMDGPUOperand &)*Operands[i]).addRegOrImmWithFPInputModsOperands(Inst, 2);
    ((AMDGPUOperand &)*Operands[i]).addRegOperands(Inst, 1);

  ((AMDGPUOperand &)*Operands[1]).addRegOperands(Inst, 1);

  ((AMDGPUOperand &)*Operands[1]).addRegOperands(Inst, 1);
  ((AMDGPUOperand &)*Operands[4]).addRegOperands(Inst, 1);

  OptionalImmIndexMap OptIdx;
  for (unsigned i = 5; i < Operands.size(); ++i) {
    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
    OptIdx[Op.getImmTy()] = i;

                        AMDGPUOperand::ImmTyIndexKey8bit);
                        AMDGPUOperand::ImmTyIndexKey16bit);
  Operands.push_back(AMDGPUOperand::CreateToken(this, "::", S));
  SMLoc OpYLoc = getLoc();
  Operands.push_back(AMDGPUOperand::CreateToken(this, OpYName, OpYLoc));
  return Error(OpYLoc, "expected a VOPDY instruction after ::");

  auto addOp = [&](uint16_t ParsedOprIdx) {
    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[ParsedOprIdx]);
      Op.addRegOperands(Inst, 1);
      Op.addImmOperands(Inst, 1);

    addOp(InstInfo[CompIdx].getIndexOfDstInParsedOperands());

    const auto &CInfo = InstInfo[CompIdx];
    auto CompSrcOperandsNum = InstInfo[CompIdx].getCompParsedSrcOperandsNum();
    for (unsigned CompSrcIdx = 0; CompSrcIdx < CompSrcOperandsNum; ++CompSrcIdx)
      addOp(CInfo.getIndexOfSrcInParsedOperands(CompSrcIdx));
    if (CInfo.hasSrc2Acc())
      addOp(CInfo.getIndexOfDstInParsedOperands());
bool AMDGPUOperand::isDPP8() const {
  return isImmTy(ImmTyDPP8);
}

bool AMDGPUOperand::isDPPCtrl() const {
  using namespace AMDGPU::DPP;

  bool result = isImm() && getImmTy() == ImmTyDppCtrl && isUInt<9>(getImm());
    int64_t Imm = getImm();
    return (Imm >= DppCtrl::QUAD_PERM_FIRST && Imm <= DppCtrl::QUAD_PERM_LAST) ||
           (Imm >= DppCtrl::ROW_SHL_FIRST && Imm <= DppCtrl::ROW_SHL_LAST) ||
           (Imm >= DppCtrl::ROW_SHR_FIRST && Imm <= DppCtrl::ROW_SHR_LAST) ||
           (Imm >= DppCtrl::ROW_ROR_FIRST && Imm <= DppCtrl::ROW_ROR_LAST) ||
           (Imm == DppCtrl::WAVE_SHL1) ||
           (Imm == DppCtrl::WAVE_ROL1) ||
           (Imm == DppCtrl::WAVE_SHR1) ||
           (Imm == DppCtrl::WAVE_ROR1) ||
           (Imm == DppCtrl::ROW_MIRROR) ||
           (Imm == DppCtrl::ROW_HALF_MIRROR) ||
           (Imm == DppCtrl::BCAST15) ||
           (Imm == DppCtrl::BCAST31) ||
           (Imm >= DppCtrl::ROW_SHARE_FIRST && Imm <= DppCtrl::ROW_SHARE_LAST) ||
           (Imm >= DppCtrl::ROW_XMASK_FIRST && Imm <= DppCtrl::ROW_XMASK_LAST);
bool AMDGPUOperand::isBLGP() const {
  return isImm() && getImmTy() == ImmTyBLGP && isUInt<3>(getImm());
}

bool AMDGPUOperand::isS16Imm() const {
  return isImmLiteral() && (isInt<16>(getImm()) || isUInt<16>(getImm()));
}

bool AMDGPUOperand::isU16Imm() const {
  return isImmLiteral() && isUInt<16>(getImm());
}

bool AMDGPUAsmParser::parseDimId(unsigned &Encoding) {
  SMLoc Loc = getToken().getEndLoc();
  Token = std::string(getTokenStr());
  if (getLoc() != Loc)
  if (!parseId(Suffix))

  SMLoc Loc = getLoc();
  if (!parseDimId(Encoding))
    return Error(Loc, "invalid dim value");
  Operands.push_back(AMDGPUOperand::CreateImm(this, Encoding, S,
                                              AMDGPUOperand::ImmTyDim));
  if (!skipToken(AsmToken::LBrac, "expected an opening square bracket"))

  for (size_t i = 0; i < 8; ++i) {
    SMLoc Loc = getLoc();
    if (getParser().parseAbsoluteExpression(Sels[i]))
    if (0 > Sels[i] || 7 < Sels[i])
      return Error(Loc, "expected a 3-bit value");

  for (size_t i = 0; i < 8; ++i)
    DPP8 |= (Sels[i] << (i * 3));

  Operands.push_back(AMDGPUOperand::CreateImm(this, DPP8, S, AMDGPUOperand::ImmTyDPP8));
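The loop above is the whole dpp8 encoding: eight 3-bit lane selectors packed low-to-high into a 24-bit immediate. A self-contained sketch of the same packing follows; the helper name and the example value are ours, only the shift-by-3*i scheme comes from the code.

#include <cstdint>
#include <cstddef>

// Packs dpp8:[s0,s1,...,s7] into one immediate, 3 bits per lane selector.
static int64_t packDPP8Sketch(const int64_t Sels[8]) {
  int64_t DPP8 = 0;
  for (size_t i = 0; i < 8; ++i)
    DPP8 |= (Sels[i] << (i * 3));
  return DPP8;
}
// Example: the identity selection [0,1,2,3,4,5,6,7] packs to 0xFAC688.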
AMDGPUAsmParser::isSupportedDPPCtrl(StringRef Ctrl,
  if (Ctrl == "row_newbcast")
  if (Ctrl == "row_share" ||
      Ctrl == "row_xmask")
  if (Ctrl == "wave_shl" ||
      Ctrl == "wave_shr" ||
      Ctrl == "wave_rol" ||
      Ctrl == "wave_ror" ||
      Ctrl == "row_bcast")
  return Ctrl == "row_mirror" ||
         Ctrl == "row_half_mirror" ||
         Ctrl == "quad_perm" ||
         Ctrl == "row_shl" ||
         Ctrl == "row_shr" ||

AMDGPUAsmParser::parseDPPCtrlPerm() {
  if (!skipToken(AsmToken::LBrac, "expected an opening square bracket"))

  for (int i = 0; i < 4; ++i) {
    SMLoc Loc = getLoc();
    if (getParser().parseAbsoluteExpression(Temp))
    if (Temp < 0 || Temp > 3) {
      Error(Loc, "expected a 2-bit value");
    Val += (Temp << i * 2);
AMDGPUAsmParser::parseDPPCtrlSel(StringRef Ctrl) {
  using namespace AMDGPU::DPP;
  SMLoc Loc = getLoc();
  if (getParser().parseAbsoluteExpression(Val))

  struct DppCtrlCheck {
      .Case("wave_shl", {DppCtrl::WAVE_SHL1, 1, 1})
      .Case("wave_rol", {DppCtrl::WAVE_ROL1, 1, 1})
      .Case("wave_shr", {DppCtrl::WAVE_SHR1, 1, 1})
      .Case("wave_ror", {DppCtrl::WAVE_ROR1, 1, 1})
      .Case("row_shl", {DppCtrl::ROW_SHL0, 1, 15})
      .Case("row_shr", {DppCtrl::ROW_SHR0, 1, 15})
      .Case("row_ror", {DppCtrl::ROW_ROR0, 1, 15})
      .Case("row_share", {DppCtrl::ROW_SHARE_FIRST, 0, 15})
      .Case("row_xmask", {DppCtrl::ROW_XMASK_FIRST, 0, 15})
      .Case("row_newbcast", {DppCtrl::ROW_NEWBCAST_FIRST, 0, 15})

  if (Check.Ctrl == -1) {
    Valid = (Ctrl == "row_bcast" && (Val == 15 || Val == 31));
    Val = (Val == 15)? DppCtrl::BCAST15 : DppCtrl::BCAST31;

  using namespace AMDGPU::DPP;
      !isSupportedDPPCtrl(getTokenStr(), Operands))

  if (Ctrl == "row_mirror") {
    Val = DppCtrl::ROW_MIRROR;
  } else if (Ctrl == "row_half_mirror") {
    Val = DppCtrl::ROW_HALF_MIRROR;
    if (Ctrl == "quad_perm") {
      Val = parseDPPCtrlPerm();
      Val = parseDPPCtrlSel(Ctrl);

  AMDGPUOperand::CreateImm(this, Val, S, AMDGPUOperand::ImmTyDppCtrl));
  OptionalImmIndexMap OptionalIdx;

  bool IsMAC = OldIdx != -1 && Src2ModIdx != -1 &&

  for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
    ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);

  for (unsigned E = Operands.size(); I != E; ++I) {
    if (OldIdx == NumOperands) {
      constexpr int DST_IDX = 0;
    } else if (Src2ModIdx == NumOperands) {

    bool IsVOP3CvtSrDpp =
        Opc == AMDGPU::V_CVT_SR_BF8_F32_gfx12_e64_dpp8_gfx12 ||
        Opc == AMDGPU::V_CVT_SR_FP8_F32_gfx12_e64_dpp8_gfx12 ||
        Opc == AMDGPU::V_CVT_SR_BF8_F32_gfx12_e64_dpp_gfx12 ||
        Opc == AMDGPU::V_CVT_SR_FP8_F32_gfx12_e64_dpp_gfx12;
    if (IsVOP3CvtSrDpp) {

    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
    if (IsDPP8 && Op.isDppFI()) {
      Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
    } else if (Op.isReg()) {
      Op.addRegOperands(Inst, 1);
    } else if (Op.isImm() &&
      assert(!Op.IsImmKindLiteral() && "Cannot use literal with DPP");
      Op.addImmOperands(Inst, 1);
    } else if (Op.isImm()) {
      OptionalIdx[Op.getImmTy()] = I;

                        AMDGPUOperand::ImmTyByteSel);
                        AMDGPUOperand::ImmTyClamp);

    cvtVOP3P(Inst, Operands, OptionalIdx);
    cvtVOP3OpSel(Inst, Operands, OptionalIdx);

                        AMDGPUOperand::ImmTyDppFI);
  OptionalImmIndexMap OptionalIdx;

  for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
    ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);

  for (unsigned E = Operands.size(); I != E; ++I) {
    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
    if (Op.isReg() && validateVccOperand(Op.getReg())) {
      Op.addImmOperands(Inst, 1);
      Op.addRegWithFPInputModsOperands(Inst, 2);
    } else if (Op.isDppFI()) {
    } else if (Op.isReg()) {
      Op.addRegOperands(Inst, 1);
      Op.addRegWithFPInputModsOperands(Inst, 2);
    } else if (Op.isReg()) {
      Op.addRegOperands(Inst, 1);
    } else if (Op.isDPPCtrl()) {
      Op.addImmOperands(Inst, 1);
    } else if (Op.isImm()) {
      OptionalIdx[Op.getImmTy()] = I;

                        AMDGPUOperand::ImmTyDppFI);
                                    AMDGPUOperand::ImmTy Type) {
  return parseStringOrIntWithPrefix(
      {"BYTE_0", "BYTE_1", "BYTE_2", "BYTE_3", "WORD_0", "WORD_1", "DWORD"},

  return parseStringOrIntWithPrefix(
      Operands, "dst_unused", {"UNUSED_PAD", "UNUSED_SEXT", "UNUSED_PRESERVE"},
      AMDGPUOperand::ImmTySDWADstUnused);
  OptionalImmIndexMap OptionalIdx;
  bool SkipVcc = SkipDstVcc || SkipSrcVcc;
  bool SkippedVcc = false;

  for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
    ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);

  for (unsigned E = Operands.size(); I != E; ++I) {
    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
    if (SkipVcc && !SkippedVcc && Op.isReg() &&
        (Op.getReg() == AMDGPU::VCC || Op.getReg() == AMDGPU::VCC_LO)) {
      Op.addRegOrImmWithInputModsOperands(Inst, 2);
    } else if (Op.isImm()) {
      OptionalIdx[Op.getImmTy()] = I;

  if (Opc != AMDGPU::V_NOP_sdwa_gfx10 && Opc != AMDGPU::V_NOP_sdwa_gfx9 &&
      Opc != AMDGPU::V_NOP_sdwa_vi) {
    switch (BasicInstType) {
                          AMDGPUOperand::ImmTyClamp, 0);
                          AMDGPUOperand::ImmTyOModSI, 0);
                          AMDGPUOperand::ImmTySDWADstSel, SdwaSel::DWORD);
                          AMDGPUOperand::ImmTySDWADstUnused,
                          DstUnused::UNUSED_PRESERVE);
                          AMDGPUOperand::ImmTyClamp, 0);
                          AMDGPUOperand::ImmTyClamp, 0);
      llvm_unreachable("Invalid instruction type. Only VOP1, VOP2 and VOPC allowed");

  if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi ||
      Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) {
    auto it = Inst.begin();
#define GET_REGISTER_MATCHER
#define GET_MATCHER_IMPLEMENTATION
#define GET_MNEMONIC_SPELL_CHECKER
#define GET_MNEMONIC_CHECKER
#include "AMDGPUGenAsmMatcher.inc"
  return parseTokenOp("addr64", Operands);
  return parseTokenOp("done", Operands);
  return parseTokenOp("idxen", Operands);
  return parseTokenOp("lds", Operands);
  return parseTokenOp("offen", Operands);
  return parseTokenOp("off", Operands);
  return parseTokenOp("row_en", Operands);
  return parseNamedBit("gds", Operands, AMDGPUOperand::ImmTyGDS);
  return parseNamedBit("tfe", Operands, AMDGPUOperand::ImmTyTFE);
  return tryCustomParseOperand(Operands, MCK);
  AMDGPUOperand &Operand = (AMDGPUOperand&)Op;
    return Operand.isAddr64() ? Match_Success : Match_InvalidOperand;
    return Operand.isGDS() ? Match_Success : Match_InvalidOperand;
    return Operand.isLDS() ? Match_Success : Match_InvalidOperand;
    return Operand.isIdxen() ? Match_Success : Match_InvalidOperand;
    return Operand.isOffen() ? Match_Success : Match_InvalidOperand;
    return Operand.isTFE() ? Match_Success : Match_InvalidOperand;
    return Operand.isSSrc_b32() ? Match_Success : Match_InvalidOperand;
    return Operand.isSSrc_f32() ? Match_Success : Match_InvalidOperand;
  case MCK_SOPPBrTarget:
    return Operand.isSOPPBrTarget() ? Match_Success : Match_InvalidOperand;
  case MCK_VReg32OrOff:
    return Operand.isVReg32OrOff() ? Match_Success : Match_InvalidOperand;
  case MCK_InterpSlot:
    return Operand.isInterpSlot() ? Match_Success : Match_InvalidOperand;
  case MCK_InterpAttr:
    return Operand.isInterpAttr() ? Match_Success : Match_InvalidOperand;
  case MCK_InterpAttrChan:
    return Operand.isInterpAttrChan() ? Match_Success : Match_InvalidOperand;
  case MCK_SReg_64_XEXEC:
    return Operand.isNull() ? Match_Success : Match_InvalidOperand;
    return Match_InvalidOperand;
  if (!isUInt<16>(Imm))
    return Error(S, "expected a 16-bit value");
  AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyEndpgm));

bool AMDGPUOperand::isEndpgm() const { return isImmTy(ImmTyEndpgm); }

bool AMDGPUOperand::isSplitBarrier() const { return isInlinableImm(MVT::i32); }