// Classification of a parsed register operand. IS_SPECIAL is the catch-all
// for named registers outside the VGPR/SGPR/AGPR/TTMP files.
enum RegisterKind { IS_UNKNOWN, IS_VGPR, IS_SGPR, IS_AGPR, IS_TTMP, IS_SPECIAL };
67 SMLoc StartLoc, EndLoc;
68 const AMDGPUAsmParser *AsmParser;
71 AMDGPUOperand(KindTy Kind_,
const AMDGPUAsmParser *AsmParser_)
72 :
Kind(Kind_), AsmParser(AsmParser_) {}
74 using Ptr = std::unique_ptr<AMDGPUOperand>;
82 bool hasFPModifiers()
const {
return Abs || Neg; }
83 bool hasIntModifiers()
const {
return Sext; }
84 bool hasModifiers()
const {
return hasFPModifiers() || hasIntModifiers(); }
86 int64_t getFPModifiersOperand()
const {
93 int64_t getIntModifiersOperand()
const {
99 int64_t getModifiersOperand()
const {
100 assert(!(hasFPModifiers() && hasIntModifiers())
101 &&
"fp and int modifiers should not be used simultaneously");
102 if (hasFPModifiers())
103 return getFPModifiersOperand();
104 if (hasIntModifiers())
105 return getIntModifiersOperand();
186 ImmKindTyMandatoryLiteral,
200 mutable ImmKindTy
Kind;
217 bool isToken()
const override {
return Kind == Token; }
219 bool isSymbolRefExpr()
const {
220 return isExpr() && Expr && isa<MCSymbolRefExpr>(Expr);
223 bool isImm()
const override {
224 return Kind == Immediate;
227 void setImmKindNone()
const {
229 Imm.Kind = ImmKindTyNone;
232 void setImmKindLiteral()
const {
234 Imm.Kind = ImmKindTyLiteral;
237 void setImmKindMandatoryLiteral()
const {
239 Imm.Kind = ImmKindTyMandatoryLiteral;
242 void setImmKindConst()
const {
244 Imm.Kind = ImmKindTyConst;
247 bool IsImmKindLiteral()
const {
248 return isImm() &&
Imm.Kind == ImmKindTyLiteral;
251 bool IsImmKindMandatoryLiteral()
const {
252 return isImm() &&
Imm.Kind == ImmKindTyMandatoryLiteral;
255 bool isImmKindConst()
const {
256 return isImm() &&
Imm.Kind == ImmKindTyConst;
259 bool isInlinableImm(
MVT type)
const;
260 bool isLiteralImm(
MVT type)
const;
262 bool isRegKind()
const {
266 bool isReg()
const override {
267 return isRegKind() && !hasModifiers();
270 bool isRegOrInline(
unsigned RCID,
MVT type)
const {
271 return isRegClass(RCID) || isInlinableImm(type);
275 return isRegOrInline(RCID, type) || isLiteralImm(type);
278 bool isRegOrImmWithInt16InputMods()
const {
282 bool isRegOrImmWithIntT16InputMods()
const {
286 bool isRegOrImmWithInt32InputMods()
const {
290 bool isRegOrInlineImmWithInt16InputMods()
const {
291 return isRegOrInline(AMDGPU::VS_32RegClassID, MVT::i16);
294 bool isRegOrInlineImmWithInt32InputMods()
const {
295 return isRegOrInline(AMDGPU::VS_32RegClassID, MVT::i32);
298 bool isRegOrImmWithInt64InputMods()
const {
302 bool isRegOrImmWithFP16InputMods()
const {
306 bool isRegOrImmWithFPT16InputMods()
const {
310 bool isRegOrImmWithFP32InputMods()
const {
314 bool isRegOrImmWithFP64InputMods()
const {
318 template <
bool IsFake16>
bool isRegOrInlineImmWithFP16InputMods()
const {
319 return isRegOrInline(
320 IsFake16 ? AMDGPU::VS_32RegClassID : AMDGPU::VS_16RegClassID, MVT::f16);
323 bool isRegOrInlineImmWithFP32InputMods()
const {
324 return isRegOrInline(AMDGPU::VS_32RegClassID, MVT::f32);
327 bool isPackedFP16InputMods()
const {
331 bool isVReg()
const {
332 return isRegClass(AMDGPU::VGPR_32RegClassID) ||
333 isRegClass(AMDGPU::VReg_64RegClassID) ||
334 isRegClass(AMDGPU::VReg_96RegClassID) ||
335 isRegClass(AMDGPU::VReg_128RegClassID) ||
336 isRegClass(AMDGPU::VReg_160RegClassID) ||
337 isRegClass(AMDGPU::VReg_192RegClassID) ||
338 isRegClass(AMDGPU::VReg_256RegClassID) ||
339 isRegClass(AMDGPU::VReg_512RegClassID) ||
340 isRegClass(AMDGPU::VReg_1024RegClassID);
343 bool isVReg32()
const {
344 return isRegClass(AMDGPU::VGPR_32RegClassID);
347 bool isVReg32OrOff()
const {
348 return isOff() || isVReg32();
351 bool isNull()
const {
352 return isRegKind() &&
getReg() == AMDGPU::SGPR_NULL;
355 bool isVRegWithInputMods()
const;
356 template <
bool IsFake16>
bool isT16VRegWithInputMods()
const;
358 bool isSDWAOperand(
MVT type)
const;
359 bool isSDWAFP16Operand()
const;
360 bool isSDWAFP32Operand()
const;
361 bool isSDWAInt16Operand()
const;
362 bool isSDWAInt32Operand()
const;
364 bool isImmTy(ImmTy ImmT)
const {
368 template <ImmTy Ty>
bool isImmTy()
const {
return isImmTy(Ty); }
370 bool isImmLiteral()
const {
return isImmTy(ImmTyNone); }
372 bool isImmModifier()
const {
373 return isImm() &&
Imm.Type != ImmTyNone;
376 bool isOModSI()
const {
return isImmTy(ImmTyOModSI); }
377 bool isDim()
const {
return isImmTy(ImmTyDim); }
378 bool isR128A16()
const {
return isImmTy(ImmTyR128A16); }
379 bool isOff()
const {
return isImmTy(ImmTyOff); }
380 bool isExpTgt()
const {
return isImmTy(ImmTyExpTgt); }
381 bool isOffen()
const {
return isImmTy(ImmTyOffen); }
382 bool isIdxen()
const {
return isImmTy(ImmTyIdxen); }
383 bool isAddr64()
const {
return isImmTy(ImmTyAddr64); }
384 bool isSMEMOffsetMod()
const {
return isImmTy(ImmTySMEMOffsetMod); }
385 bool isFlatOffset()
const {
return isImmTy(ImmTyOffset) || isImmTy(ImmTyInstOffset); }
386 bool isGDS()
const {
return isImmTy(ImmTyGDS); }
387 bool isLDS()
const {
return isImmTy(ImmTyLDS); }
388 bool isCPol()
const {
return isImmTy(ImmTyCPol); }
389 bool isIndexKey8bit()
const {
return isImmTy(ImmTyIndexKey8bit); }
390 bool isIndexKey16bit()
const {
return isImmTy(ImmTyIndexKey16bit); }
391 bool isTFE()
const {
return isImmTy(ImmTyTFE); }
392 bool isFORMAT()
const {
return isImmTy(ImmTyFORMAT) && isUInt<7>(getImm()); }
393 bool isDppFI()
const {
return isImmTy(ImmTyDppFI); }
394 bool isSDWADstSel()
const {
return isImmTy(ImmTySDWADstSel); }
395 bool isSDWASrc0Sel()
const {
return isImmTy(ImmTySDWASrc0Sel); }
396 bool isSDWASrc1Sel()
const {
return isImmTy(ImmTySDWASrc1Sel); }
397 bool isSDWADstUnused()
const {
return isImmTy(ImmTySDWADstUnused); }
398 bool isInterpSlot()
const {
return isImmTy(ImmTyInterpSlot); }
399 bool isInterpAttr()
const {
return isImmTy(ImmTyInterpAttr); }
400 bool isInterpAttrChan()
const {
return isImmTy(ImmTyInterpAttrChan); }
401 bool isOpSel()
const {
return isImmTy(ImmTyOpSel); }
402 bool isOpSelHi()
const {
return isImmTy(ImmTyOpSelHi); }
403 bool isNegLo()
const {
return isImmTy(ImmTyNegLo); }
404 bool isNegHi()
const {
return isImmTy(ImmTyNegHi); }
406 bool isRegOrImm()
const {
410 bool isRegClass(
unsigned RCID)
const;
414 bool isRegOrInlineNoMods(
unsigned RCID,
MVT type)
const {
415 return isRegOrInline(RCID, type) && !hasModifiers();
418 bool isSCSrcB16()
const {
419 return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i16);
422 bool isSCSrcV2B16()
const {
426 bool isSCSrc_b32()
const {
427 return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i32);
430 bool isSCSrc_b64()
const {
431 return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::i64);
434 bool isBoolReg()
const;
436 bool isSCSrcF16()
const {
437 return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f16);
440 bool isSCSrcV2F16()
const {
444 bool isSCSrcF32()
const {
445 return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f32);
448 bool isSCSrcF64()
const {
449 return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::f64);
452 bool isSSrc_b32()
const {
453 return isSCSrc_b32() || isLiteralImm(MVT::i32) || isExpr();
456 bool isSSrc_b16()
const {
return isSCSrcB16() || isLiteralImm(MVT::i16); }
458 bool isSSrcV2B16()
const {
463 bool isSSrc_b64()
const {
466 return isSCSrc_b64() || isLiteralImm(MVT::i64);
469 bool isSSrc_f32()
const {
470 return isSCSrc_b32() || isLiteralImm(MVT::f32) || isExpr();
473 bool isSSrcF64()
const {
return isSCSrc_b64() || isLiteralImm(MVT::f64); }
475 bool isSSrc_bf16()
const {
return isSCSrcB16() || isLiteralImm(MVT::bf16); }
477 bool isSSrc_f16()
const {
return isSCSrcB16() || isLiteralImm(MVT::f16); }
479 bool isSSrcV2F16()
const {
484 bool isSSrcV2FP32()
const {
489 bool isSCSrcV2FP32()
const {
494 bool isSSrcV2INT32()
const {
499 bool isSCSrcV2INT32()
const {
501 return isSCSrc_b32();
504 bool isSSrcOrLds_b32()
const {
505 return isRegOrInlineNoMods(AMDGPU::SRegOrLds_32RegClassID, MVT::i32) ||
506 isLiteralImm(MVT::i32) || isExpr();
509 bool isVCSrc_b32()
const {
510 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i32);
513 bool isVCSrcB64()
const {
514 return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::i64);
517 bool isVCSrcTB16()
const {
518 return isRegOrInlineNoMods(AMDGPU::VS_16RegClassID, MVT::i16);
521 bool isVCSrcTB16_Lo128()
const {
522 return isRegOrInlineNoMods(AMDGPU::VS_16_Lo128RegClassID, MVT::i16);
525 bool isVCSrcFake16B16_Lo128()
const {
526 return isRegOrInlineNoMods(AMDGPU::VS_32_Lo128RegClassID, MVT::i16);
529 bool isVCSrc_b16()
const {
530 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i16);
533 bool isVCSrc_v2b16()
const {
return isVCSrc_b16(); }
535 bool isVCSrc_f32()
const {
536 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f32);
539 bool isVCSrcF64()
const {
540 return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::f64);
543 bool isVCSrcTBF16()
const {
544 return isRegOrInlineNoMods(AMDGPU::VS_16RegClassID, MVT::bf16);
547 bool isVCSrcTF16()
const {
548 return isRegOrInlineNoMods(AMDGPU::VS_16RegClassID, MVT::f16);
551 bool isVCSrcTBF16_Lo128()
const {
552 return isRegOrInlineNoMods(AMDGPU::VS_16_Lo128RegClassID, MVT::bf16);
555 bool isVCSrcTF16_Lo128()
const {
556 return isRegOrInlineNoMods(AMDGPU::VS_16_Lo128RegClassID, MVT::f16);
559 bool isVCSrcFake16BF16_Lo128()
const {
560 return isRegOrInlineNoMods(AMDGPU::VS_32_Lo128RegClassID, MVT::bf16);
563 bool isVCSrcFake16F16_Lo128()
const {
564 return isRegOrInlineNoMods(AMDGPU::VS_32_Lo128RegClassID, MVT::f16);
567 bool isVCSrc_bf16()
const {
568 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::bf16);
571 bool isVCSrc_f16()
const {
572 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f16);
575 bool isVCSrc_v2bf16()
const {
return isVCSrc_bf16(); }
577 bool isVCSrc_v2f16()
const {
return isVCSrc_f16(); }
579 bool isVSrc_b32()
const {
580 return isVCSrc_f32() || isLiteralImm(MVT::i32) || isExpr();
583 bool isVSrc_b64()
const {
return isVCSrcF64() || isLiteralImm(MVT::i64); }
585 bool isVSrcT_b16()
const {
return isVCSrcTB16() || isLiteralImm(MVT::i16); }
587 bool isVSrcT_b16_Lo128()
const {
588 return isVCSrcTB16_Lo128() || isLiteralImm(MVT::i16);
591 bool isVSrcFake16_b16_Lo128()
const {
592 return isVCSrcFake16B16_Lo128() || isLiteralImm(MVT::i16);
595 bool isVSrc_b16()
const {
return isVCSrc_b16() || isLiteralImm(MVT::i16); }
597 bool isVSrc_v2b16()
const {
return isVSrc_b16() || isLiteralImm(MVT::v2i16); }
599 bool isVCSrcV2FP32()
const {
603 bool isVSrc_v2f32()
const {
return isVSrc_f64() || isLiteralImm(MVT::v2f32); }
605 bool isVCSrcV2INT32()
const {
609 bool isVSrc_v2b32()
const {
return isVSrc_b64() || isLiteralImm(MVT::v2i32); }
611 bool isVSrc_f32()
const {
612 return isVCSrc_f32() || isLiteralImm(MVT::f32) || isExpr();
615 bool isVSrc_f64()
const {
return isVCSrcF64() || isLiteralImm(MVT::f64); }
617 bool isVSrcT_bf16()
const {
return isVCSrcTBF16() || isLiteralImm(MVT::bf16); }
619 bool isVSrcT_f16()
const {
return isVCSrcTF16() || isLiteralImm(MVT::f16); }
621 bool isVSrcT_bf16_Lo128()
const {
622 return isVCSrcTBF16_Lo128() || isLiteralImm(MVT::bf16);
625 bool isVSrcT_f16_Lo128()
const {
626 return isVCSrcTF16_Lo128() || isLiteralImm(MVT::f16);
629 bool isVSrcFake16_bf16_Lo128()
const {
630 return isVCSrcFake16BF16_Lo128() || isLiteralImm(MVT::bf16);
633 bool isVSrcFake16_f16_Lo128()
const {
634 return isVCSrcFake16F16_Lo128() || isLiteralImm(MVT::f16);
637 bool isVSrc_bf16()
const {
return isVCSrc_bf16() || isLiteralImm(MVT::bf16); }
639 bool isVSrc_f16()
const {
return isVCSrc_f16() || isLiteralImm(MVT::f16); }
641 bool isVSrc_v2bf16()
const {
642 return isVSrc_bf16() || isLiteralImm(MVT::v2bf16);
645 bool isVSrc_v2f16()
const {
return isVSrc_f16() || isLiteralImm(MVT::v2f16); }
647 bool isVISrcB32()
const {
648 return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i32);
651 bool isVISrcB16()
const {
652 return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i16);
655 bool isVISrcV2B16()
const {
659 bool isVISrcF32()
const {
660 return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f32);
663 bool isVISrcF16()
const {
664 return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f16);
667 bool isVISrcV2F16()
const {
668 return isVISrcF16() || isVISrcB32();
671 bool isVISrc_64_bf16()
const {
672 return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::bf16);
675 bool isVISrc_64_f16()
const {
676 return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::f16);
679 bool isVISrc_64_b32()
const {
680 return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::i32);
683 bool isVISrc_64B64()
const {
684 return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::i64);
687 bool isVISrc_64_f64()
const {
688 return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::f64);
691 bool isVISrc_64V2FP32()
const {
692 return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::f32);
695 bool isVISrc_64V2INT32()
const {
696 return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::i32);
699 bool isVISrc_256_b32()
const {
700 return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::i32);
703 bool isVISrc_256_f32()
const {
704 return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::f32);
707 bool isVISrc_256B64()
const {
708 return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::i64);
711 bool isVISrc_256_f64()
const {
712 return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::f64);
715 bool isVISrc_128B16()
const {
716 return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::i16);
719 bool isVISrc_128V2B16()
const {
720 return isVISrc_128B16();
723 bool isVISrc_128_b32()
const {
724 return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::i32);
727 bool isVISrc_128_f32()
const {
728 return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::f32);
731 bool isVISrc_256V2FP32()
const {
732 return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::f32);
735 bool isVISrc_256V2INT32()
const {
736 return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::i32);
739 bool isVISrc_512_b32()
const {
740 return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::i32);
743 bool isVISrc_512B16()
const {
744 return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::i16);
747 bool isVISrc_512V2B16()
const {
748 return isVISrc_512B16();
751 bool isVISrc_512_f32()
const {
752 return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::f32);
755 bool isVISrc_512F16()
const {
756 return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::f16);
759 bool isVISrc_512V2F16()
const {
760 return isVISrc_512F16() || isVISrc_512_b32();
763 bool isVISrc_1024_b32()
const {
764 return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::i32);
767 bool isVISrc_1024B16()
const {
768 return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::i16);
771 bool isVISrc_1024V2B16()
const {
772 return isVISrc_1024B16();
775 bool isVISrc_1024_f32()
const {
776 return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::f32);
779 bool isVISrc_1024F16()
const {
780 return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::f16);
783 bool isVISrc_1024V2F16()
const {
784 return isVISrc_1024F16() || isVISrc_1024_b32();
787 bool isAISrcB32()
const {
788 return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i32);
791 bool isAISrcB16()
const {
792 return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i16);
795 bool isAISrcV2B16()
const {
799 bool isAISrcF32()
const {
800 return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f32);
803 bool isAISrcF16()
const {
804 return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f16);
807 bool isAISrcV2F16()
const {
808 return isAISrcF16() || isAISrcB32();
811 bool isAISrc_64B64()
const {
812 return isRegOrInlineNoMods(AMDGPU::AReg_64RegClassID, MVT::i64);
815 bool isAISrc_64_f64()
const {
816 return isRegOrInlineNoMods(AMDGPU::AReg_64RegClassID, MVT::f64);
819 bool isAISrc_128_b32()
const {
820 return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i32);
823 bool isAISrc_128B16()
const {
824 return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i16);
827 bool isAISrc_128V2B16()
const {
828 return isAISrc_128B16();
831 bool isAISrc_128_f32()
const {
832 return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f32);
835 bool isAISrc_128F16()
const {
836 return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f16);
839 bool isAISrc_128V2F16()
const {
840 return isAISrc_128F16() || isAISrc_128_b32();
843 bool isVISrc_128_bf16()
const {
844 return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::bf16);
847 bool isVISrc_128_f16()
const {
848 return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::f16);
851 bool isVISrc_128V2F16()
const {
852 return isVISrc_128_f16() || isVISrc_128_b32();
855 bool isAISrc_256B64()
const {
856 return isRegOrInlineNoMods(AMDGPU::AReg_256RegClassID, MVT::i64);
859 bool isAISrc_256_f64()
const {
860 return isRegOrInlineNoMods(AMDGPU::AReg_256RegClassID, MVT::f64);
863 bool isAISrc_512_b32()
const {
864 return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i32);
867 bool isAISrc_512B16()
const {
868 return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i16);
871 bool isAISrc_512V2B16()
const {
872 return isAISrc_512B16();
875 bool isAISrc_512_f32()
const {
876 return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f32);
879 bool isAISrc_512F16()
const {
880 return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f16);
883 bool isAISrc_512V2F16()
const {
884 return isAISrc_512F16() || isAISrc_512_b32();
887 bool isAISrc_1024_b32()
const {
888 return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i32);
891 bool isAISrc_1024B16()
const {
892 return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i16);
895 bool isAISrc_1024V2B16()
const {
896 return isAISrc_1024B16();
899 bool isAISrc_1024_f32()
const {
900 return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f32);
903 bool isAISrc_1024F16()
const {
904 return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f16);
907 bool isAISrc_1024V2F16()
const {
908 return isAISrc_1024F16() || isAISrc_1024_b32();
911 bool isKImmFP32()
const {
912 return isLiteralImm(MVT::f32);
915 bool isKImmFP16()
const {
916 return isLiteralImm(MVT::f16);
919 bool isMem()
const override {
923 bool isExpr()
const {
927 bool isSOPPBrTarget()
const {
return isExpr() ||
isImm(); }
929 bool isSWaitCnt()
const;
930 bool isDepCtr()
const;
931 bool isSDelayALU()
const;
932 bool isHwreg()
const;
933 bool isSendMsg()
const;
934 bool isSplitBarrier()
const;
935 bool isSwizzle()
const;
936 bool isSMRDOffset8()
const;
937 bool isSMEMOffset()
const;
938 bool isSMRDLiteralOffset()
const;
940 bool isDPPCtrl()
const;
942 bool isGPRIdxMode()
const;
943 bool isS16Imm()
const;
944 bool isU16Imm()
const;
945 bool isEndpgm()
const;
947 auto getPredicate(std::function<
bool(
const AMDGPUOperand &
Op)>
P)
const {
948 return [=](){
return P(*
this); };
956 int64_t getImm()
const {
961 void setImm(int64_t Val) {
966 ImmTy getImmTy()
const {
976 SMLoc getStartLoc()
const override {
980 SMLoc getEndLoc()
const override {
985 return SMRange(StartLoc, EndLoc);
988 Modifiers getModifiers()
const {
989 assert(isRegKind() || isImmTy(ImmTyNone));
990 return isRegKind() ?
Reg.Mods :
Imm.Mods;
993 void setModifiers(Modifiers Mods) {
994 assert(isRegKind() || isImmTy(ImmTyNone));
1001 bool hasModifiers()
const {
1002 return getModifiers().hasModifiers();
1005 bool hasFPModifiers()
const {
1006 return getModifiers().hasFPModifiers();
1009 bool hasIntModifiers()
const {
1010 return getModifiers().hasIntModifiers();
1015 void addImmOperands(
MCInst &Inst,
unsigned N,
bool ApplyModifiers =
true)
const;
1017 void addLiteralImmOperand(
MCInst &Inst, int64_t Val,
bool ApplyModifiers)
const;
1019 void addRegOperands(
MCInst &Inst,
unsigned N)
const;
1021 void addRegOrImmOperands(
MCInst &Inst,
unsigned N)
const {
1023 addRegOperands(Inst,
N);
1025 addImmOperands(Inst,
N);
1028 void addRegOrImmWithInputModsOperands(
MCInst &Inst,
unsigned N)
const {
1029 Modifiers Mods = getModifiers();
1032 addRegOperands(Inst,
N);
1034 addImmOperands(Inst,
N,
false);
1038 void addRegOrImmWithFPInputModsOperands(
MCInst &Inst,
unsigned N)
const {
1039 assert(!hasIntModifiers());
1040 addRegOrImmWithInputModsOperands(Inst,
N);
1043 void addRegOrImmWithIntInputModsOperands(
MCInst &Inst,
unsigned N)
const {
1044 assert(!hasFPModifiers());
1045 addRegOrImmWithInputModsOperands(Inst,
N);
1048 void addRegWithInputModsOperands(
MCInst &Inst,
unsigned N)
const {
1049 Modifiers Mods = getModifiers();
1052 addRegOperands(Inst,
N);
1055 void addRegWithFPInputModsOperands(
MCInst &Inst,
unsigned N)
const {
1056 assert(!hasIntModifiers());
1057 addRegWithInputModsOperands(Inst,
N);
1060 void addRegWithIntInputModsOperands(
MCInst &Inst,
unsigned N)
const {
1061 assert(!hasFPModifiers());
1062 addRegWithInputModsOperands(Inst,
N);
1068 case ImmTyNone:
OS <<
"None";
break;
1069 case ImmTyGDS:
OS <<
"GDS";
break;
1070 case ImmTyLDS:
OS <<
"LDS";
break;
1071 case ImmTyOffen:
OS <<
"Offen";
break;
1072 case ImmTyIdxen:
OS <<
"Idxen";
break;
1073 case ImmTyAddr64:
OS <<
"Addr64";
break;
1074 case ImmTyOffset:
OS <<
"Offset";
break;
1075 case ImmTyInstOffset:
OS <<
"InstOffset";
break;
1076 case ImmTyOffset0:
OS <<
"Offset0";
break;
1077 case ImmTyOffset1:
OS <<
"Offset1";
break;
1078 case ImmTySMEMOffsetMod:
OS <<
"SMEMOffsetMod";
break;
1079 case ImmTyCPol:
OS <<
"CPol";
break;
1080 case ImmTyIndexKey8bit:
OS <<
"index_key";
break;
1081 case ImmTyIndexKey16bit:
OS <<
"index_key";
break;
1082 case ImmTyTFE:
OS <<
"TFE";
break;
1083 case ImmTyD16:
OS <<
"D16";
break;
1084 case ImmTyFORMAT:
OS <<
"FORMAT";
break;
1085 case ImmTyClamp:
OS <<
"Clamp";
break;
1086 case ImmTyOModSI:
OS <<
"OModSI";
break;
1087 case ImmTyDPP8:
OS <<
"DPP8";
break;
1088 case ImmTyDppCtrl:
OS <<
"DppCtrl";
break;
1089 case ImmTyDppRowMask:
OS <<
"DppRowMask";
break;
1090 case ImmTyDppBankMask:
OS <<
"DppBankMask";
break;
1091 case ImmTyDppBoundCtrl:
OS <<
"DppBoundCtrl";
break;
1092 case ImmTyDppFI:
OS <<
"DppFI";
break;
1093 case ImmTySDWADstSel:
OS <<
"SDWADstSel";
break;
1094 case ImmTySDWASrc0Sel:
OS <<
"SDWASrc0Sel";
break;
1095 case ImmTySDWASrc1Sel:
OS <<
"SDWASrc1Sel";
break;
1096 case ImmTySDWADstUnused:
OS <<
"SDWADstUnused";
break;
1097 case ImmTyDMask:
OS <<
"DMask";
break;
1098 case ImmTyDim:
OS <<
"Dim";
break;
1099 case ImmTyUNorm:
OS <<
"UNorm";
break;
1100 case ImmTyDA:
OS <<
"DA";
break;
1101 case ImmTyR128A16:
OS <<
"R128A16";
break;
1102 case ImmTyA16:
OS <<
"A16";
break;
1103 case ImmTyLWE:
OS <<
"LWE";
break;
1104 case ImmTyOff:
OS <<
"Off";
break;
1105 case ImmTyExpTgt:
OS <<
"ExpTgt";
break;
1106 case ImmTyExpCompr:
OS <<
"ExpCompr";
break;
1107 case ImmTyExpVM:
OS <<
"ExpVM";
break;
1108 case ImmTyHwreg:
OS <<
"Hwreg";
break;
1109 case ImmTySendMsg:
OS <<
"SendMsg";
break;
1110 case ImmTyInterpSlot:
OS <<
"InterpSlot";
break;
1111 case ImmTyInterpAttr:
OS <<
"InterpAttr";
break;
1112 case ImmTyInterpAttrChan:
OS <<
"InterpAttrChan";
break;
1113 case ImmTyOpSel:
OS <<
"OpSel";
break;
1114 case ImmTyOpSelHi:
OS <<
"OpSelHi";
break;
1115 case ImmTyNegLo:
OS <<
"NegLo";
break;
1116 case ImmTyNegHi:
OS <<
"NegHi";
break;
1117 case ImmTySwizzle:
OS <<
"Swizzle";
break;
1118 case ImmTyGprIdxMode:
OS <<
"GprIdxMode";
break;
1119 case ImmTyHigh:
OS <<
"High";
break;
1120 case ImmTyBLGP:
OS <<
"BLGP";
break;
1121 case ImmTyCBSZ:
OS <<
"CBSZ";
break;
1122 case ImmTyABID:
OS <<
"ABID";
break;
1123 case ImmTyEndpgm:
OS <<
"Endpgm";
break;
1124 case ImmTyWaitVDST:
OS <<
"WaitVDST";
break;
1125 case ImmTyWaitEXP:
OS <<
"WaitEXP";
break;
1126 case ImmTyWaitVAVDst:
OS <<
"WaitVAVDst";
break;
1127 case ImmTyWaitVMVSrc:
OS <<
"WaitVMVSrc";
break;
1128 case ImmTyByteSel:
OS <<
"ByteSel" ;
break;
1136 OS <<
"<register " <<
getReg() <<
" mods: " <<
Reg.Mods <<
'>';
1139 OS <<
'<' << getImm();
1140 if (getImmTy() != ImmTyNone) {
1141 OS <<
" type: "; printImmTy(
OS, getImmTy());
1143 OS <<
" mods: " <<
Imm.Mods <<
'>';
1146 OS <<
'\'' << getToken() <<
'\'';
1149 OS <<
"<expr " << *Expr <<
'>';
1154 static AMDGPUOperand::Ptr CreateImm(
const AMDGPUAsmParser *AsmParser,
1155 int64_t Val,
SMLoc Loc,
1156 ImmTy
Type = ImmTyNone,
1157 bool IsFPImm =
false) {
1158 auto Op = std::make_unique<AMDGPUOperand>(Immediate, AsmParser);
1160 Op->Imm.IsFPImm = IsFPImm;
1161 Op->Imm.Kind = ImmKindTyNone;
1163 Op->Imm.Mods = Modifiers();
1169 static AMDGPUOperand::Ptr CreateToken(
const AMDGPUAsmParser *AsmParser,
1171 bool HasExplicitEncodingSize =
true) {
1172 auto Res = std::make_unique<AMDGPUOperand>(Token, AsmParser);
1173 Res->Tok.Data = Str.data();
1174 Res->Tok.Length = Str.size();
1175 Res->StartLoc = Loc;
1180 static AMDGPUOperand::Ptr CreateReg(
const AMDGPUAsmParser *AsmParser,
1181 unsigned RegNo,
SMLoc S,
1183 auto Op = std::make_unique<AMDGPUOperand>(
Register, AsmParser);
1184 Op->Reg.RegNo = RegNo;
1185 Op->Reg.Mods = Modifiers();
1191 static AMDGPUOperand::Ptr CreateExpr(
const AMDGPUAsmParser *AsmParser,
1193 auto Op = std::make_unique<AMDGPUOperand>(
Expression, AsmParser);
1202 OS <<
"abs:" << Mods.Abs <<
" neg: " << Mods.Neg <<
" sext:" << Mods.Sext;
1213class KernelScopeInfo {
1214 int SgprIndexUnusedMin = -1;
1215 int VgprIndexUnusedMin = -1;
1216 int AgprIndexUnusedMin = -1;
1220 void usesSgprAt(
int i) {
1221 if (i >= SgprIndexUnusedMin) {
1222 SgprIndexUnusedMin = ++i;
1231 void usesVgprAt(
int i) {
1232 if (i >= VgprIndexUnusedMin) {
1233 VgprIndexUnusedMin = ++i;
1238 VgprIndexUnusedMin);
1244 void usesAgprAt(
int i) {
1249 if (i >= AgprIndexUnusedMin) {
1250 AgprIndexUnusedMin = ++i;
1260 VgprIndexUnusedMin);
1267 KernelScopeInfo() =
default;
1273 usesSgprAt(SgprIndexUnusedMin = -1);
1274 usesVgprAt(VgprIndexUnusedMin = -1);
1276 usesAgprAt(AgprIndexUnusedMin = -1);
1280 void usesRegister(RegisterKind RegKind,
unsigned DwordRegIndex,
1281 unsigned RegWidth) {
1284 usesSgprAt(DwordRegIndex +
divideCeil(RegWidth, 32) - 1);
1287 usesAgprAt(DwordRegIndex +
divideCeil(RegWidth, 32) - 1);
1290 usesVgprAt(DwordRegIndex +
divideCeil(RegWidth, 32) - 1);
1301 unsigned ForcedEncodingSize = 0;
1302 bool ForcedDPP =
false;
1303 bool ForcedSDWA =
false;
1304 KernelScopeInfo KernelScope;
1309#define GET_ASSEMBLER_HEADER
1310#include "AMDGPUGenAsmMatcher.inc"
1315 void createConstantSymbol(
StringRef Id, int64_t Val);
1317 bool ParseAsAbsoluteExpression(
uint32_t &Ret);
1335 const MCExpr *FlatScrUsed,
bool XNACKUsed,
1336 std::optional<bool> EnableWavefrontSize32,
1340 bool ParseDirectiveAMDGCNTarget();
1341 bool ParseDirectiveAMDHSACodeObjectVersion();
1342 bool ParseDirectiveAMDHSAKernel();
1344 bool ParseDirectiveAMDKernelCodeT();
1347 bool ParseDirectiveAMDGPUHsaKernel();
1349 bool ParseDirectiveISAVersion();
1350 bool ParseDirectiveHSAMetadata();
1351 bool ParseDirectivePALMetadataBegin();
1352 bool ParseDirectivePALMetadata();
1353 bool ParseDirectiveAMDGPULDS();
1357 bool ParseToEndDirective(
const char *AssemblerDirectiveBegin,
1358 const char *AssemblerDirectiveEnd,
1359 std::string &CollectString);
1361 bool AddNextRegisterToList(
unsigned& Reg,
unsigned& RegWidth,
1362 RegisterKind RegKind,
unsigned Reg1,
SMLoc Loc);
1363 bool ParseAMDGPURegister(RegisterKind &RegKind,
unsigned &Reg,
1364 unsigned &RegNum,
unsigned &RegWidth,
1365 bool RestoreOnFailure =
false);
1366 bool ParseAMDGPURegister(RegisterKind &RegKind,
unsigned &Reg,
1367 unsigned &RegNum,
unsigned &RegWidth,
1369 unsigned ParseRegularReg(RegisterKind &RegKind,
unsigned &RegNum,
1372 unsigned ParseSpecialReg(RegisterKind &RegKind,
unsigned &RegNum,
1375 unsigned ParseRegList(RegisterKind &RegKind,
unsigned &RegNum,
1377 bool ParseRegRange(
unsigned& Num,
unsigned& Width);
1378 unsigned getRegularReg(RegisterKind RegKind,
unsigned RegNum,
unsigned SubReg,
1379 unsigned RegWidth,
SMLoc Loc);
1383 std::optional<StringRef> getGprCountSymbolName(RegisterKind RegKind);
1384 void initializeGprCountSymbol(RegisterKind RegKind);
1385 bool updateGprCountSymbols(RegisterKind RegKind,
unsigned DwordRegIndex,
1392 OperandMode_Default,
1396 using OptionalImmIndexMap = std::map<AMDGPUOperand::ImmTy, unsigned>;
1404 if (getFeatureBits().
none()) {
1410 if (!FB[AMDGPU::FeatureWavefrontSize64] &&
1411 !FB[AMDGPU::FeatureWavefrontSize32]) {
1422 createConstantSymbol(
".amdgcn.gfx_generation_number",
ISA.Major);
1423 createConstantSymbol(
".amdgcn.gfx_generation_minor",
ISA.Minor);
1424 createConstantSymbol(
".amdgcn.gfx_generation_stepping",
ISA.Stepping);
1426 createConstantSymbol(
".option.machine_version_major",
ISA.Major);
1427 createConstantSymbol(
".option.machine_version_minor",
ISA.Minor);
1428 createConstantSymbol(
".option.machine_version_stepping",
ISA.Stepping);
1431 initializeGprCountSymbol(IS_VGPR);
1432 initializeGprCountSymbol(IS_SGPR);
1437 createConstantSymbol(Symbol, Code);
1439 createConstantSymbol(
"UC_VERSION_W64_BIT", 0x2000);
1440 createConstantSymbol(
"UC_VERSION_W32_BIT", 0x4000);
1441 createConstantSymbol(
"UC_VERSION_MDP_BIT", 0x8000);
1511 bool hasInv2PiInlineImm()
const {
1512 return getFeatureBits()[AMDGPU::FeatureInv2PiInlineImm];
1515 bool hasFlatOffsets()
const {
1516 return getFeatureBits()[AMDGPU::FeatureFlatInstOffsets];
1520 return getFeatureBits()[AMDGPU::FeatureArchitectedFlatScratch];
1523 bool hasSGPR102_SGPR103()
const {
1527 bool hasSGPR104_SGPR105()
const {
return isGFX10Plus(); }
1529 bool hasIntClamp()
const {
1530 return getFeatureBits()[AMDGPU::FeatureIntClamp];
1533 bool hasPartialNSAEncoding()
const {
1534 return getFeatureBits()[AMDGPU::FeaturePartialNSAEncoding];
1566 void setForcedEncodingSize(
unsigned Size) { ForcedEncodingSize =
Size; }
1567 void setForcedDPP(
bool ForceDPP_) { ForcedDPP = ForceDPP_; }
1568 void setForcedSDWA(
bool ForceSDWA_) { ForcedSDWA = ForceSDWA_; }
1570 unsigned getForcedEncodingSize()
const {
return ForcedEncodingSize; }
1571 bool isForcedVOP3()
const {
return ForcedEncodingSize == 64; }
1572 bool isForcedDPP()
const {
return ForcedDPP; }
1573 bool isForcedSDWA()
const {
return ForcedSDWA; }
1575 StringRef getMatchedVariantName()
const;
1577 std::unique_ptr<AMDGPUOperand>
parseRegister(
bool RestoreOnFailure =
false);
1579 bool RestoreOnFailure);
1582 SMLoc &EndLoc)
override;
1585 unsigned Kind)
override;
1589 bool MatchingInlineAsm)
override;
1592 OperandMode Mode = OperandMode_Default);
1600 ParseStatus parseIntWithPrefix(
const char *Prefix, int64_t &
Int);
1604 AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
1605 std::function<
bool(int64_t &)> ConvertResult =
nullptr);
1609 AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
1610 bool (*ConvertResult)(int64_t &) =
nullptr);
1614 AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone);
1623 bool isOperandModifier(
const AsmToken &Token,
const AsmToken &NextToken)
const;
1624 bool isRegOrOperandModifier(
const AsmToken &Token,
const AsmToken &NextToken)
const;
1625 bool isNamedOperandModifier(
const AsmToken &Token,
const AsmToken &NextToken)
const;
1626 bool isOpcodeModifierWithVal(
const AsmToken &Token,
const AsmToken &NextToken)
const;
1627 bool parseSP3NegModifier();
1629 bool HasLit =
false);
1632 bool HasLit =
false);
1634 bool AllowImm =
true);
1636 bool AllowImm =
true);
1641 AMDGPUOperand::ImmTy ImmTy);
1652 ParseStatus parseSymbolicOrNumericFormat(int64_t &Format);
1657 bool tryParseFmt(
const char *Pref, int64_t MaxVal, int64_t &Val);
1658 bool matchDfmtNfmt(int64_t &Dfmt, int64_t &Nfmt,
StringRef FormatStr,
SMLoc Loc);
1662 bool parseCnt(int64_t &IntVal);
1665 bool parseDepCtr(int64_t &IntVal,
unsigned &Mask);
1669 bool parseDelay(int64_t &Delay);
1675 struct OperandInfoTy {
1678 bool IsSymbolic =
false;
1679 bool IsDefined =
false;
1681 OperandInfoTy(int64_t Val) : Val(Val) {}
1684 struct StructuredOpField : OperandInfoTy {
1688 bool IsDefined =
false;
1693 virtual ~StructuredOpField() =
default;
1695 bool Error(AMDGPUAsmParser &Parser,
const Twine &Err)
const {
1696 Parser.Error(Loc,
"invalid " +
Desc +
": " + Err);
1700 virtual bool validate(AMDGPUAsmParser &Parser)
const {
1702 return Error(Parser,
"not supported on this GPU");
1704 return Error(Parser,
"only " +
Twine(Width) +
"-bit values are legal");
1712 bool parseSendMsgBody(OperandInfoTy &Msg, OperandInfoTy &
Op, OperandInfoTy &Stream);
1713 bool validateSendMsg(
const OperandInfoTy &Msg,
1714 const OperandInfoTy &
Op,
1715 const OperandInfoTy &Stream);
1718 OperandInfoTy &Width);
1724 SMLoc getOperandLoc(std::function<
bool(
const AMDGPUOperand&)>
Test,
1729 bool SearchMandatoryLiterals =
false)
const;
1738 bool validateSOPLiteral(
const MCInst &Inst)
const;
1740 bool validateVOPDRegBankConstraints(
const MCInst &Inst,
1742 bool validateIntClampSupported(
const MCInst &Inst);
1743 bool validateMIMGAtomicDMask(
const MCInst &Inst);
1744 bool validateMIMGGatherDMask(
const MCInst &Inst);
1746 bool validateMIMGDataSize(
const MCInst &Inst,
const SMLoc &IDLoc);
1747 bool validateMIMGAddrSize(
const MCInst &Inst,
const SMLoc &IDLoc);
1748 bool validateMIMGD16(
const MCInst &Inst);
1750 bool validateMIMGMSAA(
const MCInst &Inst);
1751 bool validateOpSel(
const MCInst &Inst);
1754 bool validateVccOperand(
unsigned Reg)
const;
1759 bool validateAGPRLdSt(
const MCInst &Inst)
const;
1760 bool validateVGPRAlign(
const MCInst &Inst)
const;
1764 bool validateDivScale(
const MCInst &Inst);
1767 const SMLoc &IDLoc);
1769 const unsigned CPol);
1772 std::optional<StringRef> validateLdsDirect(
const MCInst &Inst);
1773 unsigned getConstantBusLimit(
unsigned Opcode)
const;
1774 bool usesConstantBus(
const MCInst &Inst,
unsigned OpIdx);
1775 bool isInlineConstant(
const MCInst &Inst,
unsigned OpIdx)
const;
1776 unsigned findImplicitSGPRReadInVOP(
const MCInst &Inst)
const;
1802 AsmToken peekToken(
bool ShouldSkipSpace =
true);
1804 SMLoc getLoc()
const;
1808 void onBeginOfFile()
override;
1809 bool parsePrimaryExpr(
const MCExpr *&Res,
SMLoc &EndLoc)
override;
1820 bool parseSwizzleOperand(int64_t &
Op,
1821 const unsigned MinVal,
1822 const unsigned MaxVal,
1825 bool parseSwizzleOperands(
const unsigned OpNum, int64_t*
Op,
1826 const unsigned MinVal,
1827 const unsigned MaxVal,
1830 bool parseSwizzleOffset(int64_t &Imm);
1831 bool parseSwizzleMacro(int64_t &Imm);
1832 bool parseSwizzleQuadPerm(int64_t &Imm);
1833 bool parseSwizzleBitmaskPerm(int64_t &Imm);
1834 bool parseSwizzleBroadcast(int64_t &Imm);
1835 bool parseSwizzleSwap(int64_t &Imm);
1836 bool parseSwizzleReverse(int64_t &Imm);
1839 int64_t parseGPRIdxMacro();
1847 OptionalImmIndexMap &OptionalIdx);
1855 OptionalImmIndexMap &OptionalIdx);
1857 OptionalImmIndexMap &OptionalIdx);
1862 bool parseDimId(
unsigned &Encoding);
1864 bool convertDppBoundCtrl(int64_t &BoundCtrl);
1868 int64_t parseDPPCtrlSel(
StringRef Ctrl);
1869 int64_t parseDPPCtrlPerm();
1875 bool IsDPP8 =
false);
1881 AMDGPUOperand::ImmTy
Type);
1890 bool SkipDstVcc =
false,
1891 bool SkipSrcVcc =
false);
1904 return &APFloat::IEEEsingle();
1906 return &APFloat::IEEEdouble();
1908 return &APFloat::IEEEhalf();
1941 return &APFloat::IEEEsingle();
1947 return &APFloat::IEEEdouble();
1956 return &APFloat::IEEEhalf();
1964 return &APFloat::BFloat();
1979 APFloat::rmNearestTiesToEven,
1982 if (
Status != APFloat::opOK &&
1984 ((
Status & APFloat::opOverflow) != 0 ||
1985 (
Status & APFloat::opUnderflow) != 0)) {
2008bool AMDGPUOperand::isInlinableImm(
MVT type)
const {
2018 if (!isImmTy(ImmTyNone)) {
2029 if (type == MVT::f64 || type == MVT::i64) {
2031 AsmParser->hasInv2PiInlineImm());
2053 APFloat::rmNearestTiesToEven, &Lost);
2060 uint32_t ImmVal = FPLiteral.bitcastToAPInt().getZExtValue();
2062 AsmParser->hasInv2PiInlineImm());
2067 static_cast<int32_t
>(FPLiteral.bitcastToAPInt().getZExtValue()),
2068 AsmParser->hasInv2PiInlineImm());
2072 if (type == MVT::f64 || type == MVT::i64) {
2074 AsmParser->hasInv2PiInlineImm());
2083 static_cast<int16_t
>(
Literal.getLoBits(16).getSExtValue()),
2084 type, AsmParser->hasInv2PiInlineImm());
2088 static_cast<int32_t
>(
Literal.getLoBits(32).getZExtValue()),
2089 AsmParser->hasInv2PiInlineImm());
2092bool AMDGPUOperand::isLiteralImm(
MVT type)
const {
2094 if (!isImmTy(ImmTyNone)) {
2101 if (type == MVT::f64 && hasFPModifiers()) {
2118 if (type == MVT::f64) {
2123 if (type == MVT::i64) {
2136 MVT ExpectedType = (type == MVT::v2f16) ? MVT::f16
2137 : (type == MVT::v2i16) ? MVT::f32
2138 : (type == MVT::v2f32) ? MVT::f32
2145bool AMDGPUOperand::isRegClass(
unsigned RCID)
const {
2146 return isRegKind() && AsmParser->getMRI()->getRegClass(RCID).contains(
getReg());
2149bool AMDGPUOperand::isVRegWithInputMods()
const {
2150 return isRegClass(AMDGPU::VGPR_32RegClassID) ||
2152 (isRegClass(AMDGPU::VReg_64RegClassID) &&
2153 AsmParser->getFeatureBits()[AMDGPU::FeatureDPALU_DPP]);
2156template <
bool IsFake16>
bool AMDGPUOperand::isT16VRegWithInputMods()
const {
2157 return isRegClass(IsFake16 ? AMDGPU::VGPR_32_Lo128RegClassID
2158 : AMDGPU::VGPR_16_Lo128RegClassID);
2161bool AMDGPUOperand::isSDWAOperand(
MVT type)
const {
2162 if (AsmParser->isVI())
2164 if (AsmParser->isGFX9Plus())
2165 return isRegClass(AMDGPU::VS_32RegClassID) || isInlinableImm(type);
2169bool AMDGPUOperand::isSDWAFP16Operand()
const {
2170 return isSDWAOperand(MVT::f16);
2173bool AMDGPUOperand::isSDWAFP32Operand()
const {
2174 return isSDWAOperand(MVT::f32);
2177bool AMDGPUOperand::isSDWAInt16Operand()
const {
2178 return isSDWAOperand(MVT::i16);
2181bool AMDGPUOperand::isSDWAInt32Operand()
const {
2182 return isSDWAOperand(MVT::i32);
2185bool AMDGPUOperand::isBoolReg()
const {
2186 auto FB = AsmParser->getFeatureBits();
2187 return isReg() && ((FB[AMDGPU::FeatureWavefrontSize64] && isSCSrc_b64()) ||
2188 (FB[AMDGPU::FeatureWavefrontSize32] && isSCSrc_b32()));
2193 assert(isImmTy(ImmTyNone) &&
Imm.Mods.hasFPModifiers());
2208void AMDGPUOperand::addImmOperands(
MCInst &Inst,
unsigned N,
bool ApplyModifiers)
const {
2216 addLiteralImmOperand(Inst,
Imm.Val,
2218 isImmTy(ImmTyNone) &&
Imm.Mods.hasFPModifiers());
2220 assert(!isImmTy(ImmTyNone) || !hasModifiers());
2226void AMDGPUOperand::addLiteralImmOperand(
MCInst &Inst, int64_t Val,
bool ApplyModifiers)
const {
2227 const auto& InstDesc = AsmParser->getMII()->get(Inst.
getOpcode());
2232 if (ApplyModifiers) {
2235 Val = applyInputFPModifiers(Val,
Size);
2239 uint8_t OpTy = InstDesc.operands()[OpNum].OperandType;
2249 AsmParser->hasInv2PiInlineImm())) {
2258 if (
Literal.getLoBits(32) != 0) {
2259 const_cast<AMDGPUAsmParser *
>(AsmParser)->
Warning(Inst.
getLoc(),
2260 "Can't encode literal as exact 64-bit floating-point operand. "
2261 "Low 32-bits will be set to zero");
2262 Val &= 0xffffffff00000000u;
2266 setImmKindLiteral();
2282 if (AsmParser->hasInv2PiInlineImm() &&
Literal == 0x3fc45f306725feed) {
2288 setImmKindLiteral();
2324 APFloat::rmNearestTiesToEven, &lost);
2328 uint64_t ImmVal = FPLiteral.bitcastToAPInt().getZExtValue();
2331 setImmKindMandatoryLiteral();
2333 setImmKindLiteral();
2364 AsmParser->hasInv2PiInlineImm())) {
2371 setImmKindLiteral();
2389 setImmKindLiteral();
2403 setImmKindLiteral();
2412 AsmParser->hasInv2PiInlineImm())) {
2419 setImmKindLiteral();
2428 AsmParser->hasInv2PiInlineImm())) {
2435 setImmKindLiteral();
2449 AsmParser->hasInv2PiInlineImm()));
2459 AsmParser->hasInv2PiInlineImm()));
2467 setImmKindMandatoryLiteral();
2471 setImmKindMandatoryLiteral();
2478void AMDGPUOperand::addRegOperands(
MCInst &Inst,
unsigned N)
const {
2482bool AMDGPUOperand::isInlineValue()
const {
2490void AMDGPUAsmParser::createConstantSymbol(
StringRef Id, int64_t Val) {
2501 if (Is == IS_VGPR) {
2505 return AMDGPU::VGPR_32RegClassID;
2507 return AMDGPU::VReg_64RegClassID;
2509 return AMDGPU::VReg_96RegClassID;
2511 return AMDGPU::VReg_128RegClassID;
2513 return AMDGPU::VReg_160RegClassID;
2515 return AMDGPU::VReg_192RegClassID;
2517 return AMDGPU::VReg_224RegClassID;
2519 return AMDGPU::VReg_256RegClassID;
2521 return AMDGPU::VReg_288RegClassID;
2523 return AMDGPU::VReg_320RegClassID;
2525 return AMDGPU::VReg_352RegClassID;
2527 return AMDGPU::VReg_384RegClassID;
2529 return AMDGPU::VReg_512RegClassID;
2531 return AMDGPU::VReg_1024RegClassID;
2533 }
else if (Is == IS_TTMP) {
2537 return AMDGPU::TTMP_32RegClassID;
2539 return AMDGPU::TTMP_64RegClassID;
2541 return AMDGPU::TTMP_128RegClassID;
2543 return AMDGPU::TTMP_256RegClassID;
2545 return AMDGPU::TTMP_512RegClassID;
2547 }
else if (Is == IS_SGPR) {
2551 return AMDGPU::SGPR_32RegClassID;
2553 return AMDGPU::SGPR_64RegClassID;
2555 return AMDGPU::SGPR_96RegClassID;
2557 return AMDGPU::SGPR_128RegClassID;
2559 return AMDGPU::SGPR_160RegClassID;
2561 return AMDGPU::SGPR_192RegClassID;
2563 return AMDGPU::SGPR_224RegClassID;
2565 return AMDGPU::SGPR_256RegClassID;
2567 return AMDGPU::SGPR_288RegClassID;
2569 return AMDGPU::SGPR_320RegClassID;
2571 return AMDGPU::SGPR_352RegClassID;
2573 return AMDGPU::SGPR_384RegClassID;
2575 return AMDGPU::SGPR_512RegClassID;
2577 }
else if (Is == IS_AGPR) {
2581 return AMDGPU::AGPR_32RegClassID;
2583 return AMDGPU::AReg_64RegClassID;
2585 return AMDGPU::AReg_96RegClassID;
2587 return AMDGPU::AReg_128RegClassID;
2589 return AMDGPU::AReg_160RegClassID;
2591 return AMDGPU::AReg_192RegClassID;
2593 return AMDGPU::AReg_224RegClassID;
2595 return AMDGPU::AReg_256RegClassID;
2597 return AMDGPU::AReg_288RegClassID;
2599 return AMDGPU::AReg_320RegClassID;
2601 return AMDGPU::AReg_352RegClassID;
2603 return AMDGPU::AReg_384RegClassID;
2605 return AMDGPU::AReg_512RegClassID;
2607 return AMDGPU::AReg_1024RegClassID;
2615 .
Case(
"exec", AMDGPU::EXEC)
2616 .
Case(
"vcc", AMDGPU::VCC)
2617 .
Case(
"flat_scratch", AMDGPU::FLAT_SCR)
2618 .
Case(
"xnack_mask", AMDGPU::XNACK_MASK)
2619 .
Case(
"shared_base", AMDGPU::SRC_SHARED_BASE)
2620 .
Case(
"src_shared_base", AMDGPU::SRC_SHARED_BASE)
2621 .
Case(
"shared_limit", AMDGPU::SRC_SHARED_LIMIT)
2622 .
Case(
"src_shared_limit", AMDGPU::SRC_SHARED_LIMIT)
2623 .
Case(
"private_base", AMDGPU::SRC_PRIVATE_BASE)
2624 .
Case(
"src_private_base", AMDGPU::SRC_PRIVATE_BASE)
2625 .
Case(
"private_limit", AMDGPU::SRC_PRIVATE_LIMIT)
2626 .
Case(
"src_private_limit", AMDGPU::SRC_PRIVATE_LIMIT)
2627 .
Case(
"pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID)
2628 .
Case(
"src_pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID)
2629 .
Case(
"lds_direct", AMDGPU::LDS_DIRECT)
2630 .
Case(
"src_lds_direct", AMDGPU::LDS_DIRECT)
2631 .
Case(
"m0", AMDGPU::M0)
2632 .
Case(
"vccz", AMDGPU::SRC_VCCZ)
2633 .
Case(
"src_vccz", AMDGPU::SRC_VCCZ)
2634 .
Case(
"execz", AMDGPU::SRC_EXECZ)
2635 .
Case(
"src_execz", AMDGPU::SRC_EXECZ)
2636 .
Case(
"scc", AMDGPU::SRC_SCC)
2637 .
Case(
"src_scc", AMDGPU::SRC_SCC)
2638 .
Case(
"tba", AMDGPU::TBA)
2639 .
Case(
"tma", AMDGPU::TMA)
2640 .
Case(
"flat_scratch_lo", AMDGPU::FLAT_SCR_LO)
2641 .
Case(
"flat_scratch_hi", AMDGPU::FLAT_SCR_HI)
2642 .
Case(
"xnack_mask_lo", AMDGPU::XNACK_MASK_LO)
2643 .
Case(
"xnack_mask_hi", AMDGPU::XNACK_MASK_HI)
2644 .
Case(
"vcc_lo", AMDGPU::VCC_LO)
2645 .
Case(
"vcc_hi", AMDGPU::VCC_HI)
2646 .
Case(
"exec_lo", AMDGPU::EXEC_LO)
2647 .
Case(
"exec_hi", AMDGPU::EXEC_HI)
2648 .
Case(
"tma_lo", AMDGPU::TMA_LO)
2649 .
Case(
"tma_hi", AMDGPU::TMA_HI)
2650 .
Case(
"tba_lo", AMDGPU::TBA_LO)
2651 .
Case(
"tba_hi", AMDGPU::TBA_HI)
2652 .
Case(
"pc", AMDGPU::PC_REG)
2653 .
Case(
"null", AMDGPU::SGPR_NULL)
2657bool AMDGPUAsmParser::ParseRegister(
MCRegister &RegNo,
SMLoc &StartLoc,
2658 SMLoc &EndLoc,
bool RestoreOnFailure) {
2659 auto R = parseRegister();
2660 if (!R)
return true;
2662 RegNo =
R->getReg();
2663 StartLoc =
R->getStartLoc();
2664 EndLoc =
R->getEndLoc();
2670 return ParseRegister(Reg, StartLoc, EndLoc,
false);
2675 bool Result = ParseRegister(Reg, StartLoc, EndLoc,
true);
2676 bool PendingErrors = getParser().hasPendingError();
2677 getParser().clearPendingErrors();
2685bool AMDGPUAsmParser::AddNextRegisterToList(
unsigned &Reg,
unsigned &RegWidth,
2686 RegisterKind RegKind,
unsigned Reg1,
2690 if (Reg == AMDGPU::EXEC_LO && Reg1 == AMDGPU::EXEC_HI) {
2695 if (Reg == AMDGPU::FLAT_SCR_LO && Reg1 == AMDGPU::FLAT_SCR_HI) {
2696 Reg = AMDGPU::FLAT_SCR;
2700 if (Reg == AMDGPU::XNACK_MASK_LO && Reg1 == AMDGPU::XNACK_MASK_HI) {
2701 Reg = AMDGPU::XNACK_MASK;
2705 if (Reg == AMDGPU::VCC_LO && Reg1 == AMDGPU::VCC_HI) {
2710 if (Reg == AMDGPU::TBA_LO && Reg1 == AMDGPU::TBA_HI) {
2715 if (Reg == AMDGPU::TMA_LO && Reg1 == AMDGPU::TMA_HI) {
2720 Error(Loc,
"register does not fit in the list");
2726 if (Reg1 != Reg + RegWidth / 32) {
2727 Error(Loc,
"registers in a list must have consecutive indices");
2745 {{
"ttmp"}, IS_TTMP},
2751 return Kind == IS_VGPR ||
2759 if (Str.starts_with(Reg.Name))
2765 return !Str.getAsInteger(10, Num);
2769AMDGPUAsmParser::isRegister(
const AsmToken &Token,
2786 if (!RegSuffix.
empty()) {
2804AMDGPUAsmParser::isRegister()
2806 return isRegister(getToken(), peekToken());
2809unsigned AMDGPUAsmParser::getRegularReg(RegisterKind RegKind,
unsigned RegNum,
2810 unsigned SubReg,
unsigned RegWidth,
2814 unsigned AlignSize = 1;
2815 if (RegKind == IS_SGPR || RegKind == IS_TTMP) {
2821 if (RegNum % AlignSize != 0) {
2822 Error(Loc,
"invalid register alignment");
2823 return AMDGPU::NoRegister;
2826 unsigned RegIdx = RegNum / AlignSize;
2829 Error(Loc,
"invalid or unsupported register size");
2830 return AMDGPU::NoRegister;
2836 Error(Loc,
"register index is out of range");
2837 return AMDGPU::NoRegister;
2847 assert(Reg &&
"Invalid subregister!");
2853bool AMDGPUAsmParser::ParseRegRange(
unsigned &Num,
unsigned &RegWidth) {
2854 int64_t RegLo, RegHi;
2858 SMLoc FirstIdxLoc = getLoc();
2865 SecondIdxLoc = getLoc();
2875 if (!isUInt<32>(RegLo)) {
2876 Error(FirstIdxLoc,
"invalid register index");
2880 if (!isUInt<32>(RegHi)) {
2881 Error(SecondIdxLoc,
"invalid register index");
2885 if (RegLo > RegHi) {
2886 Error(FirstIdxLoc,
"first register index should not exceed second index");
2890 Num =
static_cast<unsigned>(RegLo);
2891 RegWidth = 32 * ((RegHi - RegLo) + 1);
2895unsigned AMDGPUAsmParser::ParseSpecialReg(RegisterKind &RegKind,
2896 unsigned &RegNum,
unsigned &RegWidth,
2903 RegKind = IS_SPECIAL;
2910unsigned AMDGPUAsmParser::ParseRegularReg(RegisterKind &RegKind,
2911 unsigned &RegNum,
unsigned &RegWidth,
2915 auto Loc = getLoc();
2919 Error(Loc,
"invalid register name");
2920 return AMDGPU::NoRegister;
2928 unsigned SubReg = NoSubRegister;
2929 if (!RegSuffix.
empty()) {
2941 Error(Loc,
"invalid register index");
2942 return AMDGPU::NoRegister;
2947 if (!ParseRegRange(RegNum, RegWidth))
2948 return AMDGPU::NoRegister;
2951 return getRegularReg(RegKind, RegNum,
SubReg, RegWidth, Loc);
2954unsigned AMDGPUAsmParser::ParseRegList(RegisterKind &RegKind,
unsigned &RegNum,
2957 unsigned Reg = AMDGPU::NoRegister;
2958 auto ListLoc = getLoc();
2961 "expected a register or a list of registers")) {
2962 return AMDGPU::NoRegister;
2967 auto Loc = getLoc();
2968 if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth))
2969 return AMDGPU::NoRegister;
2970 if (RegWidth != 32) {
2971 Error(Loc,
"expected a single 32-bit register");
2972 return AMDGPU::NoRegister;
2976 RegisterKind NextRegKind;
2977 unsigned NextReg, NextRegNum, NextRegWidth;
2980 if (!ParseAMDGPURegister(NextRegKind, NextReg,
2981 NextRegNum, NextRegWidth,
2983 return AMDGPU::NoRegister;
2985 if (NextRegWidth != 32) {
2986 Error(Loc,
"expected a single 32-bit register");
2987 return AMDGPU::NoRegister;
2989 if (NextRegKind != RegKind) {
2990 Error(Loc,
"registers in a list must be of the same kind");
2991 return AMDGPU::NoRegister;
2993 if (!AddNextRegisterToList(Reg, RegWidth, RegKind, NextReg, Loc))
2994 return AMDGPU::NoRegister;
2998 "expected a comma or a closing square bracket")) {
2999 return AMDGPU::NoRegister;
3003 Reg = getRegularReg(RegKind, RegNum, NoSubRegister, RegWidth, ListLoc);
3008bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind,
unsigned &Reg,
3009 unsigned &RegNum,
unsigned &RegWidth,
3011 auto Loc = getLoc();
3012 Reg = AMDGPU::NoRegister;
3015 Reg = ParseSpecialReg(RegKind, RegNum, RegWidth, Tokens);
3016 if (Reg == AMDGPU::NoRegister)
3017 Reg = ParseRegularReg(RegKind, RegNum, RegWidth, Tokens);
3019 Reg = ParseRegList(RegKind, RegNum, RegWidth, Tokens);
3023 if (Reg == AMDGPU::NoRegister) {
3024 assert(Parser.hasPendingError());
3028 if (!subtargetHasRegister(*
TRI, Reg)) {
3029 if (Reg == AMDGPU::SGPR_NULL) {
3030 Error(Loc,
"'null' operand is not supported on this GPU");
3032 Error(Loc,
"register not available on this GPU");
3040bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind,
unsigned &Reg,
3041 unsigned &RegNum,
unsigned &RegWidth,
3042 bool RestoreOnFailure ) {
3043 Reg = AMDGPU::NoRegister;
3046 if (ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth, Tokens)) {
3047 if (RestoreOnFailure) {
3048 while (!Tokens.
empty()) {
3057std::optional<StringRef>
3058AMDGPUAsmParser::getGprCountSymbolName(RegisterKind RegKind) {
3061 return StringRef(
".amdgcn.next_free_vgpr");
3063 return StringRef(
".amdgcn.next_free_sgpr");
3065 return std::nullopt;
3069void AMDGPUAsmParser::initializeGprCountSymbol(RegisterKind RegKind) {
3070 auto SymbolName = getGprCountSymbolName(RegKind);
3071 assert(SymbolName &&
"initializing invalid register kind");
3072 MCSymbol *
Sym = getContext().getOrCreateSymbol(*SymbolName);
3076bool AMDGPUAsmParser::updateGprCountSymbols(RegisterKind RegKind,
3077 unsigned DwordRegIndex,
3078 unsigned RegWidth) {
3083 auto SymbolName = getGprCountSymbolName(RegKind);
3086 MCSymbol *
Sym = getContext().getOrCreateSymbol(*SymbolName);
3088 int64_t NewMax = DwordRegIndex +
divideCeil(RegWidth, 32) - 1;
3091 if (!
Sym->isVariable())
3092 return !
Error(getLoc(),
3093 ".amdgcn.next_free_{v,s}gpr symbols must be variable");
3094 if (!
Sym->getVariableValue(
false)->evaluateAsAbsolute(OldCount))
3097 ".amdgcn.next_free_{v,s}gpr symbols must be absolute expressions");
3099 if (OldCount <= NewMax)
3105std::unique_ptr<AMDGPUOperand>
3106AMDGPUAsmParser::parseRegister(
bool RestoreOnFailure) {
3107 const auto &Tok = getToken();
3108 SMLoc StartLoc = Tok.getLoc();
3109 SMLoc EndLoc = Tok.getEndLoc();
3110 RegisterKind RegKind;
3111 unsigned Reg, RegNum, RegWidth;
3113 if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth)) {
3117 if (!updateGprCountSymbols(RegKind, RegNum, RegWidth))
3120 KernelScope.usesRegister(RegKind, RegNum, RegWidth);
3121 return AMDGPUOperand::CreateReg(
this, Reg, StartLoc, EndLoc);
3125 bool HasSP3AbsModifier,
bool HasLit) {
3133 HasLit = trySkipId(
"lit");
3145 const auto& Tok = getToken();
3146 const auto& NextTok = peekToken();
3149 bool Negate =
false;
3157 AMDGPUOperand::Modifiers Mods;
3168 APFloat RealVal(APFloat::IEEEdouble());
3169 auto roundMode = APFloat::rmNearestTiesToEven;
3170 if (
errorToBool(RealVal.convertFromString(Num, roundMode).takeError()))
3173 RealVal.changeSign();
3176 AMDGPUOperand::CreateImm(
this, RealVal.bitcastToAPInt().getZExtValue(), S,
3177 AMDGPUOperand::ImmTyNone,
true));
3178 AMDGPUOperand &
Op =
static_cast<AMDGPUOperand &
>(*
Operands.back());
3179 Op.setModifiers(Mods);
3188 if (HasSP3AbsModifier) {
3197 if (getParser().parsePrimaryExpr(Expr, EndLoc,
nullptr))
3200 if (Parser.parseExpression(Expr))
3204 if (Expr->evaluateAsAbsolute(IntVal)) {
3205 Operands.push_back(AMDGPUOperand::CreateImm(
this, IntVal, S));
3206 AMDGPUOperand &
Op =
static_cast<AMDGPUOperand &
>(*
Operands.back());
3207 Op.setModifiers(Mods);
3211 Operands.push_back(AMDGPUOperand::CreateExpr(
this, Expr, S));
3224 if (
auto R = parseRegister()) {
3233 bool HasSP3AbsMod,
bool HasLit) {
3239 return parseImm(
Operands, HasSP3AbsMod, HasLit);
3243AMDGPUAsmParser::isNamedOperandModifier(
const AsmToken &Token,
const AsmToken &NextToken)
const {
3246 return str ==
"abs" || str ==
"neg" || str ==
"sext";
3252AMDGPUAsmParser::isOpcodeModifierWithVal(
const AsmToken &Token,
const AsmToken &NextToken)
const {
3257AMDGPUAsmParser::isOperandModifier(
const AsmToken &Token,
const AsmToken &NextToken)
const {
3258 return isNamedOperandModifier(Token, NextToken) || Token.
is(
AsmToken::Pipe);
3262AMDGPUAsmParser::isRegOrOperandModifier(
const AsmToken &Token,
const AsmToken &NextToken)
const {
3263 return isRegister(Token, NextToken) || isOperandModifier(Token, NextToken);
3280AMDGPUAsmParser::isModifier() {
3284 peekTokens(NextToken);
3286 return isOperandModifier(Tok, NextToken[0]) ||
3287 (Tok.
is(
AsmToken::Minus) && isRegOrOperandModifier(NextToken[0], NextToken[1])) ||
3288 isOpcodeModifierWithVal(Tok, NextToken[0]);
3314AMDGPUAsmParser::parseSP3NegModifier() {
3317 peekTokens(NextToken);
3320 (isRegister(NextToken[0], NextToken[1]) ||
3322 isId(NextToken[0],
"abs"))) {
3340 return Error(getLoc(),
"invalid syntax, expected 'neg' modifier");
3342 SP3Neg = parseSP3NegModifier();
3345 Neg = trySkipId(
"neg");
3347 return Error(Loc,
"expected register or immediate");
3351 Abs = trySkipId(
"abs");
3355 Lit = trySkipId(
"lit");
3362 return Error(Loc,
"expected register or immediate");
3366 Res = parseRegOrImm(
Operands, SP3Abs, Lit);
3373 if (Lit && !
Operands.back()->isImm())
3374 Error(Loc,
"expected immediate with lit modifier");
3376 if (SP3Abs && !skipToken(
AsmToken::Pipe,
"expected vertical bar"))
3385 AMDGPUOperand::Modifiers Mods;
3386 Mods.Abs = Abs || SP3Abs;
3387 Mods.Neg = Neg || SP3Neg;
3390 if (Mods.hasFPModifiers() || Lit) {
3391 AMDGPUOperand &
Op =
static_cast<AMDGPUOperand &
>(*
Operands.back());
3393 return Error(
Op.getStartLoc(),
"expected an absolute expression");
3394 Op.setModifiers(Mods);
3402 bool Sext = trySkipId(
"sext");
3403 if (Sext && !skipToken(
AsmToken::LParen,
"expected left paren after sext"))
3418 AMDGPUOperand::Modifiers Mods;
3421 if (Mods.hasIntModifiers()) {
3422 AMDGPUOperand &
Op =
static_cast<AMDGPUOperand &
>(*
Operands.back());
3424 return Error(
Op.getStartLoc(),
"expected an absolute expression");
3425 Op.setModifiers(Mods);
3432 return parseRegOrImmWithFPInputMods(
Operands,
false);
3436 return parseRegOrImmWithIntInputMods(
Operands,
false);
3440 auto Loc = getLoc();
3441 if (trySkipId(
"off")) {
3442 Operands.push_back(AMDGPUOperand::CreateImm(
this, 0, Loc,
3443 AMDGPUOperand::ImmTyOff,
false));
3450 std::unique_ptr<AMDGPUOperand>
Reg = parseRegister();
3452 Operands.push_back(std::move(Reg));
3459unsigned AMDGPUAsmParser::checkTargetMatchPredicate(
MCInst &Inst) {
3466 return Match_InvalidOperand;
3468 if (Inst.
getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi ||
3469 Inst.
getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) {
3474 if (!
Op.isImm() ||
Op.getImm() != AMDGPU::SDWA::SdwaSel::DWORD) {
3475 return Match_InvalidOperand;
3479 return Match_Success;
3483 static const unsigned Variants[] = {
3494 if (isForcedDPP() && isForcedVOP3()) {
3498 if (getForcedEncodingSize() == 32) {
3503 if (isForcedVOP3()) {
3508 if (isForcedSDWA()) {
3514 if (isForcedDPP()) {
3522StringRef AMDGPUAsmParser::getMatchedVariantName()
const {
3523 if (isForcedDPP() && isForcedVOP3())
3526 if (getForcedEncodingSize() == 32)
3541unsigned AMDGPUAsmParser::findImplicitSGPRReadInVOP(
const MCInst &Inst)
const {
3545 case AMDGPU::FLAT_SCR:
3547 case AMDGPU::VCC_LO:
3548 case AMDGPU::VCC_HI:
3555 return AMDGPU::NoRegister;
3562bool AMDGPUAsmParser::isInlineConstant(
const MCInst &Inst,
3563 unsigned OpIdx)
const {
3573 int64_t Val = MO.
getImm();
3622unsigned AMDGPUAsmParser::getConstantBusLimit(
unsigned Opcode)
const {
3628 case AMDGPU::V_LSHLREV_B64_e64:
3629 case AMDGPU::V_LSHLREV_B64_gfx10:
3630 case AMDGPU::V_LSHLREV_B64_e64_gfx11:
3631 case AMDGPU::V_LSHLREV_B64_e32_gfx12:
3632 case AMDGPU::V_LSHLREV_B64_e64_gfx12:
3633 case AMDGPU::V_LSHRREV_B64_e64:
3634 case AMDGPU::V_LSHRREV_B64_gfx10:
3635 case AMDGPU::V_LSHRREV_B64_e64_gfx11:
3636 case AMDGPU::V_LSHRREV_B64_e64_gfx12:
3637 case AMDGPU::V_ASHRREV_I64_e64:
3638 case AMDGPU::V_ASHRREV_I64_gfx10:
3639 case AMDGPU::V_ASHRREV_I64_e64_gfx11:
3640 case AMDGPU::V_ASHRREV_I64_e64_gfx12:
3641 case AMDGPU::V_LSHL_B64_e64:
3642 case AMDGPU::V_LSHR_B64_e64:
3643 case AMDGPU::V_ASHR_I64_e64:
3656 bool AddMandatoryLiterals =
false) {
3662 int16_t ImmDeferredIdx =
3679bool AMDGPUAsmParser::usesConstantBus(
const MCInst &Inst,
unsigned OpIdx) {
3682 return !isInlineConstant(Inst, OpIdx);
3689 return isSGPR(PReg,
TRI) && PReg != SGPR_NULL;
3700 const unsigned Opcode = Inst.
getOpcode();
3701 if (Opcode != V_WRITELANE_B32_gfx6_gfx7 && Opcode != V_WRITELANE_B32_vi)
3704 if (!LaneSelOp.
isReg())
3707 return LaneSelReg ==
M0 || LaneSelReg == M0_gfxpre11;
3710bool AMDGPUAsmParser::validateConstantBusLimitations(
3712 const unsigned Opcode = Inst.
getOpcode();
3714 unsigned LastSGPR = AMDGPU::NoRegister;
3715 unsigned ConstantBusUseCount = 0;
3716 unsigned NumLiterals = 0;
3717 unsigned LiteralSize;
3719 if (!(
Desc.TSFlags &
3735 unsigned SGPRUsed = findImplicitSGPRReadInVOP(Inst);
3736 if (SGPRUsed != AMDGPU::NoRegister) {
3737 SGPRsUsed.
insert(SGPRUsed);
3738 ++ConstantBusUseCount;
3743 for (
int OpIdx : OpIndices) {
3748 if (usesConstantBus(Inst, OpIdx)) {
3757 if (SGPRsUsed.
insert(LastSGPR).second) {
3758 ++ConstantBusUseCount;
3778 if (NumLiterals == 0) {
3781 }
else if (LiteralSize !=
Size) {
3787 ConstantBusUseCount += NumLiterals;
3789 if (ConstantBusUseCount <= getConstantBusLimit(Opcode))
3795 Error(Loc,
"invalid operand (violates constant bus restrictions)");
3799bool AMDGPUAsmParser::validateVOPDRegBankConstraints(
3802 const unsigned Opcode = Inst.
getOpcode();
3808 auto getVRegIdx = [&](
unsigned,
unsigned OperandIdx) {
3816 bool SkipSrc = Opcode == AMDGPU::V_DUAL_MOV_B32_e32_X_MOV_B32_e32_gfx12;
3819 auto InvalidCompOprIdx =
3820 InstInfo.getInvalidCompOperandIndex(getVRegIdx, SkipSrc);
3821 if (!InvalidCompOprIdx)
3824 auto CompOprIdx = *InvalidCompOprIdx;
3826 std::max(InstInfo[
VOPD::X].getIndexInParsedOperands(CompOprIdx),
3827 InstInfo[
VOPD::Y].getIndexInParsedOperands(CompOprIdx));
3830 auto Loc = ((AMDGPUOperand &)*
Operands[ParsedIdx]).getStartLoc();
3831 if (CompOprIdx == VOPD::Component::DST) {
3832 Error(Loc,
"one dst register must be even and the other odd");
3834 auto CompSrcIdx = CompOprIdx - VOPD::Component::DST_NUM;
3836 " operands must use different VGPR banks");
3842bool AMDGPUAsmParser::validateIntClampSupported(
const MCInst &Inst) {
3859bool AMDGPUAsmParser::validateMIMGDataSize(
const MCInst &Inst,
3860 const SMLoc &IDLoc) {
3879 unsigned TFESize = (TFEIdx != -1 && Inst.
getOperand(TFEIdx).
getImm()) ? 1 : 0;
3884 bool IsPackedD16 =
false;
3889 IsPackedD16 = D16Idx >= 0;
3891 DataSize = (DataSize + 1) / 2;
3894 if ((VDataSize / 4) == DataSize + TFESize)
3899 Modifiers = IsPackedD16 ?
"dmask and d16" :
"dmask";
3901 Modifiers = IsPackedD16 ?
"dmask, d16 and tfe" :
"dmask and tfe";
3903 Error(IDLoc,
Twine(
"image data size does not match ") + Modifiers);
3907bool AMDGPUAsmParser::validateMIMGAddrSize(
const MCInst &Inst,
3908 const SMLoc &IDLoc) {
3921 : AMDGPU::OpName::rsrc;
3928 assert(SrsrcIdx > VAddr0Idx);
3931 if (BaseOpcode->
BVH) {
3932 if (IsA16 == BaseOpcode->
A16)
3934 Error(IDLoc,
"image address size does not match a16");
3940 bool IsNSA = SrsrcIdx - VAddr0Idx > 1;
3941 unsigned ActualAddrSize =
3942 IsNSA ? SrsrcIdx - VAddr0Idx
3945 unsigned ExpectedAddrSize =
3949 if (hasPartialNSAEncoding() &&
3952 int VAddrLastIdx = SrsrcIdx - 1;
3953 unsigned VAddrLastSize =
3956 ActualAddrSize = VAddrLastIdx - VAddr0Idx + VAddrLastSize;
3959 if (ExpectedAddrSize > 12)
3960 ExpectedAddrSize = 16;
3965 if (ActualAddrSize == 8 && (ExpectedAddrSize >= 5 && ExpectedAddrSize <= 7))
3969 if (ActualAddrSize == ExpectedAddrSize)
3972 Error(IDLoc,
"image address size does not match dim and a16");
3976bool AMDGPUAsmParser::validateMIMGAtomicDMask(
const MCInst &Inst) {
3983 if (!
Desc.mayLoad() || !
Desc.mayStore())
3993 return DMask == 0x1 || DMask == 0x3 || DMask == 0xf;
3996bool AMDGPUAsmParser::validateMIMGGatherDMask(
const MCInst &Inst) {
4012 return DMask == 0x1 || DMask == 0x2 || DMask == 0x4 || DMask == 0x8;
4015bool AMDGPUAsmParser::validateMIMGDim(
const MCInst &Inst,
4030 for (
unsigned i = 1, e =
Operands.size(); i != e; ++i) {
4031 AMDGPUOperand &
Op = ((AMDGPUOperand &)*
Operands[i]);
4038bool AMDGPUAsmParser::validateMIMGMSAA(
const MCInst &Inst) {
4049 if (!BaseOpcode->
MSAA)
4058 return DimInfo->
MSAA;
4064 case AMDGPU::V_MOVRELS_B32_sdwa_gfx10:
4065 case AMDGPU::V_MOVRELSD_B32_sdwa_gfx10:
4066 case AMDGPU::V_MOVRELSD_2_B32_sdwa_gfx10:
4076bool AMDGPUAsmParser::validateMovrels(
const MCInst &Inst,
4100 Error(ErrLoc,
"source operand must be a VGPR");
4104bool AMDGPUAsmParser::validateMAIAccWrite(
const MCInst &Inst,
4109 if (Opc != AMDGPU::V_ACCVGPR_WRITE_B32_vi)
4123 "source operand must be either a VGPR or an inline constant");
4130bool AMDGPUAsmParser::validateMAISrc2(
const MCInst &Inst,
4136 !getFeatureBits()[FeatureMFMAInlineLiteralBug])
4143 if (Inst.
getOperand(Src2Idx).
isImm() && isInlineConstant(Inst, Src2Idx)) {
4145 "inline constants are not allowed for this operand");
4152bool AMDGPUAsmParser::validateMFMA(
const MCInst &Inst,
4170 if (Src2Reg == DstReg)
4174 if (
TRI->getRegClass(
Desc.operands()[0].RegClass).getSizeInBits() <= 128)
4177 if (
TRI->regsOverlap(Src2Reg, DstReg)) {
4179 "source 2 operand must not partially overlap with dst");
4186bool AMDGPUAsmParser::validateDivScale(
const MCInst &Inst) {
4190 case V_DIV_SCALE_F32_gfx6_gfx7:
4191 case V_DIV_SCALE_F32_vi:
4192 case V_DIV_SCALE_F32_gfx10:
4193 case V_DIV_SCALE_F64_gfx6_gfx7:
4194 case V_DIV_SCALE_F64_vi:
4195 case V_DIV_SCALE_F64_gfx10:
4201 for (
auto Name : {AMDGPU::OpName::src0_modifiers,
4202 AMDGPU::OpName::src2_modifiers,
4203 AMDGPU::OpName::src2_modifiers}) {
4214bool AMDGPUAsmParser::validateMIMGD16(
const MCInst &Inst) {
4234 case AMDGPU::V_SUBREV_F32_e32:
4235 case AMDGPU::V_SUBREV_F32_e64:
4236 case AMDGPU::V_SUBREV_F32_e32_gfx10:
4237 case AMDGPU::V_SUBREV_F32_e32_gfx6_gfx7:
4238 case AMDGPU::V_SUBREV_F32_e32_vi:
4239 case AMDGPU::V_SUBREV_F32_e64_gfx10:
4240 case AMDGPU::V_SUBREV_F32_e64_gfx6_gfx7:
4241 case AMDGPU::V_SUBREV_F32_e64_vi:
4243 case AMDGPU::V_SUBREV_CO_U32_e32:
4244 case AMDGPU::V_SUBREV_CO_U32_e64:
4245 case AMDGPU::V_SUBREV_I32_e32_gfx6_gfx7:
4246 case AMDGPU::V_SUBREV_I32_e64_gfx6_gfx7:
4248 case AMDGPU::V_SUBBREV_U32_e32:
4249 case AMDGPU::V_SUBBREV_U32_e64:
4250 case AMDGPU::V_SUBBREV_U32_e32_gfx6_gfx7:
4251 case AMDGPU::V_SUBBREV_U32_e32_vi:
4252 case AMDGPU::V_SUBBREV_U32_e64_gfx6_gfx7:
4253 case AMDGPU::V_SUBBREV_U32_e64_vi:
4255 case AMDGPU::V_SUBREV_U32_e32:
4256 case AMDGPU::V_SUBREV_U32_e64:
4257 case AMDGPU::V_SUBREV_U32_e32_gfx9:
4258 case AMDGPU::V_SUBREV_U32_e32_vi:
4259 case AMDGPU::V_SUBREV_U32_e64_gfx9:
4260 case AMDGPU::V_SUBREV_U32_e64_vi:
4262 case AMDGPU::V_SUBREV_F16_e32:
4263 case AMDGPU::V_SUBREV_F16_e64:
4264 case AMDGPU::V_SUBREV_F16_e32_gfx10:
4265 case AMDGPU::V_SUBREV_F16_e32_vi:
4266 case AMDGPU::V_SUBREV_F16_e64_gfx10:
4267 case AMDGPU::V_SUBREV_F16_e64_vi:
4269 case AMDGPU::V_SUBREV_U16_e32:
4270 case AMDGPU::V_SUBREV_U16_e64:
4271 case AMDGPU::V_SUBREV_U16_e32_vi:
4272 case AMDGPU::V_SUBREV_U16_e64_vi:
4274 case AMDGPU::V_SUBREV_CO_U32_e32_gfx9:
4275 case AMDGPU::V_SUBREV_CO_U32_e64_gfx10:
4276 case AMDGPU::V_SUBREV_CO_U32_e64_gfx9:
4278 case AMDGPU::V_SUBBREV_CO_U32_e32_gfx9:
4279 case AMDGPU::V_SUBBREV_CO_U32_e64_gfx9:
4281 case AMDGPU::V_SUBREV_NC_U32_e32_gfx10:
4282 case AMDGPU::V_SUBREV_NC_U32_e64_gfx10:
4284 case AMDGPU::V_SUBREV_CO_CI_U32_e32_gfx10:
4285 case AMDGPU::V_SUBREV_CO_CI_U32_e64_gfx10:
4287 case AMDGPU::V_LSHRREV_B32_e32:
4288 case AMDGPU::V_LSHRREV_B32_e64:
4289 case AMDGPU::V_LSHRREV_B32_e32_gfx6_gfx7:
4290 case AMDGPU::V_LSHRREV_B32_e64_gfx6_gfx7:
4291 case AMDGPU::V_LSHRREV_B32_e32_vi:
4292 case AMDGPU::V_LSHRREV_B32_e64_vi:
4293 case AMDGPU::V_LSHRREV_B32_e32_gfx10:
4294 case AMDGPU::V_LSHRREV_B32_e64_gfx10:
4296 case AMDGPU::V_ASHRREV_I32_e32:
4297 case AMDGPU::V_ASHRREV_I32_e64:
4298 case AMDGPU::V_ASHRREV_I32_e32_gfx10:
4299 case AMDGPU::V_ASHRREV_I32_e32_gfx6_gfx7:
4300 case AMDGPU::V_ASHRREV_I32_e32_vi:
4301 case AMDGPU::V_ASHRREV_I32_e64_gfx10:
4302 case AMDGPU::V_ASHRREV_I32_e64_gfx6_gfx7:
4303 case AMDGPU::V_ASHRREV_I32_e64_vi:
4305 case AMDGPU::V_LSHLREV_B32_e32:
4306 case AMDGPU::V_LSHLREV_B32_e64:
4307 case AMDGPU::V_LSHLREV_B32_e32_gfx10:
4308 case AMDGPU::V_LSHLREV_B32_e32_gfx6_gfx7:
4309 case AMDGPU::V_LSHLREV_B32_e32_vi:
4310 case AMDGPU::V_LSHLREV_B32_e64_gfx10:
4311 case AMDGPU::V_LSHLREV_B32_e64_gfx6_gfx7:
4312 case AMDGPU::V_LSHLREV_B32_e64_vi:
4314 case AMDGPU::V_LSHLREV_B16_e32:
4315 case AMDGPU::V_LSHLREV_B16_e64:
4316 case AMDGPU::V_LSHLREV_B16_e32_vi:
4317 case AMDGPU::V_LSHLREV_B16_e64_vi:
4318 case AMDGPU::V_LSHLREV_B16_gfx10:
4320 case AMDGPU::V_LSHRREV_B16_e32:
4321 case AMDGPU::V_LSHRREV_B16_e64:
4322 case AMDGPU::V_LSHRREV_B16_e32_vi:
4323 case AMDGPU::V_LSHRREV_B16_e64_vi:
4324 case AMDGPU::V_LSHRREV_B16_gfx10:
4326 case AMDGPU::V_ASHRREV_I16_e32:
4327 case AMDGPU::V_ASHRREV_I16_e64:
4328 case AMDGPU::V_ASHRREV_I16_e32_vi:
4329 case AMDGPU::V_ASHRREV_I16_e64_vi:
4330 case AMDGPU::V_ASHRREV_I16_gfx10:
4332 case AMDGPU::V_LSHLREV_B64_e64:
4333 case AMDGPU::V_LSHLREV_B64_gfx10:
4334 case AMDGPU::V_LSHLREV_B64_vi:
4336 case AMDGPU::V_LSHRREV_B64_e64:
4337 case AMDGPU::V_LSHRREV_B64_gfx10:
4338 case AMDGPU::V_LSHRREV_B64_vi:
4340 case AMDGPU::V_ASHRREV_I64_e64:
4341 case AMDGPU::V_ASHRREV_I64_gfx10:
4342 case AMDGPU::V_ASHRREV_I64_vi:
4344 case AMDGPU::V_PK_LSHLREV_B16:
4345 case AMDGPU::V_PK_LSHLREV_B16_gfx10:
4346 case AMDGPU::V_PK_LSHLREV_B16_vi:
4348 case AMDGPU::V_PK_LSHRREV_B16:
4349 case AMDGPU::V_PK_LSHRREV_B16_gfx10:
4350 case AMDGPU::V_PK_LSHRREV_B16_vi:
4351 case AMDGPU::V_PK_ASHRREV_I16:
4352 case AMDGPU::V_PK_ASHRREV_I16_gfx10:
4353 case AMDGPU::V_PK_ASHRREV_I16_vi:
// Checks whether an lds_direct source operand is legal for this instruction
// and target: returns an error message, or std::nullopt on success. Visible
// checks: only src0 may carry LDS_DIRECT, and some GPUs/encodings reject it.
// NOTE(review): this listing is a garbled extraction — interior lines (the
// encoding mask setup, the src operand lookup) are missing; verify against
// the upstream AMDGPUAsmParser.cpp before editing.
4360std::optional<StringRef>
4361AMDGPUAsmParser::validateLdsDirect(
 const MCInst &Inst) {
4363 using namespace SIInstrFlags;
4364 const unsigned Opcode = Inst.
getOpcode();
// Instructions whose encoding does not support lds_direct pass trivially.
4370 if ((
Desc.TSFlags & Enc) == 0)
4371 return std::nullopt;
// Scan all three potential source operands for the LDS_DIRECT register.
4373 for (
auto SrcName : {OpName::src0, OpName::src1, OpName::src2}) {
4378 if (Src.isReg() && Src.getReg() == LDS_DIRECT) {
4381 return StringRef(
"lds_direct is not supported on this GPU");
4384 return StringRef(
"lds_direct cannot be used with this instruction");
// lds_direct is only permitted in the src0 position.
4386 if (SrcName != OpName::src0)
4387 return StringRef(
"lds_direct may be used as src0 only");
4391 return std::nullopt;
// NOTE(review): the enclosing function signature is missing from this
// extraction — presumably this is the body of getFlatOffsetLoc(Operands),
// which returns the source location of the flat-offset operand; confirm
// against upstream. Scans parsed operands (skipping the mnemonic at index 0)
// for the first flat-offset operand and returns its start location.
4395 for (
unsigned i = 1, e =
Operands.size(); i != e; ++i) {
4396 AMDGPUOperand &
Op = ((AMDGPUOperand &)*
Operands[i]);
4397 if (
Op.isFlatOffset())
4398 return Op.getStartLoc();
// Validates the immediate offset operand of a memory instruction, delegating
// to the FLAT and SMEM validators when applicable, otherwise range-checking
// the immediate (24-bit signed or 16-bit unsigned paths are visible below).
// Returns true on success, false after emitting an Error.
// NOTE(review): garbled extraction — the dispatch conditions and Error call
// heads are missing; verify against upstream before editing.
4403bool AMDGPUAsmParser::validateOffset(
 const MCInst &Inst,
4412 return validateFlatOffset(Inst,
Operands);
4415 return validateSMEMOffset(Inst,
Operands);
// Path for offsets encoded as a 24-bit signed immediate.
4420 const unsigned OffsetSize = 24;
4421 if (!
isIntN(OffsetSize,
Op.getImm())) {
4423 Twine(
"expected a ") +
Twine(OffsetSize) +
"-bit signed offset");
// Path for offsets encoded as a 16-bit unsigned immediate.
4427 const unsigned OffsetSize = 16;
4428 if (!
isUIntN(OffsetSize,
Op.getImm())) {
4430 Twine(
"expected a ") +
Twine(OffsetSize) +
"-bit unsigned offset");
// Validates the offset modifier of a FLAT instruction: rejects non-zero
// offsets on targets without flat-offset support, and range-checks the
// immediate as signed (when negative offsets are allowed) or as an
// (OffsetSize-1)-bit unsigned value otherwise.
// NOTE(review): garbled extraction — the operand lookup, OffsetSize
// computation, and AllowNegative condition are missing; verify upstream.
4437bool AMDGPUAsmParser::validateFlatOffset(
 const MCInst &Inst,
4448 if (!hasFlatOffsets() &&
Op.getImm() != 0) {
4450 "flat offset modifier is not supported on this GPU");
4457 bool AllowNegative =
// Signed check covers both cases; the extra clause rejects negatives when
// the target only allows unsigned offsets.
4460 if (!
isIntN(OffsetSize,
Op.getImm()) || (!AllowNegative &&
Op.getImm() < 0)) {
4462 Twine(
"expected a ") +
4463 (AllowNegative ?
Twine(OffsetSize) +
"-bit signed offset"
4464 :
Twine(OffsetSize - 1) +
"-bit unsigned offset"));
// NOTE(review): enclosing signature missing from this extraction —
// presumably the body of getSMEMOffsetLoc(Operands); confirm upstream.
// Scans parsed operands starting at index 2 (presumably past mnemonic and
// destination — TODO confirm) for an SMEM offset or offset-modifier operand
// and returns its start location.
4473 for (
unsigned i = 2, e =
Operands.size(); i != e; ++i) {
4474 AMDGPUOperand &
Op = ((AMDGPUOperand &)*
Operands[i]);
4475 if (
Op.isSMEMOffset() ||
Op.isSMEMOffsetMod())
4476 return Op.getStartLoc();
// Validates an SMEM offset immediate; the visible tail selects the error
// message by target generation: 20-bit unsigned for VI/buffer forms,
// 21-bit signed otherwise.
// NOTE(review): garbled extraction — the range checks themselves and the
// first branch of the ternary are missing; verify against upstream.
4481bool AMDGPUAsmParser::validateSMEMOffset(
 const MCInst &Inst,
4507 : (
isVI() || IsBuffer) ?
"expected a 20-bit unsigned offset"
4508 :
"expected a 21-bit signed offset");
// Counts distinct 32-bit literal constants and symbolic expressions among a
// SOP instruction's src0/src1 operands; valid only if at most one unique
// literal/expression is used (the hardware provides a single literal slot).
// NOTE(review): garbled extraction — operand index computation and the
// counter increments are missing; verify against upstream.
4513bool AMDGPUAsmParser::validateSOPLiteral(
 const MCInst &Inst)
 const {
4522 const int OpIndices[] = { Src0Idx, Src1Idx };
4524 unsigned NumExprs = 0;
4525 unsigned NumLiterals = 0;
4528 for (
int OpIdx : OpIndices) {
4529 if (OpIdx == -1)
 break;
// Only non-inline immediates consume the literal slot.
4534 if (MO.
isImm() && !isInlineConstant(Inst, OpIdx)) {
// Two uses of the same literal value count once.
4536 if (NumLiterals == 0 || LiteralValue !=
Value) {
4540 }
 else if (MO.
isExpr()) {
4546 return NumLiterals + NumExprs <= 1;
// Validates the op_sel / op_sel_hi modifier operands when present on the
// instruction (negative index means the operand does not exist).
// NOTE(review): garbled extraction — both branch bodies are missing, so the
// actual constraints checked here cannot be stated; verify upstream.
4549bool AMDGPUAsmParser::validateOpSel(
 const MCInst &Inst) {
4563 if (OpSelIdx != -1) {
4568 if (OpSelHiIdx != -1) {
// Validates a neg_lo/neg_hi modifier (the specific one is selected by the
// OpName parameter) against the instruction's three source-modifier
// operands, iterated below.
// NOTE(review): garbled extraction — the per-source check inside the loop
// is missing; verify against upstream.
4586bool AMDGPUAsmParser::validateNeg(
 const MCInst &Inst,
 int OpName) {
4611 int SrcMods[3] = {AMDGPU::OpName::src0_modifiers,
4612 AMDGPU::OpName::src1_modifiers,
4613 AMDGPU::OpName::src2_modifiers};
4615 for (
unsigned i = 0; i < 3; ++i) {
// Validates DPP (data-parallel primitives) usage: checks the dpp_ctrl value
// (DP ALU ops only accept row_newbcast on some targets) and rejects invalid
// operands — including immediate src1 — for DPP forms.
// NOTE(review): garbled extraction — the conditions guarding each Error are
// missing; verify against upstream.
4625bool AMDGPUAsmParser::validateDPP(
 const MCInst &Inst,
4629 if (DppCtrlIdx >= 0) {
4636 Error(S,
"DP ALU dpp only supports row_newbcast");
// Instruction is DPP if it has either a dpp_ctrl or a dpp8 operand.
4642 bool IsDPP = DppCtrlIdx >= 0 || Dpp8Idx >= 0;
4652 Error(S,
"invalid operand for instruction");
4657 "src1 immediate operand invalid for instruction");
// Returns true if Reg is the correct VCC register for the current wavefront
// size: the 64-bit VCC pair in wave64 mode, VCC_LO in wave32 mode.
4667bool AMDGPUAsmParser::validateVccOperand(
 unsigned Reg)
 const {
4668 auto FB = getFeatureBits();
4669 return (FB[AMDGPU::FeatureWavefrontSize64] && Reg == AMDGPU::VCC) ||
4670 (FB[AMDGPU::FeatureWavefrontSize32] &&
Reg == AMDGPU::VCC_LO);
// Validates literal constants on VOP instructions: values must fit in 32
// bits (with a 64-bit-FP exception path), literals require the VOP3Literal
// feature unless mandatory, and at most one unique literal is allowed.
// Emits an Error and returns false on violation.
// NOTE(review): garbled extraction — operand-index setup and several
// condition heads are missing; verify against upstream.
4674bool AMDGPUAsmParser::validateVOPLiteral(
 const MCInst &Inst,
// Fast path: nothing to check unless a literal is possible (VOPD or a
// mandatory-literal instruction).
4680 !HasMandatoryLiteral && !
isVOPD(Opcode))
4685 unsigned NumExprs = 0;
4686 unsigned NumLiterals = 0;
4689 for (
int OpIdx : OpIndices) {
// Only non-inline immediates occupy the literal slot.
4699 if (MO.
isImm() && !isInlineConstant(Inst, OpIdx)) {
// Literal must be representable in 32 bits (signed or unsigned).
4705 if (!IsValid32Op && !isInt<32>(
Value) && !isUInt<32>(
Value)) {
4706 Error(getLitLoc(
Operands),
"invalid operand for instruction");
4710 if (IsFP64 && IsValid32Op)
// Count unique literal values only.
4713 if (NumLiterals == 0 || LiteralValue !=
Value) {
4717 }
 else if (MO.
isExpr()) {
4721 NumLiterals += NumExprs;
4726 if (!HasMandatoryLiteral && !getFeatureBits()[FeatureVOP3Literal]) {
4727 Error(getLitLoc(
Operands),
"literal operands are not supported");
4731 if (NumLiterals > 1) {
4732 Error(getLitLoc(
Operands,
true),
"only one unique literal operand is allowed");
// NOTE(review): enclosing signature missing from this extraction —
// presumably a helper classifying whether an operand register is an AGPR
// (returns 1) or not (returns 0); confirm name and contract upstream.
// Uses the sub0 subregister for register tuples so the check applies to the
// tuple's first 32-bit element.
4750 unsigned Sub =
MRI->getSubReg(
Op.getReg(), AMDGPU::sub0);
4751 auto Reg = Sub ? Sub :
Op.getReg();
4753 return AGPR32.
contains(Reg) ? 1 : 0;
// Validates AGPR usage in loads/stores: data and destination operands must
// agree in register class. On gfx90a (FeatureGFX90AInsts) dst and data must
// both be VGPR or both AGPR; on other targets AGPR memory operands are not
// allowed at all (both classifications must be < 1, i.e. non-AGPR).
// NOTE(review): garbled extraction — the DstAreg/DataAreg classification
// calls are missing; verify against upstream.
4756bool AMDGPUAsmParser::validateAGPRLdSt(
 const MCInst &Inst)
 const {
4764 : AMDGPU::OpName::vdata;
// A second data operand (e.g. cmpswap) must match the first's class.
4772 if (Data2Areg >= 0 && Data2Areg != DataAreg)
4776 auto FB = getFeatureBits();
4777 if (FB[AMDGPU::FeatureGFX90AInsts]) {
4778 if (DataAreg < 0 || DstAreg < 0)
4780 return DstAreg == DataAreg;
4783 return DstAreg < 1 && DataAreg < 1;
// On gfx90a (FeatureGFX90AInsts), VGPR/AGPR tuples must start on an even
// register; checks each tuple's first 32-bit subregister for even alignment.
// Targets without the feature pass trivially.
// NOTE(review): garbled extraction — the operand loop and tuple-size guard
// are missing; verify against upstream.
4786bool AMDGPUAsmParser::validateVGPRAlign(
 const MCInst &Inst)
 const {
4787 auto FB = getFeatureBits();
4788 if (!FB[AMDGPU::FeatureGFX90AInsts])
// sub0 gives the first element of a register tuple.
4799 unsigned Sub =
MRI->getSubReg(
Op.getReg(), AMDGPU::sub0);
// Odd first register => misaligned tuple.
4803 if (VGPR32.
contains(Sub) && ((Sub - AMDGPU::VGPR0) & 1))
4805 if (AGPR32.
contains(Sub) && ((Sub - AMDGPU::AGPR0) & 1))
// NOTE(review): enclosing signature missing from this extraction —
// presumably the body of an operand-location helper (e.g. getBLGPLoc),
// returning the start location of the first matching parsed operand;
// the predicate on line 4815 is missing. Confirm against upstream.
4813 for (
unsigned i = 1, e =
Operands.size(); i != e; ++i) {
4814 AMDGPUOperand &
Op = ((AMDGPUOperand &)*
Operands[i]);
4816 return Op.getStartLoc();
// Validates the blgp/neg modifier on MFMA instructions. On gfx940 the listed
// F64 MFMA opcodes repurpose the blgp field as neg, so the modifier spelled
// by the user must match the instruction's interpretation; otherwise emit
// the appropriate "not supported" diagnostic.
// NOTE(review): garbled extraction — the switch head, UsesNeg assignment in
// the cases, and IsNeg computation are missing; verify against upstream.
4821bool AMDGPUAsmParser::validateBLGP(
 const MCInst &Inst,
4831 auto FB = getFeatureBits();
4832 bool UsesNeg =
 false;
4833 if (FB[AMDGPU::FeatureGFX940Insts]) {
4835 case AMDGPU::V_MFMA_F64_16X16X4F64_gfx940_acd:
4836 case AMDGPU::V_MFMA_F64_16X16X4F64_gfx940_vcd:
4837 case AMDGPU::V_MFMA_F64_4X4X4F64_gfx940_acd:
4838 case AMDGPU::V_MFMA_F64_4X4X4F64_gfx940_vcd:
// Modifier spelling matches the instruction's expectation — OK.
4843 if (IsNeg == UsesNeg)
4847 UsesNeg ?
"invalid modifier: blgp is not supported"
4848 :
"invalid modifier: neg is not supported");
// For the gfx11 split-counter S_WAITCNT_* forms, src0 must be the null
// register (SGPR_NULL); all other opcodes pass trivially.
// NOTE(review): garbled extraction — the register operand lookup and the
// early returns are missing; verify against upstream.
4853bool AMDGPUAsmParser::validateWaitCnt(
 const MCInst &Inst,
4859 if (Opc != AMDGPU::S_WAITCNT_EXPCNT_gfx11 &&
4860 Opc != AMDGPU::S_WAITCNT_LGKMCNT_gfx11 &&
4861 Opc != AMDGPU::S_WAITCNT_VMCNT_gfx11 &&
4862 Opc != AMDGPU::S_WAITCNT_VSCNT_gfx11)
4868 if (Reg == AMDGPU::SGPR_NULL)
4872 Error(RegLoc,
"src0 must be null");
// Validates DS (LDS/GDS) instructions: delegates GWS opcodes to
// validateGWS, and rejects the gds modifier on targets that lack it.
// NOTE(review): garbled extraction — the GWS dispatch condition and the gds
// check are missing; verify against upstream.
4876bool AMDGPUAsmParser::validateDS(
 const MCInst &Inst,
4882 return validateGWS(Inst,
Operands);
4893 Error(S,
"gds modifier is not supported on this GPU");
// gfx90a hardware requires the data VGPR of DS GWS instructions (other than
// the three exempted opcodes) to be even-aligned; non-gfx90a targets pass
// trivially. Emits "vgpr must be even aligned" on violation.
// NOTE(review): garbled extraction — the data-operand lookup and the parity
// test on RegIdx are missing; verify against upstream.
4901bool AMDGPUAsmParser::validateGWS(
 const MCInst &Inst,
4903 if (!getFeatureBits()[AMDGPU::FeatureGFX90AInsts])
4907 if (Opc != AMDGPU::DS_GWS_INIT_vi && Opc != AMDGPU::DS_GWS_BARRIER_vi &&
4908 Opc != AMDGPU::DS_GWS_SEMA_BR_vi)
// Index relative to the start of the VGPR or AGPR file, whichever holds Reg.
4917 auto RegIdx =
Reg - (VGPR32.
contains(Reg) ? AMDGPU::VGPR0 : AMDGPU::AGPR0);
4920 Error(RegLoc,
"vgpr must be even aligned");
// Validates cache-policy (cpol: glc/slc/scc/sc0...) modifiers against the
// instruction class and target: gfx12-style th/scope validation is
// delegated to validateTHAndScopeBits; SMRD rejects cache policy entirely;
// atomics have must-use / must-not-use glc rules.
// NOTE(review): garbled extraction — most guarding conditions are missing;
// verify against upstream before editing.
4927bool AMDGPUAsmParser::validateCoherencyBits(
 const MCInst &Inst,
4929 const SMLoc &IDLoc) {
4931 AMDGPU::OpName::cpol);
4938 return validateTHAndScopeBits(Inst,
Operands, CPol);
4944 Error(S,
"cache policy is not supported for SMRD instructions");
4948 Error(IDLoc,
"invalid cache policy for SMEM instruction");
4957 if (!(TSFlags & AllowSCCModifier)) {
4962 "scc modifier is not supported for this instruction on this GPU");
4973 :
"instruction must use glc");
// Point the diagnostic at the offending token; gfx940 spells it "sc0".
4981 &CStr.data()[CStr.find(
isGFX940() ?
"sc0" :
"glc")]);
4983 :
"instruction must not use glc");
// Validates the gfx12-style temporal-hint (th) and scope fields decoded
// from CPol: return-atomics must use th:TH_ATOMIC_RETURN, SMEM accepts only
// certain th values, some th/scope combinations are illegal, and atomics,
// stores, and loads each have their own permitted th sets.
// NOTE(review): garbled extraction — the PrintError lambda and all guard
// conditions are missing; verify against upstream.
4991bool AMDGPUAsmParser::validateTHAndScopeBits(
 const MCInst &Inst,
4993 const unsigned CPol) {
4997 const unsigned Opcode = Inst.
getOpcode();
5009 return PrintError(
"instruction must use th:TH_ATOMIC_RETURN");
5017 return PrintError(
"invalid th value for SMEM instruction");
5024 return PrintError(
"scope and th combination is not valid");
5033 return PrintError(
"invalid th value for atomic instructions");
5034 }
 else if (IsStore) {
5036 return PrintError(
"invalid th value for store instructions");
5039 return PrintError(
"invalid th value for load instructions");
// NOTE(review): enclosing signature missing from this extraction —
// presumably the operand loop of validateExeczVcczOperands, which rejects
// the execz/vccz pseudo-registers on targets that removed them; confirm
// against upstream.
5049 if (!Operand->isReg())
5051 unsigned Reg = Operand->getReg();
5052 if (Reg == SRC_EXECZ || Reg == SRC_VCCZ) {
5054 "execz and vccz are not supported on this GPU");
// Rejects the tfe (texture-fail-enable) modifier on store instructions,
// where it has no effect.
// NOTE(review): garbled extraction — the tfe operand lookup between the
// mayStore() test and the Error is missing; verify against upstream.
5061bool AMDGPUAsmParser::validateTFE(
 const MCInst &Inst,
5064 if (
Desc.mayStore() &&
5068 Error(Loc,
"TFE modifier has no meaning for store instructions");
// Top-level semantic validator run after a successful instruction match:
// chains every per-feature validator (lds_direct, literals, constant bus,
// VOPD banks, clamp, op_sel, neg_lo/hi, DPP, MIMG, offsets, MAI/MFMA,
// cache policy, AGPR rules, alignment, BLGP, waitcnt, execz/vccz, TFE),
// emitting a diagnostic and returning false on the first failure.
// NOTE(review): garbled extraction — most Error() call heads and all of the
// "return false" bodies are missing; verify against upstream.
5076bool AMDGPUAsmParser::validateInstruction(
 const MCInst &Inst,
5079 if (
auto ErrMsg = validateLdsDirect(Inst)) {
5083 if (!validateSOPLiteral(Inst)) {
5085 "only one unique literal operand is allowed");
5088 if (!validateVOPLiteral(Inst,
Operands)) {
5091 if (!validateConstantBusLimitations(Inst,
Operands)) {
5094 if (!validateVOPDRegBankConstraints(Inst,
Operands)) {
5097 if (!validateIntClampSupported(Inst)) {
5099 "integer clamping is not supported on this GPU");
5102 if (!validateOpSel(Inst)) {
5104 "invalid op_sel operand");
5107 if (!validateNeg(Inst, AMDGPU::OpName::neg_lo)) {
5109 "invalid neg_lo operand");
5112 if (!validateNeg(Inst, AMDGPU::OpName::neg_hi)) {
5114 "invalid neg_hi operand");
5117 if (!validateDPP(Inst,
Operands)) {
5121 if (!validateMIMGD16(Inst)) {
5123 "d16 modifier is not supported on this GPU");
5126 if (!validateMIMGDim(Inst,
Operands)) {
5127 Error(IDLoc,
"missing dim operand");
5130 if (!validateMIMGMSAA(Inst)) {
5132 "invalid dim; must be MSAA type");
5135 if (!validateMIMGDataSize(Inst, IDLoc)) {
5138 if (!validateMIMGAddrSize(Inst, IDLoc))
5140 if (!validateMIMGAtomicDMask(Inst)) {
5142 "invalid atomic image dmask");
5145 if (!validateMIMGGatherDMask(Inst)) {
5147 "invalid image_gather dmask: only one bit must be set");
5150 if (!validateMovrels(Inst,
Operands)) {
5153 if (!validateOffset(Inst,
Operands)) {
5156 if (!validateMAIAccWrite(Inst,
Operands)) {
5159 if (!validateMAISrc2(Inst,
Operands)) {
5162 if (!validateMFMA(Inst,
Operands)) {
5165 if (!validateCoherencyBits(Inst,
Operands, IDLoc)) {
// AGPR load/store message depends on whether the target has gfx90a rules.
5169 if (!validateAGPRLdSt(Inst)) {
5170 Error(IDLoc, getFeatureBits()[AMDGPU::FeatureGFX90AInsts]
5171 ?
"invalid register class: data and dst should be all VGPR or AGPR"
5172 :
"invalid register class: agpr loads and stores not supported on this GPU"
5176 if (!validateVGPRAlign(Inst)) {
5178 "invalid register class: vgpr tuples must be 64 bit aligned");
5185 if (!validateBLGP(Inst,
Operands)) {
5189 if (!validateDivScale(Inst)) {
5190 Error(IDLoc,
"ABS not allowed in VOP3B instructions");
5193 if (!validateWaitCnt(Inst,
Operands)) {
5196 if (!validateExeczVcczOperands(
Operands)) {
5199 if (!validateTFE(Inst,
Operands)) {
5208 unsigned VariantID = 0);
5212 unsigned VariantID);
// Two overloads checking whether a mnemonic exists for the given feature
// set; the second iterates assembler syntax variants and (presumably)
// delegates to the first per variant — bodies are missing from this
// extraction, so the exact check cannot be stated; verify upstream.
5214bool AMDGPUAsmParser::isSupportedMnemo(
 StringRef Mnemo,
5219bool AMDGPUAsmParser::isSupportedMnemo(
 StringRef Mnemo,
5222 for (
auto Variant : Variants) {
// Produces a targeted diagnostic for a mnemonic that failed to match:
// distinguishes "wrong syntax variant", "requires wavesize=32" (gfx10+
// wave64-only configs re-checked with wavefront features flipped),
// "not supported on this GPU", and generic "invalid instruction" with a
// spelling suggestion. Returns the result of Error() on each path.
// NOTE(review): garbled extraction — the early return when the mnemonic is
// supported and the Suggestion computation are missing; verify upstream.
5230bool AMDGPUAsmParser::checkUnsupportedInstruction(
 StringRef Mnemo,
5231 const SMLoc &IDLoc) {
5232 FeatureBitset FBS = ComputeAvailableFeatures(getFeatureBits());
// Mnemonic is valid for some matched variant — no special diagnostic here.
5235 if (isSupportedMnemo(Mnemo, FBS, getMatchedVariants()))
// Drop the generic match errors; we will emit a better one below.
5240 getParser().clearPendingErrors();
5244 StringRef VariantName = getMatchedVariantName();
5245 if (!VariantName.
empty() && isSupportedMnemo(Mnemo, FBS)) {
5248 " variant of this instruction is not supported"));
// Re-test with wave32 instead of wave64 to detect wavesize-only mismatches.
5252 if (
isGFX10Plus() && getFeatureBits()[AMDGPU::FeatureWavefrontSize64] &&
5253 !getFeatureBits()[AMDGPU::FeatureWavefrontSize32]) {
5256 FeaturesWS32.
flip(AMDGPU::FeatureWavefrontSize64)
5257 .
flip(AMDGPU::FeatureWavefrontSize32);
5259 ComputeAvailableFeatures(FeaturesWS32);
5261 if (isSupportedMnemo(Mnemo, AvailableFeaturesWS32, getMatchedVariants()))
5262 return Error(IDLoc,
"instruction requires wavesize=32");
5267 return Error(IDLoc,
"instruction not supported on this GPU");
5272 return Error(IDLoc,
"invalid instruction" + Suggestion);
// NOTE(review): enclosing signature missing from this extraction —
// presumably a helper deciding whether the operand at InvalidOprIdx is the
// "::" separator of a VOPD pair (used to pick the "invalid VOPDY
// instruction" diagnostic); confirm name and contract upstream.
 const auto &
Op = ((AMDGPUOperand &)*
Operands[InvalidOprIdx]);
5279 if (
Op.isToken() && InvalidOprIdx > 1) {
// True when the previous token is the VOPD "::" separator.
5280 const auto &PrevOp = ((AMDGPUOperand &)*
Operands[InvalidOprIdx - 1]);
5281 return PrevOp.isToken() && PrevOp.getToken() ==
"::";
// MCTargetAsmParser entry point: tries MatchInstructionImpl across all
// matched syntax variants, keeps the best result (Success beats
// MissingFeature beats InvalidOperand beats MnemonicFail), runs the
// semantic validators on success, and maps failures to diagnostics.
// NOTE(review): garbled extraction — emission of the matched instruction
// and several branch bodies are missing; verify against upstream.
5286bool AMDGPUAsmParser::MatchAndEmitInstruction(
 SMLoc IDLoc,
 unsigned &Opcode,
5290 bool MatchingInlineAsm) {
5292 unsigned Result = Match_Success;
5293 for (
auto Variant : getMatchedVariants()) {
5295 auto R = MatchInstructionImpl(
Operands, Inst, EI, MatchingInlineAsm,
// Keep R only if it is a strictly "better" outcome than the current Result.
5300 if (R == Match_Success || R == Match_MissingFeature ||
5301 (R == Match_InvalidOperand && Result != Match_MissingFeature) ||
5302 (R == Match_MnemonicFail && Result != Match_InvalidOperand &&
5303 Result != Match_MissingFeature)) {
5307 if (R == Match_Success)
5311 if (Result == Match_Success) {
5312 if (!validateInstruction(Inst, IDLoc,
Operands)) {
5321 if (checkUnsupportedInstruction(Mnemo, IDLoc)) {
5327 case Match_MissingFeature:
5331 return Error(IDLoc,
"operands are not valid for this GPU or mode");
5333 case Match_InvalidOperand: {
5334 SMLoc ErrorLoc = IDLoc;
5337 return Error(IDLoc,
"too few operands for instruction");
5340 if (ErrorLoc ==
SMLoc())
5344 return Error(ErrorLoc,
"invalid VOPDY instruction");
5346 return Error(ErrorLoc,
"invalid operand for instruction");
5349 case Match_MnemonicFail:
// Parses an absolute expression from the stream into a temporary and
// (presumably) narrows it into the 32-bit out-parameter Ret — the tail of
// the function is missing from this extraction; confirm upstream.
5355bool AMDGPUAsmParser::ParseAsAbsoluteExpression(
 uint32_t &Ret) {
5360 if (getParser().parseAbsoluteExpression(Tmp)) {
// Handles the .amdgcn_target directive: only valid for the amdgcn
// architecture; parses the quoted target-id string and requires it to match
// the target id of the current subtarget, with a detailed mismatch error.
// NOTE(review): garbled extraction — the architecture check condition and
// the success path are missing; verify against upstream.
5367bool AMDGPUAsmParser::ParseDirectiveAMDGCNTarget() {
5369 return TokError(
"directive only supported for amdgcn architecture");
5371 std::string TargetIDDirective;
5372 SMLoc TargetStart = getTok().getLoc();
5373 if (getParser().parseEscapedString(TargetIDDirective))
5377 if (getTargetStreamer().getTargetID()->
toString() != TargetIDDirective)
5378 return getParser().Error(TargetRange.
Start,
5379 (
Twine(
".amdgcn_target directive's target id ") +
5380 Twine(TargetIDDirective) +
5381 Twine(
" does not match the specified target id ") +
5382 Twine(getTargetStreamer().getTargetID()->
toString())).str());
// Computes the granulated VGPR/SGPR block counts (as MCExprs, to support
// unresolved symbols) for the kernel descriptor. Range-checks the SGPR
// count against the target's addressable maximum — with separate paths for
// targets with/without the SGPR-init hardware bug — adds the extra SGPRs
// (VCC / flat-scratch / XNACK), then rounds up to the allocation granule
// via the GetNumGPRBlocks lambda. Returns true on error.
// NOTE(review): garbled extraction — parts of the parameter list, the
// granule arithmetic, and the SGPRBlocks computation are missing; verify
// against upstream.
5391bool AMDGPUAsmParser::calculateGPRBlocks(
5393 const MCExpr *FlatScrUsed,
 bool XNACKUsed,
5394 std::optional<bool> EnableWavefrontSize32,
 const MCExpr *NextFreeVGPR,
5396 const MCExpr *&VGPRBlocks,
 const MCExpr *&SGPRBlocks) {
5403 int64_t EvaluatedSGPRs;
5408 unsigned MaxAddressableNumSGPRs =
// Check the raw count first on targets unaffected by the SGPR-init bug.
5411 if (
NumSGPRs->evaluateAsAbsolute(EvaluatedSGPRs) &&
Version.Major >= 8 &&
5412 !Features.
test(FeatureSGPRInitBug) &&
5413 static_cast<uint64_t>(EvaluatedSGPRs) > MaxAddressableNumSGPRs)
5414 return OutOfRangeError(SGPRRange);
5416 const MCExpr *ExtraSGPRs =
// Re-check after adding extra SGPRs on old targets / SGPR-init-bug targets.
5420 if (
NumSGPRs->evaluateAsAbsolute(EvaluatedSGPRs) &&
5421 (
Version.Major <= 7 || Features.
test(FeatureSGPRInitBug)) &&
5422 static_cast<uint64_t>(EvaluatedSGPRs) > MaxAddressableNumSGPRs)
5423 return OutOfRangeError(SGPRRange);
5425 if (Features.
test(FeatureSGPRInitBug))
// Rounds a GPR count up to the allocation granule and converts to blocks.
5432 auto GetNumGPRBlocks = [&Ctx](
 const MCExpr *NumGPR,
5433 unsigned Granule) ->
 const MCExpr * {
5437 const MCExpr *AlignToGPR =
5445 VGPRBlocks = GetNumGPRBlocks(
// Handles the .amdhsa_kernel directive: parses the kernel name, then loops
// over .amdhsa_* sub-directives until .end_amdhsa_kernel, filling in the
// kernel descriptor (via the PARSE_BITS_ENTRY macro), tracking implied vs.
// explicit user-SGPR counts, then computes GPR blocks and emits the
// descriptor. Each sub-directive validates target-generation requirements
// and value ranges. Returns true on error.
// NOTE(review): garbled extraction — many PARSE_BITS_ENTRY invocation heads
// and guard conditions are missing; verify every branch against upstream
// before editing.
5454bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() {
5456 return TokError(
"directive only supported for amdgcn architecture");
5459 return TokError(
"directive only supported for amdhsa OS");
5462 if (getParser().parseIdentifier(KernelName))
5467 &getSTI(), getContext());
5477 const MCExpr *NextFreeVGPR = ZeroExpr;
5483 const MCExpr *NextFreeSGPR = ZeroExpr;
5486 unsigned ImpliedUserSGPRCount = 0;
5490 std::optional<unsigned> ExplicitUserSGPRCount;
5491 const MCExpr *ReserveVCC = OneExpr;
5492 const MCExpr *ReserveFlatScr = OneExpr;
5493 std::optional<bool> EnableWavefrontSize32;
// Main directive loop: one .amdhsa_* entry per iteration.
5499 SMRange IDRange = getTok().getLocRange();
5500 if (!parseId(
ID,
"expected .amdhsa_ directive or .end_amdhsa_kernel"))
5503 if (
ID ==
".end_amdhsa_kernel")
// Each directive may appear at most once (tracked in Seen).
5507 return TokError(
".amdhsa_ directives cannot be repeated");
5509 SMLoc ValStart = getLoc();
5511 if (getParser().parseExpression(ExprVal))
5513 SMLoc ValEnd = getLoc();
// Val holds the evaluated constant when the expression is absolute.
5518 bool EvaluatableExpr;
5519 if ((EvaluatableExpr = ExprVal->evaluateAsAbsolute(IVal))) {
5521 return OutOfRangeError(ValRange);
// Range-checks Val against the field width, then sets the descriptor bits.
5525#define PARSE_BITS_ENTRY(FIELD, ENTRY, VALUE, RANGE) \
5526 if (!isUInt<ENTRY##_WIDTH>(Val)) \
5527 return OutOfRangeError(RANGE); \
5528 AMDGPU::MCKernelDescriptor::bits_set(FIELD, VALUE, ENTRY##_SHIFT, ENTRY, \
// Some fields cannot be deferred to relocation time.
5533#define EXPR_RESOLVE_OR_ERROR(RESOLVED) \
5535 return Error(IDRange.Start, "directive should have resolvable expression", \
5538 if (
ID ==
".amdhsa_group_segment_fixed_size") {
5541 return OutOfRangeError(ValRange);
5543 }
 else if (
ID ==
".amdhsa_private_segment_fixed_size") {
5546 return OutOfRangeError(ValRange);
5548 }
 else if (
ID ==
".amdhsa_kernarg_size") {
5550 return OutOfRangeError(ValRange);
5552 }
 else if (
ID ==
".amdhsa_user_sgpr_count") {
5554 ExplicitUserSGPRCount = Val;
5555 }
 else if (
ID ==
".amdhsa_user_sgpr_private_segment_buffer") {
5559 "directive is not supported with architected flat scratch",
5562 KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER,
// Private segment buffer occupies 4 user SGPRs.
5565 ImpliedUserSGPRCount += 4;
5566 }
 else if (
ID ==
".amdhsa_user_sgpr_kernarg_preload_length") {
5569 return Error(IDRange.
Start,
"directive requires gfx90a+", IDRange);
5572 return OutOfRangeError(ValRange);
5576 ImpliedUserSGPRCount += Val;
5577 PreloadLength = Val;
5579 }
 else if (
ID ==
".amdhsa_user_sgpr_kernarg_preload_offset") {
5582 return Error(IDRange.
Start,
"directive requires gfx90a+", IDRange);
5585 return OutOfRangeError(ValRange);
5589 PreloadOffset = Val;
5590 }
 else if (
ID ==
".amdhsa_user_sgpr_dispatch_ptr") {
5593 KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR, ExprVal,
5596 ImpliedUserSGPRCount += 2;
5597 }
 else if (
ID ==
".amdhsa_user_sgpr_queue_ptr") {
5600 KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR, ExprVal,
5603 ImpliedUserSGPRCount += 2;
5604 }
 else if (
ID ==
".amdhsa_user_sgpr_kernarg_segment_ptr") {
5607 KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR,
5610 ImpliedUserSGPRCount += 2;
5611 }
 else if (
ID ==
".amdhsa_user_sgpr_dispatch_id") {
5614 KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID, ExprVal,
5617 ImpliedUserSGPRCount += 2;
5618 }
 else if (
ID ==
".amdhsa_user_sgpr_flat_scratch_init") {
5621 "directive is not supported with architected flat scratch",
5625 KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT,
5628 ImpliedUserSGPRCount += 2;
5629 }
 else if (
ID ==
".amdhsa_user_sgpr_private_segment_size") {
5632 KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE,
5635 ImpliedUserSGPRCount += 1;
5636 }
 else if (
ID ==
".amdhsa_wavefront_size32") {
5638 if (IVersion.
Major < 10)
5639 return Error(IDRange.
Start,
"directive requires gfx10+", IDRange);
5640 EnableWavefrontSize32 = Val;
5642 KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32, ExprVal,
5644 }
 else if (
ID ==
".amdhsa_uses_dynamic_stack") {
5646 KERNEL_CODE_PROPERTY_USES_DYNAMIC_STACK, ExprVal,
5648 }
 else if (
ID ==
".amdhsa_system_sgpr_private_segment_wavefront_offset") {
5651 "directive is not supported with architected flat scratch",
5654 COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT, ExprVal,
5656 }
 else if (
ID ==
".amdhsa_enable_private_segment") {
5660 "directive is not supported without architected flat scratch",
5663 COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT, ExprVal,
5665 }
 else if (
ID ==
".amdhsa_system_sgpr_workgroup_id_x") {
5667 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X, ExprVal,
5669 }
 else if (
ID ==
".amdhsa_system_sgpr_workgroup_id_y") {
5671 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Y, ExprVal,
5673 }
 else if (
ID ==
".amdhsa_system_sgpr_workgroup_id_z") {
5675 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Z, ExprVal,
5677 }
 else if (
ID ==
".amdhsa_system_sgpr_workgroup_info") {
5679 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_INFO, ExprVal,
5681 }
 else if (
ID ==
".amdhsa_system_vgpr_workitem_id") {
5683 COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID, ExprVal,
5685 }
 else if (
ID ==
".amdhsa_next_free_vgpr") {
5686 VGPRRange = ValRange;
5687 NextFreeVGPR = ExprVal;
5688 }
 else if (
ID ==
".amdhsa_next_free_sgpr") {
5689 SGPRRange = ValRange;
5690 NextFreeSGPR = ExprVal;
5691 }
 else if (
ID ==
".amdhsa_accum_offset") {
5693 return Error(IDRange.
Start,
"directive requires gfx90a+", IDRange);
5694 AccumOffset = ExprVal;
5695 }
 else if (
ID ==
".amdhsa_reserve_vcc") {
5696 if (EvaluatableExpr && !isUInt<1>(Val))
5697 return OutOfRangeError(ValRange);
5698 ReserveVCC = ExprVal;
5699 }
 else if (
ID ==
".amdhsa_reserve_flat_scratch") {
5700 if (IVersion.
Major < 7)
5701 return Error(IDRange.
Start,
"directive requires gfx7+", IDRange);
5704 "directive is not supported with architected flat scratch",
5706 if (EvaluatableExpr && !isUInt<1>(Val))
5707 return OutOfRangeError(ValRange);
5708 ReserveFlatScr = ExprVal;
5709 }
 else if (
ID ==
".amdhsa_reserve_xnack_mask") {
5710 if (IVersion.
Major < 8)
5711 return Error(IDRange.
Start,
"directive requires gfx8+", IDRange);
5712 if (!isUInt<1>(Val))
5713 return OutOfRangeError(ValRange);
5714 if (Val != getTargetStreamer().getTargetID()->isXnackOnOrAny())
5715 return getParser().Error(IDRange.
Start,
".amdhsa_reserve_xnack_mask does not match target id",
5717 }
 else if (
ID ==
".amdhsa_float_round_mode_32") {
5719 COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32, ExprVal,
5721 }
 else if (
ID ==
".amdhsa_float_round_mode_16_64") {
5723 COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_16_64, ExprVal,
5725 }
 else if (
ID ==
".amdhsa_float_denorm_mode_32") {
5727 COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_32, ExprVal,
5729 }
 else if (
ID ==
".amdhsa_float_denorm_mode_16_64") {
5731 COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64, ExprVal,
5733 }
 else if (
ID ==
".amdhsa_dx10_clamp") {
5734 if (IVersion.
Major >= 12)
5735 return Error(IDRange.
Start,
"directive unsupported on gfx12+", IDRange);
5737 COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_DX10_CLAMP, ExprVal,
5739 }
 else if (
ID ==
".amdhsa_ieee_mode") {
5740 if (IVersion.
Major >= 12)
5741 return Error(IDRange.
Start,
"directive unsupported on gfx12+", IDRange);
5743 COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_IEEE_MODE, ExprVal,
5745 }
 else if (
ID ==
".amdhsa_fp16_overflow") {
5746 if (IVersion.
Major < 9)
5747 return Error(IDRange.
Start,
"directive requires gfx9+", IDRange);
5749 COMPUTE_PGM_RSRC1_GFX9_PLUS_FP16_OVFL, ExprVal,
5751 }
 else if (
ID ==
".amdhsa_tg_split") {
5753 return Error(IDRange.
Start,
"directive requires gfx90a+", IDRange);
5756 }
 else if (
ID ==
".amdhsa_workgroup_processor_mode") {
5757 if (IVersion.
Major < 10)
5758 return Error(IDRange.
Start,
"directive requires gfx10+", IDRange);
5760 COMPUTE_PGM_RSRC1_GFX10_PLUS_WGP_MODE, ExprVal,
5762 }
 else if (
ID ==
".amdhsa_memory_ordered") {
5763 if (IVersion.
Major < 10)
5764 return Error(IDRange.
Start,
"directive requires gfx10+", IDRange);
5766 COMPUTE_PGM_RSRC1_GFX10_PLUS_MEM_ORDERED, ExprVal,
5768 }
 else if (
ID ==
".amdhsa_forward_progress") {
5769 if (IVersion.
Major < 10)
5770 return Error(IDRange.
Start,
"directive requires gfx10+", IDRange);
5772 COMPUTE_PGM_RSRC1_GFX10_PLUS_FWD_PROGRESS, ExprVal,
5774 }
 else if (
ID ==
".amdhsa_shared_vgpr_count") {
5776 if (IVersion.
Major < 10 || IVersion.
Major >= 12)
5777 return Error(IDRange.
Start,
"directive requires gfx10 or gfx11",
5779 SharedVGPRCount = Val;
5781 COMPUTE_PGM_RSRC3_GFX10_GFX11_SHARED_VGPR_COUNT, ExprVal,
5783 }
 else if (
ID ==
".amdhsa_exception_fp_ieee_invalid_op") {
5786 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION,
5788 }
 else if (
ID ==
".amdhsa_exception_fp_denorm_src") {
5790 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_FP_DENORMAL_SOURCE,
5792 }
 else if (
ID ==
".amdhsa_exception_fp_ieee_div_zero") {
5795 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO,
5797 }
 else if (
ID ==
".amdhsa_exception_fp_ieee_overflow") {
5799 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW,
5801 }
 else if (
ID ==
".amdhsa_exception_fp_ieee_underflow") {
5803 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW,
5805 }
 else if (
ID ==
".amdhsa_exception_fp_ieee_inexact") {
5807 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INEXACT,
5809 }
 else if (
ID ==
".amdhsa_exception_int_div_zero") {
5811 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO,
5813 }
 else if (
ID ==
".amdhsa_round_robin_scheduling") {
5814 if (IVersion.
Major < 12)
5815 return Error(IDRange.
Start,
"directive requires gfx12+", IDRange);
5817 COMPUTE_PGM_RSRC1_GFX12_PLUS_ENABLE_WG_RR_EN, ExprVal,
5820 return Error(IDRange.
Start,
"unknown .amdhsa_kernel directive", IDRange);
5823#undef PARSE_BITS_ENTRY
// Post-loop validation: mandatory directives and derived values.
5826 if (!Seen.
contains(
".amdhsa_next_free_vgpr"))
5827 return TokError(
".amdhsa_next_free_vgpr directive is required");
5829 if (!Seen.
contains(
".amdhsa_next_free_sgpr"))
5830 return TokError(
".amdhsa_next_free_sgpr directive is required");
5832 const MCExpr *VGPRBlocks;
5833 const MCExpr *SGPRBlocks;
5834 if (calculateGPRBlocks(getFeatureBits(), ReserveVCC, ReserveFlatScr,
5835 getTargetStreamer().getTargetID()->isXnackOnOrAny(),
5836 EnableWavefrontSize32, NextFreeVGPR,
5837 VGPRRange, NextFreeSGPR, SGPRRange, VGPRBlocks,
// Range-check block counts only when they evaluate to constants.
5841 int64_t EvaluatedVGPRBlocks;
5842 bool VGPRBlocksEvaluatable =
5843 VGPRBlocks->evaluateAsAbsolute(EvaluatedVGPRBlocks);
5844 if (VGPRBlocksEvaluatable &&
5845 !isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT_WIDTH>(
5846 static_cast<uint64_t>(EvaluatedVGPRBlocks))) {
5847 return OutOfRangeError(VGPRRange);
5851 COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT_SHIFT,
5852 COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT, getContext());
5854 int64_t EvaluatedSGPRBlocks;
5855 if (SGPRBlocks->evaluateAsAbsolute(EvaluatedSGPRBlocks) &&
5856 !isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT_WIDTH>(
5857 static_cast<uint64_t>(EvaluatedSGPRBlocks)))
5858 return OutOfRangeError(SGPRRange);
5861 COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT_SHIFT,
5862 COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT, getContext());
5864 if (ExplicitUserSGPRCount && ImpliedUserSGPRCount > *ExplicitUserSGPRCount)
5865 return TokError(
"amdgpu_user_sgpr_count smaller than than implied by "
5866 "enabled user SGPRs");
5868 unsigned UserSGPRCount =
5869 ExplicitUserSGPRCount ? *ExplicitUserSGPRCount : ImpliedUserSGPRCount;
5871 if (!isUInt<COMPUTE_PGM_RSRC2_USER_SGPR_COUNT_WIDTH>(UserSGPRCount))
5872 return TokError(
"too many user SGPRs enabled");
5875 COMPUTE_PGM_RSRC2_USER_SGPR_COUNT_SHIFT,
5876 COMPUTE_PGM_RSRC2_USER_SGPR_COUNT, getContext());
5880 return TokError(
"Kernarg size should be resolvable");
// Preloaded kernargs must fit within the kernarg segment.
5882 if (PreloadLength && kernarg_size &&
5883 (PreloadLength * 4 + PreloadOffset * 4 > kernarg_size))
5884 return TokError(
"Kernarg preload length + offset is larger than the "
5885 "kernarg segment size");
5888 if (!Seen.
contains(
".amdhsa_accum_offset"))
5889 return TokError(
".amdhsa_accum_offset directive is required");
5890 int64_t EvaluatedAccum;
5891 bool AccumEvaluatable = AccumOffset->evaluateAsAbsolute(EvaluatedAccum);
5892 uint64_t UEvaluatedAccum = EvaluatedAccum;
// accum_offset must be a multiple of 4 in [4, 256].
5893 if (AccumEvaluatable &&
5894 (UEvaluatedAccum < 4 || UEvaluatedAccum > 256 || (UEvaluatedAccum & 3)))
5895 return TokError(
"accum_offset should be in range [4..256] in "
5898 int64_t EvaluatedNumVGPR;
5899 if (NextFreeVGPR->evaluateAsAbsolute(EvaluatedNumVGPR) &&
5903 return TokError(
"accum_offset exceeds total VGPR allocation");
5909 COMPUTE_PGM_RSRC3_GFX90A_ACCUM_OFFSET_SHIFT,
5910 COMPUTE_PGM_RSRC3_GFX90A_ACCUM_OFFSET,
// Shared-VGPR checks apply only to gfx10/gfx11.
5914 if (IVersion.
Major >= 10 && IVersion.
Major < 12) {
5916 if (SharedVGPRCount && EnableWavefrontSize32 && *EnableWavefrontSize32) {
5917 return TokError(
"shared_vgpr_count directive not valid on "
5918 "wavefront size 32");
5921 if (VGPRBlocksEvaluatable &&
5922 (SharedVGPRCount * 2 +
 static_cast<uint64_t>(EvaluatedVGPRBlocks) >
5924 return TokError(
"shared_vgpr_count*2 + "
5925 "compute_pgm_rsrc1.GRANULATED_WORKITEM_VGPR_COUNT cannot "
5930 getTargetStreamer().EmitAmdhsaKernelDescriptor(getSTI(), KernelName, KD,
5931 NextFreeVGPR, NextFreeSGPR,
5932 ReserveVCC, ReserveFlatScr);
// Handles .amdhsa_code_object_version: parses the version as an absolute
// expression and forwards it to the target streamer.
5936bool AMDGPUAsmParser::ParseDirectiveAMDHSACodeObjectVersion() {
5938 if (ParseAsAbsoluteExpression(Version))
5941 getTargetStreamer().EmitDirectiveAMDHSACodeObjectVersion(Version);
// Parses one key=value entry of a .amd_kernel_code_t block into the
// amd_kernel_code_t structure C. Ignores the legacy
// max_scratch_backing_memory_byte_size key, delegates general parsing to
// C.ParseKernelCodeT, then cross-checks wavefront-size-related keys against
// the subtarget's feature bits.
// NOTE(review): garbled extraction — the GFX10 guards and several returns
// are missing; verify against upstream.
5945bool AMDGPUAsmParser::ParseAMDKernelCodeTValue(
 StringRef ID,
// Obsolete key: consume the rest of the statement and ignore it.
5949 if (
ID ==
"max_scratch_backing_memory_byte_size") {
5950 Parser.eatToEndOfStatement();
5956 if (!
C.ParseKernelCodeT(
ID, getParser(), Err)) {
5957 return TokError(Err.str());
5961 if (
ID ==
"enable_wavefront_size32") {
5964 return TokError(
"enable_wavefront_size32=1 is only allowed on GFX10+");
5965 if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32])
5966 return TokError(
"enable_wavefront_size32=1 requires +WavefrontSize32");
5968 if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64])
5969 return TokError(
"enable_wavefront_size32=0 requires +WavefrontSize64");
// wavefront_size is log2: 5 => wave32, 6 => wave64.
5973 if (
ID ==
"wavefront_size") {
5974 if (
C.wavefront_size == 5) {
5976 return TokError(
"wavefront_size=5 is only allowed on GFX10+");
5977 if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32])
5978 return TokError(
"wavefront_size=5 requires +WavefrontSize32");
5979 }
 else if (
C.wavefront_size == 6) {
5980 if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64])
5981 return TokError(
"wavefront_size=6 requires +WavefrontSize64");
// Handles .amd_kernel_code_t: reads key=value entries via
// ParseAMDKernelCodeTValue until .end_amd_kernel_code_t, then validates the
// populated structure against the subtarget and emits it.
// NOTE(review): garbled extraction — the loop construct around the parseId
// call is missing; verify against upstream.
5988bool AMDGPUAsmParser::ParseDirectiveAMDKernelCodeT() {
5998 if (!parseId(
ID,
"expected value identifier or .end_amd_kernel_code_t"))
6001 if (
ID ==
".end_amd_kernel_code_t")
6004 if (ParseAMDKernelCodeTValue(
ID, KernelCode))
6008 KernelCode.
validate(&getSTI(), getContext());
6009 getTargetStreamer().EmitAMDKernelCodeT(KernelCode);
// Handles .amdgpu_hsa_kernel: parses the kernel symbol name, emits its
// symbol type through the target streamer, and initializes the kernel
// scope used for subsequent register-usage tracking.
6014bool AMDGPUAsmParser::ParseDirectiveAMDGPUHsaKernel() {
6016 if (!parseId(KernelName,
"expected symbol name"))
6019 getTargetStreamer().EmitAMDGPUSymbolType(KernelName,
6022 KernelScope.initialize(getContext());
// Handles .amd_amdgpu_isa: amdgcn-only; the quoted target id must match the
// current subtarget's target id, then the ISA version record is emitted.
// NOTE(review): garbled extraction — the architecture guard condition is
// missing; verify against upstream.
6026bool AMDGPUAsmParser::ParseDirectiveISAVersion() {
6028 return Error(getLoc(),
6029 ".amd_amdgpu_isa directive is not available on non-amdgcn "
6033 auto TargetIDDirective = getLexer().getTok().getStringContents();
6034 if (getTargetStreamer().getTargetID()->
toString() != TargetIDDirective)
6035 return Error(getParser().getTok().getLoc(),
"target id must match options");
6037 getTargetStreamer().EmitISAVersion();
// Handles the HSA-metadata directive: collects the metadata text
// (presumably via ParseToEndDirective — the call is missing from this
// extraction) and hands it to the streamer as V3 metadata, reporting
// "invalid HSA metadata" when emission fails.
6043bool AMDGPUAsmParser::ParseDirectiveHSAMetadata() {
6046 std::string HSAMetadataString;
6051 if (!getTargetStreamer().EmitHSAMetadataV3(HSAMetadataString))
6052 return Error(getLoc(),
"invalid HSA metadata");
// Collects raw source text between a begin directive and
// AssemblerDirectiveEnd into CollectString, preserving whitespace by
// disabling lexer space-skipping for the duration (restored before
// returning). Errors if end-of-file is reached without the end directive.
// NOTE(review): garbled extraction — the EOF loop condition and stream
// setup are missing; verify against upstream.
6059bool AMDGPUAsmParser::ParseToEndDirective(
 const char *AssemblerDirectiveBegin,
6060 const char *AssemblerDirectiveEnd,
6061 std::string &CollectString) {
// Keep whitespace verbatim while collecting.
6065 getLexer().setSkipSpace(
 false);
6067 bool FoundEnd =
 false;
6070 CollectStream << getTokenStr();
6074 if (trySkipId(AssemblerDirectiveEnd)) {
6079 CollectStream << Parser.parseStringToEndOfStatement()
6080 << getContext().getAsmInfo()->getSeparatorString();
6082 Parser.eatToEndOfStatement();
// Restore normal lexing behavior before reporting success or failure.
6085 getLexer().setSkipSpace(
 true);
6088 return TokError(
Twine(
"expected directive ") +
6089 Twine(AssemblerDirectiveEnd) +
Twine(
" not found"));
6092 CollectStream.flush();
6097bool AMDGPUAsmParser::ParseDirectivePALMetadataBegin() {
6103 auto PALMetadata = getTargetStreamer().getPALMetadata();
6104 if (!PALMetadata->setFromString(
String))
6105 return Error(getLoc(),
"invalid PAL metadata");
6110bool AMDGPUAsmParser::ParseDirectivePALMetadata() {
6112 return Error(getLoc(),
6114 "not available on non-amdpal OSes")).str());
6117 auto PALMetadata = getTargetStreamer().getPALMetadata();
6118 PALMetadata->setLegacy();
6121 if (ParseAsAbsoluteExpression(Key)) {
6122 return TokError(
Twine(
"invalid value in ") +
6126 return TokError(
Twine(
"expected an even number of values in ") +
6129 if (ParseAsAbsoluteExpression(
Value)) {
6130 return TokError(
Twine(
"invalid value in ") +
6133 PALMetadata->setRegister(Key,
Value);
6142bool AMDGPUAsmParser::ParseDirectiveAMDGPULDS() {
6143 if (getParser().checkForValidSection())
6147 SMLoc NameLoc = getLoc();
6148 if (getParser().parseIdentifier(
Name))
6149 return TokError(
"expected identifier in directive");
6152 if (getParser().parseComma())
6158 SMLoc SizeLoc = getLoc();
6159 if (getParser().parseAbsoluteExpression(
Size))
6162 return Error(SizeLoc,
"size must be non-negative");
6163 if (
Size > LocalMemorySize)
6164 return Error(SizeLoc,
"size is too large");
6166 int64_t Alignment = 4;
6168 SMLoc AlignLoc = getLoc();
6169 if (getParser().parseAbsoluteExpression(Alignment))
6172 return Error(AlignLoc,
"alignment must be a power of two");
6177 if (Alignment >= 1u << 31)
6178 return Error(AlignLoc,
"alignment is too large");
6184 Symbol->redefineIfPossible();
6185 if (!
Symbol->isUndefined())
6186 return Error(NameLoc,
"invalid symbol redefinition");
6188 getTargetStreamer().emitAMDGPULDS(Symbol,
Size,
Align(Alignment));
6192bool AMDGPUAsmParser::ParseDirective(
AsmToken DirectiveID) {
6196 if (IDVal ==
".amdhsa_kernel")
6197 return ParseDirectiveAMDHSAKernel();
6199 if (IDVal ==
".amdhsa_code_object_version")
6200 return ParseDirectiveAMDHSACodeObjectVersion();
6204 return ParseDirectiveHSAMetadata();
6206 if (IDVal ==
".amd_kernel_code_t")
6207 return ParseDirectiveAMDKernelCodeT();
6209 if (IDVal ==
".amdgpu_hsa_kernel")
6210 return ParseDirectiveAMDGPUHsaKernel();
6212 if (IDVal ==
".amd_amdgpu_isa")
6213 return ParseDirectiveISAVersion();
6217 Twine(
" directive is "
6218 "not available on non-amdhsa OSes"))
6223 if (IDVal ==
".amdgcn_target")
6224 return ParseDirectiveAMDGCNTarget();
6226 if (IDVal ==
".amdgpu_lds")
6227 return ParseDirectiveAMDGPULDS();
6230 return ParseDirectivePALMetadataBegin();
6233 return ParseDirectivePALMetadata();
6241 if (
MRI.regsOverlap(AMDGPU::TTMP12_TTMP13_TTMP14_TTMP15, RegNo))
6245 if (
MRI.regsOverlap(AMDGPU::SGPR104_SGPR105, RegNo))
6246 return hasSGPR104_SGPR105();
6249 case AMDGPU::SRC_SHARED_BASE_LO:
6250 case AMDGPU::SRC_SHARED_BASE:
6251 case AMDGPU::SRC_SHARED_LIMIT_LO:
6252 case AMDGPU::SRC_SHARED_LIMIT:
6253 case AMDGPU::SRC_PRIVATE_BASE_LO:
6254 case AMDGPU::SRC_PRIVATE_BASE:
6255 case AMDGPU::SRC_PRIVATE_LIMIT_LO:
6256 case AMDGPU::SRC_PRIVATE_LIMIT:
6258 case AMDGPU::SRC_POPS_EXITING_WAVE_ID:
6261 case AMDGPU::TBA_LO:
6262 case AMDGPU::TBA_HI:
6264 case AMDGPU::TMA_LO:
6265 case AMDGPU::TMA_HI:
6267 case AMDGPU::XNACK_MASK:
6268 case AMDGPU::XNACK_MASK_LO:
6269 case AMDGPU::XNACK_MASK_HI:
6270 return (
isVI() ||
isGFX9()) && getTargetStreamer().getTargetID()->isXnackSupported();
6271 case AMDGPU::SGPR_NULL:
6285 case AMDGPU::FLAT_SCR:
6286 case AMDGPU::FLAT_SCR_LO:
6287 case AMDGPU::FLAT_SCR_HI:
6296 if (
MRI.regsOverlap(AMDGPU::SGPR102_SGPR103, RegNo))
6297 return hasSGPR102_SGPR103();
6310 Res = MatchOperandParserImpl(
Operands, Mnemonic);
6322 SMLoc LBraceLoc = getLoc();
6327 auto Loc = getLoc();
6330 Error(Loc,
"expected a register");
6334 RBraceLoc = getLoc();
6339 "expected a comma or a closing square bracket"))
6343 if (
Operands.size() - Prefix > 1) {
6345 AMDGPUOperand::CreateToken(
this,
"[", LBraceLoc));
6346 Operands.push_back(AMDGPUOperand::CreateToken(
this,
"]", RBraceLoc));
6357 setForcedEncodingSize(0);
6358 setForcedDPP(
false);
6359 setForcedSDWA(
false);
6361 if (
Name.ends_with(
"_e64_dpp")) {
6363 setForcedEncodingSize(64);
6364 return Name.substr(0,
Name.size() - 8);
6366 if (
Name.ends_with(
"_e64")) {
6367 setForcedEncodingSize(64);
6368 return Name.substr(0,
Name.size() - 4);
6370 if (
Name.ends_with(
"_e32")) {
6371 setForcedEncodingSize(32);
6372 return Name.substr(0,
Name.size() - 4);
6374 if (
Name.ends_with(
"_dpp")) {
6376 return Name.substr(0,
Name.size() - 4);
6378 if (
Name.ends_with(
"_sdwa")) {
6379 setForcedSDWA(
true);
6380 return Name.substr(0,
Name.size() - 5);
6387 unsigned VariantID);
6399 Operands.push_back(AMDGPUOperand::CreateToken(
this,
Name, NameLoc));
6401 bool IsMIMG =
Name.starts_with(
"image_");
6404 OperandMode Mode = OperandMode_Default;
6406 Mode = OperandMode_NSA;
6410 checkUnsupportedInstruction(
Name, NameLoc);
6411 if (!Parser.hasPendingError()) {
6414 :
"not a valid operand.";
6415 Error(getLoc(), Msg);
6437 if (!trySkipId(
Name))
6440 Operands.push_back(AMDGPUOperand::CreateToken(
this,
Name, S));
6444ParseStatus AMDGPUAsmParser::parseIntWithPrefix(
const char *Prefix,
6455 std::function<
bool(int64_t &)> ConvertResult) {
6463 if (ConvertResult && !ConvertResult(
Value)) {
6467 Operands.push_back(AMDGPUOperand::CreateImm(
this,
Value, S, ImmTy));
6471ParseStatus AMDGPUAsmParser::parseOperandArrayWithPrefix(
6473 bool (*ConvertResult)(int64_t &)) {
6482 const unsigned MaxSize = 4;
6486 for (
int I = 0; ; ++
I) {
6488 SMLoc Loc = getLoc();
6492 if (
Op != 0 &&
Op != 1)
6500 if (
I + 1 == MaxSize)
6501 return Error(getLoc(),
"expected a closing square bracket");
6507 Operands.push_back(AMDGPUOperand::CreateImm(
this, Val, S, ImmTy));
6513 AMDGPUOperand::ImmTy ImmTy) {
6517 if (trySkipId(
Name)) {
6519 }
else if (trySkipId(
"no",
Name)) {
6526 return Error(S,
"r128 modifier is not supported on this GPU");
6528 return Error(S,
"a16 modifier is not supported on this GPU");
6530 if (
isGFX9() && ImmTy == AMDGPUOperand::ImmTyA16)
6531 ImmTy = AMDGPUOperand::ImmTyR128A16;
6533 Operands.push_back(AMDGPUOperand::CreateImm(
this, Bit, S, ImmTy));
6538 bool &Disabling)
const {
6539 Disabling =
Id.consume_front(
"no");
6559 SMLoc StringLoc = getLoc();
6561 int64_t CPolVal = 0;
6579 ResScope = parseScope(
Operands, Scope);
6594 Operands.push_back(AMDGPUOperand::CreateImm(
this, CPolVal, StringLoc,
6595 AMDGPUOperand::ImmTyCPol));
6600 SMLoc OpLoc = getLoc();
6601 unsigned Enabled = 0, Seen = 0;
6605 unsigned CPol = getCPolKind(getId(), Mnemo, Disabling);
6612 return Error(S,
"dlc modifier is not supported on this GPU");
6615 return Error(S,
"scc modifier is not supported on this GPU");
6618 return Error(S,
"duplicate cache policy modifier");
6630 AMDGPUOperand::CreateImm(
this,
Enabled, OpLoc, AMDGPUOperand::ImmTyCPol));
6642 Res = parseStringWithPrefix(
"scope",
Value, StringLoc);
6653 if (Scope == 0xffffffff)
6654 return Error(StringLoc,
"invalid scope value");
6668 if (
Value ==
"TH_DEFAULT")
6670 else if (
Value ==
"TH_STORE_LU" ||
Value ==
"TH_LOAD_RT_WB" ||
6671 Value ==
"TH_LOAD_NT_WB") {
6672 return Error(StringLoc,
"invalid th value");
6673 }
else if (
Value.consume_front(
"TH_ATOMIC_")) {
6675 }
else if (
Value.consume_front(
"TH_LOAD_")) {
6677 }
else if (
Value.consume_front(
"TH_STORE_")) {
6680 return Error(StringLoc,
"invalid th value");
6683 if (
Value ==
"BYPASS")
6714 if (TH == 0xffffffff)
6715 return Error(StringLoc,
"invalid th value");
6722 AMDGPUAsmParser::OptionalImmIndexMap& OptionalIdx,
6723 AMDGPUOperand::ImmTy ImmT,
6725 auto i = OptionalIdx.find(ImmT);
6726 if (i != OptionalIdx.end()) {
6727 unsigned Idx = i->second;
6728 ((AMDGPUOperand &)*
Operands[
Idx]).addImmOperands(Inst, 1);
6740 StringLoc = getLoc();
6749bool AMDGPUAsmParser::tryParseFmt(
const char *Pref,
6753 SMLoc Loc = getLoc();
6755 auto Res = parseIntWithPrefix(Pref, Val);
6761 if (Val < 0 || Val > MaxVal) {
6771 AMDGPUOperand::ImmTy ImmTy) {
6772 const char *Pref =
"index_key";
6774 SMLoc Loc = getLoc();
6775 auto Res = parseIntWithPrefix(Pref, ImmVal);
6779 if (ImmTy == AMDGPUOperand::ImmTyIndexKey16bit && (ImmVal < 0 || ImmVal > 1))
6782 if (ImmTy == AMDGPUOperand::ImmTyIndexKey8bit && (ImmVal < 0 || ImmVal > 3))
6785 Operands.push_back(AMDGPUOperand::CreateImm(
this, ImmVal, Loc, ImmTy));
6790 return tryParseIndexKey(
Operands, AMDGPUOperand::ImmTyIndexKey8bit);
6794 return tryParseIndexKey(
Operands, AMDGPUOperand::ImmTyIndexKey16bit);
6799ParseStatus AMDGPUAsmParser::parseDfmtNfmt(int64_t &Format) {
6806 for (
int I = 0;
I < 2; ++
I) {
6807 if (Dfmt == DFMT_UNDEF && !tryParseFmt(
"dfmt", DFMT_MAX, Dfmt))
6810 if (Nfmt == NFMT_UNDEF && !tryParseFmt(
"nfmt", NFMT_MAX, Nfmt))
6815 if ((Dfmt == DFMT_UNDEF) != (Nfmt == NFMT_UNDEF) &&
6821 if (Dfmt == DFMT_UNDEF && Nfmt == NFMT_UNDEF)
6824 Dfmt = (Dfmt ==
DFMT_UNDEF) ? DFMT_DEFAULT : Dfmt;
6825 Nfmt = (Nfmt ==
NFMT_UNDEF) ? NFMT_DEFAULT : Nfmt;
6831ParseStatus AMDGPUAsmParser::parseUfmt(int64_t &Format) {
6836 if (!tryParseFmt(
"format", UFMT_MAX, Fmt))
6839 if (Fmt == UFMT_UNDEF)
6846bool AMDGPUAsmParser::matchDfmtNfmt(int64_t &Dfmt,
6854 if (Format != DFMT_UNDEF) {
6860 if (Format != NFMT_UNDEF) {
6865 Error(Loc,
"unsupported format");
6876 if (!matchDfmtNfmt(Dfmt, Nfmt, FormatStr, FormatLoc))
6881 SMLoc Loc = getLoc();
6882 if (!parseId(Str,
"expected a format string") ||
6883 !matchDfmtNfmt(Dfmt, Nfmt, Str, Loc))
6885 if (Dfmt == DFMT_UNDEF)
6886 return Error(Loc,
"duplicate numeric format");
6887 if (Nfmt == NFMT_UNDEF)
6888 return Error(Loc,
"duplicate data format");
6891 Dfmt = (Dfmt ==
DFMT_UNDEF) ? DFMT_DEFAULT : Dfmt;
6892 Nfmt = (Nfmt ==
NFMT_UNDEF) ? NFMT_DEFAULT : Nfmt;
6896 if (Ufmt == UFMT_UNDEF)
6897 return Error(FormatLoc,
"unsupported format");
6912 if (Id == UFMT_UNDEF)
6916 return Error(Loc,
"unified format is not supported on this GPU");
6922ParseStatus AMDGPUAsmParser::parseNumericFormat(int64_t &Format) {
6924 SMLoc Loc = getLoc();
6929 return Error(Loc,
"out of range format");
6934ParseStatus AMDGPUAsmParser::parseSymbolicOrNumericFormat(int64_t &Format) {
6942 SMLoc Loc = getLoc();
6943 if (!parseId(FormatStr,
"expected a format string"))
6946 auto Res = parseSymbolicUnifiedFormat(FormatStr, Loc, Format);
6948 Res = parseSymbolicSplitFormat(FormatStr, Loc, Format);
6958 return parseNumericFormat(Format);
6966 SMLoc Loc = getLoc();
6976 AMDGPUOperand::CreateImm(
this, Format, Loc, AMDGPUOperand::ImmTyFORMAT));
6995 Res = parseSymbolicOrNumericFormat(Format);
7000 AMDGPUOperand &
Op =
static_cast<AMDGPUOperand &
>(*
Operands[
Size - 2]);
7001 assert(
Op.isImm() &&
Op.getImmTy() == AMDGPUOperand::ImmTyFORMAT);
7008 return Error(getLoc(),
"duplicate format");
7014 parseIntWithPrefix(
"offset",
Operands, AMDGPUOperand::ImmTyOffset);
7016 Res = parseIntWithPrefix(
"inst_offset",
Operands,
7017 AMDGPUOperand::ImmTyInstOffset);
7024 parseNamedBit(
"r128",
Operands, AMDGPUOperand::ImmTyR128A16);
7026 Res = parseNamedBit(
"a16",
Operands, AMDGPUOperand::ImmTyA16);
7032 parseIntWithPrefix(
"blgp",
Operands, AMDGPUOperand::ImmTyBLGP);
7035 parseOperandArrayWithPrefix(
"neg",
Operands, AMDGPUOperand::ImmTyBLGP);
7045 OptionalImmIndexMap OptionalIdx;
7047 unsigned OperandIdx[4];
7048 unsigned EnMask = 0;
7051 for (
unsigned i = 1, e =
Operands.size(); i != e; ++i) {
7052 AMDGPUOperand &
Op = ((AMDGPUOperand &)*
Operands[i]);
7057 OperandIdx[SrcIdx] = Inst.
size();
7058 Op.addRegOperands(Inst, 1);
7065 OperandIdx[SrcIdx] = Inst.
size();
7071 if (
Op.isImm() &&
Op.getImmTy() == AMDGPUOperand::ImmTyExpTgt) {
7072 Op.addImmOperands(Inst, 1);
7076 if (
Op.isToken() && (
Op.getToken() ==
"done" ||
Op.getToken() ==
"row_en"))
7080 OptionalIdx[
Op.getImmTy()] = i;
7086 if (OptionalIdx.find(AMDGPUOperand::ImmTyExpCompr) != OptionalIdx.end()) {
7093 for (
auto i = 0; i < SrcIdx; ++i) {
7095 EnMask |= Compr? (0x3 << i * 2) : (0x1 << i);
7120 IntVal =
encode(ISA, IntVal, CntVal);
7121 if (CntVal !=
decode(ISA, IntVal)) {
7123 IntVal =
encode(ISA, IntVal, -1);
7131bool AMDGPUAsmParser::parseCnt(int64_t &IntVal) {
7133 SMLoc CntLoc = getLoc();
7141 SMLoc ValLoc = getLoc();
7150 if (CntName ==
"vmcnt" || CntName ==
"vmcnt_sat") {
7152 }
else if (CntName ==
"expcnt" || CntName ==
"expcnt_sat") {
7154 }
else if (CntName ==
"lgkmcnt" || CntName ==
"lgkmcnt_sat") {
7157 Error(CntLoc,
"invalid counter name " + CntName);
7162 Error(ValLoc,
"too large value for " + CntName);
7171 Error(getLoc(),
"expected a counter name");
7198bool AMDGPUAsmParser::parseDelay(int64_t &Delay) {
7199 SMLoc FieldLoc = getLoc();
7205 SMLoc ValueLoc = getLoc();
7212 if (FieldName ==
"instid0") {
7214 }
else if (FieldName ==
"instskip") {
7216 }
else if (FieldName ==
"instid1") {
7219 Error(FieldLoc,
"invalid field name " + FieldName);
7238 .
Case(
"VALU_DEP_1", 1)
7239 .
Case(
"VALU_DEP_2", 2)
7240 .
Case(
"VALU_DEP_3", 3)
7241 .
Case(
"VALU_DEP_4", 4)
7242 .
Case(
"TRANS32_DEP_1", 5)
7243 .
Case(
"TRANS32_DEP_2", 6)
7244 .
Case(
"TRANS32_DEP_3", 7)
7245 .
Case(
"FMA_ACCUM_CYCLE_1", 8)
7246 .
Case(
"SALU_CYCLE_1", 9)
7247 .
Case(
"SALU_CYCLE_2", 10)
7248 .
Case(
"SALU_CYCLE_3", 11)
7256 Delay |=
Value << Shift;
7266 if (!parseDelay(Delay))
7274 Operands.push_back(AMDGPUOperand::CreateImm(
this, Delay, S));
7279AMDGPUOperand::isSWaitCnt()
const {
7283bool AMDGPUOperand::isSDelayALU()
const {
return isImm(); }
7289void AMDGPUAsmParser::depCtrError(
SMLoc Loc,
int ErrorId,
7293 Error(Loc,
Twine(
"invalid counter name ", DepCtrName));
7296 Error(Loc,
Twine(DepCtrName,
" is not supported on this GPU"));
7299 Error(Loc,
Twine(
"duplicate counter name ", DepCtrName));
7302 Error(Loc,
Twine(
"invalid value for ", DepCtrName));
7309bool AMDGPUAsmParser::parseDepCtr(int64_t &DepCtr,
unsigned &UsedOprMask) {
7313 SMLoc DepCtrLoc = getLoc();
7324 unsigned PrevOprMask = UsedOprMask;
7325 int CntVal =
encodeDepCtr(DepCtrName, ExprVal, UsedOprMask, getSTI());
7328 depCtrError(DepCtrLoc, CntVal, DepCtrName);
7337 Error(getLoc(),
"expected a counter name");
7342 unsigned CntValMask = PrevOprMask ^ UsedOprMask;
7343 DepCtr = (DepCtr & ~CntValMask) | CntVal;
7351 SMLoc Loc = getLoc();
7354 unsigned UsedOprMask = 0;
7356 if (!parseDepCtr(DepCtr, UsedOprMask))
7364 Operands.push_back(AMDGPUOperand::CreateImm(
this, DepCtr, Loc));
7368bool AMDGPUOperand::isDepCtr()
const {
return isS16Imm(); }
7374ParseStatus AMDGPUAsmParser::parseHwregFunc(OperandInfoTy &HwReg,
7376 OperandInfoTy &Width) {
7383 HwReg.Loc = getLoc();
7386 HwReg.IsSymbolic =
true;
7388 }
else if (!
parseExpr(HwReg.Val,
"a register name")) {
7396 if (!skipToken(
AsmToken::Comma,
"expected a comma or a closing parenthesis"))
7406 Width.Loc = getLoc();
7418 SMLoc Loc = getLoc();
7420 StructuredOpField HwReg(
"id",
"hardware register", HwregId::Width,
7422 StructuredOpField
Offset(
"offset",
"bit offset", HwregOffset::Width,
7423 HwregOffset::Default);
7424 struct : StructuredOpField {
7425 using StructuredOpField::StructuredOpField;
7426 bool validate(AMDGPUAsmParser &Parser)
const override {
7428 return Error(Parser,
"only values from 1 to 32 are legal");
7431 } Width(
"size",
"bitfield width", HwregSize::Width, HwregSize::Default);
7435 Res = parseHwregFunc(HwReg,
Offset, Width);
7438 if (!validateStructuredOpFields({&HwReg, &
Offset, &Width}))
7440 ImmVal = HwregEncoding::encode(HwReg.Val,
Offset.Val, Width.Val);
7444 parseExpr(ImmVal,
"a hwreg macro, structured immediate"))
7450 if (!isUInt<16>(ImmVal))
7451 return Error(Loc,
"invalid immediate: only 16-bit values are legal");
7453 AMDGPUOperand::CreateImm(
this, ImmVal, Loc, AMDGPUOperand::ImmTyHwreg));
7457bool AMDGPUOperand::isHwreg()
const {
7458 return isImmTy(ImmTyHwreg);
7466AMDGPUAsmParser::parseSendMsgBody(OperandInfoTy &Msg,
7468 OperandInfoTy &Stream) {
7474 Msg.IsSymbolic =
true;
7476 }
else if (!
parseExpr(Msg.Val,
"a message name")) {
7481 Op.IsDefined =
true;
7484 (
Op.Val =
getMsgOpId(Msg.Val, getTokenStr(), getSTI())) !=
7487 }
else if (!
parseExpr(
Op.Val,
"an operation name")) {
7492 Stream.IsDefined =
true;
7493 Stream.Loc = getLoc();
7503AMDGPUAsmParser::validateSendMsg(
const OperandInfoTy &Msg,
7504 const OperandInfoTy &
Op,
7505 const OperandInfoTy &Stream) {
7511 bool Strict = Msg.IsSymbolic;
7515 Error(Msg.Loc,
"specified message id is not supported on this GPU");
7520 Error(Msg.Loc,
"invalid message id");
7526 Error(
Op.Loc,
"message does not support operations");
7528 Error(Msg.Loc,
"missing message operation");
7534 Error(
Op.Loc,
"specified operation id is not supported on this GPU");
7536 Error(
Op.Loc,
"invalid operation id");
7541 Error(Stream.Loc,
"message operation does not support streams");
7545 Error(Stream.Loc,
"invalid message stream id");
7555 SMLoc Loc = getLoc();
7559 OperandInfoTy
Op(OP_NONE_);
7560 OperandInfoTy Stream(STREAM_ID_NONE_);
7561 if (parseSendMsgBody(Msg,
Op, Stream) &&
7562 validateSendMsg(Msg,
Op, Stream)) {
7567 }
else if (
parseExpr(ImmVal,
"a sendmsg macro")) {
7568 if (ImmVal < 0 || !isUInt<16>(ImmVal))
7569 return Error(Loc,
"invalid immediate: only 16-bit values are legal");
7574 Operands.push_back(AMDGPUOperand::CreateImm(
this, ImmVal, Loc, AMDGPUOperand::ImmTySendMsg));
7578bool AMDGPUOperand::isSendMsg()
const {
7579 return isImmTy(ImmTySendMsg);
7600 return Error(S,
"invalid interpolation slot");
7602 Operands.push_back(AMDGPUOperand::CreateImm(
this, Slot, S,
7603 AMDGPUOperand::ImmTyInterpSlot));
7614 if (!Str.starts_with(
"attr"))
7615 return Error(S,
"invalid interpolation attribute");
7625 return Error(S,
"invalid or missing interpolation attribute channel");
7627 Str = Str.drop_back(2).drop_front(4);
7630 if (Str.getAsInteger(10, Attr))
7631 return Error(S,
"invalid or missing interpolation attribute number");
7634 return Error(S,
"out of bounds interpolation attribute number");
7638 Operands.push_back(AMDGPUOperand::CreateImm(
this, Attr, S,
7639 AMDGPUOperand::ImmTyInterpAttr));
7640 Operands.push_back(AMDGPUOperand::CreateImm(
7641 this, AttrChan, SChan, AMDGPUOperand::ImmTyInterpAttrChan));
7660 return Error(S, (
Id == ET_INVALID)
7661 ?
"invalid exp target"
7662 :
"exp target is not supported on this GPU");
7664 Operands.push_back(AMDGPUOperand::CreateImm(
this,
Id, S,
7665 AMDGPUOperand::ImmTyExpTgt));
7680 return isId(getToken(),
Id);
7685 return getTokenKind() ==
Kind;
7688StringRef AMDGPUAsmParser::getId()
const {
7715 if (isId(
Id) && peekToken().is(Kind)) {
7725 if (isToken(Kind)) {
7735 if (!trySkipToken(Kind)) {
7736 Error(getLoc(), ErrMsg);
7747 if (Parser.parseExpression(Expr))
7750 if (Expr->evaluateAsAbsolute(Imm))
7754 Error(S,
"expected absolute expression");
7757 Twine(
" or an absolute expression"));
7767 if (Parser.parseExpression(Expr))
7771 if (Expr->evaluateAsAbsolute(IntVal)) {
7772 Operands.push_back(AMDGPUOperand::CreateImm(
this, IntVal, S));
7774 Operands.push_back(AMDGPUOperand::CreateExpr(
this, Expr, S));
7782 Val = getToken().getStringContents();
7786 Error(getLoc(), ErrMsg);
7793 Val = getTokenStr();
7797 if (!ErrMsg.
empty())
7798 Error(getLoc(), ErrMsg);
7803AMDGPUAsmParser::getToken()
const {
7804 return Parser.getTok();
7807AsmToken AMDGPUAsmParser::peekToken(
bool ShouldSkipSpace) {
7810 : getLexer().peekTok(ShouldSkipSpace);
7815 auto TokCount = getLexer().peekTokens(Tokens);
7822AMDGPUAsmParser::getTokenKind()
const {
7827AMDGPUAsmParser::getLoc()
const {
7828 return getToken().getLoc();
7832AMDGPUAsmParser::getTokenStr()
const {
7833 return getToken().getString();
7837AMDGPUAsmParser::lex() {
7842 return ((AMDGPUOperand &)*
Operands[0]).getStartLoc();
7846AMDGPUAsmParser::getOperandLoc(std::function<
bool(
const AMDGPUOperand&)>
Test,
7848 for (
unsigned i =
Operands.size() - 1; i > 0; --i) {
7849 AMDGPUOperand &
Op = ((AMDGPUOperand &)*
Operands[i]);
7851 return Op.getStartLoc();
7857AMDGPUAsmParser::getImmLoc(AMDGPUOperand::ImmTy
Type,
7859 auto Test = [=](
const AMDGPUOperand&
Op) {
return Op.isImmTy(
Type); };
7864AMDGPUAsmParser::getRegLoc(
unsigned Reg,
7866 auto Test = [=](
const AMDGPUOperand&
Op) {
7867 return Op.isRegKind() &&
Op.getReg() ==
Reg;
7873 bool SearchMandatoryLiterals)
const {
7874 auto Test = [](
const AMDGPUOperand&
Op) {
7875 return Op.IsImmKindLiteral() ||
Op.isExpr();
7878 if (SearchMandatoryLiterals && Loc == getInstLoc(
Operands))
7879 Loc = getMandatoryLitLoc(
Operands);
7884 auto Test = [](
const AMDGPUOperand &
Op) {
7885 return Op.IsImmKindMandatoryLiteral();
7892 auto Test = [](
const AMDGPUOperand&
Op) {
7893 return Op.isImmKindConst();
7910 SMLoc IdLoc = getLoc();
7916 find_if(Fields, [
Id](StructuredOpField *
F) {
return F->Id ==
Id; });
7917 if (
I == Fields.
end())
7918 return Error(IdLoc,
"unknown field");
7919 if ((*I)->IsDefined)
7920 return Error(IdLoc,
"duplicate field");
7923 (*I)->Loc = getLoc();
7926 (*I)->IsDefined =
true;
7933bool AMDGPUAsmParser::validateStructuredOpFields(
7935 return all_of(Fields, [
this](
const StructuredOpField *
F) {
7936 return F->validate(*
this);
7947 const unsigned OrMask,
7948 const unsigned XorMask) {
7951 return BITMASK_PERM_ENC |
7952 (AndMask << BITMASK_AND_SHIFT) |
7953 (OrMask << BITMASK_OR_SHIFT) |
7954 (XorMask << BITMASK_XOR_SHIFT);
7958AMDGPUAsmParser::parseSwizzleOperand(int64_t &
Op,
7959 const unsigned MinVal,
7960 const unsigned MaxVal,
7970 if (Op < MinVal || Op > MaxVal) {
7979AMDGPUAsmParser::parseSwizzleOperands(
const unsigned OpNum, int64_t*
Op,
7980 const unsigned MinVal,
7981 const unsigned MaxVal,
7984 for (
unsigned i = 0; i < OpNum; ++i) {
7985 if (!parseSwizzleOperand(
Op[i], MinVal, MaxVal, ErrMsg, Loc))
7993AMDGPUAsmParser::parseSwizzleQuadPerm(int64_t &Imm) {
7997 if (parseSwizzleOperands(LANE_NUM, Lane, 0, LANE_MAX,
7998 "expected a 2-bit lane id")) {
8009AMDGPUAsmParser::parseSwizzleBroadcast(int64_t &Imm) {
8016 if (!parseSwizzleOperand(GroupSize,
8018 "group size must be in the interval [2,32]",
8023 Error(Loc,
"group size must be a power of two");
8026 if (parseSwizzleOperand(LaneIdx,
8028 "lane id must be in the interval [0,group size - 1]",
8037AMDGPUAsmParser::parseSwizzleReverse(int64_t &Imm) {
8043 if (!parseSwizzleOperand(GroupSize,
8045 "group size must be in the interval [2,32]",
8050 Error(Loc,
"group size must be a power of two");
8059AMDGPUAsmParser::parseSwizzleSwap(int64_t &Imm) {
8065 if (!parseSwizzleOperand(GroupSize,
8067 "group size must be in the interval [1,16]",
8072 Error(Loc,
"group size must be a power of two");
8081AMDGPUAsmParser::parseSwizzleBitmaskPerm(int64_t &Imm) {
8089 SMLoc StrLoc = getLoc();
8090 if (!parseString(Ctl)) {
8093 if (Ctl.
size() != BITMASK_WIDTH) {
8094 Error(StrLoc,
"expected a 5-character mask");
8098 unsigned AndMask = 0;
8099 unsigned OrMask = 0;
8100 unsigned XorMask = 0;
8102 for (
size_t i = 0; i < Ctl.
size(); ++i) {
8106 Error(StrLoc,
"invalid mask");
8128AMDGPUAsmParser::parseSwizzleOffset(int64_t &Imm) {
8130 SMLoc OffsetLoc = getLoc();
8132 if (!
parseExpr(Imm,
"a swizzle macro")) {
8135 if (!isUInt<16>(Imm)) {
8136 Error(OffsetLoc,
"expected a 16-bit offset");
8143AMDGPUAsmParser::parseSwizzleMacro(int64_t &Imm) {
8148 SMLoc ModeLoc = getLoc();
8151 if (trySkipId(IdSymbolic[ID_QUAD_PERM])) {
8152 Ok = parseSwizzleQuadPerm(Imm);
8153 }
else if (trySkipId(IdSymbolic[ID_BITMASK_PERM])) {
8154 Ok = parseSwizzleBitmaskPerm(Imm);
8155 }
else if (trySkipId(IdSymbolic[ID_BROADCAST])) {
8156 Ok = parseSwizzleBroadcast(Imm);
8157 }
else if (trySkipId(IdSymbolic[ID_SWAP])) {
8158 Ok = parseSwizzleSwap(Imm);
8159 }
else if (trySkipId(IdSymbolic[ID_REVERSE])) {
8160 Ok = parseSwizzleReverse(Imm);
8162 Error(ModeLoc,
"expected a swizzle mode");
8165 return Ok && skipToken(
AsmToken::RParen,
"expected a closing parentheses");
8175 if (trySkipId(
"offset")) {
8179 if (trySkipId(
"swizzle")) {
8180 Ok = parseSwizzleMacro(Imm);
8182 Ok = parseSwizzleOffset(Imm);
8186 Operands.push_back(AMDGPUOperand::CreateImm(
this, Imm, S, AMDGPUOperand::ImmTySwizzle));
8194AMDGPUOperand::isSwizzle()
const {
8195 return isImmTy(ImmTySwizzle);
8202int64_t AMDGPUAsmParser::parseGPRIdxMacro() {
8216 for (
unsigned ModeId = ID_MIN; ModeId <=
ID_MAX; ++ModeId) {
8217 if (trySkipId(IdSymbolic[ModeId])) {
8224 Error(S, (Imm == 0)?
8225 "expected a VGPR index mode or a closing parenthesis" :
8226 "expected a VGPR index mode");
8231 Error(S,
"duplicate VGPR index mode");
8239 "expected a comma or a closing parenthesis"))
8254 Imm = parseGPRIdxMacro();
8258 if (getParser().parseAbsoluteExpression(Imm))
8260 if (Imm < 0 || !isUInt<4>(Imm))
8261 return Error(S,
"invalid immediate: only 4-bit values are legal");
8265 AMDGPUOperand::CreateImm(
this, Imm, S, AMDGPUOperand::ImmTyGprIdxMode));
8269bool AMDGPUOperand::isGPRIdxMode()
const {
8270 return isImmTy(ImmTyGprIdxMode);
8282 if (isRegister() || isModifier())
8289 assert(Opr.isImm() || Opr.isExpr());
8290 SMLoc Loc = Opr.getStartLoc();
8294 if (Opr.isExpr() && !Opr.isSymbolRefExpr()) {
8295 Error(Loc,
"expected an absolute expression or a label");
8296 }
else if (Opr.isImm() && !Opr.isS16Imm()) {
8297 Error(Loc,
"expected a 16-bit signed jump offset");
8315void AMDGPUAsmParser::cvtMubufImpl(
MCInst &Inst,
8318 OptionalImmIndexMap OptionalIdx;
8319 unsigned FirstOperandIdx = 1;
8320 bool IsAtomicReturn =
false;
8327 for (
unsigned i = FirstOperandIdx, e =
Operands.size(); i != e; ++i) {
8328 AMDGPUOperand &
Op = ((AMDGPUOperand &)*
Operands[i]);
8332 Op.addRegOperands(Inst, 1);
8336 if (IsAtomicReturn && i == FirstOperandIdx)
8337 Op.addRegOperands(Inst, 1);
8342 if (
Op.isImm() &&
Op.getImmTy() == AMDGPUOperand::ImmTyNone) {
8343 Op.addImmOperands(Inst, 1);
8355 OptionalIdx[
Op.getImmTy()] = i;
8366bool AMDGPUOperand::isSMRDOffset8()
const {
8367 return isImmLiteral() && isUInt<8>(getImm());
8370bool AMDGPUOperand::isSMEMOffset()
const {
8372 return isImmLiteral();
8375bool AMDGPUOperand::isSMRDLiteralOffset()
const {
8378 return isImmLiteral() && !isUInt<8>(getImm()) && isUInt<32>(getImm());
8410bool AMDGPUAsmParser::convertDppBoundCtrl(int64_t &BoundCtrl) {
8411 if (BoundCtrl == 0 || BoundCtrl == 1) {
8419void AMDGPUAsmParser::onBeginOfFile() {
8420 if (!getParser().getStreamer().getTargetStreamer() ||
8424 if (!getTargetStreamer().getTargetID())
8425 getTargetStreamer().initializeTargetID(getSTI(),
8426 getSTI().getFeatureString());
8429 getTargetStreamer().EmitDirectiveAMDGCNTarget();
8437bool AMDGPUAsmParser::parsePrimaryExpr(
const MCExpr *&Res,
SMLoc &EndLoc) {
8443 .
Case(
"max", AGVK::AGVK_Max)
8444 .
Case(
"or", AGVK::AGVK_Or)
8445 .
Case(
"extrasgprs", AGVK::AGVK_ExtraSGPRs)
8446 .
Case(
"totalnumvgprs", AGVK::AGVK_TotalNumVGPRs)
8447 .
Case(
"alignto", AGVK::AGVK_AlignTo)
8448 .
Case(
"occupancy", AGVK::AGVK_Occupancy)
8458 if (Exprs.
empty()) {
8459 Error(getToken().getLoc(),
8460 "empty " +
Twine(TokenId) +
" expression");
8463 if (CommaCount + 1 != Exprs.
size()) {
8464 Error(getToken().getLoc(),
8465 "mismatch of commas in " +
Twine(TokenId) +
" expression");
8472 if (getParser().parseExpression(Expr, EndLoc))
8476 if (LastTokenWasComma)
8479 Error(getToken().getLoc(),
8480 "unexpected token in " +
Twine(TokenId) +
" expression");
8486 return getParser().parsePrimaryExpr(Res, EndLoc,
nullptr);
8491 if (
Name ==
"mul") {
8492 return parseIntWithPrefix(
"mul",
Operands,
8496 if (
Name ==
"div") {
8497 return parseIntWithPrefix(
"div",
Operands,
8513 const int Ops[] = { AMDGPU::OpName::src0,
8514 AMDGPU::OpName::src1,
8515 AMDGPU::OpName::src2 };
8530 if (
DstOp.isReg() &&
8531 MRI.getRegClass(AMDGPU::VGPR_16RegClassID).contains(
DstOp.
getReg())) {
8535 if ((OpSel & (1 << SrcNum)) != 0)
8541void AMDGPUAsmParser::cvtVOP3OpSel(
MCInst &Inst,
8548 OptionalImmIndexMap &OptionalIdx) {
8549 cvtVOP3P(Inst,
Operands, OptionalIdx);
8558 &&
Desc.NumOperands > (OpNum + 1)
8560 &&
Desc.operands()[OpNum + 1].RegClass != -1
8562 &&
Desc.getOperandConstraint(OpNum + 1,
8563 MCOI::OperandConstraint::TIED_TO) == -1;
8568 OptionalImmIndexMap OptionalIdx;
8573 for (
unsigned J = 0; J <
Desc.getNumDefs(); ++J) {
8574 ((AMDGPUOperand &)*
Operands[
I++]).addRegOperands(Inst, 1);
8577 for (
unsigned E =
Operands.size();
I != E; ++
I) {
8578 AMDGPUOperand &
Op = ((AMDGPUOperand &)*
Operands[
I]);
8580 Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
8581 }
else if (
Op.isInterpSlot() ||
Op.isInterpAttr() ||
8582 Op.isInterpAttrChan()) {
8584 }
else if (
Op.isImmModifier()) {
8585 OptionalIdx[
Op.getImmTy()] =
I;
8593 AMDGPUOperand::ImmTyHigh);
8597 AMDGPUOperand::ImmTyClamp);
8601 AMDGPUOperand::ImmTyOModSI);
8606 OptionalImmIndexMap OptionalIdx;
8611 for (
unsigned J = 0; J <
Desc.getNumDefs(); ++J) {
8612 ((AMDGPUOperand &)*
Operands[
I++]).addRegOperands(Inst, 1);
8615 for (
unsigned E =
Operands.size();
I != E; ++
I) {
8616 AMDGPUOperand &
Op = ((AMDGPUOperand &)*
Operands[
I]);
8618 Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
8619 }
else if (
Op.isImmModifier()) {
8620 OptionalIdx[
Op.getImmTy()] =
I;
8637 const int Ops[] = { AMDGPU::OpName::src0,
8638 AMDGPU::OpName::src1,
8639 AMDGPU::OpName::src2 };
8640 const int ModOps[] = { AMDGPU::OpName::src0_modifiers,
8641 AMDGPU::OpName::src1_modifiers,
8642 AMDGPU::OpName::src2_modifiers };
8646 for (
int J = 0; J < 3; ++J) {
8654 if ((OpSel & (1 << J)) != 0)
8656 if (ModOps[J] == AMDGPU::OpName::src0_modifiers &&
8657 (OpSel & (1 << 3)) != 0)
8665 OptionalImmIndexMap &OptionalIdx) {
8670 for (
unsigned J = 0; J <
Desc.getNumDefs(); ++J) {
8671 ((AMDGPUOperand &)*
Operands[
I++]).addRegOperands(Inst, 1);
8674 for (
unsigned E =
Operands.size();
I != E; ++
I) {
8675 AMDGPUOperand &
Op = ((AMDGPUOperand &)*
Operands[
I]);
8677 Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
8678 }
else if (
Op.isImmModifier()) {
8679 OptionalIdx[
Op.getImmTy()] =
I;
8681 Op.addRegOrImmOperands(Inst, 1);
8689 AMDGPUOperand::ImmTyByteSel);
8694 AMDGPUOperand::ImmTyClamp);
8698 AMDGPUOperand::ImmTyOModSI);
8705 auto it = Inst.
begin();
8715 OptionalImmIndexMap OptionalIdx;
8716 cvtVOP3(Inst,
Operands, OptionalIdx);
8720 OptionalImmIndexMap &OptIdx) {
8726 if (Opc == AMDGPU::V_CVT_SR_BF8_F32_vi ||
8727 Opc == AMDGPU::V_CVT_SR_FP8_F32_vi ||
8728 Opc == AMDGPU::V_CVT_SR_BF8_F32_gfx12_e64_gfx12 ||
8729 Opc == AMDGPU::V_CVT_SR_FP8_F32_gfx12_e64_gfx12) {
8737 !(Opc == AMDGPU::V_CVT_PK_BF8_F32_e64_dpp_gfx12 ||
8738 Opc == AMDGPU::V_CVT_PK_FP8_F32_e64_dpp_gfx12 ||
8739 Opc == AMDGPU::V_CVT_PK_BF8_F32_e64_dpp8_gfx12 ||
8740 Opc == AMDGPU::V_CVT_PK_FP8_F32_e64_dpp8_gfx12 ||
8741 Opc == AMDGPU::V_CVT_SR_FP8_F32_gfx12_e64_dpp_gfx12 ||
8742 Opc == AMDGPU::V_CVT_SR_FP8_F32_gfx12_e64_dpp8_gfx12 ||
8743 Opc == AMDGPU::V_CVT_SR_BF8_F32_gfx12_e64_dpp_gfx12 ||
8744 Opc == AMDGPU::V_CVT_SR_BF8_F32_gfx12_e64_dpp8_gfx12)) {
8753 if (OpSelIdx != -1) {
8758 if (OpSelHiIdx != -1) {
8772 const int Ops[] = { AMDGPU::OpName::src0,
8773 AMDGPU::OpName::src1,
8774 AMDGPU::OpName::src2 };
8775 const int ModOps[] = { AMDGPU::OpName::src0_modifiers,
8776 AMDGPU::OpName::src1_modifiers,
8777 AMDGPU::OpName::src2_modifiers };
8780 unsigned OpSelHi = 0;
8787 if (OpSelHiIdx != -1)
8796 for (
int J = 0; J < 3; ++J) {
8809 if (
SrcOp.isReg() && getMRI()
8816 if ((OpSel & (1 << J)) != 0)
8820 if ((OpSelHi & (1 << J)) != 0)
8823 if ((NegLo & (1 << J)) != 0)
8826 if ((NegHi & (1 << J)) != 0)
8834 OptionalImmIndexMap OptIdx;
8840 unsigned i,
unsigned Opc,
unsigned OpName) {
8842 ((AMDGPUOperand &)*
Operands[i]).addRegOrImmWithFPInputModsOperands(Inst, 2);
8844 ((AMDGPUOperand &)*
Operands[i]).addRegOperands(Inst, 1);
8850 ((AMDGPUOperand &)*
Operands[1]).addRegOperands(Inst, 1);
8853 ((AMDGPUOperand &)*
Operands[1]).addRegOperands(Inst, 1);
8854 ((AMDGPUOperand &)*
Operands[4]).addRegOperands(Inst, 1);
8856 OptionalImmIndexMap OptIdx;
8857 for (
unsigned i = 5; i <
Operands.size(); ++i) {
8858 AMDGPUOperand &
Op = ((AMDGPUOperand &)*
Operands[i]);
8859 OptIdx[
Op.getImmTy()] = i;
8864 AMDGPUOperand::ImmTyIndexKey8bit);
8868 AMDGPUOperand::ImmTyIndexKey16bit);
8888 Operands.push_back(AMDGPUOperand::CreateToken(
this,
"::", S));
8889 SMLoc OpYLoc = getLoc();
8892 Operands.push_back(AMDGPUOperand::CreateToken(
this, OpYName, OpYLoc));
8895 return Error(OpYLoc,
"expected a VOPDY instruction after ::");
8902 auto addOp = [&](
uint16_t ParsedOprIdx) {
8903 AMDGPUOperand &
Op = ((AMDGPUOperand &)*
Operands[ParsedOprIdx]);
8905 Op.addRegOperands(Inst, 1);
8909 Op.addImmOperands(Inst, 1);
8921 addOp(InstInfo[CompIdx].getIndexOfDstInParsedOperands());
8925 const auto &CInfo = InstInfo[CompIdx];
8926 auto CompSrcOperandsNum = InstInfo[CompIdx].getCompParsedSrcOperandsNum();
8927 for (
unsigned CompSrcIdx = 0; CompSrcIdx < CompSrcOperandsNum; ++CompSrcIdx)
8928 addOp(CInfo.getIndexOfSrcInParsedOperands(CompSrcIdx));
8929 if (CInfo.hasSrc2Acc())
8930 addOp(CInfo.getIndexOfDstInParsedOperands());
8938bool AMDGPUOperand::isDPP8()
const {
8939 return isImmTy(ImmTyDPP8);
8942bool AMDGPUOperand::isDPPCtrl()
const {
8943 using namespace AMDGPU::DPP;
8945 bool result =
isImm() && getImmTy() == ImmTyDppCtrl && isUInt<9>(getImm());
8947 int64_t
Imm = getImm();
8948 return (Imm >= DppCtrl::QUAD_PERM_FIRST && Imm <= DppCtrl::QUAD_PERM_LAST) ||
8949 (
Imm >= DppCtrl::ROW_SHL_FIRST &&
Imm <= DppCtrl::ROW_SHL_LAST) ||
8950 (Imm >= DppCtrl::ROW_SHR_FIRST && Imm <= DppCtrl::ROW_SHR_LAST) ||
8951 (
Imm >= DppCtrl::ROW_ROR_FIRST &&
Imm <= DppCtrl::ROW_ROR_LAST) ||
8952 (Imm == DppCtrl::WAVE_SHL1) ||
8953 (
Imm == DppCtrl::WAVE_ROL1) ||
8954 (Imm == DppCtrl::WAVE_SHR1) ||
8955 (
Imm == DppCtrl::WAVE_ROR1) ||
8956 (Imm == DppCtrl::ROW_MIRROR) ||
8957 (
Imm == DppCtrl::ROW_HALF_MIRROR) ||
8958 (Imm == DppCtrl::BCAST15) ||
8959 (
Imm == DppCtrl::BCAST31) ||
8960 (Imm >= DppCtrl::ROW_SHARE_FIRST && Imm <= DppCtrl::ROW_SHARE_LAST) ||
8961 (
Imm >= DppCtrl::ROW_XMASK_FIRST &&
Imm <= DppCtrl::ROW_XMASK_LAST);
8970bool AMDGPUOperand::isBLGP()
const {
8971 return isImm() && getImmTy() == ImmTyBLGP && isUInt<3>(getImm());
8974bool AMDGPUOperand::isS16Imm()
const {
8975 return isImmLiteral() && (isInt<16>(getImm()) || isUInt<16>(getImm()));
8978bool AMDGPUOperand::isU16Imm()
const {
8979 return isImmLiteral() && isUInt<16>(getImm());
8986bool AMDGPUAsmParser::parseDimId(
unsigned &Encoding) {
8991 SMLoc Loc = getToken().getEndLoc();
8992 Token = std::string(getTokenStr());
8994 if (getLoc() != Loc)
8999 if (!parseId(Suffix))
9025 SMLoc Loc = getLoc();
9026 if (!parseDimId(Encoding))
9027 return Error(Loc,
"invalid dim value");
9029 Operands.push_back(AMDGPUOperand::CreateImm(
this, Encoding, S,
9030 AMDGPUOperand::ImmTyDim));
9048 if (!skipToken(
AsmToken::LBrac,
"expected an opening square bracket"))
9051 for (
size_t i = 0; i < 8; ++i) {
9055 SMLoc Loc = getLoc();
9056 if (getParser().parseAbsoluteExpression(Sels[i]))
9058 if (0 > Sels[i] || 7 < Sels[i])
9059 return Error(Loc,
"expected a 3-bit value");
9066 for (
size_t i = 0; i < 8; ++i)
9067 DPP8 |= (Sels[i] << (i * 3));
9069 Operands.push_back(AMDGPUOperand::CreateImm(
this, DPP8, S, AMDGPUOperand::ImmTyDPP8));
9074AMDGPUAsmParser::isSupportedDPPCtrl(
StringRef Ctrl,
9076 if (Ctrl ==
"row_newbcast")
9079 if (Ctrl ==
"row_share" ||
9080 Ctrl ==
"row_xmask")
9083 if (Ctrl ==
"wave_shl" ||
9084 Ctrl ==
"wave_shr" ||
9085 Ctrl ==
"wave_rol" ||
9086 Ctrl ==
"wave_ror" ||
9087 Ctrl ==
"row_bcast")
9090 return Ctrl ==
"row_mirror" ||
9091 Ctrl ==
"row_half_mirror" ||
9092 Ctrl ==
"quad_perm" ||
9093 Ctrl ==
"row_shl" ||
9094 Ctrl ==
"row_shr" ||
9099AMDGPUAsmParser::parseDPPCtrlPerm() {
9102 if (!skipToken(
AsmToken::LBrac,
"expected an opening square bracket"))
9106 for (
int i = 0; i < 4; ++i) {
9111 SMLoc Loc = getLoc();
9112 if (getParser().parseAbsoluteExpression(Temp))
9114 if (Temp < 0 || Temp > 3) {
9115 Error(Loc,
"expected a 2-bit value");
9119 Val += (Temp << i * 2);
9129AMDGPUAsmParser::parseDPPCtrlSel(
StringRef Ctrl) {
9130 using namespace AMDGPU::DPP;
9135 SMLoc Loc = getLoc();
9137 if (getParser().parseAbsoluteExpression(Val))
9140 struct DppCtrlCheck {
9147 .
Case(
"wave_shl", {DppCtrl::WAVE_SHL1, 1, 1})
9148 .Case(
"wave_rol", {DppCtrl::WAVE_ROL1, 1, 1})
9149 .Case(
"wave_shr", {DppCtrl::WAVE_SHR1, 1, 1})
9150 .Case(
"wave_ror", {DppCtrl::WAVE_ROR1, 1, 1})
9151 .Case(
"row_shl", {DppCtrl::ROW_SHL0, 1, 15})
9152 .Case(
"row_shr", {DppCtrl::ROW_SHR0, 1, 15})
9153 .Case(
"row_ror", {DppCtrl::ROW_ROR0, 1, 15})
9154 .Case(
"row_share", {DppCtrl::ROW_SHARE_FIRST, 0, 15})
9155 .Case(
"row_xmask", {DppCtrl::ROW_XMASK_FIRST, 0, 15})
9156 .Case(
"row_newbcast", {DppCtrl::ROW_NEWBCAST_FIRST, 0, 15})
9160 if (
Check.Ctrl == -1) {
9161 Valid = (
Ctrl ==
"row_bcast" && (Val == 15 || Val == 31));
9162 Val = (Val == 15)? DppCtrl::BCAST15 : DppCtrl::BCAST31;
9177 using namespace AMDGPU::DPP;
9180 !isSupportedDPPCtrl(getTokenStr(),
Operands))
9189 if (Ctrl ==
"row_mirror") {
9190 Val = DppCtrl::ROW_MIRROR;
9191 }
else if (Ctrl ==
"row_half_mirror") {
9192 Val = DppCtrl::ROW_HALF_MIRROR;
9195 if (Ctrl ==
"quad_perm") {
9196 Val = parseDPPCtrlPerm();
9198 Val = parseDPPCtrlSel(Ctrl);
9207 AMDGPUOperand::CreateImm(
this, Val, S, AMDGPUOperand::ImmTyDppCtrl));
9213 OptionalImmIndexMap OptionalIdx;
9223 bool IsMAC = OldIdx != -1 && Src2ModIdx != -1 &&
9227 for (
unsigned J = 0; J <
Desc.getNumDefs(); ++J) {
9228 ((AMDGPUOperand &)*
Operands[
I++]).addRegOperands(Inst, 1);
9232 for (
unsigned E =
Operands.size();
I != E; ++
I) {
9236 if (OldIdx == NumOperands) {
9238 constexpr int DST_IDX = 0;
9240 }
else if (Src2ModIdx == NumOperands) {
9251 bool IsVOP3CvtSrDpp =
9252 Opc == AMDGPU::V_CVT_SR_BF8_F32_gfx12_e64_dpp8_gfx12 ||
9253 Opc == AMDGPU::V_CVT_SR_FP8_F32_gfx12_e64_dpp8_gfx12 ||
9254 Opc == AMDGPU::V_CVT_SR_BF8_F32_gfx12_e64_dpp_gfx12 ||
9255 Opc == AMDGPU::V_CVT_SR_FP8_F32_gfx12_e64_dpp_gfx12;
9256 if (IsVOP3CvtSrDpp) {
9270 AMDGPUOperand &
Op = ((AMDGPUOperand &)*
Operands[
I]);
9272 if (IsDPP8 &&
Op.isDppFI()) {
9275 Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
9276 }
else if (
Op.isReg()) {
9277 Op.addRegOperands(Inst, 1);
9278 }
else if (
Op.isImm() &&
9280 assert(!
Op.IsImmKindLiteral() &&
"Cannot use literal with DPP");
9281 Op.addImmOperands(Inst, 1);
9282 }
else if (
Op.isImm()) {
9283 OptionalIdx[
Op.getImmTy()] =
I;
9291 AMDGPUOperand::ImmTyByteSel);
9295 AMDGPUOperand::ImmTyClamp);
9301 cvtVOP3P(Inst,
Operands, OptionalIdx);
9303 cvtVOP3OpSel(Inst,
Operands, OptionalIdx);
9320 AMDGPUOperand::ImmTyDppFI);
9325 OptionalImmIndexMap OptionalIdx;
9329 for (
unsigned J = 0; J <
Desc.getNumDefs(); ++J) {
9330 ((AMDGPUOperand &)*
Operands[
I++]).addRegOperands(Inst, 1);
9334 for (
unsigned E =
Operands.size();
I != E; ++
I) {
9342 AMDGPUOperand &
Op = ((AMDGPUOperand &)*
Operands[
I]);
9344 if (
Op.isReg() && validateVccOperand(
Op.getReg())) {
9352 Op.addImmOperands(Inst, 1);
9354 Op.addRegWithFPInputModsOperands(Inst, 2);
9355 }
else if (
Op.isDppFI()) {
9357 }
else if (
Op.isReg()) {
9358 Op.addRegOperands(Inst, 1);
9364 Op.addRegWithFPInputModsOperands(Inst, 2);
9365 }
else if (
Op.isReg()) {
9366 Op.addRegOperands(Inst, 1);
9367 }
else if (
Op.isDPPCtrl()) {
9368 Op.addImmOperands(Inst, 1);
9369 }
else if (
Op.isImm()) {
9371 OptionalIdx[
Op.getImmTy()] =
I;
9387 AMDGPUOperand::ImmTyDppFI);
9398 AMDGPUOperand::ImmTy
Type) {
9411 .
Case(
"BYTE_0", SdwaSel::BYTE_0)
9412 .
Case(
"BYTE_1", SdwaSel::BYTE_1)
9413 .
Case(
"BYTE_2", SdwaSel::BYTE_2)
9414 .
Case(
"BYTE_3", SdwaSel::BYTE_3)
9415 .
Case(
"WORD_0", SdwaSel::WORD_0)
9416 .
Case(
"WORD_1", SdwaSel::WORD_1)
9417 .
Case(
"DWORD", SdwaSel::DWORD)
9420 if (
Int == 0xffffffff)
9421 return Error(StringLoc,
"invalid " +
Twine(Prefix) +
" value");
9440 .
Case(
"UNUSED_PAD", DstUnused::UNUSED_PAD)
9441 .
Case(
"UNUSED_SEXT", DstUnused::UNUSED_SEXT)
9442 .
Case(
"UNUSED_PRESERVE", DstUnused::UNUSED_PRESERVE)
9445 if (
Int == 0xffffffff)
9446 return Error(StringLoc,
"invalid dst_unused value");
9448 Operands.push_back(AMDGPUOperand::CreateImm(
this,
Int, S, AMDGPUOperand::ImmTySDWADstUnused));
9478 OptionalImmIndexMap OptionalIdx;
9479 bool SkipVcc = SkipDstVcc || SkipSrcVcc;
9480 bool SkippedVcc =
false;
9484 for (
unsigned J = 0; J <
Desc.getNumDefs(); ++J) {
9485 ((AMDGPUOperand &)*
Operands[
I++]).addRegOperands(Inst, 1);
9488 for (
unsigned E =
Operands.size();
I != E; ++
I) {
9489 AMDGPUOperand &
Op = ((AMDGPUOperand &)*
Operands[
I]);
9490 if (SkipVcc && !SkippedVcc &&
Op.isReg() &&
9491 (
Op.getReg() == AMDGPU::VCC ||
Op.getReg() == AMDGPU::VCC_LO)) {
9509 Op.addRegOrImmWithInputModsOperands(Inst, 2);
9510 }
else if (
Op.isImm()) {
9512 OptionalIdx[
Op.getImmTy()] =
I;
9520 if (Opc != AMDGPU::V_NOP_sdwa_gfx10 && Opc != AMDGPU::V_NOP_sdwa_gfx9 &&
9521 Opc != AMDGPU::V_NOP_sdwa_vi) {
9523 switch (BasicInstType) {
9527 AMDGPUOperand::ImmTyClamp, 0);
9531 AMDGPUOperand::ImmTyOModSI, 0);
9535 AMDGPUOperand::ImmTySDWADstSel, SdwaSel::DWORD);
9539 AMDGPUOperand::ImmTySDWADstUnused,
9540 DstUnused::UNUSED_PRESERVE);
9547 AMDGPUOperand::ImmTyClamp, 0);
9561 AMDGPUOperand::ImmTyClamp, 0);
9567 llvm_unreachable(
"Invalid instruction type. Only VOP1, VOP2 and VOPC allowed");
9573 if (Inst.
getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi ||
9574 Inst.
getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) {
9575 auto it = Inst.
begin();
9588#define GET_REGISTER_MATCHER
9589#define GET_MATCHER_IMPLEMENTATION
9590#define GET_MNEMONIC_SPELL_CHECKER
9591#define GET_MNEMONIC_CHECKER
9592#include "AMDGPUGenAsmMatcher.inc"
9598 return parseTokenOp(
"addr64",
Operands);
9600 return parseTokenOp(
"done",
Operands);
9602 return parseTokenOp(
"idxen",
Operands);
9604 return parseTokenOp(
"lds",
Operands);
9606 return parseTokenOp(
"offen",
Operands);
9608 return parseTokenOp(
"off",
Operands);
9610 return parseTokenOp(
"row_en",
Operands);
9612 return parseNamedBit(
"gds",
Operands, AMDGPUOperand::ImmTyGDS);
9614 return parseNamedBit(
"tfe",
Operands, AMDGPUOperand::ImmTyTFE);
9616 return tryCustomParseOperand(
Operands, MCK);
9627 AMDGPUOperand &Operand = (AMDGPUOperand&)
Op;
9630 return Operand.isAddr64() ? Match_Success : Match_InvalidOperand;
9632 return Operand.isGDS() ? Match_Success : Match_InvalidOperand;
9634 return Operand.isLDS() ? Match_Success : Match_InvalidOperand;
9636 return Operand.isIdxen() ? Match_Success : Match_InvalidOperand;
9638 return Operand.isOffen() ? Match_Success : Match_InvalidOperand;
9640 return Operand.isTFE() ? Match_Success : Match_InvalidOperand;
9648 return Operand.isSSrc_b32() ? Match_Success : Match_InvalidOperand;
9650 return Operand.isSSrc_f32() ? Match_Success : Match_InvalidOperand;
9651 case MCK_SOPPBrTarget:
9652 return Operand.isSOPPBrTarget() ? Match_Success : Match_InvalidOperand;
9653 case MCK_VReg32OrOff:
9654 return Operand.isVReg32OrOff() ? Match_Success : Match_InvalidOperand;
9655 case MCK_InterpSlot:
9656 return Operand.isInterpSlot() ? Match_Success : Match_InvalidOperand;
9657 case MCK_InterpAttr:
9658 return Operand.isInterpAttr() ? Match_Success : Match_InvalidOperand;
9659 case MCK_InterpAttrChan:
9660 return Operand.isInterpAttrChan() ? Match_Success : Match_InvalidOperand;
9662 case MCK_SReg_64_XEXEC:
9668 return Operand.isNull() ? Match_Success : Match_InvalidOperand;
9670 return Match_InvalidOperand;
9687 if (!isUInt<16>(Imm))
9688 return Error(S,
"expected a 16-bit value");
9691 AMDGPUOperand::CreateImm(
this, Imm, S, AMDGPUOperand::ImmTyEndpgm));
9695bool AMDGPUOperand::isEndpgm()
const {
return isImmTy(ImmTyEndpgm); }
9701bool AMDGPUOperand::isSplitBarrier()
const {
return isInlinableImm(MVT::i32); }
unsigned const MachineRegisterInfo * MRI
LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUAsmParser()
Force static initialization.
static bool checkWriteLane(const MCInst &Inst)
static bool getRegNum(StringRef Str, unsigned &Num)
static constexpr RegInfo RegularRegisters[]
static const RegInfo * getRegularRegInfo(StringRef Str)
static ArrayRef< unsigned > getAllVariants()
static OperandIndices getSrcOperandIndices(unsigned Opcode, bool AddMandatoryLiterals=false)
static bool IsMovrelsSDWAOpcode(const unsigned Opcode)
static const fltSemantics * getFltSemantics(unsigned Size)
static bool isRegularReg(RegisterKind Kind)
static bool ConvertOmodMul(int64_t &Mul)
#define PARSE_BITS_ENTRY(FIELD, ENTRY, VALUE, RANGE)
static bool isInlineableLiteralOp16(int64_t Val, MVT VT, bool HasInv2Pi)
static bool canLosslesslyConvertToFPType(APFloat &FPLiteral, MVT VT)
constexpr uint64_t MIMGFlags
static bool AMDGPUCheckMnemonic(StringRef Mnemonic, const FeatureBitset &AvailableFeatures, unsigned VariantID)
static void applyMnemonicAliases(StringRef &Mnemonic, const FeatureBitset &Features, unsigned VariantID)
constexpr unsigned MAX_SRC_OPERANDS_NUM
#define EXPR_RESOLVE_OR_ERROR(RESOLVED)
static bool ConvertOmodDiv(int64_t &Div)
static unsigned getSpecialRegForName(StringRef RegName)
static void addSrcModifiersAndSrc(MCInst &Inst, const OperandVector &Operands, unsigned i, unsigned Opc, unsigned OpName)
static bool IsRevOpcode(const unsigned Opcode)
static int getRegClass(RegisterKind Is, unsigned RegWidth)
static void addOptionalImmOperand(MCInst &Inst, const OperandVector &Operands, AMDGPUAsmParser::OptionalImmIndexMap &OptionalIdx, AMDGPUOperand::ImmTy ImmT, int64_t Default=0)
static bool encodeCnt(const AMDGPU::IsaVersion ISA, int64_t &IntVal, int64_t CntVal, bool Saturate, unsigned(*encode)(const IsaVersion &Version, unsigned, unsigned), unsigned(*decode)(const IsaVersion &Version, unsigned))
static void cvtVOP3DstOpSelOnly(MCInst &Inst, const MCRegisterInfo &MRI)
static bool isRegOrImmWithInputMods(const MCInstrDesc &Desc, unsigned OpNum)
static const fltSemantics * getOpFltSemantics(uint8_t OperandType)
static bool isInvalidVOPDY(const OperandVector &Operands, uint64_t InvalidOprIdx)
static std::string AMDGPUMnemonicSpellCheck(StringRef S, const FeatureBitset &FBS, unsigned VariantID=0)
static LLVM_READNONE unsigned encodeBitmaskPerm(const unsigned AndMask, const unsigned OrMask, const unsigned XorMask)
static bool isSafeTruncation(int64_t Val, unsigned Size)
static int IsAGPROperand(const MCInst &Inst, uint16_t NameIdx, const MCRegisterInfo *MRI)
AMDHSA kernel descriptor MCExpr struct for use in MC layer.
Provides AMDGPU specific target descriptions.
AMDHSA kernel descriptor definitions.
static bool parseExpr(MCAsmParser &MCParser, const MCExpr *&Value, raw_ostream &Err)
MC layer struct for AMDGPUMCKernelCodeT, provides MCExpr functionality where required.
@ AMD_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32
This file declares a class to represent arbitrary precision floating point values and provide a varie...
static void print(raw_ostream &Out, object::Archive::Kind Kind, T Val)
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
Analysis containing CSE Info
#define LLVM_EXTERNAL_VISIBILITY
Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
static unsigned getOperandSize(MachineInstr &MI, unsigned Idx, MachineRegisterInfo &MRI)
static llvm::Expected< InlineInfo > decode(DataExtractor &Data, uint64_t &Offset, uint64_t BaseAddr)
Decode an InlineInfo in Data at the specified offset.
mir Rename Register Operands
unsigned const TargetRegisterInfo * TRI
static unsigned getReg(const MCDisassembler *D, unsigned RC, unsigned RegNo)
static bool isReg(const MCInst &MI, unsigned OpNo)
ConstantRange Range(APInt(BitWidth, Low), APInt(BitWidth, High))
Interface definition for SIInstrInfo.
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
Interface definition for SIRegisterInfo.
static bool isImm(const MachineOperand &MO, MachineRegisterInfo *MRI)
unsigned unsigned DefaultVal
This file implements the SmallBitVector class.
StringSet - A set-like wrapper for the StringMap.
static void initialize(TargetLibraryInfoImpl &TLI, const Triple &T, ArrayRef< StringLiteral > StandardNames)
Initialize the set of available library functions based on the specified target triple.
static bool contains(SmallPtrSetImpl< ConstantExpr * > &Cache, ConstantExpr *Expr, Constant *C)
support::ulittle16_t & Lo
support::ulittle16_t & Hi
static const AMDGPUMCExpr * createMax(ArrayRef< const MCExpr * > Args, MCContext &Ctx)
static const AMDGPUMCExpr * create(VariantKind Kind, ArrayRef< const MCExpr * > Args, MCContext &Ctx)
static const AMDGPUMCExpr * createExtraSGPRs(const MCExpr *VCCUsed, const MCExpr *FlatScrUsed, bool XNACKUsed, MCContext &Ctx)
Allow delayed MCExpr resolve of ExtraSGPRs (in case VCCUsed or FlatScrUsed are unresolvable but neede...
static const AMDGPUMCExpr * createAlignTo(const MCExpr *Value, const MCExpr *Align, MCContext &Ctx)
opStatus convert(const fltSemantics &ToSemantics, roundingMode RM, bool *losesInfo)
Class for arbitrary precision integers.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
size_t size() const
size - Get the array size.
Target independent representation for an assembler token.
StringRef getString() const
Get the string for the current token, this includes all characters (for example, the quotes on string...
bool is(TokenKind K) const
TokenKind getKind() const
This class represents an Operation in the Expression.
Base class for user error types.
Lightweight error class with error context and mandatory checking.
Tagged union holding either a T or a Error.
Class representing an expression and its matching format.
Container class for subtarget features.
constexpr bool test(unsigned I) const
constexpr FeatureBitset & flip(unsigned I)
virtual void Initialize(MCAsmParser &Parser)
Initialize the extension for parsing using the given Parser.
MCAsmParser & getParser()
Generic assembler parser interface, for use by target specific assembly parsers.
virtual MCStreamer & getStreamer()=0
Return the output streamer for the assembler.
static const MCBinaryExpr * createAdd(const MCExpr *LHS, const MCExpr *RHS, MCContext &Ctx)
static const MCBinaryExpr * createDiv(const MCExpr *LHS, const MCExpr *RHS, MCContext &Ctx)
static const MCBinaryExpr * createSub(const MCExpr *LHS, const MCExpr *RHS, MCContext &Ctx)
static const MCConstantExpr * create(int64_t Value, MCContext &Ctx, bool PrintInHex=false, unsigned SizeInBytes=0)
Context object for machine code objects.
const MCRegisterInfo * getRegisterInfo() const
MCSymbol * getOrCreateSymbol(const Twine &Name)
Lookup the symbol inside with the specified Name.
const MCSubtargetInfo * getSubtargetInfo() const
Base class for the full range of assembler expressions which are needed for parsing.
Instances of this class represent a single low-level machine instruction.
unsigned getNumOperands() const
unsigned getOpcode() const
iterator insert(iterator I, const MCOperand &Op)
void addOperand(const MCOperand Op)
const MCOperand & getOperand(unsigned i) const
Describe properties that are true of each instruction in the target description file.
bool mayStore() const
Return true if this instruction could possibly modify memory.
Interface to description of machine instruction set.
const MCInstrDesc & get(unsigned Opcode) const
Return the machine instruction descriptor that corresponds to the specified instruction opcode.
Instances of this class represent operands of the MCInst class.
static MCOperand createReg(unsigned Reg)
static MCOperand createExpr(const MCExpr *Val)
void setReg(unsigned Reg)
Set the register number.
static MCOperand createImm(int64_t Val)
unsigned getReg() const
Returns the register number.
MCParsedAsmOperand - This abstract class represents a source-level assembly instruction operand.
virtual bool isReg() const =0
isReg - Is this a register operand?
virtual bool isMem() const =0
isMem - Is this a memory operand?
virtual MCRegister getReg() const =0
virtual bool isToken() const =0
isToken - Is this a token operand?
virtual bool isImm() const =0
isImm - Is this an immediate operand?
MCRegisterClass - Base class of TargetRegisterClass.
unsigned getNumRegs() const
getNumRegs - Return the number of registers in this class.
unsigned getRegister(unsigned i) const
getRegister - Return the specified register in the class.
bool contains(MCRegister Reg) const
contains - Return true if the specified register is included in this register class.
MCRegisterInfo base class - We assume that the target defines a static array of MCRegisterDesc object...
Wrapper class representing physical registers. Should be passed by value.
static constexpr unsigned NoRegister
Streaming machine code generation interface.
virtual void emitInstruction(const MCInst &Inst, const MCSubtargetInfo &STI)
Emit the given Instruction into the current section.
MCTargetStreamer * getTargetStreamer()
Generic base class for all target subtargets.
const FeatureBitset & getFeatureBits() const
FeatureBitset ToggleFeature(uint64_t FB)
Toggle a feature and return the re-computed feature bits.
MCSymbol - Instances of this class represent a symbol name in the MC file, and MCSymbols are created ...
void setVariableValue(const MCExpr *Value)
MCTargetAsmParser - Generic interface to target specific assembly parsers.
MCSubtargetInfo & copySTI()
Create a copy of STI and return a non-const reference to it.
virtual bool parseRegister(MCRegister &Reg, SMLoc &StartLoc, SMLoc &EndLoc)=0
virtual bool ParseDirective(AsmToken DirectiveID)
ParseDirective - Parse a target specific assembler directive This method is deprecated,...
virtual ParseStatus tryParseRegister(MCRegister &Reg, SMLoc &StartLoc, SMLoc &EndLoc)=0
tryParseRegister - parse one register if possible
void setAvailableFeatures(const FeatureBitset &Value)
const MCSubtargetInfo & getSTI() const
virtual unsigned validateTargetOperandClass(MCParsedAsmOperand &Op, unsigned Kind)
Allow a target to add special case operand matching for things that tblgen doesn't/can't handle effec...
virtual bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name, SMLoc NameLoc, OperandVector &Operands)=0
ParseInstruction - Parse one assembly instruction.
virtual unsigned checkTargetMatchPredicate(MCInst &Inst)
checkTargetMatchPredicate - Validate the instruction match against any complex target predicates not ...
virtual bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, OperandVector &Operands, MCStreamer &Out, uint64_t &ErrorInfo, bool MatchingInlineAsm)=0
MatchAndEmitInstruction - Recognize a series of operands of a parsed instruction as an actual MCInst ...
Target specific streamer interface.
uint64_t getScalarSizeInBits() const
TypeSize getSizeInBits() const
Returns the size of the specified MVT in bits.
MVT getScalarType() const
If this is a vector, return the element type, otherwise return this.
MutableArrayRef - Represent a mutable reference to an array (0 or more elements consecutively in memo...
Ternary parse status returned by various parse* methods.
constexpr bool isFailure() const
static constexpr StatusTy Failure
constexpr bool isSuccess() const
static constexpr StatusTy Success
static constexpr StatusTy NoMatch
constexpr bool isNoMatch() const
Wrapper class representing virtual and physical registers.
Represents a location in source code.
static SMLoc getFromPointer(const char *Ptr)
constexpr const char * getPointer() const
constexpr bool isValid() const
Represents a range in source code.
Implements a dense probed hash-table based set with some number of buckets stored inline.
SmallString - A SmallString is just a SmallVector with methods and accessors that make it work better...
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
A wrapper around a string literal that serves as a proxy for constructing global tables of StringRefs...
StringMapEntry - This is used to represent one value that is inserted into a StringMap.
StringRef - Represent a constant reference to a string, i.e.
bool consume_back(StringRef Suffix)
Returns true if this StringRef has the given suffix and removes that suffix.
bool starts_with(StringRef Prefix) const
Check if this string starts with the given Prefix.
constexpr bool empty() const
empty - Check if the string is empty.
StringRef drop_front(size_t N=1) const
Return a StringRef equal to 'this' but with the first N elements dropped.
constexpr size_t size() const
size - Get the string size.
constexpr const char * data() const
data - Get a pointer to the start of the string (which may not be null terminated).
bool ends_with(StringRef Suffix) const
Check if this string ends with the given Suffix.
StringSet - A wrapper for StringMap that provides set-like functionality.
bool contains(StringRef key) const
Check if the set contains the given key.
std::pair< typename Base::iterator, bool > insert(StringRef key)
A switch()-like statement whose cases are string literals.
StringSwitch & Case(StringLiteral S, T Value)
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
The instances of the Type class are immutable: once they are created, they are never changed.
LLVM Value Representation.
std::pair< iterator, bool > insert(const ValueT &V)
This class implements an extremely fast bulk output stream that can only output to a stream.
A raw_ostream that writes to an std::string.
A raw_ostream that writes to an SmallVector or SmallString.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
int encodeDepCtr(const StringRef Name, int64_t Val, unsigned &UsedOprMask, const MCSubtargetInfo &STI)
int getDefaultDepCtrEncoding(const MCSubtargetInfo &STI)
bool isSupportedTgtId(unsigned Id, const MCSubtargetInfo &STI)
unsigned getTgtId(const StringRef Name)
constexpr char NumSGPRs[]
Key for Kernel::CodeProps::Metadata::mNumSGPRs.
constexpr char SymbolName[]
Key for Kernel::Metadata::mSymbolName.
constexpr char AssemblerDirectiveBegin[]
HSA metadata beginning assembler directive.
constexpr char AssemblerDirectiveEnd[]
HSA metadata ending assembler directive.
constexpr char AssemblerDirectiveBegin[]
Old HSA metadata beginning assembler directive for V2.
int64_t getHwregId(StringRef Name, const MCSubtargetInfo &STI)
unsigned getVGPREncodingGranule(const MCSubtargetInfo *STI, std::optional< bool > EnableWavefrontSize32)
unsigned getSGPREncodingGranule(const MCSubtargetInfo *STI)
unsigned getLocalMemorySize(const MCSubtargetInfo *STI)
unsigned getAddressableNumSGPRs(const MCSubtargetInfo *STI)
@ FIXED_NUM_SGPRS_FOR_INIT_BUG
constexpr char AssemblerDirective[]
PAL metadata (old linear format) assembler directive.
constexpr char AssemblerDirectiveBegin[]
PAL metadata (new MsgPack format) beginning assembler directive.
constexpr char AssemblerDirectiveEnd[]
PAL metadata (new MsgPack format) ending assembler directive.
int64_t getMsgOpId(int64_t MsgId, StringRef Name, const MCSubtargetInfo &STI)
Map from a symbolic name for a sendmsg operation to the operation portion of the immediate encoding.
int64_t getMsgId(StringRef Name, const MCSubtargetInfo &STI)
Map from a symbolic name for a msg_id to the message portion of the immediate encoding.
uint64_t encodeMsg(uint64_t MsgId, uint64_t OpId, uint64_t StreamId)
bool msgSupportsStream(int64_t MsgId, int64_t OpId, const MCSubtargetInfo &STI)
bool isValidMsgId(int64_t MsgId, const MCSubtargetInfo &STI)
bool isValidMsgStream(int64_t MsgId, int64_t OpId, int64_t StreamId, const MCSubtargetInfo &STI, bool Strict)
bool msgRequiresOp(int64_t MsgId, const MCSubtargetInfo &STI)
bool isValidMsgOp(int64_t MsgId, int64_t OpId, const MCSubtargetInfo &STI, bool Strict)
ArrayRef< GFXVersion > getGFXVersions()
constexpr unsigned COMPONENTS[]
bool isInlinableLiteralBF16(int16_t Literal, bool HasInv2Pi)
bool isGFX10_BEncoding(const MCSubtargetInfo &STI)
LLVM_READONLY const MIMGInfo * getMIMGInfo(unsigned Opc)
unsigned getRegOperandSize(const MCRegisterInfo *MRI, const MCInstrDesc &Desc, unsigned OpNo)
Get size of register operand.
bool isInlinableLiteralFP16(int16_t Literal, bool HasInv2Pi)
LLVM_READNONE bool isLegalDPALU_DPPControl(unsigned DC)
const int OPR_ID_UNSUPPORTED
bool isInlinableLiteralV2I16(uint32_t Literal)
int32_t getTotalNumVGPRs(bool has90AInsts, int32_t ArgNumAGPR, int32_t ArgNumVGPR)
bool isGFX10(const MCSubtargetInfo &STI)
LLVM_READONLY int16_t getNamedOperandIdx(uint16_t Opcode, uint16_t NamedIdx)
bool isInlinableLiteralV2BF16(uint32_t Literal)
unsigned getMaxNumUserSGPRs(const MCSubtargetInfo &STI)
unsigned getNumFlatOffsetBits(const MCSubtargetInfo &ST)
For pre-GFX12 FLAT instructions the offset must be positive; MSB is ignored and forced to zero.
unsigned mc2PseudoReg(unsigned Reg)
Convert hardware register Reg to a pseudo register.
bool hasA16(const MCSubtargetInfo &STI)
bool isLegalSMRDEncodedSignedOffset(const MCSubtargetInfo &ST, int64_t EncodedOffset, bool IsBuffer)
bool isGFX12Plus(const MCSubtargetInfo &STI)
unsigned getNSAMaxSize(const MCSubtargetInfo &STI, bool HasSampler)
bool hasPackedD16(const MCSubtargetInfo &STI)
bool isGFX940(const MCSubtargetInfo &STI)
bool isInlinableLiteralV2F16(uint32_t Literal)
bool isHsaAbi(const MCSubtargetInfo &STI)
bool isGFX11(const MCSubtargetInfo &STI)
const int OPR_VAL_INVALID
bool getSMEMIsBuffer(unsigned Opc)
IsaVersion getIsaVersion(StringRef GPU)
bool isValid32BitLiteral(uint64_t Val, bool IsFP64)
bool isDPALU_DPP(const MCInstrDesc &OpDesc)
bool isSI(const MCSubtargetInfo &STI)
unsigned decodeLgkmcnt(const IsaVersion &Version, unsigned Waitcnt)
unsigned getWaitcntBitMask(const IsaVersion &Version)
bool isGFX9(const MCSubtargetInfo &STI)
bool isGFX10_AEncoding(const MCSubtargetInfo &STI)
bool isKImmOperand(const MCInstrDesc &Desc, unsigned OpNo)
Is this a KImm operand?
bool isGFX90A(const MCSubtargetInfo &STI)
LLVM_READONLY const MIMGDimInfo * getMIMGDimInfoByEncoding(uint8_t DimEnc)
bool isInlinableLiteral32(int32_t Literal, bool HasInv2Pi)
bool isGFX12(const MCSubtargetInfo &STI)
unsigned encodeExpcnt(const IsaVersion &Version, unsigned Waitcnt, unsigned Expcnt)
bool isSISrcOperand(const MCInstrDesc &Desc, unsigned OpNo)
Is this an AMDGPU specific source operand? These include registers, inline constants,...
bool hasMAIInsts(const MCSubtargetInfo &STI)
LLVM_READONLY bool hasNamedOperand(uint64_t Opcode, uint64_t NamedIdx)
LLVM_READNONE bool isInlinableIntLiteral(int64_t Literal)
Is this literal inlinable, and not one of the values intended for floating point values.
bool isSGPR(unsigned Reg, const MCRegisterInfo *TRI)
Is Reg - scalar register.
bool isHi(unsigned Reg, const MCRegisterInfo &MRI)
LLVM_READONLY const MIMGDimInfo * getMIMGDimInfoByAsmSuffix(StringRef AsmSuffix)
bool hasMIMG_R128(const MCSubtargetInfo &STI)
bool hasG16(const MCSubtargetInfo &STI)
unsigned getAddrSizeMIMGOp(const MIMGBaseOpcodeInfo *BaseOpcode, const MIMGDimInfo *Dim, bool IsA16, bool IsG16Supported)
bool hasArchitectedFlatScratch(const MCSubtargetInfo &STI)
bool isGFX11Plus(const MCSubtargetInfo &STI)
bool isInlineValue(unsigned Reg)
bool isSISrcFPOperand(const MCInstrDesc &Desc, unsigned OpNo)
Is this floating-point operand?
bool isGFX10Plus(const MCSubtargetInfo &STI)
unsigned getMCReg(unsigned Reg, const MCSubtargetInfo &STI)
If Reg is a pseudo reg, return the correct hardware register given STI otherwise return Reg.
@ OPERAND_KIMM32
Operand with 32-bit immediate that uses the constant bus.
@ OPERAND_REG_INLINE_C_V2INT32
@ OPERAND_REG_INLINE_C_FP64
@ OPERAND_REG_INLINE_C_BF16
@ OPERAND_REG_INLINE_C_V2BF16
@ OPERAND_REG_IMM_V2INT16
@ OPERAND_REG_INLINE_AC_V2FP16
@ OPERAND_REG_IMM_INT32
Operands with register or 32-bit immediate.
@ OPERAND_REG_IMM_BF16_DEFERRED
@ OPERAND_REG_INLINE_C_INT64
@ OPERAND_REG_INLINE_AC_BF16
@ OPERAND_REG_INLINE_C_INT16
Operands with register or inline constant.
@ OPERAND_REG_INLINE_AC_INT16
Operands with an AccVGPR register or inline constant.
@ OPERAND_REG_INLINE_C_V2FP16
@ OPERAND_REG_INLINE_AC_V2INT16
@ OPERAND_REG_INLINE_AC_FP16
@ OPERAND_REG_INLINE_AC_INT32
@ OPERAND_REG_INLINE_AC_FP32
@ OPERAND_REG_INLINE_AC_V2BF16
@ OPERAND_REG_IMM_V2INT32
@ OPERAND_REG_INLINE_C_FP32
@ OPERAND_REG_INLINE_C_INT32
@ OPERAND_REG_INLINE_C_V2INT16
@ OPERAND_REG_INLINE_AC_FP64
@ OPERAND_REG_INLINE_C_FP16
@ OPERAND_REG_INLINE_C_V2FP32
@ OPERAND_INLINE_SPLIT_BARRIER_INT32
@ OPERAND_REG_IMM_FP32_DEFERRED
@ OPERAND_REG_IMM_FP16_DEFERRED
bool hasGDS(const MCSubtargetInfo &STI)
bool isLegalSMRDEncodedUnsignedOffset(const MCSubtargetInfo &ST, int64_t EncodedOffset)
bool isGFX9Plus(const MCSubtargetInfo &STI)
bool hasDPPSrc1SGPR(const MCSubtargetInfo &STI)
const int OPR_ID_DUPLICATE
bool isVOPD(unsigned Opc)
VOPD::InstInfo getVOPDInstInfo(const MCInstrDesc &OpX, const MCInstrDesc &OpY)
unsigned encodeVmcnt(const IsaVersion &Version, unsigned Waitcnt, unsigned Vmcnt)
unsigned decodeExpcnt(const IsaVersion &Version, unsigned Waitcnt)
const MIMGBaseOpcodeInfo * getMIMGBaseOpcode(unsigned Opc)
bool isVI(const MCSubtargetInfo &STI)
unsigned hasKernargPreload(const MCSubtargetInfo &STI)
LLVM_READNONE unsigned getOperandSize(const MCOperandInfo &OpInfo)
bool isCI(const MCSubtargetInfo &STI)
unsigned encodeLgkmcnt(const IsaVersion &Version, unsigned Waitcnt, unsigned Lgkmcnt)
LLVM_READONLY const MIMGBaseOpcodeInfo * getMIMGBaseOpcodeInfo(unsigned BaseOpcode)
unsigned decodeVmcnt(const IsaVersion &Version, unsigned Waitcnt)
bool isInlinableLiteralI16(int32_t Literal, bool HasInv2Pi)
bool hasVOPD(const MCSubtargetInfo &STI)
bool isInlinableLiteral64(int64_t Literal, bool HasInv2Pi)
Is this literal inlinable?
bool isPermlane16(unsigned Opc)
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
@ C
The default llvm calling convention, compatible with C.
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
@ UNDEF
UNDEF - An undefined node.
Predicate getPredicate(unsigned Condition, unsigned Hint)
Return predicate consisting of specified condition and hint bits.
void validate(const Triple &TT, const FeatureBitset &FeatureBits)
Reg
All possible values of the reg field in the ModR/M byte.
std::optional< const char * > toString(const std::optional< DWARFFormValue > &V)
Take an optional DWARFFormValue and try to extract a string value from it.
Scope
Defines the scope in which this symbol should be visible: Default – Visible in the public interface o...
This is an optimization pass for GlobalISel generic memory operations.
bool errorToBool(Error Err)
Helper for converting an Error to a bool.
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
int popcount(T Value) noexcept
Count the number of set bits in a value.
unsigned encode(MaybeAlign A)
Returns a representation of the alignment that encodes undefined as 0.
bool isUIntN(unsigned N, uint64_t x)
Checks if an unsigned integer fits into the given (dynamic) bit width.
static StringRef getCPU(StringRef CPU)
Processes a CPU name.
testing::Matcher< const detail::ErrorHolder & > Failed()
void PrintError(const Twine &Msg)
constexpr bool isPowerOf2_64(uint64_t Value)
Return true if the argument is a power of two > 0 (64 bit edition.)
T bit_ceil(T Value)
Returns the smallest integral power of two no smaller than Value if Value is nonzero.
Target & getTheR600Target()
The target for R600 GPUs.
constexpr uint32_t Hi_32(uint64_t Value)
Return the high 32 bits of a 64 bit value.
constexpr bool isUInt(uint64_t x)
Checks if an unsigned integer fits into the given bit width.
constexpr uint32_t Lo_32(uint64_t Value)
Return the low 32 bits of a 64 bit value.
constexpr T divideCeil(U Numerator, V Denominator)
Returns the integer ceil(Numerator / Denominator).
@ First
Helpers to iterate all locations in the MemoryEffectsBase class.
Target & getTheGCNTarget()
The target for GCN GPUs.
bool isIntN(unsigned N, int64_t x)
Checks if a signed integer fits into the given (dynamic) bit width.
uint64_t alignTo(uint64_t Size, Align A)
Returns a multiple of A needed to store Size bytes.
DWARFExpression::Operation Op
raw_ostream & operator<<(raw_ostream &OS, const APFixedPoint &FX)
unsigned M0(unsigned Val)
auto find_if(R &&Range, UnaryPredicate P)
Provide wrappers to std::find_if which take ranges instead of having to pass begin/end explicitly.
void validate(const MCSubtargetInfo *STI, MCContext &Ctx)
void initDefault(const MCSubtargetInfo *STI, MCContext &Ctx, bool InitMCExpr=true)
Instruction set architecture version.
const MCExpr * compute_pgm_rsrc2
const MCExpr * kernarg_size
const MCExpr * kernarg_preload
const MCExpr * compute_pgm_rsrc3
const MCExpr * private_segment_fixed_size
const MCExpr * compute_pgm_rsrc1
static void bits_set(const MCExpr *&Dst, const MCExpr *Value, uint32_t Shift, uint32_t Mask, MCContext &Ctx)
const MCExpr * group_segment_fixed_size
static MCKernelDescriptor getDefaultAmdhsaKernelDescriptor(const MCSubtargetInfo *STI, MCContext &Ctx)
const MCExpr * kernel_code_properties
Represents the counter values to wait for in an s_waitcnt instruction.
static const fltSemantics & IEEEsingle() LLVM_READNONE
static const fltSemantics & IEEEhalf() LLVM_READNONE
static const fltSemantics & BFloat() LLVM_READNONE
opStatus
IEEE-754R 7: Default exception handling.
This struct is a compact representation of a valid (non-zero power of two) alignment.
Description of the encoding of one expression Op.
RegisterMCAsmParser - Helper template for registering a target specific assembly parser,...
uint32_t group_segment_fixed_size
uint32_t private_segment_fixed_size