/// Register category tag passed around by the register-parsing and
/// GPR-accounting helpers in this file (e.g. ParseAMDGPURegister,
/// usesRegister).
enum RegisterKind {
  IS_UNKNOWN,
  IS_VGPR,
  IS_SGPR,
  IS_AGPR,
  IS_TTMP,
  IS_SPECIAL
};
71 SMLoc StartLoc, EndLoc;
72 const AMDGPUAsmParser *AsmParser;
75 AMDGPUOperand(KindTy Kind_,
const AMDGPUAsmParser *AsmParser_)
76 : Kind(Kind_), AsmParser(AsmParser_) {}
78 using Ptr = std::unique_ptr<AMDGPUOperand>;
86 bool hasFPModifiers()
const {
return Abs || Neg; }
87 bool hasIntModifiers()
const {
return Sext; }
88 bool hasModifiers()
const {
return hasFPModifiers() || hasIntModifiers(); }
89 bool isForcedLit()
const {
return Lit == LitModifier::Lit; }
90 bool isForcedLit64()
const {
return Lit == LitModifier::Lit64; }
92 int64_t getFPModifiersOperand()
const {
99 int64_t getIntModifiersOperand()
const {
105 int64_t getModifiersOperand()
const {
106 assert(!(hasFPModifiers() && hasIntModifiers())
107 &&
"fp and int modifiers should not be used simultaneously");
108 if (hasFPModifiers())
109 return getFPModifiersOperand();
110 if (hasIntModifiers())
111 return getIntModifiersOperand();
115 friend raw_ostream &
operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods);
189 ImmTyMatrixAScaleFmt,
190 ImmTyMatrixBScaleFmt,
223 mutable int MCOpIdx = -1;
226 bool isToken()
const override {
return Kind == Token; }
228 bool isSymbolRefExpr()
const {
232 bool isImm()
const override {
233 return Kind == Immediate;
236 bool isInlinableImm(MVT type)
const;
237 bool isLiteralImm(MVT type)
const;
239 bool isRegKind()
const {
240 return Kind == Register;
243 bool isReg()
const override {
244 return isRegKind() && !hasModifiers();
247 bool isRegOrInline(
unsigned RCID, MVT type)
const {
248 return isRegClass(RCID) || isInlinableImm(type);
252 return isRegOrInline(RCID, type) || isLiteralImm(type);
255 bool isRegOrImmWithInt16InputMods()
const {
259 template <
bool IsFake16>
bool isRegOrImmWithIntT16InputMods()
const {
261 IsFake16 ? AMDGPU::VS_32RegClassID : AMDGPU::VS_16RegClassID, MVT::i16);
264 bool isRegOrImmWithInt32InputMods()
const {
268 bool isRegOrInlineImmWithInt16InputMods()
const {
269 return isRegOrInline(AMDGPU::VS_32RegClassID, MVT::i16);
272 template <
bool IsFake16>
bool isRegOrInlineImmWithIntT16InputMods()
const {
273 return isRegOrInline(
274 IsFake16 ? AMDGPU::VS_32RegClassID : AMDGPU::VS_16RegClassID, MVT::i16);
277 bool isRegOrInlineImmWithInt32InputMods()
const {
278 return isRegOrInline(AMDGPU::VS_32RegClassID, MVT::i32);
281 bool isRegOrImmWithInt64InputMods()
const {
285 bool isRegOrImmWithFP16InputMods()
const {
289 template <
bool IsFake16>
bool isRegOrImmWithFPT16InputMods()
const {
291 IsFake16 ? AMDGPU::VS_32RegClassID : AMDGPU::VS_16RegClassID, MVT::f16);
294 bool isRegOrImmWithFP32InputMods()
const {
298 bool isRegOrImmWithFP64InputMods()
const {
302 template <
bool IsFake16>
bool isRegOrInlineImmWithFP16InputMods()
const {
303 return isRegOrInline(
304 IsFake16 ? AMDGPU::VS_32RegClassID : AMDGPU::VS_16RegClassID, MVT::f16);
307 bool isRegOrInlineImmWithFP32InputMods()
const {
308 return isRegOrInline(AMDGPU::VS_32RegClassID, MVT::f32);
311 bool isRegOrInlineImmWithFP64InputMods()
const {
312 return isRegOrInline(AMDGPU::VS_64RegClassID, MVT::f64);
315 bool isVRegWithInputMods(
unsigned RCID)
const {
return isRegClass(RCID); }
317 bool isVRegWithFP32InputMods()
const {
318 return isVRegWithInputMods(AMDGPU::VGPR_32RegClassID);
321 bool isVRegWithFP64InputMods()
const {
322 return isVRegWithInputMods(AMDGPU::VReg_64RegClassID);
325 bool isPackedFP16InputMods()
const {
329 bool isPackedVGPRFP32InputMods()
const {
333 bool isVReg()
const {
334 return isRegClass(AMDGPU::VGPR_32RegClassID) ||
335 isRegClass(AMDGPU::VReg_64RegClassID) ||
336 isRegClass(AMDGPU::VReg_96RegClassID) ||
337 isRegClass(AMDGPU::VReg_128RegClassID) ||
338 isRegClass(AMDGPU::VReg_160RegClassID) ||
339 isRegClass(AMDGPU::VReg_192RegClassID) ||
340 isRegClass(AMDGPU::VReg_256RegClassID) ||
341 isRegClass(AMDGPU::VReg_512RegClassID) ||
342 isRegClass(AMDGPU::VReg_1024RegClassID);
345 bool isVReg32()
const {
346 return isRegClass(AMDGPU::VGPR_32RegClassID);
349 bool isVReg32OrOff()
const {
350 return isOff() || isVReg32();
354 return isRegKind() &&
getReg() == AMDGPU::SGPR_NULL;
357 bool isAV_LdSt_32_Align2_RegOp()
const {
358 return isRegClass(AMDGPU::VGPR_32RegClassID) ||
359 isRegClass(AMDGPU::AGPR_32RegClassID);
362 bool isVRegWithInputMods()
const;
363 template <
bool IsFake16>
bool isT16_Lo128VRegWithInputMods()
const;
364 template <
bool IsFake16>
bool isT16VRegWithInputMods()
const;
366 bool isSDWAOperand(MVT type)
const;
367 bool isSDWAFP16Operand()
const;
368 bool isSDWAFP32Operand()
const;
369 bool isSDWAInt16Operand()
const;
370 bool isSDWAInt32Operand()
const;
372 bool isImmTy(ImmTy ImmT)
const {
373 return isImm() &&
Imm.Type == ImmT;
376 template <ImmTy Ty>
bool isImmTy()
const {
return isImmTy(Ty); }
378 bool isImmLiteral()
const {
return isImmTy(ImmTyNone); }
380 bool isImmModifier()
const {
381 return isImm() &&
Imm.Type != ImmTyNone;
384 bool isOModSI()
const {
return isImmTy(ImmTyOModSI); }
385 bool isDim()
const {
return isImmTy(ImmTyDim); }
386 bool isR128A16()
const {
return isImmTy(ImmTyR128A16); }
387 bool isOff()
const {
return isImmTy(ImmTyOff); }
388 bool isExpTgt()
const {
return isImmTy(ImmTyExpTgt); }
389 bool isOffen()
const {
return isImmTy(ImmTyOffen); }
390 bool isIdxen()
const {
return isImmTy(ImmTyIdxen); }
391 bool isAddr64()
const {
return isImmTy(ImmTyAddr64); }
392 bool isSMEMOffsetMod()
const {
return isImmTy(ImmTySMEMOffsetMod); }
393 bool isFlatOffset()
const {
return isImmTy(ImmTyOffset) || isImmTy(ImmTyInstOffset); }
394 bool isGDS()
const {
return isImmTy(ImmTyGDS); }
395 bool isLDS()
const {
return isImmTy(ImmTyLDS); }
396 bool isCPol()
const {
return isImmTy(ImmTyCPol); }
397 bool isIndexKey8bit()
const {
return isImmTy(ImmTyIndexKey8bit); }
398 bool isIndexKey16bit()
const {
return isImmTy(ImmTyIndexKey16bit); }
399 bool isIndexKey32bit()
const {
return isImmTy(ImmTyIndexKey32bit); }
400 bool isMatrixAFMT()
const {
return isImmTy(ImmTyMatrixAFMT); }
401 bool isMatrixBFMT()
const {
return isImmTy(ImmTyMatrixBFMT); }
402 bool isMatrixAScale()
const {
return isImmTy(ImmTyMatrixAScale); }
403 bool isMatrixBScale()
const {
return isImmTy(ImmTyMatrixBScale); }
404 bool isMatrixAScaleFmt()
const {
return isImmTy(ImmTyMatrixAScaleFmt); }
405 bool isMatrixBScaleFmt()
const {
return isImmTy(ImmTyMatrixBScaleFmt); }
406 bool isMatrixAReuse()
const {
return isImmTy(ImmTyMatrixAReuse); }
407 bool isMatrixBReuse()
const {
return isImmTy(ImmTyMatrixBReuse); }
408 bool isTFE()
const {
return isImmTy(ImmTyTFE); }
409 bool isFORMAT()
const {
return isImmTy(ImmTyFORMAT) &&
isUInt<7>(
getImm()); }
410 bool isDppFI()
const {
return isImmTy(ImmTyDppFI); }
411 bool isSDWADstSel()
const {
return isImmTy(ImmTySDWADstSel); }
412 bool isSDWASrc0Sel()
const {
return isImmTy(ImmTySDWASrc0Sel); }
413 bool isSDWASrc1Sel()
const {
return isImmTy(ImmTySDWASrc1Sel); }
414 bool isSDWADstUnused()
const {
return isImmTy(ImmTySDWADstUnused); }
415 bool isInterpSlot()
const {
return isImmTy(ImmTyInterpSlot); }
416 bool isInterpAttr()
const {
return isImmTy(ImmTyInterpAttr); }
417 bool isInterpAttrChan()
const {
return isImmTy(ImmTyInterpAttrChan); }
418 bool isOpSel()
const {
return isImmTy(ImmTyOpSel); }
419 bool isOpSelHi()
const {
return isImmTy(ImmTyOpSelHi); }
420 bool isNegLo()
const {
return isImmTy(ImmTyNegLo); }
421 bool isNegHi()
const {
return isImmTy(ImmTyNegHi); }
422 bool isBitOp3()
const {
return isImmTy(ImmTyBitOp3) &&
isUInt<8>(
getImm()); }
423 bool isDone()
const {
return isImmTy(ImmTyDone); }
424 bool isRowEn()
const {
return isImmTy(ImmTyRowEn); }
426 bool isRegOrImm()
const {
427 return isReg() || isImm();
430 bool isRegClass(
unsigned RCID)
const;
434 bool isRegOrInlineNoMods(
unsigned RCID, MVT type)
const {
435 return isRegOrInline(RCID, type) && !hasModifiers();
438 bool isSCSrcB16()
const {
439 return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i16);
442 bool isSCSrcV2B16()
const {
446 bool isSCSrc_b32()
const {
447 return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i32);
450 bool isSCSrc_b64()
const {
451 return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::i64);
454 bool isBoolReg()
const;
456 bool isSCSrcF16()
const {
457 return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f16);
460 bool isSCSrcV2F16()
const {
464 bool isSCSrcF32()
const {
465 return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f32);
468 bool isSCSrcF64()
const {
469 return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::f64);
472 bool isSSrc_b32()
const {
473 return isSCSrc_b32() || isLiteralImm(MVT::i32) || isExpr();
476 bool isSSrc_b16()
const {
return isSCSrcB16() || isLiteralImm(MVT::i16); }
478 bool isSSrcV2B16()
const {
483 bool isSSrc_b64()
const {
486 return isSCSrc_b64() || isLiteralImm(MVT::i64) ||
487 (((
const MCTargetAsmParser *)AsmParser)
488 ->getAvailableFeatures()[AMDGPU::Feature64BitLiterals] &&
492 bool isSSrc_f32()
const {
493 return isSCSrc_b32() || isLiteralImm(MVT::f32) || isExpr();
496 bool isSSrcF64()
const {
return isSCSrc_b64() || isLiteralImm(MVT::f64); }
498 bool isSSrc_bf16()
const {
return isSCSrcB16() || isLiteralImm(MVT::bf16); }
500 bool isSSrc_f16()
const {
return isSCSrcB16() || isLiteralImm(MVT::f16); }
502 bool isSSrcV2F16()
const {
507 bool isSSrcV2FP32()
const {
512 bool isSCSrcV2FP32()
const {
517 bool isSSrcV2INT32()
const {
522 bool isSCSrcV2INT32()
const {
524 return isSCSrc_b32();
527 bool isSSrcOrLds_b32()
const {
528 return isRegOrInlineNoMods(AMDGPU::SRegOrLds_32RegClassID, MVT::i32) ||
529 isLiteralImm(MVT::i32) || isExpr();
532 bool isVCSrc_b32()
const {
533 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i32);
536 bool isVCSrc_b32_Lo256()
const {
537 return isRegOrInlineNoMods(AMDGPU::VS_32_Lo256RegClassID, MVT::i32);
540 bool isVCSrc_b64_Lo256()
const {
541 return isRegOrInlineNoMods(AMDGPU::VS_64_Lo256RegClassID, MVT::i64);
544 bool isVCSrc_b64()
const {
545 return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::i64);
548 bool isVCSrcT_b16()
const {
549 return isRegOrInlineNoMods(AMDGPU::VS_16RegClassID, MVT::i16);
552 bool isVCSrcTB16_Lo128()
const {
553 return isRegOrInlineNoMods(AMDGPU::VS_16_Lo128RegClassID, MVT::i16);
556 bool isVCSrcFake16B16_Lo128()
const {
557 return isRegOrInlineNoMods(AMDGPU::VS_32_Lo128RegClassID, MVT::i16);
560 bool isVCSrc_b16()
const {
561 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i16);
564 bool isVCSrc_v2b16()
const {
return isVCSrc_b16(); }
566 bool isVCSrc_f32()
const {
567 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f32);
570 bool isVCSrc_f64()
const {
571 return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::f64);
574 bool isVCSrcTBF16()
const {
575 return isRegOrInlineNoMods(AMDGPU::VS_16RegClassID, MVT::bf16);
578 bool isVCSrcT_f16()
const {
579 return isRegOrInlineNoMods(AMDGPU::VS_16RegClassID, MVT::f16);
582 bool isVCSrcT_bf16()
const {
583 return isRegOrInlineNoMods(AMDGPU::VS_16RegClassID, MVT::f16);
586 bool isVCSrcTBF16_Lo128()
const {
587 return isRegOrInlineNoMods(AMDGPU::VS_16_Lo128RegClassID, MVT::bf16);
590 bool isVCSrcTF16_Lo128()
const {
591 return isRegOrInlineNoMods(AMDGPU::VS_16_Lo128RegClassID, MVT::f16);
594 bool isVCSrcFake16BF16_Lo128()
const {
595 return isRegOrInlineNoMods(AMDGPU::VS_32_Lo128RegClassID, MVT::bf16);
598 bool isVCSrcFake16F16_Lo128()
const {
599 return isRegOrInlineNoMods(AMDGPU::VS_32_Lo128RegClassID, MVT::f16);
602 bool isVCSrc_bf16()
const {
603 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::bf16);
606 bool isVCSrc_f16()
const {
607 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f16);
610 bool isVCSrc_v2bf16()
const {
return isVCSrc_bf16(); }
612 bool isVCSrc_v2f16()
const {
return isVCSrc_f16(); }
614 bool isVSrc_b32()
const {
615 return isVCSrc_f32() || isLiteralImm(MVT::i32) || isExpr();
618 bool isVSrc_b64()
const {
return isVCSrc_f64() || isLiteralImm(MVT::i64); }
620 bool isVSrcT_b16()
const {
return isVCSrcT_b16() || isLiteralImm(MVT::i16); }
622 bool isVSrcT_b16_Lo128()
const {
623 return isVCSrcTB16_Lo128() || isLiteralImm(MVT::i16);
626 bool isVSrcFake16_b16_Lo128()
const {
627 return isVCSrcFake16B16_Lo128() || isLiteralImm(MVT::i16);
630 bool isVSrc_b16()
const {
return isVCSrc_b16() || isLiteralImm(MVT::i16); }
632 bool isVSrc_v2b16()
const {
return isVSrc_b16() || isLiteralImm(MVT::v2i16); }
634 bool isVCSrcV2FP32()
const {
return isVCSrc_f64(); }
636 bool isVSrc_v2f32()
const {
return isVSrc_f64() || isLiteralImm(MVT::v2f32); }
638 bool isVCSrc_v2b32()
const {
return isVCSrc_b64(); }
640 bool isVSrc_v2b32()
const {
return isVSrc_b64() || isLiteralImm(MVT::v2i32); }
642 bool isVSrc_f32()
const {
643 return isVCSrc_f32() || isLiteralImm(MVT::f32) || isExpr();
646 bool isVSrc_f64()
const {
return isVCSrc_f64() || isLiteralImm(MVT::f64); }
648 bool isVSrcT_bf16()
const {
return isVCSrcTBF16() || isLiteralImm(MVT::bf16); }
650 bool isVSrcT_f16()
const {
return isVCSrcT_f16() || isLiteralImm(MVT::f16); }
652 bool isVSrcT_bf16_Lo128()
const {
653 return isVCSrcTBF16_Lo128() || isLiteralImm(MVT::bf16);
656 bool isVSrcT_f16_Lo128()
const {
657 return isVCSrcTF16_Lo128() || isLiteralImm(MVT::f16);
660 bool isVSrcFake16_bf16_Lo128()
const {
661 return isVCSrcFake16BF16_Lo128() || isLiteralImm(MVT::bf16);
664 bool isVSrcFake16_f16_Lo128()
const {
665 return isVCSrcFake16F16_Lo128() || isLiteralImm(MVT::f16);
668 bool isVSrc_bf16()
const {
return isVCSrc_bf16() || isLiteralImm(MVT::bf16); }
670 bool isVSrc_f16()
const {
return isVCSrc_f16() || isLiteralImm(MVT::f16); }
672 bool isVSrc_v2bf16()
const {
673 return isVSrc_bf16() || isLiteralImm(MVT::v2bf16);
676 bool isVSrc_v2f16()
const {
return isVSrc_f16() || isLiteralImm(MVT::v2f16); }
678 bool isVSrc_v2f16_splat()
const {
return isVSrc_v2f16(); }
680 bool isVSrc_NoInline_v2f16()
const {
return isVSrc_v2f16(); }
682 bool isVISrcB32()
const {
683 return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i32);
686 bool isVISrcB16()
const {
687 return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i16);
690 bool isVISrcV2B16()
const {
694 bool isVISrcF32()
const {
695 return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f32);
698 bool isVISrcF16()
const {
699 return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f16);
702 bool isVISrcV2F16()
const {
703 return isVISrcF16() || isVISrcB32();
706 bool isVISrc_64_bf16()
const {
707 return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::bf16);
710 bool isVISrc_64_f16()
const {
711 return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::f16);
714 bool isVISrc_64_b32()
const {
715 return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::i32);
718 bool isVISrc_64B64()
const {
719 return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::i64);
722 bool isVISrc_64_f64()
const {
723 return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::f64);
726 bool isVISrc_64V2FP32()
const {
727 return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::f32);
730 bool isVISrc_64V2INT32()
const {
731 return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::i32);
734 bool isVISrc_256_b32()
const {
735 return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::i32);
738 bool isVISrc_256_f32()
const {
739 return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::f32);
742 bool isVISrc_256B64()
const {
743 return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::i64);
746 bool isVISrc_256_f64()
const {
747 return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::f64);
750 bool isVISrc_512_f64()
const {
751 return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::f64);
754 bool isVISrc_128B16()
const {
755 return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::i16);
758 bool isVISrc_128V2B16()
const {
759 return isVISrc_128B16();
762 bool isVISrc_128_b32()
const {
763 return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::i32);
766 bool isVISrc_128_f32()
const {
767 return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::f32);
770 bool isVISrc_256V2FP32()
const {
771 return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::f32);
774 bool isVISrc_256V2INT32()
const {
775 return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::i32);
778 bool isVISrc_512_b32()
const {
779 return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::i32);
782 bool isVISrc_512B16()
const {
783 return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::i16);
786 bool isVISrc_512V2B16()
const {
787 return isVISrc_512B16();
790 bool isVISrc_512_f32()
const {
791 return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::f32);
794 bool isVISrc_512F16()
const {
795 return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::f16);
798 bool isVISrc_512V2F16()
const {
799 return isVISrc_512F16() || isVISrc_512_b32();
802 bool isVISrc_1024_b32()
const {
803 return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::i32);
806 bool isVISrc_1024B16()
const {
807 return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::i16);
810 bool isVISrc_1024V2B16()
const {
811 return isVISrc_1024B16();
814 bool isVISrc_1024_f32()
const {
815 return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::f32);
818 bool isVISrc_1024F16()
const {
819 return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::f16);
822 bool isVISrc_1024V2F16()
const {
823 return isVISrc_1024F16() || isVISrc_1024_b32();
826 bool isAISrcB32()
const {
827 return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i32);
830 bool isAISrcB16()
const {
831 return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i16);
834 bool isAISrcV2B16()
const {
838 bool isAISrcF32()
const {
839 return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f32);
842 bool isAISrcF16()
const {
843 return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f16);
846 bool isAISrcV2F16()
const {
847 return isAISrcF16() || isAISrcB32();
850 bool isAISrc_64B64()
const {
851 return isRegOrInlineNoMods(AMDGPU::AReg_64RegClassID, MVT::i64);
854 bool isAISrc_64_f64()
const {
855 return isRegOrInlineNoMods(AMDGPU::AReg_64RegClassID, MVT::f64);
858 bool isAISrc_128_b32()
const {
859 return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i32);
862 bool isAISrc_128B16()
const {
863 return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i16);
866 bool isAISrc_128V2B16()
const {
867 return isAISrc_128B16();
870 bool isAISrc_128_f32()
const {
871 return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f32);
874 bool isAISrc_128F16()
const {
875 return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f16);
878 bool isAISrc_128V2F16()
const {
879 return isAISrc_128F16() || isAISrc_128_b32();
882 bool isVISrc_128_bf16()
const {
883 return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::bf16);
886 bool isVISrc_128_f16()
const {
887 return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::f16);
890 bool isVISrc_128V2F16()
const {
891 return isVISrc_128_f16() || isVISrc_128_b32();
894 bool isAISrc_256B64()
const {
895 return isRegOrInlineNoMods(AMDGPU::AReg_256RegClassID, MVT::i64);
898 bool isAISrc_256_f64()
const {
899 return isRegOrInlineNoMods(AMDGPU::AReg_256RegClassID, MVT::f64);
902 bool isAISrc_512_b32()
const {
903 return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i32);
906 bool isAISrc_512B16()
const {
907 return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i16);
910 bool isAISrc_512V2B16()
const {
911 return isAISrc_512B16();
914 bool isAISrc_512_f32()
const {
915 return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f32);
918 bool isAISrc_512F16()
const {
919 return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f16);
922 bool isAISrc_512V2F16()
const {
923 return isAISrc_512F16() || isAISrc_512_b32();
926 bool isAISrc_1024_b32()
const {
927 return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i32);
930 bool isAISrc_1024B16()
const {
931 return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i16);
934 bool isAISrc_1024V2B16()
const {
935 return isAISrc_1024B16();
938 bool isAISrc_1024_f32()
const {
939 return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f32);
942 bool isAISrc_1024F16()
const {
943 return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f16);
946 bool isAISrc_1024V2F16()
const {
947 return isAISrc_1024F16() || isAISrc_1024_b32();
950 bool isKImmFP32()
const {
951 return isLiteralImm(MVT::f32);
954 bool isKImmFP16()
const {
955 return isLiteralImm(MVT::f16);
958 bool isKImmFP64()
const {
return isLiteralImm(MVT::f64); }
960 bool isMem()
const override {
964 bool isExpr()
const {
965 return Kind == Expression;
968 bool isSOPPBrTarget()
const {
return isExpr() || isImm(); }
970 bool isSWaitCnt()
const;
971 bool isDepCtr()
const;
972 bool isSDelayALU()
const;
973 bool isHwreg()
const;
974 bool isSendMsg()
const;
975 bool isWaitEvent()
const;
976 bool isSplitBarrier()
const;
977 bool isSwizzle()
const;
978 bool isSMRDOffset8()
const;
979 bool isSMEMOffset()
const;
980 bool isSMRDLiteralOffset()
const;
982 bool isDPPCtrl()
const;
984 bool isGPRIdxMode()
const;
985 bool isS16Imm()
const;
986 bool isU16Imm()
const;
987 bool isEndpgm()
const;
989 auto getPredicate(std::function<
bool(
const AMDGPUOperand &
Op)>
P)
const {
990 return [
this,
P]() {
return P(*
this); };
995 return StringRef(Tok.Data, Tok.Length);
1003 void setImm(int64_t Val) {
1008 ImmTy getImmTy()
const {
1013 MCRegister
getReg()
const override {
1018 SMLoc getStartLoc()
const override {
1022 SMLoc getEndLoc()
const override {
1026 SMRange getLocRange()
const {
1027 return SMRange(StartLoc, EndLoc);
1030 int getMCOpIdx()
const {
return MCOpIdx; }
1032 Modifiers getModifiers()
const {
1033 assert(isRegKind() || isImmTy(ImmTyNone));
1034 return isRegKind() ?
Reg.Mods :
Imm.Mods;
1037 void setModifiers(Modifiers Mods) {
1038 assert(isRegKind() || isImmTy(ImmTyNone));
1045 bool hasModifiers()
const {
1046 return getModifiers().hasModifiers();
1049 bool hasFPModifiers()
const {
1050 return getModifiers().hasFPModifiers();
1053 bool hasIntModifiers()
const {
1054 return getModifiers().hasIntModifiers();
1057 bool isForcedLit()
const {
1058 return isImmLiteral() && getModifiers().isForcedLit();
1061 bool isForcedLit64()
const {
1062 return isImmLiteral() && getModifiers().isForcedLit64();
1065 uint64_t applyInputFPModifiers(uint64_t Val,
unsigned Size)
const;
1067 void addImmOperands(MCInst &Inst,
unsigned N,
bool ApplyModifiers =
true)
const;
1069 void addLiteralImmOperand(MCInst &Inst, int64_t Val,
bool ApplyModifiers)
const;
1071 void addRegOperands(MCInst &Inst,
unsigned N)
const;
1073 void addRegOrImmOperands(MCInst &Inst,
unsigned N)
const {
1075 addRegOperands(Inst,
N);
1077 addImmOperands(Inst,
N);
1080 void addRegOrImmWithInputModsOperands(MCInst &Inst,
unsigned N)
const {
1081 Modifiers Mods = getModifiers();
1084 addRegOperands(Inst,
N);
1086 addImmOperands(Inst,
N,
false);
1090 void addRegOrImmWithFPInputModsOperands(MCInst &Inst,
unsigned N)
const {
1091 assert(!hasIntModifiers());
1092 addRegOrImmWithInputModsOperands(Inst,
N);
1095 void addRegOrImmWithIntInputModsOperands(MCInst &Inst,
unsigned N)
const {
1096 assert(!hasFPModifiers());
1097 addRegOrImmWithInputModsOperands(Inst,
N);
1100 void addRegWithInputModsOperands(MCInst &Inst,
unsigned N)
const {
1101 Modifiers Mods = getModifiers();
1104 addRegOperands(Inst,
N);
1107 void addRegWithFPInputModsOperands(MCInst &Inst,
unsigned N)
const {
1108 assert(!hasIntModifiers());
1109 addRegWithInputModsOperands(Inst,
N);
1112 void addRegWithIntInputModsOperands(MCInst &Inst,
unsigned N)
const {
1113 assert(!hasFPModifiers());
1114 addRegWithInputModsOperands(Inst,
N);
1117 static void printImmTy(raw_ostream& OS, ImmTy
Type) {
1120 case ImmTyNone: OS <<
"None";
break;
1121 case ImmTyGDS: OS <<
"GDS";
break;
1122 case ImmTyLDS: OS <<
"LDS";
break;
1123 case ImmTyOffen: OS <<
"Offen";
break;
1124 case ImmTyIdxen: OS <<
"Idxen";
break;
1125 case ImmTyAddr64: OS <<
"Addr64";
break;
1126 case ImmTyOffset: OS <<
"Offset";
break;
1127 case ImmTyInstOffset: OS <<
"InstOffset";
break;
1128 case ImmTyOffset0: OS <<
"Offset0";
break;
1129 case ImmTyOffset1: OS <<
"Offset1";
break;
1130 case ImmTySMEMOffsetMod: OS <<
"SMEMOffsetMod";
break;
1131 case ImmTyCPol: OS <<
"CPol";
break;
1132 case ImmTyIndexKey8bit: OS <<
"index_key";
break;
1133 case ImmTyIndexKey16bit: OS <<
"index_key";
break;
1134 case ImmTyIndexKey32bit: OS <<
"index_key";
break;
1135 case ImmTyTFE: OS <<
"TFE";
break;
1136 case ImmTyIsAsync: OS <<
"IsAsync";
break;
1137 case ImmTyD16: OS <<
"D16";
break;
1138 case ImmTyFORMAT: OS <<
"FORMAT";
break;
1139 case ImmTyClamp: OS <<
"Clamp";
break;
1140 case ImmTyOModSI: OS <<
"OModSI";
break;
1141 case ImmTyDPP8: OS <<
"DPP8";
break;
1142 case ImmTyDppCtrl: OS <<
"DppCtrl";
break;
1143 case ImmTyDppRowMask: OS <<
"DppRowMask";
break;
1144 case ImmTyDppBankMask: OS <<
"DppBankMask";
break;
1145 case ImmTyDppBoundCtrl: OS <<
"DppBoundCtrl";
break;
1146 case ImmTyDppFI: OS <<
"DppFI";
break;
1147 case ImmTySDWADstSel: OS <<
"SDWADstSel";
break;
1148 case ImmTySDWASrc0Sel: OS <<
"SDWASrc0Sel";
break;
1149 case ImmTySDWASrc1Sel: OS <<
"SDWASrc1Sel";
break;
1150 case ImmTySDWADstUnused: OS <<
"SDWADstUnused";
break;
1151 case ImmTyDMask: OS <<
"DMask";
break;
1152 case ImmTyDim: OS <<
"Dim";
break;
1153 case ImmTyUNorm: OS <<
"UNorm";
break;
1154 case ImmTyDA: OS <<
"DA";
break;
1155 case ImmTyR128A16: OS <<
"R128A16";
break;
1156 case ImmTyA16: OS <<
"A16";
break;
1157 case ImmTyLWE: OS <<
"LWE";
break;
1158 case ImmTyOff: OS <<
"Off";
break;
1159 case ImmTyExpTgt: OS <<
"ExpTgt";
break;
1160 case ImmTyExpCompr: OS <<
"ExpCompr";
break;
1161 case ImmTyExpVM: OS <<
"ExpVM";
break;
1162 case ImmTyDone: OS <<
"Done";
break;
1163 case ImmTyRowEn: OS <<
"RowEn";
break;
1164 case ImmTyHwreg: OS <<
"Hwreg";
break;
1165 case ImmTySendMsg: OS <<
"SendMsg";
break;
1166 case ImmTyWaitEvent: OS <<
"WaitEvent";
break;
1167 case ImmTyInterpSlot: OS <<
"InterpSlot";
break;
1168 case ImmTyInterpAttr: OS <<
"InterpAttr";
break;
1169 case ImmTyInterpAttrChan: OS <<
"InterpAttrChan";
break;
1170 case ImmTyOpSel: OS <<
"OpSel";
break;
1171 case ImmTyOpSelHi: OS <<
"OpSelHi";
break;
1172 case ImmTyNegLo: OS <<
"NegLo";
break;
1173 case ImmTyNegHi: OS <<
"NegHi";
break;
1174 case ImmTySwizzle: OS <<
"Swizzle";
break;
1175 case ImmTyGprIdxMode: OS <<
"GprIdxMode";
break;
1176 case ImmTyHigh: OS <<
"High";
break;
1177 case ImmTyBLGP: OS <<
"BLGP";
break;
1178 case ImmTyCBSZ: OS <<
"CBSZ";
break;
1179 case ImmTyABID: OS <<
"ABID";
break;
1180 case ImmTyEndpgm: OS <<
"Endpgm";
break;
1181 case ImmTyWaitVDST: OS <<
"WaitVDST";
break;
1182 case ImmTyWaitEXP: OS <<
"WaitEXP";
break;
1183 case ImmTyWaitVAVDst: OS <<
"WaitVAVDst";
break;
1184 case ImmTyWaitVMVSrc: OS <<
"WaitVMVSrc";
break;
1185 case ImmTyBitOp3: OS <<
"BitOp3";
break;
1186 case ImmTyMatrixAFMT: OS <<
"ImmTyMatrixAFMT";
break;
1187 case ImmTyMatrixBFMT: OS <<
"ImmTyMatrixBFMT";
break;
1188 case ImmTyMatrixAScale: OS <<
"ImmTyMatrixAScale";
break;
1189 case ImmTyMatrixBScale: OS <<
"ImmTyMatrixBScale";
break;
1190 case ImmTyMatrixAScaleFmt: OS <<
"ImmTyMatrixAScaleFmt";
break;
1191 case ImmTyMatrixBScaleFmt: OS <<
"ImmTyMatrixBScaleFmt";
break;
1192 case ImmTyMatrixAReuse: OS <<
"ImmTyMatrixAReuse";
break;
1193 case ImmTyMatrixBReuse: OS <<
"ImmTyMatrixBReuse";
break;
1194 case ImmTyScaleSel: OS <<
"ScaleSel" ;
break;
1195 case ImmTyByteSel: OS <<
"ByteSel" ;
break;
1200 void print(raw_ostream &OS,
const MCAsmInfo &MAI)
const override {
1204 <<
" mods: " <<
Reg.Mods <<
'>';
1208 if (getImmTy() != ImmTyNone) {
1209 OS <<
" type: "; printImmTy(OS, getImmTy());
1211 OS <<
" mods: " <<
Imm.Mods <<
'>';
1224 static AMDGPUOperand::Ptr CreateImm(
const AMDGPUAsmParser *AsmParser,
1225 int64_t Val, SMLoc Loc,
1226 ImmTy
Type = ImmTyNone,
1227 bool IsFPImm =
false) {
1228 auto Op = std::make_unique<AMDGPUOperand>(Immediate, AsmParser);
1230 Op->Imm.IsFPImm = IsFPImm;
1232 Op->Imm.Mods = Modifiers();
1238 static AMDGPUOperand::Ptr CreateToken(
const AMDGPUAsmParser *AsmParser,
1239 StringRef Str, SMLoc Loc,
1240 bool HasExplicitEncodingSize =
true) {
1241 auto Res = std::make_unique<AMDGPUOperand>(Token, AsmParser);
1242 Res->Tok.Data = Str.data();
1243 Res->Tok.Length = Str.size();
1244 Res->StartLoc = Loc;
1249 static AMDGPUOperand::Ptr CreateReg(
const AMDGPUAsmParser *AsmParser,
1250 MCRegister
Reg, SMLoc S, SMLoc
E) {
1251 auto Op = std::make_unique<AMDGPUOperand>(Register, AsmParser);
1252 Op->Reg.RegNo =
Reg;
1253 Op->Reg.Mods = Modifiers();
1259 static AMDGPUOperand::Ptr CreateExpr(
const AMDGPUAsmParser *AsmParser,
1260 const class MCExpr *Expr, SMLoc S) {
1261 auto Op = std::make_unique<AMDGPUOperand>(Expression, AsmParser);
1270 OS <<
"abs:" << Mods.Abs <<
" neg: " << Mods.Neg <<
" sext:" << Mods.Sext;
1279#define GET_REGISTER_MATCHER
1280#include "AMDGPUGenAsmMatcher.inc"
1281#undef GET_REGISTER_MATCHER
1282#undef GET_SUBTARGET_FEATURE_NAME
1287class KernelScopeInfo {
1288 int SgprIndexUnusedMin = -1;
1289 int VgprIndexUnusedMin = -1;
1290 int AgprIndexUnusedMin = -1;
1294 void usesSgprAt(
int i) {
1295 if (i >= SgprIndexUnusedMin) {
1296 SgprIndexUnusedMin = ++i;
1299 Ctx->getOrCreateSymbol(
Twine(
".kernel.sgpr_count"));
1305 void usesVgprAt(
int i) {
1306 if (i >= VgprIndexUnusedMin) {
1307 VgprIndexUnusedMin = ++i;
1310 Ctx->getOrCreateSymbol(
Twine(
".kernel.vgpr_count"));
1312 VgprIndexUnusedMin);
1318 void usesAgprAt(
int i) {
1323 if (i >= AgprIndexUnusedMin) {
1324 AgprIndexUnusedMin = ++i;
1327 Ctx->getOrCreateSymbol(
Twine(
".kernel.agpr_count"));
1332 Ctx->getOrCreateSymbol(
Twine(
".kernel.vgpr_count"));
1334 VgprIndexUnusedMin);
1341 KernelScopeInfo() =
default;
1345 MSTI = Ctx->getSubtargetInfo();
1347 usesSgprAt(SgprIndexUnusedMin = -1);
1348 usesVgprAt(VgprIndexUnusedMin = -1);
1350 usesAgprAt(AgprIndexUnusedMin = -1);
1354 void usesRegister(RegisterKind RegKind,
unsigned DwordRegIndex,
1355 unsigned RegWidth) {
1358 usesSgprAt(DwordRegIndex +
divideCeil(RegWidth, 32) - 1);
1361 usesAgprAt(DwordRegIndex +
divideCeil(RegWidth, 32) - 1);
1364 usesVgprAt(DwordRegIndex +
divideCeil(RegWidth, 32) - 1);
1373 MCAsmParser &Parser;
1375 unsigned ForcedEncodingSize = 0;
1376 bool ForcedDPP =
false;
1377 bool ForcedSDWA =
false;
1378 KernelScopeInfo KernelScope;
1379 const unsigned HwMode;
1384#define GET_ASSEMBLER_HEADER
1385#include "AMDGPUGenAsmMatcher.inc"
1390 unsigned getRegOperandSize(
const MCInstrDesc &
Desc,
unsigned OpNo)
const {
1392 int16_t RCID = MII.getOpRegClassID(
Desc.operands()[OpNo], HwMode);
1396 std::optional<AMDGPU::InfoSectionData> InfoData;
1399 void createConstantSymbol(StringRef Id, int64_t Val);
1401 bool ParseAsAbsoluteExpression(uint32_t &Ret);
1402 bool OutOfRangeError(SMRange
Range);
1418 bool calculateGPRBlocks(
const FeatureBitset &Features,
const MCExpr *VCCUsed,
1419 const MCExpr *FlatScrUsed,
bool XNACKUsed,
1420 std::optional<bool> EnableWavefrontSize32,
1421 const MCExpr *NextFreeVGPR, SMRange VGPRRange,
1422 const MCExpr *NextFreeSGPR, SMRange SGPRRange,
1423 const MCExpr *&VGPRBlocks,
const MCExpr *&SGPRBlocks);
1424 bool ParseDirectiveAMDGCNTarget();
1425 bool ParseDirectiveAMDHSACodeObjectVersion();
1426 bool ParseDirectiveAMDHSAKernel();
1427 bool ParseAMDKernelCodeTValue(StringRef
ID, AMDGPUMCKernelCodeT &Header);
1428 bool ParseDirectiveAMDKernelCodeT();
1430 bool subtargetHasRegister(
const MCRegisterInfo &MRI, MCRegister
Reg);
1431 bool ParseDirectiveAMDGPUHsaKernel();
1433 bool ParseDirectiveISAVersion();
1434 bool ParseDirectiveHSAMetadata();
1435 bool ParseDirectivePALMetadataBegin();
1436 bool ParseDirectivePALMetadata();
1437 bool ParseDirectiveAMDGPULDS();
1438 bool ParseDirectiveAMDGPUInfo();
1442 bool ParseToEndDirective(
const char *AssemblerDirectiveBegin,
1443 const char *AssemblerDirectiveEnd,
1444 std::string &CollectString);
// Register-parsing helpers (declarations).
//
// Return convention, as used by the visible callers: AddNextRegisterToList(),
// ParseRegRange() and the four-argument ParseAMDGPURegister() are tested as
// `if (!Helper(...)) return MCRegister();`, i.e. a false result is treated as
// failure. This is the opposite of ParseRegister(), which returns true when
// no register could be parsed.
//
// The MCRegister-returning helpers signal failure with a default-constructed
// (empty) MCRegister, normally after reporting through Error().
1446 bool AddNextRegisterToList(MCRegister &
Reg,
unsigned &RegWidth,
1447 RegisterKind RegKind, MCRegister Reg1, SMLoc Loc);
// Convenience overload; its definition forwards to the token-collecting
// overload below.
1448 bool ParseAMDGPURegister(RegisterKind &RegKind, MCRegister &
Reg,
1449 unsigned &RegNum,
unsigned &RegWidth,
1450 bool RestoreOnFailure =
false);
// Token-collecting overload; its definition assigns Reg from
// ParseSpecialReg(), ParseRegularReg() or ParseRegList().
1451 bool ParseAMDGPURegister(RegisterKind &RegKind, MCRegister &
Reg,
1452 unsigned &RegNum,
unsigned &RegWidth,
1453 SmallVectorImpl<AsmToken> &Tokens);
1454 MCRegister ParseRegularReg(RegisterKind &RegKind,
unsigned &RegNum,
1456 SmallVectorImpl<AsmToken> &Tokens);
1457 MCRegister ParseSpecialReg(RegisterKind &RegKind,
unsigned &RegNum,
1459 SmallVectorImpl<AsmToken> &Tokens);
1460 MCRegister ParseRegList(RegisterKind &RegKind,
unsigned &RegNum,
1462 SmallVectorImpl<AsmToken> &Tokens);
// Produces the first index, the width in bits (32 per register in the range)
// and an optional 16-bit sub-register selector.
1463 bool ParseRegRange(
unsigned &Num,
unsigned &Width,
unsigned &SubReg);
// Validates kind/index/width and returns the register, or an empty
// MCRegister after reporting an error at Loc.
1464 MCRegister getRegularReg(RegisterKind RegKind,
unsigned RegNum,
1465 unsigned SubReg,
unsigned RegWidth, SMLoc Loc);
// Lookahead predicate over the current and the following token; the
// zero-argument form defined later calls it with getToken() and peekToken().
1468 bool isRegister(
const AsmToken &Token,
const AsmToken &NextToken)
const;
// GPR usage tracking symbols. getGprCountSymbolName() yields
// ".amdgcn.next_free_vgpr" or ".amdgcn.next_free_sgpr", or std::nullopt for
// kinds that have no such symbol; the constructor initialises the symbols for
// IS_VGPR and IS_SGPR.
1469 std::optional<StringRef> getGprCountSymbolName(RegisterKind RegKind);
1470 void initializeGprCountSymbol(RegisterKind RegKind);
1471 bool updateGprCountSymbols(RegisterKind RegKind,
unsigned DwordRegIndex,
// Shared implementation behind cvtMubuf() (passes false as the third
// argument) and cvtMubufAtomic() (passes true).
1473 void cvtMubufImpl(MCInst &Inst,
const OperandVector &Operands,
1478 OperandMode_Default,
// Ordered map keyed by AMDGPUOperand::ImmTy with an unsigned value, passed
// by reference to several cvt* helpers.
// NOTE(review): the value is presumably an index into the operand vector -
// confirm against the helpers' definitions.
1482 using OptionalImmIndexMap = std::map<AMDGPUOperand::ImmTy, unsigned>;
// Constructs the target parser: forwards STI/MII to MCTargetAsmParser, binds
// the generic parser, caches the register-info HwMode, computes the available
// feature set and pre-defines a number of assembler symbols.
// Several lines of the body are not visible here, including whatever guards
// the two symbol families below and the definitions of `ISA`, `Symbol` and
// `Code`.
1484 AMDGPUAsmParser(
const MCSubtargetInfo &STI, MCAsmParser &_Parser,
1485 const MCInstrInfo &MII)
1486 : MCTargetAsmParser(STI, MII), Parser(_Parser),
1487 HwMode(STI.getHwMode(MCSubtargetInfo::HwMode_RegInfo)) {
1490 setAvailableFeatures(ComputeAvailableFeatures(getFeatureBits()));
// ISA version symbols, ".amdgcn.gfx_generation_*" spelling.
1494 createConstantSymbol(
".amdgcn.gfx_generation_number",
ISA.Major);
1495 createConstantSymbol(
".amdgcn.gfx_generation_minor",
ISA.Minor);
1496 createConstantSymbol(
".amdgcn.gfx_generation_stepping",
ISA.Stepping);
// Same three values under the ".option.machine_version_*" spelling.
// NOTE(review): presumably the two families are mutually exclusive; the
// selecting condition is not visible here - confirm.
1498 createConstantSymbol(
".option.machine_version_major",
ISA.Major);
1499 createConstantSymbol(
".option.machine_version_minor",
ISA.Minor);
1500 createConstantSymbol(
".option.machine_version_stepping",
ISA.Stepping);
// Set up the next-free VGPR/SGPR tracking symbols.
1503 initializeGprCountSymbol(IS_VGPR);
1504 initializeGprCountSymbol(IS_SGPR);
1509 createConstantSymbol(Symbol, Code);
// Fixed UC_VERSION_* bit constants.
1511 createConstantSymbol(
"UC_VERSION_W64_BIT", 0x2000);
1512 createConstantSymbol(
"UC_VERSION_W32_BIT", 0x4000);
1513 createConstantSymbol(
"UC_VERSION_MDP_BIT", 0x8000);
// Subtarget capability predicates.
// isWave32()/isWave64() read the *available features* bitset (the one the
// constructor computes with ComputeAvailableFeatures()); the has*() queries
// whose bodies are visible read the subtarget feature bits through
// getFeatureBits(). Some bodies and closing braces are not visible here.
1591 bool isWave32()
const {
return getAvailableFeatures()[Feature_isWave32Bit]; }
1593 bool isWave64()
const {
return getAvailableFeatures()[Feature_isWave64Bit]; }
// Consulted by the inline-constant checks in AMDGPUOperand.
1595 bool hasInv2PiInlineImm()
const {
1596 return getFeatureBits()[AMDGPU::FeatureInv2PiInlineImm];
// Consulted by AMDGPUOperand::isLiteralImm() and addLiteralImmOperand().
1599 bool has64BitLiterals()
const {
1600 return getFeatureBits()[AMDGPU::Feature64BitLiterals];
1603 bool hasFlatOffsets()
const {
1604 return getFeatureBits()[AMDGPU::FeatureFlatInstOffsets];
1607 bool hasTrue16Insts()
const {
1608 return getFeatureBits()[AMDGPU::FeatureTrue16BitInsts];
1612 return getFeatureBits()[AMDGPU::FeatureArchitectedFlatScratch];
// Body not visible here.
1615 bool hasSGPR102_SGPR103()
const {
1619 bool hasSGPR104_SGPR105()
const {
return isGFX10Plus(); }
1621 bool hasIntClamp()
const {
1622 return getFeatureBits()[AMDGPU::FeatureIntClamp];
1625 bool hasPartialNSAEncoding()
const {
1626 return getFeatureBits()[AMDGPU::FeaturePartialNSAEncoding];
1629 bool hasGloballyAddressableScratch()
const {
1630 return getFeatureBits()[AMDGPU::FeatureGloballyAddressableScratch];
// Returns the streamer's target streamer viewed as an AMDGPUTargetStreamer.
// The pointer returned by getTargetStreamer() is dereferenced without a null
// check and the downcast is an unchecked static_cast.
1643 AMDGPUTargetStreamer &getTargetStreamer() {
1644 MCTargetStreamer &TS = *getParser().getStreamer().getTargetStreamer();
1645 return static_cast<AMDGPUTargetStreamer &
>(TS);
// Fragment of an accessor whose head is not visible: casts away constness
// of `this` to call MCTargetAsmParser::getContext().
1651 return const_cast<AMDGPUAsmParser *
>(
this)->MCTargetAsmParser::getContext();
// Bodies of the next two accessors are not visible here.
1654 const MCRegisterInfo *getMRI()
const {
1658 const MCInstrInfo *getMII()
const {
// Subtarget feature bits, taken from getSTI().
1664 const FeatureBitset &getFeatureBits()
const {
1665 return getSTI().getFeatureBits();
// Setters for the encoding overrides; each stores its argument into the
// corresponding member.
1668 void setForcedEncodingSize(
unsigned Size) { ForcedEncodingSize =
Size; }
1669 void setForcedDPP(
bool ForceDPP_) { ForcedDPP = ForceDPP_; }
1670 void setForcedSDWA(
bool ForceSDWA_) { ForcedSDWA = ForceSDWA_; }
// Getters for the same state.
1672 unsigned getForcedEncodingSize()
const {
return ForcedEncodingSize; }
// True exactly when the forced encoding size is 64.
1673 bool isForcedVOP3()
const {
return ForcedEncodingSize == 64; }
1674 bool isForcedDPP()
const {
return ForcedDPP; }
1675 bool isForcedSDWA()
const {
return ForcedSDWA; }
// Declarations only; definitions are not visible here.
1676 ArrayRef<unsigned> getMatchedVariants()
const;
1677 StringRef getMatchedVariantName()
const;
// Register entry points.
//  - parseRegister(bool): parses a register and returns it as an operand
//    (null on failure, as ParseRegister() checks).
//  - ParseRegister(...): wraps the operand form and copies register, start
//    and end location into the out-parameters; returns true when nothing was
//    parsed.
//  - parseRegister(MCRegister&, ...) / tryParseRegister(...): the
//    MCTargetAsmParser overrides; both are defined in terms of
//    ParseRegister(), passing false and true respectively as the last
//    argument.
1679 std::unique_ptr<AMDGPUOperand> parseRegister(
bool RestoreOnFailure =
false);
1680 bool ParseRegister(MCRegister &RegNo, SMLoc &StartLoc, SMLoc &EndLoc,
1681 bool RestoreOnFailure);
1682 bool parseRegister(MCRegister &
Reg, SMLoc &StartLoc, SMLoc &EndLoc)
override;
1683 ParseStatus tryParseRegister(MCRegister &
Reg, SMLoc &StartLoc,
1684 SMLoc &EndLoc)
override;
// Remaining MCTargetAsmParser overrides and top-level parse routines
// (declarations only).
1685 unsigned checkTargetMatchPredicate(MCInst &Inst)
override;
1686 unsigned validateTargetOperandClass(MCParsedAsmOperand &
Op,
1687 unsigned Kind)
override;
1688 bool matchAndEmitInstruction(SMLoc IDLoc,
unsigned &Opcode,
1690 uint64_t &ErrorInfo,
1691 bool MatchingInlineAsm)
override;
1692 bool ParseDirective(AsmToken DirectiveID)
override;
1693 void onEndOfFile()
override;
1694 ParseStatus parseOperand(
OperandVector &Operands, StringRef Mnemonic,
1695 OperandMode
Mode = OperandMode_Default);
1696 StringRef parseMnemonicSuffix(StringRef Name);
1697 bool parseInstruction(ParseInstructionInfo &Info, StringRef Name,
1701 ParseStatus parseTokenOp(StringRef Name,
OperandVector &Operands);
// Parsers for prefixed operands (declarations only).
// NOTE(review): presumably these handle the `prefix:value` operand syntax;
// the definitions are not visible here - confirm.
// Where an ImmTy parameter is visible it defaults to ImmTyNone.
1703 ParseStatus parseIntWithPrefix(
const char *Prefix, int64_t &
Int);
// Operand-producing form; `ConvertResult` is an optional std::function hook
// taking the parsed value by reference.
1706 parseIntWithPrefix(
const char *Prefix,
OperandVector &Operands,
1707 AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
1708 std::function<
bool(int64_t &)> ConvertResult =
nullptr);
// Note: here the conversion hook is a plain function pointer, unlike the
// std::function used by parseIntWithPrefix above.
1710 ParseStatus parseOperandArrayWithPrefix(
1712 AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
1713 bool (*ConvertResult)(int64_t &) =
nullptr);
1717 AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
1718 bool IgnoreNegative =
false);
1719 unsigned getCPolKind(StringRef Id, StringRef Mnemo,
bool &Disabling)
const;
1721 ParseStatus parseScope(
OperandVector &Operands, int64_t &Scope);
1723 ParseStatus parseStringWithPrefix(StringRef Prefix, StringRef &
Value,
1725 ParseStatus parseStringOrIntWithPrefix(
OperandVector &Operands,
1727 ArrayRef<const char *> Ids,
1729 ParseStatus parseStringOrIntWithPrefix(
OperandVector &Operands,
1731 ArrayRef<const char *> Ids,
1732 AMDGPUOperand::ImmTy
Type);
// Two-token lookahead predicates classifying what starts at `Token`
// (declarations only; classification rules are not visible here).
1735 bool isOperandModifier(
const AsmToken &Token,
const AsmToken &NextToken)
const;
1736 bool isRegOrOperandModifier(
const AsmToken &Token,
const AsmToken &NextToken)
const;
1737 bool isNamedOperandModifier(
const AsmToken &Token,
const AsmToken &NextToken)
const;
1738 bool isOpcodeModifierWithVal(
const AsmToken &Token,
const AsmToken &NextToken)
const;
1739 bool parseSP3NegModifier();
// Immediate / register-or-immediate operand parsers. The definition of
// parseImm() calls itself with a third argument `Lit` after `Operands` and
// `HasSP3AbsModifier`; that parameter's declaration is not visible here.
1740 ParseStatus parseImm(
OperandVector &Operands,
bool HasSP3AbsModifier =
false,
1743 ParseStatus parseRegOrImm(
OperandVector &Operands,
bool HasSP3AbsMod =
false,
// Variants with input modifiers; `AllowImm` defaults to true.
1745 ParseStatus parseRegOrImmWithFPInputMods(
OperandVector &Operands,
1746 bool AllowImm =
true);
1747 ParseStatus parseRegOrImmWithIntInputMods(
OperandVector &Operands,
1748 bool AllowImm =
true);
1749 ParseStatus parseRegWithFPInputMods(
OperandVector &Operands);
1750 ParseStatus parseRegWithIntInputMods(
OperandVector &Operands);
1753 AMDGPUOperand::ImmTy ImmTy);
// Matrix operand parsers: each takes the operand list, an operand name and
// the ImmTy to tag the result with (declarations only).
1757 ParseStatus tryParseMatrixFMT(
OperandVector &Operands, StringRef Name,
1758 AMDGPUOperand::ImmTy
Type);
1761 ParseStatus tryParseMatrixScale(
OperandVector &Operands, StringRef Name,
1762 AMDGPUOperand::ImmTy
Type);
1765 ParseStatus tryParseMatrixScaleFmt(
OperandVector &Operands, StringRef Name,
1766 AMDGPUOperand::ImmTy
Type);
// Format parsers; each yields the format through `Format`.
// NOTE(review): presumably for buffer data/number formats; encoding details
// are not visible here - confirm.
1770 ParseStatus parseDfmtNfmt(int64_t &
Format);
1771 ParseStatus parseUfmt(int64_t &
Format);
1772 ParseStatus parseSymbolicSplitFormat(StringRef FormatStr, SMLoc Loc,
1774 ParseStatus parseSymbolicUnifiedFormat(StringRef FormatStr, SMLoc Loc,
1777 ParseStatus parseSymbolicOrNumericFormat(int64_t &
Format);
1778 ParseStatus parseNumericFormat(int64_t &
Format);
1782 bool tryParseFmt(
const char *Pref, int64_t MaxVal, int64_t &Val);
1783 bool matchDfmtNfmt(int64_t &Dfmt, int64_t &Nfmt, StringRef FormatStr, SMLoc Loc);
// Counter / dependency / delay operand parsers (declarations only).
1787 bool parseCnt(int64_t &IntVal);
1790 bool parseDepCtr(int64_t &IntVal,
unsigned &Mask);
1791 void depCtrError(SMLoc Loc,
int ErrorId, StringRef DepCtrName);
1794 bool parseDelay(int64_t &Delay);
// Value of one operand component plus flags; the flags start out false.
// Some members (e.g. `Val`, `Loc`) are declared on lines not visible here.
// The single-argument constructor is not `explicit`, so an int64_t converts
// implicitly to OperandInfoTy.
1800 struct OperandInfoTy {
1803 bool IsSymbolic =
false;
1804 bool IsDefined =
false;
1806 constexpr OperandInfoTy(int64_t Val) : Val(Val) {}
// A named field with a human-readable description and a bit width; the
// default value is stored through the OperandInfoTy base.
1809 struct StructuredOpField : OperandInfoTy {
// NOTE(review): this declares a second `IsDefined` that hides
// OperandInfoTy::IsDefined when accessed through StructuredOpField - confirm
// the shadowing is intentional.
1813 bool IsDefined =
false;
1815 constexpr StructuredOpField(StringLiteral Id, StringLiteral Desc,
1816 unsigned Width, int64_t
Default)
1817 : OperandInfoTy(
Default), Id(Id), Desc(Desc), Width(Width) {}
1818 virtual ~StructuredOpField() =
default;
// Reports "invalid <Desc>: <Err>" at this field's location through the
// parser. The return statement is not visible here.
1820 bool Error(AMDGPUAsmParser &Parser,
const Twine &Err)
const {
1821 Parser.Error(Loc,
"invalid " + Desc +
": " + Err);
// Overridable validation. The visible part has two failure diagnostics: one
// for fields unsupported on the GPU and one for values exceeding `Width`
// bits; the conditions guarding them are not visible here.
1825 virtual bool validate(AMDGPUAsmParser &Parser)
const {
1827 return Error(Parser,
"not supported on this GPU");
1829 return Error(Parser,
"only " + Twine(Width) +
"-bit values are legal");
// sendmsg / hwreg operand helpers; the components are passed as
// OperandInfoTy values (declarations only).
1837 bool parseSendMsgBody(OperandInfoTy &Msg, OperandInfoTy &
Op, OperandInfoTy &Stream);
1838 bool validateSendMsg(
const OperandInfoTy &Msg,
1839 const OperandInfoTy &
Op,
1840 const OperandInfoTy &Stream);
1842 ParseStatus parseHwregFunc(OperandInfoTy &HwReg, OperandInfoTy &
Offset,
1843 OperandInfoTy &Width);
// Operand lookup and source-location helpers (declarations only). Several
// parameter lists are truncated in this view.
1845 const AMDGPUOperand &findMCOperand(
const OperandVector &Operands,
1848 static SMLoc getLaterLoc(SMLoc a, SMLoc b);
1850 SMLoc getFlatOffsetLoc(
const OperandVector &Operands)
const;
1851 SMLoc getSMEMOffsetLoc(
const OperandVector &Operands)
const;
// Two overloads: by MCInst operand index, and by predicate over operands.
1854 SMLoc getOperandLoc(
const OperandVector &Operands,
int MCOpIdx)
const;
1855 SMLoc getOperandLoc(std::function<
bool(
const AMDGPUOperand&)>
Test,
1857 SMLoc getImmLoc(AMDGPUOperand::ImmTy
Type,
// Semantic checks on an MCInst (declarations only). Each takes the
// instruction; many also take the parsed operand list or an SMLoc.
// NOTE(review): the operand list / location is presumably there so that
// diagnostics can point at source text, and the checks are presumably run
// after matching; neither the call sites nor the meaning of the bool result
// is visible here - confirm.
1861 bool validateInstruction(
const MCInst &Inst, SMLoc IDLoc,
1863 bool validateOffset(
const MCInst &Inst,
const OperandVector &Operands);
1864 bool validateFlatOffset(
const MCInst &Inst,
const OperandVector &Operands);
1865 bool validateSMEMOffset(
const MCInst &Inst,
const OperandVector &Operands);
1866 bool validateSOPLiteral(
const MCInst &Inst,
const OperandVector &Operands);
1867 bool validateConstantBusLimitations(
const MCInst &Inst,
const OperandVector &Operands);
// VOPD-related checks and encoding retries.
1868 std::optional<unsigned> checkVOPDRegBankConstraints(
const MCInst &Inst,
1870 bool validateVOPD(
const MCInst &Inst,
const OperandVector &Operands);
1871 bool tryVOPD(
const MCInst &Inst);
1872 bool tryVOPD3(
const MCInst &Inst);
1873 bool tryAnotherVOPDEncoding(
const MCInst &Inst);
1875 bool validateIntClampSupported(
const MCInst &Inst);
// MIMG-related checks.
1876 bool validateMIMGAtomicDMask(
const MCInst &Inst);
1877 bool validateMIMGGatherDMask(
const MCInst &Inst);
1878 bool validateMovrels(
const MCInst &Inst,
const OperandVector &Operands);
1879 bool validateMIMGDataSize(
const MCInst &Inst, SMLoc IDLoc);
1880 bool validateMIMGAddrSize(
const MCInst &Inst, SMLoc IDLoc);
1881 bool validateMIMGD16(
const MCInst &Inst);
1882 bool validateMIMGDim(
const MCInst &Inst,
const OperandVector &Operands);
1883 bool validateTensorR128(
const MCInst &Inst);
1884 bool validateMIMGMSAA(
const MCInst &Inst);
1885 bool validateOpSel(
const MCInst &Inst);
1886 bool validateTrue16OpSel(
const MCInst &Inst);
1887 bool validateNeg(
const MCInst &Inst, AMDGPU::OpName OpName);
1888 bool validateDPP(
const MCInst &Inst,
const OperandVector &Operands);
1889 bool validateVccOperand(MCRegister
Reg)
const;
1890 bool validateVOPLiteral(
const MCInst &Inst,
const OperandVector &Operands);
1891 bool validateMAIAccWrite(
const MCInst &Inst,
const OperandVector &Operands);
1892 bool validateMAISrc2(
const MCInst &Inst,
const OperandVector &Operands);
1893 bool validateMFMA(
const MCInst &Inst,
const OperandVector &Operands);
1894 bool validateAGPRLdSt(
const MCInst &Inst)
const;
1895 bool validateVGPRAlign(
const MCInst &Inst)
const;
1896 bool validateBLGP(
const MCInst &Inst,
const OperandVector &Operands);
1897 bool validateDS(
const MCInst &Inst,
const OperandVector &Operands);
1898 bool validateGWS(
const MCInst &Inst,
const OperandVector &Operands);
1899 bool validateDivScale(
const MCInst &Inst);
1900 bool validateWaitCnt(
const MCInst &Inst,
const OperandVector &Operands);
1901 bool validateCoherencyBits(
const MCInst &Inst,
const OperandVector &Operands,
1903 bool validateTHAndScopeBits(
const MCInst &Inst,
const OperandVector &Operands,
1904 const unsigned CPol);
1905 bool validateTFE(
const MCInst &Inst,
const OperandVector &Operands);
1906 bool validateLdsDirect(
const MCInst &Inst,
const OperandVector &Operands);
1907 bool validateWMMA(
const MCInst &Inst,
const OperandVector &Operands);
// Constant-bus and inline-constant queries used by the checks above.
1908 unsigned getConstantBusLimit(
unsigned Opcode)
const;
1909 bool usesConstantBus(
const MCInst &Inst,
unsigned OpIdx);
1910 bool isInlineConstant(
const MCInst &Inst,
unsigned OpIdx)
const;
1911 MCRegister findImplicitSGPRReadInVOP(
const MCInst &Inst)
const;
// Mnemonic support queries against a feature set, optionally restricted to a
// list of matcher variants (declarations only).
1913 bool isSupportedMnemo(StringRef Mnemo,
1914 const FeatureBitset &FBS);
1915 bool isSupportedMnemo(StringRef Mnemo,
1916 const FeatureBitset &FBS,
1917 ArrayRef<unsigned> Variants);
1918 bool checkUnsupportedInstruction(StringRef Name, SMLoc IDLoc);
// Token-level helpers. parseImm() uses trySkipId() as a condition:
// `if (trySkipId("lit")) Lit = LitModifier::Lit;`, so a true result means the
// identifier was matched.
1920 bool isId(
const StringRef Id)
const;
1921 bool isId(
const AsmToken &Token,
const StringRef Id)
const;
1923 StringRef getId()
const;
1924 bool trySkipId(
const StringRef Id);
1925 bool trySkipId(
const StringRef Pref,
const StringRef Id);
// The default diagnostic text is part of the interface: "expected a string"
// for parseString(), empty for parseId().
1929 bool parseString(StringRef &Val,
const StringRef ErrMsg =
"expected a string");
1930 bool parseId(StringRef &Val,
const StringRef ErrMsg =
"");
1936 StringRef getTokenStr()
const;
1937 AsmToken peekToken(
bool ShouldSkipSpace =
true);
1939 SMLoc getLoc()
const;
// Further overrides and the generated-matcher hook for custom operands
// (declarations only).
1943 void onBeginOfFile()
override;
1944 bool parsePrimaryExpr(
const MCExpr *&Res, SMLoc &EndLoc)
override;
1946 ParseStatus parseCustomOperand(
OperandVector &Operands,
unsigned MCK);
// Swizzle operand parsing (declarations only).
// parseSwizzleOperand(s) parse one / `OpNum` integer arguments constrained to
// [MinVal, MaxVal] by signature; the per-mode parsers each produce the
// encoded immediate through `Imm`.
// NOTE(review): whether the bounds are inclusive, and the return convention,
// are not visible here - confirm against the definitions.
1956 bool parseSwizzleOperand(int64_t &
Op,
const unsigned MinVal,
1957 const unsigned MaxVal,
const Twine &ErrMsg,
// `Op` points at caller-provided storage for `OpNum` values.
1959 bool parseSwizzleOperands(
const unsigned OpNum, int64_t*
Op,
1960 const unsigned MinVal,
1961 const unsigned MaxVal,
1962 const StringRef ErrMsg);
1964 bool parseSwizzleOffset(int64_t &
Imm);
1965 bool parseSwizzleMacro(int64_t &
Imm);
1966 bool parseSwizzleQuadPerm(int64_t &
Imm);
1967 bool parseSwizzleBitmaskPerm(int64_t &
Imm);
1968 bool parseSwizzleBroadcast(int64_t &
Imm);
1969 bool parseSwizzleSwap(int64_t &
Imm);
1970 bool parseSwizzleReverse(int64_t &
Imm);
1971 bool parseSwizzleFFT(int64_t &
Imm);
1972 bool parseSwizzleRotate(int64_t &
Imm);
1975 int64_t parseGPRIdxMacro();
// Conversion hooks: each receives the MCInst under construction and the
// parsed operands.
// NOTE(review): presumably these are the custom converters referenced by the
// generated matcher and they populate `Inst` from `Operands`; the
// definitions are not visible here - confirm.
//
// cvtMubuf / cvtMubufAtomic are thin wrappers over cvtMubufImpl(), differing
// only in the boolean they pass (false / true).
1977 void cvtMubuf(MCInst &Inst,
const OperandVector &Operands) { cvtMubufImpl(Inst, Operands,
false); }
1978 void cvtMubufAtomic(MCInst &Inst,
const OperandVector &Operands) { cvtMubufImpl(Inst, Operands,
true); }
1983 OptionalImmIndexMap &OptionalIdx);
1984 void cvtScaledMFMA(MCInst &Inst,
const OperandVector &Operands);
1985 void cvtVOP3OpSel(MCInst &Inst,
const OperandVector &Operands);
1988 void cvtSWMMAC(MCInst &Inst,
const OperandVector &Operands);
// Overload that additionally receives the optional-immediate index map.
1991 void cvtVOP3OpSel(MCInst &Inst,
const OperandVector &Operands,
1992 OptionalImmIndexMap &OptionalIdx);
1994 OptionalImmIndexMap &OptionalIdx);
1996 void cvtVOP3Interp(MCInst &Inst,
const OperandVector &Operands);
1997 void cvtVINTERP(MCInst &Inst,
const OperandVector &Operands);
1998 void cvtOpSelHelper(MCInst &Inst,
unsigned OpSel);
2000 bool parseDimId(
unsigned &Encoding);
// DPP helpers.
2002 bool convertDppBoundCtrl(int64_t &BoundCtrl);
2005 bool isSupportedDPPCtrl(StringRef Ctrl,
const OperandVector &Operands);
2006 int64_t parseDPPCtrlSel(StringRef Ctrl);
2007 int64_t parseDPPCtrlPerm();
// `IsDPP8` selects the DPP8 flavour; the visible wrapper bodies below call
// cvtDPP / cvtVOP3DPP with it set to true.
2008 void cvtDPP(MCInst &Inst,
const OperandVector &Operands,
bool IsDPP8 =
false);
2010 cvtDPP(Inst, Operands,
true);
2012 void cvtVOP3DPP(MCInst &Inst,
const OperandVector &Operands,
2013 bool IsDPP8 =
false);
2014 void cvtVOP3DPP8(MCInst &Inst,
const OperandVector &Operands) {
2015 cvtVOP3DPP(Inst, Operands,
true);
// SDWA helpers.
2018 ParseStatus parseSDWASel(
OperandVector &Operands, StringRef Prefix,
2019 AMDGPUOperand::ImmTy
Type);
2021 void cvtSdwaVOP1(MCInst &Inst,
const OperandVector &Operands);
2022 void cvtSdwaVOP2(MCInst &Inst,
const OperandVector &Operands);
2023 void cvtSdwaVOP2b(MCInst &Inst,
const OperandVector &Operands);
2024 void cvtSdwaVOP2e(MCInst &Inst,
const OperandVector &Operands);
2025 void cvtSdwaVOPC(MCInst &Inst,
const OperandVector &Operands);
// Tail of a declaration whose head is not visible here; both skip flags
// default to false.
2027 uint64_t BasicInstType,
2028 bool SkipDstVcc =
false,
2029 bool SkipSrcVcc =
false);
// Decides whether this operand can be an inline constant for an operand of
// type `type`.
// Only fragments of the body are visible here. From them: typed immediates
// (ImmTy other than ImmTyNone) and operands carrying a lit modifier are
// special-cased up front; 64-bit types (f64/i64) are handled separately from
// narrower ones; an APFloat is built from the raw 64-bit value and converted
// with round-to-nearest-even; and every visible check passes the subtarget's
// hasInv2PiInlineImm() flag. The result of each branch is not visible here.
2138bool AMDGPUOperand::isInlinableImm(
MVT type)
const {
2148 if (!isImmTy(ImmTyNone)) {
2153 if (getModifiers().
Lit != LitModifier::None)
2163 if (type == MVT::f64 || type == MVT::i64) {
2165 AsmParser->hasInv2PiInlineImm());
// Reinterpret the stored 64-bit value as an IEEE double.
2168 APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64,
Imm.Val));
2187 APFloat::rmNearestTiesToEven, &Lost);
2194 uint32_t ImmVal = FPLiteral.bitcastToAPInt().getZExtValue();
2196 AsmParser->hasInv2PiInlineImm());
2201 static_cast<int32_t
>(FPLiteral.bitcastToAPInt().getZExtValue()),
2202 AsmParser->hasInv2PiInlineImm());
2206 if (type == MVT::f64 || type == MVT::i64) {
2208 AsmParser->hasInv2PiInlineImm());
// 16-bit case: low 16 bits, sign-extended.
2217 static_cast<int16_t
>(
Literal.getLoBits(16).getSExtValue()),
2218 type, AsmParser->hasInv2PiInlineImm());
// 32-bit case: low 32 bits.
2222 static_cast<int32_t
>(
Literal.getLoBits(32).getZExtValue()),
2223 AsmParser->hasInv2PiInlineImm());
// Decides whether this operand can be encoded as a literal for an operand of
// type `type`. Only fragments of the body are visible here.
2226bool AMDGPUOperand::isLiteralImm(MVT type)
const {
2228 if (!isImmTy(ImmTyNone)) {
// 64-bit operand types combined with subtarget support for 64-bit literals.
2233 (type == MVT::i64 || type == MVT::f64) && AsmParser->has64BitLiterals();
2238 if (type == MVT::f64 && hasFPModifiers()) {
2258 if (type == MVT::f64) {
2263 if (type == MVT::i64) {
// Packed types are checked against a scalar type: v2f16 -> f16, while both
// v2i16 and v2f32 map to f32 (the fallback arm is not visible here).
2276 MVT ExpectedType = (type == MVT::v2f16) ? MVT::f16
2277 : (type == MVT::v2i16) ? MVT::f32
2278 : (type == MVT::v2f32) ? MVT::f32
2281 APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64,
Imm.Val));
// True when this operand is a register and that register is a member of the
// register class identified by RCID.
2285bool AMDGPUOperand::isRegClass(
unsigned RCID)
const {
2286 return isRegKind() && AsmParser->getMRI()->getRegClass(RCID).contains(
getReg());
// Accepts VGPR_32 registers, and VReg_64 registers only when the subtarget
// has FeatureDPALU_DPP. One alternative of the disjunction is on a line not
// visible here.
2289bool AMDGPUOperand::isVRegWithInputMods()
const {
2290 return isRegClass(AMDGPU::VGPR_32RegClassID) ||
2292 (isRegClass(AMDGPU::VReg_64RegClassID) &&
2293 AsmParser->getFeatureBits()[AMDGPU::FeatureDPALU_DPP]);
// T16 variants: the IsFake16 instantiation checks the 32-bit register class,
// the other the 16-bit register class.
2296template <
bool IsFake16>
2297bool AMDGPUOperand::isT16_Lo128VRegWithInputMods()
const {
2298 return isRegClass(IsFake16 ? AMDGPU::VGPR_32_Lo128RegClassID
2299 : AMDGPU::VGPR_16_Lo128RegClassID);
2302template <
bool IsFake16>
bool AMDGPUOperand::isT16VRegWithInputMods()
const {
2303 return isRegClass(IsFake16 ? AMDGPU::VGPR_32RegClassID
2304 : AMDGPU::VGPR_16RegClassID);
// SDWA source operand check, parameterised by the operand's value type.
// On GFX9+ the operand must be a VS_32 register or an inline constant of
// `type`; the VI branch and the fallback are on lines not visible here.
2307bool AMDGPUOperand::isSDWAOperand(MVT type)
const {
2308 if (AsmParser->isVI())
2310 if (AsmParser->isGFX9Plus())
2311 return isRegClass(AMDGPU::VS_32RegClassID) || isInlinableImm(type);
// Typed convenience wrappers over isSDWAOperand().
2315bool AMDGPUOperand::isSDWAFP16Operand()
const {
2316 return isSDWAOperand(MVT::f16);
2319bool AMDGPUOperand::isSDWAFP32Operand()
const {
2320 return isSDWAOperand(MVT::f32);
2323bool AMDGPUOperand::isSDWAInt16Operand()
const {
2324 return isSDWAOperand(MVT::i16);
2327bool AMDGPUOperand::isSDWAInt32Operand()
const {
2328 return isSDWAOperand(MVT::i32);
// A modifier-free register that satisfies isSCSrc_b64() in wave64 mode or
// isSCSrc_b32() in wave32 mode.
2331bool AMDGPUOperand::isBoolReg()
const {
2332 return isReg() && ((AsmParser->isWave64() && isSCSrc_b64()) ||
2333 (AsmParser->isWave32() && isSCSrc_b32()));
// Applies this operand's FP input modifiers to the raw bits in `Val`.
// `Size` is the operand size in bytes: the sign mask selects bit
// (Size * 8 - 1), the top bit of a Size-byte value, so Size must be in 1..8
// for the shift to be well defined.
// How the mask is combined with Val is on lines not visible here.
2336uint64_t AMDGPUOperand::applyInputFPModifiers(uint64_t Val,
unsigned Size)
const
// Precondition: an untyped immediate that actually has FP modifiers.
2338 assert(isImmTy(ImmTyNone) &&
Imm.Mods.hasFPModifiers());
2341 const uint64_t FpSignMask = (1ULL << (
Size * 8 - 1));
// Appends this immediate to `Inst`. Only part of the body is visible here.
// The visible call to addLiteralImmOperand() enables modifier application
// only for untyped immediates (ImmTyNone) that carry FP modifiers; the
// visible assert states that an untyped immediate reaching the other path
// must have no modifiers.
2353void AMDGPUOperand::addImmOperands(MCInst &Inst,
unsigned N,
bool ApplyModifiers)
const {
2363 addLiteralImmOperand(Inst,
Imm.Val,
2365 isImmTy(ImmTyNone) &&
Imm.Mods.hasFPModifiers());
2367 assert(!isImmTy(ImmTyNone) || !hasModifiers());
// Encodes the value `Val` as an operand of `Inst`, optionally applying the FP
// input modifiers first. Only fragments of this long function are visible
// here; the notes below cover the visible statements only.
2372void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val,
bool ApplyModifiers)
const {
2373 const auto& InstDesc = AsmParser->getMII()->get(Inst.
getOpcode());
2378 if (ApplyModifiers) {
2381 Val = applyInputFPModifiers(Val,
Size);
// Operand type of the slot being filled, from the instruction description.
2385 uint8_t OpTy = InstDesc.operands()[OpNum].OperandType;
2387 bool CanUse64BitLiterals =
2388 AsmParser->has64BitLiterals() &&
2391 MCContext &Ctx = AsmParser->getContext();
2400 if (
Lit == LitModifier::None &&
2402 AsmParser->hasInv2PiInlineImm())) {
2410 bool HasMandatoryLiteral =
// A value with non-zero low 32 bits that cannot be kept exactly: warn, then
// clear the low half. The parser pointer is const, hence the const_cast to
// emit the warning.
2413 if (
Literal.getLoBits(32) != 0 &&
2414 (InstDesc.getSize() != 4 || !AsmParser->has64BitLiterals()) &&
2415 !HasMandatoryLiteral) {
2416 const_cast<AMDGPUAsmParser *
>(AsmParser)->
Warning(
2418 "Can't encode literal as exact 64-bit floating-point operand. "
2419 "Low 32-bits will be set to zero");
2420 Val &= 0xffffffff00000000u;
// With no explicit modifier the literal may be promoted to lit64 when the
// subtarget allows 64-bit literals (remaining conditions not visible).
2426 if (CanUse64BitLiterals &&
Lit == LitModifier::None &&
2432 Lit = LitModifier::Lit64;
2433 }
else if (
Lit == LitModifier::Lit) {
2447 if (CanUse64BitLiterals &&
Lit == LitModifier::None &&
2449 Lit = LitModifier::Lit64;
// NOTE(review): the constant is presumably a 64-bit encoding of 1/(2*pi),
// given that it is paired with hasInv2PiInlineImm() - confirm.
2456 if (
Lit == LitModifier::None && AsmParser->hasInv2PiInlineImm() &&
2457 Literal == 0x3fc45f306725feed) {
2492 APFloat::rmNearestTiesToEven, &lost);
// Replace Val with the raw bits of the converted floating-point literal.
2496 Val = FPLiteral.bitcastToAPInt().getZExtValue();
2503 if (
Lit != LitModifier::None) {
2533 if (
Lit == LitModifier::None &&
2543 if (!AsmParser->has64BitLiterals() ||
Lit == LitModifier::Lit)
2550 if (
Lit == LitModifier::None &&
// In the next two visible cases the value is moved into the upper 32 bits.
2558 if (!AsmParser->has64BitLiterals()) {
2559 Val =
static_cast<uint64_t
>(Val) << 32;
2566 if (
Lit == LitModifier::Lit ||
2568 Val =
static_cast<uint64_t
>(Val) << 32;
2572 if (
Lit == LitModifier::Lit)
2598 if (
Lit != LitModifier::None) {
// Heads of three definitions whose bodies are not visible here.
// addRegOperands: by signature, adds this register operand to `Inst`.
2606void AMDGPUOperand::addRegOperands(MCInst &Inst,
unsigned N)
const {
2611bool AMDGPUOperand::isInlineValue()
const {
// createConstantSymbol: called from the constructor with (name, value) pairs.
// NOTE(review): presumably binds the symbol `Id` to the constant `Val` in the
// MC context - confirm against the body.
2619void AMDGPUAsmParser::createConstantSymbol(StringRef Id, int64_t Val) {
// Body fragment of a register-class lookup (the function head and the
// selector statements are not visible here). It branches on the register
// kind `Is` and returns a register class ID; within each kind the visible
// returns are listed in increasing register size.
// NOTE(review): presumably the selector is the register width; confirm.
// VGPR classes: 32 ... 1024 bits.
2630 if (Is == IS_VGPR) {
2634 return AMDGPU::VGPR_32RegClassID;
2636 return AMDGPU::VReg_64RegClassID;
2638 return AMDGPU::VReg_96RegClassID;
2640 return AMDGPU::VReg_128RegClassID;
2642 return AMDGPU::VReg_160RegClassID;
2644 return AMDGPU::VReg_192RegClassID;
2646 return AMDGPU::VReg_224RegClassID;
2648 return AMDGPU::VReg_256RegClassID;
2650 return AMDGPU::VReg_288RegClassID;
2652 return AMDGPU::VReg_320RegClassID;
2654 return AMDGPU::VReg_352RegClassID;
2656 return AMDGPU::VReg_384RegClassID;
2658 return AMDGPU::VReg_512RegClassID;
2660 return AMDGPU::VReg_1024RegClassID;
// TTMP classes: only 32, 64, 128, 256 and 512 bits.
2662 }
else if (Is == IS_TTMP) {
2666 return AMDGPU::TTMP_32RegClassID;
2668 return AMDGPU::TTMP_64RegClassID;
2670 return AMDGPU::TTMP_128RegClassID;
2672 return AMDGPU::TTMP_256RegClassID;
2674 return AMDGPU::TTMP_512RegClassID;
// SGPR classes: 32 ... 512 bits (no 1024-bit class in the visible list).
2676 }
else if (Is == IS_SGPR) {
2680 return AMDGPU::SGPR_32RegClassID;
2682 return AMDGPU::SGPR_64RegClassID;
2684 return AMDGPU::SGPR_96RegClassID;
2686 return AMDGPU::SGPR_128RegClassID;
2688 return AMDGPU::SGPR_160RegClassID;
2690 return AMDGPU::SGPR_192RegClassID;
2692 return AMDGPU::SGPR_224RegClassID;
2694 return AMDGPU::SGPR_256RegClassID;
2696 return AMDGPU::SGPR_288RegClassID;
2698 return AMDGPU::SGPR_320RegClassID;
2700 return AMDGPU::SGPR_352RegClassID;
2702 return AMDGPU::SGPR_384RegClassID;
2704 return AMDGPU::SGPR_512RegClassID;
// AGPR classes: 32 ... 1024 bits.
2706 }
else if (Is == IS_AGPR) {
2710 return AMDGPU::AGPR_32RegClassID;
2712 return AMDGPU::AReg_64RegClassID;
2714 return AMDGPU::AReg_96RegClassID;
2716 return AMDGPU::AReg_128RegClassID;
2718 return AMDGPU::AReg_160RegClassID;
2720 return AMDGPU::AReg_192RegClassID;
2722 return AMDGPU::AReg_224RegClassID;
2724 return AMDGPU::AReg_256RegClassID;
2726 return AMDGPU::AReg_288RegClassID;
2728 return AMDGPU::AReg_320RegClassID;
2730 return AMDGPU::AReg_352RegClassID;
2732 return AMDGPU::AReg_384RegClassID;
2734 return AMDGPU::AReg_512RegClassID;
2736 return AMDGPU::AReg_1024RegClassID;
// Name -> register table for special (non-indexed) registers, written as a
// chain of .Case(name, register) calls. The head of the chain, its default
// value and the enclosing function are not visible here.
// Many registers are reachable under two spellings, with and without a
// "src_" prefix (e.g. "shared_base" / "src_shared_base"); both spellings map
// to the same register.
2744 .
Case(
"exec", AMDGPU::EXEC)
2745 .
Case(
"vcc", AMDGPU::VCC)
2746 .
Case(
"flat_scratch", AMDGPU::FLAT_SCR)
2747 .
Case(
"xnack_mask", AMDGPU::XNACK_MASK)
2748 .
Case(
"shared_base", AMDGPU::SRC_SHARED_BASE)
2749 .
Case(
"src_shared_base", AMDGPU::SRC_SHARED_BASE)
2750 .
Case(
"shared_limit", AMDGPU::SRC_SHARED_LIMIT)
2751 .
Case(
"src_shared_limit", AMDGPU::SRC_SHARED_LIMIT)
2752 .
Case(
"private_base", AMDGPU::SRC_PRIVATE_BASE)
2753 .
Case(
"src_private_base", AMDGPU::SRC_PRIVATE_BASE)
2754 .
Case(
"private_limit", AMDGPU::SRC_PRIVATE_LIMIT)
2755 .
Case(
"src_private_limit", AMDGPU::SRC_PRIVATE_LIMIT)
2756 .
Case(
"src_flat_scratch_base_lo", AMDGPU::SRC_FLAT_SCRATCH_BASE_LO)
2757 .
Case(
"src_flat_scratch_base_hi", AMDGPU::SRC_FLAT_SCRATCH_BASE_HI)
2758 .
Case(
"pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID)
2759 .
Case(
"src_pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID)
2760 .
Case(
"lds_direct", AMDGPU::LDS_DIRECT)
2761 .
Case(
"src_lds_direct", AMDGPU::LDS_DIRECT)
2762 .
Case(
"m0", AMDGPU::M0)
2763 .
Case(
"vccz", AMDGPU::SRC_VCCZ)
2764 .
Case(
"src_vccz", AMDGPU::SRC_VCCZ)
2765 .
Case(
"execz", AMDGPU::SRC_EXECZ)
2766 .
Case(
"src_execz", AMDGPU::SRC_EXECZ)
2767 .
Case(
"scc", AMDGPU::SRC_SCC)
2768 .
Case(
"src_scc", AMDGPU::SRC_SCC)
2769 .
Case(
"tba", AMDGPU::TBA)
2770 .
Case(
"tma", AMDGPU::TMA)
// Low/high halves of the 64-bit special registers.
2771 .
Case(
"flat_scratch_lo", AMDGPU::FLAT_SCR_LO)
2772 .
Case(
"flat_scratch_hi", AMDGPU::FLAT_SCR_HI)
2773 .
Case(
"xnack_mask_lo", AMDGPU::XNACK_MASK_LO)
2774 .
Case(
"xnack_mask_hi", AMDGPU::XNACK_MASK_HI)
2775 .
Case(
"vcc_lo", AMDGPU::VCC_LO)
2776 .
Case(
"vcc_hi", AMDGPU::VCC_HI)
2777 .
Case(
"exec_lo", AMDGPU::EXEC_LO)
2778 .
Case(
"exec_hi", AMDGPU::EXEC_HI)
2779 .
Case(
"tma_lo", AMDGPU::TMA_LO)
2780 .
Case(
"tma_hi", AMDGPU::TMA_HI)
2781 .
Case(
"tba_lo", AMDGPU::TBA_LO)
2782 .
Case(
"tba_hi", AMDGPU::TBA_HI)
2783 .
Case(
"pc", AMDGPU::PC_REG)
2784 .
Case(
"null", AMDGPU::SGPR_NULL)
// Parses a register via the operand-producing parseRegister() and copies the
// register and its start/end locations into the out-parameters.
// Returns true when no register could be parsed (parseRegister() returned
// null); the out-parameters are left untouched in that case. The tail of the
// function is not visible here.
// NOTE(review): `RestoreOnFailure` is accepted but the visible call is
// `parseRegister()` with no argument, so the callee uses its default (false)
// regardless of what the caller passed (tryParseRegister() passes true) -
// confirm whether the flag should be forwarded.
2788bool AMDGPUAsmParser::ParseRegister(MCRegister &RegNo, SMLoc &StartLoc,
2789 SMLoc &EndLoc,
bool RestoreOnFailure) {
2790 auto R = parseRegister();
2791 if (!R)
return true;
2793 RegNo =
R->getReg();
2794 StartLoc =
R->getStartLoc();
2795 EndLoc =
R->getEndLoc();
// MCTargetAsmParser override: forwards to ParseRegister() with
// RestoreOnFailure = false.
2799bool AMDGPUAsmParser::parseRegister(MCRegister &
Reg, SMLoc &StartLoc,
2801 return ParseRegister(
Reg, StartLoc, EndLoc,
false);
// MCTargetAsmParser override: forwards to ParseRegister() with
// RestoreOnFailure = true, records whether the attempt left errors pending,
// then clears those pending errors. How `Result` and `PendingErrors` are
// turned into a ParseStatus is on lines not visible here.
2804ParseStatus AMDGPUAsmParser::tryParseRegister(MCRegister &
Reg, SMLoc &StartLoc,
2806 bool Result = ParseRegister(
Reg, StartLoc, EndLoc,
true);
2807 bool PendingErrors = getParser().hasPendingError();
2808 getParser().clearPendingErrors();
// Extends the register (list) accumulated in `Reg`/`RegWidth` with the next
// list element `Reg1`; diagnostics are reported at `Loc`.
// Callers treat a false result as failure. Only part of the body is visible.
2816bool AMDGPUAsmParser::AddNextRegisterToList(MCRegister &
Reg,
unsigned &RegWidth,
2817 RegisterKind RegKind,
2818 MCRegister Reg1, SMLoc Loc) {
// Special registers: a *_LO register followed by its matching *_HI is
// recognised as a pair. For FLAT_SCR and XNACK_MASK the visible code
// replaces Reg with the combined 64-bit register.
// NOTE(review): presumably the other pairs are merged the same way - confirm.
2821 if (
Reg == AMDGPU::EXEC_LO && Reg1 == AMDGPU::EXEC_HI) {
2826 if (
Reg == AMDGPU::FLAT_SCR_LO && Reg1 == AMDGPU::FLAT_SCR_HI) {
2827 Reg = AMDGPU::FLAT_SCR;
2831 if (
Reg == AMDGPU::XNACK_MASK_LO && Reg1 == AMDGPU::XNACK_MASK_HI) {
2832 Reg = AMDGPU::XNACK_MASK;
2836 if (
Reg == AMDGPU::VCC_LO && Reg1 == AMDGPU::VCC_HI) {
2841 if (
Reg == AMDGPU::TBA_LO && Reg1 == AMDGPU::TBA_HI) {
2846 if (
Reg == AMDGPU::TMA_LO && Reg1 == AMDGPU::TMA_HI) {
2851 Error(Loc,
"register does not fit in the list");
// Regular registers: the new element must directly follow the registers
// already collected, i.e. equal Reg advanced by RegWidth / 32.
// NOTE(review): this arithmetic assumes consecutive register numbering
// within a kind - confirm.
2857 if (Reg1 !=
Reg + RegWidth / 32) {
2858 Error(Loc,
"registers in a list must have consecutive indices");
// Fragments of the register-name table and its helpers; the enclosing
// definitions are not visible here. Visible pieces: a table entry mapping the
// "ttmp" prefix to IS_TTMP, a kind test that includes IS_VGPR, a prefix match
// using starts_with() on the entry's name, and a helper that succeeds when
// the string parses as a base-10 integer (getAsInteger() returns false on
// success, hence the negation).
2876 {{
"ttmp"}, IS_TTMP},
2882 return Kind == IS_VGPR ||
2890 if (Str.starts_with(
Reg.Name))
2896 return !Str.getAsInteger(10, Num);
// Lookahead: decides from `Token` and `NextToken` whether a register starts
// here. The visible part strips the register-name prefix and inspects the
// remaining suffix when it is non-empty.
2900AMDGPUAsmParser::isRegister(
const AsmToken &Token,
2901 const AsmToken &NextToken)
const {
2916 StringRef RegSuffix = Str.substr(
RegName.size());
2917 if (!RegSuffix.
empty()) {
// Convenience form operating on the current token and the one after it.
2935AMDGPUAsmParser::isRegister()
2937 return isRegister(
getToken(), peekToken());
// Turns (kind, first index, sub-register, width in bits) into a register.
// On any validation failure an error is reported at `Loc` and an empty
// register is returned. Only part of the body is visible here.
2940MCRegister AMDGPUAsmParser::getRegularReg(RegisterKind RegKind,
unsigned RegNum,
2941 unsigned SubReg,
unsigned RegWidth,
// Alignment requirement on the first index; defaults to 1 and is adjusted
// for SGPR/TTMP registers on lines not visible here.
2945 unsigned AlignSize = 1;
2946 if (RegKind == IS_SGPR || RegKind == IS_TTMP) {
2952 if (RegNum % AlignSize != 0) {
2953 Error(Loc,
"invalid register alignment");
2954 return MCRegister();
// Index of the register within its class.
2957 unsigned RegIdx = RegNum / AlignSize;
2960 Error(Loc,
"invalid or unsupported register size");
2961 return MCRegister();
// The index must exist in the class; VGPR indices are additionally capped at
// 255 here.
// NOTE(review): this path returns AMDGPU::NoRegister while every other
// visible error path returns MCRegister() - presumably equivalent, but
// confirm and consider unifying.
2965 const MCRegisterClass RC =
TRI->getRegClass(RCID);
2966 if (RegIdx >= RC.
getNumRegs() || (RegKind == IS_VGPR && RegIdx > 255)) {
2967 Error(Loc,
"register index is out of range");
2968 return AMDGPU::NoRegister;
// Before GFX1250 a VGPR tuple must not extend past register 255.
2971 if (RegKind == IS_VGPR && !
isGFX1250Plus() && RegIdx + RegWidth / 32 > 256) {
2972 Error(Loc,
"register index is out of range");
2973 return MCRegister();
// Parses a register index range and yields the first index in `Num`, the
// total width in bits in `RegWidth` and, for a single register, an optional
// 16-bit sub-register selector. Callers treat a false result as failure.
// Only part of the body is visible here; the conditions guarding the two
// "invalid register index" diagnostics are not shown.
2989bool AMDGPUAsmParser::ParseRegRange(
unsigned &Num,
unsigned &RegWidth,
2991 int64_t RegLo, RegHi;
// Locations of the two indices, kept for diagnostics.
2995 SMLoc FirstIdxLoc = getLoc();
3002 SecondIdxLoc = getLoc();
3013 Error(FirstIdxLoc,
"invalid register index");
3018 Error(SecondIdxLoc,
"invalid register index");
3022 if (RegLo > RegHi) {
3023 Error(FirstIdxLoc,
"first register index should not exceed second index");
// A single-register range may carry a ".l" / ".h" suffix selecting the low
// or high 16-bit half.
3027 if (RegHi == RegLo) {
3028 StringRef RegSuffix = getTokenStr();
3029 if (RegSuffix ==
".l") {
3030 SubReg = AMDGPU::lo16;
3032 }
else if (RegSuffix ==
".h") {
3033 SubReg = AMDGPU::hi16;
// 32 bits per register in the inclusive range [RegLo, RegHi]. The int64_t
// values are narrowed to unsigned here.
3038 Num =
static_cast<unsigned>(RegLo);
3039 RegWidth = 32 * ((RegHi - RegLo) + 1);
// Parses a special (named, non-indexed) register. The visible part only shows
// that RegKind is set to IS_SPECIAL; the lookup itself is not visible here.
3044MCRegister AMDGPUAsmParser::ParseSpecialReg(RegisterKind &RegKind,
3047 SmallVectorImpl<AsmToken> &Tokens) {
3053 RegKind = IS_SPECIAL;
// Parses an indexed register written either with an inline index suffix or
// with a bracketed range, then validates it through getRegularReg().
// Returns an empty register after reporting an error at the register's
// location. Only part of the body is visible here.
3060MCRegister AMDGPUAsmParser::ParseRegularReg(RegisterKind &RegKind,
3063 SmallVectorImpl<AsmToken> &Tokens) {
3065 StringRef
RegName = getTokenStr();
3066 auto Loc = getLoc();
3070 Error(Loc,
"invalid register name");
3071 return MCRegister();
// Suffix form: an optional lo16/hi16 selector may be recognised; the exact
// suffix tests are on lines not visible here.
3079 unsigned SubReg = NoSubRegister;
3080 if (!RegSuffix.
empty()) {
3082 SubReg = AMDGPU::lo16;
3084 SubReg = AMDGPU::hi16;
3088 Error(Loc,
"invalid register index");
3089 return MCRegister();
// Range form.
3094 if (!ParseRegRange(RegNum, RegWidth, SubReg))
3095 return MCRegister();
3098 return getRegularReg(RegKind, RegNum, SubReg, RegWidth, Loc);
// Parses a bracketed, comma-separated list of single 32-bit registers of one
// kind and combines them into one register via AddNextRegisterToList().
// Returns an empty register on failure. Only part of the body is visible.
3101MCRegister AMDGPUAsmParser::ParseRegList(RegisterKind &RegKind,
3102 unsigned &RegNum,
unsigned &RegWidth,
3103 SmallVectorImpl<AsmToken> &Tokens) {
// Location of the whole list, used when the combined register is validated.
3105 auto ListLoc = getLoc();
3108 "expected a register or a list of registers")) {
3109 return MCRegister();
// First element: must be exactly one 32-bit register.
3114 auto Loc = getLoc();
3115 if (!ParseAMDGPURegister(RegKind,
Reg, RegNum, RegWidth))
3116 return MCRegister();
3117 if (RegWidth != 32) {
3118 Error(Loc,
"expected a single 32-bit register");
3119 return MCRegister();
// Subsequent elements: same width and kind as the first.
// NOTE(review): the diagnostics below use `Loc`; whether it is refreshed for
// each element is on a line not visible here - confirm they point at the
// offending element.
3123 RegisterKind NextRegKind;
3125 unsigned NextRegNum, NextRegWidth;
3128 if (!ParseAMDGPURegister(NextRegKind, NextReg,
3129 NextRegNum, NextRegWidth,
3131 return MCRegister();
3133 if (NextRegWidth != 32) {
3134 Error(Loc,
"expected a single 32-bit register");
3135 return MCRegister();
3137 if (NextRegKind != RegKind) {
3138 Error(Loc,
"registers in a list must be of the same kind");
3139 return MCRegister();
3141 if (!AddNextRegisterToList(
Reg, RegWidth, RegKind, NextReg, Loc))
3142 return MCRegister();
3146 "expected a comma or a closing square bracket")) {
3147 return MCRegister();
// Re-validate the accumulated range as one regular register.
3151 Reg = getRegularReg(RegKind, RegNum, NoSubRegister, RegWidth, ListLoc);
// Token-collecting register parser. Assigns `Reg` from one of the three
// register forms (special name, regular indexed register, bracketed list) and
// then checks that the subtarget actually has the register.
// The conditions selecting between the three forms, and the return
// statements, are on lines not visible here.
3156bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind,
3157 MCRegister &
Reg,
unsigned &RegNum,
3159 SmallVectorImpl<AsmToken> &Tokens) {
3160 auto Loc = getLoc();
3164 Reg = ParseSpecialReg(RegKind, RegNum, RegWidth, Tokens);
3166 Reg = ParseRegularReg(RegKind, RegNum, RegWidth, Tokens);
3168 Reg = ParseRegList(RegKind, RegNum, RegWidth, Tokens);
// The sub-parsers are expected to have reported an error on this path.
3173 assert(Parser.hasPendingError());
// Registers unavailable on the subtarget are rejected, with a dedicated
// message for `null`.
3177 if (!subtargetHasRegister(*
TRI,
Reg)) {
3178 if (
Reg == AMDGPU::SGPR_NULL) {
3179 Error(Loc,
"'null' operand is not supported on this GPU");
3182 " register not available on this GPU");
// Convenience overload: runs the token-collecting parser and, when
// RestoreOnFailure is set, drains the collected tokens in a loop.
// NOTE(review): presumably the loop pushes the tokens back to the lexer; its
// body is not visible here. Also note that the loop sits in the branch taken
// when the token-collecting overload returns a *truthy* value. Callers of
// this four-argument form treat a truthy result as success, so if the two
// overloads share that convention the tokens would be restored on success
// rather than on failure - confirm against the full definitions.
3190bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind,
3191 MCRegister &
Reg,
unsigned &RegNum,
3193 bool RestoreOnFailure ) {
3197 if (ParseAMDGPURegister(RegKind,
Reg, RegNum, RegWidth, Tokens)) {
3198 if (RestoreOnFailure) {
3199 while (!Tokens.
empty()) {
// Name of the assembler symbol tracking the next free register of a kind:
// ".amdgcn.next_free_vgpr" or ".amdgcn.next_free_sgpr"; std::nullopt for
// kinds without such a symbol. The selecting conditions are on lines not
// visible here.
3208std::optional<StringRef>
3209AMDGPUAsmParser::getGprCountSymbolName(RegisterKind RegKind) {
3212 return StringRef(
".amdgcn.next_free_vgpr");
3214 return StringRef(
".amdgcn.next_free_sgpr");
3216 return std::nullopt;
// Sets up the tracking symbol for `RegKind`. Must only be called for a kind
// that has a symbol name (asserted); the constructor calls it for IS_VGPR and
// IS_SGPR. The rest of the body is not visible here.
3220void AMDGPUAsmParser::initializeGprCountSymbol(RegisterKind RegKind) {
3221 auto SymbolName = getGprCountSymbolName(RegKind);
3222 assert(SymbolName &&
"initializing invalid register kind");
// Updates the next-free GPR symbol of `RegKind` after a register starting at
// dword index `DwordRegIndex` with `RegWidth` bits has been used.
// parseRegister() treats a false result as failure. Only part of the body is
// visible here.
3228bool AMDGPUAsmParser::updateGprCountSymbols(RegisterKind RegKind,
3229 unsigned DwordRegIndex,
3230 unsigned RegWidth) {
3235 auto SymbolName = getGprCountSymbolName(RegKind);
// Highest dword index touched: the width is rounded up to whole dwords.
3240 int64_t NewMax = DwordRegIndex +
divideCeil(RegWidth, 32) - 1;
// The symbol must be a variable holding an absolute expression; otherwise a
// diagnostic is issued. `!Error(...)` is used as the return value on this
// path.
3244 return !
Error(getLoc(),
3245 ".amdgcn.next_free_{v,s}gpr symbols must be variable");
3249 ".amdgcn.next_free_{v,s}gpr symbols must be absolute expressions");
// Taken when the recorded count does not yet cover NewMax; the update itself
// is on lines not visible here.
3251 if (OldCount <= NewMax)
// Parses one register at the current position and wraps it in a register
// operand spanning the start and end locations of the first token (`Tok`,
// defined on a line not visible here).
// On the visible path it also updates the GPR-count symbols and records the
// use in KernelScope; the conditions choosing between those, and the failure
// returns, are not visible here.
3257std::unique_ptr<AMDGPUOperand>
3258AMDGPUAsmParser::parseRegister(
bool RestoreOnFailure) {
3260 SMLoc StartLoc = Tok.getLoc();
3261 SMLoc EndLoc = Tok.getEndLoc();
3262 RegisterKind RegKind;
3264 unsigned RegNum, RegWidth;
3266 if (!ParseAMDGPURegister(RegKind,
Reg, RegNum, RegWidth)) {
3270 if (!updateGprCountSymbols(RegKind, RegNum, RegWidth))
3273 KernelScope.usesRegister(RegKind, RegNum, RegWidth);
3274 return AMDGPUOperand::CreateReg(
this,
Reg, StartLoc, EndLoc);
3277ParseStatus AMDGPUAsmParser::parseImm(
OperandVector &Operands,
3281 if (isRegister() || isModifier())
3284 if (
Lit == LitModifier::None) {
3285 if (trySkipId(
"lit"))
3286 Lit = LitModifier::Lit;
3287 else if (trySkipId(
"lit64"))
3288 Lit = LitModifier::Lit64;
3290 if (
Lit != LitModifier::None) {
3293 ParseStatus S = parseImm(Operands, HasSP3AbsModifier,
Lit);
3302 const auto& NextTok = peekToken();
3305 bool Negate =
false;
3313 AMDGPUOperand::Modifiers Mods;
3321 StringRef Num = getTokenStr();
3324 APFloat RealVal(APFloat::IEEEdouble());
3325 auto roundMode = APFloat::rmNearestTiesToEven;
3326 if (
errorToBool(RealVal.convertFromString(Num, roundMode).takeError()))
3329 RealVal.changeSign();
3332 AMDGPUOperand::CreateImm(
this, RealVal.bitcastToAPInt().getZExtValue(), S,
3333 AMDGPUOperand::ImmTyNone,
true));
3334 AMDGPUOperand &
Op =
static_cast<AMDGPUOperand &
>(*Operands.
back());
3335 Op.setModifiers(Mods);
3344 if (HasSP3AbsModifier) {
3353 if (getParser().parsePrimaryExpr(Expr, EndLoc,
nullptr))
3356 if (Parser.parseExpression(Expr))
3360 if (Expr->evaluateAsAbsolute(IntVal)) {
3362 return Error(S,
"literal value out of range");
3363 Operands.
push_back(AMDGPUOperand::CreateImm(
this, IntVal, S));
3364 AMDGPUOperand &
Op =
static_cast<AMDGPUOperand &
>(*Operands.
back());
3365 Op.setModifiers(Mods);
3367 if (
Lit != LitModifier::None)
3369 Operands.
push_back(AMDGPUOperand::CreateExpr(
this, Expr, S));
3378ParseStatus AMDGPUAsmParser::parseReg(
OperandVector &Operands) {
3382 if (
auto R = parseRegister()) {
3390ParseStatus AMDGPUAsmParser::parseRegOrImm(
OperandVector &Operands,
3392 ParseStatus Res = parseReg(Operands);
3397 return parseImm(Operands, HasSP3AbsMod,
Lit);
3401AMDGPUAsmParser::isNamedOperandModifier(
const AsmToken &Token,
const AsmToken &NextToken)
const {
3404 return str ==
"abs" || str ==
"neg" || str ==
"sext";
3410AMDGPUAsmParser::isOpcodeModifierWithVal(
const AsmToken &Token,
const AsmToken &NextToken)
const {
3415AMDGPUAsmParser::isOperandModifier(
const AsmToken &Token,
const AsmToken &NextToken)
const {
3416 return isNamedOperandModifier(Token, NextToken) || Token.
is(
AsmToken::Pipe);
3420AMDGPUAsmParser::isRegOrOperandModifier(
const AsmToken &Token,
const AsmToken &NextToken)
const {
3421 return isRegister(Token, NextToken) || isOperandModifier(Token, NextToken);
3438AMDGPUAsmParser::isModifier() {
3441 AsmToken NextToken[2];
3442 peekTokens(NextToken);
3444 return isOperandModifier(Tok, NextToken[0]) ||
3445 (Tok.
is(
AsmToken::Minus) && isRegOrOperandModifier(NextToken[0], NextToken[1])) ||
3446 isOpcodeModifierWithVal(Tok, NextToken[0]);
3472AMDGPUAsmParser::parseSP3NegModifier() {
3474 AsmToken NextToken[2];
3475 peekTokens(NextToken);
3478 (isRegister(NextToken[0], NextToken[1]) ||
3480 isId(NextToken[0],
"abs"))) {
3489AMDGPUAsmParser::parseRegOrImmWithFPInputMods(
OperandVector &Operands,
3497 return Error(getLoc(),
"invalid syntax, expected 'neg' modifier");
3499 SP3Neg = parseSP3NegModifier();
3502 Neg = trySkipId(
"neg");
3504 return Error(Loc,
"expected register or immediate");
3508 Abs = trySkipId(
"abs");
3513 if (trySkipId(
"lit")) {
3514 Lit = LitModifier::Lit;
3517 }
else if (trySkipId(
"lit64")) {
3518 Lit = LitModifier::Lit64;
3521 if (!has64BitLiterals())
3522 return Error(Loc,
"lit64 is not supported on this GPU");
3528 return Error(Loc,
"expected register or immediate");
3532 Res = parseRegOrImm(Operands, SP3Abs,
Lit);
3534 Res = parseReg(Operands);
3537 return (SP3Neg || Neg || SP3Abs || Abs ||
Lit != LitModifier::None)
3541 if (
Lit != LitModifier::None && !Operands.
back()->isImm())
3542 Error(Loc,
"expected immediate with lit modifier");
3544 if (SP3Abs && !skipToken(
AsmToken::Pipe,
"expected vertical bar"))
3550 if (
Lit != LitModifier::None &&
3554 AMDGPUOperand::Modifiers Mods;
3555 Mods.Abs = Abs || SP3Abs;
3556 Mods.Neg = Neg || SP3Neg;
3559 if (Mods.hasFPModifiers() ||
Lit != LitModifier::None) {
3560 AMDGPUOperand &
Op =
static_cast<AMDGPUOperand &
>(*Operands.
back());
3562 return Error(
Op.getStartLoc(),
"expected an absolute expression");
3563 Op.setModifiers(Mods);
3569AMDGPUAsmParser::parseRegOrImmWithIntInputMods(
OperandVector &Operands,
3571 bool Sext = trySkipId(
"sext");
3572 if (Sext && !skipToken(
AsmToken::LParen,
"expected left paren after sext"))
3577 Res = parseRegOrImm(Operands);
3579 Res = parseReg(Operands);
3587 AMDGPUOperand::Modifiers Mods;
3590 if (Mods.hasIntModifiers()) {
3591 AMDGPUOperand &
Op =
static_cast<AMDGPUOperand &
>(*Operands.
back());
3593 return Error(
Op.getStartLoc(),
"expected an absolute expression");
3594 Op.setModifiers(Mods);
3600ParseStatus AMDGPUAsmParser::parseRegWithFPInputMods(
OperandVector &Operands) {
3601 return parseRegOrImmWithFPInputMods(Operands,
false);
3604ParseStatus AMDGPUAsmParser::parseRegWithIntInputMods(
OperandVector &Operands) {
3605 return parseRegOrImmWithIntInputMods(Operands,
false);
3608ParseStatus AMDGPUAsmParser::parseVReg32OrOff(
OperandVector &Operands) {
3609 auto Loc = getLoc();
3610 if (trySkipId(
"off")) {
3611 Operands.
push_back(AMDGPUOperand::CreateImm(
this, 0, Loc,
3612 AMDGPUOperand::ImmTyOff,
false));
3619 std::unique_ptr<AMDGPUOperand>
Reg = parseRegister();
3628unsigned AMDGPUAsmParser::checkTargetMatchPredicate(MCInst &Inst) {
3635 return Match_InvalidOperand;
3637 if (Inst.
getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi ||
3638 Inst.
getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) {
3641 AMDGPU::getNamedOperandIdx(Inst.
getOpcode(), AMDGPU::OpName::dst_sel);
3643 if (!
Op.isImm() ||
Op.getImm() != AMDGPU::SDWA::SdwaSel::DWORD) {
3644 return Match_InvalidOperand;
3652 if (tryAnotherVOPDEncoding(Inst))
3653 return Match_InvalidOperand;
3655 return Match_Success;
3659 static const unsigned Variants[] = {
3669ArrayRef<unsigned> AMDGPUAsmParser::getMatchedVariants()
const {
3670 if (isForcedDPP() && isForcedVOP3()) {
3674 if (getForcedEncodingSize() == 32) {
3679 if (isForcedVOP3()) {
3684 if (isForcedSDWA()) {
3690 if (isForcedDPP()) {
3698StringRef AMDGPUAsmParser::getMatchedVariantName()
const {
3699 if (isForcedDPP() && isForcedVOP3())
3702 if (getForcedEncodingSize() == 32)
3718AMDGPUAsmParser::findImplicitSGPRReadInVOP(
const MCInst &Inst)
const {
3722 case AMDGPU::FLAT_SCR:
3724 case AMDGPU::VCC_LO:
3725 case AMDGPU::VCC_HI:
3732 return MCRegister();
3739bool AMDGPUAsmParser::isInlineConstant(
const MCInst &Inst,
3740 unsigned OpIdx)
const {
3797unsigned AMDGPUAsmParser::getConstantBusLimit(
unsigned Opcode)
const {
3803 case AMDGPU::V_LSHLREV_B64_e64:
3804 case AMDGPU::V_LSHLREV_B64_gfx10:
3805 case AMDGPU::V_LSHLREV_B64_e64_gfx11:
3806 case AMDGPU::V_LSHLREV_B64_e32_gfx12:
3807 case AMDGPU::V_LSHLREV_B64_e64_gfx12:
3808 case AMDGPU::V_LSHRREV_B64_e64:
3809 case AMDGPU::V_LSHRREV_B64_gfx10:
3810 case AMDGPU::V_LSHRREV_B64_e64_gfx11:
3811 case AMDGPU::V_LSHRREV_B64_e64_gfx12:
3812 case AMDGPU::V_ASHRREV_I64_e64:
3813 case AMDGPU::V_ASHRREV_I64_gfx10:
3814 case AMDGPU::V_ASHRREV_I64_e64_gfx11:
3815 case AMDGPU::V_ASHRREV_I64_e64_gfx12:
3816 case AMDGPU::V_LSHL_B64_e64:
3817 case AMDGPU::V_LSHR_B64_e64:
3818 case AMDGPU::V_ASHR_I64_e64:
3831 bool AddMandatoryLiterals =
false) {
3834 AddMandatoryLiterals ? getNamedOperandIdx(Opcode, OpName::imm) : -1;
3838 AddMandatoryLiterals ? getNamedOperandIdx(Opcode, OpName::immX) : -1;
3840 return {getNamedOperandIdx(Opcode, OpName::src0X),
3841 getNamedOperandIdx(Opcode, OpName::vsrc1X),
3842 getNamedOperandIdx(Opcode, OpName::vsrc2X),
3843 getNamedOperandIdx(Opcode, OpName::src0Y),
3844 getNamedOperandIdx(Opcode, OpName::vsrc1Y),
3845 getNamedOperandIdx(Opcode, OpName::vsrc2Y),
3850 return {getNamedOperandIdx(Opcode, OpName::src0),
3851 getNamedOperandIdx(Opcode, OpName::src1),
3852 getNamedOperandIdx(Opcode, OpName::src2), ImmIdx};
3855bool AMDGPUAsmParser::usesConstantBus(
const MCInst &Inst,
unsigned OpIdx) {
3858 return !isInlineConstant(Inst,
OpIdx);
3865 return isSGPR(PReg,
TRI) && PReg != SGPR_NULL;
3876 const unsigned Opcode = Inst.
getOpcode();
3877 if (Opcode != V_WRITELANE_B32_gfx6_gfx7 && Opcode != V_WRITELANE_B32_vi)
3880 if (!LaneSelOp.
isReg())
3883 return LaneSelReg ==
M0 || LaneSelReg == M0_gfxpre11;
3886bool AMDGPUAsmParser::validateConstantBusLimitations(
3888 const unsigned Opcode = Inst.
getOpcode();
3889 const MCInstrDesc &
Desc = MII.
get(Opcode);
3890 MCRegister LastSGPR;
3891 unsigned ConstantBusUseCount = 0;
3892 unsigned NumLiterals = 0;
3893 unsigned LiteralSize;
3895 if (!(
Desc.TSFlags &
3910 SmallDenseSet<MCRegister> SGPRsUsed;
3911 MCRegister SGPRUsed = findImplicitSGPRReadInVOP(Inst);
3913 SGPRsUsed.
insert(SGPRUsed);
3914 ++ConstantBusUseCount;
3919 unsigned ConstantBusLimit = getConstantBusLimit(Opcode);
3921 for (
int OpIdx : OpIndices) {
3926 if (usesConstantBus(Inst,
OpIdx)) {
3935 if (SGPRsUsed.
insert(LastSGPR).second) {
3936 ++ConstantBusUseCount;
3956 if (NumLiterals == 0) {
3959 }
else if (LiteralSize !=
Size) {
3965 if (ConstantBusUseCount + NumLiterals > ConstantBusLimit) {
3967 "invalid operand (violates constant bus restrictions)");
3974std::optional<unsigned>
3975AMDGPUAsmParser::checkVOPDRegBankConstraints(
const MCInst &Inst,
bool AsVOPD3) {
3977 const unsigned Opcode = Inst.
getOpcode();
3983 auto getVRegIdx = [&](unsigned,
unsigned OperandIdx) {
3984 const MCOperand &Opr = Inst.
getOperand(OperandIdx);
3993 Opcode == AMDGPU::V_DUAL_MOV_B32_e32_X_MOV_B32_e32_gfx1170 ||
3994 Opcode == AMDGPU::V_DUAL_MOV_B32_e32_X_MOV_B32_e32_gfx12 ||
3995 Opcode == AMDGPU::V_DUAL_MOV_B32_e32_X_MOV_B32_e32_gfx1250 ||
3996 Opcode == AMDGPU::V_DUAL_MOV_B32_e32_X_MOV_B32_e32_gfx13 ||
3997 Opcode == AMDGPU::V_DUAL_MOV_B32_e32_X_MOV_B32_e32_e96_gfx1250 ||
3998 Opcode == AMDGPU::V_DUAL_MOV_B32_e32_X_MOV_B32_e32_e96_gfx13;
4002 for (
auto OpName : {OpName::src0X, OpName::src0Y}) {
4003 int I = getNamedOperandIdx(Opcode, OpName);
4007 int64_t
Imm =
Op.getImm();
4013 for (
auto OpName : {OpName::vsrc1X, OpName::vsrc1Y, OpName::vsrc2X,
4014 OpName::vsrc2Y, OpName::imm}) {
4015 int I = getNamedOperandIdx(Opcode, OpName);
4025 auto InvalidCompOprIdx = InstInfo.getInvalidCompOperandIndex(
4026 getVRegIdx, *
TRI, SkipSrc, AllowSameVGPR, AsVOPD3);
4028 return InvalidCompOprIdx;
4031bool AMDGPUAsmParser::validateVOPD(
const MCInst &Inst,
4038 for (
const std::unique_ptr<MCParsedAsmOperand> &Operand : Operands) {
4039 AMDGPUOperand &
Op = (AMDGPUOperand &)*Operand;
4040 if ((
Op.isRegKind() ||
Op.isImmTy(AMDGPUOperand::ImmTyNone)) &&
4042 Error(
Op.getStartLoc(),
"ABS not allowed in VOPD3 instructions");
4046 auto InvalidCompOprIdx = checkVOPDRegBankConstraints(Inst, AsVOPD3);
4047 if (!InvalidCompOprIdx.has_value())
4050 auto CompOprIdx = *InvalidCompOprIdx;
4053 std::max(InstInfo[
VOPD::X].getIndexInParsedOperands(CompOprIdx),
4054 InstInfo[
VOPD::Y].getIndexInParsedOperands(CompOprIdx));
4055 assert(ParsedIdx > 0 && ParsedIdx < Operands.size());
4057 auto Loc = ((AMDGPUOperand &)*Operands[ParsedIdx]).getStartLoc();
4058 if (CompOprIdx == VOPD::Component::DST) {
4060 Error(Loc,
"dst registers must be distinct");
4062 Error(Loc,
"one dst register must be even and the other odd");
4064 auto CompSrcIdx = CompOprIdx - VOPD::Component::DST_NUM;
4065 Error(Loc, Twine(
"src") + Twine(CompSrcIdx) +
4066 " operands must use different VGPR banks");
4074bool AMDGPUAsmParser::tryVOPD3(
const MCInst &Inst) {
4076 auto InvalidCompOprIdx = checkVOPDRegBankConstraints(Inst,
false);
4077 if (!InvalidCompOprIdx.has_value())
4081 InvalidCompOprIdx = checkVOPDRegBankConstraints(Inst,
true);
4082 if (InvalidCompOprIdx.has_value()) {
4087 if (*InvalidCompOprIdx == VOPD::Component::DST)
4100bool AMDGPUAsmParser::tryVOPD(
const MCInst &Inst) {
4101 const unsigned Opcode = Inst.
getOpcode();
4111 if (
II[
VOPD::X].getOpcode() == AMDGPU::V_CNDMASK_B32_e32 ||
4112 II[
VOPD::Y].getOpcode() == AMDGPU::V_CNDMASK_B32_e32)
4116 for (
auto OpName : {OpName::src0X_modifiers, OpName::src0Y_modifiers,
4117 OpName::vsrc1X_modifiers, OpName::vsrc1Y_modifiers,
4118 OpName::vsrc2X_modifiers, OpName::vsrc2Y_modifiers}) {
4119 int I = getNamedOperandIdx(Opcode, OpName);
4126 return !tryVOPD3(Inst);
4131bool AMDGPUAsmParser::tryAnotherVOPDEncoding(
const MCInst &Inst) {
4132 const unsigned Opcode = Inst.
getOpcode();
4137 return tryVOPD(Inst);
4138 return tryVOPD3(Inst);
4141bool AMDGPUAsmParser::validateIntClampSupported(
const MCInst &Inst) {
4147 int ClampIdx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::clamp);
4158bool AMDGPUAsmParser::validateMIMGDataSize(
const MCInst &Inst,
SMLoc IDLoc) {
4166 int VDataIdx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::vdata);
4167 int DMaskIdx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::dmask);
4168 int TFEIdx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::tfe);
4176 unsigned VDataSize = getRegOperandSize(
Desc, VDataIdx);
4177 unsigned TFESize = (TFEIdx != -1 && Inst.
getOperand(TFEIdx).
getImm()) ? 1 : 0;
4182 bool IsPackedD16 =
false;
4186 int D16Idx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::d16);
4187 IsPackedD16 = D16Idx >= 0;
4192 if ((VDataSize / 4) ==
DataSize + TFESize)
4197 Modifiers = IsPackedD16 ?
"dmask and d16" :
"dmask";
4199 Modifiers = IsPackedD16 ?
"dmask, d16 and tfe" :
"dmask and tfe";
4201 Error(IDLoc,
Twine(
"image data size does not match ") + Modifiers);
4205bool AMDGPUAsmParser::validateMIMGAddrSize(
const MCInst &Inst, SMLoc IDLoc) {
4214 const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode =
4216 int VAddr0Idx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::vaddr0);
4218 ? AMDGPU::OpName::srsrc
4219 : AMDGPU::OpName::rsrc;
4220 int SrsrcIdx = AMDGPU::getNamedOperandIdx(
Opc, RSrcOpName);
4221 int DimIdx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::dim);
4222 int A16Idx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::a16);
4226 assert(SrsrcIdx > VAddr0Idx);
4229 if (BaseOpcode->
BVH) {
4230 if (IsA16 == BaseOpcode->
A16)
4232 Error(IDLoc,
"image address size does not match a16");
4238 bool IsNSA = SrsrcIdx - VAddr0Idx > 1;
4239 unsigned ActualAddrSize =
4240 IsNSA ? SrsrcIdx - VAddr0Idx : getRegOperandSize(
Desc, VAddr0Idx) / 4;
4242 unsigned ExpectedAddrSize =
4246 if (hasPartialNSAEncoding() &&
4249 int VAddrLastIdx = SrsrcIdx - 1;
4250 unsigned VAddrLastSize = getRegOperandSize(
Desc, VAddrLastIdx) / 4;
4252 ActualAddrSize = VAddrLastIdx - VAddr0Idx + VAddrLastSize;
4255 if (ExpectedAddrSize > 12)
4256 ExpectedAddrSize = 16;
4261 if (ActualAddrSize == 8 && (ExpectedAddrSize >= 5 && ExpectedAddrSize <= 7))
4265 if (ActualAddrSize == ExpectedAddrSize)
4268 Error(IDLoc,
"image address size does not match dim and a16");
4272bool AMDGPUAsmParser::validateMIMGAtomicDMask(
const MCInst &Inst) {
4279 if (!
Desc.mayLoad() || !
Desc.mayStore())
4282 int DMaskIdx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::dmask);
4289 return DMask == 0x1 || DMask == 0x3 || DMask == 0xf;
4292bool AMDGPUAsmParser::validateMIMGGatherDMask(
const MCInst &Inst) {
4300 int DMaskIdx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::dmask);
4308 return DMask == 0x1 || DMask == 0x2 || DMask == 0x4 || DMask == 0x8;
4311bool AMDGPUAsmParser::validateMIMGDim(
const MCInst &Inst,
4326 for (
unsigned i = 1, e = Operands.
size(); i != e; ++i) {
4327 AMDGPUOperand &
Op = ((AMDGPUOperand &)*Operands[i]);
4334bool AMDGPUAsmParser::validateMIMGMSAA(
const MCInst &Inst) {
4342 const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode =
4345 if (!BaseOpcode->
MSAA)
4348 int DimIdx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::dim);
4354 return DimInfo->
MSAA;
4360 case AMDGPU::V_MOVRELS_B32_sdwa_gfx10:
4361 case AMDGPU::V_MOVRELSD_B32_sdwa_gfx10:
4362 case AMDGPU::V_MOVRELSD_2_B32_sdwa_gfx10:
4372bool AMDGPUAsmParser::validateMovrels(
const MCInst &Inst,
4381 const int Src0Idx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::src0);
4384 const MCOperand &Src0 = Inst.
getOperand(Src0Idx);
4392 Error(getOperandLoc(Operands, Src0Idx),
"source operand must be a VGPR");
4396bool AMDGPUAsmParser::validateMAIAccWrite(
const MCInst &Inst,
4401 if (
Opc != AMDGPU::V_ACCVGPR_WRITE_B32_vi)
4404 const int Src0Idx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::src0);
4407 const MCOperand &Src0 = Inst.
getOperand(Src0Idx);
4414 Error(getOperandLoc(Operands, Src0Idx),
4415 "source operand must be either a VGPR or an inline constant");
4422bool AMDGPUAsmParser::validateMAISrc2(
const MCInst &Inst,
4425 const MCInstrDesc &
Desc = MII.
get(Opcode);
4428 !getFeatureBits()[FeatureMFMAInlineLiteralBug])
4431 const int Src2Idx = getNamedOperandIdx(Opcode, OpName::src2);
4435 if (Inst.
getOperand(Src2Idx).
isImm() && isInlineConstant(Inst, Src2Idx)) {
4436 Error(getOperandLoc(Operands, Src2Idx),
4437 "inline constants are not allowed for this operand");
4444bool AMDGPUAsmParser::validateMFMA(
const MCInst &Inst,
4452 int BlgpIdx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::blgp);
4453 if (BlgpIdx != -1) {
4454 if (
const MFMA_F8F6F4_Info *Info = AMDGPU::isMFMA_F8F6F4(
Opc)) {
4455 int CbszIdx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::cbsz);
4465 int Src0Idx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::src0);
4466 Error(getOperandLoc(Operands, Src0Idx),
4467 "wrong register tuple size for cbsz value " + Twine(CBSZ));
4472 int Src1Idx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::src1);
4473 Error(getOperandLoc(Operands, Src1Idx),
4474 "wrong register tuple size for blgp value " + Twine(BLGP));
4482 const int Src2Idx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::src2);
4486 const MCOperand &Src2 = Inst.
getOperand(Src2Idx);
4490 MCRegister Src2Reg = Src2.
getReg();
4492 if (Src2Reg == DstReg)
4497 .getSizeInBits() <= 128)
4500 if (
TRI->regsOverlap(Src2Reg, DstReg)) {
4501 Error(getOperandLoc(Operands, Src2Idx),
4502 "source 2 operand must not partially overlap with dst");
4509bool AMDGPUAsmParser::validateDivScale(
const MCInst &Inst) {
4513 case V_DIV_SCALE_F32_gfx6_gfx7:
4514 case V_DIV_SCALE_F32_vi:
4515 case V_DIV_SCALE_F32_gfx10:
4516 case V_DIV_SCALE_F64_gfx6_gfx7:
4517 case V_DIV_SCALE_F64_vi:
4518 case V_DIV_SCALE_F64_gfx10:
4524 for (
auto Name : {AMDGPU::OpName::src0_modifiers,
4525 AMDGPU::OpName::src2_modifiers,
4526 AMDGPU::OpName::src2_modifiers}) {
4537bool AMDGPUAsmParser::validateMIMGD16(
const MCInst &Inst) {
4545 int D16Idx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::d16);
4554bool AMDGPUAsmParser::validateTensorR128(
const MCInst &Inst) {
4561 int R128Idx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::r128);
4569 case AMDGPU::V_SUBREV_F32_e32:
4570 case AMDGPU::V_SUBREV_F32_e64:
4571 case AMDGPU::V_SUBREV_F32_e32_gfx10:
4572 case AMDGPU::V_SUBREV_F32_e32_gfx6_gfx7:
4573 case AMDGPU::V_SUBREV_F32_e32_vi:
4574 case AMDGPU::V_SUBREV_F32_e64_gfx10:
4575 case AMDGPU::V_SUBREV_F32_e64_gfx6_gfx7:
4576 case AMDGPU::V_SUBREV_F32_e64_vi:
4578 case AMDGPU::V_SUBREV_CO_U32_e32:
4579 case AMDGPU::V_SUBREV_CO_U32_e64:
4580 case AMDGPU::V_SUBREV_I32_e32_gfx6_gfx7:
4581 case AMDGPU::V_SUBREV_I32_e64_gfx6_gfx7:
4583 case AMDGPU::V_SUBBREV_U32_e32:
4584 case AMDGPU::V_SUBBREV_U32_e64:
4585 case AMDGPU::V_SUBBREV_U32_e32_gfx6_gfx7:
4586 case AMDGPU::V_SUBBREV_U32_e32_vi:
4587 case AMDGPU::V_SUBBREV_U32_e64_gfx6_gfx7:
4588 case AMDGPU::V_SUBBREV_U32_e64_vi:
4590 case AMDGPU::V_SUBREV_U32_e32:
4591 case AMDGPU::V_SUBREV_U32_e64:
4592 case AMDGPU::V_SUBREV_U32_e32_gfx9:
4593 case AMDGPU::V_SUBREV_U32_e32_vi:
4594 case AMDGPU::V_SUBREV_U32_e64_gfx9:
4595 case AMDGPU::V_SUBREV_U32_e64_vi:
4597 case AMDGPU::V_SUBREV_F16_e32:
4598 case AMDGPU::V_SUBREV_F16_e64:
4599 case AMDGPU::V_SUBREV_F16_e32_gfx10:
4600 case AMDGPU::V_SUBREV_F16_e32_vi:
4601 case AMDGPU::V_SUBREV_F16_e64_gfx10:
4602 case AMDGPU::V_SUBREV_F16_e64_vi:
4604 case AMDGPU::V_SUBREV_U16_e32:
4605 case AMDGPU::V_SUBREV_U16_e64:
4606 case AMDGPU::V_SUBREV_U16_e32_vi:
4607 case AMDGPU::V_SUBREV_U16_e64_vi:
4609 case AMDGPU::V_SUBREV_CO_U32_e32_gfx9:
4610 case AMDGPU::V_SUBREV_CO_U32_e64_gfx10:
4611 case AMDGPU::V_SUBREV_CO_U32_e64_gfx9:
4613 case AMDGPU::V_SUBBREV_CO_U32_e32_gfx9:
4614 case AMDGPU::V_SUBBREV_CO_U32_e64_gfx9:
4616 case AMDGPU::V_SUBREV_NC_U32_e32_gfx10:
4617 case AMDGPU::V_SUBREV_NC_U32_e64_gfx10:
4619 case AMDGPU::V_SUBREV_CO_CI_U32_e32_gfx10:
4620 case AMDGPU::V_SUBREV_CO_CI_U32_e64_gfx10:
4622 case AMDGPU::V_LSHRREV_B32_e32:
4623 case AMDGPU::V_LSHRREV_B32_e64:
4624 case AMDGPU::V_LSHRREV_B32_e32_gfx6_gfx7:
4625 case AMDGPU::V_LSHRREV_B32_e64_gfx6_gfx7:
4626 case AMDGPU::V_LSHRREV_B32_e32_vi:
4627 case AMDGPU::V_LSHRREV_B32_e64_vi:
4628 case AMDGPU::V_LSHRREV_B32_e32_gfx10:
4629 case AMDGPU::V_LSHRREV_B32_e64_gfx10:
4631 case AMDGPU::V_ASHRREV_I32_e32:
4632 case AMDGPU::V_ASHRREV_I32_e64:
4633 case AMDGPU::V_ASHRREV_I32_e32_gfx10:
4634 case AMDGPU::V_ASHRREV_I32_e32_gfx6_gfx7:
4635 case AMDGPU::V_ASHRREV_I32_e32_vi:
4636 case AMDGPU::V_ASHRREV_I32_e64_gfx10:
4637 case AMDGPU::V_ASHRREV_I32_e64_gfx6_gfx7:
4638 case AMDGPU::V_ASHRREV_I32_e64_vi:
4640 case AMDGPU::V_LSHLREV_B32_e32:
4641 case AMDGPU::V_LSHLREV_B32_e64:
4642 case AMDGPU::V_LSHLREV_B32_e32_gfx10:
4643 case AMDGPU::V_LSHLREV_B32_e32_gfx6_gfx7:
4644 case AMDGPU::V_LSHLREV_B32_e32_vi:
4645 case AMDGPU::V_LSHLREV_B32_e64_gfx10:
4646 case AMDGPU::V_LSHLREV_B32_e64_gfx6_gfx7:
4647 case AMDGPU::V_LSHLREV_B32_e64_vi:
4649 case AMDGPU::V_LSHLREV_B16_e32:
4650 case AMDGPU::V_LSHLREV_B16_e64:
4651 case AMDGPU::V_LSHLREV_B16_e32_vi:
4652 case AMDGPU::V_LSHLREV_B16_e64_vi:
4653 case AMDGPU::V_LSHLREV_B16_gfx10:
4655 case AMDGPU::V_LSHRREV_B16_e32:
4656 case AMDGPU::V_LSHRREV_B16_e64:
4657 case AMDGPU::V_LSHRREV_B16_e32_vi:
4658 case AMDGPU::V_LSHRREV_B16_e64_vi:
4659 case AMDGPU::V_LSHRREV_B16_gfx10:
4661 case AMDGPU::V_ASHRREV_I16_e32:
4662 case AMDGPU::V_ASHRREV_I16_e64:
4663 case AMDGPU::V_ASHRREV_I16_e32_vi:
4664 case AMDGPU::V_ASHRREV_I16_e64_vi:
4665 case AMDGPU::V_ASHRREV_I16_gfx10:
4667 case AMDGPU::V_LSHLREV_B64_e64:
4668 case AMDGPU::V_LSHLREV_B64_gfx10:
4669 case AMDGPU::V_LSHLREV_B64_vi:
4671 case AMDGPU::V_LSHRREV_B64_e64:
4672 case AMDGPU::V_LSHRREV_B64_gfx10:
4673 case AMDGPU::V_LSHRREV_B64_vi:
4675 case AMDGPU::V_ASHRREV_I64_e64:
4676 case AMDGPU::V_ASHRREV_I64_gfx10:
4677 case AMDGPU::V_ASHRREV_I64_vi:
4679 case AMDGPU::V_PK_LSHLREV_B16:
4680 case AMDGPU::V_PK_LSHLREV_B16_gfx10:
4681 case AMDGPU::V_PK_LSHLREV_B16_vi:
4683 case AMDGPU::V_PK_LSHRREV_B16:
4684 case AMDGPU::V_PK_LSHRREV_B16_gfx10:
4685 case AMDGPU::V_PK_LSHRREV_B16_vi:
4686 case AMDGPU::V_PK_ASHRREV_I16:
4687 case AMDGPU::V_PK_ASHRREV_I16_gfx10:
4688 case AMDGPU::V_PK_ASHRREV_I16_vi:
4695bool AMDGPUAsmParser::validateLdsDirect(
const MCInst &Inst,
4697 using namespace SIInstrFlags;
4698 const unsigned Opcode = Inst.
getOpcode();
4699 const MCInstrDesc &
Desc = MII.
get(Opcode);
4704 if ((
Desc.TSFlags & Enc) == 0)
4707 for (
auto SrcName : {OpName::src0, OpName::src1, OpName::src2}) {
4708 auto SrcIdx = getNamedOperandIdx(Opcode, SrcName);
4712 if (Src.isReg() && Src.getReg() == LDS_DIRECT) {
4715 Error(getOperandLoc(Operands, SrcIdx),
4716 "lds_direct is not supported on this GPU");
4721 Error(getOperandLoc(Operands, SrcIdx),
4722 "lds_direct cannot be used with this instruction");
4726 if (SrcName != OpName::src0) {
4727 Error(getOperandLoc(Operands, SrcIdx),
4728 "lds_direct may be used as src0 only");
4737SMLoc AMDGPUAsmParser::getFlatOffsetLoc(
const OperandVector &Operands)
const {
4738 for (
unsigned i = 1, e = Operands.
size(); i != e; ++i) {
4739 AMDGPUOperand &
Op = ((AMDGPUOperand &)*Operands[i]);
4740 if (
Op.isFlatOffset())
4741 return Op.getStartLoc();
4746bool AMDGPUAsmParser::validateOffset(
const MCInst &Inst,
4749 auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset);
4755 return validateFlatOffset(Inst, Operands);
4758 return validateSMEMOffset(Inst, Operands);
4764 const unsigned OffsetSize = 24;
4765 if (!
isUIntN(OffsetSize - 1,
Op.getImm())) {
4766 Error(getFlatOffsetLoc(Operands),
4767 Twine(
"expected a ") + Twine(OffsetSize - 1) +
4768 "-bit unsigned offset for buffer ops");
4772 const unsigned OffsetSize = 16;
4773 if (!
isUIntN(OffsetSize,
Op.getImm())) {
4774 Error(getFlatOffsetLoc(Operands),
4775 Twine(
"expected a ") + Twine(OffsetSize) +
"-bit unsigned offset");
4782bool AMDGPUAsmParser::validateFlatOffset(
const MCInst &Inst,
4789 auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset);
4793 if (!hasFlatOffsets() &&
Op.getImm() != 0) {
4794 Error(getFlatOffsetLoc(Operands),
4795 "flat offset modifier is not supported on this GPU");
4802 bool AllowNegative =
4805 if (!
isIntN(OffsetSize,
Op.getImm()) || (!AllowNegative &&
Op.getImm() < 0)) {
4806 Error(getFlatOffsetLoc(Operands),
4807 Twine(
"expected a ") +
4808 (AllowNegative ? Twine(OffsetSize) +
"-bit signed offset"
4809 : Twine(OffsetSize - 1) +
"-bit unsigned offset"));
4816SMLoc AMDGPUAsmParser::getSMEMOffsetLoc(
const OperandVector &Operands)
const {
4818 for (
unsigned i = 2, e = Operands.
size(); i != e; ++i) {
4819 AMDGPUOperand &
Op = ((AMDGPUOperand &)*Operands[i]);
4820 if (
Op.isSMEMOffset() ||
Op.isSMEMOffsetMod())
4821 return Op.getStartLoc();
4826bool AMDGPUAsmParser::validateSMEMOffset(
const MCInst &Inst,
4836 auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset);
4850 Error(getSMEMOffsetLoc(Operands),
4852 ?
"expected a 23-bit unsigned offset for buffer ops"
4853 :
isGFX12Plus() ?
"expected a 24-bit signed offset"
4854 : (
isVI() || IsBuffer) ?
"expected a 20-bit unsigned offset"
4855 :
"expected a 21-bit signed offset");
4860bool AMDGPUAsmParser::validateSOPLiteral(
const MCInst &Inst,
4863 const MCInstrDesc &
Desc = MII.
get(Opcode);
4867 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
4868 const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
4870 const int OpIndices[] = { Src0Idx, Src1Idx };
4872 unsigned NumExprs = 0;
4873 unsigned NumLiterals = 0;
4876 for (
int OpIdx : OpIndices) {
4877 if (
OpIdx == -1)
break;
4883 std::optional<int64_t>
Imm;
4886 }
else if (MO.
isExpr()) {
4895 if (!
Imm.has_value()) {
4897 }
else if (!isInlineConstant(Inst,
OpIdx)) {
4901 if (NumLiterals == 0 || LiteralValue !=
Value) {
4909 if (NumLiterals + NumExprs <= 1)
4912 Error(getOperandLoc(Operands, Src1Idx),
4913 "only one unique literal operand is allowed");
4917bool AMDGPUAsmParser::validateOpSel(
const MCInst &Inst) {
4920 int OpSelIdx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::op_sel);
4930 int OpSelIdx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::op_sel);
4931 if (OpSelIdx != -1) {
4935 int OpSelHiIdx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::op_sel_hi);
4936 if (OpSelHiIdx != -1) {
4945 int OpSelIdx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::op_sel);
4955 int Src0Idx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::src0);
4956 int Src1Idx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::src1);
4957 int OpSelIdx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::op_sel);
4958 int OpSelHiIdx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::op_sel_hi);
4960 const MCOperand &Src0 = Inst.
getOperand(Src0Idx);
4961 const MCOperand &Src1 = Inst.
getOperand(Src1Idx);
4967 auto VerifyOneSGPR = [
OpSel, OpSelHi](
unsigned Index) ->
bool {
4969 return ((OpSel & Mask) == 0) && ((OpSelHi &
Mask) == 0);
4979 int Src2Idx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::src2);
4980 if (Src2Idx != -1) {
4981 const MCOperand &Src2 = Inst.
getOperand(Src2Idx);
4991bool AMDGPUAsmParser::validateTrue16OpSel(
const MCInst &Inst) {
4992 if (!hasTrue16Insts())
4994 const MCRegisterInfo *MRI = getMRI();
4996 int OpSelIdx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::op_sel);
5002 if (OpSelOpValue == 0)
5004 unsigned OpCount = 0;
5005 for (AMDGPU::OpName OpName : {AMDGPU::OpName::src0, AMDGPU::OpName::src1,
5006 AMDGPU::OpName::src2, AMDGPU::OpName::vdst}) {
5007 int OpIdx = AMDGPU::getNamedOperandIdx(Inst.
getOpcode(), OpName);
5014 bool OpSelOpIsHi = ((OpSelOpValue & (1 << OpCount)) != 0);
5015 if (OpSelOpIsHi != VGPRSuffixIsHi)
5024bool AMDGPUAsmParser::validateNeg(
const MCInst &Inst, AMDGPU::OpName OpName) {
5025 assert(OpName == AMDGPU::OpName::neg_lo || OpName == AMDGPU::OpName::neg_hi);
5038 int NegIdx = AMDGPU::getNamedOperandIdx(
Opc, OpName);
5049 const AMDGPU::OpName SrcMods[3] = {AMDGPU::OpName::src0_modifiers,
5050 AMDGPU::OpName::src1_modifiers,
5051 AMDGPU::OpName::src2_modifiers};
5053 for (
unsigned i = 0; i < 3; ++i) {
5063bool AMDGPUAsmParser::validateDPP(
const MCInst &Inst,
5066 int DppCtrlIdx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::dpp_ctrl);
5067 if (DppCtrlIdx >= 0) {
5074 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyDppCtrl, Operands);
5075 Error(S,
isGFX12() ?
"DP ALU dpp only supports row_share"
5076 :
"DP ALU dpp only supports row_newbcast");
5081 int Dpp8Idx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::dpp8);
5082 bool IsDPP = DppCtrlIdx >= 0 || Dpp8Idx >= 0;
5085 int Src1Idx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::src1);
5087 const MCOperand &Src1 = Inst.
getOperand(Src1Idx);
5090 Error(getOperandLoc(Operands, Src1Idx),
5091 "invalid operand for instruction");
5095 Error(getInstLoc(Operands),
5096 "src1 immediate operand invalid for instruction");
5106bool AMDGPUAsmParser::validateVccOperand(MCRegister
Reg)
const {
5107 return (
Reg == AMDGPU::VCC && isWave64()) ||
5108 (
Reg == AMDGPU::VCC_LO && isWave32());
5112bool AMDGPUAsmParser::validateVOPLiteral(
const MCInst &Inst,
5115 const MCInstrDesc &
Desc = MII.
get(Opcode);
5116 bool HasMandatoryLiteral = getNamedOperandIdx(Opcode, OpName::imm) != -1;
5118 !HasMandatoryLiteral && !
isVOPD(Opcode))
5123 std::optional<unsigned> LiteralOpIdx;
5126 for (
int OpIdx : OpIndices) {
5136 std::optional<int64_t>
Imm;
5142 bool IsAnotherLiteral =
false;
5143 bool IsForcedLit = findMCOperand(Operands,
OpIdx).isForcedLit();
5144 bool IsForcedLit64 = findMCOperand(Operands,
OpIdx).isForcedLit64();
5145 if (!
Imm.has_value()) {
5147 IsAnotherLiteral =
true;
5148 }
else if (IsForcedLit || IsForcedLit64 || !isInlineConstant(Inst,
OpIdx)) {
5153 HasMandatoryLiteral);
5161 (IsForcedLit64 && !HasMandatoryLiteral)) &&
5162 (!has64BitLiterals() ||
Desc.getSize() != 4)) {
5164 "invalid operand for instruction");
5169 if (!IsForcedFP64 && (IsForcedLit64 || !IsValid32Op) &&
5170 OpIdx != getNamedOperandIdx(Opcode, OpName::src0)) {
5172 "invalid operand for instruction");
5176 if (IsFP64 && IsValid32Op && !IsForcedFP64)
5183 if (IsAnotherLiteral && !HasMandatoryLiteral &&
5184 !getFeatureBits()[FeatureVOP3Literal]) {
5186 "literal operands are not supported");
5190 if (LiteralOpIdx && IsAnotherLiteral) {
5191 Error(getLaterLoc(getOperandLoc(Operands,
OpIdx),
5192 getOperandLoc(Operands, *LiteralOpIdx)),
5193 "only one unique literal operand is allowed");
5197 if (IsAnotherLiteral)
5198 LiteralOpIdx =
OpIdx;
5221bool AMDGPUAsmParser::validateAGPRLdSt(
const MCInst &Inst)
const {
5229 ? AMDGPU::OpName::data0
5230 : AMDGPU::OpName::vdata;
5232 const MCRegisterInfo *MRI = getMRI();
5233 int DstAreg =
IsAGPROperand(Inst, AMDGPU::OpName::vdst, MRI);
5237 int Data2Areg =
IsAGPROperand(Inst, AMDGPU::OpName::data1, MRI);
5238 if (Data2Areg >= 0 && Data2Areg != DataAreg)
5242 auto FB = getFeatureBits();
5243 if (FB[AMDGPU::FeatureGFX90AInsts]) {
5244 if (DataAreg < 0 || DstAreg < 0)
5246 return DstAreg == DataAreg;
5249 return DstAreg < 1 && DataAreg < 1;
/// Checks register alignment for subtargets with
/// FeatureRequiresAlignedVGPRs.
///
/// The visible code special-cases DS_READ_B96_TR_B6_vi on
/// FeatureGFX90AInsts and a set of *_TR6_B96 load opcodes on
/// FeatureGFX1250Insts; for the GLOBAL_LOAD_TR6_B96 forms the vaddr operand
/// is inspected. validateInstruction reports "vgpr tuples must be 64 bit
/// aligned" when this returns false.
5252bool AMDGPUAsmParser::validateVGPRAlign(
const MCInst &Inst)
const {
5253 auto FB = getFeatureBits();
5254 if (!FB[AMDGPU::FeatureRequiresAlignedVGPRs])
5258 const MCRegisterInfo *MRI = getMRI();
5261 if (FB[AMDGPU::FeatureGFX90AInsts] &&
Opc == AMDGPU::DS_READ_B96_TR_B6_vi)
5264 if (FB[AMDGPU::FeatureGFX1250Insts]) {
5268 case AMDGPU::DS_LOAD_TR6_B96:
5269 case AMDGPU::DS_LOAD_TR6_B96_gfx12:
5273 case AMDGPU::GLOBAL_LOAD_TR6_B96:
5274 case AMDGPU::GLOBAL_LOAD_TR6_B96_gfx1250: {
5278 int VAddrIdx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::vaddr);
5279 if (VAddrIdx != -1) {
// Low bit of the register's offset from VGPR0 set => odd register index.
5282 if ((
Sub - AMDGPU::VGPR0) & 1)
5287 case AMDGPU::GLOBAL_LOAD_TR6_B96_SADDR:
5288 case AMDGPU::GLOBAL_LOAD_TR6_B96_SADDR_gfx1250:
// Register classes for 32-bit VGPRs and AGPRs, looked up for the checks
// that follow.
5293 const MCRegisterClass &VGPR32 = MRI->
getRegClass(AMDGPU::VGPR_32RegClassID);
5294 const MCRegisterClass &AGPR32 = MRI->
getRegClass(AMDGPU::AGPR_32RegClassID);
/// Walks the parsed operands starting at index 1 and returns the start
/// location of a selected operand.
/// NOTE(review): presumably the operand selected is the blgp modifier -
/// confirm against the selection predicate.
5313SMLoc AMDGPUAsmParser::getBLGPLoc(
const OperandVector &Operands)
const {
5314 for (
unsigned i = 1, e = Operands.
size(); i != e; ++i) {
5315 AMDGPUOperand &
Op = ((AMDGPUOperand &)*Operands[i]);
5317 return Op.getStartLoc();
/// Validates how the blgp operand was spelled in the source.
///
/// IsNeg records whether the operand text begins with "neg:". UsesNeg starts
/// false and is tied to the four GFX940 V_MFMA_F64_* opcodes listed below.
/// When the two flags disagree an "invalid modifier" error is issued: "blgp
/// is not supported" if the instruction uses neg, otherwise "neg is not
/// supported".
5322bool AMDGPUAsmParser::validateBLGP(
const MCInst &Inst,
5325 int BlgpIdx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::blgp);
5328 SMLoc BLGPLoc = getBLGPLoc(Operands);
// Inspect the raw source text at the operand location to see which spelling
// was used.
5331 bool IsNeg = StringRef(BLGPLoc.
getPointer()).starts_with(
"neg:");
5332 auto FB = getFeatureBits();
5333 bool UsesNeg =
false;
5334 if (FB[AMDGPU::FeatureGFX940Insts]) {
5336 case AMDGPU::V_MFMA_F64_16X16X4F64_gfx940_acd:
5337 case AMDGPU::V_MFMA_F64_16X16X4F64_gfx940_vcd:
5338 case AMDGPU::V_MFMA_F64_4X4X4F64_gfx940_acd:
5339 case AMDGPU::V_MFMA_F64_4X4X4F64_gfx940_vcd:
// Spelling matches what the opcode expects.
5344 if (IsNeg == UsesNeg)
5348 UsesNeg ?
"invalid modifier: blgp is not supported"
5349 :
"invalid modifier: neg is not supported");
/// Validates the gfx11 S_WAITCNT_{EXPCNT,LGKMCNT,VMCNT,VSCNT} opcodes.
///
/// Other opcodes are filtered out by the opcode test below. For the listed
/// opcodes the named operand is compared against SGPR_NULL, and the
/// diagnostic "src0 must be null" is issued at that operand's location.
5354bool AMDGPUAsmParser::validateWaitCnt(
const MCInst &Inst,
5360 if (
Opc != AMDGPU::S_WAITCNT_EXPCNT_gfx11 &&
5361 Opc != AMDGPU::S_WAITCNT_LGKMCNT_gfx11 &&
5362 Opc != AMDGPU::S_WAITCNT_VMCNT_gfx11 &&
5363 Opc != AMDGPU::S_WAITCNT_VSCNT_gfx11)
// NOTE(review): the variable is named Src0Idx but holds the index of the
// operand named sdst; the diagnostic below also says "src0" - confirm the
// naming is intentional.
5366 int Src0Idx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::sdst);
5369 if (
Reg == AMDGPU::SGPR_NULL)
5372 Error(getOperandLoc(Operands, Src0Idx),
"src0 must be null");
/// Validates DS instructions.
///
/// One path defers entirely to validateGWS. Another looks up the gds operand
/// and reports "gds modifier is not supported on this GPU" at the location
/// of the GDS immediate.
5376bool AMDGPUAsmParser::validateDS(
const MCInst &Inst,
5382 return validateGWS(Inst, Operands);
5387 AMDGPU::getNamedOperandIdx(Inst.
getOpcode(), AMDGPU::OpName::gds);
5392 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyGDS, Operands);
5393 Error(S,
"gds modifier is not supported on this GPU");
/// Validates the data0 register of GWS instructions on FeatureGFX90AInsts
/// subtargets.
///
/// Only DS_GWS_INIT_vi, DS_GWS_BARRIER_vi and DS_GWS_SEMA_BR_vi are
/// examined. The data0 register is converted to an index relative to VGPR0
/// or AGPR0, and "vgpr must be even aligned" is reported at the data0
/// operand.
5401bool AMDGPUAsmParser::validateGWS(
const MCInst &Inst,
5403 if (!getFeatureBits()[AMDGPU::FeatureGFX90AInsts])
5407 if (
Opc != AMDGPU::DS_GWS_INIT_vi &&
Opc != AMDGPU::DS_GWS_BARRIER_vi &&
5408 Opc != AMDGPU::DS_GWS_SEMA_BR_vi)
5411 const MCRegisterInfo *MRI = getMRI();
5412 const MCRegisterClass &VGPR32 = MRI->
getRegClass(AMDGPU::VGPR_32RegClassID);
5414 AMDGPU::getNamedOperandIdx(Inst.
getOpcode(), AMDGPU::OpName::data0);
// Index of the register within its file: relative to VGPR0 if it is a
// 32-bit VGPR, otherwise relative to AGPR0.
5417 auto RegIdx =
Reg - (VGPR32.
contains(
Reg) ? AMDGPU::VGPR0 : AMDGPU::AGPR0);
5419 Error(getOperandLoc(Operands, Data0Pos),
"vgpr must be even aligned");
/// Validates the cache-policy (cpol) operand of an instruction.
///
/// Diagnostics visible here cover: scale_offset and nv not supported on the
/// GPU, scale_offset not supported for the instruction, cache policy on
/// SMRD/SMEM instructions, the scc modifier, and instructions that must or
/// must not use glc. One path hands the decision to validateTHAndScopeBits.
/// Most errors are located at the CPol immediate; one uses IDLoc.
5426bool AMDGPUAsmParser::validateCoherencyBits(
const MCInst &Inst,
5429 int CPolPos = AMDGPU::getNamedOperandIdx(Inst.
getOpcode(),
5430 AMDGPU::OpName::cpol);
5438 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands);
5441 Error(S,
"scale_offset is not supported on this GPU");
5444 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands);
5447 Error(S,
"nv is not supported on this GPU");
5452 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands);
5455 Error(S,
"scale_offset is not supported for this instruction");
// TH/scope validation is delegated to validateTHAndScopeBits on this path.
5459 return validateTHAndScopeBits(Inst, Operands, CPol);
5464 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands);
5465 Error(S,
"cache policy is not supported for SMRD instructions");
5469 Error(IDLoc,
"invalid cache policy for SMEM instruction");
5478 if (!(TSFlags & AllowSCCModifier)) {
5479 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands);
5483 "scc modifier is not supported for this instruction on this GPU");
5494 :
"instruction must use glc");
5499 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands);
// Points into the operand text at the offending modifier: "sc0" on GFX940,
// "glc" otherwise.
5502 &CStr.data()[CStr.find(
isGFX940() ?
"sc0" :
"glc")]);
5504 :
"instruction must not use glc");
/// Validates the th (temporal hint) and scope parts of a cache policy value.
///
/// Each failing check returns through PrintError with one of the messages
/// below: TH_ATOMIC_RETURN consistency with a destination operand, th values
/// for SMEM instructions, scope/th combinations, and th values for atomic,
/// store and load instructions.
/// \param CPol the cache policy value being checked.
5512bool AMDGPUAsmParser::validateTHAndScopeBits(
const MCInst &Inst,
5514 const unsigned CPol) {
5518 const unsigned Opcode = Inst.
getOpcode();
5519 const MCInstrDesc &TID = MII.
get(Opcode);
// Location of the CPol immediate.
5522 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands);
5529 return PrintError(
"th:TH_ATOMIC_RETURN requires a destination operand");
5534 return PrintError(
"instruction must use th:TH_ATOMIC_RETURN");
5542 return PrintError(
"invalid th value for SMEM instruction");
5549 return PrintError(
"scope and th combination is not valid");
5555 return PrintError(
"invalid th value for atomic instructions");
5558 return PrintError(
"invalid th value for store instructions");
5561 return PrintError(
"invalid th value for load instructions");
/// Rejects the TFE modifier on store instructions.
///
/// The check applies when the instruction description reports mayStore().
/// The error is only issued when the TFE location differs from the
/// instruction location.
/// NOTE(review): presumably getImmLoc falls back to the instruction location
/// when no TFE operand was written - confirm.
5567bool AMDGPUAsmParser::validateTFE(
const MCInst &Inst,
5570 if (
Desc.mayStore() &&
5572 SMLoc Loc = getImmLoc(AMDGPUOperand::ImmTyTFE, Operands);
5573 if (Loc != getInstLoc(Operands)) {
5574 Error(Loc,
"TFE modifier has no meaning for store instructions");
/// Validates that WMMA source operands match their matrix format operands.
///
/// The validateFmt lambda looks up a format operand and a source operand by
/// name and reports "wrong register tuple size for ..." at the source
/// operand. It is applied to (matrix_a_fmt, src0) and then (matrix_b_fmt,
/// src1); the second check is skipped when the first fails.
5582bool AMDGPUAsmParser::validateWMMA(
const MCInst &Inst,
5588 auto validateFmt = [&](AMDGPU::OpName FmtOp, AMDGPU::OpName SrcOp) ->
bool {
5589 int FmtIdx = AMDGPU::getNamedOperandIdx(
Opc, FmtOp);
5593 int SrcIdx = AMDGPU::getNamedOperandIdx(
Opc, SrcOp);
5601 Error(getOperandLoc(Operands, SrcIdx),
5602 "wrong register tuple size for " +
5607 return validateFmt(AMDGPU::OpName::matrix_a_fmt, AMDGPU::OpName::src0) &&
5608 validateFmt(AMDGPU::OpName::matrix_b_fmt, AMDGPU::OpName::src1);
/// Runs the target-specific validators on a matched instruction, in a fixed
/// order.
///
/// For validators that take only Inst, the diagnostic is issued here, at the
/// location of the relevant immediate operand or at IDLoc. Validators that
/// also receive Operands and/or IDLoc have no diagnostic issued here, so they
/// appear to report their own.
/// \param IDLoc location of the instruction, used for diagnostics without a
///        more specific operand location.
5611bool AMDGPUAsmParser::validateInstruction(
const MCInst &Inst, SMLoc IDLoc,
5613 if (!validateLdsDirect(Inst, Operands))
5615 if (!validateTrue16OpSel(Inst)) {
5616 Error(getImmLoc(AMDGPUOperand::ImmTyOpSel, Operands),
5617 "op_sel operand conflicts with 16-bit operand suffix");
5620 if (!validateSOPLiteral(Inst, Operands))
5622 if (!validateVOPLiteral(Inst, Operands)) {
5625 if (!validateConstantBusLimitations(Inst, Operands)) {
5628 if (!validateVOPD(Inst, Operands)) {
5631 if (!validateIntClampSupported(Inst)) {
5632 Error(getImmLoc(AMDGPUOperand::ImmTyClamp, Operands),
5633 "integer clamping is not supported on this GPU");
5636 if (!validateOpSel(Inst)) {
5637 Error(getImmLoc(AMDGPUOperand::ImmTyOpSel, Operands),
5638 "invalid op_sel operand");
5641 if (!validateNeg(Inst, AMDGPU::OpName::neg_lo)) {
5642 Error(getImmLoc(AMDGPUOperand::ImmTyNegLo, Operands),
5643 "invalid neg_lo operand");
5646 if (!validateNeg(Inst, AMDGPU::OpName::neg_hi)) {
5647 Error(getImmLoc(AMDGPUOperand::ImmTyNegHi, Operands),
5648 "invalid neg_hi operand");
5651 if (!validateDPP(Inst, Operands)) {
5655 if (!validateMIMGD16(Inst)) {
5656 Error(getImmLoc(AMDGPUOperand::ImmTyD16, Operands),
5657 "d16 modifier is not supported on this GPU");
5660 if (!validateMIMGDim(Inst, Operands)) {
5661 Error(IDLoc,
"missing dim operand");
// NOTE(review): the r128 diagnostic below is located via ImmTyD16 rather
// than an r128-specific immediate type - confirm this is intended.
5664 if (!validateTensorR128(Inst)) {
5665 Error(getImmLoc(AMDGPUOperand::ImmTyD16, Operands),
5666 "instruction must set modifier r128=0");
5669 if (!validateMIMGMSAA(Inst)) {
5670 Error(getImmLoc(AMDGPUOperand::ImmTyDim, Operands),
5671 "invalid dim; must be MSAA type");
5674 if (!validateMIMGDataSize(Inst, IDLoc)) {
5677 if (!validateMIMGAddrSize(Inst, IDLoc))
5679 if (!validateMIMGAtomicDMask(Inst)) {
5680 Error(getImmLoc(AMDGPUOperand::ImmTyDMask, Operands),
5681 "invalid atomic image dmask");
5684 if (!validateMIMGGatherDMask(Inst)) {
5685 Error(getImmLoc(AMDGPUOperand::ImmTyDMask, Operands),
5686 "invalid image_gather dmask: only one bit must be set");
5689 if (!validateMovrels(Inst, Operands)) {
5692 if (!validateOffset(Inst, Operands)) {
5695 if (!validateMAIAccWrite(Inst, Operands)) {
5698 if (!validateMAISrc2(Inst, Operands)) {
5701 if (!validateMFMA(Inst, Operands)) {
5704 if (!validateCoherencyBits(Inst, Operands, IDLoc)) {
// The AGPR load/store message depends on whether the subtarget has
// FeatureGFX90AInsts.
5708 if (!validateAGPRLdSt(Inst)) {
5709 Error(IDLoc, getFeatureBits()[AMDGPU::FeatureGFX90AInsts]
5710 ?
"invalid register class: data and dst should be all VGPR or AGPR"
5711 :
"invalid register class: agpr loads and stores not supported on this GPU"
5715 if (!validateVGPRAlign(Inst)) {
5717 "invalid register class: vgpr tuples must be 64 bit aligned");
5720 if (!validateDS(Inst, Operands)) {
5724 if (!validateBLGP(Inst, Operands)) {
5728 if (!validateDivScale(Inst)) {
5729 Error(IDLoc,
"ABS not allowed in VOP3B instructions");
5732 if (!validateWaitCnt(Inst, Operands)) {
5735 if (!validateTFE(Inst, Operands)) {
5738 if (!validateWMMA(Inst, Operands)) {
5747 unsigned VariantID = 0);
5751 unsigned VariantID);
/// Reports whether a mnemonic is supported under a given feature set.
/// Two overloads are defined here.
5753bool AMDGPUAsmParser::isSupportedMnemo(
StringRef Mnemo,
/// Overload restricted to an explicit list of assembler variants; each entry
/// of \p Variants is visited in turn.
5758bool AMDGPUAsmParser::isSupportedMnemo(StringRef Mnemo,
5759 const FeatureBitset &FBS,
5760 ArrayRef<unsigned> Variants) {
5761 for (
auto Variant : Variants) {
/// Produces the most specific diagnostic available for a mnemonic that could
/// not be matched.
///
/// The checks run in order: supported for the matched variants under the
/// current features; supported only in a different variant ("... variant of
/// this instruction is not supported"); supported only with wavefront size
/// 32; supported on some other GPU; otherwise "invalid instruction" plus a
/// suggestion.
5769bool AMDGPUAsmParser::checkUnsupportedInstruction(StringRef Mnemo,
5771 FeatureBitset FBS = ComputeAvailableFeatures(getFeatureBits());
5774 if (isSupportedMnemo(Mnemo, FBS, getMatchedVariants()))
// Drop diagnostics queued so far; a more specific one is produced below.
5779 getParser().clearPendingErrors();
5783 StringRef VariantName = getMatchedVariantName();
5784 if (!VariantName.
empty() && isSupportedMnemo(Mnemo, FBS)) {
5787 " variant of this instruction is not supported"));
// On GFX10+ with only WavefrontSize64 set, retry the lookup with both
// wavefront-size feature bits flipped (i.e. as wave32).
5791 if (
isGFX10Plus() && getFeatureBits()[AMDGPU::FeatureWavefrontSize64] &&
5792 !getFeatureBits()[AMDGPU::FeatureWavefrontSize32]) {
5794 FeatureBitset FeaturesWS32 = getFeatureBits();
5795 FeaturesWS32.
flip(AMDGPU::FeatureWavefrontSize64)
5796 .
flip(AMDGPU::FeatureWavefrontSize32);
5797 FeatureBitset AvailableFeaturesWS32 =
5798 ComputeAvailableFeatures(FeaturesWS32);
5800 if (isSupportedMnemo(Mnemo, AvailableFeaturesWS32, getMatchedVariants()))
5801 return Error(IDLoc,
"instruction requires wavesize=32");
// FeatureBitset().set() has every feature bit set: the mnemonic exists for
// some feature combination, just not for the current GPU.
5805 if (isSupportedMnemo(Mnemo, FeatureBitset().set())) {
5806 return Error(IDLoc,
"instruction not supported on this GPU (" +
5807 getSTI().
getCPU() +
")" +
": " + Mnemo);
5812 return Error(IDLoc,
"invalid instruction" + Suggestion);
5818 const auto &
Op = ((AMDGPUOperand &)*Operands[InvalidOprIdx]);
5819 if (
Op.isToken() && InvalidOprIdx > 1) {
5820 const auto &PrevOp = ((AMDGPUOperand &)*Operands[InvalidOprIdx - 1]);
5821 return PrevOp.isToken() && PrevOp.getToken() ==
"::";
/// Matches the parsed operands against each candidate assembler variant,
/// validates a successful match, and otherwise reports a diagnostic based on
/// the best match result seen.
///
/// \param IDLoc location of the instruction, used as the default diagnostic
///        location.
/// \param ErrorInfo for Match_InvalidOperand, the index of the offending
///        operand, or ~0ULL when unknown.
5826bool AMDGPUAsmParser::matchAndEmitInstruction(SMLoc IDLoc,
unsigned &Opcode,
5829 uint64_t &ErrorInfo,
5830 bool MatchingInlineAsm) {
5833 unsigned Result = Match_Success;
5834 for (
auto Variant : getMatchedVariants()) {
5836 auto R = MatchInstructionImpl(Operands, Inst, EI, MatchingInlineAsm,
// Result priority across variants: Success and MissingFeature always
// qualify; InvalidOperand qualifies unless MissingFeature was already
// recorded; MnemonicFail qualifies only when neither InvalidOperand nor
// MissingFeature was recorded.
5841 if (R == Match_Success || R == Match_MissingFeature ||
5842 (R == Match_InvalidOperand && Result != Match_MissingFeature) ||
5843 (R == Match_MnemonicFail && Result != Match_InvalidOperand &&
5844 Result != Match_MissingFeature)) {
5848 if (R == Match_Success)
5852 if (Result == Match_Success) {
5853 if (!validateInstruction(Inst, IDLoc, Operands)) {
// Operand 0 is the mnemonic token.
5860 StringRef Mnemo = ((AMDGPUOperand &)*Operands[0]).
getToken();
5861 if (checkUnsupportedInstruction(Mnemo, IDLoc)) {
5867 case Match_MissingFeature:
5871 return Error(IDLoc,
"operands are not valid for this GPU or mode");
5873 case Match_InvalidOperand: {
5874 SMLoc ErrorLoc = IDLoc;
5875 if (ErrorInfo != ~0ULL) {
// An operand index past the end means the instruction ran out of operands.
5876 if (ErrorInfo >= Operands.
size()) {
5877 return Error(IDLoc,
"too few operands for instruction");
5879 ErrorLoc = ((AMDGPUOperand &)*Operands[ErrorInfo]).getStartLoc();
5880 if (ErrorLoc == SMLoc())
5884 return Error(ErrorLoc,
"invalid VOPDY instruction");
5886 return Error(ErrorLoc,
"invalid operand for instruction");
5889 case Match_MnemonicFail:
/// Parses an absolute expression from the token stream and stores it in
/// \p Ret.
///
/// The parsed value is converted with static_cast<uint32_t>, so values that
/// do not fit in 32 bits are narrowed without any diagnostic here.
5895bool AMDGPUAsmParser::ParseAsAbsoluteExpression(uint32_t &Ret) {
5900 if (getParser().parseAbsoluteExpression(Tmp)) {
5903 Ret =
static_cast<uint32_t
>(Tmp);
/// Parses the .amdgcn_target directive.
///
/// The directive is only accepted for amdgcn triples. Its escaped-string
/// argument must equal the target streamer's target id string; otherwise an
/// error naming both ids is reported at the start of the argument.
5907bool AMDGPUAsmParser::ParseDirectiveAMDGCNTarget() {
5908 if (!getSTI().getTargetTriple().isAMDGCN())
5909 return TokError(
"directive only supported for amdgcn architecture");
5911 std::string TargetIDDirective;
// Remember where the argument starts so a mismatch can be reported there.
5912 SMLoc TargetStart = getTok().getLoc();
5913 if (getParser().parseEscapedString(TargetIDDirective))
5916 SMRange TargetRange = SMRange(TargetStart, getTok().getLoc());
5917 if (getTargetStreamer().getTargetID()->
toString() != TargetIDDirective)
5918 return getParser().Error(TargetRange.
Start,
5919 (Twine(
".amdgcn_target directive's target id ") +
5920 Twine(TargetIDDirective) +
5921 Twine(
" does not match the specified target id ") +
5922 Twine(getTargetStreamer().getTargetID()->
toString())).str());
/// Error helper for a value outside its permitted range. Directive parsers
/// in this file return its result directly from their failure paths.
/// \param Range source range of the offending value.
5927bool AMDGPUAsmParser::OutOfRangeError(SMRange
Range) {
/// Computes the granulated VGPR and SGPR block-count expressions for a
/// kernel from its next-free register expressions.
///
/// The SGPR count is range-checked against MaxAddressableNumSGPRs at two
/// points, selected by ISA major version and FeatureSGPRInitBug; a failure
/// returns through OutOfRangeError(SGPRRange).
/// \param VGPRRange,SGPRRange source ranges used for range diagnostics.
/// \param[out] VGPRBlocks,SGPRBlocks resulting block-count expressions.
5931bool AMDGPUAsmParser::calculateGPRBlocks(
5932 const FeatureBitset &Features,
const MCExpr *VCCUsed,
5933 const MCExpr *FlatScrUsed,
bool XNACKUsed,
5934 std::optional<bool> EnableWavefrontSize32,
const MCExpr *NextFreeVGPR,
5935 SMRange VGPRRange,
const MCExpr *NextFreeSGPR, SMRange SGPRRange,
5936 const MCExpr *&VGPRBlocks,
const MCExpr *&SGPRBlocks) {
5942 const MCExpr *
NumSGPRs = NextFreeSGPR;
5943 int64_t EvaluatedSGPRs;
// Major version >= 8 without the SGPR init bug: range-check at this point.
// The check only fires when the expression is already resolvable.
5950 if (
NumSGPRs->evaluateAsAbsolute(EvaluatedSGPRs) &&
Version.Major >= 8 &&
5951 !Features.
test(FeatureSGPRInitBug) &&
5952 static_cast<uint64_t
>(EvaluatedSGPRs) > MaxAddressableNumSGPRs)
5953 return OutOfRangeError(SGPRRange);
5955 const MCExpr *ExtraSGPRs =
// Major version <= 7, or targets with the SGPR init bug: range-check at this
// later point instead.
5959 if (
NumSGPRs->evaluateAsAbsolute(EvaluatedSGPRs) &&
5960 (
Version.Major <= 7 || Features.
test(FeatureSGPRInitBug)) &&
5961 static_cast<uint64_t
>(EvaluatedSGPRs) > MaxAddressableNumSGPRs)
5962 return OutOfRangeError(SGPRRange);
5964 if (Features.
test(FeatureSGPRInitBug))
// Helper lambda that builds a block-count expression from a register count
// and a granule size.
5971 auto GetNumGPRBlocks = [&Ctx](
const MCExpr *NumGPR,
5972 unsigned Granule) ->
const MCExpr * {
5976 const MCExpr *AlignToGPR =
5978 const MCExpr *DivGPR =
5984 VGPRBlocks = GetNumGPRBlocks(
/// Parses a .amdhsa_kernel ... .end_amdhsa_kernel block and emits the
/// resulting kernel descriptor through the target streamer.
///
/// Each ".amdhsa_*" directive inside the block carries one expression value.
/// Directives may not be repeated, several are gated on GPU generation or
/// features, and .amdhsa_next_free_vgpr / .amdhsa_next_free_sgpr are
/// mandatory. After the block ends, the register block counts, user SGPR
/// count, kernarg preload, accum offset and shared VGPR count are
/// cross-checked before the descriptor is emitted.
5993bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() {
5994 if (!getSTI().getTargetTriple().isAMDGCN())
5995 return TokError(
"directive only supported for amdgcn architecture");
5998 return TokError(
"directive only supported for amdhsa OS");
6000 StringRef KernelName;
6001 if (getParser().parseIdentifier(KernelName))
6004 AMDGPU::MCKernelDescriptor KD =
6016 const MCExpr *NextFreeVGPR = ZeroExpr;
6018 const MCExpr *NamedBarCnt = ZeroExpr;
6019 uint64_t SharedVGPRCount = 0;
6020 uint64_t PreloadLength = 0;
6021 uint64_t PreloadOffset = 0;
6023 const MCExpr *NextFreeSGPR = ZeroExpr;
// Running total of user SGPRs implied by the .amdhsa_user_sgpr_* directives
// seen so far.
6026 unsigned ImpliedUserSGPRCount = 0;
// Only set when .amdhsa_user_sgpr_count is given explicitly.
6030 std::optional<unsigned> ExplicitUserSGPRCount;
6031 const MCExpr *ReserveVCC = OneExpr;
6032 const MCExpr *ReserveFlatScr = OneExpr;
6033 std::optional<bool> EnableWavefrontSize32;
6039 SMRange IDRange = getTok().getLocRange();
6040 if (!parseId(
ID,
"expected .amdhsa_ directive or .end_amdhsa_kernel"))
6043 if (
ID ==
".end_amdhsa_kernel")
6047 return TokError(
".amdhsa_ directives cannot be repeated");
// Every directive is followed by a single expression; record its source
// range for diagnostics.
6049 SMLoc ValStart = getLoc();
6050 const MCExpr *ExprVal;
6051 if (getParser().parseExpression(ExprVal))
6053 SMLoc ValEnd = getLoc();
6054 SMRange ValRange = SMRange(ValStart, ValEnd);
6057 uint64_t Val = IVal;
6058 bool EvaluatableExpr;
6059 if ((EvaluatableExpr = ExprVal->evaluateAsAbsolute(IVal))) {
6061 return OutOfRangeError(ValRange);
// PARSE_BITS_ENTRY: range-check Val against the ENTRY bit width, then store
// VALUE into FIELD at ENTRY's shift/mask.
6065#define PARSE_BITS_ENTRY(FIELD, ENTRY, VALUE, RANGE) \
6066 if (!isUInt<ENTRY##_WIDTH>(Val)) \
6067 return OutOfRangeError(RANGE); \
6068 AMDGPU::MCKernelDescriptor::bits_set(FIELD, VALUE, ENTRY##_SHIFT, ENTRY, \
6073#define EXPR_RESOLVE_OR_ERROR(RESOLVED) \
6075 return Error(IDRange.Start, "directive should have resolvable expression", \
6078 if (
ID ==
".amdhsa_group_segment_fixed_size") {
6081 return OutOfRangeError(ValRange);
6083 }
else if (
ID ==
".amdhsa_private_segment_fixed_size") {
6086 return OutOfRangeError(ValRange);
6088 }
else if (
ID ==
".amdhsa_kernarg_size") {
6090 return OutOfRangeError(ValRange);
6092 }
else if (
ID ==
".amdhsa_user_sgpr_count") {
6094 ExplicitUserSGPRCount = Val;
6095 }
else if (
ID ==
".amdhsa_user_sgpr_private_segment_buffer") {
6099 "directive is not supported with architected flat scratch",
6102 KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER,
6105 ImpliedUserSGPRCount += 4;
6106 }
else if (
ID ==
".amdhsa_user_sgpr_kernarg_preload_length") {
6109 return Error(IDRange.
Start,
"directive requires gfx90a+", IDRange);
6112 return OutOfRangeError(ValRange);
6116 ImpliedUserSGPRCount += Val;
6117 PreloadLength = Val;
6119 }
else if (
ID ==
".amdhsa_user_sgpr_kernarg_preload_offset") {
6122 return Error(IDRange.
Start,
"directive requires gfx90a+", IDRange);
6125 return OutOfRangeError(ValRange);
6129 PreloadOffset = Val;
6130 }
else if (
ID ==
".amdhsa_user_sgpr_dispatch_ptr") {
6133 KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR, ExprVal,
6136 ImpliedUserSGPRCount += 2;
6137 }
else if (
ID ==
".amdhsa_user_sgpr_queue_ptr") {
6140 KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR, ExprVal,
6143 ImpliedUserSGPRCount += 2;
6144 }
else if (
ID ==
".amdhsa_user_sgpr_kernarg_segment_ptr") {
6147 KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR,
6150 ImpliedUserSGPRCount += 2;
6151 }
else if (
ID ==
".amdhsa_user_sgpr_dispatch_id") {
6154 KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID, ExprVal,
6157 ImpliedUserSGPRCount += 2;
6158 }
else if (
ID ==
".amdhsa_user_sgpr_flat_scratch_init") {
6161 "directive is not supported with architected flat scratch",
6165 KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT,
6168 ImpliedUserSGPRCount += 2;
6169 }
else if (
ID ==
".amdhsa_user_sgpr_private_segment_size") {
6172 KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE,
6175 ImpliedUserSGPRCount += 1;
6176 }
else if (
ID ==
".amdhsa_wavefront_size32") {
6178 if (IVersion.
Major < 10)
6179 return Error(IDRange.
Start,
"directive requires gfx10+", IDRange);
6180 EnableWavefrontSize32 = Val;
6182 KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32, ExprVal,
6184 }
else if (
ID ==
".amdhsa_uses_dynamic_stack") {
6186 KERNEL_CODE_PROPERTY_USES_DYNAMIC_STACK, ExprVal,
6188 }
else if (
ID ==
".amdhsa_system_sgpr_private_segment_wavefront_offset") {
6191 "directive is not supported with architected flat scratch",
6194 COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT, ExprVal,
6196 }
else if (
ID ==
".amdhsa_enable_private_segment") {
6200 "directive is not supported without architected flat scratch",
6203 COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT, ExprVal,
6205 }
else if (
ID ==
".amdhsa_system_sgpr_workgroup_id_x") {
6207 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X, ExprVal,
6209 }
else if (
ID ==
".amdhsa_system_sgpr_workgroup_id_y") {
6211 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Y, ExprVal,
6213 }
else if (
ID ==
".amdhsa_system_sgpr_workgroup_id_z") {
6215 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Z, ExprVal,
6217 }
else if (
ID ==
".amdhsa_system_sgpr_workgroup_info") {
6219 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_INFO, ExprVal,
6221 }
else if (
ID ==
".amdhsa_system_vgpr_workitem_id") {
6223 COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID, ExprVal,
6225 }
else if (
ID ==
".amdhsa_next_free_vgpr") {
6226 VGPRRange = ValRange;
6227 NextFreeVGPR = ExprVal;
6228 }
else if (
ID ==
".amdhsa_next_free_sgpr") {
6229 SGPRRange = ValRange;
6230 NextFreeSGPR = ExprVal;
6231 }
else if (
ID ==
".amdhsa_accum_offset") {
6233 return Error(IDRange.
Start,
"directive requires gfx90a+", IDRange);
6234 AccumOffset = ExprVal;
6235 }
else if (
ID ==
".amdhsa_named_barrier_count") {
6237 return Error(IDRange.
Start,
"directive requires gfx1250+", IDRange);
6238 NamedBarCnt = ExprVal;
6239 }
else if (
ID ==
".amdhsa_reserve_vcc") {
6241 return OutOfRangeError(ValRange);
6242 ReserveVCC = ExprVal;
6243 }
else if (
ID ==
".amdhsa_reserve_flat_scratch") {
6244 if (IVersion.
Major < 7)
6245 return Error(IDRange.
Start,
"directive requires gfx7+", IDRange);
6248 "directive is not supported with architected flat scratch",
6251 return OutOfRangeError(ValRange);
6252 ReserveFlatScr = ExprVal;
6253 }
else if (
ID ==
".amdhsa_reserve_xnack_mask") {
6254 if (IVersion.
Major < 8)
6255 return Error(IDRange.
Start,
"directive requires gfx8+", IDRange);
6257 return OutOfRangeError(ValRange);
6258 if (Val != getTargetStreamer().getTargetID()->isXnackOnOrAny())
6259 return getParser().Error(IDRange.
Start,
".amdhsa_reserve_xnack_mask does not match target id",
6261 }
else if (
ID ==
".amdhsa_float_round_mode_32") {
6263 COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32, ExprVal,
6265 }
else if (
ID ==
".amdhsa_float_round_mode_16_64") {
6267 COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_16_64, ExprVal,
6269 }
else if (
ID ==
".amdhsa_float_denorm_mode_32") {
6271 COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_32, ExprVal,
6273 }
else if (
ID ==
".amdhsa_float_denorm_mode_16_64") {
6275 COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64, ExprVal,
6277 }
else if (
ID ==
".amdhsa_dx10_clamp") {
6278 if (!getSTI().hasFeature(AMDGPU::FeatureDX10ClampAndIEEEMode))
6279 return Error(IDRange.
Start,
"directive unsupported on gfx1170+",
6282 COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_DX10_CLAMP, ExprVal,
6284 }
else if (
ID ==
".amdhsa_ieee_mode") {
6285 if (!getSTI().hasFeature(AMDGPU::FeatureDX10ClampAndIEEEMode))
6286 return Error(IDRange.
Start,
"directive unsupported on gfx1170+",
6289 COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_IEEE_MODE, ExprVal,
6291 }
else if (
ID ==
".amdhsa_fp16_overflow") {
6292 if (IVersion.
Major < 9)
6293 return Error(IDRange.
Start,
"directive requires gfx9+", IDRange);
6295 COMPUTE_PGM_RSRC1_GFX9_PLUS_FP16_OVFL, ExprVal,
6297 }
else if (
ID ==
".amdhsa_tg_split") {
6299 return Error(IDRange.
Start,
"directive requires gfx90a+", IDRange);
6302 }
else if (
ID ==
".amdhsa_workgroup_processor_mode") {
6305 "directive unsupported on " + getSTI().
getCPU(), IDRange);
6307 COMPUTE_PGM_RSRC1_GFX10_PLUS_WGP_MODE, ExprVal,
6309 }
else if (
ID ==
".amdhsa_memory_ordered") {
6310 if (IVersion.
Major < 10)
6311 return Error(IDRange.
Start,
"directive requires gfx10+", IDRange);
6313 COMPUTE_PGM_RSRC1_GFX10_PLUS_MEM_ORDERED, ExprVal,
6315 }
else if (
ID ==
".amdhsa_forward_progress") {
6316 if (IVersion.
Major < 10)
6317 return Error(IDRange.
Start,
"directive requires gfx10+", IDRange);
6319 COMPUTE_PGM_RSRC1_GFX10_PLUS_FWD_PROGRESS, ExprVal,
6321 }
else if (
ID ==
".amdhsa_shared_vgpr_count") {
6323 if (IVersion.
Major < 10 || IVersion.
Major >= 12)
6324 return Error(IDRange.
Start,
"directive requires gfx10 or gfx11",
6326 SharedVGPRCount = Val;
6328 COMPUTE_PGM_RSRC3_GFX10_GFX11_SHARED_VGPR_COUNT, ExprVal,
6330 }
else if (
ID ==
".amdhsa_inst_pref_size") {
6331 if (IVersion.
Major < 11)
6332 return Error(IDRange.
Start,
"directive requires gfx11+", IDRange);
6333 if (IVersion.
Major == 11) {
6335 COMPUTE_PGM_RSRC3_GFX11_INST_PREF_SIZE, ExprVal,
6339 COMPUTE_PGM_RSRC3_GFX12_PLUS_INST_PREF_SIZE, ExprVal,
6342 }
else if (
ID ==
".amdhsa_exception_fp_ieee_invalid_op") {
6345 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION,
6347 }
else if (
ID ==
".amdhsa_exception_fp_denorm_src") {
6349 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_FP_DENORMAL_SOURCE,
6351 }
else if (
ID ==
".amdhsa_exception_fp_ieee_div_zero") {
6354 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO,
6356 }
else if (
ID ==
".amdhsa_exception_fp_ieee_overflow") {
6358 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW,
6360 }
else if (
ID ==
".amdhsa_exception_fp_ieee_underflow") {
6362 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW,
6364 }
else if (
ID ==
".amdhsa_exception_fp_ieee_inexact") {
6366 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INEXACT,
6368 }
else if (
ID ==
".amdhsa_exception_int_div_zero") {
6370 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO,
6372 }
else if (
ID ==
".amdhsa_round_robin_scheduling") {
6373 if (IVersion.
Major < 12)
6374 return Error(IDRange.
Start,
"directive requires gfx12+", IDRange);
6376 COMPUTE_PGM_RSRC1_GFX12_PLUS_ENABLE_WG_RR_EN, ExprVal,
6379 return Error(IDRange.
Start,
"unknown .amdhsa_kernel directive", IDRange);
6382#undef PARSE_BITS_ENTRY
// Both next-free register directives are mandatory.
6385 if (!Seen.
contains(
".amdhsa_next_free_vgpr"))
6386 return TokError(
".amdhsa_next_free_vgpr directive is required");
6388 if (!Seen.
contains(
".amdhsa_next_free_sgpr"))
6389 return TokError(
".amdhsa_next_free_sgpr directive is required");
// An explicit .amdhsa_user_sgpr_count takes precedence over the implied
// total.
6391 unsigned UserSGPRCount = ExplicitUserSGPRCount.value_or(ImpliedUserSGPRCount);
6393 return TokError(
"too many user SGPRs enabled, found " +
6394 Twine(UserSGPRCount) +
", but only " +
6400 if (PreloadLength) {
6406 const MCExpr *VGPRBlocks;
6407 const MCExpr *SGPRBlocks;
6408 if (calculateGPRBlocks(getFeatureBits(), ReserveVCC, ReserveFlatScr,
6409 getTargetStreamer().getTargetID()->isXnackOnOrAny(),
6410 EnableWavefrontSize32, NextFreeVGPR,
6411 VGPRRange, NextFreeSGPR, SGPRRange, VGPRBlocks,
// Block counts are only range-checked when they already resolve to a
// constant.
6415 int64_t EvaluatedVGPRBlocks;
6416 bool VGPRBlocksEvaluatable =
6417 VGPRBlocks->evaluateAsAbsolute(EvaluatedVGPRBlocks);
6418 if (VGPRBlocksEvaluatable &&
6420 static_cast<uint64_t
>(EvaluatedVGPRBlocks))) {
6421 return OutOfRangeError(VGPRRange);
6425 COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT_SHIFT,
6426 COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT,
getContext());
6428 int64_t EvaluatedSGPRBlocks;
6429 if (SGPRBlocks->evaluateAsAbsolute(EvaluatedSGPRBlocks) &&
6431 static_cast<uint64_t
>(EvaluatedSGPRBlocks)))
6432 return OutOfRangeError(SGPRRange);
6435 COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT_SHIFT,
6436 COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT,
getContext());
// An explicit count must not be smaller than what the enabled user SGPRs
// imply.
// NOTE(review): the message says "amdgpu_user_sgpr_count" while the
// directive is spelled ".amdhsa_user_sgpr_count" - confirm the wording.
6438 if (ExplicitUserSGPRCount && ImpliedUserSGPRCount > *ExplicitUserSGPRCount)
6439 return TokError(
"amdgpu_user_sgpr_count smaller than implied by "
6440 "enabled user SGPRs");
6446 COMPUTE_PGM_RSRC2_GFX125_USER_SGPR_COUNT_SHIFT,
6447 COMPUTE_PGM_RSRC2_GFX125_USER_SGPR_COUNT,
getContext());
6452 COMPUTE_PGM_RSRC2_GFX6_GFX120_USER_SGPR_COUNT_SHIFT,
6453 COMPUTE_PGM_RSRC2_GFX6_GFX120_USER_SGPR_COUNT,
getContext());
6458 return TokError(
"Kernarg size should be resolvable");
6459 uint64_t kernarg_size = IVal;
// Preload length and offset are each scaled by 4 before being compared with
// the kernarg size; the check is skipped when either the preload length or
// the kernarg size is zero.
6460 if (PreloadLength && kernarg_size &&
6461 (PreloadLength * 4 + PreloadOffset * 4 > kernarg_size))
6462 return TokError(
"Kernarg preload length + offset is larger than the "
6463 "kernarg segment size");
6466 if (!Seen.
contains(
".amdhsa_accum_offset"))
6467 return TokError(
".amdhsa_accum_offset directive is required");
// NOTE(review): EvaluatedAccum is copied into UEvaluatedAccum even when the
// evaluation failed (leaving it unset); the copy is only consulted under
// AccumEvaluatable below.
6468 int64_t EvaluatedAccum;
6469 bool AccumEvaluatable = AccumOffset->evaluateAsAbsolute(EvaluatedAccum);
6470 uint64_t UEvaluatedAccum = EvaluatedAccum;
// accum_offset must be a multiple of 4 within [4, 256].
6471 if (AccumEvaluatable &&
6472 (UEvaluatedAccum < 4 || UEvaluatedAccum > 256 || (UEvaluatedAccum & 3)))
6473 return TokError(
"accum_offset should be in range [4..256] in "
6476 int64_t EvaluatedNumVGPR;
6477 if (NextFreeVGPR->evaluateAsAbsolute(EvaluatedNumVGPR) &&
6480 alignTo(std::max((uint64_t)1, (uint64_t)EvaluatedNumVGPR), 4))
6481 return TokError(
"accum_offset exceeds total VGPR allocation");
6487 COMPUTE_PGM_RSRC3_GFX90A_ACCUM_OFFSET_SHIFT,
6488 COMPUTE_PGM_RSRC3_GFX90A_ACCUM_OFFSET,
6494 COMPUTE_PGM_RSRC3_GFX125_NAMED_BAR_CNT_SHIFT,
6495 COMPUTE_PGM_RSRC3_GFX125_NAMED_BAR_CNT,
// Shared VGPR checks apply to major versions 10 and 11 only.
6498 if (IVersion.
Major >= 10 && IVersion.
Major < 12) {
6500 if (SharedVGPRCount && EnableWavefrontSize32 && *EnableWavefrontSize32) {
6501 return TokError(
"shared_vgpr_count directive not valid on "
6502 "wavefront size 32");
6505 if (VGPRBlocksEvaluatable &&
6506 (SharedVGPRCount * 2 +
static_cast<uint64_t
>(EvaluatedVGPRBlocks) >
6508 return TokError(
"shared_vgpr_count*2 + "
6509 "compute_pgm_rsrc1.GRANULATED_WORKITEM_VGPR_COUNT cannot "
// All checks passed: hand the descriptor and register-usage expressions to
// the target streamer.
6514 getTargetStreamer().EmitAmdhsaKernelDescriptor(getSTI(), KernelName, KD,
6515 NextFreeVGPR, NextFreeSGPR,
6516 ReserveVCC, ReserveFlatScr);
/// Parses the code object version directive: reads one absolute expression
/// and forwards it to the target streamer.
6520bool AMDGPUAsmParser::ParseDirectiveAMDHSACodeObjectVersion() {
6522 if (ParseAsAbsoluteExpression(
Version))
6525 getTargetStreamer().EmitDirectiveAMDHSACodeObjectVersion(
Version);
/// Parses the value of one amd_kernel_code_t field named \p ID into \p C.
///
/// "max_scratch_backing_memory_byte_size" has its value consumed up to the
/// end of the statement. Other fields go through C.ParseKernelCodeT, whose
/// error text becomes the diagnostic. Afterwards "enable_wavefront_size32"
/// and "wavefront_size" are checked against the GPU generation and the
/// wavefront-size features.
6529bool AMDGPUAsmParser::ParseAMDKernelCodeTValue(StringRef
ID,
6530 AMDGPUMCKernelCodeT &
C) {
6533 if (
ID ==
"max_scratch_backing_memory_byte_size") {
6534 Parser.eatToEndOfStatement();
// Collect the field parser's error text so it can be reported via TokError.
6538 SmallString<40> ErrStr;
6539 raw_svector_ostream Err(ErrStr);
6540 if (!
C.ParseKernelCodeT(
ID, getParser(), Err)) {
6541 return TokError(Err.
str());
6545 if (
ID ==
"enable_wavefront_size32") {
6548 return TokError(
"enable_wavefront_size32=1 is only allowed on GFX10+");
6550 return TokError(
"enable_wavefront_size32=1 requires +WavefrontSize32");
6553 return TokError(
"enable_wavefront_size32=0 requires +WavefrontSize64");
// wavefront_size of 5 is tied to WavefrontSize32 and 6 to WavefrontSize64
// by the diagnostics below.
6557 if (
ID ==
"wavefront_size") {
6558 if (
C.wavefront_size == 5) {
6560 return TokError(
"wavefront_size=5 is only allowed on GFX10+");
6562 return TokError(
"wavefront_size=5 requires +WavefrontSize32");
6563 }
else if (
C.wavefront_size == 6) {
6565 return TokError(
"wavefront_size=6 requires +WavefrontSize64");
/// Parses an amd_kernel_code_t block: reads field identifiers until
/// ".end_amd_kernel_code_t", parses each field through
/// ParseAMDKernelCodeTValue, and emits the resulting structure through the
/// target streamer.
6572bool AMDGPUAsmParser::ParseDirectiveAMDKernelCodeT() {
6573 AMDGPUMCKernelCodeT KernelCode;
6582 if (!parseId(
ID,
"expected value identifier or .end_amd_kernel_code_t"))
6585 if (
ID ==
".end_amd_kernel_code_t")
6588 if (ParseAMDKernelCodeTValue(
ID, KernelCode))
6593 getTargetStreamer().EmitAMDKernelCodeT(KernelCode);
/// Parses a directive that takes a kernel symbol name and passes that name
/// to the target streamer's EmitAMDGPUSymbolType.
6598bool AMDGPUAsmParser::ParseDirectiveAMDGPUHsaKernel() {
6599 StringRef KernelName;
6600 if (!parseId(KernelName,
"expected symbol name"))
6603 getTargetStreamer().EmitAMDGPUSymbolType(KernelName,
/// Parses the .amd_amdgpu_isa directive.
///
/// The directive is only accepted for amdgcn triples. Its string argument
/// must equal the target streamer's target id string; on success the ISA
/// version is emitted through the target streamer.
6610bool AMDGPUAsmParser::ParseDirectiveISAVersion() {
6611 if (!getSTI().getTargetTriple().isAMDGCN()) {
6612 return Error(getLoc(),
6613 ".amd_amdgpu_isa directive is not available on non-amdgcn "
// The current token's string contents are the target id written in the
// source.
6617 auto TargetIDDirective = getLexer().getTok().getStringContents();
6618 if (getTargetStreamer().getTargetID()->
toString() != TargetIDDirective)
6619 return Error(getParser().getTok().getLoc(),
"target id must match options");
6621 getTargetStreamer().EmitISAVersion();
/// Parses an HSA metadata block: the collected metadata text is passed to
/// the target streamer's EmitHSAMetadataV3, and "invalid HSA metadata" is
/// reported if the streamer rejects it.
6627bool AMDGPUAsmParser::ParseDirectiveHSAMetadata() {
6630 std::string HSAMetadataString;
6635 if (!getTargetStreamer().EmitHSAMetadataV3(HSAMetadataString))
6636 return Error(getLoc(),
"invalid HSA metadata");
/// Collects raw source text into \p CollectString until the directive named
/// by \p AssemblerDirectiveEnd is found.
///
/// Each collected statement is followed by the assembler's separator string.
/// If the end directive is never found, "expected directive <end> not found"
/// is reported.
/// \param AssemblerDirectiveBegin name of the opening directive.
/// \param AssemblerDirectiveEnd name of the closing directive to search for.
/// \param[out] CollectString receives the text between the two directives.
6643bool AMDGPUAsmParser::ParseToEndDirective(
const char *AssemblerDirectiveBegin,
6644 const char *AssemblerDirectiveEnd,
6645 std::string &CollectString) {
6647 raw_string_ostream CollectStream(CollectString);
// Stop the lexer from skipping whitespace while the text is collected;
// skipping is turned back on below.
6649 getLexer().setSkipSpace(
false);
6651 bool FoundEnd =
false;
6654 CollectStream << getTokenStr();
6658 if (trySkipId(AssemblerDirectiveEnd)) {
6663 CollectStream << Parser.parseStringToEndOfStatement()
6664 <<
getContext().getAsmInfo().getSeparatorString();
6666 Parser.eatToEndOfStatement();
6669 getLexer().setSkipSpace(
true);
6672 return TokError(Twine(
"expected directive ") +
6673 Twine(AssemblerDirectiveEnd) + Twine(
" not found"));
6680bool AMDGPUAsmParser::ParseDirectivePALMetadataBegin() {
6686 auto *PALMetadata = getTargetStreamer().getPALMetadata();
6687 if (!PALMetadata->setFromString(
String))
6688 return Error(getLoc(),
"invalid PAL metadata");
6693bool AMDGPUAsmParser::ParseDirectivePALMetadata() {
6695 return Error(getLoc(),
6697 "not available on non-amdpal OSes")).str());
6700 auto *PALMetadata = getTargetStreamer().getPALMetadata();
6701 PALMetadata->setLegacy();
6704 if (ParseAsAbsoluteExpression(
Key)) {
6705 return TokError(Twine(
"invalid value in ") +
6709 return TokError(Twine(
"expected an even number of values in ") +
6712 if (ParseAsAbsoluteExpression(
Value)) {
6713 return TokError(Twine(
"invalid value in ") +
6716 PALMetadata->setRegister(
Key,
Value);
6725bool AMDGPUAsmParser::ParseDirectiveAMDGPULDS() {
6726 if (getParser().checkForValidSection())
6730 SMLoc NameLoc = getLoc();
6731 if (getParser().parseIdentifier(Name))
6732 return TokError(
"expected identifier in directive");
6735 if (getParser().parseComma())
6741 SMLoc SizeLoc = getLoc();
6742 if (getParser().parseAbsoluteExpression(
Size))
6745 return Error(SizeLoc,
"size must be non-negative");
6746 if (
Size > LocalMemorySize)
6747 return Error(SizeLoc,
"size is too large");
6749 int64_t Alignment = 4;
6751 SMLoc AlignLoc = getLoc();
6752 if (getParser().parseAbsoluteExpression(Alignment))
6755 return Error(AlignLoc,
"alignment must be a power of two");
6760 if (Alignment >= 1u << 31)
6761 return Error(AlignLoc,
"alignment is too large");
6767 Symbol->redefineIfPossible();
6768 if (!
Symbol->isUndefined())
6769 return Error(NameLoc,
"invalid symbol redefinition");
6771 getTargetStreamer().emitAMDGPULDS(Symbol,
Size,
Align(Alignment));
6775bool AMDGPUAsmParser::ParseDirectiveAMDGPUInfo() {
6776 if (getParser().checkForValidSection())
6780 if (getParser().parseIdentifier(FuncName))
6781 return TokError(
"expected symbol name after .amdgpu_info");
6784 AMDGPU::InfoSectionData ParsedInfoData;
6785 AMDGPU::FuncInfo FI;
6787 bool HasScalarAttrs =
false;
6794 SMLoc IDLoc = getLoc();
6795 if (!parseId(
ID,
"expected directive or .end_amdgpu_info"))
6798 if (
ID ==
".end_amdgpu_info")
6806 return Error(IDLoc,
"unknown .amdgpu_info directive '" +
ID +
"'");
6808 if (Dir ==
"flags") {
6810 if (getParser().parseAbsoluteExpression(Val))
6813 FI.
UsesVCC = !!(
Flags & AMDGPU::FuncInfoFlags::FUNC_USES_VCC);
6815 !!(
Flags & AMDGPU::FuncInfoFlags::FUNC_USES_FLAT_SCRATCH);
6817 HasScalarAttrs =
true;
6818 }
else if (Dir ==
"num_sgpr") {
6820 if (getParser().parseAbsoluteExpression(Val))
6822 FI.
NumSGPR =
static_cast<uint32_t
>(Val);
6823 HasScalarAttrs =
true;
6824 }
else if (Dir ==
"num_vgpr") {
6826 if (getParser().parseAbsoluteExpression(Val))
6829 HasScalarAttrs =
true;
6830 }
else if (Dir ==
"num_agpr") {
6832 if (getParser().parseAbsoluteExpression(Val))
6835 HasScalarAttrs =
true;
6836 }
else if (Dir ==
"private_segment_size") {
6838 if (getParser().parseAbsoluteExpression(Val))
6841 HasScalarAttrs =
true;
6842 }
else if (Dir ==
"use") {
6844 if (getParser().parseIdentifier(ResName))
6845 return TokError(
"expected resource symbol for .amdgpu_use");
6846 ParsedInfoData.
Uses.push_back(
6847 {FuncSym,
getContext().getOrCreateSymbol(ResName)});
6848 }
else if (Dir ==
"call") {
6850 if (getParser().parseIdentifier(DstName))
6851 return TokError(
"expected callee symbol for .amdgpu_call");
6852 ParsedInfoData.
Calls.push_back(
6853 {FuncSym,
getContext().getOrCreateSymbol(DstName)});
6854 }
else if (Dir ==
"indirect_call") {
6856 if (getParser().parseEscapedString(TypeId))
6857 return TokError(
"expected type ID string for .amdgpu_indirect_call");
6858 ParsedInfoData.
IndirectCalls.push_back({FuncSym, std::move(TypeId)});
6859 }
else if (Dir ==
"typeid") {
6861 if (getParser().parseEscapedString(TypeId))
6862 return TokError(
"expected type ID string for .amdgpu_typeid");
6863 ParsedInfoData.
TypeIds.push_back({FuncSym, std::move(TypeId)});
6865 return Error(IDLoc,
"unknown .amdgpu_info directive '" +
ID +
"'");
6870 ParsedInfoData.
Funcs.push_back(std::move(FI));
6872 AMDGPU::InfoSectionData &
Data = InfoData ? *InfoData : InfoData.emplace();
6873 for (AMDGPU::FuncInfo &Func : ParsedInfoData.
Funcs)
6874 Data.Funcs.push_back(std::move(Func));
6875 for (std::pair<MCSymbol *, MCSymbol *> &Use : ParsedInfoData.
Uses)
6876 Data.Uses.push_back(Use);
6877 for (std::pair<MCSymbol *, MCSymbol *> &
Call : ParsedInfoData.
Calls)
6879 for (std::pair<MCSymbol *, std::string> &
IndirectCall :
6882 for (std::pair<MCSymbol *, std::string> &TypeId : ParsedInfoData.
TypeIds)
6883 Data.TypeIds.push_back(std::move(TypeId));
6888void AMDGPUAsmParser::onEndOfFile() {
6890 getTargetStreamer().emitAMDGPUInfo(*InfoData);
6893bool AMDGPUAsmParser::ParseDirective(AsmToken DirectiveID) {
6894 StringRef IDVal = DirectiveID.
getString();
6897 if (IDVal ==
".amdhsa_kernel")
6898 return ParseDirectiveAMDHSAKernel();
6900 if (IDVal ==
".amdhsa_code_object_version")
6901 return ParseDirectiveAMDHSACodeObjectVersion();
6905 return ParseDirectiveHSAMetadata();
6907 if (IDVal ==
".amd_kernel_code_t")
6908 return ParseDirectiveAMDKernelCodeT();
6910 if (IDVal ==
".amdgpu_hsa_kernel")
6911 return ParseDirectiveAMDGPUHsaKernel();
6913 if (IDVal ==
".amd_amdgpu_isa")
6914 return ParseDirectiveISAVersion();
6918 Twine(
" directive is "
6919 "not available on non-amdhsa OSes"))
6924 if (IDVal ==
".amdgcn_target")
6925 return ParseDirectiveAMDGCNTarget();
6927 if (IDVal ==
".amdgpu_lds")
6928 return ParseDirectiveAMDGPULDS();
6930 if (IDVal ==
".amdgpu_info")
6931 return ParseDirectiveAMDGPUInfo();
6934 return ParseDirectivePALMetadataBegin();
6937 return ParseDirectivePALMetadata();
6942bool AMDGPUAsmParser::subtargetHasRegister(
const MCRegisterInfo &MRI,
6949 return hasSGPR104_SGPR105();
6952 case SRC_SHARED_BASE_LO:
6953 case SRC_SHARED_BASE:
6954 case SRC_SHARED_LIMIT_LO:
6955 case SRC_SHARED_LIMIT:
6956 case SRC_PRIVATE_BASE_LO:
6957 case SRC_PRIVATE_BASE:
6958 case SRC_PRIVATE_LIMIT_LO:
6959 case SRC_PRIVATE_LIMIT:
6961 case SRC_FLAT_SCRATCH_BASE_LO:
6962 case SRC_FLAT_SCRATCH_BASE_HI:
6963 return hasGloballyAddressableScratch();
6964 case SRC_POPS_EXITING_WAVE_ID:
6976 return (
isVI() ||
isGFX9()) && getTargetStreamer().getTargetID()->isXnackSupported();
7006 return hasSGPR102_SGPR103();
7011ParseStatus AMDGPUAsmParser::parseOperand(
OperandVector &Operands,
7014 ParseStatus Res = parseVOPD(Operands);
7019 Res = MatchOperandParserImpl(Operands, Mnemonic);
7031 SMLoc LBraceLoc = getLoc();
7036 auto Loc = getLoc();
7037 Res = parseReg(Operands);
7039 Error(Loc,
"expected a register");
7043 RBraceLoc = getLoc();
7048 "expected a comma or a closing square bracket"))
7052 if (Operands.
size() - Prefix > 1) {
7054 AMDGPUOperand::CreateToken(
this,
"[", LBraceLoc));
7055 Operands.
push_back(AMDGPUOperand::CreateToken(
this,
"]", RBraceLoc));
7061 return parseRegOrImm(Operands);
7064StringRef AMDGPUAsmParser::parseMnemonicSuffix(StringRef Name) {
7066 setForcedEncodingSize(0);
7067 setForcedDPP(
false);
7068 setForcedSDWA(
false);
7070 if (
Name.consume_back(
"_e64_dpp")) {
7072 setForcedEncodingSize(64);
7075 if (
Name.consume_back(
"_e64")) {
7076 setForcedEncodingSize(64);
7079 if (
Name.consume_back(
"_e32")) {
7080 setForcedEncodingSize(32);
7083 if (
Name.consume_back(
"_dpp")) {
7087 if (
Name.consume_back(
"_sdwa")) {
7088 setForcedSDWA(
true);
7096 unsigned VariantID);
7102 Name = parseMnemonicSuffix(Name);
7108 Operands.
push_back(AMDGPUOperand::CreateToken(
this, Name, NameLoc));
7110 bool IsMIMG = Name.starts_with(
"image_");
7113 OperandMode
Mode = OperandMode_Default;
7115 Mode = OperandMode_NSA;
7119 checkUnsupportedInstruction(Name, NameLoc);
7120 if (!Parser.hasPendingError()) {
7123 :
"not a valid operand.";
7124 Error(getLoc(), Msg);
7143ParseStatus AMDGPUAsmParser::parseTokenOp(StringRef Name,
7146 if (!trySkipId(Name))
7149 Operands.
push_back(AMDGPUOperand::CreateToken(
this, Name, S));
7153ParseStatus AMDGPUAsmParser::parseIntWithPrefix(
const char *Prefix,
7162ParseStatus AMDGPUAsmParser::parseIntWithPrefix(
7163 const char *Prefix,
OperandVector &Operands, AMDGPUOperand::ImmTy ImmTy,
7164 std::function<
bool(int64_t &)> ConvertResult) {
7168 ParseStatus Res = parseIntWithPrefix(Prefix,
Value);
7172 if (ConvertResult && !ConvertResult(
Value)) {
7173 Error(S,
"invalid " + StringRef(Prefix) +
" value.");
7176 Operands.
push_back(AMDGPUOperand::CreateImm(
this,
Value, S, ImmTy));
7180ParseStatus AMDGPUAsmParser::parseOperandArrayWithPrefix(
7181 const char *Prefix,
OperandVector &Operands, AMDGPUOperand::ImmTy ImmTy,
7182 bool (*ConvertResult)(int64_t &)) {
7191 const unsigned MaxSize = 4;
7195 for (
int I = 0; ; ++
I) {
7197 SMLoc Loc = getLoc();
7201 if (
Op != 0 &&
Op != 1)
7202 return Error(Loc,
"invalid " + StringRef(Prefix) +
" value.");
7209 if (
I + 1 == MaxSize)
7210 return Error(getLoc(),
"expected a closing square bracket");
7216 Operands.
push_back(AMDGPUOperand::CreateImm(
this, Val, S, ImmTy));
7220ParseStatus AMDGPUAsmParser::parseNamedBit(StringRef Name,
7222 AMDGPUOperand::ImmTy ImmTy,
7223 bool IgnoreNegative) {
7227 if (trySkipId(Name)) {
7229 }
else if (trySkipId(
"no", Name)) {
7238 return Error(S,
"r128 modifier is not supported on this GPU");
7239 if (Name ==
"a16" && !
hasA16())
7240 return Error(S,
"a16 modifier is not supported on this GPU");
7242 if (Bit == 0 && Name ==
"gds") {
7243 StringRef Mnemo = ((AMDGPUOperand &)*Operands[0]).
getToken();
7245 return Error(S,
"nogds is not allowed");
7248 if (
isGFX9() && ImmTy == AMDGPUOperand::ImmTyA16)
7249 ImmTy = AMDGPUOperand::ImmTyR128A16;
7251 Operands.
push_back(AMDGPUOperand::CreateImm(
this, Bit, S, ImmTy));
7255unsigned AMDGPUAsmParser::getCPolKind(StringRef Id, StringRef Mnemo,
7256 bool &Disabling)
const {
7257 Disabling =
Id.consume_front(
"no");
7260 return StringSwitch<unsigned>(Id)
7267 return StringSwitch<unsigned>(Id)
7275ParseStatus AMDGPUAsmParser::parseCPol(
OperandVector &Operands) {
7277 SMLoc StringLoc = getLoc();
7279 int64_t CPolVal = 0;
7288 ResTH = parseTH(Operands, TH);
7299 ResScope = parseScope(Operands, Scope);
7312 if (trySkipId(
"nv")) {
7316 }
else if (trySkipId(
"no",
"nv")) {
7323 if (trySkipId(
"scale_offset")) {
7327 }
else if (trySkipId(
"no",
"scale_offset")) {
7340 Operands.
push_back(AMDGPUOperand::CreateImm(
this, CPolVal, StringLoc,
7341 AMDGPUOperand::ImmTyCPol));
7345 StringRef Mnemo = ((AMDGPUOperand &)*Operands[0]).
getToken();
7346 SMLoc OpLoc = getLoc();
7347 unsigned Enabled = 0, Seen = 0;
7351 unsigned CPol = getCPolKind(getId(), Mnemo, Disabling);
7358 return Error(S,
"dlc modifier is not supported on this GPU");
7361 return Error(S,
"scc modifier is not supported on this GPU");
7364 return Error(S,
"duplicate cache policy modifier");
7376 AMDGPUOperand::CreateImm(
this,
Enabled, OpLoc, AMDGPUOperand::ImmTyCPol));
7380ParseStatus AMDGPUAsmParser::parseScope(
OperandVector &Operands,
7385 ParseStatus Res = parseStringOrIntWithPrefix(
7386 Operands,
"scope", {
"SCOPE_CU",
"SCOPE_SE",
"SCOPE_DEV",
"SCOPE_SYS"},
7395ParseStatus AMDGPUAsmParser::parseTH(
OperandVector &Operands, int64_t &TH) {
7400 ParseStatus Res = parseStringWithPrefix(
"th",
Value, StringLoc);
7404 if (
Value ==
"TH_DEFAULT")
7406 else if (
Value ==
"TH_STORE_LU" ||
Value ==
"TH_LOAD_WB" ||
7407 Value ==
"TH_LOAD_NT_WB") {
7408 return Error(StringLoc,
"invalid th value");
7409 }
else if (
Value.consume_front(
"TH_ATOMIC_")) {
7411 }
else if (
Value.consume_front(
"TH_LOAD_")) {
7413 }
else if (
Value.consume_front(
"TH_STORE_")) {
7416 return Error(StringLoc,
"invalid th value");
7419 if (
Value ==
"BYPASS")
7424 TH |= StringSwitch<int64_t>(
Value)
7434 .Default(0xffffffff);
7436 TH |= StringSwitch<int64_t>(
Value)
7447 .Default(0xffffffff);
7450 if (TH == 0xffffffff)
7451 return Error(StringLoc,
"invalid th value");
7458 AMDGPUAsmParser::OptionalImmIndexMap &OptionalIdx,
7459 AMDGPUOperand::ImmTy ImmT, int64_t
Default = 0,
7460 std::optional<unsigned> InsertAt = std::nullopt) {
7461 auto i = OptionalIdx.find(ImmT);
7462 if (i != OptionalIdx.end()) {
7463 unsigned Idx = i->second;
7464 const AMDGPUOperand &
Op =
7465 static_cast<const AMDGPUOperand &
>(*Operands[Idx]);
7469 Op.addImmOperands(Inst, 1);
7471 if (InsertAt.has_value())
7478ParseStatus AMDGPUAsmParser::parseStringWithPrefix(StringRef Prefix,
7484 StringLoc = getLoc();
7489ParseStatus AMDGPUAsmParser::parseStringOrIntWithPrefix(
7490 OperandVector &Operands, StringRef Name, ArrayRef<const char *> Ids,
7495 SMLoc StringLoc = getLoc();
7499 Value = getTokenStr();
7503 if (
Value == Ids[IntVal])
7508 if (IntVal < 0 || IntVal >= (int64_t)Ids.
size())
7509 return Error(StringLoc,
"invalid " + Twine(Name) +
" value");
7514ParseStatus AMDGPUAsmParser::parseStringOrIntWithPrefix(
7515 OperandVector &Operands, StringRef Name, ArrayRef<const char *> Ids,
7516 AMDGPUOperand::ImmTy
Type) {
7520 ParseStatus Res = parseStringOrIntWithPrefix(Operands, Name, Ids, IntVal);
7522 Operands.
push_back(AMDGPUOperand::CreateImm(
this, IntVal, S,
Type));
7531bool AMDGPUAsmParser::tryParseFmt(
const char *Pref,
7535 SMLoc Loc = getLoc();
7537 auto Res = parseIntWithPrefix(Pref, Val);
7543 if (Val < 0 || Val > MaxVal) {
7544 Error(Loc, Twine(
"out of range ", StringRef(Pref)));
7552ParseStatus AMDGPUAsmParser::tryParseIndexKey(
OperandVector &Operands,
7553 AMDGPUOperand::ImmTy ImmTy) {
7554 const char *Pref =
"index_key";
7556 SMLoc Loc = getLoc();
7557 auto Res = parseIntWithPrefix(Pref, ImmVal);
7561 if ((ImmTy == AMDGPUOperand::ImmTyIndexKey16bit ||
7562 ImmTy == AMDGPUOperand::ImmTyIndexKey32bit) &&
7563 (ImmVal < 0 || ImmVal > 1))
7564 return Error(Loc, Twine(
"out of range ", StringRef(Pref)));
7566 if (ImmTy == AMDGPUOperand::ImmTyIndexKey8bit && (ImmVal < 0 || ImmVal > 3))
7567 return Error(Loc, Twine(
"out of range ", StringRef(Pref)));
7569 Operands.
push_back(AMDGPUOperand::CreateImm(
this, ImmVal, Loc, ImmTy));
7573ParseStatus AMDGPUAsmParser::parseIndexKey8bit(
OperandVector &Operands) {
7574 return tryParseIndexKey(Operands, AMDGPUOperand::ImmTyIndexKey8bit);
7577ParseStatus AMDGPUAsmParser::parseIndexKey16bit(
OperandVector &Operands) {
7578 return tryParseIndexKey(Operands, AMDGPUOperand::ImmTyIndexKey16bit);
7581ParseStatus AMDGPUAsmParser::parseIndexKey32bit(
OperandVector &Operands) {
7582 return tryParseIndexKey(Operands, AMDGPUOperand::ImmTyIndexKey32bit);
7585ParseStatus AMDGPUAsmParser::tryParseMatrixFMT(
OperandVector &Operands,
7587 AMDGPUOperand::ImmTy
Type) {
7592ParseStatus AMDGPUAsmParser::parseMatrixAFMT(
OperandVector &Operands) {
7593 return tryParseMatrixFMT(Operands,
"matrix_a_fmt",
7594 AMDGPUOperand::ImmTyMatrixAFMT);
7597ParseStatus AMDGPUAsmParser::parseMatrixBFMT(
OperandVector &Operands) {
7598 return tryParseMatrixFMT(Operands,
"matrix_b_fmt",
7599 AMDGPUOperand::ImmTyMatrixBFMT);
7602ParseStatus AMDGPUAsmParser::tryParseMatrixScale(
OperandVector &Operands,
7604 AMDGPUOperand::ImmTy
Type) {
7609ParseStatus AMDGPUAsmParser::parseMatrixAScale(
OperandVector &Operands) {
7610 return tryParseMatrixScale(Operands,
"matrix_a_scale",
7611 AMDGPUOperand::ImmTyMatrixAScale);
7614ParseStatus AMDGPUAsmParser::parseMatrixBScale(
OperandVector &Operands) {
7615 return tryParseMatrixScale(Operands,
"matrix_b_scale",
7616 AMDGPUOperand::ImmTyMatrixBScale);
7619ParseStatus AMDGPUAsmParser::tryParseMatrixScaleFmt(
OperandVector &Operands,
7621 AMDGPUOperand::ImmTy
Type) {
7626ParseStatus AMDGPUAsmParser::parseMatrixAScaleFmt(
OperandVector &Operands) {
7627 return tryParseMatrixScaleFmt(Operands,
"matrix_a_scale_fmt",
7628 AMDGPUOperand::ImmTyMatrixAScaleFmt);
7631ParseStatus AMDGPUAsmParser::parseMatrixBScaleFmt(
OperandVector &Operands) {
7632 return tryParseMatrixScaleFmt(Operands,
"matrix_b_scale_fmt",
7633 AMDGPUOperand::ImmTyMatrixBScaleFmt);
7638ParseStatus AMDGPUAsmParser::parseDfmtNfmt(int64_t &
Format) {
7639 using namespace llvm::AMDGPU::MTBUFFormat;
7645 for (
int I = 0;
I < 2; ++
I) {
7646 if (Dfmt == DFMT_UNDEF && !tryParseFmt(
"dfmt", DFMT_MAX, Dfmt))
7649 if (Nfmt == NFMT_UNDEF && !tryParseFmt(
"nfmt", NFMT_MAX, Nfmt))
7654 if ((Dfmt == DFMT_UNDEF) != (Nfmt == NFMT_UNDEF) &&
7660 if (Dfmt == DFMT_UNDEF && Nfmt == NFMT_UNDEF)
7663 Dfmt = (Dfmt ==
DFMT_UNDEF) ? DFMT_DEFAULT : Dfmt;
7664 Nfmt = (Nfmt ==
NFMT_UNDEF) ? NFMT_DEFAULT : Nfmt;
7670ParseStatus AMDGPUAsmParser::parseUfmt(int64_t &
Format) {
7671 using namespace llvm::AMDGPU::MTBUFFormat;
7675 if (!tryParseFmt(
"format", UFMT_MAX, Fmt))
7678 if (Fmt == UFMT_UNDEF)
7685bool AMDGPUAsmParser::matchDfmtNfmt(int64_t &Dfmt,
7687 StringRef FormatStr,
7689 using namespace llvm::AMDGPU::MTBUFFormat;
7693 if (
Format != DFMT_UNDEF) {
7699 if (
Format != NFMT_UNDEF) {
7704 Error(Loc,
"unsupported format");
7708ParseStatus AMDGPUAsmParser::parseSymbolicSplitFormat(StringRef FormatStr,
7711 using namespace llvm::AMDGPU::MTBUFFormat;
7715 if (!matchDfmtNfmt(Dfmt, Nfmt, FormatStr, FormatLoc))
7720 SMLoc Loc = getLoc();
7721 if (!parseId(Str,
"expected a format string") ||
7722 !matchDfmtNfmt(Dfmt, Nfmt, Str, Loc))
7724 if (Dfmt == DFMT_UNDEF)
7725 return Error(Loc,
"duplicate numeric format");
7726 if (Nfmt == NFMT_UNDEF)
7727 return Error(Loc,
"duplicate data format");
7730 Dfmt = (Dfmt ==
DFMT_UNDEF) ? DFMT_DEFAULT : Dfmt;
7731 Nfmt = (Nfmt ==
NFMT_UNDEF) ? NFMT_DEFAULT : Nfmt;
7735 if (Ufmt == UFMT_UNDEF)
7736 return Error(FormatLoc,
"unsupported format");
7745ParseStatus AMDGPUAsmParser::parseSymbolicUnifiedFormat(StringRef FormatStr,
7748 using namespace llvm::AMDGPU::MTBUFFormat;
7751 if (Id == UFMT_UNDEF)
7755 return Error(Loc,
"unified format is not supported on this GPU");
7761ParseStatus AMDGPUAsmParser::parseNumericFormat(int64_t &
Format) {
7762 using namespace llvm::AMDGPU::MTBUFFormat;
7763 SMLoc Loc = getLoc();
7768 return Error(Loc,
"out of range format");
7773ParseStatus AMDGPUAsmParser::parseSymbolicOrNumericFormat(int64_t &
Format) {
7774 using namespace llvm::AMDGPU::MTBUFFormat;
7780 StringRef FormatStr;
7781 SMLoc Loc = getLoc();
7782 if (!parseId(FormatStr,
"expected a format string"))
7785 auto Res = parseSymbolicUnifiedFormat(FormatStr, Loc,
Format);
7787 Res = parseSymbolicSplitFormat(FormatStr, Loc,
Format);
7797 return parseNumericFormat(
Format);
7800ParseStatus AMDGPUAsmParser::parseFORMAT(
OperandVector &Operands) {
7801 using namespace llvm::AMDGPU::MTBUFFormat;
7805 SMLoc Loc = getLoc();
7815 AMDGPUOperand::CreateImm(
this,
Format, Loc, AMDGPUOperand::ImmTyFORMAT));
7827 Res = parseRegOrImm(Operands);
7834 Res = parseSymbolicOrNumericFormat(
Format);
7839 AMDGPUOperand &
Op =
static_cast<AMDGPUOperand &
>(*Operands[
Size - 2]);
7840 assert(
Op.isImm() &&
Op.getImmTy() == AMDGPUOperand::ImmTyFORMAT);
7847 return Error(getLoc(),
"duplicate format");
7851ParseStatus AMDGPUAsmParser::parseFlatOffset(
OperandVector &Operands) {
7853 parseIntWithPrefix(
"offset", Operands, AMDGPUOperand::ImmTyOffset);
7855 Res = parseIntWithPrefix(
"inst_offset", Operands,
7856 AMDGPUOperand::ImmTyInstOffset);
7861ParseStatus AMDGPUAsmParser::parseR128A16(
OperandVector &Operands) {
7863 parseNamedBit(
"r128", Operands, AMDGPUOperand::ImmTyR128A16);
7865 Res = parseNamedBit(
"a16", Operands, AMDGPUOperand::ImmTyA16);
7869ParseStatus AMDGPUAsmParser::parseBLGP(
OperandVector &Operands) {
7871 parseIntWithPrefix(
"blgp", Operands, AMDGPUOperand::ImmTyBLGP);
7874 parseOperandArrayWithPrefix(
"neg", Operands, AMDGPUOperand::ImmTyBLGP);
7883void AMDGPUAsmParser::cvtExp(MCInst &Inst,
const OperandVector &Operands) {
7884 OptionalImmIndexMap OptionalIdx;
7886 unsigned OperandIdx[4];
7887 unsigned EnMask = 0;
7890 for (
unsigned i = 1, e = Operands.
size(); i != e; ++i) {
7891 AMDGPUOperand &
Op = ((AMDGPUOperand &)*Operands[i]);
7896 OperandIdx[SrcIdx] = Inst.
size();
7897 Op.addRegOperands(Inst, 1);
7904 OperandIdx[SrcIdx] = Inst.
size();
7910 if (
Op.isImm() &&
Op.getImmTy() == AMDGPUOperand::ImmTyExpTgt) {
7911 Op.addImmOperands(Inst, 1);
7915 if (
Op.isToken() && (
Op.getToken() ==
"done" ||
Op.getToken() ==
"row_en"))
7919 OptionalIdx[
Op.getImmTy()] = i;
7925 if (OptionalIdx.find(AMDGPUOperand::ImmTyExpCompr) != OptionalIdx.end()) {
7932 for (
auto i = 0; i < SrcIdx; ++i) {
7934 EnMask |= Compr? (0x3 << i * 2) : (0x1 << i);
7959 IntVal =
encode(ISA, IntVal, CntVal);
7960 if (CntVal !=
decode(ISA, IntVal)) {
7962 IntVal =
encode(ISA, IntVal, -1);
7970bool AMDGPUAsmParser::parseCnt(int64_t &IntVal) {
7972 SMLoc CntLoc = getLoc();
7973 StringRef CntName = getTokenStr();
7980 SMLoc ValLoc = getLoc();
7989 if (CntName ==
"vmcnt" || CntName ==
"vmcnt_sat") {
7991 }
else if (CntName ==
"expcnt" || CntName ==
"expcnt_sat") {
7993 }
else if (CntName ==
"lgkmcnt" || CntName ==
"lgkmcnt_sat") {
7996 Error(CntLoc,
"invalid counter name " + CntName);
8001 Error(ValLoc,
"too large value for " + CntName);
8010 Error(getLoc(),
"expected a counter name");
8018ParseStatus AMDGPUAsmParser::parseSWaitCnt(
OperandVector &Operands) {
8025 if (!parseCnt(Waitcnt))
8033 Operands.
push_back(AMDGPUOperand::CreateImm(
this, Waitcnt, S));
8037bool AMDGPUAsmParser::parseDelay(int64_t &Delay) {
8038 SMLoc FieldLoc = getLoc();
8039 StringRef FieldName = getTokenStr();
8044 SMLoc ValueLoc = getLoc();
8051 if (FieldName ==
"instid0") {
8053 }
else if (FieldName ==
"instskip") {
8055 }
else if (FieldName ==
"instid1") {
8058 Error(FieldLoc,
"invalid field name " + FieldName);
8077 .Case(
"VALU_DEP_1", 1)
8078 .Case(
"VALU_DEP_2", 2)
8079 .Case(
"VALU_DEP_3", 3)
8080 .Case(
"VALU_DEP_4", 4)
8081 .Case(
"TRANS32_DEP_1", 5)
8082 .Case(
"TRANS32_DEP_2", 6)
8083 .Case(
"TRANS32_DEP_3", 7)
8084 .Case(
"FMA_ACCUM_CYCLE_1", 8)
8085 .Case(
"SALU_CYCLE_1", 9)
8086 .Case(
"SALU_CYCLE_2", 10)
8087 .Case(
"SALU_CYCLE_3", 11)
8095 Delay |=
Value << Shift;
8099ParseStatus AMDGPUAsmParser::parseSDelayALU(
OperandVector &Operands) {
8105 if (!parseDelay(Delay))
8113 Operands.
push_back(AMDGPUOperand::CreateImm(
this, Delay, S));
8118AMDGPUOperand::isSWaitCnt()
const {
8122bool AMDGPUOperand::isSDelayALU()
const {
return isImm(); }
8128void AMDGPUAsmParser::depCtrError(SMLoc Loc,
int ErrorId,
8129 StringRef DepCtrName) {
8132 Error(Loc, Twine(
"invalid counter name ", DepCtrName));
8135 Error(Loc, Twine(DepCtrName,
" is not supported on this GPU"));
8138 Error(Loc, Twine(
"duplicate counter name ", DepCtrName));
8141 Error(Loc, Twine(
"invalid value for ", DepCtrName));
8148bool AMDGPUAsmParser::parseDepCtr(int64_t &DepCtr,
unsigned &UsedOprMask) {
8150 using namespace llvm::AMDGPU::DepCtr;
8152 SMLoc DepCtrLoc = getLoc();
8153 StringRef DepCtrName = getTokenStr();
8163 unsigned PrevOprMask = UsedOprMask;
8164 int CntVal =
encodeDepCtr(DepCtrName, ExprVal, UsedOprMask, getSTI());
8167 depCtrError(DepCtrLoc, CntVal, DepCtrName);
8176 Error(getLoc(),
"expected a counter name");
8181 unsigned CntValMask = PrevOprMask ^ UsedOprMask;
8182 DepCtr = (DepCtr & ~CntValMask) | CntVal;
8186ParseStatus AMDGPUAsmParser::parseDepCtr(
OperandVector &Operands) {
8187 using namespace llvm::AMDGPU::DepCtr;
8190 SMLoc Loc = getLoc();
8193 unsigned UsedOprMask = 0;
8195 if (!parseDepCtr(DepCtr, UsedOprMask))
8203 Operands.
push_back(AMDGPUOperand::CreateImm(
this, DepCtr, Loc));
8207bool AMDGPUOperand::isDepCtr()
const {
return isS16Imm(); }
8213ParseStatus AMDGPUAsmParser::parseHwregFunc(OperandInfoTy &HwReg,
8215 OperandInfoTy &Width) {
8216 using namespace llvm::AMDGPU::Hwreg;
8222 HwReg.Loc = getLoc();
8225 HwReg.IsSymbolic =
true;
8227 }
else if (!
parseExpr(HwReg.Val,
"a register name")) {
8235 if (!skipToken(
AsmToken::Comma,
"expected a comma or a closing parenthesis"))
8245 Width.Loc = getLoc();
8253ParseStatus AMDGPUAsmParser::parseHwreg(
OperandVector &Operands) {
8254 using namespace llvm::AMDGPU::Hwreg;
8257 SMLoc Loc = getLoc();
8259 StructuredOpField HwReg(
"id",
"hardware register", HwregId::Width,
8261 StructuredOpField
Offset(
"offset",
"bit offset", HwregOffset::Width,
8262 HwregOffset::Default);
8263 struct : StructuredOpField {
8264 using StructuredOpField::StructuredOpField;
8265 bool validate(AMDGPUAsmParser &Parser)
const override {
8267 return Error(Parser,
"only values from 1 to 32 are legal");
8270 } Width(
"size",
"bitfield width", HwregSize::Width, HwregSize::Default);
8271 ParseStatus Res = parseStructuredOpFields({&HwReg, &
Offset, &Width});
8274 Res = parseHwregFunc(HwReg,
Offset, Width);
8277 if (!validateStructuredOpFields({&HwReg, &
Offset, &Width}))
8279 ImmVal = HwregEncoding::encode(HwReg.Val,
Offset.Val, Width.Val);
8283 parseExpr(ImmVal,
"a hwreg macro, structured immediate"))
8290 return Error(Loc,
"invalid immediate: only 16-bit values are legal");
8292 AMDGPUOperand::CreateImm(
this, ImmVal, Loc, AMDGPUOperand::ImmTyHwreg));
8296bool AMDGPUOperand::isHwreg()
const {
8297 return isImmTy(ImmTyHwreg);
8305AMDGPUAsmParser::parseSendMsgBody(OperandInfoTy &Msg,
8307 OperandInfoTy &Stream) {
8308 using namespace llvm::AMDGPU::SendMsg;
8313 Msg.IsSymbolic =
true;
8315 }
else if (!
parseExpr(Msg.Val,
"a message name")) {
8320 Op.IsDefined =
true;
8323 (
Op.Val =
getMsgOpId(Msg.Val, getTokenStr(), getSTI())) !=
8326 }
else if (!
parseExpr(
Op.Val,
"an operation name")) {
8331 Stream.IsDefined =
true;
8332 Stream.Loc = getLoc();
8342AMDGPUAsmParser::validateSendMsg(
const OperandInfoTy &Msg,
8343 const OperandInfoTy &
Op,
8344 const OperandInfoTy &Stream) {
8345 using namespace llvm::AMDGPU::SendMsg;
8350 bool Strict = Msg.IsSymbolic;
8354 Error(Msg.Loc,
"specified message id is not supported on this GPU");
8359 Error(Msg.Loc,
"invalid message id");
8365 Error(
Op.Loc,
"message does not support operations");
8367 Error(Msg.Loc,
"missing message operation");
8373 Error(
Op.Loc,
"specified operation id is not supported on this GPU");
8375 Error(
Op.Loc,
"invalid operation id");
8380 Error(Stream.Loc,
"message operation does not support streams");
8384 Error(Stream.Loc,
"invalid message stream id");
8390ParseStatus AMDGPUAsmParser::parseSendMsg(
OperandVector &Operands) {
8391 using namespace llvm::AMDGPU::SendMsg;
8394 SMLoc Loc = getLoc();
8398 OperandInfoTy
Op(OP_NONE_);
8399 OperandInfoTy Stream(STREAM_ID_NONE_);
8400 if (parseSendMsgBody(Msg,
Op, Stream) &&
8401 validateSendMsg(Msg,
Op, Stream)) {
8406 }
else if (
parseExpr(ImmVal,
"a sendmsg macro")) {
8408 return Error(Loc,
"invalid immediate: only 16-bit values are legal");
8413 Operands.
push_back(AMDGPUOperand::CreateImm(
this, ImmVal, Loc, AMDGPUOperand::ImmTySendMsg));
8417bool AMDGPUOperand::isSendMsg()
const {
8418 return isImmTy(ImmTySendMsg);
8421ParseStatus AMDGPUAsmParser::parseWaitEvent(
OperandVector &Operands) {
8422 using namespace llvm::AMDGPU::WaitEvent;
8424 SMLoc Loc = getLoc();
8427 StructuredOpField DontWaitExportReady(
"dont_wait_export_ready",
"bit value",
8429 StructuredOpField ExportReady(
"export_ready",
"bit value", 1, 0);
8431 StructuredOpField *TargetBitfield =
8432 isGFX11() ? &DontWaitExportReady : &ExportReady;
8434 ParseStatus Res = parseStructuredOpFields({TargetBitfield});
8438 if (!validateStructuredOpFields({TargetBitfield}))
8440 ImmVal = TargetBitfield->Val;
8447 return Error(Loc,
"invalid immediate: only 16-bit values are legal");
8449 Operands.
push_back(AMDGPUOperand::CreateImm(
this, ImmVal, Loc,
8450 AMDGPUOperand::ImmTyWaitEvent));
8454bool AMDGPUOperand::isWaitEvent()
const {
return isImmTy(ImmTyWaitEvent); }
8460ParseStatus AMDGPUAsmParser::parseInterpSlot(
OperandVector &Operands) {
8467 int Slot = StringSwitch<int>(Str)
8474 return Error(S,
"invalid interpolation slot");
8476 Operands.
push_back(AMDGPUOperand::CreateImm(
this, Slot, S,
8477 AMDGPUOperand::ImmTyInterpSlot));
8481ParseStatus AMDGPUAsmParser::parseInterpAttr(
OperandVector &Operands) {
8488 if (!Str.starts_with(
"attr"))
8489 return Error(S,
"invalid interpolation attribute");
8491 StringRef Chan = Str.take_back(2);
8492 int AttrChan = StringSwitch<int>(Chan)
8499 return Error(S,
"invalid or missing interpolation attribute channel");
8501 Str = Str.drop_back(2).drop_front(4);
8504 if (Str.getAsInteger(10, Attr))
8505 return Error(S,
"invalid or missing interpolation attribute number");
8508 return Error(S,
"out of bounds interpolation attribute number");
8512 Operands.
push_back(AMDGPUOperand::CreateImm(
this, Attr, S,
8513 AMDGPUOperand::ImmTyInterpAttr));
8514 Operands.
push_back(AMDGPUOperand::CreateImm(
8515 this, AttrChan, SChan, AMDGPUOperand::ImmTyInterpAttrChan));
8523ParseStatus AMDGPUAsmParser::parseExpTgt(
OperandVector &Operands) {
8524 using namespace llvm::AMDGPU::Exp;
8534 return Error(S, (Id == ET_INVALID)
8535 ?
"invalid exp target"
8536 :
"exp target is not supported on this GPU");
8538 Operands.
push_back(AMDGPUOperand::CreateImm(
this, Id, S,
8539 AMDGPUOperand::ImmTyExpTgt));
8548AMDGPUAsmParser::isId(
const AsmToken &Token,
const StringRef Id)
const {
8553AMDGPUAsmParser::isId(
const StringRef Id)
const {
8559 return getTokenKind() ==
Kind;
8562StringRef AMDGPUAsmParser::getId()
const {
8567AMDGPUAsmParser::trySkipId(
const StringRef Id) {
8576AMDGPUAsmParser::trySkipId(
const StringRef Pref,
const StringRef Id) {
8578 StringRef Tok = getTokenStr();
8589 if (isId(Id) && peekToken().is(Kind)) {
8599 if (isToken(Kind)) {
8608 const StringRef ErrMsg) {
8609 if (!trySkipToken(Kind)) {
8610 Error(getLoc(), ErrMsg);
8617AMDGPUAsmParser::parseExpr(int64_t &
Imm, StringRef Expected) {
8621 if (Parser.parseExpression(Expr))
8624 if (Expr->evaluateAsAbsolute(
Imm))
8627 if (Expected.empty()) {
8628 Error(S,
"expected absolute expression");
8630 Error(S, Twine(
"expected ", Expected) +
8631 Twine(
" or an absolute expression"));
8641 if (Parser.parseExpression(Expr))
8645 if (Expr->evaluateAsAbsolute(IntVal)) {
8646 Operands.
push_back(AMDGPUOperand::CreateImm(
this, IntVal, S));
8648 Operands.
push_back(AMDGPUOperand::CreateExpr(
this, Expr, S));
8654AMDGPUAsmParser::parseString(StringRef &Val,
const StringRef ErrMsg) {
8656 Val =
getToken().getStringContents();
8660 Error(getLoc(), ErrMsg);
8665AMDGPUAsmParser::parseId(StringRef &Val,
const StringRef ErrMsg) {
8667 Val = getTokenStr();
8671 if (!ErrMsg.
empty())
8672 Error(getLoc(), ErrMsg);
8677AMDGPUAsmParser::getToken()
const {
8678 return Parser.getTok();
8681AsmToken AMDGPUAsmParser::peekToken(
bool ShouldSkipSpace) {
8684 : getLexer().peekTok(ShouldSkipSpace);
8689 auto TokCount = getLexer().peekTokens(Tokens);
8691 for (
auto Idx = TokCount; Idx < Tokens.
size(); ++Idx)
8696AMDGPUAsmParser::getTokenKind()
const {
8697 return getLexer().getKind();
8701AMDGPUAsmParser::getLoc()
const {
8706AMDGPUAsmParser::getTokenStr()
const {
8711AMDGPUAsmParser::lex() {
8715const AMDGPUOperand &
8716AMDGPUAsmParser::findMCOperand(
const OperandVector &Operands,
8717 int MCOpIdx)
const {
8718 for (
const auto &
Op : Operands) {
8719 const AMDGPUOperand &TargetOp =
static_cast<AMDGPUOperand &
>(*Op);
8720 if (TargetOp.getMCOpIdx() == MCOpIdx)
8726SMLoc AMDGPUAsmParser::getInstLoc(
const OperandVector &Operands)
const {
8727 return ((AMDGPUOperand &)*Operands[0]).getStartLoc();
8731SMLoc AMDGPUAsmParser::getLaterLoc(SMLoc a, SMLoc b) {
8735SMLoc AMDGPUAsmParser::getOperandLoc(
const OperandVector &Operands,
8736 int MCOpIdx)
const {
8737 return findMCOperand(Operands, MCOpIdx).getStartLoc();
8741AMDGPUAsmParser::getOperandLoc(std::function<
bool(
const AMDGPUOperand&)>
Test,
8743 for (
unsigned i = Operands.
size() - 1; i > 0; --i) {
8744 AMDGPUOperand &
Op = ((AMDGPUOperand &)*Operands[i]);
8746 return Op.getStartLoc();
8748 return getInstLoc(Operands);
8752AMDGPUAsmParser::getImmLoc(AMDGPUOperand::ImmTy
Type,
8754 auto Test = [=](
const AMDGPUOperand&
Op) {
return Op.isImmTy(
Type); };
8755 return getOperandLoc(
Test, Operands);
8769 StringRef
Id = getTokenStr();
8770 SMLoc IdLoc = getLoc();
8776 find_if(Fields, [Id](StructuredOpField *
F) {
return F->Id ==
Id; });
8777 if (
I == Fields.
end())
8778 return Error(IdLoc,
"unknown field");
8779 if ((*I)->IsDefined)
8780 return Error(IdLoc,
"duplicate field");
8783 (*I)->Loc = getLoc();
8786 (*I)->IsDefined =
true;
8793bool AMDGPUAsmParser::validateStructuredOpFields(
8795 return all_of(Fields, [
this](
const StructuredOpField *
F) {
8796 return F->validate(*
this);
8807 const unsigned OrMask,
8808 const unsigned XorMask) {
8817bool AMDGPUAsmParser::parseSwizzleOperand(int64_t &
Op,
const unsigned MinVal,
8818 const unsigned MaxVal,
8819 const Twine &ErrMsg, SMLoc &Loc) {
8836AMDGPUAsmParser::parseSwizzleOperands(
const unsigned OpNum, int64_t*
Op,
8837 const unsigned MinVal,
8838 const unsigned MaxVal,
8839 const StringRef ErrMsg) {
8841 for (
unsigned i = 0; i < OpNum; ++i) {
8842 if (!parseSwizzleOperand(
Op[i], MinVal, MaxVal, ErrMsg, Loc))
8850AMDGPUAsmParser::parseSwizzleQuadPerm(int64_t &
Imm) {
8851 using namespace llvm::AMDGPU::Swizzle;
8854 if (parseSwizzleOperands(LANE_NUM, Lane, 0, LANE_MAX,
8855 "expected a 2-bit lane id")) {
8866AMDGPUAsmParser::parseSwizzleBroadcast(int64_t &
Imm) {
8867 using namespace llvm::AMDGPU::Swizzle;
8873 if (!parseSwizzleOperand(GroupSize,
8875 "group size must be in the interval [2,32]",
8880 Error(Loc,
"group size must be a power of two");
8883 if (parseSwizzleOperand(LaneIdx,
8885 "lane id must be in the interval [0,group size - 1]",
8894AMDGPUAsmParser::parseSwizzleReverse(int64_t &
Imm) {
8895 using namespace llvm::AMDGPU::Swizzle;
8900 if (!parseSwizzleOperand(GroupSize,
8902 "group size must be in the interval [2,32]",
8907 Error(Loc,
"group size must be a power of two");
8916AMDGPUAsmParser::parseSwizzleSwap(int64_t &
Imm) {
8917 using namespace llvm::AMDGPU::Swizzle;
8922 if (!parseSwizzleOperand(GroupSize,
8924 "group size must be in the interval [1,16]",
8929 Error(Loc,
"group size must be a power of two");
8938AMDGPUAsmParser::parseSwizzleBitmaskPerm(int64_t &
Imm) {
8939 using namespace llvm::AMDGPU::Swizzle;
8946 SMLoc StrLoc = getLoc();
8947 if (!parseString(Ctl)) {
8950 if (Ctl.
size() != BITMASK_WIDTH) {
8951 Error(StrLoc,
"expected a 5-character mask");
8955 unsigned AndMask = 0;
8956 unsigned OrMask = 0;
8957 unsigned XorMask = 0;
8959 for (
size_t i = 0; i < Ctl.
size(); ++i) {
8963 Error(StrLoc,
"invalid mask");
8984bool AMDGPUAsmParser::parseSwizzleFFT(int64_t &
Imm) {
8985 using namespace llvm::AMDGPU::Swizzle;
8988 Error(getLoc(),
"FFT mode swizzle not supported on this GPU");
8994 if (!parseSwizzleOperand(Swizzle, 0, FFT_SWIZZLE_MAX,
8995 "FFT swizzle must be in the interval [0," +
8996 Twine(FFT_SWIZZLE_MAX) + Twine(
']'),
9004bool AMDGPUAsmParser::parseSwizzleRotate(int64_t &
Imm) {
9005 using namespace llvm::AMDGPU::Swizzle;
9008 Error(getLoc(),
"Rotate mode swizzle not supported on this GPU");
9015 if (!parseSwizzleOperand(
Direction, 0, 1,
9016 "direction must be 0 (left) or 1 (right)", Loc))
9020 if (!parseSwizzleOperand(
9021 RotateSize, 0, ROTATE_MAX_SIZE,
9022 "number of threads to rotate must be in the interval [0," +
9023 Twine(ROTATE_MAX_SIZE) + Twine(
']'),
9028 (RotateSize << ROTATE_SIZE_SHIFT);
9033AMDGPUAsmParser::parseSwizzleOffset(int64_t &
Imm) {
9035 SMLoc OffsetLoc = getLoc();
9041 Error(OffsetLoc,
"expected a 16-bit offset");
9048AMDGPUAsmParser::parseSwizzleMacro(int64_t &
Imm) {
9049 using namespace llvm::AMDGPU::Swizzle;
9053 SMLoc ModeLoc = getLoc();
9056 if (trySkipId(IdSymbolic[ID_QUAD_PERM])) {
9057 Ok = parseSwizzleQuadPerm(
Imm);
9058 }
else if (trySkipId(IdSymbolic[ID_BITMASK_PERM])) {
9059 Ok = parseSwizzleBitmaskPerm(
Imm);
9060 }
else if (trySkipId(IdSymbolic[ID_BROADCAST])) {
9061 Ok = parseSwizzleBroadcast(
Imm);
9062 }
else if (trySkipId(IdSymbolic[ID_SWAP])) {
9063 Ok = parseSwizzleSwap(
Imm);
9064 }
else if (trySkipId(IdSymbolic[ID_REVERSE])) {
9065 Ok = parseSwizzleReverse(
Imm);
9066 }
else if (trySkipId(IdSymbolic[ID_FFT])) {
9067 Ok = parseSwizzleFFT(
Imm);
9068 }
else if (trySkipId(IdSymbolic[ID_ROTATE])) {
9069 Ok = parseSwizzleRotate(
Imm);
9071 Error(ModeLoc,
"expected a swizzle mode");
9074 return Ok && skipToken(
AsmToken::RParen,
"expected a closing parentheses");
9080ParseStatus AMDGPUAsmParser::parseSwizzle(
OperandVector &Operands) {
9084 if (trySkipId(
"offset")) {
9088 if (trySkipId(
"swizzle")) {
9089 Ok = parseSwizzleMacro(
Imm);
9091 Ok = parseSwizzleOffset(
Imm);
9095 Operands.
push_back(AMDGPUOperand::CreateImm(
this,
Imm, S, AMDGPUOperand::ImmTySwizzle));
9103AMDGPUOperand::isSwizzle()
const {
9104 return isImmTy(ImmTySwizzle);
9111int64_t AMDGPUAsmParser::parseGPRIdxMacro() {
9113 using namespace llvm::AMDGPU::VGPRIndexMode;
9125 for (
unsigned ModeId = ID_MIN; ModeId <=
ID_MAX; ++ModeId) {
9126 if (trySkipId(IdSymbolic[ModeId])) {
9134 "expected a VGPR index mode or a closing parenthesis" :
9135 "expected a VGPR index mode");
9140 Error(S,
"duplicate VGPR index mode");
9148 "expected a comma or a closing parenthesis"))
9155ParseStatus AMDGPUAsmParser::parseGPRIdxMode(
OperandVector &Operands) {
9157 using namespace llvm::AMDGPU::VGPRIndexMode;
9163 Imm = parseGPRIdxMacro();
9167 if (getParser().parseAbsoluteExpression(
Imm))
9170 return Error(S,
"invalid immediate: only 4-bit values are legal");
9174 AMDGPUOperand::CreateImm(
this,
Imm, S, AMDGPUOperand::ImmTyGprIdxMode));
9178bool AMDGPUOperand::isGPRIdxMode()
const {
9179 return isImmTy(ImmTyGprIdxMode);
9186ParseStatus AMDGPUAsmParser::parseSOPPBrTarget(
OperandVector &Operands) {
9191 if (isRegister() || isModifier())
9197 AMDGPUOperand &Opr = ((AMDGPUOperand &)*Operands[Operands.
size() - 1]);
9198 assert(Opr.isImm() || Opr.isExpr());
9199 SMLoc Loc = Opr.getStartLoc();
9203 if (Opr.isExpr() && !Opr.isSymbolRefExpr()) {
9204 Error(Loc,
"expected an absolute expression or a label");
9205 }
else if (Opr.isImm() && !Opr.isS16Imm()) {
9206 Error(Loc,
"expected a 16-bit signed jump offset");
9216ParseStatus AMDGPUAsmParser::parseBoolReg(
OperandVector &Operands) {
9217 return parseReg(Operands);
9224void AMDGPUAsmParser::cvtMubufImpl(MCInst &Inst,
9227 OptionalImmIndexMap OptionalIdx;
9228 unsigned FirstOperandIdx = 1;
9229 bool IsAtomicReturn =
false;
9236 for (
unsigned i = FirstOperandIdx, e = Operands.
size(); i != e; ++i) {
9237 AMDGPUOperand &
Op = ((AMDGPUOperand &)*Operands[i]);
9241 Op.addRegOperands(Inst, 1);
9245 if (IsAtomicReturn && i == FirstOperandIdx)
9246 Op.addRegOperands(Inst, 1);
9251 if (
Op.isImm() &&
Op.getImmTy() == AMDGPUOperand::ImmTyNone) {
9252 Op.addImmOperands(Inst, 1);
9264 OptionalIdx[
Op.getImmTy()] = i;
9278bool AMDGPUOperand::isSMRDOffset8()
const {
9282bool AMDGPUOperand::isSMEMOffset()
const {
9284 return isImmLiteral();
9287bool AMDGPUOperand::isSMRDLiteralOffset()
const {
9322bool AMDGPUAsmParser::convertDppBoundCtrl(int64_t &BoundCtrl) {
9323 if (BoundCtrl == 0 || BoundCtrl == 1) {
9331void AMDGPUAsmParser::onBeginOfFile() {
9332 if (!getParser().getStreamer().getTargetStreamer() ||
9336 if (!getTargetStreamer().getTargetID())
9337 getTargetStreamer().initializeTargetID(getSTI(),
9338 getSTI().getFeatureString());
9341 getTargetStreamer().EmitDirectiveAMDGCNTarget();
9349bool AMDGPUAsmParser::parsePrimaryExpr(
const MCExpr *&Res, SMLoc &EndLoc) {
9353 StringRef TokenId = getTokenStr();
9354 AGVK VK = StringSwitch<AGVK>(TokenId)
9355 .Case(
"max", AGVK::AGVK_Max)
9356 .Case(
"or", AGVK::AGVK_Or)
9357 .Case(
"extrasgprs", AGVK::AGVK_ExtraSGPRs)
9358 .Case(
"totalnumvgprs", AGVK::AGVK_TotalNumVGPRs)
9359 .Case(
"alignto", AGVK::AGVK_AlignTo)
9360 .Case(
"occupancy", AGVK::AGVK_Occupancy)
9361 .Case(
"instprefsize", AGVK::AGVK_InstPrefSize)
9362 .Default(AGVK::AGVK_None);
9366 uint64_t CommaCount = 0;
9371 if (Exprs.
empty()) {
9373 "empty " + Twine(TokenId) +
" expression");
9376 if (CommaCount + 1 != Exprs.
size()) {
9378 "mismatch of commas in " + Twine(TokenId) +
" expression");
9385 if (getParser().parseExpression(Expr, EndLoc))
9389 if (LastTokenWasComma)
9393 "unexpected token in " + Twine(TokenId) +
" expression");
9399 return getParser().parsePrimaryExpr(Res, EndLoc,
nullptr);
9402ParseStatus AMDGPUAsmParser::parseOModSI(
OperandVector &Operands) {
9403 StringRef
Name = getTokenStr();
9404 if (Name ==
"mul") {
9405 return parseIntWithPrefix(
"mul", Operands,
9409 if (Name ==
"div") {
9410 return parseIntWithPrefix(
"div", Operands,
9421 int OpSelIdx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::op_sel);
9426 const AMDGPU::OpName
Ops[] = {AMDGPU::OpName::src0, AMDGPU::OpName::src1,
9427 AMDGPU::OpName::src2};
9435 int DstIdx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::vdst);
9440 int ModIdx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::src0_modifiers);
9442 if (
DstOp.isReg() &&
9447 if ((OpSel & (1 << SrcNum)) != 0)
9453void AMDGPUAsmParser::cvtVOP3OpSel(MCInst &Inst,
9455 cvtVOP3P(Inst, Operands);
9459void AMDGPUAsmParser::cvtVOP3OpSel(MCInst &Inst,
const OperandVector &Operands,
9460 OptionalImmIndexMap &OptionalIdx) {
9461 cvtVOP3P(Inst, Operands, OptionalIdx);
9470 &&
Desc.NumOperands > (OpNum + 1)
9472 &&
Desc.operands()[OpNum + 1].RegClass != -1
9474 &&
Desc.getOperandConstraint(OpNum + 1,
9478void AMDGPUAsmParser::cvtOpSelHelper(MCInst &Inst,
unsigned OpSel) {
9480 constexpr AMDGPU::OpName
Ops[] = {AMDGPU::OpName::src0, AMDGPU::OpName::src1,
9481 AMDGPU::OpName::src2};
9482 constexpr AMDGPU::OpName ModOps[] = {AMDGPU::OpName::src0_modifiers,
9483 AMDGPU::OpName::src1_modifiers,
9484 AMDGPU::OpName::src2_modifiers};
9485 for (
int J = 0; J < 3; ++J) {
9486 int OpIdx = AMDGPU::getNamedOperandIdx(
Opc,
Ops[J]);
9492 int ModIdx = AMDGPU::getNamedOperandIdx(
Opc, ModOps[J]);
9495 if ((OpSel & (1 << J)) != 0)
9498 if (ModOps[J] == AMDGPU::OpName::src0_modifiers && (OpSel & (1 << 3)) != 0)
9505void AMDGPUAsmParser::cvtVOP3Interp(MCInst &Inst,
const OperandVector &Operands)
9507 OptionalImmIndexMap OptionalIdx;
9512 for (
unsigned J = 0; J <
Desc.getNumDefs(); ++J) {
9513 ((AMDGPUOperand &)*Operands[
I++]).addRegOperands(Inst, 1);
9516 for (
unsigned E = Operands.
size();
I !=
E; ++
I) {
9517 AMDGPUOperand &
Op = ((AMDGPUOperand &)*Operands[
I]);
9519 Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
9520 }
else if (
Op.isInterpSlot() ||
Op.isInterpAttr() ||
9521 Op.isInterpAttrChan()) {
9523 }
else if (
Op.isImmModifier()) {
9524 OptionalIdx[
Op.getImmTy()] =
I;
9532 AMDGPUOperand::ImmTyHigh);
9536 AMDGPUOperand::ImmTyClamp);
9540 AMDGPUOperand::ImmTyOModSI);
9545 AMDGPUOperand::ImmTyOpSel);
9546 int OpSelIdx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::op_sel);
9549 cvtOpSelHelper(Inst, OpSel);
9553void AMDGPUAsmParser::cvtVINTERP(MCInst &Inst,
const OperandVector &Operands)
9555 OptionalImmIndexMap OptionalIdx;
9560 for (
unsigned J = 0; J <
Desc.getNumDefs(); ++J) {
9561 ((AMDGPUOperand &)*Operands[
I++]).addRegOperands(Inst, 1);
9564 for (
unsigned E = Operands.
size();
I !=
E; ++
I) {
9565 AMDGPUOperand &
Op = ((AMDGPUOperand &)*Operands[
I]);
9567 Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
9568 }
else if (
Op.isImmModifier()) {
9569 OptionalIdx[
Op.getImmTy()] =
I;
9577 int OpSelIdx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::op_sel);
9587 cvtOpSelHelper(Inst, OpSel);
9590void AMDGPUAsmParser::cvtScaledMFMA(MCInst &Inst,
9592 OptionalImmIndexMap OptionalIdx;
9595 int CbszOpIdx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::cbsz);
9599 for (
unsigned J = 0; J <
Desc.getNumDefs(); ++J)
9600 static_cast<AMDGPUOperand &
>(*Operands[
I++]).addRegOperands(Inst, 1);
9602 for (
unsigned E = Operands.
size();
I !=
E; ++
I) {
9603 AMDGPUOperand &
Op =
static_cast<AMDGPUOperand &
>(*Operands[
I]);
9608 if (NumOperands == CbszOpIdx) {
9613 Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
9614 }
else if (
Op.isImmModifier()) {
9615 OptionalIdx[
Op.getImmTy()] =
I;
9617 Op.addRegOrImmOperands(Inst, 1);
9622 auto CbszIdx = OptionalIdx.find(AMDGPUOperand::ImmTyCBSZ);
9623 if (CbszIdx != OptionalIdx.end()) {
9624 int CbszVal = ((AMDGPUOperand &)*Operands[CbszIdx->second]).
getImm();
9628 int BlgpOpIdx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::blgp);
9629 auto BlgpIdx = OptionalIdx.find(AMDGPUOperand::ImmTyBLGP);
9630 if (BlgpIdx != OptionalIdx.end()) {
9631 int BlgpVal = ((AMDGPUOperand &)*Operands[BlgpIdx->second]).
getImm();
9642 auto OpselIdx = OptionalIdx.find(AMDGPUOperand::ImmTyOpSel);
9643 if (OpselIdx != OptionalIdx.end()) {
9644 OpSel =
static_cast<const AMDGPUOperand &
>(*Operands[OpselIdx->second])
9648 unsigned OpSelHi = 0;
9649 auto OpselHiIdx = OptionalIdx.find(AMDGPUOperand::ImmTyOpSelHi);
9650 if (OpselHiIdx != OptionalIdx.end()) {
9651 OpSelHi =
static_cast<const AMDGPUOperand &
>(*Operands[OpselHiIdx->second])
9654 const AMDGPU::OpName ModOps[] = {AMDGPU::OpName::src0_modifiers,
9655 AMDGPU::OpName::src1_modifiers};
9657 for (
unsigned J = 0; J < 2; ++J) {
9658 unsigned ModVal = 0;
9659 if (OpSel & (1 << J))
9661 if (OpSelHi & (1 << J))
9664 const int ModIdx = AMDGPU::getNamedOperandIdx(
Opc, ModOps[J]);
9669void AMDGPUAsmParser::cvtVOP3(MCInst &Inst,
const OperandVector &Operands,
9670 OptionalImmIndexMap &OptionalIdx) {
9675 for (
unsigned J = 0; J <
Desc.getNumDefs(); ++J) {
9676 ((AMDGPUOperand &)*Operands[
I++]).addRegOperands(Inst, 1);
9679 for (
unsigned E = Operands.
size();
I !=
E; ++
I) {
9680 AMDGPUOperand &
Op = ((AMDGPUOperand &)*Operands[
I]);
9682 Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
9683 }
else if (
Op.isImmModifier()) {
9684 OptionalIdx[
Op.getImmTy()] =
I;
9686 Op.addRegOrImmOperands(Inst, 1);
9692 AMDGPUOperand::ImmTyScaleSel);
9696 AMDGPUOperand::ImmTyClamp);
9702 AMDGPUOperand::ImmTyByteSel);
9707 AMDGPUOperand::ImmTyOModSI);
9714 auto *it = Inst.
begin();
9715 std::advance(it, AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::src2_modifiers));
9723void AMDGPUAsmParser::cvtVOP3(MCInst &Inst,
const OperandVector &Operands) {
9724 OptionalImmIndexMap OptionalIdx;
9725 cvtVOP3(Inst, Operands, OptionalIdx);
9728void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst,
const OperandVector &Operands,
9729 OptionalImmIndexMap &OptIdx) {
9735 if (
Opc == AMDGPU::V_CVT_SCALEF32_PK_FP4_F16_vi ||
9736 Opc == AMDGPU::V_CVT_SCALEF32_PK_FP4_BF16_vi ||
9737 Opc == AMDGPU::V_CVT_SR_BF8_F32_vi ||
9738 Opc == AMDGPU::V_CVT_SR_FP8_F32_vi ||
9739 Opc == AMDGPU::V_CVT_SR_BF8_F32_gfx12_e64_gfx11 ||
9740 Opc == AMDGPU::V_CVT_SR_FP8_F32_gfx12_e64_gfx11 ||
9741 Opc == AMDGPU::V_CVT_SR_BF8_F32_gfx12_e64_gfx12 ||
9742 Opc == AMDGPU::V_CVT_SR_FP8_F32_gfx12_e64_gfx12 ||
9743 Opc == AMDGPU::V_CVT_SR_BF8_F32_gfx12_e64_gfx13 ||
9744 Opc == AMDGPU::V_CVT_SR_FP8_F32_gfx12_e64_gfx13) {
9753 int VdstInIdx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::vdst_in);
9754 if (VdstInIdx != -1 && VdstInIdx ==
static_cast<int>(Inst.
getNumOperands()))
9757 int BitOp3Idx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::bitop3);
9758 if (BitOp3Idx != -1) {
9765 int OpSelIdx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::op_sel);
9766 if (OpSelIdx != -1) {
9770 int OpSelHiIdx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::op_sel_hi);
9771 if (OpSelHiIdx != -1) {
9772 int DefaultVal =
IsPacked ? -1 : 0;
9778 AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::matrix_a_fmt);
9779 if (MatrixAFMTIdx != -1) {
9781 AMDGPUOperand::ImmTyMatrixAFMT, 0);
9785 AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::matrix_b_fmt);
9786 if (MatrixBFMTIdx != -1) {
9788 AMDGPUOperand::ImmTyMatrixBFMT, 0);
9791 int MatrixAScaleIdx =
9792 AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::matrix_a_scale);
9793 if (MatrixAScaleIdx != -1) {
9795 AMDGPUOperand::ImmTyMatrixAScale, 0);
9798 int MatrixBScaleIdx =
9799 AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::matrix_b_scale);
9800 if (MatrixBScaleIdx != -1) {
9802 AMDGPUOperand::ImmTyMatrixBScale, 0);
9805 int MatrixAScaleFmtIdx =
9806 AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::matrix_a_scale_fmt);
9807 if (MatrixAScaleFmtIdx != -1) {
9809 AMDGPUOperand::ImmTyMatrixAScaleFmt, 0);
9812 int MatrixBScaleFmtIdx =
9813 AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::matrix_b_scale_fmt);
9814 if (MatrixBScaleFmtIdx != -1) {
9816 AMDGPUOperand::ImmTyMatrixBScaleFmt, 0);
9821 AMDGPUOperand::ImmTyMatrixAReuse, 0);
9825 AMDGPUOperand::ImmTyMatrixBReuse, 0);
9827 int NegLoIdx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::neg_lo);
9831 int NegHiIdx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::neg_hi);
9835 const AMDGPU::OpName
Ops[] = {AMDGPU::OpName::src0, AMDGPU::OpName::src1,
9836 AMDGPU::OpName::src2};
9837 const AMDGPU::OpName ModOps[] = {AMDGPU::OpName::src0_modifiers,
9838 AMDGPU::OpName::src1_modifiers,
9839 AMDGPU::OpName::src2_modifiers};
9842 unsigned OpSelHi = 0;
9849 if (OpSelHiIdx != -1)
9858 for (
int J = 0; J < 3; ++J) {
9859 int OpIdx = AMDGPU::getNamedOperandIdx(
Opc,
Ops[J]);
9863 int ModIdx = AMDGPU::getNamedOperandIdx(
Opc, ModOps[J]);
9868 uint32_t ModVal = 0;
9871 if (SrcOp.
isReg() && getMRI()
9878 if ((OpSel & (1 << J)) != 0)
9882 if ((OpSelHi & (1 << J)) != 0)
9885 if ((NegLo & (1 << J)) != 0)
9888 if ((NegHi & (1 << J)) != 0)
9895void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst,
const OperandVector &Operands) {
9896 OptionalImmIndexMap OptIdx;
9897 cvtVOP3(Inst, Operands, OptIdx);
9898 cvtVOP3P(Inst, Operands, OptIdx);
9902 unsigned i,
unsigned Opc,
9904 if (AMDGPU::getNamedOperandIdx(
Opc,
OpName) != -1)
9905 ((AMDGPUOperand &)*
Operands[i]).addRegOrImmWithFPInputModsOperands(Inst, 2);
9907 ((AMDGPUOperand &)*
Operands[i]).addRegOperands(Inst, 1);
9910void AMDGPUAsmParser::cvtSWMMAC(MCInst &Inst,
const OperandVector &Operands) {
9913 ((AMDGPUOperand &)*Operands[1]).addRegOperands(Inst, 1);
9916 ((AMDGPUOperand &)*Operands[1]).addRegOperands(Inst, 1);
9917 ((AMDGPUOperand &)*Operands[4]).addRegOperands(Inst, 1);
9919 OptionalImmIndexMap OptIdx;
9920 for (
unsigned i = 5; i < Operands.
size(); ++i) {
9921 AMDGPUOperand &
Op = ((AMDGPUOperand &)*Operands[i]);
9922 OptIdx[
Op.getImmTy()] = i;
9927 AMDGPUOperand::ImmTyIndexKey8bit);
9931 AMDGPUOperand::ImmTyIndexKey16bit);
9935 AMDGPUOperand::ImmTyIndexKey32bit);
9940 cvtVOP3P(Inst, Operands, OptIdx);
9947ParseStatus AMDGPUAsmParser::parseVOPD(
OperandVector &Operands) {
9955 Operands.
push_back(AMDGPUOperand::CreateToken(
this,
"::", S));
9956 SMLoc OpYLoc = getLoc();
9959 Operands.
push_back(AMDGPUOperand::CreateToken(
this, OpYName, OpYLoc));
9962 return Error(OpYLoc,
"expected a VOPDY instruction after ::");
9968void AMDGPUAsmParser::cvtVOPD(MCInst &Inst,
const OperandVector &Operands) {
9971 auto addOp = [&](uint16_t ParsedOprIdx) {
9972 AMDGPUOperand &
Op = ((AMDGPUOperand &)*Operands[ParsedOprIdx]);
9974 Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
9978 Op.addRegOperands(Inst, 1);
9982 Op.addImmOperands(Inst, 1);
9994 addOp(InstInfo[CompIdx].getIndexOfDstInParsedOperands());
9998 const auto &CInfo = InstInfo[CompIdx];
9999 auto CompSrcOperandsNum = InstInfo[CompIdx].getCompParsedSrcOperandsNum();
10000 for (
unsigned CompSrcIdx = 0; CompSrcIdx < CompSrcOperandsNum; ++CompSrcIdx)
10001 addOp(CInfo.getIndexOfSrcInParsedOperands(CompSrcIdx));
10002 if (CInfo.hasSrc2Acc())
10003 addOp(CInfo.getIndexOfDstInParsedOperands());
10007 AMDGPU::getNamedOperandIdx(Inst.
getOpcode(), AMDGPU::OpName::bitop3);
10008 if (BitOp3Idx != -1) {
10009 OptionalImmIndexMap OptIdx;
10010 AMDGPUOperand &
Op = ((AMDGPUOperand &)*Operands.
back());
10012 OptIdx[
Op.getImmTy()] = Operands.
size() - 1;
10022bool AMDGPUOperand::isDPP8()
const {
10023 return isImmTy(ImmTyDPP8);
10026bool AMDGPUOperand::isDPPCtrl()
const {
10027 using namespace AMDGPU::DPP;
10029 bool result = isImm() && getImmTy() == ImmTyDppCtrl &&
isUInt<9>(
getImm());
10032 return (
Imm >= DppCtrl::QUAD_PERM_FIRST &&
Imm <= DppCtrl::QUAD_PERM_LAST) ||
10033 (
Imm >= DppCtrl::ROW_SHL_FIRST &&
Imm <= DppCtrl::ROW_SHL_LAST) ||
10034 (
Imm >= DppCtrl::ROW_SHR_FIRST &&
Imm <= DppCtrl::ROW_SHR_LAST) ||
10035 (
Imm >= DppCtrl::ROW_ROR_FIRST &&
Imm <= DppCtrl::ROW_ROR_LAST) ||
10036 (
Imm == DppCtrl::WAVE_SHL1) ||
10037 (
Imm == DppCtrl::WAVE_ROL1) ||
10038 (
Imm == DppCtrl::WAVE_SHR1) ||
10039 (
Imm == DppCtrl::WAVE_ROR1) ||
10040 (
Imm == DppCtrl::ROW_MIRROR) ||
10041 (
Imm == DppCtrl::ROW_HALF_MIRROR) ||
10042 (
Imm == DppCtrl::BCAST15) ||
10043 (
Imm == DppCtrl::BCAST31) ||
10044 (
Imm >= DppCtrl::ROW_SHARE_FIRST &&
Imm <= DppCtrl::ROW_SHARE_LAST) ||
10045 (
Imm >= DppCtrl::ROW_XMASK_FIRST &&
Imm <= DppCtrl::ROW_XMASK_LAST);
10054bool AMDGPUOperand::isBLGP()
const {
10058bool AMDGPUOperand::isS16Imm()
const {
10062bool AMDGPUOperand::isU16Imm()
const {
10070bool AMDGPUAsmParser::parseDimId(
unsigned &Encoding) {
10075 SMLoc Loc =
getToken().getEndLoc();
10076 Token = std::string(getTokenStr());
10078 if (getLoc() != Loc)
10083 if (!parseId(Suffix))
10087 StringRef DimId = Token;
10098ParseStatus AMDGPUAsmParser::parseDim(
OperandVector &Operands) {
10102 SMLoc S = getLoc();
10108 SMLoc Loc = getLoc();
10109 if (!parseDimId(Encoding))
10110 return Error(Loc,
"invalid dim value");
10112 Operands.
push_back(AMDGPUOperand::CreateImm(
this, Encoding, S,
10113 AMDGPUOperand::ImmTyDim));
10121ParseStatus AMDGPUAsmParser::parseDPP8(
OperandVector &Operands) {
10122 SMLoc S = getLoc();
10131 if (!skipToken(
AsmToken::LBrac,
"expected an opening square bracket"))
10134 for (
size_t i = 0; i < 8; ++i) {
10138 SMLoc Loc = getLoc();
10139 if (getParser().parseAbsoluteExpression(Sels[i]))
10141 if (0 > Sels[i] || 7 < Sels[i])
10142 return Error(Loc,
"expected a 3-bit value");
10145 if (!skipToken(
AsmToken::RBrac,
"expected a closing square bracket"))
10149 for (
size_t i = 0; i < 8; ++i)
10150 DPP8 |= (Sels[i] << (i * 3));
10152 Operands.
push_back(AMDGPUOperand::CreateImm(
this, DPP8, S, AMDGPUOperand::ImmTyDPP8));
10157AMDGPUAsmParser::isSupportedDPPCtrl(StringRef Ctrl,
10159 if (Ctrl ==
"row_newbcast")
10162 if (Ctrl ==
"row_share" ||
10163 Ctrl ==
"row_xmask")
10166 if (Ctrl ==
"wave_shl" ||
10167 Ctrl ==
"wave_shr" ||
10168 Ctrl ==
"wave_rol" ||
10169 Ctrl ==
"wave_ror" ||
10170 Ctrl ==
"row_bcast")
10173 return Ctrl ==
"row_mirror" ||
10174 Ctrl ==
"row_half_mirror" ||
10175 Ctrl ==
"quad_perm" ||
10176 Ctrl ==
"row_shl" ||
10177 Ctrl ==
"row_shr" ||
10182AMDGPUAsmParser::parseDPPCtrlPerm() {
10185 if (!skipToken(
AsmToken::LBrac,
"expected an opening square bracket"))
10189 for (
int i = 0; i < 4; ++i) {
10194 SMLoc Loc = getLoc();
10195 if (getParser().parseAbsoluteExpression(Temp))
10197 if (Temp < 0 || Temp > 3) {
10198 Error(Loc,
"expected a 2-bit value");
10202 Val += (Temp << i * 2);
10205 if (!skipToken(
AsmToken::RBrac,
"expected a closing square bracket"))
10212AMDGPUAsmParser::parseDPPCtrlSel(StringRef Ctrl) {
10213 using namespace AMDGPU::DPP;
10218 SMLoc Loc = getLoc();
10220 if (getParser().parseAbsoluteExpression(Val))
10223 struct DppCtrlCheck {
10229 DppCtrlCheck
Check = StringSwitch<DppCtrlCheck>(Ctrl)
10230 .Case(
"wave_shl", {DppCtrl::WAVE_SHL1, 1, 1})
10231 .Case(
"wave_rol", {DppCtrl::WAVE_ROL1, 1, 1})
10232 .Case(
"wave_shr", {DppCtrl::WAVE_SHR1, 1, 1})
10233 .Case(
"wave_ror", {DppCtrl::WAVE_ROR1, 1, 1})
10234 .Case(
"row_shl", {DppCtrl::ROW_SHL0, 1, 15})
10235 .Case(
"row_shr", {DppCtrl::ROW_SHR0, 1, 15})
10236 .Case(
"row_ror", {DppCtrl::ROW_ROR0, 1, 15})
10237 .Case(
"row_share", {DppCtrl::ROW_SHARE_FIRST, 0, 15})
10238 .Case(
"row_xmask", {DppCtrl::ROW_XMASK_FIRST, 0, 15})
10239 .Case(
"row_newbcast", {DppCtrl::ROW_NEWBCAST_FIRST, 0, 15})
10243 if (
Check.Ctrl == -1) {
10244 Valid = (
Ctrl ==
"row_bcast" && (Val == 15 || Val == 31));
10252 Error(Loc, Twine(
"invalid ", Ctrl) + Twine(
" value"));
10259ParseStatus AMDGPUAsmParser::parseDPPCtrl(
OperandVector &Operands) {
10260 using namespace AMDGPU::DPP;
10263 !isSupportedDPPCtrl(getTokenStr(), Operands))
10266 SMLoc S = getLoc();
10272 if (Ctrl ==
"row_mirror") {
10273 Val = DppCtrl::ROW_MIRROR;
10274 }
else if (Ctrl ==
"row_half_mirror") {
10275 Val = DppCtrl::ROW_HALF_MIRROR;
10278 if (Ctrl ==
"quad_perm") {
10279 Val = parseDPPCtrlPerm();
10281 Val = parseDPPCtrlSel(Ctrl);
10290 AMDGPUOperand::CreateImm(
this, Val, S, AMDGPUOperand::ImmTyDppCtrl));
10294void AMDGPUAsmParser::cvtVOP3DPP(MCInst &Inst,
const OperandVector &Operands,
10296 OptionalImmIndexMap OptionalIdx;
10303 int OldIdx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::old);
10305 AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::src2_modifiers);
10306 bool IsMAC = OldIdx != -1 && Src2ModIdx != -1 &&
10310 for (
unsigned J = 0; J <
Desc.getNumDefs(); ++J) {
10311 ((AMDGPUOperand &)*Operands[
I++]).addRegOperands(Inst, 1);
10315 int VdstInIdx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::vdst_in);
10316 bool IsVOP3CvtSrDpp =
Opc == AMDGPU::V_CVT_SR_BF8_F32_gfx12_e64_dpp8_gfx12 ||
10317 Opc == AMDGPU::V_CVT_SR_BF8_F32_gfx12_e64_dpp8_gfx13 ||
10318 Opc == AMDGPU::V_CVT_SR_FP8_F32_gfx12_e64_dpp8_gfx12 ||
10319 Opc == AMDGPU::V_CVT_SR_FP8_F32_gfx12_e64_dpp8_gfx13 ||
10320 Opc == AMDGPU::V_CVT_SR_BF8_F32_gfx12_e64_dpp_gfx12 ||
10321 Opc == AMDGPU::V_CVT_SR_BF8_F32_gfx12_e64_dpp_gfx13 ||
10322 Opc == AMDGPU::V_CVT_SR_FP8_F32_gfx12_e64_dpp_gfx12 ||
10323 Opc == AMDGPU::V_CVT_SR_FP8_F32_gfx12_e64_dpp_gfx13;
10325 for (
unsigned E = Operands.
size();
I !=
E; ++
I) {
10329 if (OldIdx == NumOperands) {
10331 constexpr int DST_IDX = 0;
10333 }
else if (Src2ModIdx == NumOperands) {
10343 if (IsVOP3CvtSrDpp) {
10352 if (TiedTo != -1) {
10357 AMDGPUOperand &
Op = ((AMDGPUOperand &)*Operands[
I]);
10359 if (IsDPP8 &&
Op.isDppFI()) {
10362 Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
10363 }
else if (
Op.isReg()) {
10364 Op.addRegOperands(Inst, 1);
10365 }
else if (
Op.isImm() &&
10367 Op.addImmOperands(Inst, 1);
10368 }
else if (
Op.isImm()) {
10369 OptionalIdx[
Op.getImmTy()] =
I;
10377 AMDGPUOperand::ImmTyClamp);
10383 AMDGPUOperand::ImmTyByteSel);
10390 cvtVOP3P(Inst, Operands, OptionalIdx);
10392 cvtVOP3OpSel(Inst, Operands, OptionalIdx);
10399 using namespace llvm::AMDGPU::DPP;
10409 AMDGPUOperand::ImmTyDppFI);
10413void AMDGPUAsmParser::cvtDPP(MCInst &Inst,
const OperandVector &Operands,
bool IsDPP8) {
10414 OptionalImmIndexMap OptionalIdx;
10418 for (
unsigned J = 0; J <
Desc.getNumDefs(); ++J) {
10419 ((AMDGPUOperand &)*Operands[
I++]).addRegOperands(Inst, 1);
10423 for (
unsigned E = Operands.
size();
I !=
E; ++
I) {
10426 if (TiedTo != -1) {
10431 AMDGPUOperand &
Op = ((AMDGPUOperand &)*Operands[
I]);
10433 if (
Op.isReg() && validateVccOperand(
Op.getReg())) {
10441 Op.addImmOperands(Inst, 1);
10443 Op.addRegWithFPInputModsOperands(Inst, 2);
10444 }
else if (
Op.isDppFI()) {
10446 }
else if (
Op.isReg()) {
10447 Op.addRegOperands(Inst, 1);
10453 Op.addRegWithFPInputModsOperands(Inst, 2);
10454 }
else if (
Op.isReg()) {
10455 Op.addRegOperands(Inst, 1);
10456 }
else if (
Op.isDPPCtrl()) {
10457 Op.addImmOperands(Inst, 1);
10458 }
else if (
Op.isImm()) {
10460 OptionalIdx[
Op.getImmTy()] =
I;
10468 using namespace llvm::AMDGPU::DPP;
10476 AMDGPUOperand::ImmTyDppFI);
10485ParseStatus AMDGPUAsmParser::parseSDWASel(
OperandVector &Operands,
10487 AMDGPUOperand::ImmTy
Type) {
10488 return parseStringOrIntWithPrefix(
10490 {
"BYTE_0",
"BYTE_1",
"BYTE_2",
"BYTE_3",
"WORD_0",
"WORD_1",
"DWORD"},
10494ParseStatus AMDGPUAsmParser::parseSDWADstUnused(
OperandVector &Operands) {
10495 return parseStringOrIntWithPrefix(
10496 Operands,
"dst_unused", {
"UNUSED_PAD",
"UNUSED_SEXT",
"UNUSED_PRESERVE"},
10497 AMDGPUOperand::ImmTySDWADstUnused);
10500void AMDGPUAsmParser::cvtSdwaVOP1(MCInst &Inst,
const OperandVector &Operands) {
10504void AMDGPUAsmParser::cvtSdwaVOP2(MCInst &Inst,
const OperandVector &Operands) {
10508void AMDGPUAsmParser::cvtSdwaVOP2b(MCInst &Inst,
const OperandVector &Operands) {
10512void AMDGPUAsmParser::cvtSdwaVOP2e(MCInst &Inst,
const OperandVector &Operands) {
10516void AMDGPUAsmParser::cvtSdwaVOPC(MCInst &Inst,
const OperandVector &Operands) {
10520void AMDGPUAsmParser::cvtSDWA(MCInst &Inst,
const OperandVector &Operands,
10521 uint64_t BasicInstType,
10524 using namespace llvm::AMDGPU::SDWA;
10526 OptionalImmIndexMap OptionalIdx;
10527 bool SkipVcc = SkipDstVcc || SkipSrcVcc;
10528 bool SkippedVcc =
false;
10532 for (
unsigned J = 0; J <
Desc.getNumDefs(); ++J) {
10533 ((AMDGPUOperand &)*Operands[
I++]).addRegOperands(Inst, 1);
10536 for (
unsigned E = Operands.
size();
I !=
E; ++
I) {
10537 AMDGPUOperand &
Op = ((AMDGPUOperand &)*Operands[
I]);
10538 if (SkipVcc && !SkippedVcc &&
Op.isReg() &&
10539 (
Op.getReg() == AMDGPU::VCC ||
Op.getReg() == AMDGPU::VCC_LO)) {
10557 Op.addRegOrImmWithInputModsOperands(Inst, 2);
10558 }
else if (
Op.isImm()) {
10560 OptionalIdx[
Op.getImmTy()] =
I;
10564 SkippedVcc =
false;
10568 if (
Opc != AMDGPU::V_NOP_sdwa_gfx10 &&
Opc != AMDGPU::V_NOP_sdwa_gfx9 &&
10569 Opc != AMDGPU::V_NOP_sdwa_vi) {
10571 switch (BasicInstType) {
10575 AMDGPUOperand::ImmTyClamp, 0);
10579 AMDGPUOperand::ImmTyOModSI, 0);
10583 AMDGPUOperand::ImmTySDWADstSel, SdwaSel::DWORD);
10587 AMDGPUOperand::ImmTySDWADstUnused,
10588 DstUnused::UNUSED_PRESERVE);
10590 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySDWASrc0Sel, SdwaSel::DWORD);
10595 AMDGPUOperand::ImmTyClamp, 0);
10600 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySDWADstSel, SdwaSel::DWORD);
10601 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySDWADstUnused, DstUnused::UNUSED_PRESERVE);
10602 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySDWASrc0Sel, SdwaSel::DWORD);
10603 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySDWASrc1Sel, SdwaSel::DWORD);
10609 AMDGPUOperand::ImmTyClamp, 0);
10610 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySDWASrc0Sel, SdwaSel::DWORD);
10611 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySDWASrc1Sel, SdwaSel::DWORD);
10615 llvm_unreachable(
"Invalid instruction type. Only VOP1, VOP2 and VOPC allowed");
10621 if (Inst.
getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi ||
10622 Inst.
getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) {
10623 auto *it = Inst.
begin();
10625 it, AMDGPU::getNamedOperandIdx(Inst.
getOpcode(), AMDGPU::OpName::src2));
10637#define GET_MATCHER_IMPLEMENTATION
10638#define GET_MNEMONIC_SPELL_CHECKER
10639#define GET_MNEMONIC_CHECKER
10640#include "AMDGPUGenAsmMatcher.inc"
10646 return parseTokenOp(
"addr64",
Operands);
10648 return parseNamedBit(
"done",
Operands, AMDGPUOperand::ImmTyDone,
true);
10650 return parseTokenOp(
"idxen",
Operands);
10652 return parseNamedBit(
"lds",
Operands, AMDGPUOperand::ImmTyLDS,
10655 return parseTokenOp(
"offen",
Operands);
10657 return parseTokenOp(
"off",
Operands);
10658 case MCK_row_95_en:
10659 return parseNamedBit(
"row_en",
Operands, AMDGPUOperand::ImmTyRowEn,
true);
10661 return parseNamedBit(
"gds",
Operands, AMDGPUOperand::ImmTyGDS);
10663 return parseNamedBit(
"tfe",
Operands, AMDGPUOperand::ImmTyTFE);
10665 return tryCustomParseOperand(
Operands, MCK);
10670unsigned AMDGPUAsmParser::validateTargetOperandClass(MCParsedAsmOperand &
Op,
10676 AMDGPUOperand &Operand = (AMDGPUOperand&)
Op;
10679 return Operand.isAddr64() ? Match_Success : Match_InvalidOperand;
10681 return Operand.isGDS() ? Match_Success : Match_InvalidOperand;
10683 return Operand.isLDS() ? Match_Success : Match_InvalidOperand;
10685 return Operand.isIdxen() ? Match_Success : Match_InvalidOperand;
10687 return Operand.isOffen() ? Match_Success : Match_InvalidOperand;
10689 return Operand.isTFE() ? Match_Success : Match_InvalidOperand;
10691 return Operand.isDone() ? Match_Success : Match_InvalidOperand;
10692 case MCK_row_95_en:
10693 return Operand.isRowEn() ? Match_Success : Match_InvalidOperand;
10701 return Operand.isSSrc_b32() ? Match_Success : Match_InvalidOperand;
10703 return Operand.isSSrc_f32() ? Match_Success : Match_InvalidOperand;
10704 case MCK_SOPPBrTarget:
10705 return Operand.isSOPPBrTarget() ? Match_Success : Match_InvalidOperand;
10706 case MCK_VReg32OrOff:
10707 return Operand.isVReg32OrOff() ? Match_Success : Match_InvalidOperand;
10708 case MCK_InterpSlot:
10709 return Operand.isInterpSlot() ? Match_Success : Match_InvalidOperand;
10710 case MCK_InterpAttr:
10711 return Operand.isInterpAttr() ? Match_Success : Match_InvalidOperand;
10712 case MCK_InterpAttrChan:
10713 return Operand.isInterpAttrChan() ? Match_Success : Match_InvalidOperand;
10715 case MCK_SReg_64_XEXEC:
10725 return Operand.isNull() ? Match_Success : Match_InvalidOperand;
10727 return Match_InvalidOperand;
10735ParseStatus AMDGPUAsmParser::parseEndpgm(
OperandVector &Operands) {
10736 SMLoc S = getLoc();
10745 return Error(S,
"expected a 16-bit value");
10748 AMDGPUOperand::CreateImm(
this,
Imm, S, AMDGPUOperand::ImmTyEndpgm));
/// Reports whether this operand carries the ImmTyEndpgm immediate type.
/// The decision is delegated entirely to isImmTy(); ImmTyEndpgm is the type
/// that parseEndpgm passes to AMDGPUOperand::CreateImm for its operand.
10752bool AMDGPUOperand::isEndpgm()
const {
return isImmTy(ImmTyEndpgm); }
/// Reports whether this operand is acceptable as a split-barrier operand.
/// The only requirement checked here is isInlinableImm(MVT::i32), so the
/// result is whatever isInlinableImm() decides for the i32 type.
10758bool AMDGPUOperand::isSplitBarrier()
const {
return isInlinableImm(MVT::i32); }
static const TargetRegisterClass * getRegClass(const MachineInstr &MI, Register Reg)
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
SmallVector< int16_t, MAX_SRC_OPERANDS_NUM > OperandIndices
static bool checkWriteLane(const MCInst &Inst)
static bool getRegNum(StringRef Str, unsigned &Num)
static void addSrcModifiersAndSrc(MCInst &Inst, const OperandVector &Operands, unsigned i, unsigned Opc, AMDGPU::OpName OpName)
static constexpr RegInfo RegularRegisters[]
static const RegInfo * getRegularRegInfo(StringRef Str)
static ArrayRef< unsigned > getAllVariants()
static OperandIndices getSrcOperandIndices(unsigned Opcode, bool AddMandatoryLiterals=false)
static int IsAGPROperand(const MCInst &Inst, AMDGPU::OpName Name, const MCRegisterInfo *MRI)
static bool IsMovrelsSDWAOpcode(const unsigned Opcode)
static const fltSemantics * getFltSemantics(unsigned Size)
static bool isRegularReg(RegisterKind Kind)
LLVM_ABI LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUAsmParser()
Force static initialization.
static bool ConvertOmodMul(int64_t &Mul)
#define PARSE_BITS_ENTRY(FIELD, ENTRY, VALUE, RANGE)
static bool isInlineableLiteralOp16(int64_t Val, MVT VT, bool HasInv2Pi)
static bool canLosslesslyConvertToFPType(APFloat &FPLiteral, MVT VT)
constexpr uint64_t MIMGFlags
static bool AMDGPUCheckMnemonic(StringRef Mnemonic, const FeatureBitset &AvailableFeatures, unsigned VariantID)
static void applyMnemonicAliases(StringRef &Mnemonic, const FeatureBitset &Features, unsigned VariantID)
constexpr unsigned MAX_SRC_OPERANDS_NUM
#define EXPR_RESOLVE_OR_ERROR(RESOLVED)
static bool ConvertOmodDiv(int64_t &Div)
static bool IsRevOpcode(const unsigned Opcode)
static bool encodeCnt(const AMDGPU::IsaVersion ISA, int64_t &IntVal, int64_t CntVal, bool Saturate, unsigned(*encode)(const IsaVersion &Version, unsigned, unsigned), unsigned(*decode)(const IsaVersion &Version, unsigned))
static MCRegister getSpecialRegForName(StringRef RegName)
static void addOptionalImmOperand(MCInst &Inst, const OperandVector &Operands, AMDGPUAsmParser::OptionalImmIndexMap &OptionalIdx, AMDGPUOperand::ImmTy ImmT, int64_t Default=0, std::optional< unsigned > InsertAt=std::nullopt)
static void cvtVOP3DstOpSelOnly(MCInst &Inst, const MCRegisterInfo &MRI)
static bool isRegOrImmWithInputMods(const MCInstrDesc &Desc, unsigned OpNum)
static const fltSemantics * getOpFltSemantics(uint8_t OperandType)
static bool isInvalidVOPDY(const OperandVector &Operands, uint64_t InvalidOprIdx)
static std::string AMDGPUMnemonicSpellCheck(StringRef S, const FeatureBitset &FBS, unsigned VariantID=0)
static LLVM_READNONE unsigned encodeBitmaskPerm(const unsigned AndMask, const unsigned OrMask, const unsigned XorMask)
static bool isSafeTruncation(int64_t Val, unsigned Size)
AMDHSA kernel descriptor MCExpr struct for use in MC layer.
Provides AMDGPU specific target descriptions.
Enums shared between the AMDGPU backend (LLVM) and the ELF linker (LLD) for the .amdgpu....
AMDHSA kernel descriptor definitions.
static bool parseExpr(MCAsmParser &MCParser, const MCExpr *&Value, raw_ostream &Err)
MC layer struct for AMDGPUMCKernelCodeT, provides MCExpr functionality where required.
@ AMD_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32
This file declares a class to represent arbitrary precision floating point values and provide a varie...
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
#define LLVM_EXTERNAL_VISIBILITY
static llvm::Expected< InlineInfo > decode(GsymDataExtractor &Data, uint64_t &Offset, uint64_t BaseAddr)
Decode an InlineInfo in Data at the specified offset.
const AbstractManglingParser< Derived, Alloc >::OperatorInfo AbstractManglingParser< Derived, Alloc >::Ops[]
Loop::LoopBounds::Direction Direction
Register const TargetRegisterInfo * TRI
static MCRegister getReg(const MCDisassembler *D, unsigned RC, unsigned RegNo)
static bool isReg(const MCInst &MI, unsigned OpNo)
MachineInstr unsigned OpIdx
ConstantRange Range(APInt(BitWidth, Low), APInt(BitWidth, High))
uint64_t IntrinsicInst * II
static cl::opt< RegAllocEvictionAdvisorAnalysisLegacy::AdvisorMode > Mode("regalloc-enable-advisor", cl::Hidden, cl::init(RegAllocEvictionAdvisorAnalysisLegacy::AdvisorMode::Default), cl::desc("Enable regalloc advisor mode"), cl::values(clEnumValN(RegAllocEvictionAdvisorAnalysisLegacy::AdvisorMode::Default, "default", "Default"), clEnumValN(RegAllocEvictionAdvisorAnalysisLegacy::AdvisorMode::Release, "release", "precompiled"), clEnumValN(RegAllocEvictionAdvisorAnalysisLegacy::AdvisorMode::Development, "development", "for training")))
Interface definition for SIInstrInfo.
static bool contains(SmallPtrSetImpl< ConstantExpr * > &Cache, ConstantExpr *Expr, Constant *C)
This file implements the SmallBitVector class.
StringSet - A set-like wrapper for the StringMap.
static TableGen::Emitter::Opt Y("gen-skeleton-entry", EmitSkeleton, "Generate example skeleton entry")
static void initialize(TargetLibraryInfoImpl &TLI, const Triple &T, const llvm::StringTable &StandardNames, VectorLibrary VecLib)
Initialize the set of available library functions based on the specified target triple.
static const char * getRegisterName(MCRegister Reg)
static const AMDGPUMCExpr * createMax(ArrayRef< const MCExpr * > Args, MCContext &Ctx)
static const AMDGPUMCExpr * createLit(LitModifier Lit, int64_t Value, MCContext &Ctx)
static const AMDGPUMCExpr * create(VariantKind Kind, ArrayRef< const MCExpr * > Args, MCContext &Ctx)
static const AMDGPUMCExpr * createExtraSGPRs(const MCExpr *VCCUsed, const MCExpr *FlatScrUsed, bool XNACKUsed, MCContext &Ctx)
Allow delayed MCExpr resolve of ExtraSGPRs (in case VCCUsed or FlatScrUsed are unresolvable but neede...
static const AMDGPUMCExpr * createAlignTo(const MCExpr *Value, const MCExpr *Align, MCContext &Ctx)
static const fltSemantics & IEEEsingle()
static const fltSemantics & BFloat()
static const fltSemantics & IEEEdouble()
static constexpr roundingMode rmNearestTiesToEven
static const fltSemantics & IEEEhalf()
opStatus
IEEE-754R 7: Default exception handling.
LLVM_ABI opStatus convert(const fltSemantics &ToSemantics, roundingMode RM, bool *losesInfo)
Represent a constant reference to an array (0 or more elements consecutively in memory),...
size_t size() const
Get the array size.
StringRef getString() const
Get the string for the current token, this includes all characters (for example, the quotes on string...
bool is(TokenKind K) const
Container class for subtarget features.
constexpr bool test(unsigned I) const
constexpr FeatureBitset & flip(unsigned I)
void printExpr(raw_ostream &, const MCExpr &) const
virtual void Initialize(MCAsmParser &Parser)
Initialize the extension for parsing using the given Parser.
static const MCBinaryExpr * createAdd(const MCExpr *LHS, const MCExpr *RHS, MCContext &Ctx, SMLoc Loc=SMLoc())
static const MCBinaryExpr * createDiv(const MCExpr *LHS, const MCExpr *RHS, MCContext &Ctx)
static const MCBinaryExpr * createSub(const MCExpr *LHS, const MCExpr *RHS, MCContext &Ctx)
static LLVM_ABI const MCConstantExpr * create(int64_t Value, MCContext &Ctx, bool PrintInHex=false, unsigned SizeInBytes=0)
Context object for machine code objects.
LLVM_ABI MCSymbol * getOrCreateSymbol(const Twine &Name)
Lookup the symbol inside with the specified Name.
Instances of this class represent a single low-level machine instruction.
unsigned getNumOperands() const
unsigned getOpcode() const
iterator insert(iterator I, const MCOperand &Op)
void addOperand(const MCOperand Op)
const MCOperand & getOperand(unsigned i) const
Describe properties that are true of each instruction in the target description file.
const MCInstrDesc & get(unsigned Opcode) const
Return the machine instruction descriptor that corresponds to the specified instruction opcode.
int16_t getOpRegClassID(const MCOperandInfo &OpInfo, unsigned HwModeId) const
Return the ID of the register class to use for OpInfo, for the active HwMode HwModeId.
Instances of this class represent operands of the MCInst class.
static MCOperand createExpr(const MCExpr *Val)
static MCOperand createReg(MCRegister Reg)
static MCOperand createImm(int64_t Val)
void setReg(MCRegister Reg)
Set the register number.
MCRegister getReg() const
Returns the register number.
const MCExpr * getExpr() const
MCParsedAsmOperand - This abstract class represents a source-level assembly instruction operand.
MCRegisterClass - Base class of TargetRegisterClass.
MCRegister getRegister(unsigned i) const
getRegister - Return the specified register in the class.
unsigned getNumRegs() const
getNumRegs - Return the number of registers in this class.
bool contains(MCRegister Reg) const
contains - Return true if the specified register is included in this register class.
MCRegisterInfo base class - We assume that the target defines a static array of MCRegisterDesc object...
bool regsOverlap(MCRegister RegA, MCRegister RegB) const
Returns true if the two registers are equal or alias each other.
const MCRegisterClass & getRegClass(unsigned i) const
Returns the register class associated with the enumeration value.
MCRegister getSubReg(MCRegister Reg, unsigned Idx) const
Returns the physical register number of sub-register "Index" for physical register RegNo.
Wrapper class representing physical registers. Should be passed by value.
constexpr bool isValid() const
virtual void emitInstruction(const MCInst &Inst, const MCSubtargetInfo &STI)
Emit the given Instruction into the current section.
Generic base class for all target subtargets.
MCSymbol - Instances of this class represent a symbol name in the MC file, and MCSymbols are created ...
bool isVariable() const
isVariable - Check if this is a variable symbol.
LLVM_ABI void setVariableValue(const MCExpr *Value)
void setRedefinable(bool Value)
Mark this symbol as redefinable.
const MCExpr * getVariableValue() const
Get the expression of the variable symbol.
MCTargetAsmParser - Generic interface to target specific assembly parsers.
uint64_t getScalarSizeInBits() const
TypeSize getSizeInBits() const
Returns the size of the specified MVT in bits.
MVT getScalarType() const
If this is a vector, return the element type, otherwise return this.
Ternary parse status returned by various parse* methods.
constexpr bool isFailure() const
static constexpr StatusTy Failure
constexpr bool isSuccess() const
static constexpr StatusTy Success
static constexpr StatusTy NoMatch
constexpr bool isNoMatch() const
constexpr unsigned id() const
Represents a location in source code.
static SMLoc getFromPointer(const char *Ptr)
constexpr const char * getPointer() const
constexpr bool isValid() const
iterator insert(iterator I, T &&Elt)
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
A wrapper around a string literal that serves as a proxy for constructing global tables of StringRefs...
Represent a constant reference to a string, i.e.
bool consume_back(StringRef Suffix)
Returns true if this StringRef has the given suffix and removes that suffix.
constexpr StringRef substr(size_t Start, size_t N=npos) const
Return a reference to the substring from [Start, Start + N).
bool starts_with(StringRef Prefix) const
Check if this string starts with the given Prefix.
constexpr bool empty() const
Check if the string is empty.
StringRef drop_front(size_t N=1) const
Return a StringRef equal to 'this' but with the first N elements dropped.
constexpr size_t size() const
Get the string size.
constexpr const char * data() const
Get a pointer to the start of the string (which may not be null terminated).
bool ends_with(StringRef Suffix) const
Check if this string ends with the given Suffix.
bool consume_front(char Prefix)
Returns true if this StringRef has the given prefix and removes that prefix.
bool contains(StringRef key) const
Check if the set contains the given key.
std::pair< typename Base::iterator, bool > insert(StringRef key)
A switch()-like statement whose cases are string literals.
StringSwitch & Case(StringLiteral S, T Value)
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
LLVM_ABI std::string str() const
Return the twine contents as a std::string.
std::pair< iterator, bool > insert(const ValueT &V)
This class implements an extremely fast bulk output stream that can only output to a stream.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
int encodeDepCtr(const StringRef Name, int64_t Val, unsigned &UsedOprMask, const MCSubtargetInfo &STI)
int getDefaultDepCtrEncoding(const MCSubtargetInfo &STI)
bool isSupportedTgtId(unsigned Id, const MCSubtargetInfo &STI)
unsigned getTgtId(const StringRef Name)
constexpr char Align[]
Key for Kernel::Arg::Metadata::mAlign.
constexpr char NumSGPRs[]
Key for Kernel::CodeProps::Metadata::mNumSGPRs.
constexpr char SymbolName[]
Key for Kernel::Metadata::mSymbolName.
constexpr char AssemblerDirectiveBegin[]
HSA metadata beginning assembler directive.
constexpr char AssemblerDirectiveEnd[]
HSA metadata ending assembler directive.
constexpr char AssemblerDirectiveBegin[]
Old HSA metadata beginning assembler directive for V2.
int64_t getHwregId(StringRef Name, const MCSubtargetInfo &STI)
static constexpr CustomOperand Operands[]
@ FIXED_NUM_SGPRS_FOR_INIT_BUG
unsigned getAddressableNumSGPRs(const MCSubtargetInfo &STI)
unsigned getVGPREncodingGranule(const MCSubtargetInfo &STI, std::optional< bool > EnableWavefrontSize32)
unsigned getSGPREncodingGranule(const MCSubtargetInfo &STI)
unsigned getLocalMemorySize(const MCSubtargetInfo &STI)
constexpr char AssemblerDirective[]
PAL metadata (old linear format) assembler directive.
constexpr char AssemblerDirectiveBegin[]
PAL metadata (new MsgPack format) beginning assembler directive.
constexpr char AssemblerDirectiveEnd[]
PAL metadata (new MsgPack format) ending assembler directive.
int64_t getMsgOpId(int64_t MsgId, StringRef Name, const MCSubtargetInfo &STI)
Map from a symbolic name for a sendmsg operation to the operation portion of the immediate encoding.
int64_t getMsgId(StringRef Name, const MCSubtargetInfo &STI)
Map from a symbolic name for a msg_id to the message portion of the immediate encoding.
uint64_t encodeMsg(uint64_t MsgId, uint64_t OpId, uint64_t StreamId)
bool msgSupportsStream(int64_t MsgId, int64_t OpId, const MCSubtargetInfo &STI)
bool isValidMsgId(int64_t MsgId, const MCSubtargetInfo &STI)
bool isValidMsgStream(int64_t MsgId, int64_t OpId, int64_t StreamId, const MCSubtargetInfo &STI, bool Strict)
bool msgRequiresOp(int64_t MsgId, const MCSubtargetInfo &STI)
bool isValidMsgOp(int64_t MsgId, int64_t OpId, const MCSubtargetInfo &STI, bool Strict)
ArrayRef< GFXVersion > getGFXVersions()
constexpr unsigned COMPONENTS[]
constexpr const char *const ModMatrixFmt[]
constexpr const char *const ModMatrixScaleFmt[]
constexpr const char *const ModMatrixScale[]
bool isPackedFP32Inst(unsigned Opc)
bool isInlinableLiteralBF16(int16_t Literal, bool HasInv2Pi)
bool isGFX10_BEncoding(const MCSubtargetInfo &STI)
bool isInlineValue(MCRegister Reg)
bool isPKFMACF16InlineConstant(uint32_t Literal, bool IsGFX11Plus)
LLVM_READONLY const MIMGInfo * getMIMGInfo(unsigned Opc)
bool isInlinableLiteralFP16(int16_t Literal, bool HasInv2Pi)
bool isSGPR(MCRegister Reg, const MCRegisterInfo *TRI)
Is Reg - scalar register.
MCRegister getMCReg(MCRegister Reg, const MCSubtargetInfo &STI)
If Reg is a pseudo reg, return the correct hardware register given STI otherwise return Reg.
FuncInfoFlags
Per-function flags packed into INFO_FLAGS entries.
uint8_t wmmaScaleF8F6F4FormatToNumRegs(unsigned Fmt)
const int OPR_ID_UNSUPPORTED
bool isInlinableLiteralV2I16(uint32_t Literal)
bool isHi16Reg(MCRegister Reg, const MCRegisterInfo &MRI)
unsigned getTemporalHintType(const MCInstrDesc TID)
int32_t getTotalNumVGPRs(bool has90AInsts, int32_t ArgNumAGPR, int32_t ArgNumVGPR)
bool isGFX10(const MCSubtargetInfo &STI)
LLVM_READONLY bool isLitExpr(const MCExpr *Expr)
bool isInlinableLiteralV2BF16(uint32_t Literal)
unsigned getMaxNumUserSGPRs(const MCSubtargetInfo &STI)
unsigned getNumFlatOffsetBits(const MCSubtargetInfo &ST)
For pre-GFX12 FLAT instructions the offset must be positive; MSB is ignored and forced to zero.
bool hasA16(const MCSubtargetInfo &STI)
bool isLegalSMRDEncodedSignedOffset(const MCSubtargetInfo &ST, int64_t EncodedOffset, bool IsBuffer)
bool isGFX12Plus(const MCSubtargetInfo &STI)
unsigned getNSAMaxSize(const MCSubtargetInfo &STI, bool HasSampler)
bool hasPackedD16(const MCSubtargetInfo &STI)
bool isGFX940(const MCSubtargetInfo &STI)
bool isInlinableLiteralV2F16(uint32_t Literal)
bool isHsaAbi(const MCSubtargetInfo &STI)
bool isGFX11(const MCSubtargetInfo &STI)
const int OPR_VAL_INVALID
bool getSMEMIsBuffer(unsigned Opc)
bool isGFX13(const MCSubtargetInfo &STI)
uint8_t mfmaScaleF8F6F4FormatToNumRegs(unsigned EncodingVal)
LLVM_ABI IsaVersion getIsaVersion(StringRef GPU)
bool isValid32BitLiteral(uint64_t Val, bool IsFP64)
CanBeVOPD getCanBeVOPD(unsigned Opc, unsigned EncodingFamily, bool VOPD3)
LLVM_READNONE bool isLegalDPALU_DPPControl(const MCSubtargetInfo &ST, unsigned DC)
bool isSI(const MCSubtargetInfo &STI)
unsigned decodeLgkmcnt(const IsaVersion &Version, unsigned Waitcnt)
unsigned getWaitcntBitMask(const IsaVersion &Version)
LLVM_READONLY bool hasNamedOperand(uint64_t Opcode, OpName NamedIdx)
bool isGFX9(const MCSubtargetInfo &STI)
unsigned getVOPDEncodingFamily(const MCSubtargetInfo &ST)
bool isGFX10_AEncoding(const MCSubtargetInfo &STI)
bool isKImmOperand(const MCInstrDesc &Desc, unsigned OpNo)
Is this a KImm operand?
bool isGFX90A(const MCSubtargetInfo &STI)
LLVM_READONLY const MIMGDimInfo * getMIMGDimInfoByEncoding(uint8_t DimEnc)
bool isInlinableLiteral32(int32_t Literal, bool HasInv2Pi)
bool isGFX12(const MCSubtargetInfo &STI)
unsigned encodeExpcnt(const IsaVersion &Version, unsigned Waitcnt, unsigned Expcnt)
bool hasMAIInsts(const MCSubtargetInfo &STI)
constexpr bool isSISrcOperand(const MCOperandInfo &OpInfo)
Is this an AMDGPU specific source operand?
LLVM_READONLY const MIMGDimInfo * getMIMGDimInfoByAsmSuffix(StringRef AsmSuffix)
bool hasMIMG_R128(const MCSubtargetInfo &STI)
bool hasG16(const MCSubtargetInfo &STI)
unsigned getAddrSizeMIMGOp(const MIMGBaseOpcodeInfo *BaseOpcode, const MIMGDimInfo *Dim, bool IsA16, bool IsG16Supported)
bool isGFX13Plus(const MCSubtargetInfo &STI)
bool hasArchitectedFlatScratch(const MCSubtargetInfo &STI)
LLVM_READONLY int64_t getLitValue(const MCExpr *Expr)
bool isGFX11Plus(const MCSubtargetInfo &STI)
bool isSISrcFPOperand(const MCInstrDesc &Desc, unsigned OpNo)
Is this floating-point operand?
bool isGFX10Plus(const MCSubtargetInfo &STI)
int64_t encode32BitLiteral(int64_t Imm, OperandType Type, bool IsLit)
@ OPERAND_KIMM32
Operand with 32-bit immediate that uses the constant bus.
@ OPERAND_REG_INLINE_C_FP64
@ OPERAND_REG_INLINE_C_BF16
@ OPERAND_REG_INLINE_C_V2BF16
@ OPERAND_REG_IMM_V2INT16
@ OPERAND_REG_IMM_INT32
Operands with register, 32-bit, or 64-bit immediate.
@ OPERAND_REG_IMM_V2FP16_SPLAT
@ OPERAND_REG_INLINE_C_INT64
@ OPERAND_REG_INLINE_C_INT16
Operands with register or inline constant.
@ OPERAND_REG_IMM_NOINLINE_V2FP16
@ OPERAND_REG_INLINE_C_V2FP16
@ OPERAND_REG_INLINE_AC_INT32
Operands with an AccVGPR register or inline constant.
@ OPERAND_REG_INLINE_AC_FP32
@ OPERAND_REG_IMM_V2INT32
@ OPERAND_REG_INLINE_C_FP32
@ OPERAND_REG_INLINE_C_INT32
@ OPERAND_REG_INLINE_C_V2INT16
@ OPERAND_REG_INLINE_AC_FP64
@ OPERAND_REG_INLINE_C_FP16
@ OPERAND_INLINE_SPLIT_BARRIER_INT32
bool isDPALU_DPP(const MCInstrDesc &OpDesc, const MCInstrInfo &MII, const MCSubtargetInfo &ST)
bool hasGDS(const MCSubtargetInfo &STI)
bool isLegalSMRDEncodedUnsignedOffset(const MCSubtargetInfo &ST, int64_t EncodedOffset)
bool isGFX9Plus(const MCSubtargetInfo &STI)
bool hasDPPSrc1SGPR(const MCSubtargetInfo &STI)
const int OPR_ID_DUPLICATE
bool isVOPD(unsigned Opc)
VOPD::InstInfo getVOPDInstInfo(const MCInstrDesc &OpX, const MCInstrDesc &OpY)
unsigned encodeVmcnt(const IsaVersion &Version, unsigned Waitcnt, unsigned Vmcnt)
unsigned decodeExpcnt(const IsaVersion &Version, unsigned Waitcnt)
unsigned getRegBitWidth(const TargetRegisterClass &RC)
Get the size in bits of a register from the register class RC.
bool isGFX1250(const MCSubtargetInfo &STI)
const MIMGBaseOpcodeInfo * getMIMGBaseOpcode(unsigned Opc)
bool isVI(const MCSubtargetInfo &STI)
bool supportsScaleOffset(const MCInstrInfo &MII, unsigned Opcode)
MCRegister mc2PseudoReg(MCRegister Reg)
Convert hardware register Reg to a pseudo register.
unsigned hasKernargPreload(const MCSubtargetInfo &STI)
bool supportsWGP(const MCSubtargetInfo &STI)
LLVM_READNONE unsigned getOperandSize(const MCOperandInfo &OpInfo)
bool isCI(const MCSubtargetInfo &STI)
unsigned encodeLgkmcnt(const IsaVersion &Version, unsigned Waitcnt, unsigned Lgkmcnt)
LLVM_READONLY const MIMGBaseOpcodeInfo * getMIMGBaseOpcodeInfo(unsigned BaseOpcode)
bool isGFX1250Plus(const MCSubtargetInfo &STI)
unsigned decodeVmcnt(const IsaVersion &Version, unsigned Waitcnt)
bool isInlinableLiteralI16(int32_t Literal, bool HasInv2Pi)
bool hasVOPD(const MCSubtargetInfo &STI)
bool isInlinableLiteral64(int64_t Literal, bool HasInv2Pi)
Is this literal inlinable.
bool isPermlane16(unsigned Opc)
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
unsigned ID
LLVM IR allows arbitrary numbers to be used as calling convention identifiers.
@ C
The default llvm calling convention, compatible with C.
@ UNDEF
UNDEF - An undefined node.
Predicate getPredicate(unsigned Condition, unsigned Hint)
Return predicate consisting of specified condition and hint bits.
void validate(const Triple &TT, const FeatureBitset &FeatureBits)
@ Valid
The data is already valid.
Scope
Defines the scope in which this symbol should be visible: Default – Visible in the public interface o...
Context & getContext() const
This is an optimization pass for GlobalISel generic memory operations.
bool errorToBool(Error Err)
Helper for converting an Error to a bool.
FunctionAddr VTableAddr Value
StringMapEntry< Value * > ValueName
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Printable print(const GCNRegPressure &RP, const GCNSubtarget *ST=nullptr, unsigned DynamicVGPRBlockSize=0)
unsigned encode(MaybeAlign A)
Returns a representation of the alignment that encodes undefined as 0.
constexpr bool isInt(int64_t x)
Checks if an integer fits into the given bit width.
static bool isMem(const MachineInstr &MI, unsigned Op)
LLVM_ABI std::pair< StringRef, StringRef > getToken(StringRef Source, StringRef Delimiters=" \t\n\v\f\r")
getToken - This function extracts one token from source, ignoring any leading characters that appear ...
static StringRef getCPU(StringRef CPU)
Processes a CPU name.
testing::Matcher< const detail::ErrorHolder & > Failed()
void PrintError(const Twine &Msg)
constexpr bool isUIntN(unsigned N, uint64_t x)
Checks if an unsigned integer fits into the given (dynamic) bit width.
FunctionAddr VTableAddr uintptr_t uintptr_t DataSize
constexpr bool isPowerOf2_64(uint64_t Value)
Return true if the argument is a power of two > 0 (64 bit edition.)
T bit_ceil(T Value)
Returns the smallest integral power of two no smaller than Value if Value is nonzero.
Target & getTheR600Target()
The target for R600 GPUs.
constexpr int popcount(T Value) noexcept
Count the number of set bits in a value.
SmallVectorImpl< std::unique_ptr< MCParsedAsmOperand > > OperandVector
FunctionAddr VTableAddr uintptr_t uintptr_t Version
MachineInstr * getImm(const MachineOperand &MO, const MachineRegisterInfo *MRI)
constexpr uint32_t Hi_32(uint64_t Value)
Return the high 32 bits of a 64 bit value.
constexpr uint64_t alignTo(uint64_t Size, Align A)
Returns a multiple of A needed to store Size bytes.
constexpr bool isUInt(uint64_t x)
Checks if an unsigned integer fits into the given bit width.
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
constexpr uint32_t Lo_32(uint64_t Value)
Return the low 32 bits of a 64 bit value.
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
LLVM_ATTRIBUTE_VISIBILITY_DEFAULT AnalysisKey InnerAnalysisManagerProxy< AnalysisManagerT, IRUnitT, ExtraArgTs... >::Key
MutableArrayRef(T &OneElt) -> MutableArrayRef< T >
constexpr T divideCeil(U Numerator, V Denominator)
Returns the integer ceil(Numerator / Denominator).
@ First
Helpers to iterate all locations in the MemoryEffectsBase class.
FunctionAddr VTableAddr uintptr_t uintptr_t Data
Target & getTheGCNTarget()
The target for GCN GPUs.
@ Sub
Subtraction of integers.
uint16_t MCPhysReg
An unsigned integer type large enough to represent all physical registers, but not necessarily virtua...
DWARFExpression::Operation Op
raw_ostream & operator<<(raw_ostream &OS, const APFixedPoint &FX)
unsigned M0(unsigned Val)
ArrayRef(const T &OneElt) -> ArrayRef< T >
std::string toString(const APInt &I, unsigned Radix, bool Signed, bool formatAsCLiteral=false, bool UpperCase=true, bool InsertSeparators=false)
auto find_if(R &&Range, UnaryPredicate P)
Provide wrappers to std::find_if which take ranges instead of having to pass begin/end explicitly.
constexpr bool isIntN(unsigned N, int64_t x)
Checks if a signed integer fits into the given (dynamic) bit width.
@ Enabled
Convert any .debug_str_offsets tables to DWARF64 if needed.
@ Default
The result value is uniform if and only if all operands are uniform.
void initDefault(const MCSubtargetInfo &STI, MCContext &Ctx, bool InitMCExpr=true)
void validate(const MCSubtargetInfo *STI, MCContext &Ctx)
uint32_t PrivateSegmentSize
SmallVector< std::pair< MCSymbol *, std::string >, 4 > IndirectCalls
SmallVector< std::pair< MCSymbol *, MCSymbol * >, 8 > Calls
SmallVector< FuncInfo, 8 > Funcs
SmallVector< std::pair< MCSymbol *, std::string >, 4 > TypeIds
SmallVector< std::pair< MCSymbol *, MCSymbol * >, 4 > Uses
Instruction set architecture version.
const MCExpr * compute_pgm_rsrc2
const MCExpr * kernarg_size
const MCExpr * kernarg_preload
const MCExpr * compute_pgm_rsrc3
const MCExpr * private_segment_fixed_size
const MCExpr * compute_pgm_rsrc1
static void bits_set(const MCExpr *&Dst, const MCExpr *Value, uint32_t Shift, uint32_t Mask, MCContext &Ctx)
const MCExpr * group_segment_fixed_size
static MCKernelDescriptor getDefaultAmdhsaKernelDescriptor(const MCSubtargetInfo *STI, MCContext &Ctx)
const MCExpr * kernel_code_properties
RegisterMCAsmParser - Helper template for registering a target specific assembly parser,...
uint32_t group_segment_fixed_size
uint32_t private_segment_fixed_size