/// Classification of a register as seen by the assembler. The enumerators
/// keep their implicit numbering (0..5), spelled out here for readability.
enum RegisterKind {
  IS_UNKNOWN = 0,
  IS_VGPR = 1,
  IS_SGPR = 2,
  IS_AGPR = 3,
  IS_TTMP = 4,
  IS_SPECIAL = 5
};
71 SMLoc StartLoc, EndLoc;
72 const AMDGPUAsmParser *AsmParser;
75 AMDGPUOperand(KindTy Kind_,
const AMDGPUAsmParser *AsmParser_)
76 : Kind(Kind_), AsmParser(AsmParser_) {}
78 using Ptr = std::unique_ptr<AMDGPUOperand>;
86 bool hasFPModifiers()
const {
return Abs || Neg; }
87 bool hasIntModifiers()
const {
return Sext; }
88 bool hasModifiers()
const {
return hasFPModifiers() || hasIntModifiers(); }
89 bool isForcedLit()
const {
return Lit == LitModifier::Lit; }
90 bool isForcedLit64()
const {
return Lit == LitModifier::Lit64; }
92 int64_t getFPModifiersOperand()
const {
99 int64_t getIntModifiersOperand()
const {
105 int64_t getModifiersOperand()
const {
106 assert(!(hasFPModifiers() && hasIntModifiers())
107 &&
"fp and int modifiers should not be used simultaneously");
108 if (hasFPModifiers())
109 return getFPModifiersOperand();
110 if (hasIntModifiers())
111 return getIntModifiersOperand();
115 friend raw_ostream &
operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods);
189 ImmTyMatrixAScaleFmt,
190 ImmTyMatrixBScaleFmt,
223 mutable int MCOpIdx = -1;
226 bool isToken()
const override {
return Kind == Token; }
228 bool isSymbolRefExpr()
const {
232 bool isImm()
const override {
233 return Kind == Immediate;
236 bool isInlinableImm(MVT type)
const;
237 bool isLiteralImm(MVT type)
const;
239 bool isRegKind()
const {
240 return Kind == Register;
243 bool isReg()
const override {
244 return isRegKind() && !hasModifiers();
247 bool isRegOrInline(
unsigned RCID, MVT type)
const {
248 return isRegClass(RCID) || isInlinableImm(type);
252 return isRegOrInline(RCID, type) || isLiteralImm(type);
255 bool isRegOrImmWithInt16InputMods()
const {
259 template <
bool IsFake16>
bool isRegOrImmWithIntT16InputMods()
const {
261 IsFake16 ? AMDGPU::VS_32RegClassID : AMDGPU::VS_16RegClassID, MVT::i16);
264 bool isRegOrImmWithInt32InputMods()
const {
268 bool isRegOrInlineImmWithInt16InputMods()
const {
269 return isRegOrInline(AMDGPU::VS_32RegClassID, MVT::i16);
272 template <
bool IsFake16>
bool isRegOrInlineImmWithIntT16InputMods()
const {
273 return isRegOrInline(
274 IsFake16 ? AMDGPU::VS_32RegClassID : AMDGPU::VS_16RegClassID, MVT::i16);
277 bool isRegOrInlineImmWithInt32InputMods()
const {
278 return isRegOrInline(AMDGPU::VS_32RegClassID, MVT::i32);
281 bool isRegOrImmWithInt64InputMods()
const {
285 bool isRegOrImmWithFP16InputMods()
const {
289 template <
bool IsFake16>
bool isRegOrImmWithFPT16InputMods()
const {
291 IsFake16 ? AMDGPU::VS_32RegClassID : AMDGPU::VS_16RegClassID, MVT::f16);
294 bool isRegOrImmWithFP32InputMods()
const {
298 bool isRegOrImmWithFP64InputMods()
const {
302 template <
bool IsFake16>
bool isRegOrInlineImmWithFP16InputMods()
const {
303 return isRegOrInline(
304 IsFake16 ? AMDGPU::VS_32RegClassID : AMDGPU::VS_16RegClassID, MVT::f16);
307 bool isRegOrInlineImmWithFP32InputMods()
const {
308 return isRegOrInline(AMDGPU::VS_32RegClassID, MVT::f32);
311 bool isRegOrInlineImmWithFP64InputMods()
const {
312 return isRegOrInline(AMDGPU::VS_64RegClassID, MVT::f64);
315 bool isVRegWithInputMods(
unsigned RCID)
const {
return isRegClass(RCID); }
317 bool isVRegWithFP32InputMods()
const {
318 return isVRegWithInputMods(AMDGPU::VGPR_32RegClassID);
321 bool isVRegWithFP64InputMods()
const {
322 return isVRegWithInputMods(AMDGPU::VReg_64RegClassID);
325 bool isPackedFP16InputMods()
const {
329 bool isPackedVGPRFP32InputMods()
const {
333 bool isVReg()
const {
334 return isRegClass(AMDGPU::VGPR_32RegClassID) ||
335 isRegClass(AMDGPU::VReg_64RegClassID) ||
336 isRegClass(AMDGPU::VReg_96RegClassID) ||
337 isRegClass(AMDGPU::VReg_128RegClassID) ||
338 isRegClass(AMDGPU::VReg_160RegClassID) ||
339 isRegClass(AMDGPU::VReg_192RegClassID) ||
340 isRegClass(AMDGPU::VReg_256RegClassID) ||
341 isRegClass(AMDGPU::VReg_512RegClassID) ||
342 isRegClass(AMDGPU::VReg_1024RegClassID);
345 bool isVReg32()
const {
346 return isRegClass(AMDGPU::VGPR_32RegClassID);
349 bool isVReg32OrOff()
const {
350 return isOff() || isVReg32();
354 return isRegKind() &&
getReg() == AMDGPU::SGPR_NULL;
357 bool isAV_LdSt_32_Align2_RegOp()
const {
358 return isRegClass(AMDGPU::VGPR_32RegClassID) ||
359 isRegClass(AMDGPU::AGPR_32RegClassID);
362 bool isVRegWithInputMods()
const;
363 template <
bool IsFake16>
bool isT16_Lo128VRegWithInputMods()
const;
364 template <
bool IsFake16>
bool isT16VRegWithInputMods()
const;
366 bool isSDWAOperand(MVT type)
const;
367 bool isSDWAFP16Operand()
const;
368 bool isSDWAFP32Operand()
const;
369 bool isSDWAInt16Operand()
const;
370 bool isSDWAInt32Operand()
const;
372 bool isImmTy(ImmTy ImmT)
const {
373 return isImm() &&
Imm.Type == ImmT;
376 template <ImmTy Ty>
bool isImmTy()
const {
return isImmTy(Ty); }
378 bool isImmLiteral()
const {
return isImmTy(ImmTyNone); }
380 bool isImmModifier()
const {
381 return isImm() &&
Imm.Type != ImmTyNone;
384 bool isOModSI()
const {
return isImmTy(ImmTyOModSI); }
385 bool isDim()
const {
return isImmTy(ImmTyDim); }
386 bool isR128A16()
const {
return isImmTy(ImmTyR128A16); }
387 bool isOff()
const {
return isImmTy(ImmTyOff); }
388 bool isExpTgt()
const {
return isImmTy(ImmTyExpTgt); }
389 bool isOffen()
const {
return isImmTy(ImmTyOffen); }
390 bool isIdxen()
const {
return isImmTy(ImmTyIdxen); }
391 bool isAddr64()
const {
return isImmTy(ImmTyAddr64); }
392 bool isSMEMOffsetMod()
const {
return isImmTy(ImmTySMEMOffsetMod); }
393 bool isFlatOffset()
const {
return isImmTy(ImmTyOffset) || isImmTy(ImmTyInstOffset); }
394 bool isGDS()
const {
return isImmTy(ImmTyGDS); }
395 bool isLDS()
const {
return isImmTy(ImmTyLDS); }
396 bool isCPol()
const {
return isImmTy(ImmTyCPol); }
397 bool isIndexKey8bit()
const {
return isImmTy(ImmTyIndexKey8bit); }
398 bool isIndexKey16bit()
const {
return isImmTy(ImmTyIndexKey16bit); }
399 bool isIndexKey32bit()
const {
return isImmTy(ImmTyIndexKey32bit); }
400 bool isMatrixAFMT()
const {
return isImmTy(ImmTyMatrixAFMT); }
401 bool isMatrixBFMT()
const {
return isImmTy(ImmTyMatrixBFMT); }
402 bool isMatrixAScale()
const {
return isImmTy(ImmTyMatrixAScale); }
403 bool isMatrixBScale()
const {
return isImmTy(ImmTyMatrixBScale); }
404 bool isMatrixAScaleFmt()
const {
return isImmTy(ImmTyMatrixAScaleFmt); }
405 bool isMatrixBScaleFmt()
const {
return isImmTy(ImmTyMatrixBScaleFmt); }
406 bool isMatrixAReuse()
const {
return isImmTy(ImmTyMatrixAReuse); }
407 bool isMatrixBReuse()
const {
return isImmTy(ImmTyMatrixBReuse); }
408 bool isTFE()
const {
return isImmTy(ImmTyTFE); }
409 bool isFORMAT()
const {
return isImmTy(ImmTyFORMAT) &&
isUInt<7>(
getImm()); }
410 bool isDppFI()
const {
return isImmTy(ImmTyDppFI); }
411 bool isSDWADstSel()
const {
return isImmTy(ImmTySDWADstSel); }
412 bool isSDWASrc0Sel()
const {
return isImmTy(ImmTySDWASrc0Sel); }
413 bool isSDWASrc1Sel()
const {
return isImmTy(ImmTySDWASrc1Sel); }
414 bool isSDWADstUnused()
const {
return isImmTy(ImmTySDWADstUnused); }
415 bool isInterpSlot()
const {
return isImmTy(ImmTyInterpSlot); }
416 bool isInterpAttr()
const {
return isImmTy(ImmTyInterpAttr); }
417 bool isInterpAttrChan()
const {
return isImmTy(ImmTyInterpAttrChan); }
418 bool isOpSel()
const {
return isImmTy(ImmTyOpSel); }
419 bool isOpSelHi()
const {
return isImmTy(ImmTyOpSelHi); }
420 bool isNegLo()
const {
return isImmTy(ImmTyNegLo); }
421 bool isNegHi()
const {
return isImmTy(ImmTyNegHi); }
422 bool isBitOp3()
const {
return isImmTy(ImmTyBitOp3) &&
isUInt<8>(
getImm()); }
423 bool isDone()
const {
return isImmTy(ImmTyDone); }
424 bool isRowEn()
const {
return isImmTy(ImmTyRowEn); }
426 bool isRegOrImm()
const {
427 return isReg() || isImm();
430 bool isRegClass(
unsigned RCID)
const;
434 bool isRegOrInlineNoMods(
unsigned RCID, MVT type)
const {
435 return isRegOrInline(RCID, type) && !hasModifiers();
438 bool isSCSrcB16()
const {
439 return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i16);
442 bool isSCSrcV2B16()
const {
446 bool isSCSrc_b32()
const {
447 return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i32);
450 bool isSCSrc_b64()
const {
451 return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::i64);
454 bool isBoolReg()
const;
456 bool isSCSrcF16()
const {
457 return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f16);
460 bool isSCSrcV2F16()
const {
464 bool isSCSrcF32()
const {
465 return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f32);
468 bool isSCSrcF64()
const {
469 return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::f64);
472 bool isSSrc_b32()
const {
473 return isSCSrc_b32() || isLiteralImm(MVT::i32) || isExpr();
476 bool isSSrc_b16()
const {
return isSCSrcB16() || isLiteralImm(MVT::i16); }
478 bool isSSrcV2B16()
const {
483 bool isSSrc_b64()
const {
486 return isSCSrc_b64() || isLiteralImm(MVT::i64) ||
487 (((
const MCTargetAsmParser *)AsmParser)
488 ->getAvailableFeatures()[AMDGPU::Feature64BitLiterals] &&
492 bool isSSrc_f32()
const {
493 return isSCSrc_b32() || isLiteralImm(MVT::f32) || isExpr();
496 bool isSSrcF64()
const {
return isSCSrc_b64() || isLiteralImm(MVT::f64); }
498 bool isSSrc_bf16()
const {
return isSCSrcB16() || isLiteralImm(MVT::bf16); }
500 bool isSSrc_f16()
const {
return isSCSrcB16() || isLiteralImm(MVT::f16); }
502 bool isSSrcV2F16()
const {
507 bool isSSrcV2FP32()
const {
512 bool isSCSrcV2FP32()
const {
517 bool isSSrcV2INT32()
const {
522 bool isSCSrcV2INT32()
const {
524 return isSCSrc_b32();
527 bool isSSrcOrLds_b32()
const {
528 return isRegOrInlineNoMods(AMDGPU::SRegOrLds_32RegClassID, MVT::i32) ||
529 isLiteralImm(MVT::i32) || isExpr();
532 bool isVCSrc_b32()
const {
533 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i32);
536 bool isVCSrc_b32_Lo256()
const {
537 return isRegOrInlineNoMods(AMDGPU::VS_32_Lo256RegClassID, MVT::i32);
540 bool isVCSrc_b64_Lo256()
const {
541 return isRegOrInlineNoMods(AMDGPU::VS_64_Lo256RegClassID, MVT::i64);
544 bool isVCSrc_b64()
const {
545 return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::i64);
548 bool isVCSrcT_b16()
const {
549 return isRegOrInlineNoMods(AMDGPU::VS_16RegClassID, MVT::i16);
552 bool isVCSrcTB16_Lo128()
const {
553 return isRegOrInlineNoMods(AMDGPU::VS_16_Lo128RegClassID, MVT::i16);
556 bool isVCSrcFake16B16_Lo128()
const {
557 return isRegOrInlineNoMods(AMDGPU::VS_32_Lo128RegClassID, MVT::i16);
560 bool isVCSrc_b16()
const {
561 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i16);
564 bool isVCSrc_v2b16()
const {
return isVCSrc_b16(); }
566 bool isVCSrc_f32()
const {
567 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f32);
570 bool isVCSrc_f64()
const {
571 return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::f64);
574 bool isVCSrcTBF16()
const {
575 return isRegOrInlineNoMods(AMDGPU::VS_16RegClassID, MVT::bf16);
578 bool isVCSrcT_f16()
const {
579 return isRegOrInlineNoMods(AMDGPU::VS_16RegClassID, MVT::f16);
582 bool isVCSrcT_bf16()
const {
583 return isRegOrInlineNoMods(AMDGPU::VS_16RegClassID, MVT::f16);
586 bool isVCSrcTBF16_Lo128()
const {
587 return isRegOrInlineNoMods(AMDGPU::VS_16_Lo128RegClassID, MVT::bf16);
590 bool isVCSrcTF16_Lo128()
const {
591 return isRegOrInlineNoMods(AMDGPU::VS_16_Lo128RegClassID, MVT::f16);
594 bool isVCSrcFake16BF16_Lo128()
const {
595 return isRegOrInlineNoMods(AMDGPU::VS_32_Lo128RegClassID, MVT::bf16);
598 bool isVCSrcFake16F16_Lo128()
const {
599 return isRegOrInlineNoMods(AMDGPU::VS_32_Lo128RegClassID, MVT::f16);
602 bool isVCSrc_bf16()
const {
603 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::bf16);
606 bool isVCSrc_f16()
const {
607 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f16);
610 bool isVCSrc_v2bf16()
const {
return isVCSrc_bf16(); }
612 bool isVCSrc_v2f16()
const {
return isVCSrc_f16(); }
614 bool isVSrc_b32()
const {
615 return isVCSrc_f32() || isLiteralImm(MVT::i32) || isExpr();
618 bool isVSrc_b64()
const {
return isVCSrc_f64() || isLiteralImm(MVT::i64); }
620 bool isVSrc_v2b64()
const {
621 return isRegOrInlineNoMods(AMDGPU::VS_128RegClassID, MVT::i64) ||
622 isLiteralImm(MVT::i64);
625 bool isVSrc_v2f64()
const {
626 return isRegOrInlineNoMods(AMDGPU::VS_128RegClassID, MVT::f64) ||
627 isLiteralImm(MVT::f64);
630 bool isVSrcT_b16()
const {
return isVCSrcT_b16() || isLiteralImm(MVT::i16); }
632 bool isVSrcT_b16_Lo128()
const {
633 return isVCSrcTB16_Lo128() || isLiteralImm(MVT::i16);
636 bool isVSrcFake16_b16_Lo128()
const {
637 return isVCSrcFake16B16_Lo128() || isLiteralImm(MVT::i16);
640 bool isVSrc_b16()
const {
return isVCSrc_b16() || isLiteralImm(MVT::i16); }
642 bool isVSrc_v2b16()
const {
return isVSrc_b16() || isLiteralImm(MVT::v2i16); }
644 bool isVCSrcV2FP32()
const {
return isVCSrc_f64(); }
646 bool isVSrc_v2f32()
const {
return isVSrc_f64() || isLiteralImm(MVT::v2f32); }
648 bool isVCSrc_v2b32()
const {
return isVCSrc_b64(); }
650 bool isVSrc_v2b32()
const {
return isVSrc_b64() || isLiteralImm(MVT::v2i32); }
652 bool isVSrc_f32()
const {
653 return isVCSrc_f32() || isLiteralImm(MVT::f32) || isExpr();
656 bool isVSrc_f64()
const {
return isVCSrc_f64() || isLiteralImm(MVT::f64); }
658 bool isVSrcT_bf16()
const {
return isVCSrcTBF16() || isLiteralImm(MVT::bf16); }
660 bool isVSrcT_f16()
const {
return isVCSrcT_f16() || isLiteralImm(MVT::f16); }
662 bool isVSrcT_bf16_Lo128()
const {
663 return isVCSrcTBF16_Lo128() || isLiteralImm(MVT::bf16);
666 bool isVSrcT_f16_Lo128()
const {
667 return isVCSrcTF16_Lo128() || isLiteralImm(MVT::f16);
670 bool isVSrcFake16_bf16_Lo128()
const {
671 return isVCSrcFake16BF16_Lo128() || isLiteralImm(MVT::bf16);
674 bool isVSrcFake16_f16_Lo128()
const {
675 return isVCSrcFake16F16_Lo128() || isLiteralImm(MVT::f16);
678 bool isVSrc_bf16()
const {
return isVCSrc_bf16() || isLiteralImm(MVT::bf16); }
680 bool isVSrc_f16()
const {
return isVCSrc_f16() || isLiteralImm(MVT::f16); }
682 bool isVSrc_v2bf16()
const {
683 return isVSrc_bf16() || isLiteralImm(MVT::v2bf16);
686 bool isVSrc_v2f16()
const {
return isVSrc_f16() || isLiteralImm(MVT::v2f16); }
688 bool isVSrc_v2f16_splat()
const {
return isVSrc_v2f16(); }
690 bool isVSrc_NoInline_v2f16()
const {
return isVSrc_v2f16(); }
692 bool isVISrcB32()
const {
693 return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i32);
696 bool isVISrcB16()
const {
697 return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i16);
700 bool isVISrcV2B16()
const {
704 bool isVISrcF32()
const {
705 return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f32);
708 bool isVISrcF16()
const {
709 return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f16);
712 bool isVISrcV2F16()
const {
713 return isVISrcF16() || isVISrcB32();
716 bool isVISrc_64_bf16()
const {
717 return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::bf16);
720 bool isVISrc_64_f16()
const {
721 return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::f16);
724 bool isVISrc_64_b32()
const {
725 return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::i32);
728 bool isVISrc_64B64()
const {
729 return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::i64);
732 bool isVISrc_64_f64()
const {
733 return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::f64);
736 bool isVISrc_64V2FP32()
const {
737 return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::f32);
740 bool isVISrc_64V2INT32()
const {
741 return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::i32);
744 bool isVISrc_256_b32()
const {
745 return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::i32);
748 bool isVISrc_256_f32()
const {
749 return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::f32);
752 bool isVISrc_256B64()
const {
753 return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::i64);
756 bool isVISrc_256_f64()
const {
757 return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::f64);
760 bool isVISrc_512_f64()
const {
761 return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::f64);
764 bool isVISrc_128B16()
const {
765 return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::i16);
768 bool isVISrc_128V2B16()
const {
769 return isVISrc_128B16();
772 bool isVISrc_128_b32()
const {
773 return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::i32);
776 bool isVISrc_128_f32()
const {
777 return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::f32);
780 bool isVISrc_256V2FP32()
const {
781 return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::f32);
784 bool isVISrc_256V2INT32()
const {
785 return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::i32);
788 bool isVISrc_512_b32()
const {
789 return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::i32);
792 bool isVISrc_512B16()
const {
793 return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::i16);
796 bool isVISrc_512V2B16()
const {
797 return isVISrc_512B16();
800 bool isVISrc_512_f32()
const {
801 return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::f32);
804 bool isVISrc_512F16()
const {
805 return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::f16);
808 bool isVISrc_512V2F16()
const {
809 return isVISrc_512F16() || isVISrc_512_b32();
812 bool isVISrc_1024_b32()
const {
813 return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::i32);
816 bool isVISrc_1024B16()
const {
817 return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::i16);
820 bool isVISrc_1024V2B16()
const {
821 return isVISrc_1024B16();
824 bool isVISrc_1024_f32()
const {
825 return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::f32);
828 bool isVISrc_1024F16()
const {
829 return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::f16);
832 bool isVISrc_1024V2F16()
const {
833 return isVISrc_1024F16() || isVISrc_1024_b32();
836 bool isAISrcB32()
const {
837 return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i32);
840 bool isAISrcB16()
const {
841 return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i16);
844 bool isAISrcV2B16()
const {
848 bool isAISrcF32()
const {
849 return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f32);
852 bool isAISrcF16()
const {
853 return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f16);
856 bool isAISrcV2F16()
const {
857 return isAISrcF16() || isAISrcB32();
860 bool isAISrc_64B64()
const {
861 return isRegOrInlineNoMods(AMDGPU::AReg_64RegClassID, MVT::i64);
864 bool isAISrc_64_f64()
const {
865 return isRegOrInlineNoMods(AMDGPU::AReg_64RegClassID, MVT::f64);
868 bool isAISrc_128_b32()
const {
869 return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i32);
872 bool isAISrc_128B16()
const {
873 return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i16);
876 bool isAISrc_128V2B16()
const {
877 return isAISrc_128B16();
880 bool isAISrc_128_f32()
const {
881 return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f32);
884 bool isAISrc_128F16()
const {
885 return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f16);
888 bool isAISrc_128V2F16()
const {
889 return isAISrc_128F16() || isAISrc_128_b32();
892 bool isVISrc_128_bf16()
const {
893 return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::bf16);
896 bool isVISrc_128_f16()
const {
897 return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::f16);
900 bool isVISrc_128V2F16()
const {
901 return isVISrc_128_f16() || isVISrc_128_b32();
904 bool isAISrc_256B64()
const {
905 return isRegOrInlineNoMods(AMDGPU::AReg_256RegClassID, MVT::i64);
908 bool isAISrc_256_f64()
const {
909 return isRegOrInlineNoMods(AMDGPU::AReg_256RegClassID, MVT::f64);
912 bool isAISrc_512_b32()
const {
913 return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i32);
916 bool isAISrc_512B16()
const {
917 return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i16);
920 bool isAISrc_512V2B16()
const {
921 return isAISrc_512B16();
924 bool isAISrc_512_f32()
const {
925 return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f32);
928 bool isAISrc_512F16()
const {
929 return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f16);
932 bool isAISrc_512V2F16()
const {
933 return isAISrc_512F16() || isAISrc_512_b32();
936 bool isAISrc_1024_b32()
const {
937 return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i32);
940 bool isAISrc_1024B16()
const {
941 return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i16);
944 bool isAISrc_1024V2B16()
const {
945 return isAISrc_1024B16();
948 bool isAISrc_1024_f32()
const {
949 return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f32);
952 bool isAISrc_1024F16()
const {
953 return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f16);
956 bool isAISrc_1024V2F16()
const {
957 return isAISrc_1024F16() || isAISrc_1024_b32();
960 bool isKImmFP32()
const {
961 return isLiteralImm(MVT::f32);
964 bool isKImmFP16()
const {
965 return isLiteralImm(MVT::f16);
968 bool isKImmFP64()
const {
return isLiteralImm(MVT::f64); }
970 bool isMem()
const override {
974 bool isExpr()
const {
975 return Kind == Expression;
978 bool isSOPPBrTarget()
const {
return isExpr() || isImm(); }
980 bool isSWaitCnt()
const;
981 bool isDepCtr()
const;
982 bool isSDelayALU()
const;
983 bool isHwreg()
const;
984 bool isSendMsg()
const;
985 bool isWaitEvent()
const;
986 bool isSplitBarrier()
const;
987 bool isSwizzle()
const;
988 bool isSMRDOffset8()
const;
989 bool isSMEMOffset()
const;
990 bool isSMRDLiteralOffset()
const;
992 bool isDPPCtrl()
const;
994 bool isGPRIdxMode()
const;
995 bool isS16Imm()
const;
996 bool isU16Imm()
const;
997 bool isEndpgm()
const;
999 auto getPredicate(std::function<
bool(
const AMDGPUOperand &
Op)>
P)
const {
1000 return [
this,
P]() {
return P(*
this); };
1005 return StringRef(Tok.Data, Tok.Length);
1013 void setImm(int64_t Val) {
1018 ImmTy getImmTy()
const {
1023 MCRegister
getReg()
const override {
1028 SMLoc getStartLoc()
const override {
1032 SMLoc getEndLoc()
const override {
1036 SMRange getLocRange()
const {
1037 return SMRange(StartLoc, EndLoc);
1040 int getMCOpIdx()
const {
return MCOpIdx; }
1042 Modifiers getModifiers()
const {
1043 assert(isRegKind() || isImmTy(ImmTyNone));
1044 return isRegKind() ?
Reg.Mods :
Imm.Mods;
1047 void setModifiers(Modifiers Mods) {
1048 assert(isRegKind() || isImmTy(ImmTyNone));
1055 bool hasModifiers()
const {
1056 return getModifiers().hasModifiers();
1059 bool hasFPModifiers()
const {
1060 return getModifiers().hasFPModifiers();
1063 bool hasIntModifiers()
const {
1064 return getModifiers().hasIntModifiers();
1067 bool isForcedLit()
const {
1068 return isImmLiteral() && getModifiers().isForcedLit();
1071 bool isForcedLit64()
const {
1072 return isImmLiteral() && getModifiers().isForcedLit64();
1075 uint64_t applyInputFPModifiers(uint64_t Val,
unsigned Size)
const;
1077 void addImmOperands(MCInst &Inst,
unsigned N,
bool ApplyModifiers =
true)
const;
1079 void addLiteralImmOperand(MCInst &Inst, int64_t Val,
bool ApplyModifiers)
const;
1081 void addRegOperands(MCInst &Inst,
unsigned N)
const;
1083 void addRegOrImmOperands(MCInst &Inst,
unsigned N)
const {
1085 addRegOperands(Inst,
N);
1087 addImmOperands(Inst,
N);
1090 void addRegOrImmWithInputModsOperands(MCInst &Inst,
unsigned N)
const {
1091 Modifiers Mods = getModifiers();
1094 addRegOperands(Inst,
N);
1096 addImmOperands(Inst,
N,
false);
1100 void addRegOrImmWithFPInputModsOperands(MCInst &Inst,
unsigned N)
const {
1101 assert(!hasIntModifiers());
1102 addRegOrImmWithInputModsOperands(Inst,
N);
1105 void addRegOrImmWithIntInputModsOperands(MCInst &Inst,
unsigned N)
const {
1106 assert(!hasFPModifiers());
1107 addRegOrImmWithInputModsOperands(Inst,
N);
1110 void addRegWithInputModsOperands(MCInst &Inst,
unsigned N)
const {
1111 Modifiers Mods = getModifiers();
1114 addRegOperands(Inst,
N);
1117 void addRegWithFPInputModsOperands(MCInst &Inst,
unsigned N)
const {
1118 assert(!hasIntModifiers());
1119 addRegWithInputModsOperands(Inst,
N);
1122 void addRegWithIntInputModsOperands(MCInst &Inst,
unsigned N)
const {
1123 assert(!hasFPModifiers());
1124 addRegWithInputModsOperands(Inst,
N);
1127 static void printImmTy(raw_ostream& OS, ImmTy
Type) {
1130 case ImmTyNone: OS <<
"None";
break;
1131 case ImmTyGDS: OS <<
"GDS";
break;
1132 case ImmTyLDS: OS <<
"LDS";
break;
1133 case ImmTyOffen: OS <<
"Offen";
break;
1134 case ImmTyIdxen: OS <<
"Idxen";
break;
1135 case ImmTyAddr64: OS <<
"Addr64";
break;
1136 case ImmTyOffset: OS <<
"Offset";
break;
1137 case ImmTyInstOffset: OS <<
"InstOffset";
break;
1138 case ImmTyOffset0: OS <<
"Offset0";
break;
1139 case ImmTyOffset1: OS <<
"Offset1";
break;
1140 case ImmTySMEMOffsetMod: OS <<
"SMEMOffsetMod";
break;
1141 case ImmTyCPol: OS <<
"CPol";
break;
1142 case ImmTyIndexKey8bit: OS <<
"index_key";
break;
1143 case ImmTyIndexKey16bit: OS <<
"index_key";
break;
1144 case ImmTyIndexKey32bit: OS <<
"index_key";
break;
1145 case ImmTyTFE: OS <<
"TFE";
break;
1146 case ImmTyIsAsync: OS <<
"IsAsync";
break;
1147 case ImmTyD16: OS <<
"D16";
break;
1148 case ImmTyFORMAT: OS <<
"FORMAT";
break;
1149 case ImmTyClamp: OS <<
"Clamp";
break;
1150 case ImmTyOModSI: OS <<
"OModSI";
break;
1151 case ImmTyDPP8: OS <<
"DPP8";
break;
1152 case ImmTyDppCtrl: OS <<
"DppCtrl";
break;
1153 case ImmTyDppRowMask: OS <<
"DppRowMask";
break;
1154 case ImmTyDppBankMask: OS <<
"DppBankMask";
break;
1155 case ImmTyDppBoundCtrl: OS <<
"DppBoundCtrl";
break;
1156 case ImmTyDppFI: OS <<
"DppFI";
break;
1157 case ImmTySDWADstSel: OS <<
"SDWADstSel";
break;
1158 case ImmTySDWASrc0Sel: OS <<
"SDWASrc0Sel";
break;
1159 case ImmTySDWASrc1Sel: OS <<
"SDWASrc1Sel";
break;
1160 case ImmTySDWADstUnused: OS <<
"SDWADstUnused";
break;
1161 case ImmTyDMask: OS <<
"DMask";
break;
1162 case ImmTyDim: OS <<
"Dim";
break;
1163 case ImmTyUNorm: OS <<
"UNorm";
break;
1164 case ImmTyDA: OS <<
"DA";
break;
1165 case ImmTyR128A16: OS <<
"R128A16";
break;
1166 case ImmTyA16: OS <<
"A16";
break;
1167 case ImmTyLWE: OS <<
"LWE";
break;
1168 case ImmTyOff: OS <<
"Off";
break;
1169 case ImmTyExpTgt: OS <<
"ExpTgt";
break;
1170 case ImmTyExpCompr: OS <<
"ExpCompr";
break;
1171 case ImmTyExpVM: OS <<
"ExpVM";
break;
1172 case ImmTyDone: OS <<
"Done";
break;
1173 case ImmTyRowEn: OS <<
"RowEn";
break;
1174 case ImmTyHwreg: OS <<
"Hwreg";
break;
1175 case ImmTySendMsg: OS <<
"SendMsg";
break;
1176 case ImmTyWaitEvent: OS <<
"WaitEvent";
break;
1177 case ImmTyInterpSlot: OS <<
"InterpSlot";
break;
1178 case ImmTyInterpAttr: OS <<
"InterpAttr";
break;
1179 case ImmTyInterpAttrChan: OS <<
"InterpAttrChan";
break;
1180 case ImmTyOpSel: OS <<
"OpSel";
break;
1181 case ImmTyOpSelHi: OS <<
"OpSelHi";
break;
1182 case ImmTyNegLo: OS <<
"NegLo";
break;
1183 case ImmTyNegHi: OS <<
"NegHi";
break;
1184 case ImmTySwizzle: OS <<
"Swizzle";
break;
1185 case ImmTyGprIdxMode: OS <<
"GprIdxMode";
break;
1186 case ImmTyHigh: OS <<
"High";
break;
1187 case ImmTyBLGP: OS <<
"BLGP";
break;
1188 case ImmTyCBSZ: OS <<
"CBSZ";
break;
1189 case ImmTyABID: OS <<
"ABID";
break;
1190 case ImmTyEndpgm: OS <<
"Endpgm";
break;
1191 case ImmTyWaitVDST: OS <<
"WaitVDST";
break;
1192 case ImmTyWaitEXP: OS <<
"WaitEXP";
break;
1193 case ImmTyWaitVAVDst: OS <<
"WaitVAVDst";
break;
1194 case ImmTyWaitVMVSrc: OS <<
"WaitVMVSrc";
break;
1195 case ImmTyBitOp3: OS <<
"BitOp3";
break;
1196 case ImmTyMatrixAFMT: OS <<
"ImmTyMatrixAFMT";
break;
1197 case ImmTyMatrixBFMT: OS <<
"ImmTyMatrixBFMT";
break;
1198 case ImmTyMatrixAScale: OS <<
"ImmTyMatrixAScale";
break;
1199 case ImmTyMatrixBScale: OS <<
"ImmTyMatrixBScale";
break;
1200 case ImmTyMatrixAScaleFmt: OS <<
"ImmTyMatrixAScaleFmt";
break;
1201 case ImmTyMatrixBScaleFmt: OS <<
"ImmTyMatrixBScaleFmt";
break;
1202 case ImmTyMatrixAReuse: OS <<
"ImmTyMatrixAReuse";
break;
1203 case ImmTyMatrixBReuse: OS <<
"ImmTyMatrixBReuse";
break;
1204 case ImmTyScaleSel: OS <<
"ScaleSel" ;
break;
1205 case ImmTyByteSel: OS <<
"ByteSel" ;
break;
1210 void print(raw_ostream &OS,
const MCAsmInfo &MAI)
const override {
1214 <<
" mods: " <<
Reg.Mods <<
'>';
1218 if (getImmTy() != ImmTyNone) {
1219 OS <<
" type: "; printImmTy(OS, getImmTy());
1221 OS <<
" mods: " <<
Imm.Mods <<
'>';
1234 static AMDGPUOperand::Ptr CreateImm(
const AMDGPUAsmParser *AsmParser,
1235 int64_t Val, SMLoc Loc,
1236 ImmTy
Type = ImmTyNone,
1237 bool IsFPImm =
false) {
1238 auto Op = std::make_unique<AMDGPUOperand>(Immediate, AsmParser);
1240 Op->Imm.IsFPImm = IsFPImm;
1242 Op->Imm.Mods = Modifiers();
1248 static AMDGPUOperand::Ptr CreateToken(
const AMDGPUAsmParser *AsmParser,
1249 StringRef Str, SMLoc Loc,
1250 bool HasExplicitEncodingSize =
true) {
1251 auto Res = std::make_unique<AMDGPUOperand>(Token, AsmParser);
1252 Res->Tok.Data = Str.data();
1253 Res->Tok.Length = Str.size();
1254 Res->StartLoc = Loc;
1259 static AMDGPUOperand::Ptr CreateReg(
const AMDGPUAsmParser *AsmParser,
1260 MCRegister
Reg, SMLoc S, SMLoc
E) {
1261 auto Op = std::make_unique<AMDGPUOperand>(Register, AsmParser);
1262 Op->Reg.RegNo =
Reg;
1263 Op->Reg.Mods = Modifiers();
1269 static AMDGPUOperand::Ptr CreateExpr(
const AMDGPUAsmParser *AsmParser,
1270 const class MCExpr *Expr, SMLoc S) {
1271 auto Op = std::make_unique<AMDGPUOperand>(Expression, AsmParser);
1280 OS <<
"abs:" << Mods.Abs <<
" neg: " << Mods.Neg <<
" sext:" << Mods.Sext;
1289#define GET_REGISTER_MATCHER
1290#include "AMDGPUGenAsmMatcher.inc"
1291#undef GET_REGISTER_MATCHER
1292#undef GET_SUBTARGET_FEATURE_NAME
1297class KernelScopeInfo {
1298 int SgprIndexUnusedMin = -1;
1299 int VgprIndexUnusedMin = -1;
1300 int AgprIndexUnusedMin = -1;
1304 void usesSgprAt(
int i) {
1305 if (i >= SgprIndexUnusedMin) {
1306 SgprIndexUnusedMin = ++i;
1309 Ctx->getOrCreateSymbol(
Twine(
".kernel.sgpr_count"));
1315 void usesVgprAt(
int i) {
1316 if (i >= VgprIndexUnusedMin) {
1317 VgprIndexUnusedMin = ++i;
1320 Ctx->getOrCreateSymbol(
Twine(
".kernel.vgpr_count"));
1322 VgprIndexUnusedMin);
1328 void usesAgprAt(
int i) {
1333 if (i >= AgprIndexUnusedMin) {
1334 AgprIndexUnusedMin = ++i;
1337 Ctx->getOrCreateSymbol(
Twine(
".kernel.agpr_count"));
1342 Ctx->getOrCreateSymbol(
Twine(
".kernel.vgpr_count"));
1344 VgprIndexUnusedMin);
1351 KernelScopeInfo() =
default;
1355 MSTI = Ctx->getSubtargetInfo();
1357 usesSgprAt(SgprIndexUnusedMin = -1);
1358 usesVgprAt(VgprIndexUnusedMin = -1);
1360 usesAgprAt(AgprIndexUnusedMin = -1);
1364 void usesRegister(RegisterKind RegKind,
unsigned DwordRegIndex,
1365 unsigned RegWidth) {
1368 usesSgprAt(DwordRegIndex +
divideCeil(RegWidth, 32) - 1);
1371 usesAgprAt(DwordRegIndex +
divideCeil(RegWidth, 32) - 1);
1374 usesVgprAt(DwordRegIndex +
divideCeil(RegWidth, 32) - 1);
1383 MCAsmParser &Parser;
1385 unsigned ForcedEncodingSize = 0;
1386 bool ForcedDPP =
false;
1387 bool ForcedSDWA =
false;
1388 KernelScopeInfo KernelScope;
1389 const unsigned HwMode;
1394#define GET_ASSEMBLER_HEADER
1395#include "AMDGPUGenAsmMatcher.inc"
1400 unsigned getRegOperandSize(
const MCInstrDesc &
Desc,
unsigned OpNo)
const {
1402 int16_t RCID = MII.getOpRegClassID(
Desc.operands()[OpNo], HwMode);
1406 std::optional<AMDGPU::InfoSectionData> InfoData;
1409 void createConstantSymbol(StringRef Id, int64_t Val);
1411 bool ParseAsAbsoluteExpression(uint32_t &Ret);
1412 bool OutOfRangeError(SMRange
Range);
1428 bool calculateGPRBlocks(
const FeatureBitset &Features,
const MCExpr *VCCUsed,
1429 const MCExpr *FlatScrUsed,
bool XNACKUsed,
1430 std::optional<bool> EnableWavefrontSize32,
1431 const MCExpr *NextFreeVGPR, SMRange VGPRRange,
1432 const MCExpr *NextFreeSGPR, SMRange SGPRRange,
1433 const MCExpr *&VGPRBlocks,
const MCExpr *&SGPRBlocks);
1434 bool ParseDirectiveAMDGCNTarget();
1435 bool ParseDirectiveAMDHSACodeObjectVersion();
1436 bool ParseDirectiveAMDHSAKernel();
1437 bool ParseAMDKernelCodeTValue(StringRef
ID, AMDGPUMCKernelCodeT &Header);
1438 bool ParseDirectiveAMDKernelCodeT();
1440 bool subtargetHasRegister(
const MCRegisterInfo &MRI, MCRegister
Reg);
1441 bool ParseDirectiveAMDGPUHsaKernel();
1443 bool ParseDirectiveISAVersion();
1444 bool ParseDirectiveHSAMetadata();
1445 bool ParseDirectivePALMetadataBegin();
1446 bool ParseDirectivePALMetadata();
1447 bool ParseDirectiveAMDGPULDS();
1448 bool ParseDirectiveAMDGPUInfo();
1452 bool ParseToEndDirective(
const char *AssemblerDirectiveBegin,
1453 const char *AssemblerDirectiveEnd,
1454 std::string &CollectString);
1456 bool AddNextRegisterToList(MCRegister &
Reg,
unsigned &RegWidth,
1457 RegisterKind RegKind, MCRegister Reg1,
1458 RegisterKind RegKind1, SMLoc Loc);
1459 bool ParseAMDGPURegister(RegisterKind &RegKind, MCRegister &
Reg,
1460 unsigned &RegNum,
unsigned &RegWidth,
1461 bool RestoreOnFailure =
false);
1462 bool ParseAMDGPURegister(RegisterKind &RegKind, MCRegister &
Reg,
1463 unsigned &RegNum,
unsigned &RegWidth,
1464 SmallVectorImpl<AsmToken> &Tokens);
1465 MCRegister ParseRegularReg(RegisterKind &RegKind,
unsigned &RegNum,
1467 SmallVectorImpl<AsmToken> &Tokens);
1468 MCRegister ParseSpecialReg(RegisterKind &RegKind,
unsigned &RegNum,
1470 SmallVectorImpl<AsmToken> &Tokens);
1471 MCRegister ParseRegList(RegisterKind &RegKind,
unsigned &RegNum,
1473 SmallVectorImpl<AsmToken> &Tokens);
1474 bool ParseRegRange(
unsigned &Num,
unsigned &Width,
unsigned &SubReg);
1475 MCRegister getRegularReg(RegisterKind RegKind,
unsigned RegNum,
1476 unsigned SubReg,
unsigned RegWidth, SMLoc Loc);
1479 bool isRegister(
const AsmToken &Token,
const AsmToken &NextToken)
const;
1480 std::optional<StringRef> getGprCountSymbolName(RegisterKind RegKind);
1481 void initializeGprCountSymbol(RegisterKind RegKind);
1482 bool updateGprCountSymbols(RegisterKind RegKind,
unsigned DwordRegIndex,
1484 void cvtMubufImpl(MCInst &Inst,
const OperandVector &Operands,
1489 OperandMode_Default,
1493 using OptionalImmIndexMap = std::map<AMDGPUOperand::ImmTy, unsigned>;
/// Constructs the parser: initializes the base MCTargetAsmParser, stores the
/// generic parser reference, caches the register-info HwMode from \p STI,
/// computes the available-feature set, and pre-defines a number of constant
/// assembler symbols.
/// NOTE(review): the conditions that guard the individual symbol groups, and
/// the loop that supplies Symbol/Code, are not visible in this excerpt.
1495 AMDGPUAsmParser(
const MCSubtargetInfo &STI, MCAsmParser &_Parser,
1496 const MCInstrInfo &MII)
1497 : MCTargetAsmParser(STI, MII), Parser(_Parser),
1498 HwMode(STI.getHwMode(MCSubtargetInfo::HwMode_RegInfo)) {
1501 setAvailableFeatures(ComputeAvailableFeatures(getFeatureBits()));
// ISA version exposed under the .amdgcn.gfx_generation_* names.
1505 createConstantSymbol(
".amdgcn.gfx_generation_number",
ISA.Major);
1506 createConstantSymbol(
".amdgcn.gfx_generation_minor",
ISA.Minor);
1507 createConstantSymbol(
".amdgcn.gfx_generation_stepping",
ISA.Stepping);
// The same ISA version exposed under the .option.machine_version_* names.
1509 createConstantSymbol(
".option.machine_version_major",
ISA.Major);
1510 createConstantSymbol(
".option.machine_version_minor",
ISA.Minor);
1511 createConstantSymbol(
".option.machine_version_stepping",
ISA.Stepping);
// Set up the VGPR and SGPR count symbols.
1514 initializeGprCountSymbol(IS_VGPR);
1515 initializeGprCountSymbol(IS_SGPR);
1520 createConstantSymbol(Symbol, Code);
// Fixed UC_VERSION_* bit constants.
1522 createConstantSymbol(
"UC_VERSION_W64_BIT", 0x2000);
1523 createConstantSymbol(
"UC_VERSION_W32_BIT", 0x4000);
1524 createConstantSymbol(
"UC_VERSION_MDP_BIT", 0x8000);
1602 bool isWave32()
const {
return getAvailableFeatures()[Feature_isWave32Bit]; }
1604 bool isWave64()
const {
return getAvailableFeatures()[Feature_isWave64Bit]; }
/// True when AMDGPU::FeatureInv2PiInlineImm is set in the subtarget feature
/// bits.
1606 bool hasInv2PiInlineImm()
const {
1607 return getFeatureBits()[AMDGPU::FeatureInv2PiInlineImm];
/// True when AMDGPU::Feature64BitLiterals is set in the subtarget feature
/// bits.
1610 bool has64BitLiterals()
const {
1611 return getFeatureBits()[AMDGPU::Feature64BitLiterals];
/// True when AMDGPU::FeatureFlatInstOffsets is set in the subtarget feature
/// bits.
1614 bool hasFlatOffsets()
const {
1615 return getFeatureBits()[AMDGPU::FeatureFlatInstOffsets];
/// True when AMDGPU::FeatureTrue16BitInsts is set in the subtarget feature
/// bits.
1618 bool hasTrue16Insts()
const {
1619 return getFeatureBits()[AMDGPU::FeatureTrue16BitInsts];
1623 return getFeatureBits()[AMDGPU::FeatureArchitectedFlatScratch];
1626 bool hasSGPR102_SGPR103()
const {
1630 bool hasSGPR104_SGPR105()
const {
return isGFX10Plus(); }
/// True when AMDGPU::FeatureIntClamp is set in the subtarget feature bits.
1632 bool hasIntClamp()
const {
1633 return getFeatureBits()[AMDGPU::FeatureIntClamp];
/// True when AMDGPU::FeaturePartialNSAEncoding is set in the subtarget feature
/// bits.
1636 bool hasPartialNSAEncoding()
const {
1637 return getFeatureBits()[AMDGPU::FeaturePartialNSAEncoding];
/// True when AMDGPU::FeatureGloballyAddressableScratch is set in the subtarget
/// feature bits.
1640 bool hasGloballyAddressableScratch()
const {
1641 return getFeatureBits()[AMDGPU::FeatureGloballyAddressableScratch];
/// Returns the target streamer attached to the parser's output streamer,
/// viewed as an AMDGPUTargetStreamer.
/// The pointer returned by getTargetStreamer() is dereferenced without a null
/// check, and the downcast is an unchecked static_cast.
1654 AMDGPUTargetStreamer &getTargetStreamer() {
1655 MCTargetStreamer &TS = *getParser().getStreamer().getTargetStreamer();
1656 return static_cast<AMDGPUTargetStreamer &
>(TS);
1662 return const_cast<AMDGPUAsmParser *
>(
this)->MCTargetAsmParser::getContext();
1665 const MCRegisterInfo *getMRI()
const {
1669 const MCInstrInfo *getMII()
const {
/// Convenience accessor: the feature bitset of the subtarget info returned by
/// getSTI().
1675 const FeatureBitset &getFeatureBits()
const {
1676 return getSTI().getFeatureBits();
1679 void setForcedEncodingSize(
unsigned Size) { ForcedEncodingSize =
Size; }
1680 void setForcedDPP(
bool ForceDPP_) { ForcedDPP = ForceDPP_; }
1681 void setForcedSDWA(
bool ForceSDWA_) { ForcedSDWA = ForceSDWA_; }
1683 unsigned getForcedEncodingSize()
const {
return ForcedEncodingSize; }
1684 bool isForcedVOP3()
const {
return ForcedEncodingSize == 64; }
1685 bool isForcedDPP()
const {
return ForcedDPP; }
1686 bool isForcedSDWA()
const {
return ForcedSDWA; }
1687 ArrayRef<unsigned> getMatchedVariants()
const;
1688 StringRef getMatchedVariantName()
const;
1690 std::unique_ptr<AMDGPUOperand> parseRegister(
bool RestoreOnFailure =
false);
1691 bool ParseRegister(MCRegister &RegNo, SMLoc &StartLoc, SMLoc &EndLoc,
1692 bool RestoreOnFailure);
1693 bool parseRegister(MCRegister &
Reg, SMLoc &StartLoc, SMLoc &EndLoc)
override;
1694 ParseStatus tryParseRegister(MCRegister &
Reg, SMLoc &StartLoc,
1695 SMLoc &EndLoc)
override;
1696 unsigned checkTargetMatchPredicate(MCInst &Inst)
override;
1697 unsigned validateTargetOperandClass(MCParsedAsmOperand &
Op,
1698 unsigned Kind)
override;
1699 bool matchAndEmitInstruction(SMLoc IDLoc,
unsigned &Opcode,
1701 uint64_t &ErrorInfo,
1702 bool MatchingInlineAsm)
override;
1703 bool ParseDirective(AsmToken DirectiveID)
override;
1704 void onEndOfFile()
override;
1705 ParseStatus parseOperand(
OperandVector &Operands, StringRef Mnemonic,
1706 OperandMode
Mode = OperandMode_Default);
1707 StringRef parseMnemonicSuffix(StringRef Name);
1708 bool parseInstruction(ParseInstructionInfo &Info, StringRef Name,
1712 ParseStatus parseTokenOp(StringRef Name,
OperandVector &Operands);
1714 ParseStatus parseIntWithPrefix(
const char *Prefix, int64_t &
Int);
1717 parseIntWithPrefix(
const char *Prefix,
OperandVector &Operands,
1718 AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
1719 std::function<
bool(int64_t &)> ConvertResult =
nullptr);
1721 ParseStatus parseOperandArrayWithPrefix(
1723 AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
1724 bool (*ConvertResult)(int64_t &) =
nullptr);
1728 AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
1729 bool IgnoreNegative =
false);
1730 unsigned getCPolKind(StringRef Id, StringRef Mnemo,
bool &Disabling)
const;
1732 ParseStatus parseScope(
OperandVector &Operands, int64_t &Scope);
1734 ParseStatus parseStringWithPrefix(StringRef Prefix, StringRef &
Value,
1736 ParseStatus parseStringOrIntWithPrefix(
OperandVector &Operands,
1738 ArrayRef<const char *> Ids,
1740 ParseStatus parseStringOrIntWithPrefix(
OperandVector &Operands,
1742 ArrayRef<const char *> Ids,
1743 AMDGPUOperand::ImmTy
Type);
1746 bool isOperandModifier(
const AsmToken &Token,
const AsmToken &NextToken)
const;
1747 bool isRegOrOperandModifier(
const AsmToken &Token,
const AsmToken &NextToken)
const;
1748 bool isNamedOperandModifier(
const AsmToken &Token,
const AsmToken &NextToken)
const;
1749 bool isOpcodeModifierWithVal(
const AsmToken &Token,
const AsmToken &NextToken)
const;
1750 bool parseSP3NegModifier();
1751 ParseStatus parseImm(
OperandVector &Operands,
bool HasSP3AbsModifier =
false,
1754 ParseStatus parseRegOrImm(
OperandVector &Operands,
bool HasSP3AbsMod =
false,
1756 ParseStatus parseRegOrImmWithFPInputMods(
OperandVector &Operands,
1757 bool AllowImm =
true);
1758 ParseStatus parseRegOrImmWithIntInputMods(
OperandVector &Operands,
1759 bool AllowImm =
true);
1760 ParseStatus parseRegWithFPInputMods(
OperandVector &Operands);
1761 ParseStatus parseRegWithIntInputMods(
OperandVector &Operands);
1764 AMDGPUOperand::ImmTy ImmTy);
1768 ParseStatus tryParseMatrixFMT(
OperandVector &Operands, StringRef Name,
1769 AMDGPUOperand::ImmTy
Type);
1772 ParseStatus tryParseMatrixScale(
OperandVector &Operands, StringRef Name,
1773 AMDGPUOperand::ImmTy
Type);
1776 ParseStatus tryParseMatrixScaleFmt(
OperandVector &Operands, StringRef Name,
1777 AMDGPUOperand::ImmTy
Type);
1781 ParseStatus parseDfmtNfmt(int64_t &
Format);
1782 ParseStatus parseUfmt(int64_t &
Format);
1783 ParseStatus parseSymbolicSplitFormat(StringRef FormatStr, SMLoc Loc,
1785 ParseStatus parseSymbolicUnifiedFormat(StringRef FormatStr, SMLoc Loc,
1788 ParseStatus parseSymbolicOrNumericFormat(int64_t &
Format);
1789 ParseStatus parseNumericFormat(int64_t &
Format);
1793 bool tryParseFmt(
const char *Pref, int64_t MaxVal, int64_t &Val);
1794 bool matchDfmtNfmt(int64_t &Dfmt, int64_t &Nfmt, StringRef FormatStr, SMLoc Loc);
1798 bool parseCnt(int64_t &IntVal);
1801 bool parseDepCtr(int64_t &IntVal,
unsigned &Mask);
1802 void depCtrError(SMLoc Loc,
int ErrorId, StringRef DepCtrName);
1805 bool parseDelay(int64_t &Delay);
1811 struct OperandInfoTy {
1814 bool IsSymbolic =
false;
1815 bool IsDefined =
false;
1817 constexpr OperandInfoTy(int64_t Val) : Val(Val) {}
1820 struct StructuredOpField : OperandInfoTy {
1824 bool IsDefined =
false;
1826 constexpr StructuredOpField(StringLiteral Id, StringLiteral Desc,
1827 unsigned Width, int64_t
Default)
1828 : OperandInfoTy(
Default), Id(Id), Desc(Desc), Width(Width) {}
1829 virtual ~StructuredOpField() =
default;
1831 bool Error(AMDGPUAsmParser &Parser,
const Twine &Err)
const {
1832 Parser.Error(Loc,
"invalid " + Desc +
": " + Err);
1836 virtual bool validate(AMDGPUAsmParser &Parser)
const {
1838 return Error(Parser,
"not supported on this GPU");
1840 return Error(Parser,
"only " + Twine(Width) +
"-bit values are legal");
1848 bool parseSendMsgBody(OperandInfoTy &Msg, OperandInfoTy &
Op, OperandInfoTy &Stream);
1849 bool validateSendMsg(
const OperandInfoTy &Msg,
1850 const OperandInfoTy &
Op,
1851 const OperandInfoTy &Stream);
1853 ParseStatus parseHwregFunc(OperandInfoTy &HwReg, OperandInfoTy &
Offset,
1854 OperandInfoTy &Width);
1856 const AMDGPUOperand &findMCOperand(
const OperandVector &Operands,
1859 static SMLoc getLaterLoc(SMLoc a, SMLoc b);
1861 SMLoc getFlatOffsetLoc(
const OperandVector &Operands)
const;
1862 SMLoc getSMEMOffsetLoc(
const OperandVector &Operands)
const;
1865 SMLoc getOperandLoc(
const OperandVector &Operands,
int MCOpIdx)
const;
1866 SMLoc getOperandLoc(std::function<
bool(
const AMDGPUOperand&)>
Test,
1868 SMLoc getImmLoc(AMDGPUOperand::ImmTy
Type,
1872 bool validateInstruction(
const MCInst &Inst, SMLoc IDLoc,
1874 bool validateOffset(
const MCInst &Inst,
const OperandVector &Operands);
1875 bool validateFlatOffset(
const MCInst &Inst,
const OperandVector &Operands);
1876 bool validateSMEMOffset(
const MCInst &Inst,
const OperandVector &Operands);
1877 bool validateSOPLiteral(
const MCInst &Inst,
const OperandVector &Operands);
1878 bool validateConstantBusLimitations(
const MCInst &Inst,
const OperandVector &Operands);
1879 std::optional<unsigned> checkVOPDRegBankConstraints(
const MCInst &Inst,
1881 bool validateVOPD(
const MCInst &Inst,
const OperandVector &Operands);
1882 bool tryVOPD(
const MCInst &Inst);
1883 bool tryVOPD3(
const MCInst &Inst);
1884 bool tryAnotherVOPDEncoding(
const MCInst &Inst);
1886 bool validateIntClampSupported(
const MCInst &Inst);
1887 bool validateMIMGAtomicDMask(
const MCInst &Inst);
1888 bool validateMIMGGatherDMask(
const MCInst &Inst);
1889 bool validateMovrels(
const MCInst &Inst,
const OperandVector &Operands);
1890 bool validateMIMGDataSize(
const MCInst &Inst, SMLoc IDLoc);
1891 bool validateMIMGAddrSize(
const MCInst &Inst, SMLoc IDLoc);
1892 bool validateMIMGD16(
const MCInst &Inst);
1893 bool validateMIMGDim(
const MCInst &Inst,
const OperandVector &Operands);
1894 bool validateTensorR128(
const MCInst &Inst);
1895 bool validateMIMGMSAA(
const MCInst &Inst);
1896 bool validateOpSel(
const MCInst &Inst);
1897 bool validateTrue16OpSel(
const MCInst &Inst);
1898 bool validateNeg(
const MCInst &Inst, AMDGPU::OpName OpName);
1899 bool validateDPP(
const MCInst &Inst,
const OperandVector &Operands);
1900 bool validateVccOperand(MCRegister
Reg)
const;
1901 bool validateVOPLiteral(
const MCInst &Inst,
const OperandVector &Operands);
1902 bool validateMAIAccWrite(
const MCInst &Inst,
const OperandVector &Operands);
1903 bool validateMAISrc2(
const MCInst &Inst,
const OperandVector &Operands);
1904 bool validateMFMA(
const MCInst &Inst,
const OperandVector &Operands);
1905 bool validateAGPRLdSt(
const MCInst &Inst)
const;
1906 bool validateVGPRAlign(
const MCInst &Inst)
const;
1907 bool validateBLGP(
const MCInst &Inst,
const OperandVector &Operands);
1908 bool validateDS(
const MCInst &Inst,
const OperandVector &Operands);
1909 bool validateGWS(
const MCInst &Inst,
const OperandVector &Operands);
1910 bool validateDivScale(
const MCInst &Inst);
1911 bool validateWaitCnt(
const MCInst &Inst,
const OperandVector &Operands);
1912 bool validateCoherencyBits(
const MCInst &Inst,
const OperandVector &Operands,
1914 bool validateTHAndScopeBits(
const MCInst &Inst,
const OperandVector &Operands,
1915 const unsigned CPol);
1916 bool validateTFE(
const MCInst &Inst,
const OperandVector &Operands);
1917 bool validateLdsDirect(
const MCInst &Inst,
const OperandVector &Operands);
1918 bool validateWMMA(
const MCInst &Inst,
const OperandVector &Operands);
1919 unsigned getConstantBusLimit(
unsigned Opcode)
const;
1920 bool usesConstantBus(
const MCInst &Inst,
unsigned OpIdx);
1921 bool isInlineConstant(
const MCInst &Inst,
unsigned OpIdx)
const;
1922 MCRegister findImplicitSGPRReadInVOP(
const MCInst &Inst)
const;
1924 bool isSupportedMnemo(StringRef Mnemo,
1925 const FeatureBitset &FBS);
1926 bool isSupportedMnemo(StringRef Mnemo,
1927 const FeatureBitset &FBS,
1928 ArrayRef<unsigned> Variants);
1929 bool checkUnsupportedInstruction(StringRef Name, SMLoc IDLoc);
1931 bool isId(
const StringRef Id)
const;
1932 bool isId(
const AsmToken &Token,
const StringRef Id)
const;
1934 StringRef getId()
const;
1935 bool trySkipId(
const StringRef Id);
1936 bool trySkipId(
const StringRef Pref,
const StringRef Id);
1940 bool parseString(StringRef &Val,
const StringRef ErrMsg =
"expected a string");
1941 bool parseId(StringRef &Val,
const StringRef ErrMsg =
"");
1947 StringRef getTokenStr()
const;
1948 AsmToken peekToken(
bool ShouldSkipSpace =
true);
1950 SMLoc getLoc()
const;
1954 void onBeginOfFile()
override;
1955 bool parsePrimaryExpr(
const MCExpr *&Res, SMLoc &EndLoc)
override;
1957 ParseStatus parseCustomOperand(
OperandVector &Operands,
unsigned MCK);
1967 bool parseSwizzleOperand(int64_t &
Op,
const unsigned MinVal,
1968 const unsigned MaxVal,
const Twine &ErrMsg,
1970 bool parseSwizzleOperands(
const unsigned OpNum, int64_t*
Op,
1971 const unsigned MinVal,
1972 const unsigned MaxVal,
1973 const StringRef ErrMsg);
1975 bool parseSwizzleOffset(int64_t &
Imm);
1976 bool parseSwizzleMacro(int64_t &
Imm);
1977 bool parseSwizzleQuadPerm(int64_t &
Imm);
1978 bool parseSwizzleBitmaskPerm(int64_t &
Imm);
1979 bool parseSwizzleBroadcast(int64_t &
Imm);
1980 bool parseSwizzleSwap(int64_t &
Imm);
1981 bool parseSwizzleReverse(int64_t &
Imm);
1982 bool parseSwizzleFFT(int64_t &
Imm);
1983 bool parseSwizzleRotate(int64_t &
Imm);
1986 int64_t parseGPRIdxMacro();
1988 void cvtMubuf(MCInst &Inst,
const OperandVector &Operands) { cvtMubufImpl(Inst, Operands,
false); }
1989 void cvtMubufAtomic(MCInst &Inst,
const OperandVector &Operands) { cvtMubufImpl(Inst, Operands,
true); }
1994 OptionalImmIndexMap &OptionalIdx);
1995 void cvtScaledMFMA(MCInst &Inst,
const OperandVector &Operands);
1996 void cvtVOP3OpSel(MCInst &Inst,
const OperandVector &Operands);
1999 void cvtSWMMAC(MCInst &Inst,
const OperandVector &Operands);
2002 void cvtVOP3OpSel(MCInst &Inst,
const OperandVector &Operands,
2003 OptionalImmIndexMap &OptionalIdx);
2005 OptionalImmIndexMap &OptionalIdx);
2007 void cvtVOP3Interp(MCInst &Inst,
const OperandVector &Operands);
2008 void cvtVINTERP(MCInst &Inst,
const OperandVector &Operands);
2009 void cvtOpSelHelper(MCInst &Inst,
unsigned OpSel);
2011 bool parseDimId(
unsigned &Encoding);
2013 bool convertDppBoundCtrl(int64_t &BoundCtrl);
2016 bool isSupportedDPPCtrl(StringRef Ctrl,
const OperandVector &Operands);
2017 int64_t parseDPPCtrlSel(StringRef Ctrl);
2018 int64_t parseDPPCtrlPerm();
2019 void cvtDPP(MCInst &Inst,
const OperandVector &Operands,
bool IsDPP8 =
false);
2021 cvtDPP(Inst, Operands,
true);
2023 void cvtVOP3DPP(MCInst &Inst,
const OperandVector &Operands,
2024 bool IsDPP8 =
false);
2025 void cvtVOP3DPP8(MCInst &Inst,
const OperandVector &Operands) {
2026 cvtVOP3DPP(Inst, Operands,
true);
2029 ParseStatus parseSDWASel(
OperandVector &Operands, StringRef Prefix,
2030 AMDGPUOperand::ImmTy
Type);
2032 void cvtSdwaVOP1(MCInst &Inst,
const OperandVector &Operands);
2033 void cvtSdwaVOP2(MCInst &Inst,
const OperandVector &Operands);
2034 void cvtSdwaVOP2b(MCInst &Inst,
const OperandVector &Operands);
2035 void cvtSdwaVOP2e(MCInst &Inst,
const OperandVector &Operands);
2036 void cvtSdwaVOPC(MCInst &Inst,
const OperandVector &Operands);
/// Base instruction kinds distinguished for SDWA handling. The underlying
/// type is unsigned and the enumerators are numbered 0, 1, 2 in order.
enum class SDWAInstType : unsigned {
  VOP1 = 0,
  VOP2, // 1
  VOPC  // 2
};
2041 SDWAInstType BasicInstType,
bool SkipDstVcc =
false,
2042 bool SkipSrcVcc =
false);
2153bool AMDGPUOperand::isInlinableImm(
MVT type)
const {
2163 if (!isImmTy(ImmTyNone)) {
2168 if (getModifiers().
Lit != LitModifier::None)
2178 if (type == MVT::f64 || type == MVT::i64) {
2180 AsmParser->hasInv2PiInlineImm());
2183 APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64,
Imm.Val));
2202 APFloat::rmNearestTiesToEven, &Lost);
2209 uint32_t ImmVal = FPLiteral.bitcastToAPInt().getZExtValue();
2211 AsmParser->hasInv2PiInlineImm());
2216 static_cast<int32_t
>(FPLiteral.bitcastToAPInt().getZExtValue()),
2217 AsmParser->hasInv2PiInlineImm());
2221 if (type == MVT::f64 || type == MVT::i64) {
2223 AsmParser->hasInv2PiInlineImm());
2232 static_cast<int16_t
>(
Literal.getLoBits(16).getSExtValue()),
2233 type, AsmParser->hasInv2PiInlineImm());
2237 static_cast<int32_t
>(
Literal.getLoBits(32).getZExtValue()),
2238 AsmParser->hasInv2PiInlineImm());
2241bool AMDGPUOperand::isLiteralImm(MVT type)
const {
2243 if (!isImmTy(ImmTyNone)) {
2248 (type == MVT::i64 || type == MVT::f64) && AsmParser->has64BitLiterals();
2253 if (type == MVT::f64 && hasFPModifiers()) {
2273 if (type == MVT::f64) {
2278 if (type == MVT::i64) {
2291 MVT ExpectedType = (type == MVT::v2f16) ? MVT::f16
2292 : (type == MVT::v2i16) ? MVT::f32
2293 : (type == MVT::v2f32) ? MVT::f32
2296 APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64,
Imm.Val));
/// True when this operand is of register kind and its register is a member of
/// the register class identified by \p RCID (looked up through the parser's
/// MCRegisterInfo).
2300bool AMDGPUOperand::isRegClass(
unsigned RCID)
const {
2301 return isRegKind() && AsmParser->getMRI()->getRegClass(RCID).contains(
getReg());
2304bool AMDGPUOperand::isVRegWithInputMods()
const {
2305 return isRegClass(AMDGPU::VGPR_32RegClassID) ||
2307 (isRegClass(AMDGPU::VReg_64RegClassID) &&
2308 AsmParser->getFeatureBits()[AMDGPU::FeatureDPALU_DPP]);
2311template <
bool IsFake16>
2312bool AMDGPUOperand::isT16_Lo128VRegWithInputMods()
const {
2313 return isRegClass(IsFake16 ? AMDGPU::VGPR_32_Lo128RegClassID
2314 : AMDGPU::VGPR_16_Lo128RegClassID);
2317template <
bool IsFake16>
bool AMDGPUOperand::isT16VRegWithInputMods()
const {
2318 return isRegClass(IsFake16 ? AMDGPU::VGPR_32RegClassID
2319 : AMDGPU::VGPR_16RegClassID);
/// Checks whether this operand is acceptable as an SDWA operand of value type
/// \p type; the accepted set depends on the target generation.
/// NOTE(review): the statement executed for the isVI() case is not visible in
/// this excerpt.
2322bool AMDGPUOperand::isSDWAOperand(MVT type)
const {
2323 if (AsmParser->isVI())
// From GFX9 on: a VS_32-class register or an inlinable immediate of \p type.
2325 if (AsmParser->isGFX9Plus())
2326 return isRegClass(AMDGPU::VS_32RegClassID) || isInlinableImm(type);
2330bool AMDGPUOperand::isSDWAFP16Operand()
const {
2331 return isSDWAOperand(MVT::f16);
2334bool AMDGPUOperand::isSDWAFP32Operand()
const {
2335 return isSDWAOperand(MVT::f32);
2338bool AMDGPUOperand::isSDWAInt16Operand()
const {
2339 return isSDWAOperand(MVT::i16);
2342bool AMDGPUOperand::isSDWAInt32Operand()
const {
2343 return isSDWAOperand(MVT::i32);
/// True when this operand satisfies isReg() and matches the SCSrc predicate
/// for the active wave size: isSCSrc_b64() when the parser reports wave64,
/// isSCSrc_b32() when it reports wave32.
2346bool AMDGPUOperand::isBoolReg()
const {
2347 return isReg() && ((AsmParser->isWave64() && isSCSrc_b64()) ||
2348 (AsmParser->isWave32() && isSCSrc_b32()));
2351uint64_t AMDGPUOperand::applyInputFPModifiers(uint64_t Val,
unsigned Size)
const
2353 assert(isImmTy(ImmTyNone) &&
Imm.Mods.hasFPModifiers());
2356 const uint64_t FpSignMask = (1ULL << (
Size * 8 - 1));
2368void AMDGPUOperand::addImmOperands(MCInst &Inst,
unsigned N,
bool ApplyModifiers)
const {
2378 addLiteralImmOperand(Inst,
Imm.Val,
2380 isImmTy(ImmTyNone) &&
Imm.Mods.hasFPModifiers());
2382 assert(!isImmTy(ImmTyNone) || !hasModifiers());
2387void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val,
bool ApplyModifiers)
const {
2388 const auto& InstDesc = AsmParser->getMII()->get(Inst.
getOpcode());
2393 if (ApplyModifiers) {
2396 Val = applyInputFPModifiers(Val,
Size);
2400 uint8_t OpTy = InstDesc.operands()[OpNum].OperandType;
2402 bool CanUse64BitLiterals =
2403 AsmParser->has64BitLiterals() &&
2406 MCContext &Ctx = AsmParser->getContext();
2417 if (
Lit == LitModifier::None &&
2419 AsmParser->hasInv2PiInlineImm())) {
2427 bool HasMandatoryLiteral =
2430 if (
Literal.getLoBits(32) != 0 &&
2431 (InstDesc.getSize() != 4 || !AsmParser->has64BitLiterals()) &&
2432 !HasMandatoryLiteral) {
2433 const_cast<AMDGPUAsmParser *
>(AsmParser)->
Warning(
2435 "Can't encode literal as exact 64-bit floating-point operand. "
2436 "Low 32-bits will be set to zero");
2437 Val &= 0xffffffff00000000u;
2443 if (CanUse64BitLiterals &&
Lit == LitModifier::None &&
2449 Lit = LitModifier::Lit64;
2450 }
else if (
Lit == LitModifier::Lit) {
2464 if (CanUse64BitLiterals &&
Lit == LitModifier::None &&
2466 Lit = LitModifier::Lit64;
2473 if (
Lit == LitModifier::None && AsmParser->hasInv2PiInlineImm() &&
2474 Literal == 0x3fc45f306725feed) {
2509 APFloat::rmNearestTiesToEven, &lost);
2513 Val = FPLiteral.bitcastToAPInt().getZExtValue();
2520 if (
Lit != LitModifier::None) {
2551 if (
Lit == LitModifier::None &&
2561 if (!AsmParser->has64BitLiterals() ||
Lit == LitModifier::Lit)
2569 if (
Lit == LitModifier::None &&
2577 if (!AsmParser->has64BitLiterals()) {
2578 Val =
static_cast<uint64_t
>(Val) << 32;
2585 if (
Lit == LitModifier::Lit ||
2587 Val =
static_cast<uint64_t
>(Val) << 32;
2591 if (
Lit == LitModifier::Lit)
2617 if (
Lit != LitModifier::None) {
2625void AMDGPUOperand::addRegOperands(MCInst &Inst,
unsigned N)
const {
2630bool AMDGPUOperand::isInlineValue()
const {
2638void AMDGPUAsmParser::createConstantSymbol(StringRef Id, int64_t Val) {
2649 if (Is == IS_VGPR) {
2653 return AMDGPU::VGPR_32RegClassID;
2655 return AMDGPU::VReg_64RegClassID;
2657 return AMDGPU::VReg_96RegClassID;
2659 return AMDGPU::VReg_128RegClassID;
2661 return AMDGPU::VReg_160RegClassID;
2663 return AMDGPU::VReg_192RegClassID;
2665 return AMDGPU::VReg_224RegClassID;
2667 return AMDGPU::VReg_256RegClassID;
2669 return AMDGPU::VReg_288RegClassID;
2671 return AMDGPU::VReg_320RegClassID;
2673 return AMDGPU::VReg_352RegClassID;
2675 return AMDGPU::VReg_384RegClassID;
2677 return AMDGPU::VReg_512RegClassID;
2679 return AMDGPU::VReg_1024RegClassID;
2681 }
else if (Is == IS_TTMP) {
2685 return AMDGPU::TTMP_32RegClassID;
2687 return AMDGPU::TTMP_64RegClassID;
2689 return AMDGPU::TTMP_128RegClassID;
2691 return AMDGPU::TTMP_256RegClassID;
2693 return AMDGPU::TTMP_512RegClassID;
2695 }
else if (Is == IS_SGPR) {
2699 return AMDGPU::SGPR_32RegClassID;
2701 return AMDGPU::SGPR_64RegClassID;
2703 return AMDGPU::SGPR_96RegClassID;
2705 return AMDGPU::SGPR_128RegClassID;
2707 return AMDGPU::SGPR_160RegClassID;
2709 return AMDGPU::SGPR_192RegClassID;
2711 return AMDGPU::SGPR_224RegClassID;
2713 return AMDGPU::SGPR_256RegClassID;
2715 return AMDGPU::SGPR_288RegClassID;
2717 return AMDGPU::SGPR_320RegClassID;
2719 return AMDGPU::SGPR_352RegClassID;
2721 return AMDGPU::SGPR_384RegClassID;
2723 return AMDGPU::SGPR_512RegClassID;
2725 }
else if (Is == IS_AGPR) {
2729 return AMDGPU::AGPR_32RegClassID;
2731 return AMDGPU::AReg_64RegClassID;
2733 return AMDGPU::AReg_96RegClassID;
2735 return AMDGPU::AReg_128RegClassID;
2737 return AMDGPU::AReg_160RegClassID;
2739 return AMDGPU::AReg_192RegClassID;
2741 return AMDGPU::AReg_224RegClassID;
2743 return AMDGPU::AReg_256RegClassID;
2745 return AMDGPU::AReg_288RegClassID;
2747 return AMDGPU::AReg_320RegClassID;
2749 return AMDGPU::AReg_352RegClassID;
2751 return AMDGPU::AReg_384RegClassID;
2753 return AMDGPU::AReg_512RegClassID;
2755 return AMDGPU::AReg_1024RegClassID;
2763 .
Case(
"exec", AMDGPU::EXEC)
2764 .
Case(
"vcc", AMDGPU::VCC)
2765 .
Case(
"flat_scratch", AMDGPU::FLAT_SCR)
2766 .
Case(
"xnack_mask", AMDGPU::XNACK_MASK)
2767 .
Case(
"shared_base", AMDGPU::SRC_SHARED_BASE)
2768 .
Case(
"src_shared_base", AMDGPU::SRC_SHARED_BASE)
2769 .
Case(
"shared_limit", AMDGPU::SRC_SHARED_LIMIT)
2770 .
Case(
"src_shared_limit", AMDGPU::SRC_SHARED_LIMIT)
2771 .
Case(
"private_base", AMDGPU::SRC_PRIVATE_BASE)
2772 .
Case(
"src_private_base", AMDGPU::SRC_PRIVATE_BASE)
2773 .
Case(
"private_limit", AMDGPU::SRC_PRIVATE_LIMIT)
2774 .
Case(
"src_private_limit", AMDGPU::SRC_PRIVATE_LIMIT)
2775 .
Case(
"src_flat_scratch_base_lo", AMDGPU::SRC_FLAT_SCRATCH_BASE_LO)
2776 .
Case(
"src_flat_scratch_base_hi", AMDGPU::SRC_FLAT_SCRATCH_BASE_HI)
2777 .
Case(
"pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID)
2778 .
Case(
"src_pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID)
2779 .
Case(
"lds_direct", AMDGPU::LDS_DIRECT)
2780 .
Case(
"src_lds_direct", AMDGPU::LDS_DIRECT)
2781 .
Case(
"m0", AMDGPU::M0)
2782 .
Case(
"vccz", AMDGPU::SRC_VCCZ)
2783 .
Case(
"src_vccz", AMDGPU::SRC_VCCZ)
2784 .
Case(
"execz", AMDGPU::SRC_EXECZ)
2785 .
Case(
"src_execz", AMDGPU::SRC_EXECZ)
2786 .
Case(
"scc", AMDGPU::SRC_SCC)
2787 .
Case(
"src_scc", AMDGPU::SRC_SCC)
2788 .
Case(
"tba", AMDGPU::TBA)
2789 .
Case(
"tma", AMDGPU::TMA)
2790 .
Case(
"flat_scratch_lo", AMDGPU::FLAT_SCR_LO)
2791 .
Case(
"flat_scratch_hi", AMDGPU::FLAT_SCR_HI)
2792 .
Case(
"xnack_mask_lo", AMDGPU::XNACK_MASK_LO)
2793 .
Case(
"xnack_mask_hi", AMDGPU::XNACK_MASK_HI)
2794 .
Case(
"vcc_lo", AMDGPU::VCC_LO)
2795 .
Case(
"vcc_hi", AMDGPU::VCC_HI)
2796 .
Case(
"exec_lo", AMDGPU::EXEC_LO)
2797 .
Case(
"exec_hi", AMDGPU::EXEC_HI)
2798 .
Case(
"tma_lo", AMDGPU::TMA_LO)
2799 .
Case(
"tma_hi", AMDGPU::TMA_HI)
2800 .
Case(
"tba_lo", AMDGPU::TBA_LO)
2801 .
Case(
"tba_hi", AMDGPU::TBA_HI)
2802 .
Case(
"pc", AMDGPU::PC_REG)
2803 .
Case(
"null", AMDGPU::SGPR_NULL)
/// Parses one register through the operand-returning parseRegister() overload
/// and copies its register and source range into the out-parameters.
///
/// \param RegNo     receives the parsed register.
/// \param StartLoc  receives the operand's start location.
/// \param EndLoc    receives the operand's end location.
/// \returns true when parseRegister() produced no operand.
///
/// NOTE(review): \p RestoreOnFailure is not forwarded in the visible lines;
/// parseRegister() is called with its default argument (false), while
/// tryParseRegister() passes true to this function. Confirm whether the flag
/// is meant to be passed through.
2807bool AMDGPUAsmParser::ParseRegister(MCRegister &RegNo, SMLoc &StartLoc,
2808 SMLoc &EndLoc,
bool RestoreOnFailure) {
2809 auto R = parseRegister();
// A null operand means the register could not be parsed.
2810 if (!R)
return true;
2812 RegNo =
R->getReg();
2813 StartLoc =
R->getStartLoc();
2814 EndLoc =
R->getEndLoc();
/// MCTargetAsmParser override: delegates to ParseRegister() with
/// RestoreOnFailure set to false and returns its result.
2818bool AMDGPUAsmParser::parseRegister(MCRegister &
Reg, SMLoc &StartLoc,
2820 return ParseRegister(
Reg, StartLoc, EndLoc,
false);
/// MCTargetAsmParser override: attempts to parse a register via
/// ParseRegister() with RestoreOnFailure set to true.
/// Whether the attempt left pending errors is captured before those errors
/// are cleared.
/// NOTE(review): how Result and PendingErrors are mapped to the returned
/// ParseStatus is not visible in this excerpt.
2823ParseStatus AMDGPUAsmParser::tryParseRegister(MCRegister &
Reg, SMLoc &StartLoc,
2825 bool Result = ParseRegister(
Reg, StartLoc, EndLoc,
true);
// Remember whether errors were queued, then drop them.
2826 bool PendingErrors = getParser().hasPendingError();
2827 getParser().clearPendingErrors();
/// Tries to extend the register list accumulated in \p Reg / \p RegWidth with
/// the next list element \p Reg1 (of kind \p RegKind1), reporting problems at
/// \p Loc.
///
/// Visible behaviour: kinds must match; certain LO/HI special-register pairs
/// are recognised (FLAT_SCR_LO/HI and XNACK_MASK_LO/HI are shown being merged
/// into FLAT_SCR and XNACK_MASK); otherwise \p Reg1 must be the register
/// immediately following the current range (Reg + RegWidth / 32).
/// NOTE(review): several branch bodies, the success paths and the return
/// statements after the Error() calls are not visible in this excerpt.
2835bool AMDGPUAsmParser::AddNextRegisterToList(MCRegister &
Reg,
unsigned &RegWidth,
2836 RegisterKind RegKind,
2838 RegisterKind RegKind1, SMLoc Loc) {
// Special case: an SGPR list whose next index would be 106/107 is checked
// against VCC_LO/VCC_HI as the next element.
2840 if (RegKind == IS_SGPR) {
2841 unsigned RegIdx = (
Reg - AMDGPU::SGPR0) + RegWidth / 32;
2842 if ((RegIdx == 106 && Reg1 == AMDGPU::VCC_LO) ||
2843 (RegIdx == 107 && Reg1 == AMDGPU::VCC_HI)) {
2849 if (RegKind != RegKind1) {
2850 Error(Loc,
"registers in a list must be of the same kind");
// NOTE(review): this function is declared to return bool, yet this path
// returns a default MCRegister and relies on an implicit conversion;
// `return false;` would state the intent directly — confirm.
2851 return MCRegister();
// Recognised LO/HI pairs of special registers.
2856 if (
Reg == AMDGPU::EXEC_LO && Reg1 == AMDGPU::EXEC_HI) {
2861 if (
Reg == AMDGPU::FLAT_SCR_LO && Reg1 == AMDGPU::FLAT_SCR_HI) {
2862 Reg = AMDGPU::FLAT_SCR;
2866 if (
Reg == AMDGPU::XNACK_MASK_LO && Reg1 == AMDGPU::XNACK_MASK_HI) {
2867 Reg = AMDGPU::XNACK_MASK;
2871 if (
Reg == AMDGPU::VCC_LO && Reg1 == AMDGPU::VCC_HI) {
2876 if (
Reg == AMDGPU::TBA_LO && Reg1 == AMDGPU::TBA_HI) {
2881 if (
Reg == AMDGPU::TMA_LO && Reg1 == AMDGPU::TMA_HI) {
2886 Error(Loc,
"register does not fit in the list");
// The next element must directly follow the current range.
2892 if (Reg1 !=
Reg + RegWidth / 32) {
2893 Error(Loc,
"registers in a list must have consecutive indices");
2911 {{
"ttmp"}, IS_TTMP},
2917 return Kind == IS_VGPR ||
2925 if (Str.starts_with(
Reg.Name))
2931 return !Str.getAsInteger(10, Num);
2935AMDGPUAsmParser::isRegister(
const AsmToken &Token,
2936 const AsmToken &NextToken)
const {
2951 StringRef RegSuffix = Str.substr(
RegName.size());
2952 if (!RegSuffix.
empty()) {
2970AMDGPUAsmParser::isRegister()
2972 return isRegister(
getToken(), peekToken());
/// Maps a (kind, index, sub-register, width) description of a regular
/// register to an MCRegister, emitting a diagnostic at the given location and
/// returning an invalid register when the description is not acceptable.
///
/// Visible checks: alignment of \p RegNum for SGPR/TTMP kinds, an unsupported
/// register size, and two index-range checks (the second one only for VGPRs
/// when isGFX1250Plus() is false).
/// NOTE(review): how AlignSize and RCID are computed, and the success path,
/// are not visible in this excerpt.
2975MCRegister AMDGPUAsmParser::getRegularReg(RegisterKind RegKind,
unsigned RegNum,
2976 unsigned SubReg,
unsigned RegWidth,
2980 unsigned AlignSize = 1;
2981 if (RegKind == IS_SGPR || RegKind == IS_TTMP) {
// The first register index must be a multiple of the required alignment.
2987 if (RegNum % AlignSize != 0) {
2988 Error(Loc,
"invalid register alignment");
2989 return MCRegister();
// Index of the register within its class.
2992 unsigned RegIdx = RegNum / AlignSize;
2995 Error(Loc,
"invalid or unsupported register size");
2996 return MCRegister();
// NOTE(review): RC is held by value here; if getRegClass() returns a
// reference, binding a const reference would avoid the copy — confirm.
3000 const MCRegisterClass RC =
TRI->getRegClass(RCID);
3001 if (RegIdx >= RC.
getNumRegs() || (RegKind == IS_VGPR && RegIdx > 255)) {
3002 Error(Loc,
"register index is out of range");
// NOTE(review): the other failure paths return MCRegister(); this one returns
// AMDGPU::NoRegister. Presumably equivalent, but inconsistent — confirm.
3003 return AMDGPU::NoRegister;
// A VGPR range must not extend past index 255 unless isGFX1250Plus().
3006 if (RegKind == IS_VGPR && !
isGFX1250Plus() && RegIdx + RegWidth / 32 > 256) {
3007 Error(Loc,
"register index is out of range");
3008 return MCRegister();
/// Parses a register index or index range and reports it through the
/// out-parameters: \p Num receives the low index and \p RegWidth receives
/// 32 * (number of registers in the range). For a single-register range a
/// ".l" / ".h" suffix selects AMDGPU::lo16 / AMDGPU::hi16 as the sub-register.
///
/// Diagnostics are attached to the location of the offending index.
/// NOTE(review): the token-consuming code that fills RegLo/RegHi, the exact
/// conditions guarding the two "invalid register index" errors, and the
/// return statements are not visible in this excerpt.
3024bool AMDGPUAsmParser::ParseRegRange(
unsigned &Num,
unsigned &RegWidth,
3026 int64_t RegLo, RegHi;
3030 SMLoc FirstIdxLoc = getLoc();
3037 SecondIdxLoc = getLoc();
3048 Error(FirstIdxLoc,
"invalid register index");
3053 Error(SecondIdxLoc,
"invalid register index");
// The range must be given in non-decreasing order.
3057 if (RegLo > RegHi) {
3058 Error(FirstIdxLoc,
"first register index should not exceed second index");
// Only a one-register range may carry a 16-bit half suffix.
3062 if (RegHi == RegLo) {
3063 StringRef RegSuffix = getTokenStr();
3064 if (RegSuffix ==
".l") {
3065 SubReg = AMDGPU::lo16;
3067 }
else if (RegSuffix ==
".h") {
3068 SubReg = AMDGPU::hi16;
// NOTE(review): both results are narrowed from int64_t to unsigned;
// presumably the earlier index validation bounds them — confirm.
3073 Num =
static_cast<unsigned>(RegLo);
3074 RegWidth = 32 * ((RegHi - RegLo) + 1);
3079MCRegister AMDGPUAsmParser::ParseSpecialReg(RegisterKind &RegKind,
3082 SmallVectorImpl<AsmToken> &Tokens) {
3088 RegKind = IS_SPECIAL;
3095MCRegister AMDGPUAsmParser::ParseRegularReg(RegisterKind &RegKind,
3098 SmallVectorImpl<AsmToken> &Tokens) {
3100 StringRef
RegName = getTokenStr();
3101 auto Loc = getLoc();
3105 Error(Loc,
"invalid register name");
3106 return MCRegister();
3114 unsigned SubReg = NoSubRegister;
3115 bool IsRange =
false;
3116 if (!RegSuffix.
empty()) {
3118 SubReg = AMDGPU::lo16;
3120 SubReg = AMDGPU::hi16;
3124 Error(Loc,
"invalid register index");
3125 return MCRegister();
3131 if (!ParseRegRange(RegNum, RegWidth, SubReg))
3132 return MCRegister();
3136 MCRegister
Reg = getRegularReg(RegKind, RegNum, SubReg, RegWidth, Loc);
3137 const MCRegisterInfo &
TRI = *
getContext().getRegisterInfo();
3138 if (RegKind == IS_SGPR && IsRange
3139 ? (
TRI.isSubRegister(
Reg, VCC_LO) ||
TRI.isSubRegister(
Reg, VCC_HI))
3140 : (
Reg == VCC_LO ||
Reg == VCC_HI)) {
3141 Error(Loc,
"register index is out of range");
3142 return MCRegister();
3148MCRegister AMDGPUAsmParser::ParseRegList(RegisterKind &RegKind,
3149 unsigned &RegNum,
unsigned &RegWidth,
3150 SmallVectorImpl<AsmToken> &Tokens) {
3152 auto ListLoc = getLoc();
3155 "expected a register or a list of registers")) {
3156 return MCRegister();
3161 auto Loc = getLoc();
3162 if (!ParseAMDGPURegister(RegKind,
Reg, RegNum, RegWidth))
3163 return MCRegister();
3164 if (RegWidth != 32) {
3165 Error(Loc,
"expected a single 32-bit register");
3166 return MCRegister();
3170 RegisterKind NextRegKind;
3172 unsigned NextRegNum, NextRegWidth;
3175 if (!ParseAMDGPURegister(NextRegKind, NextReg,
3176 NextRegNum, NextRegWidth,
3178 return MCRegister();
3180 if (NextRegWidth != 32) {
3181 Error(Loc,
"expected a single 32-bit register");
3182 return MCRegister();
3184 if (!AddNextRegisterToList(
Reg, RegWidth, RegKind, NextReg, NextRegKind,
3186 return MCRegister();
3190 "expected a comma or a closing square bracket")) {
3191 return MCRegister();
3195 Reg = getRegularReg(RegKind, RegNum, NoSubRegister, RegWidth, ListLoc);
3200bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind,
3201 MCRegister &
Reg,
unsigned &RegNum,
3203 SmallVectorImpl<AsmToken> &Tokens) {
3204 auto Loc = getLoc();
3208 Reg = ParseSpecialReg(RegKind, RegNum, RegWidth, Tokens);
3210 Reg = ParseRegularReg(RegKind, RegNum, RegWidth, Tokens);
3212 Reg = ParseRegList(RegKind, RegNum, RegWidth, Tokens);
3217 assert(Parser.hasPendingError());
3221 if (!subtargetHasRegister(*
TRI,
Reg)) {
3222 if (
Reg == AMDGPU::SGPR_NULL) {
3223 Error(Loc,
"'null' operand is not supported on this GPU");
3226 " register not available on this GPU");
3234bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind,
3235 MCRegister &
Reg,
unsigned &RegNum,
3237 bool RestoreOnFailure ) {
3241 if (ParseAMDGPURegister(RegKind,
Reg, RegNum, RegWidth, Tokens)) {
3242 if (RestoreOnFailure) {
3243 while (!Tokens.
empty()) {
3252std::optional<StringRef>
3253AMDGPUAsmParser::getGprCountSymbolName(RegisterKind RegKind) {
3256 return StringRef(
".amdgcn.next_free_vgpr");
3258 return StringRef(
".amdgcn.next_free_sgpr");
3260 return std::nullopt;
3264void AMDGPUAsmParser::initializeGprCountSymbol(RegisterKind RegKind) {
3265 auto SymbolName = getGprCountSymbolName(RegKind);
3266 assert(SymbolName &&
"initializing invalid register kind");
3272bool AMDGPUAsmParser::updateGprCountSymbols(RegisterKind RegKind,
3273 unsigned DwordRegIndex,
3274 unsigned RegWidth) {
3279 auto SymbolName = getGprCountSymbolName(RegKind);
3284 int64_t NewMax = DwordRegIndex +
divideCeil(RegWidth, 32) - 1;
3288 return !
Error(getLoc(),
3289 ".amdgcn.next_free_{v,s}gpr symbols must be variable");
3293 ".amdgcn.next_free_{v,s}gpr symbols must be absolute expressions");
3295 if (OldCount <= NewMax)
3301std::unique_ptr<AMDGPUOperand>
3302AMDGPUAsmParser::parseRegister(
bool RestoreOnFailure) {
3304 SMLoc StartLoc = Tok.getLoc();
3305 SMLoc EndLoc = Tok.getEndLoc();
3306 RegisterKind RegKind;
3308 unsigned RegNum, RegWidth;
3310 if (!ParseAMDGPURegister(RegKind,
Reg, RegNum, RegWidth)) {
3314 if (!updateGprCountSymbols(RegKind, RegNum, RegWidth))
3317 KernelScope.usesRegister(RegKind, RegNum, RegWidth);
3318 return AMDGPUOperand::CreateReg(
this,
Reg, StartLoc, EndLoc);
3321ParseStatus AMDGPUAsmParser::parseImm(
OperandVector &Operands,
3325 if (isRegister() || isModifier())
3328 if (
Lit == LitModifier::None) {
3329 if (trySkipId(
"lit"))
3330 Lit = LitModifier::Lit;
3331 else if (trySkipId(
"lit64"))
3332 Lit = LitModifier::Lit64;
3334 if (
Lit != LitModifier::None) {
3337 ParseStatus S = parseImm(Operands, HasSP3AbsModifier,
Lit);
3346 const auto& NextTok = peekToken();
3349 bool Negate =
false;
3357 AMDGPUOperand::Modifiers Mods;
3365 StringRef Num = getTokenStr();
3368 APFloat RealVal(APFloat::IEEEdouble());
3369 auto roundMode = APFloat::rmNearestTiesToEven;
3370 if (
errorToBool(RealVal.convertFromString(Num, roundMode).takeError()))
3373 RealVal.changeSign();
3376 AMDGPUOperand::CreateImm(
this, RealVal.bitcastToAPInt().getZExtValue(), S,
3377 AMDGPUOperand::ImmTyNone,
true));
3378 AMDGPUOperand &
Op =
static_cast<AMDGPUOperand &
>(*Operands.
back());
3379 Op.setModifiers(Mods);
3388 if (HasSP3AbsModifier) {
3397 if (getParser().parsePrimaryExpr(Expr, EndLoc,
nullptr))
3400 if (Parser.parseExpression(Expr))
3404 if (Expr->evaluateAsAbsolute(IntVal)) {
3406 return Error(S,
"literal value out of range");
3407 Operands.
push_back(AMDGPUOperand::CreateImm(
this, IntVal, S));
3408 AMDGPUOperand &
Op =
static_cast<AMDGPUOperand &
>(*Operands.
back());
3409 Op.setModifiers(Mods);
3411 if (
Lit != LitModifier::None)
3413 Operands.
push_back(AMDGPUOperand::CreateExpr(
this, Expr, S));
3422ParseStatus AMDGPUAsmParser::parseReg(
OperandVector &Operands) {
3426 if (
auto R = parseRegister()) {
3434ParseStatus AMDGPUAsmParser::parseRegOrImm(
OperandVector &Operands,
3436 ParseStatus Res = parseReg(Operands);
3441 return parseImm(Operands, HasSP3AbsMod,
Lit);
3445AMDGPUAsmParser::isNamedOperandModifier(
const AsmToken &Token,
const AsmToken &NextToken)
const {
3448 return str ==
"abs" || str ==
"neg" || str ==
"sext";
3454AMDGPUAsmParser::isOpcodeModifierWithVal(
const AsmToken &Token,
const AsmToken &NextToken)
const {
3459AMDGPUAsmParser::isOperandModifier(
const AsmToken &Token,
const AsmToken &NextToken)
const {
3460 return isNamedOperandModifier(Token, NextToken) || Token.
is(
AsmToken::Pipe);
3464AMDGPUAsmParser::isRegOrOperandModifier(
const AsmToken &Token,
const AsmToken &NextToken)
const {
3465 return isRegister(Token, NextToken) || isOperandModifier(Token, NextToken);
3482AMDGPUAsmParser::isModifier() {
3485 AsmToken NextToken[2];
3486 peekTokens(NextToken);
3488 return isOperandModifier(Tok, NextToken[0]) ||
3489 (Tok.
is(
AsmToken::Minus) && isRegOrOperandModifier(NextToken[0], NextToken[1])) ||
3490 isOpcodeModifierWithVal(Tok, NextToken[0]);
3516AMDGPUAsmParser::parseSP3NegModifier() {
3518 AsmToken NextToken[2];
3519 peekTokens(NextToken);
3522 (isRegister(NextToken[0], NextToken[1]) ||
3524 isId(NextToken[0],
"abs"))) {
3533AMDGPUAsmParser::parseRegOrImmWithFPInputMods(
OperandVector &Operands,
3541 return Error(getLoc(),
"invalid syntax, expected 'neg' modifier");
3543 SP3Neg = parseSP3NegModifier();
3546 Neg = trySkipId(
"neg");
3548 return Error(Loc,
"expected register or immediate");
3552 Abs = trySkipId(
"abs");
3557 if (trySkipId(
"lit")) {
3558 Lit = LitModifier::Lit;
3561 }
else if (trySkipId(
"lit64")) {
3562 Lit = LitModifier::Lit64;
3565 if (!has64BitLiterals())
3566 return Error(Loc,
"lit64 is not supported on this GPU");
3572 return Error(Loc,
"expected register or immediate");
3576 Res = parseRegOrImm(Operands, SP3Abs,
Lit);
3578 Res = parseReg(Operands);
3581 return (SP3Neg || Neg || SP3Abs || Abs ||
Lit != LitModifier::None)
3585 if (
Lit != LitModifier::None && !Operands.
back()->isImm())
3586 Error(Loc,
"expected immediate with lit modifier");
3588 if (SP3Abs && !skipToken(
AsmToken::Pipe,
"expected vertical bar"))
3594 if (
Lit != LitModifier::None &&
3598 AMDGPUOperand::Modifiers Mods;
3599 Mods.Abs = Abs || SP3Abs;
3600 Mods.Neg = Neg || SP3Neg;
3603 if (Mods.hasFPModifiers() ||
Lit != LitModifier::None) {
3604 AMDGPUOperand &
Op =
static_cast<AMDGPUOperand &
>(*Operands.
back());
3606 return Error(
Op.getStartLoc(),
"expected an absolute expression");
3607 Op.setModifiers(Mods);
3613AMDGPUAsmParser::parseRegOrImmWithIntInputMods(
OperandVector &Operands,
3615 bool Sext = trySkipId(
"sext");
3616 if (Sext && !skipToken(
AsmToken::LParen,
"expected left paren after sext"))
3621 Res = parseRegOrImm(Operands);
3623 Res = parseReg(Operands);
3631 AMDGPUOperand::Modifiers Mods;
3634 if (Mods.hasIntModifiers()) {
3635 AMDGPUOperand &
Op =
static_cast<AMDGPUOperand &
>(*Operands.
back());
3637 return Error(
Op.getStartLoc(),
"expected an absolute expression");
3638 Op.setModifiers(Mods);
// Thin wrapper: forwards Operands to parseRegOrImmWithFPInputMods() with the
// second argument fixed to false and returns that call's ParseStatus.
// NOTE(review): the name of that second parameter is not visible in this
// excerpt; presumably false disables the immediate form - confirm against the
// declaration.
3644ParseStatus AMDGPUAsmParser::parseRegWithFPInputMods(
OperandVector &Operands) {
3645 return parseRegOrImmWithFPInputMods(Operands,
false);
// Thin wrapper: forwards Operands to parseRegOrImmWithIntInputMods() with the
// second argument fixed to false and returns that call's ParseStatus.
// NOTE(review): the name of that second parameter is not visible in this
// excerpt; presumably false disables the immediate form - confirm against the
// declaration.
3648ParseStatus AMDGPUAsmParser::parseRegWithIntInputMods(
OperandVector &Operands) {
3649 return parseRegOrImmWithIntInputMods(Operands,
false);
3652ParseStatus AMDGPUAsmParser::parseVReg32OrOff(
OperandVector &Operands) {
3653 auto Loc = getLoc();
3654 if (trySkipId(
"off")) {
3655 Operands.
push_back(AMDGPUOperand::CreateImm(
this, 0, Loc,
3656 AMDGPUOperand::ImmTyOff,
false));
3663 std::unique_ptr<AMDGPUOperand>
Reg = parseRegister();
3672unsigned AMDGPUAsmParser::checkTargetMatchPredicate(MCInst &Inst) {
3679 return Match_InvalidOperand;
3681 if (Inst.
getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi ||
3682 Inst.
getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) {
3685 AMDGPU::getNamedOperandIdx(Inst.
getOpcode(), AMDGPU::OpName::dst_sel);
3687 if (!
Op.isImm() ||
Op.getImm() != AMDGPU::SDWA::SdwaSel::DWORD) {
3688 return Match_InvalidOperand;
3696 if (tryAnotherVOPDEncoding(Inst))
3697 return Match_InvalidOperand;
3699 return Match_Success;
3703 static const unsigned Variants[] = {
3713ArrayRef<unsigned> AMDGPUAsmParser::getMatchedVariants()
const {
3714 if (isForcedDPP() && isForcedVOP3()) {
3718 if (getForcedEncodingSize() == 32) {
3723 if (isForcedVOP3()) {
3728 if (isForcedSDWA()) {
3734 if (isForcedDPP()) {
3742StringRef AMDGPUAsmParser::getMatchedVariantName()
const {
3743 if (isForcedDPP() && isForcedVOP3())
3746 if (getForcedEncodingSize() == 32)
3762AMDGPUAsmParser::findImplicitSGPRReadInVOP(
const MCInst &Inst)
const {
3766 case AMDGPU::FLAT_SCR:
3768 case AMDGPU::VCC_LO:
3769 case AMDGPU::VCC_HI:
3776 return MCRegister();
3783bool AMDGPUAsmParser::isInlineConstant(
const MCInst &Inst,
3784 unsigned OpIdx)
const {
3841unsigned AMDGPUAsmParser::getConstantBusLimit(
unsigned Opcode)
const {
3847 case AMDGPU::V_LSHLREV_B64_e64:
3848 case AMDGPU::V_LSHLREV_B64_gfx10:
3849 case AMDGPU::V_LSHLREV_B64_e64_gfx11:
3850 case AMDGPU::V_LSHLREV_B64_e32_gfx12:
3851 case AMDGPU::V_LSHLREV_B64_e64_gfx12:
3852 case AMDGPU::V_LSHRREV_B64_e64:
3853 case AMDGPU::V_LSHRREV_B64_gfx10:
3854 case AMDGPU::V_LSHRREV_B64_e64_gfx11:
3855 case AMDGPU::V_LSHRREV_B64_e64_gfx12:
3856 case AMDGPU::V_ASHRREV_I64_e64:
3857 case AMDGPU::V_ASHRREV_I64_gfx10:
3858 case AMDGPU::V_ASHRREV_I64_e64_gfx11:
3859 case AMDGPU::V_ASHRREV_I64_e64_gfx12:
3860 case AMDGPU::V_LSHL_B64_e64:
3861 case AMDGPU::V_LSHR_B64_e64:
3862 case AMDGPU::V_ASHR_I64_e64:
3875 bool AddMandatoryLiterals =
false) {
3878 AddMandatoryLiterals ? getNamedOperandIdx(Opcode, OpName::imm) : -1;
3882 AddMandatoryLiterals ? getNamedOperandIdx(Opcode, OpName::immX) : -1;
3884 return {getNamedOperandIdx(Opcode, OpName::src0X),
3885 getNamedOperandIdx(Opcode, OpName::vsrc1X),
3886 getNamedOperandIdx(Opcode, OpName::vsrc2X),
3887 getNamedOperandIdx(Opcode, OpName::src0Y),
3888 getNamedOperandIdx(Opcode, OpName::vsrc1Y),
3889 getNamedOperandIdx(Opcode, OpName::vsrc2Y),
3894 return {getNamedOperandIdx(Opcode, OpName::src0),
3895 getNamedOperandIdx(Opcode, OpName::src1),
3896 getNamedOperandIdx(Opcode, OpName::src2), ImmIdx};
3899bool AMDGPUAsmParser::usesConstantBus(
const MCInst &Inst,
unsigned OpIdx) {
3902 return !isInlineConstant(Inst,
OpIdx);
3909 return isSGPR(PReg,
TRI) && PReg != SGPR_NULL;
3920 const unsigned Opcode = Inst.
getOpcode();
3921 if (Opcode != V_WRITELANE_B32_gfx6_gfx7 && Opcode != V_WRITELANE_B32_vi)
3924 if (!LaneSelOp.
isReg())
3927 return LaneSelReg ==
M0 || LaneSelReg == M0_gfxpre11;
3930bool AMDGPUAsmParser::validateConstantBusLimitations(
3932 const unsigned Opcode = Inst.
getOpcode();
3933 const MCInstrDesc &
Desc = MII.
get(Opcode);
3934 MCRegister LastSGPR;
3935 unsigned ConstantBusUseCount = 0;
3936 unsigned NumLiterals = 0;
3937 unsigned LiteralSize;
3939 if (!(
Desc.TSFlags &
3954 SmallDenseSet<MCRegister> SGPRsUsed;
3955 MCRegister SGPRUsed = findImplicitSGPRReadInVOP(Inst);
3957 SGPRsUsed.
insert(SGPRUsed);
3958 ++ConstantBusUseCount;
3963 unsigned ConstantBusLimit = getConstantBusLimit(Opcode);
3965 for (
int OpIdx : OpIndices) {
3970 if (usesConstantBus(Inst,
OpIdx)) {
3979 if (SGPRsUsed.
insert(LastSGPR).second) {
3980 ++ConstantBusUseCount;
4000 if (NumLiterals == 0) {
4003 }
else if (LiteralSize !=
Size) {
4009 if (ConstantBusUseCount + NumLiterals > ConstantBusLimit) {
4011 "invalid operand (violates constant bus restrictions)");
4018std::optional<unsigned>
4019AMDGPUAsmParser::checkVOPDRegBankConstraints(
const MCInst &Inst,
bool AsVOPD3) {
4021 const unsigned Opcode = Inst.
getOpcode();
4027 auto getVRegIdx = [&](unsigned,
unsigned OperandIdx) {
4028 const MCOperand &Opr = Inst.
getOperand(OperandIdx);
4037 Opcode == AMDGPU::V_DUAL_MOV_B32_e32_X_MOV_B32_e32_gfx1170 ||
4038 Opcode == AMDGPU::V_DUAL_MOV_B32_e32_X_MOV_B32_e32_gfx12 ||
4039 Opcode == AMDGPU::V_DUAL_MOV_B32_e32_X_MOV_B32_e32_gfx1250 ||
4040 Opcode == AMDGPU::V_DUAL_MOV_B32_e32_X_MOV_B32_e32_gfx13 ||
4041 Opcode == AMDGPU::V_DUAL_MOV_B32_e32_X_MOV_B32_e32_e96_gfx1250 ||
4042 Opcode == AMDGPU::V_DUAL_MOV_B32_e32_X_MOV_B32_e32_e96_gfx13;
4046 for (
auto OpName : {OpName::src0X, OpName::src0Y}) {
4047 int I = getNamedOperandIdx(Opcode, OpName);
4051 int64_t
Imm =
Op.getImm();
4057 for (
auto OpName : {OpName::vsrc1X, OpName::vsrc1Y, OpName::vsrc2X,
4058 OpName::vsrc2Y, OpName::imm}) {
4059 int I = getNamedOperandIdx(Opcode, OpName);
4069 auto InvalidCompOprIdx = InstInfo.getInvalidCompOperandIndex(
4070 getVRegIdx, *
TRI, SkipSrc, AllowSameVGPR, AsVOPD3);
4072 return InvalidCompOprIdx;
4075bool AMDGPUAsmParser::validateVOPD(
const MCInst &Inst,
4082 for (
const std::unique_ptr<MCParsedAsmOperand> &Operand : Operands) {
4083 AMDGPUOperand &
Op = (AMDGPUOperand &)*Operand;
4084 if ((
Op.isRegKind() ||
Op.isImmTy(AMDGPUOperand::ImmTyNone)) &&
4086 Error(
Op.getStartLoc(),
"ABS not allowed in VOPD3 instructions");
4090 auto InvalidCompOprIdx = checkVOPDRegBankConstraints(Inst, AsVOPD3);
4091 if (!InvalidCompOprIdx.has_value())
4094 auto CompOprIdx = *InvalidCompOprIdx;
4097 std::max(InstInfo[
VOPD::X].getIndexInParsedOperands(CompOprIdx),
4098 InstInfo[
VOPD::Y].getIndexInParsedOperands(CompOprIdx));
4099 assert(ParsedIdx > 0 && ParsedIdx < Operands.size());
4101 auto Loc = ((AMDGPUOperand &)*Operands[ParsedIdx]).getStartLoc();
4102 if (CompOprIdx == VOPD::Component::DST) {
4104 Error(Loc,
"dst registers must be distinct");
4106 Error(Loc,
"one dst register must be even and the other odd");
4108 auto CompSrcIdx = CompOprIdx - VOPD::Component::DST_NUM;
4109 Error(Loc, Twine(
"src") + Twine(CompSrcIdx) +
4110 " operands must use different VGPR banks");
4118bool AMDGPUAsmParser::tryVOPD3(
const MCInst &Inst) {
4120 auto InvalidCompOprIdx = checkVOPDRegBankConstraints(Inst,
false);
4121 if (!InvalidCompOprIdx.has_value())
4125 InvalidCompOprIdx = checkVOPDRegBankConstraints(Inst,
true);
4126 if (InvalidCompOprIdx.has_value()) {
4131 if (*InvalidCompOprIdx == VOPD::Component::DST)
4144bool AMDGPUAsmParser::tryVOPD(
const MCInst &Inst) {
4145 const unsigned Opcode = Inst.
getOpcode();
4160 for (
auto OpName : {OpName::src0X_modifiers, OpName::src0Y_modifiers,
4161 OpName::vsrc1X_modifiers, OpName::vsrc1Y_modifiers,
4162 OpName::vsrc2X_modifiers, OpName::vsrc2Y_modifiers}) {
4163 int I = getNamedOperandIdx(Opcode, OpName);
4170 return !tryVOPD3(Inst);
4175bool AMDGPUAsmParser::tryAnotherVOPDEncoding(
const MCInst &Inst) {
4176 const unsigned Opcode = Inst.
getOpcode();
4181 return tryVOPD(Inst);
4182 return tryVOPD3(Inst);
4185bool AMDGPUAsmParser::validateIntClampSupported(
const MCInst &Inst) {
4191 int ClampIdx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::clamp);
4202bool AMDGPUAsmParser::validateMIMGDataSize(
const MCInst &Inst,
SMLoc IDLoc) {
4210 int VDataIdx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::vdata);
4211 int DMaskIdx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::dmask);
4212 int TFEIdx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::tfe);
4220 unsigned VDataSize = getRegOperandSize(
Desc, VDataIdx);
4221 unsigned TFESize = (TFEIdx != -1 && Inst.
getOperand(TFEIdx).
getImm()) ? 1 : 0;
4226 bool IsPackedD16 =
false;
4230 int D16Idx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::d16);
4231 IsPackedD16 = D16Idx >= 0;
4236 if ((VDataSize / 4) ==
DataSize + TFESize)
4241 Modifiers = IsPackedD16 ?
"dmask and d16" :
"dmask";
4243 Modifiers = IsPackedD16 ?
"dmask, d16 and tfe" :
"dmask and tfe";
4245 Error(IDLoc,
Twine(
"image data size does not match ") + Modifiers);
4249bool AMDGPUAsmParser::validateMIMGAddrSize(
const MCInst &Inst, SMLoc IDLoc) {
4258 const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode =
4260 int VAddr0Idx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::vaddr0);
4262 ? AMDGPU::OpName::srsrc
4263 : AMDGPU::OpName::rsrc;
4264 int SrsrcIdx = AMDGPU::getNamedOperandIdx(
Opc, RSrcOpName);
4265 int DimIdx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::dim);
4266 int A16Idx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::a16);
4270 assert(SrsrcIdx > VAddr0Idx);
4273 if (BaseOpcode->
BVH) {
4274 if (IsA16 == BaseOpcode->
A16)
4276 Error(IDLoc,
"image address size does not match a16");
4282 bool IsNSA = SrsrcIdx - VAddr0Idx > 1;
4283 unsigned ActualAddrSize =
4284 IsNSA ? SrsrcIdx - VAddr0Idx : getRegOperandSize(
Desc, VAddr0Idx) / 4;
4286 unsigned ExpectedAddrSize =
4290 if (hasPartialNSAEncoding() &&
4293 int VAddrLastIdx = SrsrcIdx - 1;
4294 unsigned VAddrLastSize = getRegOperandSize(
Desc, VAddrLastIdx) / 4;
4296 ActualAddrSize = VAddrLastIdx - VAddr0Idx + VAddrLastSize;
4299 if (ExpectedAddrSize > 12)
4300 ExpectedAddrSize = 16;
4305 if (ActualAddrSize == 8 && (ExpectedAddrSize >= 5 && ExpectedAddrSize <= 7))
4309 if (ActualAddrSize == ExpectedAddrSize)
4312 Error(IDLoc,
"image address size does not match dim and a16");
4316bool AMDGPUAsmParser::validateMIMGAtomicDMask(
const MCInst &Inst) {
4323 if (!
Desc.mayLoad() || !
Desc.mayStore())
4326 int DMaskIdx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::dmask);
4333 return DMask == 0x1 || DMask == 0x3 || DMask == 0xf;
4336bool AMDGPUAsmParser::validateMIMGGatherDMask(
const MCInst &Inst) {
4344 int DMaskIdx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::dmask);
4352 return DMask == 0x1 || DMask == 0x2 || DMask == 0x4 || DMask == 0x8;
4355bool AMDGPUAsmParser::validateMIMGDim(
const MCInst &Inst,
4370 for (
unsigned i = 1, e = Operands.
size(); i != e; ++i) {
4371 AMDGPUOperand &
Op = ((AMDGPUOperand &)*Operands[i]);
4378bool AMDGPUAsmParser::validateMIMGMSAA(
const MCInst &Inst) {
4386 const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode =
4389 if (!BaseOpcode->
MSAA)
4392 int DimIdx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::dim);
4398 return DimInfo->
MSAA;
4404 case AMDGPU::V_MOVRELS_B32_sdwa_gfx10:
4405 case AMDGPU::V_MOVRELSD_B32_sdwa_gfx10:
4406 case AMDGPU::V_MOVRELSD_2_B32_sdwa_gfx10:
4416bool AMDGPUAsmParser::validateMovrels(
const MCInst &Inst,
4425 const int Src0Idx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::src0);
4428 const MCOperand &Src0 = Inst.
getOperand(Src0Idx);
4436 Error(getOperandLoc(Operands, Src0Idx),
"source operand must be a VGPR");
4440bool AMDGPUAsmParser::validateMAIAccWrite(
const MCInst &Inst,
4445 if (
Opc != AMDGPU::V_ACCVGPR_WRITE_B32_vi)
4448 const int Src0Idx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::src0);
4451 const MCOperand &Src0 = Inst.
getOperand(Src0Idx);
4458 Error(getOperandLoc(Operands, Src0Idx),
4459 "source operand must be either a VGPR or an inline constant");
4466bool AMDGPUAsmParser::validateMAISrc2(
const MCInst &Inst,
4469 const MCInstrDesc &
Desc = MII.
get(Opcode);
4472 !getFeatureBits()[FeatureMFMAInlineLiteralBug])
4475 const int Src2Idx = getNamedOperandIdx(Opcode, OpName::src2);
4479 if (Inst.
getOperand(Src2Idx).
isImm() && isInlineConstant(Inst, Src2Idx)) {
4480 Error(getOperandLoc(Operands, Src2Idx),
4481 "inline constants are not allowed for this operand");
4488bool AMDGPUAsmParser::validateMFMA(
const MCInst &Inst,
4496 int BlgpIdx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::blgp);
4497 if (BlgpIdx != -1) {
4498 if (
const MFMA_F8F6F4_Info *Info = AMDGPU::isMFMA_F8F6F4(
Opc)) {
4499 int CbszIdx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::cbsz);
4509 int Src0Idx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::src0);
4510 Error(getOperandLoc(Operands, Src0Idx),
4511 "wrong register tuple size for cbsz value " + Twine(CBSZ));
4516 int Src1Idx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::src1);
4517 Error(getOperandLoc(Operands, Src1Idx),
4518 "wrong register tuple size for blgp value " + Twine(BLGP));
4526 const int Src2Idx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::src2);
4530 const MCOperand &Src2 = Inst.
getOperand(Src2Idx);
4534 MCRegister Src2Reg = Src2.
getReg();
4536 if (Src2Reg == DstReg)
4541 .getSizeInBits() <= 128)
4544 if (
TRI->regsOverlap(Src2Reg, DstReg)) {
4545 Error(getOperandLoc(Operands, Src2Idx),
4546 "source 2 operand must not partially overlap with dst");
// Validates an instruction against restrictions specific to the
// V_DIV_SCALE_F32 / V_DIV_SCALE_F64 opcodes named by the case labels below.
// After the opcode switch, the function walks the source-modifier operands of
// the instruction (src0_modifiers, src1_modifiers, src2_modifiers).
// NOTE(review): the switch's remaining labels/actions and the body of the loop
// are outside this excerpt, so the exact per-operand check is not restated
// here.
4553bool AMDGPUAsmParser::validateDivScale(
const MCInst &Inst) {
4557 case V_DIV_SCALE_F32_gfx6_gfx7:
4558 case V_DIV_SCALE_F32_vi:
4559 case V_DIV_SCALE_F32_gfx10:
4560 case V_DIV_SCALE_F64_gfx6_gfx7:
4561 case V_DIV_SCALE_F64_vi:
4562 case V_DIV_SCALE_F64_gfx10:
// Visit each of the three source-modifier operands exactly once. The list
// previously named src2_modifiers twice and never src1_modifiers, which left
// src1 unchecked while src2 was checked redundantly.
4568 for (
auto Name : {AMDGPU::OpName::src0_modifiers,
4569 AMDGPU::OpName::src1_modifiers,
4570 AMDGPU::OpName::src2_modifiers}) {
4581bool AMDGPUAsmParser::validateMIMGD16(
const MCInst &Inst) {
4589 int D16Idx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::d16);
4598bool AMDGPUAsmParser::validateTensorR128(
const MCInst &Inst) {
4605 int R128Idx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::r128);
4613 case AMDGPU::V_SUBREV_F32_e32:
4614 case AMDGPU::V_SUBREV_F32_e64:
4615 case AMDGPU::V_SUBREV_F32_e32_gfx10:
4616 case AMDGPU::V_SUBREV_F32_e32_gfx6_gfx7:
4617 case AMDGPU::V_SUBREV_F32_e32_vi:
4618 case AMDGPU::V_SUBREV_F32_e64_gfx10:
4619 case AMDGPU::V_SUBREV_F32_e64_gfx6_gfx7:
4620 case AMDGPU::V_SUBREV_F32_e64_vi:
4622 case AMDGPU::V_SUBREV_CO_U32_e32:
4623 case AMDGPU::V_SUBREV_CO_U32_e64:
4624 case AMDGPU::V_SUBREV_I32_e32_gfx6_gfx7:
4625 case AMDGPU::V_SUBREV_I32_e64_gfx6_gfx7:
4627 case AMDGPU::V_SUBBREV_U32_e32:
4628 case AMDGPU::V_SUBBREV_U32_e64:
4629 case AMDGPU::V_SUBBREV_U32_e32_gfx6_gfx7:
4630 case AMDGPU::V_SUBBREV_U32_e32_vi:
4631 case AMDGPU::V_SUBBREV_U32_e64_gfx6_gfx7:
4632 case AMDGPU::V_SUBBREV_U32_e64_vi:
4634 case AMDGPU::V_SUBREV_U32_e32:
4635 case AMDGPU::V_SUBREV_U32_e64:
4636 case AMDGPU::V_SUBREV_U32_e32_gfx9:
4637 case AMDGPU::V_SUBREV_U32_e32_vi:
4638 case AMDGPU::V_SUBREV_U32_e64_gfx9:
4639 case AMDGPU::V_SUBREV_U32_e64_vi:
4641 case AMDGPU::V_SUBREV_F16_e32:
4642 case AMDGPU::V_SUBREV_F16_e64:
4643 case AMDGPU::V_SUBREV_F16_e32_gfx10:
4644 case AMDGPU::V_SUBREV_F16_e32_vi:
4645 case AMDGPU::V_SUBREV_F16_e64_gfx10:
4646 case AMDGPU::V_SUBREV_F16_e64_vi:
4648 case AMDGPU::V_SUBREV_U16_e32:
4649 case AMDGPU::V_SUBREV_U16_e64:
4650 case AMDGPU::V_SUBREV_U16_e32_vi:
4651 case AMDGPU::V_SUBREV_U16_e64_vi:
4653 case AMDGPU::V_SUBREV_CO_U32_e32_gfx9:
4654 case AMDGPU::V_SUBREV_CO_U32_e64_gfx10:
4655 case AMDGPU::V_SUBREV_CO_U32_e64_gfx9:
4657 case AMDGPU::V_SUBBREV_CO_U32_e32_gfx9:
4658 case AMDGPU::V_SUBBREV_CO_U32_e64_gfx9:
4660 case AMDGPU::V_SUBREV_NC_U32_e32_gfx10:
4661 case AMDGPU::V_SUBREV_NC_U32_e64_gfx10:
4663 case AMDGPU::V_SUBREV_CO_CI_U32_e32_gfx10:
4664 case AMDGPU::V_SUBREV_CO_CI_U32_e64_gfx10:
4666 case AMDGPU::V_LSHRREV_B32_e32:
4667 case AMDGPU::V_LSHRREV_B32_e64:
4668 case AMDGPU::V_LSHRREV_B32_e32_gfx6_gfx7:
4669 case AMDGPU::V_LSHRREV_B32_e64_gfx6_gfx7:
4670 case AMDGPU::V_LSHRREV_B32_e32_vi:
4671 case AMDGPU::V_LSHRREV_B32_e64_vi:
4672 case AMDGPU::V_LSHRREV_B32_e32_gfx10:
4673 case AMDGPU::V_LSHRREV_B32_e64_gfx10:
4675 case AMDGPU::V_ASHRREV_I32_e32:
4676 case AMDGPU::V_ASHRREV_I32_e64:
4677 case AMDGPU::V_ASHRREV_I32_e32_gfx10:
4678 case AMDGPU::V_ASHRREV_I32_e32_gfx6_gfx7:
4679 case AMDGPU::V_ASHRREV_I32_e32_vi:
4680 case AMDGPU::V_ASHRREV_I32_e64_gfx10:
4681 case AMDGPU::V_ASHRREV_I32_e64_gfx6_gfx7:
4682 case AMDGPU::V_ASHRREV_I32_e64_vi:
4684 case AMDGPU::V_LSHLREV_B32_e32:
4685 case AMDGPU::V_LSHLREV_B32_e64:
4686 case AMDGPU::V_LSHLREV_B32_e32_gfx10:
4687 case AMDGPU::V_LSHLREV_B32_e32_gfx6_gfx7:
4688 case AMDGPU::V_LSHLREV_B32_e32_vi:
4689 case AMDGPU::V_LSHLREV_B32_e64_gfx10:
4690 case AMDGPU::V_LSHLREV_B32_e64_gfx6_gfx7:
4691 case AMDGPU::V_LSHLREV_B32_e64_vi:
4693 case AMDGPU::V_LSHLREV_B16_e32:
4694 case AMDGPU::V_LSHLREV_B16_e64:
4695 case AMDGPU::V_LSHLREV_B16_e32_vi:
4696 case AMDGPU::V_LSHLREV_B16_e64_vi:
4697 case AMDGPU::V_LSHLREV_B16_gfx10:
4699 case AMDGPU::V_LSHRREV_B16_e32:
4700 case AMDGPU::V_LSHRREV_B16_e64:
4701 case AMDGPU::V_LSHRREV_B16_e32_vi:
4702 case AMDGPU::V_LSHRREV_B16_e64_vi:
4703 case AMDGPU::V_LSHRREV_B16_gfx10:
4705 case AMDGPU::V_ASHRREV_I16_e32:
4706 case AMDGPU::V_ASHRREV_I16_e64:
4707 case AMDGPU::V_ASHRREV_I16_e32_vi:
4708 case AMDGPU::V_ASHRREV_I16_e64_vi:
4709 case AMDGPU::V_ASHRREV_I16_gfx10:
4711 case AMDGPU::V_LSHLREV_B64_e64:
4712 case AMDGPU::V_LSHLREV_B64_gfx10:
4713 case AMDGPU::V_LSHLREV_B64_vi:
4715 case AMDGPU::V_LSHRREV_B64_e64:
4716 case AMDGPU::V_LSHRREV_B64_gfx10:
4717 case AMDGPU::V_LSHRREV_B64_vi:
4719 case AMDGPU::V_ASHRREV_I64_e64:
4720 case AMDGPU::V_ASHRREV_I64_gfx10:
4721 case AMDGPU::V_ASHRREV_I64_vi:
4723 case AMDGPU::V_PK_LSHLREV_B16:
4724 case AMDGPU::V_PK_LSHLREV_B16_gfx10:
4725 case AMDGPU::V_PK_LSHLREV_B16_vi:
4727 case AMDGPU::V_PK_LSHRREV_B16:
4728 case AMDGPU::V_PK_LSHRREV_B16_gfx10:
4729 case AMDGPU::V_PK_LSHRREV_B16_vi:
4730 case AMDGPU::V_PK_ASHRREV_I16:
4731 case AMDGPU::V_PK_ASHRREV_I16_gfx10:
4732 case AMDGPU::V_PK_ASHRREV_I16_vi:
4739bool AMDGPUAsmParser::validateLdsDirect(
const MCInst &Inst,
4741 using namespace SIInstrFlags;
4742 const unsigned Opcode = Inst.
getOpcode();
4743 const MCInstrDesc &
Desc = MII.
get(Opcode);
4748 if ((
Desc.TSFlags & Enc) == 0)
4751 for (
auto SrcName : {OpName::src0, OpName::src1, OpName::src2}) {
4752 auto SrcIdx = getNamedOperandIdx(Opcode, SrcName);
4756 if (Src.isReg() && Src.getReg() == LDS_DIRECT) {
4759 Error(getOperandLoc(Operands, SrcIdx),
4760 "lds_direct is not supported on this GPU");
4765 Error(getOperandLoc(Operands, SrcIdx),
4766 "lds_direct cannot be used with this instruction");
4770 if (SrcName != OpName::src0) {
4771 Error(getOperandLoc(Operands, SrcIdx),
4772 "lds_direct may be used as src0 only");
// Finds the source location of the flat-offset operand among the parsed
// operands: scans Operands starting at index 1 and returns the start location
// of the first operand for which isFlatOffset() is true.
// NOTE(review): the value returned when no such operand exists is outside this
// excerpt.
4781SMLoc AMDGPUAsmParser::getFlatOffsetLoc(
const OperandVector &Operands)
const {
// Index 0 is skipped by the loop bounds below.
4782 for (
unsigned i = 1, e = Operands.
size(); i != e; ++i) {
4783 AMDGPUOperand &
Op = ((AMDGPUOperand &)*Operands[i]);
4784 if (
Op.isFlatOffset())
4785 return Op.getStartLoc();
4790bool AMDGPUAsmParser::validateOffset(
const MCInst &Inst,
4793 auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset);
4799 return validateFlatOffset(Inst, Operands);
4802 return validateSMEMOffset(Inst, Operands);
4808 const unsigned OffsetSize = 24;
4809 if (!
isUIntN(OffsetSize - 1,
Op.getImm())) {
4810 Error(getFlatOffsetLoc(Operands),
4811 Twine(
"expected a ") + Twine(OffsetSize - 1) +
4812 "-bit unsigned offset for buffer ops");
4816 const unsigned OffsetSize = 16;
4817 if (!
isUIntN(OffsetSize,
Op.getImm())) {
4818 Error(getFlatOffsetLoc(Operands),
4819 Twine(
"expected a ") + Twine(OffsetSize) +
"-bit unsigned offset");
4826bool AMDGPUAsmParser::validateFlatOffset(
const MCInst &Inst,
4833 auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset);
4837 if (!hasFlatOffsets() &&
Op.getImm() != 0) {
4838 Error(getFlatOffsetLoc(Operands),
4839 "flat offset modifier is not supported on this GPU");
4846 bool AllowNegative =
4849 if (!
isIntN(OffsetSize,
Op.getImm()) || (!AllowNegative &&
Op.getImm() < 0)) {
4850 Error(getFlatOffsetLoc(Operands),
4851 Twine(
"expected a ") +
4852 (AllowNegative ? Twine(OffsetSize) +
"-bit signed offset"
4853 : Twine(OffsetSize - 1) +
"-bit unsigned offset"));
// Finds the source location of the SMEM offset operand among the parsed
// operands: scans Operands starting at index 2 and returns the start location
// of the first operand for which isSMEMOffset() or isSMEMOffsetMod() is true.
// NOTE(review): the value returned when no such operand exists is outside this
// excerpt.
4860SMLoc AMDGPUAsmParser::getSMEMOffsetLoc(
const OperandVector &Operands)
const {
// Indices 0 and 1 are skipped by the loop bounds below.
4862 for (
unsigned i = 2, e = Operands.
size(); i != e; ++i) {
4863 AMDGPUOperand &
Op = ((AMDGPUOperand &)*Operands[i]);
4864 if (
Op.isSMEMOffset() ||
Op.isSMEMOffsetMod())
4865 return Op.getStartLoc();
4870bool AMDGPUAsmParser::validateSMEMOffset(
const MCInst &Inst,
4880 auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset);
4894 Error(getSMEMOffsetLoc(Operands),
4896 ?
"expected a 23-bit unsigned offset for buffer ops"
4897 :
isGFX12Plus() ?
"expected a 24-bit signed offset"
4898 : (
isVI() || IsBuffer) ?
"expected a 20-bit unsigned offset"
4899 :
"expected a 21-bit signed offset");
4904bool AMDGPUAsmParser::validateSOPLiteral(
const MCInst &Inst,
4907 const MCInstrDesc &
Desc = MII.
get(Opcode);
4911 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
4912 const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
4914 const int OpIndices[] = { Src0Idx, Src1Idx };
4916 unsigned NumExprs = 0;
4917 unsigned NumLiterals = 0;
4920 for (
int OpIdx : OpIndices) {
4921 if (
OpIdx == -1)
break;
4927 std::optional<int64_t>
Imm;
4930 }
else if (MO.
isExpr()) {
4939 if (!
Imm.has_value()) {
4941 }
else if (!isInlineConstant(Inst,
OpIdx)) {
4945 if (NumLiterals == 0 || LiteralValue !=
Value) {
4953 if (NumLiterals + NumExprs <= 1)
4956 Error(getOperandLoc(Operands, Src1Idx),
4957 "only one unique literal operand is allowed");
4961bool AMDGPUAsmParser::validateOpSel(
const MCInst &Inst) {
4964 int OpSelIdx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::op_sel);
4974 int OpSelIdx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::op_sel);
4975 if (OpSelIdx != -1) {
4979 int OpSelHiIdx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::op_sel_hi);
4980 if (OpSelHiIdx != -1) {
4989 int OpSelIdx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::op_sel);
4999 int Src0Idx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::src0);
5000 int Src1Idx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::src1);
5001 int OpSelIdx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::op_sel);
5002 int OpSelHiIdx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::op_sel_hi);
5004 const MCOperand &Src0 = Inst.
getOperand(Src0Idx);
5005 const MCOperand &Src1 = Inst.
getOperand(Src1Idx);
5011 auto VerifyOneSGPR = [
OpSel, OpSelHi](
unsigned Index) ->
bool {
5013 return ((OpSel & Mask) == 0) && ((OpSelHi &
Mask) == 0);
5023 int Src2Idx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::src2);
5024 if (Src2Idx != -1) {
5025 const MCOperand &Src2 = Inst.
getOperand(Src2Idx);
5035bool AMDGPUAsmParser::validateTrue16OpSel(
const MCInst &Inst) {
5036 if (!hasTrue16Insts())
5038 const MCRegisterInfo *MRI = getMRI();
5040 int OpSelIdx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::op_sel);
5046 if (OpSelOpValue == 0)
5048 unsigned OpCount = 0;
5049 for (AMDGPU::OpName OpName : {AMDGPU::OpName::src0, AMDGPU::OpName::src1,
5050 AMDGPU::OpName::src2, AMDGPU::OpName::vdst}) {
5051 int OpIdx = AMDGPU::getNamedOperandIdx(Inst.
getOpcode(), OpName);
5058 bool OpSelOpIsHi = ((OpSelOpValue & (1 << OpCount)) != 0);
5059 if (OpSelOpIsHi != VGPRSuffixIsHi)
5068bool AMDGPUAsmParser::validateNeg(
const MCInst &Inst, AMDGPU::OpName OpName) {
5069 assert(OpName == AMDGPU::OpName::neg_lo || OpName == AMDGPU::OpName::neg_hi);
5082 int NegIdx = AMDGPU::getNamedOperandIdx(
Opc, OpName);
5093 const AMDGPU::OpName SrcMods[3] = {AMDGPU::OpName::src0_modifiers,
5094 AMDGPU::OpName::src1_modifiers,
5095 AMDGPU::OpName::src2_modifiers};
5097 for (
unsigned i = 0; i < 3; ++i) {
5107bool AMDGPUAsmParser::validateDPP(
const MCInst &Inst,
5110 int DppCtrlIdx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::dpp_ctrl);
5111 if (DppCtrlIdx >= 0) {
5118 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyDppCtrl, Operands);
5119 Error(S,
isGFX12() ?
"DP ALU dpp only supports row_share"
5120 :
"DP ALU dpp only supports row_newbcast");
5125 int Dpp8Idx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::dpp8);
5126 bool IsDPP = DppCtrlIdx >= 0 || Dpp8Idx >= 0;
5129 int Src1Idx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::src1);
5131 const MCOperand &Src1 = Inst.
getOperand(Src1Idx);
5134 Error(getOperandLoc(Operands, Src1Idx),
5135 "invalid operand for instruction");
5139 Error(getInstLoc(Operands),
5140 "src1 immediate operand invalid for instruction");
// Returns true when Reg is the VCC register form that matches the current
// wave size: AMDGPU::VCC when isWave64() holds, or AMDGPU::VCC_LO when
// isWave32() holds. Any other register, or a form that does not match the
// wave size, yields false.
5150bool AMDGPUAsmParser::validateVccOperand(MCRegister
Reg)
const {
5151 return (
Reg == AMDGPU::VCC && isWave64()) ||
5152 (
Reg == AMDGPU::VCC_LO && isWave32());
// Validates literal operands of Inst. HasMandatoryLiteral is true when the
// opcode has a named `imm` operand. The loop walks the operand indices in
// OpIndices; for each one it decides whether the operand introduces another
// literal (IsAnotherLiteral), taking forced lit / lit64 modifiers and
// isInlineConstant() into account, and remembers the index of the literal in
// LiteralOpIdx. Diagnostics raised here:
//  - "invalid operand for instruction" for the rejected lit64 / non-32-bit
//    combinations tested below;
//  - "literal operands are not supported" when another literal appears, the
//    opcode has no mandatory literal and FeatureVOP3Literal is absent;
//  - "only one unique literal operand is allowed" when LiteralOpIdx is already
//    set and another literal is found (reported at the later of the two
//    operand locations).
5156bool AMDGPUAsmParser::validateVOPLiteral(
const MCInst &Inst,
5159 const MCInstrDesc &
Desc = MII.
get(Opcode);
5160 bool HasMandatoryLiteral = getNamedOperandIdx(Opcode, OpName::imm) != -1;
5162 !HasMandatoryLiteral && !
isVOPD(Opcode))
5167 std::optional<unsigned> LiteralOpIdx;
5170 for (
int OpIdx : OpIndices) {
5180 std::optional<int64_t>
Imm;
5186 bool IsAnotherLiteral =
false;
5187 bool IsForcedLit = findMCOperand(Operands,
OpIdx).isForcedLit();
5188 bool IsForcedLit64 = findMCOperand(Operands,
OpIdx).isForcedLit64();
5189 if (!
Imm.has_value()) {
5191 IsAnotherLiteral =
true;
5192 }
else if (IsForcedLit || IsForcedLit64 || !isInlineConstant(Inst,
OpIdx)) {
5197 HasMandatoryLiteral);
5198 unsigned OpTy =
Desc.operands()[
OpIdx].OperandType;
5208 (IsForcedLit64 && !HasMandatoryLiteral)) &&
5209 (!has64BitLiterals() ||
Desc.getSize() != 4)) {
5211 "invalid operand for instruction");
5216 if (!IsForcedFP64 && (IsForcedLit64 || !IsValid32Op) &&
5217 OpIdx != getNamedOperandIdx(Opcode, OpName::src0)) {
5219 "invalid operand for instruction");
5223 if (IsFP64 && IsValid32Op && !IsForcedFP64)
5230 if (IsAnotherLiteral && !HasMandatoryLiteral &&
5231 !getFeatureBits()[FeatureVOP3Literal]) {
5233 "literal operands are not supported");
5237 if (LiteralOpIdx && IsAnotherLiteral) {
5238 Error(getLaterLoc(getOperandLoc(Operands,
OpIdx),
5239 getOperandLoc(Operands, *LiteralOpIdx)),
5240 "only one unique literal operand is allowed");
5244 if (IsAnotherLiteral)
5245 LiteralOpIdx =
OpIdx;
// Checks AGPR usage of the register operands of a load/store. IsAGPROperand is
// queried for vdst and data1; the data operand name is chosen between data0
// and vdata. A data1 result that is >= 0 is compared against DataAreg.
// With FeatureGFX90AInsts the result is DstAreg == DataAreg (after the check
// on negative values); without it, both DstAreg and DataAreg must be < 1.
5268bool AMDGPUAsmParser::validateAGPRLdSt(
const MCInst &Inst)
const {
5276 ? AMDGPU::OpName::data0
5277 : AMDGPU::OpName::vdata;
5279 const MCRegisterInfo *MRI = getMRI();
5280 int DstAreg =
IsAGPROperand(Inst, AMDGPU::OpName::vdst, MRI);
5284 int Data2Areg =
IsAGPROperand(Inst, AMDGPU::OpName::data1, MRI);
5285 if (Data2Areg >= 0 && Data2Areg != DataAreg)
5289 auto FB = getFeatureBits();
5290 if (FB[AMDGPU::FeatureGFX90AInsts]) {
5291 if (DataAreg < 0 || DstAreg < 0)
5293 return DstAreg == DataAreg;
5296 return DstAreg < 1 && DataAreg < 1;
// Register-alignment check, gated on FeatureRequiresAlignedVGPRs.
// DS_READ_B96_TR_B6_vi is special-cased under FeatureGFX90AInsts, and the
// DS_LOAD_TR6_B96 / GLOBAL_LOAD_TR6_B96 opcode families are special-cased
// under FeatureGFX1250Insts. For the GLOBAL_LOAD_TR6_B96 forms the vaddr
// operand is looked up and the low bit of (Sub - VGPR0) is tested, i.e. an
// odd register offset is detected.
// NOTE(review): Sub is presumably a sub-register of the vaddr operand -
// confirm against its definition.
5299bool AMDGPUAsmParser::validateVGPRAlign(
const MCInst &Inst)
const {
5300 auto FB = getFeatureBits();
5301 if (!FB[AMDGPU::FeatureRequiresAlignedVGPRs])
5305 const MCRegisterInfo *MRI = getMRI();
5308 if (FB[AMDGPU::FeatureGFX90AInsts] &&
Opc == AMDGPU::DS_READ_B96_TR_B6_vi)
5311 if (FB[AMDGPU::FeatureGFX1250Insts]) {
5315 case AMDGPU::DS_LOAD_TR6_B96:
5316 case AMDGPU::DS_LOAD_TR6_B96_gfx12:
5320 case AMDGPU::GLOBAL_LOAD_TR6_B96:
5321 case AMDGPU::GLOBAL_LOAD_TR6_B96_gfx1250: {
5325 int VAddrIdx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::vaddr);
5326 if (VAddrIdx != -1) {
5329 if ((
Sub - AMDGPU::VGPR0) & 1)
5334 case AMDGPU::GLOBAL_LOAD_TR6_B96_SADDR:
5335 case AMDGPU::GLOBAL_LOAD_TR6_B96_SADDR_gfx1250:
5340 const MCRegisterClass &VGPR32 = MRI->
getRegClass(AMDGPU::VGPR_32RegClassID);
5341 const MCRegisterClass &AGPR32 = MRI->
getRegClass(AMDGPU::AGPR_32RegClassID);
// Scans the parsed operands from index 1 onward and returns the start
// location of the selected AMDGPUOperand.
// NOTE(review): presumably the operand selected is the blgp one, given the
// function name - confirm against the selecting condition.
5360SMLoc AMDGPUAsmParser::getBLGPLoc(
const OperandVector &Operands)
const {
5361 for (
unsigned i = 1, e = Operands.
size(); i != e; ++i) {
5362 AMDGPUOperand &
Op = ((AMDGPUOperand &)*Operands[i]);
5364 return Op.getStartLoc();
// Checks that the spelling used for the blgp operand matches what the opcode
// expects. IsNeg is true when the source text at the location returned by
// getBLGPLoc() starts with "neg:". UsesNeg starts out false; the gfx940 F64
// MFMA opcodes listed below are the cases considered when FeatureGFX940Insts
// is set. When IsNeg and UsesNeg disagree the diagnostic is
// "invalid modifier: blgp is not supported" (UsesNeg) or
// "invalid modifier: neg is not supported" (otherwise).
5369bool AMDGPUAsmParser::validateBLGP(
const MCInst &Inst,
5372 int BlgpIdx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::blgp);
5375 SMLoc BLGPLoc = getBLGPLoc(Operands);
5378 bool IsNeg = StringRef(BLGPLoc.
getPointer()).starts_with(
"neg:");
5379 auto FB = getFeatureBits();
5380 bool UsesNeg =
false;
5381 if (FB[AMDGPU::FeatureGFX940Insts]) {
5383 case AMDGPU::V_MFMA_F64_16X16X4F64_gfx940_acd:
5384 case AMDGPU::V_MFMA_F64_16X16X4F64_gfx940_vcd:
5385 case AMDGPU::V_MFMA_F64_4X4X4F64_gfx940_acd:
5386 case AMDGPU::V_MFMA_F64_4X4X4F64_gfx940_vcd:
5391 if (IsNeg == UsesNeg)
5395 UsesNeg ?
"invalid modifier: blgp is not supported"
5396 :
"invalid modifier: neg is not supported");
// Applies only to the gfx11 S_WAITCNT_EXPCNT / LGKMCNT / VMCNT / VSCNT
// opcodes. The register operand is compared against SGPR_NULL; the diagnostic
// "src0 must be null" is reported at that operand's location.
// NOTE(review): the variable is called Src0Idx and the message says "src0",
// but the index is looked up under OpName::sdst - confirm this naming is
// intentional.
5401bool AMDGPUAsmParser::validateWaitCnt(
const MCInst &Inst,
5407 if (
Opc != AMDGPU::S_WAITCNT_EXPCNT_gfx11 &&
5408 Opc != AMDGPU::S_WAITCNT_LGKMCNT_gfx11 &&
5409 Opc != AMDGPU::S_WAITCNT_VMCNT_gfx11 &&
5410 Opc != AMDGPU::S_WAITCNT_VSCNT_gfx11)
5413 int Src0Idx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::sdst);
5416 if (
Reg == AMDGPU::SGPR_NULL)
5419 Error(getOperandLoc(Operands, Src0Idx),
"src0 must be null");
// Validation for DS instructions. One path delegates to validateGWS(); another
// looks up the gds operand and reports "gds modifier is not supported on this
// GPU" at the gds immediate's location.
5423bool AMDGPUAsmParser::validateDS(
const MCInst &Inst,
5429 return validateGWS(Inst, Operands);
5434 AMDGPU::getNamedOperandIdx(Inst.
getOpcode(), AMDGPU::OpName::gds);
5439 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyGDS, Operands);
5440 Error(S,
"gds modifier is not supported on this GPU");
// GWS data-register alignment check. Only relevant with FeatureGFX90AInsts and
// for DS_GWS_INIT_vi, DS_GWS_BARRIER_vi and DS_GWS_SEMA_BR_vi. The data0
// register is converted to an index relative to VGPR0 (when it belongs to the
// VGPR_32 class) or AGPR0 (otherwise); the diagnostic "vgpr must be even
// aligned" is reported at the data0 operand.
5448bool AMDGPUAsmParser::validateGWS(
const MCInst &Inst,
5450 if (!getFeatureBits()[AMDGPU::FeatureGFX90AInsts])
5454 if (
Opc != AMDGPU::DS_GWS_INIT_vi &&
Opc != AMDGPU::DS_GWS_BARRIER_vi &&
5455 Opc != AMDGPU::DS_GWS_SEMA_BR_vi)
5458 const MCRegisterInfo *MRI = getMRI();
5459 const MCRegisterClass &VGPR32 = MRI->
getRegClass(AMDGPU::VGPR_32RegClassID);
5461 AMDGPU::getNamedOperandIdx(Inst.
getOpcode(), AMDGPU::OpName::data0);
5464 auto RegIdx =
Reg - (VGPR32.
contains(
Reg) ? AMDGPU::VGPR0 : AMDGPU::AGPR0);
5466 Error(getOperandLoc(Operands, Data0Pos),
"vgpr must be even aligned");
// Validates the cache-policy (cpol) operand of Inst. One path hands the CPol
// value to validateTHAndScopeBits(). Diagnostics raised here are anchored at
// the cpol immediate unless noted:
//  - "scale_offset is not supported on this GPU"
//  - "nv is not supported on this GPU"
//  - "scale_offset is not supported for this instruction"
//  - "cache policy is not supported for SMRD instructions"
//  - "invalid cache policy for SMEM instruction" (at IDLoc)
//  - "scc modifier is not supported for this instruction on this GPU", when
//    TSFlags lacks AllowSCCModifier
//  - "instruction must use glc" / "instruction must not use glc" (the
//    alternative message for the other side of each conditional is selected
//    by a condition not repeated here). For the "must not" case the location
//    is computed from the position of "sc0" (isGFX940()) or "glc" inside CStr.
5473bool AMDGPUAsmParser::validateCoherencyBits(
const MCInst &Inst,
5476 int CPolPos = AMDGPU::getNamedOperandIdx(Inst.
getOpcode(),
5477 AMDGPU::OpName::cpol);
5485 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands);
5488 Error(S,
"scale_offset is not supported on this GPU");
5491 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands);
5494 Error(S,
"nv is not supported on this GPU");
5499 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands);
5502 Error(S,
"scale_offset is not supported for this instruction");
5506 return validateTHAndScopeBits(Inst, Operands, CPol);
5511 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands);
5512 Error(S,
"cache policy is not supported for SMRD instructions");
5516 Error(IDLoc,
"invalid cache policy for SMEM instruction");
5525 if (!(TSFlags & AllowSCCModifier)) {
5526 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands);
5530 "scc modifier is not supported for this instruction on this GPU");
5541 :
"instruction must use glc");
5546 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands);
5549 &CStr.data()[CStr.find(
isGFX940() ?
"sc0" :
"glc")]);
5551 :
"instruction must not use glc");
// Validates the th / scope information carried in CPol for Inst, using the
// instruction description (TID) of its opcode. Every failure path returns
// through PrintError with one of:
//  - "th:TH_ATOMIC_RETURN requires a destination operand"
//  - "instruction must use th:TH_ATOMIC_RETURN"
//  - "invalid th value for SMEM instruction"
//  - "scope and th combination is not valid"
//  - "invalid th value for atomic instructions"
//  - "invalid th value for store instructions"
//  - "invalid th value for load instructions"
// NOTE(review): S (the cpol immediate location) is presumably where
// PrintError anchors its diagnostic - confirm.
5559bool AMDGPUAsmParser::validateTHAndScopeBits(
const MCInst &Inst,
5561 const unsigned CPol) {
5565 const unsigned Opcode = Inst.
getOpcode();
5566 const MCInstrDesc &TID = MII.
get(Opcode);
5569 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands);
5576 return PrintError(
"th:TH_ATOMIC_RETURN requires a destination operand");
5581 return PrintError(
"instruction must use th:TH_ATOMIC_RETURN");
5589 return PrintError(
"invalid th value for SMEM instruction");
5596 return PrintError(
"scope and th combination is not valid");
5602 return PrintError(
"invalid th value for atomic instructions");
5605 return PrintError(
"invalid th value for store instructions");
5608 return PrintError(
"invalid th value for load instructions");
// Rejects the TFE modifier on instructions that may store. The TFE immediate
// location is looked up; when it differs from the instruction's own location
// the diagnostic "TFE modifier has no meaning for store instructions" is
// reported there.
5614bool AMDGPUAsmParser::validateTFE(
const MCInst &Inst,
5617 if (
Desc.mayStore() &&
5619 SMLoc Loc = getImmLoc(AMDGPUOperand::ImmTyTFE, Operands);
5620 if (Loc != getInstLoc(Operands)) {
5621 Error(Loc,
"TFE modifier has no meaning for store instructions");
// Validates the matrix format operands of a WMMA instruction.
//  - matrix_a_fmt / matrix_b_fmt are looked up, and the validateFmt lambda
//    checks src0 against AFmt and src1 against BFmt; a mismatch is reported at
//    the source operand with a message starting "wrong register tuple size
//    for ".
//  - matrix_a_scale_fmt / matrix_b_scale_fmt are looked up next (an index of
//    -1 for the A scale is tested explicitly); an invalid pairing is reported
//    as "invalid matrix and scale format combination" at the MatrixAFMT
//    immediate location.
5629bool AMDGPUAsmParser::validateWMMA(
const MCInst &Inst,
5635 int AFmtIdx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::matrix_a_fmt);
5639 int BFmtIdx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::matrix_b_fmt);
5642 auto validateFmt = [&](
unsigned Fmt, AMDGPU::OpName SrcOp) ->
bool {
5643 int SrcIdx = AMDGPU::getNamedOperandIdx(
Opc, SrcOp);
5651 Error(getOperandLoc(Operands, SrcIdx),
5652 "wrong register tuple size for " +
5657 if (!validateFmt(AFmt, AMDGPU::OpName::src0) ||
5658 !validateFmt(BFmt, AMDGPU::OpName::src1))
5662 AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::matrix_a_scale_fmt);
5663 if (AScaleIdx == -1)
5667 AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::matrix_b_scale_fmt);
5670 Error(getImmLoc(AMDGPUOperand::ImmTyMatrixAFMT, Operands),
5671 "invalid matrix and scale format combination");
// Top-level semantic validation of a matched instruction: runs the individual
// validate* checks one after another. Checks that are called with Operands
// and/or IDLoc can report their own diagnostics; for checks that only take
// Inst (validateTrue16OpSel, validateIntClampSupported, validateOpSel,
// validateNeg, validateMIMGD16, validateTensorR128, validateMIMGMSAA,
// validateMIMGAtomicDMask, validateMIMGGatherDMask, validateAGPRLdSt,
// validateVGPRAlign, validateDivScale) the diagnostic is emitted here.
// NOTE(review): the validateTensorR128 failure ("instruction must set modifier
// r128=0") is anchored at the ImmTyD16 immediate - confirm that location is
// intended rather than a copy of the d16 check above it.
5678bool AMDGPUAsmParser::validateInstruction(
const MCInst &Inst, SMLoc IDLoc,
5680 if (!validateLdsDirect(Inst, Operands))
5682 if (!validateTrue16OpSel(Inst)) {
5683 Error(getImmLoc(AMDGPUOperand::ImmTyOpSel, Operands),
5684 "op_sel operand conflicts with 16-bit operand suffix");
5687 if (!validateSOPLiteral(Inst, Operands))
5689 if (!validateVOPLiteral(Inst, Operands)) {
5692 if (!validateConstantBusLimitations(Inst, Operands)) {
5695 if (!validateVOPD(Inst, Operands)) {
5698 if (!validateIntClampSupported(Inst)) {
5699 Error(getImmLoc(AMDGPUOperand::ImmTyClamp, Operands),
5700 "integer clamping is not supported on this GPU");
5703 if (!validateOpSel(Inst)) {
5704 Error(getImmLoc(AMDGPUOperand::ImmTyOpSel, Operands),
5705 "invalid op_sel operand");
5708 if (!validateNeg(Inst, AMDGPU::OpName::neg_lo)) {
5709 Error(getImmLoc(AMDGPUOperand::ImmTyNegLo, Operands),
5710 "invalid neg_lo operand");
5713 if (!validateNeg(Inst, AMDGPU::OpName::neg_hi)) {
5714 Error(getImmLoc(AMDGPUOperand::ImmTyNegHi, Operands),
5715 "invalid neg_hi operand");
5718 if (!validateDPP(Inst, Operands)) {
5722 if (!validateMIMGD16(Inst)) {
5723 Error(getImmLoc(AMDGPUOperand::ImmTyD16, Operands),
5724 "d16 modifier is not supported on this GPU");
5727 if (!validateMIMGDim(Inst, Operands)) {
5728 Error(IDLoc,
"missing dim operand");
5731 if (!validateTensorR128(Inst)) {
5732 Error(getImmLoc(AMDGPUOperand::ImmTyD16, Operands),
5733 "instruction must set modifier r128=0");
5736 if (!validateMIMGMSAA(Inst)) {
5737 Error(getImmLoc(AMDGPUOperand::ImmTyDim, Operands),
5738 "invalid dim; must be MSAA type");
5741 if (!validateMIMGDataSize(Inst, IDLoc)) {
5744 if (!validateMIMGAddrSize(Inst, IDLoc))
5746 if (!validateMIMGAtomicDMask(Inst)) {
5747 Error(getImmLoc(AMDGPUOperand::ImmTyDMask, Operands),
5748 "invalid atomic image dmask");
5751 if (!validateMIMGGatherDMask(Inst)) {
5752 Error(getImmLoc(AMDGPUOperand::ImmTyDMask, Operands),
5753 "invalid image_gather dmask: only one bit must be set");
5756 if (!validateMovrels(Inst, Operands)) {
5759 if (!validateOffset(Inst, Operands)) {
5762 if (!validateMAIAccWrite(Inst, Operands)) {
5765 if (!validateMAISrc2(Inst, Operands)) {
5768 if (!validateMFMA(Inst, Operands)) {
5771 if (!validateCoherencyBits(Inst, Operands, IDLoc)) {
5775 if (!validateAGPRLdSt(Inst)) {
5776 Error(IDLoc, getFeatureBits()[AMDGPU::FeatureGFX90AInsts]
5777 ?
"invalid register class: data and dst should be all VGPR or AGPR"
5778 :
"invalid register class: agpr loads and stores not supported on this GPU"
5782 if (!validateVGPRAlign(Inst)) {
5784 "invalid register class: vgpr tuples must be 64 bit aligned");
5787 if (!validateDS(Inst, Operands)) {
5791 if (!validateBLGP(Inst, Operands)) {
5795 if (!validateDivScale(Inst)) {
5796 Error(IDLoc,
"ABS not allowed in VOP3B instructions");
5799 if (!validateWaitCnt(Inst, Operands)) {
5802 if (!validateTFE(Inst, Operands)) {
5805 if (!validateWMMA(Inst, Operands)) {
5814 unsigned VariantID = 0);
5818 unsigned VariantID);
// Overload of isSupportedMnemo taking the mnemonic first.
// NOTE(review): presumably this is the two-argument form (mnemonic + feature
// set) used by checkUnsupportedInstruction - confirm against the full
// parameter list.
5820bool AMDGPUAsmParser::isSupportedMnemo(
StringRef Mnemo,
// Overload of isSupportedMnemo that takes the mnemonic, a feature set FBS and
// a list of assembler variant IDs, and iterates over each entry of Variants.
// NOTE(review): presumably returns true as soon as one variant accepts Mnemo
// under FBS - confirm against the loop body.
5825bool AMDGPUAsmParser::isSupportedMnemo(StringRef Mnemo,
5826 const FeatureBitset &FBS,
5827 ArrayRef<unsigned> Variants) {
5828 for (
auto Variant : Variants) {
// Chooses a diagnostic explaining why the mnemonic Mnemo could not be used.
// Steps, in order:
//  1. Compute the available features and test whether Mnemo is supported for
//     the currently matched variants.
//  2. Clear any pending parser errors.
//  3. If a variant name was requested and Mnemo is supported in some variant,
//     report that the "... variant of this instruction is not supported".
//  4. On GFX10+ with FeatureWavefrontSize64 set and FeatureWavefrontSize32
//     clear, retry with both wave-size feature bits flipped; if that succeeds
//     report "instruction requires wavesize=32".
//  5. If Mnemo is supported when every feature bit is set, report
//     "instruction not supported on this GPU (<cpu>): <mnemonic>".
//  6. Otherwise report "invalid instruction" followed by Suggestion.
5836bool AMDGPUAsmParser::checkUnsupportedInstruction(StringRef Mnemo,
5838 FeatureBitset FBS = ComputeAvailableFeatures(getFeatureBits());
5841 if (isSupportedMnemo(Mnemo, FBS, getMatchedVariants()))
5846 getParser().clearPendingErrors();
5850 StringRef VariantName = getMatchedVariantName();
5851 if (!VariantName.
empty() && isSupportedMnemo(Mnemo, FBS)) {
5854 " variant of this instruction is not supported"));
5858 if (
isGFX10Plus() && getFeatureBits()[AMDGPU::FeatureWavefrontSize64] &&
5859 !getFeatureBits()[AMDGPU::FeatureWavefrontSize32]) {
5861 FeatureBitset FeaturesWS32 = getFeatureBits();
5862 FeaturesWS32.
flip(AMDGPU::FeatureWavefrontSize64)
5863 .
flip(AMDGPU::FeatureWavefrontSize32);
5864 FeatureBitset AvailableFeaturesWS32 =
5865 ComputeAvailableFeatures(FeaturesWS32);
5867 if (isSupportedMnemo(Mnemo, AvailableFeaturesWS32, getMatchedVariants()))
5868 return Error(IDLoc,
"instruction requires wavesize=32");
5872 if (isSupportedMnemo(Mnemo, FeatureBitset().set())) {
5873 return Error(IDLoc,
"instruction not supported on this GPU (" +
5874 getSTI().
getCPU() +
")" +
": " + Mnemo);
5879 return Error(IDLoc,
"invalid instruction" + Suggestion);
5885 const auto &
Op = ((AMDGPUOperand &)*Operands[InvalidOprIdx]);
5886 if (
Op.isToken() && InvalidOprIdx > 1) {
5887 const auto &PrevOp = ((AMDGPUOperand &)*Operands[InvalidOprIdx - 1]);
5888 return PrevOp.isToken() && PrevOp.getToken() ==
"::";
// Matches the parsed operands against each assembler variant returned by
// getMatchedVariants() and keeps the most specific result. A new result R is
// adopted when it is Match_Success or Match_MissingFeature; when it is
// Match_InvalidOperand and the kept result is not Match_MissingFeature; or
// when it is Match_MnemonicFail and the kept result is neither
// Match_InvalidOperand nor Match_MissingFeature.
// On success the instruction goes through validateInstruction(). Otherwise
// checkUnsupportedInstruction() is consulted for the mnemonic (Operands[0]),
// and the remaining cases produce:
//  - Match_MissingFeature: "operands are not valid for this GPU or mode"
//  - Match_InvalidOperand: "too few operands for instruction" when ErrorInfo
//    is past the operand list; otherwise "invalid VOPDY instruction" or
//    "invalid operand for instruction" at the offending operand's location
//    (ErrorLoc starts as IDLoc; an empty operand location is tested below).
5893bool AMDGPUAsmParser::matchAndEmitInstruction(SMLoc IDLoc,
unsigned &Opcode,
5896 uint64_t &ErrorInfo,
5897 bool MatchingInlineAsm) {
5900 unsigned Result = Match_Success;
5901 for (
auto Variant : getMatchedVariants()) {
5903 auto R = MatchInstructionImpl(Operands, Inst, EI, MatchingInlineAsm,
5908 if (R == Match_Success || R == Match_MissingFeature ||
5909 (R == Match_InvalidOperand && Result != Match_MissingFeature) ||
5910 (R == Match_MnemonicFail && Result != Match_InvalidOperand &&
5911 Result != Match_MissingFeature)) {
5915 if (R == Match_Success)
5919 if (Result == Match_Success) {
5920 if (!validateInstruction(Inst, IDLoc, Operands)) {
5927 StringRef Mnemo = ((AMDGPUOperand &)*Operands[0]).
getToken();
5928 if (checkUnsupportedInstruction(Mnemo, IDLoc)) {
5934 case Match_MissingFeature:
5938 return Error(IDLoc,
"operands are not valid for this GPU or mode");
5940 case Match_InvalidOperand: {
5941 SMLoc ErrorLoc = IDLoc;
5942 if (ErrorInfo != ~0ULL) {
5943 if (ErrorInfo >= Operands.
size()) {
5944 return Error(IDLoc,
"too few operands for instruction");
5946 ErrorLoc = ((AMDGPUOperand &)*Operands[ErrorInfo]).getStartLoc();
5947 if (ErrorLoc == SMLoc())
5951 return Error(ErrorLoc,
"invalid VOPDY instruction");
5953 return Error(ErrorLoc,
"invalid operand for instruction");
5956 case Match_MnemonicFail:
// Parses an absolute expression with the generic parser into Tmp and stores
// it in Ret converted with static_cast<uint32_t>, so any bits of Tmp that do
// not fit in uint32_t are discarded.
5962bool AMDGPUAsmParser::ParseAsAbsoluteExpression(uint32_t &Ret) {
5967 if (getParser().parseAbsoluteExpression(Tmp)) {
5970 Ret =
static_cast<uint32_t
>(Tmp);
// Handles the .amdgcn_target directive (name taken from the diagnostic text
// below). Only accepted for amdgcn triples. Parses an escaped string and
// compares it with the target streamer's target id string; on a mismatch an
// error naming both ids is reported at the start of the parsed string.
5974bool AMDGPUAsmParser::ParseDirectiveAMDGCNTarget() {
5975 if (!getSTI().getTargetTriple().isAMDGCN())
5976 return TokError(
"directive only supported for amdgcn architecture");
5978 std::string TargetIDDirective;
5979 SMLoc TargetStart = getTok().getLoc();
5980 if (getParser().parseEscapedString(TargetIDDirective))
5983 SMRange TargetRange = SMRange(TargetStart, getTok().getLoc());
5984 if (getTargetStreamer().getTargetID()->
toString() != TargetIDDirective)
5985 return getParser().Error(TargetRange.
Start,
5986 (Twine(
".amdgcn_target directive's target id ") +
5987 Twine(TargetIDDirective) +
5988 Twine(
" does not match the specified target id ") +
5989 Twine(getTargetStreamer().getTargetID()->
toString())).str());
// Error helper taking the source range of the offending value. Callers in
// this file use its result directly as their own return value.
// NOTE(review): presumably emits a "value out of range" style diagnostic for
// Range - confirm against the body.
5994bool AMDGPUAsmParser::OutOfRangeError(SMRange
Range) {
// Computes the VGPRBlocks / SGPRBlocks output expressions from the next-free
// register expressions. The SGPR count starts as NextFreeSGPR and, when it
// evaluates to a constant, is range-checked against MaxAddressableNumSGPRs in
// two places:
//  - before ExtraSGPRs is formed, for Version.Major >= 8 without
//    FeatureSGPRInitBug;
//  - after ExtraSGPRs is formed, for Version.Major <= 7 or with
//    FeatureSGPRInitBug.
// Either violation returns OutOfRangeError(SGPRRange). The GetNumGPRBlocks
// lambda builds a block-count expression for a register count and a Granule
// (the AlignToGPR / DivGPR intermediates suggest align-then-divide - confirm).
5998bool AMDGPUAsmParser::calculateGPRBlocks(
5999 const FeatureBitset &Features,
const MCExpr *VCCUsed,
6000 const MCExpr *FlatScrUsed,
bool XNACKUsed,
6001 std::optional<bool> EnableWavefrontSize32,
const MCExpr *NextFreeVGPR,
6002 SMRange VGPRRange,
const MCExpr *NextFreeSGPR, SMRange SGPRRange,
6003 const MCExpr *&VGPRBlocks,
const MCExpr *&SGPRBlocks) {
6009 const MCExpr *
NumSGPRs = NextFreeSGPR;
6010 int64_t EvaluatedSGPRs;
6017 if (
NumSGPRs->evaluateAsAbsolute(EvaluatedSGPRs) &&
Version.Major >= 8 &&
6018 !Features.
test(FeatureSGPRInitBug) &&
6019 static_cast<uint64_t
>(EvaluatedSGPRs) > MaxAddressableNumSGPRs)
6020 return OutOfRangeError(SGPRRange);
6022 const MCExpr *ExtraSGPRs =
6026 if (
NumSGPRs->evaluateAsAbsolute(EvaluatedSGPRs) &&
6027 (
Version.Major <= 7 || Features.
test(FeatureSGPRInitBug)) &&
6028 static_cast<uint64_t
>(EvaluatedSGPRs) > MaxAddressableNumSGPRs)
6029 return OutOfRangeError(SGPRRange);
6031 if (Features.
test(FeatureSGPRInitBug))
6038 auto GetNumGPRBlocks = [&Ctx](
const MCExpr *NumGPR,
6039 unsigned Granule) ->
const MCExpr * {
6043 const MCExpr *AlignToGPR =
6045 const MCExpr *DivGPR =
6051 VGPRBlocks = GetNumGPRBlocks(
// Parses a kernel descriptor block: a kernel name followed by a sequence of
// .amdhsa_* directives terminated by .end_amdhsa_kernel. Only accepted for
// amdgcn triples on the amdhsa OS.
//
// Each directive is an identifier followed by an expression (ExprVal); when
// the expression evaluates to a constant the value is also available as
// IVal / Val. Repeated directives and unknown names are rejected. Most
// directives store ExprVal into a bit-field of the kernel descriptor KD; the
// PARSE_BITS_ENTRY macro range-checks Val against the field's _WIDTH before
// setting it, and EXPR_RESOLVE_OR_ERROR rejects directives whose expression
// must be resolvable. Several directives are gated on the GPU generation or
// on feature bits (see the "directive requires ..." / "directive is not
// supported ..." messages).
//
// Side state gathered while parsing:
//  - ImpliedUserSGPRCount grows by the number of SGPRs each enabled
//    .amdhsa_user_sgpr_* directive occupies (4, 2, 1, or the preload length);
//    ExplicitUserSGPRCount is set by .amdhsa_user_sgpr_count.
//  - NextFreeVGPR / NextFreeSGPR, AccumOffset, NamedBarCnt, ReserveVCC,
//    ReserveFlatScr, EnableWavefrontSize32, SharedVGPRCount, PreloadLength
//    and PreloadOffset record the corresponding directive values.
//
// After the directive loop: .amdhsa_next_free_vgpr and .amdhsa_next_free_sgpr
// are required; the user SGPR count is the explicit value if given, else the
// implied one; calculateGPRBlocks() produces the granulated VGPR/SGPR counts,
// which are range-checked and written to COMPUTE_PGM_RSRC1; the kernarg
// preload window is checked against the kernarg segment size; accum_offset
// is validated (range [4..256], multiple of 4, not beyond the VGPR
// allocation); shared_vgpr_count is validated for gfx10/gfx11; finally the
// descriptor is emitted through the target streamer.
//
// NOTE(review): the message "amdgpu_user_sgpr_count smaller than implied by
// enabled user SGPRs" names amdgpu_user_sgpr_count although the directive
// parsed here is .amdhsa_user_sgpr_count - confirm the wording.
6060bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() {
6061 if (!getSTI().getTargetTriple().isAMDGCN())
6062 return TokError(
"directive only supported for amdgcn architecture");
6065 return TokError(
"directive only supported for amdhsa OS");
6067 StringRef KernelName;
6068 if (getParser().parseIdentifier(KernelName))
6071 AMDGPU::MCKernelDescriptor KD =
6083 const MCExpr *NextFreeVGPR = ZeroExpr;
6085 const MCExpr *NamedBarCnt = ZeroExpr;
6086 uint64_t SharedVGPRCount = 0;
6087 uint64_t PreloadLength = 0;
6088 uint64_t PreloadOffset = 0;
6090 const MCExpr *NextFreeSGPR = ZeroExpr;
6093 unsigned ImpliedUserSGPRCount = 0;
6097 std::optional<unsigned> ExplicitUserSGPRCount;
6098 const MCExpr *ReserveVCC = OneExpr;
6099 const MCExpr *ReserveFlatScr = OneExpr;
6100 std::optional<bool> EnableWavefrontSize32;
6106 SMRange IDRange = getTok().getLocRange();
6107 if (!parseId(
ID,
"expected .amdhsa_ directive or .end_amdhsa_kernel"))
6110 if (
ID ==
".end_amdhsa_kernel")
6114 return TokError(
".amdhsa_ directives cannot be repeated");
6116 SMLoc ValStart = getLoc();
6117 const MCExpr *ExprVal;
6118 if (getParser().parseExpression(ExprVal))
6120 SMLoc ValEnd = getLoc();
6121 SMRange ValRange = SMRange(ValStart, ValEnd);
6124 uint64_t Val = IVal;
6125 bool EvaluatableExpr;
6126 if ((EvaluatableExpr = ExprVal->evaluateAsAbsolute(IVal))) {
6128 return OutOfRangeError(ValRange);
6132#define PARSE_BITS_ENTRY(FIELD, ENTRY, VALUE, RANGE) \
6133 if (!isUInt<ENTRY##_WIDTH>(Val)) \
6134 return OutOfRangeError(RANGE); \
6135 AMDGPU::MCKernelDescriptor::bits_set(FIELD, VALUE, ENTRY##_SHIFT, ENTRY, \
6140#define EXPR_RESOLVE_OR_ERROR(RESOLVED) \
6142 return Error(IDRange.Start, "directive should have resolvable expression", \
6145 if (
ID ==
".amdhsa_group_segment_fixed_size") {
6148 return OutOfRangeError(ValRange);
6150 }
else if (
ID ==
".amdhsa_private_segment_fixed_size") {
6153 return OutOfRangeError(ValRange);
6155 }
else if (
ID ==
".amdhsa_kernarg_size") {
6157 return OutOfRangeError(ValRange);
6159 }
else if (
ID ==
".amdhsa_user_sgpr_count") {
6161 ExplicitUserSGPRCount = Val;
6162 }
else if (
ID ==
".amdhsa_user_sgpr_private_segment_buffer") {
6166 "directive is not supported with architected flat scratch",
6169 KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER,
6172 ImpliedUserSGPRCount += 4;
6173 }
else if (
ID ==
".amdhsa_user_sgpr_kernarg_preload_length") {
6176 return Error(IDRange.
Start,
"directive requires gfx90a+", IDRange);
6179 return OutOfRangeError(ValRange);
6183 ImpliedUserSGPRCount += Val;
6184 PreloadLength = Val;
6186 }
else if (
ID ==
".amdhsa_user_sgpr_kernarg_preload_offset") {
6189 return Error(IDRange.
Start,
"directive requires gfx90a+", IDRange);
6192 return OutOfRangeError(ValRange);
6196 PreloadOffset = Val;
6197 }
else if (
ID ==
".amdhsa_user_sgpr_dispatch_ptr") {
6200 KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR, ExprVal,
6203 ImpliedUserSGPRCount += 2;
6204 }
else if (
ID ==
".amdhsa_user_sgpr_queue_ptr") {
6207 KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR, ExprVal,
6210 ImpliedUserSGPRCount += 2;
6211 }
else if (
ID ==
".amdhsa_user_sgpr_kernarg_segment_ptr") {
6214 KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR,
6217 ImpliedUserSGPRCount += 2;
6218 }
else if (
ID ==
".amdhsa_user_sgpr_dispatch_id") {
6221 KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID, ExprVal,
6224 ImpliedUserSGPRCount += 2;
6225 }
else if (
ID ==
".amdhsa_user_sgpr_flat_scratch_init") {
6228 "directive is not supported with architected flat scratch",
6232 KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT,
6235 ImpliedUserSGPRCount += 2;
6236 }
else if (
ID ==
".amdhsa_user_sgpr_private_segment_size") {
6239 KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE,
6242 ImpliedUserSGPRCount += 1;
6243 }
else if (
ID ==
".amdhsa_wavefront_size32") {
6245 if (IVersion.
Major < 10)
6246 return Error(IDRange.
Start,
"directive requires gfx10+", IDRange);
6247 EnableWavefrontSize32 = Val;
6249 KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32, ExprVal,
6251 }
else if (
ID ==
".amdhsa_uses_dynamic_stack") {
6253 KERNEL_CODE_PROPERTY_USES_DYNAMIC_STACK, ExprVal,
6255 }
else if (
ID ==
".amdhsa_system_sgpr_private_segment_wavefront_offset") {
6258 "directive is not supported with architected flat scratch",
6261 COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT, ExprVal,
6263 }
else if (
ID ==
".amdhsa_enable_private_segment") {
6267 "directive is not supported without architected flat scratch",
6270 COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT, ExprVal,
6272 }
else if (
ID ==
".amdhsa_system_sgpr_workgroup_id_x") {
6274 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X, ExprVal,
6276 }
else if (
ID ==
".amdhsa_system_sgpr_workgroup_id_y") {
6278 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Y, ExprVal,
6280 }
else if (
ID ==
".amdhsa_system_sgpr_workgroup_id_z") {
6282 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Z, ExprVal,
6284 }
else if (
ID ==
".amdhsa_system_sgpr_workgroup_info") {
6286 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_INFO, ExprVal,
6288 }
else if (
ID ==
".amdhsa_system_vgpr_workitem_id") {
6290 COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID, ExprVal,
6292 }
else if (
ID ==
".amdhsa_next_free_vgpr") {
6293 VGPRRange = ValRange;
6294 NextFreeVGPR = ExprVal;
6295 }
else if (
ID ==
".amdhsa_next_free_sgpr") {
6296 SGPRRange = ValRange;
6297 NextFreeSGPR = ExprVal;
6298 }
else if (
ID ==
".amdhsa_accum_offset") {
6300 return Error(IDRange.
Start,
"directive requires gfx90a+", IDRange);
6301 AccumOffset = ExprVal;
6302 }
else if (
ID ==
".amdhsa_named_barrier_count") {
6304 return Error(IDRange.
Start,
"directive requires gfx1250+", IDRange);
6305 NamedBarCnt = ExprVal;
6306 }
else if (
ID ==
".amdhsa_reserve_vcc") {
6308 return OutOfRangeError(ValRange);
6309 ReserveVCC = ExprVal;
6310 }
else if (
ID ==
".amdhsa_reserve_flat_scratch") {
6311 if (IVersion.
Major < 7)
6312 return Error(IDRange.
Start,
"directive requires gfx7+", IDRange);
6315 "directive is not supported with architected flat scratch",
6318 return OutOfRangeError(ValRange);
6319 ReserveFlatScr = ExprVal;
6320 }
else if (
ID ==
".amdhsa_reserve_xnack_mask") {
6321 if (IVersion.
Major < 8)
6322 return Error(IDRange.
Start,
"directive requires gfx8+", IDRange);
6324 return OutOfRangeError(ValRange);
6325 if (Val != getTargetStreamer().getTargetID()->isXnackOnOrAny())
6326 return getParser().Error(IDRange.
Start,
".amdhsa_reserve_xnack_mask does not match target id",
6328 }
else if (
ID ==
".amdhsa_float_round_mode_32") {
6330 COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32, ExprVal,
6332 }
else if (
ID ==
".amdhsa_float_round_mode_16_64") {
6334 COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_16_64, ExprVal,
6336 }
else if (
ID ==
".amdhsa_float_denorm_mode_32") {
6338 COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_32, ExprVal,
6340 }
else if (
ID ==
".amdhsa_float_denorm_mode_16_64") {
6342 COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64, ExprVal,
6344 }
else if (
ID ==
".amdhsa_dx10_clamp") {
6345 if (!getSTI().hasFeature(AMDGPU::FeatureDX10ClampAndIEEEMode))
6346 return Error(IDRange.
Start,
"directive unsupported on gfx1170+",
6349 COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_DX10_CLAMP, ExprVal,
6351 }
else if (
ID ==
".amdhsa_ieee_mode") {
6352 if (!getSTI().hasFeature(AMDGPU::FeatureDX10ClampAndIEEEMode))
6353 return Error(IDRange.
Start,
"directive unsupported on gfx1170+",
6356 COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_IEEE_MODE, ExprVal,
6358 }
else if (
ID ==
".amdhsa_fp16_overflow") {
6359 if (IVersion.
Major < 9)
6360 return Error(IDRange.
Start,
"directive requires gfx9+", IDRange);
6362 COMPUTE_PGM_RSRC1_GFX9_PLUS_FP16_OVFL, ExprVal,
6364 }
else if (
ID ==
".amdhsa_tg_split") {
6366 return Error(IDRange.
Start,
"directive requires gfx90a+", IDRange);
6369 }
else if (
ID ==
".amdhsa_workgroup_processor_mode") {
6372 "directive unsupported on " + getSTI().
getCPU(), IDRange);
6374 COMPUTE_PGM_RSRC1_GFX10_PLUS_WGP_MODE, ExprVal,
6376 }
else if (
ID ==
".amdhsa_memory_ordered") {
6377 if (IVersion.
Major < 10)
6378 return Error(IDRange.
Start,
"directive requires gfx10+", IDRange);
6380 COMPUTE_PGM_RSRC1_GFX10_PLUS_MEM_ORDERED, ExprVal,
6382 }
else if (
ID ==
".amdhsa_forward_progress") {
6383 if (IVersion.
Major < 10)
6384 return Error(IDRange.
Start,
"directive requires gfx10+", IDRange);
6386 COMPUTE_PGM_RSRC1_GFX10_PLUS_FWD_PROGRESS, ExprVal,
6388 }
else if (
ID ==
".amdhsa_shared_vgpr_count") {
6390 if (IVersion.
Major < 10 || IVersion.
Major >= 12)
6391 return Error(IDRange.
Start,
"directive requires gfx10 or gfx11",
6393 SharedVGPRCount = Val;
6395 COMPUTE_PGM_RSRC3_GFX10_GFX11_SHARED_VGPR_COUNT, ExprVal,
6397 }
else if (
ID ==
".amdhsa_inst_pref_size") {
6398 if (IVersion.
Major < 11)
6399 return Error(IDRange.
Start,
"directive requires gfx11+", IDRange);
6400 if (IVersion.
Major == 11) {
6402 COMPUTE_PGM_RSRC3_GFX11_INST_PREF_SIZE, ExprVal,
6406 COMPUTE_PGM_RSRC3_GFX12_PLUS_INST_PREF_SIZE, ExprVal,
6409 }
else if (
ID ==
".amdhsa_exception_fp_ieee_invalid_op") {
6412 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION,
6414 }
else if (
ID ==
".amdhsa_exception_fp_denorm_src") {
6416 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_FP_DENORMAL_SOURCE,
6418 }
else if (
ID ==
".amdhsa_exception_fp_ieee_div_zero") {
6421 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO,
6423 }
else if (
ID ==
".amdhsa_exception_fp_ieee_overflow") {
6425 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW,
6427 }
else if (
ID ==
".amdhsa_exception_fp_ieee_underflow") {
6429 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW,
6431 }
else if (
ID ==
".amdhsa_exception_fp_ieee_inexact") {
6433 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INEXACT,
6435 }
else if (
ID ==
".amdhsa_exception_int_div_zero") {
6437 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO,
6439 }
else if (
ID ==
".amdhsa_round_robin_scheduling") {
6440 if (IVersion.
Major < 12)
6441 return Error(IDRange.
Start,
"directive requires gfx12+", IDRange);
6443 COMPUTE_PGM_RSRC1_GFX12_PLUS_ENABLE_WG_RR_EN, ExprVal,
6446 return Error(IDRange.
Start,
"unknown .amdhsa_kernel directive", IDRange);
6449#undef PARSE_BITS_ENTRY
6452 if (!Seen.
contains(
".amdhsa_next_free_vgpr"))
6453 return TokError(
".amdhsa_next_free_vgpr directive is required");
6455 if (!Seen.
contains(
".amdhsa_next_free_sgpr"))
6456 return TokError(
".amdhsa_next_free_sgpr directive is required");
6458 unsigned UserSGPRCount = ExplicitUserSGPRCount.value_or(ImpliedUserSGPRCount);
6460 return TokError(
"too many user SGPRs enabled, found " +
6461 Twine(UserSGPRCount) +
", but only " +
6467 if (PreloadLength) {
6473 const MCExpr *VGPRBlocks;
6474 const MCExpr *SGPRBlocks;
6475 if (calculateGPRBlocks(getFeatureBits(), ReserveVCC, ReserveFlatScr,
6476 getTargetStreamer().getTargetID()->isXnackOnOrAny(),
6477 EnableWavefrontSize32, NextFreeVGPR,
6478 VGPRRange, NextFreeSGPR, SGPRRange, VGPRBlocks,
6482 int64_t EvaluatedVGPRBlocks;
6483 bool VGPRBlocksEvaluatable =
6484 VGPRBlocks->evaluateAsAbsolute(EvaluatedVGPRBlocks);
6485 if (VGPRBlocksEvaluatable &&
6487 static_cast<uint64_t
>(EvaluatedVGPRBlocks))) {
6488 return OutOfRangeError(VGPRRange);
6492 COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT_SHIFT,
6493 COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT,
getContext());
6495 int64_t EvaluatedSGPRBlocks;
6496 if (SGPRBlocks->evaluateAsAbsolute(EvaluatedSGPRBlocks) &&
6498 static_cast<uint64_t
>(EvaluatedSGPRBlocks)))
6499 return OutOfRangeError(SGPRRange);
6502 COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT_SHIFT,
6503 COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT,
getContext());
6505 if (ExplicitUserSGPRCount && ImpliedUserSGPRCount > *ExplicitUserSGPRCount)
6506 return TokError(
"amdgpu_user_sgpr_count smaller than implied by "
6507 "enabled user SGPRs");
6513 COMPUTE_PGM_RSRC2_GFX125_USER_SGPR_COUNT_SHIFT,
6514 COMPUTE_PGM_RSRC2_GFX125_USER_SGPR_COUNT,
getContext());
6519 COMPUTE_PGM_RSRC2_GFX6_GFX120_USER_SGPR_COUNT_SHIFT,
6520 COMPUTE_PGM_RSRC2_GFX6_GFX120_USER_SGPR_COUNT,
getContext());
6525 return TokError(
"Kernarg size should be resolvable");
6526 uint64_t kernarg_size = IVal;
6527 if (PreloadLength && kernarg_size &&
6528 (PreloadLength * 4 + PreloadOffset * 4 > kernarg_size))
6529 return TokError(
"Kernarg preload length + offset is larger than the "
6530 "kernarg segment size");
6533 if (!Seen.
contains(
".amdhsa_accum_offset"))
6534 return TokError(
".amdhsa_accum_offset directive is required");
6535 int64_t EvaluatedAccum;
6536 bool AccumEvaluatable = AccumOffset->evaluateAsAbsolute(EvaluatedAccum);
6537 uint64_t UEvaluatedAccum = EvaluatedAccum;
6538 if (AccumEvaluatable &&
6539 (UEvaluatedAccum < 4 || UEvaluatedAccum > 256 || (UEvaluatedAccum & 3)))
6540 return TokError(
"accum_offset should be in range [4..256] in "
6543 int64_t EvaluatedNumVGPR;
6544 if (NextFreeVGPR->evaluateAsAbsolute(EvaluatedNumVGPR) &&
6547 alignTo(std::max((uint64_t)1, (uint64_t)EvaluatedNumVGPR), 4))
6548 return TokError(
"accum_offset exceeds total VGPR allocation");
6554 COMPUTE_PGM_RSRC3_GFX90A_ACCUM_OFFSET_SHIFT,
6555 COMPUTE_PGM_RSRC3_GFX90A_ACCUM_OFFSET,
6561 COMPUTE_PGM_RSRC3_GFX125_NAMED_BAR_CNT_SHIFT,
6562 COMPUTE_PGM_RSRC3_GFX125_NAMED_BAR_CNT,
6565 if (IVersion.
Major >= 10 && IVersion.
Major < 12) {
6567 if (SharedVGPRCount && EnableWavefrontSize32 && *EnableWavefrontSize32) {
6568 return TokError(
"shared_vgpr_count directive not valid on "
6569 "wavefront size 32");
6572 if (VGPRBlocksEvaluatable &&
6573 (SharedVGPRCount * 2 +
static_cast<uint64_t
>(EvaluatedVGPRBlocks) >
6575 return TokError(
"shared_vgpr_count*2 + "
6576 "compute_pgm_rsrc1.GRANULATED_WORKITEM_VGPR_COUNT cannot "
6581 getTargetStreamer().EmitAmdhsaKernelDescriptor(getSTI(), KernelName, KD,
6582 NextFreeVGPR, NextFreeSGPR,
6583 ReserveVCC, ReserveFlatScr);
// Parses the code object version as an absolute expression (via
// ParseAsAbsoluteExpression) and forwards it to the target streamer's
// EmitDirectiveAMDHSACodeObjectVersion().
6587bool AMDGPUAsmParser::ParseDirectiveAMDHSACodeObjectVersion() {
6589 if (ParseAsAbsoluteExpression(
Version))
6592 getTargetStreamer().EmitDirectiveAMDHSACodeObjectVersion(
Version);
// Parses one named field (ID) of an amd_kernel_code_t block into C.
//  - "max_scratch_backing_memory_byte_size" is consumed with
//    eatToEndOfStatement() rather than parsed as a field.
//  - Other fields go through C.ParseKernelCodeT(); its error text is collected
//    in Err and surfaced with TokError.
//  - After parsing, "enable_wavefront_size32" and "wavefront_size" are
//    cross-checked against the GPU generation and the wave-size features; see
//    the messages below (wavefront_size 5 goes with WavefrontSize32, 6 with
//    WavefrontSize64).
6596bool AMDGPUAsmParser::ParseAMDKernelCodeTValue(StringRef
ID,
6597 AMDGPUMCKernelCodeT &
C) {
6600 if (
ID ==
"max_scratch_backing_memory_byte_size") {
6601 Parser.eatToEndOfStatement();
6605 SmallString<40> ErrStr;
6606 raw_svector_ostream Err(ErrStr);
6607 if (!
C.ParseKernelCodeT(
ID, getParser(), Err)) {
6608 return TokError(Err.
str());
6612 if (
ID ==
"enable_wavefront_size32") {
6615 return TokError(
"enable_wavefront_size32=1 is only allowed on GFX10+");
6617 return TokError(
"enable_wavefront_size32=1 requires +WavefrontSize32");
6620 return TokError(
"enable_wavefront_size32=0 requires +WavefrontSize64");
6624 if (
ID ==
"wavefront_size") {
6625 if (
C.wavefront_size == 5) {
6627 return TokError(
"wavefront_size=5 is only allowed on GFX10+");
6629 return TokError(
"wavefront_size=5 requires +WavefrontSize32");
6630 }
else if (
C.wavefront_size == 6) {
6632 return TokError(
"wavefront_size=6 requires +WavefrontSize64");
// Parses an amd_kernel_code_t block: reads identifiers until
// ".end_amd_kernel_code_t", passing each one to ParseAMDKernelCodeTValue() to
// fill KernelCode, then emits the result through the target streamer's
// EmitAMDKernelCodeT().
6639bool AMDGPUAsmParser::ParseDirectiveAMDKernelCodeT() {
6640 AMDGPUMCKernelCodeT KernelCode;
6649 if (!parseId(
ID,
"expected value identifier or .end_amd_kernel_code_t"))
6652 if (
ID ==
".end_amd_kernel_code_t")
6655 if (ParseAMDKernelCodeTValue(
ID, KernelCode))
6660 getTargetStreamer().EmitAMDKernelCodeT(KernelCode);
// Parses a symbol name ("expected symbol name" on failure) and passes it to
// the target streamer's EmitAMDGPUSymbolType().
6665bool AMDGPUAsmParser::ParseDirectiveAMDGPUHsaKernel() {
6666 StringRef KernelName;
6667 if (!parseId(KernelName,
"expected symbol name"))
6670 getTargetStreamer().EmitAMDGPUSymbolType(KernelName,
// Handles the .amd_amdgpu_isa directive. Rejected on non-amdgcn triples. The
// string contents of the current token must equal the target streamer's
// target id string ("target id must match options" otherwise); on success the
// ISA version is emitted through the target streamer.
6677bool AMDGPUAsmParser::ParseDirectiveISAVersion() {
6678 if (!getSTI().getTargetTriple().isAMDGCN()) {
6679 return Error(getLoc(),
6680 ".amd_amdgpu_isa directive is not available on non-amdgcn "
6684 auto TargetIDDirective = getLexer().getTok().getStringContents();
6685 if (getTargetStreamer().getTargetID()->
toString() != TargetIDDirective)
6686 return Error(getParser().getTok().getLoc(),
"target id must match options");
6688 getTargetStreamer().EmitISAVersion();
// Hands the HSA metadata text held in HSAMetadataString to the target
// streamer's EmitHSAMetadataV3(); a false result is reported as
// "invalid HSA metadata".
6694bool AMDGPUAsmParser::ParseDirectiveHSAMetadata() {
6697 std::string HSAMetadataString;
6702 if (!getTargetStreamer().EmitHSAMetadataV3(HSAMetadataString))
6703 return Error(getLoc(),
"invalid HSA metadata");
// Collects raw assembler text into CollectString up to the
// AssemblerDirectiveEnd directive. The lexer's whitespace skipping is
// switched off while collecting and switched back on afterwards, so the
// collected text keeps its spacing. Each statement's text is appended,
// followed by the target's statement separator string. If the end directive
// is never found the error "expected directive <end> not found" is reported.
6710bool AMDGPUAsmParser::ParseToEndDirective(
const char *AssemblerDirectiveBegin,
6711 const char *AssemblerDirectiveEnd,
6712 std::string &CollectString) {
6714 raw_string_ostream CollectStream(CollectString);
6716 getLexer().setSkipSpace(
false);
6718 bool FoundEnd =
false;
6721 CollectStream << getTokenStr();
6725 if (trySkipId(AssemblerDirectiveEnd)) {
6730 CollectStream << Parser.parseStringToEndOfStatement()
6731 <<
getContext().getAsmInfo().getSeparatorString();
6733 Parser.eatToEndOfStatement();
6736 getLexer().setSkipSpace(
true);
6739 return TokError(Twine(
"expected directive ") +
6740 Twine(AssemblerDirectiveEnd) + Twine(
" not found"));
6747bool AMDGPUAsmParser::ParseDirectivePALMetadataBegin() {
6753 auto *PALMetadata = getTargetStreamer().getPALMetadata();
6754 if (!PALMetadata->setFromString(
String))
6755 return Error(getLoc(),
"invalid PAL metadata");
6760bool AMDGPUAsmParser::ParseDirectivePALMetadata() {
6762 return Error(getLoc(),
6764 "not available on non-amdpal OSes")).str());
6767 auto *PALMetadata = getTargetStreamer().getPALMetadata();
6768 PALMetadata->setLegacy();
6771 if (ParseAsAbsoluteExpression(
Key)) {
6772 return TokError(Twine(
"invalid value in ") +
6776 return TokError(Twine(
"expected an even number of values in ") +
6779 if (ParseAsAbsoluteExpression(
Value)) {
6780 return TokError(Twine(
"invalid value in ") +
6783 PALMetadata->setRegister(
Key,
Value);
6792bool AMDGPUAsmParser::ParseDirectiveAMDGPULDS() {
6793 if (getParser().checkForValidSection())
6797 SMLoc NameLoc = getLoc();
6798 if (getParser().parseIdentifier(Name))
6799 return TokError(
"expected identifier in directive");
6802 if (getParser().parseComma())
6808 SMLoc SizeLoc = getLoc();
6809 if (getParser().parseAbsoluteExpression(
Size))
6812 return Error(SizeLoc,
"size must be non-negative");
6813 if (
Size > LocalMemorySize)
6814 return Error(SizeLoc,
"size is too large");
6816 int64_t Alignment = 4;
6818 SMLoc AlignLoc = getLoc();
6819 if (getParser().parseAbsoluteExpression(Alignment))
6822 return Error(AlignLoc,
"alignment must be a power of two");
6827 if (Alignment >= 1u << 31)
6828 return Error(AlignLoc,
"alignment is too large");
6834 Symbol->redefineIfPossible();
6835 if (!
Symbol->isUndefined())
6836 return Error(NameLoc,
"invalid symbol redefinition");
6838 getTargetStreamer().emitAMDGPULDS(Symbol,
Size,
Align(Alignment));
6842bool AMDGPUAsmParser::ParseDirectiveAMDGPUInfo() {
6843 if (getParser().checkForValidSection())
6847 if (getParser().parseIdentifier(FuncName))
6848 return TokError(
"expected symbol name after .amdgpu_info");
6851 AMDGPU::InfoSectionData ParsedInfoData;
6852 AMDGPU::FuncInfo FI;
6854 bool HasScalarAttrs =
false;
6861 SMLoc IDLoc = getLoc();
6862 if (!parseId(
ID,
"expected directive or .end_amdgpu_info"))
6865 if (
ID ==
".end_amdgpu_info")
6873 return Error(IDLoc,
"unknown .amdgpu_info directive '" +
ID +
"'");
6875 if (Dir ==
"flags") {
6877 if (getParser().parseAbsoluteExpression(Val))
6880 FI.
UsesVCC = !!(
Flags & AMDGPU::FuncInfoFlags::FUNC_USES_VCC);
6882 !!(
Flags & AMDGPU::FuncInfoFlags::FUNC_USES_FLAT_SCRATCH);
6884 HasScalarAttrs =
true;
6885 }
else if (Dir ==
"num_sgpr") {
6887 if (getParser().parseAbsoluteExpression(Val))
6889 FI.
NumSGPR =
static_cast<uint32_t
>(Val);
6890 HasScalarAttrs =
true;
6891 }
else if (Dir ==
"num_vgpr") {
6893 if (getParser().parseAbsoluteExpression(Val))
6896 HasScalarAttrs =
true;
6897 }
else if (Dir ==
"num_agpr") {
6899 if (getParser().parseAbsoluteExpression(Val))
6902 HasScalarAttrs =
true;
6903 }
else if (Dir ==
"private_segment_size") {
6905 if (getParser().parseAbsoluteExpression(Val))
6908 HasScalarAttrs =
true;
6909 }
else if (Dir ==
"use") {
6911 if (getParser().parseIdentifier(ResName))
6912 return TokError(
"expected resource symbol for .amdgpu_use");
6913 ParsedInfoData.
Uses.push_back(
6914 {FuncSym,
getContext().getOrCreateSymbol(ResName)});
6915 }
else if (Dir ==
"call") {
6917 if (getParser().parseIdentifier(DstName))
6918 return TokError(
"expected callee symbol for .amdgpu_call");
6919 ParsedInfoData.
Calls.push_back(
6920 {FuncSym,
getContext().getOrCreateSymbol(DstName)});
6921 }
else if (Dir ==
"indirect_call") {
6923 if (getParser().parseEscapedString(TypeId))
6924 return TokError(
"expected type ID string for .amdgpu_indirect_call");
6925 ParsedInfoData.
IndirectCalls.push_back({FuncSym, std::move(TypeId)});
6926 }
else if (Dir ==
"typeid") {
6928 if (getParser().parseEscapedString(TypeId))
6929 return TokError(
"expected type ID string for .amdgpu_typeid");
6930 ParsedInfoData.
TypeIds.push_back({FuncSym, std::move(TypeId)});
6932 return Error(IDLoc,
"unknown .amdgpu_info directive '" +
ID +
"'");
6937 ParsedInfoData.
Funcs.push_back(std::move(FI));
6939 AMDGPU::InfoSectionData &
Data = InfoData ? *InfoData : InfoData.emplace();
6940 for (AMDGPU::FuncInfo &Func : ParsedInfoData.
Funcs)
6941 Data.Funcs.push_back(std::move(Func));
6942 for (std::pair<MCSymbol *, MCSymbol *> &Use : ParsedInfoData.
Uses)
6943 Data.Uses.push_back(Use);
6944 for (std::pair<MCSymbol *, MCSymbol *> &
Call : ParsedInfoData.
Calls)
6946 for (std::pair<MCSymbol *, std::string> &
IndirectCall :
6949 for (std::pair<MCSymbol *, std::string> &TypeId : ParsedInfoData.
TypeIds)
6950 Data.TypeIds.push_back(std::move(TypeId));
6955void AMDGPUAsmParser::onEndOfFile() {
6957 getTargetStreamer().emitAMDGPUInfo(*InfoData);
6960bool AMDGPUAsmParser::ParseDirective(AsmToken DirectiveID) {
6961 StringRef IDVal = DirectiveID.
getString();
6964 if (IDVal ==
".amdhsa_kernel")
6965 return ParseDirectiveAMDHSAKernel();
6967 if (IDVal ==
".amdhsa_code_object_version")
6968 return ParseDirectiveAMDHSACodeObjectVersion();
6972 return ParseDirectiveHSAMetadata();
6974 if (IDVal ==
".amd_kernel_code_t")
6975 return ParseDirectiveAMDKernelCodeT();
6977 if (IDVal ==
".amdgpu_hsa_kernel")
6978 return ParseDirectiveAMDGPUHsaKernel();
6980 if (IDVal ==
".amd_amdgpu_isa")
6981 return ParseDirectiveISAVersion();
6985 Twine(
" directive is "
6986 "not available on non-amdhsa OSes"))
6991 if (IDVal ==
".amdgcn_target")
6992 return ParseDirectiveAMDGCNTarget();
6994 if (IDVal ==
".amdgpu_lds")
6995 return ParseDirectiveAMDGPULDS();
6997 if (IDVal ==
".amdgpu_info")
6998 return ParseDirectiveAMDGPUInfo();
7001 return ParseDirectivePALMetadataBegin();
7004 return ParseDirectivePALMetadata();
7009bool AMDGPUAsmParser::subtargetHasRegister(
const MCRegisterInfo &MRI,
7016 return hasSGPR104_SGPR105();
7019 case SRC_SHARED_BASE_LO:
7020 case SRC_SHARED_BASE:
7021 case SRC_SHARED_LIMIT_LO:
7022 case SRC_SHARED_LIMIT:
7023 case SRC_PRIVATE_BASE_LO:
7024 case SRC_PRIVATE_BASE:
7025 case SRC_PRIVATE_LIMIT_LO:
7026 case SRC_PRIVATE_LIMIT:
7028 case SRC_FLAT_SCRATCH_BASE_LO:
7029 case SRC_FLAT_SCRATCH_BASE_HI:
7030 return hasGloballyAddressableScratch();
7031 case SRC_POPS_EXITING_WAVE_ID:
7043 return (
isVI() ||
isGFX9()) && getTargetStreamer().getTargetID()->isXnackSupported();
7073 return hasSGPR102_SGPR103();
7078ParseStatus AMDGPUAsmParser::parseOperand(
OperandVector &Operands,
7081 ParseStatus Res = parseVOPD(Operands);
7086 Res = MatchOperandParserImpl(Operands, Mnemonic);
7098 SMLoc LBraceLoc = getLoc();
7103 auto Loc = getLoc();
7104 Res = parseReg(Operands);
7106 Error(Loc,
"expected a register");
7110 RBraceLoc = getLoc();
7115 "expected a comma or a closing square bracket"))
7119 if (Operands.
size() - Prefix > 1) {
7121 AMDGPUOperand::CreateToken(
this,
"[", LBraceLoc));
7122 Operands.
push_back(AMDGPUOperand::CreateToken(
this,
"]", RBraceLoc));
7128 return parseRegOrImm(Operands);
7131StringRef AMDGPUAsmParser::parseMnemonicSuffix(StringRef Name) {
7133 setForcedEncodingSize(0);
7134 setForcedDPP(
false);
7135 setForcedSDWA(
false);
7137 if (
Name.consume_back(
"_e64_dpp")) {
7139 setForcedEncodingSize(64);
7142 if (
Name.consume_back(
"_e64")) {
7143 setForcedEncodingSize(64);
7146 if (
Name.consume_back(
"_e32")) {
7147 setForcedEncodingSize(32);
7150 if (
Name.consume_back(
"_dpp")) {
7154 if (
Name.consume_back(
"_sdwa")) {
7155 setForcedSDWA(
true);
7163 unsigned VariantID);
7169 Name = parseMnemonicSuffix(Name);
7175 Operands.
push_back(AMDGPUOperand::CreateToken(
this, Name, NameLoc));
7177 bool IsMIMG = Name.starts_with(
"image_");
7180 OperandMode
Mode = OperandMode_Default;
7182 Mode = OperandMode_NSA;
7186 checkUnsupportedInstruction(Name, NameLoc);
7187 if (!Parser.hasPendingError()) {
7190 :
"not a valid operand.";
7191 Error(getLoc(), Msg);
7210ParseStatus AMDGPUAsmParser::parseTokenOp(StringRef Name,
7213 if (!trySkipId(Name))
7216 Operands.
push_back(AMDGPUOperand::CreateToken(
this, Name, S));
7220ParseStatus AMDGPUAsmParser::parseIntWithPrefix(
const char *Prefix,
7229ParseStatus AMDGPUAsmParser::parseIntWithPrefix(
7230 const char *Prefix,
OperandVector &Operands, AMDGPUOperand::ImmTy ImmTy,
7231 std::function<
bool(int64_t &)> ConvertResult) {
7235 ParseStatus Res = parseIntWithPrefix(Prefix,
Value);
7239 if (ConvertResult && !ConvertResult(
Value)) {
7240 Error(S,
"invalid " + StringRef(Prefix) +
" value.");
7243 Operands.
push_back(AMDGPUOperand::CreateImm(
this,
Value, S, ImmTy));
7247ParseStatus AMDGPUAsmParser::parseOperandArrayWithPrefix(
7248 const char *Prefix,
OperandVector &Operands, AMDGPUOperand::ImmTy ImmTy,
7249 bool (*ConvertResult)(int64_t &)) {
7258 const unsigned MaxSize = 4;
7262 for (
int I = 0; ; ++
I) {
7264 SMLoc Loc = getLoc();
7268 if (
Op != 0 &&
Op != 1)
7269 return Error(Loc,
"invalid " + StringRef(Prefix) +
" value.");
7276 if (
I + 1 == MaxSize)
7277 return Error(getLoc(),
"expected a closing square bracket");
7283 Operands.
push_back(AMDGPUOperand::CreateImm(
this, Val, S, ImmTy));
7287ParseStatus AMDGPUAsmParser::parseNamedBit(StringRef Name,
7289 AMDGPUOperand::ImmTy ImmTy,
7290 bool IgnoreNegative) {
7294 if (trySkipId(Name)) {
7296 }
else if (trySkipId(
"no", Name)) {
7305 return Error(S,
"r128 modifier is not supported on this GPU");
7306 if (Name ==
"a16" && !
hasA16())
7307 return Error(S,
"a16 modifier is not supported on this GPU");
7309 if (Bit == 0 && Name ==
"gds") {
7310 StringRef Mnemo = ((AMDGPUOperand &)*Operands[0]).
getToken();
7312 return Error(S,
"nogds is not allowed");
7315 if (
isGFX9() && ImmTy == AMDGPUOperand::ImmTyA16)
7316 ImmTy = AMDGPUOperand::ImmTyR128A16;
7318 Operands.
push_back(AMDGPUOperand::CreateImm(
this, Bit, S, ImmTy));
7322unsigned AMDGPUAsmParser::getCPolKind(StringRef Id, StringRef Mnemo,
7323 bool &Disabling)
const {
7324 Disabling =
Id.consume_front(
"no");
7327 return StringSwitch<unsigned>(Id)
7334 return StringSwitch<unsigned>(Id)
7342ParseStatus AMDGPUAsmParser::parseCPol(
OperandVector &Operands) {
7344 SMLoc StringLoc = getLoc();
7346 int64_t CPolVal = 0;
7355 ResTH = parseTH(Operands, TH);
7366 ResScope = parseScope(Operands, Scope);
7379 if (trySkipId(
"nv")) {
7383 }
else if (trySkipId(
"no",
"nv")) {
7390 if (trySkipId(
"scale_offset")) {
7394 }
else if (trySkipId(
"no",
"scale_offset")) {
7407 Operands.
push_back(AMDGPUOperand::CreateImm(
this, CPolVal, StringLoc,
7408 AMDGPUOperand::ImmTyCPol));
7412 StringRef Mnemo = ((AMDGPUOperand &)*Operands[0]).
getToken();
7413 SMLoc OpLoc = getLoc();
7414 unsigned Enabled = 0, Seen = 0;
7418 unsigned CPol = getCPolKind(getId(), Mnemo, Disabling);
7425 return Error(S,
"dlc modifier is not supported on this GPU");
7428 return Error(S,
"scc modifier is not supported on this GPU");
7431 return Error(S,
"duplicate cache policy modifier");
7443 AMDGPUOperand::CreateImm(
this,
Enabled, OpLoc, AMDGPUOperand::ImmTyCPol));
7447ParseStatus AMDGPUAsmParser::parseScope(
OperandVector &Operands,
7452 ParseStatus Res = parseStringOrIntWithPrefix(
7453 Operands,
"scope", {
"SCOPE_CU",
"SCOPE_SE",
"SCOPE_DEV",
"SCOPE_SYS"},
7462ParseStatus AMDGPUAsmParser::parseTH(
OperandVector &Operands, int64_t &TH) {
7467 ParseStatus Res = parseStringWithPrefix(
"th",
Value, StringLoc);
7471 if (
Value ==
"TH_DEFAULT")
7473 else if (
Value ==
"TH_STORE_LU" ||
Value ==
"TH_LOAD_WB" ||
7474 Value ==
"TH_LOAD_NT_WB") {
7475 return Error(StringLoc,
"invalid th value");
7476 }
else if (
Value.consume_front(
"TH_ATOMIC_")) {
7478 }
else if (
Value.consume_front(
"TH_LOAD_")) {
7480 }
else if (
Value.consume_front(
"TH_STORE_")) {
7483 return Error(StringLoc,
"invalid th value");
7486 if (
Value ==
"BYPASS")
7491 TH |= StringSwitch<int64_t>(
Value)
7501 .Default(0xffffffff);
7503 TH |= StringSwitch<int64_t>(
Value)
7514 .Default(0xffffffff);
7517 if (TH == 0xffffffff)
7518 return Error(StringLoc,
"invalid th value");
7525 AMDGPUAsmParser::OptionalImmIndexMap &OptionalIdx,
7526 AMDGPUOperand::ImmTy ImmT, int64_t
Default = 0,
7527 std::optional<unsigned> InsertAt = std::nullopt) {
7528 auto i = OptionalIdx.find(ImmT);
7529 if (i != OptionalIdx.end()) {
7530 unsigned Idx = i->second;
7531 const AMDGPUOperand &
Op =
7532 static_cast<const AMDGPUOperand &
>(*Operands[Idx]);
7536 Op.addImmOperands(Inst, 1);
7538 if (InsertAt.has_value())
7545ParseStatus AMDGPUAsmParser::parseStringWithPrefix(StringRef Prefix,
7551 StringLoc = getLoc();
7556ParseStatus AMDGPUAsmParser::parseStringOrIntWithPrefix(
7557 OperandVector &Operands, StringRef Name, ArrayRef<const char *> Ids,
7562 SMLoc StringLoc = getLoc();
7566 Value = getTokenStr();
7570 if (
Value == Ids[IntVal])
7575 if (IntVal < 0 || IntVal >= (int64_t)Ids.
size())
7576 return Error(StringLoc,
"invalid " + Twine(Name) +
" value");
7581ParseStatus AMDGPUAsmParser::parseStringOrIntWithPrefix(
7582 OperandVector &Operands, StringRef Name, ArrayRef<const char *> Ids,
7583 AMDGPUOperand::ImmTy
Type) {
7587 ParseStatus Res = parseStringOrIntWithPrefix(Operands, Name, Ids, IntVal);
7589 Operands.
push_back(AMDGPUOperand::CreateImm(
this, IntVal, S,
Type));
7598bool AMDGPUAsmParser::tryParseFmt(
const char *Pref,
7602 SMLoc Loc = getLoc();
7604 auto Res = parseIntWithPrefix(Pref, Val);
7610 if (Val < 0 || Val > MaxVal) {
7611 Error(Loc, Twine(
"out of range ", StringRef(Pref)));
7619ParseStatus AMDGPUAsmParser::tryParseIndexKey(
OperandVector &Operands,
7620 AMDGPUOperand::ImmTy ImmTy) {
7621 const char *Pref =
"index_key";
7623 SMLoc Loc = getLoc();
7624 auto Res = parseIntWithPrefix(Pref, ImmVal);
7628 if ((ImmTy == AMDGPUOperand::ImmTyIndexKey16bit ||
7629 ImmTy == AMDGPUOperand::ImmTyIndexKey32bit) &&
7630 (ImmVal < 0 || ImmVal > 1))
7631 return Error(Loc, Twine(
"out of range ", StringRef(Pref)));
7633 if (ImmTy == AMDGPUOperand::ImmTyIndexKey8bit && (ImmVal < 0 || ImmVal > 3))
7634 return Error(Loc, Twine(
"out of range ", StringRef(Pref)));
7636 Operands.
push_back(AMDGPUOperand::CreateImm(
this, ImmVal, Loc, ImmTy));
7640ParseStatus AMDGPUAsmParser::parseIndexKey8bit(
OperandVector &Operands) {
7641 return tryParseIndexKey(Operands, AMDGPUOperand::ImmTyIndexKey8bit);
7644ParseStatus AMDGPUAsmParser::parseIndexKey16bit(
OperandVector &Operands) {
7645 return tryParseIndexKey(Operands, AMDGPUOperand::ImmTyIndexKey16bit);
7648ParseStatus AMDGPUAsmParser::parseIndexKey32bit(
OperandVector &Operands) {
7649 return tryParseIndexKey(Operands, AMDGPUOperand::ImmTyIndexKey32bit);
7652ParseStatus AMDGPUAsmParser::tryParseMatrixFMT(
OperandVector &Operands,
7654 AMDGPUOperand::ImmTy
Type) {
7659ParseStatus AMDGPUAsmParser::parseMatrixAFMT(
OperandVector &Operands) {
7660 return tryParseMatrixFMT(Operands,
"matrix_a_fmt",
7661 AMDGPUOperand::ImmTyMatrixAFMT);
7664ParseStatus AMDGPUAsmParser::parseMatrixBFMT(
OperandVector &Operands) {
7665 return tryParseMatrixFMT(Operands,
"matrix_b_fmt",
7666 AMDGPUOperand::ImmTyMatrixBFMT);
7669ParseStatus AMDGPUAsmParser::tryParseMatrixScale(
OperandVector &Operands,
7671 AMDGPUOperand::ImmTy
Type) {
7676ParseStatus AMDGPUAsmParser::parseMatrixAScale(
OperandVector &Operands) {
7677 return tryParseMatrixScale(Operands,
"matrix_a_scale",
7678 AMDGPUOperand::ImmTyMatrixAScale);
7681ParseStatus AMDGPUAsmParser::parseMatrixBScale(
OperandVector &Operands) {
7682 return tryParseMatrixScale(Operands,
"matrix_b_scale",
7683 AMDGPUOperand::ImmTyMatrixBScale);
7686ParseStatus AMDGPUAsmParser::tryParseMatrixScaleFmt(
OperandVector &Operands,
7688 AMDGPUOperand::ImmTy
Type) {
7693ParseStatus AMDGPUAsmParser::parseMatrixAScaleFmt(
OperandVector &Operands) {
7694 return tryParseMatrixScaleFmt(Operands,
"matrix_a_scale_fmt",
7695 AMDGPUOperand::ImmTyMatrixAScaleFmt);
7698ParseStatus AMDGPUAsmParser::parseMatrixBScaleFmt(
OperandVector &Operands) {
7699 return tryParseMatrixScaleFmt(Operands,
"matrix_b_scale_fmt",
7700 AMDGPUOperand::ImmTyMatrixBScaleFmt);
7705ParseStatus AMDGPUAsmParser::parseDfmtNfmt(int64_t &
Format) {
7706 using namespace llvm::AMDGPU::MTBUFFormat;
7712 for (
int I = 0;
I < 2; ++
I) {
7713 if (Dfmt == DFMT_UNDEF && !tryParseFmt(
"dfmt", DFMT_MAX, Dfmt))
7716 if (Nfmt == NFMT_UNDEF && !tryParseFmt(
"nfmt", NFMT_MAX, Nfmt))
7721 if ((Dfmt == DFMT_UNDEF) != (Nfmt == NFMT_UNDEF) &&
7727 if (Dfmt == DFMT_UNDEF && Nfmt == NFMT_UNDEF)
7730 Dfmt = (Dfmt ==
DFMT_UNDEF) ? DFMT_DEFAULT : Dfmt;
7731 Nfmt = (Nfmt ==
NFMT_UNDEF) ? NFMT_DEFAULT : Nfmt;
7737ParseStatus AMDGPUAsmParser::parseUfmt(int64_t &
Format) {
7738 using namespace llvm::AMDGPU::MTBUFFormat;
7742 if (!tryParseFmt(
"format", UFMT_MAX, Fmt))
7745 if (Fmt == UFMT_UNDEF)
7752bool AMDGPUAsmParser::matchDfmtNfmt(int64_t &Dfmt,
7754 StringRef FormatStr,
7756 using namespace llvm::AMDGPU::MTBUFFormat;
7760 if (
Format != DFMT_UNDEF) {
7766 if (
Format != NFMT_UNDEF) {
7771 Error(Loc,
"unsupported format");
7775ParseStatus AMDGPUAsmParser::parseSymbolicSplitFormat(StringRef FormatStr,
7778 using namespace llvm::AMDGPU::MTBUFFormat;
7782 if (!matchDfmtNfmt(Dfmt, Nfmt, FormatStr, FormatLoc))
7787 SMLoc Loc = getLoc();
7788 if (!parseId(Str,
"expected a format string") ||
7789 !matchDfmtNfmt(Dfmt, Nfmt, Str, Loc))
7791 if (Dfmt == DFMT_UNDEF)
7792 return Error(Loc,
"duplicate numeric format");
7793 if (Nfmt == NFMT_UNDEF)
7794 return Error(Loc,
"duplicate data format");
7797 Dfmt = (Dfmt ==
DFMT_UNDEF) ? DFMT_DEFAULT : Dfmt;
7798 Nfmt = (Nfmt ==
NFMT_UNDEF) ? NFMT_DEFAULT : Nfmt;
7802 if (Ufmt == UFMT_UNDEF)
7803 return Error(FormatLoc,
"unsupported format");
7812ParseStatus AMDGPUAsmParser::parseSymbolicUnifiedFormat(StringRef FormatStr,
7815 using namespace llvm::AMDGPU::MTBUFFormat;
7818 if (Id == UFMT_UNDEF)
7822 return Error(Loc,
"unified format is not supported on this GPU");
7828ParseStatus AMDGPUAsmParser::parseNumericFormat(int64_t &
Format) {
7829 using namespace llvm::AMDGPU::MTBUFFormat;
7830 SMLoc Loc = getLoc();
7835 return Error(Loc,
"out of range format");
7840ParseStatus AMDGPUAsmParser::parseSymbolicOrNumericFormat(int64_t &
Format) {
7841 using namespace llvm::AMDGPU::MTBUFFormat;
7847 StringRef FormatStr;
7848 SMLoc Loc = getLoc();
7849 if (!parseId(FormatStr,
"expected a format string"))
7852 auto Res = parseSymbolicUnifiedFormat(FormatStr, Loc,
Format);
7854 Res = parseSymbolicSplitFormat(FormatStr, Loc,
Format);
7864 return parseNumericFormat(
Format);
7867ParseStatus AMDGPUAsmParser::parseFORMAT(
OperandVector &Operands) {
7868 using namespace llvm::AMDGPU::MTBUFFormat;
7872 SMLoc Loc = getLoc();
7882 AMDGPUOperand::CreateImm(
this,
Format, Loc, AMDGPUOperand::ImmTyFORMAT));
7894 Res = parseRegOrImm(Operands);
7901 Res = parseSymbolicOrNumericFormat(
Format);
7906 AMDGPUOperand &
Op =
static_cast<AMDGPUOperand &
>(*Operands[
Size - 2]);
7907 assert(
Op.isImm() &&
Op.getImmTy() == AMDGPUOperand::ImmTyFORMAT);
7914 return Error(getLoc(),
"duplicate format");
7918ParseStatus AMDGPUAsmParser::parseFlatOffset(
OperandVector &Operands) {
7920 parseIntWithPrefix(
"offset", Operands, AMDGPUOperand::ImmTyOffset);
7922 Res = parseIntWithPrefix(
"inst_offset", Operands,
7923 AMDGPUOperand::ImmTyInstOffset);
7928ParseStatus AMDGPUAsmParser::parseR128A16(
OperandVector &Operands) {
7930 parseNamedBit(
"r128", Operands, AMDGPUOperand::ImmTyR128A16);
7932 Res = parseNamedBit(
"a16", Operands, AMDGPUOperand::ImmTyA16);
7936ParseStatus AMDGPUAsmParser::parseBLGP(
OperandVector &Operands) {
7938 parseIntWithPrefix(
"blgp", Operands, AMDGPUOperand::ImmTyBLGP);
7941 parseOperandArrayWithPrefix(
"neg", Operands, AMDGPUOperand::ImmTyBLGP);
7950void AMDGPUAsmParser::cvtExp(MCInst &Inst,
const OperandVector &Operands) {
7951 OptionalImmIndexMap OptionalIdx;
7953 unsigned OperandIdx[4];
7954 unsigned EnMask = 0;
7957 for (
unsigned i = 1, e = Operands.
size(); i != e; ++i) {
7958 AMDGPUOperand &
Op = ((AMDGPUOperand &)*Operands[i]);
7963 OperandIdx[SrcIdx] = Inst.
size();
7964 Op.addRegOperands(Inst, 1);
7971 OperandIdx[SrcIdx] = Inst.
size();
7977 if (
Op.isImm() &&
Op.getImmTy() == AMDGPUOperand::ImmTyExpTgt) {
7978 Op.addImmOperands(Inst, 1);
7982 if (
Op.isToken() && (
Op.getToken() ==
"done" ||
Op.getToken() ==
"row_en"))
7986 OptionalIdx[
Op.getImmTy()] = i;
7992 if (OptionalIdx.find(AMDGPUOperand::ImmTyExpCompr) != OptionalIdx.end()) {
7999 for (
auto i = 0; i < SrcIdx; ++i) {
8001 EnMask |= Compr? (0x3 << i * 2) : (0x1 << i);
8026 IntVal =
encode(ISA, IntVal, CntVal);
8027 if (CntVal !=
decode(ISA, IntVal)) {
8029 IntVal =
encode(ISA, IntVal, -1);
8037bool AMDGPUAsmParser::parseCnt(int64_t &IntVal) {
8039 SMLoc CntLoc = getLoc();
8040 StringRef CntName = getTokenStr();
8047 SMLoc ValLoc = getLoc();
8056 if (CntName ==
"vmcnt" || CntName ==
"vmcnt_sat") {
8058 }
else if (CntName ==
"expcnt" || CntName ==
"expcnt_sat") {
8060 }
else if (CntName ==
"lgkmcnt" || CntName ==
"lgkmcnt_sat") {
8063 Error(CntLoc,
"invalid counter name " + CntName);
8068 Error(ValLoc,
"too large value for " + CntName);
8077 Error(getLoc(),
"expected a counter name");
8085ParseStatus AMDGPUAsmParser::parseSWaitCnt(
OperandVector &Operands) {
8092 if (!parseCnt(Waitcnt))
8100 Operands.
push_back(AMDGPUOperand::CreateImm(
this, Waitcnt, S));
8104bool AMDGPUAsmParser::parseDelay(int64_t &Delay) {
8105 SMLoc FieldLoc = getLoc();
8106 StringRef FieldName = getTokenStr();
8111 SMLoc ValueLoc = getLoc();
8118 if (FieldName ==
"instid0") {
8120 }
else if (FieldName ==
"instskip") {
8122 }
else if (FieldName ==
"instid1") {
8125 Error(FieldLoc,
"invalid field name " + FieldName);
8144 .Case(
"VALU_DEP_1", 1)
8145 .Case(
"VALU_DEP_2", 2)
8146 .Case(
"VALU_DEP_3", 3)
8147 .Case(
"VALU_DEP_4", 4)
8148 .Case(
"TRANS32_DEP_1", 5)
8149 .Case(
"TRANS32_DEP_2", 6)
8150 .Case(
"TRANS32_DEP_3", 7)
8151 .Case(
"FMA_ACCUM_CYCLE_1", 8)
8152 .Case(
"SALU_CYCLE_1", 9)
8153 .Case(
"SALU_CYCLE_2", 10)
8154 .Case(
"SALU_CYCLE_3", 11)
8162 Delay |=
Value << Shift;
8166ParseStatus AMDGPUAsmParser::parseSDelayALU(
OperandVector &Operands) {
8172 if (!parseDelay(Delay))
8180 Operands.
push_back(AMDGPUOperand::CreateImm(
this, Delay, S));
8185AMDGPUOperand::isSWaitCnt()
const {
8189bool AMDGPUOperand::isSDelayALU()
const {
return isImm(); }
8195void AMDGPUAsmParser::depCtrError(SMLoc Loc,
int ErrorId,
8196 StringRef DepCtrName) {
8199 Error(Loc, Twine(
"invalid counter name ", DepCtrName));
8202 Error(Loc, Twine(DepCtrName,
" is not supported on this GPU"));
8205 Error(Loc, Twine(
"duplicate counter name ", DepCtrName));
8208 Error(Loc, Twine(
"invalid value for ", DepCtrName));
8215bool AMDGPUAsmParser::parseDepCtr(int64_t &DepCtr,
unsigned &UsedOprMask) {
8217 using namespace llvm::AMDGPU::DepCtr;
8219 SMLoc DepCtrLoc = getLoc();
8220 StringRef DepCtrName = getTokenStr();
8230 unsigned PrevOprMask = UsedOprMask;
8231 int CntVal =
encodeDepCtr(DepCtrName, ExprVal, UsedOprMask, getSTI());
8234 depCtrError(DepCtrLoc, CntVal, DepCtrName);
8243 Error(getLoc(),
"expected a counter name");
8248 unsigned CntValMask = PrevOprMask ^ UsedOprMask;
8249 DepCtr = (DepCtr & ~CntValMask) | CntVal;
8253ParseStatus AMDGPUAsmParser::parseDepCtr(
OperandVector &Operands) {
8254 using namespace llvm::AMDGPU::DepCtr;
8257 SMLoc Loc = getLoc();
8260 unsigned UsedOprMask = 0;
8262 if (!parseDepCtr(DepCtr, UsedOprMask))
8270 Operands.
push_back(AMDGPUOperand::CreateImm(
this, DepCtr, Loc));
8274bool AMDGPUOperand::isDepCtr()
const {
return isS16Imm(); }
8280ParseStatus AMDGPUAsmParser::parseHwregFunc(OperandInfoTy &HwReg,
8282 OperandInfoTy &Width) {
8283 using namespace llvm::AMDGPU::Hwreg;
8289 HwReg.Loc = getLoc();
8292 HwReg.IsSymbolic =
true;
8294 }
else if (!
parseExpr(HwReg.Val,
"a register name")) {
8302 if (!skipToken(
AsmToken::Comma,
"expected a comma or a closing parenthesis"))
8312 Width.Loc = getLoc();
8320ParseStatus AMDGPUAsmParser::parseHwreg(
OperandVector &Operands) {
8321 using namespace llvm::AMDGPU::Hwreg;
8324 SMLoc Loc = getLoc();
8326 StructuredOpField HwReg(
"id",
"hardware register", HwregId::Width,
8328 StructuredOpField
Offset(
"offset",
"bit offset", HwregOffset::Width,
8329 HwregOffset::Default);
8330 struct : StructuredOpField {
8331 using StructuredOpField::StructuredOpField;
8332 bool validate(AMDGPUAsmParser &Parser)
const override {
8334 return Error(Parser,
"only values from 1 to 32 are legal");
8337 } Width(
"size",
"bitfield width", HwregSize::Width, HwregSize::Default);
8338 ParseStatus Res = parseStructuredOpFields({&HwReg, &
Offset, &Width});
8341 Res = parseHwregFunc(HwReg,
Offset, Width);
8344 if (!validateStructuredOpFields({&HwReg, &
Offset, &Width}))
8346 ImmVal = HwregEncoding::encode(HwReg.Val,
Offset.Val, Width.Val);
8350 parseExpr(ImmVal,
"a hwreg macro, structured immediate"))
8357 return Error(Loc,
"invalid immediate: only 16-bit values are legal");
8359 AMDGPUOperand::CreateImm(
this, ImmVal, Loc, AMDGPUOperand::ImmTyHwreg));
8363bool AMDGPUOperand::isHwreg()
const {
8364 return isImmTy(ImmTyHwreg);
8372AMDGPUAsmParser::parseSendMsgBody(OperandInfoTy &Msg,
8374 OperandInfoTy &Stream) {
8375 using namespace llvm::AMDGPU::SendMsg;
8380 Msg.IsSymbolic =
true;
8382 }
else if (!
parseExpr(Msg.Val,
"a message name")) {
8387 Op.IsDefined =
true;
8390 (
Op.Val =
getMsgOpId(Msg.Val, getTokenStr(), getSTI())) !=
8393 }
else if (!
parseExpr(
Op.Val,
"an operation name")) {
8398 Stream.IsDefined =
true;
8399 Stream.Loc = getLoc();
8409AMDGPUAsmParser::validateSendMsg(
const OperandInfoTy &Msg,
8410 const OperandInfoTy &
Op,
8411 const OperandInfoTy &Stream) {
8412 using namespace llvm::AMDGPU::SendMsg;
8417 bool Strict = Msg.IsSymbolic;
8421 Error(Msg.Loc,
"specified message id is not supported on this GPU");
8426 Error(Msg.Loc,
"invalid message id");
8432 Error(
Op.Loc,
"message does not support operations");
8434 Error(Msg.Loc,
"missing message operation");
8440 Error(
Op.Loc,
"specified operation id is not supported on this GPU");
8442 Error(
Op.Loc,
"invalid operation id");
8447 Error(Stream.Loc,
"message operation does not support streams");
8451 Error(Stream.Loc,
"invalid message stream id");
8457ParseStatus AMDGPUAsmParser::parseSendMsg(
OperandVector &Operands) {
8458 using namespace llvm::AMDGPU::SendMsg;
8461 SMLoc Loc = getLoc();
8465 OperandInfoTy
Op(OP_NONE_);
8466 OperandInfoTy Stream(STREAM_ID_NONE_);
8467 if (parseSendMsgBody(Msg,
Op, Stream) &&
8468 validateSendMsg(Msg,
Op, Stream)) {
8473 }
else if (
parseExpr(ImmVal,
"a sendmsg macro")) {
8475 return Error(Loc,
"invalid immediate: only 16-bit values are legal");
8480 Operands.
push_back(AMDGPUOperand::CreateImm(
this, ImmVal, Loc, AMDGPUOperand::ImmTySendMsg));
8484bool AMDGPUOperand::isSendMsg()
const {
8485 return isImmTy(ImmTySendMsg);
8488ParseStatus AMDGPUAsmParser::parseWaitEvent(
OperandVector &Operands) {
8489 using namespace llvm::AMDGPU::WaitEvent;
8491 SMLoc Loc = getLoc();
8494 StructuredOpField DontWaitExportReady(
"dont_wait_export_ready",
"bit value",
8496 StructuredOpField ExportReady(
"export_ready",
"bit value", 1, 0);
8498 StructuredOpField *TargetBitfield =
8499 isGFX11() ? &DontWaitExportReady : &ExportReady;
8501 ParseStatus Res = parseStructuredOpFields({TargetBitfield});
8505 if (!validateStructuredOpFields({TargetBitfield}))
8507 ImmVal = TargetBitfield->Val;
8514 return Error(Loc,
"invalid immediate: only 16-bit values are legal");
8516 Operands.
push_back(AMDGPUOperand::CreateImm(
this, ImmVal, Loc,
8517 AMDGPUOperand::ImmTyWaitEvent));
8521bool AMDGPUOperand::isWaitEvent()
const {
return isImmTy(ImmTyWaitEvent); }
8527ParseStatus AMDGPUAsmParser::parseInterpSlot(
OperandVector &Operands) {
8534 int Slot = StringSwitch<int>(Str)
8541 return Error(S,
"invalid interpolation slot");
8543 Operands.
push_back(AMDGPUOperand::CreateImm(
this, Slot, S,
8544 AMDGPUOperand::ImmTyInterpSlot));
8548ParseStatus AMDGPUAsmParser::parseInterpAttr(
OperandVector &Operands) {
8555 if (!Str.starts_with(
"attr"))
8556 return Error(S,
"invalid interpolation attribute");
8558 StringRef Chan = Str.take_back(2);
8559 int AttrChan = StringSwitch<int>(Chan)
8566 return Error(S,
"invalid or missing interpolation attribute channel");
8568 Str = Str.drop_back(2).drop_front(4);
8571 if (Str.getAsInteger(10, Attr))
8572 return Error(S,
"invalid or missing interpolation attribute number");
8575 return Error(S,
"out of bounds interpolation attribute number");
8579 Operands.
push_back(AMDGPUOperand::CreateImm(
this, Attr, S,
8580 AMDGPUOperand::ImmTyInterpAttr));
8581 Operands.
push_back(AMDGPUOperand::CreateImm(
8582 this, AttrChan, SChan, AMDGPUOperand::ImmTyInterpAttrChan));
8590ParseStatus AMDGPUAsmParser::parseExpTgt(
OperandVector &Operands) {
8591 using namespace llvm::AMDGPU::Exp;
8601 return Error(S, (Id == ET_INVALID)
8602 ?
"invalid exp target"
8603 :
"exp target is not supported on this GPU");
8605 Operands.
push_back(AMDGPUOperand::CreateImm(
this, Id, S,
8606 AMDGPUOperand::ImmTyExpTgt));
8615AMDGPUAsmParser::isId(
const AsmToken &Token,
const StringRef Id)
const {
8620AMDGPUAsmParser::isId(
const StringRef Id)
const {
8626 return getTokenKind() ==
Kind;
8629StringRef AMDGPUAsmParser::getId()
const {
8634AMDGPUAsmParser::trySkipId(
const StringRef Id) {
8643AMDGPUAsmParser::trySkipId(
const StringRef Pref,
const StringRef Id) {
8645 StringRef Tok = getTokenStr();
8656 if (isId(Id) && peekToken().is(Kind)) {
8666 if (isToken(Kind)) {
8675 const StringRef ErrMsg) {
8676 if (!trySkipToken(Kind)) {
8677 Error(getLoc(), ErrMsg);
8684AMDGPUAsmParser::parseExpr(int64_t &
Imm, StringRef Expected) {
8688 if (Parser.parseExpression(Expr))
8691 if (Expr->evaluateAsAbsolute(
Imm))
8694 if (Expected.empty()) {
8695 Error(S,
"expected absolute expression");
8697 Error(S, Twine(
"expected ", Expected) +
8698 Twine(
" or an absolute expression"));
8708 if (Parser.parseExpression(Expr))
8712 if (Expr->evaluateAsAbsolute(IntVal)) {
8713 Operands.
push_back(AMDGPUOperand::CreateImm(
this, IntVal, S));
8715 Operands.
push_back(AMDGPUOperand::CreateExpr(
this, Expr, S));
8721AMDGPUAsmParser::parseString(StringRef &Val,
const StringRef ErrMsg) {
8723 Val =
getToken().getStringContents();
8727 Error(getLoc(), ErrMsg);
8732AMDGPUAsmParser::parseId(StringRef &Val,
const StringRef ErrMsg) {
8734 Val = getTokenStr();
8738 if (!ErrMsg.
empty())
8739 Error(getLoc(), ErrMsg);
8744AMDGPUAsmParser::getToken()
const {
8745 return Parser.getTok();
8748AsmToken AMDGPUAsmParser::peekToken(
bool ShouldSkipSpace) {
8751 : getLexer().peekTok(ShouldSkipSpace);
8756 auto TokCount = getLexer().peekTokens(Tokens);
8758 for (
auto Idx = TokCount; Idx < Tokens.
size(); ++Idx)
8763AMDGPUAsmParser::getTokenKind()
const {
8764 return getLexer().getKind();
8768AMDGPUAsmParser::getLoc()
const {
8773AMDGPUAsmParser::getTokenStr()
const {
8778AMDGPUAsmParser::lex() {
8782const AMDGPUOperand &
8783AMDGPUAsmParser::findMCOperand(
const OperandVector &Operands,
8784 int MCOpIdx)
const {
8785 for (
const auto &
Op : Operands) {
8786 const AMDGPUOperand &TargetOp =
static_cast<AMDGPUOperand &
>(*Op);
8787 if (TargetOp.getMCOpIdx() == MCOpIdx)
8793SMLoc AMDGPUAsmParser::getInstLoc(
const OperandVector &Operands)
const {
8794 return ((AMDGPUOperand &)*Operands[0]).getStartLoc();
8798SMLoc AMDGPUAsmParser::getLaterLoc(SMLoc a, SMLoc b) {
8802SMLoc AMDGPUAsmParser::getOperandLoc(
const OperandVector &Operands,
8803 int MCOpIdx)
const {
8804 return findMCOperand(Operands, MCOpIdx).getStartLoc();
8808AMDGPUAsmParser::getOperandLoc(std::function<
bool(
const AMDGPUOperand&)>
Test,
8810 for (
unsigned i = Operands.
size() - 1; i > 0; --i) {
8811 AMDGPUOperand &
Op = ((AMDGPUOperand &)*Operands[i]);
8813 return Op.getStartLoc();
8815 return getInstLoc(Operands);
8819AMDGPUAsmParser::getImmLoc(AMDGPUOperand::ImmTy
Type,
8821 auto Test = [=](
const AMDGPUOperand&
Op) {
return Op.isImmTy(
Type); };
8822 return getOperandLoc(
Test, Operands);
8836 StringRef
Id = getTokenStr();
8837 SMLoc IdLoc = getLoc();
8843 find_if(Fields, [Id](StructuredOpField *
F) {
return F->Id ==
Id; });
8844 if (
I == Fields.
end())
8845 return Error(IdLoc,
"unknown field");
8846 if ((*I)->IsDefined)
8847 return Error(IdLoc,
"duplicate field");
8850 (*I)->Loc = getLoc();
8853 (*I)->IsDefined =
true;
8860bool AMDGPUAsmParser::validateStructuredOpFields(
8862 return all_of(Fields, [
this](
const StructuredOpField *
F) {
8863 return F->validate(*
this);
8874 const unsigned OrMask,
8875 const unsigned XorMask) {
8884bool AMDGPUAsmParser::parseSwizzleOperand(int64_t &
Op,
const unsigned MinVal,
8885 const unsigned MaxVal,
8886 const Twine &ErrMsg, SMLoc &Loc) {
8903AMDGPUAsmParser::parseSwizzleOperands(
const unsigned OpNum, int64_t*
Op,
8904 const unsigned MinVal,
8905 const unsigned MaxVal,
8906 const StringRef ErrMsg) {
8908 for (
unsigned i = 0; i < OpNum; ++i) {
8909 if (!parseSwizzleOperand(
Op[i], MinVal, MaxVal, ErrMsg, Loc))
8917AMDGPUAsmParser::parseSwizzleQuadPerm(int64_t &
Imm) {
8918 using namespace llvm::AMDGPU::Swizzle;
8921 if (parseSwizzleOperands(LANE_NUM, Lane, 0, LANE_MAX,
8922 "expected a 2-bit lane id")) {
8933AMDGPUAsmParser::parseSwizzleBroadcast(int64_t &
Imm) {
8934 using namespace llvm::AMDGPU::Swizzle;
8940 if (!parseSwizzleOperand(GroupSize,
8942 "group size must be in the interval [2,32]",
8947 Error(Loc,
"group size must be a power of two");
8950 if (parseSwizzleOperand(LaneIdx,
8952 "lane id must be in the interval [0,group size - 1]",
8961AMDGPUAsmParser::parseSwizzleReverse(int64_t &
Imm) {
8962 using namespace llvm::AMDGPU::Swizzle;
8967 if (!parseSwizzleOperand(GroupSize,
8969 "group size must be in the interval [2,32]",
8974 Error(Loc,
"group size must be a power of two");
8983AMDGPUAsmParser::parseSwizzleSwap(int64_t &
Imm) {
8984 using namespace llvm::AMDGPU::Swizzle;
8989 if (!parseSwizzleOperand(GroupSize,
8991 "group size must be in the interval [1,16]",
8996 Error(Loc,
"group size must be a power of two");
9005AMDGPUAsmParser::parseSwizzleBitmaskPerm(int64_t &
Imm) {
9006 using namespace llvm::AMDGPU::Swizzle;
9013 SMLoc StrLoc = getLoc();
9014 if (!parseString(Ctl)) {
9017 if (Ctl.
size() != BITMASK_WIDTH) {
9018 Error(StrLoc,
"expected a 5-character mask");
9022 unsigned AndMask = 0;
9023 unsigned OrMask = 0;
9024 unsigned XorMask = 0;
9026 for (
size_t i = 0; i < Ctl.
size(); ++i) {
9030 Error(StrLoc,
"invalid mask");
9051bool AMDGPUAsmParser::parseSwizzleFFT(int64_t &
Imm) {
9052 using namespace llvm::AMDGPU::Swizzle;
9055 Error(getLoc(),
"FFT mode swizzle not supported on this GPU");
9061 if (!parseSwizzleOperand(Swizzle, 0, FFT_SWIZZLE_MAX,
9062 "FFT swizzle must be in the interval [0," +
9063 Twine(FFT_SWIZZLE_MAX) + Twine(
']'),
9071bool AMDGPUAsmParser::parseSwizzleRotate(int64_t &
Imm) {
9072 using namespace llvm::AMDGPU::Swizzle;
9075 Error(getLoc(),
"Rotate mode swizzle not supported on this GPU");
9082 if (!parseSwizzleOperand(
Direction, 0, 1,
9083 "direction must be 0 (left) or 1 (right)", Loc))
9087 if (!parseSwizzleOperand(
9088 RotateSize, 0, ROTATE_MAX_SIZE,
9089 "number of threads to rotate must be in the interval [0," +
9090 Twine(ROTATE_MAX_SIZE) + Twine(
']'),
9095 (RotateSize << ROTATE_SIZE_SHIFT);
9100AMDGPUAsmParser::parseSwizzleOffset(int64_t &
Imm) {
9102 SMLoc OffsetLoc = getLoc();
9108 Error(OffsetLoc,
"expected a 16-bit offset");
9115AMDGPUAsmParser::parseSwizzleMacro(int64_t &
Imm) {
9116 using namespace llvm::AMDGPU::Swizzle;
9120 SMLoc ModeLoc = getLoc();
9123 if (trySkipId(IdSymbolic[ID_QUAD_PERM])) {
9124 Ok = parseSwizzleQuadPerm(
Imm);
9125 }
else if (trySkipId(IdSymbolic[ID_BITMASK_PERM])) {
9126 Ok = parseSwizzleBitmaskPerm(
Imm);
9127 }
else if (trySkipId(IdSymbolic[ID_BROADCAST])) {
9128 Ok = parseSwizzleBroadcast(
Imm);
9129 }
else if (trySkipId(IdSymbolic[ID_SWAP])) {
9130 Ok = parseSwizzleSwap(
Imm);
9131 }
else if (trySkipId(IdSymbolic[ID_REVERSE])) {
9132 Ok = parseSwizzleReverse(
Imm);
9133 }
else if (trySkipId(IdSymbolic[ID_FFT])) {
9134 Ok = parseSwizzleFFT(
Imm);
9135 }
else if (trySkipId(IdSymbolic[ID_ROTATE])) {
9136 Ok = parseSwizzleRotate(
Imm);
9138 Error(ModeLoc,
"expected a swizzle mode");
9141 return Ok && skipToken(
AsmToken::RParen,
"expected a closing parentheses");
9147ParseStatus AMDGPUAsmParser::parseSwizzle(
OperandVector &Operands) {
9151 if (trySkipId(
"offset")) {
9155 if (trySkipId(
"swizzle")) {
9156 Ok = parseSwizzleMacro(
Imm);
9158 Ok = parseSwizzleOffset(
Imm);
9162 Operands.
push_back(AMDGPUOperand::CreateImm(
this,
Imm, S, AMDGPUOperand::ImmTySwizzle));
9170AMDGPUOperand::isSwizzle()
const {
9171 return isImmTy(ImmTySwizzle);
9178int64_t AMDGPUAsmParser::parseGPRIdxMacro() {
9180 using namespace llvm::AMDGPU::VGPRIndexMode;
9192 for (
unsigned ModeId = ID_MIN; ModeId <=
ID_MAX; ++ModeId) {
9193 if (trySkipId(IdSymbolic[ModeId])) {
9201 "expected a VGPR index mode or a closing parenthesis" :
9202 "expected a VGPR index mode");
9207 Error(S,
"duplicate VGPR index mode");
9215 "expected a comma or a closing parenthesis"))
9222ParseStatus AMDGPUAsmParser::parseGPRIdxMode(
OperandVector &Operands) {
9224 using namespace llvm::AMDGPU::VGPRIndexMode;
9230 Imm = parseGPRIdxMacro();
9234 if (getParser().parseAbsoluteExpression(
Imm))
9237 return Error(S,
"invalid immediate: only 4-bit values are legal");
9241 AMDGPUOperand::CreateImm(
this,
Imm, S, AMDGPUOperand::ImmTyGprIdxMode));
// Operand predicate: true when isImmTy() reports the ImmTyGprIdxMode
// immediate type, which is the type parseGPRIdxMode() attaches to the operand
// it creates.
9245bool AMDGPUOperand::isGPRIdxMode()
const {
9246 return isImmTy(ImmTyGprIdxMode);
9253ParseStatus AMDGPUAsmParser::parseSOPPBrTarget(
OperandVector &Operands) {
9258 if (isRegister() || isModifier())
9264 AMDGPUOperand &Opr = ((AMDGPUOperand &)*Operands[Operands.
size() - 1]);
9265 assert(Opr.isImm() || Opr.isExpr());
9266 SMLoc Loc = Opr.getStartLoc();
9270 if (Opr.isExpr() && !Opr.isSymbolRefExpr()) {
9271 Error(Loc,
"expected an absolute expression or a label");
9272 }
else if (Opr.isImm() && !Opr.isS16Imm()) {
9273 Error(Loc,
"expected a 16-bit signed jump offset");
// Parses a boolean-register operand by forwarding directly to parseReg();
// the ParseStatus of that call is returned unchanged.
9283ParseStatus AMDGPUAsmParser::parseBoolReg(
OperandVector &Operands) {
9284 return parseReg(Operands);
9291void AMDGPUAsmParser::cvtMubufImpl(MCInst &Inst,
9294 OptionalImmIndexMap OptionalIdx;
9295 unsigned FirstOperandIdx = 1;
9296 bool IsAtomicReturn =
false;
9303 for (
unsigned i = FirstOperandIdx, e = Operands.
size(); i != e; ++i) {
9304 AMDGPUOperand &
Op = ((AMDGPUOperand &)*Operands[i]);
9308 Op.addRegOperands(Inst, 1);
9312 if (IsAtomicReturn && i == FirstOperandIdx)
9313 Op.addRegOperands(Inst, 1);
9318 if (
Op.isImm() &&
Op.getImmTy() == AMDGPUOperand::ImmTyNone) {
9319 Op.addImmOperands(Inst, 1);
9331 OptionalIdx[
Op.getImmTy()] = i;
9345bool AMDGPUOperand::isSMRDOffset8()
const {
9349bool AMDGPUOperand::isSMEMOffset()
const {
9351 return isImmLiteral();
9354bool AMDGPUOperand::isSMRDLiteralOffset()
const {
9389bool AMDGPUAsmParser::convertDppBoundCtrl(int64_t &BoundCtrl) {
9390 if (BoundCtrl == 0 || BoundCtrl == 1) {
9398void AMDGPUAsmParser::onBeginOfFile() {
9399 if (!getParser().getStreamer().getTargetStreamer() ||
9403 if (!getTargetStreamer().getTargetID())
9404 getTargetStreamer().initializeTargetID(getSTI(),
9405 getSTI().getFeatureString());
9408 getTargetStreamer().EmitDirectiveAMDGCNTarget();
9417bool AMDGPUAsmParser::parsePrimaryExpr(
const MCExpr *&Res, SMLoc &EndLoc) {
9421 StringRef TokenId = getTokenStr();
9422 AGVK VK = StringSwitch<AGVK>(TokenId)
9423 .Case(
"max", AGVK::AGVK_Max)
9424 .Case(
"min", AGVK::AGVK_Min)
9425 .Case(
"or", AGVK::AGVK_Or)
9426 .Case(
"extrasgprs", AGVK::AGVK_ExtraSGPRs)
9427 .Case(
"totalnumvgprs", AGVK::AGVK_TotalNumVGPRs)
9428 .Case(
"alignto", AGVK::AGVK_AlignTo)
9429 .Case(
"occupancy", AGVK::AGVK_Occupancy)
9430 .Case(
"instprefsize", AGVK::AGVK_InstPrefSize)
9431 .Default(AGVK::AGVK_None);
9435 uint64_t CommaCount = 0;
9440 if (Exprs.
empty()) {
9442 "empty " + Twine(TokenId) +
" expression");
9445 if (CommaCount + 1 != Exprs.
size()) {
9447 "mismatch of commas in " + Twine(TokenId) +
" expression");
9454 if (getParser().parseExpression(Expr, EndLoc))
9458 if (LastTokenWasComma)
9462 "unexpected token in " + Twine(TokenId) +
" expression");
9468 return getParser().parsePrimaryExpr(Res, EndLoc,
nullptr);
9471ParseStatus AMDGPUAsmParser::parseOModSI(
OperandVector &Operands) {
9472 StringRef
Name = getTokenStr();
9473 if (Name ==
"mul") {
9474 return parseIntWithPrefix(
"mul", Operands,
9478 if (Name ==
"div") {
9479 return parseIntWithPrefix(
"div", Operands,
9490 int OpSelIdx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::op_sel);
9495 const AMDGPU::OpName
Ops[] = {AMDGPU::OpName::src0, AMDGPU::OpName::src1,
9496 AMDGPU::OpName::src2};
9504 int DstIdx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::vdst);
9509 int ModIdx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::src0_modifiers);
9511 if (
DstOp.isReg() &&
9516 if ((OpSel & (1 << SrcNum)) != 0)
9522void AMDGPUAsmParser::cvtVOP3OpSel(MCInst &Inst,
9524 cvtVOP3P(Inst, Operands);
9528void AMDGPUAsmParser::cvtVOP3OpSel(MCInst &Inst,
const OperandVector &Operands,
9529 OptionalImmIndexMap &OptionalIdx) {
9530 cvtVOP3P(Inst, Operands, OptionalIdx);
9539 &&
Desc.NumOperands > (OpNum + 1)
9541 &&
Desc.operands()[OpNum + 1].RegClass != -1
9543 &&
Desc.getOperandConstraint(OpNum + 1,
9547void AMDGPUAsmParser::cvtOpSelHelper(MCInst &Inst,
unsigned OpSel) {
9549 constexpr AMDGPU::OpName
Ops[] = {AMDGPU::OpName::src0, AMDGPU::OpName::src1,
9550 AMDGPU::OpName::src2};
9551 constexpr AMDGPU::OpName ModOps[] = {AMDGPU::OpName::src0_modifiers,
9552 AMDGPU::OpName::src1_modifiers,
9553 AMDGPU::OpName::src2_modifiers};
9554 for (
int J = 0; J < 3; ++J) {
9555 int OpIdx = AMDGPU::getNamedOperandIdx(
Opc,
Ops[J]);
9561 int ModIdx = AMDGPU::getNamedOperandIdx(
Opc, ModOps[J]);
9564 if ((OpSel & (1 << J)) != 0)
9567 if (ModOps[J] == AMDGPU::OpName::src0_modifiers && (OpSel & (1 << 3)) != 0)
9574void AMDGPUAsmParser::cvtVOP3Interp(MCInst &Inst,
const OperandVector &Operands)
9576 OptionalImmIndexMap OptionalIdx;
9581 for (
unsigned J = 0; J <
Desc.getNumDefs(); ++J) {
9582 ((AMDGPUOperand &)*Operands[
I++]).addRegOperands(Inst, 1);
9585 for (
unsigned E = Operands.
size();
I !=
E; ++
I) {
9586 AMDGPUOperand &
Op = ((AMDGPUOperand &)*Operands[
I]);
9588 Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
9589 }
else if (
Op.isInterpSlot() ||
Op.isInterpAttr() ||
9590 Op.isInterpAttrChan()) {
9592 }
else if (
Op.isImmModifier()) {
9593 OptionalIdx[
Op.getImmTy()] =
I;
9601 AMDGPUOperand::ImmTyHigh);
9605 AMDGPUOperand::ImmTyClamp);
9609 AMDGPUOperand::ImmTyOModSI);
9614 AMDGPUOperand::ImmTyOpSel);
9615 int OpSelIdx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::op_sel);
9618 cvtOpSelHelper(Inst, OpSel);
9622void AMDGPUAsmParser::cvtVINTERP(MCInst &Inst,
const OperandVector &Operands)
9624 OptionalImmIndexMap OptionalIdx;
9629 for (
unsigned J = 0; J <
Desc.getNumDefs(); ++J) {
9630 ((AMDGPUOperand &)*Operands[
I++]).addRegOperands(Inst, 1);
9633 for (
unsigned E = Operands.
size();
I !=
E; ++
I) {
9634 AMDGPUOperand &
Op = ((AMDGPUOperand &)*Operands[
I]);
9636 Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
9637 }
else if (
Op.isImmModifier()) {
9638 OptionalIdx[
Op.getImmTy()] =
I;
9646 int OpSelIdx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::op_sel);
9656 cvtOpSelHelper(Inst, OpSel);
9659void AMDGPUAsmParser::cvtScaledMFMA(MCInst &Inst,
9661 OptionalImmIndexMap OptionalIdx;
9664 int CbszOpIdx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::cbsz);
9668 for (
unsigned J = 0; J <
Desc.getNumDefs(); ++J)
9669 static_cast<AMDGPUOperand &
>(*Operands[
I++]).addRegOperands(Inst, 1);
9671 for (
unsigned E = Operands.
size();
I !=
E; ++
I) {
9672 AMDGPUOperand &
Op =
static_cast<AMDGPUOperand &
>(*Operands[
I]);
9677 if (NumOperands == CbszOpIdx) {
9682 Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
9683 }
else if (
Op.isImmModifier()) {
9684 OptionalIdx[
Op.getImmTy()] =
I;
9686 Op.addRegOrImmOperands(Inst, 1);
9691 auto CbszIdx = OptionalIdx.find(AMDGPUOperand::ImmTyCBSZ);
9692 if (CbszIdx != OptionalIdx.end()) {
9693 int CbszVal = ((AMDGPUOperand &)*Operands[CbszIdx->second]).
getImm();
9697 int BlgpOpIdx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::blgp);
9698 auto BlgpIdx = OptionalIdx.find(AMDGPUOperand::ImmTyBLGP);
9699 if (BlgpIdx != OptionalIdx.end()) {
9700 int BlgpVal = ((AMDGPUOperand &)*Operands[BlgpIdx->second]).
getImm();
9711 auto OpselIdx = OptionalIdx.find(AMDGPUOperand::ImmTyOpSel);
9712 if (OpselIdx != OptionalIdx.end()) {
9713 OpSel =
static_cast<const AMDGPUOperand &
>(*Operands[OpselIdx->second])
9717 unsigned OpSelHi = 0;
9718 auto OpselHiIdx = OptionalIdx.find(AMDGPUOperand::ImmTyOpSelHi);
9719 if (OpselHiIdx != OptionalIdx.end()) {
9720 OpSelHi =
static_cast<const AMDGPUOperand &
>(*Operands[OpselHiIdx->second])
9723 const AMDGPU::OpName ModOps[] = {AMDGPU::OpName::src0_modifiers,
9724 AMDGPU::OpName::src1_modifiers};
9726 for (
unsigned J = 0; J < 2; ++J) {
9727 unsigned ModVal = 0;
9728 if (OpSel & (1 << J))
9730 if (OpSelHi & (1 << J))
9733 const int ModIdx = AMDGPU::getNamedOperandIdx(
Opc, ModOps[J]);
9738void AMDGPUAsmParser::cvtVOP3(MCInst &Inst,
const OperandVector &Operands,
9739 OptionalImmIndexMap &OptionalIdx) {
9744 for (
unsigned J = 0; J <
Desc.getNumDefs(); ++J) {
9745 ((AMDGPUOperand &)*Operands[
I++]).addRegOperands(Inst, 1);
9748 for (
unsigned E = Operands.
size();
I !=
E; ++
I) {
9749 AMDGPUOperand &
Op = ((AMDGPUOperand &)*Operands[
I]);
9751 Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
9752 }
else if (
Op.isImmModifier()) {
9753 OptionalIdx[
Op.getImmTy()] =
I;
9755 Op.addRegOrImmOperands(Inst, 1);
9761 AMDGPUOperand::ImmTyScaleSel);
9765 AMDGPUOperand::ImmTyClamp);
9771 AMDGPUOperand::ImmTyByteSel);
9776 AMDGPUOperand::ImmTyOModSI);
9783 auto *it = Inst.
begin();
9784 std::advance(it, AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::src2_modifiers));
// Convenience overload for callers that do not need the optional-immediate
// index map: it supplies a local, initially empty OptionalImmIndexMap and
// forwards to the three-argument cvtVOP3().
9792void AMDGPUAsmParser::cvtVOP3(MCInst &Inst,
const OperandVector &Operands) {
9793 OptionalImmIndexMap OptionalIdx;
9794 cvtVOP3(Inst, Operands, OptionalIdx);
9797void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst,
const OperandVector &Operands,
9798 OptionalImmIndexMap &OptIdx) {
9804 if (
Opc == AMDGPU::V_CVT_SCALEF32_PK_FP4_F16_vi ||
9805 Opc == AMDGPU::V_CVT_SCALEF32_PK_FP4_BF16_vi ||
9806 Opc == AMDGPU::V_CVT_SR_BF8_F32_vi ||
9807 Opc == AMDGPU::V_CVT_SR_FP8_F32_vi ||
9808 Opc == AMDGPU::V_CVT_SR_BF8_F32_gfx12_e64_gfx11 ||
9809 Opc == AMDGPU::V_CVT_SR_FP8_F32_gfx12_e64_gfx11 ||
9810 Opc == AMDGPU::V_CVT_SR_BF8_F32_gfx12_e64_gfx12 ||
9811 Opc == AMDGPU::V_CVT_SR_FP8_F32_gfx12_e64_gfx12 ||
9812 Opc == AMDGPU::V_CVT_SR_BF8_F32_gfx12_e64_gfx13 ||
9813 Opc == AMDGPU::V_CVT_SR_FP8_F32_gfx12_e64_gfx13) {
9822 int VdstInIdx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::vdst_in);
9823 if (VdstInIdx != -1 && VdstInIdx ==
static_cast<int>(Inst.
getNumOperands()))
9826 int BitOp3Idx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::bitop3);
9827 if (BitOp3Idx != -1) {
9834 int OpSelIdx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::op_sel);
9835 if (OpSelIdx != -1) {
9839 int OpSelHiIdx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::op_sel_hi);
9840 if (OpSelHiIdx != -1) {
9841 int DefaultVal =
IsPacked ? -1 : 0;
9847 AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::matrix_a_fmt);
9848 if (MatrixAFMTIdx != -1) {
9850 AMDGPUOperand::ImmTyMatrixAFMT, 0);
9854 AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::matrix_b_fmt);
9855 if (MatrixBFMTIdx != -1) {
9857 AMDGPUOperand::ImmTyMatrixBFMT, 0);
9860 int MatrixAScaleIdx =
9861 AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::matrix_a_scale);
9862 if (MatrixAScaleIdx != -1) {
9864 AMDGPUOperand::ImmTyMatrixAScale, 0);
9867 int MatrixBScaleIdx =
9868 AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::matrix_b_scale);
9869 if (MatrixBScaleIdx != -1) {
9871 AMDGPUOperand::ImmTyMatrixBScale, 0);
9874 int MatrixAScaleFmtIdx =
9875 AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::matrix_a_scale_fmt);
9876 if (MatrixAScaleFmtIdx != -1) {
9878 AMDGPUOperand::ImmTyMatrixAScaleFmt, 0);
9881 int MatrixBScaleFmtIdx =
9882 AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::matrix_b_scale_fmt);
9883 if (MatrixBScaleFmtIdx != -1) {
9885 AMDGPUOperand::ImmTyMatrixBScaleFmt, 0);
9890 AMDGPUOperand::ImmTyMatrixAReuse, 0);
9894 AMDGPUOperand::ImmTyMatrixBReuse, 0);
9896 int NegLoIdx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::neg_lo);
9900 int NegHiIdx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::neg_hi);
9904 const AMDGPU::OpName
Ops[] = {AMDGPU::OpName::src0, AMDGPU::OpName::src1,
9905 AMDGPU::OpName::src2};
9906 const AMDGPU::OpName ModOps[] = {AMDGPU::OpName::src0_modifiers,
9907 AMDGPU::OpName::src1_modifiers,
9908 AMDGPU::OpName::src2_modifiers};
9911 unsigned OpSelHi = 0;
9918 if (OpSelHiIdx != -1)
9927 for (
int J = 0; J < 3; ++J) {
9928 int OpIdx = AMDGPU::getNamedOperandIdx(
Opc,
Ops[J]);
9932 int ModIdx = AMDGPU::getNamedOperandIdx(
Opc, ModOps[J]);
9937 uint32_t ModVal = 0;
9940 if (SrcOp.
isReg() && getMRI()
9947 if ((OpSel & (1 << J)) != 0)
9951 if ((OpSelHi & (1 << J)) != 0)
9954 if ((NegLo & (1 << J)) != 0)
9957 if ((NegHi & (1 << J)) != 0)
// Two-argument overload: runs the three-argument cvtVOP3() first and then the
// three-argument cvtVOP3P(), sharing one local OptionalImmIndexMap between the
// two calls so the second call sees whatever the first recorded in it.
9964void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst,
const OperandVector &Operands) {
9965 OptionalImmIndexMap OptIdx;
9966 cvtVOP3(Inst, Operands, OptIdx);
9967 cvtVOP3P(Inst, Operands, OptIdx);
9971 unsigned i,
unsigned Opc,
9973 if (AMDGPU::getNamedOperandIdx(
Opc,
OpName) != -1)
9974 ((AMDGPUOperand &)*
Operands[i]).addRegOrImmWithFPInputModsOperands(Inst, 2);
9976 ((AMDGPUOperand &)*
Operands[i]).addRegOperands(Inst, 1);
9979void AMDGPUAsmParser::cvtSWMMAC(MCInst &Inst,
const OperandVector &Operands) {
9982 ((AMDGPUOperand &)*Operands[1]).addRegOperands(Inst, 1);
9985 ((AMDGPUOperand &)*Operands[1]).addRegOperands(Inst, 1);
9986 ((AMDGPUOperand &)*Operands[4]).addRegOperands(Inst, 1);
9988 OptionalImmIndexMap OptIdx;
9989 for (
unsigned i = 5; i < Operands.
size(); ++i) {
9990 AMDGPUOperand &
Op = ((AMDGPUOperand &)*Operands[i]);
9991 OptIdx[
Op.getImmTy()] = i;
9996 AMDGPUOperand::ImmTyIndexKey8bit);
10000 AMDGPUOperand::ImmTyIndexKey16bit);
10004 AMDGPUOperand::ImmTyIndexKey32bit);
10009 cvtVOP3P(Inst, Operands, OptIdx);
10016ParseStatus AMDGPUAsmParser::parseVOPD(
OperandVector &Operands) {
10021 SMLoc S = getLoc();
10024 Operands.
push_back(AMDGPUOperand::CreateToken(
this,
"::", S));
10025 SMLoc OpYLoc = getLoc();
10028 Operands.
push_back(AMDGPUOperand::CreateToken(
this, OpYName, OpYLoc));
10031 return Error(OpYLoc,
"expected a VOPDY instruction after ::");
10037void AMDGPUAsmParser::cvtVOPD(MCInst &Inst,
const OperandVector &Operands) {
10040 auto addOp = [&](uint16_t ParsedOprIdx) {
10041 AMDGPUOperand &
Op = ((AMDGPUOperand &)*Operands[ParsedOprIdx]);
10043 Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
10047 Op.addRegOperands(Inst, 1);
10051 Op.addImmOperands(Inst, 1);
10063 addOp(InstInfo[CompIdx].getIndexOfDstInParsedOperands());
10067 const auto &CInfo = InstInfo[CompIdx];
10068 auto CompSrcOperandsNum = InstInfo[CompIdx].getCompParsedSrcOperandsNum();
10069 for (
unsigned CompSrcIdx = 0; CompSrcIdx < CompSrcOperandsNum; ++CompSrcIdx)
10070 addOp(CInfo.getIndexOfSrcInParsedOperands(CompSrcIdx));
10071 if (CInfo.hasSrc2Acc())
10072 addOp(CInfo.getIndexOfDstInParsedOperands());
10076 AMDGPU::getNamedOperandIdx(Inst.
getOpcode(), AMDGPU::OpName::bitop3);
10077 if (BitOp3Idx != -1) {
10078 OptionalImmIndexMap OptIdx;
10079 AMDGPUOperand &
Op = ((AMDGPUOperand &)*Operands.
back());
10081 OptIdx[
Op.getImmTy()] = Operands.
size() - 1;
// Operand predicate: true when isImmTy() reports the ImmTyDPP8 immediate
// type, which is the type parseDPP8() attaches to the operand it creates.
10091bool AMDGPUOperand::isDPP8()
const {
10092 return isImmTy(ImmTyDPP8);
10095bool AMDGPUOperand::isDPPCtrl()
const {
10096 using namespace AMDGPU::DPP;
10098 bool result = isImm() && getImmTy() == ImmTyDppCtrl &&
isUInt<9>(
getImm());
10101 return (
Imm >= DppCtrl::QUAD_PERM_FIRST &&
Imm <= DppCtrl::QUAD_PERM_LAST) ||
10102 (
Imm >= DppCtrl::ROW_SHL_FIRST &&
Imm <= DppCtrl::ROW_SHL_LAST) ||
10103 (
Imm >= DppCtrl::ROW_SHR_FIRST &&
Imm <= DppCtrl::ROW_SHR_LAST) ||
10104 (
Imm >= DppCtrl::ROW_ROR_FIRST &&
Imm <= DppCtrl::ROW_ROR_LAST) ||
10105 (
Imm == DppCtrl::WAVE_SHL1) ||
10106 (
Imm == DppCtrl::WAVE_ROL1) ||
10107 (
Imm == DppCtrl::WAVE_SHR1) ||
10108 (
Imm == DppCtrl::WAVE_ROR1) ||
10109 (
Imm == DppCtrl::ROW_MIRROR) ||
10110 (
Imm == DppCtrl::ROW_HALF_MIRROR) ||
10111 (
Imm == DppCtrl::BCAST15) ||
10112 (
Imm == DppCtrl::BCAST31) ||
10113 (
Imm >= DppCtrl::ROW_SHARE_FIRST &&
Imm <= DppCtrl::ROW_SHARE_LAST) ||
10114 (
Imm >= DppCtrl::ROW_XMASK_FIRST &&
Imm <= DppCtrl::ROW_XMASK_LAST);
10123bool AMDGPUOperand::isBLGP()
const {
10127bool AMDGPUOperand::isS16Imm()
const {
10131bool AMDGPUOperand::isU16Imm()
const {
10139bool AMDGPUAsmParser::parseDimId(
unsigned &Encoding) {
10144 SMLoc Loc =
getToken().getEndLoc();
10145 Token = std::string(getTokenStr());
10147 if (getLoc() != Loc)
10152 if (!parseId(Suffix))
10156 StringRef DimId = Token;
10167ParseStatus AMDGPUAsmParser::parseDim(
OperandVector &Operands) {
10171 SMLoc S = getLoc();
10177 SMLoc Loc = getLoc();
10178 if (!parseDimId(Encoding))
10179 return Error(Loc,
"invalid dim value");
10181 Operands.
push_back(AMDGPUOperand::CreateImm(
this, Encoding, S,
10182 AMDGPUOperand::ImmTyDim));
10190ParseStatus AMDGPUAsmParser::parseDPP8(
OperandVector &Operands) {
10191 SMLoc S = getLoc();
10200 if (!skipToken(
AsmToken::LBrac,
"expected an opening square bracket"))
10203 for (
size_t i = 0; i < 8; ++i) {
10207 SMLoc Loc = getLoc();
10208 if (getParser().parseAbsoluteExpression(Sels[i]))
10210 if (0 > Sels[i] || 7 < Sels[i])
10211 return Error(Loc,
"expected a 3-bit value");
10214 if (!skipToken(
AsmToken::RBrac,
"expected a closing square bracket"))
10218 for (
size_t i = 0; i < 8; ++i)
10219 DPP8 |= (Sels[i] << (i * 3));
10221 Operands.
push_back(AMDGPUOperand::CreateImm(
this, DPP8, S, AMDGPUOperand::ImmTyDPP8));
10226AMDGPUAsmParser::isSupportedDPPCtrl(StringRef Ctrl,
10228 if (Ctrl ==
"row_newbcast")
10231 if (Ctrl ==
"row_share" ||
10232 Ctrl ==
"row_xmask")
10235 if (Ctrl ==
"wave_shl" ||
10236 Ctrl ==
"wave_shr" ||
10237 Ctrl ==
"wave_rol" ||
10238 Ctrl ==
"wave_ror" ||
10239 Ctrl ==
"row_bcast")
10242 return Ctrl ==
"row_mirror" ||
10243 Ctrl ==
"row_half_mirror" ||
10244 Ctrl ==
"quad_perm" ||
10245 Ctrl ==
"row_shl" ||
10246 Ctrl ==
"row_shr" ||
10251AMDGPUAsmParser::parseDPPCtrlPerm() {
10254 if (!skipToken(
AsmToken::LBrac,
"expected an opening square bracket"))
10258 for (
int i = 0; i < 4; ++i) {
10263 SMLoc Loc = getLoc();
10264 if (getParser().parseAbsoluteExpression(Temp))
10266 if (Temp < 0 || Temp > 3) {
10267 Error(Loc,
"expected a 2-bit value");
10271 Val += (Temp << i * 2);
10274 if (!skipToken(
AsmToken::RBrac,
"expected a closing square bracket"))
10281AMDGPUAsmParser::parseDPPCtrlSel(StringRef Ctrl) {
10282 using namespace AMDGPU::DPP;
10287 SMLoc Loc = getLoc();
10289 if (getParser().parseAbsoluteExpression(Val))
10292 struct DppCtrlCheck {
10298 DppCtrlCheck
Check = StringSwitch<DppCtrlCheck>(Ctrl)
10299 .Case(
"wave_shl", {DppCtrl::WAVE_SHL1, 1, 1})
10300 .Case(
"wave_rol", {DppCtrl::WAVE_ROL1, 1, 1})
10301 .Case(
"wave_shr", {DppCtrl::WAVE_SHR1, 1, 1})
10302 .Case(
"wave_ror", {DppCtrl::WAVE_ROR1, 1, 1})
10303 .Case(
"row_shl", {DppCtrl::ROW_SHL0, 1, 15})
10304 .Case(
"row_shr", {DppCtrl::ROW_SHR0, 1, 15})
10305 .Case(
"row_ror", {DppCtrl::ROW_ROR0, 1, 15})
10306 .Case(
"row_share", {DppCtrl::ROW_SHARE_FIRST, 0, 15})
10307 .Case(
"row_xmask", {DppCtrl::ROW_XMASK_FIRST, 0, 15})
10308 .Case(
"row_newbcast", {DppCtrl::ROW_NEWBCAST_FIRST, 0, 15})
10312 if (
Check.Ctrl == -1) {
10313 Valid = (
Ctrl ==
"row_bcast" && (Val == 15 || Val == 31));
10321 Error(Loc, Twine(
"invalid ", Ctrl) + Twine(
" value"));
10328ParseStatus AMDGPUAsmParser::parseDPPCtrl(
OperandVector &Operands) {
10329 using namespace AMDGPU::DPP;
10332 !isSupportedDPPCtrl(getTokenStr(), Operands))
10335 SMLoc S = getLoc();
10341 if (Ctrl ==
"row_mirror") {
10342 Val = DppCtrl::ROW_MIRROR;
10343 }
else if (Ctrl ==
"row_half_mirror") {
10344 Val = DppCtrl::ROW_HALF_MIRROR;
10347 if (Ctrl ==
"quad_perm") {
10348 Val = parseDPPCtrlPerm();
10350 Val = parseDPPCtrlSel(Ctrl);
10359 AMDGPUOperand::CreateImm(
this, Val, S, AMDGPUOperand::ImmTyDppCtrl));
10363void AMDGPUAsmParser::cvtVOP3DPP(MCInst &Inst,
const OperandVector &Operands,
10365 OptionalImmIndexMap OptionalIdx;
10372 int OldIdx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::old);
10374 AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::src2_modifiers);
10375 bool IsMAC = OldIdx != -1 && Src2ModIdx != -1 &&
10379 for (
unsigned J = 0; J <
Desc.getNumDefs(); ++J) {
10380 ((AMDGPUOperand &)*Operands[
I++]).addRegOperands(Inst, 1);
10384 int VdstInIdx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::vdst_in);
10385 bool IsVOP3CvtSrDpp =
Opc == AMDGPU::V_CVT_SR_BF8_F32_gfx12_e64_dpp8_gfx12 ||
10386 Opc == AMDGPU::V_CVT_SR_BF8_F32_gfx12_e64_dpp8_gfx13 ||
10387 Opc == AMDGPU::V_CVT_SR_FP8_F32_gfx12_e64_dpp8_gfx12 ||
10388 Opc == AMDGPU::V_CVT_SR_FP8_F32_gfx12_e64_dpp8_gfx13 ||
10389 Opc == AMDGPU::V_CVT_SR_BF8_F32_gfx12_e64_dpp_gfx12 ||
10390 Opc == AMDGPU::V_CVT_SR_BF8_F32_gfx12_e64_dpp_gfx13 ||
10391 Opc == AMDGPU::V_CVT_SR_FP8_F32_gfx12_e64_dpp_gfx12 ||
10392 Opc == AMDGPU::V_CVT_SR_FP8_F32_gfx12_e64_dpp_gfx13;
10394 for (
unsigned E = Operands.
size();
I !=
E; ++
I) {
10398 if (OldIdx == NumOperands) {
10400 constexpr int DST_IDX = 0;
10402 }
else if (Src2ModIdx == NumOperands) {
10412 if (IsVOP3CvtSrDpp) {
10421 if (TiedTo != -1) {
10426 AMDGPUOperand &
Op = ((AMDGPUOperand &)*Operands[
I]);
10428 if (IsDPP8 &&
Op.isDppFI()) {
10431 Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
10432 }
else if (
Op.isReg()) {
10433 Op.addRegOperands(Inst, 1);
10434 }
else if (
Op.isImm() &&
10436 Op.addImmOperands(Inst, 1);
10437 }
else if (
Op.isImm()) {
10438 OptionalIdx[
Op.getImmTy()] =
I;
10446 AMDGPUOperand::ImmTyClamp);
10452 AMDGPUOperand::ImmTyByteSel);
10459 cvtVOP3P(Inst, Operands, OptionalIdx);
10461 cvtVOP3OpSel(Inst, Operands, OptionalIdx);
10468 using namespace llvm::AMDGPU::DPP;
10478 AMDGPUOperand::ImmTyDppFI);
10482void AMDGPUAsmParser::cvtDPP(MCInst &Inst,
const OperandVector &Operands,
bool IsDPP8) {
10483 OptionalImmIndexMap OptionalIdx;
10487 for (
unsigned J = 0; J <
Desc.getNumDefs(); ++J) {
10488 ((AMDGPUOperand &)*Operands[
I++]).addRegOperands(Inst, 1);
10492 for (
unsigned E = Operands.
size();
I !=
E; ++
I) {
10495 if (TiedTo != -1) {
10500 AMDGPUOperand &
Op = ((AMDGPUOperand &)*Operands[
I]);
10502 if (
Op.isReg() && validateVccOperand(
Op.getReg())) {
10510 Op.addImmOperands(Inst, 1);
10512 Op.addRegWithFPInputModsOperands(Inst, 2);
10513 }
else if (
Op.isDppFI()) {
10515 }
else if (
Op.isReg()) {
10516 Op.addRegOperands(Inst, 1);
10522 Op.addRegWithFPInputModsOperands(Inst, 2);
10523 }
else if (
Op.isReg()) {
10524 Op.addRegOperands(Inst, 1);
10525 }
else if (
Op.isDPPCtrl()) {
10526 Op.addImmOperands(Inst, 1);
10527 }
else if (
Op.isImm()) {
10529 OptionalIdx[
Op.getImmTy()] =
I;
10537 using namespace llvm::AMDGPU::DPP;
10545 AMDGPUOperand::ImmTyDppFI);
10554ParseStatus AMDGPUAsmParser::parseSDWASel(
OperandVector &Operands,
10556 AMDGPUOperand::ImmTy
Type) {
10557 return parseStringOrIntWithPrefix(
10559 {
"BYTE_0",
"BYTE_1",
"BYTE_2",
"BYTE_3",
"WORD_0",
"WORD_1",
"DWORD"},
10563ParseStatus AMDGPUAsmParser::parseSDWADstUnused(
OperandVector &Operands) {
10564 return parseStringOrIntWithPrefix(
10565 Operands,
"dst_unused", {
"UNUSED_PAD",
"UNUSED_SEXT",
"UNUSED_PRESERVE"},
10566 AMDGPUOperand::ImmTySDWADstUnused);
// SDWA conversion entry point for VOP1: forwards to cvtSDWA() with
// SDWAInstType::VOP1 and leaves the remaining cvtSDWA() arguments at their
// defaults.
10569void AMDGPUAsmParser::cvtSdwaVOP1(MCInst &Inst,
const OperandVector &Operands) {
10570 cvtSDWA(Inst, Operands, SDWAInstType::VOP1);
// SDWA conversion entry point for VOP2: forwards to cvtSDWA() with
// SDWAInstType::VOP2 and leaves the remaining cvtSDWA() arguments at their
// defaults.
10573void AMDGPUAsmParser::cvtSdwaVOP2(MCInst &Inst,
const OperandVector &Operands) {
10574 cvtSDWA(Inst, Operands, SDWAInstType::VOP2);
// SDWA conversion entry point for the VOP2b form: forwards to cvtSDWA() with
// SDWAInstType::VOP2, passing true for SkipDstVcc and true for the flag that
// follows it.
// NOTE(review): the trailing flag is presumably SkipSrcVcc -- confirm against
// the cvtSDWA() signature.
10577void AMDGPUAsmParser::cvtSdwaVOP2b(MCInst &Inst,
const OperandVector &Operands) {
10578 cvtSDWA(Inst, Operands, SDWAInstType::VOP2,
true,
true);
// SDWA conversion entry point for the VOP2e form: forwards to cvtSDWA() with
// SDWAInstType::VOP2, passing false for SkipDstVcc and true for the flag that
// follows it.
// NOTE(review): the trailing flag is presumably SkipSrcVcc -- confirm against
// the cvtSDWA() signature.
10581void AMDGPUAsmParser::cvtSdwaVOP2e(MCInst &Inst,
const OperandVector &Operands) {
10582 cvtSDWA(Inst, Operands, SDWAInstType::VOP2,
false,
true);
// SDWA conversion entry point for VOPC: forwards to cvtSDWA() with
// SDWAInstType::VOPC. SkipDstVcc is taken from isVI(), so whether the
// destination VCC operand is skipped depends on that subtarget query.
10585void AMDGPUAsmParser::cvtSdwaVOPC(MCInst &Inst,
const OperandVector &Operands) {
10586 cvtSDWA(Inst, Operands, SDWAInstType::VOPC,
isVI());
10589void AMDGPUAsmParser::cvtSDWA(MCInst &Inst,
const OperandVector &Operands,
10590 SDWAInstType BasicInstType,
bool SkipDstVcc,
10592 using namespace llvm::AMDGPU::SDWA;
10594 OptionalImmIndexMap OptionalIdx;
10595 bool SkipVcc = SkipDstVcc || SkipSrcVcc;
10596 bool SkippedVcc =
false;
10600 for (
unsigned J = 0; J <
Desc.getNumDefs(); ++J) {
10601 ((AMDGPUOperand &)*Operands[
I++]).addRegOperands(Inst, 1);
10604 for (
unsigned E = Operands.
size();
I !=
E; ++
I) {
10605 AMDGPUOperand &
Op = ((AMDGPUOperand &)*Operands[
I]);
10606 if (SkipVcc && !SkippedVcc &&
Op.isReg() &&
10607 (
Op.getReg() == AMDGPU::VCC ||
Op.getReg() == AMDGPU::VCC_LO)) {
10613 if (BasicInstType == SDWAInstType::VOP2 &&
10619 if (BasicInstType == SDWAInstType::VOPC && Inst.
getNumOperands() == 0) {
10625 Op.addRegOrImmWithInputModsOperands(Inst, 2);
10626 }
else if (
Op.isImm()) {
10628 OptionalIdx[
Op.getImmTy()] =
I;
10632 SkippedVcc =
false;
10636 if (
Opc != AMDGPU::V_NOP_sdwa_gfx10 &&
Opc != AMDGPU::V_NOP_sdwa_gfx9 &&
10637 Opc != AMDGPU::V_NOP_sdwa_vi) {
10639 switch (BasicInstType) {
10640 case SDWAInstType::VOP1:
10643 AMDGPUOperand::ImmTyClamp, 0);
10647 AMDGPUOperand::ImmTyOModSI, 0);
10651 AMDGPUOperand::ImmTySDWADstSel, SdwaSel::DWORD);
10655 AMDGPUOperand::ImmTySDWADstUnused,
10656 DstUnused::UNUSED_PRESERVE);
10658 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySDWASrc0Sel, SdwaSel::DWORD);
10661 case SDWAInstType::VOP2:
10663 AMDGPUOperand::ImmTyClamp, 0);
10668 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySDWADstSel, SdwaSel::DWORD);
10669 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySDWADstUnused, DstUnused::UNUSED_PRESERVE);
10670 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySDWASrc0Sel, SdwaSel::DWORD);
10671 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySDWASrc1Sel, SdwaSel::DWORD);
10674 case SDWAInstType::VOPC:
10677 AMDGPUOperand::ImmTyClamp, 0);
10678 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySDWASrc0Sel, SdwaSel::DWORD);
10679 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySDWASrc1Sel, SdwaSel::DWORD);
10686 if (Inst.
getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi ||
10687 Inst.
getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) {
10688 auto *it = Inst.
begin();
10690 it, AMDGPU::getNamedOperandIdx(Inst.
getOpcode(), AMDGPU::OpName::src2));
10702#define GET_MATCHER_IMPLEMENTATION
10703#define GET_MNEMONIC_SPELL_CHECKER
10704#define GET_MNEMONIC_CHECKER
10705#include "AMDGPUGenAsmMatcher.inc"
10711 return parseTokenOp(
"addr64",
Operands);
10713 return parseNamedBit(
"done",
Operands, AMDGPUOperand::ImmTyDone,
true);
10715 return parseTokenOp(
"idxen",
Operands);
10717 return parseNamedBit(
"lds",
Operands, AMDGPUOperand::ImmTyLDS,
10720 return parseTokenOp(
"offen",
Operands);
10722 return parseTokenOp(
"off",
Operands);
10723 case MCK_row_95_en:
10724 return parseNamedBit(
"row_en",
Operands, AMDGPUOperand::ImmTyRowEn,
true);
10726 return parseNamedBit(
"gds",
Operands, AMDGPUOperand::ImmTyGDS);
10728 return parseNamedBit(
"tfe",
Operands, AMDGPUOperand::ImmTyTFE);
10730 return tryCustomParseOperand(
Operands, MCK);
10735unsigned AMDGPUAsmParser::validateTargetOperandClass(MCParsedAsmOperand &
Op,
10741 AMDGPUOperand &Operand = (AMDGPUOperand&)
Op;
10744 return Operand.isAddr64() ? Match_Success : Match_InvalidOperand;
10746 return Operand.isGDS() ? Match_Success : Match_InvalidOperand;
10748 return Operand.isLDS() ? Match_Success : Match_InvalidOperand;
10750 return Operand.isIdxen() ? Match_Success : Match_InvalidOperand;
10752 return Operand.isOffen() ? Match_Success : Match_InvalidOperand;
10754 return Operand.isTFE() ? Match_Success : Match_InvalidOperand;
10756 return Operand.isDone() ? Match_Success : Match_InvalidOperand;
10757 case MCK_row_95_en:
10758 return Operand.isRowEn() ? Match_Success : Match_InvalidOperand;
10766 return Operand.isSSrc_b32() ? Match_Success : Match_InvalidOperand;
10768 return Operand.isSSrc_f32() ? Match_Success : Match_InvalidOperand;
10769 case MCK_SOPPBrTarget:
10770 return Operand.isSOPPBrTarget() ? Match_Success : Match_InvalidOperand;
10771 case MCK_VReg32OrOff:
10772 return Operand.isVReg32OrOff() ? Match_Success : Match_InvalidOperand;
10773 case MCK_InterpSlot:
10774 return Operand.isInterpSlot() ? Match_Success : Match_InvalidOperand;
10775 case MCK_InterpAttr:
10776 return Operand.isInterpAttr() ? Match_Success : Match_InvalidOperand;
10777 case MCK_InterpAttrChan:
10778 return Operand.isInterpAttrChan() ? Match_Success : Match_InvalidOperand;
10780 case MCK_SReg_64_XEXEC:
10790 return Operand.isNull() ? Match_Success : Match_InvalidOperand;
10792 return Match_InvalidOperand;
10800ParseStatus AMDGPUAsmParser::parseEndpgm(
OperandVector &Operands) {
10801 SMLoc S = getLoc();
10810 return Error(S,
"expected a 16-bit value");
10813 AMDGPUOperand::CreateImm(
this,
Imm, S, AMDGPUOperand::ImmTyEndpgm));
10817bool AMDGPUOperand::isEndpgm()
const {
return isImmTy(ImmTyEndpgm); }
10823bool AMDGPUOperand::isSplitBarrier()
const {
return isInlinableImm(MVT::i32); }
static const TargetRegisterClass * getRegClass(const MachineInstr &MI, Register Reg)
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
SmallVector< int16_t, MAX_SRC_OPERANDS_NUM > OperandIndices
static bool checkWriteLane(const MCInst &Inst)
static bool getRegNum(StringRef Str, unsigned &Num)
static void addSrcModifiersAndSrc(MCInst &Inst, const OperandVector &Operands, unsigned i, unsigned Opc, AMDGPU::OpName OpName)
static constexpr RegInfo RegularRegisters[]
static const RegInfo * getRegularRegInfo(StringRef Str)
static ArrayRef< unsigned > getAllVariants()
static OperandIndices getSrcOperandIndices(unsigned Opcode, bool AddMandatoryLiterals=false)
static int IsAGPROperand(const MCInst &Inst, AMDGPU::OpName Name, const MCRegisterInfo *MRI)
static bool IsMovrelsSDWAOpcode(const unsigned Opcode)
static const fltSemantics * getFltSemantics(unsigned Size)
static bool isRegularReg(RegisterKind Kind)
LLVM_ABI LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUAsmParser()
Force static initialization.
static bool ConvertOmodMul(int64_t &Mul)
#define PARSE_BITS_ENTRY(FIELD, ENTRY, VALUE, RANGE)
static bool isInlineableLiteralOp16(int64_t Val, MVT VT, bool HasInv2Pi)
static bool canLosslesslyConvertToFPType(APFloat &FPLiteral, MVT VT)
constexpr uint64_t MIMGFlags
static bool AMDGPUCheckMnemonic(StringRef Mnemonic, const FeatureBitset &AvailableFeatures, unsigned VariantID)
static void applyMnemonicAliases(StringRef &Mnemonic, const FeatureBitset &Features, unsigned VariantID)
constexpr unsigned MAX_SRC_OPERANDS_NUM
#define EXPR_RESOLVE_OR_ERROR(RESOLVED)
static bool ConvertOmodDiv(int64_t &Div)
static bool IsRevOpcode(const unsigned Opcode)
static bool encodeCnt(const AMDGPU::IsaVersion ISA, int64_t &IntVal, int64_t CntVal, bool Saturate, unsigned(*encode)(const IsaVersion &Version, unsigned, unsigned), unsigned(*decode)(const IsaVersion &Version, unsigned))
static MCRegister getSpecialRegForName(StringRef RegName)
static void addOptionalImmOperand(MCInst &Inst, const OperandVector &Operands, AMDGPUAsmParser::OptionalImmIndexMap &OptionalIdx, AMDGPUOperand::ImmTy ImmT, int64_t Default=0, std::optional< unsigned > InsertAt=std::nullopt)
static void cvtVOP3DstOpSelOnly(MCInst &Inst, const MCRegisterInfo &MRI)
static bool isRegOrImmWithInputMods(const MCInstrDesc &Desc, unsigned OpNum)
static const fltSemantics * getOpFltSemantics(uint8_t OperandType)
static bool isInvalidVOPDY(const OperandVector &Operands, uint64_t InvalidOprIdx)
static std::string AMDGPUMnemonicSpellCheck(StringRef S, const FeatureBitset &FBS, unsigned VariantID=0)
static LLVM_READNONE unsigned encodeBitmaskPerm(const unsigned AndMask, const unsigned OrMask, const unsigned XorMask)
static bool isSafeTruncation(int64_t Val, unsigned Size)
AMDHSA kernel descriptor MCExpr struct for use in MC layer.
Provides AMDGPU specific target descriptions.
Enums shared between the AMDGPU backend (LLVM) and the ELF linker (LLD) for the .amdgpu....
AMDHSA kernel descriptor definitions.
static bool parseExpr(MCAsmParser &MCParser, const MCExpr *&Value, raw_ostream &Err)
MC layer struct for AMDGPUMCKernelCodeT, provides MCExpr functionality where required.
@ AMD_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32
This file declares a class to represent arbitrary precision floating point values and provide a varie...
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
#define LLVM_EXTERNAL_VISIBILITY
static llvm::Expected< InlineInfo > decode(GsymDataExtractor &Data, uint64_t &Offset, uint64_t BaseAddr)
Decode an InlineInfo in Data at the specified offset.
static Value * getOpcode(Value &V, Type &Ty, InstrumentationConfig &IConf, InstrumentorIRBuilderTy &IIRB)
const AbstractManglingParser< Derived, Alloc >::OperatorInfo AbstractManglingParser< Derived, Alloc >::Ops[]
Loop::LoopBounds::Direction Direction
Register const TargetRegisterInfo * TRI
static MCRegister getReg(const MCDisassembler *D, unsigned RC, unsigned RegNo)
static bool isReg(const MCInst &MI, unsigned OpNo)
MachineInstr unsigned OpIdx
ConstantRange Range(APInt(BitWidth, Low), APInt(BitWidth, High))
uint64_t IntrinsicInst * II
static cl::opt< RegAllocEvictionAdvisorAnalysisLegacy::AdvisorMode > Mode("regalloc-enable-advisor", cl::Hidden, cl::init(RegAllocEvictionAdvisorAnalysisLegacy::AdvisorMode::Default), cl::desc("Enable regalloc advisor mode"), cl::values(clEnumValN(RegAllocEvictionAdvisorAnalysisLegacy::AdvisorMode::Default, "default", "Default"), clEnumValN(RegAllocEvictionAdvisorAnalysisLegacy::AdvisorMode::Release, "release", "precompiled"), clEnumValN(RegAllocEvictionAdvisorAnalysisLegacy::AdvisorMode::Development, "development", "for training")))
Interface definition for SIInstrInfo.
static bool contains(SmallPtrSetImpl< ConstantExpr * > &Cache, ConstantExpr *Expr, Constant *C)
This file implements the SmallBitVector class.
StringSet - A set-like wrapper for the StringMap.
static TableGen::Emitter::Opt Y("gen-skeleton-entry", EmitSkeleton, "Generate example skeleton entry")
static void initialize(TargetLibraryInfoImpl &TLI, const Triple &T, const llvm::StringTable &StandardNames, VectorLibrary VecLib)
Initialize the set of available library functions based on the specified target triple.
static const char * getRegisterName(MCRegister Reg)
static const AMDGPUMCExpr * createMax(ArrayRef< const MCExpr * > Args, MCContext &Ctx)
static const AMDGPUMCExpr * createLit(LitModifier Lit, int64_t Value, MCContext &Ctx)
static const AMDGPUMCExpr * create(VariantKind Kind, ArrayRef< const MCExpr * > Args, MCContext &Ctx)
static const AMDGPUMCExpr * createExtraSGPRs(const MCExpr *VCCUsed, const MCExpr *FlatScrUsed, bool XNACKUsed, MCContext &Ctx)
Allow delayed MCExpr resolve of ExtraSGPRs (in case VCCUsed or FlatScrUsed are unresolvable but neede...
static const AMDGPUMCExpr * createAlignTo(const MCExpr *Value, const MCExpr *Align, MCContext &Ctx)
static const fltSemantics & IEEEsingle()
static const fltSemantics & BFloat()
static const fltSemantics & IEEEdouble()
static constexpr roundingMode rmNearestTiesToEven
static const fltSemantics & IEEEhalf()
opStatus
IEEE-754R 7: Default exception handling.
LLVM_ABI opStatus convert(const fltSemantics &ToSemantics, roundingMode RM, bool *losesInfo)
Represent a constant reference to an array (0 or more elements consecutively in memory),...
size_t size() const
Get the array size.
StringRef getString() const
Get the string for the current token, this includes all characters (for example, the quotes on string...
bool is(TokenKind K) const
Container class for subtarget features.
constexpr bool test(unsigned I) const
constexpr FeatureBitset & flip(unsigned I)
void printExpr(raw_ostream &, const MCExpr &) const
virtual void Initialize(MCAsmParser &Parser)
Initialize the extension for parsing using the given Parser.
static const MCBinaryExpr * createAdd(const MCExpr *LHS, const MCExpr *RHS, MCContext &Ctx, SMLoc Loc=SMLoc())
static const MCBinaryExpr * createDiv(const MCExpr *LHS, const MCExpr *RHS, MCContext &Ctx)
static const MCBinaryExpr * createSub(const MCExpr *LHS, const MCExpr *RHS, MCContext &Ctx)
static LLVM_ABI const MCConstantExpr * create(int64_t Value, MCContext &Ctx, bool PrintInHex=false, unsigned SizeInBytes=0)
Context object for machine code objects.
LLVM_ABI MCSymbol * getOrCreateSymbol(const Twine &Name)
Lookup the symbol inside with the specified Name.
Instances of this class represent a single low-level machine instruction.
unsigned getNumOperands() const
unsigned getOpcode() const
iterator insert(iterator I, const MCOperand &Op)
void addOperand(const MCOperand Op)
const MCOperand & getOperand(unsigned i) const
Describe properties that are true of each instruction in the target description file.
const MCInstrDesc & get(unsigned Opcode) const
Return the machine instruction descriptor that corresponds to the specified instruction opcode.
int16_t getOpRegClassID(const MCOperandInfo &OpInfo, unsigned HwModeId) const
Return the ID of the register class to use for OpInfo, for the active HwMode HwModeId.
Instances of this class represent operands of the MCInst class.
static MCOperand createExpr(const MCExpr *Val)
static MCOperand createReg(MCRegister Reg)
static MCOperand createImm(int64_t Val)
void setReg(MCRegister Reg)
Set the register number.
MCRegister getReg() const
Returns the register number.
const MCExpr * getExpr() const
MCParsedAsmOperand - This abstract class represents a source-level assembly instruction operand.
MCRegisterClass - Base class of TargetRegisterClass.
MCRegister getRegister(unsigned i) const
getRegister - Return the specified register in the class.
unsigned getNumRegs() const
getNumRegs - Return the number of registers in this class.
bool contains(MCRegister Reg) const
contains - Return true if the specified register is included in this register class.
MCRegisterInfo base class - We assume that the target defines a static array of MCRegisterDesc object...
bool regsOverlap(MCRegister RegA, MCRegister RegB) const
Returns true if the two registers are equal or alias each other.
const MCRegisterClass & getRegClass(unsigned i) const
Returns the register class associated with the enumeration value.
MCRegister getSubReg(MCRegister Reg, unsigned Idx) const
Returns the physical register number of sub-register "Index" for physical register RegNo.
Wrapper class representing physical registers. Should be passed by value.
constexpr bool isValid() const
virtual void emitInstruction(const MCInst &Inst, const MCSubtargetInfo &STI)
Emit the given Instruction into the current section.
Generic base class for all target subtargets.
MCSymbol - Instances of this class represent a symbol name in the MC file, and MCSymbols are created ...
bool isVariable() const
isVariable - Check if this is a variable symbol.
LLVM_ABI void setVariableValue(const MCExpr *Value)
void setRedefinable(bool Value)
Mark this symbol as redefinable.
const MCExpr * getVariableValue() const
Get the expression of the variable symbol.
MCTargetAsmParser - Generic interface to target specific assembly parsers.
uint64_t getScalarSizeInBits() const
TypeSize getSizeInBits() const
Returns the size of the specified MVT in bits.
MVT getScalarType() const
If this is a vector, return the element type, otherwise return this.
Ternary parse status returned by various parse* methods.
constexpr bool isFailure() const
static constexpr StatusTy Failure
constexpr bool isSuccess() const
static constexpr StatusTy Success
static constexpr StatusTy NoMatch
constexpr bool isNoMatch() const
constexpr unsigned id() const
Represents a location in source code.
static SMLoc getFromPointer(const char *Ptr)
constexpr const char * getPointer() const
constexpr bool isValid() const
iterator insert(iterator I, T &&Elt)
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
A wrapper around a string literal that serves as a proxy for constructing global tables of StringRefs...
Represent a constant reference to a string, i.e.
bool consume_back(StringRef Suffix)
Returns true if this StringRef has the given suffix and removes that suffix.
constexpr StringRef substr(size_t Start, size_t N=npos) const
Return a reference to the substring from [Start, Start + N).
bool starts_with(StringRef Prefix) const
Check if this string starts with the given Prefix.
constexpr bool empty() const
Check if the string is empty.
StringRef drop_front(size_t N=1) const
Return a StringRef equal to 'this' but with the first N elements dropped.
constexpr size_t size() const
Get the string size.
constexpr const char * data() const
Get a pointer to the start of the string (which may not be null terminated).
bool ends_with(StringRef Suffix) const
Check if this string ends with the given Suffix.
bool consume_front(char Prefix)
Returns true if this StringRef has the given prefix and removes that prefix.
bool contains(StringRef key) const
Check if the set contains the given key.
std::pair< typename Base::iterator, bool > insert(StringRef key)
A switch()-like statement whose cases are string literals.
StringSwitch & Case(StringLiteral S, T Value)
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
LLVM_ABI std::string str() const
Return the twine contents as a std::string.
std::pair< iterator, bool > insert(const ValueT &V)
This class implements an extremely fast bulk output stream that can only output to a stream.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
int encodeDepCtr(const StringRef Name, int64_t Val, unsigned &UsedOprMask, const MCSubtargetInfo &STI)
int getDefaultDepCtrEncoding(const MCSubtargetInfo &STI)
bool isSupportedTgtId(unsigned Id, const MCSubtargetInfo &STI)
unsigned getTgtId(const StringRef Name)
constexpr char Align[]
Key for Kernel::Arg::Metadata::mAlign.
constexpr char NumSGPRs[]
Key for Kernel::CodeProps::Metadata::mNumSGPRs.
constexpr char SymbolName[]
Key for Kernel::Metadata::mSymbolName.
constexpr char AssemblerDirectiveBegin[]
HSA metadata beginning assembler directive.
constexpr char AssemblerDirectiveEnd[]
HSA metadata ending assembler directive.
constexpr char AssemblerDirectiveBegin[]
Old HSA metadata beginning assembler directive for V2.
int64_t getHwregId(StringRef Name, const MCSubtargetInfo &STI)
static constexpr CustomOperand Operands[]
@ FIXED_NUM_SGPRS_FOR_INIT_BUG
unsigned getAddressableNumSGPRs(const MCSubtargetInfo &STI)
unsigned getVGPREncodingGranule(const MCSubtargetInfo &STI, std::optional< bool > EnableWavefrontSize32)
unsigned getSGPREncodingGranule(const MCSubtargetInfo &STI)
unsigned getLocalMemorySize(const MCSubtargetInfo &STI)
constexpr char AssemblerDirective[]
PAL metadata (old linear format) assembler directive.
constexpr char AssemblerDirectiveBegin[]
PAL metadata (new MsgPack format) beginning assembler directive.
constexpr char AssemblerDirectiveEnd[]
PAL metadata (new MsgPack format) ending assembler directive.
int64_t getMsgOpId(int64_t MsgId, StringRef Name, const MCSubtargetInfo &STI)
Map from a symbolic name for a sendmsg operation to the operation portion of the immediate encoding.
int64_t getMsgId(StringRef Name, const MCSubtargetInfo &STI)
Map from a symbolic name for a msg_id to the message portion of the immediate encoding.
uint64_t encodeMsg(uint64_t MsgId, uint64_t OpId, uint64_t StreamId)
bool msgSupportsStream(int64_t MsgId, int64_t OpId, const MCSubtargetInfo &STI)
bool isValidMsgId(int64_t MsgId, const MCSubtargetInfo &STI)
bool isValidMsgStream(int64_t MsgId, int64_t OpId, int64_t StreamId, const MCSubtargetInfo &STI, bool Strict)
bool msgRequiresOp(int64_t MsgId, const MCSubtargetInfo &STI)
bool isValidMsgOp(int64_t MsgId, int64_t OpId, const MCSubtargetInfo &STI, bool Strict)
ArrayRef< GFXVersion > getGFXVersions()
constexpr unsigned COMPONENTS[]
constexpr const char *const ModMatrixFmt[]
constexpr const char *const ModMatrixScaleFmt[]
constexpr const char *const ModMatrixScale[]
bool isPackedFP32Inst(unsigned Opc)
bool isInlinableLiteralBF16(int16_t Literal, bool HasInv2Pi)
bool isGFX10_BEncoding(const MCSubtargetInfo &STI)
bool isInlineValue(MCRegister Reg)
bool isPKFMACF16InlineConstant(uint32_t Literal, bool IsGFX11Plus)
LLVM_READONLY const MIMGInfo * getMIMGInfo(unsigned Opc)
bool isInlinableLiteralFP16(int16_t Literal, bool HasInv2Pi)
bool isSGPR(MCRegister Reg, const MCRegisterInfo *TRI)
Is Reg - scalar register.
MCRegister getMCReg(MCRegister Reg, const MCSubtargetInfo &STI)
If Reg is a pseudo reg, return the correct hardware register given STI otherwise return Reg.
FuncInfoFlags
Per-function flags packed into INFO_FLAGS entries.
uint8_t wmmaScaleF8F6F4FormatToNumRegs(unsigned Fmt)
const int OPR_ID_UNSUPPORTED
bool isInlinableLiteralV2I16(uint32_t Literal)
bool isHi16Reg(MCRegister Reg, const MCRegisterInfo &MRI)
unsigned getTemporalHintType(const MCInstrDesc TID)
int32_t getTotalNumVGPRs(bool has90AInsts, int32_t ArgNumAGPR, int32_t ArgNumVGPR)
bool isGFX10(const MCSubtargetInfo &STI)
LLVM_READONLY bool isLitExpr(const MCExpr *Expr)
bool isInlinableLiteralV2BF16(uint32_t Literal)
unsigned getMaxNumUserSGPRs(const MCSubtargetInfo &STI)
unsigned getNumFlatOffsetBits(const MCSubtargetInfo &ST)
For pre-GFX12 FLAT instructions the offset must be positive; MSB is ignored and forced to zero.
bool hasA16(const MCSubtargetInfo &STI)
bool isLegalSMRDEncodedSignedOffset(const MCSubtargetInfo &ST, int64_t EncodedOffset, bool IsBuffer)
bool isGFX12Plus(const MCSubtargetInfo &STI)
unsigned getNSAMaxSize(const MCSubtargetInfo &STI, bool HasSampler)
bool hasPackedD16(const MCSubtargetInfo &STI)
bool isGFX940(const MCSubtargetInfo &STI)
bool isInlinableLiteralV2F16(uint32_t Literal)
bool isHsaAbi(const MCSubtargetInfo &STI)
bool isGFX11(const MCSubtargetInfo &STI)
const int OPR_VAL_INVALID
bool getSMEMIsBuffer(unsigned Opc)
bool isGFX13(const MCSubtargetInfo &STI)
uint8_t mfmaScaleF8F6F4FormatToNumRegs(unsigned EncodingVal)
LLVM_ABI IsaVersion getIsaVersion(StringRef GPU)
bool isValid32BitLiteral(uint64_t Val, bool IsFP64)
CanBeVOPD getCanBeVOPD(unsigned Opc, unsigned EncodingFamily, bool VOPD3)
LLVM_READNONE bool isLegalDPALU_DPPControl(const MCSubtargetInfo &ST, unsigned DC)
bool isSI(const MCSubtargetInfo &STI)
unsigned decodeLgkmcnt(const IsaVersion &Version, unsigned Waitcnt)
unsigned getWaitcntBitMask(const IsaVersion &Version)
LLVM_READONLY bool hasNamedOperand(uint64_t Opcode, OpName NamedIdx)
bool isGFX9(const MCSubtargetInfo &STI)
unsigned getVOPDEncodingFamily(const MCSubtargetInfo &ST)
bool isGFX10_AEncoding(const MCSubtargetInfo &STI)
bool isKImmOperand(const MCInstrDesc &Desc, unsigned OpNo)
Is this a KImm operand?
bool isGFX90A(const MCSubtargetInfo &STI)
LLVM_READONLY const MIMGDimInfo * getMIMGDimInfoByEncoding(uint8_t DimEnc)
bool isInlinableLiteral32(int32_t Literal, bool HasInv2Pi)
bool isGFX12(const MCSubtargetInfo &STI)
unsigned encodeExpcnt(const IsaVersion &Version, unsigned Waitcnt, unsigned Expcnt)
bool hasMAIInsts(const MCSubtargetInfo &STI)
constexpr bool isSISrcOperand(const MCOperandInfo &OpInfo)
Is this an AMDGPU specific source operand?
LLVM_READONLY const MIMGDimInfo * getMIMGDimInfoByAsmSuffix(StringRef AsmSuffix)
bool hasMIMG_R128(const MCSubtargetInfo &STI)
bool hasG16(const MCSubtargetInfo &STI)
unsigned getAddrSizeMIMGOp(const MIMGBaseOpcodeInfo *BaseOpcode, const MIMGDimInfo *Dim, bool IsA16, bool IsG16Supported)
bool isGFX13Plus(const MCSubtargetInfo &STI)
bool hasArchitectedFlatScratch(const MCSubtargetInfo &STI)
LLVM_READONLY int64_t getLitValue(const MCExpr *Expr)
bool isGFX11Plus(const MCSubtargetInfo &STI)
bool isSISrcFPOperand(const MCInstrDesc &Desc, unsigned OpNo)
Is this floating-point operand?
bool isGFX10Plus(const MCSubtargetInfo &STI)
int64_t encode32BitLiteral(int64_t Imm, OperandType Type, bool IsLit)
bool isValidWMMAScaleFmtCombination(unsigned AFmt, unsigned AScale, unsigned BFmt, unsigned BScale)
@ OPERAND_KIMM32
Operand with 32-bit immediate that uses the constant bus.
@ OPERAND_REG_INLINE_C_FP64
@ OPERAND_REG_INLINE_C_BF16
@ OPERAND_REG_INLINE_C_V2BF16
@ OPERAND_REG_IMM_V2INT64
@ OPERAND_REG_IMM_V2INT16
@ OPERAND_REG_IMM_INT32
Operands with register, 32-bit, or 64-bit immediate.
@ OPERAND_REG_IMM_V2FP16_SPLAT
@ OPERAND_REG_INLINE_C_INT64
@ OPERAND_REG_INLINE_C_INT16
Operands with register or inline constant.
@ OPERAND_REG_IMM_NOINLINE_V2FP16
@ OPERAND_REG_INLINE_C_V2FP16
@ OPERAND_REG_INLINE_AC_INT32
Operands with an AccVGPR register or inline constant.
@ OPERAND_REG_INLINE_AC_FP32
@ OPERAND_REG_IMM_V2INT32
@ OPERAND_REG_INLINE_C_FP32
@ OPERAND_REG_INLINE_C_INT32
@ OPERAND_REG_INLINE_C_V2INT16
@ OPERAND_REG_INLINE_AC_FP64
@ OPERAND_REG_INLINE_C_FP16
@ OPERAND_INLINE_SPLIT_BARRIER_INT32
bool isDPALU_DPP(const MCInstrDesc &OpDesc, const MCInstrInfo &MII, const MCSubtargetInfo &ST)
bool hasGDS(const MCSubtargetInfo &STI)
bool isLegalSMRDEncodedUnsignedOffset(const MCSubtargetInfo &ST, int64_t EncodedOffset)
bool isGFX9Plus(const MCSubtargetInfo &STI)
bool hasDPPSrc1SGPR(const MCSubtargetInfo &STI)
const int OPR_ID_DUPLICATE
bool isVOPD(unsigned Opc)
VOPD::InstInfo getVOPDInstInfo(const MCInstrDesc &OpX, const MCInstrDesc &OpY)
unsigned encodeVmcnt(const IsaVersion &Version, unsigned Waitcnt, unsigned Vmcnt)
unsigned decodeExpcnt(const IsaVersion &Version, unsigned Waitcnt)
unsigned getRegBitWidth(const TargetRegisterClass &RC)
Get the size in bits of a register from the register class RC.
bool isGFX1250(const MCSubtargetInfo &STI)
const MIMGBaseOpcodeInfo * getMIMGBaseOpcode(unsigned Opc)
bool isVI(const MCSubtargetInfo &STI)
bool supportsScaleOffset(const MCInstrInfo &MII, unsigned Opcode)
MCRegister mc2PseudoReg(MCRegister Reg)
Convert hardware register Reg to a pseudo register.
unsigned hasKernargPreload(const MCSubtargetInfo &STI)
bool supportsWGP(const MCSubtargetInfo &STI)
LLVM_READNONE unsigned getOperandSize(const MCOperandInfo &OpInfo)
bool isCI(const MCSubtargetInfo &STI)
unsigned encodeLgkmcnt(const IsaVersion &Version, unsigned Waitcnt, unsigned Lgkmcnt)
LLVM_READONLY const MIMGBaseOpcodeInfo * getMIMGBaseOpcodeInfo(unsigned BaseOpcode)
bool isGFX1250Plus(const MCSubtargetInfo &STI)
unsigned decodeVmcnt(const IsaVersion &Version, unsigned Waitcnt)
bool isInlinableLiteralI16(int32_t Literal, bool HasInv2Pi)
bool hasVOPD(const MCSubtargetInfo &STI)
bool isInlinableLiteral64(int64_t Literal, bool HasInv2Pi)
Is this literal inlinable.
bool isPermlane16(unsigned Opc)
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
unsigned ID
LLVM IR allows arbitrary numbers to be used as calling convention identifiers.
@ C
The default llvm calling convention, compatible with C.
@ UNDEF
UNDEF - An undefined node.
Predicate getPredicate(unsigned Condition, unsigned Hint)
Return predicate consisting of specified condition and hint bits.
void validate(const Triple &TT, const FeatureBitset &FeatureBits)
@ Valid
The data is already valid.
Scope
Defines the scope in which this symbol should be visible: Default – Visible in the public interface o...
Context & getContext() const
This is an optimization pass for GlobalISel generic memory operations.
bool errorToBool(Error Err)
Helper for converting an Error to a bool.
FunctionAddr VTableAddr Value
StringMapEntry< Value * > ValueName
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Printable print(const GCNRegPressure &RP, const GCNSubtarget *ST=nullptr, unsigned DynamicVGPRBlockSize=0)
unsigned encode(MaybeAlign A)
Returns a representation of the alignment that encodes undefined as 0.
constexpr bool isInt(int64_t x)
Checks if an integer fits into the given bit width.
static bool isMem(const MachineInstr &MI, unsigned Op)
LLVM_ABI std::pair< StringRef, StringRef > getToken(StringRef Source, StringRef Delimiters=" \t\n\v\f\r")
getToken - This function extracts one token from source, ignoring any leading characters that appear ...
static StringRef getCPU(StringRef CPU)
Processes a CPU name.
testing::Matcher< const detail::ErrorHolder & > Failed()
LLVM_ABI void PrintError(const Twine &Msg)
constexpr bool isUIntN(unsigned N, uint64_t x)
Checks if an unsigned integer fits into the given (dynamic) bit width.
FunctionAddr VTableAddr uintptr_t uintptr_t DataSize
constexpr bool isPowerOf2_64(uint64_t Value)
Return true if the argument is a power of two > 0 (64 bit edition.)
T bit_ceil(T Value)
Returns the smallest integral power of two no smaller than Value if Value is nonzero.
Target & getTheR600Target()
The target for R600 GPUs.
constexpr int popcount(T Value) noexcept
Count the number of set bits in a value.
SmallVectorImpl< std::unique_ptr< MCParsedAsmOperand > > OperandVector
FunctionAddr VTableAddr uintptr_t uintptr_t Version
MachineInstr * getImm(const MachineOperand &MO, const MachineRegisterInfo *MRI)
constexpr uint32_t Hi_32(uint64_t Value)
Return the high 32 bits of a 64 bit value.
constexpr uint64_t alignTo(uint64_t Size, Align A)
Returns a multiple of A needed to store Size bytes.
constexpr bool isUInt(uint64_t x)
Checks if an unsigned integer fits into the given bit width.
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
constexpr uint32_t Lo_32(uint64_t Value)
Return the low 32 bits of a 64 bit value.
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
LLVM_ATTRIBUTE_VISIBILITY_DEFAULT AnalysisKey InnerAnalysisManagerProxy< AnalysisManagerT, IRUnitT, ExtraArgTs... >::Key
MutableArrayRef(T &OneElt) -> MutableArrayRef< T >
constexpr T divideCeil(U Numerator, V Denominator)
Returns the integer ceil(Numerator / Denominator).
@ First
Helpers to iterate all locations in the MemoryEffectsBase class.
FunctionAddr VTableAddr uintptr_t uintptr_t Data
Target & getTheGCNTarget()
The target for GCN GPUs.
@ Sub
Subtraction of integers.
uint16_t MCPhysReg
An unsigned integer type large enough to represent all physical registers, but not necessarily virtua...
DWARFExpression::Operation Op
raw_ostream & operator<<(raw_ostream &OS, const APFixedPoint &FX)
unsigned M0(unsigned Val)
ArrayRef(const T &OneElt) -> ArrayRef< T >
std::string toString(const APInt &I, unsigned Radix, bool Signed, bool formatAsCLiteral=false, bool UpperCase=true, bool InsertSeparators=false)
auto find_if(R &&Range, UnaryPredicate P)
Provide wrappers to std::find_if which take ranges instead of having to pass begin/end explicitly.
constexpr bool isIntN(unsigned N, int64_t x)
Checks if a signed integer fits into the given (dynamic) bit width.
@ Enabled
Convert any .debug_str_offsets tables to DWARF64 if needed.
@ Default
The result value is uniform if and only if all operands are uniform.
void initDefault(const MCSubtargetInfo &STI, MCContext &Ctx, bool InitMCExpr=true)
void validate(const MCSubtargetInfo *STI, MCContext &Ctx)
uint32_t PrivateSegmentSize
SmallVector< std::pair< MCSymbol *, std::string >, 4 > IndirectCalls
SmallVector< std::pair< MCSymbol *, MCSymbol * >, 8 > Calls
SmallVector< FuncInfo, 8 > Funcs
SmallVector< std::pair< MCSymbol *, std::string >, 4 > TypeIds
SmallVector< std::pair< MCSymbol *, MCSymbol * >, 4 > Uses
Instruction set architecture version.
const MCExpr * compute_pgm_rsrc2
const MCExpr * kernarg_size
const MCExpr * kernarg_preload
const MCExpr * compute_pgm_rsrc3
const MCExpr * private_segment_fixed_size
const MCExpr * compute_pgm_rsrc1
static void bits_set(const MCExpr *&Dst, const MCExpr *Value, uint32_t Shift, uint32_t Mask, MCContext &Ctx)
const MCExpr * group_segment_fixed_size
static MCKernelDescriptor getDefaultAmdhsaKernelDescriptor(const MCSubtargetInfo *STI, MCContext &Ctx)
const MCExpr * kernel_code_properties
RegisterMCAsmParser - Helper template for registering a target specific assembly parser,...
uint32_t group_segment_fixed_size
uint32_t private_segment_fixed_size