/// Register category tags handed to the register-parsing and register-usage
/// helpers in this file (e.g. ParseAMDGPURegister, usesRegister).
/// IS_UNKNOWN is the first enumerator and therefore has value 0.
enum RegisterKind {
  IS_UNKNOWN,
  IS_VGPR,
  IS_SGPR,
  IS_AGPR,
  IS_TTMP,
  IS_SPECIAL
};
71 SMLoc StartLoc, EndLoc;
72 const AMDGPUAsmParser *AsmParser;
75 AMDGPUOperand(KindTy Kind_,
const AMDGPUAsmParser *AsmParser_)
76 : Kind(Kind_), AsmParser(AsmParser_) {}
78 using Ptr = std::unique_ptr<AMDGPUOperand>;
86 bool hasFPModifiers()
const {
return Abs || Neg; }
87 bool hasIntModifiers()
const {
return Sext; }
88 bool hasModifiers()
const {
return hasFPModifiers() || hasIntModifiers(); }
89 bool isForcedLit()
const {
return Lit == LitModifier::Lit; }
90 bool isForcedLit64()
const {
return Lit == LitModifier::Lit64; }
92 int64_t getFPModifiersOperand()
const {
99 int64_t getIntModifiersOperand()
const {
105 int64_t getModifiersOperand()
const {
106 assert(!(hasFPModifiers() && hasIntModifiers())
107 &&
"fp and int modifiers should not be used simultaneously");
108 if (hasFPModifiers())
109 return getFPModifiersOperand();
110 if (hasIntModifiers())
111 return getIntModifiersOperand();
115 friend raw_ostream &
operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods);
189 ImmTyMatrixAScaleFmt,
190 ImmTyMatrixBScaleFmt,
223 mutable int MCOpIdx = -1;
226 bool isToken()
const override {
return Kind == Token; }
228 bool isSymbolRefExpr()
const {
232 bool isImm()
const override {
233 return Kind == Immediate;
236 bool isInlinableImm(MVT type)
const;
237 bool isLiteralImm(MVT type)
const;
239 bool isRegKind()
const {
240 return Kind == Register;
243 bool isReg()
const override {
244 return isRegKind() && !hasModifiers();
247 bool isRegOrInline(
unsigned RCID, MVT type)
const {
248 return isRegClass(RCID) || isInlinableImm(type);
252 return isRegOrInline(RCID, type) || isLiteralImm(type);
255 bool isRegOrImmWithInt16InputMods()
const {
259 template <
bool IsFake16>
bool isRegOrImmWithIntT16InputMods()
const {
261 IsFake16 ? AMDGPU::VS_32RegClassID : AMDGPU::VS_16RegClassID, MVT::i16);
264 bool isRegOrImmWithInt32InputMods()
const {
268 bool isRegOrInlineImmWithInt16InputMods()
const {
269 return isRegOrInline(AMDGPU::VS_32RegClassID, MVT::i16);
272 template <
bool IsFake16>
bool isRegOrInlineImmWithIntT16InputMods()
const {
273 return isRegOrInline(
274 IsFake16 ? AMDGPU::VS_32RegClassID : AMDGPU::VS_16RegClassID, MVT::i16);
277 bool isRegOrInlineImmWithInt32InputMods()
const {
278 return isRegOrInline(AMDGPU::VS_32RegClassID, MVT::i32);
281 bool isRegOrImmWithInt64InputMods()
const {
285 bool isRegOrImmWithFP16InputMods()
const {
289 template <
bool IsFake16>
bool isRegOrImmWithFPT16InputMods()
const {
291 IsFake16 ? AMDGPU::VS_32RegClassID : AMDGPU::VS_16RegClassID, MVT::f16);
294 bool isRegOrImmWithFP32InputMods()
const {
298 bool isRegOrImmWithFP64InputMods()
const {
302 template <
bool IsFake16>
bool isRegOrInlineImmWithFP16InputMods()
const {
303 return isRegOrInline(
304 IsFake16 ? AMDGPU::VS_32RegClassID : AMDGPU::VS_16RegClassID, MVT::f16);
307 bool isRegOrInlineImmWithFP32InputMods()
const {
308 return isRegOrInline(AMDGPU::VS_32RegClassID, MVT::f32);
311 bool isRegOrInlineImmWithFP64InputMods()
const {
312 return isRegOrInline(AMDGPU::VS_64RegClassID, MVT::f64);
315 bool isVRegWithInputMods(
unsigned RCID)
const {
return isRegClass(RCID); }
317 bool isVRegWithFP32InputMods()
const {
318 return isVRegWithInputMods(AMDGPU::VGPR_32RegClassID);
321 bool isVRegWithFP64InputMods()
const {
322 return isVRegWithInputMods(AMDGPU::VReg_64RegClassID);
325 bool isPackedFP16InputMods()
const {
329 bool isPackedVGPRFP32InputMods()
const {
333 bool isVReg()
const {
334 return isRegClass(AMDGPU::VGPR_32RegClassID) ||
335 isRegClass(AMDGPU::VReg_64RegClassID) ||
336 isRegClass(AMDGPU::VReg_96RegClassID) ||
337 isRegClass(AMDGPU::VReg_128RegClassID) ||
338 isRegClass(AMDGPU::VReg_160RegClassID) ||
339 isRegClass(AMDGPU::VReg_192RegClassID) ||
340 isRegClass(AMDGPU::VReg_256RegClassID) ||
341 isRegClass(AMDGPU::VReg_512RegClassID) ||
342 isRegClass(AMDGPU::VReg_1024RegClassID);
345 bool isVReg32()
const {
346 return isRegClass(AMDGPU::VGPR_32RegClassID);
349 bool isVReg32OrOff()
const {
350 return isOff() || isVReg32();
354 return isRegKind() &&
getReg() == AMDGPU::SGPR_NULL;
357 bool isAV_LdSt_32_Align2_RegOp()
const {
358 return isRegClass(AMDGPU::VGPR_32RegClassID) ||
359 isRegClass(AMDGPU::AGPR_32RegClassID);
362 bool isVRegWithInputMods()
const;
363 template <
bool IsFake16>
bool isT16_Lo128VRegWithInputMods()
const;
364 template <
bool IsFake16>
bool isT16VRegWithInputMods()
const;
366 bool isSDWAOperand(MVT type)
const;
367 bool isSDWAFP16Operand()
const;
368 bool isSDWAFP32Operand()
const;
369 bool isSDWAInt16Operand()
const;
370 bool isSDWAInt32Operand()
const;
372 bool isImmTy(ImmTy ImmT)
const {
373 return isImm() &&
Imm.Type == ImmT;
376 template <ImmTy Ty>
bool isImmTy()
const {
return isImmTy(Ty); }
378 bool isImmLiteral()
const {
return isImmTy(ImmTyNone); }
380 bool isImmModifier()
const {
381 return isImm() &&
Imm.Type != ImmTyNone;
384 bool isOModSI()
const {
return isImmTy(ImmTyOModSI); }
385 bool isDim()
const {
return isImmTy(ImmTyDim); }
386 bool isR128A16()
const {
return isImmTy(ImmTyR128A16); }
387 bool isOff()
const {
return isImmTy(ImmTyOff); }
388 bool isExpTgt()
const {
return isImmTy(ImmTyExpTgt); }
389 bool isOffen()
const {
return isImmTy(ImmTyOffen); }
390 bool isIdxen()
const {
return isImmTy(ImmTyIdxen); }
391 bool isAddr64()
const {
return isImmTy(ImmTyAddr64); }
392 bool isSMEMOffsetMod()
const {
return isImmTy(ImmTySMEMOffsetMod); }
393 bool isFlatOffset()
const {
return isImmTy(ImmTyOffset) || isImmTy(ImmTyInstOffset); }
394 bool isGDS()
const {
return isImmTy(ImmTyGDS); }
395 bool isLDS()
const {
return isImmTy(ImmTyLDS); }
396 bool isCPol()
const {
return isImmTy(ImmTyCPol); }
397 bool isIndexKey8bit()
const {
return isImmTy(ImmTyIndexKey8bit); }
398 bool isIndexKey16bit()
const {
return isImmTy(ImmTyIndexKey16bit); }
399 bool isIndexKey32bit()
const {
return isImmTy(ImmTyIndexKey32bit); }
400 bool isMatrixAFMT()
const {
return isImmTy(ImmTyMatrixAFMT); }
401 bool isMatrixBFMT()
const {
return isImmTy(ImmTyMatrixBFMT); }
402 bool isMatrixAScale()
const {
return isImmTy(ImmTyMatrixAScale); }
403 bool isMatrixBScale()
const {
return isImmTy(ImmTyMatrixBScale); }
404 bool isMatrixAScaleFmt()
const {
return isImmTy(ImmTyMatrixAScaleFmt); }
405 bool isMatrixBScaleFmt()
const {
return isImmTy(ImmTyMatrixBScaleFmt); }
406 bool isMatrixAReuse()
const {
return isImmTy(ImmTyMatrixAReuse); }
407 bool isMatrixBReuse()
const {
return isImmTy(ImmTyMatrixBReuse); }
408 bool isTFE()
const {
return isImmTy(ImmTyTFE); }
409 bool isFORMAT()
const {
return isImmTy(ImmTyFORMAT) &&
isUInt<7>(
getImm()); }
410 bool isDppFI()
const {
return isImmTy(ImmTyDppFI); }
411 bool isSDWADstSel()
const {
return isImmTy(ImmTySDWADstSel); }
412 bool isSDWASrc0Sel()
const {
return isImmTy(ImmTySDWASrc0Sel); }
413 bool isSDWASrc1Sel()
const {
return isImmTy(ImmTySDWASrc1Sel); }
414 bool isSDWADstUnused()
const {
return isImmTy(ImmTySDWADstUnused); }
415 bool isInterpSlot()
const {
return isImmTy(ImmTyInterpSlot); }
416 bool isInterpAttr()
const {
return isImmTy(ImmTyInterpAttr); }
417 bool isInterpAttrChan()
const {
return isImmTy(ImmTyInterpAttrChan); }
418 bool isOpSel()
const {
return isImmTy(ImmTyOpSel); }
419 bool isOpSelHi()
const {
return isImmTy(ImmTyOpSelHi); }
420 bool isNegLo()
const {
return isImmTy(ImmTyNegLo); }
421 bool isNegHi()
const {
return isImmTy(ImmTyNegHi); }
422 bool isBitOp3()
const {
return isImmTy(ImmTyBitOp3) &&
isUInt<8>(
getImm()); }
423 bool isDone()
const {
return isImmTy(ImmTyDone); }
424 bool isRowEn()
const {
return isImmTy(ImmTyRowEn); }
426 bool isRegOrImm()
const {
427 return isReg() || isImm();
430 bool isRegClass(
unsigned RCID)
const;
434 bool isRegOrInlineNoMods(
unsigned RCID, MVT type)
const {
435 return isRegOrInline(RCID, type) && !hasModifiers();
438 bool isSCSrcB16()
const {
439 return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i16);
442 bool isSCSrcV2B16()
const {
446 bool isSCSrc_b32()
const {
447 return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i32);
450 bool isSCSrc_b64()
const {
451 return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::i64);
454 bool isBoolReg()
const;
456 bool isSCSrcF16()
const {
457 return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f16);
460 bool isSCSrcV2F16()
const {
464 bool isSCSrcF32()
const {
465 return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f32);
468 bool isSCSrcF64()
const {
469 return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::f64);
472 bool isSSrc_b32()
const {
473 return isSCSrc_b32() || isLiteralImm(MVT::i32) || isExpr();
476 bool isSSrc_b16()
const {
return isSCSrcB16() || isLiteralImm(MVT::i16); }
478 bool isSSrcV2B16()
const {
483 bool isSSrc_b64()
const {
486 return isSCSrc_b64() || isLiteralImm(MVT::i64) ||
487 (((
const MCTargetAsmParser *)AsmParser)
488 ->getAvailableFeatures()[AMDGPU::Feature64BitLiterals] &&
492 bool isSSrc_f32()
const {
493 return isSCSrc_b32() || isLiteralImm(MVT::f32) || isExpr();
496 bool isSSrcF64()
const {
return isSCSrc_b64() || isLiteralImm(MVT::f64); }
498 bool isSSrc_bf16()
const {
return isSCSrcB16() || isLiteralImm(MVT::bf16); }
500 bool isSSrc_f16()
const {
return isSCSrcB16() || isLiteralImm(MVT::f16); }
502 bool isSSrcV2F16()
const {
507 bool isSSrcV2FP32()
const {
512 bool isSCSrcV2FP32()
const {
517 bool isSSrcV2INT32()
const {
522 bool isSCSrcV2INT32()
const {
524 return isSCSrc_b32();
527 bool isSSrcOrLds_b32()
const {
528 return isRegOrInlineNoMods(AMDGPU::SRegOrLds_32RegClassID, MVT::i32) ||
529 isLiteralImm(MVT::i32) || isExpr();
532 bool isVCSrc_b32()
const {
533 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i32);
536 bool isVCSrc_b32_Lo256()
const {
537 return isRegOrInlineNoMods(AMDGPU::VS_32_Lo256RegClassID, MVT::i32);
540 bool isVCSrc_b64_Lo256()
const {
541 return isRegOrInlineNoMods(AMDGPU::VS_64_Lo256RegClassID, MVT::i64);
544 bool isVCSrc_b64()
const {
545 return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::i64);
548 bool isVCSrcT_b16()
const {
549 return isRegOrInlineNoMods(AMDGPU::VS_16RegClassID, MVT::i16);
552 bool isVCSrcTB16_Lo128()
const {
553 return isRegOrInlineNoMods(AMDGPU::VS_16_Lo128RegClassID, MVT::i16);
556 bool isVCSrcFake16B16_Lo128()
const {
557 return isRegOrInlineNoMods(AMDGPU::VS_32_Lo128RegClassID, MVT::i16);
560 bool isVCSrc_b16()
const {
561 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i16);
564 bool isVCSrc_v2b16()
const {
return isVCSrc_b16(); }
566 bool isVCSrc_f32()
const {
567 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f32);
570 bool isVCSrc_f64()
const {
571 return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::f64);
574 bool isVCSrcTBF16()
const {
575 return isRegOrInlineNoMods(AMDGPU::VS_16RegClassID, MVT::bf16);
578 bool isVCSrcT_f16()
const {
579 return isRegOrInlineNoMods(AMDGPU::VS_16RegClassID, MVT::f16);
582 bool isVCSrcT_bf16()
const {
583 return isRegOrInlineNoMods(AMDGPU::VS_16RegClassID, MVT::f16);
586 bool isVCSrcTBF16_Lo128()
const {
587 return isRegOrInlineNoMods(AMDGPU::VS_16_Lo128RegClassID, MVT::bf16);
590 bool isVCSrcTF16_Lo128()
const {
591 return isRegOrInlineNoMods(AMDGPU::VS_16_Lo128RegClassID, MVT::f16);
594 bool isVCSrcFake16BF16_Lo128()
const {
595 return isRegOrInlineNoMods(AMDGPU::VS_32_Lo128RegClassID, MVT::bf16);
598 bool isVCSrcFake16F16_Lo128()
const {
599 return isRegOrInlineNoMods(AMDGPU::VS_32_Lo128RegClassID, MVT::f16);
602 bool isVCSrc_bf16()
const {
603 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::bf16);
606 bool isVCSrc_f16()
const {
607 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f16);
610 bool isVCSrc_v2bf16()
const {
return isVCSrc_bf16(); }
612 bool isVCSrc_v2f16()
const {
return isVCSrc_f16(); }
614 bool isVSrc_b32()
const {
615 return isVCSrc_f32() || isLiteralImm(MVT::i32) || isExpr();
618 bool isVSrc_b64()
const {
return isVCSrc_f64() || isLiteralImm(MVT::i64); }
620 bool isVSrc_v2b64()
const {
621 return isRegOrInlineNoMods(AMDGPU::VS_128RegClassID, MVT::i64) ||
622 isLiteralImm(MVT::i64);
625 bool isVSrc_v2f64()
const {
626 return isRegOrInlineNoMods(AMDGPU::VS_128RegClassID, MVT::f64) ||
627 isLiteralImm(MVT::f64);
630 bool isVSrcT_b16()
const {
return isVCSrcT_b16() || isLiteralImm(MVT::i16); }
632 bool isVSrcT_b16_Lo128()
const {
633 return isVCSrcTB16_Lo128() || isLiteralImm(MVT::i16);
636 bool isVSrcFake16_b16_Lo128()
const {
637 return isVCSrcFake16B16_Lo128() || isLiteralImm(MVT::i16);
640 bool isVSrc_b16()
const {
return isVCSrc_b16() || isLiteralImm(MVT::i16); }
642 bool isVSrc_v2b16()
const {
return isVSrc_b16() || isLiteralImm(MVT::v2i16); }
644 bool isVCSrcV2FP32()
const {
return isVCSrc_f64(); }
646 bool isVSrc_v2f32()
const {
return isVSrc_f64() || isLiteralImm(MVT::v2f32); }
648 bool isVCSrc_v2b32()
const {
return isVCSrc_b64(); }
650 bool isVSrc_v2b32()
const {
return isVSrc_b64() || isLiteralImm(MVT::v2i32); }
652 bool isVSrc_f32()
const {
653 return isVCSrc_f32() || isLiteralImm(MVT::f32) || isExpr();
656 bool isVSrc_f64()
const {
return isVCSrc_f64() || isLiteralImm(MVT::f64); }
658 bool isVSrcT_bf16()
const {
return isVCSrcTBF16() || isLiteralImm(MVT::bf16); }
660 bool isVSrcT_f16()
const {
return isVCSrcT_f16() || isLiteralImm(MVT::f16); }
662 bool isVSrcT_bf16_Lo128()
const {
663 return isVCSrcTBF16_Lo128() || isLiteralImm(MVT::bf16);
666 bool isVSrcT_f16_Lo128()
const {
667 return isVCSrcTF16_Lo128() || isLiteralImm(MVT::f16);
670 bool isVSrcFake16_bf16_Lo128()
const {
671 return isVCSrcFake16BF16_Lo128() || isLiteralImm(MVT::bf16);
674 bool isVSrcFake16_f16_Lo128()
const {
675 return isVCSrcFake16F16_Lo128() || isLiteralImm(MVT::f16);
678 bool isVSrc_bf16()
const {
return isVCSrc_bf16() || isLiteralImm(MVT::bf16); }
680 bool isVSrc_f16()
const {
return isVCSrc_f16() || isLiteralImm(MVT::f16); }
682 bool isVSrc_v2bf16()
const {
683 return isVSrc_bf16() || isLiteralImm(MVT::v2bf16);
686 bool isVSrc_v2f16()
const {
return isVSrc_f16() || isLiteralImm(MVT::v2f16); }
688 bool isVSrc_v2f16_splat()
const {
return isVSrc_v2f16(); }
690 bool isVSrc_NoInline_v2f16()
const {
return isVSrc_v2f16(); }
692 bool isVISrcB32()
const {
693 return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i32);
696 bool isVISrcB16()
const {
697 return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i16);
700 bool isVISrcV2B16()
const {
704 bool isVISrcF32()
const {
705 return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f32);
708 bool isVISrcF16()
const {
709 return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f16);
712 bool isVISrcV2F16()
const {
713 return isVISrcF16() || isVISrcB32();
716 bool isVISrc_64_bf16()
const {
717 return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::bf16);
720 bool isVISrc_64_f16()
const {
721 return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::f16);
724 bool isVISrc_64_b32()
const {
725 return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::i32);
728 bool isVISrc_64B64()
const {
729 return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::i64);
732 bool isVISrc_64_f64()
const {
733 return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::f64);
736 bool isVISrc_64V2FP32()
const {
737 return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::f32);
740 bool isVISrc_64V2INT32()
const {
741 return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::i32);
744 bool isVISrc_256_b32()
const {
745 return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::i32);
748 bool isVISrc_256_f32()
const {
749 return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::f32);
752 bool isVISrc_256B64()
const {
753 return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::i64);
756 bool isVISrc_256_f64()
const {
757 return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::f64);
760 bool isVISrc_512_f64()
const {
761 return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::f64);
764 bool isVISrc_128B16()
const {
765 return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::i16);
768 bool isVISrc_128V2B16()
const {
769 return isVISrc_128B16();
772 bool isVISrc_128_b32()
const {
773 return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::i32);
776 bool isVISrc_128_f32()
const {
777 return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::f32);
780 bool isVISrc_256V2FP32()
const {
781 return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::f32);
784 bool isVISrc_256V2INT32()
const {
785 return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::i32);
788 bool isVISrc_512_b32()
const {
789 return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::i32);
792 bool isVISrc_512B16()
const {
793 return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::i16);
796 bool isVISrc_512V2B16()
const {
797 return isVISrc_512B16();
800 bool isVISrc_512_f32()
const {
801 return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::f32);
804 bool isVISrc_512F16()
const {
805 return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::f16);
808 bool isVISrc_512V2F16()
const {
809 return isVISrc_512F16() || isVISrc_512_b32();
812 bool isVISrc_1024_b32()
const {
813 return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::i32);
816 bool isVISrc_1024B16()
const {
817 return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::i16);
820 bool isVISrc_1024V2B16()
const {
821 return isVISrc_1024B16();
824 bool isVISrc_1024_f32()
const {
825 return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::f32);
828 bool isVISrc_1024F16()
const {
829 return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::f16);
832 bool isVISrc_1024V2F16()
const {
833 return isVISrc_1024F16() || isVISrc_1024_b32();
836 bool isAISrcB32()
const {
837 return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i32);
840 bool isAISrcB16()
const {
841 return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i16);
844 bool isAISrcV2B16()
const {
848 bool isAISrcF32()
const {
849 return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f32);
852 bool isAISrcF16()
const {
853 return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f16);
856 bool isAISrcV2F16()
const {
857 return isAISrcF16() || isAISrcB32();
860 bool isAISrc_64B64()
const {
861 return isRegOrInlineNoMods(AMDGPU::AReg_64RegClassID, MVT::i64);
864 bool isAISrc_64_f64()
const {
865 return isRegOrInlineNoMods(AMDGPU::AReg_64RegClassID, MVT::f64);
868 bool isAISrc_128_b32()
const {
869 return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i32);
872 bool isAISrc_128B16()
const {
873 return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i16);
876 bool isAISrc_128V2B16()
const {
877 return isAISrc_128B16();
880 bool isAISrc_128_f32()
const {
881 return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f32);
884 bool isAISrc_128F16()
const {
885 return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f16);
888 bool isAISrc_128V2F16()
const {
889 return isAISrc_128F16() || isAISrc_128_b32();
892 bool isVISrc_128_bf16()
const {
893 return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::bf16);
896 bool isVISrc_128_f16()
const {
897 return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::f16);
900 bool isVISrc_128V2F16()
const {
901 return isVISrc_128_f16() || isVISrc_128_b32();
904 bool isAISrc_256B64()
const {
905 return isRegOrInlineNoMods(AMDGPU::AReg_256RegClassID, MVT::i64);
908 bool isAISrc_256_f64()
const {
909 return isRegOrInlineNoMods(AMDGPU::AReg_256RegClassID, MVT::f64);
912 bool isAISrc_512_b32()
const {
913 return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i32);
916 bool isAISrc_512B16()
const {
917 return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i16);
920 bool isAISrc_512V2B16()
const {
921 return isAISrc_512B16();
924 bool isAISrc_512_f32()
const {
925 return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f32);
928 bool isAISrc_512F16()
const {
929 return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f16);
932 bool isAISrc_512V2F16()
const {
933 return isAISrc_512F16() || isAISrc_512_b32();
936 bool isAISrc_1024_b32()
const {
937 return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i32);
940 bool isAISrc_1024B16()
const {
941 return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i16);
944 bool isAISrc_1024V2B16()
const {
945 return isAISrc_1024B16();
948 bool isAISrc_1024_f32()
const {
949 return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f32);
952 bool isAISrc_1024F16()
const {
953 return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f16);
956 bool isAISrc_1024V2F16()
const {
957 return isAISrc_1024F16() || isAISrc_1024_b32();
960 bool isKImmFP32()
const {
961 return isLiteralImm(MVT::f32);
964 bool isKImmFP16()
const {
965 return isLiteralImm(MVT::f16);
968 bool isKImmFP64()
const {
return isLiteralImm(MVT::f64); }
970 bool isMem()
const override {
974 bool isExpr()
const {
975 return Kind == Expression;
978 bool isSOPPBrTarget()
const {
return isExpr() || isImm(); }
980 bool isSWaitCnt()
const;
981 bool isDepCtr()
const;
982 bool isSDelayALU()
const;
983 bool isHwreg()
const;
984 bool isSendMsg()
const;
985 bool isWaitEvent()
const;
986 bool isSplitBarrier()
const;
987 bool isSwizzle()
const;
988 bool isSMRDOffset8()
const;
989 bool isSMEMOffset()
const;
990 bool isSMRDLiteralOffset()
const;
992 bool isDPPCtrl()
const;
994 bool isGPRIdxMode()
const;
995 bool isS16Imm()
const;
996 bool isU16Imm()
const;
997 bool isEndpgm()
const;
999 auto getPredicate(std::function<
bool(
const AMDGPUOperand &
Op)>
P)
const {
1000 return [
this,
P]() {
return P(*
this); };
1005 return StringRef(Tok.Data, Tok.Length);
1013 void setImm(int64_t Val) {
1018 ImmTy getImmTy()
const {
1023 MCRegister
getReg()
const override {
1028 SMLoc getStartLoc()
const override {
1032 SMLoc getEndLoc()
const override {
1036 SMRange getLocRange()
const {
1037 return SMRange(StartLoc, EndLoc);
1040 int getMCOpIdx()
const {
return MCOpIdx; }
1042 Modifiers getModifiers()
const {
1043 assert(isRegKind() || isImmTy(ImmTyNone));
1044 return isRegKind() ?
Reg.Mods :
Imm.Mods;
1047 void setModifiers(Modifiers Mods) {
1048 assert(isRegKind() || isImmTy(ImmTyNone));
1055 bool hasModifiers()
const {
1056 return getModifiers().hasModifiers();
1059 bool hasFPModifiers()
const {
1060 return getModifiers().hasFPModifiers();
1063 bool hasIntModifiers()
const {
1064 return getModifiers().hasIntModifiers();
1067 bool isForcedLit()
const {
1068 return isImmLiteral() && getModifiers().isForcedLit();
1071 bool isForcedLit64()
const {
1072 return isImmLiteral() && getModifiers().isForcedLit64();
1075 uint64_t applyInputFPModifiers(uint64_t Val,
unsigned Size)
const;
1077 void addImmOperands(MCInst &Inst,
unsigned N,
bool ApplyModifiers =
true)
const;
1079 void addLiteralImmOperand(MCInst &Inst, int64_t Val,
bool ApplyModifiers)
const;
1081 void addRegOperands(MCInst &Inst,
unsigned N)
const;
1083 void addRegOrImmOperands(MCInst &Inst,
unsigned N)
const {
1085 addRegOperands(Inst,
N);
1087 addImmOperands(Inst,
N);
1090 void addRegOrImmWithInputModsOperands(MCInst &Inst,
unsigned N)
const {
1091 Modifiers Mods = getModifiers();
1094 addRegOperands(Inst,
N);
1096 addImmOperands(Inst,
N,
false);
1100 void addRegOrImmWithFPInputModsOperands(MCInst &Inst,
unsigned N)
const {
1101 assert(!hasIntModifiers());
1102 addRegOrImmWithInputModsOperands(Inst,
N);
1105 void addRegOrImmWithIntInputModsOperands(MCInst &Inst,
unsigned N)
const {
1106 assert(!hasFPModifiers());
1107 addRegOrImmWithInputModsOperands(Inst,
N);
1110 void addRegWithInputModsOperands(MCInst &Inst,
unsigned N)
const {
1111 Modifiers Mods = getModifiers();
1114 addRegOperands(Inst,
N);
1117 void addRegWithFPInputModsOperands(MCInst &Inst,
unsigned N)
const {
1118 assert(!hasIntModifiers());
1119 addRegWithInputModsOperands(Inst,
N);
1122 void addRegWithIntInputModsOperands(MCInst &Inst,
unsigned N)
const {
1123 assert(!hasFPModifiers());
1124 addRegWithInputModsOperands(Inst,
N);
1127 static void printImmTy(raw_ostream& OS, ImmTy
Type) {
1130 case ImmTyNone: OS <<
"None";
break;
1131 case ImmTyGDS: OS <<
"GDS";
break;
1132 case ImmTyLDS: OS <<
"LDS";
break;
1133 case ImmTyOffen: OS <<
"Offen";
break;
1134 case ImmTyIdxen: OS <<
"Idxen";
break;
1135 case ImmTyAddr64: OS <<
"Addr64";
break;
1136 case ImmTyOffset: OS <<
"Offset";
break;
1137 case ImmTyInstOffset: OS <<
"InstOffset";
break;
1138 case ImmTyOffset0: OS <<
"Offset0";
break;
1139 case ImmTyOffset1: OS <<
"Offset1";
break;
1140 case ImmTySMEMOffsetMod: OS <<
"SMEMOffsetMod";
break;
1141 case ImmTyCPol: OS <<
"CPol";
break;
1142 case ImmTyIndexKey8bit: OS <<
"index_key";
break;
1143 case ImmTyIndexKey16bit: OS <<
"index_key";
break;
1144 case ImmTyIndexKey32bit: OS <<
"index_key";
break;
1145 case ImmTyTFE: OS <<
"TFE";
break;
1146 case ImmTyIsAsync: OS <<
"IsAsync";
break;
1147 case ImmTyD16: OS <<
"D16";
break;
1148 case ImmTyFORMAT: OS <<
"FORMAT";
break;
1149 case ImmTyClamp: OS <<
"Clamp";
break;
1150 case ImmTyOModSI: OS <<
"OModSI";
break;
1151 case ImmTyDPP8: OS <<
"DPP8";
break;
1152 case ImmTyDppCtrl: OS <<
"DppCtrl";
break;
1153 case ImmTyDppRowMask: OS <<
"DppRowMask";
break;
1154 case ImmTyDppBankMask: OS <<
"DppBankMask";
break;
1155 case ImmTyDppBoundCtrl: OS <<
"DppBoundCtrl";
break;
1156 case ImmTyDppFI: OS <<
"DppFI";
break;
1157 case ImmTySDWADstSel: OS <<
"SDWADstSel";
break;
1158 case ImmTySDWASrc0Sel: OS <<
"SDWASrc0Sel";
break;
1159 case ImmTySDWASrc1Sel: OS <<
"SDWASrc1Sel";
break;
1160 case ImmTySDWADstUnused: OS <<
"SDWADstUnused";
break;
1161 case ImmTyDMask: OS <<
"DMask";
break;
1162 case ImmTyDim: OS <<
"Dim";
break;
1163 case ImmTyUNorm: OS <<
"UNorm";
break;
1164 case ImmTyDA: OS <<
"DA";
break;
1165 case ImmTyR128A16: OS <<
"R128A16";
break;
1166 case ImmTyA16: OS <<
"A16";
break;
1167 case ImmTyLWE: OS <<
"LWE";
break;
1168 case ImmTyOff: OS <<
"Off";
break;
1169 case ImmTyExpTgt: OS <<
"ExpTgt";
break;
1170 case ImmTyExpCompr: OS <<
"ExpCompr";
break;
1171 case ImmTyExpVM: OS <<
"ExpVM";
break;
1172 case ImmTyDone: OS <<
"Done";
break;
1173 case ImmTyRowEn: OS <<
"RowEn";
break;
1174 case ImmTyHwreg: OS <<
"Hwreg";
break;
1175 case ImmTySendMsg: OS <<
"SendMsg";
break;
1176 case ImmTyWaitEvent: OS <<
"WaitEvent";
break;
1177 case ImmTyInterpSlot: OS <<
"InterpSlot";
break;
1178 case ImmTyInterpAttr: OS <<
"InterpAttr";
break;
1179 case ImmTyInterpAttrChan: OS <<
"InterpAttrChan";
break;
1180 case ImmTyOpSel: OS <<
"OpSel";
break;
1181 case ImmTyOpSelHi: OS <<
"OpSelHi";
break;
1182 case ImmTyNegLo: OS <<
"NegLo";
break;
1183 case ImmTyNegHi: OS <<
"NegHi";
break;
1184 case ImmTySwizzle: OS <<
"Swizzle";
break;
1185 case ImmTyGprIdxMode: OS <<
"GprIdxMode";
break;
1186 case ImmTyHigh: OS <<
"High";
break;
1187 case ImmTyBLGP: OS <<
"BLGP";
break;
1188 case ImmTyCBSZ: OS <<
"CBSZ";
break;
1189 case ImmTyABID: OS <<
"ABID";
break;
1190 case ImmTyEndpgm: OS <<
"Endpgm";
break;
1191 case ImmTyWaitVDST: OS <<
"WaitVDST";
break;
1192 case ImmTyWaitEXP: OS <<
"WaitEXP";
break;
1193 case ImmTyWaitVAVDst: OS <<
"WaitVAVDst";
break;
1194 case ImmTyWaitVMVSrc: OS <<
"WaitVMVSrc";
break;
1195 case ImmTyBitOp3: OS <<
"BitOp3";
break;
1196 case ImmTyMatrixAFMT: OS <<
"ImmTyMatrixAFMT";
break;
1197 case ImmTyMatrixBFMT: OS <<
"ImmTyMatrixBFMT";
break;
1198 case ImmTyMatrixAScale: OS <<
"ImmTyMatrixAScale";
break;
1199 case ImmTyMatrixBScale: OS <<
"ImmTyMatrixBScale";
break;
1200 case ImmTyMatrixAScaleFmt: OS <<
"ImmTyMatrixAScaleFmt";
break;
1201 case ImmTyMatrixBScaleFmt: OS <<
"ImmTyMatrixBScaleFmt";
break;
1202 case ImmTyMatrixAReuse: OS <<
"ImmTyMatrixAReuse";
break;
1203 case ImmTyMatrixBReuse: OS <<
"ImmTyMatrixBReuse";
break;
1204 case ImmTyScaleSel: OS <<
"ScaleSel" ;
break;
1205 case ImmTyByteSel: OS <<
"ByteSel" ;
break;
1210 void print(raw_ostream &OS,
const MCAsmInfo &MAI)
const override {
1214 <<
" mods: " <<
Reg.Mods <<
'>';
1218 if (getImmTy() != ImmTyNone) {
1219 OS <<
" type: "; printImmTy(OS, getImmTy());
1221 OS <<
" mods: " <<
Imm.Mods <<
'>';
1234 static AMDGPUOperand::Ptr CreateImm(
const AMDGPUAsmParser *AsmParser,
1235 int64_t Val, SMLoc Loc,
1236 ImmTy
Type = ImmTyNone,
1237 bool IsFPImm =
false) {
1238 auto Op = std::make_unique<AMDGPUOperand>(Immediate, AsmParser);
1240 Op->Imm.IsFPImm = IsFPImm;
1242 Op->Imm.Mods = Modifiers();
1248 static AMDGPUOperand::Ptr CreateToken(
const AMDGPUAsmParser *AsmParser,
1249 StringRef Str, SMLoc Loc,
1250 bool HasExplicitEncodingSize =
true) {
1251 auto Res = std::make_unique<AMDGPUOperand>(Token, AsmParser);
1252 Res->Tok.Data = Str.data();
1253 Res->Tok.Length = Str.size();
1254 Res->StartLoc = Loc;
1259 static AMDGPUOperand::Ptr CreateReg(
const AMDGPUAsmParser *AsmParser,
1260 MCRegister
Reg, SMLoc S, SMLoc
E) {
1261 auto Op = std::make_unique<AMDGPUOperand>(Register, AsmParser);
1262 Op->Reg.RegNo =
Reg;
1263 Op->Reg.Mods = Modifiers();
1269 static AMDGPUOperand::Ptr CreateExpr(
const AMDGPUAsmParser *AsmParser,
1270 const class MCExpr *Expr, SMLoc S) {
1271 auto Op = std::make_unique<AMDGPUOperand>(Expression, AsmParser);
1280 OS <<
"abs:" << Mods.Abs <<
" neg: " << Mods.Neg <<
" sext:" << Mods.Sext;
1289#define GET_REGISTER_MATCHER
1290#include "AMDGPUGenAsmMatcher.inc"
1291#undef GET_REGISTER_MATCHER
1292#undef GET_SUBTARGET_FEATURE_NAME
1297class KernelScopeInfo {
1298 int SgprIndexUnusedMin = -1;
1299 int VgprIndexUnusedMin = -1;
1300 int AgprIndexUnusedMin = -1;
1304 void usesSgprAt(
int i) {
1305 if (i >= SgprIndexUnusedMin) {
1306 SgprIndexUnusedMin = ++i;
1309 Ctx->getOrCreateSymbol(
Twine(
".kernel.sgpr_count"));
1315 void usesVgprAt(
int i) {
1316 if (i >= VgprIndexUnusedMin) {
1317 VgprIndexUnusedMin = ++i;
1320 Ctx->getOrCreateSymbol(
Twine(
".kernel.vgpr_count"));
1322 VgprIndexUnusedMin);
1328 void usesAgprAt(
int i) {
1333 if (i >= AgprIndexUnusedMin) {
1334 AgprIndexUnusedMin = ++i;
1337 Ctx->getOrCreateSymbol(
Twine(
".kernel.agpr_count"));
1342 Ctx->getOrCreateSymbol(
Twine(
".kernel.vgpr_count"));
1344 VgprIndexUnusedMin);
1351 KernelScopeInfo() =
default;
1355 MSTI = Ctx->getSubtargetInfo();
1357 usesSgprAt(SgprIndexUnusedMin = -1);
1358 usesVgprAt(VgprIndexUnusedMin = -1);
1360 usesAgprAt(AgprIndexUnusedMin = -1);
1364 void usesRegister(RegisterKind RegKind,
unsigned DwordRegIndex,
1365 unsigned RegWidth) {
1368 usesSgprAt(DwordRegIndex +
divideCeil(RegWidth, 32) - 1);
1371 usesAgprAt(DwordRegIndex +
divideCeil(RegWidth, 32) - 1);
1374 usesVgprAt(DwordRegIndex +
divideCeil(RegWidth, 32) - 1);
1383 MCAsmParser &Parser;
1385 unsigned ForcedEncodingSize = 0;
1386 bool ForcedDPP =
false;
1387 bool ForcedSDWA =
false;
1388 KernelScopeInfo KernelScope;
1389 const unsigned HwMode;
1394#define GET_ASSEMBLER_HEADER
1395#include "AMDGPUGenAsmMatcher.inc"
1400 unsigned getRegOperandSize(
const MCInstrDesc &
Desc,
unsigned OpNo)
const {
1402 int16_t RCID = MII.getOpRegClassID(
Desc.operands()[OpNo], HwMode);
1406 std::optional<AMDGPU::InfoSectionData> InfoData;
1409 void createConstantSymbol(StringRef Id, int64_t Val);
1411 bool ParseAsAbsoluteExpression(uint32_t &Ret);
1412 bool OutOfRangeError(SMRange
Range);
1428 bool calculateGPRBlocks(
const FeatureBitset &Features,
const MCExpr *VCCUsed,
1429 const MCExpr *FlatScrUsed,
bool XNACKUsed,
1430 std::optional<bool> EnableWavefrontSize32,
1431 const MCExpr *NextFreeVGPR, SMRange VGPRRange,
1432 const MCExpr *NextFreeSGPR, SMRange SGPRRange,
1433 const MCExpr *&VGPRBlocks,
const MCExpr *&SGPRBlocks);
1434 bool ParseDirectiveAMDGCNTarget();
1435 bool ParseDirectiveAMDHSACodeObjectVersion();
1436 bool ParseDirectiveAMDHSAKernel();
1437 bool ParseAMDKernelCodeTValue(StringRef
ID, AMDGPUMCKernelCodeT &Header);
1438 bool ParseDirectiveAMDKernelCodeT();
1440 bool subtargetHasRegister(
const MCRegisterInfo &MRI, MCRegister
Reg);
1441 bool ParseDirectiveAMDGPUHsaKernel();
1443 bool ParseDirectiveISAVersion();
1444 bool ParseDirectiveHSAMetadata();
1445 bool ParseDirectivePALMetadataBegin();
1446 bool ParseDirectivePALMetadata();
1447 bool ParseDirectiveAMDGPULDS();
1448 bool ParseDirectiveAMDGPUInfo();
1452 bool ParseToEndDirective(
const char *AssemblerDirectiveBegin,
1453 const char *AssemblerDirectiveEnd,
1454 std::string &CollectString);
1456 bool AddNextRegisterToList(MCRegister &
Reg,
unsigned &RegWidth,
1457 RegisterKind RegKind, MCRegister Reg1,
1458 RegisterKind RegKind1, SMLoc Loc);
1459 bool ParseAMDGPURegister(RegisterKind &RegKind, MCRegister &
Reg,
1460 unsigned &RegNum,
unsigned &RegWidth,
1461 bool RestoreOnFailure =
false);
1462 bool ParseAMDGPURegister(RegisterKind &RegKind, MCRegister &
Reg,
1463 unsigned &RegNum,
unsigned &RegWidth,
1464 SmallVectorImpl<AsmToken> &Tokens);
1465 MCRegister ParseRegularReg(RegisterKind &RegKind,
unsigned &RegNum,
1467 SmallVectorImpl<AsmToken> &Tokens);
1468 MCRegister ParseSpecialReg(RegisterKind &RegKind,
unsigned &RegNum,
1470 SmallVectorImpl<AsmToken> &Tokens);
1471 MCRegister ParseRegList(RegisterKind &RegKind,
unsigned &RegNum,
1473 SmallVectorImpl<AsmToken> &Tokens);
1474 bool ParseRegRange(
unsigned &Num,
unsigned &Width,
unsigned &SubReg);
1475 MCRegister getRegularReg(RegisterKind RegKind,
unsigned RegNum,
1476 unsigned SubReg,
unsigned RegWidth, SMLoc Loc);
1479 bool isRegister(
const AsmToken &Token,
const AsmToken &NextToken)
const;
1480 std::optional<StringRef> getGprCountSymbolName(RegisterKind RegKind);
1481 void initializeGprCountSymbol(RegisterKind RegKind);
1482 bool updateGprCountSymbols(RegisterKind RegKind,
unsigned DwordRegIndex,
1484 void cvtMubufImpl(MCInst &Inst,
const OperandVector &Operands,
1489 OperandMode_Default,
1493 using OptionalImmIndexMap = std::map<AMDGPUOperand::ImmTy, unsigned>;
1495 AMDGPUAsmParser(
const MCSubtargetInfo &STI, MCAsmParser &_Parser,
1496 const MCInstrInfo &MII)
1497 : MCTargetAsmParser(STI, MII), Parser(_Parser),
1498 HwMode(STI.getHwMode(MCSubtargetInfo::HwMode_RegInfo)) {
1501 setAvailableFeatures(ComputeAvailableFeatures(getFeatureBits()));
1505 createConstantSymbol(
".amdgcn.gfx_generation_number",
ISA.Major);
1506 createConstantSymbol(
".amdgcn.gfx_generation_minor",
ISA.Minor);
1507 createConstantSymbol(
".amdgcn.gfx_generation_stepping",
ISA.Stepping);
1509 createConstantSymbol(
".option.machine_version_major",
ISA.Major);
1510 createConstantSymbol(
".option.machine_version_minor",
ISA.Minor);
1511 createConstantSymbol(
".option.machine_version_stepping",
ISA.Stepping);
1514 initializeGprCountSymbol(IS_VGPR);
1515 initializeGprCountSymbol(IS_SGPR);
1520 createConstantSymbol(Symbol, Code);
1522 createConstantSymbol(
"UC_VERSION_W64_BIT", 0x2000);
1523 createConstantSymbol(
"UC_VERSION_W32_BIT", 0x4000);
1524 createConstantSymbol(
"UC_VERSION_MDP_BIT", 0x8000);
1602 bool isWave32()
const {
return getAvailableFeatures()[Feature_isWave32Bit]; }
1604 bool isWave64()
const {
return getAvailableFeatures()[Feature_isWave64Bit]; }
1606 bool hasInv2PiInlineImm()
const {
1607 return getFeatureBits()[AMDGPU::FeatureInv2PiInlineImm];
1610 bool has64BitLiterals()
const {
1611 return getFeatureBits()[AMDGPU::Feature64BitLiterals];
1614 bool hasFlatOffsets()
const {
1615 return getFeatureBits()[AMDGPU::FeatureFlatInstOffsets];
1618 bool hasTrue16Insts()
const {
1619 return getFeatureBits()[AMDGPU::FeatureTrue16BitInsts];
1623 return getFeatureBits()[AMDGPU::FeatureArchitectedFlatScratch];
1626 bool hasSGPR102_SGPR103()
const {
1630 bool hasSGPR104_SGPR105()
const {
return isGFX10Plus(); }
1633 return getFeatureBits()[AMDGPU::FeatureIntClamp];
1636 bool hasPartialNSAEncoding()
const {
1637 return getFeatureBits()[AMDGPU::FeaturePartialNSAEncoding];
1640 bool hasGloballyAddressableScratch()
const {
1641 return getFeatureBits()[AMDGPU::FeatureGloballyAddressableScratch];
1654 AMDGPUTargetStreamer &getTargetStreamer() {
1655 MCTargetStreamer &TS = *getParser().getStreamer().getTargetStreamer();
1656 return static_cast<AMDGPUTargetStreamer &
>(TS);
1662 return const_cast<AMDGPUAsmParser *
>(
this)->MCTargetAsmParser::getContext();
1665 const MCRegisterInfo *getMRI()
const {
1669 const MCInstrInfo *getMII()
const {
1675 const FeatureBitset &getFeatureBits()
const {
1676 return getSTI().getFeatureBits();
1679 void setForcedEncodingSize(
unsigned Size) { ForcedEncodingSize =
Size; }
1680 void setForcedDPP(
bool ForceDPP_) { ForcedDPP = ForceDPP_; }
1681 void setForcedSDWA(
bool ForceSDWA_) { ForcedSDWA = ForceSDWA_; }
1683 unsigned getForcedEncodingSize()
const {
return ForcedEncodingSize; }
1684 bool isForcedVOP3()
const {
return ForcedEncodingSize == 64; }
1685 bool isForcedDPP()
const {
return ForcedDPP; }
1686 bool isForcedSDWA()
const {
return ForcedSDWA; }
1687 ArrayRef<unsigned> getMatchedVariants()
const;
1688 StringRef getMatchedVariantName()
const;
1690 std::unique_ptr<AMDGPUOperand> parseRegister(
bool RestoreOnFailure =
false);
1691 bool ParseRegister(MCRegister &RegNo, SMLoc &StartLoc, SMLoc &EndLoc,
1692 bool RestoreOnFailure);
1693 bool parseRegister(MCRegister &
Reg, SMLoc &StartLoc, SMLoc &EndLoc)
override;
1694 ParseStatus tryParseRegister(MCRegister &
Reg, SMLoc &StartLoc,
1695 SMLoc &EndLoc)
override;
1696 unsigned checkTargetMatchPredicate(MCInst &Inst)
override;
1697 unsigned validateTargetOperandClass(MCParsedAsmOperand &
Op,
1698 unsigned Kind)
override;
1699 bool matchAndEmitInstruction(SMLoc IDLoc,
unsigned &Opcode,
1701 uint64_t &ErrorInfo,
1702 bool MatchingInlineAsm)
override;
1703 bool ParseDirective(AsmToken DirectiveID)
override;
1704 void onEndOfFile()
override;
1705 ParseStatus parseOperand(
OperandVector &Operands, StringRef Mnemonic,
1706 OperandMode
Mode = OperandMode_Default);
1707 StringRef parseMnemonicSuffix(StringRef Name);
1708 bool parseInstruction(ParseInstructionInfo &Info, StringRef Name,
1712 ParseStatus parseTokenOp(StringRef Name,
OperandVector &Operands);
1714 ParseStatus parseIntWithPrefix(
const char *Prefix, int64_t &
Int);
1717 parseIntWithPrefix(
const char *Prefix,
OperandVector &Operands,
1718 AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
1719 std::function<
bool(int64_t &)> ConvertResult =
nullptr);
1721 ParseStatus parseOperandArrayWithPrefix(
1723 AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
1724 bool (*ConvertResult)(int64_t &) =
nullptr);
1728 AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
1729 bool IgnoreNegative =
false);
1730 unsigned getCPolKind(StringRef Id, StringRef Mnemo,
bool &Disabling)
const;
1732 ParseStatus parseScope(
OperandVector &Operands, int64_t &Scope);
1734 ParseStatus parseStringWithPrefix(StringRef Prefix, StringRef &
Value,
1736 ParseStatus parseStringOrIntWithPrefix(
OperandVector &Operands,
1738 ArrayRef<const char *> Ids,
1740 ParseStatus parseStringOrIntWithPrefix(
OperandVector &Operands,
1742 ArrayRef<const char *> Ids,
1743 AMDGPUOperand::ImmTy
Type);
1746 bool isOperandModifier(
const AsmToken &Token,
const AsmToken &NextToken)
const;
1747 bool isRegOrOperandModifier(
const AsmToken &Token,
const AsmToken &NextToken)
const;
1748 bool isNamedOperandModifier(
const AsmToken &Token,
const AsmToken &NextToken)
const;
1749 bool isOpcodeModifierWithVal(
const AsmToken &Token,
const AsmToken &NextToken)
const;
1750 bool parseSP3NegModifier();
1751 ParseStatus parseImm(
OperandVector &Operands,
bool HasSP3AbsModifier =
false,
1754 ParseStatus parseRegOrImm(
OperandVector &Operands,
bool HasSP3AbsMod =
false,
1756 ParseStatus parseRegOrImmWithFPInputMods(
OperandVector &Operands,
1757 bool AllowImm =
true);
1758 ParseStatus parseRegOrImmWithIntInputMods(
OperandVector &Operands,
1759 bool AllowImm =
true);
1760 ParseStatus parseRegWithFPInputMods(
OperandVector &Operands);
1761 ParseStatus parseRegWithIntInputMods(
OperandVector &Operands);
1764 AMDGPUOperand::ImmTy ImmTy);
1768 ParseStatus tryParseMatrixFMT(
OperandVector &Operands, StringRef Name,
1769 AMDGPUOperand::ImmTy
Type);
1772 ParseStatus tryParseMatrixScale(
OperandVector &Operands, StringRef Name,
1773 AMDGPUOperand::ImmTy
Type);
1776 ParseStatus tryParseMatrixScaleFmt(
OperandVector &Operands, StringRef Name,
1777 AMDGPUOperand::ImmTy
Type);
1781 ParseStatus parseDfmtNfmt(int64_t &
Format);
1782 ParseStatus parseUfmt(int64_t &
Format);
1783 ParseStatus parseSymbolicSplitFormat(StringRef FormatStr, SMLoc Loc,
1785 ParseStatus parseSymbolicUnifiedFormat(StringRef FormatStr, SMLoc Loc,
1788 ParseStatus parseSymbolicOrNumericFormat(int64_t &
Format);
1789 ParseStatus parseNumericFormat(int64_t &
Format);
1793 bool tryParseFmt(
const char *Pref, int64_t MaxVal, int64_t &Val);
1794 bool matchDfmtNfmt(int64_t &Dfmt, int64_t &Nfmt, StringRef FormatStr, SMLoc Loc);
1798 bool parseCnt(int64_t &IntVal);
1801 bool parseDepCtr(int64_t &IntVal,
unsigned &Mask);
1802 void depCtrError(SMLoc Loc,
int ErrorId, StringRef DepCtrName);
1805 bool parseDelay(int64_t &Delay);
1811 struct OperandInfoTy {
1814 bool IsSymbolic =
false;
1815 bool IsDefined =
false;
1817 constexpr OperandInfoTy(int64_t Val) : Val(Val) {}
1820 struct StructuredOpField : OperandInfoTy {
1824 bool IsDefined =
false;
1826 constexpr StructuredOpField(StringLiteral Id, StringLiteral Desc,
1827 unsigned Width, int64_t
Default)
1828 : OperandInfoTy(
Default), Id(Id), Desc(Desc), Width(Width) {}
1829 virtual ~StructuredOpField() =
default;
1831 bool Error(AMDGPUAsmParser &Parser,
const Twine &Err)
const {
1832 Parser.Error(Loc,
"invalid " + Desc +
": " + Err);
1836 virtual bool validate(AMDGPUAsmParser &Parser)
const {
1838 return Error(Parser,
"not supported on this GPU");
1840 return Error(Parser,
"only " + Twine(Width) +
"-bit values are legal");
1848 bool parseSendMsgBody(OperandInfoTy &Msg, OperandInfoTy &
Op, OperandInfoTy &Stream);
1849 bool validateSendMsg(
const OperandInfoTy &Msg,
1850 const OperandInfoTy &
Op,
1851 const OperandInfoTy &Stream);
1853 ParseStatus parseHwregFunc(OperandInfoTy &HwReg, OperandInfoTy &
Offset,
1854 OperandInfoTy &Width);
1856 const AMDGPUOperand &findMCOperand(
const OperandVector &Operands,
1859 static SMLoc getLaterLoc(SMLoc a, SMLoc b);
1861 SMLoc getFlatOffsetLoc(
const OperandVector &Operands)
const;
1862 SMLoc getSMEMOffsetLoc(
const OperandVector &Operands)
const;
1865 SMLoc getOperandLoc(
const OperandVector &Operands,
int MCOpIdx)
const;
1866 SMLoc getOperandLoc(std::function<
bool(
const AMDGPUOperand&)>
Test,
1868 SMLoc getImmLoc(AMDGPUOperand::ImmTy
Type,
1872 bool validateInstruction(
const MCInst &Inst, SMLoc IDLoc,
1874 bool validateOffset(
const MCInst &Inst,
const OperandVector &Operands);
1875 bool validateFlatOffset(
const MCInst &Inst,
const OperandVector &Operands);
1876 bool validateSMEMOffset(
const MCInst &Inst,
const OperandVector &Operands);
1877 bool validateSOPLiteral(
const MCInst &Inst,
const OperandVector &Operands);
1878 bool validateConstantBusLimitations(
const MCInst &Inst,
const OperandVector &Operands);
1879 std::optional<unsigned> checkVOPDRegBankConstraints(
const MCInst &Inst,
1881 bool validateVOPD(
const MCInst &Inst,
const OperandVector &Operands);
1882 bool tryVOPD(
const MCInst &Inst);
1883 bool tryVOPD3(
const MCInst &Inst);
1884 bool tryAnotherVOPDEncoding(
const MCInst &Inst);
1886 bool validateIntClampSupported(
const MCInst &Inst);
1887 bool validateMIMGAtomicDMask(
const MCInst &Inst);
1888 bool validateMIMGGatherDMask(
const MCInst &Inst);
1889 bool validateMovrels(
const MCInst &Inst,
const OperandVector &Operands);
1890 bool validateMIMGDataSize(
const MCInst &Inst, SMLoc IDLoc);
1891 bool validateMIMGAddrSize(
const MCInst &Inst, SMLoc IDLoc);
1892 bool validateMIMGD16(
const MCInst &Inst);
1893 bool validateMIMGDim(
const MCInst &Inst,
const OperandVector &Operands);
1894 bool validateTensorR128(
const MCInst &Inst);
1895 bool validateMIMGMSAA(
const MCInst &Inst);
1896 bool validateOpSel(
const MCInst &Inst);
1897 bool validateTrue16OpSel(
const MCInst &Inst);
1898 bool validateNeg(
const MCInst &Inst, AMDGPU::OpName OpName);
1899 bool validateDPP(
const MCInst &Inst,
const OperandVector &Operands);
1900 bool validateVccOperand(MCRegister
Reg)
const;
1901 bool validateVOPLiteral(
const MCInst &Inst,
const OperandVector &Operands);
1902 bool validateMAIAccWrite(
const MCInst &Inst,
const OperandVector &Operands);
1903 bool validateMAISrc2(
const MCInst &Inst,
const OperandVector &Operands);
1904 bool validateMFMA(
const MCInst &Inst,
const OperandVector &Operands);
1905 bool validateAGPRLdSt(
const MCInst &Inst)
const;
1906 bool validateVGPRAlign(
const MCInst &Inst)
const;
1907 bool validateBLGP(
const MCInst &Inst,
const OperandVector &Operands);
1908 bool validateDS(
const MCInst &Inst,
const OperandVector &Operands);
1909 bool validateGWS(
const MCInst &Inst,
const OperandVector &Operands);
1910 bool validateDivScale(
const MCInst &Inst);
1911 bool validateWaitCnt(
const MCInst &Inst,
const OperandVector &Operands);
1912 bool validateCoherencyBits(
const MCInst &Inst,
const OperandVector &Operands,
1914 bool validateTHAndScopeBits(
const MCInst &Inst,
const OperandVector &Operands,
1915 const unsigned CPol);
1916 bool validateTFE(
const MCInst &Inst,
const OperandVector &Operands);
1917 bool validateLdsDirect(
const MCInst &Inst,
const OperandVector &Operands);
1918 bool validateWMMA(
const MCInst &Inst,
const OperandVector &Operands);
1919 unsigned getConstantBusLimit(
unsigned Opcode)
const;
1920 bool usesConstantBus(
const MCInst &Inst,
unsigned OpIdx);
1921 bool isInlineConstant(
const MCInst &Inst,
unsigned OpIdx)
const;
1922 MCRegister findImplicitSGPRReadInVOP(
const MCInst &Inst)
const;
1924 bool isSupportedMnemo(StringRef Mnemo,
1925 const FeatureBitset &FBS);
1926 bool isSupportedMnemo(StringRef Mnemo,
1927 const FeatureBitset &FBS,
1928 ArrayRef<unsigned> Variants);
1929 bool checkUnsupportedInstruction(StringRef Name, SMLoc IDLoc);
1931 bool isId(
const StringRef Id)
const;
1932 bool isId(
const AsmToken &Token,
const StringRef Id)
const;
1934 StringRef getId()
const;
1935 bool trySkipId(
const StringRef Id);
1936 bool trySkipId(
const StringRef Pref,
const StringRef Id);
1940 bool parseString(StringRef &Val,
const StringRef ErrMsg =
"expected a string");
1941 bool parseId(StringRef &Val,
const StringRef ErrMsg =
"");
1947 StringRef getTokenStr()
const;
1948 AsmToken peekToken(
bool ShouldSkipSpace =
true);
1950 SMLoc getLoc()
const;
1954 void onBeginOfFile()
override;
1955 bool parsePrimaryExpr(
const MCExpr *&Res, SMLoc &EndLoc)
override;
1957 ParseStatus parseCustomOperand(
OperandVector &Operands,
unsigned MCK);
1967 bool parseSwizzleOperand(int64_t &
Op,
const unsigned MinVal,
1968 const unsigned MaxVal,
const Twine &ErrMsg,
1970 bool parseSwizzleOperands(
const unsigned OpNum, int64_t*
Op,
1971 const unsigned MinVal,
1972 const unsigned MaxVal,
1973 const StringRef ErrMsg);
1975 bool parseSwizzleOffset(int64_t &
Imm);
1976 bool parseSwizzleMacro(int64_t &
Imm);
1977 bool parseSwizzleQuadPerm(int64_t &
Imm);
1978 bool parseSwizzleBitmaskPerm(int64_t &
Imm);
1979 bool parseSwizzleBroadcast(int64_t &
Imm);
1980 bool parseSwizzleSwap(int64_t &
Imm);
1981 bool parseSwizzleReverse(int64_t &
Imm);
1982 bool parseSwizzleFFT(int64_t &
Imm);
1983 bool parseSwizzleRotate(int64_t &
Imm);
1986 int64_t parseGPRIdxMacro();
1988 void cvtMubuf(MCInst &Inst,
const OperandVector &Operands) { cvtMubufImpl(Inst, Operands,
false); }
1989 void cvtMubufAtomic(MCInst &Inst,
const OperandVector &Operands) { cvtMubufImpl(Inst, Operands,
true); }
1994 OptionalImmIndexMap &OptionalIdx);
1995 void cvtScaledMFMA(MCInst &Inst,
const OperandVector &Operands);
1996 void cvtVOP3OpSel(MCInst &Inst,
const OperandVector &Operands);
1999 void cvtSWMMAC(MCInst &Inst,
const OperandVector &Operands);
2002 void cvtVOP3OpSel(MCInst &Inst,
const OperandVector &Operands,
2003 OptionalImmIndexMap &OptionalIdx);
2005 OptionalImmIndexMap &OptionalIdx);
2007 void cvtVOP3Interp(MCInst &Inst,
const OperandVector &Operands);
2008 void cvtVINTERP(MCInst &Inst,
const OperandVector &Operands);
2009 void cvtOpSelHelper(MCInst &Inst,
unsigned OpSel);
2011 bool parseDimId(
unsigned &Encoding);
2013 bool convertDppBoundCtrl(int64_t &BoundCtrl);
2016 bool isSupportedDPPCtrl(StringRef Ctrl,
const OperandVector &Operands);
2017 int64_t parseDPPCtrlSel(StringRef Ctrl);
2018 int64_t parseDPPCtrlPerm();
2019 void cvtDPP(MCInst &Inst,
const OperandVector &Operands,
bool IsDPP8 =
false);
2021 cvtDPP(Inst, Operands,
true);
2023 void cvtVOP3DPP(MCInst &Inst,
const OperandVector &Operands,
2024 bool IsDPP8 =
false);
2025 void cvtVOP3DPP8(MCInst &Inst,
const OperandVector &Operands) {
2026 cvtVOP3DPP(Inst, Operands,
true);
2029 ParseStatus parseSDWASel(
OperandVector &Operands, StringRef Prefix,
2030 AMDGPUOperand::ImmTy
Type);
2032 void cvtSdwaVOP1(MCInst &Inst,
const OperandVector &Operands);
2033 void cvtSdwaVOP2(MCInst &Inst,
const OperandVector &Operands);
2034 void cvtSdwaVOP2b(MCInst &Inst,
const OperandVector &Operands);
2035 void cvtSdwaVOP2e(MCInst &Inst,
const OperandVector &Operands);
2036 void cvtSdwaVOPC(MCInst &Inst,
const OperandVector &Operands);
/// Basic instruction kinds distinguished by the SDWA conversion helpers.
/// The underlying type is unsigned and the numeric values are fixed
/// explicitly.
enum class SDWAInstType : unsigned {
  VOP1 = 0,
  VOP2 = 1,
  VOPC = 2
};
2041 SDWAInstType BasicInstType,
bool SkipDstVcc =
false,
2042 bool SkipSrcVcc =
false);
2153bool AMDGPUOperand::isInlinableImm(
MVT type)
const {
2163 if (!isImmTy(ImmTyNone)) {
2168 if (getModifiers().
Lit != LitModifier::None)
2178 if (type == MVT::f64 || type == MVT::i64) {
2180 AsmParser->hasInv2PiInlineImm());
2183 APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64,
Imm.Val));
2202 APFloat::rmNearestTiesToEven, &Lost);
2209 uint32_t ImmVal = FPLiteral.bitcastToAPInt().getZExtValue();
2211 AsmParser->hasInv2PiInlineImm());
2216 static_cast<int32_t
>(FPLiteral.bitcastToAPInt().getZExtValue()),
2217 AsmParser->hasInv2PiInlineImm());
2221 if (type == MVT::f64 || type == MVT::i64) {
2223 AsmParser->hasInv2PiInlineImm());
2232 static_cast<int16_t
>(
Literal.getLoBits(16).getSExtValue()),
2233 type, AsmParser->hasInv2PiInlineImm());
2237 static_cast<int32_t
>(
Literal.getLoBits(32).getZExtValue()),
2238 AsmParser->hasInv2PiInlineImm());
2241bool AMDGPUOperand::isLiteralImm(MVT type)
const {
2243 if (!isImmTy(ImmTyNone)) {
2248 (type == MVT::i64 || type == MVT::f64) && AsmParser->has64BitLiterals();
2253 if (type == MVT::f64 && hasFPModifiers()) {
2273 if (type == MVT::f64) {
2278 if (type == MVT::i64) {
2291 MVT ExpectedType = (type == MVT::v2f16) ? MVT::f16
2292 : (type == MVT::v2i16) ? MVT::f32
2293 : (type == MVT::v2f32) ? MVT::f32
2296 APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64,
Imm.Val));
2300bool AMDGPUOperand::isRegClass(
unsigned RCID)
const {
2301 return isRegKind() && AsmParser->getMRI()->getRegClass(RCID).contains(
getReg());
2304bool AMDGPUOperand::isVRegWithInputMods()
const {
2305 return isRegClass(AMDGPU::VGPR_32RegClassID) ||
2307 (isRegClass(AMDGPU::VReg_64RegClassID) &&
2308 AsmParser->getFeatureBits()[AMDGPU::FeatureDPALU_DPP]);
2311template <
bool IsFake16>
2312bool AMDGPUOperand::isT16_Lo128VRegWithInputMods()
const {
2313 return isRegClass(IsFake16 ? AMDGPU::VGPR_32_Lo128RegClassID
2314 : AMDGPU::VGPR_16_Lo128RegClassID);
2317template <
bool IsFake16>
bool AMDGPUOperand::isT16VRegWithInputMods()
const {
2318 return isRegClass(IsFake16 ? AMDGPU::VGPR_32RegClassID
2319 : AMDGPU::VGPR_16RegClassID);
2322bool AMDGPUOperand::isSDWAOperand(MVT type)
const {
2323 if (AsmParser->isVI())
2325 if (AsmParser->isGFX9Plus())
2326 return isRegClass(AMDGPU::VS_32RegClassID) || isInlinableImm(type);
2330bool AMDGPUOperand::isSDWAFP16Operand()
const {
2331 return isSDWAOperand(MVT::f16);
2334bool AMDGPUOperand::isSDWAFP32Operand()
const {
2335 return isSDWAOperand(MVT::f32);
2338bool AMDGPUOperand::isSDWAInt16Operand()
const {
2339 return isSDWAOperand(MVT::i16);
2342bool AMDGPUOperand::isSDWAInt32Operand()
const {
2343 return isSDWAOperand(MVT::i32);
2346bool AMDGPUOperand::isBoolReg()
const {
2347 return isReg() && ((AsmParser->isWave64() && isSCSrc_b64()) ||
2348 (AsmParser->isWave32() && isSCSrc_b32()));
2351uint64_t AMDGPUOperand::applyInputFPModifiers(uint64_t Val,
unsigned Size)
const
2353 assert(isImmTy(ImmTyNone) &&
Imm.Mods.hasFPModifiers());
2356 const uint64_t FpSignMask = (1ULL << (
Size * 8 - 1));
2368void AMDGPUOperand::addImmOperands(MCInst &Inst,
unsigned N,
bool ApplyModifiers)
const {
2378 addLiteralImmOperand(Inst,
Imm.Val,
2380 isImmTy(ImmTyNone) &&
Imm.Mods.hasFPModifiers());
2382 assert(!isImmTy(ImmTyNone) || !hasModifiers());
2387void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val,
bool ApplyModifiers)
const {
2388 const auto& InstDesc = AsmParser->getMII()->get(Inst.
getOpcode());
2393 if (ApplyModifiers) {
2396 Val = applyInputFPModifiers(Val,
Size);
2400 uint8_t OpTy = InstDesc.operands()[OpNum].OperandType;
2402 bool CanUse64BitLiterals =
2403 AsmParser->has64BitLiterals() &&
2406 MCContext &Ctx = AsmParser->getContext();
2417 if (
Lit == LitModifier::None &&
2419 AsmParser->hasInv2PiInlineImm())) {
2427 bool HasMandatoryLiteral =
2430 if (
Literal.getLoBits(32) != 0 &&
2431 (InstDesc.getSize() != 4 || !AsmParser->has64BitLiterals()) &&
2432 !HasMandatoryLiteral) {
2433 const_cast<AMDGPUAsmParser *
>(AsmParser)->
Warning(
2435 "Can't encode literal as exact 64-bit floating-point operand. "
2436 "Low 32-bits will be set to zero");
2437 Val &= 0xffffffff00000000u;
2443 if (CanUse64BitLiterals &&
Lit == LitModifier::None &&
2449 Lit = LitModifier::Lit64;
2450 }
else if (
Lit == LitModifier::Lit) {
2464 if (CanUse64BitLiterals &&
Lit == LitModifier::None &&
2466 Lit = LitModifier::Lit64;
2473 if (
Lit == LitModifier::None && AsmParser->hasInv2PiInlineImm() &&
2474 Literal == 0x3fc45f306725feed) {
2509 APFloat::rmNearestTiesToEven, &lost);
2513 Val = FPLiteral.bitcastToAPInt().getZExtValue();
2520 if (
Lit != LitModifier::None) {
2551 if (
Lit == LitModifier::None &&
2561 if (!AsmParser->has64BitLiterals() ||
Lit == LitModifier::Lit)
2569 if (
Lit == LitModifier::None &&
2577 if (!AsmParser->has64BitLiterals()) {
2578 Val =
static_cast<uint64_t
>(Val) << 32;
2585 if (
Lit == LitModifier::Lit ||
2587 Val =
static_cast<uint64_t
>(Val) << 32;
2591 if (
Lit == LitModifier::Lit)
2617 if (
Lit != LitModifier::None) {
2625void AMDGPUOperand::addRegOperands(MCInst &Inst,
unsigned N)
const {
2630bool AMDGPUOperand::isInlineValue()
const {
2638void AMDGPUAsmParser::createConstantSymbol(StringRef Id, int64_t Val) {
2649 if (Is == IS_VGPR) {
2653 return AMDGPU::VGPR_32RegClassID;
2655 return AMDGPU::VReg_64RegClassID;
2657 return AMDGPU::VReg_96RegClassID;
2659 return AMDGPU::VReg_128RegClassID;
2661 return AMDGPU::VReg_160RegClassID;
2663 return AMDGPU::VReg_192RegClassID;
2665 return AMDGPU::VReg_224RegClassID;
2667 return AMDGPU::VReg_256RegClassID;
2669 return AMDGPU::VReg_288RegClassID;
2671 return AMDGPU::VReg_320RegClassID;
2673 return AMDGPU::VReg_352RegClassID;
2675 return AMDGPU::VReg_384RegClassID;
2677 return AMDGPU::VReg_512RegClassID;
2679 return AMDGPU::VReg_1024RegClassID;
2681 }
else if (Is == IS_TTMP) {
2685 return AMDGPU::TTMP_32RegClassID;
2687 return AMDGPU::TTMP_64RegClassID;
2689 return AMDGPU::TTMP_128RegClassID;
2691 return AMDGPU::TTMP_256RegClassID;
2693 return AMDGPU::TTMP_512RegClassID;
2695 }
else if (Is == IS_SGPR) {
2699 return AMDGPU::SGPR_32RegClassID;
2701 return AMDGPU::SGPR_64RegClassID;
2703 return AMDGPU::SGPR_96RegClassID;
2705 return AMDGPU::SGPR_128RegClassID;
2707 return AMDGPU::SGPR_160RegClassID;
2709 return AMDGPU::SGPR_192RegClassID;
2711 return AMDGPU::SGPR_224RegClassID;
2713 return AMDGPU::SGPR_256RegClassID;
2715 return AMDGPU::SGPR_288RegClassID;
2717 return AMDGPU::SGPR_320RegClassID;
2719 return AMDGPU::SGPR_352RegClassID;
2721 return AMDGPU::SGPR_384RegClassID;
2723 return AMDGPU::SGPR_512RegClassID;
2725 }
else if (Is == IS_AGPR) {
2729 return AMDGPU::AGPR_32RegClassID;
2731 return AMDGPU::AReg_64RegClassID;
2733 return AMDGPU::AReg_96RegClassID;
2735 return AMDGPU::AReg_128RegClassID;
2737 return AMDGPU::AReg_160RegClassID;
2739 return AMDGPU::AReg_192RegClassID;
2741 return AMDGPU::AReg_224RegClassID;
2743 return AMDGPU::AReg_256RegClassID;
2745 return AMDGPU::AReg_288RegClassID;
2747 return AMDGPU::AReg_320RegClassID;
2749 return AMDGPU::AReg_352RegClassID;
2751 return AMDGPU::AReg_384RegClassID;
2753 return AMDGPU::AReg_512RegClassID;
2755 return AMDGPU::AReg_1024RegClassID;
2763 .
Case(
"exec", AMDGPU::EXEC)
2764 .
Case(
"vcc", AMDGPU::VCC)
2765 .
Case(
"flat_scratch", AMDGPU::FLAT_SCR)
2766 .
Case(
"xnack_mask", AMDGPU::XNACK_MASK)
2767 .
Case(
"shared_base", AMDGPU::SRC_SHARED_BASE)
2768 .
Case(
"src_shared_base", AMDGPU::SRC_SHARED_BASE)
2769 .
Case(
"shared_limit", AMDGPU::SRC_SHARED_LIMIT)
2770 .
Case(
"src_shared_limit", AMDGPU::SRC_SHARED_LIMIT)
2771 .
Case(
"private_base", AMDGPU::SRC_PRIVATE_BASE)
2772 .
Case(
"src_private_base", AMDGPU::SRC_PRIVATE_BASE)
2773 .
Case(
"private_limit", AMDGPU::SRC_PRIVATE_LIMIT)
2774 .
Case(
"src_private_limit", AMDGPU::SRC_PRIVATE_LIMIT)
2775 .
Case(
"src_flat_scratch_base_lo", AMDGPU::SRC_FLAT_SCRATCH_BASE_LO)
2776 .
Case(
"src_flat_scratch_base_hi", AMDGPU::SRC_FLAT_SCRATCH_BASE_HI)
2777 .
Case(
"pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID)
2778 .
Case(
"src_pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID)
2779 .
Case(
"lds_direct", AMDGPU::LDS_DIRECT)
2780 .
Case(
"src_lds_direct", AMDGPU::LDS_DIRECT)
2781 .
Case(
"m0", AMDGPU::M0)
2782 .
Case(
"vccz", AMDGPU::SRC_VCCZ)
2783 .
Case(
"src_vccz", AMDGPU::SRC_VCCZ)
2784 .
Case(
"execz", AMDGPU::SRC_EXECZ)
2785 .
Case(
"src_execz", AMDGPU::SRC_EXECZ)
2786 .
Case(
"scc", AMDGPU::SRC_SCC)
2787 .
Case(
"src_scc", AMDGPU::SRC_SCC)
2788 .
Case(
"tba", AMDGPU::TBA)
2789 .
Case(
"tma", AMDGPU::TMA)
2790 .
Case(
"flat_scratch_lo", AMDGPU::FLAT_SCR_LO)
2791 .
Case(
"flat_scratch_hi", AMDGPU::FLAT_SCR_HI)
2792 .
Case(
"xnack_mask_lo", AMDGPU::XNACK_MASK_LO)
2793 .
Case(
"xnack_mask_hi", AMDGPU::XNACK_MASK_HI)
2794 .
Case(
"vcc_lo", AMDGPU::VCC_LO)
2795 .
Case(
"vcc_hi", AMDGPU::VCC_HI)
2796 .
Case(
"exec_lo", AMDGPU::EXEC_LO)
2797 .
Case(
"exec_hi", AMDGPU::EXEC_HI)
2798 .
Case(
"tma_lo", AMDGPU::TMA_LO)
2799 .
Case(
"tma_hi", AMDGPU::TMA_HI)
2800 .
Case(
"tba_lo", AMDGPU::TBA_LO)
2801 .
Case(
"tba_hi", AMDGPU::TBA_HI)
2802 .
Case(
"pc", AMDGPU::PC_REG)
2803 .
Case(
"null", AMDGPU::SGPR_NULL)
// Parses a single register through the operand-returning parseRegister()
// overload and copies the register and its start/end locations into the
// out-parameters. Returns true when parseRegister() yields no operand.
//
// NOTE(review): in the lines visible here RestoreOnFailure is never forwarded;
// parseRegister() is called with no argument, so its declared default (false)
// applies. tryParseRegister passes `true` for this parameter, which would then
// have no effect. Confirm whether parseRegister(RestoreOnFailure) was intended.
2807bool AMDGPUAsmParser::ParseRegister(MCRegister &RegNo, SMLoc &StartLoc,
2808 SMLoc &EndLoc,
bool RestoreOnFailure) {
2809 auto R = parseRegister();
// A null operand means no register was parsed; report failure to the caller.
2810 if (!R)
return true;
2812 RegNo =
R->getReg();
2813 StartLoc =
R->getStartLoc();
2814 EndLoc =
R->getEndLoc();
2818bool AMDGPUAsmParser::parseRegister(MCRegister &
Reg, SMLoc &StartLoc,
2820 return ParseRegister(
Reg, StartLoc, EndLoc,
false);
2823ParseStatus AMDGPUAsmParser::tryParseRegister(MCRegister &
Reg, SMLoc &StartLoc,
2825 bool Result = ParseRegister(
Reg, StartLoc, EndLoc,
true);
2826 bool PendingErrors = getParser().hasPendingError();
2827 getParser().clearPendingErrors();
2835bool AMDGPUAsmParser::AddNextRegisterToList(MCRegister &
Reg,
unsigned &RegWidth,
2836 RegisterKind RegKind,
2838 RegisterKind RegKind1, SMLoc Loc) {
2840 if (RegKind == IS_SGPR) {
2841 unsigned RegIdx = (
Reg - AMDGPU::SGPR0) + RegWidth / 32;
2842 if ((RegIdx == 106 && Reg1 == AMDGPU::VCC_LO) ||
2843 (RegIdx == 107 && Reg1 == AMDGPU::VCC_HI)) {
2849 if (RegKind != RegKind1) {
2850 Error(Loc,
"registers in a list must be of the same kind");
2851 return MCRegister();
2856 if (
Reg == AMDGPU::EXEC_LO && Reg1 == AMDGPU::EXEC_HI) {
2861 if (
Reg == AMDGPU::FLAT_SCR_LO && Reg1 == AMDGPU::FLAT_SCR_HI) {
2862 Reg = AMDGPU::FLAT_SCR;
2866 if (
Reg == AMDGPU::XNACK_MASK_LO && Reg1 == AMDGPU::XNACK_MASK_HI) {
2867 Reg = AMDGPU::XNACK_MASK;
2871 if (
Reg == AMDGPU::VCC_LO && Reg1 == AMDGPU::VCC_HI) {
2876 if (
Reg == AMDGPU::TBA_LO && Reg1 == AMDGPU::TBA_HI) {
2881 if (
Reg == AMDGPU::TMA_LO && Reg1 == AMDGPU::TMA_HI) {
2886 Error(Loc,
"register does not fit in the list");
2892 if (Reg1 !=
Reg + RegWidth / 32) {
2893 Error(Loc,
"registers in a list must have consecutive indices");
2911 {{
"ttmp"}, IS_TTMP},
2917 return Kind == IS_VGPR ||
2925 if (Str.starts_with(
Reg.Name))
2931 return !Str.getAsInteger(10, Num);
2935AMDGPUAsmParser::isRegister(
const AsmToken &Token,
2936 const AsmToken &NextToken)
const {
2951 StringRef RegSuffix = Str.substr(
RegName.size());
2952 if (!RegSuffix.
empty()) {
2970AMDGPUAsmParser::isRegister()
2972 return isRegister(
getToken(), peekToken());
2975MCRegister AMDGPUAsmParser::getRegularReg(RegisterKind RegKind,
unsigned RegNum,
2976 unsigned SubReg,
unsigned RegWidth,
2980 unsigned AlignSize = 1;
2981 if (RegKind == IS_SGPR || RegKind == IS_TTMP) {
2987 if (RegNum % AlignSize != 0) {
2988 Error(Loc,
"invalid register alignment");
2989 return MCRegister();
2992 unsigned RegIdx = RegNum / AlignSize;
2995 Error(Loc,
"invalid or unsupported register size");
2996 return MCRegister();
3000 const MCRegisterClass &RC =
TRI->getRegClass(RCID);
3001 if (RegIdx >= RC.
getNumRegs() || (RegKind == IS_VGPR && RegIdx > 255)) {
3002 Error(Loc,
"register index is out of range");
3003 return AMDGPU::NoRegister;
3006 if (RegKind == IS_VGPR && !
isGFX1250Plus() && RegIdx + RegWidth / 32 > 256) {
3007 Error(Loc,
"register index is out of range");
3008 return MCRegister();
3024bool AMDGPUAsmParser::ParseRegRange(
unsigned &Num,
unsigned &RegWidth,
3026 int64_t RegLo, RegHi;
3030 SMLoc FirstIdxLoc = getLoc();
3037 SecondIdxLoc = getLoc();
3048 Error(FirstIdxLoc,
"invalid register index");
3053 Error(SecondIdxLoc,
"invalid register index");
3057 if (RegLo > RegHi) {
3058 Error(FirstIdxLoc,
"first register index should not exceed second index");
3062 if (RegHi == RegLo) {
3063 StringRef RegSuffix = getTokenStr();
3064 if (RegSuffix ==
".l") {
3065 SubReg = AMDGPU::lo16;
3067 }
else if (RegSuffix ==
".h") {
3068 SubReg = AMDGPU::hi16;
3073 Num =
static_cast<unsigned>(RegLo);
3074 RegWidth = 32 * ((RegHi - RegLo) + 1);
3079MCRegister AMDGPUAsmParser::ParseSpecialReg(RegisterKind &RegKind,
3082 SmallVectorImpl<AsmToken> &Tokens) {
3088 RegKind = IS_SPECIAL;
3095MCRegister AMDGPUAsmParser::ParseRegularReg(RegisterKind &RegKind,
3098 SmallVectorImpl<AsmToken> &Tokens) {
3100 StringRef
RegName = getTokenStr();
3101 auto Loc = getLoc();
3105 Error(Loc,
"invalid register name");
3106 return MCRegister();
3114 unsigned SubReg = NoSubRegister;
3115 bool IsRange =
false;
3116 if (!RegSuffix.
empty()) {
3118 SubReg = AMDGPU::lo16;
3120 SubReg = AMDGPU::hi16;
3124 Error(Loc,
"invalid register index");
3125 return MCRegister();
3131 if (!ParseRegRange(RegNum, RegWidth, SubReg))
3132 return MCRegister();
3136 MCRegister
Reg = getRegularReg(RegKind, RegNum, SubReg, RegWidth, Loc);
3137 const MCRegisterInfo &
TRI = *
getContext().getRegisterInfo();
3138 if (RegKind == IS_SGPR && IsRange
3139 ? (
TRI.isSubRegister(
Reg, VCC_LO) ||
TRI.isSubRegister(
Reg, VCC_HI))
3140 : (
Reg == VCC_LO ||
Reg == VCC_HI)) {
3141 Error(Loc,
"register index is out of range");
3142 return MCRegister();
3148MCRegister AMDGPUAsmParser::ParseRegList(RegisterKind &RegKind,
3149 unsigned &RegNum,
unsigned &RegWidth,
3150 SmallVectorImpl<AsmToken> &Tokens) {
3152 auto ListLoc = getLoc();
3155 "expected a register or a list of registers")) {
3156 return MCRegister();
3161 auto Loc = getLoc();
3162 if (!ParseAMDGPURegister(RegKind,
Reg, RegNum, RegWidth))
3163 return MCRegister();
3164 if (RegWidth != 32) {
3165 Error(Loc,
"expected a single 32-bit register");
3166 return MCRegister();
3170 RegisterKind NextRegKind;
3172 unsigned NextRegNum, NextRegWidth;
3175 if (!ParseAMDGPURegister(NextRegKind, NextReg,
3176 NextRegNum, NextRegWidth,
3178 return MCRegister();
3180 if (NextRegWidth != 32) {
3181 Error(Loc,
"expected a single 32-bit register");
3182 return MCRegister();
3184 if (!AddNextRegisterToList(
Reg, RegWidth, RegKind, NextReg, NextRegKind,
3186 return MCRegister();
3190 "expected a comma or a closing square bracket")) {
3191 return MCRegister();
3195 Reg = getRegularReg(RegKind, RegNum, NoSubRegister, RegWidth, ListLoc);
3200bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind,
3201 MCRegister &
Reg,
unsigned &RegNum,
3203 SmallVectorImpl<AsmToken> &Tokens) {
3204 auto Loc = getLoc();
3208 Reg = ParseSpecialReg(RegKind, RegNum, RegWidth, Tokens);
3210 Reg = ParseRegularReg(RegKind, RegNum, RegWidth, Tokens);
3212 Reg = ParseRegList(RegKind, RegNum, RegWidth, Tokens);
3217 assert(Parser.hasPendingError());
3221 if (!subtargetHasRegister(*
TRI,
Reg)) {
3222 if (
Reg == AMDGPU::SGPR_NULL) {
3223 Error(Loc,
"'null' operand is not supported on this GPU");
3226 " register not available on this GPU");
3234bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind,
3235 MCRegister &
Reg,
unsigned &RegNum,
3237 bool RestoreOnFailure ) {
3241 if (ParseAMDGPURegister(RegKind,
Reg, RegNum, RegWidth, Tokens)) {
3242 if (RestoreOnFailure) {
3243 while (!Tokens.
empty()) {
// Maps a register kind to the name of the assembler symbol that tracks the
// next free register of that kind. Yields ".amdgcn.next_free_vgpr",
// ".amdgcn.next_free_sgpr", or std::nullopt when no such symbol applies.
// NOTE(review): the conditions selecting each result are not visible in this
// excerpt — presumably IS_VGPR / IS_SGPR; confirm against the full source.
3252std::optional<StringRef>
3253AMDGPUAsmParser::getGprCountSymbolName(RegisterKind RegKind) {
3256 return StringRef(
".amdgcn.next_free_vgpr");
3258 return StringRef(
".amdgcn.next_free_sgpr");
3260 return std::nullopt;
3264void AMDGPUAsmParser::initializeGprCountSymbol(RegisterKind RegKind) {
3265 auto SymbolName = getGprCountSymbolName(RegKind);
3266 assert(SymbolName &&
"initializing invalid register kind");
3272bool AMDGPUAsmParser::updateGprCountSymbols(RegisterKind RegKind,
3273 unsigned DwordRegIndex,
3274 unsigned RegWidth) {
3279 auto SymbolName = getGprCountSymbolName(RegKind);
3284 int64_t NewMax = DwordRegIndex +
divideCeil(RegWidth, 32) - 1;
3288 return !
Error(getLoc(),
3289 ".amdgcn.next_free_{v,s}gpr symbols must be variable");
3293 ".amdgcn.next_free_{v,s}gpr symbols must be absolute expressions");
3295 if (OldCount <= NewMax)
3301std::unique_ptr<AMDGPUOperand>
3302AMDGPUAsmParser::parseRegister(
bool RestoreOnFailure) {
3304 SMLoc StartLoc = Tok.getLoc();
3305 SMLoc EndLoc = Tok.getEndLoc();
3306 RegisterKind RegKind;
3308 unsigned RegNum, RegWidth;
3310 if (!ParseAMDGPURegister(RegKind,
Reg, RegNum, RegWidth)) {
3314 if (!updateGprCountSymbols(RegKind, RegNum, RegWidth))
3317 KernelScope.usesRegister(RegKind, RegNum, RegWidth);
3318 return AMDGPUOperand::CreateReg(
this,
Reg, StartLoc, EndLoc);
3321ParseStatus AMDGPUAsmParser::parseImm(
OperandVector &Operands,
3325 if (isRegister() || isModifier())
3328 if (
Lit == LitModifier::None) {
3329 if (trySkipId(
"lit"))
3330 Lit = LitModifier::Lit;
3331 else if (trySkipId(
"lit64"))
3332 Lit = LitModifier::Lit64;
3334 if (
Lit != LitModifier::None) {
3337 ParseStatus S = parseImm(Operands, HasSP3AbsModifier,
Lit);
3346 const auto& NextTok = peekToken();
3349 bool Negate =
false;
3357 AMDGPUOperand::Modifiers Mods;
3365 StringRef Num = getTokenStr();
3368 APFloat RealVal(APFloat::IEEEdouble());
3369 auto roundMode = APFloat::rmNearestTiesToEven;
3370 if (
errorToBool(RealVal.convertFromString(Num, roundMode).takeError()))
3373 RealVal.changeSign();
3376 AMDGPUOperand::CreateImm(
this, RealVal.bitcastToAPInt().getZExtValue(), S,
3377 AMDGPUOperand::ImmTyNone,
true));
3378 AMDGPUOperand &
Op =
static_cast<AMDGPUOperand &
>(*Operands.
back());
3379 Op.setModifiers(Mods);
3388 if (HasSP3AbsModifier) {
3397 if (getParser().parsePrimaryExpr(Expr, EndLoc,
nullptr))
3400 if (Parser.parseExpression(Expr))
3404 if (Expr->evaluateAsAbsolute(IntVal)) {
3406 return Error(S,
"literal value out of range");
3407 Operands.
push_back(AMDGPUOperand::CreateImm(
this, IntVal, S));
3408 AMDGPUOperand &
Op =
static_cast<AMDGPUOperand &
>(*Operands.
back());
3409 Op.setModifiers(Mods);
3411 if (
Lit != LitModifier::None)
3413 Operands.
push_back(AMDGPUOperand::CreateExpr(
this, Expr, S));
3422ParseStatus AMDGPUAsmParser::parseReg(
OperandVector &Operands) {
3426 if (
auto R = parseRegister()) {
3434ParseStatus AMDGPUAsmParser::parseRegOrImm(
OperandVector &Operands,
3436 ParseStatus Res = parseReg(Operands);
3441 return parseImm(Operands, HasSP3AbsMod,
Lit);
// Recognizes the named operand modifiers: the result is true only when `str`
// equals "abs", "neg" or "sext".
// NOTE(review): how `str` is derived from Token/NextToken, and any guard that
// precedes this return, is outside this excerpt — confirm in the full source.
3445AMDGPUAsmParser::isNamedOperandModifier(
const AsmToken &Token,
const AsmToken &NextToken)
const {
3448 return str ==
"abs" || str ==
"neg" || str ==
"sext";
3454AMDGPUAsmParser::isOpcodeModifierWithVal(
const AsmToken &Token,
const AsmToken &NextToken)
const {
// True when Token starts an operand modifier: either a named modifier (as
// decided by isNamedOperandModifier on Token/NextToken) or a '|' token
// (AsmToken::Pipe).
3459AMDGPUAsmParser::isOperandModifier(
const AsmToken &Token,
const AsmToken &NextToken)
const {
3460 return isNamedOperandModifier(Token, NextToken) || Token.
is(
AsmToken::Pipe);
// True when the Token/NextToken pair begins either a register (isRegister) or
// an operand modifier (isOperandModifier).
3464AMDGPUAsmParser::isRegOrOperandModifier(
const AsmToken &Token,
const AsmToken &NextToken)
const {
3465 return isRegister(Token, NextToken) || isOperandModifier(Token, NextToken);
// Decides whether the upcoming tokens form a modifier, using `Tok` plus two
// tokens of lookahead.
// NOTE(review): the definition of `Tok` is not visible in this excerpt —
// presumably the current token; confirm in the full source.
3482AMDGPUAsmParser::isModifier() {
3485 AsmToken NextToken[2];
// Fill the two-entry lookahead buffer.
3486 peekTokens(NextToken);
// Three accepted forms:
//   1. an operand modifier starting at Tok;
//   2. a '-' followed by a register or operand modifier;
//   3. an opcode modifier that carries a value.
3488 return isOperandModifier(Tok, NextToken[0]) ||
3489 (Tok.
is(
AsmToken::Minus) && isRegOrOperandModifier(NextToken[0], NextToken[1])) ||
3490 isOpcodeModifierWithVal(Tok, NextToken[0]);
3516AMDGPUAsmParser::parseSP3NegModifier() {
3518 AsmToken NextToken[2];
3519 peekTokens(NextToken);
3522 (isRegister(NextToken[0], NextToken[1]) ||
3524 isId(NextToken[0],
"abs"))) {
3533AMDGPUAsmParser::parseRegOrImmWithFPInputMods(
OperandVector &Operands,
3541 return Error(getLoc(),
"invalid syntax, expected 'neg' modifier");
3543 SP3Neg = parseSP3NegModifier();
3546 Neg = trySkipId(
"neg");
3548 return Error(Loc,
"expected register or immediate");
3552 Abs = trySkipId(
"abs");
3557 if (trySkipId(
"lit")) {
3558 Lit = LitModifier::Lit;
3561 }
else if (trySkipId(
"lit64")) {
3562 Lit = LitModifier::Lit64;
3565 if (!has64BitLiterals())
3566 return Error(Loc,
"lit64 is not supported on this GPU");
3572 return Error(Loc,
"expected register or immediate");
3576 Res = parseRegOrImm(Operands, SP3Abs,
Lit);
3578 Res = parseReg(Operands);
3581 return (SP3Neg || Neg || SP3Abs || Abs ||
Lit != LitModifier::None)
3585 if (
Lit != LitModifier::None && !Operands.
back()->isImm())
3586 Error(Loc,
"expected immediate with lit modifier");
3588 if (SP3Abs && !skipToken(
AsmToken::Pipe,
"expected vertical bar"))
3594 if (
Lit != LitModifier::None &&
3598 AMDGPUOperand::Modifiers Mods;
3599 Mods.Abs = Abs || SP3Abs;
3600 Mods.Neg = Neg || SP3Neg;
3603 if (Mods.hasFPModifiers() ||
Lit != LitModifier::None) {
3604 AMDGPUOperand &
Op =
static_cast<AMDGPUOperand &
>(*Operands.
back());
3606 return Error(
Op.getStartLoc(),
"expected an absolute expression");
3607 Op.setModifiers(Mods);
3613AMDGPUAsmParser::parseRegOrImmWithIntInputMods(
OperandVector &Operands,
3615 bool Sext = trySkipId(
"sext");
3616 if (Sext && !skipToken(
AsmToken::LParen,
"expected left paren after sext"))
3621 Res = parseRegOrImm(Operands);
3623 Res = parseReg(Operands);
3631 AMDGPUOperand::Modifiers Mods;
3634 if (Mods.hasIntModifiers()) {
3635 AMDGPUOperand &
Op =
static_cast<AMDGPUOperand &
>(*Operands.
back());
3637 return Error(
Op.getStartLoc(),
"expected an absolute expression");
3638 Op.setModifiers(Mods);
// Thin forwarder to parseRegOrImmWithFPInputMods, passing `false` as the
// second argument.
// NOTE(review): the callee's second parameter is not named in this excerpt —
// presumably it disables immediates so only registers are accepted; confirm.
3644ParseStatus AMDGPUAsmParser::parseRegWithFPInputMods(
OperandVector &Operands) {
3645 return parseRegOrImmWithFPInputMods(Operands,
false);
// Thin forwarder to parseRegOrImmWithIntInputMods, passing `false` as the
// second argument.
// NOTE(review): the callee's second parameter is not named in this excerpt —
// presumably it disables immediates so only registers are accepted; confirm.
3648ParseStatus AMDGPUAsmParser::parseRegWithIntInputMods(
OperandVector &Operands) {
3649 return parseRegOrImmWithIntInputMods(Operands,
false);
3652ParseStatus AMDGPUAsmParser::parseVReg32OrOff(
OperandVector &Operands) {
3653 auto Loc = getLoc();
3654 if (trySkipId(
"off")) {
3655 Operands.
push_back(AMDGPUOperand::CreateImm(
this, 0, Loc,
3656 AMDGPUOperand::ImmTyOff,
false));
3663 std::unique_ptr<AMDGPUOperand>
Reg = parseRegister();
3672unsigned AMDGPUAsmParser::checkTargetMatchPredicate(MCInst &Inst) {
3679 return Match_InvalidOperand;
3681 if (Inst.
getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi ||
3682 Inst.
getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) {
3685 AMDGPU::getNamedOperandIdx(Inst.
getOpcode(), AMDGPU::OpName::dst_sel);
3687 if (!
Op.isImm() ||
Op.getImm() != AMDGPU::SDWA::SdwaSel::DWORD) {
3688 return Match_InvalidOperand;
3696 if (tryAnotherVOPDEncoding(Inst))
3697 return Match_InvalidOperand;
3699 return Match_Success;
3703 static const unsigned Variants[] = {
3713ArrayRef<unsigned> AMDGPUAsmParser::getMatchedVariants()
const {
3714 if (isForcedDPP() && isForcedVOP3()) {
3718 if (getForcedEncodingSize() == 32) {
3723 if (isForcedVOP3()) {
3728 if (isForcedSDWA()) {
3734 if (isForcedDPP()) {
3742StringRef AMDGPUAsmParser::getMatchedVariantName()
const {
3743 if (isForcedDPP() && isForcedVOP3())
3746 if (getForcedEncodingSize() == 32)
3762AMDGPUAsmParser::findImplicitSGPRReadInVOP(
const MCInst &Inst)
const {
3766 case AMDGPU::FLAT_SCR:
3768 case AMDGPU::VCC_LO:
3769 case AMDGPU::VCC_HI:
3776 return MCRegister();
3783bool AMDGPUAsmParser::isInlineConstant(
const MCInst &Inst,
3784 unsigned OpIdx)
const {
3841unsigned AMDGPUAsmParser::getConstantBusLimit(
unsigned Opcode)
const {
3847 case AMDGPU::V_LSHLREV_B64_e64:
3848 case AMDGPU::V_LSHLREV_B64_gfx10:
3849 case AMDGPU::V_LSHLREV_B64_e64_gfx11:
3850 case AMDGPU::V_LSHLREV_B64_e32_gfx12:
3851 case AMDGPU::V_LSHLREV_B64_e64_gfx12:
3852 case AMDGPU::V_LSHRREV_B64_e64:
3853 case AMDGPU::V_LSHRREV_B64_gfx10:
3854 case AMDGPU::V_LSHRREV_B64_e64_gfx11:
3855 case AMDGPU::V_LSHRREV_B64_e64_gfx12:
3856 case AMDGPU::V_ASHRREV_I64_e64:
3857 case AMDGPU::V_ASHRREV_I64_gfx10:
3858 case AMDGPU::V_ASHRREV_I64_e64_gfx11:
3859 case AMDGPU::V_ASHRREV_I64_e64_gfx12:
3860 case AMDGPU::V_LSHL_B64_e64:
3861 case AMDGPU::V_LSHR_B64_e64:
3862 case AMDGPU::V_ASHR_I64_e64:
3875 bool AddMandatoryLiterals =
false) {
3878 AddMandatoryLiterals ? getNamedOperandIdx(Opcode, OpName::imm) : -1;
3882 AddMandatoryLiterals ? getNamedOperandIdx(Opcode, OpName::immX) : -1;
3884 return {getNamedOperandIdx(Opcode, OpName::src0X),
3885 getNamedOperandIdx(Opcode, OpName::vsrc1X),
3886 getNamedOperandIdx(Opcode, OpName::vsrc2X),
3887 getNamedOperandIdx(Opcode, OpName::src0Y),
3888 getNamedOperandIdx(Opcode, OpName::vsrc1Y),
3889 getNamedOperandIdx(Opcode, OpName::vsrc2Y),
3894 return {getNamedOperandIdx(Opcode, OpName::src0),
3895 getNamedOperandIdx(Opcode, OpName::src1),
3896 getNamedOperandIdx(Opcode, OpName::src2), ImmIdx};
3899bool AMDGPUAsmParser::usesConstantBus(
const MCInst &Inst,
unsigned OpIdx) {
3902 return !isInlineConstant(Inst,
OpIdx);
3909 return isSGPR(PReg,
TRI) && PReg != SGPR_NULL;
3920 const unsigned Opcode = Inst.
getOpcode();
3921 if (Opcode != V_WRITELANE_B32_gfx6_gfx7 && Opcode != V_WRITELANE_B32_vi)
3924 if (!LaneSelOp.
isReg())
3927 return LaneSelReg ==
M0 || LaneSelReg == M0_gfxpre11;
3930bool AMDGPUAsmParser::validateConstantBusLimitations(
3932 const unsigned Opcode = Inst.
getOpcode();
3933 const MCInstrDesc &
Desc = MII.
get(Opcode);
3934 MCRegister LastSGPR;
3935 unsigned ConstantBusUseCount = 0;
3936 unsigned NumLiterals = 0;
3937 unsigned LiteralSize;
3939 if (!(
Desc.TSFlags &
3954 SmallDenseSet<MCRegister> SGPRsUsed;
3955 MCRegister SGPRUsed = findImplicitSGPRReadInVOP(Inst);
3957 SGPRsUsed.
insert(SGPRUsed);
3958 ++ConstantBusUseCount;
3963 unsigned ConstantBusLimit = getConstantBusLimit(Opcode);
3965 for (
int OpIdx : OpIndices) {
3970 if (usesConstantBus(Inst,
OpIdx)) {
3979 if (SGPRsUsed.
insert(LastSGPR).second) {
3980 ++ConstantBusUseCount;
4000 if (NumLiterals == 0) {
4003 }
else if (LiteralSize !=
Size) {
4009 if (ConstantBusUseCount + NumLiterals > ConstantBusLimit) {
4011 "invalid operand (violates constant bus restrictions)");
4018std::optional<unsigned>
4019AMDGPUAsmParser::checkVOPDRegBankConstraints(
const MCInst &Inst,
bool AsVOPD3) {
4021 const unsigned Opcode = Inst.
getOpcode();
4027 auto getVRegIdx = [&](unsigned,
unsigned OperandIdx) {
4028 const MCOperand &Opr = Inst.
getOperand(OperandIdx);
4037 Opcode == AMDGPU::V_DUAL_MOV_B32_e32_X_MOV_B32_e32_gfx1170 ||
4038 Opcode == AMDGPU::V_DUAL_MOV_B32_e32_X_MOV_B32_e32_gfx12 ||
4039 Opcode == AMDGPU::V_DUAL_MOV_B32_e32_X_MOV_B32_e32_gfx1250 ||
4040 Opcode == AMDGPU::V_DUAL_MOV_B32_e32_X_MOV_B32_e32_gfx13 ||
4041 Opcode == AMDGPU::V_DUAL_MOV_B32_e32_X_MOV_B32_e32_e96_gfx1250 ||
4042 Opcode == AMDGPU::V_DUAL_MOV_B32_e32_X_MOV_B32_e32_e96_gfx13;
4046 for (
auto OpName : {OpName::src0X, OpName::src0Y}) {
4047 int I = getNamedOperandIdx(Opcode, OpName);
4051 int64_t
Imm =
Op.getImm();
4057 for (
auto OpName : {OpName::vsrc1X, OpName::vsrc1Y, OpName::vsrc2X,
4058 OpName::vsrc2Y, OpName::imm}) {
4059 int I = getNamedOperandIdx(Opcode, OpName);
4069 auto InvalidCompOprIdx = InstInfo.getInvalidCompOperandIndex(
4070 getVRegIdx, *
TRI, SkipSrc, AllowSameVGPR, AsVOPD3);
4072 return InvalidCompOprIdx;
4075bool AMDGPUAsmParser::validateVOPD(
const MCInst &Inst,
4082 for (
const std::unique_ptr<MCParsedAsmOperand> &Operand : Operands) {
4083 AMDGPUOperand &
Op = (AMDGPUOperand &)*Operand;
4084 if ((
Op.isRegKind() ||
Op.isImmTy(AMDGPUOperand::ImmTyNone)) &&
4086 Error(
Op.getStartLoc(),
"ABS not allowed in VOPD3 instructions");
4090 auto InvalidCompOprIdx = checkVOPDRegBankConstraints(Inst, AsVOPD3);
4091 if (!InvalidCompOprIdx.has_value())
4094 auto CompOprIdx = *InvalidCompOprIdx;
4097 std::max(InstInfo[
VOPD::X].getIndexInParsedOperands(CompOprIdx),
4098 InstInfo[
VOPD::Y].getIndexInParsedOperands(CompOprIdx));
4099 assert(ParsedIdx > 0 && ParsedIdx < Operands.size());
4101 auto Loc = ((AMDGPUOperand &)*Operands[ParsedIdx]).getStartLoc();
4102 if (CompOprIdx == VOPD::Component::DST) {
4104 Error(Loc,
"dst registers must be distinct");
4106 Error(Loc,
"one dst register must be even and the other odd");
4108 auto CompSrcIdx = CompOprIdx - VOPD::Component::DST_NUM;
4109 Error(Loc, Twine(
"src") + Twine(CompSrcIdx) +
4110 " operands must use different VGPR banks");
4118bool AMDGPUAsmParser::tryVOPD3(
const MCInst &Inst) {
4120 auto InvalidCompOprIdx = checkVOPDRegBankConstraints(Inst,
false);
4121 if (!InvalidCompOprIdx.has_value())
4125 InvalidCompOprIdx = checkVOPDRegBankConstraints(Inst,
true);
4126 if (InvalidCompOprIdx.has_value()) {
4131 if (*InvalidCompOprIdx == VOPD::Component::DST)
4144bool AMDGPUAsmParser::tryVOPD(
const MCInst &Inst) {
4145 const unsigned Opcode = Inst.
getOpcode();
4155 if (
II[
VOPD::X].getOpcode() == AMDGPU::V_CNDMASK_B32_e32 ||
4156 II[
VOPD::Y].getOpcode() == AMDGPU::V_CNDMASK_B32_e32)
4160 for (
auto OpName : {OpName::src0X_modifiers, OpName::src0Y_modifiers,
4161 OpName::vsrc1X_modifiers, OpName::vsrc1Y_modifiers,
4162 OpName::vsrc2X_modifiers, OpName::vsrc2Y_modifiers}) {
4163 int I = getNamedOperandIdx(Opcode, OpName);
4170 return !tryVOPD3(Inst);
4175bool AMDGPUAsmParser::tryAnotherVOPDEncoding(
const MCInst &Inst) {
4176 const unsigned Opcode = Inst.
getOpcode();
4181 return tryVOPD(Inst);
4182 return tryVOPD3(Inst);
4185bool AMDGPUAsmParser::validateIntClampSupported(
const MCInst &Inst) {
4191 int ClampIdx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::clamp);
4202bool AMDGPUAsmParser::validateMIMGDataSize(
const MCInst &Inst,
SMLoc IDLoc) {
4210 int VDataIdx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::vdata);
4211 int DMaskIdx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::dmask);
4212 int TFEIdx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::tfe);
4220 unsigned VDataSize = getRegOperandSize(
Desc, VDataIdx);
4221 unsigned TFESize = (TFEIdx != -1 && Inst.
getOperand(TFEIdx).
getImm()) ? 1 : 0;
4226 bool IsPackedD16 =
false;
4230 int D16Idx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::d16);
4231 IsPackedD16 = D16Idx >= 0;
4233 DataSize = (DataSize + 1) / 2;
4236 if ((VDataSize / 4) == DataSize + TFESize)
4241 Modifiers = IsPackedD16 ?
"dmask and d16" :
"dmask";
4243 Modifiers = IsPackedD16 ?
"dmask, d16 and tfe" :
"dmask and tfe";
4245 Error(IDLoc,
Twine(
"image data size does not match ") + Modifiers);
4249bool AMDGPUAsmParser::validateMIMGAddrSize(
const MCInst &Inst, SMLoc IDLoc) {
4258 const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode =
4260 int VAddr0Idx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::vaddr0);
4262 ? AMDGPU::OpName::srsrc
4263 : AMDGPU::OpName::rsrc;
4264 int SrsrcIdx = AMDGPU::getNamedOperandIdx(
Opc, RSrcOpName);
4265 int DimIdx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::dim);
4266 int A16Idx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::a16);
4270 assert(SrsrcIdx > VAddr0Idx);
4273 if (BaseOpcode->
BVH) {
4274 if (IsA16 == BaseOpcode->
A16)
4276 Error(IDLoc,
"image address size does not match a16");
4282 bool IsNSA = SrsrcIdx - VAddr0Idx > 1;
4283 unsigned ActualAddrSize =
4284 IsNSA ? SrsrcIdx - VAddr0Idx : getRegOperandSize(
Desc, VAddr0Idx) / 4;
4286 unsigned ExpectedAddrSize =
4290 if (hasPartialNSAEncoding() &&
4293 int VAddrLastIdx = SrsrcIdx - 1;
4294 unsigned VAddrLastSize = getRegOperandSize(
Desc, VAddrLastIdx) / 4;
4296 ActualAddrSize = VAddrLastIdx - VAddr0Idx + VAddrLastSize;
4299 if (ExpectedAddrSize > 12)
4300 ExpectedAddrSize = 16;
4305 if (ActualAddrSize == 8 && (ExpectedAddrSize >= 5 && ExpectedAddrSize <= 7))
4309 if (ActualAddrSize == ExpectedAddrSize)
4312 Error(IDLoc,
"image address size does not match dim and a16");
// Validates the dmask operand of an image instruction that both loads and
// stores. On the path that reaches the final return, the only accepted dmask
// values are 0x1, 0x3 and 0xf.
// NOTE(review): the early-exit results and the computation of `DMask` are
// elided from this excerpt — confirm against the full source.
4316bool AMDGPUAsmParser::validateMIMGAtomicDMask(
const MCInst &Inst) {
// Guard for instructions that do not both load and store (result elided).
4323 if (!
Desc.mayLoad() || !
Desc.mayStore())
4326 int DMaskIdx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::dmask);
4333 return DMask == 0x1 || DMask == 0x3 || DMask == 0xf;
// Validates the dmask operand on the path that reaches the final return:
// dmask must be exactly one of 0x1, 0x2, 0x4 or 0x8, i.e. a single bit among
// the low four.
// NOTE(review): the guards that decide which instructions reach this check,
// and the computation of `DMask`, are elided from this excerpt.
4336bool AMDGPUAsmParser::validateMIMGGatherDMask(
const MCInst &Inst) {
4344 int DMaskIdx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::dmask);
4352 return DMask == 0x1 || DMask == 0x2 || DMask == 0x4 || DMask == 0x8;
4355bool AMDGPUAsmParser::validateMIMGDim(
const MCInst &Inst,
4370 for (
unsigned i = 1, e = Operands.
size(); i != e; ++i) {
4371 AMDGPUOperand &
Op = ((AMDGPUOperand &)*Operands[i]);
4378bool AMDGPUAsmParser::validateMIMGMSAA(
const MCInst &Inst) {
4386 const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode =
4389 if (!BaseOpcode->
MSAA)
4392 int DimIdx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::dim);
4398 return DimInfo->
MSAA;
4404 case AMDGPU::V_MOVRELS_B32_sdwa_gfx10:
4405 case AMDGPU::V_MOVRELSD_B32_sdwa_gfx10:
4406 case AMDGPU::V_MOVRELSD_2_B32_sdwa_gfx10:
4416bool AMDGPUAsmParser::validateMovrels(
const MCInst &Inst,
4425 const int Src0Idx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::src0);
4428 const MCOperand &Src0 = Inst.
getOperand(Src0Idx);
4436 Error(getOperandLoc(Operands, Src0Idx),
"source operand must be a VGPR");
4440bool AMDGPUAsmParser::validateMAIAccWrite(
const MCInst &Inst,
4445 if (
Opc != AMDGPU::V_ACCVGPR_WRITE_B32_vi)
4448 const int Src0Idx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::src0);
4451 const MCOperand &Src0 = Inst.
getOperand(Src0Idx);
4458 Error(getOperandLoc(Operands, Src0Idx),
4459 "source operand must be either a VGPR or an inline constant");
4466bool AMDGPUAsmParser::validateMAISrc2(
const MCInst &Inst,
4469 const MCInstrDesc &
Desc = MII.
get(Opcode);
4472 !getFeatureBits()[FeatureMFMAInlineLiteralBug])
4475 const int Src2Idx = getNamedOperandIdx(Opcode, OpName::src2);
4479 if (Inst.
getOperand(Src2Idx).
isImm() && isInlineConstant(Inst, Src2Idx)) {
4480 Error(getOperandLoc(Operands, Src2Idx),
4481 "inline constants are not allowed for this operand");
4488bool AMDGPUAsmParser::validateMFMA(
const MCInst &Inst,
4496 int BlgpIdx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::blgp);
4497 if (BlgpIdx != -1) {
4498 if (
const MFMA_F8F6F4_Info *Info = AMDGPU::isMFMA_F8F6F4(
Opc)) {
4499 int CbszIdx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::cbsz);
4509 int Src0Idx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::src0);
4510 Error(getOperandLoc(Operands, Src0Idx),
4511 "wrong register tuple size for cbsz value " + Twine(CBSZ));
4516 int Src1Idx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::src1);
4517 Error(getOperandLoc(Operands, Src1Idx),
4518 "wrong register tuple size for blgp value " + Twine(BLGP));
4526 const int Src2Idx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::src2);
4530 const MCOperand &Src2 = Inst.
getOperand(Src2Idx);
4534 MCRegister Src2Reg = Src2.
getReg();
4536 if (Src2Reg == DstReg)
4541 .getSizeInBits() <= 128)
4544 if (
TRI->regsOverlap(Src2Reg, DstReg)) {
4545 Error(getOperandLoc(Operands, Src2Idx),
4546 "source 2 operand must not partially overlap with dst");
// Validates source modifiers on the V_DIV_SCALE_{F32,F64} opcodes listed in
// the case labels below.
// NOTE(review): the switch header, its default arm, and the loop body are
// elided from this excerpt, so the exact per-operand check is not shown here.
4553bool AMDGPUAsmParser::validateDivScale(
const MCInst &Inst) {
4557 case V_DIV_SCALE_F32_gfx6_gfx7:
4558 case V_DIV_SCALE_F32_vi:
4559 case V_DIV_SCALE_F32_gfx10:
4560 case V_DIV_SCALE_F64_gfx6_gfx7:
4561 case V_DIV_SCALE_F64_vi:
4562 case V_DIV_SCALE_F64_gfx10:
// Visit the modifier operand of every source. The list previously named
// src2_modifiers twice and never src1_modifiers, so src1 was left unchecked
// while src2 was checked redundantly.
4568 for (
auto Name : {AMDGPU::OpName::src0_modifiers,
4569 AMDGPU::OpName::src1_modifiers,
4570 AMDGPU::OpName::src2_modifiers}) {
4581bool AMDGPUAsmParser::validateMIMGD16(
const MCInst &Inst) {
4589 int D16Idx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::d16);
4598bool AMDGPUAsmParser::validateTensorR128(
const MCInst &Inst) {
4605 int R128Idx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::r128);
4613 case AMDGPU::V_SUBREV_F32_e32:
4614 case AMDGPU::V_SUBREV_F32_e64:
4615 case AMDGPU::V_SUBREV_F32_e32_gfx10:
4616 case AMDGPU::V_SUBREV_F32_e32_gfx6_gfx7:
4617 case AMDGPU::V_SUBREV_F32_e32_vi:
4618 case AMDGPU::V_SUBREV_F32_e64_gfx10:
4619 case AMDGPU::V_SUBREV_F32_e64_gfx6_gfx7:
4620 case AMDGPU::V_SUBREV_F32_e64_vi:
4622 case AMDGPU::V_SUBREV_CO_U32_e32:
4623 case AMDGPU::V_SUBREV_CO_U32_e64:
4624 case AMDGPU::V_SUBREV_I32_e32_gfx6_gfx7:
4625 case AMDGPU::V_SUBREV_I32_e64_gfx6_gfx7:
4627 case AMDGPU::V_SUBBREV_U32_e32:
4628 case AMDGPU::V_SUBBREV_U32_e64:
4629 case AMDGPU::V_SUBBREV_U32_e32_gfx6_gfx7:
4630 case AMDGPU::V_SUBBREV_U32_e32_vi:
4631 case AMDGPU::V_SUBBREV_U32_e64_gfx6_gfx7:
4632 case AMDGPU::V_SUBBREV_U32_e64_vi:
4634 case AMDGPU::V_SUBREV_U32_e32:
4635 case AMDGPU::V_SUBREV_U32_e64:
4636 case AMDGPU::V_SUBREV_U32_e32_gfx9:
4637 case AMDGPU::V_SUBREV_U32_e32_vi:
4638 case AMDGPU::V_SUBREV_U32_e64_gfx9:
4639 case AMDGPU::V_SUBREV_U32_e64_vi:
4641 case AMDGPU::V_SUBREV_F16_e32:
4642 case AMDGPU::V_SUBREV_F16_e64:
4643 case AMDGPU::V_SUBREV_F16_e32_gfx10:
4644 case AMDGPU::V_SUBREV_F16_e32_vi:
4645 case AMDGPU::V_SUBREV_F16_e64_gfx10:
4646 case AMDGPU::V_SUBREV_F16_e64_vi:
4648 case AMDGPU::V_SUBREV_U16_e32:
4649 case AMDGPU::V_SUBREV_U16_e64:
4650 case AMDGPU::V_SUBREV_U16_e32_vi:
4651 case AMDGPU::V_SUBREV_U16_e64_vi:
4653 case AMDGPU::V_SUBREV_CO_U32_e32_gfx9:
4654 case AMDGPU::V_SUBREV_CO_U32_e64_gfx10:
4655 case AMDGPU::V_SUBREV_CO_U32_e64_gfx9:
4657 case AMDGPU::V_SUBBREV_CO_U32_e32_gfx9:
4658 case AMDGPU::V_SUBBREV_CO_U32_e64_gfx9:
4660 case AMDGPU::V_SUBREV_NC_U32_e32_gfx10:
4661 case AMDGPU::V_SUBREV_NC_U32_e64_gfx10:
4663 case AMDGPU::V_SUBREV_CO_CI_U32_e32_gfx10:
4664 case AMDGPU::V_SUBREV_CO_CI_U32_e64_gfx10:
4666 case AMDGPU::V_LSHRREV_B32_e32:
4667 case AMDGPU::V_LSHRREV_B32_e64:
4668 case AMDGPU::V_LSHRREV_B32_e32_gfx6_gfx7:
4669 case AMDGPU::V_LSHRREV_B32_e64_gfx6_gfx7:
4670 case AMDGPU::V_LSHRREV_B32_e32_vi:
4671 case AMDGPU::V_LSHRREV_B32_e64_vi:
4672 case AMDGPU::V_LSHRREV_B32_e32_gfx10:
4673 case AMDGPU::V_LSHRREV_B32_e64_gfx10:
4675 case AMDGPU::V_ASHRREV_I32_e32:
4676 case AMDGPU::V_ASHRREV_I32_e64:
4677 case AMDGPU::V_ASHRREV_I32_e32_gfx10:
4678 case AMDGPU::V_ASHRREV_I32_e32_gfx6_gfx7:
4679 case AMDGPU::V_ASHRREV_I32_e32_vi:
4680 case AMDGPU::V_ASHRREV_I32_e64_gfx10:
4681 case AMDGPU::V_ASHRREV_I32_e64_gfx6_gfx7:
4682 case AMDGPU::V_ASHRREV_I32_e64_vi:
4684 case AMDGPU::V_LSHLREV_B32_e32:
4685 case AMDGPU::V_LSHLREV_B32_e64:
4686 case AMDGPU::V_LSHLREV_B32_e32_gfx10:
4687 case AMDGPU::V_LSHLREV_B32_e32_gfx6_gfx7:
4688 case AMDGPU::V_LSHLREV_B32_e32_vi:
4689 case AMDGPU::V_LSHLREV_B32_e64_gfx10:
4690 case AMDGPU::V_LSHLREV_B32_e64_gfx6_gfx7:
4691 case AMDGPU::V_LSHLREV_B32_e64_vi:
4693 case AMDGPU::V_LSHLREV_B16_e32:
4694 case AMDGPU::V_LSHLREV_B16_e64:
4695 case AMDGPU::V_LSHLREV_B16_e32_vi:
4696 case AMDGPU::V_LSHLREV_B16_e64_vi:
4697 case AMDGPU::V_LSHLREV_B16_gfx10:
4699 case AMDGPU::V_LSHRREV_B16_e32:
4700 case AMDGPU::V_LSHRREV_B16_e64:
4701 case AMDGPU::V_LSHRREV_B16_e32_vi:
4702 case AMDGPU::V_LSHRREV_B16_e64_vi:
4703 case AMDGPU::V_LSHRREV_B16_gfx10:
4705 case AMDGPU::V_ASHRREV_I16_e32:
4706 case AMDGPU::V_ASHRREV_I16_e64:
4707 case AMDGPU::V_ASHRREV_I16_e32_vi:
4708 case AMDGPU::V_ASHRREV_I16_e64_vi:
4709 case AMDGPU::V_ASHRREV_I16_gfx10:
4711 case AMDGPU::V_LSHLREV_B64_e64:
4712 case AMDGPU::V_LSHLREV_B64_gfx10:
4713 case AMDGPU::V_LSHLREV_B64_vi:
4715 case AMDGPU::V_LSHRREV_B64_e64:
4716 case AMDGPU::V_LSHRREV_B64_gfx10:
4717 case AMDGPU::V_LSHRREV_B64_vi:
4719 case AMDGPU::V_ASHRREV_I64_e64:
4720 case AMDGPU::V_ASHRREV_I64_gfx10:
4721 case AMDGPU::V_ASHRREV_I64_vi:
4723 case AMDGPU::V_PK_LSHLREV_B16:
4724 case AMDGPU::V_PK_LSHLREV_B16_gfx10:
4725 case AMDGPU::V_PK_LSHLREV_B16_vi:
4727 case AMDGPU::V_PK_LSHRREV_B16:
4728 case AMDGPU::V_PK_LSHRREV_B16_gfx10:
4729 case AMDGPU::V_PK_LSHRREV_B16_vi:
4730 case AMDGPU::V_PK_ASHRREV_I16:
4731 case AMDGPU::V_PK_ASHRREV_I16_gfx10:
4732 case AMDGPU::V_PK_ASHRREV_I16_vi:
4739bool AMDGPUAsmParser::validateLdsDirect(
const MCInst &Inst,
4741 using namespace SIInstrFlags;
4742 const unsigned Opcode = Inst.
getOpcode();
4743 const MCInstrDesc &
Desc = MII.
get(Opcode);
4748 if ((
Desc.TSFlags & Enc) == 0)
4751 for (
auto SrcName : {OpName::src0, OpName::src1, OpName::src2}) {
4752 auto SrcIdx = getNamedOperandIdx(Opcode, SrcName);
4756 if (Src.isReg() && Src.getReg() == LDS_DIRECT) {
4759 Error(getOperandLoc(Operands, SrcIdx),
4760 "lds_direct is not supported on this GPU");
4765 Error(getOperandLoc(Operands, SrcIdx),
4766 "lds_direct cannot be used with this instruction");
4770 if (SrcName != OpName::src0) {
4771 Error(getOperandLoc(Operands, SrcIdx),
4772 "lds_direct may be used as src0 only");
4781SMLoc AMDGPUAsmParser::getFlatOffsetLoc(
const OperandVector &Operands)
const {
4782 for (
unsigned i = 1, e = Operands.
size(); i != e; ++i) {
4783 AMDGPUOperand &
Op = ((AMDGPUOperand &)*Operands[i]);
4784 if (
Op.isFlatOffset())
4785 return Op.getStartLoc();
4790bool AMDGPUAsmParser::validateOffset(
const MCInst &Inst,
4793 auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset);
4799 return validateFlatOffset(Inst, Operands);
4802 return validateSMEMOffset(Inst, Operands);
4808 const unsigned OffsetSize = 24;
4809 if (!
isUIntN(OffsetSize - 1,
Op.getImm())) {
4810 Error(getFlatOffsetLoc(Operands),
4811 Twine(
"expected a ") + Twine(OffsetSize - 1) +
4812 "-bit unsigned offset for buffer ops");
4816 const unsigned OffsetSize = 16;
4817 if (!
isUIntN(OffsetSize,
Op.getImm())) {
4818 Error(getFlatOffsetLoc(Operands),
4819 Twine(
"expected a ") + Twine(OffsetSize) +
"-bit unsigned offset");
4826bool AMDGPUAsmParser::validateFlatOffset(
const MCInst &Inst,
4833 auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset);
4837 if (!hasFlatOffsets() &&
Op.getImm() != 0) {
4838 Error(getFlatOffsetLoc(Operands),
4839 "flat offset modifier is not supported on this GPU");
4846 bool AllowNegative =
4849 if (!
isIntN(OffsetSize,
Op.getImm()) || (!AllowNegative &&
Op.getImm() < 0)) {
4850 Error(getFlatOffsetLoc(Operands),
4851 Twine(
"expected a ") +
4852 (AllowNegative ? Twine(OffsetSize) +
"-bit signed offset"
4853 : Twine(OffsetSize - 1) +
"-bit unsigned offset"));
4860SMLoc AMDGPUAsmParser::getSMEMOffsetLoc(
const OperandVector &Operands)
const {
4862 for (
unsigned i = 2, e = Operands.
size(); i != e; ++i) {
4863 AMDGPUOperand &
Op = ((AMDGPUOperand &)*Operands[i]);
4864 if (
Op.isSMEMOffset() ||
Op.isSMEMOffsetMod())
4865 return Op.getStartLoc();
4870bool AMDGPUAsmParser::validateSMEMOffset(
const MCInst &Inst,
4880 auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset);
4894 Error(getSMEMOffsetLoc(Operands),
4896 ?
"expected a 23-bit unsigned offset for buffer ops"
4897 :
isGFX12Plus() ?
"expected a 24-bit signed offset"
4898 : (
isVI() || IsBuffer) ?
"expected a 20-bit unsigned offset"
4899 :
"expected a 21-bit signed offset");
4904bool AMDGPUAsmParser::validateSOPLiteral(
const MCInst &Inst,
4907 const MCInstrDesc &
Desc = MII.
get(Opcode);
4911 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
4912 const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
4914 const int OpIndices[] = { Src0Idx, Src1Idx };
4916 unsigned NumExprs = 0;
4917 unsigned NumLiterals = 0;
4920 for (
int OpIdx : OpIndices) {
4921 if (
OpIdx == -1)
break;
4927 std::optional<int64_t>
Imm;
4930 }
else if (MO.
isExpr()) {
4939 if (!
Imm.has_value()) {
4941 }
else if (!isInlineConstant(Inst,
OpIdx)) {
4945 if (NumLiterals == 0 || LiteralValue !=
Value) {
4953 if (NumLiterals + NumExprs <= 1)
4956 Error(getOperandLoc(Operands, Src1Idx),
4957 "only one unique literal operand is allowed");
4961bool AMDGPUAsmParser::validateOpSel(
const MCInst &Inst) {
4964 int OpSelIdx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::op_sel);
4974 int OpSelIdx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::op_sel);
4975 if (OpSelIdx != -1) {
4979 int OpSelHiIdx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::op_sel_hi);
4980 if (OpSelHiIdx != -1) {
4989 int OpSelIdx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::op_sel);
4999 int Src0Idx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::src0);
5000 int Src1Idx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::src1);
5001 int OpSelIdx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::op_sel);
5002 int OpSelHiIdx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::op_sel_hi);
5004 const MCOperand &Src0 = Inst.
getOperand(Src0Idx);
5005 const MCOperand &Src1 = Inst.
getOperand(Src1Idx);
5011 auto VerifyOneSGPR = [
OpSel, OpSelHi](
unsigned Index) ->
bool {
5013 return ((OpSel & Mask) == 0) && ((OpSelHi &
Mask) == 0);
5023 int Src2Idx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::src2);
5024 if (Src2Idx != -1) {
5025 const MCOperand &Src2 = Inst.
getOperand(Src2Idx);
5035bool AMDGPUAsmParser::validateTrue16OpSel(
const MCInst &Inst) {
5036 if (!hasTrue16Insts())
5038 const MCRegisterInfo *MRI = getMRI();
5040 int OpSelIdx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::op_sel);
5046 if (OpSelOpValue == 0)
5048 unsigned OpCount = 0;
5049 for (AMDGPU::OpName OpName : {AMDGPU::OpName::src0, AMDGPU::OpName::src1,
5050 AMDGPU::OpName::src2, AMDGPU::OpName::vdst}) {
5051 int OpIdx = AMDGPU::getNamedOperandIdx(Inst.
getOpcode(), OpName);
5058 bool OpSelOpIsHi = ((OpSelOpValue & (1 << OpCount)) != 0);
5059 if (OpSelOpIsHi != VGPRSuffixIsHi)
5068bool AMDGPUAsmParser::validateNeg(
const MCInst &Inst, AMDGPU::OpName OpName) {
5069 assert(OpName == AMDGPU::OpName::neg_lo || OpName == AMDGPU::OpName::neg_hi);
5082 int NegIdx = AMDGPU::getNamedOperandIdx(
Opc, OpName);
5093 const AMDGPU::OpName SrcMods[3] = {AMDGPU::OpName::src0_modifiers,
5094 AMDGPU::OpName::src1_modifiers,
5095 AMDGPU::OpName::src2_modifiers};
5097 for (
unsigned i = 0; i < 3; ++i) {
5107bool AMDGPUAsmParser::validateDPP(
const MCInst &Inst,
5110 int DppCtrlIdx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::dpp_ctrl);
5111 if (DppCtrlIdx >= 0) {
5118 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyDppCtrl, Operands);
5119 Error(S,
isGFX12() ?
"DP ALU dpp only supports row_share"
5120 :
"DP ALU dpp only supports row_newbcast");
5125 int Dpp8Idx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::dpp8);
5126 bool IsDPP = DppCtrlIdx >= 0 || Dpp8Idx >= 0;
5129 int Src1Idx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::src1);
5131 const MCOperand &Src1 = Inst.
getOperand(Src1Idx);
5134 Error(getOperandLoc(Operands, Src1Idx),
5135 "invalid operand for instruction");
5139 Error(getInstLoc(Operands),
5140 "src1 immediate operand invalid for instruction");
// Checks that a VCC-style register matches the current wave size: the result
// is true for AMDGPU::VCC when isWave64() holds, or for AMDGPU::VCC_LO when
// isWave32() holds, and false for every other combination.
5150bool AMDGPUAsmParser::validateVccOperand(MCRegister
Reg)
const {
5151 return (
Reg == AMDGPU::VCC && isWave64()) ||
5152 (
Reg == AMDGPU::VCC_LO && isWave32());
// Validates the literal operands of a VOP instruction: walks the operand
// indices in OpIndices, classifies each as a literal or not, and reports
// unsupported, invalid or duplicated literals.
// NOTE(review): several lines (including the definitions of OpIndices,
// IsFP64, IsValid32Op and IsForcedFP64) are not visible in this listing.
5156bool AMDGPUAsmParser::validateVOPLiteral(
const MCInst &Inst,
5159 const MCInstrDesc &
Desc = MII.
get(Opcode);
// True when the opcode has a named `imm` operand.
5160 bool HasMandatoryLiteral = getNamedOperandIdx(Opcode, OpName::imm) != -1;
5162 !HasMandatoryLiteral && !
isVOPD(Opcode))
// Operand index of the literal recorded so far; assigned at the bottom of the
// loop whenever the current operand is counted as a literal.
5167 std::optional<unsigned> LiteralOpIdx;
5170 for (
int OpIdx : OpIndices) {
5180 std::optional<int64_t>
Imm;
5186 bool IsAnotherLiteral =
false;
// Whether the parsed operand carries an explicit lit / lit64 modifier.
5187 bool IsForcedLit = findMCOperand(Operands,
OpIdx).isForcedLit();
5188 bool IsForcedLit64 = findMCOperand(Operands,
OpIdx).isForcedLit64();
// An operand without a known immediate value is always counted as a literal;
// otherwise it is examined further only if forced or not an inline constant.
5189 if (!
Imm.has_value()) {
5191 IsAnotherLiteral =
true;
5192 }
else if (IsForcedLit || IsForcedLit64 || !isInlineConstant(Inst,
OpIdx)) {
5197 HasMandatoryLiteral);
5198 unsigned OpTy =
Desc.operands()[
OpIdx].OperandType;
5208 (IsForcedLit64 && !HasMandatoryLiteral)) &&
5209 (!has64BitLiterals() ||
Desc.getSize() != 4)) {
5211 "invalid operand for instruction");
5216 if (!IsForcedFP64 && (IsForcedLit64 || !IsValid32Op) &&
5217 OpIdx != getNamedOperandIdx(Opcode, OpName::src0)) {
5219 "invalid operand for instruction");
5223 if (IsFP64 && IsValid32Op && !IsForcedFP64)
// A non-mandatory literal requires the FeatureVOP3Literal subtarget feature.
5230 if (IsAnotherLiteral && !HasMandatoryLiteral &&
5231 !getFeatureBits()[FeatureVOP3Literal]) {
5233 "literal operands are not supported");
// A second literal after one has already been recorded is rejected; the
// location passed to Error() is chosen by getLaterLoc from the two operands.
5237 if (LiteralOpIdx && IsAnotherLiteral) {
5238 Error(getLaterLoc(getOperandLoc(Operands,
OpIdx),
5239 getOperandLoc(Operands, *LiteralOpIdx)),
5240 "only one unique literal operand is allowed");
// Remember this operand as the literal seen so far.
5244 if (IsAnotherLiteral)
5245 LiteralOpIdx =
OpIdx;
// Checks that the data and destination operands of a load/store use a
// consistent register kind, based on the integer results of IsAGPROperand().
// NOTE(review): the meaning of IsAGPROperand's return values (negative /
// 0 / 1) is not visible here; the definition of DataAreg is also elided.
5268bool AMDGPUAsmParser::validateAGPRLdSt(
const MCInst &Inst)
const {
5276 ? AMDGPU::OpName::data0
5277 : AMDGPU::OpName::vdata;
5279 const MCRegisterInfo *MRI = getMRI();
5280 int DstAreg =
IsAGPROperand(Inst, AMDGPU::OpName::vdst, MRI);
// When a second data operand (data1) yields a non-negative result, it must
// match the result for the first data operand.
5284 int Data2Areg =
IsAGPROperand(Inst, AMDGPU::OpName::data1, MRI);
5285 if (Data2Areg >= 0 && Data2Areg != DataAreg)
5289 auto FB = getFeatureBits();
// With FeatureGFX90AInsts: if both results are non-negative, data and dst must
// agree. Without it: both results must be below 1.
5290 if (FB[AMDGPU::FeatureGFX90AInsts]) {
5291 if (DataAreg < 0 || DstAreg < 0)
5293 return DstAreg == DataAreg;
5296 return DstAreg < 1 && DataAreg < 1;
// Validates register alignment for Inst on subtargets that have
// FeatureRequiresAlignedVGPRs; the check is bypassed when the feature is off.
// NOTE(review): return statements and parts of the switch are not visible in
// this listing.
5299bool AMDGPUAsmParser::validateVGPRAlign(
const MCInst &Inst)
const {
5300 auto FB = getFeatureBits();
5301 if (!FB[AMDGPU::FeatureRequiresAlignedVGPRs])
5305 const MCRegisterInfo *MRI = getMRI();
// DS_READ_B96_TR_B6_vi is special-cased when FeatureGFX90AInsts is set.
5308 if (FB[AMDGPU::FeatureGFX90AInsts] &&
Opc == AMDGPU::DS_READ_B96_TR_B6_vi)
// GFX1250: the TR6_B96 load opcodes below get opcode-specific handling.
5311 if (FB[AMDGPU::FeatureGFX1250Insts]) {
5315 case AMDGPU::DS_LOAD_TR6_B96:
5316 case AMDGPU::DS_LOAD_TR6_B96_gfx12:
5320 case AMDGPU::GLOBAL_LOAD_TR6_B96:
5321 case AMDGPU::GLOBAL_LOAD_TR6_B96_gfx1250: {
5325 int VAddrIdx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::vaddr);
5326 if (VAddrIdx != -1) {
// Tests whether Sub's offset from VGPR0 is odd.
5329 if ((
Sub - AMDGPU::VGPR0) & 1)
5334 case AMDGPU::GLOBAL_LOAD_TR6_B96_SADDR:
5335 case AMDGPU::GLOBAL_LOAD_TR6_B96_SADDR_gfx1250:
// Register classes for 32-bit VGPRs and AGPRs, looked up from MRI.
5340 const MCRegisterClass &VGPR32 = MRI->
getRegClass(AMDGPU::VGPR_32RegClassID);
5341 const MCRegisterClass &AGPR32 = MRI->
getRegClass(AMDGPU::AGPR_32RegClassID);
// Scans the parsed operands, starting at index 1 (index 0 is skipped), and
// returns the start location of the first operand that satisfies the (elided)
// match condition.
// NOTE(review): the condition and the fallback return are not visible here.
5360SMLoc AMDGPUAsmParser::getBLGPLoc(
const OperandVector &Operands)
const {
5361 for (
unsigned i = 1, e = Operands.
size(); i != e; ++i) {
5362 AMDGPUOperand &
Op = ((AMDGPUOperand &)*Operands[i]);
5364 return Op.getStartLoc();
// Validates that the spelling used for the blgp operand ("neg:" or not)
// matches what the opcode expects, and reports a mismatch.
5369bool AMDGPUAsmParser::validateBLGP(
const MCInst &Inst,
5372 int BlgpIdx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::blgp);
5375 SMLoc BLGPLoc = getBLGPLoc(Operands);
// Inspect the source text at the operand location to see whether the modifier
// was written with the "neg:" prefix.
5378 bool IsNeg = StringRef(BLGPLoc.
getPointer()).starts_with(
"neg:");
5379 auto FB = getFeatureBits();
5380 bool UsesNeg =
false;
// With FeatureGFX940Insts, the F64 MFMA opcodes listed below are singled out
// (presumably to set UsesNeg; the assignment is not visible in this listing).
5381 if (FB[AMDGPU::FeatureGFX940Insts]) {
5383 case AMDGPU::V_MFMA_F64_16X16X4F64_gfx940_acd:
5384 case AMDGPU::V_MFMA_F64_16X16X4F64_gfx940_vcd:
5385 case AMDGPU::V_MFMA_F64_4X4X4F64_gfx940_acd:
5386 case AMDGPU::V_MFMA_F64_4X4X4F64_gfx940_vcd:
// The written spelling agrees with the expected one.
5391 if (IsNeg == UsesNeg)
// Otherwise name the spelling that is not accepted for this opcode.
5395 UsesNeg ?
"invalid modifier: blgp is not supported"
5396 :
"invalid modifier: neg is not supported");
// Validates the register operand of the GFX11 split wait-count instructions
// (S_WAITCNT_EXPCNT / LGKMCNT / VMCNT / VSCNT); other opcodes are not checked
// by the condition below.
5401bool AMDGPUAsmParser::validateWaitCnt(
const MCInst &Inst,
5407 if (
Opc != AMDGPU::S_WAITCNT_EXPCNT_gfx11 &&
5408 Opc != AMDGPU::S_WAITCNT_LGKMCNT_gfx11 &&
5409 Opc != AMDGPU::S_WAITCNT_VMCNT_gfx11 &&
5410 Opc != AMDGPU::S_WAITCNT_VSCNT_gfx11)
// NOTE(review): the operand looked up is named `sdst`, while the variable and
// the diagnostic below call it "src0" - confirm the naming is intentional.
5413 int Src0Idx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::sdst);
// SGPR_NULL is the register value tested for here; the diagnostic below is
// for the other case.
5416 if (
Reg == AMDGPU::SGPR_NULL)
5419 Error(getOperandLoc(Operands, Src0Idx),
"src0 must be null");
// Validates DS instructions: one path defers entirely to validateGWS(), and
// another reports use of the gds modifier where it is not supported.
// NOTE(review): the conditions selecting each path are not visible here.
5423bool AMDGPUAsmParser::validateDS(
const MCInst &Inst,
5429 return validateGWS(Inst, Operands);
5434 AMDGPU::getNamedOperandIdx(Inst.
getOpcode(), AMDGPU::OpName::gds);
// The diagnostic is attached to the location of the gds modifier.
5439 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyGDS, Operands);
5440 Error(S,
"gds modifier is not supported on this GPU");
// Validates the data0 register of the GWS instructions DS_GWS_INIT,
// DS_GWS_BARRIER and DS_GWS_SEMA_BR (the _vi opcodes); the check depends on
// FeatureGFX90AInsts.
5448bool AMDGPUAsmParser::validateGWS(
const MCInst &Inst,
5450 if (!getFeatureBits()[AMDGPU::FeatureGFX90AInsts])
5454 if (
Opc != AMDGPU::DS_GWS_INIT_vi &&
Opc != AMDGPU::DS_GWS_BARRIER_vi &&
5455 Opc != AMDGPU::DS_GWS_SEMA_BR_vi)
5458 const MCRegisterInfo *MRI = getMRI();
5459 const MCRegisterClass &VGPR32 = MRI->
getRegClass(AMDGPU::VGPR_32RegClassID);
5461 AMDGPU::getNamedOperandIdx(Inst.
getOpcode(), AMDGPU::OpName::data0);
// Register number relative to the start of its file: VGPR0 if Reg is in the
// 32-bit VGPR class, AGPR0 otherwise.
5464 auto RegIdx =
Reg - (VGPR32.
contains(
Reg) ? AMDGPU::VGPR0 : AMDGPU::AGPR0);
5466 Error(getOperandLoc(Operands, Data0Pos),
"vgpr must be even aligned");
// Validates the cache-policy (cpol) operand of Inst. Most diagnostics are
// attached to the location of the cpol immediate; one uses IDLoc.
// NOTE(review): the conditions guarding each diagnostic are largely elided in
// this listing; the comments below only group the visible diagnostics.
5473bool AMDGPUAsmParser::validateCoherencyBits(
const MCInst &Inst,
5476 int CPolPos = AMDGPU::getNamedOperandIdx(Inst.
getOpcode(),
5477 AMDGPU::OpName::cpol);
// Modifiers rejected for the whole GPU (scale_offset, nv) or for this
// instruction (scale_offset).
5485 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands);
5488 Error(S,
"scale_offset is not supported on this GPU");
5491 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands);
5494 Error(S,
"nv is not supported on this GPU");
5499 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands);
5502 Error(S,
"scale_offset is not supported for this instruction");
// One path hands the remaining validation to validateTHAndScopeBits().
5506 return validateTHAndScopeBits(Inst, Operands, CPol);
// SMRD / SMEM specific diagnostics.
5511 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands);
5512 Error(S,
"cache policy is not supported for SMRD instructions");
5516 Error(IDLoc,
"invalid cache policy for SMEM instruction");
// The scc modifier is rejected unless TSFlags has the AllowSCCModifier bit.
5525 if (!(TSFlags & AllowSCCModifier)) {
5526 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands);
5530 "scc modifier is not supported for this instruction on this GPU");
5541 :
"instruction must use glc");
5546 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands);
5549 &CStr.data()[CStr.find(
isGFX940() ?
"sc0" :
"glc")]);
5551 :
"instruction must not use glc");
// Validates the th (and scope) parts of the cache-policy value CPol for Inst.
// Every failure goes through PrintError with one of the messages below.
// NOTE(review): PrintError's definition and the conditions guarding each
// message are not visible in this listing.
5559bool AMDGPUAsmParser::validateTHAndScopeBits(
const MCInst &Inst,
5561 const unsigned CPol) {
5565 const unsigned Opcode = Inst.
getOpcode();
5566 const MCInstrDesc &TID = MII.
get(Opcode);
// Location of the cpol immediate (presumably where PrintError reports).
5569 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands);
// TH_ATOMIC_RETURN consistency diagnostics.
5576 return PrintError(
"th:TH_ATOMIC_RETURN requires a destination operand");
5581 return PrintError(
"instruction must use th:TH_ATOMIC_RETURN");
5589 return PrintError(
"invalid th value for SMEM instruction");
5596 return PrintError(
"scope and th combination is not valid");
// Separate diagnostics for atomic, store and load instructions.
5602 return PrintError(
"invalid th value for atomic instructions");
5605 return PrintError(
"invalid th value for store instructions");
5608 return PrintError(
"invalid th value for load instructions");
// Rejects the TFE modifier on instructions that may store.
5614bool AMDGPUAsmParser::validateTFE(
const MCInst &Inst,
5617 if (
Desc.mayStore() &&
5619 SMLoc Loc = getImmLoc(AMDGPUOperand::ImmTyTFE, Operands);
// The error is only raised when the TFE location differs from the instruction
// location (presumably getImmLoc falls back to the instruction location when
// no TFE operand was written - TODO confirm).
5620 if (Loc != getInstLoc(Operands)) {
5621 Error(Loc,
"TFE modifier has no meaning for store instructions");
// Validates WMMA matrix-format operands: checks src0 against the A format and
// src1 against the B format, then the matrix/scale format combination.
// NOTE(review): the definitions of AFmt/BFmt and several conditions are not
// visible in this listing.
5629bool AMDGPUAsmParser::validateWMMA(
const MCInst &Inst,
5635 int AFmtIdx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::matrix_a_fmt);
5639 int BFmtIdx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::matrix_b_fmt);
// Helper: validates the source operand SrcOp against format Fmt; the visible
// diagnostic concerns the register tuple size of that operand.
5642 auto validateFmt = [&](
unsigned Fmt, AMDGPU::OpName SrcOp) ->
bool {
5643 int SrcIdx = AMDGPU::getNamedOperandIdx(
Opc, SrcOp);
5651 Error(getOperandLoc(Operands, SrcIdx),
5652 "wrong register tuple size for " +
5657 if (!validateFmt(AFmt, AMDGPU::OpName::src0) ||
5658 !validateFmt(BFmt, AMDGPU::OpName::src1))
5662 AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::matrix_a_scale_fmt);
// -1 means the opcode has no matrix_a_scale_fmt operand.
5663 if (AScaleIdx == -1)
5667 AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::matrix_b_scale_fmt);
// The combination error is reported at the matrix A format modifier.
5670 Error(getImmLoc(AMDGPUOperand::ImmTyMatrixAFMT, Operands),
5671 "invalid matrix and scale format combination");
// Runs the individual validate*() checks on a matched instruction, in the
// order listed below. Checks that take Operands appear to report their own
// diagnostics; for the others the diagnostic is issued here.
// NOTE(review): the statements following each failed check (presumably
// `return false;`) are not visible in this listing.
5678bool AMDGPUAsmParser::validateInstruction(
const MCInst &Inst, SMLoc IDLoc,
5680 if (!validateLdsDirect(Inst, Operands))
5682 if (!validateTrue16OpSel(Inst)) {
5683 Error(getImmLoc(AMDGPUOperand::ImmTyOpSel, Operands),
5684 "op_sel operand conflicts with 16-bit operand suffix");
5687 if (!validateSOPLiteral(Inst, Operands))
5689 if (!validateVOPLiteral(Inst, Operands)) {
5692 if (!validateConstantBusLimitations(Inst, Operands)) {
5695 if (!validateVOPD(Inst, Operands)) {
5698 if (!validateIntClampSupported(Inst)) {
5699 Error(getImmLoc(AMDGPUOperand::ImmTyClamp, Operands),
5700 "integer clamping is not supported on this GPU");
5703 if (!validateOpSel(Inst)) {
5704 Error(getImmLoc(AMDGPUOperand::ImmTyOpSel, Operands),
5705 "invalid op_sel operand");
5708 if (!validateNeg(Inst, AMDGPU::OpName::neg_lo)) {
5709 Error(getImmLoc(AMDGPUOperand::ImmTyNegLo, Operands),
5710 "invalid neg_lo operand");
5713 if (!validateNeg(Inst, AMDGPU::OpName::neg_hi)) {
5714 Error(getImmLoc(AMDGPUOperand::ImmTyNegHi, Operands),
5715 "invalid neg_hi operand");
5718 if (!validateDPP(Inst, Operands)) {
// Image (MIMG) and tensor checks.
5722 if (!validateMIMGD16(Inst)) {
5723 Error(getImmLoc(AMDGPUOperand::ImmTyD16, Operands),
5724 "d16 modifier is not supported on this GPU");
5727 if (!validateMIMGDim(Inst, Operands)) {
5728 Error(IDLoc,
"missing dim operand");
// NOTE(review): this r128 diagnostic is located via ImmTyD16, the same
// immediate type as the d16 check above - looks like a copy/paste; confirm
// whether an r128-specific immediate type was intended.
5731 if (!validateTensorR128(Inst)) {
5732 Error(getImmLoc(AMDGPUOperand::ImmTyD16, Operands),
5733 "instruction must set modifier r128=0");
5736 if (!validateMIMGMSAA(Inst)) {
5737 Error(getImmLoc(AMDGPUOperand::ImmTyDim, Operands),
5738 "invalid dim; must be MSAA type");
5741 if (!validateMIMGDataSize(Inst, IDLoc)) {
5744 if (!validateMIMGAddrSize(Inst, IDLoc))
5746 if (!validateMIMGAtomicDMask(Inst)) {
5747 Error(getImmLoc(AMDGPUOperand::ImmTyDMask, Operands),
5748 "invalid atomic image dmask");
5751 if (!validateMIMGGatherDMask(Inst)) {
5752 Error(getImmLoc(AMDGPUOperand::ImmTyDMask, Operands),
5753 "invalid image_gather dmask: only one bit must be set");
5756 if (!validateMovrels(Inst, Operands)) {
5759 if (!validateOffset(Inst, Operands)) {
5762 if (!validateMAIAccWrite(Inst, Operands)) {
5765 if (!validateMAISrc2(Inst, Operands)) {
5768 if (!validateMFMA(Inst, Operands)) {
5771 if (!validateCoherencyBits(Inst, Operands, IDLoc)) {
// The AGPR load/store message depends on whether FeatureGFX90AInsts is set.
5775 if (!validateAGPRLdSt(Inst)) {
5776 Error(IDLoc, getFeatureBits()[AMDGPU::FeatureGFX90AInsts]
5777 ?
"invalid register class: data and dst should be all VGPR or AGPR"
5778 :
"invalid register class: agpr loads and stores not supported on this GPU"
5782 if (!validateVGPRAlign(Inst)) {
5784 "invalid register class: vgpr tuples must be 64 bit aligned");
5787 if (!validateDS(Inst, Operands)) {
5791 if (!validateBLGP(Inst, Operands)) {
5795 if (!validateDivScale(Inst)) {
5796 Error(IDLoc,
"ABS not allowed in VOP3B instructions");
5799 if (!validateWaitCnt(Inst, Operands)) {
5802 if (!validateTFE(Inst, Operands)) {
5805 if (!validateWMMA(Inst, Operands)) {
5814 unsigned VariantID = 0);
5818 unsigned VariantID);
// Overload of isSupportedMnemo taking the mnemonic first.
// NOTE(review): the remaining parameters and the body are not visible in this
// listing; callers in this file pass (Mnemo, FeatureBitset).
5820bool AMDGPUAsmParser::isSupportedMnemo(
StringRef Mnemo,
// Overload of isSupportedMnemo that considers a list of assembler variants:
// iterates over Variants for the mnemonic Mnemo and feature set FBS.
// NOTE(review): the loop body and return statements are not visible here.
5825bool AMDGPUAsmParser::isSupportedMnemo(StringRef Mnemo,
5826 const FeatureBitset &FBS,
5827 ArrayRef<unsigned> Variants) {
5828 for (
auto Variant : Variants) {
// Produces the most specific diagnostic available for a mnemonic that could
// not be matched, trying progressively broader feature sets:
//   1. supported with the current features and matched variants;
//   2. supported with the current features in some other variant;
//   3. (GFX10+, wave64 only) supported once the wave size features are flipped
//      to wave32;
//   4. supported when every feature bit is set (i.e. on some other GPU);
//   5. otherwise an invalid instruction.
// NOTE(review): the action taken in case 1 is not visible in this listing.
5836bool AMDGPUAsmParser::checkUnsupportedInstruction(StringRef Mnemo,
5838 FeatureBitset FBS = ComputeAvailableFeatures(getFeatureBits());
5841 if (isSupportedMnemo(Mnemo, FBS, getMatchedVariants()))
5846 getParser().clearPendingErrors();
5850 StringRef VariantName = getMatchedVariantName();
5851 if (!VariantName.
empty() && isSupportedMnemo(Mnemo, FBS)) {
5854 " variant of this instruction is not supported"));
5858 if (
isGFX10Plus() && getFeatureBits()[AMDGPU::FeatureWavefrontSize64] &&
5859 !getFeatureBits()[AMDGPU::FeatureWavefrontSize32]) {
// Build a copy of the feature bits with both wave size features toggled, so
// the wave64-only configuration becomes wave32-only.
5861 FeatureBitset FeaturesWS32 = getFeatureBits();
5862 FeaturesWS32.
flip(AMDGPU::FeatureWavefrontSize64)
5863 .
flip(AMDGPU::FeatureWavefrontSize32);
5864 FeatureBitset AvailableFeaturesWS32 =
5865 ComputeAvailableFeatures(FeaturesWS32);
5867 if (isSupportedMnemo(Mnemo, AvailableFeaturesWS32, getMatchedVariants()))
5868 return Error(IDLoc,
"instruction requires wavesize=32");
// FeatureBitset().set() turns on every feature bit.
5872 if (isSupportedMnemo(Mnemo, FeatureBitset().set())) {
5873 return Error(IDLoc,
"instruction not supported on this GPU (" +
5874 getSTI().
getCPU() +
")" +
": " + Mnemo);
5879 return Error(IDLoc,
"invalid instruction" + Suggestion);
5885 const auto &
Op = ((AMDGPUOperand &)*Operands[InvalidOprIdx]);
5886 if (
Op.isToken() && InvalidOprIdx > 1) {
5887 const auto &PrevOp = ((AMDGPUOperand &)*Operands[InvalidOprIdx - 1]);
5888 return PrevOp.isToken() && PrevOp.getToken() ==
"::";
// Matches the parsed operands against every assembler variant returned by
// getMatchedVariants(), keeps the most informative match result, validates a
// successful match, and otherwise converts the result into a diagnostic.
// NOTE(review): emission of the instruction and several statements are not
// visible in this listing.
5893bool AMDGPUAsmParser::matchAndEmitInstruction(SMLoc IDLoc,
unsigned &Opcode,
5896 uint64_t &ErrorInfo,
5897 bool MatchingInlineAsm) {
5900 unsigned Result = Match_Success;
5901 for (
auto Variant : getMatchedVariants()) {
5903 auto R = MatchInstructionImpl(Operands, Inst, EI, MatchingInlineAsm,
// Ranks results by specificity: Success and MissingFeature always satisfy the
// condition; InvalidOperand does unless Result is already MissingFeature;
// MnemonicFail only if Result is neither InvalidOperand nor MissingFeature.
5908 if (R == Match_Success || R == Match_MissingFeature ||
5909 (R == Match_InvalidOperand && Result != Match_MissingFeature) ||
5910 (R == Match_MnemonicFail && Result != Match_InvalidOperand &&
5911 Result != Match_MissingFeature)) {
5915 if (R == Match_Success)
5919 if (Result == Match_Success) {
5920 if (!validateInstruction(Inst, IDLoc, Operands)) {
// The mnemonic is the token held by operand 0.
5927 StringRef Mnemo = ((AMDGPUOperand &)*Operands[0]).
getToken();
5928 if (checkUnsupportedInstruction(Mnemo, IDLoc)) {
5934 case Match_MissingFeature:
5938 return Error(IDLoc,
"operands are not valid for this GPU or mode");
5940 case Match_InvalidOperand: {
5941 SMLoc ErrorLoc = IDLoc;
// ErrorInfo == ~0ULL means no operand index is available; an index at or past
// the end of Operands is reported as too few operands.
5942 if (ErrorInfo != ~0ULL) {
5943 if (ErrorInfo >= Operands.
size()) {
5944 return Error(IDLoc,
"too few operands for instruction");
5946 ErrorLoc = ((AMDGPUOperand &)*Operands[ErrorInfo]).getStartLoc();
5947 if (ErrorLoc == SMLoc())
5951 return Error(ErrorLoc,
"invalid VOPDY instruction");
5953 return Error(ErrorLoc,
"invalid operand for instruction");
5956 case Match_MnemonicFail:
// Parses an absolute expression from the token stream and stores it in Ret,
// converted to uint32_t with static_cast (no range check is visible here).
// NOTE(review): the declaration of Tmp and the return statements are elided.
5962bool AMDGPUAsmParser::ParseAsAbsoluteExpression(uint32_t &Ret) {
5967 if (getParser().parseAbsoluteExpression(Tmp)) {
5970 Ret =
static_cast<uint32_t
>(Tmp);
// Handles the .amdgcn_target directive: parses the quoted target ID and
// checks it against the target ID held by the target streamer.
5974bool AMDGPUAsmParser::ParseDirectiveAMDGCNTarget() {
5975 if (!getSTI().getTargetTriple().isAMDGCN())
5976 return TokError(
"directive only supported for amdgcn architecture");
5978 std::string TargetIDDirective;
// Remember where the string starts so errors can point at it.
5979 SMLoc TargetStart = getTok().getLoc();
5980 if (getParser().parseEscapedString(TargetIDDirective))
5983 std::optional<AMDGPU::TargetID> MaybeParsed =
5986 return getParser().Error(TargetStart,
"malformed target ID");
5989 const std::optional<AMDGPU::TargetID> &CurrentTargetID =
5990 getTargetStreamer().getTargetID();
// The directive must name exactly the target the assembler was configured for.
5992 if (*CurrentTargetID != ParsedTargetID) {
5993 return getParser().Error(
5994 TargetStart, Twine(
".amdgcn_target directive's target id ") +
5996 Twine(
" does not match the specified target id ") +
5997 Twine(CurrentTargetID->toString()));
// Error helper taking the source range of the offending value.
// NOTE(review): the body is not visible in this listing; callers in this file
// return its result directly from their out-of-range paths.
6003bool AMDGPUAsmParser::OutOfRangeError(SMRange
Range) {
// Computes the VGPR and SGPR block-count expressions (output parameters
// VGPRBlocks / SGPRBlocks) from the next-free register expressions, and
// range-checks the SGPR count when it can be evaluated.
// VGPRRange / SGPRRange are the source ranges used for out-of-range errors.
// NOTE(review): a number of statements are not visible in this listing.
6007bool AMDGPUAsmParser::calculateGPRBlocks(
6008 const FeatureBitset &Features,
const MCExpr *VCCUsed,
6009 const MCExpr *FlatScrUsed,
bool XNACKUsed,
6010 std::optional<bool> EnableWavefrontSize32,
const MCExpr *NextFreeVGPR,
6011 SMRange VGPRRange,
const MCExpr *NextFreeSGPR, SMRange SGPRRange,
6012 const MCExpr *&VGPRBlocks,
const MCExpr *&SGPRBlocks) {
6018 const MCExpr *
NumSGPRs = NextFreeSGPR;
6019 int64_t EvaluatedSGPRs;
// First SGPR range check: applies for Version.Major >= 8 without
// FeatureSGPRInitBug, and only if the expression is evaluatable.
6026 if (
NumSGPRs->evaluateAsAbsolute(EvaluatedSGPRs) &&
Version.Major >= 8 &&
6027 !Features.
test(FeatureSGPRInitBug) &&
6028 static_cast<uint64_t
>(EvaluatedSGPRs) > MaxAddressableNumSGPRs)
6029 return OutOfRangeError(SGPRRange);
6031 const MCExpr *ExtraSGPRs =
// Second SGPR range check: the complementary case (Version.Major <= 7 or
// FeatureSGPRInitBug), performed after the ExtraSGPRs expression is built.
6035 if (
NumSGPRs->evaluateAsAbsolute(EvaluatedSGPRs) &&
6036 (
Version.Major <= 7 || Features.
test(FeatureSGPRInitBug)) &&
6037 static_cast<uint64_t
>(EvaluatedSGPRs) > MaxAddressableNumSGPRs)
6038 return OutOfRangeError(SGPRRange);
6040 if (Features.
test(FeatureSGPRInitBug))
// Helper building the block-count expression for a register count and an
// allocation granule; the visible steps are an align-to and a divide
// expression.
6047 auto GetNumGPRBlocks = [&Ctx](
const MCExpr *NumGPR,
6048 unsigned Granule) ->
const MCExpr * {
6052 const MCExpr *AlignToGPR =
6054 const MCExpr *DivGPR =
6060 VGPRBlocks = GetNumGPRBlocks(
// Parses an AMDHSA kernel descriptor block: reads the kernel name, then a
// sequence of `.amdhsa_*` sub-directives (each followed by an expression)
// until `.end_amdhsa_kernel`, fills the kernel descriptor KD, performs
// cross-field checks, and emits the descriptor through the target streamer.
// NOTE(review): many statements are not visible in this listing; the comments
// describe only what the visible lines show.
6069bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() {
6070 if (!getSTI().getTargetTriple().isAMDGCN())
6071 return TokError(
"directive only supported for amdgcn architecture");
6074 return TokError(
"directive only supported for amdhsa OS");
6076 StringRef KernelName;
6077 if (getParser().parseIdentifier(KernelName))
6080 AMDGPU::MCKernelDescriptor KD =
// State collected from the sub-directives and consumed after the loop.
6092 const MCExpr *NextFreeVGPR = ZeroExpr;
6094 const MCExpr *NamedBarCnt = ZeroExpr;
6095 uint64_t SharedVGPRCount = 0;
6096 uint64_t PreloadLength = 0;
6097 uint64_t PreloadOffset = 0;
6099 const MCExpr *NextFreeSGPR = ZeroExpr;
// Running total of user SGPRs implied by the enabled .amdhsa_user_sgpr_*
// directives; each branch below adds its own count.
6102 unsigned ImpliedUserSGPRCount = 0;
// Set only if .amdhsa_user_sgpr_count is given.
6106 std::optional<unsigned> ExplicitUserSGPRCount;
6107 const MCExpr *ReserveVCC = OneExpr;
6108 const MCExpr *ReserveFlatScr = OneExpr;
6109 std::optional<bool> EnableWavefrontSize32;
6115 SMRange IDRange = getTok().getLocRange();
6116 if (!parseId(
ID,
"expected .amdhsa_ directive or .end_amdhsa_kernel"))
6119 if (
ID ==
".end_amdhsa_kernel")
6123 return TokError(
".amdhsa_ directives cannot be repeated");
// Every sub-directive is followed by one expression; its source range is kept
// for diagnostics.
6125 SMLoc ValStart = getLoc();
6126 const MCExpr *ExprVal;
6127 if (getParser().parseExpression(ExprVal))
6129 SMLoc ValEnd = getLoc();
6130 SMRange ValRange = SMRange(ValStart, ValEnd);
6133 uint64_t Val = IVal;
6134 bool EvaluatableExpr;
6135 if ((EvaluatableExpr = ExprVal->evaluateAsAbsolute(IVal))) {
6137 return OutOfRangeError(ValRange);
// PARSE_BITS_ENTRY range-checks Val against the field width (ENTRY##_WIDTH)
// and then stores VALUE into FIELD via MCKernelDescriptor::bits_set.
6141#define PARSE_BITS_ENTRY(FIELD, ENTRY, VALUE, RANGE) \
6142 if (!isUInt<ENTRY##_WIDTH>(Val)) \
6143 return OutOfRangeError(RANGE); \
6144 AMDGPU::MCKernelDescriptor::bits_set(FIELD, VALUE, ENTRY##_SHIFT, ENTRY, \
6149#define EXPR_RESOLVE_OR_ERROR(RESOLVED) \
6151 return Error(IDRange.Start, "directive should have resolvable expression", \
6154 if (
ID ==
".amdhsa_group_segment_fixed_size") {
6157 return OutOfRangeError(ValRange);
6159 }
else if (
ID ==
".amdhsa_private_segment_fixed_size") {
6162 return OutOfRangeError(ValRange);
6164 }
else if (
ID ==
".amdhsa_kernarg_size") {
6166 return OutOfRangeError(ValRange);
6168 }
else if (
ID ==
".amdhsa_user_sgpr_count") {
6170 ExplicitUserSGPRCount = Val;
6171 }
else if (
ID ==
".amdhsa_user_sgpr_private_segment_buffer") {
6175 "directive is not supported with architected flat scratch",
6178 KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER,
6181 ImpliedUserSGPRCount += 4;
6182 }
else if (
ID ==
".amdhsa_user_sgpr_kernarg_preload_length") {
6185 return Error(IDRange.
Start,
"directive requires gfx90a+", IDRange);
6188 return OutOfRangeError(ValRange);
6192 ImpliedUserSGPRCount += Val;
6193 PreloadLength = Val;
6195 }
else if (
ID ==
".amdhsa_user_sgpr_kernarg_preload_offset") {
6198 return Error(IDRange.
Start,
"directive requires gfx90a+", IDRange);
6201 return OutOfRangeError(ValRange);
6205 PreloadOffset = Val;
6206 }
else if (
ID ==
".amdhsa_user_sgpr_dispatch_ptr") {
6209 KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR, ExprVal,
6212 ImpliedUserSGPRCount += 2;
6213 }
else if (
ID ==
".amdhsa_user_sgpr_queue_ptr") {
6216 KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR, ExprVal,
6219 ImpliedUserSGPRCount += 2;
6220 }
else if (
ID ==
".amdhsa_user_sgpr_kernarg_segment_ptr") {
6223 KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR,
6226 ImpliedUserSGPRCount += 2;
6227 }
else if (
ID ==
".amdhsa_user_sgpr_dispatch_id") {
6230 KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID, ExprVal,
6233 ImpliedUserSGPRCount += 2;
6234 }
else if (
ID ==
".amdhsa_user_sgpr_flat_scratch_init") {
6237 "directive is not supported with architected flat scratch",
6241 KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT,
6244 ImpliedUserSGPRCount += 2;
6245 }
else if (
ID ==
".amdhsa_user_sgpr_private_segment_size") {
6248 KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE,
6251 ImpliedUserSGPRCount += 1;
6252 }
else if (
ID ==
".amdhsa_wavefront_size32") {
6254 if (IVersion.
Major < 10)
6255 return Error(IDRange.
Start,
"directive requires gfx10+", IDRange);
6256 EnableWavefrontSize32 = Val;
6258 KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32, ExprVal,
6260 }
else if (
ID ==
".amdhsa_uses_dynamic_stack") {
6262 KERNEL_CODE_PROPERTY_USES_DYNAMIC_STACK, ExprVal,
6264 }
else if (
ID ==
".amdhsa_system_sgpr_private_segment_wavefront_offset") {
6267 "directive is not supported with architected flat scratch",
6270 COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT, ExprVal,
6272 }
else if (
ID ==
".amdhsa_enable_private_segment") {
6276 "directive is not supported without architected flat scratch",
6279 COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT, ExprVal,
6281 }
else if (
ID ==
".amdhsa_system_sgpr_workgroup_id_x") {
6283 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X, ExprVal,
6285 }
else if (
ID ==
".amdhsa_system_sgpr_workgroup_id_y") {
6287 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Y, ExprVal,
6289 }
else if (
ID ==
".amdhsa_system_sgpr_workgroup_id_z") {
6291 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Z, ExprVal,
6293 }
else if (
ID ==
".amdhsa_system_sgpr_workgroup_info") {
6295 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_INFO, ExprVal,
6297 }
else if (
ID ==
".amdhsa_system_vgpr_workitem_id") {
6299 COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID, ExprVal,
6301 }
else if (
ID ==
".amdhsa_next_free_vgpr") {
6302 VGPRRange = ValRange;
6303 NextFreeVGPR = ExprVal;
6304 }
else if (
ID ==
".amdhsa_next_free_sgpr") {
6305 SGPRRange = ValRange;
6306 NextFreeSGPR = ExprVal;
6307 }
else if (
ID ==
".amdhsa_accum_offset") {
6309 return Error(IDRange.
Start,
"directive requires gfx90a+", IDRange);
6310 AccumOffset = ExprVal;
6311 }
else if (
ID ==
".amdhsa_named_barrier_count") {
6313 return Error(IDRange.
Start,
"directive requires gfx1250+", IDRange);
6314 NamedBarCnt = ExprVal;
6315 }
else if (
ID ==
".amdhsa_reserve_vcc") {
6317 return OutOfRangeError(ValRange);
6318 ReserveVCC = ExprVal;
6319 }
else if (
ID ==
".amdhsa_reserve_flat_scratch") {
6320 if (IVersion.
Major < 7)
6321 return Error(IDRange.
Start,
"directive requires gfx7+", IDRange);
6324 "directive is not supported with architected flat scratch",
6327 return OutOfRangeError(ValRange);
6328 ReserveFlatScr = ExprVal;
6329 }
else if (
ID ==
".amdhsa_reserve_xnack_mask") {
6330 if (IVersion.
Major < 8)
6331 return Error(IDRange.
Start,
"directive requires gfx8+", IDRange);
6333 return OutOfRangeError(ValRange);
6334 if (Val != getTargetStreamer().getTargetID()->isXnackOnOrAny())
6335 return getParser().Error(IDRange.
Start,
".amdhsa_reserve_xnack_mask does not match target id",
6337 }
else if (
ID ==
".amdhsa_float_round_mode_32") {
6339 COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32, ExprVal,
6341 }
else if (
ID ==
".amdhsa_float_round_mode_16_64") {
6343 COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_16_64, ExprVal,
6345 }
else if (
ID ==
".amdhsa_float_denorm_mode_32") {
6347 COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_32, ExprVal,
6349 }
else if (
ID ==
".amdhsa_float_denorm_mode_16_64") {
6351 COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64, ExprVal,
6353 }
else if (
ID ==
".amdhsa_dx10_clamp") {
6354 if (!getSTI().
hasFeature(AMDGPU::FeatureDX10ClampAndIEEEMode))
6355 return Error(IDRange.
Start,
"directive unsupported on gfx1170+",
6358 COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_DX10_CLAMP, ExprVal,
6360 }
else if (
ID ==
".amdhsa_ieee_mode") {
6361 if (!getSTI().
hasFeature(AMDGPU::FeatureDX10ClampAndIEEEMode))
6362 return Error(IDRange.
Start,
"directive unsupported on gfx1170+",
6365 COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_IEEE_MODE, ExprVal,
6367 }
else if (
ID ==
".amdhsa_fp16_overflow") {
6368 if (IVersion.
Major < 9)
6369 return Error(IDRange.
Start,
"directive requires gfx9+", IDRange);
6371 COMPUTE_PGM_RSRC1_GFX9_PLUS_FP16_OVFL, ExprVal,
6373 }
else if (
ID ==
".amdhsa_tg_split") {
6375 return Error(IDRange.
Start,
"directive requires gfx90a+", IDRange);
6378 }
else if (
ID ==
".amdhsa_workgroup_processor_mode") {
6381 "directive unsupported on " + getSTI().
getCPU(), IDRange);
6383 COMPUTE_PGM_RSRC1_GFX10_PLUS_WGP_MODE, ExprVal,
6385 }
else if (
ID ==
".amdhsa_memory_ordered") {
6386 if (IVersion.
Major < 10)
6387 return Error(IDRange.
Start,
"directive requires gfx10+", IDRange);
6389 COMPUTE_PGM_RSRC1_GFX10_PLUS_MEM_ORDERED, ExprVal,
6391 }
else if (
ID ==
".amdhsa_forward_progress") {
6392 if (IVersion.
Major < 10)
6393 return Error(IDRange.
Start,
"directive requires gfx10+", IDRange);
6395 COMPUTE_PGM_RSRC1_GFX10_PLUS_FWD_PROGRESS, ExprVal,
6397 }
else if (
ID ==
".amdhsa_shared_vgpr_count") {
6399 if (IVersion.
Major < 10 || IVersion.
Major >= 12)
6400 return Error(IDRange.
Start,
"directive requires gfx10 or gfx11",
6402 SharedVGPRCount = Val;
6404 COMPUTE_PGM_RSRC3_GFX10_GFX11_SHARED_VGPR_COUNT, ExprVal,
6406 }
else if (
ID ==
".amdhsa_inst_pref_size") {
6407 if (IVersion.
Major < 11)
6408 return Error(IDRange.
Start,
"directive requires gfx11+", IDRange);
6409 if (IVersion.
Major == 11) {
6411 COMPUTE_PGM_RSRC3_GFX11_INST_PREF_SIZE, ExprVal,
6415 COMPUTE_PGM_RSRC3_GFX12_PLUS_INST_PREF_SIZE, ExprVal,
6418 }
else if (
ID ==
".amdhsa_exception_fp_ieee_invalid_op") {
6421 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION,
6423 }
else if (
ID ==
".amdhsa_exception_fp_denorm_src") {
6425 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_FP_DENORMAL_SOURCE,
6427 }
else if (
ID ==
".amdhsa_exception_fp_ieee_div_zero") {
6430 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO,
6432 }
else if (
ID ==
".amdhsa_exception_fp_ieee_overflow") {
6434 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW,
6436 }
else if (
ID ==
".amdhsa_exception_fp_ieee_underflow") {
6438 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW,
6440 }
else if (
ID ==
".amdhsa_exception_fp_ieee_inexact") {
6442 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INEXACT,
6444 }
else if (
ID ==
".amdhsa_exception_int_div_zero") {
6446 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO,
6448 }
else if (
ID ==
".amdhsa_round_robin_scheduling") {
6449 if (IVersion.
Major < 12)
6450 return Error(IDRange.
Start,
"directive requires gfx12+", IDRange);
6452 COMPUTE_PGM_RSRC1_GFX12_PLUS_ENABLE_WG_RR_EN, ExprVal,
6455 return Error(IDRange.
Start,
"unknown .amdhsa_kernel directive", IDRange);
6458#undef PARSE_BITS_ENTRY
// Both next-free register directives are mandatory.
6461 if (!Seen.
contains(
".amdhsa_next_free_vgpr"))
6462 return TokError(
".amdhsa_next_free_vgpr directive is required");
6464 if (!Seen.
contains(
".amdhsa_next_free_sgpr"))
6465 return TokError(
".amdhsa_next_free_sgpr directive is required");
// An explicit .amdhsa_user_sgpr_count takes precedence over the implied total.
6467 unsigned UserSGPRCount = ExplicitUserSGPRCount.value_or(ImpliedUserSGPRCount);
6469 return TokError(
"too many user SGPRs enabled, found " +
6470 Twine(UserSGPRCount) +
", but only " +
6476 if (PreloadLength) {
6482 const MCExpr *VGPRBlocks;
6483 const MCExpr *SGPRBlocks;
6484 if (calculateGPRBlocks(getFeatureBits(), ReserveVCC, ReserveFlatScr,
6485 getTargetStreamer().getTargetID()->isXnackOnOrAny(),
6486 EnableWavefrontSize32, NextFreeVGPR,
6487 VGPRRange, NextFreeSGPR, SGPRRange, VGPRBlocks,
// Range checks on the block counts are only possible when the expressions
// evaluate to absolute values.
6491 int64_t EvaluatedVGPRBlocks;
6492 bool VGPRBlocksEvaluatable =
6493 VGPRBlocks->evaluateAsAbsolute(EvaluatedVGPRBlocks);
6494 if (VGPRBlocksEvaluatable &&
6496 static_cast<uint64_t
>(EvaluatedVGPRBlocks))) {
6497 return OutOfRangeError(VGPRRange);
6501 COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT_SHIFT,
6502 COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT,
getContext());
6504 int64_t EvaluatedSGPRBlocks;
6505 if (SGPRBlocks->evaluateAsAbsolute(EvaluatedSGPRBlocks) &&
6507 static_cast<uint64_t
>(EvaluatedSGPRBlocks)))
6508 return OutOfRangeError(SGPRRange);
6511 COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT_SHIFT,
6512 COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT,
getContext());
// NOTE(review): the message below says "amdgpu_user_sgpr_count" while the
// directive parsed above is ".amdhsa_user_sgpr_count" - confirm the wording.
6514 if (ExplicitUserSGPRCount && ImpliedUserSGPRCount > *ExplicitUserSGPRCount)
6515 return TokError(
"amdgpu_user_sgpr_count smaller than implied by "
6516 "enabled user SGPRs");
6522 COMPUTE_PGM_RSRC2_GFX125_USER_SGPR_COUNT_SHIFT,
6523 COMPUTE_PGM_RSRC2_GFX125_USER_SGPR_COUNT,
getContext());
6528 COMPUTE_PGM_RSRC2_GFX6_GFX120_USER_SGPR_COUNT_SHIFT,
6529 COMPUTE_PGM_RSRC2_GFX6_GFX120_USER_SGPR_COUNT,
getContext());
6534 return TokError(
"Kernarg size should be resolvable");
// PreloadLength and PreloadOffset are each multiplied by 4 before being
// compared with kernarg_size; the check is skipped when kernarg_size is 0.
6535 uint64_t kernarg_size = IVal;
6536 if (PreloadLength && kernarg_size &&
6537 (PreloadLength * 4 + PreloadOffset * 4 > kernarg_size))
6538 return TokError(
"Kernarg preload length + offset is larger than the "
6539 "kernarg segment size");
6542 if (!Seen.
contains(
".amdhsa_accum_offset"))
6543 return TokError(
".amdhsa_accum_offset directive is required");
// NOTE(review): EvaluatedAccum has no initializer and is copied into
// UEvaluatedAccum before AccumEvaluatable is tested; the copy is only used
// when AccumEvaluatable is true, but the read itself happens regardless -
// confirm evaluateAsAbsolute always writes its argument.
6544 int64_t EvaluatedAccum;
6545 bool AccumEvaluatable = AccumOffset->evaluateAsAbsolute(EvaluatedAccum);
6546 uint64_t UEvaluatedAccum = EvaluatedAccum;
// accum_offset must lie in [4, 256] and be a multiple of 4.
6547 if (AccumEvaluatable &&
6548 (UEvaluatedAccum < 4 || UEvaluatedAccum > 256 || (UEvaluatedAccum & 3)))
6549 return TokError(
"accum_offset should be in range [4..256] in "
6552 int64_t EvaluatedNumVGPR;
6553 if (NextFreeVGPR->evaluateAsAbsolute(EvaluatedNumVGPR) &&
6556 alignTo(std::max((uint64_t)1, (uint64_t)EvaluatedNumVGPR), 4))
6557 return TokError(
"accum_offset exceeds total VGPR allocation");
6563 COMPUTE_PGM_RSRC3_GFX90A_ACCUM_OFFSET_SHIFT,
6564 COMPUTE_PGM_RSRC3_GFX90A_ACCUM_OFFSET,
6570 COMPUTE_PGM_RSRC3_GFX125_NAMED_BAR_CNT_SHIFT,
6571 COMPUTE_PGM_RSRC3_GFX125_NAMED_BAR_CNT,
// Shared VGPR checks apply to major versions 10 and 11 only.
6574 if (IVersion.
Major >= 10 && IVersion.
Major < 12) {
6576 if (SharedVGPRCount && EnableWavefrontSize32 && *EnableWavefrontSize32) {
6577 return TokError(
"shared_vgpr_count directive not valid on "
6578 "wavefront size 32");
6581 if (VGPRBlocksEvaluatable &&
6582 (SharedVGPRCount * 2 +
static_cast<uint64_t
>(EvaluatedVGPRBlocks) >
6584 return TokError(
"shared_vgpr_count*2 + "
6585 "compute_pgm_rsrc1.GRANULATED_WORKITEM_VGPR_COUNT cannot "
// Hand the completed descriptor to the target streamer.
6590 getTargetStreamer().EmitAmdhsaKernelDescriptor(getSTI(), KernelName, KD,
6591 NextFreeVGPR, NextFreeSGPR,
6592 ReserveVCC, ReserveFlatScr);
// Parses the code object version as an absolute expression and forwards it to
// the target streamer.
// NOTE(review): the declaration of Version and the return statements are not
// visible in this listing.
6596bool AMDGPUAsmParser::ParseDirectiveAMDHSACodeObjectVersion() {
6598 if (ParseAsAbsoluteExpression(
Version))
6601 getTargetStreamer().EmitDirectiveAMDHSACodeObjectVersion(
Version);
// Parses the value for one amd_kernel_code_t field named ID into C, then
// cross-checks wave-size related fields against the subtarget.
// NOTE(review): the conditions guarding each diagnostic are not visible in
// this listing.
6605bool AMDGPUAsmParser::ParseAMDKernelCodeTValue(StringRef
ID,
6606 AMDGPUMCKernelCodeT &
C) {
// max_scratch_backing_memory_byte_size is special-cased: the rest of the
// statement is consumed here rather than parsed as a field value.
6609 if (
ID ==
"max_scratch_backing_memory_byte_size") {
6610 Parser.eatToEndOfStatement();
// Field parsing writes its error text into ErrStr via Err.
6614 SmallString<40> ErrStr;
6615 raw_svector_ostream Err(ErrStr);
6616 if (!
C.ParseKernelCodeT(
ID, getParser(), Err)) {
6617 return TokError(Err.
str());
6621 if (
ID ==
"enable_wavefront_size32") {
6624 return TokError(
"enable_wavefront_size32=1 is only allowed on GFX10+");
6626 return TokError(
"enable_wavefront_size32=1 requires +WavefrontSize32");
6629 return TokError(
"enable_wavefront_size32=0 requires +WavefrontSize64");
// wavefront_size values 5 and 6 are tied to wave32 and wave64 respectively by
// the diagnostics below (presumably a log2 encoding - TODO confirm).
6633 if (
ID ==
"wavefront_size") {
6634 if (
C.wavefront_size == 5) {
6636 return TokError(
"wavefront_size=5 is only allowed on GFX10+");
6638 return TokError(
"wavefront_size=5 requires +WavefrontSize32");
6639 }
else if (
C.wavefront_size == 6) {
6641 return TokError(
"wavefront_size=6 requires +WavefrontSize64");
// Parses an amd_kernel_code_t block: reads field identifiers until
// `.end_amd_kernel_code_t`, parses each through ParseAMDKernelCodeTValue(),
// and emits the resulting structure through the target streamer.
6648bool AMDGPUAsmParser::ParseDirectiveAMDKernelCodeT() {
6649 AMDGPUMCKernelCodeT KernelCode;
6658 if (!parseId(
ID,
"expected value identifier or .end_amd_kernel_code_t"))
6661 if (
ID ==
".end_amd_kernel_code_t")
6664 if (ParseAMDKernelCodeTValue(
ID, KernelCode))
6669 getTargetStreamer().EmitAMDKernelCodeT(KernelCode);
// Parses a symbol name and emits a symbol-type record for it through the
// target streamer.
// NOTE(review): the second argument to EmitAMDGPUSymbolType is not visible in
// this listing.
6674bool AMDGPUAsmParser::ParseDirectiveAMDGPUHsaKernel() {
6675 StringRef KernelName;
6676 if (!parseId(KernelName,
"expected symbol name"))
6679 getTargetStreamer().EmitAMDGPUSymbolType(KernelName,
// Handles the .amd_amdgpu_isa directive: reads the target ID string from the
// current token, checks it against the target streamer's target ID, and
// emits the ISA version.
6686bool AMDGPUAsmParser::ParseDirectiveISAVersion() {
6687 if (!getSTI().getTargetTriple().isAMDGCN()) {
6688 return Error(getLoc(),
6689 ".amd_amdgpu_isa directive is not available on non-amdgcn "
// The target ID is the contents of the current string token.
6693 StringRef TargetIDDirective = getLexer().getTok().getStringContents();
6695 std::optional<AMDGPU::TargetID> MaybeParsed =
6698 return Error(getParser().getTok().getLoc(),
"malformed target id");
6701 const std::optional<AMDGPU::TargetID> &CurrentTargetID =
6702 getTargetStreamer().getTargetID();
// The directive must name exactly the target the assembler was configured for.
6704 if (*CurrentTargetID != ParsedTargetID) {
6705 return Error(getParser().getTok().getLoc(),
6706 Twine(
".amd_amdgpu_isa directive's target id ") +
6708 Twine(
" does not match the specified target id ") +
6709 Twine(CurrentTargetID->toString()));
6712 getTargetStreamer().EmitISAVersion();
// Collects the HSA metadata text into HSAMetadataString and passes it to the
// target streamer's EmitHSAMetadataV3(); a false result is reported as
// invalid metadata.
// NOTE(review): the code that fills HSAMetadataString is not visible here.
6718bool AMDGPUAsmParser::ParseDirectiveHSAMetadata() {
6721 std::string HSAMetadataString;
6726 if (!getTargetStreamer().EmitHSAMetadataV3(HSAMetadataString))
6727 return Error(getLoc(),
"invalid HSA metadata");
// Collects raw source text into CollectString until the directive
// AssemblerDirectiveEnd is found; reports an error if it never appears.
// NOTE(review): the loop structure and the use of AssemblerDirectiveBegin are
// not visible in this listing.
6734bool AMDGPUAsmParser::ParseToEndDirective(
const char *AssemblerDirectiveBegin,
6735 const char *AssemblerDirectiveEnd,
6736 std::string &CollectString) {
6738 raw_string_ostream CollectStream(CollectString);
// Whitespace skipping is turned off while collecting and turned back on
// afterwards.
6740 getLexer().setSkipSpace(
false);
6742 bool FoundEnd =
false;
6745 CollectStream << getTokenStr();
6749 if (trySkipId(AssemblerDirectiveEnd)) {
// Each collected statement is followed by the assembler's separator string.
6754 CollectStream << Parser.parseStringToEndOfStatement()
6755 <<
getContext().getAsmInfo().getSeparatorString();
6757 Parser.eatToEndOfStatement();
6760 getLexer().setSkipSpace(
true);
6763 return TokError(Twine(
"expected directive ") +
6764 Twine(AssemblerDirectiveEnd) + Twine(
" not found"));
6771bool AMDGPUAsmParser::ParseDirectivePALMetadataBegin() {
6777 auto *PALMetadata = getTargetStreamer().getPALMetadata();
6778 if (!PALMetadata->setFromString(
String))
6779 return Error(getLoc(),
"invalid PAL metadata");
6784bool AMDGPUAsmParser::ParseDirectivePALMetadata() {
6786 return Error(getLoc(),
6788 "not available on non-amdpal OSes")).str());
6791 auto *PALMetadata = getTargetStreamer().getPALMetadata();
6792 PALMetadata->setLegacy();
6795 if (ParseAsAbsoluteExpression(
Key)) {
6796 return TokError(Twine(
"invalid value in ") +
6800 return TokError(Twine(
"expected an even number of values in ") +
6803 if (ParseAsAbsoluteExpression(
Value)) {
6804 return TokError(Twine(
"invalid value in ") +
6807 PALMetadata->setRegister(
Key,
Value);
/// Handles the `.amdgpu_lds` directive.
///
/// From the visible statements the directive is processed in this order:
///   1. the current section is checked with checkForValidSection();
///   2. a symbol name is parsed as an identifier;
///   3. a comma is required;
///   4. a size is parsed as an absolute expression and range-checked
///      (non-negative, not larger than LocalMemorySize);
///   5. an alignment is parsed as an absolute expression and range-checked;
///   6. the symbol must still be undefined after redefineIfPossible();
///   7. the result is handed to the target streamer via emitAMDGPULDS().
///
/// NOTE(review): several lines of this function are not visible in this
/// excerpt (declarations of Name/Size/Symbol, the conditions guarding some of
/// the diagnostics, and the final return). Presumably the function follows the
/// usual directive-parser convention of returning true on error -- confirm.
6816bool AMDGPUAsmParser::ParseDirectiveAMDGPULDS() {
6817 if (getParser().checkForValidSection())
// Remember where the name starts so a later redefinition error points at it.
6821 SMLoc NameLoc = getLoc();
6822 if (getParser().parseIdentifier(Name))
6823 return TokError(
"expected identifier in directive");
6826 if (getParser().parseComma())
// Size diagnostics are reported at the start of the size expression.
6832 SMLoc SizeLoc = getLoc();
6833 if (getParser().parseAbsoluteExpression(
Size))
6836 return Error(SizeLoc,
"size must be non-negative");
6837 if (
Size > LocalMemorySize)
6838 return Error(SizeLoc,
"size is too large");
// Alignment starts out as 4 and is overwritten when an alignment expression
// is parsed below.
6840 int64_t Alignment = 4;
6842 SMLoc AlignLoc = getLoc();
6843 if (getParser().parseAbsoluteExpression(Alignment))
6846 return Error(AlignLoc,
"alignment must be a power of two");
// Reject alignments of 2^31 or more.
6851 if (Alignment >= 1u << 31)
6852 return Error(AlignLoc,
"alignment is too large");
// Give the symbol a chance to be redefined; if it is still defined after
// that, the directive is a genuine redefinition and is rejected.
6858 Symbol->redefineIfPossible();
6859 if (!
Symbol->isUndefined())
6860 return Error(NameLoc,
"invalid symbol redefinition");
6862 getTargetStreamer().emitAMDGPULDS(Symbol,
Size,
Align(Alignment));
6866bool AMDGPUAsmParser::ParseDirectiveAMDGPUInfo() {
6867 if (getParser().checkForValidSection())
6871 if (getParser().parseIdentifier(FuncName))
6872 return TokError(
"expected symbol name after .amdgpu_info");
6875 AMDGPU::InfoSectionData ParsedInfoData;
6876 AMDGPU::FuncInfo FI;
6878 bool HasScalarAttrs =
false;
6885 SMLoc IDLoc = getLoc();
6886 if (!parseId(
ID,
"expected directive or .end_amdgpu_info"))
6889 if (
ID ==
".end_amdgpu_info")
6897 return Error(IDLoc,
"unknown .amdgpu_info directive '" +
ID +
"'");
6899 if (Dir ==
"flags") {
6901 if (getParser().parseAbsoluteExpression(Val))
6904 FI.
UsesVCC = !!(
Flags & AMDGPU::FuncInfoFlags::FUNC_USES_VCC);
6906 !!(
Flags & AMDGPU::FuncInfoFlags::FUNC_USES_FLAT_SCRATCH);
6908 HasScalarAttrs =
true;
6909 }
else if (Dir ==
"num_sgpr") {
6911 if (getParser().parseAbsoluteExpression(Val))
6913 FI.
NumSGPR =
static_cast<uint32_t
>(Val);
6914 HasScalarAttrs =
true;
6915 }
else if (Dir ==
"num_vgpr") {
6917 if (getParser().parseAbsoluteExpression(Val))
6920 HasScalarAttrs =
true;
6921 }
else if (Dir ==
"num_agpr") {
6923 if (getParser().parseAbsoluteExpression(Val))
6926 HasScalarAttrs =
true;
6927 }
else if (Dir ==
"private_segment_size") {
6929 if (getParser().parseAbsoluteExpression(Val))
6932 HasScalarAttrs =
true;
6933 }
else if (Dir ==
"use") {
6935 if (getParser().parseIdentifier(ResName))
6936 return TokError(
"expected resource symbol for .amdgpu_use");
6937 ParsedInfoData.
Uses.push_back(
6938 {FuncSym,
getContext().getOrCreateSymbol(ResName)});
6939 }
else if (Dir ==
"call") {
6941 if (getParser().parseIdentifier(DstName))
6942 return TokError(
"expected callee symbol for .amdgpu_call");
6943 ParsedInfoData.
Calls.push_back(
6944 {FuncSym,
getContext().getOrCreateSymbol(DstName)});
6945 }
else if (Dir ==
"indirect_call") {
6947 if (getParser().parseEscapedString(TypeId))
6948 return TokError(
"expected type ID string for .amdgpu_indirect_call");
6949 ParsedInfoData.
IndirectCalls.push_back({FuncSym, std::move(TypeId)});
6950 }
else if (Dir ==
"typeid") {
6952 if (getParser().parseEscapedString(TypeId))
6953 return TokError(
"expected type ID string for .amdgpu_typeid");
6954 ParsedInfoData.
TypeIds.push_back({FuncSym, std::move(TypeId)});
6956 return Error(IDLoc,
"unknown .amdgpu_info directive '" +
ID +
"'");
6961 ParsedInfoData.
Funcs.push_back(std::move(FI));
6963 AMDGPU::InfoSectionData &
Data = InfoData ? *InfoData : InfoData.emplace();
6964 for (AMDGPU::FuncInfo &Func : ParsedInfoData.
Funcs)
6965 Data.Funcs.push_back(std::move(Func));
6966 for (std::pair<MCSymbol *, MCSymbol *> &Use : ParsedInfoData.
Uses)
6967 Data.Uses.push_back(Use);
6968 for (std::pair<MCSymbol *, MCSymbol *> &
Call : ParsedInfoData.
Calls)
6970 for (std::pair<MCSymbol *, std::string> &
IndirectCall :
6973 for (std::pair<MCSymbol *, std::string> &TypeId : ParsedInfoData.
TypeIds)
6974 Data.TypeIds.push_back(std::move(TypeId));
/// End-of-file hook: passes the accumulated InfoData to the target streamer
/// through emitAMDGPUInfo().
///
/// NOTE(review): one line between the header and the call is not visible in
/// this excerpt; since InfoData is dereferenced here, that line presumably
/// guards against InfoData being empty -- confirm.
6979void AMDGPUAsmParser::onEndOfFile() {
6981 getTargetStreamer().emitAMDGPUInfo(*InfoData);
/// Top-level directive dispatcher.
///
/// Takes the spelling of the directive token and forwards to the matching
/// ParseDirective* handler, returning that handler's result. The directive
/// names compared against here are:
///   .amdhsa_kernel, .amdhsa_code_object_version, .amd_kernel_code_t,
///   .amdgpu_hsa_kernel, .amd_amdgpu_isa, .amdgcn_target, .amdgpu_lds,
///   .amdgpu_info
///
/// NOTE(review): the conditions selecting ParseDirectiveHSAMetadata(),
/// ParseDirectivePALMetadataBegin() and ParseDirectivePALMetadata(), the
/// condition producing the "not available on non-amdhsa OSes" diagnostic, and
/// the fall-through return are not visible in this excerpt.
6984bool AMDGPUAsmParser::ParseDirective(AsmToken DirectiveID) {
6985 StringRef IDVal = DirectiveID.
getString();
6988 if (IDVal ==
".amdhsa_kernel")
6989 return ParseDirectiveAMDHSAKernel();
6991 if (IDVal ==
".amdhsa_code_object_version")
6992 return ParseDirectiveAMDHSACodeObjectVersion();
6996 return ParseDirectiveHSAMetadata();
6998 if (IDVal ==
".amd_kernel_code_t")
6999 return ParseDirectiveAMDKernelCodeT();
7001 if (IDVal ==
".amdgpu_hsa_kernel")
7002 return ParseDirectiveAMDGPUHsaKernel();
7004 if (IDVal ==
".amd_amdgpu_isa")
7005 return ParseDirectiveISAVersion();
7009 Twine(
" directive is "
7010 "not available on non-amdhsa OSes"))
7015 if (IDVal ==
".amdgcn_target")
7016 return ParseDirectiveAMDGCNTarget();
7018 if (IDVal ==
".amdgpu_lds")
7019 return ParseDirectiveAMDGPULDS();
7021 if (IDVal ==
".amdgpu_info")
7022 return ParseDirectiveAMDGPUInfo();
7025 return ParseDirectivePALMetadataBegin();
7028 return ParseDirectivePALMetadata();
7033bool AMDGPUAsmParser::subtargetHasRegister(
const MCRegisterInfo &MRI,
7040 return hasSGPR104_SGPR105();
7043 case SRC_SHARED_BASE_LO:
7044 case SRC_SHARED_BASE:
7045 case SRC_SHARED_LIMIT_LO:
7046 case SRC_SHARED_LIMIT:
7047 case SRC_PRIVATE_BASE_LO:
7048 case SRC_PRIVATE_BASE:
7049 case SRC_PRIVATE_LIMIT_LO:
7050 case SRC_PRIVATE_LIMIT:
7052 case SRC_FLAT_SCRATCH_BASE_LO:
7053 case SRC_FLAT_SCRATCH_BASE_HI:
7054 return hasGloballyAddressableScratch();
7055 case SRC_POPS_EXITING_WAVE_ID:
7067 return (
isVI() ||
isGFX9()) && getTargetStreamer().getTargetID()->isXnackSupported();
7097 return hasSGPR102_SGPR103();
7102ParseStatus AMDGPUAsmParser::parseOperand(
OperandVector &Operands,
7105 ParseStatus Res = parseVOPD(Operands);
7110 Res = MatchOperandParserImpl(Operands, Mnemonic);
7122 SMLoc LBraceLoc = getLoc();
7127 auto Loc = getLoc();
7128 Res = parseReg(Operands);
7130 Error(Loc,
"expected a register");
7134 RBraceLoc = getLoc();
7139 "expected a comma or a closing square bracket"))
7143 if (Operands.
size() - Prefix > 1) {
7145 AMDGPUOperand::CreateToken(
this,
"[", LBraceLoc));
7146 Operands.
push_back(AMDGPUOperand::CreateToken(
this,
"]", RBraceLoc));
7152 return parseRegOrImm(Operands);
/// Strips an encoding suffix from an instruction mnemonic and records the
/// encoding it forces.
///
/// The forced-encoding state is first reset (size 0, no DPP, no SDWA). The
/// suffixes are then tried in the order "_e64_dpp", "_e64", "_e32", "_dpp",
/// "_sdwa"; "_e64_dpp" is tested before "_e64" and "_dpp". consume_back()
/// removes a matching suffix from Name in place.
///
/// \param Name  mnemonic as written in the source.
///
/// NOTE(review): the return statements and the setForcedDPP(true) calls are
/// not visible in this excerpt; presumably the (possibly shortened) Name is
/// returned -- confirm.
7155StringRef AMDGPUAsmParser::parseMnemonicSuffix(StringRef Name) {
// Start from a clean state so a previous instruction's suffix does not leak.
7157 setForcedEncodingSize(0);
7158 setForcedDPP(
false);
7159 setForcedSDWA(
false);
7161 if (
Name.consume_back(
"_e64_dpp")) {
7163 setForcedEncodingSize(64);
7166 if (
Name.consume_back(
"_e64")) {
7167 setForcedEncodingSize(64);
7170 if (
Name.consume_back(
"_e32")) {
7171 setForcedEncodingSize(32);
7174 if (
Name.consume_back(
"_dpp")) {
7178 if (
Name.consume_back(
"_sdwa")) {
7179 setForcedSDWA(
true);
7187 unsigned VariantID);
7193 Name = parseMnemonicSuffix(Name);
7199 Operands.
push_back(AMDGPUOperand::CreateToken(
this, Name, NameLoc));
7201 bool IsMIMG = Name.starts_with(
"image_");
7204 OperandMode
Mode = OperandMode_Default;
7206 Mode = OperandMode_NSA;
7210 checkUnsupportedInstruction(Name, NameLoc);
7211 if (!Parser.hasPendingError()) {
7214 :
"not a valid operand.";
7215 Error(getLoc(), Msg);
7234ParseStatus AMDGPUAsmParser::parseTokenOp(StringRef Name,
7237 if (!trySkipId(Name))
7240 Operands.
push_back(AMDGPUOperand::CreateToken(
this, Name, S));
7244ParseStatus AMDGPUAsmParser::parseIntWithPrefix(
const char *Prefix,
7253ParseStatus AMDGPUAsmParser::parseIntWithPrefix(
7254 const char *Prefix,
OperandVector &Operands, AMDGPUOperand::ImmTy ImmTy,
7255 std::function<
bool(int64_t &)> ConvertResult) {
7259 ParseStatus Res = parseIntWithPrefix(Prefix,
Value);
7263 if (ConvertResult && !ConvertResult(
Value)) {
7264 Error(S,
"invalid " + StringRef(Prefix) +
" value.");
7267 Operands.
push_back(AMDGPUOperand::CreateImm(
this,
Value, S, ImmTy));
7271ParseStatus AMDGPUAsmParser::parseOperandArrayWithPrefix(
7272 const char *Prefix,
OperandVector &Operands, AMDGPUOperand::ImmTy ImmTy,
7273 bool (*ConvertResult)(int64_t &)) {
7282 const unsigned MaxSize = 4;
7286 for (
int I = 0; ; ++
I) {
7288 SMLoc Loc = getLoc();
7292 if (
Op != 0 &&
Op != 1)
7293 return Error(Loc,
"invalid " + StringRef(Prefix) +
" value.");
7300 if (
I + 1 == MaxSize)
7301 return Error(getLoc(),
"expected a closing square bracket");
7307 Operands.
push_back(AMDGPUOperand::CreateImm(
this, Val, S, ImmTy));
7311ParseStatus AMDGPUAsmParser::parseNamedBit(StringRef Name,
7313 AMDGPUOperand::ImmTy ImmTy,
7314 bool IgnoreNegative) {
7318 if (trySkipId(Name)) {
7320 }
else if (trySkipId(
"no", Name)) {
7329 return Error(S,
"r128 modifier is not supported on this GPU");
7330 if (Name ==
"a16" && !
hasA16())
7331 return Error(S,
"a16 modifier is not supported on this GPU");
7333 if (Bit == 0 && Name ==
"gds") {
7334 StringRef Mnemo = ((AMDGPUOperand &)*Operands[0]).
getToken();
7336 return Error(S,
"nogds is not allowed");
7339 if (
isGFX9() && ImmTy == AMDGPUOperand::ImmTyA16)
7340 ImmTy = AMDGPUOperand::ImmTyR128A16;
7342 Operands.
push_back(AMDGPUOperand::CreateImm(
this, Bit, S, ImmTy));
7346unsigned AMDGPUAsmParser::getCPolKind(StringRef Id, StringRef Mnemo,
7347 bool &Disabling)
const {
7348 Disabling =
Id.consume_front(
"no");
7351 return StringSwitch<unsigned>(Id)
7358 return StringSwitch<unsigned>(Id)
7366ParseStatus AMDGPUAsmParser::parseCPol(
OperandVector &Operands) {
7368 SMLoc StringLoc = getLoc();
7370 int64_t CPolVal = 0;
7379 ResTH = parseTH(Operands, TH);
7390 ResScope = parseScope(Operands, Scope);
7403 if (trySkipId(
"nv")) {
7407 }
else if (trySkipId(
"no",
"nv")) {
7414 if (trySkipId(
"scale_offset")) {
7418 }
else if (trySkipId(
"no",
"scale_offset")) {
7431 Operands.
push_back(AMDGPUOperand::CreateImm(
this, CPolVal, StringLoc,
7432 AMDGPUOperand::ImmTyCPol));
7436 StringRef Mnemo = ((AMDGPUOperand &)*Operands[0]).
getToken();
7437 SMLoc OpLoc = getLoc();
7438 unsigned Enabled = 0, Seen = 0;
7442 unsigned CPol = getCPolKind(getId(), Mnemo, Disabling);
7449 return Error(S,
"dlc modifier is not supported on this GPU");
7452 return Error(S,
"scc modifier is not supported on this GPU");
7455 return Error(S,
"duplicate cache policy modifier");
7467 AMDGPUOperand::CreateImm(
this,
Enabled, OpLoc, AMDGPUOperand::ImmTyCPol));
7471ParseStatus AMDGPUAsmParser::parseScope(
OperandVector &Operands,
7476 ParseStatus Res = parseStringOrIntWithPrefix(
7477 Operands,
"scope", {
"SCOPE_CU",
"SCOPE_SE",
"SCOPE_DEV",
"SCOPE_SYS"},
7486ParseStatus AMDGPUAsmParser::parseTH(
OperandVector &Operands, int64_t &TH) {
7491 ParseStatus Res = parseStringWithPrefix(
"th",
Value, StringLoc);
7495 if (
Value ==
"TH_DEFAULT")
7497 else if (
Value ==
"TH_STORE_LU" ||
Value ==
"TH_LOAD_WB" ||
7498 Value ==
"TH_LOAD_NT_WB") {
7499 return Error(StringLoc,
"invalid th value");
7500 }
else if (
Value.consume_front(
"TH_ATOMIC_")) {
7502 }
else if (
Value.consume_front(
"TH_LOAD_")) {
7504 }
else if (
Value.consume_front(
"TH_STORE_")) {
7507 return Error(StringLoc,
"invalid th value");
7510 if (
Value ==
"BYPASS")
7515 TH |= StringSwitch<int64_t>(
Value)
7525 .Default(0xffffffff);
7527 TH |= StringSwitch<int64_t>(
Value)
7538 .Default(0xffffffff);
7541 if (TH == 0xffffffff)
7542 return Error(StringLoc,
"invalid th value");
7549 AMDGPUAsmParser::OptionalImmIndexMap &OptionalIdx,
7550 AMDGPUOperand::ImmTy ImmT, int64_t
Default = 0,
7551 std::optional<unsigned> InsertAt = std::nullopt) {
7552 auto i = OptionalIdx.find(ImmT);
7553 if (i != OptionalIdx.end()) {
7554 unsigned Idx = i->second;
7555 const AMDGPUOperand &
Op =
7556 static_cast<const AMDGPUOperand &
>(*Operands[Idx]);
7560 Op.addImmOperands(Inst, 1);
7562 if (InsertAt.has_value())
7569ParseStatus AMDGPUAsmParser::parseStringWithPrefix(StringRef Prefix,
7575 StringLoc = getLoc();
7580ParseStatus AMDGPUAsmParser::parseStringOrIntWithPrefix(
7581 OperandVector &Operands, StringRef Name, ArrayRef<const char *> Ids,
7586 SMLoc StringLoc = getLoc();
7590 Value = getTokenStr();
7594 if (
Value == Ids[IntVal])
7599 if (IntVal < 0 || IntVal >= (int64_t)Ids.
size())
7600 return Error(StringLoc,
"invalid " + Twine(Name) +
" value");
7605ParseStatus AMDGPUAsmParser::parseStringOrIntWithPrefix(
7606 OperandVector &Operands, StringRef Name, ArrayRef<const char *> Ids,
7607 AMDGPUOperand::ImmTy
Type) {
7611 ParseStatus Res = parseStringOrIntWithPrefix(Operands, Name, Ids, IntVal);
7613 Operands.
push_back(AMDGPUOperand::CreateImm(
this, IntVal, S,
Type));
7622bool AMDGPUAsmParser::tryParseFmt(
const char *Pref,
7626 SMLoc Loc = getLoc();
7628 auto Res = parseIntWithPrefix(Pref, Val);
7634 if (Val < 0 || Val > MaxVal) {
7635 Error(Loc, Twine(
"out of range ", StringRef(Pref)));
/// Parses an `index_key` prefixed integer and appends it to Operands as an
/// immediate of kind ImmTy.
///
/// Accepted ranges, as checked below:
///   - ImmTyIndexKey16bit / ImmTyIndexKey32bit: 0..1
///   - ImmTyIndexKey8bit:                       0..3
/// A value outside the range is reported at the location where the operand
/// started.
///
/// \param Operands  operand list the immediate is appended to.
/// \param ImmTy     which index-key flavour is being parsed.
///
/// NOTE(review): the declaration of ImmVal, the handling of Res and the final
/// return are not visible in this excerpt.
7643ParseStatus AMDGPUAsmParser::tryParseIndexKey(
OperandVector &Operands,
7644 AMDGPUOperand::ImmTy ImmTy) {
7645 const char *Pref =
"index_key";
// Captured before parsing so diagnostics point at the start of the operand.
7647 SMLoc Loc = getLoc();
7648 auto Res = parseIntWithPrefix(Pref, ImmVal);
7652 if ((ImmTy == AMDGPUOperand::ImmTyIndexKey16bit ||
7653 ImmTy == AMDGPUOperand::ImmTyIndexKey32bit) &&
7654 (ImmVal < 0 || ImmVal > 1))
7655 return Error(Loc, Twine(
"out of range ", StringRef(Pref)));
7657 if (ImmTy == AMDGPUOperand::ImmTyIndexKey8bit && (ImmVal < 0 || ImmVal > 3))
7658 return Error(Loc, Twine(
"out of range ", StringRef(Pref)));
7660 Operands.
push_back(AMDGPUOperand::CreateImm(
this, ImmVal, Loc, ImmTy));
/// Parses an index key operand of kind ImmTyIndexKey8bit by delegating to
/// tryParseIndexKey().
7664ParseStatus AMDGPUAsmParser::parseIndexKey8bit(
OperandVector &Operands) {
7665 return tryParseIndexKey(Operands, AMDGPUOperand::ImmTyIndexKey8bit);
/// Parses an index key operand of kind ImmTyIndexKey16bit by delegating to
/// tryParseIndexKey().
7668ParseStatus AMDGPUAsmParser::parseIndexKey16bit(
OperandVector &Operands) {
7669 return tryParseIndexKey(Operands, AMDGPUOperand::ImmTyIndexKey16bit);
/// Parses an index key operand of kind ImmTyIndexKey32bit by delegating to
/// tryParseIndexKey().
7672ParseStatus AMDGPUAsmParser::parseIndexKey32bit(
OperandVector &Operands) {
7673 return tryParseIndexKey(Operands, AMDGPUOperand::ImmTyIndexKey32bit);
7676ParseStatus AMDGPUAsmParser::tryParseMatrixFMT(
OperandVector &Operands,
7678 AMDGPUOperand::ImmTy
Type) {
/// Parses the `matrix_a_fmt` operand: forwards to tryParseMatrixFMT() with
/// that name and the ImmTyMatrixAFMT immediate kind.
7683ParseStatus AMDGPUAsmParser::parseMatrixAFMT(
OperandVector &Operands) {
7684 return tryParseMatrixFMT(Operands,
"matrix_a_fmt",
7685 AMDGPUOperand::ImmTyMatrixAFMT);
/// Parses the `matrix_b_fmt` operand: forwards to tryParseMatrixFMT() with
/// that name and the ImmTyMatrixBFMT immediate kind.
7688ParseStatus AMDGPUAsmParser::parseMatrixBFMT(
OperandVector &Operands) {
7689 return tryParseMatrixFMT(Operands,
"matrix_b_fmt",
7690 AMDGPUOperand::ImmTyMatrixBFMT);
7693ParseStatus AMDGPUAsmParser::tryParseMatrixScale(
OperandVector &Operands,
7695 AMDGPUOperand::ImmTy
Type) {
/// Parses the `matrix_a_scale` operand: forwards to tryParseMatrixScale()
/// with that name and the ImmTyMatrixAScale immediate kind.
7700ParseStatus AMDGPUAsmParser::parseMatrixAScale(
OperandVector &Operands) {
7701 return tryParseMatrixScale(Operands,
"matrix_a_scale",
7702 AMDGPUOperand::ImmTyMatrixAScale);
/// Parses the `matrix_b_scale` operand: forwards to tryParseMatrixScale()
/// with that name and the ImmTyMatrixBScale immediate kind.
7705ParseStatus AMDGPUAsmParser::parseMatrixBScale(
OperandVector &Operands) {
7706 return tryParseMatrixScale(Operands,
"matrix_b_scale",
7707 AMDGPUOperand::ImmTyMatrixBScale);
7710ParseStatus AMDGPUAsmParser::tryParseMatrixScaleFmt(
OperandVector &Operands,
7712 AMDGPUOperand::ImmTy
Type) {
/// Parses the `matrix_a_scale_fmt` operand: forwards to
/// tryParseMatrixScaleFmt() with that name and the ImmTyMatrixAScaleFmt
/// immediate kind.
7717ParseStatus AMDGPUAsmParser::parseMatrixAScaleFmt(
OperandVector &Operands) {
7718 return tryParseMatrixScaleFmt(Operands,
"matrix_a_scale_fmt",
7719 AMDGPUOperand::ImmTyMatrixAScaleFmt);
/// Parses the `matrix_b_scale_fmt` operand: forwards to
/// tryParseMatrixScaleFmt() with that name and the ImmTyMatrixBScaleFmt
/// immediate kind.
7722ParseStatus AMDGPUAsmParser::parseMatrixBScaleFmt(
OperandVector &Operands) {
7723 return tryParseMatrixScaleFmt(Operands,
"matrix_b_scale_fmt",
7724 AMDGPUOperand::ImmTyMatrixBScaleFmt);
7729ParseStatus AMDGPUAsmParser::parseDfmtNfmt(int64_t &
Format) {
7730 using namespace llvm::AMDGPU::MTBUFFormat;
7736 for (
int I = 0;
I < 2; ++
I) {
7737 if (Dfmt == DFMT_UNDEF && !tryParseFmt(
"dfmt", DFMT_MAX, Dfmt))
7740 if (Nfmt == NFMT_UNDEF && !tryParseFmt(
"nfmt", NFMT_MAX, Nfmt))
7745 if ((Dfmt == DFMT_UNDEF) != (Nfmt == NFMT_UNDEF) &&
7751 if (Dfmt == DFMT_UNDEF && Nfmt == NFMT_UNDEF)
7754 Dfmt = (Dfmt ==
DFMT_UNDEF) ? DFMT_DEFAULT : Dfmt;
7755 Nfmt = (Nfmt ==
NFMT_UNDEF) ? NFMT_DEFAULT : Nfmt;
7761ParseStatus AMDGPUAsmParser::parseUfmt(int64_t &
Format) {
7762 using namespace llvm::AMDGPU::MTBUFFormat;
7766 if (!tryParseFmt(
"format", UFMT_MAX, Fmt))
7769 if (Fmt == UFMT_UNDEF)
7776bool AMDGPUAsmParser::matchDfmtNfmt(int64_t &Dfmt,
7778 StringRef FormatStr,
7780 using namespace llvm::AMDGPU::MTBUFFormat;
7784 if (
Format != DFMT_UNDEF) {
7790 if (
Format != NFMT_UNDEF) {
7795 Error(Loc,
"unsupported format");
7799ParseStatus AMDGPUAsmParser::parseSymbolicSplitFormat(StringRef FormatStr,
7802 using namespace llvm::AMDGPU::MTBUFFormat;
7806 if (!matchDfmtNfmt(Dfmt, Nfmt, FormatStr, FormatLoc))
7811 SMLoc Loc = getLoc();
7812 if (!parseId(Str,
"expected a format string") ||
7813 !matchDfmtNfmt(Dfmt, Nfmt, Str, Loc))
7815 if (Dfmt == DFMT_UNDEF)
7816 return Error(Loc,
"duplicate numeric format");
7817 if (Nfmt == NFMT_UNDEF)
7818 return Error(Loc,
"duplicate data format");
7821 Dfmt = (Dfmt ==
DFMT_UNDEF) ? DFMT_DEFAULT : Dfmt;
7822 Nfmt = (Nfmt ==
NFMT_UNDEF) ? NFMT_DEFAULT : Nfmt;
7826 if (Ufmt == UFMT_UNDEF)
7827 return Error(FormatLoc,
"unsupported format");
7836ParseStatus AMDGPUAsmParser::parseSymbolicUnifiedFormat(StringRef FormatStr,
7839 using namespace llvm::AMDGPU::MTBUFFormat;
7842 if (Id == UFMT_UNDEF)
7846 return Error(Loc,
"unified format is not supported on this GPU");
7852ParseStatus AMDGPUAsmParser::parseNumericFormat(int64_t &
Format) {
7853 using namespace llvm::AMDGPU::MTBUFFormat;
7854 SMLoc Loc = getLoc();
7859 return Error(Loc,
"out of range format");
7864ParseStatus AMDGPUAsmParser::parseSymbolicOrNumericFormat(int64_t &
Format) {
7865 using namespace llvm::AMDGPU::MTBUFFormat;
7871 StringRef FormatStr;
7872 SMLoc Loc = getLoc();
7873 if (!parseId(FormatStr,
"expected a format string"))
7876 auto Res = parseSymbolicUnifiedFormat(FormatStr, Loc,
Format);
7878 Res = parseSymbolicSplitFormat(FormatStr, Loc,
Format);
7888 return parseNumericFormat(
Format);
7891ParseStatus AMDGPUAsmParser::parseFORMAT(
OperandVector &Operands) {
7892 using namespace llvm::AMDGPU::MTBUFFormat;
7896 SMLoc Loc = getLoc();
7906 AMDGPUOperand::CreateImm(
this,
Format, Loc, AMDGPUOperand::ImmTyFORMAT));
7918 Res = parseRegOrImm(Operands);
7925 Res = parseSymbolicOrNumericFormat(
Format);
7930 AMDGPUOperand &
Op =
static_cast<AMDGPUOperand &
>(*Operands[
Size - 2]);
7931 assert(
Op.isImm() &&
Op.getImmTy() == AMDGPUOperand::ImmTyFORMAT);
7938 return Error(getLoc(),
"duplicate format");
7942ParseStatus AMDGPUAsmParser::parseFlatOffset(
OperandVector &Operands) {
7944 parseIntWithPrefix(
"offset", Operands, AMDGPUOperand::ImmTyOffset);
7946 Res = parseIntWithPrefix(
"inst_offset", Operands,
7947 AMDGPUOperand::ImmTyInstOffset);
7952ParseStatus AMDGPUAsmParser::parseR128A16(
OperandVector &Operands) {
7954 parseNamedBit(
"r128", Operands, AMDGPUOperand::ImmTyR128A16);
7956 Res = parseNamedBit(
"a16", Operands, AMDGPUOperand::ImmTyA16);
7960ParseStatus AMDGPUAsmParser::parseBLGP(
OperandVector &Operands) {
7962 parseIntWithPrefix(
"blgp", Operands, AMDGPUOperand::ImmTyBLGP);
7965 parseOperandArrayWithPrefix(
"neg", Operands, AMDGPUOperand::ImmTyBLGP);
7974void AMDGPUAsmParser::cvtExp(MCInst &Inst,
const OperandVector &Operands) {
7975 OptionalImmIndexMap OptionalIdx;
7977 unsigned OperandIdx[4];
7978 unsigned EnMask = 0;
7981 for (
unsigned i = 1, e = Operands.
size(); i != e; ++i) {
7982 AMDGPUOperand &
Op = ((AMDGPUOperand &)*Operands[i]);
7987 OperandIdx[SrcIdx] = Inst.
size();
7988 Op.addRegOperands(Inst, 1);
7995 OperandIdx[SrcIdx] = Inst.
size();
8001 if (
Op.isImm() &&
Op.getImmTy() == AMDGPUOperand::ImmTyExpTgt) {
8002 Op.addImmOperands(Inst, 1);
8006 if (
Op.isToken() && (
Op.getToken() ==
"done" ||
Op.getToken() ==
"row_en"))
8010 OptionalIdx[
Op.getImmTy()] = i;
8016 if (OptionalIdx.find(AMDGPUOperand::ImmTyExpCompr) != OptionalIdx.end()) {
8023 for (
auto i = 0; i < SrcIdx; ++i) {
8025 EnMask |= Compr? (0x3 << i * 2) : (0x1 << i);
8050 IntVal =
encode(ISA, IntVal, CntVal);
8051 if (CntVal !=
decode(ISA, IntVal)) {
8053 IntVal =
encode(ISA, IntVal, -1);
8061bool AMDGPUAsmParser::parseCnt(int64_t &IntVal) {
8063 SMLoc CntLoc = getLoc();
8064 StringRef CntName = getTokenStr();
8071 SMLoc ValLoc = getLoc();
8080 if (CntName ==
"vmcnt" || CntName ==
"vmcnt_sat") {
8082 }
else if (CntName ==
"expcnt" || CntName ==
"expcnt_sat") {
8084 }
else if (CntName ==
"lgkmcnt" || CntName ==
"lgkmcnt_sat") {
8087 Error(CntLoc,
"invalid counter name " + CntName);
8092 Error(ValLoc,
"too large value for " + CntName);
8101 Error(getLoc(),
"expected a counter name");
8109ParseStatus AMDGPUAsmParser::parseSWaitCnt(
OperandVector &Operands) {
8116 if (!parseCnt(Waitcnt))
8124 Operands.
push_back(AMDGPUOperand::CreateImm(
this, Waitcnt, S));
8128bool AMDGPUAsmParser::parseDelay(int64_t &Delay) {
8129 SMLoc FieldLoc = getLoc();
8130 StringRef FieldName = getTokenStr();
8135 SMLoc ValueLoc = getLoc();
8142 if (FieldName ==
"instid0") {
8144 }
else if (FieldName ==
"instskip") {
8146 }
else if (FieldName ==
"instid1") {
8149 Error(FieldLoc,
"invalid field name " + FieldName);
8168 .Case(
"VALU_DEP_1", 1)
8169 .Case(
"VALU_DEP_2", 2)
8170 .Case(
"VALU_DEP_3", 3)
8171 .Case(
"VALU_DEP_4", 4)
8172 .Case(
"TRANS32_DEP_1", 5)
8173 .Case(
"TRANS32_DEP_2", 6)
8174 .Case(
"TRANS32_DEP_3", 7)
8175 .Case(
"FMA_ACCUM_CYCLE_1", 8)
8176 .Case(
"SALU_CYCLE_1", 9)
8177 .Case(
"SALU_CYCLE_2", 10)
8178 .Case(
"SALU_CYCLE_3", 11)
8186 Delay |=
Value << Shift;
8190ParseStatus AMDGPUAsmParser::parseSDelayALU(
OperandVector &Operands) {
8196 if (!parseDelay(Delay))
8204 Operands.
push_back(AMDGPUOperand::CreateImm(
this, Delay, S));
8209AMDGPUOperand::isSWaitCnt()
const {
/// Operand predicate for the s_delay_alu operand: true for any immediate
/// operand (it simply returns isImm()).
8213bool AMDGPUOperand::isSDelayALU()
const {
return isImm(); }
/// Reports a diagnostic at Loc for a dependency-counter parse failure.
///
/// Four messages are produced by this function, each mentioning DepCtrName:
/// invalid counter name, counter not supported on this GPU, duplicate counter
/// name, and invalid value.
///
/// \param Loc         location the diagnostic is attached to.
/// \param ErrorId     error code selecting the message.
/// \param DepCtrName  counter name interpolated into the message.
///
/// NOTE(review): the lines that map ErrorId to a message are not visible in
/// this excerpt, so which code selects which message cannot be stated here.
8219void AMDGPUAsmParser::depCtrError(SMLoc Loc,
int ErrorId,
8220 StringRef DepCtrName) {
8223 Error(Loc, Twine(
"invalid counter name ", DepCtrName));
8226 Error(Loc, Twine(DepCtrName,
" is not supported on this GPU"));
8229 Error(Loc, Twine(
"duplicate counter name ", DepCtrName));
8232 Error(Loc, Twine(
"invalid value for ", DepCtrName));
8239bool AMDGPUAsmParser::parseDepCtr(int64_t &DepCtr,
unsigned &UsedOprMask) {
8241 using namespace llvm::AMDGPU::DepCtr;
8243 SMLoc DepCtrLoc = getLoc();
8244 StringRef DepCtrName = getTokenStr();
8254 unsigned PrevOprMask = UsedOprMask;
8255 int CntVal =
encodeDepCtr(DepCtrName, ExprVal, UsedOprMask, getSTI());
8258 depCtrError(DepCtrLoc, CntVal, DepCtrName);
8267 Error(getLoc(),
"expected a counter name");
8272 unsigned CntValMask = PrevOprMask ^ UsedOprMask;
8273 DepCtr = (DepCtr & ~CntValMask) | CntVal;
8277ParseStatus AMDGPUAsmParser::parseDepCtr(
OperandVector &Operands) {
8278 using namespace llvm::AMDGPU::DepCtr;
8281 SMLoc Loc = getLoc();
8284 unsigned UsedOprMask = 0;
8286 if (!parseDepCtr(DepCtr, UsedOprMask))
8294 Operands.
push_back(AMDGPUOperand::CreateImm(
this, DepCtr, Loc));
/// Operand predicate for the dependency-counter operand: defers entirely to
/// isS16Imm().
8298bool AMDGPUOperand::isDepCtr()
const {
return isS16Imm(); }
8304ParseStatus AMDGPUAsmParser::parseHwregFunc(OperandInfoTy &HwReg,
8306 OperandInfoTy &Width) {
8307 using namespace llvm::AMDGPU::Hwreg;
8313 HwReg.Loc = getLoc();
8316 HwReg.IsSymbolic =
true;
8318 }
else if (!
parseExpr(HwReg.Val,
"a register name")) {
8326 if (!skipToken(
AsmToken::Comma,
"expected a comma or a closing parenthesis"))
8336 Width.Loc = getLoc();
8344ParseStatus AMDGPUAsmParser::parseHwreg(
OperandVector &Operands) {
8345 using namespace llvm::AMDGPU::Hwreg;
8348 SMLoc Loc = getLoc();
8350 StructuredOpField HwReg(
"id",
"hardware register", HwregId::Width,
8352 StructuredOpField
Offset(
"offset",
"bit offset", HwregOffset::Width,
8353 HwregOffset::Default);
8354 struct : StructuredOpField {
8355 using StructuredOpField::StructuredOpField;
8356 bool validate(AMDGPUAsmParser &Parser)
const override {
8358 return Error(Parser,
"only values from 1 to 32 are legal");
8361 } Width(
"size",
"bitfield width", HwregSize::Width, HwregSize::Default);
8362 ParseStatus Res = parseStructuredOpFields({&HwReg, &
Offset, &Width});
8365 Res = parseHwregFunc(HwReg,
Offset, Width);
8368 if (!validateStructuredOpFields({&HwReg, &
Offset, &Width}))
8370 ImmVal = HwregEncoding::encode(HwReg.Val,
Offset.Val, Width.Val);
8374 parseExpr(ImmVal,
"a hwreg macro, structured immediate"))
8381 return Error(Loc,
"invalid immediate: only 16-bit values are legal");
8383 AMDGPUOperand::CreateImm(
this, ImmVal, Loc, AMDGPUOperand::ImmTyHwreg));
/// Operand predicate: true when this operand is an immediate tagged
/// ImmTyHwreg.
8387bool AMDGPUOperand::isHwreg()
const {
8388 return isImmTy(ImmTyHwreg);
8396AMDGPUAsmParser::parseSendMsgBody(OperandInfoTy &Msg,
8398 OperandInfoTy &Stream) {
8399 using namespace llvm::AMDGPU::SendMsg;
8404 Msg.IsSymbolic =
true;
8406 }
else if (!
parseExpr(Msg.Val,
"a message name")) {
8411 Op.IsDefined =
true;
8414 (
Op.Val =
getMsgOpId(Msg.Val, getTokenStr(), getSTI())) !=
8417 }
else if (!
parseExpr(
Op.Val,
"an operation name")) {
8422 Stream.IsDefined =
true;
8423 Stream.Loc = getLoc();
8433AMDGPUAsmParser::validateSendMsg(
const OperandInfoTy &Msg,
8434 const OperandInfoTy &
Op,
8435 const OperandInfoTy &Stream) {
8436 using namespace llvm::AMDGPU::SendMsg;
8441 bool Strict = Msg.IsSymbolic;
8445 Error(Msg.Loc,
"specified message id is not supported on this GPU");
8450 Error(Msg.Loc,
"invalid message id");
8456 Error(
Op.Loc,
"message does not support operations");
8458 Error(Msg.Loc,
"missing message operation");
8464 Error(
Op.Loc,
"specified operation id is not supported on this GPU");
8466 Error(
Op.Loc,
"invalid operation id");
8471 Error(Stream.Loc,
"message operation does not support streams");
8475 Error(Stream.Loc,
"invalid message stream id");
8481ParseStatus AMDGPUAsmParser::parseSendMsg(
OperandVector &Operands) {
8482 using namespace llvm::AMDGPU::SendMsg;
8485 SMLoc Loc = getLoc();
8489 OperandInfoTy
Op(OP_NONE_);
8490 OperandInfoTy Stream(STREAM_ID_NONE_);
8491 if (parseSendMsgBody(Msg,
Op, Stream) &&
8492 validateSendMsg(Msg,
Op, Stream)) {
8497 }
else if (
parseExpr(ImmVal,
"a sendmsg macro")) {
8499 return Error(Loc,
"invalid immediate: only 16-bit values are legal");
8504 Operands.
push_back(AMDGPUOperand::CreateImm(
this, ImmVal, Loc, AMDGPUOperand::ImmTySendMsg));
/// Operand predicate: true when this operand is an immediate tagged
/// ImmTySendMsg.
8508bool AMDGPUOperand::isSendMsg()
const {
8509 return isImmTy(ImmTySendMsg);
8512ParseStatus AMDGPUAsmParser::parseWaitEvent(
OperandVector &Operands) {
8513 using namespace llvm::AMDGPU::WaitEvent;
8515 SMLoc Loc = getLoc();
8518 StructuredOpField DontWaitExportReady(
"dont_wait_export_ready",
"bit value",
8520 StructuredOpField ExportReady(
"export_ready",
"bit value", 1, 0);
8522 StructuredOpField *TargetBitfield =
8523 isGFX11() ? &DontWaitExportReady : &ExportReady;
8525 ParseStatus Res = parseStructuredOpFields({TargetBitfield});
8529 if (!validateStructuredOpFields({TargetBitfield}))
8531 ImmVal = TargetBitfield->Val;
8538 return Error(Loc,
"invalid immediate: only 16-bit values are legal");
8540 Operands.
push_back(AMDGPUOperand::CreateImm(
this, ImmVal, Loc,
8541 AMDGPUOperand::ImmTyWaitEvent));
/// Operand predicate: true when this operand is an immediate tagged
/// ImmTyWaitEvent.
8545bool AMDGPUOperand::isWaitEvent()
const {
return isImmTy(ImmTyWaitEvent); }
8551ParseStatus AMDGPUAsmParser::parseInterpSlot(
OperandVector &Operands) {
8558 int Slot = StringSwitch<int>(Str)
8565 return Error(S,
"invalid interpolation slot");
8567 Operands.
push_back(AMDGPUOperand::CreateImm(
this, Slot, S,
8568 AMDGPUOperand::ImmTyInterpSlot));
8572ParseStatus AMDGPUAsmParser::parseInterpAttr(
OperandVector &Operands) {
8579 if (!Str.starts_with(
"attr"))
8580 return Error(S,
"invalid interpolation attribute");
8582 StringRef Chan = Str.take_back(2);
8583 int AttrChan = StringSwitch<int>(Chan)
8590 return Error(S,
"invalid or missing interpolation attribute channel");
8592 Str = Str.drop_back(2).drop_front(4);
8595 if (Str.getAsInteger(10, Attr))
8596 return Error(S,
"invalid or missing interpolation attribute number");
8599 return Error(S,
"out of bounds interpolation attribute number");
8603 Operands.
push_back(AMDGPUOperand::CreateImm(
this, Attr, S,
8604 AMDGPUOperand::ImmTyInterpAttr));
8605 Operands.
push_back(AMDGPUOperand::CreateImm(
8606 this, AttrChan, SChan, AMDGPUOperand::ImmTyInterpAttrChan));
8614ParseStatus AMDGPUAsmParser::parseExpTgt(
OperandVector &Operands) {
8615 using namespace llvm::AMDGPU::Exp;
8625 return Error(S, (Id == ET_INVALID)
8626 ?
"invalid exp target"
8627 :
"exp target is not supported on this GPU");
8629 Operands.
push_back(AMDGPUOperand::CreateImm(
this, Id, S,
8630 AMDGPUOperand::ImmTyExpTgt));
8639AMDGPUAsmParser::isId(
const AsmToken &Token,
const StringRef Id)
const {
8644AMDGPUAsmParser::isId(
const StringRef Id)
const {
8650 return getTokenKind() ==
Kind;
8653StringRef AMDGPUAsmParser::getId()
const {
8658AMDGPUAsmParser::trySkipId(
const StringRef Id) {
8667AMDGPUAsmParser::trySkipId(
const StringRef Pref,
const StringRef Id) {
8669 StringRef Tok = getTokenStr();
8680 if (isId(Id) && peekToken().is(Kind)) {
8690 if (isToken(Kind)) {
8699 const StringRef ErrMsg) {
8700 if (!trySkipToken(Kind)) {
8701 Error(getLoc(), ErrMsg);
/// Parses an expression and evaluates it to an absolute integer stored in Imm.
///
/// \param Imm       receives the value when the expression evaluates to an
///                  absolute constant.
/// \param Expected  optional description of what else would have been
///                  accepted at this position. When non-empty the diagnostic
///                  reads "expected <Expected> or an absolute expression";
///                  when empty it reads "expected absolute expression".
///
/// NOTE(review): the return type, the declarations of S and Expr, and the
/// return statements are not visible in this excerpt.
8708AMDGPUAsmParser::parseExpr(int64_t &
Imm, StringRef Expected) {
8712 if (Parser.parseExpression(Expr))
8715 if (Expr->evaluateAsAbsolute(
Imm))
// Reached when the expression parsed but is not an absolute constant: pick
// the diagnostic wording based on whether the caller supplied a hint.
8718 if (Expected.empty()) {
8719 Error(S,
"expected absolute expression");
8721 Error(S, Twine(
"expected ", Expected) +
8722 Twine(
" or an absolute expression"));
8732 if (Parser.parseExpression(Expr))
8736 if (Expr->evaluateAsAbsolute(IntVal)) {
8737 Operands.
push_back(AMDGPUOperand::CreateImm(
this, IntVal, S));
8739 Operands.
push_back(AMDGPUOperand::CreateExpr(
this, Expr, S));
/// Reads the contents of the current string token into Val; ErrMsg is the
/// diagnostic emitted at the current location on the failure path.
///
/// NOTE(review): the return type, the token-kind check that chooses between
/// the two paths, and the return statements are not visible in this excerpt.
8745AMDGPUAsmParser::parseString(StringRef &Val,
const StringRef ErrMsg) {
8747 Val =
getToken().getStringContents();
8751 Error(getLoc(), ErrMsg);
/// Reads the current token's text into Val. On the failure path a diagnostic
/// is emitted at the current location only when ErrMsg is non-empty, so a
/// caller can pass an empty message to fail silently.
///
/// NOTE(review): the return type, the token-kind check that chooses between
/// the two paths, and the return statements are not visible in this excerpt.
8756AMDGPUAsmParser::parseId(StringRef &Val,
const StringRef ErrMsg) {
8758 Val = getTokenStr();
8762 if (!ErrMsg.
empty())
8763 Error(getLoc(), ErrMsg);
/// Returns the parser's current token (Parser.getTok()).
8768AMDGPUAsmParser::getToken()
const {
8769 return Parser.getTok();
8772AsmToken AMDGPUAsmParser::peekToken(
bool ShouldSkipSpace) {
8775 : getLexer().peekTok(ShouldSkipSpace);
8780 auto TokCount = getLexer().peekTokens(Tokens);
8782 for (
auto Idx = TokCount; Idx < Tokens.
size(); ++Idx)
/// Returns the kind of the lexer's current token (getLexer().getKind()).
8787AMDGPUAsmParser::getTokenKind()
const {
8788 return getLexer().getKind();
8792AMDGPUAsmParser::getLoc()
const {
8797AMDGPUAsmParser::getTokenStr()
const {
8802AMDGPUAsmParser::lex() {
/// Looks up the parsed operand associated with MC operand index MCOpIdx.
///
/// Walks Operands in order and compares each operand's getMCOpIdx() against
/// MCOpIdx.
///
/// NOTE(review): the statement executed on a match and the behaviour when no
/// operand matches are not visible in this excerpt; since the function returns
/// a reference, callers presumably must pass an index that exists -- confirm.
8806const AMDGPUOperand &
8807AMDGPUAsmParser::findMCOperand(
const OperandVector &Operands,
8808 int MCOpIdx)
const {
8809 for (
const auto &
Op : Operands) {
8810 const AMDGPUOperand &TargetOp =
static_cast<AMDGPUOperand &
>(*Op);
8811 if (TargetOp.getMCOpIdx() == MCOpIdx)
/// Returns the start location of Operands[0]. Operands must therefore be
/// non-empty; index 0 is accessed unconditionally.
8817SMLoc AMDGPUAsmParser::getInstLoc(
const OperandVector &Operands)
const {
8818 return ((AMDGPUOperand &)*Operands[0]).getStartLoc();
8822SMLoc AMDGPUAsmParser::getLaterLoc(SMLoc a, SMLoc b) {
/// Returns the start location of the parsed operand that findMCOperand()
/// yields for MC operand index MCOpIdx.
8826SMLoc AMDGPUAsmParser::getOperandLoc(
const OperandVector &Operands,
8827 int MCOpIdx)
const {
8828 return findMCOperand(Operands, MCOpIdx).getStartLoc();
/// Predicate-based operand location lookup.
///
/// Scans Operands from the last element down to index 1 (index 0 is never
/// visited by the loop) and returns an operand's start location from inside
/// the loop; if the loop finishes without returning, falls back to
/// getInstLoc(Operands).
///
/// NOTE(review): the return type, the Operands parameter line and the
/// condition guarding the in-loop return are not visible in this excerpt;
/// presumably the guard is Test(Op) -- confirm.
8832AMDGPUAsmParser::getOperandLoc(std::function<
bool(
const AMDGPUOperand&)>
Test,
8834 for (
unsigned i = Operands.
size() - 1; i > 0; --i) {
8835 AMDGPUOperand &
Op = ((AMDGPUOperand &)*Operands[i]);
8837 return Op.getStartLoc();
8839 return getInstLoc(Operands);
8843AMDGPUAsmParser::getImmLoc(AMDGPUOperand::ImmTy
Type,
8845 auto Test = [=](
const AMDGPUOperand&
Op) {
return Op.isImmTy(
Type); };
8846 return getOperandLoc(
Test, Operands);
8860 StringRef
Id = getTokenStr();
8861 SMLoc IdLoc = getLoc();
8867 find_if(Fields, [Id](StructuredOpField *
F) {
return F->Id ==
Id; });
8868 if (
I == Fields.
end())
8869 return Error(IdLoc,
"unknown field");
8870 if ((*I)->IsDefined)
8871 return Error(IdLoc,
"duplicate field");
8874 (*I)->Loc = getLoc();
8877 (*I)->IsDefined =
true;
8884bool AMDGPUAsmParser::validateStructuredOpFields(
8886 return all_of(Fields, [
this](
const StructuredOpField *
F) {
8887 return F->validate(*
this);
8898 const unsigned OrMask,
8899 const unsigned XorMask) {
8908bool AMDGPUAsmParser::parseSwizzleOperand(int64_t &
Op,
const unsigned MinVal,
8909 const unsigned MaxVal,
8910 const Twine &ErrMsg, SMLoc &Loc) {
8927AMDGPUAsmParser::parseSwizzleOperands(
const unsigned OpNum, int64_t*
Op,
8928 const unsigned MinVal,
8929 const unsigned MaxVal,
8930 const StringRef ErrMsg) {
8932 for (
unsigned i = 0; i < OpNum; ++i) {
8933 if (!parseSwizzleOperand(
Op[i], MinVal, MaxVal, ErrMsg, Loc))
8941AMDGPUAsmParser::parseSwizzleQuadPerm(int64_t &
Imm) {
8942 using namespace llvm::AMDGPU::Swizzle;
8945 if (parseSwizzleOperands(LANE_NUM, Lane, 0, LANE_MAX,
8946 "expected a 2-bit lane id")) {
8957AMDGPUAsmParser::parseSwizzleBroadcast(int64_t &
Imm) {
8958 using namespace llvm::AMDGPU::Swizzle;
8964 if (!parseSwizzleOperand(GroupSize,
8966 "group size must be in the interval [2,32]",
8971 Error(Loc,
"group size must be a power of two");
8974 if (parseSwizzleOperand(LaneIdx,
8976 "lane id must be in the interval [0,group size - 1]",
8985AMDGPUAsmParser::parseSwizzleReverse(int64_t &
Imm) {
8986 using namespace llvm::AMDGPU::Swizzle;
8991 if (!parseSwizzleOperand(GroupSize,
8993 "group size must be in the interval [2,32]",
8998 Error(Loc,
"group size must be a power of two");
9007AMDGPUAsmParser::parseSwizzleSwap(int64_t &
Imm) {
9008 using namespace llvm::AMDGPU::Swizzle;
9013 if (!parseSwizzleOperand(GroupSize,
9015 "group size must be in the interval [1,16]",
9020 Error(Loc,
"group size must be a power of two");
9029AMDGPUAsmParser::parseSwizzleBitmaskPerm(int64_t &
Imm) {
9030 using namespace llvm::AMDGPU::Swizzle;
9037 SMLoc StrLoc = getLoc();
9038 if (!parseString(Ctl)) {
9041 if (Ctl.
size() != BITMASK_WIDTH) {
9042 Error(StrLoc,
"expected a 5-character mask");
9046 unsigned AndMask = 0;
9047 unsigned OrMask = 0;
9048 unsigned XorMask = 0;
9050 for (
size_t i = 0; i < Ctl.
size(); ++i) {
9054 Error(StrLoc,
"invalid mask");
9075bool AMDGPUAsmParser::parseSwizzleFFT(int64_t &
Imm) {
9076 using namespace llvm::AMDGPU::Swizzle;
9079 Error(getLoc(),
"FFT mode swizzle not supported on this GPU");
9085 if (!parseSwizzleOperand(Swizzle, 0, FFT_SWIZZLE_MAX,
9086 "FFT swizzle must be in the interval [0," +
9087 Twine(FFT_SWIZZLE_MAX) + Twine(
']'),
9095bool AMDGPUAsmParser::parseSwizzleRotate(int64_t &
Imm) {
9096 using namespace llvm::AMDGPU::Swizzle;
9099 Error(getLoc(),
"Rotate mode swizzle not supported on this GPU");
9106 if (!parseSwizzleOperand(
Direction, 0, 1,
9107 "direction must be 0 (left) or 1 (right)", Loc))
9111 if (!parseSwizzleOperand(
9112 RotateSize, 0, ROTATE_MAX_SIZE,
9113 "number of threads to rotate must be in the interval [0," +
9114 Twine(ROTATE_MAX_SIZE) + Twine(
']'),
9119 (RotateSize << ROTATE_SIZE_SHIFT);
9124AMDGPUAsmParser::parseSwizzleOffset(int64_t &
Imm) {
9126 SMLoc OffsetLoc = getLoc();
9132 Error(OffsetLoc,
"expected a 16-bit offset");
9139AMDGPUAsmParser::parseSwizzleMacro(int64_t &
Imm) {
9140 using namespace llvm::AMDGPU::Swizzle;
9144 SMLoc ModeLoc = getLoc();
9147 if (trySkipId(IdSymbolic[ID_QUAD_PERM])) {
9148 Ok = parseSwizzleQuadPerm(
Imm);
9149 }
else if (trySkipId(IdSymbolic[ID_BITMASK_PERM])) {
9150 Ok = parseSwizzleBitmaskPerm(
Imm);
9151 }
else if (trySkipId(IdSymbolic[ID_BROADCAST])) {
9152 Ok = parseSwizzleBroadcast(
Imm);
9153 }
else if (trySkipId(IdSymbolic[ID_SWAP])) {
9154 Ok = parseSwizzleSwap(
Imm);
9155 }
else if (trySkipId(IdSymbolic[ID_REVERSE])) {
9156 Ok = parseSwizzleReverse(
Imm);
9157 }
else if (trySkipId(IdSymbolic[ID_FFT])) {
9158 Ok = parseSwizzleFFT(
Imm);
9159 }
else if (trySkipId(IdSymbolic[ID_ROTATE])) {
9160 Ok = parseSwizzleRotate(
Imm);
9162 Error(ModeLoc,
"expected a swizzle mode");
9165 return Ok && skipToken(
AsmToken::RParen,
"expected a closing parentheses");
9171ParseStatus AMDGPUAsmParser::parseSwizzle(
OperandVector &Operands) {
9175 if (trySkipId(
"offset")) {
9179 if (trySkipId(
"swizzle")) {
9180 Ok = parseSwizzleMacro(
Imm);
9182 Ok = parseSwizzleOffset(
Imm);
9186 Operands.
push_back(AMDGPUOperand::CreateImm(
this,
Imm, S, AMDGPUOperand::ImmTySwizzle));
9194AMDGPUOperand::isSwizzle()
const {
9195 return isImmTy(ImmTySwizzle);
9202int64_t AMDGPUAsmParser::parseGPRIdxMacro() {
9204 using namespace llvm::AMDGPU::VGPRIndexMode;
9216 for (
unsigned ModeId = ID_MIN; ModeId <=
ID_MAX; ++ModeId) {
9217 if (trySkipId(IdSymbolic[ModeId])) {
9225 "expected a VGPR index mode or a closing parenthesis" :
9226 "expected a VGPR index mode");
9231 Error(S,
"duplicate VGPR index mode");
9239 "expected a comma or a closing parenthesis"))
9246ParseStatus AMDGPUAsmParser::parseGPRIdxMode(
OperandVector &Operands) {
9248 using namespace llvm::AMDGPU::VGPRIndexMode;
9254 Imm = parseGPRIdxMacro();
9258 if (getParser().parseAbsoluteExpression(
Imm))
9261 return Error(S,
"invalid immediate: only 4-bit values are legal");
9265 AMDGPUOperand::CreateImm(
this,
Imm, S, AMDGPUOperand::ImmTyGprIdxMode));
9269bool AMDGPUOperand::isGPRIdxMode()
const {
9270 return isImmTy(ImmTyGprIdxMode);
9277ParseStatus AMDGPUAsmParser::parseSOPPBrTarget(
OperandVector &Operands) {
9282 if (isRegister() || isModifier())
9288 AMDGPUOperand &Opr = ((AMDGPUOperand &)*Operands[Operands.
size() - 1]);
9289 assert(Opr.isImm() || Opr.isExpr());
9290 SMLoc Loc = Opr.getStartLoc();
9294 if (Opr.isExpr() && !Opr.isSymbolRefExpr()) {
9295 Error(Loc,
"expected an absolute expression or a label");
9296 }
else if (Opr.isImm() && !Opr.isS16Imm()) {
9297 Error(Loc,
"expected a 16-bit signed jump offset");
9307ParseStatus AMDGPUAsmParser::parseBoolReg(
OperandVector &Operands) {
9308 return parseReg(Operands);
9315void AMDGPUAsmParser::cvtMubufImpl(MCInst &Inst,
9318 OptionalImmIndexMap OptionalIdx;
9319 unsigned FirstOperandIdx = 1;
9320 bool IsAtomicReturn =
false;
9327 for (
unsigned i = FirstOperandIdx, e = Operands.
size(); i != e; ++i) {
9328 AMDGPUOperand &
Op = ((AMDGPUOperand &)*Operands[i]);
9332 Op.addRegOperands(Inst, 1);
9336 if (IsAtomicReturn && i == FirstOperandIdx)
9337 Op.addRegOperands(Inst, 1);
9342 if (
Op.isImm() &&
Op.getImmTy() == AMDGPUOperand::ImmTyNone) {
9343 Op.addImmOperands(Inst, 1);
9355 OptionalIdx[
Op.getImmTy()] = i;
9369bool AMDGPUOperand::isSMRDOffset8()
const {
9373bool AMDGPUOperand::isSMEMOffset()
const {
9375 return isImmLiteral();
9378bool AMDGPUOperand::isSMRDLiteralOffset()
const {
9413bool AMDGPUAsmParser::convertDppBoundCtrl(int64_t &BoundCtrl) {
9414 if (BoundCtrl == 0 || BoundCtrl == 1) {
9422void AMDGPUAsmParser::onBeginOfFile() {
9423 if (!getParser().getStreamer().getTargetStreamer() ||
9427 if (!getTargetStreamer().getTargetID())
9428 getTargetStreamer().initializeTargetID(getSTI(),
9429 getSTI().getFeatureString());
9432 getTargetStreamer().EmitDirectiveAMDGCNTarget();
9441bool AMDGPUAsmParser::parsePrimaryExpr(
const MCExpr *&Res, SMLoc &EndLoc) {
9445 StringRef TokenId = getTokenStr();
9446 AGVK VK = StringSwitch<AGVK>(TokenId)
9447 .Case(
"max", AGVK::AGVK_Max)
9448 .Case(
"min", AGVK::AGVK_Min)
9449 .Case(
"or", AGVK::AGVK_Or)
9450 .Case(
"extrasgprs", AGVK::AGVK_ExtraSGPRs)
9451 .Case(
"totalnumvgprs", AGVK::AGVK_TotalNumVGPRs)
9452 .Case(
"alignto", AGVK::AGVK_AlignTo)
9453 .Case(
"occupancy", AGVK::AGVK_Occupancy)
9454 .Case(
"instprefsize", AGVK::AGVK_InstPrefSize)
9455 .Default(AGVK::AGVK_None);
9459 uint64_t CommaCount = 0;
9464 if (Exprs.
empty()) {
9466 "empty " + Twine(TokenId) +
" expression");
9469 if (CommaCount + 1 != Exprs.
size()) {
9471 "mismatch of commas in " + Twine(TokenId) +
" expression");
9475 Expected && Exprs.
size() != Expected) {
9476 Error(
getToken().getLoc(), Twine(TokenId) +
" expression expects " +
9477 Twine(Expected) +
" operands");
9484 if (getParser().parseExpression(Expr, EndLoc))
9488 if (LastTokenWasComma)
9492 "unexpected token in " + Twine(TokenId) +
" expression");
9498 return getParser().parsePrimaryExpr(Res, EndLoc,
nullptr);
9501ParseStatus AMDGPUAsmParser::parseOModSI(
OperandVector &Operands) {
9502 StringRef
Name = getTokenStr();
9503 if (Name ==
"mul") {
9504 return parseIntWithPrefix(
"mul", Operands,
9508 if (Name ==
"div") {
9509 return parseIntWithPrefix(
"div", Operands,
9520 int OpSelIdx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::op_sel);
9525 const AMDGPU::OpName
Ops[] = {AMDGPU::OpName::src0, AMDGPU::OpName::src1,
9526 AMDGPU::OpName::src2};
9534 int DstIdx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::vdst);
9539 int ModIdx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::src0_modifiers);
9541 if (
DstOp.isReg() &&
9546 if ((OpSel & (1 << SrcNum)) != 0)
9552void AMDGPUAsmParser::cvtVOP3OpSel(MCInst &Inst,
9554 cvtVOP3P(Inst, Operands);
9558void AMDGPUAsmParser::cvtVOP3OpSel(MCInst &Inst,
const OperandVector &Operands,
9559 OptionalImmIndexMap &OptionalIdx) {
9560 cvtVOP3P(Inst, Operands, OptionalIdx);
9569 &&
Desc.NumOperands > (OpNum + 1)
9571 &&
Desc.operands()[OpNum + 1].RegClass != -1
9573 &&
Desc.getOperandConstraint(OpNum + 1,
9577void AMDGPUAsmParser::cvtOpSelHelper(MCInst &Inst,
unsigned OpSel) {
9579 constexpr AMDGPU::OpName
Ops[] = {AMDGPU::OpName::src0, AMDGPU::OpName::src1,
9580 AMDGPU::OpName::src2};
9581 constexpr AMDGPU::OpName ModOps[] = {AMDGPU::OpName::src0_modifiers,
9582 AMDGPU::OpName::src1_modifiers,
9583 AMDGPU::OpName::src2_modifiers};
9584 for (
int J = 0; J < 3; ++J) {
9585 int OpIdx = AMDGPU::getNamedOperandIdx(
Opc,
Ops[J]);
9591 int ModIdx = AMDGPU::getNamedOperandIdx(
Opc, ModOps[J]);
9594 if ((OpSel & (1 << J)) != 0)
9597 if (ModOps[J] == AMDGPU::OpName::src0_modifiers && (OpSel & (1 << 3)) != 0)
9604void AMDGPUAsmParser::cvtVOP3Interp(MCInst &Inst,
const OperandVector &Operands)
9606 OptionalImmIndexMap OptionalIdx;
9611 for (
unsigned J = 0; J <
Desc.getNumDefs(); ++J) {
9612 ((AMDGPUOperand &)*Operands[
I++]).addRegOperands(Inst, 1);
9615 for (
unsigned E = Operands.
size();
I !=
E; ++
I) {
9616 AMDGPUOperand &
Op = ((AMDGPUOperand &)*Operands[
I]);
9618 Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
9619 }
else if (
Op.isInterpSlot() ||
Op.isInterpAttr() ||
9620 Op.isInterpAttrChan()) {
9622 }
else if (
Op.isImmModifier()) {
9623 OptionalIdx[
Op.getImmTy()] =
I;
9631 AMDGPUOperand::ImmTyHigh);
9635 AMDGPUOperand::ImmTyClamp);
9639 AMDGPUOperand::ImmTyOModSI);
9644 AMDGPUOperand::ImmTyOpSel);
9645 int OpSelIdx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::op_sel);
9648 cvtOpSelHelper(Inst, OpSel);
9652void AMDGPUAsmParser::cvtVINTERP(MCInst &Inst,
const OperandVector &Operands)
9654 OptionalImmIndexMap OptionalIdx;
9659 for (
unsigned J = 0; J <
Desc.getNumDefs(); ++J) {
9660 ((AMDGPUOperand &)*Operands[
I++]).addRegOperands(Inst, 1);
9663 for (
unsigned E = Operands.
size();
I !=
E; ++
I) {
9664 AMDGPUOperand &
Op = ((AMDGPUOperand &)*Operands[
I]);
9666 Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
9667 }
else if (
Op.isImmModifier()) {
9668 OptionalIdx[
Op.getImmTy()] =
I;
9676 int OpSelIdx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::op_sel);
9686 cvtOpSelHelper(Inst, OpSel);
9689void AMDGPUAsmParser::cvtScaledMFMA(MCInst &Inst,
9691 OptionalImmIndexMap OptionalIdx;
9694 int CbszOpIdx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::cbsz);
9698 for (
unsigned J = 0; J <
Desc.getNumDefs(); ++J)
9699 static_cast<AMDGPUOperand &
>(*Operands[
I++]).addRegOperands(Inst, 1);
9701 for (
unsigned E = Operands.
size();
I !=
E; ++
I) {
9702 AMDGPUOperand &
Op =
static_cast<AMDGPUOperand &
>(*Operands[
I]);
9707 if (NumOperands == CbszOpIdx) {
9712 Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
9713 }
else if (
Op.isImmModifier()) {
9714 OptionalIdx[
Op.getImmTy()] =
I;
9716 Op.addRegOrImmOperands(Inst, 1);
9721 auto CbszIdx = OptionalIdx.find(AMDGPUOperand::ImmTyCBSZ);
9722 if (CbszIdx != OptionalIdx.end()) {
9723 int CbszVal = ((AMDGPUOperand &)*Operands[CbszIdx->second]).
getImm();
9727 int BlgpOpIdx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::blgp);
9728 auto BlgpIdx = OptionalIdx.find(AMDGPUOperand::ImmTyBLGP);
9729 if (BlgpIdx != OptionalIdx.end()) {
9730 int BlgpVal = ((AMDGPUOperand &)*Operands[BlgpIdx->second]).
getImm();
9741 auto OpselIdx = OptionalIdx.find(AMDGPUOperand::ImmTyOpSel);
9742 if (OpselIdx != OptionalIdx.end()) {
9743 OpSel =
static_cast<const AMDGPUOperand &
>(*Operands[OpselIdx->second])
9747 unsigned OpSelHi = 0;
9748 auto OpselHiIdx = OptionalIdx.find(AMDGPUOperand::ImmTyOpSelHi);
9749 if (OpselHiIdx != OptionalIdx.end()) {
9750 OpSelHi =
static_cast<const AMDGPUOperand &
>(*Operands[OpselHiIdx->second])
9753 const AMDGPU::OpName ModOps[] = {AMDGPU::OpName::src0_modifiers,
9754 AMDGPU::OpName::src1_modifiers};
9756 for (
unsigned J = 0; J < 2; ++J) {
9757 unsigned ModVal = 0;
9758 if (OpSel & (1 << J))
9760 if (OpSelHi & (1 << J))
9763 const int ModIdx = AMDGPU::getNamedOperandIdx(
Opc, ModOps[J]);
9768void AMDGPUAsmParser::cvtVOP3(MCInst &Inst,
const OperandVector &Operands,
9769 OptionalImmIndexMap &OptionalIdx) {
9774 for (
unsigned J = 0; J <
Desc.getNumDefs(); ++J) {
9775 ((AMDGPUOperand &)*Operands[
I++]).addRegOperands(Inst, 1);
9778 for (
unsigned E = Operands.
size();
I !=
E; ++
I) {
9779 AMDGPUOperand &
Op = ((AMDGPUOperand &)*Operands[
I]);
9781 Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
9782 }
else if (
Op.isImmModifier()) {
9783 OptionalIdx[
Op.getImmTy()] =
I;
9785 Op.addRegOrImmOperands(Inst, 1);
9791 AMDGPUOperand::ImmTyScaleSel);
9795 AMDGPUOperand::ImmTyClamp);
9801 AMDGPUOperand::ImmTyByteSel);
9806 AMDGPUOperand::ImmTyOModSI);
9813 auto *it = Inst.
begin();
9814 std::advance(it, AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::src2_modifiers));
9822void AMDGPUAsmParser::cvtVOP3(MCInst &Inst,
const OperandVector &Operands) {
9823 OptionalImmIndexMap OptionalIdx;
9824 cvtVOP3(Inst, Operands, OptionalIdx);
9827void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst,
const OperandVector &Operands,
9828 OptionalImmIndexMap &OptIdx) {
9834 if (
Opc == AMDGPU::V_CVT_SCALEF32_PK_FP4_F16_vi ||
9835 Opc == AMDGPU::V_CVT_SCALEF32_PK_FP4_BF16_vi ||
9836 Opc == AMDGPU::V_CVT_SR_BF8_F32_vi ||
9837 Opc == AMDGPU::V_CVT_SR_FP8_F32_vi ||
9838 Opc == AMDGPU::V_CVT_SR_BF8_F32_gfx12_e64_gfx11 ||
9839 Opc == AMDGPU::V_CVT_SR_FP8_F32_gfx12_e64_gfx11 ||
9840 Opc == AMDGPU::V_CVT_SR_BF8_F32_gfx12_e64_gfx12 ||
9841 Opc == AMDGPU::V_CVT_SR_FP8_F32_gfx12_e64_gfx12 ||
9842 Opc == AMDGPU::V_CVT_SR_BF8_F32_gfx12_e64_gfx13 ||
9843 Opc == AMDGPU::V_CVT_SR_FP8_F32_gfx12_e64_gfx13) {
9852 int VdstInIdx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::vdst_in);
9853 if (VdstInIdx != -1 && VdstInIdx ==
static_cast<int>(Inst.
getNumOperands()))
9856 int BitOp3Idx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::bitop3);
9857 if (BitOp3Idx != -1) {
9864 int OpSelIdx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::op_sel);
9865 if (OpSelIdx != -1) {
9869 int OpSelHiIdx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::op_sel_hi);
9870 if (OpSelHiIdx != -1) {
9871 int DefaultVal =
IsPacked ? -1 : 0;
9877 AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::matrix_a_fmt);
9878 if (MatrixAFMTIdx != -1) {
9880 AMDGPUOperand::ImmTyMatrixAFMT, 0);
9884 AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::matrix_b_fmt);
9885 if (MatrixBFMTIdx != -1) {
9887 AMDGPUOperand::ImmTyMatrixBFMT, 0);
9890 int MatrixAScaleIdx =
9891 AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::matrix_a_scale);
9892 if (MatrixAScaleIdx != -1) {
9894 AMDGPUOperand::ImmTyMatrixAScale, 0);
9897 int MatrixBScaleIdx =
9898 AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::matrix_b_scale);
9899 if (MatrixBScaleIdx != -1) {
9901 AMDGPUOperand::ImmTyMatrixBScale, 0);
9904 int MatrixAScaleFmtIdx =
9905 AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::matrix_a_scale_fmt);
9906 if (MatrixAScaleFmtIdx != -1) {
9908 AMDGPUOperand::ImmTyMatrixAScaleFmt, 0);
9911 int MatrixBScaleFmtIdx =
9912 AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::matrix_b_scale_fmt);
9913 if (MatrixBScaleFmtIdx != -1) {
9915 AMDGPUOperand::ImmTyMatrixBScaleFmt, 0);
9920 AMDGPUOperand::ImmTyMatrixAReuse, 0);
9924 AMDGPUOperand::ImmTyMatrixBReuse, 0);
9926 int NegLoIdx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::neg_lo);
9930 int NegHiIdx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::neg_hi);
9934 const AMDGPU::OpName
Ops[] = {AMDGPU::OpName::src0, AMDGPU::OpName::src1,
9935 AMDGPU::OpName::src2};
9936 const AMDGPU::OpName ModOps[] = {AMDGPU::OpName::src0_modifiers,
9937 AMDGPU::OpName::src1_modifiers,
9938 AMDGPU::OpName::src2_modifiers};
9941 unsigned OpSelHi = 0;
9948 if (OpSelHiIdx != -1)
9957 for (
int J = 0; J < 3; ++J) {
9958 int OpIdx = AMDGPU::getNamedOperandIdx(
Opc,
Ops[J]);
9962 int ModIdx = AMDGPU::getNamedOperandIdx(
Opc, ModOps[J]);
9967 uint32_t ModVal = 0;
9970 if (SrcOp.
isReg() && getMRI()
9977 if ((OpSel & (1 << J)) != 0)
9981 if ((OpSelHi & (1 << J)) != 0)
9984 if ((NegLo & (1 << J)) != 0)
9987 if ((NegHi & (1 << J)) != 0)
9994void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst,
const OperandVector &Operands) {
9995 OptionalImmIndexMap OptIdx;
9996 cvtVOP3(Inst, Operands, OptIdx);
9997 cvtVOP3P(Inst, Operands, OptIdx);
10001 unsigned i,
unsigned Opc,
10002 AMDGPU::OpName
OpName) {
10003 if (AMDGPU::getNamedOperandIdx(
Opc,
OpName) != -1)
10004 ((AMDGPUOperand &)*
Operands[i]).addRegOrImmWithFPInputModsOperands(Inst, 2);
10006 ((AMDGPUOperand &)*
Operands[i]).addRegOperands(Inst, 1);
10009void AMDGPUAsmParser::cvtSWMMAC(MCInst &Inst,
const OperandVector &Operands) {
10012 ((AMDGPUOperand &)*Operands[1]).addRegOperands(Inst, 1);
10015 ((AMDGPUOperand &)*Operands[1]).addRegOperands(Inst, 1);
10016 ((AMDGPUOperand &)*Operands[4]).addRegOperands(Inst, 1);
10018 OptionalImmIndexMap OptIdx;
10019 for (
unsigned i = 5; i < Operands.
size(); ++i) {
10020 AMDGPUOperand &
Op = ((AMDGPUOperand &)*Operands[i]);
10021 OptIdx[
Op.getImmTy()] = i;
10026 AMDGPUOperand::ImmTyIndexKey8bit);
10030 AMDGPUOperand::ImmTyIndexKey16bit);
10034 AMDGPUOperand::ImmTyIndexKey32bit);
10039 cvtVOP3P(Inst, Operands, OptIdx);
10046ParseStatus AMDGPUAsmParser::parseVOPD(
OperandVector &Operands) {
10051 SMLoc S = getLoc();
10054 Operands.
push_back(AMDGPUOperand::CreateToken(
this,
"::", S));
10055 SMLoc OpYLoc = getLoc();
10058 Operands.
push_back(AMDGPUOperand::CreateToken(
this, OpYName, OpYLoc));
10061 return Error(OpYLoc,
"expected a VOPDY instruction after ::");
10067void AMDGPUAsmParser::cvtVOPD(MCInst &Inst,
const OperandVector &Operands) {
10070 auto addOp = [&](uint16_t ParsedOprIdx) {
10071 AMDGPUOperand &
Op = ((AMDGPUOperand &)*Operands[ParsedOprIdx]);
10073 Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
10077 Op.addRegOperands(Inst, 1);
10081 Op.addImmOperands(Inst, 1);
10093 addOp(InstInfo[CompIdx].getIndexOfDstInParsedOperands());
10097 const auto &CInfo = InstInfo[CompIdx];
10098 auto CompSrcOperandsNum = InstInfo[CompIdx].getCompParsedSrcOperandsNum();
10099 for (
unsigned CompSrcIdx = 0; CompSrcIdx < CompSrcOperandsNum; ++CompSrcIdx)
10100 addOp(CInfo.getIndexOfSrcInParsedOperands(CompSrcIdx));
10101 if (CInfo.hasSrc2Acc())
10102 addOp(CInfo.getIndexOfDstInParsedOperands());
10106 AMDGPU::getNamedOperandIdx(Inst.
getOpcode(), AMDGPU::OpName::bitop3);
10107 if (BitOp3Idx != -1) {
10108 OptionalImmIndexMap OptIdx;
10109 AMDGPUOperand &
Op = ((AMDGPUOperand &)*Operands.
back());
10111 OptIdx[
Op.getImmTy()] = Operands.
size() - 1;
10121bool AMDGPUOperand::isDPP8()
const {
10122 return isImmTy(ImmTyDPP8);
10125bool AMDGPUOperand::isDPPCtrl()
const {
10126 using namespace AMDGPU::DPP;
10128 bool result = isImm() && getImmTy() == ImmTyDppCtrl &&
isUInt<9>(
getImm());
10131 return (
Imm >= DppCtrl::QUAD_PERM_FIRST &&
Imm <= DppCtrl::QUAD_PERM_LAST) ||
10132 (
Imm >= DppCtrl::ROW_SHL_FIRST &&
Imm <= DppCtrl::ROW_SHL_LAST) ||
10133 (
Imm >= DppCtrl::ROW_SHR_FIRST &&
Imm <= DppCtrl::ROW_SHR_LAST) ||
10134 (
Imm >= DppCtrl::ROW_ROR_FIRST &&
Imm <= DppCtrl::ROW_ROR_LAST) ||
10135 (
Imm == DppCtrl::WAVE_SHL1) ||
10136 (
Imm == DppCtrl::WAVE_ROL1) ||
10137 (
Imm == DppCtrl::WAVE_SHR1) ||
10138 (
Imm == DppCtrl::WAVE_ROR1) ||
10139 (
Imm == DppCtrl::ROW_MIRROR) ||
10140 (
Imm == DppCtrl::ROW_HALF_MIRROR) ||
10141 (
Imm == DppCtrl::BCAST15) ||
10142 (
Imm == DppCtrl::BCAST31) ||
10143 (
Imm >= DppCtrl::ROW_SHARE_FIRST &&
Imm <= DppCtrl::ROW_SHARE_LAST) ||
10144 (
Imm >= DppCtrl::ROW_XMASK_FIRST &&
Imm <= DppCtrl::ROW_XMASK_LAST);
10153bool AMDGPUOperand::isBLGP()
const {
10157bool AMDGPUOperand::isS16Imm()
const {
10161bool AMDGPUOperand::isU16Imm()
const {
10169bool AMDGPUAsmParser::parseDimId(
unsigned &Encoding) {
10174 SMLoc Loc =
getToken().getEndLoc();
10175 Token = std::string(getTokenStr());
10177 if (getLoc() != Loc)
10182 if (!parseId(Suffix))
10186 StringRef DimId = Token;
10197ParseStatus AMDGPUAsmParser::parseDim(
OperandVector &Operands) {
10201 SMLoc S = getLoc();
10207 SMLoc Loc = getLoc();
10208 if (!parseDimId(Encoding))
10209 return Error(Loc,
"invalid dim value");
10211 Operands.
push_back(AMDGPUOperand::CreateImm(
this, Encoding, S,
10212 AMDGPUOperand::ImmTyDim));
10220ParseStatus AMDGPUAsmParser::parseDPP8(
OperandVector &Operands) {
10221 SMLoc S = getLoc();
10230 if (!skipToken(
AsmToken::LBrac,
"expected an opening square bracket"))
10233 for (
size_t i = 0; i < 8; ++i) {
10237 SMLoc Loc = getLoc();
10238 if (getParser().parseAbsoluteExpression(Sels[i]))
10240 if (0 > Sels[i] || 7 < Sels[i])
10241 return Error(Loc,
"expected a 3-bit value");
10244 if (!skipToken(
AsmToken::RBrac,
"expected a closing square bracket"))
10248 for (
size_t i = 0; i < 8; ++i)
10249 DPP8 |= (Sels[i] << (i * 3));
10251 Operands.
push_back(AMDGPUOperand::CreateImm(
this, DPP8, S, AMDGPUOperand::ImmTyDPP8));
10256AMDGPUAsmParser::isSupportedDPPCtrl(StringRef Ctrl,
10258 if (Ctrl ==
"row_newbcast")
10261 if (Ctrl ==
"row_share" ||
10262 Ctrl ==
"row_xmask")
10265 if (Ctrl ==
"wave_shl" ||
10266 Ctrl ==
"wave_shr" ||
10267 Ctrl ==
"wave_rol" ||
10268 Ctrl ==
"wave_ror" ||
10269 Ctrl ==
"row_bcast")
10272 return Ctrl ==
"row_mirror" ||
10273 Ctrl ==
"row_half_mirror" ||
10274 Ctrl ==
"quad_perm" ||
10275 Ctrl ==
"row_shl" ||
10276 Ctrl ==
"row_shr" ||
10281AMDGPUAsmParser::parseDPPCtrlPerm() {
10284 if (!skipToken(
AsmToken::LBrac,
"expected an opening square bracket"))
10288 for (
int i = 0; i < 4; ++i) {
10293 SMLoc Loc = getLoc();
10294 if (getParser().parseAbsoluteExpression(Temp))
10296 if (Temp < 0 || Temp > 3) {
10297 Error(Loc,
"expected a 2-bit value");
10301 Val += (Temp << i * 2);
10304 if (!skipToken(
AsmToken::RBrac,
"expected a closing square bracket"))
10311AMDGPUAsmParser::parseDPPCtrlSel(StringRef Ctrl) {
10312 using namespace AMDGPU::DPP;
10317 SMLoc Loc = getLoc();
10319 if (getParser().parseAbsoluteExpression(Val))
10322 struct DppCtrlCheck {
10328 DppCtrlCheck
Check = StringSwitch<DppCtrlCheck>(Ctrl)
10329 .Case(
"wave_shl", {DppCtrl::WAVE_SHL1, 1, 1})
10330 .Case(
"wave_rol", {DppCtrl::WAVE_ROL1, 1, 1})
10331 .Case(
"wave_shr", {DppCtrl::WAVE_SHR1, 1, 1})
10332 .Case(
"wave_ror", {DppCtrl::WAVE_ROR1, 1, 1})
10333 .Case(
"row_shl", {DppCtrl::ROW_SHL0, 1, 15})
10334 .Case(
"row_shr", {DppCtrl::ROW_SHR0, 1, 15})
10335 .Case(
"row_ror", {DppCtrl::ROW_ROR0, 1, 15})
10336 .Case(
"row_share", {DppCtrl::ROW_SHARE_FIRST, 0, 15})
10337 .Case(
"row_xmask", {DppCtrl::ROW_XMASK_FIRST, 0, 15})
10338 .Case(
"row_newbcast", {DppCtrl::ROW_NEWBCAST_FIRST, 0, 15})
10342 if (
Check.Ctrl == -1) {
10343 Valid = (
Ctrl ==
"row_bcast" && (Val == 15 || Val == 31));
10351 Error(Loc, Twine(
"invalid ", Ctrl) + Twine(
" value"));
10358ParseStatus AMDGPUAsmParser::parseDPPCtrl(
OperandVector &Operands) {
10359 using namespace AMDGPU::DPP;
10362 !isSupportedDPPCtrl(getTokenStr(), Operands))
10365 SMLoc S = getLoc();
10371 if (Ctrl ==
"row_mirror") {
10372 Val = DppCtrl::ROW_MIRROR;
10373 }
else if (Ctrl ==
"row_half_mirror") {
10374 Val = DppCtrl::ROW_HALF_MIRROR;
10377 if (Ctrl ==
"quad_perm") {
10378 Val = parseDPPCtrlPerm();
10380 Val = parseDPPCtrlSel(Ctrl);
10389 AMDGPUOperand::CreateImm(
this, Val, S, AMDGPUOperand::ImmTyDppCtrl));
10393void AMDGPUAsmParser::cvtVOP3DPP(MCInst &Inst,
const OperandVector &Operands,
10395 OptionalImmIndexMap OptionalIdx;
10402 int OldIdx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::old);
10404 AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::src2_modifiers);
10405 bool IsMAC = OldIdx != -1 && Src2ModIdx != -1 &&
10409 for (
unsigned J = 0; J <
Desc.getNumDefs(); ++J) {
10410 ((AMDGPUOperand &)*Operands[
I++]).addRegOperands(Inst, 1);
10414 int VdstInIdx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::vdst_in);
10415 bool IsVOP3CvtSrDpp =
Opc == AMDGPU::V_CVT_SR_BF8_F32_gfx12_e64_dpp8_gfx12 ||
10416 Opc == AMDGPU::V_CVT_SR_BF8_F32_gfx12_e64_dpp8_gfx13 ||
10417 Opc == AMDGPU::V_CVT_SR_FP8_F32_gfx12_e64_dpp8_gfx12 ||
10418 Opc == AMDGPU::V_CVT_SR_FP8_F32_gfx12_e64_dpp8_gfx13 ||
10419 Opc == AMDGPU::V_CVT_SR_BF8_F32_gfx12_e64_dpp_gfx12 ||
10420 Opc == AMDGPU::V_CVT_SR_BF8_F32_gfx12_e64_dpp_gfx13 ||
10421 Opc == AMDGPU::V_CVT_SR_FP8_F32_gfx12_e64_dpp_gfx12 ||
10422 Opc == AMDGPU::V_CVT_SR_FP8_F32_gfx12_e64_dpp_gfx13;
10424 for (
unsigned E = Operands.
size();
I !=
E; ++
I) {
10428 if (OldIdx == NumOperands) {
10430 constexpr int DST_IDX = 0;
10432 }
else if (Src2ModIdx == NumOperands) {
10442 if (IsVOP3CvtSrDpp) {
10451 if (TiedTo != -1) {
10456 AMDGPUOperand &
Op = ((AMDGPUOperand &)*Operands[
I]);
10458 if (IsDPP8 &&
Op.isDppFI()) {
10461 Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
10462 }
else if (
Op.isReg()) {
10463 Op.addRegOperands(Inst, 1);
10464 }
else if (
Op.isImm() &&
10466 Op.addImmOperands(Inst, 1);
10467 }
else if (
Op.isImm()) {
10468 OptionalIdx[
Op.getImmTy()] =
I;
10476 AMDGPUOperand::ImmTyClamp);
10482 AMDGPUOperand::ImmTyByteSel);
10489 cvtVOP3P(Inst, Operands, OptionalIdx);
10491 cvtVOP3OpSel(Inst, Operands, OptionalIdx);
10498 using namespace llvm::AMDGPU::DPP;
10508 AMDGPUOperand::ImmTyDppFI);
10512void AMDGPUAsmParser::cvtDPP(MCInst &Inst,
const OperandVector &Operands,
bool IsDPP8) {
10513 OptionalImmIndexMap OptionalIdx;
10517 for (
unsigned J = 0; J <
Desc.getNumDefs(); ++J) {
10518 ((AMDGPUOperand &)*Operands[
I++]).addRegOperands(Inst, 1);
10522 for (
unsigned E = Operands.
size();
I !=
E; ++
I) {
10525 if (TiedTo != -1) {
10530 AMDGPUOperand &
Op = ((AMDGPUOperand &)*Operands[
I]);
10532 if (
Op.isReg() && validateVccOperand(
Op.getReg())) {
10540 Op.addImmOperands(Inst, 1);
10542 Op.addRegWithFPInputModsOperands(Inst, 2);
10543 }
else if (
Op.isDppFI()) {
10545 }
else if (
Op.isReg()) {
10546 Op.addRegOperands(Inst, 1);
10552 Op.addRegWithFPInputModsOperands(Inst, 2);
10553 }
else if (
Op.isReg()) {
10554 Op.addRegOperands(Inst, 1);
10555 }
else if (
Op.isDPPCtrl()) {
10556 Op.addImmOperands(Inst, 1);
10557 }
else if (
Op.isImm()) {
10559 OptionalIdx[
Op.getImmTy()] =
I;
10567 using namespace llvm::AMDGPU::DPP;
10575 AMDGPUOperand::ImmTyDppFI);
10584ParseStatus AMDGPUAsmParser::parseSDWASel(
OperandVector &Operands,
10586 AMDGPUOperand::ImmTy
Type) {
10587 return parseStringOrIntWithPrefix(
10589 {
"BYTE_0",
"BYTE_1",
"BYTE_2",
"BYTE_3",
"WORD_0",
"WORD_1",
"DWORD"},
10593ParseStatus AMDGPUAsmParser::parseSDWADstUnused(
OperandVector &Operands) {
10594 return parseStringOrIntWithPrefix(
10595 Operands,
"dst_unused", {
"UNUSED_PAD",
"UNUSED_SEXT",
"UNUSED_PRESERVE"},
10596 AMDGPUOperand::ImmTySDWADstUnused);
10599void AMDGPUAsmParser::cvtSdwaVOP1(MCInst &Inst,
const OperandVector &Operands) {
10600 cvtSDWA(Inst, Operands, SDWAInstType::VOP1);
10603void AMDGPUAsmParser::cvtSdwaVOP2(MCInst &Inst,
const OperandVector &Operands) {
10604 cvtSDWA(Inst, Operands, SDWAInstType::VOP2);
10607void AMDGPUAsmParser::cvtSdwaVOP2b(MCInst &Inst,
const OperandVector &Operands) {
10608 cvtSDWA(Inst, Operands, SDWAInstType::VOP2,
true,
true);
10611void AMDGPUAsmParser::cvtSdwaVOP2e(MCInst &Inst,
const OperandVector &Operands) {
10612 cvtSDWA(Inst, Operands, SDWAInstType::VOP2,
false,
true);
10615void AMDGPUAsmParser::cvtSdwaVOPC(MCInst &Inst,
const OperandVector &Operands) {
10616 cvtSDWA(Inst, Operands, SDWAInstType::VOPC,
isVI());
10619void AMDGPUAsmParser::cvtSDWA(MCInst &Inst,
const OperandVector &Operands,
10620 SDWAInstType BasicInstType,
bool SkipDstVcc,
10622 using namespace llvm::AMDGPU::SDWA;
10624 OptionalImmIndexMap OptionalIdx;
10625 bool SkipVcc = SkipDstVcc || SkipSrcVcc;
10626 bool SkippedVcc =
false;
10630 for (
unsigned J = 0; J <
Desc.getNumDefs(); ++J) {
10631 ((AMDGPUOperand &)*Operands[
I++]).addRegOperands(Inst, 1);
10634 for (
unsigned E = Operands.
size();
I !=
E; ++
I) {
10635 AMDGPUOperand &
Op = ((AMDGPUOperand &)*Operands[
I]);
10636 if (SkipVcc && !SkippedVcc &&
Op.isReg() &&
10637 (
Op.getReg() == AMDGPU::VCC ||
Op.getReg() == AMDGPU::VCC_LO)) {
10643 if (BasicInstType == SDWAInstType::VOP2 &&
10649 if (BasicInstType == SDWAInstType::VOPC && Inst.
getNumOperands() == 0) {
10655 Op.addRegOrImmWithInputModsOperands(Inst, 2);
10656 }
else if (
Op.isImm()) {
10658 OptionalIdx[
Op.getImmTy()] =
I;
10662 SkippedVcc =
false;
10666 if (
Opc != AMDGPU::V_NOP_sdwa_gfx10 &&
Opc != AMDGPU::V_NOP_sdwa_gfx9 &&
10667 Opc != AMDGPU::V_NOP_sdwa_vi) {
10669 switch (BasicInstType) {
10670 case SDWAInstType::VOP1:
10673 AMDGPUOperand::ImmTyClamp, 0);
10677 AMDGPUOperand::ImmTyOModSI, 0);
10681 AMDGPUOperand::ImmTySDWADstSel, SdwaSel::DWORD);
10685 AMDGPUOperand::ImmTySDWADstUnused,
10686 DstUnused::UNUSED_PRESERVE);
10688 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySDWASrc0Sel, SdwaSel::DWORD);
10691 case SDWAInstType::VOP2:
10693 AMDGPUOperand::ImmTyClamp, 0);
10698 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySDWADstSel, SdwaSel::DWORD);
10699 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySDWADstUnused, DstUnused::UNUSED_PRESERVE);
10700 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySDWASrc0Sel, SdwaSel::DWORD);
10701 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySDWASrc1Sel, SdwaSel::DWORD);
10704 case SDWAInstType::VOPC:
10707 AMDGPUOperand::ImmTyClamp, 0);
10708 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySDWASrc0Sel, SdwaSel::DWORD);
10709 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySDWASrc1Sel, SdwaSel::DWORD);
10716 if (Inst.
getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi ||
10717 Inst.
getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) {
10718 auto *it = Inst.
begin();
10720 it, AMDGPU::getNamedOperandIdx(Inst.
getOpcode(), AMDGPU::OpName::src2));
10732#define GET_MATCHER_IMPLEMENTATION
10733#define GET_MNEMONIC_SPELL_CHECKER
10734#define GET_MNEMONIC_CHECKER
10735#include "AMDGPUGenAsmMatcher.inc"
10741 return parseTokenOp(
"addr64",
Operands);
10743 return parseNamedBit(
"done",
Operands, AMDGPUOperand::ImmTyDone,
true);
10745 return parseTokenOp(
"idxen",
Operands);
10747 return parseNamedBit(
"lds",
Operands, AMDGPUOperand::ImmTyLDS,
10750 return parseTokenOp(
"offen",
Operands);
10752 return parseTokenOp(
"off",
Operands);
10753 case MCK_row_95_en:
10754 return parseNamedBit(
"row_en",
Operands, AMDGPUOperand::ImmTyRowEn,
true);
10756 return parseNamedBit(
"gds",
Operands, AMDGPUOperand::ImmTyGDS);
10758 return parseNamedBit(
"tfe",
Operands, AMDGPUOperand::ImmTyTFE);
10760 return tryCustomParseOperand(
Operands, MCK);
10765unsigned AMDGPUAsmParser::validateTargetOperandClass(MCParsedAsmOperand &
Op,
10771 AMDGPUOperand &Operand = (AMDGPUOperand&)
Op;
10774 return Operand.isAddr64() ? Match_Success : Match_InvalidOperand;
10776 return Operand.isGDS() ? Match_Success : Match_InvalidOperand;
10778 return Operand.isLDS() ? Match_Success : Match_InvalidOperand;
10780 return Operand.isIdxen() ? Match_Success : Match_InvalidOperand;
10782 return Operand.isOffen() ? Match_Success : Match_InvalidOperand;
10784 return Operand.isTFE() ? Match_Success : Match_InvalidOperand;
10786 return Operand.isDone() ? Match_Success : Match_InvalidOperand;
10787 case MCK_row_95_en:
10788 return Operand.isRowEn() ? Match_Success : Match_InvalidOperand;
10796 return Operand.isSSrc_b32() ? Match_Success : Match_InvalidOperand;
10798 return Operand.isSSrc_f32() ? Match_Success : Match_InvalidOperand;
10799 case MCK_SOPPBrTarget:
10800 return Operand.isSOPPBrTarget() ? Match_Success : Match_InvalidOperand;
10801 case MCK_VReg32OrOff:
10802 return Operand.isVReg32OrOff() ? Match_Success : Match_InvalidOperand;
10803 case MCK_InterpSlot:
10804 return Operand.isInterpSlot() ? Match_Success : Match_InvalidOperand;
10805 case MCK_InterpAttr:
10806 return Operand.isInterpAttr() ? Match_Success : Match_InvalidOperand;
10807 case MCK_InterpAttrChan:
10808 return Operand.isInterpAttrChan() ? Match_Success : Match_InvalidOperand;
10810 case MCK_SReg_64_XEXEC:
10820 return Operand.isNull() ? Match_Success : Match_InvalidOperand;
10822 return Match_InvalidOperand;
10830ParseStatus AMDGPUAsmParser::parseEndpgm(
OperandVector &Operands) {
10831 SMLoc S = getLoc();
10840 return Error(S,
"expected a 16-bit value");
10843 AMDGPUOperand::CreateImm(
this,
Imm, S, AMDGPUOperand::ImmTyEndpgm));
// Predicate: reports whether this operand is an immediate carrying the
// ImmTyEndpgm immediate type, by forwarding to isImmTy(ImmTyEndpgm).
// parseEndpgm() builds its operand with that same ImmTyEndpgm tag.
// NOTE(review): presumably used by the generated matcher to accept the
// optional s_endpgm immediate -- confirm against the operand definitions.
10847bool AMDGPUOperand::isEndpgm()
const {
return isImmTy(ImmTyEndpgm); }
// Predicate: reports whether this operand passes isInlinableImm(MVT::i32),
// i.e. the inlinable-immediate check evaluated for the 32-bit integer type.
// NOTE(review): presumably backs the OPERAND_INLINE_SPLIT_BARRIER_INT32
// operand type -- confirm against the instruction definitions.
10853bool AMDGPUOperand::isSplitBarrier()
const {
return isInlinableImm(MVT::i32); }
static const TargetRegisterClass * getRegClass(const MachineInstr &MI, Register Reg)
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
SmallVector< int16_t, MAX_SRC_OPERANDS_NUM > OperandIndices
static bool checkWriteLane(const MCInst &Inst)
static bool getRegNum(StringRef Str, unsigned &Num)
static void addSrcModifiersAndSrc(MCInst &Inst, const OperandVector &Operands, unsigned i, unsigned Opc, AMDGPU::OpName OpName)
static constexpr RegInfo RegularRegisters[]
static const RegInfo * getRegularRegInfo(StringRef Str)
static ArrayRef< unsigned > getAllVariants()
static OperandIndices getSrcOperandIndices(unsigned Opcode, bool AddMandatoryLiterals=false)
static int IsAGPROperand(const MCInst &Inst, AMDGPU::OpName Name, const MCRegisterInfo *MRI)
static bool IsMovrelsSDWAOpcode(const unsigned Opcode)
static const fltSemantics * getFltSemantics(unsigned Size)
static bool isRegularReg(RegisterKind Kind)
LLVM_ABI LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUAsmParser()
Force static initialization.
static bool ConvertOmodMul(int64_t &Mul)
#define PARSE_BITS_ENTRY(FIELD, ENTRY, VALUE, RANGE)
static bool isInlineableLiteralOp16(int64_t Val, MVT VT, bool HasInv2Pi)
static bool canLosslesslyConvertToFPType(APFloat &FPLiteral, MVT VT)
constexpr uint64_t MIMGFlags
static bool AMDGPUCheckMnemonic(StringRef Mnemonic, const FeatureBitset &AvailableFeatures, unsigned VariantID)
static void applyMnemonicAliases(StringRef &Mnemonic, const FeatureBitset &Features, unsigned VariantID)
constexpr unsigned MAX_SRC_OPERANDS_NUM
#define EXPR_RESOLVE_OR_ERROR(RESOLVED)
static bool ConvertOmodDiv(int64_t &Div)
static bool IsRevOpcode(const unsigned Opcode)
static bool encodeCnt(const AMDGPU::IsaVersion ISA, int64_t &IntVal, int64_t CntVal, bool Saturate, unsigned(*encode)(const IsaVersion &Version, unsigned, unsigned), unsigned(*decode)(const IsaVersion &Version, unsigned))
static MCRegister getSpecialRegForName(StringRef RegName)
static void addOptionalImmOperand(MCInst &Inst, const OperandVector &Operands, AMDGPUAsmParser::OptionalImmIndexMap &OptionalIdx, AMDGPUOperand::ImmTy ImmT, int64_t Default=0, std::optional< unsigned > InsertAt=std::nullopt)
static void cvtVOP3DstOpSelOnly(MCInst &Inst, const MCRegisterInfo &MRI)
static bool isRegOrImmWithInputMods(const MCInstrDesc &Desc, unsigned OpNum)
static const fltSemantics * getOpFltSemantics(uint8_t OperandType)
static bool isInvalidVOPDY(const OperandVector &Operands, uint64_t InvalidOprIdx)
static std::string AMDGPUMnemonicSpellCheck(StringRef S, const FeatureBitset &FBS, unsigned VariantID=0)
static LLVM_READNONE unsigned encodeBitmaskPerm(const unsigned AndMask, const unsigned OrMask, const unsigned XorMask)
static bool isSafeTruncation(int64_t Val, unsigned Size)
AMDHSA kernel descriptor MCExpr struct for use in MC layer.
Provides AMDGPU specific target descriptions.
Enums shared between the AMDGPU backend (LLVM) and the ELF linker (LLD) for the .amdgpu....
AMDHSA kernel descriptor definitions.
static bool parseExpr(MCAsmParser &MCParser, const MCExpr *&Value, raw_ostream &Err)
MC layer struct for AMDGPUMCKernelCodeT, provides MCExpr functionality where required.
@ AMD_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32
This file declares a class to represent arbitrary precision floating point values and provide a varie...
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
#define LLVM_EXTERNAL_VISIBILITY
static llvm::Expected< InlineInfo > decode(GsymDataExtractor &Data, uint64_t &Offset, uint64_t BaseAddr)
Decode an InlineInfo in Data at the specified offset.
const AbstractManglingParser< Derived, Alloc >::OperatorInfo AbstractManglingParser< Derived, Alloc >::Ops[]
Loop::LoopBounds::Direction Direction
static bool hasFeature(StringRef Feature, const FeatureBitset &FeatureBits, ArrayRef< SubtargetFeatureKV > ProcFeatures)
Register const TargetRegisterInfo * TRI
static MCRegister getReg(const MCDisassembler *D, unsigned RC, unsigned RegNo)
static bool isReg(const MCInst &MI, unsigned OpNo)
MachineInstr unsigned OpIdx
ConstantRange Range(APInt(BitWidth, Low), APInt(BitWidth, High))
uint64_t IntrinsicInst * II
static cl::opt< RegAllocEvictionAdvisorAnalysisLegacy::AdvisorMode > Mode("regalloc-enable-advisor", cl::Hidden, cl::init(RegAllocEvictionAdvisorAnalysisLegacy::AdvisorMode::Default), cl::desc("Enable regalloc advisor mode"), cl::values(clEnumValN(RegAllocEvictionAdvisorAnalysisLegacy::AdvisorMode::Default, "default", "Default"), clEnumValN(RegAllocEvictionAdvisorAnalysisLegacy::AdvisorMode::Release, "release", "precompiled"), clEnumValN(RegAllocEvictionAdvisorAnalysisLegacy::AdvisorMode::Development, "development", "for training")))
Interface definition for SIInstrInfo.
static bool contains(SmallPtrSetImpl< ConstantExpr * > &Cache, ConstantExpr *Expr, Constant *C)
This file implements the SmallBitVector class.
StringSet - A set-like wrapper for the StringMap.
static TableGen::Emitter::Opt Y("gen-skeleton-entry", EmitSkeleton, "Generate example skeleton entry")
static void initialize(TargetLibraryInfoImpl &TLI, const Triple &T, const llvm::StringTable &StandardNames, VectorLibrary VecLib)
Initialize the set of available library functions based on the specified target triple.
static const char * getRegisterName(MCRegister Reg)
static const AMDGPUMCExpr * createMax(ArrayRef< const MCExpr * > Args, MCContext &Ctx)
static unsigned getNumExpectedArgs(VariantKind Kind)
static const AMDGPUMCExpr * createLit(LitModifier Lit, int64_t Value, MCContext &Ctx)
static const AMDGPUMCExpr * create(VariantKind Kind, ArrayRef< const MCExpr * > Args, MCContext &Ctx)
static const AMDGPUMCExpr * createExtraSGPRs(const MCExpr *VCCUsed, const MCExpr *FlatScrUsed, bool XNACKUsed, MCContext &Ctx)
Allow delayed MCExpr resolve of ExtraSGPRs (in case VCCUsed or FlatScrUsed are unresolvable but neede...
static const AMDGPUMCExpr * createAlignTo(const MCExpr *Value, const MCExpr *Align, MCContext &Ctx)
static std::optional< TargetID > parseTargetIDString(StringRef TargetIDDirective)
std::string toString() const
static const fltSemantics & IEEEsingle()
static const fltSemantics & BFloat()
static const fltSemantics & IEEEdouble()
static constexpr roundingMode rmNearestTiesToEven
static const fltSemantics & IEEEhalf()
opStatus
IEEE-754R 7: Default exception handling.
LLVM_ABI opStatus convert(const fltSemantics &ToSemantics, roundingMode RM, bool *losesInfo)
Represent a constant reference to an array (0 or more elements consecutively in memory),...
size_t size() const
Get the array size.
StringRef getString() const
Get the string for the current token, this includes all characters (for example, the quotes on string...
bool is(TokenKind K) const
Container class for subtarget features.
constexpr bool test(unsigned I) const
constexpr FeatureBitset & flip(unsigned I)
void printExpr(raw_ostream &, const MCExpr &) const
virtual void Initialize(MCAsmParser &Parser)
Initialize the extension for parsing using the given Parser.
static const MCBinaryExpr * createAdd(const MCExpr *LHS, const MCExpr *RHS, MCContext &Ctx, SMLoc Loc=SMLoc())
static const MCBinaryExpr * createDiv(const MCExpr *LHS, const MCExpr *RHS, MCContext &Ctx)
static const MCBinaryExpr * createSub(const MCExpr *LHS, const MCExpr *RHS, MCContext &Ctx)
static LLVM_ABI const MCConstantExpr * create(int64_t Value, MCContext &Ctx, bool PrintInHex=false, unsigned SizeInBytes=0)
Context object for machine code objects.
LLVM_ABI MCSymbol * getOrCreateSymbol(const Twine &Name)
Lookup the symbol inside with the specified Name.
Instances of this class represent a single low-level machine instruction.
unsigned getNumOperands() const
unsigned getOpcode() const
iterator insert(iterator I, const MCOperand &Op)
void addOperand(const MCOperand Op)
const MCOperand & getOperand(unsigned i) const
Describe properties that are true of each instruction in the target description file.
const MCInstrDesc & get(unsigned Opcode) const
Return the machine instruction descriptor that corresponds to the specified instruction opcode.
int16_t getOpRegClassID(const MCOperandInfo &OpInfo, unsigned HwModeId) const
Return the ID of the register class to use for OpInfo, for the active HwMode HwModeId.
Instances of this class represent operands of the MCInst class.
static MCOperand createExpr(const MCExpr *Val)
static MCOperand createReg(MCRegister Reg)
static MCOperand createImm(int64_t Val)
void setReg(MCRegister Reg)
Set the register number.
MCRegister getReg() const
Returns the register number.
const MCExpr * getExpr() const
MCParsedAsmOperand - This abstract class represents a source-level assembly instruction operand.
MCRegisterClass - Base class of TargetRegisterClass.
MCRegister getRegister(unsigned i) const
getRegister - Return the specified register in the class.
unsigned getNumRegs() const
getNumRegs - Return the number of registers in this class.
bool contains(MCRegister Reg) const
contains - Return true if the specified register is included in this register class.
MCRegisterInfo base class - We assume that the target defines a static array of MCRegisterDesc object...
bool regsOverlap(MCRegister RegA, MCRegister RegB) const
Returns true if the two registers are equal or alias each other.
const MCRegisterClass & getRegClass(unsigned i) const
Returns the register class associated with the enumeration value.
MCRegister getSubReg(MCRegister Reg, unsigned Idx) const
Returns the physical register number of sub-register "Index" for physical register RegNo.
Wrapper class representing physical registers. Should be passed by value.
constexpr bool isValid() const
virtual void emitInstruction(const MCInst &Inst, const MCSubtargetInfo &STI)
Emit the given Instruction into the current section.
Generic base class for all target subtargets.
MCSymbol - Instances of this class represent a symbol name in the MC file, and MCSymbols are created ...
bool isVariable() const
isVariable - Check if this is a variable symbol.
LLVM_ABI void setVariableValue(const MCExpr *Value)
void setRedefinable(bool Value)
Mark this symbol as redefinable.
const MCExpr * getVariableValue() const
Get the expression of the variable symbol.
MCTargetAsmParser - Generic interface to target specific assembly parsers.
uint64_t getScalarSizeInBits() const
TypeSize getSizeInBits() const
Returns the size of the specified MVT in bits.
MVT getScalarType() const
If this is a vector, return the element type, otherwise return this.
Ternary parse status returned by various parse* methods.
constexpr bool isFailure() const
static constexpr StatusTy Failure
constexpr bool isSuccess() const
static constexpr StatusTy Success
static constexpr StatusTy NoMatch
constexpr bool isNoMatch() const
constexpr unsigned id() const
Represents a location in source code.
static SMLoc getFromPointer(const char *Ptr)
constexpr const char * getPointer() const
constexpr bool isValid() const
iterator insert(iterator I, T &&Elt)
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
A wrapper around a string literal that serves as a proxy for constructing global tables of StringRefs...
Represent a constant reference to a string, i.e.
bool consume_back(StringRef Suffix)
Returns true if this StringRef has the given suffix and removes that suffix.
constexpr StringRef substr(size_t Start, size_t N=npos) const
Return a reference to the substring from [Start, Start + N).
bool starts_with(StringRef Prefix) const
Check if this string starts with the given Prefix.
constexpr bool empty() const
Check if the string is empty.
StringRef drop_front(size_t N=1) const
Return a StringRef equal to 'this' but with the first N elements dropped.
constexpr size_t size() const
Get the string size.
constexpr const char * data() const
Get a pointer to the start of the string (which may not be null terminated).
bool ends_with(StringRef Suffix) const
Check if this string ends with the given Suffix.
bool consume_front(char Prefix)
Returns true if this StringRef has the given prefix and removes that prefix.
bool contains(StringRef key) const
Check if the set contains the given key.
std::pair< typename Base::iterator, bool > insert(StringRef key)
A switch()-like statement whose cases are string literals.
StringSwitch & Case(StringLiteral S, T Value)
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
LLVM_ABI std::string str() const
Return the twine contents as a std::string.
std::pair< iterator, bool > insert(const ValueT &V)
This class implements an extremely fast bulk output stream that can only output to a stream.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
int encodeDepCtr(const StringRef Name, int64_t Val, unsigned &UsedOprMask, const MCSubtargetInfo &STI)
int getDefaultDepCtrEncoding(const MCSubtargetInfo &STI)
bool isSupportedTgtId(unsigned Id, const MCSubtargetInfo &STI)
unsigned getTgtId(const StringRef Name)
constexpr char Align[]
Key for Kernel::Arg::Metadata::mAlign.
constexpr char NumSGPRs[]
Key for Kernel::CodeProps::Metadata::mNumSGPRs.
constexpr char SymbolName[]
Key for Kernel::Metadata::mSymbolName.
constexpr char AssemblerDirectiveBegin[]
HSA metadata beginning assembler directive.
constexpr char AssemblerDirectiveEnd[]
HSA metadata ending assembler directive.
constexpr char AssemblerDirectiveBegin[]
Old HSA metadata beginning assembler directive for V2.
int64_t getHwregId(StringRef Name, const MCSubtargetInfo &STI)
static constexpr CustomOperand Operands[]
@ FIXED_NUM_SGPRS_FOR_INIT_BUG
unsigned getAddressableNumSGPRs(const MCSubtargetInfo &STI)
unsigned getVGPREncodingGranule(const MCSubtargetInfo &STI, std::optional< bool > EnableWavefrontSize32)
unsigned getSGPREncodingGranule(const MCSubtargetInfo &STI)
unsigned getLocalMemorySize(const MCSubtargetInfo &STI)
constexpr char AssemblerDirective[]
PAL metadata (old linear format) assembler directive.
constexpr char AssemblerDirectiveBegin[]
PAL metadata (new MsgPack format) beginning assembler directive.
constexpr char AssemblerDirectiveEnd[]
PAL metadata (new MsgPack format) ending assembler directive.
int64_t getMsgOpId(int64_t MsgId, StringRef Name, const MCSubtargetInfo &STI)
Map from a symbolic name for a sendmsg operation to the operation portion of the immediate encoding.
int64_t getMsgId(StringRef Name, const MCSubtargetInfo &STI)
Map from a symbolic name for a msg_id to the message portion of the immediate encoding.
uint64_t encodeMsg(uint64_t MsgId, uint64_t OpId, uint64_t StreamId)
bool msgSupportsStream(int64_t MsgId, int64_t OpId, const MCSubtargetInfo &STI)
bool isValidMsgId(int64_t MsgId, const MCSubtargetInfo &STI)
bool isValidMsgStream(int64_t MsgId, int64_t OpId, int64_t StreamId, const MCSubtargetInfo &STI, bool Strict)
bool msgRequiresOp(int64_t MsgId, const MCSubtargetInfo &STI)
bool isValidMsgOp(int64_t MsgId, int64_t OpId, const MCSubtargetInfo &STI, bool Strict)
ArrayRef< GFXVersion > getGFXVersions()
constexpr unsigned COMPONENTS[]
constexpr const char *const ModMatrixFmt[]
constexpr const char *const ModMatrixScaleFmt[]
constexpr const char *const ModMatrixScale[]
bool isPackedFP32Inst(unsigned Opc)
bool isInlinableLiteralBF16(int16_t Literal, bool HasInv2Pi)
bool isGFX10_BEncoding(const MCSubtargetInfo &STI)
bool isInlineValue(MCRegister Reg)
bool isPKFMACF16InlineConstant(uint32_t Literal, bool IsGFX11Plus)
LLVM_READONLY const MIMGInfo * getMIMGInfo(unsigned Opc)
bool isInlinableLiteralFP16(int16_t Literal, bool HasInv2Pi)
bool isSGPR(MCRegister Reg, const MCRegisterInfo *TRI)
Is Reg - scalar register.
MCRegister getMCReg(MCRegister Reg, const MCSubtargetInfo &STI)
If Reg is a pseudo reg, return the correct hardware register given STI otherwise return Reg.
FuncInfoFlags
Per-function flags packed into INFO_FLAGS entries.
uint8_t wmmaScaleF8F6F4FormatToNumRegs(unsigned Fmt)
const int OPR_ID_UNSUPPORTED
bool isInlinableLiteralV2I16(uint32_t Literal)
bool isHi16Reg(MCRegister Reg, const MCRegisterInfo &MRI)
unsigned getTemporalHintType(const MCInstrDesc TID)
int32_t getTotalNumVGPRs(bool has90AInsts, int32_t ArgNumAGPR, int32_t ArgNumVGPR)
bool isGFX10(const MCSubtargetInfo &STI)
LLVM_READONLY bool isLitExpr(const MCExpr *Expr)
bool isInlinableLiteralV2BF16(uint32_t Literal)
unsigned getMaxNumUserSGPRs(const MCSubtargetInfo &STI)
unsigned getNumFlatOffsetBits(const MCSubtargetInfo &ST)
For pre-GFX12 FLAT instructions the offset must be positive; MSB is ignored and forced to zero.
bool hasA16(const MCSubtargetInfo &STI)
bool isLegalSMRDEncodedSignedOffset(const MCSubtargetInfo &ST, int64_t EncodedOffset, bool IsBuffer)
bool isGFX12Plus(const MCSubtargetInfo &STI)
unsigned getNSAMaxSize(const MCSubtargetInfo &STI, bool HasSampler)
bool hasPackedD16(const MCSubtargetInfo &STI)
bool isGFX940(const MCSubtargetInfo &STI)
bool isInlinableLiteralV2F16(uint32_t Literal)
bool isHsaAbi(const MCSubtargetInfo &STI)
bool isGFX11(const MCSubtargetInfo &STI)
const int OPR_VAL_INVALID
bool getSMEMIsBuffer(unsigned Opc)
bool isGFX13(const MCSubtargetInfo &STI)
uint8_t mfmaScaleF8F6F4FormatToNumRegs(unsigned EncodingVal)
LLVM_ABI IsaVersion getIsaVersion(StringRef GPU)
bool isValid32BitLiteral(uint64_t Val, bool IsFP64)
CanBeVOPD getCanBeVOPD(unsigned Opc, unsigned EncodingFamily, bool VOPD3)
LLVM_READNONE bool isLegalDPALU_DPPControl(const MCSubtargetInfo &ST, unsigned DC)
bool isSI(const MCSubtargetInfo &STI)
unsigned decodeLgkmcnt(const IsaVersion &Version, unsigned Waitcnt)
unsigned getWaitcntBitMask(const IsaVersion &Version)
LLVM_READONLY bool hasNamedOperand(uint64_t Opcode, OpName NamedIdx)
bool isGFX9(const MCSubtargetInfo &STI)
unsigned getVOPDEncodingFamily(const MCSubtargetInfo &ST)
bool isGFX10_AEncoding(const MCSubtargetInfo &STI)
bool isKImmOperand(const MCInstrDesc &Desc, unsigned OpNo)
Is this a KImm operand?
bool isGFX90A(const MCSubtargetInfo &STI)
LLVM_READONLY const MIMGDimInfo * getMIMGDimInfoByEncoding(uint8_t DimEnc)
bool isInlinableLiteral32(int32_t Literal, bool HasInv2Pi)
bool isGFX12(const MCSubtargetInfo &STI)
unsigned encodeExpcnt(const IsaVersion &Version, unsigned Waitcnt, unsigned Expcnt)
bool hasMAIInsts(const MCSubtargetInfo &STI)
constexpr bool isSISrcOperand(const MCOperandInfo &OpInfo)
Is this an AMDGPU specific source operand?
LLVM_READONLY const MIMGDimInfo * getMIMGDimInfoByAsmSuffix(StringRef AsmSuffix)
bool hasMIMG_R128(const MCSubtargetInfo &STI)
bool hasG16(const MCSubtargetInfo &STI)
unsigned getAddrSizeMIMGOp(const MIMGBaseOpcodeInfo *BaseOpcode, const MIMGDimInfo *Dim, bool IsA16, bool IsG16Supported)
bool isGFX13Plus(const MCSubtargetInfo &STI)
bool hasArchitectedFlatScratch(const MCSubtargetInfo &STI)
LLVM_READONLY int64_t getLitValue(const MCExpr *Expr)
bool isGFX11Plus(const MCSubtargetInfo &STI)
bool isSISrcFPOperand(const MCInstrDesc &Desc, unsigned OpNo)
Is this floating-point operand?
bool isGFX10Plus(const MCSubtargetInfo &STI)
AMDGPU::TargetID TargetID
int64_t encode32BitLiteral(int64_t Imm, OperandType Type, bool IsLit)
bool isValidWMMAScaleFmtCombination(unsigned AFmt, unsigned AScale, unsigned BFmt, unsigned BScale)
@ OPERAND_KIMM32
Operand with 32-bit immediate that uses the constant bus.
@ OPERAND_REG_INLINE_C_FP64
@ OPERAND_REG_INLINE_C_BF16
@ OPERAND_REG_INLINE_C_V2BF16
@ OPERAND_REG_IMM_V2INT64
@ OPERAND_REG_IMM_V2INT16
@ OPERAND_REG_IMM_INT32
Operands with register, 32-bit, or 64-bit immediate.
@ OPERAND_REG_IMM_V2FP16_SPLAT
@ OPERAND_REG_INLINE_C_INT64
@ OPERAND_REG_INLINE_C_INT16
Operands with register or inline constant.
@ OPERAND_REG_IMM_NOINLINE_V2FP16
@ OPERAND_REG_INLINE_C_V2FP16
@ OPERAND_REG_INLINE_AC_INT32
Operands with an AccVGPR register or inline constant.
@ OPERAND_REG_INLINE_AC_FP32
@ OPERAND_REG_IMM_V2INT32
@ OPERAND_REG_INLINE_C_FP32
@ OPERAND_REG_INLINE_C_INT32
@ OPERAND_REG_INLINE_C_V2INT16
@ OPERAND_REG_INLINE_AC_FP64
@ OPERAND_REG_INLINE_C_FP16
@ OPERAND_INLINE_SPLIT_BARRIER_INT32
bool isDPALU_DPP(const MCInstrDesc &OpDesc, const MCInstrInfo &MII, const MCSubtargetInfo &ST)
bool hasGDS(const MCSubtargetInfo &STI)
bool isLegalSMRDEncodedUnsignedOffset(const MCSubtargetInfo &ST, int64_t EncodedOffset)
bool isGFX9Plus(const MCSubtargetInfo &STI)
bool hasDPPSrc1SGPR(const MCSubtargetInfo &STI)
const int OPR_ID_DUPLICATE
bool isVOPD(unsigned Opc)
VOPD::InstInfo getVOPDInstInfo(const MCInstrDesc &OpX, const MCInstrDesc &OpY)
unsigned encodeVmcnt(const IsaVersion &Version, unsigned Waitcnt, unsigned Vmcnt)
unsigned decodeExpcnt(const IsaVersion &Version, unsigned Waitcnt)
unsigned getRegBitWidth(const TargetRegisterClass &RC)
Get the size in bits of a register from the register class RC.
bool isGFX1250(const MCSubtargetInfo &STI)
const MIMGBaseOpcodeInfo * getMIMGBaseOpcode(unsigned Opc)
bool isVI(const MCSubtargetInfo &STI)
bool supportsScaleOffset(const MCInstrInfo &MII, unsigned Opcode)
MCRegister mc2PseudoReg(MCRegister Reg)
Convert hardware register Reg to a pseudo register.
unsigned hasKernargPreload(const MCSubtargetInfo &STI)
bool supportsWGP(const MCSubtargetInfo &STI)
LLVM_READNONE unsigned getOperandSize(const MCOperandInfo &OpInfo)
bool isCI(const MCSubtargetInfo &STI)
unsigned encodeLgkmcnt(const IsaVersion &Version, unsigned Waitcnt, unsigned Lgkmcnt)
LLVM_READONLY const MIMGBaseOpcodeInfo * getMIMGBaseOpcodeInfo(unsigned BaseOpcode)
bool isGFX1250Plus(const MCSubtargetInfo &STI)
unsigned decodeVmcnt(const IsaVersion &Version, unsigned Waitcnt)
bool isInlinableLiteralI16(int32_t Literal, bool HasInv2Pi)
bool hasVOPD(const MCSubtargetInfo &STI)
bool isInlinableLiteral64(int64_t Literal, bool HasInv2Pi)
Is this literal inlinable.
bool isPermlane16(unsigned Opc)
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
unsigned ID
LLVM IR allows arbitrary numbers to be used as calling convention identifiers.
@ C
The default llvm calling convention, compatible with C.
@ UNDEF
UNDEF - An undefined node.
Predicate getPredicate(unsigned Condition, unsigned Hint)
Return predicate consisting of specified condition and hint bits.
void validate(const Triple &TT, const FeatureBitset &FeatureBits)
constexpr bool hasIntClamp(const T &...O)
@ Valid
The data is already valid.
Scope
Defines the scope in which this symbol should be visible: Default – Visible in the public interface o...
Context & getContext() const
This is an optimization pass for GlobalISel generic memory operations.
bool errorToBool(Error Err)
Helper for converting an Error to a bool.
StringMapEntry< Value * > ValueName
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Printable print(const GCNRegPressure &RP, const GCNSubtarget *ST=nullptr, unsigned DynamicVGPRBlockSize=0)
unsigned encode(MaybeAlign A)
Returns a representation of the alignment that encodes undefined as 0.
constexpr bool isInt(int64_t x)
Checks if an integer fits into the given bit width.
static bool isMem(const MachineInstr &MI, unsigned Op)
LLVM_ABI std::pair< StringRef, StringRef > getToken(StringRef Source, StringRef Delimiters=" \t\n\v\f\r")
getToken - This function extracts one token from source, ignoring any leading characters that appear ...
static StringRef getCPU(StringRef CPU)
Processes a CPU name.
testing::Matcher< const detail::ErrorHolder & > Failed()
LLVM_ABI void PrintError(const Twine &Msg)
constexpr bool isUIntN(unsigned N, uint64_t x)
Checks if an unsigned integer fits into the given (dynamic) bit width.
constexpr bool isPowerOf2_64(uint64_t Value)
Return true if the argument is a power of two > 0 (64 bit edition.)
T bit_ceil(T Value)
Returns the smallest integral power of two no smaller than Value if Value is nonzero.
Target & getTheR600Target()
The target for R600 GPUs.
constexpr int popcount(T Value) noexcept
Count the number of set bits in a value.
RelativeUniformCounterPtr ValuesPtrExpr VTableAddr Value
SmallVectorImpl< std::unique_ptr< MCParsedAsmOperand > > OperandVector
MachineInstr * getImm(const MachineOperand &MO, const MachineRegisterInfo *MRI)
constexpr uint32_t Hi_32(uint64_t Value)
Return the high 32 bits of a 64 bit value.
constexpr uint64_t alignTo(uint64_t Size, Align A)
Returns a multiple of A needed to store Size bytes.
constexpr bool isUInt(uint64_t x)
Checks if an unsigned integer fits into the given bit width.
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
constexpr uint32_t Lo_32(uint64_t Value)
Return the low 32 bits of a 64 bit value.
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
LLVM_ATTRIBUTE_VISIBILITY_DEFAULT AnalysisKey InnerAnalysisManagerProxy< AnalysisManagerT, IRUnitT, ExtraArgTs... >::Key
MutableArrayRef(T &OneElt) -> MutableArrayRef< T >
constexpr T divideCeil(U Numerator, V Denominator)
Returns the integer ceil(Numerator / Denominator).
@ First
Helpers to iterate all locations in the MemoryEffectsBase class.
Target & getTheGCNTarget()
The target for GCN GPUs.
@ Sub
Subtraction of integers.
uint16_t MCPhysReg
An unsigned integer type large enough to represent all physical registers, but not necessarily virtua...
DWARFExpression::Operation Op
raw_ostream & operator<<(raw_ostream &OS, const APFixedPoint &FX)
unsigned M0(unsigned Val)
ArrayRef(const T &OneElt) -> ArrayRef< T >
auto find_if(R &&Range, UnaryPredicate P)
Provide wrappers to std::find_if which take ranges instead of having to pass begin/end explicitly.
constexpr bool isIntN(unsigned N, int64_t x)
Checks if a signed integer fits into the given (dynamic) bit width.
@ Enabled
Convert any .debug_str_offsets tables to DWARF64 if needed.
@ Default
The result value is uniform if and only if all operands are uniform.
void initDefault(const MCSubtargetInfo &STI, MCContext &Ctx, bool InitMCExpr=true)
void validate(const MCSubtargetInfo *STI, MCContext &Ctx)
uint32_t PrivateSegmentSize
SmallVector< std::pair< MCSymbol *, std::string >, 4 > IndirectCalls
SmallVector< std::pair< MCSymbol *, MCSymbol * >, 8 > Calls
SmallVector< FuncInfo, 8 > Funcs
SmallVector< std::pair< MCSymbol *, std::string >, 4 > TypeIds
SmallVector< std::pair< MCSymbol *, MCSymbol * >, 4 > Uses
Instruction set architecture version.
const MCExpr * compute_pgm_rsrc2
const MCExpr * kernarg_size
const MCExpr * kernarg_preload
const MCExpr * compute_pgm_rsrc3
const MCExpr * private_segment_fixed_size
const MCExpr * compute_pgm_rsrc1
static void bits_set(const MCExpr *&Dst, const MCExpr *Value, uint32_t Shift, uint32_t Mask, MCContext &Ctx)
const MCExpr * group_segment_fixed_size
static MCKernelDescriptor getDefaultAmdhsaKernelDescriptor(const MCSubtargetInfo *STI, MCContext &Ctx)
const MCExpr * kernel_code_properties
RegisterMCAsmParser - Helper template for registering a target specific assembly parser,...
uint32_t group_segment_fixed_size
uint32_t private_segment_fixed_size