#include "llvm/IR/IntrinsicsX86.h"

using namespace llvm;
using namespace PatternMatch;

#define DEBUG_TYPE "lower-amx-type"
static bool isAMXCast(Instruction *II) {
  return match(II,
               m_Intrinsic<Intrinsic::x86_cast_vector_to_tile>(m_Value())) ||
         match(II, m_Intrinsic<Intrinsic::x86_cast_tile_to_vector>(m_Value()));
}
static bool isAMXIntrinsic(Value *I) {
  auto *II = dyn_cast<IntrinsicInst>(I);
  if (!II)
    return false;
  // ...
  if (V->getType()->isX86_AMXTy())
    return true;
  // ...
}

static bool containsAMXCode(Function &F) {
  // ...
  if (I.getType()->isX86_AMXTy())
    return true;
  // ...
}
static AllocaInst *createAllocaInstAtEntry(IRBuilder<> &Builder, BasicBlock *BB,
                                           Type *Ty) {
  // ...
  unsigned AllocaAS = DL.getAllocaAddrSpace();
  AllocaInst *AllocaRes =
      new AllocaInst(Ty, AllocaAS, "", F.getEntryBlock().begin());
  // ...
}

static Instruction *getFirstNonAllocaInTheEntryBlock(Function &F) {
  for (Instruction &I : F.getEntryBlock())
    if (!isa<AllocaInst>(&I))
      return &I;
  // ...
}
class ShapeCalculator {
  // ...
  // Cache the row value implied by a column value (and vice versa) so the
  // conversion is only materialized once per shape operand.
  std::map<Value *, Value *> Col2Row, Row2Col;
  // ...
};
Value *ShapeCalculator::getRowFromCol(Instruction *II, Value *V,
                                      unsigned Granularity) {
  if (Col2Row.count(V))
    return Col2Row[V];
  // ...
  Value *RealRow = nullptr;
  if (isa<ConstantInt>(V))
    RealRow =
        Builder.getInt16((cast<ConstantInt>(V)->getSExtValue()) / Granularity);
  else if (isa<Instruction>(V)) {
    // ...
    cast<Instruction>(RealRow)->moveAfter(cast<Instruction>(V));
  }
  // ...
  Col2Row[V] = RealRow;
  return RealRow;
}
Value *ShapeCalculator::getColFromRow(Instruction *II, Value *V,
                                      unsigned Granularity) {
  if (Row2Col.count(V))
    return Row2Col[V];
  // ...
  Value *RealCol = nullptr;
  if (isa<ConstantInt>(V))
    RealCol =
        Builder.getInt16((cast<ConstantInt>(V)->getSExtValue()) * Granularity);
  else if (isa<Instruction>(V)) {
    // ...
    cast<Instruction>(RealCol)->moveAfter(cast<Instruction>(V));
  }
  // ...
  Row2Col[V] = RealCol;
  return RealCol;
}
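// Minimal sketch (hypothetical helper, not in the pass): the constant-folded
// path of getRowFromCol above, i.e. Row = Col / Granularity when the shape
// operand is a ConstantInt. For a 4-byte element type a 64-byte column width
// maps to 16 rows.
static Value *constantRowFromCol(IRBuilder<> &Builder, ConstantInt *Col,
                                 unsigned Granularity) {
  return Builder.getInt16(Col->getSExtValue() / Granularity);
}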
std::pair<Value *, Value *> ShapeCalculator::getShape(IntrinsicInst *II,
                                                      unsigned OpNo) {
  // ...
  Value *Row = nullptr, *Col = nullptr;
  switch (II->getIntrinsicID()) {
  default:
    llvm_unreachable("Expect amx intrinsics");
  case Intrinsic::x86_t2rpntlvwz0_internal:
  case Intrinsic::x86_t2rpntlvwz0t1_internal:
  case Intrinsic::x86_t2rpntlvwz1_internal:
  case Intrinsic::x86_t2rpntlvwz1t1_internal:
  case Intrinsic::x86_tileloadd64_internal:
  case Intrinsic::x86_tileloaddt164_internal:
  case Intrinsic::x86_tilestored64_internal:
  case Intrinsic::x86_t2rpntlvwz0rs_internal:
  case Intrinsic::x86_t2rpntlvwz0rst1_internal:
  case Intrinsic::x86_t2rpntlvwz1rs_internal:
  case Intrinsic::x86_t2rpntlvwz1rst1_internal:
  case Intrinsic::x86_tileloaddrs64_internal:
  case Intrinsic::x86_tileloaddrst164_internal: {
    Row = II->getArgOperand(0);
    Col = II->getArgOperand(1);
    break;
  }
  // a * b + c: the shape depends on which operand is being asked about.
  case Intrinsic::x86_tcmmimfp16ps_internal:
  case Intrinsic::x86_tcmmrlfp16ps_internal:
  case Intrinsic::x86_tdpbssd_internal:
  case Intrinsic::x86_tdpbsud_internal:
  case Intrinsic::x86_tdpbusd_internal:
  case Intrinsic::x86_tdpbuud_internal:
  case Intrinsic::x86_tdpbf16ps_internal:
  case Intrinsic::x86_tdpfp16ps_internal:
  case Intrinsic::x86_tmmultf32ps_internal: {
    switch (OpNo) {
    case 3:
      Row = II->getArgOperand(0);
      Col = II->getArgOperand(1);
      break;
    case 4:
      Row = II->getArgOperand(0);
      Col = II->getArgOperand(2);
      break;
    case 5:
      // ...
      Col = II->getArgOperand(1);
      break;
    }
    break;
  }
  case Intrinsic::x86_ttransposed_internal:
  case Intrinsic::x86_tconjtfp16_internal: {
    assert((OpNo == 2) && "Illegal Operand Number.");
    // ...
    break;
  }
  case Intrinsic::x86_tcvtrowd2ps_internal:
  case Intrinsic::x86_tcvtrowps2pbf16h_internal:
  case Intrinsic::x86_tcvtrowps2pbf16l_internal:
  case Intrinsic::x86_tcvtrowps2phh_internal:
  case Intrinsic::x86_tcvtrowps2phl_internal:
  case Intrinsic::x86_tilemovrow_internal: {
    assert(OpNo == 2 && "Illegal Operand Number.");
    Row = II->getArgOperand(0);
    Col = II->getArgOperand(1);
    break;
  }
  case Intrinsic::x86_ttdpbf16ps_internal:
  case Intrinsic::x86_ttdpfp16ps_internal:
  case Intrinsic::x86_ttcmmimfp16ps_internal:
  case Intrinsic::x86_ttcmmrlfp16ps_internal:
  case Intrinsic::x86_tconjtcmmimfp16ps_internal:
  case Intrinsic::x86_ttmmultf32ps_internal: {
    switch (OpNo) {
    case 3:
      Row = II->getArgOperand(0);
      Col = II->getArgOperand(1);
      break;
    // ...
    case 5:
      // ...
      Col = II->getArgOperand(1);
      break;
    }
    break;
  }
  }

  return std::make_pair(Row, Col);
}
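// Usage sketch (hypothetical wrapper): fetch the shape an AMX intrinsic
// expects for the tile operand behind a given use, which is how the callers
// below pair a use with getShape.
static std::pair<Value *, Value *> shapeForUse(ShapeCalculator &SC, Use &U) {
  auto *II = cast<IntrinsicInst>(U.getUser());
  return SC.getShape(II, U.getOperandNo());
}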
std::pair<Value *, Value *> ShapeCalculator::getShape(PHINode *Phi) {
  Use &U = *(Phi->use_begin());
  unsigned OpNo = U.getOperandNo();
  User *V = U.getUser();
  // Only the first user is traversed; if no shape is found, return nullptr so
  // the undef/zero optimization is skipped.
  while (V) {
    if (isAMXCast(dyn_cast<Instruction>(V))) {
      // ...
      Use &U = *(V->use_begin());
      OpNo = U.getOperandNo();
      V = U.getUser();
    } else if (isAMXIntrinsic(V)) {
      return getShape(cast<IntrinsicInst>(V), OpNo);
    } else if (isa<PHINode>(V)) {
      // ...
      Use &U = *(V->use_begin());
      V = U.getUser();
    } else {
      break;
    }
  }

  return std::make_pair(nullptr, nullptr);
}
class X86LowerAMXType {
  Function &Func;
  ShapeCalculator *SC;
  // ...
  std::map<Value *, Value *> Col2Row, Row2Col;
  // ...
};
void X86LowerAMXType::combineLoadBitcast(LoadInst *LD, BitCastInst *Bitcast) {
  Value *Row = nullptr, *Col = nullptr;
  Use &U = *(Bitcast->use_begin());
  unsigned OpNo = U.getOperandNo();
  auto *II = cast<IntrinsicInst>(U.getUser());
  std::tie(Row, Col) = SC->getShape(II, OpNo);
  IRBuilder<> Builder(Bitcast);
  // Use the maximum column as stride.
  Value *Stride = Builder.getInt64(64);
  Value *I8Ptr = LD->getOperand(0);
  std::array<Value *, 4> Args = {Row, Col, I8Ptr, Stride};

  Value *NewInst =
      Builder.CreateIntrinsic(Intrinsic::x86_tileloadd64_internal, {}, Args);
  Bitcast->replaceAllUsesWith(NewInst);
}
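// Minimal sketch (fixed 16x64 shape assumed rather than taken from a real
// intrinsic user): the tileloadd64 call combineLoadBitcast builds, with row,
// col, base pointer and byte stride as operands.
static Value *emitFixedShapeTileLoad(IRBuilder<> &Builder, Value *Ptr) {
  Value *Row = Builder.getInt16(16);    // assumed row count
  Value *Col = Builder.getInt16(64);    // assumed column width in bytes
  Value *Stride = Builder.getInt64(64); // maximum stride, as the pass uses
  std::array<Value *, 4> Args = {Row, Col, Ptr, Stride};
  return Builder.CreateIntrinsic(Intrinsic::x86_tileloadd64_internal, {}, Args);
}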
void X86LowerAMXType::combineBitcastStore(BitCastInst *Bitcast, StoreInst *ST) {
  Value *Tile = Bitcast->getOperand(0);
  auto *II = cast<IntrinsicInst>(Tile);
  // The tile is the output of an AMX intrinsic: the first operand is the row,
  // the second operand is the column.
  Value *Row = II->getOperand(0);
  Value *Col = II->getOperand(1);
  IRBuilder<> Builder(ST);
  // Use the maximum column as stride; it must match the load stride.
  Value *Stride = Builder.getInt64(64);
  Value *I8Ptr = ST->getOperand(1);
  std::array<Value *, 5> Args = {Row, Col, I8Ptr, Stride, Tile};
  Builder.CreateIntrinsic(Intrinsic::x86_tilestored64_internal, {}, Args);
  // If the bitcast has other users, reload the stored vector for them.
  // ...
  Value *Vec = Builder.CreateLoad(Bitcast->getType(), ST->getOperand(1));
  Bitcast->replaceAllUsesWith(Vec);
}
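// Minimal sketch (hypothetical helper): the tilestored64 call that
// combineBitcastStore builds; operands are row, col, base pointer, byte
// stride, and the tile value being stored.
static void emitTileStore(IRBuilder<> &Builder, Value *Row, Value *Col,
                          Value *Ptr, Value *Tile) {
  Value *Stride = Builder.getInt64(64);
  std::array<Value *, 5> Args = {Row, Col, Ptr, Stride, Tile};
  Builder.CreateIntrinsic(Intrinsic::x86_tilestored64_internal, {}, Args);
}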
bool X86LowerAMXType::transformBitcast(BitCastInst *Bitcast) {
  IRBuilder<> Builder(Bitcast);
  AllocaInst *AllocaAddr;
  Value *I8Ptr, *Stride;
  auto *Src = Bitcast->getOperand(0);

  auto Prepare = [&](Type *MemTy) {
    AllocaAddr = createAllocaInstAtEntry(Builder, Bitcast->getParent(), MemTy);
    I8Ptr = Builder.CreateBitCast(AllocaAddr, Builder.getPtrTy());
    Stride = Builder.getInt64(64);
  };

  if (Bitcast->getType()->isX86_AMXTy()) {
    // Bitcast from <256 x i32> to x86_amx: spill the vector to a stack slot
    // and reload it as a tile.
    Use &U = *(Bitcast->use_begin());
    unsigned OpNo = U.getOperandNo();
    auto *II = dyn_cast<IntrinsicInst>(U.getUser());
    if (!II)
      return false;
    Prepare(Bitcast->getOperand(0)->getType());
    Builder.CreateStore(Src, AllocaAddr);
    // TODO: a constant operand could be picked for the shape.
    Value *Row = nullptr, *Col = nullptr;
    std::tie(Row, Col) = SC->getShape(II, OpNo);
    std::array<Value *, 4> Args = {Row, Col, I8Ptr, Stride};
    Value *NewInst =
        Builder.CreateIntrinsic(Intrinsic::x86_tileloadd64_internal, {}, Args);
    Bitcast->replaceAllUsesWith(NewInst);
  } else {
    // Bitcast from x86_amx to <256 x i32>: store the tile to a stack slot and
    // reload it as a vector.
    auto *II = dyn_cast<IntrinsicInst>(Src);
    if (!II)
      return false;
    Prepare(Src->getType());
    Value *Row = II->getOperand(0);
    Value *Col = II->getOperand(1);
    std::array<Value *, 5> Args = {Row, Col, I8Ptr, Stride, Src};
    Builder.CreateIntrinsic(Intrinsic::x86_tilestored64_internal, {}, Args);
    Value *NewInst = Builder.CreateLoad(Bitcast->getType(), AllocaAddr);
    Bitcast->replaceAllUsesWith(NewInst);
  }
  return true;
}
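// Minimal sketch (hypothetical helper, mirrors createAllocaInstAtEntry): the
// 64-byte aligned entry-block stack slot that transformBitcast round-trips
// tile data through. Assumes this file's existing includes (Align, AllocaInst).
static AllocaInst *makeTileScratch(Function &F, Type *Ty) {
  const DataLayout &DL = F.getParent()->getDataLayout();
  AllocaInst *A = new AllocaInst(Ty, DL.getAllocaAddrSpace(), "",
                                 F.getEntryBlock().begin());
  A->setAlignment(Align(64));
  return A;
}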
bool X86LowerAMXType::visit() {
  SmallVector<Instruction *, 8> DeadInsts;
  // ...
      auto *Bitcast = dyn_cast<BitCastInst>(&Inst);
      if (!Bitcast)
        continue;

      Value *Src = Bitcast->getOperand(0);
      if (Bitcast->getType()->isX86_AMXTy()) {
        // ...
        if (transformBitcast(Bitcast))
          DeadInsts.push_back(Bitcast);
        // ...
        combineLoadBitcast(LD, Bitcast);
        // ...
      } else if (Src->getType()->isX86_AMXTy()) {
        // ...
        ST = dyn_cast<StoreInst>(U.getUser());
        // ...
        if (transformBitcast(Bitcast))
          DeadInsts.push_back(Bitcast);
        // ...
        combineBitcastStore(Bitcast, ST);
        // ...
      }
  // ...

  bool C = !DeadInsts.empty();
  for (auto *Inst : DeadInsts)
    Inst->eraseFromParent();

  return C;
}
static Value *getAllocaPos(BasicBlock *BB) {
  // ...
  unsigned AllocaAS = DL.getAllocaAddrSpace();
  Type *V256I32Ty = VectorType::get(Builder.getInt32Ty(), 256, false);
  AllocaInst *AllocaRes =
      new AllocaInst(V256I32Ty, AllocaAS, "", F->getEntryBlock().begin());
  // ...
}
static Instruction *createTileStore(Instruction *TileDef, Value *Ptr) {
  // ...
  auto *II = dyn_cast<IntrinsicInst>(TileDef);
  unsigned Idx = 0;
  // Extract the tile from a multi-tile definition.
  if (auto *Extr = dyn_cast<ExtractValueInst>(TileDef)) {
    assert(Extr->hasIndices() && "Tile extract miss index!");
    Idx = Extr->getIndices()[0];
    II = cast<IntrinsicInst>(Extr->getOperand(0));
  }

  assert(II && "Not tile intrinsic!");
  // ...
  std::array<Value *, 5> Args = {Row, Col, Ptr, Stride, TileDef};
  Instruction *TileStore =
      Builder.CreateIntrinsic(Intrinsic::x86_tilestored64_internal, {}, Args);
  return TileStore;
}
static void replaceWithTileLoad(Use &U, Value *Ptr, bool IsPHI = false) {
  Value *V = U.get();
  assert(V->getType()->isX86_AMXTy() && "Not define tile!");

  // Get the tile shape from the instruction that defines V.
  IntrinsicInst *II = nullptr;
  unsigned Idx = 0;
  if (IsPHI) {
    Value *PhiOp = cast<PHINode>(V)->getIncomingValue(0);
    II = cast<IntrinsicInst>(PhiOp);
  } else if (auto *Extr = dyn_cast<ExtractValueInst>(V)) {
    assert(Extr->hasIndices() && "Tile extract miss index!");
    Idx = Extr->getIndices()[0];
    II = cast<IntrinsicInst>(Extr->getOperand(0));
  } else {
    II = cast<IntrinsicInst>(V);
  }
  // ...
  Instruction *UserI = cast<Instruction>(U.getUser());
  IRBuilder<> Builder(UserI);
  Value *Stride = Builder.getInt64(64);
  std::array<Value *, 4> Args = {Row, Col, Ptr, Stride};
  Value *TileLoad =
      Builder.CreateIntrinsic(Intrinsic::x86_tileloadd64_internal, {}, Args);
  UserI->replaceUsesOfWith(V, TileLoad);
}
static bool isIncomingOfPHI(Instruction *I) {
  for (Use &U : I->uses()) {
    User *V = U.getUser();
    if (isa<PHINode>(V))
      return true;
  }
  return false;
}
class X86VolatileTileData {
  // ...
  bool volatileTileData();
  // ...
};
Value *X86VolatileTileData::updatePhiIncomings(
    BasicBlock *BB, SmallVector<Instruction *, 2> &Incomings) {
  // ...
  for (auto *I : Incomings) {
    // ...
    // Every use of the incoming tile (except the PHI and the store itself)
    // should load from the stored memory instead.
    for (Use &U : I->uses()) {
      User *V = U.getUser();
      if (isa<PHINode>(V) || V == Store)
        continue;
      // ...
    }
  }
  // ...
}

void X86VolatileTileData::replacePhiDefWithLoad(Instruction *PHI,
                                                Value *StorePtr) {
  for (Use &U : PHI->uses())
    replaceWithTileLoad(U, StorePtr, true);
  PHI->eraseFromParent();
}
void X86VolatileTileData::volatileTilePHI(PHINode *PHI) {
  BasicBlock *BB = PHI->getParent();
  SmallVector<Instruction *, 2> Incomings;

  for (unsigned I = 0, E = PHI->getNumIncomingValues(); I != E; ++I) {
    Instruction *Inst = dyn_cast<Instruction>(PHI->getIncomingValue(I));
    assert(Inst && "We shouldn't fold AMX instruction!");
    Incomings.push_back(Inst);
  }

  Value *StorePtr = updatePhiIncomings(BB, Incomings);
  replacePhiDefWithLoad(PHI, StorePtr);
}
void X86VolatileTileData::volatileTileNonPHI(Instruction *I) {
  // ...
  // All uses of the tile definition should load from the stored memory.
  for (Use &U : I->uses()) {
    User *V = U.getUser();
    assert(!isa<PHINode>(V) && "PHI Nodes should be excluded!");
    // ...
  }
}
bool X86VolatileTileData::volatileTileData() {
  bool Changed = false;
  for (BasicBlock &BB : F) {
    // ...
      if (!I.getType()->isX86_AMXTy())
        continue;
      if (isa<PHINode>(&I))
        PHIInsts.push_back(&I);
    // ...
    // First handle the AMX definitions that do not feed a PHI node.
      volatileTileNonPHI(I);
    // ...
      volatileTilePHI(dyn_cast<PHINode>(I));
    // ...
  }
  return Changed;
}
class X86LowerAMXCast {
  Function &Func;
  ShapeCalculator *SC;
  std::unique_ptr<DominatorTree> DT;

public:
  X86LowerAMXCast(Function &F, ShapeCalculator *ShapeC)
      : Func(F), SC(ShapeC), DT(nullptr) {}
  // ...
  bool transformAllAMXCast();
  // ...
};
static bool DCEInstruction(Instruction *I,
                           SmallSetVector<Instruction *, 16> &WorkList,
                           const TargetLibraryInfo *TLI) {
  if (isInstructionTriviallyDead(I, TLI)) {
    // ...
    // Null out all of the instruction's operands to see if any operand
    // becomes dead as we go.
    for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) {
      Value *OpV = I->getOperand(i);
      I->setOperand(i, nullptr);
      // ...
      if (Instruction *OpI = dyn_cast<Instruction>(OpV)) {
        // ...
      }
    }
    I->eraseFromParent();
    return true;
  }
  return false;
}
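// Usage sketch (hypothetical driver): drain a worklist with DCEInstruction so
// operands that become dead are cleaned up transitively, which is how
// combineAMXcast below processes its DeadInst set.
static void drainDeadWorkList(SmallSetVector<Instruction *, 16> &WorkList,
                              const TargetLibraryInfo *TLI) {
  while (!WorkList.empty()) {
    Instruction *I = WorkList.pop_back_val();
    DCEInstruction(I, WorkList, TLI);
  }
}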
bool X86LowerAMXCast::optimizeAMXCastFromPhi(
    IntrinsicInst *CI, PHINode *PN,
    SmallSetVector<Instruction *, 16> &DeadInst) {
  IRBuilder<> Builder(CI);
  Value *Src = CI->getOperand(0);
  Type *SrcTy = Src->getType(); // Type B
  Type *DestTy = CI->getType(); // Type A
  // ...
  while (!PhiWorklist.empty()) {
    auto *OldPN = PhiWorklist.pop_back_val();
    for (unsigned I = 0; I < OldPN->getNumOperands(); ++I) {
      Value *IncValue = OldPN->getIncomingValue(I);
      // An undef or all-zero constant incoming value is materialized as a
      // zero tile cast back to the vector type instead of bailing out.
      if (isa<Constant>(IncValue)) {
        auto *IncConst = dyn_cast<Constant>(IncValue);
        if (!isa<UndefValue>(IncValue) && !IncConst->isZeroValue())
          return false;
        Value *Row = nullptr, *Col = nullptr;
        std::tie(Row, Col) = SC->getShape(OldPN);
        // TODO: handle the case in which the shape is not a constant.
        if (!Row || !Col || !isa<Constant>(Row) || !isa<Constant>(Col))
          return false;
        auto *Block = OldPN->getIncomingBlock(I);
        // ...
        Value *NewInst = Builder.CreateIntrinsic(
            Intrinsic::x86_tilezero_internal, {}, {Row, Col});
        NewInst = Builder.CreateIntrinsic(Intrinsic::x86_cast_tile_to_vector,
                                          {IncValue->getType()}, {NewInst});
        OldPN->setIncomingValue(I, NewInst);
        continue;
      }
      if (auto *PNode = dyn_cast<PHINode>(IncValue)) {
        if (OldPhiNodes.insert(PNode))
          PhiWorklist.push_back(PNode);
        continue;
      }
      Instruction *ACI = dyn_cast<Instruction>(IncValue);
      // The incoming value must itself be an AMX cast with matching types.
      // ...
      if (TyA != DestTy || TyB != SrcTy)
        return false;
    }
  }

  // Check that every user of every old PHI node is something we can handle.
  for (auto *OldPN : OldPhiNodes) {
    // ...
      if (TyA != DestTy || TyB != SrcTy)
        return false;
    } else if (auto *PHI = dyn_cast<PHINode>(V)) {
      // ...
      if (OldPhiNodes.count(PHI) == 0)
        return false;
      // ...
    }
    // ...
  }

  // For each old PHI node, create a corresponding new PHI node with the
  // destination type.
  for (auto *OldPN : OldPhiNodes) {
    Builder.SetInsertPoint(OldPN);
    PHINode *NewPN = Builder.CreatePHI(DestTy, OldPN->getNumOperands());
    NewPNodes[OldPN] = NewPN;
  }

  // Fill in the operands of the new PHI nodes.
  for (auto *OldPN : OldPhiNodes) {
    PHINode *NewPN = NewPNodes[OldPN];
    for (unsigned j = 0, e = OldPN->getNumOperands(); j != e; ++j) {
      Value *V = OldPN->getOperand(j);
      Value *NewV = nullptr;
      // ...
      else if (auto *PrevPN = dyn_cast<PHINode>(V))
        NewV = NewPNodes[PrevPN];
      // ...
      NewPN->addIncoming(NewV, OldPN->getIncomingBlock(j));
    }
  }

  // Replace the users of the old PHI nodes and queue dead casts and PHIs for
  // cleanup.
  for (auto *OldPN : OldPhiNodes) {
    PHINode *NewPN = NewPNodes[OldPN];
    // ...
      assert(TyA == DestTy && TyB == SrcTy);
      // ...
    } else if (auto *PHI = dyn_cast<PHINode>(V)) {
      // ...
    }
    // ...
  }
  return true;
}
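// Minimal sketch (hypothetical helper): the tilezero + cast_tile_to_vector
// pair optimizeAMXCastFromPhi emits above for an undef/zero PHI incoming
// value, given a constant shape.
static Value *materializeZeroTile(IRBuilder<> &Builder, Value *Row, Value *Col,
                                  Type *VecTy) {
  Value *Tile = Builder.CreateIntrinsic(Intrinsic::x86_tilezero_internal, {},
                                        {Row, Col});
  return Builder.CreateIntrinsic(Intrinsic::x86_cast_tile_to_vector, {VecTy},
                                 {Tile});
}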
static Value *getShapeFromAMXIntrinsic(Value *Inst, unsigned ShapeIdx,
                                       bool IsRow) {
  if (!isAMXIntrinsic(Inst))
    return nullptr;

  auto *II = cast<IntrinsicInst>(Inst);
  if (IsRow)
    return II->getOperand(0);

  assert(ShapeIdx < 2 && "Currently 2 shapes in 1 instruction at most!");
  return II->getOperand(ShapeIdx + 1);
}
bool X86LowerAMXCast::combineCastStore(IntrinsicInst *Cast, StoreInst *ST) {
  Value *Tile = Cast->getOperand(0);
  assert(Tile->getType()->isX86_AMXTy() && "Not Tile Operand!");

  // TODO: specially handle the multi-use case.
  if (Tile->getNumUses() != 1)
    return false;

  IRBuilder<> Builder(ST);
  Value *Row = nullptr;
  Value *Col = nullptr;

  if (isAMXIntrinsic(Tile)) {
    auto *II = cast<IntrinsicInst>(Tile);
    // The tile is the output of an AMX intrinsic: the first operand is the
    // row, the second operand is the column.
    Row = II->getOperand(0);
    Col = II->getOperand(1);
  } else {
    // The tile comes out of a multi-tile definition via extractvalue.
    auto *II = cast<ExtractValueInst>(Tile);
    assert(II && "We meet unhandled source in fetching tile value!");
    unsigned ShapeIdx = II->getIndices()[0];
    Value *Tiles = II->getOperand(0);
    Row = getShapeFromAMXIntrinsic(Tiles, ShapeIdx, true);
    Col = getShapeFromAMXIntrinsic(Tiles, ShapeIdx, false);
  }
  assert(Row && Col && "Shape got failed!");

  // The stride equals the column count measured in bytes.
  Value *Stride = Builder.CreateSExt(Col, Builder.getInt64Ty());
  Value *I8Ptr = Builder.CreateBitCast(ST->getOperand(1), Builder.getPtrTy());
  std::array<Value *, 5> Args = {Row, Col, I8Ptr, Stride, Tile};
  Builder.CreateIntrinsic(Intrinsic::x86_tilestored64_internal, {}, Args);
  return true;
}
bool X86LowerAMXCast::combineLoadCast(IntrinsicInst *Cast, LoadInst *LD) {
  bool EraseLoad = true;
  Value *Row = nullptr, *Col = nullptr;
  Use &U = *(Cast->use_begin());
  unsigned OpNo = U.getOperandNo();
  auto *II = cast<IntrinsicInst>(U.getUser());
  // ...
  std::tie(Row, Col) = SC->getShape(II, OpNo);
  IRBuilder<> Builder(LD);
  // The stride equals the column count measured in bytes.
  Value *Stride = Builder.CreateSExt(Col, Builder.getInt64Ty());
  Value *I8Ptr;

  if (!DT->dominates(Row, LD) || !DT->dominates(Col, LD)) {
    // The shape does not dominate the load: spill the loaded vector to a
    // stack slot and load the tile from there right before the cast.
    // ...
    Builder.SetInsertPoint(&*std::next(LD->getIterator()));
    Builder.CreateStore(LD, AllocaAddr);

    Builder.SetInsertPoint(Cast);
    I8Ptr = Builder.CreateBitCast(AllocaAddr, Builder.getPtrTy());
    EraseLoad = false;
  } else {
    I8Ptr = Builder.CreateBitCast(LD->getOperand(0), Builder.getPtrTy());
  }
  std::array<Value *, 4> Args = {Row, Col, I8Ptr, Stride};

  Value *NewInst =
      Builder.CreateIntrinsic(Intrinsic::x86_tileloadd64_internal, {}, Args);
  Cast->replaceAllUsesWith(NewInst);
  return EraseLoad;
}
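// Minimal sketch (hypothetical helper): the dominance test combineLoadCast
// relies on. Only when both shape values dominate the load can the load's own
// pointer feed tileloadd64 directly; otherwise the value is staged through a
// stack slot as shown above.
static bool shapeDominatesLoad(DominatorTree &DT, Value *Row, Value *Col,
                               LoadInst *LD) {
  return DT.dominates(Row, LD) && DT.dominates(Col, LD);
}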
bool X86LowerAMXCast::combineLdSt(SmallVectorImpl<Instruction *> &Casts) {
  bool Change = false;
  for (auto *Cast : Casts) {
    auto *II = cast<IntrinsicInst>(Cast);
    if (II->getIntrinsicID() == Intrinsic::x86_cast_tile_to_vector) {
      // A tile->vector cast followed by a store becomes a tilestored64 call.
      // ...
        if (combineCastStore(cast<IntrinsicInst>(Cast), Store)) {
          DeadStores.push_back(Store);
          Change = true;
        }
      // ...
      for (auto *Store : DeadStores)
        Store->eraseFromParent();
    } else {
      // A load followed by a vector->tile cast becomes a tileloadd64 call.
      auto *Load = dyn_cast<LoadInst>(Cast->getOperand(0));
      if (!Load || !Load->hasOneUse())
        continue;
      if (combineLoadCast(cast<IntrinsicInst>(Cast), Load)) {
        // ...
        Load->eraseFromParent();
      }
    }
  }
  return Change;
}
bool X86LowerAMXCast::combineAMXcast(TargetLibraryInfo *TLI) {
  bool Change = false;
  // Collect the tile cast instructions.
  // ...
      if (match(&I,
                m_Intrinsic<Intrinsic::x86_cast_vector_to_tile>(m_Value(Vec))))
        Vec2TileInsts.push_back(&I);
      else if (match(&I, m_Intrinsic<Intrinsic::x86_cast_tile_to_vector>(
                             m_Value(Vec))))
        Tile2VecInsts.push_back(&I);
  // ...

  auto Convert = [&](SmallVectorImpl<Instruction *> &Insts, Intrinsic::ID IID) {
    for (auto *Inst : Insts) {
      for (User *U : Inst->users()) {
        IntrinsicInst *II = dyn_cast<IntrinsicInst>(U);
        if (!II || II->getIntrinsicID() != IID)
          continue;
        // A cast pair cancels out: replace the second cast with the original
        // vector or tile value.
        II->replaceAllUsesWith(Inst->getOperand(0));
        Change = true;
      }
    }
  };

  Convert(Vec2TileInsts, Intrinsic::x86_cast_tile_to_vector);
  Convert(Tile2VecInsts, Intrinsic::x86_cast_vector_to_tile);

  auto EraseInst = [&](SmallVectorImpl<Instruction *> &Insts) {
    for (auto *Inst : Insts) {
      if (Inst->use_empty()) {
        Inst->eraseFromParent();
        Change = true;
      }
      // ...
    }
  };

  EraseInst(Vec2TileInsts);
  EraseInst(Tile2VecInsts);
  LLVM_DEBUG(dbgs() << "[LowerAMXType][combineAMXcast] IR dump after combine "
                       "Vec2Tile and Tile2Vec:\n";
             Func.dump());
  Change |= combineLdSt(LiveCasts);
  EraseInst(LiveCasts);
  LLVM_DEBUG(dbgs() << "[LowerAMXType][combineAMXcast] IR dump after combine "
                       "AMXCast and load/store:\n";
             Func.dump());

  // Handle AMX cast chains that go through an intervening PHI node.
  // ...
        if (isa<PHINode>(I.getOperand(0)))
          PhiCastWorkList.push_back(&I);
  // ...
  for (auto *I : PhiCastWorkList) {
    // ...
    PHINode *PN = cast<PHINode>(I->getOperand(0));
    if (optimizeAMXCastFromPhi(cast<IntrinsicInst>(I), PN, DeadInst)) {
      DeadInst.insert(PN);
      Change = true;
    }
  }

  // New PHIs and merged casts can leave dead instructions behind, so run a
  // small DCE over them.
  while (!DeadInst.empty()) {
    Instruction *I = DeadInst.pop_back_val();
    Change |= DCEInstruction(I, DeadInst, TLI);
  }
  LLVM_DEBUG(dbgs() << "[LowerAMXType][combineAMXcast] IR dump after "
                       "optimizeAMXCastFromPhi:\n";
             Func.dump());
  return Change;
}
bool X86LowerAMXCast::transformAMXCast(IntrinsicInst *AMXCast) {
  IRBuilder<> Builder(AMXCast);
  AllocaInst *AllocaAddr;
  Value *I8Ptr, *Stride;
  auto *Src = AMXCast->getOperand(0);

  auto Prepare = [&](Type *MemTy) {
    AllocaAddr = createAllocaInstAtEntry(Builder, AMXCast->getParent(), MemTy);
    I8Ptr = Builder.CreateBitCast(AllocaAddr, Builder.getPtrTy());
    Stride = Builder.getInt64(64);
  };

  if (AMXCast->getType()->isX86_AMXTy()) {
    // Cast from vector to tile: spill the vector and reload it as a tile.
    // ...
    Use &U = *(AMXCast->use_begin());
    unsigned OpNo = U.getOperandNo();
    auto *II = dyn_cast<IntrinsicInst>(U.getUser());
    if (!II)
      return false;
    Prepare(AMXCast->getOperand(0)->getType());
    Builder.CreateStore(Src, AllocaAddr);
    // TODO: a constant operand could be picked for the shape.
    Value *Row = nullptr, *Col = nullptr;
    std::tie(Row, Col) = SC->getShape(II, OpNo);
    std::array<Value *, 4> Args = {
        Row, Col, I8Ptr, Builder.CreateSExt(Col, Builder.getInt64Ty())};
    Value *NewInst =
        Builder.CreateIntrinsic(Intrinsic::x86_tileloadd64_internal, {}, Args);
    AMXCast->replaceAllUsesWith(NewInst);
    AMXCast->eraseFromParent();
  } else {
    // Cast from tile to vector: store the tile and reload it as a vector.
    auto *II = dyn_cast<IntrinsicInst>(Src);
    if (!II)
      return false;
    Prepare(Src->getType());
    Value *Row = II->getOperand(0);
    Value *Col = II->getOperand(1);
    std::array<Value *, 5> Args = {
        Row, Col, I8Ptr, Builder.CreateSExt(Col, Builder.getInt64Ty()), Src};
    Builder.CreateIntrinsic(Intrinsic::x86_tilestored64_internal, {}, Args);
    Value *NewInst = Builder.CreateLoad(AMXCast->getType(), AllocaAddr);
    AMXCast->replaceAllUsesWith(NewInst);
    AMXCast->eraseFromParent();
  }

  return true;
}
bool X86LowerAMXCast::transformAllAMXCast() {
  bool Change = false;
  // Collect the remaining AMX cast instructions.
  SmallVector<Instruction *, 8> WorkLists;
  for (BasicBlock &BB : Func)
    for (Instruction &I : BB)
      if (isAMXCast(&I))
        WorkLists.push_back(&I);

  for (auto *Inst : WorkLists)
    Change |= transformAMXCast(cast<IntrinsicInst>(Inst));

  return Change;
}
  bool runOnFunction(Function &F) override {
    // Most code does not use AMX, so bail out early when the function contains
    // no AMX instructions and no AMX casts.
    if (!containsAMXCode(F))
      return false;

    bool C = false;
    TargetMachine *TM = &getAnalysis<TargetPassConfig>().getTM<TargetMachine>();
    TargetLibraryInfo *TLI =
        &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F);

    ShapeCalculator SC(TM);
    X86LowerAMXCast LAC(F, &SC);
    C |= LAC.combineAMXcast(TLI);
    // There might be remaining AMX casts after combineAMXcast; lower them too.
    C |= LAC.transformAllAMXCast();

    X86LowerAMXType LAT(F, &SC);
    C |= LAT.visit();

    // Prepare for fast register allocation at O0.
    if (TM->getOptLevel() == CodeGenOptLevel::None) {
      // Even if the front end did not run at O0, the mid/back end may (e.g.
      // "clang -O2 -S -emit-llvm t.c" + "llc t.ll"); the AMX data must then be
      // made volatile for AMX fast register allocation.
      if (!F.hasFnAttribute(Attribute::OptimizeNone)) {
        X86VolatileTileData VTD(F);
        C = VTD.volatileTileData() || C;
      }
    }

    return C;
  }
static const char PassName[] = "Lower AMX type for load/store";
char X86LowerAMXTypeLegacyPass::ID = 0;
// ...

FunctionPass *llvm::createX86LowerAMXTypePass() {
  return new X86LowerAMXTypeLegacyPass();
}