48#include "llvm/IR/IntrinsicsX86.h"
56using namespace PatternMatch;
58#define DEBUG_TYPE "pre-amx-config"
62 if (Operand->getType()->isX86_AMXTy())
68 return II->
getIntrinsicID() == Intrinsic::x86_tileloadd64_internal ||
73 return II->
getIntrinsicID() == Intrinsic::x86_tilestored64_internal;
79 if (Operand->getType()->isX86_AMXTy())
86 if ((isa<CallInst>(
I) && !isa<IntrinsicInst>(
I)) ||
I->isTerminator())
93class X86PreAMXConfig {
100 bool preTileConfig();
102 bool findConfigShapes(PosAndShapesMap &PosAndShapes);
163 Value *PalettePos =
Builder.CreateGEP(I8Ty, I8Ptr, PaletteOffset);
164 Builder.CreateStore(PaletteValue, PalettePos);
166 for (
int I = 0,
E = Shapes.
size() / 2;
I <
E;
I++) {
169 const std::string ShapeName =
"amx.tmm." + itostr(
I);
170 Value *RowPos =
Builder.CreateGEP(I8Ty, I8Ptr, RowOffset,
171 ShapeName +
".shape.row");
172 Value *ColPos =
Builder.CreateGEP(I8Ty, I8Ptr, ColOffset);
173 ColPos =
Builder.CreateBitCast(ColPos, PointerType::get(I16Ty, 0),
174 ShapeName +
".shape.col");
175 Value *Row = Shapes[
I * 2];
176 Value *Col = Shapes[
I * 2 + 1];
177 Row =
Builder.CreateTrunc(Row, I8Ty);
178 Builder.CreateStore(Row, RowPos);
179 Builder.CreateStore(Col, ColPos);
183void X86PreAMXConfig::addTileConfig(
Instruction *ModelStart,
188 unsigned AddrSpace =
DL.getAllocaAddrSpace();
190 Type *V512Ty = VectorType::get(
Builder.getInt32Ty(), 16,
false);
194 new AllocaInst(V512Ty, AddrSpace,
"", &
F.getEntryBlock().front());
195 Addr->setAlignment(Alignment);
200 preWriteTileCfg(I8Ptr, Builder, Shapes);
202 Builder.CreateIntrinsic(Intrinsic::x86_ldtilecfg_internal, std::nullopt,
219 if (
Op->getType()->isX86_AMXTy())
220 if (!Loads.
erase(Op))
226 return Loads.
empty() && (
ST == cast<Value>(KeyAMX));
233 if (!
Op->getType()->isX86_AMXTy())
237 "All KeyAMX's tile definiation should comes from TileLoad!");
245 return Shapes.
size() != 0;
268 for (
auto I = Iter,
E = BB->
end();
I !=
E; ++
I) {
277 if (!checkVolatileModel(Loads, II, KeyAMX))
282 assert(!KeyAMX &&
"Too many key amx intrinsic!");
286 assert(PosEnd != BB->
end() &&
"Not find TileStore!");
290 KeyAMX = dyn_cast<IntrinsicInst>(&*PosEnd);
293 assert(Shapes.
empty() &&
"Shapes should be clean.");
294 getKeyAMXShapes(KeyAMX, Shapes);
310bool X86PreAMXConfig::findConfigShapes(PosAndShapesMap &PosAndShapes) {
321 I = getShapesAndConfigPosEnd(
I, PosAndShapes[&*
I]);
358bool X86PreAMXConfig::preTileConfig() {
359 PosAndShapesMap PosAndShapes;
360 bool NeedCfg = findConfigShapes(PosAndShapes);
363 for (
auto &IPAndShapes : PosAndShapes)
364 addTileConfig(IPAndShapes.first, IPAndShapes.second);
391 X86PreAMXConfig PCFG(
F);
392 C = PCFG.preTileConfig();
406static const char PassName[] =
"Pre AMX Tile Config";
407char X86PreAMXConfigPass::ID = 0;
413 return new X86PreAMXConfigPass();
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
static const T * Find(StringRef S, ArrayRef< T > A)
Find KV in array using binary search.
const char LLVMTargetMachineRef TM
#define INITIALIZE_PASS_DEPENDENCY(depName)
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
This file defines the SmallSet class.
Target-Independent Code Generator Pass Configuration Options pass.
static bool brokenVolatile(Instruction *I)
static bool onlyTileDef(IntrinsicInst *II)
static bool isAMXIntrinsic(IntrinsicInst *II)
static bool isTileLoad(IntrinsicInst *II)
static bool isTileStore(IntrinsicInst *II)
static const char PassName[]
an instruction to allocate memory on the stack
Represent the analysis usage information of a pass.
AnalysisUsage & addRequired()
void setPreservesCFG()
This function should be called by the pass, iff they do not:
LLVM Basic Block Representation.
iterator begin()
Instruction iterator methods.
const Function * getParent() const
Return the enclosing method, or null if none.
InstListType::iterator iterator
Instruction iterators...
static Constant * get(Type *Ty, uint64_t V, bool IsSigned=false)
If Ty is a vector type, return a Constant with a splat of the given value.
static Constant * getNullValue(Type *Ty)
Constructor to create a '0' constant of arbitrary type.
A parsed version of the target data layout string, and methods for querying it.
FunctionPass class - This class is used to implement most global optimizations.
virtual bool runOnFunction(Function &F)=0
runOnFunction - Virtual method overridden by subclasses to do the per-function processing of the pass.
Common base class shared among various IRBuilders.
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
A wrapper class for inspecting calls to intrinsic functions.
Intrinsic::ID getIntrinsicID() const
Return the intrinsic ID of this intrinsic.
This is an important class for using LLVM in a threaded context.
This class implements a map that also provides access to all stored values in a deterministic order.
A Module instance is used to store all the information related to an LLVM module.
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
virtual void getAnalysisUsage(AnalysisUsage &) const
getAnalysisUsage - This function should be overridden by passes that need analysis information to do t...
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
bool contains(const T &V) const
Check if the SmallSet contains the given element.
std::pair< const_iterator, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Primary interface to the complete machine description for the target machine.
Target-Independent Code Generator Pass Configuration Options.
The instances of the Type class are immutable: once they are created, they are never changed.
static IntegerType * getInt16Ty(LLVMContext &C)
static IntegerType * getInt8Ty(LLVMContext &C)
bool isX86_AMXTy() const
Return true if this is X86 AMX.
static IntegerType * getInt32Ty(LLVMContext &C)
static IntegerType * getInt64Ty(LLVMContext &C)
Value * getOperand(unsigned i) const
unsigned getNumOperands() const
LLVM Value Representation.
Type * getType() const
All values are typed, get the type of this value.
unsigned ID
LLVM IR allows arbitrary numbers to be used as calling convention identifiers.
@ C
The default llvm calling convention, compatible with C.
This is an optimization pass for GlobalISel generic memory operations.
void report_fatal_error(Error Err, bool gen_crash_diag=true)
Report a serious error, calling any installed error handler.
void initializeX86PreAMXConfigPassPass(PassRegistry &)
FunctionPass * createX86PreAMXConfigPass()
The pass inserts tile config intrinsics for AMX fast register allocation.
This struct is a compact representation of a valid (non-zero power of two) alignment.