Go to the documentation of this file.
48 #include "llvm/IR/IntrinsicsX86.h"
56 using namespace PatternMatch;
58 #define DEBUG_TYPE "pre-amx-config"
62 if (Operand->getType()->isX86_AMXTy())
68 return II->
getIntrinsicID() == Intrinsic::x86_tileloadd64_internal ||
73 return II->
getIntrinsicID() == Intrinsic::x86_tilestored64_internal;
79 if (Operand->getType()->isX86_AMXTy())
86 if ((isa<CallInst>(
I) && !isa<IntrinsicInst>(
I)) ||
I->isTerminator())
93 class X86PreAMXConfig {
100 bool findConfigShapes(
165 new StoreInst(PaletteValue, PalettePos, Pos);
167 for (
int I = 0,
E = Shapes.size() / 2;
I <
E;
I++) {
170 const std::string ShapeName =
"amx.tmm." + itostr(
I);
172 ShapeName +
".shape.row", Pos);
175 ShapeName +
".shape.col", Pos);
176 Value *Row = Shapes[
I * 2];
177 Value *Col = Shapes[
I * 2 + 1];
186 bool X86PreAMXConfig::addTileConfig(
Instruction *ModelStart,
191 unsigned AddrSpace =
DL.getAllocaAddrSpace();
197 new AllocaInst(V512Ty, AddrSpace,
"", &
F.getEntryBlock().front());
198 Addr->setAlignment(Alignment);
201 std::array<Value *, 1>
Args = {I8Ptr};
203 Builder.CreateIntrinsic(Intrinsic::x86_ldtilecfg_internal, None,
Args);
207 assert(Init0 &&
"Not Zero initilizate the cfg mem!");
209 preWriteTileCfg(I8Ptr, Cfg, Shapes);
227 if (
Op->getType()->isX86_AMXTy())
234 return Loads.
empty() && (
ST == cast<Value>(KeyAMX));
241 if (!
Op->getType()->isX86_AMXTy())
245 "All KeyAMX's tile definiation should comes from TileLoad!");
253 return Shapes.size() != 0;
276 for (
auto I = Iter,
E =
BB->end();
I !=
E; ++
I) {
285 if (!checkVolatileModel(Loads, II, KeyAMX))
290 assert(!KeyAMX &&
"Too many key amx intrinsic!");
294 assert(PosEnd !=
BB->end() &&
"Not find TileStore!");
298 KeyAMX = dyn_cast<IntrinsicInst>(&*PosEnd);
301 assert(Shapes.empty() &&
"Shapes should be clean.");
302 getKeyAMXShapes(KeyAMX, Shapes);
318 bool X86PreAMXConfig::findConfigShapes(
330 I = getShapesAndConfigPosEnd(
I, PosAndShapes[&*
I]);
367 bool X86PreAMXConfig::preTileConfig() {
369 bool NeedCfg = findConfigShapes(PosAndShapes);
372 for (
auto &IPAndShapes : PosAndShapes)
373 addTileConfig(IPAndShapes.first, IPAndShapes.second);
400 X86PreAMXConfig PCFG(
F);
401 C = PCFG.preTileConfig();
415 static const char PassName[] =
"Pre AMX Tile Config";
422 return new X86PreAMXConfigPass();
This is an optimization pass for GlobalISel generic memory operations.
We currently emits eax Perhaps this is what we really should generate is Is imull three or four cycles eax eax The current instruction priority is based on pattern complexity The former is more complex because it folds a load so the latter will not be emitted Perhaps we should use AddedComplexity to give LEA32r a higher priority We should always try to match LEA first since the LEA matching code does some estimate to determine whether the match is profitable if we care more about code then imull is better It s two bytes shorter than movl leal On a Pentium M
A parsed version of the target data layout string, and methods for querying it.
InstListType::iterator iterator
Instruction iterators...
Intrinsic::ID getIntrinsicID() const
Return the intrinsic ID of this intrinsic.
static PointerType * get(Type *ElementType, unsigned AddressSpace)
This constructs a pointer to an object of the specified type in a numbered address space.
This class represents a no-op cast from one type to another.
static bool isAMXIntrinsic(IntrinsicInst *II)
The instances of the Type class are immutable: once they are created, they are never changed.
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
static IntegerType * getInt8Ty(LLVMContext &C)
static IntegerType * getInt32Ty(LLVMContext &C)
LLVM Basic Block Representation.
static bool onlyTileDef(IntrinsicInst *II)
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
(vector float) vec_cmpeq(*A, *B) C
Represent the analysis usage information of a pass.
void report_fatal_error(Error Err, bool gen_crash_diag=true)
Report a serious error, calling any installed error handler.
static Constant * get(Type *Ty, uint64_t V, bool IsSigned=false)
If Ty is a vector type, return a Constant with a splat of the given value.
This struct is a compact representation of a valid (non-zero power of two) alignment.
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
static bool isTileStore(IntrinsicInst *II)
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
Target-Independent Code Generator Pass Configuration Options.
static bool isTileLoad(IntrinsicInst *II)
An instruction for storing to memory.
This class represents a truncation of integer types.
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
This is an important class for using LLVM in a threaded context.
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
Primary interface to the complete machine description for the target machine.
A Module instance is used to store all the information related to an LLVM module.
static const T * Find(StringRef S, ArrayRef< T > A)
Find KV in array using binary search.
static GetElementPtrInst * Create(Type *PointeeType, Value *Ptr, ArrayRef< Value * > IdxList, const Twine &NameStr="", Instruction *InsertBefore=nullptr)
FunctionPass * createX86PreAMXConfigPass()
The pass inserts tile config intrinsics for AMX fast register allocation.
void setPreservesCFG()
This function should be called by the pass, iff they do not:
Type * getType() const
All values are typed, get the type of this value.
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
std::pair< NoneType, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
LLVMContext & getContext() const
Get the context in which this basic block lives.
static bool runOnFunction(Function &F, bool PostInlining)
static IntegerType * getInt64Ty(LLVMContext &C)
static Constant * getNullValue(Type *Ty)
Constructor to create a '0' constant of arbitrary type.
static const char PassName[]
static bool brokenVolatile(Instruction *I)
A wrapper class for inspecting calls to intrinsic functions.
unsigned getNumOperands() const
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
const BasicBlock * getParent() const
LLVM_NODISCARD bool empty() const
static IntegerType * getInt16Ty(LLVMContext &C)
const char LLVMTargetMachineRef TM
FunctionPass class - This class is used to implement most global optimizations.
Common register allocation spilling lr str ldr sxth r3 ldr mla r4 can lr mov lr str ldr sxth r3 mla r4 and then merge mul and lr str ldr sxth r3 mla r4 It also increase the likelihood the store may become dead bb27 Successors according to LLVM BB
AnalysisUsage & addRequired()
constexpr char Args[]
Key for Kernel::Metadata::mArgs.
an instruction to allocate memory on the stack
Value * getOperand(unsigned i) const
LLVM Value Representation.
static VectorType * get(Type *ElementType, ElementCount EC)
This static method is the primary way to construct a VectorType.
void initializeX86PreAMXConfigPassPass(PassRegistry &)
bool isX86_AMXTy() const
Return true if this is X86 AMX.
bool contains(const T &V) const
Check if the SmallSet contains the given element.