45#define DEBUG_TYPE "tile-pre-config"
51 ": Failed to config tile register, please define the shape earlier");
69 Pos(
std::distance(
MBB->instr_begin(), ++
MI->getIterator())) {}
72 Pos(
std::distance(
MBB->instr_begin(), ++
MI->getIterator())) {}
// Conversion to bool: yields true exactly when MBB is not nullptr.
75 operator bool()
const {
return MBB !=
nullptr; }
// Inequality against another MIRef, defined as the logical negation of
// operator== applied to *this and RHS.
79 bool operator!=(
const MIRef &RHS)
const {
return !(*
this ==
RHS); }
95 bool HasAMXRegLiveIn =
false;
96 bool TileCfgForbidden =
false;
97 bool NeedTileCfgLiveIn =
false;
111 if (Iter ==
MI.operands_end())
114 return !UsableRegs.
none();
119 if (
MI.isPHI() ||
MI.isDebugInstr() ||
MI.getNumOperands() < 3)
121 switch (
MI.getOpcode()) {
122 case X86::PTILESTOREDV:
123 case X86::PTCVTROWD2PSrreV:
124 case X86::PTCVTROWD2PSrriV:
125 case X86::PTCVTROWPS2PBF16HrreV:
126 case X86::PTCVTROWPS2PBF16HrriV:
127 case X86::PTCVTROWPS2PBF16LrreV:
128 case X86::PTCVTROWPS2PBF16LrriV:
129 case X86::PTCVTROWPS2PHHrreV:
130 case X86::PTCVTROWPS2PHHrriV:
131 case X86::PTCVTROWPS2PHLrreV:
132 case X86::PTCVTROWPS2PHLrriV:
133 case X86::PTILEMOVROWrreV:
134 case X86::PTILEMOVROWrriV:
145 if (
MRI->getRegClass(MO.
getReg())->getID() == X86::TILERegClassID)
147 if (
MRI->getRegClass(MO.
getReg())->getID() == X86::TILEPAIRRegClassID)
152 collectShapeInfo(
MI, Shapes);
161 if (
ML->contains(Bottom) &&
ML->isLoopLatch(Bottom))
172 MIRef &FirstAMX = BBVisitedInfo[
MBB].FirstAMX;
174 auto InsertPoint = FirstAMX.MI->getIterator();
175 for (
auto I = FirstShapeBelowAMX, E = Shapes.
end();
I != E; ++
I) {
177 if (
I->MI->mayLoadOrStore())
179 for (
auto &MO :
I->MI->operands()) {
188 MBB->
insert(InsertPoint,
I->MI->removeFromParent());
201 return "Tile Register Pre-configure";
215 BBVisitedInfo.
clear();
226char X86PreTileConfig::ID = 0;
229 "Tile Register Pre-configure",
false,
false)
234void X86PreTileConfig::collectShapeInfo(
MachineInstr &
MI,
unsigned Shapes) {
238 if (
I == ShapeBBs[
MBB].end() || *
I != MIR)
244 for (
unsigned I = 1;
I < Shapes + 2; ++
I)
246 while (!WorkList.
empty()) {
249 assert(
DefMI &&
"R must has one define instruction");
257 if (
MI &&
MI->isMoveImmediate())
264 RecordShape(
DefMI, DefMBB);
268 RecordShape(
DefMI, DefMBB);
286 AMXRegs.set(X86::TMM0 +
I);
290 MLI = &getAnalysis<MachineLoopInfoWrapperPass>().getLI();
293 for (
auto &
MBB : MF) {
295 for (
auto &
MI :
MBB) {
297 if (isAMXInstruction(
MI)) {
299 if (BBVisitedInfo[&
MBB].LastCall)
300 CfgNeedInsert.
insert(BBVisitedInfo[&
MBB].LastCall);
302 BBVisitedInfo[&
MBB].NeedTileCfgLiveIn =
true;
304 if (!BBVisitedInfo[&
MBB].FirstAMX)
305 BBVisitedInfo[&
MBB].FirstAMX = MIRef(&
MI, &
MBB, Pos);
306 }
else if (
MI.isCall() && isDestructiveCall(
MI, AMXRegs)) {
308 BBVisitedInfo[&
MBB].LastCall = MIRef(&
MI, &
MBB, Pos);
311 if (BBVisitedInfo[&
MBB].NeedTileCfgLiveIn) {
317 if (BBVisitedInfo[&
MBB].FirstAMX || BBVisitedInfo[&
MBB].HasAMXRegLiveIn)
319 if (!isLoopBackEdge(Succ, &
MBB))
320 BBVisitedInfo[Succ].HasAMXRegLiveIn =
true;
324 while (!CfgLiveInBBs.
empty()) {
327 if (BBVisitedInfo[Pred].LastCall) {
328 CfgNeedInsert.
insert(BBVisitedInfo[Pred].LastCall);
329 }
else if (!BBVisitedInfo[Pred].NeedTileCfgLiveIn) {
330 BBVisitedInfo[Pred].NeedTileCfgLiveIn =
true;
331 if (Pred == &MF.front())
332 CfgNeedInsert.
insert(MIRef(Pred));
340 if (CfgNeedInsert.
empty())
345 for (
auto &
I : ShapeBBs) {
347 if (BBVisitedInfo[
I.first].HasAMXRegLiveIn) {
354 if (BBVisitedInfo[
I.first].FirstAMX &&
355 BBVisitedInfo[
I.first].FirstAMX <
I.second.back() &&
356 !hoistShapesInBB(
I.first,
I.second)) {
362 while (!WorkList.
empty()) {
365 if (!BBVisitedInfo[Pred].TileCfgForbidden && !isLoopBackEdge(
MBB, Pred)) {
366 BBVisitedInfo[Pred].TileCfgForbidden =
true;
374 int SS = MF.getFrameInfo().CreateStackObject(
375 ST.getTileConfigSize(),
ST.getTileConfigAlignment(),
false);
378 for (
const auto &
I : CfgNeedInsert) {
381 while (!WorkList.
empty()) {
383 if (!VisitedOrInserted.
count(
I)) {
384 if (!BBVisitedInfo[
I.MBB].TileCfgForbidden) {
392 for (
auto *Succ :
I.MBB->successors())
393 if (BBVisitedInfo[Succ].NeedTileCfgLiveIn)
400 for (MIRef
I : InsertPoints) {
402 if (ShapeBBs.count(
I.MBB) &&
I < ShapeBBs[
I.MBB].back())
403 I = ShapeBBs[
I.MBB].back();
406 if (VisitedOrInserted.
insert(
I).second) {
407 auto II =
I.MI ?
I.MI->getIterator() :
I.MBB->instr_begin();
417 if (
ST.hasAVX512()) {
418 Register Zmm =
MRI->createVirtualRegister(&X86::VR512RegClass);
422 }
else if (
ST.hasAVX2()) {
423 Register Ymm =
MRI->createVirtualRegister(&X86::VR256RegClass);
430 assert(
ST.hasSSE2() &&
"AMX should assume SSE2 enabled");
431 unsigned StoreOpc =
ST.hasAVX() ? X86::VMOVUPSmr : X86::MOVUPSmr;
432 Register Xmm =
MRI->createVirtualRegister(&X86::VR128RegClass);
449 return new X86PreTileConfig();
unsigned const MachineRegisterInfo * MRI
MachineInstrBuilder MachineInstrBuilder & DefMI
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static const Function * getParent(const Value *V)
const HexagonInstrInfo * TII
Module.h This file contains the declarations for the Module class.
unsigned const TargetRegisterInfo * TRI
uint64_t IntrinsicInst * II
#define INITIALIZE_PASS_DEPENDENCY(depName)
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
This file defines the SmallSet class.
Tile Register Pre-configure
static void emitErrorMsg(MachineFunction &MF)
Represent the analysis usage information of a pass.
AnalysisUsage & addRequired()
void setPreservesAll()
Set by analyses that do not transform their input at all.
void clearBitsInMask(const uint32_t *Mask, unsigned MaskWords=~0u)
clearBitsInMask - Clear any bits in this vector that are set in Mask.
bool none() const
none - Returns true if none of the bits are set.
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
FunctionPass class - This class is used to implement most global optimizations.
LLVMContext & getContext() const
getContext - Return a reference to the LLVMContext associated with this function.
This is an important class for using LLVM in a threaded context.
void emitError(const Instruction *I, const Twine &ErrorStr)
emitError - Emit an error message to the currently installed error handler with optional location inf...
bool isLoopHeader(const BlockT *BB) const
LoopT * getLoopFor(const BlockT *BB) const
Return the inner most loop that BB lives in.
instr_iterator insert(instr_iterator I, MachineInstr *M)
Insert MI into the instruction list before I, possibly inside a bundle.
iterator_range< succ_iterator > successors()
iterator_range< pred_iterator > predecessors()
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
virtual bool runOnMachineFunction(MachineFunction &MF)=0
runOnMachineFunction - This method must be overloaded to perform the desired machine code transformat...
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
StringRef getName() const
getName - Return the name of the corresponding LLVM function.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
Function & getFunction()
Return the LLVM function that this machine code represents.
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
Representation of each machine instruction.
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
const MachineBasicBlock * getParent() const
unsigned getNumOperands() const
Returns the total number of operands.
bool isMoveImmediate(QueryType Type=IgnoreBundle) const
Return true if this instruction is a move immediate (including conditional moves) instruction.
const MachineOperand & getOperand(unsigned i) const
MachineOperand class - Representation of each machine instruction operand.
bool isReg() const
isReg - Tests if this is a MO_Register operand.
MachineBasicBlock * getMBB() const
Register getReg() const
getReg - Returns the register number.
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
virtual StringRef getPassName() const
getPassName - Return a nice clean name for a pass.
virtual void releaseMemory()
releaseMemory() - This member can be implemented by a pass if it wants to be able to release its memo...
Wrapper class representing virtual and physical registers.
constexpr bool isVirtual() const
Return true if the specified register number is in the virtual register namespace.
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
size_type count(const T &V) const
count - Return 1 if the element is in the set, 0 otherwise.
std::pair< const_iterator, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
StringRef - Represent a constant reference to a string, i.e.
TargetInstrInfo - Interface to description of machine instruction set.
unsigned getNumRegs() const
Return the number of registers in this class.
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
X86MachineFunctionInfo - This class is derived from MachineFunction and contains private X86 target-s...
AMXProgModelEnum getAMXProgModel() const
unsigned ID
LLVM IR allows arbitrary numbers to be used as calling convention identifiers.
This is an optimization pass for GlobalISel generic memory operations.
bool operator<(int64_t V1, const APSInt &V2)
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
bool operator!=(uint64_t V1, const APInt &V2)
static const MachineInstrBuilder & addFrameReference(const MachineInstrBuilder &MIB, int FI, int Offset=0, bool mem=true)
addFrameReference - This function is used to add a reference to the base of an abstract object on the...
bool operator==(const AddressRangeValuePair &LHS, const AddressRangeValuePair &RHS)
bool operator>(int64_t V1, const APSInt &V2)
auto lower_bound(R &&Range, T &&Value)
Provide wrappers to std::lower_bound which take ranges instead of having to pass begin/end explicitly...
auto find_if(R &&Range, UnaryPredicate P)
Provide wrappers to std::find_if which take ranges instead of having to pass begin/end explicitly.
FunctionPass * createX86PreTileConfigPass()
Return a pass that inserts the pseudo tile config instruction.
Implement std::hash so that hash_code can be used in STL containers.