#define DEBUG_TYPE "tile-pre-config"
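// Diagnostic helper: report that tile registers could not be configured
// because the shapes are not defined early enough.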
51 ": Failed to config tile register, please define the shape earlier");
struct MIRef {
  MachineInstr *MI = nullptr;
  MachineBasicBlock *MBB = nullptr;
  // A virtual position for the instruction that will be inserted after MI.
  size_t Pos = 0;
  MIRef() = default;
  MIRef(MachineBasicBlock *MBB) : MBB(MBB) {
    for (auto I = MBB->begin(), E = MBB->end(); I != E && I->isPHI();
         ++I, ++Pos)
      MI = &*I;
  }
  MIRef(MachineInstr *MI)
      : MI(MI), MBB(MI->getParent()),
        Pos(std::distance(MBB->instr_begin(), ++MI->getIterator())) {}
  MIRef(MachineInstr *MI, MachineBasicBlock *MBB)
      : MI(MI), MBB(MBB),
        Pos(std::distance(MBB->instr_begin(), ++MI->getIterator())) {}
  MIRef(MachineInstr *MI, MachineBasicBlock *MBB, size_t Pos)
      : MI(MI), MBB(MBB), Pos(Pos) {}
  operator bool() const { return MBB != nullptr; }
  bool operator==(const MIRef &RHS) const {
    return MI == RHS.MI && MBB == RHS.MBB;
  }
  bool operator<(const MIRef &RHS) const {
    return std::tie(MBB, Pos) < std::tie(RHS.MBB, RHS.Pos);
  }
  bool operator>(const MIRef &RHS) const {
    return std::tie(MBB, Pos) > std::tie(RHS.MBB, RHS.Pos);
  }
};
struct BBInfo {
  MIRef FirstAMX;
  MIRef LastCall;
  bool HasAMXRegLiveIn = false;
  bool TileCfgForbidden = false;
  bool NeedTileCfgLiveIn = false;
};
class X86PreTileConfig : public MachineFunctionPass {
  MachineRegisterInfo *MRI = nullptr;
  const MachineLoopInfo *MLI = nullptr;
  SmallPtrSet<MachineInstr *, 8> DefVisited;
  DenseMap<MachineBasicBlock *, BBInfo> BBVisitedInfo;
  DenseMap<MachineBasicBlock *, SmallVector<MIRef, 8>> ShapeBBs;
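  // Check whether a call clobbers the AMX tile registers, judging by the
  // regmask operand of the call.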
  bool isDestructiveCall(MachineInstr &MI, BitVector UsableRegs) {
    auto Iter = llvm::find_if(
        MI.operands(), [](MachineOperand &MO) { return MO.isRegMask(); });
    if (Iter == MI.operands_end())
      return false;
    UsableRegs.clearBitsInMask(Iter->getRegMask());
    return !UsableRegs.none();
  }
  bool isAMXInstruction(MachineInstr &MI) {
    if (MI.isPHI() || MI.isDebugInstr() || MI.getNumOperands() < 3)
      return false;
    switch (MI.getOpcode()) {
    case X86::PTILESTOREDV:
    case X86::PTCVTROWD2PSrreV:
    case X86::PTCVTROWD2PSrriV:
    case X86::PTCVTROWPS2BF16HrreV:
    case X86::PTCVTROWPS2BF16HrriV:
    case X86::PTCVTROWPS2BF16LrreV:
    case X86::PTCVTROWPS2BF16LrriV:
    case X86::PTCVTROWPS2PHHrreV:
    case X86::PTCVTROWPS2PHHrriV:
    case X86::PTCVTROWPS2PHLrreV:
    case X86::PTCVTROWPS2PHLrriV:
    case X86::PTILEMOVROWrreV:
    case X86::PTILEMOVROWrriV:
      return true;
    }

    // We can simply check if it is an AMX instruction by its def, but we
    // should exclude the old API which uses physical registers.
    MachineOperand &MO = MI.getOperand(0);
    if (!MO.isReg() || !MO.getReg().isVirtual())
      return false;

    unsigned Shapes = 0;
    if (MRI->getRegClass(MO.getReg())->getID() == X86::TILERegClassID)
      Shapes = 1;
    if (MRI->getRegClass(MO.getReg())->getID() == X86::TILEPAIRRegClassID)
      Shapes = 2;
    if (!Shapes)
      return false;

    collectShapeInfo(MI, Shapes);
    return true;
  }
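  // Check whether the CFG edge from Bottom to Header is a loop back edge.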
  bool isLoopBackEdge(MachineBasicBlock *Header, MachineBasicBlock *Bottom) {
    if (!MLI->isLoopHeader(Header))
      return false;
    auto *ML = MLI->getLoopFor(Header);
    if (ML->contains(Bottom) && ML->isLoopLatch(Bottom))
      return true;
    return false;
  }
  void collectShapeInfo(MachineInstr &MI, unsigned Shapes);
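  // Try to hoist shape defs that sit below the first AMX instruction in MBB
  // to just above it, so that ldtilecfg can be inserted after all shape defs.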
  bool hoistShapesInBB(MachineBasicBlock *MBB, SmallVectorImpl<MIRef> &Shapes) {
    MIRef &FirstAMX = BBVisitedInfo[MBB].FirstAMX;
    auto FirstShapeBelowAMX = llvm::lower_bound(Shapes, FirstAMX);
    auto InsertPoint = FirstAMX.MI->getIterator();
    for (auto I = FirstShapeBelowAMX, E = Shapes.end(); I != E; ++I) {
      // Do not hoist instructions that access memory.
      if (I->MI->mayLoadOrStore())
        return false;
      for (auto &MO : I->MI->operands()) {
        if (MO.isDef())
          continue;
        // Do not hoist the shape def if one of its sources is defined below
        // the first AMX instruction.
        if (MO.isReg() && MIRef(MRI->getVRegDef(MO.getReg())) > FirstAMX)
          return false;
      }
      MBB->insert(InsertPoint, I->MI->removeFromParent());
    }
    return true;
  }
public:
  X86PreTileConfig() : MachineFunctionPass(ID) {}
  StringRef getPassName() const override {
    return "Tile Register Pre-configure";
  }
  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.setPreservesAll();
    AU.addRequired<MachineLoopInfoWrapperPass>();
    MachineFunctionPass::getAnalysisUsage(AU);
  }
  void releaseMemory() override {
    ShapeBBs.clear();
    DefVisited.clear();
    BBVisitedInfo.clear();
  }
  bool runOnMachineFunction(MachineFunction &MF) override;

  static char ID;
};
char X86PreTileConfig::ID = 0;
229 "Tile Register Pre-configure",
false,
false)
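// Walk the shape operands of MI back to their defining instructions, looking
// through COPYs and PHIs, and record the defs per basic block in ShapeBBs.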
void X86PreTileConfig::collectShapeInfo(MachineInstr &MI, unsigned Shapes) {
  auto RecordShape = [&](MachineInstr *MI, MachineBasicBlock *MBB) {
    MIRef MIR(MI, MBB);
    auto &Refs = ShapeBBs[MBB];
    auto I = llvm::lower_bound(Refs, MIR);
    if (I == Refs.end() || *I != MIR)
      Refs.insert(I, MIR);
  };
  SmallVector<Register, 8> WorkList;
  for (unsigned I = 1; I < Shapes + 2; ++I)
    WorkList.push_back(MI.getOperand(I).getReg());
  while (!WorkList.empty()) {
    Register R = WorkList.pop_back_val();
    MachineInstr *DefMI = MRI->getVRegDef(R);
    assert(DefMI && "R must has one define instruction");
    MachineBasicBlock *DefMBB = DefMI->getParent();
    if (DefMI->isMoveImmediate() || !DefVisited.insert(DefMI).second)
      continue;
    // A shape copied from a move-immediate can be ignored as well.
    if (DefMI->getOpcode() == X86::COPY) {
      MachineInstr *MI = MRI->getVRegDef(DefMI->getOperand(1).getReg());
      if (MI && MI->isMoveImmediate())
        continue;
    }
    if (DefMI->isPHI()) {
      for (unsigned I = 1; I < DefMI->getNumOperands(); I += 2)
        if (isLoopBackEdge(DefMBB, DefMI->getOperand(I + 1).getMBB()))
          RecordShape(DefMI, DefMBB); // In this case, the PHI is the shape def.
        else
          WorkList.push_back(DefMI->getOperand(I).getReg());
    } else {
      RecordShape(DefMI, DefMBB);
    }
  }
}
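// Insert PLDTILECFGV where a tile configuration must become live, and
// initialize the configuration stack slot in the entry block.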
bool X86PreTileConfig::runOnMachineFunction(MachineFunction &MF) {
  X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
  // Early exit in the common case of non-AMX code.
  if (X86FI->getAMXProgModel() != AMXProgModelEnum::ManagedRA)
    return false;
  const X86Subtarget &ST = MF.getSubtarget<X86Subtarget>();
  const TargetInstrInfo *TII = ST.getInstrInfo();
  const TargetRegisterInfo *TRI = ST.getRegisterInfo();
  const TargetRegisterClass *RC = TRI->getRegClass(X86::TILERegClassID);
  BitVector AMXRegs(TRI->getNumRegs());
  for (unsigned I = 0; I < RC->getNumRegs(); I++)
    AMXRegs.set(X86::TMM0 + I);
  MRI = &MF.getRegInfo();
  MLI = &getAnalysis<MachineLoopInfoWrapperPass>().getLI();
  SmallSet<MIRef, 8> CfgNeedInsert;
  SmallVector<MachineBasicBlock *, 8> CfgLiveInBBs;
  for (auto &MBB : MF) {
    size_t Pos = 0;
    auto &Info = BBVisitedInfo[&MBB];
    for (auto &MI : MBB) {
      ++Pos;
      if (isAMXInstruction(MI)) {
        // If there's a call before the AMX instruction, the config must be
        // reloaded after the call; otherwise it must be live into this BB.
        if (Info.LastCall)
          CfgNeedInsert.insert(Info.LastCall);
        else
          Info.NeedTileCfgLiveIn = true;
        // Record the first AMX instruction in case a shape is defined below.
        if (!Info.FirstAMX)
          Info.FirstAMX = MIRef(&MI, &MBB, Pos);
      } else if (MI.isCall() && isDestructiveCall(MI, AMXRegs)) {
        // Record the call only if it clobbers the AMX registers.
        Info.LastCall = MIRef(&MI, &MBB, Pos);
      }
    }
    if (Info.NeedTileCfgLiveIn) {
      if (&MBB == &MF.front())
        CfgNeedInsert.insert(MIRef(&MBB));
      else
        CfgLiveInBBs.push_back(&MBB);
    }
    if (Info.FirstAMX || Info.HasAMXRegLiveIn)
      for (auto *Succ : MBB.successors())
        if (!isLoopBackEdge(Succ, &MBB))
          BBVisitedInfo[Succ].HasAMXRegLiveIn = true;
  }
  // Propagate NeedTileCfgLiveIn to predecessors until a clobbering call or an
  // already-marked block is reached.
  while (!CfgLiveInBBs.empty()) {
    MachineBasicBlock *MBB = CfgLiveInBBs.pop_back_val();
    for (auto *Pred : MBB->predecessors()) {
      auto &Info = BBVisitedInfo[Pred];
      if (Info.LastCall) {
        CfgNeedInsert.insert(Info.LastCall);
      } else if (!Info.NeedTileCfgLiveIn) {
        Info.NeedTileCfgLiveIn = true;
        if (Pred == &MF.front())
          CfgNeedInsert.insert(MIRef(Pred));
        else
          CfgLiveInBBs.push_back(Pred);
      }
    }
  }
  // No insert point found means there is no AMX instruction to configure for.
  if (CfgNeedInsert.empty())
    return false;
  // Avoid inserting ldtilecfg before any shape def.
  SmallVector<MachineBasicBlock *, 8> WorkList;
  for (auto &I : ShapeBBs) {
    // TODO: We can hoist shapes across BBs here.
    auto &Info = BBVisitedInfo[I.first];
    if (Info.HasAMXRegLiveIn) {
      // A tile register is live into this BB, but the shapes to config are
      // not defined until this BB. We cannot configure the tile registers;
      // emit the error message and bail out.
      emitErrorMsg(MF);
      return false;
    }
    if (Info.FirstAMX && Info.FirstAMX < I.second.back() &&
        !hoistShapesInBB(I.first, I.second)) {
      emitErrorMsg(MF);
      return false;
    }
    WorkList.push_back(I.first);
  }
  while (!WorkList.empty()) {
    MachineBasicBlock *MBB = WorkList.pop_back_val();
    for (auto *Pred : MBB->predecessors()) {
      auto &Info = BBVisitedInfo[Pred];
      if (!Info.TileCfgForbidden && !isLoopBackEdge(MBB, Pred)) {
        Info.TileCfgForbidden = true;
        WorkList.push_back(Pred);
      }
    }
  }
  DebugLoc DL;
  SmallSet<MIRef, 8> VisitedOrInserted;
  int SS = MF.getFrameInfo().CreateStackObject(
      ST.getTileConfigSize(), ST.getTileConfigAlignment(), false);
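  // SS is the stack slot holding the 64-byte tile configuration; it is
  // initialized once in the entry block and read by every inserted ldtilecfg.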
  // Try to insert the config for each live-in point collected above.
  for (const auto &I : CfgNeedInsert) {
    SmallSet<MIRef, 8> InsertPoints;
    SmallVector<MIRef, 8> WorkList({I});
    while (!WorkList.empty()) {
      MIRef I = WorkList.pop_back_val();
      if (!VisitedOrInserted.count(I)) {
        if (!BBVisitedInfo[I.MBB].TileCfgForbidden) {
          // All shapes are reachable here: stop sinking and try to insert.
          InsertPoints.insert(I);
        } else {
          // Avoid visiting the same BB more than once.
          VisitedOrInserted.insert(I);
          // Sink the insert point along successors that need the config.
          for (auto *Succ : I.MBB->successors())
            if (BBVisitedInfo[Succ].NeedTileCfgLiveIn)
              WorkList.push_back(MIRef(Succ));
        }
      }
    }
    // A given point may have been forked because shape conditions are not met.
    for (MIRef I : InsertPoints) {
      // Make sure we insert ldtilecfg after the last shape def in the MBB.
      auto It = ShapeBBs.find(I.MBB);
      if (It != ShapeBBs.end() && I < It->second.back())
        I = It->second.back();
      // The same MBB may be reached more than once; record it to avoid
      // inserting twice.
      if (VisitedOrInserted.insert(I).second) {
        auto II = I.MI ? I.MI->getIterator() : I.MBB->instr_begin();
        addFrameReference(BuildMI(*I.MBB, ++II, DL, TII->get(X86::PLDTILECFGV)),
                          SS);
      }
    }
  }
  // Zero-initialize the tile config memory in the entry block.
  MachineBasicBlock &MBB = MF.front();
  MachineInstr *MI = &*MBB.begin();
  if (ST.hasAVX512()) {
    Register Zmm = MRI->createVirtualRegister(&X86::VR512RegClass);
    BuildMI(MBB, MI, DL, TII->get(X86::AVX512_512_SET0), Zmm);
    addFrameReference(BuildMI(MBB, MI, DL, TII->get(X86::VMOVUPSZmr)), SS)
        .addReg(Zmm);
  } else if (ST.hasAVX2()) {
    Register Ymm = MRI->createVirtualRegister(&X86::VR256RegClass);
    BuildMI(MBB, MI, DL, TII->get(X86::AVX_SET0), Ymm);
    addFrameReference(BuildMI(MBB, MI, DL, TII->get(X86::VMOVUPSYmr)), SS)
        .addReg(Ymm);
    addFrameReference(BuildMI(MBB, MI, DL, TII->get(X86::VMOVUPSYmr)), SS, 32)
        .addReg(Ymm);
  } else {
    assert(ST.hasSSE2() && "AMX should assume SSE2 enabled");
    unsigned StoreOpc = ST.hasAVX() ? X86::VMOVUPSmr : X86::MOVUPSmr;
    Register Xmm = MRI->createVirtualRegister(&X86::VR128RegClass);
    BuildMI(MBB, MI, DL, TII->get(X86::V_SET0), Xmm);
    addFrameReference(BuildMI(MBB, MI, DL, TII->get(StoreOpc)), SS).addReg(Xmm);
    addFrameReference(BuildMI(MBB, MI, DL, TII->get(StoreOpc)), SS, 16)
        .addReg(Xmm);
    addFrameReference(BuildMI(MBB, MI, DL, TII->get(StoreOpc)), SS, 32)
        .addReg(Xmm);
    addFrameReference(BuildMI(MBB, MI, DL, TII->get(StoreOpc)), SS, 48)
        .addReg(Xmm);
  }
  // Fill in the palette first.
  addFrameReference(BuildMI(MBB, MI, DL, TII->get(X86::MOV8mi)), SS).addImm(1);

  return true;
}

FunctionPass *llvm::createX86PreTileConfigPass() {
  return new X86PreTileConfig();
}