46#define DEBUG_TYPE "x86-pre-tile-config"
52 ": Failed to config tile register, please define the shape earlier");
58 MachineInstr *MI =
nullptr;
59 MachineBasicBlock *MBB =
nullptr;
63 MIRef(MachineBasicBlock *MBB) : MBB(MBB) {
64 for (
auto I = MBB->begin(),
E = MBB->end();
I !=
E &&
I->isPHI();
68 MIRef(MachineInstr *MI)
70 Pos(std::distance(MBB->instr_begin(), ++MI->getIterator())) {}
71 MIRef(MachineInstr *MI, MachineBasicBlock *MBB)
73 Pos(std::distance(MBB->instr_begin(), ++MI->getIterator())) {}
// Builds a reference from an explicit instruction, basic block and position;
// each argument is stored unchanged in the member of the same name.
74 MIRef(MachineInstr *MI, MachineBasicBlock *MBB,
size_t Pos)
75 : MI(MI), MBB(MBB), Pos(Pos) {}
// Truthiness test: the reference counts as set whenever MBB is non-null.
// MI is not consulted, so a reference that has a block but no instruction
// still converts to true.
76 operator bool()
const {
return MBB !=
nullptr; }
78 return MI ==
RHS.MI && MBB ==
RHS.MBB;
84 return std::tie(MBB, Pos) < std::tie(
RHS.MBB,
RHS.Pos);
89 return std::tie(MBB, Pos) > std::tie(
RHS.MBB,
RHS.Pos);
96 bool HasAMXRegLiveIn =
false;
97 bool TileCfgForbidden =
false;
98 bool NeedTileCfgLiveIn =
false;
101class X86PreTileConfigImpl {
102 std::function<MachineLoopInfo *()> GetMLI;
103 MachineRegisterInfo *MRI =
nullptr;
104 const MachineLoopInfo *MLI =
nullptr;
105 SmallPtrSet<MachineInstr *, 8> DefVisited;
106 DenseMap<MachineBasicBlock *, BBInfo> BBVisitedInfo;
107 DenseMap<MachineBasicBlock *, SmallVector<MIRef, 8>> ShapeBBs;
110 bool isDestructiveCall(MachineInstr &
MI, BitVector UsableRegs) {
112 MI.operands(), [](MachineOperand &MO) { return MO.isRegMask(); });
113 if (Iter ==
MI.operands_end())
116 return !UsableRegs.
none();
120 bool isAMXInstruction(MachineInstr &
MI) {
121 if (
MI.isPHI() ||
MI.isDebugInstr() ||
MI.getNumOperands() < 3)
123 switch (
MI.getOpcode()) {
124 case X86::PTILESTOREDV:
125 case X86::PTCVTROWD2PSrreV:
126 case X86::PTCVTROWD2PSrriV:
127 case X86::PTCVTROWPS2BF16HrreV:
128 case X86::PTCVTROWPS2BF16HrriV:
129 case X86::PTCVTROWPS2BF16LrreV:
130 case X86::PTCVTROWPS2BF16LrriV:
131 case X86::PTCVTROWPS2PHHrreV:
132 case X86::PTCVTROWPS2PHHrriV:
133 case X86::PTCVTROWPS2PHLrreV:
134 case X86::PTCVTROWPS2PHLrriV:
135 case X86::PTILEMOVROWrreV:
136 case X86::PTILEMOVROWrriV:
142 MachineOperand &MO =
MI.getOperand(0);
146 if (MRI->getRegClass(MO.
getReg())->getID() != X86::TILERegClassID)
149 collectShapeInfo(
MI);
154 bool isLoopBackEdge(MachineBasicBlock *Header, MachineBasicBlock *Bottom) {
155 if (!MLI->isLoopHeader(Header))
157 auto *
ML = MLI->getLoopFor(Header);
158 if (
ML->contains(Bottom) &&
ML->isLoopLatch(Bottom))
165 void collectShapeInfo(MachineInstr &
MI);
168 bool hoistShapesInBB(MachineBasicBlock *
MBB, SmallVectorImpl<MIRef> &Shapes) {
169 MIRef &FirstAMX = BBVisitedInfo[
MBB].FirstAMX;
172 for (
auto I = FirstShapeBelowAMX,
E = Shapes.
end();
I !=
E; ++
I) {
174 if (
I->MI->mayLoadOrStore())
176 for (
auto &MO :
I->MI->operands()) {
181 if (MO.
isReg() && MIRef(MRI->getVRegDef(MO.
getReg())) > FirstAMX)
185 MBB->
insert(InsertPoint,
I->MI->removeFromParent());
194 void releaseMemory() {
197 BBVisitedInfo.clear();
201 X86PreTileConfigImpl(std::function<MachineLoopInfo *()> GetMLI)
203 bool runOnMachineFunction(MachineFunction &MF);
// Default constructor; hands the pass's ID to the MachineFunctionPass base.
208 X86PreTileConfigLegacy() : MachineFunctionPass(ID) {}
211 StringRef getPassName()
const override {
212 return "Tile Register Pre-configure";
216 void getAnalysisUsage(AnalysisUsage &AU)
const override {
223 bool runOnMachineFunction(MachineFunction &MF)
override;
230char X86PreTileConfigLegacy::ID = 0;
233 "Tile Register Pre-configure",
false,
false)
241 auto &Refs = ShapeBBs[
MBB];
243 if (
I == Refs.end() || *
I != MIR)
248 {
MI.getOperand(1).getReg(),
MI.getOperand(2).getReg()});
249 while (!WorkList.empty()) {
250 Register R = WorkList.pop_back_val();
252 assert(
DefMI &&
"R must has one define instruction");
257 if (
DefMI->isPHI()) {
258 for (
unsigned I = 1;
I <
DefMI->getNumOperands();
I += 2)
259 if (isLoopBackEdge(DefMBB,
DefMI->getOperand(
I + 1).getMBB()))
260 RecordShape(
DefMI, DefMBB);
262 WorkList.push_back(
DefMI->getOperand(
I).getReg());
264 RecordShape(
DefMI, DefMBB);
270 scope_exit ClearStateOnExit([
this] { releaseMemory(); });
272 X86MachineFunctionInfo *X86FI = MF.
getInfo<X86MachineFunctionInfo>();
274 if (X86FI->getAMXProgModel() != AMXProgModelEnum::ManagedRA)
278 const TargetInstrInfo *
TII =
ST.getInstrInfo();
279 const TargetRegisterInfo *
TRI =
ST.getRegisterInfo();
280 const TargetRegisterClass *RC =
TRI->getRegClass(X86::TILERegClassID);
282 BitVector AMXRegs(
TRI->getNumRegs());
284 AMXRegs.set(X86::TMM0 +
I);
289 SmallSet<MIRef, 8> CfgNeedInsert;
290 SmallVector<MachineBasicBlock *, 8> CfgLiveInBBs;
291 for (
auto &
MBB : MF) {
293 auto &
Info = BBVisitedInfo[&
MBB];
294 for (
auto &
MI :
MBB) {
296 if (isAMXInstruction(
MI)) {
301 Info.NeedTileCfgLiveIn =
true;
305 }
else if (
MI.isCall() && isDestructiveCall(
MI, AMXRegs)) {
310 if (
Info.NeedTileCfgLiveIn) {
316 if (
Info.FirstAMX ||
Info.HasAMXRegLiveIn)
318 if (!isLoopBackEdge(Succ, &
MBB))
319 BBVisitedInfo[Succ].HasAMXRegLiveIn =
true;
323 while (!CfgLiveInBBs.
empty()) {
326 auto &
Info = BBVisitedInfo[Pred];
329 }
else if (!
Info.NeedTileCfgLiveIn) {
330 Info.NeedTileCfgLiveIn =
true;
331 if (Pred == &MF.front())
332 CfgNeedInsert.
insert(MIRef(Pred));
340 if (CfgNeedInsert.
empty())
344 SmallVector<MachineBasicBlock *, 8> WorkList;
345 for (
auto &
I : ShapeBBs) {
346 auto &
Info = BBVisitedInfo[
I.first];
348 if (
Info.HasAMXRegLiveIn) {
355 if (
Info.FirstAMX &&
Info.FirstAMX <
I.second.back() &&
356 !hoistShapesInBB(
I.first,
I.second)) {
362 while (!WorkList.
empty()) {
365 auto &
Info = BBVisitedInfo[Pred];
366 if (!
Info.TileCfgForbidden && !isLoopBackEdge(
MBB, Pred)) {
367 Info.TileCfgForbidden =
true;
374 SmallSet<MIRef, 8> VisitedOrInserted;
375 int SS = MF.getFrameInfo().CreateStackObject(
376 ST.getTileConfigSize(),
ST.getTileConfigAlignment(),
false);
379 for (
const auto &
I : CfgNeedInsert) {
380 SmallSet<MIRef, 8> InsertPoints;
382 while (!WorkList.
empty()) {
384 if (!VisitedOrInserted.
count(
I)) {
385 if (!BBVisitedInfo[
I.MBB].TileCfgForbidden) {
393 for (
auto *Succ :
I.MBB->successors())
394 if (BBVisitedInfo[Succ].NeedTileCfgLiveIn)
401 for (MIRef
I : InsertPoints) {
403 auto It = ShapeBBs.find(
I.MBB);
404 if (It != ShapeBBs.end() &&
I < It->second.back())
405 I = It->second.back();
408 if (VisitedOrInserted.
insert(
I).second) {
409 auto II =
I.MI ?
I.MI->getIterator() :
I.MBB->instr_begin();
417 MachineBasicBlock &
MBB = MF.
front();
419 if (
ST.hasAVX512()) {
420 Register Zmm =
MRI->createVirtualRegister(&X86::VR512RegClass);
424 }
else if (
ST.hasAVX2()) {
425 Register Ymm =
MRI->createVirtualRegister(&X86::VR256RegClass);
432 assert(
ST.hasSSE2() &&
"AMX should assume SSE2 enabled");
433 unsigned StoreOpc =
ST.hasAVX() ? X86::VMOVUPSmr : X86::MOVUPSmr;
434 Register Xmm =
MRI->createVirtualRegister(&X86::VR128RegClass);
451 return new X86PreTileConfigLegacy();
454bool X86PreTileConfigLegacy::runOnMachineFunction(
MachineFunction &MF) {
455 X86PreTileConfigImpl Impl(
456 [
this]() {
return &getAnalysis<MachineLoopInfoWrapperPass>().getLI(); });
457 return Impl.runOnMachineFunction(MF);
463 X86PreTileConfigImpl Impl(
465 return Impl.runOnMachineFunction(MF)
unsigned const MachineRegisterInfo * MRI
MachineInstrBuilder MachineInstrBuilder & DefMI
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static const Function * getParent(const Value *V)
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
Analysis containing CSE Info
const HexagonInstrInfo * TII
Module.h This file contains the declarations for the Module class.
Register const TargetRegisterInfo * TRI
Promote Memory to Register
uint64_t IntrinsicInst * II
#define INITIALIZE_PASS_DEPENDENCY(depName)
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
This file defines the make_scope_exit function, which executes user-defined cleanup logic at scope exit.
This file defines the SmallSet class.
static void emitErrorMsg(MachineFunction &MF)
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
AnalysisUsage & addRequired()
void setPreservesAll()
Set by analyses that do not transform their input at all.
void clearBitsInMask(const uint32_t *Mask, unsigned MaskWords=~0u)
clearBitsInMask - Clear any bits in this vector that are set in Mask.
bool none() const
none - Returns true if none of the bits are set.
Represents analyses that only rely on functions' control flow.
FunctionPass class - This class is used to implement most global optimizations.
LLVMContext & getContext() const
getContext - Return a reference to the LLVMContext associated with this function.
This is an important class for using LLVM in a threaded context.
LLVM_ABI void emitError(const Instruction *I, const Twine &ErrorStr)
emitError - Emit an error message to the currently installed error handler with optional location information.
LLVM_ABI instr_iterator insert(instr_iterator I, MachineInstr *M)
Insert MI into the instruction list before I, possibly inside a bundle.
iterator_range< succ_iterator > successors()
iterator_range< pred_iterator > predecessors()
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of passes that operate on the MachineFunction representation.
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
StringRef getName() const
getName - Return the name of the corresponding LLVM function.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
Function & getFunction()
Return the LLVM function that this machine code represents.
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do so.
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
Representation of each machine instruction.
Analysis pass that exposes the MachineLoopInfo for a machine function.
bool isReg() const
isReg - Tests if this is a MO_Register operand.
Register getReg() const
getReg - Returns the register number.
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
PreservedAnalyses & preserveSet()
Mark an analysis set as preserved.
Wrapper class representing virtual and physical registers.
constexpr bool isVirtual() const
Return true if the specified register number is in the virtual register namespace.
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
size_type count(const T &V) const
count - Return 1 if the element is in the set, 0 otherwise.
std::pair< const_iterator, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
unsigned getNumRegs() const
Return the number of registers in this class.
PreservedAnalyses run(MachineFunction &MF, MachineFunctionAnalysisManager &MFAM)
self_iterator getIterator()
This is an optimization pass for GlobalISel generic memory operations.
bool operator<(int64_t V1, const APSInt &V2)
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
scope_exit(Callable) -> scope_exit< Callable >
bool operator!=(uint64_t V1, const APInt &V2)
static const MachineInstrBuilder & addFrameReference(const MachineInstrBuilder &MIB, int FI, int Offset=0, bool mem=true)
addFrameReference - This function is used to add a reference to the base of an abstract object on the stack frame of the current function.
AnalysisManager< MachineFunction > MachineFunctionAnalysisManager
bool operator==(const AddressRangeValuePair &LHS, const AddressRangeValuePair &RHS)
LLVM_ABI PreservedAnalyses getMachineFunctionPassPreservedAnalyses()
Returns the minimum set of Analyses that all machine function passes must preserve.
bool operator>(int64_t V1, const APSInt &V2)
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference sizeof(SmallVector<T, 0>).
FunctionPass * createX86PreTileConfigLegacyPass()
auto lower_bound(R &&Range, T &&Value)
Provide wrappers to std::lower_bound which take ranges instead of having to pass begin/end explicitly.
auto find_if(R &&Range, UnaryPredicate P)
Provide wrappers to std::find_if which take ranges instead of having to pass begin/end explicitly.