44#define DEBUG_TYPE "tile-pre-config"
50 ": Failed to config tile register, please define the shape earlier");
68 Pos(
std::distance(
MBB->instr_begin(), ++
MI->getIterator())) {}
71 Pos(
std::distance(
MBB->instr_begin(), ++
MI->getIterator())) {}
74 operator bool()
const {
return MBB !=
nullptr; }
78 bool operator!=(
const MIRef &RHS)
const {
return !(*
this ==
RHS); }
94 bool HasAMXRegLiveIn =
false;
95 bool TileCfgForbidden =
false;
96 bool NeedTileCfgLiveIn =
false;
110 if (Iter ==
MI.operands_end())
113 return !UsableRegs.
none();
118 if (
MI.isPHI() ||
MI.isDebugInstr() ||
MI.getNumOperands() < 3)
124 MRI->getRegClass(MO.
getReg())->getID() == X86::TILERegClassID) {
125 collectShapeInfo(
MI);
129 return MI.getOpcode() == X86::PTILESTOREDV;
137 if (
ML->contains(Bottom) &&
ML->isLoopLatch(Bottom))
148 MIRef &FirstAMX = BBVisitedInfo[
MBB].FirstAMX;
150 auto InsertPoint = FirstAMX.MI->getIterator();
151 for (
auto I = FirstShapeBelowAMX,
E = Shapes.
end();
I !=
E; ++
I) {
153 if (
I->MI->mayLoadOrStore())
155 for (
auto &MO :
I->MI->operands()) {
164 MBB->
insert(InsertPoint,
I->MI->removeFromParent());
177 return "Tile Register Pre-configure";
191 BBVisitedInfo.
clear();
202char X86PreTileConfig::ID = 0;
205 "Tile Register Pre-configure",
false,
false)
214 if (
I == ShapeBBs[
MBB].end() || *
I != MIR)
219 {
MI.getOperand(1).
getReg(),
MI.getOperand(2).getReg()});
220 while (!WorkList.empty()) {
221 Register R = WorkList.pop_back_val();
223 assert(
DefMI &&
"R must has one define instruction");
230 RecordShape(
DefMI, DefMBB);
234 RecordShape(
DefMI, DefMBB);
248 AMXRegs.set(X86::TMM0 +
I);
252 MLI = &getAnalysis<MachineLoopInfo>();
255 for (
auto &
MBB : MF) {
257 for (
auto &
MI :
MBB) {
259 if (isAMXInstruction(
MI)) {
261 if (BBVisitedInfo[&
MBB].LastCall)
262 CfgNeedInsert.
insert(BBVisitedInfo[&
MBB].LastCall);
264 BBVisitedInfo[&
MBB].NeedTileCfgLiveIn =
true;
266 if (!BBVisitedInfo[&
MBB].FirstAMX)
267 BBVisitedInfo[&
MBB].FirstAMX = MIRef(&
MI, &
MBB, Pos);
268 }
else if (
MI.isCall() && isDestructiveCall(
MI, AMXRegs)) {
270 BBVisitedInfo[&
MBB].LastCall = MIRef(&
MI, &
MBB, Pos);
273 if (BBVisitedInfo[&
MBB].NeedTileCfgLiveIn) {
279 if (BBVisitedInfo[&
MBB].FirstAMX || BBVisitedInfo[&
MBB].HasAMXRegLiveIn)
281 if (!isLoopBackEdge(Succ, &
MBB))
282 BBVisitedInfo[Succ].HasAMXRegLiveIn =
true;
286 while (!CfgLiveInBBs.
empty()) {
289 if (BBVisitedInfo[Pred].LastCall) {
290 CfgNeedInsert.
insert(BBVisitedInfo[Pred].LastCall);
291 }
else if (!BBVisitedInfo[Pred].NeedTileCfgLiveIn) {
292 BBVisitedInfo[Pred].NeedTileCfgLiveIn =
true;
293 if (Pred == &MF.front())
294 CfgNeedInsert.
insert(MIRef(Pred));
302 if (CfgNeedInsert.
empty())
308 for (
auto &
I : ShapeBBs) {
310 if (BBVisitedInfo[
I.first].HasAMXRegLiveIn) {
317 if (BBVisitedInfo[
I.first].FirstAMX &&
318 BBVisitedInfo[
I.first].FirstAMX <
I.second.back() &&
319 !hoistShapesInBB(
I.first,
I.second)) {
325 while (!WorkList.
empty()) {
328 if (!BBVisitedInfo[Pred].TileCfgForbidden && !isLoopBackEdge(
MBB, Pred)) {
329 BBVisitedInfo[Pred].TileCfgForbidden =
true;
337 int SS = MF.getFrameInfo().CreateStackObject(
338 ST.getTileConfigSize(),
ST.getTileConfigAlignment(),
false);
341 for (
const auto &
I : CfgNeedInsert) {
344 while (!WorkList.
empty()) {
346 if (!VisitedOrInserted.
count(
I)) {
347 if (!BBVisitedInfo[
I.MBB].TileCfgForbidden) {
355 for (
auto *Succ :
I.MBB->successors())
356 if (BBVisitedInfo[Succ].NeedTileCfgLiveIn)
363 for (MIRef
I : InsertPoints) {
365 if (ShapeBBs.count(
I.MBB) &&
I < ShapeBBs[
I.MBB].back())
366 I = ShapeBBs[
I.MBB].back();
369 if (VisitedOrInserted.
insert(
I).second) {
370 auto II =
I.MI ?
I.MI->getIterator() :
I.MBB->instr_begin();
380 if (
ST.hasAVX512()) {
381 Register Zmm =
MRI->createVirtualRegister(&X86::VR512RegClass);
385 }
else if (
ST.hasAVX2()) {
386 Register Ymm =
MRI->createVirtualRegister(&X86::VR256RegClass);
393 assert(
ST.hasSSE2() &&
"AMX should assume SSE2 enabled");
394 unsigned StoreOpc =
ST.hasAVX() ? X86::VMOVUPSmr : X86::MOVUPSmr;
395 Register Xmm =
MRI->createVirtualRegister(&X86::VR128RegClass);
412 return new X86PreTileConfig();
unsigned const MachineRegisterInfo * MRI
MachineInstrBuilder MachineInstrBuilder & DefMI
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static const Function * getParent(const Value *V)
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
const HexagonInstrInfo * TII
unsigned const TargetRegisterInfo * TRI
static unsigned getReg(const MCDisassembler *D, unsigned RC, unsigned RegNo)
#define INITIALIZE_PASS_DEPENDENCY(depName)
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
This file defines the SmallSet class.
Tile Register Pre configure
static void emitErrorMsg(MachineFunction &MF)
Represent the analysis usage information of a pass.
AnalysisUsage & addRequired()
void setPreservesAll()
Set by analyses that do not transform their input at all.
void clearBitsInMask(const uint32_t *Mask, unsigned MaskWords=~0u)
clearBitsInMask - Clear any bits in this vector that are set in Mask.
bool none() const
none - Returns true if none of the bits are set.
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
FunctionPass class - This class is used to implement most global optimizations.
This is an important class for using LLVM in a threaded context.
void emitError(uint64_t LocCookie, const Twine &ErrorStr)
emitError - Emit an error message to the currently installed error handler with optional location inf...
instr_iterator insert(instr_iterator I, MachineInstr *M)
Insert MI into the instruction list before I, possibly inside a bundle.
iterator_range< succ_iterator > successors()
iterator_range< pred_iterator > predecessors()
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
virtual bool runOnMachineFunction(MachineFunction &MF)=0
runOnMachineFunction - This method must be overloaded to perform the desired machine code transformat...
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
StringRef getName() const
getName - Return the name of the corresponding LLVM function.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
MachineModuleInfo & getMMI() const
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
Representation of each machine instruction.
const MachineBasicBlock * getParent() const
unsigned getNumOperands() const
Returns the total number of operands.
bool isMoveImmediate(QueryType Type=IgnoreBundle) const
Return true if this instruction is a move immediate (including conditional moves) instruction.
const MachineOperand & getOperand(unsigned i) const
bool isLoopHeader(const MachineBasicBlock *BB) const
True if the block is a loop header node.
MachineLoop * getLoopFor(const MachineBasicBlock *BB) const
Return the innermost loop that BB lives in.
const Module * getModule() const
MachineOperand class - Representation of each machine instruction operand.
bool isReg() const
isReg - Tests if this is a MO_Register operand.
MachineBasicBlock * getMBB() const
Register getReg() const
getReg - Returns the register number.
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
LLVMContext & getContext() const
Get the global data context.
virtual StringRef getPassName() const
getPassName - Return a nice clean name for a pass.
virtual void releaseMemory()
releaseMemory() - This member can be implemented by a pass if it wants to be able to release its memo...
Wrapper class representing virtual and physical registers.
constexpr bool isVirtual() const
Return true if the specified register number is in the virtual register namespace.
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
size_type count(const T &V) const
count - Return 1 if the element is in the set, 0 otherwise.
std::pair< const_iterator, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
StringRef - Represent a constant reference to a string, i.e.
TargetInstrInfo - Interface to description of machine instruction set.
unsigned getNumRegs() const
Return the number of registers in this class.
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
X86MachineFunctionInfo - This class is derived from MachineFunction and contains private X86 target-s...
void setHasVirtualTileReg(bool v)
unsigned ID
LLVM IR allows arbitrary numbers to be used as calling convention identifiers.
This is an optimization pass for GlobalISel generic memory operations.
bool operator<(int64_t V1, const APSInt &V2)
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
bool operator!=(uint64_t V1, const APInt &V2)
static const MachineInstrBuilder & addFrameReference(const MachineInstrBuilder &MIB, int FI, int Offset=0, bool mem=true)
addFrameReference - This function is used to add a reference to the base of an abstract object on the...
bool operator==(const AddressRangeValuePair &LHS, const AddressRangeValuePair &RHS)
bool operator>(int64_t V1, const APSInt &V2)
auto lower_bound(R &&Range, T &&Value)
Provide wrappers to std::lower_bound which take ranges instead of having to pass begin/end explicitly...
auto find_if(R &&Range, UnaryPredicate P)
Provide wrappers to std::find_if which take ranges instead of having to pass begin/end explicitly.
FunctionPass * createX86PreTileConfigPass()
Return a pass that inserts the pseudo tile config instruction.
Implement std::hash so that hash_code can be used in STL containers.