#define DEBUG_TYPE "tile-pre-config"

static void emitErrorMsg(MachineFunction &MF) {
  LLVMContext &Context = MF.getMMI().getModule()->getContext();
  Context.emitError(
      MF.getName() +
      ": Failed to config tile register, please define the shape earlier");
}
  MIRef(MachineInstr *MI)
      : MI(MI), MBB(MI->getParent()),
        Pos(std::distance(MBB->instr_begin(), ++MI->getIterator())) {}
  MIRef(MachineInstr *MI, MachineBasicBlock *MBB)
      : MI(MI), MBB(MBB),
        Pos(std::distance(MBB->instr_begin(), ++MI->getIterator())) {}
  operator bool() const { return MBB != nullptr; }
  bool operator!=(const MIRef &RHS) const { return !(*this == RHS); }
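  // A MIRef is truthy only when it points into a real block. MIRefs compare by
  // owning block first and then by Pos, so they can be kept in sorted
  // containers and ordered as program points within a block.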
  bool HasAMXRegLiveIn = false;
  bool TileCfgForbidden = false;
  bool NeedTileCfgLiveIn = false;
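  // These per-block flags drive the insertion logic below: HasAMXRegLiveIn
  // says an AMX register may already be live on entry, TileCfgForbidden says
  // some shape is still defined in or after the block so the configuration
  // must not be placed here, and NeedTileCfgLiveIn says the configuration has
  // to be in effect when the block is entered.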
  /// Check whether a call clobbers any AMX register.
  bool isDestructiveCall(MachineInstr &MI, BitVector UsableRegs) {
    auto Iter = llvm::find_if(
        MI.operands(), [](MachineOperand &MO) { return MO.isRegMask(); });
    if (Iter == MI.operands_end())
      return false;
    UsableRegs.clearBitsInMask(Iter->getRegMask());
    return !UsableRegs.none();
  }
  /// Check whether MI is an AMX pseudo instruction.
  bool isAMXInstruction(MachineInstr &MI) {
    if (MI.isPHI() || MI.isDebugInstr() || MI.getNumOperands() < 3)
      return false;
    MachineOperand &MO = MI.getOperand(0);
    // AMX pseudos define a virtual register of the TILE register class.
    if (MO.isReg() && MO.getReg().isVirtual() &&
        MRI->getRegClass(MO.getReg())->getID() == X86::TILERegClassID) {
      collectShapeInfo(MI);
      return true;
    }
    // PTILESTOREDV is the one AMX pseudo that does not define a tile register.
    return MI.getOpcode() == X86::PTILESTOREDV;
  }
    // isLoopBackEdge: the edge from Bottom back to the loop header is a back
    // edge only if Bottom is a latch of that loop.
    if (ML->contains(Bottom) && ML->isLoopLatch(Bottom))
      return true;
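    // Back edges are deliberately skipped when block facts are propagated
    // below, so a loop does not feed liveness or config requirements back into
    // its own header.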
    MIRef &FirstAMX = BBVisitedInfo[MBB].FirstAMX;
    auto FirstShapeBelowAMX = llvm::lower_bound(Shapes, FirstAMX);
    auto InsertPoint = FirstAMX.MI->getIterator();
    for (auto I = FirstShapeBelowAMX, E = Shapes.end(); I != E; ++I) {
      // Do not hoist instructions that may touch memory.
      if (I->MI->mayLoadOrStore())
        return false;
      for (auto &MO : I->MI->operands()) {
        // ... give up if a source operand is itself defined below FirstAMX.
      }
      MBB->insert(InsertPoint, I->MI->removeFromParent());
    }
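    // Hoisting moves shape defs that ended up below the first AMX instruction
    // of the block back above it, so a single tile configuration at that point
    // can see every row/column value it needs; anything that touches memory or
    // depends on a value produced after the first AMX instruction defeats the
    // transformation.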
176 return "Tile Register Pre-configure";
190 BBVisitedInfo.
clear();
char X86PreTileConfig::ID = 0;

INITIALIZE_PASS_BEGIN(X86PreTileConfig, DEBUG_TYPE,
                      "Tile Register Pre-configure", false, false)
INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
INITIALIZE_PASS_END(X86PreTileConfig, DEBUG_TYPE,
                    "Tile Register Pre-configure", false, false)
void X86PreTileConfig::collectShapeInfo(MachineInstr &MI) {
  // Record a shape-defining instruction, keeping ShapeBBs[MBB] sorted and free
  // of duplicates.
  auto RecordShape = [&](MachineInstr *MI, MachineBasicBlock *MBB) {
    MIRef MIR(MI, MBB);
    auto I = llvm::lower_bound(ShapeBBs[MBB], MIR);
    if (I == ShapeBBs[MBB].end() || *I != MIR)
      ShapeBBs[MBB].insert(I, MIR);
  };

  // Operands 1 and 2 of an AMX pseudo are its row and column shapes.
  SmallVector<Register, 8> WorkList(
      {MI.getOperand(1).getReg(), MI.getOperand(2).getReg()});
  while (!WorkList.empty()) {
    Register R = WorkList.pop_back_val();
    MachineInstr *DefMI = MRI->getVRegDef(R);
    assert(DefMI && "R must have one define instruction");
    MachineBasicBlock *DefMBB = DefMI->getParent();
    if (DefMI->isPHI()) {
      // A PHI reached through a loop back edge is itself treated as the shape
      // def; other incoming values are pushed onto the worklist instead.
      // ...
      RecordShape(DefMI, DefMBB);
    } else {
      RecordShape(DefMI, DefMBB);
    }
  }
}
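// collectShapeInfo() walks the virtual-register defs feeding the row/column
// operands of each AMX pseudo and records, per basic block, the points at
// which the shapes become available; the insertion logic below never places a
// tile configuration above the last of them.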
    AMXRegs.set(X86::TMM0 + I);
251 MLI = &getAnalysis<MachineLoopInfo>();
  for (auto &MBB : MF) {
    size_t Pos = 0;
    for (auto &MI : MBB) {
      ++Pos;
      if (isAMXInstruction(MI)) {
        // If a call precedes the AMX instruction, the config must be reloaded
        // after that call; otherwise it must be live into the block.
        if (BBVisitedInfo[&MBB].LastCall)
          CfgNeedInsert.insert(BBVisitedInfo[&MBB].LastCall);
        else
          BBVisitedInfo[&MBB].NeedTileCfgLiveIn = true;
        // Remember the first AMX instruction in case a shape def follows it.
        if (!BBVisitedInfo[&MBB].FirstAMX)
          BBVisitedInfo[&MBB].FirstAMX = MIRef(&MI, &MBB, Pos);
      } else if (MI.isCall() && isDestructiveCall(MI, AMXRegs)) {
        // Only calls that clobber AMX registers invalidate the configuration.
        BBVisitedInfo[&MBB].LastCall = MIRef(&MI, &MBB, Pos);
      }
    }
    if (BBVisitedInfo[&MBB].NeedTileCfgLiveIn) {
      if (&MBB == &MF.front())
        CfgNeedInsert.insert(MIRef(&MBB));
      else
        CfgLiveInBBs.push_back(&MBB);
    }
    if (BBVisitedInfo[&MBB].FirstAMX || BBVisitedInfo[&MBB].HasAMXRegLiveIn)
      for (auto *Succ : MBB.successors())
        if (!isLoopBackEdge(Succ, &MBB))
          BBVisitedInfo[Succ].HasAMXRegLiveIn = true;
  }
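  // After this scan, every block knows its first AMX instruction, the last
  // call that clobbers the tile registers, and whether the configuration has
  // to be live on entry; AMX liveness is pushed forward to successors except
  // across loop back edges.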
  while (!CfgLiveInBBs.empty()) {
    MachineBasicBlock *MBB = CfgLiveInBBs.pop_back_val();
    for (auto *Pred : MBB->predecessors()) {
      if (BBVisitedInfo[Pred].LastCall) {
        CfgNeedInsert.insert(BBVisitedInfo[Pred].LastCall);
      } else if (!BBVisitedInfo[Pred].NeedTileCfgLiveIn) {
        BBVisitedInfo[Pred].NeedTileCfgLiveIn = true;
        if (Pred == &MF.front())
          CfgNeedInsert.insert(MIRef(Pred));
        else
          CfgLiveInBBs.push_back(Pred);
      }
    }
  }
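  // The live-in requirement is propagated backwards through predecessors until
  // it hits a clobbering call or the function entry; each such stopping point
  // becomes a candidate location where the configuration must be (re)inserted.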
  // No insertion point means the function contains no AMX instruction.
  if (CfgNeedInsert.empty())
    return false;
  // Never insert the configuration above a shape def.
  SmallVector<MachineBasicBlock *, 8> WorkList;
  for (auto &I : ShapeBBs) {
    // A shape defined in a block that already has AMX registers live in cannot
    // be made to dominate the configuration point.
    if (BBVisitedInfo[I.first].HasAMXRegLiveIn) {
      emitErrorMsg(MF);
      return false;
    }
    if (BBVisitedInfo[I.first].FirstAMX &&
        BBVisitedInfo[I.first].FirstAMX < I.second.back() &&
        !hoistShapesInBB(I.first, I.second)) {
      emitErrorMsg(MF);
      return false;
    }
    WorkList.push_back(I.first);
  }
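  // If the shapes cannot be arranged to dominate the first AMX use, either
  // because an AMX register is already live into the defining block or because
  // hoisting within the block failed, the pass reports the "define the shape
  // earlier" diagnostic and leaves the function unconfigured.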
  while (!WorkList.empty()) {
    MachineBasicBlock *MBB = WorkList.pop_back_val();
    for (auto *Pred : MBB->predecessors()) {
      if (!BBVisitedInfo[Pred].TileCfgForbidden && !isLoopBackEdge(MBB, Pred)) {
        BBVisitedInfo[Pred].TileCfgForbidden = true;
        WorkList.push_back(Pred);
      }
    }
  }
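  // Every block from which a shape def is still reachable is marked forbidden,
  // so the configuration can only be placed where all shapes are already
  // defined (again ignoring loop back edges).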
  SmallSet<MIRef, 8> VisitedOrInserted;
  int SS = MF.getFrameInfo().CreateStackObject(
      ST.getTileConfigSize(), ST.getTileConfigAlignment(), false);
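  // SS is the stack slot that holds the in-memory tile configuration the
  // inserted ldtilecfg will read; it is zero-filled further down, and the
  // shape values themselves are written into it by a later tile-config pass
  // once registers are assigned.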
  // Try to insert the configuration for every candidate point collected above.
  for (const auto &I : CfgNeedInsert) {
    SmallSet<MIRef, 8> InsertPoints;
    SmallVector<MIRef, 8> WorkList({I});
    while (!WorkList.empty()) {
      MIRef I = WorkList.pop_back_val();
      if (!VisitedOrInserted.count(I)) {
        if (!BBVisitedInfo[I.MBB].TileCfgForbidden) {
          // Every shape is already defined here: stop sinking and insert.
          InsertPoints.insert(I);
        } else {
          // Otherwise sink the point into the successors that still need the
          // configuration to be live in.
          VisitedOrInserted.insert(I);
          for (auto *Succ : I.MBB->successors())
            if (BBVisitedInfo[Succ].NeedTileCfgLiveIn)
              WorkList.push_back(MIRef(Succ));
        }
      }
    }

    for (MIRef I : InsertPoints) {
      // Keep the configuration below the last shape def of the block.
      if (ShapeBBs.count(I.MBB) && I < ShapeBBs[I.MBB].back())
        I = ShapeBBs[I.MBB].back();
      // A block can be reached from several candidates; insert only once.
      if (VisitedOrInserted.insert(I).second) {
        auto II = I.MI ? I.MI->getIterator() : I.MBB->instr_begin();
        // ... BuildMI() the pseudo tile-config load here, reading slot SS.
      }
    }
  }
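    // A single candidate can fan out into several insertion points when some
    // successors are still waiting for shape defs; the VisitedOrInserted set
    // keeps the sinking walk and the final insertion from repeating work.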
  // Zero-fill the configuration slot with the widest vector store available.
  if (ST.hasAVX512()) {
    Register Zmm = MRI->createVirtualRegister(&X86::VR512RegClass);
    // ... zero Zmm and store it once (a single 64-byte store covers the slot).
  } else if (ST.hasAVX2()) {
    Register Ymm = MRI->createVirtualRegister(&X86::VR256RegClass);
    // ... zero Ymm and store it twice (two 32-byte stores).
  } else {
    assert(ST.hasSSE2() && "AMX should assume SSE2 enabled");
    unsigned StoreOpc = ST.hasAVX() ? X86::VMOVUPSmr : X86::MOVUPSmr;
    Register Xmm = MRI->createVirtualRegister(&X86::VR128RegClass);
    // ... zero Xmm and store it four times (four 16-byte stores).
  }
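  // Whichever path is taken, the whole tile-configuration area starts out as
  // zeroes; only the palette and per-tile row/column fields written afterwards
  // turn it into a valid configuration for ldtilecfg.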
FunctionPass *llvm::createX86PreTileConfigPass() {
  return new X86PreTileConfig();
}
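// createX86PreTileConfigPass() is what the X86 target pipeline calls to insert
// the pseudo tile-config instruction; the pass runs before register allocation
// so the configuration point is fixed before tiles get physical registers.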