45#define DEBUG_TYPE "tile-pre-config"
51 ": Failed to config tile register, please define the shape earlier");
69 Pos(
std::distance(
MBB->instr_begin(), ++
MI->getIterator())) {}
72 Pos(
std::distance(
MBB->instr_begin(), ++
MI->getIterator())) {}
75 operator bool()
const {
return MBB !=
nullptr; }
79 bool operator!=(
const MIRef &RHS)
const {
return !(*
this ==
RHS); }
95 bool HasAMXRegLiveIn =
false;
96 bool TileCfgForbidden =
false;
97 bool NeedTileCfgLiveIn =
false;
111 if (Iter ==
MI.operands_end())
114 return !UsableRegs.
none();
119 if (
MI.isPHI() ||
MI.isDebugInstr() ||
MI.getNumOperands() < 3)
125 MRI->getRegClass(MO.
getReg())->getID() == X86::TILERegClassID) {
126 collectShapeInfo(
MI);
130 return MI.getOpcode() == X86::PTILESTOREDV;
138 if (
ML->contains(Bottom) &&
ML->isLoopLatch(Bottom))
149 MIRef &FirstAMX = BBVisitedInfo[
MBB].FirstAMX;
151 auto InsertPoint = FirstAMX.MI->getIterator();
152 for (
auto I = FirstShapeBelowAMX, E = Shapes.
end();
I != E; ++
I) {
154 if (
I->MI->mayLoadOrStore())
156 for (
auto &MO :
I->MI->operands()) {
165 MBB->
insert(InsertPoint,
I->MI->removeFromParent());
178 return "Tile Register Pre-configure";
192 BBVisitedInfo.
clear();
203char X86PreTileConfig::ID = 0;
206 "Tile Register Pre-configure",
false,
false)
215 if (
I == ShapeBBs[
MBB].end() || *
I != MIR)
220 {
MI.getOperand(1).
getReg(),
MI.getOperand(2).getReg()});
221 while (!WorkList.empty()) {
222 Register R = WorkList.pop_back_val();
224 assert(
DefMI &&
"R must has one define instruction");
231 RecordShape(
DefMI, DefMBB);
235 RecordShape(
DefMI, DefMBB);
253 AMXRegs.set(X86::TMM0 +
I);
257 MLI = &getAnalysis<MachineLoopInfoWrapperPass>().getLI();
260 for (
auto &
MBB : MF) {
262 for (
auto &
MI :
MBB) {
264 if (isAMXInstruction(
MI)) {
266 if (BBVisitedInfo[&
MBB].LastCall)
267 CfgNeedInsert.
insert(BBVisitedInfo[&
MBB].LastCall);
269 BBVisitedInfo[&
MBB].NeedTileCfgLiveIn =
true;
271 if (!BBVisitedInfo[&
MBB].FirstAMX)
272 BBVisitedInfo[&
MBB].FirstAMX = MIRef(&
MI, &
MBB, Pos);
273 }
else if (
MI.isCall() && isDestructiveCall(
MI, AMXRegs)) {
275 BBVisitedInfo[&
MBB].LastCall = MIRef(&
MI, &
MBB, Pos);
278 if (BBVisitedInfo[&
MBB].NeedTileCfgLiveIn) {
284 if (BBVisitedInfo[&
MBB].FirstAMX || BBVisitedInfo[&
MBB].HasAMXRegLiveIn)
286 if (!isLoopBackEdge(Succ, &
MBB))
287 BBVisitedInfo[Succ].HasAMXRegLiveIn =
true;
291 while (!CfgLiveInBBs.
empty()) {
294 if (BBVisitedInfo[Pred].LastCall) {
295 CfgNeedInsert.
insert(BBVisitedInfo[Pred].LastCall);
296 }
else if (!BBVisitedInfo[Pred].NeedTileCfgLiveIn) {
297 BBVisitedInfo[Pred].NeedTileCfgLiveIn =
true;
298 if (Pred == &MF.front())
299 CfgNeedInsert.
insert(MIRef(Pred));
307 if (CfgNeedInsert.
empty())
312 for (
auto &
I : ShapeBBs) {
314 if (BBVisitedInfo[
I.first].HasAMXRegLiveIn) {
321 if (BBVisitedInfo[
I.first].FirstAMX &&
322 BBVisitedInfo[
I.first].FirstAMX <
I.second.back() &&
323 !hoistShapesInBB(
I.first,
I.second)) {
329 while (!WorkList.
empty()) {
332 if (!BBVisitedInfo[Pred].TileCfgForbidden && !isLoopBackEdge(
MBB, Pred)) {
333 BBVisitedInfo[Pred].TileCfgForbidden =
true;
341 int SS = MF.getFrameInfo().CreateStackObject(
342 ST.getTileConfigSize(),
ST.getTileConfigAlignment(),
false);
345 for (
const auto &
I : CfgNeedInsert) {
348 while (!WorkList.
empty()) {
350 if (!VisitedOrInserted.
count(
I)) {
351 if (!BBVisitedInfo[
I.MBB].TileCfgForbidden) {
359 for (
auto *Succ :
I.MBB->successors())
360 if (BBVisitedInfo[Succ].NeedTileCfgLiveIn)
367 for (MIRef
I : InsertPoints) {
369 if (ShapeBBs.count(
I.MBB) &&
I < ShapeBBs[
I.MBB].back())
370 I = ShapeBBs[
I.MBB].back();
373 if (VisitedOrInserted.
insert(
I).second) {
374 auto II =
I.MI ?
I.MI->getIterator() :
I.MBB->instr_begin();
384 if (
ST.hasAVX512()) {
385 Register Zmm =
MRI->createVirtualRegister(&X86::VR512RegClass);
389 }
else if (
ST.hasAVX2()) {
390 Register Ymm =
MRI->createVirtualRegister(&X86::VR256RegClass);
397 assert(
ST.hasSSE2() &&
"AMX should assume SSE2 enabled");
398 unsigned StoreOpc =
ST.hasAVX() ? X86::VMOVUPSmr : X86::MOVUPSmr;
399 Register Xmm =
MRI->createVirtualRegister(&X86::VR128RegClass);
416 return new X86PreTileConfig();
unsigned const MachineRegisterInfo * MRI
MachineInstrBuilder MachineInstrBuilder & DefMI
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static const Function * getParent(const Value *V)
const HexagonInstrInfo * TII
unsigned const TargetRegisterInfo * TRI
static unsigned getReg(const MCDisassembler *D, unsigned RC, unsigned RegNo)
Module.h This file contains the declarations for the Module class.
uint64_t IntrinsicInst * II
#define INITIALIZE_PASS_DEPENDENCY(depName)
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
This file defines the SmallSet class.
Tile Register Pre-configure
static void emitErrorMsg(MachineFunction &MF)
Represent the analysis usage information of a pass.
AnalysisUsage & addRequired()
void setPreservesAll()
Set by analyses that do not transform their input at all.
void clearBitsInMask(const uint32_t *Mask, unsigned MaskWords=~0u)
clearBitsInMask - Clear any bits in this vector that are set in Mask.
bool none() const
none - Returns true if none of the bits are set.
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
FunctionPass class - This class is used to implement most global optimizations.
LLVMContext & getContext() const
getContext - Return a reference to the LLVMContext associated with this function.
This is an important class for using LLVM in a threaded context.
void emitError(uint64_t LocCookie, const Twine &ErrorStr)
emitError - Emit an error message to the currently installed error handler with optional location inf...
bool isLoopHeader(const BlockT *BB) const
LoopT * getLoopFor(const BlockT *BB) const
Return the inner most loop that BB lives in.
instr_iterator insert(instr_iterator I, MachineInstr *M)
Insert MI into the instruction list before I, possibly inside a bundle.
iterator_range< succ_iterator > successors()
iterator_range< pred_iterator > predecessors()
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
virtual bool runOnMachineFunction(MachineFunction &MF)=0
runOnMachineFunction - This method must be overloaded to perform the desired machine code transformat...
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
StringRef getName() const
getName - Return the name of the corresponding LLVM function.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
Function & getFunction()
Return the LLVM function that this machine code represents.
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
Representation of each machine instruction.
const MachineBasicBlock * getParent() const
unsigned getNumOperands() const
Returns the total number of operands.
bool isMoveImmediate(QueryType Type=IgnoreBundle) const
Return true if this instruction is a move immediate (including conditional moves) instruction.
const MachineOperand & getOperand(unsigned i) const
MachineOperand class - Representation of each machine instruction operand.
bool isReg() const
isReg - Tests if this is a MO_Register operand.
MachineBasicBlock * getMBB() const
Register getReg() const
getReg - Returns the register number.
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
virtual StringRef getPassName() const
getPassName - Return a nice clean name for a pass.
virtual void releaseMemory()
releaseMemory() - This member can be implemented by a pass if it wants to be able to release its memo...
Wrapper class representing virtual and physical registers.
constexpr bool isVirtual() const
Return true if the specified register number is in the virtual register namespace.
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
size_type count(const T &V) const
count - Return 1 if the element is in the set, 0 otherwise.
std::pair< const_iterator, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
StringRef - Represent a constant reference to a string, i.e.
TargetInstrInfo - Interface to description of machine instruction set.
unsigned getNumRegs() const
Return the number of registers in this class.
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
X86MachineFunctionInfo - This class is derived from MachineFunction and contains private X86 target-s...
AMXProgModelEnum getAMXProgModel() const
unsigned ID
LLVM IR allows arbitrary numbers to be used as calling convention identifiers.
This is an optimization pass for GlobalISel generic memory operations.
bool operator<(int64_t V1, const APSInt &V2)
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
bool operator!=(uint64_t V1, const APInt &V2)
static const MachineInstrBuilder & addFrameReference(const MachineInstrBuilder &MIB, int FI, int Offset=0, bool mem=true)
addFrameReference - This function is used to add a reference to the base of an abstract object on the...
bool operator==(const AddressRangeValuePair &LHS, const AddressRangeValuePair &RHS)
bool operator>(int64_t V1, const APSInt &V2)
auto lower_bound(R &&Range, T &&Value)
Provide wrappers to std::lower_bound which take ranges instead of having to pass begin/end explicitly...
auto find_if(R &&Range, UnaryPredicate P)
Provide wrappers to std::find_if which take ranges instead of having to pass begin/end explicitly.
FunctionPass * createX86PreTileConfigPass()
Return a pass that inserts the pseudo tile config instruction.
Implement std::hash so that hash_code can be used in STL containers.