46#define DEBUG_TYPE "aarch64-a57-fp-load-balancing"
52 cl::desc(
"Always modify dest registers regardless of color"),
59 cl::desc(
"Ignore balance information, always return "
60 "(1: Even, 2: Odd)."),
68 switch (
MI->getOpcode()) {
69 case AArch64::FMULSrr:
70 case AArch64::FNMULSrr:
71 case AArch64::FMULDrr:
72 case AArch64::FNMULDrr:
81 switch (
MI->getOpcode()) {
82 case AArch64::FMSUBSrrr:
83 case AArch64::FMADDSrrr:
84 case AArch64::FNMSUBSrrr:
85 case AArch64::FNMADDSrrr:
86 case AArch64::FMSUBDrrr:
87 case AArch64::FMADDDrrr:
88 case AArch64::FNMSUBDrrr:
89 case AArch64::FNMADDDrrr:
101enum class Color { Even, Odd };
103static const char *ColorNames[2] = {
"Even",
"Odd" };
108class AArch64A57FPLoadBalancingImpl {
110 bool run(MachineFunction &MF);
113 MachineRegisterInfo *MRI;
114 const TargetRegisterInfo *TRI;
115 RegisterClassInfo RCI;
118 bool colorChainSet(std::vector<Chain *> GV, MachineBasicBlock &
MBB,
120 bool colorChain(Chain *
G, Color
C, MachineBasicBlock &
MBB);
121 int scavengeRegister(Chain *
G, Color
C, MachineBasicBlock &
MBB);
122 void scanInstruction(MachineInstr *
MI,
unsigned Idx,
123 std::map<unsigned, Chain *> &Active,
124 std::vector<std::unique_ptr<Chain>> &AllChains);
125 void maybeKillChain(MachineOperand &MO,
unsigned Idx,
126 std::map<unsigned, Chain *> &RegChains);
128 Chain *getAndEraseNext(Color PreferredColor, std::vector<Chain *> &L);
134 explicit AArch64A57FPLoadBalancingLegacy() : MachineFunctionPass(ID) {}
136 bool runOnMachineFunction(MachineFunction &MF)
override;
138 MachineFunctionProperties getRequiredProperties()
const override {
139 return MachineFunctionProperties().setNoVRegs();
142 StringRef getPassName()
const override {
143 return "A57 FP Anti-dependency breaker";
146 void getAnalysisUsage(AnalysisUsage &AU)
const override {
153char AArch64A57FPLoadBalancingLegacy::ID = 0;
156 "AArch64 A57 FP Load-Balancing",
false,
false)
218 "Chain: broken invariant. A Chain can only be killed after its last "
239 "Chain: broken invariant. A Chain can only be killed after its last "
269 unsigned OtherEnd =
Other.KillInst ?
311 if (!MF.
getSubtarget<AArch64Subtarget>().balanceFPOps())
321 for (
auto &
MBB : MF) {
328bool AArch64A57FPLoadBalancingLegacy::runOnMachineFunction(
329 MachineFunction &MF) {
332 return AArch64A57FPLoadBalancingImpl().run(MF);
338 if (AArch64A57FPLoadBalancingImpl().
run(MF)) {
349 <<
" - scanning instructions...\n");
356 std::map<unsigned, Chain*> ActiveChains;
357 std::vector<std::unique_ptr<Chain>> AllChains;
360 scanInstruction(&
MI, Idx++, ActiveChains, AllChains);
363 <<
" chains created.\n");
373 for (
auto &
I : AllChains)
376 for (
auto &
I : AllChains)
377 for (
auto &J : AllChains)
378 if (
I != J &&
I->rangeOverlapsWith(*J))
379 EC.unionSets(
I.get(), J.get());
380 LLVM_DEBUG(
dbgs() <<
"Created " << EC.getNumClasses() <<
" disjoint sets.\n");
386 std::vector<std::vector<Chain*> > V;
387 for (
const auto &E : EC) {
390 std::vector<Chain *> Cs(EC.member_begin(*E), EC.member_end());
391 if (Cs.empty())
continue;
392 V.push_back(std::move(Cs));
398 [](
const std::vector<Chain *> &
A,
const std::vector<Chain *> &
B) {
399 return A.front()->startsBefore(
B.front());
416 Changed |= colorChainSet(std::move(
I),
MBB, Parity);
421Chain *AArch64A57FPLoadBalancingImpl::getAndEraseNext(Color PreferredColor,
422 std::vector<Chain *> &L) {
434 const unsigned SizeFuzz = 1;
435 unsigned MinSize =
L.front()->size() - SizeFuzz;
436 for (
auto I =
L.begin(),
E =
L.end();
I !=
E; ++
I) {
437 if ((*I)->size() <= MinSize) {
444 if ((*I)->getPreferredColor() == PreferredColor) {
452 Chain *Ch =
L.front();
457bool AArch64A57FPLoadBalancingImpl::colorChainSet(std::vector<Chain *> GV,
458 MachineBasicBlock &
MBB,
461 LLVM_DEBUG(
dbgs() <<
"colorChainSet(): #sets=" << GV.size() <<
"\n");
472 llvm::sort(GV, [](
const Chain *G1,
const Chain *G2) {
473 if (G1->size() != G2->size())
474 return G1->size() > G2->size();
475 if (G1->requiresFixup() != G2->requiresFixup())
476 return G1->requiresFixup() > G2->requiresFixup();
478 assert((G1 == G2 || (G1->startsBefore(G2) ^ G2->startsBefore(G1))) &&
479 "Starts before not total order!");
480 return G1->startsBefore(G2);
483 Color PreferredColor = Parity < 0 ? Color::Even : Color::Odd;
484 while (Chain *
G = getAndEraseNext(PreferredColor, GV)) {
486 Color
C = PreferredColor;
489 C =
G->getPreferredColor();
492 <<
", Color=" << ColorNames[(
int)
C] <<
"\n");
497 if (
G->requiresFixup() &&
C !=
G->getPreferredColor()) {
498 C =
G->getPreferredColor();
500 <<
" - not worthwhile changing; "
502 << ColorNames[(
int)
C] <<
"\n");
507 Parity += (
C == Color::Even) ?
G->size() : -
G->size();
508 PreferredColor = Parity < 0 ? Color::Even : Color::Odd;
514int AArch64A57FPLoadBalancingImpl::scavengeRegister(Chain *
G, Color
C,
515 MachineBasicBlock &
MBB) {
518 LiveRegUnits Units(*
TRI);
519 Units.addLiveOuts(
MBB);
522 while (
I != ChainEnd) {
524 Units.stepBackward(*
I);
529 assert(ChainBegin != ChainEnd &&
"Chain should contain instructions");
532 Units.accumulate(*
I);
533 }
while (
I != ChainBegin);
536 unsigned RegClassID = ChainBegin->getDesc().operands()[0].RegClass;
537 auto Ord = RCI.
getOrder(
TRI->getRegClass(RegClassID));
538 for (
auto Reg : Ord) {
539 if (!Units.available(
Reg))
541 if (
C == getColor(
Reg))
548bool AArch64A57FPLoadBalancingImpl::colorChain(Chain *
G, Color
C,
549 MachineBasicBlock &
MBB) {
552 << ColorNames[(
int)
C] <<
")\n");
556 int Reg = scavengeRegister(
G,
C,
MBB);
563 std::map<unsigned, unsigned> Substs;
564 for (MachineInstr &
I : *
G) {
565 if (!
G->contains(
I) && (&
I !=
G->getKill() ||
G->isKillImmutable()))
570 std::vector<unsigned> ToErase;
571 for (
auto &U :
I.operands()) {
572 if (
U.isReg() &&
U.isUse() && Substs.find(
U.getReg()) != Substs.end()) {
574 U.setReg(Substs[OrigReg]);
578 ToErase.push_back(OrigReg);
579 }
else if (
U.isRegMask()) {
580 for (
auto J : Substs) {
581 if (
U.clobbersPhysReg(J.first))
582 ToErase.push_back(J.first);
587 for (
auto J : ToErase)
591 if (&
I !=
G->getKill()) {
592 MachineOperand &MO =
I.getOperand(0);
595 if (
G->requiresFixup() && &
I ==
G->getLast())
606 assert(Substs.size() == 0 &&
"No substitutions should be left active!");
618void AArch64A57FPLoadBalancingImpl::scanInstruction(
619 MachineInstr *
MI,
unsigned Idx, std::map<unsigned, Chain *> &ActiveChains,
620 std::vector<std::unique_ptr<Chain>> &AllChains) {
625 for (
auto &
I :
MI->uses())
626 maybeKillChain(
I, Idx, ActiveChains);
627 for (
auto &
I :
MI->defs())
628 maybeKillChain(
I, Idx, ActiveChains);
632 Register DestReg =
MI->getOperand(0).getReg();
637 auto G = std::make_unique<Chain>(
MI, Idx, getColor(DestReg));
638 ActiveChains[DestReg] =
G.get();
639 AllChains.push_back(std::move(
G));
645 Register DestReg =
MI->getOperand(0).getReg();
646 Register AccumReg =
MI->getOperand(3).getReg();
648 maybeKillChain(
MI->getOperand(1), Idx, ActiveChains);
649 maybeKillChain(
MI->getOperand(2), Idx, ActiveChains);
650 if (DestReg != AccumReg)
651 maybeKillChain(
MI->getOperand(0), Idx, ActiveChains);
653 if (ActiveChains.find(AccumReg) != ActiveChains.end()) {
662 if (
MI->getOperand(3).isKill()) {
664 LLVM_DEBUG(
dbgs() <<
"Instruction was successfully added to chain.\n");
665 ActiveChains[AccumReg]->add(
MI, Idx, getColor(DestReg));
667 if (DestReg != AccumReg) {
668 ActiveChains[DestReg] = ActiveChains[AccumReg];
669 ActiveChains.erase(AccumReg);
675 dbgs() <<
"Cannot add to chain because accumulator operand wasn't "
676 <<
"marked <kill>!\n");
677 maybeKillChain(
MI->getOperand(3), Idx, ActiveChains);
682 auto G = std::make_unique<Chain>(
MI, Idx, getColor(DestReg));
683 ActiveChains[DestReg] =
G.get();
684 AllChains.push_back(std::move(
G));
690 for (
auto &
I :
MI->uses())
691 maybeKillChain(
I, Idx, ActiveChains);
692 for (
auto &
I :
MI->defs())
693 maybeKillChain(
I, Idx, ActiveChains);
698void AArch64A57FPLoadBalancingImpl::maybeKillChain(
699 MachineOperand &MO,
unsigned Idx,
700 std::map<unsigned, Chain *> &ActiveChains) {
708 if (MO.
isKill() && ActiveChains.find(MO.
getReg()) != ActiveChains.end()) {
713 ActiveChains.erase(MO.
getReg());
717 for (
auto I = ActiveChains.begin(),
E = ActiveChains.end();
722 I->second->setKill(
MI, Idx,
true);
723 ActiveChains.erase(
I++);
731Color AArch64A57FPLoadBalancingImpl::getColor(
unsigned Reg) {
732 if ((
TRI->getEncodingValue(
Reg) % 2) == 0)
740 return new AArch64A57FPLoadBalancingLegacy();
static bool isMul(MachineInstr *MI)
static cl::opt< unsigned > OverrideBalance("aarch64-a57-fp-load-balancing-override", cl::desc("Ignore balance information, always return " "(1: Even, 2: Odd)."), cl::init(0), cl::Hidden)
static cl::opt< bool > TransformAll("aarch64-a57-fp-load-balancing-force-all", cl::desc("Always modify dest registers regardless of color"), cl::init(false), cl::Hidden)
static bool isMla(MachineInstr *MI)
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
Generic implementation of equivalence classes through the use Tarjan's efficient union-find algorithm...
static bool runOnBasicBlock(MachineBasicBlock *MBB, unsigned BasicBlockNum, VRegRenamer &Renamer)
Register const TargetRegisterInfo * TRI
Promote Memory to Register
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
This file declares the machine register scavenger class.
MachineInstr * StartInst
The important (marker) instructions.
MachineBasicBlock::iterator begin() const
bool isKillImmutable() const
Can the Kill instruction (assuming one exists) be modified?
void add(MachineInstr *MI, unsigned Idx, Color C)
Add a new instruction into the chain.
bool contains(MachineInstr &MI)
Return true if MI is a member of the chain.
Color LastColor
The "color" of LastInst.
bool requiresFixup() const
Return true if the group will require a fixup MOV at the end.
MachineInstr * getLast() const
Return the last instruction in the chain.
bool KillIsImmutable
True if KillInst cannot be modified.
bool rangeOverlapsWith(const Chain &Other) const
Return true if this chain (StartInst..KillInst) overlaps with Other.
MachineInstr * getStart() const
Return the first instruction in the chain.
unsigned size() const
Return the number of instructions in the chain.
MachineBasicBlock::iterator end() const
Return an instruction that can be used as an iterator for the end of the chain.
void setKill(MachineInstr *MI, unsigned Idx, bool Immutable)
Inform the chain that its last active register (the dest register of LastInst) is killed by MI with n...
MachineInstr * getKill() const
Return the "kill" instruction (as set with setKill()) or NULL.
unsigned StartInstIdx
The index, from the start of the basic block, that each marker appears.
Color getPreferredColor()
Return the preferred color of this chain.
std::string str() const
Return a simple string representation of the chain.
std::set< MachineInstr * > Insts
All instructions in the chain.
Chain(MachineInstr *MI, unsigned Idx, Color C)
bool startsBefore(const Chain *Other) const
Return true if this chain starts before Other.
PreservedAnalyses run(MachineFunction &MF, MachineFunctionAnalysisManager &MFAM)
LLVM_ABI void setPreservesCFG()
This function should be called by the pass, iff they do not:
Represents analyses that only rely on functions' control flow.
EquivalenceClasses - This represents a collection of equivalence classes and supports three efficient...
FunctionPass class - This class is used to implement most global optimizations.
MachineInstrBundleIterator< MachineInstr > iterator
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
Function & getFunction()
Return the LLVM function that this machine code represents.
Representation of each machine instruction.
bool isReg() const
isReg - Tests if this is a MO_Register operand.
bool isRegMask() const
isRegMask - Tests if this is a MO_RegisterMask operand.
LLVM_ABI void setReg(Register Reg)
Change the register this operand corresponds to.
MachineInstr * getParent()
getParent - Return the instruction that this operand belongs to.
Register getReg() const
getReg - Returns the register number.
static bool clobbersPhysReg(const uint32_t *RegMask, MCRegister PhysReg)
clobbersPhysReg - Returns true if this RegMask clobbers PhysReg.
const TargetRegisterInfo * getTargetRegisterInfo() const
A set of analyses that are preserved following a run of a transformation pass.
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
PreservedAnalyses & preserveSet()
Mark an analysis set as preserved.
LLVM_ABI void runOnMachineFunction(const MachineFunction &MF, bool Rev=false)
runOnFunction - Prepare to answer questions about MF.
ArrayRef< MCPhysReg > getOrder(const TargetRegisterClass *RC) const
getOrder - Returns the preferred allocation order for RC.
A raw_ostream that writes to an std::string.
std::string & str()
Returns the string's reference.
@ C
The default llvm calling convention, compatible with C.
initializer< Ty > init(const Ty &Val)
PointerTypeMap run(const Module &M)
Compute the PointerTypeMap for the module M.
This is an optimization pass for GlobalISel generic memory operations.
AnalysisManager< MachineFunction > MachineFunctionAnalysisManager
FunctionPass * createAArch64A57FPLoadBalancingLegacyPass()
LLVM_ABI PreservedAnalyses getMachineFunctionPassPreservedAnalyses()
Returns the minimum set of Analyses that all machine function passes must preserve.
void sort(IteratorTy Start, IteratorTy End)
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
LLVM_ABI Printable printReg(Register Reg, const TargetRegisterInfo *TRI=nullptr, unsigned SubIdx=0, const MachineRegisterInfo *MRI=nullptr)
Prints virtual and physical registers with or without a TRI instance.