52#include "llvm/Config/llvm-config.h"
59#define DEBUG_TYPE "ppc-vsx-swaps"
65struct PPCVSXSwapEntry {
73 unsigned int IsLoad : 1;
74 unsigned int IsStore : 1;
75 unsigned int IsSwap : 1;
76 unsigned int MentionsPhysVR : 1;
77 unsigned int IsSwappable : 1;
78 unsigned int MentionsPartialVR : 1;
79 unsigned int SpecialHandling : 3;
80 unsigned int WebRejected : 1;
81 unsigned int WillRemove : 1;
103 std::vector<PPCVSXSwapEntry> SwapVector;
121 bool gatherVectorInstructions();
130 unsigned lookThruCopyLike(
unsigned SrcReg,
unsigned VecIdx);
136 void recordUnoptimizableWebs();
139 void markSwapsForRemoval();
148 unsigned DstReg,
unsigned SrcReg);
151 void handleSpecialSwappables(
int EntryIdx);
154 void dumpSwapVector();
170 bool isScalarVecReg(
unsigned Reg) {
178 bool isAnyVecReg(
unsigned Reg,
bool &Partial) {
179 if (isScalarVecReg(
Reg))
200 if (gatherVectorInstructions()) {
202 recordUnoptimizableWebs();
203 markSwapsForRemoval();
226 const int InitialVectorSize(256);
228 SwapVector.reserve(InitialVectorSize);
233 EC =
new EquivalenceClasses<int>;
239bool PPCVSXSwapRemoval::gatherVectorInstructions() {
240 bool RelevantFunction =
false;
242 for (MachineBasicBlock &
MBB : *MF) {
243 for (MachineInstr &
MI :
MBB) {
245 if (
MI.isDebugInstr())
248 bool RelevantInstr =
false;
249 bool Partial =
false;
251 for (
const MachineOperand &MO :
MI.operands()) {
258 if (isAnyVecReg(
Reg, Partial))
259 RelevantInstr =
true;
265 RelevantFunction =
true;
270 PPCVSXSwapEntry SwapEntry{};
271 int VecIdx = addSwapEntry(&
MI, SwapEntry);
273 switch(
MI.getOpcode()) {
282 SwapVector[VecIdx].MentionsPartialVR = 1;
284 SwapVector[VecIdx].IsSwappable = 1;
286 case PPC::XXPERMDI: {
294 int immed =
MI.getOperand(3).getImm();
296 unsigned trueReg1 = lookThruCopyLike(
MI.getOperand(1).getReg(),
298 unsigned trueReg2 = lookThruCopyLike(
MI.getOperand(2).getReg(),
300 if (trueReg1 == trueReg2)
301 SwapVector[VecIdx].IsSwap = 1;
305 SwapVector[VecIdx].IsSwappable = 1;
306 SwapVector[VecIdx].SpecialHandling = SHValues::SH_XXPERMDI;
316 }
else if (immed == 0 || immed == 3) {
318 SwapVector[VecIdx].IsSwappable = 1;
319 SwapVector[VecIdx].SpecialHandling = SHValues::SH_XXPERMDI;
321 unsigned trueReg1 = lookThruCopyLike(
MI.getOperand(1).getReg(),
323 unsigned trueReg2 = lookThruCopyLike(
MI.getOperand(2).getReg(),
325 if (trueReg1 == trueReg2)
326 SwapVector[VecIdx].MentionsPhysVR = 0;
330 SwapVector[VecIdx].IsSwappable = 1;
331 SwapVector[VecIdx].SpecialHandling = SHValues::SH_XXPERMDI;
340 SwapVector[VecIdx].IsLoad = 1;
346 SwapVector[VecIdx].IsLoad = 1;
347 SwapVector[VecIdx].IsSwap = 1;
357 SwapVector[VecIdx].IsLoad = 1;
358 SwapVector[VecIdx].IsSwappable = 1;
365 SwapVector[VecIdx].IsStore = 1;
371 SwapVector[VecIdx].IsStore = 1;
372 SwapVector[VecIdx].IsSwap = 1;
379 SwapVector[VecIdx].IsSwappable = 1;
384 else if (isScalarVecReg(
MI.getOperand(0).getReg()) &&
385 isScalarVecReg(
MI.getOperand(1).getReg()))
386 SwapVector[VecIdx].IsSwappable = 1;
388 case PPC::SUBREG_TO_REG: {
398 SwapVector[VecIdx].IsSwappable = 1;
399 else if (
isVecReg(
MI.getOperand(0).getReg()) &&
400 isScalarVecReg(
MI.getOperand(2).getReg())) {
401 SwapVector[VecIdx].IsSwappable = 1;
402 SwapVector[VecIdx].SpecialHandling = SHValues::SH_COPYWIDEN;
412 SwapVector[VecIdx].IsSwappable = 1;
413 SwapVector[VecIdx].SpecialHandling = SHValues::SH_SPLAT;
424 case PPC::INLINEASM_BR:
425 case PPC::EXTRACT_SUBREG:
426 case PPC::INSERT_SUBREG:
427 case PPC::COPY_TO_REGCLASS:
447 case PPC::VCIPHERLAST:
467 case PPC::VNCIPHERLAST:
492 case PPC::VSHASIGMAD:
493 case PPC::VSHASIGMAW:
527 if (RelevantFunction) {
532 return RelevantFunction;
537int PPCVSXSwapRemoval::addSwapEntry(MachineInstr *
MI,
538 PPCVSXSwapEntry& SwapEntry) {
539 SwapEntry.VSEMI =
MI;
540 SwapEntry.VSEId = SwapVector.size();
541 SwapVector.push_back(SwapEntry);
542 EC->insert(SwapEntry.VSEId);
543 SwapMap[
MI] = SwapEntry.VSEId;
544 return SwapEntry.VSEId;
556unsigned PPCVSXSwapRemoval::lookThruCopyLike(
unsigned SrcReg,
558 MachineInstr *
MI =
MRI->getVRegDef(SrcReg);
559 if (!
MI->isCopyLike())
564 CopySrcReg =
MI->getOperand(1).getReg();
566 assert(
MI->isSubregToReg() &&
"bad opcode for lookThruCopyLike");
567 CopySrcReg =
MI->getOperand(2).getReg();
570 if (!Register::isVirtualRegister(CopySrcReg)) {
571 if (!isScalarVecReg(CopySrcReg))
572 SwapVector[VecIdx].MentionsPhysVR = 1;
576 return lookThruCopyLike(CopySrcReg, VecIdx);
586void PPCVSXSwapRemoval::formWebs() {
588 LLVM_DEBUG(
dbgs() <<
"\n*** Forming webs for swap removal ***\n\n");
590 for (
unsigned EntryIdx = 0; EntryIdx < SwapVector.size(); ++EntryIdx) {
592 MachineInstr *
MI = SwapVector[EntryIdx].VSEMI;
601 for (
const MachineOperand &MO :
MI->operands()) {
610 if (!(
MI->isCopy() && isScalarVecReg(
Reg)))
611 SwapVector[EntryIdx].MentionsPhysVR = 1;
620 "Inconsistency: def of vector reg not found in swap map!");
621 int DefIdx = SwapMap[
DefMI];
622 (void)EC->unionSets(SwapVector[DefIdx].VSEId,
623 SwapVector[EntryIdx].VSEId);
626 SwapVector[DefIdx].VSEId,
627 SwapVector[EntryIdx].VSEId));
638void PPCVSXSwapRemoval::recordUnoptimizableWebs() {
640 LLVM_DEBUG(
dbgs() <<
"\n*** Rejecting webs for swap removal ***\n\n");
642 for (
unsigned EntryIdx = 0; EntryIdx < SwapVector.size(); ++EntryIdx) {
643 int Repr = EC->getLeaderValue(SwapVector[EntryIdx].VSEId);
646 if (SwapVector[Repr].WebRejected)
652 if (SwapVector[EntryIdx].MentionsPhysVR ||
653 SwapVector[EntryIdx].MentionsPartialVR ||
654 !(SwapVector[EntryIdx].IsSwappable || SwapVector[EntryIdx].IsSwap)) {
656 SwapVector[Repr].WebRejected = 1;
659 dbgs() <<
format(
"Web %d rejected for physreg, partial reg, or not "
663 LLVM_DEBUG(SwapVector[EntryIdx].VSEMI->dump());
669 else if (SwapVector[EntryIdx].IsLoad && SwapVector[EntryIdx].IsSwap) {
670 MachineInstr *
MI = SwapVector[EntryIdx].VSEMI;
677 for (MachineInstr &
UseMI :
MRI->use_nodbg_instructions(DefReg)) {
678 int UseIdx = SwapMap[&
UseMI];
680 if (!SwapVector[UseIdx].IsSwap || SwapVector[UseIdx].IsLoad ||
681 SwapVector[UseIdx].IsStore) {
683 SwapVector[Repr].WebRejected = 1;
686 "Web %d rejected for load not feeding swap\n", Repr));
697 if (SwapVector[UseIdx].IsSwap && !SwapVector[UseIdx].IsLoad &&
698 !SwapVector[UseIdx].IsStore) {
700 for (MachineInstr &UseOfUseMI :
701 MRI->use_nodbg_instructions(SwapDefReg)) {
702 int UseOfUseIdx = SwapMap[&UseOfUseMI];
703 if (SwapVector[UseOfUseIdx].IsStore) {
704 SwapVector[Repr].WebRejected = 1;
707 "Web %d rejected for load/swap feeding a store\n", Repr));
720 }
else if (SwapVector[EntryIdx].IsStore && SwapVector[EntryIdx].IsSwap) {
721 MachineInstr *
MI = SwapVector[EntryIdx].VSEMI;
725 int DefIdx = SwapMap[
DefMI];
727 if (!SwapVector[DefIdx].IsSwap || SwapVector[DefIdx].IsLoad ||
728 SwapVector[DefIdx].IsStore) {
730 SwapVector[Repr].WebRejected = 1;
733 "Web %d rejected for store not fed by swap\n", Repr));
743 for (MachineInstr &
UseMI :
MRI->use_nodbg_instructions(DefReg)) {
744 int UseIdx = SwapMap[&
UseMI];
746 if (SwapVector[UseIdx].VSEMI->getOpcode() !=
MI->getOpcode()) {
747 SwapVector[Repr].WebRejected = 1;
751 "Web %d rejected for swap not feeding only stores\n", Repr));
772void PPCVSXSwapRemoval::markSwapsForRemoval() {
776 for (
unsigned EntryIdx = 0; EntryIdx < SwapVector.size(); ++EntryIdx) {
778 if (SwapVector[EntryIdx].IsLoad && SwapVector[EntryIdx].IsSwap) {
779 int Repr = EC->getLeaderValue(SwapVector[EntryIdx].VSEId);
781 if (!SwapVector[Repr].WebRejected) {
782 MachineInstr *
MI = SwapVector[EntryIdx].VSEMI;
785 for (MachineInstr &
UseMI :
MRI->use_nodbg_instructions(DefReg)) {
786 int UseIdx = SwapMap[&
UseMI];
787 SwapVector[UseIdx].WillRemove = 1;
794 }
else if (SwapVector[EntryIdx].IsStore && SwapVector[EntryIdx].IsSwap) {
795 int Repr = EC->getLeaderValue(SwapVector[EntryIdx].VSEId);
797 if (!SwapVector[Repr].WebRejected) {
798 MachineInstr *
MI = SwapVector[EntryIdx].VSEMI;
801 int DefIdx = SwapMap[
DefMI];
802 SwapVector[DefIdx].WillRemove = 1;
808 }
else if (SwapVector[EntryIdx].IsSwappable &&
809 SwapVector[EntryIdx].SpecialHandling != 0) {
810 int Repr = EC->getLeaderValue(SwapVector[EntryIdx].VSEId);
812 if (!SwapVector[Repr].WebRejected)
813 handleSpecialSwappables(EntryIdx);
823void PPCVSXSwapRemoval::insertSwap(MachineInstr *
MI,
825 unsigned DstReg,
unsigned SrcReg) {
826 BuildMI(*
MI->getParent(), InsertPoint,
MI->getDebugLoc(),
827 TII->get(PPC::XXPERMDI), DstReg)
838void PPCVSXSwapRemoval::handleSpecialSwappables(
int EntryIdx) {
839 switch (SwapVector[EntryIdx].SpecialHandling) {
846 case SHValues::SH_SPLAT: {
847 MachineInstr *
MI = SwapVector[EntryIdx].VSEMI;
853 switch (
MI->getOpcode()) {
856 case PPC::VSPLTB: NElts = 16;
break;
857 case PPC::VSPLTH: NElts = 8;
break;
859 case PPC::XXSPLTW: NElts = 4;
break;
863 if (
MI->getOpcode() == PPC::XXSPLTW)
864 EltNo =
MI->getOperand(2).getImm();
866 EltNo =
MI->getOperand(1).getImm();
868 EltNo = (EltNo + NElts / 2) % NElts;
869 if (
MI->getOpcode() == PPC::XXSPLTW)
870 MI->getOperand(2).setImm(EltNo);
872 MI->getOperand(1).setImm(EltNo);
885 case SHValues::SH_XXPERMDI: {
886 MachineInstr *
MI = SwapVector[EntryIdx].VSEMI;
891 unsigned Selector =
MI->getOperand(3).getImm();
892 if (Selector == 0 || Selector == 3)
893 Selector = 3 - Selector;
894 MI->getOperand(3).setImm(Selector);
898 MI->getOperand(1).setReg(Reg2);
899 MI->getOperand(2).setReg(Reg1);
902 bool IsKill1 =
MI->getOperand(1).isKill();
903 bool IsKill2 =
MI->getOperand(2).isKill();
904 MI->getOperand(1).setIsKill(IsKill2);
905 MI->getOperand(2).setIsKill(IsKill1);
915 case SHValues::SH_COPYWIDEN: {
916 MachineInstr *
MI = SwapVector[EntryIdx].VSEMI;
922 const TargetRegisterClass *DstRC =
MRI->getRegClass(DstReg);
923 Register NewVReg =
MRI->createVirtualRegister(DstRC);
925 MI->getOperand(0).setReg(NewVReg);
936 if (DstRC == &PPC::VRRCRegClass) {
937 Register VSRCTmp1 =
MRI->createVirtualRegister(&PPC::VSRCRegClass);
938 Register VSRCTmp2 =
MRI->createVirtualRegister(&PPC::VSRCRegClass);
940 BuildMI(*
MI->getParent(), InsertPoint,
MI->getDebugLoc(),
941 TII->get(PPC::COPY), VSRCTmp1)
945 insertSwap(
MI, InsertPoint, VSRCTmp2, VSRCTmp1);
948 BuildMI(*
MI->getParent(), InsertPoint,
MI->getDebugLoc(),
949 TII->get(PPC::COPY), DstReg)
954 insertSwap(
MI, InsertPoint, DstReg, NewVReg);
964bool PPCVSXSwapRemoval::removeSwaps() {
970 for (
unsigned EntryIdx = 0; EntryIdx < SwapVector.size(); ++EntryIdx) {
971 if (SwapVector[EntryIdx].WillRemove) {
973 MachineInstr *
MI = SwapVector[EntryIdx].VSEMI;
974 MachineBasicBlock *
MBB =
MI->getParent();
976 MI->getOperand(0).getReg())
977 .
add(
MI->getOperand(1));
980 SwapVector[EntryIdx].VSEId));
983 MI->eraseFromParent();
990#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
994 for (
unsigned EntryIdx = 0; EntryIdx < SwapVector.size(); ++EntryIdx) {
996 MachineInstr *
MI = SwapVector[EntryIdx].VSEMI;
997 int ID = SwapVector[EntryIdx].VSEId;
1001 dbgs() <<
format(
" %bb.%3d",
MI->getParent()->getNumber());
1002 dbgs() <<
format(
" %14s ",
TII->getName(
MI->getOpcode()).str().c_str());
1004 if (SwapVector[EntryIdx].IsLoad)
1006 if (SwapVector[EntryIdx].IsStore)
1008 if (SwapVector[EntryIdx].IsSwap)
1010 if (SwapVector[EntryIdx].MentionsPhysVR)
1011 dbgs() <<
"physreg ";
1012 if (SwapVector[EntryIdx].MentionsPartialVR)
1013 dbgs() <<
"partialreg ";
1015 if (SwapVector[EntryIdx].IsSwappable) {
1016 dbgs() <<
"swappable ";
1017 switch(SwapVector[EntryIdx].SpecialHandling) {
1019 dbgs() <<
"special:**unknown**";
1024 dbgs() <<
"special:extract ";
1027 dbgs() <<
"special:insert ";
1030 dbgs() <<
"special:load ";
1033 dbgs() <<
"special:store ";
1036 dbgs() <<
"special:splat ";
1039 dbgs() <<
"special:xxpermdi ";
1042 dbgs() <<
"special:copywiden ";
1047 if (SwapVector[EntryIdx].WebRejected)
1048 dbgs() <<
"rejected ";
1049 if (SwapVector[EntryIdx].WillRemove)
1050 dbgs() <<
"remove ";
1064 "PowerPC VSX Swap Removal",
false,
false)
1068char PPCVSXSwapRemoval::
ID = 0;
unsigned const MachineRegisterInfo * MRI
MachineInstrBuilder & UseMI
MachineInstrBuilder MachineInstrBuilder & DefMI
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
static bool isRegInClass(const MachineOperand &MO, const TargetRegisterClass *Class)
#define LLVM_DUMP_METHOD
Mark debug helper function definitions like dump() that should not be stripped from debug builds.
This file defines the DenseMap class.
Generic implementation of equivalence classes through the use Tarjan's efficient union-find algorithm...
static Register UseReg(const MachineOperand &MO)
const HexagonInstrInfo * TII
static bool isVecReg(unsigned Reg)
Promote Memory to Register
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
static void initialize(TargetLibraryInfoImpl &TLI, const Triple &T, ArrayRef< StringLiteral > StandardNames)
Initialize the set of available library functions based on the specified target triple.
EquivalenceClasses - This represents a collection of equivalence classes and supports three efficient...
FunctionPass class - This class is used to implement most global optimizations.
MachineInstrBundleIterator< MachineInstr > iterator
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
Function & getFunction()
Return the LLVM function that this machine code represents.
Register getReg(unsigned Idx) const
Get the register for the operand index.
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
const MachineInstrBuilder & add(const MachineOperand &MO) const
const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
Representation of each machine instruction.
LLVM_ABI void dump() const
const MachineOperand & getOperand(unsigned i) const
Register getReg() const
getReg - Returns the register number.
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
bool needsSwapsForVSXMemOps() const
constexpr bool isVirtual() const
Return true if the specified register number is in the virtual register namespace.
static constexpr bool isVirtualRegister(unsigned Reg)
Return true if the specified register number is in the virtual register namespace.
bool contains(Register Reg) const
Return true if the specified register is included in this register class.
bool hasSubClassEq(const TargetRegisterClass *RC) const
Returns true if RC is a sub-class of or equal to this class.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
This is an optimization pass for GlobalISel generic memory operations.
void dump(const SparseBitVector< ElementSize > &LHS, raw_ostream &out)
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
FunctionPass * createPPCVSXSwapRemovalPass()
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
format_object< Ts... > format(const char *Fmt, const Ts &... Vals)
These are helper functions used to produce formatted output.