52#include "llvm/Config/llvm-config.h"
59#define DEBUG_TYPE "ppc-vsx-swaps"
65struct PPCVSXSwapEntry {
73 unsigned int IsLoad : 1;
74 unsigned int IsStore : 1;
75 unsigned int IsSwap : 1;
76 unsigned int MentionsPhysVR : 1;
77 unsigned int IsSwappable : 1;
78 unsigned int MentionsPartialVR : 1;
79 unsigned int SpecialHandling : 3;
80 unsigned int WebRejected : 1;
81 unsigned int WillRemove : 1;
103 std::vector<PPCVSXSwapEntry> SwapVector;
123 bool gatherVectorInstructions();
132 unsigned lookThruCopyLike(
unsigned SrcReg,
unsigned VecIdx);
138 void recordUnoptimizableWebs();
141 void markSwapsForRemoval();
150 unsigned DstReg,
unsigned SrcReg);
153 void handleSpecialSwappables(
int EntryIdx);
156 void dumpSwapVector();
172 bool isScalarVecReg(
unsigned Reg) {
180 bool isAnyVecReg(
unsigned Reg,
bool &Partial) {
181 if (isScalarVecReg(Reg))
183 return isScalarVecReg(Reg) ||
isVecReg(Reg);
199 bool Changed =
false;
202 if (gatherVectorInstructions()) {
204 recordUnoptimizableWebs();
205 markSwapsForRemoval();
206 Changed = removeSwaps();
220 MRI = &MF->getRegInfo();
227 const int InitialVectorSize(256);
229 SwapVector.reserve(InitialVectorSize);
240bool PPCVSXSwapRemoval::gatherVectorInstructions() {
241 bool RelevantFunction =
false;
246 if (
MI.isDebugInstr())
249 bool RelevantInstr =
false;
250 bool Partial =
false;
259 if (isAnyVecReg(Reg, Partial))
260 RelevantInstr =
true;
266 RelevantFunction =
true;
271 PPCVSXSwapEntry SwapEntry{};
272 int VecIdx = addSwapEntry(&
MI, SwapEntry);
274 switch(
MI.getOpcode()) {
283 SwapVector[VecIdx].MentionsPartialVR = 1;
285 SwapVector[VecIdx].IsSwappable = 1;
287 case PPC::XXPERMDI: {
295 int immed =
MI.getOperand(3).getImm();
297 unsigned trueReg1 = lookThruCopyLike(
MI.getOperand(1).getReg(),
299 unsigned trueReg2 = lookThruCopyLike(
MI.getOperand(2).getReg(),
301 if (trueReg1 == trueReg2)
302 SwapVector[VecIdx].IsSwap = 1;
306 SwapVector[VecIdx].IsSwappable = 1;
307 SwapVector[VecIdx].SpecialHandling = SHValues::SH_XXPERMDI;
317 }
else if (immed == 0 || immed == 3) {
319 SwapVector[VecIdx].IsSwappable = 1;
320 SwapVector[VecIdx].SpecialHandling = SHValues::SH_XXPERMDI;
322 unsigned trueReg1 = lookThruCopyLike(
MI.getOperand(1).getReg(),
324 unsigned trueReg2 = lookThruCopyLike(
MI.getOperand(2).getReg(),
326 if (trueReg1 == trueReg2)
327 SwapVector[VecIdx].MentionsPhysVR = 0;
331 SwapVector[VecIdx].IsSwappable = 1;
332 SwapVector[VecIdx].SpecialHandling = SHValues::SH_XXPERMDI;
341 SwapVector[VecIdx].IsLoad = 1;
347 SwapVector[VecIdx].IsLoad = 1;
348 SwapVector[VecIdx].IsSwap = 1;
358 SwapVector[VecIdx].IsLoad = 1;
359 SwapVector[VecIdx].IsSwappable = 1;
366 SwapVector[VecIdx].IsStore = 1;
372 SwapVector[VecIdx].IsStore = 1;
373 SwapVector[VecIdx].IsSwap = 1;
380 SwapVector[VecIdx].IsSwappable = 1;
385 else if (isScalarVecReg(
MI.getOperand(0).getReg()) &&
386 isScalarVecReg(
MI.getOperand(1).getReg()))
387 SwapVector[VecIdx].IsSwappable = 1;
389 case PPC::SUBREG_TO_REG: {
399 SwapVector[VecIdx].IsSwappable = 1;
400 else if (
isVecReg(
MI.getOperand(0).getReg()) &&
401 isScalarVecReg(
MI.getOperand(2).getReg())) {
402 SwapVector[VecIdx].IsSwappable = 1;
403 SwapVector[VecIdx].SpecialHandling = SHValues::SH_COPYWIDEN;
413 SwapVector[VecIdx].IsSwappable = 1;
414 SwapVector[VecIdx].SpecialHandling = SHValues::SH_SPLAT;
425 case PPC::INLINEASM_BR:
426 case PPC::EXTRACT_SUBREG:
427 case PPC::INSERT_SUBREG:
428 case PPC::COPY_TO_REGCLASS:
448 case PPC::VCIPHERLAST:
468 case PPC::VNCIPHERLAST:
493 case PPC::VSHASIGMAD:
494 case PPC::VSHASIGMAW:
528 if (RelevantFunction) {
533 return RelevantFunction;
539 PPCVSXSwapEntry& SwapEntry) {
540 SwapEntry.VSEMI =
MI;
541 SwapEntry.VSEId = SwapVector.size();
542 SwapVector.push_back(SwapEntry);
543 EC->insert(SwapEntry.VSEId);
544 SwapMap[
MI] = SwapEntry.VSEId;
545 return SwapEntry.VSEId;
557unsigned PPCVSXSwapRemoval::lookThruCopyLike(
unsigned SrcReg,
560 if (!
MI->isCopyLike())
565 CopySrcReg =
MI->getOperand(1).getReg();
567 assert(
MI->isSubregToReg() &&
"bad opcode for lookThruCopyLike");
568 CopySrcReg =
MI->getOperand(2).getReg();
572 if (!isScalarVecReg(CopySrcReg))
573 SwapVector[VecIdx].MentionsPhysVR = 1;
577 return lookThruCopyLike(CopySrcReg, VecIdx);
587void PPCVSXSwapRemoval::formWebs() {
589 LLVM_DEBUG(
dbgs() <<
"\n*** Forming webs for swap removal ***\n\n");
591 for (
unsigned EntryIdx = 0; EntryIdx < SwapVector.size(); ++EntryIdx) {
607 if (!
isVecReg(Reg) && !isScalarVecReg(Reg))
610 if (!Reg.isVirtual()) {
611 if (!(
MI->isCopy() && isScalarVecReg(Reg)))
612 SwapVector[EntryIdx].MentionsPhysVR = 1;
621 "Inconsistency: def of vector reg not found in swap map!");
622 int DefIdx = SwapMap[
DefMI];
623 (void)EC->unionSets(SwapVector[DefIdx].VSEId,
624 SwapVector[EntryIdx].VSEId);
627 SwapVector[DefIdx].VSEId,
628 SwapVector[EntryIdx].VSEId));
639void PPCVSXSwapRemoval::recordUnoptimizableWebs() {
641 LLVM_DEBUG(
dbgs() <<
"\n*** Rejecting webs for swap removal ***\n\n");
643 for (
unsigned EntryIdx = 0; EntryIdx < SwapVector.size(); ++EntryIdx) {
644 int Repr = EC->getLeaderValue(SwapVector[EntryIdx].VSEId);
647 if (SwapVector[Repr].WebRejected)
653 if (SwapVector[EntryIdx].MentionsPhysVR ||
654 SwapVector[EntryIdx].MentionsPartialVR ||
655 !(SwapVector[EntryIdx].IsSwappable || SwapVector[EntryIdx].IsSwap)) {
657 SwapVector[Repr].WebRejected = 1;
660 dbgs() <<
format(
"Web %d rejected for physreg, partial reg, or not "
664 LLVM_DEBUG(SwapVector[EntryIdx].VSEMI->dump());
670 else if (SwapVector[EntryIdx].IsLoad && SwapVector[EntryIdx].IsSwap) {
679 int UseIdx = SwapMap[&
UseMI];
681 if (!SwapVector[UseIdx].IsSwap || SwapVector[UseIdx].IsLoad ||
682 SwapVector[UseIdx].IsStore) {
684 SwapVector[Repr].WebRejected = 1;
687 "Web %d rejected for load not feeding swap\n", Repr));
698 if (SwapVector[UseIdx].IsSwap && !SwapVector[UseIdx].IsLoad &&
699 !SwapVector[UseIdx].IsStore) {
702 MRI->use_nodbg_instructions(SwapDefReg)) {
703 int UseOfUseIdx = SwapMap[&UseOfUseMI];
704 if (SwapVector[UseOfUseIdx].IsStore) {
705 SwapVector[Repr].WebRejected = 1;
708 "Web %d rejected for load/swap feeding a store\n", Repr));
721 }
else if (SwapVector[EntryIdx].IsStore && SwapVector[EntryIdx].IsSwap) {
726 int DefIdx = SwapMap[
DefMI];
728 if (!SwapVector[DefIdx].IsSwap || SwapVector[DefIdx].IsLoad ||
729 SwapVector[DefIdx].IsStore) {
731 SwapVector[Repr].WebRejected = 1;
734 "Web %d rejected for store not fed by swap\n", Repr));
745 int UseIdx = SwapMap[&
UseMI];
747 if (SwapVector[UseIdx].VSEMI->getOpcode() !=
MI->getOpcode()) {
748 SwapVector[Repr].WebRejected = 1;
752 "Web %d rejected for swap not feeding only stores\n", Repr));
773void PPCVSXSwapRemoval::markSwapsForRemoval() {
777 for (
unsigned EntryIdx = 0; EntryIdx < SwapVector.size(); ++EntryIdx) {
779 if (SwapVector[EntryIdx].IsLoad && SwapVector[EntryIdx].IsSwap) {
780 int Repr = EC->getLeaderValue(SwapVector[EntryIdx].VSEId);
782 if (!SwapVector[Repr].WebRejected) {
787 int UseIdx = SwapMap[&
UseMI];
788 SwapVector[UseIdx].WillRemove = 1;
795 }
else if (SwapVector[EntryIdx].IsStore && SwapVector[EntryIdx].IsSwap) {
796 int Repr = EC->getLeaderValue(SwapVector[EntryIdx].VSEId);
798 if (!SwapVector[Repr].WebRejected) {
802 int DefIdx = SwapMap[
DefMI];
803 SwapVector[DefIdx].WillRemove = 1;
809 }
else if (SwapVector[EntryIdx].IsSwappable &&
810 SwapVector[EntryIdx].SpecialHandling != 0) {
811 int Repr = EC->getLeaderValue(SwapVector[EntryIdx].VSEId);
813 if (!SwapVector[Repr].WebRejected)
814 handleSpecialSwappables(EntryIdx);
826 unsigned DstReg,
unsigned SrcReg) {
827 BuildMI(*
MI->getParent(), InsertPoint,
MI->getDebugLoc(),
828 TII->get(PPC::XXPERMDI), DstReg)
839void PPCVSXSwapRemoval::handleSpecialSwappables(
int EntryIdx) {
840 switch (SwapVector[EntryIdx].SpecialHandling) {
847 case SHValues::SH_SPLAT: {
854 switch (
MI->getOpcode()) {
857 case PPC::VSPLTB: NElts = 16;
break;
858 case PPC::VSPLTH: NElts = 8;
break;
860 case PPC::XXSPLTW: NElts = 4;
break;
864 if (
MI->getOpcode() == PPC::XXSPLTW)
865 EltNo =
MI->getOperand(2).getImm();
867 EltNo =
MI->getOperand(1).getImm();
869 EltNo = (EltNo + NElts / 2) % NElts;
870 if (
MI->getOpcode() == PPC::XXSPLTW)
871 MI->getOperand(2).setImm(EltNo);
873 MI->getOperand(1).setImm(EltNo);
886 case SHValues::SH_XXPERMDI: {
892 unsigned Selector =
MI->getOperand(3).getImm();
893 if (Selector == 0 || Selector == 3)
894 Selector = 3 - Selector;
895 MI->getOperand(3).setImm(Selector);
899 MI->getOperand(1).setReg(Reg2);
900 MI->getOperand(2).setReg(Reg1);
903 bool IsKill1 =
MI->getOperand(1).isKill();
904 bool IsKill2 =
MI->getOperand(2).isKill();
905 MI->getOperand(1).setIsKill(IsKill2);
906 MI->getOperand(2).setIsKill(IsKill1);
916 case SHValues::SH_COPYWIDEN: {
924 Register NewVReg =
MRI->createVirtualRegister(DstRC);
926 MI->getOperand(0).setReg(NewVReg);
937 if (DstRC == &PPC::VRRCRegClass) {
938 Register VSRCTmp1 =
MRI->createVirtualRegister(&PPC::VSRCRegClass);
939 Register VSRCTmp2 =
MRI->createVirtualRegister(&PPC::VSRCRegClass);
941 BuildMI(*
MI->getParent(), InsertPoint,
MI->getDebugLoc(),
942 TII->get(PPC::COPY), VSRCTmp1)
946 insertSwap(
MI, InsertPoint, VSRCTmp2, VSRCTmp1);
949 BuildMI(*
MI->getParent(), InsertPoint,
MI->getDebugLoc(),
950 TII->get(PPC::COPY), DstReg)
955 insertSwap(
MI, InsertPoint, DstReg, NewVReg);
965bool PPCVSXSwapRemoval::removeSwaps() {
969 bool Changed =
false;
971 for (
unsigned EntryIdx = 0; EntryIdx < SwapVector.size(); ++EntryIdx) {
972 if (SwapVector[EntryIdx].WillRemove) {
977 MI->getOperand(0).getReg())
978 .
add(
MI->getOperand(1));
981 SwapVector[EntryIdx].VSEId));
984 MI->eraseFromParent();
991#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
995 for (
unsigned EntryIdx = 0; EntryIdx < SwapVector.size(); ++EntryIdx) {
998 int ID = SwapVector[EntryIdx].VSEId;
1002 dbgs() <<
format(
" %bb.%3d",
MI->getParent()->getNumber());
1003 dbgs() <<
format(
" %14s ",
TII->getName(
MI->getOpcode()).str().c_str());
1005 if (SwapVector[EntryIdx].IsLoad)
1007 if (SwapVector[EntryIdx].IsStore)
1009 if (SwapVector[EntryIdx].IsSwap)
1011 if (SwapVector[EntryIdx].MentionsPhysVR)
1012 dbgs() <<
"physreg ";
1013 if (SwapVector[EntryIdx].MentionsPartialVR)
1014 dbgs() <<
"partialreg ";
1016 if (SwapVector[EntryIdx].IsSwappable) {
1017 dbgs() <<
"swappable ";
1018 switch(SwapVector[EntryIdx].SpecialHandling) {
1020 dbgs() <<
"special:**unknown**";
1025 dbgs() <<
"special:extract ";
1028 dbgs() <<
"special:insert ";
1031 dbgs() <<
"special:load ";
1034 dbgs() <<
"special:store ";
1037 dbgs() <<
"special:splat ";
1040 dbgs() <<
"special:xxpermdi ";
1043 dbgs() <<
"special:copywiden ";
1048 if (SwapVector[EntryIdx].WebRejected)
1049 dbgs() <<
"rejected ";
1050 if (SwapVector[EntryIdx].WillRemove)
1051 dbgs() <<
"remove ";
1067 "PowerPC VSX Swap Removal",
false,
false)
1071char PPCVSXSwapRemoval::
ID = 0;
unsigned const MachineRegisterInfo * MRI
MachineInstrBuilder & UseMI
MachineInstrBuilder MachineInstrBuilder & DefMI
static bool isRegInClass(const MachineOperand &MO, const TargetRegisterClass *Class)
#define LLVM_DUMP_METHOD
Mark debug helper function definitions like dump() that should not be stripped from debug builds.
This file defines the DenseMap class.
Generic implementation of equivalence classes through the use of Tarjan's efficient union-find algorithm...
static Register UseReg(const MachineOperand &MO)
const HexagonInstrInfo * TII
static bool isVecReg(unsigned Reg)
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
static void initialize(TargetLibraryInfoImpl &TLI, const Triple &T, ArrayRef< StringLiteral > StandardNames)
Initialize the set of available library functions based on the specified target triple.
EquivalenceClasses - This represents a collection of equivalence classes and supports three efficient...
FunctionPass class - This class is used to implement most global optimizations.
bool skipFunction(const Function &F) const
Optional passes call this function to check whether the pass should be skipped.
MachineInstrBundleIterator< MachineInstr > iterator
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
virtual bool runOnMachineFunction(MachineFunction &MF)=0
runOnMachineFunction - This method must be overloaded to perform the desired machine code transformat...
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
Function & getFunction()
Return the LLVM function that this machine code represents.
Register getReg(unsigned Idx) const
Get the register for the operand index.
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
const MachineInstrBuilder & add(const MachineOperand &MO) const
const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
Representation of each machine instruction.
const MachineOperand & getOperand(unsigned i) const
MachineOperand class - Representation of each machine instruction operand.
Register getReg() const
getReg - Returns the register number.
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
bool needsSwapsForVSXMemOps() const
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
Wrapper class representing virtual and physical registers.
static constexpr bool isVirtualRegister(unsigned Reg)
Return true if the specified register number is in the virtual register namespace.
bool contains(Register Reg) const
Return true if the specified register is included in this register class.
bool hasSubClassEq(const TargetRegisterClass *RC) const
Returns true if RC is a sub-class of or equal to this class.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
unsigned ID
LLVM IR allows arbitrary numbers to be used as calling convention identifiers.
This is an optimization pass for GlobalISel generic memory operations.
void dump(const SparseBitVector< ElementSize > &LHS, raw_ostream &out)
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
FunctionPass * createPPCVSXSwapRemovalPass()
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
format_object< Ts... > format(const char *Fmt, const Ts &... Vals)
These are helper functions used to produce formatted output.
void initializePPCVSXSwapRemovalPass(PassRegistry &)