53#include "llvm/Config/llvm-config.h"
60#define DEBUG_TYPE "ppc-vsx-swaps"
66struct PPCVSXSwapEntry {
74 unsigned int IsLoad : 1;
75 unsigned int IsStore : 1;
76 unsigned int IsSwap : 1;
77 unsigned int MentionsPhysVR : 1;
78 unsigned int IsSwappable : 1;
79 unsigned int MentionsPartialVR : 1;
80 unsigned int SpecialHandling : 3;
81 unsigned int WebRejected : 1;
82 unsigned int WillRemove : 1;
104 std::vector<PPCVSXSwapEntry> SwapVector;
124 bool gatherVectorInstructions();
133 unsigned lookThruCopyLike(
unsigned SrcReg,
unsigned VecIdx);
139 void recordUnoptimizableWebs();
142 void markSwapsForRemoval();
151 unsigned DstReg,
unsigned SrcReg);
154 void handleSpecialSwappables(
int EntryIdx);
157 void dumpSwapVector();
173 bool isScalarVecReg(
unsigned Reg) {
181 bool isAnyVecReg(
unsigned Reg,
bool &Partial) {
182 if (isScalarVecReg(Reg))
184 return isScalarVecReg(Reg) ||
isVecReg(Reg);
200 bool Changed =
false;
203 if (gatherVectorInstructions()) {
205 recordUnoptimizableWebs();
206 markSwapsForRemoval();
207 Changed = removeSwaps();
221 MRI = &MF->getRegInfo();
228 const int InitialVectorSize(256);
230 SwapVector.reserve(InitialVectorSize);
241bool PPCVSXSwapRemoval::gatherVectorInstructions() {
242 bool RelevantFunction =
false;
247 if (
MI.isDebugInstr())
250 bool RelevantInstr =
false;
251 bool Partial =
false;
260 if (isAnyVecReg(Reg, Partial))
261 RelevantInstr =
true;
267 RelevantFunction =
true;
272 PPCVSXSwapEntry SwapEntry{};
273 int VecIdx = addSwapEntry(&
MI, SwapEntry);
275 switch(
MI.getOpcode()) {
284 SwapVector[VecIdx].MentionsPartialVR = 1;
286 SwapVector[VecIdx].IsSwappable = 1;
288 case PPC::XXPERMDI: {
296 int immed =
MI.getOperand(3).getImm();
298 unsigned trueReg1 = lookThruCopyLike(
MI.getOperand(1).getReg(),
300 unsigned trueReg2 = lookThruCopyLike(
MI.getOperand(2).getReg(),
302 if (trueReg1 == trueReg2)
303 SwapVector[VecIdx].IsSwap = 1;
307 SwapVector[VecIdx].IsSwappable = 1;
308 SwapVector[VecIdx].SpecialHandling = SHValues::SH_XXPERMDI;
318 }
else if (immed == 0 || immed == 3) {
320 SwapVector[VecIdx].IsSwappable = 1;
321 SwapVector[VecIdx].SpecialHandling = SHValues::SH_XXPERMDI;
323 unsigned trueReg1 = lookThruCopyLike(
MI.getOperand(1).getReg(),
325 unsigned trueReg2 = lookThruCopyLike(
MI.getOperand(2).getReg(),
327 if (trueReg1 == trueReg2)
328 SwapVector[VecIdx].MentionsPhysVR = 0;
332 SwapVector[VecIdx].IsSwappable = 1;
333 SwapVector[VecIdx].SpecialHandling = SHValues::SH_XXPERMDI;
342 SwapVector[VecIdx].IsLoad = 1;
348 SwapVector[VecIdx].IsLoad = 1;
349 SwapVector[VecIdx].IsSwap = 1;
359 SwapVector[VecIdx].IsLoad = 1;
360 SwapVector[VecIdx].IsSwappable = 1;
367 SwapVector[VecIdx].IsStore = 1;
373 SwapVector[VecIdx].IsStore = 1;
374 SwapVector[VecIdx].IsSwap = 1;
381 SwapVector[VecIdx].IsSwappable = 1;
386 else if (isScalarVecReg(
MI.getOperand(0).getReg()) &&
387 isScalarVecReg(
MI.getOperand(1).getReg()))
388 SwapVector[VecIdx].IsSwappable = 1;
390 case PPC::SUBREG_TO_REG: {
400 SwapVector[VecIdx].IsSwappable = 1;
401 else if (
isVecReg(
MI.getOperand(0).getReg()) &&
402 isScalarVecReg(
MI.getOperand(2).getReg())) {
403 SwapVector[VecIdx].IsSwappable = 1;
404 SwapVector[VecIdx].SpecialHandling = SHValues::SH_COPYWIDEN;
414 SwapVector[VecIdx].IsSwappable = 1;
415 SwapVector[VecIdx].SpecialHandling = SHValues::SH_SPLAT;
426 case PPC::INLINEASM_BR:
427 case PPC::EXTRACT_SUBREG:
428 case PPC::INSERT_SUBREG:
429 case PPC::COPY_TO_REGCLASS:
449 case PPC::VCIPHERLAST:
469 case PPC::VNCIPHERLAST:
494 case PPC::VSHASIGMAD:
495 case PPC::VSHASIGMAW:
529 if (RelevantFunction) {
534 return RelevantFunction;
540 PPCVSXSwapEntry& SwapEntry) {
541 SwapEntry.VSEMI =
MI;
542 SwapEntry.VSEId = SwapVector.size();
543 SwapVector.push_back(SwapEntry);
544 EC->insert(SwapEntry.VSEId);
545 SwapMap[
MI] = SwapEntry.VSEId;
546 return SwapEntry.VSEId;
558unsigned PPCVSXSwapRemoval::lookThruCopyLike(
unsigned SrcReg,
561 if (!
MI->isCopyLike())
566 CopySrcReg =
MI->getOperand(1).getReg();
568 assert(
MI->isSubregToReg() &&
"bad opcode for lookThruCopyLike");
569 CopySrcReg =
MI->getOperand(2).getReg();
573 if (!isScalarVecReg(CopySrcReg))
574 SwapVector[VecIdx].MentionsPhysVR = 1;
578 return lookThruCopyLike(CopySrcReg, VecIdx);
588void PPCVSXSwapRemoval::formWebs() {
590 LLVM_DEBUG(
dbgs() <<
"\n*** Forming webs for swap removal ***\n\n");
592 for (
unsigned EntryIdx = 0; EntryIdx < SwapVector.size(); ++EntryIdx) {
608 if (!
isVecReg(Reg) && !isScalarVecReg(Reg))
611 if (!Reg.isVirtual()) {
612 if (!(
MI->isCopy() && isScalarVecReg(Reg)))
613 SwapVector[EntryIdx].MentionsPhysVR = 1;
622 "Inconsistency: def of vector reg not found in swap map!");
623 int DefIdx = SwapMap[
DefMI];
624 (void)EC->unionSets(SwapVector[DefIdx].VSEId,
625 SwapVector[EntryIdx].VSEId);
628 SwapVector[DefIdx].VSEId,
629 SwapVector[EntryIdx].VSEId));
640void PPCVSXSwapRemoval::recordUnoptimizableWebs() {
642 LLVM_DEBUG(
dbgs() <<
"\n*** Rejecting webs for swap removal ***\n\n");
644 for (
unsigned EntryIdx = 0; EntryIdx < SwapVector.size(); ++EntryIdx) {
645 int Repr = EC->getLeaderValue(SwapVector[EntryIdx].VSEId);
648 if (SwapVector[Repr].WebRejected)
654 if (SwapVector[EntryIdx].MentionsPhysVR ||
655 SwapVector[EntryIdx].MentionsPartialVR ||
656 !(SwapVector[EntryIdx].IsSwappable || SwapVector[EntryIdx].IsSwap)) {
658 SwapVector[Repr].WebRejected = 1;
661 dbgs() <<
format(
"Web %d rejected for physreg, partial reg, or not "
665 LLVM_DEBUG(SwapVector[EntryIdx].VSEMI->dump());
671 else if (SwapVector[EntryIdx].IsLoad && SwapVector[EntryIdx].IsSwap) {
680 int UseIdx = SwapMap[&
UseMI];
682 if (!SwapVector[UseIdx].IsSwap || SwapVector[UseIdx].IsLoad ||
683 SwapVector[UseIdx].IsStore) {
685 SwapVector[Repr].WebRejected = 1;
688 "Web %d rejected for load not feeding swap\n", Repr));
699 if (SwapVector[UseIdx].IsSwap && !SwapVector[UseIdx].IsLoad &&
700 !SwapVector[UseIdx].IsStore) {
703 MRI->use_nodbg_instructions(SwapDefReg)) {
704 int UseOfUseIdx = SwapMap[&UseOfUseMI];
705 if (SwapVector[UseOfUseIdx].IsStore) {
706 SwapVector[Repr].WebRejected = 1;
709 "Web %d rejected for load/swap feeding a store\n", Repr));
722 }
else if (SwapVector[EntryIdx].IsStore && SwapVector[EntryIdx].IsSwap) {
727 int DefIdx = SwapMap[
DefMI];
729 if (!SwapVector[DefIdx].IsSwap || SwapVector[DefIdx].IsLoad ||
730 SwapVector[DefIdx].IsStore) {
732 SwapVector[Repr].WebRejected = 1;
735 "Web %d rejected for store not fed by swap\n", Repr));
746 int UseIdx = SwapMap[&
UseMI];
748 if (SwapVector[UseIdx].VSEMI->getOpcode() !=
MI->getOpcode()) {
749 SwapVector[Repr].WebRejected = 1;
753 "Web %d rejected for swap not feeding only stores\n", Repr));
774void PPCVSXSwapRemoval::markSwapsForRemoval() {
778 for (
unsigned EntryIdx = 0; EntryIdx < SwapVector.size(); ++EntryIdx) {
780 if (SwapVector[EntryIdx].IsLoad && SwapVector[EntryIdx].IsSwap) {
781 int Repr = EC->getLeaderValue(SwapVector[EntryIdx].VSEId);
783 if (!SwapVector[Repr].WebRejected) {
788 int UseIdx = SwapMap[&
UseMI];
789 SwapVector[UseIdx].WillRemove = 1;
796 }
else if (SwapVector[EntryIdx].IsStore && SwapVector[EntryIdx].IsSwap) {
797 int Repr = EC->getLeaderValue(SwapVector[EntryIdx].VSEId);
799 if (!SwapVector[Repr].WebRejected) {
803 int DefIdx = SwapMap[
DefMI];
804 SwapVector[DefIdx].WillRemove = 1;
810 }
else if (SwapVector[EntryIdx].IsSwappable &&
811 SwapVector[EntryIdx].SpecialHandling != 0) {
812 int Repr = EC->getLeaderValue(SwapVector[EntryIdx].VSEId);
814 if (!SwapVector[Repr].WebRejected)
815 handleSpecialSwappables(EntryIdx);
827 unsigned DstReg,
unsigned SrcReg) {
828 BuildMI(*
MI->getParent(), InsertPoint,
MI->getDebugLoc(),
829 TII->get(PPC::XXPERMDI), DstReg)
840void PPCVSXSwapRemoval::handleSpecialSwappables(
int EntryIdx) {
841 switch (SwapVector[EntryIdx].SpecialHandling) {
848 case SHValues::SH_SPLAT: {
855 switch (
MI->getOpcode()) {
858 case PPC::VSPLTB: NElts = 16;
break;
859 case PPC::VSPLTH: NElts = 8;
break;
861 case PPC::XXSPLTW: NElts = 4;
break;
865 if (
MI->getOpcode() == PPC::XXSPLTW)
866 EltNo =
MI->getOperand(2).getImm();
868 EltNo =
MI->getOperand(1).getImm();
870 EltNo = (EltNo + NElts / 2) % NElts;
871 if (
MI->getOpcode() == PPC::XXSPLTW)
872 MI->getOperand(2).setImm(EltNo);
874 MI->getOperand(1).setImm(EltNo);
887 case SHValues::SH_XXPERMDI: {
893 unsigned Selector =
MI->getOperand(3).getImm();
894 if (Selector == 0 || Selector == 3)
895 Selector = 3 - Selector;
896 MI->getOperand(3).setImm(Selector);
900 MI->getOperand(1).setReg(Reg2);
901 MI->getOperand(2).setReg(Reg1);
904 bool IsKill1 =
MI->getOperand(1).isKill();
905 bool IsKill2 =
MI->getOperand(2).isKill();
906 MI->getOperand(1).setIsKill(IsKill2);
907 MI->getOperand(2).setIsKill(IsKill1);
917 case SHValues::SH_COPYWIDEN: {
925 Register NewVReg =
MRI->createVirtualRegister(DstRC);
927 MI->getOperand(0).setReg(NewVReg);
938 if (DstRC == &PPC::VRRCRegClass) {
939 Register VSRCTmp1 =
MRI->createVirtualRegister(&PPC::VSRCRegClass);
940 Register VSRCTmp2 =
MRI->createVirtualRegister(&PPC::VSRCRegClass);
942 BuildMI(*
MI->getParent(), InsertPoint,
MI->getDebugLoc(),
943 TII->get(PPC::COPY), VSRCTmp1)
947 insertSwap(
MI, InsertPoint, VSRCTmp2, VSRCTmp1);
950 BuildMI(*
MI->getParent(), InsertPoint,
MI->getDebugLoc(),
951 TII->get(PPC::COPY), DstReg)
956 insertSwap(
MI, InsertPoint, DstReg, NewVReg);
966bool PPCVSXSwapRemoval::removeSwaps() {
970 bool Changed =
false;
972 for (
unsigned EntryIdx = 0; EntryIdx < SwapVector.size(); ++EntryIdx) {
973 if (SwapVector[EntryIdx].WillRemove) {
978 MI->getOperand(0).getReg())
979 .
add(
MI->getOperand(1));
982 SwapVector[EntryIdx].VSEId));
985 MI->eraseFromParent();
992#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
996 for (
unsigned EntryIdx = 0; EntryIdx < SwapVector.size(); ++EntryIdx) {
999 int ID = SwapVector[EntryIdx].VSEId;
1003 dbgs() <<
format(
" %bb.%3d",
MI->getParent()->getNumber());
1004 dbgs() <<
format(
" %14s ",
TII->getName(
MI->getOpcode()).str().c_str());
1006 if (SwapVector[EntryIdx].IsLoad)
1008 if (SwapVector[EntryIdx].IsStore)
1010 if (SwapVector[EntryIdx].IsSwap)
1012 if (SwapVector[EntryIdx].MentionsPhysVR)
1013 dbgs() <<
"physreg ";
1014 if (SwapVector[EntryIdx].MentionsPartialVR)
1015 dbgs() <<
"partialreg ";
1017 if (SwapVector[EntryIdx].IsSwappable) {
1018 dbgs() <<
"swappable ";
1019 switch(SwapVector[EntryIdx].SpecialHandling) {
1021 dbgs() <<
"special:**unknown**";
1026 dbgs() <<
"special:extract ";
1029 dbgs() <<
"special:insert ";
1032 dbgs() <<
"special:load ";
1035 dbgs() <<
"special:store ";
1038 dbgs() <<
"special:splat ";
1041 dbgs() <<
"special:xxpermdi ";
1044 dbgs() <<
"special:copywiden ";
1049 if (SwapVector[EntryIdx].WebRejected)
1050 dbgs() <<
"rejected ";
1051 if (SwapVector[EntryIdx].WillRemove)
1052 dbgs() <<
"remove ";
1068 "PowerPC VSX Swap Removal",
false,
false)
1072char PPCVSXSwapRemoval::
ID = 0;
unsigned const MachineRegisterInfo * MRI
MachineInstrBuilder & UseMI
MachineInstrBuilder MachineInstrBuilder & DefMI
static bool isRegInClass(const MachineOperand &MO, const TargetRegisterClass *Class)
#define LLVM_DUMP_METHOD
Mark debug helper function definitions like dump() that should not be stripped from debug builds.
This file defines the DenseMap class.
Generic implementation of equivalence classes through the use of Tarjan's efficient union-find algorithm...
static Register UseReg(const MachineOperand &MO)
const HexagonInstrInfo * TII
static bool isVecReg(unsigned Reg)
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
static void initialize(TargetLibraryInfoImpl &TLI, const Triple &T, ArrayRef< StringLiteral > StandardNames)
Initialize the set of available library functions based on the specified target triple.
EquivalenceClasses - This represents a collection of equivalence classes and supports three efficient...
FunctionPass class - This class is used to implement most global optimizations.
bool skipFunction(const Function &F) const
Optional passes call this function to check whether the pass should be skipped.
MachineInstrBundleIterator< MachineInstr > iterator
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
virtual bool runOnMachineFunction(MachineFunction &MF)=0
runOnMachineFunction - This method must be overloaded to perform the desired machine code transformat...
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
Function & getFunction()
Return the LLVM function that this machine code represents.
Register getReg(unsigned Idx) const
Get the register for the operand index.
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
const MachineInstrBuilder & add(const MachineOperand &MO) const
const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
Representation of each machine instruction.
const MachineOperand & getOperand(unsigned i) const
MachineOperand class - Representation of each machine instruction operand.
Register getReg() const
getReg - Returns the register number.
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
bool needsSwapsForVSXMemOps() const
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
Wrapper class representing virtual and physical registers.
static constexpr bool isVirtualRegister(unsigned Reg)
Return true if the specified register number is in the virtual register namespace.
bool contains(Register Reg) const
Return true if the specified register is included in this register class.
bool hasSubClassEq(const TargetRegisterClass *RC) const
Returns true if RC is a sub-class of or equal to this class.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
unsigned ID
LLVM IR allows arbitrary numbers to be used as calling convention identifiers.
This is an optimization pass for GlobalISel generic memory operations.
void dump(const SparseBitVector< ElementSize > &LHS, raw_ostream &out)
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
FunctionPass * createPPCVSXSwapRemovalPass()
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
format_object< Ts... > format(const char *Fmt, const Ts &... Vals)
These are helper functions used to produce formatted output.
void initializePPCVSXSwapRemovalPass(PassRegistry &)