53 #include "llvm/Config/llvm-config.h" 60 #define DEBUG_TYPE "ppc-vsx-swaps" 66 struct PPCVSXSwapEntry {
74 unsigned int IsLoad : 1;
75 unsigned int IsStore : 1;
76 unsigned int IsSwap : 1;
77 unsigned int MentionsPhysVR : 1;
78 unsigned int IsSwappable : 1;
79 unsigned int MentionsPartialVR : 1;
80 unsigned int SpecialHandling : 3;
81 unsigned int WebRejected : 1;
82 unsigned int WillRemove : 1;
104 std::vector<PPCVSXSwapEntry> SwapVector;
124 bool gatherVectorInstructions();
133 unsigned lookThruCopyLike(
unsigned SrcReg,
unsigned VecIdx);
139 void recordUnoptimizableWebs();
142 void markSwapsForRemoval();
151 unsigned DstReg,
unsigned SrcReg);
154 void handleSpecialSwappables(
int EntryIdx);
157 void dumpSwapVector();
168 return (isRegInClass(Reg, &PPC::VSRCRegClass) ||
169 isRegInClass(Reg, &PPC::VRRCRegClass));
173 bool isScalarVecReg(
unsigned Reg) {
174 return (isRegInClass(Reg, &PPC::VSFRCRegClass) ||
175 isRegInClass(Reg, &PPC::VSSRCRegClass));
181 bool isAnyVecReg(
unsigned Reg,
bool &Partial) {
182 if (isScalarVecReg(Reg))
184 return isScalarVecReg(Reg) ||
isVecReg(Reg);
200 bool Changed =
false;
203 if (gatherVectorInstructions()) {
205 recordUnoptimizableWebs();
206 markSwapsForRemoval();
207 Changed = removeSwaps();
221 MRI = &MF->getRegInfo();
228 const int InitialVectorSize(256);
230 SwapVector.reserve(InitialVectorSize);
241 bool PPCVSXSwapRemoval::gatherVectorInstructions() {
242 bool RelevantFunction =
false;
247 if (
MI.isDebugInstr())
250 bool RelevantInstr =
false;
251 bool Partial =
false;
257 if (isAnyVecReg(Reg, Partial)) {
258 RelevantInstr =
true;
266 RelevantFunction =
true;
271 PPCVSXSwapEntry SwapEntry{};
272 int VecIdx = addSwapEntry(&
MI, SwapEntry);
274 switch(
MI.getOpcode()) {
283 SwapVector[VecIdx].MentionsPartialVR = 1;
285 SwapVector[VecIdx].IsSwappable = 1;
295 int immed =
MI.getOperand(3).getImm();
297 unsigned trueReg1 = lookThruCopyLike(
MI.getOperand(1).getReg(),
299 unsigned trueReg2 = lookThruCopyLike(
MI.getOperand(2).getReg(),
301 if (trueReg1 == trueReg2)
302 SwapVector[VecIdx].IsSwap = 1;
306 SwapVector[VecIdx].IsSwappable = 1;
307 SwapVector[VecIdx].SpecialHandling = SHValues::SH_XXPERMDI;
317 }
else if (immed == 0 || immed == 3) {
319 SwapVector[VecIdx].IsSwappable = 1;
320 SwapVector[VecIdx].SpecialHandling = SHValues::SH_XXPERMDI;
322 unsigned trueReg1 = lookThruCopyLike(
MI.getOperand(1).getReg(),
324 unsigned trueReg2 = lookThruCopyLike(
MI.getOperand(2).getReg(),
326 if (trueReg1 == trueReg2)
327 SwapVector[VecIdx].MentionsPhysVR = 0;
331 SwapVector[VecIdx].IsSwappable = 1;
332 SwapVector[VecIdx].SpecialHandling = SHValues::SH_XXPERMDI;
341 SwapVector[VecIdx].IsLoad = 1;
347 SwapVector[VecIdx].IsLoad = 1;
348 SwapVector[VecIdx].IsSwap = 1;
358 SwapVector[VecIdx].IsLoad = 1;
359 SwapVector[VecIdx].IsSwappable = 1;
366 SwapVector[VecIdx].IsStore = 1;
372 SwapVector[VecIdx].IsStore = 1;
373 SwapVector[VecIdx].IsSwap = 1;
380 SwapVector[VecIdx].IsSwappable = 1;
385 else if (isScalarVecReg(
MI.getOperand(0).getReg()) &&
386 isScalarVecReg(
MI.getOperand(1).getReg()))
387 SwapVector[VecIdx].IsSwappable = 1;
389 case PPC::SUBREG_TO_REG: {
399 SwapVector[VecIdx].IsSwappable = 1;
400 else if (
isVecReg(
MI.getOperand(0).getReg()) &&
401 isScalarVecReg(
MI.getOperand(2).getReg())) {
402 SwapVector[VecIdx].IsSwappable = 1;
403 SwapVector[VecIdx].SpecialHandling = SHValues::SH_COPYWIDEN;
413 SwapVector[VecIdx].IsSwappable = 1;
414 SwapVector[VecIdx].SpecialHandling = SHValues::SH_SPLAT;
426 case PPC::EXTRACT_SUBREG:
427 case PPC::INSERT_SUBREG:
428 case PPC::COPY_TO_REGCLASS:
448 case PPC::VCIPHERLAST:
468 case PPC::VNCIPHERLAST:
493 case PPC::VSHASIGMAD:
494 case PPC::VSHASIGMAW:
526 if (RelevantFunction) {
531 return RelevantFunction;
537 PPCVSXSwapEntry& SwapEntry) {
538 SwapEntry.VSEMI =
MI;
539 SwapEntry.VSEId = SwapVector.size();
540 SwapVector.push_back(SwapEntry);
541 EC->insert(SwapEntry.VSEId);
542 SwapMap[
MI] = SwapEntry.VSEId;
543 return SwapEntry.VSEId;
555 unsigned PPCVSXSwapRemoval::lookThruCopyLike(
unsigned SrcReg,
570 if (!isScalarVecReg(CopySrcReg))
571 SwapVector[VecIdx].MentionsPhysVR = 1;
575 return lookThruCopyLike(CopySrcReg, VecIdx);
585 void PPCVSXSwapRemoval::formWebs() {
587 LLVM_DEBUG(
dbgs() <<
"\n*** Forming webs for swap removal ***\n\n");
589 for (
unsigned EntryIdx = 0; EntryIdx < SwapVector.size(); ++EntryIdx) {
605 if (!
isVecReg(Reg) && !isScalarVecReg(Reg))
609 if (!(MI->
isCopy() && isScalarVecReg(Reg)))
610 SwapVector[EntryIdx].MentionsPhysVR = 1;
618 assert(SwapMap.find(DefMI) != SwapMap.end() &&
619 "Inconsistency: def of vector reg not found in swap map!");
620 int DefIdx = SwapMap[
DefMI];
621 (void)EC->unionSets(SwapVector[DefIdx].VSEId,
622 SwapVector[EntryIdx].VSEId);
625 SwapVector[DefIdx].VSEId,
626 SwapVector[EntryIdx].VSEId));
637 void PPCVSXSwapRemoval::recordUnoptimizableWebs() {
639 LLVM_DEBUG(
dbgs() <<
"\n*** Rejecting webs for swap removal ***\n\n");
641 for (
unsigned EntryIdx = 0; EntryIdx < SwapVector.size(); ++EntryIdx) {
642 int Repr = EC->getLeaderValue(SwapVector[EntryIdx].VSEId);
645 if (SwapVector[Repr].WebRejected)
651 if (SwapVector[EntryIdx].MentionsPhysVR ||
652 SwapVector[EntryIdx].MentionsPartialVR ||
653 !(SwapVector[EntryIdx].IsSwappable || SwapVector[EntryIdx].IsSwap)) {
655 SwapVector[Repr].WebRejected = 1;
658 dbgs() <<
format(
"Web %d rejected for physreg, partial reg, or not " 662 LLVM_DEBUG(SwapVector[EntryIdx].VSEMI->dump());
668 else if (SwapVector[EntryIdx].IsLoad && SwapVector[EntryIdx].IsSwap) {
677 int UseIdx = SwapMap[&
UseMI];
679 if (!SwapVector[UseIdx].IsSwap || SwapVector[UseIdx].IsLoad ||
680 SwapVector[UseIdx].IsStore) {
682 SwapVector[Repr].WebRejected = 1;
685 "Web %d rejected for load not feeding swap\n", Repr));
696 }
else if (SwapVector[EntryIdx].IsStore && SwapVector[EntryIdx].IsSwap) {
701 int DefIdx = SwapMap[
DefMI];
703 if (!SwapVector[DefIdx].IsSwap || SwapVector[DefIdx].IsLoad ||
704 SwapVector[DefIdx].IsStore) {
706 SwapVector[Repr].WebRejected = 1;
709 "Web %d rejected for store not fed by swap\n", Repr));
720 int UseIdx = SwapMap[&
UseMI];
722 if (SwapVector[UseIdx].VSEMI->getOpcode() != MI->
getOpcode()) {
723 SwapVector[Repr].WebRejected = 1;
727 "Web %d rejected for swap not feeding only stores\n", Repr));
748 void PPCVSXSwapRemoval::markSwapsForRemoval() {
752 for (
unsigned EntryIdx = 0; EntryIdx < SwapVector.size(); ++EntryIdx) {
754 if (SwapVector[EntryIdx].IsLoad && SwapVector[EntryIdx].IsSwap) {
755 int Repr = EC->getLeaderValue(SwapVector[EntryIdx].VSEId);
757 if (!SwapVector[Repr].WebRejected) {
762 int UseIdx = SwapMap[&
UseMI];
763 SwapVector[UseIdx].WillRemove = 1;
770 }
else if (SwapVector[EntryIdx].IsStore && SwapVector[EntryIdx].IsSwap) {
771 int Repr = EC->getLeaderValue(SwapVector[EntryIdx].VSEId);
773 if (!SwapVector[Repr].WebRejected) {
777 int DefIdx = SwapMap[
DefMI];
778 SwapVector[DefIdx].WillRemove = 1;
784 }
else if (SwapVector[EntryIdx].IsSwappable &&
785 SwapVector[EntryIdx].SpecialHandling != 0) {
786 int Repr = EC->getLeaderValue(SwapVector[EntryIdx].VSEId);
788 if (!SwapVector[Repr].WebRejected)
789 handleSpecialSwappables(EntryIdx);
801 unsigned DstReg,
unsigned SrcReg) {
814 void PPCVSXSwapRemoval::handleSpecialSwappables(
int EntryIdx) {
815 switch (SwapVector[EntryIdx].SpecialHandling) {
822 case SHValues::SH_SPLAT: {
832 case PPC::VSPLTB: NElts = 16;
break;
833 case PPC::VSPLTH: NElts = 8;
break;
835 case PPC::XXSPLTW: NElts = 4;
break;
844 EltNo = (EltNo + NElts / 2) % NElts;
861 case SHValues::SH_XXPERMDI: {
868 if (Selector == 0 || Selector == 3)
869 Selector = 3 - Selector;
891 case SHValues::SH_COPYWIDEN: {
899 Register NewVReg =
MRI->createVirtualRegister(DstRC);
912 if (DstRC == &PPC::VRRCRegClass) {
913 Register VSRCTmp1 =
MRI->createVirtualRegister(&PPC::VSRCRegClass);
914 Register VSRCTmp2 =
MRI->createVirtualRegister(&PPC::VSRCRegClass);
917 TII->get(PPC::COPY), VSRCTmp1)
921 insertSwap(MI, InsertPoint, VSRCTmp2, VSRCTmp1);
925 TII->get(PPC::COPY), DstReg)
930 insertSwap(MI, InsertPoint, DstReg, NewVReg);
940 bool PPCVSXSwapRemoval::removeSwaps() {
944 bool Changed =
false;
946 for (
unsigned EntryIdx = 0; EntryIdx < SwapVector.size(); ++EntryIdx) {
947 if (SwapVector[EntryIdx].WillRemove) {
956 SwapVector[EntryIdx].VSEId));
966 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) 970 for (
unsigned EntryIdx = 0; EntryIdx < SwapVector.size(); ++EntryIdx) {
973 int ID = SwapVector[EntryIdx].VSEId;
976 dbgs() <<
format(
"%6d", EC->getLeaderValue(ID));
980 if (SwapVector[EntryIdx].IsLoad)
982 if (SwapVector[EntryIdx].IsStore)
984 if (SwapVector[EntryIdx].IsSwap)
986 if (SwapVector[EntryIdx].MentionsPhysVR)
987 dbgs() <<
"physreg ";
988 if (SwapVector[EntryIdx].MentionsPartialVR)
989 dbgs() <<
"partialreg ";
991 if (SwapVector[EntryIdx].IsSwappable) {
992 dbgs() <<
"swappable ";
993 switch(SwapVector[EntryIdx].SpecialHandling) {
995 dbgs() <<
"special:**unknown**";
1000 dbgs() <<
"special:extract ";
1003 dbgs() <<
"special:insert ";
1006 dbgs() <<
"special:load ";
1009 dbgs() <<
"special:store ";
1012 dbgs() <<
"special:splat ";
1015 dbgs() <<
"special:xxpermdi ";
1018 dbgs() <<
"special:copywiden ";
1023 if (SwapVector[EntryIdx].WebRejected)
1024 dbgs() <<
"rejected ";
1025 if (SwapVector[EntryIdx].WillRemove)
1026 dbgs() <<
"remove ";
1042 "PowerPC VSX Swap Removal",
false,
false)
1046 char PPCVSXSwapRemoval::
ID = 0;
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
bool contains(unsigned Reg) const
Return true if the specified register is included in this register class.
This class represents lattice values for constants.
#define LLVM_DUMP_METHOD
Mark debug helper function definitions like dump() that should not be stripped from debug builds...
FunctionPass * createPPCVSXSwapRemovalPass()
bool isSubregToReg() const
const DebugLoc & getDebugLoc() const
Returns the debug location id of this MachineInstr.
format_object< Ts... > format(const char *Fmt, const Ts &... Vals)
These are helper functions used to produce formatted output.
iterator_range< mop_iterator > operands()
bool isCopyLike() const
Return true if the instruction behaves like a copy.
const TargetRegisterClass * getRegClass(Register Reg) const
Return the register class of the specified virtual register.
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
const HexagonInstrInfo * TII
void eraseFromParent()
Unlink 'this' from the containing basic block and delete it.
INITIALIZE_PASS_BEGIN(PPCVSXSwapRemoval, DEBUG_TYPE, "PowerPC VSX Swap Removal", false, false) INITIALIZE_PASS_END(PPCVSXSwapRemoval
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
INLINEASM - Represents an inline asm block.
static void initialize(TargetLibraryInfoImpl &TLI, const Triple &T, ArrayRef< StringLiteral > StandardNames)
Initialize the set of available library functions based on the specified target triple.
CHAIN = STXVD2X CHAIN, VSRC, Ptr - Occurs only for little endian.
void setReg(Register Reg)
Change the register this operand corresponds to.
int getNumber() const
MachineBasicBlocks are uniquely numbered at the function level, unless they're not in a MachineFuncti...
void initializePPCVSXSwapRemovalPass(PassRegistry &)
VSRC, CHAIN = LXVD2X_LE CHAIN, Ptr - Occurs only for little endian.
MachineInstrBuilder BuildMI(MachineFunction &MF, const DebugLoc &DL, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
MachineInstrBundleIterator< MachineInstr > iterator
void dump(const SparseBitVector< ElementSize > &LHS, raw_ostream &out)
unsigned const MachineRegisterInfo * MRI
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineInstrBuilder & UseMI
static Register UseReg(const MachineOperand &MO)
EquivalenceClasses - This represents a collection of equivalence classes and supports three efficient...
void setImm(int64_t immVal)
FunctionPass class - This class is used to implement most global optimizations.
static bool isVecReg(unsigned Reg)
INITIALIZE_PASS_END(RegBankSelect, DEBUG_TYPE, "Assign register bank of generic virtual registers", false, false) RegBankSelect
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
bool hasSubClassEq(const TargetRegisterClass *RC) const
Returns true if RC is a sub-class of or equal to this class.
void setIsKill(bool Val=true)
static uint64_t add(uint64_t LeftOp, uint64_t RightOp)
MachineOperand class - Representation of each machine instruction operand.
MachineInstrBuilder MachineInstrBuilder & DefMI
VPERM - The PPC VPERM Instruction.
const Function & getFunction() const
Return the LLVM function that this machine code represents.
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
const MachineBasicBlock * getParent() const
MachineRegisterInfo - Keep track of information for virtual and physical registers, including vreg register classes, use/def chains for registers, etc.
Representation of each machine instruction.
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
INLINEASM_BR - Terminator version of inline asm. Used by asm-goto.
bool needsSwapsForVSXMemOps() const
static bool isVirtualRegister(unsigned Reg)
Return true if the specified register number is in the virtual register namespace.
Register getReg() const
getReg - Returns the register number.
const MachineOperand & getOperand(unsigned i) const
const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
Wrapper class representing virtual and physical registers.
XXPERMDI - The PPC XXPERMDI instruction.