52#include "llvm/Config/llvm-config.h"
59#define DEBUG_TYPE "ppc-vsx-swaps"
65struct PPCVSXSwapEntry {
73 unsigned int IsLoad : 1;
74 unsigned int IsStore : 1;
75 unsigned int IsSwap : 1;
76 unsigned int MentionsPhysVR : 1;
77 unsigned int IsSwappable : 1;
78 unsigned int MentionsPartialVR : 1;
79 unsigned int SpecialHandling : 3;
80 unsigned int WebRejected : 1;
81 unsigned int WillRemove : 1;
103 std::vector<PPCVSXSwapEntry> SwapVector;
121 bool gatherVectorInstructions();
130 unsigned lookThruCopyLike(
unsigned SrcReg,
unsigned VecIdx);
136 void recordUnoptimizableWebs();
139 void markSwapsForRemoval();
148 unsigned DstReg,
unsigned SrcReg);
151 void handleSpecialSwappables(
int EntryIdx);
154 void dumpSwapVector();
170 bool isScalarVecReg(
unsigned Reg) {
178 bool isAnyVecReg(
unsigned Reg,
bool &Partial) {
179 if (isScalarVecReg(
Reg))
200 if (gatherVectorInstructions()) {
202 recordUnoptimizableWebs();
203 markSwapsForRemoval();
226 const int InitialVectorSize(256);
228 SwapVector.reserve(InitialVectorSize);
233 EC =
new EquivalenceClasses<int>;
239bool PPCVSXSwapRemoval::gatherVectorInstructions() {
240 bool RelevantFunction =
false;
242 for (MachineBasicBlock &
MBB : *MF) {
243 for (MachineInstr &
MI :
MBB) {
245 if (
MI.isDebugInstr())
248 bool RelevantInstr =
false;
249 bool Partial =
false;
251 for (
const MachineOperand &MO :
MI.operands()) {
258 if (isAnyVecReg(
Reg, Partial))
259 RelevantInstr =
true;
265 RelevantFunction =
true;
270 PPCVSXSwapEntry SwapEntry{};
271 int VecIdx = addSwapEntry(&
MI, SwapEntry);
273 switch(
MI.getOpcode()) {
282 SwapVector[VecIdx].MentionsPartialVR = 1;
284 SwapVector[VecIdx].IsSwappable = 1;
286 case PPC::XXPERMDI: {
294 int immed =
MI.getOperand(3).getImm();
296 unsigned trueReg1 = lookThruCopyLike(
MI.getOperand(1).getReg(),
298 unsigned trueReg2 = lookThruCopyLike(
MI.getOperand(2).getReg(),
300 if (trueReg1 == trueReg2)
301 SwapVector[VecIdx].IsSwap = 1;
305 SwapVector[VecIdx].IsSwappable = 1;
306 SwapVector[VecIdx].SpecialHandling = SHValues::SH_XXPERMDI;
316 }
else if (immed == 0 || immed == 3) {
318 SwapVector[VecIdx].IsSwappable = 1;
319 SwapVector[VecIdx].SpecialHandling = SHValues::SH_XXPERMDI;
321 unsigned trueReg1 = lookThruCopyLike(
MI.getOperand(1).getReg(),
323 unsigned trueReg2 = lookThruCopyLike(
MI.getOperand(2).getReg(),
325 if (trueReg1 == trueReg2)
326 SwapVector[VecIdx].MentionsPhysVR = 0;
330 SwapVector[VecIdx].IsSwappable = 1;
331 SwapVector[VecIdx].SpecialHandling = SHValues::SH_XXPERMDI;
340 SwapVector[VecIdx].IsLoad = 1;
346 SwapVector[VecIdx].IsLoad = 1;
347 SwapVector[VecIdx].IsSwap = 1;
357 SwapVector[VecIdx].IsLoad = 1;
358 SwapVector[VecIdx].IsSwappable = 1;
365 SwapVector[VecIdx].IsStore = 1;
371 SwapVector[VecIdx].IsStore = 1;
372 SwapVector[VecIdx].IsSwap = 1;
379 SwapVector[VecIdx].IsSwappable = 1;
384 else if (isScalarVecReg(
MI.getOperand(0).getReg()) &&
385 isScalarVecReg(
MI.getOperand(1).getReg()))
386 SwapVector[VecIdx].IsSwappable = 1;
388 case PPC::SUBREG_TO_REG: {
398 SwapVector[VecIdx].IsSwappable = 1;
399 else if (
isVecReg(
MI.getOperand(0).getReg()) &&
400 isScalarVecReg(
MI.getOperand(1).getReg())) {
401 SwapVector[VecIdx].IsSwappable = 1;
402 SwapVector[VecIdx].SpecialHandling = SHValues::SH_COPYWIDEN;
412 SwapVector[VecIdx].IsSwappable = 1;
413 SwapVector[VecIdx].SpecialHandling = SHValues::SH_SPLAT;
424 case PPC::INLINEASM_BR:
425 case PPC::EXTRACT_SUBREG:
426 case PPC::INSERT_SUBREG:
427 case PPC::COPY_TO_REGCLASS:
447 case PPC::VCIPHERLAST:
467 case PPC::VNCIPHERLAST:
492 case PPC::VSHASIGMAD:
493 case PPC::VSHASIGMAW:
527 if (RelevantFunction) {
532 return RelevantFunction;
537int PPCVSXSwapRemoval::addSwapEntry(MachineInstr *
MI,
538 PPCVSXSwapEntry& SwapEntry) {
539 SwapEntry.VSEMI =
MI;
540 SwapEntry.VSEId = SwapVector.size();
541 SwapVector.push_back(SwapEntry);
542 EC->insert(SwapEntry.VSEId);
543 SwapMap[
MI] = SwapEntry.VSEId;
544 return SwapEntry.VSEId;
556unsigned PPCVSXSwapRemoval::lookThruCopyLike(
unsigned SrcReg,
558 MachineInstr *
MI =
MRI->getVRegDef(SrcReg);
559 if (!
MI->isCopyLike())
562 assert((
MI->isCopy() ||
MI->isSubregToReg()) &&
563 "bad opcode for lookThruCopyLike");
564 unsigned CopySrcReg =
MI->getOperand(1).getReg();
566 if (!Register::isVirtualRegister(CopySrcReg)) {
567 if (!isScalarVecReg(CopySrcReg))
568 SwapVector[VecIdx].MentionsPhysVR = 1;
572 return lookThruCopyLike(CopySrcReg, VecIdx);
582void PPCVSXSwapRemoval::formWebs() {
584 LLVM_DEBUG(
dbgs() <<
"\n*** Forming webs for swap removal ***\n\n");
586 for (
unsigned EntryIdx = 0; EntryIdx < SwapVector.size(); ++EntryIdx) {
588 MachineInstr *
MI = SwapVector[EntryIdx].VSEMI;
597 for (
const MachineOperand &MO :
MI->operands()) {
606 if (!(
MI->isCopy() && isScalarVecReg(
Reg)))
607 SwapVector[EntryIdx].MentionsPhysVR = 1;
616 "Inconsistency: def of vector reg not found in swap map!");
617 int DefIdx = SwapMap[
DefMI];
618 (void)EC->unionSets(SwapVector[DefIdx].VSEId,
619 SwapVector[EntryIdx].VSEId);
622 SwapVector[DefIdx].VSEId,
623 SwapVector[EntryIdx].VSEId));
634void PPCVSXSwapRemoval::recordUnoptimizableWebs() {
636 LLVM_DEBUG(
dbgs() <<
"\n*** Rejecting webs for swap removal ***\n\n");
638 for (
unsigned EntryIdx = 0; EntryIdx < SwapVector.size(); ++EntryIdx) {
639 int Repr = EC->getLeaderValue(SwapVector[EntryIdx].VSEId);
642 if (SwapVector[Repr].WebRejected)
648 if (SwapVector[EntryIdx].MentionsPhysVR ||
649 SwapVector[EntryIdx].MentionsPartialVR ||
650 !(SwapVector[EntryIdx].IsSwappable || SwapVector[EntryIdx].IsSwap)) {
652 SwapVector[Repr].WebRejected = 1;
655 dbgs() <<
format(
"Web %d rejected for physreg, partial reg, or not "
659 LLVM_DEBUG(SwapVector[EntryIdx].VSEMI->dump());
665 else if (SwapVector[EntryIdx].IsLoad && SwapVector[EntryIdx].IsSwap) {
666 MachineInstr *
MI = SwapVector[EntryIdx].VSEMI;
673 for (MachineInstr &
UseMI :
MRI->use_nodbg_instructions(DefReg)) {
674 int UseIdx = SwapMap[&
UseMI];
676 if (!SwapVector[UseIdx].IsSwap || SwapVector[UseIdx].IsLoad ||
677 SwapVector[UseIdx].IsStore) {
679 SwapVector[Repr].WebRejected = 1;
682 "Web %d rejected for load not feeding swap\n", Repr));
693 if (SwapVector[UseIdx].IsSwap && !SwapVector[UseIdx].IsLoad &&
694 !SwapVector[UseIdx].IsStore) {
696 for (MachineInstr &UseOfUseMI :
697 MRI->use_nodbg_instructions(SwapDefReg)) {
698 int UseOfUseIdx = SwapMap[&UseOfUseMI];
699 if (SwapVector[UseOfUseIdx].IsStore) {
700 SwapVector[Repr].WebRejected = 1;
703 "Web %d rejected for load/swap feeding a store\n", Repr));
716 }
else if (SwapVector[EntryIdx].IsStore && SwapVector[EntryIdx].IsSwap) {
717 MachineInstr *
MI = SwapVector[EntryIdx].VSEMI;
721 int DefIdx = SwapMap[
DefMI];
723 if (!SwapVector[DefIdx].IsSwap || SwapVector[DefIdx].IsLoad ||
724 SwapVector[DefIdx].IsStore) {
726 SwapVector[Repr].WebRejected = 1;
729 "Web %d rejected for store not fed by swap\n", Repr));
739 for (MachineInstr &
UseMI :
MRI->use_nodbg_instructions(DefReg)) {
740 int UseIdx = SwapMap[&
UseMI];
742 if (SwapVector[UseIdx].VSEMI->getOpcode() !=
MI->getOpcode()) {
743 SwapVector[Repr].WebRejected = 1;
747 "Web %d rejected for swap not feeding only stores\n", Repr));
768void PPCVSXSwapRemoval::markSwapsForRemoval() {
772 for (
unsigned EntryIdx = 0; EntryIdx < SwapVector.size(); ++EntryIdx) {
774 if (SwapVector[EntryIdx].IsLoad && SwapVector[EntryIdx].IsSwap) {
775 int Repr = EC->getLeaderValue(SwapVector[EntryIdx].VSEId);
777 if (!SwapVector[Repr].WebRejected) {
778 MachineInstr *
MI = SwapVector[EntryIdx].VSEMI;
781 for (MachineInstr &
UseMI :
MRI->use_nodbg_instructions(DefReg)) {
782 int UseIdx = SwapMap[&
UseMI];
783 SwapVector[UseIdx].WillRemove = 1;
790 }
else if (SwapVector[EntryIdx].IsStore && SwapVector[EntryIdx].IsSwap) {
791 int Repr = EC->getLeaderValue(SwapVector[EntryIdx].VSEId);
793 if (!SwapVector[Repr].WebRejected) {
794 MachineInstr *
MI = SwapVector[EntryIdx].VSEMI;
797 int DefIdx = SwapMap[
DefMI];
798 SwapVector[DefIdx].WillRemove = 1;
804 }
else if (SwapVector[EntryIdx].IsSwappable &&
805 SwapVector[EntryIdx].SpecialHandling != 0) {
806 int Repr = EC->getLeaderValue(SwapVector[EntryIdx].VSEId);
808 if (!SwapVector[Repr].WebRejected)
809 handleSpecialSwappables(EntryIdx);
819void PPCVSXSwapRemoval::insertSwap(MachineInstr *
MI,
821 unsigned DstReg,
unsigned SrcReg) {
822 BuildMI(*
MI->getParent(), InsertPoint,
MI->getDebugLoc(),
823 TII->get(PPC::XXPERMDI), DstReg)
834void PPCVSXSwapRemoval::handleSpecialSwappables(
int EntryIdx) {
835 switch (SwapVector[EntryIdx].SpecialHandling) {
842 case SHValues::SH_SPLAT: {
843 MachineInstr *
MI = SwapVector[EntryIdx].VSEMI;
849 switch (
MI->getOpcode()) {
852 case PPC::VSPLTB: NElts = 16;
break;
853 case PPC::VSPLTH: NElts = 8;
break;
855 case PPC::XXSPLTW: NElts = 4;
break;
859 if (
MI->getOpcode() == PPC::XXSPLTW)
860 EltNo =
MI->getOperand(2).getImm();
862 EltNo =
MI->getOperand(1).getImm();
864 EltNo = (EltNo + NElts / 2) % NElts;
865 if (
MI->getOpcode() == PPC::XXSPLTW)
866 MI->getOperand(2).setImm(EltNo);
868 MI->getOperand(1).setImm(EltNo);
881 case SHValues::SH_XXPERMDI: {
882 MachineInstr *
MI = SwapVector[EntryIdx].VSEMI;
887 unsigned Selector =
MI->getOperand(3).getImm();
888 if (Selector == 0 || Selector == 3)
889 Selector = 3 - Selector;
890 MI->getOperand(3).setImm(Selector);
894 MI->getOperand(1).setReg(Reg2);
895 MI->getOperand(2).setReg(Reg1);
898 bool IsKill1 =
MI->getOperand(1).isKill();
899 bool IsKill2 =
MI->getOperand(2).isKill();
900 MI->getOperand(1).setIsKill(IsKill2);
901 MI->getOperand(2).setIsKill(IsKill1);
911 case SHValues::SH_COPYWIDEN: {
912 MachineInstr *
MI = SwapVector[EntryIdx].VSEMI;
918 const TargetRegisterClass *DstRC =
MRI->getRegClass(DstReg);
919 Register NewVReg =
MRI->createVirtualRegister(DstRC);
921 MI->getOperand(0).setReg(NewVReg);
932 if (DstRC == &PPC::VRRCRegClass) {
933 Register VSRCTmp1 =
MRI->createVirtualRegister(&PPC::VSRCRegClass);
934 Register VSRCTmp2 =
MRI->createVirtualRegister(&PPC::VSRCRegClass);
936 BuildMI(*
MI->getParent(), InsertPoint,
MI->getDebugLoc(),
937 TII->get(PPC::COPY), VSRCTmp1)
941 insertSwap(
MI, InsertPoint, VSRCTmp2, VSRCTmp1);
944 BuildMI(*
MI->getParent(), InsertPoint,
MI->getDebugLoc(),
945 TII->get(PPC::COPY), DstReg)
950 insertSwap(
MI, InsertPoint, DstReg, NewVReg);
960bool PPCVSXSwapRemoval::removeSwaps() {
966 for (
unsigned EntryIdx = 0; EntryIdx < SwapVector.size(); ++EntryIdx) {
967 if (SwapVector[EntryIdx].WillRemove) {
969 MachineInstr *
MI = SwapVector[EntryIdx].VSEMI;
970 MachineBasicBlock *
MBB =
MI->getParent();
972 MI->getOperand(0).getReg())
973 .
add(
MI->getOperand(1));
976 SwapVector[EntryIdx].VSEId));
979 MI->eraseFromParent();
986#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
990 for (
unsigned EntryIdx = 0; EntryIdx < SwapVector.size(); ++EntryIdx) {
992 MachineInstr *
MI = SwapVector[EntryIdx].VSEMI;
993 int ID = SwapVector[EntryIdx].VSEId;
997 dbgs() <<
format(
" %bb.%3d",
MI->getParent()->getNumber());
998 dbgs() <<
format(
" %14s ",
TII->getName(
MI->getOpcode()).str().c_str());
1000 if (SwapVector[EntryIdx].IsLoad)
1002 if (SwapVector[EntryIdx].IsStore)
1004 if (SwapVector[EntryIdx].IsSwap)
1006 if (SwapVector[EntryIdx].MentionsPhysVR)
1007 dbgs() <<
"physreg ";
1008 if (SwapVector[EntryIdx].MentionsPartialVR)
1009 dbgs() <<
"partialreg ";
1011 if (SwapVector[EntryIdx].IsSwappable) {
1012 dbgs() <<
"swappable ";
1013 switch(SwapVector[EntryIdx].SpecialHandling) {
1015 dbgs() <<
"special:**unknown**";
1020 dbgs() <<
"special:extract ";
1023 dbgs() <<
"special:insert ";
1026 dbgs() <<
"special:load ";
1029 dbgs() <<
"special:store ";
1032 dbgs() <<
"special:splat ";
1035 dbgs() <<
"special:xxpermdi ";
1038 dbgs() <<
"special:copywiden ";
1043 if (SwapVector[EntryIdx].WebRejected)
1044 dbgs() <<
"rejected ";
1045 if (SwapVector[EntryIdx].WillRemove)
1046 dbgs() <<
"remove ";
1060 "PowerPC VSX Swap Removal",
false,
false)
1064char PPCVSXSwapRemoval::
ID = 0;
unsigned const MachineRegisterInfo * MRI
MachineInstrBuilder & UseMI
MachineInstrBuilder MachineInstrBuilder & DefMI
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
static bool isRegInClass(const MachineOperand &MO, const TargetRegisterClass *Class)
#define LLVM_DUMP_METHOD
Mark debug helper function definitions like dump() that should not be stripped from debug builds.
This file defines the DenseMap class.
Generic implementation of equivalence classes through the use Tarjan's efficient union-find algorithm...
static Register UseReg(const MachineOperand &MO)
const HexagonInstrInfo * TII
static bool isVecReg(unsigned Reg)
Promote Memory to Register
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
static void initialize(TargetLibraryInfoImpl &TLI, const Triple &T, const llvm::StringTable &StandardNames, VectorLibrary VecLib)
Initialize the set of available library functions based on the specified target triple.
EquivalenceClasses - This represents a collection of equivalence classes and supports three efficient...
FunctionPass class - This class is used to implement most global optimizations.
MachineInstrBundleIterator< MachineInstr > iterator
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
Function & getFunction()
Return the LLVM function that this machine code represents.
Register getReg(unsigned Idx) const
Get the register for the operand index.
const MachineInstrBuilder & addReg(Register RegNo, RegState Flags={}, unsigned SubReg=0) const
Add a new virtual register operand.
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
const MachineInstrBuilder & add(const MachineOperand &MO) const
Representation of each machine instruction.
LLVM_ABI void dump() const
const MachineOperand & getOperand(unsigned i) const
Register getReg() const
getReg - Returns the register number.
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
bool needsSwapsForVSXMemOps() const
constexpr bool isVirtual() const
Return true if the specified register number is in the virtual register namespace.
static constexpr bool isVirtualRegister(unsigned Reg)
Return true if the specified register number is in the virtual register namespace.
bool contains(Register Reg) const
Return true if the specified register is included in this register class.
bool hasSubClassEq(const TargetRegisterClass *RC) const
Returns true if RC is a sub-class of or equal to this class.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
This is an optimization pass for GlobalISel generic memory operations.
void dump(const SparseBitVector< ElementSize > &LHS, raw_ostream &out)
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
FunctionPass * createPPCVSXSwapRemovalPass()
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
format_object< Ts... > format(const char *Fmt, const Ts &... Vals)
These are helper functions used to produce formatted output.