26 #define AARCH64_EXPAND_PSEUDO_NAME "AArch64 pseudo instruction expansion pass"
// NOTE(review): the lines below are a torn fragment of a parameter list —
// presumably the expandCMP_SWAP declaration (same parameter names appear in
// its definition further down) — TODO confirm against the full file.
// Source lines between the numbered markers are missing from this extraction.
50 unsigned LdarOp,
unsigned StlrOp,
unsigned CmpOp,
51 unsigned ExtendImm,
unsigned ZeroReg,
/// Helper function which extracts the specified 16-bit chunk from a
/// 64-bit value.  Chunk 0 is the least significant 16 bits.
static uint64_t getChunk(uint64_t Imm, unsigned ChunkIdx) {
  assert(ChunkIdx < 4 && "Out of range chunk index specified!");

  const unsigned ShiftAmt = ChunkIdx * 16;
  return (Imm >> ShiftAmt) & 0xFFFF;
}
/// Helper function which replicates a 16-bit chunk within a 64-bit value:
/// the chunk at index \p FromIdx is copied into the chunk position
/// \p ToIdx; all other chunks are left untouched.  Returns the new value.
///
/// Fix: the visible fragment was truncated after clearing the destination
/// chunk — the merge of the source chunk and the return were missing, so
/// the function discarded its own result.  Restored per the documented
/// contract ("replicates a 16-bit chunk within a 64-bit value").
static uint64_t replicateChunk(uint64_t Imm, unsigned FromIdx,
                               unsigned ToIdx) {
  assert((FromIdx < 4) && (ToIdx < 4) && "Out of range chunk index specified!");
  const unsigned ShiftAmt = ToIdx * 16;

  // Source chunk, already shifted to the destination position.
  const uint64_t Chunk = ((Imm >> (FromIdx * 16)) & 0xFFFF) << ShiftAmt;
  // Clear the destination chunk, then merge the source chunk in.
  Imm &= ~(0xFFFFLL << ShiftAmt);
  Imm |= Chunk;

  return Imm;
}
// NOTE(review): fragment — presumably the interior of tryOrrMovk (the
// ORR + MOVK materialization helper documented elsewhere in this file);
// interior lines are missing from this extraction.  Visible steps: validate
// the chunk index, compute its bit offset, build an instruction with XZR as
// a source register, and transfer implicit operands from the pseudo MI onto
// the expansion.
107 assert(ChunkIdx < 4 &&
"Out of range chunk index specified!");
108 const unsigned ShiftAmt = ChunkIdx * 16;
116 .addReg(AArch64::XZR)
130 transferImpOps(MI, MIB, MIB1);
140 static bool canUseOrr(uint64_t Chunk, uint64_t &Encoding) {
// Replicate the 16-bit chunk into all four chunk positions so the full
// 64-bit pattern can be tested for logical-immediate encodability.  The
// return statement is missing from this extraction; per the index entry
// for this function it reports whether the replicated value can be
// materialized with an ORR instruction, writing the encoding to Encoding.
141 Chunk = (Chunk << 48) | (Chunk << 32) | (Chunk << 16) | Chunk;
// NOTE(review): fragment of tryToreplicateChunks — checks for identical
// 16-bit chunks within the constant and materializes them with a single
// ORR plus MOVKs (per this file's index entry).  Interior lines are
// missing from this extraction.
// Count occurrences of each 16-bit chunk value.
162 for (
unsigned Idx = 0; Idx < 4; ++Idx)
166 for (CountMap::const_iterator Chunk = Counts.begin(),
End = Counts.end();
167 Chunk !=
End; ++Chunk) {
168 const uint64_t ChunkVal = Chunk->first;
169 const unsigned Count = Chunk->second;
171 uint64_t Encoding = 0;
// Only worthwhile when the chunk repeats 2 or 3 times AND the replicated
// pattern is encodable as an ORR logical immediate.
175 if ((Count != 2 && Count != 3) || !
canUseOrr(ChunkVal, Encoding))
178 const bool CountThree = Count == 3;
183 .addReg(AArch64::XZR)
// Walk the chunks to find the ones NOT equal to the replicated value;
// those get patched with MOVK instructions.
189 unsigned ShiftAmt = 0;
192 for (; ShiftAmt < 64; ShiftAmt += 16) {
193 Imm16 = (UImm >> ShiftAmt) & 0xFFFF;
195 if (Imm16 != ChunkVal)
211 transferImpOps(MI, MIB, MIB1);
// Second differing chunk (only reached when Count == 2): continue the
// scan from the next chunk position.
217 for (ShiftAmt += 16; ShiftAmt < 64; ShiftAmt += 16) {
218 Imm16 = (UImm >> ShiftAmt) & 0xFFFF;
220 if (Imm16 != ChunkVal)
232 transferImpOps(MI, MIB, MIB2);
// NOTE(review): fragments of isStartChunk / isEndChunk (pattern checks
// '1...0...' and '0...1...' per the index entries).  An all-zero or
// all-one (sign-extended) chunk can neither start nor end a contiguous
// sequence of ones, so both helpers reject these values up front.
244 if (Chunk == 0 || Chunk == UINT64_MAX)
254 if (Chunk == 0 || Chunk == UINT64_MAX)
/// Clear or set all bits in the 16-bit chunk at the given index of \p Imm.
/// \p Clear selects the operation: true clears the chunk to zero, false
/// sets every bit of the chunk.  Returns the updated value.
///
/// Fix: the visible fragment contained both the clear and the set
/// statements but had lost the selector between them and the final
/// return, so the function unconditionally did both and returned nothing.
/// Restored per the documented contract ("Clear or set all bits in the
/// chunk at the given index").
static uint64_t updateImm(uint64_t Imm, unsigned Idx, bool Clear) {
  const uint64_t Mask = 0xFFFF;

  if (Clear)
    // Clear chunk in the immediate.
    Imm &= ~(Mask << (Idx * 16));
  else
    // Set all bits in the immediate for the particular chunk.
    Imm |= Mask << (Idx * 16);

  return Imm;
}
// NOTE(review): fragment of trySequenceOfOnes — looks for a contiguous
// sequence of ones, possibly interrupted by one or two chunks, and
// materializes it with ORR + up to two MOVKs (per the index entry).
// Interior lines are missing from this extraction.
292 const int NotSet = -1;
293 const uint64_t
Mask = 0xFFFF;
// Locate the chunk indices where the ones-sequence starts and ends.
295 int StartIdx = NotSet;
298 for (
int Idx = 0; Idx < 4; ++Idx) {
299 int64_t Chunk =
getChunk(UImm, Idx);
// Sign-extend the 16-bit chunk so an all-ones chunk compares equal to -1.
301 Chunk = (Chunk << 48) >> 48;
310 if (StartIdx == NotSet || EndIdx == NotSet)
// Expected fill values for chunks outside vs. inside the sequence.
314 uint64_t Outside = 0;
316 uint64_t Inside =
Mask;
// StartIdx > EndIdx means the sequence wraps around the value's ends.
321 if (StartIdx > EndIdx) {
326 uint64_t OrrImm = UImm;
327 int FirstMovkIdx = NotSet;
328 int SecondMovkIdx = NotSet;
// Normalize each chunk that deviates from the ideal pattern and remember
// (up to two) positions that will need a fix-up MOVK.
332 for (
int Idx = 0; Idx < 4; ++Idx) {
333 const uint64_t Chunk =
getChunk(UImm, Idx);
337 if ((Idx < StartIdx || EndIdx < Idx) && Chunk != Outside) {
338 OrrImm =
updateImm(OrrImm, Idx, Outside == 0);
341 if (FirstMovkIdx == NotSet)
348 }
else if (Idx > StartIdx && Idx < EndIdx && Chunk != Inside) {
349 OrrImm =
updateImm(OrrImm, Idx, Inside != Mask);
352 if (FirstMovkIdx == NotSet)
// If no MOVK is needed the constant was a plain logical immediate and
// should have been handled by the single-ORR path already.
358 assert(FirstMovkIdx != NotSet &&
"Constant materializable with single ORR!");
361 uint64_t Encoding = 0;
366 .addReg(AArch64::XZR)
372 const bool SingleMovk = SecondMovkIdx == NotSet;
385 transferImpOps(MI, MIB, MIB1);
399 transferImpOps(MI, MIB, MIB2);
// NOTE(review): fragment of expandMOVImm (called below for MOVi32imm /
// MOVi64imm) — materializes a 32/64-bit immediate with MOVZ/MOVN + MOVK,
// or a single ORR when encodable.  Interior lines are missing from this
// extraction.
412 const unsigned Mask = 0xFFFF;
// Writes to the zero register are no-ops; presumably handled by erasing
// or simplifying the instruction — TODO confirm against the full file.
414 if (DstReg == AArch64::XZR || DstReg == AArch64::WZR) {
// Zero-extend the immediate to exactly BitSize bits.
422 uint64_t UImm = Imm << (64 - BitSize) >> (64 - BitSize);
// Single-ORR path for logical-immediate-encodable constants.
425 unsigned Opc = (BitSize == 32 ? AArch64::ORRWri : AArch64::ORRXri);
429 .addReg(BitSize == 32 ? AArch64::WZR : AArch64::XZR)
431 transferImpOps(MI, MIB, MIB);
// Count all-one and all-zero 16-bit chunks to pick MOVN vs. MOVZ.
438 unsigned OneChunks = 0;
439 unsigned ZeroChunks = 0;
440 for (
unsigned Shift = 0; Shift < BitSize; Shift += 16) {
441 const unsigned Chunk = (Imm >> Shift) & Mask;
// 64-bit constants with few trivial chunks get the special sequences
// (chunk replication / sequence-of-ones) tried first.
471 if (BitSize == 64 && OneChunks < 3 && ZeroChunks < 3) {
// MOVN starts from all-ones, so it wins when one-chunks dominate.
525 if (OneChunks > ZeroChunks) {
532 Imm &= (1LL << 32) - 1;
533 FirstOpc = (isNeg ? AArch64::MOVNWi : AArch64::MOVZWi);
535 FirstOpc = (isNeg ? AArch64::MOVNXi : AArch64::MOVZXi);
538 unsigned LastShift = 0;
// Skip leading/trailing chunks that already match the fill value.
542 Shift = ((63 - LZ) / 16) * 16;
543 LastShift = (TZ / 16) * 16;
545 unsigned Imm16 = (Imm >> Shift) & Mask;
// Single-instruction case: the first MOV covers everything.
559 if (Shift == LastShift) {
560 transferImpOps(MI, MIB1, MIB1);
// Patch remaining non-trivial chunks with MOVK.
566 unsigned Opc = (BitSize == 32 ? AArch64::MOVKWi : AArch64::MOVKXi);
567 while (Shift != LastShift) {
569 Imm16 = (Imm >> Shift) & Mask;
// Chunks equal to the fill value need no MOVK.
570 if (Imm16 == (isNeg ? Mask : 0))
581 transferImpOps(MI, MIB1, MIB2);
// NOTE(review): fragment of addPostLoopLiveIns — iterates the tracked
// live physical registers (presumably adding each as a live-in to a
// basic block; the loop body is missing from this extraction).
587 for (
auto I = LiveRegs.
begin();
I != LiveRegs.
end(); ++
I)
// NOTE(review): fragment of expandCMP_SWAP — expands a CMP_SWAP pseudo
// into a load-exclusive / compare / store-exclusive loop across three new
// basic blocks (LoadCmpBB, StoreBB, DoneBB).  Interior lines are missing
// from this extraction.
591 bool AArch64ExpandPseudo::expandCMP_SWAP(
593 unsigned StlrOp,
unsigned CmpOp,
unsigned ExtendImm,
unsigned ZeroReg,
// Recompute liveness backwards from the end of the block to the pseudo,
// so the new blocks get correct live-in lists.
605 for (
auto I = std::prev(MBB.
end());
I != MBBI; --
I)
606 LiveRegs.stepBackward(*
I);
// Insert the new blocks after the current one, in loop order.
614 MF->
insert(++LoadCmpBB->getIterator(), StoreBB);
615 MF->
insert(++StoreBB->getIterator(), DoneBB);
// .lr.ph: load-exclusive, compare against Desired, branch out if unequal.
621 LoadCmpBB->addLiveIn(Addr.
getReg());
622 LoadCmpBB->addLiveIn(Dest.
getReg());
623 LoadCmpBB->addLiveIn(Desired.
getReg());
628 BuildMI(LoadCmpBB, DL,
TII->get(CmpOp), ZeroReg)
632 BuildMI(LoadCmpBB, DL,
TII->get(AArch64::Bcc))
636 LoadCmpBB->addSuccessor(DoneBB);
637 LoadCmpBB->addSuccessor(StoreBB);
// StoreBB: store-exclusive New; retry the loop if the store failed
// (status register non-zero -> CBNZ back to LoadCmpBB).
642 StoreBB->addLiveIn(Addr.
getReg());
643 StoreBB->addLiveIn(New.
getReg());
646 BuildMI(StoreBB, DL,
TII->get(StlrOp), StatusReg)
649 BuildMI(StoreBB, DL,
TII->get(AArch64::CBNZW))
652 StoreBB->addSuccessor(LoadCmpBB);
653 StoreBB->addSuccessor(DoneBB);
// Move the rest of the original block after the pseudo into DoneBB and
// hand over the successors.
655 DoneBB->splice(DoneBB->end(), &
MBB,
MI, MBB.
end());
656 DoneBB->transferSuccessors(&MBB);
661 NextMBBI = MBB.
end();
662 MI.eraseFromParent();
// NOTE(review): fragment of expandCMP_SWAP_128 — 128-bit variant of the
// compare-and-swap expansion using LDAXP/STLXP on a register pair, with
// two SUBS/CSINC comparisons (low and high halves).  Interior lines are
// missing from this extraction.
666 bool AArch64ExpandPseudo::expandCMP_SWAP_128(
// Recompute liveness backwards from the end of the block to the pseudo.
683 for (
auto I = std::prev(MBB.
end());
I != MBBI; --
I)
684 LiveRegs.stepBackward(*
I);
// Insert the new blocks after the current one, in loop order.
692 MF->
insert(++LoadCmpBB->getIterator(), StoreBB);
693 MF->
insert(++StoreBB->getIterator(), DoneBB);
// LoadCmpBB: 128-bit load-exclusive pair, then compare both halves
// against Desired{Lo,Hi}; branch out of the loop if either differs.
700 LoadCmpBB->addLiveIn(Addr.
getReg());
701 LoadCmpBB->addLiveIn(DestLo.
getReg());
702 LoadCmpBB->addLiveIn(DestHi.
getReg());
703 LoadCmpBB->addLiveIn(DesiredLo.
getReg());
704 LoadCmpBB->addLiveIn(DesiredHi.
getReg());
707 BuildMI(LoadCmpBB, DL,
TII->get(AArch64::LDAXPX))
// Compare low halves; CSINC folds the result into StatusReg.
711 BuildMI(LoadCmpBB, DL,
TII->get(AArch64::SUBSXrs), AArch64::XZR)
715 BuildMI(LoadCmpBB, DL,
TII->get(AArch64::CSINCWr), StatusReg)
716 .addUse(AArch64::WZR)
// Compare high halves, accumulating into StatusReg; CBNZ exits on mismatch.
719 BuildMI(LoadCmpBB, DL,
TII->get(AArch64::SUBSXrs), AArch64::XZR)
723 BuildMI(LoadCmpBB, DL,
TII->get(AArch64::CSINCWr), StatusReg)
727 BuildMI(LoadCmpBB, DL,
TII->get(AArch64::CBNZW))
730 LoadCmpBB->addSuccessor(DoneBB);
731 LoadCmpBB->addSuccessor(StoreBB);
// StoreBB: 128-bit store-exclusive pair of New{Lo,Hi}; retry on failure.
736 StoreBB->addLiveIn(Addr.
getReg());
737 StoreBB->addLiveIn(NewLo.
getReg());
738 StoreBB->addLiveIn(NewHi.
getReg());
740 BuildMI(StoreBB, DL,
TII->get(AArch64::STLXPX), StatusReg)
744 BuildMI(StoreBB, DL,
TII->get(AArch64::CBNZW))
747 StoreBB->addSuccessor(LoadCmpBB);
748 StoreBB->addSuccessor(DoneBB);
// Move the remainder of the original block into DoneBB and hand over
// the successors.
750 DoneBB->splice(DoneBB->end(), &
MBB,
MI, MBB.
end());
751 DoneBB->transferSuccessors(&MBB);
756 NextMBBI = MBB.
end();
757 MI.eraseFromParent();
// NOTE(review): fragment of expandMI — dispatches on the pseudo opcode.
// First group: two-register arithmetic/logical pseudos that are rewritten
// to their shifted-register forms (…rr -> …rs) with a zero shift amount.
// Interior lines are missing from this extraction.
772 case AArch64::ADDWrr:
773 case AArch64::SUBWrr:
774 case AArch64::ADDXrr:
775 case AArch64::SUBXrr:
776 case AArch64::ADDSWrr:
777 case AArch64::SUBSWrr:
778 case AArch64::ADDSXrr:
779 case AArch64::SUBSXrr:
780 case AArch64::ANDWrr:
781 case AArch64::ANDXrr:
782 case AArch64::BICWrr:
783 case AArch64::BICXrr:
784 case AArch64::ANDSWrr:
785 case AArch64::ANDSXrr:
786 case AArch64::BICSWrr:
787 case AArch64::BICSXrr:
788 case AArch64::EONWrr:
789 case AArch64::EONXrr:
790 case AArch64::EORWrr:
791 case AArch64::EORXrr:
792 case AArch64::ORNWrr:
793 case AArch64::ORNXrr:
794 case AArch64::ORRWrr:
795 case AArch64::ORRXrr: {
// Map each …rr pseudo to the corresponding …rs real opcode.
800 case AArch64::ADDWrr: Opcode = AArch64::ADDWrs;
break;
801 case AArch64::SUBWrr: Opcode = AArch64::SUBWrs;
break;
802 case AArch64::ADDXrr: Opcode = AArch64::ADDXrs;
break;
803 case AArch64::SUBXrr: Opcode = AArch64::SUBXrs;
break;
804 case AArch64::ADDSWrr: Opcode = AArch64::ADDSWrs;
break;
805 case AArch64::SUBSWrr: Opcode = AArch64::SUBSWrs;
break;
806 case AArch64::ADDSXrr: Opcode = AArch64::ADDSXrs;
break;
807 case AArch64::SUBSXrr: Opcode = AArch64::SUBSXrs;
break;
808 case AArch64::ANDWrr: Opcode = AArch64::ANDWrs;
break;
809 case AArch64::ANDXrr: Opcode = AArch64::ANDXrs;
break;
810 case AArch64::BICWrr: Opcode = AArch64::BICWrs;
break;
811 case AArch64::BICXrr: Opcode = AArch64::BICXrs;
break;
812 case AArch64::ANDSWrr: Opcode = AArch64::ANDSWrs;
break;
813 case AArch64::ANDSXrr: Opcode = AArch64::ANDSXrs;
break;
814 case AArch64::BICSWrr: Opcode = AArch64::BICSWrs;
break;
815 case AArch64::BICSXrr: Opcode = AArch64::BICSXrs;
break;
816 case AArch64::EONWrr: Opcode = AArch64::EONWrs;
break;
817 case AArch64::EONXrr: Opcode = AArch64::EONXrs;
break;
818 case AArch64::EORWrr: Opcode = AArch64::EORWrs;
break;
819 case AArch64::EORXrr: Opcode = AArch64::EORXrs;
break;
820 case AArch64::ORNWrr: Opcode = AArch64::ORNWrs;
break;
821 case AArch64::ORNXrr: Opcode = AArch64::ORNXrs;
break;
822 case AArch64::ORRWrr: Opcode = AArch64::ORRWrs;
break;
823 case AArch64::ORRXrr: Opcode = AArch64::ORRXrs;
break;
831 transferImpOps(MI, MIB1, MIB1);
// Address-materialization pseudos: expanded to an ADRP page + page-offset
// pair (only globals, external symbols, or constant pools expected).
858 "Only expect globals, externalsymbols, or constant pools");
866 transferImpOps(MI, MIB1, MIB2);
871 case AArch64::MOVaddr:
872 case AArch64::MOVaddrJT:
873 case AArch64::MOVaddrCP:
874 case AArch64::MOVaddrBA:
875 case AArch64::MOVaddrTLS:
876 case AArch64::MOVaddrEXT: {
890 transferImpOps(MI, MIB1, MIB2);
// Immediate-materialization pseudos delegate to expandMOVImm.
895 case AArch64::MOVi32imm:
896 return expandMOVImm(MBB, MBBI, 32);
897 case AArch64::MOVi64imm:
898 return expandMOVImm(MBB, MBBI, 64);
899 case AArch64::RET_ReallyLR: {
908 transferImpOps(MI, MIB, MIB);
// Atomic compare-and-swap pseudos: one expansion per access width, each
// parameterized by the matching exclusive load/store and compare opcodes.
912 case AArch64::CMP_SWAP_8:
913 return expandCMP_SWAP(MBB, MBBI, AArch64::LDAXRB, AArch64::STLXRB,
916 AArch64::WZR, NextMBBI);
917 case AArch64::CMP_SWAP_16:
918 return expandCMP_SWAP(MBB, MBBI, AArch64::LDAXRH, AArch64::STLXRH,
921 AArch64::WZR, NextMBBI);
922 case AArch64::CMP_SWAP_32:
923 return expandCMP_SWAP(MBB, MBBI, AArch64::LDAXRW, AArch64::STLXRW,
926 AArch64::WZR, NextMBBI);
927 case AArch64::CMP_SWAP_64:
928 return expandCMP_SWAP(MBB, MBBI,
929 AArch64::LDAXRX, AArch64::STLXRX, AArch64::SUBSXrs,
931 AArch64::XZR, NextMBBI);
932 case AArch64::CMP_SWAP_128:
933 return expandCMP_SWAP_128(MBB, MBBI, NextMBBI);
// NOTE(review): fragments of the pass driver — expandMBB folds the result
// of expandMI over every instruction in a block; runOnMachineFunction
// folds expandMBB over every block; the factory returns a new pass
// instance.  Interior lines are missing from this extraction.
941 bool Modified =
false;
946 Modified |= expandMI(MBB, MBBI, NMBBI);
956 bool Modified =
false;
958 Modified |= expandMBB(MBB);
964 return new AArch64ExpandPseudo();
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
const GlobalValue * getGlobal() const
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at application-load time.
MO_PAGE - A symbol operand with this flag represents the pc-relative offset of the 4K page containing...
Describe properties that are true of each instruction in the target description file.
MachineInstrBuilder MachineInstrBuilder &DefMI const MCInstrDesc & Desc
static bool processLogicalImmediate(uint64_t Imm, unsigned RegSize, uint64_t &Encoding)
processLogicalImmediate - Determine if an immediate value can be encoded as the immediate operand of a logical instruction.
const char * getSymbolName() const
static bool tryToreplicateChunks(uint64_t UImm, MachineInstr &MI, MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI, const AArch64InstrInfo *TII)
Check for identical 16-bit chunks within the constant and if so materialize them with a single ORR instruction.
static bool canUseOrr(uint64_t Chunk, uint64_t &Encoding)
Check whether the given 16-bit chunk replicated to full 64-bit width can be materialized with an ORR instruction.
std::size_t countLeadingZeros(T Val, ZeroBehavior ZB=ZB_Width)
Count number of 0's from the most significant bit to the least stopping at the first 1.
static MCDisassembler::DecodeStatus addOperand(MCInst &Inst, const MCOperand &Opnd)
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
struct fuzzer::@269 Flags
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
const HexagonInstrInfo * TII
bool isReg() const
isReg - Tests if this is a MO_Register operand.
void eraseFromParent()
Unlink 'this' from the containing basic block and delete it.
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
bool isCPI() const
isCPI - Tests if this is a MO_ConstantPoolIndex operand.
bool isGlobal() const
isGlobal - Tests if this is a MO_GlobalAddress operand.
const HexagonRegisterInfo & getRegisterInfo() const
HexagonInstrInfo specifics.
constexpr bool isMask_64(uint64_t Value)
isMask_64 - This function returns true if the argument is a non-empty sequence of ones starting at the least significant bit with the remainder zero.
static uint64_t replicateChunk(uint64_t Imm, unsigned FromIdx, unsigned ToIdx)
Helper function which replicates a 16-bit chunk within a 64-bit value.
MachineBasicBlock * CreateMachineBasicBlock(const BasicBlock *bb=nullptr)
CreateMachineBasicBlock - Allocate a new MachineBasicBlock.
unsigned getKillRegState(bool B)
const BasicBlock * getBasicBlock() const
Return the LLVM basic block that this instance corresponded to originally.
static uint64_t updateImm(uint64_t Imm, unsigned Idx, bool Clear)
Clear or set all bits in the chunk at the given index.
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
static GCRegistry::Add< CoreCLRGC > E("coreclr","CoreCLR-compatible GC")
static unsigned getShifterImm(AArch64_AM::ShiftExtendType ST, unsigned Imm)
getShifterImm - Encode the shift type and amount: imm: 6-bit shift amount shifter: 000 ==> lsl 001 ==...
unsigned getDeadRegState(bool B)
static bool isStartChunk(uint64_t Chunk)
Check whether this chunk matches the pattern '1...0...'.
MachineInstrBuilder BuildMI(MachineFunction &MF, const DebugLoc &DL, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
unsigned getTargetFlags() const
void addLiveIn(MCPhysReg PhysReg, LaneBitmask LaneMask=LaneBitmask::getAll())
Adds the specified register as a live in.
std::size_t countTrailingZeros(T Val, ZeroBehavior ZB=ZB_Width)
Count number of 0's from the least significant bit to the most stopping at the first 1.
void initializeAArch64ExpandPseudoPass(PassRegistry &)
static void addPostLoopLiveIns(MachineBasicBlock *MBB, LivePhysRegs &LiveRegs)
MachineInstrBuilder & UseMI
void addLiveOuts(const MachineBasicBlock &MBB)
Adds all live-out registers of basic block MBB.
const MachineOperand & getOperand(unsigned i) const
bool isSymbol() const
isSymbol - Tests if this is a MO_ExternalSymbol operand.
static const unsigned End
FunctionPass class - This class is used to implement most global optimizations.
INITIALIZE_PASS(AArch64ExpandPseudo,"aarch64-expand-pseudo", AARCH64_EXPAND_PSEUDO_NAME, false, false) static void transferImpOps(MachineInstr &OldMI
Transfer implicit operands on the pseudo instruction to the instructions created from the expansion...
int64_t getOffset() const
Return the offset from the symbol in this operand.
self_iterator getIterator()
void addSuccessor(MachineBasicBlock *Succ, BranchProbability Prob=BranchProbability::getUnknown())
Add Succ as a successor of this MachineBasicBlock.
static uint64_t getChunk(uint64_t Imm, unsigned ChunkIdx)
Helper function which extracts the specified 16-bit chunk from a 64-bit value.
MachineOperand class - Representation of each machine instruction operand.
FunctionPass * createAArch64ExpandPseudoPass()
Returns an instance of the pseudo instruction expansion pass.
static bool isEndChunk(uint64_t Chunk)
Check whether this chunk matches the pattern '0...1...' This pattern ends a contiguous sequence of on...
static bool trySequenceOfOnes(uint64_t UImm, MachineInstr &MI, MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI, const AArch64InstrInfo *TII)
Check whether the constant contains a sequence of contiguous ones, which might be interrupted by one or two chunks.
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
const DebugLoc & getDebugLoc() const
Returns the debug location id of this MachineInstr.
static unsigned getArithExtendImm(AArch64_AM::ShiftExtendType ET, unsigned Imm)
getArithExtendImm - Encode the extend type and shift amount for an arithmetic instruction: imm: 3-bit...
const MachineInstrBuilder & addGlobalAddress(const GlobalValue *GV, int64_t Offset=0, unsigned char TargetFlags=0) const
Representation of each machine instruction.
A set of live physical registers with functions to track liveness when walking backward/forward throu...
const MachineInstrBuilder & addExternalSymbol(const char *FnName, unsigned char TargetFlags=0) const
const MachineInstrBuilder & addConstantPoolIndex(unsigned Idx, int Offset=0, unsigned char TargetFlags=0) const
MO_PAGEOFF - A symbol operand with this flag represents the offset of that symbol within a 4K page...
#define AARCH64_EXPAND_PSEUDO_NAME
unsigned getReg() const
getReg - Returns the register number.
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
void insert(iterator MBBI, MachineBasicBlock *MBB)
const MachineInstrBuilder & addUse(unsigned RegNo, unsigned Flags=0, unsigned SubReg=0) const
Add a virtual register use operand.
virtual const TargetInstrInfo * getInstrInfo() const
const MachineInstrBuilder & addMBB(MachineBasicBlock *MBB, unsigned char TargetFlags=0) const
unsigned getNumOperands() const
Return the number of declared MachineOperands for this MachineInstruction.
std::underlying_type< E >::type Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
const MachineInstrBuilder & addOperand(const MachineOperand &MO) const
static bool tryOrrMovk(uint64_t UImm, uint64_t OrrImm, MachineInstr &MI, MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI, const AArch64InstrInfo *TII, unsigned ChunkIdx)
Helper function which tries to materialize a 64-bit value with an ORR + MOVK instruction sequence...
StringRef - Represent a constant reference to a string, i.e.
MO_NC - Indicates whether the linker is expected to check the symbol reference for overflow...
const_iterator end() const
const MachineInstrBuilder & addReg(unsigned RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
const_iterator begin() const