LCOV - code coverage report
Current view: top level - lib/Target/AArch64 - AArch64ExpandPseudoInsts.cpp (source / functions) Hit Total Coverage
Test: llvm-toolchain.info Lines: 172 412 41.7 %
Date: 2018-10-20 13:21:21 Functions: 9 15 60.0 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : //===- AArch64ExpandPseudoInsts.cpp - Expand pseudo instructions ----------===//
       2             : //
       3             : //                     The LLVM Compiler Infrastructure
       4             : //
       5             : // This file is distributed under the University of Illinois Open Source
       6             : // License. See LICENSE.TXT for details.
       7             : //
       8             : //===----------------------------------------------------------------------===//
       9             : //
      10             : // This file contains a pass that expands pseudo instructions into target
      11             : // instructions to allow proper scheduling and other late optimizations.  This
      12             : // pass should be run after register allocation but before the post-regalloc
      13             : // scheduling pass.
      14             : //
      15             : //===----------------------------------------------------------------------===//
      16             : 
      17             : #include "AArch64InstrInfo.h"
      18             : #include "AArch64Subtarget.h"
      19             : #include "MCTargetDesc/AArch64AddressingModes.h"
      20             : #include "Utils/AArch64BaseInfo.h"
      21             : #include "llvm/ADT/DenseMap.h"
      22             : #include "llvm/ADT/Triple.h"
      23             : #include "llvm/CodeGen/LivePhysRegs.h"
      24             : #include "llvm/CodeGen/MachineBasicBlock.h"
      25             : #include "llvm/CodeGen/MachineFunction.h"
      26             : #include "llvm/CodeGen/MachineFunctionPass.h"
      27             : #include "llvm/CodeGen/MachineInstr.h"
      28             : #include "llvm/CodeGen/MachineInstrBuilder.h"
      29             : #include "llvm/CodeGen/MachineOperand.h"
      30             : #include "llvm/CodeGen/TargetSubtargetInfo.h"
      31             : #include "llvm/IR/DebugLoc.h"
      32             : #include "llvm/MC/MCInstrDesc.h"
      33             : #include "llvm/Pass.h"
      34             : #include "llvm/Support/CodeGen.h"
      35             : #include "llvm/Support/MathExtras.h"
      36             : #include "llvm/Target/TargetMachine.h"
      37             : #include <cassert>
      38             : #include <cstdint>
      39             : #include <iterator>
      40             : #include <limits>
      41             : #include <utility>
      42             : 
      43             : using namespace llvm;
      44             : 
      45             : #define AARCH64_EXPAND_PSEUDO_NAME "AArch64 pseudo instruction expansion pass"
      46             : 
      47             : namespace {
      48             : 
      49             : class AArch64ExpandPseudo : public MachineFunctionPass {
      50             : public:
      51             :   const AArch64InstrInfo *TII;
      52             : 
      53             :   static char ID;
      54             : 
      55        1224 :   AArch64ExpandPseudo() : MachineFunctionPass(ID) {
      56        1224 :     initializeAArch64ExpandPseudoPass(*PassRegistry::getPassRegistry());
      57        1224 :   }
      58             : 
      59             :   bool runOnMachineFunction(MachineFunction &Fn) override;
      60             : 
      61        1207 :   StringRef getPassName() const override { return AARCH64_EXPAND_PSEUDO_NAME; }
      62             : 
      63             : private:
      64             :   bool expandMBB(MachineBasicBlock &MBB);
      65             :   bool expandMI(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
      66             :                 MachineBasicBlock::iterator &NextMBBI);
      67             :   bool expandMOVImm(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
      68             :                     unsigned BitSize);
      69             :   bool expandMOVImmSimple(MachineBasicBlock &MBB,
      70             :                           MachineBasicBlock::iterator MBBI,
      71             :                           unsigned BitSize,
      72             :                           unsigned OneChunks,
      73             :                           unsigned ZeroChunks);
      74             : 
      75             :   bool expandCMP_SWAP(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
      76             :                       unsigned LdarOp, unsigned StlrOp, unsigned CmpOp,
      77             :                       unsigned ExtendImm, unsigned ZeroReg,
      78             :                       MachineBasicBlock::iterator &NextMBBI);
      79             :   bool expandCMP_SWAP_128(MachineBasicBlock &MBB,
      80             :                           MachineBasicBlock::iterator MBBI,
      81             :                           MachineBasicBlock::iterator &NextMBBI);
      82             : };
      83             : 
      84             : } // end anonymous namespace
      85             : 
      86             : char AArch64ExpandPseudo::ID = 0;
      87             : 
      88      200256 : INITIALIZE_PASS(AArch64ExpandPseudo, "aarch64-expand-pseudo",
      89             :                 AARCH64_EXPAND_PSEUDO_NAME, false, false)
      90             : 
      91             : /// Transfer implicit operands on the pseudo instruction to the
      92             : /// instructions created from the expansion.
      93           0 : static void transferImpOps(MachineInstr &OldMI, MachineInstrBuilder &UseMI,
      94             :                            MachineInstrBuilder &DefMI) {
      95           0 :   const MCInstrDesc &Desc = OldMI.getDesc();
      96           0 :   for (unsigned i = Desc.getNumOperands(), e = OldMI.getNumOperands(); i != e;
      97             :        ++i) {
      98           0 :     const MachineOperand &MO = OldMI.getOperand(i);
      99             :     assert(MO.isReg() && MO.getReg());
     100           0 :     if (MO.isUse())
     101             :       UseMI.add(MO);
     102             :     else
     103             :       DefMI.add(MO);
     104             :   }
     105           0 : }
     106             : 
     107             : /// Helper function which extracts the specified 16-bit chunk from a
     108             : /// 64-bit value.
     109             : static uint64_t getChunk(uint64_t Imm, unsigned ChunkIdx) {
     110             :   assert(ChunkIdx < 4 && "Out of range chunk index specified!");
     111             : 
     112          19 :   return (Imm >> (ChunkIdx * 16)) & 0xFFFF;
     113             : }
     114             : 
     115             : /// Check whether the given 16-bit chunk replicated to full 64-bit width
     116             : /// can be materialized with an ORR instruction.
     117             : static bool canUseOrr(uint64_t Chunk, uint64_t &Encoding) {
     118           0 :   Chunk = (Chunk << 48) | (Chunk << 32) | (Chunk << 16) | Chunk;
     119             : 
     120           0 :   return AArch64_AM::processLogicalImmediate(Chunk, 64, Encoding);
     121             : }
     122             : 
     123             : /// Check for identical 16-bit chunks within the constant and if so
     124             : /// materialize them with a single ORR instruction. The remaining one or two
     125             : /// 16-bit chunks will be materialized with MOVK instructions.
     126             : ///
     127             : /// This allows us to materialize constants like |A|B|A|A| or |A|B|C|A| (order
     128             : /// of the chunks doesn't matter), assuming |A|A|A|A| can be materialized with
     129             : /// an ORR instruction.
     130           0 : static bool tryToreplicateChunks(uint64_t UImm, MachineInstr &MI,
     131             :                                  MachineBasicBlock &MBB,
     132             :                                  MachineBasicBlock::iterator &MBBI,
     133             :                                  const AArch64InstrInfo *TII) {
     134             :   using CountMap = DenseMap<uint64_t, unsigned>;
     135             : 
     136             :   CountMap Counts;
     137             : 
     138             :   // Scan the constant and count how often every chunk occurs.
     139           0 :   for (unsigned Idx = 0; Idx < 4; ++Idx)
     140           0 :     ++Counts[getChunk(UImm, Idx)];
     141             : 
     142             :   // Traverse the chunks to find one which occurs more than once.
     143           0 :   for (CountMap::const_iterator Chunk = Counts.begin(), End = Counts.end();
     144           0 :        Chunk != End; ++Chunk) {
     145           0 :     const uint64_t ChunkVal = Chunk->first;
     146           0 :     const unsigned Count = Chunk->second;
     147             : 
     148           0 :     uint64_t Encoding = 0;
     149             : 
     150             :     // We are looking for chunks which have two or three instances and can be
     151             :     // materialized with an ORR instruction.
     152           0 :     if ((Count != 2 && Count != 3) || !canUseOrr(ChunkVal, Encoding))
     153           0 :       continue;
     154             : 
     155           0 :     const bool CountThree = Count == 3;
     156             :     // Create the ORR-immediate instruction.
     157             :     MachineInstrBuilder MIB =
     158           0 :         BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::ORRXri))
     159           0 :             .add(MI.getOperand(0))
     160           0 :             .addReg(AArch64::XZR)
     161           0 :             .addImm(Encoding);
     162             : 
     163           0 :     const unsigned DstReg = MI.getOperand(0).getReg();
     164             :     const bool DstIsDead = MI.getOperand(0).isDead();
     165             : 
     166             :     unsigned ShiftAmt = 0;
     167             :     uint64_t Imm16 = 0;
     168             :     // Find the first chunk not materialized with the ORR instruction.
     169           0 :     for (; ShiftAmt < 64; ShiftAmt += 16) {
     170           0 :       Imm16 = (UImm >> ShiftAmt) & 0xFFFF;
     171             : 
     172           0 :       if (Imm16 != ChunkVal)
     173             :         break;
     174             :     }
     175             : 
     176             :     // Create the first MOVK instruction.
     177             :     MachineInstrBuilder MIB1 =
     178           0 :         BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::MOVKXi))
     179             :             .addReg(DstReg,
     180           0 :                     RegState::Define | getDeadRegState(DstIsDead && CountThree))
     181           0 :             .addReg(DstReg)
     182           0 :             .addImm(Imm16)
     183           0 :             .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, ShiftAmt));
     184             : 
     185             :     // In case we have three instances the whole constant is now materialized
     186             :     // and we can exit.
     187           0 :     if (CountThree) {
     188           0 :       transferImpOps(MI, MIB, MIB1);
     189           0 :       MI.eraseFromParent();
     190           0 :       return true;
     191             :     }
     192             : 
     193             :     // Find the remaining chunk which needs to be materialized.
     194           0 :     for (ShiftAmt += 16; ShiftAmt < 64; ShiftAmt += 16) {
     195           0 :       Imm16 = (UImm >> ShiftAmt) & 0xFFFF;
     196             : 
     197           0 :       if (Imm16 != ChunkVal)
     198             :         break;
     199             :     }
     200             : 
     201             :     // Create the second MOVK instruction.
     202             :     MachineInstrBuilder MIB2 =
     203           0 :         BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::MOVKXi))
     204           0 :             .addReg(DstReg, RegState::Define | getDeadRegState(DstIsDead))
     205           0 :             .addReg(DstReg)
     206           0 :             .addImm(Imm16)
     207           0 :             .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, ShiftAmt));
     208             : 
     209           0 :     transferImpOps(MI, MIB, MIB2);
     210           0 :     MI.eraseFromParent();
     211           0 :     return true;
     212             :   }
     213             : 
     214           0 :   return false;
     215             : }
     216             : 
     217             : /// Check whether this chunk matches the pattern '1...0...'. This pattern
     218             : /// starts a contiguous sequence of ones if we look at the bits from the LSB
     219             : /// towards the MSB.
     220             : static bool isStartChunk(uint64_t Chunk) {
     221           0 :   if (Chunk == 0 || Chunk == std::numeric_limits<uint64_t>::max())
     222             :     return false;
     223             : 
     224           0 :   return isMask_64(~Chunk);
     225             : }
     226             : 
     227             : /// Check whether this chunk matches the pattern '0...1...'. This pattern
     228             : /// ends a contiguous sequence of ones if we look at the bits from the LSB
     229             : /// towards the MSB.
     230             : static bool isEndChunk(uint64_t Chunk) {
     231           0 :   if (Chunk == 0 || Chunk == std::numeric_limits<uint64_t>::max())
     232             :     return false;
     233             : 
     234             :   return isMask_64(Chunk);
     235             : }
     236             : 
     237             : /// Clear or set all bits in the chunk at the given index.
     238             : static uint64_t updateImm(uint64_t Imm, unsigned Idx, bool Clear) {
     239             :   const uint64_t Mask = 0xFFFF;
     240             : 
     241           0 :   if (Clear)
     242             :     // Clear chunk in the immediate.
     243           0 :     Imm &= ~(Mask << (Idx * 16));
     244             :   else
     245             :     // Set all bits in the immediate for the particular chunk.
     246           0 :     Imm |= Mask << (Idx * 16);
     247             : 
     248             :   return Imm;
     249             : }
     250             : 
     251             : /// Check whether the constant contains a sequence of contiguous ones,
     252             : /// which might be interrupted by one or two chunks. If so, materialize the
     253             : /// sequence of contiguous ones with an ORR instruction.
     254             : /// Materialize the chunks which are either interrupting the sequence or outside
     255             : /// of the sequence with a MOVK instruction.
     256             : ///
     257             : /// Let S be a chunk which starts the sequence (1...0...) and E be a chunk
     258             : /// which ends the sequence (0...1...). We are looking for constants which
     259             : /// contain at least one S chunk and one E chunk.
     260             : /// E.g. |E|A|B|S|, |A|E|B|S| or |A|B|E|S|.
     261             : ///
     262             : /// We are also looking for constants like |S|A|B|E| where the contiguous
     263             : /// sequence of ones wraps around the MSB into the LSB.
     264           0 : static bool trySequenceOfOnes(uint64_t UImm, MachineInstr &MI,
     265             :                               MachineBasicBlock &MBB,
     266             :                               MachineBasicBlock::iterator &MBBI,
     267             :                               const AArch64InstrInfo *TII) {
     268             :   const int NotSet = -1;
     269             :   const uint64_t Mask = 0xFFFF;
     270             : 
     271             :   int StartIdx = NotSet;
     272             :   int EndIdx = NotSet;
     273             :   // Try to find the chunks which start/end a contiguous sequence of ones.
     274           0 :   for (int Idx = 0; Idx < 4; ++Idx) {
     275           0 :     int64_t Chunk = getChunk(UImm, Idx);
     276             :     // Sign extend the 16-bit chunk to 64-bit.
     277           0 :     Chunk = (Chunk << 48) >> 48;
     278             : 
     279           0 :     if (isStartChunk(Chunk))
     280             :       StartIdx = Idx;
     281             :     else if (isEndChunk(Chunk))
     282             :       EndIdx = Idx;
     283             :   }
     284             : 
     285             :   // Early exit in case we can't find a start/end chunk.
     286           0 :   if (StartIdx == NotSet || EndIdx == NotSet)
     287           0 :     return false;
     288             : 
     289             :   // Outside of the contiguous sequence of ones everything needs to be zero.
     290             :   uint64_t Outside = 0;
     291             :   // Chunks between the start and end chunk need to have all their bits set.
     292             :   uint64_t Inside = Mask;
     293             : 
     294             :   // If our contiguous sequence of ones wraps around from the MSB into the LSB,
     295             :   // just swap indices and pretend we are materializing a contiguous sequence
     296             :   // of zeros surrounded by a contiguous sequence of ones.
     297           0 :   if (StartIdx > EndIdx) {
     298             :     std::swap(StartIdx, EndIdx);
     299             :     std::swap(Outside, Inside);
     300             :   }
     301             : 
     302             :   uint64_t OrrImm = UImm;
     303             :   int FirstMovkIdx = NotSet;
     304             :   int SecondMovkIdx = NotSet;
     305             : 
     306             :   // Find out which chunks we need to patch up to obtain a contiguous sequence
     307             :   // of ones.
     308           0 :   for (int Idx = 0; Idx < 4; ++Idx) {
     309           0 :     const uint64_t Chunk = getChunk(UImm, Idx);
     310             : 
     311             :     // Check whether we are looking at a chunk which is not part of the
     312             :     // contiguous sequence of ones.
     313           0 :     if ((Idx < StartIdx || EndIdx < Idx) && Chunk != Outside) {
     314             :       OrrImm = updateImm(OrrImm, Idx, Outside == 0);
     315             : 
     316             :       // Remember the index we need to patch.
     317           0 :       if (FirstMovkIdx == NotSet)
     318             :         FirstMovkIdx = Idx;
     319             :       else
     320             :         SecondMovkIdx = Idx;
     321             : 
     322             :       // Check whether we are looking at a chunk which is part of the
     323             :       // contiguous sequence of ones.
     324           0 :     } else if (Idx > StartIdx && Idx < EndIdx && Chunk != Inside) {
     325             :       OrrImm = updateImm(OrrImm, Idx, Inside != Mask);
     326             : 
     327             :       // Remember the index we need to patch.
     328           0 :       if (FirstMovkIdx == NotSet)
     329             :         FirstMovkIdx = Idx;
     330             :       else
     331             :         SecondMovkIdx = Idx;
     332             :     }
     333             :   }
     334             :   assert(FirstMovkIdx != NotSet && "Constant materializable with single ORR!");
     335             : 
     336             :   // Create the ORR-immediate instruction.
     337           0 :   uint64_t Encoding = 0;
     338           0 :   AArch64_AM::processLogicalImmediate(OrrImm, 64, Encoding);
     339             :   MachineInstrBuilder MIB =
     340           0 :       BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::ORRXri))
     341           0 :           .add(MI.getOperand(0))
     342           0 :           .addReg(AArch64::XZR)
     343           0 :           .addImm(Encoding);
     344             : 
     345           0 :   const unsigned DstReg = MI.getOperand(0).getReg();
     346             :   const bool DstIsDead = MI.getOperand(0).isDead();
     347             : 
     348           0 :   const bool SingleMovk = SecondMovkIdx == NotSet;
     349             :   // Create the first MOVK instruction.
     350             :   MachineInstrBuilder MIB1 =
     351           0 :       BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::MOVKXi))
     352             :           .addReg(DstReg,
     353           0 :                   RegState::Define | getDeadRegState(DstIsDead && SingleMovk))
     354           0 :           .addReg(DstReg)
     355           0 :           .addImm(getChunk(UImm, FirstMovkIdx))
     356             :           .addImm(
     357           0 :               AArch64_AM::getShifterImm(AArch64_AM::LSL, FirstMovkIdx * 16));
     358             : 
     359             :   // Early exit in case we only need to emit a single MOVK instruction.
     360           0 :   if (SingleMovk) {
     361           0 :     transferImpOps(MI, MIB, MIB1);
     362           0 :     MI.eraseFromParent();
     363           0 :     return true;
     364             :   }
     365             : 
     366             :   // Create the second MOVK instruction.
     367             :   MachineInstrBuilder MIB2 =
     368           0 :       BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::MOVKXi))
     369           0 :           .addReg(DstReg, RegState::Define | getDeadRegState(DstIsDead))
     370           0 :           .addReg(DstReg)
     371           0 :           .addImm(getChunk(UImm, SecondMovkIdx))
     372             :           .addImm(
     373           0 :               AArch64_AM::getShifterImm(AArch64_AM::LSL, SecondMovkIdx * 16));
     374             : 
     375           0 :   transferImpOps(MI, MIB, MIB2);
     376           0 :   MI.eraseFromParent();
     377           0 :   return true;
     378             : }
     379             : 
     380             : /// Expand a MOVi32imm or MOVi64imm pseudo instruction to one or more
     381             : /// real move-immediate instructions to synthesize the immediate.
     382        1806 : bool AArch64ExpandPseudo::expandMOVImm(MachineBasicBlock &MBB,
     383             :                                        MachineBasicBlock::iterator MBBI,
     384             :                                        unsigned BitSize) {
     385             :   MachineInstr &MI = *MBBI;
     386        1806 :   unsigned DstReg = MI.getOperand(0).getReg();
     387        1806 :   uint64_t Imm = MI.getOperand(1).getImm();
     388             :   const unsigned Mask = 0xFFFF;
     389             : 
     390        1806 :   if (DstReg == AArch64::XZR || DstReg == AArch64::WZR) {
     391             :     // Useless def, and we don't want to risk creating an invalid ORR (which
     392             :     // would really write to sp).
     393           2 :     MI.eraseFromParent();
     394           2 :     return true;
     395             :   }
     396             : 
     397             :   // Scan the immediate and count the number of 16-bit chunks which are either
     398             :   // all ones or all zeros.
     399             :   unsigned OneChunks = 0;
     400             :   unsigned ZeroChunks = 0;
     401        5842 :   for (unsigned Shift = 0; Shift < BitSize; Shift += 16) {
     402        4038 :     const unsigned Chunk = (Imm >> Shift) & Mask;
     403        4038 :     if (Chunk == Mask)
     404         260 :       OneChunks++;
     405        3778 :     else if (Chunk == 0)
     406        1760 :       ZeroChunks++;
     407             :   }
     408             : 
     409             :   // FIXME: Prefer MOVZ/MOVN over ORR because of the rules for the "mov"
     410             :   // alias.
     411             : 
     412             :   // Try a single ORR.
     413        1804 :   uint64_t UImm = Imm << (64 - BitSize) >> (64 - BitSize);
     414             :   uint64_t Encoding;
     415        1804 :   if (AArch64_AM::processLogicalImmediate(UImm, BitSize, Encoding)) {
     416        1117 :     unsigned Opc = (BitSize == 32 ? AArch64::ORRWri : AArch64::ORRXri);
     417             :     MachineInstrBuilder MIB =
     418        2234 :         BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(Opc))
     419        1117 :             .add(MI.getOperand(0))
     420        1182 :             .addReg(BitSize == 32 ? AArch64::WZR : AArch64::XZR)
     421        1117 :             .addImm(Encoding);
     422        1117 :     transferImpOps(MI, MIB, MIB);
     423        1117 :     MI.eraseFromParent();
     424             :     return true;
     425             :   }
     426             : 
     427             :   // Two instruction sequences.
     428             :   //
     429             :   // Prefer MOVZ/MOVN followed by MOVK; it's more readable, and possibly the
     430             :   // fastest sequence with fast literal generation.
     431         687 :   if (OneChunks >= (BitSize / 16) - 2 || ZeroChunks >= (BitSize / 16) - 2)
     432         629 :     return expandMOVImmSimple(MBB, MBBI, BitSize, OneChunks, ZeroChunks);
     433             : 
     434             :   assert(BitSize == 64 && "All 32-bit immediates can be expanded with a"
     435             :                           "MOVZ/MOVK pair");
     436             : 
     437             :   // Try other two-instruction sequences.
     438             : 
     439             :   // 64-bit ORR followed by MOVK.
     440             :   // We try to construct the ORR immediate in three different ways: either we
     441             :   // zero out the chunk which will be replaced, we fill the chunk which will
     442             :   // be replaced with ones, or we take the bit pattern from the other half of
     443             :   // the 64-bit immediate. This is comprehensive because of the way ORR
     444             :   // immediates are constructed.
     445         230 :   for (unsigned Shift = 0; Shift < BitSize; Shift += 16) {
     446         191 :     uint64_t ShiftedMask = (0xFFFFULL << Shift);
     447         191 :     uint64_t ZeroChunk = UImm & ~ShiftedMask;
     448         191 :     uint64_t OneChunk = UImm | ShiftedMask;
     449         191 :     uint64_t RotatedImm = (UImm << 32) | (UImm >> 32);
     450         191 :     uint64_t ReplicateChunk = ZeroChunk | (RotatedImm & ShiftedMask);
     451         378 :     if (AArch64_AM::processLogicalImmediate(ZeroChunk, BitSize, Encoding) ||
     452         376 :         AArch64_AM::processLogicalImmediate(OneChunk, BitSize, Encoding) ||
     453         185 :         AArch64_AM::processLogicalImmediate(ReplicateChunk,
     454             :                                             BitSize, Encoding)) {
     455             :       // Create the ORR-immediate instruction.
     456             :       MachineInstrBuilder MIB =
     457          38 :           BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::ORRXri))
     458          19 :               .add(MI.getOperand(0))
     459          19 :               .addReg(AArch64::XZR)
     460          19 :               .addImm(Encoding);
     461             : 
     462             :       // Create the MOVK instruction.
     463             :       const unsigned Imm16 = getChunk(UImm, Shift / 16);
     464          19 :       const unsigned DstReg = MI.getOperand(0).getReg();
     465             :       const bool DstIsDead = MI.getOperand(0).isDead();
     466             :       MachineInstrBuilder MIB1 =
     467          38 :           BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::MOVKXi))
     468          19 :               .addReg(DstReg, RegState::Define | getDeadRegState(DstIsDead))
     469          19 :               .addReg(DstReg)
     470          19 :               .addImm(Imm16)
     471          19 :               .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, Shift));
     472             : 
     473          19 :       transferImpOps(MI, MIB, MIB1);
     474          19 :       MI.eraseFromParent();
     475             :       return true;
     476             :     }
     477             :   }
     478             : 
     479             :   // FIXME: Add more two-instruction sequences.
     480             : 
     481             :   // Three instruction sequences.
     482             :   //
     483             :   // Prefer MOVZ/MOVN followed by two MOVK; it's more readable, and possibly
     484             :   // the fastest sequence with fast literal generation. (If neither MOVK is
     485             :   // part of a fast literal generation pair, it could be slower than the
     486             :   // four-instruction sequence, but we won't worry about that for now.)
     487          39 :   if (OneChunks || ZeroChunks)
     488          12 :     return expandMOVImmSimple(MBB, MBBI, BitSize, OneChunks, ZeroChunks);
     489             : 
     490             :   // Check for identical 16-bit chunks within the constant and if so materialize
     491             :   // them with a single ORR instruction. The remaining one or two 16-bit chunks
     492             :   // will be materialized with MOVK instructions.
     493          27 :   if (BitSize == 64 && tryToreplicateChunks(UImm, MI, MBB, MBBI, TII))
     494             :     return true;
     495             : 
     496             :   // Check whether the constant contains a sequence of contiguous ones, which
     497             :   // might be interrupted by one or two chunks. If so, materialize the sequence
     498             :   // of contiguous ones with an ORR instruction. Materialize the chunks which
     499             :   // are either interrupting the sequence or outside of the sequence with a
     500             :   // MOVK instruction.
     501          22 :   if (BitSize == 64 && trySequenceOfOnes(UImm, MI, MBB, MBBI, TII))
     502             :     return true;
     503             : 
     504             :   // We found no possible two or three instruction sequence; use the general
     505             :   // four-instruction sequence.
     506          18 :   return expandMOVImmSimple(MBB, MBBI, BitSize, OneChunks, ZeroChunks);
     507             : }
     508             : 
     509             : /// \brief Expand a MOVi32imm or MOVi64imm pseudo instruction to a
     510             : /// MOVZ or MOVN of width BitSize followed by up to 3 MOVK instructions.
     511           0 : bool AArch64ExpandPseudo::expandMOVImmSimple(MachineBasicBlock &MBB,
     512             :                                              MachineBasicBlock::iterator MBBI,
     513             :                                              unsigned BitSize,
     514             :                                              unsigned OneChunks,
     515             :                                              unsigned ZeroChunks) {
     516             :   MachineInstr &MI = *MBBI;
     517           0 :   unsigned DstReg = MI.getOperand(0).getReg();
     518           0 :   uint64_t Imm = MI.getOperand(1).getImm();
     519             :   const unsigned Mask = 0xFFFF;
     520             : 
     521             :   // Use a MOVZ or MOVN instruction to set the high bits, followed by one or
     522             :   // more MOVK instructions to insert additional 16-bit portions into the
     523             :   // lower bits.
     524             :   bool isNeg = false;
     525             : 
     526             :   // Use MOVN to materialize the high bits if we have more all-ones chunks
     527             :   // than all-zeros chunks.
     528           0 :   if (OneChunks > ZeroChunks) {
     529             :     isNeg = true;
     530           0 :     Imm = ~Imm;
     531             :   }
     532             : 
     533             :   unsigned FirstOpc;
     534           0 :   if (BitSize == 32) {
     535           0 :     Imm &= (1LL << 32) - 1;
     536           0 :     FirstOpc = (isNeg ? AArch64::MOVNWi : AArch64::MOVZWi);
     537             :   } else {
     538           0 :     FirstOpc = (isNeg ? AArch64::MOVNXi : AArch64::MOVZXi);
     539             :   }
     540             :   unsigned Shift = 0;     // LSL amount for high bits with MOVZ/MOVN
     541             :   unsigned LastShift = 0; // LSL amount for last MOVK
     542           0 :   if (Imm != 0) {
     543           0 :     unsigned LZ = countLeadingZeros(Imm);
     544           0 :     unsigned TZ = countTrailingZeros(Imm);
     545           0 :     Shift = (TZ / 16) * 16;
     546           0 :     LastShift = ((63 - LZ) / 16) * 16;
     547             :   }
     548           0 :   unsigned Imm16 = (Imm >> Shift) & Mask;
     549             :   bool DstIsDead = MI.getOperand(0).isDead();
     550             :   MachineInstrBuilder MIB1 =
     551           0 :       BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(FirstOpc))
     552             :           .addReg(DstReg, RegState::Define |
     553           0 :                   getDeadRegState(DstIsDead && Shift == LastShift))
     554           0 :           .addImm(Imm16)
     555           0 :           .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, Shift));
     556             : 
     557             :   // If a MOVN was used for the high bits of a negative value, flip the rest
     558             :   // of the bits back for use with MOVK.
     559           0 :   if (isNeg)
     560           0 :     Imm = ~Imm;
     561             : 
     562           0 :   if (Shift == LastShift) {
     563           0 :     transferImpOps(MI, MIB1, MIB1);
     564           0 :     MI.eraseFromParent();
     565           0 :     return true;
     566             :   }
     567             : 
     568           0 :   MachineInstrBuilder MIB2;
     569           0 :   unsigned Opc = (BitSize == 32 ? AArch64::MOVKWi : AArch64::MOVKXi);
     570           0 :   while (Shift < LastShift) {
     571           0 :     Shift += 16;
     572           0 :     Imm16 = (Imm >> Shift) & Mask;
     573           0 :     if (Imm16 == (isNeg ? Mask : 0))
     574           0 :       continue; // This 16-bit portion is already set correctly.
     575           0 :     MIB2 = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(Opc))
     576             :                .addReg(DstReg,
     577             :                        RegState::Define |
     578           0 :                        getDeadRegState(DstIsDead && Shift == LastShift))
     579           0 :                .addReg(DstReg)
     580           0 :                .addImm(Imm16)
     581           0 :                .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, Shift));
     582             :   }
     583             : 
     584           0 :   transferImpOps(MI, MIB1, MIB2);
     585           0 :   MI.eraseFromParent();
     586           0 :   return true;
     587             : }
     588             : 
     589           0 : bool AArch64ExpandPseudo::expandCMP_SWAP(
     590             :     MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, unsigned LdarOp,
     591             :     unsigned StlrOp, unsigned CmpOp, unsigned ExtendImm, unsigned ZeroReg,
     592             :     MachineBasicBlock::iterator &NextMBBI) {
     593             :   MachineInstr &MI = *MBBI;
     594             :   DebugLoc DL = MI.getDebugLoc();
     595           0 :   const MachineOperand &Dest = MI.getOperand(0);
     596           0 :   unsigned StatusReg = MI.getOperand(1).getReg();
     597             :   bool StatusDead = MI.getOperand(1).isDead();
     598             :   // Duplicating undef operands into 2 instructions does not guarantee the same
     599             :   // value on both; However undef should be replaced by xzr anyway.
     600             :   assert(!MI.getOperand(2).isUndef() && "cannot handle undef");
     601           0 :   unsigned AddrReg = MI.getOperand(2).getReg();
     602           0 :   unsigned DesiredReg = MI.getOperand(3).getReg();
     603           0 :   unsigned NewReg = MI.getOperand(4).getReg();
     604             : 
     605           0 :   MachineFunction *MF = MBB.getParent();
     606           0 :   auto LoadCmpBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
     607           0 :   auto StoreBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
     608           0 :   auto DoneBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
     609             : 
     610           0 :   MF->insert(++MBB.getIterator(), LoadCmpBB);
     611           0 :   MF->insert(++LoadCmpBB->getIterator(), StoreBB);
     612           0 :   MF->insert(++StoreBB->getIterator(), DoneBB);
     613             : 
     614             :   // .Lloadcmp:
     615             :   //     mov wStatus, 0
     616             :   //     ldaxr xDest, [xAddr]
     617             :   //     cmp xDest, xDesired
     618             :   //     b.ne .Ldone
     619           0 :   if (!StatusDead)
     620           0 :     BuildMI(LoadCmpBB, DL, TII->get(AArch64::MOVZWi), StatusReg)
     621             :       .addImm(0).addImm(0);
     622           0 :   BuildMI(LoadCmpBB, DL, TII->get(LdarOp), Dest.getReg())
     623           0 :       .addReg(AddrReg);
     624           0 :   BuildMI(LoadCmpBB, DL, TII->get(CmpOp), ZeroReg)
     625           0 :       .addReg(Dest.getReg(), getKillRegState(Dest.isDead()))
     626           0 :       .addReg(DesiredReg)
     627           0 :       .addImm(ExtendImm);
     628           0 :   BuildMI(LoadCmpBB, DL, TII->get(AArch64::Bcc))
     629             :       .addImm(AArch64CC::NE)
     630             :       .addMBB(DoneBB)
     631           0 :       .addReg(AArch64::NZCV, RegState::Implicit | RegState::Kill);
     632           0 :   LoadCmpBB->addSuccessor(DoneBB);
     633           0 :   LoadCmpBB->addSuccessor(StoreBB);
     634             : 
     635             :   // .Lstore:
     636             :   //     stlxr wStatus, xNew, [xAddr]
     637             :   //     cbnz wStatus, .Lloadcmp
     638           0 :   BuildMI(StoreBB, DL, TII->get(StlrOp), StatusReg)
     639           0 :       .addReg(NewReg)
     640           0 :       .addReg(AddrReg);
     641           0 :   BuildMI(StoreBB, DL, TII->get(AArch64::CBNZW))
     642           0 :       .addReg(StatusReg, getKillRegState(StatusDead))
     643             :       .addMBB(LoadCmpBB);
     644           0 :   StoreBB->addSuccessor(LoadCmpBB);
     645           0 :   StoreBB->addSuccessor(DoneBB);
     646             : 
     647             :   DoneBB->splice(DoneBB->end(), &MBB, MI, MBB.end());
     648           0 :   DoneBB->transferSuccessors(&MBB);
     649             : 
     650           0 :   MBB.addSuccessor(LoadCmpBB);
     651             : 
     652           0 :   NextMBBI = MBB.end();
     653           0 :   MI.eraseFromParent();
     654             : 
     655             :   // Recompute livein lists.
     656             :   LivePhysRegs LiveRegs;
     657           0 :   computeAndAddLiveIns(LiveRegs, *DoneBB);
     658           0 :   computeAndAddLiveIns(LiveRegs, *StoreBB);
     659           0 :   computeAndAddLiveIns(LiveRegs, *LoadCmpBB);
     660             :   // Do an extra pass around the loop to get loop carried registers right.
     661           0 :   StoreBB->clearLiveIns();
     662           0 :   computeAndAddLiveIns(LiveRegs, *StoreBB);
     663           0 :   LoadCmpBB->clearLiveIns();
     664           0 :   computeAndAddLiveIns(LiveRegs, *LoadCmpBB);
     665             : 
     666           0 :   return true;
     667             : }
     668             : 
     669           0 : bool AArch64ExpandPseudo::expandCMP_SWAP_128(
     670             :     MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
     671             :     MachineBasicBlock::iterator &NextMBBI) {
     672             :   MachineInstr &MI = *MBBI;
     673             :   DebugLoc DL = MI.getDebugLoc();
     674           0 :   MachineOperand &DestLo = MI.getOperand(0);
     675             :   MachineOperand &DestHi = MI.getOperand(1);
     676           0 :   unsigned StatusReg = MI.getOperand(2).getReg();
     677             :   bool StatusDead = MI.getOperand(2).isDead();
     678             :   // Duplicating undef operands into 2 instructions does not guarantee the same
     679             :   // value on both; However undef should be replaced by xzr anyway.
     680             :   assert(!MI.getOperand(3).isUndef() && "cannot handle undef");
     681           0 :   unsigned AddrReg = MI.getOperand(3).getReg();
     682           0 :   unsigned DesiredLoReg = MI.getOperand(4).getReg();
     683           0 :   unsigned DesiredHiReg = MI.getOperand(5).getReg();
     684           0 :   unsigned NewLoReg = MI.getOperand(6).getReg();
     685           0 :   unsigned NewHiReg = MI.getOperand(7).getReg();
     686             : 
     687           0 :   MachineFunction *MF = MBB.getParent();
     688           0 :   auto LoadCmpBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
     689           0 :   auto StoreBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
     690           0 :   auto DoneBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
     691             : 
     692           0 :   MF->insert(++MBB.getIterator(), LoadCmpBB);
     693           0 :   MF->insert(++LoadCmpBB->getIterator(), StoreBB);
     694           0 :   MF->insert(++StoreBB->getIterator(), DoneBB);
     695             : 
     696             :   // .Lloadcmp:
     697             :   //     ldaxp xDestLo, xDestHi, [xAddr]
     698             :   //     cmp xDestLo, xDesiredLo
     699             :   //     sbcs xDestHi, xDesiredHi
     700             :   //     b.ne .Ldone
     701           0 :   BuildMI(LoadCmpBB, DL, TII->get(AArch64::LDAXPX))
     702           0 :       .addReg(DestLo.getReg(), RegState::Define)
     703           0 :       .addReg(DestHi.getReg(), RegState::Define)
     704           0 :       .addReg(AddrReg);
     705           0 :   BuildMI(LoadCmpBB, DL, TII->get(AArch64::SUBSXrs), AArch64::XZR)
     706           0 :       .addReg(DestLo.getReg(), getKillRegState(DestLo.isDead()))
     707           0 :       .addReg(DesiredLoReg)
     708             :       .addImm(0);
     709           0 :   BuildMI(LoadCmpBB, DL, TII->get(AArch64::CSINCWr), StatusReg)
     710             :     .addUse(AArch64::WZR)
     711             :     .addUse(AArch64::WZR)
     712             :     .addImm(AArch64CC::EQ);
     713           0 :   BuildMI(LoadCmpBB, DL, TII->get(AArch64::SUBSXrs), AArch64::XZR)
     714           0 :       .addReg(DestHi.getReg(), getKillRegState(DestHi.isDead()))
     715           0 :       .addReg(DesiredHiReg)
     716             :       .addImm(0);
     717           0 :   BuildMI(LoadCmpBB, DL, TII->get(AArch64::CSINCWr), StatusReg)
     718             :       .addUse(StatusReg, RegState::Kill)
     719             :       .addUse(StatusReg, RegState::Kill)
     720             :       .addImm(AArch64CC::EQ);
     721           0 :   BuildMI(LoadCmpBB, DL, TII->get(AArch64::CBNZW))
     722             :       .addUse(StatusReg, getKillRegState(StatusDead))
     723             :       .addMBB(DoneBB);
     724           0 :   LoadCmpBB->addSuccessor(DoneBB);
     725           0 :   LoadCmpBB->addSuccessor(StoreBB);
     726             : 
     727             :   // .Lstore:
     728             :   //     stlxp wStatus, xNewLo, xNewHi, [xAddr]
     729             :   //     cbnz wStatus, .Lloadcmp
     730           0 :   BuildMI(StoreBB, DL, TII->get(AArch64::STLXPX), StatusReg)
     731           0 :       .addReg(NewLoReg)
     732           0 :       .addReg(NewHiReg)
     733           0 :       .addReg(AddrReg);
     734           0 :   BuildMI(StoreBB, DL, TII->get(AArch64::CBNZW))
     735           0 :       .addReg(StatusReg, getKillRegState(StatusDead))
     736             :       .addMBB(LoadCmpBB);
     737           0 :   StoreBB->addSuccessor(LoadCmpBB);
     738           0 :   StoreBB->addSuccessor(DoneBB);
     739             : 
     740             :   DoneBB->splice(DoneBB->end(), &MBB, MI, MBB.end());
     741           0 :   DoneBB->transferSuccessors(&MBB);
     742             : 
     743           0 :   MBB.addSuccessor(LoadCmpBB);
     744             : 
     745           0 :   NextMBBI = MBB.end();
     746           0 :   MI.eraseFromParent();
     747             : 
     748             :   // Recompute liveness bottom up.
     749             :   LivePhysRegs LiveRegs;
     750           0 :   computeAndAddLiveIns(LiveRegs, *DoneBB);
     751           0 :   computeAndAddLiveIns(LiveRegs, *StoreBB);
     752           0 :   computeAndAddLiveIns(LiveRegs, *LoadCmpBB);
     753             :   // Do an extra pass in the loop to get the loop carried dependencies right.
     754           0 :   StoreBB->clearLiveIns();
     755           0 :   computeAndAddLiveIns(LiveRegs, *StoreBB);
     756           0 :   LoadCmpBB->clearLiveIns();
     757           0 :   computeAndAddLiveIns(LiveRegs, *LoadCmpBB);
     758             : 
     759           0 :   return true;
     760             : }
     761             : 
     762             : /// If MBBI references a pseudo instruction that should be expanded here,
     763             : /// do the expansion and return true.  Otherwise return false.
     764       84378 : bool AArch64ExpandPseudo::expandMI(MachineBasicBlock &MBB,
     765             :                                    MachineBasicBlock::iterator MBBI,
     766             :                                    MachineBasicBlock::iterator &NextMBBI) {
     767             :   MachineInstr &MI = *MBBI;
     768       84378 :   unsigned Opcode = MI.getOpcode();
     769       84378 :   switch (Opcode) {
     770             :   default:
     771             :     break;
     772             : 
     773        3634 :   case AArch64::ADDWrr:
     774             :   case AArch64::SUBWrr:
     775             :   case AArch64::ADDXrr:
     776             :   case AArch64::SUBXrr:
     777             :   case AArch64::ADDSWrr:
     778             :   case AArch64::SUBSWrr:
     779             :   case AArch64::ADDSXrr:
     780             :   case AArch64::SUBSXrr:
     781             :   case AArch64::ANDWrr:
     782             :   case AArch64::ANDXrr:
     783             :   case AArch64::BICWrr:
     784             :   case AArch64::BICXrr:
     785             :   case AArch64::ANDSWrr:
     786             :   case AArch64::ANDSXrr:
     787             :   case AArch64::BICSWrr:
     788             :   case AArch64::BICSXrr:
     789             :   case AArch64::EONWrr:
     790             :   case AArch64::EONXrr:
     791             :   case AArch64::EORWrr:
     792             :   case AArch64::EORXrr:
     793             :   case AArch64::ORNWrr:
     794             :   case AArch64::ORNXrr:
     795             :   case AArch64::ORRWrr:
     796             :   case AArch64::ORRXrr: {
     797             :     unsigned Opcode;
     798             :     switch (MI.getOpcode()) {
     799             :     default:
     800             :       return false;
     801             :     case AArch64::ADDWrr:      Opcode = AArch64::ADDWrs; break;
     802         167 :     case AArch64::SUBWrr:      Opcode = AArch64::SUBWrs; break;
     803         270 :     case AArch64::ADDXrr:      Opcode = AArch64::ADDXrs; break;
     804         112 :     case AArch64::SUBXrr:      Opcode = AArch64::SUBXrs; break;
     805          26 :     case AArch64::ADDSWrr:     Opcode = AArch64::ADDSWrs; break;
     806         218 :     case AArch64::SUBSWrr:     Opcode = AArch64::SUBSWrs; break;
     807          34 :     case AArch64::ADDSXrr:     Opcode = AArch64::ADDSXrs; break;
     808         109 :     case AArch64::SUBSXrr:     Opcode = AArch64::SUBSXrs; break;
     809         139 :     case AArch64::ANDWrr:      Opcode = AArch64::ANDWrs; break;
     810          40 :     case AArch64::ANDXrr:      Opcode = AArch64::ANDXrs; break;
     811          49 :     case AArch64::BICWrr:      Opcode = AArch64::BICWrs; break;
     812          13 :     case AArch64::BICXrr:      Opcode = AArch64::BICXrs; break;
     813           3 :     case AArch64::ANDSWrr:     Opcode = AArch64::ANDSWrs; break;
     814           2 :     case AArch64::ANDSXrr:     Opcode = AArch64::ANDSXrs; break;
     815           4 :     case AArch64::BICSWrr:     Opcode = AArch64::BICSWrs; break;
     816           0 :     case AArch64::BICSXrr:     Opcode = AArch64::BICSXrs; break;
     817           1 :     case AArch64::EONWrr:      Opcode = AArch64::EONWrs; break;
     818           1 :     case AArch64::EONXrr:      Opcode = AArch64::EONXrs; break;
     819         123 :     case AArch64::EORWrr:      Opcode = AArch64::EORWrs; break;
     820          21 :     case AArch64::EORXrr:      Opcode = AArch64::EORXrs; break;
     821         114 :     case AArch64::ORNWrr:      Opcode = AArch64::ORNWrs; break;
     822          54 :     case AArch64::ORNXrr:      Opcode = AArch64::ORNXrs; break;
     823         877 :     case AArch64::ORRWrr:      Opcode = AArch64::ORRWrs; break;
     824         873 :     case AArch64::ORRXrr:      Opcode = AArch64::ORRXrs; break;
     825             :     }
     826             :     MachineInstrBuilder MIB1 =
     827        3634 :         BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(Opcode),
     828        7268 :                 MI.getOperand(0).getReg())
     829        3634 :             .add(MI.getOperand(1))
     830        3634 :             .add(MI.getOperand(2))
     831        3634 :             .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0));
     832        3634 :     transferImpOps(MI, MIB1, MIB1);
     833        3634 :     MI.eraseFromParent();
     834        3634 :     return true;
     835             :   }
     836             : 
     837         269 :   case AArch64::LOADgot: {
     838         269 :     MachineFunction *MF = MBB.getParent();
     839         269 :     unsigned DstReg = MI.getOperand(0).getReg();
     840             :     const MachineOperand &MO1 = MI.getOperand(1);
     841             :     unsigned Flags = MO1.getTargetFlags();
     842             : 
     843         269 :     if (MF->getTarget().getCodeModel() == CodeModel::Tiny) {
     844             :       // Tiny codemodel expand to LDR
     845             :       MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MI.getDebugLoc(),
     846          60 :                                         TII->get(AArch64::LDRXl), DstReg);
     847             : 
     848          30 :       if (MO1.isGlobal()) {
     849          30 :         MIB.addGlobalAddress(MO1.getGlobal(), 0, Flags);
     850           0 :       } else if (MO1.isSymbol()) {
     851           0 :         MIB.addExternalSymbol(MO1.getSymbolName(), Flags);
     852             :       } else {
     853             :         assert(MO1.isCPI() &&
     854             :                "Only expect globals, externalsymbols, or constant pools");
     855           0 :         MIB.addConstantPoolIndex(MO1.getIndex(), MO1.getOffset(), Flags);
     856             :       }
     857             :     } else {
     858             :       // Small codemodel expand into ADRP + LDR.
     859             :       MachineInstrBuilder MIB1 =
     860         478 :           BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::ADRP), DstReg);
     861             :       MachineInstrBuilder MIB2 =
     862         478 :           BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::LDRXui))
     863         239 :               .add(MI.getOperand(0))
     864         239 :               .addReg(DstReg);
     865             : 
     866         239 :       if (MO1.isGlobal()) {
     867         239 :         MIB1.addGlobalAddress(MO1.getGlobal(), 0, Flags | AArch64II::MO_PAGE);
     868             :         MIB2.addGlobalAddress(MO1.getGlobal(), 0,
     869         239 :                               Flags | AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
     870           0 :       } else if (MO1.isSymbol()) {
     871           0 :         MIB1.addExternalSymbol(MO1.getSymbolName(), Flags | AArch64II::MO_PAGE);
     872             :         MIB2.addExternalSymbol(MO1.getSymbolName(), Flags |
     873           0 :                                                         AArch64II::MO_PAGEOFF |
     874           0 :                                                         AArch64II::MO_NC);
     875             :       } else {
     876             :         assert(MO1.isCPI() &&
     877             :                "Only expect globals, externalsymbols, or constant pools");
     878           0 :         MIB1.addConstantPoolIndex(MO1.getIndex(), MO1.getOffset(),
     879           0 :                                   Flags | AArch64II::MO_PAGE);
     880           0 :         MIB2.addConstantPoolIndex(MO1.getIndex(), MO1.getOffset(),
     881           0 :                                   Flags | AArch64II::MO_PAGEOFF |
     882           0 :                                       AArch64II::MO_NC);
     883             :       }
     884             : 
     885         239 :       transferImpOps(MI, MIB1, MIB2);
     886             :     }
     887         269 :     MI.eraseFromParent();
     888         269 :     return true;
     889             :   }
     890             : 
     891        2023 :   case AArch64::MOVaddr:
     892             :   case AArch64::MOVaddrJT:
     893             :   case AArch64::MOVaddrCP:
     894             :   case AArch64::MOVaddrBA:
     895             :   case AArch64::MOVaddrTLS:
     896             :   case AArch64::MOVaddrEXT: {
     897             :     // Expand into ADRP + ADD.
     898        2023 :     unsigned DstReg = MI.getOperand(0).getReg();
     899             :     MachineInstrBuilder MIB1 =
     900        4046 :         BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::ADRP), DstReg)
     901        2023 :             .add(MI.getOperand(1));
     902             : 
     903             :     MachineInstrBuilder MIB2 =
     904        4046 :         BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::ADDXri))
     905        2023 :             .add(MI.getOperand(0))
     906        2023 :             .addReg(DstReg)
     907        2023 :             .add(MI.getOperand(2))
     908        2023 :             .addImm(0);
     909             : 
     910        2023 :     transferImpOps(MI, MIB1, MIB2);
     911        2023 :     MI.eraseFromParent();
     912             :     return true;
     913             :   }
     914           1 :   case AArch64::ADDlowTLS:
     915             :     // Produce a plain ADD
     916           2 :     BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::ADDXri))
     917           1 :         .add(MI.getOperand(0))
     918           1 :         .add(MI.getOperand(1))
     919           1 :         .add(MI.getOperand(2))
     920             :         .addImm(0);
     921           1 :     MI.eraseFromParent();
     922           1 :     return true;
     923             : 
     924          50 :   case AArch64::MOVbaseTLS: {
     925          50 :     unsigned DstReg = MI.getOperand(0).getReg();
     926             :     auto SysReg = AArch64SysReg::TPIDR_EL0;
     927          50 :     MachineFunction *MF = MBB.getParent();
     928         104 :     if (MF->getTarget().getTargetTriple().isOSFuchsia() &&
     929           4 :         MF->getTarget().getCodeModel() == CodeModel::Kernel)
     930             :       SysReg = AArch64SysReg::TPIDR_EL1;
     931         100 :     BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::MRS), DstReg)
     932          50 :         .addImm(SysReg);
     933          50 :     MI.eraseFromParent();
     934          50 :     return true;
     935             :   }
     936             : 
     937        1590 :   case AArch64::MOVi32imm:
     938        1590 :     return expandMOVImm(MBB, MBBI, 32);
     939         216 :   case AArch64::MOVi64imm:
     940         216 :     return expandMOVImm(MBB, MBBI, 64);
     941       14878 :   case AArch64::RET_ReallyLR: {
     942             :     // Hiding the LR use with RET_ReallyLR may lead to extra kills in the
     943             :     // function and missing live-ins. We are fine in practice because callee
     944             :     // saved register handling ensures the register value is restored before
     945             :     // RET, but we need the undef flag here to appease the MachineVerifier
     946             :     // liveness checks.
     947             :     MachineInstrBuilder MIB =
     948       29756 :         BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::RET))
     949       14878 :           .addReg(AArch64::LR, RegState::Undef);
     950       14878 :     transferImpOps(MI, MIB, MIB);
     951       14878 :     MI.eraseFromParent();
     952             :     return true;
     953             :   }
     954             :   case AArch64::CMP_SWAP_8:
     955           1 :     return expandCMP_SWAP(MBB, MBBI, AArch64::LDAXRB, AArch64::STLXRB,
     956             :                           AArch64::SUBSWrx,
     957             :                           AArch64_AM::getArithExtendImm(AArch64_AM::UXTB, 0),
     958           1 :                           AArch64::WZR, NextMBBI);
     959             :   case AArch64::CMP_SWAP_16:
     960           1 :     return expandCMP_SWAP(MBB, MBBI, AArch64::LDAXRH, AArch64::STLXRH,
     961             :                           AArch64::SUBSWrx,
     962             :                           AArch64_AM::getArithExtendImm(AArch64_AM::UXTH, 0),
     963           1 :                           AArch64::WZR, NextMBBI);
     964             :   case AArch64::CMP_SWAP_32:
     965           3 :     return expandCMP_SWAP(MBB, MBBI, AArch64::LDAXRW, AArch64::STLXRW,
     966             :                           AArch64::SUBSWrs,
     967             :                           AArch64_AM::getShifterImm(AArch64_AM::LSL, 0),
     968           3 :                           AArch64::WZR, NextMBBI);
     969             :   case AArch64::CMP_SWAP_64:
     970           2 :     return expandCMP_SWAP(MBB, MBBI,
     971             :                           AArch64::LDAXRX, AArch64::STLXRX, AArch64::SUBSXrs,
     972             :                           AArch64_AM::getShifterImm(AArch64_AM::LSL, 0),
     973           2 :                           AArch64::XZR, NextMBBI);
     974           2 :   case AArch64::CMP_SWAP_128:
     975           2 :     return expandCMP_SWAP_128(MBB, MBBI, NextMBBI);
     976             : 
     977         180 :   case AArch64::AESMCrrTied:
     978             :   case AArch64::AESIMCrrTied: {
     979             :     MachineInstrBuilder MIB =
     980             :     BuildMI(MBB, MBBI, MI.getDebugLoc(),
     981         180 :             TII->get(Opcode == AArch64::AESMCrrTied ? AArch64::AESMCrr :
     982         340 :                                                       AArch64::AESIMCrr))
     983         180 :       .add(MI.getOperand(0))
     984         180 :       .add(MI.getOperand(1));
     985         180 :     transferImpOps(MI, MIB, MIB);
     986         180 :     MI.eraseFromParent();
     987             :     return true;
     988             :    }
     989             :   }
     990             :   return false;
     991             : }
     992             : 
     993             : /// Iterate over the instructions in basic block MBB and expand any
     994             : /// pseudo instructions.  Return true if anything was modified.
     995       17111 : bool AArch64ExpandPseudo::expandMBB(MachineBasicBlock &MBB) {
     996             :   bool Modified = false;
     997             : 
     998             :   MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
     999      101489 :   while (MBBI != E) {
    1000       84378 :     MachineBasicBlock::iterator NMBBI = std::next(MBBI);
    1001       84378 :     Modified |= expandMI(MBB, MBBI, NMBBI);
    1002       84378 :     MBBI = NMBBI;
    1003             :   }
    1004             : 
    1005       17111 :   return Modified;
    1006             : }
    1007             : 
    1008       14771 : bool AArch64ExpandPseudo::runOnMachineFunction(MachineFunction &MF) {
    1009       14771 :   TII = static_cast<const AArch64InstrInfo *>(MF.getSubtarget().getInstrInfo());
    1010             : 
    1011             :   bool Modified = false;
    1012       31882 :   for (auto &MBB : MF)
    1013       17111 :     Modified |= expandMBB(MBB);
    1014       14771 :   return Modified;
    1015             : }
    1016             : 
    1017             : /// Returns an instance of the pseudo instruction expansion pass.
    1018        1223 : FunctionPass *llvm::createAArch64ExpandPseudoPass() {
    1019        1223 :   return new AArch64ExpandPseudo();
    1020             : }

Generated by: LCOV version 1.13