LLVM  14.0.0git
AArch64MIPeepholeOpt.cpp
Go to the documentation of this file.
1 //===- AArch64MIPeepholeOpt.cpp - AArch64 MI peephole optimization pass ---===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This pass performs the following peephole optimizations at the MIR level.
10 //
11 // 1. MOVi32imm + ANDWrr ==> ANDWri + ANDWri
12 // MOVi64imm + ANDXrr ==> ANDXri + ANDXri
13 //
14 // The mov pseudo instruction could be expanded to multiple mov instructions
15 // later. In this case, we could try to split the constant operand of mov
16 // instruction into two bitmask immediates. It makes two AND instructions
17 // instead of multiple `mov` + `and` instructions.
18 //
19 // 2. Remove redundant ORRWrs which is generated by zero-extend.
20 //
21 // %3:gpr32 = ORRWrs $wzr, %2, 0
22 // %4:gpr64 = SUBREG_TO_REG 0, %3, %subreg.sub_32
23 //
24 // If AArch64's 32-bit form of instruction defines the source operand of
25 // ORRWrs, we can remove the ORRWrs because the upper 32 bits of the source
26 // operand are set to zero.
27 //
28 //===----------------------------------------------------------------------===//
29 
30 #include "AArch64ExpandImm.h"
31 #include "AArch64InstrInfo.h"
33 #include "llvm/ADT/SetVector.h"
36 
37 using namespace llvm;
38 
39 #define DEBUG_TYPE "aarch64-mi-peephole-opt"
40 
41 namespace {
42 
// Machine-function pass implementing the two peepholes described in the file
// header: splitting a MOV-immediate + AND into two AND-immediates, and removing
// an ORRWrs that only exists to zero-extend.
//
// NOTE(review): this listing omits source lines 47, 52, 56, 58, 67 and 68, so
// the constructor body, one member declaration (presumably MRI, which the
// member functions dereference), the trailing visitor parameters and part of
// getAnalysisUsage() are not visible here.
43 struct AArch64MIPeepholeOpt : public MachineFunctionPass {
44  static char ID;
45 
46  AArch64MIPeepholeOpt() : MachineFunctionPass(ID) {
48  }
49 
// Both pointers are assigned at the start of runOnMachineFunction(), before
// any instruction is visited.
50  const AArch64InstrInfo *TII;
51  MachineLoopInfo *MLI;
53 
// Per-instruction visitors. visitAND is instantiated with uint32_t for ANDWrr
// and uint64_t for ANDXrr; visitORR is called for ORRWrs. Each returns true
// when it rewrote the instruction it was given.
54  template <typename T>
55  bool visitAND(MachineInstr &MI,
57  bool visitORR(MachineInstr &MI,
59  bool runOnMachineFunction(MachineFunction &MF) override;
60 
// Human-readable pass name.
61  StringRef getPassName() const override {
62  return "AArch64 MI Peephole Optimization pass";
63  }
64 
// Marks the CFG as preserved by this pass.
65  void getAnalysisUsage(AnalysisUsage &AU) const override {
66  AU.setPreservesCFG();
69  }
70 };
71 
73 
74 } // end anonymous namespace
75 
// Pass registration: "aarch64-mi-peephole-opt" is the pass argument string and
// "AArch64 MI Peephole Optimization" its description. Both trailing flags are
// false (presumably the CFG-only and is-analysis flags; confirm against the
// INITIALIZE_PASS macro definition).
76 INITIALIZE_PASS(AArch64MIPeepholeOpt, "aarch64-mi-peephole-opt",
77  "AArch64 MI Peephole Optimization", false, false)
78 
// Tries to express Imm as the AND of two masks:
//   - NewImm1: all ones from the lowest set bit of Imm up to and including its
//     highest set bit;
//   - NewImm2: Imm with every bit outside that range also set.
// NewImm1 & NewImm2 equals Imm.
//
// Returns false whenever the immediate is not split (see the early exits
// below) and true otherwise. Imm1Enc / Imm2Enc are out-parameters; the caller
// passes them on as the immediate operands of the two new AND instructions.
//
// NOTE(review): this listing omits source lines 82, 86-87, 109 and 112-113,
// i.e. the conditions guarding the first and last `return false` and the
// statements that write Imm1Enc / Imm2Enc (presumably the encoded logical
// immediates; confirm against the full source).
79 template <typename T>
80 static bool splitBitmaskImm(T Imm, unsigned RegSize, T &Imm1Enc, T &Imm2Enc) {
81  T UImm = static_cast<T>(Imm);
83  return false;
84 
85  // If this immediate can be handled by one instruction, do not split it.
88  if (Insn.size() == 1)
89  return false;
90 
91  // A bitmask immediate consists of consecutive ones. Take the constant
92  // 0b00000000001000000000010000000000, which does not consist of
93  // consecutive ones. It can be split into two bitmask immediates,
94  // 0b00000000001111111111110000000000 and 0b11111111111000000000011111111111.
95  // ANDing these two bitmask immediates gives back the original constant.
96  unsigned LowestBitSet = countTrailingZeros(UImm);
97  unsigned HighestBitSet = Log2_64(UImm);
98 
99  // Create a mask which is filled with ones from the position of the lowest
100  // set bit to the position of the highest set bit.
// When the top bit of T is set, 2 << HighestBitSet wraps to 0 for the unsigned
// types the callers instantiate T with, and the subtraction still yields the
// intended mask.
101  T NewImm1 = (static_cast<T>(2) << HighestBitSet) -
102  (static_cast<T>(1) << LowestBitSet);
103  // Create a mask which is filled with ones outside the range between the
104  // lowest set bit and the highest set bit.
105  T NewImm2 = UImm | ~NewImm1;
106 
107  // If the split value is not a valid bitmask immediate, do not split this
108  // constant.
110  return false;
111 
114  return true;
115 }
116 
// Rewrites a register-register AND whose operand 2 is defined by a single-use
// MOVi32imm/MOVi64imm (optionally through a single-use SUBREG_TO_REG) into two
// AND-immediate instructions inserted before MI.
//
// T is the unsigned integer type whose width matches the AND (uint32_t for
// ANDWrr, uint64_t for ANDXrr). Returns true if the rewrite was performed. The
// original AND, the MOV and any SUBREG_TO_REG are not erased here; they are
// queued in ToBeRemoved.
//
// NOTE(review): this listing omits source line 119, the remainder of the
// parameter list.
117 template <typename T>
118 bool AArch64MIPeepholeOpt::visitAND(
120  // Try the transformation below.
121  //
122  // MOVi32imm + ANDWrr ==> ANDWri + ANDWri
123  // MOVi64imm + ANDXrr ==> ANDXri + ANDXri
124  //
125  // The mov pseudo instruction could be expanded to multiple mov instructions
126  // later. Let's try to split the constant operand of mov instruction into two
127  // bitmask immediates. It makes only two AND instructions instead of multiple
128  // mov + and instructions.
129 
130  unsigned RegSize = sizeof(T) * 8;
131  assert((RegSize == 32 || RegSize == 64) &&
132  "Invalid RegSize for AND bitmask peephole optimization");
133 
134  // If the AND's MBB is inside a loop, only proceed when the AND is loop
// invariant; an AND outside any loop is always eligible.
135  MachineBasicBlock *MBB = MI.getParent();
136  MachineLoop *L = MLI->getLoopFor(MBB);
137  if (L && !L->isLoopInvariant(MI))
138  return false;
139 
140  // Check whether AND's operand is MOV with immediate.
141  MachineInstr *MovMI = MRI->getUniqueVRegDef(MI.getOperand(2).getReg());
142  if (!MovMI)
143  return false;
144 
145  MachineInstr *SubregToRegMI = nullptr;
146  // If it is SUBREG_TO_REG, look through it to the instruction defining its
// operand 2.
147  if (MovMI->getOpcode() == TargetOpcode::SUBREG_TO_REG) {
148  SubregToRegMI = MovMI;
149  MovMI = MRI->getUniqueVRegDef(MovMI->getOperand(2).getReg());
150  if (!MovMI)
151  return false;
152  }
153 
154  if (MovMI->getOpcode() != AArch64::MOVi32imm &&
155  MovMI->getOpcode() != AArch64::MOVi64imm)
156  return false;
157 
158  // If the MOV has multiple uses, do not split the immediate because it causes
159  // more instructions.
160  if (!MRI->hasOneUse(MovMI->getOperand(0).getReg()))
161  return false;
162 
// The same single-use requirement applies to the SUBREG_TO_REG result.
163  if (SubregToRegMI && !MRI->hasOneUse(SubregToRegMI->getOperand(0).getReg()))
164  return false;
165 
166  // Split the bitmask immediate into two.
167  T UImm = static_cast<T>(MovMI->getOperand(1).getImm());
168  // For the 32 bit form of instruction, the upper 32 bits of the destination
169  // register are set to zero. If there is SUBREG_TO_REG, set the upper 32 bits
170  // of UImm to zero.
171  if (SubregToRegMI)
172  UImm &= 0xFFFFFFFF;
173  T Imm1Enc;
174  T Imm2Enc;
175  if (!splitBitmaskImm(UImm, RegSize, Imm1Enc, Imm2Enc))
176  return false;
177 
178  // Create new AND MIs.
179  DebugLoc DL = MI.getDebugLoc();
180  const TargetRegisterClass *ANDImmRC =
181  (RegSize == 32) ? &AArch64::GPR32spRegClass : &AArch64::GPR64spRegClass;
182  Register DstReg = MI.getOperand(0).getReg();
183  Register SrcReg = MI.getOperand(1).getReg();
184  Register NewTmpReg = MRI->createVirtualRegister(ANDImmRC);
185  Register NewDstReg = MRI->createVirtualRegister(ANDImmRC);
186  unsigned Opcode = (RegSize == 32) ? AArch64::ANDWri : AArch64::ANDXri;
187 
// First AND: SrcReg & first immediate -> NewTmpReg.
188  MRI->constrainRegClass(NewTmpReg, MRI->getRegClass(SrcReg));
189  BuildMI(*MBB, MI, DL, TII->get(Opcode), NewTmpReg)
190  .addReg(SrcReg)
191  .addImm(Imm1Enc);
192 
// Second AND: NewTmpReg & second immediate -> NewDstReg.
193  MRI->constrainRegClass(NewDstReg, MRI->getRegClass(DstReg));
194  BuildMI(*MBB, MI, DL, TII->get(Opcode), NewDstReg)
195  .addReg(NewTmpReg)
196  .addImm(Imm2Enc);
197 
198  MRI->replaceRegWith(DstReg, NewDstReg);
199  // replaceRegWith changes MI's definition register. Keep it for SSA form until
200  // deleting MI.
201  MI.getOperand(0).setReg(DstReg);
202 
// Queue the now-dead instructions; they are not erased inside this function.
203  ToBeRemoved.insert(&MI);
204  if (SubregToRegMI)
205  ToBeRemoved.insert(SubregToRegMI);
206  ToBeRemoved.insert(MovMI);
207 
208  return true;
209 }
210 
// Removes an ORRWrs of the form `ORRWrs $wzr, %src, 0` when %src is defined by
// a target (non-generic) instruction. All uses of the ORR's result are
// redirected to %src and the ORR itself is queued in ToBeRemoved.
// Returns true if the ORR was queued for removal, false if the pattern did not
// match.
//
// NOTE(review): this listing omits source line 212, the remainder of the
// parameter list.
211 bool AArch64MIPeepholeOpt::visitORR(
213  // Check this ORR comes from below zero-extend pattern.
214  //
215  // def : Pat<(i64 (zext GPR32:$src)),
216  // (SUBREG_TO_REG (i32 0), (ORRWrs WZR, GPR32:$src, 0), sub_32)>;
// Operand 3 must be 0 to match the pattern above.
217  if (MI.getOperand(3).getImm() != 0)
218  return false;
219 
// The first source operand must be WZR.
220  if (MI.getOperand(1).getReg() != AArch64::WZR)
221  return false;
222 
223  MachineInstr *SrcMI = MRI->getUniqueVRegDef(MI.getOperand(2).getReg());
224  if (!SrcMI)
225  return false;
226 
227  // From https://developer.arm.com/documentation/dui0801/b/BABBGCAC
228  //
229  // When you use the 32-bit form of an instruction, the upper 32 bits of the
230  // source registers are ignored and the upper 32 bits of the destination
231  // register are set to zero.
232  //
233  // If AArch64's 32-bit form of instruction defines the source operand of
234  // zero-extend, we do not need the zero-extend. Check that the defining
235  // opcode is a real AArch64 instruction; if it is not, conservatively leave
236  // the ORR alone.
237  if (SrcMI->getOpcode() <= TargetOpcode::GENERIC_OP_END)
238  return false;
239 
240  Register DefReg = MI.getOperand(0).getReg();
241  Register SrcReg = MI.getOperand(2).getReg();
242  MRI->replaceRegWith(DefReg, SrcReg);
// SrcReg now also serves the former uses of DefReg, so clear the kill flags
// on its uses.
243  MRI->clearKillFlags(SrcReg);
244  // replaceRegWith changes MI's definition register. Keep it for SSA form until
245  // deleting MI.
246  MI.getOperand(0).setReg(DefReg);
247  ToBeRemoved.insert(&MI);
248 
249  LLVM_DEBUG({ dbgs() << "Removed: " << MI << "\n"; });
250 
251  return true;
252 }
253 
// Pass entry point. Caches TII, MLI and MRI, visits every ANDWrr, ANDXrr and
// ORRWrs in MF, then erases the instructions the visitors queued in
// ToBeRemoved.
//
// Returns true if at least one visitor rewrote an instruction. Returns false
// without touching MF when the function is skipped or is not in SSA form.
254 bool AArch64MIPeepholeOpt::runOnMachineFunction(MachineFunction &MF) {
255  if (skipFunction(MF.getFunction()))
256  return false;
257 
258  TII = static_cast<const AArch64InstrInfo *>(MF.getSubtarget().getInstrInfo());
259  MLI = &getAnalysis<MachineLoopInfo>();
260  MRI = &MF.getRegInfo();
261 
// The visitors rely on unique virtual-register definitions, so bail out once
// the function has left SSA form.
262  if (!MRI->isSSA())
263  return false;
264 
265  bool Changed = false;
267 
268  for (MachineBasicBlock &MBB : MF) {
269  for (MachineInstr &MI : MBB) {
270  switch (MI.getOpcode()) {
271  default:
272  break;
// Accumulate with |=. A plain assignment would let a later visitor that
// returns false overwrite an earlier successful rewrite, and the pass would
// then report "unchanged" after modifying the function.
273  case AArch64::ANDWrr:
274  Changed |= visitAND<uint32_t>(MI, ToBeRemoved);
275  break;
276  case AArch64::ANDXrr:
277  Changed |= visitAND<uint64_t>(MI, ToBeRemoved);
278  break;
279  case AArch64::ORRWrs:
280  Changed |= visitORR(MI, ToBeRemoved);
281  }
282  }
283  }
284 
// Erase only after the walk so the iteration above never steps over a deleted
// instruction.
285  for (MachineInstr *MI : ToBeRemoved)
286  MI->eraseFromParent();
287 
288  return Changed;
289 }
290 
// Hands back a newly allocated AArch64MIPeepholeOpt pass object.
// NOTE(review): this listing omits source line 291, the function signature.
292  return new AArch64MIPeepholeOpt();
293 }
MI
IRTranslator LLVM IR MI
Definition: IRTranslator.cpp:105
llvm::MachineInstrBuilder::addImm
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
Definition: MachineInstrBuilder.h:131
llvm
This is an optimization pass for GlobalISel generic memory operations.
Definition: AllocatorList.h:23
llvm::MachineRegisterInfo::createVirtualRegister
Register createVirtualRegister(const TargetRegisterClass *RegClass, StringRef Name="")
createVirtualRegister - Create and return a new virtual register in the function with the specified r...
Definition: MachineRegisterInfo.cpp:158
llvm::MachineRegisterInfo
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
Definition: MachineRegisterInfo.h:52
T
RegSize
unsigned RegSize
Definition: AArch64MIPeepholeOpt.cpp:80
llvm::TargetSubtargetInfo::getInstrInfo
virtual const TargetInstrInfo * getInstrInfo() const
Definition: TargetSubtargetInfo.h:92
llvm::SmallVector
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1168
llvm::MachineRegisterInfo::getUniqueVRegDef
MachineInstr * getUniqueVRegDef(Register Reg) const
getUniqueVRegDef - Return the unique machine instr that defines the specified virtual register or nul...
Definition: MachineRegisterInfo.cpp:409
llvm::MachineFunctionPass
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
Definition: MachineFunctionPass.h:30
llvm::MachineFunctionPass::getAnalysisUsage
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
Definition: MachineFunctionPass.cpp:102
LLVM_DEBUG
#define LLVM_DEBUG(X)
Definition: Debug.h:101
llvm::MachineLoopInfo
Definition: MachineLoopInfo.h:90
llvm::dbgs
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
llvm::MachineFunction::getRegInfo
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
Definition: MachineFunction.h:644
llvm::Log2_64
unsigned Log2_64(uint64_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
Definition: MathExtras.h:602
MachineLoopInfo.h
Imm1Enc
unsigned T & Imm1Enc
Definition: AArch64MIPeepholeOpt.cpp:80
AArch64InstrInfo.h
llvm::PassRegistry::getPassRegistry
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
Definition: PassRegistry.cpp:31
llvm::AArch64InstrInfo
Definition: AArch64InstrInfo.h:38
llvm::AArch64_AM::isLogicalImmediate
static bool isLogicalImmediate(uint64_t imm, unsigned regSize)
isLogicalImmediate - Return true if the immediate is valid for a logical immediate instruction of the...
Definition: AArch64AddressingModes.h:276
llvm::MachineOperand::getImm
int64_t getImm() const
Definition: MachineOperand.h:537
llvm::MachineInstr::getOperand
const MachineOperand & getOperand(unsigned i) const
Definition: MachineInstr.h:499
llvm::TargetRegisterClass
Definition: TargetRegisterInfo.h:46
llvm::AnalysisUsage
Represent the analysis usage information of a pass.
Definition: PassAnalysisSupport.h:47
TII
const HexagonInstrInfo * TII
Definition: HexagonCopyToCombine.cpp:129
INITIALIZE_PASS
INITIALIZE_PASS(AArch64MIPeepholeOpt, "aarch64-mi-peephole-opt", "AArch64 MI Peephole Optimization", false, false) template< typename T > static bool splitBitmaskImm(T Imm
llvm::MachineBasicBlock
Definition: MachineBasicBlock.h:95
llvm::MachineRegisterInfo::getRegClass
const TargetRegisterClass * getRegClass(Register Reg) const
Return the register class of the specified virtual register.
Definition: MachineRegisterInfo.h:634
llvm::MachineRegisterInfo::isSSA
bool isSSA() const
Definition: MachineRegisterInfo.h:185
AArch64AddressingModes.h
llvm::MachineRegisterInfo::clearKillFlags
void clearKillFlags(Register Reg) const
clearKillFlags - Iterate over all the uses of the given register and clear the kill flag from the Mac...
Definition: MachineRegisterInfo.cpp:429
llvm::MachineFunction::getSubtarget
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
Definition: MachineFunction.h:634
llvm::MachineLoop
Definition: MachineLoopInfo.h:45
llvm::MachineInstr
Representation of each machine instruction.
Definition: MachineInstr.h:64
Imm2Enc
unsigned T T & Imm2Enc
Definition: AArch64MIPeepholeOpt.cpp:80
AArch64ExpandImm.h
NewImm2
T NewImm2
Definition: AArch64MIPeepholeOpt.cpp:105
llvm::createAArch64MIPeepholeOptPass
FunctionPass * createAArch64MIPeepholeOptPass()
Definition: AArch64MIPeepholeOpt.cpp:291
assert
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
llvm::MachineInstrBuilder::addReg
const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
Definition: MachineInstrBuilder.h:97
llvm::MachineOperand::getReg
Register getReg() const
getReg - Returns the register number.
Definition: MachineOperand.h:360
HighestBitSet
unsigned HighestBitSet
Definition: AArch64MIPeepholeOpt.cpp:97
llvm::MachineFunction
Definition: MachineFunction.h:234
llvm::SetVector< T, SmallVector< T, N >, SmallDenseSet< T, N > >::insert
bool insert(const value_type &X)
Insert a new element into the SetVector.
Definition: SetVector.h:141
llvm::AArch64_AM::encodeLogicalImmediate
static uint64_t encodeLogicalImmediate(uint64_t imm, unsigned regSize)
encodeLogicalImmediate - Return the encoded immediate value for a logical immediate instruction of th...
Definition: AArch64AddressingModes.h:283
llvm::countTrailingZeros
unsigned countTrailingZeros(T Val, ZeroBehavior ZB=ZB_Width)
Count number of 0's from the least significant bit to the most stopping at the first 1.
Definition: MathExtras.h:156
llvm::AnalysisUsage::setPreservesCFG
void setPreservesCFG()
This function should be called by the pass, iff they do not:
Definition: Pass.cpp:253
llvm::StringRef
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:57
llvm::MachineInstr::getOpcode
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
Definition: MachineInstr.h:489
DL
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
Definition: AArch64SLSHardening.cpp:76
MRI
unsigned const MachineRegisterInfo * MRI
Definition: AArch64AdvSIMDScalarPass.cpp:105
llvm::Register
Wrapper class representing virtual and physical registers.
Definition: Register.h:19
llvm::MachineRegisterInfo::replaceRegWith
void replaceRegWith(Register FromReg, Register ToReg)
replaceRegWith - Replace all instances of FromReg with ToReg in the machine function.
Definition: MachineRegisterInfo.cpp:380
MBB
MachineBasicBlock & MBB
Definition: AArch64SLSHardening.cpp:74
llvm::initializeAArch64MIPeepholeOptPass
void initializeAArch64MIPeepholeOptPass(PassRegistry &)
Insn
SmallVector< AArch64_IMM::ImmInsnModel, 4 > Insn
Definition: AArch64MIPeepholeOpt.cpp:86
llvm::MachineFunction::getFunction
Function & getFunction()
Return the LLVM function that this machine code represents.
Definition: MachineFunction.h:600
llvm::AArch64_IMM::expandMOVImm
void expandMOVImm(uint64_t Imm, unsigned BitSize, SmallVectorImpl< ImmInsnModel > &Insn)
Expand a MOVi32imm or MOVi64imm pseudo instruction to one or more real move-immediate instructions to...
Definition: AArch64ExpandImm.cpp:304
NewImm1
T NewImm1
Definition: AArch64MIPeepholeOpt.cpp:101
llvm::MachineRegisterInfo::hasOneUse
bool hasOneUse(Register RegNo) const
hasOneUse - Return true if there is exactly one instruction using the specified register.
Definition: MachineRegisterInfo.h:510
llvm::BuildMI
MachineInstrBuilder BuildMI(MachineFunction &MF, const DebugLoc &DL, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
Definition: MachineInstrBuilder.h:328
llvm::MachineRegisterInfo::constrainRegClass
const TargetRegisterClass * constrainRegClass(Register Reg, const TargetRegisterClass *RC, unsigned MinNumRegs=0)
constrainRegClass - Constrain the register class of the specified virtual register to be a common sub...
Definition: MachineRegisterInfo.cpp:85
llvm::SmallSetVector
A SetVector that performs no allocations if smaller than a certain size.
Definition: SetVector.h:307
llvm::FunctionPass
FunctionPass class - This class is used to implement most global optimizations.
Definition: Pass.h:298
llvm::AnalysisUsage::addRequired
AnalysisUsage & addRequired()
Definition: PassAnalysisSupport.h:75
llvm::DebugLoc
A debug info location.
Definition: DebugLoc.h:33
llvm::MachineLoop::isLoopInvariant
bool isLoopInvariant(MachineInstr &I) const
Returns true if the instruction is loop invariant.
Definition: MachineLoopInfo.cpp:155
LowestBitSet
unsigned LowestBitSet
Definition: AArch64MIPeepholeOpt.cpp:96
SetVector.h
MachineDominators.h
llvm::Intrinsic::ID
unsigned ID
Definition: TargetTransformInfo.h:38