LLVM 20.0.0git
AArch64MIPeepholeOpt.cpp
Go to the documentation of this file.
1//===- AArch64MIPeepholeOpt.cpp - AArch64 MI peephole optimization pass ---===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This pass performs below peephole optimizations on MIR level.
10//
11// 1. MOVi32imm + ANDWrr ==> ANDWri + ANDWri
12// MOVi64imm + ANDXrr ==> ANDXri + ANDXri
13//
14// 2. MOVi32imm + ADDWrr ==> ADDWri + ADDWri
15// MOVi64imm + ADDXrr ==> ADDXri + ADDXri
16//
17// 3. MOVi32imm + SUBWrr ==> SUBWri + SUBWri
18// MOVi64imm + SUBXrr ==> SUBXri + SUBXri
19//
20// The mov pseudo instruction could be expanded to multiple mov instructions
21// later. In this case, we could try to split the constant operand of mov
22// instruction into two immediates which can be directly encoded into
23// *Wri/*Xri instructions. It makes two AND/ADD/SUB instructions instead of
24// multiple `mov` + `and/add/sub` instructions.
25//
26// 4. Remove redundant ORRWrs which is generated by zero-extend.
27//
28// %3:gpr32 = ORRWrs $wzr, %2, 0
29// %4:gpr64 = SUBREG_TO_REG 0, %3, %subreg.sub_32
30//
31// If AArch64's 32-bit form of instruction defines the source operand of
32// ORRWrs, we can remove the ORRWrs because the upper 32 bits of the source
33// operand are set to zero.
34//
35// 5. %reg = INSERT_SUBREG %reg(tied-def 0), %subreg, subidx
36// ==> %reg:subidx = SUBREG_TO_REG 0, %subreg, subidx
37//
38// 6. %intermediate:gpr32 = COPY %src:fpr128
39// %dst:fpr128 = INSvi32gpr %dst_vec:fpr128, dst_index, %intermediate:gpr32
40// ==> %dst:fpr128 = INSvi32lane %dst_vec:fpr128, dst_index, %src:fpr128, 0
41//
42// In cases where a source FPR is copied to a GPR in order to be copied
43// to a destination FPR, we can directly copy the values between the FPRs,
44// eliminating the use of the Integer unit. When we match a pattern of
45// INSvi[X]gpr that is preceded by a chain of COPY instructions from a FPR
46// source, we use the INSvi[X]lane to replace the COPY & INSvi[X]gpr
47// instructions.
48//
49// 7. If MI sets zero for high 64-bits implicitly, remove `mov 0` for high
50// 64-bits. For example,
51//
52// %1:fpr64 = nofpexcept FCVTNv4i16 %0:fpr128, implicit $fpcr
53// %2:fpr64 = MOVID 0
54// %4:fpr128 = IMPLICIT_DEF
55// %3:fpr128 = INSERT_SUBREG %4:fpr128(tied-def 0), %2:fpr64, %subreg.dsub
56// %6:fpr128 = IMPLICIT_DEF
57// %5:fpr128 = INSERT_SUBREG %6:fpr128(tied-def 0), %1:fpr64, %subreg.dsub
58// %7:fpr128 = INSvi64lane %5:fpr128(tied-def 0), 1, %3:fpr128, 0
59// ==>
60// %1:fpr64 = nofpexcept FCVTNv4i16 %0:fpr128, implicit $fpcr
61// %6:fpr128 = IMPLICIT_DEF
62// %7:fpr128 = INSERT_SUBREG %6:fpr128(tied-def 0), %1:fpr64, %subreg.dsub
63//
64// 8. Remove redundant CSELs that select between identical registers, by
65// replacing them with unconditional moves.
66//
67// 9. Replace UBFMXri with UBFMWri if the instruction is equivalent to a 32 bit
68// LSR or LSL alias of UBFM.
69//
70//===----------------------------------------------------------------------===//
71
72#include "AArch64ExpandImm.h"
73#include "AArch64InstrInfo.h"
77
78using namespace llvm;
79
80#define DEBUG_TYPE "aarch64-mi-peephole-opt"
81
82namespace {
83
/// Late MIR peephole pass: splits out-of-range immediates feeding logical and
/// arithmetic instructions, removes redundant zero-extends/moves, and narrows
/// 64-bit shifts that only use the low 32 bits (see file header for details).
struct AArch64MIPeepholeOpt : public MachineFunctionPass {
  static char ID;

  AArch64MIPeepholeOpt() : MachineFunctionPass(ID) {
    // NOTE(review): the extraction dropped the pass-registry initialization
    // call that upstream places in this constructor body — restore it from
    // the original source.
  }

  // Cached target hooks and analyses; populated in runOnMachineFunction.
  const AArch64InstrInfo *TII;
  MachineLoopInfo *MLI;
  // NOTE(review): the extraction dropped the declarations of MRI (and TRI)
  // here; the member functions below reference both — restore from upstream.

  // The opcodes of the two instructions a split immediate is lowered into.
  using OpcodePair = std::pair<unsigned, unsigned>;
  // Callback deciding whether an immediate of width `unsigned` can be split;
  // on success returns the opcode pair and writes the two encoded halves into
  // the T& out-parameters.
  template <typename T>
  using SplitAndOpcFunc =
      std::function<std::optional<OpcodePair>(T, unsigned, T &, T &)>;
  // Callback that materializes the two replacement instructions.
  using BuildMIFunc =
      std::function<void(MachineInstr &, OpcodePair, unsigned, unsigned,
  // NOTE(review): the tail of this alias's parameter list (the three Register
  // operands) was dropped by the extraction — restore from upstream.

  /// For instructions where an immediate operand could be split into two
  /// separate immediate instructions, use the splitTwoPartImm two handle the
  /// optimization.
  ///
  /// To implement, the following function types must be passed to
  /// splitTwoPartImm. A SplitAndOpcFunc must be implemented that determines if
  /// splitting the immediate is valid and returns the associated new opcode. A
  /// BuildMIFunc must be implemented to build the two immediate instructions.
  ///
  /// Example Pattern (where IMM would require 2+ MOV instructions):
  ///  %dst = <Instr>rr %src IMM [...]
  /// becomes:
  ///  %tmp = <Instr>ri %src (encode half IMM) [...]
  ///  %dst = <Instr>ri %tmp (encode half IMM) [...]
  template <typename T>
  bool splitTwoPartImm(MachineInstr &MI,
                       SplitAndOpcFunc<T> SplitAndOpc, BuildMIFunc BuildInstr);

  // Validates that MI's immediate operand is a single-use MOVi32imm/MOVi64imm
  // (possibly behind a SUBREG_TO_REG) that is safe/profitable to split.
  bool checkMovImmInstr(MachineInstr &MI, MachineInstr *&MovMI,
                        MachineInstr *&SubregToRegMI);

  // One visitor per peephole; each returns true if it changed the function.
  template <typename T>
  bool visitADDSUB(unsigned PosOpc, unsigned NegOpc, MachineInstr &MI);
  template <typename T>
  bool visitADDSSUBS(OpcodePair PosOpcs, OpcodePair NegOpcs, MachineInstr &MI);

  template <typename T>
  bool visitAND(unsigned Opc, MachineInstr &MI);
  bool visitORR(MachineInstr &MI);
  bool visitCSEL(MachineInstr &MI);
  bool visitINSERT(MachineInstr &MI);
  bool visitINSviGPR(MachineInstr &MI, unsigned Opc);
  bool visitINSvi64lane(MachineInstr &MI);
  bool visitFMOVDr(MachineInstr &MI);
  bool visitUBFMXri(MachineInstr &MI);
  bool visitCopy(MachineInstr &MI);
  bool runOnMachineFunction(MachineFunction &MF) override;

  StringRef getPassName() const override {
    return "AArch64 MI Peephole Optimization pass";
  }

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.setPreservesCFG();
    // NOTE(review): the extraction dropped the addRequired<> line(s) and the
    // base-class getAnalysisUsage call here — restore from upstream.
  }
};
152
153char AArch64MIPeepholeOpt::ID = 0;
154
155} // end anonymous namespace
156
157INITIALIZE_PASS(AArch64MIPeepholeOpt, "aarch64-mi-peephole-opt",
158 "AArch64 MI Peephole Optimization", false, false)
159
// Try to split an immediate that is not a valid single logical immediate into
// two logical (bitmask) immediates whose AND reproduces the original value.
// On success the two encodings are returned via Imm1Enc/Imm2Enc.
template <typename T>
static bool splitBitmaskImm(T Imm, unsigned RegSize, T &Imm1Enc, T &Imm2Enc) {
  T UImm = static_cast<T>(Imm);
  // NOTE(review): the extraction dropped the condition line above this early
  // return (upstream rejects values already encodable as one logical
  // immediate) — restore from the original source.
    return false;

  // If this immediate can be handled by one instruction, do not split it.
  // NOTE(review): the extraction dropped the declaration of `Insn` and the
  // AArch64_IMM::expandMOVImm(...) call that fills it here.
  if (Insn.size() == 1)
    return false;

  // The bitmask immediate consists of consecutive ones. Let's say there is
  // constant 0b00000000001000000000010000000000 which does not consist of
  // consecutive ones. We can split it in to two bitmask immediate like
  // 0b00000000001111111111110000000000 and 0b11111111111000000000011111111111.
  // If we do AND with these two bitmask immediate, we can see original one.
  // NOTE(review): the extraction dropped the `LowestBitSet` computation here.
  unsigned HighestBitSet = Log2_64(UImm);

  // Create a mask which is filled with one from the position of lowest bit set
  // to the position of highest bit set.
  T NewImm1 = (static_cast<T>(2) << HighestBitSet) -
              (static_cast<T>(1) << LowestBitSet);
  // Create a mask which is filled with one outside the position of lowest bit
  // set and the position of highest bit set.
  T NewImm2 = UImm | ~NewImm1;

  // If the split value is not valid bitmask immediate, do not split this
  // constant.
  // NOTE(review): the extraction dropped the validity check above this return
  // and the two encodeLogicalImmediate assignments below it — restore from
  // upstream.
    return false;

    return true;
}
197
198template <typename T>
199bool AArch64MIPeepholeOpt::visitAND(
200 unsigned Opc, MachineInstr &MI) {
201 // Try below transformation.
202 //
203 // MOVi32imm + ANDWrr ==> ANDWri + ANDWri
204 // MOVi64imm + ANDXrr ==> ANDXri + ANDXri
205 //
206 // The mov pseudo instruction could be expanded to multiple mov instructions
207 // later. Let's try to split the constant operand of mov instruction into two
208 // bitmask immediates. It makes only two AND instructions instead of multiple
209 // mov + and instructions.
210
211 return splitTwoPartImm<T>(
212 MI,
213 [Opc](T Imm, unsigned RegSize, T &Imm0,
214 T &Imm1) -> std::optional<OpcodePair> {
215 if (splitBitmaskImm(Imm, RegSize, Imm0, Imm1))
216 return std::make_pair(Opc, Opc);
217 return std::nullopt;
218 },
219 [&TII = TII](MachineInstr &MI, OpcodePair Opcode, unsigned Imm0,
220 unsigned Imm1, Register SrcReg, Register NewTmpReg,
221 Register NewDstReg) {
222 DebugLoc DL = MI.getDebugLoc();
223 MachineBasicBlock *MBB = MI.getParent();
224 BuildMI(*MBB, MI, DL, TII->get(Opcode.first), NewTmpReg)
225 .addReg(SrcReg)
226 .addImm(Imm0);
227 BuildMI(*MBB, MI, DL, TII->get(Opcode.second), NewDstReg)
228 .addReg(NewTmpReg)
229 .addImm(Imm1);
230 });
231}
232
/// Remove an ORRWrs that merely implements zero-extension when its source is
/// already known to have zeroed upper bits; returns true if MI was erased.
bool AArch64MIPeepholeOpt::visitORR(MachineInstr &MI) {
  // Check this ORR comes from below zero-extend pattern.
  //
  // def : Pat<(i64 (zext GPR32:$src)),
  //           (SUBREG_TO_REG (i32 0), (ORRWrs WZR, GPR32:$src, 0), sub_32)>;
  // Require the exact "ORR dst, WZR, src, lsl #0" shape: no shift...
  if (MI.getOperand(3).getImm() != 0)
    return false;

  // ...and WZR as the first source.
  if (MI.getOperand(1).getReg() != AArch64::WZR)
    return false;

  MachineInstr *SrcMI = MRI->getUniqueVRegDef(MI.getOperand(2).getReg());
  if (!SrcMI)
    return false;

  // From https://developer.arm.com/documentation/dui0801/b/BABBGCAC
  //
  // When you use the 32-bit form of an instruction, the upper 32 bits of the
  // source registers are ignored and the upper 32 bits of the destination
  // register are set to zero.
  //
  // If AArch64's 32-bit form of instruction defines the source operand of
  // zero-extend, we do not need the zero-extend. Let's check the MI's opcode is
  // real AArch64 instruction and if it is not, do not process the opcode
  // conservatively.
  if (SrcMI->getOpcode() == TargetOpcode::COPY &&
      SrcMI->getOperand(1).getReg().isVirtual()) {
    const TargetRegisterClass *RC =
        MRI->getRegClass(SrcMI->getOperand(1).getReg());

    // A COPY from an FPR will become a FMOVSWr, so do so now so that we know
    // that the upper bits are zero.
    if (RC != &AArch64::FPR32RegClass &&
        ((RC != &AArch64::FPR64RegClass && RC != &AArch64::FPR128RegClass) ||
         SrcMI->getOperand(1).getSubReg() != AArch64::ssub))
      return false;
    Register CpySrc = SrcMI->getOperand(1).getReg();
    if (SrcMI->getOperand(1).getSubReg() == AArch64::ssub) {
      // FMOVSWr needs an FPR32 operand; copy the ssub lane into a fresh one.
      CpySrc = MRI->createVirtualRegister(&AArch64::FPR32RegClass);
      BuildMI(*SrcMI->getParent(), SrcMI, SrcMI->getDebugLoc(),
              TII->get(TargetOpcode::COPY), CpySrc)
          .add(SrcMI->getOperand(1));
    }
    // Replace the generic COPY with an FMOVSWr, which zeroes the high bits.
    BuildMI(*SrcMI->getParent(), SrcMI, SrcMI->getDebugLoc(),
            TII->get(AArch64::FMOVSWr), SrcMI->getOperand(0).getReg())
        .addReg(CpySrc);
    SrcMI->eraseFromParent();
  }
  else if (SrcMI->getOpcode() <= TargetOpcode::GENERIC_OP_END)
    return false;

  // The ORR is now redundant: forward its source directly to all users.
  Register DefReg = MI.getOperand(0).getReg();
  Register SrcReg = MI.getOperand(2).getReg();
  MRI->replaceRegWith(DefReg, SrcReg);
  MRI->clearKillFlags(SrcReg);
  LLVM_DEBUG(dbgs() << "Removed: " << MI << "\n");
  MI.eraseFromParent();

  return true;
}
293
294bool AArch64MIPeepholeOpt::visitCSEL(MachineInstr &MI) {
295 // Replace CSEL with MOV when both inputs are the same register.
296 if (MI.getOperand(1).getReg() != MI.getOperand(2).getReg())
297 return false;
298
299 auto ZeroReg =
300 MI.getOpcode() == AArch64::CSELXr ? AArch64::XZR : AArch64::WZR;
301 auto OrOpcode =
302 MI.getOpcode() == AArch64::CSELXr ? AArch64::ORRXrs : AArch64::ORRWrs;
303
304 BuildMI(*MI.getParent(), MI, MI.getDebugLoc(), TII->get(OrOpcode))
305 .addReg(MI.getOperand(0).getReg(), RegState::Define)
306 .addReg(ZeroReg)
307 .addReg(MI.getOperand(1).getReg())
308 .addImm(0);
309
310 MI.eraseFromParent();
311 return true;
312}
313
314bool AArch64MIPeepholeOpt::visitINSERT(MachineInstr &MI) {
315 // Check this INSERT_SUBREG comes from below zero-extend pattern.
316 //
317 // From %reg = INSERT_SUBREG %reg(tied-def 0), %subreg, subidx
318 // To %reg:subidx = SUBREG_TO_REG 0, %subreg, subidx
319 //
320 // We're assuming the first operand to INSERT_SUBREG is irrelevant because a
321 // COPY would destroy the upper part of the register anyway
322 if (!MI.isRegTiedToDefOperand(1))
323 return false;
324
325 Register DstReg = MI.getOperand(0).getReg();
326 const TargetRegisterClass *RC = MRI->getRegClass(DstReg);
327 MachineInstr *SrcMI = MRI->getUniqueVRegDef(MI.getOperand(2).getReg());
328 if (!SrcMI)
329 return false;
330
331 // From https://developer.arm.com/documentation/dui0801/b/BABBGCAC
332 //
333 // When you use the 32-bit form of an instruction, the upper 32 bits of the
334 // source registers are ignored and the upper 32 bits of the destination
335 // register are set to zero.
336 //
337 // If AArch64's 32-bit form of instruction defines the source operand of
338 // zero-extend, we do not need the zero-extend. Let's check the MI's opcode is
339 // real AArch64 instruction and if it is not, do not process the opcode
340 // conservatively.
341 if ((SrcMI->getOpcode() <= TargetOpcode::GENERIC_OP_END) ||
342 !AArch64::GPR64allRegClass.hasSubClassEq(RC))
343 return false;
344
345 // Build a SUBREG_TO_REG instruction
346 MachineInstr *SubregMI =
347 BuildMI(*MI.getParent(), MI, MI.getDebugLoc(),
348 TII->get(TargetOpcode::SUBREG_TO_REG), DstReg)
349 .addImm(0)
350 .add(MI.getOperand(2))
351 .add(MI.getOperand(3));
352 LLVM_DEBUG(dbgs() << MI << " replace by:\n: " << *SubregMI << "\n");
353 (void)SubregMI;
354 MI.eraseFromParent();
355
356 return true;
357}
358
// Try to split Imm into two 12-bit unsigned halves, (Imm0 << 12) + Imm1, so
// it can be materialized by two ADD/SUB-immediate instructions.
template <typename T>
static bool splitAddSubImm(T Imm, unsigned RegSize, T &Imm0, T &Imm1) {
  // The immediate must be in the form of ((imm0 << 12) + imm1), in which both
  // imm0 and imm1 are non-zero 12-bit unsigned int.
  if ((Imm & 0xfff000) == 0 || (Imm & 0xfff) == 0 ||
      (Imm & ~static_cast<T>(0xffffff)) != 0)
    return false;

  // The immediate can not be composed via a single instruction.
  // NOTE(review): the extraction dropped the declaration of `Insn` and the
  // AArch64_IMM::expandMOVImm(...) call that fills it here — restore from
  // upstream.
  if (Insn.size() == 1)
    return false;

  // Split Imm into (Imm0 << 12) + Imm1;
  Imm0 = (Imm >> 12) & 0xfff;
  Imm1 = Imm & 0xfff;
  return true;
}
378
/// Split a MOV of a 24-bit constant feeding an ADD/SUB into two shifted
/// immediate instructions; tries the negated constant with the opposite
/// opcode as well. Returns true if MI was rewritten.
template <typename T>
bool AArch64MIPeepholeOpt::visitADDSUB(
    unsigned PosOpc, unsigned NegOpc, MachineInstr &MI) {
  // Try below transformation.
  //
  // ADDWrr X, MOVi32imm ==> ADDWri + ADDWri
  // ADDXrr X, MOVi64imm ==> ADDXri + ADDXri
  //
  // SUBWrr X, MOVi32imm ==> SUBWri + SUBWri
  // SUBXrr X, MOVi64imm ==> SUBXri + SUBXri
  //
  // The mov pseudo instruction could be expanded to multiple mov instructions
  // later. Let's try to split the constant operand of mov instruction into two
  // legal add/sub immediates. It makes only two ADD/SUB instructions instead of
  // multiple `mov` + `add/sub` instructions.

  // We can sometimes have ADDWrr WZR, MOVi32imm that have not been constant
  // folded. Make sure that we don't generate invalid instructions that use XZR
  // in those cases.
  if (MI.getOperand(1).getReg() == AArch64::XZR ||
      MI.getOperand(1).getReg() == AArch64::WZR)
    return false;

  return splitTwoPartImm<T>(
      MI,
      // Prefer the positive opcode; fall back to the negated immediate with
      // the opposite opcode (e.g. ADD of -C becomes SUB of C).
      [PosOpc, NegOpc](T Imm, unsigned RegSize, T &Imm0,
                       T &Imm1) -> std::optional<OpcodePair> {
        if (splitAddSubImm(Imm, RegSize, Imm0, Imm1))
          return std::make_pair(PosOpc, PosOpc);
        if (splitAddSubImm(-Imm, RegSize, Imm0, Imm1))
          return std::make_pair(NegOpc, NegOpc);
        return std::nullopt;
      },
      // First instruction applies the high half shifted left by 12; the
      // second applies the unshifted low half.
      [&TII = TII](MachineInstr &MI, OpcodePair Opcode, unsigned Imm0,
                   unsigned Imm1, Register SrcReg, Register NewTmpReg,
                   Register NewDstReg) {
        DebugLoc DL = MI.getDebugLoc();
        MachineBasicBlock *MBB = MI.getParent();
        BuildMI(*MBB, MI, DL, TII->get(Opcode.first), NewTmpReg)
            .addReg(SrcReg)
            .addImm(Imm0)
            .addImm(12);
        BuildMI(*MBB, MI, DL, TII->get(Opcode.second), NewDstReg)
            .addReg(NewTmpReg)
            .addImm(Imm1)
            .addImm(0);
      });
}
427
/// Like visitADDSUB but for flag-setting ADDS/SUBS: the split is only legal
/// when the flags are consumed solely as EQ/NE (C and V are not observed),
/// since only the final instruction of the pair sets NZCV.
template <typename T>
bool AArch64MIPeepholeOpt::visitADDSSUBS(
    OpcodePair PosOpcs, OpcodePair NegOpcs, MachineInstr &MI) {
  // Try the same transformation as ADDSUB but with additional requirement
  // that the condition code usages are only for Equal and Not Equal

  // Reject WZR/XZR sources for the same reason as visitADDSUB.
  if (MI.getOperand(1).getReg() == AArch64::XZR ||
      MI.getOperand(1).getReg() == AArch64::WZR)
    return false;

  return splitTwoPartImm<T>(
      MI,
      [PosOpcs, NegOpcs, &MI, &TRI = TRI,
       &MRI = MRI](T Imm, unsigned RegSize, T &Imm0,
                   T &Imm1) -> std::optional<OpcodePair> {
        OpcodePair OP;
        if (splitAddSubImm(Imm, RegSize, Imm0, Imm1))
          OP = PosOpcs;
        else if (splitAddSubImm(-Imm, RegSize, Imm0, Imm1))
          OP = NegOpcs;
        else
          return std::nullopt;
        // Check conditional uses last since it is expensive for scanning
        // proceeding instructions
        MachineInstr &SrcMI = *MRI->getUniqueVRegDef(MI.getOperand(1).getReg());
        std::optional<UsedNZCV> NZCVUsed = examineCFlagsUse(SrcMI, MI, *TRI);
        // Bail if flag use is unknown or C/V are read: the two-instruction
        // sequence only reproduces N and Z correctly.
        if (!NZCVUsed || NZCVUsed->C || NZCVUsed->V)
          return std::nullopt;
        return OP;
      },
      // Emit high-half (shifted by 12) then low-half; only the second opcode
      // is the flag-setting variant.
      [&TII = TII](MachineInstr &MI, OpcodePair Opcode, unsigned Imm0,
                   unsigned Imm1, Register SrcReg, Register NewTmpReg,
                   Register NewDstReg) {
        DebugLoc DL = MI.getDebugLoc();
        MachineBasicBlock *MBB = MI.getParent();
        BuildMI(*MBB, MI, DL, TII->get(Opcode.first), NewTmpReg)
            .addReg(SrcReg)
            .addImm(Imm0)
            .addImm(12);
        BuildMI(*MBB, MI, DL, TII->get(Opcode.second), NewDstReg)
            .addReg(NewTmpReg)
            .addImm(Imm1)
            .addImm(0);
      });
}
473
474// Checks if the corresponding MOV immediate instruction is applicable for
475// this peephole optimization.
476bool AArch64MIPeepholeOpt::checkMovImmInstr(MachineInstr &MI,
477 MachineInstr *&MovMI,
478 MachineInstr *&SubregToRegMI) {
479 // Check whether current MBB is in loop and the AND is loop invariant.
480 MachineBasicBlock *MBB = MI.getParent();
481 MachineLoop *L = MLI->getLoopFor(MBB);
482 if (L && !L->isLoopInvariant(MI))
483 return false;
484
485 // Check whether current MI's operand is MOV with immediate.
486 MovMI = MRI->getUniqueVRegDef(MI.getOperand(2).getReg());
487 if (!MovMI)
488 return false;
489
490 // If it is SUBREG_TO_REG, check its operand.
491 SubregToRegMI = nullptr;
492 if (MovMI->getOpcode() == TargetOpcode::SUBREG_TO_REG) {
493 SubregToRegMI = MovMI;
494 MovMI = MRI->getUniqueVRegDef(MovMI->getOperand(2).getReg());
495 if (!MovMI)
496 return false;
497 }
498
499 if (MovMI->getOpcode() != AArch64::MOVi32imm &&
500 MovMI->getOpcode() != AArch64::MOVi64imm)
501 return false;
502
503 // If the MOV has multiple uses, do not split the immediate because it causes
504 // more instructions.
505 if (!MRI->hasOneUse(MovMI->getOperand(0).getReg()))
506 return false;
507 if (SubregToRegMI && !MRI->hasOneUse(SubregToRegMI->getOperand(0).getReg()))
508 return false;
509
510 // It is OK to perform this peephole optimization.
511 return true;
512}
513
/// Driver for the immediate-splitting peepholes: validates the MOV feeding
/// MI, asks SplitAndOpc whether/how to split the constant, builds the two
/// replacement instructions via BuildInstr, and erases the old ones.
template <typename T>
bool AArch64MIPeepholeOpt::splitTwoPartImm(
    // NOTE(review): the extraction dropped the first parameter line here
    // (MachineInstr &MI) — restore from upstream.
    SplitAndOpcFunc<T> SplitAndOpc, BuildMIFunc BuildInstr) {
  unsigned RegSize = sizeof(T) * 8;
  assert((RegSize == 32 || RegSize == 64) &&
         "Invalid RegSize for legal immediate peephole optimization");

  // Perform several essential checks against current MI.
  MachineInstr *MovMI, *SubregToRegMI;
  if (!checkMovImmInstr(MI, MovMI, SubregToRegMI))
    return false;

  // Split the immediate to Imm0 and Imm1, and calculate the Opcode.
  T Imm = static_cast<T>(MovMI->getOperand(1).getImm()), Imm0, Imm1;
  // For the 32 bit form of instruction, the upper 32 bits of the destination
  // register are set to zero. If there is SUBREG_TO_REG, set the upper 32 bits
  // of Imm to zero. This is essential if the Immediate value was a negative
  // number since it was sign extended when we assign to the 64-bit Imm.
  if (SubregToRegMI)
    Imm &= 0xFFFFFFFF;
  OpcodePair Opcode;
  if (auto R = SplitAndOpc(Imm, RegSize, Imm0, Imm1))
    Opcode = *R;
  else
    return false;

  // Create new MIs using the first and second opcodes. Opcodes might differ for
  // flag setting operations that should only set flags on second instruction.
  // NewTmpReg = Opcode.first SrcReg Imm0
  // NewDstReg = Opcode.second NewTmpReg Imm1

  // Determine register classes for destinations and register operands
  MachineFunction *MF = MI.getMF();
  const TargetRegisterClass *FirstInstrDstRC =
      TII->getRegClass(TII->get(Opcode.first), 0, TRI, *MF);
  const TargetRegisterClass *FirstInstrOperandRC =
      TII->getRegClass(TII->get(Opcode.first), 1, TRI, *MF);
  const TargetRegisterClass *SecondInstrDstRC =
      (Opcode.first == Opcode.second)
          ? FirstInstrDstRC
          : TII->getRegClass(TII->get(Opcode.second), 0, TRI, *MF);
  const TargetRegisterClass *SecondInstrOperandRC =
      (Opcode.first == Opcode.second)
          ? FirstInstrOperandRC
          : TII->getRegClass(TII->get(Opcode.second), 1, TRI, *MF);

  // Get old registers destinations and new register destinations
  Register DstReg = MI.getOperand(0).getReg();
  Register SrcReg = MI.getOperand(1).getReg();
  Register NewTmpReg = MRI->createVirtualRegister(FirstInstrDstRC);
  // In the situation that DstReg is not Virtual (likely WZR or XZR), we want to
  // reuse that same destination register.
  Register NewDstReg = DstReg.isVirtual()
                           ? MRI->createVirtualRegister(SecondInstrDstRC)
                           : DstReg;

  // Constrain registers based on their new uses
  MRI->constrainRegClass(SrcReg, FirstInstrOperandRC);
  MRI->constrainRegClass(NewTmpReg, SecondInstrOperandRC);
  if (DstReg != NewDstReg)
    MRI->constrainRegClass(NewDstReg, MRI->getRegClass(DstReg));

  // Call the delegating operation to build the instruction
  BuildInstr(MI, Opcode, Imm0, Imm1, SrcReg, NewTmpReg, NewDstReg);

  // replaceRegWith changes MI's definition register. Keep it for SSA form until
  // deleting MI. Only if we made a new destination register.
  if (DstReg != NewDstReg) {
    MRI->replaceRegWith(DstReg, NewDstReg);
    MI.getOperand(0).setReg(DstReg);
  }

  // Record the MIs need to be removed.
  MI.eraseFromParent();
  if (SubregToRegMI)
    SubregToRegMI->eraseFromParent();
  MovMI->eraseFromParent();

  return true;
}
595
596bool AArch64MIPeepholeOpt::visitINSviGPR(MachineInstr &MI, unsigned Opc) {
597 // Check if this INSvi[X]gpr comes from COPY of a source FPR128
598 //
599 // From
600 // %intermediate1:gpr64 = COPY %src:fpr128
601 // %intermediate2:gpr32 = COPY %intermediate1:gpr64
602 // %dst:fpr128 = INSvi[X]gpr %dst_vec:fpr128, dst_index, %intermediate2:gpr32
603 // To
604 // %dst:fpr128 = INSvi[X]lane %dst_vec:fpr128, dst_index, %src:fpr128,
605 // src_index
606 // where src_index = 0, X = [8|16|32|64]
607
608 MachineInstr *SrcMI = MRI->getUniqueVRegDef(MI.getOperand(3).getReg());
609
610 // For a chain of COPY instructions, find the initial source register
611 // and check if it's an FPR128
612 while (true) {
613 if (!SrcMI || SrcMI->getOpcode() != TargetOpcode::COPY)
614 return false;
615
616 if (!SrcMI->getOperand(1).getReg().isVirtual())
617 return false;
618
619 if (MRI->getRegClass(SrcMI->getOperand(1).getReg()) ==
620 &AArch64::FPR128RegClass) {
621 break;
622 }
623 SrcMI = MRI->getUniqueVRegDef(SrcMI->getOperand(1).getReg());
624 }
625
626 Register DstReg = MI.getOperand(0).getReg();
627 Register SrcReg = SrcMI->getOperand(1).getReg();
628 MachineInstr *INSvilaneMI =
629 BuildMI(*MI.getParent(), MI, MI.getDebugLoc(), TII->get(Opc), DstReg)
630 .add(MI.getOperand(1))
631 .add(MI.getOperand(2))
632 .addUse(SrcReg, getRegState(SrcMI->getOperand(1)))
633 .addImm(0);
634
635 LLVM_DEBUG(dbgs() << MI << " replace by:\n: " << *INSvilaneMI << "\n");
636 (void)INSvilaneMI;
637 MI.eraseFromParent();
638 return true;
639}
640
641// All instructions that set a FPR64 will implicitly zero the top bits of the
642// register.
645 if (!MI->getOperand(0).isReg() || !MI->getOperand(0).isDef())
646 return false;
647 const TargetRegisterClass *RC = MRI->getRegClass(MI->getOperand(0).getReg());
648 if (RC != &AArch64::FPR64RegClass)
649 return false;
650 return MI->getOpcode() > TargetOpcode::GENERIC_OP_END;
651}
652
653bool AArch64MIPeepholeOpt::visitINSvi64lane(MachineInstr &MI) {
654 // Check the MI for low 64-bits sets zero for high 64-bits implicitly.
655 // We are expecting below case.
656 //
657 // %1:fpr64 = nofpexcept FCVTNv4i16 %0:fpr128, implicit $fpcr
658 // %6:fpr128 = IMPLICIT_DEF
659 // %5:fpr128 = INSERT_SUBREG %6:fpr128(tied-def 0), killed %1:fpr64, %subreg.dsub
660 // %7:fpr128 = INSvi64lane %5:fpr128(tied-def 0), 1, killed %3:fpr128, 0
661 MachineInstr *Low64MI = MRI->getUniqueVRegDef(MI.getOperand(1).getReg());
662 if (Low64MI->getOpcode() != AArch64::INSERT_SUBREG)
663 return false;
664 Low64MI = MRI->getUniqueVRegDef(Low64MI->getOperand(2).getReg());
665 if (!Low64MI || !is64bitDefwithZeroHigh64bit(Low64MI, MRI))
666 return false;
667
668 // Check there is `mov 0` MI for high 64-bits.
669 // We are expecting below cases.
670 //
671 // %2:fpr64 = MOVID 0
672 // %4:fpr128 = IMPLICIT_DEF
673 // %3:fpr128 = INSERT_SUBREG %4:fpr128(tied-def 0), killed %2:fpr64, %subreg.dsub
674 // %7:fpr128 = INSvi64lane %5:fpr128(tied-def 0), 1, killed %3:fpr128, 0
675 // or
676 // %5:fpr128 = MOVIv2d_ns 0
677 // %6:fpr64 = COPY %5.dsub:fpr128
678 // %8:fpr128 = IMPLICIT_DEF
679 // %7:fpr128 = INSERT_SUBREG %8:fpr128(tied-def 0), killed %6:fpr64, %subreg.dsub
680 // %11:fpr128 = INSvi64lane %9:fpr128(tied-def 0), 1, killed %7:fpr128, 0
681 MachineInstr *High64MI = MRI->getUniqueVRegDef(MI.getOperand(3).getReg());
682 if (!High64MI || High64MI->getOpcode() != AArch64::INSERT_SUBREG)
683 return false;
684 High64MI = MRI->getUniqueVRegDef(High64MI->getOperand(2).getReg());
685 if (High64MI && High64MI->getOpcode() == TargetOpcode::COPY)
686 High64MI = MRI->getUniqueVRegDef(High64MI->getOperand(1).getReg());
687 if (!High64MI || (High64MI->getOpcode() != AArch64::MOVID &&
688 High64MI->getOpcode() != AArch64::MOVIv2d_ns))
689 return false;
690 if (High64MI->getOperand(1).getImm() != 0)
691 return false;
692
693 // Let's remove MIs for high 64-bits.
694 Register OldDef = MI.getOperand(0).getReg();
695 Register NewDef = MI.getOperand(1).getReg();
696 MRI->constrainRegClass(NewDef, MRI->getRegClass(OldDef));
697 MRI->replaceRegWith(OldDef, NewDef);
698 MI.eraseFromParent();
699
700 return true;
701}
702
703bool AArch64MIPeepholeOpt::visitFMOVDr(MachineInstr &MI) {
704 // An FMOVDr sets the high 64-bits to zero implicitly, similar to ORR for GPR.
705 MachineInstr *Low64MI = MRI->getUniqueVRegDef(MI.getOperand(1).getReg());
706 if (!Low64MI || !is64bitDefwithZeroHigh64bit(Low64MI, MRI))
707 return false;
708
709 // Let's remove MIs for high 64-bits.
710 Register OldDef = MI.getOperand(0).getReg();
711 Register NewDef = MI.getOperand(1).getReg();
712 LLVM_DEBUG(dbgs() << "Removing: " << MI << "\n");
713 MRI->clearKillFlags(OldDef);
714 MRI->clearKillFlags(NewDef);
715 MRI->constrainRegClass(NewDef, MRI->getRegClass(OldDef));
716 MRI->replaceRegWith(OldDef, NewDef);
717 MI.eraseFromParent();
718
719 return true;
720}
721
/// Narrow a UBFMXri that is a 32-bit LSR/LSL in disguise to UBFMWri,
/// rebuilding the 64-bit result with SUBREG_TO_REG (high bits are zero).
/// Returns true if MI was replaced.
bool AArch64MIPeepholeOpt::visitUBFMXri(MachineInstr &MI) {
  // Check if the instruction is equivalent to a 32 bit LSR or LSL alias of
  // UBFM, and replace the UBFMXri instruction with its 32 bit variant, UBFMWri.
  int64_t Immr = MI.getOperand(2).getImm();
  int64_t Imms = MI.getOperand(3).getImm();

  bool IsLSR = Imms == 31 && Immr <= Imms;
  bool IsLSL = Immr == Imms + 33;
  if (!IsLSR && !IsLSL)
    return false;

  if (IsLSL) {
    // Rebase immr from the 64-bit encoding to the 32-bit one.
    Immr -= 32;
  }

  // Derive the 32-bit register classes from the 64-bit ones via sub_32.
  const TargetRegisterClass *DstRC64 =
      TII->getRegClass(TII->get(MI.getOpcode()), 0, TRI, *MI.getMF());
  const TargetRegisterClass *DstRC32 =
      TRI->getSubRegisterClass(DstRC64, AArch64::sub_32);
  assert(DstRC32 && "Destination register class of UBFMXri doesn't have a "
                    "sub_32 subregister class");

  const TargetRegisterClass *SrcRC64 =
      TII->getRegClass(TII->get(MI.getOpcode()), 1, TRI, *MI.getMF());
  const TargetRegisterClass *SrcRC32 =
      TRI->getSubRegisterClass(SrcRC64, AArch64::sub_32);
  assert(SrcRC32 && "Source register class of UBFMXri doesn't have a sub_32 "
                    "subregister class");

  Register DstReg64 = MI.getOperand(0).getReg();
  Register DstReg32 = MRI->createVirtualRegister(DstRC32);
  Register SrcReg64 = MI.getOperand(1).getReg();
  Register SrcReg32 = MRI->createVirtualRegister(SrcRC32);

  // Extract the low 32 bits of the source, shift them with UBFMWri, then
  // widen the result back to 64 bits with zeroed high half.
  BuildMI(*MI.getParent(), MI, MI.getDebugLoc(), TII->get(AArch64::COPY),
          SrcReg32)
      .addReg(SrcReg64, 0, AArch64::sub_32);
  BuildMI(*MI.getParent(), MI, MI.getDebugLoc(), TII->get(AArch64::UBFMWri),
          DstReg32)
      .addReg(SrcReg32)
      .addImm(Immr)
      .addImm(Imms);
  BuildMI(*MI.getParent(), MI, MI.getDebugLoc(),
          TII->get(AArch64::SUBREG_TO_REG), DstReg64)
      .addImm(0)
      .addReg(DstReg32)
      .addImm(AArch64::sub_32);
  MI.eraseFromParent();
  return true;
}
772
// Across a basic-block we might have in i32 extract from a value that only
// operates on upper bits (for example a sxtw). We can replace the COPY with a
// new version skipping the sxtw.
bool AArch64MIPeepholeOpt::visitCopy(MachineInstr &MI) {
  // Only handle single-use sub_32 extracts.
  Register InputReg = MI.getOperand(1).getReg();
  if (MI.getOperand(1).getSubReg() != AArch64::sub_32 ||
      !MRI->hasOneNonDBGUse(InputReg))
    return false;

  MachineInstr *SrcMI = MRI->getUniqueVRegDef(InputReg);
  // NOTE(review): the extraction dropped the declaration of `DeadInstrs`
  // (a small set of MachineInstr*) here — restore from upstream.
  DeadInstrs.insert(SrcMI);
  // Walk back through single-use full copies, collecting them for deletion.
  while (SrcMI && SrcMI->isFullCopy() &&
         MRI->hasOneNonDBGUse(SrcMI->getOperand(1).getReg())) {
    SrcMI = MRI->getUniqueVRegDef(SrcMI->getOperand(1).getReg());
    DeadInstrs.insert(SrcMI);
  }

  if (!SrcMI)
    return false;

  // Look for SXTW(X) and return Reg.
  auto getSXTWSrcReg = [](MachineInstr *SrcMI) -> Register {
    if (SrcMI->getOpcode() != AArch64::SBFMXri ||
        SrcMI->getOperand(2).getImm() != 0 ||
        SrcMI->getOperand(3).getImm() != 31)
      return AArch64::NoRegister;
    return SrcMI->getOperand(1).getReg();
  };
  // Look for SUBREG_TO_REG(ORRWrr(WZR, COPY(X.sub_32)))
  auto getUXTWSrcReg = [&](MachineInstr *SrcMI) -> Register {
    if (SrcMI->getOpcode() != AArch64::SUBREG_TO_REG ||
        SrcMI->getOperand(3).getImm() != AArch64::sub_32 ||
        !MRI->hasOneNonDBGUse(SrcMI->getOperand(2).getReg()))
      return AArch64::NoRegister;
    MachineInstr *Orr = MRI->getUniqueVRegDef(SrcMI->getOperand(2).getReg());
    if (!Orr || Orr->getOpcode() != AArch64::ORRWrr ||
        Orr->getOperand(1).getReg() != AArch64::WZR ||
        !MRI->hasOneNonDBGUse(Orr->getOperand(2).getReg()))
      return AArch64::NoRegister;
    MachineInstr *Cpy = MRI->getUniqueVRegDef(Orr->getOperand(2).getReg());
    if (!Cpy || Cpy->getOpcode() != AArch64::COPY ||
        Cpy->getOperand(1).getSubReg() != AArch64::sub_32)
      return AArch64::NoRegister;
    DeadInstrs.insert(Orr);
    return Cpy->getOperand(1).getReg();
  };

  // Since the extend only affected the upper bits, the low-32-bit extract can
  // read the pre-extend register directly.
  Register SrcReg = getSXTWSrcReg(SrcMI);
  if (!SrcReg)
    SrcReg = getUXTWSrcReg(SrcMI);
  if (!SrcReg)
    return false;

  MRI->constrainRegClass(SrcReg, MRI->getRegClass(InputReg));
  LLVM_DEBUG(dbgs() << "Optimizing: " << MI);
  MI.getOperand(1).setReg(SrcReg);
  LLVM_DEBUG(dbgs() << "        to: " << MI);
  // The bypassed chain is now dead; erase it.
  for (auto *DeadMI : DeadInstrs) {
    LLVM_DEBUG(dbgs() << "  Removing: " << *DeadMI);
    DeadMI->eraseFromParent();
  }
  return true;
}
837
/// Pass entry point: caches target info and loop analysis, then dispatches
/// each instruction in the function to the matching peephole visitor.
bool AArch64MIPeepholeOpt::runOnMachineFunction(MachineFunction &MF) {
  if (skipFunction(MF.getFunction()))
    return false;

  TII = static_cast<const AArch64InstrInfo *>(MF.getSubtarget().getInstrInfo());
  TRI = static_cast<const AArch64RegisterInfo *>(
  // NOTE(review): the extraction dropped the continuation of this cast (the
  // getRegisterInfo() argument and closing parens) — restore from upstream.
  MLI = &getAnalysis<MachineLoopInfoWrapperPass>().getLI();
  MRI = &MF.getRegInfo();

  // The visitors create/erase instructions assuming SSA virtual registers.
  assert(MRI->isSSA() && "Expected to be run on SSA form!");

  bool Changed = false;

  for (MachineBasicBlock &MBB : MF) {
    // NOTE(review): the extraction dropped the inner loop header here
    // (upstream iterates instructions with an early-increment range so
    // visitors may erase the current MI) — restore from upstream.
      switch (MI.getOpcode()) {
      default:
        break;
      case AArch64::INSERT_SUBREG:
        Changed |= visitINSERT(MI);
        break;
      case AArch64::ANDWrr:
        Changed |= visitAND<uint32_t>(AArch64::ANDWri, MI);
        break;
      case AArch64::ANDXrr:
        Changed |= visitAND<uint64_t>(AArch64::ANDXri, MI);
        break;
      case AArch64::ORRWrs:
        Changed |= visitORR(MI);
        break;
      case AArch64::ADDWrr:
        Changed |= visitADDSUB<uint32_t>(AArch64::ADDWri, AArch64::SUBWri, MI);
        break;
      case AArch64::SUBWrr:
        Changed |= visitADDSUB<uint32_t>(AArch64::SUBWri, AArch64::ADDWri, MI);
        break;
      case AArch64::ADDXrr:
        Changed |= visitADDSUB<uint64_t>(AArch64::ADDXri, AArch64::SUBXri, MI);
        break;
      case AArch64::SUBXrr:
        Changed |= visitADDSUB<uint64_t>(AArch64::SUBXri, AArch64::ADDXri, MI);
        break;
      case AArch64::ADDSWrr:
        Changed |=
            visitADDSSUBS<uint32_t>({AArch64::ADDWri, AArch64::ADDSWri},
                                    {AArch64::SUBWri, AArch64::SUBSWri}, MI);
        break;
      case AArch64::SUBSWrr:
        Changed |=
            visitADDSSUBS<uint32_t>({AArch64::SUBWri, AArch64::SUBSWri},
                                    {AArch64::ADDWri, AArch64::ADDSWri}, MI);
        break;
      case AArch64::ADDSXrr:
        Changed |=
            visitADDSSUBS<uint64_t>({AArch64::ADDXri, AArch64::ADDSXri},
                                    {AArch64::SUBXri, AArch64::SUBSXri}, MI);
        break;
      case AArch64::SUBSXrr:
        Changed |=
            visitADDSSUBS<uint64_t>({AArch64::SUBXri, AArch64::SUBSXri},
                                    {AArch64::ADDXri, AArch64::ADDSXri}, MI);
        break;
      case AArch64::CSELWr:
      case AArch64::CSELXr:
        Changed |= visitCSEL(MI);
        break;
      case AArch64::INSvi64gpr:
        Changed |= visitINSviGPR(MI, AArch64::INSvi64lane);
        break;
      case AArch64::INSvi32gpr:
        Changed |= visitINSviGPR(MI, AArch64::INSvi32lane);
        break;
      case AArch64::INSvi16gpr:
        Changed |= visitINSviGPR(MI, AArch64::INSvi16lane);
        break;
      case AArch64::INSvi8gpr:
        Changed |= visitINSviGPR(MI, AArch64::INSvi8lane);
        break;
      case AArch64::INSvi64lane:
        Changed |= visitINSvi64lane(MI);
        break;
      case AArch64::FMOVDr:
        Changed |= visitFMOVDr(MI);
        break;
      case AArch64::UBFMXri:
        Changed |= visitUBFMXri(MI);
        break;
      case AArch64::COPY:
        Changed |= visitCopy(MI);
        break;
      }
    }
  }

  return Changed;
}
935
937 return new AArch64MIPeepholeOpt();
938}
unsigned const MachineRegisterInfo * MRI
unsigned HighestBitSet
unsigned T T & Imm2Enc
unsigned T & Imm1Enc
SmallVector< AArch64_IMM::ImmInsnModel, 4 > Insn
unsigned RegSize
static bool is64bitDefwithZeroHigh64bit(MachineInstr *MI, MachineRegisterInfo *MRI)
static bool splitAddSubImm(T Imm, unsigned RegSize, T &Imm0, T &Imm1)
unsigned LowestBitSet
MachineBasicBlock & MBB
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
#define LLVM_DEBUG(...)
Definition: Debug.h:106
const HexagonInstrInfo * TII
IRTranslator LLVM IR MI
unsigned const TargetRegisterInfo * TRI
#define INITIALIZE_PASS(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:38
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
#define OP(OPC)
Definition: Instruction.h:45
Represent the analysis usage information of a pass.
AnalysisUsage & addRequired()
void setPreservesCFG()
This function should be called by the pass iff it does not add or remove basic blocks from the function or modify terminator instructions in any way.
Definition: Pass.cpp:256
A debug info location.
Definition: DebugLoc.h:33
FunctionPass class - This class is used to implement most global optimizations.
Definition: Pass.h:310
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of passes that operate on the MachineFunction representation.
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
virtual bool runOnMachineFunction(MachineFunction &MF)=0
runOnMachineFunction - This method must be overloaded to perform the desired machine code transformation or analysis.
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
Function & getFunction()
Return the LLVM function that this machine code represents.
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
const MachineInstrBuilder & add(const MachineOperand &MO) const
const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
const MachineInstrBuilder & addUse(Register RegNo, unsigned Flags=0, unsigned SubReg=0) const
Add a virtual register use operand.
Representation of each machine instruction.
Definition: MachineInstr.h:69
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
Definition: MachineInstr.h:575
const MachineBasicBlock * getParent() const
Definition: MachineInstr.h:347
bool isFullCopy() const
const DebugLoc & getDebugLoc() const
Returns the debug location id of this MachineInstr.
Definition: MachineInstr.h:499
void eraseFromParent()
Unlink 'this' from the containing basic block and delete it.
const MachineOperand & getOperand(unsigned i) const
Definition: MachineInstr.h:585
unsigned getSubReg() const
int64_t getImm() const
Register getReg() const
getReg - Returns the register number.
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at application startup.
virtual StringRef getPassName() const
getPassName - Return a nice clean name for a pass.
Definition: Pass.cpp:81
Wrapper class representing virtual and physical registers.
Definition: Register.h:19
constexpr bool isVirtual() const
Return true if the specified register number is in the virtual register namespace.
Definition: Register.h:91
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition: SmallPtrSet.h:384
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
Definition: SmallPtrSet.h:519
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1196
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:51
virtual const TargetRegisterInfo * getRegisterInfo() const
getRegisterInfo - If register information is available, return it.
virtual const TargetInstrInfo * getInstrInfo() const
static bool isLogicalImmediate(uint64_t imm, unsigned regSize)
isLogicalImmediate - Return true if the immediate is valid for a logical immediate instruction of the given register size.
static uint64_t encodeLogicalImmediate(uint64_t imm, unsigned regSize)
encodeLogicalImmediate - Return the encoded immediate value for a logical immediate instruction of the given register size.
void expandMOVImm(uint64_t Imm, unsigned BitSize, SmallVectorImpl< ImmInsnModel > &Insn)
Expand a MOVi32imm or MOVi64imm pseudo instruction to one or more real move-immediate instructions to synthesize the immediate.
unsigned ID
LLVM IR allows the use of arbitrary numbers as calling convention identifiers.
Definition: CallingConv.h:24
@ Define
Register definition.
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
FunctionPass * createAArch64MIPeepholeOptPass()
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting iteration.
Definition: STLExtras.h:657
unsigned Log2_64(uint64_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
Definition: MathExtras.h:346
int countr_zero(T Val)
Count number of 0's from the least significant bit to the most stopping at the first 1.
Definition: bit.h:215
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
std::optional< UsedNZCV > examineCFlagsUse(MachineInstr &MI, MachineInstr &CmpInstr, const TargetRegisterInfo &TRI, SmallVectorImpl< MachineInstr * > *CCUseInstrs=nullptr)
unsigned getRegState(const MachineOperand &RegOp)
Get all register state flags from machine operand RegOp.
void initializeAArch64MIPeepholeOptPass(PassRegistry &)