LLVM 23.0.0git
AArch64MIPeepholeOpt.cpp
Go to the documentation of this file.
1//===- AArch64MIPeepholeOpt.cpp - AArch64 MI peephole optimization pass ---===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This pass performs below peephole optimizations on MIR level.
10//
11// 1. MOVi32imm + (ANDS?|EOR|ORR)Wrr ==> (AND|EOR|ORR)Wri + (ANDS?|EOR|ORR)Wri
12// MOVi64imm + (ANDS?|EOR|ORR)Xrr ==> (AND|EOR|ORR)Xri + (ANDS?|EOR|ORR)Xri
13//
14// 2. MOVi32imm + ADDWrr ==> ADDWri + ADDWri
15// MOVi64imm + ADDXrr ==> ADDXri + ADDXri
16//
17// 3. MOVi32imm + SUBWrr ==> SUBWri + SUBWri
18// MOVi64imm + SUBXrr ==> SUBXri + SUBXri
19//
20// The mov pseudo instruction could be expanded to multiple mov instructions
21// later. In this case, we could try to split the constant operand of mov
22// instruction into two immediates which can be directly encoded into
23// *Wri/*Xri instructions. It makes two AND/ADD/SUB instructions instead of
24// multiple `mov` + `and/add/sub` instructions.
25//
26// 4. Remove redundant ORRWrs which is generated by zero-extend.
27//
28// %3:gpr32 = ORRWrs $wzr, %2, 0
29// %4:gpr64 = SUBREG_TO_REG %3, %subreg.sub_32
30//
31// If AArch64's 32-bit form of instruction defines the source operand of
32// ORRWrs, we can remove the ORRWrs because the upper 32 bits of the source
33// operand are set to zero.
34//
35// 5. %reg = INSERT_SUBREG %reg(tied-def 0), %subreg, subidx
36// ==> %reg:subidx = SUBREG_TO_REG %subreg, subidx
37//
38// 6. %intermediate:gpr32 = COPY %src:fpr128
39// %dst:fpr128 = INSvi32gpr %dst_vec:fpr128, dst_index, %intermediate:gpr32
40// ==> %dst:fpr128 = INSvi32lane %dst_vec:fpr128, dst_index, %src:fpr128, 0
41//
42// In cases where a source FPR is copied to a GPR in order to be copied
43// to a destination FPR, we can directly copy the values between the FPRs,
44// eliminating the use of the Integer unit. When we match a pattern of
45// INSvi[X]gpr that is preceded by a chain of COPY instructions from a FPR
46// source, we use the INSvi[X]lane to replace the COPY & INSvi[X]gpr
47// instructions.
48//
49// 7. If MI sets zero for high 64-bits implicitly, remove `mov 0` for high
50// 64-bits. For example,
51//
52// %1:fpr64 = nofpexcept FCVTNv4i16 %0:fpr128, implicit $fpcr
53// %2:fpr64 = MOVID 0
54// %4:fpr128 = IMPLICIT_DEF
55// %3:fpr128 = INSERT_SUBREG %4:fpr128(tied-def 0), %2:fpr64, %subreg.dsub
56// %6:fpr128 = IMPLICIT_DEF
57// %5:fpr128 = INSERT_SUBREG %6:fpr128(tied-def 0), %1:fpr64, %subreg.dsub
58// %7:fpr128 = INSvi64lane %5:fpr128(tied-def 0), 1, %3:fpr128, 0
59// ==>
60// %1:fpr64 = nofpexcept FCVTNv4i16 %0:fpr128, implicit $fpcr
61// %6:fpr128 = IMPLICIT_DEF
62// %7:fpr128 = INSERT_SUBREG %6:fpr128(tied-def 0), %1:fpr64, %subreg.dsub
63//
64// 8. Remove redundant CSELs that select between identical registers, by
65// replacing them with unconditional moves.
66//
67// 9. Replace UBFMXri with UBFMWri if the instruction is equivalent to a 32 bit
68// LSR or LSL alias of UBFM.
69//
70//===----------------------------------------------------------------------===//
71
72#include "AArch64ExpandImm.h"
73#include "AArch64InstrInfo.h"
77
78using namespace llvm;
79
80#define DEBUG_TYPE "aarch64-mi-peephole-opt"
81
82namespace {
83
84class AArch64MIPeepholeOptImpl {
85public:
86 const AArch64InstrInfo *TII;
88 MachineLoopInfo *MLI;
90
91 explicit AArch64MIPeepholeOptImpl(MachineLoopInfo &MLI) : MLI(&MLI) {}
92
93 bool run(MachineFunction &MF);
94
95private:
96 using OpcodePair = std::pair<unsigned, unsigned>;
97 template <typename T>
98 using SplitAndOpcFunc =
99 std::function<std::optional<OpcodePair>(T, unsigned, T &, T &)>;
100 using BuildMIFunc =
101 std::function<void(MachineInstr &, OpcodePair, unsigned, unsigned,
103
104 /// For instructions where an immediate operand could be split into two
105 /// separate immediate instructions, use the splitTwoPartImm to handle the
106 /// optimization.
107 ///
108 /// To implement, the following function types must be passed to
109 /// splitTwoPartImm. A SplitAndOpcFunc must be implemented that determines if
110 /// splitting the immediate is valid and returns the associated new opcode. A
111 /// BuildMIFunc must be implemented to build the two immediate instructions.
112 ///
113 /// Example Pattern (where IMM would require 2+ MOV instructions):
114 /// %dst = <Instr>rr %src IMM [...]
115 /// becomes:
116 /// %tmp = <Instr>ri %src (encode half IMM) [...]
117 /// %dst = <Instr>ri %tmp (encode half IMM) [...]
118 template <typename T>
119 bool splitTwoPartImm(MachineInstr &MI,
120 SplitAndOpcFunc<T> SplitAndOpc, BuildMIFunc BuildInstr);
121
122 bool checkMovImmInstr(MachineInstr &MI, MachineInstr *&MovMI,
123 MachineInstr *&SubregToRegMI);
124
125 template <typename T>
126 bool visitADDSUB(unsigned PosOpc, unsigned NegOpc, MachineInstr &MI);
127 template <typename T>
128 bool visitADDSSUBS(OpcodePair PosOpcs, OpcodePair NegOpcs, MachineInstr &MI);
129
130 // Strategy used to split logical immediate bitmasks.
131 enum class SplitStrategy {
132 Intersect,
133 Disjoint,
134 };
135 template <typename T>
136 bool trySplitLogicalImm(unsigned Opc, MachineInstr &MI,
137 SplitStrategy Strategy, unsigned OtherOpc = 0);
138 bool visitORR(MachineInstr &MI);
139 bool visitCSEL(MachineInstr &MI);
140 bool visitINSERT(MachineInstr &MI);
141 bool visitINSviGPR(MachineInstr &MI, unsigned Opc);
142 bool visitINSvi64lane(MachineInstr &MI);
143 bool visitFMOVDr(MachineInstr &MI);
144 bool visitUBFMXri(MachineInstr &MI);
145 bool visitCopy(MachineInstr &MI);
146};
147
148struct AArch64MIPeepholeOptLegacy : public MachineFunctionPass {
149 static char ID;
150
151 AArch64MIPeepholeOptLegacy() : MachineFunctionPass(ID) {}
152
153 bool runOnMachineFunction(MachineFunction &MF) override;
154
155 StringRef getPassName() const override {
156 return "AArch64 MI Peephole Optimization pass";
157 }
158
159 void getAnalysisUsage(AnalysisUsage &AU) const override {
160 AU.setPreservesCFG();
163 }
164};
165
166char AArch64MIPeepholeOptLegacy::ID = 0;
167
168} // end anonymous namespace
169
170INITIALIZE_PASS(AArch64MIPeepholeOptLegacy, "aarch64-mi-peephole-opt",
171 "AArch64 MI Peephole Optimization", false, false)
172
173template <typename T>
174static bool splitBitmaskImm(T Imm, unsigned RegSize, T &Imm1Enc, T &Imm2Enc) {
175 T UImm = static_cast<T>(Imm);
176 assert(UImm && (UImm != ~static_cast<T>(0)) && "Invalid immediate!");
177
178 // The bitmask immediate consists of consecutive ones. Let's say there is
179 // constant 0b00000000001000000000010000000000 which does not consist of
180 // consecutive ones. We can split it in to two bitmask immediate like
181 // 0b00000000001111111111110000000000 and 0b11111111111000000000011111111111.
182 // If we do AND with these two bitmask immediate, we can see original one.
184 unsigned HighestBitSet = Log2_64(UImm);
185
186 // Create a mask which is filled with one from the position of lowest bit set
187 // to the position of highest bit set.
188 T NewImm1 = (static_cast<T>(2) << HighestBitSet) -
189 (static_cast<T>(1) << LowestBitSet);
190 // Create a mask which is filled with one outside the position of lowest bit
191 // set and the position of highest bit set.
193
194 // If the split value is not valid bitmask immediate, do not split this
195 // constant.
197 return false;
198
201 return true;
202}
203
204template <typename T>
205static bool splitDisjointBitmaskImm(T Imm, unsigned RegSize, T &Imm1Enc,
206 T &Imm2Enc) {
207 assert(Imm && (Imm != ~static_cast<T>(0)) && "Invalid immediate!");
208
209 // Try to split a bitmask of the form 0b00000000011000000000011110000000 into
210 // two disjoint masks such as 0b00000000011000000000000000000000 and
211 // 0b00000000000000000000011110000000 where the inclusive/exclusive OR of the
212 // new masks match the original mask.
213 unsigned LowestBitSet = llvm::countr_zero(Imm);
214 unsigned LowestGapBitUnset =
216
217 // Create a mask for the least significant group of consecutive ones.
218 assert(LowestGapBitUnset < sizeof(T) * CHAR_BIT && "Undefined behaviour!");
219 T NewImm1 = (static_cast<T>(1) << LowestGapBitUnset) -
220 (static_cast<T>(1) << LowestBitSet);
221 // Create a disjoint mask for the remaining ones.
222 T NewImm2 = Imm & ~NewImm1;
223
224 // Do not split if NewImm2 is not a valid bitmask immediate.
226 return false;
227
230 return true;
231}
232
233template <typename T>
234bool AArch64MIPeepholeOptImpl::trySplitLogicalImm(unsigned Opc,
236 SplitStrategy Strategy,
237 unsigned OtherOpc) {
238 // Try below transformations.
239 //
240 // MOVi32imm + (ANDS?|EOR|ORR)Wrr ==> (AND|EOR|ORR)Wri + (ANDS?|EOR|ORR)Wri
241 // MOVi64imm + (ANDS?|EOR|ORR)Xrr ==> (AND|EOR|ORR)Xri + (ANDS?|EOR|ORR)Xri
242 //
243 // The mov pseudo instruction could be expanded to multiple mov instructions
244 // later. Let's try to split the constant operand of mov instruction into two
245 // bitmask immediates based on the given split strategy. It makes only two
246 // logical instructions instead of multiple mov + logic instructions.
247
248 return splitTwoPartImm<T>(
249 MI,
250 [Opc, Strategy, OtherOpc](T Imm, unsigned RegSize, T &Imm0,
251 T &Imm1) -> std::optional<OpcodePair> {
252 // If this immediate is already a suitable bitmask, don't split it.
253 // TODO: Should we just combine the two instructions in this case?
255 return std::nullopt;
256
257 // If this immediate can be handled by one instruction, don't split it.
260 if (Insn.size() == 1)
261 return std::nullopt;
262
263 bool SplitSucc = false;
264 switch (Strategy) {
265 case SplitStrategy::Intersect:
266 SplitSucc = splitBitmaskImm(Imm, RegSize, Imm0, Imm1);
267 break;
268 case SplitStrategy::Disjoint:
269 SplitSucc = splitDisjointBitmaskImm(Imm, RegSize, Imm0, Imm1);
270 break;
271 }
272 if (SplitSucc)
273 return std::make_pair(Opc, !OtherOpc ? Opc : OtherOpc);
274 return std::nullopt;
275 },
276 [&TII = TII](MachineInstr &MI, OpcodePair Opcode, unsigned Imm0,
277 unsigned Imm1, Register SrcReg, Register NewTmpReg,
278 Register NewDstReg) {
279 DebugLoc DL = MI.getDebugLoc();
280 MachineBasicBlock *MBB = MI.getParent();
281 BuildMI(*MBB, MI, DL, TII->get(Opcode.first), NewTmpReg)
282 .addReg(SrcReg)
283 .addImm(Imm0);
284 BuildMI(*MBB, MI, DL, TII->get(Opcode.second), NewDstReg)
285 .addReg(NewTmpReg)
286 .addImm(Imm1);
287 });
288}
289
bool AArch64MIPeepholeOptImpl::visitORR(MachineInstr &MI) {
  // Remove a redundant ORRWrs that implements a zero-extend (optimization #4
  // in the file header). MI is expected to be an ORRWrs matching the pattern:
  //
  // def : Pat<(i64 (zext GPR32:$src)),
  //           (SUBREG_TO_REG (ORRWrs WZR, GPR32:$src, 0), sub_32)>;
  //
  // Operand 3 is the shift amount; only an unshifted ORR from WZR is a pure
  // register move and therefore removable.
  if (MI.getOperand(3).getImm() != 0)
    return false;

  if (MI.getOperand(1).getReg() != AArch64::WZR)
    return false;

  // Need a unique SSA definition of the moved value to reason about it.
  MachineInstr *SrcMI = MRI->getUniqueVRegDef(MI.getOperand(2).getReg());
  if (!SrcMI)
    return false;

  // From https://developer.arm.com/documentation/dui0801/b/BABBGCAC
  //
  // When you use the 32-bit form of an instruction, the upper 32 bits of the
  // source registers are ignored and the upper 32 bits of the destination
  // register are set to zero.
  //
  // If AArch64's 32-bit form of instruction defines the source operand of
  // zero-extend, we do not need the zero-extend. Let's check the MI's opcode is
  // real AArch64 instruction and if it is not, do not process the opcode
  // conservatively.
  if (SrcMI->getOpcode() == TargetOpcode::COPY &&
      SrcMI->getOperand(1).getReg().isVirtual()) {
    const TargetRegisterClass *RC =
        MRI->getRegClass(SrcMI->getOperand(1).getReg());

    // A COPY from an FPR will become a FMOVSWr, so do so now so that we know
    // that the upper bits are zero. Accept either a direct FPR32 copy, or a
    // copy of the ssub sub-register of a wider FP/SVE register.
    if (RC != &AArch64::FPR32RegClass &&
        ((RC != &AArch64::FPR64RegClass && RC != &AArch64::FPR128RegClass &&
          RC != &AArch64::ZPRRegClass) ||
         SrcMI->getOperand(1).getSubReg() != AArch64::ssub))
      return false;
    Register CpySrc;
    if (SrcMI->getOperand(1).getSubReg() == AArch64::ssub) {
      // Extract the ssub sub-register into a fresh FPR32 first, since
      // FMOVSWr needs a plain FPR32 source operand.
      CpySrc = MRI->createVirtualRegister(&AArch64::FPR32RegClass);
      BuildMI(*SrcMI->getParent(), SrcMI, SrcMI->getDebugLoc(),
              TII->get(TargetOpcode::COPY), CpySrc)
          .add(SrcMI->getOperand(1));
    } else {
      CpySrc = SrcMI->getOperand(1).getReg();
    }
    // Replace the COPY with an explicit FMOVSWr, which zeroes the upper
    // 32 bits of its GPR destination.
    BuildMI(*SrcMI->getParent(), SrcMI, SrcMI->getDebugLoc(),
            TII->get(AArch64::FMOVSWr), SrcMI->getOperand(0).getReg())
        .addReg(CpySrc);
    SrcMI->eraseFromParent();
  } else if (SrcMI->getOpcode() <= TargetOpcode::GENERIC_OP_END)
    return false;

  // The move is now known to be redundant: forward the source register to
  // all users of the ORR's destination and delete the ORR.
  Register DefReg = MI.getOperand(0).getReg();
  Register SrcReg = MI.getOperand(2).getReg();
  MRI->replaceRegWith(DefReg, SrcReg);
  // Kill flags on SrcReg may no longer be accurate after extending its
  // live range.
  MRI->clearKillFlags(SrcReg);
  LLVM_DEBUG(dbgs() << "Removed: " << MI << "\n");
  MI.eraseFromParent();

  return true;
}
353
354bool AArch64MIPeepholeOptImpl::visitCSEL(MachineInstr &MI) {
355 // Replace CSEL with MOV when both inputs are the same register.
356 if (MI.getOperand(1).getReg() != MI.getOperand(2).getReg())
357 return false;
358
359 auto ZeroReg =
360 MI.getOpcode() == AArch64::CSELXr ? AArch64::XZR : AArch64::WZR;
361 auto OrOpcode =
362 MI.getOpcode() == AArch64::CSELXr ? AArch64::ORRXrs : AArch64::ORRWrs;
363
364 BuildMI(*MI.getParent(), MI, MI.getDebugLoc(), TII->get(OrOpcode))
365 .addReg(MI.getOperand(0).getReg(), RegState::Define)
366 .addReg(ZeroReg)
367 .addReg(MI.getOperand(1).getReg())
368 .addImm(0);
369
370 MI.eraseFromParent();
371 return true;
372}
373
374bool AArch64MIPeepholeOptImpl::visitINSERT(MachineInstr &MI) {
375 // Check this INSERT_SUBREG comes from below zero-extend pattern.
376 //
377 // From %reg = INSERT_SUBREG %reg(tied-def 0), %subreg, subidx
378 // To %reg:subidx = SUBREG_TO_REG %subreg, subidx
379 //
380 // We're assuming the first operand to INSERT_SUBREG is irrelevant because a
381 // COPY would destroy the upper part of the register anyway
382 if (!MI.isRegTiedToDefOperand(1))
383 return false;
384
385 Register DstReg = MI.getOperand(0).getReg();
386 const TargetRegisterClass *RC = MRI->getRegClass(DstReg);
387 MachineInstr *SrcMI = MRI->getUniqueVRegDef(MI.getOperand(2).getReg());
388 if (!SrcMI)
389 return false;
390
391 // From https://developer.arm.com/documentation/dui0801/b/BABBGCAC
392 //
393 // When you use the 32-bit form of an instruction, the upper 32 bits of the
394 // source registers are ignored and the upper 32 bits of the destination
395 // register are set to zero.
396 //
397 // If AArch64's 32-bit form of instruction defines the source operand of
398 // zero-extend, we do not need the zero-extend. Let's check the MI's opcode is
399 // real AArch64 instruction and if it is not, do not process the opcode
400 // conservatively.
401 if ((SrcMI->getOpcode() <= TargetOpcode::GENERIC_OP_END) ||
402 !AArch64::GPR64allRegClass.hasSubClassEq(RC))
403 return false;
404
405 // Build a SUBREG_TO_REG instruction
406 MachineInstr *SubregMI =
407 BuildMI(*MI.getParent(), MI, MI.getDebugLoc(),
408 TII->get(TargetOpcode::SUBREG_TO_REG), DstReg)
409 .add(MI.getOperand(2))
410 .add(MI.getOperand(3));
411 LLVM_DEBUG(dbgs() << MI << " replace by:\n: " << *SubregMI << "\n");
412 (void)SubregMI;
413 MI.eraseFromParent();
414
415 return true;
416}
417
418template <typename T>
419static bool splitAddSubImm(T Imm, unsigned RegSize, T &Imm0, T &Imm1) {
420 // The immediate must be in the form of ((imm0 << 12) + imm1), in which both
421 // imm0 and imm1 are non-zero 12-bit unsigned int.
422 if ((Imm & 0xfff000) == 0 || (Imm & 0xfff) == 0 ||
423 (Imm & ~static_cast<T>(0xffffff)) != 0)
424 return false;
425
426 // The immediate can not be composed via a single instruction.
429 if (Insn.size() == 1)
430 return false;
431
432 // Split Imm into (Imm0 << 12) + Imm1;
433 Imm0 = (Imm >> 12) & 0xfff;
434 Imm1 = Imm & 0xfff;
435 return true;
436}
437
438template <typename T>
439bool AArch64MIPeepholeOptImpl::visitADDSUB(unsigned PosOpc, unsigned NegOpc,
440 MachineInstr &MI) {
441 // Try below transformation.
442 //
443 // ADDWrr X, MOVi32imm ==> ADDWri + ADDWri
444 // ADDXrr X, MOVi64imm ==> ADDXri + ADDXri
445 //
446 // SUBWrr X, MOVi32imm ==> SUBWri + SUBWri
447 // SUBXrr X, MOVi64imm ==> SUBXri + SUBXri
448 //
449 // The mov pseudo instruction could be expanded to multiple mov instructions
450 // later. Let's try to split the constant operand of mov instruction into two
451 // legal add/sub immediates. It makes only two ADD/SUB instructions instead of
452 // multiple `mov` + `and/sub` instructions.
453
454 // We can sometimes have ADDWrr WZR, MULi32imm that have not been constant
455 // folded. Make sure that we don't generate invalid instructions that use XZR
456 // in those cases.
457 if (MI.getOperand(1).getReg() == AArch64::XZR ||
458 MI.getOperand(1).getReg() == AArch64::WZR)
459 return false;
460
461 return splitTwoPartImm<T>(
462 MI,
463 [PosOpc, NegOpc](T Imm, unsigned RegSize, T &Imm0,
464 T &Imm1) -> std::optional<OpcodePair> {
465 if (splitAddSubImm(Imm, RegSize, Imm0, Imm1))
466 return std::make_pair(PosOpc, PosOpc);
467 if (splitAddSubImm(-Imm, RegSize, Imm0, Imm1))
468 return std::make_pair(NegOpc, NegOpc);
469 return std::nullopt;
470 },
471 [&TII = TII](MachineInstr &MI, OpcodePair Opcode, unsigned Imm0,
472 unsigned Imm1, Register SrcReg, Register NewTmpReg,
473 Register NewDstReg) {
474 DebugLoc DL = MI.getDebugLoc();
475 MachineBasicBlock *MBB = MI.getParent();
476 BuildMI(*MBB, MI, DL, TII->get(Opcode.first), NewTmpReg)
477 .addReg(SrcReg)
478 .addImm(Imm0)
479 .addImm(12);
480 BuildMI(*MBB, MI, DL, TII->get(Opcode.second), NewDstReg)
481 .addReg(NewTmpReg)
482 .addImm(Imm1)
483 .addImm(0);
484 });
485}
486
487template <typename T>
488bool AArch64MIPeepholeOptImpl::visitADDSSUBS(OpcodePair PosOpcs,
489 OpcodePair NegOpcs,
490 MachineInstr &MI) {
491 // Try the same transformation as ADDSUB but with additional requirement
492 // that the condition code usages are only for Equal and Not Equal
493
494 if (MI.getOperand(1).getReg() == AArch64::XZR ||
495 MI.getOperand(1).getReg() == AArch64::WZR)
496 return false;
497
498 return splitTwoPartImm<T>(
499 MI,
500 [PosOpcs, NegOpcs, &MI, &TRI = TRI,
501 &MRI = MRI](T Imm, unsigned RegSize, T &Imm0,
502 T &Imm1) -> std::optional<OpcodePair> {
503 OpcodePair OP;
504 if (splitAddSubImm(Imm, RegSize, Imm0, Imm1))
505 OP = PosOpcs;
506 else if (splitAddSubImm(-Imm, RegSize, Imm0, Imm1))
507 OP = NegOpcs;
508 else
509 return std::nullopt;
510 // Check conditional uses last since it is expensive for scanning
511 // proceeding instructions
512 MachineInstr &SrcMI = *MRI->getUniqueVRegDef(MI.getOperand(1).getReg());
513 std::optional<UsedNZCV> NZCVUsed = examineCFlagsUse(SrcMI, MI, *TRI);
514 if (!NZCVUsed || NZCVUsed->C || NZCVUsed->V)
515 return std::nullopt;
516 return OP;
517 },
518 [&TII = TII](MachineInstr &MI, OpcodePair Opcode, unsigned Imm0,
519 unsigned Imm1, Register SrcReg, Register NewTmpReg,
520 Register NewDstReg) {
521 DebugLoc DL = MI.getDebugLoc();
522 MachineBasicBlock *MBB = MI.getParent();
523 BuildMI(*MBB, MI, DL, TII->get(Opcode.first), NewTmpReg)
524 .addReg(SrcReg)
525 .addImm(Imm0)
526 .addImm(12);
527 BuildMI(*MBB, MI, DL, TII->get(Opcode.second), NewDstReg)
528 .addReg(NewTmpReg)
529 .addImm(Imm1)
530 .addImm(0);
531 });
532}
533
534// Checks if the corresponding MOV immediate instruction is applicable for
535// this peephole optimization.
536bool AArch64MIPeepholeOptImpl::checkMovImmInstr(MachineInstr &MI,
537 MachineInstr *&MovMI,
538 MachineInstr *&SubregToRegMI) {
539 // Check whether current MBB is in loop and the AND is loop invariant.
540 MachineBasicBlock *MBB = MI.getParent();
541 MachineLoop *L = MLI->getLoopFor(MBB);
542 if (L && !L->isLoopInvariant(MI))
543 return false;
544
545 // Check whether current MI's operand is MOV with immediate.
546 MovMI = MRI->getUniqueVRegDef(MI.getOperand(2).getReg());
547 if (!MovMI)
548 return false;
549
550 // If it is SUBREG_TO_REG, check its operand.
551 SubregToRegMI = nullptr;
552 if (MovMI->getOpcode() == TargetOpcode::SUBREG_TO_REG) {
553 SubregToRegMI = MovMI;
554 MovMI = MRI->getUniqueVRegDef(MovMI->getOperand(1).getReg());
555 if (!MovMI)
556 return false;
557 }
558
559 if (MovMI->getOpcode() != AArch64::MOVi32imm &&
560 MovMI->getOpcode() != AArch64::MOVi64imm)
561 return false;
562
563 // If the MOV has multiple uses, do not split the immediate because it causes
564 // more instructions.
565 if (!MRI->hasOneUse(MovMI->getOperand(0).getReg()))
566 return false;
567 if (SubregToRegMI && !MRI->hasOneUse(SubregToRegMI->getOperand(0).getReg()))
568 return false;
569
570 // It is OK to perform this peephole optimization.
571 return true;
572}
573
/// Shared driver for the immediate-splitting peepholes: validates the
/// MOV-immediate feeding MI (via checkMovImmInstr), asks SplitAndOpc whether
/// and how to split the constant, then delegates instruction emission to
/// BuildInstr and erases the replaced instructions. T (uint32_t/uint64_t)
/// selects the 32- or 64-bit register size.
template <typename T>
bool AArch64MIPeepholeOptImpl::splitTwoPartImm(MachineInstr &MI,
                                               SplitAndOpcFunc<T> SplitAndOpc,
                                               BuildMIFunc BuildInstr) {
  unsigned RegSize = sizeof(T) * 8;
  assert((RegSize == 32 || RegSize == 64) &&
         "Invalid RegSize for legal immediate peephole optimization");

  // Perform several essential checks against current MI.
  MachineInstr *MovMI, *SubregToRegMI;
  if (!checkMovImmInstr(MI, MovMI, SubregToRegMI))
    return false;

  // Split the immediate to Imm0 and Imm1, and calculate the Opcode.
  T Imm = static_cast<T>(MovMI->getOperand(1).getImm()), Imm0, Imm1;
  // For the 32 bit form of instruction, the upper 32 bits of the destination
  // register are set to zero. If there is SUBREG_TO_REG, set the upper 32 bits
  // of Imm to zero. This is essential if the Immediate value was a negative
  // number since it was sign extended when we assign to the 64-bit Imm.
  if (SubregToRegMI)
    Imm &= 0xFFFFFFFF;
  OpcodePair Opcode;
  if (auto R = SplitAndOpc(Imm, RegSize, Imm0, Imm1))
    Opcode = *R;
  else
    return false;

  // Create new MIs using the first and second opcodes. Opcodes might differ for
  // flag setting operations that should only set flags on second instruction.
  // NewTmpReg = Opcode.first SrcReg Imm0
  // NewDstReg = Opcode.second NewTmpReg Imm1

  // Determine register classes for destinations and register operands
  const TargetRegisterClass *FirstInstrDstRC =
      TII->getRegClass(TII->get(Opcode.first), 0);
  const TargetRegisterClass *FirstInstrOperandRC =
      TII->getRegClass(TII->get(Opcode.first), 1);
  const TargetRegisterClass *SecondInstrDstRC =
      (Opcode.first == Opcode.second)
          ? FirstInstrDstRC
          : TII->getRegClass(TII->get(Opcode.second), 0);
  const TargetRegisterClass *SecondInstrOperandRC =
      (Opcode.first == Opcode.second)
          ? FirstInstrOperandRC
          : TII->getRegClass(TII->get(Opcode.second), 1);

  // Get old registers destinations and new register destinations
  Register DstReg = MI.getOperand(0).getReg();
  Register SrcReg = MI.getOperand(1).getReg();
  Register NewTmpReg = MRI->createVirtualRegister(FirstInstrDstRC);
  // In the situation that DstReg is not Virtual (likely WZR or XZR), we want to
  // reuse that same destination register.
  Register NewDstReg = DstReg.isVirtual()
                           ? MRI->createVirtualRegister(SecondInstrDstRC)
                           : DstReg;

  // Constrain registers based on their new uses
  MRI->constrainRegClass(SrcReg, FirstInstrOperandRC);
  MRI->constrainRegClass(NewTmpReg, SecondInstrOperandRC);
  if (DstReg != NewDstReg)
    MRI->constrainRegClass(NewDstReg, MRI->getRegClass(DstReg));

  // Call the delegating operation to build the instruction
  BuildInstr(MI, Opcode, Imm0, Imm1, SrcReg, NewTmpReg, NewDstReg);

  // replaceRegWith changes MI's definition register. Keep it for SSA form until
  // deleting MI. Only if we made a new destination register.
  if (DstReg != NewDstReg) {
    MRI->replaceRegWith(DstReg, NewDstReg);
    MI.getOperand(0).setReg(DstReg);
  }

  // Record the MIs need to be removed.
  MI.eraseFromParent();
  if (SubregToRegMI)
    SubregToRegMI->eraseFromParent();
  MovMI->eraseFromParent();

  return true;
}
654
655bool AArch64MIPeepholeOptImpl::visitINSviGPR(MachineInstr &MI, unsigned Opc) {
656 // Check if this INSvi[X]gpr comes from COPY of a source FPR128
657 //
658 // From
659 // %intermediate1:gpr64 = COPY %src:fpr128
660 // %intermediate2:gpr32 = COPY %intermediate1:gpr64
661 // %dst:fpr128 = INSvi[X]gpr %dst_vec:fpr128, dst_index, %intermediate2:gpr32
662 // To
663 // %dst:fpr128 = INSvi[X]lane %dst_vec:fpr128, dst_index, %src:fpr128,
664 // src_index
665 // where src_index = 0, X = [8|16|32|64]
666
667 MachineInstr *SrcMI = MRI->getUniqueVRegDef(MI.getOperand(3).getReg());
668
669 // For a chain of COPY instructions, find the initial source register
670 // and check if it's an FPR128
671 while (true) {
672 if (!SrcMI || SrcMI->getOpcode() != TargetOpcode::COPY)
673 return false;
674
675 if (!SrcMI->getOperand(1).getReg().isVirtual())
676 return false;
677
678 if (MRI->getRegClass(SrcMI->getOperand(1).getReg()) ==
679 &AArch64::FPR128RegClass) {
680 break;
681 }
682 SrcMI = MRI->getUniqueVRegDef(SrcMI->getOperand(1).getReg());
683 }
684
685 Register DstReg = MI.getOperand(0).getReg();
686 Register SrcReg = SrcMI->getOperand(1).getReg();
687 MachineInstr *INSvilaneMI =
688 BuildMI(*MI.getParent(), MI, MI.getDebugLoc(), TII->get(Opc), DstReg)
689 .add(MI.getOperand(1))
690 .add(MI.getOperand(2))
691 .addUse(SrcReg, getRegState(SrcMI->getOperand(1)))
692 .addImm(0);
693
694 LLVM_DEBUG(dbgs() << MI << " replace by:\n: " << *INSvilaneMI << "\n");
695 (void)INSvilaneMI;
696 MI.eraseFromParent();
697 return true;
698}
699
700// All instructions that set a FPR64 will implicitly zero the top bits of the
701// register. When the def is expressed as a COPY from a GPR, turn it into an
702// explicit FMOV so it cannot be elided later in further passes.
705 const AArch64InstrInfo *TII) {
706 if (!MI->getOperand(0).isReg() || !MI->getOperand(0).isDef())
707 return false;
708 const TargetRegisterClass *RC = MRI->getRegClass(MI->getOperand(0).getReg());
709 if (RC != &AArch64::FPR64RegClass)
710 return false;
711 if (MI->getOpcode() == TargetOpcode::COPY) {
712 MachineOperand &SrcOp = MI->getOperand(1);
713 if (!SrcOp.isReg())
714 return false;
715 if (SrcOp.getSubReg())
716 return false;
717 Register SrcReg = SrcOp.getReg();
718 auto IsGPR64Like = [&]() -> bool {
719 if (SrcReg.isVirtual())
720 return AArch64::GPR64allRegClass.hasSubClassEq(
721 MRI->getRegClass(SrcReg));
722 return AArch64::GPR64allRegClass.contains(SrcReg);
723 };
724 if (!IsGPR64Like())
725 return false;
726 assert(TII && "Expected InstrInfo when materializing COPYs");
727 // FMOVXDr insists on strict GPR64 operands, so fix up the COPY source.
728 MachineOperand &SrcMO = MI->getOperand(1);
729 bool SrcKill = SrcMO.isKill();
730 if (SrcReg.isVirtual()) {
731 if (MRI->getRegClass(SrcReg) != &AArch64::GPR64RegClass) {
732 // Pass the value through a temporary GPR64 vreg to satisfy the
733 // verifier.
734 Register NewSrc = MRI->createVirtualRegister(&AArch64::GPR64RegClass);
735 BuildMI(*MI->getParent(), MI, MI->getDebugLoc(),
736 TII->get(TargetOpcode::COPY), NewSrc)
737 .addReg(SrcReg, getKillRegState(SrcKill));
738 SrcReg = NewSrc;
739 SrcKill = true;
740 }
741 } else if (!AArch64::GPR64RegClass.contains(SrcReg)) {
742 return false;
743 }
744 SrcMO.setReg(SrcReg);
745 SrcMO.setSubReg(0);
746 SrcMO.setIsKill(SrcKill);
747 // Replace the COPY with an explicit FMOV so the zeroing behaviour stays
748 // visible.
749 MI->setDesc(TII->get(AArch64::FMOVXDr));
750 return true;
751 }
752 return MI->getOpcode() > TargetOpcode::GENERIC_OP_END;
753}
754
755bool AArch64MIPeepholeOptImpl::visitINSvi64lane(MachineInstr &MI) {
756 // Check the MI for low 64-bits sets zero for high 64-bits implicitly.
757 // We are expecting below case.
758 //
759 // %1:fpr64 = nofpexcept FCVTNv4i16 %0:fpr128, implicit $fpcr
760 // %6:fpr128 = IMPLICIT_DEF
761 // %5:fpr128 = INSERT_SUBREG %6:fpr128(tied-def 0), killed %1:fpr64, %subreg.dsub
762 // %7:fpr128 = INSvi64lane %5:fpr128(tied-def 0), 1, killed %3:fpr128, 0
763 MachineInstr *Low64MI = MRI->getUniqueVRegDef(MI.getOperand(1).getReg());
764 if (Low64MI->getOpcode() != AArch64::INSERT_SUBREG)
765 return false;
766 Low64MI = MRI->getUniqueVRegDef(Low64MI->getOperand(2).getReg());
767 if (!Low64MI || !is64bitDefwithZeroHigh64bit(Low64MI, MRI, TII))
768 return false;
769
770 // Check there is `mov 0` MI for high 64-bits.
771 // We are expecting below cases.
772 //
773 // %2:fpr64 = MOVID 0
774 // %4:fpr128 = IMPLICIT_DEF
775 // %3:fpr128 = INSERT_SUBREG %4:fpr128(tied-def 0), killed %2:fpr64, %subreg.dsub
776 // %7:fpr128 = INSvi64lane %5:fpr128(tied-def 0), 1, killed %3:fpr128, 0
777 // or
778 // %5:fpr128 = MOVIv2d_ns 0
779 // %6:fpr64 = COPY %5.dsub:fpr128
780 // %8:fpr128 = IMPLICIT_DEF
781 // %7:fpr128 = INSERT_SUBREG %8:fpr128(tied-def 0), killed %6:fpr64, %subreg.dsub
782 // %11:fpr128 = INSvi64lane %9:fpr128(tied-def 0), 1, killed %7:fpr128, 0
783 MachineInstr *High64MI = MRI->getUniqueVRegDef(MI.getOperand(3).getReg());
784 if (!High64MI || High64MI->getOpcode() != AArch64::INSERT_SUBREG)
785 return false;
786 High64MI = MRI->getUniqueVRegDef(High64MI->getOperand(2).getReg());
787 if (High64MI && High64MI->getOpcode() == TargetOpcode::COPY)
788 High64MI = MRI->getUniqueVRegDef(High64MI->getOperand(1).getReg());
789 if (!High64MI || (High64MI->getOpcode() != AArch64::MOVID &&
790 High64MI->getOpcode() != AArch64::MOVIv2d_ns))
791 return false;
792 if (High64MI->getOperand(1).getImm() != 0)
793 return false;
794
795 // Let's remove MIs for high 64-bits.
796 Register OldDef = MI.getOperand(0).getReg();
797 Register NewDef = MI.getOperand(1).getReg();
798 MRI->constrainRegClass(NewDef, MRI->getRegClass(OldDef));
799 MRI->replaceRegWith(OldDef, NewDef);
800 MI.eraseFromParent();
801
802 return true;
803}
804
805bool AArch64MIPeepholeOptImpl::visitFMOVDr(MachineInstr &MI) {
806 // An FMOVDr sets the high 64-bits to zero implicitly, similar to ORR for GPR.
807 MachineInstr *Low64MI = MRI->getUniqueVRegDef(MI.getOperand(1).getReg());
808 if (!Low64MI || !is64bitDefwithZeroHigh64bit(Low64MI, MRI, TII))
809 return false;
810
811 // Let's remove MIs for high 64-bits.
812 Register OldDef = MI.getOperand(0).getReg();
813 Register NewDef = MI.getOperand(1).getReg();
814 LLVM_DEBUG(dbgs() << "Removing: " << MI << "\n");
815 MRI->clearKillFlags(OldDef);
816 MRI->clearKillFlags(NewDef);
817 MRI->constrainRegClass(NewDef, MRI->getRegClass(OldDef));
818 MRI->replaceRegWith(OldDef, NewDef);
819 MI.eraseFromParent();
820
821 return true;
822}
823
bool AArch64MIPeepholeOptImpl::visitUBFMXri(MachineInstr &MI) {
  // Check if the instruction is equivalent to a 32 bit LSR or LSL alias of
  // UBFM, and replace the UBFMXri instruction with its 32 bit variant, UBFMWri.
  int64_t Immr = MI.getOperand(2).getImm();
  int64_t Imms = MI.getOperand(3).getImm();

  // With Imms == 31 and Immr <= Imms, UBFMXri extracts bits [31:Immr] of the
  // source into the low bits of the result, leaving the top 32 bits zero —
  // i.e. it acts exactly like a 32-bit LSR #Immr.
  bool IsLSR = Imms == 31 && Immr <= Imms;
  // A 32-bit LSL #sh encodes as UBFMWri with Immr = (32 - sh) % 32 and
  // Imms = 31 - sh.  The 64-bit form that additionally clears the high half
  // uses Immr = 64 - sh with the same Imms, hence Immr == Imms + 33.
  bool IsLSL = Immr == Imms + 33;
  if (!IsLSR && !IsLSL)
    return false;

  if (IsLSL) {
    // Rebase Immr from the 64-bit LSL encoding (64 - sh) down to the 32-bit
    // one; the matched range guarantees the result stays within [1, 31].
    Immr -= 32;
  }

  // Narrow the 64-bit destination/source register classes to their sub_32
  // counterparts so the replacement UBFMWri operates on W registers.
  const TargetRegisterClass *DstRC64 =
      TII->getRegClass(TII->get(MI.getOpcode()), 0);
  const TargetRegisterClass *DstRC32 =
      TRI->getSubRegisterClass(DstRC64, AArch64::sub_32);
  assert(DstRC32 && "Destination register class of UBFMXri doesn't have a "
                    "sub_32 subregister class");

  const TargetRegisterClass *SrcRC64 =
      TII->getRegClass(TII->get(MI.getOpcode()), 1);
  const TargetRegisterClass *SrcRC32 =
      TRI->getSubRegisterClass(SrcRC64, AArch64::sub_32);
  assert(SrcRC32 && "Source register class of UBFMXri doesn't have a sub_32 "
                    "subregister class");

  Register DstReg64 = MI.getOperand(0).getReg();
  Register DstReg32 = MRI->createVirtualRegister(DstRC32);
  Register SrcReg64 = MI.getOperand(1).getReg();
  Register SrcReg32 = MRI->createVirtualRegister(SrcRC32);

  // Extract the low 32 bits of the source, run the 32-bit UBFM on them, and
  // widen the result back to 64 bits.  SUBREG_TO_REG asserts the high bits
  // are zero, which holds for both the LSR and LSL forms matched above.
  BuildMI(*MI.getParent(), MI, MI.getDebugLoc(), TII->get(AArch64::COPY),
          SrcReg32)
      .addReg(SrcReg64, {}, AArch64::sub_32);
  BuildMI(*MI.getParent(), MI, MI.getDebugLoc(), TII->get(AArch64::UBFMWri),
          DstReg32)
      .addReg(SrcReg32)
      .addImm(Immr)
      .addImm(Imms);
  BuildMI(*MI.getParent(), MI, MI.getDebugLoc(),
          TII->get(AArch64::SUBREG_TO_REG), DstReg64)
      .addReg(DstReg32)
      .addImm(AArch64::sub_32);
  MI.eraseFromParent();
  return true;
}
873
// Across a basic-block we might have an i32 extract from a value that only
// operates on upper bits (for example a sxtw). We can replace the COPY with a
// new version skipping the sxtw.
bool AArch64MIPeepholeOptImpl::visitCopy(MachineInstr &MI) {
  // Only handle COPYs that read the sub_32 part of a value with no other
  // (non-debug) users, so rewriting the COPY leaves the old def dead.
  Register InputReg = MI.getOperand(1).getReg();
  if (MI.getOperand(1).getSubReg() != AArch64::sub_32 ||
      !MRI->hasOneNonDBGUse(InputReg))
    return false;

  // Walk back through a chain of single-use full copies, collecting every
  // instruction that becomes dead once the COPY is rewritten.
  MachineInstr *SrcMI = MRI->getUniqueVRegDef(InputReg);
  SmallPtrSet<MachineInstr *, 4> DeadInstrs;
  DeadInstrs.insert(SrcMI);
  while (SrcMI && SrcMI->isFullCopy() &&
         MRI->hasOneNonDBGUse(SrcMI->getOperand(1).getReg())) {
    SrcMI = MRI->getUniqueVRegDef(SrcMI->getOperand(1).getReg());
    DeadInstrs.insert(SrcMI);
  }

  if (!SrcMI)
    return false;

  // Look for SXTW(X) and return Reg.
  // SBFMXri with immr=0, imms=31 is the SXTW alias; since we only keep the
  // low 32 bits, the sign-extension is irrelevant and X can be read directly.
  auto getSXTWSrcReg = [](MachineInstr *SrcMI) -> Register {
    if (SrcMI->getOpcode() != AArch64::SBFMXri ||
        SrcMI->getOperand(2).getImm() != 0 ||
        SrcMI->getOperand(3).getImm() != 31)
      return AArch64::NoRegister;
    return SrcMI->getOperand(1).getReg();
  };
  // Look for SUBREG_TO_REG(ORRWrr(WZR, COPY(X.sub_32)))
  // This is the zero-extend idiom; again only the low 32 bits survive, so the
  // original X can feed the COPY directly.  Every matched link must be
  // single-use so the bypassed instructions are safe to delete.
  auto getUXTWSrcReg = [&](MachineInstr *SrcMI) -> Register {
    if (SrcMI->getOpcode() != AArch64::SUBREG_TO_REG ||
        SrcMI->getOperand(2).getImm() != AArch64::sub_32 ||
        !MRI->hasOneNonDBGUse(SrcMI->getOperand(1).getReg()))
      return AArch64::NoRegister;
    MachineInstr *Orr = MRI->getUniqueVRegDef(SrcMI->getOperand(1).getReg());
    if (!Orr || Orr->getOpcode() != AArch64::ORRWrr ||
        Orr->getOperand(1).getReg() != AArch64::WZR ||
        !MRI->hasOneNonDBGUse(Orr->getOperand(2).getReg()))
      return AArch64::NoRegister;
    MachineInstr *Cpy = MRI->getUniqueVRegDef(Orr->getOperand(2).getReg());
    if (!Cpy || Cpy->getOpcode() != AArch64::COPY ||
        Cpy->getOperand(1).getSubReg() != AArch64::sub_32)
      return AArch64::NoRegister;
    DeadInstrs.insert(Orr);
    return Cpy->getOperand(1).getReg();
  };

  // Try the sign-extend pattern first, then the zero-extend one.
  Register SrcReg = getSXTWSrcReg(SrcMI);
  if (!SrcReg)
    SrcReg = getUXTWSrcReg(SrcMI);
  if (!SrcReg)
    return false;

  // Redirect the COPY to read SrcReg.sub_32 and delete the bypassed chain.
  MRI->constrainRegClass(SrcReg, MRI->getRegClass(InputReg));
  LLVM_DEBUG(dbgs() << "Optimizing: " << MI);
  MI.getOperand(1).setReg(SrcReg);
  LLVM_DEBUG(dbgs() << "        to: " << MI);
  for (auto *DeadMI : DeadInstrs) {
    LLVM_DEBUG(dbgs() << "  Removing: " << *DeadMI);
    DeadMI->eraseFromParent();
  }
  return true;
}
938
bool AArch64MIPeepholeOptImpl::run(MachineFunction &MF) {
  // Cache target hooks and the virtual register file for the visitors.
  TII = static_cast<const AArch64InstrInfo *>(MF.getSubtarget().getInstrInfo());
  TRI = static_cast<const AArch64RegisterInfo *>(
      MF.getSubtarget().getRegisterInfo());
  MRI = &MF.getRegInfo();

  // The rewrites rely on unique virtual-register defs and use-list queries.
  assert(MRI->isSSA() && "Expected to be run on SSA form!");

  bool Changed = false;

  // Dispatch each instruction to its peephole handler; make_early_inc_range
  // keeps iteration valid even when a visitor erases MI.
  for (MachineBasicBlock &MBB : MF) {
    for (MachineInstr &MI : make_early_inc_range(MBB)) {
      switch (MI.getOpcode()) {
      default:
        break;
      case AArch64::INSERT_SUBREG:
        Changed |= visitINSERT(MI);
        break;
      // Logical ops: try to split a materialized immediate into two immediates
      // directly encodable in *ri form.  AND-style ops intersect the two
      // masks, so both halves use the plain ANDri; the flag-setting variant is
      // only needed for the final instruction of the ANDS split.
      case AArch64::ANDWrr:
        Changed |= trySplitLogicalImm<uint32_t>(AArch64::ANDWri, MI,
                                                SplitStrategy::Intersect);
        break;
      case AArch64::ANDXrr:
        Changed |= trySplitLogicalImm<uint64_t>(AArch64::ANDXri, MI,
                                                SplitStrategy::Intersect);
        break;
      case AArch64::ANDSWrr:
        Changed |= trySplitLogicalImm<uint32_t>(
            AArch64::ANDWri, MI, SplitStrategy::Intersect, AArch64::ANDSWri);
        break;
      case AArch64::ANDSXrr:
        Changed |= trySplitLogicalImm<uint64_t>(
            AArch64::ANDXri, MI, SplitStrategy::Intersect, AArch64::ANDSXri);
        break;
      // EOR/ORR combine disjoint halves of the immediate instead.
      case AArch64::EORWrr:
        Changed |= trySplitLogicalImm<uint32_t>(AArch64::EORWri, MI,
                                                SplitStrategy::Disjoint);
        break;
      case AArch64::EORXrr:
        Changed |= trySplitLogicalImm<uint64_t>(AArch64::EORXri, MI,
                                                SplitStrategy::Disjoint);
        break;
      case AArch64::ORRWrr:
        Changed |= trySplitLogicalImm<uint32_t>(AArch64::ORRWri, MI,
                                                SplitStrategy::Disjoint);
        break;
      case AArch64::ORRXrr:
        Changed |= trySplitLogicalImm<uint64_t>(AArch64::ORRXri, MI,
                                                SplitStrategy::Disjoint);
        break;
      // Redundant zero-extend removal (pattern 4 in the file header).
      case AArch64::ORRWrs:
        Changed |= visitORR(MI);
        break;
      // ADD/SUB: split a materialized immediate across two *ri instructions;
      // the second opcode handles negated immediates (e.g. ADD of a negative
      // constant becomes SUBs).
      case AArch64::ADDWrr:
        Changed |= visitADDSUB<uint32_t>(AArch64::ADDWri, AArch64::SUBWri, MI);
        break;
      case AArch64::SUBWrr:
        Changed |= visitADDSUB<uint32_t>(AArch64::SUBWri, AArch64::ADDWri, MI);
        break;
      case AArch64::ADDXrr:
        Changed |= visitADDSUB<uint64_t>(AArch64::ADDXri, AArch64::SUBXri, MI);
        break;
      case AArch64::SUBXrr:
        Changed |= visitADDSUB<uint64_t>(AArch64::SUBXri, AArch64::ADDXri, MI);
        break;
      // Flag-setting ADD/SUB: each direction supplies a {non-flag, flag}
      // opcode pair so only the final split instruction sets NZCV.
      case AArch64::ADDSWrr:
        Changed |=
            visitADDSSUBS<uint32_t>({AArch64::ADDWri, AArch64::ADDSWri},
                                    {AArch64::SUBWri, AArch64::SUBSWri}, MI);
        break;
      case AArch64::SUBSWrr:
        Changed |=
            visitADDSSUBS<uint32_t>({AArch64::SUBWri, AArch64::SUBSWri},
                                    {AArch64::ADDWri, AArch64::ADDSWri}, MI);
        break;
      case AArch64::ADDSXrr:
        Changed |=
            visitADDSSUBS<uint64_t>({AArch64::ADDXri, AArch64::ADDSXri},
                                    {AArch64::SUBXri, AArch64::SUBSXri}, MI);
        break;
      case AArch64::SUBSXrr:
        Changed |=
            visitADDSSUBS<uint64_t>({AArch64::SUBXri, AArch64::SUBSXri},
                                    {AArch64::ADDXri, AArch64::ADDSXri}, MI);
        break;
      case AArch64::CSELWr:
      case AArch64::CSELXr:
        Changed |= visitCSEL(MI);
        break;
      // GPR-to-vector inserts that can become lane-to-lane moves (pattern 6).
      case AArch64::INSvi64gpr:
        Changed |= visitINSviGPR(MI, AArch64::INSvi64lane);
        break;
      case AArch64::INSvi32gpr:
        Changed |= visitINSviGPR(MI, AArch64::INSvi32lane);
        break;
      case AArch64::INSvi16gpr:
        Changed |= visitINSviGPR(MI, AArch64::INSvi16lane);
        break;
      case AArch64::INSvi8gpr:
        Changed |= visitINSviGPR(MI, AArch64::INSvi8lane);
        break;
      case AArch64::INSvi64lane:
        Changed |= visitINSvi64lane(MI);
        break;
      case AArch64::FMOVDr:
        Changed |= visitFMOVDr(MI);
        break;
      case AArch64::UBFMXri:
        Changed |= visitUBFMXri(MI);
        break;
      case AArch64::COPY:
        Changed |= visitCopy(MI);
        break;
      }
    }
  }

  return Changed;
}
1058
1059bool AArch64MIPeepholeOptLegacy::runOnMachineFunction(MachineFunction &MF) {
1060 if (skipFunction(MF.getFunction()))
1061 return false;
1062
1063 MachineLoopInfo &MLI = getAnalysis<MachineLoopInfoWrapperPass>().getLI();
1064 return AArch64MIPeepholeOptImpl(MLI).run(MF);
1065}
1066
1068 return new AArch64MIPeepholeOptLegacy();
1069}
1070
1075 const bool Changed = AArch64MIPeepholeOptImpl(MLI).run(MF);
1076 if (!Changed)
1077 return PreservedAnalyses::all();
1080 return PA;
1081}
static const TargetRegisterClass * getRegClass(const MachineInstr &MI, Register Reg)
static bool is64bitDefwithZeroHigh64bit(MachineInstr *MI, MachineRegisterInfo *MRI, const AArch64InstrInfo *TII)
static bool splitDisjointBitmaskImm(T Imm, unsigned RegSize, T &Imm1Enc, T &Imm2Enc)
unsigned HighestBitSet
unsigned T T & Imm2Enc
unsigned T & Imm1Enc
unsigned RegSize
static bool splitAddSubImm(T Imm, unsigned RegSize, T &Imm0, T &Imm1)
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
unsigned LowestBitSet
MachineBasicBlock & MBB
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
const HexagonInstrInfo * TII
IRTranslator LLVM IR MI
Register const TargetRegisterInfo * TRI
Promote Memory to Register
Definition Mem2Reg.cpp:110
#define T
if(PassOpts->AAPipeline)
#define INITIALIZE_PASS(passName, arg, name, cfg, analysis)
Definition PassSupport.h:56
#define OP(OPC)
Definition Instruction.h:46
static bool contains(SmallPtrSetImpl< ConstantExpr * > &Cache, ConstantExpr *Expr, Constant *C)
Definition Value.cpp:487
#define LLVM_DEBUG(...)
Definition Debug.h:114
PreservedAnalyses run(MachineFunction &MF, MachineFunctionAnalysisManager &MFAM)
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
Represent the analysis usage information of a pass.
AnalysisUsage & addRequired()
LLVM_ABI void setPreservesCFG()
This function should be called by the pass, iff they do not:
Definition Pass.cpp:270
Represents analyses that only rely on functions' control flow.
Definition Analysis.h:73
FunctionPass class - This class is used to implement most global optimizations.
Definition Pass.h:314
LoopT * getLoopFor(const BlockT *BB) const
Return the inner most loop that BB lives in.
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
Function & getFunction()
Return the LLVM function that this machine code represents.
const MachineInstrBuilder & addUse(Register RegNo, RegState Flags={}, unsigned SubReg=0) const
Add a virtual register use operand.
const MachineInstrBuilder & addReg(Register RegNo, RegState Flags={}, unsigned SubReg=0) const
Add a new virtual register operand.
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
const MachineInstrBuilder & add(const MachineOperand &MO) const
Representation of each machine instruction.
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
const MachineBasicBlock * getParent() const
bool isFullCopy() const
const DebugLoc & getDebugLoc() const
Returns the debug location id of this MachineInstr.
const MachineOperand & getOperand(unsigned i) const
LLVM_ABI MachineInstrBundleIterator< MachineInstr > eraseFromParent()
Unlink 'this' from the containing basic block and delete it.
Analysis pass that exposes the MachineLoopInfo for a machine function.
MachineOperand class - Representation of each machine instruction operand.
void setSubReg(unsigned subReg)
unsigned getSubReg() const
int64_t getImm() const
LLVM_ABI void setReg(Register Reg)
Change the register this operand corresponds to.
void setIsKill(bool Val=true)
Register getReg() const
getReg - Returns the register number.
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
LLVM_ABI bool hasOneNonDBGUse(Register RegNo) const
hasOneNonDBGUse - Return true if there is exactly one non-Debug use of the specified register.
const TargetRegisterClass * getRegClass(Register Reg) const
Return the register class of the specified virtual register.
LLVM_ABI void clearKillFlags(Register Reg) const
clearKillFlags - Iterate over all the uses of the given register and clear the kill flag from the Mac...
LLVM_ABI Register createVirtualRegister(const TargetRegisterClass *RegClass, StringRef Name="")
createVirtualRegister - Create and return a new virtual register in the function with the specified r...
bool hasOneUse(Register RegNo) const
hasOneUse - Return true if there is exactly one instruction using the specified register.
LLVM_ABI const TargetRegisterClass * constrainRegClass(Register Reg, const TargetRegisterClass *RC, unsigned MinNumRegs=0)
constrainRegClass - Constrain the register class of the specified virtual register to be a common sub...
LLVM_ABI void replaceRegWith(Register FromReg, Register ToReg)
replaceRegWith - Replace all instances of FromReg with ToReg in the machine function.
LLVM_ABI MachineInstr * getUniqueVRegDef(Register Reg) const
getUniqueVRegDef - Return the unique machine instr that defines the specified virtual register or nul...
A set of analyses that are preserved following a run of a transformation pass.
Definition Analysis.h:112
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition Analysis.h:118
PreservedAnalyses & preserveSet()
Mark an analysis set as preserved.
Definition Analysis.h:151
Wrapper class representing virtual and physical registers.
Definition Register.h:20
constexpr bool isVirtual() const
Return true if the specified register number is in the virtual register namespace.
Definition Register.h:79
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Register getReg() const
StringRef - Represent a constant reference to a string, i.e.
Definition StringRef.h:55
Changed
static bool isLogicalImmediate(uint64_t imm, unsigned regSize)
isLogicalImmediate - Return true if the immediate is valid for a logical immediate instruction of the...
static uint64_t encodeLogicalImmediate(uint64_t imm, unsigned regSize)
encodeLogicalImmediate - Return the encoded immediate value for a logical immediate instruction of th...
void expandMOVImm(uint64_t Imm, unsigned BitSize, SmallVectorImpl< ImmInsnModel > &Insn)
Expand a MOVi32imm or MOVi64imm pseudo instruction to one or more real move-immediate instructions to...
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition CallingConv.h:24
This is an optimization pass for GlobalISel generic memory operations.
Definition Types.h:26
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
constexpr RegState getKillRegState(bool B)
int countr_one(T Value)
Count the number of ones from the least significant bit to the first zero bit.
Definition bit.h:293
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
Definition STLExtras.h:634
AnalysisManager< MachineFunction > MachineFunctionAnalysisManager
unsigned Log2_64(uint64_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
Definition MathExtras.h:337
int countr_zero(T Val)
Count number of 0's from the least significant bit to the most stopping at the first 1.
Definition bit.h:202
LLVM_ABI PreservedAnalyses getMachineFunctionPassPreservedAnalyses()
Returns the minimum set of Analyses that all machine function passes must preserve.
FunctionPass * createAArch64MIPeepholeOptLegacyPass()
MachineInstr * getImm(const MachineOperand &MO, const MachineRegisterInfo *MRI)
decltype(auto) get(const PointerIntPair< PointerTy, IntBits, IntType, PtrTraits, Info > &Pair)
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:207
std::optional< UsedNZCV > examineCFlagsUse(MachineInstr &MI, MachineInstr &CmpInstr, const TargetRegisterInfo &TRI, SmallVectorImpl< MachineInstr * > *CCUseInstrs=nullptr)
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
RegState getRegState(const MachineOperand &RegOp)
Get all register state flags from machine operand RegOp.