File: lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp
Warning: line 937, column 9: Value stored to 'Imms' during its initialization is never read
1 | //===- AArch64LoadStoreOptimizer.cpp - AArch64 load/store opt. pass -------===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | // |
9 | // This file contains a pass that performs load / store related peephole |
10 | // optimizations. This pass should be run after register allocation. |
11 | // |
12 | //===----------------------------------------------------------------------===// |
13 | |
14 | #include "AArch64InstrInfo.h" |
15 | #include "AArch64Subtarget.h" |
16 | #include "MCTargetDesc/AArch64AddressingModes.h" |
17 | #include "llvm/ADT/BitVector.h" |
18 | #include "llvm/ADT/SmallVector.h" |
19 | #include "llvm/ADT/Statistic.h" |
20 | #include "llvm/ADT/StringRef.h" |
21 | #include "llvm/ADT/iterator_range.h" |
22 | #include "llvm/Analysis/AliasAnalysis.h" |
23 | #include "llvm/CodeGen/MachineBasicBlock.h" |
24 | #include "llvm/CodeGen/MachineFunction.h" |
25 | #include "llvm/CodeGen/MachineFunctionPass.h" |
26 | #include "llvm/CodeGen/MachineInstr.h" |
27 | #include "llvm/CodeGen/MachineInstrBuilder.h" |
28 | #include "llvm/CodeGen/MachineOperand.h" |
29 | #include "llvm/CodeGen/TargetRegisterInfo.h" |
30 | #include "llvm/IR/DebugLoc.h" |
31 | #include "llvm/MC/MCRegisterInfo.h" |
32 | #include "llvm/Pass.h" |
33 | #include "llvm/Support/CommandLine.h" |
34 | #include "llvm/Support/Debug.h" |
35 | #include "llvm/Support/ErrorHandling.h" |
36 | #include "llvm/Support/raw_ostream.h" |
37 | #include <cassert> |
38 | #include <cstdint> |
39 | #include <iterator> |
40 | #include <limits> |
41 | |
42 | using namespace llvm; |
43 | |
44 | #define DEBUG_TYPE "aarch64-ldst-opt" |
45 | |
46 | STATISTIC(NumPairCreated, "Number of load/store pair instructions generated"); |
47 | STATISTIC(NumPostFolded, "Number of post-index updates folded"); |
48 | STATISTIC(NumPreFolded, "Number of pre-index updates folded"); |
49 | STATISTIC(NumUnscaledPairCreated, |
50 |           "Number of load/store from unscaled generated"); |
51 | STATISTIC(NumZeroStoresPromoted, "Number of narrow zero stores promoted"); |
52 | STATISTIC(NumLoadsFromStoresPromoted, "Number of loads from stores promoted"); |
53 | |
54 | // The LdStLimit limits how far we search for load/store pairs. |
55 | static cl::opt<unsigned> LdStLimit("aarch64-load-store-scan-limit", |
56 | cl::init(20), cl::Hidden); |
57 | |
58 | // The UpdateLimit limits how far we search for update instructions when we form |
59 | // pre-/post-index instructions. |
60 | static cl::opt<unsigned> UpdateLimit("aarch64-update-scan-limit", cl::init(100), |
61 | cl::Hidden); |
62 | |
63 | #define AARCH64_LOAD_STORE_OPT_NAME "AArch64 load / store optimization pass" |
64 | |
65 | namespace { |
66 | |
67 | using LdStPairFlags = struct LdStPairFlags { |
68 | // If a matching instruction is found, MergeForward is set to true if the |
69 | // merge is to remove the first instruction and replace the second with |
70 | // a pair-wise insn, and false if the reverse is true. |
71 | bool MergeForward = false; |
72 | |
73 | // SExtIdx gives the index of the result of the load pair that must be |
74 | // extended. The value of SExtIdx assumes that the paired load produces the |
75 | // value in this order: (I, returned iterator), i.e., -1 means no value has |
76 | // to be extended, 0 means I, and 1 means the returned iterator. |
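// For example, when an LDRSWui is paired with a plain LDRWui and the |
// sign-extending load supplies the first (Rt) result, SExtIdx == 0 and the |
// merged LDP is followed by an SBFMXri that re-applies the sign extension |
// (see mergePairedInsns). |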
77 | int SExtIdx = -1; |
78 | |
79 | LdStPairFlags() = default; |
80 | |
81 | void setMergeForward(bool V = true) { MergeForward = V; } |
82 | bool getMergeForward() const { return MergeForward; } |
83 | |
84 | void setSExtIdx(int V) { SExtIdx = V; } |
85 | int getSExtIdx() const { return SExtIdx; } |
86 | }; |
87 | |
88 | struct AArch64LoadStoreOpt : public MachineFunctionPass { |
89 | static char ID; |
90 | |
91 | AArch64LoadStoreOpt() : MachineFunctionPass(ID) { |
92 | initializeAArch64LoadStoreOptPass(*PassRegistry::getPassRegistry()); |
93 | } |
94 | |
95 | AliasAnalysis *AA; |
96 | const AArch64InstrInfo *TII; |
97 | const TargetRegisterInfo *TRI; |
98 | const AArch64Subtarget *Subtarget; |
99 | |
100 | // Track which register units have been modified and used. |
101 | LiveRegUnits ModifiedRegUnits, UsedRegUnits; |
102 | |
103 | void getAnalysisUsage(AnalysisUsage &AU) const override { |
104 | AU.addRequired<AAResultsWrapperPass>(); |
105 | MachineFunctionPass::getAnalysisUsage(AU); |
106 | } |
107 | |
108 | // Scan the instructions looking for a load/store that can be combined |
109 | // with the current instruction into a load/store pair. |
110 | // Return the matching instruction if one is found, else MBB->end(). |
111 | MachineBasicBlock::iterator findMatchingInsn(MachineBasicBlock::iterator I, |
112 | LdStPairFlags &Flags, |
113 | unsigned Limit, |
114 | bool FindNarrowMerge); |
115 | |
116 | // Scan the instructions looking for a store that writes to the address from |
117 | // which the current load instruction reads. Return true if one is found. |
118 | bool findMatchingStore(MachineBasicBlock::iterator I, unsigned Limit, |
119 | MachineBasicBlock::iterator &StoreI); |
120 | |
121 | // Merge the two narrow store instructions indicated into a single wider store. |
122 | MachineBasicBlock::iterator |
123 | mergeNarrowZeroStores(MachineBasicBlock::iterator I, |
124 | MachineBasicBlock::iterator MergeMI, |
125 | const LdStPairFlags &Flags); |
126 | |
127 | // Merge the two instructions indicated into a single pair-wise instruction. |
128 | MachineBasicBlock::iterator |
129 | mergePairedInsns(MachineBasicBlock::iterator I, |
130 | MachineBasicBlock::iterator Paired, |
131 | const LdStPairFlags &Flags); |
132 | |
133 | // Promote the load that reads directly from the address stored to. |
134 | MachineBasicBlock::iterator |
135 | promoteLoadFromStore(MachineBasicBlock::iterator LoadI, |
136 | MachineBasicBlock::iterator StoreI); |
137 | |
138 | // Scan the instruction list to find a base register update that can |
139 | // be combined with the current instruction (a load or store) using |
140 | // pre or post indexed addressing with writeback. Scan forwards. |
141 | MachineBasicBlock::iterator |
142 | findMatchingUpdateInsnForward(MachineBasicBlock::iterator I, |
143 | int UnscaledOffset, unsigned Limit); |
144 | |
145 | // Scan the instruction list to find a base register update that can |
146 | // be combined with the current instruction (a load or store) using |
147 | // pre or post indexed addressing with writeback. Scan backwards. |
148 | MachineBasicBlock::iterator |
149 | findMatchingUpdateInsnBackward(MachineBasicBlock::iterator I, unsigned Limit); |
150 | |
151 | // Find an instruction that updates the base register of the ld/st |
152 | // instruction. |
153 | bool isMatchingUpdateInsn(MachineInstr &MemMI, MachineInstr &MI, |
154 | unsigned BaseReg, int Offset); |
155 | |
156 | // Merge a pre- or post-index base register update into a ld/st instruction. |
157 | MachineBasicBlock::iterator |
158 | mergeUpdateInsn(MachineBasicBlock::iterator I, |
159 | MachineBasicBlock::iterator Update, bool IsPreIdx); |
160 | |
161 | // Find and merge zero store instructions. |
162 | bool tryToMergeZeroStInst(MachineBasicBlock::iterator &MBBI); |
163 | |
164 | // Find and pair ldr/str instructions. |
165 | bool tryToPairLdStInst(MachineBasicBlock::iterator &MBBI); |
166 | |
167 | // Find and promote load instructions which read directly from store. |
168 | bool tryToPromoteLoadFromStore(MachineBasicBlock::iterator &MBBI); |
169 | |
170 | // Find and merge base register updates before or after a ld/st instruction. |
171 | bool tryToMergeLdStUpdate(MachineBasicBlock::iterator &MBBI); |
172 | |
173 | bool optimizeBlock(MachineBasicBlock &MBB, bool EnableNarrowZeroStOpt); |
174 | |
175 | bool runOnMachineFunction(MachineFunction &Fn) override; |
176 | |
177 | MachineFunctionProperties getRequiredProperties() const override { |
178 | return MachineFunctionProperties().set( |
179 | MachineFunctionProperties::Property::NoVRegs); |
180 | } |
181 | |
182 | StringRef getPassName() const override { return AARCH64_LOAD_STORE_OPT_NAME; } |
183 | }; |
184 | |
185 | char AArch64LoadStoreOpt::ID = 0; |
186 | |
187 | } // end anonymous namespace |
188 | |
189 | INITIALIZE_PASS(AArch64LoadStoreOpt, "aarch64-ldst-opt", |
190 |                 AARCH64_LOAD_STORE_OPT_NAME, false, false) |
191 | |
192 | static bool isNarrowStore(unsigned Opc) { |
193 | switch (Opc) { |
194 | default: |
195 | return false; |
196 | case AArch64::STRBBui: |
197 | case AArch64::STURBBi: |
198 | case AArch64::STRHHui: |
199 | case AArch64::STURHHi: |
200 | return true; |
201 | } |
202 | } |
203 | |
204 | // Scaling factor for unscaled load or store. |
205 | static int getMemScale(MachineInstr &MI) { |
206 | switch (MI.getOpcode()) { |
207 | default: |
208 | llvm_unreachable("Opcode has unknown scale!"); |
209 | case AArch64::LDRBBui: |
210 | case AArch64::LDURBBi: |
211 | case AArch64::LDRSBWui: |
212 | case AArch64::LDURSBWi: |
213 | case AArch64::STRBBui: |
214 | case AArch64::STURBBi: |
215 | return 1; |
216 | case AArch64::LDRHHui: |
217 | case AArch64::LDURHHi: |
218 | case AArch64::LDRSHWui: |
219 | case AArch64::LDURSHWi: |
220 | case AArch64::STRHHui: |
221 | case AArch64::STURHHi: |
222 | return 2; |
223 | case AArch64::LDRSui: |
224 | case AArch64::LDURSi: |
225 | case AArch64::LDRSWui: |
226 | case AArch64::LDURSWi: |
227 | case AArch64::LDRWui: |
228 | case AArch64::LDURWi: |
229 | case AArch64::STRSui: |
230 | case AArch64::STURSi: |
231 | case AArch64::STRWui: |
232 | case AArch64::STURWi: |
233 | case AArch64::LDPSi: |
234 | case AArch64::LDPSWi: |
235 | case AArch64::LDPWi: |
236 | case AArch64::STPSi: |
237 | case AArch64::STPWi: |
238 | return 4; |
239 | case AArch64::LDRDui: |
240 | case AArch64::LDURDi: |
241 | case AArch64::LDRXui: |
242 | case AArch64::LDURXi: |
243 | case AArch64::STRDui: |
244 | case AArch64::STURDi: |
245 | case AArch64::STRXui: |
246 | case AArch64::STURXi: |
247 | case AArch64::LDPDi: |
248 | case AArch64::LDPXi: |
249 | case AArch64::STPDi: |
250 | case AArch64::STPXi: |
251 | return 8; |
252 | case AArch64::LDRQui: |
253 | case AArch64::LDURQi: |
254 | case AArch64::STRQui: |
255 | case AArch64::STURQi: |
256 | case AArch64::LDPQi: |
257 | case AArch64::STPQi: |
258 | return 16; |
259 | } |
260 | } |
261 | |
262 | static unsigned getMatchingNonSExtOpcode(unsigned Opc, |
263 | bool *IsValidLdStrOpc = nullptr) { |
264 | if (IsValidLdStrOpc) |
265 | *IsValidLdStrOpc = true; |
266 | switch (Opc) { |
267 | default: |
268 | if (IsValidLdStrOpc) |
269 | *IsValidLdStrOpc = false; |
270 | return std::numeric_limits<unsigned>::max(); |
271 | case AArch64::STRDui: |
272 | case AArch64::STURDi: |
273 | case AArch64::STRQui: |
274 | case AArch64::STURQi: |
275 | case AArch64::STRBBui: |
276 | case AArch64::STURBBi: |
277 | case AArch64::STRHHui: |
278 | case AArch64::STURHHi: |
279 | case AArch64::STRWui: |
280 | case AArch64::STURWi: |
281 | case AArch64::STRXui: |
282 | case AArch64::STURXi: |
283 | case AArch64::LDRDui: |
284 | case AArch64::LDURDi: |
285 | case AArch64::LDRQui: |
286 | case AArch64::LDURQi: |
287 | case AArch64::LDRWui: |
288 | case AArch64::LDURWi: |
289 | case AArch64::LDRXui: |
290 | case AArch64::LDURXi: |
291 | case AArch64::STRSui: |
292 | case AArch64::STURSi: |
293 | case AArch64::LDRSui: |
294 | case AArch64::LDURSi: |
295 | return Opc; |
296 | case AArch64::LDRSWui: |
297 | return AArch64::LDRWui; |
298 | case AArch64::LDURSWi: |
299 | return AArch64::LDURWi; |
300 | } |
301 | } |
302 | |
303 | static unsigned getMatchingWideOpcode(unsigned Opc) { |
304 | switch (Opc) { |
305 | default: |
306 | llvm_unreachable("Opcode has no wide equivalent!"); |
307 | case AArch64::STRBBui: |
308 | return AArch64::STRHHui; |
309 | case AArch64::STRHHui: |
310 | return AArch64::STRWui; |
311 | case AArch64::STURBBi: |
312 | return AArch64::STURHHi; |
313 | case AArch64::STURHHi: |
314 | return AArch64::STURWi; |
315 | case AArch64::STURWi: |
316 | return AArch64::STURXi; |
317 | case AArch64::STRWui: |
318 | return AArch64::STRXui; |
319 | } |
320 | } |
321 | |
322 | static unsigned getMatchingPairOpcode(unsigned Opc) { |
323 | switch (Opc) { |
324 | default: |
325 | llvm_unreachable("Opcode has no pairwise equivalent!"); |
326 | case AArch64::STRSui: |
327 | case AArch64::STURSi: |
328 | return AArch64::STPSi; |
329 | case AArch64::STRDui: |
330 | case AArch64::STURDi: |
331 | return AArch64::STPDi; |
332 | case AArch64::STRQui: |
333 | case AArch64::STURQi: |
334 | return AArch64::STPQi; |
335 | case AArch64::STRWui: |
336 | case AArch64::STURWi: |
337 | return AArch64::STPWi; |
338 | case AArch64::STRXui: |
339 | case AArch64::STURXi: |
340 | return AArch64::STPXi; |
341 | case AArch64::LDRSui: |
342 | case AArch64::LDURSi: |
343 | return AArch64::LDPSi; |
344 | case AArch64::LDRDui: |
345 | case AArch64::LDURDi: |
346 | return AArch64::LDPDi; |
347 | case AArch64::LDRQui: |
348 | case AArch64::LDURQi: |
349 | return AArch64::LDPQi; |
350 | case AArch64::LDRWui: |
351 | case AArch64::LDURWi: |
352 | return AArch64::LDPWi; |
353 | case AArch64::LDRXui: |
354 | case AArch64::LDURXi: |
355 | return AArch64::LDPXi; |
356 | case AArch64::LDRSWui: |
357 | case AArch64::LDURSWi: |
358 | return AArch64::LDPSWi; |
359 | } |
360 | } |
361 | |
362 | static unsigned isMatchingStore(MachineInstr &LoadInst, |
363 | MachineInstr &StoreInst) { |
364 | unsigned LdOpc = LoadInst.getOpcode(); |
365 | unsigned StOpc = StoreInst.getOpcode(); |
366 | switch (LdOpc) { |
367 | default: |
368 | llvm_unreachable("Unsupported load instruction!"); |
369 | case AArch64::LDRBBui: |
370 | return StOpc == AArch64::STRBBui || StOpc == AArch64::STRHHui || |
371 | StOpc == AArch64::STRWui || StOpc == AArch64::STRXui; |
372 | case AArch64::LDURBBi: |
373 | return StOpc == AArch64::STURBBi || StOpc == AArch64::STURHHi || |
374 | StOpc == AArch64::STURWi || StOpc == AArch64::STURXi; |
375 | case AArch64::LDRHHui: |
376 | return StOpc == AArch64::STRHHui || StOpc == AArch64::STRWui || |
377 | StOpc == AArch64::STRXui; |
378 | case AArch64::LDURHHi: |
379 | return StOpc == AArch64::STURHHi || StOpc == AArch64::STURWi || |
380 | StOpc == AArch64::STURXi; |
381 | case AArch64::LDRWui: |
382 | return StOpc == AArch64::STRWui || StOpc == AArch64::STRXui; |
383 | case AArch64::LDURWi: |
384 | return StOpc == AArch64::STURWi || StOpc == AArch64::STURXi; |
385 | case AArch64::LDRXui: |
386 | return StOpc == AArch64::STRXui; |
387 | case AArch64::LDURXi: |
388 | return StOpc == AArch64::STURXi; |
389 | } |
390 | } |
391 | |
392 | static unsigned getPreIndexedOpcode(unsigned Opc) { |
393 | // FIXME: We don't currently support creating pre-indexed loads/stores when |
394 | // the load or store is the unscaled version. If we decide to perform such an |
395 | // optimization in the future the cases for the unscaled loads/stores will |
396 | // need to be added here. |
397 | switch (Opc) { |
398 | default: |
399 | llvm_unreachable("Opcode has no pre-indexed equivalent!"); |
400 | case AArch64::STRSui: |
401 | return AArch64::STRSpre; |
402 | case AArch64::STRDui: |
403 | return AArch64::STRDpre; |
404 | case AArch64::STRQui: |
405 | return AArch64::STRQpre; |
406 | case AArch64::STRBBui: |
407 | return AArch64::STRBBpre; |
408 | case AArch64::STRHHui: |
409 | return AArch64::STRHHpre; |
410 | case AArch64::STRWui: |
411 | return AArch64::STRWpre; |
412 | case AArch64::STRXui: |
413 | return AArch64::STRXpre; |
414 | case AArch64::LDRSui: |
415 | return AArch64::LDRSpre; |
416 | case AArch64::LDRDui: |
417 | return AArch64::LDRDpre; |
418 | case AArch64::LDRQui: |
419 | return AArch64::LDRQpre; |
420 | case AArch64::LDRBBui: |
421 | return AArch64::LDRBBpre; |
422 | case AArch64::LDRHHui: |
423 | return AArch64::LDRHHpre; |
424 | case AArch64::LDRWui: |
425 | return AArch64::LDRWpre; |
426 | case AArch64::LDRXui: |
427 | return AArch64::LDRXpre; |
428 | case AArch64::LDRSWui: |
429 | return AArch64::LDRSWpre; |
430 | case AArch64::LDPSi: |
431 | return AArch64::LDPSpre; |
432 | case AArch64::LDPSWi: |
433 | return AArch64::LDPSWpre; |
434 | case AArch64::LDPDi: |
435 | return AArch64::LDPDpre; |
436 | case AArch64::LDPQi: |
437 | return AArch64::LDPQpre; |
438 | case AArch64::LDPWi: |
439 | return AArch64::LDPWpre; |
440 | case AArch64::LDPXi: |
441 | return AArch64::LDPXpre; |
442 | case AArch64::STPSi: |
443 | return AArch64::STPSpre; |
444 | case AArch64::STPDi: |
445 | return AArch64::STPDpre; |
446 | case AArch64::STPQi: |
447 | return AArch64::STPQpre; |
448 | case AArch64::STPWi: |
449 | return AArch64::STPWpre; |
450 | case AArch64::STPXi: |
451 | return AArch64::STPXpre; |
452 | } |
453 | } |
454 | |
455 | static unsigned getPostIndexedOpcode(unsigned Opc) { |
456 | switch (Opc) { |
457 | default: |
458 | llvm_unreachable("Opcode has no post-indexed equivalent!"); |
459 | case AArch64::STRSui: |
460 | case AArch64::STURSi: |
461 | return AArch64::STRSpost; |
462 | case AArch64::STRDui: |
463 | case AArch64::STURDi: |
464 | return AArch64::STRDpost; |
465 | case AArch64::STRQui: |
466 | case AArch64::STURQi: |
467 | return AArch64::STRQpost; |
468 | case AArch64::STRBBui: |
469 | return AArch64::STRBBpost; |
470 | case AArch64::STRHHui: |
471 | return AArch64::STRHHpost; |
472 | case AArch64::STRWui: |
473 | case AArch64::STURWi: |
474 | return AArch64::STRWpost; |
475 | case AArch64::STRXui: |
476 | case AArch64::STURXi: |
477 | return AArch64::STRXpost; |
478 | case AArch64::LDRSui: |
479 | case AArch64::LDURSi: |
480 | return AArch64::LDRSpost; |
481 | case AArch64::LDRDui: |
482 | case AArch64::LDURDi: |
483 | return AArch64::LDRDpost; |
484 | case AArch64::LDRQui: |
485 | case AArch64::LDURQi: |
486 | return AArch64::LDRQpost; |
487 | case AArch64::LDRBBui: |
488 | return AArch64::LDRBBpost; |
489 | case AArch64::LDRHHui: |
490 | return AArch64::LDRHHpost; |
491 | case AArch64::LDRWui: |
492 | case AArch64::LDURWi: |
493 | return AArch64::LDRWpost; |
494 | case AArch64::LDRXui: |
495 | case AArch64::LDURXi: |
496 | return AArch64::LDRXpost; |
497 | case AArch64::LDRSWui: |
498 | return AArch64::LDRSWpost; |
499 | case AArch64::LDPSi: |
500 | return AArch64::LDPSpost; |
501 | case AArch64::LDPSWi: |
502 | return AArch64::LDPSWpost; |
503 | case AArch64::LDPDi: |
504 | return AArch64::LDPDpost; |
505 | case AArch64::LDPQi: |
506 | return AArch64::LDPQpost; |
507 | case AArch64::LDPWi: |
508 | return AArch64::LDPWpost; |
509 | case AArch64::LDPXi: |
510 | return AArch64::LDPXpost; |
511 | case AArch64::STPSi: |
512 | return AArch64::STPSpost; |
513 | case AArch64::STPDi: |
514 | return AArch64::STPDpost; |
515 | case AArch64::STPQi: |
516 | return AArch64::STPQpost; |
517 | case AArch64::STPWi: |
518 | return AArch64::STPWpost; |
519 | case AArch64::STPXi: |
520 | return AArch64::STPXpost; |
521 | } |
522 | } |
523 | |
524 | static bool isPairedLdSt(const MachineInstr &MI) { |
525 | switch (MI.getOpcode()) { |
526 | default: |
527 | return false; |
528 | case AArch64::LDPSi: |
529 | case AArch64::LDPSWi: |
530 | case AArch64::LDPDi: |
531 | case AArch64::LDPQi: |
532 | case AArch64::LDPWi: |
533 | case AArch64::LDPXi: |
534 | case AArch64::STPSi: |
535 | case AArch64::STPDi: |
536 | case AArch64::STPQi: |
537 | case AArch64::STPWi: |
538 | case AArch64::STPXi: |
539 | return true; |
540 | } |
541 | } |
542 | |
543 | static const MachineOperand &getLdStRegOp(const MachineInstr &MI, |
544 | unsigned PairedRegOp = 0) { |
545 | assert(PairedRegOp < 2 && "Unexpected register operand idx."); |
546 | unsigned Idx = isPairedLdSt(MI) ? PairedRegOp : 0; |
547 | return MI.getOperand(Idx); |
548 | } |
549 | |
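// Single loads/stores have operands (Rt, Base, Offset); the paired forms |
// have (Rt, Rt2, Base, Offset). The helpers below select operand indices |
// accordingly. |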
550 | static const MachineOperand &getLdStBaseOp(const MachineInstr &MI) { |
551 | unsigned Idx = isPairedLdSt(MI) ? 2 : 1; |
552 | return MI.getOperand(Idx); |
553 | } |
554 | |
555 | static const MachineOperand &getLdStOffsetOp(const MachineInstr &MI) { |
556 | unsigned Idx = isPairedLdSt(MI) ? 3 : 2; |
557 | return MI.getOperand(Idx); |
558 | } |
559 | |
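// Returns true when the bytes read by LoadInst lie entirely within the bytes |
// written by StoreInst. For example, an LDRBBui reading byte 6 is in range of |
// an STRXui writing bytes 0-7 from the same base register. |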
560 | static bool isLdOffsetInRangeOfSt(MachineInstr &LoadInst, |
561 | MachineInstr &StoreInst, |
562 | const AArch64InstrInfo *TII) { |
563 | assert(isMatchingStore(LoadInst, StoreInst) && "Expect only matched ld/st."); |
564 | int LoadSize = getMemScale(LoadInst); |
565 | int StoreSize = getMemScale(StoreInst); |
566 | int UnscaledStOffset = TII->isUnscaledLdSt(StoreInst) |
567 | ? getLdStOffsetOp(StoreInst).getImm() |
568 | : getLdStOffsetOp(StoreInst).getImm() * StoreSize; |
569 | int UnscaledLdOffset = TII->isUnscaledLdSt(LoadInst) |
570 | ? getLdStOffsetOp(LoadInst).getImm() |
571 | : getLdStOffsetOp(LoadInst).getImm() * LoadSize; |
572 | return (UnscaledStOffset <= UnscaledLdOffset) && |
573 | (UnscaledLdOffset + LoadSize <= (UnscaledStOffset + StoreSize)); |
574 | } |
575 | |
576 | static bool isPromotableZeroStoreInst(MachineInstr &MI) { |
577 | unsigned Opc = MI.getOpcode(); |
578 | return (Opc == AArch64::STRWui || Opc == AArch64::STURWi || |
579 | isNarrowStore(Opc)) && |
580 | getLdStRegOp(MI).getReg() == AArch64::WZR; |
581 | } |
582 | |
583 | static bool isPromotableLoadFromStore(MachineInstr &MI) { |
584 | switch (MI.getOpcode()) { |
585 | default: |
586 | return false; |
587 | // Scaled instructions. |
588 | case AArch64::LDRBBui: |
589 | case AArch64::LDRHHui: |
590 | case AArch64::LDRWui: |
591 | case AArch64::LDRXui: |
592 | // Unscaled instructions. |
593 | case AArch64::LDURBBi: |
594 | case AArch64::LDURHHi: |
595 | case AArch64::LDURWi: |
596 | case AArch64::LDURXi: |
597 | return true; |
598 | } |
599 | } |
600 | |
601 | static bool isMergeableLdStUpdate(MachineInstr &MI) { |
602 | unsigned Opc = MI.getOpcode(); |
603 | switch (Opc) { |
604 | default: |
605 | return false; |
606 | // Scaled instructions. |
607 | case AArch64::STRSui: |
608 | case AArch64::STRDui: |
609 | case AArch64::STRQui: |
610 | case AArch64::STRXui: |
611 | case AArch64::STRWui: |
612 | case AArch64::STRHHui: |
613 | case AArch64::STRBBui: |
614 | case AArch64::LDRSui: |
615 | case AArch64::LDRDui: |
616 | case AArch64::LDRQui: |
617 | case AArch64::LDRXui: |
618 | case AArch64::LDRWui: |
619 | case AArch64::LDRHHui: |
620 | case AArch64::LDRBBui: |
621 | // Unscaled instructions. |
622 | case AArch64::STURSi: |
623 | case AArch64::STURDi: |
624 | case AArch64::STURQi: |
625 | case AArch64::STURWi: |
626 | case AArch64::STURXi: |
627 | case AArch64::LDURSi: |
628 | case AArch64::LDURDi: |
629 | case AArch64::LDURQi: |
630 | case AArch64::LDURWi: |
631 | case AArch64::LDURXi: |
632 | // Paired instructions. |
633 | case AArch64::LDPSi: |
634 | case AArch64::LDPSWi: |
635 | case AArch64::LDPDi: |
636 | case AArch64::LDPQi: |
637 | case AArch64::LDPWi: |
638 | case AArch64::LDPXi: |
639 | case AArch64::STPSi: |
640 | case AArch64::STPDi: |
641 | case AArch64::STPQi: |
642 | case AArch64::STPWi: |
643 | case AArch64::STPXi: |
644 | // Make sure this is a reg+imm (as opposed to an address reloc). |
645 | if (!getLdStOffsetOp(MI).isImm()) |
646 | return false; |
647 | |
648 | return true; |
649 | } |
650 | } |
651 | |
652 | MachineBasicBlock::iterator |
653 | AArch64LoadStoreOpt::mergeNarrowZeroStores(MachineBasicBlock::iterator I, |
654 | MachineBasicBlock::iterator MergeMI, |
655 | const LdStPairFlags &Flags) { |
656 | assert(isPromotableZeroStoreInst(*I) && isPromotableZeroStoreInst(*MergeMI) && |
657 |        "Expected promotable zero stores."); |
658 | |
659 | MachineBasicBlock::iterator NextI = I; |
660 | ++NextI; |
661 | // If NextI is the second of the two instructions to be merged, we need |
662 | // to skip one further. Either way we merge, the iterator will be invalidated, |
663 | // and we don't need to scan the new instruction, as it's a pairwise |
664 | // instruction, which we're not considering for further action anyway. |
665 | if (NextI == MergeMI) |
666 | ++NextI; |
667 | |
668 | unsigned Opc = I->getOpcode(); |
669 | bool IsScaled = !TII->isUnscaledLdSt(Opc); |
670 | int OffsetStride = IsScaled ? 1 : getMemScale(*I); |
671 | |
672 | bool MergeForward = Flags.getMergeForward(); |
673 | // Insert our new paired instruction after whichever of the paired |
674 | // instructions MergeForward indicates. |
675 | MachineBasicBlock::iterator InsertionPoint = MergeForward ? MergeMI : I; |
676 | // MergeForward also determines from which instruction we copy the base |
677 | // register operand, so that the flags stay compatible with the input code. |
678 | const MachineOperand &BaseRegOp = |
679 | MergeForward ? getLdStBaseOp(*MergeMI) : getLdStBaseOp(*I); |
680 | |
681 | // Which register is Rt and which is Rt2 depends on the offset order. |
682 | MachineInstr *RtMI; |
683 | if (getLdStOffsetOp(*I).getImm() == |
684 | getLdStOffsetOp(*MergeMI).getImm() + OffsetStride) |
685 | RtMI = &*MergeMI; |
686 | else |
687 | RtMI = &*I; |
688 | |
689 | int OffsetImm = getLdStOffsetOp(*RtMI).getImm(); |
690 | // Change the scaled offset from small to large type. |
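// For example, two STRBBui zero stores at byte offsets 10 and 11 become one |
// STRHHui whose halfword-scaled offset is 5 (i.e. byte offset 10). |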
691 | if (IsScaled) { |
692 | assert(((OffsetImm & 1) == 0) && "Unexpected offset to merge"); |
693 | OffsetImm /= 2; |
694 | } |
695 | |
696 | // Construct the new instruction. |
697 | DebugLoc DL = I->getDebugLoc(); |
698 | MachineBasicBlock *MBB = I->getParent(); |
699 | MachineInstrBuilder MIB; |
700 | MIB = BuildMI(*MBB, InsertionPoint, DL, TII->get(getMatchingWideOpcode(Opc))) |
701 | .addReg(isNarrowStore(Opc) ? AArch64::WZR : AArch64::XZR) |
702 | .add(BaseRegOp) |
703 | .addImm(OffsetImm) |
704 | .cloneMergedMemRefs({&*I, &*MergeMI}) |
705 | .setMIFlags(I->mergeFlagsWith(*MergeMI)); |
706 | (void)MIB; |
707 | |
708 | LLVM_DEBUG(dbgs() << "Creating wider store. Replacing instructions:\n "); |
709 | LLVM_DEBUG(I->print(dbgs())); |
710 | LLVM_DEBUG(dbgs() << " "); |
711 | LLVM_DEBUG(MergeMI->print(dbgs())); |
712 | LLVM_DEBUG(dbgs() << " with instruction:\n "); |
713 | LLVM_DEBUG(((MachineInstr *)MIB)->print(dbgs())); |
714 | LLVM_DEBUG(dbgs() << "\n"); |
715 | |
716 | // Erase the old instructions. |
717 | I->eraseFromParent(); |
718 | MergeMI->eraseFromParent(); |
719 | return NextI; |
720 | } |
721 | |
722 | MachineBasicBlock::iterator |
723 | AArch64LoadStoreOpt::mergePairedInsns(MachineBasicBlock::iterator I, |
724 | MachineBasicBlock::iterator Paired, |
725 | const LdStPairFlags &Flags) { |
726 | MachineBasicBlock::iterator NextI = I; |
727 | ++NextI; |
728 | // If NextI is the second of the two instructions to be merged, we need |
729 | // to skip one further. Either way we merge, the iterator will be invalidated, |
730 | // and we don't need to scan the new instruction, as it's a pairwise |
731 | // instruction, which we're not considering for further action anyway. |
732 | if (NextI == Paired) |
733 | ++NextI; |
734 | |
735 | int SExtIdx = Flags.getSExtIdx(); |
736 | unsigned Opc = |
737 | SExtIdx == -1 ? I->getOpcode() : getMatchingNonSExtOpcode(I->getOpcode()); |
738 | bool IsUnscaled = TII->isUnscaledLdSt(Opc); |
739 | int OffsetStride = IsUnscaled ? getMemScale(*I) : 1; |
740 | |
741 | bool MergeForward = Flags.getMergeForward(); |
742 | // Insert our new paired instruction after whichever of the paired |
743 | // instructions MergeForward indicates. |
744 | MachineBasicBlock::iterator InsertionPoint = MergeForward ? Paired : I; |
745 | // MergeForward also determines from which instruction we copy the base |
746 | // register operand, so that the flags stay compatible with the input code. |
747 | const MachineOperand &BaseRegOp = |
748 | MergeForward ? getLdStBaseOp(*Paired) : getLdStBaseOp(*I); |
749 | |
750 | int Offset = getLdStOffsetOp(*I).getImm(); |
751 | int PairedOffset = getLdStOffsetOp(*Paired).getImm(); |
752 | bool PairedIsUnscaled = TII->isUnscaledLdSt(Paired->getOpcode()); |
753 | if (IsUnscaled != PairedIsUnscaled) { |
754 | // We're trying to pair instructions that differ in how they are scaled. If |
755 | // I is scaled then scale the offset of Paired accordingly. Otherwise, do |
756 | // the opposite (i.e., make Paired's offset unscaled). |
757 | int MemSize = getMemScale(*Paired); |
758 | if (PairedIsUnscaled) { |
759 | // If the unscaled offset isn't a multiple of the MemSize, we can't |
760 | // pair the operations together. |
761 | assert(!(PairedOffset % getMemScale(*Paired)) && |
762 |        "Offset should be a multiple of the stride!"); |
763 | PairedOffset /= MemSize; |
764 | } else { |
765 | PairedOffset *= MemSize; |
766 | } |
767 | } |
768 | |
769 | // Which register is Rt and which is Rt2 depends on the offset order. |
770 | MachineInstr *RtMI, *Rt2MI; |
771 | if (Offset == PairedOffset + OffsetStride) { |
772 | RtMI = &*Paired; |
773 | Rt2MI = &*I; |
774 | // Here we swapped the assumption made for SExtIdx. |
775 | // I.e., we turn ldp I, Paired into ldp Paired, I. |
776 | // Update the index accordingly. |
777 | if (SExtIdx != -1) |
778 | SExtIdx = (SExtIdx + 1) % 2; |
779 | } else { |
780 | RtMI = &*I; |
781 | Rt2MI = &*Paired; |
782 | } |
783 | int OffsetImm = getLdStOffsetOp(*RtMI).getImm(); |
784 | // Scale the immediate offset, if necessary. |
785 | if (TII->isUnscaledLdSt(RtMI->getOpcode())) { |
786 | assert(!(OffsetImm % getMemScale(*RtMI)) && |
787 |        "Unscaled offset cannot be scaled."); |
788 | OffsetImm /= getMemScale(*RtMI); |
789 | } |
790 | |
791 | // Construct the new instruction. |
792 | MachineInstrBuilder MIB; |
793 | DebugLoc DL = I->getDebugLoc(); |
794 | MachineBasicBlock *MBB = I->getParent(); |
795 | MachineOperand RegOp0 = getLdStRegOp(*RtMI); |
796 | MachineOperand RegOp1 = getLdStRegOp(*Rt2MI); |
797 | // Kill flags may become invalid when moving stores for pairing. |
798 | if (RegOp0.isUse()) { |
799 | if (!MergeForward) { |
800 | // Clear kill flags on store if moving upwards. Example: |
801 | // STRWui %w0, ... |
802 | // USE %w1 |
803 | // STRWui kill %w1 ; need to clear kill flag when moving STRWui upwards |
804 | RegOp0.setIsKill(false); |
805 | RegOp1.setIsKill(false); |
806 | } else { |
807 | // Clear kill flags of the first store's register. Example: |
808 | // STRWui %w1, ... |
809 | // USE kill %w1 ; need to clear kill flag when moving STRWui downwards |
810 | // STRW %w0 |
811 | unsigned Reg = getLdStRegOp(*I).getReg(); |
812 | for (MachineInstr &MI : make_range(std::next(I), Paired)) |
813 | MI.clearRegisterKills(Reg, TRI); |
814 | } |
815 | } |
816 | MIB = BuildMI(*MBB, InsertionPoint, DL, TII->get(getMatchingPairOpcode(Opc))) |
817 | .add(RegOp0) |
818 | .add(RegOp1) |
819 | .add(BaseRegOp) |
820 | .addImm(OffsetImm) |
821 | .cloneMergedMemRefs({&*I, &*Paired}) |
822 | .setMIFlags(I->mergeFlagsWith(*Paired)); |
823 | |
824 | (void)MIB; |
825 | |
826 | LLVM_DEBUG( |
827 |     dbgs() << "Creating pair load/store. Replacing instructions:\n "); |
828 | LLVM_DEBUG(I->print(dbgs())); |
829 | LLVM_DEBUG(dbgs() << " "); |
830 | LLVM_DEBUG(Paired->print(dbgs())); |
831 | LLVM_DEBUG(dbgs() << " with instruction:\n "); |
832 | if (SExtIdx != -1) { |
833 | // Generate the sign extension for the proper result of the ldp. |
834 | // I.e., with X1, that would be: |
835 | // %w1 = KILL %w1, implicit-def %x1 |
836 | // %x1 = SBFMXri killed %x1, 0, 31 |
837 | MachineOperand &DstMO = MIB->getOperand(SExtIdx); |
838 | // Right now, DstMO has the extended register, since it comes from an |
839 | // extended opcode. |
840 | unsigned DstRegX = DstMO.getReg(); |
841 | // Get the W variant of that register. |
842 | unsigned DstRegW = TRI->getSubReg(DstRegX, AArch64::sub_32); |
843 | // Update the result of LDP to use the W instead of the X variant. |
844 | DstMO.setReg(DstRegW); |
845 | LLVM_DEBUG(((MachineInstr *)MIB)->print(dbgs())); |
846 | LLVM_DEBUG(dbgs() << "\n"); |
847 | // Make the machine verifier happy by providing a definition for |
848 | // the X register. |
849 | // Insert this definition right after the generated LDP, i.e., before |
850 | // InsertionPoint. |
851 | MachineInstrBuilder MIBKill = |
852 | BuildMI(*MBB, InsertionPoint, DL, TII->get(TargetOpcode::KILL), DstRegW) |
853 | .addReg(DstRegW) |
854 | .addReg(DstRegX, RegState::Define); |
855 | MIBKill->getOperand(2).setImplicit(); |
856 | // Create the sign extension. |
857 | MachineInstrBuilder MIBSXTW = |
858 | BuildMI(*MBB, InsertionPoint, DL, TII->get(AArch64::SBFMXri), DstRegX) |
859 | .addReg(DstRegX) |
860 | .addImm(0) |
861 | .addImm(31); |
862 | (void)MIBSXTW; |
863 | LLVM_DEBUG(dbgs() << " Extend operand:\n "); |
864 | LLVM_DEBUG(((MachineInstr *)MIBSXTW)->print(dbgs())); |
865 | } else { |
866 | LLVM_DEBUG(((MachineInstr *)MIB)->print(dbgs())); |
867 | } |
868 | LLVM_DEBUG(dbgs() << "\n"); |
869 | |
870 | // Erase the old instructions. |
871 | I->eraseFromParent(); |
872 | Paired->eraseFromParent(); |
873 | |
874 | return NextI; |
875 | } |
876 | |
877 | MachineBasicBlock::iterator |
878 | AArch64LoadStoreOpt::promoteLoadFromStore(MachineBasicBlock::iterator LoadI, |
879 | MachineBasicBlock::iterator StoreI) { |
880 | MachineBasicBlock::iterator NextI = LoadI; |
881 | ++NextI; |
882 | |
883 | int LoadSize = getMemScale(*LoadI); |
884 | int StoreSize = getMemScale(*StoreI); |
885 | unsigned LdRt = getLdStRegOp(*LoadI).getReg(); |
886 | const MachineOperand &StMO = getLdStRegOp(*StoreI); |
887 | unsigned StRt = getLdStRegOp(*StoreI).getReg(); |
888 | bool IsStoreXReg = TRI->getRegClass(AArch64::GPR64RegClassID)->contains(StRt); |
889 | |
890 | assert((IsStoreXReg || |
891 |         TRI->getRegClass(AArch64::GPR32RegClassID)->contains(StRt)) && |
892 |        "Unexpected RegClass"); |
893 | |
894 | MachineInstr *BitExtMI; |
895 | if (LoadSize == StoreSize && (LoadSize == 4 || LoadSize == 8)) { |
896 | // Remove the load if its destination register is the same as the |
897 | // register holding the stored value. |
898 | if (StRt == LdRt && LoadSize == 8) { |
899 | for (MachineInstr &MI : make_range(StoreI->getIterator(), |
900 | LoadI->getIterator())) { |
901 | if (MI.killsRegister(StRt, TRI)) { |
902 | MI.clearRegisterKills(StRt, TRI); |
903 | break; |
904 | } |
905 | } |
906 | LLVM_DEBUG(dbgs() << "Remove load instruction:\n "); |
907 | LLVM_DEBUG(LoadI->print(dbgs())); |
908 | LLVM_DEBUG(dbgs() << "\n"); |
909 | LoadI->eraseFromParent(); |
910 | return NextI; |
911 | } |
912 | // Replace the load with a mov if the load and store have the same size. |
913 | BitExtMI = |
914 | BuildMI(*LoadI->getParent(), LoadI, LoadI->getDebugLoc(), |
915 | TII->get(IsStoreXReg ? AArch64::ORRXrs : AArch64::ORRWrs), LdRt) |
916 | .addReg(IsStoreXReg ? AArch64::XZR : AArch64::WZR) |
917 | .add(StMO) |
918 | .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0)) |
919 | .setMIFlags(LoadI->getFlags()); |
920 | } else { |
921 | // FIXME: Currently we disable this transformation in big-endian targets as |
922 | // performance and correctness are verified only in little-endian. |
923 | if (!Subtarget->isLittleEndian()) |
924 | return NextI; |
925 | bool IsUnscaled = TII->isUnscaledLdSt(*LoadI); |
926 | assert(IsUnscaled == TII->isUnscaledLdSt(*StoreI) && |
927 |        "Unsupported ld/st match"); |
928 | assert(LoadSize <= StoreSize && "Invalid load size"); |
929 | int UnscaledLdOffset = IsUnscaled |
930 | ? getLdStOffsetOp(*LoadI).getImm() |
931 | : getLdStOffsetOp(*LoadI).getImm() * LoadSize; |
932 | int UnscaledStOffset = IsUnscaled |
933 | ? getLdStOffsetOp(*StoreI).getImm() |
934 | : getLdStOffsetOp(*StoreI).getImm() * StoreSize; |
935 | int Width = LoadSize * 8; |
936 | int Immr = 8 * (UnscaledLdOffset - UnscaledStOffset); |
937 | int Imms = Immr + Width - 1; |
Value stored to 'Imms' during its initialization is never read | |
938 | unsigned DestReg = IsStoreXReg |
939 | ? TRI->getMatchingSuperReg(LdRt, AArch64::sub_32, |
940 | &AArch64::GPR64RegClass) |
941 | : LdRt; |
942 | |
943 | assert((UnscaledLdOffset >= UnscaledStOffset && |
944 |         (UnscaledLdOffset + LoadSize) <= UnscaledStOffset + StoreSize) && |
945 |        "Invalid offset"); |
946 | |
947 | Immr = 8 * (UnscaledLdOffset - UnscaledStOffset); |
948 | Imms = Immr + Width - 1; |
949 | if (UnscaledLdOffset == UnscaledStOffset) { |
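// Same start offset: extract the low Width bits with an AND. The immediate |
// packs the logical-immediate fields N:immr:imms; e.g. an 8-bit load from a |
// 64-bit store gives N=1, immr=0, imms=7, which encodes the mask 0xff. |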
950 | uint32_t AndMaskEncoded = ((IsStoreXReg ? 1 : 0) << 12) // N |
951 | | ((Immr) << 6) // immr |
952 | | ((Imms) << 0) // imms |
953 | ; |
954 | |
955 | BitExtMI = |
956 | BuildMI(*LoadI->getParent(), LoadI, LoadI->getDebugLoc(), |
957 | TII->get(IsStoreXReg ? AArch64::ANDXri : AArch64::ANDWri), |
958 | DestReg) |
959 | .add(StMO) |
960 | .addImm(AndMaskEncoded) |
961 | .setMIFlags(LoadI->getFlags()); |
962 | } else { |
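// The load reads an interior field of the stored value, so extract it with |
// an unsigned bitfield move: UBFM copies bits [Immr, Imms] down to bit 0. |
// E.g. a byte load one byte into a word store uses Immr=8, Imms=15. |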
963 | BitExtMI = |
964 | BuildMI(*LoadI->getParent(), LoadI, LoadI->getDebugLoc(), |
965 | TII->get(IsStoreXReg ? AArch64::UBFMXri : AArch64::UBFMWri), |
966 | DestReg) |
967 | .add(StMO) |
968 | .addImm(Immr) |
969 | .addImm(Imms) |
970 | .setMIFlags(LoadI->getFlags()); |
971 | } |
972 | } |
973 | |
974 | // Clear kill flags between store and load. |
975 | for (MachineInstr &MI : make_range(StoreI->getIterator(), |
976 | BitExtMI->getIterator())) |
977 | if (MI.killsRegister(StRt, TRI)) { |
978 | MI.clearRegisterKills(StRt, TRI); |
979 | break; |
980 | } |
981 | |
982 | LLVM_DEBUG(dbgs() << "Promoting load by replacing :\n "); |
983 | LLVM_DEBUG(StoreI->print(dbgs())); |
984 | LLVM_DEBUG(dbgs() << " "); |
985 | LLVM_DEBUG(LoadI->print(dbgs())); |
986 | LLVM_DEBUG(dbgs() << " with instructions:\n "); |
987 | LLVM_DEBUG(StoreI->print(dbgs())); |
988 | LLVM_DEBUG(dbgs() << " "); |
989 | LLVM_DEBUG((BitExtMI)->print(dbgs())); |
990 | LLVM_DEBUG(dbgs() << "\n"); |
991 | |
992 | // Erase the old instructions. |
993 | LoadI->eraseFromParent(); |
994 | return NextI; |
995 | } |
996 | |
997 | static bool inBoundsForPair(bool IsUnscaled, int Offset, int OffsetStride) { |
998 | // Convert the byte-offset used by unscaled into an "element" offset used |
999 | // by the scaled pair load/store instructions. |
1000 | if (IsUnscaled) { |
1001 | // If the byte-offset isn't a multiple of the stride, there's no point |
1002 | // trying to match it. |
1003 | if (Offset % OffsetStride) |
1004 | return false; |
1005 | Offset /= OffsetStride; |
1006 | } |
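// The paired instructions encode a 7-bit signed element offset, hence the |
// [-64, 63] range. |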
1007 | return Offset <= 63 && Offset >= -64; |
1008 | } |
1009 | |
1010 | // Do alignment, specialized to power of 2 and for signed ints, |
1011 | // avoiding having to do a C-style cast from uint64_t to int when |
1012 | // using alignTo from include/llvm/Support/MathExtras.h. |
1013 | // FIXME: Move this function to include/MathExtras.h? |
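// For example, alignTo(5, 4) == 8 and alignTo(-3, 4) == 0. |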
1014 | static int alignTo(int Num, int PowOf2) { |
1015 | return (Num + PowOf2 - 1) & ~(PowOf2 - 1); |
1016 | } |
1017 | |
1018 | static bool mayAlias(MachineInstr &MIa, MachineInstr &MIb, |
1019 | AliasAnalysis *AA) { |
1020 | // One of the instructions must modify memory. |
1021 | if (!MIa.mayStore() && !MIb.mayStore()) |
1022 | return false; |
1023 | |
1024 | // Both instructions must be memory operations. |
1025 | if (!MIa.mayLoadOrStore() && !MIb.mayLoadOrStore()) |
1026 | return false; |
1027 | |
1028 | return MIa.mayAlias(AA, MIb, /*UseTBAA*/false); |
1029 | } |
1030 | |
1031 | static bool mayAlias(MachineInstr &MIa, |
1032 | SmallVectorImpl<MachineInstr *> &MemInsns, |
1033 | AliasAnalysis *AA) { |
1034 | for (MachineInstr *MIb : MemInsns) |
1035 | if (mayAlias(MIa, *MIb, AA)) |
1036 | return true; |
1037 | |
1038 | return false; |
1039 | } |
1040 | |
1041 | bool AArch64LoadStoreOpt::findMatchingStore( |
1042 | MachineBasicBlock::iterator I, unsigned Limit, |
1043 | MachineBasicBlock::iterator &StoreI) { |
1044 | MachineBasicBlock::iterator B = I->getParent()->begin(); |
1045 | MachineBasicBlock::iterator MBBI = I; |
1046 | MachineInstr &LoadMI = *I; |
1047 | unsigned BaseReg = getLdStBaseOp(LoadMI).getReg(); |
1048 | |
1049 | // If the load is the first instruction in the block, there's obviously |
1050 | // not any matching store. |
1051 | if (MBBI == B) |
1052 | return false; |
1053 | |
1054 | // Track which register units have been modified and used between the first |
1055 | // insn and the second insn. |
1056 | ModifiedRegUnits.clear(); |
1057 | UsedRegUnits.clear(); |
1058 | |
1059 | unsigned Count = 0; |
1060 | do { |
1061 | --MBBI; |
1062 | MachineInstr &MI = *MBBI; |
1063 | |
1064 | // Don't count transient instructions towards the search limit since there |
1065 | // may be different numbers of them if e.g. debug information is present. |
1066 | if (!MI.isTransient()) |
1067 | ++Count; |
1068 | |
1069 | // If the load instruction reads directly from the address to which the |
1070 | // store instruction writes and the stored value is not modified, we can |
1071 | // promote the load. Since we do not handle stores with pre-/post-index, |
1072 | // it's unnecessary to check if BaseReg is modified by the store itself. |
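// For example, in "str w1, [x0]; ldr w2, [x0]" the load can be rewritten as |
// a register move from w1 (see promoteLoadFromStore). |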
1073 | if (MI.mayStore() && isMatchingStore(LoadMI, MI) && |
1074 | BaseReg == getLdStBaseOp(MI).getReg() && |
1075 | isLdOffsetInRangeOfSt(LoadMI, MI, TII) && |
1076 | ModifiedRegUnits.available(getLdStRegOp(MI).getReg())) { |
1077 | StoreI = MBBI; |
1078 | return true; |
1079 | } |
1080 | |
1081 | if (MI.isCall()) |
1082 | return false; |
1083 | |
1084 | // Update modified / used register units. |
1085 | LiveRegUnits::accumulateUsedDefed(MI, ModifiedRegUnits, UsedRegUnits, TRI); |
1086 | |
1087 | // Otherwise, if the base register is modified, we have no match, so |
1088 | // return early. |
1089 | if (!ModifiedRegUnits.available(BaseReg)) |
1090 | return false; |
1091 | |
1092 | // If we encounter a store aliased with the load, return early. |
1093 | if (MI.mayStore() && mayAlias(LoadMI, MI, AA)) |
1094 | return false; |
1095 | } while (MBBI != B && Count < Limit); |
1096 | return false; |
1097 | } |
1098 | |
1099 | // Returns true if FirstMI and MI are candidates for merging or pairing. |
1100 | // Otherwise, returns false. |
1101 | static bool areCandidatesToMergeOrPair(MachineInstr &FirstMI, MachineInstr &MI, |
1102 | LdStPairFlags &Flags, |
1103 | const AArch64InstrInfo *TII) { |
1104 | // If this is volatile or if pairing is suppressed, not a candidate. |
1105 | if (MI.hasOrderedMemoryRef() || TII->isLdStPairSuppressed(MI)) |
1106 | return false; |
1107 | |
1108 | // We should have already checked FirstMI for pair suppression and volatility. |
1109 | assert(!FirstMI.hasOrderedMemoryRef() && |
1110 |        !TII->isLdStPairSuppressed(FirstMI) && |
1111 |        "FirstMI shouldn't get here if either of these checks are true."); |
1112 | |
1113 | unsigned OpcA = FirstMI.getOpcode(); |
1114 | unsigned OpcB = MI.getOpcode(); |
1115 | |
1116 | // Opcodes match: nothing more to check. |
1117 | if (OpcA == OpcB) |
1118 | return true; |
1119 | |
1120 | // Try to match a sign-extended load/store with a zero-extended load/store. |
1121 | bool IsValidLdStrOpc, PairIsValidLdStrOpc; |
1122 | unsigned NonSExtOpc = getMatchingNonSExtOpcode(OpcA, &IsValidLdStrOpc); |
1123 | assert(IsValidLdStrOpc &&
1124 |        "Given Opc should be a Load or Store with an immediate");
1125 | // OpcA will be the first instruction in the pair. |
1126 | if (NonSExtOpc == getMatchingNonSExtOpcode(OpcB, &PairIsValidLdStrOpc)) { |
1127 | Flags.setSExtIdx(NonSExtOpc == (unsigned)OpcA ? 1 : 0); |
1128 | return true; |
1129 | } |
1130 | |
1131 | // If the second instruction isn't even a mergable/pairable load/store, bail |
1132 | // out. |
1133 | if (!PairIsValidLdStrOpc) |
1134 | return false; |
1135 | |
1136 | // FIXME: We don't support merging narrow stores with mixed scaled/unscaled |
1137 | // offsets. |
1138 | if (isNarrowStore(OpcA) || isNarrowStore(OpcB)) |
1139 | return false; |
1140 | |
1141 | // Try to match an unscaled load/store with a scaled load/store. |
1142 | return TII->isUnscaledLdSt(OpcA) != TII->isUnscaledLdSt(OpcB) && |
1143 | getMatchingPairOpcode(OpcA) == getMatchingPairOpcode(OpcB); |
1144 | |
1145 | // FIXME: Can we also match a mixed sext/zext unscaled/scaled pair? |
1146 | } |
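     |
     | // Hedged example of the opcode matching above (a sketch, not from the
     | // source): LDRSWui and LDRWui share the non-sext opcode LDRWui, so
     | //   ldrsw x0, [x2]
     | //   ldr   w1, [x2, #4]
     | // are candidates, with SExtIdx recording which half needs the sign
     | // extension when the pair is formed. The final return likewise lets a
     | // scaled STRXui match an unscaled STURXi, since both map to the same
     | // pair opcode via getMatchingPairOpcode.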
1147 | |
1148 | /// Scan the instructions looking for a load/store that can be combined with the |
1149 | /// current instruction into a wider equivalent or a load/store pair. |
1150 | MachineBasicBlock::iterator |
1151 | AArch64LoadStoreOpt::findMatchingInsn(MachineBasicBlock::iterator I, |
1152 | LdStPairFlags &Flags, unsigned Limit, |
1153 | bool FindNarrowMerge) { |
1154 | MachineBasicBlock::iterator E = I->getParent()->end(); |
1155 | MachineBasicBlock::iterator MBBI = I; |
1156 | MachineInstr &FirstMI = *I; |
1157 | ++MBBI; |
1158 | |
1159 | bool MayLoad = FirstMI.mayLoad(); |
1160 | bool IsUnscaled = TII->isUnscaledLdSt(FirstMI); |
1161 | unsigned Reg = getLdStRegOp(FirstMI).getReg(); |
1162 | unsigned BaseReg = getLdStBaseOp(FirstMI).getReg(); |
1163 | int Offset = getLdStOffsetOp(FirstMI).getImm(); |
1164 | int OffsetStride = IsUnscaled ? getMemScale(FirstMI) : 1; |
1165 | bool IsPromotableZeroStore = isPromotableZeroStoreInst(FirstMI); |
1166 | |
1167 | // Track which register units have been modified and used between the first |
1168 | // insn (inclusive) and the second insn. |
1169 | ModifiedRegUnits.clear(); |
1170 | UsedRegUnits.clear(); |
1171 | |
1172 | // Remember any instructions that read/write memory between FirstMI and MI. |
1173 | SmallVector<MachineInstr *, 4> MemInsns; |
1174 | |
1175 | for (unsigned Count = 0; MBBI != E && Count < Limit; ++MBBI) { |
1176 | MachineInstr &MI = *MBBI; |
1177 | |
1178 | // Don't count transient instructions towards the search limit since there |
1179 | // may be different numbers of them if e.g. debug information is present. |
1180 | if (!MI.isTransient()) |
1181 | ++Count; |
1182 | |
1183 | Flags.setSExtIdx(-1); |
1184 | if (areCandidatesToMergeOrPair(FirstMI, MI, Flags, TII) && |
1185 | getLdStOffsetOp(MI).isImm()) { |
1186 | assert(MI.mayLoadOrStore() && "Expected memory operation.");
1187 | // If we've found another instruction with the same opcode, check to see |
1188 | // if the base and offset are compatible with our starting instruction. |
1189 | // These instructions all have scaled immediate operands, so we just |
1190 | // check for +1/-1. Make sure to check the new instruction offset is |
1191 | // actually an immediate and not a symbolic reference destined for |
1192 | // a relocation. |
1193 | unsigned MIBaseReg = getLdStBaseOp(MI).getReg(); |
1194 | int MIOffset = getLdStOffsetOp(MI).getImm(); |
1195 | bool MIIsUnscaled = TII->isUnscaledLdSt(MI); |
1196 | if (IsUnscaled != MIIsUnscaled) { |
1197 | // We're trying to pair instructions that differ in how they are scaled. |
1198 | // If FirstMI is scaled then scale the offset of MI accordingly. |
1199 | // Otherwise, do the opposite (i.e., make MI's offset unscaled). |
1200 | int MemSize = getMemScale(MI); |
1201 | if (MIIsUnscaled) { |
1202 | // If the unscaled offset isn't a multiple of the MemSize, we can't |
1203 | // pair the operations together: bail and keep looking. |
1204 | if (MIOffset % MemSize) { |
1205 | LiveRegUnits::accumulateUsedDefed(MI, ModifiedRegUnits, |
1206 | UsedRegUnits, TRI); |
1207 | MemInsns.push_back(&MI); |
1208 | continue; |
1209 | } |
1210 | MIOffset /= MemSize; |
1211 | } else { |
1212 | MIOffset *= MemSize; |
1213 | } |
1214 | } |
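     | // Worked example of the normalization above (illustrative, not from
     | // the source): pairing a scaled LDRXui (FirstMI) with an unscaled
     | // LDURXi (MI, MemSize == 8):
     | //   MIOffset == 16 -> 16 % 8 == 0, so MIOffset becomes 2 (scaled);
     | //   MIOffset == 12 -> 12 % 8 != 0, so the candidate is skipped.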
1215 | |
1216 | if (BaseReg == MIBaseReg && ((Offset == MIOffset + OffsetStride) || |
1217 | (Offset + OffsetStride == MIOffset))) { |
1218 | int MinOffset = Offset < MIOffset ? Offset : MIOffset; |
1219 | if (FindNarrowMerge) { |
1220 | // If the alignment requirements of the scaled wide load/store |
1221 | // instruction can't express the offset of the scaled narrow input, |
1222 | // bail and keep looking. For promotable zero stores, allow only when |
1223 | // the stored value is the same (i.e., WZR). |
1224 | if ((!IsUnscaled && alignTo(MinOffset, 2) != MinOffset) || |
1225 | (IsPromotableZeroStore && Reg != getLdStRegOp(MI).getReg())) { |
1226 | LiveRegUnits::accumulateUsedDefed(MI, ModifiedRegUnits, |
1227 | UsedRegUnits, TRI); |
1228 | MemInsns.push_back(&MI); |
1229 | continue; |
1230 | } |
1231 | } else { |
1232 | // Pairwise instructions have a 7-bit signed offset field. Single |
1233 | // insns have a 12-bit unsigned offset field. If the resultant |
1234 | // immediate offset of merging these instructions is out of range for |
1235 | // a pairwise instruction, bail and keep looking. |
1236 | if (!inBoundsForPair(IsUnscaled, MinOffset, OffsetStride)) { |
1237 | LiveRegUnits::accumulateUsedDefed(MI, ModifiedRegUnits, |
1238 | UsedRegUnits, TRI); |
1239 | MemInsns.push_back(&MI); |
1240 | continue; |
1241 | } |
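     | // Worked example (illustrative, not from the source): scaled LDRXui
     | // offsets 63 and 64 give MinOffset == 63, which fits the pair's
     | // signed 7-bit field (-64..63); offsets 64 and 65 give MinOffset ==
     | // 64 and are rejected by the inBoundsForPair check above.
     |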
1242 | // If the alignment requirements of the paired (scaled) instruction |
1243 | // can't express the offset of the unscaled input, bail and keep |
1244 | // looking. |
1245 | if (IsUnscaled && (alignTo(MinOffset, OffsetStride) != MinOffset)) { |
1246 | LiveRegUnits::accumulateUsedDefed(MI, ModifiedRegUnits, |
1247 | UsedRegUnits, TRI); |
1248 | MemInsns.push_back(&MI); |
1249 | continue; |
1250 | } |
1251 | } |
1252 | // If the destination register of the loads is the same register, bail |
1253 | // and keep looking. A load-pair instruction with both destination |
1254 | // registers the same is UNPREDICTABLE and will result in an exception. |
1255 | if (MayLoad && Reg == getLdStRegOp(MI).getReg()) { |
1256 | LiveRegUnits::accumulateUsedDefed(MI, ModifiedRegUnits, UsedRegUnits, |
1257 | TRI); |
1258 | MemInsns.push_back(&MI); |
1259 | continue; |
1260 | } |
1261 | |
1262 | // If the Rt of the second instruction was not modified or used between |
1263 | // the two instructions and none of the instructions between the second |
1264 | // and first alias with the second, we can combine the second into the |
1265 | // first. |
1266 | if (ModifiedRegUnits.available(getLdStRegOp(MI).getReg()) && |
1267 | !(MI.mayLoad() && |
1268 | !UsedRegUnits.available(getLdStRegOp(MI).getReg())) && |
1269 | !mayAlias(MI, MemInsns, AA)) { |
1270 | Flags.setMergeForward(false); |
1271 | return MBBI; |
1272 | } |
1273 | |
1274 | // Likewise, if the Rt of the first instruction is not modified or used |
1275 | // between the two instructions and none of the instructions between the |
1276 | // first and the second alias with the first, we can combine the first |
1277 | // into the second. |
1278 | if (ModifiedRegUnits.available(getLdStRegOp(FirstMI).getReg()) && |
1279 | !(MayLoad && |
1280 | !UsedRegUnits.available(getLdStRegOp(FirstMI).getReg())) && |
1281 | !mayAlias(FirstMI, MemInsns, AA)) { |
1282 | Flags.setMergeForward(true); |
1283 | return MBBI; |
1284 | } |
1285 | // Unable to combine these instructions due to interference in between. |
1286 | // Keep looking. |
1287 | } |
1288 | } |
1289 | |
1290 | // The instruction wasn't a matching load or store. Stop searching if we
1291 | // encounter a call instruction that might modify memory.
1292 | if (MI.isCall()) |
1293 | return E; |
1294 | |
1295 | // Update modified / uses register units. |
1296 | LiveRegUnits::accumulateUsedDefed(MI, ModifiedRegUnits, UsedRegUnits, TRI); |
1297 | |
1298 | // Otherwise, if the base register is modified, we have no match, so |
1299 | // return early. |
1300 | if (!ModifiedRegUnits.available(BaseReg)) |
1301 | return E; |
1302 | |
1303 | // Update list of instructions that read/write memory. |
1304 | if (MI.mayLoadOrStore()) |
1305 | MemInsns.push_back(&MI); |
1306 | } |
1307 | return E; |
1308 | } |
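     |
     | // Sketch of the two merge directions (illustrative, not from the
     | // source): for
     | //   str x3, [x10]
     | //   mov x5, #1          ; touches neither x3, x4, x10 nor memory
     | //   str x4, [x10, #8]
     | // the scan returns the second str with MergeForward == false and the
     | // pair is emitted at the first str: stp x3, x4, [x10]. MergeForward ==
     | // true instead merges the first instruction into the second.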
1309 | |
1310 | MachineBasicBlock::iterator |
1311 | AArch64LoadStoreOpt::mergeUpdateInsn(MachineBasicBlock::iterator I, |
1312 | MachineBasicBlock::iterator Update, |
1313 | bool IsPreIdx) { |
1314 | assert((Update->getOpcode() == AArch64::ADDXri ||
1315 |         Update->getOpcode() == AArch64::SUBXri) &&
1316 |        "Unexpected base register update instruction to merge!");
1317 | MachineBasicBlock::iterator NextI = I; |
1318 | // Return the instruction following the merged instruction, which is |
1319 | // the instruction following our unmerged load. Unless that's the add/sub |
1320 | // instruction we're merging, in which case it's the one after that. |
1321 | if (++NextI == Update) |
1322 | ++NextI; |
1323 | |
1324 | int Value = Update->getOperand(2).getImm(); |
1325 | assert(AArch64_AM::getShiftValue(Update->getOperand(3).getImm()) == 0 &&
1326 |        "Can't merge 1 << 12 offset into pre-/post-indexed load / store");
1327 | if (Update->getOpcode() == AArch64::SUBXri) |
1328 | Value = -Value; |
1329 | |
1330 | unsigned NewOpc = IsPreIdx ? getPreIndexedOpcode(I->getOpcode()) |
1331 | : getPostIndexedOpcode(I->getOpcode()); |
1332 | MachineInstrBuilder MIB; |
1333 | if (!isPairedLdSt(*I)) { |
1334 | // Non-paired instruction. |
1335 | MIB = BuildMI(*I->getParent(), I, I->getDebugLoc(), TII->get(NewOpc)) |
1336 | .add(getLdStRegOp(*Update)) |
1337 | .add(getLdStRegOp(*I)) |
1338 | .add(getLdStBaseOp(*I)) |
1339 | .addImm(Value) |
1340 | .setMemRefs(I->memoperands()) |
1341 | .setMIFlags(I->mergeFlagsWith(*Update)); |
1342 | } else { |
1343 | // Paired instruction. |
1344 | int Scale = getMemScale(*I); |
1345 | MIB = BuildMI(*I->getParent(), I, I->getDebugLoc(), TII->get(NewOpc)) |
1346 | .add(getLdStRegOp(*Update)) |
1347 | .add(getLdStRegOp(*I, 0)) |
1348 | .add(getLdStRegOp(*I, 1)) |
1349 | .add(getLdStBaseOp(*I)) |
1350 | .addImm(Value / Scale) |
1351 | .setMemRefs(I->memoperands()) |
1352 | .setMIFlags(I->mergeFlagsWith(*Update)); |
1353 | } |
1354 | (void)MIB; |
1355 | |
1356 | if (IsPreIdx) { |
1357 | ++NumPreFolded; |
1358 | LLVM_DEBUG(dbgs() << "Creating pre-indexed load/store.");
1359 | } else { |
1360 | ++NumPostFolded; |
1361 | LLVM_DEBUG(dbgs() << "Creating post-indexed load/store.");
1362 | } |
1363 | LLVM_DEBUG(dbgs() << "    Replacing instructions:\n    ");
1364 | LLVM_DEBUG(I->print(dbgs()));
1365 | LLVM_DEBUG(dbgs() << "    ");
1366 | LLVM_DEBUG(Update->print(dbgs()));
1367 | LLVM_DEBUG(dbgs() << "  with instruction:\n    ");
1368 | LLVM_DEBUG(((MachineInstr *)MIB)->print(dbgs()));
1369 | LLVM_DEBUG(dbgs() << "\n");
1370 | |
1371 | // Erase the old instructions for the block. |
1372 | I->eraseFromParent(); |
1373 | Update->eraseFromParent(); |
1374 | |
1375 | return NextI; |
1376 | } |
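     |
     | // Illustrative note (not from the source): merging
     | //   ldp x0, x1, [x2]
     | //   add x2, x2, #32
     | // as a post-index update yields "ldp x0, x1, [x2], #32". The writeback
     | // immediate operand is encoded in scaled units, which is why the
     | // paired path above emits .addImm(Value / Scale), i.e. 32 / 8 == 4.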
1377 | |
1378 | bool AArch64LoadStoreOpt::isMatchingUpdateInsn(MachineInstr &MemMI, |
1379 | MachineInstr &MI, |
1380 | unsigned BaseReg, int Offset) { |
1381 | switch (MI.getOpcode()) { |
1382 | default: |
1383 | break; |
1384 | case AArch64::SUBXri: |
1385 | case AArch64::ADDXri: |
1386 | // Make sure it's a vanilla immediate operand, not a relocation or |
1387 | // anything else we can't handle. |
1388 | if (!MI.getOperand(2).isImm()) |
1389 | break; |
1390 | // Watch out for 1 << 12 shifted value. |
1391 | if (AArch64_AM::getShiftValue(MI.getOperand(3).getImm())) |
1392 | break; |
1393 | |
1394 | // The update instruction source and destination register must be the |
1395 | // same as the load/store base register. |
1396 | if (MI.getOperand(0).getReg() != BaseReg || |
1397 | MI.getOperand(1).getReg() != BaseReg) |
1398 | break; |
1399 | |
1400 | bool IsPairedInsn = isPairedLdSt(MemMI); |
1401 | int UpdateOffset = MI.getOperand(2).getImm(); |
1402 | if (MI.getOpcode() == AArch64::SUBXri) |
1403 | UpdateOffset = -UpdateOffset; |
1404 | |
1405 | // For non-paired load/store instructions, the immediate must fit in a |
1406 | // signed 9-bit integer. |
1407 | if (!IsPairedInsn && (UpdateOffset > 255 || UpdateOffset < -256)) |
1408 | break; |
1409 | |
1410 | // For paired load/store instructions, the immediate must be a multiple of |
1411 | // the scaling factor. The scaled offset must also fit into a signed 7-bit |
1412 | // integer. |
1413 | if (IsPairedInsn) { |
1414 | int Scale = getMemScale(MemMI); |
1415 | if (UpdateOffset % Scale != 0) |
1416 | break; |
1417 | |
1418 | int ScaledOffset = UpdateOffset / Scale; |
1419 | if (ScaledOffset > 63 || ScaledOffset < -64) |
1420 | break; |
1421 | } |
1422 | |
1423 | // If we have a non-zero Offset, we check that it matches the amount |
1424 | // we're adding to the register. |
1425 | if (!Offset || Offset == UpdateOffset) |
1426 | return true; |
1427 | break; |
1428 | } |
1429 | return false; |
1430 | } |
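     |
     | // Worked examples of the constraints above (an editorial sketch):
     | //   ldr x0, [x2]     + add x2, x2, #256 -> rejected: 256 exceeds the
     | //                                          signed 9-bit range -256..255
     | //   ldp x0, x1, [x2] + add x2, x2, #32  -> accepted: 32 / 8 == 4 is in
     | //                                          the signed 7-bit range -64..63
     | //   ldp x0, x1, [x2] + add x2, x2, #12  -> rejected: 12 % 8 != 0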
1431 | |
1432 | MachineBasicBlock::iterator AArch64LoadStoreOpt::findMatchingUpdateInsnForward( |
1433 | MachineBasicBlock::iterator I, int UnscaledOffset, unsigned Limit) { |
1434 | MachineBasicBlock::iterator E = I->getParent()->end(); |
1435 | MachineInstr &MemMI = *I; |
1436 | MachineBasicBlock::iterator MBBI = I; |
1437 | |
1438 | unsigned BaseReg = getLdStBaseOp(MemMI).getReg(); |
1439 | int MIUnscaledOffset = getLdStOffsetOp(MemMI).getImm() * getMemScale(MemMI); |
1440 | |
1441 | // Scan forward looking for post-index opportunities. Updating instructions |
1442 | // can't be formed if the memory instruction doesn't have the offset we're |
1443 | // looking for. |
1444 | if (MIUnscaledOffset != UnscaledOffset) |
1445 | return E; |
1446 | |
1447 | // If the base register overlaps a destination register, we can't |
1448 | // merge the update. |
1449 | bool IsPairedInsn = isPairedLdSt(MemMI); |
1450 | for (unsigned i = 0, e = IsPairedInsn ? 2 : 1; i != e; ++i) { |
1451 | unsigned DestReg = getLdStRegOp(MemMI, i).getReg(); |
1452 | if (DestReg == BaseReg || TRI->isSubRegister(BaseReg, DestReg)) |
1453 | return E; |
1454 | } |
1455 | |
1456 | // Track which register units have been modified and used between the first |
1457 | // insn (inclusive) and the second insn. |
1458 | ModifiedRegUnits.clear(); |
1459 | UsedRegUnits.clear(); |
1460 | ++MBBI; |
1461 | for (unsigned Count = 0; MBBI != E && Count < Limit; ++MBBI) { |
1462 | MachineInstr &MI = *MBBI; |
1463 | |
1464 | // Don't count transient instructions towards the search limit since there |
1465 | // may be different numbers of them if e.g. debug information is present. |
1466 | if (!MI.isTransient()) |
1467 | ++Count; |
1468 | |
1469 | // If we found a match, return it. |
1470 | if (isMatchingUpdateInsn(*I, MI, BaseReg, UnscaledOffset)) |
1471 | return MBBI; |
1472 | |
1473 | // Update the status of what the instruction clobbered and used. |
1474 | LiveRegUnits::accumulateUsedDefed(MI, ModifiedRegUnits, UsedRegUnits, TRI); |
1475 | |
1476 | // Otherwise, if the base register is used or modified, we have no match, so |
1477 | // return early. |
1478 | if (!ModifiedRegUnits.available(BaseReg) || |
1479 | !UsedRegUnits.available(BaseReg)) |
1480 | return E; |
1481 | } |
1482 | return E; |
1483 | } |
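     |
     | // Example of the forward scan (illustrative, not from the source): in
     | //   str x0, [x2]
     | //   mov x3, #7          ; neither uses nor defines x2
     | //   add x2, x2, #16
     | // the scan skips the mov, matches the add, and the caller can fold it
     | // into a post-index form: str x0, [x2], #16.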
1484 | |
1485 | MachineBasicBlock::iterator AArch64LoadStoreOpt::findMatchingUpdateInsnBackward( |
1486 | MachineBasicBlock::iterator I, unsigned Limit) { |
1487 | MachineBasicBlock::iterator B = I->getParent()->begin(); |
1488 | MachineBasicBlock::iterator E = I->getParent()->end(); |
1489 | MachineInstr &MemMI = *I; |
1490 | MachineBasicBlock::iterator MBBI = I; |
1491 | |
1492 | unsigned BaseReg = getLdStBaseOp(MemMI).getReg(); |
1493 | int Offset = getLdStOffsetOp(MemMI).getImm(); |
1494 | |
1495 | // If the load/store is the first instruction in the block, there's obviously |
1496 | // not any matching update. Ditto if the memory offset isn't zero. |
1497 | if (MBBI == B || Offset != 0) |
1498 | return E; |
1499 | // If the base register overlaps a destination register, we can't |
1500 | // merge the update. |
1501 | bool IsPairedInsn = isPairedLdSt(MemMI); |
1502 | for (unsigned i = 0, e = IsPairedInsn ? 2 : 1; i != e; ++i) { |
1503 | unsigned DestReg = getLdStRegOp(MemMI, i).getReg(); |
1504 | if (DestReg == BaseReg || TRI->isSubRegister(BaseReg, DestReg)) |
1505 | return E; |
1506 | } |
1507 | |
1508 | // Track which register units have been modified and used between the first |
1509 | // insn (inclusive) and the second insn. |
1510 | ModifiedRegUnits.clear(); |
1511 | UsedRegUnits.clear(); |
1512 | unsigned Count = 0; |
1513 | do { |
1514 | --MBBI; |
1515 | MachineInstr &MI = *MBBI; |
1516 | |
1517 | // Don't count transient instructions towards the search limit since there |
1518 | // may be different numbers of them if e.g. debug information is present. |
1519 | if (!MI.isTransient()) |
1520 | ++Count; |
1521 | |
1522 | // If we found a match, return it. |
1523 | if (isMatchingUpdateInsn(*I, MI, BaseReg, Offset)) |
1524 | return MBBI; |
1525 | |
1526 | // Update the status of what the instruction clobbered and used. |
1527 | LiveRegUnits::accumulateUsedDefed(MI, ModifiedRegUnits, UsedRegUnits, TRI); |
1528 | |
1529 | // Otherwise, if the base register is used or modified, we have no match, so |
1530 | // return early. |
1531 | if (!ModifiedRegUnits.available(BaseReg) || |
1532 | !UsedRegUnits.available(BaseReg)) |
1533 | return E; |
1534 | } while (MBBI != B && Count < Limit); |
1535 | return E; |
1536 | } |
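     |
     | // Example of the backward scan (illustrative, not from the source): in
     | //   add x2, x2, #8
     | //   mov x3, #7          ; neither uses nor defines x2
     | //   ldr x0, [x2]        ; memory offset must be 0, per the early exit
     | // the add is found and can be folded as a pre-index update:
     | //   ldr x0, [x2, #8]!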
1537 | |
1538 | bool AArch64LoadStoreOpt::tryToPromoteLoadFromStore( |
1539 | MachineBasicBlock::iterator &MBBI) { |
1540 | MachineInstr &MI = *MBBI; |
1541 | // If this is a volatile load, don't mess with it. |
1542 | if (MI.hasOrderedMemoryRef()) |
1543 | return false; |
1544 | |
1545 | // Make sure this is a reg+imm. |
1546 | // FIXME: It is possible to extend it to handle reg+reg cases. |
1547 | if (!getLdStOffsetOp(MI).isImm()) |
1548 | return false; |
1549 | |
1550 | // Look backward up to LdStLimit instructions. |
1551 | MachineBasicBlock::iterator StoreI; |
1552 | if (findMatchingStore(MBBI, LdStLimit, StoreI)) { |
1553 | ++NumLoadsFromStoresPromoted; |
1554 | // Promote the load. Keeping the iterator straight is a |
1555 | // pain, so we let the merge routine tell us what the next instruction |
1556 | // is after it's done mucking about. |
1557 | MBBI = promoteLoadFromStore(MBBI, StoreI); |
1558 | return true; |
1559 | } |
1560 | return false; |
1561 | } |
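     |
     | // Illustrative note (not from the source): when the store and load
     | // have the same width, e.g.
     | //   str x1, [x0]
     | //   ldr x2, [x0]
     | // promoteLoadFromStore replaces the load with a plain register move
     | // from x1; a narrower load out of a wider store becomes a bitfield
     | // extract, as the example in optimizeBlock below shows.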
1562 | |
1563 | // Merge adjacent zero stores into a wider store. |
1564 | bool AArch64LoadStoreOpt::tryToMergeZeroStInst( |
1565 | MachineBasicBlock::iterator &MBBI) { |
1566 | assert(isPromotableZeroStoreInst(*MBBI) && "Expected narrow store.");
1567 | MachineInstr &MI = *MBBI; |
1568 | MachineBasicBlock::iterator E = MI.getParent()->end(); |
1569 | |
1570 | if (!TII->isCandidateToMergeOrPair(MI)) |
1571 | return false; |
1572 | |
1573 | // Look ahead up to LdStLimit instructions for a mergable instruction. |
1574 | LdStPairFlags Flags; |
1575 | MachineBasicBlock::iterator MergeMI = |
1576 | findMatchingInsn(MBBI, Flags, LdStLimit, /* FindNarrowMerge = */ true); |
1577 | if (MergeMI != E) { |
1578 | ++NumZeroStoresPromoted; |
1579 | |
1580 | // Keeping the iterator straight is a pain, so we let the merge routine tell |
1581 | // us what the next instruction is after it's done mucking about. |
1582 | MBBI = mergeNarrowZeroStores(MBBI, MergeMI, Flags); |
1583 | return true; |
1584 | } |
1585 | return false; |
1586 | } |
1587 | |
1588 | // Find loads and stores that can be merged into a single load or store pair |
1589 | // instruction. |
1590 | bool AArch64LoadStoreOpt::tryToPairLdStInst(MachineBasicBlock::iterator &MBBI) { |
1591 | MachineInstr &MI = *MBBI; |
1592 | MachineBasicBlock::iterator E = MI.getParent()->end(); |
1593 | |
1594 | if (!TII->isCandidateToMergeOrPair(MI)) |
1595 | return false; |
1596 | |
1597 | // Early exit if the offset is not possible to match. (6 bits of positive |
1598 | // range, plus allow an extra one in case we find a later insn that matches |
1599 | // with Offset-1) |
1600 | bool IsUnscaled = TII->isUnscaledLdSt(MI); |
1601 | int Offset = getLdStOffsetOp(MI).getImm(); |
1602 | int OffsetStride = IsUnscaled ? getMemScale(MI) : 1; |
1603 | // Allow one more for offset. |
1604 | if (Offset > 0) |
1605 | Offset -= OffsetStride; |
1606 | if (!inBoundsForPair(IsUnscaled, Offset, OffsetStride)) |
1607 | return false; |
1608 | |
1609 | // Look ahead up to LdStLimit instructions for a pairable instruction. |
1610 | LdStPairFlags Flags; |
1611 | MachineBasicBlock::iterator Paired = |
1612 | findMatchingInsn(MBBI, Flags, LdStLimit, /* FindNarrowMerge = */ false); |
1613 | if (Paired != E) { |
1614 | ++NumPairCreated; |
1615 | if (TII->isUnscaledLdSt(MI)) |
1616 | ++NumUnscaledPairCreated; |
1617 | // Keeping the iterator straight is a pain, so we let the merge routine tell |
1618 | // us what the next instruction is after it's done mucking about. |
1619 | MBBI = mergePairedInsns(MBBI, Paired, Flags); |
1620 | return true; |
1621 | } |
1622 | return false; |
1623 | } |
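     |
     | // Worked example of the early exit above (an editorial sketch): for a
     | // scaled LDRXui with Offset == 64 (OffsetStride == 1), Offset is first
     | // reduced to 63 so that a later insn at offset 63 could still pair as
     | // the lower half; 63 passes inBoundsForPair, so the search proceeds.
     | // With Offset == 65 the reduced value 64 is out of range and we bail.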
1624 | |
1625 | bool AArch64LoadStoreOpt::tryToMergeLdStUpdate(
1626 |     MachineBasicBlock::iterator &MBBI) {
1627 | MachineInstr &MI = *MBBI; |
1628 | MachineBasicBlock::iterator E = MI.getParent()->end(); |
1629 | MachineBasicBlock::iterator Update; |
1630 | |
1631 | // Look forward to try to form a post-index instruction. For example, |
1632 | // ldr x0, [x20] |
1633 | // add x20, x20, #32 |
1634 | // merged into: |
1635 | // ldr x0, [x20], #32 |
1636 | Update = findMatchingUpdateInsnForward(MBBI, 0, UpdateLimit); |
1637 | if (Update != E) { |
1638 | // Merge the update into the ld/st. |
1639 | MBBI = mergeUpdateInsn(MBBI, Update, /*IsPreIdx=*/false); |
1640 | return true; |
1641 | } |
1642 | |
1643 | // Don't know how to handle unscaled pre/post-index versions below, so bail. |
1644 | if (TII->isUnscaledLdSt(MI.getOpcode())) |
1645 | return false; |
1646 | |
1647 | // Look back to try to find a pre-index instruction. For example, |
1648 | // add x0, x0, #8 |
1649 | // ldr x1, [x0] |
1650 | // merged into: |
1651 | // ldr x1, [x0, #8]! |
1652 | Update = findMatchingUpdateInsnBackward(MBBI, UpdateLimit); |
1653 | if (Update != E) { |
1654 | // Merge the update into the ld/st. |
1655 | MBBI = mergeUpdateInsn(MBBI, Update, /*IsPreIdx=*/true); |
1656 | return true; |
1657 | } |
1658 | |
1659 | // The immediate in the load/store is scaled by the size of the memory |
1660 | // operation. The immediate in the add we're looking for, |
1661 | // however, is not, so adjust here. |
1662 | int UnscaledOffset = getLdStOffsetOp(MI).getImm() * getMemScale(MI); |
1663 | |
1664 | // Look forward to try to form a pre-index instruction. For example,
1665 | // ldr x1, [x0, #64] |
1666 | // add x0, x0, #64 |
1667 | // merged into: |
1668 | // ldr x1, [x0, #64]! |
1669 | Update = findMatchingUpdateInsnForward(MBBI, UnscaledOffset, UpdateLimit); |
1670 | if (Update != E) { |
1671 | // Merge the update into the ld/st. |
1672 | MBBI = mergeUpdateInsn(MBBI, Update, /*IsPreIdx=*/true); |
1673 | return true; |
1674 | } |
1675 | |
1676 | return false; |
1677 | } |
1678 | |
1679 | bool AArch64LoadStoreOpt::optimizeBlock(MachineBasicBlock &MBB, |
1680 | bool EnableNarrowZeroStOpt) { |
1681 | bool Modified = false; |
1682 | // Four transformations to do here:
1683 | // 1) Find loads that directly read from stores and promote them by |
1684 | // replacing with mov instructions. If the store is wider than the load, |
1685 | // the load will be replaced with a bitfield extract. |
1686 | // e.g., |
1687 | // str w1, [x0, #4] |
1688 | // ldrh w2, [x0, #6] |
1689 | // ; becomes |
1690 | // str w1, [x0, #4] |
1691 | // lsr w2, w1, #16 |
1692 | for (MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end(); |
1693 | MBBI != E;) { |
1694 | if (isPromotableLoadFromStore(*MBBI) && tryToPromoteLoadFromStore(MBBI)) |
1695 | Modified = true; |
1696 | else |
1697 | ++MBBI; |
1698 | } |
1699 | // 2) Merge adjacent zero stores into a wider store. |
1700 | // e.g., |
1701 | // strh wzr, [x0] |
1702 | // strh wzr, [x0, #2] |
1703 | // ; becomes |
1704 | // str wzr, [x0] |
1705 | // e.g., |
1706 | // str wzr, [x0] |
1707 | // str wzr, [x0, #4] |
1708 | // ; becomes |
1709 | // str xzr, [x0] |
1710 | if (EnableNarrowZeroStOpt) |
1711 | for (MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end(); |
1712 | MBBI != E;) { |
1713 | if (isPromotableZeroStoreInst(*MBBI) && tryToMergeZeroStInst(MBBI)) |
1714 | Modified = true; |
1715 | else |
1716 | ++MBBI; |
1717 | } |
1718 | // 3) Find loads and stores that can be merged into a single load or store |
1719 | // pair instruction. |
1720 | // e.g., |
1721 | // ldr x0, [x2] |
1722 | // ldr x1, [x2, #8] |
1723 | // ; becomes |
1724 | // ldp x0, x1, [x2] |
1725 | for (MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end(); |
1726 | MBBI != E;) { |
1727 | if (TII->isPairableLdStInst(*MBBI) && tryToPairLdStInst(MBBI)) |
1728 | Modified = true; |
1729 | else |
1730 | ++MBBI; |
1731 | } |
1732 | // 4) Find base register updates that can be merged into the load or store |
1733 | // as a base-reg writeback. |
1734 | // e.g., |
1735 | // ldr x0, [x2] |
1736 | // add x2, x2, #4 |
1737 | // ; becomes |
1738 | // ldr x0, [x2], #4 |
1739 | for (MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end(); |
1740 | MBBI != E;) { |
1741 | if (isMergeableLdStUpdate(*MBBI) && tryToMergeLdStUpdate(MBBI)) |
1742 | Modified = true; |
1743 | else |
1744 | ++MBBI; |
1745 | } |
1746 | |
1747 | return Modified; |
1748 | } |
1749 | |
1750 | bool AArch64LoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) { |
1751 | if (skipFunction(Fn.getFunction())) |
1752 | return false; |
1753 | |
1754 | Subtarget = &static_cast<const AArch64Subtarget &>(Fn.getSubtarget()); |
1755 | TII = static_cast<const AArch64InstrInfo *>(Subtarget->getInstrInfo()); |
1756 | TRI = Subtarget->getRegisterInfo(); |
1757 | AA = &getAnalysis<AAResultsWrapperPass>().getAAResults(); |
1758 | |
1759 | // Resize the modified and used register unit trackers. We do this once |
1760 | // per function and then clear the register units each time we optimize a load |
1761 | // or store. |
1762 | ModifiedRegUnits.init(*TRI); |
1763 | UsedRegUnits.init(*TRI); |
1764 | |
1765 | bool Modified = false; |
1766 | bool EnableNarrowZeroStOpt = !Subtarget->requiresStrictAlign();
1767 | for (auto &MBB : Fn)
1768 |   Modified |= optimizeBlock(MBB, EnableNarrowZeroStOpt);
1769 | |
1770 | return Modified; |
1771 | } |
1772 | |
1773 | // FIXME: Do we need/want a pre-alloc pass like ARM has to try to keep loads and |
1774 | // stores near one another? Note: The pre-RA instruction scheduler already has |
1775 | // hooks to try and schedule pairable loads/stores together to improve pairing |
1776 | // opportunities. Thus, pre-RA pairing pass may not be worth the effort. |
1777 | |
1778 | // FIXME: When pairing store instructions it's very possible for this pass to |
1779 | // hoist a store with a KILL marker above another use (without a KILL marker). |
1780 | // The resulting IR is invalid, but nothing uses the KILL markers after this |
1781 | // pass, so it's never caused a problem in practice. |
1782 | |
1783 | /// createAArch64LoadStoreOptimizationPass - returns an instance of the |
1784 | /// load / store optimization pass. |
1785 | FunctionPass *llvm::createAArch64LoadStoreOptimizationPass() { |
1786 | return new AArch64LoadStoreOpt(); |
1787 | } |