File: lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp
Location: line 895, column 9
Description: Value stored to 'Imms' during its initialization is never read
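The diagnostic points into AArch64LoadStoreOpt::promoteLoadFromStore: 'Imms' (and 'Immr' just before it) is initialized at source lines 894-895, but both variables are unconditionally recomputed at lines 905-906 before either value is read, so the initializers are dead stores. The flagged code appears at those lines in the listing below; a possible fix is sketched after the function.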
1 | //=- AArch64LoadStoreOptimizer.cpp - AArch64 load/store opt. pass -*- C++ -*-=// |
2 | // |
3 | // The LLVM Compiler Infrastructure |
4 | // |
5 | // This file is distributed under the University of Illinois Open Source |
6 | // License. See LICENSE.TXT for details. |
7 | // |
8 | //===----------------------------------------------------------------------===// |
9 | // |
10 | // This file contains a pass that performs load / store related peephole |
11 | // optimizations. This pass should be run after register allocation. |
12 | // |
13 | //===----------------------------------------------------------------------===// |
14 | |
15 | #include "AArch64InstrInfo.h" |
16 | #include "AArch64Subtarget.h" |
17 | #include "MCTargetDesc/AArch64AddressingModes.h" |
18 | #include "llvm/ADT/BitVector.h" |
19 | #include "llvm/ADT/SmallVector.h" |
20 | #include "llvm/ADT/Statistic.h" |
21 | #include "llvm/CodeGen/MachineBasicBlock.h" |
22 | #include "llvm/CodeGen/MachineFunctionPass.h" |
23 | #include "llvm/CodeGen/MachineInstr.h" |
24 | #include "llvm/CodeGen/MachineInstrBuilder.h" |
25 | #include "llvm/Support/CommandLine.h" |
26 | #include "llvm/Support/Debug.h" |
27 | #include "llvm/Support/ErrorHandling.h" |
28 | #include "llvm/Support/raw_ostream.h" |
29 | #include "llvm/Target/TargetInstrInfo.h" |
30 | #include "llvm/Target/TargetMachine.h" |
31 | #include "llvm/Target/TargetRegisterInfo.h" |
32 | using namespace llvm; |
33 | |
34 | #define DEBUG_TYPE "aarch64-ldst-opt"
35 | |
36 | /// AArch64LoadStoreOpt - Post-register allocation pass to combine
37 | /// load / store instructions to form ldp / stp instructions. |
38 | |
39 | STATISTIC(NumPairCreated, "Number of load/store pair instructions generated");
40 | STATISTIC(NumPostFolded, "Number of post-index updates folded");
41 | STATISTIC(NumPreFolded, "Number of pre-index updates folded");
42 | STATISTIC(NumUnscaledPairCreated,
43 |           "Number of load/store from unscaled generated");
44 | STATISTIC(NumNarrowLoadsPromoted, "Number of narrow loads promoted");
45 | STATISTIC(NumZeroStoresPromoted, "Number of narrow zero stores promoted");
46 | STATISTIC(NumLoadsFromStoresPromoted, "Number of loads from stores promoted");
47 | |
48 | static cl::opt<unsigned> ScanLimit("aarch64-load-store-scan-limit", |
49 | cl::init(20), cl::Hidden); |
50 | |
51 | namespace llvm { |
52 | void initializeAArch64LoadStoreOptPass(PassRegistry &); |
53 | } |
54 | |
55 | #define AARCH64_LOAD_STORE_OPT_NAME "AArch64 load / store optimization pass"
56 | |
57 | namespace { |
58 | |
59 | typedef struct LdStPairFlags { |
60 | // If a matching instruction is found, MergeForward is set to true if the |
61 | // merge is to remove the first instruction and replace the second with |
62 | // a pair-wise insn, and false if the reverse is true. |
63 | bool MergeForward; |
64 | |
65 | // SExtIdx gives the index of the result of the load pair that must be |
66 | // extended. The value of SExtIdx assumes that the paired load produces the |
67 | // value in this order: (I, returned iterator), i.e., -1 means no value has |
68 | // to be extended, 0 means I, and 1 means the returned iterator. |
69 | int SExtIdx; |
70 | |
71 | LdStPairFlags() : MergeForward(false), SExtIdx(-1) {} |
72 | |
73 | void setMergeForward(bool V = true) { MergeForward = V; } |
74 | bool getMergeForward() const { return MergeForward; } |
75 | |
76 | void setSExtIdx(int V) { SExtIdx = V; } |
77 | int getSExtIdx() const { return SExtIdx; } |
78 | |
79 | } LdStPairFlags; |
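For instance (an illustration consistent with the SExtIdx handling in findMatchingInsn below): when a sign-extending LDRSWui as I is paired with a plain LDRWui, SExtIdx is set to 0, meaning the first result of the resulting ldp must be sign-extended; -1 means no result needs extension.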
80 | |
81 | struct AArch64LoadStoreOpt : public MachineFunctionPass { |
82 | static char ID; |
83 | AArch64LoadStoreOpt() : MachineFunctionPass(ID) { |
84 | initializeAArch64LoadStoreOptPass(*PassRegistry::getPassRegistry()); |
85 | } |
86 | |
87 | const AArch64InstrInfo *TII; |
88 | const TargetRegisterInfo *TRI; |
89 | const AArch64Subtarget *Subtarget; |
90 | |
91 | // Scan the instructions looking for a load/store that can be combined |
92 | // with the current instruction into a load/store pair. |
93 | // Return the matching instruction if one is found, else MBB->end(). |
94 | MachineBasicBlock::iterator findMatchingInsn(MachineBasicBlock::iterator I, |
95 | LdStPairFlags &Flags, |
96 | unsigned Limit); |
97 | |
98 | // Scan the instructions looking for a store that writes to the address from |
99 | // which the current load instruction reads. Return true if one is found. |
100 | bool findMatchingStore(MachineBasicBlock::iterator I, unsigned Limit, |
101 | MachineBasicBlock::iterator &StoreI); |
102 | |
103 | // Merge the two instructions indicated into a single pair-wise instruction. |
104 | // If MergeForward is true, erase the first instruction and fold its |
105 | // operation into the second. If false, the reverse. Return the instruction |
106 | // following the first instruction (which may change during processing). |
107 | MachineBasicBlock::iterator |
108 | mergePairedInsns(MachineBasicBlock::iterator I, |
109 | MachineBasicBlock::iterator Paired, |
110 | const LdStPairFlags &Flags); |
111 | |
112 | // Promote the load that reads directly from the address stored to. |
113 | MachineBasicBlock::iterator |
114 | promoteLoadFromStore(MachineBasicBlock::iterator LoadI, |
115 | MachineBasicBlock::iterator StoreI); |
116 | |
117 | // Scan the instruction list to find a base register update that can |
118 | // be combined with the current instruction (a load or store) using |
119 | // pre or post indexed addressing with writeback. Scan forwards. |
120 | MachineBasicBlock::iterator |
121 | findMatchingUpdateInsnForward(MachineBasicBlock::iterator I, unsigned Limit, |
122 | int UnscaledOffset); |
123 | |
124 | // Scan the instruction list to find a base register update that can |
125 | // be combined with the current instruction (a load or store) using |
126 | // pre or post indexed addressing with writeback. Scan backwards. |
127 | MachineBasicBlock::iterator |
128 | findMatchingUpdateInsnBackward(MachineBasicBlock::iterator I, unsigned Limit); |
129 | |
130 | // Find an instruction that updates the base register of the ld/st |
131 | // instruction. |
132 | bool isMatchingUpdateInsn(MachineInstr *MemMI, MachineInstr *MI, |
133 | unsigned BaseReg, int Offset); |
134 | |
135 | // Merge a pre- or post-index base register update into a ld/st instruction. |
136 | MachineBasicBlock::iterator |
137 | mergeUpdateInsn(MachineBasicBlock::iterator I, |
138 | MachineBasicBlock::iterator Update, bool IsPreIdx); |
139 | |
140 | // Find and merge foldable ldr/str instructions. |
141 | bool tryToMergeLdStInst(MachineBasicBlock::iterator &MBBI); |
142 | |
143 | // Find and promote load instructions which read directly from store. |
144 | bool tryToPromoteLoadFromStore(MachineBasicBlock::iterator &MBBI); |
145 | |
146 | // Check if converting two narrow loads into a single wider load with |
147 | // bitfield extracts could be enabled. |
148 | bool enableNarrowLdMerge(MachineFunction &Fn); |
149 | |
150 | bool optimizeBlock(MachineBasicBlock &MBB, bool enableNarrowLdOpt); |
151 | |
152 | bool runOnMachineFunction(MachineFunction &Fn) override; |
153 | |
154 | const char *getPassName() const override { |
155 | return AARCH64_LOAD_STORE_OPT_NAME;
156 | } |
157 | }; |
158 | char AArch64LoadStoreOpt::ID = 0; |
159 | } // namespace |
160 | |
161 | INITIALIZE_PASS(AArch64LoadStoreOpt, "aarch64-ldst-opt",
162 |                 AARCH64_LOAD_STORE_OPT_NAME, false, false)
163 | |
164 | static bool isUnscaledLdSt(unsigned Opc) { |
165 | switch (Opc) { |
166 | default: |
167 | return false; |
168 | case AArch64::STURSi: |
169 | case AArch64::STURDi: |
170 | case AArch64::STURQi: |
171 | case AArch64::STURBBi: |
172 | case AArch64::STURHHi: |
173 | case AArch64::STURWi: |
174 | case AArch64::STURXi: |
175 | case AArch64::LDURSi: |
176 | case AArch64::LDURDi: |
177 | case AArch64::LDURQi: |
178 | case AArch64::LDURWi: |
179 | case AArch64::LDURXi: |
180 | case AArch64::LDURSWi: |
181 | case AArch64::LDURHHi: |
182 | case AArch64::LDURBBi: |
183 | case AArch64::LDURSBWi: |
184 | case AArch64::LDURSHWi: |
185 | return true; |
186 | } |
187 | } |
188 | |
189 | static bool isUnscaledLdSt(MachineInstr *MI) { |
190 | return isUnscaledLdSt(MI->getOpcode()); |
191 | } |
192 | |
193 | static unsigned getBitExtrOpcode(MachineInstr *MI) { |
194 | switch (MI->getOpcode()) { |
195 | default: |
196 | llvm_unreachable("Unexpected opcode.");
197 | case AArch64::LDRBBui: |
198 | case AArch64::LDURBBi: |
199 | case AArch64::LDRHHui: |
200 | case AArch64::LDURHHi: |
201 | return AArch64::UBFMWri; |
202 | case AArch64::LDRSBWui: |
203 | case AArch64::LDURSBWi: |
204 | case AArch64::LDRSHWui: |
205 | case AArch64::LDURSHWi: |
206 | return AArch64::SBFMWri; |
207 | } |
208 | } |
209 | |
210 | static bool isNarrowStore(unsigned Opc) { |
211 | switch (Opc) { |
212 | default: |
213 | return false; |
214 | case AArch64::STRBBui: |
215 | case AArch64::STURBBi: |
216 | case AArch64::STRHHui: |
217 | case AArch64::STURHHi: |
218 | return true; |
219 | } |
220 | } |
221 | |
222 | static bool isNarrowStore(MachineInstr *MI) { |
223 | return isNarrowStore(MI->getOpcode()); |
224 | } |
225 | |
226 | static bool isNarrowLoad(unsigned Opc) { |
227 | switch (Opc) { |
228 | default: |
229 | return false; |
230 | case AArch64::LDRHHui: |
231 | case AArch64::LDURHHi: |
232 | case AArch64::LDRBBui: |
233 | case AArch64::LDURBBi: |
234 | case AArch64::LDRSHWui: |
235 | case AArch64::LDURSHWi: |
236 | case AArch64::LDRSBWui: |
237 | case AArch64::LDURSBWi: |
238 | return true; |
239 | } |
240 | } |
241 | |
242 | static bool isNarrowLoad(MachineInstr *MI) { |
243 | return isNarrowLoad(MI->getOpcode()); |
244 | } |
245 | |
246 | // Scaling factor for unscaled load or store. |
247 | static int getMemScale(MachineInstr *MI) { |
248 | switch (MI->getOpcode()) { |
249 | default: |
250 | llvm_unreachable("Opcode has unknown scale!");
251 | case AArch64::LDRBBui: |
252 | case AArch64::LDURBBi: |
253 | case AArch64::LDRSBWui: |
254 | case AArch64::LDURSBWi: |
255 | case AArch64::STRBBui: |
256 | case AArch64::STURBBi: |
257 | return 1; |
258 | case AArch64::LDRHHui: |
259 | case AArch64::LDURHHi: |
260 | case AArch64::LDRSHWui: |
261 | case AArch64::LDURSHWi: |
262 | case AArch64::STRHHui: |
263 | case AArch64::STURHHi: |
264 | return 2; |
265 | case AArch64::LDRSui: |
266 | case AArch64::LDURSi: |
267 | case AArch64::LDRSWui: |
268 | case AArch64::LDURSWi: |
269 | case AArch64::LDRWui: |
270 | case AArch64::LDURWi: |
271 | case AArch64::STRSui: |
272 | case AArch64::STURSi: |
273 | case AArch64::STRWui: |
274 | case AArch64::STURWi: |
275 | case AArch64::LDPSi: |
276 | case AArch64::LDPSWi: |
277 | case AArch64::LDPWi: |
278 | case AArch64::STPSi: |
279 | case AArch64::STPWi: |
280 | return 4; |
281 | case AArch64::LDRDui: |
282 | case AArch64::LDURDi: |
283 | case AArch64::LDRXui: |
284 | case AArch64::LDURXi: |
285 | case AArch64::STRDui: |
286 | case AArch64::STURDi: |
287 | case AArch64::STRXui: |
288 | case AArch64::STURXi: |
289 | case AArch64::LDPDi: |
290 | case AArch64::LDPXi: |
291 | case AArch64::STPDi: |
292 | case AArch64::STPXi: |
293 | return 8; |
294 | case AArch64::LDRQui: |
295 | case AArch64::LDURQi: |
296 | case AArch64::STRQui: |
297 | case AArch64::STURQi: |
298 | case AArch64::LDPQi: |
299 | case AArch64::STPQi: |
300 | return 16; |
301 | } |
302 | } |
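For example: an unscaled LDURXi immediate is already a byte offset, while a scaled LDRXui immediate must be multiplied by getMemScale() == 8 to obtain bytes; isLdOffsetInRangeOfSt and promoteLoadFromStore below rely on exactly this conversion.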
303 | |
304 | static unsigned getMatchingNonSExtOpcode(unsigned Opc, |
305 | bool *IsValidLdStrOpc = nullptr) { |
306 | if (IsValidLdStrOpc) |
307 | *IsValidLdStrOpc = true; |
308 | switch (Opc) { |
309 | default: |
310 | if (IsValidLdStrOpc) |
311 | *IsValidLdStrOpc = false; |
312 | return UINT_MAX;
313 | case AArch64::STRDui: |
314 | case AArch64::STURDi: |
315 | case AArch64::STRQui: |
316 | case AArch64::STURQi: |
317 | case AArch64::STRBBui: |
318 | case AArch64::STURBBi: |
319 | case AArch64::STRHHui: |
320 | case AArch64::STURHHi: |
321 | case AArch64::STRWui: |
322 | case AArch64::STURWi: |
323 | case AArch64::STRXui: |
324 | case AArch64::STURXi: |
325 | case AArch64::LDRDui: |
326 | case AArch64::LDURDi: |
327 | case AArch64::LDRQui: |
328 | case AArch64::LDURQi: |
329 | case AArch64::LDRWui: |
330 | case AArch64::LDURWi: |
331 | case AArch64::LDRXui: |
332 | case AArch64::LDURXi: |
333 | case AArch64::STRSui: |
334 | case AArch64::STURSi: |
335 | case AArch64::LDRSui: |
336 | case AArch64::LDURSi: |
337 | case AArch64::LDRHHui: |
338 | case AArch64::LDURHHi: |
339 | case AArch64::LDRBBui: |
340 | case AArch64::LDURBBi: |
341 | return Opc; |
342 | case AArch64::LDRSWui: |
343 | return AArch64::LDRWui; |
344 | case AArch64::LDURSWi: |
345 | return AArch64::LDURWi; |
346 | case AArch64::LDRSBWui: |
347 | return AArch64::LDRBBui; |
348 | case AArch64::LDRSHWui: |
349 | return AArch64::LDRHHui; |
350 | case AArch64::LDURSBWi: |
351 | return AArch64::LDURBBi; |
352 | case AArch64::LDURSHWi: |
353 | return AArch64::LDURHHi; |
354 | } |
355 | } |
356 | |
357 | static unsigned getMatchingPairOpcode(unsigned Opc) { |
358 | switch (Opc) { |
359 | default: |
360 | llvm_unreachable("Opcode has no pairwise equivalent!");
361 | case AArch64::STRSui: |
362 | case AArch64::STURSi: |
363 | return AArch64::STPSi; |
364 | case AArch64::STRDui: |
365 | case AArch64::STURDi: |
366 | return AArch64::STPDi; |
367 | case AArch64::STRQui: |
368 | case AArch64::STURQi: |
369 | return AArch64::STPQi; |
370 | case AArch64::STRBBui: |
371 | return AArch64::STRHHui; |
372 | case AArch64::STRHHui: |
373 | return AArch64::STRWui; |
374 | case AArch64::STURBBi: |
375 | return AArch64::STURHHi; |
376 | case AArch64::STURHHi: |
377 | return AArch64::STURWi; |
378 | case AArch64::STRWui: |
379 | case AArch64::STURWi: |
380 | return AArch64::STPWi; |
381 | case AArch64::STRXui: |
382 | case AArch64::STURXi: |
383 | return AArch64::STPXi; |
384 | case AArch64::LDRSui: |
385 | case AArch64::LDURSi: |
386 | return AArch64::LDPSi; |
387 | case AArch64::LDRDui: |
388 | case AArch64::LDURDi: |
389 | return AArch64::LDPDi; |
390 | case AArch64::LDRQui: |
391 | case AArch64::LDURQi: |
392 | return AArch64::LDPQi; |
393 | case AArch64::LDRWui: |
394 | case AArch64::LDURWi: |
395 | return AArch64::LDPWi; |
396 | case AArch64::LDRXui: |
397 | case AArch64::LDURXi: |
398 | return AArch64::LDPXi; |
399 | case AArch64::LDRSWui: |
400 | case AArch64::LDURSWi: |
401 | return AArch64::LDPSWi; |
402 | case AArch64::LDRHHui: |
403 | case AArch64::LDRSHWui: |
404 | return AArch64::LDRWui; |
405 | case AArch64::LDURHHi: |
406 | case AArch64::LDURSHWi: |
407 | return AArch64::LDURWi; |
408 | case AArch64::LDRBBui: |
409 | case AArch64::LDRSBWui: |
410 | return AArch64::LDRHHui; |
411 | case AArch64::LDURBBi: |
412 | case AArch64::LDURSBWi: |
413 | return AArch64::LDURHHi; |
414 | } |
415 | } |
416 | |
417 | static unsigned isMatchingStore(MachineInstr *LoadInst, |
418 | MachineInstr *StoreInst) { |
419 | unsigned LdOpc = LoadInst->getOpcode(); |
420 | unsigned StOpc = StoreInst->getOpcode(); |
421 | switch (LdOpc) { |
422 | default: |
423 | llvm_unreachable("Unsupported load instruction!");
424 | case AArch64::LDRBBui: |
425 | return StOpc == AArch64::STRBBui || StOpc == AArch64::STRHHui || |
426 | StOpc == AArch64::STRWui || StOpc == AArch64::STRXui; |
427 | case AArch64::LDURBBi: |
428 | return StOpc == AArch64::STURBBi || StOpc == AArch64::STURHHi || |
429 | StOpc == AArch64::STURWi || StOpc == AArch64::STURXi; |
430 | case AArch64::LDRHHui: |
431 | return StOpc == AArch64::STRHHui || StOpc == AArch64::STRWui || |
432 | StOpc == AArch64::STRXui; |
433 | case AArch64::LDURHHi: |
434 | return StOpc == AArch64::STURHHi || StOpc == AArch64::STURWi || |
435 | StOpc == AArch64::STURXi; |
436 | case AArch64::LDRWui: |
437 | return StOpc == AArch64::STRWui || StOpc == AArch64::STRXui; |
438 | case AArch64::LDURWi: |
439 | return StOpc == AArch64::STURWi || StOpc == AArch64::STURXi; |
440 | case AArch64::LDRXui: |
441 | return StOpc == AArch64::STRXui; |
442 | case AArch64::LDURXi: |
443 | return StOpc == AArch64::STURXi; |
444 | } |
445 | } |
446 | |
447 | static unsigned getPreIndexedOpcode(unsigned Opc) { |
448 | switch (Opc) { |
449 | default: |
450 | llvm_unreachable("Opcode has no pre-indexed equivalent!");
451 | case AArch64::STRSui: |
452 | return AArch64::STRSpre; |
453 | case AArch64::STRDui: |
454 | return AArch64::STRDpre; |
455 | case AArch64::STRQui: |
456 | return AArch64::STRQpre; |
457 | case AArch64::STRBBui: |
458 | return AArch64::STRBBpre; |
459 | case AArch64::STRHHui: |
460 | return AArch64::STRHHpre; |
461 | case AArch64::STRWui: |
462 | return AArch64::STRWpre; |
463 | case AArch64::STRXui: |
464 | return AArch64::STRXpre; |
465 | case AArch64::LDRSui: |
466 | return AArch64::LDRSpre; |
467 | case AArch64::LDRDui: |
468 | return AArch64::LDRDpre; |
469 | case AArch64::LDRQui: |
470 | return AArch64::LDRQpre; |
471 | case AArch64::LDRBBui: |
472 | return AArch64::LDRBBpre; |
473 | case AArch64::LDRHHui: |
474 | return AArch64::LDRHHpre; |
475 | case AArch64::LDRWui: |
476 | return AArch64::LDRWpre; |
477 | case AArch64::LDRXui: |
478 | return AArch64::LDRXpre; |
479 | case AArch64::LDRSWui: |
480 | return AArch64::LDRSWpre; |
481 | case AArch64::LDPSi: |
482 | return AArch64::LDPSpre; |
483 | case AArch64::LDPSWi: |
484 | return AArch64::LDPSWpre; |
485 | case AArch64::LDPDi: |
486 | return AArch64::LDPDpre; |
487 | case AArch64::LDPQi: |
488 | return AArch64::LDPQpre; |
489 | case AArch64::LDPWi: |
490 | return AArch64::LDPWpre; |
491 | case AArch64::LDPXi: |
492 | return AArch64::LDPXpre; |
493 | case AArch64::STPSi: |
494 | return AArch64::STPSpre; |
495 | case AArch64::STPDi: |
496 | return AArch64::STPDpre; |
497 | case AArch64::STPQi: |
498 | return AArch64::STPQpre; |
499 | case AArch64::STPWi: |
500 | return AArch64::STPWpre; |
501 | case AArch64::STPXi: |
502 | return AArch64::STPXpre; |
503 | } |
504 | } |
505 | |
506 | static unsigned getPostIndexedOpcode(unsigned Opc) { |
507 | switch (Opc) { |
508 | default: |
509 | llvm_unreachable("Opcode has no post-indexed equivalent!");
510 | case AArch64::STRSui: |
511 | return AArch64::STRSpost; |
512 | case AArch64::STRDui: |
513 | return AArch64::STRDpost; |
514 | case AArch64::STRQui: |
515 | return AArch64::STRQpost; |
516 | case AArch64::STRBBui: |
517 | return AArch64::STRBBpost; |
518 | case AArch64::STRHHui: |
519 | return AArch64::STRHHpost; |
520 | case AArch64::STRWui: |
521 | return AArch64::STRWpost; |
522 | case AArch64::STRXui: |
523 | return AArch64::STRXpost; |
524 | case AArch64::LDRSui: |
525 | return AArch64::LDRSpost; |
526 | case AArch64::LDRDui: |
527 | return AArch64::LDRDpost; |
528 | case AArch64::LDRQui: |
529 | return AArch64::LDRQpost; |
530 | case AArch64::LDRBBui: |
531 | return AArch64::LDRBBpost; |
532 | case AArch64::LDRHHui: |
533 | return AArch64::LDRHHpost; |
534 | case AArch64::LDRWui: |
535 | return AArch64::LDRWpost; |
536 | case AArch64::LDRXui: |
537 | return AArch64::LDRXpost; |
538 | case AArch64::LDRSWui: |
539 | return AArch64::LDRSWpost; |
540 | case AArch64::LDPSi: |
541 | return AArch64::LDPSpost; |
542 | case AArch64::LDPSWi: |
543 | return AArch64::LDPSWpost; |
544 | case AArch64::LDPDi: |
545 | return AArch64::LDPDpost; |
546 | case AArch64::LDPQi: |
547 | return AArch64::LDPQpost; |
548 | case AArch64::LDPWi: |
549 | return AArch64::LDPWpost; |
550 | case AArch64::LDPXi: |
551 | return AArch64::LDPXpost; |
552 | case AArch64::STPSi: |
553 | return AArch64::STPSpost; |
554 | case AArch64::STPDi: |
555 | return AArch64::STPDpost; |
556 | case AArch64::STPQi: |
557 | return AArch64::STPQpost; |
558 | case AArch64::STPWi: |
559 | return AArch64::STPWpost; |
560 | case AArch64::STPXi: |
561 | return AArch64::STPXpost; |
562 | } |
563 | } |
564 | |
565 | static bool isPairedLdSt(const MachineInstr *MI) { |
566 | switch (MI->getOpcode()) { |
567 | default: |
568 | return false; |
569 | case AArch64::LDPSi: |
570 | case AArch64::LDPSWi: |
571 | case AArch64::LDPDi: |
572 | case AArch64::LDPQi: |
573 | case AArch64::LDPWi: |
574 | case AArch64::LDPXi: |
575 | case AArch64::STPSi: |
576 | case AArch64::STPDi: |
577 | case AArch64::STPQi: |
578 | case AArch64::STPWi: |
579 | case AArch64::STPXi: |
580 | return true; |
581 | } |
582 | } |
583 | |
584 | static const MachineOperand &getLdStRegOp(const MachineInstr *MI, |
585 | unsigned PairedRegOp = 0) { |
586 | assert(PairedRegOp < 2 && "Unexpected register operand idx.");
587 | unsigned Idx = isPairedLdSt(MI) ? PairedRegOp : 0; |
588 | return MI->getOperand(Idx); |
589 | } |
590 | |
591 | static const MachineOperand &getLdStBaseOp(const MachineInstr *MI) { |
592 | unsigned Idx = isPairedLdSt(MI) ? 2 : 1; |
593 | return MI->getOperand(Idx); |
594 | } |
595 | |
596 | static const MachineOperand &getLdStOffsetOp(const MachineInstr *MI) { |
597 | unsigned Idx = isPairedLdSt(MI) ? 3 : 2; |
598 | return MI->getOperand(Idx); |
599 | } |
600 | |
601 | static bool isLdOffsetInRangeOfSt(MachineInstr *LoadInst, |
602 | MachineInstr *StoreInst) { |
603 | assert(isMatchingStore(LoadInst, StoreInst) && "Expect only matched ld/st.");
604 | int LoadSize = getMemScale(LoadInst); |
605 | int StoreSize = getMemScale(StoreInst); |
606 | int UnscaledStOffset = isUnscaledLdSt(StoreInst) |
607 | ? getLdStOffsetOp(StoreInst).getImm() |
608 | : getLdStOffsetOp(StoreInst).getImm() * StoreSize; |
609 | int UnscaledLdOffset = isUnscaledLdSt(LoadInst) |
610 | ? getLdStOffsetOp(LoadInst).getImm() |
611 | : getLdStOffsetOp(LoadInst).getImm() * LoadSize; |
612 | return (UnscaledStOffset <= UnscaledLdOffset) && |
613 | (UnscaledLdOffset + LoadSize <= (UnscaledStOffset + StoreSize)); |
614 | } |
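A worked instance of the containment check above (the instructions are assumptions for illustration):

    str x1, [x0, #1]   // STRXui: StoreSize = 8, unscaled offset = 1 * 8 = 8
    ldr w2, [x0, #3]   // LDRWui: LoadSize = 4, unscaled offset = 3 * 4 = 12

Since 8 <= 12 and 12 + 4 <= 8 + 8, the loaded bytes lie entirely within the stored bytes, so the function returns true and the load can be fed from the store.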
615 | |
616 | MachineBasicBlock::iterator |
617 | AArch64LoadStoreOpt::mergePairedInsns(MachineBasicBlock::iterator I, |
618 | MachineBasicBlock::iterator Paired, |
619 | const LdStPairFlags &Flags) { |
620 | MachineBasicBlock::iterator NextI = I; |
621 | ++NextI; |
622 | // If NextI is the second of the two instructions to be merged, we need |
623 | // to skip one further. Either way, the merge will invalidate the iterator,
624 | // and we don't need to scan the new instruction, as it's a pairwise |
625 | // instruction, which we're not considering for further action anyway. |
626 | if (NextI == Paired) |
627 | ++NextI; |
628 | |
629 | int SExtIdx = Flags.getSExtIdx(); |
630 | unsigned Opc = |
631 | SExtIdx == -1 ? I->getOpcode() : getMatchingNonSExtOpcode(I->getOpcode()); |
632 | bool IsUnscaled = isUnscaledLdSt(Opc); |
633 | int OffsetStride = IsUnscaled ? getMemScale(I) : 1; |
634 | |
635 | bool MergeForward = Flags.getMergeForward(); |
636 | unsigned NewOpc = getMatchingPairOpcode(Opc); |
637 | // Insert our new paired instruction after whichever of the paired |
638 | // instructions MergeForward indicates. |
639 | MachineBasicBlock::iterator InsertionPoint = MergeForward ? Paired : I; |
640 | // MergeForward also determines which instruction we copy the base register
641 | // operand from, so that the flags stay compatible with the input code.
642 | const MachineOperand &BaseRegOp = |
643 | MergeForward ? getLdStBaseOp(Paired) : getLdStBaseOp(I); |
644 | |
645 | // Which register is Rt and which is Rt2 depends on the offset order. |
646 | MachineInstr *RtMI, *Rt2MI; |
647 | if (getLdStOffsetOp(I).getImm() == |
648 | getLdStOffsetOp(Paired).getImm() + OffsetStride) { |
649 | RtMI = Paired; |
650 | Rt2MI = I; |
651 | // Here we swapped the assumption made for SExtIdx. |
652 | // I.e., we turn ldp I, Paired into ldp Paired, I. |
653 | // Update the index accordingly. |
654 | if (SExtIdx != -1) |
655 | SExtIdx = (SExtIdx + 1) % 2; |
656 | } else { |
657 | RtMI = I; |
658 | Rt2MI = Paired; |
659 | } |
660 | |
661 | int OffsetImm = getLdStOffsetOp(RtMI).getImm(); |
662 | |
663 | if (isNarrowLoad(Opc)) { |
664 | // Change the scaled offset from small to large type. |
665 | if (!IsUnscaled) { |
666 | assert(((OffsetImm & 1) == 0) && "Unexpected offset to merge");
667 | OffsetImm /= 2; |
668 | } |
669 | MachineInstr *RtNewDest = MergeForward ? I : Paired; |
670 | // When merging small (< 32 bit) loads for big-endian targets, the order of |
671 | // the component parts gets swapped. |
672 | if (!Subtarget->isLittleEndian()) |
673 | std::swap(RtMI, Rt2MI); |
674 | // Construct the new load instruction. |
675 | MachineInstr *NewMemMI, *BitExtMI1, *BitExtMI2; |
676 | NewMemMI = BuildMI(*I->getParent(), InsertionPoint, I->getDebugLoc(), |
677 | TII->get(NewOpc)) |
678 | .addOperand(getLdStRegOp(RtNewDest)) |
679 | .addOperand(BaseRegOp) |
680 | .addImm(OffsetImm) |
681 | .setMemRefs(I->mergeMemRefsWith(*Paired)); |
682 | |
683 | DEBUG(
684 |     dbgs()
685 |     << "Creating the new load and extract. Replacing instructions:\n ");
686 | DEBUG(I->print(dbgs()));
687 | DEBUG(dbgs() << " ");
688 | DEBUG(Paired->print(dbgs()));
689 | DEBUG(dbgs() << " with instructions:\n ");
690 | DEBUG((NewMemMI)->print(dbgs()));
691 | |
692 | int Width = getMemScale(I) == 1 ? 8 : 16; |
693 | int LSBLow = 0; |
694 | int LSBHigh = Width; |
695 | int ImmsLow = LSBLow + Width - 1; |
696 | int ImmsHigh = LSBHigh + Width - 1; |
697 | MachineInstr *ExtDestMI = MergeForward ? Paired : I; |
698 | if ((ExtDestMI == Rt2MI) == Subtarget->isLittleEndian()) { |
699 | // Create the bitfield extract for high bits. |
700 | BitExtMI1 = BuildMI(*I->getParent(), InsertionPoint, I->getDebugLoc(), |
701 | TII->get(getBitExtrOpcode(Rt2MI))) |
702 | .addOperand(getLdStRegOp(Rt2MI)) |
703 | .addReg(getLdStRegOp(RtNewDest).getReg()) |
704 | .addImm(LSBHigh) |
705 | .addImm(ImmsHigh); |
706 | // Create the bitfield extract for low bits. |
707 | if (RtMI->getOpcode() == getMatchingNonSExtOpcode(RtMI->getOpcode())) { |
708 | // For unsigned, prefer to use AND for low bits. |
709 | BitExtMI2 = BuildMI(*I->getParent(), InsertionPoint, I->getDebugLoc(), |
710 | TII->get(AArch64::ANDWri)) |
711 | .addOperand(getLdStRegOp(RtMI)) |
712 | .addReg(getLdStRegOp(RtNewDest).getReg()) |
713 | .addImm(ImmsLow); |
714 | } else { |
715 | BitExtMI2 = BuildMI(*I->getParent(), InsertionPoint, I->getDebugLoc(), |
716 | TII->get(getBitExtrOpcode(RtMI))) |
717 | .addOperand(getLdStRegOp(RtMI)) |
718 | .addReg(getLdStRegOp(RtNewDest).getReg()) |
719 | .addImm(LSBLow) |
720 | .addImm(ImmsLow); |
721 | } |
722 | } else { |
723 | // Create the bitfield extract for low bits. |
724 | if (RtMI->getOpcode() == getMatchingNonSExtOpcode(RtMI->getOpcode())) { |
725 | // For unsigned, prefer to use AND for low bits. |
726 | BitExtMI1 = BuildMI(*I->getParent(), InsertionPoint, I->getDebugLoc(), |
727 | TII->get(AArch64::ANDWri)) |
728 | .addOperand(getLdStRegOp(RtMI)) |
729 | .addReg(getLdStRegOp(RtNewDest).getReg()) |
730 | .addImm(ImmsLow); |
731 | } else { |
732 | BitExtMI1 = BuildMI(*I->getParent(), InsertionPoint, I->getDebugLoc(), |
733 | TII->get(getBitExtrOpcode(RtMI))) |
734 | .addOperand(getLdStRegOp(RtMI)) |
735 | .addReg(getLdStRegOp(RtNewDest).getReg()) |
736 | .addImm(LSBLow) |
737 | .addImm(ImmsLow); |
738 | } |
739 | |
740 | // Create the bitfield extract for high bits. |
741 | BitExtMI2 = BuildMI(*I->getParent(), InsertionPoint, I->getDebugLoc(), |
742 | TII->get(getBitExtrOpcode(Rt2MI))) |
743 | .addOperand(getLdStRegOp(Rt2MI)) |
744 | .addReg(getLdStRegOp(RtNewDest).getReg()) |
745 | .addImm(LSBHigh) |
746 | .addImm(ImmsHigh); |
747 | } |
748 | DEBUG(dbgs() << " ");
749 | DEBUG((BitExtMI1)->print(dbgs()));
750 | DEBUG(dbgs() << " ");
751 | DEBUG((BitExtMI2)->print(dbgs()));
752 | DEBUG(dbgs() << "\n");
753 | |
754 | // Erase the old instructions. |
755 | I->eraseFromParent(); |
756 | Paired->eraseFromParent(); |
757 | return NextI; |
758 | } |
759 | |
760 | // Construct the new instruction. |
761 | MachineInstrBuilder MIB; |
762 | if (isNarrowStore(Opc)) { |
763 | // Change the scaled offset from small to large type. |
764 | if (!IsUnscaled) { |
765 | assert(((OffsetImm & 1) == 0) && "Unexpected offset to merge");
766 | OffsetImm /= 2; |
767 | } |
768 | MIB = BuildMI(*I->getParent(), InsertionPoint, I->getDebugLoc(), |
769 | TII->get(NewOpc)) |
770 | .addOperand(getLdStRegOp(I)) |
771 | .addOperand(BaseRegOp) |
772 | .addImm(OffsetImm) |
773 | .setMemRefs(I->mergeMemRefsWith(*Paired)); |
774 | } else { |
775 | // Handle Unscaled |
776 | if (IsUnscaled) |
777 | OffsetImm /= OffsetStride; |
778 | MIB = BuildMI(*I->getParent(), InsertionPoint, I->getDebugLoc(), |
779 | TII->get(NewOpc)) |
780 | .addOperand(getLdStRegOp(RtMI)) |
781 | .addOperand(getLdStRegOp(Rt2MI)) |
782 | .addOperand(BaseRegOp) |
783 | .addImm(OffsetImm); |
784 | } |
785 | |
786 | (void)MIB; |
787 | |
788 | // FIXME: Do we need/want to copy the mem operands from the source |
789 | // instructions? Probably. What uses them after this? |
790 | |
791 | DEBUG(dbgs() << "Creating pair load/store. Replacing instructions:\n ");
792 | DEBUG(I->print(dbgs()));
793 | DEBUG(dbgs() << " ");
794 | DEBUG(Paired->print(dbgs()));
795 | DEBUG(dbgs() << " with instruction:\n ");
796 | |
797 | if (SExtIdx != -1) { |
798 | // Generate the sign extension for the proper result of the ldp. |
799 | // I.e., with X1, that would be: |
800 | // %W1<def> = KILL %W1, %X1<imp-def> |
801 | // %X1<def> = SBFMXri %X1<kill>, 0, 31 |
802 | MachineOperand &DstMO = MIB->getOperand(SExtIdx); |
803 | // Right now, DstMO has the extended register, since it comes from an |
804 | // extended opcode. |
805 | unsigned DstRegX = DstMO.getReg(); |
806 | // Get the W variant of that register. |
807 | unsigned DstRegW = TRI->getSubReg(DstRegX, AArch64::sub_32); |
808 | // Update the result of LDP to use the W instead of the X variant. |
809 | DstMO.setReg(DstRegW); |
810 | DEBUG(((MachineInstr *)MIB)->print(dbgs()));
811 | DEBUG(dbgs() << "\n");
812 | // Make the machine verifier happy by providing a definition for |
813 | // the X register. |
814 | // Insert this definition right after the generated LDP, i.e., before |
815 | // InsertionPoint. |
816 | MachineInstrBuilder MIBKill = |
817 | BuildMI(*I->getParent(), InsertionPoint, I->getDebugLoc(), |
818 | TII->get(TargetOpcode::KILL), DstRegW) |
819 | .addReg(DstRegW) |
820 | .addReg(DstRegX, RegState::Define); |
821 | MIBKill->getOperand(2).setImplicit(); |
822 | // Create the sign extension. |
823 | MachineInstrBuilder MIBSXTW = |
824 | BuildMI(*I->getParent(), InsertionPoint, I->getDebugLoc(), |
825 | TII->get(AArch64::SBFMXri), DstRegX) |
826 | .addReg(DstRegX) |
827 | .addImm(0) |
828 | .addImm(31); |
829 | (void)MIBSXTW; |
830 | DEBUG(dbgs() << " Extend operand:\n ");
831 | DEBUG(((MachineInstr *)MIBSXTW)->print(dbgs()));
832 | DEBUG(dbgs() << "\n");
833 | } else { |
834 | DEBUG(((MachineInstr *)MIB)->print(dbgs()));
835 | DEBUG(dbgs() << "\n");
836 | } |
837 | |
838 | // Erase the old instructions. |
839 | I->eraseFromParent(); |
840 | Paired->eraseFromParent(); |
841 | |
842 | return NextI; |
843 | } |
844 | |
845 | MachineBasicBlock::iterator |
846 | AArch64LoadStoreOpt::promoteLoadFromStore(MachineBasicBlock::iterator LoadI, |
847 | MachineBasicBlock::iterator StoreI) { |
848 | MachineBasicBlock::iterator NextI = LoadI; |
849 | ++NextI; |
850 | |
851 | int LoadSize = getMemScale(LoadI); |
852 | int StoreSize = getMemScale(StoreI); |
853 | unsigned LdRt = getLdStRegOp(LoadI).getReg(); |
854 | unsigned StRt = getLdStRegOp(StoreI).getReg(); |
855 | bool IsStoreXReg = TRI->getRegClass(AArch64::GPR64RegClassID)->contains(StRt); |
856 | |
857 | assert((IsStoreXReg ||
858 |         TRI->getRegClass(AArch64::GPR32RegClassID)->contains(StRt)) &&
859 |        "Unexpected RegClass");
860 | |
861 | MachineInstr *BitExtMI; |
862 | if (LoadSize == StoreSize && (LoadSize == 4 || LoadSize == 8)) { |
863 | // Remove the load if the destination register of the load is the same as
864 | // the register holding the stored value.
865 | if (StRt == LdRt && LoadSize == 8) { |
866 | DEBUG(dbgs() << "Remove load instruction:\n ");
867 | DEBUG(LoadI->print(dbgs()));
868 | DEBUG(dbgs() << "\n");
869 | LoadI->eraseFromParent(); |
870 | return NextI; |
871 | } |
872 | // Replace the load with a mov if the load and store are the same size.
873 | BitExtMI = |
874 | BuildMI(*LoadI->getParent(), LoadI, LoadI->getDebugLoc(), |
875 | TII->get(IsStoreXReg ? AArch64::ORRXrs : AArch64::ORRWrs), LdRt) |
876 | .addReg(IsStoreXReg ? AArch64::XZR : AArch64::WZR) |
877 | .addReg(StRt) |
878 | .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0)); |
879 | } else { |
880 | // FIXME: Currently we disable this transformation in big-endian targets as |
881 | // performance and correctness are verified only in little-endian. |
882 | if (!Subtarget->isLittleEndian()) |
883 | return NextI; |
884 | bool IsUnscaled = isUnscaledLdSt(LoadI); |
885 | assert(IsUnscaled == isUnscaledLdSt(StoreI) && "Unsupported ld/st match");
886 | assert(LoadSize <= StoreSize && "Invalid load size");
887 | int UnscaledLdOffset = IsUnscaled |
888 | ? getLdStOffsetOp(LoadI).getImm() |
889 | : getLdStOffsetOp(LoadI).getImm() * LoadSize; |
890 | int UnscaledStOffset = IsUnscaled |
891 | ? getLdStOffsetOp(StoreI).getImm() |
892 | : getLdStOffsetOp(StoreI).getImm() * StoreSize; |
893 | int Width = LoadSize * 8; |
894 | int Immr = 8 * (UnscaledLdOffset - UnscaledStOffset); |
895 | int Imms = Immr + Width - 1; |
Value stored to 'Imms' during its initialization is never read
896 | unsigned DestReg = IsStoreXReg |
897 | ? TRI->getMatchingSuperReg(LdRt, AArch64::sub_32, |
898 | &AArch64::GPR64RegClass) |
899 | : LdRt; |
900 | |
901 | assert((UnscaledLdOffset >= UnscaledStOffset &&
902 |         (UnscaledLdOffset + LoadSize) <= UnscaledStOffset + StoreSize) &&
903 |        "Invalid offset");
904 | |
905 | Immr = 8 * (UnscaledLdOffset - UnscaledStOffset); |
906 | Imms = Immr + Width - 1; |
907 | if (UnscaledLdOffset == UnscaledStOffset) { |
908 | uint32_t AndMaskEncoded = ((IsStoreXReg ? 1 : 0) << 12) // N |
909 | | ((Immr) << 6) // immr |
910 | | ((Imms) << 0) // imms |
911 | ; |
912 | |
913 | BitExtMI = |
914 | BuildMI(*LoadI->getParent(), LoadI, LoadI->getDebugLoc(), |
915 | TII->get(IsStoreXReg ? AArch64::ANDXri : AArch64::ANDWri), |
916 | DestReg) |
917 | .addReg(StRt) |
918 | .addImm(AndMaskEncoded); |
919 | } else { |
920 | BitExtMI = |
921 | BuildMI(*LoadI->getParent(), LoadI, LoadI->getDebugLoc(), |
922 | TII->get(IsStoreXReg ? AArch64::UBFMXri : AArch64::UBFMWri), |
923 | DestReg) |
924 | .addReg(StRt) |
925 | .addImm(Immr) |
926 | .addImm(Imms); |
927 | } |
928 | } |
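A worked instance of the immediate encoding above (assuming a 16-bit load promoted from a wider store at the same offset): LoadSize == 2 gives Width == 16, and UnscaledLdOffset == UnscaledStOffset gives Immr == 0 and Imms == 15. With a W destination (N == 0), AndMaskEncoded == (0 << 12) | (0 << 6) | 15 == 15, which ANDWri interprets as the logical-immediate mask 0xffff, i.e. "and wDst, wStRt, #0xffff".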
929 | |
930 | DEBUG(dbgs() << "Promoting load by replacing :\n ");
931 | DEBUG(StoreI->print(dbgs()));
932 | DEBUG(dbgs() << " ");
933 | DEBUG(LoadI->print(dbgs()));
934 | DEBUG(dbgs() << " with instructions:\n ");
935 | DEBUG(StoreI->print(dbgs()));
936 | DEBUG(dbgs() << " ");
937 | DEBUG((BitExtMI)->print(dbgs()));
938 | DEBUG(dbgs() << "\n");
939 | |
940 | // Erase the old instructions. |
941 | LoadI->eraseFromParent(); |
942 | return NextI; |
943 | } |
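One way to address the reported dead store, assuming the duplicated arithmetic at lines 905-906 is unintentional, is to compute Immr and Imms only once, after the offset assertion (a sketch, not necessarily the fix that was committed upstream):

    int Width = LoadSize * 8;
    assert((UnscaledLdOffset >= UnscaledStOffset &&
            (UnscaledLdOffset + LoadSize) <= UnscaledStOffset + StoreSize) &&
           "Invalid offset");
    // Single computation; nothing is stored and then overwritten unread.
    int Immr = 8 * (UnscaledLdOffset - UnscaledStOffset);
    int Imms = Immr + Width - 1;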
944 | |
945 | /// trackRegDefsUses - Remember what registers the specified instruction uses |
946 | /// and modifies. |
947 | static void trackRegDefsUses(const MachineInstr *MI, BitVector &ModifiedRegs, |
948 | BitVector &UsedRegs, |
949 | const TargetRegisterInfo *TRI) { |
950 | for (const MachineOperand &MO : MI->operands()) { |
951 | if (MO.isRegMask()) |
952 | ModifiedRegs.setBitsNotInMask(MO.getRegMask()); |
953 | |
954 | if (!MO.isReg()) |
955 | continue; |
956 | unsigned Reg = MO.getReg(); |
957 | if (MO.isDef()) { |
958 | for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI) |
959 | ModifiedRegs.set(*AI); |
960 | } else { |
961 | assert(MO.isUse() && "Reg operand not a def and not a use?!?");
962 | for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI) |
963 | UsedRegs.set(*AI); |
964 | } |
965 | } |
966 | } |
967 | |
968 | static bool inBoundsForPair(bool IsUnscaled, int Offset, int OffsetStride) { |
969 | // Convert the byte-offset used by unscaled into an "element" offset used |
970 | // by the scaled pair load/store instructions. |
971 | if (IsUnscaled) |
972 | Offset /= OffsetStride; |
973 | |
974 | return Offset <= 63 && Offset >= -64; |
975 | } |
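For example: an unscaled LDURXi at byte offset 504 with OffsetStride == 8 converts to element offset 63, the top of the signed 7-bit range used by LDPXi, so it can pair; byte offset 512 converts to 64 and is rejected.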
976 | |
977 | // Do alignment, specialized to power of 2 and for signed ints, |
978 | // avoiding having to do a C-style cast from uint_64t to int when |
979 | // using RoundUpToAlignment from include/llvm/Support/MathExtras.h. |
980 | // FIXME: Move this function to include/MathExtras.h? |
981 | static int alignTo(int Num, int PowOf2) { |
982 | return (Num + PowOf2 - 1) & ~(PowOf2 - 1); |
983 | } |
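E.g. alignTo(5, 4) == (5 + 3) & ~3 == 8. The callers in findMatchingInsn below use it with PowOf2 == 2 (narrow merges) or OffsetStride (unscaled pairs) to test whether MinOffset is already suitably aligned.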
984 | |
985 | static bool mayAlias(MachineInstr *MIa, MachineInstr *MIb, |
986 | const AArch64InstrInfo *TII) { |
987 | // One of the instructions must modify memory. |
988 | if (!MIa->mayStore() && !MIb->mayStore()) |
989 | return false; |
990 | |
991 | // Both instructions must be memory operations. |
992 | if (!MIa->mayLoadOrStore() && !MIb->mayLoadOrStore()) |
993 | return false; |
994 | |
995 | return !TII->areMemAccessesTriviallyDisjoint(MIa, MIb); |
996 | } |
997 | |
998 | static bool mayAlias(MachineInstr *MIa, |
999 | SmallVectorImpl<MachineInstr *> &MemInsns, |
1000 | const AArch64InstrInfo *TII) { |
1001 | for (auto &MIb : MemInsns) |
1002 | if (mayAlias(MIa, MIb, TII)) |
1003 | return true; |
1004 | |
1005 | return false; |
1006 | } |
1007 | |
1008 | bool AArch64LoadStoreOpt::findMatchingStore( |
1009 | MachineBasicBlock::iterator I, unsigned Limit, |
1010 | MachineBasicBlock::iterator &StoreI) { |
1011 | MachineBasicBlock::iterator E = I->getParent()->begin(); |
1012 | MachineBasicBlock::iterator MBBI = I; |
1013 | MachineInstr *FirstMI = I; |
1014 | unsigned BaseReg = getLdStBaseOp(FirstMI).getReg(); |
1015 | |
1016 | // Track which registers have been modified and used between the first insn |
1017 | // and the second insn. |
1018 | BitVector ModifiedRegs, UsedRegs; |
1019 | ModifiedRegs.resize(TRI->getNumRegs()); |
1020 | UsedRegs.resize(TRI->getNumRegs()); |
1021 | |
1022 | for (unsigned Count = 0; MBBI != E && Count < Limit;) { |
1023 | --MBBI; |
1024 | MachineInstr *MI = MBBI; |
1025 | // Skip DBG_VALUE instructions. Otherwise debug info can affect the |
1026 | // optimization by changing how far we scan. |
1027 | if (MI->isDebugValue()) |
1028 | continue; |
1029 | // Now that we know this is a real instruction, count it. |
1030 | ++Count; |
1031 | |
1032 | // If the load instruction reads directly from the address to which the |
1033 | // store instruction writes and the stored value is not modified, we can |
1034 | // promote the load. Since we do not handle stores with pre-/post-index, |
1035 | // it's unnecessary to check if BaseReg is modified by the store itself. |
1036 | if (MI->mayStore() && isMatchingStore(FirstMI, MI) && |
1037 | BaseReg == getLdStBaseOp(MI).getReg() && |
1038 | isLdOffsetInRangeOfSt(FirstMI, MI) && |
1039 | !ModifiedRegs[getLdStRegOp(MI).getReg()]) { |
1040 | StoreI = MBBI; |
1041 | return true; |
1042 | } |
1043 | |
1044 | if (MI->isCall()) |
1045 | return false; |
1046 | |
1047 | // Update modified / uses register lists. |
1048 | trackRegDefsUses(MI, ModifiedRegs, UsedRegs, TRI); |
1049 | |
1050 | // Otherwise, if the base register is modified, we have no match, so |
1051 | // return early. |
1052 | if (ModifiedRegs[BaseReg]) |
1053 | return false; |
1054 | |
1055 | // If we encounter a store aliased with the load, return early. |
1056 | if (MI->mayStore() && mayAlias(FirstMI, MI, TII)) |
1057 | return false; |
1058 | } |
1059 | return false; |
1060 | } |
1061 | |
1062 | /// findMatchingInsn - Scan the instructions looking for a load/store that can |
1063 | /// be combined with the current instruction into a load/store pair. |
1064 | MachineBasicBlock::iterator |
1065 | AArch64LoadStoreOpt::findMatchingInsn(MachineBasicBlock::iterator I, |
1066 | LdStPairFlags &Flags, unsigned Limit) { |
1067 | MachineBasicBlock::iterator E = I->getParent()->end(); |
1068 | MachineBasicBlock::iterator MBBI = I; |
1069 | MachineInstr *FirstMI = I; |
1070 | ++MBBI; |
1071 | |
1072 | unsigned Opc = FirstMI->getOpcode(); |
1073 | bool MayLoad = FirstMI->mayLoad(); |
1074 | bool IsUnscaled = isUnscaledLdSt(FirstMI); |
1075 | unsigned Reg = getLdStRegOp(FirstMI).getReg(); |
1076 | unsigned BaseReg = getLdStBaseOp(FirstMI).getReg(); |
1077 | int Offset = getLdStOffsetOp(FirstMI).getImm(); |
1078 | bool IsNarrowStore = isNarrowStore(Opc); |
1079 | |
1080 | // For narrow stores, find only the case where the stored value is WZR. |
1081 | if (IsNarrowStore && Reg != AArch64::WZR) |
1082 | return E; |
1083 | |
1084 | // Early exit if the first instruction modifies the base register. |
1085 | // e.g., ldr x0, [x0] |
1086 | if (FirstMI->modifiesRegister(BaseReg, TRI)) |
1087 | return E; |
1088 | |
1089 | // Early exit if the offset is not possible to match. (6 bits of positive
1090 | // range, plus allow an extra one in case we find a later insn that matches |
1091 | // with Offset-1) |
1092 | int OffsetStride = IsUnscaled ? getMemScale(FirstMI) : 1; |
1093 | if (!(isNarrowLoad(Opc) || IsNarrowStore) && |
1094 | !inBoundsForPair(IsUnscaled, Offset, OffsetStride)) |
1095 | return E; |
1096 | |
1097 | // Track which registers have been modified and used between the first insn |
1098 | // (inclusive) and the second insn. |
1099 | BitVector ModifiedRegs, UsedRegs; |
1100 | ModifiedRegs.resize(TRI->getNumRegs()); |
1101 | UsedRegs.resize(TRI->getNumRegs()); |
1102 | |
1103 | // Remember any instructions that read/write memory between FirstMI and MI. |
1104 | SmallVector<MachineInstr *, 4> MemInsns; |
1105 | |
1106 | for (unsigned Count = 0; MBBI != E && Count < Limit; ++MBBI) { |
1107 | MachineInstr *MI = MBBI; |
1108 | // Skip DBG_VALUE instructions. Otherwise debug info can affect the |
1109 | // optimization by changing how far we scan. |
1110 | if (MI->isDebugValue()) |
1111 | continue; |
1112 | |
1113 | // Now that we know this is a real instruction, count it. |
1114 | ++Count; |
1115 | |
1116 | bool CanMergeOpc = Opc == MI->getOpcode(); |
1117 | Flags.setSExtIdx(-1); |
1118 | if (!CanMergeOpc) { |
1119 | bool IsValidLdStrOpc; |
1120 | unsigned NonSExtOpc = getMatchingNonSExtOpcode(Opc, &IsValidLdStrOpc); |
1121 | assert(IsValidLdStrOpc &&
1122 |        "Given Opc should be a Load or Store with an immediate");
1123 | // Opc will be the first instruction in the pair. |
1124 | Flags.setSExtIdx(NonSExtOpc == (unsigned)Opc ? 1 : 0); |
1125 | CanMergeOpc = NonSExtOpc == getMatchingNonSExtOpcode(MI->getOpcode()); |
1126 | } |
1127 | |
1128 | if (CanMergeOpc && getLdStOffsetOp(MI).isImm()) { |
1129 | assert(MI->mayLoadOrStore() && "Expected memory operation.");
1130 | // If we've found another instruction with the same opcode, check to see |
1131 | // if the base and offset are compatible with our starting instruction. |
1132 | // These instructions all have scaled immediate operands, so we just |
1133 | // check for +1/-1. Make sure to check the new instruction offset is |
1134 | // actually an immediate and not a symbolic reference destined for |
1135 | // a relocation. |
1136 | // |
1137 | // Pairwise instructions have a 7-bit signed offset field. Single insns |
1138 | // have a 12-bit unsigned offset field. To be a valid combine, the |
1139 | // final offset must be in range. |
1140 | unsigned MIBaseReg = getLdStBaseOp(MI).getReg(); |
1141 | int MIOffset = getLdStOffsetOp(MI).getImm(); |
1142 | if (BaseReg == MIBaseReg && ((Offset == MIOffset + OffsetStride) || |
1143 | (Offset + OffsetStride == MIOffset))) { |
1144 | int MinOffset = Offset < MIOffset ? Offset : MIOffset; |
1145 | // If this is a volatile load/store that otherwise matched, stop looking,
1146 | // as something is going on that we don't have enough information to
1147 | // safely transform. Similarly, stop if we see a hint to avoid pairs. |
1148 | if (MI->hasOrderedMemoryRef() || TII->isLdStPairSuppressed(MI)) |
1149 | return E; |
1150 | // If the resultant immediate offset of merging these instructions |
1151 | // is out of range for a pairwise instruction, bail and keep looking. |
1152 | bool MIIsUnscaled = isUnscaledLdSt(MI); |
1153 | bool IsNarrowLoad = isNarrowLoad(MI->getOpcode()); |
1154 | if (!IsNarrowLoad && |
1155 | !inBoundsForPair(MIIsUnscaled, MinOffset, OffsetStride)) { |
1156 | trackRegDefsUses(MI, ModifiedRegs, UsedRegs, TRI); |
1157 | MemInsns.push_back(MI); |
1158 | continue; |
1159 | } |
1160 | |
1161 | if (IsNarrowLoad || IsNarrowStore) { |
1162 | // If the alignment requirements of the scaled wide load/store |
1163 | // instruction can't express the offset of the scaled narrow |
1164 | // input, bail and keep looking. |
1165 | if (!IsUnscaled && alignTo(MinOffset, 2) != MinOffset) { |
1166 | trackRegDefsUses(MI, ModifiedRegs, UsedRegs, TRI); |
1167 | MemInsns.push_back(MI); |
1168 | continue; |
1169 | } |
1170 | } else { |
1171 | // If the alignment requirements of the paired (scaled) instruction |
1172 | // can't express the offset of the unscaled input, bail and keep |
1173 | // looking. |
1174 | if (IsUnscaled && (alignTo(MinOffset, OffsetStride) != MinOffset)) { |
1175 | trackRegDefsUses(MI, ModifiedRegs, UsedRegs, TRI); |
1176 | MemInsns.push_back(MI); |
1177 | continue; |
1178 | } |
1179 | } |
1180 | // If the destination register of the loads is the same register, bail |
1181 | // and keep looking. A load-pair instruction with both destination |
1182 | // registers the same is UNPREDICTABLE and will result in an exception. |
1183 | // For narrow stores, allow only when the stored value is the same |
1184 | // (i.e., WZR). |
1185 | if ((MayLoad && Reg == getLdStRegOp(MI).getReg()) || |
1186 | (IsNarrowStore && Reg != getLdStRegOp(MI).getReg())) { |
1187 | trackRegDefsUses(MI, ModifiedRegs, UsedRegs, TRI); |
1188 | MemInsns.push_back(MI); |
1189 | continue; |
1190 | } |
1191 | |
1192 | // If the Rt of the second instruction was not modified or used between |
1193 | // the two instructions and none of the instructions between the second |
1194 | // and first alias with the second, we can combine the second into the |
1195 | // first. |
1196 | if (!ModifiedRegs[getLdStRegOp(MI).getReg()] && |
1197 | !(MI->mayLoad() && UsedRegs[getLdStRegOp(MI).getReg()]) && |
1198 | !mayAlias(MI, MemInsns, TII)) { |
1199 | Flags.setMergeForward(false); |
1200 | return MBBI; |
1201 | } |
1202 | |
1203 | // Likewise, if the Rt of the first instruction is not modified or used |
1204 | // between the two instructions and none of the instructions between the |
1205 | // first and the second alias with the first, we can combine the first |
1206 | // into the second. |
1207 | if (!ModifiedRegs[getLdStRegOp(FirstMI).getReg()] && |
1208 | !(MayLoad && UsedRegs[getLdStRegOp(FirstMI).getReg()]) && |
1209 | !mayAlias(FirstMI, MemInsns, TII)) { |
1210 | Flags.setMergeForward(true); |
1211 | return MBBI; |
1212 | } |
1213 | // Unable to combine these instructions due to interference in between. |
1214 | // Keep looking. |
1215 | } |
1216 | } |
1217 | |
1218 | // The instruction wasn't a matching load or store. Stop searching if we
1219 | // encounter a call instruction that might modify memory. |
1220 | if (MI->isCall()) |
1221 | return E; |
1222 | |
1223 | // Update modified / uses register lists. |
1224 | trackRegDefsUses(MI, ModifiedRegs, UsedRegs, TRI); |
1225 | |
1226 | // Otherwise, if the base register is modified, we have no match, so |
1227 | // return early. |
1228 | if (ModifiedRegs[BaseReg]) |
1229 | return E; |
1230 | |
1231 | // Update list of instructions that read/write memory. |
1232 | if (MI->mayLoadOrStore()) |
1233 | MemInsns.push_back(MI); |
1234 | } |
1235 | return E; |
1236 | } |
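// A minimal sketch (an assumption for illustration; the real predicate,
// inBoundsForPair(), is defined earlier in this file) of the range check the
// scan above relies on: paired instructions encode a 7-bit signed, scaled
// immediate, so the element offset must lie in [-64, 63]. Unscaled candidates
// are first converted from a byte offset to an element offset using the
// stride from getMemScale().
static bool inBoundsForPairSketch(bool IsUnscaled, int Offset,
                                  int OffsetStride) {
  if (IsUnscaled)
    Offset /= OffsetStride; // byte offset -> element offset
  return Offset <= 63 && Offset >= -64;
}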
1237 | |
1238 | MachineBasicBlock::iterator |
1239 | AArch64LoadStoreOpt::mergeUpdateInsn(MachineBasicBlock::iterator I, |
1240 | MachineBasicBlock::iterator Update, |
1241 | bool IsPreIdx) { |
1242 | assert((Update->getOpcode() == AArch64::ADDXri ||
1243 | Update->getOpcode() == AArch64::SUBXri) &&
1244 | "Unexpected base register update instruction to merge!");
1245 | MachineBasicBlock::iterator NextI = I; |
1246 | // Return the instruction following the merged instruction, which is |
1247 | // the instruction following our unmerged load. Unless that's the add/sub |
1248 | // instruction we're merging, in which case it's the one after that. |
1249 | if (++NextI == Update) |
1250 | ++NextI; |
1251 | |
1252 | int Value = Update->getOperand(2).getImm(); |
1253 | assert(AArch64_AM::getShiftValue(Update->getOperand(3).getImm()) == 0 &&
1254 | "Can't merge 1 << 12 offset into pre-/post-indexed load / store");
1255 | if (Update->getOpcode() == AArch64::SUBXri) |
1256 | Value = -Value; |
1257 | |
1258 | unsigned NewOpc = IsPreIdx ? getPreIndexedOpcode(I->getOpcode()) |
1259 | : getPostIndexedOpcode(I->getOpcode()); |
1260 | MachineInstrBuilder MIB; |
1261 | if (!isPairedLdSt(I)) { |
1262 | // Non-paired instruction. |
1263 | MIB = BuildMI(*I->getParent(), I, I->getDebugLoc(), TII->get(NewOpc)) |
1264 | .addOperand(getLdStRegOp(Update)) |
1265 | .addOperand(getLdStRegOp(I)) |
1266 | .addOperand(getLdStBaseOp(I)) |
1267 | .addImm(Value); |
1268 | } else { |
1269 | // Paired instruction. |
1270 | int Scale = getMemScale(I); |
1271 | MIB = BuildMI(*I->getParent(), I, I->getDebugLoc(), TII->get(NewOpc)) |
1272 | .addOperand(getLdStRegOp(Update)) |
1273 | .addOperand(getLdStRegOp(I, 0)) |
1274 | .addOperand(getLdStRegOp(I, 1)) |
1275 | .addOperand(getLdStBaseOp(I)) |
1276 | .addImm(Value / Scale); |
1277 | } |
1278 | (void)MIB; |
1279 | |
1280 | if (IsPreIdx) |
1281 | DEBUG(dbgs() << "Creating pre-indexed load/store.");
1282 | else
1283 | DEBUG(dbgs() << "Creating post-indexed load/store.");
1284 | DEBUG(dbgs() << " Replacing instructions:\n ");
1285 | DEBUG(I->print(dbgs()));
1286 | DEBUG(dbgs() << " ");
1287 | DEBUG(Update->print(dbgs()));
1288 | DEBUG(dbgs() << " with instruction:\n ");
1289 | DEBUG(((MachineInstr *)MIB)->print(dbgs()));
1290 | DEBUG(dbgs() << "\n");
1291 | |
1292 | // Erase the old instructions for the block. |
1293 | I->eraseFromParent(); |
1294 | Update->eraseFromParent(); |
1295 | |
1296 | return NextI; |
1297 | } |
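// Worked example of the paired writeback scaling above (illustration only):
// merging "ldp x0, x1, [x2]" with "add x2, x2, #32" yields the post-indexed
// "ldp x0, x1, [x2], #32". Since the paired writeback immediate is encoded
// scaled, the builder emits Value / Scale, i.e. 32 / 8 == 4 for 64-bit
// registers.
static_assert(32 / 8 == 4, "paired writeback immediate is Value / Scale");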
1298 | |
1299 | bool AArch64LoadStoreOpt::isMatchingUpdateInsn(MachineInstr *MemMI, |
1300 | MachineInstr *MI, |
1301 | unsigned BaseReg, int Offset) { |
1302 | switch (MI->getOpcode()) { |
1303 | default: |
1304 | break; |
1305 | case AArch64::SUBXri: |
1306 | // Negate the offset for a SUB instruction. |
1307 | Offset *= -1; |
1308 | // FALLTHROUGH |
1309 | case AArch64::ADDXri: |
1310 | // Make sure it's a vanilla immediate operand, not a relocation or |
1311 | // anything else we can't handle. |
1312 | if (!MI->getOperand(2).isImm()) |
1313 | break; |
1314 | // Watch out for 1 << 12 shifted value. |
1315 | if (AArch64_AM::getShiftValue(MI->getOperand(3).getImm())) |
1316 | break; |
1317 | |
1318 | // The update instruction source and destination register must be the |
1319 | // same as the load/store base register. |
1320 | if (MI->getOperand(0).getReg() != BaseReg || |
1321 | MI->getOperand(1).getReg() != BaseReg) |
1322 | break; |
1323 | |
1324 | bool IsPairedInsn = isPairedLdSt(MemMI); |
1325 | int UpdateOffset = MI->getOperand(2).getImm(); |
1326 | // For non-paired load/store instructions, the immediate must fit in a |
1327 | // signed 9-bit integer. |
1328 | if (!IsPairedInsn && (UpdateOffset > 255 || UpdateOffset < -256)) |
1329 | break; |
1330 | |
1331 | // For paired load/store instructions, the immediate must be a multiple of |
1332 | // the scaling factor. The scaled offset must also fit into a signed 7-bit |
1333 | // integer. |
1334 | if (IsPairedInsn) { |
1335 | int Scale = getMemScale(MemMI); |
1336 | if (UpdateOffset % Scale != 0) |
1337 | break; |
1338 | |
1339 | int ScaledOffset = UpdateOffset / Scale; |
1340 | if (ScaledOffset > 63 || ScaledOffset < -64)
1341 | break; |
1342 | } |
1343 | |
1344 | // If we have a non-zero Offset, check that it matches the amount we're
1345 | // adding to the register. A zero Offset matches any update amount.
1346 | if (!Offset || Offset == MI->getOperand(2).getImm()) |
1347 | return true; |
1348 | break; |
1349 | } |
1350 | return false; |
1351 | } |
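// Illustrative helper (hypothetical, not part of this file) restating the
// paired-update rules checked above: the update amount must be a multiple of
// the memory scale, and the scaled value must fit the signed 7-bit writeback
// field. For a 64-bit LDP/STP (Scale == 8), #32 passes (32 / 8 == 4), while
// #20 (not a multiple of 8) and #1024 (scales to 128) are rejected.
static bool isValidPairedUpdateImmSketch(int UpdateOffset, int Scale) {
  if (UpdateOffset % Scale != 0)
    return false;
  int ScaledOffset = UpdateOffset / Scale;
  return ScaledOffset <= 63 && ScaledOffset >= -64;
}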
1352 | |
1353 | MachineBasicBlock::iterator AArch64LoadStoreOpt::findMatchingUpdateInsnForward( |
1354 | MachineBasicBlock::iterator I, unsigned Limit, int UnscaledOffset) { |
1355 | MachineBasicBlock::iterator E = I->getParent()->end(); |
1356 | MachineInstr *MemMI = I; |
1357 | MachineBasicBlock::iterator MBBI = I; |
1358 | |
1359 | unsigned BaseReg = getLdStBaseOp(MemMI).getReg(); |
1360 | int MIUnscaledOffset = getLdStOffsetOp(MemMI).getImm() * getMemScale(MemMI); |
1361 | |
1362 | // Scan forward looking for post-index opportunities. Updating instructions |
1363 | // can't be formed if the memory instruction doesn't have the offset we're |
1364 | // looking for. |
1365 | if (MIUnscaledOffset != UnscaledOffset) |
1366 | return E; |
1367 | |
1368 | // If the base register overlaps a destination register, we can't |
1369 | // merge the update. |
1370 | bool IsPairedInsn = isPairedLdSt(MemMI); |
1371 | for (unsigned i = 0, e = IsPairedInsn ? 2 : 1; i != e; ++i) { |
1372 | unsigned DestReg = getLdStRegOp(MemMI, i).getReg(); |
1373 | if (DestReg == BaseReg || TRI->isSubRegister(BaseReg, DestReg)) |
1374 | return E; |
1375 | } |
1376 | |
1377 | // Track which registers have been modified and used between the first insn |
1378 | // (inclusive) and the second insn. |
1379 | BitVector ModifiedRegs, UsedRegs; |
1380 | ModifiedRegs.resize(TRI->getNumRegs()); |
1381 | UsedRegs.resize(TRI->getNumRegs()); |
1382 | ++MBBI; |
1383 | for (unsigned Count = 0; MBBI != E && Count < Limit; ++MBBI) {
1384 | MachineInstr *MI = MBBI; |
1385 | // Skip DBG_VALUE instructions. Otherwise debug info can affect the |
1386 | // optimization by changing how far we scan. |
1387 | if (MI->isDebugValue()) |
1388 | continue; |
1389 | |
1390 | // Now that we know this is a real instruction, count it. |
1391 | ++Count; |
1392 | |
1393 | // If we found a match, return it. |
1394 | if (isMatchingUpdateInsn(I, MI, BaseReg, UnscaledOffset)) |
1395 | return MBBI; |
1396 | |
1397 | // Update the status of what the instruction clobbered and used. |
1398 | trackRegDefsUses(MI, ModifiedRegs, UsedRegs, TRI); |
1399 | |
1400 | // Otherwise, if the base register is used or modified, we have no match, so |
1401 | // return early. |
1402 | if (ModifiedRegs[BaseReg] || UsedRegs[BaseReg]) |
1403 | return E; |
1404 | } |
1405 | return E; |
1406 | } |
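// A minimal sketch (assumed from the call sites here; the real helper is
// defined earlier in this file) of the bookkeeping trackRegDefsUses()
// performs for these scans: a def marks the register and all of its aliases
// as modified, a use marks them as used, and a call's register mask clobbers
// every register it does not preserve.
static void trackRegDefsUsesSketch(const MachineInstr *MI,
                                   BitVector &ModifiedRegs,
                                   BitVector &UsedRegs,
                                   const TargetRegisterInfo *TRI) {
  for (const MachineOperand &MO : MI->operands()) {
    if (MO.isRegMask())
      ModifiedRegs.setBitsNotInMask(MO.getRegMask());
    if (!MO.isReg())
      continue;
    unsigned Reg = MO.getReg();
    for (MCRegAliasIterator AI(Reg, TRI, /*IncludeSelf=*/true); AI.isValid();
         ++AI)
      (MO.isDef() ? ModifiedRegs : UsedRegs).set(*AI);
  }
}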
1407 | |
1408 | MachineBasicBlock::iterator AArch64LoadStoreOpt::findMatchingUpdateInsnBackward( |
1409 | MachineBasicBlock::iterator I, unsigned Limit) { |
1410 | MachineBasicBlock::iterator B = I->getParent()->begin(); |
1411 | MachineBasicBlock::iterator E = I->getParent()->end(); |
1412 | MachineInstr *MemMI = I; |
1413 | MachineBasicBlock::iterator MBBI = I; |
1414 | |
1415 | unsigned BaseReg = getLdStBaseOp(MemMI).getReg(); |
1416 | int Offset = getLdStOffsetOp(MemMI).getImm(); |
1417 | |
1418 | // If the load/store is the first instruction in the block, there's obviously
1419 | // no matching update. Ditto if the memory offset isn't zero.
1420 | if (MBBI == B || Offset != 0) |
1421 | return E; |
1422 | // If the base register overlaps a destination register, we can't |
1423 | // merge the update. |
1424 | bool IsPairedInsn = isPairedLdSt(MemMI); |
1425 | for (unsigned i = 0, e = IsPairedInsn ? 2 : 1; i != e; ++i) { |
1426 | unsigned DestReg = getLdStRegOp(MemMI, i).getReg(); |
1427 | if (DestReg == BaseReg || TRI->isSubRegister(BaseReg, DestReg)) |
1428 | return E; |
1429 | } |
1430 | |
1431 | // Track which registers have been modified and used between the first insn |
1432 | // (inclusive) and the second insn. |
1433 | BitVector ModifiedRegs, UsedRegs; |
1434 | ModifiedRegs.resize(TRI->getNumRegs()); |
1435 | UsedRegs.resize(TRI->getNumRegs()); |
1436 | --MBBI; |
1437 | for (unsigned Count = 0; MBBI != B && Count < Limit; --MBBI) {
1438 | MachineInstr *MI = MBBI; |
1439 | // Skip DBG_VALUE instructions. Otherwise debug info can affect the |
1440 | // optimization by changing how far we scan. |
1441 | if (MI->isDebugValue()) |
1442 | continue; |
1443 | |
1444 | // Now that we know this is a real instruction, count it. |
1445 | ++Count; |
1446 | |
1447 | // If we found a match, return it. |
1448 | if (isMatchingUpdateInsn(I, MI, BaseReg, Offset)) |
1449 | return MBBI; |
1450 | |
1451 | // Update the status of what the instruction clobbered and used. |
1452 | trackRegDefsUses(MI, ModifiedRegs, UsedRegs, TRI); |
1453 | |
1454 | // Otherwise, if the base register is used or modified, we have no match, so |
1455 | // return early. |
1456 | if (ModifiedRegs[BaseReg] || UsedRegs[BaseReg]) |
1457 | return E; |
1458 | } |
1459 | return E; |
1460 | } |
1461 | |
1462 | bool AArch64LoadStoreOpt::tryToPromoteLoadFromStore( |
1463 | MachineBasicBlock::iterator &MBBI) { |
1464 | MachineInstr *MI = MBBI; |
1465 | // If this is a volatile load, don't mess with it. |
1466 | if (MI->hasOrderedMemoryRef()) |
1467 | return false; |
1468 | |
1469 | // Make sure this is a reg+imm. |
1470 | // FIXME: It is possible to extend it to handle reg+reg cases. |
1471 | if (!getLdStOffsetOp(MI).isImm()) |
1472 | return false; |
1473 | |
1474 | // Look backward up to ScanLimit instructions. |
1475 | MachineBasicBlock::iterator StoreI; |
1476 | if (findMatchingStore(MBBI, ScanLimit, StoreI)) { |
1477 | ++NumLoadsFromStoresPromoted; |
1478 | // Promote the load. Keeping the iterator straight is a |
1479 | // pain, so we let the merge routine tell us what the next instruction |
1480 | // is after it's done mucking about. |
1481 | MBBI = promoteLoadFromStore(MBBI, StoreI); |
1482 | return true; |
1483 | } |
1484 | return false; |
1485 | } |
1486 | |
1487 | bool AArch64LoadStoreOpt::tryToMergeLdStInst( |
1488 | MachineBasicBlock::iterator &MBBI) { |
1489 | MachineInstr *MI = MBBI; |
1490 | MachineBasicBlock::iterator E = MI->getParent()->end(); |
1491 | // If this is a volatile load/store, don't mess with it. |
1492 | if (MI->hasOrderedMemoryRef()) |
1493 | return false; |
1494 | |
1495 | // Make sure this is a reg+imm (as opposed to an address reloc). |
1496 | if (!getLdStOffsetOp(MI).isImm()) |
1497 | return false; |
1498 | |
1499 | // Check if this load/store has a hint to avoid pair formation. |
1500 | // MachineMemOperands hints are set by the AArch64StorePairSuppress pass. |
1501 | if (TII->isLdStPairSuppressed(MI)) |
1502 | return false; |
1503 | |
1504 | // Look ahead up to ScanLimit instructions for a pairable instruction. |
1505 | LdStPairFlags Flags; |
1506 | MachineBasicBlock::iterator Paired = findMatchingInsn(MBBI, Flags, ScanLimit); |
1507 | if (Paired != E) { |
1508 | if (isNarrowLoad(MI)) { |
1509 | ++NumNarrowLoadsPromoted; |
1510 | } else if (isNarrowStore(MI)) { |
1511 | ++NumZeroStoresPromoted; |
1512 | } else { |
1513 | ++NumPairCreated; |
1514 | if (isUnscaledLdSt(MI)) |
1515 | ++NumUnscaledPairCreated; |
1516 | } |
1517 | |
1518 | // Merge the loads into a pair. Keeping the iterator straight is a |
1519 | // pain, so we let the merge routine tell us what the next instruction |
1520 | // is after it's done mucking about. |
1521 | MBBI = mergePairedInsns(MBBI, Paired, Flags); |
1522 | return true; |
1523 | } |
1524 | return false; |
1525 | } |
1526 | |
1527 | bool AArch64LoadStoreOpt::optimizeBlock(MachineBasicBlock &MBB, |
1528 | bool enableNarrowLdOpt) { |
1529 | bool Modified = false; |
1530 | // Four transformations to do here:
1531 | // 1) Find loads that directly read from stores and promote them by |
1532 | // replacing with mov instructions. If the store is wider than the load, |
1533 | // the load will be replaced with a bitfield extract. |
1534 | // e.g., |
1535 | // str w1, [x0, #4] |
1536 | // ldrh w2, [x0, #6] |
1537 | // ; becomes |
1538 | // str w1, [x0, #4] |
1539 | // lsr w2, w1, #16 |
1540 | // 2) Find narrow loads that can be converted into a single wider load |
1541 | // with bitfield extract instructions. |
1542 | // e.g., |
1543 | // ldrh w0, [x2] |
1544 | // ldrh w1, [x2, #2] |
1545 | // ; becomes |
1546 | // ldr w0, [x2] |
1547 | // ubfx w1, w0, #16, #16 |
1548 | // and w0, w0, #0xffff
1549 | // 3) Find loads and stores that can be merged into a single load or store |
1550 | // pair instruction. |
1551 | // e.g., |
1552 | // ldr x0, [x2] |
1553 | // ldr x1, [x2, #8] |
1554 | // ; becomes |
1555 | // ldp x0, x1, [x2] |
1556 | // 4) Find base register updates that can be merged into the load or store |
1557 | // as a base-reg writeback. |
1558 | // e.g., |
1559 | // ldr x0, [x2] |
1560 | // add x2, x2, #4 |
1561 | // ; becomes |
1562 | // ldr x0, [x2], #4 |
1563 | |
1564 | for (MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end(); |
1565 | MBBI != E;) { |
1566 | MachineInstr *MI = MBBI; |
1567 | switch (MI->getOpcode()) { |
1568 | default: |
1569 | // Just move on to the next instruction. |
1570 | ++MBBI; |
1571 | break; |
1572 | // Scaled instructions. |
1573 | case AArch64::LDRBBui: |
1574 | case AArch64::LDRHHui: |
1575 | case AArch64::LDRWui: |
1576 | case AArch64::LDRXui: |
1577 | // Unscaled instructions. |
1578 | case AArch64::LDURBBi: |
1579 | case AArch64::LDURHHi: |
1580 | case AArch64::LDURWi: |
1581 | case AArch64::LDURXi: { |
1582 | if (tryToPromoteLoadFromStore(MBBI)) { |
1583 | Modified = true; |
1584 | break; |
1585 | } |
1586 | ++MBBI; |
1587 | break; |
1588 | } |
1589 | // FIXME: Do the other instructions. |
1590 | } |
1591 | } |
1592 | |
1593 | for (MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end(); |
1594 | enableNarrowLdOpt && MBBI != E;) { |
1595 | MachineInstr *MI = MBBI; |
1596 | switch (MI->getOpcode()) { |
1597 | default: |
1598 | // Just move on to the next instruction. |
1599 | ++MBBI; |
1600 | break; |
1601 | // Scaled instructions. |
1602 | case AArch64::LDRBBui: |
1603 | case AArch64::LDRHHui: |
1604 | case AArch64::LDRSBWui: |
1605 | case AArch64::LDRSHWui: |
1606 | case AArch64::STRBBui: |
1607 | case AArch64::STRHHui: |
1608 | // Unscaled instructions. |
1609 | case AArch64::LDURBBi: |
1610 | case AArch64::LDURHHi: |
1611 | case AArch64::LDURSBWi: |
1612 | case AArch64::LDURSHWi: |
1613 | case AArch64::STURBBi: |
1614 | case AArch64::STURHHi: { |
1615 | if (tryToMergeLdStInst(MBBI)) { |
1616 | Modified = true; |
1617 | break; |
1618 | } |
1619 | ++MBBI; |
1620 | break; |
1621 | } |
1622 | // FIXME: Do the other instructions. |
1623 | } |
1624 | } |
1625 | |
1626 | for (MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end(); |
1627 | MBBI != E;) { |
1628 | MachineInstr *MI = MBBI; |
1629 | switch (MI->getOpcode()) { |
1630 | default: |
1631 | // Just move on to the next instruction. |
1632 | ++MBBI; |
1633 | break; |
1634 | // Scaled instructions. |
1635 | case AArch64::STRSui: |
1636 | case AArch64::STRDui: |
1637 | case AArch64::STRQui: |
1638 | case AArch64::STRXui: |
1639 | case AArch64::STRWui: |
1640 | case AArch64::LDRSui: |
1641 | case AArch64::LDRDui: |
1642 | case AArch64::LDRQui: |
1643 | case AArch64::LDRXui: |
1644 | case AArch64::LDRWui: |
1645 | case AArch64::LDRSWui: |
1646 | // Unscaled instructions. |
1647 | case AArch64::STURSi: |
1648 | case AArch64::STURDi: |
1649 | case AArch64::STURQi: |
1650 | case AArch64::STURWi: |
1651 | case AArch64::STURXi: |
1652 | case AArch64::LDURSi: |
1653 | case AArch64::LDURDi: |
1654 | case AArch64::LDURQi: |
1655 | case AArch64::LDURWi: |
1656 | case AArch64::LDURXi: |
1657 | case AArch64::LDURSWi: { |
1658 | if (tryToMergeLdStInst(MBBI)) { |
1659 | Modified = true; |
1660 | break; |
1661 | } |
1662 | ++MBBI; |
1663 | break; |
1664 | } |
1665 | // FIXME: Do the other instructions. |
1666 | } |
1667 | } |
1668 | |
1669 | for (MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end(); |
1670 | MBBI != E;) { |
1671 | MachineInstr *MI = MBBI; |
1672 | // Do update merging. It's simpler to keep this separate from the above |
1673 | // switch, though not strictly necessary. |
1674 | unsigned Opc = MI->getOpcode(); |
1675 | switch (Opc) { |
1676 | default: |
1677 | // Just move on to the next instruction. |
1678 | ++MBBI; |
1679 | break; |
1680 | // Scaled instructions. |
1681 | case AArch64::STRSui: |
1682 | case AArch64::STRDui: |
1683 | case AArch64::STRQui: |
1684 | case AArch64::STRXui: |
1685 | case AArch64::STRWui: |
1686 | case AArch64::STRHHui: |
1687 | case AArch64::STRBBui: |
1688 | case AArch64::LDRSui: |
1689 | case AArch64::LDRDui: |
1690 | case AArch64::LDRQui: |
1691 | case AArch64::LDRXui: |
1692 | case AArch64::LDRWui: |
1693 | case AArch64::LDRHHui: |
1694 | case AArch64::LDRBBui: |
1695 | // Unscaled instructions. |
1696 | case AArch64::STURSi: |
1697 | case AArch64::STURDi: |
1698 | case AArch64::STURQi: |
1699 | case AArch64::STURWi: |
1700 | case AArch64::STURXi: |
1701 | case AArch64::LDURSi: |
1702 | case AArch64::LDURDi: |
1703 | case AArch64::LDURQi: |
1704 | case AArch64::LDURWi: |
1705 | case AArch64::LDURXi: |
1706 | // Paired instructions. |
1707 | case AArch64::LDPSi: |
1708 | case AArch64::LDPSWi: |
1709 | case AArch64::LDPDi: |
1710 | case AArch64::LDPQi: |
1711 | case AArch64::LDPWi: |
1712 | case AArch64::LDPXi: |
1713 | case AArch64::STPSi: |
1714 | case AArch64::STPDi: |
1715 | case AArch64::STPQi: |
1716 | case AArch64::STPWi: |
1717 | case AArch64::STPXi: { |
1718 | // Make sure this is a reg+imm (as opposed to an address reloc). |
1719 | if (!getLdStOffsetOp(MI).isImm()) { |
1720 | ++MBBI; |
1721 | break; |
1722 | } |
1723 | // Look forward to try to form a post-index instruction. For example, |
1724 | // ldr x0, [x20] |
1725 | // add x20, x20, #32 |
1726 | // merged into: |
1727 | // ldr x0, [x20], #32 |
1728 | MachineBasicBlock::iterator Update = |
1729 | findMatchingUpdateInsnForward(MBBI, ScanLimit, 0); |
1730 | if (Update != E) { |
1731 | // Merge the update into the ld/st. |
1732 | MBBI = mergeUpdateInsn(MBBI, Update, /*IsPreIdx=*/false); |
1733 | Modified = true; |
1734 | ++NumPostFolded; |
1735 | break; |
1736 | } |
1737 | // Unscaled instructions don't have pre/post-index versions, so move to
1738 | // the next instruction.
1739 | if (isUnscaledLdSt(Opc)) { |
1740 | ++MBBI; |
1741 | break; |
1742 | } |
1743 | |
1744 | // Look back to try to find a pre-index instruction. For example, |
1745 | // add x0, x0, #8 |
1746 | // ldr x1, [x0] |
1747 | // merged into: |
1748 | // ldr x1, [x0, #8]! |
1749 | Update = findMatchingUpdateInsnBackward(MBBI, ScanLimit); |
1750 | if (Update != E) { |
1751 | // Merge the update into the ld/st. |
1752 | MBBI = mergeUpdateInsn(MBBI, Update, /*IsPreIdx=*/true); |
1753 | Modified = true; |
1754 | ++NumPreFolded; |
1755 | break; |
1756 | } |
1757 | // The immediate in the load/store is scaled by the size of the memory |
1758 | // operation. The immediate in the add we're looking for, |
1759 | // however, is not, so adjust here. |
1760 | int UnscaledOffset = getLdStOffsetOp(MI).getImm() * getMemScale(MI); |
1761 | |
1762 | // Look forward to try to find a pre-index instruction. For example,
1763 | // ldr x1, [x0, #64] |
1764 | // add x0, x0, #64 |
1765 | // merged into: |
1766 | // ldr x1, [x0, #64]! |
1767 | Update = findMatchingUpdateInsnForward(MBBI, ScanLimit, UnscaledOffset); |
1768 | if (Update != E) { |
1769 | // Merge the update into the ld/st. |
1770 | MBBI = mergeUpdateInsn(MBBI, Update, /*IsPreIdx=*/true); |
1771 | Modified = true; |
1772 | ++NumPreFolded; |
1773 | break; |
1774 | } |
1775 | |
1776 | // Nothing found. Just move to the next instruction. |
1777 | ++MBBI; |
1778 | break; |
1779 | } |
1780 | // FIXME: Do the other instructions. |
1781 | } |
1782 | } |
1783 | |
1784 | return Modified; |
1785 | } |
1786 | |
1787 | bool AArch64LoadStoreOpt::enableNarrowLdMerge(MachineFunction &Fn) { |
1788 | bool ProfitableArch = Subtarget->isCortexA57(); |
1789 | // FIXME: The benefit from converting narrow loads into a wider load is
1790 | // microarchitectural: it assumes a single wide load with two bitfield
1791 | // extracts is cheaper than two narrow loads. Currently, this conversion is
1792 | // enabled only on Cortex-A57, where the performance benefit was verified.
1793 | return ProfitableArch && !Subtarget->requiresStrictAlign(); |
1794 | } |
1795 | |
1796 | bool AArch64LoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) { |
1797 | Subtarget = &static_cast<const AArch64Subtarget &>(Fn.getSubtarget()); |
1798 | TII = static_cast<const AArch64InstrInfo *>(Subtarget->getInstrInfo()); |
1799 | TRI = Subtarget->getRegisterInfo(); |
1800 | |
1801 | bool Modified = false; |
1802 | bool enableNarrowLdOpt = enableNarrowLdMerge(Fn); |
1803 | for (auto &MBB : Fn) |
1804 | Modified |= optimizeBlock(MBB, enableNarrowLdOpt); |
1805 | |
1806 | return Modified; |
1807 | } |
1808 | |
1809 | // FIXME: Do we need/want a pre-alloc pass like ARM has to try to keep |
1810 | // loads and stores near one another? |
1811 | |
1812 | /// createAArch64LoadStoreOptimizationPass - returns an instance of the |
1813 | /// load / store optimization pass. |
1814 | FunctionPass *llvm::createAArch64LoadStoreOptimizationPass() { |
1815 | return new AArch64LoadStoreOpt(); |
1816 | } |
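// Usage sketch (an assumption for illustration; the real hook lives in the
// AArch64 target machine's pass configuration, not in this file): since the
// pass must run after register allocation, the codegen pipeline is expected
// to add it roughly like this:
//
//   void AArch64PassConfig::addPreSched2() {
//     addPass(createAArch64LoadStoreOptimizationPass());
//   }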