File: | lib/CodeGen/GlobalISel/CombinerHelper.cpp |
Warning: | line 914, column 7 Value stored to 'Alignment' is never read |
1 | //===-- lib/CodeGen/GlobalISel/GICombinerHelper.cpp -----------------------===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | #include "llvm/CodeGen/GlobalISel/CombinerHelper.h" |
9 | #include "llvm/CodeGen/GlobalISel/Combiner.h" |
10 | #include "llvm/CodeGen/GlobalISel/GISelChangeObserver.h" |
11 | #include "llvm/CodeGen/GlobalISel/GISelKnownBits.h" |
12 | #include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h" |
13 | #include "llvm/CodeGen/GlobalISel/Utils.h" |
14 | #include "llvm/CodeGen/MachineDominators.h" |
15 | #include "llvm/CodeGen/MachineFrameInfo.h" |
16 | #include "llvm/CodeGen/MachineInstr.h" |
17 | #include "llvm/CodeGen/MachineRegisterInfo.h" |
18 | #include "llvm/CodeGen/TargetInstrInfo.h" |
19 | #include "llvm/CodeGen/TargetLowering.h" |
20 | #include "llvm/Target/TargetMachine.h" |
21 | |
22 | #define DEBUG_TYPE "gi-combiner" |
23 | |
24 | using namespace llvm; |
25 | |
26 | // Option to allow testing of the combiner while no targets know about indexed |
27 | // addressing. |
28 | static cl::opt<bool> |
29 | ForceLegalIndexing("force-legal-indexing", cl::Hidden, cl::init(false), |
30 | cl::desc("Force all indexed operations to be " |
31 | "legal for the GlobalISel combiner")); |
32 | |
33 | |
34 | CombinerHelper::CombinerHelper(GISelChangeObserver &Observer, |
35 | MachineIRBuilder &B, GISelKnownBits *KB, |
36 | MachineDominatorTree *MDT) |
37 | : Builder(B), MRI(Builder.getMF().getRegInfo()), Observer(Observer), |
38 | KB(KB), MDT(MDT) { |
39 | (void)this->KB; |
40 | } |
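// Editor's note (illustrative sketch, not part of the original file): a target |
// combiner typically owns one of these helpers and calls into it from its |
// CombinerInfo::combine() hook, roughly: |
//   CombinerHelper Helper(Observer, B, /*KB=*/nullptr, /*MDT=*/nullptr); |
//   return Helper.tryCombine(MI); |
// Whether KnownBits and the dominator tree are passed is target-specific. |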
41 | |
42 | void CombinerHelper::replaceRegWith(MachineRegisterInfo &MRI, Register FromReg, |
43 | Register ToReg) const { |
44 | Observer.changingAllUsesOfReg(MRI, FromReg); |
45 | |
46 | if (MRI.constrainRegAttrs(ToReg, FromReg)) |
47 | MRI.replaceRegWith(FromReg, ToReg); |
48 | else |
49 | Builder.buildCopy(ToReg, FromReg); |
50 | |
51 | Observer.finishedChangingAllUsesOfReg(); |
52 | } |
53 | |
54 | void CombinerHelper::replaceRegOpWith(MachineRegisterInfo &MRI, |
55 | MachineOperand &FromRegOp, |
56 | Register ToReg) const { |
57 | assert(FromRegOp.getParent() && "Expected an operand in an MI"); |
58 | Observer.changingInstr(*FromRegOp.getParent()); |
59 | |
60 | FromRegOp.setReg(ToReg); |
61 | |
62 | Observer.changedInstr(*FromRegOp.getParent()); |
63 | } |
64 | |
65 | bool CombinerHelper::tryCombineCopy(MachineInstr &MI) { |
66 | if (matchCombineCopy(MI)) { |
67 | applyCombineCopy(MI); |
68 | return true; |
69 | } |
70 | return false; |
71 | } |
72 | bool CombinerHelper::matchCombineCopy(MachineInstr &MI) { |
73 | if (MI.getOpcode() != TargetOpcode::COPY) |
74 | return false; |
75 | Register DstReg = MI.getOperand(0).getReg(); |
76 | Register SrcReg = MI.getOperand(1).getReg(); |
77 | LLT DstTy = MRI.getType(DstReg); |
78 | LLT SrcTy = MRI.getType(SrcReg); |
79 | // Simple Copy Propagation. |
80 | // a(sx) = COPY b(sx) -> Replace all uses of a with b. |
81 | if (DstTy.isValid() && SrcTy.isValid() && DstTy == SrcTy) |
82 | return true; |
83 | return false; |
84 | } |
85 | void CombinerHelper::applyCombineCopy(MachineInstr &MI) { |
86 | Register DstReg = MI.getOperand(0).getReg(); |
87 | Register SrcReg = MI.getOperand(1).getReg(); |
88 | MI.eraseFromParent(); |
89 | replaceRegWith(MRI, DstReg, SrcReg); |
90 | } |
91 | |
92 | namespace { |
93 | |
94 | /// Select a preference between two uses. CurrentUse is the current preference |
95 | /// while *ForCandidate is attributes of the candidate under consideration. |
96 | PreferredTuple ChoosePreferredUse(PreferredTuple &CurrentUse, |
97 | const LLT &TyForCandidate, |
98 | unsigned OpcodeForCandidate, |
99 | MachineInstr *MIForCandidate) { |
100 | if (!CurrentUse.Ty.isValid()) { |
101 | if (CurrentUse.ExtendOpcode == OpcodeForCandidate || |
102 | CurrentUse.ExtendOpcode == TargetOpcode::G_ANYEXT) |
103 | return {TyForCandidate, OpcodeForCandidate, MIForCandidate}; |
104 | return CurrentUse; |
105 | } |
106 | |
107 | // We permit the extend to hoist through basic blocks but this is only |
108 | // sensible if the target has extending loads. If you end up lowering back |
109 | // into a load and extend during the legalizer then the end result is |
110 | // hoisting the extend up to the load. |
111 | |
112 | // Prefer defined extensions to undefined extensions as these are more |
113 | // likely to reduce the number of instructions. |
114 | if (OpcodeForCandidate == TargetOpcode::G_ANYEXT && |
115 | CurrentUse.ExtendOpcode != TargetOpcode::G_ANYEXT) |
116 | return CurrentUse; |
117 | else if (CurrentUse.ExtendOpcode == TargetOpcode::G_ANYEXT && |
118 | OpcodeForCandidate != TargetOpcode::G_ANYEXT) |
119 | return {TyForCandidate, OpcodeForCandidate, MIForCandidate}; |
120 | |
121 | // Prefer sign extensions to zero extensions as sign-extensions tend to be |
122 | // more expensive. |
123 | if (CurrentUse.Ty == TyForCandidate) { |
124 | if (CurrentUse.ExtendOpcode == TargetOpcode::G_SEXT && |
125 | OpcodeForCandidate == TargetOpcode::G_ZEXT) |
126 | return CurrentUse; |
127 | else if (CurrentUse.ExtendOpcode == TargetOpcode::G_ZEXT && |
128 | OpcodeForCandidate == TargetOpcode::G_SEXT) |
129 | return {TyForCandidate, OpcodeForCandidate, MIForCandidate}; |
130 | } |
131 | |
132 | // This is potentially target specific. We've chosen the largest type |
133 | // because G_TRUNC is usually free. One potential catch with this is that |
134 | // some targets have a reduced number of larger registers than smaller |
135 | // registers and this choice potentially increases the live-range for the |
136 | // larger value. |
137 | if (TyForCandidate.getSizeInBits() > CurrentUse.Ty.getSizeInBits()) { |
138 | return {TyForCandidate, OpcodeForCandidate, MIForCandidate}; |
139 | } |
140 | return CurrentUse; |
141 | } |
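// Editor's example (not in the original): given three uses of the same load, |
// a G_ANYEXT to s32, a G_ZEXT to s32 and a G_SEXT to s64, repeated calls to |
// ChoosePreferredUse settle on the G_SEXT s64 use: defined extends win over |
// G_ANYEXT, G_SEXT wins over G_ZEXT at equal size, and otherwise the larger |
// destination type wins. |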
142 | |
143 | /// Find a suitable place to insert some instructions and insert them. This |
144 | /// function accounts for special cases like inserting before a PHI node. |
145 | /// The current strategy for inserting before PHI's is to duplicate the |
146 | /// instructions for each predecessor. However, while that's ok for G_TRUNC |
147 | /// on most targets since it generally requires no code, other targets/cases may |
148 | /// want to try harder to find a dominating block. |
149 | static void InsertInsnsWithoutSideEffectsBeforeUse( |
150 | MachineIRBuilder &Builder, MachineInstr &DefMI, MachineOperand &UseMO, |
151 | std::function<void(MachineBasicBlock *, MachineBasicBlock::iterator, |
152 | MachineOperand &UseMO)> |
153 | Inserter) { |
154 | MachineInstr &UseMI = *UseMO.getParent(); |
155 | |
156 | MachineBasicBlock *InsertBB = UseMI.getParent(); |
157 | |
158 | // If the use is a PHI then we want the predecessor block instead. |
159 | if (UseMI.isPHI()) { |
160 | MachineOperand *PredBB = std::next(&UseMO); |
161 | InsertBB = PredBB->getMBB(); |
162 | } |
163 | |
164 | // If the block is the same block as the def then we want to insert just after |
165 | // the def instead of at the start of the block. |
166 | if (InsertBB == DefMI.getParent()) { |
167 | MachineBasicBlock::iterator InsertPt = &DefMI; |
168 | Inserter(InsertBB, std::next(InsertPt), UseMO); |
169 | return; |
170 | } |
171 | |
172 | // Otherwise we want the start of the BB |
173 | Inserter(InsertBB, InsertBB->getFirstNonPHI(), UseMO); |
174 | } |
175 | } // end anonymous namespace |
176 | |
177 | bool CombinerHelper::tryCombineExtendingLoads(MachineInstr &MI) { |
178 | PreferredTuple Preferred; |
179 | if (matchCombineExtendingLoads(MI, Preferred)) { |
180 | applyCombineExtendingLoads(MI, Preferred); |
181 | return true; |
182 | } |
183 | return false; |
184 | } |
185 | |
186 | bool CombinerHelper::matchCombineExtendingLoads(MachineInstr &MI, |
187 | PreferredTuple &Preferred) { |
188 | // We match the loads and follow the uses to the extend instead of matching |
189 | // the extends and following the def to the load. This is because the load |
190 | // must remain in the same position for correctness (unless we also add code |
191 | // to find a safe place to sink it) whereas the extend is freely movable. |
192 | // It also prevents us from duplicating the load, which would be incorrect for |
193 | // the volatile case and wasteful otherwise. |
194 | |
195 | if (MI.getOpcode() != TargetOpcode::G_LOAD && |
196 | MI.getOpcode() != TargetOpcode::G_SEXTLOAD && |
197 | MI.getOpcode() != TargetOpcode::G_ZEXTLOAD) |
198 | return false; |
199 | |
200 | auto &LoadValue = MI.getOperand(0); |
201 | assert(LoadValue.isReg() && "Result wasn't a register?"); |
202 | |
203 | LLT LoadValueTy = MRI.getType(LoadValue.getReg()); |
204 | if (!LoadValueTy.isScalar()) |
205 | return false; |
206 | |
207 | // Most architectures are going to legalize <s8 loads into at least a 1 byte |
208 | // load, and the MMOs can only describe memory accesses in multiples of bytes. |
209 | // If we try to perform extload combining on those, we can end up with |
210 | // %a(s8) = extload %ptr (load 1 byte from %ptr) |
211 | // ... which is an illegal extload instruction. |
212 | if (LoadValueTy.getSizeInBits() < 8) |
213 | return false; |
214 | |
215 | // Non-power-of-2 types will very likely be legalized into multiple |
216 | // loads. Don't bother trying to match them into extending loads. |
217 | if (!isPowerOf2_32(LoadValueTy.getSizeInBits())) |
218 | return false; |
219 | |
220 | // Find the preferred type aside from the any-extends (unless it's the only |
221 | // one) and non-extending ops. We'll emit an extending load to that type and |
222 | // emit a variant of (extend (trunc X)) for the others according to the |
223 | // relative type sizes. At the same time, pick an extend to use based on the |
224 | // extend involved in the chosen type. |
225 | unsigned PreferredOpcode = MI.getOpcode() == TargetOpcode::G_LOAD |
226 | ? TargetOpcode::G_ANYEXT |
227 | : MI.getOpcode() == TargetOpcode::G_SEXTLOAD |
228 | ? TargetOpcode::G_SEXT |
229 | : TargetOpcode::G_ZEXT; |
230 | Preferred = {LLT(), PreferredOpcode, nullptr}; |
231 | for (auto &UseMI : MRI.use_instructions(LoadValue.getReg())) { |
232 | if (UseMI.getOpcode() == TargetOpcode::G_SEXT || |
233 | UseMI.getOpcode() == TargetOpcode::G_ZEXT || |
234 | UseMI.getOpcode() == TargetOpcode::G_ANYEXT) { |
235 | Preferred = ChoosePreferredUse(Preferred, |
236 | MRI.getType(UseMI.getOperand(0).getReg()), |
237 | UseMI.getOpcode(), &UseMI); |
238 | } |
239 | } |
240 | |
241 | // There were no extends |
242 | if (!Preferred.MI) |
243 | return false; |
244 | // It should be impossible to choose an extend without selecting a different |
245 | // type since by definition the result of an extend is larger. |
246 | assert(Preferred.Ty != LoadValueTy && "Extending to same type?"); |
247 | |
248 | LLVM_DEBUG(dbgs() << "Preferred use is: " << *Preferred.MI); |
249 | return true; |
250 | } |
251 | |
252 | void CombinerHelper::applyCombineExtendingLoads(MachineInstr &MI, |
253 | PreferredTuple &Preferred) { |
254 | // Rewrite the load to the chosen extending load. |
255 | Register ChosenDstReg = Preferred.MI->getOperand(0).getReg(); |
256 | |
257 | // Inserter to insert a truncate back to the original type at a given point |
258 | // with some basic CSE to limit truncate duplication to one per BB. |
259 | DenseMap<MachineBasicBlock *, MachineInstr *> EmittedInsns; |
260 | auto InsertTruncAt = [&](MachineBasicBlock *InsertIntoBB, |
261 | MachineBasicBlock::iterator InsertBefore, |
262 | MachineOperand &UseMO) { |
263 | MachineInstr *PreviouslyEmitted = EmittedInsns.lookup(InsertIntoBB); |
264 | if (PreviouslyEmitted) { |
265 | Observer.changingInstr(*UseMO.getParent()); |
266 | UseMO.setReg(PreviouslyEmitted->getOperand(0).getReg()); |
267 | Observer.changedInstr(*UseMO.getParent()); |
268 | return; |
269 | } |
270 | |
271 | Builder.setInsertPt(*InsertIntoBB, InsertBefore); |
272 | Register NewDstReg = MRI.cloneVirtualRegister(MI.getOperand(0).getReg()); |
273 | MachineInstr *NewMI = Builder.buildTrunc(NewDstReg, ChosenDstReg); |
274 | EmittedInsns[InsertIntoBB] = NewMI; |
275 | replaceRegOpWith(MRI, UseMO, NewDstReg); |
276 | }; |
277 | |
278 | Observer.changingInstr(MI); |
279 | MI.setDesc( |
280 | Builder.getTII().get(Preferred.ExtendOpcode == TargetOpcode::G_SEXT |
281 | ? TargetOpcode::G_SEXTLOAD |
282 | : Preferred.ExtendOpcode == TargetOpcode::G_ZEXT |
283 | ? TargetOpcode::G_ZEXTLOAD |
284 | : TargetOpcode::G_LOAD)); |
285 | |
286 | // Rewrite all the uses to fix up the types. |
287 | auto &LoadValue = MI.getOperand(0); |
288 | SmallVector<MachineOperand *, 4> Uses; |
289 | for (auto &UseMO : MRI.use_operands(LoadValue.getReg())) |
290 | Uses.push_back(&UseMO); |
291 | |
292 | for (auto *UseMO : Uses) { |
293 | MachineInstr *UseMI = UseMO->getParent(); |
294 | |
295 | // If the extend is compatible with the preferred extend then we should fix |
296 | // up the type and extend so that it uses the preferred use. |
297 | if (UseMI->getOpcode() == Preferred.ExtendOpcode || |
298 | UseMI->getOpcode() == TargetOpcode::G_ANYEXT) { |
299 | Register UseDstReg = UseMI->getOperand(0).getReg(); |
300 | MachineOperand &UseSrcMO = UseMI->getOperand(1); |
301 | const LLT &UseDstTy = MRI.getType(UseDstReg); |
302 | if (UseDstReg != ChosenDstReg) { |
303 | if (Preferred.Ty == UseDstTy) { |
304 | // If the use has the same type as the preferred use, then merge |
305 | // the vregs and erase the extend. For example: |
306 | // %1:_(s8) = G_LOAD ... |
307 | // %2:_(s32) = G_SEXT %1(s8) |
308 | // %3:_(s32) = G_ANYEXT %1(s8) |
309 | // ... = ... %3(s32) |
310 | // rewrites to: |
311 | // %2:_(s32) = G_SEXTLOAD ... |
312 | // ... = ... %2(s32) |
313 | replaceRegWith(MRI, UseDstReg, ChosenDstReg); |
314 | Observer.erasingInstr(*UseMO->getParent()); |
315 | UseMO->getParent()->eraseFromParent(); |
316 | } else if (Preferred.Ty.getSizeInBits() < UseDstTy.getSizeInBits()) { |
317 | // If the preferred size is smaller, then keep the extend but extend |
318 | // from the result of the extending load. For example: |
319 | // %1:_(s8) = G_LOAD ... |
320 | // %2:_(s32) = G_SEXT %1(s8) |
321 | // %3:_(s64) = G_ANYEXT %1(s8) |
322 | // ... = ... %3(s64) |
323 | // rewrites to: |
324 | // %2:_(s32) = G_SEXTLOAD ... |
325 | // %3:_(s64) = G_ANYEXT %2:_(s32) |
326 | // ... = ... %3(s64) |
327 | replaceRegOpWith(MRI, UseSrcMO, ChosenDstReg); |
328 | } else { |
329 | // If the preferred size is large, then insert a truncate. For |
330 | // example: |
331 | // %1:_(s8) = G_LOAD ... |
332 | // %2:_(s64) = G_SEXT %1(s8) |
333 | // %3:_(s32) = G_ZEXT %1(s8) |
334 | // ... = ... %3(s32) |
335 | // rewrites to: |
336 | // %2:_(s64) = G_SEXTLOAD ... |
337 | // %4:_(s8) = G_TRUNC %2:_(s64) |
338 | // %3:_(s32) = G_ZEXT %4:_(s8) |
339 | // ... = ... %3(s32) |
340 | InsertInsnsWithoutSideEffectsBeforeUse(Builder, MI, *UseMO, |
341 | InsertTruncAt); |
342 | } |
343 | continue; |
344 | } |
345 | // The use is (one of) the uses of the preferred use we chose earlier. |
346 | // We're going to update the load to def this value later so just erase |
347 | // the old extend. |
348 | Observer.erasingInstr(*UseMO->getParent()); |
349 | UseMO->getParent()->eraseFromParent(); |
350 | continue; |
351 | } |
352 | |
353 | // The use isn't an extend. Truncate back to the type we originally loaded. |
354 | // This is free on many targets. |
355 | InsertInsnsWithoutSideEffectsBeforeUse(Builder, MI, *UseMO, InsertTruncAt); |
356 | } |
357 | |
358 | MI.getOperand(0).setReg(ChosenDstReg); |
359 | Observer.changedInstr(MI); |
360 | } |
361 | |
362 | bool CombinerHelper::isPredecessor(MachineInstr &DefMI, MachineInstr &UseMI) { |
363 | assert(DefMI.getParent() == UseMI.getParent()); |
364 | if (&DefMI == &UseMI) |
365 | return false; |
366 | |
367 | // Loop through the basic block until we find one of the instructions. |
368 | MachineBasicBlock::const_iterator I = DefMI.getParent()->begin(); |
369 | for (; &*I != &DefMI && &*I != &UseMI; ++I) |
370 | return &*I == &DefMI; |
371 | |
372 | llvm_unreachable("Block must contain instructions"); |
373 | } |
374 | |
375 | bool CombinerHelper::dominates(MachineInstr &DefMI, MachineInstr &UseMI) { |
376 | if (MDT) |
377 | return MDT->dominates(&DefMI, &UseMI); |
378 | else if (DefMI.getParent() != UseMI.getParent()) |
379 | return false; |
380 | |
381 | return isPredecessor(DefMI, UseMI); |
382 | } |
383 | |
384 | bool CombinerHelper::findPostIndexCandidate(MachineInstr &MI, Register &Addr, |
385 | Register &Base, Register &Offset) { |
386 | auto &MF = *MI.getParent()->getParent(); |
387 | const auto &TLI = *MF.getSubtarget().getTargetLowering(); |
388 | |
389 | #ifndef NDEBUG |
390 | unsigned Opcode = MI.getOpcode(); |
391 | assert(Opcode == TargetOpcode::G_LOAD || Opcode == TargetOpcode::G_SEXTLOAD || |
392 | Opcode == TargetOpcode::G_ZEXTLOAD || Opcode == TargetOpcode::G_STORE); |
393 | #endif |
394 | |
395 | Base = MI.getOperand(1).getReg(); |
396 | MachineInstr *BaseDef = MRI.getUniqueVRegDef(Base); |
397 | if (BaseDef && BaseDef->getOpcode() == TargetOpcode::G_FRAME_INDEX) |
398 | return false; |
399 | |
400 | LLVM_DEBUG(dbgs() << "Searching for post-indexing opportunity for: " << MI); |
401 | |
402 | for (auto &Use : MRI.use_instructions(Base)) { |
403 | if (Use.getOpcode() != TargetOpcode::G_GEP) |
404 | continue; |
405 | |
406 | Offset = Use.getOperand(2).getReg(); |
407 | if (!ForceLegalIndexing && |
408 | !TLI.isIndexingLegal(MI, Base, Offset, /*IsPre*/ false, MRI)) { |
409 | LLVM_DEBUG(dbgs() << " Ignoring candidate with illegal addrmode: " |
410 | << Use); |
411 | continue; |
412 | } |
413 | |
414 | // Make sure the offset calculation is before the potentially indexed op. |
415 | // FIXME: we really care about dependency here. The offset calculation might |
416 | // be movable. |
417 | MachineInstr *OffsetDef = MRI.getUniqueVRegDef(Offset); |
418 | if (!OffsetDef || !dominates(*OffsetDef, MI)) { |
419 | LLVM_DEBUG(dbgs() << " Ignoring candidate with offset after mem-op: " |
420 | << Use); |
421 | continue; |
422 | } |
423 | |
424 | // FIXME: check whether all uses of Base are load/store with foldable |
425 | // addressing modes. If so, using the normal addr-modes is better than |
426 | // forming an indexed one. |
427 | |
428 | bool MemOpDominatesAddrUses = true; |
429 | for (auto &GEPUse : MRI.use_instructions(Use.getOperand(0).getReg())) { |
430 | if (!dominates(MI, GEPUse)) { |
431 | MemOpDominatesAddrUses = false; |
432 | break; |
433 | } |
434 | } |
435 | |
436 | if (!MemOpDominatesAddrUses) { |
437 | LLVM_DEBUG( |
438 | dbgs() << " Ignoring candidate as memop does not dominate uses: " |
439 | << Use); |
440 | continue; |
441 | } |
442 | |
443 | LLVM_DEBUG(dbgs() << " Found match: " << Use); |
444 | Addr = Use.getOperand(0).getReg(); |
445 | return true; |
446 | } |
447 | |
448 | return false; |
449 | } |
450 | |
451 | bool CombinerHelper::findPreIndexCandidate(MachineInstr &MI, Register &Addr, |
452 | Register &Base, Register &Offset) { |
453 | auto &MF = *MI.getParent()->getParent(); |
454 | const auto &TLI = *MF.getSubtarget().getTargetLowering(); |
455 | |
456 | #ifndef NDEBUG |
457 | unsigned Opcode = MI.getOpcode(); |
458 | assert(Opcode == TargetOpcode::G_LOAD || Opcode == TargetOpcode::G_SEXTLOAD || |
459 | Opcode == TargetOpcode::G_ZEXTLOAD || Opcode == TargetOpcode::G_STORE); |
460 | #endif |
461 | |
462 | Addr = MI.getOperand(1).getReg(); |
463 | MachineInstr *AddrDef = getOpcodeDef(TargetOpcode::G_GEP, Addr, MRI); |
464 | if (!AddrDef || MRI.hasOneUse(Addr)) |
465 | return false; |
466 | |
467 | Base = AddrDef->getOperand(1).getReg(); |
468 | Offset = AddrDef->getOperand(2).getReg(); |
469 | |
470 | LLVM_DEBUG(dbgs() << "Found potential pre-indexed load_store: " << MI); |
471 | |
472 | if (!ForceLegalIndexing && |
473 | !TLI.isIndexingLegal(MI, Base, Offset, /*IsPre*/ true, MRI)) { |
474 | LLVM_DEBUG(dbgs() << " Skipping, not legal for target"); |
475 | return false; |
476 | } |
477 | |
478 | MachineInstr *BaseDef = getDefIgnoringCopies(Base, MRI); |
479 | if (BaseDef->getOpcode() == TargetOpcode::G_FRAME_INDEX) { |
480 | LLVM_DEBUG(dbgs() << " Skipping, frame index would need copy anyway."); |
481 | return false; |
482 | } |
483 | |
484 | if (MI.getOpcode() == TargetOpcode::G_STORE) { |
485 | // Would require a copy. |
486 | if (Base == MI.getOperand(0).getReg()) { |
487 | LLVM_DEBUG(dbgs() << " Skipping, storing base so need copy anyway."); |
488 | return false; |
489 | } |
490 | |
491 | // We're expecting one use of Addr in MI, but it could also be the |
492 | // value stored, which isn't actually dominated by the instruction. |
493 | if (MI.getOperand(0).getReg() == Addr) { |
494 | LLVM_DEBUG(dbgs() << " Skipping, does not dominate all addr uses"); |
495 | return false; |
496 | } |
497 | } |
498 | |
499 | // FIXME: check whether all uses of the base pointer are constant GEPs. That |
500 | // might allow us to end base's liveness here by adjusting the constant. |
501 | |
502 | for (auto &UseMI : MRI.use_instructions(Addr)) { |
503 | if (!dominates(MI, UseMI)) { |
504 | LLVM_DEBUG(dbgs() << " Skipping, does not dominate all addr uses."); |
505 | return false; |
506 | } |
507 | } |
508 | |
509 | return true; |
510 | } |
511 | |
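// Editor's sketch (illustrative, not in the original): for the post-indexed |
// store case, the combine below roughly turns |
//   G_STORE %val, %base |
//   %addr:_(p0) = G_GEP %base, %offset |
// into |
//   %addr:_(p0) = G_INDEXED_STORE %val, %base, %offset, 0 |
// with the pre-indexed forms built analogously (trailing immediate 1). |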
512 | bool CombinerHelper::tryCombineIndexedLoadStore(MachineInstr &MI) { |
513 | unsigned Opcode = MI.getOpcode(); |
514 | if (Opcode != TargetOpcode::G_LOAD && Opcode != TargetOpcode::G_SEXTLOAD && |
515 | Opcode != TargetOpcode::G_ZEXTLOAD && Opcode != TargetOpcode::G_STORE) |
516 | return false; |
517 | |
518 | bool IsStore = Opcode == TargetOpcode::G_STORE; |
519 | Register Addr, Base, Offset; |
520 | bool IsPre = findPreIndexCandidate(MI, Addr, Base, Offset); |
521 | if (!IsPre && !findPostIndexCandidate(MI, Addr, Base, Offset)) |
522 | return false; |
523 | |
524 | |
525 | unsigned NewOpcode; |
526 | switch (Opcode) { |
527 | case TargetOpcode::G_LOAD: |
528 | NewOpcode = TargetOpcode::G_INDEXED_LOAD; |
529 | break; |
530 | case TargetOpcode::G_SEXTLOAD: |
531 | NewOpcode = TargetOpcode::G_INDEXED_SEXTLOAD; |
532 | break; |
533 | case TargetOpcode::G_ZEXTLOAD: |
534 | NewOpcode = TargetOpcode::G_INDEXED_ZEXTLOAD; |
535 | break; |
536 | case TargetOpcode::G_STORE: |
537 | NewOpcode = TargetOpcode::G_INDEXED_STORE; |
538 | break; |
539 | default: |
540 | llvm_unreachable("Unknown load/store opcode"); |
541 | } |
542 | |
543 | MachineInstr &AddrDef = *MRI.getUniqueVRegDef(Addr); |
544 | MachineIRBuilder MIRBuilder(MI); |
545 | auto MIB = MIRBuilder.buildInstr(NewOpcode); |
546 | if (IsStore) { |
547 | MIB.addDef(Addr); |
548 | MIB.addUse(MI.getOperand(0).getReg()); |
549 | } else { |
550 | MIB.addDef(MI.getOperand(0).getReg()); |
551 | MIB.addDef(Addr); |
552 | } |
553 | |
554 | MIB.addUse(Base); |
555 | MIB.addUse(Offset); |
556 | MIB.addImm(IsPre); |
557 | MI.eraseFromParent(); |
558 | AddrDef.eraseFromParent(); |
559 | |
560 | LLVM_DEBUG(dbgs() << " Combined to indexed operation"); |
561 | return true; |
562 | } |
563 | |
564 | bool CombinerHelper::matchCombineBr(MachineInstr &MI) { |
565 | assert(MI.getOpcode() == TargetOpcode::G_BR && "Expected a G_BR"); |
566 | // Try to match the following: |
567 | // bb1: |
568 | // %c(s32) = G_ICMP pred, %a, %b |
569 | // %c1(s1) = G_TRUNC %c(s32) |
570 | // G_BRCOND %c1, %bb2 |
571 | // G_BR %bb3 |
572 | // bb2: |
573 | // ... |
574 | // bb3: |
575 | |
576 | // The above pattern does not have a fall through to the successor bb2, always |
577 | // resulting in a branch no matter which path is taken. Here we try to find |
578 | // and replace that pattern with a conditional branch to bb3, otherwise |
579 | // falling through to bb2. |
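// For illustration (editor's note, not in the original), the rewrite roughly |
// produces: |
// bb1: |
//   %c(s32) = G_ICMP <inverted pred>, %a, %b |
//   %c1(s1) = G_TRUNC %c(s32) |
//   G_BRCOND %c1, %bb3 |
// bb2: (reached by fallthrough) |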
580 | |
581 | MachineBasicBlock *MBB = MI.getParent(); |
582 | MachineBasicBlock::iterator BrIt(MI); |
583 | if (BrIt == MBB->begin()) |
584 | return false; |
585 | assert(std::next(BrIt) == MBB->end() && "expected G_BR to be a terminator"); |
586 | |
587 | MachineInstr *BrCond = &*std::prev(BrIt); |
588 | if (BrCond->getOpcode() != TargetOpcode::G_BRCOND) |
589 | return false; |
590 | |
591 | // Check that the next block is the conditional branch target. |
592 | if (!MBB->isLayoutSuccessor(BrCond->getOperand(1).getMBB())) |
593 | return false; |
594 | |
595 | MachineInstr *CmpMI = MRI.getVRegDef(BrCond->getOperand(0).getReg()); |
596 | if (!CmpMI || CmpMI->getOpcode() != TargetOpcode::G_ICMP || |
597 | !MRI.hasOneUse(CmpMI->getOperand(0).getReg())) |
598 | return false; |
599 | return true; |
600 | } |
601 | |
602 | bool CombinerHelper::tryCombineBr(MachineInstr &MI) { |
603 | if (!matchCombineBr(MI)) |
604 | return false; |
605 | MachineBasicBlock *BrTarget = MI.getOperand(0).getMBB(); |
606 | MachineBasicBlock::iterator BrIt(MI); |
607 | MachineInstr *BrCond = &*std::prev(BrIt); |
608 | MachineInstr *CmpMI = MRI.getVRegDef(BrCond->getOperand(0).getReg()); |
609 | |
610 | CmpInst::Predicate InversePred = CmpInst::getInversePredicate( |
611 | (CmpInst::Predicate)CmpMI->getOperand(1).getPredicate()); |
612 | |
613 | // Invert the G_ICMP condition. |
614 | Observer.changingInstr(*CmpMI); |
615 | CmpMI->getOperand(1).setPredicate(InversePred); |
616 | Observer.changedInstr(*CmpMI); |
617 | |
618 | // Change the conditional branch target. |
619 | Observer.changingInstr(*BrCond); |
620 | BrCond->getOperand(1).setMBB(BrTarget); |
621 | Observer.changedInstr(*BrCond); |
622 | MI.eraseFromParent(); |
623 | return true; |
624 | } |
625 | |
626 | static bool shouldLowerMemFuncForSize(const MachineFunction &MF) { |
627 | // On Darwin, -Os means optimize for size without hurting performance, so |
628 | // only really optimize for size when -Oz (MinSize) is used. |
629 | if (MF.getTarget().getTargetTriple().isOSDarwin()) |
630 | return MF.getFunction().hasMinSize(); |
631 | return MF.getFunction().hasOptSize(); |
632 | } |
633 | |
634 | // Returns a list of types to use for memory op lowering in MemOps. A partial |
635 | // port of findOptimalMemOpLowering in TargetLowering. |
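// Editor's example (illustrative, target-dependent): for Size == 11, assuming |
// s64 is the widest type chosen, the loop below yields MemOps == {s64, s16, s8} |
// when overlap is not allowed; with overlap allowed and fast misaligned |
// accesses it can instead settle on {s64, s32}, where the trailing s32 access |
// overlaps the tail of the s64 access. |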
636 | static bool findGISelOptimalMemOpLowering( |
637 | std::vector<LLT> &MemOps, unsigned Limit, uint64_t Size, unsigned DstAlign, |
638 | unsigned SrcAlign, bool IsMemset, bool ZeroMemset, bool MemcpyStrSrc, |
639 | bool AllowOverlap, unsigned DstAS, unsigned SrcAS, |
640 | const AttributeList &FuncAttributes, const TargetLowering &TLI) { |
641 | // If 'SrcAlign' is zero, that means the memory operation does not need to |
642 | // load the value, i.e. memset or memcpy from constant string. Otherwise, |
643 | // it's the inferred alignment of the source. 'DstAlign', on the other hand, |
644 | // is the specified alignment of the memory operation. If it is zero, that |
645 | // means it's possible to change the alignment of the destination. |
646 | // 'MemcpyStrSrc' indicates whether the memcpy source is constant so it does |
647 | // not need to be loaded. |
648 | if (SrcAlign != 0 && SrcAlign < DstAlign) |
649 | return false; |
650 | |
651 | LLT Ty = TLI.getOptimalMemOpLLT(Size, DstAlign, SrcAlign, IsMemset, |
652 | ZeroMemset, MemcpyStrSrc, FuncAttributes); |
653 | |
654 | if (Ty == LLT()) { |
655 | // Use the largest scalar type whose alignment constraints are satisfied. |
656 | // We only need to check DstAlign here as SrcAlign is always greater or |
657 | // equal to DstAlign (or zero). |
658 | Ty = LLT::scalar(64); |
659 | while (DstAlign && DstAlign < Ty.getSizeInBytes() && |
660 | !TLI.allowsMisalignedMemoryAccesses(Ty, DstAS, DstAlign)) |
661 | Ty = LLT::scalar(Ty.getSizeInBytes()); |
662 | assert(Ty.getSizeInBits() > 0 && "Could not find valid type"); |
663 | // FIXME: check for the largest legal type we can load/store to. |
664 | } |
665 | |
666 | unsigned NumMemOps = 0; |
667 | while (Size != 0) { |
668 | unsigned TySize = Ty.getSizeInBytes(); |
669 | while (TySize > Size) { |
670 | // For now, only use non-vector loads / stores for the left-over pieces. |
671 | LLT NewTy = Ty; |
672 | // FIXME: check for mem op safety and legality of the types. Not all of |
673 | // SDAGisms map cleanly to GISel concepts. |
674 | if (NewTy.isVector()) |
675 | NewTy = NewTy.getSizeInBits() > 64 ? LLT::scalar(64) : LLT::scalar(32); |
676 | NewTy = LLT::scalar(PowerOf2Floor(NewTy.getSizeInBits() - 1)); |
677 | unsigned NewTySize = NewTy.getSizeInBytes(); |
678 | assert(NewTySize > 0 && "Could not find appropriate type"); |
679 | |
680 | // If the new LLT cannot cover all of the remaining bits, then consider |
681 | // issuing a (or a pair of) unaligned and overlapping load / store. |
682 | bool Fast; |
683 | // Need to get a VT equivalent for allowMisalignedMemoryAccesses(). |
684 | MVT VT = getMVTForLLT(Ty); |
685 | if (NumMemOps && AllowOverlap && NewTySize < Size && |
686 | TLI.allowsMisalignedMemoryAccesses( |
687 | VT, DstAS, DstAlign, MachineMemOperand::MONone, &Fast) && |
688 | Fast) |
689 | TySize = Size; |
690 | else { |
691 | Ty = NewTy; |
692 | TySize = NewTySize; |
693 | } |
694 | } |
695 | |
696 | if (++NumMemOps > Limit) |
697 | return false; |
698 | |
699 | MemOps.push_back(Ty); |
700 | Size -= TySize; |
701 | } |
702 | |
703 | return true; |
704 | } |
705 | |
706 | static Type *getTypeForLLT(LLT Ty, LLVMContext &C) { |
707 | if (Ty.isVector()) |
708 | return VectorType::get(IntegerType::get(C, Ty.getScalarSizeInBits()), |
709 | Ty.getNumElements()); |
710 | return IntegerType::get(C, Ty.getSizeInBits()); |
711 | } |
712 | |
713 | // Get a vectorized representation of the memset value operand, GISel edition. |
714 | static Register getMemsetValue(Register Val, LLT Ty, MachineIRBuilder &MIB) { |
715 | MachineRegisterInfo &MRI = *MIB.getMRI(); |
716 | unsigned NumBits = Ty.getScalarSizeInBits(); |
717 | auto ValVRegAndVal = getConstantVRegValWithLookThrough(Val, MRI); |
718 | if (!Ty.isVector() && ValVRegAndVal) { |
719 | unsigned KnownVal = ValVRegAndVal->Value; |
720 | APInt Scalar = APInt(8, KnownVal); |
721 | APInt SplatVal = APInt::getSplat(NumBits, Scalar); |
722 | return MIB.buildConstant(Ty, SplatVal).getReg(0); |
723 | } |
724 | // FIXME: for vector types create a G_BUILD_VECTOR. |
725 | if (Ty.isVector()) |
726 | return Register(); |
727 | |
728 | // Extend the byte value to the larger type, and then multiply by a magic |
729 | // value 0x010101... in order to replicate it across every byte. |
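// Editor's worked example (not in the original): for the byte value 0xAB and a |
// 32-bit store type, the zero-extended value 0x000000AB multiplied by the magic |
// constant 0x01010101 gives 0xABABABAB, i.e. the byte splatted across every |
// byte of the wider value. |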
730 | LLT ExtType = Ty.getScalarType(); |
731 | auto ZExt = MIB.buildZExtOrTrunc(ExtType, Val); |
732 | if (NumBits > 8) { |
733 | APInt Magic = APInt::getSplat(NumBits, APInt(8, 0x01)); |
734 | auto MagicMI = MIB.buildConstant(ExtType, Magic); |
735 | Val = MIB.buildMul(ExtType, ZExt, MagicMI).getReg(0); |
736 | } |
737 | |
738 | assert(ExtType == Ty && "Vector memset value type not supported yet"); |
739 | return Val; |
740 | } |
741 | |
742 | bool CombinerHelper::optimizeMemset(MachineInstr &MI, Register Dst, Register Val, |
743 | unsigned KnownLen, unsigned Align, |
744 | bool IsVolatile) { |
745 | auto &MF = *MI.getParent()->getParent(); |
746 | const auto &TLI = *MF.getSubtarget().getTargetLowering(); |
747 | auto &DL = MF.getDataLayout(); |
748 | LLVMContext &C = MF.getFunction().getContext(); |
749 | |
750 | assert(KnownLen != 0 && "Have a zero length memset length!"); |
751 | |
752 | bool DstAlignCanChange = false; |
753 | MachineFrameInfo &MFI = MF.getFrameInfo(); |
754 | bool OptSize = shouldLowerMemFuncForSize(MF); |
755 | |
756 | MachineInstr *FIDef = getOpcodeDef(TargetOpcode::G_FRAME_INDEX, Dst, MRI); |
757 | if (FIDef && !MFI.isFixedObjectIndex(FIDef->getOperand(1).getIndex())) |
758 | DstAlignCanChange = true; |
759 | |
760 | unsigned Limit = TLI.getMaxStoresPerMemset(OptSize); |
761 | std::vector<LLT> MemOps; |
762 | |
763 | const auto &DstMMO = **MI.memoperands_begin(); |
764 | MachinePointerInfo DstPtrInfo = DstMMO.getPointerInfo(); |
765 | |
766 | auto ValVRegAndVal = getConstantVRegValWithLookThrough(Val, MRI); |
767 | bool IsZeroVal = ValVRegAndVal && ValVRegAndVal->Value == 0; |
768 | |
769 | if (!findGISelOptimalMemOpLowering( |
770 | MemOps, Limit, KnownLen, (DstAlignCanChange ? 0 : Align), 0, |
771 | /*IsMemset=*/true, |
772 | /*ZeroMemset=*/IsZeroVal, /*MemcpyStrSrc=*/false, |
773 | /*AllowOverlap=*/!IsVolatile, DstPtrInfo.getAddrSpace(), ~0u, |
774 | MF.getFunction().getAttributes(), TLI)) |
775 | return false; |
776 | |
777 | if (DstAlignCanChange) { |
778 | // Get an estimate of the type from the LLT. |
779 | Type *IRTy = getTypeForLLT(MemOps[0], C); |
780 | unsigned NewAlign = (unsigned)DL.getABITypeAlignment(IRTy); |
781 | if (NewAlign > Align) { |
782 | unsigned FI = FIDef->getOperand(1).getIndex(); |
783 | // Give the stack frame object a larger alignment if needed. |
784 | if (MFI.getObjectAlignment(FI) < NewAlign) |
785 | MFI.setObjectAlignment(FI, NewAlign); |
786 | Align = NewAlign; |
787 | } |
788 | } |
789 | |
790 | MachineIRBuilder MIB(MI); |
791 | // Find the largest store and generate the bit pattern for it. |
792 | LLT LargestTy = MemOps[0]; |
793 | for (unsigned i = 1; i < MemOps.size(); i++) |
794 | if (MemOps[i].getSizeInBits() > LargestTy.getSizeInBits()) |
795 | LargestTy = MemOps[i]; |
796 | |
797 | // The memset stored value is always defined as an s8, so in order to make it |
798 | // work with larger store types we need to repeat the bit pattern across the |
799 | // wider type. |
800 | Register MemSetValue = getMemsetValue(Val, LargestTy, MIB); |
801 | |
802 | if (!MemSetValue) |
803 | return false; |
804 | |
805 | // Generate the stores. For each store type in the list, we generate the |
806 | // matching store of that type to the destination address. |
807 | LLT PtrTy = MRI.getType(Dst); |
808 | unsigned DstOff = 0; |
809 | unsigned Size = KnownLen; |
810 | for (unsigned I = 0; I < MemOps.size(); I++) { |
811 | LLT Ty = MemOps[I]; |
812 | unsigned TySize = Ty.getSizeInBytes(); |
813 | if (TySize > Size) { |
814 | // Issuing an unaligned load / store pair that overlaps with the previous |
815 | // pair. Adjust the offset accordingly. |
816 | assert(I == MemOps.size() - 1 && I != 0); |
817 | DstOff -= TySize - Size; |
818 | } |
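// (Editor's example, not in the original: with KnownLen == 11 and MemOps == |
// {s64, s32}, the trailing s32 store has TySize 4 but only 3 bytes remain, so |
// DstOff drops from 8 to 7 and that store overlaps the last byte of the |
// preceding s64 store.) |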
819 | |
820 | // If this store is smaller than the largest store see whether we can get |
821 | // the smaller value for free with a truncate. |
822 | Register Value = MemSetValue; |
823 | if (Ty.getSizeInBits() < LargestTy.getSizeInBits()) { |
824 | MVT VT = getMVTForLLT(Ty); |
825 | MVT LargestVT = getMVTForLLT(LargestTy); |
826 | if (!LargestTy.isVector() && !Ty.isVector() && |
827 | TLI.isTruncateFree(LargestVT, VT)) |
828 | Value = MIB.buildTrunc(Ty, MemSetValue).getReg(0); |
829 | else |
830 | Value = getMemsetValue(Val, Ty, MIB); |
831 | if (!Value) |
832 | return false; |
833 | } |
834 | |
835 | auto *StoreMMO = |
836 | MF.getMachineMemOperand(&DstMMO, DstOff, Ty.getSizeInBytes()); |
837 | |
838 | Register Ptr = Dst; |
839 | if (DstOff != 0) { |
840 | auto Offset = |
841 | MIB.buildConstant(LLT::scalar(PtrTy.getSizeInBits()), DstOff); |
842 | Ptr = MIB.buildGEP(PtrTy, Dst, Offset).getReg(0); |
843 | } |
844 | |
845 | MIB.buildStore(Value, Ptr, *StoreMMO); |
846 | DstOff += Ty.getSizeInBytes(); |
847 | Size -= TySize; |
848 | } |
849 | |
850 | MI.eraseFromParent(); |
851 | return true; |
852 | } |
853 | |
854 | |
855 | bool CombinerHelper::optimizeMemcpy(MachineInstr &MI, Register Dst, |
856 | Register Src, unsigned KnownLen, |
857 | unsigned DstAlign, unsigned SrcAlign, |
858 | bool IsVolatile) { |
859 | auto &MF = *MI.getParent()->getParent(); |
860 | const auto &TLI = *MF.getSubtarget().getTargetLowering(); |
861 | auto &DL = MF.getDataLayout(); |
862 | LLVMContext &C = MF.getFunction().getContext(); |
863 | |
864 | assert(KnownLen != 0 && "Have a zero length memcpy length!"); |
865 | |
866 | bool DstAlignCanChange = false; |
867 | MachineFrameInfo &MFI = MF.getFrameInfo(); |
868 | bool OptSize = shouldLowerMemFuncForSize(MF); |
869 | unsigned Alignment = MinAlign(DstAlign, SrcAlign); |
870 | |
871 | MachineInstr *FIDef = getOpcodeDef(TargetOpcode::G_FRAME_INDEX, Dst, MRI); |
872 | if (FIDef && !MFI.isFixedObjectIndex(FIDef->getOperand(1).getIndex())) |
873 | DstAlignCanChange = true; |
874 | |
875 | // FIXME: infer better src pointer alignment like SelectionDAG does here. |
876 | // FIXME: also use the equivalent of isMemSrcFromConstant and alwaysinlining |
877 | // if the memcpy is in a tail call position. |
878 | |
879 | unsigned Limit = TLI.getMaxStoresPerMemcpy(OptSize); |
880 | std::vector<LLT> MemOps; |
881 | |
882 | const auto &DstMMO = **MI.memoperands_begin(); |
883 | const auto &SrcMMO = **std::next(MI.memoperands_begin()); |
884 | MachinePointerInfo DstPtrInfo = DstMMO.getPointerInfo(); |
885 | MachinePointerInfo SrcPtrInfo = SrcMMO.getPointerInfo(); |
886 | |
887 | if (!findGISelOptimalMemOpLowering( |
888 | MemOps, Limit, KnownLen, (DstAlignCanChange ? 0 : Alignment), |
889 | SrcAlign, |
890 | /*IsMemset=*/false, |
891 | /*ZeroMemset=*/false, /*MemcpyStrSrc=*/false, |
892 | /*AllowOverlap=*/!IsVolatile, DstPtrInfo.getAddrSpace(), |
893 | SrcPtrInfo.getAddrSpace(), MF.getFunction().getAttributes(), TLI)) |
894 | return false; |
895 | |
896 | if (DstAlignCanChange) { |
897 | // Get an estimate of the type from the LLT. |
898 | Type *IRTy = getTypeForLLT(MemOps[0], C); |
899 | unsigned NewAlign = (unsigned)DL.getABITypeAlignment(IRTy); |
900 | |
901 | // Don't promote to an alignment that would require dynamic stack |
902 | // realignment. |
903 | const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo(); |
904 | if (!TRI->needsStackRealignment(MF)) |
905 | while (NewAlign > Alignment && |
906 | DL.exceedsNaturalStackAlignment(Align(NewAlign))) |
907 | NewAlign /= 2; |
908 | |
909 | if (NewAlign > Alignment) { |
910 | unsigned FI = FIDef->getOperand(1).getIndex(); |
911 | // Give the stack frame object a larger alignment if needed. |
912 | if (MFI.getObjectAlignment(FI) < NewAlign) |
913 | MFI.setObjectAlignment(FI, NewAlign); |
914 | Alignment = NewAlign; |
Value stored to 'Alignment' is never read | |
915 | } |
916 | } |
917 | |
918 | LLVM_DEBUG(dbgs() << "Inlining memcpy: " << MI << " into loads & stores\n"); |
919 | |
920 | MachineIRBuilder MIB(MI); |
921 | // Now we need to emit a load and store pair for each of the types we've |
922 | // collected. I.e. for each type, generate a load from the source pointer of |
923 | // that type width, and then generate a corresponding store to the dest buffer |
924 | // of that value loaded. This can result in a sequence of loads and stores of |
925 | // mixed types, depending on what the target specifies as good types to use. |
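// Editor's example (not in the original): an 11-byte copy lowered with |
// MemOps == {s64, s16, s8} becomes load/store pairs of s64 at offset 0, s16 at |
// offset 8 and s8 at offset 10. |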
926 | unsigned CurrOffset = 0; |
927 | LLT PtrTy = MRI.getType(Src); |
928 | unsigned Size = KnownLen; |
929 | for (auto CopyTy : MemOps) { |
930 | // Issuing an unaligned load / store pair that overlaps with the previous |
931 | // pair. Adjust the offset accordingly. |
932 | if (CopyTy.getSizeInBytes() > Size) |
933 | CurrOffset -= CopyTy.getSizeInBytes() - Size; |
934 | |
935 | // Construct MMOs for the accesses. |
936 | auto *LoadMMO = |
937 | MF.getMachineMemOperand(&SrcMMO, CurrOffset, CopyTy.getSizeInBytes()); |
938 | auto *StoreMMO = |
939 | MF.getMachineMemOperand(&DstMMO, CurrOffset, CopyTy.getSizeInBytes()); |
940 | |
941 | // Create the load. |
942 | Register LoadPtr = Src; |
943 | Register Offset; |
944 | if (CurrOffset != 0) { |
945 | Offset = MIB.buildConstant(LLT::scalar(PtrTy.getSizeInBits()), CurrOffset) |
946 | .getReg(0); |
947 | LoadPtr = MIB.buildGEP(PtrTy, Src, Offset).getReg(0); |
948 | } |
949 | auto LdVal = MIB.buildLoad(CopyTy, LoadPtr, *LoadMMO); |
950 | |
951 | // Create the store. |
952 | Register StorePtr = |
953 | CurrOffset == 0 ? Dst : MIB.buildGEP(PtrTy, Dst, Offset).getReg(0); |
954 | MIB.buildStore(LdVal, StorePtr, *StoreMMO); |
955 | CurrOffset += CopyTy.getSizeInBytes(); |
956 | Size -= CopyTy.getSizeInBytes(); |
957 | } |
958 | |
959 | MI.eraseFromParent(); |
960 | return true; |
961 | } |
962 | |
963 | bool CombinerHelper::optimizeMemmove(MachineInstr &MI, Register Dst, |
964 | Register Src, unsigned KnownLen, |
965 | unsigned DstAlign, unsigned SrcAlign, |
966 | bool IsVolatile) { |
967 | auto &MF = *MI.getParent()->getParent(); |
968 | const auto &TLI = *MF.getSubtarget().getTargetLowering(); |
969 | auto &DL = MF.getDataLayout(); |
970 | LLVMContext &C = MF.getFunction().getContext(); |
971 | |
972 | assert(KnownLen != 0 && "Have a zero length memmove length!"); |
973 | |
974 | bool DstAlignCanChange = false; |
975 | MachineFrameInfo &MFI = MF.getFrameInfo(); |
976 | bool OptSize = shouldLowerMemFuncForSize(MF); |
977 | unsigned Alignment = MinAlign(DstAlign, SrcAlign); |
978 | |
979 | MachineInstr *FIDef = getOpcodeDef(TargetOpcode::G_FRAME_INDEX, Dst, MRI); |
980 | if (FIDef && !MFI.isFixedObjectIndex(FIDef->getOperand(1).getIndex())) |
981 | DstAlignCanChange = true; |
982 | |
983 | unsigned Limit = TLI.getMaxStoresPerMemmove(OptSize); |
984 | std::vector<LLT> MemOps; |
985 | |
986 | const auto &DstMMO = **MI.memoperands_begin(); |
987 | const auto &SrcMMO = **std::next(MI.memoperands_begin()); |
988 | MachinePointerInfo DstPtrInfo = DstMMO.getPointerInfo(); |
989 | MachinePointerInfo SrcPtrInfo = SrcMMO.getPointerInfo(); |
990 | |
991 | // FIXME: SelectionDAG always passes false for 'AllowOverlap', apparently due |
992 | // to a bug in its findOptimalMemOpLowering implementation. For now do the |
993 | // same thing here. |
994 | if (!findGISelOptimalMemOpLowering( |
995 | MemOps, Limit, KnownLen, (DstAlignCanChange ? 0 : Alignment), |
996 | SrcAlign, |
997 | /*IsMemset=*/false, |
998 | /*ZeroMemset=*/false, /*MemcpyStrSrc=*/false, |
999 | /*AllowOverlap=*/false, DstPtrInfo.getAddrSpace(), |
1000 | SrcPtrInfo.getAddrSpace(), MF.getFunction().getAttributes(), TLI)) |
1001 | return false; |
1002 | |
1003 | if (DstAlignCanChange) { |
1004 | // Get an estimate of the type from the LLT. |
1005 | Type *IRTy = getTypeForLLT(MemOps[0], C); |
1006 | unsigned NewAlign = (unsigned)DL.getABITypeAlignment(IRTy); |
1007 | |
1008 | // Don't promote to an alignment that would require dynamic stack |
1009 | // realignment. |
1010 | const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo(); |
1011 | if (!TRI->needsStackRealignment(MF)) |
1012 | while (NewAlign > Alignment && |
1013 | DL.exceedsNaturalStackAlignment(Align(NewAlign))) |
1014 | NewAlign /= 2; |
1015 | |
1016 | if (NewAlign > Alignment) { |
1017 | unsigned FI = FIDef->getOperand(1).getIndex(); |
1018 | // Give the stack frame object a larger alignment if needed. |
1019 | if (MFI.getObjectAlignment(FI) < NewAlign) |
1020 | MFI.setObjectAlignment(FI, NewAlign); |
1021 | Alignment = NewAlign; |
1022 | } |
1023 | } |
1024 | |
1025 | LLVM_DEBUG(dbgs() << "Inlining memmove: " << MI << " into loads & stores\n"); |
1026 | |
1027 | MachineIRBuilder MIB(MI); |
1028 | // Memmove requires that we perform the loads first before issuing the stores. |
1029 | // Apart from that, this loop is pretty much doing the same thing as the |
1030 | // memcpy codegen function. |
1031 | unsigned CurrOffset = 0; |
1032 | LLT PtrTy = MRI.getType(Src); |
1033 | SmallVector<Register, 16> LoadVals; |
1034 | for (auto CopyTy : MemOps) { |
1035 | // Construct MMO for the load. |
1036 | auto *LoadMMO = |
1037 | MF.getMachineMemOperand(&SrcMMO, CurrOffset, CopyTy.getSizeInBytes()); |
1038 | |
1039 | // Create the load. |
1040 | Register LoadPtr = Src; |
1041 | if (CurrOffset != 0) { |
1042 | auto Offset = |
1043 | MIB.buildConstant(LLT::scalar(PtrTy.getSizeInBits()), CurrOffset); |
1044 | LoadPtr = MIB.buildGEP(PtrTy, Src, Offset).getReg(0); |
1045 | } |
1046 | LoadVals.push_back(MIB.buildLoad(CopyTy, LoadPtr, *LoadMMO).getReg(0)); |
1047 | CurrOffset += CopyTy.getSizeInBytes(); |
1048 | } |
1049 | |
1050 | CurrOffset = 0; |
1051 | for (unsigned I = 0; I < MemOps.size(); ++I) { |
1052 | LLT CopyTy = MemOps[I]; |
1053 | // Now store the values loaded. |
1054 | auto *StoreMMO = |
1055 | MF.getMachineMemOperand(&DstMMO, CurrOffset, CopyTy.getSizeInBytes()); |
1056 | |
1057 | Register StorePtr = Dst; |
1058 | if (CurrOffset != 0) { |
1059 | auto Offset = |
1060 | MIB.buildConstant(LLT::scalar(PtrTy.getSizeInBits()), CurrOffset); |
1061 | StorePtr = MIB.buildGEP(PtrTy, Dst, Offset).getReg(0); |
1062 | } |
1063 | MIB.buildStore(LoadVals[I], StorePtr, *StoreMMO); |
1064 | CurrOffset += CopyTy.getSizeInBytes(); |
1065 | } |
1066 | MI.eraseFromParent(); |
1067 | return true; |
1068 | } |
1069 | |
1070 | bool CombinerHelper::tryCombineMemCpyFamily(MachineInstr &MI, unsigned MaxLen) { |
1071 | // This combine is fairly complex so it's not written with a separate |
1072 | // matcher function. |
1073 | assert(MI.getOpcode() == TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS); |
1074 | Intrinsic::ID ID = (Intrinsic::ID)MI.getIntrinsicID(); |
1075 | assert((ID == Intrinsic::memcpy || ID == Intrinsic::memmove || |
1076 | ID == Intrinsic::memset) && |
1077 | "Expected a memcpy like intrinsic"); |
1078 | |
1079 | auto MMOIt = MI.memoperands_begin(); |
1080 | const MachineMemOperand *MemOp = *MMOIt; |
1081 | bool IsVolatile = MemOp->isVolatile(); |
1082 | // Don't try to optimize volatile. |
1083 | if (IsVolatile) |
1084 | return false; |
1085 | |
1086 | unsigned DstAlign = MemOp->getBaseAlignment(); |
1087 | unsigned SrcAlign = 0; |
1088 | Register Dst = MI.getOperand(1).getReg(); |
1089 | Register Src = MI.getOperand(2).getReg(); |
1090 | Register Len = MI.getOperand(3).getReg(); |
1091 | |
1092 | if (ID != Intrinsic::memset) { |
1093 | assert(MMOIt != MI.memoperands_end() && "Expected a second MMO on MI"); |
1094 | MemOp = *(++MMOIt); |
1095 | SrcAlign = MemOp->getBaseAlignment(); |
1096 | } |
1097 | |
1098 | // See if this is a constant length copy |
1099 | auto LenVRegAndVal = getConstantVRegValWithLookThrough(Len, MRI); |
1100 | if (!LenVRegAndVal) |
1101 | return false; // Leave it to the legalizer to lower it to a libcall. |
1102 | unsigned KnownLen = LenVRegAndVal->Value; |
1103 | |
1104 | if (KnownLen == 0) { |
1105 | MI.eraseFromParent(); |
1106 | return true; |
1107 | } |
1108 | |
1109 | if (MaxLen && KnownLen > MaxLen) |
1110 | return false; |
1111 | |
1112 | if (ID == Intrinsic::memcpy) |
1113 | return optimizeMemcpy(MI, Dst, Src, KnownLen, DstAlign, SrcAlign, IsVolatile); |
1114 | if (ID == Intrinsic::memmove) |
1115 | return optimizeMemmove(MI, Dst, Src, KnownLen, DstAlign, SrcAlign, IsVolatile); |
1116 | if (ID == Intrinsic::memset) |
1117 | return optimizeMemset(MI, Dst, Src, KnownLen, DstAlign, IsVolatile); |
1118 | return false; |
1119 | } |
1120 | |
1121 | bool CombinerHelper::tryCombine(MachineInstr &MI) { |
1122 | if (tryCombineCopy(MI)) |
1123 | return true; |
1124 | if (tryCombineExtendingLoads(MI)) |
1125 | return true; |
1126 | if (tryCombineIndexedLoadStore(MI)) |
1127 | return true; |
1128 | return false; |
1129 | } |