//===- SeparateConstOffsetFromGEP.cpp -------------------------------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// Loop unrolling may create many similar GEPs for array accesses.
// e.g., a 2-level loop
//
// float a[32][32]; // global variable
//
// for (int i = 0; i < 2; ++i) {
//   for (int j = 0; j < 2; ++j) {
//     ...
//     ... = a[x + i][y + j];
//     ...
//   }
// }
//
// will probably be unrolled to:
//
// gep %a, 0, %x, %y; load
// gep %a, 0, %x, %y + 1; load
// gep %a, 0, %x + 1, %y; load
// gep %a, 0, %x + 1, %y + 1; load
//
// LLVM's GVN does not use partial redundancy elimination yet, and is thus
// unable to reuse (gep %a, 0, %x, %y). As a result, this misoptimization incurs
// a significant slowdown on targets with limited addressing modes. For
// instance, because the PTX target does not support the reg+reg addressing
// mode, the NVPTX backend emits PTX code that literally computes the pointer
// address of each GEP, wasting tons of registers. It emits the following PTX
// for the first load and similar PTX for the other loads.
//
// mov.u32         %r1, %x;
// mov.u32         %r2, %y;
// mul.wide.u32    %rl2, %r1, 128;
// mov.u64         %rl3, a;
// add.s64         %rl4, %rl3, %rl2;
// mul.wide.u32    %rl5, %r2, 4;
// add.s64         %rl6, %rl4, %rl5;
// ld.global.f32   %f1, [%rl6];
//
// To reduce the register pressure, the optimization implemented in this file
// merges the common part of a group of GEPs, so we can compute each pointer
// address by adding a simple offset to the common part, saving many registers.
//
// It works by splitting each GEP into a variadic base and a constant offset.
// The variadic base can be computed once and reused by multiple GEPs, and the
// constant offsets can be nicely folded into the reg+immediate addressing mode
// (supported by most targets) without using any extra register.
//
// For instance, we transform the four GEPs and four loads in the above example
// into:
//
// base = gep a, 0, x, y
// load base
// load base + 1 * sizeof(float)
// load base + 32 * sizeof(float)
// load base + 33 * sizeof(float)
//
// Given the transformed IR, a backend that supports the reg+immediate
// addressing mode can easily fold the pointer arithmetic into the loads. For
// example, the NVPTX backend can easily fold the pointer arithmetic into the
// ld.global.f32 instructions, and the resultant PTX uses far fewer registers.
//
// mov.u32         %r1, %x;
// mov.u32         %r2, %y;
// mul.wide.u32    %rl2, %r1, 128;
// mov.u64         %rl3, a;
// add.s64         %rl4, %rl3, %rl2;
// mul.wide.u32    %rl5, %r2, 4;
// add.s64         %rl6, %rl4, %rl5;
// ld.global.f32   %f1, [%rl6];     // so far the same as unoptimized PTX
// ld.global.f32   %f2, [%rl6+4];   // much better
// ld.global.f32   %f3, [%rl6+128]; // much better
// ld.global.f32   %f4, [%rl6+132]; // much better
//
// Another improvement enabled by the LowerGEP flag is to lower a GEP with
// multiple indices to either multiple GEPs with a single index or arithmetic
// operations (depending on whether the target uses alias analysis in codegen).
// Such a transformation has the following benefits:
// (1) It can always extract constants in the indices of structure types.
// (2) After such lowering, there are more optimization opportunities such as
//     CSE, LICM and CGP.
//
// E.g. The following GEPs have multiple indices:
//  BB1:
//    %p = getelementptr [10 x %struct]* %ptr, i64 %i, i64 %j1, i32 3
//    load %p
//    ...
//  BB2:
//    %p2 = getelementptr [10 x %struct]* %ptr, i64 %i, i64 %j2, i32 2
//    load %p2
//    ...
//
// We cannot do CSE on the common part related to index "i64 %i" while the
// GEPs remain in this form. Lowering the GEPs makes such CSE possible.
// If the target does not use alias analysis in codegen, this pass will
// lower a GEP with multiple indices into arithmetic operations:
//  BB1:
//    %1 = ptrtoint [10 x %struct]* %ptr to i64    ; CSE opportunity
//    %2 = mul i64 %i, length_of_10xstruct         ; CSE opportunity
//    %3 = add i64 %1, %2                          ; CSE opportunity
//    %4 = mul i64 %j1, length_of_struct
//    %5 = add i64 %3, %4
//    %6 = add i64 %5, struct_field_3              ; Constant offset
//    %p = inttoptr i64 %6 to i32*
//    load %p
//    ...
//  BB2:
//    %7 = ptrtoint [10 x %struct]* %ptr to i64    ; CSE opportunity
//    %8 = mul i64 %i, length_of_10xstruct         ; CSE opportunity
//    %9 = add i64 %7, %8                          ; CSE opportunity
//    %10 = mul i64 %j2, length_of_struct
//    %11 = add i64 %9, %10
//    %12 = add i64 %11, struct_field_2            ; Constant offset
//    %p2 = inttoptr i64 %12 to i32*
//    load %p2
//    ...
//
// If the target uses alias analysis in codegen, this pass will lower a GEP
// with multiple indices into multiple GEPs with a single index:
//  BB1:
//    %1 = bitcast [10 x %struct]* %ptr to i8*     ; CSE opportunity
//    %2 = mul i64 %i, length_of_10xstruct         ; CSE opportunity
//    %3 = getelementptr i8* %1, i64 %2            ; CSE opportunity
//    %4 = mul i64 %j1, length_of_struct
//    %5 = getelementptr i8* %3, i64 %4
//    %6 = getelementptr i8* %5, struct_field_3    ; Constant offset
//    %p = bitcast i8* %6 to i32*
//    load %p
//    ...
//  BB2:
//    %7 = bitcast [10 x %struct]* %ptr to i8*     ; CSE opportunity
//    %8 = mul i64 %i, length_of_10xstruct         ; CSE opportunity
//    %9 = getelementptr i8* %7, i64 %8            ; CSE opportunity
//    %10 = mul i64 %j2, length_of_struct
//    %11 = getelementptr i8* %9, i64 %10
//    %12 = getelementptr i8* %11, struct_field_2  ; Constant offset
//    %p2 = bitcast i8* %12 to i32*
//    load %p2
//    ...
//
// Lowering GEPs can also benefit other passes such as LICM and CGP.
// LICM (Loop Invariant Code Motion) cannot hoist/sink a GEP with multiple
// indices if any one of the indices is variant. If we lower such a GEP into
// invariant parts and variant parts, LICM can hoist/sink the invariant parts.
// CGP (CodeGen Prepare) tries to sink address calculations that match the
// target's addressing modes. A GEP with multiple indices may not match and
// will not be sunk. If we lower such a GEP into smaller parts, CGP may sink
// some of them, so we end up with a better addressing mode.
//
//===----------------------------------------------------------------------===//

#include "llvm/ADT/APInt.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/DepthFirstIterator.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/MemoryBuiltins.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GetElementPtrTypeIterator.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/User.h"
#include "llvm/IR/Value.h"
#include "llvm/Pass.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Utils/Local.h"
#include <cassert>
#include <cstdint>
#include <string>

using namespace llvm;
using namespace llvm::PatternMatch;

static cl::opt<bool> DisableSeparateConstOffsetFromGEP(
    "disable-separate-const-offset-from-gep", cl::init(false),
    cl::desc("Do not separate the constant offset from a GEP instruction"),
    cl::Hidden);

// Setting this flag may emit false positives when the input module already
// contains dead instructions. Therefore, we set it only in unit tests that are
// free of dead code.
static cl::opt<bool>
    VerifyNoDeadCode("reassociate-geps-verify-no-dead-code", cl::init(false),
                     cl::desc("Verify this pass produces no dead code"),
                     cl::Hidden);

namespace {

/// \brief A helper class for separating a constant offset from a GEP index.
///
/// In real programs, a GEP index may be more complicated than a simple addition
/// of something and a constant integer which can be trivially split. For
/// example, to split ((a << 3) | 5) + b, we need to search deeper for the
/// constant offset, so that we can separate the index into (a << 3) + b and 5.
///
/// Therefore, this class looks into the expression that computes a given GEP
/// index, and tries to find a constant integer that can be hoisted to the
/// outermost level of the expression as an addition. Not every constant in an
/// expression can jump out. e.g., we cannot transform (b * (a + 5)) to
/// (b * a + 5); nor can we transform (3 * (a + 5)) to (3 * a + 5). However, in
/// this case, -instcombine has probably already optimized (3 * (a + 5)) to
/// (3 * a + 15).
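///
/// As a minimal illustration (hypothetical IR, not from the original
/// comments): for the index %idx of
///   %p = getelementptr float, float* %base, i64 %idx
/// where %idx = add i64 %a, 5, Extract() returns %a as the new index and
/// Find() reports the constant offset 5.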
class ConstantOffsetExtractor {
public:
  /// Extracts a constant offset from the given GEP index. It returns the
  /// new index representing the remainder (equal to the original index minus
  /// the constant offset), or nullptr if we cannot extract a constant offset.
  /// \p Idx           The given GEP index
  /// \p GEP           The given GEP
  /// \p UserChainTail Outputs the tail of UserChain so that we can
  ///                  garbage-collect unused instructions in UserChain.
  static Value *Extract(Value *Idx, GetElementPtrInst *GEP,
                        User *&UserChainTail, const DominatorTree *DT);

  /// Looks for a constant offset from the given GEP index without extracting
  /// it. It returns the numeric value of the extracted constant offset (0 if
  /// failed). The meaning of the arguments is the same as for Extract.
  static int64_t Find(Value *Idx, GetElementPtrInst *GEP,
                      const DominatorTree *DT);

private:
  ConstantOffsetExtractor(Instruction *InsertionPt, const DominatorTree *DT)
      : IP(InsertionPt), DL(InsertionPt->getModule()->getDataLayout()), DT(DT) {
  }

  /// Searches the expression that computes V for a non-zero constant C s.t.
  /// V can be reassociated into the form V' + C. If the search is successful,
  /// returns C and updates UserChain as a def-use chain from C to V;
  /// otherwise, UserChain is empty.
  ///
  /// \p V            The given expression
  /// \p SignExtended Whether V will be sign-extended in the computation of the
  ///                 GEP index
  /// \p ZeroExtended Whether V will be zero-extended in the computation of the
  ///                 GEP index
  /// \p NonNegative  Whether V is guaranteed to be non-negative. For example,
  ///                 an index of an inbounds GEP is guaranteed to be
  ///                 non-negative. Leveraging this, we can better split
  ///                 inbounds GEPs.
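  ///
  /// e.g. (an illustrative sketch): find() on "a + (b + 5)" returns 5 and
  /// leaves UserChain = [5, b + 5, a + (b + 5)].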
  APInt find(Value *V, bool SignExtended, bool ZeroExtended, bool NonNegative);

  /// A helper function to look into both operands of a binary operator.
  APInt findInEitherOperand(BinaryOperator *BO, bool SignExtended,
                            bool ZeroExtended);

  /// After finding the constant offset C from the GEP index I, we build a new
  /// index I' s.t. I' + C = I. This function builds and returns the new
  /// index I' according to UserChain produced by function "find".
  ///
  /// The building conceptually takes two steps:
  /// 1) iteratively distribute s/zext towards the leaves of the expression
  ///    tree that computes I
  /// 2) reassociate the expression tree to the form I' + C.
  ///
  /// For example, to extract the 5 from sext(a + (b + 5)), we first distribute
  /// sext to a, b and 5 so that we have
  ///   sext(a) + (sext(b) + 5).
  /// Then, we reassociate it to
  ///   (sext(a) + sext(b)) + 5.
  /// Given this form, we know I' is sext(a) + sext(b).
  Value *rebuildWithoutConstOffset();

  /// After the first step of rebuilding the GEP index without the constant
  /// offset, distribute s/zext to the operands of all operators in UserChain.
  /// e.g., zext(sext(a + (b + 5))) (assuming no overflow) =>
  /// zext(sext(a)) + (zext(sext(b)) + zext(sext(5))).
  ///
  /// The function also updates UserChain to point to new subexpressions after
  /// distributing s/zext. e.g., the old UserChain of the above example is
  /// 5 -> b + 5 -> a + (b + 5) -> sext(...) -> zext(sext(...)),
  /// and the new UserChain is
  /// zext(sext(5)) -> zext(sext(b)) + zext(sext(5)) ->
  ///   zext(sext(a)) + (zext(sext(b)) + zext(sext(5))).
  ///
  /// \p ChainIndex The index to UserChain. ChainIndex is initially
  ///               UserChain.size() - 1, and is decremented during
  ///               the recursion.
  Value *distributeExtsAndCloneChain(unsigned ChainIndex);

  /// Reassociates the GEP index to the form I' + C and returns I'.
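  /// e.g. (an illustrative reading of the UserChain example below): for the
  /// chain built from "a * b + (c + 5)", this returns "a * b + c".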
  Value *removeConstOffset(unsigned ChainIndex);

  /// A helper function to apply ExtInsts, a list of s/zext, to value V.
  /// e.g., if ExtInsts = [sext i32 to i64, zext i16 to i32], this function
  /// returns "sext i32 (zext i16 V to i32) to i64".
  Value *applyExts(Value *V);

  /// A helper function that returns whether we can trace into the operands
  /// of binary operator BO for a constant offset.
  ///
  /// \p SignExtended Whether BO is surrounded by sext
  /// \p ZeroExtended Whether BO is surrounded by zext
  /// \p NonNegative  Whether BO is known to be non-negative, e.g., an in-bound
  ///                 array index.
  bool CanTraceInto(bool SignExtended, bool ZeroExtended, BinaryOperator *BO,
                    bool NonNegative);

  /// The path from the constant offset to the old GEP index. e.g., if the GEP
  /// index is "a * b + (c + 5)", after running function find, UserChain[0]
  /// will be the constant 5, UserChain[1] will be the subexpression "c + 5",
  /// and UserChain[2] will be the entire expression "a * b + (c + 5)".
  ///
  /// This path helps to rebuild the new GEP index.
  SmallVector<User *, 8> UserChain;

  /// A data structure used in rebuildWithoutConstOffset. Contains all
  /// sext/zext instructions along UserChain.
  SmallVector<CastInst *, 16> ExtInsts;

  /// Insertion position of cloned instructions.
  Instruction *IP;

  const DataLayout &DL;
  const DominatorTree *DT;
};

/// \brief A pass that tries to split every GEP in the function into a variadic
/// base and a constant offset. It is a FunctionPass because searching for the
/// constant offset may inspect other basic blocks.
class SeparateConstOffsetFromGEP : public FunctionPass {
public:
  static char ID;

  SeparateConstOffsetFromGEP(const TargetMachine *TM = nullptr,
                             bool LowerGEP = false)
      : FunctionPass(ID), TM(TM), LowerGEP(LowerGEP) {
    initializeSeparateConstOffsetFromGEPPass(*PassRegistry::getPassRegistry());
  }

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.addRequired<DominatorTreeWrapperPass>();
    AU.addRequired<ScalarEvolutionWrapperPass>();
    AU.addRequired<TargetTransformInfoWrapperPass>();
    AU.addRequired<LoopInfoWrapperPass>();
    AU.setPreservesCFG();
    AU.addRequired<TargetLibraryInfoWrapperPass>();
  }

  bool doInitialization(Module &M) override {
    DL = &M.getDataLayout();
    return false;
  }

  bool runOnFunction(Function &F) override;

private:
  /// Tries to split the given GEP into a variadic base and a constant offset,
  /// and returns true if the splitting succeeds.
  bool splitGEP(GetElementPtrInst *GEP);

  /// Lower a GEP with multiple indices into multiple GEPs with a single index.
  /// Function splitGEP already split the original GEP into a variadic part and
  /// a constant offset (i.e., AccumulativeByteOffset). This function lowers
  /// the variadic part into a set of GEPs with a single index and applies
  /// AccumulativeByteOffset to it.
  /// \p Variadic                The variadic part of the original GEP.
  /// \p AccumulativeByteOffset  The constant offset.
  void lowerToSingleIndexGEPs(GetElementPtrInst *Variadic,
                              int64_t AccumulativeByteOffset);

  /// Lower a GEP with multiple indices into ptrtoint+arithmetics+inttoptr
  /// form. Function splitGEP already split the original GEP into a variadic
  /// part and a constant offset (i.e., AccumulativeByteOffset). This function
  /// lowers the variadic part into a set of arithmetic operations and applies
  /// AccumulativeByteOffset to it.
  /// \p Variadic                The variadic part of the original GEP.
  /// \p AccumulativeByteOffset  The constant offset.
  void lowerToArithmetics(GetElementPtrInst *Variadic,
                          int64_t AccumulativeByteOffset);

  /// Finds the constant offset within each index and accumulates them. If
  /// LowerGEP is true, it finds constant offsets in indices of both sequential
  /// and structure types; otherwise it only looks at sequential indices. The
  /// output NeedsExtraction indicates whether we successfully found a non-zero
  /// constant offset.
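  ///
  /// For illustration (a hypothetical example): for
  ///   getelementptr inbounds [100 x i32], [100 x i32]* %a, i64 0, i64 %idx
  /// where %idx = add i64 %i, 5, the constant 5 contributes
  /// 5 * sizeof(i32) = 20 bytes to the accumulated offset.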
  int64_t accumulateByteOffset(GetElementPtrInst *GEP, bool &NeedsExtraction);

  /// Canonicalize array indices to pointer-size integers. This helps to
  /// simplify the logic of splitting a GEP. For example, if a + b is a
  /// pointer-size integer, we have
  ///   gep base, a + b = gep (gep base, a), b
  /// However, this equality may not hold if the size of a + b is smaller than
  /// the pointer size, because LLVM conceptually sign-extends GEP indices to
  /// pointer size before computing the address
  /// (http://llvm.org/docs/LangRef.html#id181).
  ///
  /// This canonicalization is very likely already done in clang and
  /// instcombine. Therefore, the program will probably remain the same.
  ///
  /// Returns true if the module changes.
  ///
  /// Verified in @i32_add in split-gep.ll
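  ///
  /// For illustration (a hypothetical example): on a target with 64-bit
  /// pointers, the i32 index %i in
  ///   getelementptr float, float* %base, i32 %i
  /// is promoted with "sext i32 %i to i64" before the GEP is split.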
  bool canonicalizeArrayIndicesToPointerSize(GetElementPtrInst *GEP);

  /// Optimize sext(a)+sext(b) to sext(a+b) when a+b can't sign overflow.
  /// SeparateConstOffsetFromGEP distributes a sext to leaves before extracting
  /// the constant offset. After extraction, it becomes desirable to reunite
  /// the distributed sexts. For example,
  ///
  ///                            &a[sext(i +nsw (j +nsw 5))]
  ///   => distribute            &a[sext(i) +nsw (sext(j) +nsw 5)]
  ///   => constant extraction   &a[sext(i) + sext(j)] + 5
  ///   => reunion               &a[sext(i +nsw j)] + 5
  bool reuniteExts(Function &F);

  /// A helper that reunites sexts in an instruction.
  bool reuniteExts(Instruction *I);

  /// Find the closest dominator of <Dominatee> that is equivalent to <Key>.
  Instruction *findClosestMatchingDominator(const SCEV *Key,
                                            Instruction *Dominatee);

  /// Verify F is free of dead code.
  void verifyNoDeadCode(Function &F);

  bool hasMoreThanOneUseInLoop(Value *V, Loop *L);

  // Swap the index operands of two GEPs.
  void swapGEPOperand(GetElementPtrInst *First, GetElementPtrInst *Second);

  // Check whether it is safe to swap the index operands of two GEPs.
  bool isLegalToSwapOperand(GetElementPtrInst *First, GetElementPtrInst *Second,
                            Loop *CurLoop);
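  //
  // For illustration of the swap performed by swapGEPOperand (hypothetical
  // IR, not from the original comments):
  //   %p  = getelementptr i8, i8* %base, i64 %var  ; loop-variant index
  //   %p2 = getelementptr i8, i8* %p, i64 1024     ; constant offset
  // becomes, after swapping the index operands,
  //   %p  = getelementptr i8, i8* %base, i64 1024  ; loop-invariant, LICM-able
  //   %p2 = getelementptr i8, i8* %p, i64 %var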

  const DataLayout *DL = nullptr;
  DominatorTree *DT = nullptr;
  ScalarEvolution *SE;
  const TargetMachine *TM;

  LoopInfo *LI;
  TargetLibraryInfo *TLI;

  /// Whether to lower a GEP with multiple indices into arithmetic operations
  /// or multiple GEPs with a single index.
  bool LowerGEP;

  DenseMap<const SCEV *, SmallVector<Instruction *, 2>> DominatingExprs;
};

} // end anonymous namespace

char SeparateConstOffsetFromGEP::ID = 0;

INITIALIZE_PASS_BEGIN(
    SeparateConstOffsetFromGEP, "separate-const-offset-from-gep",
    "Split GEPs to a variadic base and a constant offset for better CSE", false,
    false)
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass)
INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
INITIALIZE_PASS_END(
    SeparateConstOffsetFromGEP, "separate-const-offset-from-gep",
    "Split GEPs to a variadic base and a constant offset for better CSE", false,
    false)

FunctionPass *
llvm::createSeparateConstOffsetFromGEPPass(const TargetMachine *TM,
                                           bool LowerGEP) {
  return new SeparateConstOffsetFromGEP(TM, LowerGEP);
}

bool ConstantOffsetExtractor::CanTraceInto(bool SignExtended,
                                           bool ZeroExtended,
                                           BinaryOperator *BO,
                                           bool NonNegative) {
  // We only consider ADD, SUB and OR, because a non-zero constant found in
  // expressions composed of these operations can be easily hoisted as a
  // constant offset by reassociation.
  if (BO->getOpcode() != Instruction::Add &&
      BO->getOpcode() != Instruction::Sub &&
      BO->getOpcode() != Instruction::Or) {
    return false;
  }

  Value *LHS = BO->getOperand(0), *RHS = BO->getOperand(1);
  // Do not trace into "or" unless it is equivalent to "add". If LHS and RHS
  // don't have common bits, (LHS | RHS) is equivalent to (LHS + RHS).
  if (BO->getOpcode() == Instruction::Or &&
      !haveNoCommonBitsSet(LHS, RHS, DL, nullptr, BO, DT))
    return false;

  // In addition, tracing into BO requires that its surrounding s/zext (if
  // any) is distributable to both operands.
  //
  // Suppose BO = A op B.
  //  SignExtended | ZeroExtended | Distributable?
  // --------------+--------------+----------------------------------
  //       0       |      0       | true because no s/zext exists
  //       0       |      1       | zext(BO) == zext(A) op zext(B)
  //       1       |      0       | sext(BO) == sext(A) op sext(B)
  //       1       |      1       | zext(sext(BO)) ==
  //               |              |   zext(sext(A)) op zext(sext(B))
  if (BO->getOpcode() == Instruction::Add && !ZeroExtended && NonNegative) {
    // If a + b >= 0 and (a >= 0 or b >= 0), then
    //   sext(a + b) = sext(a) + sext(b)
    // even if the addition is not marked nsw.
    //
    // Leveraging this invariant, we can trace into an sext'ed inbounds GEP
    // index if the constant offset is non-negative.
    //
    // Verified in @sext_add in split-gep.ll.
    if (ConstantInt *ConstLHS = dyn_cast<ConstantInt>(LHS)) {
      if (!ConstLHS->isNegative())
        return true;
    }
    if (ConstantInt *ConstRHS = dyn_cast<ConstantInt>(RHS)) {
      if (!ConstRHS->isNegative())
        return true;
    }
  }

  // sext (add/sub nsw A, B) == add/sub nsw (sext A), (sext B)
  // zext (add/sub nuw A, B) == add/sub nuw (zext A), (zext B)
  if (BO->getOpcode() == Instruction::Add ||
      BO->getOpcode() == Instruction::Sub) {
    if (SignExtended && !BO->hasNoSignedWrap())
      return false;
    if (ZeroExtended && !BO->hasNoUnsignedWrap())
      return false;
  }

  return true;
}

APInt ConstantOffsetExtractor::findInEitherOperand(BinaryOperator *BO,
                                                   bool SignExtended,
                                                   bool ZeroExtended) {
  // BO being non-negative does not shed light on whether its operands are
  // non-negative. Clear the NonNegative flag here.
  APInt ConstantOffset = find(BO->getOperand(0), SignExtended, ZeroExtended,
                              /* NonNegative */ false);
  // If we found a constant offset in the left operand, stop and return that.
  // This shortcut might cause us to miss opportunities of combining the
  // constant offsets in both operands, e.g., (a + 4) + (b + 5) => (a + b) + 9.
  // However, such cases are probably already handled by -instcombine,
  // given this pass runs after the standard optimizations.
  if (ConstantOffset != 0) return ConstantOffset;
  ConstantOffset = find(BO->getOperand(1), SignExtended, ZeroExtended,
                        /* NonNegative */ false);
  // If BO is a sub operator, negate the constant offset found in the right
  // operand.
  if (BO->getOpcode() == Instruction::Sub)
    ConstantOffset = -ConstantOffset;
  return ConstantOffset;
}

APInt ConstantOffsetExtractor::find(Value *V, bool SignExtended,
                                    bool ZeroExtended, bool NonNegative) {
  // TODO(jingyue): We could trace into integer/pointer casts, such as
  // inttoptr, ptrtoint, bitcast, and addrspacecast. We choose to handle only
  // integers because it gives good enough results for our benchmarks.
  unsigned BitWidth = cast<IntegerType>(V->getType())->getBitWidth();

  // We cannot do much with Values that are not a User, such as an Argument.
  User *U = dyn_cast<User>(V);
  if (U == nullptr) return APInt(BitWidth, 0);

  APInt ConstantOffset(BitWidth, 0);
  if (ConstantInt *CI = dyn_cast<ConstantInt>(V)) {
    // Hooray, we found it!
    ConstantOffset = CI->getValue();
  } else if (BinaryOperator *BO = dyn_cast<BinaryOperator>(V)) {
    // Trace into subexpressions for more hoisting opportunities.
    if (CanTraceInto(SignExtended, ZeroExtended, BO, NonNegative))
      ConstantOffset = findInEitherOperand(BO, SignExtended, ZeroExtended);
  } else if (isa<SExtInst>(V)) {
    ConstantOffset = find(U->getOperand(0), /* SignExtended */ true,
                          ZeroExtended, NonNegative).sext(BitWidth);
  } else if (isa<ZExtInst>(V)) {
    // As an optimization, we can clear the SignExtended flag because
    // sext(zext(a)) = zext(a). Verified in @sext_zext in split-gep.ll.
    //
    // Clear the NonNegative flag, because zext(a) >= 0 does not imply a >= 0.
    ConstantOffset =
        find(U->getOperand(0), /* SignExtended */ false,
             /* ZeroExtended */ true, /* NonNegative */ false).zext(BitWidth);
  }

  // If we found a non-zero constant offset, add it to the path for
  // rebuildWithoutConstOffset. Zero is a valid constant offset, but doesn't
  // help this optimization.
  if (ConstantOffset != 0)
    UserChain.push_back(U);
  return ConstantOffset;
}

Value *ConstantOffsetExtractor::applyExts(Value *V) {
  Value *Current = V;
  // ExtInsts is built in the use-def order. Therefore, we apply them to V
  // in the reversed order.
  for (auto I = ExtInsts.rbegin(), E = ExtInsts.rend(); I != E; ++I) {
    if (Constant *C = dyn_cast<Constant>(Current)) {
      // If Current is a constant, apply s/zext using ConstantExpr::getCast.
      // ConstantExpr::getCast emits a ConstantInt if C is a ConstantInt.
      Current = ConstantExpr::getCast((*I)->getOpcode(), C, (*I)->getType());
    } else {
      Instruction *Ext = (*I)->clone();
      Ext->setOperand(0, Current);
      Ext->insertBefore(IP);
      Current = Ext;
    }
  }
  return Current;
}

Value *ConstantOffsetExtractor::rebuildWithoutConstOffset() {
  distributeExtsAndCloneChain(UserChain.size() - 1);
  // Remove all nullptrs (used to be s/zext) from UserChain.
  unsigned NewSize = 0;
  for (User *I : UserChain) {
    if (I != nullptr) {
      UserChain[NewSize] = I;
      NewSize++;
    }
  }
  UserChain.resize(NewSize);
  return removeConstOffset(UserChain.size() - 1);
}

Value *
ConstantOffsetExtractor::distributeExtsAndCloneChain(unsigned ChainIndex) {
  User *U = UserChain[ChainIndex];
  if (ChainIndex == 0) {
    assert(isa<ConstantInt>(U));
    // If U is a ConstantInt, applyExts will return a ConstantInt as well.
    return UserChain[ChainIndex] = cast<ConstantInt>(applyExts(U));
  }

  if (CastInst *Cast = dyn_cast<CastInst>(U)) {
    assert((isa<SExtInst>(Cast) || isa<ZExtInst>(Cast)) &&
           "We only traced into two types of CastInst: sext and zext");
    ExtInsts.push_back(Cast);
    UserChain[ChainIndex] = nullptr;
    return distributeExtsAndCloneChain(ChainIndex - 1);
  }

  // Function find only traces into BinaryOperator and CastInst.
  BinaryOperator *BO = cast<BinaryOperator>(U);
  // OpNo = which operand of BO is UserChain[ChainIndex - 1]
  unsigned OpNo = (BO->getOperand(0) == UserChain[ChainIndex - 1] ? 0 : 1);
  Value *TheOther = applyExts(BO->getOperand(1 - OpNo));
  Value *NextInChain = distributeExtsAndCloneChain(ChainIndex - 1);

  BinaryOperator *NewBO = nullptr;
  if (OpNo == 0) {
    NewBO = BinaryOperator::Create(BO->getOpcode(), NextInChain, TheOther,
                                   BO->getName(), IP);
  } else {
    NewBO = BinaryOperator::Create(BO->getOpcode(), TheOther, NextInChain,
                                   BO->getName(), IP);
  }
  return UserChain[ChainIndex] = NewBO;
}

Value *ConstantOffsetExtractor::removeConstOffset(unsigned ChainIndex) {
  if (ChainIndex == 0) {
    assert(isa<ConstantInt>(UserChain[ChainIndex]));
    return ConstantInt::getNullValue(UserChain[ChainIndex]->getType());
  }

  BinaryOperator *BO = cast<BinaryOperator>(UserChain[ChainIndex]);
  assert(BO->getNumUses() <= 1 &&
         "distributeExtsAndCloneChain clones each BinaryOperator in "
         "UserChain, so no one should be used more than "
         "once");

  unsigned OpNo = (BO->getOperand(0) == UserChain[ChainIndex - 1] ? 0 : 1);
  assert(BO->getOperand(OpNo) == UserChain[ChainIndex - 1]);
  Value *NextInChain = removeConstOffset(ChainIndex - 1);
  Value *TheOther = BO->getOperand(1 - OpNo);

  // If NextInChain is 0 and not the LHS of a sub, we can simplify the
  // sub-expression to be just TheOther.
  if (ConstantInt *CI = dyn_cast<ConstantInt>(NextInChain)) {
    if (CI->isZero() && !(BO->getOpcode() == Instruction::Sub && OpNo == 0))
      return TheOther;
  }

  BinaryOperator::BinaryOps NewOp = BO->getOpcode();
  if (BO->getOpcode() == Instruction::Or) {
    // Rebuild "or" as "add", because "or" may be invalid for the new
    // expression.
    //
    // For instance, given
    //   a | (b + 5) where a and b + 5 have no common bits,
    // we can extract 5 as the constant offset.
    //
    // However, reusing the "or" in the new index would give us
    //   (a | b) + 5
    // which does not equal a | (b + 5).
    //
    // Replacing the "or" with "add" is fine, because
    //   a | (b + 5) = a + (b + 5) = (a + b) + 5
    NewOp = Instruction::Add;
  }

  BinaryOperator *NewBO;
  if (OpNo == 0) {
    NewBO = BinaryOperator::Create(NewOp, NextInChain, TheOther, "", IP);
  } else {
    NewBO = BinaryOperator::Create(NewOp, TheOther, NextInChain, "", IP);
  }
  NewBO->takeName(BO);
  return NewBO;
}

Value *ConstantOffsetExtractor::Extract(Value *Idx, GetElementPtrInst *GEP,
                                        User *&UserChainTail,
                                        const DominatorTree *DT) {
  ConstantOffsetExtractor Extractor(GEP, DT);
  // Find a non-zero constant offset first.
  APInt ConstantOffset =
      Extractor.find(Idx, /* SignExtended */ false, /* ZeroExtended */ false,
                     GEP->isInBounds());
  if (ConstantOffset == 0) {
    UserChainTail = nullptr;
    return nullptr;
  }
  // Separates the constant offset from the GEP index.
  Value *IdxWithoutConstOffset = Extractor.rebuildWithoutConstOffset();
  UserChainTail = Extractor.UserChain.back();
  return IdxWithoutConstOffset;
}

int64_t ConstantOffsetExtractor::Find(Value *Idx, GetElementPtrInst *GEP,
                                      const DominatorTree *DT) {
  // If Idx is an index of an inbounds GEP, Idx is guaranteed to be
  // non-negative.
  return ConstantOffsetExtractor(GEP, DT)
      .find(Idx, /* SignExtended */ false, /* ZeroExtended */ false,
            GEP->isInBounds())
      .getSExtValue();
}

bool SeparateConstOffsetFromGEP::canonicalizeArrayIndicesToPointerSize(
    GetElementPtrInst *GEP) {
  bool Changed = false;
  Type *IntPtrTy = DL->getIntPtrType(GEP->getType());
  gep_type_iterator GTI = gep_type_begin(*GEP);
  for (User::op_iterator I = GEP->op_begin() + 1, E = GEP->op_end();
       I != E; ++I, ++GTI) {
    // Skip struct member indices, which must be i32.
    if (GTI.isSequential()) {
      if ((*I)->getType() != IntPtrTy) {
        *I = CastInst::CreateIntegerCast(*I, IntPtrTy, true, "idxprom", GEP);
        Changed = true;
      }
    }
  }
  return Changed;
}

int64_t
SeparateConstOffsetFromGEP::accumulateByteOffset(GetElementPtrInst *GEP,
                                                 bool &NeedsExtraction) {
  NeedsExtraction = false;
  int64_t AccumulativeByteOffset = 0;
  gep_type_iterator GTI = gep_type_begin(*GEP);
  for (unsigned I = 1, E = GEP->getNumOperands(); I != E; ++I, ++GTI) {
    if (GTI.isSequential()) {
      // Tries to extract a constant offset from this GEP index.
      int64_t ConstantOffset =
          ConstantOffsetExtractor::Find(GEP->getOperand(I), GEP, DT);
      if (ConstantOffset != 0) {
        NeedsExtraction = true;
        // A GEP may have multiple indices. We accumulate the extracted
        // constant offset to a byte offset, and later offset the remainder of
        // the original GEP with this byte offset.
        AccumulativeByteOffset +=
            ConstantOffset * DL->getTypeAllocSize(GTI.getIndexedType());
      }
    } else if (LowerGEP) {
      StructType *StTy = GTI.getStructType();
      uint64_t Field = cast<ConstantInt>(GEP->getOperand(I))->getZExtValue();
      // Skip field 0 as the offset is always 0.
      if (Field != 0) {
        NeedsExtraction = true;
        AccumulativeByteOffset +=
            DL->getStructLayout(StTy)->getElementOffset(Field);
      }
    }
  }
  return AccumulativeByteOffset;
}

void SeparateConstOffsetFromGEP::lowerToSingleIndexGEPs(
    GetElementPtrInst *Variadic, int64_t AccumulativeByteOffset) {
  IRBuilder<> Builder(Variadic);
  Type *IntPtrTy = DL->getIntPtrType(Variadic->getType());

  Type *I8PtrTy =
      Builder.getInt8PtrTy(Variadic->getType()->getPointerAddressSpace());
  Value *ResultPtr = Variadic->getOperand(0);
  Loop *L = LI->getLoopFor(Variadic->getParent());
  // The base is a candidate for swapping only if it is loop invariant and
  // used no more than once in the loop.
  bool isSwapCandidate =
      L && L->isLoopInvariant(ResultPtr) &&
      !hasMoreThanOneUseInLoop(ResultPtr, L);
  Value *FirstResult = nullptr;

  if (ResultPtr->getType() != I8PtrTy)
    ResultPtr = Builder.CreateBitCast(ResultPtr, I8PtrTy);

  gep_type_iterator GTI = gep_type_begin(*Variadic);
  // Create an ugly GEP for each sequential index. We don't create GEPs for
  // structure indices, as they are accumulated in the constant offset index.
  for (unsigned I = 1, E = Variadic->getNumOperands(); I != E; ++I, ++GTI) {
    if (GTI.isSequential()) {
      Value *Idx = Variadic->getOperand(I);
      // Skip zero indices.
      if (ConstantInt *CI = dyn_cast<ConstantInt>(Idx))
        if (CI->isZero())
          continue;

      APInt ElementSize = APInt(IntPtrTy->getIntegerBitWidth(),
                                DL->getTypeAllocSize(GTI.getIndexedType()));
      // Scale the index by the element size.
      if (ElementSize != 1) {
        if (ElementSize.isPowerOf2()) {
          Idx = Builder.CreateShl(
              Idx, ConstantInt::get(IntPtrTy, ElementSize.logBase2()));
        } else {
          Idx = Builder.CreateMul(Idx, ConstantInt::get(IntPtrTy, ElementSize));
        }
      }
      // Create an ugly GEP with a single index for each index.
      ResultPtr =
          Builder.CreateGEP(Builder.getInt8Ty(), ResultPtr, Idx, "uglygep");
      if (FirstResult == nullptr)
        FirstResult = ResultPtr;
    }
  }

  // Create a GEP with the constant offset index.
  if (AccumulativeByteOffset != 0) {
    Value *Offset = ConstantInt::get(IntPtrTy, AccumulativeByteOffset);
    ResultPtr =
        Builder.CreateGEP(Builder.getInt8Ty(), ResultPtr, Offset, "uglygep");
  } else
    isSwapCandidate = false;

  // If we created a GEP with a constant index, and the base is loop invariant,
  // then we swap the first one with it, so LICM can move the constant GEP out
  // later.
  GetElementPtrInst *FirstGEP = dyn_cast_or_null<GetElementPtrInst>(FirstResult);
  GetElementPtrInst *SecondGEP = dyn_cast_or_null<GetElementPtrInst>(ResultPtr);
  if (isSwapCandidate && isLegalToSwapOperand(FirstGEP, SecondGEP, L))
    swapGEPOperand(FirstGEP, SecondGEP);

  if (ResultPtr->getType() != Variadic->getType())
    ResultPtr = Builder.CreateBitCast(ResultPtr, Variadic->getType());

  Variadic->replaceAllUsesWith(ResultPtr);
  Variadic->eraseFromParent();
}

void
SeparateConstOffsetFromGEP::lowerToArithmetics(GetElementPtrInst *Variadic,
                                               int64_t AccumulativeByteOffset) {
  IRBuilder<> Builder(Variadic);
  Type *IntPtrTy = DL->getIntPtrType(Variadic->getType());

  Value *ResultPtr = Builder.CreatePtrToInt(Variadic->getOperand(0), IntPtrTy);
  gep_type_iterator GTI = gep_type_begin(*Variadic);
  // Create ADD/SHL/MUL arithmetic operations for each sequential index. We
  // don't create arithmetic for structure indices, as they are accumulated
  // in the constant offset index.
  for (unsigned I = 1, E = Variadic->getNumOperands(); I != E; ++I, ++GTI) {
    if (GTI.isSequential()) {
      Value *Idx = Variadic->getOperand(I);
      // Skip zero indices.
      if (ConstantInt *CI = dyn_cast<ConstantInt>(Idx))
        if (CI->isZero())
          continue;

      APInt ElementSize = APInt(IntPtrTy->getIntegerBitWidth(),
                                DL->getTypeAllocSize(GTI.getIndexedType()));
      // Scale the index by the element size.
      if (ElementSize != 1) {
        if (ElementSize.isPowerOf2()) {
          Idx = Builder.CreateShl(
              Idx, ConstantInt::get(IntPtrTy, ElementSize.logBase2()));
        } else {
          Idx = Builder.CreateMul(Idx, ConstantInt::get(IntPtrTy, ElementSize));
        }
      }
      // Create an ADD for each index.
      ResultPtr = Builder.CreateAdd(ResultPtr, Idx);
    }
  }

  // Create an ADD for the constant offset index.
  if (AccumulativeByteOffset != 0) {
    ResultPtr = Builder.CreateAdd(
        ResultPtr, ConstantInt::get(IntPtrTy, AccumulativeByteOffset));
  }

  ResultPtr = Builder.CreateIntToPtr(ResultPtr, Variadic->getType());
  Variadic->replaceAllUsesWith(ResultPtr);
  Variadic->eraseFromParent();
}

bool SeparateConstOffsetFromGEP::splitGEP(GetElementPtrInst *GEP) {
  // Skip vector GEPs.
  if (GEP->getType()->isVectorTy())
    return false;

  // The backend can already nicely handle the case where all indices are
  // constant.
  if (GEP->hasAllConstantIndices())
    return false;

  bool Changed = canonicalizeArrayIndicesToPointerSize(GEP);

  bool NeedsExtraction;
  int64_t AccumulativeByteOffset = accumulateByteOffset(GEP, NeedsExtraction);

  if (!NeedsExtraction)
    return Changed;

  // If LowerGEP is disabled, before really splitting the GEP, check whether
  // the backend supports the addressing mode we are about to produce. If not,
  // this splitting probably won't be beneficial.
  // If LowerGEP is enabled, even if the extracted constant offset cannot match
  // the addressing mode, we can still do optimizations on the other lowered
  // parts of the variable indices. Therefore, we don't check for addressing
  // modes in that case.
  if (!LowerGEP) {
    TargetTransformInfo &TTI =
        getAnalysis<TargetTransformInfoWrapperPass>().getTTI(
            *GEP->getParent()->getParent());
    unsigned AddrSpace = GEP->getPointerAddressSpace();
    if (!TTI.isLegalAddressingMode(GEP->getResultElementType(),
                                   /*BaseGV=*/nullptr, AccumulativeByteOffset,
                                   /*HasBaseReg=*/true, /*Scale=*/0,
                                   AddrSpace)) {
      return Changed;
    }
  }

  // Remove the constant offset in each sequential index. The resultant GEP
  // computes the variadic base.
  // Notice that we don't remove struct field indices here. If LowerGEP is
  // disabled, a structure index is not accumulated and we still use the old
  // one. If LowerGEP is enabled, a structure index is accumulated in the
  // constant offset. LowerToSingleIndexGEPs or lowerToArithmetics will later
  // handle the constant offset and won't need a new structure index.
  gep_type_iterator GTI = gep_type_begin(*GEP);
  for (unsigned I = 1, E = GEP->getNumOperands(); I != E; ++I, ++GTI) {
    if (GTI.isSequential()) {
      // Splits this GEP index into a variadic part and a constant offset, and
      // uses the variadic part as the new index.
      Value *OldIdx = GEP->getOperand(I);
      User *UserChainTail;
      Value *NewIdx =
          ConstantOffsetExtractor::Extract(OldIdx, GEP, UserChainTail, DT);
      if (NewIdx != nullptr) {
        // Switches to the index with the constant offset removed.
        GEP->setOperand(I, NewIdx);
        // After switching to the new index, we can garbage-collect UserChain
        // and the old index if they are not used.
        RecursivelyDeleteTriviallyDeadInstructions(UserChainTail);
        RecursivelyDeleteTriviallyDeadInstructions(OldIdx);
      }
    }
  }

  // Clear the inbounds attribute because the new index may be off-bound.
  // e.g.,
  //
  //   b    = add i64 a, 5
  //   addr = gep inbounds float, float* p, i64 b
  //
  // is transformed to:
  //
  //   addr2 = gep float, float* p, i64 a ; inbounds removed
  //   addr  = gep inbounds float, float* addr2, i64 5
  //
  // If a is -4, although the old index b is in bounds, the new index a is
  // off-bound. http://llvm.org/docs/LangRef.html#id181 says "if the
  // inbounds keyword is not present, the offsets are added to the base
  // address with silently-wrapping two's complement arithmetic".
  // Therefore, the final code will be semantically equivalent.
  //
  // TODO(jingyue): do some range analysis to keep as many inbounds as
  // possible. GEPs with inbounds are more friendly to alias analysis.
  bool GEPWasInBounds = GEP->isInBounds();
  GEP->setIsInBounds(false);

  // Lowers a GEP to either GEPs with a single index or arithmetic operations.
  if (LowerGEP) {
    // As currently BasicAA does not analyze ptrtoint/inttoptr, do not lower to
    // arithmetic operations if the target uses alias analysis in codegen.
    if (TM && TM->getSubtargetImpl(*GEP->getParent()->getParent())->useAA())
      lowerToSingleIndexGEPs(GEP, AccumulativeByteOffset);
    else
      lowerToArithmetics(GEP, AccumulativeByteOffset);
    return true;
  }

  // No need to create another GEP if the accumulative byte offset is 0.
  if (AccumulativeByteOffset == 0)
    return true;

  // Offsets the base with the accumulative byte offset.
  //
  //   %gep            ; the base
  //   ... %gep ...
  //
  // => add the offset
  //
  //   %gep2           ; clone of %gep
  //   %new.gep = gep %gep2, <offset / sizeof(*%gep)>
  //   %gep            ; will be removed
  //   ... %gep ...
  //
  // => replace all uses of %gep with %new.gep and remove %gep
  //
  //   %gep2           ; clone of %gep
  //   %new.gep = gep %gep2, <offset / sizeof(*%gep)>
  //   ... %new.gep ...
  //
  // If AccumulativeByteOffset is not a multiple of sizeof(*%gep), we emit an
  // uglygep (http://llvm.org/docs/GetElementPtr.html#what-s-an-uglygep):
  // bitcast %gep2 to i8*, add the offset, and bitcast the result back to the
  // type of %gep.
  //
  //   %gep2           ; clone of %gep
  //   %0       = bitcast %gep2 to i8*
  //   %uglygep = gep %0, <offset>
  //   %new.gep = bitcast %uglygep to <type of %gep>
  //   ... %new.gep ...
  Instruction *NewGEP = GEP->clone();
  NewGEP->insertBefore(GEP);

  // Per the ANSI C standard, signed / unsigned = unsigned and
  // signed % unsigned = unsigned. Therefore, we cast ElementTypeSizeOfGEP to
  // signed so that the division and remainder below, which involve the signed
  // AccumulativeByteOffset, are computed in signed arithmetic.
  int64_t ElementTypeSizeOfGEP = static_cast<int64_t>(
      DL->getTypeAllocSize(GEP->getResultElementType()));
  Type *IntPtrTy = DL->getIntPtrType(GEP->getType());
  if (AccumulativeByteOffset % ElementTypeSizeOfGEP == 0) {
    // Very likely. As long as %gep is naturally aligned, the byte offset we
    // extracted should be a multiple of sizeof(*%gep).
    int64_t Index = AccumulativeByteOffset / ElementTypeSizeOfGEP;
    NewGEP = GetElementPtrInst::Create(GEP->getResultElementType(), NewGEP,
                                       ConstantInt::get(IntPtrTy, Index, true),
                                       GEP->getName(), GEP);
    // Inherit the inbounds attribute of the original GEP.
    cast<GetElementPtrInst>(NewGEP)->setIsInBounds(GEPWasInBounds);
  } else {
    // Unlikely but possible. For example,
    //  #pragma pack(1)
    //  struct S {
    //    int a[3];
    //    int64 b[8];
    //  };
    //  #pragma pack()
    //
    // Suppose the gep before extraction is &s[i + 1].b[j + 3]. After
    // extraction, it becomes &s[i].b[j] and AccumulativeByteOffset is
    // sizeof(S) + 3 * sizeof(int64) = 100, which is not a multiple of
    // sizeof(int64).
    //
    // Emit an uglygep in this case.
    Type *I8PtrTy = Type::getInt8PtrTy(GEP->getContext(),
                                       GEP->getPointerAddressSpace());
    NewGEP = new BitCastInst(NewGEP, I8PtrTy, "", GEP);
    NewGEP = GetElementPtrInst::Create(
        Type::getInt8Ty(GEP->getContext()), NewGEP,
        ConstantInt::get(IntPtrTy, AccumulativeByteOffset, true), "uglygep",
        GEP);
    // Inherit the inbounds attribute of the original GEP.
    cast<GetElementPtrInst>(NewGEP)->setIsInBounds(GEPWasInBounds);
    if (GEP->getType() != I8PtrTy)
      NewGEP = new BitCastInst(NewGEP, GEP->getType(), GEP->getName(), GEP);
  }

  GEP->replaceAllUsesWith(NewGEP);
  GEP->eraseFromParent();

  return true;
}

bool SeparateConstOffsetFromGEP::runOnFunction(Function &F) {
  if (skipFunction(F))
    return false;

  if (DisableSeparateConstOffsetFromGEP)
    return false;

  DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
  SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
  LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
  TLI = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
  bool Changed = false;
  for (BasicBlock &B : F) {
    for (BasicBlock::iterator I = B.begin(), IE = B.end(); I != IE;)
      if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(I++))
        Changed |= splitGEP(GEP);
    // No need to split GEP ConstantExprs because all their indices are
    // constant already.
  }

  Changed |= reuniteExts(F);

  if (VerifyNoDeadCode)
    verifyNoDeadCode(F);

  return Changed;
}

Instruction *SeparateConstOffsetFromGEP::findClosestMatchingDominator(
    const SCEV *Key, Instruction *Dominatee) {
  auto Pos = DominatingExprs.find(Key);
  if (Pos == DominatingExprs.end())
    return nullptr;

  auto &Candidates = Pos->second;
  // Because we process the basic blocks in pre-order of the dominator tree, a
  // candidate that doesn't dominate the current instruction won't dominate any
  // future instruction either. Therefore, we pop it out of the stack. This
  // optimization makes the algorithm O(n).
  while (!Candidates.empty()) {
    Instruction *Candidate = Candidates.back();
    if (DT->dominates(Candidate, Dominatee))
      return Candidate;
    Candidates.pop_back();
  }
  return nullptr;
}

bool SeparateConstOffsetFromGEP::reuniteExts(Instruction *I) {
  if (!SE->isSCEVable(I->getType()))
    return false;

  //   Dom: LHS+RHS
  //   I: sext(LHS)+sext(RHS)
  // If Dom can't sign overflow and Dom dominates I, optimize I to sext(Dom).
  // TODO: handle zext
  Value *LHS = nullptr, *RHS = nullptr;
  if (match(I, m_Add(m_SExt(m_Value(LHS)), m_SExt(m_Value(RHS)))) ||
      match(I, m_Sub(m_SExt(m_Value(LHS)), m_SExt(m_Value(RHS))))) {
    if (LHS->getType() == RHS->getType()) {
      const SCEV *Key =
          SE->getAddExpr(SE->getUnknown(LHS), SE->getUnknown(RHS));
      if (auto *Dom = findClosestMatchingDominator(Key, I)) {
        Instruction *NewSExt = new SExtInst(Dom, I->getType(), "", I);
        NewSExt->takeName(I);
        I->replaceAllUsesWith(NewSExt);
        RecursivelyDeleteTriviallyDeadInstructions(I);
        return true;
      }
    }
  }

  // Add I to DominatingExprs if it's an add/sub that can't sign overflow.
  if (match(I, m_NSWAdd(m_Value(LHS), m_Value(RHS))) ||
      match(I, m_NSWSub(m_Value(LHS), m_Value(RHS)))) {
    if (programUndefinedIfFullPoison(I)) {
      const SCEV *Key =
          SE->getAddExpr(SE->getUnknown(LHS), SE->getUnknown(RHS));
      DominatingExprs[Key].push_back(I);
    }
  }
  return false;
}

bool SeparateConstOffsetFromGEP::reuniteExts(Function &F) {
  bool Changed = false;
  DominatingExprs.clear();
  for (const auto Node : depth_first(DT)) {
    BasicBlock *BB = Node->getBlock();
    for (auto I = BB->begin(); I != BB->end(); ) {
      Instruction *Cur = &*I++;
      Changed |= reuniteExts(Cur);
    }
  }
  return Changed;
}

void SeparateConstOffsetFromGEP::verifyNoDeadCode(Function &F) {
  for (BasicBlock &B : F) {
    for (Instruction &I : B) {
      if (isInstructionTriviallyDead(&I)) {
        std::string ErrMessage;
        raw_string_ostream RSO(ErrMessage);
        RSO << "Dead instruction detected!\n" << I << "\n";
        llvm_unreachable(RSO.str().c_str());
      }
    }
  }
}

bool SeparateConstOffsetFromGEP::isLegalToSwapOperand(
    GetElementPtrInst *FirstGEP, GetElementPtrInst *SecondGEP, Loop *CurLoop) {
  if (!FirstGEP || !FirstGEP->hasOneUse())
    return false;

  if (!SecondGEP || FirstGEP->getParent() != SecondGEP->getParent())
    return false;

  if (FirstGEP == SecondGEP)
    return false;

  unsigned FirstNum = FirstGEP->getNumOperands();
  unsigned SecondNum = SecondGEP->getNumOperands();
  // Give up if the number of operands is not 2.
  if (FirstNum != SecondNum || FirstNum != 2)
    return false;

  Value *FirstBase = FirstGEP->getOperand(0);
  Value *SecondBase = SecondGEP->getOperand(0);
  Value *FirstOffset = FirstGEP->getOperand(1);
  // Give up if the index of the first GEP is loop invariant.
  if (CurLoop->isLoopInvariant(FirstOffset))
    return false;

  // Give up if the bases don't have the same type.
  if (FirstBase->getType() != SecondBase->getType())
    return false;

  Instruction *FirstOffsetDef = dyn_cast<Instruction>(FirstOffset);

  // Check if the second operand of the first GEP has a constant coefficient.
  // For example, for the following code, we won't gain anything by
  // hoisting the second GEP out because the second GEP can be folded away.
  //   %scevgep.sum.ur159 = add i64 %idxprom48.ur, 256
  //   %67 = shl i64 %scevgep.sum.ur159, 2
  //   %uglygep160 = getelementptr i8* %65, i64 %67
  //   %uglygep161 = getelementptr i8* %uglygep160, i64 -1024

  // Skip constant shift instructions, which may be generated by splitting
  // GEPs.
  if (FirstOffsetDef && FirstOffsetDef->isShift() &&
      isa<ConstantInt>(FirstOffsetDef->getOperand(1)))
    FirstOffsetDef = dyn_cast<Instruction>(FirstOffsetDef->getOperand(0));

  // Give up if FirstOffsetDef is an Add or Sub with a constant, because the
  // swap may not be profitable at all due to constant folding.
  if (FirstOffsetDef)
    if (BinaryOperator *BO = dyn_cast<BinaryOperator>(FirstOffsetDef)) {
      unsigned opc = BO->getOpcode();
      if ((opc == Instruction::Add || opc == Instruction::Sub) &&
          (isa<ConstantInt>(BO->getOperand(0)) ||
           isa<ConstantInt>(BO->getOperand(1))))
        return false;
    }
  return true;
}

bool SeparateConstOffsetFromGEP::hasMoreThanOneUseInLoop(Value *V, Loop *L) {
  int UsesInLoop = 0;
  for (User *U : V->users()) {
    if (Instruction *User = dyn_cast<Instruction>(U))
      if (L->contains(User))
        if (++UsesInLoop > 1)
          return true;
  }
  return false;
}

void SeparateConstOffsetFromGEP::swapGEPOperand(GetElementPtrInst *First,
                                                GetElementPtrInst *Second) {
  Value *Offset1 = First->getOperand(1);
  Value *Offset2 = Second->getOperand(1);
  First->setOperand(1, Offset2);
  Second->setOperand(1, Offset1);

  // We changed p+o+c to p+c+o, and p+c may not be inbounds anymore.
  const DataLayout &DAL = First->getModule()->getDataLayout();
  APInt Offset(DAL.getPointerSizeInBits(
                   cast<PointerType>(First->getType())->getAddressSpace()),
               0);
  Value *NewBase =
      First->stripAndAccumulateInBoundsConstantOffsets(DAL, Offset);
  uint64_t ObjectSize;
  if (!getObjectSize(NewBase, ObjectSize, DAL, TLI) ||
      Offset.ugt(ObjectSize)) {
    First->setIsInBounds(false);
    Second->setIsInBounds(false);
  } else
    First->setIsInBounds(true);
}
static unsigned getBitWidth(Type *Ty, const DataLayout &DL)
Returns the bitwidth of the given scalar or pointer type.
bool getObjectSize(const Value *Ptr, uint64_t &Size, const DataLayout &DL, const TargetLibraryInfo *TLI, ObjectSizeOpts Opts={})
Compute the size of the object pointed by Ptr.
uint64_t CallInst * C
SymbolTableList< Instruction >::iterator eraseFromParent()
This method unlinks &#39;this&#39; from the containing basic block and deletes it.
Definition: Instruction.cpp:69
OverflowingBinaryOp_match< LHS, RHS, Instruction::Sub, OverflowingBinaryOperator::NoSignedWrap > m_NSWSub(const LHS &L, const RHS &R)
Definition: PatternMatch.h:645
A parsed version of the target data layout string in and methods for querying it. ...
Definition: DataLayout.h:109
class_match< Value > m_Value()
Match an arbitrary value and ignore it.
Definition: PatternMatch.h:72
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
BinaryOp_match< LHS, RHS, Instruction::Sub > m_Sub(const LHS &L, const RHS &R)
Definition: PatternMatch.h:514
Compute iterated dominance frontiers using a linear time algorithm.
Definition: AllocatorList.h:24
BinaryOps getOpcode() const
Definition: InstrTypes.h:523
INITIALIZE_PASS_BEGIN(SeparateConstOffsetFromGEP, "separate-const-offset-from-gep", "Split GEPs to a variadic base and a constant offset for better CSE", false, false) INITIALIZE_PASS_END(SeparateConstOffsetFromGEP
A Module instance is used to store all the information related to an LLVM module. ...
Definition: Module.h:63
The main scalar evolution driver.
static GetElementPtrInst * Create(Type *PointeeType, Value *Ptr, ArrayRef< Value *> IdxList, const Twine &NameStr="", Instruction *InsertBefore=nullptr)
Definition: Instructions.h:863
static cl::opt< bool > VerifyNoDeadCode("reassociate-geps-verify-no-dead-code", cl::init(false), cl::desc("Verify this pass produces no dead code"), cl::Hidden)
LLVMContext & getContext() const
All values hold a context through their type.
Definition: Value.cpp:728
unsigned getPointerSizeInBits(unsigned AS=0) const
Layout pointer size, in bits FIXME: The defaults need to be removed once all of the backends/clients ...
Definition: DataLayout.h:344
F(f)
This class represents a sign extension of integer types.
unsigned getPointerAddressSpace() const
Get the address space of this pointer or pointer vector type.
Definition: DerivedTypes.h:503
Hexagon Common GEP
bool isVectorTy() const
True if this is an instance of VectorType.
Definition: Type.h:227
bool hasNoSignedWrap() const
Determine whether the no signed wrap flag is set.
op_iterator op_begin()
Definition: User.h:214
static Constant * getNullValue(Type *Ty)
Constructor to create a &#39;0&#39; constant of arbitrary type.
Definition: Constants.cpp:207
iterator begin()
Instruction iterator methods.
Definition: BasicBlock.h:252
bool match(Val *V, const Pattern &P)
Definition: PatternMatch.h:49
AnalysisUsage & addRequired()
#define INITIALIZE_PASS_DEPENDENCY(depName)
Definition: PassSupport.h:51
const DataLayout & getDataLayout() const
Get the data layout for the module&#39;s target platform.
Definition: Module.cpp:361
bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset, bool HasBaseReg, int64_t Scale, unsigned AddrSpace=0, Instruction *I=nullptr) const
Return true if the addressing mode represented by AM is legal for this target, for a load/store of th...
This is the base class for all instructions that perform data casts.
Definition: InstrTypes.h:560
Class to represent struct types.
Definition: DerivedTypes.h:201
A Use represents the edge between a Value definition and its users.
Definition: Use.h:56
void setIsInBounds(bool b=true)
Set or clear the inbounds flag on this GEP instruction.
separate const offset from Split GEPs to a variadic base and a constant offset for better CSE
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
Definition: IRBuilder.h:668
Value * CreateAdd(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Definition: IRBuilder.h:893
This file implements a class to represent arbitrary precision integral constant values and operations...
BinaryOp_match< LHS, RHS, Instruction::Add > m_Add(const LHS &L, const RHS &R)
Definition: PatternMatch.h:502
bool programUndefinedIfFullPoison(const Instruction *PoisonI)
Return true if this function can prove that if PoisonI is executed and yields a full-poison value (al...
Value * CreateIntToPtr(Value *V, Type *DestTy, const Twine &Name="")
Definition: IRBuilder.h:1444
Instruction * clone() const
Create a copy of &#39;this&#39; instruction that is identical in all ways except the following: ...
Key
PAL metadata keys.
Value * CreateBitCast(Value *V, Type *DestTy, const Twine &Name="")
Definition: IRBuilder.h:1448
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:245
bool isInBounds() const
Determine whether the GEP has the inbounds flag.
This class represents a no-op cast from one type to another.
bool haveNoCommonBitsSet(const Value *LHS, const Value *RHS, const DataLayout &DL, AssumptionCache *AC=nullptr, const Instruction *CxtI=nullptr, const DominatorTree *DT=nullptr)
Return true if LHS and RHS have no common bits set.
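A hedged sketch of why this matters here (LHS, RHS, and DL assumed in scope): when the two addends share no set bits, the addition cannot carry, so an extracted constant offset cannot interfere with the variadic part:

  // a + b == a | b when no bit is set in both; no carries occur.
  if (haveNoCommonBitsSet(LHS, RHS, DL)) {
    // Safe to treat the two parts independently.
  }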
void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
Definition: Value.cpp:430
void takeName(Value *V)
Transfer the name from V to this value.
Definition: Value.cpp:292
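A minimal sketch of the usual replacement idiom (GEP and NewGEP are hypothetical names):

  NewGEP->takeName(GEP);            // keep the old name for readability
  GEP->replaceAllUsesWith(NewGEP);  // redirect every user
  RecursivelyDeleteTriviallyDeadInstructions(GEP);  // drop the dead original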
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree...
Definition: Dominators.h:140
void initializeSeparateConstOffsetFromGEPPass(PassRegistry &)
Value * getOperand(unsigned i) const
Definition: User.h:154
an instruction for type-safe pointer arithmetic to access elements of arrays and structs ...
Definition: Instructions.h:837
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:406
Wrapper pass for TargetTransformInfo.
FunctionPass * createSeparateConstOffsetFromGEPPass(const TargetMachine *TM=nullptr, bool LowerGEP=false)
void insertBefore(Instruction *InsertPos)
Insert an unlinked instruction into a basic block immediately before the specified instruction...
Definition: Instruction.cpp:75
LLVM Basic Block Representation.
Definition: BasicBlock.h:59
The instances of the Type class are immutable: once they are created, they are never changed...
Definition: Type.h:46
This is an important base class in LLVM.
Definition: Constant.h:42
This file contains the declarations for the subclasses of Constant, which represent the different fla...
unsigned getPointerAddressSpace() const
Returns the address space of the pointer operand.
Definition: Instructions.h:992
Represent the analysis usage information of a pass.
op_iterator op_end()
Definition: User.h:216
static cl::opt< bool > DisableSeparateConstOffsetFromGEP("disable-separate-const-offset-from-gep", cl::init(false), cl::desc("Do not separate the constant offset from a GEP instruction"), cl::Hidden)
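As a sketch, such a flag is typically checked at the top of the pass entry point, so that `opt -disable-separate-const-offset-from-gep` turns the pass into a no-op:

  if (DisableSeparateConstOffsetFromGEP)
    return false;  // report "no change" to the pass manager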
FunctionPass class - This class is used to implement most global optimizations.
Definition: Pass.h:285
bool RecursivelyDeleteTriviallyDeadInstructions(Value *V, const TargetLibraryInfo *TLI=nullptr)
If the specified value is a trivially dead instruction, delete it.
Definition: Local.cpp:401
auto find(R &&Range, const T &Val) -> decltype(adl_begin(Range))
Provide wrappers to std::find which take ranges instead of having to pass begin/end explicitly...
Definition: STLExtras.h:834
static PointerType * getInt8PtrTy(LLVMContext &C, unsigned AS=0)
Definition: Type.cpp:220
PointerType * getInt8PtrTy(unsigned AddrSpace=0)
Fetch the type representing a pointer to an 8-bit integer value.
Definition: IRBuilder.h:386
std::string & str()
Flushes the stream contents to the target string and returns the string's reference.
Definition: raw_ostream.h:478
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
Value * CreateMul(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Definition: IRBuilder.h:937
bool isLoopInvariant(const Value *V) const
Return true if the specified value is loop invariant.
Definition: LoopInfo.cpp:56
CastClass_match< OpTy, Instruction::SExt > m_SExt(const OpTy &Op)
Matches SExt.
Definition: PatternMatch.h:918
Value * CreateGEP(Value *Ptr, ArrayRef< Value *> IdxList, const Twine &Name="")
Definition: IRBuilder.h:1227
bool contains(const LoopT *L) const
Return true if the specified loop is contained within this loop.
Definition: LoopInfo.h:110
static CastInst * CreateIntegerCast(Value *S, Type *Ty, bool isSigned, const Twine &Name="", Instruction *InsertBefore=nullptr)
Create a ZExt, BitCast, or Trunc for int -> int casts.
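A minimal sketch of normalizing a GEP index to the pointer-sized integer type before doing arithmetic on it; Idx, IntPtrTy, and InsertPt are assumed to be in scope:

  // Sign-extend, truncate, or bitcast Idx to IntPtrTy as needed.
  Value *NormIdx =
      CastInst::CreateIntegerCast(Idx, IntPtrTy, /*isSigned=*/true,
                                  "idx.ext", InsertPt);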
Iterator for intrusive lists based on ilist_node.
unsigned getNumOperands() const
Definition: User.h:176
This is the shared class of boolean and integer constants.
Definition: Constants.h:84
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
iterator end()
Definition: BasicBlock.h:254
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small...
Definition: SmallVector.h:864
bool dominates(const Instruction *Def, const Use &U) const
Return true if Def dominates a use in User.
Definition: Dominators.cpp:239
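A hedged sketch of a dominance-gated rewrite (DT is a DominatorTree, Def an Instruction *, and U a Use &, all assumed in scope): an existing value may only replace a use it dominates:

  if (DT.dominates(Def, U))
    U.set(Def);  // reuse Def instead of recomputing it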
Module.h This file contains the declarations for the Module class.
Provides information about what library functions are available for the current target.
static Constant * get(Type *Ty, uint64_t V, bool isSigned=false)
If Ty is a vector type, return a Constant with a splat of the given value.
Definition: Constants.cpp:560
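For illustration, a minimal sketch of materializing an accumulated byte offset as an IR constant; IntPtrTy and AccumulativeByteOffset are hypothetical names assumed in scope:

  Value *Offset = ConstantInt::get(IntPtrTy, AccumulativeByteOffset);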
void setPreservesCFG()
This function should be called by the pass iff it does not add or remove basic blocks from the function or modify terminator instructions in any way.
Definition: Pass.cpp:285
OverflowingBinaryOp_match< LHS, RHS, Instruction::Add, OverflowingBinaryOperator::NoSignedWrap > m_NSWAdd(const LHS &L, const RHS &R)
Definition: PatternMatch.h:637
void setOperand(unsigned i, Value *Val)
Definition: User.h:159
unsigned logBase2() const
Definition: APInt.h:1727
const Value * stripAndAccumulateInBoundsConstantOffsets(const DataLayout &DL, APInt &Offset) const
Accumulate offsets from stripInBoundsConstantOffsets().
Definition: Value.cpp:576
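A minimal sketch of accumulating the constant part of an address (Ptr, DL, and AS assumed in scope):

  APInt Off(DL.getPointerSizeInBits(AS), 0);
  // Base is Ptr with all in-bounds constant GEPs peeled off; their
  // byte offsets are summed into Off.
  const Value *Base = Ptr->stripAndAccumulateInBoundsConstantOffsets(DL, Off);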
const Module * getModule() const
Return the module owning the function this instruction belongs to, or nullptr if the function does not...
Definition: Instruction.cpp:57
Class for arbitrary precision integers.
Definition: APInt.h:69
bool isPowerOf2() const
Check if this APInt's value is a power of two greater than zero.
Definition: APInt.h:457
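A hedged sketch of the classic strength reduction these two queries enable (C is a const APInt &; Builder and V assumed in scope):

  // V * C becomes V << log2(C) when C is a power of two.
  if (C.isPowerOf2())
    V = Builder.CreateShl(V, C.logBase2());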
static BinaryOperator * Create(BinaryOps Op, Value *S1, Value *S2, const Twine &Name=Twine(), Instruction *InsertBefore=nullptr)
Construct a binary instruction, given the opcode and the two operands.
iterator_range< user_iterator > users()
Definition: Value.h:401
IntegerType * getInt8Ty()
Fetch the type representing an 8-bit integer.
Definition: IRBuilder.h:338
static Constant * getCast(unsigned ops, Constant *C, Type *Ty, bool OnlyIfReduced=false)
Convenience function for getting a Cast operation.
Definition: Constants.cpp:1435
Value * CreateShl(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Definition: IRBuilder.h:1012
bool ugt(const APInt &RHS) const
Unsigned greater than comparison.
Definition: APInt.h:1234
unsigned getNumUses() const
This method computes the number of uses of this Value.
Definition: Value.cpp:166
This class represents an analyzed expression in the program.
unsigned getIntegerBitWidth() const
Definition: DerivedTypes.h:97
Represents a single loop in the control flow graph.
Definition: LoopInfo.h:439
StringRef getName() const
Return a constant reference to the value's name.
Definition: Value.cpp:220
const Function * getParent() const
Return the enclosing method, or null if none.
Definition: BasicBlock.h:108
Type * getResultElementType() const
Definition: Instructions.h:939
LLVM_NODISCARD std::enable_if<!is_simple_type< Y >::value, typename cast_retty< X, const Y >::ret_type >::type dyn_cast(const Y &Val)
Definition: Casting.h:323
bool hasNoUnsignedWrap() const
Determine whether the no unsigned wrap flag is set.
Value * CreatePtrToInt(Value *V, Type *DestTy, const Twine &Name="")
Definition: IRBuilder.h:1440
iterator_range< df_iterator< T > > depth_first(const T &G)
A raw_ostream that writes to an std::string.
Definition: raw_ostream.h:462
bool isInstructionTriviallyDead(Instruction *I, const TargetLibraryInfo *TLI=nullptr)
Return true if the result produced by the instruction is not used, and the instruction has no side ef...
Definition: Local.cpp:324
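A minimal sketch of the kind of check behind the -reassociate-geps-verify-no-dead-code option (F is a Function & assumed in scope):

  for (BasicBlock &BB : F)
    for (Instruction &I : BB)
      assert(!isInstructionTriviallyDead(&I) &&
             "pass left a trivially dead instruction behind");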
LLVM Value Representation.
Definition: Value.h:73
Primary interface to the complete machine description for the target machine.
Definition: TargetMachine.h:57
The legacy pass manager&#39;s analysis pass to compute loop information.
Definition: LoopInfo.h:958
bool hasOneUse() const
Return true if there is exactly one user of this value.
Definition: Value.h:414
Legacy analysis pass which computes a DominatorTree.
Definition: Dominators.h:267
This pass exposes codegen information to IR-level passes.
static IntegerType * getInt8Ty(LLVMContext &C)
Definition: Type.cpp:174
bool hasAllConstantIndices() const
Return true if all of the indices of this GEP are constant integers.
const BasicBlock * getParent() const
Definition: Instruction.h:66
gep_type_iterator gep_type_begin(const User *GEP)