LLVM  10.0.0svn
StraightLineStrengthReduce.cpp
Go to the documentation of this file.
1 //===- StraightLineStrengthReduce.cpp - -----------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements straight-line strength reduction (SLSR). Unlike loop
10 // strength reduction, this algorithm is designed to reduce arithmetic
11 // redundancy in straight-line code instead of loops. It has proven to be
12 // effective in simplifying arithmetic statements derived from an unrolled loop.
13 // It can also simplify the logic of SeparateConstOffsetFromGEP.
14 //
15 // There are many optimizations we can perform in the domain of SLSR. This file
16 // for now contains only an initial step. Specifically, we look for strength
17 // reduction candidates in the following forms:
18 //
19 // Form 1: B + i * S
20 // Form 2: (B + i) * S
21 // Form 3: &B[i * S]
22 //
23 // where S is an integer variable, and i is a constant integer. If we found two
24 // candidates S1 and S2 in the same form and S1 dominates S2, we may rewrite S2
25 // in a simpler way with respect to S1. For example,
26 //
27 // S1: X = B + i * S
28 // S2: Y = B + i' * S => X + (i' - i) * S
29 //
30 // S1: X = (B + i) * S
31 // S2: Y = (B + i') * S => X + (i' - i) * S
32 //
33 // S1: X = &B[i * S]
34 // S2: Y = &B[i' * S] => &X[(i' - i) * S]
35 //
36 // Note: (i' - i) * S is folded to the extent possible.
37 //
38 // This rewriting is in general a good idea. The code patterns we focus on
39 // usually come from loop unrolling, so (i' - i) * S is likely the same
40 // across iterations and can be reused. When that happens, the optimized form
41 // takes only one add starting from the second iteration.
42 //
43 // When such rewriting is possible, we call S1 a "basis" of S2. When S2 has
44 // multiple bases, we choose to rewrite S2 with respect to its "immediate"
45 // basis, the basis that is the closest ancestor in the dominator tree.
46 //
47 // TODO:
48 //
49 // - Floating point arithmetics when fast math is enabled.
50 //
51 // - SLSR may decrease ILP at the architecture level. Targets that are very
52 // sensitive to ILP may want to disable it. Having SLSR to consider ILP is
53 // left as future work.
54 //
55 // - When (i' - i) is constant but i and i' are not, we could still perform
56 // SLSR.
57 
58 #include "llvm/ADT/APInt.h"
60 #include "llvm/ADT/SmallVector.h"
65 #include "llvm/IR/Constants.h"
66 #include "llvm/IR/DataLayout.h"
67 #include "llvm/IR/DerivedTypes.h"
68 #include "llvm/IR/Dominators.h"
70 #include "llvm/IR/IRBuilder.h"
71 #include "llvm/IR/InstrTypes.h"
72 #include "llvm/IR/Instruction.h"
73 #include "llvm/IR/Instructions.h"
74 #include "llvm/IR/Module.h"
75 #include "llvm/IR/Operator.h"
76 #include "llvm/IR/PatternMatch.h"
77 #include "llvm/IR/Type.h"
78 #include "llvm/IR/Value.h"
79 #include "llvm/Pass.h"
80 #include "llvm/Support/Casting.h"
82 #include "llvm/Transforms/Scalar.h"
83 #include <cassert>
84 #include <cstdint>
85 #include <limits>
86 #include <list>
87 #include <vector>
88 
89 using namespace llvm;
90 using namespace PatternMatch;
91 
92 static const unsigned UnknownAddressSpace =
94 
95 namespace {
96 
97 class StraightLineStrengthReduce : public FunctionPass {
98 public:
99  // SLSR candidate. Such a candidate must be in one of the forms described in
100  // the header comments.
101  struct Candidate {
102  enum Kind {
103  Invalid, // reserved for the default constructor
104  Add, // B + i * S
105  Mul, // (B + i) * S
106  GEP, // &B[..][i * S][..]
107  };
108 
109  Candidate() = default;
110  Candidate(Kind CT, const SCEV *B, ConstantInt *Idx, Value *S,
111  Instruction *I)
112  : CandidateKind(CT), Base(B), Index(Idx), Stride(S), Ins(I) {}
113 
114  Kind CandidateKind = Invalid;
115 
116  const SCEV *Base = nullptr;
117 
118  // Note that Index and Stride of a GEP candidate do not necessarily have the
119  // same integer type. In that case, during rewriting, Stride will be
120  // sign-extended or truncated to Index's type.
121  ConstantInt *Index = nullptr;
122 
123  Value *Stride = nullptr;
124 
125  // The instruction this candidate corresponds to. It helps us to rewrite a
126  // candidate with respect to its immediate basis. Note that one instruction
127  // can correspond to multiple candidates depending on how you associate the
128  // expression. For instance,
129  //
130  // (a + 1) * (b + 2)
131  //
132  // can be treated as
133  //
134  // <Base: a, Index: 1, Stride: b + 2>
135  //
136  // or
137  //
138  // <Base: b, Index: 2, Stride: a + 1>
139  Instruction *Ins = nullptr;
140 
141  // Points to the immediate basis of this candidate, or nullptr if we cannot
142  // find any basis for this candidate.
143  Candidate *Basis = nullptr;
144  };
145 
146  static char ID;
147 
148  StraightLineStrengthReduce() : FunctionPass(ID) {
150  }
151 
152  void getAnalysisUsage(AnalysisUsage &AU) const override {
156  // We do not modify the shape of the CFG.
157  AU.setPreservesCFG();
158  }
159 
160  bool doInitialization(Module &M) override {
161  DL = &M.getDataLayout();
162  return false;
163  }
164 
165  bool runOnFunction(Function &F) override;
166 
167 private:
168  // Returns true if Basis is a basis for C, i.e., Basis dominates C and they
169  // share the same base and stride.
170  bool isBasisFor(const Candidate &Basis, const Candidate &C);
171 
172  // Returns whether the candidate can be folded into an addressing mode.
173  bool isFoldable(const Candidate &C, TargetTransformInfo *TTI,
174  const DataLayout *DL);
175 
176  // Returns true if C is already in a simplest form and not worth being
177  // rewritten.
178  bool isSimplestForm(const Candidate &C);
179 
180  // Checks whether I is in a candidate form. If so, adds all the matching forms
181  // to Candidates, and tries to find the immediate basis for each of them.
182  void allocateCandidatesAndFindBasis(Instruction *I);
183 
184  // Allocate candidates and find bases for Add instructions.
185  void allocateCandidatesAndFindBasisForAdd(Instruction *I);
186 
187  // Given I = LHS + RHS, factors RHS into i * S and makes (LHS + i * S) a
188  // candidate.
189  void allocateCandidatesAndFindBasisForAdd(Value *LHS, Value *RHS,
190  Instruction *I);
191  // Allocate candidates and find bases for Mul instructions.
192  void allocateCandidatesAndFindBasisForMul(Instruction *I);
193 
194  // Splits LHS into Base + Index and, if succeeds, calls
195  // allocateCandidatesAndFindBasis.
196  void allocateCandidatesAndFindBasisForMul(Value *LHS, Value *RHS,
197  Instruction *I);
198 
199  // Allocate candidates and find bases for GetElementPtr instructions.
200  void allocateCandidatesAndFindBasisForGEP(GetElementPtrInst *GEP);
201 
202  // A helper function that scales Idx with ElementSize before invoking
203  // allocateCandidatesAndFindBasis.
204  void allocateCandidatesAndFindBasisForGEP(const SCEV *B, ConstantInt *Idx,
205  Value *S, uint64_t ElementSize,
206  Instruction *I);
207 
208  // Adds the given form <CT, B, Idx, S> to Candidates, and finds its immediate
209  // basis.
210  void allocateCandidatesAndFindBasis(Candidate::Kind CT, const SCEV *B,
211  ConstantInt *Idx, Value *S,
212  Instruction *I);
213 
214  // Rewrites candidate C with respect to Basis.
215  void rewriteCandidateWithBasis(const Candidate &C, const Candidate &Basis);
216 
217  // A helper function that factors ArrayIdx to a product of a stride and a
218  // constant index, and invokes allocateCandidatesAndFindBasis with the
219  // factorings.
220  void factorArrayIndex(Value *ArrayIdx, const SCEV *Base, uint64_t ElementSize,
222 
223  // Emit code that computes the "bump" from Basis to C. If the candidate is a
224  // GEP and the bump is not divisible by the element size of the GEP, this
225  // function sets the BumpWithUglyGEP flag to notify its caller to bump the
226  // basis using an ugly GEP.
227  static Value *emitBump(const Candidate &Basis, const Candidate &C,
228  IRBuilder<> &Builder, const DataLayout *DL,
229  bool &BumpWithUglyGEP);
230 
231  const DataLayout *DL = nullptr;
232  DominatorTree *DT = nullptr;
233  ScalarEvolution *SE;
234  TargetTransformInfo *TTI = nullptr;
235  std::list<Candidate> Candidates;
236 
237  // Temporarily holds all instructions that are unlinked (but not deleted) by
238  // rewriteCandidateWithBasis. These instructions will be actually removed
239  // after all rewriting finishes.
240  std::vector<Instruction *> UnlinkedInstructions;
241 };
242 
243 } // end anonymous namespace
244 
246 
247 INITIALIZE_PASS_BEGIN(StraightLineStrengthReduce, "slsr",
248  "Straight line strength reduction", false, false)
252 INITIALIZE_PASS_END(StraightLineStrengthReduce, "slsr",
253  "Straight line strength reduction", false, false)
254 
256  return new StraightLineStrengthReduce();
257 }
258 
259 bool StraightLineStrengthReduce::isBasisFor(const Candidate &Basis,
260  const Candidate &C) {
261  return (Basis.Ins != C.Ins && // skip the same instruction
262  // They must have the same type too. Basis.Base == C.Base doesn't
263  // guarantee their types are the same (PR23975).
264  Basis.Ins->getType() == C.Ins->getType() &&
265  // Basis must dominate C in order to rewrite C with respect to Basis.
266  DT->dominates(Basis.Ins->getParent(), C.Ins->getParent()) &&
267  // They share the same base, stride, and candidate kind.
268  Basis.Base == C.Base && Basis.Stride == C.Stride &&
269  Basis.CandidateKind == C.CandidateKind);
270 }
271 
273  const TargetTransformInfo *TTI) {
275  for (auto I = GEP->idx_begin(); I != GEP->idx_end(); ++I)
276  Indices.push_back(*I);
277  return TTI->getGEPCost(GEP->getSourceElementType(), GEP->getPointerOperand(),
278  Indices) == TargetTransformInfo::TCC_Free;
279 }
280 
281 // Returns whether (Base + Index * Stride) can be folded to an addressing mode.
282 static bool isAddFoldable(const SCEV *Base, ConstantInt *Index, Value *Stride,
283  TargetTransformInfo *TTI) {
284  // Index->getSExtValue() may crash if Index is wider than 64-bit.
285  return Index->getBitWidth() <= 64 &&
286  TTI->isLegalAddressingMode(Base->getType(), nullptr, 0, true,
288 }
289 
290 bool StraightLineStrengthReduce::isFoldable(const Candidate &C,
291  TargetTransformInfo *TTI,
292  const DataLayout *DL) {
293  if (C.CandidateKind == Candidate::Add)
294  return isAddFoldable(C.Base, C.Index, C.Stride, TTI);
295  if (C.CandidateKind == Candidate::GEP)
296  return isGEPFoldable(cast<GetElementPtrInst>(C.Ins), TTI);
297  return false;
298 }
299 
300 // Returns true if GEP has zero or one non-zero index.
302  unsigned NumNonZeroIndices = 0;
303  for (auto I = GEP->idx_begin(); I != GEP->idx_end(); ++I) {
304  ConstantInt *ConstIdx = dyn_cast<ConstantInt>(*I);
305  if (ConstIdx == nullptr || !ConstIdx->isZero())
306  ++NumNonZeroIndices;
307  }
308  return NumNonZeroIndices <= 1;
309 }
310 
311 bool StraightLineStrengthReduce::isSimplestForm(const Candidate &C) {
312  if (C.CandidateKind == Candidate::Add) {
313  // B + 1 * S or B + (-1) * S
314  return C.Index->isOne() || C.Index->isMinusOne();
315  }
316  if (C.CandidateKind == Candidate::Mul) {
317  // (B + 0) * S
318  return C.Index->isZero();
319  }
320  if (C.CandidateKind == Candidate::GEP) {
321  // (char*)B + S or (char*)B - S
322  return ((C.Index->isOne() || C.Index->isMinusOne()) &&
323  hasOnlyOneNonZeroIndex(cast<GetElementPtrInst>(C.Ins)));
324  }
325  return false;
326 }
327 
328 // TODO: We currently implement an algorithm whose time complexity is linear in
329 // the number of existing candidates. However, we could do better by using
330 // ScopedHashTable. Specifically, while traversing the dominator tree, we could
331 // maintain all the candidates that dominate the basic block being traversed in
332 // a ScopedHashTable. This hash table is indexed by the base and the stride of
333 // a candidate. Therefore, finding the immediate basis of a candidate boils down
334 // to one hash-table look up.
335 void StraightLineStrengthReduce::allocateCandidatesAndFindBasis(
336  Candidate::Kind CT, const SCEV *B, ConstantInt *Idx, Value *S,
337  Instruction *I) {
338  Candidate C(CT, B, Idx, S, I);
339  // SLSR can complicate an instruction in two cases:
340  //
341  // 1. If we can fold I into an addressing mode, computing I is likely free or
342  // takes only one instruction.
343  //
344  // 2. I is already in a simplest form. For example, when
345  // X = B + 8 * S
346  // Y = B + S,
347  // rewriting Y to X - 7 * S is probably a bad idea.
348  //
349  // In the above cases, we still add I to the candidate list so that I can be
350  // the basis of other candidates, but we leave I's basis blank so that I
351  // won't be rewritten.
352  if (!isFoldable(C, TTI, DL) && !isSimplestForm(C)) {
353  // Try to compute the immediate basis of C.
354  unsigned NumIterations = 0;
355  // Limit the scan radius to avoid running in quadratice time.
356  static const unsigned MaxNumIterations = 50;
357  for (auto Basis = Candidates.rbegin();
358  Basis != Candidates.rend() && NumIterations < MaxNumIterations;
359  ++Basis, ++NumIterations) {
360  if (isBasisFor(*Basis, C)) {
361  C.Basis = &(*Basis);
362  break;
363  }
364  }
365  }
366  // Regardless of whether we find a basis for C, we need to push C to the
367  // candidate list so that it can be the basis of other candidates.
368  Candidates.push_back(C);
369 }
370 
371 void StraightLineStrengthReduce::allocateCandidatesAndFindBasis(
372  Instruction *I) {
373  switch (I->getOpcode()) {
374  case Instruction::Add:
375  allocateCandidatesAndFindBasisForAdd(I);
376  break;
377  case Instruction::Mul:
378  allocateCandidatesAndFindBasisForMul(I);
379  break;
380  case Instruction::GetElementPtr:
381  allocateCandidatesAndFindBasisForGEP(cast<GetElementPtrInst>(I));
382  break;
383  }
384 }
385 
386 void StraightLineStrengthReduce::allocateCandidatesAndFindBasisForAdd(
387  Instruction *I) {
388  // Try matching B + i * S.
389  if (!isa<IntegerType>(I->getType()))
390  return;
391 
392  assert(I->getNumOperands() == 2 && "isn't I an add?");
393  Value *LHS = I->getOperand(0), *RHS = I->getOperand(1);
394  allocateCandidatesAndFindBasisForAdd(LHS, RHS, I);
395  if (LHS != RHS)
396  allocateCandidatesAndFindBasisForAdd(RHS, LHS, I);
397 }
398 
399 void StraightLineStrengthReduce::allocateCandidatesAndFindBasisForAdd(
400  Value *LHS, Value *RHS, Instruction *I) {
401  Value *S = nullptr;
402  ConstantInt *Idx = nullptr;
403  if (match(RHS, m_Mul(m_Value(S), m_ConstantInt(Idx)))) {
404  // I = LHS + RHS = LHS + Idx * S
405  allocateCandidatesAndFindBasis(Candidate::Add, SE->getSCEV(LHS), Idx, S, I);
406  } else if (match(RHS, m_Shl(m_Value(S), m_ConstantInt(Idx)))) {
407  // I = LHS + RHS = LHS + (S << Idx) = LHS + S * (1 << Idx)
408  APInt One(Idx->getBitWidth(), 1);
409  Idx = ConstantInt::get(Idx->getContext(), One << Idx->getValue());
410  allocateCandidatesAndFindBasis(Candidate::Add, SE->getSCEV(LHS), Idx, S, I);
411  } else {
412  // At least, I = LHS + 1 * RHS
413  ConstantInt *One = ConstantInt::get(cast<IntegerType>(I->getType()), 1);
414  allocateCandidatesAndFindBasis(Candidate::Add, SE->getSCEV(LHS), One, RHS,
415  I);
416  }
417 }
418 
419 // Returns true if A matches B + C where C is constant.
420 static bool matchesAdd(Value *A, Value *&B, ConstantInt *&C) {
421  return (match(A, m_Add(m_Value(B), m_ConstantInt(C))) ||
422  match(A, m_Add(m_ConstantInt(C), m_Value(B))));
423 }
424 
425 // Returns true if A matches B | C where C is constant.
426 static bool matchesOr(Value *A, Value *&B, ConstantInt *&C) {
427  return (match(A, m_Or(m_Value(B), m_ConstantInt(C))) ||
428  match(A, m_Or(m_ConstantInt(C), m_Value(B))));
429 }
430 
431 void StraightLineStrengthReduce::allocateCandidatesAndFindBasisForMul(
432  Value *LHS, Value *RHS, Instruction *I) {
433  Value *B = nullptr;
434  ConstantInt *Idx = nullptr;
435  if (matchesAdd(LHS, B, Idx)) {
436  // If LHS is in the form of "Base + Index", then I is in the form of
437  // "(Base + Index) * RHS".
438  allocateCandidatesAndFindBasis(Candidate::Mul, SE->getSCEV(B), Idx, RHS, I);
439  } else if (matchesOr(LHS, B, Idx) && haveNoCommonBitsSet(B, Idx, *DL)) {
440  // If LHS is in the form of "Base | Index" and Base and Index have no common
441  // bits set, then
442  // Base | Index = Base + Index
443  // and I is thus in the form of "(Base + Index) * RHS".
444  allocateCandidatesAndFindBasis(Candidate::Mul, SE->getSCEV(B), Idx, RHS, I);
445  } else {
446  // Otherwise, at least try the form (LHS + 0) * RHS.
447  ConstantInt *Zero = ConstantInt::get(cast<IntegerType>(I->getType()), 0);
448  allocateCandidatesAndFindBasis(Candidate::Mul, SE->getSCEV(LHS), Zero, RHS,
449  I);
450  }
451 }
452 
453 void StraightLineStrengthReduce::allocateCandidatesAndFindBasisForMul(
454  Instruction *I) {
455  // Try matching (B + i) * S.
456  // TODO: we could extend SLSR to float and vector types.
457  if (!isa<IntegerType>(I->getType()))
458  return;
459 
460  assert(I->getNumOperands() == 2 && "isn't I a mul?");
461  Value *LHS = I->getOperand(0), *RHS = I->getOperand(1);
462  allocateCandidatesAndFindBasisForMul(LHS, RHS, I);
463  if (LHS != RHS) {
464  // Symmetrically, try to split RHS to Base + Index.
465  allocateCandidatesAndFindBasisForMul(RHS, LHS, I);
466  }
467 }
468 
469 void StraightLineStrengthReduce::allocateCandidatesAndFindBasisForGEP(
470  const SCEV *B, ConstantInt *Idx, Value *S, uint64_t ElementSize,
471  Instruction *I) {
472  // I = B + sext(Idx *nsw S) * ElementSize
473  // = B + (sext(Idx) * sext(S)) * ElementSize
474  // = B + (sext(Idx) * ElementSize) * sext(S)
475  // Casting to IntegerType is safe because we skipped vector GEPs.
476  IntegerType *IntPtrTy = cast<IntegerType>(DL->getIntPtrType(I->getType()));
477  ConstantInt *ScaledIdx = ConstantInt::get(
478  IntPtrTy, Idx->getSExtValue() * (int64_t)ElementSize, true);
479  allocateCandidatesAndFindBasis(Candidate::GEP, B, ScaledIdx, S, I);
480 }
481 
482 void StraightLineStrengthReduce::factorArrayIndex(Value *ArrayIdx,
483  const SCEV *Base,
484  uint64_t ElementSize,
486  // At least, ArrayIdx = ArrayIdx *nsw 1.
487  allocateCandidatesAndFindBasisForGEP(
488  Base, ConstantInt::get(cast<IntegerType>(ArrayIdx->getType()), 1),
489  ArrayIdx, ElementSize, GEP);
490  Value *LHS = nullptr;
491  ConstantInt *RHS = nullptr;
492  // One alternative is matching the SCEV of ArrayIdx instead of ArrayIdx
493  // itself. This would allow us to handle the shl case for free. However,
494  // matching SCEVs has two issues:
495  //
496  // 1. this would complicate rewriting because the rewriting procedure
497  // would have to translate SCEVs back to IR instructions. This translation
498  // is difficult when LHS is further evaluated to a composite SCEV.
499  //
500  // 2. ScalarEvolution is designed to be control-flow oblivious. It tends
501  // to strip nsw/nuw flags which are critical for SLSR to trace into
502  // sext'ed multiplication.
503  if (match(ArrayIdx, m_NSWMul(m_Value(LHS), m_ConstantInt(RHS)))) {
504  // SLSR is currently unsafe if i * S may overflow.
505  // GEP = Base + sext(LHS *nsw RHS) * ElementSize
506  allocateCandidatesAndFindBasisForGEP(Base, RHS, LHS, ElementSize, GEP);
507  } else if (match(ArrayIdx, m_NSWShl(m_Value(LHS), m_ConstantInt(RHS)))) {
508  // GEP = Base + sext(LHS <<nsw RHS) * ElementSize
509  // = Base + sext(LHS *nsw (1 << RHS)) * ElementSize
510  APInt One(RHS->getBitWidth(), 1);
511  ConstantInt *PowerOf2 =
512  ConstantInt::get(RHS->getContext(), One << RHS->getValue());
513  allocateCandidatesAndFindBasisForGEP(Base, PowerOf2, LHS, ElementSize, GEP);
514  }
515 }
516 
517 void StraightLineStrengthReduce::allocateCandidatesAndFindBasisForGEP(
518  GetElementPtrInst *GEP) {
519  // TODO: handle vector GEPs
520  if (GEP->getType()->isVectorTy())
521  return;
522 
523  SmallVector<const SCEV *, 4> IndexExprs;
524  for (auto I = GEP->idx_begin(); I != GEP->idx_end(); ++I)
525  IndexExprs.push_back(SE->getSCEV(*I));
526 
528  for (unsigned I = 1, E = GEP->getNumOperands(); I != E; ++I, ++GTI) {
529  if (GTI.isStruct())
530  continue;
531 
532  const SCEV *OrigIndexExpr = IndexExprs[I - 1];
533  IndexExprs[I - 1] = SE->getZero(OrigIndexExpr->getType());
534 
535  // The base of this candidate is GEP's base plus the offsets of all
536  // indices except this current one.
537  const SCEV *BaseExpr = SE->getGEPExpr(cast<GEPOperator>(GEP), IndexExprs);
538  Value *ArrayIdx = GEP->getOperand(I);
539  uint64_t ElementSize = DL->getTypeAllocSize(GTI.getIndexedType());
540  if (ArrayIdx->getType()->getIntegerBitWidth() <=
542  // Skip factoring if ArrayIdx is wider than the pointer size, because
543  // ArrayIdx is implicitly truncated to the pointer size.
544  factorArrayIndex(ArrayIdx, BaseExpr, ElementSize, GEP);
545  }
546  // When ArrayIdx is the sext of a value, we try to factor that value as
547  // well. Handling this case is important because array indices are
548  // typically sign-extended to the pointer size.
549  Value *TruncatedArrayIdx = nullptr;
550  if (match(ArrayIdx, m_SExt(m_Value(TruncatedArrayIdx))) &&
551  TruncatedArrayIdx->getType()->getIntegerBitWidth() <=
553  // Skip factoring if TruncatedArrayIdx is wider than the pointer size,
554  // because TruncatedArrayIdx is implicitly truncated to the pointer size.
555  factorArrayIndex(TruncatedArrayIdx, BaseExpr, ElementSize, GEP);
556  }
557 
558  IndexExprs[I - 1] = OrigIndexExpr;
559  }
560 }
561 
562 // A helper function that unifies the bitwidth of A and B.
563 static void unifyBitWidth(APInt &A, APInt &B) {
564  if (A.getBitWidth() < B.getBitWidth())
565  A = A.sext(B.getBitWidth());
566  else if (A.getBitWidth() > B.getBitWidth())
567  B = B.sext(A.getBitWidth());
568 }
569 
570 Value *StraightLineStrengthReduce::emitBump(const Candidate &Basis,
571  const Candidate &C,
572  IRBuilder<> &Builder,
573  const DataLayout *DL,
574  bool &BumpWithUglyGEP) {
575  APInt Idx = C.Index->getValue(), BasisIdx = Basis.Index->getValue();
576  unifyBitWidth(Idx, BasisIdx);
577  APInt IndexOffset = Idx - BasisIdx;
578 
579  BumpWithUglyGEP = false;
580  if (Basis.CandidateKind == Candidate::GEP) {
581  APInt ElementSize(
582  IndexOffset.getBitWidth(),
583  DL->getTypeAllocSize(
584  cast<GetElementPtrInst>(Basis.Ins)->getResultElementType()));
585  APInt Q, R;
586  APInt::sdivrem(IndexOffset, ElementSize, Q, R);
587  if (R == 0)
588  IndexOffset = Q;
589  else
590  BumpWithUglyGEP = true;
591  }
592 
593  // Compute Bump = C - Basis = (i' - i) * S.
594  // Common case 1: if (i' - i) is 1, Bump = S.
595  if (IndexOffset == 1)
596  return C.Stride;
597  // Common case 2: if (i' - i) is -1, Bump = -S.
598  if (IndexOffset.isAllOnesValue())
599  return Builder.CreateNeg(C.Stride);
600 
601  // Otherwise, Bump = (i' - i) * sext/trunc(S). Note that (i' - i) and S may
602  // have different bit widths.
603  IntegerType *DeltaType =
604  IntegerType::get(Basis.Ins->getContext(), IndexOffset.getBitWidth());
605  Value *ExtendedStride = Builder.CreateSExtOrTrunc(C.Stride, DeltaType);
606  if (IndexOffset.isPowerOf2()) {
607  // If (i' - i) is a power of 2, Bump = sext/trunc(S) << log(i' - i).
608  ConstantInt *Exponent = ConstantInt::get(DeltaType, IndexOffset.logBase2());
609  return Builder.CreateShl(ExtendedStride, Exponent);
610  }
611  if ((-IndexOffset).isPowerOf2()) {
612  // If (i - i') is a power of 2, Bump = -sext/trunc(S) << log(i' - i).
614  ConstantInt::get(DeltaType, (-IndexOffset).logBase2());
615  return Builder.CreateNeg(Builder.CreateShl(ExtendedStride, Exponent));
616  }
617  Constant *Delta = ConstantInt::get(DeltaType, IndexOffset);
618  return Builder.CreateMul(ExtendedStride, Delta);
619 }
620 
621 void StraightLineStrengthReduce::rewriteCandidateWithBasis(
622  const Candidate &C, const Candidate &Basis) {
623  assert(C.CandidateKind == Basis.CandidateKind && C.Base == Basis.Base &&
624  C.Stride == Basis.Stride);
625  // We run rewriteCandidateWithBasis on all candidates in a post-order, so the
626  // basis of a candidate cannot be unlinked before the candidate.
627  assert(Basis.Ins->getParent() != nullptr && "the basis is unlinked");
628 
629  // An instruction can correspond to multiple candidates. Therefore, instead of
630  // simply deleting an instruction when we rewrite it, we mark its parent as
631  // nullptr (i.e. unlink it) so that we can skip the candidates whose
632  // instruction is already rewritten.
633  if (!C.Ins->getParent())
634  return;
635 
636  IRBuilder<> Builder(C.Ins);
637  bool BumpWithUglyGEP;
638  Value *Bump = emitBump(Basis, C, Builder, DL, BumpWithUglyGEP);
639  Value *Reduced = nullptr; // equivalent to but weaker than C.Ins
640  switch (C.CandidateKind) {
641  case Candidate::Add:
642  case Candidate::Mul: {
643  // C = Basis + Bump
644  Value *NegBump;
645  if (match(Bump, m_Neg(m_Value(NegBump)))) {
646  // If Bump is a neg instruction, emit C = Basis - (-Bump).
647  Reduced = Builder.CreateSub(Basis.Ins, NegBump);
648  // We only use the negative argument of Bump, and Bump itself may be
649  // trivially dead.
651  } else {
652  // It's tempting to preserve nsw on Bump and/or Reduced. However, it's
653  // usually unsound, e.g.,
654  //
655  // X = (-2 +nsw 1) *nsw INT_MAX
656  // Y = (-2 +nsw 3) *nsw INT_MAX
657  // =>
658  // Y = X + 2 * INT_MAX
659  //
660  // Neither + and * in the resultant expression are nsw.
661  Reduced = Builder.CreateAdd(Basis.Ins, Bump);
662  }
663  break;
664  }
665  case Candidate::GEP:
666  {
667  Type *IntPtrTy = DL->getIntPtrType(C.Ins->getType());
668  bool InBounds = cast<GetElementPtrInst>(C.Ins)->isInBounds();
669  if (BumpWithUglyGEP) {
670  // C = (char *)Basis + Bump
671  unsigned AS = Basis.Ins->getType()->getPointerAddressSpace();
672  Type *CharTy = Type::getInt8PtrTy(Basis.Ins->getContext(), AS);
673  Reduced = Builder.CreateBitCast(Basis.Ins, CharTy);
674  if (InBounds)
675  Reduced =
676  Builder.CreateInBoundsGEP(Builder.getInt8Ty(), Reduced, Bump);
677  else
678  Reduced = Builder.CreateGEP(Builder.getInt8Ty(), Reduced, Bump);
679  Reduced = Builder.CreateBitCast(Reduced, C.Ins->getType());
680  } else {
681  // C = gep Basis, Bump
682  // Canonicalize bump to pointer size.
683  Bump = Builder.CreateSExtOrTrunc(Bump, IntPtrTy);
684  if (InBounds)
685  Reduced = Builder.CreateInBoundsGEP(
686  cast<GetElementPtrInst>(Basis.Ins)->getResultElementType(),
687  Basis.Ins, Bump);
688  else
689  Reduced = Builder.CreateGEP(
690  cast<GetElementPtrInst>(Basis.Ins)->getResultElementType(),
691  Basis.Ins, Bump);
692  }
693  break;
694  }
695  default:
696  llvm_unreachable("C.CandidateKind is invalid");
697  };
698  Reduced->takeName(C.Ins);
699  C.Ins->replaceAllUsesWith(Reduced);
700  // Unlink C.Ins so that we can skip other candidates also corresponding to
701  // C.Ins. The actual deletion is postponed to the end of runOnFunction.
702  C.Ins->removeFromParent();
703  UnlinkedInstructions.push_back(C.Ins);
704 }
705 
707  if (skipFunction(F))
708  return false;
709 
710  TTI = &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
711  DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
712  SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
713  // Traverse the dominator tree in the depth-first order. This order makes sure
714  // all bases of a candidate are in Candidates when we process it.
715  for (const auto Node : depth_first(DT))
716  for (auto &I : *(Node->getBlock()))
717  allocateCandidatesAndFindBasis(&I);
718 
719  // Rewrite candidates in the reverse depth-first order. This order makes sure
720  // a candidate being rewritten is not a basis for any other candidate.
721  while (!Candidates.empty()) {
722  const Candidate &C = Candidates.back();
723  if (C.Basis != nullptr) {
724  rewriteCandidateWithBasis(C, *C.Basis);
725  }
726  Candidates.pop_back();
727  }
728 
729  // Delete all unlink instructions.
730  for (auto *UnlinkedInst : UnlinkedInstructions) {
731  for (unsigned I = 0, E = UnlinkedInst->getNumOperands(); I != E; ++I) {
732  Value *Op = UnlinkedInst->getOperand(I);
733  UnlinkedInst->setOperand(I, nullptr);
735  }
736  UnlinkedInst->deleteValue();
737  }
738  bool Ret = !UnlinkedInstructions.empty();
739  UnlinkedInstructions.clear();
740  return Ret;
741 }
Value * CreateInBoundsGEP(Value *Ptr, ArrayRef< Value *> IdxList, const Twine &Name="")
Definition: IRBuilder.h:1696
FunctionPass * createStraightLineStrengthReducePass()
uint64_t CallInst * C
A parsed version of the target data layout string in and methods for querying it. ...
Definition: DataLayout.h:111
class_match< Value > m_Value()
Match an arbitrary value and ignore it.
Definition: PatternMatch.h:70
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
APInt sext(unsigned width) const
Sign extend to a new width.
Definition: APInt.cpp:836
GCNRegPressure max(const GCNRegPressure &P1, const GCNRegPressure &P2)
This class represents lattice values for constants.
Definition: AllocatorList.h:23
A Module instance is used to store all the information related to an LLVM module. ...
Definition: Module.h:65
BinaryOp_match< LHS, RHS, Instruction::Mul > m_Mul(const LHS &L, const RHS &R)
Definition: PatternMatch.h:783
The main scalar evolution driver.
int getGEPCost(Type *PointeeType, const Value *Ptr, ArrayRef< const Value *> Operands) const
Estimate the cost of a GEP operation when lowered.
static void unifyBitWidth(APInt &A, APInt &B)
static void sdivrem(const APInt &LHS, const APInt &RHS, APInt &Quotient, APInt &Remainder)
Definition: APInt.cpp:1841
LLVMContext & getContext() const
All values hold a context through their type.
Definition: Value.cpp:745
unsigned getPointerSizeInBits(unsigned AS=0) const
Layout pointer size, in bits FIXME: The defaults need to be removed once all of the backends/clients ...
Definition: DataLayout.h:389
static bool matchesAdd(Value *A, Value *&B, ConstantInt *&C)
F(f)
Hexagon Common GEP
bool isVectorTy() const
True if this is an instance of VectorType.
Definition: Type.h:229
unsigned getBitWidth() const
getBitWidth - Return the bitwidth of this constant.
Definition: Constants.h:142
unsigned getBitWidth() const
Return the number of bits in the APInt.
Definition: APInt.h:1515
bool match(Val *V, const Pattern &P)
Definition: PatternMatch.h:47
AnalysisUsage & addRequired()
#define INITIALIZE_PASS_DEPENDENCY(depName)
Definition: PassSupport.h:50
static bool matchesOr(Value *A, Value *&B, ConstantInt *&C)
const DataLayout & getDataLayout() const
Get the data layout for the module&#39;s target platform.
Definition: Module.cpp:369
bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset, bool HasBaseReg, int64_t Scale, unsigned AddrSpace=0, Instruction *I=nullptr) const
Return true if the addressing mode represented by AM is legal for this target, for a load/store of th...
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
Definition: IRBuilder.h:779
Value * CreateAdd(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Definition: IRBuilder.h:1118
Type * getSourceElementType() const
Definition: Instructions.h:972
This file implements a class to represent arbitrary precision integral constant values and operations...
static bool hasOnlyOneNonZeroIndex(GetElementPtrInst *GEP)
BinaryOp_match< LHS, RHS, Instruction::Add > m_Add(const LHS &L, const RHS &R)
Definition: PatternMatch.h:716
void initializeStraightLineStrengthReducePass(PassRegistry &)
Value * CreateBitCast(Value *V, Type *DestTy, const Twine &Name="")
Definition: IRBuilder.h:1958
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:245
Value * CreateSExtOrTrunc(Value *V, Type *DestTy, const Twine &Name="")
Create a SExt or Trunc from the integer value V to DestTy.
Definition: IRBuilder.h:1903
static bool isGEPFoldable(GetElementPtrInst *GEP, const TargetTransformInfo *TTI)
class_match< ConstantInt > m_ConstantInt()
Match an arbitrary ConstantInt and ignore it.
Definition: PatternMatch.h:81
const APInt & getValue() const
Return the constant as an APInt value reference.
Definition: Constants.h:137
unsigned getOpcode() const
Returns a member of one of the enums like Instruction::Add.
Definition: Instruction.h:125
INITIALIZE_PASS_BEGIN(StraightLineStrengthReduce, "slsr", "Straight line strength reduction", false, false) INITIALIZE_PASS_END(StraightLineStrengthReduce
Value * CreateSub(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Definition: IRBuilder.h:1135
void takeName(Value *V)
Transfer the name from V to this value.
Definition: Value.cpp:291
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree...
Definition: Dominators.h:144
Value * getOperand(unsigned i) const
Definition: User.h:169
unsigned getAddressSpace() const
Returns the address space of this instruction&#39;s pointer type.
Definition: Instructions.h:984
an instruction for type-safe pointer arithmetic to access elements of arrays and structs ...
Definition: Instructions.h:875
IntegerType * getIntPtrType(LLVMContext &C, unsigned AddressSpace=0) const
Returns an integer type with size at least as big as that of a pointer in the given address space...
Definition: DataLayout.cpp:772
static bool runOnFunction(Function &F, bool PostInlining)
bool isAllOnesValue() const
Determine if all bits are set.
Definition: APInt.h:395
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
Wrapper pass for TargetTransformInfo.
The instances of the Type class are immutable: once they are created, they are never changed...
Definition: Type.h:45
BinaryOp_match< LHS, RHS, Instruction::Or > m_Or(const LHS &L, const RHS &R)
Definition: PatternMatch.h:837
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
This is an important base class in LLVM.
Definition: Constant.h:41
Straight line strength reduction
This file contains the declarations for the subclasses of Constant, which represent the different fla...
static const unsigned UnknownAddressSpace
Expected to fold away in lowering.
Represent the analysis usage information of a pass.
BinaryOp_match< LHS, RHS, Instruction::Shl > m_Shl(const LHS &L, const RHS &R)
Definition: PatternMatch.h:849
Value * CreateNeg(Value *V, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Definition: IRBuilder.h:1485
FunctionPass class - This class is used to implement most global optimizations.
Definition: Pass.h:284
Class to represent integer types.
Definition: DerivedTypes.h:40
bool RecursivelyDeleteTriviallyDeadInstructions(Value *V, const TargetLibraryInfo *TLI=nullptr, MemorySSAUpdater *MSSAU=nullptr)
If the specified value is a trivially dead instruction, delete it.
Definition: Local.cpp:440
static PointerType * getInt8PtrTy(LLVMContext &C, unsigned AS=0)
Definition: Type.cpp:219
INITIALIZE_PASS_END(RegBankSelect, DEBUG_TYPE, "Assign register bank of generic virtual registers", false, false) RegBankSelect
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
Value * CreateMul(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Definition: IRBuilder.h:1152
CastClass_match< OpTy, Instruction::SExt > m_SExt(const OpTy &Op)
Matches SExt.
Value * CreateGEP(Value *Ptr, ArrayRef< Value *> IdxList, const Twine &Name="")
Definition: IRBuilder.h:1677
static IntegerType * get(LLVMContext &C, unsigned NumBits)
This static method is the primary way of constructing an IntegerType.
Definition: Type.cpp:239
unsigned getNumOperands() const
Definition: User.h:191
This is the shared class of boolean and integer constants.
Definition: Constants.h:83
Type * getType() const
Return the LLVM type of this SCEV expression.
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
This is a &#39;vector&#39; (really, a variable-sized array), optimized for the case when the array is small...
Definition: SmallVector.h:837
Module.h This file contains the declarations for the Module class.
static Constant * get(Type *Ty, uint64_t V, bool isSigned=false)
If Ty is a vector type, return a Constant with a splat of the given value.
Definition: Constants.cpp:640
void setPreservesCFG()
This function should be called by the pass, iff they do not:
Definition: Pass.cpp:301
unsigned logBase2() const
Definition: APInt.h:1754
BinaryOp_match< cst_pred_ty< is_zero_int >, ValTy, Instruction::Sub > m_Neg(const ValTy &V)
Matches a &#39;Neg&#39; as &#39;sub 0, V&#39;.
Class for arbitrary precision integers.
Definition: APInt.h:69
bool isPowerOf2() const
Check if this APInt&#39;s value is a power of two greater than zero.
Definition: APInt.h:463
IntegerType * getInt8Ty()
Fetch the type representing an 8-bit integer.
Definition: IRBuilder.h:373
Value * CreateShl(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Definition: IRBuilder.h:1207
uint64_t getTypeAllocSize(Type *Ty) const
Returns the offset in bytes between successive objects of the specified type, including alignment pad...
Definition: DataLayout.h:470
This class represents an analyzed expression in the program.
unsigned getIntegerBitWidth() const
Definition: DerivedTypes.h:97
#define I(x, y, z)
Definition: MD5.cpp:58
OverflowingBinaryOp_match< LHS, RHS, Instruction::Shl, OverflowingBinaryOperator::NoSignedWrap > m_NSWShl(const LHS &L, const RHS &R)
Definition: PatternMatch.h:918
bool haveNoCommonBitsSet(const Value *LHS, const Value *RHS, const DataLayout &DL, AssumptionCache *AC=nullptr, const Instruction *CxtI=nullptr, const DominatorTree *DT=nullptr, bool UseInstrInfo=true)
Return true if LHS and RHS have no common bits set.
bool isZero() const
This is just a convenience method to make client code smaller for a common code.
Definition: Constants.h:192
LLVM_NODISCARD std::enable_if<!is_simple_type< Y >::value, typename cast_retty< X, const Y >::ret_type >::type dyn_cast(const Y &Val)
Definition: Casting.h:332
iterator_range< df_iterator< T > > depth_first(const T &G)
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
LLVM Value Representation.
Definition: Value.h:73
OverflowingBinaryOp_match< LHS, RHS, Instruction::Mul, OverflowingBinaryOperator::NoSignedWrap > m_NSWMul(const LHS &L, const RHS &R)
Definition: PatternMatch.h:910
Legacy analysis pass which computes a DominatorTree.
Definition: Dominators.h:259
This pass exposes codegen information to IR-level passes.
int64_t getSExtValue() const
Return the constant as a 64-bit integer value after it has been sign extended as appropriate for the ...
Definition: Constants.h:156
static bool isAddFoldable(const SCEV *Base, ConstantInt *Index, Value *Stride, TargetTransformInfo *TTI)
gep_type_iterator gep_type_begin(const User *GEP)