LLVM 19.0.0git
SeparateConstOffsetFromGEP.cpp
1//===- SeparateConstOffsetFromGEP.cpp -------------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// Loop unrolling may create many similar GEPs for array accesses.
10// e.g., a 2-level loop
11//
12// float a[32][32]; // global variable
13//
14// for (int i = 0; i < 2; ++i) {
15// for (int j = 0; j < 2; ++j) {
16// ...
17// ... = a[x + i][y + j];
18// ...
19// }
20// }
21//
22// will probably be unrolled to:
23//
24// gep %a, 0, %x, %y; load
25// gep %a, 0, %x, %y + 1; load
26// gep %a, 0, %x + 1, %y; load
27// gep %a, 0, %x + 1, %y + 1; load
28//
29// LLVM's GVN does not use partial redundancy elimination yet, and is thus
30// unable to reuse (gep %a, 0, %x, %y). As a result, this missed optimization
31// incurs a significant slowdown on targets with limited addressing modes. For instance,
32// because the PTX target does not support the reg+reg addressing mode, the
33// NVPTX backend emits PTX code that literally computes the pointer address of
34// each GEP, wasting tons of registers. It emits the following PTX for the
35// first load and similar PTX for other loads.
36//
37// mov.u32 %r1, %x;
38// mov.u32 %r2, %y;
39// mul.wide.u32 %rl2, %r1, 128;
40// mov.u64 %rl3, a;
41// add.s64 %rl4, %rl3, %rl2;
42// mul.wide.u32 %rl5, %r2, 4;
43// add.s64 %rl6, %rl4, %rl5;
44// ld.global.f32 %f1, [%rl6];
45//
46// To reduce the register pressure, the optimization implemented in this file
47// merges the common part of a group of GEPs, so we can compute each pointer
48// address by adding a simple offset to the common part, saving many registers.
49//
50// It works by splitting each GEP into a variadic base and a constant offset.
51// The variadic base can be computed once and reused by multiple GEPs, and the
52// constant offsets can be nicely folded into the reg+immediate addressing mode
53// (supported by most targets) without using any extra register.
54//
55// For instance, we transform the four GEPs and four loads in the above example
56// into:
57//
58// base = gep a, 0, x, y
59// load base
60// load base + 1 * sizeof(float)
61// load base + 32 * sizeof(float)
62// load base + 33 * sizeof(float)
63//
64// Given the transformed IR, a backend that supports the reg+immediate
65// addressing mode can easily fold the pointer arithmetics into the loads. For
66// example, the NVPTX backend can easily fold the pointer arithmetics into the
67// ld.global.f32 instructions, and the resultant PTX uses much fewer registers.
68//
69// mov.u32 %r1, %x;
70// mov.u32 %r2, %y;
71// mul.wide.u32 %rl2, %r1, 128;
72// mov.u64 %rl3, a;
73// add.s64 %rl4, %rl3, %rl2;
74// mul.wide.u32 %rl5, %r2, 4;
75// add.s64 %rl6, %rl4, %rl5;
76// ld.global.f32 %f1, [%rl6]; // so far the same as unoptimized PTX
77// ld.global.f32 %f2, [%rl6+4]; // much better
78// ld.global.f32 %f3, [%rl6+128]; // much better
79// ld.global.f32 %f4, [%rl6+132]; // much better
80//
81// Another improvement enabled by the LowerGEP flag is to lower a GEP with
82// multiple indices to either multiple GEPs with a single index or arithmetic
83// operations (depending on whether the target uses alias analysis in codegen).
84// Such a transformation can have the following benefits:
85// (1) It can always extract constants in the indices of structure type.
86// (2) After such Lowering, there are more optimization opportunities such as
87// CSE, LICM and CGP.
88//
89// E.g. The following GEPs have multiple indices:
90// BB1:
91// %p = getelementptr [10 x %struct]* %ptr, i64 %i, i64 %j1, i32 3
92// load %p
93// ...
94// BB2:
95// %p2 = getelementptr [10 x %struct]* %ptr, i64 %i, i64 %j2, i32 2
96// load %p2
97// ...
98//
99// As written, we cannot CSE the common part related to index "i64 %i".
100// Lowering the GEPs makes this possible.
101// If the target does not use alias analysis in codegen, this pass will
102// lower a GEP with multiple indices into arithmetic operations:
103// BB1:
104// %1 = ptrtoint [10 x %struct]* %ptr to i64 ; CSE opportunity
105// %2 = mul i64 %i, length_of_10xstruct ; CSE opportunity
106// %3 = add i64 %1, %2 ; CSE opportunity
107// %4 = mul i64 %j1, length_of_struct
108// %5 = add i64 %3, %4
109// %6 = add i64 %3, struct_field_3 ; Constant offset
110// %p = inttoptr i64 %6 to i32*
111// load %p
112// ...
113// BB2:
114// %7 = ptrtoint [10 x %struct]* %ptr to i64 ; CSE opportunity
115// %8 = mul i64 %i, length_of_10xstruct ; CSE opportunity
116// %9 = add i64 %7, %8 ; CSE opportunity
117// %10 = mul i64 %j2, length_of_struct
118// %11 = add i64 %9, %10
119// %12 = add i64 %11, struct_field_2 ; Constant offset
120// %p2 = inttoptr i64 %12 to i32*
121// load %p2
122// ...
123//
124// If the target uses alias analysis in codegen, this pass will lower a GEP
125// with multiple indices into multiple GEPs with a single index:
126// BB1:
127// %1 = bitcast [10 x %struct]* %ptr to i8* ; CSE opportunity
128// %2 = mul i64 %i, length_of_10xstruct ; CSE opportunity
129// %3 = getelementptr i8* %1, i64 %2 ; CSE opportunity
130// %4 = mul i64 %j1, length_of_struct
131// %5 = getelementptr i8* %3, i64 %4
132// %6 = getelementptr i8* %5, struct_field_3 ; Constant offset
133// %p = bitcast i8* %6 to i32*
134// load %p
135// ...
136// BB2:
137// %7 = bitcast [10 x %struct]* %ptr to i8* ; CSE opportunity
138// %8 = mul i64 %i, length_of_10xstruct ; CSE opportunity
139// %9 = getelementptr i8* %7, i64 %8 ; CSE opportunity
140// %10 = mul i64 %j2, length_of_struct
141// %11 = getelementptr i8* %9, i64 %10
142// %12 = getelementptr i8* %11, struct_field_2 ; Constant offset
143// %p2 = bitcast i8* %12 to i32*
144// load %p2
145// ...
146//
147// Lowering GEPs can also benefit other passes such as LICM and CGP.
148// LICM (Loop Invariant Code Motion) cannot hoist/sink a GEP with multiple
149// indices if any of the indices is loop-variant. If we lower such a GEP into
150// invariant parts and variant parts, LICM can hoist/sink those invariant parts.
151// CGP (CodeGen Prepare) tries to sink address calculations that match the
152// target's addressing modes. A GEP with multiple indices may not match and will
153// not be sunk. If we lower such a GEP into smaller parts, CGP may sink some of
154// them, so we end up with a better addressing mode.
155//
156//===----------------------------------------------------------------------===//
157
158#include "llvm/Transforms/Scalar/SeparateConstOffsetFromGEP.h"
159#include "llvm/ADT/APInt.h"
160#include "llvm/ADT/DenseMap.h"
161#include "llvm/ADT/DepthFirstIterator.h"
162#include "llvm/ADT/SmallVector.h"
163#include "llvm/Analysis/LoopInfo.h"
164#include "llvm/Analysis/MemoryBuiltins.h"
165#include "llvm/Analysis/TargetLibraryInfo.h"
166#include "llvm/Analysis/TargetTransformInfo.h"
167#include "llvm/Analysis/ValueTracking.h"
168#include "llvm/IR/BasicBlock.h"
169#include "llvm/IR/Constant.h"
170#include "llvm/IR/Constants.h"
171#include "llvm/IR/DataLayout.h"
172#include "llvm/IR/DerivedTypes.h"
173#include "llvm/IR/Dominators.h"
174#include "llvm/IR/Function.h"
175#include "llvm/IR/GetElementPtrTypeIterator.h"
176#include "llvm/IR/IRBuilder.h"
177#include "llvm/IR/InstrTypes.h"
178#include "llvm/IR/Instruction.h"
179#include "llvm/IR/Instructions.h"
180#include "llvm/IR/Module.h"
181#include "llvm/IR/PassManager.h"
182#include "llvm/IR/PatternMatch.h"
183#include "llvm/IR/Type.h"
184#include "llvm/IR/User.h"
185#include "llvm/IR/Value.h"
186#include "llvm/InitializePasses.h"
187#include "llvm/Pass.h"
188#include "llvm/Support/Casting.h"
189#include "llvm/Support/CommandLine.h"
190#include "llvm/Support/ErrorHandling.h"
191#include "llvm/Support/raw_ostream.h"
192#include "llvm/Transforms/Scalar.h"
193#include "llvm/Transforms/Utils/Local.h"
194#include <cassert>
195#include <cstdint>
196#include <string>
197
198using namespace llvm;
199using namespace llvm::PatternMatch;
200
202 "disable-separate-const-offset-from-gep", cl::init(false),
203 cl::desc("Do not separate the constant offset from a GEP instruction"),
204 cl::Hidden);
205
206// Setting this flag may emit false positives when the input module already
207// contains dead instructions. Therefore, we set it only in unit tests that are
208// free of dead code.
209static cl::opt<bool>
210 VerifyNoDeadCode("reassociate-geps-verify-no-dead-code", cl::init(false),
211 cl::desc("Verify this pass produces no dead code"),
212 cl::Hidden);
213
214namespace {
215
216/// A helper class for separating a constant offset from a GEP index.
217///
218/// In real programs, a GEP index may be more complicated than a simple addition
219 /// of something and a constant integer which can be trivially split. For
220/// example, to split ((a << 3) | 5) + b, we need to search deeper for the
221/// constant offset, so that we can separate the index to (a << 3) + b and 5.
222///
223/// Therefore, this class looks into the expression that computes a given GEP
224/// index, and tries to find a constant integer that can be hoisted to the
225/// outermost level of the expression as an addition. Not every constant in an
226/// expression can jump out. e.g., we cannot transform (b * (a + 5)) to (b * a +
227/// 5); nor can we transform (3 * (a + 5)) to (3 * a + 5), however in this case,
228/// -instcombine probably already optimized (3 * (a + 5)) to (3 * a + 15).
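///
/// For illustration only (hand-written IR, not taken from the tests): given a
/// GEP index computed as
///   %s   = shl i64 %a, 3
///   %t   = or disjoint i64 %s, 5   ; no common bits, so the "or" acts as an add
///   %idx = add i64 %t, %b
/// this class finds the constant 5 and rebuilds the index as
///   %s2   = shl i64 %a, 3
///   %idx2 = add i64 %s2, %b
/// so that the enclosing GEP can be offset by the extracted constant instead.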
229class ConstantOffsetExtractor {
230public:
231 /// Extracts a constant offset from the given GEP index. It returns the
232 /// new index representing the remainder (equal to the original index minus
233 /// the constant offset), or nullptr if we cannot extract a constant offset.
234 /// \p Idx The given GEP index
235 /// \p GEP The given GEP
236 /// \p UserChainTail Outputs the tail of UserChain so that we can
237 /// garbage-collect unused instructions in UserChain.
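  ///
  /// Sketch of the intended use, mirroring splitGEP below (names are
  /// illustrative):
  ///   User *Tail;
  ///   if (Value *NewIdx = ConstantOffsetExtractor::Extract(OldIdx, GEP, Tail)) {
  ///     GEP->setOperand(OpIdx, NewIdx); // index with the constant stripped
  ///     // Tail and OldIdx can then be deleted if they became dead.
  ///   }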
238 static Value *Extract(Value *Idx, GetElementPtrInst *GEP,
239 User *&UserChainTail);
240
241 /// Looks for a constant offset from the given GEP index without extracting
242 /// it. It returns the numeric value of the constant offset it finds (0 if
243 /// none is found). The meanings of the arguments are the same as in Extract.
244 static int64_t Find(Value *Idx, GetElementPtrInst *GEP);
245
246private:
247 ConstantOffsetExtractor(BasicBlock::iterator InsertionPt)
248 : IP(InsertionPt), DL(InsertionPt->getModule()->getDataLayout()) {}
249
250 /// Searches the expression that computes V for a non-zero constant C s.t.
251 /// V can be reassociated into the form V' + C. If the search is
252 /// successful, returns C and updates UserChain as a def-use chain from C to V;
253 /// otherwise, UserChain is empty.
254 ///
255 /// \p V The given expression
256 /// \p SignExtended Whether V will be sign-extended in the computation of the
257 /// GEP index
258 /// \p ZeroExtended Whether V will be zero-extended in the computation of the
259 /// GEP index
260 /// \p NonNegative Whether V is guaranteed to be non-negative. For example,
261 /// an index of an inbounds GEP is guaranteed to be
262 /// non-negative. Leveraging this, we can better split
263 /// inbounds GEPs.
264 APInt find(Value *V, bool SignExtended, bool ZeroExtended, bool NonNegative);
265
266 /// A helper function to look into both operands of a binary operator.
267 APInt findInEitherOperand(BinaryOperator *BO, bool SignExtended,
268 bool ZeroExtended);
269
270 /// After finding the constant offset C from the GEP index I, we build a new
271 /// index I' s.t. I' + C = I. This function builds and returns the new
272 /// index I' according to UserChain produced by function "find".
273 ///
274 /// The building conceptually takes two steps:
275 /// 1) iteratively distribute s/zext towards the leaves of the expression tree
276 /// that computes I
277 /// 2) reassociate the expression tree to the form I' + C.
278 ///
279 /// For example, to extract the 5 from sext(a + (b + 5)), we first distribute
280 /// sext to a, b and 5 so that we have
281 /// sext(a) + (sext(b) + 5).
282 /// Then, we reassociate it to
283 /// (sext(a) + sext(b)) + 5.
284 /// Given this form, we know I' is sext(a) + sext(b).
285 Value *rebuildWithoutConstOffset();
286
287 /// After the first step of rebuilding the GEP index without the constant
288 /// offset, distribute s/zext to the operands of all operators in UserChain.
289 /// e.g., zext(sext(a + (b + 5))) (assuming no overflow) =>
290 /// zext(sext(a)) + (zext(sext(b)) + zext(sext(5))).
291 ///
292 /// The function also updates UserChain to point to new subexpressions after
293 /// distributing s/zext. e.g., the old UserChain of the above example is
294 /// 5 -> b + 5 -> a + (b + 5) -> sext(...) -> zext(sext(...)),
295 /// and the new UserChain is
296 /// zext(sext(5)) -> zext(sext(b)) + zext(sext(5)) ->
297 /// zext(sext(a)) + (zext(sext(b)) + zext(sext(5)))
298 ///
299 /// \p ChainIndex The index to UserChain. ChainIndex is initially
300 /// UserChain.size() - 1, and is decremented during
301 /// the recursion.
302 Value *distributeExtsAndCloneChain(unsigned ChainIndex);
303
304 /// Reassociates the GEP index to the form I' + C and returns I'.
305 Value *removeConstOffset(unsigned ChainIndex);
306
307 /// A helper function to apply ExtInsts, a list of s/zext, to value V.
308 /// e.g., if ExtInsts = [sext i32 to i64, zext i16 to i32], this function
309 /// returns "sext i32 (zext i16 V to i32) to i64".
310 Value *applyExts(Value *V);
311
312 /// A helper function that returns whether we can trace into the operands
313 /// of binary operator BO for a constant offset.
314 ///
315 /// \p SignExtended Whether BO is surrounded by sext
316 /// \p ZeroExtended Whether BO is surrounded by zext
317 /// \p NonNegative Whether BO is known to be non-negative, e.g., an in-bound
318 /// array index.
319 bool CanTraceInto(bool SignExtended, bool ZeroExtended, BinaryOperator *BO,
320 bool NonNegative);
321
322 /// The path from the constant offset to the old GEP index. e.g., if the GEP
323 /// index is "a * b + (c + 5)". After running function find, UserChain[0] will
324 /// be the constant 5, UserChain[1] will be the subexpression "c + 5", and
325 /// UserChain[2] will be the entire expression "a * b + (c + 5)".
326 ///
327 /// This path helps to rebuild the new GEP index.
328 SmallVector<User *, 8> UserChain;
329
330 /// A data structure used in rebuildWithoutConstOffset. Contains all
331 /// sext/zext instructions along UserChain.
332 SmallVector<CastInst *, 16> ExtInsts;
333
334 /// Insertion position of cloned instructions.
335 BasicBlock::iterator IP;
336
337 const DataLayout &DL;
338};
339
340/// A pass that tries to split every GEP in the function into a variadic
341/// base and a constant offset. It is a FunctionPass because searching for the
342/// constant offset may inspect other basic blocks.
343class SeparateConstOffsetFromGEPLegacyPass : public FunctionPass {
344public:
345 static char ID;
346
347 SeparateConstOffsetFromGEPLegacyPass(bool LowerGEP = false)
348 : FunctionPass(ID), LowerGEP(LowerGEP) {
349 initializeSeparateConstOffsetFromGEPLegacyPassPass(
350 *PassRegistry::getPassRegistry());
351 }
352
353 void getAnalysisUsage(AnalysisUsage &AU) const override {
354 AU.addRequired<DominatorTreeWrapperPass>();
355 AU.addRequired<LoopInfoWrapperPass>();
356 AU.addRequired<TargetTransformInfoWrapperPass>();
357 AU.setPreservesCFG();
358 AU.addRequired<TargetLibraryInfoWrapperPass>();
359 }
360
361 bool runOnFunction(Function &F) override;
362
363private:
364 bool LowerGEP;
365};
366
367/// A pass that tries to split every GEP in the function into a variadic
368 /// base and a constant offset. It works at function scope because searching for the
369/// constant offset may inspect other basic blocks.
370class SeparateConstOffsetFromGEP {
371public:
372 SeparateConstOffsetFromGEP(
373 DominatorTree *DT, LoopInfo *LI, TargetLibraryInfo *TLI,
374 function_ref<TargetTransformInfo &(Function &)> GetTTI, bool LowerGEP)
375 : DT(DT), LI(LI), TLI(TLI), GetTTI(GetTTI), LowerGEP(LowerGEP) {}
376
377 bool run(Function &F);
378
379private:
380 /// Track the operands of an add or sub.
381 using ExprKey = std::pair<Value *, Value *>;
382
383 /// Create a pair for use as a map key for a commutable operation.
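  /// For illustration: createNormalizedCommutablePair(%a, %b) and
  /// createNormalizedCommutablePair(%b, %a) yield the same key, so a dominating
  /// "add %a, %b" is found regardless of the operand order it was matched with.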
384 static ExprKey createNormalizedCommutablePair(Value *A, Value *B) {
385 if (A < B)
386 return {A, B};
387 return {B, A};
388 }
389
390 /// Tries to split the given GEP into a variadic base and a constant offset,
391 /// and returns true if the splitting succeeds.
392 bool splitGEP(GetElementPtrInst *GEP);
393
394 /// Tries to reorder the given GEP with the GEP that produces the base if
395 /// doing so results in producing a constant offset as the outermost
396 /// index.
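  ///
  /// Illustrative example (not from the original comments): for a trivial chain
  ///   %inner = getelementptr float, ptr %p, i64 100   ; constant offset
  ///   %outer = getelementptr float, ptr %inner, i64 %i
  /// reordering produces
  ///   %inner2 = getelementptr float, ptr %p, i64 %i
  ///   %outer2 = getelementptr float, ptr %inner2, i64 100
  /// so the constant offset becomes the outermost index, where the backend can
  /// fold it into a reg+immediate addressing mode.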
397 bool reorderGEP(GetElementPtrInst *GEP, TargetTransformInfo &TTI);
398
399 /// Lower a GEP with multiple indices into multiple GEPs with a single index.
400 /// Function splitGEP already split the original GEP into a variadic part and
401 /// a constant offset (i.e., AccumulativeByteOffset). This function lowers the
402 /// variadic part into a set of GEPs with a single index and applies
403 /// AccumulativeByteOffset to it.
404 /// \p Variadic The variadic part of the original GEP.
405 /// \p AccumulativeByteOffset The constant offset.
406 void lowerToSingleIndexGEPs(GetElementPtrInst *Variadic,
407 int64_t AccumulativeByteOffset);
408
409 /// Lower a GEP with multiple indices into ptrtoint+arithmetics+inttoptr form.
410 /// Function splitGEP already split the original GEP into a variadic part and
411 /// a constant offset (i.e., AccumulativeByteOffset). This function lowers the
412 /// variadic part into a set of arithmetic operations and applies
413 /// AccumulativeByteOffset to it.
414 /// \p Variadic The variadic part of the original GEP.
415 /// \p AccumulativeByteOffset The constant offset.
416 void lowerToArithmetics(GetElementPtrInst *Variadic,
417 int64_t AccumulativeByteOffset);
418
419 /// Finds the constant offset within each index and accumulates them. If
420 /// LowerGEP is true, it searches the indices of both sequential and structure
421 /// types; otherwise it only searches sequential indices. The output
422 /// NeedsExtraction indicates whether we successfully found a non-zero constant
423 /// offset.
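  ///
  /// Illustrative example (struct layout is hypothetical): given
  ///   %j2 = add i64 %j, 2
  ///   %p1 = getelementptr [10 x %struct], ptr %p, i64 %i, i64 %j2, i32 3
  /// and assuming field 3 of %struct lives at byte offset 12, this returns
  ///   2 * sizeof(%struct) + 12
  /// when LowerGEP is true (struct indices are only accumulated in that mode),
  /// and sets NeedsExtraction to true.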
424 int64_t accumulateByteOffset(GetElementPtrInst *GEP, bool &NeedsExtraction);
425
426 /// Canonicalize array indices to pointer-size integers. This helps to
427 /// simplify the logic of splitting a GEP. For example, if a + b is a
428 /// pointer-size integer, we have
429 /// gep base, a + b = gep (gep base, a), b
430 /// However, this equality may not hold if the size of a + b is smaller than
431 /// the pointer size, because LLVM conceptually sign-extends GEP indices to
432 /// pointer size before computing the address
433 /// (http://llvm.org/docs/LangRef.html#id181).
434 ///
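  /// For example (illustrative, assuming a 64-bit index type), the i32 index in
  ///   %p = getelementptr inbounds float, ptr %base, i32 %i
  /// is promoted so that the GEP becomes
  ///   %idxprom = sext i32 %i to i64
  ///   %p = getelementptr inbounds float, ptr %base, i64 %idxprom
  ///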
435 /// This canonicalization is very likely already done in clang and
436 /// instcombine. Therefore, the program will probably remain the same.
437 ///
438 /// Returns true if the module changes.
439 ///
440 /// Verified in @i32_add in split-gep.ll
441 bool canonicalizeArrayIndicesToIndexSize(GetElementPtrInst *GEP);
442
443 /// Optimize sext(a)+sext(b) to sext(a+b) when a+b can't sign overflow.
444 /// SeparateConstOffsetFromGEP distributes a sext to leaves before extracting
445 /// the constant offset. After extraction, it becomes desirable to reunite the
446 /// distributed sexts. For example,
447 ///
448 /// &a[sext(i +nsw (j +nsw 5)]
449 /// => distribute &a[sext(i) +nsw (sext(j) +nsw 5)]
450 /// => constant extraction &a[sext(i) + sext(j)] + 5
451 /// => reunion &a[sext(i +nsw j)] + 5
452 bool reuniteExts(Function &F);
453
454 /// A helper that reunites sexts in an instruction.
455 bool reuniteExts(Instruction *I);
456
457 /// Find the closest dominator of <Dominatee> that is equivalent to <Key>.
458 Instruction *findClosestMatchingDominator(
459 ExprKey Key, Instruction *Dominatee,
460 DenseMap<ExprKey, SmallVector<Instruction *, 2>> &DominatingExprs);
461
462 /// Verify F is free of dead code.
463 void verifyNoDeadCode(Function &F);
464
465 bool hasMoreThanOneUseInLoop(Value *v, Loop *L);
466
467 // Swap the index operands of two GEPs.
468 void swapGEPOperand(GetElementPtrInst *First, GetElementPtrInst *Second);
469
470 // Check if it is safe to swap the index operands of two GEPs.
471 bool isLegalToSwapOperand(GetElementPtrInst *First, GetElementPtrInst *Second,
472 Loop *CurLoop);
473
474 const DataLayout *DL = nullptr;
475 DominatorTree *DT = nullptr;
476 LoopInfo *LI;
477 TargetLibraryInfo *TLI;
478 // Retrieved lazily since not always used.
479 function_ref<TargetTransformInfo &(Function &)> GetTTI;
480
481 /// Whether to lower a GEP with multiple indices into arithmetic operations or
482 /// multiple GEPs with a single index.
483 bool LowerGEP;
484
485 DenseMap<ExprKey, SmallVector<Instruction *, 2>> DominatingAdds;
486 DenseMap<ExprKey, SmallVector<Instruction *, 2>> DominatingSubs;
487};
488
489} // end anonymous namespace
490
491char SeparateConstOffsetFromGEPLegacyPass::ID = 0;
492
493INITIALIZE_PASS_BEGIN(
494 SeparateConstOffsetFromGEPLegacyPass, "separate-const-offset-from-gep",
495 "Split GEPs to a variadic base and a constant offset for better CSE", false,
496 false)
497INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
498INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
499INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
500INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
501
502INITIALIZE_PASS_END(
503 SeparateConstOffsetFromGEPLegacyPass, "separate-const-offset-from-gep",
504 "Split GEPs to a variadic base and a constant offset for better CSE", false,
505 false)
506
507FunctionPass *llvm::createSeparateConstOffsetFromGEPPass(bool LowerGEP) {
508 return new SeparateConstOffsetFromGEPLegacyPass(LowerGEP);
509}
510
511bool ConstantOffsetExtractor::CanTraceInto(bool SignExtended,
512 bool ZeroExtended,
513 BinaryOperator *BO,
514 bool NonNegative) {
515 // We only consider ADD, SUB and OR, because a non-zero constant found in
516 // expressions composed of these operations can be easily hoisted as a
517 // constant offset by reassociation.
518 if (BO->getOpcode() != Instruction::Add &&
519 BO->getOpcode() != Instruction::Sub &&
520 BO->getOpcode() != Instruction::Or) {
521 return false;
522 }
523
524 Value *LHS = BO->getOperand(0), *RHS = BO->getOperand(1);
525 // Do not trace into "or" unless it is equivalent to "add".
526 // This is the case if the or's disjoint flag is set.
527 if (BO->getOpcode() == Instruction::Or &&
528 !cast<PossiblyDisjointInst>(BO)->isDisjoint())
529 return false;
530
531 // FIXME: We don't currently support constants from the RHS of subs,
532 // when we are zero-extended, because we need a way to zero-extend
533 // them before they are negated.
534 if (ZeroExtended && !SignExtended && BO->getOpcode() == Instruction::Sub)
535 return false;
536
537 // In addition, tracing into BO requires that its surrounding s/zext (if
538 // any) is distributable to both operands.
539 //
540 // Suppose BO = A op B.
541 // SignExtended | ZeroExtended | Distributable?
542 // --------------+--------------+----------------------------------
543 // 0 | 0 | true because no s/zext exists
544 // 0 | 1 | zext(BO) == zext(A) op zext(B)
545 // 1 | 0 | sext(BO) == sext(A) op sext(B)
546 // 1 | 1 | zext(sext(BO)) ==
547 // | | zext(sext(A)) op zext(sext(B))
548 if (BO->getOpcode() == Instruction::Add && !ZeroExtended && NonNegative) {
549 // If a + b >= 0 and (a >= 0 or b >= 0), then
550 // sext(a + b) = sext(a) + sext(b)
551 // even if the addition is not marked nsw.
552 //
553 // Leveraging this invariant, we can trace into an sext'ed inbound GEP
554 // index if the constant offset is non-negative.
555 //
556 // Verified in @sext_add in split-gep.ll.
557 if (ConstantInt *ConstLHS = dyn_cast<ConstantInt>(LHS)) {
558 if (!ConstLHS->isNegative())
559 return true;
560 }
561 if (ConstantInt *ConstRHS = dyn_cast<ConstantInt>(RHS)) {
562 if (!ConstRHS->isNegative())
563 return true;
564 }
565 }
566
567 // sext (add/sub nsw A, B) == add/sub nsw (sext A), (sext B)
568 // zext (add/sub nuw A, B) == add/sub nuw (zext A), (zext B)
569 if (BO->getOpcode() == Instruction::Add ||
570 BO->getOpcode() == Instruction::Sub) {
571 if (SignExtended && !BO->hasNoSignedWrap())
572 return false;
573 if (ZeroExtended && !BO->hasNoUnsignedWrap())
574 return false;
575 }
576
577 return true;
578}
579
580APInt ConstantOffsetExtractor::findInEitherOperand(BinaryOperator *BO,
581 bool SignExtended,
582 bool ZeroExtended) {
583 // Save off the current height of the chain, in case we need to restore it.
584 size_t ChainLength = UserChain.size();
585
586 // BO being non-negative does not shed light on whether its operands are
587 // non-negative. Clear the NonNegative flag here.
588 APInt ConstantOffset = find(BO->getOperand(0), SignExtended, ZeroExtended,
589 /* NonNegative */ false);
590 // If we found a constant offset in the left operand, stop and return that.
591 // This shortcut might cause us to miss opportunities of combining the
592 // constant offsets in both operands, e.g., (a + 4) + (b + 5) => (a + b) + 9.
593 // However, such cases are probably already handled by -instcombine,
594 // given this pass runs after the standard optimizations.
595 if (ConstantOffset != 0) return ConstantOffset;
596
597 // Reset the chain back to where it was when we started exploring this node,
598 // since visiting the LHS didn't pan out.
599 UserChain.resize(ChainLength);
600
601 ConstantOffset = find(BO->getOperand(1), SignExtended, ZeroExtended,
602 /* NonNegative */ false);
603 // If BO is a sub operator, negate the constant offset found in the right
604 // operand.
605 if (BO->getOpcode() == Instruction::Sub)
606 ConstantOffset = -ConstantOffset;
607
608 // If RHS wasn't a suitable candidate either, reset the chain again.
609 if (ConstantOffset == 0)
610 UserChain.resize(ChainLength);
611
612 return ConstantOffset;
613}
614
615APInt ConstantOffsetExtractor::find(Value *V, bool SignExtended,
616 bool ZeroExtended, bool NonNegative) {
617 // TODO(jingyue): We could trace into integer/pointer casts, such as
618 // inttoptr, ptrtoint, bitcast, and addrspacecast. We choose to handle only
619 // integers because it gives good enough results for our benchmarks.
620 unsigned BitWidth = cast<IntegerType>(V->getType())->getBitWidth();
621
622 // We cannot do much with Values that are not a User, such as an Argument.
623 User *U = dyn_cast<User>(V);
624 if (U == nullptr) return APInt(BitWidth, 0);
625
626 APInt ConstantOffset(BitWidth, 0);
627 if (ConstantInt *CI = dyn_cast<ConstantInt>(V)) {
628 // Hooray, we found it!
629 ConstantOffset = CI->getValue();
630 } else if (BinaryOperator *BO = dyn_cast<BinaryOperator>(V)) {
631 // Trace into subexpressions for more hoisting opportunities.
632 if (CanTraceInto(SignExtended, ZeroExtended, BO, NonNegative))
633 ConstantOffset = findInEitherOperand(BO, SignExtended, ZeroExtended);
634 } else if (isa<TruncInst>(V)) {
635 ConstantOffset =
636 find(U->getOperand(0), SignExtended, ZeroExtended, NonNegative)
637 .trunc(BitWidth);
638 } else if (isa<SExtInst>(V)) {
639 ConstantOffset = find(U->getOperand(0), /* SignExtended */ true,
640 ZeroExtended, NonNegative).sext(BitWidth);
641 } else if (isa<ZExtInst>(V)) {
642 // As an optimization, we can clear the SignExtended flag because
643 // sext(zext(a)) = zext(a). Verified in @sext_zext in split-gep.ll.
644 //
645 // Clear the NonNegative flag, because zext(a) >= 0 does not imply a >= 0.
646 ConstantOffset =
647 find(U->getOperand(0), /* SignExtended */ false,
648 /* ZeroExtended */ true, /* NonNegative */ false).zext(BitWidth);
649 }
650
651 // If we found a non-zero constant offset, add it to the path for
652 // rebuildWithoutConstOffset. Zero is a valid constant offset, but doesn't
653 // help this optimization.
654 if (ConstantOffset != 0)
655 UserChain.push_back(U);
656 return ConstantOffset;
657}
658
659Value *ConstantOffsetExtractor::applyExts(Value *V) {
660 Value *Current = V;
661 // ExtInsts is built in the use-def order. Therefore, we apply them to V
662 // in the reversed order.
663 for (CastInst *I : llvm::reverse(ExtInsts)) {
664 if (Constant *C = dyn_cast<Constant>(Current)) {
665 // Try to constant fold the cast.
666 Current = ConstantFoldCastOperand(I->getOpcode(), C, I->getType(), DL);
667 if (Current)
668 continue;
669 }
670
671 Instruction *Ext = I->clone();
672 Ext->setOperand(0, Current);
673 Ext->insertBefore(*IP->getParent(), IP);
674 Current = Ext;
675 }
676 return Current;
677}
678
679Value *ConstantOffsetExtractor::rebuildWithoutConstOffset() {
680 distributeExtsAndCloneChain(UserChain.size() - 1);
681 // Remove all nullptrs (used to be s/zext) from UserChain.
682 unsigned NewSize = 0;
683 for (User *I : UserChain) {
684 if (I != nullptr) {
685 UserChain[NewSize] = I;
686 NewSize++;
687 }
688 }
689 UserChain.resize(NewSize);
690 return removeConstOffset(UserChain.size() - 1);
691}
692
693Value *
694ConstantOffsetExtractor::distributeExtsAndCloneChain(unsigned ChainIndex) {
695 User *U = UserChain[ChainIndex];
696 if (ChainIndex == 0) {
697 assert(isa<ConstantInt>(U));
698 // If U is a ConstantInt, applyExts will return a ConstantInt as well.
699 return UserChain[ChainIndex] = cast<ConstantInt>(applyExts(U));
700 }
701
702 if (CastInst *Cast = dyn_cast<CastInst>(U)) {
703 assert(
704 (isa<SExtInst>(Cast) || isa<ZExtInst>(Cast) || isa<TruncInst>(Cast)) &&
705 "Only following instructions can be traced: sext, zext & trunc");
706 ExtInsts.push_back(Cast);
707 UserChain[ChainIndex] = nullptr;
708 return distributeExtsAndCloneChain(ChainIndex - 1);
709 }
710
711 // Function find only traces into BinaryOperator and CastInst.
712 BinaryOperator *BO = cast<BinaryOperator>(U);
713 // OpNo = which operand of BO is UserChain[ChainIndex - 1]
714 unsigned OpNo = (BO->getOperand(0) == UserChain[ChainIndex - 1] ? 0 : 1);
715 Value *TheOther = applyExts(BO->getOperand(1 - OpNo));
716 Value *NextInChain = distributeExtsAndCloneChain(ChainIndex - 1);
717
718 BinaryOperator *NewBO = nullptr;
719 if (OpNo == 0) {
720 NewBO = BinaryOperator::Create(BO->getOpcode(), NextInChain, TheOther,
721 BO->getName(), IP);
722 } else {
723 NewBO = BinaryOperator::Create(BO->getOpcode(), TheOther, NextInChain,
724 BO->getName(), IP);
725 }
726 return UserChain[ChainIndex] = NewBO;
727}
728
729Value *ConstantOffsetExtractor::removeConstOffset(unsigned ChainIndex) {
730 if (ChainIndex == 0) {
731 assert(isa<ConstantInt>(UserChain[ChainIndex]));
732 return ConstantInt::getNullValue(UserChain[ChainIndex]->getType());
733 }
734
735 BinaryOperator *BO = cast<BinaryOperator>(UserChain[ChainIndex]);
736 assert((BO->use_empty() || BO->hasOneUse()) &&
737 "distributeExtsAndCloneChain clones each BinaryOperator in "
738 "UserChain, so no one should be used more than "
739 "once");
740
741 unsigned OpNo = (BO->getOperand(0) == UserChain[ChainIndex - 1] ? 0 : 1);
742 assert(BO->getOperand(OpNo) == UserChain[ChainIndex - 1]);
743 Value *NextInChain = removeConstOffset(ChainIndex - 1);
744 Value *TheOther = BO->getOperand(1 - OpNo);
745
746 // If NextInChain is 0 and not the LHS of a sub, we can simplify the
747 // sub-expression to be just TheOther.
748 if (ConstantInt *CI = dyn_cast<ConstantInt>(NextInChain)) {
749 if (CI->isZero() && !(BO->getOpcode() == Instruction::Sub && OpNo == 0))
750 return TheOther;
751 }
752
753 BinaryOperator::BinaryOps NewOp = BO->getOpcode();
754 if (BO->getOpcode() == Instruction::Or) {
755 // Rebuild "or" as "add", because "or" may be invalid for the new
756 // expression.
757 //
758 // For instance, given
759 // a | (b + 5) where a and b + 5 have no common bits,
760 // we can extract 5 as the constant offset.
761 //
762 // However, reusing the "or" in the new index would give us
763 // (a | b) + 5
764 // which does not equal a | (b + 5).
765 //
766 // Replacing the "or" with "add" is fine, because
767 // a | (b + 5) = a + (b + 5) = (a + b) + 5
768 NewOp = Instruction::Add;
769 }
770
771 BinaryOperator *NewBO;
772 if (OpNo == 0) {
773 NewBO = BinaryOperator::Create(NewOp, NextInChain, TheOther, "", IP);
774 } else {
775 NewBO = BinaryOperator::Create(NewOp, TheOther, NextInChain, "", IP);
776 }
777 NewBO->takeName(BO);
778 return NewBO;
779}
780
781Value *ConstantOffsetExtractor::Extract(Value *Idx, GetElementPtrInst *GEP,
782 User *&UserChainTail) {
783 ConstantOffsetExtractor Extractor(GEP->getIterator());
784 // Find a non-zero constant offset first.
785 APInt ConstantOffset =
786 Extractor.find(Idx, /* SignExtended */ false, /* ZeroExtended */ false,
787 GEP->isInBounds());
788 if (ConstantOffset == 0) {
789 UserChainTail = nullptr;
790 return nullptr;
791 }
792 // Separates the constant offset from the GEP index.
793 Value *IdxWithoutConstOffset = Extractor.rebuildWithoutConstOffset();
794 UserChainTail = Extractor.UserChain.back();
795 return IdxWithoutConstOffset;
796}
797
798int64_t ConstantOffsetExtractor::Find(Value *Idx, GetElementPtrInst *GEP) {
799 // If Idx is an index of an inbound GEP, Idx is guaranteed to be non-negative.
800 return ConstantOffsetExtractor(GEP->getIterator())
801 .find(Idx, /* SignExtended */ false, /* ZeroExtended */ false,
802 GEP->isInBounds())
803 .getSExtValue();
804}
805
806bool SeparateConstOffsetFromGEP::canonicalizeArrayIndicesToIndexSize(
807 GetElementPtrInst *GEP) {
808 bool Changed = false;
809 Type *PtrIdxTy = DL->getIndexType(GEP->getType());
810 gep_type_iterator GTI = gep_type_begin(*GEP);
811 for (User::op_iterator I = GEP->op_begin() + 1, E = GEP->op_end();
812 I != E; ++I, ++GTI) {
813 // Skip struct member indices which must be i32.
814 if (GTI.isSequential()) {
815 if ((*I)->getType() != PtrIdxTy) {
816 *I = CastInst::CreateIntegerCast(*I, PtrIdxTy, true, "idxprom",
817 GEP->getIterator());
818 Changed = true;
819 }
820 }
821 }
822 return Changed;
823}
824
825int64_t
826SeparateConstOffsetFromGEP::accumulateByteOffset(GetElementPtrInst *GEP,
827 bool &NeedsExtraction) {
828 NeedsExtraction = false;
829 int64_t AccumulativeByteOffset = 0;
830 gep_type_iterator GTI = gep_type_begin(*GEP);
831 for (unsigned I = 1, E = GEP->getNumOperands(); I != E; ++I, ++GTI) {
832 if (GTI.isSequential()) {
833 // Constant offsets of scalable types are not really constant.
834 if (GTI.getIndexedType()->isScalableTy())
835 continue;
836
837 // Tries to extract a constant offset from this GEP index.
838 int64_t ConstantOffset =
839 ConstantOffsetExtractor::Find(GEP->getOperand(I), GEP);
840 if (ConstantOffset != 0) {
841 NeedsExtraction = true;
842 // A GEP may have multiple indices. We accumulate the extracted
843 // constant offset to a byte offset, and later offset the remainder of
844 // the original GEP with this byte offset.
845 AccumulativeByteOffset +=
846 ConstantOffset * GTI.getSequentialElementStride(*DL);
847 }
848 } else if (LowerGEP) {
849 StructType *StTy = GTI.getStructType();
850 uint64_t Field = cast<ConstantInt>(GEP->getOperand(I))->getZExtValue();
851 // Skip field 0 as the offset is always 0.
852 if (Field != 0) {
853 NeedsExtraction = true;
854 AccumulativeByteOffset +=
855 DL->getStructLayout(StTy)->getElementOffset(Field);
856 }
857 }
858 }
859 return AccumulativeByteOffset;
860}
861
862void SeparateConstOffsetFromGEP::lowerToSingleIndexGEPs(
863 GetElementPtrInst *Variadic, int64_t AccumulativeByteOffset) {
864 IRBuilder<> Builder(Variadic);
865 Type *PtrIndexTy = DL->getIndexType(Variadic->getType());
866
867 Value *ResultPtr = Variadic->getOperand(0);
868 Loop *L = LI->getLoopFor(Variadic->getParent());
869 // Only consider swapping when the base is loop invariant and not used more than once in the loop.
870 bool isSwapCandidate =
871 L && L->isLoopInvariant(ResultPtr) &&
872 !hasMoreThanOneUseInLoop(ResultPtr, L);
873 Value *FirstResult = nullptr;
874
875 gep_type_iterator GTI = gep_type_begin(*Variadic);
876 // Create an ugly GEP for each sequential index. We don't create GEPs for
877 // structure indices, as they are accumulated in the constant offset index.
878 for (unsigned I = 1, E = Variadic->getNumOperands(); I != E; ++I, ++GTI) {
879 if (GTI.isSequential()) {
880 Value *Idx = Variadic->getOperand(I);
881 // Skip zero indices.
882 if (ConstantInt *CI = dyn_cast<ConstantInt>(Idx))
883 if (CI->isZero())
884 continue;
885
886 APInt ElementSize = APInt(PtrIndexTy->getIntegerBitWidth(),
887 DL->getTypeAllocSize(GTI.getIndexedType()));
888 // Scale the index by element size.
889 if (ElementSize != 1) {
890 if (ElementSize.isPowerOf2()) {
891 Idx = Builder.CreateShl(
892 Idx, ConstantInt::get(PtrIndexTy, ElementSize.logBase2()));
893 } else {
894 Idx =
895 Builder.CreateMul(Idx, ConstantInt::get(PtrIndexTy, ElementSize));
896 }
897 }
898 // Create an ugly GEP with a single index for each index.
899 ResultPtr = Builder.CreatePtrAdd(ResultPtr, Idx, "uglygep");
900 if (FirstResult == nullptr)
901 FirstResult = ResultPtr;
902 }
903 }
904
905 // Create a GEP with the constant offset index.
906 if (AccumulativeByteOffset != 0) {
907 Value *Offset = ConstantInt::get(PtrIndexTy, AccumulativeByteOffset);
908 ResultPtr = Builder.CreatePtrAdd(ResultPtr, Offset, "uglygep");
909 } else
910 isSwapCandidate = false;
911
912 // If we created a GEP with constant index, and the base is loop invariant,
913 // then we swap the first one with it, so LICM can move constant GEP out
914 // later.
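  // Illustrative example (assuming %base is loop invariant and %scaled.idx is
  // not): the chain built above,
  //   %uglygep  = getelementptr i8, ptr %base, i64 %scaled.idx
  //   %uglygep2 = getelementptr i8, ptr %uglygep, i64 1024
  // is rewritten by swapGEPOperand into
  //   %uglygep  = getelementptr i8, ptr %base, i64 1024   ; loop invariant
  //   %uglygep2 = getelementptr i8, ptr %uglygep, i64 %scaled.idx
  // so that LICM can hoist the constant-offset GEP out of the loop.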
915 auto *FirstGEP = dyn_cast_or_null<GetElementPtrInst>(FirstResult);
916 auto *SecondGEP = dyn_cast<GetElementPtrInst>(ResultPtr);
917 if (isSwapCandidate && isLegalToSwapOperand(FirstGEP, SecondGEP, L))
918 swapGEPOperand(FirstGEP, SecondGEP);
919
920 Variadic->replaceAllUsesWith(ResultPtr);
921 Variadic->eraseFromParent();
922}
923
924void
925SeparateConstOffsetFromGEP::lowerToArithmetics(GetElementPtrInst *Variadic,
926 int64_t AccumulativeByteOffset) {
927 IRBuilder<> Builder(Variadic);
928 Type *IntPtrTy = DL->getIntPtrType(Variadic->getType());
929 assert(IntPtrTy == DL->getIndexType(Variadic->getType()) &&
930 "Pointer type must match index type for arithmetic-based lowering of "
931 "split GEPs");
932
933 Value *ResultPtr = Builder.CreatePtrToInt(Variadic->getOperand(0), IntPtrTy);
934 gep_type_iterator GTI = gep_type_begin(*Variadic);
935 // Create ADD/SHL/MUL arithmetic operations for each sequential index. We
936 // don't create arithmetics for structure indices, as they are accumulated
937 // in the constant offset index.
938 for (unsigned I = 1, E = Variadic->getNumOperands(); I != E; ++I, ++GTI) {
939 if (GTI.isSequential()) {
940 Value *Idx = Variadic->getOperand(I);
941 // Skip zero indices.
942 if (ConstantInt *CI = dyn_cast<ConstantInt>(Idx))
943 if (CI->isZero())
944 continue;
945
946 APInt ElementSize = APInt(IntPtrTy->getIntegerBitWidth(),
947 DL->getTypeAllocSize(GTI.getIndexedType()));
948 // Scale the index by element size.
949 if (ElementSize != 1) {
950 if (ElementSize.isPowerOf2()) {
951 Idx = Builder.CreateShl(
952 Idx, ConstantInt::get(IntPtrTy, ElementSize.logBase2()));
953 } else {
954 Idx = Builder.CreateMul(Idx, ConstantInt::get(IntPtrTy, ElementSize));
955 }
956 }
957 // Create an ADD for each index.
958 ResultPtr = Builder.CreateAdd(ResultPtr, Idx);
959 }
960 }
961
962 // Create an ADD for the constant offset index.
963 if (AccumulativeByteOffset != 0) {
964 ResultPtr = Builder.CreateAdd(
965 ResultPtr, ConstantInt::get(IntPtrTy, AccumulativeByteOffset));
966 }
967
968 ResultPtr = Builder.CreateIntToPtr(ResultPtr, Variadic->getType());
969 Variadic->replaceAllUsesWith(ResultPtr);
970 Variadic->eraseFromParent();
971}
972
973bool SeparateConstOffsetFromGEP::reorderGEP(GetElementPtrInst *GEP,
974 TargetTransformInfo &TTI) {
975 Type *GEPType = GEP->getResultElementType();
976 // TODO: support reordering for non-trivial GEP chains
977 if (GEPType->isAggregateType() || GEP->getNumIndices() != 1)
978 return false;
979
980 auto PtrGEP = dyn_cast<GetElementPtrInst>(GEP->getPointerOperand());
981 if (!PtrGEP)
982 return false;
983 Type *PtrGEPType = PtrGEP->getResultElementType();
984 // TODO: support reordering for non-trivial GEP chains
985 if (PtrGEPType->isAggregateType() || PtrGEP->getNumIndices() != 1)
986 return false;
987
988 // TODO: support reordering for non-trivial GEP chains
989 if (PtrGEPType != GEPType ||
990 PtrGEP->getSourceElementType() != GEP->getSourceElementType())
991 return false;
992
993 bool NestedNeedsExtraction;
994 int64_t NestedByteOffset =
995 accumulateByteOffset(PtrGEP, NestedNeedsExtraction);
996 if (!NestedNeedsExtraction)
997 return false;
998
999 unsigned AddrSpace = PtrGEP->getPointerAddressSpace();
1000 if (!TTI.isLegalAddressingMode(GEP->getResultElementType(),
1001 /*BaseGV=*/nullptr, NestedByteOffset,
1002 /*HasBaseReg=*/true, /*Scale=*/0, AddrSpace))
1003 return false;
1004
1005 IRBuilder<> Builder(GEP);
1006 Builder.SetCurrentDebugLocation(GEP->getDebugLoc());
1007 bool GEPInBounds = GEP->isInBounds();
1008 bool PtrGEPInBounds = PtrGEP->isInBounds();
1009 bool IsChainInBounds = GEPInBounds && PtrGEPInBounds;
1010 if (IsChainInBounds) {
1011 auto GEPIdx = GEP->indices().begin();
1012 auto KnownGEPIdx = computeKnownBits(GEPIdx->get(), *DL);
1013 IsChainInBounds &= KnownGEPIdx.isNonNegative();
1014 if (IsChainInBounds) {
1015 auto PtrGEPIdx = PtrGEP->indices().begin();
1016 auto KnownPtrGEPIdx = computeKnownBits(PtrGEPIdx->get(), *DL);
1017 IsChainInBounds &= KnownPtrGEPIdx.isNonNegative();
1018 }
1019 }
1020
1021 // For trivial GEP chains, we can swap the indices.
1022 auto NewSrc = Builder.CreateGEP(PtrGEPType, PtrGEP->getPointerOperand(),
1023 SmallVector<Value *, 4>(GEP->indices()));
1024 cast<GetElementPtrInst>(NewSrc)->setIsInBounds(IsChainInBounds);
1025 auto NewGEP = Builder.CreateGEP(GEPType, NewSrc,
1026 SmallVector<Value *, 4>(PtrGEP->indices()));
1027 cast<GetElementPtrInst>(NewGEP)->setIsInBounds(IsChainInBounds);
1028 GEP->replaceAllUsesWith(NewGEP);
1029 GEP->eraseFromParent();
1030 return true;
1031}
1032
1033bool SeparateConstOffsetFromGEP::splitGEP(GetElementPtrInst *GEP) {
1034 // Skip vector GEPs.
1035 if (GEP->getType()->isVectorTy())
1036 return false;
1037
1038 // The backend can already nicely handle the case where all indices are
1039 // constant.
1040 if (GEP->hasAllConstantIndices())
1041 return false;
1042
1043 bool Changed = canonicalizeArrayIndicesToIndexSize(GEP);
1044
1045 bool NeedsExtraction;
1046 int64_t AccumulativeByteOffset = accumulateByteOffset(GEP, NeedsExtraction);
1047
1048 TargetTransformInfo &TTI = GetTTI(*GEP->getFunction());
1049
1050 if (!NeedsExtraction) {
1051 Changed |= reorderGEP(GEP, TTI);
1052 return Changed;
1053 }
1054
1055 // If LowerGEP is disabled, before really splitting the GEP, check whether the
1056 // backend supports the addressing mode we are about to produce. If not, this
1057 // splitting probably won't be beneficial.
1058 // If LowerGEP is enabled, even if the extracted constant offset cannot match
1059 // the addressing mode, we can still do optimizations on the other lowered
1060 // parts of the variable indices. Therefore, we don't check for addressing
1061 // modes in that case.
1062 if (!LowerGEP) {
1063 unsigned AddrSpace = GEP->getPointerAddressSpace();
1064 if (!TTI.isLegalAddressingMode(GEP->getResultElementType(),
1065 /*BaseGV=*/nullptr, AccumulativeByteOffset,
1066 /*HasBaseReg=*/true, /*Scale=*/0,
1067 AddrSpace)) {
1068 return Changed;
1069 }
1070 }
1071
1072 // Remove the constant offset in each sequential index. The resultant GEP
1073 // computes the variadic base.
1074 // Notice that we don't remove struct field indices here. If LowerGEP is
1075 // disabled, a structure index is not accumulated and we still use the old
1076 // one. If LowerGEP is enabled, a structure index is accumulated in the
1077 // constant offset. LowerToSingleIndexGEPs or lowerToArithmetics will later
1078 // handle the constant offset and won't need a new structure index.
1079 gep_type_iterator GTI = gep_type_begin(*GEP);
1080 for (unsigned I = 1, E = GEP->getNumOperands(); I != E; ++I, ++GTI) {
1081 if (GTI.isSequential()) {
1082 // Constant offsets of scalable types are not really constant.
1083 if (GTI.getIndexedType()->isScalableTy())
1084 continue;
1085
1086 // Splits this GEP index into a variadic part and a constant offset, and
1087 // uses the variadic part as the new index.
1088 Value *OldIdx = GEP->getOperand(I);
1089 User *UserChainTail;
1090 Value *NewIdx =
1091 ConstantOffsetExtractor::Extract(OldIdx, GEP, UserChainTail);
1092 if (NewIdx != nullptr) {
1093 // Switches to the index with the constant offset removed.
1094 GEP->setOperand(I, NewIdx);
1095 // After switching to the new index, we can garbage-collect UserChain
1096 // and the old index if they are not used.
1097 RecursivelyDeleteTriviallyDeadInstructions(UserChainTail);
1098 RecursivelyDeleteTriviallyDeadInstructions(OldIdx);
1099 }
1100 }
1101 }
1102
1103 // Clear the inbounds attribute because the new index may be off-bound.
1104 // e.g.,
1105 //
1106 // b = add i64 a, 5
1107 // addr = gep inbounds float, float* p, i64 b
1108 //
1109 // is transformed to:
1110 //
1111 // addr2 = gep float, float* p, i64 a ; inbounds removed
1112 // addr = gep inbounds float, float* addr2, i64 5
1113 //
1114 // If a is -4, although the old index b is in bounds, the new index a is
1115 // off-bound. http://llvm.org/docs/LangRef.html#id181 says "if the
1116 // inbounds keyword is not present, the offsets are added to the base
1117 // address with silently-wrapping two's complement arithmetic".
1118 // Therefore, the final code will be semantically equivalent.
1119 //
1120 // TODO(jingyue): do some range analysis to keep as many inbounds as
1121 // possible. GEPs with inbounds are more friendly to alias analysis.
1122 bool GEPWasInBounds = GEP->isInBounds();
1123 GEP->setIsInBounds(false);
1124
1125 // Lowers a GEP to either GEPs with a single index or arithmetic operations.
1126 if (LowerGEP) {
1127 // As currently BasicAA does not analyze ptrtoint/inttoptr, do not lower to
1128 // arithmetic operations if the target uses alias analysis in codegen.
1129 // Additionally, pointers that aren't integral (and so can't be safely
1130 // converted to integers) or those whose offset size is different from their
1131 // pointer size (which means that doing integer arithmetic on them could
1132 // affect that data) can't be lowered in this way.
1133 unsigned AddrSpace = GEP->getPointerAddressSpace();
1134 bool PointerHasExtraData = DL->getPointerSizeInBits(AddrSpace) !=
1135 DL->getIndexSizeInBits(AddrSpace);
1136 if (TTI.useAA() || DL->isNonIntegralAddressSpace(AddrSpace) ||
1137 PointerHasExtraData)
1138 lowerToSingleIndexGEPs(GEP, AccumulativeByteOffset);
1139 else
1140 lowerToArithmetics(GEP, AccumulativeByteOffset);
1141 return true;
1142 }
1143
1144 // No need to create another GEP if the accumulative byte offset is 0.
1145 if (AccumulativeByteOffset == 0)
1146 return true;
1147
1148 // Offsets the base with the accumulative byte offset.
1149 //
1150 // %gep ; the base
1151 // ... %gep ...
1152 //
1153 // => add the offset
1154 //
1155 // %gep2 ; clone of %gep
1156 // %new.gep = gep i8, %gep2, %offset
1157 // %gep ; will be removed
1158 // ... %gep ...
1159 //
1160 // => replace all uses of %gep with %new.gep and remove %gep
1161 //
1162 // %gep2 ; clone of %gep
1163 // %new.gep = gep i8, %gep2, %offset
1164 // ... %new.gep ...
1165 Instruction *NewGEP = GEP->clone();
1166 NewGEP->insertBefore(GEP);
1167
1168 Type *PtrIdxTy = DL->getIndexType(GEP->getType());
1169 IRBuilder<> Builder(GEP);
1170 NewGEP = cast<Instruction>(Builder.CreatePtrAdd(
1171 NewGEP, ConstantInt::get(PtrIdxTy, AccumulativeByteOffset, true),
1172 GEP->getName(), GEPWasInBounds));
1173 NewGEP->copyMetadata(*GEP);
1174
1175 GEP->replaceAllUsesWith(NewGEP);
1176 GEP->eraseFromParent();
1177
1178 return true;
1179}
1180
1181bool SeparateConstOffsetFromGEPLegacyPass::runOnFunction(Function &F) {
1182 if (skipFunction(F))
1183 return false;
1184 auto *DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
1185 auto *LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
1186 auto *TLI = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F);
1187 auto GetTTI = [this](Function &F) -> TargetTransformInfo & {
1188 return this->getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
1189 };
1190 SeparateConstOffsetFromGEP Impl(DT, LI, TLI, GetTTI, LowerGEP);
1191 return Impl.run(F);
1192}
1193
1194bool SeparateConstOffsetFromGEP::run(Function &F) {
1195 if (DisableSeparateConstOffsetFromGEP)
1196 return false;
1197
1198 DL = &F.getParent()->getDataLayout();
1199 bool Changed = false;
1200 for (BasicBlock &B : F) {
1201 if (!DT->isReachableFromEntry(&B))
1202 continue;
1203
1204 for (Instruction &I : llvm::make_early_inc_range(B))
1205 if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(&I))
1206 Changed |= splitGEP(GEP);
1207 // No need to split GEP ConstantExprs because all its indices are constant
1208 // already.
1209 }
1210
1211 Changed |= reuniteExts(F);
1212
1213 if (VerifyNoDeadCode)
1214 verifyNoDeadCode(F);
1215
1216 return Changed;
1217}
1218
1219Instruction *SeparateConstOffsetFromGEP::findClosestMatchingDominator(
1220 ExprKey Key, Instruction *Dominatee,
1221 DenseMap<ExprKey, SmallVector<Instruction *, 2>> &DominatingExprs) {
1222 auto Pos = DominatingExprs.find(Key);
1223 if (Pos == DominatingExprs.end())
1224 return nullptr;
1225
1226 auto &Candidates = Pos->second;
1227 // Because we process the basic blocks in pre-order of the dominator tree, a
1228 // candidate that doesn't dominate the current instruction won't dominate any
1229 // future instruction either. Therefore, we pop it out of the stack. This
1230 // optimization makes the algorithm O(n).
1231 while (!Candidates.empty()) {
1232 Instruction *Candidate = Candidates.back();
1233 if (DT->dominates(Candidate, Dominatee))
1234 return Candidate;
1235 Candidates.pop_back();
1236 }
1237 return nullptr;
1238}
1239
1240bool SeparateConstOffsetFromGEP::reuniteExts(Instruction *I) {
1241 if (!I->getType()->isIntOrIntVectorTy())
1242 return false;
1243
1244 // Dom: LHS+RHS
1245 // I: sext(LHS)+sext(RHS)
1246 // If Dom can't sign overflow and Dom dominates I, optimize I to sext(Dom).
1247 // TODO: handle zext
1248 Value *LHS = nullptr, *RHS = nullptr;
1249 if (match(I, m_Add(m_SExt(m_Value(LHS)), m_SExt(m_Value(RHS))))) {
1250 if (LHS->getType() == RHS->getType()) {
1251 ExprKey Key = createNormalizedCommutablePair(LHS, RHS);
1252 if (auto *Dom = findClosestMatchingDominator(Key, I, DominatingAdds)) {
1253 Instruction *NewSExt =
1254 new SExtInst(Dom, I->getType(), "", I->getIterator());
1255 NewSExt->takeName(I);
1256 I->replaceAllUsesWith(NewSExt);
1257 RecursivelyDeleteTriviallyDeadInstructions(I);
1258 return true;
1259 }
1260 }
1261 } else if (match(I, m_Sub(m_SExt(m_Value(LHS)), m_SExt(m_Value(RHS))))) {
1262 if (LHS->getType() == RHS->getType()) {
1263 if (auto *Dom =
1264 findClosestMatchingDominator({LHS, RHS}, I, DominatingSubs)) {
1265 Instruction *NewSExt =
1266 new SExtInst(Dom, I->getType(), "", I->getIterator());
1267 NewSExt->takeName(I);
1268 I->replaceAllUsesWith(NewSExt);
1269 RecursivelyDeleteTriviallyDeadInstructions(I);
1270 return true;
1271 }
1272 }
1273 }
1274
1275 // Add I to DominatingExprs if it's an add/sub that can't sign overflow.
1276 if (match(I, m_NSWAdd(m_Value(LHS), m_Value(RHS)))) {
1277 if (programUndefinedIfPoison(I)) {
1278 ExprKey Key = createNormalizedCommutablePair(LHS, RHS);
1279 DominatingAdds[Key].push_back(I);
1280 }
1281 } else if (match(I, m_NSWSub(m_Value(LHS), m_Value(RHS)))) {
1282 if (programUndefinedIfPoison(I))
1283 DominatingSubs[{LHS, RHS}].push_back(I);
1284 }
1285 return false;
1286}
1287
1288bool SeparateConstOffsetFromGEP::reuniteExts(Function &F) {
1289 bool Changed = false;
1290 DominatingAdds.clear();
1291 DominatingSubs.clear();
1292 for (const auto Node : depth_first(DT)) {
1293 BasicBlock *BB = Node->getBlock();
1294 for (Instruction &I : llvm::make_early_inc_range(*BB))
1295 Changed |= reuniteExts(&I);
1296 }
1297 return Changed;
1298}
1299
1300void SeparateConstOffsetFromGEP::verifyNoDeadCode(Function &F) {
1301 for (BasicBlock &B : F) {
1302 for (Instruction &I : B) {
1303 if (isInstructionTriviallyDead(&I)) {
1304 std::string ErrMessage;
1305 raw_string_ostream RSO(ErrMessage);
1306 RSO << "Dead instruction detected!\n" << I << "\n";
1307 llvm_unreachable(RSO.str().c_str());
1308 }
1309 }
1310 }
1311}
1312
1313bool SeparateConstOffsetFromGEP::isLegalToSwapOperand(
1314 GetElementPtrInst *FirstGEP, GetElementPtrInst *SecondGEP, Loop *CurLoop) {
1315 if (!FirstGEP || !FirstGEP->hasOneUse())
1316 return false;
1317
1318 if (!SecondGEP || FirstGEP->getParent() != SecondGEP->getParent())
1319 return false;
1320
1321 if (FirstGEP == SecondGEP)
1322 return false;
1323
1324 unsigned FirstNum = FirstGEP->getNumOperands();
1325 unsigned SecondNum = SecondGEP->getNumOperands();
1326 // Give up if the number of operands is not 2.
1327 if (FirstNum != SecondNum || FirstNum != 2)
1328 return false;
1329
1330 Value *FirstBase = FirstGEP->getOperand(0);
1331 Value *SecondBase = SecondGEP->getOperand(0);
1332 Value *FirstOffset = FirstGEP->getOperand(1);
1333 // Give up if the index of the first GEP is loop invariant.
1334 if (CurLoop->isLoopInvariant(FirstOffset))
1335 return false;
1336
1337 // Give up if the bases don't have the same type.
1338 if (FirstBase->getType() != SecondBase->getType())
1339 return false;
1340
1341 Instruction *FirstOffsetDef = dyn_cast<Instruction>(FirstOffset);
1342
1343 // Check if the second operand of the first GEP has a constant coefficient.
1344 // For example, for the following code, we won't gain anything by
1345 // hoisting the second GEP out because the second GEP can be folded away.
1346 // %scevgep.sum.ur159 = add i64 %idxprom48.ur, 256
1347 // %67 = shl i64 %scevgep.sum.ur159, 2
1348 // %uglygep160 = getelementptr i8* %65, i64 %67
1349 // %uglygep161 = getelementptr i8* %uglygep160, i64 -1024
1350
1351 // Skip constant shift instruction which may be generated by Splitting GEPs.
1352 if (FirstOffsetDef && FirstOffsetDef->isShift() &&
1353 isa<ConstantInt>(FirstOffsetDef->getOperand(1)))
1354 FirstOffsetDef = dyn_cast<Instruction>(FirstOffsetDef->getOperand(0));
1355
1356 // Give up if FirstOffsetDef is an Add or Sub with a constant, because it
1357 // may not be profitable at all due to constant folding.
1358 if (FirstOffsetDef)
1359 if (BinaryOperator *BO = dyn_cast<BinaryOperator>(FirstOffsetDef)) {
1360 unsigned opc = BO->getOpcode();
1361 if ((opc == Instruction::Add || opc == Instruction::Sub) &&
1362 (isa<ConstantInt>(BO->getOperand(0)) ||
1363 isa<ConstantInt>(BO->getOperand(1))))
1364 return false;
1365 }
1366 return true;
1367}
1368
1369bool SeparateConstOffsetFromGEP::hasMoreThanOneUseInLoop(Value *V, Loop *L) {
1370 int UsesInLoop = 0;
1371 for (User *U : V->users()) {
1372 if (Instruction *User = dyn_cast<Instruction>(U))
1373 if (L->contains(User))
1374 if (++UsesInLoop > 1)
1375 return true;
1376 }
1377 return false;
1378}
1379
1380void SeparateConstOffsetFromGEP::swapGEPOperand(GetElementPtrInst *First,
1381 GetElementPtrInst *Second) {
1382 Value *Offset1 = First->getOperand(1);
1383 Value *Offset2 = Second->getOperand(1);
1384 First->setOperand(1, Offset2);
1385 Second->setOperand(1, Offset1);
1386
1387 // We changed p+o+c to p+c+o, p+c may not be inbound anymore.
1388 const DataLayout &DAL = First->getModule()->getDataLayout();
1389 APInt Offset(DAL.getIndexSizeInBits(
1390 cast<PointerType>(First->getType())->getAddressSpace()),
1391 0);
1392 Value *NewBase =
1393 First->stripAndAccumulateInBoundsConstantOffsets(DAL, Offset);
1394 uint64_t ObjectSize;
1395 if (!getObjectSize(NewBase, ObjectSize, DAL, TLI) ||
1396 Offset.ugt(ObjectSize)) {
1397 First->setIsInBounds(false);
1398 Second->setIsInBounds(false);
1399 } else
1400 First->setIsInBounds(true);
1401}
1402
1403void SeparateConstOffsetFromGEPPass::printPipeline(
1404 raw_ostream &OS, function_ref<StringRef(StringRef)> MapClassName2PassName) {
1405 static_cast<PassInfoMixin<SeparateConstOffsetFromGEPPass> *>(this)
1406 ->printPipeline(OS, MapClassName2PassName);
1407 OS << '<';
1408 if (LowerGEP)
1409 OS << "lower-gep";
1410 OS << '>';
1411}
1412
1413PreservedAnalyses
1414SeparateConstOffsetFromGEPPass::run(Function &F, FunctionAnalysisManager &AM) {
1415 auto *DT = &AM.getResult<DominatorTreeAnalysis>(F);
1416 auto *LI = &AM.getResult<LoopAnalysis>(F);
1417 auto *TLI = &AM.getResult<TargetLibraryAnalysis>(F);
1418 auto GetTTI = [&AM](Function &F) -> TargetTransformInfo & {
1419 return AM.getResult<TargetIRAnalysis>(F);
1420 };
1421 SeparateConstOffsetFromGEP Impl(DT, LI, TLI, GetTTI, LowerGEP);
1422 if (!Impl.run(F))
1423 return PreservedAnalyses::all();
1424 PreservedAnalyses PA;
1425 PA.preserveSet<CFGAnalyses>();
1426 return PA;
1427}
Definition: Analysis.h:109
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition: Analysis.h:115
void preserveSet()
Mark an analysis set as preserved.
Definition: Analysis.h:144
This class represents a sign extension of integer types.
void printPipeline(raw_ostream &OS, function_ref< StringRef(StringRef)> MapClassName2PassName)
PreservedAnalyses run(Function &F, FunctionAnalysisManager &)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1209
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:50
Class to represent struct types.
Definition: DerivedTypes.h:216
Analysis pass providing the TargetTransformInfo.
Analysis pass providing the TargetLibraryInfo.
Provides information about what library functions are available for the current target.
Wrapper pass for TargetTransformInfo.
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset, bool HasBaseReg, int64_t Scale, unsigned AddrSpace=0, Instruction *I=nullptr) const
Return true if the addressing mode represented by AM is legal for this target, for a load/store of th...
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
unsigned getIntegerBitWidth() const
bool isAggregateType() const
Return true if the type is an aggregate type.
Definition: Type.h:295
bool isScalableTy() const
Return true if this is a type whose size is a known multiple of vscale.
A Use represents the edge between a Value definition and its users.
Definition: Use.h:43
void setOperand(unsigned i, Value *Val)
Definition: User.h:174
Value * getOperand(unsigned i) const
Definition: User.h:169
unsigned getNumOperands() const
Definition: User.h:191
LLVM Value Representation.
Definition: Value.h:74
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:255
bool hasOneUse() const
Return true if there is exactly one use of this value.
Definition: Value.h:434
bool use_empty() const
Definition: Value.h:344
StringRef getName() const
Return a constant reference to the value's name.
Definition: Value.cpp:309
void takeName(Value *V)
Transfer the name from V to this value.
Definition: Value.cpp:383
An efficient, type-erasing, non-owning reference to a callable.
TypeSize getSequentialElementStride(const DataLayout &DL) const
This class implements an extremely fast bulk output stream that can only output to a stream.
Definition: raw_ostream.h:52
A raw_ostream that writes to an std::string.
Definition: raw_ostream.h:660
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
Key
PAL metadata keys.
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition: CallingConv.h:24
@ C
The default llvm calling convention, compatible with C.
Definition: CallingConv.h:34
BinaryOp_match< LHS, RHS, Instruction::Add > m_Add(const LHS &L, const RHS &R)
OverflowingBinaryOp_match< LHS, RHS, Instruction::Sub, OverflowingBinaryOperator::NoSignedWrap > m_NSWSub(const LHS &L, const RHS &R)
bool match(Val *V, const Pattern &P)
Definition: PatternMatch.h:49
class_match< Value > m_Value()
Match an arbitrary value and ignore it.
Definition: PatternMatch.h:92
OverflowingBinaryOp_match< LHS, RHS, Instruction::Add, OverflowingBinaryOperator::NoSignedWrap > m_NSWAdd(const LHS &L, const RHS &R)
CastInst_match< OpTy, SExtInst > m_SExt(const OpTy &Op)
Matches SExt.
BinaryOp_match< LHS, RHS, Instruction::Sub > m_Sub(const LHS &L, const RHS &R)
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:450
PointerTypeMap run(const Module &M)
Compute the PointerTypeMap for the module M.
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
@ Offset
Definition: DWP.cpp:456
auto find(R &&Range, const T &Val)
Provide wrappers to std::find which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1751
bool RecursivelyDeleteTriviallyDeadInstructions(Value *V, const TargetLibraryInfo *TLI=nullptr, MemorySSAUpdater *MSSAU=nullptr, std::function< void(Value *)> AboutToDeleteCallback=std::function< void(Value *)>())
If the specified value is a trivially dead instruction, delete it.
Definition: Local.cpp:533
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
Definition: STLExtras.h:665
bool isInstructionTriviallyDead(Instruction *I, const TargetLibraryInfo *TLI=nullptr)
Return true if the result produced by the instruction is not used, and the instruction will return.
Definition: Local.cpp:399
bool getObjectSize(const Value *Ptr, uint64_t &Size, const DataLayout &DL, const TargetLibraryInfo *TLI, ObjectSizeOpts Opts={})
Compute the size of the object pointed by Ptr.
auto reverse(ContainerTy &&C)
Definition: STLExtras.h:428
bool programUndefinedIfPoison(const Instruction *Inst)
void initializeSeparateConstOffsetFromGEPLegacyPassPass(PassRegistry &)
Constant * ConstantFoldCastOperand(unsigned Opcode, Constant *C, Type *DestTy, const DataLayout &DL)
Attempt to constant fold a cast with the specified operand.
FunctionPass * createSeparateConstOffsetFromGEPPass(bool LowerGEP=false)
@ First
Helpers to iterate all locations in the MemoryEffectsBase class.
void computeKnownBits(const Value *V, KnownBits &Known, const DataLayout &DL, unsigned Depth=0, AssumptionCache *AC=nullptr, const Instruction *CxtI=nullptr, const DominatorTree *DT=nullptr, bool UseInstrInfo=true)
Determine which bits of V are known to be either zero or one and return them in the KnownZero/KnownOn...
constexpr unsigned BitWidth
Definition: BitmaskEnum.h:191
gep_type_iterator gep_type_begin(const User *GEP)
iterator_range< df_iterator< T > > depth_first(const T &G)
A CRTP mix-in to automatically provide informational APIs needed for passes.
Definition: PassManager.h:91