SeparateConstOffsetFromGEP.cpp
1//===- SeparateConstOffsetFromGEP.cpp -------------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// Loop unrolling may create many similar GEPs for array accesses.
10// e.g., a 2-level loop
11//
12// float a[32][32]; // global variable
13//
14// for (int i = 0; i < 2; ++i) {
15// for (int j = 0; j < 2; ++j) {
16// ...
17// ... = a[x + i][y + j];
18// ...
19// }
20// }
21//
22// will probably be unrolled to:
23//
24// gep %a, 0, %x, %y; load
25// gep %a, 0, %x, %y + 1; load
26// gep %a, 0, %x + 1, %y; load
27// gep %a, 0, %x + 1, %y + 1; load
28//
29// LLVM's GVN does not use partial redundancy elimination yet, and is thus
30// unable to reuse (gep %a, 0, %x, %y). As a result, this missed optimization
31// incurs a significant slowdown on targets with limited addressing modes. For instance,
32// because the PTX target does not support the reg+reg addressing mode, the
33// NVPTX backend emits PTX code that literally computes the pointer address of
34// each GEP, wasting tons of registers. It emits the following PTX for the
35// first load and similar PTX for other loads.
36//
37// mov.u32 %r1, %x;
38// mov.u32 %r2, %y;
39// mul.wide.u32 %rl2, %r1, 128;
40// mov.u64 %rl3, a;
41// add.s64 %rl4, %rl3, %rl2;
42// mul.wide.u32 %rl5, %r2, 4;
43// add.s64 %rl6, %rl4, %rl5;
44// ld.global.f32 %f1, [%rl6];
45//
46// To reduce the register pressure, the optimization implemented in this file
47// merges the common part of a group of GEPs, so we can compute each pointer
48// address by adding a simple offset to the common part, saving many registers.
49//
50// It works by splitting each GEP into a variadic base and a constant offset.
51// The variadic base can be computed once and reused by multiple GEPs, and the
52// constant offsets can be nicely folded into the reg+immediate addressing mode
53// (supported by most targets) without using any extra register.
54//
55// For instance, we transform the four GEPs and four loads in the above example
56// into:
57//
58// base = gep a, 0, x, y
59// load base
60// load base + 1 * sizeof(float)
61// load base + 32 * sizeof(float)
62// load base + 33 * sizeof(float)
63//
64// Given the transformed IR, a backend that supports the reg+immediate
65// addressing mode can easily fold the pointer arithmetics into the loads. For
66// example, the NVPTX backend can easily fold the pointer arithmetics into the
67// ld.global.f32 instructions, and the resultant PTX uses much fewer registers.
68//
69// mov.u32 %r1, %tid.x;
70// mov.u32 %r2, %tid.y;
71// mul.wide.u32 %rl2, %r1, 128;
72// mov.u64 %rl3, a;
73// add.s64 %rl4, %rl3, %rl2;
74// mul.wide.u32 %rl5, %r2, 4;
75// add.s64 %rl6, %rl4, %rl5;
76// ld.global.f32 %f1, [%rl6]; // so far the same as unoptimized PTX
77// ld.global.f32 %f2, [%rl6+4]; // much better
78// ld.global.f32 %f3, [%rl6+128]; // much better
79// ld.global.f32 %f4, [%rl6+132]; // much better
80//
81// Another improvement enabled by the LowerGEP flag is to lower a GEP with
82// multiple indices to either multiple GEPs with a single index or arithmetic
83// operations (depending on whether the target uses alias analysis in codegen).
84// Such a transformation can have the following benefits:
85// (1) It can always extract constants in the indices of structure type.
86// (2) After such lowering, there are more optimization opportunities such as
87// CSE, LICM and CGP.
88//
89// E.g. The following GEPs have multiple indices:
90// BB1:
91// %p = getelementptr [10 x %struct]* %ptr, i64 %i, i64 %j1, i32 3
92// load %p
93// ...
94// BB2:
95// %p2 = getelementptr [10 x %struct]* %ptr, i64 %i, i64 %j2, i32 2
96// load %p2
97// ...
98//
99// We cannot apply CSE to the common part related to the index "i64 %i".
100// Lowering the GEPs makes such reuse possible.
101// If the target does not use alias analysis in codegen, this pass will
102// lower a GEP with multiple indices into arithmetic operations:
103// BB1:
104// %1 = ptrtoint [10 x %struct]* %ptr to i64 ; CSE opportunity
105// %2 = mul i64 %i, length_of_10xstruct ; CSE opportunity
106// %3 = add i64 %1, %2 ; CSE opportunity
107// %4 = mul i64 %j1, length_of_struct
108// %5 = add i64 %3, %4
109// %6 = add i64 %3, struct_field_3 ; Constant offset
110// %p = inttoptr i64 %6 to i32*
111// load %p
112// ...
113// BB2:
114// %7 = ptrtoint [10 x %struct]* %ptr to i64 ; CSE opportunity
115// %8 = mul i64 %i, length_of_10xstruct ; CSE opportunity
116// %9 = add i64 %7, %8 ; CSE opportunity
117// %10 = mul i64 %j2, length_of_struct
118// %11 = add i64 %9, %10
119// %12 = add i64 %11, struct_field_2 ; Constant offset
120// %p = inttoptr i64 %12 to i32*
121// load %p2
122// ...
123//
124// If the target uses alias analysis in codegen, this pass will lower a GEP
125// with multiple indices into multiple GEPs with a single index:
126// BB1:
127// %1 = bitcast [10 x %struct]* %ptr to i8* ; CSE opportunity
128// %2 = mul i64 %i, length_of_10xstruct ; CSE opportunity
129// %3 = getelementptr i8* %1, i64 %2 ; CSE opportunity
130// %4 = mul i64 %j1, length_of_struct
131// %5 = getelementptr i8* %3, i64 %4
132// %6 = getelementptr i8* %5, struct_field_3 ; Constant offset
133// %p = bitcast i8* %6 to i32*
134// load %p
135// ...
136// BB2:
137// %7 = bitcast [10 x %struct]* %ptr to i8* ; CSE opportunity
138// %8 = mul i64 %i, length_of_10xstruct ; CSE opportunity
139// %9 = getelementptr i8* %7, i64 %8 ; CSE opportunity
140// %10 = mul i64 %j2, length_of_struct
141// %11 = getelementptr i8* %9, i64 %10
142// %12 = getelementptr i8* %11, struct_field_2 ; Constant offset
143// %p2 = bitcast i8* %12 to i32*
144// load %p2
145// ...
146//
147// Lowering GEPs can also benefit other passes such as LICM and CGP.
148// LICM (Loop Invariant Code Motion) cannot hoist/sink a GEP with multiple
149// indices if any of the indices is loop-variant. If we lower such a GEP into
150// invariant parts and variant parts, LICM can hoist/sink the invariant parts.
151// CGP (CodeGen Prepare) tries to sink address calculations that match the
152// target's addressing modes. A GEP with multiple indices may not match and will
153// not be sunk. If we lower such a GEP into smaller parts, CGP may sink some of
154// them, and we end up with a better addressing mode.
155//
156//===----------------------------------------------------------------------===//
157
159#include "llvm/ADT/APInt.h"
160#include "llvm/ADT/DenseMap.h"
162#include "llvm/ADT/SmallVector.h"
169#include "llvm/IR/BasicBlock.h"
170#include "llvm/IR/Constant.h"
171#include "llvm/IR/Constants.h"
172#include "llvm/IR/DataLayout.h"
173#include "llvm/IR/DerivedTypes.h"
174#include "llvm/IR/Dominators.h"
175#include "llvm/IR/Function.h"
177#include "llvm/IR/IRBuilder.h"
178#include "llvm/IR/Instruction.h"
179#include "llvm/IR/Instructions.h"
180#include "llvm/IR/Module.h"
181#include "llvm/IR/PassManager.h"
182#include "llvm/IR/PatternMatch.h"
183#include "llvm/IR/Type.h"
184#include "llvm/IR/User.h"
185#include "llvm/IR/Value.h"
187#include "llvm/Pass.h"
188#include "llvm/Support/Casting.h"
194#include <cassert>
195#include <cstdint>
196#include <string>
197
198using namespace llvm;
199using namespace llvm::PatternMatch;
200
202 "disable-separate-const-offset-from-gep", cl::init(false),
203 cl::desc("Do not separate the constant offset from a GEP instruction"),
204 cl::Hidden);
205
206// Setting this flag may emit false positives when the input module already
207// contains dead instructions. Therefore, we set it only in unit tests that are
208// free of dead code.
209static cl::opt<bool>
210 VerifyNoDeadCode("reassociate-geps-verify-no-dead-code", cl::init(false),
211 cl::desc("Verify this pass produces no dead code"),
212 cl::Hidden);
213
214namespace {
215
216/// A helper class for separating a constant offset from a GEP index.
217///
218/// In real programs, a GEP index may be more complicated than a simple addition
219/// of something and a constant integer that can be trivially split. For
220/// example, to split ((a << 3) | 5) + b, we need to search deeper for the
221/// constant offset, so that we can separate the index into (a << 3) + b and 5.
222///
223/// Therefore, this class looks into the expression that computes a given GEP
224/// index, and tries to find a constant integer that can be hoisted to the
225/// outermost level of the expression as an addition. Not every constant in an
226/// expression can jump out. e.g., we cannot transform (b * (a + 5)) to (b * a +
227/// 5); nor can we transform (3 * (a + 5)) to (3 * a + 5), however in this case,
228/// -instcombine probably already optimized (3 * (a + 5)) to (3 * a + 15).
229class ConstantOffsetExtractor {
230public:
231 /// Extracts a constant offset from the given GEP index. It returns the
232 /// new index representing the remainder (equal to the original index minus
233 /// the constant offset), or nullptr if we cannot extract a constant offset.
234 /// \p Idx The given GEP index
235 /// \p GEP The given GEP
236 /// \p UserChainTail Outputs the tail of UserChain so that we can
237 /// garbage-collect unused instructions in UserChain.
238 static Value *Extract(Value *Idx, GetElementPtrInst *GEP,
239 User *&UserChainTail, const DominatorTree *DT);
240
241 /// Looks for a constant offset in the given GEP index without extracting
242 /// it. It returns the numeric value of the constant offset it found (0 if
243 /// none is found). The meanings of the arguments are the same as in Extract.
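 ///
 /// For example (an illustrative sketch, not taken from the test suite): for
 /// the index ((a << 3) | 5) + b from the class comment above, Find returns 5
 /// and leaves the index itself unchanged.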
244 static int64_t Find(Value *Idx, GetElementPtrInst *GEP,
245 const DominatorTree *DT);
246
247private:
248 ConstantOffsetExtractor(Instruction *InsertionPt, const DominatorTree *DT)
249 : IP(InsertionPt), DL(InsertionPt->getModule()->getDataLayout()), DT(DT) {
250 }
251
252 /// Searches the expression that computes V for a non-zero constant C s.t.
253 /// V can be reassociated into the form V' + C. If the search is
254 /// successful, returns C and updates UserChain as a def-use chain from C to
255 /// V; otherwise, UserChain is left empty.
256 ///
257 /// \p V The given expression
258 /// \p SignExtended Whether V will be sign-extended in the computation of the
259 /// GEP index
260 /// \p ZeroExtended Whether V will be zero-extended in the computation of the
261 /// GEP index
262 /// \p NonNegative Whether V is guaranteed to be non-negative. For example,
263 /// an index of an inbounds GEP is guaranteed to be
264 /// non-negative. Leveraging this, we can better split
265 /// inbounds GEPs.
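 ///
 /// For example (an illustrative sketch): for V = a * b + (c + 5), find
 /// returns 5 and UserChain becomes [5, c + 5, a * b + (c + 5)] (see the
 /// description of UserChain below).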
266 APInt find(Value *V, bool SignExtended, bool ZeroExtended, bool NonNegative);
267
268 /// A helper function to look into both operands of a binary operator.
269 APInt findInEitherOperand(BinaryOperator *BO, bool SignExtended,
270 bool ZeroExtended);
271
272 /// After finding the constant offset C from the GEP index I, we build a new
273 /// index I' s.t. I' + C = I. This function builds and returns the new
274 /// index I' according to UserChain produced by function "find".
275 ///
276 /// The building conceptually takes two steps:
277 /// 1) iteratively distribute s/zext towards the leaves of the expression tree
278 /// that computes I
279 /// 2) reassociate the expression tree to the form I' + C.
280 ///
281 /// For example, to extract the 5 from sext(a + (b + 5)), we first distribute
282 /// sext to a, b and 5 so that we have
283 /// sext(a) + (sext(b) + 5).
284 /// Then, we reassociate it to
285 /// (sext(a) + sext(b)) + 5.
286 /// Given this form, we know I' is sext(a) + sext(b).
287 Value *rebuildWithoutConstOffset();
288
289 /// After the first step of rebuilding the GEP index without the constant
290 /// offset, distribute s/zext to the operands of all operators in UserChain.
291 /// e.g., zext(sext(a + (b + 5))) (assuming no overflow) =>
292 /// zext(sext(a)) + (zext(sext(b)) + zext(sext(5))).
293 ///
294 /// The function also updates UserChain to point to new subexpressions after
295 /// distributing s/zext. e.g., the old UserChain of the above example is
296 /// 5 -> b + 5 -> a + (b + 5) -> sext(...) -> zext(sext(...)),
297 /// and the new UserChain is
298 /// zext(sext(5)) -> zext(sext(b)) + zext(sext(5)) ->
299 /// zext(sext(a)) + (zext(sext(b)) + zext(sext(5)))
300 ///
301 /// \p ChainIndex The index to UserChain. ChainIndex is initially
302 /// UserChain.size() - 1, and is decremented during
303 /// the recursion.
304 Value *distributeExtsAndCloneChain(unsigned ChainIndex);
305
306 /// Reassociates the GEP index to the form I' + C and returns I'.
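 ///
 /// For example (continuing the illustrative chain from
 /// distributeExtsAndCloneChain above), given
 /// zext(sext(a)) + (zext(sext(b)) + zext(sext(5))), this returns
 /// zext(sext(a)) + zext(sext(b)), dropping the constant 5.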
307 Value *removeConstOffset(unsigned ChainIndex);
308
309 /// A helper function to apply ExtInsts, a list of s/zext, to value V.
310 /// e.g., if ExtInsts = [sext i32 to i64, zext i16 to i32], this function
311 /// returns "sext i32 (zext i16 V to i32) to i64".
312 Value *applyExts(Value *V);
313
314 /// A helper function that returns whether we can trace into the operands
315 /// of binary operator BO for a constant offset.
316 ///
317 /// \p SignExtended Whether BO is surrounded by sext
318 /// \p ZeroExtended Whether BO is surrounded by zext
319 /// \p NonNegative Whether BO is known to be non-negative, e.g., an in-bound
320 /// array index.
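 ///
 /// For example (an illustrative case, not drawn from a test): we can trace
 /// into zext(a +nuw 4) and extract the 4, but not into zext(a + 4) without
 /// nuw, because the zext cannot be distributed over a possibly wrapping add.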
321 bool CanTraceInto(bool SignExtended, bool ZeroExtended, BinaryOperator *BO,
322 bool NonNegative);
323
324 /// The path from the constant offset to the old GEP index. e.g., if the GEP
325 /// index is "a * b + (c + 5)". After running function find, UserChain[0] will
326 /// be the constant 5, UserChain[1] will be the subexpression "c + 5", and
327 /// UserChain[2] will be the entire expression "a * b + (c + 5)".
328 ///
329 /// This path helps to rebuild the new GEP index.
330 SmallVector<User *, 8> UserChain;
331
332 /// A data structure used in rebuildWithoutConstOffset. Contains all
333 /// sext/zext instructions along UserChain.
334 SmallVector<CastInst *, 16> ExtInsts;
335
336 /// Insertion position of cloned instructions.
337 Instruction *IP;
338
339 const DataLayout &DL;
340 const DominatorTree *DT;
341};
342
343/// A pass that tries to split every GEP in the function into a variadic
344/// base and a constant offset. It is a FunctionPass because searching for the
345/// constant offset may inspect other basic blocks.
346class SeparateConstOffsetFromGEPLegacyPass : public FunctionPass {
347public:
348 static char ID;
349
350 SeparateConstOffsetFromGEPLegacyPass(bool LowerGEP = false)
351 : FunctionPass(ID), LowerGEP(LowerGEP) {
352 initializeSeparateConstOffsetFromGEPLegacyPassPass(
353 *PassRegistry::getPassRegistry());
354 }
355
356 void getAnalysisUsage(AnalysisUsage &AU) const override {
357 AU.addRequired<DominatorTreeWrapperPass>();
358 AU.addRequired<ScalarEvolutionWrapperPass>();
359 AU.addRequired<LoopInfoWrapperPass>();
360 AU.addRequired<TargetLibraryInfoWrapperPass>();
361 AU.setPreservesCFG();
362 AU.addRequired<TargetTransformInfoWrapperPass>();
363 }
364
365 bool runOnFunction(Function &F) override;
366
367private:
368 bool LowerGEP;
369};
370
371/// A pass that tries to split every GEP in the function into a variadic
372/// base and a constant offset. It is a FunctionPass because searching for the
373/// constant offset may inspect other basic blocks.
374class SeparateConstOffsetFromGEP {
375public:
376 SeparateConstOffsetFromGEP(
377 DominatorTree *DT, ScalarEvolution *SE, LoopInfo *LI,
378 TargetLibraryInfo *TLI,
379 function_ref<TargetTransformInfo &(Function &)> GetTTI, bool LowerGEP)
380 : DT(DT), SE(SE), LI(LI), TLI(TLI), GetTTI(GetTTI), LowerGEP(LowerGEP) {}
381
382 bool run(Function &F);
383
384private:
385 /// Tries to split the given GEP into a variadic base and a constant offset,
386 /// and returns true if the splitting succeeds.
387 bool splitGEP(GetElementPtrInst *GEP);
388
389 /// Lower a GEP with multiple indices into multiple GEPs with a single index.
390 /// Function splitGEP already split the original GEP into a variadic part and
391 /// a constant offset (i.e., AccumulativeByteOffset). This function lowers the
392 /// variadic part into a set of GEPs with a single index and applies
393 /// AccumulativeByteOffset to it.
394 /// \p Variadic The variadic part of the original GEP.
395 /// \p AccumulativeByteOffset The constant offset.
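 ///
 /// An illustrative sketch (identifiers are made up, not from the test suite):
 /// a variadic part such as
 /// getelementptr [10 x %struct]* %ptr, i64 %i, i64 %j, i32 3
 /// becomes a chain of i8 GEPs, one per non-zero sequential index (scaled by
 /// the element size) plus one for AccumulativeByteOffset, following the
 /// BB1/BB2 example in the file header.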
396 void lowerToSingleIndexGEPs(GetElementPtrInst *Variadic,
397 int64_t AccumulativeByteOffset);
398
399 /// Lower a GEP with multiple indices into ptrtoint+arithmetics+inttoptr form.
400 /// Function splitGEP already split the original GEP into a variadic part and
401 /// a constant offset (i.e., AccumulativeByteOffset). This function lowers the
402 /// variadic part into a set of arithmetic operations and applies
403 /// AccumulativeByteOffset to it.
404 /// \p Variadic The variadic part of the original GEP.
405 /// \p AccumulativeByteOffset The constant offset.
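 ///
 /// An illustrative sketch (same made-up variadic part as above): the base
 /// pointer is converted with ptrtoint, each non-zero sequential index
 /// contributes a mul/shl and an add, AccumulativeByteOffset is added last,
 /// and the result is converted back with inttoptr, following the arithmetic
 /// form shown in the file header.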
406 void lowerToArithmetics(GetElementPtrInst *Variadic,
407 int64_t AccumulativeByteOffset);
408
409 /// Finds the constant offset within each index and accumulates them. If
410 /// LowerGEP is true, it searches indices of both sequential and structure
411 /// types; otherwise it only searches sequential indices. The output
412 /// NeedsExtraction indicates whether a non-zero constant offset was
413 /// found.
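 ///
 /// A small worked sketch (hypothetical types; assumes field 1 of %struct.S is
 /// an array): for
 /// getelementptr inbounds %struct.S* %s, i64 0, i32 1, i64 %k
 /// where %k = add i64 %m, 3, LowerGEP accumulates the byte offset of field 1
 /// plus 3 * sizeof(element); without LowerGEP only the sequential part,
 /// 3 * sizeof(element), is accumulated.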
414 int64_t accumulateByteOffset(GetElementPtrInst *GEP, bool &NeedsExtraction);
415
416 /// Canonicalize array indices to pointer-size integers. This helps to
417 /// simplify the logic of splitting a GEP. For example, if a + b is a
418 /// pointer-size integer, we have
419 /// gep base, a + b = gep (gep base, a), b
420 /// However, this equality may not hold if the size of a + b is smaller than
421 /// the pointer size, because LLVM conceptually sign-extends GEP indices to
422 /// pointer size before computing the address
423 /// (http://llvm.org/docs/LangRef.html#id181).
424 ///
425 /// This canonicalization is very likely already done in clang and
426 /// instcombine. Therefore, the program will probably remain the same.
427 ///
428 /// Returns true if the module changes.
429 ///
430 /// Verified in @i32_add in split-gep.ll
431 bool canonicalizeArrayIndicesToIndexSize(GetElementPtrInst *GEP);
432
433 /// Optimize sext(a)+sext(b) to sext(a+b) when a+b can't sign overflow.
434 /// SeparateConstOffsetFromGEP distributes a sext to leaves before extracting
435 /// the constant offset. After extraction, it becomes desirable to reunite the
436 /// distributed sexts. For example,
437 ///
438 /// &a[sext(i +nsw (j +nsw 5))]
439 /// => distribute &a[sext(i) +nsw (sext(j) +nsw 5)]
440 /// => constant extraction &a[sext(i) + sext(j)] + 5
441 /// => reunite &a[sext(i +nsw j)] + 5
442 bool reuniteExts(Function &F);
443
444 /// A helper that reunites sexts in an instruction.
445 bool reuniteExts(Instruction *I);
446
447 /// Find the closest dominator of <Dominatee> that is equivalent to <Key>.
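 ///
 /// For example (illustrative): if several dominating "add nsw %a, %b"
 /// instructions were recorded under the same SCEV key, the most recently
 /// recorded one that still dominates <Dominatee> is returned.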
448 Instruction *findClosestMatchingDominator(
449 const SCEV *Key, Instruction *Dominatee,
450 DenseMap<const SCEV *, SmallVector<Instruction *, 2>> &DominatingExprs);
451
452 /// Verify F is free of dead code.
453 void verifyNoDeadCode(Function &F);
454
455 bool hasMoreThanOneUseInLoop(Value *v, Loop *L);
456
457 // Swap the index operands of two GEPs.
458 void swapGEPOperand(GetElementPtrInst *First, GetElementPtrInst *Second);
459
460 // Check if it is safe to swap the operands of two GEPs.
461 bool isLegalToSwapOperand(GetElementPtrInst *First, GetElementPtrInst *Second,
462 Loop *CurLoop);
463
464 const DataLayout *DL = nullptr;
465 DominatorTree *DT = nullptr;
466 ScalarEvolution *SE;
467 LoopInfo *LI;
468 TargetLibraryInfo *TLI;
469 // Retrieved lazily since not always used.
470 function_ref<TargetTransformInfo &(Function &)> GetTTI;
471
472 /// Whether to lower a GEP with multiple indices into arithmetic operations or
473 /// multiple GEPs with a single index.
474 bool LowerGEP;
475
476 DenseMap<const SCEV *, SmallVector<Instruction *, 2>> DominatingAdds;
477 DenseMap<const SCEV *, SmallVector<Instruction *, 2>> DominatingSubs;
478};
479
480} // end anonymous namespace
481
482char SeparateConstOffsetFromGEPLegacyPass::ID = 0;
483
484INITIALIZE_PASS_BEGIN(
485 SeparateConstOffsetFromGEPLegacyPass, "separate-const-offset-from-gep",
486 "Split GEPs to a variadic base and a constant offset for better CSE", false,
487 false)
488INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
489INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass)
490INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
491INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
492INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
493INITIALIZE_PASS_END(
494 SeparateConstOffsetFromGEPLegacyPass, "separate-const-offset-from-gep",
495 "Split GEPs to a variadic base and a constant offset for better CSE", false,
496 false)
497
498FunctionPass *llvm::createSeparateConstOffsetFromGEPPass(bool LowerGEP) {
499 return new SeparateConstOffsetFromGEPLegacyPass(LowerGEP);
500}
501
502bool ConstantOffsetExtractor::CanTraceInto(bool SignExtended,
503 bool ZeroExtended,
504 BinaryOperator *BO,
505 bool NonNegative) {
506 // We only consider ADD, SUB and OR, because a non-zero constant found in
507 // expressions composed of these operations can be easily hoisted as a
508 // constant offset by reassociation.
509 if (BO->getOpcode() != Instruction::Add &&
510 BO->getOpcode() != Instruction::Sub &&
511 BO->getOpcode() != Instruction::Or) {
512 return false;
513 }
514
515 Value *LHS = BO->getOperand(0), *RHS = BO->getOperand(1);
516 // Do not trace into "or" unless it is equivalent to "add". If LHS and RHS
517 // don't have common bits, (LHS | RHS) is equivalent to (LHS + RHS).
518 // FIXME: this does not appear to be covered by any tests
519 // (with x86/aarch64 backends at least)
520 if (BO->getOpcode() == Instruction::Or &&
521 !haveNoCommonBitsSet(LHS, RHS, DL, nullptr, BO, DT))
522 return false;
523
524 // In addition, tracing into BO requires that its surrounding s/zext (if
525 // any) is distributable to both operands.
526 //
527 // Suppose BO = A op B.
528 // SignExtended | ZeroExtended | Distributable?
529 // --------------+--------------+----------------------------------
530 // 0 | 0 | true because no s/zext exists
531 // 0 | 1 | zext(BO) == zext(A) op zext(B)
532 // 1 | 0 | sext(BO) == sext(A) op sext(B)
533 // 1 | 1 | zext(sext(BO)) ==
534 // | | zext(sext(A)) op zext(sext(B))
535 if (BO->getOpcode() == Instruction::Add && !ZeroExtended && NonNegative) {
536 // If a + b >= 0 and (a >= 0 or b >= 0), then
537 // sext(a + b) = sext(a) + sext(b)
538 // even if the addition is not marked nsw.
539 //
540 // Leveraging this invariant, we can trace into an sext'ed inbound GEP
541 // index if the constant offset is non-negative.
542 //
543 // Verified in @sext_add in split-gep.ll.
544 if (ConstantInt *ConstLHS = dyn_cast<ConstantInt>(LHS)) {
545 if (!ConstLHS->isNegative())
546 return true;
547 }
548 if (ConstantInt *ConstRHS = dyn_cast<ConstantInt>(RHS)) {
549 if (!ConstRHS->isNegative())
550 return true;
551 }
552 }
553
554 // sext (add/sub nsw A, B) == add/sub nsw (sext A), (sext B)
555 // zext (add/sub nuw A, B) == add/sub nuw (zext A), (zext B)
556 if (BO->getOpcode() == Instruction::Add ||
557 BO->getOpcode() == Instruction::Sub) {
558 if (SignExtended && !BO->hasNoSignedWrap())
559 return false;
560 if (ZeroExtended && !BO->hasNoUnsignedWrap())
561 return false;
562 }
563
564 return true;
565}
566
567APInt ConstantOffsetExtractor::findInEitherOperand(BinaryOperator *BO,
568 bool SignExtended,
569 bool ZeroExtended) {
570 // Save off the current height of the chain, in case we need to restore it.
571 size_t ChainLength = UserChain.size();
572
573 // BO being non-negative does not shed light on whether its operands are
574 // non-negative. Clear the NonNegative flag here.
575 APInt ConstantOffset = find(BO->getOperand(0), SignExtended, ZeroExtended,
576 /* NonNegative */ false);
577 // If we found a constant offset in the left operand, stop and return that.
578 // This shortcut might cause us to miss opportunities of combining the
579 // constant offsets in both operands, e.g., (a + 4) + (b + 5) => (a + b) + 9.
580 // However, such cases are probably already handled by -instcombine,
581 // given this pass runs after the standard optimizations.
582 if (ConstantOffset != 0) return ConstantOffset;
583
584 // Reset the chain back to where it was when we started exploring this node,
585 // since visiting the LHS didn't pan out.
586 UserChain.resize(ChainLength);
587
588 ConstantOffset = find(BO->getOperand(1), SignExtended, ZeroExtended,
589 /* NonNegative */ false);
590 // If BO is a sub operator, negate the constant offset found in the right
591 // operand.
592 if (BO->getOpcode() == Instruction::Sub)
593 ConstantOffset = -ConstantOffset;
594
595 // If RHS wasn't a suitable candidate either, reset the chain again.
596 if (ConstantOffset == 0)
597 UserChain.resize(ChainLength);
598
599 return ConstantOffset;
600}
601
602APInt ConstantOffsetExtractor::find(Value *V, bool SignExtended,
603 bool ZeroExtended, bool NonNegative) {
604 // TODO(jingyue): We could trace into integer/pointer casts, such as
605 // inttoptr, ptrtoint, bitcast, and addrspacecast. We choose to handle only
606 // integers because it gives good enough results for our benchmarks.
607 unsigned BitWidth = cast<IntegerType>(V->getType())->getBitWidth();
608
609 // We cannot do much with Values that are not a User, such as an Argument.
610 User *U = dyn_cast<User>(V);
611 if (U == nullptr) return APInt(BitWidth, 0);
612
613 APInt ConstantOffset(BitWidth, 0);
614 if (ConstantInt *CI = dyn_cast<ConstantInt>(V)) {
615 // Hooray, we found it!
616 ConstantOffset = CI->getValue();
617 } else if (BinaryOperator *BO = dyn_cast<BinaryOperator>(V)) {
618 // Trace into subexpressions for more hoisting opportunities.
619 if (CanTraceInto(SignExtended, ZeroExtended, BO, NonNegative))
620 ConstantOffset = findInEitherOperand(BO, SignExtended, ZeroExtended);
621 } else if (isa<TruncInst>(V)) {
622 ConstantOffset =
623 find(U->getOperand(0), SignExtended, ZeroExtended, NonNegative)
624 .trunc(BitWidth);
625 } else if (isa<SExtInst>(V)) {
626 ConstantOffset = find(U->getOperand(0), /* SignExtended */ true,
627 ZeroExtended, NonNegative).sext(BitWidth);
628 } else if (isa<ZExtInst>(V)) {
629 // As an optimization, we can clear the SignExtended flag because
630 // sext(zext(a)) = zext(a). Verified in @sext_zext in split-gep.ll.
631 //
632 // Clear the NonNegative flag, because zext(a) >= 0 does not imply a >= 0.
633 ConstantOffset =
634 find(U->getOperand(0), /* SignExtended */ false,
635 /* ZeroExtended */ true, /* NonNegative */ false).zext(BitWidth);
636 }
637
638 // If we found a non-zero constant offset, add it to the path for
639 // rebuildWithoutConstOffset. Zero is a valid constant offset, but doesn't
640 // help this optimization.
641 if (ConstantOffset != 0)
642 UserChain.push_back(U);
643 return ConstantOffset;
644}
645
646Value *ConstantOffsetExtractor::applyExts(Value *V) {
647 Value *Current = V;
648 // ExtInsts is built in the use-def order. Therefore, we apply them to V
649 // in the reversed order.
650 for (CastInst *I : llvm::reverse(ExtInsts)) {
651 if (Constant *C = dyn_cast<Constant>(Current)) {
652 // If Current is a constant, apply s/zext using ConstantExpr::getCast.
653 // ConstantExpr::getCast emits a ConstantInt if C is a ConstantInt.
654 Current = ConstantExpr::getCast(I->getOpcode(), C, I->getType());
655 } else {
656 Instruction *Ext = I->clone();
657 Ext->setOperand(0, Current);
658 Ext->insertBefore(IP);
659 Current = Ext;
660 }
661 }
662 return Current;
663}
664
665Value *ConstantOffsetExtractor::rebuildWithoutConstOffset() {
666 distributeExtsAndCloneChain(UserChain.size() - 1);
667 // Remove all nullptrs (used to be s/zext) from UserChain.
668 unsigned NewSize = 0;
669 for (User *I : UserChain) {
670 if (I != nullptr) {
671 UserChain[NewSize] = I;
672 NewSize++;
673 }
674 }
675 UserChain.resize(NewSize);
676 return removeConstOffset(UserChain.size() - 1);
677}
678
679Value *
680ConstantOffsetExtractor::distributeExtsAndCloneChain(unsigned ChainIndex) {
681 User *U = UserChain[ChainIndex];
682 if (ChainIndex == 0) {
683 assert(isa<ConstantInt>(U));
684 // If U is a ConstantInt, applyExts will return a ConstantInt as well.
685 return UserChain[ChainIndex] = cast<ConstantInt>(applyExts(U));
686 }
687
688 if (CastInst *Cast = dyn_cast<CastInst>(U)) {
689 assert(
690 (isa<SExtInst>(Cast) || isa<ZExtInst>(Cast) || isa<TruncInst>(Cast)) &&
691 "Only following instructions can be traced: sext, zext & trunc");
692 ExtInsts.push_back(Cast);
693 UserChain[ChainIndex] = nullptr;
694 return distributeExtsAndCloneChain(ChainIndex - 1);
695 }
696
697 // Function find only traces into BinaryOperator and CastInst.
698 BinaryOperator *BO = cast<BinaryOperator>(U);
699 // OpNo = which operand of BO is UserChain[ChainIndex - 1]
700 unsigned OpNo = (BO->getOperand(0) == UserChain[ChainIndex - 1] ? 0 : 1);
701 Value *TheOther = applyExts(BO->getOperand(1 - OpNo));
702 Value *NextInChain = distributeExtsAndCloneChain(ChainIndex - 1);
703
704 BinaryOperator *NewBO = nullptr;
705 if (OpNo == 0) {
706 NewBO = BinaryOperator::Create(BO->getOpcode(), NextInChain, TheOther,
707 BO->getName(), IP);
708 } else {
709 NewBO = BinaryOperator::Create(BO->getOpcode(), TheOther, NextInChain,
710 BO->getName(), IP);
711 }
712 return UserChain[ChainIndex] = NewBO;
713}
714
715Value *ConstantOffsetExtractor::removeConstOffset(unsigned ChainIndex) {
716 if (ChainIndex == 0) {
717 assert(isa<ConstantInt>(UserChain[ChainIndex]));
718 return ConstantInt::getNullValue(UserChain[ChainIndex]->getType());
719 }
720
721 BinaryOperator *BO = cast<BinaryOperator>(UserChain[ChainIndex]);
722 assert((BO->use_empty() || BO->hasOneUse()) &&
723 "distributeExtsAndCloneChain clones each BinaryOperator in "
724 "UserChain, so no one should be used more than "
725 "once");
726
727 unsigned OpNo = (BO->getOperand(0) == UserChain[ChainIndex - 1] ? 0 : 1);
728 assert(BO->getOperand(OpNo) == UserChain[ChainIndex - 1]);
729 Value *NextInChain = removeConstOffset(ChainIndex - 1);
730 Value *TheOther = BO->getOperand(1 - OpNo);
731
732 // If NextInChain is 0 and not the LHS of a sub, we can simplify the
733 // sub-expression to be just TheOther.
734 if (ConstantInt *CI = dyn_cast<ConstantInt>(NextInChain)) {
735 if (CI->isZero() && !(BO->getOpcode() == Instruction::Sub && OpNo == 0))
736 return TheOther;
737 }
738
739 Instruction::BinaryOps NewOp = BO->getOpcode();
740 if (BO->getOpcode() == Instruction::Or) {
741 // Rebuild "or" as "add", because "or" may be invalid for the new
742 // expression.
743 //
744 // For instance, given
745 // a | (b + 5) where a and b + 5 have no common bits,
746 // we can extract 5 as the constant offset.
747 //
748 // However, reusing the "or" in the new index would give us
749 // (a | b) + 5
750 // which does not equal a | (b + 5).
751 //
752 // Replacing the "or" with "add" is fine, because
753 // a | (b + 5) = a + (b + 5) = (a + b) + 5
754 NewOp = Instruction::Add;
755 }
756
757 BinaryOperator *NewBO;
758 if (OpNo == 0) {
759 NewBO = BinaryOperator::Create(NewOp, NextInChain, TheOther, "", IP);
760 } else {
761 NewBO = BinaryOperator::Create(NewOp, TheOther, NextInChain, "", IP);
762 }
763 NewBO->takeName(BO);
764 return NewBO;
765}
766
767Value *ConstantOffsetExtractor::Extract(Value *Idx, GetElementPtrInst *GEP,
768 User *&UserChainTail,
769 const DominatorTree *DT) {
770 ConstantOffsetExtractor Extractor(GEP, DT);
771 // Find a non-zero constant offset first.
772 APInt ConstantOffset =
773 Extractor.find(Idx, /* SignExtended */ false, /* ZeroExtended */ false,
774 GEP->isInBounds());
775 if (ConstantOffset == 0) {
776 UserChainTail = nullptr;
777 return nullptr;
778 }
779 // Separates the constant offset from the GEP index.
780 Value *IdxWithoutConstOffset = Extractor.rebuildWithoutConstOffset();
781 UserChainTail = Extractor.UserChain.back();
782 return IdxWithoutConstOffset;
783}
784
785int64_t ConstantOffsetExtractor::Find(Value *Idx, GetElementPtrInst *GEP,
786 const DominatorTree *DT) {
787 // If Idx is an index of an inbound GEP, Idx is guaranteed to be non-negative.
788 return ConstantOffsetExtractor(GEP, DT)
789 .find(Idx, /* SignExtended */ false, /* ZeroExtended */ false,
790 GEP->isInBounds())
791 .getSExtValue();
792}
793
794bool SeparateConstOffsetFromGEP::canonicalizeArrayIndicesToIndexSize(
795 GetElementPtrInst *GEP) {
796 bool Changed = false;
797 Type *PtrIdxTy = DL->getIndexType(GEP->getType());
798 gep_type_iterator GTI = gep_type_begin(*GEP);
799 for (User::op_iterator I = GEP->op_begin() + 1, E = GEP->op_end();
800 I != E; ++I, ++GTI) {
801 // Skip struct member indices which must be i32.
802 if (GTI.isSequential()) {
803 if ((*I)->getType() != PtrIdxTy) {
804 *I = CastInst::CreateIntegerCast(*I, PtrIdxTy, true, "idxprom", GEP);
805 Changed = true;
806 }
807 }
808 }
809 return Changed;
810}
811
812int64_t
813SeparateConstOffsetFromGEP::accumulateByteOffset(GetElementPtrInst *GEP,
814 bool &NeedsExtraction) {
815 NeedsExtraction = false;
816 int64_t AccumulativeByteOffset = 0;
817 gep_type_iterator GTI = gep_type_begin(*GEP);
818 for (unsigned I = 1, E = GEP->getNumOperands(); I != E; ++I, ++GTI) {
819 if (GTI.isSequential()) {
820 // Constant offsets of scalable types are not really constant.
821 if (isa<ScalableVectorType>(GTI.getIndexedType()))
822 continue;
823
824 // Tries to extract a constant offset from this GEP index.
825 int64_t ConstantOffset =
826 ConstantOffsetExtractor::Find(GEP->getOperand(I), GEP, DT);
827 if (ConstantOffset != 0) {
828 NeedsExtraction = true;
829 // A GEP may have multiple indices. We accumulate the extracted
830 // constant offset to a byte offset, and later offset the remainder of
831 // the original GEP with this byte offset.
832 AccumulativeByteOffset +=
833 ConstantOffset * DL->getTypeAllocSize(GTI.getIndexedType());
834 }
835 } else if (LowerGEP) {
836 StructType *StTy = GTI.getStructType();
837 uint64_t Field = cast<ConstantInt>(GEP->getOperand(I))->getZExtValue();
838 // Skip field 0 as the offset is always 0.
839 if (Field != 0) {
840 NeedsExtraction = true;
841 AccumulativeByteOffset +=
842 DL->getStructLayout(StTy)->getElementOffset(Field);
843 }
844 }
845 }
846 return AccumulativeByteOffset;
847}
848
849void SeparateConstOffsetFromGEP::lowerToSingleIndexGEPs(
850 GetElementPtrInst *Variadic, int64_t AccumulativeByteOffset) {
851 IRBuilder<> Builder(Variadic);
852 Type *PtrIndexTy = DL->getIndexType(Variadic->getType());
853
854 Type *I8PtrTy =
855 Builder.getInt8PtrTy(Variadic->getType()->getPointerAddressSpace());
856 Value *ResultPtr = Variadic->getOperand(0);
857 Loop *L = LI->getLoopFor(Variadic->getParent());
858 // The base is a swap candidate only if it is loop invariant and used no
859 // more than once in the loop.
859 bool isSwapCandidate =
860 L && L->isLoopInvariant(ResultPtr) &&
861 !hasMoreThanOneUseInLoop(ResultPtr, L);
862 Value *FirstResult = nullptr;
863
864 if (ResultPtr->getType() != I8PtrTy)
865 ResultPtr = Builder.CreateBitCast(ResultPtr, I8PtrTy);
866
867 gep_type_iterator GTI = gep_type_begin(*Variadic);
868 // Create an ugly GEP for each sequential index. We don't create GEPs for
869 // structure indices, as they are accumulated in the constant offset index.
870 for (unsigned I = 1, E = Variadic->getNumOperands(); I != E; ++I, ++GTI) {
871 if (GTI.isSequential()) {
872 Value *Idx = Variadic->getOperand(I);
873 // Skip zero indices.
874 if (ConstantInt *CI = dyn_cast<ConstantInt>(Idx))
875 if (CI->isZero())
876 continue;
877
878 APInt ElementSize = APInt(PtrIndexTy->getIntegerBitWidth(),
879 DL->getTypeAllocSize(GTI.getIndexedType()));
880 // Scale the index by element size.
881 if (ElementSize != 1) {
882 if (ElementSize.isPowerOf2()) {
883 Idx = Builder.CreateShl(
884 Idx, ConstantInt::get(PtrIndexTy, ElementSize.logBase2()));
885 } else {
886 Idx =
887 Builder.CreateMul(Idx, ConstantInt::get(PtrIndexTy, ElementSize));
888 }
889 }
890 // Create an ugly GEP with a single index for each index.
891 ResultPtr =
892 Builder.CreateGEP(Builder.getInt8Ty(), ResultPtr, Idx, "uglygep");
893 if (FirstResult == nullptr)
894 FirstResult = ResultPtr;
895 }
896 }
897
898 // Create a GEP with the constant offset index.
899 if (AccumulativeByteOffset != 0) {
900 Value *Offset = ConstantInt::get(PtrIndexTy, AccumulativeByteOffset);
901 ResultPtr =
902 Builder.CreateGEP(Builder.getInt8Ty(), ResultPtr, Offset, "uglygep");
903 } else
904 isSwapCandidate = false;
905
906 // If we created a GEP with constant index, and the base is loop invariant,
907 // then we swap the first one with it, so LICM can move constant GEP out
908 // later.
909 auto *FirstGEP = dyn_cast_or_null<GetElementPtrInst>(FirstResult);
910 auto *SecondGEP = dyn_cast<GetElementPtrInst>(ResultPtr);
911 if (isSwapCandidate && isLegalToSwapOperand(FirstGEP, SecondGEP, L))
912 swapGEPOperand(FirstGEP, SecondGEP);
913
914 if (ResultPtr->getType() != Variadic->getType())
915 ResultPtr = Builder.CreateBitCast(ResultPtr, Variadic->getType());
916
917 Variadic->replaceAllUsesWith(ResultPtr);
918 Variadic->eraseFromParent();
919}
920
921void
922SeparateConstOffsetFromGEP::lowerToArithmetics(GetElementPtrInst *Variadic,
923 int64_t AccumulativeByteOffset) {
924 IRBuilder<> Builder(Variadic);
925 Type *IntPtrTy = DL->getIntPtrType(Variadic->getType());
926 assert(IntPtrTy == DL->getIndexType(Variadic->getType()) &&
927 "Pointer type must match index type for arithmetic-based lowering of "
928 "split GEPs");
929
930 Value *ResultPtr = Builder.CreatePtrToInt(Variadic->getOperand(0), IntPtrTy);
931 gep_type_iterator GTI = gep_type_begin(*Variadic);
932 // Create ADD/SHL/MUL arithmetic operations for each sequential index. We
933 // don't create arithmetic for structure indices, as they are accumulated
934 // in the constant offset index.
935 for (unsigned I = 1, E = Variadic->getNumOperands(); I != E; ++I, ++GTI) {
936 if (GTI.isSequential()) {
937 Value *Idx = Variadic->getOperand(I);
938 // Skip zero indices.
939 if (ConstantInt *CI = dyn_cast<ConstantInt>(Idx))
940 if (CI->isZero())
941 continue;
942
943 APInt ElementSize = APInt(IntPtrTy->getIntegerBitWidth(),
944 DL->getTypeAllocSize(GTI.getIndexedType()));
945 // Scale the index by element size.
946 if (ElementSize != 1) {
947 if (ElementSize.isPowerOf2()) {
948 Idx = Builder.CreateShl(
949 Idx, ConstantInt::get(IntPtrTy, ElementSize.logBase2()));
950 } else {
951 Idx = Builder.CreateMul(Idx, ConstantInt::get(IntPtrTy, ElementSize));
952 }
953 }
954 // Create an ADD for each index.
955 ResultPtr = Builder.CreateAdd(ResultPtr, Idx);
956 }
957 }
958
959 // Create an ADD for the constant offset index.
960 if (AccumulativeByteOffset != 0) {
961 ResultPtr = Builder.CreateAdd(
962 ResultPtr, ConstantInt::get(IntPtrTy, AccumulativeByteOffset));
963 }
964
965 ResultPtr = Builder.CreateIntToPtr(ResultPtr, Variadic->getType());
966 Variadic->replaceAllUsesWith(ResultPtr);
967 Variadic->eraseFromParent();
968}
969
970bool SeparateConstOffsetFromGEP::splitGEP(GetElementPtrInst *GEP) {
971 // Skip vector GEPs.
972 if (GEP->getType()->isVectorTy())
973 return false;
974
975 // The backend can already nicely handle the case where all indices are
976 // constant.
977 if (GEP->hasAllConstantIndices())
978 return false;
979
980 bool Changed = canonicalizeArrayIndicesToIndexSize(GEP);
981
982 bool NeedsExtraction;
983 int64_t AccumulativeByteOffset = accumulateByteOffset(GEP, NeedsExtraction);
984
985 if (!NeedsExtraction)
986 return Changed;
987
988 TargetTransformInfo &TTI = GetTTI(*GEP->getFunction());
989
990 // If LowerGEP is disabled, before really splitting the GEP, check whether the
991 // backend supports the addressing mode we are about to produce. If not, this
992 // splitting probably won't be beneficial.
993 // If LowerGEP is enabled, even if the extracted constant offset cannot match
994 // the addressing mode, we can still optimize the other lowered parts of the
995 // variable indices. Therefore, we don't check for addressing modes in that
996 // case.
997 if (!LowerGEP) {
998 unsigned AddrSpace = GEP->getPointerAddressSpace();
999 if (!TTI.isLegalAddressingMode(GEP->getResultElementType(),
1000 /*BaseGV=*/nullptr, AccumulativeByteOffset,
1001 /*HasBaseReg=*/true, /*Scale=*/0,
1002 AddrSpace)) {
1003 return Changed;
1004 }
1005 }
1006
1007 // Remove the constant offset in each sequential index. The resultant GEP
1008 // computes the variadic base.
1009 // Notice that we don't remove struct field indices here. If LowerGEP is
1010 // disabled, a structure index is not accumulated and we still use the old
1011 // one. If LowerGEP is enabled, a structure index is accumulated in the
1012 // constant offset. LowerToSingleIndexGEPs or lowerToArithmetics will later
1013 // handle the constant offset and won't need a new structure index.
1014 gep_type_iterator GTI = gep_type_begin(*GEP);
1015 for (unsigned I = 1, E = GEP->getNumOperands(); I != E; ++I, ++GTI) {
1016 if (GTI.isSequential()) {
1017 // Constant offsets of scalable types are not really constant.
1018 if (isa<ScalableVectorType>(GTI.getIndexedType()))
1019 continue;
1020
1021 // Splits this GEP index into a variadic part and a constant offset, and
1022 // uses the variadic part as the new index.
1023 Value *OldIdx = GEP->getOperand(I);
1024 User *UserChainTail;
1025 Value *NewIdx =
1026 ConstantOffsetExtractor::Extract(OldIdx, GEP, UserChainTail, DT);
1027 if (NewIdx != nullptr) {
1028 // Switches to the index with the constant offset removed.
1029 GEP->setOperand(I, NewIdx);
1030 // After switching to the new index, we can garbage-collect UserChain
1031 // and the old index if they are not used.
1032 RecursivelyDeleteTriviallyDeadInstructions(UserChainTail);
1033 RecursivelyDeleteTriviallyDeadInstructions(OldIdx);
1034 }
1035 }
1036 }
1037
1038 // Clear the inbounds attribute because the new index may be off-bound.
1039 // e.g.,
1040 //
1041 // b = add i64 a, 5
1042 // addr = gep inbounds float, float* p, i64 b
1043 //
1044 // is transformed to:
1045 //
1046 // addr2 = gep float, float* p, i64 a ; inbounds removed
1047 // addr = gep inbounds float, float* addr2, i64 5
1048 //
1049 // If a is -4, although the old index b is in bounds, the new index a is
1050 // off-bound. http://llvm.org/docs/LangRef.html#id181 says "if the
1051 // inbounds keyword is not present, the offsets are added to the base
1052 // address with silently-wrapping two's complement arithmetic".
1053 // Therefore, the final code will be semantically equivalent.
1054 //
1055 // TODO(jingyue): do some range analysis to keep as many inbounds as
1056 // possible. GEPs with inbounds are more friendly to alias analysis.
1057 bool GEPWasInBounds = GEP->isInBounds();
1058 GEP->setIsInBounds(false);
1059
1060 // Lowers a GEP to either GEPs with a single index or arithmetic operations.
1061 if (LowerGEP) {
1062 // As currently BasicAA does not analyze ptrtoint/inttoptr, do not lower to
1063 // arithmetic operations if the target uses alias analysis in codegen.
1064 // Additionally, pointers that aren't integral (and so can't be safely
1065 // converted to integers) or those whose index size is different from their
1066 // pointer size (in which case integer arithmetic could corrupt the extra
1067 // pointer data) can't be lowered in this way.
1068 unsigned AddrSpace = GEP->getPointerAddressSpace();
1069 bool PointerHasExtraData = DL->getPointerSizeInBits(AddrSpace) !=
1070 DL->getIndexSizeInBits(AddrSpace);
1071 if (TTI.useAA() || DL->isNonIntegralAddressSpace(AddrSpace) ||
1072 PointerHasExtraData)
1073 lowerToSingleIndexGEPs(GEP, AccumulativeByteOffset);
1074 else
1075 lowerToArithmetics(GEP, AccumulativeByteOffset);
1076 return true;
1077 }
1078
1079 // No need to create another GEP if the accumulative byte offset is 0.
1080 if (AccumulativeByteOffset == 0)
1081 return true;
1082
1083 // Offsets the base with the accumulative byte offset.
1084 //
1085 // %gep ; the base
1086 // ... %gep ...
1087 //
1088 // => add the offset
1089 //
1090 // %gep2 ; clone of %gep
1091 // %new.gep = gep %gep2, <offset / sizeof(*%gep)>
1092 // %gep ; will be removed
1093 // ... %gep ...
1094 //
1095 // => replace all uses of %gep with %new.gep and remove %gep
1096 //
1097 // %gep2 ; clone of %gep
1098 // %new.gep = gep %gep2, <offset / sizeof(*%gep)>
1099 // ... %new.gep ...
1100 //
1101 // If AccumulativeByteOffset is not a multiple of sizeof(*%gep), we emit an
1102 // uglygep (http://llvm.org/docs/GetElementPtr.html#what-s-an-uglygep):
1103 // bitcast %gep2 to i8*, add the offset, and bitcast the result back to the
1104 // type of %gep.
1105 //
1106 // %gep2 ; clone of %gep
1107 // %0 = bitcast %gep2 to i8*
1108 // %uglygep = gep %0, <offset>
1109 // %new.gep = bitcast %uglygep to <type of %gep>
1110 // ... %new.gep ...
1111 Instruction *NewGEP = GEP->clone();
1112 NewGEP->insertBefore(GEP);
1113
1114 // Per ANSI C standard, signed / unsigned = unsigned and signed % unsigned =
1115 // unsigned. Therefore, we cast ElementTypeSizeOfGEP to signed because it is
1116 // used together with the signed AccumulativeByteOffset later.
1117 int64_t ElementTypeSizeOfGEP = static_cast<int64_t>(
1118 DL->getTypeAllocSize(GEP->getResultElementType()));
1119 Type *PtrIdxTy = DL->getIndexType(GEP->getType());
1120 if (AccumulativeByteOffset % ElementTypeSizeOfGEP == 0) {
1121 // Very likely. As long as %gep is naturally aligned, the byte offset we
1122 // extracted should be a multiple of sizeof(*%gep).
1123 int64_t Index = AccumulativeByteOffset / ElementTypeSizeOfGEP;
1124 NewGEP = GetElementPtrInst::Create(GEP->getResultElementType(), NewGEP,
1125 ConstantInt::get(PtrIdxTy, Index, true),
1126 GEP->getName(), GEP);
1127 NewGEP->copyMetadata(*GEP);
1128 // Inherit the inbounds attribute of the original GEP.
1129 cast<GetElementPtrInst>(NewGEP)->setIsInBounds(GEPWasInBounds);
1130 } else {
1131 // Unlikely but possible. For example,
1132 // #pragma pack(1)
1133 // struct S {
1134 // int a[3];
1135 // int64 b[8];
1136 // };
1137 // #pragma pack()
1138 //
1139 // Suppose the gep before extraction is &s[i + 1].b[j + 3]. After
1140 // extraction, it becomes &s[i].b[j] and AccumulativeByteOffset is
1141 // sizeof(S) + 3 * sizeof(int64) = 100, which is not a multiple of
1142 // sizeof(int64).
1143 //
1144 // Emit an uglygep in this case.
1145 IRBuilder<> Builder(GEP);
1146 Type *I8PtrTy =
1147 Builder.getInt8Ty()->getPointerTo(GEP->getPointerAddressSpace());
1148
1149 NewGEP = cast<Instruction>(Builder.CreateGEP(
1150 Builder.getInt8Ty(), Builder.CreateBitCast(NewGEP, I8PtrTy),
1151 {ConstantInt::get(PtrIdxTy, AccumulativeByteOffset, true)}, "uglygep",
1152 GEPWasInBounds));
1153
1154 NewGEP->copyMetadata(*GEP);
1155 NewGEP = cast<Instruction>(Builder.CreateBitCast(NewGEP, GEP->getType()));
1156 }
1157
1158 GEP->replaceAllUsesWith(NewGEP);
1159 GEP->eraseFromParent();
1160
1161 return true;
1162}
1163
1164bool SeparateConstOffsetFromGEPLegacyPass::runOnFunction(Function &F) {
1165 if (skipFunction(F))
1166 return false;
1167 auto *DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
1168 auto *SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
1169 auto *LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
1170 auto *TLI = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F);
1171 auto GetTTI = [this](Function &F) -> TargetTransformInfo & {
1172 return this->getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
1173 };
1174 SeparateConstOffsetFromGEP Impl(DT, SE, LI, TLI, GetTTI, LowerGEP);
1175 return Impl.run(F);
1176}
1177
1178bool SeparateConstOffsetFromGEP::run(Function &F) {
1179 if (DisableSeparateConstOffsetFromGEP)
1180 return false;
1181
1182 DL = &F.getParent()->getDataLayout();
1183 bool Changed = false;
1184 for (BasicBlock &B : F) {
1185 if (!DT->isReachableFromEntry(&B))
1186 continue;
1187
1188 for (Instruction &I : llvm::make_early_inc_range(B))
1189 if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(&I))
1190 Changed |= splitGEP(GEP);
1191 // No need to split GEP ConstantExprs because all their indices are constant
1192 // already.
1193 }
1194
1195 Changed |= reuniteExts(F);
1196
1197 if (VerifyNoDeadCode)
1198 verifyNoDeadCode(F);
1199
1200 return Changed;
1201}
1202
1203Instruction *SeparateConstOffsetFromGEP::findClosestMatchingDominator(
1204 const SCEV *Key, Instruction *Dominatee,
1205 DenseMap<const SCEV *, SmallVector<Instruction *, 2>> &DominatingExprs) {
1206 auto Pos = DominatingExprs.find(Key);
1207 if (Pos == DominatingExprs.end())
1208 return nullptr;
1209
1210 auto &Candidates = Pos->second;
1211 // Because we process the basic blocks in pre-order of the dominator tree, a
1212 // candidate that doesn't dominate the current instruction won't dominate any
1213 // future instruction either. Therefore, we pop it out of the stack. This
1214 // optimization makes the algorithm O(n).
1215 while (!Candidates.empty()) {
1216 Instruction *Candidate = Candidates.back();
1217 if (DT->dominates(Candidate, Dominatee))
1218 return Candidate;
1219 Candidates.pop_back();
1220 }
1221 return nullptr;
1222}
1223
1224bool SeparateConstOffsetFromGEP::reuniteExts(Instruction *I) {
1225 if (!SE->isSCEVable(I->getType()))
1226 return false;
1227
1228 // Dom: LHS+RHS
1229 // I: sext(LHS)+sext(RHS)
1230 // If Dom can't sign overflow and Dom dominates I, optimize I to sext(Dom).
1231 // TODO: handle zext
1232 Value *LHS = nullptr, *RHS = nullptr;
1233 if (match(I, m_Add(m_SExt(m_Value(LHS)), m_SExt(m_Value(RHS))))) {
1234 if (LHS->getType() == RHS->getType()) {
1235 const SCEV *Key =
1236 SE->getAddExpr(SE->getUnknown(LHS), SE->getUnknown(RHS));
1237 if (auto *Dom = findClosestMatchingDominator(Key, I, DominatingAdds)) {
1238 Instruction *NewSExt = new SExtInst(Dom, I->getType(), "", I);
1239 NewSExt->takeName(I);
1240 I->replaceAllUsesWith(NewSExt);
1241 RecursivelyDeleteTriviallyDeadInstructions(I);
1242 return true;
1243 }
1244 }
1245 } else if (match(I, m_Sub(m_SExt(m_Value(LHS)), m_SExt(m_Value(RHS))))) {
1246 if (LHS->getType() == RHS->getType()) {
1247 const SCEV *Key = SE->getAddExpr(
1248 SE->getUnknown(LHS), SE->getNegativeSCEV(SE->getUnknown(RHS)));
1249 if (auto *Dom = findClosestMatchingDominator(Key, I, DominatingSubs)) {
1250 Instruction *NewSExt = new SExtInst(Dom, I->getType(), "", I);
1251 NewSExt->takeName(I);
1252 I->replaceAllUsesWith(NewSExt);
1253 RecursivelyDeleteTriviallyDeadInstructions(I);
1254 return true;
1255 }
1256 }
1257 }
1258
1259 // Add I to DominatingExprs if it's an add/sub that can't sign overflow.
1260 if (match(I, m_NSWAdd(m_Value(LHS), m_Value(RHS)))) {
1261 if (programUndefinedIfPoison(I)) {
1262 const SCEV *Key =
1263 SE->getAddExpr(SE->getUnknown(LHS), SE->getUnknown(RHS));
1264 DominatingAdds[Key].push_back(I);
1265 }
1266 } else if (match(I, m_NSWSub(m_Value(LHS), m_Value(RHS)))) {
1267 if (programUndefinedIfPoison(I)) {
1268 const SCEV *Key = SE->getAddExpr(
1269 SE->getUnknown(LHS), SE->getNegativeSCEV(SE->getUnknown(RHS)));
1270 DominatingSubs[Key].push_back(I);
1271 }
1272 }
1273 return false;
1274}
1275
1276bool SeparateConstOffsetFromGEP::reuniteExts(Function &F) {
1277 bool Changed = false;
1278 DominatingAdds.clear();
1279 DominatingSubs.clear();
1280 for (const auto Node : depth_first(DT)) {
1281 BasicBlock *BB = Node->getBlock();
1282 for (Instruction &I : llvm::make_early_inc_range(*BB))
1283 Changed |= reuniteExts(&I);
1284 }
1285 return Changed;
1286}
1287
1288void SeparateConstOffsetFromGEP::verifyNoDeadCode(Function &F) {
1289 for (BasicBlock &B : F) {
1290 for (Instruction &I : B) {
1291 if (isInstructionTriviallyDead(&I)) {
1292 std::string ErrMessage;
1293 raw_string_ostream RSO(ErrMessage);
1294 RSO << "Dead instruction detected!\n" << I << "\n";
1295 llvm_unreachable(RSO.str().c_str());
1296 }
1297 }
1298 }
1299}
1300
1301bool SeparateConstOffsetFromGEP::isLegalToSwapOperand(
1302 GetElementPtrInst *FirstGEP, GetElementPtrInst *SecondGEP, Loop *CurLoop) {
1303 if (!FirstGEP || !FirstGEP->hasOneUse())
1304 return false;
1305
1306 if (!SecondGEP || FirstGEP->getParent() != SecondGEP->getParent())
1307 return false;
1308
1309 if (FirstGEP == SecondGEP)
1310 return false;
1311
1312 unsigned FirstNum = FirstGEP->getNumOperands();
1313 unsigned SecondNum = SecondGEP->getNumOperands();
1314 // Give up if the number of operands is not 2.
1315 if (FirstNum != SecondNum || FirstNum != 2)
1316 return false;
1317
1318 Value *FirstBase = FirstGEP->getOperand(0);
1319 Value *SecondBase = SecondGEP->getOperand(0);
1320 Value *FirstOffset = FirstGEP->getOperand(1);
1321 // Give up if the index of the first GEP is loop invariant.
1322 if (CurLoop->isLoopInvariant(FirstOffset))
1323 return false;
1324
1325 // Give up if the bases don't have the same type.
1326 if (FirstBase->getType() != SecondBase->getType())
1327 return false;
1328
1329 Instruction *FirstOffsetDef = dyn_cast<Instruction>(FirstOffset);
1330
1331 // Check if the second operand of the first GEP has a constant coefficient.
1332 // For example, in the following code we won't gain anything by
1333 // hoisting the second GEP out because the second GEP can be folded away.
1334 // %scevgep.sum.ur159 = add i64 %idxprom48.ur, 256
1335 // %67 = shl i64 %scevgep.sum.ur159, 2
1336 // %uglygep160 = getelementptr i8* %65, i64 %67
1337 // %uglygep161 = getelementptr i8* %uglygep160, i64 -1024
1338
1339 // Skip a constant shift instruction which may be generated by splitting GEPs.
1340 if (FirstOffsetDef && FirstOffsetDef->isShift() &&
1341 isa<ConstantInt>(FirstOffsetDef->getOperand(1)))
1342 FirstOffsetDef = dyn_cast<Instruction>(FirstOffsetDef->getOperand(0));
1343
1344 // Give up if FirstOffsetDef is an Add or Sub with a constant,
1345 // because it may not be profitable at all due to constant folding.
1346 if (FirstOffsetDef)
1347 if (BinaryOperator *BO = dyn_cast<BinaryOperator>(FirstOffsetDef)) {
1348 unsigned opc = BO->getOpcode();
1349 if ((opc == Instruction::Add || opc == Instruction::Sub) &&
1350 (isa<ConstantInt>(BO->getOperand(0)) ||
1351 isa<ConstantInt>(BO->getOperand(1))))
1352 return false;
1353 }
1354 return true;
1355}
1356
1357bool SeparateConstOffsetFromGEP::hasMoreThanOneUseInLoop(Value *V, Loop *L) {
1358 int UsesInLoop = 0;
1359 for (User *U : V->users()) {
1360 if (Instruction *User = dyn_cast<Instruction>(U))
1361 if (L->contains(User))
1362 if (++UsesInLoop > 1)
1363 return true;
1364 }
1365 return false;
1366}
1367
1368void SeparateConstOffsetFromGEP::swapGEPOperand(GetElementPtrInst *First,
1369 GetElementPtrInst *Second) {
1370 Value *Offset1 = First->getOperand(1);
1371 Value *Offset2 = Second->getOperand(1);
1372 First->setOperand(1, Offset2);
1373 Second->setOperand(1, Offset1);
1374
1375 // We changed p+o+c to p+c+o; p+c may no longer be in bounds.
1376 const DataLayout &DAL = First->getModule()->getDataLayout();
1377 APInt Offset(DAL.getIndexSizeInBits(
1378 cast<PointerType>(First->getType())->getAddressSpace()),
1379 0);
1380 Value *NewBase =
1381 First->stripAndAccumulateInBoundsConstantOffsets(DAL, Offset);
1382 uint64_t ObjectSize;
1383 if (!getObjectSize(NewBase, ObjectSize, DAL, TLI) ||
1384 Offset.ugt(ObjectSize)) {
1385 First->setIsInBounds(false);
1386 Second->setIsInBounds(false);
1387 } else
1388 First->setIsInBounds(true);
1389}
1390
1391void SeparateConstOffsetFromGEPPass::printPipeline(
1392 raw_ostream &OS, function_ref<StringRef(StringRef)> MapClassName2PassName) {
1393 static_cast<PassInfoMixin<SeparateConstOffsetFromGEPPass> *>(this)
1394 ->printPipeline(OS, MapClassName2PassName);
1395 OS << '<';
1396 if (LowerGEP)
1397 OS << "lower-gep";
1398 OS << '>';
1399}
1400
1401PreservedAnalyses
1402SeparateConstOffsetFromGEPPass::run(Function &F, FunctionAnalysisManager &AM) {
1403 auto *DT = &AM.getResult<DominatorTreeAnalysis>(F);
1404 auto *SE = &AM.getResult<ScalarEvolutionAnalysis>(F);
1405 auto *LI = &AM.getResult<LoopAnalysis>(F);
1406 auto *TLI = &AM.getResult<TargetLibraryAnalysis>(F);
1407 auto GetTTI = [&AM](Function &F) -> TargetTransformInfo & {
1408 return AM.getResult<TargetIRAnalysis>(F);
1409 };
1410 SeparateConstOffsetFromGEP Impl(DT, SE, LI, TLI, GetTTI, LowerGEP);
1411 if (!Impl.run(F))
1412 return PreservedAnalyses::all();
1413 PreservedAnalyses PA;
1414 PA.preserveSet<CFGAnalyses>();
1415 return PA;
1416}
Definition: LoopInfo.h:1268
The legacy pass manager's analysis pass to compute loop information.
Definition: LoopInfo.h:1293
Represents a single loop in the control flow graph.
Definition: LoopInfo.h:547
bool isLoopInvariant(const Value *V) const
Return true if the specified value is loop invariant.
Definition: LoopInfo.cpp:60
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
virtual void getAnalysisUsage(AnalysisUsage &) const
getAnalysisUsage - This function should be overriden by passes that need analysis information to do t...
Definition: Pass.cpp:98
A set of analyses that are preserved following a run of a transformation pass.
Definition: PassManager.h:152
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition: PassManager.h:158
void preserveSet()
Mark an analysis set as preserved.
Definition: PassManager.h:188
This class represents an analyzed expression in the program.
This class represents a sign extension of integer types.
Analysis pass that exposes the ScalarEvolution for a function.
The main scalar evolution driver.
void printPipeline(raw_ostream &OS, function_ref< StringRef(StringRef)> MapClassName2PassName)
PreservedAnalyses run(Function &F, FunctionAnalysisManager &)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1200
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:50
Class to represent struct types.
Definition: DerivedTypes.h:213
Analysis pass providing the TargetTransformInfo.
Analysis pass providing the TargetLibraryInfo.
Provides information about what library functions are available for the current target.
Wrapper pass for TargetTransformInfo.
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset, bool HasBaseReg, int64_t Scale, unsigned AddrSpace=0, Instruction *I=nullptr) const
Return true if the addressing mode represented by AM is legal for this target, for a load/store of th...
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
unsigned getIntegerBitWidth() const
A Use represents the edge between a Value definition and its users.
Definition: Use.h:43
void setOperand(unsigned i, Value *Val)
Definition: User.h:174
Value * getOperand(unsigned i) const
Definition: User.h:169
unsigned getNumOperands() const
Definition: User.h:191
LLVM Value Representation.
Definition: Value.h:74
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:255
bool hasOneUse() const
Return true if there is exactly one use of this value.
Definition: Value.h:434
bool use_empty() const
Definition: Value.h:344
StringRef getName() const
Return a constant reference to the value's name.
Definition: Value.cpp:308
void takeName(Value *V)
Transfer the name from V to this value.
Definition: Value.cpp:383
An efficient, type-erasing, non-owning reference to a callable.
This class implements an extremely fast bulk output stream that can only output to a stream.
Definition: raw_ostream.h:52
A raw_ostream that writes to an std::string.
Definition: raw_ostream.h:642
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
Key
PAL metadata keys.
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition: CallingConv.h:24
@ C
The default llvm calling convention, compatible with C.
Definition: CallingConv.h:34
BinaryOp_match< LHS, RHS, Instruction::Add > m_Add(const LHS &L, const RHS &R)
Definition: PatternMatch.h:979
CastClass_match< OpTy, Instruction::SExt > m_SExt(const OpTy &Op)
Matches SExt.
OverflowingBinaryOp_match< LHS, RHS, Instruction::Sub, OverflowingBinaryOperator::NoSignedWrap > m_NSWSub(const LHS &L, const RHS &R)
bool match(Val *V, const Pattern &P)
Definition: PatternMatch.h:49
class_match< Value > m_Value()
Match an arbitrary value and ignore it.
Definition: PatternMatch.h:76
OverflowingBinaryOp_match< LHS, RHS, Instruction::Add, OverflowingBinaryOperator::NoSignedWrap > m_NSWAdd(const LHS &L, const RHS &R)
BinaryOp_match< LHS, RHS, Instruction::Sub > m_Sub(const LHS &L, const RHS &R)
Definition: PatternMatch.h:991
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:445
PointerTypeMap run(const Module &M)
Compute the PointerTypeMap for the module M.
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
@ Offset
Definition: DWP.cpp:406
auto find(R &&Range, const T &Val)
Provide wrappers to std::find which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1839
bool RecursivelyDeleteTriviallyDeadInstructions(Value *V, const TargetLibraryInfo *TLI=nullptr, MemorySSAUpdater *MSSAU=nullptr, std::function< void(Value *)> AboutToDeleteCallback=std::function< void(Value *)>())
If the specified value is a trivially dead instruction, delete it.
Definition: Local.cpp:537
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
Definition: STLExtras.h:748
bool isInstructionTriviallyDead(Instruction *I, const TargetLibraryInfo *TLI=nullptr)
Return true if the result produced by the instruction is not used, and the instruction will return.
Definition: Local.cpp:398
bool getObjectSize(const Value *Ptr, uint64_t &Size, const DataLayout &DL, const TargetLibraryInfo *TLI, ObjectSizeOpts Opts={})
Compute the size of the object pointed by Ptr.
auto reverse(ContainerTy &&C)
Definition: STLExtras.h:511
bool programUndefinedIfPoison(const Instruction *Inst)
void initializeSeparateConstOffsetFromGEPLegacyPassPass(PassRegistry &)
FunctionPass * createSeparateConstOffsetFromGEPPass(bool LowerGEP=false)
bool haveNoCommonBitsSet(const Value *LHS, const Value *RHS, const DataLayout &DL, AssumptionCache *AC=nullptr, const Instruction *CxtI=nullptr, const DominatorTree *DT=nullptr, bool UseInstrInfo=true)
Return true if LHS and RHS have no common bits set.
constexpr unsigned BitWidth
Definition: BitmaskEnum.h:184
gep_type_iterator gep_type_begin(const User *GEP)
iterator_range< df_iterator< T > > depth_first(const T &G)
A CRTP mix-in to automatically provide informational APIs needed for passes.
Definition: PassManager.h:371