LLVM 23.0.0git
SeparateConstOffsetFromGEP.cpp
Go to the documentation of this file.
1//===- SeparateConstOffsetFromGEP.cpp -------------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// Loop unrolling may create many similar GEPs for array accesses.
10// e.g., a 2-level loop
11//
12// float a[32][32]; // global variable
13//
14// for (int i = 0; i < 2; ++i) {
15// for (int j = 0; j < 2; ++j) {
16// ...
17// ... = a[x + i][y + j];
18// ...
19// }
20// }
21//
22// will probably be unrolled to:
23//
24// gep %a, 0, %x, %y; load
25// gep %a, 0, %x, %y + 1; load
26// gep %a, 0, %x + 1, %y; load
27// gep %a, 0, %x + 1, %y + 1; load
28//
29// LLVM's GVN does not use partial redundancy elimination yet, and is thus
30// unable to reuse (gep %a, 0, %x, %y). As a result, this misoptimization incurs
31// significant slowdown in targets with limited addressing modes. For instance,
32// because the PTX target does not support the reg+reg addressing mode, the
33// NVPTX backend emits PTX code that literally computes the pointer address of
34// each GEP, wasting tons of registers. It emits the following PTX for the
35// first load and similar PTX for other loads.
36//
37// mov.u32 %r1, %x;
38// mov.u32 %r2, %y;
39// mul.wide.u32 %rl2, %r1, 128;
40// mov.u64 %rl3, a;
41// add.s64 %rl4, %rl3, %rl2;
42// mul.wide.u32 %rl5, %r2, 4;
43// add.s64 %rl6, %rl4, %rl5;
44// ld.global.f32 %f1, [%rl6];
45//
46// To reduce the register pressure, the optimization implemented in this file
47// merges the common part of a group of GEPs, so we can compute each pointer
48// address by adding a simple offset to the common part, saving many registers.
49//
50// It works by splitting each GEP into a variadic base and a constant offset.
51// The variadic base can be computed once and reused by multiple GEPs, and the
52// constant offsets can be nicely folded into the reg+immediate addressing mode
53// (supported by most targets) without using any extra register.
54//
55// For instance, we transform the four GEPs and four loads in the above example
56// into:
57//
58// base = gep a, 0, x, y
59// load base
60// load base + 1 * sizeof(float)
61// load base + 32 * sizeof(float)
62// load base + 33 * sizeof(float)
63//
64// Given the transformed IR, a backend that supports the reg+immediate
65// addressing mode can easily fold the pointer arithmetics into the loads. For
66// example, the NVPTX backend can easily fold the pointer arithmetics into the
67// ld.global.f32 instructions, and the resultant PTX uses much fewer registers.
68//
69// mov.u32 %r1, %tid.x;
70// mov.u32 %r2, %tid.y;
71// mul.wide.u32 %rl2, %r1, 128;
72// mov.u64 %rl3, a;
73// add.s64 %rl4, %rl3, %rl2;
74// mul.wide.u32 %rl5, %r2, 4;
75// add.s64 %rl6, %rl4, %rl5;
76// ld.global.f32 %f1, [%rl6]; // so far the same as unoptimized PTX
77// ld.global.f32 %f2, [%rl6+4]; // much better
78// ld.global.f32 %f3, [%rl6+128]; // much better
79// ld.global.f32 %f4, [%rl6+132]; // much better
80//
81// Another improvement enabled by the LowerGEP flag is to lower a GEP with
82// multiple indices to multiple GEPs with a single index.
83// Such transformation can have following benefits:
84// (1) It can always extract constants in the indices of structure type.
85// (2) After such Lowering, there are more optimization opportunities such as
86// CSE, LICM and CGP.
87//
88// E.g. The following GEPs have multiple indices:
89// BB1:
90// %p = getelementptr [10 x %struct], ptr %ptr, i64 %i, i64 %j1, i32 3
91// load %p
92// ...
93// BB2:
94// %p2 = getelementptr [10 x %struct], ptr %ptr, i64 %i, i64 %j1, i32 2
95// load %p2
96// ...
97//
98// We can not do CSE to the common part related to index "i64 %i". Lowering
99// GEPs can achieve such goals.
100//
101// This pass will lower a GEP with multiple indices into multiple GEPs with a
102// single index:
103// BB1:
104// %2 = mul i64 %i, length_of_10xstruct ; CSE opportunity
105// %3 = getelementptr i8, ptr %ptr, i64 %2 ; CSE opportunity
106// %4 = mul i64 %j1, length_of_struct
107// %5 = getelementptr i8, ptr %3, i64 %4
108// %p = getelementptr i8, ptr %5, struct_field_3 ; Constant offset
109// load %p
110// ...
111// BB2:
112// %8 = mul i64 %i, length_of_10xstruct ; CSE opportunity
113// %9 = getelementptr i8, ptr %ptr, i64 %8 ; CSE opportunity
114// %10 = mul i64 %j2, length_of_struct
115// %11 = getelementptr i8, ptr %9, i64 %10
116// %p2 = getelementptr i8, ptr %11, struct_field_2 ; Constant offset
117// load %p2
118// ...
119//
120// Lowering GEPs can also benefit other passes such as LICM and CGP.
121// LICM (Loop Invariant Code Motion) can not hoist/sink a GEP of multiple
122// indices if one of the index is variant. If we lower such GEP into invariant
123// parts and variant parts, LICM can hoist/sink those invariant parts.
124// CGP (CodeGen Prepare) tries to sink address calculations that match the
125// target's addressing modes. A GEP with multiple indices may not match and will
126// not be sunk. If we lower such GEP into smaller parts, CGP may sink some of
127// them. So we end up with a better addressing mode.
128//
129//===----------------------------------------------------------------------===//
130
132#include "llvm/ADT/APInt.h"
133#include "llvm/ADT/DenseMap.h"
135#include "llvm/ADT/SmallVector.h"
141#include "llvm/IR/BasicBlock.h"
142#include "llvm/IR/Constant.h"
143#include "llvm/IR/Constants.h"
144#include "llvm/IR/DataLayout.h"
145#include "llvm/IR/DerivedTypes.h"
146#include "llvm/IR/Dominators.h"
147#include "llvm/IR/Function.h"
149#include "llvm/IR/IRBuilder.h"
150#include "llvm/IR/InstrTypes.h"
151#include "llvm/IR/Instruction.h"
152#include "llvm/IR/Instructions.h"
153#include "llvm/IR/Module.h"
154#include "llvm/IR/PassManager.h"
155#include "llvm/IR/PatternMatch.h"
156#include "llvm/IR/Type.h"
157#include "llvm/IR/User.h"
158#include "llvm/IR/Value.h"
160#include "llvm/Pass.h"
161#include "llvm/Support/Casting.h"
167#include <cassert>
168#include <cstdint>
169#include <string>
170
171using namespace llvm;
172using namespace llvm::PatternMatch;
173
175 "disable-separate-const-offset-from-gep", cl::init(false),
176 cl::desc("Do not separate the constant offset from a GEP instruction"),
177 cl::Hidden);
178
// Setting this flag may emit false positives when the input module already
// contains dead instructions. Therefore, we set it only in unit tests that are
// free of dead code. (Pre-existing dead instructions would be reported as if
// this pass had produced them.)
static cl::opt<bool>
    VerifyNoDeadCode("reassociate-geps-verify-no-dead-code", cl::init(false),
                     cl::desc("Verify this pass produces no dead code"),
                     cl::Hidden);
186
187namespace {
188
189/// A helper class for separating a constant offset from a GEP index.
190///
191/// In real programs, a GEP index may be more complicated than a simple addition
192/// of something and a constant integer which can be trivially splitted. For
193/// example, to split ((a << 3) | 5) + b, we need to search deeper for the
194/// constant offset, so that we can separate the index to (a << 3) + b and 5.
195///
196/// Therefore, this class looks into the expression that computes a given GEP
197/// index, and tries to find a constant integer that can be hoisted to the
198/// outermost level of the expression as an addition. Not every constant in an
199/// expression can jump out. e.g., we cannot transform (b * (a + 5)) to (b * a +
200/// 5); nor can we transform (3 * (a + 5)) to (3 * a + 5), however in this case,
201/// -instcombine probably already optimized (3 * (a + 5)) to (3 * a + 15).
class ConstantOffsetExtractor {
public:
  /// Extracts a constant offset from the given GEP index. It returns the
  /// new index representing the remainder (equal to the original index minus
  /// the constant offset), or nullptr if we cannot extract a constant offset.
  /// \p Idx The given GEP index
  /// \p GEP The given GEP
  /// \p UserChainTail Outputs the tail of UserChain so that we can
  /// garbage-collect unused instructions in UserChain.
  /// \p PreservesNUW Outputs whether the extraction allows preserving the
  /// GEP's nuw flag, if it has one.
  static Value *Extract(Value *Idx, GetElementPtrInst *GEP,
                        User *&UserChainTail, bool &PreservesNUW);

  /// Looks for a constant offset from the given GEP index without extracting
  /// it. It returns the numeric value of the extracted constant offset (0 if
  /// failed). The meaning of the arguments are the same as Extract.
  /// Unlike Extract, this query does not rewrite any IR.
  static APInt Find(Value *Idx, GetElementPtrInst *GEP);

private:
  /// Creates an extractor whose cloned instructions are inserted before
  /// \p InsertionPt; the DataLayout is obtained from the insertion point.
  ConstantOffsetExtractor(BasicBlock::iterator InsertionPt)
      : IP(InsertionPt), DL(InsertionPt->getDataLayout()) {}

  /// Searches the expression that computes V for a non-zero constant C s.t.
  /// V can be reassociated into the form V' + C. If the searching is
  /// successful, returns C and update UserChain as a def-use chain from C to V;
  /// otherwise, UserChain is empty.
  ///
  /// \p V            The given expression
  /// \p GEP          The base GEP instruction, used for determining relevant
  ///                 types, flags, and non-negativity needed for safe
  ///                 reassociation
  /// \p Idx          The original index of the GEP
  /// \p SignExtended Whether V will be sign-extended in the computation of
  ///                 the GEP index
  /// \p ZeroExtended Whether V will be zero-extended in the computation of
  ///                 the GEP index
  APInt find(Value *V, GetElementPtrInst *GEP, Value *Idx, bool SignExtended,
             bool ZeroExtended);

  /// A helper function to look into both operands of a binary operator.
  APInt findInEitherOperand(BinaryOperator *BO, bool SignExtended,
                            bool ZeroExtended);

  /// After finding the constant offset C from the GEP index I, we build a new
  /// index I' s.t. I' + C = I. This function builds and returns the new
  /// index I' according to UserChain produced by function "find".
  ///
  /// The building conceptually takes two steps:
  /// 1) iteratively distribute sext/zext/trunc towards the leaves of the
  ///    expression tree that computes I
  /// 2) reassociate the expression tree to the form I' + C.
  ///
  /// For example, to extract the 5 from sext(a + (b + 5)), we first distribute
  /// sext to a, b and 5 so that we have
  ///   sext(a) + (sext(b) + 5).
  /// Then, we reassociate it to
  ///   (sext(a) + sext(b)) + 5.
  /// Given this form, we know I' is sext(a) + sext(b).
  Value *rebuildWithoutConstOffset();

  /// After the first step of rebuilding the GEP index without the constant
  /// offset, distribute sext/zext/trunc to the operands of all operators in
  /// UserChain. e.g., zext(sext(a + (b + 5)) (assuming no overflow) =>
  /// zext(sext(a)) + (zext(sext(b)) + zext(sext(5))).
  ///
  /// The function also updates UserChain to point to new subexpressions after
  /// distributing sext/zext/trunc. e.g., the old UserChain of the above example
  /// is
  ///   5 -> b + 5 -> a + (b + 5) -> sext(...) -> zext(sext(...)),
  /// and the new UserChain is
  ///   zext(sext(5)) -> zext(sext(b)) + zext(sext(5)) ->
  ///     zext(sext(a)) + (zext(sext(b)) + zext(sext(5))
  ///
  /// \p ChainIndex The index to UserChain. ChainIndex is initially
  ///               UserChain.size() - 1, and is decremented during
  ///               the recursion.
  Value *distributeCastsAndCloneChain(unsigned ChainIndex);

  /// Reassociates the GEP index to the form I' + C and returns I'.
  Value *removeConstOffset(unsigned ChainIndex);

  /// A helper function to apply CastInsts, a list of sext/zext/trunc, to value
  /// V. e.g., if CastInsts = [sext i32 to i64, zext i16 to i32], this function
  /// returns "sext i32 (zext i16 V to i32) to i64".
  Value *applyCasts(Value *V);

  /// A helper function that returns whether we can trace into the operands
  /// of binary operator BO for a constant offset.
  ///
  /// \p SignExtended Whether BO is surrounded by sext
  /// \p ZeroExtended Whether BO is surrounded by zext
  /// \p GEP          The base GEP instruction, used for determining relevant
  ///                 types and flags needed for safe reassociation.
  /// \p Idx          The original index of the GEP
  bool canTraceInto(bool SignExtended, bool ZeroExtended, BinaryOperator *BO,
                    GetElementPtrInst *GEP, Value *Idx);

  /// The path from the constant offset to the old GEP index. e.g., if the GEP
  /// index is "a * b + (c + 5)". After running function find, UserChain[0] will
  /// be the constant 5, UserChain[1] will be the subexpression "c + 5", and
  /// UserChain[2] will be the entire expression "a * b + (c + 5)".
  ///
  /// This path helps to rebuild the new GEP index.
  SmallVector<User *, 8> UserChain;

  /// A data structure used in rebuildWithoutConstOffset. Contains all
  /// sext/zext/trunc instructions along UserChain.

  /// Insertion position of cloned instructions.

  /// DataLayout obtained from the insertion point's function (see ctor).
  const DataLayout &DL;
};
317
318/// A pass that tries to split every GEP in the function into a variadic
319/// base and a constant offset. It is a FunctionPass because searching for the
320/// constant offset may inspect other basic blocks.
class SeparateConstOffsetFromGEPLegacyPass : public FunctionPass {
public:
  // Pass identification for the legacy pass manager.
  static char ID;

  SeparateConstOffsetFromGEPLegacyPass(bool LowerGEP = false)
      : FunctionPass(ID), LowerGEP(LowerGEP) {
  }

  // Declares the analyses this pass needs. The CFG itself is preserved; only
  // instructions within blocks are rewritten.
  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.addRequired<DominatorTreeWrapperPass>();
    AU.addRequired<TargetTransformInfoWrapperPass>();
    AU.addRequired<LoopInfoWrapperPass>();
    AU.setPreservesCFG();
    AU.addRequired<TargetLibraryInfoWrapperPass>();
  }

  bool runOnFunction(Function &F) override;

private:
  // Whether to additionally lower multi-index GEPs into single-index forms
  // (see the file header comment for the motivation).
  bool LowerGEP;
};
344
345/// A pass that tries to split every GEP in the function into a variadic
346/// base and a constant offset. It is a FunctionPass because searching for the
347/// constant offset may inspect other basic blocks.
class SeparateConstOffsetFromGEP {
public:
  SeparateConstOffsetFromGEP(
      DominatorTree *DT, LoopInfo *LI, TargetLibraryInfo *TLI,
      function_ref<TargetTransformInfo &(Function &)> GetTTI, bool LowerGEP)
      : DT(DT), LI(LI), TLI(TLI), GetTTI(GetTTI), LowerGEP(LowerGEP) {}

  bool run(Function &F);

private:
  /// Track the operands of an add or sub.
  using ExprKey = std::pair<Value *, Value *>;

  /// Create a pair for use as a map key for a commutable operation.
  /// Ordering the two operands by address guarantees that (A, B) and (B, A)
  /// produce the same key.
  static ExprKey createNormalizedCommutablePair(Value *A, Value *B) {
    if (A < B)
      return {A, B};
    return {B, A};
  }

  /// Tries to split the given GEP into a variadic base and a constant offset,
  /// and returns true if the splitting succeeds.
  bool splitGEP(GetElementPtrInst *GEP);

  /// Tries to reorder the given GEP with the GEP that produces the base if
  /// doing so results in producing a constant offset as the outermost
  /// index.
  bool reorderGEP(GetElementPtrInst *GEP, TargetTransformInfo &TTI);

  /// Lower a GEP with multiple indices into multiple GEPs with a single index.
  /// Function splitGEP already split the original GEP into a variadic part and
  /// a constant offset (i.e., AccumulativeByteOffset). This function lowers the
  /// variadic part into a set of GEPs with a single index and applies
  /// AccumulativeByteOffset to it.
  /// \p Variadic              The variadic part of the original GEP.
  /// \p AccumulativeByteOffset The constant offset.
  void lowerToSingleIndexGEPs(GetElementPtrInst *Variadic,
                              const APInt &AccumulativeByteOffset);

  /// Finds the constant offset within each index and accumulates them. If
  /// LowerGEP is true, it finds in indices of both sequential and structure
  /// types, otherwise it only finds in sequential indices. The output
  /// NeedsExtraction indicates whether we successfully find a non-zero constant
  /// offset, and SignedOverflow indicates if there was signed overflow in
  /// offset calculation.
  APInt accumulateByteOffset(GetElementPtrInst *GEP, bool &NeedsExtraction,
                             bool &SignedOverflow);

  /// Canonicalize array indices to pointer-size integers. This helps to
  /// simplify the logic of splitting a GEP. For example, if a + b is a
  /// pointer-size integer, we have
  ///   gep base, a + b = gep (gep base, a), b
  /// However, this equality may not hold if the size of a + b is smaller than
  /// the pointer size, because LLVM conceptually sign-extends GEP indices to
  /// pointer size before computing the address
  /// (http://llvm.org/docs/LangRef.html#id181).
  ///
  /// This canonicalization is very likely already done in clang and
  /// instcombine. Therefore, the program will probably remain the same.
  ///
  /// Returns true if the module changes.
  ///
  /// Verified in @i32_add in split-gep.ll
  bool canonicalizeArrayIndicesToIndexSize(GetElementPtrInst *GEP);

  /// Optimize sext(a)+sext(b) to sext(a+b) when a+b can't sign overflow.
  /// SeparateConstOffsetFromGEP distributes a sext to leaves before extracting
  /// the constant offset. After extraction, it becomes desirable to reunion the
  /// distributed sexts. For example,
  ///
  ///                              &a[sext(i +nsw (j +nsw 5)]
  ///   => distribute              &a[sext(i) +nsw (sext(j) +nsw 5)]
  ///   => constant extraction     &a[sext(i) + sext(j)] + 5
  ///   => reunion                 &a[sext(i +nsw j)] + 5
  bool reuniteExts(Function &F);

  /// A helper that reunites sexts in an instruction.
  bool reuniteExts(Instruction *I);

  /// Find the closest dominator of <Dominatee> that is equivalent to <Key>.
  Instruction *findClosestMatchingDominator(
      ExprKey Key, Instruction *Dominatee,
      DenseMap<ExprKey, SmallVector<Instruction *, 2>> &DominatingExprs);

  /// Verify F is free of dead code.
  void verifyNoDeadCode(Function &F);

  bool hasMoreThanOneUseInLoop(Value *v, Loop *L);

  // Swap the index operand of two GEP.
  void swapGEPOperand(GetElementPtrInst *First, GetElementPtrInst *Second);

  // Check if it is safe to swap operand of two GEP.
  bool isLegalToSwapOperand(GetElementPtrInst *First, GetElementPtrInst *Second,
                            Loop *CurLoop);

  const DataLayout *DL = nullptr;
  DominatorTree *DT = nullptr;
  LoopInfo *LI;
  TargetLibraryInfo *TLI;
  // Retrieved lazily since not always used.
  function_ref<TargetTransformInfo &(Function &)> GetTTI;

  /// Whether to lower a GEP with multiple indices into arithmetic operations or
  /// multiple GEPs with a single index.
  bool LowerGEP;

  /// Dominating add/sub expressions, keyed by their normalized operand pair
  /// (see createNormalizedCommutablePair); passed to
  /// findClosestMatchingDominator.
  DenseMap<ExprKey, SmallVector<Instruction *, 2>> DominatingAdds;
  DenseMap<ExprKey, SmallVector<Instruction *, 2>> DominatingSubs;
};
458
459} // end anonymous namespace
460
// Unique identifier used by LLVM's legacy pass machinery.
char SeparateConstOffsetFromGEPLegacyPass::ID = 0;
462
464 SeparateConstOffsetFromGEPLegacyPass, "separate-const-offset-from-gep",
465 "Split GEPs to a variadic base and a constant offset for better CSE", false,
466 false)
473 SeparateConstOffsetFromGEPLegacyPass, "separate-const-offset-from-gep",
474 "Split GEPs to a variadic base and a constant offset for better CSE", false,
475 false)
476
478 return new SeparateConstOffsetFromGEPLegacyPass(LowerGEP);
479}
480
481// Checks if it is safe to reorder an add/sext result used in a GEP.
482//
483// An inbounds GEP does not guarantee that the index is non-negative.
484// This helper checks first if the index is known non-negative. If the index is
485// non-negative, the transform is always safe.
486// Second, it checks whether the GEP is inbounds and directly based on a global
487// or an alloca, which are required to prove futher transform validity.
488// If the GEP:
489// - Has a zero offset from the base, the index is non-negative (any negative
490// value would produce poison/UB)
491// - Has ObjectSize < (2^(N-1) - C + 1) * stride, where C is a constant from the
492// add, stride is the element size of Idx, and N is bitwidth of Idx.
493// This is because with this pattern:
494// %add = add iN %val, C
495// %sext = sext iN %add to i64
496// %gep = getelementptr inbounds TYPE, %sext
497// The worst-case is when %val sign-flips to produce the smallest magnitude
498// negative value, at 2^(N-1)-1. In this case, the add/sext is -(2^(N-1)-C+1),
499// and the sext/add is 2^(N-1)+C-1 (2^N difference). The original add/sext
500// only produces a defined GEP when -(2^(N-1)-C+1) is inbounds. So, if
501// ObjectSize < (2^(N-1) - C + 1) * stride, it is impossible for the
502// worst-case sign-flip to be defined.
503// Note that in this case the GEP is not neccesarily non-negative, but any
504// negative results will still produce the same behavior in the reordered
505// version with a defined GEP.
506// This can also work for negative C, but the threshold is instead
507// (2^(N-1)+C)*stride, since the sign-flip is done in reverse and is instead
508// producing a large positive value that still needs to be inbounds to the
509// object size. If C is negative, we cannot make any useful assumptions based
510// on the offset, since it would need to be extremely large.
    const Value *Idx, const BinaryOperator *Add,
    const DataLayout &DL) {
  // Fast path: a provably non-negative index is always safe to reorder.
  if (isKnownNonNegative(Idx, DL))
    return true;

  // All of the object-size reasoning below relies on the GEP being inbounds.
  if (!GEP->isInBounds())
    return false;

  const Value *Ptr = GEP->getPointerOperand();
  int64_t Offset = 0;
  const Value *Base =
      GetPointerBaseWithConstantOffset(const_cast<Value *>(Ptr), Offset, DL);

  // We need one of the operands to be a constant to be able to trace into the
  // operator.
  const ConstantInt *CI = dyn_cast<ConstantInt>(Add->getOperand(0));
  if (!CI)
    CI = dyn_cast<ConstantInt>(Add->getOperand(1));
  if (!CI)
    return false;
  // Calculate the threshold
  // (see the function-level comment above; all arithmetic is widened to 128
  // bits so the products below cannot overflow).
  APInt Threshold;
  unsigned N = Add->getType()->getIntegerBitWidth();
  TypeSize ElemSize = DL.getTypeAllocSize(GEP->getSourceElementType());
  if (ElemSize.isScalable())
    return false;
  uint64_t Stride = ElemSize.getFixedValue();
  if (!CI->isNegative()) {
    // (2^(N-1) - C + 1) * stride
    Threshold = (APInt::getSignedMinValue(N).zext(128) -
                 CI->getValue().zextOrTrunc(128) + 1) *
                APInt(128, Stride);
  } else {
    // (2^(N-1) + C) * stride
    Threshold = (APInt::getSignedMinValue(N).zext(128) +
                 CI->getValue().sextOrTrunc(128)) *
                APInt(128, Stride);
  }

      !CI->isNegative()) {
    // If the offset is zero from an alloca or global, inbounds is sufficient to
    // prove non-negativity if one add operand is non-negative
    if (Offset == 0)
      return true;

    // Check if the Offset < Threshold (positive CI only) otherwise
    if (Offset < 0)
      return true;
    if (APInt(128, (uint64_t)Offset).ult(Threshold))
      return true;
  } else {
    // If we can't determine the offset from the base object, we can still use
    // the underlying object and type size constraints
    // Can only prove non-negativity if the base object is known
      return false;
  }

  // Check if the ObjectSize < Threshold (for both positive or negative C)
  uint64_t ObjSize = 0;
  if (const auto *AI = dyn_cast<AllocaInst>(Base)) {
    if (auto AllocSize = AI->getAllocationSize(DL))
      if (!AllocSize->isScalable())
        ObjSize = AllocSize->getFixedValue();
  } else if (const auto *GV = dyn_cast<GlobalVariable>(Base)) {
    TypeSize GVSize = DL.getTypeAllocSize(GV->getValueType());
    if (!GVSize.isScalable())
      ObjSize = GVSize.getFixedValue();
  }
  if (ObjSize > 0 && APInt(128, ObjSize).ult(Threshold))
    return true;

  return false;
}
588
589bool ConstantOffsetExtractor::canTraceInto(bool SignExtended, bool ZeroExtended,
590 BinaryOperator *BO,
591 GetElementPtrInst *GEP, Value *Idx) {
592 // We only consider ADD, SUB and OR, because a non-zero constant found in
593 // expressions composed of these operations can be easily hoisted as a
594 // constant offset by reassociation.
595 if (BO->getOpcode() != Instruction::Add &&
596 BO->getOpcode() != Instruction::Sub &&
597 BO->getOpcode() != Instruction::Or) {
598 return false;
599 }
600
601 // Do not trace into "or" unless it is equivalent to "add nuw nsw".
602 // This is the case if the or's disjoint flag is set.
603 if (BO->getOpcode() == Instruction::Or &&
604 !cast<PossiblyDisjointInst>(BO)->isDisjoint())
605 return false;
606
607 // FIXME: We don't currently support constants from the RHS of subs,
608 // when we are zero-extended, because we need a way to zero-extended
609 // them before they are negated.
610 if (ZeroExtended && !SignExtended && BO->getOpcode() == Instruction::Sub)
611 return false;
612
613 // In addition, tracing into BO requires that its surrounding sext/zext/trunc
614 // (if any) is distributable to both operands.
615 //
616 // Suppose BO = A op B.
617 // SignExtended | ZeroExtended | Distributable?
618 // --------------+--------------+----------------------------------
619 // 0 | 0 | true because no s/zext exists
620 // 0 | 1 | zext(BO) == zext(A) op zext(B)
621 // 1 | 0 | sext(BO) == sext(A) op sext(B)
622 // 1 | 1 | zext(sext(BO)) ==
623 // | | zext(sext(A)) op zext(sext(B))
624 if (BO->getOpcode() == Instruction::Add && !ZeroExtended && GEP) {
625 // If a + b >= 0 and (a >= 0 or b >= 0), then
626 // sext(a + b) = sext(a) + sext(b)
627 // even if the addition is not marked nsw.
628 //
629 // Leveraging this invariant, we can trace into an sext'ed inbound GEP
630 // index under certain conditions (see canReorderAddSextToGEP).
631 //
632 // Verified in @sext_add in split-gep.ll.
633 if (canReorderAddSextToGEP(GEP, Idx, BO, DL))
634 return true;
635 }
636
637 // For a sext(add nuw), allow tracing through when the enclosing GEP is both
638 // inbounds and nuw.
639 bool GEPInboundsNUW =
640 GEP ? (GEP->isInBounds() && GEP->hasNoUnsignedWrap()) : false;
641 if (BO->getOpcode() == Instruction::Add && SignExtended && !ZeroExtended &&
642 GEPInboundsNUW && BO->hasNoUnsignedWrap())
643 return true;
644
645 // sext (add/sub nsw A, B) == add/sub nsw (sext A), (sext B)
646 // zext (add/sub nuw A, B) == add/sub nuw (zext A), (zext B)
647 if (BO->getOpcode() == Instruction::Add ||
648 BO->getOpcode() == Instruction::Sub) {
649 if (SignExtended && !BO->hasNoSignedWrap())
650 return false;
651 if (ZeroExtended && !BO->hasNoUnsignedWrap())
652 return false;
653 }
654
655 return true;
656}
657
658APInt ConstantOffsetExtractor::findInEitherOperand(BinaryOperator *BO,
659 bool SignExtended,
660 bool ZeroExtended) {
661 // Save off the current height of the chain, in case we need to restore it.
662 size_t ChainLength = UserChain.size();
663
664 // BO cannot use information from the base GEP at this point, so clear it.
665 APInt ConstantOffset =
666 find(BO->getOperand(0), nullptr, nullptr, SignExtended, ZeroExtended);
667 // If we found a constant offset in the left operand, stop and return that.
668 // This shortcut might cause us to miss opportunities of combining the
669 // constant offsets in both operands, e.g., (a + 4) + (b + 5) => (a + b) + 9.
670 // However, such cases are probably already handled by -instcombine,
671 // given this pass runs after the standard optimizations.
672 if (ConstantOffset != 0) return ConstantOffset;
673
674 // Reset the chain back to where it was when we started exploring this node,
675 // since visiting the LHS didn't pan out.
676 UserChain.resize(ChainLength);
677
678 ConstantOffset =
679 find(BO->getOperand(1), nullptr, nullptr, SignExtended, ZeroExtended);
680 // If U is a sub operator, negate the constant offset found in the right
681 // operand.
682 if (BO->getOpcode() == Instruction::Sub)
683 ConstantOffset = -ConstantOffset;
684
685 // If RHS wasn't a suitable candidate either, reset the chain again.
686 if (ConstantOffset == 0)
687 UserChain.resize(ChainLength);
688
689 return ConstantOffset;
690}
691
692APInt ConstantOffsetExtractor::find(Value *V, GetElementPtrInst *GEP,
693 Value *Idx, bool SignExtended,
694 bool ZeroExtended) {
695 // TODO(jingyue): We could trace into integer/pointer casts, such as
696 // inttoptr, ptrtoint, bitcast, and addrspacecast. We choose to handle only
697 // integers because it gives good enough results for our benchmarks.
698 unsigned BitWidth = cast<IntegerType>(V->getType())->getBitWidth();
699
700 // We cannot do much with Values that are not a User, such as an Argument.
701 User *U = dyn_cast<User>(V);
702 if (U == nullptr) return APInt(BitWidth, 0);
703
704 APInt ConstantOffset(BitWidth, 0);
705 if (ConstantInt *CI = dyn_cast<ConstantInt>(V)) {
706 // Hooray, we found it!
707 ConstantOffset = CI->getValue();
708 } else if (BinaryOperator *BO = dyn_cast<BinaryOperator>(V)) {
709 // Trace into subexpressions for more hoisting opportunities.
710 if (canTraceInto(SignExtended, ZeroExtended, BO, GEP, Idx))
711 ConstantOffset = findInEitherOperand(BO, SignExtended, ZeroExtended);
712 } else if (isa<TruncInst>(V)) {
713 ConstantOffset =
714 find(U->getOperand(0), GEP, Idx, SignExtended, ZeroExtended)
715 .trunc(BitWidth);
716 } else if (isa<SExtInst>(V)) {
717 ConstantOffset =
718 find(U->getOperand(0), GEP, Idx, /* SignExtended */ true, ZeroExtended)
719 .sext(BitWidth);
720 } else if (isa<ZExtInst>(V)) {
721 // As an optimization, we can clear the SignExtended flag because
722 // sext(zext(a)) = zext(a). Verified in @sext_zext in split-gep.ll.
723 ConstantOffset = find(U->getOperand(0), GEP, Idx, /* SignExtended */ false,
724 /* ZeroExtended */ true)
725 .zext(BitWidth);
726 }
727
728 // If we found a non-zero constant offset, add it to the path for
729 // rebuildWithoutConstOffset. Zero is a valid constant offset, but doesn't
730 // help this optimization.
731 if (ConstantOffset != 0)
732 UserChain.push_back(U);
733 return ConstantOffset;
734}
735
Value *ConstantOffsetExtractor::applyCasts(Value *V) {
  Value *Current = V;
  // CastInsts is built in the use-def order. Therefore, we apply them to V
  // in the reversed order.
  for (CastInst *I : llvm::reverse(CastInsts)) {
    if (Constant *C = dyn_cast<Constant>(Current)) {
      // Try to constant fold the cast; on success no instruction is cloned.
      Current = ConstantFoldCastOperand(I->getOpcode(), C, I->getType(), DL);
      if (Current)
        continue;
    }

    // Folding failed (or Current is not a Constant): clone the cast and wire
    // it to the value built so far.
    Instruction *Cast = I->clone();
    Cast->setOperand(0, Current);
    // In ConstantOffsetExtractor::find we do not analyze nuw/nsw for trunc, so
    // we assume that it is ok to redistribute trunc over add/sub/or. But for
    // example (add (trunc nuw A), (trunc nuw B)) is more poisonous than (trunc
    // nuw (add A, B))). To make such redistributions legal we drop all the
    // poison generating flags from cloned trunc instructions here.
    if (isa<TruncInst>(Cast))
    // NOTE(review): the statement guarded by this `if` appears truncated in
    // this listing — per the comment above it should drop the cloned trunc's
    // poison-generating flags; confirm against the full source.
    Cast->insertBefore(*IP->getParent(), IP);
    Current = Cast;
  }
  return Current;
}
762
763Value *ConstantOffsetExtractor::rebuildWithoutConstOffset() {
764 distributeCastsAndCloneChain(UserChain.size() - 1);
765 // Remove all nullptrs (used to be sext/zext/trunc) from UserChain.
766 unsigned NewSize = 0;
767 for (User *I : UserChain) {
768 if (I != nullptr) {
769 UserChain[NewSize] = I;
770 NewSize++;
771 }
772 }
773 UserChain.resize(NewSize);
774 return removeConstOffset(UserChain.size() - 1);
775}
776
Value *
ConstantOffsetExtractor::distributeCastsAndCloneChain(unsigned ChainIndex) {
  User *U = UserChain[ChainIndex];
  // Base case: the bottom of the chain holds the constant itself.
  if (ChainIndex == 0) {
    // If U is a ConstantInt, applyCasts will return a ConstantInt as well.
    return UserChain[ChainIndex] = cast<ConstantInt>(applyCasts(U));
  }

  if (CastInst *Cast = dyn_cast<CastInst>(U)) {
    assert(
        (isa<SExtInst>(Cast) || isa<ZExtInst>(Cast) || isa<TruncInst>(Cast)) &&
        "Only following instructions can be traced: sext, zext & trunc");
    // Remember the cast so applyCasts can re-apply it at the leaves, and
    // leave a nullptr hole in UserChain; rebuildWithoutConstOffset compacts
    // these holes afterwards.
    CastInsts.push_back(Cast);
    UserChain[ChainIndex] = nullptr;
    return distributeCastsAndCloneChain(ChainIndex - 1);
  }

  // Function find only trace into BinaryOperator and CastInst.
  BinaryOperator *BO = cast<BinaryOperator>(U);
  // OpNo = which operand of BO is UserChain[ChainIndex - 1]
  unsigned OpNo = (BO->getOperand(0) == UserChain[ChainIndex - 1] ? 0 : 1);
  // Apply the gathered casts to the operand that is NOT on the chain, and
  // recurse down the chain for the other operand.
  Value *TheOther = applyCasts(BO->getOperand(1 - OpNo));
  Value *NextInChain = distributeCastsAndCloneChain(ChainIndex - 1);

  // Clone BO with the cast-distributed operands, preserving operand order.
  BinaryOperator *NewBO = nullptr;
  if (OpNo == 0) {
    NewBO = BinaryOperator::Create(BO->getOpcode(), NextInChain, TheOther,
                                   BO->getName(), IP);
  } else {
    NewBO = BinaryOperator::Create(BO->getOpcode(), TheOther, NextInChain,
                                   BO->getName(), IP);
  }
  return UserChain[ChainIndex] = NewBO;
}
812
813Value *ConstantOffsetExtractor::removeConstOffset(unsigned ChainIndex) {
814 if (ChainIndex == 0) {
815 assert(isa<ConstantInt>(UserChain[ChainIndex]));
816 return ConstantInt::getNullValue(UserChain[ChainIndex]->getType());
817 }
818
819 BinaryOperator *BO = cast<BinaryOperator>(UserChain[ChainIndex]);
820 assert((BO->use_empty() || BO->hasOneUse()) &&
821 "distributeCastsAndCloneChain clones each BinaryOperator in "
822 "UserChain, so no one should be used more than "
823 "once");
824
825 unsigned OpNo = (BO->getOperand(0) == UserChain[ChainIndex - 1] ? 0 : 1);
826 assert(BO->getOperand(OpNo) == UserChain[ChainIndex - 1]);
827 Value *NextInChain = removeConstOffset(ChainIndex - 1);
828 Value *TheOther = BO->getOperand(1 - OpNo);
829
830 // If NextInChain is 0 and not the LHS of a sub, we can simplify the
831 // sub-expression to be just TheOther.
832 if (ConstantInt *CI = dyn_cast<ConstantInt>(NextInChain)) {
833 if (CI->isZero() && !(BO->getOpcode() == Instruction::Sub && OpNo == 0))
834 return TheOther;
835 }
836
837 BinaryOperator::BinaryOps NewOp = BO->getOpcode();
838 if (BO->getOpcode() == Instruction::Or) {
839 // Rebuild "or" as "add", because "or" may be invalid for the new
840 // expression.
841 //
842 // For instance, given
843 // a | (b + 5) where a and b + 5 have no common bits,
844 // we can extract 5 as the constant offset.
845 //
846 // However, reusing the "or" in the new index would give us
847 // (a | b) + 5
848 // which does not equal a | (b + 5).
849 //
850 // Replacing the "or" with "add" is fine, because
851 // a | (b + 5) = a + (b + 5) = (a + b) + 5
852 NewOp = Instruction::Add;
853 }
854
855 BinaryOperator *NewBO;
856 if (OpNo == 0) {
857 NewBO = BinaryOperator::Create(NewOp, NextInChain, TheOther, "", IP);
858 } else {
859 NewBO = BinaryOperator::Create(NewOp, TheOther, NextInChain, "", IP);
860 }
861 NewBO->takeName(BO);
862 return NewBO;
863}
864
865/// A helper function to check if reassociating through an entry in the user
866/// chain would invalidate the GEP's nuw flag.
867static bool allowsPreservingNUW(const User *U) {
868 if (const BinaryOperator *BO = dyn_cast<BinaryOperator>(U)) {
869 // Binary operations need to be effectively add nuw.
870 auto Opcode = BO->getOpcode();
871 if (Opcode == BinaryOperator::Or) {
872 // Ors are only considered here if they are disjoint. The addition that
873 // they represent in this case is NUW.
874 assert(cast<PossiblyDisjointInst>(BO)->isDisjoint());
875 return true;
876 }
877 return Opcode == BinaryOperator::Add && BO->hasNoUnsignedWrap();
878 }
879 // UserChain can only contain ConstantInt, CastInst, or BinaryOperator.
880 // Among the possible CastInsts, only trunc without nuw is a problem: If it
881 // is distributed through an add nuw, wrapping may occur:
882 // "add nuw trunc(a), trunc(b)" is more poisonous than "trunc(add nuw a, b)"
883 if (const TruncInst *TI = dyn_cast<TruncInst>(U))
884 return TI->hasNoUnsignedWrap();
885 assert((isa<CastInst>(U) || isa<ConstantInt>(U)) && "Unexpected User.");
886 return true;
887}
888
889Value *ConstantOffsetExtractor::Extract(Value *Idx, GetElementPtrInst *GEP,
890 User *&UserChainTail,
891 bool &PreservesNUW) {
892 ConstantOffsetExtractor Extractor(GEP->getIterator());
893 // Find a non-zero constant offset first.
894 APInt ConstantOffset = Extractor.find(Idx, GEP, Idx, /* SignExtended */ false,
895 /* ZeroExtended */ false);
896 if (ConstantOffset == 0) {
897 UserChainTail = nullptr;
898 PreservesNUW = true;
899 return nullptr;
900 }
901
902 PreservesNUW = all_of(Extractor.UserChain, allowsPreservingNUW);
903
904 // Separates the constant offset from the GEP index.
905 Value *IdxWithoutConstOffset = Extractor.rebuildWithoutConstOffset();
906 UserChainTail = Extractor.UserChain.back();
907 return IdxWithoutConstOffset;
908}
909
910APInt ConstantOffsetExtractor::Find(Value *Idx, GetElementPtrInst *GEP) {
911 return ConstantOffsetExtractor(GEP->getIterator())
912 .find(Idx, GEP, Idx, /* SignExtended */ false, /* ZeroExtended */ false);
913}
914
915bool SeparateConstOffsetFromGEP::canonicalizeArrayIndicesToIndexSize(
916 GetElementPtrInst *GEP) {
917 bool Changed = false;
918 Type *PtrIdxTy = DL->getIndexType(GEP->getType());
920 for (User::op_iterator I = GEP->op_begin() + 1, E = GEP->op_end();
921 I != E; ++I, ++GTI) {
922 // Skip struct member indices which must be i32.
923 if (GTI.isSequential()) {
924 if ((*I)->getType() != PtrIdxTy) {
925 *I = CastInst::CreateIntegerCast(*I, PtrIdxTy, true, "idxprom",
926 GEP->getIterator());
927 Changed = true;
928 }
929 }
930 }
931 return Changed;
932}
933
934APInt SeparateConstOffsetFromGEP::accumulateByteOffset(GetElementPtrInst *GEP,
935 bool &NeedsExtraction,
936 bool &SignedOverflow) {
937 NeedsExtraction = false;
938 SignedOverflow = false;
939 unsigned IdxWidth = DL->getIndexTypeSizeInBits(GEP->getType());
940 APInt AccumulativeByteOffset(IdxWidth, 0);
942 for (unsigned I = 1, E = GEP->getNumOperands(); I != E; ++I, ++GTI) {
943 if (GTI.isSequential()) {
944 // Constant offsets of scalable types are not really constant.
945 if (GTI.getIndexedType()->isScalableTy())
946 continue;
947
948 // Tries to extract a constant offset from this GEP index.
949 APInt ConstantOffset =
950 ConstantOffsetExtractor::Find(GEP->getOperand(I), GEP)
951 .sextOrTrunc(IdxWidth);
952 if (ConstantOffset != 0) {
953 NeedsExtraction = true;
954 // A GEP may have multiple indices. We accumulate the extracted
955 // constant offset to a byte offset, and later offset the remainder of
956 // the original GEP with this byte offset.
957 bool Overflow;
958 auto ByteOffset = ConstantOffset.smul_ov(
959 APInt(IdxWidth, GTI.getSequentialElementStride(*DL),
960 /*IsSigned=*/true, /*ImplicitTrunc=*/true),
961 Overflow);
962 SignedOverflow |= Overflow;
963 AccumulativeByteOffset =
964 AccumulativeByteOffset.sadd_ov(ByteOffset, Overflow);
965 SignedOverflow |= Overflow;
966 }
967 } else if (LowerGEP) {
968 StructType *StTy = GTI.getStructType();
969 uint64_t Field = cast<ConstantInt>(GEP->getOperand(I))->getZExtValue();
970 // Skip field 0 as the offset is always 0.
971 if (Field != 0) {
972 NeedsExtraction = true;
973 AccumulativeByteOffset +=
974 APInt(IdxWidth, DL->getStructLayout(StTy)->getElementOffset(Field),
975 /*IsSigned=*/true, /*ImplicitTrunc=*/true);
976 }
977 }
978 }
979 return AccumulativeByteOffset;
980}
981
982void SeparateConstOffsetFromGEP::lowerToSingleIndexGEPs(
983 GetElementPtrInst *Variadic, const APInt &AccumulativeByteOffset) {
984 IRBuilder<> Builder(Variadic);
985 Type *PtrIndexTy = DL->getIndexType(Variadic->getType());
986
987 Value *ResultPtr = Variadic->getOperand(0);
988 Loop *L = LI->getLoopFor(Variadic->getParent());
989 // Check if the base is not loop invariant or used more than once.
990 bool isSwapCandidate =
991 L && L->isLoopInvariant(ResultPtr) &&
992 !hasMoreThanOneUseInLoop(ResultPtr, L);
993 Value *FirstResult = nullptr;
994
995 gep_type_iterator GTI = gep_type_begin(*Variadic);
996 // Create an ugly GEP for each sequential index. We don't create GEPs for
997 // structure indices, as they are accumulated in the constant offset index.
998 for (unsigned I = 1, E = Variadic->getNumOperands(); I != E; ++I, ++GTI) {
999 if (GTI.isSequential()) {
1000 Value *Idx = Variadic->getOperand(I);
1001 // Skip zero indices.
1002 if (ConstantInt *CI = dyn_cast<ConstantInt>(Idx))
1003 if (CI->isZero())
1004 continue;
1005
1006 APInt ElementSize = APInt(PtrIndexTy->getIntegerBitWidth(),
1008 // Scale the index by element size.
1009 if (ElementSize != 1) {
1010 if (ElementSize.isPowerOf2()) {
1011 Idx = Builder.CreateShl(
1012 Idx, ConstantInt::get(PtrIndexTy, ElementSize.logBase2()));
1013 } else {
1014 Idx =
1015 Builder.CreateMul(Idx, ConstantInt::get(PtrIndexTy, ElementSize));
1016 }
1017 }
1018 // Create an ugly GEP with a single index for each index.
1019 ResultPtr = Builder.CreatePtrAdd(ResultPtr, Idx, "uglygep");
1020 if (FirstResult == nullptr)
1021 FirstResult = ResultPtr;
1022 }
1023 }
1024
1025 // Create a GEP with the constant offset index.
1026 if (AccumulativeByteOffset != 0) {
1027 Value *Offset = ConstantInt::get(PtrIndexTy, AccumulativeByteOffset);
1028 ResultPtr = Builder.CreatePtrAdd(ResultPtr, Offset, "uglygep");
1029 } else
1030 isSwapCandidate = false;
1031
1032 // If we created a GEP with constant index, and the base is loop invariant,
1033 // then we swap the first one with it, so LICM can move constant GEP out
1034 // later.
1035 auto *FirstGEP = dyn_cast_or_null<GetElementPtrInst>(FirstResult);
1036 auto *SecondGEP = dyn_cast<GetElementPtrInst>(ResultPtr);
1037 if (isSwapCandidate && isLegalToSwapOperand(FirstGEP, SecondGEP, L))
1038 swapGEPOperand(FirstGEP, SecondGEP);
1039
1040 Variadic->replaceAllUsesWith(ResultPtr);
1041 Variadic->eraseFromParent();
1042}
1043
1044bool SeparateConstOffsetFromGEP::reorderGEP(GetElementPtrInst *GEP,
1045 TargetTransformInfo &TTI) {
1046 auto PtrGEP = dyn_cast<GetElementPtrInst>(GEP->getPointerOperand());
1047 if (!PtrGEP)
1048 return false;
1049
1050 bool NestedNeedsExtraction, OffsetOverflow;
1051 APInt NestedByteOffset =
1052 accumulateByteOffset(PtrGEP, NestedNeedsExtraction, OffsetOverflow);
1053 if (!NestedNeedsExtraction)
1054 return false;
1055
1056 unsigned AddrSpace = PtrGEP->getPointerAddressSpace();
1057 if (!TTI.isLegalAddressingMode(GEP->getResultElementType(),
1058 /*BaseGV=*/nullptr,
1059 NestedByteOffset.getSExtValue(),
1060 /*HasBaseReg=*/true, /*Scale=*/0, AddrSpace))
1061 return false;
1062
1063 bool GEPInBounds = GEP->isInBounds();
1064 bool PtrGEPInBounds = PtrGEP->isInBounds();
1065 bool IsChainInBounds = GEPInBounds && PtrGEPInBounds;
1066 if (IsChainInBounds) {
1067 auto IsKnownNonNegative = [this](Value *V) {
1068 return isKnownNonNegative(V, *DL);
1069 };
1070 IsChainInBounds &= all_of(GEP->indices(), IsKnownNonNegative);
1071 if (IsChainInBounds)
1072 IsChainInBounds &= all_of(PtrGEP->indices(), IsKnownNonNegative);
1073 }
1074
1075 IRBuilder<> Builder(GEP);
1076 // For trivial GEP chains, we can swap the indices.
1077 Value *NewSrc = Builder.CreateGEP(
1078 GEP->getSourceElementType(), PtrGEP->getPointerOperand(),
1079 SmallVector<Value *, 4>(GEP->indices()), "", IsChainInBounds);
1080 Value *NewGEP = Builder.CreateGEP(PtrGEP->getSourceElementType(), NewSrc,
1081 SmallVector<Value *, 4>(PtrGEP->indices()),
1082 "", IsChainInBounds);
1083 GEP->replaceAllUsesWith(NewGEP);
1085 return true;
1086}
1087
1088bool SeparateConstOffsetFromGEP::splitGEP(GetElementPtrInst *GEP) {
1089 // Skip vector GEPs.
1090 if (GEP->getType()->isVectorTy())
1091 return false;
1092
1093 // If the base of this GEP is a ptradd of a constant, lets pass the constant
1094 // along. This ensures that when we have a chain of GEPs the constant
1095 // offset from each is accumulated.
1096 Value *NewBase;
1097 const APInt *BaseOffset;
1098 bool ExtractBase = match(GEP->getPointerOperand(),
1099 m_PtrAdd(m_Value(NewBase), m_APInt(BaseOffset)));
1100
1101 unsigned IdxWidth = DL->getIndexTypeSizeInBits(GEP->getType());
1102 APInt BaseByteOffset =
1103 ExtractBase ? BaseOffset->sextOrTrunc(IdxWidth) : APInt(IdxWidth, 0);
1104
1105 // The backend can already nicely handle the case where all indices are
1106 // constant.
1107 if (GEP->hasAllConstantIndices() && !ExtractBase)
1108 return false;
1109
1110 bool Changed = canonicalizeArrayIndicesToIndexSize(GEP);
1111
1112 bool NeedsExtraction, OffsetOverflow;
1113 APInt NonBaseByteOffset =
1114 accumulateByteOffset(GEP, NeedsExtraction, OffsetOverflow);
1115 bool AddOverflow;
1116 APInt AccumulativeByteOffset =
1117 BaseByteOffset.sadd_ov(NonBaseByteOffset, AddOverflow);
1118 OffsetOverflow |= AddOverflow;
1119
1120 TargetTransformInfo &TTI = GetTTI(*GEP->getFunction());
1121
1122 if (!NeedsExtraction && !ExtractBase) {
1123 Changed |= reorderGEP(GEP, TTI);
1124 return Changed;
1125 }
1126
1127 // If LowerGEP is disabled, before really splitting the GEP, check whether the
1128 // backend supports the addressing mode we are about to produce. If no, this
1129 // splitting probably won't be beneficial.
1130 // If LowerGEP is enabled, even the extracted constant offset can not match
1131 // the addressing mode, we can still do optimizations to other lowered parts
1132 // of variable indices. Therefore, we don't check for addressing modes in that
1133 // case.
1134 if (!LowerGEP) {
1135 unsigned AddrSpace = GEP->getPointerAddressSpace();
1137 GEP->getResultElementType(),
1138 /*BaseGV=*/nullptr, AccumulativeByteOffset.getSExtValue(),
1139 /*HasBaseReg=*/true, /*Scale=*/0, AddrSpace)) {
1140 // If the addressing mode was not legal and the base byte offset was not
1141 // 0, it could be a case where the total offset became too large for
1142 // the addressing mode. Try again without extracting the base offset.
1143 if (!ExtractBase)
1144 return Changed;
1145 ExtractBase = false;
1146 BaseByteOffset = APInt(IdxWidth, 0);
1147 AccumulativeByteOffset = NonBaseByteOffset;
1149 GEP->getResultElementType(),
1150 /*BaseGV=*/nullptr, AccumulativeByteOffset.getSExtValue(),
1151 /*HasBaseReg=*/true, /*Scale=*/0, AddrSpace))
1152 return Changed;
1153 // We can proceed with just extracting the other (non-base) offsets.
1154 NeedsExtraction = true;
1155 }
1156 }
1157
1158 // Track information for preserving GEP flags.
1159 bool AllOffsetsNonNegative =
1160 AccumulativeByteOffset.isNonNegative() && !OffsetOverflow;
1161 bool AllNUWPreserved = GEP->hasNoUnsignedWrap();
1162 bool NewGEPInBounds = GEP->isInBounds();
1163 bool NewGEPNUSW = GEP->hasNoUnsignedSignedWrap();
1164
1165 // Remove the constant offset in each sequential index. The resultant GEP
1166 // computes the variadic base.
1167 // Notice that we don't remove struct field indices here. If LowerGEP is
1168 // disabled, a structure index is not accumulated and we still use the old
1169 // one. If LowerGEP is enabled, a structure index is accumulated in the
1170 // constant offset. LowerToSingleIndexGEPs will later handle the constant
1171 // offset and won't need a new structure index.
1173 for (unsigned I = 1, E = GEP->getNumOperands(); I != E; ++I, ++GTI) {
1174 if (GTI.isSequential()) {
1175 // Constant offsets of scalable types are not really constant.
1176 if (GTI.getIndexedType()->isScalableTy())
1177 continue;
1178
1179 // Splits this GEP index into a variadic part and a constant offset, and
1180 // uses the variadic part as the new index.
1181 Value *Idx = GEP->getOperand(I);
1182 User *UserChainTail;
1183 bool PreservesNUW;
1184 Value *NewIdx = ConstantOffsetExtractor::Extract(Idx, GEP, UserChainTail,
1185 PreservesNUW);
1186 if (NewIdx != nullptr) {
1187 // Switches to the index with the constant offset removed.
1188 GEP->setOperand(I, NewIdx);
1189 // After switching to the new index, we can garbage-collect UserChain
1190 // and the old index if they are not used.
1193 Idx = NewIdx;
1194 AllNUWPreserved &= PreservesNUW;
1195 }
1196 AllOffsetsNonNegative =
1197 AllOffsetsNonNegative && isKnownNonNegative(Idx, *DL);
1198 }
1199 }
1200 if (ExtractBase) {
1201 GEPOperator *Base = cast<GEPOperator>(GEP->getPointerOperand());
1202 AllNUWPreserved &= Base->hasNoUnsignedWrap();
1203 NewGEPInBounds &= Base->isInBounds();
1204 NewGEPNUSW &= Base->hasNoUnsignedSignedWrap();
1205 AllOffsetsNonNegative &= BaseByteOffset.isNonNegative();
1206
1207 GEP->setOperand(0, NewBase);
1209 }
1210
1211 // Clear the inbounds attribute because the new index may be off-bound.
1212 // e.g.,
1213 //
1214 // b = add i64 a, 5
1215 // addr = gep inbounds float, float* p, i64 b
1216 //
1217 // is transformed to:
1218 //
1219 // addr2 = gep float, float* p, i64 a ; inbounds removed
1220 // addr = gep float, float* addr2, i64 5 ; inbounds removed
1221 //
1222 // If a is -4, although the old index b is in bounds, the new index a is
1223 // off-bound. http://llvm.org/docs/LangRef.html#id181 says "if the
1224 // inbounds keyword is not present, the offsets are added to the base
1225 // address with silently-wrapping two's complement arithmetic".
1226 // Therefore, the final code will be a semantically equivalent.
1227 GEPNoWrapFlags NewGEPFlags = GEPNoWrapFlags::none();
1228
1229 // If the initial GEP was inbounds/nusw and all variable indices and the
1230 // accumulated offsets are non-negative, they can be added in any order and
1231 // the intermediate results are in bounds and don't overflow in a nusw sense.
1232 // So, we can preserve the inbounds/nusw flag for both GEPs.
1233 bool CanPreserveInBoundsNUSW = AllOffsetsNonNegative;
1234
1235 // If the initial GEP was NUW and all operations that we reassociate were NUW
1236 // additions, the resulting GEPs are also NUW.
1237 if (AllNUWPreserved) {
1238 NewGEPFlags |= GEPNoWrapFlags::noUnsignedWrap();
1239 // If the initial GEP additionally had NUSW (or inbounds, which implies
1240 // NUSW), we know that the indices in the initial GEP must all have their
1241 // signbit not set. For indices that are the result of NUW adds, the
1242 // add-operands therefore also don't have their signbit set. Therefore, all
1243 // indices of the resulting GEPs are non-negative -> we can preserve
1244 // the inbounds/nusw flag.
1245 CanPreserveInBoundsNUSW |= NewGEPNUSW;
1246 }
1247
1248 if (CanPreserveInBoundsNUSW) {
1249 if (NewGEPInBounds)
1250 NewGEPFlags |= GEPNoWrapFlags::inBounds();
1251 else if (NewGEPNUSW)
1252 NewGEPFlags |= GEPNoWrapFlags::noUnsignedSignedWrap();
1253 }
1254
1255 GEP->setNoWrapFlags(NewGEPFlags);
1256
1257 // Lowers a GEP to GEPs with a single index.
1258 if (LowerGEP) {
1259 lowerToSingleIndexGEPs(GEP, AccumulativeByteOffset);
1260 return true;
1261 }
1262
1263 // No need to create another GEP if the accumulative byte offset is 0.
1264 if (AccumulativeByteOffset == 0)
1265 return true;
1266
1267 // Offsets the base with the accumulative byte offset.
1268 //
1269 // %gep ; the base
1270 // ... %gep ...
1271 //
1272 // => add the offset
1273 //
1274 // %gep2 ; clone of %gep
1275 // %new.gep = gep i8, %gep2, %offset
1276 // %gep ; will be removed
1277 // ... %gep ...
1278 //
1279 // => replace all uses of %gep with %new.gep and remove %gep
1280 //
1281 // %gep2 ; clone of %gep
1282 // %new.gep = gep i8, %gep2, %offset
1283 // ... %new.gep ...
1284 Instruction *NewGEP = GEP->clone();
1285 NewGEP->insertBefore(GEP->getIterator());
1286
1287 Type *PtrIdxTy = DL->getIndexType(GEP->getType());
1288 IRBuilder<> Builder(GEP);
1289 NewGEP = cast<Instruction>(Builder.CreatePtrAdd(
1290 NewGEP, ConstantInt::get(PtrIdxTy, AccumulativeByteOffset),
1291 GEP->getName(), NewGEPFlags));
1292 NewGEP->copyMetadata(*GEP);
1293
1294 GEP->replaceAllUsesWith(NewGEP);
1295 GEP->eraseFromParent();
1296
1297 return true;
1298}
1299
1300bool SeparateConstOffsetFromGEPLegacyPass::runOnFunction(Function &F) {
1301 if (skipFunction(F))
1302 return false;
1303 auto *DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
1304 auto *LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
1305 auto *TLI = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F);
1306 auto GetTTI = [this](Function &F) -> TargetTransformInfo & {
1307 return this->getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
1308 };
1309 SeparateConstOffsetFromGEP Impl(DT, LI, TLI, GetTTI, LowerGEP);
1310 return Impl.run(F);
1311}
1312
1313bool SeparateConstOffsetFromGEP::run(Function &F) {
1315 return false;
1316
1317 DL = &F.getDataLayout();
1318 bool Changed = false;
1319
1320 ReversePostOrderTraversal<Function *> RPOT(&F);
1321 for (BasicBlock *B : RPOT) {
1322 if (!DT->isReachableFromEntry(B))
1323 continue;
1324
1325 for (Instruction &I : llvm::make_early_inc_range(*B))
1326 if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(&I))
1327 Changed |= splitGEP(GEP);
1328 // No need to split GEP ConstantExprs because all its indices are constant
1329 // already.
1330 }
1331
1332 Changed |= reuniteExts(F);
1333
1334 if (VerifyNoDeadCode)
1335 verifyNoDeadCode(F);
1336
1337 return Changed;
1338}
1339
1340Instruction *SeparateConstOffsetFromGEP::findClosestMatchingDominator(
1341 ExprKey Key, Instruction *Dominatee,
1342 DenseMap<ExprKey, SmallVector<Instruction *, 2>> &DominatingExprs) {
1343 auto Pos = DominatingExprs.find(Key);
1344 if (Pos == DominatingExprs.end())
1345 return nullptr;
1346
1347 auto &Candidates = Pos->second;
1348 // Because we process the basic blocks in pre-order of the dominator tree, a
1349 // candidate that doesn't dominate the current instruction won't dominate any
1350 // future instruction either. Therefore, we pop it out of the stack. This
1351 // optimization makes the algorithm O(n).
1352 while (!Candidates.empty()) {
1353 Instruction *Candidate = Candidates.back();
1354 if (DT->dominates(Candidate, Dominatee))
1355 return Candidate;
1356 Candidates.pop_back();
1357 }
1358 return nullptr;
1359}
1360
1361bool SeparateConstOffsetFromGEP::reuniteExts(Instruction *I) {
1362 if (!I->getType()->isIntOrIntVectorTy())
1363 return false;
1364
1365 // Dom: LHS+RHS
1366 // I: sext(LHS)+sext(RHS)
1367 // If Dom can't sign overflow and Dom dominates I, optimize I to sext(Dom).
1368 // TODO: handle zext
1369 Value *LHS = nullptr, *RHS = nullptr;
1370 if (match(I, m_Add(m_SExt(m_Value(LHS)), m_SExt(m_Value(RHS))))) {
1371 if (LHS->getType() == RHS->getType()) {
1372 ExprKey Key = createNormalizedCommutablePair(LHS, RHS);
1373 if (auto *Dom = findClosestMatchingDominator(Key, I, DominatingAdds)) {
1374 Instruction *NewSExt =
1375 new SExtInst(Dom, I->getType(), "", I->getIterator());
1376 NewSExt->takeName(I);
1377 I->replaceAllUsesWith(NewSExt);
1378 NewSExt->setDebugLoc(I->getDebugLoc());
1380 return true;
1381 }
1382 }
1383 } else if (match(I, m_Sub(m_SExt(m_Value(LHS)), m_SExt(m_Value(RHS))))) {
1384 if (LHS->getType() == RHS->getType()) {
1385 if (auto *Dom =
1386 findClosestMatchingDominator({LHS, RHS}, I, DominatingSubs)) {
1387 Instruction *NewSExt =
1388 new SExtInst(Dom, I->getType(), "", I->getIterator());
1389 NewSExt->takeName(I);
1390 I->replaceAllUsesWith(NewSExt);
1391 NewSExt->setDebugLoc(I->getDebugLoc());
1393 return true;
1394 }
1395 }
1396 }
1397
1398 // Add I to DominatingExprs if it's an add/sub that can't sign overflow.
1399 if (match(I, m_NSWAdd(m_Value(LHS), m_Value(RHS)))) {
1401 ExprKey Key = createNormalizedCommutablePair(LHS, RHS);
1402 DominatingAdds[Key].push_back(I);
1403 }
1404 } else if (match(I, m_NSWSub(m_Value(LHS), m_Value(RHS)))) {
1406 DominatingSubs[{LHS, RHS}].push_back(I);
1407 }
1408 return false;
1409}
1410
1411bool SeparateConstOffsetFromGEP::reuniteExts(Function &F) {
1412 bool Changed = false;
1413 DominatingAdds.clear();
1414 DominatingSubs.clear();
1415 for (const auto Node : depth_first(DT)) {
1416 BasicBlock *BB = Node->getBlock();
1417 for (Instruction &I : llvm::make_early_inc_range(*BB))
1418 Changed |= reuniteExts(&I);
1419 }
1420 return Changed;
1421}
1422
1423void SeparateConstOffsetFromGEP::verifyNoDeadCode(Function &F) {
1424 for (BasicBlock &B : F) {
1425 for (Instruction &I : B) {
1427 std::string ErrMessage;
1428 raw_string_ostream RSO(ErrMessage);
1429 RSO << "Dead instruction detected!\n" << I << "\n";
1430 llvm_unreachable(RSO.str().c_str());
1431 }
1432 }
1433 }
1434}
1435
1436bool SeparateConstOffsetFromGEP::isLegalToSwapOperand(
1437 GetElementPtrInst *FirstGEP, GetElementPtrInst *SecondGEP, Loop *CurLoop) {
1438 if (!FirstGEP || !FirstGEP->hasOneUse())
1439 return false;
1440
1441 if (!SecondGEP || FirstGEP->getParent() != SecondGEP->getParent())
1442 return false;
1443
1444 if (FirstGEP == SecondGEP)
1445 return false;
1446
1447 unsigned FirstNum = FirstGEP->getNumOperands();
1448 unsigned SecondNum = SecondGEP->getNumOperands();
1449 // Give up if the number of operands are not 2.
1450 if (FirstNum != SecondNum || FirstNum != 2)
1451 return false;
1452
1453 Value *FirstBase = FirstGEP->getOperand(0);
1454 Value *SecondBase = SecondGEP->getOperand(0);
1455 Value *FirstOffset = FirstGEP->getOperand(1);
1456 // Give up if the index of the first GEP is loop invariant.
1457 if (CurLoop->isLoopInvariant(FirstOffset))
1458 return false;
1459
1460 // Give up if base doesn't have same type.
1461 if (FirstBase->getType() != SecondBase->getType())
1462 return false;
1463
1464 Instruction *FirstOffsetDef = dyn_cast<Instruction>(FirstOffset);
1465
1466 // Check if the second operand of first GEP has constant coefficient.
1467 // For an example, for the following code, we won't gain anything by
1468 // hoisting the second GEP out because the second GEP can be folded away.
1469 // %scevgep.sum.ur159 = add i64 %idxprom48.ur, 256
1470 // %67 = shl i64 %scevgep.sum.ur159, 2
1471 // %uglygep160 = getelementptr i8* %65, i64 %67
1472 // %uglygep161 = getelementptr i8* %uglygep160, i64 -1024
1473
1474 // Skip constant shift instruction which may be generated by Splitting GEPs.
1475 if (FirstOffsetDef && FirstOffsetDef->isShift() &&
1476 isa<ConstantInt>(FirstOffsetDef->getOperand(1)))
1477 FirstOffsetDef = dyn_cast<Instruction>(FirstOffsetDef->getOperand(0));
1478
1479 // Give up if FirstOffsetDef is an Add or Sub with constant.
1480 // Because it may not profitable at all due to constant folding.
1481 if (FirstOffsetDef)
1482 if (BinaryOperator *BO = dyn_cast<BinaryOperator>(FirstOffsetDef)) {
1483 unsigned opc = BO->getOpcode();
1484 if ((opc == Instruction::Add || opc == Instruction::Sub) &&
1485 (isa<ConstantInt>(BO->getOperand(0)) ||
1487 return false;
1488 }
1489 return true;
1490}
1491
1492bool SeparateConstOffsetFromGEP::hasMoreThanOneUseInLoop(Value *V, Loop *L) {
1493 // TODO: Could look at uses of globals, but we need to make sure we are
1494 // looking at the correct function.
1495 if (isa<Constant>(V))
1496 return false;
1497
1498 int UsesInLoop = 0;
1499 for (User *U : V->users()) {
1500 if (Instruction *User = dyn_cast<Instruction>(U))
1501 if (L->contains(User))
1502 if (++UsesInLoop > 1)
1503 return true;
1504 }
1505 return false;
1506}
1507
1508void SeparateConstOffsetFromGEP::swapGEPOperand(GetElementPtrInst *First,
1509 GetElementPtrInst *Second) {
1510 Value *Offset1 = First->getOperand(1);
1511 Value *Offset2 = Second->getOperand(1);
1512 First->setOperand(1, Offset2);
1513 Second->setOperand(1, Offset1);
1514
1515 // We changed p+o+c to p+c+o, p+c may not be inbound anymore.
1516 const DataLayout &DAL = First->getDataLayout();
1517 APInt Offset(DAL.getIndexSizeInBits(
1518 cast<PointerType>(First->getType())->getAddressSpace()),
1519 0);
1520 Value *NewBase =
1522 uint64_t ObjectSize;
1523 if (!getObjectSize(NewBase, ObjectSize, DAL, TLI) ||
1524 Offset.ugt(ObjectSize)) {
1525 // TODO(gep_nowrap): Make flag preservation more precise.
1526 First->setNoWrapFlags(GEPNoWrapFlags::none());
1528 } else
1529 First->setIsInBounds(true);
1530}
1531
1533 raw_ostream &OS, function_ref<StringRef(StringRef)> MapClassName2PassName) {
1535 ->printPipeline(OS, MapClassName2PassName);
1536 OS << '<';
1537 if (LowerGEP)
1538 OS << "lower-gep";
1539 OS << '>';
1540}
1541
1544 auto *DT = &AM.getResult<DominatorTreeAnalysis>(F);
1545 auto *LI = &AM.getResult<LoopAnalysis>(F);
1546 auto *TLI = &AM.getResult<TargetLibraryAnalysis>(F);
1547 auto GetTTI = [&AM](Function &F) -> TargetTransformInfo & {
1548 return AM.getResult<TargetIRAnalysis>(F);
1549 };
1550 SeparateConstOffsetFromGEP Impl(DT, LI, TLI, GetTTI, LowerGEP);
1551 if (!Impl.run(F))
1552 return PreservedAnalyses::all();
1555 return PA;
1556}
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
This file implements a class to represent arbitrary precision integral constant values and operations...
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
Function Alias Analysis false
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
This file contains the declarations for the subclasses of Constant, which represent the different fla...
This file defines the DenseMap class.
This file builds on the ADT/GraphTraits.h file to build generic depth first graph iterator.
static bool runOnFunction(Function &F, bool PostInlining)
Hexagon Common GEP
Module.h This file contains the declarations for the Module class.
This header defines various interfaces for pass management in LLVM.
static const T * Find(StringRef S, ArrayRef< T > A)
Find KV in array using binary search.
#define F(x, y, z)
Definition MD5.cpp:54
#define I(x, y, z)
Definition MD5.cpp:57
OptimizedStructLayoutField Field
#define INITIALIZE_PASS_DEPENDENCY(depName)
Definition PassSupport.h:42
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
Definition PassSupport.h:44
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
Definition PassSupport.h:39
static cl::opt< bool > DisableSeparateConstOffsetFromGEP("disable-separate-const-offset-from-gep", cl::init(false), cl::desc("Do not separate the constant offset from a GEP instruction"), cl::Hidden)
static bool allowsPreservingNUW(const User *U)
A helper function to check if reassociating through an entry in the user chain would invalidate the G...
static cl::opt< bool > VerifyNoDeadCode("reassociate-geps-verify-no-dead-code", cl::init(false), cl::desc("Verify this pass produces no dead code"), cl::Hidden)
static bool canReorderAddSextToGEP(const GetElementPtrInst *GEP, const Value *Idx, const BinaryOperator *Add, const DataLayout &DL)
This file defines the SmallVector class.
static SymbolRef::Type getType(const Symbol *Sym)
Definition TapiFile.cpp:39
This pass exposes codegen information to IR-level passes.
Value * RHS
Value * LHS
Class for arbitrary precision integers.
Definition APInt.h:78
LLVM_ABI APInt zext(unsigned width) const
Zero extend to a new width.
Definition APInt.cpp:1043
LLVM_ABI APInt zextOrTrunc(unsigned width) const
Zero extend or truncate to width.
Definition APInt.cpp:1064
LLVM_ABI APInt trunc(unsigned width) const
Truncate to new width.
Definition APInt.cpp:956
LLVM_ABI APInt sadd_ov(const APInt &RHS, bool &Overflow) const
Definition APInt.cpp:1968
static APInt getSignedMinValue(unsigned numBits)
Gets minimum signed value of APInt for a specific bit width.
Definition APInt.h:220
LLVM_ABI APInt sextOrTrunc(unsigned width) const
Sign extend or truncate to width.
Definition APInt.cpp:1072
unsigned logBase2() const
Definition APInt.h:1776
LLVM_ABI APInt smul_ov(const APInt &RHS, bool &Overflow) const
Definition APInt.cpp:2000
bool isNonNegative() const
Determine if this APInt Value is non-negative (>= 0)
Definition APInt.h:335
LLVM_ABI APInt sext(unsigned width) const
Sign extend to a new width.
Definition APInt.cpp:1016
bool isPowerOf2() const
Check if this APInt's value is a power of two greater than zero.
Definition APInt.h:441
int64_t getSExtValue() const
Get sign extended value.
Definition APInt.h:1577
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
AnalysisUsage & addRequired()
LLVM_ABI void setPreservesCFG()
This function should be called by the pass, iff they do not:
Definition Pass.cpp:270
InstListType::iterator iterator
Instruction iterators...
Definition BasicBlock.h:170
BinaryOps getOpcode() const
Definition InstrTypes.h:374
static LLVM_ABI BinaryOperator * Create(BinaryOps Op, Value *S1, Value *S2, const Twine &Name=Twine(), InsertPosition InsertBefore=nullptr)
Construct a binary instruction, given the opcode and the two operands.
Represents analyses that only rely on functions' control flow.
Definition Analysis.h:73
static LLVM_ABI CastInst * CreateIntegerCast(Value *S, Type *Ty, bool isSigned, const Twine &Name="", InsertPosition InsertBefore=nullptr)
Create a ZExt, BitCast, or Trunc for int -> int casts.
This is the shared class of boolean and integer constants.
Definition Constants.h:87
bool isNegative() const
Definition Constants.h:214
const APInt & getValue() const
Return the constant as an APInt value reference.
Definition Constants.h:159
A parsed version of the target data layout string in and methods for querying it.
Definition DataLayout.h:64
unsigned getIndexSizeInBits(unsigned AS) const
The size in bits of indices used for address calculation in getelementptr and for addresses in the gi...
Definition DataLayout.h:502
Analysis pass which computes a DominatorTree.
Definition Dominators.h:278
Legacy analysis pass which computes a DominatorTree.
Definition Dominators.h:316
LLVM_ABI bool isReachableFromEntry(const Use &U) const
Provide an overload for a Use.
LLVM_ABI bool dominates(const BasicBlock *BB, const Use &U) const
Return true if the (end of the) basic block BB dominates the use U.
FunctionPass class - This class is used to implement most global optimizations.
Definition Pass.h:314
static GEPNoWrapFlags inBounds()
static GEPNoWrapFlags noUnsignedWrap()
static GEPNoWrapFlags noUnsignedSignedWrap()
static GEPNoWrapFlags none()
an instruction for type-safe pointer arithmetic to access elements of arrays and structs
LLVM_ABI void setNoWrapFlags(GEPNoWrapFlags NW)
Set nowrap flags for GEP instruction.
LLVM_ABI bool hasNoUnsignedWrap() const LLVM_READONLY
Determine whether the no unsigned wrap flag is set.
LLVM_ABI bool hasNoSignedWrap() const LLVM_READONLY
Determine whether the no signed wrap flag is set.
LLVM_ABI void insertBefore(InstListType::iterator InsertPos)
Insert an unlinked instruction into a basic block immediately before the specified position.
bool isShift() const
LLVM_ABI void dropPoisonGeneratingFlags()
Drops flags that may cause this instruction to evaluate to poison despite having non-poison inputs.
void setDebugLoc(DebugLoc Loc)
Set the debug location information for this instruction.
LLVM_ABI void copyMetadata(const Instruction &SrcInst, ArrayRef< unsigned > WL=ArrayRef< unsigned >())
Copy metadata from SrcInst to this instruction.
Analysis pass that exposes the LoopInfo for a function.
Definition LoopInfo.h:569
LoopT * getLoopFor(const BlockT *BB) const
Return the inner most loop that BB lives in.
The legacy pass manager's analysis pass to compute loop information.
Definition LoopInfo.h:596
bool isLoopInvariant(const Value *V) const
Return true if the specified value is loop invariant.
Definition LoopInfo.cpp:67
static LLVM_ABI PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
A set of analyses that are preserved following a run of a transformation pass.
Definition Analysis.h:112
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition Analysis.h:118
PreservedAnalyses & preserveSet()
Mark an analysis set as preserved.
Definition Analysis.h:151
void printPipeline(raw_ostream &OS, function_ref< StringRef(StringRef)> MapClassName2PassName)
PreservedAnalyses run(Function &F, FunctionAnalysisManager &)
StringRef - Represent a constant reference to a string, i.e.
Definition StringRef.h:55
Analysis pass providing the TargetTransformInfo.
Analysis pass providing the TargetLibraryInfo.
Wrapper pass for TargetTransformInfo.
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
LLVM_ABI bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset, bool HasBaseReg, int64_t Scale, unsigned AddrSpace=0, Instruction *I=nullptr, int64_t ScalableOffset=0) const
Return true if the addressing mode represented by AM is legal for this target, for a load/store of th...
This class represents a truncation of integer types.
LLVM_ABI unsigned getIntegerBitWidth() const
LLVM_ABI bool isScalableTy(SmallPtrSetImpl< const Type * > &Visited) const
Return true if this is a type whose size is a known multiple of vscale.
Definition Type.cpp:65
Use * op_iterator
Definition User.h:254
void setOperand(unsigned i, Value *Val)
Definition User.h:212
Value * getOperand(unsigned i) const
Definition User.h:207
unsigned getNumOperands() const
Definition User.h:229
LLVM Value Representation.
Definition Value.h:75
Type * getType() const
All values are typed, get the type of this value.
Definition Value.h:255
const Value * stripAndAccumulateInBoundsConstantOffsets(const DataLayout &DL, APInt &Offset) const
This is a wrapper around stripAndAccumulateConstantOffsets with the in-bounds requirement set to fals...
Definition Value.h:737
bool hasOneUse() const
Return true if there is exactly one use of this value.
Definition Value.h:439
bool use_empty() const
Definition Value.h:346
LLVM_ABI StringRef getName() const
Return a constant reference to the value's name.
Definition Value.cpp:318
LLVM_ABI void takeName(Value *V)
Transfer the name from V to this value.
Definition Value.cpp:399
constexpr ScalarTy getFixedValue() const
Definition TypeSize.h:200
constexpr bool isScalable() const
Returns whether the quantity is scaled by a runtime quantity (vscale).
Definition TypeSize.h:168
An efficient, type-erasing, non-owning reference to a callable.
TypeSize getSequentialElementStride(const DataLayout &DL) const
const ParentTy * getParent() const
Definition ilist_node.h:34
This class implements an extremely fast bulk output stream that can only output to a stream.
Definition raw_ostream.h:53
Changed
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
@ BasicBlock
Various leaf nodes.
Definition ISDOpcodes.h:81
PtrAdd_match< PointerOpTy, OffsetOpTy > m_PtrAdd(const PointerOpTy &PointerOp, const OffsetOpTy &OffsetOp)
Matches GEP with i8 source element type.
BinaryOp_match< LHS, RHS, Instruction::Add > m_Add(const LHS &L, const RHS &R)
ap_match< APInt > m_APInt(const APInt *&Res)
Match a ConstantInt or splatted ConstantVector, binding the specified pointer to the contained APInt.
OverflowingBinaryOp_match< LHS, RHS, Instruction::Sub, OverflowingBinaryOperator::NoSignedWrap > m_NSWSub(const LHS &L, const RHS &R)
bool match(Val *V, const Pattern &P)
auto m_Value()
Match an arbitrary value and ignore it.
OverflowingBinaryOp_match< LHS, RHS, Instruction::Add, OverflowingBinaryOperator::NoSignedWrap > m_NSWAdd(const LHS &L, const RHS &R)
CastInst_match< OpTy, SExtInst > m_SExt(const OpTy &Op)
Matches SExt.
BinaryOp_match< LHS, RHS, Instruction::Sub > m_Sub(const LHS &L, const RHS &R)
initializer< Ty > init(const Ty &Val)
PointerTypeMap run(const Module &M)
Compute the PointerTypeMap for the module M.
@ User
could "use" a pointer
NodeAddr< NodeBase * > Node
Definition RDFGraph.h:381
friend class Instruction
Iterator for Instructions in a `BasicBlock`.
Definition BasicBlock.h:73
This is an optimization pass for GlobalISel generic memory operations.
@ Offset
Definition DWP.cpp:532
FunctionAddr VTableAddr Value
Definition InstrProf.h:137
auto find(R &&Range, const T &Val)
Provide wrappers to std::find which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1765
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1739
LLVM_ABI bool RecursivelyDeleteTriviallyDeadInstructions(Value *V, const TargetLibraryInfo *TLI=nullptr, MemorySSAUpdater *MSSAU=nullptr, std::function< void(Value *)> AboutToDeleteCallback=std::function< void(Value *)>())
If the specified value is a trivially dead instruction, delete it.
Definition Local.cpp:535
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:643
Value * GetPointerBaseWithConstantOffset(Value *Ptr, int64_t &Offset, const DataLayout &DL, bool AllowNonInbounds=true)
Analyze the specified pointer to see if it can be expressed as a base pointer plus a constant offset.
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
Definition STLExtras.h:634
LLVM_ABI void initializeSeparateConstOffsetFromGEPLegacyPassPass(PassRegistry &)
auto dyn_cast_or_null(const Y &Val)
Definition Casting.h:753
LLVM_ABI bool isInstructionTriviallyDead(Instruction *I, const TargetLibraryInfo *TLI=nullptr)
Return true if the result produced by the instruction is not used, and the instruction will return.
Definition Local.cpp:403
LLVM_ABI bool getObjectSize(const Value *Ptr, uint64_t &Size, const DataLayout &DL, const TargetLibraryInfo *TLI, ObjectSizeOpts Opts={})
Compute the size of the object pointed by Ptr.
auto reverse(ContainerTy &&C)
Definition STLExtras.h:408
LLVM_ABI bool programUndefinedIfPoison(const Instruction *Inst)
generic_gep_type_iterator<> gep_type_iterator
LLVM_ABI Constant * ConstantFoldCastOperand(unsigned Opcode, Constant *C, Type *DestTy, const DataLayout &DL)
Attempt to constant fold a cast with the specified operand.
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
Definition Casting.h:547
LLVM_ATTRIBUTE_VISIBILITY_DEFAULT AnalysisKey InnerAnalysisManagerProxy< AnalysisManagerT, IRUnitT, ExtraArgTs... >::Key
LLVM_ABI FunctionPass * createSeparateConstOffsetFromGEPPass(bool LowerGEP=false)
@ First
Helpers to iterate all locations in the MemoryEffectsBase class.
Definition ModRef.h:74
TargetTransformInfo TTI
IRBuilder(LLVMContext &, FolderTy, InserterTy, MDNode *, ArrayRef< OperandBundleDef >) -> IRBuilder< FolderTy, InserterTy >
@ Add
Sum of integers.
constexpr unsigned BitWidth
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:559
gep_type_iterator gep_type_begin(const User *GEP)
std::enable_if_t< std::is_signed_v< T >, T > AddOverflow(T X, T Y, T &Result)
Add two signed integers, computing the two's complement truncated result, returning true if overflow ...
Definition MathExtras.h:701
iterator_range< df_iterator< T > > depth_first(const T &G)
AnalysisManager< Function > FunctionAnalysisManager
Convenience typedef for the Function analysis manager.
LLVM_ABI const Value * getUnderlyingObject(const Value *V, unsigned MaxLookup=MaxLookupSearchDepth)
This method strips off any GEP address adjustments, pointer casts or llvm.threadlocal....
LLVM_ABI bool isKnownNonNegative(const Value *V, const SimplifyQuery &SQ, unsigned Depth=0)
Returns true if the give value is known to be non-negative.
#define N
A CRTP mix-in to automatically provide informational APIs needed for passes.
Definition PassManager.h:70