1//===- SeparateConstOffsetFromGEP.cpp -------------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// Loop unrolling may create many similar GEPs for array accesses.
10// e.g., a 2-level loop
11//
12// float a[32][32]; // global variable
13//
14// for (int i = 0; i < 2; ++i) {
15// for (int j = 0; j < 2; ++j) {
16// ...
17// ... = a[x + i][y + j];
18// ...
19// }
20// }
21//
22// will probably be unrolled to:
23//
24// gep %a, 0, %x, %y; load
25// gep %a, 0, %x, %y + 1; load
26// gep %a, 0, %x + 1, %y; load
27// gep %a, 0, %x + 1, %y + 1; load
28//
29// LLVM's GVN does not use partial redundancy elimination yet, and is thus
30// unable to reuse (gep %a, 0, %x, %y). As a result, this missed optimization
31// incurs a significant slowdown on targets with limited addressing modes. For instance,
32// because the PTX target does not support the reg+reg addressing mode, the
33// NVPTX backend emits PTX code that literally computes the pointer address of
34// each GEP, wasting tons of registers. It emits the following PTX for the
35// first load and similar PTX for other loads.
36//
37// mov.u32 %r1, %x;
38// mov.u32 %r2, %y;
39// mul.wide.u32 %rl2, %r1, 128;
40// mov.u64 %rl3, a;
41// add.s64 %rl4, %rl3, %rl2;
42// mul.wide.u32 %rl5, %r2, 4;
43// add.s64 %rl6, %rl4, %rl5;
44// ld.global.f32 %f1, [%rl6];
45//
46// To reduce the register pressure, the optimization implemented in this file
47// merges the common part of a group of GEPs, so we can compute each pointer
48// address by adding a simple offset to the common part, saving many registers.
49//
50// It works by splitting each GEP into a variadic base and a constant offset.
51// The variadic base can be computed once and reused by multiple GEPs, and the
52// constant offsets can be nicely folded into the reg+immediate addressing mode
53// (supported by most targets) without using any extra register.
54//
55// For instance, we transform the four GEPs and four loads in the above example
56// into:
57//
58// base = gep a, 0, x, y
59// load base
60// load base + 1 * sizeof(float)
61// load base + 32 * sizeof(float)
62// load base + 33 * sizeof(float)
63//
64// Given the transformed IR, a backend that supports the reg+immediate
65// addressing mode can easily fold the pointer arithmetics into the loads. For
66// example, the NVPTX backend can easily fold the pointer arithmetics into the
67// ld.global.f32 instructions, and the resultant PTX uses much fewer registers.
68//
69//   mov.u32         %r1, %x;
70//   mov.u32         %r2, %y;
71// mul.wide.u32 %rl2, %r1, 128;
72// mov.u64 %rl3, a;
73// add.s64 %rl4, %rl3, %rl2;
74// mul.wide.u32 %rl5, %r2, 4;
75// add.s64 %rl6, %rl4, %rl5;
76// ld.global.f32 %f1, [%rl6]; // so far the same as unoptimized PTX
77// ld.global.f32 %f2, [%rl6+4]; // much better
78// ld.global.f32 %f3, [%rl6+128]; // much better
79// ld.global.f32 %f4, [%rl6+132]; // much better
80//
81// Another improvement enabled by the LowerGEP flag is to lower a GEP with
82// multiple indices to either multiple GEPs with a single index or arithmetic
83// operations (depending on whether the target uses alias analysis in codegen).
84// Such a transformation can have the following benefits:
85// (1) It can always extract constants from the indices of a structure type.
86// (2) After such lowering, there are more optimization opportunities such as
87//     CSE, LICM and CGP.
88//
89// E.g. The following GEPs have multiple indices:
90// BB1:
91// %p = getelementptr [10 x %struct]* %ptr, i64 %i, i64 %j1, i32 3
92// load %p
93// ...
94// BB2:
95//    %p2 = getelementptr [10 x %struct]* %ptr, i64 %i, i64 %j2, i32 2
96// load %p2
97// ...
98//
99// We cannot do CSE on the common part related to index "i64 %i" while the
100// GEPs keep their multi-index form. Lowering the GEPs exposes that common part.
101// If the target does not use alias analysis in codegen, this pass will
102// lower a GEP with multiple indices into arithmetic operations:
103// BB1:
104// %1 = ptrtoint [10 x %struct]* %ptr to i64 ; CSE opportunity
105// %2 = mul i64 %i, length_of_10xstruct ; CSE opportunity
106// %3 = add i64 %1, %2 ; CSE opportunity
107// %4 = mul i64 %j1, length_of_struct
108// %5 = add i64 %3, %4
109//        %6 = add i64 %5, struct_field_3   ; Constant offset
110// %p = inttoptr i64 %6 to i32*
111// load %p
112// ...
113// BB2:
114// %7 = ptrtoint [10 x %struct]* %ptr to i64 ; CSE opportunity
115// %8 = mul i64 %i, length_of_10xstruct ; CSE opportunity
116// %9 = add i64 %7, %8 ; CSE opportunity
117// %10 = mul i64 %j2, length_of_struct
118// %11 = add i64 %9, %10
119// %12 = add i64 %11, struct_field_2 ; Constant offset
120//        %p2 = inttoptr i64 %12 to i32*
121// load %p2
122// ...
123//
124// If the target uses alias analysis in codegen, this pass will lower a GEP
125// with multiple indices into multiple GEPs with a single index:
126// BB1:
127// %1 = bitcast [10 x %struct]* %ptr to i8* ; CSE opportunity
128// %2 = mul i64 %i, length_of_10xstruct ; CSE opportunity
129// %3 = getelementptr i8* %1, i64 %2 ; CSE opportunity
130// %4 = mul i64 %j1, length_of_struct
131// %5 = getelementptr i8* %3, i64 %4
132// %6 = getelementptr i8* %5, struct_field_3 ; Constant offset
133// %p = bitcast i8* %6 to i32*
134// load %p
135// ...
136// BB2:
137// %7 = bitcast [10 x %struct]* %ptr to i8* ; CSE opportunity
138// %8 = mul i64 %i, length_of_10xstruct ; CSE opportunity
139// %9 = getelementptr i8* %7, i64 %8 ; CSE opportunity
140// %10 = mul i64 %j2, length_of_struct
141// %11 = getelementptr i8* %9, i64 %10
142// %12 = getelementptr i8* %11, struct_field_2 ; Constant offset
143// %p2 = bitcast i8* %12 to i32*
144// load %p2
145// ...
146//
147// Lowering GEPs can also benefit other passes such as LICM and CGP.
148// LICM (Loop Invariant Code Motion) cannot hoist/sink a GEP with multiple
149// indices if any of the indices is loop-variant. If we lower such a GEP into
150// invariant parts and variant parts, LICM can hoist/sink those invariant parts.
151// CGP (CodeGen Prepare) tries to sink address calculations that match the
152// target's addressing modes. A GEP with multiple indices may not match and will
153// not be sunk. If we lower such a GEP into smaller parts, CGP may sink some of
154// them, so we end up with a better addressing mode.
155//
156//===----------------------------------------------------------------------===//
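//
// For reference, this pass is registered as "separate-const-offset-from-gep"
// (see the INITIALIZE_PASS macros and printPipeline below). A minimal sketch of
// how to exercise it in isolation with the new pass manager, assuming a stock
// opt build and a hypothetical input file in.ll, is:
//
//   opt -passes='separate-const-offset-from-gep' in.ll -S
//   opt -passes='separate-const-offset-from-gep<lower-gep>' in.ll -S
//
// where the <lower-gep> form enables the LowerGEP lowering described above.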
157
159#include "llvm/ADT/APInt.h"
160#include "llvm/ADT/DenseMap.h"
162#include "llvm/ADT/SmallVector.h"
168#include "llvm/IR/BasicBlock.h"
169#include "llvm/IR/Constant.h"
170#include "llvm/IR/Constants.h"
171#include "llvm/IR/DataLayout.h"
172#include "llvm/IR/DerivedTypes.h"
173#include "llvm/IR/Dominators.h"
174#include "llvm/IR/Function.h"
176#include "llvm/IR/IRBuilder.h"
177#include "llvm/IR/InstrTypes.h"
178#include "llvm/IR/Instruction.h"
179#include "llvm/IR/Instructions.h"
180#include "llvm/IR/Module.h"
181#include "llvm/IR/PassManager.h"
182#include "llvm/IR/PatternMatch.h"
183#include "llvm/IR/Type.h"
184#include "llvm/IR/User.h"
185#include "llvm/IR/Value.h"
187#include "llvm/Pass.h"
188#include "llvm/Support/Casting.h"
194#include <cassert>
195#include <cstdint>
196#include <string>
197
198using namespace llvm;
199using namespace llvm::PatternMatch;
200
202 "disable-separate-const-offset-from-gep", cl::init(false),
203 cl::desc("Do not separate the constant offset from a GEP instruction"),
204 cl::Hidden);
205
206// Setting this flag may emit false positives when the input module already
207// contains dead instructions. Therefore, we set it only in unit tests that are
208// free of dead code.
209static cl::opt<bool>
210 VerifyNoDeadCode("reassociate-geps-verify-no-dead-code", cl::init(false),
211 cl::desc("Verify this pass produces no dead code"),
212 cl::Hidden);
213
214namespace {
215
216/// A helper class for separating a constant offset from a GEP index.
217///
218/// In real programs, a GEP index may be more complicated than a simple addition
219/// of something and a constant integer which could be trivially split. For
220/// example, to split ((a << 3) | 5) + b, we need to search deeper for the
221/// constant offset, so that we can separate the index into (a << 3) + b and 5.
222///
223/// Therefore, this class looks into the expression that computes a given GEP
224/// index, and tries to find a constant integer that can be hoisted to the
225/// outermost level of the expression as an addition. Not every constant in an
226/// expression can jump out. e.g., we cannot transform (b * (a + 5)) to (b * a +
227/// 5); nor can we transform (3 * (a + 5)) to (3 * a + 5); however, in this case,
228/// -instcombine has probably already optimized (3 * (a + 5)) to (3 * a + 15).
229class ConstantOffsetExtractor {
230public:
231 /// Extracts a constant offset from the given GEP index. It returns the
232 /// new index representing the remainder (equal to the original index minus
233 /// the constant offset), or nullptr if we cannot extract a constant offset.
234 /// \p Idx The given GEP index
235 /// \p GEP The given GEP
236 /// \p UserChainTail Outputs the tail of UserChain so that we can
237 /// garbage-collect unused instructions in UserChain.
238 static Value *Extract(Value *Idx, GetElementPtrInst *GEP,
239 User *&UserChainTail);
240
241  /// Looks for a constant offset from the given GEP index without extracting
242  /// it. It returns the numeric value of the constant offset (0 if none is
243  /// found). The meaning of the arguments is the same as for Extract.
244 static int64_t Find(Value *Idx, GetElementPtrInst *GEP);
245
246private:
247 ConstantOffsetExtractor(BasicBlock::iterator InsertionPt)
248 : IP(InsertionPt), DL(InsertionPt->getDataLayout()) {}
249
250 /// Searches the expression that computes V for a non-zero constant C s.t.
251  /// V can be reassociated into the form V' + C. If the search is
252  /// successful, returns C and updates UserChain as a def-use chain from C to V;
253 /// otherwise, UserChain is empty.
254 ///
255 /// \p V The given expression
256 /// \p SignExtended Whether V will be sign-extended in the computation of the
257 /// GEP index
258 /// \p ZeroExtended Whether V will be zero-extended in the computation of the
259 /// GEP index
260 /// \p NonNegative Whether V is guaranteed to be non-negative. For example,
261 /// an index of an inbounds GEP is guaranteed to be
262  ///                 non-negative. Leveraging this, we can better split
263 /// inbounds GEPs.
264 APInt find(Value *V, bool SignExtended, bool ZeroExtended, bool NonNegative);
265
266 /// A helper function to look into both operands of a binary operator.
267 APInt findInEitherOperand(BinaryOperator *BO, bool SignExtended,
268 bool ZeroExtended);
269
270 /// After finding the constant offset C from the GEP index I, we build a new
271 /// index I' s.t. I' + C = I. This function builds and returns the new
272 /// index I' according to UserChain produced by function "find".
273 ///
274 /// The building conceptually takes two steps:
275 /// 1) iteratively distribute s/zext towards the leaves of the expression tree
276 /// that computes I
277 /// 2) reassociate the expression tree to the form I' + C.
278 ///
279 /// For example, to extract the 5 from sext(a + (b + 5)), we first distribute
280 /// sext to a, b and 5 so that we have
281 /// sext(a) + (sext(b) + 5).
282 /// Then, we reassociate it to
283 /// (sext(a) + sext(b)) + 5.
284 /// Given this form, we know I' is sext(a) + sext(b).
285 Value *rebuildWithoutConstOffset();
286
287 /// After the first step of rebuilding the GEP index without the constant
288 /// offset, distribute s/zext to the operands of all operators in UserChain.
289  /// e.g., zext(sext(a + (b + 5))) (assuming no overflow) =>
290 /// zext(sext(a)) + (zext(sext(b)) + zext(sext(5))).
291 ///
292 /// The function also updates UserChain to point to new subexpressions after
293 /// distributing s/zext. e.g., the old UserChain of the above example is
294 /// 5 -> b + 5 -> a + (b + 5) -> sext(...) -> zext(sext(...)),
295 /// and the new UserChain is
296 /// zext(sext(5)) -> zext(sext(b)) + zext(sext(5)) ->
297  ///   zext(sext(a)) + (zext(sext(b)) + zext(sext(5)))
298 ///
299 /// \p ChainIndex The index to UserChain. ChainIndex is initially
300 /// UserChain.size() - 1, and is decremented during
301 /// the recursion.
302 Value *distributeExtsAndCloneChain(unsigned ChainIndex);
303
304 /// Reassociates the GEP index to the form I' + C and returns I'.
305 Value *removeConstOffset(unsigned ChainIndex);
306
307 /// A helper function to apply ExtInsts, a list of s/zext, to value V.
308 /// e.g., if ExtInsts = [sext i32 to i64, zext i16 to i32], this function
309 /// returns "sext i32 (zext i16 V to i32) to i64".
310 Value *applyExts(Value *V);
311
312 /// A helper function that returns whether we can trace into the operands
313 /// of binary operator BO for a constant offset.
314 ///
315 /// \p SignExtended Whether BO is surrounded by sext
316 /// \p ZeroExtended Whether BO is surrounded by zext
317 /// \p NonNegative Whether BO is known to be non-negative, e.g., an in-bound
318 /// array index.
319 bool CanTraceInto(bool SignExtended, bool ZeroExtended, BinaryOperator *BO,
320 bool NonNegative);
321
322 /// The path from the constant offset to the old GEP index. e.g., if the GEP
323 /// index is "a * b + (c + 5)". After running function find, UserChain[0] will
324 /// be the constant 5, UserChain[1] will be the subexpression "c + 5", and
325 /// UserChain[2] will be the entire expression "a * b + (c + 5)".
326 ///
327 /// This path helps to rebuild the new GEP index.
328 SmallVector<User *, 8> UserChain;
329
330 /// A data structure used in rebuildWithoutConstOffset. Contains all
331 /// sext/zext instructions along UserChain.
332  SmallVector<CastInst *, 16> ExtInsts;
333
334 /// Insertion position of cloned instructions.
335  BasicBlock::iterator IP;
336
337 const DataLayout &DL;
338};
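
// A minimal sketch of how the extractor is meant to be used (hypothetical IR,
// not taken from the tests referenced in this file): given an index feeding an
// inbounds GEP,
//
//   %t   = add nsw i64 %b, 5
//   %idx = add nsw i64 %a, %t
//   %p   = getelementptr inbounds float, ptr %base, i64 %idx
//
// Find() reports the constant 5, and Extract() rebuilds the index as
// "%a + %b" so that splitGEP (below) can fold the 5 into a trailing
// constant-offset GEP.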
339
340/// A pass that tries to split every GEP in the function into a variadic
341/// base and a constant offset. It is a FunctionPass because searching for the
342/// constant offset may inspect other basic blocks.
343class SeparateConstOffsetFromGEPLegacyPass : public FunctionPass {
344public:
345 static char ID;
346
347 SeparateConstOffsetFromGEPLegacyPass(bool LowerGEP = false)
348 : FunctionPass(ID), LowerGEP(LowerGEP) {
349    initializeSeparateConstOffsetFromGEPLegacyPassPass(
350        *PassRegistry::getPassRegistry());
351  }
352
353 void getAnalysisUsage(AnalysisUsage &AU) const override {
354    AU.addRequired<DominatorTreeWrapperPass>();
355    AU.addRequired<LoopInfoWrapperPass>();
356    AU.addRequired<TargetLibraryInfoWrapperPass>();
357    AU.setPreservesCFG();
358    AU.addRequired<TargetTransformInfoWrapperPass>();
359  }
360
361 bool runOnFunction(Function &F) override;
362
363private:
364 bool LowerGEP;
365};
366
367/// A pass that tries to split every GEP in the function into a variadic
368/// base and a constant offset. It operates on a whole function because searching
369/// for the constant offset may inspect other basic blocks.
370class SeparateConstOffsetFromGEP {
371public:
372 SeparateConstOffsetFromGEP(
373      DominatorTree *DT, LoopInfo *LI, TargetLibraryInfo *TLI,
374      function_ref<TargetTransformInfo &(Function &)> GetTTI, bool LowerGEP)
375 : DT(DT), LI(LI), TLI(TLI), GetTTI(GetTTI), LowerGEP(LowerGEP) {}
376
377 bool run(Function &F);
378
379private:
380 /// Track the operands of an add or sub.
381 using ExprKey = std::pair<Value *, Value *>;
382
383 /// Create a pair for use as a map key for a commutable operation.
384 static ExprKey createNormalizedCommutablePair(Value *A, Value *B) {
385 if (A < B)
386 return {A, B};
387 return {B, A};
388 }
389
390 /// Tries to split the given GEP into a variadic base and a constant offset,
391 /// and returns true if the splitting succeeds.
392 bool splitGEP(GetElementPtrInst *GEP);
393
394 /// Tries to reorder the given GEP with the GEP that produces the base if
395 /// doing so results in producing a constant offset as the outermost
396 /// index.
397 bool reorderGEP(GetElementPtrInst *GEP, TargetTransformInfo &TTI);
398
399 /// Lower a GEP with multiple indices into multiple GEPs with a single index.
400 /// Function splitGEP already split the original GEP into a variadic part and
401 /// a constant offset (i.e., AccumulativeByteOffset). This function lowers the
402 /// variadic part into a set of GEPs with a single index and applies
403 /// AccumulativeByteOffset to it.
404 /// \p Variadic The variadic part of the original GEP.
405 /// \p AccumulativeByteOffset The constant offset.
406 void lowerToSingleIndexGEPs(GetElementPtrInst *Variadic,
407 int64_t AccumulativeByteOffset);
408
409 /// Lower a GEP with multiple indices into ptrtoint+arithmetics+inttoptr form.
410 /// Function splitGEP already split the original GEP into a variadic part and
411 /// a constant offset (i.e., AccumulativeByteOffset). This function lowers the
412 /// variadic part into a set of arithmetic operations and applies
413 /// AccumulativeByteOffset to it.
414 /// \p Variadic The variadic part of the original GEP.
415 /// \p AccumulativeByteOffset The constant offset.
416 void lowerToArithmetics(GetElementPtrInst *Variadic,
417 int64_t AccumulativeByteOffset);
418
419  /// Finds the constant offset within each index and accumulates them. If
420  /// LowerGEP is true, it searches the indices of both sequential and structure
421  /// types; otherwise it only searches sequential indices. The output
422  /// NeedsExtraction indicates whether we successfully found a non-zero constant
423  /// offset.
424 int64_t accumulateByteOffset(GetElementPtrInst *GEP, bool &NeedsExtraction);
425
426 /// Canonicalize array indices to pointer-size integers. This helps to
427 /// simplify the logic of splitting a GEP. For example, if a + b is a
428 /// pointer-size integer, we have
429 /// gep base, a + b = gep (gep base, a), b
430 /// However, this equality may not hold if the size of a + b is smaller than
431 /// the pointer size, because LLVM conceptually sign-extends GEP indices to
432 /// pointer size before computing the address
433 /// (http://llvm.org/docs/LangRef.html#id181).
434 ///
435 /// This canonicalization is very likely already done in clang and
436 /// instcombine. Therefore, the program will probably remain the same.
437 ///
438 /// Returns true if the module changes.
439 ///
440 /// Verified in @i32_add in split-gep.ll
441 bool canonicalizeArrayIndicesToIndexSize(GetElementPtrInst *GEP);
442
443 /// Optimize sext(a)+sext(b) to sext(a+b) when a+b can't sign overflow.
444 /// SeparateConstOffsetFromGEP distributes a sext to leaves before extracting
445  /// the constant offset. After extraction, it becomes desirable to reunite the
446  /// distributed sexts. For example,
447  ///
448  ///   &a[sext(i +nsw (j +nsw 5))]
449 /// => distribute &a[sext(i) +nsw (sext(j) +nsw 5)]
450 /// => constant extraction &a[sext(i) + sext(j)] + 5
451 /// => reunion &a[sext(i +nsw j)] + 5
452 bool reuniteExts(Function &F);
453
454 /// A helper that reunites sexts in an instruction.
455 bool reuniteExts(Instruction *I);
456
457 /// Find the closest dominator of <Dominatee> that is equivalent to <Key>.
458 Instruction *findClosestMatchingDominator(
459 ExprKey Key, Instruction *Dominatee,
460 DenseMap<ExprKey, SmallVector<Instruction *, 2>> &DominatingExprs);
461
462 /// Verify F is free of dead code.
463 void verifyNoDeadCode(Function &F);
464
465 bool hasMoreThanOneUseInLoop(Value *v, Loop *L);
466
467  // Swap the index operands of two GEPs.
468  void swapGEPOperand(GetElementPtrInst *First, GetElementPtrInst *Second);
469
470  // Check if it is safe to swap the operands of two GEPs.
471 bool isLegalToSwapOperand(GetElementPtrInst *First, GetElementPtrInst *Second,
472 Loop *CurLoop);
473
474 const DataLayout *DL = nullptr;
475 DominatorTree *DT = nullptr;
476 LoopInfo *LI;
477  TargetLibraryInfo *TLI = nullptr;
478  // Retrieved lazily since not always used.
479  function_ref<TargetTransformInfo &(Function &)> GetTTI;
480
481 /// Whether to lower a GEP with multiple indices into arithmetic operations or
482 /// multiple GEPs with a single index.
483 bool LowerGEP;
484
485  DenseMap<ExprKey, SmallVector<Instruction *, 2>> DominatingAdds;
486  DenseMap<ExprKey, SmallVector<Instruction *, 2>> DominatingSubs;
487};
488
489} // end anonymous namespace
490
491char SeparateConstOffsetFromGEPLegacyPass::ID = 0;
492
494 SeparateConstOffsetFromGEPLegacyPass, "separate-const-offset-from-gep",
495 "Split GEPs to a variadic base and a constant offset for better CSE", false,
496 false)
503 SeparateConstOffsetFromGEPLegacyPass, "separate-const-offset-from-gep",
504 "Split GEPs to a variadic base and a constant offset for better CSE", false,
505 false)
506
507FunctionPass *llvm::createSeparateConstOffsetFromGEPPass(bool LowerGEP) {
508  return new SeparateConstOffsetFromGEPLegacyPass(LowerGEP);
509}
510
511bool ConstantOffsetExtractor::CanTraceInto(bool SignExtended,
512 bool ZeroExtended,
513 BinaryOperator *BO,
514 bool NonNegative) {
515 // We only consider ADD, SUB and OR, because a non-zero constant found in
516 // expressions composed of these operations can be easily hoisted as a
517 // constant offset by reassociation.
518 if (BO->getOpcode() != Instruction::Add &&
519 BO->getOpcode() != Instruction::Sub &&
520 BO->getOpcode() != Instruction::Or) {
521 return false;
522 }
523
524 Value *LHS = BO->getOperand(0), *RHS = BO->getOperand(1);
525 // Do not trace into "or" unless it is equivalent to "add".
526 // This is the case if the or's disjoint flag is set.
527 if (BO->getOpcode() == Instruction::Or &&
528 !cast<PossiblyDisjointInst>(BO)->isDisjoint())
529 return false;
530
531 // FIXME: We don't currently support constants from the RHS of subs,
532  // when we are zero-extended, because we need a way to zero-extend
533  // them before they are negated.
534 if (ZeroExtended && !SignExtended && BO->getOpcode() == Instruction::Sub)
535 return false;
536
537 // In addition, tracing into BO requires that its surrounding s/zext (if
538 // any) is distributable to both operands.
539 //
540 // Suppose BO = A op B.
541 // SignExtended | ZeroExtended | Distributable?
542 // --------------+--------------+----------------------------------
543 // 0 | 0 | true because no s/zext exists
544 // 0 | 1 | zext(BO) == zext(A) op zext(B)
545 // 1 | 0 | sext(BO) == sext(A) op sext(B)
546 // 1 | 1 | zext(sext(BO)) ==
547 // | | zext(sext(A)) op zext(sext(B))
548 if (BO->getOpcode() == Instruction::Add && !ZeroExtended && NonNegative) {
549 // If a + b >= 0 and (a >= 0 or b >= 0), then
550 // sext(a + b) = sext(a) + sext(b)
551 // even if the addition is not marked nsw.
552 //
553 // Leveraging this invariant, we can trace into an sext'ed inbound GEP
554 // index if the constant offset is non-negative.
555 //
556 // Verified in @sext_add in split-gep.ll.
557 if (ConstantInt *ConstLHS = dyn_cast<ConstantInt>(LHS)) {
558 if (!ConstLHS->isNegative())
559 return true;
560 }
561 if (ConstantInt *ConstRHS = dyn_cast<ConstantInt>(RHS)) {
562 if (!ConstRHS->isNegative())
563 return true;
564 }
565 }
566
567 // sext (add/sub nsw A, B) == add/sub nsw (sext A), (sext B)
568 // zext (add/sub nuw A, B) == add/sub nuw (zext A), (zext B)
569 if (BO->getOpcode() == Instruction::Add ||
570 BO->getOpcode() == Instruction::Sub) {
571 if (SignExtended && !BO->hasNoSignedWrap())
572 return false;
573 if (ZeroExtended && !BO->hasNoUnsignedWrap())
574 return false;
575 }
576
577 return true;
578}
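
// A small numeric check of the invariant used above (i8 -> i16, hypothetical
// values): with a = -1 and b = 5 we have a + b = 4 >= 0 and b >= 0, and indeed
// sext(a + b) = 4 equals sext(a) + sext(b) = -1 + 5, even though the add is not
// marked nsw (the unsigned i8 add 0xFF + 0x05 wraps to 0x04).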
579
580APInt ConstantOffsetExtractor::findInEitherOperand(BinaryOperator *BO,
581 bool SignExtended,
582 bool ZeroExtended) {
583 // Save off the current height of the chain, in case we need to restore it.
584 size_t ChainLength = UserChain.size();
585
586 // BO being non-negative does not shed light on whether its operands are
587 // non-negative. Clear the NonNegative flag here.
588 APInt ConstantOffset = find(BO->getOperand(0), SignExtended, ZeroExtended,
589 /* NonNegative */ false);
590 // If we found a constant offset in the left operand, stop and return that.
591 // This shortcut might cause us to miss opportunities of combining the
592 // constant offsets in both operands, e.g., (a + 4) + (b + 5) => (a + b) + 9.
593 // However, such cases are probably already handled by -instcombine,
594 // given this pass runs after the standard optimizations.
595 if (ConstantOffset != 0) return ConstantOffset;
596
597 // Reset the chain back to where it was when we started exploring this node,
598 // since visiting the LHS didn't pan out.
599 UserChain.resize(ChainLength);
600
601 ConstantOffset = find(BO->getOperand(1), SignExtended, ZeroExtended,
602 /* NonNegative */ false);
603 // If U is a sub operator, negate the constant offset found in the right
604 // operand.
605 if (BO->getOpcode() == Instruction::Sub)
606 ConstantOffset = -ConstantOffset;
607
608 // If RHS wasn't a suitable candidate either, reset the chain again.
609 if (ConstantOffset == 0)
610 UserChain.resize(ChainLength);
611
612 return ConstantOffset;
613}
614
615APInt ConstantOffsetExtractor::find(Value *V, bool SignExtended,
616 bool ZeroExtended, bool NonNegative) {
617 // TODO(jingyue): We could trace into integer/pointer casts, such as
618 // inttoptr, ptrtoint, bitcast, and addrspacecast. We choose to handle only
619 // integers because it gives good enough results for our benchmarks.
620 unsigned BitWidth = cast<IntegerType>(V->getType())->getBitWidth();
621
622 // We cannot do much with Values that are not a User, such as an Argument.
623 User *U = dyn_cast<User>(V);
624 if (U == nullptr) return APInt(BitWidth, 0);
625
626 APInt ConstantOffset(BitWidth, 0);
627 if (ConstantInt *CI = dyn_cast<ConstantInt>(V)) {
628 // Hooray, we found it!
629 ConstantOffset = CI->getValue();
630 } else if (BinaryOperator *BO = dyn_cast<BinaryOperator>(V)) {
631 // Trace into subexpressions for more hoisting opportunities.
632 if (CanTraceInto(SignExtended, ZeroExtended, BO, NonNegative))
633 ConstantOffset = findInEitherOperand(BO, SignExtended, ZeroExtended);
634 } else if (isa<TruncInst>(V)) {
635 ConstantOffset =
636 find(U->getOperand(0), SignExtended, ZeroExtended, NonNegative)
637 .trunc(BitWidth);
638 } else if (isa<SExtInst>(V)) {
639 ConstantOffset = find(U->getOperand(0), /* SignExtended */ true,
640 ZeroExtended, NonNegative).sext(BitWidth);
641 } else if (isa<ZExtInst>(V)) {
642 // As an optimization, we can clear the SignExtended flag because
643 // sext(zext(a)) = zext(a). Verified in @sext_zext in split-gep.ll.
644 //
645 // Clear the NonNegative flag, because zext(a) >= 0 does not imply a >= 0.
646 ConstantOffset =
647 find(U->getOperand(0), /* SignExtended */ false,
648 /* ZeroExtended */ true, /* NonNegative */ false).zext(BitWidth);
649 }
650
651 // If we found a non-zero constant offset, add it to the path for
652 // rebuildWithoutConstOffset. Zero is a valid constant offset, but doesn't
653 // help this optimization.
654 if (ConstantOffset != 0)
655 UserChain.push_back(U);
656 return ConstantOffset;
657}
658
659Value *ConstantOffsetExtractor::applyExts(Value *V) {
660 Value *Current = V;
661 // ExtInsts is built in the use-def order. Therefore, we apply them to V
662 // in the reversed order.
663 for (CastInst *I : llvm::reverse(ExtInsts)) {
664 if (Constant *C = dyn_cast<Constant>(Current)) {
665 // Try to constant fold the cast.
666 Current = ConstantFoldCastOperand(I->getOpcode(), C, I->getType(), DL);
667 if (Current)
668 continue;
669 }
670
671 Instruction *Ext = I->clone();
672 Ext->setOperand(0, Current);
673 Ext->insertBefore(*IP->getParent(), IP);
674 Current = Ext;
675 }
676 return Current;
677}
678
679Value *ConstantOffsetExtractor::rebuildWithoutConstOffset() {
680 distributeExtsAndCloneChain(UserChain.size() - 1);
681 // Remove all nullptrs (used to be s/zext) from UserChain.
682 unsigned NewSize = 0;
683 for (User *I : UserChain) {
684 if (I != nullptr) {
685 UserChain[NewSize] = I;
686 NewSize++;
687 }
688 }
689 UserChain.resize(NewSize);
690 return removeConstOffset(UserChain.size() - 1);
691}
692
693Value *
694ConstantOffsetExtractor::distributeExtsAndCloneChain(unsigned ChainIndex) {
695 User *U = UserChain[ChainIndex];
696 if (ChainIndex == 0) {
697 assert(isa<ConstantInt>(U));
698 // If U is a ConstantInt, applyExts will return a ConstantInt as well.
699 return UserChain[ChainIndex] = cast<ConstantInt>(applyExts(U));
700 }
701
702 if (CastInst *Cast = dyn_cast<CastInst>(U)) {
703 assert(
704 (isa<SExtInst>(Cast) || isa<ZExtInst>(Cast) || isa<TruncInst>(Cast)) &&
705 "Only following instructions can be traced: sext, zext & trunc");
706 ExtInsts.push_back(Cast);
707 UserChain[ChainIndex] = nullptr;
708 return distributeExtsAndCloneChain(ChainIndex - 1);
709 }
710
711  // Function find only traces into BinaryOperator and CastInst.
712 BinaryOperator *BO = cast<BinaryOperator>(U);
713 // OpNo = which operand of BO is UserChain[ChainIndex - 1]
714 unsigned OpNo = (BO->getOperand(0) == UserChain[ChainIndex - 1] ? 0 : 1);
715 Value *TheOther = applyExts(BO->getOperand(1 - OpNo));
716 Value *NextInChain = distributeExtsAndCloneChain(ChainIndex - 1);
717
718 BinaryOperator *NewBO = nullptr;
719 if (OpNo == 0) {
720 NewBO = BinaryOperator::Create(BO->getOpcode(), NextInChain, TheOther,
721 BO->getName(), IP);
722 } else {
723 NewBO = BinaryOperator::Create(BO->getOpcode(), TheOther, NextInChain,
724 BO->getName(), IP);
725 }
726 return UserChain[ChainIndex] = NewBO;
727}
728
729Value *ConstantOffsetExtractor::removeConstOffset(unsigned ChainIndex) {
730 if (ChainIndex == 0) {
731 assert(isa<ConstantInt>(UserChain[ChainIndex]));
732 return ConstantInt::getNullValue(UserChain[ChainIndex]->getType());
733 }
734
735 BinaryOperator *BO = cast<BinaryOperator>(UserChain[ChainIndex]);
736 assert((BO->use_empty() || BO->hasOneUse()) &&
737 "distributeExtsAndCloneChain clones each BinaryOperator in "
738 "UserChain, so no one should be used more than "
739 "once");
740
741 unsigned OpNo = (BO->getOperand(0) == UserChain[ChainIndex - 1] ? 0 : 1);
742 assert(BO->getOperand(OpNo) == UserChain[ChainIndex - 1]);
743 Value *NextInChain = removeConstOffset(ChainIndex - 1);
744 Value *TheOther = BO->getOperand(1 - OpNo);
745
746 // If NextInChain is 0 and not the LHS of a sub, we can simplify the
747 // sub-expression to be just TheOther.
748 if (ConstantInt *CI = dyn_cast<ConstantInt>(NextInChain)) {
749 if (CI->isZero() && !(BO->getOpcode() == Instruction::Sub && OpNo == 0))
750 return TheOther;
751 }
752
753  Instruction::BinaryOps NewOp = BO->getOpcode();
754 if (BO->getOpcode() == Instruction::Or) {
755 // Rebuild "or" as "add", because "or" may be invalid for the new
756 // expression.
757 //
758 // For instance, given
759 // a | (b + 5) where a and b + 5 have no common bits,
760 // we can extract 5 as the constant offset.
761 //
762 // However, reusing the "or" in the new index would give us
763 // (a | b) + 5
764 // which does not equal a | (b + 5).
765 //
766 // Replacing the "or" with "add" is fine, because
767 // a | (b + 5) = a + (b + 5) = (a + b) + 5
768 NewOp = Instruction::Add;
769 }
770
771 BinaryOperator *NewBO;
772 if (OpNo == 0) {
773 NewBO = BinaryOperator::Create(NewOp, NextInChain, TheOther, "", IP);
774 } else {
775 NewBO = BinaryOperator::Create(NewOp, TheOther, NextInChain, "", IP);
776 }
777 NewBO->takeName(BO);
778 return NewBO;
779}
780
781Value *ConstantOffsetExtractor::Extract(Value *Idx, GetElementPtrInst *GEP,
782 User *&UserChainTail) {
783 ConstantOffsetExtractor Extractor(GEP->getIterator());
784 // Find a non-zero constant offset first.
785 APInt ConstantOffset =
786 Extractor.find(Idx, /* SignExtended */ false, /* ZeroExtended */ false,
787 GEP->isInBounds());
788 if (ConstantOffset == 0) {
789 UserChainTail = nullptr;
790 return nullptr;
791 }
792 // Separates the constant offset from the GEP index.
793 Value *IdxWithoutConstOffset = Extractor.rebuildWithoutConstOffset();
794 UserChainTail = Extractor.UserChain.back();
795 return IdxWithoutConstOffset;
796}
797
798int64_t ConstantOffsetExtractor::Find(Value *Idx, GetElementPtrInst *GEP) {
799 // If Idx is an index of an inbound GEP, Idx is guaranteed to be non-negative.
800 return ConstantOffsetExtractor(GEP->getIterator())
801 .find(Idx, /* SignExtended */ false, /* ZeroExtended */ false,
802 GEP->isInBounds())
803 .getSExtValue();
804}
805
806bool SeparateConstOffsetFromGEP::canonicalizeArrayIndicesToIndexSize(
807    GetElementPtrInst *GEP) {
808  bool Changed = false;
809 Type *PtrIdxTy = DL->getIndexType(GEP->getType());
810  gep_type_iterator GTI = gep_type_begin(*GEP);
811  for (User::op_iterator I = GEP->op_begin() + 1, E = GEP->op_end();
812 I != E; ++I, ++GTI) {
813 // Skip struct member indices which must be i32.
814 if (GTI.isSequential()) {
815 if ((*I)->getType() != PtrIdxTy) {
816 *I = CastInst::CreateIntegerCast(*I, PtrIdxTy, true, "idxprom",
817 GEP->getIterator());
818 Changed = true;
819 }
820 }
821 }
822 return Changed;
823}
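
// For illustration (a hypothetical fragment, not the @i32_add test itself): an
// i32 array index is promoted to the pointer index type with a signed cast,
//
//   %p = getelementptr inbounds float, ptr %a, i32 %i
// =>
//   %idxprom = sext i32 %i to i64
//   %p = getelementptr inbounds float, ptr %a, i64 %idxprom
//
// so that later splitting can rely on gep base, a + b == gep (gep base, a), b.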
824
825int64_t
826SeparateConstOffsetFromGEP::accumulateByteOffset(GetElementPtrInst *GEP,
827 bool &NeedsExtraction) {
828 NeedsExtraction = false;
829 int64_t AccumulativeByteOffset = 0;
830  gep_type_iterator GTI = gep_type_begin(*GEP);
831  for (unsigned I = 1, E = GEP->getNumOperands(); I != E; ++I, ++GTI) {
832 if (GTI.isSequential()) {
833 // Constant offsets of scalable types are not really constant.
834 if (GTI.getIndexedType()->isScalableTy())
835 continue;
836
837 // Tries to extract a constant offset from this GEP index.
838 int64_t ConstantOffset =
839 ConstantOffsetExtractor::Find(GEP->getOperand(I), GEP);
840 if (ConstantOffset != 0) {
841 NeedsExtraction = true;
842 // A GEP may have multiple indices. We accumulate the extracted
843 // constant offset to a byte offset, and later offset the remainder of
844 // the original GEP with this byte offset.
845 AccumulativeByteOffset +=
846 ConstantOffset * GTI.getSequentialElementStride(*DL);
847 }
848 } else if (LowerGEP) {
849 StructType *StTy = GTI.getStructType();
850 uint64_t Field = cast<ConstantInt>(GEP->getOperand(I))->getZExtValue();
851 // Skip field 0 as the offset is always 0.
852 if (Field != 0) {
853 NeedsExtraction = true;
854 AccumulativeByteOffset +=
855 DL->getStructLayout(StTy)->getElementOffset(Field);
856 }
857 }
858 }
859 return AccumulativeByteOffset;
860}
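
// Worked example (reusing the float a[32][32] array from the file header): for
//
//   getelementptr inbounds [32 x [32 x float]], ptr @a, i64 0, i64 %i1, i64 %j1
//
// where %i1 = %x + 1 and %j1 = %y + 1, the extractable constants are 1 and 1,
// the sequential strides are 128 and 4 bytes, so AccumulativeByteOffset is
// 1 * 128 + 1 * 4 = 132 and NeedsExtraction is true.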
861
862void SeparateConstOffsetFromGEP::lowerToSingleIndexGEPs(
863 GetElementPtrInst *Variadic, int64_t AccumulativeByteOffset) {
864 IRBuilder<> Builder(Variadic);
865 Type *PtrIndexTy = DL->getIndexType(Variadic->getType());
866
867 Value *ResultPtr = Variadic->getOperand(0);
868 Loop *L = LI->getLoopFor(Variadic->getParent());
869  // The base is a swap candidate only if it is loop invariant and used at most once in the loop.
870 bool isSwapCandidate =
871 L && L->isLoopInvariant(ResultPtr) &&
872 !hasMoreThanOneUseInLoop(ResultPtr, L);
873 Value *FirstResult = nullptr;
874
875 gep_type_iterator GTI = gep_type_begin(*Variadic);
876 // Create an ugly GEP for each sequential index. We don't create GEPs for
877 // structure indices, as they are accumulated in the constant offset index.
878 for (unsigned I = 1, E = Variadic->getNumOperands(); I != E; ++I, ++GTI) {
879 if (GTI.isSequential()) {
880 Value *Idx = Variadic->getOperand(I);
881 // Skip zero indices.
882 if (ConstantInt *CI = dyn_cast<ConstantInt>(Idx))
883 if (CI->isZero())
884 continue;
885
886 APInt ElementSize = APInt(PtrIndexTy->getIntegerBitWidth(),
887                                GTI.getSequentialElementStride(*DL));
888      // Scale the index by element size.
889 if (ElementSize != 1) {
890 if (ElementSize.isPowerOf2()) {
891 Idx = Builder.CreateShl(
892 Idx, ConstantInt::get(PtrIndexTy, ElementSize.logBase2()));
893 } else {
894 Idx =
895 Builder.CreateMul(Idx, ConstantInt::get(PtrIndexTy, ElementSize));
896 }
897 }
898 // Create an ugly GEP with a single index for each index.
899 ResultPtr = Builder.CreatePtrAdd(ResultPtr, Idx, "uglygep");
900 if (FirstResult == nullptr)
901 FirstResult = ResultPtr;
902 }
903 }
904
905 // Create a GEP with the constant offset index.
906 if (AccumulativeByteOffset != 0) {
907 Value *Offset = ConstantInt::get(PtrIndexTy, AccumulativeByteOffset);
908 ResultPtr = Builder.CreatePtrAdd(ResultPtr, Offset, "uglygep");
909 } else
910 isSwapCandidate = false;
911
912 // If we created a GEP with constant index, and the base is loop invariant,
913 // then we swap the first one with it, so LICM can move constant GEP out
914 // later.
915 auto *FirstGEP = dyn_cast_or_null<GetElementPtrInst>(FirstResult);
916 auto *SecondGEP = dyn_cast<GetElementPtrInst>(ResultPtr);
917 if (isSwapCandidate && isLegalToSwapOperand(FirstGEP, SecondGEP, L))
918 swapGEPOperand(FirstGEP, SecondGEP);
919
920 Variadic->replaceAllUsesWith(ResultPtr);
921 Variadic->eraseFromParent();
922}
923
924void
925SeparateConstOffsetFromGEP::lowerToArithmetics(GetElementPtrInst *Variadic,
926 int64_t AccumulativeByteOffset) {
927 IRBuilder<> Builder(Variadic);
928 Type *IntPtrTy = DL->getIntPtrType(Variadic->getType());
929 assert(IntPtrTy == DL->getIndexType(Variadic->getType()) &&
930 "Pointer type must match index type for arithmetic-based lowering of "
931 "split GEPs");
932
933 Value *ResultPtr = Builder.CreatePtrToInt(Variadic->getOperand(0), IntPtrTy);
934 gep_type_iterator GTI = gep_type_begin(*Variadic);
935  // Create ADD/SHL/MUL arithmetic operations for each sequential index. We
936 // don't create arithmetics for structure indices, as they are accumulated
937 // in the constant offset index.
938 for (unsigned I = 1, E = Variadic->getNumOperands(); I != E; ++I, ++GTI) {
939 if (GTI.isSequential()) {
940 Value *Idx = Variadic->getOperand(I);
941 // Skip zero indices.
942 if (ConstantInt *CI = dyn_cast<ConstantInt>(Idx))
943 if (CI->isZero())
944 continue;
945
946 APInt ElementSize = APInt(IntPtrTy->getIntegerBitWidth(),
947                                GTI.getSequentialElementStride(*DL));
948      // Scale the index by element size.
949 if (ElementSize != 1) {
950 if (ElementSize.isPowerOf2()) {
951 Idx = Builder.CreateShl(
952 Idx, ConstantInt::get(IntPtrTy, ElementSize.logBase2()));
953 } else {
954 Idx = Builder.CreateMul(Idx, ConstantInt::get(IntPtrTy, ElementSize));
955 }
956 }
957 // Create an ADD for each index.
958 ResultPtr = Builder.CreateAdd(ResultPtr, Idx);
959 }
960 }
961
962 // Create an ADD for the constant offset index.
963 if (AccumulativeByteOffset != 0) {
964 ResultPtr = Builder.CreateAdd(
965 ResultPtr, ConstantInt::get(IntPtrTy, AccumulativeByteOffset));
966 }
967
968 ResultPtr = Builder.CreateIntToPtr(ResultPtr, Variadic->getType());
969 Variadic->replaceAllUsesWith(ResultPtr);
970 Variadic->eraseFromParent();
971}
972
973bool SeparateConstOffsetFromGEP::reorderGEP(GetElementPtrInst *GEP,
974                                            TargetTransformInfo &TTI) {
975  auto PtrGEP = dyn_cast<GetElementPtrInst>(GEP->getPointerOperand());
976 if (!PtrGEP)
977 return false;
978
979 bool NestedNeedsExtraction;
980 int64_t NestedByteOffset =
981 accumulateByteOffset(PtrGEP, NestedNeedsExtraction);
982 if (!NestedNeedsExtraction)
983 return false;
984
985 unsigned AddrSpace = PtrGEP->getPointerAddressSpace();
986 if (!TTI.isLegalAddressingMode(GEP->getResultElementType(),
987 /*BaseGV=*/nullptr, NestedByteOffset,
988 /*HasBaseReg=*/true, /*Scale=*/0, AddrSpace))
989 return false;
990
991 bool GEPInBounds = GEP->isInBounds();
992 bool PtrGEPInBounds = PtrGEP->isInBounds();
993 bool IsChainInBounds = GEPInBounds && PtrGEPInBounds;
994 if (IsChainInBounds) {
995 auto IsKnownNonNegative = [this](Value *V) {
996 return isKnownNonNegative(V, *DL);
997 };
998 IsChainInBounds &= all_of(GEP->indices(), IsKnownNonNegative);
999 if (IsChainInBounds)
1000 IsChainInBounds &= all_of(PtrGEP->indices(), IsKnownNonNegative);
1001 }
1002
1003 IRBuilder<> Builder(GEP);
1004 // For trivial GEP chains, we can swap the indices.
1005 Value *NewSrc = Builder.CreateGEP(
1006 GEP->getSourceElementType(), PtrGEP->getPointerOperand(),
1007 SmallVector<Value *, 4>(GEP->indices()), "", IsChainInBounds);
1008 Value *NewGEP = Builder.CreateGEP(PtrGEP->getSourceElementType(), NewSrc,
1009 SmallVector<Value *, 4>(PtrGEP->indices()),
1010 "", IsChainInBounds);
1011 GEP->replaceAllUsesWith(NewGEP);
1012  GEP->eraseFromParent();
1013  return true;
1014}
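
// A sketch of the reordering (hypothetical IR): when the inner GEP carries a
// constant offset that the target can fold into its addressing mode,
//
//   %inner = getelementptr inbounds i8, ptr %p, i64 4
//   %outer = getelementptr inbounds i8, ptr %inner, i64 %i
// =>
//   %0 = getelementptr i8, ptr %p, i64 %i
//   %1 = getelementptr i8, ptr %0, i64 4   ; constant offset is now outermost
//
// which exposes the constant offset to later passes such as CSE and LICM.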
1015
1016bool SeparateConstOffsetFromGEP::splitGEP(GetElementPtrInst *GEP) {
1017 // Skip vector GEPs.
1018 if (GEP->getType()->isVectorTy())
1019 return false;
1020
1021 // The backend can already nicely handle the case where all indices are
1022 // constant.
1023 if (GEP->hasAllConstantIndices())
1024 return false;
1025
1026 bool Changed = canonicalizeArrayIndicesToIndexSize(GEP);
1027
1028 bool NeedsExtraction;
1029 int64_t AccumulativeByteOffset = accumulateByteOffset(GEP, NeedsExtraction);
1030
1031 TargetTransformInfo &TTI = GetTTI(*GEP->getFunction());
1032
1033 if (!NeedsExtraction) {
1034 Changed |= reorderGEP(GEP, TTI);
1035 return Changed;
1036 }
1037
1038  // If LowerGEP is disabled, before really splitting the GEP, check whether the
1039  // backend supports the addressing mode we are about to produce. If not, this
1040  // splitting probably won't be beneficial.
1041  // If LowerGEP is enabled, even if the extracted constant offset cannot match
1042  // the addressing mode, we can still optimize the other lowered parts of the
1043  // variable indices. Therefore, we don't check for addressing modes in that
1044  // case.
1045 if (!LowerGEP) {
1046 unsigned AddrSpace = GEP->getPointerAddressSpace();
1047 if (!TTI.isLegalAddressingMode(GEP->getResultElementType(),
1048 /*BaseGV=*/nullptr, AccumulativeByteOffset,
1049 /*HasBaseReg=*/true, /*Scale=*/0,
1050 AddrSpace)) {
1051 return Changed;
1052 }
1053 }
1054
1055 // Remove the constant offset in each sequential index. The resultant GEP
1056 // computes the variadic base.
1057 // Notice that we don't remove struct field indices here. If LowerGEP is
1058 // disabled, a structure index is not accumulated and we still use the old
1059 // one. If LowerGEP is enabled, a structure index is accumulated in the
1060 // constant offset. LowerToSingleIndexGEPs or lowerToArithmetics will later
1061 // handle the constant offset and won't need a new structure index.
1062  gep_type_iterator GTI = gep_type_begin(*GEP);
1063  for (unsigned I = 1, E = GEP->getNumOperands(); I != E; ++I, ++GTI) {
1064 if (GTI.isSequential()) {
1065 // Constant offsets of scalable types are not really constant.
1066 if (GTI.getIndexedType()->isScalableTy())
1067 continue;
1068
1069 // Splits this GEP index into a variadic part and a constant offset, and
1070 // uses the variadic part as the new index.
1071 Value *OldIdx = GEP->getOperand(I);
1072 User *UserChainTail;
1073 Value *NewIdx =
1074 ConstantOffsetExtractor::Extract(OldIdx, GEP, UserChainTail);
1075 if (NewIdx != nullptr) {
1076 // Switches to the index with the constant offset removed.
1077 GEP->setOperand(I, NewIdx);
1078 // After switching to the new index, we can garbage-collect UserChain
1079 // and the old index if they are not used.
1080        RecursivelyDeleteTriviallyDeadInstructions(UserChainTail);
1081        RecursivelyDeleteTriviallyDeadInstructions(OldIdx);
1082      }
1083 }
1084 }
1085
1086 // Clear the inbounds attribute because the new index may be off-bound.
1087 // e.g.,
1088 //
1089 // b = add i64 a, 5
1090 // addr = gep inbounds float, float* p, i64 b
1091 //
1092 // is transformed to:
1093 //
1094 // addr2 = gep float, float* p, i64 a ; inbounds removed
1095 // addr = gep inbounds float, float* addr2, i64 5
1096 //
1097 // If a is -4, although the old index b is in bounds, the new index a is
1098 // off-bound. http://llvm.org/docs/LangRef.html#id181 says "if the
1099 // inbounds keyword is not present, the offsets are added to the base
1100 // address with silently-wrapping two's complement arithmetic".
1101  // Therefore, the final code will be semantically equivalent.
1102 //
1103 // TODO(jingyue): do some range analysis to keep as many inbounds as
1104 // possible. GEPs with inbounds are more friendly to alias analysis.
1105 // TODO(gep_nowrap): Preserve nuw at least.
1106 bool GEPWasInBounds = GEP->isInBounds();
1107 GEP->setNoWrapFlags(GEPNoWrapFlags::none());
1108
1109 // Lowers a GEP to either GEPs with a single index or arithmetic operations.
1110 if (LowerGEP) {
1111 // As currently BasicAA does not analyze ptrtoint/inttoptr, do not lower to
1112 // arithmetic operations if the target uses alias analysis in codegen.
1113 // Additionally, pointers that aren't integral (and so can't be safely
1114 // converted to integers) or those whose offset size is different from their
1115 // pointer size (which means that doing integer arithmetic on them could
1116 // affect that data) can't be lowered in this way.
1117 unsigned AddrSpace = GEP->getPointerAddressSpace();
1118 bool PointerHasExtraData = DL->getPointerSizeInBits(AddrSpace) !=
1119 DL->getIndexSizeInBits(AddrSpace);
1120 if (TTI.useAA() || DL->isNonIntegralAddressSpace(AddrSpace) ||
1121 PointerHasExtraData)
1122 lowerToSingleIndexGEPs(GEP, AccumulativeByteOffset);
1123 else
1124 lowerToArithmetics(GEP, AccumulativeByteOffset);
1125 return true;
1126 }
1127
1128 // No need to create another GEP if the accumulative byte offset is 0.
1129 if (AccumulativeByteOffset == 0)
1130 return true;
1131
1132 // Offsets the base with the accumulative byte offset.
1133 //
1134 // %gep ; the base
1135 // ... %gep ...
1136 //
1137 // => add the offset
1138 //
1139 // %gep2 ; clone of %gep
1140 // %new.gep = gep i8, %gep2, %offset
1141 // %gep ; will be removed
1142 // ... %gep ...
1143 //
1144 // => replace all uses of %gep with %new.gep and remove %gep
1145 //
1146 // %gep2 ; clone of %gep
1147 // %new.gep = gep i8, %gep2, %offset
1148 // ... %new.gep ...
1149 Instruction *NewGEP = GEP->clone();
1150 NewGEP->insertBefore(GEP);
1151
1152 Type *PtrIdxTy = DL->getIndexType(GEP->getType());
1153 IRBuilder<> Builder(GEP);
1154 NewGEP = cast<Instruction>(Builder.CreatePtrAdd(
1155 NewGEP, ConstantInt::get(PtrIdxTy, AccumulativeByteOffset, true),
1156 GEP->getName(), GEPWasInBounds));
1157 NewGEP->copyMetadata(*GEP);
1158
1159 GEP->replaceAllUsesWith(NewGEP);
1160 GEP->eraseFromParent();
1161
1162 return true;
1163}
1164
1165bool SeparateConstOffsetFromGEPLegacyPass::runOnFunction(Function &F) {
1166 if (skipFunction(F))
1167 return false;
1168 auto *DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
1169 auto *LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
1170 auto *TLI = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F);
1171 auto GetTTI = [this](Function &F) -> TargetTransformInfo & {
1172 return this->getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
1173 };
1174 SeparateConstOffsetFromGEP Impl(DT, LI, TLI, GetTTI, LowerGEP);
1175 return Impl.run(F);
1176}
1177
1178bool SeparateConstOffsetFromGEP::run(Function &F) {
1179  if (DisableSeparateConstOffsetFromGEP)
1180    return false;
1181
1182 DL = &F.getDataLayout();
1183 bool Changed = false;
1184 for (BasicBlock &B : F) {
1185 if (!DT->isReachableFromEntry(&B))
1186 continue;
1187
1188    for (Instruction &I : llvm::make_early_inc_range(B))
1189 if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(&I))
1190 Changed |= splitGEP(GEP);
1191    // No need to split GEP ConstantExprs because all their indices are
1192    // constant already.
1193 }
1194
1195 Changed |= reuniteExts(F);
1196
1197 if (VerifyNoDeadCode)
1198 verifyNoDeadCode(F);
1199
1200 return Changed;
1201}
1202
1203Instruction *SeparateConstOffsetFromGEP::findClosestMatchingDominator(
1204 ExprKey Key, Instruction *Dominatee,
1205 DenseMap<ExprKey, SmallVector<Instruction *, 2>> &DominatingExprs) {
1206 auto Pos = DominatingExprs.find(Key);
1207 if (Pos == DominatingExprs.end())
1208 return nullptr;
1209
1210 auto &Candidates = Pos->second;
1211 // Because we process the basic blocks in pre-order of the dominator tree, a
1212 // candidate that doesn't dominate the current instruction won't dominate any
1213 // future instruction either. Therefore, we pop it out of the stack. This
1214 // optimization makes the algorithm O(n).
1215 while (!Candidates.empty()) {
1216 Instruction *Candidate = Candidates.back();
1217 if (DT->dominates(Candidate, Dominatee))
1218 return Candidate;
1219 Candidates.pop_back();
1220 }
1221 return nullptr;
1222}
1223
1224bool SeparateConstOffsetFromGEP::reuniteExts(Instruction *I) {
1225 if (!I->getType()->isIntOrIntVectorTy())
1226 return false;
1227
1228 // Dom: LHS+RHS
1229 // I: sext(LHS)+sext(RHS)
1230 // If Dom can't sign overflow and Dom dominates I, optimize I to sext(Dom).
1231 // TODO: handle zext
1232 Value *LHS = nullptr, *RHS = nullptr;
1233 if (match(I, m_Add(m_SExt(m_Value(LHS)), m_SExt(m_Value(RHS))))) {
1234 if (LHS->getType() == RHS->getType()) {
1235 ExprKey Key = createNormalizedCommutablePair(LHS, RHS);
1236 if (auto *Dom = findClosestMatchingDominator(Key, I, DominatingAdds)) {
1237 Instruction *NewSExt =
1238 new SExtInst(Dom, I->getType(), "", I->getIterator());
1239 NewSExt->takeName(I);
1240 I->replaceAllUsesWith(NewSExt);
1241 NewSExt->setDebugLoc(I->getDebugLoc());
1242        RecursivelyDeleteTriviallyDeadInstructions(I);
1243        return true;
1244 }
1245 }
1246 } else if (match(I, m_Sub(m_SExt(m_Value(LHS)), m_SExt(m_Value(RHS))))) {
1247 if (LHS->getType() == RHS->getType()) {
1248 if (auto *Dom =
1249 findClosestMatchingDominator({LHS, RHS}, I, DominatingSubs)) {
1250 Instruction *NewSExt =
1251 new SExtInst(Dom, I->getType(), "", I->getIterator());
1252 NewSExt->takeName(I);
1253 I->replaceAllUsesWith(NewSExt);
1254 NewSExt->setDebugLoc(I->getDebugLoc());
1255        RecursivelyDeleteTriviallyDeadInstructions(I);
1256        return true;
1257 }
1258 }
1259 }
1260
1261 // Add I to DominatingExprs if it's an add/sub that can't sign overflow.
1262 if (match(I, m_NSWAdd(m_Value(LHS), m_Value(RHS)))) {
1263    if (programUndefinedIfPoison(I)) {
1264      ExprKey Key = createNormalizedCommutablePair(LHS, RHS);
1265 DominatingAdds[Key].push_back(I);
1266 }
1267 } else if (match(I, m_NSWSub(m_Value(LHS), m_Value(RHS)))) {
1268    if (programUndefinedIfPoison(I))
1269      DominatingSubs[{LHS, RHS}].push_back(I);
1270 }
1271 return false;
1272}
1273
1274bool SeparateConstOffsetFromGEP::reuniteExts(Function &F) {
1275 bool Changed = false;
1276 DominatingAdds.clear();
1277 DominatingSubs.clear();
1278 for (const auto Node : depth_first(DT)) {
1279 BasicBlock *BB = Node->getBlock();
1280    for (Instruction &I : llvm::make_early_inc_range(*BB))
1281      Changed |= reuniteExts(&I);
1282 }
1283 return Changed;
1284}
1285
1286void SeparateConstOffsetFromGEP::verifyNoDeadCode(Function &F) {
1287 for (BasicBlock &B : F) {
1288 for (Instruction &I : B) {
1289      if (isInstructionTriviallyDead(&I)) {
1290        std::string ErrMessage;
1291 raw_string_ostream RSO(ErrMessage);
1292 RSO << "Dead instruction detected!\n" << I << "\n";
1293 llvm_unreachable(RSO.str().c_str());
1294 }
1295 }
1296 }
1297}
1298
1299bool SeparateConstOffsetFromGEP::isLegalToSwapOperand(
1300 GetElementPtrInst *FirstGEP, GetElementPtrInst *SecondGEP, Loop *CurLoop) {
1301 if (!FirstGEP || !FirstGEP->hasOneUse())
1302 return false;
1303
1304 if (!SecondGEP || FirstGEP->getParent() != SecondGEP->getParent())
1305 return false;
1306
1307 if (FirstGEP == SecondGEP)
1308 return false;
1309
1310 unsigned FirstNum = FirstGEP->getNumOperands();
1311 unsigned SecondNum = SecondGEP->getNumOperands();
1312  // Give up if the number of operands is not 2.
1313 if (FirstNum != SecondNum || FirstNum != 2)
1314 return false;
1315
1316 Value *FirstBase = FirstGEP->getOperand(0);
1317 Value *SecondBase = SecondGEP->getOperand(0);
1318 Value *FirstOffset = FirstGEP->getOperand(1);
1319 // Give up if the index of the first GEP is loop invariant.
1320 if (CurLoop->isLoopInvariant(FirstOffset))
1321 return false;
1322
1323  // Give up if the bases don't have the same type.
1324 if (FirstBase->getType() != SecondBase->getType())
1325 return false;
1326
1327 Instruction *FirstOffsetDef = dyn_cast<Instruction>(FirstOffset);
1328
1329  // Check if the second operand of the first GEP has a constant coefficient.
1330  // For example, in the following code, we won't gain anything by
1331  // hoisting the second GEP out because the second GEP can be folded away.
1332 // %scevgep.sum.ur159 = add i64 %idxprom48.ur, 256
1333 // %67 = shl i64 %scevgep.sum.ur159, 2
1334 // %uglygep160 = getelementptr i8* %65, i64 %67
1335 // %uglygep161 = getelementptr i8* %uglygep160, i64 -1024
1336
1337  // Skip a constant shift instruction which may be generated by splitting GEPs.
1338 if (FirstOffsetDef && FirstOffsetDef->isShift() &&
1339 isa<ConstantInt>(FirstOffsetDef->getOperand(1)))
1340 FirstOffsetDef = dyn_cast<Instruction>(FirstOffsetDef->getOperand(0));
1341
1342  // Give up if FirstOffsetDef is an Add or Sub with a constant, because it
1343  // may not be profitable at all due to constant folding.
1344 if (FirstOffsetDef)
1345 if (BinaryOperator *BO = dyn_cast<BinaryOperator>(FirstOffsetDef)) {
1346 unsigned opc = BO->getOpcode();
1347 if ((opc == Instruction::Add || opc == Instruction::Sub) &&
1348 (isa<ConstantInt>(BO->getOperand(0)) ||
1349 isa<ConstantInt>(BO->getOperand(1))))
1350 return false;
1351 }
1352 return true;
1353}
1354
1355bool SeparateConstOffsetFromGEP::hasMoreThanOneUseInLoop(Value *V, Loop *L) {
1356 int UsesInLoop = 0;
1357 for (User *U : V->users()) {
1358 if (Instruction *User = dyn_cast<Instruction>(U))
1359 if (L->contains(User))
1360 if (++UsesInLoop > 1)
1361 return true;
1362 }
1363 return false;
1364}
1365
1366void SeparateConstOffsetFromGEP::swapGEPOperand(GetElementPtrInst *First,
1367 GetElementPtrInst *Second) {
1368 Value *Offset1 = First->getOperand(1);
1369 Value *Offset2 = Second->getOperand(1);
1370 First->setOperand(1, Offset2);
1371 Second->setOperand(1, Offset1);
1372
1373  // We changed p+o+c to p+c+o; p+c may not be inbounds anymore.
1374 const DataLayout &DAL = First->getDataLayout();
1375  APInt Offset(DAL.getIndexSizeInBits(
1376                   cast<PointerType>(First->getType())->getAddressSpace()),
1377 0);
1378 Value *NewBase =
1379 First->stripAndAccumulateInBoundsConstantOffsets(DAL, Offset);
1380 uint64_t ObjectSize;
1381 if (!getObjectSize(NewBase, ObjectSize, DAL, TLI) ||
1382 Offset.ugt(ObjectSize)) {
1383 // TODO(gep_nowrap): Make flag preservation more precise.
1384 First->setNoWrapFlags(GEPNoWrapFlags::none());
1385    Second->setNoWrapFlags(GEPNoWrapFlags::none());
1386  } else
1387 First->setIsInBounds(true);
1388}
1389
1390void SeparateConstOffsetFromGEPPass::printPipeline(
1391    raw_ostream &OS, function_ref<StringRef(StringRef)> MapClassName2PassName) {
1392  static_cast<PassInfoMixin<SeparateConstOffsetFromGEPPass> *>(this)
1393      ->printPipeline(OS, MapClassName2PassName);
1394 OS << '<';
1395 if (LowerGEP)
1396 OS << "lower-gep";
1397 OS << '>';
1398}
1399
1400PreservedAnalyses
1401SeparateConstOffsetFromGEPPass::run(Function &F, FunctionAnalysisManager &AM) {
1402  auto *DT = &AM.getResult<DominatorTreeAnalysis>(F);
1403 auto *LI = &AM.getResult<LoopAnalysis>(F);
1404 auto *TLI = &AM.getResult<TargetLibraryAnalysis>(F);
1405 auto GetTTI = [&AM](Function &F) -> TargetTransformInfo & {
1406 return AM.getResult<TargetIRAnalysis>(F);
1407 };
1408 SeparateConstOffsetFromGEP Impl(DT, LI, TLI, GetTTI, LowerGEP);
1409 if (!Impl.run(F))
1410 return PreservedAnalyses::all();
1411  PreservedAnalyses PA;
1412  PA.preserveSet<CFGAnalyses>();
1413  return PA;
1414}
Definition: SmallVector.h:1210
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:50
Class to represent struct types.
Definition: DerivedTypes.h:216
Analysis pass providing the TargetTransformInfo.
Analysis pass providing the TargetLibraryInfo.
Provides information about what library functions are available for the current target.
Wrapper pass for TargetTransformInfo.
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset, bool HasBaseReg, int64_t Scale, unsigned AddrSpace=0, Instruction *I=nullptr, int64_t ScalableOffset=0) const
Return true if the addressing mode represented by AM is legal for this target, for a load/store of th...
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
unsigned getIntegerBitWidth() const
bool isScalableTy() const
Return true if this is a type whose size is a known multiple of vscale.
A Use represents the edge between a Value definition and its users.
Definition: Use.h:43
void setOperand(unsigned i, Value *Val)
Definition: User.h:174
Value * getOperand(unsigned i) const
Definition: User.h:169
unsigned getNumOperands() const
Definition: User.h:191
LLVM Value Representation.
Definition: Value.h:74
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:255
bool hasOneUse() const
Return true if there is exactly one use of this value.
Definition: Value.h:434
bool use_empty() const
Definition: Value.h:344
StringRef getName() const
Return a constant reference to the value's name.
Definition: Value.cpp:309
void takeName(Value *V)
Transfer the name from V to this value.
Definition: Value.cpp:383
An efficient, type-erasing, non-owning reference to a callable.
TypeSize getSequentialElementStride(const DataLayout &DL) const
const ParentTy * getParent() const
Definition: ilist_node.h:32
This class implements an extremely fast bulk output stream that can only output to a stream.
Definition: raw_ostream.h:52
A raw_ostream that writes to an std::string.
Definition: raw_ostream.h:661
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
Key
PAL metadata keys.
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition: CallingConv.h:24
@ C
The default llvm calling convention, compatible with C.
Definition: CallingConv.h:34
BinaryOp_match< LHS, RHS, Instruction::Add > m_Add(const LHS &L, const RHS &R)
OverflowingBinaryOp_match< LHS, RHS, Instruction::Sub, OverflowingBinaryOperator::NoSignedWrap > m_NSWSub(const LHS &L, const RHS &R)
bool match(Val *V, const Pattern &P)
Definition: PatternMatch.h:49
class_match< Value > m_Value()
Match an arbitrary value and ignore it.
Definition: PatternMatch.h:92
OverflowingBinaryOp_match< LHS, RHS, Instruction::Add, OverflowingBinaryOperator::NoSignedWrap > m_NSWAdd(const LHS &L, const RHS &R)
CastInst_match< OpTy, SExtInst > m_SExt(const OpTy &Op)
Matches SExt.
BinaryOp_match< LHS, RHS, Instruction::Sub > m_Sub(const LHS &L, const RHS &R)
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:443
PointerTypeMap run(const Module &M)
Compute the PointerTypeMap for the module M.
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
@ Offset
Definition: DWP.cpp:480
auto find(R &&Range, const T &Val)
Provide wrappers to std::find which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1742
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1722
bool RecursivelyDeleteTriviallyDeadInstructions(Value *V, const TargetLibraryInfo *TLI=nullptr, MemorySSAUpdater *MSSAU=nullptr, std::function< void(Value *)> AboutToDeleteCallback=std::function< void(Value *)>())
If the specified value is a trivially dead instruction, delete it.
Definition: Local.cpp:540
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
Definition: STLExtras.h:656
bool isInstructionTriviallyDead(Instruction *I, const TargetLibraryInfo *TLI=nullptr)
Return true if the result produced by the instruction is not used, and the instruction will return.
Definition: Local.cpp:400
bool getObjectSize(const Value *Ptr, uint64_t &Size, const DataLayout &DL, const TargetLibraryInfo *TLI, ObjectSizeOpts Opts={})
Compute the size of the object pointed by Ptr.
auto reverse(ContainerTy &&C)
Definition: STLExtras.h:419
bool programUndefinedIfPoison(const Instruction *Inst)
void initializeSeparateConstOffsetFromGEPLegacyPassPass(PassRegistry &)
Constant * ConstantFoldCastOperand(unsigned Opcode, Constant *C, Type *DestTy, const DataLayout &DL)
Attempt to constant fold a cast with the specified operand.
FunctionPass * createSeparateConstOffsetFromGEPPass(bool LowerGEP=false)
@ First
Helpers to iterate all locations in the MemoryEffectsBase class.
constexpr unsigned BitWidth
Definition: BitmaskEnum.h:191
gep_type_iterator gep_type_begin(const User *GEP)
iterator_range< df_iterator< T > > depth_first(const T &G)
bool isKnownNonNegative(const Value *V, const SimplifyQuery &SQ, unsigned Depth=0)
Returns true if the give value is known to be non-negative.
A CRTP mix-in to automatically provide informational APIs needed for passes.
Definition: PassManager.h:69