//===- EarlyCSE.cpp - Simple and fast CSE pass ----------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This pass performs a simple dominator tree walk that eliminates trivially
// redundant instructions.
//
//===----------------------------------------------------------------------===//

#include "llvm/Transforms/Scalar/EarlyCSE.h"
#include "llvm/ADT/DenseMapInfo.h"
#include "llvm/ADT/Hashing.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/ScopedHashTable.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/GuardUtils.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/MemoryLocation.h"
#include "llvm/Analysis/MemorySSA.h"
#include "llvm/Analysis/MemorySSAUpdater.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/PassManager.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Value.h"
#include "llvm/Pass.h"
#include "llvm/Support/Allocator.h"
#include "llvm/Support/AtomicOrdering.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/DebugCounter.h"
#include "llvm/Support/RecyclingAllocator.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Utils/AssumeBundleBuilder.h"
#include "llvm/Transforms/Utils/Local.h"
#include <cassert>
#include <deque>
#include <memory>
#include <utility>

using namespace llvm;
using namespace llvm::PatternMatch;

#define DEBUG_TYPE "early-cse"

STATISTIC(NumSimplify, "Number of instructions simplified or DCE'd");
STATISTIC(NumCSE, "Number of instructions CSE'd");
STATISTIC(NumCSECVP, "Number of compare instructions CVP'd");
STATISTIC(NumCSELoad, "Number of load instructions CSE'd");
STATISTIC(NumCSECall, "Number of call instructions CSE'd");
STATISTIC(NumDSE, "Number of trivial dead stores removed");

DEBUG_COUNTER(CSECounter, "early-cse",
              "Controls which instructions are removed");

static cl::opt<unsigned> EarlyCSEMssaOptCap(
    "earlycse-mssa-optimization-cap", cl::init(500), cl::Hidden,
    cl::desc("Enable imprecision in EarlyCSE in pathological cases, in "
             "exchange for faster compile. Caps the MemorySSA clobbering "
             "calls."));

static cl::opt<bool> EarlyCSEDebugHash(
    "earlycse-debug-hash", cl::init(false), cl::Hidden,
    cl::desc("Perform extra assertion checking to verify that SimpleValue's "
             "hash function is well-behaved w.r.t. its isEqual predicate"));

//===----------------------------------------------------------------------===//
// SimpleValue
//===----------------------------------------------------------------------===//

namespace {

/// Struct representing the available values in the scoped hash table.
struct SimpleValue {
  Instruction *Inst;

  SimpleValue(Instruction *I) : Inst(I) {
    assert((isSentinel() || canHandle(I)) && "Inst can't be handled!");
  }

  bool isSentinel() const {
    return Inst == DenseMapInfo<Instruction *>::getEmptyKey() ||
           Inst == DenseMapInfo<Instruction *>::getTombstoneKey();
  }

  static bool canHandle(Instruction *Inst) {
    // This can only handle non-void readnone functions.
    // Also handled are constrained intrinsics that look like the types
    // of instructions handled below (UnaryOperator, etc.).
    if (CallInst *CI = dyn_cast<CallInst>(Inst)) {
      if (Function *F = CI->getCalledFunction()) {
        switch ((Intrinsic::ID)F->getIntrinsicID()) {
        case Intrinsic::experimental_constrained_fadd:
        case Intrinsic::experimental_constrained_fsub:
        case Intrinsic::experimental_constrained_fmul:
        case Intrinsic::experimental_constrained_fdiv:
        case Intrinsic::experimental_constrained_frem:
        case Intrinsic::experimental_constrained_fptosi:
        case Intrinsic::experimental_constrained_sitofp:
        case Intrinsic::experimental_constrained_fptoui:
        case Intrinsic::experimental_constrained_uitofp:
        case Intrinsic::experimental_constrained_fcmp:
        case Intrinsic::experimental_constrained_fcmps: {
          auto *CFP = cast<ConstrainedFPIntrinsic>(CI);
          if (CFP->getExceptionBehavior() &&
              CFP->getExceptionBehavior() == fp::ebStrict)
            return false;
          // Since we CSE across function calls we must not allow
          // the rounding mode to change.
          if (CFP->getRoundingMode() &&
              CFP->getRoundingMode() == RoundingMode::Dynamic)
            return false;
          return true;
        }
        }
      }
      return CI->doesNotAccessMemory() && !CI->getType()->isVoidTy() &&
             // FIXME: Currently, calls that may access the thread ID may be
             // treated as not accessing memory. That is problematic for
             // coroutines, since a coroutine may resume in a different
             // thread, so we disable the optimization here for correctness.
             // This may block other correct optimizations; revert it once we
             // detect the memory-accessing kind more precisely.
             !CI->getFunction()->isPresplitCoroutine();
    }
    return isa<CastInst>(Inst) || isa<UnaryOperator>(Inst) ||
           isa<BinaryOperator>(Inst) || isa<GetElementPtrInst>(Inst) ||
           isa<CmpInst>(Inst) || isa<SelectInst>(Inst) ||
           isa<ExtractElementInst>(Inst) || isa<InsertElementInst>(Inst) ||
           isa<ShuffleVectorInst>(Inst) || isa<ExtractValueInst>(Inst) ||
           isa<InsertValueInst>(Inst) || isa<FreezeInst>(Inst);
  }
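
  // Illustrative note (not in the original source): canHandle() rejects a
  // constrained operation such as
  //   %r = call double @llvm.experimental.constrained.fadd.f64(
  //            double %a, double %b,
  //            metadata !"round.dynamic", metadata !"fpexcept.ignore")
  // because its rounding mode is dynamic and may differ between two
  // otherwise-identical calls, so CSE'ing them would be unsound.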
};

} // end anonymous namespace

namespace llvm {

template <> struct DenseMapInfo<SimpleValue> {
  static inline SimpleValue getEmptyKey() {
    return DenseMapInfo<Instruction *>::getEmptyKey();
  }

  static inline SimpleValue getTombstoneKey() {
    return DenseMapInfo<Instruction *>::getTombstoneKey();
  }

  static unsigned getHashValue(SimpleValue Val);
  static bool isEqual(SimpleValue LHS, SimpleValue RHS);
};

} // end namespace llvm

/// Match a 'select' including an optional 'not' of the condition.
static bool matchSelectWithOptionalNotCond(Value *V, Value *&Cond, Value *&A,
                                           Value *&B,
                                           SelectPatternFlavor &Flavor) {
  // Return false if V is not even a select.
  if (!match(V, m_Select(m_Value(Cond), m_Value(A), m_Value(B))))
    return false;

  // Look through a 'not' of the condition operand by swapping A/B.
  Value *CondNot;
  if (match(Cond, m_Not(m_Value(CondNot)))) {
    Cond = CondNot;
    std::swap(A, B);
  }

  // Match canonical forms of min/max. We are not using ValueTracking's
  // more powerful matchSelectPattern() because it may rely on instruction
  // flags such as "nsw". That would be incompatible with the current hashing
  // mechanism that may remove flags to increase the likelihood of CSE.

  Flavor = SPF_UNKNOWN;
  CmpInst::Predicate Pred;

  if (!match(Cond, m_ICmp(Pred, m_Specific(A), m_Specific(B)))) {
    // Check for commuted variants of min/max by swapping the predicate.
    // If we do not match the standard or commuted patterns, this is not a
    // recognized form of min/max, but it is still a select, so return true.
    if (!match(Cond, m_ICmp(Pred, m_Specific(B), m_Specific(A))))
      return true;
    Pred = ICmpInst::getSwappedPredicate(Pred);
  }

  switch (Pred) {
  case CmpInst::ICMP_UGT: Flavor = SPF_UMAX; break;
  case CmpInst::ICMP_ULT: Flavor = SPF_UMIN; break;
  case CmpInst::ICMP_SGT: Flavor = SPF_SMAX; break;
  case CmpInst::ICMP_SLT: Flavor = SPF_SMIN; break;
  // Non-strict inequalities.
  case CmpInst::ICMP_ULE: Flavor = SPF_UMIN; break;
  case CmpInst::ICMP_UGE: Flavor = SPF_UMAX; break;
  case CmpInst::ICMP_SLE: Flavor = SPF_SMIN; break;
  case CmpInst::ICMP_SGE: Flavor = SPF_SMAX; break;
  default: break;
  }

  return true;
}
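
// For illustration (not in the original source): this matcher recognizes
//   %s = select i1 %c, i32 %a, i32 %b
// directly, and
//   %n = xor i1 %c, true
//   %s = select i1 %n, i32 %a, i32 %b
// with A and B swapped. If %c is 'icmp sgt i32 %a, %b' it reports SPF_SMAX;
// the commuted compare 'icmp slt i32 %b, %a' yields the same flavor.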

static unsigned getHashValueImpl(SimpleValue Val) {
  Instruction *Inst = Val.Inst;
  // Hash in all of the operands as pointers.
  if (BinaryOperator *BinOp = dyn_cast<BinaryOperator>(Inst)) {
    Value *LHS = BinOp->getOperand(0);
    Value *RHS = BinOp->getOperand(1);
    if (BinOp->isCommutative() && BinOp->getOperand(0) > BinOp->getOperand(1))
      std::swap(LHS, RHS);

    return hash_combine(BinOp->getOpcode(), LHS, RHS);
  }
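
  // Illustrative (not in the original source): 'add i32 %a, %b' and
  // 'add i32 %b, %a' hash identically because commutative operands are
  // ordered by pointer value before hashing.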

  if (CmpInst *CI = dyn_cast<CmpInst>(Inst)) {
    // Compares can be commuted by swapping the comparands and updating the
    // predicate. Choose the form that has the comparands in sorted order, or
    // in the case of a tie, the one with the lower predicate.
    Value *LHS = CI->getOperand(0);
    Value *RHS = CI->getOperand(1);
    CmpInst::Predicate Pred = CI->getPredicate();
    CmpInst::Predicate SwappedPred = CI->getSwappedPredicate();
    if (std::tie(LHS, Pred) > std::tie(RHS, SwappedPred)) {
      std::swap(LHS, RHS);
      Pred = SwappedPred;
    }
    return hash_combine(Inst->getOpcode(), Pred, LHS, RHS);
  }
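
  // Illustrative (not in the original source): 'icmp sgt i32 %x, %y' and
  // 'icmp slt i32 %y, %x' normalize to the same (predicate, LHS, RHS) tuple
  // here, so they hash identically and isEqual() can CSE one into the other.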

  // Hash general selects to allow matching commuted true/false operands.
  SelectPatternFlavor SPF;
  Value *Cond, *A, *B;
  if (matchSelectWithOptionalNotCond(Inst, Cond, A, B, SPF)) {
    // Hash min/max (cmp + select) to allow for commuted operands.
    // Min/max may also have a non-canonical compare predicate (e.g., the
    // compare for smin may use 'sgt' rather than 'slt'), and non-canonical
    // operands in the compare.
    // TODO: We should also detect FP min/max.
    if (SPF == SPF_SMIN || SPF == SPF_SMAX ||
        SPF == SPF_UMIN || SPF == SPF_UMAX) {
      if (A > B)
        std::swap(A, B);
      return hash_combine(Inst->getOpcode(), SPF, A, B);
    }

    // If we do not have a compare as the condition, just hash in the
    // condition.
    CmpInst::Predicate Pred;
    Value *X, *Y;
    if (!match(Cond, m_Cmp(Pred, m_Value(X), m_Value(Y))))
      return hash_combine(Inst->getOpcode(), Cond, A, B);

    // Similar to cmp normalization (above) - canonicalize the predicate:
    // select (icmp Pred, X, Y), A, B --> select (icmp InvPred, X, Y), B, A
    if (CmpInst::getInversePredicate(Pred) < Pred) {
      Pred = CmpInst::getInversePredicate(Pred);
      std::swap(A, B);
    }
    return hash_combine(Inst->getOpcode(), Pred, X, Y, A, B);
  }
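
  // Illustrative (not in the original source): with this canonicalization,
  //   select (icmp ult i32 %x, %y), i32 %a, i32 %b
  //   select (icmp uge i32 %x, %y), i32 %b, i32 %a
  // hash to the same value, since 'uge' is the inverse of 'ult' and the
  // true/false arms are swapped to compensate.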

  if (CastInst *CI = dyn_cast<CastInst>(Inst))
    return hash_combine(CI->getOpcode(), CI->getType(), CI->getOperand(0));

  if (FreezeInst *FI = dyn_cast<FreezeInst>(Inst))
    return hash_combine(FI->getOpcode(), FI->getOperand(0));

  if (const ExtractValueInst *EVI = dyn_cast<ExtractValueInst>(Inst))
    return hash_combine(EVI->getOpcode(), EVI->getOperand(0),
                        hash_combine_range(EVI->idx_begin(), EVI->idx_end()));

  if (const InsertValueInst *IVI = dyn_cast<InsertValueInst>(Inst))
    return hash_combine(IVI->getOpcode(), IVI->getOperand(0),
                        IVI->getOperand(1),
                        hash_combine_range(IVI->idx_begin(), IVI->idx_end()));

  assert((isa<CallInst>(Inst) || isa<GetElementPtrInst>(Inst) ||
          isa<ExtractElementInst>(Inst) || isa<InsertElementInst>(Inst) ||
          isa<ShuffleVectorInst>(Inst) || isa<UnaryOperator>(Inst) ||
          isa<FreezeInst>(Inst)) &&
         "Invalid/unknown instruction");

  // Handle intrinsics with commutative operands.
  // TODO: Extend this to handle intrinsics with >2 operands where the first
  // 2 operands are commutative.
  auto *II = dyn_cast<IntrinsicInst>(Inst);
  if (II && II->isCommutative() && II->arg_size() == 2) {
    Value *LHS = II->getArgOperand(0), *RHS = II->getArgOperand(1);
    if (LHS > RHS)
      std::swap(LHS, RHS);
    return hash_combine(II->getOpcode(), LHS, RHS);
  }

  // gc.relocate is a 'special' call: its second and third operands are not
  // real values, but indices into the statepoint's argument list. Get the
  // values they point to.
  if (const GCRelocateInst *GCR = dyn_cast<GCRelocateInst>(Inst))
    return hash_combine(GCR->getOpcode(), GCR->getOperand(0),
                        GCR->getBasePtr(), GCR->getDerivedPtr());

  // Mix in the opcode.
  return hash_combine(
      Inst->getOpcode(),
      hash_combine_range(Inst->value_op_begin(), Inst->value_op_end()));
}

unsigned DenseMapInfo<SimpleValue>::getHashValue(SimpleValue Val) {
#ifndef NDEBUG
  // If -earlycse-debug-hash was specified, return a constant -- this
  // will force all hashing to collide, so we'll exhaustively search
  // the table for a match, and the assertion in isEqual will fire if
  // there's a bug causing equal keys to hash differently.
  if (EarlyCSEDebugHash)
    return 0;
#endif
  return getHashValueImpl(Val);
}
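
// For illustration (not in the original source), the flag can be exercised
// on an assertions-enabled build with something like:
//   opt -passes=early-cse -earlycse-debug-hash -disable-output input.ll
// which degrades every lookup to a full table scan and asserts if two keys
// compare equal but hash differently.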

static bool isEqualImpl(SimpleValue LHS, SimpleValue RHS) {
  Instruction *LHSI = LHS.Inst, *RHSI = RHS.Inst;

  if (LHS.isSentinel() || RHS.isSentinel())
    return LHSI == RHSI;

  if (LHSI->getOpcode() != RHSI->getOpcode())
    return false;
  if (LHSI->isIdenticalToWhenDefined(RHSI))
    return true;

  // If we're not strictly identical, we still might be a commutable
  // instruction.
  if (BinaryOperator *LHSBinOp = dyn_cast<BinaryOperator>(LHSI)) {
    if (!LHSBinOp->isCommutative())
      return false;

    assert(isa<BinaryOperator>(RHSI) &&
           "same opcode, but different instruction type?");
    BinaryOperator *RHSBinOp = cast<BinaryOperator>(RHSI);

    // Commuted equality.
    return LHSBinOp->getOperand(0) == RHSBinOp->getOperand(1) &&
           LHSBinOp->getOperand(1) == RHSBinOp->getOperand(0);
  }
  if (CmpInst *LHSCmp = dyn_cast<CmpInst>(LHSI)) {
    assert(isa<CmpInst>(RHSI) &&
           "same opcode, but different instruction type?");
    CmpInst *RHSCmp = cast<CmpInst>(RHSI);
    // Commuted equality.
    return LHSCmp->getOperand(0) == RHSCmp->getOperand(1) &&
           LHSCmp->getOperand(1) == RHSCmp->getOperand(0) &&
           LHSCmp->getSwappedPredicate() == RHSCmp->getPredicate();
  }

  // TODO: Extend this for >2 args by matching the trailing N-2 args.
  auto *LII = dyn_cast<IntrinsicInst>(LHSI);
  auto *RII = dyn_cast<IntrinsicInst>(RHSI);
  if (LII && RII && LII->getIntrinsicID() == RII->getIntrinsicID() &&
      LII->isCommutative() && LII->arg_size() == 2) {
    return LII->getArgOperand(0) == RII->getArgOperand(1) &&
           LII->getArgOperand(1) == RII->getArgOperand(0);
  }

  // See comment above in `getHashValue()`.
  if (const GCRelocateInst *GCR1 = dyn_cast<GCRelocateInst>(LHSI))
    if (const GCRelocateInst *GCR2 = dyn_cast<GCRelocateInst>(RHSI))
      return GCR1->getOperand(0) == GCR2->getOperand(0) &&
             GCR1->getBasePtr() == GCR2->getBasePtr() &&
             GCR1->getDerivedPtr() == GCR2->getDerivedPtr();

  // Min/max can occur with commuted operands, non-canonical predicates,
  // and/or non-canonical operands.
  // Selects can be non-trivially equivalent via inverted conditions and
  // swaps.
  SelectPatternFlavor LSPF, RSPF;
  Value *CondL, *CondR, *LHSA, *RHSA, *LHSB, *RHSB;
  if (matchSelectWithOptionalNotCond(LHSI, CondL, LHSA, LHSB, LSPF) &&
      matchSelectWithOptionalNotCond(RHSI, CondR, RHSA, RHSB, RSPF)) {
    if (LSPF == RSPF) {
      // TODO: We should also detect FP min/max.
      if (LSPF == SPF_SMIN || LSPF == SPF_SMAX ||
          LSPF == SPF_UMIN || LSPF == SPF_UMAX)
        return ((LHSA == RHSA && LHSB == RHSB) ||
                (LHSA == RHSB && LHSB == RHSA));

      // select Cond, A, B <--> select not(Cond), B, A
      if (CondL == CondR && LHSA == RHSA && LHSB == RHSB)
        return true;
    }

    // If the true/false operands are swapped and the conditions are compares
    // with inverted predicates, the selects are equal:
    // select (icmp Pred, X, Y), A, B <--> select (icmp InvPred, X, Y), B, A
    //
    // This also handles patterns with a double-negation in the sense of
    // not + inverse, because we looked through a 'not' in the matching
    // function and swapped A/B:
    // select (cmp Pred, X, Y), A, B <--> select (not (cmp InvPred, X, Y)), B, A
    //
    // This intentionally does NOT handle patterns with a double-negation in
    // the sense of not + not, because doing so could result in values
    // comparing as equal that hash differently in the min/max cases like:
    // select (cmp slt, X, Y), X, Y <--> select (not (not (cmp slt, X, Y))), X, Y
    //   ^ hashes as min                  ^ would not hash as min
    // In the context of the EarlyCSE pass, however, such cases never reach
    // this code, as we simplify the double-negation before hashing the
    // second select (and so still succeed at CSEing them).
    if (LHSA == RHSB && LHSB == RHSA) {
      CmpInst::Predicate PredL, PredR;
      Value *X, *Y;
      if (match(CondL, m_Cmp(PredL, m_Value(X), m_Value(Y))) &&
          match(CondR, m_Cmp(PredR, m_Specific(X), m_Specific(Y))) &&
          CmpInst::getInversePredicate(PredL) == PredR)
        return true;
    }
  }

  return false;
}

bool DenseMapInfo<SimpleValue>::isEqual(SimpleValue LHS, SimpleValue RHS) {
  // These comparisons are nontrivial, so assert that equality implies
  // hash equality (DenseMap demands this as an invariant).
  bool Result = isEqualImpl(LHS, RHS);
  assert(!Result || (LHS.isSentinel() && LHS.Inst == RHS.Inst) ||
         getHashValueImpl(LHS) == getHashValueImpl(RHS));
  return Result;
}

//===----------------------------------------------------------------------===//
// CallValue
//===----------------------------------------------------------------------===//

namespace {

/// Struct representing the available call values in the scoped hash
/// table.
struct CallValue {
  Instruction *Inst;

  CallValue(Instruction *I) : Inst(I) {
    assert((isSentinel() || canHandle(I)) && "Inst can't be handled!");
  }

  bool isSentinel() const {
    return Inst == DenseMapInfo<Instruction *>::getEmptyKey() ||
           Inst == DenseMapInfo<Instruction *>::getTombstoneKey();
  }

  static bool canHandle(Instruction *Inst) {
    // Don't value number anything that returns void.
    if (Inst->getType()->isVoidTy())
      return false;

    CallInst *CI = dyn_cast<CallInst>(Inst);
    if (!CI || !CI->onlyReadsMemory() ||
        // FIXME: Currently, calls that may access the thread ID may be
        // treated as not accessing memory. That is problematic for
        // coroutines, since a coroutine may resume in a different thread,
        // so we disable the optimization here for correctness. This may
        // block other correct optimizations; revert it once we detect the
        // memory-accessing kind more precisely.
        CI->getFunction()->isPresplitCoroutine())
      return false;
    return true;
  }
};

} // end anonymous namespace

namespace llvm {

template <> struct DenseMapInfo<CallValue> {
  static inline CallValue getEmptyKey() {
    return DenseMapInfo<Instruction *>::getEmptyKey();
  }

  static inline CallValue getTombstoneKey() {
    return DenseMapInfo<Instruction *>::getTombstoneKey();
  }

  static unsigned getHashValue(CallValue Val);
  static bool isEqual(CallValue LHS, CallValue RHS);
};

} // end namespace llvm

unsigned DenseMapInfo<CallValue>::getHashValue(CallValue Val) {
  Instruction *Inst = Val.Inst;

  // Hash all of the operands as pointers and mix in the opcode.
  return hash_combine(
      Inst->getOpcode(),
      hash_combine_range(Inst->value_op_begin(), Inst->value_op_end()));
}

bool DenseMapInfo<CallValue>::isEqual(CallValue LHS, CallValue RHS) {
  Instruction *LHSI = LHS.Inst, *RHSI = RHS.Inst;
  if (LHS.isSentinel() || RHS.isSentinel())
    return LHSI == RHSI;

  return LHSI->isIdenticalTo(RHSI);
}

//===----------------------------------------------------------------------===//
// EarlyCSE implementation
//===----------------------------------------------------------------------===//

namespace {

/// A simple and fast domtree-based CSE pass.
///
/// This pass does a simple depth-first walk over the dominator tree,
/// eliminating trivially redundant instructions and using instsimplify to
/// canonicalize things as it goes. It is intended to be fast and catch
/// obvious cases so that instcombine and other passes are more effective.
/// It is expected that a later pass of GVN will catch the interesting/hard
/// cases.
class EarlyCSE {
public:
  const TargetLibraryInfo &TLI;
  const TargetTransformInfo &TTI;
  DominatorTree &DT;
  AssumptionCache &AC;
  const SimplifyQuery SQ;
  MemorySSA *MSSA;
  std::unique_ptr<MemorySSAUpdater> MSSAUpdater;

  using AllocatorTy =
      RecyclingAllocator<BumpPtrAllocator,
                         ScopedHashTableVal<SimpleValue, Value *>>;
  using ScopedHTType =
      ScopedHashTable<SimpleValue, Value *, DenseMapInfo<SimpleValue>,
                      AllocatorTy>;

  /// A scoped hash table of the current values of all of our simple
  /// scalar expressions.
  ///
  /// As we walk down the domtree, we look to see if instructions are in
  /// this: if so, we replace them with what we find, otherwise we insert
  /// them so that dominated values can succeed in their lookup.
  ScopedHTType AvailableValues;

  /// A scoped hash table of the current values of previously encountered
  /// memory locations.
  ///
  /// This allows us to get efficient access to dominating loads or stores
  /// when we have a fully redundant load. In addition to the most recent
  /// load, we keep track of a generation count of the read, which is
  /// compared against the current generation count. The current generation
  /// count is incremented after every possibly writing memory operation,
  /// which ensures that we only CSE loads with other loads that have no
  /// intervening store. Ordering events (such as fences or atomic
  /// instructions) increment the generation count as well; essentially, we
  /// model these as writes to all possible locations. Note that atomic
  /// and/or volatile loads and stores can be present in the table; it is the
  /// responsibility of the consumer to inspect the atomicity/volatility if
  /// needed.
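  ///
  /// For illustration (not in the original source):
  ///   %v = load i32, ptr %p    ; recorded at generation G
  ///   call void @f()           ; may write memory: generation becomes G+1
  ///   %w = load i32, ptr %p    ; lookup finds generation G != G+1
  /// The second load is then only CSE'd if MemorySSA (when available) proves
  /// that no intervening instruction actually clobbers %p.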
  struct LoadValue {
    Instruction *DefInst = nullptr;
    unsigned Generation = 0;
    int MatchingId = -1;
    bool IsAtomic = false;

    LoadValue() = default;
    LoadValue(Instruction *Inst, unsigned Generation, unsigned MatchingId,
              bool IsAtomic)
        : DefInst(Inst), Generation(Generation), MatchingId(MatchingId),
          IsAtomic(IsAtomic) {}
  };

  using LoadMapAllocator =
      RecyclingAllocator<BumpPtrAllocator,
                         ScopedHashTableVal<Value *, LoadValue>>;
  using LoadHTType =
      ScopedHashTable<Value *, LoadValue, DenseMapInfo<Value *>,
                      LoadMapAllocator>;

  LoadHTType AvailableLoads;

  // A scoped hash table mapping memory locations (represented as typed
  // addresses) to generation numbers at which that memory location became
  // (henceforth indefinitely) invariant.
  using InvariantMapAllocator =
      RecyclingAllocator<BumpPtrAllocator,
                         ScopedHashTableVal<MemoryLocation, unsigned>>;
  using InvariantHTType =
      ScopedHashTable<MemoryLocation, unsigned, DenseMapInfo<MemoryLocation>,
                      InvariantMapAllocator>;
  InvariantHTType AvailableInvariants;

  /// A scoped hash table of the current values of read-only call
  /// values.
  ///
  /// It uses the same generation count as loads.
  using CallHTType =
      ScopedHashTable<CallValue, std::pair<Instruction *, unsigned>>;
  CallHTType AvailableCalls;

  /// This is the current generation of the memory value.
  unsigned CurrentGeneration = 0;

  /// Set up the EarlyCSE runner for a particular function.
  EarlyCSE(const DataLayout &DL, const TargetLibraryInfo &TLI,
           const TargetTransformInfo &TTI, DominatorTree &DT,
           AssumptionCache &AC, MemorySSA *MSSA)
      : TLI(TLI), TTI(TTI), DT(DT), AC(AC), SQ(DL, &TLI, &DT, &AC), MSSA(MSSA),
        MSSAUpdater(std::make_unique<MemorySSAUpdater>(MSSA)) {}

  bool run();

private:
  unsigned ClobberCounter = 0;
  // Almost a POD, but needs to call the constructors for the scoped hash
  // tables so that a new scope gets pushed on. These are RAII so that the
  // scope gets popped when the NodeScope is destroyed.
  class NodeScope {
  public:
    NodeScope(ScopedHTType &AvailableValues, LoadHTType &AvailableLoads,
              InvariantHTType &AvailableInvariants, CallHTType &AvailableCalls)
        : Scope(AvailableValues), LoadScope(AvailableLoads),
          InvariantScope(AvailableInvariants), CallScope(AvailableCalls) {}
    NodeScope(const NodeScope &) = delete;
    NodeScope &operator=(const NodeScope &) = delete;

  private:
    ScopedHTType::ScopeTy Scope;
    LoadHTType::ScopeTy LoadScope;
    InvariantHTType::ScopeTy InvariantScope;
    CallHTType::ScopeTy CallScope;
  };

  // Contains all the needed information to create a stack for doing a
  // depth-first traversal of the tree. This includes scopes for values,
  // loads, and calls as well as the generation. There is a child iterator
  // so that the children do not need to be stored separately.
  class StackNode {
  public:
    StackNode(ScopedHTType &AvailableValues, LoadHTType &AvailableLoads,
              InvariantHTType &AvailableInvariants, CallHTType &AvailableCalls,
              unsigned cg, DomTreeNode *n, DomTreeNode::const_iterator child,
              DomTreeNode::const_iterator end)
        : CurrentGeneration(cg), ChildGeneration(cg), Node(n),
          ChildIter(child), EndIter(end),
          Scopes(AvailableValues, AvailableLoads, AvailableInvariants,
                 AvailableCalls) {}
    StackNode(const StackNode &) = delete;
    StackNode &operator=(const StackNode &) = delete;

    // Accessors.
    unsigned currentGeneration() const { return CurrentGeneration; }
    unsigned childGeneration() const { return ChildGeneration; }
    void childGeneration(unsigned generation) { ChildGeneration = generation; }
    DomTreeNode *node() { return Node; }
    DomTreeNode::const_iterator childIter() const { return ChildIter; }

    DomTreeNode *nextChild() {
      DomTreeNode *child = *ChildIter;
      ++ChildIter;
      return child;
    }

    DomTreeNode::const_iterator end() const { return EndIter; }
    bool isProcessed() const { return Processed; }
    void process() { Processed = true; }

  private:
    unsigned CurrentGeneration;
    unsigned ChildGeneration;
    DomTreeNode *Node;
    DomTreeNode::const_iterator ChildIter;
    DomTreeNode::const_iterator EndIter;
    NodeScope Scopes;
    bool Processed = false;
  };

  /// Wrapper class to handle memory instructions, including loads,
  /// stores and intrinsic loads and stores defined by the target.
  class ParseMemoryInst {
  public:
    ParseMemoryInst(Instruction *Inst, const TargetTransformInfo &TTI)
        : Inst(Inst) {
      if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst)) {
        IntrID = II->getIntrinsicID();
        if (TTI.getTgtMemIntrinsic(II, Info))
          return;
        if (isHandledNonTargetIntrinsic(IntrID)) {
          switch (IntrID) {
          case Intrinsic::masked_load:
            Info.PtrVal = Inst->getOperand(0);
            Info.MatchingId = Intrinsic::masked_load;
            Info.ReadMem = true;
            Info.WriteMem = false;
            Info.IsVolatile = false;
            break;
          case Intrinsic::masked_store:
            Info.PtrVal = Inst->getOperand(1);
            // Use the ID of masked load as the "matching id". This will
            // prevent matching non-masked loads/stores with masked ones
            // (which could be done), but at the moment, the code here
            // does not support matching intrinsics with non-intrinsics,
            // so keep the MatchingIds specific to masked instructions
            // for now (TODO).
            Info.MatchingId = Intrinsic::masked_load;
            Info.ReadMem = false;
            Info.WriteMem = true;
            Info.IsVolatile = false;
            break;
          }
        }
      }
    }

    Instruction *get() { return Inst; }
    const Instruction *get() const { return Inst; }

    bool isLoad() const {
      if (IntrID != 0)
        return Info.ReadMem;
      return isa<LoadInst>(Inst);
    }

    bool isStore() const {
      if (IntrID != 0)
        return Info.WriteMem;
      return isa<StoreInst>(Inst);
    }

    bool isAtomic() const {
      if (IntrID != 0)
        return Info.Ordering != AtomicOrdering::NotAtomic;
      return Inst->isAtomic();
    }

    bool isUnordered() const {
      if (IntrID != 0)
        return Info.isUnordered();

      if (LoadInst *LI = dyn_cast<LoadInst>(Inst)) {
        return LI->isUnordered();
      } else if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
        return SI->isUnordered();
      }
      // Conservative answer.
      return !Inst->isAtomic();
    }

    bool isVolatile() const {
      if (IntrID != 0)
        return Info.IsVolatile;

      if (LoadInst *LI = dyn_cast<LoadInst>(Inst)) {
        return LI->isVolatile();
      } else if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
        return SI->isVolatile();
      }
      // Conservative answer.
      return true;
    }

    bool isInvariantLoad() const {
      if (auto *LI = dyn_cast<LoadInst>(Inst))
        return LI->hasMetadata(LLVMContext::MD_invariant_load);
      return false;
    }

    bool isValid() const { return getPointerOperand() != nullptr; }

    // For regular (non-intrinsic) loads/stores, this is set to -1. For
    // intrinsic loads/stores, the id is retrieved from the corresponding
    // field in the MemIntrinsicInfo structure. That field contains
    // non-negative values only.
    int getMatchingId() const {
      if (IntrID != 0)
        return Info.MatchingId;
      return -1;
    }

    Value *getPointerOperand() const {
      if (IntrID != 0)
        return Info.PtrVal;
      return getLoadStorePointerOperand(Inst);
    }

    Type *getValueType() const {
      // TODO: handle target-specific intrinsics.
      if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst)) {
        switch (II->getIntrinsicID()) {
        case Intrinsic::masked_load:
          return II->getType();
        case Intrinsic::masked_store:
          return II->getArgOperand(0)->getType();
        default:
          return nullptr;
        }
      }
      return getLoadStoreType(Inst);
    }

    bool mayReadFromMemory() const {
      if (IntrID != 0)
        return Info.ReadMem;
      return Inst->mayReadFromMemory();
    }

    bool mayWriteToMemory() const {
      if (IntrID != 0)
        return Info.WriteMem;
      return Inst->mayWriteToMemory();
    }

  private:
    Intrinsic::ID IntrID = 0;
    MemIntrinsicInfo Info;
    Instruction *Inst;
  };

  // This function is to prevent accidentally passing a non-target
  // intrinsic ID to TargetTransformInfo.
  static bool isHandledNonTargetIntrinsic(Intrinsic::ID ID) {
    switch (ID) {
    case Intrinsic::masked_load:
    case Intrinsic::masked_store:
      return true;
    }
    return false;
  }
  static bool isHandledNonTargetIntrinsic(const Value *V) {
    if (auto *II = dyn_cast<IntrinsicInst>(V))
      return isHandledNonTargetIntrinsic(II->getIntrinsicID());
    return false;
  }

  bool processNode(DomTreeNode *Node);

  bool handleBranchCondition(Instruction *CondInst, const BranchInst *BI,
                             const BasicBlock *BB, const BasicBlock *Pred);

  Value *getMatchingValue(LoadValue &InVal, ParseMemoryInst &MemInst,
                          unsigned CurrentGeneration);

  bool overridingStores(const ParseMemoryInst &Earlier,
                        const ParseMemoryInst &Later);

  Value *getOrCreateResult(Value *Inst, Type *ExpectedType) const {
    // TODO: We could insert relevant casts on type mismatch here.
    if (auto *LI = dyn_cast<LoadInst>(Inst))
      return LI->getType() == ExpectedType ? LI : nullptr;
    if (auto *SI = dyn_cast<StoreInst>(Inst)) {
      Value *V = SI->getValueOperand();
      return V->getType() == ExpectedType ? V : nullptr;
    }
    assert(isa<IntrinsicInst>(Inst) && "Instruction not supported");
    auto *II = cast<IntrinsicInst>(Inst);
    if (isHandledNonTargetIntrinsic(II->getIntrinsicID()))
      return getOrCreateResultNonTargetMemIntrinsic(II, ExpectedType);
    return TTI.getOrCreateResultFromMemIntrinsic(II, ExpectedType);
  }

  Value *getOrCreateResultNonTargetMemIntrinsic(IntrinsicInst *II,
                                                Type *ExpectedType) const {
    // TODO: We could insert relevant casts on type mismatch here.
    switch (II->getIntrinsicID()) {
    case Intrinsic::masked_load:
      return II->getType() == ExpectedType ? II : nullptr;
    case Intrinsic::masked_store: {
      Value *V = II->getOperand(0);
      return V->getType() == ExpectedType ? V : nullptr;
    }
    }
    return nullptr;
  }

  /// Return true if the instruction is known to only operate on memory
  /// provably invariant in the given "generation".
  bool isOperatingOnInvariantMemAt(Instruction *I, unsigned GenAt);

  bool isSameMemGeneration(unsigned EarlierGeneration, unsigned LaterGeneration,
                           Instruction *EarlierInst, Instruction *LaterInst);

  bool isNonTargetIntrinsicMatch(const IntrinsicInst *Earlier,
                                 const IntrinsicInst *Later) {
    auto IsSubmask = [](const Value *Mask0, const Value *Mask1) {
      // Is Mask0 a submask of Mask1?
      if (Mask0 == Mask1)
        return true;
      if (isa<UndefValue>(Mask0) || isa<UndefValue>(Mask1))
        return false;
      auto *Vec0 = dyn_cast<ConstantVector>(Mask0);
      auto *Vec1 = dyn_cast<ConstantVector>(Mask1);
      if (!Vec0 || !Vec1)
        return false;
      if (Vec0->getType() != Vec1->getType())
        return false;
      for (int i = 0, e = Vec0->getNumOperands(); i != e; ++i) {
        Constant *Elem0 = Vec0->getOperand(i);
        Constant *Elem1 = Vec1->getOperand(i);
        auto *Int0 = dyn_cast<ConstantInt>(Elem0);
        if (Int0 && Int0->isZero())
          continue;
        auto *Int1 = dyn_cast<ConstantInt>(Elem1);
        if (Int1 && !Int1->isZero())
          continue;
        if (isa<UndefValue>(Elem0) || isa<UndefValue>(Elem1))
          return false;
        if (Elem0 == Elem1)
          continue;
        return false;
      }
      return true;
    };
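    // Illustrative (not in the original source): <i1 1, i1 0> is a submask
    // of <i1 1, i1 1> because every active lane of the first mask is active
    // in the second, while <i1 1, i1 0> is not a submask of <i1 0, i1 1>
    // since lane 0 is active only in the first.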
    auto PtrOp = [](const IntrinsicInst *II) {
      if (II->getIntrinsicID() == Intrinsic::masked_load)
        return II->getOperand(0);
      if (II->getIntrinsicID() == Intrinsic::masked_store)
        return II->getOperand(1);
      llvm_unreachable("Unexpected IntrinsicInst");
    };
    auto MaskOp = [](const IntrinsicInst *II) {
      if (II->getIntrinsicID() == Intrinsic::masked_load)
        return II->getOperand(2);
      if (II->getIntrinsicID() == Intrinsic::masked_store)
        return II->getOperand(3);
      llvm_unreachable("Unexpected IntrinsicInst");
    };
    auto ThruOp = [](const IntrinsicInst *II) {
      if (II->getIntrinsicID() == Intrinsic::masked_load)
        return II->getOperand(3);
      llvm_unreachable("Unexpected IntrinsicInst");
    };

    if (PtrOp(Earlier) != PtrOp(Later))
      return false;

    Intrinsic::ID IDE = Earlier->getIntrinsicID();
    Intrinsic::ID IDL = Later->getIntrinsicID();
    // We could really use specific intrinsic classes for masked loads
    // and stores in IntrinsicInst.h.
    if (IDE == Intrinsic::masked_load && IDL == Intrinsic::masked_load) {
      // Trying to replace the later masked load with the earlier one.
      // Check that the pointers are the same, and
      // - masks and pass-throughs are the same, or
      // - the replacee's pass-through is "undef" and the replacer's mask is
      //   a super-set of the replacee's mask.
      if (MaskOp(Earlier) == MaskOp(Later) && ThruOp(Earlier) == ThruOp(Later))
        return true;
      if (!isa<UndefValue>(ThruOp(Later)))
        return false;
      return IsSubmask(MaskOp(Later), MaskOp(Earlier));
    }
    if (IDE == Intrinsic::masked_store && IDL == Intrinsic::masked_load) {
      // Trying to replace a load of a stored value with the store's value.
      // Check that the pointers are the same, and
      // - the load's mask is a subset of the store's mask, and
      // - the load's pass-through is "undef".
      if (!IsSubmask(MaskOp(Later), MaskOp(Earlier)))
        return false;
      return isa<UndefValue>(ThruOp(Later));
    }
    if (IDE == Intrinsic::masked_load && IDL == Intrinsic::masked_store) {
      // Trying to remove a store of the loaded value.
      // Check that the pointers are the same, and
      // - the store's mask is a subset of the load's mask.
      return IsSubmask(MaskOp(Later), MaskOp(Earlier));
    }
    if (IDE == Intrinsic::masked_store && IDL == Intrinsic::masked_store) {
      // Trying to remove a dead store (the earlier one).
      // Check that the pointers are the same, and
      // - the to-be-removed store's mask is a subset of the other store's
      //   mask.
      return IsSubmask(MaskOp(Earlier), MaskOp(Later));
    }
    return false;
  }
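
  // Example for the masked-intrinsic matching above (illustrative, not from
  // the original source): given
  //   call void @llvm.masked.store.v4i32.p0(<4 x i32> %v, ptr %p, i32 4,
  //                                         <4 x i1> %m)
  //   %x = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr %p, i32 4,
  //                                                  <4 x i1> %m,
  //                                                  <4 x i32> undef)
  // the load's mask equals (and is thus a submask of) the store's mask and
  // its pass-through is undef, so %x can be replaced by %v.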

  void removeMSSA(Instruction &Inst) {
    if (!MSSA)
      return;
    if (VerifyMemorySSA)
      MSSA->verifyMemorySSA();
    // Removing a store here can leave MemorySSA in an unoptimized state by
    // creating MemoryPhis that have identical arguments and by creating
    // MemoryUses whose defining access is not an actual clobber. The phi
    // case is handled by MemorySSA when passing OptimizePhis = true to
    // removeMemoryAccess. The non-optimized MemoryUse case is lazily
    // updated by MemorySSA's getClobberingMemoryAccess.
    MSSAUpdater->removeMemoryAccess(&Inst, true);
  }
};

} // end anonymous namespace

/// Determine if the memory referenced by LaterInst is from the same heap
/// version as EarlierInst.
/// This is currently called in two scenarios:
///
///   load p
///   ...
///   load p
///
/// and
///
///   x = load p
///   ...
///   store x, p
///
/// in both cases we want to verify that there are no possible writes to the
/// memory referenced by p between the earlier and later instruction.
bool EarlyCSE::isSameMemGeneration(unsigned EarlierGeneration,
                                   unsigned LaterGeneration,
                                   Instruction *EarlierInst,
                                   Instruction *LaterInst) {
  // Check the simple memory generation tracking first.
  if (EarlierGeneration == LaterGeneration)
    return true;

  if (!MSSA)
    return false;

  // If MemorySSA has determined that one of EarlierInst or LaterInst does
  // not read/write memory, then we can safely return true here.
  // FIXME: We could be more aggressive when checking doesNotAccessMemory(),
  // onlyReadsMemory(), mayReadFromMemory(), and mayWriteToMemory() in this
  // pass by also checking the MemorySSA MemoryAccess on the instruction.
  // Initial experiments suggest this isn't worthwhile, at least for C/C++
  // code compiled with the default optimization pipeline.
  auto *EarlierMA = MSSA->getMemoryAccess(EarlierInst);
  if (!EarlierMA)
    return true;
  auto *LaterMA = MSSA->getMemoryAccess(LaterInst);
  if (!LaterMA)
    return true;

  // Since we know LaterDef dominates LaterInst and EarlierInst dominates
  // LaterInst, if LaterDef dominates EarlierInst then it can't occur between
  // EarlierInst and LaterInst and neither can any other write that
  // potentially clobbers LaterInst.
  MemoryAccess *LaterDef;
  if (ClobberCounter < EarlyCSEMssaOptCap) {
    LaterDef = MSSA->getWalker()->getClobberingMemoryAccess(LaterInst);
    ClobberCounter++;
  } else
    LaterDef = LaterMA->getDefiningAccess();

  return MSSA->dominates(LaterDef, EarlierMA);
}

bool EarlyCSE::isOperatingOnInvariantMemAt(Instruction *I, unsigned GenAt) {
  // A location loaded from with an invariant_load is assumed to *never*
  // change within the visible scope of the compilation.
  if (auto *LI = dyn_cast<LoadInst>(I))
    if (LI->hasMetadata(LLVMContext::MD_invariant_load))
      return true;

  auto MemLocOpt = MemoryLocation::getOrNone(I);
  if (!MemLocOpt)
    // "target" intrinsic forms of loads aren't currently known to
    // MemoryLocation::get. TODO
    return false;
  MemoryLocation MemLoc = *MemLocOpt;
  if (!AvailableInvariants.count(MemLoc))
    return false;

  // Is the generation at which this became invariant older than the
  // current one?
  return AvailableInvariants.lookup(MemLoc) <= GenAt;
}

bool EarlyCSE::handleBranchCondition(Instruction *CondInst,
                                     const BranchInst *BI, const BasicBlock *BB,
                                     const BasicBlock *Pred) {
  assert(BI->isConditional() && "Should be a conditional branch!");
  assert(BI->getCondition() == CondInst && "Wrong condition?");
  assert(BI->getSuccessor(0) == BB || BI->getSuccessor(1) == BB);
  auto *TorF = (BI->getSuccessor(0) == BB)
                   ? ConstantInt::getTrue(BB->getContext())
                   : ConstantInt::getFalse(BB->getContext());
  auto MatchBinOp = [](Instruction *I, unsigned Opcode, Value *&LHS,
                       Value *&RHS) {
    if (Opcode == Instruction::And &&
        match(I, m_LogicalAnd(m_Value(LHS), m_Value(RHS))))
      return true;
    else if (Opcode == Instruction::Or &&
             match(I, m_LogicalOr(m_Value(LHS), m_Value(RHS))))
      return true;
    return false;
  };
  // If the condition is an AND operation, we can propagate its operands into
  // the true branch. If it is an OR operation, we can propagate them into
  // the false branch.
  unsigned PropagateOpcode =
      (BI->getSuccessor(0) == BB) ? Instruction::And : Instruction::Or;
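
  // Illustrative (not in the original source): for
  //   %c = and i1 %a, %b
  //   br i1 %c, label %taken, label %other
  // the worklist walk below records %c, %a, and %b as true on the edge into
  // %taken, and rewrites their dominated uses to the constant.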

  bool MadeChanges = false;
  SmallVector<Instruction *, 4> WorkList;
  SmallPtrSet<Instruction *, 4> Visited;
  WorkList.push_back(CondInst);
  while (!WorkList.empty()) {
    Instruction *Curr = WorkList.pop_back_val();

    AvailableValues.insert(Curr, TorF);
    LLVM_DEBUG(dbgs() << "EarlyCSE CVP: Add conditional value for '"
                      << Curr->getName() << "' as " << *TorF << " in "
                      << BB->getName() << "\n");
    if (!DebugCounter::shouldExecute(CSECounter)) {
      LLVM_DEBUG(dbgs() << "Skipping due to debug counter\n");
    } else {
      // Replace all dominated uses with the known value.
      if (unsigned Count = replaceDominatedUsesWith(Curr, TorF, DT,
                                                    BasicBlockEdge(Pred, BB))) {
        NumCSECVP += Count;
        MadeChanges = true;
      }
    }

    Value *LHS, *RHS;
    if (MatchBinOp(Curr, PropagateOpcode, LHS, RHS))
      for (auto *Op : { LHS, RHS })
        if (Instruction *OPI = dyn_cast<Instruction>(Op))
          if (SimpleValue::canHandle(OPI) && Visited.insert(OPI).second)
            WorkList.push_back(OPI);
  }

  return MadeChanges;
}

Value *EarlyCSE::getMatchingValue(LoadValue &InVal, ParseMemoryInst &MemInst,
                                  unsigned CurrentGeneration) {
  if (InVal.DefInst == nullptr)
    return nullptr;
  if (InVal.MatchingId != MemInst.getMatchingId())
    return nullptr;
  // We don't yet handle removing loads with ordering of any kind.
  if (MemInst.isVolatile() || !MemInst.isUnordered())
    return nullptr;
  // We can't replace an atomic load with one which isn't also atomic.
  if (MemInst.isLoad() && !InVal.IsAtomic && MemInst.isAtomic())
    return nullptr;
  // The value V returned from this function is used differently depending
  // on whether MemInst is a load or a store. If it's a load, we will replace
  // MemInst with V; if it's a store, we will check whether V is the same as
  // the available value.
  bool MemInstMatching = !MemInst.isLoad();
  Instruction *Matching = MemInstMatching ? MemInst.get() : InVal.DefInst;
  Instruction *Other = MemInstMatching ? InVal.DefInst : MemInst.get();

  // For stores, check the result values before checking the memory
  // generation (otherwise isSameMemGeneration may crash).
  Value *Result = MemInst.isStore()
                      ? getOrCreateResult(Matching, Other->getType())
                      : nullptr;
  if (MemInst.isStore() && InVal.DefInst != Result)
    return nullptr;

  // Deal with non-target memory intrinsics.
  bool MatchingNTI = isHandledNonTargetIntrinsic(Matching);
  bool OtherNTI = isHandledNonTargetIntrinsic(Other);
  if (OtherNTI != MatchingNTI)
    return nullptr;
  if (OtherNTI && MatchingNTI) {
    if (!isNonTargetIntrinsicMatch(cast<IntrinsicInst>(InVal.DefInst),
                                   cast<IntrinsicInst>(MemInst.get())))
      return nullptr;
  }

  if (!isOperatingOnInvariantMemAt(MemInst.get(), InVal.Generation) &&
      !isSameMemGeneration(InVal.Generation, CurrentGeneration, InVal.DefInst,
                           MemInst.get()))
    return nullptr;

  if (!Result)
    Result = getOrCreateResult(Matching, Other->getType());
  return Result;
}

bool EarlyCSE::overridingStores(const ParseMemoryInst &Earlier,
                                const ParseMemoryInst &Later) {
  // Can we remove the Earlier store because of the Later store?

  assert(Earlier.isUnordered() && !Earlier.isVolatile() &&
         "Violated invariant");
  if (Earlier.getPointerOperand() != Later.getPointerOperand())
    return false;
  if (!Earlier.getValueType() || !Later.getValueType() ||
      Earlier.getValueType() != Later.getValueType())
    return false;
  if (Earlier.getMatchingId() != Later.getMatchingId())
    return false;
  // At the moment, we don't remove ordered stores, but do remove
  // unordered atomic stores. There's no special requirement (for
  // unordered atomics) about removing atomic stores only in favor of
  // other atomic stores since we were going to execute the non-atomic
  // one anyway and the atomic one might never have become visible.
  if (!Earlier.isUnordered() || !Later.isUnordered())
    return false;

  // Deal with non-target memory intrinsics.
  bool ENTI = isHandledNonTargetIntrinsic(Earlier.get());
  bool LNTI = isHandledNonTargetIntrinsic(Later.get());
  if (ENTI && LNTI)
    return isNonTargetIntrinsicMatch(cast<IntrinsicInst>(Earlier.get()),
                                     cast<IntrinsicInst>(Later.get()));

  // Because of the check above, at least one of them is false.
  // For now disallow matching intrinsics with non-intrinsics,
  // so assume that the stores match if neither is an intrinsic.
  return ENTI == LNTI;
}

bool EarlyCSE::processNode(DomTreeNode *Node) {
  bool Changed = false;
  BasicBlock *BB = Node->getBlock();

  // If this block has a single predecessor, then the predecessor is the
  // parent of the domtree node and all of the live-out memory values are
  // still current in this block. If this block has multiple predecessors,
  // then they could have invalidated the live-out memory values of our
  // parent. For now, just be conservative and invalidate memory if this
  // block has multiple predecessors.
  if (!BB->getSinglePredecessor())
    ++CurrentGeneration;

  // If this node has a single predecessor which ends in a conditional
  // branch, we can infer the value of the branch condition given that we
  // took this path. We need the single predecessor to ensure there's not
  // another path which reaches this block where the condition might hold a
  // different value. Since we're adding this to the scoped hash table (like
  // any other def), it will have been popped if we encounter a future merge
  // block.
  if (BasicBlock *Pred = BB->getSinglePredecessor()) {
    auto *BI = dyn_cast<BranchInst>(Pred->getTerminator());
    if (BI && BI->isConditional()) {
      auto *CondInst = dyn_cast<Instruction>(BI->getCondition());
      if (CondInst && SimpleValue::canHandle(CondInst))
        Changed |= handleBranchCondition(CondInst, BI, BB, Pred);
    }
  }

  /// LastStore - Keep track of the last non-volatile store that we saw...
  /// for as long as there is no instruction that reads memory. If we see a
  /// store to the same location, we delete the dead store. This zaps trivial
  /// dead stores which can occur in bitfield code among other things.
  Instruction *LastStore = nullptr;
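
  // Illustrative (not in the original source): in
  //   store i32 %a, ptr %p
  //   store i32 %b, ptr %p
  // with no intervening read, the first store is dead and is deleted when
  // the second one is processed.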

  // See if any instructions in the block can be eliminated. If so, do it.
  // If not, add them to AvailableValues.
  for (Instruction &Inst : make_early_inc_range(*BB)) {
    // Dead instructions should just be removed.
    if (isInstructionTriviallyDead(&Inst, &TLI)) {
      LLVM_DEBUG(dbgs() << "EarlyCSE DCE: " << Inst << '\n');
      if (!DebugCounter::shouldExecute(CSECounter)) {
        LLVM_DEBUG(dbgs() << "Skipping due to debug counter\n");
        continue;
      }

      salvageKnowledge(&Inst, &AC);
      salvageDebugInfo(Inst);
      removeMSSA(Inst);
      Inst.eraseFromParent();
      Changed = true;
      ++NumSimplify;
      continue;
    }

    // Skip assume intrinsics, they don't really have side effects (although
    // they're marked as such to ensure preservation of control
    // dependencies), and this pass will not bother with its removal.
    // However, we should mark its condition as true for all dominated
    // blocks.
    if (auto *Assume = dyn_cast<AssumeInst>(&Inst)) {
      auto *CondI = dyn_cast<Instruction>(Assume->getArgOperand(0));
      if (CondI && SimpleValue::canHandle(CondI)) {
        LLVM_DEBUG(dbgs() << "EarlyCSE considering assumption: " << Inst
                          << '\n');
        AvailableValues.insert(CondI, ConstantInt::getTrue(BB->getContext()));
      } else
        LLVM_DEBUG(dbgs() << "EarlyCSE skipping assumption: " << Inst << '\n');
      continue;
    }

    // Likewise, noalias intrinsics don't actually write.
    if (match(&Inst,
              m_Intrinsic<Intrinsic::experimental_noalias_scope_decl>())) {
      LLVM_DEBUG(dbgs() << "EarlyCSE skipping noalias intrinsic: " << Inst
                        << '\n');
      continue;
    }

    // Skip sideeffect intrinsics, for the same reason as assume intrinsics.
    if (match(&Inst, m_Intrinsic<Intrinsic::sideeffect>())) {
      LLVM_DEBUG(dbgs() << "EarlyCSE skipping sideeffect: " << Inst << '\n');
      continue;
    }

    // Skip pseudoprobe intrinsics, for the same reason as assume intrinsics.
    if (match(&Inst, m_Intrinsic<Intrinsic::pseudoprobe>())) {
      LLVM_DEBUG(dbgs() << "EarlyCSE skipping pseudoprobe: " << Inst << '\n');
      continue;
    }

    // We can skip all invariant.start intrinsics since they only read
    // memory, and we can forward values across them. For invariant starts
    // without invariant ends, we can use the fact that the invariantness
    // never ends to start a scope in the current generation which is true
    // for all future generations. Also, we don't need to consume the last
    // store since the semantics of invariant.start allow us to perform DSE
    // of the last store, if there was a store following invariant.start.
    // Consider:
    //
    //   store 30, i8* p
    //   invariant.start(p)
    //   store 40, i8* p
    //
    // We can DSE the store to 30, since the store of 40 to invariant
    // location p causes undefined behaviour.
    if (match(&Inst, m_Intrinsic<Intrinsic::invariant_start>())) {
      // If there are any uses, the scope might end.
      if (!Inst.use_empty())
        continue;
      MemoryLocation MemLoc =
          MemoryLocation::getForArgument(&cast<CallInst>(Inst), 1, TLI);
      // Don't start a scope if we already have a better one pushed.
      if (!AvailableInvariants.count(MemLoc))
        AvailableInvariants.insert(MemLoc, CurrentGeneration);
      continue;
    }

    if (isGuard(&Inst)) {
      if (auto *CondI =
              dyn_cast<Instruction>(cast<CallInst>(Inst).getArgOperand(0))) {
        if (SimpleValue::canHandle(CondI)) {
          // Do we already know the actual value of this condition?
          if (auto *KnownCond = AvailableValues.lookup(CondI)) {
            // Is the condition known to be true?
            if (isa<ConstantInt>(KnownCond) &&
                cast<ConstantInt>(KnownCond)->isOne()) {
              LLVM_DEBUG(dbgs()
                         << "EarlyCSE removing guard: " << Inst << '\n');
              salvageKnowledge(&Inst, &AC);
              removeMSSA(Inst);
              Inst.eraseFromParent();
              Changed = true;
              continue;
            } else
              // Use the known value if it wasn't true.
              cast<CallInst>(Inst).setArgOperand(0, KnownCond);
          }
          // The condition we're guarding on here is true for all dominated
          // locations.
          AvailableValues.insert(CondI, ConstantInt::getTrue(BB->getContext()));
        }
      }

      // Guard intrinsics read all memory, but don't write any memory.
      // Accordingly, don't update the generation but consume the last store
      // (to avoid an incorrect DSE).
      LastStore = nullptr;
      continue;
    }

    // If the instruction can be simplified (e.g. X+0 = X) then replace it
    // with its simpler value.
    if (Value *V = simplifyInstruction(&Inst, SQ)) {
      LLVM_DEBUG(dbgs() << "EarlyCSE Simplify: " << Inst << " to: " << *V
                        << '\n');
      if (!DebugCounter::shouldExecute(CSECounter)) {
        LLVM_DEBUG(dbgs() << "Skipping due to debug counter\n");
      } else {
        bool Killed = false;
        if (!Inst.use_empty()) {
          Inst.replaceAllUsesWith(V);
          Changed = true;
        }
        if (isInstructionTriviallyDead(&Inst, &TLI)) {
          salvageKnowledge(&Inst, &AC);
          removeMSSA(Inst);
          Inst.eraseFromParent();
          Changed = true;
          Killed = true;
        }
        if (Changed)
          ++NumSimplify;
        if (Killed)
          continue;
      }
    }

    // If this is a simple instruction that we can value number, process it.
    if (SimpleValue::canHandle(&Inst)) {
      if (auto *CI = dyn_cast<ConstrainedFPIntrinsic>(&Inst)) {
        assert(CI->getExceptionBehavior() != fp::ebStrict &&
               "Unexpected ebStrict from SimpleValue::canHandle()");
        assert((!CI->getRoundingMode() ||
                CI->getRoundingMode() != RoundingMode::Dynamic) &&
               "Unexpected dynamic rounding from SimpleValue::canHandle()");
      }
      // See if the instruction has an available value. If so, use it.
      if (Value *V = AvailableValues.lookup(&Inst)) {
        LLVM_DEBUG(dbgs() << "EarlyCSE CSE: " << Inst << " to: " << *V
                          << '\n');
        if (!DebugCounter::shouldExecute(CSECounter)) {
          LLVM_DEBUG(dbgs() << "Skipping due to debug counter\n");
          continue;
        }
        if (auto *I = dyn_cast<Instruction>(V)) {
          // If I being poison triggers UB, there is no need to drop those
          // flags. Otherwise, only retain flags present on both I and Inst.
          // TODO: Currently some fast-math flags are not treated as
          // poison-generating even though they should. Until this is fixed,
          // always retain flags present on both I and Inst for floating
          // point instructions.
          if (isa<FPMathOperator>(I) ||
              (I->hasPoisonGeneratingFlags() && !programUndefinedIfPoison(I)))
            I->andIRFlags(&Inst);
        }
        Inst.replaceAllUsesWith(V);
        salvageKnowledge(&Inst, &AC);
        removeMSSA(Inst);
        Inst.eraseFromParent();
        Changed = true;
        ++NumCSE;
        continue;
      }

      // Otherwise, just remember that this value is available.
      AvailableValues.insert(&Inst, &Inst);
      continue;
    }

    ParseMemoryInst MemInst(&Inst, TTI);
    // If this is a non-volatile load, process it.
    if (MemInst.isValid() && MemInst.isLoad()) {
      // (Conservatively) we can't peek past the ordering implied by this
      // operation, but we can add this load to our set of available values.
      if (MemInst.isVolatile() || !MemInst.isUnordered()) {
        LastStore = nullptr;
        ++CurrentGeneration;
      }

      if (MemInst.isInvariantLoad()) {
        // If we pass an invariant load, we know that memory location is
        // indefinitely constant from the moment of first dereferenceability.
        // We conservatively treat the invariant_load as that moment. If we
        // pass an invariant load after already establishing a scope, don't
        // restart it since we want to preserve the earliest point seen.
        auto MemLoc = MemoryLocation::get(&Inst);
        if (!AvailableInvariants.count(MemLoc))
          AvailableInvariants.insert(MemLoc, CurrentGeneration);
      }

      // If we have an available version of this load, and if it is the
      // right generation or the load is known to be from an invariant
      // location, replace this instruction.
      //
      // If either the dominating load or the current load are invariant,
      // then we can assume the current load loads the same value as the
      // dominating load.
      LoadValue InVal = AvailableLoads.lookup(MemInst.getPointerOperand());
      if (Value *Op = getMatchingValue(InVal, MemInst, CurrentGeneration)) {
        LLVM_DEBUG(dbgs() << "EarlyCSE CSE LOAD: " << Inst
                          << " to: " << *InVal.DefInst << '\n');
        if (!DebugCounter::shouldExecute(CSECounter)) {
          LLVM_DEBUG(dbgs() << "Skipping due to debug counter\n");
          continue;
        }
        if (!Inst.use_empty())
          Inst.replaceAllUsesWith(Op);
        salvageKnowledge(&Inst, &AC);
        removeMSSA(Inst);
        Inst.eraseFromParent();
        Changed = true;
        ++NumCSELoad;
        continue;
      }

      // Otherwise, remember that we have this instruction.
      AvailableLoads.insert(MemInst.getPointerOperand(),
                            LoadValue(&Inst, CurrentGeneration,
                                      MemInst.getMatchingId(),
                                      MemInst.isAtomic()));
      LastStore = nullptr;
      continue;
    }

    // If this instruction may read from memory or throw (and potentially
    // read from memory in the exception handler), forget LastStore.
    // Load/store intrinsics will indicate both a read and a write to
    // memory. The target may override this (e.g. so that a store intrinsic
    // does not read from memory, and thus will be treated the same as a
    // regular store for commoning purposes).
    if ((Inst.mayReadFromMemory() || Inst.mayThrow()) &&
        !(MemInst.isValid() && !MemInst.mayReadFromMemory()))
      LastStore = nullptr;

    // If this is a read-only call, process it.
    if (CallValue::canHandle(&Inst)) {
      // If we have an available version of this call, and if it is the
      // right generation, replace this instruction.
      std::pair<Instruction *, unsigned> InVal = AvailableCalls.lookup(&Inst);
      if (InVal.first != nullptr &&
          isSameMemGeneration(InVal.second, CurrentGeneration, InVal.first,
                              &Inst)) {
        LLVM_DEBUG(dbgs() << "EarlyCSE CSE CALL: " << Inst
                          << " to: " << *InVal.first << '\n');
        if (!DebugCounter::shouldExecute(CSECounter)) {
          LLVM_DEBUG(dbgs() << "Skipping due to debug counter\n");
          continue;
        }
        if (!Inst.use_empty())
          Inst.replaceAllUsesWith(InVal.first);
        salvageKnowledge(&Inst, &AC);
        removeMSSA(Inst);
        Inst.eraseFromParent();
        Changed = true;
        ++NumCSECall;
        continue;
      }

      // Otherwise, remember that we have this instruction.
      AvailableCalls.insert(&Inst, std::make_pair(&Inst, CurrentGeneration));
      continue;
    }

    // A release fence requires that all stores complete before it, but does
    // not prevent the reordering of following loads 'before' the fence. As
    // a result, we don't need to consider it as writing to memory and don't
    // need to advance the generation. We do need to prevent DSE across the
    // fence, but that's handled above.
    if (auto *FI = dyn_cast<FenceInst>(&Inst))
      if (FI->getOrdering() == AtomicOrdering::Release) {
        assert(Inst.mayReadFromMemory() && "relied on to prevent DSE above");
        continue;
      }

    // Write-back DSE - If we write back the same value we just loaded from
    // the same location and haven't passed any intervening writes or
    // ordering operations, we can remove the write. The primary benefit is
    // in allowing the available load table to remain valid and value
    // forward past where the store originally was.
1553 if (MemInst.isValid() && MemInst.isStore()) {
1554 LoadValue InVal = AvailableLoads.lookup(MemInst.getPointerOperand());
1555 if (InVal.DefInst &&
1556 InVal.DefInst == getMatchingValue(InVal, MemInst, CurrentGeneration)) {
1557 // It is okay to have a LastStore to a different pointer here if MemorySSA
1558 // tells us that the load and store are from the same memory generation.
1559 // In that case, LastStore should keep its present value since we're
1560 // removing the current store.
1561 assert((!LastStore ||
1562 ParseMemoryInst(LastStore, TTI).getPointerOperand() ==
1563 MemInst.getPointerOperand() ||
1564 MSSA) &&
1565 "can't have an intervening store if not using MemorySSA!");
1566 LLVM_DEBUG(dbgs() << "EarlyCSE DSE (writeback): " << Inst << '\n');
1567 if (!DebugCounter::shouldExecute(CSECounter)) {
1568 LLVM_DEBUG(dbgs() << "Skipping due to debug counter\n");
1569 continue;
1570 }
1571 salvageKnowledge(&Inst, &AC);
1572 removeMSSA(Inst);
1573 Inst.eraseFromParent();
1574 Changed = true;
1575 ++NumDSE;
1576 // We can avoid incrementing the generation count since we were able
1577 // to eliminate this store.
1578 continue;
1579 }
1580 }
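 // The shape removed by this write-back DSE, in hypothetical IR:
 //   %v = load i32, ptr %p
 //   store i32 %v, ptr %p   ; stores back the value just loaded
 // With no intervening write or ordering operation, the store changes nothing
 // in memory, so it is deleted and the generation count is left alone.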
1581
1582 // Okay, this isn't something we can CSE at all. Check to see if it is
1583 // something that could modify memory. If so, our available memory values
1584 // cannot be used, so bump the generation count.
1585 if (Inst.mayWriteToMemory()) {
1586 ++CurrentGeneration;
1587
1588 if (MemInst.isValid() && MemInst.isStore()) {
1589 // We do a trivial form of DSE if there are two stores to the same
1590 // location with no intervening loads. Delete the earlier store.
1591 if (LastStore) {
1592 if (overridingStores(ParseMemoryInst(LastStore, TTI), MemInst)) {
1593 LLVM_DEBUG(dbgs() << "EarlyCSE DEAD STORE: " << *LastStore
1594 << " due to: " << Inst << '\n');
1595 if (!DebugCounter::shouldExecute(CSECounter)) {
1596 LLVM_DEBUG(dbgs() << "Skipping due to debug counter\n");
1597 } else {
1598 salvageKnowledge(&Inst, &AC);
1599 removeMSSA(*LastStore);
1600 LastStore->eraseFromParent();
1601 Changed = true;
1602 ++NumDSE;
1603 LastStore = nullptr;
1604 }
1605 }
1606 // fallthrough - we can exploit information about this store
1607 }
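 // The trivial DSE pattern handled just above, in hypothetical IR:
 //   store i32 1, ptr %p   ; LastStore
 //   store i32 2, ptr %p   ; overriding store, no intervening load
 // The earlier store can never be observed, so it is erased, and the later
 // store becomes the new LastStore candidate below.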
1608
1609 // Okay, we just invalidated anything we knew about loaded values. Try
1610 // to salvage *something* by remembering that the stored value is a live
1611 // version of the pointer. It is safe to forward from volatile stores
1612 // to non-volatile loads, so we don't have to check for volatility of
1613 // the store.
1614 AvailableLoads.insert(MemInst.getPointerOperand(),
1615 LoadValue(&Inst, CurrentGeneration,
1616 MemInst.getMatchingId(),
1617 MemInst.isAtomic()));
1618
1619 // Remember that this was the last unordered store we saw for DSE. We
1620 // don't yet handle DSE on ordered or volatile stores since we don't
1621 // have a good way to model the ordering requirement for following
1622 // passes once the store is removed. We could insert a fence, but
1623 // since fences are slightly stronger than stores in their ordering,
1624 // it's not clear this is a profitable transform. Another option would
1625 // be to merge the ordering with that of the post-dominating store.
1626 if (MemInst.isUnordered() && !MemInst.isVolatile())
1627 LastStore = &Inst;
1628 else
1629 LastStore = nullptr;
1630 }
1631 }
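 // A sketch of the generation bump in hypothetical IR:
 //   %a = load i32, ptr %p   ; recorded at generation N
 //   call void @g()          ; mayWriteToMemory() -> generation becomes N+1
 //   %b = load i32, ptr %p   ; plain generation check fails, so no CSE
 // (When MemorySSA is available, isSameMemGeneration may still prove that @g
 // does not clobber %p, re-enabling the CSE of %b.)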
1632 }
1633
1634 return Changed;
1635}
1636
1637bool EarlyCSE::run() {
1638 // Note: a deque is used here because it gives significant performance
1639 // gains over vector when the container becomes very large, due to the
1640 // specific access patterns. For more information see the mailing list
1641 // discussion on this:
1642 // http://lists.llvm.org/pipermail/llvm-commits/Week-of-Mon-20120116/135228.html
1643 std::deque<StackNode *> nodesToProcess;
1644
1645 bool Changed = false;
1646
1647 // Process the root node.
1648 nodesToProcess.push_back(new StackNode(
1649 AvailableValues, AvailableLoads, AvailableInvariants, AvailableCalls,
1650 CurrentGeneration, DT.getRootNode(),
1651 DT.getRootNode()->begin(), DT.getRootNode()->end()));
1652
1653 assert(!CurrentGeneration && "Create a new EarlyCSE instance to rerun it.");
1654
1655 // Process the stack.
1656 while (!nodesToProcess.empty()) {
1657 // Grab the node on top of the stack. Set the current generation, then
1658 // either process the node, descend into its next child, or pop it.
1659 StackNode *NodeToProcess = nodesToProcess.back();
1660
1661 // Initialize class members.
1662 CurrentGeneration = NodeToProcess->currentGeneration();
1663
1664 // Check if the node needs to be processed.
1665 if (!NodeToProcess->isProcessed()) {
1666 // Process the node.
1667 Changed |= processNode(NodeToProcess->node());
1668 NodeToProcess->childGeneration(CurrentGeneration);
1669 NodeToProcess->process();
1670 } else if (NodeToProcess->childIter() != NodeToProcess->end()) {
1671 // Push the next child onto the stack.
1672 DomTreeNode *child = NodeToProcess->nextChild();
1673 nodesToProcess.push_back(
1674 new StackNode(AvailableValues, AvailableLoads, AvailableInvariants,
1675 AvailableCalls, NodeToProcess->childGeneration(),
1676 child, child->begin(), child->end()));
1677 } else {
1678 // It has been processed, and there are no more children to process,
1679 // so delete it and pop it off the stack.
1680 delete NodeToProcess;
1681 nodesToProcess.pop_back();
1682 }
1683 } // while (!nodes...)
1684
1685 return Changed;
1686}
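// The loop above is a non-recursive DFS over the dominator tree. As a rough
// sketch of the same shape (illustrative names, not LLVM API):
//   stack.push_back(new StackNode(root));
//   while (!stack.empty()) {
//     StackNode *N = stack.back();
//     if (!N->isProcessed())      { visit(N); N->markProcessed(); }
//     else if (N->hasNextChild()) stack.push_back(new StackNode(N->nextChild()));
//     else                        { delete N; stack.pop_back(); }
//   }
// Because each StackNode owns the scoped-hash-table scopes opened for its
// block, deleting the node on pop ends those scopes exactly when the node's
// dominator subtree has been fully processed.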
1687
1688 PreservedAnalyses EarlyCSEPass::run(Function &F,
1689                                     FunctionAnalysisManager &AM) {
1690 auto &TLI = AM.getResult<TargetLibraryAnalysis>(F);
1691 auto &TTI = AM.getResult<TargetIRAnalysis>(F);
1692 auto &DT = AM.getResult<DominatorTreeAnalysis>(F);
1693 auto &AC = AM.getResult<AssumptionAnalysis>(F);
1694 auto *MSSA =
1695 UseMemorySSA ? &AM.getResult<MemorySSAAnalysis>(F).getMSSA() : nullptr;
1696
1697 EarlyCSE CSE(F.getParent()->getDataLayout(), TLI, TTI, DT, AC, MSSA);
1698
1699 if (!CSE.run())
1700 return PreservedAnalyses::all();
1701
1702 PreservedAnalyses PA;
1703 PA.preserveSet<CFGAnalyses>();
1704 if (UseMemorySSA)
1705 PA.preserve<MemorySSAAnalysis>();
1706 return PA;
1707}
1708
1709 void EarlyCSEPass::printPipeline(
1710 raw_ostream &OS, function_ref<StringRef(StringRef)> MapClassName2PassName) {
1711 static_cast<PassInfoMixin<EarlyCSEPass> *>(this)->printPipeline(
1712 OS, MapClassName2PassName);
1713 OS << '<';
1714 if (UseMemorySSA)
1715 OS << "memssa";
1716 OS << '>';
1717}
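// As a usage sketch: when UseMemorySSA is set this prints "early-cse<memssa>",
// which matches the new-pass-manager spelling accepted by opt, e.g.
//   opt -passes='early-cse<memssa>' -S in.ll
// (plain 'early-cse' selects the variant without MemorySSA).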
1718
1719namespace {
1720
1721/// A simple and fast domtree-based CSE pass.
1722///
1723/// This pass does a simple depth-first walk over the dominator tree,
1724/// eliminating trivially redundant instructions and using instsimplify to
1725/// canonicalize things as it goes. It is intended to be fast and catch obvious
1726/// cases so that instcombine and other passes are more effective. It is
1727/// expected that a later pass of GVN will catch the interesting/hard cases.
1728template<bool UseMemorySSA>
1729class EarlyCSELegacyCommonPass : public FunctionPass {
1730public:
1731 static char ID;
1732
1733 EarlyCSELegacyCommonPass() : FunctionPass(ID) {
1734 if (UseMemorySSA)
1735 initializeEarlyCSEMemSSALegacyPassPass(*PassRegistry::getPassRegistry());
1736 else
1737 initializeEarlyCSELegacyPassPass(*PassRegistry::getPassRegistry());
1738 }
1739
1740 bool runOnFunction(Function &F) override {
1741 if (skipFunction(F))
1742 return false;
1743
1744 auto &TLI = getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F);
1745 auto &TTI = getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
1746 auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
1747 auto &AC = getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
1748 auto *MSSA =
1749 UseMemorySSA ? &getAnalysis<MemorySSAWrapperPass>().getMSSA() : nullptr;
1750
1751 EarlyCSE CSE(F.getParent()->getDataLayout(), TLI, TTI, DT, AC, MSSA);
1752
1753 return CSE.run();
1754 }
1755
1756 void getAnalysisUsage(AnalysisUsage &AU) const override {
1757 AU.addRequired<AssumptionCacheTracker>();
1758 AU.addRequired<DominatorTreeWrapperPass>();
1759 AU.addRequired<TargetLibraryInfoWrapperPass>();
1760 AU.addRequired<TargetTransformInfoWrapperPass>();
1761 if (UseMemorySSA) {
1762 AU.addRequired<AAResultsWrapperPass>();
1763 AU.addRequired<MemorySSAWrapperPass>();
1764 AU.addPreserved<MemorySSAWrapperPass>();
1765 }
1766 AU.addPreserved<GlobalsAAWrapperPass>();
1767 AU.addPreserved<AAResultsWrapperPass>();
1768 AU.setPreservesCFG();
1769 }
1770};
1771
1772} // end anonymous namespace
1773
1774using EarlyCSELegacyPass = EarlyCSELegacyCommonPass</*UseMemorySSA=*/false>;
1775
1776template<>
1777char EarlyCSELegacyPass::ID = 0;
1778
1779INITIALIZE_PASS_BEGIN(EarlyCSELegacyPass, "early-cse", "Early CSE", false,
1780 false)
1781 INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
1782 INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
1783 INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
1784 INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
1785 INITIALIZE_PASS_END(EarlyCSELegacyPass, "early-cse", "Early CSE", false, false)
1786
1787using EarlyCSEMemSSALegacyPass =
1788 EarlyCSELegacyCommonPass</*UseMemorySSA=*/true>;
1789
1790template<>
1791char EarlyCSEMemSSALegacyPass::ID = 0;
1792
1793 FunctionPass *llvm::createEarlyCSEPass(bool UseMemorySSA) {
1794 if (UseMemorySSA)
1795 return new EarlyCSEMemSSALegacyPass();
1796 else
1797 return new EarlyCSELegacyPass();
1798}
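// Usage sketch for the legacy pass manager (illustrative, not from this file):
//   legacy::PassManager PM;
//   PM.add(createEarlyCSEPass(/*UseMemorySSA=*/true)); // "early-cse-memssa"
// Codegen pipelines, for example, use this factory when scheduling IR-level
// cleanups before instruction selection.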
1799
1800INITIALIZE_PASS_BEGIN(EarlyCSEMemSSALegacyPass, "early-cse-memssa",
1801 "Early CSE w/ MemorySSA", false, false)
1802 INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
1803 INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
1804 INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
1805 INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
1806 INITIALIZE_PASS_DEPENDENCY(GlobalsAAWrapperPass)
1807 INITIALIZE_PASS_DEPENDENCY(MemorySSAWrapperPass)
1808 INITIALIZE_PASS_END(EarlyCSEMemSSALegacyPass, "early-cse-memssa",
1809 "Early CSE w/ MemorySSA", false, false)