LLVM 23.0.0git
EarlyCSE.cpp
Go to the documentation of this file.
1//===- EarlyCSE.cpp - Simple and fast CSE pass ----------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This pass performs a simple dominator tree walk that eliminates trivially
10// redundant instructions.
11//
12//===----------------------------------------------------------------------===//
13
16#include "llvm/ADT/Hashing.h"
17#include "llvm/ADT/STLExtras.h"
20#include "llvm/ADT/Statistic.h"
30#include "llvm/IR/BasicBlock.h"
31#include "llvm/IR/Constants.h"
32#include "llvm/IR/Dominators.h"
33#include "llvm/IR/Function.h"
34#include "llvm/IR/InstrTypes.h"
35#include "llvm/IR/Instruction.h"
38#include "llvm/IR/LLVMContext.h"
39#include "llvm/IR/PassManager.h"
41#include "llvm/IR/Type.h"
42#include "llvm/IR/Value.h"
44#include "llvm/Pass.h"
48#include "llvm/Support/Debug.h"
55#include <cassert>
56#include <deque>
57#include <memory>
58#include <utility>
59
60using namespace llvm;
61using namespace llvm::PatternMatch;
62
63#define DEBUG_TYPE "early-cse"
64
65STATISTIC(NumSimplify, "Number of instructions simplified or DCE'd");
66STATISTIC(NumCSE, "Number of instructions CSE'd");
67STATISTIC(NumCSECVP, "Number of compare instructions CVP'd");
68STATISTIC(NumCSELoad, "Number of load instructions CSE'd");
69STATISTIC(NumCSECall, "Number of call instructions CSE'd");
70STATISTIC(NumCSEGEP, "Number of GEP instructions CSE'd");
71STATISTIC(NumDSE, "Number of trivial dead stores removed");
72
73DEBUG_COUNTER(CSECounter, "early-cse",
74 "Controls which instructions are removed");
75
77 "earlycse-mssa-optimization-cap", cl::init(500), cl::Hidden,
78 cl::desc("Enable imprecision in EarlyCSE in pathological cases, in exchange "
79 "for faster compile. Caps the MemorySSA clobbering calls."));
80
82 "earlycse-debug-hash", cl::init(false), cl::Hidden,
83 cl::desc("Perform extra assertion checking to verify that SimpleValue's hash "
84 "function is well-behaved w.r.t. its isEqual predicate"));
85
86//===----------------------------------------------------------------------===//
87// SimpleValue
88//===----------------------------------------------------------------------===//
89
90namespace {
91
92/// Struct representing the available values in the scoped hash table.
93struct SimpleValue {
94 Instruction *Inst;
95
96 SimpleValue(Instruction *I) : Inst(I) {
97 assert(canHandle(I) && "Inst can't be handled!");
98 }
99
100 static bool canHandle(Instruction *Inst) {
101 // This can only handle non-void readnone functions.
102 // Also handled are constrained intrinsic that look like the types
103 // of instruction handled below (UnaryOperator, etc.).
104 if (CallInst *CI = dyn_cast<CallInst>(Inst)) {
105 if (Function *F = CI->getCalledFunction()) {
106 switch (F->getIntrinsicID()) {
107 case Intrinsic::experimental_constrained_fadd:
108 case Intrinsic::experimental_constrained_fsub:
109 case Intrinsic::experimental_constrained_fmul:
110 case Intrinsic::experimental_constrained_fdiv:
111 case Intrinsic::experimental_constrained_frem:
112 case Intrinsic::experimental_constrained_fptosi:
113 case Intrinsic::experimental_constrained_sitofp:
114 case Intrinsic::experimental_constrained_fptoui:
115 case Intrinsic::experimental_constrained_uitofp:
116 case Intrinsic::experimental_constrained_fcmp:
117 case Intrinsic::experimental_constrained_fcmps: {
118 auto *CFP = cast<ConstrainedFPIntrinsic>(CI);
119 if (CFP->getExceptionBehavior() &&
120 CFP->getExceptionBehavior() == fp::ebStrict)
121 return false;
122 // Since we CSE across function calls we must not allow
123 // the rounding mode to change.
124 if (CFP->getRoundingMode() &&
125 CFP->getRoundingMode() == RoundingMode::Dynamic)
126 return false;
127 return true;
128 }
129 }
130 }
131 return CI->doesNotAccessMemory() &&
132 // FIXME: Currently the calls which may access the thread id may
133 // be considered as not accessing the memory. But this is
134 // problematic for coroutines, since coroutines may resume in a
135 // different thread. So we disable the optimization here for the
136 // correctness. However, it may block many other correct
137 // optimizations. Revert this one when we detect the memory
138 // accessing kind more precisely.
139 !CI->getFunction()->isPresplitCoroutine();
140 }
141 return isa<CastInst>(Inst) || isa<UnaryOperator>(Inst) ||
142 isa<BinaryOperator>(Inst) || isa<CmpInst>(Inst) ||
146 isa<FreezeInst>(Inst);
147 }
148};
149
150} // end anonymous namespace
151
152template <> struct llvm::DenseMapInfo<SimpleValue> {
153 static unsigned getHashValue(SimpleValue Val);
154 static bool isEqual(SimpleValue LHS, SimpleValue RHS);
155};
156
157/// Match a 'select' including an optional 'not's of the condition.
159 Value *&B,
160 SelectPatternFlavor &Flavor) {
161 // Return false if V is not even a select.
162 if (!match(V, m_Select(m_Value(Cond), m_Value(A), m_Value(B))))
163 return false;
164
165 // Look through a 'not' of the condition operand by swapping A/B.
166 Value *CondNot;
167 if (match(Cond, m_Not(m_Value(CondNot)))) {
168 Cond = CondNot;
169 std::swap(A, B);
170 }
171
172 // Match canonical forms of min/max. We are not using ValueTracking's
173 // more powerful matchSelectPattern() because it may rely on instruction flags
174 // such as "nsw". That would be incompatible with the current hashing
175 // mechanism that may remove flags to increase the likelihood of CSE.
176
177 Flavor = SPF_UNKNOWN;
178 CmpPredicate Pred;
179
180 if (!match(Cond, m_ICmp(Pred, m_Specific(A), m_Specific(B)))) {
181 // Check for commuted variants of min/max by swapping predicate.
182 // If we do not match the standard or commuted patterns, this is not a
183 // recognized form of min/max, but it is still a select, so return true.
184 if (!match(Cond, m_ICmp(Pred, m_Specific(B), m_Specific(A))))
185 return true;
187 }
188
189 switch (Pred) {
190 case CmpInst::ICMP_UGT: Flavor = SPF_UMAX; break;
191 case CmpInst::ICMP_ULT: Flavor = SPF_UMIN; break;
192 case CmpInst::ICMP_SGT: Flavor = SPF_SMAX; break;
193 case CmpInst::ICMP_SLT: Flavor = SPF_SMIN; break;
194 // Non-strict inequalities.
195 case CmpInst::ICMP_ULE: Flavor = SPF_UMIN; break;
196 case CmpInst::ICMP_UGE: Flavor = SPF_UMAX; break;
197 case CmpInst::ICMP_SLE: Flavor = SPF_SMIN; break;
198 case CmpInst::ICMP_SGE: Flavor = SPF_SMAX; break;
199 default: break;
200 }
201
202 return true;
203}
204
205static unsigned hashCallInst(CallInst *CI) {
206 // Don't CSE convergent calls in different basic blocks, because they
207 // implicitly depend on the set of threads that is currently executing.
208 if (CI->isConvergent()) {
209 return hash_combine(CI->getOpcode(), CI->getParent(),
211 }
212 return hash_combine(CI->getOpcode(),
214}
215
216static unsigned getHashValueImpl(SimpleValue Val) {
217 Instruction *Inst = Val.Inst;
218 // Hash in all of the operands as pointers.
219 if (BinaryOperator *BinOp = dyn_cast<BinaryOperator>(Inst)) {
220 Value *LHS = BinOp->getOperand(0);
221 Value *RHS = BinOp->getOperand(1);
222 if (BinOp->isCommutative() && BinOp->getOperand(0) > BinOp->getOperand(1))
223 std::swap(LHS, RHS);
224
225 return hash_combine(BinOp->getOpcode(), LHS, RHS);
226 }
227
228 if (CmpInst *CI = dyn_cast<CmpInst>(Inst)) {
229 // Compares can be commuted by swapping the comparands and
230 // updating the predicate. Choose the form that has the
231 // comparands in sorted order, or in the case of a tie, the
232 // one with the lower predicate.
233 Value *LHS = CI->getOperand(0);
234 Value *RHS = CI->getOperand(1);
235 CmpInst::Predicate Pred = CI->getPredicate();
236 CmpInst::Predicate SwappedPred = CI->getSwappedPredicate();
237 if (std::tie(LHS, Pred) > std::tie(RHS, SwappedPred)) {
238 std::swap(LHS, RHS);
239 Pred = SwappedPred;
240 }
241 return hash_combine(Inst->getOpcode(), Pred, LHS, RHS);
242 }
243
244 // Hash general selects to allow matching commuted true/false operands.
246 Value *Cond, *A, *B;
247 if (matchSelectWithOptionalNotCond(Inst, Cond, A, B, SPF)) {
248 // Hash min/max (cmp + select) to allow for commuted operands.
249 // Min/max may also have non-canonical compare predicate (eg, the compare for
250 // smin may use 'sgt' rather than 'slt'), and non-canonical operands in the
251 // compare.
252 // TODO: We should also detect FP min/max.
253 if (SPF == SPF_SMIN || SPF == SPF_SMAX ||
254 SPF == SPF_UMIN || SPF == SPF_UMAX) {
255 if (A > B)
256 std::swap(A, B);
257 return hash_combine(Inst->getOpcode(), SPF, A, B);
258 }
259
260 // Hash general selects to allow matching commuted true/false operands.
261
262 // If we do not have a compare as the condition, just hash in the condition.
263 CmpPredicate Pred;
264 Value *X, *Y;
265 if (!match(Cond, m_Cmp(Pred, m_Value(X), m_Value(Y))))
266 return hash_combine(Inst->getOpcode(), Cond, A, B);
267
268 // Similar to cmp normalization (above) - canonicalize the predicate value:
269 // select (icmp Pred, X, Y), A, B --> select (icmp InvPred, X, Y), B, A
270 if (CmpInst::getInversePredicate(Pred) < Pred) {
271 Pred = CmpInst::getInversePredicate(Pred);
272 std::swap(A, B);
273 }
274 return hash_combine(Inst->getOpcode(),
275 static_cast<CmpInst::Predicate>(Pred), X, Y, A, B);
276 }
277
278 if (CastInst *CI = dyn_cast<CastInst>(Inst))
279 return hash_combine(CI->getOpcode(), CI->getType(), CI->getOperand(0));
280
281 if (FreezeInst *FI = dyn_cast<FreezeInst>(Inst))
282 return hash_combine(FI->getOpcode(), FI->getOperand(0));
283
284 if (const ExtractValueInst *EVI = dyn_cast<ExtractValueInst>(Inst))
285 return hash_combine(EVI->getOpcode(), EVI->getOperand(0),
286 hash_combine_range(EVI->indices()));
287
288 if (const InsertValueInst *IVI = dyn_cast<InsertValueInst>(Inst))
289 return hash_combine(IVI->getOpcode(), IVI->getOperand(0),
290 IVI->getOperand(1), hash_combine_range(IVI->indices()));
291
294 isa<UnaryOperator>(Inst) || isa<FreezeInst>(Inst)) &&
295 "Invalid/unknown instruction");
296
297 // Handle intrinsics with commutative operands.
298 auto *II = dyn_cast<IntrinsicInst>(Inst);
299 if (II && II->isCommutative() && II->arg_size() >= 2) {
300 Value *LHS = II->getArgOperand(0), *RHS = II->getArgOperand(1);
301 if (LHS > RHS)
302 std::swap(LHS, RHS);
303 return hash_combine(
304 II->getOpcode(), LHS, RHS,
305 hash_combine_range(drop_begin(II->operand_values(), 2)));
306 }
307
308 // gc.relocate is 'special' call: its second and third operands are
309 // not real values, but indices into statepoint's argument list.
310 // Get values they point to.
311 if (const GCRelocateInst *GCR = dyn_cast<GCRelocateInst>(Inst))
312 return hash_combine(GCR->getOpcode(), GCR->getOperand(0),
313 GCR->getBasePtr(), GCR->getDerivedPtr());
314
315 // Don't CSE convergent calls in different basic blocks, because they
316 // implicitly depend on the set of threads that is currently executing.
317 if (CallInst *CI = dyn_cast<CallInst>(Inst))
318 return hashCallInst(CI);
319
320 // Mix in the opcode.
321 return hash_combine(Inst->getOpcode(),
323}
324
325unsigned DenseMapInfo<SimpleValue>::getHashValue(SimpleValue Val) {
326#ifndef NDEBUG
327 // If -earlycse-debug-hash was specified, return a constant -- this
328 // will force all hashing to collide, so we'll exhaustively search
329 // the table for a match, and the assertion in isEqual will fire if
330 // there's a bug causing equal keys to hash differently.
332 return 0;
333#endif
334 return getHashValueImpl(Val);
335}
336
337static bool isEqualImpl(SimpleValue LHS, SimpleValue RHS) {
338 Instruction *LHSI = LHS.Inst, *RHSI = RHS.Inst;
339
340 if (LHSI->getOpcode() != RHSI->getOpcode())
341 return false;
342 if (LHSI->isIdenticalToWhenDefined(RHSI, /*IntersectAttrs=*/true)) {
343 // Convergent calls implicitly depend on the set of threads that is
344 // currently executing, so conservatively return false if they are in
345 // different basic blocks.
346 if (CallInst *CI = dyn_cast<CallInst>(LHSI);
347 CI && CI->isConvergent() && LHSI->getParent() != RHSI->getParent())
348 return false;
349
350 return true;
351 }
352
353 // If we're not strictly identical, we still might be a commutable instruction
354 if (BinaryOperator *LHSBinOp = dyn_cast<BinaryOperator>(LHSI)) {
355 if (!LHSBinOp->isCommutative())
356 return false;
357
359 "same opcode, but different instruction type?");
360 BinaryOperator *RHSBinOp = cast<BinaryOperator>(RHSI);
361
362 // Commuted equality
363 return LHSBinOp->getOperand(0) == RHSBinOp->getOperand(1) &&
364 LHSBinOp->getOperand(1) == RHSBinOp->getOperand(0);
365 }
366 if (CmpInst *LHSCmp = dyn_cast<CmpInst>(LHSI)) {
367 assert(isa<CmpInst>(RHSI) &&
368 "same opcode, but different instruction type?");
369 CmpInst *RHSCmp = cast<CmpInst>(RHSI);
370 // Commuted equality
371 return LHSCmp->getOperand(0) == RHSCmp->getOperand(1) &&
372 LHSCmp->getOperand(1) == RHSCmp->getOperand(0) &&
373 LHSCmp->getSwappedPredicate() == RHSCmp->getPredicate();
374 }
375
376 auto *LII = dyn_cast<IntrinsicInst>(LHSI);
377 auto *RII = dyn_cast<IntrinsicInst>(RHSI);
378 if (LII && RII && LII->getIntrinsicID() == RII->getIntrinsicID() &&
379 LII->isCommutative() && LII->arg_size() >= 2) {
380 return LII->getArgOperand(0) == RII->getArgOperand(1) &&
381 LII->getArgOperand(1) == RII->getArgOperand(0) &&
382 std::equal(LII->arg_begin() + 2, LII->arg_end(),
383 RII->arg_begin() + 2, RII->arg_end()) &&
384 LII->hasSameSpecialState(RII, /*IgnoreAlignment=*/false,
385 /*IntersectAttrs=*/true);
386 }
387
388 // See comment above in `getHashValue()`.
389 if (const GCRelocateInst *GCR1 = dyn_cast<GCRelocateInst>(LHSI))
390 if (const GCRelocateInst *GCR2 = dyn_cast<GCRelocateInst>(RHSI))
391 return GCR1->getOperand(0) == GCR2->getOperand(0) &&
392 GCR1->getBasePtr() == GCR2->getBasePtr() &&
393 GCR1->getDerivedPtr() == GCR2->getDerivedPtr();
394
395 // Min/max can occur with commuted operands, non-canonical predicates,
396 // and/or non-canonical operands.
397 // Selects can be non-trivially equivalent via inverted conditions and swaps.
398 SelectPatternFlavor LSPF, RSPF;
399 Value *CondL, *CondR, *LHSA, *RHSA, *LHSB, *RHSB;
400 if (matchSelectWithOptionalNotCond(LHSI, CondL, LHSA, LHSB, LSPF) &&
401 matchSelectWithOptionalNotCond(RHSI, CondR, RHSA, RHSB, RSPF)) {
402 if (LSPF == RSPF) {
403 // TODO: We should also detect FP min/max.
404 if (LSPF == SPF_SMIN || LSPF == SPF_SMAX ||
405 LSPF == SPF_UMIN || LSPF == SPF_UMAX)
406 return ((LHSA == RHSA && LHSB == RHSB) ||
407 (LHSA == RHSB && LHSB == RHSA));
408
409 // select Cond, A, B <--> select not(Cond), B, A
410 if (CondL == CondR && LHSA == RHSA && LHSB == RHSB)
411 return true;
412 }
413
414 // If the true/false operands are swapped and the conditions are compares
415 // with inverted predicates, the selects are equal:
416 // select (icmp Pred, X, Y), A, B <--> select (icmp InvPred, X, Y), B, A
417 //
418 // This also handles patterns with a double-negation in the sense of not +
419 // inverse, because we looked through a 'not' in the matching function and
420 // swapped A/B:
421 // select (cmp Pred, X, Y), A, B <--> select (not (cmp InvPred, X, Y)), B, A
422 //
423 // This intentionally does NOT handle patterns with a double-negation in
424 // the sense of not + not, because doing so could result in values
425 // comparing
426 // as equal that hash differently in the min/max cases like:
427 // select (cmp slt, X, Y), X, Y <--> select (not (not (cmp slt, X, Y))), X, Y
428 // ^ hashes as min ^ would not hash as min
429 // In the context of the EarlyCSE pass, however, such cases never reach
430 // this code, as we simplify the double-negation before hashing the second
431 // select (and so still succeed at CSEing them).
432 if (LHSA == RHSB && LHSB == RHSA) {
433 CmpPredicate PredL, PredR;
434 Value *X, *Y;
435 if (match(CondL, m_Cmp(PredL, m_Value(X), m_Value(Y))) &&
436 match(CondR, m_Cmp(PredR, m_Specific(X), m_Specific(Y))) &&
437 CmpInst::getInversePredicate(PredL) == PredR)
438 return true;
439 }
440 }
441
442 return false;
443}
444
445bool DenseMapInfo<SimpleValue>::isEqual(SimpleValue LHS, SimpleValue RHS) {
446 // These comparisons are nontrivial, so assert that equality implies
447 // hash equality (DenseMap demands this as an invariant).
448 bool Result = isEqualImpl(LHS, RHS);
450 return Result;
451}
452
453//===----------------------------------------------------------------------===//
454// CallValue
455//===----------------------------------------------------------------------===//
456
457namespace {
458
459/// Struct representing the available call values in the scoped hash
460/// table.
461struct CallValue {
462 Instruction *Inst;
463
464 CallValue(Instruction *I) : Inst(I) {
465 assert(canHandle(I) && "Inst can't be handled!");
466 }
467
468 static bool canHandle(Instruction *Inst) {
469 CallInst *CI = dyn_cast<CallInst>(Inst);
470 if (!CI || (!CI->onlyReadsMemory() && !CI->onlyWritesMemory()) ||
471 // FIXME: Currently the calls which may access the thread id may
472 // be considered as not accessing the memory. But this is
473 // problematic for coroutines, since coroutines may resume in a
474 // different thread. So we disable the optimization here for the
475 // correctness. However, it may block many other correct
476 // optimizations. Revert this one when we detect the memory
477 // accessing kind more precisely.
479 return false;
480 return true;
481 }
482};
483
484} // end anonymous namespace
485
486template <> struct llvm::DenseMapInfo<CallValue> {
487 static unsigned getHashValue(CallValue Val);
488 static bool isEqual(CallValue LHS, CallValue RHS);
489};
490
491unsigned DenseMapInfo<CallValue>::getHashValue(CallValue Val) {
492 Instruction *Inst = Val.Inst;
493
494 // Hash all of the operands as pointers and mix in the opcode.
495 return hashCallInst(cast<CallInst>(Inst));
496}
497
498bool DenseMapInfo<CallValue>::isEqual(CallValue LHS, CallValue RHS) {
499 CallInst *LHSI = cast<CallInst>(LHS.Inst);
500 CallInst *RHSI = cast<CallInst>(RHS.Inst);
501
502 // Convergent calls implicitly depend on the set of threads that is
503 // currently executing, so conservatively return false if they are in
504 // different basic blocks.
505 if (LHSI->isConvergent() && LHSI->getParent() != RHSI->getParent())
506 return false;
507
508 return LHSI->isIdenticalToWhenDefined(RHSI, /*IntersectAttrs=*/true);
509}
510
511//===----------------------------------------------------------------------===//
512// GEPValue
513//===----------------------------------------------------------------------===//
514
515namespace {
516
517struct GEPValue {
518 Instruction *Inst;
519 std::optional<int64_t> ConstantOffset;
520
521 GEPValue(Instruction *I) : Inst(I) {
522 assert(canHandle(I) && "Inst can't be handled!");
523 }
524
525 GEPValue(Instruction *I, std::optional<int64_t> ConstantOffset)
526 : Inst(I), ConstantOffset(ConstantOffset) {
527 assert(canHandle(I) && "Inst can't be handled!");
528 }
529
530 static bool canHandle(Instruction *Inst) {
531 return isa<GetElementPtrInst>(Inst);
532 }
533};
534
535} // namespace
536
537template <> struct llvm::DenseMapInfo<GEPValue> {
538 static unsigned getHashValue(const GEPValue &Val);
539 static bool isEqual(const GEPValue &LHS, const GEPValue &RHS);
540};
541
542unsigned DenseMapInfo<GEPValue>::getHashValue(const GEPValue &Val) {
543 auto *GEP = cast<GetElementPtrInst>(Val.Inst);
544 if (Val.ConstantOffset.has_value())
545 return hash_combine(GEP->getOpcode(), GEP->getPointerOperand(),
546 Val.ConstantOffset.value());
547 return hash_combine(GEP->getOpcode(),
548 hash_combine_range(GEP->operand_values()));
549}
550
551bool DenseMapInfo<GEPValue>::isEqual(const GEPValue &LHS, const GEPValue &RHS) {
552 auto *LGEP = cast<GetElementPtrInst>(LHS.Inst);
553 auto *RGEP = cast<GetElementPtrInst>(RHS.Inst);
554 if (LGEP->getPointerOperand() != RGEP->getPointerOperand())
555 return false;
556 if (LHS.ConstantOffset.has_value() && RHS.ConstantOffset.has_value())
557 return LHS.ConstantOffset.value() == RHS.ConstantOffset.value();
558 return LGEP->isIdenticalToWhenDefined(RGEP);
559}
560
561//===----------------------------------------------------------------------===//
562// EarlyCSE implementation
563//===----------------------------------------------------------------------===//
564
565namespace {
566
567/// A simple and fast domtree-based CSE pass.
568///
569/// This pass does a simple depth-first walk over the dominator tree,
570/// eliminating trivially redundant instructions and using instsimplify to
571/// canonicalize things as it goes. It is intended to be fast and catch obvious
572/// cases so that instcombine and other passes are more effective. It is
573/// expected that a later pass of GVN will catch the interesting/hard cases.
574class EarlyCSE {
575public:
576 const TargetLibraryInfo &TLI;
577 const TargetTransformInfo &TTI;
578 DominatorTree &DT;
579 AssumptionCache &AC;
580 const SimplifyQuery SQ;
581 MemorySSA *MSSA;
582 std::unique_ptr<MemorySSAUpdater> MSSAUpdater;
583
584 using AllocatorTy =
585 RecyclingAllocator<BumpPtrAllocator,
586 ScopedHashTableVal<SimpleValue, Value *>>;
587 using ScopedHTType =
588 ScopedHashTable<SimpleValue, Value *, DenseMapInfo<SimpleValue>,
589 AllocatorTy>;
590
591 /// A scoped hash table of the current values of all of our simple
592 /// scalar expressions.
593 ///
594 /// As we walk down the domtree, we look to see if instructions are in this:
595 /// if so, we replace them with what we find, otherwise we insert them so
596 /// that dominated values can succeed in their lookup.
597 ScopedHTType AvailableValues;
598
599 /// A scoped hash table of the current values of previously encountered
600 /// memory locations.
601 ///
602 /// This allows us to get efficient access to dominating loads or stores when
603 /// we have a fully redundant load. In addition to the most recent load, we
604 /// keep track of a generation count of the read, which is compared against
605 /// the current generation count. The current generation count is incremented
606 /// after every possibly writing memory operation, which ensures that we only
607 /// CSE loads with other loads that have no intervening store. Ordering
608 /// events (such as fences or atomic instructions) increment the generation
609 /// count as well; essentially, we model these as writes to all possible
610 /// locations. Note that atomic and/or volatile loads and stores can be
611 /// present the table; it is the responsibility of the consumer to inspect
612 /// the atomicity/volatility if needed.
613 struct LoadValue {
614 Instruction *DefInst = nullptr;
615 unsigned Generation = 0;
616 int MatchingId = -1;
617 bool IsAtomic = false;
618 bool IsLoad = false;
619
620 LoadValue() = default;
621 LoadValue(Instruction *Inst, unsigned Generation, unsigned MatchingId,
622 bool IsAtomic, bool IsLoad)
623 : DefInst(Inst), Generation(Generation), MatchingId(MatchingId),
624 IsAtomic(IsAtomic), IsLoad(IsLoad) {}
625 };
626
627 using LoadMapAllocator =
628 RecyclingAllocator<BumpPtrAllocator,
629 ScopedHashTableVal<Value *, LoadValue>>;
630 using LoadHTType =
631 ScopedHashTable<Value *, LoadValue, DenseMapInfo<Value *>,
632 LoadMapAllocator>;
633
634 LoadHTType AvailableLoads;
635
636 // A scoped hash table mapping memory locations (represented as typed
637 // addresses) to generation numbers at which that memory location became
638 // (henceforth indefinitely) invariant.
639 using InvariantMapAllocator =
640 RecyclingAllocator<BumpPtrAllocator,
641 ScopedHashTableVal<MemoryLocation, unsigned>>;
642 using InvariantHTType =
643 ScopedHashTable<MemoryLocation, unsigned, DenseMapInfo<MemoryLocation>,
644 InvariantMapAllocator>;
645 InvariantHTType AvailableInvariants;
646
647 /// A scoped hash table of the current values of read-only call
648 /// values.
649 ///
650 /// It uses the same generation count as loads.
651 using CallHTType =
652 ScopedHashTable<CallValue, std::pair<Instruction *, unsigned>>;
653 CallHTType AvailableCalls;
654
655 using GEPMapAllocatorTy =
656 RecyclingAllocator<BumpPtrAllocator,
657 ScopedHashTableVal<GEPValue, Value *>>;
658 using GEPHTType = ScopedHashTable<GEPValue, Value *, DenseMapInfo<GEPValue>,
659 GEPMapAllocatorTy>;
660 GEPHTType AvailableGEPs;
661
662 /// This is the current generation of the memory value.
663 unsigned CurrentGeneration = 0;
664
665 /// Set up the EarlyCSE runner for a particular function.
666 EarlyCSE(const DataLayout &DL, const TargetLibraryInfo &TLI,
667 const TargetTransformInfo &TTI, DominatorTree &DT,
668 AssumptionCache &AC, MemorySSA *MSSA)
669 : TLI(TLI), TTI(TTI), DT(DT), AC(AC), SQ(DL, &TLI, &DT, &AC), MSSA(MSSA),
670 MSSAUpdater(std::make_unique<MemorySSAUpdater>(MSSA)) {}
671
672 bool run();
673
674private:
675 unsigned ClobberCounter = 0;
676 // Almost a POD, but needs to call the constructors for the scoped hash
677 // tables so that a new scope gets pushed on. These are RAII so that the
678 // scope gets popped when the NodeScope is destroyed.
679 class NodeScope {
680 public:
681 NodeScope(ScopedHTType &AvailableValues, LoadHTType &AvailableLoads,
682 InvariantHTType &AvailableInvariants, CallHTType &AvailableCalls,
683 GEPHTType &AvailableGEPs)
684 : Scope(AvailableValues), LoadScope(AvailableLoads),
685 InvariantScope(AvailableInvariants), CallScope(AvailableCalls),
686 GEPScope(AvailableGEPs) {}
687 NodeScope(const NodeScope &) = delete;
688 NodeScope &operator=(const NodeScope &) = delete;
689
690 private:
692 LoadHTType::ScopeTy LoadScope;
693 InvariantHTType::ScopeTy InvariantScope;
694 CallHTType::ScopeTy CallScope;
695 GEPHTType::ScopeTy GEPScope;
696 };
697
698 // Contains all the needed information to create a stack for doing a depth
699 // first traversal of the tree. This includes scopes for values, loads, and
700 // calls as well as the generation. There is a child iterator so that the
701 // children do not need to be store separately.
702 class StackNode {
703 public:
704 StackNode(ScopedHTType &AvailableValues, LoadHTType &AvailableLoads,
705 InvariantHTType &AvailableInvariants, CallHTType &AvailableCalls,
706 GEPHTType &AvailableGEPs, unsigned cg, DomTreeNode *n,
707 DomTreeNode::const_iterator child,
708 DomTreeNode::const_iterator end)
709 : CurrentGeneration(cg), ChildGeneration(cg), Node(n), ChildIter(child),
710 EndIter(end),
711 Scopes(AvailableValues, AvailableLoads, AvailableInvariants,
712 AvailableCalls, AvailableGEPs) {}
713 StackNode(const StackNode &) = delete;
714 StackNode &operator=(const StackNode &) = delete;
715
716 // Accessors.
717 unsigned currentGeneration() const { return CurrentGeneration; }
718 unsigned childGeneration() const { return ChildGeneration; }
719 void childGeneration(unsigned generation) { ChildGeneration = generation; }
720 DomTreeNode *node() { return Node; }
721 DomTreeNode::const_iterator childIter() const { return ChildIter; }
722
723 DomTreeNode *nextChild() {
724 DomTreeNode *child = *ChildIter;
725 ++ChildIter;
726 return child;
727 }
728
729 DomTreeNode::const_iterator end() const { return EndIter; }
730 bool isProcessed() const { return Processed; }
731 void process() { Processed = true; }
732
733 private:
734 unsigned CurrentGeneration;
735 unsigned ChildGeneration;
736 DomTreeNode *Node;
737 DomTreeNode::const_iterator ChildIter;
738 DomTreeNode::const_iterator EndIter;
739 NodeScope Scopes;
740 bool Processed = false;
741 };
742
743 /// Wrapper class to handle memory instructions, including loads,
744 /// stores and intrinsic loads and stores defined by the target.
745 class ParseMemoryInst {
746 public:
747 ParseMemoryInst(Instruction *Inst, const TargetTransformInfo &TTI)
748 : Inst(Inst) {
749 if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst)) {
750 IntrID = II->getIntrinsicID();
751 if (TTI.getTgtMemIntrinsic(II, Info))
752 return;
753 if (isHandledNonTargetIntrinsic(IntrID)) {
754 switch (IntrID) {
755 case Intrinsic::masked_load:
756 Info.PtrVal = Inst->getOperand(0);
757 Info.MatchingId = Intrinsic::masked_load;
758 Info.ReadMem = true;
759 Info.WriteMem = false;
760 Info.IsVolatile = false;
761 break;
762 case Intrinsic::masked_store:
763 Info.PtrVal = Inst->getOperand(1);
764 // Use the ID of masked load as the "matching id". This will
765 // prevent matching non-masked loads/stores with masked ones
766 // (which could be done), but at the moment, the code here
767 // does not support matching intrinsics with non-intrinsics,
768 // so keep the MatchingIds specific to masked instructions
769 // for now (TODO).
770 Info.MatchingId = Intrinsic::masked_load;
771 Info.ReadMem = false;
772 Info.WriteMem = true;
773 Info.IsVolatile = false;
774 break;
775 }
776 } else if (auto *MI = dyn_cast<MemSetInst>(Inst)) {
777 Info.PtrVal = MI->getDest();
778 Info.MatchingId = 0;
779 Info.ReadMem = false;
780 Info.WriteMem = true;
781 Info.IsVolatile = MI->isVolatile();
782 }
783 }
784 }
785
786 Instruction *get() { return Inst; }
787 const Instruction *get() const { return Inst; }
788
789 bool isLoad() const {
790 if (IntrID != 0)
791 return Info.ReadMem;
792 return isa<LoadInst>(Inst);
793 }
794
795 bool isStore() const {
796 if (IntrID != 0)
797 return Info.WriteMem;
798 return isa<StoreInst>(Inst);
799 }
800
801 bool isAtomic() const {
802 if (IntrID != 0)
803 return Info.Ordering != AtomicOrdering::NotAtomic;
804 return Inst->isAtomic();
805 }
806
807 bool isUnordered() const {
808 if (IntrID != 0)
809 return Info.isUnordered();
810
811 if (LoadInst *LI = dyn_cast<LoadInst>(Inst)) {
812 return LI->isUnordered();
813 } else if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
814 return SI->isUnordered();
815 }
816 // Conservative answer
817 return !Inst->isAtomic();
818 }
819
820 bool isVolatile() const {
821 if (IntrID != 0)
822 return Info.IsVolatile;
823
824 if (LoadInst *LI = dyn_cast<LoadInst>(Inst)) {
825 return LI->isVolatile();
826 } else if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
827 return SI->isVolatile();
828 }
829 // Conservative answer
830 return true;
831 }
832
833 bool isInvariantLoad() const {
834 if (auto *LI = dyn_cast<LoadInst>(Inst))
835 return LI->hasMetadata(LLVMContext::MD_invariant_load);
836 return false;
837 }
838
839 bool isValid() const { return getPointerOperand() != nullptr; }
840
841 // For regular (non-intrinsic) loads/stores, this is set to -1. For
842 // intrinsic loads/stores, the id is retrieved from the corresponding
843 // field in the MemIntrinsicInfo structure. That field contains
844 // non-negative values only.
845 int getMatchingId() const {
846 if (IntrID != 0)
847 return Info.MatchingId;
848 return -1;
849 }
850
851 Value *getPointerOperand() const {
852 if (IntrID != 0)
853 return Info.PtrVal;
854 return getLoadStorePointerOperand(Inst);
855 }
856
857 Type *getValueType() const {
858 // TODO: handle target-specific intrinsics.
859 return Inst->getAccessType();
860 }
861
862 bool mayReadFromMemory() const {
863 if (IntrID != 0)
864 return Info.ReadMem;
865 return Inst->mayReadFromMemory();
866 }
867
868 bool mayWriteToMemory() const {
869 if (IntrID != 0)
870 return Info.WriteMem;
871 return Inst->mayWriteToMemory();
872 }
873
874 private:
875 Intrinsic::ID IntrID = 0;
876 MemIntrinsicInfo Info;
877 Instruction *Inst;
878 };
879
880 // This function is to prevent accidentally passing a non-target
881 // intrinsic ID to TargetTransformInfo.
882 static bool isHandledNonTargetIntrinsic(Intrinsic::ID ID) {
883 switch (ID) {
884 case Intrinsic::masked_load:
885 case Intrinsic::masked_store:
886 return true;
887 }
888 return false;
889 }
890 static bool isHandledNonTargetIntrinsic(const Value *V) {
891 if (auto *II = dyn_cast<IntrinsicInst>(V))
892 return isHandledNonTargetIntrinsic(II->getIntrinsicID());
893 return false;
894 }
895
896 bool processNode(DomTreeNode *Node);
897
898 bool handleBranchCondition(Instruction *CondInst, const CondBrInst *BI,
899 const BasicBlock *BB, const BasicBlock *Pred);
900
901 Value *getMatchingValue(LoadValue &InVal, ParseMemoryInst &MemInst,
902 unsigned CurrentGeneration);
903
904 bool overridingStores(const ParseMemoryInst &Earlier,
905 const ParseMemoryInst &Later);
906
907 Value *getOrCreateResult(Instruction *Inst, Type *ExpectedType,
908 bool CanCreate) const {
909 // TODO: We could insert relevant casts on type mismatch.
910 // The load or the store's first operand.
911 Value *V;
912 if (auto *II = dyn_cast<IntrinsicInst>(Inst)) {
913 switch (II->getIntrinsicID()) {
914 case Intrinsic::masked_load:
915 V = II;
916 break;
917 case Intrinsic::masked_store:
918 V = II->getOperand(0);
919 break;
920 default:
921 return TTI.getOrCreateResultFromMemIntrinsic(II, ExpectedType,
922 CanCreate);
923 }
924 } else {
925 V = isa<LoadInst>(Inst) ? Inst : cast<StoreInst>(Inst)->getValueOperand();
926 }
927
928 return V->getType() == ExpectedType ? V : nullptr;
929 }
930
931 /// Return true if the instruction is known to only operate on memory
932 /// provably invariant in the given "generation".
933 bool isOperatingOnInvariantMemAt(Instruction *I, unsigned GenAt);
934
935 bool isSameMemGeneration(unsigned EarlierGeneration, unsigned LaterGeneration,
936 Instruction *EarlierInst, Instruction *LaterInst);
937
938 bool isNonTargetIntrinsicMatch(const IntrinsicInst *Earlier,
939 const IntrinsicInst *Later) {
940 auto IsSubmask = [](const Value *Mask0, const Value *Mask1) {
941 // Is Mask0 a submask of Mask1?
942 if (Mask0 == Mask1)
943 return true;
944 if (isa<UndefValue>(Mask0) || isa<UndefValue>(Mask1))
945 return false;
946 auto *Vec0 = dyn_cast<ConstantVector>(Mask0);
947 auto *Vec1 = dyn_cast<ConstantVector>(Mask1);
948 if (!Vec0 || !Vec1)
949 return false;
950 if (Vec0->getType() != Vec1->getType())
951 return false;
952 for (int i = 0, e = Vec0->getNumOperands(); i != e; ++i) {
953 Constant *Elem0 = Vec0->getOperand(i);
954 Constant *Elem1 = Vec1->getOperand(i);
955 auto *Int0 = dyn_cast<ConstantInt>(Elem0);
956 if (Int0 && Int0->isZero())
957 continue;
958 auto *Int1 = dyn_cast<ConstantInt>(Elem1);
959 if (Int1 && !Int1->isZero())
960 continue;
961 if (isa<UndefValue>(Elem0) || isa<UndefValue>(Elem1))
962 return false;
963 if (Elem0 == Elem1)
964 continue;
965 return false;
966 }
967 return true;
968 };
969 auto PtrOp = [](const IntrinsicInst *II) {
970 if (II->getIntrinsicID() == Intrinsic::masked_load)
971 return II->getOperand(0);
972 if (II->getIntrinsicID() == Intrinsic::masked_store)
973 return II->getOperand(1);
974 llvm_unreachable("Unexpected IntrinsicInst");
975 };
976 auto MaskOp = [](const IntrinsicInst *II) {
977 if (II->getIntrinsicID() == Intrinsic::masked_load)
978 return II->getOperand(1);
979 if (II->getIntrinsicID() == Intrinsic::masked_store)
980 return II->getOperand(2);
981 llvm_unreachable("Unexpected IntrinsicInst");
982 };
983 auto ThruOp = [](const IntrinsicInst *II) {
984 if (II->getIntrinsicID() == Intrinsic::masked_load)
985 return II->getOperand(2);
986 llvm_unreachable("Unexpected IntrinsicInst");
987 };
988
989 if (PtrOp(Earlier) != PtrOp(Later))
990 return false;
991
992 Intrinsic::ID IDE = Earlier->getIntrinsicID();
993 Intrinsic::ID IDL = Later->getIntrinsicID();
994 // We could really use specific intrinsic classes for masked loads
995 // and stores in IntrinsicInst.h.
996 if (IDE == Intrinsic::masked_load && IDL == Intrinsic::masked_load) {
997 // Trying to replace later masked load with the earlier one.
998 // Check that the pointers are the same, and
999 // - masks and pass-throughs are the same, or
1000 // - replacee's pass-through is "undef" and replacer's mask is a
1001 // super-set of the replacee's mask.
1002 if (MaskOp(Earlier) == MaskOp(Later) && ThruOp(Earlier) == ThruOp(Later))
1003 return true;
1004 if (!isa<UndefValue>(ThruOp(Later)))
1005 return false;
1006 return IsSubmask(MaskOp(Later), MaskOp(Earlier));
1007 }
1008 if (IDE == Intrinsic::masked_store && IDL == Intrinsic::masked_load) {
1009 // Trying to replace a load of a stored value with the store's value.
1010 // Check that the pointers are the same, and
1011 // - load's mask is a subset of store's mask, and
1012 // - load's pass-through is "undef".
1013 if (!IsSubmask(MaskOp(Later), MaskOp(Earlier)))
1014 return false;
1015 return isa<UndefValue>(ThruOp(Later));
1016 }
1017 if (IDE == Intrinsic::masked_load && IDL == Intrinsic::masked_store) {
1018 // Trying to remove a store of the loaded value.
1019 // Check that the pointers are the same, and
1020 // - store's mask is a subset of the load's mask.
1021 return IsSubmask(MaskOp(Later), MaskOp(Earlier));
1022 }
1023 if (IDE == Intrinsic::masked_store && IDL == Intrinsic::masked_store) {
1024 // Trying to remove a dead store (earlier).
1025 // Check that the pointers are the same,
1026 // - the to-be-removed store's mask is a subset of the other store's
1027 // mask.
1028 return IsSubmask(MaskOp(Earlier), MaskOp(Later));
1029 }
1030 return false;
1031 }
1032
1033 void removeMSSA(Instruction &Inst) {
1034 if (!MSSA)
1035 return;
1036 if (VerifyMemorySSA)
1037 MSSA->verifyMemorySSA();
1038 // Removing a store here can leave MemorySSA in an unoptimized state by
1039 // creating MemoryPhis that have identical arguments and by creating
1040 // MemoryUses whose defining access is not an actual clobber. The phi case
1041 // is handled by MemorySSA when passing OptimizePhis = true to
1042 // removeMemoryAccess. The non-optimized MemoryUse case is lazily updated
1043 // by MemorySSA's getClobberingMemoryAccess.
1044 MSSAUpdater->removeMemoryAccess(&Inst, true);
1045 }
1046};
1047
1048} // end anonymous namespace
1049
1050/// Determine if the memory referenced by LaterInst is from the same heap
1051/// version as EarlierInst.
1052/// This is currently called in two scenarios:
1053///
1054/// load p
1055/// ...
1056/// load p
1057///
1058/// and
1059///
1060/// x = load p
1061/// ...
1062/// store x, p
1063///
1064/// in both cases we want to verify that there are no possible writes to the
1065/// memory referenced by p between the earlier and later instruction.
1066bool EarlyCSE::isSameMemGeneration(unsigned EarlierGeneration,
1067 unsigned LaterGeneration,
1068 Instruction *EarlierInst,
1069 Instruction *LaterInst) {
1070 // Check the simple memory generation tracking first.
1071 if (EarlierGeneration == LaterGeneration)
1072 return true;
1073
1074 if (!MSSA)
1075 return false;
1076
1077 // If MemorySSA has determined that one of EarlierInst or LaterInst does not
1078 // read/write memory, then we can safely return true here.
1079 // FIXME: We could be more aggressive when checking doesNotAccessMemory(),
1080 // onlyReadsMemory(), mayReadFromMemory(), and mayWriteToMemory() in this pass
1081 // by also checking the MemorySSA MemoryAccess on the instruction. Initial
1082 // experiments suggest this isn't worthwhile, at least for C/C++ code compiled
1083 // with the default optimization pipeline.
1084 auto *EarlierMA = MSSA->getMemoryAccess(EarlierInst);
1085 if (!EarlierMA)
1086 return true;
1087 auto *LaterMA = MSSA->getMemoryAccess(LaterInst);
1088 if (!LaterMA)
1089 return true;
1090
1091 // Since we know LaterDef dominates LaterInst and EarlierInst dominates
1092 // LaterInst, if LaterDef dominates EarlierInst then it can't occur between
1093 // EarlierInst and LaterInst and neither can any other write that potentially
1094 // clobbers LaterInst.
1095 MemoryAccess *LaterDef;
1096 if (ClobberCounter < EarlyCSEMssaOptCap) {
1097 LaterDef = MSSA->getWalker()->getClobberingMemoryAccess(LaterInst);
1098 ClobberCounter++;
1099 } else
1100 LaterDef = LaterMA->getDefiningAccess();
1101
1102 return MSSA->dominates(LaterDef, EarlierMA);
1103}
1104
1105bool EarlyCSE::isOperatingOnInvariantMemAt(Instruction *I, unsigned GenAt) {
1106 // A location loaded from with an invariant_load is assumed to *never* change
1107 // within the visible scope of the compilation.
1108 if (auto *LI = dyn_cast<LoadInst>(I))
1109 if (LI->hasMetadata(LLVMContext::MD_invariant_load))
1110 return true;
1111
1112 auto MemLocOpt = MemoryLocation::getOrNone(I);
1113 if (!MemLocOpt)
1114 // "target" intrinsic forms of loads aren't currently known to
1115 // MemoryLocation::get. TODO
1116 return false;
1117 MemoryLocation MemLoc = *MemLocOpt;
1118 if (!AvailableInvariants.count(MemLoc))
1119 return false;
1120
1121 // Is the generation at which this became invariant older than the
1122 // current one?
1123 return AvailableInvariants.lookup(MemLoc) <= GenAt;
1124}
1125
1126bool EarlyCSE::handleBranchCondition(Instruction *CondInst,
1127 const CondBrInst *BI, const BasicBlock *BB,
1128 const BasicBlock *Pred) {
1129 assert(BI->getCondition() == CondInst && "Wrong condition?");
1130 assert(BI->getSuccessor(0) == BB || BI->getSuccessor(1) == BB);
1131 auto *TorF = (BI->getSuccessor(0) == BB)
1133 : ConstantInt::getFalse(BB->getContext());
1134 auto MatchBinOp = [](Instruction *I, unsigned Opcode, Value *&LHS,
1135 Value *&RHS) {
1136 if (Opcode == Instruction::And &&
1138 return true;
1139 else if (Opcode == Instruction::Or &&
1141 return true;
1142 return false;
1143 };
1144 // If the condition is AND operation, we can propagate its operands into the
1145 // true branch. If it is OR operation, we can propagate them into the false
1146 // branch.
1147 unsigned PropagateOpcode =
1148 (BI->getSuccessor(0) == BB) ? Instruction::And : Instruction::Or;
1149
1150 bool MadeChanges = false;
1151 SmallVector<Instruction *, 4> WorkList;
1152 SmallPtrSet<Instruction *, 4> Visited;
1153 WorkList.push_back(CondInst);
1154 while (!WorkList.empty()) {
1155 Instruction *Curr = WorkList.pop_back_val();
1156
1157 AvailableValues.insert(Curr, TorF);
1158 LLVM_DEBUG(dbgs() << "EarlyCSE CVP: Add conditional value for '"
1159 << Curr->getName() << "' as " << *TorF << " in "
1160 << BB->getName() << "\n");
1161 if (!DebugCounter::shouldExecute(CSECounter)) {
1162 LLVM_DEBUG(dbgs() << "Skipping due to debug counter\n");
1163 } else {
1164 // Replace all dominated uses with the known value.
1165 if (unsigned Count = replaceDominatedUsesWith(Curr, TorF, DT,
1166 BasicBlockEdge(Pred, BB))) {
1167 NumCSECVP += Count;
1168 MadeChanges = true;
1169 }
1170 }
1171
1172 Value *LHS, *RHS;
1173 if (MatchBinOp(Curr, PropagateOpcode, LHS, RHS))
1174 for (auto *Op : { LHS, RHS })
1175 if (Instruction *OPI = dyn_cast<Instruction>(Op))
1176 if (SimpleValue::canHandle(OPI) && Visited.insert(OPI).second)
1177 WorkList.push_back(OPI);
1178 }
1179
1180 return MadeChanges;
1181}
1182
1183Value *EarlyCSE::getMatchingValue(LoadValue &InVal, ParseMemoryInst &MemInst,
1184 unsigned CurrentGeneration) {
1185 if (InVal.DefInst == nullptr)
1186 return nullptr;
1187 if (auto *MSI = dyn_cast<MemSetInst>(InVal.DefInst)) {
1188 if (!MemInst.isLoad() || MemInst.isVolatile() || !MemInst.isUnordered())
1189 return nullptr;
1190 if (MSI->isVolatile())
1191 return nullptr;
1192 auto *Val = dyn_cast<ConstantInt>(MSI->getValue());
1193 if (!Val || !Val->isZero())
1194 return nullptr;
1195 auto Len = MSI->getLengthInBytes();
1196 if (!Len)
1197 return nullptr;
1198 Type *InstType = MemInst.getValueType();
1199 if (!InstType)
1200 return nullptr;
1201 TypeSize LoadSize = SQ.DL.getTypeStoreSize(InstType);
1202 if (LoadSize.isScalable() || Len->ult(LoadSize.getFixedValue()))
1203 return nullptr;
1204 if (!isOperatingOnInvariantMemAt(MemInst.get(), InVal.Generation) &&
1205 !isSameMemGeneration(InVal.Generation, CurrentGeneration, InVal.DefInst,
1206 MemInst.get()))
1207 return nullptr;
1208 return Constant::getNullValue(MemInst.getValueType());
1209 }
1210 if (InVal.MatchingId != MemInst.getMatchingId())
1211 return nullptr;
1212 // We don't yet handle removing loads with ordering of any kind.
1213 if (MemInst.isVolatile() || !MemInst.isUnordered())
1214 return nullptr;
1215 // We can't replace an atomic load with one which isn't also atomic.
1216 if (MemInst.isLoad() && !InVal.IsAtomic && MemInst.isAtomic())
1217 return nullptr;
1218 // The value V returned from this function is used differently depending
1219 // on whether MemInst is a load or a store. If it's a load, we will replace
1220 // MemInst with V, if it's a store, we will check if V is the same as the
1221 // available value.
1222 bool MemInstMatching = !MemInst.isLoad();
1223 Instruction *Matching = MemInstMatching ? MemInst.get() : InVal.DefInst;
1224 Instruction *Other = MemInstMatching ? InVal.DefInst : MemInst.get();
1225
1226 // For stores check the result values before checking memory generation
1227 // (otherwise isSameMemGeneration may crash).
1228 Value *Result =
1229 MemInst.isStore()
1230 ? getOrCreateResult(Matching, Other->getType(), /*CanCreate=*/false)
1231 : nullptr;
1232 if (MemInst.isStore() && InVal.DefInst != Result)
1233 return nullptr;
1234
1235 // Deal with non-target memory intrinsics.
1236 bool MatchingNTI = isHandledNonTargetIntrinsic(Matching);
1237 bool OtherNTI = isHandledNonTargetIntrinsic(Other);
1238 if (OtherNTI != MatchingNTI)
1239 return nullptr;
1240 if (OtherNTI && MatchingNTI) {
1241 if (!isNonTargetIntrinsicMatch(cast<IntrinsicInst>(InVal.DefInst),
1242 cast<IntrinsicInst>(MemInst.get())))
1243 return nullptr;
1244 }
1245
1246 if (!isOperatingOnInvariantMemAt(MemInst.get(), InVal.Generation) &&
1247 !isSameMemGeneration(InVal.Generation, CurrentGeneration, InVal.DefInst,
1248 MemInst.get()))
1249 return nullptr;
1250
1251 if (!Result)
1252 Result = getOrCreateResult(Matching, Other->getType(), /*CanCreate=*/true);
1253 return Result;
1254}
1255
1256static void combineIRFlags(Instruction &From, Value *To) {
1257 if (auto *I = dyn_cast<Instruction>(To)) {
1258 // If I being poison triggers UB, there is no need to drop those
1259 // flags. Otherwise, only retain flags present on both I and Inst.
1260 // TODO: Currently some fast-math flags are not treated as
1261 // poison-generating even though they should. Until this is fixed,
1262 // always retain flags present on both I and Inst for floating point
1263 // instructions.
1264 if (isa<FPMathOperator>(I) ||
1265 (I->hasPoisonGeneratingFlags() && !programUndefinedIfPoison(I)))
1266 I->andIRFlags(&From);
1267 }
1268 if (isa<CallBase>(&From) && isa<CallBase>(To)) {
1269 // NB: Intersection of attrs between InVal.first and Inst is overly
1270 // conservative. Since we only CSE readonly functions that have the same
1271 // memory state, we can preserve (or possibly in some cases combine)
1272 // more attributes. Likewise this implies when checking equality of
1273 // callsite for CSEing, we can probably ignore more attributes.
1274 // Generally poison generating attributes need to be handled with more
1275 // care as they can create *new* UB if preserved/combined and violated.
1276 // Attributes that imply immediate UB on the other hand would have been
1277 // violated either way.
1278 bool Success =
1279 cast<CallBase>(To)->tryIntersectAttributes(cast<CallBase>(&From));
1280 assert(Success && "Failed to intersect attributes in callsites that "
1281 "passed identical check");
1282 // For NDEBUG Compile.
1283 (void)Success;
1284 }
1285}
1286
1287bool EarlyCSE::overridingStores(const ParseMemoryInst &Earlier,
1288 const ParseMemoryInst &Later) {
1289 // Can we remove Earlier store because of Later store?
1290
1291 assert(Earlier.isUnordered() && !Earlier.isVolatile() &&
1292 "Violated invariant");
1293 if (Earlier.getPointerOperand() != Later.getPointerOperand())
1294 return false;
1295 if (!Earlier.getValueType() || !Later.getValueType() ||
1296 Earlier.getValueType() != Later.getValueType())
1297 return false;
1298 if (Earlier.getMatchingId() != Later.getMatchingId())
1299 return false;
1300 // At the moment, we don't remove ordered stores, but do remove
1301 // unordered atomic stores. There's no special requirement (for
1302 // unordered atomics) about removing atomic stores only in favor of
1303 // other atomic stores since we were going to execute the non-atomic
1304 // one anyway and the atomic one might never have become visible.
1305 if (!Earlier.isUnordered() || !Later.isUnordered())
1306 return false;
1307
1308 // Deal with non-target memory intrinsics.
1309 bool ENTI = isHandledNonTargetIntrinsic(Earlier.get());
1310 bool LNTI = isHandledNonTargetIntrinsic(Later.get());
1311 if (ENTI && LNTI)
1312 return isNonTargetIntrinsicMatch(cast<IntrinsicInst>(Earlier.get()),
1313 cast<IntrinsicInst>(Later.get()));
1314
1315 // Because of the check above, at least one of them is false.
1316 // For now disallow matching intrinsics with non-intrinsics,
1317 // so assume that the stores match if neither is an intrinsic.
1318 return ENTI == LNTI;
1319}
1320
1321bool EarlyCSE::processNode(DomTreeNode *Node) {
1322 bool Changed = false;
1323 BasicBlock *BB = Node->getBlock();
1324
1325 // If this block has a single predecessor, then the predecessor is the parent
1326 // of the domtree node and all of the live out memory values are still current
1327 // in this block. If this block has multiple predecessors, then they could
1328 // have invalidated the live-out memory values of our parent value. For now,
1329 // just be conservative and invalidate memory if this block has multiple
1330 // predecessors.
1331 if (!BB->getSinglePredecessor())
1332 ++CurrentGeneration;
1333
1334 // If this node has a single predecessor which ends in a conditional branch,
1335 // we can infer the value of the branch condition given that we took this
1336 // path. We need the single predecessor to ensure there's not another path
1337 // which reaches this block where the condition might hold a different
1338 // value. Since we're adding this to the scoped hash table (like any other
1339 // def), it will have been popped if we encounter a future merge block.
1340 if (BasicBlock *Pred = BB->getSinglePredecessor()) {
1341 if (auto *BI = dyn_cast<CondBrInst>(Pred->getTerminator())) {
1342 auto *CondInst = dyn_cast<Instruction>(BI->getCondition());
1343 if (CondInst && SimpleValue::canHandle(CondInst))
1344 Changed |= handleBranchCondition(CondInst, BI, BB, Pred);
1345 }
1346 }
1347
1348 /// LastStore - Keep track of the last non-volatile store that we saw... for
1349 /// as long as there in no instruction that reads memory. If we see a store
1350 /// to the same location, we delete the dead store. This zaps trivial dead
1351 /// stores which can occur in bitfield code among other things.
1352 Instruction *LastStore = nullptr;
1353
1354 // See if any instructions in the block can be eliminated. If so, do it. If
1355 // not, add them to AvailableValues.
1356 for (Instruction &Inst : make_early_inc_range(*BB)) {
1357 // Dead instructions should just be removed.
1358 if (isInstructionTriviallyDead(&Inst, &TLI)) {
1359 LLVM_DEBUG(dbgs() << "EarlyCSE DCE: " << Inst << '\n');
1360 if (!DebugCounter::shouldExecute(CSECounter)) {
1361 LLVM_DEBUG(dbgs() << "Skipping due to debug counter\n");
1362 continue;
1363 }
1364
1365 salvageKnowledge(&Inst, &AC);
1366 salvageDebugInfo(Inst);
1367 removeMSSA(Inst);
1368 Inst.eraseFromParent();
1369 Changed = true;
1370 ++NumSimplify;
1371 continue;
1372 }
1373
1374 // Skip assume intrinsics, they don't really have side effects (although
1375 // they're marked as such to ensure preservation of control dependencies),
1376 // and this pass will not bother with its removal. However, we should mark
1377 // its condition as true for all dominated blocks.
1378 if (auto *Assume = dyn_cast<AssumeInst>(&Inst)) {
1379 auto *CondI = dyn_cast<Instruction>(Assume->getArgOperand(0));
1380 if (CondI && SimpleValue::canHandle(CondI)) {
1381 LLVM_DEBUG(dbgs() << "EarlyCSE considering assumption: " << Inst
1382 << '\n');
1383 AvailableValues.insert(CondI, ConstantInt::getTrue(BB->getContext()));
1384 } else
1385 LLVM_DEBUG(dbgs() << "EarlyCSE skipping assumption: " << Inst << '\n');
1386 continue;
1387 }
1388
1389 // Likewise, noalias intrinsics don't actually write.
1390 if (match(&Inst,
1392 LLVM_DEBUG(dbgs() << "EarlyCSE skipping noalias intrinsic: " << Inst
1393 << '\n');
1394 continue;
1395 }
1396
1397 // Skip sideeffect intrinsics, for the same reason as assume intrinsics.
1399 LLVM_DEBUG(dbgs() << "EarlyCSE skipping sideeffect: " << Inst << '\n');
1400 continue;
1401 }
1402
1403 // Skip pseudoprobe intrinsics, for the same reason as assume intrinsics.
1405 LLVM_DEBUG(dbgs() << "EarlyCSE skipping pseudoprobe: " << Inst << '\n');
1406 continue;
1407 }
1408
1409 // We can skip all invariant.start intrinsics since they only read memory,
1410 // and we can forward values across it. For invariant starts without
1411 // invariant ends, we can use the fact that the invariantness never ends to
1412 // start a scope in the current generaton which is true for all future
1413 // generations. Also, we dont need to consume the last store since the
1414 // semantics of invariant.start allow us to perform DSE of the last
1415 // store, if there was a store following invariant.start. Consider:
1416 //
1417 // store 30, i8* p
1418 // invariant.start(p)
1419 // store 40, i8* p
1420 // We can DSE the store to 30, since the store 40 to invariant location p
1421 // causes undefined behaviour.
1423 // If there are any uses, the scope might end.
1424 if (!Inst.use_empty())
1425 continue;
1426 MemoryLocation MemLoc =
1428 // Don't start a scope if we already have a better one pushed
1429 if (!AvailableInvariants.count(MemLoc))
1430 AvailableInvariants.insert(MemLoc, CurrentGeneration);
1431 continue;
1432 }
1433
1434 if (isGuard(&Inst)) {
1435 if (auto *CondI =
1436 dyn_cast<Instruction>(cast<CallInst>(Inst).getArgOperand(0))) {
1437 if (SimpleValue::canHandle(CondI)) {
1438 // Do we already know the actual value of this condition?
1439 if (auto *KnownCond = AvailableValues.lookup(CondI)) {
1440 // Is the condition known to be true?
1441 if (isa<ConstantInt>(KnownCond) &&
1442 cast<ConstantInt>(KnownCond)->isOne()) {
1444 << "EarlyCSE removing guard: " << Inst << '\n');
1445 salvageKnowledge(&Inst, &AC);
1446 removeMSSA(Inst);
1447 Inst.eraseFromParent();
1448 Changed = true;
1449 continue;
1450 } else
1451 // Use the known value if it wasn't true.
1452 cast<CallInst>(Inst).setArgOperand(0, KnownCond);
1453 }
1454 // The condition we're on guarding here is true for all dominated
1455 // locations.
1456 AvailableValues.insert(CondI, ConstantInt::getTrue(BB->getContext()));
1457 }
1458 }
1459
1460 // Guard intrinsics read all memory, but don't write any memory.
1461 // Accordingly, don't update the generation but consume the last store (to
1462 // avoid an incorrect DSE).
1463 LastStore = nullptr;
1464 continue;
1465 }
1466
1467 // If the instruction can be simplified (e.g. X+0 = X) then replace it with
1468 // its simpler value.
1469 if (Value *V = simplifyInstruction(&Inst, SQ)) {
1470 LLVM_DEBUG(dbgs() << "EarlyCSE Simplify: " << Inst << " to: " << *V
1471 << '\n');
1472 if (!DebugCounter::shouldExecute(CSECounter)) {
1473 LLVM_DEBUG(dbgs() << "Skipping due to debug counter\n");
1474 } else {
1475 bool Killed = false;
1476 if (!Inst.use_empty()) {
1477 Inst.replaceAllUsesWith(V);
1478 Changed = true;
1479 }
1480 if (isInstructionTriviallyDead(&Inst, &TLI)) {
1481 salvageKnowledge(&Inst, &AC);
1482 removeMSSA(Inst);
1483 Inst.eraseFromParent();
1484 Changed = true;
1485 Killed = true;
1486 }
1487 if (Changed)
1488 ++NumSimplify;
1489 if (Killed)
1490 continue;
1491 }
1492 }
1493
1494 // Make sure stores prior to a potential unwind are not removed, as the
1495 // caller may read the memory.
1496 if (Inst.mayThrow())
1497 LastStore = nullptr;
1498
1499 // If this is a simple instruction that we can value number, process it.
1500 if (SimpleValue::canHandle(&Inst)) {
1501 if ([[maybe_unused]] auto *CI = dyn_cast<ConstrainedFPIntrinsic>(&Inst)) {
1502 assert(CI->getExceptionBehavior() != fp::ebStrict &&
1503 "Unexpected ebStrict from SimpleValue::canHandle()");
1504 assert((!CI->getRoundingMode() ||
1505 CI->getRoundingMode() != RoundingMode::Dynamic) &&
1506 "Unexpected dynamic rounding from SimpleValue::canHandle()");
1507 }
1508 // See if the instruction has an available value. If so, use it.
1509 if (Value *V = AvailableValues.lookup(&Inst)) {
1510 LLVM_DEBUG(dbgs() << "EarlyCSE CSE: " << Inst << " to: " << *V
1511 << '\n');
1512 if (!DebugCounter::shouldExecute(CSECounter)) {
1513 LLVM_DEBUG(dbgs() << "Skipping due to debug counter\n");
1514 continue;
1515 }
1516 combineIRFlags(Inst, V);
1517 Inst.replaceAllUsesWith(V);
1518 salvageKnowledge(&Inst, &AC);
1519 removeMSSA(Inst);
1520 Inst.eraseFromParent();
1521 Changed = true;
1522 ++NumCSE;
1523 continue;
1524 }
1525
1526 // Otherwise, just remember that this value is available.
1527 AvailableValues.insert(&Inst, &Inst);
1528 continue;
1529 }
1530
1531 ParseMemoryInst MemInst(&Inst, TTI);
1532 // If this is a non-volatile load, process it.
1533 if (MemInst.isValid() && MemInst.isLoad()) {
1534 // (conservatively) we can't peak past the ordering implied by this
1535 // operation, but we can add this load to our set of available values
1536 if (MemInst.isVolatile() || !MemInst.isUnordered()) {
1537 LastStore = nullptr;
1538 ++CurrentGeneration;
1539 }
1540
1541 if (MemInst.isInvariantLoad()) {
1542 // If we pass an invariant load, we know that memory location is
1543 // indefinitely constant from the moment of first dereferenceability.
1544 // We conservatively treat the invariant_load as that moment. If we
1545 // pass a invariant load after already establishing a scope, don't
1546 // restart it since we want to preserve the earliest point seen.
1547 auto MemLoc = MemoryLocation::get(&Inst);
1548 if (!AvailableInvariants.count(MemLoc))
1549 AvailableInvariants.insert(MemLoc, CurrentGeneration);
1550 }
1551
1552 // If we have an available version of this load, and if it is the right
1553 // generation or the load is known to be from an invariant location,
1554 // replace this instruction.
1555 //
1556 // If either the dominating load or the current load are invariant, then
1557 // we can assume the current load loads the same value as the dominating
1558 // load.
1559 LoadValue InVal = AvailableLoads.lookup(MemInst.getPointerOperand());
1560 if (Value *Op = getMatchingValue(InVal, MemInst, CurrentGeneration)) {
1561 LLVM_DEBUG(dbgs() << "EarlyCSE CSE LOAD: " << Inst
1562 << " to: " << *InVal.DefInst << '\n');
1563 if (!DebugCounter::shouldExecute(CSECounter)) {
1564 LLVM_DEBUG(dbgs() << "Skipping due to debug counter\n");
1565 continue;
1566 }
1567 if (InVal.IsLoad)
1568 if (auto *I = dyn_cast<Instruction>(Op))
1569 combineMetadataForCSE(I, &Inst, false);
1570 if (!Inst.use_empty())
1571 Inst.replaceAllUsesWith(Op);
1572 salvageKnowledge(&Inst, &AC);
1573 removeMSSA(Inst);
1574 Inst.eraseFromParent();
1575 Changed = true;
1576 ++NumCSELoad;
1577 continue;
1578 }
1579
1580 // Otherwise, remember that we have this instruction.
1581 AvailableLoads.insert(MemInst.getPointerOperand(),
1582 LoadValue(&Inst, CurrentGeneration,
1583 MemInst.getMatchingId(),
1584 MemInst.isAtomic(),
1585 MemInst.isLoad()));
1586 LastStore = nullptr;
1587 continue;
1588 }
1589
1590 // If this instruction may read from memory, forget LastStore. Load/store
1591 // intrinsics will indicate both a read and a write to memory. The target
1592 // may override this (e.g. so that a store intrinsic does not read from
1593 // memory, and thus will be treated the same as a regular store for
1594 // commoning purposes).
1595 if (Inst.mayReadFromMemory() &&
1596 !(MemInst.isValid() && !MemInst.mayReadFromMemory()))
1597 LastStore = nullptr;
1598
1599 // If this is a read-only or write-only call, process it. Skip store
1600 // MemInsts, as they will be more precisely handled later on. Also skip
1601 // memsets, as DSE may be able to optimize them better by removing the
1602 // earlier rather than later store.
1603 if (CallValue::canHandle(&Inst) &&
1604 (!MemInst.isValid() || !MemInst.isStore()) && !isa<MemSetInst>(&Inst)) {
1605 // If we have an available version of this call, and if it is the right
1606 // generation, replace this instruction.
1607 std::pair<Instruction *, unsigned> InVal = AvailableCalls.lookup(&Inst);
1608 if (InVal.first != nullptr &&
1609 isSameMemGeneration(InVal.second, CurrentGeneration, InVal.first,
1610 &Inst) &&
1611 InVal.first->mayReadFromMemory() == Inst.mayReadFromMemory()) {
1612 LLVM_DEBUG(dbgs() << "EarlyCSE CSE CALL: " << Inst
1613 << " to: " << *InVal.first << '\n');
1614 if (!DebugCounter::shouldExecute(CSECounter)) {
1615 LLVM_DEBUG(dbgs() << "Skipping due to debug counter\n");
1616 continue;
1617 }
1618 combineIRFlags(Inst, InVal.first);
1619 if (!Inst.use_empty())
1620 Inst.replaceAllUsesWith(InVal.first);
1621 salvageKnowledge(&Inst, &AC);
1622 removeMSSA(Inst);
1623 Inst.eraseFromParent();
1624 Changed = true;
1625 ++NumCSECall;
1626 continue;
1627 }
1628
1629 // Increase memory generation for writes. Do this before inserting
1630 // the call, so it has the generation after the write occurred.
1631 if (Inst.mayWriteToMemory())
1632 ++CurrentGeneration;
1633
1634 // Otherwise, remember that we have this instruction.
1635 AvailableCalls.insert(&Inst, std::make_pair(&Inst, CurrentGeneration));
1636 continue;
1637 }
1638
1639 // Compare GEP instructions based on offset.
1640 if (GEPValue::canHandle(&Inst)) {
1641 auto *GEP = cast<GetElementPtrInst>(&Inst);
1642 APInt Offset = APInt(SQ.DL.getIndexTypeSizeInBits(GEP->getType()), 0);
1643 GEPValue GEPVal(GEP, GEP->accumulateConstantOffset(SQ.DL, Offset)
1644 ? Offset.trySExtValue()
1645 : std::nullopt);
1646 if (Value *V = AvailableGEPs.lookup(GEPVal)) {
1647 LLVM_DEBUG(dbgs() << "EarlyCSE CSE GEP: " << Inst << " to: " << *V
1648 << '\n');
1649 combineIRFlags(Inst, V);
1650 Inst.replaceAllUsesWith(V);
1651 salvageKnowledge(&Inst, &AC);
1652 removeMSSA(Inst);
1653 Inst.eraseFromParent();
1654 Changed = true;
1655 ++NumCSEGEP;
1656 continue;
1657 }
1658
1659 // Otherwise, just remember that we have this GEP.
1660 AvailableGEPs.insert(GEPVal, &Inst);
1661 continue;
1662 }
1663
1664 // A release fence requires that all stores complete before it, but does
1665 // not prevent the reordering of following loads 'before' the fence. As a
1666 // result, we don't need to consider it as writing to memory and don't need
1667 // to advance the generation. We do need to prevent DSE across the fence,
1668 // but that's handled above.
1669 if (auto *FI = dyn_cast<FenceInst>(&Inst))
1670 if (FI->getOrdering() == AtomicOrdering::Release) {
1671 assert(Inst.mayReadFromMemory() && "relied on to prevent DSE above");
1672 continue;
1673 }
1674
1675 // write back DSE - If we write back the same value we just loaded from
1676 // the same location and haven't passed any intervening writes or ordering
1677 // operations, we can remove the write. The primary benefit is in allowing
1678 // the available load table to remain valid and value forward past where
1679 // the store originally was.
1680 if (MemInst.isValid() && MemInst.isStore()) {
1681 LoadValue InVal = AvailableLoads.lookup(MemInst.getPointerOperand());
1682 if (InVal.DefInst &&
1683 InVal.DefInst ==
1684 getMatchingValue(InVal, MemInst, CurrentGeneration)) {
1685 LLVM_DEBUG(dbgs() << "EarlyCSE DSE (writeback): " << Inst << '\n');
1686 if (!DebugCounter::shouldExecute(CSECounter)) {
1687 LLVM_DEBUG(dbgs() << "Skipping due to debug counter\n");
1688 continue;
1689 }
1690 salvageKnowledge(&Inst, &AC);
1691 removeMSSA(Inst);
1692 Inst.eraseFromParent();
1693 Changed = true;
1694 ++NumDSE;
1695 // We can avoid incrementing the generation count since we were able
1696 // to eliminate this store.
1697 continue;
1698 }
1699 }
1700
1701 // Okay, this isn't something we can CSE at all. Check to see if it is
1702 // something that could modify memory. If so, our available memory values
1703 // cannot be used so bump the generation count.
1704 if (Inst.mayWriteToMemory()) {
1705 ++CurrentGeneration;
1706
1707 if (MemInst.isValid() && MemInst.isStore()) {
1708 // We do a trivial form of DSE if there are two stores to the same
1709 // location with no intervening loads. Delete the earlier store.
1710 if (LastStore) {
1711 if (overridingStores(ParseMemoryInst(LastStore, TTI), MemInst)) {
1712 LLVM_DEBUG(dbgs() << "EarlyCSE DEAD STORE: " << *LastStore
1713 << " due to: " << Inst << '\n');
1714 if (!DebugCounter::shouldExecute(CSECounter)) {
1715 LLVM_DEBUG(dbgs() << "Skipping due to debug counter\n");
1716 } else {
1717 salvageKnowledge(&Inst, &AC);
1718 removeMSSA(*LastStore);
1719 LastStore->eraseFromParent();
1720 Changed = true;
1721 ++NumDSE;
1722 LastStore = nullptr;
1723 }
1724 }
1725 // fallthrough - we can exploit information about this store
1726 }
1727
1728 // Okay, we just invalidated anything we knew about loaded values. Try
1729 // to salvage *something* by remembering that the stored value is a live
1730 // version of the pointer. It is safe to forward from volatile stores
1731 // to non-volatile loads, so we don't have to check for volatility of
1732 // the store.
1733 AvailableLoads.insert(MemInst.getPointerOperand(),
1734 LoadValue(&Inst, CurrentGeneration,
1735 MemInst.getMatchingId(),
1736 MemInst.isAtomic(),
1737 MemInst.isLoad()));
1738
1739 // Remember that this was the last unordered store we saw for DSE. We
1740 // don't yet handle DSE on ordered or volatile stores since we don't
1741 // have a good way to model the ordering requirement for following
1742 // passes once the store is removed. We could insert a fence, but
1743 // since fences are slightly stronger than stores in their ordering,
1744 // it's not clear this is a profitable transform. Another option would
1745 // be to merge the ordering with that of the post dominating store.
1746 if (MemInst.isUnordered() && !MemInst.isVolatile())
1747 LastStore = &Inst;
1748 else
1749 LastStore = nullptr;
1750 }
1751 }
1752 }
1753
1754 return Changed;
1755}
1756
1757bool EarlyCSE::run() {
1758 // Note, deque is being used here because there is significant performance
1759 // gains over vector when the container becomes very large due to the
1760 // specific access patterns. For more information see the mailing list
1761 // discussion on this:
1762 // http://lists.llvm.org/pipermail/llvm-commits/Week-of-Mon-20120116/135228.html
1763 std::deque<StackNode *> nodesToProcess;
1764
1765 bool Changed = false;
1766
1767 // Process the root node.
1768 nodesToProcess.push_back(new StackNode(
1769 AvailableValues, AvailableLoads, AvailableInvariants, AvailableCalls,
1770 AvailableGEPs, CurrentGeneration, DT.getRootNode(),
1771 DT.getRootNode()->begin(), DT.getRootNode()->end()));
1772
1773 assert(!CurrentGeneration && "Create a new EarlyCSE instance to rerun it.");
1774
1775 // Process the stack.
1776 while (!nodesToProcess.empty()) {
1777 // Grab the first item off the stack. Set the current generation, remove
1778 // the node from the stack, and process it.
1779 StackNode *NodeToProcess = nodesToProcess.back();
1780
1781 // Initialize class members.
1782 CurrentGeneration = NodeToProcess->currentGeneration();
1783
1784 // Check if the node needs to be processed.
1785 if (!NodeToProcess->isProcessed()) {
1786 // Process the node.
1787 Changed |= processNode(NodeToProcess->node());
1788 NodeToProcess->childGeneration(CurrentGeneration);
1789 NodeToProcess->process();
1790 } else if (NodeToProcess->childIter() != NodeToProcess->end()) {
1791 // Push the next child onto the stack.
1792 DomTreeNode *child = NodeToProcess->nextChild();
1793 nodesToProcess.push_back(new StackNode(
1794 AvailableValues, AvailableLoads, AvailableInvariants, AvailableCalls,
1795 AvailableGEPs, NodeToProcess->childGeneration(), child,
1796 child->begin(), child->end()));
1797 } else {
1798 // It has been processed, and there are no more children to process,
1799 // so delete it and pop it off the stack.
1800 delete NodeToProcess;
1801 nodesToProcess.pop_back();
1802 }
1803 } // while (!nodes...)
1804
1805 return Changed;
1806}
1807
1810 auto &TLI = AM.getResult<TargetLibraryAnalysis>(F);
1811 auto &TTI = AM.getResult<TargetIRAnalysis>(F);
1812 auto &DT = AM.getResult<DominatorTreeAnalysis>(F);
1813 auto &AC = AM.getResult<AssumptionAnalysis>(F);
1814 auto *MSSA =
1815 UseMemorySSA ? &AM.getResult<MemorySSAAnalysis>(F).getMSSA() : nullptr;
1816
1817 EarlyCSE CSE(F.getDataLayout(), TLI, TTI, DT, AC, MSSA);
1818
1819 if (!CSE.run())
1820 return PreservedAnalyses::all();
1821
1824 if (UseMemorySSA)
1826 return PA;
1827}
1828
1830 raw_ostream &OS, function_ref<StringRef(StringRef)> MapClassName2PassName) {
1831 static_cast<PassInfoMixin<EarlyCSEPass> *>(this)->printPipeline(
1832 OS, MapClassName2PassName);
1833 OS << '<';
1834 if (UseMemorySSA)
1835 OS << "memssa";
1836 OS << '>';
1837}
1838
1839namespace {
1840
1841/// A simple and fast domtree-based CSE pass.
1842///
1843/// This pass does a simple depth-first walk over the dominator tree,
1844/// eliminating trivially redundant instructions and using instsimplify to
1845/// canonicalize things as it goes. It is intended to be fast and catch obvious
1846/// cases so that instcombine and other passes are more effective. It is
1847/// expected that a later pass of GVN will catch the interesting/hard cases.
1848template<bool UseMemorySSA>
1849class EarlyCSELegacyCommonPass : public FunctionPass {
1850public:
1851 static char ID;
1852
1853 EarlyCSELegacyCommonPass() : FunctionPass(ID) {
1854 if (UseMemorySSA)
1856 else
1858 }
1859
1860 bool runOnFunction(Function &F) override {
1861 if (skipFunction(F))
1862 return false;
1863
1864 auto &TLI = getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F);
1865 auto &TTI = getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
1866 auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
1867 auto &AC = getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
1868 auto *MSSA =
1869 UseMemorySSA ? &getAnalysis<MemorySSAWrapperPass>().getMSSA() : nullptr;
1870
1871 EarlyCSE CSE(F.getDataLayout(), TLI, TTI, DT, AC, MSSA);
1872
1873 return CSE.run();
1874 }
1875
1876 void getAnalysisUsage(AnalysisUsage &AU) const override {
1877 AU.addRequired<AssumptionCacheTracker>();
1878 AU.addRequired<DominatorTreeWrapperPass>();
1879 AU.addRequired<TargetLibraryInfoWrapperPass>();
1880 AU.addRequired<TargetTransformInfoWrapperPass>();
1881 if (UseMemorySSA) {
1882 AU.addRequired<AAResultsWrapperPass>();
1883 AU.addRequired<MemorySSAWrapperPass>();
1884 AU.addPreserved<MemorySSAWrapperPass>();
1885 }
1886 AU.addPreserved<GlobalsAAWrapperPass>();
1887 AU.addPreserved<AAResultsWrapperPass>();
1888 AU.setPreservesCFG();
1889 }
1890};
1891
1892} // end anonymous namespace
1893
1894using EarlyCSELegacyPass = EarlyCSELegacyCommonPass</*UseMemorySSA=*/false>;
1895
1896template<>
1897char EarlyCSELegacyPass::ID = 0;
1898
1899INITIALIZE_PASS_BEGIN(EarlyCSELegacyPass, "early-cse", "Early CSE", false,
1900 false)
1905INITIALIZE_PASS_END(EarlyCSELegacyPass, "early-cse", "Early CSE", false, false)
1906
1907using EarlyCSEMemSSALegacyPass =
1908 EarlyCSELegacyCommonPass</*UseMemorySSA=*/true>;
1909
1910template<>
1911char EarlyCSEMemSSALegacyPass::ID = 0;
1912
1914 if (UseMemorySSA)
1915 return new EarlyCSEMemSSALegacyPass();
1916 else
1917 return new EarlyCSELegacyPass();
1918}
1919
1920INITIALIZE_PASS_BEGIN(EarlyCSEMemSSALegacyPass, "early-cse-memssa",
1921 "Early CSE w/ MemorySSA", false, false)
1928INITIALIZE_PASS_END(EarlyCSEMemSSALegacyPass, "early-cse-memssa",
1929 "Early CSE w/ MemorySSA", false, false)
#define Success
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
static bool isLoad(int Opcode)
static bool isStore(int Opcode)
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
This file defines the BumpPtrAllocator interface.
Atomic ordering constants.
#define X(NUM, ENUM, NAME)
Definition ELF.h:853
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
Optimize for code generation
This file contains the declarations for the subclasses of Constant, which represent the different fla...
This file provides an implementation of debug counters.
#define DEBUG_COUNTER(VARNAME, COUNTERNAME, DESC)
This file defines DenseMapInfo traits for DenseMap.
static cl::opt< bool > EarlyCSEDebugHash("earlycse-debug-hash", cl::init(false), cl::Hidden, cl::desc("Perform extra assertion checking to verify that SimpleValue's hash " "function is well-behaved w.r.t. its isEqual predicate"))
static void combineIRFlags(Instruction &From, Value *To)
EarlyCSELegacyCommonPass< false > EarlyCSELegacyPass
early cse Early CSE w MemorySSA
static unsigned getHashValueImpl(SimpleValue Val)
Definition EarlyCSE.cpp:216
static bool isEqualImpl(SimpleValue LHS, SimpleValue RHS)
Definition EarlyCSE.cpp:337
static bool matchSelectWithOptionalNotCond(Value *V, Value *&Cond, Value *&A, Value *&B, SelectPatternFlavor &Flavor)
Match a 'select' including an optional 'not's of the condition.
Definition EarlyCSE.cpp:158
static unsigned hashCallInst(CallInst *CI)
Definition EarlyCSE.cpp:205
static cl::opt< unsigned > EarlyCSEMssaOptCap("earlycse-mssa-optimization-cap", cl::init(500), cl::Hidden, cl::desc("Enable imprecision in EarlyCSE in pathological cases, in exchange " "for faster compile. Caps the MemorySSA clobbering calls."))
This file provides the interface for a simple, fast CSE pass.
static bool runOnFunction(Function &F, bool PostInlining)
This is the interface for a simple mod/ref and alias analysis over globals.
Hexagon Common GEP
IRTranslator LLVM IR MI
This header defines various interfaces for pass management in LLVM.
static Constant * getFalse(Type *Ty)
For a boolean type or a vector of boolean type, return false or a vector with every element false.
Value * getMatchingValue(LoadValue LV, LoadInst *LI, unsigned CurrentGeneration, BatchAAResults &BAA, function_ref< MemorySSA *()> GetMSSA)
#define F(x, y, z)
Definition MD5.cpp:54
#define I(x, y, z)
Definition MD5.cpp:57
This file exposes an interface to building/using memory SSA to walk memory instructions using a use/d...
static bool isInvariantLoad(const Instruction *I, const Value *Ptr, const bool IsKernelFn)
uint64_t IntrinsicInst * II
#define INITIALIZE_PASS_DEPENDENCY(depName)
Definition PassSupport.h:42
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
Definition PassSupport.h:44
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
Definition PassSupport.h:39
const SmallVectorImpl< MachineOperand > & Cond
static bool isValid(const char C)
Returns true if C is a valid mangled character: <0-9a-zA-Z_>.
static Type * getValueType(Value *V, bool LookThroughCmp=false)
Returns the "element type" of the given value/instruction V.
This file contains some templates that are useful if you are working with the STL at all.
separate const offset from Split GEPs to a variadic base and a constant offset for better CSE
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition Statistic.h:171
#define LLVM_DEBUG(...)
Definition Debug.h:119
static TableGen::Emitter::Opt Y("gen-skeleton-entry", EmitSkeleton, "Generate example skeleton entry")
This pass exposes codegen information to IR-level passes.
Value * RHS
Value * LHS
bool isProcessed() const
unsigned currentGeneration() const
unsigned childGeneration() const
DomTreeNode::const_iterator end() const
void process()
DomTreeNode * nextChild()
DomTreeNode::const_iterator childIter() const
DomTreeNode * node()
A wrapper pass to provide the legacy pass manager access to a suitably prepared AAResults object.
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
AnalysisUsage & addRequired()
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
LLVM_ABI void setPreservesCFG()
This function should be called by the pass, iff they do not:
Definition Pass.cpp:270
A function analysis which provides an AssumptionCache.
An immutable pass that tracks lazily created AssumptionCache objects.
LLVM_ABI const BasicBlock * getSinglePredecessor() const
Return the predecessor of this block if it has a single predecessor block.
LLVM_ABI LLVMContext & getContext() const
Get the context in which this basic block lives.
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction; assumes that the block is well-formed.
Definition BasicBlock.h:237
Represents analyses that only rely on functions' control flow.
Definition Analysis.h:73
bool onlyWritesMemory(unsigned OpNo) const
bool onlyReadsMemory(unsigned OpNo) const
bool isConvergent() const
Determine if the invoke is convergent.
This class represents a function call, abstracting a target machine's calling convention.
This is the base class for all instructions that perform data casts.
Definition InstrTypes.h:512
This class is the base class for the comparison instructions.
Definition InstrTypes.h:728
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
Definition InstrTypes.h:740
@ ICMP_SLT
signed less than
Definition InstrTypes.h:769
@ ICMP_SLE
signed less or equal
Definition InstrTypes.h:770
@ ICMP_UGE
unsigned greater or equal
Definition InstrTypes.h:764
@ ICMP_UGT
unsigned greater than
Definition InstrTypes.h:763
@ ICMP_SGT
signed greater than
Definition InstrTypes.h:767
@ ICMP_ULT
unsigned less than
Definition InstrTypes.h:765
@ ICMP_SGE
signed greater or equal
Definition InstrTypes.h:768
@ ICMP_ULE
unsigned less or equal
Definition InstrTypes.h:766
Predicate getSwappedPredicate() const
For example, EQ->EQ, SLE->SGE, ULT->UGT, OEQ->OEQ, ULE->UGE, OLT->OGT, etc.
Definition InstrTypes.h:890
Predicate getInversePredicate() const
For example, EQ -> NE, UGT -> ULE, SLT -> SGE, OEQ -> UNE, UGT -> OLE, OLT -> UGE,...
Definition InstrTypes.h:852
Predicate getPredicate() const
Return the predicate for this instruction.
Definition InstrTypes.h:828
An abstraction over a floating-point predicate, and a pack of an integer predicate with samesign info...
Value * getCondition() const
BasicBlock * getSuccessor(unsigned i) const
static LLVM_ABI ConstantInt * getTrue(LLVMContext &Context)
static LLVM_ABI Constant * getNullValue(Type *Ty)
Constructor to create a '0' constant of arbitrary type.
LLVM_ABI unsigned getIndexTypeSizeInBits(Type *Ty) const
The size in bits of the index used in GEP calculation for this type.
TypeSize getTypeStoreSize(Type *Ty) const
Returns the maximum number of bytes that may be overwritten by storing the specified type.
Definition DataLayout.h:579
static bool shouldExecute(CounterInfo &Counter)
iterator begin() const
iterator end() const
Analysis pass which computes a DominatorTree.
Definition Dominators.h:270
DomTreeNodeBase< NodeT > * getRootNode()
getRootNode - This returns the entry node for the CFG of the function.
Legacy analysis pass which computes a DominatorTree.
Definition Dominators.h:306
This instruction extracts a struct member or array element value from an aggregate value.
This class represents a freeze function that returns random concrete value if an operand is either a ...
FunctionPass class - This class is used to implement most global optimizations.
Definition Pass.h:314
bool isPresplitCoroutine() const
Determine if the function is presplit coroutine.
Definition Function.h:547
Represents calls to the gc.relocate intrinsic.
This instruction inserts a struct field of array element value into an aggregate value.
LLVM_ABI bool mayThrow(bool IncludePhaseOneUnwind=false) const LLVM_READONLY
Return true if this instruction may throw an exception.
LLVM_ABI bool mayWriteToMemory() const LLVM_READONLY
Return true if this instruction may modify memory.
LLVM_ABI InstListType::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
LLVM_ABI bool isIdenticalToWhenDefined(const Instruction *I, bool IntersectAttrs=false) const LLVM_READONLY
This is like isIdenticalTo, except that it ignores the SubclassOptionalData flags,...
LLVM_ABI const Function * getFunction() const
Return the function this instruction belongs to.
LLVM_ABI bool mayReadFromMemory() const LLVM_READONLY
Return true if this instruction may read memory.
unsigned getOpcode() const
Returns a member of one of the enums like Instruction::Add.
Intrinsic::ID getIntrinsicID() const
Return the intrinsic ID of this intrinsic.
static LLVM_ABI MemoryLocation get(const LoadInst *LI)
Return a location with information about the memory reference by the given instruction.
static LLVM_ABI std::optional< MemoryLocation > getOrNone(const Instruction *Inst)
static LLVM_ABI MemoryLocation getForArgument(const CallBase *Call, unsigned ArgIdx, const TargetLibraryInfo *TLI)
Return a location representing a particular argument of a call.
An analysis that produces MemorySSA for a function.
Definition MemorySSA.h:922
MemoryAccess * getClobberingMemoryAccess(const Instruction *I, BatchAAResults &AA)
Given a memory Mod/Ref/ModRef'ing instruction, calling this will give you the nearest dominating Memo...
Definition MemorySSA.h:1035
Legacy analysis pass which computes MemorySSA.
Definition MemorySSA.h:975
LLVM_ABI bool dominates(const MemoryAccess *A, const MemoryAccess *B) const
Given two memory accesses in potentially different blocks, determine whether MemoryAccess A dominates...
LLVM_ABI MemorySSAWalker * getWalker()
MemoryUseOrDef * getMemoryAccess(const Instruction *I) const
Given a memory Mod/Ref'ing instruction, get the MemorySSA access associated with it.
Definition MemorySSA.h:720
static LLVM_ABI PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
A set of analyses that are preserved following a run of a transformation pass.
Definition Analysis.h:112
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition Analysis.h:118
PreservedAnalyses & preserveSet()
Mark an analysis set as preserved.
Definition Analysis.h:151
PreservedAnalyses & preserve()
Mark an analysis as preserved.
Definition Analysis.h:132
ScopedHashTableScope< SimpleValue, Value *, DenseMapInfo< SimpleValue >, AllocatorTy > ScopeTy
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
void push_back(const T &Elt)
Represent a constant reference to a string, i.e.
Definition StringRef.h:56
Analysis pass providing the TargetTransformInfo.
Analysis pass providing the TargetLibraryInfo.
Wrapper pass for TargetTransformInfo.
LLVM_ABI bool getTgtMemIntrinsic(IntrinsicInst *Inst, MemIntrinsicInfo &Info) const
Value * getOperand(unsigned i) const
Definition User.h:207
iterator_range< value_op_iterator > operand_values()
Definition User.h:291
LLVM Value Representation.
Definition Value.h:75
LLVM_ABI void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
Definition Value.cpp:552
bool use_empty() const
Definition Value.h:346
constexpr ScalarTy getFixedValue() const
Definition TypeSize.h:200
constexpr bool isScalable() const
Returns whether the quantity is scaled by a runtime quantity (vscale).
Definition TypeSize.h:168
An efficient, type-erasing, non-owning reference to a callable.
const ParentTy * getParent() const
Definition ilist_node.h:34
This class implements an extremely fast bulk output stream that can only output to a stream.
Definition raw_ostream.h:53
Changed
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition CallingConv.h:24
@ BasicBlock
Various leaf nodes.
Definition ISDOpcodes.h:81
BinaryOp_match< SrcTy, SpecificConstantMatch, TargetOpcode::G_XOR, true > m_Not(const SrcTy &&Src)
Matches a register not-ed by a G_XOR.
auto m_Cmp()
Matches any compare instruction and ignore it.
bool match(Val *V, const Pattern &P)
specificval_ty m_Specific(const Value *V)
Match if we have a specific specified value.
IntrinsicID_match m_Intrinsic()
Match intrinsic calls like this: m_Intrinsic<Intrinsic::fabs>(m_Value(X))
ThreeOps_match< Cond, LHS, RHS, Instruction::Select > m_Select(const Cond &C, const LHS &L, const RHS &R)
Matches SelectInst.
auto m_Value()
Match an arbitrary value and ignore it.
auto m_LogicalOr()
Matches L || R where L and R are arbitrary values.
CmpClass_match< LHS, RHS, ICmpInst > m_ICmp(CmpPredicate &Pred, const LHS &L, const RHS &R)
auto m_LogicalAnd()
Matches L && R where L and R are arbitrary values.
initializer< Ty > init(const Ty &Val)
DXILDebugInfoMap run(Module &M)
@ ebStrict
This corresponds to "fpexcept.strict".
Definition FPEnv.h:42
NodeAddr< NodeBase * > Node
Definition RDFGraph.h:381
Context & getContext() const
Definition BasicBlock.h:99
iterator end() const
Definition BasicBlock.h:89
friend class Instruction
Iterator for Instructions in a `BasicBlock.
Definition BasicBlock.h:73
This is an optimization pass for GlobalISel generic memory operations.
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
Definition STLExtras.h:315
@ Offset
Definition DWP.cpp:558
FunctionAddr VTableAddr Value
Definition InstrProf.h:137
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:643
LLVM_ABI void salvageDebugInfo(const MachineRegisterInfo &MRI, MachineInstr &MI)
Assuming the instruction MI is going to be deleted, attempt to salvage debug users of MI by writing t...
Definition Utils.cpp:1687
const Value * getLoadStorePointerOperand(const Value *V)
A helper function that returns the pointer operand of a load or store instruction.
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
Definition STLExtras.h:633
const Value * getPointerOperand(const Value *V)
A helper function that returns the pointer operand of a load, store or GEP instruction.
LLVM_ABI void initializeEarlyCSEMemSSALegacyPassPass(PassRegistry &)
LLVM_ABI Value * simplifyInstruction(Instruction *I, const SimplifyQuery &Q)
See if we can compute a simplified version of this instruction.
DomTreeNodeBase< BasicBlock > DomTreeNode
Definition Dominators.h:94
LLVM_ABI bool isInstructionTriviallyDead(Instruction *I, const TargetLibraryInfo *TLI=nullptr)
Return true if the result produced by the instruction is not used, and the instruction will return.
Definition Local.cpp:403
LLVM_ABI bool isGuard(const User *U)
Returns true iff U has semantics of a guard expressed in a form of call of llvm.experimental....
SelectPatternFlavor
Specific patterns of select instructions we can match.
@ SPF_UMIN
Signed minimum.
@ SPF_UMAX
Signed maximum.
@ SPF_SMAX
Unsigned minimum.
@ SPF_UNKNOWN
decltype(auto) get(const PointerIntPair< PointerTy, IntBits, IntType, PtrTraits, Info > &Pair)
LLVM_ABI bool programUndefinedIfPoison(const Instruction *Inst)
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:209
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
Definition Casting.h:547
LLVM_ABI void combineMetadataForCSE(Instruction *K, const Instruction *J, bool DoesKMove)
Combine the metadata of two instructions so that K can replace J.
Definition Local.cpp:3105
@ Other
Any other memory.
Definition ModRef.h:68
TargetTransformInfo TTI
LLVM_ABI bool VerifyMemorySSA
Enables verification of MemorySSA.
Definition MemorySSA.cpp:84
LLVM_ABI bool salvageKnowledge(Instruction *I, AssumptionCache *AC=nullptr, DominatorTree *DT=nullptr)
Calls BuildAssumeFromInst and if the resulting llvm.assume is valid insert if before I.
DWARFExpression::Operation Op
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:559
AnalysisManager< Function > FunctionAnalysisManager
Convenience typedef for the Function analysis manager.
hash_code hash_combine(const Ts &...args)
Combine values into a single hash_code.
Definition Hashing.h:325
BumpPtrAllocatorImpl<> BumpPtrAllocator
The standard BumpPtrAllocator which just uses the default template parameters.
Definition Allocator.h:383
LLVM_ABI FunctionPass * createEarlyCSEPass(bool UseMemorySSA=false)
LLVM_ABI void initializeEarlyCSELegacyPassPass(PassRegistry &)
hash_code hash_combine_range(InputIteratorT first, InputIteratorT last)
Compute a hash_code for a sequence of values.
Definition Hashing.h:305
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition BitVector.h:862
unsigned Generation
static unsigned getHashValue(CallValue Val)
static bool isEqual(CallValue LHS, CallValue RHS)
static bool isEqual(const GEPValue &LHS, const GEPValue &RHS)
static unsigned getHashValue(const GEPValue &Val)
static unsigned getHashValue(SimpleValue Val)
static bool isEqual(SimpleValue LHS, SimpleValue RHS)
An information struct used to provide DenseMap with the various necessary components for a given valu...
LLVM_ABI PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM)
Run the pass over the function.
LLVM_ABI void printPipeline(raw_ostream &OS, function_ref< StringRef(StringRef)> MapClassName2PassName)
A CRTP mix-in to automatically provide informational APIs needed for passes.
Definition PassManager.h:89
const DataLayout & DL