1//===- EarlyCSE.cpp - Simple and fast CSE pass ----------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This pass performs a simple dominator tree walk that eliminates trivially
10// redundant instructions.
11//
12//===----------------------------------------------------------------------===//
13
16#include "llvm/ADT/Hashing.h"
17#include "llvm/ADT/STLExtras.h"
20#include "llvm/ADT/Statistic.h"
30#include "llvm/IR/BasicBlock.h"
31#include "llvm/IR/Constants.h"
32#include "llvm/IR/Dominators.h"
33#include "llvm/IR/Function.h"
34#include "llvm/IR/InstrTypes.h"
35#include "llvm/IR/Instruction.h"
38#include "llvm/IR/LLVMContext.h"
39#include "llvm/IR/PassManager.h"
41#include "llvm/IR/Type.h"
42#include "llvm/IR/Value.h"
44#include "llvm/Pass.h"
48#include "llvm/Support/Debug.h"
55#include <cassert>
56#include <deque>
57#include <memory>
58#include <utility>
59
60using namespace llvm;
61using namespace llvm::PatternMatch;
62
63#define DEBUG_TYPE "early-cse"
64
65STATISTIC(NumSimplify, "Number of instructions simplified or DCE'd");
66STATISTIC(NumCSE, "Number of instructions CSE'd");
67STATISTIC(NumCSECVP, "Number of compare instructions CVP'd");
68STATISTIC(NumCSELoad, "Number of load instructions CSE'd");
69STATISTIC(NumCSECall, "Number of call instructions CSE'd");
70STATISTIC(NumCSEGEP, "Number of GEP instructions CSE'd");
71STATISTIC(NumDSE, "Number of trivial dead stores removed");
72
73DEBUG_COUNTER(CSECounter, "early-cse",
74 "Controls which instructions are removed");
75
76static cl::opt<unsigned> EarlyCSEMssaOptCap(
77 "earlycse-mssa-optimization-cap", cl::init(500), cl::Hidden,
78 cl::desc("Enable imprecision in EarlyCSE in pathological cases, in exchange "
79 "for faster compile. Caps the MemorySSA clobbering calls."));
80
81static cl::opt<bool> EarlyCSEDebugHash(
82 "earlycse-debug-hash", cl::init(false), cl::Hidden,
83 cl::desc("Perform extra assertion checking to verify that SimpleValue's hash "
84 "function is well-behaved w.r.t. its isEqual predicate"));
85
86//===----------------------------------------------------------------------===//
87// SimpleValue
88//===----------------------------------------------------------------------===//
89
90namespace {
91
92/// Struct representing the available values in the scoped hash table.
93struct SimpleValue {
94 Instruction *Inst;
95
96 SimpleValue(Instruction *I) : Inst(I) {
97 assert((isSentinel() || canHandle(I)) && "Inst can't be handled!");
98 }
99
100 bool isSentinel() const {
101 return Inst == DenseMapInfo<Instruction *>::getEmptyKey() ||
102 Inst == DenseMapInfo<Instruction *>::getTombstoneKey();
103 }
104
105 static bool canHandle(Instruction *Inst) {
106 // This can only handle non-void readnone functions.
107 // Also handled are constrained intrinsics that look like the types
108 // of instructions handled below (UnaryOperator, etc.).
109 if (CallInst *CI = dyn_cast<CallInst>(Inst)) {
110 if (Function *F = CI->getCalledFunction()) {
111 switch ((Intrinsic::ID)F->getIntrinsicID()) {
112 case Intrinsic::experimental_constrained_fadd:
113 case Intrinsic::experimental_constrained_fsub:
114 case Intrinsic::experimental_constrained_fmul:
115 case Intrinsic::experimental_constrained_fdiv:
116 case Intrinsic::experimental_constrained_frem:
117 case Intrinsic::experimental_constrained_fptosi:
118 case Intrinsic::experimental_constrained_sitofp:
119 case Intrinsic::experimental_constrained_fptoui:
120 case Intrinsic::experimental_constrained_uitofp:
121 case Intrinsic::experimental_constrained_fcmp:
122 case Intrinsic::experimental_constrained_fcmps: {
123 auto *CFP = cast<ConstrainedFPIntrinsic>(CI);
124 if (CFP->getExceptionBehavior() &&
125 CFP->getExceptionBehavior() == fp::ebStrict)
126 return false;
127 // Since we CSE across function calls we must not allow
128 // the rounding mode to change.
129 if (CFP->getRoundingMode() &&
130 CFP->getRoundingMode() == RoundingMode::Dynamic)
131 return false;
132 return true;
133 }
134 }
135 }
136 return CI->doesNotAccessMemory() && !CI->getType()->isVoidTy() &&
137 // FIXME: Currently the calls which may access the thread id may
138 // be considered as not accessing the memory. But this is
139 // problematic for coroutines, since coroutines may resume in a
140 // different thread. So we disable the optimization here for the
141 // correctness. However, it may block many other correct
142 // optimizations. Revert this one when we detect the memory
143 // accessing kind more precisely.
144 !CI->getFunction()->isPresplitCoroutine();
145 }
146 return isa<CastInst>(Inst) || isa<UnaryOperator>(Inst) ||
147 isa<BinaryOperator>(Inst) || isa<CmpInst>(Inst) ||
148 isa<SelectInst>(Inst) || isa<ExtractElementInst>(Inst) ||
149 isa<InsertElementInst>(Inst) || isa<ShuffleVectorInst>(Inst) ||
150 isa<ExtractValueInst>(Inst) || isa<InsertValueInst>(Inst) ||
151 isa<FreezeInst>(Inst);
152 }
153};
154
155} // end anonymous namespace
156
157namespace llvm {
158
159template <> struct DenseMapInfo<SimpleValue> {
160 static inline SimpleValue getEmptyKey() {
161 return DenseMapInfo<Instruction *>::getEmptyKey();
162 }
163
164 static inline SimpleValue getTombstoneKey() {
165 return DenseMapInfo<Instruction *>::getTombstoneKey();
166 }
167
168 static unsigned getHashValue(SimpleValue Val);
169 static bool isEqual(SimpleValue LHS, SimpleValue RHS);
170};
171
172} // end namespace llvm
173
174/// Match a 'select' including an optional 'not' of the condition.
175static bool matchSelectWithOptionalNotCond(Value *V, Value *&Cond, Value *&A,
176 Value *&B,
177 SelectPatternFlavor &Flavor) {
178 // Return false if V is not even a select.
179 if (!match(V, m_Select(m_Value(Cond), m_Value(A), m_Value(B))))
180 return false;
181
182 // Look through a 'not' of the condition operand by swapping A/B.
183 Value *CondNot;
184 if (match(Cond, m_Not(m_Value(CondNot)))) {
185 Cond = CondNot;
186 std::swap(A, B);
187 }
188
189 // Match canonical forms of min/max. We are not using ValueTracking's
190 // more powerful matchSelectPattern() because it may rely on instruction flags
191 // such as "nsw". That would be incompatible with the current hashing
192 // mechanism that may remove flags to increase the likelihood of CSE.
193
194 Flavor = SPF_UNKNOWN;
195 CmpPredicate Pred;
196
197 if (!match(Cond, m_ICmp(Pred, m_Specific(A), m_Specific(B)))) {
198 // Check for commuted variants of min/max by swapping predicate.
199 // If we do not match the standard or commuted patterns, this is not a
200 // recognized form of min/max, but it is still a select, so return true.
201 if (!match(Cond, m_ICmp(Pred, m_Specific(B), m_Specific(A))))
202 return true;
203 Pred = ICmpInst::getSwappedPredicate(Pred);
204 }
205
206 switch (Pred) {
207 case CmpInst::ICMP_UGT: Flavor = SPF_UMAX; break;
208 case CmpInst::ICMP_ULT: Flavor = SPF_UMIN; break;
209 case CmpInst::ICMP_SGT: Flavor = SPF_SMAX; break;
210 case CmpInst::ICMP_SLT: Flavor = SPF_SMIN; break;
211 // Non-strict inequalities.
212 case CmpInst::ICMP_ULE: Flavor = SPF_UMIN; break;
213 case CmpInst::ICMP_UGE: Flavor = SPF_UMAX; break;
214 case CmpInst::ICMP_SLE: Flavor = SPF_SMIN; break;
215 case CmpInst::ICMP_SGE: Flavor = SPF_SMAX; break;
216 default: break;
217 }
218
219 return true;
220}
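// For example, both of these selects are reported as SPF_SMIN over the same
// operands A/B, even though the second uses a commuted compare (illustrative
// IR with hypothetical values %a and %b):
//   %c1 = icmp slt i32 %a, %b
//   %m1 = select i1 %c1, i32 %a, i32 %b
//   %c2 = icmp sgt i32 %b, %a
//   %m2 = select i1 %c2, i32 %a, i32 %b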
221
222static unsigned hashCallInst(CallInst *CI) {
223 // Don't CSE convergent calls in different basic blocks, because they
224 // implicitly depend on the set of threads that is currently executing.
225 if (CI->isConvergent()) {
226 return hash_combine(
227 CI->getOpcode(), CI->getParent(),
228 hash_combine_range(CI->value_op_begin(), CI->value_op_end()));
229 }
230 return hash_combine(
231 CI->getOpcode(),
232 hash_combine_range(CI->value_op_begin(), CI->value_op_end()));
233}
234
235static unsigned getHashValueImpl(SimpleValue Val) {
236 Instruction *Inst = Val.Inst;
237 // Hash in all of the operands as pointers.
238 if (BinaryOperator *BinOp = dyn_cast<BinaryOperator>(Inst)) {
239 Value *LHS = BinOp->getOperand(0);
240 Value *RHS = BinOp->getOperand(1);
241 if (BinOp->isCommutative() && BinOp->getOperand(0) > BinOp->getOperand(1))
242 std::swap(LHS, RHS);
243
244 return hash_combine(BinOp->getOpcode(), LHS, RHS);
245 }
246
247 if (CmpInst *CI = dyn_cast<CmpInst>(Inst)) {
248 // Compares can be commuted by swapping the comparands and
249 // updating the predicate. Choose the form that has the
250 // comparands in sorted order, or in the case of a tie, the
251 // one with the lower predicate.
252 Value *LHS = CI->getOperand(0);
253 Value *RHS = CI->getOperand(1);
254 CmpInst::Predicate Pred = CI->getPredicate();
255 CmpInst::Predicate SwappedPred = CI->getSwappedPredicate();
256 if (std::tie(LHS, Pred) > std::tie(RHS, SwappedPred)) {
257 std::swap(LHS, RHS);
258 Pred = SwappedPred;
259 }
260 return hash_combine(Inst->getOpcode(), Pred, LHS, RHS);
261 }
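// For instance, "icmp sgt i32 %x, %y" and "icmp slt i32 %y, %x" (hypothetical
// values) canonicalize to the same (operands, predicate) tuple here, so they
// hash identically and the later compare can be CSE'd to the earlier one.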
262
263 // Hash general selects to allow matching commuted true/false operands.
264 SelectPatternFlavor SPF;
265 Value *Cond, *A, *B;
266 if (matchSelectWithOptionalNotCond(Inst, Cond, A, B, SPF)) {
267 // Hash min/max (cmp + select) to allow for commuted operands.
268 // Min/max may also have non-canonical compare predicate (eg, the compare for
269 // smin may use 'sgt' rather than 'slt'), and non-canonical operands in the
270 // compare.
271 // TODO: We should also detect FP min/max.
272 if (SPF == SPF_SMIN || SPF == SPF_SMAX ||
273 SPF == SPF_UMIN || SPF == SPF_UMAX) {
274 if (A > B)
275 std::swap(A, B);
276 return hash_combine(Inst->getOpcode(), SPF, A, B);
277 }
278
279 // Hash general selects to allow matching commuted true/false operands.
280
281 // If we do not have a compare as the condition, just hash in the condition.
282 CmpPredicate Pred;
283 Value *X, *Y;
284 if (!match(Cond, m_Cmp(Pred, m_Value(X), m_Value(Y))))
285 return hash_combine(Inst->getOpcode(), Cond, A, B);
286
287 // Similar to cmp normalization (above) - canonicalize the predicate value:
288 // select (icmp Pred, X, Y), A, B --> select (icmp InvPred, X, Y), B, A
289 if (CmpInst::getInversePredicate(Pred) < Pred) {
290 Pred = CmpInst::getInversePredicate(Pred);
291 std::swap(A, B);
292 }
293 return hash_combine(Inst->getOpcode(), Pred, X, Y, A, B);
294 }
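// As an illustration (hypothetical values), "select (icmp sgt i32 %x, %y),
// i32 %a, i32 %b" and "select (icmp sle i32 %x, %y), i32 %b, i32 %a" are both
// normalized here to the sgt form with the same arm order, so they hash the
// same way.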
295
296 if (CastInst *CI = dyn_cast<CastInst>(Inst))
297 return hash_combine(CI->getOpcode(), CI->getType(), CI->getOperand(0));
298
299 if (FreezeInst *FI = dyn_cast<FreezeInst>(Inst))
300 return hash_combine(FI->getOpcode(), FI->getOperand(0));
301
302 if (const ExtractValueInst *EVI = dyn_cast<ExtractValueInst>(Inst))
303 return hash_combine(EVI->getOpcode(), EVI->getOperand(0),
304 hash_combine_range(EVI->idx_begin(), EVI->idx_end()));
305
306 if (const InsertValueInst *IVI = dyn_cast<InsertValueInst>(Inst))
307 return hash_combine(IVI->getOpcode(), IVI->getOperand(0),
308 IVI->getOperand(1),
309 hash_combine_range(IVI->idx_begin(), IVI->idx_end()));
310
311 assert((isa<CallInst>(Inst) || isa<ExtractElementInst>(Inst) ||
312 isa<InsertElementInst>(Inst) || isa<ShuffleVectorInst>(Inst) ||
313 isa<UnaryOperator>(Inst) || isa<FreezeInst>(Inst)) &&
314 "Invalid/unknown instruction");
315
316 // Handle intrinsics with commutative operands.
317 auto *II = dyn_cast<IntrinsicInst>(Inst);
318 if (II && II->isCommutative() && II->arg_size() >= 2) {
319 Value *LHS = II->getArgOperand(0), *RHS = II->getArgOperand(1);
320 if (LHS > RHS)
321 std::swap(LHS, RHS);
322 return hash_combine(
323 II->getOpcode(), LHS, RHS,
324 hash_combine_range(II->value_op_begin() + 2, II->value_op_end()));
325 }
326
327 // gc.relocate is 'special' call: its second and third operands are
328 // not real values, but indices into statepoint's argument list.
329 // Get values they point to.
330 if (const GCRelocateInst *GCR = dyn_cast<GCRelocateInst>(Inst))
331 return hash_combine(GCR->getOpcode(), GCR->getOperand(0),
332 GCR->getBasePtr(), GCR->getDerivedPtr());
333
334 // Don't CSE convergent calls in different basic blocks, because they
335 // implicitly depend on the set of threads that is currently executing.
336 if (CallInst *CI = dyn_cast<CallInst>(Inst))
337 return hashCallInst(CI);
338
339 // Mix in the opcode.
340 return hash_combine(
341 Inst->getOpcode(),
342 hash_combine_range(Inst->value_op_begin(), Inst->value_op_end()));
343}
344
345unsigned DenseMapInfo<SimpleValue>::getHashValue(SimpleValue Val) {
346#ifndef NDEBUG
347 // If -earlycse-debug-hash was specified, return a constant -- this
348 // will force all hashing to collide, so we'll exhaustively search
349 // the table for a match, and the assertion in isEqual will fire if
350 // there's a bug causing equal keys to hash differently.
351 if (EarlyCSEDebugHash)
352 return 0;
353#endif
354 return getHashValueImpl(Val);
355}
356
357static bool isEqualImpl(SimpleValue LHS, SimpleValue RHS) {
358 Instruction *LHSI = LHS.Inst, *RHSI = RHS.Inst;
359
360 if (LHS.isSentinel() || RHS.isSentinel())
361 return LHSI == RHSI;
362
363 if (LHSI->getOpcode() != RHSI->getOpcode())
364 return false;
365 if (LHSI->isIdenticalToWhenDefined(RHSI)) {
366 // Convergent calls implicitly depend on the set of threads that is
367 // currently executing, so conservatively return false if they are in
368 // different basic blocks.
369 if (CallInst *CI = dyn_cast<CallInst>(LHSI);
370 CI && CI->isConvergent() && LHSI->getParent() != RHSI->getParent())
371 return false;
372
373 return true;
374 }
375
376 // If we're not strictly identical, we still might be a commutable instruction
377 if (BinaryOperator *LHSBinOp = dyn_cast<BinaryOperator>(LHSI)) {
378 if (!LHSBinOp->isCommutative())
379 return false;
380
381 assert(isa<BinaryOperator>(RHSI) &&
382 "same opcode, but different instruction type?");
383 BinaryOperator *RHSBinOp = cast<BinaryOperator>(RHSI);
384
385 // Commuted equality
386 return LHSBinOp->getOperand(0) == RHSBinOp->getOperand(1) &&
387 LHSBinOp->getOperand(1) == RHSBinOp->getOperand(0);
388 }
389 if (CmpInst *LHSCmp = dyn_cast<CmpInst>(LHSI)) {
390 assert(isa<CmpInst>(RHSI) &&
391 "same opcode, but different instruction type?");
392 CmpInst *RHSCmp = cast<CmpInst>(RHSI);
393 // Commuted equality
394 return LHSCmp->getOperand(0) == RHSCmp->getOperand(1) &&
395 LHSCmp->getOperand(1) == RHSCmp->getOperand(0) &&
396 LHSCmp->getSwappedPredicate() == RHSCmp->getPredicate();
397 }
398
399 auto *LII = dyn_cast<IntrinsicInst>(LHSI);
400 auto *RII = dyn_cast<IntrinsicInst>(RHSI);
401 if (LII && RII && LII->getIntrinsicID() == RII->getIntrinsicID() &&
402 LII->isCommutative() && LII->arg_size() >= 2) {
403 return LII->getArgOperand(0) == RII->getArgOperand(1) &&
404 LII->getArgOperand(1) == RII->getArgOperand(0) &&
405 std::equal(LII->arg_begin() + 2, LII->arg_end(),
406 RII->arg_begin() + 2, RII->arg_end());
407 }
408
409 // See comment above in `getHashValue()`.
410 if (const GCRelocateInst *GCR1 = dyn_cast<GCRelocateInst>(LHSI))
411 if (const GCRelocateInst *GCR2 = dyn_cast<GCRelocateInst>(RHSI))
412 return GCR1->getOperand(0) == GCR2->getOperand(0) &&
413 GCR1->getBasePtr() == GCR2->getBasePtr() &&
414 GCR1->getDerivedPtr() == GCR2->getDerivedPtr();
415
416 // Min/max can occur with commuted operands, non-canonical predicates,
417 // and/or non-canonical operands.
418 // Selects can be non-trivially equivalent via inverted conditions and swaps.
419 SelectPatternFlavor LSPF, RSPF;
420 Value *CondL, *CondR, *LHSA, *RHSA, *LHSB, *RHSB;
421 if (matchSelectWithOptionalNotCond(LHSI, CondL, LHSA, LHSB, LSPF) &&
422 matchSelectWithOptionalNotCond(RHSI, CondR, RHSA, RHSB, RSPF)) {
423 if (LSPF == RSPF) {
424 // TODO: We should also detect FP min/max.
425 if (LSPF == SPF_SMIN || LSPF == SPF_SMAX ||
426 LSPF == SPF_UMIN || LSPF == SPF_UMAX)
427 return ((LHSA == RHSA && LHSB == RHSB) ||
428 (LHSA == RHSB && LHSB == RHSA));
429
430 // select Cond, A, B <--> select not(Cond), B, A
431 if (CondL == CondR && LHSA == RHSA && LHSB == RHSB)
432 return true;
433 }
434
435 // If the true/false operands are swapped and the conditions are compares
436 // with inverted predicates, the selects are equal:
437 // select (icmp Pred, X, Y), A, B <--> select (icmp InvPred, X, Y), B, A
438 //
439 // This also handles patterns with a double-negation in the sense of not +
440 // inverse, because we looked through a 'not' in the matching function and
441 // swapped A/B:
442 // select (cmp Pred, X, Y), A, B <--> select (not (cmp InvPred, X, Y)), B, A
443 //
444 // This intentionally does NOT handle patterns with a double-negation in
445 // the sense of not + not, because doing so could result in values
446 // comparing
447 // as equal that hash differently in the min/max cases like:
448 // select (cmp slt, X, Y), X, Y <--> select (not (not (cmp slt, X, Y))), X, Y
449 // ^ hashes as min ^ would not hash as min
450 // In the context of the EarlyCSE pass, however, such cases never reach
451 // this code, as we simplify the double-negation before hashing the second
452 // select (and so still succeed at CSEing them).
453 if (LHSA == RHSB && LHSB == RHSA) {
454 CmpInst::Predicate PredL, PredR;
455 Value *X, *Y;
456 if (match(CondL, m_Cmp(PredL, m_Value(X), m_Value(Y))) &&
457 match(CondR, m_Cmp(PredR, m_Specific(X), m_Specific(Y))) &&
458 CmpInst::getInversePredicate(PredL) == PredR)
459 return true;
460 }
461 }
462
463 return false;
464}
465
466bool DenseMapInfo<SimpleValue>::isEqual(SimpleValue LHS, SimpleValue RHS) {
467 // These comparisons are nontrivial, so assert that equality implies
468 // hash equality (DenseMap demands this as an invariant).
469 bool Result = isEqualImpl(LHS, RHS);
470 assert(!Result || (LHS.isSentinel() && LHS.Inst == RHS.Inst) ||
471 getHashValueImpl(LHS) == getHashValueImpl(RHS));
472 return Result;
473}
474
475//===----------------------------------------------------------------------===//
476// CallValue
477//===----------------------------------------------------------------------===//
478
479namespace {
480
481/// Struct representing the available call values in the scoped hash
482/// table.
483struct CallValue {
484 Instruction *Inst;
485
486 CallValue(Instruction *I) : Inst(I) {
487 assert((isSentinel() || canHandle(I)) && "Inst can't be handled!");
488 }
489
490 bool isSentinel() const {
491 return Inst == DenseMapInfo<Instruction *>::getEmptyKey() ||
492 Inst == DenseMapInfo<Instruction *>::getTombstoneKey();
493 }
494
495 static bool canHandle(Instruction *Inst) {
496 // Don't value number anything that returns void.
497 if (Inst->getType()->isVoidTy())
498 return false;
499
500 CallInst *CI = dyn_cast<CallInst>(Inst);
501 if (!CI || !CI->onlyReadsMemory() ||
502 // FIXME: Currently the calls which may access the thread id may
503 // be considered as not accessing the memory. But this is
504 // problematic for coroutines, since coroutines may resume in a
505 // different thread. So we disable the optimization here for the
506 // correctness. However, it may block many other correct
507 // optimizations. Revert this one when we detect the memory
508 // accessing kind more precisely.
509 CI->getFunction()->isPresplitCoroutine())
510 return false;
511 return true;
512 }
513};
514
515} // end anonymous namespace
516
517namespace llvm {
518
519template <> struct DenseMapInfo<CallValue> {
520 static inline CallValue getEmptyKey() {
521 return DenseMapInfo<Instruction *>::getEmptyKey();
522 }
523
524 static inline CallValue getTombstoneKey() {
525 return DenseMapInfo<Instruction *>::getTombstoneKey();
526 }
527
528 static unsigned getHashValue(CallValue Val);
529 static bool isEqual(CallValue LHS, CallValue RHS);
530};
531
532} // end namespace llvm
533
534unsigned DenseMapInfo<CallValue>::getHashValue(CallValue Val) {
535 Instruction *Inst = Val.Inst;
536
537 // Hash all of the operands as pointers and mix in the opcode.
538 return hashCallInst(cast<CallInst>(Inst));
539}
540
541bool DenseMapInfo<CallValue>::isEqual(CallValue LHS, CallValue RHS) {
542 if (LHS.isSentinel() || RHS.isSentinel())
543 return LHS.Inst == RHS.Inst;
544
545 CallInst *LHSI = cast<CallInst>(LHS.Inst);
546 CallInst *RHSI = cast<CallInst>(RHS.Inst);
547
548 // Convergent calls implicitly depend on the set of threads that is
549 // currently executing, so conservatively return false if they are in
550 // different basic blocks.
551 if (LHSI->isConvergent() && LHSI->getParent() != RHSI->getParent())
552 return false;
553
554 return LHSI->isIdenticalTo(RHSI);
555}
556
557//===----------------------------------------------------------------------===//
558// GEPValue
559//===----------------------------------------------------------------------===//
560
561namespace {
562
563struct GEPValue {
564 Instruction *Inst;
565 std::optional<int64_t> ConstantOffset;
566
567 GEPValue(Instruction *I) : Inst(I) {
568 assert((isSentinel() || canHandle(I)) && "Inst can't be handled!");
569 }
570
571 GEPValue(Instruction *I, std::optional<int64_t> ConstantOffset)
572 : Inst(I), ConstantOffset(ConstantOffset) {
573 assert((isSentinel() || canHandle(I)) && "Inst can't be handled!");
574 }
575
576 bool isSentinel() const {
577 return Inst == DenseMapInfo<Instruction *>::getEmptyKey() ||
578 Inst == DenseMapInfo<Instruction *>::getTombstoneKey();
579 }
580
581 static bool canHandle(Instruction *Inst) {
582 return isa<GetElementPtrInst>(Inst);
583 }
584};
585
586} // namespace
587
588namespace llvm {
589
590template <> struct DenseMapInfo<GEPValue> {
591 static inline GEPValue getEmptyKey() {
592 return DenseMapInfo<Instruction *>::getEmptyKey();
593 }
594
595 static inline GEPValue getTombstoneKey() {
596 return DenseMapInfo<Instruction *>::getTombstoneKey();
597 }
598
599 static unsigned getHashValue(const GEPValue &Val);
600 static bool isEqual(const GEPValue &LHS, const GEPValue &RHS);
601};
602
603} // end namespace llvm
604
605unsigned DenseMapInfo<GEPValue>::getHashValue(const GEPValue &Val) {
606 auto *GEP = cast<GetElementPtrInst>(Val.Inst);
607 if (Val.ConstantOffset.has_value())
608 return hash_combine(GEP->getOpcode(), GEP->getPointerOperand(),
609 Val.ConstantOffset.value());
610 return hash_combine(
611 GEP->getOpcode(),
612 hash_combine_range(GEP->value_op_begin(), GEP->value_op_end()));
613}
614
615bool DenseMapInfo<GEPValue>::isEqual(const GEPValue &LHS, const GEPValue &RHS) {
616 if (LHS.isSentinel() || RHS.isSentinel())
617 return LHS.Inst == RHS.Inst;
618 auto *LGEP = cast<GetElementPtrInst>(LHS.Inst);
619 auto *RGEP = cast<GetElementPtrInst>(RHS.Inst);
620 if (LGEP->getPointerOperand() != RGEP->getPointerOperand())
621 return false;
622 if (LHS.ConstantOffset.has_value() && RHS.ConstantOffset.has_value())
623 return LHS.ConstantOffset.value() == RHS.ConstantOffset.value();
624 return LGEP->isIdenticalToWhenDefined(RGEP);
625}
626
627//===----------------------------------------------------------------------===//
628// EarlyCSE implementation
629//===----------------------------------------------------------------------===//
630
631namespace {
632
633/// A simple and fast domtree-based CSE pass.
634///
635/// This pass does a simple depth-first walk over the dominator tree,
636/// eliminating trivially redundant instructions and using instsimplify to
637/// canonicalize things as it goes. It is intended to be fast and catch obvious
638/// cases so that instcombine and other passes are more effective. It is
639/// expected that a later pass of GVN will catch the interesting/hard cases.
640class EarlyCSE {
641public:
642 const TargetLibraryInfo &TLI;
643 const TargetTransformInfo &TTI;
644 DominatorTree &DT;
645 AssumptionCache &AC;
646 const SimplifyQuery SQ;
647 MemorySSA *MSSA;
648 std::unique_ptr<MemorySSAUpdater> MSSAUpdater;
649
650 using AllocatorTy =
651 RecyclingAllocator<BumpPtrAllocator,
652 ScopedHashTableVal<SimpleValue, Value *>>;
653 using ScopedHTType =
654 ScopedHashTable<SimpleValue, Value *, DenseMapInfo<SimpleValue>,
655 AllocatorTy>;
656
657 /// A scoped hash table of the current values of all of our simple
658 /// scalar expressions.
659 ///
660 /// As we walk down the domtree, we look to see if instructions are in this:
661 /// if so, we replace them with what we find, otherwise we insert them so
662 /// that dominated values can succeed in their lookup.
663 ScopedHTType AvailableValues;
664
665 /// A scoped hash table of the current values of previously encountered
666 /// memory locations.
667 ///
668 /// This allows us to get efficient access to dominating loads or stores when
669 /// we have a fully redundant load. In addition to the most recent load, we
670 /// keep track of a generation count of the read, which is compared against
671 /// the current generation count. The current generation count is incremented
672 /// after every possibly writing memory operation, which ensures that we only
673 /// CSE loads with other loads that have no intervening store. Ordering
674 /// events (such as fences or atomic instructions) increment the generation
675 /// count as well; essentially, we model these as writes to all possible
676 /// locations. Note that atomic and/or volatile loads and stores can be
677 present in the table; it is the responsibility of the consumer to inspect
678 /// the atomicity/volatility if needed.
679 struct LoadValue {
680 Instruction *DefInst = nullptr;
681 unsigned Generation = 0;
682 int MatchingId = -1;
683 bool IsAtomic = false;
684 bool IsLoad = false;
685
686 LoadValue() = default;
687 LoadValue(Instruction *Inst, unsigned Generation, unsigned MatchingId,
688 bool IsAtomic, bool IsLoad)
689 : DefInst(Inst), Generation(Generation), MatchingId(MatchingId),
690 IsAtomic(IsAtomic), IsLoad(IsLoad) {}
691 };
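// Illustrative scenario (hypothetical values): in "%v1 = load i32, ptr %p"
// followed by "store i32 %x, ptr %q" and then "%v2 = load i32, ptr %p", the
// intervening store bumps the generation, so %v2 is only folded into %v1 if
// MemorySSA shows the store does not clobber %p (see isSameMemGeneration
// below) or the location is known invariant at that generation.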
692
693 using LoadMapAllocator =
694 RecyclingAllocator<BumpPtrAllocator,
695 ScopedHashTableVal<Value *, LoadValue>>;
696 using LoadHTType =
697 ScopedHashTable<Value *, LoadValue, DenseMapInfo<Value *>,
698 LoadMapAllocator>;
699
700 LoadHTType AvailableLoads;
701
702 // A scoped hash table mapping memory locations (represented as typed
703 // addresses) to generation numbers at which that memory location became
704 // (henceforth indefinitely) invariant.
705 using InvariantMapAllocator =
706 RecyclingAllocator<BumpPtrAllocator,
707 ScopedHashTableVal<MemoryLocation, unsigned>>;
708 using InvariantHTType =
709 ScopedHashTable<MemoryLocation, unsigned, DenseMapInfo<MemoryLocation>,
710 InvariantMapAllocator>;
711 InvariantHTType AvailableInvariants;
712
713 /// A scoped hash table of the current values of read-only call
714 /// values.
715 ///
716 /// It uses the same generation count as loads.
717 using CallHTType =
718 ScopedHashTable<CallValue, std::pair<Instruction *, unsigned>>;
719 CallHTType AvailableCalls;
720
721 using GEPMapAllocatorTy =
722 RecyclingAllocator<BumpPtrAllocator,
723 ScopedHashTableVal<GEPValue, Value *>>;
724 using GEPHTType = ScopedHashTable<GEPValue, Value *, DenseMapInfo<GEPValue>,
725 GEPMapAllocatorTy>;
726 GEPHTType AvailableGEPs;
727
728 /// This is the current generation of the memory value.
729 unsigned CurrentGeneration = 0;
730
731 /// Set up the EarlyCSE runner for a particular function.
732 EarlyCSE(const DataLayout &DL, const TargetLibraryInfo &TLI,
733 const TargetTransformInfo &TTI, DominatorTree &DT,
734 AssumptionCache &AC, MemorySSA *MSSA)
735 : TLI(TLI), TTI(TTI), DT(DT), AC(AC), SQ(DL, &TLI, &DT, &AC), MSSA(MSSA),
736 MSSAUpdater(std::make_unique<MemorySSAUpdater>(MSSA)) {}
737
738 bool run();
739
740private:
741 unsigned ClobberCounter = 0;
742 // Almost a POD, but needs to call the constructors for the scoped hash
743 // tables so that a new scope gets pushed on. These are RAII so that the
744 // scope gets popped when the NodeScope is destroyed.
745 class NodeScope {
746 public:
747 NodeScope(ScopedHTType &AvailableValues, LoadHTType &AvailableLoads,
748 InvariantHTType &AvailableInvariants, CallHTType &AvailableCalls,
749 GEPHTType &AvailableGEPs)
750 : Scope(AvailableValues), LoadScope(AvailableLoads),
751 InvariantScope(AvailableInvariants), CallScope(AvailableCalls),
752 GEPScope(AvailableGEPs) {}
753 NodeScope(const NodeScope &) = delete;
754 NodeScope &operator=(const NodeScope &) = delete;
755
756 private:
757 ScopedHTType::ScopeTy Scope;
758 LoadHTType::ScopeTy LoadScope;
759 InvariantHTType::ScopeTy InvariantScope;
760 CallHTType::ScopeTy CallScope;
761 GEPHTType::ScopeTy GEPScope;
762 };
763
764 // Contains all the needed information to create a stack for doing a depth
765 // first traversal of the tree. This includes scopes for values, loads, and
766 // calls as well as the generation. There is a child iterator so that the
767 // children do not need to be stored separately.
768 class StackNode {
769 public:
770 StackNode(ScopedHTType &AvailableValues, LoadHTType &AvailableLoads,
771 InvariantHTType &AvailableInvariants, CallHTType &AvailableCalls,
772 GEPHTType &AvailableGEPs, unsigned cg, DomTreeNode *n,
773 DomTreeNode::const_iterator child,
774 DomTreeNode::const_iterator end)
775 : CurrentGeneration(cg), ChildGeneration(cg), Node(n), ChildIter(child),
776 EndIter(end),
777 Scopes(AvailableValues, AvailableLoads, AvailableInvariants,
778 AvailableCalls, AvailableGEPs) {}
779 StackNode(const StackNode &) = delete;
780 StackNode &operator=(const StackNode &) = delete;
781
782 // Accessors.
783 unsigned currentGeneration() const { return CurrentGeneration; }
784 unsigned childGeneration() const { return ChildGeneration; }
785 void childGeneration(unsigned generation) { ChildGeneration = generation; }
786 DomTreeNode *node() { return Node; }
787 DomTreeNode::const_iterator childIter() const { return ChildIter; }
788
789 DomTreeNode *nextChild() {
790 DomTreeNode *child = *ChildIter;
791 ++ChildIter;
792 return child;
793 }
794
795 DomTreeNode::const_iterator end() const { return EndIter; }
796 bool isProcessed() const { return Processed; }
797 void process() { Processed = true; }
798
799 private:
800 unsigned CurrentGeneration;
801 unsigned ChildGeneration;
802 DomTreeNode *Node;
803 DomTreeNode::const_iterator ChildIter;
804 DomTreeNode::const_iterator EndIter;
805 NodeScope Scopes;
806 bool Processed = false;
807 };
808
809 /// Wrapper class to handle memory instructions, including loads,
810 /// stores and intrinsic loads and stores defined by the target.
811 class ParseMemoryInst {
812 public:
813 ParseMemoryInst(Instruction *Inst, const TargetTransformInfo &TTI)
814 : Inst(Inst) {
815 if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst)) {
816 IntrID = II->getIntrinsicID();
817 if (TTI.getTgtMemIntrinsic(II, Info))
818 return;
819 if (isHandledNonTargetIntrinsic(IntrID)) {
820 switch (IntrID) {
821 case Intrinsic::masked_load:
822 Info.PtrVal = Inst->getOperand(0);
823 Info.MatchingId = Intrinsic::masked_load;
824 Info.ReadMem = true;
825 Info.WriteMem = false;
826 Info.IsVolatile = false;
827 break;
828 case Intrinsic::masked_store:
829 Info.PtrVal = Inst->getOperand(1);
830 // Use the ID of masked load as the "matching id". This will
831 // prevent matching non-masked loads/stores with masked ones
832 // (which could be done), but at the moment, the code here
833 // does not support matching intrinsics with non-intrinsics,
834 // so keep the MatchingIds specific to masked instructions
835 // for now (TODO).
836 Info.MatchingId = Intrinsic::masked_load;
837 Info.ReadMem = false;
838 Info.WriteMem = true;
839 Info.IsVolatile = false;
840 break;
841 }
842 }
843 }
844 }
845
846 Instruction *get() { return Inst; }
847 const Instruction *get() const { return Inst; }
848
849 bool isLoad() const {
850 if (IntrID != 0)
851 return Info.ReadMem;
852 return isa<LoadInst>(Inst);
853 }
854
855 bool isStore() const {
856 if (IntrID != 0)
857 return Info.WriteMem;
858 return isa<StoreInst>(Inst);
859 }
860
861 bool isAtomic() const {
862 if (IntrID != 0)
863 return Info.Ordering != AtomicOrdering::NotAtomic;
864 return Inst->isAtomic();
865 }
866
867 bool isUnordered() const {
868 if (IntrID != 0)
869 return Info.isUnordered();
870
871 if (LoadInst *LI = dyn_cast<LoadInst>(Inst)) {
872 return LI->isUnordered();
873 } else if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
874 return SI->isUnordered();
875 }
876 // Conservative answer
877 return !Inst->isAtomic();
878 }
879
880 bool isVolatile() const {
881 if (IntrID != 0)
882 return Info.IsVolatile;
883
884 if (LoadInst *LI = dyn_cast<LoadInst>(Inst)) {
885 return LI->isVolatile();
886 } else if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
887 return SI->isVolatile();
888 }
889 // Conservative answer
890 return true;
891 }
892
893 bool isInvariantLoad() const {
894 if (auto *LI = dyn_cast<LoadInst>(Inst))
895 return LI->hasMetadata(LLVMContext::MD_invariant_load);
896 return false;
897 }
898
899 bool isValid() const { return getPointerOperand() != nullptr; }
900
901 // For regular (non-intrinsic) loads/stores, this is set to -1. For
902 // intrinsic loads/stores, the id is retrieved from the corresponding
903 // field in the MemIntrinsicInfo structure. That field contains
904 // non-negative values only.
905 int getMatchingId() const {
906 if (IntrID != 0)
907 return Info.MatchingId;
908 return -1;
909 }
910
911 Value *getPointerOperand() const {
912 if (IntrID != 0)
913 return Info.PtrVal;
914 return getLoadStorePointerOperand(Inst);
915 }
916
917 Type *getValueType() const {
918 // TODO: handle target-specific intrinsics.
919 return Inst->getAccessType();
920 }
921
922 bool mayReadFromMemory() const {
923 if (IntrID != 0)
924 return Info.ReadMem;
925 return Inst->mayReadFromMemory();
926 }
927
928 bool mayWriteToMemory() const {
929 if (IntrID != 0)
930 return Info.WriteMem;
931 return Inst->mayWriteToMemory();
932 }
933
934 private:
935 Intrinsic::ID IntrID = 0;
936 MemIntrinsicInfo Info;
937 Instruction *Inst;
938 };
939
940 // This function is to prevent accidentally passing a non-target
941 // intrinsic ID to TargetTransformInfo.
942 static bool isHandledNonTargetIntrinsic(Intrinsic::ID ID) {
943 switch (ID) {
944 case Intrinsic::masked_load:
945 case Intrinsic::masked_store:
946 return true;
947 }
948 return false;
949 }
950 static bool isHandledNonTargetIntrinsic(const Value *V) {
951 if (auto *II = dyn_cast<IntrinsicInst>(V))
952 return isHandledNonTargetIntrinsic(II->getIntrinsicID());
953 return false;
954 }
955
956 bool processNode(DomTreeNode *Node);
957
958 bool handleBranchCondition(Instruction *CondInst, const BranchInst *BI,
959 const BasicBlock *BB, const BasicBlock *Pred);
960
961 Value *getMatchingValue(LoadValue &InVal, ParseMemoryInst &MemInst,
962 unsigned CurrentGeneration);
963
964 bool overridingStores(const ParseMemoryInst &Earlier,
965 const ParseMemoryInst &Later);
966
967 Value *getOrCreateResult(Value *Inst, Type *ExpectedType) const {
968 // TODO: We could insert relevant casts on type mismatch here.
969 if (auto *LI = dyn_cast<LoadInst>(Inst))
970 return LI->getType() == ExpectedType ? LI : nullptr;
971 if (auto *SI = dyn_cast<StoreInst>(Inst)) {
972 Value *V = SI->getValueOperand();
973 return V->getType() == ExpectedType ? V : nullptr;
974 }
975 assert(isa<IntrinsicInst>(Inst) && "Instruction not supported");
976 auto *II = cast<IntrinsicInst>(Inst);
977 if (isHandledNonTargetIntrinsic(II->getIntrinsicID()))
978 return getOrCreateResultNonTargetMemIntrinsic(II, ExpectedType);
979 return TTI.getOrCreateResultFromMemIntrinsic(II, ExpectedType);
980 }
981
982 Value *getOrCreateResultNonTargetMemIntrinsic(IntrinsicInst *II,
983 Type *ExpectedType) const {
984 // TODO: We could insert relevant casts on type mismatch here.
985 switch (II->getIntrinsicID()) {
986 case Intrinsic::masked_load:
987 return II->getType() == ExpectedType ? II : nullptr;
988 case Intrinsic::masked_store: {
989 Value *V = II->getOperand(0);
990 return V->getType() == ExpectedType ? V : nullptr;
991 }
992 }
993 return nullptr;
994 }
995
996 /// Return true if the instruction is known to only operate on memory
997 /// provably invariant in the given "generation".
998 bool isOperatingOnInvariantMemAt(Instruction *I, unsigned GenAt);
999
1000 bool isSameMemGeneration(unsigned EarlierGeneration, unsigned LaterGeneration,
1001 Instruction *EarlierInst, Instruction *LaterInst);
1002
1003 bool isNonTargetIntrinsicMatch(const IntrinsicInst *Earlier,
1004 const IntrinsicInst *Later) {
1005 auto IsSubmask = [](const Value *Mask0, const Value *Mask1) {
1006 // Is Mask0 a submask of Mask1?
1007 if (Mask0 == Mask1)
1008 return true;
1009 if (isa<UndefValue>(Mask0) || isa<UndefValue>(Mask1))
1010 return false;
1011 auto *Vec0 = dyn_cast<ConstantVector>(Mask0);
1012 auto *Vec1 = dyn_cast<ConstantVector>(Mask1);
1013 if (!Vec0 || !Vec1)
1014 return false;
1015 if (Vec0->getType() != Vec1->getType())
1016 return false;
1017 for (int i = 0, e = Vec0->getNumOperands(); i != e; ++i) {
1018 Constant *Elem0 = Vec0->getOperand(i);
1019 Constant *Elem1 = Vec1->getOperand(i);
1020 auto *Int0 = dyn_cast<ConstantInt>(Elem0);
1021 if (Int0 && Int0->isZero())
1022 continue;
1023 auto *Int1 = dyn_cast<ConstantInt>(Elem1);
1024 if (Int1 && !Int1->isZero())
1025 continue;
1026 if (isa<UndefValue>(Elem0) || isa<UndefValue>(Elem1))
1027 return false;
1028 if (Elem0 == Elem1)
1029 continue;
1030 return false;
1031 }
1032 return true;
1033 };
1034 auto PtrOp = [](const IntrinsicInst *II) {
1035 if (II->getIntrinsicID() == Intrinsic::masked_load)
1036 return II->getOperand(0);
1037 if (II->getIntrinsicID() == Intrinsic::masked_store)
1038 return II->getOperand(1);
1039 llvm_unreachable("Unexpected IntrinsicInst");
1040 };
1041 auto MaskOp = [](const IntrinsicInst *II) {
1042 if (II->getIntrinsicID() == Intrinsic::masked_load)
1043 return II->getOperand(2);
1044 if (II->getIntrinsicID() == Intrinsic::masked_store)
1045 return II->getOperand(3);
1046 llvm_unreachable("Unexpected IntrinsicInst");
1047 };
1048 auto ThruOp = [](const IntrinsicInst *II) {
1049 if (II->getIntrinsicID() == Intrinsic::masked_load)
1050 return II->getOperand(3);
1051 llvm_unreachable("Unexpected IntrinsicInst");
1052 };
1053
1054 if (PtrOp(Earlier) != PtrOp(Later))
1055 return false;
1056
1057 Intrinsic::ID IDE = Earlier->getIntrinsicID();
1058 Intrinsic::ID IDL = Later->getIntrinsicID();
1059 // We could really use specific intrinsic classes for masked loads
1060 // and stores in IntrinsicInst.h.
1061 if (IDE == Intrinsic::masked_load && IDL == Intrinsic::masked_load) {
1062 // Trying to replace later masked load with the earlier one.
1063 // Check that the pointers are the same, and
1064 // - masks and pass-throughs are the same, or
1065 // - replacee's pass-through is "undef" and replacer's mask is a
1066 // super-set of the replacee's mask.
1067 if (MaskOp(Earlier) == MaskOp(Later) && ThruOp(Earlier) == ThruOp(Later))
1068 return true;
1069 if (!isa<UndefValue>(ThruOp(Later)))
1070 return false;
1071 return IsSubmask(MaskOp(Later), MaskOp(Earlier));
1072 }
1073 if (IDE == Intrinsic::masked_store && IDL == Intrinsic::masked_load) {
1074 // Trying to replace a load of a stored value with the store's value.
1075 // Check that the pointers are the same, and
1076 // - load's mask is a subset of store's mask, and
1077 // - load's pass-through is "undef".
1078 if (!IsSubmask(MaskOp(Later), MaskOp(Earlier)))
1079 return false;
1080 return isa<UndefValue>(ThruOp(Later));
1081 }
1082 if (IDE == Intrinsic::masked_load && IDL == Intrinsic::masked_store) {
1083 // Trying to remove a store of the loaded value.
1084 // Check that the pointers are the same, and
1085 // - store's mask is a subset of the load's mask.
1086 return IsSubmask(MaskOp(Later), MaskOp(Earlier));
1087 }
1088 if (IDE == Intrinsic::masked_store && IDL == Intrinsic::masked_store) {
1089 // Trying to remove a dead store (earlier).
1090 // Check that the pointers are the same,
1091 // - the to-be-removed store's mask is a subset of the other store's
1092 // mask.
1093 return IsSubmask(MaskOp(Earlier), MaskOp(Later));
1094 }
1095 return false;
1096 }
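// A sketch of the masked-load case above (hypothetical values): if the
// earlier access is @llvm.masked.load(%p, 4, <i1 1, i1 1, i1 1, i1 0>, undef)
// and the later one is @llvm.masked.load(%p, 4, <i1 1, i1 0, i1 1, i1 0>,
// undef), the later mask is a submask of the earlier one and its pass-through
// is undef, so the later load can reuse the earlier result.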
1097
1098 void removeMSSA(Instruction &Inst) {
1099 if (!MSSA)
1100 return;
1101 if (VerifyMemorySSA)
1102 MSSA->verifyMemorySSA();
1103 // Removing a store here can leave MemorySSA in an unoptimized state by
1104 // creating MemoryPhis that have identical arguments and by creating
1105 // MemoryUses whose defining access is not an actual clobber. The phi case
1106 // is handled by MemorySSA when passing OptimizePhis = true to
1107 // removeMemoryAccess. The non-optimized MemoryUse case is lazily updated
1108 // by MemorySSA's getClobberingMemoryAccess.
1109 MSSAUpdater->removeMemoryAccess(&Inst, true);
1110 }
1111};
1112
1113} // end anonymous namespace
1114
1115/// Determine if the memory referenced by LaterInst is from the same heap
1116/// version as EarlierInst.
1117/// This is currently called in two scenarios:
1118///
1119/// load p
1120/// ...
1121/// load p
1122///
1123/// and
1124///
1125/// x = load p
1126/// ...
1127/// store x, p
1128///
1129/// in both cases we want to verify that there are no possible writes to the
1130/// memory referenced by p between the earlier and later instruction.
1131bool EarlyCSE::isSameMemGeneration(unsigned EarlierGeneration,
1132 unsigned LaterGeneration,
1133 Instruction *EarlierInst,
1134 Instruction *LaterInst) {
1135 // Check the simple memory generation tracking first.
1136 if (EarlierGeneration == LaterGeneration)
1137 return true;
1138
1139 if (!MSSA)
1140 return false;
1141
1142 // If MemorySSA has determined that one of EarlierInst or LaterInst does not
1143 // read/write memory, then we can safely return true here.
1144 // FIXME: We could be more aggressive when checking doesNotAccessMemory(),
1145 // onlyReadsMemory(), mayReadFromMemory(), and mayWriteToMemory() in this pass
1146 // by also checking the MemorySSA MemoryAccess on the instruction. Initial
1147 // experiments suggest this isn't worthwhile, at least for C/C++ code compiled
1148 // with the default optimization pipeline.
1149 auto *EarlierMA = MSSA->getMemoryAccess(EarlierInst);
1150 if (!EarlierMA)
1151 return true;
1152 auto *LaterMA = MSSA->getMemoryAccess(LaterInst);
1153 if (!LaterMA)
1154 return true;
1155
1156 // Since we know LaterDef dominates LaterInst and EarlierInst dominates
1157 // LaterInst, if LaterDef dominates EarlierInst then it can't occur between
1158 // EarlierInst and LaterInst and neither can any other write that potentially
1159 // clobbers LaterInst.
1160 MemoryAccess *LaterDef;
1161 if (ClobberCounter < EarlyCSEMssaOptCap) {
1162 LaterDef = MSSA->getWalker()->getClobberingMemoryAccess(LaterInst);
1163 ClobberCounter++;
1164 } else
1165 LaterDef = LaterMA->getDefiningAccess();
1166
1167 return MSSA->dominates(LaterDef, EarlierMA);
1168}
1169
1170bool EarlyCSE::isOperatingOnInvariantMemAt(Instruction *I, unsigned GenAt) {
1171 // A location loaded from with an invariant_load is assumed to *never* change
1172 // within the visible scope of the compilation.
1173 if (auto *LI = dyn_cast<LoadInst>(I))
1174 if (LI->hasMetadata(LLVMContext::MD_invariant_load))
1175 return true;
1176
1177 auto MemLocOpt = MemoryLocation::getOrNone(I);
1178 if (!MemLocOpt)
1179 // "target" intrinsic forms of loads aren't currently known to
1180 // MemoryLocation::get. TODO
1181 return false;
1182 MemoryLocation MemLoc = *MemLocOpt;
1183 if (!AvailableInvariants.count(MemLoc))
1184 return false;
1185
1186 // Is the generation at which this became invariant older than the
1187 // current one?
1188 return AvailableInvariants.lookup(MemLoc) <= GenAt;
1189}
1190
1191bool EarlyCSE::handleBranchCondition(Instruction *CondInst,
1192 const BranchInst *BI, const BasicBlock *BB,
1193 const BasicBlock *Pred) {
1194 assert(BI->isConditional() && "Should be a conditional branch!");
1195 assert(BI->getCondition() == CondInst && "Wrong condition?");
1196 assert(BI->getSuccessor(0) == BB || BI->getSuccessor(1) == BB);
1198 auto *TorF = (BI->getSuccessor(0) == BB)
1199 ? ConstantInt::getTrue(BB->getContext()) : ConstantInt::getFalse(BB->getContext());
1200 auto MatchBinOp = [](Instruction *I, unsigned Opcode, Value *&LHS,
1201 Value *&RHS) {
1202 if (Opcode == Instruction::And &&
1203 match(I, m_LogicalAnd(m_Value(LHS), m_Value(RHS))))
1204 return true;
1205 else if (Opcode == Instruction::Or &&
1206 match(I, m_LogicalOr(m_Value(LHS), m_Value(RHS))))
1207 return true;
1208 return false;
1209 };
1210 // If the condition is AND operation, we can propagate its operands into the
1211 // true branch. If it is OR operation, we can propagate them into the false
1212 // branch.
1213 unsigned PropagateOpcode =
1214 (BI->getSuccessor(0) == BB) ? Instruction::And : Instruction::Or;
1215
1216 bool MadeChanges = false;
1217 SmallVector<Instruction *, 4> WorkList;
1218 SmallPtrSet<Instruction *, 4> Visited;
1219 WorkList.push_back(CondInst);
1220 while (!WorkList.empty()) {
1221 Instruction *Curr = WorkList.pop_back_val();
1222
1223 AvailableValues.insert(Curr, TorF);
1224 LLVM_DEBUG(dbgs() << "EarlyCSE CVP: Add conditional value for '"
1225 << Curr->getName() << "' as " << *TorF << " in "
1226 << BB->getName() << "\n");
1227 if (!DebugCounter::shouldExecute(CSECounter)) {
1228 LLVM_DEBUG(dbgs() << "Skipping due to debug counter\n");
1229 } else {
1230 // Replace all dominated uses with the known value.
1231 if (unsigned Count = replaceDominatedUsesWith(Curr, TorF, DT,
1232 BasicBlockEdge(Pred, BB))) {
1233 NumCSECVP += Count;
1234 MadeChanges = true;
1235 }
1236 }
1237
1238 Value *LHS, *RHS;
1239 if (MatchBinOp(Curr, PropagateOpcode, LHS, RHS))
1240 for (auto *Op : { LHS, RHS })
1241 if (Instruction *OPI = dyn_cast<Instruction>(Op))
1242 if (SimpleValue::canHandle(OPI) && Visited.insert(OPI).second)
1243 WorkList.push_back(OPI);
1244 }
1245
1246 return MadeChanges;
1247}
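// For example (hypothetical values): given "%c = and i1 %a, %b" and
// "br i1 %c, label %taken, label %other", inside the single-predecessor block
// %taken the values %c, %a and %b (provided %a and %b are instructions this
// pass can value-number) are all recorded as true, and their uses dominated
// by that edge are rewritten to true (counted in NumCSECVP).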
1248
1249Value *EarlyCSE::getMatchingValue(LoadValue &InVal, ParseMemoryInst &MemInst,
1250 unsigned CurrentGeneration) {
1251 if (InVal.DefInst == nullptr)
1252 return nullptr;
1253 if (InVal.MatchingId != MemInst.getMatchingId())
1254 return nullptr;
1255 // We don't yet handle removing loads with ordering of any kind.
1256 if (MemInst.isVolatile() || !MemInst.isUnordered())
1257 return nullptr;
1258 // We can't replace an atomic load with one which isn't also atomic.
1259 if (MemInst.isLoad() && !InVal.IsAtomic && MemInst.isAtomic())
1260 return nullptr;
1261 // The value V returned from this function is used differently depending
1262 // on whether MemInst is a load or a store. If it's a load, we will replace
1263 // MemInst with V, if it's a store, we will check if V is the same as the
1264 // available value.
1265 bool MemInstMatching = !MemInst.isLoad();
1266 Instruction *Matching = MemInstMatching ? MemInst.get() : InVal.DefInst;
1267 Instruction *Other = MemInstMatching ? InVal.DefInst : MemInst.get();
1268
1269 // For stores check the result values before checking memory generation
1270 // (otherwise isSameMemGeneration may crash).
1271 Value *Result = MemInst.isStore()
1272 ? getOrCreateResult(Matching, Other->getType())
1273 : nullptr;
1274 if (MemInst.isStore() && InVal.DefInst != Result)
1275 return nullptr;
1276
1277 // Deal with non-target memory intrinsics.
1278 bool MatchingNTI = isHandledNonTargetIntrinsic(Matching);
1279 bool OtherNTI = isHandledNonTargetIntrinsic(Other);
1280 if (OtherNTI != MatchingNTI)
1281 return nullptr;
1282 if (OtherNTI && MatchingNTI) {
1283 if (!isNonTargetIntrinsicMatch(cast<IntrinsicInst>(InVal.DefInst),
1284 cast<IntrinsicInst>(MemInst.get())))
1285 return nullptr;
1286 }
1287
1288 if (!isOperatingOnInvariantMemAt(MemInst.get(), InVal.Generation) &&
1289 !isSameMemGeneration(InVal.Generation, CurrentGeneration, InVal.DefInst,
1290 MemInst.get()))
1291 return nullptr;
1292
1293 if (!Result)
1294 Result = getOrCreateResult(Matching, Other->getType());
1295 return Result;
1296}
1297
1298static void combineIRFlags(Instruction &From, Value *To) {
1299 if (auto *I = dyn_cast<Instruction>(To)) {
1300 // If I being poison triggers UB, there is no need to drop those
1301 // flags. Otherwise, only retain flags present on both I and Inst.
1302 // TODO: Currently some fast-math flags are not treated as
1303 // poison-generating even though they should. Until this is fixed,
1304 // always retain flags present on both I and Inst for floating point
1305 // instructions.
1306 if (isa<FPMathOperator>(I) ||
1307 (I->hasPoisonGeneratingFlags() && !programUndefinedIfPoison(I)))
1308 I->andIRFlags(&From);
1309 }
1310}
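// For example, if a redundant "add i32 %a, %b" with no flags (hypothetical
// values) is folded into an existing "add nuw nsw i32 %a, %b", the surviving
// add generally has to drop nuw/nsw here: the hash ignores such flags, and
// the kept value must be correct for both original contexts.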
1311
1312bool EarlyCSE::overridingStores(const ParseMemoryInst &Earlier,
1313 const ParseMemoryInst &Later) {
1314 // Can we remove Earlier store because of Later store?
1315
1316 assert(Earlier.isUnordered() && !Earlier.isVolatile() &&
1317 "Violated invariant");
1318 if (Earlier.getPointerOperand() != Later.getPointerOperand())
1319 return false;
1320 if (!Earlier.getValueType() || !Later.getValueType() ||
1321 Earlier.getValueType() != Later.getValueType())
1322 return false;
1323 if (Earlier.getMatchingId() != Later.getMatchingId())
1324 return false;
1325 // At the moment, we don't remove ordered stores, but do remove
1326 // unordered atomic stores. There's no special requirement (for
1327 // unordered atomics) about removing atomic stores only in favor of
1328 // other atomic stores since we were going to execute the non-atomic
1329 // one anyway and the atomic one might never have become visible.
1330 if (!Earlier.isUnordered() || !Later.isUnordered())
1331 return false;
1332
1333 // Deal with non-target memory intrinsics.
1334 bool ENTI = isHandledNonTargetIntrinsic(Earlier.get());
1335 bool LNTI = isHandledNonTargetIntrinsic(Later.get());
1336 if (ENTI && LNTI)
1337 return isNonTargetIntrinsicMatch(cast<IntrinsicInst>(Earlier.get()),
1338 cast<IntrinsicInst>(Later.get()));
1339
1340 // Because of the check above, at least one of them is false.
1341 // For now disallow matching intrinsics with non-intrinsics,
1342 // so assume that the stores match if neither is an intrinsic.
1343 return ENTI == LNTI;
1344}
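// Typical use (hypothetical values): for "store i32 1, ptr %p" immediately
// followed by "store i32 2, ptr %p", both unordered with matching types and
// matching ids, this returns true and the earlier store can be deleted as a
// trivially dead store (counted in NumDSE).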
1345
1346bool EarlyCSE::processNode(DomTreeNode *Node) {
1347 bool Changed = false;
1348 BasicBlock *BB = Node->getBlock();
1349
1350 // If this block has a single predecessor, then the predecessor is the parent
1351 // of the domtree node and all of the live out memory values are still current
1352 // in this block. If this block has multiple predecessors, then they could
1353 // have invalidated the live-out memory values of our parent value. For now,
1354 // just be conservative and invalidate memory if this block has multiple
1355 // predecessors.
1356 if (!BB->getSinglePredecessor())
1357 ++CurrentGeneration;
1358
1359 // If this node has a single predecessor which ends in a conditional branch,
1360 // we can infer the value of the branch condition given that we took this
1361 // path. We need the single predecessor to ensure there's not another path
1362 // which reaches this block where the condition might hold a different
1363 // value. Since we're adding this to the scoped hash table (like any other
1364 // def), it will have been popped if we encounter a future merge block.
1365 if (BasicBlock *Pred = BB->getSinglePredecessor()) {
1366 auto *BI = dyn_cast<BranchInst>(Pred->getTerminator());
1367 if (BI && BI->isConditional()) {
1368 auto *CondInst = dyn_cast<Instruction>(BI->getCondition());
1369 if (CondInst && SimpleValue::canHandle(CondInst))
1370 Changed |= handleBranchCondition(CondInst, BI, BB, Pred);
1371 }
1372 }
1373
1374 /// LastStore - Keep track of the last non-volatile store that we saw... for
1375 /// as long as there is no instruction that reads memory. If we see a store
1376 /// to the same location, we delete the dead store. This zaps trivial dead
1377 /// stores which can occur in bitfield code among other things.
1378 Instruction *LastStore = nullptr;
1379
1380 // See if any instructions in the block can be eliminated. If so, do it. If
1381 // not, add them to AvailableValues.
1382 for (Instruction &Inst : make_early_inc_range(*BB)) {
1383 // Dead instructions should just be removed.
1384 if (isInstructionTriviallyDead(&Inst, &TLI)) {
1385 LLVM_DEBUG(dbgs() << "EarlyCSE DCE: " << Inst << '\n');
1386 if (!DebugCounter::shouldExecute(CSECounter)) {
1387 LLVM_DEBUG(dbgs() << "Skipping due to debug counter\n");
1388 continue;
1389 }
1390
1391 salvageKnowledge(&Inst, &AC);
1392 salvageDebugInfo(Inst);
1393 removeMSSA(Inst);
1394 Inst.eraseFromParent();
1395 Changed = true;
1396 ++NumSimplify;
1397 continue;
1398 }
1399
1400 // Skip assume intrinsics, they don't really have side effects (although
1401 // they're marked as such to ensure preservation of control dependencies),
1402 // and this pass will not bother with its removal. However, we should mark
1403 // its condition as true for all dominated blocks.
1404 if (auto *Assume = dyn_cast<AssumeInst>(&Inst)) {
1405 auto *CondI = dyn_cast<Instruction>(Assume->getArgOperand(0));
1406 if (CondI && SimpleValue::canHandle(CondI)) {
1407 LLVM_DEBUG(dbgs() << "EarlyCSE considering assumption: " << Inst
1408 << '\n');
1409 AvailableValues.insert(CondI, ConstantInt::getTrue(BB->getContext()));
1410 } else
1411 LLVM_DEBUG(dbgs() << "EarlyCSE skipping assumption: " << Inst << '\n');
1412 continue;
1413 }
1414
1415 // Likewise, noalias intrinsics don't actually write.
1416 if (match(&Inst,
1417 m_Intrinsic<Intrinsic::experimental_noalias_scope_decl>())) {
1418 LLVM_DEBUG(dbgs() << "EarlyCSE skipping noalias intrinsic: " << Inst
1419 << '\n');
1420 continue;
1421 }
1422
1423 // Skip sideeffect intrinsics, for the same reason as assume intrinsics.
1424 if (match(&Inst, m_Intrinsic<Intrinsic::sideeffect>())) {
1425 LLVM_DEBUG(dbgs() << "EarlyCSE skipping sideeffect: " << Inst << '\n');
1426 continue;
1427 }
1428
1429 // Skip pseudoprobe intrinsics, for the same reason as assume intrinsics.
1430 if (match(&Inst, m_Intrinsic<Intrinsic::pseudoprobe>())) {
1431 LLVM_DEBUG(dbgs() << "EarlyCSE skipping pseudoprobe: " << Inst << '\n');
1432 continue;
1433 }
1434
1435 // We can skip all invariant.start intrinsics since they only read memory,
1436 // and we can forward values across it. For invariant starts without
1437 // invariant ends, we can use the fact that the invariantness never ends to
1438 // start a scope in the current generation which is true for all future
1439 // generations. Also, we don't need to consume the last store since the
1440 // semantics of invariant.start allow us to perform DSE of the last
1441 // store, if there was a store following invariant.start. Consider:
1442 //
1443 // store 30, i8* p
1444 // invariant.start(p)
1445 // store 40, i8* p
1446 // We can DSE the store to 30, since the store 40 to invariant location p
1447 // causes undefined behaviour.
1448 if (match(&Inst, m_Intrinsic<Intrinsic::invariant_start>())) {
1449 // If there are any uses, the scope might end.
1450 if (!Inst.use_empty())
1451 continue;
1452 MemoryLocation MemLoc =
1453 MemoryLocation::getForArgument(&cast<CallInst>(Inst), 1, TLI);
1454 // Don't start a scope if we already have a better one pushed
1455 if (!AvailableInvariants.count(MemLoc))
1456 AvailableInvariants.insert(MemLoc, CurrentGeneration);
1457 continue;
1458 }
1459
1460 if (isGuard(&Inst)) {
1461 if (auto *CondI =
1462 dyn_cast<Instruction>(cast<CallInst>(Inst).getArgOperand(0))) {
1463 if (SimpleValue::canHandle(CondI)) {
1464 // Do we already know the actual value of this condition?
1465 if (auto *KnownCond = AvailableValues.lookup(CondI)) {
1466 // Is the condition known to be true?
1467 if (isa<ConstantInt>(KnownCond) &&
1468 cast<ConstantInt>(KnownCond)->isOne()) {
1469 LLVM_DEBUG(dbgs()
1470 << "EarlyCSE removing guard: " << Inst << '\n');
1471 salvageKnowledge(&Inst, &AC);
1472 removeMSSA(Inst);
1473 Inst.eraseFromParent();
1474 Changed = true;
1475 continue;
1476 } else
1477 // Use the known value if it wasn't true.
1478 cast<CallInst>(Inst).setArgOperand(0, KnownCond);
1479 }
1480 // The condition we're guarding on here is true for all dominated
1481 // locations.
1482 AvailableValues.insert(CondI, ConstantInt::getTrue(BB->getContext()));
1483 }
1484 }
1485
1486 // Guard intrinsics read all memory, but don't write any memory.
1487 // Accordingly, don't update the generation but consume the last store (to
1488 // avoid an incorrect DSE).
1489 LastStore = nullptr;
1490 continue;
1491 }
1492
1493 // If the instruction can be simplified (e.g. X+0 = X) then replace it with
1494 // its simpler value.
1495 if (Value *V = simplifyInstruction(&Inst, SQ)) {
1496 LLVM_DEBUG(dbgs() << "EarlyCSE Simplify: " << Inst << " to: " << *V
1497 << '\n');
1498 if (!DebugCounter::shouldExecute(CSECounter)) {
1499 LLVM_DEBUG(dbgs() << "Skipping due to debug counter\n");
1500 } else {
1501 bool Killed = false;
1502 if (!Inst.use_empty()) {
1503 Inst.replaceAllUsesWith(V);
1504 Changed = true;
1505 }
1506 if (isInstructionTriviallyDead(&Inst, &TLI)) {
1507 salvageKnowledge(&Inst, &AC);
1508 removeMSSA(Inst);
1509 Inst.eraseFromParent();
1510 Changed = true;
1511 Killed = true;
1512 }
1513 if (Changed)
1514 ++NumSimplify;
1515 if (Killed)
1516 continue;
1517 }
1518 }
1519
1520 // If this is a simple instruction that we can value number, process it.
1521 if (SimpleValue::canHandle(&Inst)) {
1522 if ([[maybe_unused]] auto *CI = dyn_cast<ConstrainedFPIntrinsic>(&Inst)) {
1523 assert(CI->getExceptionBehavior() != fp::ebStrict &&
1524 "Unexpected ebStrict from SimpleValue::canHandle()");
1525 assert((!CI->getRoundingMode() ||
1526 CI->getRoundingMode() != RoundingMode::Dynamic) &&
1527 "Unexpected dynamic rounding from SimpleValue::canHandle()");
1528 }
1529 // See if the instruction has an available value. If so, use it.
1530 if (Value *V = AvailableValues.lookup(&Inst)) {
1531 LLVM_DEBUG(dbgs() << "EarlyCSE CSE: " << Inst << " to: " << *V
1532 << '\n');
1533 if (!DebugCounter::shouldExecute(CSECounter)) {
1534 LLVM_DEBUG(dbgs() << "Skipping due to debug counter\n");
1535 continue;
1536 }
1537 combineIRFlags(Inst, V);
1538 Inst.replaceAllUsesWith(V);
1539 salvageKnowledge(&Inst, &AC);
1540 removeMSSA(Inst);
1541 Inst.eraseFromParent();
1542 Changed = true;
1543 ++NumCSE;
1544 continue;
1545 }
1546
1547 // Otherwise, just remember that this value is available.
1548 AvailableValues.insert(&Inst, &Inst);
1549 continue;
1550 }
1551
1552 ParseMemoryInst MemInst(&Inst, TTI);
1553 // If this is a non-volatile load, process it.
1554 if (MemInst.isValid() && MemInst.isLoad()) {
1555 // (conservatively) we can't peek past the ordering implied by this
1556 // operation, but we can add this load to our set of available values
1557 if (MemInst.isVolatile() || !MemInst.isUnordered()) {
1558 LastStore = nullptr;
1559 ++CurrentGeneration;
1560 }
1561
1562 if (MemInst.isInvariantLoad()) {
1563 // If we pass an invariant load, we know that memory location is
1564 // indefinitely constant from the moment of first dereferenceability.
1565 // We conservatively treat the invariant_load as that moment. If we
1566 // pass an invariant load after already establishing a scope, don't
1567 // restart it since we want to preserve the earliest point seen.
1568 auto MemLoc = MemoryLocation::get(&Inst);
1569 if (!AvailableInvariants.count(MemLoc))
1570 AvailableInvariants.insert(MemLoc, CurrentGeneration);
1571 }
1572
1573 // If we have an available version of this load, and if it is the right
1574 // generation or the load is known to be from an invariant location,
1575 // replace this instruction.
1576 //
1577 // If either the dominating load or the current load is invariant, then
1578 // we can assume the current load loads the same value as the dominating
1579 // load.
1580 LoadValue InVal = AvailableLoads.lookup(MemInst.getPointerOperand());
1581 if (Value *Op = getMatchingValue(InVal, MemInst, CurrentGeneration)) {
1582 LLVM_DEBUG(dbgs() << "EarlyCSE CSE LOAD: " << Inst
1583 << " to: " << *InVal.DefInst << '\n');
1584 if (!DebugCounter::shouldExecute(CSECounter)) {
1585 LLVM_DEBUG(dbgs() << "Skipping due to debug counter\n");
1586 continue;
1587 }
1588 if (InVal.IsLoad)
1589 if (auto *I = dyn_cast<Instruction>(Op))
1590 combineMetadataForCSE(I, &Inst, false);
1591 if (!Inst.use_empty())
1592 Inst.replaceAllUsesWith(Op);
1593 salvageKnowledge(&Inst, &AC);
1594 removeMSSA(Inst);
1595 Inst.eraseFromParent();
1596 Changed = true;
1597 ++NumCSELoad;
1598 continue;
1599 }
1600
1601 // Otherwise, remember that we have this instruction.
1602 AvailableLoads.insert(MemInst.getPointerOperand(),
1603 LoadValue(&Inst, CurrentGeneration,
1604 MemInst.getMatchingId(),
1605 MemInst.isAtomic(),
1606 MemInst.isLoad()));
1607 LastStore = nullptr;
1608 continue;
1609 }
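// Basic shape of the load CSE above (hypothetical IR, no intervening
// writes, so the generation is unchanged):
//   %v1 = load i32, ptr %p
//   %v2 = load i32, ptr %p
// The second load finds %v1 in AvailableLoads for the same pointer and
// generation, its metadata is merged via combineMetadataForCSE, and it is
// replaced by %v1.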
1610
1611 // If this instruction may read from memory or throw (and potentially read
1612 // from memory in the exception handler), forget LastStore. Load/store
1613 // intrinsics will indicate both a read and a write to memory. The target
1614 // may override this (e.g. so that a store intrinsic does not read from
1615 // memory, and thus will be treated the same as a regular store for
1616 // commoning purposes).
1617 if ((Inst.mayReadFromMemory() || Inst.mayThrow()) &&
1618 !(MemInst.isValid() && !MemInst.mayReadFromMemory()))
1619 LastStore = nullptr;
1620
1621 // If this is a read-only call, process it.
1622 if (CallValue::canHandle(&Inst)) {
1623 // If we have an available version of this call, and if it is the right
1624 // generation, replace this instruction.
1625 std::pair<Instruction *, unsigned> InVal = AvailableCalls.lookup(&Inst);
1626 if (InVal.first != nullptr &&
1627 isSameMemGeneration(InVal.second, CurrentGeneration, InVal.first,
1628 &Inst)) {
1629 LLVM_DEBUG(dbgs() << "EarlyCSE CSE CALL: " << Inst
1630 << " to: " << *InVal.first << '\n');
1631 if (!DebugCounter::shouldExecute(CSECounter)) {
1632 LLVM_DEBUG(dbgs() << "Skipping due to debug counter\n");
1633 continue;
1634 }
1635 if (!Inst.use_empty())
1636 Inst.replaceAllUsesWith(InVal.first);
1637 salvageKnowledge(&Inst, &AC);
1638 removeMSSA(Inst);
1639 Inst.eraseFromParent();
1640 Changed = true;
1641 ++NumCSECall;
1642 continue;
1643 }
1644
1645 // Otherwise, remember that we have this instruction.
1646 AvailableCalls.insert(&Inst, std::make_pair(&Inst, CurrentGeneration));
1647 continue;
1648 }
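// Sketch of the read-only call CSE (the callee @f is hypothetical and must
// be a call that CallValue::canHandle accepts, i.e. one that only reads
// memory):
//   %r1 = call i32 @f(i32 %x) #0        ; #0 = memory(read)
//   %r2 = call i32 @f(i32 %x) #0
// With no intervening write, both calls sit in the same memory generation,
// so %r2 is replaced by %r1.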
1649
1650 // Compare GEP instructions based on offset.
1651 if (GEPValue::canHandle(&Inst)) {
1652 auto *GEP = cast<GetElementPtrInst>(&Inst);
1653 APInt Offset = APInt(SQ.DL.getIndexTypeSizeInBits(GEP->getType()), 0);
1654 GEPValue GEPVal(GEP, GEP->accumulateConstantOffset(SQ.DL, Offset)
1655 ? Offset.trySExtValue()
1656 : std::nullopt);
1657 if (Value *V = AvailableGEPs.lookup(GEPVal)) {
1658 LLVM_DEBUG(dbgs() << "EarlyCSE CSE GEP: " << Inst << " to: " << *V
1659 << '\n');
1660 combineIRFlags(Inst, V);
1661 Inst.replaceAllUsesWith(V);
1662 salvageKnowledge(&Inst, &AC);
1663 removeMSSA(Inst);
1664 Inst.eraseFromParent();
1665 Changed = true;
1666 ++NumCSEGEP;
1667 continue;
1668 }
1669
1670 // Otherwise, just remember that we have this GEP.
1671 AvailableGEPs.insert(GEPVal, &Inst);
1672 continue;
1673 }
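// Example of GEPs commoned by constant offset rather than by operand list
// (hypothetical IR; both compute %p plus 4 bytes):
//   %g1 = getelementptr inbounds i32, ptr %p, i64 1
//   %g2 = getelementptr inbounds i8, ptr %p, i64 4
// Both map to the same GEPValue because accumulateConstantOffset() yields
// 4 for each, so %g2 is replaced by %g1.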
1674
1675 // A release fence requires that all stores complete before it, but does
1676 // not prevent the reordering of following loads 'before' the fence. As a
1677 // result, we don't need to consider it as writing to memory and don't need
1678 // to advance the generation. We do need to prevent DSE across the fence,
1679 // but that's handled above.
1680 if (auto *FI = dyn_cast<FenceInst>(&Inst))
1681 if (FI->getOrdering() == AtomicOrdering::Release) {
1682 assert(Inst.mayReadFromMemory() && "relied on to prevent DSE above");
1683 continue;
1684 }
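// Consequence of not bumping the generation at a release fence
// (hypothetical IR):
//   %v1 = load i32, ptr %p
//   fence release
//   %v2 = load i32, ptr %p
// The second load may still be forwarded from %v1, since a release fence
// does not order later loads with respect to earlier ones.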
1685
1686 // Write-back DSE - If we write back the same value we just loaded from
1687 // the same location and haven't passed any intervening writes or ordering
1688 // operations, we can remove the write. The primary benefit is in allowing
1689 // the available load table to remain valid and value forward past where
1690 // the store originally was.
1691 if (MemInst.isValid() && MemInst.isStore()) {
1692 LoadValue InVal = AvailableLoads.lookup(MemInst.getPointerOperand());
1693 if (InVal.DefInst &&
1694 InVal.DefInst == getMatchingValue(InVal, MemInst, CurrentGeneration)) {
1695 // It is okay to have a LastStore to a different pointer here if MemorySSA
1696 // tells us that the load and store are from the same memory generation.
1697 // In that case, LastStore should keep its present value since we're
1698 // removing the current store.
1699 assert((!LastStore ||
1700 ParseMemoryInst(LastStore, TTI).getPointerOperand() ==
1701 MemInst.getPointerOperand() ||
1702 MSSA) &&
1703 "can't have an intervening store if not using MemorySSA!");
1704 LLVM_DEBUG(dbgs() << "EarlyCSE DSE (writeback): " << Inst << '\n');
1705 if (!DebugCounter::shouldExecute(CSECounter)) {
1706 LLVM_DEBUG(dbgs() << "Skipping due to debug counter\n");
1707 continue;
1708 }
1709 salvageKnowledge(&Inst, &AC);
1710 removeMSSA(Inst);
1711 Inst.eraseFromParent();
1712 Changed = true;
1713 ++NumDSE;
1714 // We can avoid incrementing the generation count since we were able
1715 // to eliminate this store.
1716 continue;
1717 }
1718 }
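// Shape of the write-back DSE handled above (hypothetical IR):
//   %v = load i32, ptr %p
//   store i32 %v, ptr %p
// With no intervening write or ordering operation, the store writes back
// the value just loaded and is deleted, keeping the AvailableLoads entry
// for %p usable past this point.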
1719
1720 // Okay, this isn't something we can CSE at all. Check to see if it is
1721 // something that could modify memory. If so, our available memory values
1722 // cannot be used so bump the generation count.
1723 if (Inst.mayWriteToMemory()) {
1724 ++CurrentGeneration;
1725
1726 if (MemInst.isValid() && MemInst.isStore()) {
1727 // We do a trivial form of DSE if there are two stores to the same
1728 // location with no intervening loads. Delete the earlier store.
1729 if (LastStore) {
1730 if (overridingStores(ParseMemoryInst(LastStore, TTI), MemInst)) {
1731 LLVM_DEBUG(dbgs() << "EarlyCSE DEAD STORE: " << *LastStore
1732 << " due to: " << Inst << '\n');
1733 if (!DebugCounter::shouldExecute(CSECounter)) {
1734 LLVM_DEBUG(dbgs() << "Skipping due to debug counter\n");
1735 } else {
1736 salvageKnowledge(&Inst, &AC);
1737 removeMSSA(*LastStore);
1738 LastStore->eraseFromParent();
1739 Changed = true;
1740 ++NumDSE;
1741 LastStore = nullptr;
1742 }
1743 }
1744 // fallthrough - we can exploit information about this store
1745 }
1746
1747 // Okay, we just invalidated anything we knew about loaded values. Try
1748 // to salvage *something* by remembering that the stored value is a live
1749 // version of the pointer. It is safe to forward from volatile stores
1750 // to non-volatile loads, so we don't have to check for volatility of
1751 // the store.
1752 AvailableLoads.insert(MemInst.getPointerOperand(),
1753 LoadValue(&Inst, CurrentGeneration,
1754 MemInst.getMatchingId(),
1755 MemInst.isAtomic(),
1756 MemInst.isLoad()));
1757
1758 // Remember that this was the last unordered store we saw for DSE. We
1759 // don't yet handle DSE on ordered or volatile stores since we don't
1760 // have a good way to model the ordering requirement for following
1761 // passes once the store is removed. We could insert a fence, but
1762 // since fences are slightly stronger than stores in their ordering,
1763 // it's not clear this is a profitable transform. Another option would
1764 // be to merge the ordering with that of the post dominating store.
1765 if (MemInst.isUnordered() && !MemInst.isVolatile())
1766 LastStore = &Inst;
1767 else
1768 LastStore = nullptr;
1769 }
1770 }
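// Shape of the trivial DSE performed just above (hypothetical IR):
//   store i32 1, ptr %p        ; LastStore, becomes dead
//   store i32 2, ptr %p        ; overriding store, no intervening load
// overridingStores() recognizes that the second store covers the first, so
// the earlier store is erased and the later one becomes the new LastStore.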
1771 }
1772
1773 return Changed;
1774}
1775
1776bool EarlyCSE::run() {
1777 // Note: deque is being used here because there are significant performance
1778 // gains over vector when the container becomes very large due to the
1779 // specific access patterns. For more information see the mailing list
1780 // discussion on this:
1781 // http://lists.llvm.org/pipermail/llvm-commits/Week-of-Mon-20120116/135228.html
1782 std::deque<StackNode *> nodesToProcess;
1783
1784 bool Changed = false;
1785
1786 // Process the root node.
1787 nodesToProcess.push_back(new StackNode(
1788 AvailableValues, AvailableLoads, AvailableInvariants, AvailableCalls,
1789 AvailableGEPs, CurrentGeneration, DT.getRootNode(),
1790 DT.getRootNode()->begin(), DT.getRootNode()->end()));
1791
1792 assert(!CurrentGeneration && "Create a new EarlyCSE instance to rerun it.");
1793
1794 // Process the stack.
1795 while (!nodesToProcess.empty()) {
1796 // Grab the node on top of the stack. Set the current generation and
1797 // either process the node, descend into its next child, or pop it.
1798 StackNode *NodeToProcess = nodesToProcess.back();
1799
1800 // Initialize class members.
1801 CurrentGeneration = NodeToProcess->currentGeneration();
1802
1803 // Check if the node needs to be processed.
1804 if (!NodeToProcess->isProcessed()) {
1805 // Process the node.
1806 Changed |= processNode(NodeToProcess->node());
1807 NodeToProcess->childGeneration(CurrentGeneration);
1808 NodeToProcess->process();
1809 } else if (NodeToProcess->childIter() != NodeToProcess->end()) {
1810 // Push the next child onto the stack.
1811 DomTreeNode *child = NodeToProcess->nextChild();
1812 nodesToProcess.push_back(new StackNode(
1813 AvailableValues, AvailableLoads, AvailableInvariants, AvailableCalls,
1814 AvailableGEPs, NodeToProcess->childGeneration(), child,
1815 child->begin(), child->end()));
1816 } else {
1817 // It has been processed, and there are no more children to process,
1818 // so delete it and pop it off the stack.
1819 delete NodeToProcess;
1820 nodesToProcess.pop_back();
1821 }
1822 } // while (!nodes...)
1823
1824 return Changed;
1825}
1826
1827PreservedAnalyses EarlyCSEPass::run(Function &F,
1828 FunctionAnalysisManager &AM) {
1829 auto &TLI = AM.getResult<TargetLibraryAnalysis>(F);
1830 auto &TTI = AM.getResult<TargetIRAnalysis>(F);
1831 auto &DT = AM.getResult<DominatorTreeAnalysis>(F);
1832 auto &AC = AM.getResult<AssumptionAnalysis>(F);
1833 auto *MSSA =
1834 UseMemorySSA ? &AM.getResult<MemorySSAAnalysis>(F).getMSSA() : nullptr;
1835
1836 EarlyCSE CSE(F.getDataLayout(), TLI, TTI, DT, AC, MSSA);
1837
1838 if (!CSE.run())
1839 return PreservedAnalyses::all();
1840
1841 PreservedAnalyses PA;
1842 PA.preserveSet<CFGAnalyses>();
1843 if (UseMemorySSA)
1844 PA.preserve<MemorySSAAnalysis>();
1845 return PA;
1846}
1847
1848void EarlyCSEPass::printPipeline(
1849 raw_ostream &OS, function_ref<StringRef(StringRef)> MapClassName2PassName) {
1850 static_cast<PassInfoMixin<EarlyCSEPass> *>(this)->printPipeline(
1851 OS, MapClassName2PassName);
1852 OS << '<';
1853 if (UseMemorySSA)
1854 OS << "memssa";
1855 OS << '>';
1856}
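// For reference, a hedged usage sketch rather than documentation from this
// file: the printed form round-trips through the new pass manager's
// pipeline parser, e.g.
//   opt -passes='early-cse<memssa>' -S input.ll
// while the MemorySSA-free variant can be spelled -passes=early-cse.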
1857
1858namespace {
1859
1860/// A simple and fast domtree-based CSE pass.
1861///
1862/// This pass does a simple depth-first walk over the dominator tree,
1863/// eliminating trivially redundant instructions and using instsimplify to
1864/// canonicalize things as it goes. It is intended to be fast and catch obvious
1865/// cases so that instcombine and other passes are more effective. It is
1866/// expected that a later pass of GVN will catch the interesting/hard cases.
1867template<bool UseMemorySSA>
1868class EarlyCSELegacyCommonPass : public FunctionPass {
1869public:
1870 static char ID;
1871
1872 EarlyCSELegacyCommonPass() : FunctionPass(ID) {
1873 if (UseMemorySSA)
1874 initializeEarlyCSEMemSSALegacyPassPass(*PassRegistry::getPassRegistry());
1875 else
1876 initializeEarlyCSELegacyPassPass(*PassRegistry::getPassRegistry());
1877 }
1878
1879 bool runOnFunction(Function &F) override {
1880 if (skipFunction(F))
1881 return false;
1882
1883 auto &TLI = getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F);
1884 auto &TTI = getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
1885 auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
1886 auto &AC = getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
1887 auto *MSSA =
1888 UseMemorySSA ? &getAnalysis<MemorySSAWrapperPass>().getMSSA() : nullptr;
1889
1890 EarlyCSE CSE(F.getDataLayout(), TLI, TTI, DT, AC, MSSA);
1891
1892 return CSE.run();
1893 }
1894
1895 void getAnalysisUsage(AnalysisUsage &AU) const override {
1896 AU.addRequired<AssumptionCacheTracker>();
1897 AU.addRequired<DominatorTreeWrapperPass>();
1898 AU.addRequired<TargetLibraryInfoWrapperPass>();
1899 AU.addRequired<TargetTransformInfoWrapperPass>();
1900 if (UseMemorySSA) {
1901 AU.addRequired<AAResultsWrapperPass>();
1902 AU.addRequired<MemorySSAWrapperPass>();
1903 AU.addPreserved<MemorySSAWrapperPass>();
1904 }
1905 AU.addPreserved<GlobalsAAWrapperPass>();
1906 AU.addPreserved<AAResultsWrapperPass>();
1907 AU.setPreservesCFG();
1908 }
1909};
1910
1911} // end anonymous namespace
1912
1913using EarlyCSELegacyPass = EarlyCSELegacyCommonPass</*UseMemorySSA=*/false>;
1914
1915template<>
1916char EarlyCSELegacyPass::ID = 0;
1917
1918INITIALIZE_PASS_BEGIN(EarlyCSELegacyPass, "early-cse", "Early CSE", false,
1919 false)
1925
1926using EarlyCSEMemSSALegacyPass =
1927 EarlyCSELegacyCommonPass</*UseMemorySSA=*/true>;
1928
1929template<>
1930char EarlyCSEMemSSALegacyPass::ID = 0;
1931
1932FunctionPass *llvm::createEarlyCSEPass(bool UseMemorySSA) {
1933 if (UseMemorySSA)
1934 return new EarlyCSEMemSSALegacyPass();
1935 else
1936 return new EarlyCSELegacyPass();
1937}
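// Hypothetical legacy pass manager usage of the factory above (sketch only,
// assuming a Module M is already available):
//   legacy::PassManager PM;
//   PM.add(createEarlyCSEPass(/*UseMemorySSA=*/true));
//   PM.run(M);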
1938
1939INITIALIZE_PASS_BEGIN(EarlyCSEMemSSALegacyPass, "early-cse-memssa",
1940 "Early CSE w/ MemorySSA", false, false)
1947INITIALIZE_PASS_END(EarlyCSEMemSSALegacyPass, "early-cse-memssa",
1948 "Early CSE w/ MemorySSA", false, false)