1//===- EarlyCSE.cpp - Simple and fast CSE pass ----------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This pass performs a simple dominator tree walk that eliminates trivially
10// redundant instructions.
11//
12//===----------------------------------------------------------------------===//
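//
// For illustration (names are placeholders, not from this file): on IR such as
//
//   %a = add i32 %x, %y
//   %b = add i32 %x, %y
//   %c = mul i32 %a, %b
//
// the pass records %a in a scoped hash table while walking the dominator tree
// and rewrites the dominated duplicate %b to %a, leaving:
//
//   %a = add i32 %x, %y
//   %c = mul i32 %a, %a
//
//===----------------------------------------------------------------------===//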
13
16#include "llvm/ADT/Hashing.h"
17#include "llvm/ADT/STLExtras.h"
20#include "llvm/ADT/Statistic.h"
30#include "llvm/IR/BasicBlock.h"
31#include "llvm/IR/Constants.h"
32#include "llvm/IR/Dominators.h"
33#include "llvm/IR/Function.h"
34#include "llvm/IR/InstrTypes.h"
35#include "llvm/IR/Instruction.h"
38#include "llvm/IR/LLVMContext.h"
39#include "llvm/IR/PassManager.h"
41#include "llvm/IR/Type.h"
42#include "llvm/IR/Value.h"
44#include "llvm/Pass.h"
48#include "llvm/Support/Debug.h"
55#include <cassert>
56#include <deque>
57#include <memory>
58#include <utility>
59
60using namespace llvm;
61using namespace llvm::PatternMatch;
62
63#define DEBUG_TYPE "early-cse"
64
65STATISTIC(NumSimplify, "Number of instructions simplified or DCE'd");
66STATISTIC(NumCSE, "Number of instructions CSE'd");
67STATISTIC(NumCSECVP, "Number of compare instructions CVP'd");
68STATISTIC(NumCSELoad, "Number of load instructions CSE'd");
69STATISTIC(NumCSECall, "Number of call instructions CSE'd");
70STATISTIC(NumCSEGEP, "Number of GEP instructions CSE'd");
71STATISTIC(NumDSE, "Number of trivial dead stores removed");
72
73DEBUG_COUNTER(CSECounter, "early-cse",
74 "Controls which instructions are removed");
75
77 "earlycse-mssa-optimization-cap", cl::init(500), cl::Hidden,
78 cl::desc("Enable imprecision in EarlyCSE in pathological cases, in exchange "
79 "for faster compile. Caps the MemorySSA clobbering calls."));
80
82 "earlycse-debug-hash", cl::init(false), cl::Hidden,
83 cl::desc("Perform extra assertion checking to verify that SimpleValue's hash "
84 "function is well-behaved w.r.t. its isEqual predicate"));
85
86//===----------------------------------------------------------------------===//
87// SimpleValue
88//===----------------------------------------------------------------------===//
89
90namespace {
91
92/// Struct representing the available values in the scoped hash table.
93struct SimpleValue {
94 Instruction *Inst;
95
96 SimpleValue(Instruction *I) : Inst(I) {
97 assert((isSentinel() || canHandle(I)) && "Inst can't be handled!");
98 }
99
100 bool isSentinel() const {
101 return Inst == DenseMapInfo<Instruction *>::getEmptyKey() ||
102 Inst == DenseMapInfo<Instruction *>::getTombstoneKey();
103 }
104
105 static bool canHandle(Instruction *Inst) {
106 // This can only handle non-void readnone functions.
107 // Also handled are constrained intrinsics that look like the types
108 // of instructions handled below (UnaryOperator, etc.).
109 if (CallInst *CI = dyn_cast<CallInst>(Inst)) {
110 if (Function *F = CI->getCalledFunction()) {
111 switch ((Intrinsic::ID)F->getIntrinsicID()) {
112 case Intrinsic::experimental_constrained_fadd:
113 case Intrinsic::experimental_constrained_fsub:
114 case Intrinsic::experimental_constrained_fmul:
115 case Intrinsic::experimental_constrained_fdiv:
116 case Intrinsic::experimental_constrained_frem:
117 case Intrinsic::experimental_constrained_fptosi:
118 case Intrinsic::experimental_constrained_sitofp:
119 case Intrinsic::experimental_constrained_fptoui:
120 case Intrinsic::experimental_constrained_uitofp:
121 case Intrinsic::experimental_constrained_fcmp:
122 case Intrinsic::experimental_constrained_fcmps: {
123 auto *CFP = cast<ConstrainedFPIntrinsic>(CI);
124 if (CFP->getExceptionBehavior() &&
125 CFP->getExceptionBehavior() == fp::ebStrict)
126 return false;
127 // Since we CSE across function calls we must not allow
128 // the rounding mode to change.
129 if (CFP->getRoundingMode() &&
130 CFP->getRoundingMode() == RoundingMode::Dynamic)
131 return false;
132 return true;
133 }
134 }
135 }
136 return CI->doesNotAccessMemory() && !CI->getType()->isVoidTy() &&
137 // FIXME: Currently, calls that may access the thread ID may be
138 // considered as not accessing memory. But this is problematic
139 // for coroutines, since a coroutine may resume in a different
140 // thread. So we disable the optimization here for correctness.
141 // However, it may block many other correct optimizations.
142 // Revert this once we detect the memory-accessing kind more
143 // precisely.
144 !CI->getFunction()->isPresplitCoroutine();
145 }
146 return isa<CastInst>(Inst) || isa<UnaryOperator>(Inst) ||
147 isa<BinaryOperator>(Inst) || isa<CmpInst>(Inst) ||
148 isa<SelectInst>(Inst) || isa<ExtractElementInst>(Inst) ||
149 isa<InsertElementInst>(Inst) || isa<ShuffleVectorInst>(Inst) ||
150 isa<ExtractValueInst>(Inst) || isa<InsertValueInst>(Inst) ||
151 isa<FreezeInst>(Inst);
152 }
153};
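// For illustration (operands are placeholders), a constrained call such as
//
//   call double @llvm.experimental.constrained.fadd.f64(
//       double %a, double %b,
//       metadata !"round.dynamic", metadata !"fpexcept.ignore")
//
// is rejected by SimpleValue::canHandle(): a dynamic rounding mode could
// differ between otherwise-identical calls, so CSE'ing them would be unsafe.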
154
155} // end anonymous namespace
156
157namespace llvm {
158
159template <> struct DenseMapInfo<SimpleValue> {
160 static inline SimpleValue getEmptyKey() {
161 return SimpleValue(DenseMapInfo<Instruction *>::getEmptyKey());
162 }
163
164 static inline SimpleValue getTombstoneKey() {
165 return SimpleValue(DenseMapInfo<Instruction *>::getTombstoneKey());
166 }
167
168 static unsigned getHashValue(SimpleValue Val);
169 static bool isEqual(SimpleValue LHS, SimpleValue RHS);
170};
171
172} // end namespace llvm
173
174/// Match a 'select' including an optional 'not' of the condition.
175static bool matchSelectWithOptionalNotCond(Value *V, Value *&Cond, Value *&A,
176 Value *&B,
177 SelectPatternFlavor &Flavor) {
178 // Return false if V is not even a select.
179 if (!match(V, m_Select(m_Value(Cond), m_Value(A), m_Value(B))))
180 return false;
181
182 // Look through a 'not' of the condition operand by swapping A/B.
183 Value *CondNot;
184 if (match(Cond, m_Not(m_Value(CondNot)))) {
185 Cond = CondNot;
186 std::swap(A, B);
187 }
188
189 // Match canonical forms of min/max. We are not using ValueTracking's
190 // more powerful matchSelectPattern() because it may rely on instruction flags
191 // such as "nsw". That would be incompatible with the current hashing
192 // mechanism that may remove flags to increase the likelihood of CSE.
193
194 Flavor = SPF_UNKNOWN;
195 CmpPredicate Pred;
196
197 if (!match(Cond, m_ICmp(Pred, m_Specific(A), m_Specific(B)))) {
198 // Check for commuted variants of min/max by swapping predicate.
199 // If we do not match the standard or commuted patterns, this is not a
200 // recognized form of min/max, but it is still a select, so return true.
201 if (!match(Cond, m_ICmp(Pred, m_Specific(B), m_Specific(A))))
202 return true;
203 Pred = ICmpInst::getSwappedPredicate(Pred);
204 }
205
206 switch (Pred) {
207 case CmpInst::ICMP_UGT: Flavor = SPF_UMAX; break;
208 case CmpInst::ICMP_ULT: Flavor = SPF_UMIN; break;
209 case CmpInst::ICMP_SGT: Flavor = SPF_SMAX; break;
210 case CmpInst::ICMP_SLT: Flavor = SPF_SMIN; break;
211 // Non-strict inequalities.
212 case CmpInst::ICMP_ULE: Flavor = SPF_UMIN; break;
213 case CmpInst::ICMP_UGE: Flavor = SPF_UMAX; break;
214 case CmpInst::ICMP_SLE: Flavor = SPF_SMIN; break;
215 case CmpInst::ICMP_SGE: Flavor = SPF_SMAX; break;
216 default: break;
217 }
218
219 return true;
220}
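// For illustration (values are placeholders), the matcher above treats
//
//   %cmp = icmp slt i32 %a, %b
//   %not = xor i1 %cmp, true
//   %max = select i1 %not, i32 %a, i32 %b
//
// as SPF_SMAX of %a and %b: the 'not' is looked through, the select arms are
// swapped, and the commuted 'slt' predicate is canonicalized to 'sgt'.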
221
222static unsigned hashCallInst(CallInst *CI) {
223 // Don't CSE convergent calls in different basic blocks, because they
224 // implicitly depend on the set of threads that is currently executing.
225 if (CI->isConvergent()) {
226 return hash_combine(
227 CI->getOpcode(), CI->getParent(),
228 hash_combine_range(CI->value_op_begin(), CI->value_op_end()));
229 }
230 return hash_combine(
231 CI->getOpcode(),
232 hash_combine_range(CI->value_op_begin(), CI->value_op_end()));
233}
234
235static unsigned getHashValueImpl(SimpleValue Val) {
236 Instruction *Inst = Val.Inst;
237 // Hash in all of the operands as pointers.
238 if (BinaryOperator *BinOp = dyn_cast<BinaryOperator>(Inst)) {
239 Value *LHS = BinOp->getOperand(0);
240 Value *RHS = BinOp->getOperand(1);
241 if (BinOp->isCommutative() && BinOp->getOperand(0) > BinOp->getOperand(1))
242 std::swap(LHS, RHS);
243
244 return hash_combine(BinOp->getOpcode(), LHS, RHS);
245 }
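// For illustration, 'add i32 %x, %y' and 'add i32 %y, %x' hash identically
// here: the operands of a commutative binary operator are ordered by pointer
// value before being combined into the hash.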
246
247 if (CmpInst *CI = dyn_cast<CmpInst>(Inst)) {
248 // Compares can be commuted by swapping the comparands and
249 // updating the predicate. Choose the form that has the
250 // comparands in sorted order, or in the case of a tie, the
251 // one with the lower predicate.
252 Value *LHS = CI->getOperand(0);
253 Value *RHS = CI->getOperand(1);
254 CmpInst::Predicate Pred = CI->getPredicate();
255 CmpInst::Predicate SwappedPred = CI->getSwappedPredicate();
256 if (std::tie(LHS, Pred) > std::tie(RHS, SwappedPred)) {
257 std::swap(LHS, RHS);
258 Pred = SwappedPred;
259 }
260 return hash_combine(Inst->getOpcode(), Pred, LHS, RHS);
261 }
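// For illustration, 'icmp sgt i32 %x, %y' and 'icmp slt i32 %y, %x' normalize
// to the same (LHS, RHS, predicate) tuple above and therefore hash the same,
// matching the commuted-compare handling in isEqualImpl() below.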
262
263 // Hash general selects to allow matching commuted true/false operands.
264 SelectPatternFlavor SPF;
265 Value *Cond, *A, *B;
266 if (matchSelectWithOptionalNotCond(Inst, Cond, A, B, SPF)) {
267 // Hash min/max (cmp + select) to allow for commuted operands.
268 // Min/max may also have a non-canonical compare predicate (e.g., the compare for
269 // smin may use 'sgt' rather than 'slt'), and non-canonical operands in the
270 // compare.
271 // TODO: We should also detect FP min/max.
272 if (SPF == SPF_SMIN || SPF == SPF_SMAX ||
273 SPF == SPF_UMIN || SPF == SPF_UMAX) {
274 if (A > B)
275 std::swap(A, B);
276 return hash_combine(Inst->getOpcode(), SPF, A, B);
277 }
278
279 // Hash general selects to allow matching commuted true/false operands.
280
281 // If we do not have a compare as the condition, just hash in the condition.
282 CmpPredicate Pred;
283 Value *X, *Y;
284 if (!match(Cond, m_Cmp(Pred, m_Value(X), m_Value(Y))))
285 return hash_combine(Inst->getOpcode(), Cond, A, B);
286
287 // Similar to cmp normalization (above) - canonicalize the predicate value:
288 // select (icmp Pred, X, Y), A, B --> select (icmp InvPred, X, Y), B, A
289 if (CmpInst::getInversePredicate(Pred) < Pred) {
290 Pred = CmpInst::getInversePredicate(Pred);
291 std::swap(A, B);
292 }
293 return hash_combine(Inst->getOpcode(),
294 static_cast<CmpInst::Predicate>(Pred), X, Y, A, B);
295 }
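// For illustration, 'select (icmp ult i32 %x, %y), i32 %a, i32 %b' and
// 'select (icmp uge i32 %x, %y), i32 %b, i32 %a' hash identically after the
// inverse-predicate canonicalization above.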
296
297 if (CastInst *CI = dyn_cast<CastInst>(Inst))
298 return hash_combine(CI->getOpcode(), CI->getType(), CI->getOperand(0));
299
300 if (FreezeInst *FI = dyn_cast<FreezeInst>(Inst))
301 return hash_combine(FI->getOpcode(), FI->getOperand(0));
302
303 if (const ExtractValueInst *EVI = dyn_cast<ExtractValueInst>(Inst))
304 return hash_combine(EVI->getOpcode(), EVI->getOperand(0),
305 hash_combine_range(EVI->idx_begin(), EVI->idx_end()));
306
307 if (const InsertValueInst *IVI = dyn_cast<InsertValueInst>(Inst))
308 return hash_combine(IVI->getOpcode(), IVI->getOperand(0),
309 IVI->getOperand(1),
310 hash_combine_range(IVI->idx_begin(), IVI->idx_end()));
311
312 assert((isa<CallInst>(Inst) || isa<ExtractElementInst>(Inst) ||
313 isa<InsertElementInst>(Inst) || isa<ShuffleVectorInst>(Inst) ||
314 isa<UnaryOperator>(Inst) || isa<FreezeInst>(Inst)) &&
315 "Invalid/unknown instruction");
316
317 // Handle intrinsics with commutative operands.
318 auto *II = dyn_cast<IntrinsicInst>(Inst);
319 if (II && II->isCommutative() && II->arg_size() >= 2) {
320 Value *LHS = II->getArgOperand(0), *RHS = II->getArgOperand(1);
321 if (LHS > RHS)
322 std::swap(LHS, RHS);
323 return hash_combine(
324 II->getOpcode(), LHS, RHS,
325 hash_combine_range(II->value_op_begin() + 2, II->value_op_end()));
326 }
327
328 // gc.relocate is 'special' call: its second and third operands are
329 // not real values, but indices into statepoint's argument list.
330 // Get values they point to.
331 if (const GCRelocateInst *GCR = dyn_cast<GCRelocateInst>(Inst))
332 return hash_combine(GCR->getOpcode(), GCR->getOperand(0),
333 GCR->getBasePtr(), GCR->getDerivedPtr());
334
335 // Don't CSE convergent calls in different basic blocks, because they
336 // implicitly depend on the set of threads that is currently executing.
337 if (CallInst *CI = dyn_cast<CallInst>(Inst))
338 return hashCallInst(CI);
339
340 // Mix in the opcode.
341 return hash_combine(
342 Inst->getOpcode(),
343 hash_combine_range(Inst->value_op_begin(), Inst->value_op_end()));
344}
345
346unsigned DenseMapInfo<SimpleValue>::getHashValue(SimpleValue Val) {
347#ifndef NDEBUG
348 // If -earlycse-debug-hash was specified, return a constant -- this
349 // will force all hashing to collide, so we'll exhaustively search
350 // the table for a match, and the assertion in isEqual will fire if
351 // there's a bug causing equal keys to hash differently.
352 if (EarlyCSEDebugHash)
353 return 0;
354#endif
355 return getHashValueImpl(Val);
356}
357
358static bool isEqualImpl(SimpleValue LHS, SimpleValue RHS) {
359 Instruction *LHSI = LHS.Inst, *RHSI = RHS.Inst;
360
361 if (LHS.isSentinel() || RHS.isSentinel())
362 return LHSI == RHSI;
363
364 if (LHSI->getOpcode() != RHSI->getOpcode())
365 return false;
366 if (LHSI->isIdenticalToWhenDefined(RHSI, /*IntersectAttrs=*/true)) {
367 // Convergent calls implicitly depend on the set of threads that is
368 // currently executing, so conservatively return false if they are in
369 // different basic blocks.
370 if (CallInst *CI = dyn_cast<CallInst>(LHSI);
371 CI && CI->isConvergent() && LHSI->getParent() != RHSI->getParent())
372 return false;
373
374 return true;
375 }
376
377 // If we're not strictly identical, we still might be a commutable instruction
378 if (BinaryOperator *LHSBinOp = dyn_cast<BinaryOperator>(LHSI)) {
379 if (!LHSBinOp->isCommutative())
380 return false;
381
382 assert(isa<BinaryOperator>(RHSI) &&
383 "same opcode, but different instruction type?");
384 BinaryOperator *RHSBinOp = cast<BinaryOperator>(RHSI);
385
386 // Commuted equality
387 return LHSBinOp->getOperand(0) == RHSBinOp->getOperand(1) &&
388 LHSBinOp->getOperand(1) == RHSBinOp->getOperand(0);
389 }
390 if (CmpInst *LHSCmp = dyn_cast<CmpInst>(LHSI)) {
391 assert(isa<CmpInst>(RHSI) &&
392 "same opcode, but different instruction type?");
393 CmpInst *RHSCmp = cast<CmpInst>(RHSI);
394 // Commuted equality
395 return LHSCmp->getOperand(0) == RHSCmp->getOperand(1) &&
396 LHSCmp->getOperand(1) == RHSCmp->getOperand(0) &&
397 LHSCmp->getSwappedPredicate() == RHSCmp->getPredicate();
398 }
399
400 auto *LII = dyn_cast<IntrinsicInst>(LHSI);
401 auto *RII = dyn_cast<IntrinsicInst>(RHSI);
402 if (LII && RII && LII->getIntrinsicID() == RII->getIntrinsicID() &&
403 LII->isCommutative() && LII->arg_size() >= 2) {
404 return LII->getArgOperand(0) == RII->getArgOperand(1) &&
405 LII->getArgOperand(1) == RII->getArgOperand(0) &&
406 std::equal(LII->arg_begin() + 2, LII->arg_end(),
407 RII->arg_begin() + 2, RII->arg_end());
408 }
409
410 // See comment above in `getHashValue()`.
411 if (const GCRelocateInst *GCR1 = dyn_cast<GCRelocateInst>(LHSI))
412 if (const GCRelocateInst *GCR2 = dyn_cast<GCRelocateInst>(RHSI))
413 return GCR1->getOperand(0) == GCR2->getOperand(0) &&
414 GCR1->getBasePtr() == GCR2->getBasePtr() &&
415 GCR1->getDerivedPtr() == GCR2->getDerivedPtr();
416
417 // Min/max can occur with commuted operands, non-canonical predicates,
418 // and/or non-canonical operands.
419 // Selects can be non-trivially equivalent via inverted conditions and swaps.
420 SelectPatternFlavor LSPF, RSPF;
421 Value *CondL, *CondR, *LHSA, *RHSA, *LHSB, *RHSB;
422 if (matchSelectWithOptionalNotCond(LHSI, CondL, LHSA, LHSB, LSPF) &&
423 matchSelectWithOptionalNotCond(RHSI, CondR, RHSA, RHSB, RSPF)) {
424 if (LSPF == RSPF) {
425 // TODO: We should also detect FP min/max.
426 if (LSPF == SPF_SMIN || LSPF == SPF_SMAX ||
427 LSPF == SPF_UMIN || LSPF == SPF_UMAX)
428 return ((LHSA == RHSA && LHSB == RHSB) ||
429 (LHSA == RHSB && LHSB == RHSA));
430
431 // select Cond, A, B <--> select not(Cond), B, A
432 if (CondL == CondR && LHSA == RHSA && LHSB == RHSB)
433 return true;
434 }
435
436 // If the true/false operands are swapped and the conditions are compares
437 // with inverted predicates, the selects are equal:
438 // select (icmp Pred, X, Y), A, B <--> select (icmp InvPred, X, Y), B, A
439 //
440 // This also handles patterns with a double-negation in the sense of not +
441 // inverse, because we looked through a 'not' in the matching function and
442 // swapped A/B:
443 // select (cmp Pred, X, Y), A, B <--> select (not (cmp InvPred, X, Y)), B, A
444 //
445 // This intentionally does NOT handle patterns with a double-negation in
446 // the sense of not + not, because doing so could result in values
447 // comparing
448 // as equal that hash differently in the min/max cases like:
449 // select (cmp slt, X, Y), X, Y <--> select (not (not (cmp slt, X, Y))), X, Y
450 // ^ hashes as min ^ would not hash as min
451 // In the context of the EarlyCSE pass, however, such cases never reach
452 // this code, as we simplify the double-negation before hashing the second
453 // select (and so still succeed at CSEing them).
454 if (LHSA == RHSB && LHSB == RHSA) {
455 CmpPredicate PredL, PredR;
456 Value *X, *Y;
457 if (match(CondL, m_Cmp(PredL, m_Value(X), m_Value(Y))) &&
458 match(CondR, m_Cmp(PredR, m_Specific(X), m_Specific(Y))) &&
459 CmpInst::getInversePredicate(PredL) == PredR)
460 return true;
461 }
462 }
463
464 return false;
465}
466
467bool DenseMapInfo<SimpleValue>::isEqual(SimpleValue LHS, SimpleValue RHS) {
468 // These comparisons are nontrivial, so assert that equality implies
469 // hash equality (DenseMap demands this as an invariant).
470 bool Result = isEqualImpl(LHS, RHS);
471 assert(!Result || (LHS.isSentinel() && LHS.Inst == RHS.Inst) ||
472 getHashValueImpl(LHS) == getHashValueImpl(RHS));
473 return Result;
474}
475
476//===----------------------------------------------------------------------===//
477// CallValue
478//===----------------------------------------------------------------------===//
479
480namespace {
481
482/// Struct representing the available call values in the scoped hash
483/// table.
484struct CallValue {
485 Instruction *Inst;
486
487 CallValue(Instruction *I) : Inst(I) {
488 assert((isSentinel() || canHandle(I)) && "Inst can't be handled!");
489 }
490
491 bool isSentinel() const {
492 return Inst == DenseMapInfo<Instruction *>::getEmptyKey() ||
493 Inst == DenseMapInfo<Instruction *>::getTombstoneKey();
494 }
495
496 static bool canHandle(Instruction *Inst) {
497 // Don't value number anything that returns void.
498 if (Inst->getType()->isVoidTy())
499 return false;
500
501 CallInst *CI = dyn_cast<CallInst>(Inst);
502 if (!CI || !CI->onlyReadsMemory() ||
503 // FIXME: Currently, calls that may access the thread ID may be
504 // considered as not accessing memory. But this is problematic
505 // for coroutines, since a coroutine may resume in a different
506 // thread. So we disable the optimization here for correctness.
507 // However, it may block many other correct optimizations.
508 // Revert this once we detect the memory-accessing kind more
509 // precisely.
510 CI->getFunction()->isPresplitCoroutine())
511 return false;
512 return true;
513 }
514};
515
516} // end anonymous namespace
517
518namespace llvm {
519
520template <> struct DenseMapInfo<CallValue> {
521 static inline CallValue getEmptyKey() {
522 return CallValue(DenseMapInfo<Instruction *>::getEmptyKey());
523 }
524
525 static inline CallValue getTombstoneKey() {
526 return CallValue(DenseMapInfo<Instruction *>::getTombstoneKey());
527 }
528
529 static unsigned getHashValue(CallValue Val);
530 static bool isEqual(CallValue LHS, CallValue RHS);
531};
532
533} // end namespace llvm
534
535unsigned DenseMapInfo<CallValue>::getHashValue(CallValue Val) {
536 Instruction *Inst = Val.Inst;
537
538 // Hash all of the operands as pointers and mix in the opcode.
539 return hashCallInst(cast<CallInst>(Inst));
540}
541
542bool DenseMapInfo<CallValue>::isEqual(CallValue LHS, CallValue RHS) {
543 if (LHS.isSentinel() || RHS.isSentinel())
544 return LHS.Inst == RHS.Inst;
545
546 CallInst *LHSI = cast<CallInst>(LHS.Inst);
547 CallInst *RHSI = cast<CallInst>(RHS.Inst);
548
549 // Convergent calls implicitly depend on the set of threads that is
550 // currently executing, so conservatively return false if they are in
551 // different basic blocks.
552 if (LHSI->isConvergent() && LHSI->getParent() != RHSI->getParent())
553 return false;
554
555 return LHSI->isIdenticalToWhenDefined(RHSI, /*IntersectAttrs=*/true);
556}
557
558//===----------------------------------------------------------------------===//
559// GEPValue
560//===----------------------------------------------------------------------===//
561
562namespace {
563
564struct GEPValue {
565 Instruction *Inst;
566 std::optional<int64_t> ConstantOffset;
567
568 GEPValue(Instruction *I) : Inst(I) {
569 assert((isSentinel() || canHandle(I)) && "Inst can't be handled!");
570 }
571
572 GEPValue(Instruction *I, std::optional<int64_t> ConstantOffset)
573 : Inst(I), ConstantOffset(ConstantOffset) {
574 assert((isSentinel() || canHandle(I)) && "Inst can't be handled!");
575 }
576
577 bool isSentinel() const {
578 return Inst == DenseMapInfo<Instruction *>::getEmptyKey() ||
579 Inst == DenseMapInfo<Instruction *>::getTombstoneKey();
580 }
581
582 static bool canHandle(Instruction *Inst) {
583 return isa<GetElementPtrInst>(Inst);
584 }
585};
586
587} // namespace
588
589namespace llvm {
590
591template <> struct DenseMapInfo<GEPValue> {
592 static inline GEPValue getEmptyKey() {
593 return GEPValue(DenseMapInfo<Instruction *>::getEmptyKey());
594 }
595
596 static inline GEPValue getTombstoneKey() {
597 return GEPValue(DenseMapInfo<Instruction *>::getTombstoneKey());
598 }
599
600 static unsigned getHashValue(const GEPValue &Val);
601 static bool isEqual(const GEPValue &LHS, const GEPValue &RHS);
602};
603
604} // end namespace llvm
605
606unsigned DenseMapInfo<GEPValue>::getHashValue(const GEPValue &Val) {
607 auto *GEP = cast<GetElementPtrInst>(Val.Inst);
608 if (Val.ConstantOffset.has_value())
609 return hash_combine(GEP->getOpcode(), GEP->getPointerOperand(),
610 Val.ConstantOffset.value());
611 return hash_combine(
612 GEP->getOpcode(),
613 hash_combine_range(GEP->value_op_begin(), GEP->value_op_end()));
614}
615
616bool DenseMapInfo<GEPValue>::isEqual(const GEPValue &LHS, const GEPValue &RHS) {
617 if (LHS.isSentinel() || RHS.isSentinel())
618 return LHS.Inst == RHS.Inst;
619 auto *LGEP = cast<GetElementPtrInst>(LHS.Inst);
620 auto *RGEP = cast<GetElementPtrInst>(RHS.Inst);
621 if (LGEP->getPointerOperand() != RGEP->getPointerOperand())
622 return false;
623 if (LHS.ConstantOffset.has_value() && RHS.ConstantOffset.has_value())
624 return LHS.ConstantOffset.value() == RHS.ConstantOffset.value();
625 return LGEP->isIdenticalToWhenDefined(RGEP);
626}
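// For illustration (pointer %p is a placeholder, assuming the usual data
// layout), 'getelementptr i32, ptr %p, i64 1' and 'getelementptr i8, ptr %p,
// i64 4' compute the same address; when both GEPValues carry a constant
// offset of 4 bytes, the hash and equality above treat them as the same key
// even though their source element types differ.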
627
628//===----------------------------------------------------------------------===//
629// EarlyCSE implementation
630//===----------------------------------------------------------------------===//
631
632namespace {
633
634/// A simple and fast domtree-based CSE pass.
635///
636/// This pass does a simple depth-first walk over the dominator tree,
637/// eliminating trivially redundant instructions and using instsimplify to
638/// canonicalize things as it goes. It is intended to be fast and catch obvious
639/// cases so that instcombine and other passes are more effective. It is
640/// expected that a later pass of GVN will catch the interesting/hard cases.
641class EarlyCSE {
642public:
643 const TargetLibraryInfo &TLI;
645 DominatorTree &DT;
646 AssumptionCache &AC;
647 const SimplifyQuery SQ;
648 MemorySSA *MSSA;
649 std::unique_ptr<MemorySSAUpdater> MSSAUpdater;
650
651 using AllocatorTy =
652 RecyclingAllocator<BumpPtrAllocator,
653 ScopedHashTableVal<SimpleValue, Value *>>;
654 using ScopedHTType =
655 ScopedHashTable<SimpleValue, Value *, DenseMapInfo<SimpleValue>,
656 AllocatorTy>;
657
658 /// A scoped hash table of the current values of all of our simple
659 /// scalar expressions.
660 ///
661 /// As we walk down the domtree, we look to see if instructions are in this:
662 /// if so, we replace them with what we find, otherwise we insert them so
663 /// that dominated values can succeed in their lookup.
664 ScopedHTType AvailableValues;
665
666 /// A scoped hash table of the current values of previously encountered
667 /// memory locations.
668 ///
669 /// This allows us to get efficient access to dominating loads or stores when
670 /// we have a fully redundant load. In addition to the most recent load, we
671 /// keep track of a generation count of the read, which is compared against
672 /// the current generation count. The current generation count is incremented
673 /// after every possibly writing memory operation, which ensures that we only
674 /// CSE loads with other loads that have no intervening store. Ordering
675 /// events (such as fences or atomic instructions) increment the generation
676 /// count as well; essentially, we model these as writes to all possible
677 /// locations. Note that atomic and/or volatile loads and stores can be
678 /// present in the table; it is the responsibility of the consumer to inspect
679 /// the atomicity/volatility if needed.
680 struct LoadValue {
681 Instruction *DefInst = nullptr;
682 unsigned Generation = 0;
683 int MatchingId = -1;
684 bool IsAtomic = false;
685 bool IsLoad = false;
686
687 LoadValue() = default;
688 LoadValue(Instruction *Inst, unsigned Generation, unsigned MatchingId,
689 bool IsAtomic, bool IsLoad)
690 : DefInst(Inst), Generation(Generation), MatchingId(MatchingId),
691 IsAtomic(IsAtomic), IsLoad(IsLoad) {}
692 };
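// For illustration of the generation scheme (names are placeholders):
//
//   %v1 = load i32, ptr %p          ; recorded at generation G
//   call void @may_write_memory()   ; CurrentGeneration becomes G+1
//   %v2 = load i32, ptr %p          ; not CSE'd from the table entry alone,
//                                   ; unless MemorySSA proves the call does
//                                   ; not clobber %p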
693
694 using LoadMapAllocator =
695 RecyclingAllocator<BumpPtrAllocator,
696 ScopedHashTableVal<Value *, LoadValue>>;
697 using LoadHTType =
698 ScopedHashTable<Value *, LoadValue, DenseMapInfo<Value *>,
699 LoadMapAllocator>;
700
701 LoadHTType AvailableLoads;
702
703 // A scoped hash table mapping memory locations (represented as typed
704 // addresses) to generation numbers at which that memory location became
705 // (henceforth indefinitely) invariant.
706 using InvariantMapAllocator =
707 RecyclingAllocator<BumpPtrAllocator,
708 ScopedHashTableVal<MemoryLocation, unsigned>>;
709 using InvariantHTType =
710 ScopedHashTable<MemoryLocation, unsigned, DenseMapInfo<MemoryLocation>,
711 InvariantMapAllocator>;
712 InvariantHTType AvailableInvariants;
713
714 /// A scoped hash table of the current values of read-only call
715 /// values.
716 ///
717 /// It uses the same generation count as loads.
718 using CallHTType =
719 ScopedHashTable<CallValue, std::pair<Instruction *, unsigned>>;
720 CallHTType AvailableCalls;
721
722 using GEPMapAllocatorTy =
723 RecyclingAllocator<BumpPtrAllocator,
724 ScopedHashTableVal<GEPValue, Value *>>;
725 using GEPHTType = ScopedHashTable<GEPValue, Value *, DenseMapInfo<GEPValue>,
726 GEPMapAllocatorTy>;
727 GEPHTType AvailableGEPs;
728
729 /// This is the current generation of the memory value.
730 unsigned CurrentGeneration = 0;
731
732 /// Set up the EarlyCSE runner for a particular function.
733 EarlyCSE(const DataLayout &DL, const TargetLibraryInfo &TLI,
735 AssumptionCache &AC, MemorySSA *MSSA)
736 : TLI(TLI), TTI(TTI), DT(DT), AC(AC), SQ(DL, &TLI, &DT, &AC), MSSA(MSSA),
737 MSSAUpdater(std::make_unique<MemorySSAUpdater>(MSSA)) {}
738
739 bool run();
740
741private:
742 unsigned ClobberCounter = 0;
743 // Almost a POD, but needs to call the constructors for the scoped hash
744 // tables so that a new scope gets pushed on. These are RAII so that the
745 // scope gets popped when the NodeScope is destroyed.
746 class NodeScope {
747 public:
748 NodeScope(ScopedHTType &AvailableValues, LoadHTType &AvailableLoads,
749 InvariantHTType &AvailableInvariants, CallHTType &AvailableCalls,
750 GEPHTType &AvailableGEPs)
751 : Scope(AvailableValues), LoadScope(AvailableLoads),
752 InvariantScope(AvailableInvariants), CallScope(AvailableCalls),
753 GEPScope(AvailableGEPs) {}
754 NodeScope(const NodeScope &) = delete;
755 NodeScope &operator=(const NodeScope &) = delete;
756
757 private:
758 ScopedHTType::ScopeTy Scope;
759 LoadHTType::ScopeTy LoadScope;
760 InvariantHTType::ScopeTy InvariantScope;
761 CallHTType::ScopeTy CallScope;
762 GEPHTType::ScopeTy GEPScope;
763 };
764
765 // Contains all the needed information to create a stack for doing a depth
766 // first traversal of the tree. This includes scopes for values, loads, and
767 // calls as well as the generation. There is a child iterator so that the
768 // children do not need to be stored separately.
769 class StackNode {
770 public:
771 StackNode(ScopedHTType &AvailableValues, LoadHTType &AvailableLoads,
772 InvariantHTType &AvailableInvariants, CallHTType &AvailableCalls,
773 GEPHTType &AvailableGEPs, unsigned cg, DomTreeNode *n,
774 DomTreeNode::const_iterator child,
775 DomTreeNode::const_iterator end)
776 : CurrentGeneration(cg), ChildGeneration(cg), Node(n), ChildIter(child),
777 EndIter(end),
778 Scopes(AvailableValues, AvailableLoads, AvailableInvariants,
779 AvailableCalls, AvailableGEPs) {}
780 StackNode(const StackNode &) = delete;
781 StackNode &operator=(const StackNode &) = delete;
782
783 // Accessors.
784 unsigned currentGeneration() const { return CurrentGeneration; }
785 unsigned childGeneration() const { return ChildGeneration; }
786 void childGeneration(unsigned generation) { ChildGeneration = generation; }
787 DomTreeNode *node() { return Node; }
788 DomTreeNode::const_iterator childIter() const { return ChildIter; }
789
790 DomTreeNode *nextChild() {
791 DomTreeNode *child = *ChildIter;
792 ++ChildIter;
793 return child;
794 }
795
796 DomTreeNode::const_iterator end() const { return EndIter; }
797 bool isProcessed() const { return Processed; }
798 void process() { Processed = true; }
799
800 private:
801 unsigned CurrentGeneration;
802 unsigned ChildGeneration;
803 DomTreeNode *Node;
804 DomTreeNode::const_iterator ChildIter;
805 DomTreeNode::const_iterator EndIter;
806 NodeScope Scopes;
807 bool Processed = false;
808 };
809
810 /// Wrapper class to handle memory instructions, including loads,
811 /// stores and intrinsic loads and stores defined by the target.
812 class ParseMemoryInst {
813 public:
814 ParseMemoryInst(Instruction *Inst, const TargetTransformInfo &TTI)
815 : Inst(Inst) {
816 if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst)) {
817 IntrID = II->getIntrinsicID();
818 if (TTI.getTgtMemIntrinsic(II, Info))
819 return;
820 if (isHandledNonTargetIntrinsic(IntrID)) {
821 switch (IntrID) {
822 case Intrinsic::masked_load:
823 Info.PtrVal = Inst->getOperand(0);
824 Info.MatchingId = Intrinsic::masked_load;
825 Info.ReadMem = true;
826 Info.WriteMem = false;
827 Info.IsVolatile = false;
828 break;
829 case Intrinsic::masked_store:
830 Info.PtrVal = Inst->getOperand(1);
831 // Use the ID of masked load as the "matching id". This will
832 // prevent matching non-masked loads/stores with masked ones
833 // (which could be done), but at the moment, the code here
834 // does not support matching intrinsics with non-intrinsics,
835 // so keep the MatchingIds specific to masked instructions
836 // for now (TODO).
837 Info.MatchingId = Intrinsic::masked_load;
838 Info.ReadMem = false;
839 Info.WriteMem = true;
840 Info.IsVolatile = false;
841 break;
842 }
843 }
844 }
845 }
846
847 Instruction *get() { return Inst; }
848 const Instruction *get() const { return Inst; }
849
850 bool isLoad() const {
851 if (IntrID != 0)
852 return Info.ReadMem;
853 return isa<LoadInst>(Inst);
854 }
855
856 bool isStore() const {
857 if (IntrID != 0)
858 return Info.WriteMem;
859 return isa<StoreInst>(Inst);
860 }
861
862 bool isAtomic() const {
863 if (IntrID != 0)
864 return Info.Ordering != AtomicOrdering::NotAtomic;
865 return Inst->isAtomic();
866 }
867
868 bool isUnordered() const {
869 if (IntrID != 0)
870 return Info.isUnordered();
871
872 if (LoadInst *LI = dyn_cast<LoadInst>(Inst)) {
873 return LI->isUnordered();
874 } else if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
875 return SI->isUnordered();
876 }
877 // Conservative answer
878 return !Inst->isAtomic();
879 }
880
881 bool isVolatile() const {
882 if (IntrID != 0)
883 return Info.IsVolatile;
884
885 if (LoadInst *LI = dyn_cast<LoadInst>(Inst)) {
886 return LI->isVolatile();
887 } else if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
888 return SI->isVolatile();
889 }
890 // Conservative answer
891 return true;
892 }
893
894 bool isInvariantLoad() const {
895 if (auto *LI = dyn_cast<LoadInst>(Inst))
896 return LI->hasMetadata(LLVMContext::MD_invariant_load);
897 return false;
898 }
899
900 bool isValid() const { return getPointerOperand() != nullptr; }
901
902 // For regular (non-intrinsic) loads/stores, this is set to -1. For
903 // intrinsic loads/stores, the id is retrieved from the corresponding
904 // field in the MemIntrinsicInfo structure. That field contains
905 // non-negative values only.
906 int getMatchingId() const {
907 if (IntrID != 0)
908 return Info.MatchingId;
909 return -1;
910 }
911
912 Value *getPointerOperand() const {
913 if (IntrID != 0)
914 return Info.PtrVal;
915 return getLoadStorePointerOperand(Inst);
916 }
917
918 Type *getValueType() const {
919 // TODO: handle target-specific intrinsics.
920 return Inst->getAccessType();
921 }
922
923 bool mayReadFromMemory() const {
924 if (IntrID != 0)
925 return Info.ReadMem;
926 return Inst->mayReadFromMemory();
927 }
928
929 bool mayWriteToMemory() const {
930 if (IntrID != 0)
931 return Info.WriteMem;
932 return Inst->mayWriteToMemory();
933 }
934
935 private:
936 Intrinsic::ID IntrID = 0;
937 MemIntrinsicInfo Info;
938 Instruction *Inst;
939 };
940
941 // This function is to prevent accidentally passing a non-target
942 // intrinsic ID to TargetTransformInfo.
943 static bool isHandledNonTargetIntrinsic(Intrinsic::ID ID) {
944 switch (ID) {
945 case Intrinsic::masked_load:
946 case Intrinsic::masked_store:
947 return true;
948 }
949 return false;
950 }
951 static bool isHandledNonTargetIntrinsic(const Value *V) {
952 if (auto *II = dyn_cast<IntrinsicInst>(V))
953 return isHandledNonTargetIntrinsic(II->getIntrinsicID());
954 return false;
955 }
956
957 bool processNode(DomTreeNode *Node);
958
959 bool handleBranchCondition(Instruction *CondInst, const BranchInst *BI,
960 const BasicBlock *BB, const BasicBlock *Pred);
961
962 Value *getMatchingValue(LoadValue &InVal, ParseMemoryInst &MemInst,
963 unsigned CurrentGeneration);
964
965 bool overridingStores(const ParseMemoryInst &Earlier,
966 const ParseMemoryInst &Later);
967
968 Value *getOrCreateResult(Instruction *Inst, Type *ExpectedType) const {
969 // TODO: We could insert relevant casts on type mismatch.
970 // The load or the store's first operand.
971 Value *V;
972 if (auto *II = dyn_cast<IntrinsicInst>(Inst)) {
973 switch (II->getIntrinsicID()) {
974 case Intrinsic::masked_load:
975 V = II;
976 break;
977 case Intrinsic::masked_store:
978 V = II->getOperand(0);
979 break;
980 default:
981 return TTI.getOrCreateResultFromMemIntrinsic(II, ExpectedType);
982 }
983 } else {
984 V = isa<LoadInst>(Inst) ? Inst : cast<StoreInst>(Inst)->getValueOperand();
985 }
986
987 return V->getType() == ExpectedType ? V : nullptr;
988 }
989
990 /// Return true if the instruction is known to only operate on memory
991 /// provably invariant in the given "generation".
992 bool isOperatingOnInvariantMemAt(Instruction *I, unsigned GenAt);
993
994 bool isSameMemGeneration(unsigned EarlierGeneration, unsigned LaterGeneration,
995 Instruction *EarlierInst, Instruction *LaterInst);
996
997 bool isNonTargetIntrinsicMatch(const IntrinsicInst *Earlier,
998 const IntrinsicInst *Later) {
999 auto IsSubmask = [](const Value *Mask0, const Value *Mask1) {
1000 // Is Mask0 a submask of Mask1?
1001 if (Mask0 == Mask1)
1002 return true;
1003 if (isa<UndefValue>(Mask0) || isa<UndefValue>(Mask1))
1004 return false;
1005 auto *Vec0 = dyn_cast<ConstantVector>(Mask0);
1006 auto *Vec1 = dyn_cast<ConstantVector>(Mask1);
1007 if (!Vec0 || !Vec1)
1008 return false;
1009 if (Vec0->getType() != Vec1->getType())
1010 return false;
1011 for (int i = 0, e = Vec0->getNumOperands(); i != e; ++i) {
1012 Constant *Elem0 = Vec0->getOperand(i);
1013 Constant *Elem1 = Vec1->getOperand(i);
1014 auto *Int0 = dyn_cast<ConstantInt>(Elem0);
1015 if (Int0 && Int0->isZero())
1016 continue;
1017 auto *Int1 = dyn_cast<ConstantInt>(Elem1);
1018 if (Int1 && !Int1->isZero())
1019 continue;
1020 if (isa<UndefValue>(Elem0) || isa<UndefValue>(Elem1))
1021 return false;
1022 if (Elem0 == Elem1)
1023 continue;
1024 return false;
1025 }
1026 return true;
1027 };
1028 auto PtrOp = [](const IntrinsicInst *II) {
1029 if (II->getIntrinsicID() == Intrinsic::masked_load)
1030 return II->getOperand(0);
1031 if (II->getIntrinsicID() == Intrinsic::masked_store)
1032 return II->getOperand(1);
1033 llvm_unreachable("Unexpected IntrinsicInst");
1034 };
1035 auto MaskOp = [](const IntrinsicInst *II) {
1036 if (II->getIntrinsicID() == Intrinsic::masked_load)
1037 return II->getOperand(2);
1038 if (II->getIntrinsicID() == Intrinsic::masked_store)
1039 return II->getOperand(3);
1040 llvm_unreachable("Unexpected IntrinsicInst");
1041 };
1042 auto ThruOp = [](const IntrinsicInst *II) {
1043 if (II->getIntrinsicID() == Intrinsic::masked_load)
1044 return II->getOperand(3);
1045 llvm_unreachable("Unexpected IntrinsicInst");
1046 };
1047
1048 if (PtrOp(Earlier) != PtrOp(Later))
1049 return false;
1050
1051 Intrinsic::ID IDE = Earlier->getIntrinsicID();
1052 Intrinsic::ID IDL = Later->getIntrinsicID();
1053 // We could really use specific intrinsic classes for masked loads
1054 // and stores in IntrinsicInst.h.
1055 if (IDE == Intrinsic::masked_load && IDL == Intrinsic::masked_load) {
1056 // Trying to replace a later masked load with the earlier one.
1057 // Check that the pointers are the same, and
1058 // - masks and pass-throughs are the same, or
1059 // - replacee's pass-through is "undef" and replacer's mask is a
1060 // super-set of the replacee's mask.
1061 if (MaskOp(Earlier) == MaskOp(Later) && ThruOp(Earlier) == ThruOp(Later))
1062 return true;
1063 if (!isa<UndefValue>(ThruOp(Later)))
1064 return false;
1065 return IsSubmask(MaskOp(Later), MaskOp(Earlier));
1066 }
1067 if (IDE == Intrinsic::masked_store && IDL == Intrinsic::masked_load) {
1068 // Trying to replace a load of a stored value with the store's value.
1069 // Check that the pointers are the same, and
1070 // - load's mask is a subset of store's mask, and
1071 // - load's pass-through is "undef".
1072 if (!IsSubmask(MaskOp(Later), MaskOp(Earlier)))
1073 return false;
1074 return isa<UndefValue>(ThruOp(Later));
1075 }
1076 if (IDE == Intrinsic::masked_load && IDL == Intrinsic::masked_store) {
1077 // Trying to remove a store of the loaded value.
1078 // Check that the pointers are the same, and
1079 // - store's mask is a subset of the load's mask.
1080 return IsSubmask(MaskOp(Later), MaskOp(Earlier));
1081 }
1082 if (IDE == Intrinsic::masked_store && IDL == Intrinsic::masked_store) {
1083 // Trying to remove a dead store (earlier).
1084 // Check that the pointers are the same,
1085 // - the to-be-removed store's mask is a subset of the other store's
1086 // mask.
1087 return IsSubmask(MaskOp(Earlier), MaskOp(Later));
1088 }
1089 return false;
1090 }
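// For illustration of the submask rule above (masks are placeholders):
// <i1 1, i1 0, i1 0, i1 1> is a submask of <i1 1, i1 1, i1 0, i1 1>, so a
// masked load using the former mask and an undef pass-through may reuse the
// value produced with the latter mask on the same pointer.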
1091
1092 void removeMSSA(Instruction &Inst) {
1093 if (!MSSA)
1094 return;
1095 if (VerifyMemorySSA)
1096 MSSA->verifyMemorySSA();
1097 // Removing a store here can leave MemorySSA in an unoptimized state by
1098 // creating MemoryPhis that have identical arguments and by creating
1099 // MemoryUses whose defining access is not an actual clobber. The phi case
1100 // is handled by MemorySSA when passing OptimizePhis = true to
1101 // removeMemoryAccess. The non-optimized MemoryUse case is lazily updated
1102 // by MemorySSA's getClobberingMemoryAccess.
1103 MSSAUpdater->removeMemoryAccess(&Inst, true);
1104 }
1105};
1106
1107} // end anonymous namespace
1108
1109/// Determine if the memory referenced by LaterInst is from the same heap
1110/// version as EarlierInst.
1111/// This is currently called in two scenarios:
1112///
1113/// load p
1114/// ...
1115/// load p
1116///
1117/// and
1118///
1119/// x = load p
1120/// ...
1121/// store x, p
1122///
1123/// in both cases we want to verify that there are no possible writes to the
1124/// memory referenced by p between the earlier and later instruction.
1125bool EarlyCSE::isSameMemGeneration(unsigned EarlierGeneration,
1126 unsigned LaterGeneration,
1127 Instruction *EarlierInst,
1128 Instruction *LaterInst) {
1129 // Check the simple memory generation tracking first.
1130 if (EarlierGeneration == LaterGeneration)
1131 return true;
1132
1133 if (!MSSA)
1134 return false;
1135
1136 // If MemorySSA has determined that one of EarlierInst or LaterInst does not
1137 // read/write memory, then we can safely return true here.
1138 // FIXME: We could be more aggressive when checking doesNotAccessMemory(),
1139 // onlyReadsMemory(), mayReadFromMemory(), and mayWriteToMemory() in this pass
1140 // by also checking the MemorySSA MemoryAccess on the instruction. Initial
1141 // experiments suggest this isn't worthwhile, at least for C/C++ code compiled
1142 // with the default optimization pipeline.
1143 auto *EarlierMA = MSSA->getMemoryAccess(EarlierInst);
1144 if (!EarlierMA)
1145 return true;
1146 auto *LaterMA = MSSA->getMemoryAccess(LaterInst);
1147 if (!LaterMA)
1148 return true;
1149
1150 // Since we know LaterDef dominates LaterInst and EarlierInst dominates
1151 // LaterInst, if LaterDef dominates EarlierInst then it can't occur between
1152 // EarlierInst and LaterInst and neither can any other write that potentially
1153 // clobbers LaterInst.
1154 MemoryAccess *LaterDef;
1155 if (ClobberCounter < EarlyCSEMssaOptCap) {
1156 LaterDef = MSSA->getWalker()->getClobberingMemoryAccess(LaterInst);
1157 ClobberCounter++;
1158 } else
1159 LaterDef = LaterMA->getDefiningAccess();
1160
1161 return MSSA->dominates(LaterDef, EarlierMA);
1162}
1163
1164bool EarlyCSE::isOperatingOnInvariantMemAt(Instruction *I, unsigned GenAt) {
1165 // A location loaded from with an invariant_load is assumed to *never* change
1166 // within the visible scope of the compilation.
1167 if (auto *LI = dyn_cast<LoadInst>(I))
1168 if (LI->hasMetadata(LLVMContext::MD_invariant_load))
1169 return true;
1170
1171 auto MemLocOpt = MemoryLocation::getOrNone(I);
1172 if (!MemLocOpt)
1173 // "target" intrinsic forms of loads aren't currently known to
1174 // MemoryLocation::get. TODO
1175 return false;
1176 MemoryLocation MemLoc = *MemLocOpt;
1177 if (!AvailableInvariants.count(MemLoc))
1178 return false;
1179
1180 // Is the generation at which this became invariant older than the
1181 // current one?
1182 return AvailableInvariants.lookup(MemLoc) <= GenAt;
1183}
1184
1185bool EarlyCSE::handleBranchCondition(Instruction *CondInst,
1186 const BranchInst *BI, const BasicBlock *BB,
1187 const BasicBlock *Pred) {
1188 assert(BI->isConditional() && "Should be a conditional branch!");
1189 assert(BI->getCondition() == CondInst && "Wrong condition?");
1190 assert(BI->getSuccessor(0) == BB || BI->getSuccessor(1) == BB);
1191 auto *TorF = (BI->getSuccessor(0) == BB)
1192 ? ConstantInt::getTrue(BB->getContext())
1193 : ConstantInt::getFalse(BB->getContext());
1194 auto MatchBinOp = [](Instruction *I, unsigned Opcode, Value *&LHS,
1195 Value *&RHS) {
1196 if (Opcode == Instruction::And &&
1197 match(I, m_LogicalAnd(m_Value(LHS), m_Value(RHS))))
1198 return true;
1199 else if (Opcode == Instruction::Or &&
1200 match(I, m_LogicalOr(m_Value(LHS), m_Value(RHS))))
1201 return true;
1202 return false;
1203 };
1204 // If the condition is an AND operation, we can propagate its operands into the
1205 // true branch. If it is an OR operation, we can propagate them into the false
1206 // branch.
1207 unsigned PropagateOpcode =
1208 (BI->getSuccessor(0) == BB) ? Instruction::And : Instruction::Or;
1209
1210 bool MadeChanges = false;
1211 SmallVector<Instruction *, 4> WorkList;
1212 SmallPtrSet<Instruction *, 4> Visited;
1213 WorkList.push_back(CondInst);
1214 while (!WorkList.empty()) {
1215 Instruction *Curr = WorkList.pop_back_val();
1216
1217 AvailableValues.insert(Curr, TorF);
1218 LLVM_DEBUG(dbgs() << "EarlyCSE CVP: Add conditional value for '"
1219 << Curr->getName() << "' as " << *TorF << " in "
1220 << BB->getName() << "\n");
1221 if (!DebugCounter::shouldExecute(CSECounter)) {
1222 LLVM_DEBUG(dbgs() << "Skipping due to debug counter\n");
1223 } else {
1224 // Replace all dominated uses with the known value.
1225 if (unsigned Count = replaceDominatedUsesWith(Curr, TorF, DT,
1226 BasicBlockEdge(Pred, BB))) {
1227 NumCSECVP += Count;
1228 MadeChanges = true;
1229 }
1230 }
1231
1232 Value *LHS, *RHS;
1233 if (MatchBinOp(Curr, PropagateOpcode, LHS, RHS))
1234 for (auto *Op : { LHS, RHS })
1235 if (Instruction *OPI = dyn_cast<Instruction>(Op))
1236 if (SimpleValue::canHandle(OPI) && Visited.insert(OPI).second)
1237 WorkList.push_back(OPI);
1238 }
1239
1240 return MadeChanges;
1241}
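// For illustration (names are placeholders), given a predecessor ending in
//
//   %c = and i1 %a, %b
//   br i1 %c, label %then, label %else
//
// processing the edge into %then records %c, %a and %b as 'true' in
// AvailableValues and replaces their uses dominated by that edge with 'true'.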
1242
1243Value *EarlyCSE::getMatchingValue(LoadValue &InVal, ParseMemoryInst &MemInst,
1244 unsigned CurrentGeneration) {
1245 if (InVal.DefInst == nullptr)
1246 return nullptr;
1247 if (InVal.MatchingId != MemInst.getMatchingId())
1248 return nullptr;
1249 // We don't yet handle removing loads with ordering of any kind.
1250 if (MemInst.isVolatile() || !MemInst.isUnordered())
1251 return nullptr;
1252 // We can't replace an atomic load with one which isn't also atomic.
1253 if (MemInst.isLoad() && !InVal.IsAtomic && MemInst.isAtomic())
1254 return nullptr;
1255 // The value V returned from this function is used differently depending
1256 // on whether MemInst is a load or a store. If it's a load, we will replace
1257 // MemInst with V; if it's a store, we will check if V is the same as the
1258 // available value.
1259 bool MemInstMatching = !MemInst.isLoad();
1260 Instruction *Matching = MemInstMatching ? MemInst.get() : InVal.DefInst;
1261 Instruction *Other = MemInstMatching ? InVal.DefInst : MemInst.get();
1262
1263 // For stores check the result values before checking memory generation
1264 // (otherwise isSameMemGeneration may crash).
1265 Value *Result = MemInst.isStore()
1266 ? getOrCreateResult(Matching, Other->getType())
1267 : nullptr;
1268 if (MemInst.isStore() && InVal.DefInst != Result)
1269 return nullptr;
1270
1271 // Deal with non-target memory intrinsics.
1272 bool MatchingNTI = isHandledNonTargetIntrinsic(Matching);
1273 bool OtherNTI = isHandledNonTargetIntrinsic(Other);
1274 if (OtherNTI != MatchingNTI)
1275 return nullptr;
1276 if (OtherNTI && MatchingNTI) {
1277 if (!isNonTargetIntrinsicMatch(cast<IntrinsicInst>(InVal.DefInst),
1278 cast<IntrinsicInst>(MemInst.get())))
1279 return nullptr;
1280 }
1281
1282 if (!isOperatingOnInvariantMemAt(MemInst.get(), InVal.Generation) &&
1283 !isSameMemGeneration(InVal.Generation, CurrentGeneration, InVal.DefInst,
1284 MemInst.get()))
1285 return nullptr;
1286
1287 if (!Result)
1288 Result = getOrCreateResult(Matching, Other->getType());
1289 return Result;
1290}
1291
1292static void combineIRFlags(Instruction &From, Value *To) {
1293 if (auto *I = dyn_cast<Instruction>(To)) {
1294 // If I being poison triggers UB, there is no need to drop those
1295 // flags. Otherwise, only retain flags present on both I and Inst.
1296 // TODO: Currently some fast-math flags are not treated as
1297 // poison-generating even though they should. Until this is fixed,
1298 // always retain flags present on both I and Inst for floating point
1299 // instructions.
1300 if (isa<FPMathOperator>(I) ||
1301 (I->hasPoisonGeneratingFlags() && !programUndefinedIfPoison(I)))
1302 I->andIRFlags(&From);
1303 }
1304 if (isa<CallBase>(&From) && isa<CallBase>(To)) {
1305 // NB: Intersection of attrs between InVal.first and Inst is overly
1306 // conservative. Since we only CSE readonly functions that have the same
1307 // memory state, we can preserve (or possibly in some cases combine)
1308 // more attributes. Likewise this implies when checking equality of
1309 // callsite for CSEing, we can probably ignore more attributes.
1310 // Generally poison generating attributes need to be handled with more
1311 // care as they can create *new* UB if preserved/combined and violated.
1312 // Attributes that imply immediate UB on the other hand would have been
1313 // violated either way.
1314 bool Success =
1315 cast<CallBase>(To)->tryIntersectAttributes(cast<CallBase>(&From));
1316 assert(Success && "Failed to intersect attributes in callsites that "
1317 "passed identical check");
1318 // For NDEBUG Compile.
1319 (void)Success;
1320 }
1321}
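// For illustration, when a later 'add nuw i32 %x, %y' is CSE'd in favor of a
// dominating 'add nuw nsw i32 %x, %y', the surviving instruction is narrowed
// to 'add nuw' above (unless poison on it is already known to imply undefined
// behavior): the replaced instruction made no 'nsw' guarantee, so keeping the
// flag could turn previously well-defined uses into uses of poison.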
1322
1323bool EarlyCSE::overridingStores(const ParseMemoryInst &Earlier,
1324 const ParseMemoryInst &Later) {
1325 // Can we remove Earlier store because of Later store?
1326
1327 assert(Earlier.isUnordered() && !Earlier.isVolatile() &&
1328 "Violated invariant");
1329 if (Earlier.getPointerOperand() != Later.getPointerOperand())
1330 return false;
1331 if (!Earlier.getValueType() || !Later.getValueType() ||
1332 Earlier.getValueType() != Later.getValueType())
1333 return false;
1334 if (Earlier.getMatchingId() != Later.getMatchingId())
1335 return false;
1336 // At the moment, we don't remove ordered stores, but do remove
1337 // unordered atomic stores. There's no special requirement (for
1338 // unordered atomics) about removing atomic stores only in favor of
1339 // other atomic stores since we were going to execute the non-atomic
1340 // one anyway and the atomic one might never have become visible.
1341 if (!Earlier.isUnordered() || !Later.isUnordered())
1342 return false;
1343
1344 // Deal with non-target memory intrinsics.
1345 bool ENTI = isHandledNonTargetIntrinsic(Earlier.get());
1346 bool LNTI = isHandledNonTargetIntrinsic(Later.get());
1347 if (ENTI && LNTI)
1348 return isNonTargetIntrinsicMatch(cast<IntrinsicInst>(Earlier.get()),
1349 cast<IntrinsicInst>(Later.get()));
1350
1351 // Because of the check above, at least one of them is false.
1352 // For now disallow matching intrinsics with non-intrinsics,
1353 // so assume that the stores match if neither is an intrinsic.
1354 return ENTI == LNTI;
1355}
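// For illustration (names are placeholders), overridingStores() is what lets
// the caller drop the earlier store in
//
//   store i32 1, ptr %p
//   store i32 2, ptr %p
//
// when no intervening instruction may read memory: same pointer operand, same
// value type, same matching id, and both stores unordered.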
1356
1357bool EarlyCSE::processNode(DomTreeNode *Node) {
1358 bool Changed = false;
1359 BasicBlock *BB = Node->getBlock();
1360
1361 // If this block has a single predecessor, then the predecessor is the parent
1362 // of the domtree node and all of the live out memory values are still current
1363 // in this block. If this block has multiple predecessors, then they could
1364 // have invalidated the live-out memory values of our parent value. For now,
1365 // just be conservative and invalidate memory if this block has multiple
1366 // predecessors.
1367 if (!BB->getSinglePredecessor())
1368 ++CurrentGeneration;
1369
1370 // If this node has a single predecessor which ends in a conditional branch,
1371 // we can infer the value of the branch condition given that we took this
1372 // path. We need the single predecessor to ensure there's not another path
1373 // which reaches this block where the condition might hold a different
1374 // value. Since we're adding this to the scoped hash table (like any other
1375 // def), it will have been popped if we encounter a future merge block.
1376 if (BasicBlock *Pred = BB->getSinglePredecessor()) {
1377 auto *BI = dyn_cast<BranchInst>(Pred->getTerminator());
1378 if (BI && BI->isConditional()) {
1379 auto *CondInst = dyn_cast<Instruction>(BI->getCondition());
1380 if (CondInst && SimpleValue::canHandle(CondInst))
1381 Changed |= handleBranchCondition(CondInst, BI, BB, Pred);
1382 }
1383 }
1384
1385 /// LastStore - Keep track of the last non-volatile store that we saw... for
1386 /// as long as there is no instruction that reads memory. If we see a store
1387 /// to the same location, we delete the dead store. This zaps trivial dead
1388 /// stores which can occur in bitfield code among other things.
1389 Instruction *LastStore = nullptr;
1390
1391 // See if any instructions in the block can be eliminated. If so, do it. If
1392 // not, add them to AvailableValues.
1393 for (Instruction &Inst : make_early_inc_range(*BB)) {
1394 // Dead instructions should just be removed.
1395 if (isInstructionTriviallyDead(&Inst, &TLI)) {
1396 LLVM_DEBUG(dbgs() << "EarlyCSE DCE: " << Inst << '\n');
1397 if (!DebugCounter::shouldExecute(CSECounter)) {
1398 LLVM_DEBUG(dbgs() << "Skipping due to debug counter\n");
1399 continue;
1400 }
1401
1402 salvageKnowledge(&Inst, &AC);
1403 salvageDebugInfo(Inst);
1404 removeMSSA(Inst);
1405 Inst.eraseFromParent();
1406 Changed = true;
1407 ++NumSimplify;
1408 continue;
1409 }
1410
1411 // Skip assume intrinsics, they don't really have side effects (although
1412 // they're marked as such to ensure preservation of control dependencies),
1413 // and this pass will not bother with its removal. However, we should mark
1414 // its condition as true for all dominated blocks.
1415 if (auto *Assume = dyn_cast<AssumeInst>(&Inst)) {
1416 auto *CondI = dyn_cast<Instruction>(Assume->getArgOperand(0));
1417 if (CondI && SimpleValue::canHandle(CondI)) {
1418 LLVM_DEBUG(dbgs() << "EarlyCSE considering assumption: " << Inst
1419 << '\n');
1420 AvailableValues.insert(CondI, ConstantInt::getTrue(BB->getContext()));
1421 } else
1422 LLVM_DEBUG(dbgs() << "EarlyCSE skipping assumption: " << Inst << '\n');
1423 continue;
1424 }
1425
1426 // Likewise, noalias intrinsics don't actually write.
1427 if (match(&Inst,
1428 m_Intrinsic<Intrinsic::experimental_noalias_scope_decl>())) {
1429 LLVM_DEBUG(dbgs() << "EarlyCSE skipping noalias intrinsic: " << Inst
1430 << '\n');
1431 continue;
1432 }
1433
1434 // Skip sideeffect intrinsics, for the same reason as assume intrinsics.
1435 if (match(&Inst, m_Intrinsic<Intrinsic::sideeffect>())) {
1436 LLVM_DEBUG(dbgs() << "EarlyCSE skipping sideeffect: " << Inst << '\n');
1437 continue;
1438 }
1439
1440 // Skip pseudoprobe intrinsics, for the same reason as assume intrinsics.
1441 if (match(&Inst, m_Intrinsic<Intrinsic::pseudoprobe>())) {
1442 LLVM_DEBUG(dbgs() << "EarlyCSE skipping pseudoprobe: " << Inst << '\n');
1443 continue;
1444 }
1445
1446 // We can skip all invariant.start intrinsics since they only read memory,
1447 // and we can forward values across it. For invariant starts without
1448 // invariant ends, we can use the fact that the invariantness never ends to
1449 // start a scope in the current generation which is true for all future
1450 // generations. Also, we don't need to consume the last store since the
1451 // semantics of invariant.start allow us to perform DSE of the last
1452 // store, if there was a store following invariant.start. Consider:
1453 //
1454 // store 30, i8* p
1455 // invariant.start(p)
1456 // store 40, i8* p
1457 // We can DSE the store to 30, since the store 40 to invariant location p
1458 // causes undefined behaviour.
1459 if (match(&Inst, m_Intrinsic<Intrinsic::invariant_start>())) {
1460 // If there are any uses, the scope might end.
1461 if (!Inst.use_empty())
1462 continue;
1463 MemoryLocation MemLoc =
1464 MemoryLocation::getForArgument(&cast<CallInst>(Inst), 1, TLI);
1465 // Don't start a scope if we already have a better one pushed
1466 if (!AvailableInvariants.count(MemLoc))
1467 AvailableInvariants.insert(MemLoc, CurrentGeneration);
1468 continue;
1469 }
1470
1471 if (isGuard(&Inst)) {
1472 if (auto *CondI =
1473 dyn_cast<Instruction>(cast<CallInst>(Inst).getArgOperand(0))) {
1474 if (SimpleValue::canHandle(CondI)) {
1475 // Do we already know the actual value of this condition?
1476 if (auto *KnownCond = AvailableValues.lookup(CondI)) {
1477 // Is the condition known to be true?
1478 if (isa<ConstantInt>(KnownCond) &&
1479 cast<ConstantInt>(KnownCond)->isOne()) {
1481 << "EarlyCSE removing guard: " << Inst << '\n');
1482 salvageKnowledge(&Inst, &AC);
1483 removeMSSA(Inst);
1484 Inst.eraseFromParent();
1485 Changed = true;
1486 continue;
1487 } else
1488 // Use the known value if it wasn't true.
1489 cast<CallInst>(Inst).setArgOperand(0, KnownCond);
1490 }
1491 // The condition we're guarding on here is true for all dominated
1492 // locations.
1493 AvailableValues.insert(CondI, ConstantInt::getTrue(BB->getContext()));
1494 }
1495 }
1496
1497 // Guard intrinsics read all memory, but don't write any memory.
1498 // Accordingly, don't update the generation but consume the last store (to
1499 // avoid an incorrect DSE).
1500 LastStore = nullptr;
1501 continue;
1502 }
1503
1504 // If the instruction can be simplified (e.g. X+0 = X) then replace it with
1505 // its simpler value.
1506 if (Value *V = simplifyInstruction(&Inst, SQ)) {
1507 LLVM_DEBUG(dbgs() << "EarlyCSE Simplify: " << Inst << " to: " << *V
1508 << '\n');
1509 if (!DebugCounter::shouldExecute(CSECounter)) {
1510 LLVM_DEBUG(dbgs() << "Skipping due to debug counter\n");
1511 } else {
1512 bool Killed = false;
1513 if (!Inst.use_empty()) {
1514 Inst.replaceAllUsesWith(V);
1515 Changed = true;
1516 }
1517 if (isInstructionTriviallyDead(&Inst, &TLI)) {
1518 salvageKnowledge(&Inst, &AC);
1519 removeMSSA(Inst);
1520 Inst.eraseFromParent();
1521 Changed = true;
1522 Killed = true;
1523 }
1524 if (Changed)
1525 ++NumSimplify;
1526 if (Killed)
1527 continue;
1528 }
1529 }
1530
1531 // If this is a simple instruction that we can value number, process it.
1532 if (SimpleValue::canHandle(&Inst)) {
1533 if ([[maybe_unused]] auto *CI = dyn_cast<ConstrainedFPIntrinsic>(&Inst)) {
1534 assert(CI->getExceptionBehavior() != fp::ebStrict &&
1535 "Unexpected ebStrict from SimpleValue::canHandle()");
1536 assert((!CI->getRoundingMode() ||
1537 CI->getRoundingMode() != RoundingMode::Dynamic) &&
1538 "Unexpected dynamic rounding from SimpleValue::canHandle()");
1539 }
1540 // See if the instruction has an available value. If so, use it.
1541 if (Value *V = AvailableValues.lookup(&Inst)) {
1542 LLVM_DEBUG(dbgs() << "EarlyCSE CSE: " << Inst << " to: " << *V
1543 << '\n');
1544 if (!DebugCounter::shouldExecute(CSECounter)) {
1545 LLVM_DEBUG(dbgs() << "Skipping due to debug counter\n");
1546 continue;
1547 }
1548 combineIRFlags(Inst, V);
1549 Inst.replaceAllUsesWith(V);
1550 salvageKnowledge(&Inst, &AC);
1551 removeMSSA(Inst);
1552 Inst.eraseFromParent();
1553 Changed = true;
1554 ++NumCSE;
1555 continue;
1556 }
1557
1558 // Otherwise, just remember that this value is available.
1559 AvailableValues.insert(&Inst, &Inst);
1560 continue;
1561 }
1562
1563 ParseMemoryInst MemInst(&Inst, TTI);
1564 // If this is a non-volatile load, process it.
1565 if (MemInst.isValid() && MemInst.isLoad()) {
1566 // (Conservatively) we can't peek past the ordering implied by this
1567 // operation, but we can add this load to our set of available values
1568 if (MemInst.isVolatile() || !MemInst.isUnordered()) {
1569 LastStore = nullptr;
1570 ++CurrentGeneration;
1571 }
1572
1573 if (MemInst.isInvariantLoad()) {
1574 // If we pass an invariant load, we know that the memory location is
1575 // indefinitely constant from the moment of first dereferenceability.
1576 // We conservatively treat the invariant_load as that moment. If we
1577 // pass an invariant load after already establishing a scope, don't
1578 // restart it since we want to preserve the earliest point seen.
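// Sketch (illustrative): for `%v = load i32, ptr %p, !invariant.load !0` the
// location *%p is recorded at the current generation; later loads of *%p may
// be forwarded from %v even after the generation has advanced, because the
// location is known never to change once dereferenceable.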
1579 auto MemLoc = MemoryLocation::get(&Inst);
1580 if (!AvailableInvariants.count(MemLoc))
1581 AvailableInvariants.insert(MemLoc, CurrentGeneration);
1582 }
1583
1584 // If we have an available version of this load, and if it is the right
1585 // generation or the load is known to be from an invariant location,
1586 // replace this instruction.
1587 //
1588 // If either the dominating load or the current load are invariant, then
1589 // we can assume the current load loads the same value as the dominating
1590 // load.
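// Sketch (illustrative):
//   %v1 = load i32, ptr %p
//   ...                      ; nothing that may write memory in between
//   %v2 = load i32, ptr %p   ; same generation: %v2 is replaced by %v1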
1591 LoadValue InVal = AvailableLoads.lookup(MemInst.getPointerOperand());
1592 if (Value *Op = getMatchingValue(InVal, MemInst, CurrentGeneration)) {
1593 LLVM_DEBUG(dbgs() << "EarlyCSE CSE LOAD: " << Inst
1594 << " to: " << *InVal.DefInst << '\n');
1595 if (!DebugCounter::shouldExecute(CSECounter)) {
1596 LLVM_DEBUG(dbgs() << "Skipping due to debug counter\n");
1597 continue;
1598 }
1599 if (InVal.IsLoad)
1600 if (auto *I = dyn_cast<Instruction>(Op))
1601 combineMetadataForCSE(I, &Inst, false);
1602 if (!Inst.use_empty())
1603 Inst.replaceAllUsesWith(Op);
1604 salvageKnowledge(&Inst, &AC);
1605 removeMSSA(Inst);
1606 Inst.eraseFromParent();
1607 Changed = true;
1608 ++NumCSELoad;
1609 continue;
1610 }
1611
1612 // Otherwise, remember that we have this instruction.
1613 AvailableLoads.insert(MemInst.getPointerOperand(),
1614 LoadValue(&Inst, CurrentGeneration,
1615 MemInst.getMatchingId(),
1616 MemInst.isAtomic(),
1617 MemInst.isLoad()));
1618 LastStore = nullptr;
1619 continue;
1620 }
1621
1622 // If this instruction may read from memory or throw (and potentially read
1623 // from memory in the exception handler), forget LastStore. Load/store
1624 // intrinsics will indicate both a read and a write to memory. The target
1625 // may override this (e.g. so that a store intrinsic does not read from
1626 // memory, and thus will be treated the same as a regular store for
1627 // commoning purposes).
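// E.g. (illustrative): after `store i32 0, ptr %p`, a call that may read
// memory could observe *%p, so the store is no longer a trivial-DSE candidate
// and LastStore is dropped; a target intrinsic reported as not reading memory
// is exempt from this.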
1628 if ((Inst.mayReadFromMemory() || Inst.mayThrow()) &&
1629 !(MemInst.isValid() && !MemInst.mayReadFromMemory()))
1630 LastStore = nullptr;
1631
1632 // If this is a read-only call, process it.
1633 if (CallValue::canHandle(&Inst)) {
1634 // If we have an available version of this call, and if it is the right
1635 // generation, replace this instruction.
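// Sketch (@f is a hypothetical readonly, willreturn callee):
//   %r1 = call i32 @f(i32 %x)
//   %r2 = call i32 @f(i32 %x)   ; same generation: %r2 is replaced by %r1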
1636 std::pair<Instruction *, unsigned> InVal = AvailableCalls.lookup(&Inst);
1637 if (InVal.first != nullptr &&
1638 isSameMemGeneration(InVal.second, CurrentGeneration, InVal.first,
1639 &Inst)) {
1640 LLVM_DEBUG(dbgs() << "EarlyCSE CSE CALL: " << Inst
1641 << " to: " << *InVal.first << '\n');
1642 if (!DebugCounter::shouldExecute(CSECounter)) {
1643 LLVM_DEBUG(dbgs() << "Skipping due to debug counter\n");
1644 continue;
1645 }
1646 combineIRFlags(Inst, InVal.first);
1647 if (!Inst.use_empty())
1648 Inst.replaceAllUsesWith(InVal.first);
1649 salvageKnowledge(&Inst, &AC);
1650 removeMSSA(Inst);
1651 Inst.eraseFromParent();
1652 Changed = true;
1653 ++NumCSECall;
1654 continue;
1655 }
1656
1657 // Otherwise, remember that we have this instruction.
1658 AvailableCalls.insert(&Inst, std::make_pair(&Inst, CurrentGeneration));
1659 continue;
1660 }
1661
1662 // Compare GEP instructions based on offset.
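// Sketch (illustrative, assuming the usual 4-byte i32):
// `getelementptr i8, ptr %p, i64 4` and `getelementptr i32, ptr %p, i64 1`
// accumulate the same constant offset of 4 bytes from the same base, so the
// later GEP can be replaced by the earlier one even though their indices
// differ.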
1663 if (GEPValue::canHandle(&Inst)) {
1664 auto *GEP = cast<GetElementPtrInst>(&Inst);
1665 APInt Offset = APInt(SQ.DL.getIndexTypeSizeInBits(GEP->getType()), 0);
1666 GEPValue GEPVal(GEP, GEP->accumulateConstantOffset(SQ.DL, Offset)
1667 ? Offset.trySExtValue()
1668 : std::nullopt);
1669 if (Value *V = AvailableGEPs.lookup(GEPVal)) {
1670 LLVM_DEBUG(dbgs() << "EarlyCSE CSE GEP: " << Inst << " to: " << *V
1671 << '\n');
1672 combineIRFlags(Inst, V);
1673 Inst.replaceAllUsesWith(V);
1674 salvageKnowledge(&Inst, &AC);
1675 removeMSSA(Inst);
1676 Inst.eraseFromParent();
1677 Changed = true;
1678 ++NumCSEGEP;
1679 continue;
1680 }
1681
1682 // Otherwise, just remember that we have this GEP.
1683 AvailableGEPs.insert(GEPVal, &Inst);
1684 continue;
1685 }
1686
1687 // A release fence requires that all stores complete before it, but does
1688 // not prevent the reordering of following loads 'before' the fence. As a
1689 // result, we don't need to consider it as writing to memory and don't need
1690 // to advance the generation. We do need to prevent DSE across the fence,
1691 // but that's handled above.
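// E.g. (illustrative): in
//   %v1 = load i32, ptr %p
//   fence release
//   %v2 = load i32, ptr %p
// %v2 may still be forwarded from %v1, because a release fence does not order
// the later load; only store elimination across the fence must be avoided,
// which the LastStore clearing above already guarantees (fences read memory).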
1692 if (auto *FI = dyn_cast<FenceInst>(&Inst))
1693 if (FI->getOrdering() == AtomicOrdering::Release) {
1694 assert(Inst.mayReadFromMemory() && "relied on to prevent DSE above");
1695 continue;
1696 }
1697
1698 // Write-back DSE - If we write back the same value we just loaded from
1699 // the same location and haven't passed any intervening writes or ordering
1700 // operations, we can remove the write. The primary benefit is in allowing
1701 // the available load table to remain valid and value forward past where
1702 // the store originally was.
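// Sketch (illustrative):
//   %v = load i32, ptr %p
//   ...                     ; no intervening writes or ordering operations
//   store i32 %v, ptr %p    ; removable: it writes back the value just loaded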
1703 if (MemInst.isValid() && MemInst.isStore()) {
1704 LoadValue InVal = AvailableLoads.lookup(MemInst.getPointerOperand());
1705 if (InVal.DefInst &&
1706 InVal.DefInst == getMatchingValue(InVal, MemInst, CurrentGeneration)) {
1707 // It is okay to have a LastStore to a different pointer here if MemorySSA
1708 // tells us that the load and store are from the same memory generation.
1709 // In that case, LastStore should keep its present value since we're
1710 // removing the current store.
1711 assert((!LastStore ||
1712 ParseMemoryInst(LastStore, TTI).getPointerOperand() ==
1713 MemInst.getPointerOperand() ||
1714 MSSA) &&
1715 "can't have an intervening store if not using MemorySSA!");
1716 LLVM_DEBUG(dbgs() << "EarlyCSE DSE (writeback): " << Inst << '\n');
1717 if (!DebugCounter::shouldExecute(CSECounter)) {
1718 LLVM_DEBUG(dbgs() << "Skipping due to debug counter\n");
1719 continue;
1720 }
1721 salvageKnowledge(&Inst, &AC);
1722 removeMSSA(Inst);
1723 Inst.eraseFromParent();
1724 Changed = true;
1725 ++NumDSE;
1726 // We can avoid incrementing the generation count since we were able
1727 // to eliminate this store.
1728 continue;
1729 }
1730 }
1731
1732 // Okay, this isn't something we can CSE at all. Check to see if it is
1733 // something that could modify memory. If so, our available memory values
1734 // cannot be used, so bump the generation count.
1735 if (Inst.mayWriteToMemory()) {
1736 ++CurrentGeneration;
1737
1738 if (MemInst.isValid() && MemInst.isStore()) {
1739 // We do a trivial form of DSE if there are two stores to the same
1740 // location with no intervening loads. Delete the earlier store.
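// Sketch (illustrative):
//   store i32 0, ptr %p   ; earlier store (LastStore), deleted here
//   store i32 1, ptr %p   ; later store to the same location survives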
1741 if (LastStore) {
1742 if (overridingStores(ParseMemoryInst(LastStore, TTI), MemInst)) {
1743 LLVM_DEBUG(dbgs() << "EarlyCSE DEAD STORE: " << *LastStore
1744 << " due to: " << Inst << '\n');
1745 if (!DebugCounter::shouldExecute(CSECounter)) {
1746 LLVM_DEBUG(dbgs() << "Skipping due to debug counter\n");
1747 } else {
1748 salvageKnowledge(&Inst, &AC);
1749 removeMSSA(*LastStore);
1750 LastStore->eraseFromParent();
1751 Changed = true;
1752 ++NumDSE;
1753 LastStore = nullptr;
1754 }
1755 }
1756 // fallthrough - we can exploit information about this store
1757 }
1758
1759 // Okay, we just invalidated anything we knew about loaded values. Try
1760 // to salvage *something* by remembering the stored value as the live
1761 // value at that pointer. It is safe to forward from volatile stores
1762 // to non-volatile loads, so we don't have to check for volatility of
1763 // the store.
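// Sketch (illustrative):
//   store i32 %x, ptr %p
//   %v = load i32, ptr %p   ; forwarded on a later visit: %v becomes %x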
1764 AvailableLoads.insert(MemInst.getPointerOperand(),
1765 LoadValue(&Inst, CurrentGeneration,
1766 MemInst.getMatchingId(),
1767 MemInst.isAtomic(),
1768 MemInst.isLoad()));
1769
1770 // Remember that this was the last unordered store we saw for DSE. We
1771 // don't yet handle DSE on ordered or volatile stores since we don't
1772 // have a good way to model the ordering requirement for following
1773 // passes once the store is removed. We could insert a fence, but
1774 // since fences are slightly stronger than stores in their ordering,
1775 // it's not clear this is a profitable transform. Another option would
1776 // be to merge the ordering with that of the post-dominating store.
1777 if (MemInst.isUnordered() && !MemInst.isVolatile())
1778 LastStore = &Inst;
1779 else
1780 LastStore = nullptr;
1781 }
1782 }
1783 }
1784
1785 return Changed;
1786}
1787
1788bool EarlyCSE::run() {
1789 // Note: a deque is used here because it gives significant performance
1790 // gains over vector when the container becomes very large, due to the
1791 // specific access patterns. For more information see the mailing list
1792 // discussion on this:
1793 // http://lists.llvm.org/pipermail/llvm-commits/Week-of-Mon-20120116/135228.html
1794 std::deque<StackNode *> nodesToProcess;
1795
1796 bool Changed = false;
1797
1798 // Process the root node.
1799 nodesToProcess.push_back(new StackNode(
1800 AvailableValues, AvailableLoads, AvailableInvariants, AvailableCalls,
1801 AvailableGEPs, CurrentGeneration, DT.getRootNode(),
1802 DT.getRootNode()->begin(), DT.getRootNode()->end()));
1803
1804 assert(!CurrentGeneration && "Create a new EarlyCSE instance to rerun it.");
1805
1806 // Process the stack.
1807 while (!nodesToProcess.empty()) {
1808 // Look at the node on top of the stack: set the current generation,
1809 // process it, and pop it once it and all of its children are done.
1810 StackNode *NodeToProcess = nodesToProcess.back();
1811
1812 // Initialize class members.
1813 CurrentGeneration = NodeToProcess->currentGeneration();
1814
1815 // Check if the node needs to be processed.
1816 if (!NodeToProcess->isProcessed()) {
1817 // Process the node.
1818 Changed |= processNode(NodeToProcess->node());
1819 NodeToProcess->childGeneration(CurrentGeneration);
1820 NodeToProcess->process();
1821 } else if (NodeToProcess->childIter() != NodeToProcess->end()) {
1822 // Push the next child onto the stack.
1823 DomTreeNode *child = NodeToProcess->nextChild();
1824 nodesToProcess.push_back(new StackNode(
1825 AvailableValues, AvailableLoads, AvailableInvariants, AvailableCalls,
1826 AvailableGEPs, NodeToProcess->childGeneration(), child,
1827 child->begin(), child->end()));
1828 } else {
1829 // It has been processed, and there are no more children to process,
1830 // so delete it and pop it off the stack.
1831 delete NodeToProcess;
1832 nodesToProcess.pop_back();
1833 }
1834 } // while (!nodes...)
1835
1836 return Changed;
1837}
1838
1839 PreservedAnalyses EarlyCSEPass::run(Function &F,
1840 FunctionAnalysisManager &AM) {
1841 auto &TLI = AM.getResult<TargetLibraryAnalysis>(F);
1842 auto &TTI = AM.getResult<TargetIRAnalysis>(F);
1843 auto &DT = AM.getResult<DominatorTreeAnalysis>(F);
1844 auto &AC = AM.getResult<AssumptionAnalysis>(F);
1845 auto *MSSA =
1846 UseMemorySSA ? &AM.getResult<MemorySSAAnalysis>(F).getMSSA() : nullptr;
1847
1848 EarlyCSE CSE(F.getDataLayout(), TLI, TTI, DT, AC, MSSA);
1849
1850 if (!CSE.run())
1851 return PreservedAnalyses::all();
1852
1853 PreservedAnalyses PA;
1854 PA.preserveSet<CFGAnalyses>();
1855 if (UseMemorySSA)
1856 PA.preserve<MemorySSAAnalysis>();
1857 return PA;
1858}
1859
1860 void EarlyCSEPass::printPipeline(
1861 raw_ostream &OS, function_ref<StringRef(StringRef)> MapClassName2PassName) {
1862 static_cast<PassInfoMixin<EarlyCSEPass> *>(this)->printPipeline(
1863 OS, MapClassName2PassName);
1864 OS << '<';
1865 if (UseMemorySSA)
1866 OS << "memssa";
1867 OS << '>';
1868}
1869
1870namespace {
1871
1872/// A simple and fast domtree-based CSE pass.
1873///
1874/// This pass does a simple depth-first walk over the dominator tree,
1875/// eliminating trivially redundant instructions and using instsimplify to
1876/// canonicalize things as it goes. It is intended to be fast and catch obvious
1877/// cases so that instcombine and other passes are more effective. It is
1878/// expected that a later pass of GVN will catch the interesting/hard cases.
1879template<bool UseMemorySSA>
1880class EarlyCSELegacyCommonPass : public FunctionPass {
1881public:
1882 static char ID;
1883
1884 EarlyCSELegacyCommonPass() : FunctionPass(ID) {
1885 if (UseMemorySSA)
1886 initializeEarlyCSEMemSSALegacyPassPass(*PassRegistry::getPassRegistry());
1887 else
1888 initializeEarlyCSELegacyPassPass(*PassRegistry::getPassRegistry());
1889 }
1890
1891 bool runOnFunction(Function &F) override {
1892 if (skipFunction(F))
1893 return false;
1894
1895 auto &TLI = getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F);
1896 auto &TTI = getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
1897 auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
1898 auto &AC = getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
1899 auto *MSSA =
1900 UseMemorySSA ? &getAnalysis<MemorySSAWrapperPass>().getMSSA() : nullptr;
1901
1902 EarlyCSE CSE(F.getDataLayout(), TLI, TTI, DT, AC, MSSA);
1903
1904 return CSE.run();
1905 }
1906
1907 void getAnalysisUsage(AnalysisUsage &AU) const override {
1908 AU.addRequired<AssumptionCacheTracker>();
1909 AU.addRequired<DominatorTreeWrapperPass>();
1910 AU.addRequired<TargetLibraryInfoWrapperPass>();
1911 AU.addRequired<TargetTransformInfoWrapperPass>();
1912 if (UseMemorySSA) {
1913 AU.addRequired<AAResultsWrapperPass>();
1914 AU.addRequired<MemorySSAWrapperPass>();
1915 AU.addPreserved<MemorySSAWrapperPass>();
1916 }
1917 AU.addPreserved<GlobalsAAWrapperPass>();
1918 AU.addPreserved<AAResultsWrapperPass>();
1919 AU.setPreservesCFG();
1920 }
1921};
1922
1923} // end anonymous namespace
1924
1925using EarlyCSELegacyPass = EarlyCSELegacyCommonPass</*UseMemorySSA=*/false>;
1926
1927template<>
1928char EarlyCSELegacyPass::ID = 0;
1929
1930INITIALIZE_PASS_BEGIN(EarlyCSELegacyPass, "early-cse", "Early CSE", false,
1931 false)
1932 INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
1933 INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
1934 INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
1935 INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
1936 INITIALIZE_PASS_END(EarlyCSELegacyPass, "early-cse", "Early CSE", false, false)
1937
1938using EarlyCSEMemSSALegacyPass =
1939 EarlyCSELegacyCommonPass</*UseMemorySSA=*/true>;
1940
1941template<>
1942char EarlyCSEMemSSALegacyPass::ID = 0;
1943
1944 FunctionPass *llvm::createEarlyCSEPass(bool UseMemorySSA) {
1945 if (UseMemorySSA)
1946 return new EarlyCSEMemSSALegacyPass();
1947 else
1948 return new EarlyCSELegacyPass();
1949}
1950
1951INITIALIZE_PASS_BEGIN(EarlyCSEMemSSALegacyPass, "early-cse-memssa",
1952 "Early CSE w/ MemorySSA", false, false)
1953 INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
1954 INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
1955 INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
1956 INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
1957 INITIALIZE_PASS_DEPENDENCY(MemorySSAWrapperPass)
1958 INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
1959 INITIALIZE_PASS_END(EarlyCSEMemSSALegacyPass, "early-cse-memssa",
1960 "Early CSE w/ MemorySSA", false, false)