PassBuilderPipelines.cpp (LLVM 22.0.0git)
1//===- Construction of pass pipelines -------------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8/// \file
9///
10/// This file provides the implementation of the PassBuilder based on our
11/// static pass registry as well as related functionality. It also provides
12/// helpers to aid in analyzing, debugging, and testing passes and pass
13/// pipelines.
14///
15//===----------------------------------------------------------------------===//
16
17#include "llvm/ADT/Statistic.h"
27#include "llvm/IR/PassManager.h"
28#include "llvm/Pass.h"
150
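// Example (illustrative sketch, using only the standard public new-pass-manager
// APIs): the usual way a client drives the pipelines defined in this file.
// buildPerModuleDefaultPipeline() is implemented later in this file.
void runDefaultO2PipelineExample(llvm::Module &M) {
  llvm::LoopAnalysisManager LAM;
  llvm::FunctionAnalysisManager FAM;
  llvm::CGSCCAnalysisManager CGAM;
  llvm::ModuleAnalysisManager MAM;

  llvm::PassBuilder PB;
  // Register the analyses each pass manager level needs and wire up the
  // proxies between them.
  PB.registerModuleAnalyses(MAM);
  PB.registerCGSCCAnalyses(CGAM);
  PB.registerFunctionAnalyses(FAM);
  PB.registerLoopAnalyses(LAM);
  PB.crossRegisterProxies(LAM, FAM, CGAM, MAM);

  // Build the default -O2 module pipeline and run it over the module.
  llvm::ModulePassManager MPM =
      PB.buildPerModuleDefaultPipeline(llvm::OptimizationLevel::O2);
  MPM.run(M, MAM);
}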
151using namespace llvm;
152
153namespace llvm {
154
155static cl::opt<InliningAdvisorMode> UseInlineAdvisor(
156 "enable-ml-inliner", cl::init(InliningAdvisorMode::Default), cl::Hidden,
157 cl::desc("Enable ML policy for inliner. Currently trained for -Oz only"),
158 cl::values(clEnumValN(InliningAdvisorMode::Default, "default",
159 "Heuristics-based inliner version"),
160 clEnumValN(InliningAdvisorMode::Development, "development",
161 "Use development mode (runtime-loadable model)"),
162 clEnumValN(InliningAdvisorMode::Release, "release",
163 "Use release mode (AOT-compiled model)")));
164
165/// Flag to enable inline deferral during PGO.
166static cl::opt<bool>
167 EnablePGOInlineDeferral("enable-npm-pgo-inline-deferral", cl::init(true),
169 cl::desc("Enable inline deferral during PGO"));
170
171static cl::opt<bool> EnableModuleInliner("enable-module-inliner",
172 cl::init(false), cl::Hidden,
173 cl::desc("Enable module inliner"));
174
176 "mandatory-inlining-first", cl::init(false), cl::Hidden,
177 cl::desc("Perform mandatory inlinings module-wide, before performing "
178 "inlining"));
179
181 "eagerly-invalidate-analyses", cl::init(true), cl::Hidden,
182 cl::desc("Eagerly invalidate more analyses in default pipelines"));
183
185 "enable-merge-functions", cl::init(false), cl::Hidden,
186 cl::desc("Enable function merging as part of the optimization pipeline"));
187
189 "enable-post-pgo-loop-rotation", cl::init(true), cl::Hidden,
190 cl::desc("Run the loop rotation transformation after PGO instrumentation"));
191
193 "enable-global-analyses", cl::init(true), cl::Hidden,
194 cl::desc("Enable inter-procedural analyses"));
195
196static cl::opt<bool> RunPartialInlining("enable-partial-inlining",
197 cl::init(false), cl::Hidden,
198 cl::desc("Run Partial inlining pass"));
199
201 "extra-vectorizer-passes", cl::init(false), cl::Hidden,
202 cl::desc("Run cleanup optimization passes after vectorization"));
203
204static cl::opt<bool> RunNewGVN("enable-newgvn", cl::init(false), cl::Hidden,
205 cl::desc("Run the NewGVN pass"));
206
207static cl::opt<bool>
208 EnableLoopInterchange("enable-loopinterchange", cl::init(false), cl::Hidden,
209 cl::desc("Enable the LoopInterchange Pass"));
210
211static cl::opt<bool> EnableUnrollAndJam("enable-unroll-and-jam",
212 cl::init(false), cl::Hidden,
213 cl::desc("Enable Unroll And Jam Pass"));
214
215static cl::opt<bool> EnableLoopFlatten("enable-loop-flatten", cl::init(false),
217 cl::desc("Enable the LoopFlatten Pass"));
218
219// Experimentally allow loop header duplication. This should allow for better
220// optimization at Oz, since loop-idiom recognition can then recognize things
221// like memcpy. If this ends up being useful for many targets, we should drop
222// this flag and make a code generation option that can be controlled
223// independent of the opt level and exposed through the frontend.
225 "enable-loop-header-duplication", cl::init(false), cl::Hidden,
226 cl::desc("Enable loop header duplication at any optimization level"));
227
228static cl::opt<bool>
229 EnableDFAJumpThreading("enable-dfa-jump-thread",
230 cl::desc("Enable DFA jump threading"),
231 cl::init(false), cl::Hidden);
232
233static cl::opt<bool>
234 EnableHotColdSplit("hot-cold-split",
235 cl::desc("Enable hot-cold splitting pass"));
236
237static cl::opt<bool> EnableIROutliner("ir-outliner", cl::init(false),
239 cl::desc("Enable ir outliner pass"));
240
241static cl::opt<bool>
242 DisablePreInliner("disable-preinline", cl::init(false), cl::Hidden,
243 cl::desc("Disable pre-instrumentation inliner"));
244
246 "preinline-threshold", cl::Hidden, cl::init(75),
247 cl::desc("Control the amount of inlining in pre-instrumentation inliner "
248 "(default = 75)"));
249
250static cl::opt<bool>
251 EnableGVNHoist("enable-gvn-hoist",
252 cl::desc("Enable the GVN hoisting pass (default = off)"));
253
254static cl::opt<bool>
255 EnableGVNSink("enable-gvn-sink",
256 cl::desc("Enable the GVN sinking pass (default = off)"));
257
259 "enable-jump-table-to-switch",
260 cl::desc("Enable JumpTableToSwitch pass (default = off)"));
261
262// This option is used to simplify testing of SampleFDO optimizations for
263// profile loading.
264static cl::opt<bool>
265 EnableCHR("enable-chr", cl::init(true), cl::Hidden,
266 cl::desc("Enable control height reduction optimization (CHR)"));
267
269 "flattened-profile-used", cl::init(false), cl::Hidden,
270 cl::desc("Indicate the sample profile being used is flattened, i.e., "
271 "no inline hierarchy exists in the profile"));
272
273static cl::opt<bool>
274 EnableMatrix("enable-matrix", cl::init(false), cl::Hidden,
275 cl::desc("Enable lowering of the matrix intrinsics"));
276
278 "enable-constraint-elimination", cl::init(true), cl::Hidden,
279 cl::desc(
280 "Enable pass to eliminate conditions based on linear constraints"));
281
283 "attributor-enable", cl::Hidden, cl::init(AttributorRunOption::NONE),
284 cl::desc("Enable the attributor inter-procedural deduction pass"),
286 "enable all attributor runs"),
288 "enable module-wide attributor runs"),
290 "enable call graph SCC attributor runs"),
291 clEnumValN(AttributorRunOption::NONE, "none",
292 "disable attributor runs")));
293
295 "enable-sampled-instrumentation", cl::init(false), cl::Hidden,
296 cl::desc("Enable profile instrumentation sampling (default = off)"));
298 "enable-loop-versioning-licm", cl::init(false), cl::Hidden,
299 cl::desc("Enable the experimental Loop Versioning LICM pass"));
300
302 "instrument-cold-function-only-path", cl::init(""),
303 cl::desc("File path for cold function only instrumentation(requires use "
304 "with --pgo-instrument-cold-function-only)"),
305 cl::Hidden);
306
309
311} // namespace llvm
312
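// Example (sketch): the cl::opt flags above are global and are picked up by any
// tool that links this library, most commonly the 'opt' driver. A typical
// invocation exercising a few of them (flag spellings match the cl::opt names
// above; the input and output file names are arbitrary):
//
//   opt -passes='default<O2>' -enable-ml-inliner=release -enable-matrix \
//       -enable-loopinterchange input.ll -S -o output.ll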
329
330namespace llvm {
332} // namespace llvm
333
334void PassBuilder::invokePeepholeEPCallbacks(FunctionPassManager &FPM,
335 OptimizationLevel Level) {
336 for (auto &C : PeepholeEPCallbacks)
337 C(FPM, Level);
338}
339void PassBuilder::invokeLateLoopOptimizationsEPCallbacks(LoopPassManager &LPM,
340 OptimizationLevel Level) {
341 for (auto &C : LateLoopOptimizationsEPCallbacks)
342 C(LPM, Level);
343}
344void PassBuilder::invokeLoopOptimizerEndEPCallbacks(LoopPassManager &LPM,
345 OptimizationLevel Level) {
346 for (auto &C : LoopOptimizerEndEPCallbacks)
347 C(LPM, Level);
348}
349void PassBuilder::invokeScalarOptimizerLateEPCallbacks(
350 FunctionPassManager &FPM, OptimizationLevel Level) {
351 for (auto &C : ScalarOptimizerLateEPCallbacks)
352 C(FPM, Level);
353}
354void PassBuilder::invokeCGSCCOptimizerLateEPCallbacks(CGSCCPassManager &CGPM,
355 OptimizationLevel Level) {
356 for (auto &C : CGSCCOptimizerLateEPCallbacks)
357 C(CGPM, Level);
358}
359void PassBuilder::invokeVectorizerStartEPCallbacks(FunctionPassManager &FPM,
360 OptimizationLevel Level) {
361 for (auto &C : VectorizerStartEPCallbacks)
362 C(FPM, Level);
363}
364void PassBuilder::invokeVectorizerEndEPCallbacks(FunctionPassManager &FPM,
365 OptimizationLevel Level) {
366 for (auto &C : VectorizerEndEPCallbacks)
367 C(FPM, Level);
368}
369void PassBuilder::invokeOptimizerEarlyEPCallbacks(ModulePassManager &MPM,
370 OptimizationLevel Level,
371 ThinOrFullLTOPhase Phase) {
372 for (auto &C : OptimizerEarlyEPCallbacks)
373 C(MPM, Level, Phase);
374}
375void PassBuilder::invokeOptimizerLastEPCallbacks(ModulePassManager &MPM,
376 OptimizationLevel Level,
377 ThinOrFullLTOPhase Phase) {
378 for (auto &C : OptimizerLastEPCallbacks)
379 C(MPM, Level, Phase);
380}
381void PassBuilder::invokeFullLinkTimeOptimizationEarlyEPCallbacks(
382 ModulePassManager &MPM, OptimizationLevel Level) {
383 for (auto &C : FullLinkTimeOptimizationEarlyEPCallbacks)
384 C(MPM, Level);
385}
386void PassBuilder::invokeFullLinkTimeOptimizationLastEPCallbacks(
387 ModulePassManager &MPM, OptimizationLevel Level) {
388 for (auto &C : FullLinkTimeOptimizationLastEPCallbacks)
389 C(MPM, Level);
390}
391void PassBuilder::invokePipelineStartEPCallbacks(ModulePassManager &MPM,
392 OptimizationLevel Level) {
393 for (auto &C : PipelineStartEPCallbacks)
394 C(MPM, Level);
395}
396void PassBuilder::invokePipelineEarlySimplificationEPCallbacks(
397 ModulePassManager &MPM, OptimizationLevel Level, ThinOrFullLTOPhase Phase) {
398 for (auto &C : PipelineEarlySimplificationEPCallbacks)
399 C(MPM, Level, Phase);
400}
401
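// Example (sketch): how a frontend or plugin typically populates the callback
// lists walked by the invoke*EPCallbacks helpers above, using the public
// registration APIs on PassBuilder. The passes chosen here are illustrative.
static void registerExampleEPCallbacks(PassBuilder &PB) {
  PB.registerPeepholeEPCallback(
      [](FunctionPassManager &FPM, OptimizationLevel Level) {
        // Run an extra peephole clean-up at each peephole extension point,
        // except at -O0.
        if (Level != OptimizationLevel::O0)
          FPM.addPass(InstCombinePass());
      });
  PB.registerPipelineStartEPCallback(
      [](ModulePassManager &MPM, OptimizationLevel Level) {
        // Drop obviously dead globals before the main pipeline starts.
        MPM.addPass(GlobalDCEPass());
      });
}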
402// Helper to add AnnotationRemarksPass.
406
407// Helper to check if the current compilation phase is preparing for LTO
412
413// Helper to check if the current compilation phase is LTO backend
418
419// Helper to wrap conditionally Coro passes.
421 // TODO: Skip passes according to Phase.
422 ModulePassManager CoroPM;
423 CoroPM.addPass(CoroEarlyPass());
424 CGSCCPassManager CGPM;
425 CGPM.addPass(CoroSplitPass());
426 CoroPM.addPass(createModuleToPostOrderCGSCCPassAdaptor(std::move(CGPM)));
427 CoroPM.addPass(CoroCleanupPass());
428 CoroPM.addPass(GlobalDCEPass());
429 return CoroConditionalWrapper(std::move(CoroPM));
430}
431
432// TODO: Investigate the cost/benefit of tail call elimination on debugging.
433FunctionPassManager
434PassBuilder::buildO1FunctionSimplificationPipeline(OptimizationLevel Level,
435 ThinOrFullLTOPhase Phase) {
436
437 FunctionPassManager FPM;
438
440 FPM.addPass(CountVisitsPass());
441
442 // Form SSA out of local memory accesses after breaking apart aggregates into
443 // scalars.
444 FPM.addPass(SROAPass(SROAOptions::ModifyCFG));
445
446 // Catch trivial redundancies
447 FPM.addPass(EarlyCSEPass(true /* Enable mem-ssa. */));
448
449 // Hoisting of scalars and load expressions.
450 FPM.addPass(
451 SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(true)));
452 FPM.addPass(InstCombinePass());
453
454 FPM.addPass(LibCallsShrinkWrapPass());
455
456 invokePeepholeEPCallbacks(FPM, Level);
457
458 FPM.addPass(
459 SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(true)));
460
461 // Form canonically associated expression trees, and simplify the trees using
462 // basic mathematical properties. For example, this will form (nearly)
463 // minimal multiplication trees.
464 FPM.addPass(ReassociatePass());
465
466 // Add the primary loop simplification pipeline.
467 // FIXME: Currently this is split into two loop pass pipelines because we run
468 // some function passes in between them. These can and should be removed
469 // and/or replaced by scheduling the loop pass equivalents in the correct
470 // positions. But those equivalent passes aren't powerful enough yet.
471 // Specifically, `SimplifyCFGPass` and `InstCombinePass` are currently still
472 // used. We have `LoopSimplifyCFGPass`, which isn't yet powerful enough to
473 // fully replace `SimplifyCFGPass`, and the closest replacement we have for
474 // `InstCombinePass` is `LoopInstSimplify`.
475 LoopPassManager LPM1, LPM2;
476
477 // Simplify the loop body. We do this initially to clean up after other loop
478 // passes run, either when iterating on a loop or on inner loops with
479 // implications on the outer loop.
480 LPM1.addPass(LoopInstSimplifyPass());
481 LPM1.addPass(LoopSimplifyCFGPass());
482
483 // Try to remove as much code from the loop header as possible,
484 // to reduce the amount of IR that will have to be duplicated. However,
485 // do not perform speculative hoisting the first time as LICM
486 // will destroy metadata that may not need to be destroyed if run
487 // after loop rotation.
488 // TODO: Investigate promotion cap for O1.
489 LPM1.addPass(LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap,
490 /*AllowSpeculation=*/false));
491
492 LPM1.addPass(LoopRotatePass(/* Disable header duplication */ true,
494 // TODO: Investigate promotion cap for O1.
495 LPM1.addPass(LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap,
496 /*AllowSpeculation=*/true));
497 LPM1.addPass(SimpleLoopUnswitchPass());
499 LPM1.addPass(LoopFlattenPass());
500
501 LPM2.addPass(LoopIdiomRecognizePass());
502 LPM2.addPass(IndVarSimplifyPass());
503
505
506 LPM2.addPass(LoopDeletionPass());
507
508 // Do not enable unrolling in PreLinkThinLTO phase during sample PGO
509 // because it changes the IR in a way that makes profile annotation in the
510 // backend compile inaccurate. The normal unroller doesn't pay attention to
511 // forced full-unroll attributes, so we need to make sure to allow the full
512 // unroll pass to pay attention to it.
513 if (Phase != ThinOrFullLTOPhase::ThinLTOPreLink || !PGOOpt ||
514 PGOOpt->Action != PGOOptions::SampleUse)
515 LPM2.addPass(LoopFullUnrollPass(Level.getSpeedupLevel(),
516 /* OnlyWhenForced= */ !PTO.LoopUnrolling,
517 PTO.ForgetAllSCEVInLoopUnroll));
518
520
521 FPM.addPass(createFunctionToLoopPassAdaptor(std::move(LPM1),
522 /*UseMemorySSA=*/true));
523 FPM.addPass(
524 SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(true)));
525 FPM.addPass(InstCombinePass());
526 // The loop passes in LPM2 (LoopFullUnrollPass) do not preserve MemorySSA.
527 // *All* loop passes must preserve it, in order to be able to use it.
528 FPM.addPass(createFunctionToLoopPassAdaptor(std::move(LPM2),
529 /*UseMemorySSA=*/false));
530
531 // Delete small array after loop unroll.
532 FPM.addPass(SROAPass(SROAOptions::ModifyCFG));
533
534 // Specially optimize memory movement as it doesn't look like dataflow in SSA.
535 FPM.addPass(MemCpyOptPass());
536
537 // Sparse conditional constant propagation.
538 // FIXME: It isn't clear why we do this *after* loop passes rather than
539 // before...
540 FPM.addPass(SCCPPass());
541
542 // Delete dead bit computations (instcombine runs after to fold away the dead
543 // computations, and then ADCE will run later to exploit any new DCE
544 // opportunities that creates).
545 FPM.addPass(BDCEPass());
546
547 // Run instcombine after redundancy and dead bit elimination to exploit
548 // opportunities opened up by them.
549 FPM.addPass(InstCombinePass());
550 invokePeepholeEPCallbacks(FPM, Level);
551
552 FPM.addPass(CoroElidePass());
553
555
556 // Finally, do an expensive DCE pass to catch all the dead code exposed by
557 // the simplifications and basic cleanup after all the simplifications.
558 // TODO: Investigate if this is too expensive.
559 FPM.addPass(ADCEPass());
560 FPM.addPass(
561 SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(true)));
562 FPM.addPass(InstCombinePass());
563 invokePeepholeEPCallbacks(FPM, Level);
564
565 return FPM;
566}
567
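// Example (sketch): the nesting pattern used in the simplification pipelines
// above, shown in isolation. A LoopPassManager is wrapped into a function pass
// with createFunctionToLoopPassAdaptor(); UseMemorySSA may only be true when
// every pass in the LPM preserves MemorySSA. The LICM cap values here are
// illustrative, not the configured PTO defaults.
static FunctionPassManager buildTinyLoopPipelineExample() {
  LoopPassManager LPM;
  LPM.addPass(LoopInstSimplifyPass());
  LPM.addPass(LICMPass(/*LicmMssaOptCap=*/100,
                       /*LicmMssaNoAccForPromotionCap=*/250,
                       /*AllowSpeculation=*/true));

  FunctionPassManager FPM;
  FPM.addPass(createFunctionToLoopPassAdaptor(std::move(LPM),
                                              /*UseMemorySSA=*/true));
  return FPM;
}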
568FunctionPassManager
569PassBuilder::buildFunctionSimplificationPipeline(OptimizationLevel Level,
570 ThinOrFullLTOPhase Phase) {
571 assert(Level != OptimizationLevel::O0 && "Must request optimizations!");
572
573 // The O1 pipeline has a separate pipeline creation function to simplify
574 // construction readability.
575 if (Level.getSpeedupLevel() == 1)
576 return buildO1FunctionSimplificationPipeline(Level, Phase);
577
579
582
583 // Form SSA out of local memory accesses after breaking apart aggregates into
584 // scalars.
586
587 // Catch trivial redundancies
588 FPM.addPass(EarlyCSEPass(true /* Enable mem-ssa. */));
591
592 // Hoisting of scalars and load expressions.
593 if (EnableGVNHoist)
594 FPM.addPass(GVNHoistPass());
595
596 // Global value numbering based sinking.
597 if (EnableGVNSink) {
598 FPM.addPass(GVNSinkPass());
599 FPM.addPass(
600 SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(true)));
601 }
602
603 // Speculative execution if the target has divergent branches; otherwise nop.
604 FPM.addPass(SpeculativeExecutionPass(/* OnlyIfDivergentTarget =*/true));
605
606 // Optimize based on known information about branches, and cleanup afterward.
609
610 // Jump table to switch conversion.
615
616 FPM.addPass(
617 SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(true)));
620
621 if (!Level.isOptimizingForSize())
623
624 invokePeepholeEPCallbacks(FPM, Level);
625
626 // For PGO use pipeline, try to optimize memory intrinsics such as memcpy
627 // using the size value profile. Don't perform this when optimizing for size.
628 if (PGOOpt && PGOOpt->Action == PGOOptions::IRUse &&
629 !Level.isOptimizingForSize())
631
632 FPM.addPass(TailCallElimPass(/*UpdateFunctionEntryCount=*/
633 isInstrumentedPGOUse()));
634 FPM.addPass(
635 SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(true)));
636
637 // Form canonically associated expression trees, and simplify the trees using
638 // basic mathematical properties. For example, this will form (nearly)
639 // minimal multiplication trees.
641
644
645 // Add the primary loop simplification pipeline.
646 // FIXME: Currently this is split into two loop pass pipelines because we run
647 // some function passes in between them. These can and should be removed
648 // and/or replaced by scheduling the loop pass equivalents in the correct
649 // positions. But those equivalent passes aren't powerful enough yet.
650 // Specifically, `SimplifyCFGPass` and `InstCombinePass` are currently still
651 // used. We have `LoopSimplifyCFGPass`, which isn't yet powerful enough to
652 // fully replace `SimplifyCFGPass`, and the closest replacement we have for
653 // `InstCombinePass` is `LoopInstSimplify`.
654 LoopPassManager LPM1, LPM2;
655
656 // Simplify the loop body. We do this initially to clean up after other loop
657 // passes run, either when iterating on a loop or on inner loops with
658 // implications on the outer loop.
659 LPM1.addPass(LoopInstSimplifyPass());
660 LPM1.addPass(LoopSimplifyCFGPass());
661
662 // Try to remove as much code from the loop header as possible,
663 // to reduce the amount of IR that will have to be duplicated. However,
664 // do not perform speculative hoisting the first time as LICM
665 // will destroy metadata that may not need to be destroyed if run
666 // after loop rotation.
667 // TODO: Investigate promotion cap for O1.
668 LPM1.addPass(LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap,
669 /*AllowSpeculation=*/false));
670
671 // Disable header duplication in loop rotation at -Oz.
673 Level != OptimizationLevel::Oz,
675 // TODO: Investigate promotion cap for O1.
676 LPM1.addPass(LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap,
677 /*AllowSpeculation=*/true));
678 LPM1.addPass(
679 SimpleLoopUnswitchPass(/* NonTrivial */ Level == OptimizationLevel::O3));
681 LPM1.addPass(LoopFlattenPass());
682
683 LPM2.addPass(LoopIdiomRecognizePass());
684 LPM2.addPass(IndVarSimplifyPass());
685
686 {
688 ExtraPasses.addPass(SimpleLoopUnswitchPass(/* NonTrivial */ Level ==
690 LPM2.addPass(std::move(ExtraPasses));
691 }
692
694
695 LPM2.addPass(LoopDeletionPass());
696
697 // Do not enable unrolling in PreLinkThinLTO phase during sample PGO
698 // because it changes the IR in a way that makes profile annotation in the
699 // backend compile inaccurate. The normal unroller doesn't pay attention to
700 // forced full-unroll attributes, so we need to make sure to allow the full
701 // unroll pass to pay attention to it.
702 if (Phase != ThinOrFullLTOPhase::ThinLTOPreLink || !PGOOpt ||
703 PGOOpt->Action != PGOOptions::SampleUse)
704 LPM2.addPass(LoopFullUnrollPass(Level.getSpeedupLevel(),
705 /* OnlyWhenForced= */ !PTO.LoopUnrolling,
706 PTO.ForgetAllSCEVInLoopUnroll));
707
709
710 FPM.addPass(createFunctionToLoopPassAdaptor(std::move(LPM1),
711 /*UseMemorySSA=*/true));
712 FPM.addPass(
713 SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(true)));
715 // The loop passes in LPM2 (LoopIdiomRecognizePass, IndVarSimplifyPass,
716 // LoopDeletionPass and LoopFullUnrollPass) do not preserve MemorySSA.
717 // *All* loop passes must preserve it, in order to be able to use it.
718 FPM.addPass(createFunctionToLoopPassAdaptor(std::move(LPM2),
719 /*UseMemorySSA=*/false));
720
721 // Delete small array after loop unroll.
723
724 // Try vectorization/scalarization transforms that are both improvements
725 // themselves and can allow further folds with GVN and InstCombine.
726 FPM.addPass(VectorCombinePass(/*TryEarlyFoldsOnly=*/true));
727
728 // Eliminate redundancies.
730 if (RunNewGVN)
731 FPM.addPass(NewGVNPass());
732 else
733 FPM.addPass(GVNPass());
734
735 // Sparse conditional constant propagation.
736 // FIXME: It isn't clear why we do this *after* loop passes rather than
737 // before...
738 FPM.addPass(SCCPPass());
739
740 // Delete dead bit computations (instcombine runs after to fold away the dead
741 // computations, and then ADCE will run later to exploit any new DCE
742 // opportunities that creates).
743 FPM.addPass(BDCEPass());
744
745 // Run instcombine after redundancy and dead bit elimination to exploit
746 // opportunities opened up by them.
748 invokePeepholeEPCallbacks(FPM, Level);
749
750 // Re-consider control flow based optimizations after redundancy elimination,
751 // redo DCE, etc.
754
757
758 // Finally, do an expensive DCE pass to catch all the dead code exposed by
759 // the simplifications and basic cleanup after all the simplifications.
760 // TODO: Investigate if this is too expensive.
761 FPM.addPass(ADCEPass());
762
763 // Specially optimize memory movement as it doesn't look like dataflow in SSA.
764 FPM.addPass(MemCpyOptPass());
765
766 FPM.addPass(DSEPass());
768
770 LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap,
771 /*AllowSpeculation=*/true),
772 /*UseMemorySSA=*/true));
773
774 FPM.addPass(CoroElidePass());
775
777
779 .convertSwitchRangeToICmp(true)
780 .convertSwitchToArithmetic(true)
781 .hoistCommonInsts(true)
782 .sinkCommonInsts(true)));
784 invokePeepholeEPCallbacks(FPM, Level);
785
786 return FPM;
787}
788
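// Example (sketch): the PTO fields consulted throughout the pipelines above
// come from PipelineTuningOptions, which the embedder fills in before
// constructing the PassBuilder. The values chosen below are illustrative.
static PassBuilder makeTunedPassBuilderExample(TargetMachine *TM) {
  PipelineTuningOptions PTO;
  PTO.LoopUnrolling = true;     // gates LoopFullUnrollPass / LoopUnrollPass
  PTO.LoopInterchange = false;  // gates LoopInterchangePass
  PTO.SLPVectorization = true;  // gates SLPVectorizerPass in addVectorPasses()
  PTO.MergeFunctions = false;   // gates MergeFunctionsPass late in the pipeline
  PTO.InlinerThreshold = -1;    // -1: derive inline params from the opt level
  return PassBuilder(TM, PTO);
}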
789void PassBuilder::addRequiredLTOPreLinkPasses(ModulePassManager &MPM) {
792}
793
794void PassBuilder::addPreInlinerPasses(ModulePassManager &MPM,
795 OptimizationLevel Level,
796 ThinOrFullLTOPhase LTOPhase) {
797 assert(Level != OptimizationLevel::O0 && "Not expecting O0 here!");
799 return;
800 InlineParams IP;
801
803
804 // FIXME: The hint threshold has the same value used by the regular inliner
805 // when not optimizing for size. This should probably be lowered after
806 // performance testing.
807 // FIXME: this comment is cargo-culted from the old pass manager; revisit.
808 IP.HintThreshold = Level.isOptimizingForSize() ? PreInlineThreshold : 325;
810 IP, /* MandatoryFirst */ true,
812 CGSCCPassManager &CGPipeline = MIWP.getPM();
813
815 FPM.addPass(SROAPass(SROAOptions::ModifyCFG));
816 FPM.addPass(EarlyCSEPass()); // Catch trivial redundancies.
817 FPM.addPass(SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(
818 true))); // Merge & remove basic blocks.
819 FPM.addPass(InstCombinePass()); // Combine silly sequences.
820 invokePeepholeEPCallbacks(FPM, Level);
821
822 CGPipeline.addPass(createCGSCCToFunctionPassAdaptor(
823 std::move(FPM), PTO.EagerlyInvalidateAnalyses));
824
825 MPM.addPass(std::move(MIWP));
826
827 // Delete anything that is now dead to make sure that we don't instrument
828 // dead code. Instrumentation can end up keeping dead code around and
829 // dramatically increase code size.
830 MPM.addPass(GlobalDCEPass());
831}
832
833void PassBuilder::addPostPGOLoopRotation(ModulePassManager &MPM,
834 OptimizationLevel Level) {
836 // Disable header duplication in loop rotation at -Oz.
839 LoopRotatePass(EnableLoopHeaderDuplication ||
840 Level != OptimizationLevel::Oz),
841 /*UseMemorySSA=*/false),
842 PTO.EagerlyInvalidateAnalyses));
843 }
844}
845
846void PassBuilder::addPGOInstrPasses(ModulePassManager &MPM,
847 OptimizationLevel Level, bool RunProfileGen,
848 bool IsCS, bool AtomicCounterUpdate,
849 std::string ProfileFile,
850 std::string ProfileRemappingFile) {
851 assert(Level != OptimizationLevel::O0 && "Not expecting O0 here!");
852
853 if (!RunProfileGen) {
854 assert(!ProfileFile.empty() && "Profile use expecting a profile file!");
855 MPM.addPass(
856 PGOInstrumentationUse(ProfileFile, ProfileRemappingFile, IsCS, FS));
857 // Cache ProfileSummaryAnalysis once to avoid the potential need to insert
858 // RequireAnalysisPass for PSI before subsequent non-module passes.
859 MPM.addPass(RequireAnalysisPass<ProfileSummaryAnalysis, Module>());
860 return;
861 }
862
863 // Perform PGO instrumentation.
864 MPM.addPass(PGOInstrumentationGen(IsCS ? PGOInstrumentationType::CSFDO
866
867 addPostPGOLoopRotation(MPM, Level);
868 // Add the profile lowering pass.
869 InstrProfOptions Options;
870 if (!ProfileFile.empty())
871 Options.InstrProfileOutput = ProfileFile;
872 // Do counter promotion at Level greater than O0.
873 Options.DoCounterPromotion = true;
874 Options.UseBFIInPromotion = IsCS;
875 if (EnableSampledInstr) {
876 Options.Sampling = true;
877 // With sampling, there is little benefit to enabling counter promotion.
878 // But note that sampling does work with counter promotion.
879 Options.DoCounterPromotion = false;
880 }
881 Options.Atomic = AtomicCounterUpdate;
882 MPM.addPass(InstrProfilingLoweringPass(Options, IsCS));
883}
884
885void PassBuilder::addPGOInstrPassesForO0(ModulePassManager &MPM,
886 bool RunProfileGen, bool IsCS,
887 bool AtomicCounterUpdate,
888 std::string ProfileFile,
889 std::string ProfileRemappingFile) {
890 if (!RunProfileGen) {
891 assert(!ProfileFile.empty() && "Profile use expecting a profile file!");
892 MPM.addPass(
893 PGOInstrumentationUse(ProfileFile, ProfileRemappingFile, IsCS, FS));
894 // Cache ProfileSummaryAnalysis once to avoid the potential need to insert
895 // RequireAnalysisPass for PSI before subsequent non-module passes.
897 return;
898 }
899
900 // Perform PGO instrumentation.
903 // Add the profile lowering pass.
905 if (!ProfileFile.empty())
906 Options.InstrProfileOutput = ProfileFile;
907 // Do not do counter promotion at O0.
908 Options.DoCounterPromotion = false;
909 Options.UseBFIInPromotion = IsCS;
910 Options.Atomic = AtomicCounterUpdate;
912}
913
915 return getInlineParams(Level.getSpeedupLevel(), Level.getSizeLevel());
916}
917
921 InlineParams IP;
922 if (PTO.InlinerThreshold == -1)
923 IP = getInlineParamsFromOptLevel(Level);
924 else
925 IP = getInlineParams(PTO.InlinerThreshold);
926 // For PreLinkThinLTO + SamplePGO or PreLinkFullLTO + SamplePGO,
927 // set hot-caller threshold to 0 to disable hot
928 // callsite inline (as much as possible [1]) because it makes
929 // profile annotation in the backend inaccurate.
930 //
931 // [1] Note the cost of a function could be below zero due to erased
932 // prologue / epilogue.
933 if (isLTOPreLink(Phase) && PGOOpt && PGOOpt->Action == PGOOptions::SampleUse)
935
936 if (PGOOpt)
938
942
943 // Require the GlobalsAA analysis for the module so we can query it within
944 // the CGSCC pipeline.
946 MIWP.addModulePass(RequireAnalysisPass<GlobalsAA, Module>());
947 // Invalidate AAManager so it can be recreated and pick up the newly
948 // available GlobalsAA.
949 MIWP.addModulePass(
951 }
952
953 // Require the ProfileSummaryAnalysis for the module so we can query it within
954 // the inliner pass.
956
957 // Now begin the main postorder CGSCC pipeline.
958 // FIXME: The current CGSCC pipeline has its origins in the legacy pass
959 // manager and trying to emulate its precise behavior. Much of this doesn't
960 // make a lot of sense and we should revisit the core CGSCC structure.
961 CGSCCPassManager &MainCGPipeline = MIWP.getPM();
962
963 // Note: historically, the PruneEH pass was run first to deduce nounwind and
964 // generally clean up exception handling overhead. It isn't clear this is
965 // valuable as the inliner doesn't currently care whether it is inlining an
966 // invoke or a call.
967
969 MainCGPipeline.addPass(AttributorCGSCCPass());
970
971 // Deduce function attributes. We do another run of this after the function
972 // simplification pipeline, so this only needs to run when it could affect the
973 // function simplification pipeline, which is only the case with recursive
974 // functions.
975 MainCGPipeline.addPass(PostOrderFunctionAttrsPass(/*SkipNonRecursive*/ true));
976
977 // When at O3 add argument promotion to the pass pipeline.
978 // FIXME: It isn't at all clear why this should be limited to O3.
979 if (Level == OptimizationLevel::O3)
980 MainCGPipeline.addPass(ArgumentPromotionPass());
981
982 // Try to perform OpenMP specific optimizations. This is a (quick!) no-op if
983 // there are no OpenMP runtime calls present in the module.
984 if (Level == OptimizationLevel::O2 || Level == OptimizationLevel::O3)
985 MainCGPipeline.addPass(OpenMPOptCGSCCPass(Phase));
986
987 invokeCGSCCOptimizerLateEPCallbacks(MainCGPipeline, Level);
988
989 // Add the core function simplification pipeline nested inside the
990 // CGSCC walk.
993 PTO.EagerlyInvalidateAnalyses, /*NoRerun=*/true));
994
995 // Finally, deduce any function attributes based on the fully simplified
996 // function.
997 MainCGPipeline.addPass(PostOrderFunctionAttrsPass());
998
999 // Mark that the function is fully simplified and that it shouldn't be
1000 // simplified again if we somehow revisit it due to CGSCC mutations unless
1001 // it's been modified since.
1004
1006 MainCGPipeline.addPass(CoroSplitPass(Level != OptimizationLevel::O0));
1007 MainCGPipeline.addPass(CoroAnnotationElidePass());
1008 }
1009
1010 // Make sure we don't affect potential future NoRerun CGSCC adaptors.
1011 MIWP.addLateModulePass(createModuleToFunctionPassAdaptor(
1013
1014 return MIWP;
1015}
1016
1021
1023 // For PreLinkThinLTO + SamplePGO or PreLinkFullLTO + SamplePGO,
1024 // set hot-caller threshold to 0 to disable hot
1025 // callsite inline (as much as possible [1]) because it makes
1026 // profile annotation in the backend inaccurate.
1027 //
1028 // [1] Note the cost of a function could be below zero due to erased
1029 // prologue / epilogue.
1030 if (isLTOPreLink(Phase) && PGOOpt && PGOOpt->Action == PGOOptions::SampleUse)
1031 IP.HotCallSiteThreshold = 0;
1032
1033 if (PGOOpt)
1035
1036 // The inline deferral logic is used to avoid losing some inlining chances
1037 // in the future. It is helpful in the SCC inliner, where inlining is
1038 // processed in bottom-up order.
1039 // In the module inliner, the inlining order is priority-based by default,
1040 // so the inline deferral is unnecessary there and we disable the inline
1041 // deferral logic in the module inliner.
1042 IP.EnableDeferral = false;
1043
1046 MPM.addPass(GlobalOptPass());
1047 MPM.addPass(GlobalDCEPass());
1048 MPM.addPass(PGOCtxProfFlatteningPass(/*IsPreThinlink=*/false));
1049 }
1050
1053 PTO.EagerlyInvalidateAnalyses));
1054
1058 MPM.addPass(
1060 }
1061
1062 return MPM;
1063}
1064
1068 assert(Level != OptimizationLevel::O0 &&
1069 "Should not be used for O0 pipeline");
1070
1072 "FullLTOPostLink shouldn't call buildModuleSimplificationPipeline!");
1073
1075
1076 // Place pseudo probe instrumentation as the first pass of the pipeline to
1077 // minimize the impact of optimization changes.
1078 if (PGOOpt && PGOOpt->PseudoProbeForProfiling &&
1081
1082 bool HasSampleProfile = PGOOpt && (PGOOpt->Action == PGOOptions::SampleUse);
1083
1084 // In ThinLTO mode, when flattened profile is used, all the available
1085 // profile information will be annotated in PreLink phase so there is
1086 // no need to load the profile again in PostLink.
1087 bool LoadSampleProfile =
1088 HasSampleProfile &&
1090
1091 // During the ThinLTO backend phase we perform early indirect call promotion
1092 // here, before globalopt. Otherwise imported available_externally functions
1093 // look unreferenced and are removed. If we are going to load the sample
1094 // profile then defer until later.
1095 // TODO: See if we can move later and consolidate with the location where
1096 // we perform ICP when we are loading a sample profile.
1097 // TODO: We pass HasSampleProfile (whether there was a sample profile file
1098 // passed to the compile) to the SamplePGO flag of ICP. This is used to
1099 // determine whether the new direct calls are annotated with prof metadata.
1100 // Ideally this should be determined from whether the IR is annotated with
1101 // sample profile, and not whether a sample profile was provided on the
1102 // command line. E.g. for flattened profiles where we will not be reloading
1103 // the sample profile in the ThinLTO backend, we ideally shouldn't have to
1104 // provide the sample profile file.
1105 if (Phase == ThinOrFullLTOPhase::ThinLTOPostLink && !LoadSampleProfile)
1106 MPM.addPass(PGOIndirectCallPromotion(true /* InLTO */, HasSampleProfile));
1107
1108 // Create an early function pass manager to cleanup the output of the
1109 // frontend. Not necessary with LTO post link pipelines since the pre link
1110 // pipeline already cleaned up the frontend output.
1112 // Do basic inference of function attributes from known properties of system
1113 // libraries and other oracles.
1115 MPM.addPass(CoroEarlyPass());
1116
1117 FunctionPassManager EarlyFPM;
1118 EarlyFPM.addPass(EntryExitInstrumenterPass(/*PostInlining=*/false));
1119 // Lower llvm.expect to metadata before attempting transforms.
1120 // Compare/branch metadata may alter the behavior of passes like
1121 // SimplifyCFG.
1123 EarlyFPM.addPass(SimplifyCFGPass());
1125 EarlyFPM.addPass(EarlyCSEPass());
1126 if (Level == OptimizationLevel::O3)
1127 EarlyFPM.addPass(CallSiteSplittingPass());
1129 std::move(EarlyFPM), PTO.EagerlyInvalidateAnalyses));
1130 }
1131
1132 if (LoadSampleProfile) {
1133 // Annotate sample profile right after early FPM to ensure freshness of
1134 // the debug info.
1136 PGOOpt->ProfileFile, PGOOpt->ProfileRemappingFile, Phase, FS));
1137 // Cache ProfileSummaryAnalysis once to avoid the potential need to insert
1138 // RequireAnalysisPass for PSI before subsequent non-module passes.
1140 // Do not invoke ICP in the LTOPrelink phase as it makes it hard
1141 // for the profile annotation to be accurate in the LTO backend.
1142 if (!isLTOPreLink(Phase))
1143 // We perform early indirect call promotion here, before globalopt.
1144 // This is important for the ThinLTO backend phase because otherwise
1145 // imported available_externally functions look unreferenced and are
1146 // removed.
1147 MPM.addPass(
1148 PGOIndirectCallPromotion(true /* IsInLTO */, true /* SamplePGO */));
1149 }
1150
1151 // Try to perform OpenMP specific optimizations on the module. This is a
1152 // (quick!) no-op if there are no OpenMP runtime calls present in the module.
1154
1156 MPM.addPass(AttributorPass());
1157
1158 // Lower type metadata and the type.test intrinsic in the ThinLTO
1159 // post link pipeline after ICP. This is to enable usage of the type
1160 // tests in ICP sequences.
1162 MPM.addPass(LowerTypeTestsPass(nullptr, nullptr,
1164
1166
1167 // Interprocedural constant propagation now that basic cleanup has occurred
1168 // and prior to optimizing globals.
1169 // FIXME: This position in the pipeline hasn't been carefully considered in
1170 // years, it should be re-analyzed.
1171 MPM.addPass(IPSCCPPass(
1172 IPSCCPOptions(/*AllowFuncSpec=*/
1173 Level != OptimizationLevel::Os &&
1174 Level != OptimizationLevel::Oz &&
1175 !isLTOPreLink(Phase))));
1176
1177 // Attach metadata to indirect call sites indicating the set of functions
1178 // they may target at run-time. This should follow IPSCCP.
1180
1181 // Optimize globals to try and fold them into constants.
1182 MPM.addPass(GlobalOptPass());
1183
1184 // Create a small function pass pipeline to cleanup after all the global
1185 // optimizations.
1186 FunctionPassManager GlobalCleanupPM;
1187 // FIXME: Should this instead be a run of SROA?
1188 GlobalCleanupPM.addPass(PromotePass());
1189 GlobalCleanupPM.addPass(InstCombinePass());
1190 invokePeepholeEPCallbacks(GlobalCleanupPM, Level);
1191 GlobalCleanupPM.addPass(
1192 SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(true)));
1193 MPM.addPass(createModuleToFunctionPassAdaptor(std::move(GlobalCleanupPM),
1194 PTO.EagerlyInvalidateAnalyses));
1195
1196 // We already asserted this happens in non-FullLTOPostLink earlier.
1197 const bool IsPreLink = Phase != ThinOrFullLTOPhase::ThinLTOPostLink;
1198 // Enable contextual profiling instrumentation.
1199 const bool IsCtxProfGen =
1201 const bool IsPGOPreLink = !IsCtxProfGen && PGOOpt && IsPreLink;
1202 const bool IsPGOInstrGen =
1203 IsPGOPreLink && PGOOpt->Action == PGOOptions::IRInstr;
1204 const bool IsPGOInstrUse =
1205 IsPGOPreLink && PGOOpt->Action == PGOOptions::IRUse;
1206 const bool IsMemprofUse = IsPGOPreLink && !PGOOpt->MemoryProfile.empty();
1207 // We don't want to mix pgo ctx gen and pgo gen; we also don't currently
1208 // enable ctx profiling from the frontend.
1210 "Enabling both instrumented PGO and contextual instrumentation is not "
1211 "supported.");
1212 const bool IsCtxProfUse =
1214
1215 assert(
1217 "--instrument-cold-function-only-path is provided but "
1218 "--pgo-instrument-cold-function-only is not enabled");
1219 const bool IsColdFuncOnlyInstrGen = PGOInstrumentColdFunctionOnly &&
1220 IsPGOPreLink &&
1222
1223 if (IsPGOInstrGen || IsPGOInstrUse || IsMemprofUse || IsCtxProfGen ||
1224 IsCtxProfUse || IsColdFuncOnlyInstrGen)
1225 addPreInlinerPasses(MPM, Level, Phase);
1226
1227 // Add all the requested passes for instrumentation PGO, if requested.
1228 if (IsPGOInstrGen || IsPGOInstrUse) {
1229 addPGOInstrPasses(MPM, Level,
1230 /*RunProfileGen=*/IsPGOInstrGen,
1231 /*IsCS=*/false, PGOOpt->AtomicCounterUpdate,
1232 PGOOpt->ProfileFile, PGOOpt->ProfileRemappingFile);
1233 } else if (IsCtxProfGen || IsCtxProfUse) {
1235 // In pre-link, we just want the instrumented IR. We use the contextual
1236 // profile in the post-thinlink phase.
1237 // The instrumentation will be removed in post-thinlink after IPO.
1238 // FIXME(mtrofin): move AssignGUIDPass if there is agreement to use this
1239 // mechanism for GUIDs.
1240 MPM.addPass(AssignGUIDPass());
1241 if (IsCtxProfUse) {
1242 MPM.addPass(PGOCtxProfFlatteningPass(/*IsPreThinlink=*/true));
1243 return MPM;
1244 }
1245 // Block further inlining in the instrumented ctxprof case. This avoids
1246 // confusingly collecting profiles for the same GUID corresponding to
1247 // different variants of the function. We could do like PGO and identify
1248 // functions by a (GUID, Hash) tuple, but since the ctxprof "use" waits for
1249 // thinlto to happen before performing any further optimizations, it's
1250 // unnecessary to collect profiles for non-prevailing copies.
1252 addPostPGOLoopRotation(MPM, Level);
1254 } else if (IsColdFuncOnlyInstrGen) {
1255 addPGOInstrPasses(MPM, Level, /* RunProfileGen */ true, /* IsCS */ false,
1256 /* AtomicCounterUpdate */ false,
1258 /* ProfileRemappingFile */ "");
1259 }
1260
1261 if (IsPGOInstrGen || IsPGOInstrUse || IsCtxProfGen)
1262 MPM.addPass(PGOIndirectCallPromotion(false, false));
1263
1264 if (IsPGOPreLink && PGOOpt->CSAction == PGOOptions::CSIRInstr)
1265 MPM.addPass(PGOInstrumentationGenCreateVar(PGOOpt->CSProfileGenFile,
1267
1268 if (IsMemprofUse)
1269 MPM.addPass(MemProfUsePass(PGOOpt->MemoryProfile, FS));
1270
1271 if (PGOOpt && (PGOOpt->Action == PGOOptions::IRUse ||
1272 PGOOpt->Action == PGOOptions::SampleUse))
1273 MPM.addPass(PGOForceFunctionAttrsPass(PGOOpt->ColdOptType));
1274
1275 MPM.addPass(AlwaysInlinerPass(/*InsertLifetimeIntrinsics=*/true));
1276
1279 else
1280 MPM.addPass(buildInlinerPipeline(Level, Phase));
1281
1282 // Remove any dead arguments exposed by cleanups, constant folding globals,
1283 // and argument promotion.
1285
1288
1290 MPM.addPass(CoroCleanupPass());
1291
1292 // Optimize globals now that functions are fully simplified.
1293 MPM.addPass(GlobalOptPass());
1294 MPM.addPass(GlobalDCEPass());
1295
1296 return MPM;
1297}
1298
1299/// TODO: Should LTO cause any differences to this set of passes?
1300void PassBuilder::addVectorPasses(OptimizationLevel Level,
1301 FunctionPassManager &FPM,
1302 ThinOrFullLTOPhase LTOPhase) {
1303 const bool IsFullLTO = LTOPhase == ThinOrFullLTOPhase::FullLTOPostLink;
1304
1307
1308 // Drop dereferenceable assumes after vectorization, as they are no longer
1309 // needed and can inhibit further optimization.
1310 if (!isLTOPreLink(LTOPhase))
1311 FPM.addPass(DropUnnecessaryAssumesPass(/*DropDereferenceable=*/true));
1312
1314 if (IsFullLTO) {
1315 // The vectorizer may have significantly shortened a loop body; unroll
1316 // again. Unroll small loops to hide loop backedge latency and saturate any
1317 // parallel execution resources of an out-of-order processor. We also then
1318 // need to clean up redundancies and loop invariant code.
1319 // FIXME: It would be really good to use a loop-integrated instruction
1320 // combiner for cleanup here so that the unrolling and LICM can be pipelined
1321 // across the loop nests.
1322 // We do UnrollAndJam in a separate LPM to ensure it happens before unroll
1325 LoopUnrollAndJamPass(Level.getSpeedupLevel())));
1327 Level.getSpeedupLevel(), /*OnlyWhenForced=*/!PTO.LoopUnrolling,
1330 // Now that we are done with loop unrolling, be it either by LoopVectorizer,
1331 // or LoopUnroll passes, some variable-offset GEP's into alloca's could have
1332 // become constant-offset, thus enabling SROA and alloca promotion. Do so.
1333 // NOTE: we are very late in the pipeline, and we don't have any LICM
1334 // or SimplifyCFG passes scheduled after us, that would clean up
1335 // the CFG mess this may create if allowed to modify the CFG, so forbid that.
1337 }
1338
1339 if (!IsFullLTO) {
1340 // Eliminate loads by forwarding stores from the previous iteration to loads
1341 // of the current iteration.
1343 }
1344 // Cleanup after the loop optimization passes.
1345 FPM.addPass(InstCombinePass());
1346
1347 if (Level.getSpeedupLevel() > 1 && ExtraVectorizerPasses) {
1348 ExtraFunctionPassManager<ShouldRunExtraVectorPasses> ExtraPasses;
1349 // At higher optimization levels, try to clean up any runtime overlap and
1350 // alignment checks inserted by the vectorizer. We want to track correlated
1351 // runtime checks for two inner loops in the same outer loop, fold any
1352 // common computations, hoist loop-invariant aspects out of any outer loop,
1353 // and unswitch the runtime checks if possible. Once hoisted, we may have
1354 // dead (or speculatable) control flows or more combining opportunities.
1355 ExtraPasses.addPass(EarlyCSEPass());
1356 ExtraPasses.addPass(CorrelatedValuePropagationPass());
1357 ExtraPasses.addPass(InstCombinePass());
1358 LoopPassManager LPM;
1359 LPM.addPass(LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap,
1360 /*AllowSpeculation=*/true));
1361 LPM.addPass(SimpleLoopUnswitchPass(/* NonTrivial */ Level ==
1363 ExtraPasses.addPass(
1364 createFunctionToLoopPassAdaptor(std::move(LPM), /*UseMemorySSA=*/true));
1365 ExtraPasses.addPass(
1366 SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(true)));
1367 ExtraPasses.addPass(InstCombinePass());
1368 FPM.addPass(std::move(ExtraPasses));
1369 }
1370
1371 // Now that we've formed fast-to-execute loop structures, we do further
1372 // optimizations. These are run afterward as they might block doing complex
1373 // analyses and transforms such as what are needed for loop vectorization.
1374
1375 // Cleanup after loop vectorization, etc. Simplification passes like CVP and
1376 // GVN, loop transforms, and others have already run, so it's now better to
1377 // convert to more optimized IR using more aggressive simplify CFG options.
1378 // The extra sinking transform can create larger basic blocks, so do this
1379 // before SLP vectorization.
1380 FPM.addPass(SimplifyCFGPass(SimplifyCFGOptions()
1381 .forwardSwitchCondToPhi(true)
1382 .convertSwitchRangeToICmp(true)
1383 .convertSwitchToArithmetic(true)
1384 .convertSwitchToLookupTable(true)
1385 .needCanonicalLoops(false)
1386 .hoistCommonInsts(true)
1387 .sinkCommonInsts(true)));
1388
1389 if (IsFullLTO) {
1390 FPM.addPass(SCCPPass());
1391 FPM.addPass(InstCombinePass());
1392 FPM.addPass(BDCEPass());
1393 }
1394
1395 // Optimize parallel scalar instruction chains into SIMD instructions.
1396 if (PTO.SLPVectorization) {
1397 FPM.addPass(SLPVectorizerPass());
1398 if (Level.getSpeedupLevel() > 1 && ExtraVectorizerPasses) {
1399 FPM.addPass(EarlyCSEPass());
1400 }
1401 }
1402 // Enhance/cleanup vector code.
1403 FPM.addPass(VectorCombinePass());
1404
1405 if (!IsFullLTO) {
1406 FPM.addPass(InstCombinePass());
1407 // Unroll small loops to hide loop backedge latency and saturate any
1408 // parallel execution resources of an out-of-order processor. We also then
1409 // need to clean up redundancies and loop invariant code.
1410 // FIXME: It would be really good to use a loop-integrated instruction
1411 // combiner for cleanup here so that the unrolling and LICM can be pipelined
1412 // across the loop nests.
1413 // We do UnrollAndJam in a separate LPM to ensure it happens before unroll
1414 if (EnableUnrollAndJam && PTO.LoopUnrolling) {
1416 LoopUnrollAndJamPass(Level.getSpeedupLevel())));
1417 }
1418 FPM.addPass(LoopUnrollPass(LoopUnrollOptions(
1419 Level.getSpeedupLevel(), /*OnlyWhenForced=*/!PTO.LoopUnrolling,
1420 PTO.ForgetAllSCEVInLoopUnroll)));
1421 FPM.addPass(WarnMissedTransformationsPass());
1422 // Now that we are done with loop unrolling, be it either by LoopVectorizer,
1423 // or LoopUnroll passes, some variable-offset GEP's into alloca's could have
1424 // become constant-offset, thus enabling SROA and alloca promotion. Do so.
1425 // NOTE: we are very late in the pipeline, and we don't have any LICM
1426 // or SimplifyCFG passes scheduled after us, that would clean up
1427 // the CFG mess this may create if allowed to modify the CFG, so forbid that.
1428 FPM.addPass(SROAPass(SROAOptions::PreserveCFG));
1429 }
1430
1431 FPM.addPass(InferAlignmentPass());
1432 FPM.addPass(InstCombinePass());
1433
1434 // This is needed for two reasons:
1435 // 1. It works around problems that instcombine introduces, such as sinking
1436 // expensive FP divides into loops containing multiplications using the
1437 // divide result.
1438 // 2. It helps to clean up some loop-invariant code created by the loop
1439 // unroll pass when IsFullLTO=false.
1441 LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap,
1442 /*AllowSpeculation=*/true),
1443 /*UseMemorySSA=*/true));
1444
1445 // Now that we've vectorized and unrolled loops, we may have more refined
1446 // alignment information, try to re-derive it here.
1447 FPM.addPass(AlignmentFromAssumptionsPass());
1448}
1449
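// Example (sketch): the same building blocks are reachable through the textual
// pipeline syntax, which is handy for reproducing a slice of the vectorizer
// pipeline in isolation (e.g. in tests). The pipeline string is illustrative.
static Error runVectorizerSliceExample(Module &M, ModuleAnalysisManager &MAM,
                                       PassBuilder &PB) {
  ModulePassManager MPM;
  if (Error Err = PB.parsePassPipeline(
          MPM, "function(loop-vectorize,instcombine,simplifycfg)"))
    return Err;
  MPM.run(M, MAM);
  return Error::success();
}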
1450ModulePassManager
1451PassBuilder::buildModuleOptimizationPipeline(OptimizationLevel Level,
1452 ThinOrFullLTOPhase LTOPhase) {
1453 const bool LTOPreLink = isLTOPreLink(LTOPhase);
1455
1456 // Run partial inlining pass to partially inline functions that have
1457 // large bodies.
1460
1461 // Remove avail extern fns and globals definitions since we aren't compiling
1462 // an object file for later LTO. For LTO we want to preserve these so they
1463 // are eligible for inlining at link-time. Note if they are unreferenced they
1464 // will be removed by GlobalDCE later, so this only impacts referenced
1465 // available externally globals. Eventually they will be suppressed during
1466 // codegen, but eliminating here enables more opportunity for GlobalDCE as it
1467 // may make globals referenced by available external functions dead and saves
1468 // running remaining passes on the eliminated functions. These should be
1469 // preserved during prelinking for link-time inlining decisions.
1470 if (!LTOPreLink)
1472
1473 // Do RPO function attribute inference across the module to forward-propagate
1474 // attributes where applicable.
1475 // FIXME: Is this really an optimization rather than a canonicalization?
1477
1478 // Do a post inline PGO instrumentation and use pass. This is a context
1479 // sensitive PGO pass. We don't want to do this in the LTOPreLink phase as
1480 // cross-module inlining has not been done yet. The context sensitive
1481 // instrumentation is after all the inlines are done.
1482 if (!LTOPreLink && PGOOpt) {
1483 if (PGOOpt->CSAction == PGOOptions::CSIRInstr)
1484 addPGOInstrPasses(MPM, Level, /*RunProfileGen=*/true,
1485 /*IsCS=*/true, PGOOpt->AtomicCounterUpdate,
1486 PGOOpt->CSProfileGenFile, PGOOpt->ProfileRemappingFile);
1487 else if (PGOOpt->CSAction == PGOOptions::CSIRUse)
1488 addPGOInstrPasses(MPM, Level, /*RunProfileGen=*/false,
1489 /*IsCS=*/true, PGOOpt->AtomicCounterUpdate,
1490 PGOOpt->ProfileFile, PGOOpt->ProfileRemappingFile);
1491 }
1492
1493 // Re-compute GlobalsAA here prior to function passes. This is particularly
1494 // useful as the above will have inlined, DCE'ed, and function-attr
1495 // propagated everything. We should at this point have a reasonably minimal
1496 // and richly annotated call graph. By computing aliasing and mod/ref
1497 // information for all local globals here, the late loop passes and notably
1498 // the vectorizer will be able to use them to help recognize vectorizable
1499 // memory operations.
1502
1503 invokeOptimizerEarlyEPCallbacks(MPM, Level, LTOPhase);
1504
1505 FunctionPassManager OptimizePM;
1506
1507 // Only drop unnecessary assumes post-inline and post-link, as otherwise
1508 // additional uses of the affected value may be introduced through inlining
1509 // and CSE.
1510 if (!isLTOPreLink(LTOPhase))
1511 OptimizePM.addPass(DropUnnecessaryAssumesPass());
1512
1513 // Schedule LoopVersioningLICM when inlining is over, because after that we
1514 // may see more accurate aliasing. The reason to run this late is that
1515 // versioning too early may prevent further inlining due to the increase in
1516 // code size. Other optimizations that run later might benefit from the
1517 // no-alias assumption in the cloned loop.
1519 OptimizePM.addPass(
1521 // LoopVersioningLICM pass might increase new LICM opportunities.
1523 LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap,
1524 /*AllowSpeculation=*/true),
1525 /*UseMemorySSA=*/true));
1526 }
1527
1528 OptimizePM.addPass(Float2IntPass());
1530
1531 if (EnableMatrix) {
1532 OptimizePM.addPass(LowerMatrixIntrinsicsPass());
1533 OptimizePM.addPass(EarlyCSEPass());
1534 }
1535
1536 // CHR pass should only be applied with the profile information.
1537 // The check is to check the profile summary information in CHR.
1538 if (EnableCHR && Level == OptimizationLevel::O3)
1539 OptimizePM.addPass(ControlHeightReductionPass());
1540
1541 // FIXME: We need to run some loop optimizations to re-rotate loops after
1542 // simplifycfg and others undo their rotation.
1543
1544 // Optimize the loop execution. These passes operate on entire loop nests
1545 // rather than on each loop in an inside-out manner, and so they are actually
1546 // function passes.
1547
1548 invokeVectorizerStartEPCallbacks(OptimizePM, Level);
1549
1550 LoopPassManager LPM;
1551 // First rotate loops that may have been un-rotated by prior passes.
1552 // Disable header duplication at -Oz.
1554 Level != OptimizationLevel::Oz,
1555 LTOPreLink));
1556 // Some loops may have become dead by now. Try to delete them.
1557 // FIXME: see discussion in https://reviews.llvm.org/D112851,
1558 // this may need to be revisited once we run GVN before loop deletion
1559 // in the simplification pipeline.
1560 LPM.addPass(LoopDeletionPass());
1561
1562 if (PTO.LoopInterchange)
1563 LPM.addPass(LoopInterchangePass());
1564
1565 OptimizePM.addPass(
1566 createFunctionToLoopPassAdaptor(std::move(LPM), /*UseMemorySSA=*/false));
1567
1568 // FIXME: This may not be the right place in the pipeline.
1569 // We need to have the data to support the right place.
1570 if (PTO.LoopFusion)
1571 OptimizePM.addPass(LoopFusePass());
1572
1573 // Distribute loops to allow partial vectorization. I.e. isolate dependences
1574 // into a separate loop that would otherwise inhibit vectorization. This is
1575 // currently only performed for loops marked with the metadata
1576 // llvm.loop.distribute=true or when -enable-loop-distribute is specified.
1577 OptimizePM.addPass(LoopDistributePass());
1578
1579 // Populates the VFABI attribute with the scalar-to-vector mappings
1580 // from the TargetLibraryInfo.
1581 OptimizePM.addPass(InjectTLIMappings());
1582
1583 addVectorPasses(Level, OptimizePM, LTOPhase);
1584
1585 invokeVectorizerEndEPCallbacks(OptimizePM, Level);
1586
1587 // LoopSink pass sinks instructions hoisted by LICM, which serves as a
1588 // canonicalization pass that enables other optimizations. As a result,
1589 // LoopSink pass needs to be a very late IR pass to avoid undoing LICM
1590 // result too early.
1591 OptimizePM.addPass(LoopSinkPass());
1592
1593 // And finally clean up LCSSA form before generating code.
1594 OptimizePM.addPass(InstSimplifyPass());
1595
1596 // This hoists/decomposes div/rem ops. It should run after other sink/hoist
1597 // passes to avoid re-sinking, but before SimplifyCFG because it can allow
1598 // flattening of blocks.
1599 OptimizePM.addPass(DivRemPairsPass());
1600
1601 // Try to annotate calls that were created during optimization.
1602 OptimizePM.addPass(
1603 TailCallElimPass(/*UpdateFunctionEntryCount=*/isInstrumentedPGOUse()));
1604
1605 // LoopSink (and other loop passes since the last simplifyCFG) might have
1606 // resulted in single-entry-single-exit or empty blocks. Clean up the CFG.
1607 OptimizePM.addPass(
1609 .convertSwitchRangeToICmp(true)
1610 .convertSwitchToArithmetic(true)
1611 .speculateUnpredictables(true)
1612 .hoistLoadsStoresWithCondFaulting(true)));
1613
1614 // Add the core optimizing pipeline.
1615 MPM.addPass(createModuleToFunctionPassAdaptor(std::move(OptimizePM),
1616 PTO.EagerlyInvalidateAnalyses));
1617
1618 invokeOptimizerLastEPCallbacks(MPM, Level, LTOPhase);
1619
1620 // Split out cold code. Splitting is done late to avoid hiding context from
1621 // other optimizations and inadvertently regressing performance. The tradeoff
1622 // is that this has a higher code size cost than splitting early.
1623 if (EnableHotColdSplit && !LTOPreLink)
1625
1626 // Search the code for similar regions of code. If enough similar regions can
1627 // be found where extracting the regions into their own function will decrease
1628 // the size of the program, we extract the regions and deduplicate the
1629 // structurally similar regions.
1630 if (EnableIROutliner)
1631 MPM.addPass(IROutlinerPass());
1632
1633 // Now we need to do some global optimization transforms.
1634 // FIXME: It would seem like these should come first in the optimization
1635 // pipeline and maybe be the bottom of the canonicalization pipeline? Weird
1636 // ordering here.
1637 MPM.addPass(GlobalDCEPass());
1639
1640 // Merge functions if requested. It has a better chance to merge functions
1641 // after ConstantMerge folded jump tables.
1642 if (PTO.MergeFunctions)
1644
1645 if (PTO.CallGraphProfile && !LTOPreLink)
1646 MPM.addPass(CGProfilePass(isLTOPostLink(LTOPhase)));
1647
1648 // RelLookupTableConverterPass runs later in LTO post-link pipeline.
1649 if (!LTOPreLink)
1651
1652 return MPM;
1653}
1654
1655ModulePassManager
1656PassBuilder::buildPerModuleDefaultPipeline(OptimizationLevel Level,
1657 ThinOrFullLTOPhase Phase) {
1658 if (Level == OptimizationLevel::O0)
1659 return buildO0DefaultPipeline(Level, Phase);
1660
1662
1663 // Currently this pipeline is only invoked in an LTO pre link pass or when we
1664 // are not running LTO. If that changes the below checks may need updating.
1666
1667 // If we are invoking this in non-LTO mode, remove any MemProf related
1668 // attributes and metadata, as we don't know whether we are linking with
1669 // a library containing the necessary interfaces.
1672
1673 // Convert @llvm.global.annotations to !annotation metadata.
1675
1676 // Force any function attributes we want the rest of the pipeline to observe.
1678
1679 if (PGOOpt && PGOOpt->DebugInfoForProfiling)
1681
1682 // Apply module pipeline start EP callback.
1684
1685 // Add the core simplification pipeline.
1687
1688 // Now add the optimization pipeline.
1690
1691 if (PGOOpt && PGOOpt->PseudoProbeForProfiling &&
1692 PGOOpt->Action == PGOOptions::SampleUse)
1694
1695 // Emit annotation remarks.
1697
1698 if (isLTOPreLink(Phase))
1699 addRequiredLTOPreLinkPasses(MPM);
1700 return MPM;
1701}
1702
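// Example (sketch): choosing between the entry points in this region based on
// how the module will later be linked. 'Level' and 'IsThinLTOPreLink' are
// assumed to be supplied by the embedder.
static ModulePassManager
buildFrontendPipelineExample(PassBuilder &PB, OptimizationLevel Level,
                             bool IsThinLTOPreLink) {
  if (IsThinLTOPreLink)
    return PB.buildThinLTOPreLinkDefaultPipeline(Level);
  return PB.buildPerModuleDefaultPipeline(Level, ThinOrFullLTOPhase::None);
}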
1703ModulePassManager
1704PassBuilder::buildFatLTODefaultPipeline(OptimizationLevel Level, bool ThinLTO,
1705 bool EmitSummary) {
1707 if (ThinLTO)
1709 else
1711 MPM.addPass(EmbedBitcodePass(ThinLTO, EmitSummary));
1712
1713 // Perform any cleanups to the IR that aren't suitable for per TU compilation,
1714 // like removing CFI/WPD related instructions. Note, we reuse
1715 // LowerTypeTestsPass to clean up type tests rather than duplicate that logic
1716 // in FatLtoCleanup.
1717 MPM.addPass(FatLtoCleanup());
1718
1719 // If we're doing FatLTO w/ CFI enabled, we don't want the type tests in the
1720 // object code, only in the bitcode section, so drop it before we run
1721 // module optimization and generate machine code. If llvm.type.test() isn't in
1722 // the IR, this won't do anything.
1723 MPM.addPass(
1725
1726 // Use the ThinLTO post-link pipeline with sample profiling
1727 if (ThinLTO && PGOOpt && PGOOpt->Action == PGOOptions::SampleUse)
1728 MPM.addPass(buildThinLTODefaultPipeline(Level, /*ImportSummary=*/nullptr));
1729 else {
1730 // ModuleSimplification does not run the coroutine passes for
1731 // ThinLTOPreLink, so we need the coroutine passes to run for ThinLTO
1732 // builds, otherwise they will miscompile.
1733 if (ThinLTO) {
1734 // TODO: replace w/ buildCoroWrapper() when it takes phase and level into
1735 // consideration.
1736 CGSCCPassManager CGPM;
1740 MPM.addPass(CoroCleanupPass());
1741 }
1742
1743 // otherwise, just use module optimization
1744 MPM.addPass(
1746 // Emit annotation remarks.
1748 }
1749 return MPM;
1750}
1751
1752ModulePassManager
1753PassBuilder::buildThinLTOPreLinkDefaultPipeline(OptimizationLevel Level) {
1754 if (Level == OptimizationLevel::O0)
1756
1758
1759 // Convert @llvm.global.annotations to !annotation metadata.
1761
1762 // Force any function attributes we want the rest of the pipeline to observe.
1764
1765 if (PGOOpt && PGOOpt->DebugInfoForProfiling)
1767
1768 // Apply module pipeline start EP callback.
1770
1771 // If we are planning to perform ThinLTO later, we don't bloat the code with
1772 // unrolling/vectorization/... now. Just simplify the module as much as we
1773 // can.
1776 // In pre-link, for ctx prof use, we stop here with an instrumented IR. We let
1777 // thinlto use the contextual info to perform imports; then use the contextual
1778 // profile in the post-thinlink phase.
1779 if (!UseCtxProfile.empty()) {
1780 addRequiredLTOPreLinkPasses(MPM);
1781 return MPM;
1782 }
1783
1784 // Run partial inlining pass to partially inline functions that have
1785 // large bodies.
1786 // FIXME: It isn't clear whether this is really the right place to run this
1787 // in ThinLTO. Because there is another canonicalization and simplification
1788 // phase that will run after the thin link, running this here ends up with
1789 // less information than will be available later and it may grow functions in
1790 // ways that aren't beneficial.
1793
1794 if (PGOOpt && PGOOpt->PseudoProbeForProfiling &&
1795 PGOOpt->Action == PGOOptions::SampleUse)
1797
1798 // Handle Optimizer{Early,Last}EPCallbacks added by clang on PreLink. Actual
1799 // optimization is going to be done in PostLink stage, but clang can't add
1800 // callbacks there in case of in-process ThinLTO called by linker.
1805
1806 // Emit annotation remarks.
1808
1809 addRequiredLTOPreLinkPasses(MPM);
1810
1811 return MPM;
1812}
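A hedged sketch (not part of this file) of the ThinLTO pre-link (compile) step; PB, M, and MAM are as in the earlier sketch, and after this pipeline runs the caller normally writes summary-carrying bitcode (e.g. via ThinLTOBitcodeWriterPass) for the thin link to consume:

  ModulePassManager MPM =
      PB.buildThinLTOPreLinkDefaultPipeline(OptimizationLevel::O2);
  MPM.run(M, MAM);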
1813
1815 OptimizationLevel Level, const ModuleSummaryIndex *ImportSummary) {
1817
1818 // If we are invoking this without a summary index noting that we are linking
1819 // with a library containing the necessary APIs, remove any MemProf related
1820 // attributes and metadata.
1821 if (!ImportSummary || !ImportSummary->withSupportsHotColdNew())
1823
1824 if (ImportSummary) {
1825 // For ThinLTO we must apply the context disambiguation decisions early, to
1826 // ensure we can correctly match the callsites to summary data.
1829 ImportSummary, PGOOpt && PGOOpt->Action == PGOOptions::SampleUse));
1830
1831 // These passes import type identifier resolutions for whole-program
1832 // devirtualization and CFI. They must run early because other passes may
1833 // disturb the specific instruction patterns that these passes look for,
1834 // creating dependencies on resolutions that may not appear in the summary.
1835 //
1836 // For example, GVN may transform the pattern assume(type.test) appearing in
1837 // two basic blocks into assume(phi(type.test, type.test)), which would
1838 // transform a dependency on a WPD resolution into a dependency on a type
1839 // identifier resolution for CFI.
1840 //
1841 // Also, WPD has access to more precise information than ICP and can
1842 // devirtualize more effectively, so it should operate on the IR first.
1843 //
1844 // The WPD and LowerTypeTest passes need to run at -O0 to lower type
1845 // metadata and intrinsics.
1846 MPM.addPass(WholeProgramDevirtPass(nullptr, ImportSummary));
1847 MPM.addPass(LowerTypeTestsPass(nullptr, ImportSummary));
1848 }
1849
1850 if (Level == OptimizationLevel::O0) {
1851 // Run a second time to clean up any type tests left behind by WPD for use
1852 // in ICP.
1853 MPM.addPass(LowerTypeTestsPass(nullptr, nullptr,
1856 // Drop available_externally and unreferenced globals. This is necessary
1857 // with ThinLTO in order to avoid leaving undefined references to dead
1858 // globals in the object file.
1860 MPM.addPass(GlobalDCEPass());
1861 return MPM;
1862 }
1863 if (!UseCtxProfile.empty()) {
1864 MPM.addPass(
1866 } else {
1867 // Add the core simplification pipeline.
1870 }
1871 // Now add the optimization pipeline.
1874
1875 // Emit annotation remarks.
1877
1878 return MPM;
1879}
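A hedged sketch (not part of this file) of the ThinLTO post-link (backend) step; PB, M, and MAM are as in the earlier sketch, and Index is a placeholder for the combined ModuleSummaryIndex produced by the thin link:

  ModulePassManager MPM =
      PB.buildThinLTODefaultPipeline(OptimizationLevel::O2, &Index);
  MPM.run(M, MAM);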
1880
1883 // FIXME: We should use a customized pre-link pipeline!
1884 return buildPerModuleDefaultPipeline(Level,
1886}
1887
1890 ModuleSummaryIndex *ExportSummary) {
1892
1894
1895 // If we are invoking this without a summary index noting that we are linking
1896 // with a library containing the necessary APIs, remove any MemProf related
1897 // attributes and metadata.
1898 if (!ExportSummary || !ExportSummary->withSupportsHotColdNew())
1900
1901 // Create a function that performs CFI checks for cross-DSO calls with targets
1902 // in the current module.
1903 MPM.addPass(CrossDSOCFIPass());
1904
1905 if (Level == OptimizationLevel::O0) {
1906 // The WPD and LowerTypeTest passes need to run at -O0 to lower type
1907 // metadata and intrinsics.
1908 MPM.addPass(WholeProgramDevirtPass(ExportSummary, nullptr));
1909 MPM.addPass(LowerTypeTestsPass(ExportSummary, nullptr));
1910 // Run a second time to clean up any type tests left behind by WPD for use
1911 // in ICP.
1912 MPM.addPass(LowerTypeTestsPass(nullptr, nullptr,
1914
1916
1918
1919 // Emit annotation remarks.
1921
1922 return MPM;
1923 }
1924
1925 if (PGOOpt && PGOOpt->Action == PGOOptions::SampleUse) {
1926 // Load sample profile before running the LTO optimization pipeline.
1927 MPM.addPass(SampleProfileLoaderPass(PGOOpt->ProfileFile,
1928 PGOOpt->ProfileRemappingFile,
1930 // Cache ProfileSummaryAnalysis once to avoid the potential need to insert
1931 // RequireAnalysisPass for PSI before subsequent non-module passes.
1933 }
1934
1935 // Try to run OpenMP optimizations, quick no-op if no OpenMP metadata present.
1937
1938 // Remove unused virtual tables to improve the quality of code generated by
1939 // whole-program devirtualization and bitset lowering.
1940 MPM.addPass(GlobalDCEPass(/*InLTOPostLink=*/true));
1941
1942 // Do basic inference of function attributes from known properties of system
1943 // libraries and other oracles.
1945
1946 if (Level.getSpeedupLevel() > 1) {
1948 CallSiteSplittingPass(), PTO.EagerlyInvalidateAnalyses));
1949
1950 // Indirect call promotion. This should promote all the targets that are
1951 // left by the earlier promotion pass that promotes intra-module targets.
1952 // This two-step promotion is to save compile time. For LTO, it should
1953 // produce the same result as if we only do promotion here.
1955 true /* InLTO */, PGOOpt && PGOOpt->Action == PGOOptions::SampleUse));
1956
1957 // Promoting by-reference arguments to by-value exposes more constants to
1958 // IPSCCP.
1959 CGSCCPassManager CGPM;
1962 CGPM.addPass(
1965
1966 // Propagate constants at call sites into the functions they call. This
1967 // opens opportunities for globalopt (and inlining) by substituting function
1968 // pointers passed as arguments to direct uses of functions.
1969 MPM.addPass(IPSCCPPass(IPSCCPOptions(/*AllowFuncSpec=*/
1970 Level != OptimizationLevel::Os &&
1971 Level != OptimizationLevel::Oz)));
1972
1973 // Attach metadata to indirect call sites indicating the set of functions
1974 // they may target at run-time. This should follow IPSCCP.
1976 }
1977
1978 // Do RPO function attribute inference across the module to forward-propagate
1979 // attributes where applicable.
1980 // FIXME: Is this really an optimization rather than a canonicalization?
1982
1983 // Use in-range annotations on GEP indices to split globals where beneficial.
1984 MPM.addPass(GlobalSplitPass());
1985
1986 // Run whole program optimization of virtual calls when the list of callees
1987 // is fixed.
1988 MPM.addPass(WholeProgramDevirtPass(ExportSummary, nullptr));
1989
1991 // Stop here at -O1.
1992 if (Level == OptimizationLevel::O1) {
1993 // The LowerTypeTestsPass needs to run to lower type metadata and the
1994 // type.test intrinsics. The pass does nothing if CFI is disabled.
1995 MPM.addPass(LowerTypeTestsPass(ExportSummary, nullptr));
1996 // Run a second time to clean up any type tests left behind by WPD for use
1997 // in ICP (which is performed earlier than this in the regular LTO
1998 // pipeline).
1999 MPM.addPass(LowerTypeTestsPass(nullptr, nullptr,
2001
2003
2005
2006 // Emit annotation remarks.
2008
2009 return MPM;
2010 }
2011
2012 // TODO: Skip to match buildCoroWrapper.
2013 MPM.addPass(CoroEarlyPass());
2014
2015 // Optimize globals to try and fold them into constants.
2016 MPM.addPass(GlobalOptPass());
2017
2018 // Promote any localized globals to SSA registers.
2020
2021 // Linking modules together can lead to duplicate global constants; only
2022 // keep one copy of each constant.
2023 MPM.addPass(ConstantMergePass());
2024
2025 // Remove unused arguments from functions.
2026 MPM.addPass(DeadArgumentEliminationPass());
2027
2028 // Reduce the code after globalopt and ipsccp. Both can open up significant
2029 // simplification opportunities, and both can propagate functions through
2030 // function pointers. When this happens, we often have to resolve varargs
2031 // calls, etc, so let instcombine do this.
2032 FunctionPassManager PeepholeFPM;
2033 PeepholeFPM.addPass(InstCombinePass());
2034 if (Level.getSpeedupLevel() > 1)
2035 PeepholeFPM.addPass(AggressiveInstCombinePass());
2036 invokePeepholeEPCallbacks(PeepholeFPM, Level);
2037
2038 MPM.addPass(createModuleToFunctionPassAdaptor(std::move(PeepholeFPM),
2039 PTO.EagerlyInvalidateAnalyses));
2040
2041 // Lower variadic functions for supported targets prior to inlining.
2043
2044 // Note: historically, the PruneEH pass was run first to deduce nounwind and
2045 // generally clean up exception handling overhead. It isn't clear this is
2046 // valuable as the inliner doesn't currently care whether it is inlining an
2047 // invoke or a call.
2048 // Run the inliner now.
2049 if (EnableModuleInliner) {
2053 } else {
2056 /* MandatoryFirst */ true,
2059 }
2060
2061 // Perform context disambiguation after inlining, since that would reduce the
2062 // amount of additional cloning required to distinguish the allocation
2063 // contexts.
2066 /*Summary=*/nullptr,
2067 PGOOpt && PGOOpt->Action == PGOOptions::SampleUse));
2068
2069 // Optimize globals again after we ran the inliner.
2070 MPM.addPass(GlobalOptPass());
2071
2072 // Run the OpenMPOpt pass again after global optimizations.
2074
2075 // Garbage collect dead functions.
2076 MPM.addPass(GlobalDCEPass(/*InLTOPostLink=*/true));
2077
2078 // If we didn't decide to inline a function, check to see if we can
2079 // transform it to pass arguments by value instead of by reference.
2080 CGSCCPassManager CGPM;
2085
2087 // The IPO Passes may leave cruft around. Clean up after them.
2088 FPM.addPass(InstCombinePass());
2089 invokePeepholeEPCallbacks(FPM, Level);
2090
2093
2095
2096 // Do a post inline PGO instrumentation and use pass. This is a context
2097 // sensitive PGO pass.
2098 if (PGOOpt) {
2099 if (PGOOpt->CSAction == PGOOptions::CSIRInstr)
2100 addPGOInstrPasses(MPM, Level, /*RunProfileGen=*/true,
2101 /*IsCS=*/true, PGOOpt->AtomicCounterUpdate,
2102 PGOOpt->CSProfileGenFile, PGOOpt->ProfileRemappingFile);
2103 else if (PGOOpt->CSAction == PGOOptions::CSIRUse)
2104 addPGOInstrPasses(MPM, Level, /*RunProfileGen=*/false,
2105 /*IsCS=*/true, PGOOpt->AtomicCounterUpdate,
2106 PGOOpt->ProfileFile, PGOOpt->ProfileRemappingFile);
2107 }
2108
2109 // Break up allocas
2111
2112 // LTO provides additional opportunities for tailcall elimination due to
2113 // link-time inlining, and visibility of nocapture attribute.
2114 FPM.addPass(
2115 TailCallElimPass(/*UpdateFunctionEntryCount=*/isInstrumentedPGOUse()));
2116
2117 // Run a few AA driver optimizations here and now to cleanup the code.
2118 MPM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM),
2119 PTO.EagerlyInvalidateAnalyses));
2120
2121 MPM.addPass(
2123
2124 // Require the GlobalsAA analysis for the module so we can query it within
2125 // MainFPM.
2128 // Invalidate AAManager so it can be recreated and pick up the newly
2129 // available GlobalsAA.
2130 MPM.addPass(
2132 }
2133
2134 FunctionPassManager MainFPM;
2135 MainFPM.addPass(createFunctionToLoopPassAdaptor(
2136 LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap,
2137 /*AllowSpeculation=*/true),
2138 /*UseMemorySSA=*/true));
2139
2140 if (RunNewGVN)
2141 MainFPM.addPass(NewGVNPass());
2142 else
2143 MainFPM.addPass(GVNPass());
2144
2145 // Remove dead memcpy()'s.
2146 MainFPM.addPass(MemCpyOptPass());
2147
2148 // Nuke dead stores.
2149 MainFPM.addPass(DSEPass());
2150 MainFPM.addPass(MoveAutoInitPass());
2152
2153 invokeVectorizerStartEPCallbacks(MainFPM, Level);
2154
2155 LoopPassManager LPM;
2156 if (EnableLoopFlatten && Level.getSpeedupLevel() > 1)
2157 LPM.addPass(LoopFlattenPass());
2158 LPM.addPass(IndVarSimplifyPass());
2159 LPM.addPass(LoopDeletionPass());
2160 // FIXME: Add loop interchange.
2161
2162 // Unroll small loops and perform peeling.
2163 LPM.addPass(LoopFullUnrollPass(Level.getSpeedupLevel(),
2164 /* OnlyWhenForced= */ !PTO.LoopUnrolling,
2165 PTO.ForgetAllSCEVInLoopUnroll));
2166 // The loop passes in LPM (LoopFullUnrollPass) do not preserve MemorySSA.
2167 // *All* loop passes must preserve it, in order to be able to use it.
2168 MainFPM.addPass(
2169 createFunctionToLoopPassAdaptor(std::move(LPM), /*UseMemorySSA=*/false));
2170
2171 MainFPM.addPass(LoopDistributePass());
2172
2173 addVectorPasses(Level, MainFPM, ThinOrFullLTOPhase::FullLTOPostLink);
2174
2175 invokeVectorizerEndEPCallbacks(MainFPM, Level);
2176
2177 // Run the OpenMPOpt CGSCC pass again late.
2180
2181 invokePeepholeEPCallbacks(MainFPM, Level);
2182 MainFPM.addPass(JumpThreadingPass());
2183 MPM.addPass(createModuleToFunctionPassAdaptor(std::move(MainFPM),
2184 PTO.EagerlyInvalidateAnalyses));
2185
2186 // Lower type metadata and the type.test intrinsic. This pass supports
2187 // clang's control flow integrity mechanisms (-fsanitize=cfi*) and needs
2188 // to be run at link time if CFI is enabled. This pass does nothing if
2189 // CFI is disabled.
2190 MPM.addPass(LowerTypeTestsPass(ExportSummary, nullptr));
2191 // Run a second time to clean up any type tests left behind by WPD for use
2192 // in ICP (which is performed earlier than this in the regular LTO pipeline).
2193 MPM.addPass(LowerTypeTestsPass(nullptr, nullptr,
2195
2196 // Enable splitting late in the FullLTO post-link pipeline.
2199
2200 // Add late LTO optimization passes.
2201 FunctionPassManager LateFPM;
2202
2203 // LoopSink pass sinks instructions hoisted by LICM, which serves as a
2204 // canonicalization pass that enables other optimizations. As a result,
2205 // LoopSink pass needs to be a very late IR pass to avoid undoing LICM
2206 // result too early.
2207 LateFPM.addPass(LoopSinkPass());
2208
2209 // This hoists/decomposes div/rem ops. It should run after other sink/hoist
2210 // passes to avoid re-sinking, but before SimplifyCFG because it can allow
2211 // flattening of blocks.
2212 LateFPM.addPass(DivRemPairsPass());
2213
2214 // Delete basic blocks, which optimization passes may have killed.
2215 LateFPM.addPass(SimplifyCFGPass(SimplifyCFGOptions()
2216 .convertSwitchRangeToICmp(true)
2217 .convertSwitchToArithmetic(true)
2218 .hoistCommonInsts(true)
2219 .speculateUnpredictables(true)));
2220 MPM.addPass(createModuleToFunctionPassAdaptor(std::move(LateFPM)));
2221
2222 // Drop bodies of available externally objects to improve GlobalDCE.
2223 MPM.addPass(EliminateAvailableExternallyPass());
2224
2225 // Now that we have optimized the program, discard unreachable functions.
2226 MPM.addPass(GlobalDCEPass(/*InLTOPostLink=*/true));
2227
2228 if (PTO.MergeFunctions)
2230
2232
2233 if (PTO.CallGraphProfile)
2234 MPM.addPass(CGProfilePass(/*InLTOPostLink=*/true));
2235
2236 MPM.addPass(CoroCleanupPass());
2237
2239
2240 // Emit annotation remarks.
2242
2243 return MPM;
2244}
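A hedged sketch (not part of this file) of how a regular (full) LTO backend might drive this pipeline; PB, M, and MAM are as in the earlier sketch, and CombinedIndex stands for the merged ModuleSummaryIndex owned by the LTO backend, which WPD and LowerTypeTests may mutate:

  ModulePassManager MPM =
      PB.buildLTODefaultPipeline(OptimizationLevel::O2, &CombinedIndex);
  MPM.run(M, MAM);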
2245
2249 assert(Level == OptimizationLevel::O0 &&
2250 "buildO0DefaultPipeline should only be used with O0");
2251
2253
2254 // Perform pseudo probe instrumentation in O0 mode. This is for consistency
2255 // between different build modes. For example, an LTO build can be
2256 // mixed with an O0 prelink and an O2 postlink. Loading a sample profile in
2257 // the postlink will require pseudo probe instrumentation in the prelink.
2258 if (PGOOpt && PGOOpt->PseudoProbeForProfiling)
2260
2261 if (PGOOpt && (PGOOpt->Action == PGOOptions::IRInstr ||
2262 PGOOpt->Action == PGOOptions::IRUse))
2264 MPM,
2265 /*RunProfileGen=*/(PGOOpt->Action == PGOOptions::IRInstr),
2266 /*IsCS=*/false, PGOOpt->AtomicCounterUpdate, PGOOpt->ProfileFile,
2267 PGOOpt->ProfileRemappingFile);
2268
2269 // Instrument function entry and exit before all inlining.
2271 EntryExitInstrumenterPass(/*PostInlining=*/false)));
2272
2274
2275 if (PGOOpt && PGOOpt->DebugInfoForProfiling)
2277
2278 if (PGOOpt && PGOOpt->Action == PGOOptions::SampleUse) {
2279 // Explicitly disable sample loader inlining and use flattened profile in O0
2280 // pipeline.
2281 MPM.addPass(SampleProfileLoaderPass(PGOOpt->ProfileFile,
2282 PGOOpt->ProfileRemappingFile,
2283 ThinOrFullLTOPhase::None, nullptr,
2284 /*DisableSampleProfileInlining=*/true,
2285 /*UseFlattenedProfile=*/true));
2286 // Cache ProfileSummaryAnalysis once to avoid the potential need to insert
2287 // RequireAnalysisPass for PSI before subsequent non-module passes.
2289 }
2290
2292
2293 // Build a minimal pipeline based on the semantics required by LLVM,
2294 // which is just that always inlining occurs. Further, disable generating
2295 // lifetime intrinsics to avoid enabling further optimizations during
2296 // code generation.
2298 /*InsertLifetimeIntrinsics=*/false));
2299
2300 if (PTO.MergeFunctions)
2302
2303 if (EnableMatrix)
2304 MPM.addPass(
2306
2307 if (!CGSCCOptimizerLateEPCallbacks.empty()) {
2308 CGSCCPassManager CGPM;
2310 if (!CGPM.isEmpty())
2312 }
2313 if (!LateLoopOptimizationsEPCallbacks.empty()) {
2314 LoopPassManager LPM;
2316 if (!LPM.isEmpty()) {
2318 createFunctionToLoopPassAdaptor(std::move(LPM))));
2319 }
2320 }
2321 if (!LoopOptimizerEndEPCallbacks.empty()) {
2322 LoopPassManager LPM;
2324 if (!LPM.isEmpty()) {
2326 createFunctionToLoopPassAdaptor(std::move(LPM))));
2327 }
2328 }
2329 if (!ScalarOptimizerLateEPCallbacks.empty()) {
2332 if (!FPM.isEmpty())
2333 MPM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM)));
2334 }
2335
2337
2338 if (!VectorizerStartEPCallbacks.empty()) {
2341 if (!FPM.isEmpty())
2342 MPM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM)));
2343 }
2344
2345 if (!VectorizerEndEPCallbacks.empty()) {
2348 if (!FPM.isEmpty())
2349 MPM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM)));
2350 }
2351
2353
2355
2356 if (isLTOPreLink(Phase))
2357 addRequiredLTOPreLinkPasses(MPM);
2358
2360
2361 return MPM;
2362}
2363
2365 AAManager AA;
2366
2367 // The order in which these are registered determines their priority when
2368 // being queried.
2369
2370 // Add any target-specific alias analyses that should be run early.
2371 if (TM)
2372 TM->registerEarlyDefaultAliasAnalyses(AA);
2373
2374 // First we register the basic alias analysis that provides the majority of
2375 // per-function local AA logic. This is a stateless, on-demand local set of
2376 // AA techniques.
2377 AA.registerFunctionAnalysis<BasicAA>();
2378
2379 // Next we query fast, specialized alias analyses that wrap IR-embedded
2380 // information about aliasing.
2381 AA.registerFunctionAnalysis<ScopedNoAliasAA>();
2382 AA.registerFunctionAnalysis<TypeBasedAA>();
2383
2384 // Add support for querying global aliasing information when available.
2385 // Because the `AAManager` is a function analysis and `GlobalsAA` is a module
2386 // analysis, all that the `AAManager` can do is query for any *cached*
2387 // results from `GlobalsAA` through a readonly proxy.
2389 AA.registerModuleAnalysis<GlobalsAA>();
2390
2391 // Add target-specific alias analyses.
2392 if (TM)
2393 TM->registerDefaultAliasAnalyses(AA);
2394
2395 return AA;
2396}
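A hedged sketch (not part of this file) of the common way tools install this alias-analysis pipeline: register it on the FunctionAnalysisManager before registering the remaining function analyses, so the AAManager picks it up rather than an empty default; PB is as in the earlier sketch:

  FunctionAnalysisManager FAM;
  FAM.registerPass([&] { return PB.buildDefaultAAPipeline(); });
  PB.registerFunctionAnalyses(FAM);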
2397
2398bool PassBuilder::isInstrumentedPGOUse() const {
2399 return (PGOOpt && PGOOpt->Action == PGOOptions::IRUse) ||
2400 !UseCtxProfile.empty();
2401}