LLVM 22.0.0git
PassBuilderPipelines.cpp
Go to the documentation of this file.
1//===- Construction of pass pipelines -------------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8/// \file
9///
10/// This file provides the implementation of the PassBuilder based on our
11/// static pass registry as well as related functionality. It also provides
12/// helpers to aid in analyzing, debugging, and testing passes and pass
13/// pipelines.
14///
15//===----------------------------------------------------------------------===//
16
17#include "llvm/ADT/Statistic.h"
27#include "llvm/IR/PassManager.h"
28#include "llvm/Pass.h"
151
152using namespace llvm;
153
154namespace llvm {
155
157 "enable-ml-inliner", cl::init(InliningAdvisorMode::Default), cl::Hidden,
158 cl::desc("Enable ML policy for inliner. Currently trained for -Oz only"),
160 "Heuristics-based inliner version"),
162 "Use development mode (runtime-loadable model)"),
164 "Use release mode (AOT-compiled model)")));
165
166/// Flag to enable inline deferral during PGO.
167static cl::opt<bool>
168 EnablePGOInlineDeferral("enable-npm-pgo-inline-deferral", cl::init(true),
170 cl::desc("Enable inline deferral during PGO"));
171
172static cl::opt<bool> EnableModuleInliner("enable-module-inliner",
173 cl::init(false), cl::Hidden,
174 cl::desc("Enable module inliner"));
175
177 "mandatory-inlining-first", cl::init(false), cl::Hidden,
178 cl::desc("Perform mandatory inlinings module-wide, before performing "
179 "inlining"));
180
182 "eagerly-invalidate-analyses", cl::init(true), cl::Hidden,
183 cl::desc("Eagerly invalidate more analyses in default pipelines"));
184
186 "enable-merge-functions", cl::init(false), cl::Hidden,
187 cl::desc("Enable function merging as part of the optimization pipeline"));
188
190 "enable-post-pgo-loop-rotation", cl::init(true), cl::Hidden,
191 cl::desc("Run the loop rotation transformation after PGO instrumentation"));
192
194 "enable-global-analyses", cl::init(true), cl::Hidden,
195 cl::desc("Enable inter-procedural analyses"));
196
197static cl::opt<bool> RunPartialInlining("enable-partial-inlining",
198 cl::init(false), cl::Hidden,
199 cl::desc("Run Partial inlining pass"));
200
202 "extra-vectorizer-passes", cl::init(false), cl::Hidden,
203 cl::desc("Run cleanup optimization passes after vectorization"));
204
205static cl::opt<bool> RunNewGVN("enable-newgvn", cl::init(false), cl::Hidden,
206 cl::desc("Run the NewGVN pass"));
207
208static cl::opt<bool>
209 EnableLoopInterchange("enable-loopinterchange", cl::init(false), cl::Hidden,
210 cl::desc("Enable the LoopInterchange Pass"));
211
212static cl::opt<bool> EnableUnrollAndJam("enable-unroll-and-jam",
213 cl::init(false), cl::Hidden,
214 cl::desc("Enable Unroll And Jam Pass"));
215
216static cl::opt<bool> EnableLoopFlatten("enable-loop-flatten", cl::init(false),
218 cl::desc("Enable the LoopFlatten Pass"));
219
220// Experimentally allow loop header duplication. This should allow for better
221// optimization at Oz, since loop-idiom recognition can then recognize things
222// like memcpy. If this ends up being useful for many targets, we should drop
223// this flag and make a code generation option that can be controlled
224// independent of the opt level and exposed through the frontend.
226 "enable-loop-header-duplication", cl::init(false), cl::Hidden,
227 cl::desc("Enable loop header duplication at any optimization level"));
228
229static cl::opt<bool>
230 EnableDFAJumpThreading("enable-dfa-jump-thread",
231 cl::desc("Enable DFA jump threading"),
232 cl::init(false), cl::Hidden);
233
234static cl::opt<bool>
235 EnableHotColdSplit("hot-cold-split",
236 cl::desc("Enable hot-cold splitting pass"));
237
238static cl::opt<bool> EnableIROutliner("ir-outliner", cl::init(false),
240 cl::desc("Enable ir outliner pass"));
241
242static cl::opt<bool>
243 DisablePreInliner("disable-preinline", cl::init(false), cl::Hidden,
244 cl::desc("Disable pre-instrumentation inliner"));
245
247 "preinline-threshold", cl::Hidden, cl::init(75),
248 cl::desc("Control the amount of inlining in pre-instrumentation inliner "
249 "(default = 75)"));
250
251static cl::opt<bool>
252 EnableGVNHoist("enable-gvn-hoist",
253 cl::desc("Enable the GVN hoisting pass (default = off)"));
254
255static cl::opt<bool>
256 EnableGVNSink("enable-gvn-sink",
257 cl::desc("Enable the GVN sinking pass (default = off)"));
258
260 "enable-jump-table-to-switch",
261 cl::desc("Enable JumpTableToSwitch pass (default = off)"));
262
263// This option is used in simplifying testing SampleFDO optimizations for
264// profile loading.
265static cl::opt<bool>
266 EnableCHR("enable-chr", cl::init(true), cl::Hidden,
267 cl::desc("Enable control height reduction optimization (CHR)"));
268
270 "flattened-profile-used", cl::init(false), cl::Hidden,
271 cl::desc("Indicate the sample profile being used is flattened, i.e., "
272 "no inline hierarchy exists in the profile"));
273
274static cl::opt<bool>
275 EnableMatrix("enable-matrix", cl::init(false), cl::Hidden,
276 cl::desc("Enable lowering of the matrix intrinsics"));
277
279 "enable-constraint-elimination", cl::init(true), cl::Hidden,
280 cl::desc(
281 "Enable pass to eliminate conditions based on linear constraints"));
282
284 "attributor-enable", cl::Hidden, cl::init(AttributorRunOption::NONE),
285 cl::desc("Enable the attributor inter-procedural deduction pass"),
287 "enable all attributor runs"),
289 "enable module-wide attributor runs"),
291 "enable call graph SCC attributor runs"),
292 clEnumValN(AttributorRunOption::NONE, "none",
293 "disable attributor runs")));
294
296 "enable-sampled-instrumentation", cl::init(false), cl::Hidden,
297 cl::desc("Enable profile instrumentation sampling (default = off)"));
299 "enable-loop-versioning-licm", cl::init(false), cl::Hidden,
300 cl::desc("Enable the experimental Loop Versioning LICM pass"));
301
303 "instrument-cold-function-only-path", cl::init(""),
304 cl::desc("File path for cold function only instrumentation(requires use "
305 "with --pgo-instrument-cold-function-only)"),
306 cl::Hidden);
307
308// TODO: There is a similar flag in WPD pass, we should consolidate them by
309// parsing the option only once in PassBuilder and share it across both places.
311 "enable-devirtualize-speculatively",
312 cl::desc("Enable speculative devirtualization optimization"),
313 cl::init(false));
314
317
319} // namespace llvm
320
338
339namespace llvm {
341} // namespace llvm
342
344 OptimizationLevel Level) {
345 for (auto &C : PeepholeEPCallbacks)
346 C(FPM, Level);
347}
350 for (auto &C : LateLoopOptimizationsEPCallbacks)
351 C(LPM, Level);
352}
354 OptimizationLevel Level) {
355 for (auto &C : LoopOptimizerEndEPCallbacks)
356 C(LPM, Level);
357}
360 for (auto &C : ScalarOptimizerLateEPCallbacks)
361 C(FPM, Level);
362}
364 OptimizationLevel Level) {
365 for (auto &C : CGSCCOptimizerLateEPCallbacks)
366 C(CGPM, Level);
367}
369 OptimizationLevel Level) {
370 for (auto &C : VectorizerStartEPCallbacks)
371 C(FPM, Level);
372}
374 OptimizationLevel Level) {
375 for (auto &C : VectorizerEndEPCallbacks)
376 C(FPM, Level);
377}
379 OptimizationLevel Level,
381 for (auto &C : OptimizerEarlyEPCallbacks)
382 C(MPM, Level, Phase);
383}
385 OptimizationLevel Level,
387 for (auto &C : OptimizerLastEPCallbacks)
388 C(MPM, Level, Phase);
389}
392 for (auto &C : FullLinkTimeOptimizationEarlyEPCallbacks)
393 C(MPM, Level);
394}
397 for (auto &C : FullLinkTimeOptimizationLastEPCallbacks)
398 C(MPM, Level);
399}
401 OptimizationLevel Level) {
402 for (auto &C : PipelineStartEPCallbacks)
403 C(MPM, Level);
404}
407 for (auto &C : PipelineEarlySimplificationEPCallbacks)
408 C(MPM, Level, Phase);
409}
410
411// Helper to add AnnotationRemarksPass.
415
416// Helper to check if the current compilation phase is preparing for LTO
421
422// Helper to check if the current compilation phase is LTO backend
427
428// Helper to wrap conditionally Coro passes.
430 // TODO: Skip passes according to Phase.
431 ModulePassManager CoroPM;
432 CoroPM.addPass(CoroEarlyPass());
433 CGSCCPassManager CGPM;
434 CGPM.addPass(CoroSplitPass());
435 CoroPM.addPass(createModuleToPostOrderCGSCCPassAdaptor(std::move(CGPM)));
436 CoroPM.addPass(CoroCleanupPass());
437 CoroPM.addPass(GlobalDCEPass());
438 return CoroConditionalWrapper(std::move(CoroPM));
439}
440
441// TODO: Investigate the cost/benefit of tail call elimination on debugging.
443PassBuilder::buildO1FunctionSimplificationPipeline(OptimizationLevel Level,
445
447
449 FPM.addPass(CountVisitsPass());
450
451 // Form SSA out of local memory accesses after breaking apart aggregates into
452 // scalars.
453 FPM.addPass(SROAPass(SROAOptions::ModifyCFG));
454
455 // Catch trivial redundancies
456 FPM.addPass(EarlyCSEPass(true /* Enable mem-ssa. */));
457
458 // Hoisting of scalars and load expressions.
459 FPM.addPass(
460 SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(true)));
461 FPM.addPass(InstCombinePass());
462
463 FPM.addPass(LibCallsShrinkWrapPass());
464
465 invokePeepholeEPCallbacks(FPM, Level);
466
467 FPM.addPass(
468 SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(true)));
469
470 // Form canonically associated expression trees, and simplify the trees using
471 // basic mathematical properties. For example, this will form (nearly)
472 // minimal multiplication trees.
473 FPM.addPass(ReassociatePass());
474
475 // Add the primary loop simplification pipeline.
476 // FIXME: Currently this is split into two loop pass pipelines because we run
477 // some function passes in between them. These can and should be removed
478 // and/or replaced by scheduling the loop pass equivalents in the correct
479 // positions. But those equivalent passes aren't powerful enough yet.
480 // Specifically, `SimplifyCFGPass` and `InstCombinePass` are currently still
481 // used. We have `LoopSimplifyCFGPass` which isn't powerful enough yet to
482 // fully replace `SimplifyCFGPass`, and the closest to the other we have is
483 // `LoopInstSimplify`.
484 LoopPassManager LPM1, LPM2;
485
486 // Simplify the loop body. We do this initially to clean up after other loop
487 // passes run, either when iterating on a loop or on inner loops with
488 // implications on the outer loop.
489 LPM1.addPass(LoopInstSimplifyPass());
490 LPM1.addPass(LoopSimplifyCFGPass());
491
492 // Try to remove as much code from the loop header as possible,
493 // to reduce amount of IR that will have to be duplicated. However,
494 // do not perform speculative hoisting the first time as LICM
495 // will destroy metadata that may not need to be destroyed if run
496 // after loop rotation.
497 // TODO: Investigate promotion cap for O1.
498 LPM1.addPass(LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap,
499 /*AllowSpeculation=*/false));
500
501 LPM1.addPass(LoopRotatePass(/* Disable header duplication */ true,
503 // TODO: Investigate promotion cap for O1.
504 LPM1.addPass(LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap,
505 /*AllowSpeculation=*/true));
506 LPM1.addPass(SimpleLoopUnswitchPass());
508 LPM1.addPass(LoopFlattenPass());
509
510 LPM2.addPass(LoopIdiomRecognizePass());
511 LPM2.addPass(IndVarSimplifyPass());
512
514
515 LPM2.addPass(LoopDeletionPass());
516
517 // Do not enable unrolling in PreLinkThinLTO phase during sample PGO
518 // because it changes the IR and makes profile annotation in the backend compile
519 // inaccurate. The normal unroller doesn't pay attention to forced full unroll
520 // attributes so we need to make sure and allow the full unroll pass to pay
521 // attention to it.
522 if (Phase != ThinOrFullLTOPhase::ThinLTOPreLink || !PGOOpt ||
523 PGOOpt->Action != PGOOptions::SampleUse)
524 LPM2.addPass(LoopFullUnrollPass(Level.getSpeedupLevel(),
525 /* OnlyWhenForced= */ !PTO.LoopUnrolling,
526 PTO.ForgetAllSCEVInLoopUnroll));
527
529
530 FPM.addPass(createFunctionToLoopPassAdaptor(std::move(LPM1),
531 /*UseMemorySSA=*/true));
532 FPM.addPass(
533 SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(true)));
534 FPM.addPass(InstCombinePass());
535 // The loop passes in LPM2 (LoopFullUnrollPass) do not preserve MemorySSA.
536 // *All* loop passes must preserve it, in order to be able to use it.
537 FPM.addPass(createFunctionToLoopPassAdaptor(std::move(LPM2),
538 /*UseMemorySSA=*/false));
539
540 // Delete small array after loop unroll.
541 FPM.addPass(SROAPass(SROAOptions::ModifyCFG));
542
543 // Specially optimize memory movement as it doesn't look like dataflow in SSA.
544 FPM.addPass(MemCpyOptPass());
545
546 // Sparse conditional constant propagation.
547 // FIXME: It isn't clear why we do this *after* loop passes rather than
548 // before...
549 FPM.addPass(SCCPPass());
550
551 // Delete dead bit computations (instcombine runs after to fold away the dead
552 // computations, and then ADCE will run later to exploit any new DCE
553 // opportunities that creates).
554 FPM.addPass(BDCEPass());
555
556 // Run instcombine after redundancy and dead bit elimination to exploit
557 // opportunities opened up by them.
558 FPM.addPass(InstCombinePass());
559 invokePeepholeEPCallbacks(FPM, Level);
560
561 FPM.addPass(CoroElidePass());
562
564
565 // Finally, do an expensive DCE pass to catch all the dead code exposed by
566 // the simplifications and basic cleanup after all the simplifications.
567 // TODO: Investigate if this is too expensive.
568 FPM.addPass(ADCEPass());
569 FPM.addPass(
570 SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(true)));
571 FPM.addPass(InstCombinePass());
572 invokePeepholeEPCallbacks(FPM, Level);
573
574 return FPM;
575}
576
580 assert(Level != OptimizationLevel::O0 && "Must request optimizations!");
581
582 // The O1 pipeline has a separate pipeline creation function to simplify
583 // construction readability.
584 if (Level.getSpeedupLevel() == 1)
585 return buildO1FunctionSimplificationPipeline(Level, Phase);
586
588
591
592 // Form SSA out of local memory accesses after breaking apart aggregates into
593 // scalars.
595
596 // Catch trivial redundancies
597 FPM.addPass(EarlyCSEPass(true /* Enable mem-ssa. */));
600
601 // Hoisting of scalars and load expressions.
602 if (EnableGVNHoist)
603 FPM.addPass(GVNHoistPass());
604
605 // Global value numbering based sinking.
606 if (EnableGVNSink) {
607 FPM.addPass(GVNSinkPass());
608 FPM.addPass(
609 SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(true)));
610 }
611
612 // Speculative execution if the target has divergent branches; otherwise nop.
613 FPM.addPass(SpeculativeExecutionPass(/* OnlyIfDivergentTarget =*/true));
614
615 // Optimize based on known information about branches, and cleanup afterward.
618
619 // Jump table to switch conversion.
624
625 FPM.addPass(
626 SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(true)));
629
630 if (!Level.isOptimizingForSize())
632
633 invokePeepholeEPCallbacks(FPM, Level);
634
635 // For PGO use pipeline, try to optimize memory intrinsics such as memcpy
636 // using the size value profile. Don't perform this when optimizing for size.
637 if (PGOOpt && PGOOpt->Action == PGOOptions::IRUse &&
638 !Level.isOptimizingForSize())
640
641 FPM.addPass(TailCallElimPass(/*UpdateFunctionEntryCount=*/
642 isInstrumentedPGOUse()));
643 FPM.addPass(
644 SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(true)));
645
646 // Form canonically associated expression trees, and simplify the trees using
647 // basic mathematical properties. For example, this will form (nearly)
648 // minimal multiplication trees.
650
653
654 // Add the primary loop simplification pipeline.
655 // FIXME: Currently this is split into two loop pass pipelines because we run
656 // some function passes in between them. These can and should be removed
657 // and/or replaced by scheduling the loop pass equivalents in the correct
658 // positions. But those equivalent passes aren't powerful enough yet.
659 // Specifically, `SimplifyCFGPass` and `InstCombinePass` are currently still
660 // used. We have `LoopSimplifyCFGPass` which isn't powerful enough yet to
661 // fully replace `SimplifyCFGPass`, and the closest to the other we have is
662 // `LoopInstSimplify`.
663 LoopPassManager LPM1, LPM2;
664
665 // Simplify the loop body. We do this initially to clean up after other loop
666 // passes run, either when iterating on a loop or on inner loops with
667 // implications on the outer loop.
668 LPM1.addPass(LoopInstSimplifyPass());
669 LPM1.addPass(LoopSimplifyCFGPass());
670
671 // Try to remove as much code from the loop header as possible,
672 // to reduce amount of IR that will have to be duplicated. However,
673 // do not perform speculative hoisting the first time as LICM
674 // will destroy metadata that may not need to be destroyed if run
675 // after loop rotation.
676 // TODO: Investigate promotion cap for O1.
677 LPM1.addPass(LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap,
678 /*AllowSpeculation=*/false));
679
680 // Disable header duplication in loop rotation at -Oz.
682 Level != OptimizationLevel::Oz,
684 // TODO: Investigate promotion cap for O1.
685 LPM1.addPass(LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap,
686 /*AllowSpeculation=*/true));
687 LPM1.addPass(
688 SimpleLoopUnswitchPass(/* NonTrivial */ Level == OptimizationLevel::O3));
690 LPM1.addPass(LoopFlattenPass());
691
692 LPM2.addPass(LoopIdiomRecognizePass());
693 LPM2.addPass(IndVarSimplifyPass());
694
695 {
697 ExtraPasses.addPass(SimpleLoopUnswitchPass(/* NonTrivial */ Level ==
699 LPM2.addPass(std::move(ExtraPasses));
700 }
701
703
704 LPM2.addPass(LoopDeletionPass());
705
706 // Do not enable unrolling in PreLinkThinLTO phase during sample PGO
707 // because it changes the IR and makes profile annotation in the backend compile
708 // inaccurate. The normal unroller doesn't pay attention to forced full unroll
709 // attributes so we need to make sure and allow the full unroll pass to pay
710 // attention to it.
711 if (Phase != ThinOrFullLTOPhase::ThinLTOPreLink || !PGOOpt ||
712 PGOOpt->Action != PGOOptions::SampleUse)
713 LPM2.addPass(LoopFullUnrollPass(Level.getSpeedupLevel(),
714 /* OnlyWhenForced= */ !PTO.LoopUnrolling,
715 PTO.ForgetAllSCEVInLoopUnroll));
716
718
719 FPM.addPass(createFunctionToLoopPassAdaptor(std::move(LPM1),
720 /*UseMemorySSA=*/true));
721 FPM.addPass(
722 SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(true)));
724 // The loop passes in LPM2 (LoopIdiomRecognizePass, IndVarSimplifyPass,
725 // LoopDeletionPass and LoopFullUnrollPass) do not preserve MemorySSA.
726 // *All* loop passes must preserve it, in order to be able to use it.
727 FPM.addPass(createFunctionToLoopPassAdaptor(std::move(LPM2),
728 /*UseMemorySSA=*/false));
729
730 // Delete small array after loop unroll.
732
733 // Try vectorization/scalarization transforms that are both improvements
734 // themselves and can allow further folds with GVN and InstCombine.
735 FPM.addPass(VectorCombinePass(/*TryEarlyFoldsOnly=*/true));
736
737 // Eliminate redundancies.
739 if (RunNewGVN)
740 FPM.addPass(NewGVNPass());
741 else
742 FPM.addPass(GVNPass());
743
744 // Sparse conditional constant propagation.
745 // FIXME: It isn't clear why we do this *after* loop passes rather than
746 // before...
747 FPM.addPass(SCCPPass());
748
749 // Delete dead bit computations (instcombine runs after to fold away the dead
750 // computations, and then ADCE will run later to exploit any new DCE
751 // opportunities that creates).
752 FPM.addPass(BDCEPass());
753
754 // Run instcombine after redundancy and dead bit elimination to exploit
755 // opportunities opened up by them.
757 invokePeepholeEPCallbacks(FPM, Level);
758
759 // Re-consider control flow based optimizations after redundancy elimination,
760 // redo DCE, etc.
763
766
767 // Finally, do an expensive DCE pass to catch all the dead code exposed by
768 // the simplifications and basic cleanup after all the simplifications.
769 // TODO: Investigate if this is too expensive.
770 FPM.addPass(ADCEPass());
771
772 // Specially optimize memory movement as it doesn't look like dataflow in SSA.
773 FPM.addPass(MemCpyOptPass());
774
775 FPM.addPass(DSEPass());
777
779 LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap,
780 /*AllowSpeculation=*/true),
781 /*UseMemorySSA=*/true));
782
783 FPM.addPass(CoroElidePass());
784
786
788 .convertSwitchRangeToICmp(true)
789 .convertSwitchToArithmetic(true)
790 .hoistCommonInsts(true)
791 .sinkCommonInsts(true)));
793 invokePeepholeEPCallbacks(FPM, Level);
794
795 return FPM;
796}
797
798void PassBuilder::addRequiredLTOPreLinkPasses(ModulePassManager &MPM) {
801}
802
/// Append the pre-instrumentation inliner to \p MPM: an inliner wrapper whose
/// CGSCC pipeline runs a small function cleanup pipeline (SROA, EarlyCSE,
/// SimplifyCFG, InstCombine, then the peephole extension-point callbacks),
/// followed by GlobalDCE.
///
/// \param MPM      Module pass manager that receives the passes.
/// \param Level    Optimization level; must not be O0 (asserted below).
/// \param LTOPhase LTO phase of the compilation; not referenced by any of the
///                 statements visible in this listing.
///
/// NOTE(review): this listing omits source lines 807, 811, 818, 820 and 823,
/// so the condition guarding the early `return` and the declarations of
/// `MIWP` and `FPM` are not visible here -- confirm against the real file.
803void PassBuilder::addPreInlinerPasses(ModulePassManager &MPM,
804 OptimizationLevel Level,
805 ThinOrFullLTOPhase LTOPhase) {
806 assert(Level != OptimizationLevel::O0 && "Not expecting O0 here!");
// NOTE(review): the `if` that guards this return is on a line missing from
// the listing; presumably it checks the DisablePreInliner option -- confirm.
808 return;
809 InlineParams IP;
810
812
813 // FIXME: The hint threshold has the same value used by the regular inliner
814 // when not optimizing for size. This should probably be lowered after
815 // performance testing.
816 // FIXME: this comment is cargo culted from the old pass manager, revisit.
// Use the PreInlineThreshold option when optimizing for size, 325 otherwise.
817 IP.HintThreshold = Level.isOptimizingForSize() ? PreInlineThreshold : 325;
819 IP, /* MandatoryFirst */ true,
821 CGSCCPassManager &CGPipeline = MIWP.getPM();
822
824 FPM.addPass(SROAPass(SROAOptions::ModifyCFG));
825 FPM.addPass(EarlyCSEPass()); // Catch trivial redundancies.
826 FPM.addPass(SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(
827 true))); // Merge & remove basic blocks.
828 FPM.addPass(InstCombinePass()); // Combine silly sequences.
829 invokePeepholeEPCallbacks(FPM, Level);
830
// Nest the function cleanup pipeline inside the inliner's CGSCC pipeline.
831 CGPipeline.addPass(createCGSCCToFunctionPassAdaptor(
832 std::move(FPM), PTO.EagerlyInvalidateAnalyses));
833
834 MPM.addPass(std::move(MIWP));
835
836 // Delete anything that is now dead to make sure that we don't instrument
837 // dead code. Instrumentation can end up keeping dead code around and
838 // dramatically increase code size.
839 MPM.addPass(GlobalDCEPass());
840}
841
/// Add a LoopRotatePass (run without MemorySSA) to \p MPM for use after PGO
/// instrumentation.
///
/// \param MPM   Module pass manager that receives the pass.
/// \param Level Optimization level; selects whether the flag passed to
///              LoopRotatePass is true or false (see below).
///
/// NOTE(review): this listing omits source lines 844, 846 and 847, i.e. the
/// condition opening the block closed at 852 and the adaptor calls that wrap
/// the loop pass; presumably the block is guarded by the
/// "enable-post-pgo-loop-rotation" option -- confirm against the real file.
842void PassBuilder::addPostPGOLoopRotation(ModulePassManager &MPM,
843 OptimizationLevel Level) {
845 // Disable header duplication in loop rotation at -Oz, unless
// EnableLoopHeaderDuplication is set: the flag passed to LoopRotatePass is
// false only when Level is Oz and that option is off.
848 LoopRotatePass(EnableLoopHeaderDuplication ||
849 Level != OptimizationLevel::Oz),
850 /*UseMemorySSA=*/false),
851 PTO.EagerlyInvalidateAnalyses));
852 }
853}
854
/// Add the IR-level PGO passes to \p MPM for optimization levels above O0.
///
/// When \p RunProfileGen is false, this adds PGOInstrumentationUse for
/// \p ProfileFile and then requires ProfileSummaryAnalysis, and returns.
/// Otherwise it adds PGOInstrumentationGen, the post-PGO loop rotation, and
/// InstrProfilingLoweringPass configured through InstrProfOptions.
///
/// \param MPM                  Module pass manager that receives the passes.
/// \param Level                Optimization level; must not be O0 (asserted).
/// \param RunProfileGen        True to instrument, false to consume a profile.
/// \param IsCS                 Forwarded to the use/gen/lowering passes and
///                             used as Options.UseBFIInPromotion.
/// \param AtomicCounterUpdate  Stored into Options.Atomic.
/// \param ProfileFile          Profile to read in use mode (must be non-empty
///                             there); in gen mode, if non-empty, becomes
///                             Options.InstrProfileOutput.
/// \param ProfileRemappingFile Forwarded to PGOInstrumentationUse.
855void PassBuilder::addPGOInstrPasses(ModulePassManager &MPM,
856 OptimizationLevel Level, bool RunProfileGen,
857 bool IsCS, bool AtomicCounterUpdate,
858 std::string ProfileFile,
859 std::string ProfileRemappingFile) {
860 assert(Level != OptimizationLevel::O0 && "Not expecting O0 here!");
861
862 if (!RunProfileGen) {
863 assert(!ProfileFile.empty() && "Profile use expecting a profile file!");
// NOTE(review): `FS` is not declared in this function; presumably a
// PassBuilder member -- confirm.
864 MPM.addPass(
865 PGOInstrumentationUse(ProfileFile, ProfileRemappingFile, IsCS, FS));
866 // Cache ProfileSummaryAnalysis once to avoid the potential need to insert
867 // RequireAnalysisPass for PSI before subsequent non-module passes.
868 MPM.addPass(RequireAnalysisPass<ProfileSummaryAnalysis, Module>());
869 return;
870 }
871
872 // Perform PGO instrumentation.
// NOTE(review): the listing omits source line 874, which holds the other arm
// of this conditional expression and the closing parentheses.
873 MPM.addPass(PGOInstrumentationGen(IsCS ? PGOInstrumentationType::CSFDO
875
876 addPostPGOLoopRotation(MPM, Level);
877 // Add the profile lowering pass.
878 InstrProfOptions Options;
879 if (!ProfileFile.empty())
880 Options.InstrProfileOutput = ProfileFile;
881 // Do counter promotion at Level greater than O0.
882 Options.DoCounterPromotion = true;
883 Options.UseBFIInPromotion = IsCS;
884 if (EnableSampledInstr) {
885 Options.Sampling = true;
886 // With sampling, there is little benefit in enabling counter promotion.
887 // But note that sampling does work with counter promotion.
888 Options.DoCounterPromotion = false;
889 }
890 Options.Atomic = AtomicCounterUpdate;
891 MPM.addPass(InstrProfilingLoweringPass(Options, IsCS));
892}
893
895 bool RunProfileGen, bool IsCS,
896 bool AtomicCounterUpdate,
897 std::string ProfileFile,
898 std::string ProfileRemappingFile) {
899 if (!RunProfileGen) {
900 assert(!ProfileFile.empty() && "Profile use expecting a profile file!");
901 MPM.addPass(
902 PGOInstrumentationUse(ProfileFile, ProfileRemappingFile, IsCS, FS));
903 // Cache ProfileSummaryAnalysis once to avoid the potential need to insert
904 // RequireAnalysisPass for PSI before subsequent non-module passes.
906 return;
907 }
908
909 // Perform PGO instrumentation.
912 // Add the profile lowering pass.
914 if (!ProfileFile.empty())
915 Options.InstrProfileOutput = ProfileFile;
916 // Do not do counter promotion at O0.
917 Options.DoCounterPromotion = false;
918 Options.UseBFIInPromotion = IsCS;
919 Options.Atomic = AtomicCounterUpdate;
921}
922
924 return getInlineParams(Level.getSpeedupLevel(), Level.getSizeLevel());
925}
926
930 InlineParams IP;
931 if (PTO.InlinerThreshold == -1)
932 IP = getInlineParamsFromOptLevel(Level);
933 else
934 IP = getInlineParams(PTO.InlinerThreshold);
935 // For PreLinkThinLTO + SamplePGO or PreLinkFullLTO + SamplePGO,
936 // set hot-caller threshold to 0 to disable hot
937 // callsite inline (as much as possible [1]) because it makes
938 // profile annotation in the backend inaccurate.
939 //
940 // [1] Note the cost of a function could be below zero due to erased
941 // prologue / epilogue.
942 if (isLTOPreLink(Phase) && PGOOpt && PGOOpt->Action == PGOOptions::SampleUse)
944
945 if (PGOOpt)
947
951
952 // Require the GlobalsAA analysis for the module so we can query it within
953 // the CGSCC pipeline.
955 MIWP.addModulePass(RequireAnalysisPass<GlobalsAA, Module>());
956 // Invalidate AAManager so it can be recreated and pick up the newly
957 // available GlobalsAA.
958 MIWP.addModulePass(
960 }
961
962 // Require the ProfileSummaryAnalysis for the module so we can query it within
963 // the inliner pass.
965
966 // Now begin the main postorder CGSCC pipeline.
967 // FIXME: The current CGSCC pipeline has its origins in the legacy pass
968 // manager and trying to emulate its precise behavior. Much of this doesn't
969 // make a lot of sense and we should revisit the core CGSCC structure.
970 CGSCCPassManager &MainCGPipeline = MIWP.getPM();
971
972 // Note: historically, the PruneEH pass was run first to deduce nounwind and
973 // generally clean up exception handling overhead. It isn't clear this is
974 // valuable as the inliner doesn't currently care whether it is inlining an
975 // invoke or a call.
976
978 MainCGPipeline.addPass(AttributorCGSCCPass());
979
980 // Deduce function attributes. We do another run of this after the function
981 // simplification pipeline, so this only needs to run when it could affect the
982 // function simplification pipeline, which is only the case with recursive
983 // functions.
984 MainCGPipeline.addPass(PostOrderFunctionAttrsPass(/*SkipNonRecursive*/ true));
985
986 // When at O3 add argument promotion to the pass pipeline.
987 // FIXME: It isn't at all clear why this should be limited to O3.
988 if (Level == OptimizationLevel::O3)
989 MainCGPipeline.addPass(ArgumentPromotionPass());
990
991 // Try to perform OpenMP specific optimizations. This is a (quick!) no-op if
992 // there are no OpenMP runtime calls present in the module.
993 if (Level == OptimizationLevel::O2 || Level == OptimizationLevel::O3)
994 MainCGPipeline.addPass(OpenMPOptCGSCCPass(Phase));
995
996 invokeCGSCCOptimizerLateEPCallbacks(MainCGPipeline, Level);
997
998 // Add the core function simplification pipeline nested inside the
999 // CGSCC walk.
1002 PTO.EagerlyInvalidateAnalyses, /*NoRerun=*/true));
1003
1004 // Finally, deduce any function attributes based on the fully simplified
1005 // function.
1006 MainCGPipeline.addPass(PostOrderFunctionAttrsPass());
1007
1008 // Mark that the function is fully simplified and that it shouldn't be
1009 // simplified again if we somehow revisit it due to CGSCC mutations unless
1010 // it's been modified since.
1013
1015 MainCGPipeline.addPass(CoroSplitPass(Level != OptimizationLevel::O0));
1016 MainCGPipeline.addPass(CoroAnnotationElidePass());
1017 }
1018
1019 // Make sure we don't affect potential future NoRerun CGSCC adaptors.
1020 MIWP.addLateModulePass(createModuleToFunctionPassAdaptor(
1022
1023 return MIWP;
1024}
1025
1030
1032 // For PreLinkThinLTO + SamplePGO or PreLinkFullLTO + SamplePGO,
1033 // set hot-caller threshold to 0 to disable hot
1034 // callsite inline (as much as possible [1]) because it makes
1035 // profile annotation in the backend inaccurate.
1036 //
1037 // [1] Note the cost of a function could be below zero due to erased
1038 // prologue / epilogue.
1039 if (isLTOPreLink(Phase) && PGOOpt && PGOOpt->Action == PGOOptions::SampleUse)
1040 IP.HotCallSiteThreshold = 0;
1041
1042 if (PGOOpt)
1044
1045 // The inline deferral logic is used to avoid losing some
1046 // inlining chance in future. It is helpful in SCC inliner, in which
1047 // inlining is processed in bottom-up order.
1048 // While in module inliner, the inlining order is a priority-based order
1049 // by default. The inline deferral is unnecessary there. So we disable the
1050 // inline deferral logic in module inliner.
1051 IP.EnableDeferral = false;
1052
1055 MPM.addPass(GlobalOptPass());
1056 MPM.addPass(GlobalDCEPass());
1057 MPM.addPass(PGOCtxProfFlatteningPass(/*IsPreThinlink=*/false));
1058 }
1059
1062 PTO.EagerlyInvalidateAnalyses));
1063
1067 MPM.addPass(
1069 }
1070
1071 return MPM;
1072}
1073
1077 assert(Level != OptimizationLevel::O0 &&
1078 "Should not be used for O0 pipeline");
1079
1081 "FullLTOPostLink shouldn't call buildModuleSimplificationPipeline!");
1082
1084
1085 // Place pseudo probe instrumentation as the first pass of the pipeline to
1086 // minimize the impact of optimization changes.
1087 if (PGOOpt && PGOOpt->PseudoProbeForProfiling &&
1090
1091 bool HasSampleProfile = PGOOpt && (PGOOpt->Action == PGOOptions::SampleUse);
1092
1093 // In ThinLTO mode, when flattened profile is used, all the available
1094 // profile information will be annotated in PreLink phase so there is
1095 // no need to load the profile again in PostLink.
1096 bool LoadSampleProfile =
1097 HasSampleProfile &&
1099
1100 // During the ThinLTO backend phase we perform early indirect call promotion
1101 // here, before globalopt. Otherwise imported available_externally functions
1102 // look unreferenced and are removed. If we are going to load the sample
1103 // profile then defer until later.
1104 // TODO: See if we can move later and consolidate with the location where
1105 // we perform ICP when we are loading a sample profile.
1106 // TODO: We pass HasSampleProfile (whether there was a sample profile file
1107 // passed to the compile) to the SamplePGO flag of ICP. This is used to
1108 // determine whether the new direct calls are annotated with prof metadata.
1109 // Ideally this should be determined from whether the IR is annotated with
1110 // sample profile, and not whether a sample profile was provided on the
1111 // command line. E.g. for flattened profiles where we will not be reloading
1112 // the sample profile in the ThinLTO backend, we ideally shouldn't have to
1113 // provide the sample profile file.
1114 if (Phase == ThinOrFullLTOPhase::ThinLTOPostLink && !LoadSampleProfile)
1115 MPM.addPass(PGOIndirectCallPromotion(true /* InLTO */, HasSampleProfile));
1116
1117 // Create an early function pass manager to cleanup the output of the
1118 // frontend. Not necessary with LTO post link pipelines since the pre link
1119 // pipeline already cleaned up the frontend output.
1121 // Do basic inference of function attributes from known properties of system
1122 // libraries and other oracles.
1124 MPM.addPass(CoroEarlyPass());
1125
1126 FunctionPassManager EarlyFPM;
1127 EarlyFPM.addPass(EntryExitInstrumenterPass(/*PostInlining=*/false));
1128 // Lower llvm.expect to metadata before attempting transforms.
1129 // Compare/branch metadata may alter the behavior of passes like
1130 // SimplifyCFG.
1132 EarlyFPM.addPass(SimplifyCFGPass());
1134 EarlyFPM.addPass(EarlyCSEPass());
1135 if (Level == OptimizationLevel::O3)
1136 EarlyFPM.addPass(CallSiteSplittingPass());
1138 std::move(EarlyFPM), PTO.EagerlyInvalidateAnalyses));
1139 }
1140
1141 if (LoadSampleProfile) {
1142 // Annotate sample profile right after early FPM to ensure freshness of
1143 // the debug info.
1145 PGOOpt->ProfileFile, PGOOpt->ProfileRemappingFile, Phase, FS));
1146 // Cache ProfileSummaryAnalysis once to avoid the potential need to insert
1147 // RequireAnalysisPass for PSI before subsequent non-module passes.
1149 // Do not invoke ICP in the LTOPrelink phase as it makes it hard
1150 // for the profile annotation to be accurate in the LTO backend.
1151 if (!isLTOPreLink(Phase))
1152 // We perform early indirect call promotion here, before globalopt.
1153 // This is important for the ThinLTO backend phase because otherwise
1154 // imported available_externally functions look unreferenced and are
1155 // removed.
1156 MPM.addPass(
1157 PGOIndirectCallPromotion(true /* IsInLTO */, true /* SamplePGO */));
1158 }
1159
1160 // Try to perform OpenMP specific optimizations on the module. This is a
1161 // (quick!) no-op if there are no OpenMP runtime calls present in the module.
1163
1165 MPM.addPass(AttributorPass());
1166
1167 // Lower type metadata and the type.test intrinsic in the ThinLTO
1168 // post link pipeline after ICP. This is to enable usage of the type
1169 // tests in ICP sequences.
1171 MPM.addPass(LowerTypeTestsPass(nullptr, nullptr,
1173
1175
1176 // Interprocedural constant propagation now that basic cleanup has occurred
1177 // and prior to optimizing globals.
1178 // FIXME: This position in the pipeline hasn't been carefully considered in
1179 // years, it should be re-analyzed.
1180 MPM.addPass(IPSCCPPass(
1181 IPSCCPOptions(/*AllowFuncSpec=*/
1182 Level != OptimizationLevel::Os &&
1183 Level != OptimizationLevel::Oz &&
1184 !isLTOPreLink(Phase))));
1185
1186 // Attach metadata to indirect call sites indicating the set of functions
1187 // they may target at run-time. This should follow IPSCCP.
1189
1190 // Optimize globals to try and fold them into constants.
1191 MPM.addPass(GlobalOptPass());
1192
1193 // Create a small function pass pipeline to cleanup after all the global
1194 // optimizations.
1195 FunctionPassManager GlobalCleanupPM;
1196 // FIXME: Should this instead be a run of SROA?
1197 GlobalCleanupPM.addPass(PromotePass());
1198 GlobalCleanupPM.addPass(InstCombinePass());
1199 invokePeepholeEPCallbacks(GlobalCleanupPM, Level);
1200 GlobalCleanupPM.addPass(
1201 SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(true)));
1202 MPM.addPass(createModuleToFunctionPassAdaptor(std::move(GlobalCleanupPM),
1203 PTO.EagerlyInvalidateAnalyses));
1204
1205 // We already asserted this happens in non-FullLTOPostLink earlier.
1206 const bool IsPreLink = Phase != ThinOrFullLTOPhase::ThinLTOPostLink;
1207 // Enable contextual profiling instrumentation.
1208 const bool IsCtxProfGen =
1210 const bool IsPGOPreLink = !IsCtxProfGen && PGOOpt && IsPreLink;
1211 const bool IsPGOInstrGen =
1212 IsPGOPreLink && PGOOpt->Action == PGOOptions::IRInstr;
1213 const bool IsPGOInstrUse =
1214 IsPGOPreLink && PGOOpt->Action == PGOOptions::IRUse;
1215 const bool IsMemprofUse = IsPGOPreLink && !PGOOpt->MemoryProfile.empty();
1216 // We don't want to mix pgo ctx gen and pgo gen; we also don't currently
1217 // enable ctx profiling from the frontend.
1219 "Enabling both instrumented PGO and contextual instrumentation is not "
1220 "supported.");
1221 const bool IsCtxProfUse =
1223
1224 assert(
1226 "--instrument-cold-function-only-path is provided but "
1227 "--pgo-instrument-cold-function-only is not enabled");
1228 const bool IsColdFuncOnlyInstrGen = PGOInstrumentColdFunctionOnly &&
1229 IsPGOPreLink &&
1231
1232 if (IsPGOInstrGen || IsPGOInstrUse || IsMemprofUse || IsCtxProfGen ||
1233 IsCtxProfUse || IsColdFuncOnlyInstrGen)
1234 addPreInlinerPasses(MPM, Level, Phase);
1235
1236 // Add all the requested passes for instrumentation PGO, if requested.
1237 if (IsPGOInstrGen || IsPGOInstrUse) {
1238 addPGOInstrPasses(MPM, Level,
1239 /*RunProfileGen=*/IsPGOInstrGen,
1240 /*IsCS=*/false, PGOOpt->AtomicCounterUpdate,
1241 PGOOpt->ProfileFile, PGOOpt->ProfileRemappingFile);
1242 } else if (IsCtxProfGen || IsCtxProfUse) {
1244 // In pre-link, we just want the instrumented IR. We use the contextual
1245 // profile in the post-thinlink phase.
1246 // The instrumentation will be removed in post-thinlink after IPO.
1247 // FIXME(mtrofin): move AssignGUIDPass if there is agreement to use this
1248 // mechanism for GUIDs.
1249 MPM.addPass(AssignGUIDPass());
1250 if (IsCtxProfUse) {
1251 MPM.addPass(PGOCtxProfFlatteningPass(/*IsPreThinlink=*/true));
1252 return MPM;
1253 }
1254 // Block further inlining in the instrumented ctxprof case. This avoids
1255 // confusingly collecting profiles for the same GUID corresponding to
1256 // different variants of the function. We could do like PGO and identify
1257 // functions by a (GUID, Hash) tuple, but since the ctxprof "use" waits for
1258 // thinlto to happen before performing any further optimizations, it's
1259 // unnecessary to collect profiles for non-prevailing copies.
1261 addPostPGOLoopRotation(MPM, Level);
1263 } else if (IsColdFuncOnlyInstrGen) {
1264 addPGOInstrPasses(MPM, Level, /* RunProfileGen */ true, /* IsCS */ false,
1265 /* AtomicCounterUpdate */ false,
1267 /* ProfileRemappingFile */ "");
1268 }
1269
1270 if (IsPGOInstrGen || IsPGOInstrUse || IsCtxProfGen)
1271 MPM.addPass(PGOIndirectCallPromotion(false, false));
1272
1273 if (IsPGOPreLink && PGOOpt->CSAction == PGOOptions::CSIRInstr)
1274 MPM.addPass(PGOInstrumentationGenCreateVar(PGOOpt->CSProfileGenFile,
1276
1277 if (IsMemprofUse)
1278 MPM.addPass(MemProfUsePass(PGOOpt->MemoryProfile, FS));
1279
1280 if (PGOOpt && (PGOOpt->Action == PGOOptions::IRUse ||
1281 PGOOpt->Action == PGOOptions::SampleUse))
1282 MPM.addPass(PGOForceFunctionAttrsPass(PGOOpt->ColdOptType));
1283
1284 MPM.addPass(AlwaysInlinerPass(/*InsertLifetimeIntrinsics=*/true));
1285
1288 else
1289 MPM.addPass(buildInlinerPipeline(Level, Phase));
1290
1291 // Remove any dead arguments exposed by cleanups, constant folding globals,
1292 // and argument promotion.
1294
1297
1299 MPM.addPass(CoroCleanupPass());
1300
1301 // Optimize globals now that functions are fully simplified.
1302 MPM.addPass(GlobalOptPass());
1303 MPM.addPass(GlobalDCEPass());
1304
1305 return MPM;
1306}
1307
/// Append the vectorization-related function passes, and the cleanup passes
/// that follow them, to FPM.
///
/// \param Level    Optimization level; its speedup level gates the extra
///                 cleanup passes and is forwarded to the unrolling passes.
/// \param LTOPhase Current LTO phase. FullLTOPostLink places the unrolling
///                 cleanup earlier and adds an SCCP/InstCombine/BDCE round;
///                 LTO pre-link phases skip the dereferenceable-assume drop.
///
/// NOTE(review): the FPM parameter declaration and several statements are
/// elided from this listing (see the gaps in the embedded line numbers).
///
1308 /// TODO: Should LTO cause any differences to this set of passes?
1309 void PassBuilder::addVectorPasses(OptimizationLevel Level,
1311 ThinOrFullLTOPhase LTOPhase) {
1312 const bool IsFullLTO = LTOPhase == ThinOrFullLTOPhase::FullLTOPostLink;
1313
1316
1317 // Drop dereferenceable assumes after vectorization, as they are no longer
1318 // needed and can inhibit further optimization.
1319 if (!isLTOPreLink(LTOPhase))
1320 FPM.addPass(DropUnnecessaryAssumesPass(/*DropDereferenceable=*/true));
1321
1323 if (IsFullLTO) {
1324 // The vectorizer may have significantly shortened a loop body; unroll
1325 // again. Unroll small loops to hide loop backedge latency and saturate any
1326 // parallel execution resources of an out-of-order processor. We also then
1327 // need to clean up redundancies and loop invariant code.
1328 // FIXME: It would be really good to use a loop-integrated instruction
1329 // combiner for cleanup here so that the unrolling and LICM can be pipelined
1330 // across the loop nests.
1331 // We do UnrollAndJam in a separate LPM to ensure it happens before unroll
1334 LoopUnrollAndJamPass(Level.getSpeedupLevel())));
1336 Level.getSpeedupLevel(), /*OnlyWhenForced=*/!PTO.LoopUnrolling,
1339 // Now that we are done with loop unrolling, be it either by LoopVectorizer,
1340 // or LoopUnroll passes, some variable-offset GEP's into alloca's could have
1341 // become constant-offset, thus enabling SROA and alloca promotion. Do so.
1342 // NOTE: we are very late in the pipeline, and we don't have any LICM
1343 // or SimplifyCFG passes scheduled after us, that would cleanup
1344 // the CFG mess this may create if allowed to modify CFG, so forbid that.
1346 }
1347
1348 if (!IsFullLTO) {
1349 // Eliminate loads by forwarding stores from the previous iteration to loads
1350 // of the current iteration.
1352 }
1353 // Cleanup after the loop optimization passes.
1354 FPM.addPass(InstCombinePass());
1355
1356 if (Level.getSpeedupLevel() > 1 && ExtraVectorizerPasses) {
1357 ExtraFunctionPassManager<ShouldRunExtraVectorPasses> ExtraPasses;
1358 // At higher optimization levels, try to clean up any runtime overlap and
1359 // alignment checks inserted by the vectorizer. We want to track correlated
1360 // runtime checks for two inner loops in the same outer loop, fold any
1361 // common computations, hoist loop-invariant aspects out of any outer loop,
1362 // and unswitch the runtime checks if possible. Once hoisted, we may have
1363 // dead (or speculatable) control flows or more combining opportunities.
1364 ExtraPasses.addPass(EarlyCSEPass());
1365 ExtraPasses.addPass(CorrelatedValuePropagationPass());
1366 ExtraPasses.addPass(InstCombinePass());
1367 LoopPassManager LPM;
1368 LPM.addPass(LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap,
1369 /*AllowSpeculation=*/true));
1370 LPM.addPass(SimpleLoopUnswitchPass(/* NonTrivial */ Level ==
1372 ExtraPasses.addPass(
1373 createFunctionToLoopPassAdaptor(std::move(LPM), /*UseMemorySSA=*/true));
1374 ExtraPasses.addPass(
1375 SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(true)));
1376 ExtraPasses.addPass(InstCombinePass());
1377 FPM.addPass(std::move(ExtraPasses));
1378 }
1379
1380 // Now that we've formed fast to execute loop structures, we do further
1381 // optimizations. These are run afterward as they might block doing complex
1382 // analyses and transforms such as what are needed for loop vectorization.
1383
1384 // Cleanup after loop vectorization, etc. Simplification passes like CVP and
1385 // GVN, loop transforms, and others have already run, so it's now better to
1386 // convert to more optimized IR using more aggressive simplify CFG options.
1387 // The extra sinking transform can create larger basic blocks, so do this
1388 // before SLP vectorization.
1389 FPM.addPass(SimplifyCFGPass(SimplifyCFGOptions()
1390 .forwardSwitchCondToPhi(true)
1391 .convertSwitchRangeToICmp(true)
1392 .convertSwitchToArithmetic(true)
1393 .convertSwitchToLookupTable(true)
1394 .needCanonicalLoops(false)
1395 .hoistCommonInsts(true)
1396 .sinkCommonInsts(true)));
1397
// Only the full-LTO post-link phase gets this extra SCCP/InstCombine/BDCE
// cleanup round.
1398 if (IsFullLTO) {
1399 FPM.addPass(SCCPPass());
1400 FPM.addPass(InstCombinePass());
1401 FPM.addPass(BDCEPass());
1402 }
1403
1404 // Optimize parallel scalar instruction chains into SIMD instructions.
1405 if (PTO.SLPVectorization) {
1406 FPM.addPass(SLPVectorizerPass());
// With the extra vectorizer passes enabled, follow SLP with an EarlyCSE run.
1407 if (Level.getSpeedupLevel() > 1 && ExtraVectorizerPasses) {
1408 FPM.addPass(EarlyCSEPass());
1409 }
1410 }
1411 // Enhance/cleanup vector code.
1412 FPM.addPass(VectorCombinePass());
1413
1414 if (!IsFullLTO) {
1415 FPM.addPass(InstCombinePass());
1416 // Unroll small loops to hide loop backedge latency and saturate any
1417 // parallel execution resources of an out-of-order processor. We also then
1418 // need to clean up redundancies and loop invariant code.
1419 // FIXME: It would be really good to use a loop-integrated instruction
1420 // combiner for cleanup here so that the unrolling and LICM can be pipelined
1421 // across the loop nests.
1422 // We do UnrollAndJam in a separate LPM to ensure it happens before unroll
1423 if (EnableUnrollAndJam && PTO.LoopUnrolling) {
1425 LoopUnrollAndJamPass(Level.getSpeedupLevel())));
1426 }
1427 FPM.addPass(LoopUnrollPass(LoopUnrollOptions(
1428 Level.getSpeedupLevel(), /*OnlyWhenForced=*/!PTO.LoopUnrolling,
1429 PTO.ForgetAllSCEVInLoopUnroll)));
1430 FPM.addPass(WarnMissedTransformationsPass());
1431 // Now that we are done with loop unrolling, be it either by LoopVectorizer,
1432 // or LoopUnroll passes, some variable-offset GEP's into alloca's could have
1433 // become constant-offset, thus enabling SROA and alloca promotion. Do so.
1434 // NOTE: we are very late in the pipeline, and we don't have any LICM
1435 // or SimplifyCFG passes scheduled after us, that would cleanup
1436 // the CFG mess this may create if allowed to modify CFG, so forbid that.
1437 FPM.addPass(SROAPass(SROAOptions::PreserveCFG));
1438 }
1439
1440 FPM.addPass(InferAlignmentPass());
1441 FPM.addPass(InstCombinePass());
1442
1443 // This is needed for two reasons:
1444 // 1. It works around problems that instcombine introduces, such as sinking
1445 // expensive FP divides into loops containing multiplications using the
1446 // divide result.
1447 // 2. It helps to clean up some loop-invariant code created by the loop
1448 // unroll pass when IsFullLTO=false.
1450 LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap,
1451 /*AllowSpeculation=*/true),
1452 /*UseMemorySSA=*/true));
1453
1454 // Now that we've vectorized and unrolled loops, we may have more refined
1455 // alignment information, try to re-derive it here.
1456 FPM.addPass(AlignmentFromAssumptionsPass());
1457}
1458
1461 ThinOrFullLTOPhase LTOPhase) {
// Assembles the module optimization stage. Module-level passes are added to
// MPM directly; function-level passes are collected in OptimizePM, which is
// wrapped in a module-to-function adaptor and appended to MPM. Returns MPM.
// LTOPreLink caches isLTOPreLink(LTOPhase) and gates several passes that are
// only added outside of the LTO pre-link phases.
// NOTE(review): the signature start and several statements are elided from
// this listing (see the gaps in the embedded line numbers).
1462 const bool LTOPreLink = isLTOPreLink(LTOPhase);
1464
1465 // Run partial inlining pass to partially inline functions that have
1466 // large bodies.
1469
1470 // Remove avail extern fns and globals definitions since we aren't compiling
1471 // an object file for later LTO. For LTO we want to preserve these so they
1472 // are eligible for inlining at link-time. Note if they are unreferenced they
1473 // will be removed by GlobalDCE later, so this only impacts referenced
1474 // available externally globals. Eventually they will be suppressed during
1475 // codegen, but eliminating here enables more opportunity for GlobalDCE as it
1476 // may make globals referenced by available external functions dead and saves
1477 // running remaining passes on the eliminated functions. These should be
1478 // preserved during prelinking for link-time inlining decisions.
1479 if (!LTOPreLink)
1481
1482 // Do RPO function attribute inference across the module to forward-propagate
1483 // attributes where applicable.
1484 // FIXME: Is this really an optimization rather than a canonicalization?
1486
1487 // Do a post inline PGO instrumentation and use pass. This is a context
1488 // sensitive PGO pass. We don't want to do this in LTOPreLink phase as
1489 // cross-module inline has not been done yet. The context sensitive
1490 // instrumentation is after all the inlines are done.
1491 if (!LTOPreLink && PGOOpt) {
1492 if (PGOOpt->CSAction == PGOOptions::CSIRInstr)
1493 addPGOInstrPasses(MPM, Level, /*RunProfileGen=*/true,
1494 /*IsCS=*/true, PGOOpt->AtomicCounterUpdate,
1495 PGOOpt->CSProfileGenFile, PGOOpt->ProfileRemappingFile);
1496 else if (PGOOpt->CSAction == PGOOptions::CSIRUse)
1497 addPGOInstrPasses(MPM, Level, /*RunProfileGen=*/false,
1498 /*IsCS=*/true, PGOOpt->AtomicCounterUpdate,
1499 PGOOpt->ProfileFile, PGOOpt->ProfileRemappingFile);
1500 }
1501
1502 // Re-compute GlobalsAA here prior to function passes. This is particularly
1503 // useful as the above will have inlined, DCE'ed, and function-attr
1504 // propagated everything. We should at this point have a reasonably minimal
1505 // and richly annotated call graph. By computing aliasing and mod/ref
1506 // information for all local globals here, the late loop passes and notably
1507 // the vectorizer will be able to use them to help recognize vectorizable
1508 // memory operations.
1511
1512 invokeOptimizerEarlyEPCallbacks(MPM, Level, LTOPhase);
1513
// Function-level passes from here on are accumulated in OptimizePM and added
// to MPM in one adaptor further below.
1514 FunctionPassManager OptimizePM;
1515
1516 // Only drop unnecessary assumes post-inline and post-link, as otherwise
1517 // additional uses of the affected value may be introduced through inlining
1518 // and CSE.
1519 if (!isLTOPreLink(LTOPhase))
1520 OptimizePM.addPass(DropUnnecessaryAssumesPass());
1521
1522 // Scheduling LoopVersioningLICM when inlining is over, because after that
1523 // we may see more accurate aliasing. Reason to run this late is that too
1524 // early versioning may prevent further inlining due to increase of code
1525 // size. Other optimizations which run later might benefit from the no-alias
1526 // assumption in the cloned loop.
1528 OptimizePM.addPass(
1530 // LoopVersioningLICM pass might increase new LICM opportunities.
1532 LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap,
1533 /*AllowSpeculation=*/true),
1534 /*UseMemorySSA=*/true));
1535 }
1536
1537 OptimizePM.addPass(Float2IntPass());
1539
1540 if (EnableMatrix) {
1541 OptimizePM.addPass(LowerMatrixIntrinsicsPass());
1542 OptimizePM.addPass(EarlyCSEPass());
1543 }
1544
1545 // CHR pass should only be applied with the profile information.
1546 // The check is to check the profile summary information in CHR.
1547 if (EnableCHR && Level == OptimizationLevel::O3)
1548 OptimizePM.addPass(ControlHeightReductionPass());
1549
1550 // FIXME: We need to run some loop optimizations to re-rotate loops after
1551 // simplifycfg and others undo their rotation.
1552
1553 // Optimize the loop execution. These passes operate on entire loop nests
1554 // rather than on each loop in an inside-out manner, and so they are actually
1555 // function passes.
1556
1557 invokeVectorizerStartEPCallbacks(OptimizePM, Level);
1558
1559 LoopPassManager LPM;
1560 // First rotate loops that may have been un-rotated by prior passes.
1561 // Disable header duplication at -Oz.
1563 Level != OptimizationLevel::Oz,
1564 LTOPreLink));
1565 // Some loops may have become dead by now. Try to delete them.
1566 // FIXME: see discussion in https://reviews.llvm.org/D112851,
1567 // this may need to be revisited once we run GVN before loop deletion
1568 // in the simplification pipeline.
1569 LPM.addPass(LoopDeletionPass());
1570
1571 if (PTO.LoopInterchange)
1572 LPM.addPass(LoopInterchangePass());
1573
1574 OptimizePM.addPass(
1575 createFunctionToLoopPassAdaptor(std::move(LPM), /*UseMemorySSA=*/false));
1576
1577 // FIXME: This may not be the right place in the pipeline.
1578 // We need to have the data to support the right place.
1579 if (PTO.LoopFusion)
1580 OptimizePM.addPass(LoopFusePass());
1581
1582 // Distribute loops to allow partial vectorization. I.e. isolate dependences
1583 // into separate loop that would otherwise inhibit vectorization. This is
1584 // currently only performed for loops marked with the metadata
1585 // llvm.loop.distribute=true or when -enable-loop-distribute is specified.
1586 OptimizePM.addPass(LoopDistributePass());
1587
1588 // Populates the VFABI attribute with the scalar-to-vector mappings
1589 // from the TargetLibraryInfo.
1590 OptimizePM.addPass(InjectTLIMappings());
1591
1592 addVectorPasses(Level, OptimizePM, LTOPhase);
1593
1594 invokeVectorizerEndEPCallbacks(OptimizePM, Level);
1595
1596 // LoopSink pass sinks instructions hoisted by LICM, which serves as a
1597 // canonicalization pass that enables other optimizations. As a result,
1598 // LoopSink pass needs to be a very late IR pass to avoid undoing LICM
1599 // result too early.
1600 OptimizePM.addPass(LoopSinkPass());
1601
1602 // And finally clean up LCSSA form before generating code.
1603 OptimizePM.addPass(InstSimplifyPass());
1604
1605 // This hoists/decomposes div/rem ops. It should run after other sink/hoist
1606 // passes to avoid re-sinking, but before SimplifyCFG because it can allow
1607 // flattening of blocks.
1608 OptimizePM.addPass(DivRemPairsPass());
1609
1610 // Try to annotate calls that were created during optimization.
1611 OptimizePM.addPass(
1612 TailCallElimPass(/*UpdateFunctionEntryCount=*/isInstrumentedPGOUse()));
1613
1614 // LoopSink (and other loop passes since the last simplifyCFG) might have
1615 // resulted in single-entry-single-exit or empty blocks. Clean up the CFG.
1616 OptimizePM.addPass(
1618 .convertSwitchRangeToICmp(true)
1619 .convertSwitchToArithmetic(true)
1620 .speculateUnpredictables(true)
1621 .hoistLoadsStoresWithCondFaulting(true)));
1622
1623 // Add the core optimizing pipeline.
1624 MPM.addPass(createModuleToFunctionPassAdaptor(std::move(OptimizePM),
1625 PTO.EagerlyInvalidateAnalyses));
1626
1627 // AllocToken transforms heap allocation calls; this needs to run late after
1628 // other allocation call transformations (such as those in InstCombine).
1629 if (!LTOPreLink)
1630 MPM.addPass(AllocTokenPass());
1631
1632 invokeOptimizerLastEPCallbacks(MPM, Level, LTOPhase);
1633
1634 // Split out cold code. Splitting is done late to avoid hiding context from
1635 // other optimizations and inadvertently regressing performance. The tradeoff
1636 // is that this has a higher code size cost than splitting early.
1637 if (EnableHotColdSplit && !LTOPreLink)
1639
1640 // Search the code for similar regions of code. If enough similar regions can
1641 // be found where extracting the regions into their own function will decrease
1642 // the size of the program, we extract the regions, and deduplicate the
1643 // structurally similar regions.
1644 if (EnableIROutliner)
1645 MPM.addPass(IROutlinerPass());
1646
1647 // Now we need to do some global optimization transforms.
1648 // FIXME: It would seem like these should come first in the optimization
1649 // pipeline and maybe be the bottom of the canonicalization pipeline? Weird
1650 // ordering here.
1651 MPM.addPass(GlobalDCEPass());
1653
1654 // Merge functions if requested. It has a better chance to merge functions
1655 // after ConstantMerge folded jump tables.
1656 if (PTO.MergeFunctions)
1658
1659 if (PTO.CallGraphProfile && !LTOPreLink)
1660 MPM.addPass(CGProfilePass(isLTOPostLink(LTOPhase)));
1661
1662 // RelLookupTableConverterPass runs later in LTO post-link pipeline.
1663 if (!LTOPreLink)
1665
1666 // Add devirtualization pass only when LTO is not enabled, as otherwise
1667 // the pass is already enabled in the LTO pipeline.
1668 if (PTO.DevirtualizeSpeculatively && LTOPhase == ThinOrFullLTOPhase::None) {
1669 // TODO: explore a better pipeline configuration that can improve
1670 // compilation time overhead.
1672 /*ExportSummary*/ nullptr,
1673 /*ImportSummary*/ nullptr,
1674 /*DevirtSpeculatively*/ PTO.DevirtualizeSpeculatively));
1675 MPM.addPass(LowerTypeTestsPass(nullptr, nullptr,
1677 // Given that the devirtualization creates more opportunities for inlining,
1678 // we run the Inliner again here to maximize the optimization gain we
1679 // get from devirtualization.
1680 // Also, we can't run devirtualization before inlining because the
1681 // devirtualization depends on the passes optimizing/eliminating vtable GVs
1682 // and those passes are only effective after inlining.
1683 if (EnableModuleInliner) {
1687 } else {
1690 /* MandatoryFirst */ true,
1692 }
1693 }
1694 return MPM;
1695}
1696
// Default per-module pipeline. At O0 this returns buildO0DefaultPipeline();
// otherwise passes are accumulated in MPM, addRequiredLTOPreLinkPasses() is
// applied when Phase is an LTO pre-link phase, and MPM is returned.
// NOTE(review): the signature and most statements of this function are elided
// from this listing (see the gaps in the embedded line numbers); the comments
// below describe steps whose code is not visible here.
1700 if (Level == OptimizationLevel::O0)
1701 return buildO0DefaultPipeline(Level, Phase);
1702
1704
1705 // Currently this pipeline is only invoked in an LTO pre link pass or when we
1706 // are not running LTO. If that changes the below checks may need updating.
1708
1709 // If we are invoking this in non-LTO mode, remove any MemProf related
1710 // attributes and metadata, as we don't know whether we are linking with
1711 // a library containing the necessary interfaces.
1714
1715 // Convert @llvm.global.annotations to !annotation metadata.
1717
1718 // Force any function attributes we want the rest of the pipeline to observe.
1720
1721 if (PGOOpt && PGOOpt->DebugInfoForProfiling)
1723
1724 // Apply module pipeline start EP callback.
1726
1727 // Add the core simplification pipeline.
1729
1730 // Now add the optimization pipeline.
1732
1733 if (PGOOpt && PGOOpt->PseudoProbeForProfiling &&
1734 PGOOpt->Action == PGOOptions::SampleUse)
1736
1737 // Emit annotation remarks.
1739
// LTO pre-link phases need the extra passes added by the helper below.
1740 if (isLTOPreLink(Phase))
1741 addRequiredLTOPreLinkPasses(MPM);
1742 return MPM;
1743}
1744
1747 bool EmitSummary) {
// FatLTO pipeline: adds EmbedBitcodePass(ThinLTO, EmitSummary), cleans up the
// IR with FatLtoCleanup, then optimizes the module. With ThinLTO and a
// sample-use profile the optimization is delegated to
// buildThinLTODefaultPipeline(); otherwise the else-branch is used, which adds
// the coroutine passes first when ThinLTO is set. Returns MPM.
// NOTE(review): the signature start and several statements are elided from
// this listing (see the gaps in the embedded line numbers).
1749 if (ThinLTO)
1751 else
1753 MPM.addPass(EmbedBitcodePass(ThinLTO, EmitSummary));
1754
1755 // Perform any cleanups to the IR that aren't suitable for per TU compilation,
1756 // like removing CFI/WPD related instructions. Note, we reuse
1757 // LowerTypeTestsPass to clean up type tests rather than duplicate that logic
1758 // in FatLtoCleanup.
1759 MPM.addPass(FatLtoCleanup());
1760
1761 // If we're doing FatLTO w/ CFI enabled, we don't want the type tests in the
1762 // object code, only in the bitcode section, so drop it before we run
1763 // module optimization and generate machine code. If llvm.type.test() isn't in
1764 // the IR, this won't do anything.
1765 MPM.addPass(
1767
1768 // Use the ThinLTO post-link pipeline with sample profiling.
1769 if (ThinLTO && PGOOpt && PGOOpt->Action == PGOOptions::SampleUse)
1770 MPM.addPass(buildThinLTODefaultPipeline(Level, /*ImportSummary=*/nullptr));
1771 else {
1772 // ModuleSimplification does not run the coroutine passes for
1773 // ThinLTOPreLink, so we need the coroutine passes to run for ThinLTO
1774 // builds, otherwise they will miscompile.
1775 if (ThinLTO) {
1776 // TODO: replace w/ buildCoroWrapper() when it takes phase and level into
1777 // consideration.
1778 CGSCCPassManager CGPM;
1782 MPM.addPass(CoroCleanupPass());
1783 }
1784
1785 // Otherwise, just use module optimization.
1786 MPM.addPass(
1788 // Emit annotation remarks.
1790 }
1791 return MPM;
1792}
1793
// ThinLTO pre-link pipeline (per the comments below, the module is only
// simplified here and the heavier optimizations are left for after the thin
// link). When UseCtxProfile is non-empty the function returns early, right
// after addRequiredLTOPreLinkPasses(); otherwise that helper is applied at the
// end. Returns MPM.
// NOTE(review): the signature and most statements of this function are elided
// from this listing (see the gaps in the embedded line numbers).
1796 if (Level == OptimizationLevel::O0)
1798
1800
1801 // Convert @llvm.global.annotations to !annotation metadata.
1803
1804 // Force any function attributes we want the rest of the pipeline to observe.
1806
1807 if (PGOOpt && PGOOpt->DebugInfoForProfiling)
1809
1810 // Apply module pipeline start EP callback.
1812
1813 // If we are planning to perform ThinLTO later, we don't bloat the code with
1814 // unrolling/vectorization/... now. Just simplify the module as much as we
1815 // can.
1818 // In pre-link, for ctx prof use, we stop here with an instrumented IR. We let
1819 // thinlto use the contextual info to perform imports; then use the contextual
1820 // profile in the post-thinlink phase.
1821 if (!UseCtxProfile.empty()) {
1822 addRequiredLTOPreLinkPasses(MPM);
1823 return MPM;
1824 }
1825
1826 // Run partial inlining pass to partially inline functions that have
1827 // large bodies.
1828 // FIXME: It isn't clear whether this is really the right place to run this
1829 // in ThinLTO. Because there is another canonicalization and simplification
1830 // phase that will run after the thin link, running this here ends up with
1831 // less information than will be available later and it may grow functions in
1832 // ways that aren't beneficial.
1835
1836 if (PGOOpt && PGOOpt->PseudoProbeForProfiling &&
1837 PGOOpt->Action == PGOOptions::SampleUse)
1839
1840 // Handle Optimizer{Early,Last}EPCallbacks added by clang on PreLink. Actual
1841 // optimization is going to be done in PostLink stage, but clang can't add
1842 // callbacks there in case of in-process ThinLTO called by linker.
1847
1848 // Emit annotation remarks.
1850
1851 addRequiredLTOPreLinkPasses(MPM);
1852
1853 return MPM;
1854}
1855
1857 OptimizationLevel Level, const ModuleSummaryIndex *ImportSummary) {
// ThinLTO pipeline taking an optional import summary (presumably
// buildThinLTODefaultPipeline; the name line is elided from this listing).
// ImportSummary may be null: the summary-driven WholeProgramDevirt and
// LowerTypeTests imports are only added when it is non-null. At O0 the
// function returns early after lowering leftover type tests and adding
// AllocToken and GlobalDCE. Returns MPM.
// NOTE(review): several statements are elided from this listing (see the gaps
// in the embedded line numbers).
1859
1860 // If we are invoking this without a summary index noting that we are linking
1861 // with a library containing the necessary APIs, remove any MemProf related
1862 // attributes and metadata.
1863 if (!ImportSummary || !ImportSummary->withSupportsHotColdNew())
1865
1866 if (ImportSummary) {
1867 // For ThinLTO we must apply the context disambiguation decisions early, to
1868 // ensure we can correctly match the callsites to summary data.
1871 ImportSummary, PGOOpt && PGOOpt->Action == PGOOptions::SampleUse));
1872
1873 // These passes import type identifier resolutions for whole-program
1874 // devirtualization and CFI. They must run early because other passes may
1875 // disturb the specific instruction patterns that these passes look for,
1876 // creating dependencies on resolutions that may not appear in the summary.
1877 //
1878 // For example, GVN may transform the pattern assume(type.test) appearing in
1879 // two basic blocks into assume(phi(type.test, type.test)), which would
1880 // transform a dependency on a WPD resolution into a dependency on a type
1881 // identifier resolution for CFI.
1882 //
1883 // Also, WPD has access to more precise information than ICP and can
1884 // devirtualize more effectively, so it should operate on the IR first.
1885 //
1886 // The WPD and LowerTypeTest passes need to run at -O0 to lower type
1887 // metadata and intrinsics.
1888 MPM.addPass(WholeProgramDevirtPass(nullptr, ImportSummary));
1889 MPM.addPass(LowerTypeTestsPass(nullptr, ImportSummary));
1890 }
1891
// At O0 only the lowering/cleanup passes below are added before returning.
1892 if (Level == OptimizationLevel::O0) {
1893 // Run a second time to clean up any type tests left behind by WPD for use
1894 // in ICP.
1895 MPM.addPass(LowerTypeTestsPass(nullptr, nullptr,
1898
1899 // AllocToken transforms heap allocation calls; this needs to run late after
1900 // other allocation call transformations (such as those in InstCombine).
1901 MPM.addPass(AllocTokenPass());
1902
1903 // Drop available_externally and unreferenced globals. This is necessary
1904 // with ThinLTO in order to avoid leaving undefined references to dead
1905 // globals in the object file.
1907 MPM.addPass(GlobalDCEPass());
1908 return MPM;
1909 }
1910 if (!UseCtxProfile.empty()) {
1911 MPM.addPass(
1913 } else {
1914 // Add the core simplification pipeline.
1917 }
1918 // Now add the optimization pipeline.
1921
1922 // Emit annotation remarks.
1924
1925 return MPM;
1926}
1927
// Forwards to buildPerModuleDefaultPipeline() rather than building a dedicated
// pipeline. NOTE(review): the signature and the remaining call arguments are
// elided from this listing.
1930 // FIXME: We should use a customized pre-link pipeline!
1931 return buildPerModuleDefaultPipeline(Level,
1933}
1934
1937 ModuleSummaryIndex *ExportSummary) {
1939
1941
1942 // If we are invoking this without a summary index noting that we are linking
1943 // with a library containing the necessary APIs, remove any MemProf related
1944 // attributes and metadata.
1945 if (!ExportSummary || !ExportSummary->withSupportsHotColdNew())
1947
1948 // Create a function that performs CFI checks for cross-DSO calls with targets
1949 // in the current module.
1950 MPM.addPass(CrossDSOCFIPass());
1951
1952 if (Level == OptimizationLevel::O0) {
1953 // The WPD and LowerTypeTest passes need to run at -O0 to lower type
1954 // metadata and intrinsics.
1955 MPM.addPass(WholeProgramDevirtPass(ExportSummary, nullptr));
1956 MPM.addPass(LowerTypeTestsPass(ExportSummary, nullptr));
1957 // Run a second time to clean up any type tests left behind by WPD for use
1958 // in ICP.
1959 MPM.addPass(LowerTypeTestsPass(nullptr, nullptr,
1961
1963
1964 // AllocToken transforms heap allocation calls; this needs to run late after
1965 // other allocation call transformations (such as those in InstCombine).
1966 MPM.addPass(AllocTokenPass());
1967
1969
1970 // Emit annotation remarks.
1972
1973 return MPM;
1974 }
1975
1976 if (PGOOpt && PGOOpt->Action == PGOOptions::SampleUse) {
1977 // Load sample profile before running the LTO optimization pipeline.
1978 MPM.addPass(SampleProfileLoaderPass(PGOOpt->ProfileFile,
1979 PGOOpt->ProfileRemappingFile,
1981 // Cache ProfileSummaryAnalysis once to avoid the potential need to insert
1982 // RequireAnalysisPass for PSI before subsequent non-module passes.
1984 }
1985
1986 // Try to run OpenMP optimizations, quick no-op if no OpenMP metadata present.
1988
1989 // Remove unused virtual tables to improve the quality of code generated by
1990 // whole-program devirtualization and bitset lowering.
1991 MPM.addPass(GlobalDCEPass(/*InLTOPostLink=*/true));
1992
1993 // Do basic inference of function attributes from known properties of system
1994 // libraries and other oracles.
1996
1997 if (Level.getSpeedupLevel() > 1) {
1999 CallSiteSplittingPass(), PTO.EagerlyInvalidateAnalyses));
2000
2001 // Indirect call promotion. This should promote all the targets that are
2002 // left by the earlier promotion pass that promotes intra-module targets.
2003 // This two-step promotion is to save the compile time. For LTO, it should
2004 // produce the same result as if we only do promotion here.
2006 true /* InLTO */, PGOOpt && PGOOpt->Action == PGOOptions::SampleUse));
2007
2008 // Promoting by-reference arguments to by-value exposes more constants to
2009 // IPSCCP.
2010 CGSCCPassManager CGPM;
2013 CGPM.addPass(
2016
2017 // Propagate constants at call sites into the functions they call. This
2018 // opens opportunities for globalopt (and inlining) by substituting function
2019 // pointers passed as arguments to direct uses of functions.
2020 MPM.addPass(IPSCCPPass(IPSCCPOptions(/*AllowFuncSpec=*/
2021 Level != OptimizationLevel::Os &&
2022 Level != OptimizationLevel::Oz)));
2023
2024 // Attach metadata to indirect call sites indicating the set of functions
2025 // they may target at run-time. This should follow IPSCCP.
2027 }
2028
2029 // Do RPO function attribute inference across the module to forward-propagate
2030 // attributes where applicable.
2031 // FIXME: Is this really an optimization rather than a canonicalization?
2033
2034 // Use in-range annotations on GEP indices to split globals where beneficial.
2035 MPM.addPass(GlobalSplitPass());
2036
2037 // Run whole program optimization of virtual call when the list of callees
2038 // is fixed.
2039 MPM.addPass(WholeProgramDevirtPass(ExportSummary, nullptr));
2040
2042 // Stop here at -O1.
2043 if (Level == OptimizationLevel::O1) {
2044 // The LowerTypeTestsPass needs to run to lower type metadata and the
2045 // type.test intrinsics. The pass does nothing if CFI is disabled.
2046 MPM.addPass(LowerTypeTestsPass(ExportSummary, nullptr));
2047 // Run a second time to clean up any type tests left behind by WPD for use
2048 // in ICP (which is performed earlier than this in the regular LTO
2049 // pipeline).
2050 MPM.addPass(LowerTypeTestsPass(nullptr, nullptr,
2052
2054
2055 // AllocToken transforms heap allocation calls; this needs to run late after
2056 // other allocation call transformations (such as those in InstCombine).
2057 MPM.addPass(AllocTokenPass());
2058
2060
2061 // Emit annotation remarks.
2063
2064 return MPM;
2065 }
2066
2067 // TODO: Skip to match buildCoroWrapper.
2068 MPM.addPass(CoroEarlyPass());
2069
2070 // Optimize globals to try and fold them into constants.
2071 MPM.addPass(GlobalOptPass());
2072
2073 // Promote any localized globals to SSA registers.
2075
2076 // Linking modules together can lead to duplicate global constants; only
2077 // keep one copy of each constant.
2079
2080 // Remove unused arguments from functions.
2082
2083 // Reduce the code after globalopt and ipsccp. Both can open up significant
2084 // simplification opportunities, and both can propagate functions through
2085 // function pointers. When this happens, we often have to resolve varargs
2086 // calls, etc, so let instcombine do this.
2087 FunctionPassManager PeepholeFPM;
2088 PeepholeFPM.addPass(InstCombinePass());
2089 if (Level.getSpeedupLevel() > 1)
2090 PeepholeFPM.addPass(AggressiveInstCombinePass());
2091 invokePeepholeEPCallbacks(PeepholeFPM, Level);
2092
2093 MPM.addPass(createModuleToFunctionPassAdaptor(std::move(PeepholeFPM),
2094 PTO.EagerlyInvalidateAnalyses));
2095
2096 // Lower variadic functions for supported targets prior to inlining.
2098
2099 // Note: historically, the PruneEH pass was run first to deduce nounwind and
2100 // generally clean up exception handling overhead. It isn't clear this is
2101 // valuable as the inliner doesn't currently care whether it is inlining an
2102 // invoke or a call.
2103 // Run the inliner now.
2104 if (EnableModuleInliner) {
2108 } else {
2111 /* MandatoryFirst */ true,
2114 }
2115
2116 // Perform context disambiguation after inlining, since that would reduce the
2117 // amount of additional cloning required to distinguish the allocation
2118 // contexts.
2121 /*Summary=*/nullptr,
2122 PGOOpt && PGOOpt->Action == PGOOptions::SampleUse));
2123
2124 // Optimize globals again after we ran the inliner.
2125 MPM.addPass(GlobalOptPass());
2126
2127 // Run the OpenMPOpt pass again after global optimizations.
2129
2130 // Garbage collect dead functions.
2131 MPM.addPass(GlobalDCEPass(/*InLTOPostLink=*/true));
2132
2133 // If we didn't decide to inline a function, check to see if we can
2134 // transform it to pass arguments by value instead of by reference.
2135 CGSCCPassManager CGPM;
2140
2142 // The IPO Passes may leave cruft around. Clean up after them.
2143 FPM.addPass(InstCombinePass());
2144 invokePeepholeEPCallbacks(FPM, Level);
2145
2148
2150
2151 // Do a post inline PGO instrumentation and use pass. This is a context
2152 // sensitive PGO pass.
2153 if (PGOOpt) {
2154 if (PGOOpt->CSAction == PGOOptions::CSIRInstr)
2155 addPGOInstrPasses(MPM, Level, /*RunProfileGen=*/true,
2156 /*IsCS=*/true, PGOOpt->AtomicCounterUpdate,
2157 PGOOpt->CSProfileGenFile, PGOOpt->ProfileRemappingFile);
2158 else if (PGOOpt->CSAction == PGOOptions::CSIRUse)
2159 addPGOInstrPasses(MPM, Level, /*RunProfileGen=*/false,
2160 /*IsCS=*/true, PGOOpt->AtomicCounterUpdate,
2161 PGOOpt->ProfileFile, PGOOpt->ProfileRemappingFile);
2162 }
2163
2164 // Break up allocas
2166
2167 // LTO provides additional opportunities for tailcall elimination due to
2168 // link-time inlining, and visibility of nocapture attribute.
2169 FPM.addPass(
2170 TailCallElimPass(/*UpdateFunctionEntryCount=*/isInstrumentedPGOUse()));
2171
2172 // Run a few AA driver optimizations here and now to clean up the code.
2173 MPM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM),
2174 PTO.EagerlyInvalidateAnalyses));
2175
2176 MPM.addPass(
2178
2179 // Require the GlobalsAA analysis for the module so we can query it within
2180 // MainFPM.
2183 // Invalidate AAManager so it can be recreated and pick up the newly
2184 // available GlobalsAA.
2185 MPM.addPass(
2187 }
2188
2189 FunctionPassManager MainFPM;
2191 LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap,
2192 /*AllowSpeculation=*/true),
2193 /*UseMemorySSA=*/true));
2194
2195 if (RunNewGVN)
2196 MainFPM.addPass(NewGVNPass());
2197 else
2198 MainFPM.addPass(GVNPass());
2199
2200 // Remove dead memcpy()'s.
2201 MainFPM.addPass(MemCpyOptPass());
2202
2203 // Nuke dead stores.
2204 MainFPM.addPass(DSEPass());
2205 MainFPM.addPass(MoveAutoInitPass());
2207
2208 invokeVectorizerStartEPCallbacks(MainFPM, Level);
2209
2210 LoopPassManager LPM;
2211 if (EnableLoopFlatten && Level.getSpeedupLevel() > 1)
2212 LPM.addPass(LoopFlattenPass());
2213 LPM.addPass(IndVarSimplifyPass());
2214 LPM.addPass(LoopDeletionPass());
2215 // FIXME: Add loop interchange.
2216
2217 // Unroll small loops and perform peeling.
2218 LPM.addPass(LoopFullUnrollPass(Level.getSpeedupLevel(),
2219 /* OnlyWhenForced= */ !PTO.LoopUnrolling,
2220 PTO.ForgetAllSCEVInLoopUnroll));
2221 // The loop passes in LPM (LoopFullUnrollPass) do not preserve MemorySSA.
2222 // *All* loop passes must preserve it, in order to be able to use it.
2223 MainFPM.addPass(
2224 createFunctionToLoopPassAdaptor(std::move(LPM), /*UseMemorySSA=*/false));
2225
2226 MainFPM.addPass(LoopDistributePass());
2227
2228 addVectorPasses(Level, MainFPM, ThinOrFullLTOPhase::FullLTOPostLink);
2229
2230 invokeVectorizerEndEPCallbacks(MainFPM, Level);
2231
2232 // Run the OpenMPOpt CGSCC pass again late.
2235
2236 invokePeepholeEPCallbacks(MainFPM, Level);
2237 MainFPM.addPass(JumpThreadingPass());
2238 MPM.addPass(createModuleToFunctionPassAdaptor(std::move(MainFPM),
2239 PTO.EagerlyInvalidateAnalyses));
2240
2241 // Lower type metadata and the type.test intrinsic. This pass supports
2242 // clang's control flow integrity mechanisms (-fsanitize=cfi*) and needs
2243 // to be run at link time if CFI is enabled. This pass does nothing if
2244 // CFI is disabled.
2245 MPM.addPass(LowerTypeTestsPass(ExportSummary, nullptr));
2246 // Run a second time to clean up any type tests left behind by WPD for use
2247 // in ICP (which is performed earlier than this in the regular LTO pipeline).
2248 MPM.addPass(LowerTypeTestsPass(nullptr, nullptr,
2250
2251 // Enable splitting late in the FullLTO post-link pipeline.
2254
2255 // Add late LTO optimization passes.
2256 FunctionPassManager LateFPM;
2257
2258 // LoopSink pass sinks instructions hoisted by LICM, which serves as a
2259 // canonicalization pass that enables other optimizations. As a result,
2260 // LoopSink pass needs to be a very late IR pass to avoid undoing LICM
2261 // result too early.
2262 LateFPM.addPass(LoopSinkPass());
2263
2264 // This hoists/decomposes div/rem ops. It should run after other sink/hoist
2265 // passes to avoid re-sinking, but before SimplifyCFG because it can allow
2266 // flattening of blocks.
2267 LateFPM.addPass(DivRemPairsPass());
2268
2269 // Delete basic blocks, which optimization passes may have killed.
2271 .convertSwitchRangeToICmp(true)
2272 .convertSwitchToArithmetic(true)
2273 .hoistCommonInsts(true)
2274 .speculateUnpredictables(true)));
2275 MPM.addPass(createModuleToFunctionPassAdaptor(std::move(LateFPM)));
2276
2277 // Drop bodies of available externally objects to improve GlobalDCE.
2279
2280 // Now that we have optimized the program, discard unreachable functions.
2281 MPM.addPass(GlobalDCEPass(/*InLTOPostLink=*/true));
2282
2283 if (PTO.MergeFunctions)
2285
2287
2288 if (PTO.CallGraphProfile)
2289 MPM.addPass(CGProfilePass(/*InLTOPostLink=*/true));
2290
2291 MPM.addPass(CoroCleanupPass());
2292
2293 // AllocToken transforms heap allocation calls; this needs to run late after
2294 // other allocation call transformations (such as those in InstCombine).
2295 MPM.addPass(AllocTokenPass());
2296
2298
2299 // Emit annotation remarks.
2301
2302 return MPM;
2303}
2304
2308 assert(Level == OptimizationLevel::O0 &&
2309 "buildO0DefaultPipeline should only be used with O0");
2310
2312
2313 // Perform pseudo probe instrumentation in O0 mode. This is for the
2314 // consistency between different build modes. For example, a LTO build can be
2315 // mixed with an O0 prelink and an O2 postlink. Loading a sample profile in
2316 // the postlink will require pseudo probe instrumentation in the prelink.
2317 if (PGOOpt && PGOOpt->PseudoProbeForProfiling)
2319
2320 if (PGOOpt && (PGOOpt->Action == PGOOptions::IRInstr ||
2321 PGOOpt->Action == PGOOptions::IRUse))
2323 MPM,
2324 /*RunProfileGen=*/(PGOOpt->Action == PGOOptions::IRInstr),
2325 /*IsCS=*/false, PGOOpt->AtomicCounterUpdate, PGOOpt->ProfileFile,
2326 PGOOpt->ProfileRemappingFile);
2327
2328 // Instrument function entry and exit before all inlining.
2330 EntryExitInstrumenterPass(/*PostInlining=*/false)));
2331
2333
2334 if (PGOOpt && PGOOpt->DebugInfoForProfiling)
2336
2337 if (PGOOpt && PGOOpt->Action == PGOOptions::SampleUse) {
2338 // Explicitly disable sample loader inlining and use flattened profile in O0
2339 // pipeline.
2340 MPM.addPass(SampleProfileLoaderPass(PGOOpt->ProfileFile,
2341 PGOOpt->ProfileRemappingFile,
2342 ThinOrFullLTOPhase::None, nullptr,
2343 /*DisableSampleProfileInlining=*/true,
2344 /*UseFlattenedProfile=*/true));
2345 // Cache ProfileSummaryAnalysis once to avoid the potential need to insert
2346 // RequireAnalysisPass for PSI before subsequent non-module passes.
2348 }
2349
2351
2352 // Build a minimal pipeline based on the semantics required by LLVM,
2353 // which is just that always inlining occurs. Further, disable generating
2354 // lifetime intrinsics to avoid enabling further optimizations during
2355 // code generation.
2357 /*InsertLifetimeIntrinsics=*/false));
2358
2359 if (PTO.MergeFunctions)
2361
2362 if (EnableMatrix)
2363 MPM.addPass(
2365
2366 if (!CGSCCOptimizerLateEPCallbacks.empty()) {
2367 CGSCCPassManager CGPM;
2369 if (!CGPM.isEmpty())
2371 }
2372 if (!LateLoopOptimizationsEPCallbacks.empty()) {
2373 LoopPassManager LPM;
2375 if (!LPM.isEmpty()) {
2377 createFunctionToLoopPassAdaptor(std::move(LPM))));
2378 }
2379 }
2380 if (!LoopOptimizerEndEPCallbacks.empty()) {
2381 LoopPassManager LPM;
2383 if (!LPM.isEmpty()) {
2385 createFunctionToLoopPassAdaptor(std::move(LPM))));
2386 }
2387 }
2388 if (!ScalarOptimizerLateEPCallbacks.empty()) {
2391 if (!FPM.isEmpty())
2392 MPM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM)));
2393 }
2394
2396
2397 if (!VectorizerStartEPCallbacks.empty()) {
2400 if (!FPM.isEmpty())
2401 MPM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM)));
2402 }
2403
2404 if (!VectorizerEndEPCallbacks.empty()) {
2407 if (!FPM.isEmpty())
2408 MPM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM)));
2409 }
2410
2412
2413 // AllocToken transforms heap allocation calls; this needs to run late after
2414 // other allocation call transformations (such as those in InstCombine).
2415 if (!isLTOPreLink(Phase))
2416 MPM.addPass(AllocTokenPass());
2417
2419
2420 if (isLTOPreLink(Phase))
2421 addRequiredLTOPreLinkPasses(MPM);
2422
2424
2425 return MPM;
2426}
2427
2429 AAManager AA;
2430
2431 // The order in which these are registered determines their priority when
2432 // being queried.
2433
2434 // Add any target-specific alias analyses that should be run early.
2435 if (TM)
2436 TM->registerEarlyDefaultAliasAnalyses(AA);
2437
2438 // First we register the basic alias analysis that provides the majority of
2439 // per-function local AA logic. This is a stateless, on-demand local set of
2440 // AA techniques.
2441 AA.registerFunctionAnalysis<BasicAA>();
2442
2443 // Next we query fast, specialized alias analyses that wrap IR-embedded
2444 // information about aliasing.
2445 AA.registerFunctionAnalysis<ScopedNoAliasAA>();
2446 AA.registerFunctionAnalysis<TypeBasedAA>();
2447
2448 // Add support for querying global aliasing information when available.
2449 // Because the `AAManager` is a function analysis and `GlobalsAA` is a module
2450 // analysis, all that the `AAManager` can do is query for any *cached*
2451 // results from `GlobalsAA` through a readonly proxy.
2453 AA.registerModuleAnalysis<GlobalsAA>();
2454
2455 // Add target-specific alias analyses.
2456 if (TM)
2457 TM->registerDefaultAliasAnalyses(AA);
2458
2459 return AA;
2460}
2461
/// Reports whether an instrumentation-based profile is being consumed: true
/// when PGOOpt is set and its Action is PGOOptions::IRUse, or when the
/// UseCtxProfile option (a contextual profile file name) is non-empty.
2462bool PassBuilder::isInstrumentedPGOUse() const {
2463 return (PGOOpt && PGOOpt->Action == PGOOptions::IRUse) ||
2464 !UseCtxProfile.empty();
2465}
aarch64 falkor hwpf fix Falkor HW Prefetch Fix Late Phase
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
AggressiveInstCombiner - Combine expression patterns to form expressions with fewer,...
Provides passes to inlining "always_inline" functions.
This is the interface for LLVM's primary stateless and local alias analysis.
This file provides the interface for LLVM's Call Graph Profile pass.
This header provides classes for managing passes over SCCs of the call graph.
#define clEnumValN(ENUMVAL, FLAGNAME, DESC)
This file provides the interface for a simple, fast CSE pass.
This file provides a pass which clones the current module and runs the provided pass pipeline on the ...
This file provides a pass manager that only runs its passes if the provided marker analysis has been ...
Super simple passes to force specific function attrs from the commandline into the IR for debugging p...
Provides passes for computing function attributes based on interprocedural analyses.
This file provides the interface for LLVM's Global Value Numbering pass which eliminates fully redund...
This is the interface for a simple mod/ref and alias analysis over globals.
AcceleratorCodeSelection - Identify all functions reachable from a kernel, removing those that are un...
This header defines various interfaces for pass management in LLVM.
Interfaces for passes which infer implicit function attributes from the name and signature of functio...
This file provides the primary interface to the instcombine pass.
Defines passes for running instruction simplification across chunks of IR.
This file provides the interface for LLVM's PGO Instrumentation lowering pass.
See the comments on JumpThreadingPass.
static LVOptions Options
Definition LVOptions.cpp:25
This file implements the Loop Fusion pass.
This header defines the LoopLoadEliminationPass object.
This header provides classes for managing a pipeline of passes over loops in LLVM IR.
The header file for the LowerConstantIntrinsics pass as used by the new pass manager.
The header file for the LowerExpectIntrinsic pass as used by the new pass manager.
This pass performs merges of loads and stores on both sides of a diamond (hammock).
This file provides the interface for LLVM's Global Value Numbering pass.
This header enumerates the LLVM-provided high-level optimization levels.
This file provides the interface for IR based instrumentation passes ( (profile-gen,...
Define option tunables for PGO.
static void addAnnotationRemarksPass(ModulePassManager &MPM)
static InlineParams getInlineParamsFromOptLevel(OptimizationLevel Level)
static CoroConditionalWrapper buildCoroWrapper(ThinOrFullLTOPhase Phase)
static bool isLTOPreLink(ThinOrFullLTOPhase Phase)
static bool isLTOPostLink(ThinOrFullLTOPhase Phase)
This file implements relative lookup table converter that converts lookup tables to relative lookup t...
This file provides the interface for LLVM's Scalar Replacement of Aggregates pass.
This file provides the interface for the pseudo probe implementation for AutoFDO.
This file provides the interface for the sampled PGO loader pass.
This is the interface for a metadata-based scoped no-alias analysis.
This file provides the interface for the pass responsible for both simplifying and canonicalizing the...
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
This is the interface for a metadata-based TBAA.
Defines the virtual file system interface vfs::FileSystem.
A manager for alias analyses.
A module pass that rewrites heap allocations to use token-enabled allocation functions based on vario...
Definition AllocToken.h:36
Inlines functions marked as "always_inline".
Argument promotion pass.
Assign a GUID to functions as metadata.
Analysis pass providing a never-invalidated alias analysis result.
Simple pass that canonicalizes aliases.
A pass that merges duplicate global constants into a single constant.
This class implements a trivial dead store elimination.
Eliminate dead arguments (and return values) from functions.
A pass that transforms external global definitions into declarations.
Pass embeds a copy of the module optimized with the provided pass pipeline into a global variable.
A pass manager to run a set of extra loop passes if the MarkerTy analysis is present.
The core GVN pass object.
Definition GVN.h:128
Pass to remove unused function declarations.
Definition GlobalDCE.h:38
Optimize globals that never have their address taken.
Definition GlobalOpt.h:25
Pass to perform split of global variables.
Definition GlobalSplit.h:26
Analysis pass providing a never-invalidated alias analysis result.
Pass to outline cold regions.
Pass to perform interprocedural constant propagation.
Definition SCCP.h:48
Pass to outline similar regions.
Definition IROutliner.h:444
Run instruction simplification across each instruction in the function.
Instrumentation based profiling lowering pass.
This pass performs 'jump threading', which looks at blocks that have multiple predecessors and multip...
Performs Loop Invariant Code Motion Pass.
Definition LICM.h:66
Loop unroll pass that only does full loop unrolling and peeling.
Performs Loop Idiom Recognize Pass.
Performs Loop Inst Simplify Pass.
A simple loop rotation transformation.
Performs basic CFG simplifications to assist other loop passes.
A pass that does profile-guided sinking of instructions into loops.
Definition LoopSink.h:33
A simple loop rotation transformation.
Loop unroll pass that will support both full and partial unrolling.
Strips MemProf attributes and metadata.
Merge identical functions.
The module inliner pass for the new pass manager.
Module pass, wrapping the inliner pass.
Definition Inliner.h:65
void addModulePass(T Pass)
Add a module pass that runs before the CGSCC passes.
Definition Inliner.h:81
Class to hold module path string table and global value map, and encapsulate methods for operating on...
Simple pass that provides a name to every anonymous globals.
Additional 'norecurse' attribute deduction during postlink LTO phase.
OpenMP optimizations pass.
Definition OpenMPOpt.h:42
static LLVM_ABI const OptimizationLevel O3
Optimize for fast execution as much as possible.
static LLVM_ABI const OptimizationLevel Oz
A very specialized mode that will optimize for code size at any and all costs.
static LLVM_ABI const OptimizationLevel O0
Disable as many optimizations as possible.
static LLVM_ABI const OptimizationLevel Os
Similar to O2 but tries to optimize for small code size instead of fast execution without triggering ...
static LLVM_ABI const OptimizationLevel O2
Optimize for fast execution as much as possible without triggering significant incremental compile ti...
static LLVM_ABI const OptimizationLevel O1
Optimize quickly without destroying debuggability.
The indirect function call promotion pass.
The instrumentation (profile-instr-gen) pass for IR based PGO.
The instrumentation (profile-instr-gen) pass for IR based PGO.
The profile annotation (profile-instr-use) pass for IR based PGO.
The profile size based optimization pass for memory intrinsics.
Pass to remove unused function declarations.
LLVM_ABI void invokeFullLinkTimeOptimizationLastEPCallbacks(ModulePassManager &MPM, OptimizationLevel Level)
LLVM_ABI ModuleInlinerWrapperPass buildInlinerPipeline(OptimizationLevel Level, ThinOrFullLTOPhase Phase)
Construct the module pipeline that performs inlining as well as the inlining-driven cleanups.
LLVM_ABI void invokeOptimizerEarlyEPCallbacks(ModulePassManager &MPM, OptimizationLevel Level, ThinOrFullLTOPhase Phase)
LLVM_ABI void invokeVectorizerStartEPCallbacks(FunctionPassManager &FPM, OptimizationLevel Level)
LLVM_ABI AAManager buildDefaultAAPipeline()
Build the default AAManager with the default alias analysis pipeline registered.
LLVM_ABI void invokeCGSCCOptimizerLateEPCallbacks(CGSCCPassManager &CGPM, OptimizationLevel Level)
LLVM_ABI ModulePassManager buildThinLTOPreLinkDefaultPipeline(OptimizationLevel Level)
Build a pre-link, ThinLTO-targeting default optimization pipeline to a pass manager.
LLVM_ABI void addPGOInstrPassesForO0(ModulePassManager &MPM, bool RunProfileGen, bool IsCS, bool AtomicCounterUpdate, std::string ProfileFile, std::string ProfileRemappingFile)
Add PGO instrumentation passes for O0 only.
LLVM_ABI void invokeScalarOptimizerLateEPCallbacks(FunctionPassManager &FPM, OptimizationLevel Level)
LLVM_ABI ModulePassManager buildPerModuleDefaultPipeline(OptimizationLevel Level, ThinOrFullLTOPhase Phase=ThinOrFullLTOPhase::None)
Build a per-module default optimization pipeline.
LLVM_ABI void invokePipelineStartEPCallbacks(ModulePassManager &MPM, OptimizationLevel Level)
LLVM_ABI void invokeVectorizerEndEPCallbacks(FunctionPassManager &FPM, OptimizationLevel Level)
LLVM_ABI ModulePassManager buildO0DefaultPipeline(OptimizationLevel Level, ThinOrFullLTOPhase Phase=ThinOrFullLTOPhase::None)
Build an O0 pipeline with the minimal semantically required passes.
LLVM_ABI FunctionPassManager buildFunctionSimplificationPipeline(OptimizationLevel Level, ThinOrFullLTOPhase Phase)
Construct the core LLVM function canonicalization and simplification pipeline.
LLVM_ABI void invokePeepholeEPCallbacks(FunctionPassManager &FPM, OptimizationLevel Level)
LLVM_ABI void invokePipelineEarlySimplificationEPCallbacks(ModulePassManager &MPM, OptimizationLevel Level, ThinOrFullLTOPhase Phase)
LLVM_ABI void invokeLoopOptimizerEndEPCallbacks(LoopPassManager &LPM, OptimizationLevel Level)
LLVM_ABI ModulePassManager buildLTODefaultPipeline(OptimizationLevel Level, ModuleSummaryIndex *ExportSummary)
Build an LTO default optimization pipeline to a pass manager.
LLVM_ABI ModulePassManager buildModuleInlinerPipeline(OptimizationLevel Level, ThinOrFullLTOPhase Phase)
Construct the module pipeline that performs inlining with module inliner pass.
LLVM_ABI ModulePassManager buildThinLTODefaultPipeline(OptimizationLevel Level, const ModuleSummaryIndex *ImportSummary)
Build a ThinLTO default optimization pipeline to a pass manager.
LLVM_ABI void invokeLateLoopOptimizationsEPCallbacks(LoopPassManager &LPM, OptimizationLevel Level)
LLVM_ABI void invokeFullLinkTimeOptimizationEarlyEPCallbacks(ModulePassManager &MPM, OptimizationLevel Level)
LLVM_ABI ModulePassManager buildFatLTODefaultPipeline(OptimizationLevel Level, bool ThinLTO, bool EmitSummary)
Build a fat object default optimization pipeline.
LLVM_ABI ModulePassManager buildModuleSimplificationPipeline(OptimizationLevel Level, ThinOrFullLTOPhase Phase)
Construct the core LLVM module canonicalization and simplification pipeline.
LLVM_ABI ModulePassManager buildModuleOptimizationPipeline(OptimizationLevel Level, ThinOrFullLTOPhase LTOPhase)
Construct the core LLVM module optimization pipeline.
LLVM_ABI void invokeOptimizerLastEPCallbacks(ModulePassManager &MPM, OptimizationLevel Level, ThinOrFullLTOPhase Phase)
LLVM_ABI ModulePassManager buildLTOPreLinkDefaultPipeline(OptimizationLevel Level)
Build a pre-link, LTO-targeting default optimization pipeline to a pass manager.
LLVM_ATTRIBUTE_MINSIZE std::enable_if_t<!std::is_same_v< PassT, PassManager > > addPass(PassT &&Pass)
bool isEmpty() const
Returns if the pass manager contains any passes.
unsigned LicmMssaNoAccForPromotionCap
Tuning option to disable promotion to scalars in LICM with MemorySSA, if the number of access is too ...
Definition PassBuilder.h:78
bool SLPVectorization
Tuning option to enable/disable slp loop vectorization, set based on opt level.
Definition PassBuilder.h:56
int InlinerThreshold
Tuning option to override the default inliner threshold.
Definition PassBuilder.h:92
bool LoopFusion
Tuning option to enable/disable loop fusion. Its default value is false.
Definition PassBuilder.h:66
bool CallGraphProfile
Tuning option to enable/disable call graph profile.
Definition PassBuilder.h:82
bool MergeFunctions
Tuning option to enable/disable function merging.
Definition PassBuilder.h:89
bool ForgetAllSCEVInLoopUnroll
Tuning option to forget all SCEV loops in LoopUnroll.
Definition PassBuilder.h:70
unsigned LicmMssaOptCap
Tuning option to cap the number of calls to retrieve clobbering accesses in MemorySSA,...
Definition PassBuilder.h:74
bool LoopInterleaving
Tuning option to set loop interleaving on/off, set based on opt level.
Definition PassBuilder.h:48
LLVM_ABI PipelineTuningOptions()
Constructor sets pipeline tuning defaults based on cl::opts.
bool LoopUnrolling
Tuning option to enable/disable loop unrolling. Its default value is true.
Definition PassBuilder.h:59
bool LoopInterchange
Tuning option to enable/disable loop interchange.
Definition PassBuilder.h:63
bool LoopVectorization
Tuning option to enable/disable loop vectorization, set based on opt level.
Definition PassBuilder.h:52
Reassociate commutative expressions.
Definition Reassociate.h:74
A pass to do RPO deduction and propagation of function attributes.
This pass performs function-level constant propagation and merging.
Definition SCCP.h:30
The sample profiler data loader pass.
Analysis pass providing a never-invalidated alias analysis result.
This pass transforms loops that contain branches or switches on loop- invariant conditions to have mu...
A pass to simplify and canonicalize the CFG of a function.
Definition SimplifyCFG.h:30
Analysis pass providing a never-invalidated alias analysis result.
Optimize scalar/vector interactions in IR using target cost models.
Interfaces for registering analysis passes, producing common pass manager configurations,...
Abstract Attribute helper functions.
Definition Attributor.h:165
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
ValuesClass values(OptsTy... Options)
Helper to build a ValuesClass by forwarding a variable number of arguments as an initializer list to ...
initializer< Ty > init(const Ty &Val)
@ Assume
Do not drop type tests (default).
@ All
Drop only llvm.assumes using type test value.
This is an optimization pass for GlobalISel generic memory operations.
LLVM_ABI cl::opt< bool > EnableKnowledgeRetention
static cl::opt< bool > RunNewGVN("enable-newgvn", cl::init(false), cl::Hidden, cl::desc("Run the NewGVN pass"))
static cl::opt< bool > DisablePreInliner("disable-preinline", cl::init(false), cl::Hidden, cl::desc("Disable pre-instrumentation inliner"))
static cl::opt< bool > EnableJumpTableToSwitch("enable-jump-table-to-switch", cl::desc("Enable JumpTableToSwitch pass (default = off)"))
static cl::opt< bool > PerformMandatoryInliningsFirst("mandatory-inlining-first", cl::init(false), cl::Hidden, cl::desc("Perform mandatory inlinings module-wide, before performing " "inlining"))
static cl::opt< bool > RunPartialInlining("enable-partial-inlining", cl::init(false), cl::Hidden, cl::desc("Run Partial inlining pass"))
static cl::opt< bool > EnableGVNSink("enable-gvn-sink", cl::desc("Enable the GVN sinking pass (default = off)"))
static cl::opt< bool > EnableLoopHeaderDuplication("enable-loop-header-duplication", cl::init(false), cl::Hidden, cl::desc("Enable loop header duplication at any optimization level"))
static cl::opt< bool > EnableModuleInliner("enable-module-inliner", cl::init(false), cl::Hidden, cl::desc("Enable module inliner"))
static cl::opt< bool > EnableEagerlyInvalidateAnalyses("eagerly-invalidate-analyses", cl::init(true), cl::Hidden, cl::desc("Eagerly invalidate more analyses in default pipelines"))
static cl::opt< bool > EnableMatrix("enable-matrix", cl::init(false), cl::Hidden, cl::desc("Enable lowering of the matrix intrinsics"))
ModuleToFunctionPassAdaptor createModuleToFunctionPassAdaptor(FunctionPassT &&Pass, bool EagerlyInvalidate=false)
A function to deduce a function pass type and wrap it in the templated adaptor.
cl::opt< std::string > UseCtxProfile("use-ctx-profile", cl::init(""), cl::Hidden, cl::desc("Use the specified contextual profile file"))
static cl::opt< bool > EnableSampledInstr("enable-sampled-instrumentation", cl::init(false), cl::Hidden, cl::desc("Enable profile instrumentation sampling (default = off)"))
static cl::opt< bool > EnableLoopFlatten("enable-loop-flatten", cl::init(false), cl::Hidden, cl::desc("Enable the LoopFlatten Pass"))
static cl::opt< InliningAdvisorMode > UseInlineAdvisor("enable-ml-inliner", cl::init(InliningAdvisorMode::Default), cl::Hidden, cl::desc("Enable ML policy for inliner. Currently trained for -Oz only"), cl::values(clEnumValN(InliningAdvisorMode::Default, "default", "Heuristics-based inliner version"), clEnumValN(InliningAdvisorMode::Development, "development", "Use development mode (runtime-loadable model)"), clEnumValN(InliningAdvisorMode::Release, "release", "Use release mode (AOT-compiled model)")))
PassManager< LazyCallGraph::SCC, CGSCCAnalysisManager, LazyCallGraph &, CGSCCUpdateResult & > CGSCCPassManager
The CGSCC pass manager.
static cl::opt< bool > EnableUnrollAndJam("enable-unroll-and-jam", cl::init(false), cl::Hidden, cl::desc("Enable Unroll And Jam Pass"))
ThinOrFullLTOPhase
This enumerates the LLVM full LTO or ThinLTO optimization phases.
Definition Pass.h:77
@ FullLTOPreLink
Full LTO prelink phase.
Definition Pass.h:85
@ ThinLTOPostLink
ThinLTO postlink (backend compile) phase.
Definition Pass.h:83
@ None
No LTO/ThinLTO behavior needed.
Definition Pass.h:79
@ FullLTOPostLink
Full LTO postlink (backend compile) phase.
Definition Pass.h:87
@ ThinLTOPreLink
ThinLTO prelink (summary) phase.
Definition Pass.h:81
PassManager< Loop, LoopAnalysisManager, LoopStandardAnalysisResults &, LPMUpdater & > LoopPassManager
The Loop pass manager.
static cl::opt< bool > EnableConstraintElimination("enable-constraint-elimination", cl::init(true), cl::Hidden, cl::desc("Enable pass to eliminate conditions based on linear constraints"))
ModuleToPostOrderCGSCCPassAdaptor createModuleToPostOrderCGSCCPassAdaptor(CGSCCPassT &&Pass)
A function to deduce a CGSCC pass type and wrap it in the templated adaptor.
static cl::opt< bool > EnablePGOInlineDeferral("enable-npm-pgo-inline-deferral", cl::init(true), cl::Hidden, cl::desc("Enable inline deferral during PGO"))
Flag to enable inline deferral during PGO.
FunctionToLoopPassAdaptor createFunctionToLoopPassAdaptor(LoopPassT &&Pass, bool UseMemorySSA=false)
A function to deduce a loop pass type and wrap it in the templated adaptor.
CGSCCToFunctionPassAdaptor createCGSCCToFunctionPassAdaptor(FunctionPassT &&Pass, bool EagerlyInvalidate=false, bool NoRerun=false)
A function to deduce a function pass type and wrap it in the templated adaptor.
cl::opt< bool > ForgetSCEVInLoopUnroll
PassManager< Module > ModulePassManager
Convenience typedef for a pass manager over modules.
static cl::opt< bool > EnablePostPGOLoopRotation("enable-post-pgo-loop-rotation", cl::init(true), cl::Hidden, cl::desc("Run the loop rotation transformation after PGO instrumentation"))
LLVM_ABI bool AreStatisticsEnabled()
Check if statistics are enabled.
static cl::opt< std::string > InstrumentColdFuncOnlyPath("instrument-cold-function-only-path", cl::init(""), cl::desc("File path for cold function only instrumentation(requires use " "with --pgo-instrument-cold-function-only)"), cl::Hidden)
static cl::opt< bool > EnableGlobalAnalyses("enable-global-analyses", cl::init(true), cl::Hidden, cl::desc("Enable inter-procedural analyses"))
static cl::opt< bool > EnableDFAJumpThreading("enable-dfa-jump-thread", cl::desc("Enable DFA jump threading"), cl::init(false), cl::Hidden)
static cl::opt< bool > FlattenedProfileUsed("flattened-profile-used", cl::init(false), cl::Hidden, cl::desc("Indicate the sample profile being used is flattened, i.e., " "no inline hierarchy exists in the profile"))
static cl::opt< bool > ExtraVectorizerPasses("extra-vectorizer-passes", cl::init(false), cl::Hidden, cl::desc("Run cleanup optimization passes after vectorization"))
static cl::opt< bool > EnableHotColdSplit("hot-cold-split", cl::desc("Enable hot-cold splitting pass"))
cl::opt< bool > EnableMemProfContextDisambiguation
Enable MemProf context disambiguation for thin link.
PassManager< Function > FunctionPassManager
Convenience typedef for a pass manager over functions.
LLVM_ABI InlineParams getInlineParams()
Generate the parameters to tune the inline cost analysis based only on the commandline options.
cl::opt< bool > PGOInstrumentColdFunctionOnly
static cl::opt< bool > EnableLoopInterchange("enable-loopinterchange", cl::init(false), cl::Hidden, cl::desc("Enable the LoopInterchange Pass"))
static cl::opt< bool > EnableCHR("enable-chr", cl::init(true), cl::Hidden, cl::desc("Enable control height reduction optimization (CHR)"))
static cl::opt< bool > EnableMergeFunctions("enable-merge-functions", cl::init(false), cl::Hidden, cl::desc("Enable function merging as part of the optimization pipeline"))
static cl::opt< bool > EnableDevirtualizeSpeculatively("enable-devirtualize-speculatively", cl::desc("Enable speculative devirtualization optimization"), cl::init(false))
static cl::opt< bool > EnableGVNHoist("enable-gvn-hoist", cl::desc("Enable the GVN hoisting pass (default = off)"))
cl::opt< unsigned > SetLicmMssaNoAccForPromotionCap
static cl::opt< bool > EnableIROutliner("ir-outliner", cl::init(false), cl::Hidden, cl::desc("Enable ir outliner pass"))
static cl::opt< AttributorRunOption > AttributorRun("attributor-enable", cl::Hidden, cl::init(AttributorRunOption::NONE), cl::desc("Enable the attributor inter-procedural deduction pass"), cl::values(clEnumValN(AttributorRunOption::ALL, "all", "enable all attributor runs"), clEnumValN(AttributorRunOption::MODULE, "module", "enable module-wide attributor runs"), clEnumValN(AttributorRunOption::CGSCC, "cgscc", "enable call graph SCC attributor runs"), clEnumValN(AttributorRunOption::NONE, "none", "disable attributor runs")))
static cl::opt< int > PreInlineThreshold("preinline-threshold", cl::Hidden, cl::init(75), cl::desc("Control the amount of inlining in pre-instrumentation inliner " "(default = 75)"))
static cl::opt< bool > UseLoopVersioningLICM("enable-loop-versioning-licm", cl::init(false), cl::Hidden, cl::desc("Enable the experimental Loop Versioning LICM pass"))
cl::opt< unsigned > MaxDevirtIterations("max-devirt-iterations", cl::ReallyHidden, cl::init(4))
cl::opt< unsigned > SetLicmMssaOptCap
A DCE pass that assumes instructions are dead until proven otherwise.
Definition ADCE.h:31
Pass to convert @llvm.global.annotations to !annotation metadata.
This pass attempts to minimize the number of assumes without losing any information.
Hoist/decompose integer division and remainder instructions to enable CFG improvements and better codegen.
Definition DivRemPairs.h:23
A simple and fast domtree-based CSE pass.
Definition EarlyCSE.h:31
Pass which forces specific function attributes into the IR, primarily as a debugging tool.
A simple and fast domtree-based GVN pass to hoist common expressions from sibling branches.
Definition GVN.h:415
Uses an "inverted" value numbering to decide the similarity of expressions and sinks similar expressi...
Definition GVN.h:422
A set of parameters to control various transforms performed by IPSCCP pass.
Definition SCCP.h:35
A pass which infers function attributes from the names and signatures of function declarations in a module.
Provides context on when an inline advisor is constructed in the pipeline (e.g., link phase,...
Thresholds to tune inline cost analysis.
Definition InlineCost.h:207
std::optional< int > HotCallSiteThreshold
Threshold to use when the callsite is considered hot.
Definition InlineCost.h:224
int DefaultThreshold
The default threshold to start with for a callee.
Definition InlineCost.h:209
std::optional< bool > EnableDeferral
Indicate whether we should allow inline deferral.
Definition InlineCost.h:237
std::optional< int > HintThreshold
Threshold to use for callees with inline hint.
Definition InlineCost.h:212
Options for the frontend instrumentation based profiling pass.
A no-op pass template which simply forces a specific analysis result to be invalidated.
Pass to forward loads in a loop around the backedge to subsequent iterations.
A set of parameters used to control various transforms performed by the LoopUnroll pass.
The LoopVectorize Pass.
Computes function attributes in post-order over the call graph.
A utility pass template to force an analysis result to be available.