LLVM 23.0.0git
PassBuilderPipelines.cpp
Go to the documentation of this file.
1//===- Construction of pass pipelines -------------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8/// \file
9///
10/// This file provides the implementation of the PassBuilder based on our
11/// static pass registry as well as related functionality. It also provides
12/// helpers to aid in analyzing, debugging, and testing passes and pass
13/// pipelines.
14///
15//===----------------------------------------------------------------------===//
16
17#include "llvm/ADT/Statistic.h"
29#include "llvm/IR/PassManager.h"
30#include "llvm/Pass.h"
153
154using namespace llvm;
155
156namespace llvm {
157
159 "enable-ml-inliner", cl::init(InliningAdvisorMode::Default), cl::Hidden,
160 cl::desc("Enable ML policy for inliner. Currently trained for -Oz only"),
162 "Heuristics-based inliner version"),
164 "Use development mode (runtime-loadable model)"),
166 "Use release mode (AOT-compiled model)")));
167
168/// Flag to enable inline deferral during PGO.
169static cl::opt<bool>
170 EnablePGOInlineDeferral("enable-npm-pgo-inline-deferral", cl::init(true),
172 cl::desc("Enable inline deferral during PGO"));
173
/// Hidden boolean flag `enable-module-inliner`, initialized to false. Per its
/// description, it enables the module inliner.
174static cl::opt<bool> EnableModuleInliner("enable-module-inliner",
175 cl::init(false), cl::Hidden,
176 cl::desc("Enable module inliner"));
177
179 "mandatory-inlining-first", cl::init(false), cl::Hidden,
180 cl::desc("Perform mandatory inlinings module-wide, before performing "
181 "inlining"));
182
184 "eagerly-invalidate-analyses", cl::init(true), cl::Hidden,
185 cl::desc("Eagerly invalidate more analyses in default pipelines"));
186
188 "enable-merge-functions", cl::init(false), cl::Hidden,
189 cl::desc("Enable function merging as part of the optimization pipeline"));
190
192 "enable-post-pgo-loop-rotation", cl::init(true), cl::Hidden,
193 cl::desc("Run the loop rotation transformation after PGO instrumentation"));
194
196 "enable-global-analyses", cl::init(true), cl::Hidden,
197 cl::desc("Enable inter-procedural analyses"));
198
/// Hidden boolean flag `enable-partial-inlining`, initialized to false. Per its
/// description, it requests that the partial inlining pass be run.
199static cl::opt<bool> RunPartialInlining("enable-partial-inlining",
200 cl::init(false), cl::Hidden,
201 cl::desc("Run Partial inlining pass"));
202
204 "extra-vectorizer-passes", cl::init(false), cl::Hidden,
205 cl::desc("Run cleanup optimization passes after vectorization"));
206
/// Hidden boolean flag `enable-newgvn`, initialized to false. Where this file
/// adds redundancy elimination to a function pass manager, a true value selects
/// NewGVNPass and a false value selects GVNPass.
207static cl::opt<bool> RunNewGVN("enable-newgvn", cl::init(false), cl::Hidden,
208 cl::desc("Run the NewGVN pass"));
209
/// Hidden boolean flag `enable-loopinterchange`, initialized to false. Per its
/// description, it enables the LoopInterchange pass.
210static cl::opt<bool>
211 EnableLoopInterchange("enable-loopinterchange", cl::init(false), cl::Hidden,
212 cl::desc("Enable the LoopInterchange Pass"));
213
/// Hidden boolean flag `enable-unroll-and-jam`, initialized to false. Per its
/// description, it enables the Unroll And Jam pass.
214static cl::opt<bool> EnableUnrollAndJam("enable-unroll-and-jam",
215 cl::init(false), cl::Hidden,
216 cl::desc("Enable Unroll And Jam Pass"));
217
218static cl::opt<bool> EnableLoopFlatten("enable-loop-flatten", cl::init(false),
220 cl::desc("Enable the LoopFlatten Pass"));
221
222// Experimentally allow loop header duplication. This should allow for better
223// optimization at Oz, since loop-idiom recognition can then recognize things
224// like memcpy. If this ends up being useful for many targets, we should drop
225// this flag and make a code generation option that can be controlled
226// independent of the opt level and exposed through the frontend.
228 "enable-loop-header-duplication", cl::init(false), cl::Hidden,
229 cl::desc("Enable loop header duplication at any optimization level"));
230
/// Hidden boolean flag `enable-dfa-jump-thread`, initialized to false. Per its
/// description, it enables DFA jump threading.
231static cl::opt<bool>
232 EnableDFAJumpThreading("enable-dfa-jump-thread",
233 cl::desc("Enable DFA jump threading"),
234 cl::init(false), cl::Hidden);
235
/// Boolean flag `hot-cold-split`. Unlike its neighbours, this declaration gives
/// neither an explicit cl::init value nor cl::Hidden. Per its description, it
/// enables the hot-cold splitting pass.
236static cl::opt<bool>
237 EnableHotColdSplit("hot-cold-split",
238 cl::desc("Enable hot-cold splitting pass"));
239
240static cl::opt<bool> EnableIROutliner("ir-outliner", cl::init(false),
242 cl::desc("Enable ir outliner pass"));
243
/// Hidden boolean flag `disable-preinline`, initialized to false. Per its
/// description, setting it disables the pre-instrumentation inliner.
244static cl::opt<bool>
245 DisablePreInliner("disable-preinline", cl::init(false), cl::Hidden,
246 cl::desc("Disable pre-instrumentation inliner"));
247
249 "preinline-threshold", cl::Hidden, cl::init(75),
250 cl::desc("Control the amount of inlining in pre-instrumentation inliner "
251 "(default = 75)"));
252
/// Boolean flag `enable-gvn-hoist`; its description states it is off by
/// default. When set, this file adds GVNHoistPass to the function pass manager
/// it is building.
253static cl::opt<bool>
254 EnableGVNHoist("enable-gvn-hoist",
255 cl::desc("Enable the GVN hoisting pass (default = off)"));
256
/// Boolean flag `enable-gvn-sink`; its description states it is off by
/// default. When set, this file adds GVNSinkPass followed by a SimplifyCFGPass
/// to the function pass manager it is building.
257static cl::opt<bool>
258 EnableGVNSink("enable-gvn-sink",
259 cl::desc("Enable the GVN sinking pass (default = off)"));
260
262 "enable-jump-table-to-switch",
263 cl::desc("Enable JumpTableToSwitch pass (default = off)"));
264
265// This option is used in simplifying testing SampleFDO optimizations for
266// profile loading.
/// Hidden boolean flag `enable-chr`, initialized to true. Per its description,
/// it enables the control height reduction optimization (CHR).
267static cl::opt<bool>
268 EnableCHR("enable-chr", cl::init(true), cl::Hidden,
269 cl::desc("Enable control height reduction optimization (CHR)"));
270
272 "flattened-profile-used", cl::init(false), cl::Hidden,
273 cl::desc("Indicate the sample profile being used is flattened, i.e., "
274 "no inline hierarchy exists in the profile"));
275
/// Hidden boolean flag `enable-matrix`, initialized to false. Per its
/// description, it enables lowering of the matrix intrinsics.
276static cl::opt<bool>
277 EnableMatrix("enable-matrix", cl::init(false), cl::Hidden,
278 cl::desc("Enable lowering of the matrix intrinsics"));
279
281 "enable-constraint-elimination", cl::init(true), cl::Hidden,
282 cl::desc(
283 "Enable pass to eliminate conditions based on linear constraints"));
284
286 "attributor-enable", cl::Hidden, cl::init(AttributorRunOption::NONE),
287 cl::desc("Enable the attributor inter-procedural deduction pass"),
289 "enable all full attributor runs"),
291 "enable all attributor-light runs"),
293 "enable module-wide attributor runs"),
295 "enable module-wide attributor-light runs"),
297 "enable call graph SCC attributor runs"),
299 "enable call graph SCC attributor-light runs"),
300 clEnumValN(AttributorRunOption::NONE, "none",
301 "disable attributor runs")));
302
304 "enable-sampled-instrumentation", cl::init(false), cl::Hidden,
305 cl::desc("Enable profile instrumentation sampling (default = off)"));
307 "enable-loop-versioning-licm", cl::init(false), cl::Hidden,
308 cl::desc("Enable the experimental Loop Versioning LICM pass"));
309
311 "instrument-cold-function-only-path", cl::init(""),
312 cl::desc("File path for cold function only instrumentation(requires use "
313 "with --pgo-instrument-cold-function-only)"),
314 cl::Hidden);
315
316// TODO: There is a similar flag in WPD pass, we should consolidate them by
317// parsing the option only once in PassBuilder and share it across both places.
319 "enable-devirtualize-speculatively",
320 cl::desc("Enable speculative devirtualization optimization"),
321 cl::init(false));
322
325
327} // namespace llvm
328
346
347namespace llvm {
349} // namespace llvm
350
352 OptimizationLevel Level) {
353 for (auto &C : PeepholeEPCallbacks)
354 C(FPM, Level);
355}
358 for (auto &C : LateLoopOptimizationsEPCallbacks)
359 C(LPM, Level);
360}
362 OptimizationLevel Level) {
363 for (auto &C : LoopOptimizerEndEPCallbacks)
364 C(LPM, Level);
365}
368 for (auto &C : ScalarOptimizerLateEPCallbacks)
369 C(FPM, Level);
370}
372 OptimizationLevel Level) {
373 for (auto &C : CGSCCOptimizerLateEPCallbacks)
374 C(CGPM, Level);
375}
377 OptimizationLevel Level) {
378 for (auto &C : VectorizerStartEPCallbacks)
379 C(FPM, Level);
380}
382 OptimizationLevel Level) {
383 for (auto &C : VectorizerEndEPCallbacks)
384 C(FPM, Level);
385}
387 OptimizationLevel Level,
389 for (auto &C : OptimizerEarlyEPCallbacks)
390 C(MPM, Level, Phase);
391}
393 OptimizationLevel Level,
395 for (auto &C : OptimizerLastEPCallbacks)
396 C(MPM, Level, Phase);
397}
400 for (auto &C : FullLinkTimeOptimizationEarlyEPCallbacks)
401 C(MPM, Level);
402}
405 for (auto &C : FullLinkTimeOptimizationLastEPCallbacks)
406 C(MPM, Level);
407}
409 OptimizationLevel Level) {
410 for (auto &C : PipelineStartEPCallbacks)
411 C(MPM, Level);
412}
415 for (auto &C : PipelineEarlySimplificationEPCallbacks)
416 C(MPM, Level, Phase);
417}
418
419// Helper to add AnnotationRemarksPass.
422 // Count the stats for InstCount and FunctionPropertiesAnalysis
423 if (AreStatisticsEnabled()) {
425 MPM.addPass(
427 }
428}
429
430// Helper to check if the current compilation phase is preparing for LTO
435
436// Helper to check if the current compilation phase is LTO backend
441
442// Helper to wrap conditionally Coro passes.
444 // TODO: Skip passes according to Phase.
445 ModulePassManager CoroPM;
446 CoroPM.addPass(CoroEarlyPass());
447 CGSCCPassManager CGPM;
448 CGPM.addPass(CoroSplitPass());
449 CoroPM.addPass(createModuleToPostOrderCGSCCPassAdaptor(std::move(CGPM)));
450 CoroPM.addPass(CoroCleanupPass());
451 CoroPM.addPass(GlobalDCEPass());
452 return CoroConditionalWrapper(std::move(CoroPM));
453}
454
455// TODO: Investigate the cost/benefit of tail call elimination on debugging.
457PassBuilder::buildO1FunctionSimplificationPipeline(OptimizationLevel Level,
459
461
463 FPM.addPass(CountVisitsPass());
464
465 // Form SSA out of local memory accesses after breaking apart aggregates into
466 // scalars.
467 FPM.addPass(SROAPass(SROAOptions::ModifyCFG));
468
469 // Catch trivial redundancies
470 FPM.addPass(EarlyCSEPass(true /* Enable mem-ssa. */));
471
472 // Hoisting of scalars and load expressions.
473 FPM.addPass(
474 SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(true)));
475 FPM.addPass(InstCombinePass());
476
477 FPM.addPass(LibCallsShrinkWrapPass());
478
479 invokePeepholeEPCallbacks(FPM, Level);
480
481 FPM.addPass(
482 SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(true)));
483
484 // Form canonically associated expression trees, and simplify the trees using
485 // basic mathematical properties. For example, this will form (nearly)
486 // minimal multiplication trees.
487 FPM.addPass(ReassociatePass());
488
489 // Add the primary loop simplification pipeline.
490 // FIXME: Currently this is split into two loop pass pipelines because we run
491 // some function passes in between them. These can and should be removed
492 // and/or replaced by scheduling the loop pass equivalents in the correct
493 // positions. But those equivalent passes aren't powerful enough yet.
494 // Specifically, `SimplifyCFGPass` and `InstCombinePass` are currently still
495 // used. We have `LoopSimplifyCFGPass`, which isn't powerful enough yet to
496 // fully replace `SimplifyCFGPass`, and the closest to the other we have is
497 // `LoopInstSimplify`.
498 LoopPassManager LPM1, LPM2;
499
500 // Simplify the loop body. We do this initially to clean up after other loop
501 // passes run, either when iterating on a loop or on inner loops with
502 // implications on the outer loop.
503 LPM1.addPass(LoopInstSimplifyPass());
504 LPM1.addPass(LoopSimplifyCFGPass());
505
506 // Try to remove as much code from the loop header as possible,
507 // to reduce amount of IR that will have to be duplicated. However,
508 // do not perform speculative hoisting the first time as LICM
509 // will destroy metadata that may not need to be destroyed if run
510 // after loop rotation.
511 // TODO: Investigate promotion cap for O1.
512 LPM1.addPass(LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap,
513 /*AllowSpeculation=*/false));
514
515 LPM1.addPass(
516 LoopRotatePass(/*EnableHeaderDuplication=*/true, isLTOPreLink(Phase)));
517 // TODO: Investigate promotion cap for O1.
518 LPM1.addPass(LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap,
519 /*AllowSpeculation=*/true));
520 LPM1.addPass(SimpleLoopUnswitchPass());
522 LPM1.addPass(LoopFlattenPass());
523
524 LPM2.addPass(LoopIdiomRecognizePass());
525 LPM2.addPass(IndVarSimplifyPass());
526
528
529 LPM2.addPass(LoopDeletionPass());
530
531 // Do not enable unrolling in PreLinkThinLTO phase during sample PGO
532 // because it changes the IR and makes profile annotation in the backend compile
533 // inaccurate. The normal unroller doesn't pay attention to forced full unroll
534 // attributes so we need to make sure and allow the full unroll pass to pay
535 // attention to it.
536 if (Phase != ThinOrFullLTOPhase::ThinLTOPreLink || !PGOOpt ||
537 PGOOpt->Action != PGOOptions::SampleUse)
538 LPM2.addPass(LoopFullUnrollPass(Level.getSpeedupLevel(),
539 /* OnlyWhenForced= */ !PTO.LoopUnrolling,
540 PTO.ForgetAllSCEVInLoopUnroll));
541
543
544 FPM.addPass(createFunctionToLoopPassAdaptor(std::move(LPM1),
545 /*UseMemorySSA=*/true));
546 FPM.addPass(
547 SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(true)));
548 FPM.addPass(InstCombinePass());
549 // The loop passes in LPM2 (LoopFullUnrollPass) do not preserve MemorySSA.
550 // *All* loop passes must preserve it, in order to be able to use it.
551 FPM.addPass(createFunctionToLoopPassAdaptor(std::move(LPM2),
552 /*UseMemorySSA=*/false));
553
554 // Delete small array after loop unroll.
555 FPM.addPass(SROAPass(SROAOptions::ModifyCFG));
556
557 // Specially optimize memory movement as it doesn't look like dataflow in SSA.
558 FPM.addPass(MemCpyOptPass());
559
560 // Sparse conditional constant propagation.
561 // FIXME: It isn't clear why we do this *after* loop passes rather than
562 // before...
563 FPM.addPass(SCCPPass());
564
565 // Delete dead bit computations (instcombine runs after to fold away the dead
566 // computations, and then ADCE will run later to exploit any new DCE
567 // opportunities that creates).
568 FPM.addPass(BDCEPass());
569
570 // Run instcombine after redundancy and dead bit elimination to exploit
571 // opportunities opened up by them.
572 FPM.addPass(InstCombinePass());
573 invokePeepholeEPCallbacks(FPM, Level);
574
575 FPM.addPass(CoroElidePass());
576
578
579 // Finally, do an expensive DCE pass to catch all the dead code exposed by
580 // the simplifications and basic cleanup after all the simplifications.
581 // TODO: Investigate if this is too expensive.
582 FPM.addPass(ADCEPass());
583 FPM.addPass(
584 SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(true)));
585 FPM.addPass(InstCombinePass());
586 invokePeepholeEPCallbacks(FPM, Level);
587
588 return FPM;
589}
590
594 assert(Level != OptimizationLevel::O0 && "Must request optimizations!");
595
596 // The O1 pipeline has a separate pipeline creation function to simplify
597 // construction readability.
598 if (Level.getSpeedupLevel() == 1)
599 return buildO1FunctionSimplificationPipeline(Level, Phase);
600
602
605
606 // Form SSA out of local memory accesses after breaking apart aggregates into
607 // scalars.
609
610 // Catch trivial redundancies
611 FPM.addPass(EarlyCSEPass(true /* Enable mem-ssa. */));
614
615 // Hoisting of scalars and load expressions.
616 if (EnableGVNHoist)
617 FPM.addPass(GVNHoistPass());
618
619 // Global value numbering based sinking.
620 if (EnableGVNSink) {
621 FPM.addPass(GVNSinkPass());
622 FPM.addPass(
623 SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(true)));
624 }
625
626 // Speculative execution if the target has divergent branches; otherwise nop.
627 FPM.addPass(SpeculativeExecutionPass(/* OnlyIfDivergentTarget =*/true));
628
629 // Optimize based on known information about branches, and cleanup afterward.
632
633 // Jump table to switch conversion.
638
639 FPM.addPass(
640 SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(true)));
643
644 if (!Level.isOptimizingForSize())
646
647 invokePeepholeEPCallbacks(FPM, Level);
648
649 // For PGO use pipeline, try to optimize memory intrinsics such as memcpy
650 // using the size value profile. Don't perform this when optimizing for size.
651 if (PGOOpt && PGOOpt->Action == PGOOptions::IRUse &&
652 !Level.isOptimizingForSize())
654
655 FPM.addPass(TailCallElimPass(/*UpdateFunctionEntryCount=*/
656 isInstrumentedPGOUse()));
657 FPM.addPass(
658 SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(true)));
659
660 // Form canonically associated expression trees, and simplify the trees using
661 // basic mathematical properties. For example, this will form (nearly)
662 // minimal multiplication trees.
664
667
668 // Add the primary loop simplification pipeline.
669 // FIXME: Currently this is split into two loop pass pipelines because we run
670 // some function passes in between them. These can and should be removed
671 // and/or replaced by scheduling the loop pass equivalents in the correct
672 // positions. But those equivalent passes aren't powerful enough yet.
673 // Specifically, `SimplifyCFGPass` and `InstCombinePass` are currently still
674 // used. We have `LoopSimplifyCFGPass`, which isn't powerful enough yet to
675 // fully replace `SimplifyCFGPass`, and the closest to the other we have is
676 // `LoopInstSimplify`.
677 LoopPassManager LPM1, LPM2;
678
679 // Simplify the loop body. We do this initially to clean up after other loop
680 // passes run, either when iterating on a loop or on inner loops with
681 // implications on the outer loop.
682 LPM1.addPass(LoopInstSimplifyPass());
683 LPM1.addPass(LoopSimplifyCFGPass());
684
685 // Try to remove as much code from the loop header as possible,
686 // to reduce amount of IR that will have to be duplicated. However,
687 // do not perform speculative hoisting the first time as LICM
688 // will destroy metadata that may not need to be destroyed if run
689 // after loop rotation.
690 // TODO: Investigate promotion cap for O1.
691 LPM1.addPass(LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap,
692 /*AllowSpeculation=*/false));
693
694 // Disable header duplication in loop rotation at -Oz.
696 Level != OptimizationLevel::Oz,
698 // TODO: Investigate promotion cap for O1.
699 LPM1.addPass(LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap,
700 /*AllowSpeculation=*/true));
701 LPM1.addPass(
702 SimpleLoopUnswitchPass(/* NonTrivial */ Level == OptimizationLevel::O3));
704 LPM1.addPass(LoopFlattenPass());
705
706 LPM2.addPass(LoopIdiomRecognizePass());
707 LPM2.addPass(IndVarSimplifyPass());
708
709 {
711 ExtraPasses.addPass(SimpleLoopUnswitchPass(/* NonTrivial */ Level ==
713 LPM2.addPass(std::move(ExtraPasses));
714 }
715
717
718 LPM2.addPass(LoopDeletionPass());
719
720 // Do not enable unrolling in PreLinkThinLTO phase during sample PGO
721 // because it changes the IR and makes profile annotation in the backend compile
722 // inaccurate. The normal unroller doesn't pay attention to forced full unroll
723 // attributes so we need to make sure and allow the full unroll pass to pay
724 // attention to it.
725 if (Phase != ThinOrFullLTOPhase::ThinLTOPreLink || !PGOOpt ||
726 PGOOpt->Action != PGOOptions::SampleUse)
727 LPM2.addPass(LoopFullUnrollPass(Level.getSpeedupLevel(),
728 /* OnlyWhenForced= */ !PTO.LoopUnrolling,
729 PTO.ForgetAllSCEVInLoopUnroll));
730
732
733 FPM.addPass(createFunctionToLoopPassAdaptor(std::move(LPM1),
734 /*UseMemorySSA=*/true));
735 FPM.addPass(
736 SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(true)));
738 // The loop passes in LPM2 (LoopIdiomRecognizePass, IndVarSimplifyPass,
739 // LoopDeletionPass and LoopFullUnrollPass) do not preserve MemorySSA.
740 // *All* loop passes must preserve it, in order to be able to use it.
741 FPM.addPass(createFunctionToLoopPassAdaptor(std::move(LPM2),
742 /*UseMemorySSA=*/false));
743
744 // Delete small array after loop unroll.
746
747 // Try vectorization/scalarization transforms that are both improvements
748 // themselves and can allow further folds with GVN and InstCombine.
749 FPM.addPass(VectorCombinePass(/*TryEarlyFoldsOnly=*/true));
750
751 // Eliminate redundancies.
753 if (RunNewGVN)
754 FPM.addPass(NewGVNPass());
755 else
756 FPM.addPass(GVNPass());
757
758 // Sparse conditional constant propagation.
759 // FIXME: It isn't clear why we do this *after* loop passes rather than
760 // before...
761 FPM.addPass(SCCPPass());
762
763 // Delete dead bit computations (instcombine runs after to fold away the dead
764 // computations, and then ADCE will run later to exploit any new DCE
765 // opportunities that creates).
766 FPM.addPass(BDCEPass());
767
768 // Run instcombine after redundancy and dead bit elimination to exploit
769 // opportunities opened up by them.
771 invokePeepholeEPCallbacks(FPM, Level);
772
773 // Re-consider control flow based optimizations after redundancy elimination,
774 // redo DCE, etc.
777
780
781 // Finally, do an expensive DCE pass to catch all the dead code exposed by
782 // the simplifications and basic cleanup after all the simplifications.
783 // TODO: Investigate if this is too expensive.
784 FPM.addPass(ADCEPass());
785
786 // Specially optimize memory movement as it doesn't look like dataflow in SSA.
787 FPM.addPass(MemCpyOptPass());
788
789 FPM.addPass(DSEPass());
791
793 LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap,
794 /*AllowSpeculation=*/true),
795 /*UseMemorySSA=*/true));
796
797 FPM.addPass(CoroElidePass());
798
800
802 .convertSwitchRangeToICmp(true)
803 .convertSwitchToArithmetic(true)
804 .hoistCommonInsts(true)
805 .sinkCommonInsts(true)));
807 invokePeepholeEPCallbacks(FPM, Level);
808
809 return FPM;
810}
811
812void PassBuilder::addRequiredLTOPreLinkPasses(ModulePassManager &MPM) {
815}
816
817void PassBuilder::addPreInlinerPasses(ModulePassManager &MPM,
818 OptimizationLevel Level,
819 ThinOrFullLTOPhase LTOPhase) {
820 assert(Level != OptimizationLevel::O0 && "Not expecting O0 here!");
822 return;
823 InlineParams IP;
824
826
827 // FIXME: The hint threshold has the same value used by the regular inliner
828 // when not optimizing for size. This should probably be lowered after
829 // performance testing.
830 // FIXME: this comment is cargo-culted from the old pass manager; revisit.
831 IP.HintThreshold = Level.isOptimizingForSize() ? PreInlineThreshold : 325;
833 IP, /* MandatoryFirst */ true,
835 CGSCCPassManager &CGPipeline = MIWP.getPM();
836
838 FPM.addPass(SROAPass(SROAOptions::ModifyCFG));
839 FPM.addPass(EarlyCSEPass()); // Catch trivial redundancies.
840 FPM.addPass(SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(
841 true))); // Merge & remove basic blocks.
842 FPM.addPass(InstCombinePass()); // Combine silly sequences.
843 invokePeepholeEPCallbacks(FPM, Level);
844
845 CGPipeline.addPass(createCGSCCToFunctionPassAdaptor(
846 std::move(FPM), PTO.EagerlyInvalidateAnalyses));
847
848 MPM.addPass(std::move(MIWP));
849
850 // Delete anything that is now dead to make sure that we don't instrument
851 // dead code. Instrumentation can end up keeping dead code around and
852 // dramatically increase code size.
853 MPM.addPass(GlobalDCEPass());
854}
855
856void PassBuilder::addPostPGOLoopRotation(ModulePassManager &MPM,
857 OptimizationLevel Level) {
859 // Disable header duplication in loop rotation at -Oz.
862 LoopRotatePass(EnableLoopHeaderDuplication ||
863 Level != OptimizationLevel::Oz),
864 /*UseMemorySSA=*/false),
865 PTO.EagerlyInvalidateAnalyses));
866 }
867}
868
869void PassBuilder::addPGOInstrPasses(ModulePassManager &MPM,
870 OptimizationLevel Level, bool RunProfileGen,
871 bool IsCS, bool AtomicCounterUpdate,
872 std::string ProfileFile,
873 std::string ProfileRemappingFile) {
874 assert(Level != OptimizationLevel::O0 && "Not expecting O0 here!");
875
876 if (!RunProfileGen) {
877 assert(!ProfileFile.empty() && "Profile use expecting a profile file!");
878 MPM.addPass(
879 PGOInstrumentationUse(ProfileFile, ProfileRemappingFile, IsCS, FS));
880 // Cache ProfileSummaryAnalysis once to avoid the potential need to insert
881 // RequireAnalysisPass for PSI before subsequent non-module passes.
882 MPM.addPass(RequireAnalysisPass<ProfileSummaryAnalysis, Module>());
883 return;
884 }
885
886 // Perform PGO instrumentation.
887 MPM.addPass(PGOInstrumentationGen(IsCS ? PGOInstrumentationType::CSFDO
889
890 addPostPGOLoopRotation(MPM, Level);
891 // Add the profile lowering pass.
892 InstrProfOptions Options;
893 if (!ProfileFile.empty())
894 Options.InstrProfileOutput = ProfileFile;
895 // Do counter promotion at Level greater than O0.
896 Options.DoCounterPromotion = true;
897 Options.UseBFIInPromotion = IsCS;
898 if (EnableSampledInstr) {
899 Options.Sampling = true;
900 // With sampling, there is little benefit to enabling counter promotion.
901 // But note that sampling does work with counter promotion.
902 Options.DoCounterPromotion = false;
903 }
904 Options.Atomic = AtomicCounterUpdate;
905 MPM.addPass(InstrProfilingLoweringPass(Options, IsCS));
906}
907
909 bool RunProfileGen, bool IsCS,
910 bool AtomicCounterUpdate,
911 std::string ProfileFile,
912 std::string ProfileRemappingFile) {
913 if (!RunProfileGen) {
914 assert(!ProfileFile.empty() && "Profile use expecting a profile file!");
915 MPM.addPass(
916 PGOInstrumentationUse(ProfileFile, ProfileRemappingFile, IsCS, FS));
917 // Cache ProfileSummaryAnalysis once to avoid the potential need to insert
918 // RequireAnalysisPass for PSI before subsequent non-module passes.
920 return;
921 }
922
923 // Perform PGO instrumentation.
926 // Add the profile lowering pass.
928 if (!ProfileFile.empty())
929 Options.InstrProfileOutput = ProfileFile;
930 // Do not do counter promotion at O0.
931 Options.DoCounterPromotion = false;
932 Options.UseBFIInPromotion = IsCS;
933 Options.Atomic = AtomicCounterUpdate;
935}
936
938 return getInlineParams(Level.getSpeedupLevel(), Level.getSizeLevel());
939}
940
944 InlineParams IP;
945 if (PTO.InlinerThreshold == -1)
946 IP = getInlineParamsFromOptLevel(Level);
947 else
948 IP = getInlineParams(PTO.InlinerThreshold);
949 // For PreLinkThinLTO + SamplePGO or PreLinkFullLTO + SamplePGO,
950 // set hot-caller threshold to 0 to disable hot
951 // callsite inline (as much as possible [1]) because it makes
952 // profile annotation in the backend inaccurate.
953 //
954 // [1] Note the cost of a function could be below zero due to erased
955 // prologue / epilogue.
956 if (isLTOPreLink(Phase) && PGOOpt && PGOOpt->Action == PGOOptions::SampleUse)
958
959 if (PGOOpt)
961
965
966 // Require the GlobalsAA analysis for the module so we can query it within
967 // the CGSCC pipeline.
969 MIWP.addModulePass(RequireAnalysisPass<GlobalsAA, Module>());
970 // Invalidate AAManager so it can be recreated and pick up the newly
971 // available GlobalsAA.
972 MIWP.addModulePass(
974 }
975
976 // Require the ProfileSummaryAnalysis for the module so we can query it within
977 // the inliner pass.
979
980 // Now begin the main postorder CGSCC pipeline.
981 // FIXME: The current CGSCC pipeline has its origins in the legacy pass
982 // manager and trying to emulate its precise behavior. Much of this doesn't
983 // make a lot of sense and we should revisit the core CGSCC structure.
984 CGSCCPassManager &MainCGPipeline = MIWP.getPM();
985
986 // Note: historically, the PruneEH pass was run first to deduce nounwind and
987 // generally clean up exception handling overhead. It isn't clear this is
988 // valuable as the inliner doesn't currently care whether it is inlining an
989 // invoke or a call.
990
992 MainCGPipeline.addPass(AttributorCGSCCPass());
994 MainCGPipeline.addPass(AttributorLightCGSCCPass());
995
996 // Deduce function attributes. We do another run of this after the function
997 // simplification pipeline, so this only needs to run when it could affect the
998 // function simplification pipeline, which is only the case with recursive
999 // functions.
1000 MainCGPipeline.addPass(PostOrderFunctionAttrsPass(/*SkipNonRecursive*/ true));
1001
1002 // When at O3 add argument promotion to the pass pipeline.
1003 // FIXME: It isn't at all clear why this should be limited to O3.
1004 if (Level == OptimizationLevel::O3)
1005 MainCGPipeline.addPass(ArgumentPromotionPass());
1006
1007 // Try to perform OpenMP specific optimizations. This is a (quick!) no-op if
1008 // there are no OpenMP runtime calls present in the module.
1009 if (Level == OptimizationLevel::O2 || Level == OptimizationLevel::O3)
1010 MainCGPipeline.addPass(OpenMPOptCGSCCPass(Phase));
1011
1012 invokeCGSCCOptimizerLateEPCallbacks(MainCGPipeline, Level);
1013
1014 // Add the core function simplification pipeline nested inside the
1015 // CGSCC walk.
1018 PTO.EagerlyInvalidateAnalyses, /*NoRerun=*/true));
1019
1020 // Finally, deduce any function attributes based on the fully simplified
1021 // function.
1022 MainCGPipeline.addPass(PostOrderFunctionAttrsPass());
1023
1024 // Mark that the function is fully simplified and that it shouldn't be
1025 // simplified again if we somehow revisit it due to CGSCC mutations unless
1026 // it's been modified since.
1029
1031 MainCGPipeline.addPass(CoroSplitPass(Level != OptimizationLevel::O0));
1032 MainCGPipeline.addPass(CoroAnnotationElidePass());
1033 }
1034
1035 // Make sure we don't affect potential future NoRerun CGSCC adaptors.
1036 MIWP.addLateModulePass(createModuleToFunctionPassAdaptor(
1038
1039 return MIWP;
1040}
1041
1046
1048 // For PreLinkThinLTO + SamplePGO or PreLinkFullLTO + SamplePGO,
1049 // set hot-caller threshold to 0 to disable hot
1050 // callsite inline (as much as possible [1]) because it makes
1051 // profile annotation in the backend inaccurate.
1052 //
1053 // [1] Note the cost of a function could be below zero due to erased
1054 // prologue / epilogue.
1055 if (isLTOPreLink(Phase) && PGOOpt && PGOOpt->Action == PGOOptions::SampleUse)
1056 IP.HotCallSiteThreshold = 0;
1057
1058 if (PGOOpt)
1060
1061 // The inline deferral logic is used to avoid losing some
1062 // inlining chance in future. It is helpful in SCC inliner, in which
1063 // inlining is processed in bottom-up order.
1064 // While in module inliner, the inlining order is a priority-based order
1065 // by default. The inline deferral is unnecessary there. So we disable the
1066 // inline deferral logic in module inliner.
1067 IP.EnableDeferral = false;
1068
1071 MPM.addPass(GlobalOptPass());
1072 MPM.addPass(GlobalDCEPass());
1073 MPM.addPass(PGOCtxProfFlatteningPass(/*IsPreThinlink=*/false));
1074 }
1075
1078 PTO.EagerlyInvalidateAnalyses));
1079
1083 MPM.addPass(
1085 }
1086
1087 return MPM;
1088}
1089
1093 assert(Level != OptimizationLevel::O0 &&
1094 "Should not be used for O0 pipeline");
1095
1097 "FullLTOPostLink shouldn't call buildModuleSimplificationPipeline!");
1098
1100
1101 // Place pseudo probe instrumentation as the first pass of the pipeline to
1102 // minimize the impact of optimization changes.
1103 if (PGOOpt && PGOOpt->PseudoProbeForProfiling &&
1106
1107 bool HasSampleProfile = PGOOpt && (PGOOpt->Action == PGOOptions::SampleUse);
1108
1109 // In ThinLTO mode, when flattened profile is used, all the available
1110 // profile information will be annotated in PreLink phase so there is
1111 // no need to load the profile again in PostLink.
1112 bool LoadSampleProfile =
1113 HasSampleProfile &&
1115
1116 // During the ThinLTO backend phase we perform early indirect call promotion
1117 // here, before globalopt. Otherwise imported available_externally functions
1118 // look unreferenced and are removed. If we are going to load the sample
1119 // profile then defer until later.
1120 // TODO: See if we can move later and consolidate with the location where
1121 // we perform ICP when we are loading a sample profile.
1122 // TODO: We pass HasSampleProfile (whether there was a sample profile file
1123 // passed to the compile) to the SamplePGO flag of ICP. This is used to
1124 // determine whether the new direct calls are annotated with prof metadata.
1125 // Ideally this should be determined from whether the IR is annotated with
1126 // sample profile, and not whether a sample profile was provided on the
1127 // command line. E.g. for flattened profiles where we will not be reloading
1128 // the sample profile in the ThinLTO backend, we ideally shouldn't have to
1129 // provide the sample profile file.
1130 if (Phase == ThinOrFullLTOPhase::ThinLTOPostLink && !LoadSampleProfile)
1131 MPM.addPass(PGOIndirectCallPromotion(true /* InLTO */, HasSampleProfile));
1132
1133 // Create an early function pass manager to cleanup the output of the
1134 // frontend. Not necessary with LTO post link pipelines since the pre link
1135 // pipeline already cleaned up the frontend output.
1137 // Do basic inference of function attributes from known properties of system
1138 // libraries and other oracles.
1140 MPM.addPass(CoroEarlyPass());
1141
1142 FunctionPassManager EarlyFPM;
1143 EarlyFPM.addPass(EntryExitInstrumenterPass(/*PostInlining=*/false));
1144 // Lower llvm.expect to metadata before attempting transforms.
1145 // Compare/branch metadata may alter the behavior of passes like
1146 // SimplifyCFG.
1148 EarlyFPM.addPass(SimplifyCFGPass());
1150 EarlyFPM.addPass(EarlyCSEPass());
1151 if (Level == OptimizationLevel::O3)
1152 EarlyFPM.addPass(CallSiteSplittingPass());
1154 std::move(EarlyFPM), PTO.EagerlyInvalidateAnalyses));
1155 }
1156
1157 if (LoadSampleProfile) {
1158 // Annotate sample profile right after early FPM to ensure freshness of
1159 // the debug info.
1161 PGOOpt->ProfileFile, PGOOpt->ProfileRemappingFile, Phase, FS));
1162 // Cache ProfileSummaryAnalysis once to avoid the potential need to insert
1163 // RequireAnalysisPass for PSI before subsequent non-module passes.
1165 // Do not invoke ICP in the LTOPrelink phase as it makes it hard
1166 // for the profile annotation to be accurate in the LTO backend.
1167 if (!isLTOPreLink(Phase))
1168 // We perform early indirect call promotion here, before globalopt.
1169 // This is important for the ThinLTO backend phase because otherwise
1170 // imported available_externally functions look unreferenced and are
1171 // removed.
1172 MPM.addPass(
1173 PGOIndirectCallPromotion(true /* IsInLTO */, true /* SamplePGO */));
1174 }
1175
1176 // Try to perform OpenMP specific optimizations on the module. This is a
1177 // (quick!) no-op if there are no OpenMP runtime calls present in the module.
1179
1181 MPM.addPass(AttributorPass());
1184
1185 // Lower type metadata and the type.test intrinsic in the ThinLTO
1186 // post link pipeline after ICP. This is to enable usage of the type
1187 // tests in ICP sequences.
1189 MPM.addPass(LowerTypeTestsPass(nullptr, nullptr,
1191
1193
1194 // Interprocedural constant propagation now that basic cleanup has occurred
1195 // and prior to optimizing globals.
1196 // FIXME: This position in the pipeline hasn't been carefully considered in
1197 // years, it should be re-analyzed.
1198 MPM.addPass(IPSCCPPass(
1199 IPSCCPOptions(/*AllowFuncSpec=*/
1200 Level != OptimizationLevel::Os &&
1201 Level != OptimizationLevel::Oz &&
1202 !isLTOPreLink(Phase))));
1203
1204 // Attach metadata to indirect call sites indicating the set of functions
1205 // they may target at run-time. This should follow IPSCCP.
1207
1208 // Optimize globals to try and fold them into constants.
1209 MPM.addPass(GlobalOptPass());
1210
1211 // Create a small function pass pipeline to cleanup after all the global
1212 // optimizations.
1213 FunctionPassManager GlobalCleanupPM;
1214 // FIXME: Should this instead by a run of SROA?
1215 GlobalCleanupPM.addPass(PromotePass());
1216 GlobalCleanupPM.addPass(InstCombinePass());
1217 invokePeepholeEPCallbacks(GlobalCleanupPM, Level);
1218 GlobalCleanupPM.addPass(
1219 SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(true)));
1220 MPM.addPass(createModuleToFunctionPassAdaptor(std::move(GlobalCleanupPM),
1221 PTO.EagerlyInvalidateAnalyses));
1222
1223 // We already asserted this happens in non-FullLTOPostLink earlier.
1224 const bool IsPreLink = Phase != ThinOrFullLTOPhase::ThinLTOPostLink;
1225 // Enable contextual profiling instrumentation.
1226 const bool IsCtxProfGen =
1228 const bool IsPGOPreLink = !IsCtxProfGen && PGOOpt && IsPreLink;
1229 const bool IsPGOInstrGen =
1230 IsPGOPreLink && PGOOpt->Action == PGOOptions::IRInstr;
1231 const bool IsPGOInstrUse =
1232 IsPGOPreLink && PGOOpt->Action == PGOOptions::IRUse;
1233 const bool IsMemprofUse = IsPGOPreLink && !PGOOpt->MemoryProfile.empty();
1234 // We don't want to mix pgo ctx gen and pgo gen; we also don't currently
1235 // enable ctx profiling from the frontend.
1237 "Enabling both instrumented PGO and contextual instrumentation is not "
1238 "supported.");
1239 const bool IsCtxProfUse =
1241
1242 assert(
1244 "--instrument-cold-function-only-path is provided but "
1245 "--pgo-instrument-cold-function-only is not enabled");
1246 const bool IsColdFuncOnlyInstrGen = PGOInstrumentColdFunctionOnly &&
1247 IsPGOPreLink &&
1249
1250 if (IsPGOInstrGen || IsPGOInstrUse || IsMemprofUse || IsCtxProfGen ||
1251 IsCtxProfUse || IsColdFuncOnlyInstrGen)
1252 addPreInlinerPasses(MPM, Level, Phase);
1253
1254 // Add all the requested passes for instrumentation PGO, if requested.
1255 if (IsPGOInstrGen || IsPGOInstrUse) {
1256 addPGOInstrPasses(MPM, Level,
1257 /*RunProfileGen=*/IsPGOInstrGen,
1258 /*IsCS=*/false, PGOOpt->AtomicCounterUpdate,
1259 PGOOpt->ProfileFile, PGOOpt->ProfileRemappingFile);
1260 } else if (IsCtxProfGen || IsCtxProfUse) {
1262 // In pre-link, we just want the instrumented IR. We use the contextual
1263 // profile in the post-thinlink phase.
1264 // The instrumentation will be removed in post-thinlink after IPO.
1265 // FIXME(mtrofin): move AssignGUIDPass if there is agreement to use this
1266 // mechanism for GUIDs.
1267 MPM.addPass(AssignGUIDPass());
1268 if (IsCtxProfUse) {
1269 MPM.addPass(PGOCtxProfFlatteningPass(/*IsPreThinlink=*/true));
1270 return MPM;
1271 }
1272 // Block further inlining in the instrumented ctxprof case. This avoids
1273 // confusingly collecting profiles for the same GUID corresponding to
1274 // different variants of the function. We could do like PGO and identify
1275 // functions by a (GUID, Hash) tuple, but since the ctxprof "use" waits for
1276 // thinlto to happen before performing any further optimizations, it's
1277 // unnecessary to collect profiles for non-prevailing copies.
1279 addPostPGOLoopRotation(MPM, Level);
1281 } else if (IsColdFuncOnlyInstrGen) {
1282 addPGOInstrPasses(MPM, Level, /* RunProfileGen */ true, /* IsCS */ false,
1283 /* AtomicCounterUpdate */ false,
1285 /* ProfileRemappingFile */ "");
1286 }
1287
1288 if (IsPGOInstrGen || IsPGOInstrUse || IsCtxProfGen)
1289 MPM.addPass(PGOIndirectCallPromotion(false, false));
1290
1291 if (IsPGOPreLink && PGOOpt->CSAction == PGOOptions::CSIRInstr)
1292 MPM.addPass(PGOInstrumentationGenCreateVar(PGOOpt->CSProfileGenFile,
1294
1295 if (IsMemprofUse)
1296 MPM.addPass(MemProfUsePass(PGOOpt->MemoryProfile, FS));
1297
1298 if (PGOOpt && (PGOOpt->Action == PGOOptions::IRUse ||
1299 PGOOpt->Action == PGOOptions::SampleUse))
1300 MPM.addPass(PGOForceFunctionAttrsPass(PGOOpt->ColdOptType));
1301
1302 MPM.addPass(AlwaysInlinerPass(/*InsertLifetimeIntrinsics=*/true));
1303
1306 else
1307 MPM.addPass(buildInlinerPipeline(Level, Phase));
1308
1309 // Remove any dead arguments exposed by cleanups, constant folding globals,
1310 // and argument promotion.
1312
1315
1317 MPM.addPass(CoroCleanupPass());
1318
1319 // Optimize globals now that functions are fully simplified.
1320 MPM.addPass(GlobalOptPass());
1321 MPM.addPass(GlobalDCEPass());
1322
1323 return MPM;
1324}
1325
/// Appends the vectorization-stage function passes to FPM for the given
/// optimization level and LTO phase.
/// NOTE(review): the FPM parameter line (source line 1328) and several pass
/// registrations are missing from this listing, so some statements below
/// appear truncated.
/// Visible behavior: in the full-LTO post-link phase the unroll step is
/// scheduled early (inside the first IsFullLTO branch); otherwise
/// unroll-and-jam, unrolling and a CFG-preserving SROA run after
/// VectorCombine. In both cases the sequence includes InstCombine, an
/// optional block of extra cleanup passes (speedup level > 1 and
/// ExtraVectorizerPasses), an aggressive SimplifyCFG, the SLP vectorizer when
/// PTO.SLPVectorization is set, VectorCombine, InferAlignment, InstCombine, a
/// LICM run and AlignmentFromAssumptions.
1326/// TODO: Should LTO cause any differences to this set of passes?
1327void PassBuilder::addVectorPasses(OptimizationLevel Level,
1329 ThinOrFullLTOPhase LTOPhase) {
1330 const bool IsFullLTO = LTOPhase == ThinOrFullLTOPhase::FullLTOPostLink;
1331
1334
1335 // Drop dereferenceable assumes after vectorization, as they are no longer
1336 // needed and can inhibit further optimization.
1337 if (!isLTOPreLink(LTOPhase))
1338 FPM.addPass(DropUnnecessaryAssumesPass(/*DropDereferenceable=*/true));
1339
1341 if (IsFullLTO) {
1342 // The vectorizer may have significantly shortened a loop body; unroll
1343 // again. Unroll small loops to hide loop backedge latency and saturate any
1344 // parallel execution resources of an out-of-order processor. We also then
1345 // need to clean up redundancies and loop invariant code.
1346 // FIXME: It would be really good to use a loop-integrated instruction
1347 // combiner for cleanup here so that the unrolling and LICM can be pipelined
1348 // across the loop nests.
1349 // We do UnrollAndJam in a separate LPM to ensure it happens before unroll
1352 LoopUnrollAndJamPass(Level.getSpeedupLevel())));
1354 Level.getSpeedupLevel(), /*OnlyWhenForced=*/!PTO.LoopUnrolling,
1357 // Now that we are done with loop unrolling, be it either by LoopVectorizer,
1358 // or LoopUnroll passes, some variable-offset GEP's into alloca's could have
1359 // become constant-offset, thus enabling SROA and alloca promotion. Do so.
1360 // NOTE: we are very late in the pipeline, and we don't have any LICM
1361 // or SimplifyCFG passes scheduled after us, that would cleanup
1362 // the CFG mess this may create if allowed to modify CFG, so forbid that.
1364 }
1365
1366 if (!IsFullLTO) {
1367 // Eliminate loads by forwarding stores from the previous iteration to loads
1368 // of the current iteration.
1370 }
1371 // Cleanup after the loop optimization passes.
1372 FPM.addPass(InstCombinePass());
1373
1374 if (Level.getSpeedupLevel() > 1 && ExtraVectorizerPasses) {
1375 ExtraFunctionPassManager<ShouldRunExtraVectorPasses> ExtraPasses;
1376 // At higher optimization levels, try to clean up any runtime overlap and
1377 // alignment checks inserted by the vectorizer. We want to track correlated
1378 // runtime checks for two inner loops in the same outer loop, fold any
1379 // common computations, hoist loop-invariant aspects out of any outer loop,
1380 // and unswitch the runtime checks if possible. Once hoisted, we may have
1381 // dead (or speculatable) control flows or more combining opportunities.
1382 ExtraPasses.addPass(EarlyCSEPass());
1383 ExtraPasses.addPass(CorrelatedValuePropagationPass());
1384 ExtraPasses.addPass(InstCombinePass());
1385 LoopPassManager LPM;
1386 LPM.addPass(LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap,
1387 /*AllowSpeculation=*/true));
1388 LPM.addPass(SimpleLoopUnswitchPass(/* NonTrivial */ Level ==
1390 ExtraPasses.addPass(
1391 createFunctionToLoopPassAdaptor(std::move(LPM), /*UseMemorySSA=*/true));
1392 ExtraPasses.addPass(
1393 SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(true)));
1394 ExtraPasses.addPass(InstCombinePass());
1395 FPM.addPass(std::move(ExtraPasses));
1396 }
1397
1398 // Now that we've formed fast to execute loop structures, we do further
1399 // optimizations. These are run afterward as they might block doing complex
1400 // analyses and transforms such as what are needed for loop vectorization.
1401
1402 // Cleanup after loop vectorization, etc. Simplification passes like CVP and
1403 // GVN, loop transforms, and others have already run, so it's now better to
1404 // convert to more optimized IR using more aggressive simplify CFG options.
1405 // The extra sinking transform can create larger basic blocks, so do this
1406 // before SLP vectorization.
1407 FPM.addPass(SimplifyCFGPass(SimplifyCFGOptions()
1408 .forwardSwitchCondToPhi(true)
1409 .convertSwitchRangeToICmp(true)
1410 .convertSwitchToArithmetic(true)
1411 .convertSwitchToLookupTable(true)
1412 .needCanonicalLoops(false)
1413 .hoistCommonInsts(true)
1414 .sinkCommonInsts(true)));
1415
1416 if (IsFullLTO) {
1417 FPM.addPass(SCCPPass());
1418 FPM.addPass(InstCombinePass());
1419 FPM.addPass(BDCEPass());
1420 }
1421
1422 // Optimize parallel scalar instruction chains into SIMD instructions.
1423 if (PTO.SLPVectorization) {
1424 FPM.addPass(SLPVectorizerPass());
1425 if (Level.getSpeedupLevel() > 1 && ExtraVectorizerPasses) {
1426 FPM.addPass(EarlyCSEPass());
1427 }
1428 }
1429 // Enhance/cleanup vector code.
1430 FPM.addPass(VectorCombinePass());
1431
1432 if (!IsFullLTO) {
1433 FPM.addPass(InstCombinePass());
1434 // Unroll small loops to hide loop backedge latency and saturate any
1435 // parallel execution resources of an out-of-order processor. We also then
1436 // need to clean up redundancies and loop invariant code.
1437 // FIXME: It would be really good to use a loop-integrated instruction
1438 // combiner for cleanup here so that the unrolling and LICM can be pipelined
1439 // across the loop nests.
1440 // We do UnrollAndJam in a separate LPM to ensure it happens before unroll
1441 if (EnableUnrollAndJam && PTO.LoopUnrolling) {
1443 LoopUnrollAndJamPass(Level.getSpeedupLevel())));
1444 }
1445 FPM.addPass(LoopUnrollPass(LoopUnrollOptions(
1446 Level.getSpeedupLevel(), /*OnlyWhenForced=*/!PTO.LoopUnrolling,
1447 PTO.ForgetAllSCEVInLoopUnroll)));
1448 FPM.addPass(WarnMissedTransformationsPass());
1449 // Now that we are done with loop unrolling, be it either by LoopVectorizer,
1450 // or LoopUnroll passes, some variable-offset GEP's into alloca's could have
1451 // become constant-offset, thus enabling SROA and alloca promotion. Do so.
1452 // NOTE: we are very late in the pipeline, and we don't have any LICM
1453 // or SimplifyCFG passes scheduled after us, that would cleanup
1454 // the CFG mess this may create if allowed to modify CFG, so forbid that.
1455 FPM.addPass(SROAPass(SROAOptions::PreserveCFG));
1456 }
1457
1458 FPM.addPass(InferAlignmentPass());
1459 FPM.addPass(InstCombinePass());
1460
1461 // This is needed for two reasons:
1462 // 1. It works around problems that instcombine introduces, such as sinking
1463 // expensive FP divides into loops containing multiplications using the
1464 // divide result.
1465 // 2. It helps to clean up some loop-invariant code created by the loop
1466 // unroll pass when IsFullLTO=false.
1468 LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap,
1469 /*AllowSpeculation=*/true),
1470 /*UseMemorySSA=*/true));
1471
1472 // Now that we've vectorized and unrolled loops, we may have more refined
1473 // alignment information, try to re-derive it here.
1474 FPM.addPass(AlignmentFromAssumptionsPass());
1475}
1476
1479 ThinOrFullLTOPhase LTOPhase) {
// NOTE(review): the start of this signature (source lines 1477-1478) and
// several statements are missing from this listing; presumably this is the
// module optimization pipeline builder -- confirm against the full source.
// Visible behavior: optionally adds the context-sensitive PGO passes when not
// in an LTO pre-link phase, then assembles a function pass manager
// (OptimizePM) holding the late loop, vectorization (via addVectorPasses),
// LoopSink, InstSimplify, DivRemPairs, TailCallElim and SimplifyCFG passes and
// adapts it onto MPM. Afterwards it adds late module passes (AllocToken when
// not pre-link, optional IR outlining, GlobalDCE, optional call-graph
// profile, and speculative devirtualization only when LTOPhase is None) and
// returns MPM.
1480 const bool LTOPreLink = isLTOPreLink(LTOPhase);
1482
1483 // Run partial inlining pass to partially inline functions that have
1484 // large bodies.
1487
1488 // Remove avail extern fns and globals definitions since we aren't compiling
1489 // an object file for later LTO. For LTO we want to preserve these so they
1490 // are eligible for inlining at link-time. Note if they are unreferenced they
1491 // will be removed by GlobalDCE later, so this only impacts referenced
1492 // available externally globals. Eventually they will be suppressed during
1493 // codegen, but eliminating here enables more opportunity for GlobalDCE as it
1494 // may make globals referenced by available external functions dead and saves
1495 // running remaining passes on the eliminated functions. These should be
1496 // preserved during prelinking for link-time inlining decisions.
1497 if (!LTOPreLink)
1499
1500 // Do RPO function attribute inference across the module to forward-propagate
1501 // attributes where applicable.
1502 // FIXME: Is this really an optimization rather than a canonicalization?
1504
1505 // Do a post inline PGO instrumentation and use pass. This is a context
1506 // sensitive PGO pass. We don't want to do this in LTOPreLink phase as
1507 // cross-module inline has not been done yet. The context sensitive
1508 // instrumentation is after all the inlines are done.
1509 if (!LTOPreLink && PGOOpt) {
1510 if (PGOOpt->CSAction == PGOOptions::CSIRInstr)
1511 addPGOInstrPasses(MPM, Level, /*RunProfileGen=*/true,
1512 /*IsCS=*/true, PGOOpt->AtomicCounterUpdate,
1513 PGOOpt->CSProfileGenFile, PGOOpt->ProfileRemappingFile);
1514 else if (PGOOpt->CSAction == PGOOptions::CSIRUse)
1515 addPGOInstrPasses(MPM, Level, /*RunProfileGen=*/false,
1516 /*IsCS=*/true, PGOOpt->AtomicCounterUpdate,
1517 PGOOpt->ProfileFile, PGOOpt->ProfileRemappingFile);
1518 }
1519
1520 // Re-compute GlobalsAA here prior to function passes. This is particularly
1521 // useful as the above will have inlined, DCE'ed, and function-attr
1522 // propagated everything. We should at this point have a reasonably minimal
1523 // and richly annotated call graph. By computing aliasing and mod/ref
1524 // information for all local globals here, the late loop passes and notably
1525 // the vectorizer will be able to use them to help recognize vectorizable
1526 // memory operations.
1529
1530 invokeOptimizerEarlyEPCallbacks(MPM, Level, LTOPhase);
1531
1532 FunctionPassManager OptimizePM;
1533
1534 // Only drop unnecessary assumes post-inline and post-link, as otherwise
1535 // additional uses of the affected value may be introduced through inlining
1536 // and CSE.
1537 if (!isLTOPreLink(LTOPhase))
1538 OptimizePM.addPass(DropUnnecessaryAssumesPass());
1539
1540 // Scheduling LoopVersioningLICM when inlining is over, because after that
1541 // we may see more accurate aliasing. Reason to run this late is that too
1542 // early versioning may prevent further inlining due to increase of code
1543 // size. Other optimizations which run later might benefit from the no-alias
1544 // assumption in the cloned loop.
1546 OptimizePM.addPass(
1548 // LoopVersioningLICM pass might increase new LICM opportunities.
1550 LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap,
1551 /*AllowSpeculation=*/true),
1552 /*UseMemorySSA=*/true));
1553 }
1554
1555 OptimizePM.addPass(Float2IntPass());
1557
1558 if (EnableMatrix) {
1559 OptimizePM.addPass(LowerMatrixIntrinsicsPass());
1560 OptimizePM.addPass(EarlyCSEPass());
1561 }
1562
1563 // CHR pass should only be applied with the profile information.
1564 // The check is to check the profile summary information in CHR.
1565 if (EnableCHR && Level == OptimizationLevel::O3)
1566 OptimizePM.addPass(ControlHeightReductionPass());
1567
1568 // FIXME: We need to run some loop optimizations to re-rotate loops after
1569 // simplifycfg and others undo their rotation.
1570
1571 // Optimize the loop execution. These passes operate on entire loop nests
1572 // rather than on each loop in an inside-out manner, and so they are actually
1573 // function passes.
1574
1575 invokeVectorizerStartEPCallbacks(OptimizePM, Level);
1576
1577 LoopPassManager LPM;
1578 // First rotate loops that may have been un-rotated by prior passes.
1579 // Disable header duplication at -Oz.
1581 Level != OptimizationLevel::Oz,
1582 LTOPreLink));
1583 // Some loops may have become dead by now. Try to delete them.
1584 // FIXME: see discussion in https://reviews.llvm.org/D112851,
1585 // this may need to be revisited once we run GVN before loop deletion
1586 // in the simplification pipeline.
1587 LPM.addPass(LoopDeletionPass());
1588
1589 if (PTO.LoopInterchange)
1590 LPM.addPass(LoopInterchangePass());
1591
1592 OptimizePM.addPass(
1593 createFunctionToLoopPassAdaptor(std::move(LPM), /*UseMemorySSA=*/false));
1594
1595 // FIXME: This may not be the right place in the pipeline.
1596 // We need to have the data to support the right place.
1597 if (PTO.LoopFusion)
1598 OptimizePM.addPass(LoopFusePass());
1599
1600 // Distribute loops to allow partial vectorization. I.e. isolate dependences
1601 // into separate loop that would otherwise inhibit vectorization. This is
1602 // currently only performed for loops marked with the metadata
1603 // llvm.loop.distribute=true or when -enable-loop-distribute is specified.
1604 OptimizePM.addPass(LoopDistributePass());
1605
1606 // Populates the VFABI attribute with the scalar-to-vector mappings
1607 // from the TargetLibraryInfo.
1608 OptimizePM.addPass(InjectTLIMappings());
1609
1610 addVectorPasses(Level, OptimizePM, LTOPhase);
1611
1612 invokeVectorizerEndEPCallbacks(OptimizePM, Level);
1613
1614 // LoopSink pass sinks instructions hoisted by LICM, which serves as a
1615 // canonicalization pass that enables other optimizations. As a result,
1616 // LoopSink pass needs to be a very late IR pass to avoid undoing LICM
1617 // result too early.
1618 OptimizePM.addPass(LoopSinkPass());
1619
1620 // And finally clean up LCSSA form before generating code.
1621 OptimizePM.addPass(InstSimplifyPass());
1622
1623 // This hoists/decomposes div/rem ops. It should run after other sink/hoist
1624 // passes to avoid re-sinking, but before SimplifyCFG because it can allow
1625 // flattening of blocks.
1626 OptimizePM.addPass(DivRemPairsPass());
1627
1628 // Try to annotate calls that were created during optimization.
1629 OptimizePM.addPass(
1630 TailCallElimPass(/*UpdateFunctionEntryCount=*/isInstrumentedPGOUse()));
1631
1632 // LoopSink (and other loop passes since the last simplifyCFG) might have
1633 // resulted in single-entry-single-exit or empty blocks. Clean up the CFG.
1634 OptimizePM.addPass(
1636 .convertSwitchRangeToICmp(true)
1637 .convertSwitchToArithmetic(true)
1638 .speculateUnpredictables(true)
1639 .hoistLoadsStoresWithCondFaulting(true)));
1640
1641 // Add the core optimizing pipeline.
1642 MPM.addPass(createModuleToFunctionPassAdaptor(std::move(OptimizePM),
1643 PTO.EagerlyInvalidateAnalyses));
1644
1645 // AllocToken transforms heap allocation calls; this needs to run late after
1646 // other allocation call transformations (such as those in InstCombine).
1647 if (!LTOPreLink)
1648 MPM.addPass(AllocTokenPass());
1649
1650 invokeOptimizerLastEPCallbacks(MPM, Level, LTOPhase);
1651
1652 // Split out cold code. Splitting is done late to avoid hiding context from
1653 // other optimizations and inadvertently regressing performance. The tradeoff
1654 // is that this has a higher code size cost than splitting early.
1655 if (EnableHotColdSplit && !LTOPreLink)
1657
1658 // Search the code for similar regions of code. If enough similar regions can
1659 // be found where extracting the regions into their own function will decrease
1660 // the size of the program, we extract the regions, and deduplicate the
1661 // structurally similar regions.
1662 if (EnableIROutliner)
1663 MPM.addPass(IROutlinerPass());
1664
1665 // Now we need to do some global optimization transforms.
1666 // FIXME: It would seem like these should come first in the optimization
1667 // pipeline and maybe be the bottom of the canonicalization pipeline? Weird
1668 // ordering here.
1669 MPM.addPass(GlobalDCEPass());
1671
1672 // Merge functions if requested. It has a better chance to merge functions
1673 // after ConstantMerge folded jump tables.
1674 if (PTO.MergeFunctions)
1676
1677 if (PTO.CallGraphProfile && !LTOPreLink)
1678 MPM.addPass(CGProfilePass(isLTOPostLink(LTOPhase)));
1679
1680 // RelLookupTableConverterPass runs later in LTO post-link pipeline.
1681 if (!LTOPreLink)
1683
1684 // Add devirtualization pass only when LTO is not enabled, as otherwise
1685 // the pass is already enabled in the LTO pipeline.
1686 if (PTO.DevirtualizeSpeculatively && LTOPhase == ThinOrFullLTOPhase::None) {
1687 // TODO: explore a better pipeline configuration that can improve
1688 // compilation time overhead.
1690 /*ExportSummary*/ nullptr,
1691 /*ImportSummary*/ nullptr,
1692 /*DevirtSpeculatively*/ PTO.DevirtualizeSpeculatively));
1693 MPM.addPass(LowerTypeTestsPass(nullptr, nullptr,
1695 // Given that the devirtualization creates more opportunities for inlining,
1696 // we run the Inliner again here to maximize the optimization gain we
1697 // get from devirtualization.
1698 // Also, we can't run devirtualization before inlining because the
1699 // devirtualization depends on the passes optimizing/eliminating vtable GVs
1700 // and those passes are only effective after inlining.
1701 if (EnableModuleInliner) {
1705 } else {
1708 /* MandatoryFirst */ true,
1710 }
1711 }
1712 return MPM;
1713}
1714
// Body of a default per-module pipeline builder. NOTE(review): the signature
// (source lines 1715-1717) is missing from this listing; presumably this is
// buildPerModuleDefaultPipeline -- confirm against the full source.
// At O0 it delegates to buildO0DefaultPipeline(Level, Phase). Otherwise the
// comments below give the intended order of stages; most of the corresponding
// calls sit on lines that are absent here. For LTO pre-link phases it
// finishes with addRequiredLTOPreLinkPasses(MPM) before returning MPM.
1718 if (Level == OptimizationLevel::O0)
1719 return buildO0DefaultPipeline(Level, Phase);
1720
1722
1723 // Currently this pipeline is only invoked in an LTO pre link pass or when we
1724 // are not running LTO. If that changes the below checks may need updating.
1726
1727 // If we are invoking this in non-LTO mode, remove any MemProf related
1728 // attributes and metadata, as we don't know whether we are linking with
1729 // a library containing the necessary interfaces.
1732
1733 // Convert @llvm.global.annotations to !annotation metadata.
1735
1736 // Force any function attributes we want the rest of the pipeline to observe.
1738
1739 if (PGOOpt && PGOOpt->DebugInfoForProfiling)
1741
1742 // Apply module pipeline start EP callback.
1744
1745 // Add the core simplification pipeline.
1747
1748 // Now add the optimization pipeline.
1750
1751 if (PGOOpt && PGOOpt->PseudoProbeForProfiling &&
1752 PGOOpt->Action == PGOOptions::SampleUse)
1754
1755 // Emit annotation remarks.
1757
1758 if (isLTOPreLink(Phase))
1759 addRequiredLTOPreLinkPasses(MPM);
1760 return MPM;
1761}
1762
1765 bool EmitSummary) {
// NOTE(review): the start of this signature (source lines 1763-1764) is
// missing from this listing; judging by the FatLTO comments below this is
// presumably the FatLTO default pipeline builder -- confirm.
// Visible behavior: embeds bitcode via EmbedBitcodePass(ThinLTO, EmitSummary),
// runs FatLtoCleanup, and then either reuses the ThinLTO post-link pipeline
// (ThinLTO with a sample-use profile) or adds the coroutine passes for ThinLTO
// followed by module optimization; returns MPM.
1767 if (ThinLTO)
1769 else
1771 MPM.addPass(EmbedBitcodePass(ThinLTO, EmitSummary));
1772
1773 // Perform any cleanups to the IR that aren't suitable for per TU compilation,
1774 // like removing CFI/WPD related instructions. Note, we reuse
1775 // LowerTypeTestsPass to clean up type tests rather than duplicate that logic
1776 // in FatLtoCleanup.
1777 MPM.addPass(FatLtoCleanup());
1778
1779 // If we're doing FatLTO w/ CFI enabled, we don't want the type tests in the
1780 // object code, only in the bitcode section, so drop it before we run
1781 // module optimization and generate machine code. If llvm.type.test() isn't in
1782 // the IR, this won't do anything.
1783 MPM.addPass(
1785
1786 // Use the ThinLTO post-link pipeline with sample profiling
1787 if (ThinLTO && PGOOpt && PGOOpt->Action == PGOOptions::SampleUse)
1788 MPM.addPass(buildThinLTODefaultPipeline(Level, /*ImportSummary=*/nullptr));
1789 else {
1790 // ModuleSimplification does not run the coroutine passes for
1791 // ThinLTOPreLink, so we need the coroutine passes to run for ThinLTO
1792 // builds, otherwise they will miscompile.
1793 if (ThinLTO) {
1794 // TODO: replace w/ buildCoroWrapper() when it takes phase and level into
1795 // consideration.
1796 CGSCCPassManager CGPM;
1800 MPM.addPass(CoroCleanupPass());
1801 }
1802
1803 // Otherwise, just use module optimization.
1804 MPM.addPass(
1806 // Emit annotation remarks.
1808 }
1809 return MPM;
1810}
1811
// Body of a ThinLTO pre-link pipeline builder. NOTE(review): the signature
// (source lines 1812-1813) and most pass registrations are missing from this
// listing; presumably this is buildThinLTOPreLinkDefaultPipeline -- confirm.
// Visible behavior: O0 is handled separately up front (the statement itself
// is not visible). When UseCtxProfile is non-empty the function stops early
// after addRequiredLTOPreLinkPasses(MPM). Otherwise it continues with the
// stages described by the comments below and again ends with
// addRequiredLTOPreLinkPasses(MPM) before returning MPM.
1814 if (Level == OptimizationLevel::O0)
1816
1818
1819 // Convert @llvm.global.annotations to !annotation metadata.
1821
1822 // Force any function attributes we want the rest of the pipeline to observe.
1824
1825 if (PGOOpt && PGOOpt->DebugInfoForProfiling)
1827
1828 // Apply module pipeline start EP callback.
1830
1831 // If we are planning to perform ThinLTO later, we don't bloat the code with
1832 // unrolling/vectorization/... now. Just simplify the module as much as we
1833 // can.
1836 // In pre-link, for ctx prof use, we stop here with an instrumented IR. We let
1837 // thinlto use the contextual info to perform imports; then use the contextual
1838 // profile in the post-thinlink phase.
1839 if (!UseCtxProfile.empty()) {
1840 addRequiredLTOPreLinkPasses(MPM);
1841 return MPM;
1842 }
1843
1844 // Run partial inlining pass to partially inline functions that have
1845 // large bodies.
1846 // FIXME: It isn't clear whether this is really the right place to run this
1847 // in ThinLTO. Because there is another canonicalization and simplification
1848 // phase that will run after the thin link, running this here ends up with
1849 // less information than will be available later and it may grow functions in
1850 // ways that aren't beneficial.
1853
1854 if (PGOOpt && PGOOpt->PseudoProbeForProfiling &&
1855 PGOOpt->Action == PGOOptions::SampleUse)
1857
1858 // Handle Optimizer{Early,Last}EPCallbacks added by clang on PreLink. Actual
1859 // optimization is going to be done in PostLink stage, but clang can't add
1860 // callbacks there in case of in-process ThinLTO called by linker.
1865
1866 // Emit annotation remarks.
1868
1869 addRequiredLTOPreLinkPasses(MPM);
1870
1871 return MPM;
1872}
1873
1875 OptimizationLevel Level, const ModuleSummaryIndex *ImportSummary) {
// NOTE(review): the first line of this signature (source line 1874) is missing
// from this listing; presumably this is the ThinLTO post-link default
// pipeline builder -- confirm against the full source.
// Visible behavior: when ImportSummary is non-null, WholeProgramDevirtPass and
// LowerTypeTestsPass are run with it first. At O0 the function then lowers
// leftover type tests, adds AllocTokenPass and GlobalDCEPass, and returns
// early. At other levels it picks a stage based on UseCtxProfile (the calls
// themselves are not visible), adds the optimization pipeline and returns MPM.
1877
1878 // If we are invoking this without a summary index noting that we are linking
1879 // with a library containing the necessary APIs, remove any MemProf related
1880 // attributes and metadata.
1881 if (!ImportSummary || !ImportSummary->withSupportsHotColdNew())
1883
1884 if (ImportSummary) {
1885 // For ThinLTO we must apply the context disambiguation decisions early, to
1886 // ensure we can correctly match the callsites to summary data.
1889 ImportSummary, PGOOpt && PGOOpt->Action == PGOOptions::SampleUse));
1890
1891 // These passes import type identifier resolutions for whole-program
1892 // devirtualization and CFI. They must run early because other passes may
1893 // disturb the specific instruction patterns that these passes look for,
1894 // creating dependencies on resolutions that may not appear in the summary.
1895 //
1896 // For example, GVN may transform the pattern assume(type.test) appearing in
1897 // two basic blocks into assume(phi(type.test, type.test)), which would
1898 // transform a dependency on a WPD resolution into a dependency on a type
1899 // identifier resolution for CFI.
1900 //
1901 // Also, WPD has access to more precise information than ICP and can
1902 // devirtualize more effectively, so it should operate on the IR first.
1903 //
1904 // The WPD and LowerTypeTest passes need to run at -O0 to lower type
1905 // metadata and intrinsics.
1906 MPM.addPass(WholeProgramDevirtPass(nullptr, ImportSummary));
1907 MPM.addPass(LowerTypeTestsPass(nullptr, ImportSummary));
1908 }
1909
1910 if (Level == OptimizationLevel::O0) {
1911 // Run a second time to clean up any type tests left behind by WPD for use
1912 // in ICP.
1913 MPM.addPass(LowerTypeTestsPass(nullptr, nullptr,
1916
1917 // AllocToken transforms heap allocation calls; this needs to run late after
1918 // other allocation call transformations (such as those in InstCombine).
1919 MPM.addPass(AllocTokenPass());
1920
1921 // Drop available_externally and unreferenced globals. This is necessary
1922 // with ThinLTO in order to avoid leaving undefined references to dead
1923 // globals in the object file.
1925 MPM.addPass(GlobalDCEPass());
1926 return MPM;
1927 }
1928 if (!UseCtxProfile.empty()) {
1929 MPM.addPass(
1931 } else {
1932 // Add the core simplification pipeline.
1935 }
1936 // Now add the optimization pipeline.
1939
1940 // Emit annotation remarks.
1942
1943 return MPM;
1944}
1945
// Body of an LTO pre-link pipeline builder that simply forwards to
// buildPerModuleDefaultPipeline. NOTE(review): the signature (source lines
// 1946-1947) and the second call argument (source line 1950) are missing from
// this listing; presumably this is buildLTOPreLinkDefaultPipeline -- confirm.
1948 // FIXME: We should use a customized pre-link pipeline!
1949 return buildPerModuleDefaultPipeline(Level,
1951}
1952
1955 ModuleSummaryIndex *ExportSummary) {
1957
1959
1960 // If we are invoking this without a summary index noting that we are linking
1961 // with a library containing the necessary APIs, remove any MemProf related
1962 // attributes and metadata.
1963 if (!ExportSummary || !ExportSummary->withSupportsHotColdNew())
1965
1966 // Create a function that performs CFI checks for cross-DSO calls with targets
1967 // in the current module.
1968 MPM.addPass(CrossDSOCFIPass());
1969
1970 if (Level == OptimizationLevel::O0) {
1971 // The WPD and LowerTypeTest passes need to run at -O0 to lower type
1972 // metadata and intrinsics.
1973 MPM.addPass(WholeProgramDevirtPass(ExportSummary, nullptr));
1974 MPM.addPass(LowerTypeTestsPass(ExportSummary, nullptr));
1975 // Run a second time to clean up any type tests left behind by WPD for use
1976 // in ICP.
1977 MPM.addPass(LowerTypeTestsPass(nullptr, nullptr,
1979
1981
1982 // AllocToken transforms heap allocation calls; this needs to run late after
1983 // other allocation call transformations (such as those in InstCombine).
1984 MPM.addPass(AllocTokenPass());
1985
1987
1988 // Emit annotation remarks.
1990
1991 return MPM;
1992 }
1993
1994 if (PGOOpt && PGOOpt->Action == PGOOptions::SampleUse) {
1995 // Load sample profile before running the LTO optimization pipeline.
1996 MPM.addPass(SampleProfileLoaderPass(PGOOpt->ProfileFile,
1997 PGOOpt->ProfileRemappingFile,
1999 // Cache ProfileSummaryAnalysis once to avoid the potential need to insert
2000 // RequireAnalysisPass for PSI before subsequent non-module passes.
2002 }
2003
2004 // Try to run OpenMP optimizations, quick no-op if no OpenMP metadata present.
2006
2007 // Remove unused virtual tables to improve the quality of code generated by
2008 // whole-program devirtualization and bitset lowering.
2009 MPM.addPass(GlobalDCEPass(/*InLTOPostLink=*/true));
2010
2011 // Do basic inference of function attributes from known properties of system
2012 // libraries and other oracles.
2014
2015 if (Level.getSpeedupLevel() > 1) {
2017 CallSiteSplittingPass(), PTO.EagerlyInvalidateAnalyses));
2018
2019 // Indirect call promotion. This should promote all the targets that are
2020 // left by the earlier promotion pass that promotes intra-module targets.
2021 // This two-step promotion is to save the compile time. For LTO, it should
2022 // produce the same result as if we only do promotion here.
2024 true /* InLTO */, PGOOpt && PGOOpt->Action == PGOOptions::SampleUse));
2025
2026 // Promoting by-reference arguments to by-value exposes more constants to
2027 // IPSCCP.
2028 CGSCCPassManager CGPM;
2031 CGPM.addPass(
2034
2035 // Propagate constants at call sites into the functions they call. This
2036 // opens opportunities for globalopt (and inlining) by substituting function
2037 // pointers passed as arguments to direct uses of functions.
2038 MPM.addPass(IPSCCPPass(IPSCCPOptions(/*AllowFuncSpec=*/
2039 Level != OptimizationLevel::Os &&
2040 Level != OptimizationLevel::Oz)));
2041
2042 // Attach metadata to indirect call sites indicating the set of functions
2043 // they may target at run-time. This should follow IPSCCP.
2045 }
2046
2047 // Do RPO function attribute inference across the module to forward-propagate
2048 // attributes where applicable.
2049 // FIXME: Is this really an optimization rather than a canonicalization?
2051
2052 // Use in-range annotations on GEP indices to split globals where beneficial.
2053 MPM.addPass(GlobalSplitPass());
2054
2055 // Run whole program optimization of virtual call when the list of callees
2056 // is fixed.
2057 MPM.addPass(WholeProgramDevirtPass(ExportSummary, nullptr));
2058
2060 // Stop here at -O1.
2061 if (Level == OptimizationLevel::O1) {
2062 // The LowerTypeTestsPass needs to run to lower type metadata and the
2063 // type.test intrinsics. The pass does nothing if CFI is disabled.
2064 MPM.addPass(LowerTypeTestsPass(ExportSummary, nullptr));
2065 // Run a second time to clean up any type tests left behind by WPD for use
2066 // in ICP (which is performed earlier than this in the regular LTO
2067 // pipeline).
2068 MPM.addPass(LowerTypeTestsPass(nullptr, nullptr,
2070
2072
2073 // AllocToken transforms heap allocation calls; this needs to run late after
2074 // other allocation call transformations (such as those in InstCombine).
2075 MPM.addPass(AllocTokenPass());
2076
2078
2079 // Emit annotation remarks.
2081
2082 return MPM;
2083 }
2084
2085 // TODO: Skip to match buildCoroWrapper.
2086 MPM.addPass(CoroEarlyPass());
2087
2088 // Optimize globals to try and fold them into constants.
2089 MPM.addPass(GlobalOptPass());
2090
2091 // Promote any localized globals to SSA registers.
2093
2094 // Linking modules together can lead to duplicate global constants; keep only
2095 // one copy of each constant.
2097
2098 // Remove unused arguments from functions.
2100
2101 // Reduce the code after globalopt and ipsccp. Both can open up significant
2102 // simplification opportunities, and both can propagate functions through
2103 // function pointers. When this happens, we often have to resolve varargs
2104 // calls, etc, so let instcombine do this.
2105 FunctionPassManager PeepholeFPM;
2106 PeepholeFPM.addPass(InstCombinePass());
2107 if (Level.getSpeedupLevel() > 1)
2108 PeepholeFPM.addPass(AggressiveInstCombinePass());
2109 invokePeepholeEPCallbacks(PeepholeFPM, Level);
2110
2111 MPM.addPass(createModuleToFunctionPassAdaptor(std::move(PeepholeFPM),
2112 PTO.EagerlyInvalidateAnalyses));
2113
2114 // Lower variadic functions for supported targets prior to inlining.
2116
2117 // Note: historically, the PruneEH pass was run first to deduce nounwind and
2118 // generally clean up exception handling overhead. It isn't clear this is
2119 // valuable as the inliner doesn't currently care whether it is inlining an
2120 // invoke or a call.
2121 // Run the inliner now.
2122 if (EnableModuleInliner) {
2126 } else {
2129 /* MandatoryFirst */ true,
2132 }
2133
2134 // Perform context disambiguation after inlining, since that would reduce the
2135 // amount of additional cloning required to distinguish the allocation
2136 // contexts.
2139 /*Summary=*/nullptr,
2140 PGOOpt && PGOOpt->Action == PGOOptions::SampleUse));
2141
2142 // Optimize globals again after we ran the inliner.
2143 MPM.addPass(GlobalOptPass());
2144
2145 // Run the OpenMPOpt pass again after global optimizations.
2147
2148 // Garbage collect dead functions.
2149 MPM.addPass(GlobalDCEPass(/*InLTOPostLink=*/true));
2150
2151 // If we didn't decide to inline a function, check to see if we can
2152 // transform it to pass arguments by value instead of by reference.
2153 CGSCCPassManager CGPM;
2158
2160 // The IPO Passes may leave cruft around. Clean up after them.
2161 FPM.addPass(InstCombinePass());
2162 invokePeepholeEPCallbacks(FPM, Level);
2163
2166
2168
2169 // Do a post inline PGO instrumentation and use pass. This is a context
2170 // sensitive PGO pass.
2171 if (PGOOpt) {
2172 if (PGOOpt->CSAction == PGOOptions::CSIRInstr)
2173 addPGOInstrPasses(MPM, Level, /*RunProfileGen=*/true,
2174 /*IsCS=*/true, PGOOpt->AtomicCounterUpdate,
2175 PGOOpt->CSProfileGenFile, PGOOpt->ProfileRemappingFile);
2176 else if (PGOOpt->CSAction == PGOOptions::CSIRUse)
2177 addPGOInstrPasses(MPM, Level, /*RunProfileGen=*/false,
2178 /*IsCS=*/true, PGOOpt->AtomicCounterUpdate,
2179 PGOOpt->ProfileFile, PGOOpt->ProfileRemappingFile);
2180 }
2181
2182 // Break up allocas
2184
2185 // LTO provides additional opportunities for tailcall elimination due to
2186 // link-time inlining, and visibility of nocapture attribute.
2187 FPM.addPass(
2188 TailCallElimPass(/*UpdateFunctionEntryCount=*/isInstrumentedPGOUse()));
2189
2190 // Run a few AA-driven optimizations here and now to clean up the code.
2191 MPM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM),
2192 PTO.EagerlyInvalidateAnalyses));
2193
2194 MPM.addPass(
2196
2197 // Require the GlobalsAA analysis for the module so we can query it within
2198 // MainFPM.
2201 // Invalidate AAManager so it can be recreated and pick up the newly
2202 // available GlobalsAA.
2203 MPM.addPass(
2205 }
2206
2207 FunctionPassManager MainFPM;
2209 LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap,
2210 /*AllowSpeculation=*/true),
2211 /*USeMemorySSA=*/true));
2212
2213 if (RunNewGVN)
2214 MainFPM.addPass(NewGVNPass());
2215 else
2216 MainFPM.addPass(GVNPass());
2217
2218 // Remove dead memcpy()'s.
2219 MainFPM.addPass(MemCpyOptPass());
2220
2221 // Nuke dead stores.
2222 MainFPM.addPass(DSEPass());
2223 MainFPM.addPass(MoveAutoInitPass());
2225
2226 invokeVectorizerStartEPCallbacks(MainFPM, Level);
2227
2228 LoopPassManager LPM;
2229 if (EnableLoopFlatten && Level.getSpeedupLevel() > 1)
2230 LPM.addPass(LoopFlattenPass());
2231 LPM.addPass(IndVarSimplifyPass());
2232 LPM.addPass(LoopDeletionPass());
2233 // FIXME: Add loop interchange.
2234
2235 // Unroll small loops and perform peeling.
2236 LPM.addPass(LoopFullUnrollPass(Level.getSpeedupLevel(),
2237 /* OnlyWhenForced= */ !PTO.LoopUnrolling,
2238 PTO.ForgetAllSCEVInLoopUnroll));
2239 // The loop passes in LPM (LoopFullUnrollPass) do not preserve MemorySSA.
2240 // *All* loop passes must preserve it, in order to be able to use it.
2241 MainFPM.addPass(
2242 createFunctionToLoopPassAdaptor(std::move(LPM), /*UseMemorySSA=*/false));
2243
2244 MainFPM.addPass(LoopDistributePass());
2245
2246 addVectorPasses(Level, MainFPM, ThinOrFullLTOPhase::FullLTOPostLink);
2247
2248 invokeVectorizerEndEPCallbacks(MainFPM, Level);
2249
2250 // Run the OpenMPOpt CGSCC pass again late.
2253
2254 invokePeepholeEPCallbacks(MainFPM, Level);
2255 MainFPM.addPass(JumpThreadingPass());
2256 MPM.addPass(createModuleToFunctionPassAdaptor(std::move(MainFPM),
2257 PTO.EagerlyInvalidateAnalyses));
2258
2259 // Lower type metadata and the type.test intrinsic. This pass supports
2260 // clang's control flow integrity mechanisms (-fsanitize=cfi*) and needs
2261 // to be run at link time if CFI is enabled. This pass does nothing if
2262 // CFI is disabled.
2263 MPM.addPass(LowerTypeTestsPass(ExportSummary, nullptr));
2264 // Run a second time to clean up any type tests left behind by WPD for use
2265 // in ICP (which is performed earlier than this in the regular LTO pipeline).
2266 MPM.addPass(LowerTypeTestsPass(nullptr, nullptr,
2268
2269 // Enable splitting late in the FullLTO post-link pipeline.
2272
2273 // Add late LTO optimization passes.
2274 FunctionPassManager LateFPM;
2275
2276 // LoopSink pass sinks instructions hoisted by LICM, which serves as a
2277 // canonicalization pass that enables other optimizations. As a result,
2278 // LoopSink pass needs to be a very late IR pass to avoid undoing LICM
2279 // result too early.
2280 LateFPM.addPass(LoopSinkPass());
2281
2282 // This hoists/decomposes div/rem ops. It should run after other sink/hoist
2283 // passes to avoid re-sinking, but before SimplifyCFG because it can allow
2284 // flattening of blocks.
2285 LateFPM.addPass(DivRemPairsPass());
2286
2287 // Delete basic blocks, which optimization passes may have killed.
2289 .convertSwitchRangeToICmp(true)
2290 .convertSwitchToArithmetic(true)
2291 .hoistCommonInsts(true)
2292 .speculateUnpredictables(true)));
2293 MPM.addPass(createModuleToFunctionPassAdaptor(std::move(LateFPM)));
2294
2295 // Drop bodies of available_externally objects to improve GlobalDCE.
2297
2298 // Now that we have optimized the program, discard unreachable functions.
2299 MPM.addPass(GlobalDCEPass(/*InLTOPostLink=*/true));
2300
2301 if (PTO.MergeFunctions)
2303
2305
2306 if (PTO.CallGraphProfile)
2307 MPM.addPass(CGProfilePass(/*InLTOPostLink=*/true));
2308
2309 MPM.addPass(CoroCleanupPass());
2310
2311 // AllocToken transforms heap allocation calls; this needs to run late after
2312 // other allocation call transformations (such as those in InstCombine).
2313 MPM.addPass(AllocTokenPass());
2314
2316
2317 // Emit annotation remarks.
2319
2320 return MPM;
2321}
2322
2326 assert(Level == OptimizationLevel::O0 &&
2327 "buildO0DefaultPipeline should only be used with O0");
2328
2330
2331 // Perform pseudo probe instrumentation in O0 mode. This is for the
2332 // consistency between different build modes. For example, a LTO build can be
2333 // mixed with an O0 prelink and an O2 postlink. Loading a sample profile in
2334 // the postlink will require pseudo probe instrumentation in the prelink.
2335 if (PGOOpt && PGOOpt->PseudoProbeForProfiling)
2337
2338 if (PGOOpt && (PGOOpt->Action == PGOOptions::IRInstr ||
2339 PGOOpt->Action == PGOOptions::IRUse))
2341 MPM,
2342 /*RunProfileGen=*/(PGOOpt->Action == PGOOptions::IRInstr),
2343 /*IsCS=*/false, PGOOpt->AtomicCounterUpdate, PGOOpt->ProfileFile,
2344 PGOOpt->ProfileRemappingFile);
2345
2346 // Instrument function entry and exit before all inlining.
2348 EntryExitInstrumenterPass(/*PostInlining=*/false)));
2349
2351
2352 if (PGOOpt && PGOOpt->DebugInfoForProfiling)
2354
2355 if (PGOOpt && PGOOpt->Action == PGOOptions::SampleUse) {
2356 // Explicitly disable sample loader inlining and use flattened profile in O0
2357 // pipeline.
2358 MPM.addPass(SampleProfileLoaderPass(PGOOpt->ProfileFile,
2359 PGOOpt->ProfileRemappingFile,
2361 /*DisableSampleProfileInlining=*/true,
2362 /*UseFlattenedProfile=*/true));
2363 // Cache ProfileSummaryAnalysis once to avoid the potential need to insert
2364 // RequireAnalysisPass for PSI before subsequent non-module passes.
2366 }
2367
2369
2370 // Build a minimal pipeline based on the semantics required by LLVM,
2371 // which is just that always inlining occurs. Further, disable generating
2372 // lifetime intrinsics to avoid enabling further optimizations during
2373 // code generation.
2375 /*InsertLifetimeIntrinsics=*/false));
2376
2377 if (PTO.MergeFunctions)
2379
2380 if (EnableMatrix)
2381 MPM.addPass(
2383
2384 if (!CGSCCOptimizerLateEPCallbacks.empty()) {
2385 CGSCCPassManager CGPM;
2387 if (!CGPM.isEmpty())
2389 }
2390 if (!LateLoopOptimizationsEPCallbacks.empty()) {
2391 LoopPassManager LPM;
2393 if (!LPM.isEmpty()) {
2395 createFunctionToLoopPassAdaptor(std::move(LPM))));
2396 }
2397 }
2398 if (!LoopOptimizerEndEPCallbacks.empty()) {
2399 LoopPassManager LPM;
2401 if (!LPM.isEmpty()) {
2403 createFunctionToLoopPassAdaptor(std::move(LPM))));
2404 }
2405 }
2406 if (!ScalarOptimizerLateEPCallbacks.empty()) {
2409 if (!FPM.isEmpty())
2410 MPM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM)));
2411 }
2412
2414
2415 if (!VectorizerStartEPCallbacks.empty()) {
2418 if (!FPM.isEmpty())
2419 MPM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM)));
2420 }
2421
2422 if (!VectorizerEndEPCallbacks.empty()) {
2425 if (!FPM.isEmpty())
2426 MPM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM)));
2427 }
2428
2430
2431 // AllocToken transforms heap allocation calls; this needs to run late after
2432 // other allocation call transformations (such as those in InstCombine).
2433 if (!isLTOPreLink(Phase))
2434 MPM.addPass(AllocTokenPass());
2435
2437
2438 if (isLTOPreLink(Phase))
2439 addRequiredLTOPreLinkPasses(MPM);
2440
2441 // Emit annotation remarks.
2443
2444 return MPM;
2445}
2446
2448 AAManager AA;
2449
2450 // The order in which these are registered determines their priority when
2451 // being queried.
2452
2453 // Add any target-specific alias analyses that should be run early.
2454 if (TM)
2455 TM->registerEarlyDefaultAliasAnalyses(AA);
2456
2457 // First we register the basic alias analysis that provides the majority of
2458 // per-function local AA logic. This is a stateless, on-demand local set of
2459 // AA techniques.
2460 AA.registerFunctionAnalysis<BasicAA>();
2461
2462 // Next we query fast, specialized alias analyses that wrap IR-embedded
2463 // information about aliasing.
2464 AA.registerFunctionAnalysis<ScopedNoAliasAA>();
2465 AA.registerFunctionAnalysis<TypeBasedAA>();
2466
2467 // Add support for querying global aliasing information when available.
2468 // Because the `AAManager` is a function analysis and `GlobalsAA` is a module
2469 // analysis, all that the `AAManager` can do is query for any *cached*
2470 // results from `GlobalsAA` through a readonly proxy.
2472 AA.registerModuleAnalysis<GlobalsAA>();
2473
2474 // Add target-specific alias analyses.
2475 if (TM)
2476 TM->registerDefaultAliasAnalyses(AA);
2477
2478 return AA;
2479}
2480
2481bool PassBuilder::isInstrumentedPGOUse() const {
2482 return (PGOOpt && PGOOpt->Action == PGOOptions::IRUse) ||
2483 !UseCtxProfile.empty();
2484}
aarch64 falkor hwpf fix Falkor HW Prefetch Fix Late Phase
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
AggressiveInstCombiner - Combine expression patterns to form expressions with fewer,...
Provides passes to inlining "always_inline" functions.
This is the interface for LLVM's primary stateless and local alias analysis.
This file provides the interface for LLVM's Call Graph Profile pass.
This header provides classes for managing passes over SCCs of the call graph.
#define clEnumValN(ENUMVAL, FLAGNAME, DESC)
This file provides the interface for a simple, fast CSE pass.
This file provides a pass which clones the current module and runs the provided pass pipeline on the ...
This file provides a pass manager that only runs its passes if the provided marker analysis has been ...
Super simple passes to force specific function attrs from the commandline into the IR for debugging p...
Provides passes for computing function attributes based on interprocedural analyses.
This file provides the interface for LLVM's Global Value Numbering pass which eliminates fully redund...
This is the interface for a simple mod/ref and alias analysis over globals.
AcceleratorCodeSelection - Identify all functions reachable from a kernel, removing those that are un...
This header defines various interfaces for pass management in LLVM.
Interfaces for passes which infer implicit function attributes from the name and signature of functio...
This file provides the primary interface to the instcombine pass.
Defines passes for running instruction simplification across chunks of IR.
This file provides the interface for LLVM's PGO Instrumentation lowering pass.
See the comments on JumpThreadingPass.
static LVOptions Options
Definition LVOptions.cpp:25
This file implements the Loop Fusion pass.
This header defines the LoopLoadEliminationPass object.
This header provides classes for managing a pipeline of passes over loops in LLVM IR.
The header file for the LowerConstantIntrinsics pass as used by the new pass manager.
The header file for the LowerExpectIntrinsic pass as used by the new pass manager.
This pass performs merges of loads and stores on both sides of a diamond (hammock).
This file provides the interface for LLVM's Global Value Numbering pass.
This header enumerates the LLVM-provided high-level optimization levels.
This file provides the interface for IR based instrumentation passes ( (profile-gen,...
Define option tunables for PGO.
static void addAnnotationRemarksPass(ModulePassManager &MPM)
static InlineParams getInlineParamsFromOptLevel(OptimizationLevel Level)
static CoroConditionalWrapper buildCoroWrapper(ThinOrFullLTOPhase Phase)
static bool isLTOPreLink(ThinOrFullLTOPhase Phase)
static bool isLTOPostLink(ThinOrFullLTOPhase Phase)
This file implements relative lookup table converter that converts lookup tables to relative lookup t...
This file provides the interface for LLVM's Scalar Replacement of Aggregates pass.
This file provides the interface for the pseudo probe implementation for AutoFDO.
This file provides the interface for the sampled PGO loader pass.
This is the interface for a metadata-based scoped no-alias analysis.
This file provides the interface for the pass responsible for both simplifying and canonicalizing the...
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
This is the interface for a metadata-based TBAA.
Defines the virtual file system interface vfs::FileSystem.
A manager for alias analyses.
A module pass that rewrites heap allocations to use token-enabled allocation functions based on vario...
Definition AllocToken.h:36
Inlines functions marked as "always_inline".
Argument promotion pass.
Assign a GUID to functions as metadata.
Analysis pass providing a never-invalidated alias analysis result.
Simple pass that canonicalizes aliases.
A pass that merges duplicate global constants into a single constant.
This class implements a trivial dead store elimination.
Eliminate dead arguments (and return values) from functions.
A pass that transforms external global definitions into declarations.
Pass embeds a copy of the module optimized with the provided pass pipeline into a global variable.
A pass manager to run a set of extra loop passes if the MarkerTy analysis is present.
The core GVN pass object.
Definition GVN.h:128
Pass to remove unused function declarations.
Definition GlobalDCE.h:38
Optimize globals that never have their address taken.
Definition GlobalOpt.h:25
Pass to perform split of global variables.
Definition GlobalSplit.h:26
Analysis pass providing a never-invalidated alias analysis result.
Pass to outline cold regions.
Pass to perform interprocedural constant propagation.
Definition SCCP.h:48
Pass to outline similar regions.
Definition IROutliner.h:467
Run instruction simplification across each instruction in the function.
Instrumentation based profiling lowering pass.
This pass performs 'jump threading', which looks at blocks that have multiple predecessors and multip...
Performs Loop Invariant Code Motion Pass.
Definition LICM.h:66
Loop unroll pass that only does full loop unrolling and peeling.
Performs Loop Idiom Recognize Pass.
Performs Loop Inst Simplify Pass.
A simple loop rotation transformation.
Performs basic CFG simplifications to assist other loop passes.
A pass that does profile-guided sinking of instructions into loops.
Definition LoopSink.h:33
A simple loop rotation transformation.
Loop unroll pass that will support both full and partial unrolling.
Strips MemProf attributes and metadata.
Merge identical functions.
The module inliner pass for the new pass manager.
Module pass, wrapping the inliner pass.
Definition Inliner.h:65
void addModulePass(T Pass)
Add a module pass that runs before the CGSCC passes.
Definition Inliner.h:81
Class to hold module path string table and global value map, and encapsulate methods for operating on...
Simple pass that provides a name to every anonymous globals.
Additional 'norecurse' attribute deduction during postlink LTO phase.
OpenMP optimizations pass.
Definition OpenMPOpt.h:42
static LLVM_ABI const OptimizationLevel O3
Optimize for fast execution as much as possible.
static LLVM_ABI const OptimizationLevel Oz
A very specialized mode that will optimize for code size at any and all costs.
static LLVM_ABI const OptimizationLevel O0
Disable as many optimizations as possible.
static LLVM_ABI const OptimizationLevel Os
Similar to O2 but tries to optimize for small code size instead of fast execution without triggering ...
static LLVM_ABI const OptimizationLevel O2
Optimize for fast execution as much as possible without triggering significant incremental compile ti...
static LLVM_ABI const OptimizationLevel O1
Optimize quickly without destroying debuggability.
The indirect function call promotion pass.
The instrumentation (profile-instr-gen) pass for IR based PGO.
The instrumentation (profile-instr-gen) pass for IR based PGO.
The profile annotation (profile-instr-use) pass for IR based PGO.
The profile size based optimization pass for memory intrinsics.
Pass to remove unused function declarations.
LLVM_ABI void invokeFullLinkTimeOptimizationLastEPCallbacks(ModulePassManager &MPM, OptimizationLevel Level)
LLVM_ABI ModuleInlinerWrapperPass buildInlinerPipeline(OptimizationLevel Level, ThinOrFullLTOPhase Phase)
Construct the module pipeline that performs inlining as well as the inlining-driven cleanups.
LLVM_ABI void invokeOptimizerEarlyEPCallbacks(ModulePassManager &MPM, OptimizationLevel Level, ThinOrFullLTOPhase Phase)
LLVM_ABI void invokeVectorizerStartEPCallbacks(FunctionPassManager &FPM, OptimizationLevel Level)
LLVM_ABI AAManager buildDefaultAAPipeline()
Build the default AAManager with the default alias analysis pipeline registered.
LLVM_ABI void invokeCGSCCOptimizerLateEPCallbacks(CGSCCPassManager &CGPM, OptimizationLevel Level)
LLVM_ABI ModulePassManager buildThinLTOPreLinkDefaultPipeline(OptimizationLevel Level)
Build a pre-link, ThinLTO-targeting default optimization pipeline to a pass manager.
LLVM_ABI void addPGOInstrPassesForO0(ModulePassManager &MPM, bool RunProfileGen, bool IsCS, bool AtomicCounterUpdate, std::string ProfileFile, std::string ProfileRemappingFile)
Add PGO instrumentation passes for O0 only.
LLVM_ABI void invokeScalarOptimizerLateEPCallbacks(FunctionPassManager &FPM, OptimizationLevel Level)
LLVM_ABI ModulePassManager buildPerModuleDefaultPipeline(OptimizationLevel Level, ThinOrFullLTOPhase Phase=ThinOrFullLTOPhase::None)
Build a per-module default optimization pipeline.
LLVM_ABI void invokePipelineStartEPCallbacks(ModulePassManager &MPM, OptimizationLevel Level)
LLVM_ABI void invokeVectorizerEndEPCallbacks(FunctionPassManager &FPM, OptimizationLevel Level)
LLVM_ABI ModulePassManager buildO0DefaultPipeline(OptimizationLevel Level, ThinOrFullLTOPhase Phase=ThinOrFullLTOPhase::None)
Build an O0 pipeline with the minimal semantically required passes.
LLVM_ABI FunctionPassManager buildFunctionSimplificationPipeline(OptimizationLevel Level, ThinOrFullLTOPhase Phase)
Construct the core LLVM function canonicalization and simplification pipeline.
LLVM_ABI void invokePeepholeEPCallbacks(FunctionPassManager &FPM, OptimizationLevel Level)
LLVM_ABI void invokePipelineEarlySimplificationEPCallbacks(ModulePassManager &MPM, OptimizationLevel Level, ThinOrFullLTOPhase Phase)
LLVM_ABI void invokeLoopOptimizerEndEPCallbacks(LoopPassManager &LPM, OptimizationLevel Level)
LLVM_ABI ModulePassManager buildLTODefaultPipeline(OptimizationLevel Level, ModuleSummaryIndex *ExportSummary)
Build an LTO default optimization pipeline to a pass manager.
LLVM_ABI ModulePassManager buildModuleInlinerPipeline(OptimizationLevel Level, ThinOrFullLTOPhase Phase)
Construct the module pipeline that performs inlining with module inliner pass.
LLVM_ABI ModulePassManager buildThinLTODefaultPipeline(OptimizationLevel Level, const ModuleSummaryIndex *ImportSummary)
Build a ThinLTO default optimization pipeline to a pass manager.
LLVM_ABI void invokeLateLoopOptimizationsEPCallbacks(LoopPassManager &LPM, OptimizationLevel Level)
LLVM_ABI void invokeFullLinkTimeOptimizationEarlyEPCallbacks(ModulePassManager &MPM, OptimizationLevel Level)
LLVM_ABI ModulePassManager buildFatLTODefaultPipeline(OptimizationLevel Level, bool ThinLTO, bool EmitSummary)
Build a fat object default optimization pipeline.
LLVM_ABI ModulePassManager buildModuleSimplificationPipeline(OptimizationLevel Level, ThinOrFullLTOPhase Phase)
Construct the core LLVM module canonicalization and simplification pipeline.
LLVM_ABI ModulePassManager buildModuleOptimizationPipeline(OptimizationLevel Level, ThinOrFullLTOPhase LTOPhase)
Construct the core LLVM module optimization pipeline.
LLVM_ABI void invokeOptimizerLastEPCallbacks(ModulePassManager &MPM, OptimizationLevel Level, ThinOrFullLTOPhase Phase)
LLVM_ABI ModulePassManager buildLTOPreLinkDefaultPipeline(OptimizationLevel Level)
Build a pre-link, LTO-targeting default optimization pipeline to a pass manager.
LLVM_ATTRIBUTE_MINSIZE std::enable_if_t<!std::is_same_v< PassT, PassManager > > addPass(PassT &&Pass)
bool isEmpty() const
Returns if the pass manager contains any passes.
unsigned LicmMssaNoAccForPromotionCap
Tuning option to disable promotion to scalars in LICM with MemorySSA, if the number of access is too ...
Definition PassBuilder.h:78
bool SLPVectorization
Tuning option to enable/disable slp loop vectorization, set based on opt level.
Definition PassBuilder.h:56
int InlinerThreshold
Tuning option to override the default inliner threshold.
Definition PassBuilder.h:92
bool LoopFusion
Tuning option to enable/disable loop fusion. Its default value is false.
Definition PassBuilder.h:66
bool CallGraphProfile
Tuning option to enable/disable call graph profile.
Definition PassBuilder.h:82
bool MergeFunctions
Tuning option to enable/disable function merging.
Definition PassBuilder.h:89
bool ForgetAllSCEVInLoopUnroll
Tuning option to forget all SCEV loops in LoopUnroll.
Definition PassBuilder.h:70
unsigned LicmMssaOptCap
Tuning option to cap the number of calls to retrieve clobbering accesses in MemorySSA,...
Definition PassBuilder.h:74
bool LoopInterleaving
Tuning option to set loop interleaving on/off, set based on opt level.
Definition PassBuilder.h:48
LLVM_ABI PipelineTuningOptions()
Constructor sets pipeline tuning defaults based on cl::opts.
bool LoopUnrolling
Tuning option to enable/disable loop unrolling. Its default value is true.
Definition PassBuilder.h:59
bool LoopInterchange
Tuning option to enable/disable loop interchange.
Definition PassBuilder.h:63
bool LoopVectorization
Tuning option to enable/disable loop vectorization, set based on opt level.
Definition PassBuilder.h:52
Reassociate commutative expressions.
Definition Reassociate.h:74
A pass to do RPO deduction and propagation of function attributes.
This pass performs function-level constant propagation and merging.
Definition SCCP.h:30
The sample profiler data loader pass.
Analysis pass providing a never-invalidated alias analysis result.
This pass transforms loops that contain branches or switches on loop- invariant conditions to have mu...
A pass to simplify and canonicalize the CFG of a function.
Definition SimplifyCFG.h:30
Analysis pass providing a never-invalidated alias analysis result.
Optimize scalar/vector interactions in IR using target cost models.
Interfaces for registering analysis passes, producing common pass manager configurations,...
Abstract Attribute helper functions.
Definition Attributor.h:165
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
ValuesClass values(OptsTy... Options)
Helper to build a ValuesClass by forwarding a variable number of arguments as an initializer list to ...
initializer< Ty > init(const Ty &Val)
@ Assume
Do not drop type tests (default).
@ All
Drop only llvm.assumes using type test value.
This is an optimization pass for GlobalISel generic memory operations.
Definition Types.h:26
LLVM_ABI cl::opt< bool > EnableKnowledgeRetention
static cl::opt< bool > RunNewGVN("enable-newgvn", cl::init(false), cl::Hidden, cl::desc("Run the NewGVN pass"))
static cl::opt< bool > DisablePreInliner("disable-preinline", cl::init(false), cl::Hidden, cl::desc("Disable pre-instrumentation inliner"))
static cl::opt< bool > EnableJumpTableToSwitch("enable-jump-table-to-switch", cl::desc("Enable JumpTableToSwitch pass (default = off)"))
static cl::opt< bool > PerformMandatoryInliningsFirst("mandatory-inlining-first", cl::init(false), cl::Hidden, cl::desc("Perform mandatory inlinings module-wide, before performing " "inlining"))
static cl::opt< bool > RunPartialInlining("enable-partial-inlining", cl::init(false), cl::Hidden, cl::desc("Run Partial inlining pass"))
static cl::opt< bool > EnableGVNSink("enable-gvn-sink", cl::desc("Enable the GVN sinking pass (default = off)"))
static cl::opt< bool > EnableLoopHeaderDuplication("enable-loop-header-duplication", cl::init(false), cl::Hidden, cl::desc("Enable loop header duplication at any optimization level"))
static cl::opt< bool > EnableModuleInliner("enable-module-inliner", cl::init(false), cl::Hidden, cl::desc("Enable module inliner"))
static cl::opt< bool > EnableEagerlyInvalidateAnalyses("eagerly-invalidate-analyses", cl::init(true), cl::Hidden, cl::desc("Eagerly invalidate more analyses in default pipelines"))
static cl::opt< bool > EnableMatrix("enable-matrix", cl::init(false), cl::Hidden, cl::desc("Enable lowering of the matrix intrinsics"))
ModuleToFunctionPassAdaptor createModuleToFunctionPassAdaptor(FunctionPassT &&Pass, bool EagerlyInvalidate=false)
A function to deduce a function pass type and wrap it in the templated adaptor.
cl::opt< std::string > UseCtxProfile("use-ctx-profile", cl::init(""), cl::Hidden, cl::desc("Use the specified contextual profile file"))
static cl::opt< bool > EnableSampledInstr("enable-sampled-instrumentation", cl::init(false), cl::Hidden, cl::desc("Enable profile instrumentation sampling (default = off)"))
static cl::opt< bool > EnableLoopFlatten("enable-loop-flatten", cl::init(false), cl::Hidden, cl::desc("Enable the LoopFlatten Pass"))
static cl::opt< InliningAdvisorMode > UseInlineAdvisor("enable-ml-inliner", cl::init(InliningAdvisorMode::Default), cl::Hidden, cl::desc("Enable ML policy for inliner. Currently trained for -Oz only"), cl::values(clEnumValN(InliningAdvisorMode::Default, "default", "Heuristics-based inliner version"), clEnumValN(InliningAdvisorMode::Development, "development", "Use development mode (runtime-loadable model)"), clEnumValN(InliningAdvisorMode::Release, "release", "Use release mode (AOT-compiled model)")))
PassManager< LazyCallGraph::SCC, CGSCCAnalysisManager, LazyCallGraph &, CGSCCUpdateResult & > CGSCCPassManager
The CGSCC pass manager.
static cl::opt< bool > EnableUnrollAndJam("enable-unroll-and-jam", cl::init(false), cl::Hidden, cl::desc("Enable Unroll And Jam Pass"))
@ CGSCC_LIGHT
@ MODULE_LIGHT
ThinOrFullLTOPhase
This enumerates the LLVM full LTO or ThinLTO optimization phases.
Definition Pass.h:77
@ FullLTOPreLink
Full LTO prelink phase.
Definition Pass.h:85
@ ThinLTOPostLink
ThinLTO postlink (backend compile) phase.
Definition Pass.h:83
@ None
No LTO/ThinLTO behavior needed.
Definition Pass.h:79
@ FullLTOPostLink
Full LTO postlink (backend compile) phase.
Definition Pass.h:87
@ ThinLTOPreLink
ThinLTO prelink (summary) phase.
Definition Pass.h:81
PassManager< Loop, LoopAnalysisManager, LoopStandardAnalysisResults &, LPMUpdater & > LoopPassManager
The Loop pass manager.
static cl::opt< bool > EnableConstraintElimination("enable-constraint-elimination", cl::init(true), cl::Hidden, cl::desc("Enable pass to eliminate conditions based on linear constraints"))
ModuleToPostOrderCGSCCPassAdaptor createModuleToPostOrderCGSCCPassAdaptor(CGSCCPassT &&Pass)
A function to deduce a CGSCC pass type and wrap it in the templated adaptor.
static cl::opt< bool > EnablePGOInlineDeferral("enable-npm-pgo-inline-deferral", cl::init(true), cl::Hidden, cl::desc("Enable inline deferral during PGO"))
Flag to enable inline deferral during PGO.
FunctionToLoopPassAdaptor createFunctionToLoopPassAdaptor(LoopPassT &&Pass, bool UseMemorySSA=false)
A function to deduce a loop pass type and wrap it in the templated adaptor.
CGSCCToFunctionPassAdaptor createCGSCCToFunctionPassAdaptor(FunctionPassT &&Pass, bool EagerlyInvalidate=false, bool NoRerun=false)
A function to deduce a function pass type and wrap it in the templated adaptor.
cl::opt< bool > ForgetSCEVInLoopUnroll
PassManager< Module > ModulePassManager
Convenience typedef for a pass manager over modules.
static cl::opt< bool > EnablePostPGOLoopRotation("enable-post-pgo-loop-rotation", cl::init(true), cl::Hidden, cl::desc("Run the loop rotation transformation after PGO instrumentation"))
LLVM_ABI bool AreStatisticsEnabled()
Check if statistics are enabled.
static cl::opt< std::string > InstrumentColdFuncOnlyPath("instrument-cold-function-only-path", cl::init(""), cl::desc("File path for cold function only instrumentation(requires use " "with --pgo-instrument-cold-function-only)"), cl::Hidden)
static cl::opt< bool > EnableGlobalAnalyses("enable-global-analyses", cl::init(true), cl::Hidden, cl::desc("Enable inter-procedural analyses"))
static cl::opt< bool > EnableDFAJumpThreading("enable-dfa-jump-thread", cl::desc("Enable DFA jump threading"), cl::init(false), cl::Hidden)
static cl::opt< bool > FlattenedProfileUsed("flattened-profile-used", cl::init(false), cl::Hidden, cl::desc("Indicate the sample profile being used is flattened, i.e., " "no inline hierarchy exists in the profile"))
static cl::opt< AttributorRunOption > AttributorRun("attributor-enable", cl::Hidden, cl::init(AttributorRunOption::NONE), cl::desc("Enable the attributor inter-procedural deduction pass"), cl::values(clEnumValN(AttributorRunOption::FULL, "full", "enable all full attributor runs"), clEnumValN(AttributorRunOption::LIGHT, "light", "enable all attributor-light runs"), clEnumValN(AttributorRunOption::MODULE, "module", "enable module-wide attributor runs"), clEnumValN(AttributorRunOption::MODULE_LIGHT, "module-light", "enable module-wide attributor-light runs"), clEnumValN(AttributorRunOption::CGSCC, "cgscc", "enable call graph SCC attributor runs"), clEnumValN(AttributorRunOption::CGSCC_LIGHT, "cgscc-light", "enable call graph SCC attributor-light runs"), clEnumValN(AttributorRunOption::NONE, "none", "disable attributor runs")))
static cl::opt< bool > ExtraVectorizerPasses("extra-vectorizer-passes", cl::init(false), cl::Hidden, cl::desc("Run cleanup optimization passes after vectorization"))
static cl::opt< bool > EnableHotColdSplit("hot-cold-split", cl::desc("Enable hot-cold splitting pass"))
cl::opt< bool > EnableMemProfContextDisambiguation
Enable MemProf context disambiguation for thin link.
PassManager< Function > FunctionPassManager
Convenience typedef for a pass manager over functions.
LLVM_ABI InlineParams getInlineParams()
Generate the parameters to tune the inline cost analysis based only on the commandline options.
cl::opt< bool > PGOInstrumentColdFunctionOnly
static cl::opt< bool > EnableLoopInterchange("enable-loopinterchange", cl::init(false), cl::Hidden, cl::desc("Enable the LoopInterchange Pass"))
static cl::opt< bool > EnableCHR("enable-chr", cl::init(true), cl::Hidden, cl::desc("Enable control height reduction optimization (CHR)"))
static cl::opt< bool > EnableMergeFunctions("enable-merge-functions", cl::init(false), cl::Hidden, cl::desc("Enable function merging as part of the optimization pipeline"))
static cl::opt< bool > EnableDevirtualizeSpeculatively("enable-devirtualize-speculatively", cl::desc("Enable speculative devirtualization optimization"), cl::init(false))
static cl::opt< bool > EnableGVNHoist("enable-gvn-hoist", cl::desc("Enable the GVN hoisting pass (default = off)"))
cl::opt< unsigned > SetLicmMssaNoAccForPromotionCap
static cl::opt< bool > EnableIROutliner("ir-outliner", cl::init(false), cl::Hidden, cl::desc("Enable ir outliner pass"))
static cl::opt< int > PreInlineThreshold("preinline-threshold", cl::Hidden, cl::init(75), cl::desc("Control the amount of inlining in pre-instrumentation inliner " "(default = 75)"))
static cl::opt< bool > UseLoopVersioningLICM("enable-loop-versioning-licm", cl::init(false), cl::Hidden, cl::desc("Enable the experimental Loop Versioning LICM pass"))
cl::opt< unsigned > MaxDevirtIterations("max-devirt-iterations", cl::ReallyHidden, cl::init(4))
cl::opt< unsigned > SetLicmMssaOptCap
A DCE pass that assumes instructions are dead until proven otherwise.
Definition ADCE.h:31
Pass to convert @llvm.global.annotations to !annotation metadata.
This pass attempts to minimize the number of assumes without losing any information.
A more lightweight version of the Attributor which only runs attribute inference but no simplificatio...
A more lightweight version of the Attributor which only runs attribute inference but no simplificatio...
Hoist/decompose integer division and remainder instructions to enable CFG improvements and better cod...
Definition DivRemPairs.h:23
A simple and fast domtree-based CSE pass.
Definition EarlyCSE.h:31
Pass which forces specific function attributes into the IR, primarily as a debugging tool.
Statistics pass for the FunctionPropertiesAnalysis results.
A simple and fast domtree-based GVN pass to hoist common expressions from sibling branches.
Definition GVN.h:415
Uses an "inverted" value numbering to decide the similarity of expressions and sinks similar expressi...
Definition GVN.h:422
A set of parameters to control various transforms performed by IPSCCP pass.
Definition SCCP.h:35
A pass which infers function attributes from the names and signatures of function declarations in a m...
Provides context on when an inline advisor is constructed in the pipeline (e.g., link phase,...
Thresholds to tune inline cost analysis.
Definition InlineCost.h:207
std::optional< int > HotCallSiteThreshold
Threshold to use when the callsite is considered hot.
Definition InlineCost.h:224
int DefaultThreshold
The default threshold to start with for a callee.
Definition InlineCost.h:209
std::optional< bool > EnableDeferral
Indicate whether we should allow inline deferral.
Definition InlineCost.h:237
std::optional< int > HintThreshold
Threshold to use for callees with inline hint.
Definition InlineCost.h:212
Options for the frontend instrumentation based profiling pass.
A no-op pass template which simply forces a specific analysis result to be invalidated.
Pass to forward loads in a loop around the backedge to subsequent iterations.
A set of parameters used to control various transforms performed by the LoopUnroll pass.
The LoopVectorize Pass.
Computes function attributes in post-order over the call graph.
A utility pass template to force an analysis result to be available.