LLVM 22.0.0git
PassBuilderPipelines.cpp
Go to the documentation of this file.
1//===- Construction of pass pipelines -------------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8/// \file
9///
10/// This file provides the implementation of the PassBuilder based on our
11/// static pass registry as well as related functionality. It also provides
12/// helpers to aid in analyzing, debugging, and testing passes and pass
13/// pipelines.
14///
15//===----------------------------------------------------------------------===//
16
17#include "llvm/ADT/Statistic.h"
29#include "llvm/IR/PassManager.h"
30#include "llvm/Pass.h"
153
154using namespace llvm;
155
156namespace llvm {
157
159 "enable-ml-inliner", cl::init(InliningAdvisorMode::Default), cl::Hidden,
160 cl::desc("Enable ML policy for inliner. Currently trained for -Oz only"),
162 "Heuristics-based inliner version"),
164 "Use development mode (runtime-loadable model)"),
166 "Use release mode (AOT-compiled model)")));
167
168/// Flag to enable inline deferral during PGO.
169static cl::opt<bool>
170 EnablePGOInlineDeferral("enable-npm-pgo-inline-deferral", cl::init(true),
172 cl::desc("Enable inline deferral during PGO"));
173
/// Flag for the module inliner: hidden boolean command-line option
/// `enable-module-inliner`, initialized to false.
174static cl::opt<bool> EnableModuleInliner("enable-module-inliner",
175 cl::init(false), cl::Hidden,
176 cl::desc("Enable module inliner"));
177
179 "mandatory-inlining-first", cl::init(false), cl::Hidden,
180 cl::desc("Perform mandatory inlinings module-wide, before performing "
181 "inlining"));
182
184 "eagerly-invalidate-analyses", cl::init(true), cl::Hidden,
185 cl::desc("Eagerly invalidate more analyses in default pipelines"));
186
188 "enable-merge-functions", cl::init(false), cl::Hidden,
189 cl::desc("Enable function merging as part of the optimization pipeline"));
190
192 "enable-post-pgo-loop-rotation", cl::init(true), cl::Hidden,
193 cl::desc("Run the loop rotation transformation after PGO instrumentation"));
194
196 "enable-global-analyses", cl::init(true), cl::Hidden,
197 cl::desc("Enable inter-procedural analyses"));
198
/// Flag to run the partial inlining pass: hidden boolean command-line option
/// `enable-partial-inlining`, initialized to false.
199static cl::opt<bool> RunPartialInlining("enable-partial-inlining",
200 cl::init(false), cl::Hidden,
201 cl::desc("Run Partial inlining pass"));
202
204 "extra-vectorizer-passes", cl::init(false), cl::Hidden,
205 cl::desc("Run cleanup optimization passes after vectorization"));
206
/// Flag to run NewGVN: hidden boolean command-line option `enable-newgvn`,
/// initialized to false. The function simplification pipeline in this file
/// adds NewGVNPass when it is set and GVNPass otherwise.
207static cl::opt<bool> RunNewGVN("enable-newgvn", cl::init(false), cl::Hidden,
208 cl::desc("Run the NewGVN pass"));
209
/// Flag for the LoopInterchange pass: hidden boolean command-line option
/// `enable-loopinterchange`, initialized to false.
210static cl::opt<bool>
211 EnableLoopInterchange("enable-loopinterchange", cl::init(false), cl::Hidden,
212 cl::desc("Enable the LoopInterchange Pass"));
213
/// Flag for the Unroll And Jam pass: hidden boolean command-line option
/// `enable-unroll-and-jam`, initialized to false.
214static cl::opt<bool> EnableUnrollAndJam("enable-unroll-and-jam",
215 cl::init(false), cl::Hidden,
216 cl::desc("Enable Unroll And Jam Pass"));
217
218static cl::opt<bool> EnableLoopFlatten("enable-loop-flatten", cl::init(false),
220 cl::desc("Enable the LoopFlatten Pass"));
221
222// Experimentally allow loop header duplication. This should allow for better
223// optimization at Oz, since loop-idiom recognition can then recognize things
224// like memcpy. If this ends up being useful for many targets, we should drop
225// this flag and make a code generation option that can be controlled
226// independent of the opt level and exposed through the frontend.
228 "enable-loop-header-duplication", cl::init(false), cl::Hidden,
229 cl::desc("Enable loop header duplication at any optimization level"));
230
/// Flag for DFA jump threading: hidden boolean command-line option
/// `enable-dfa-jump-thread`, initialized to false.
231static cl::opt<bool>
232 EnableDFAJumpThreading("enable-dfa-jump-thread",
233 cl::desc("Enable DFA jump threading"),
234 cl::init(false), cl::Hidden);
235
/// Flag for the hot-cold splitting pass: boolean command-line option
/// `hot-cold-split`. Unlike most flags in this file it is declared without
/// cl::Hidden and without an explicit cl::init value.
236static cl::opt<bool>
237 EnableHotColdSplit("hot-cold-split",
238 cl::desc("Enable hot-cold splitting pass"));
239
240static cl::opt<bool> EnableIROutliner("ir-outliner", cl::init(false),
242 cl::desc("Enable ir outliner pass"));
243
/// Flag to turn off the pre-instrumentation inliner: hidden boolean
/// command-line option `disable-preinline`, initialized to false.
244static cl::opt<bool>
245 DisablePreInliner("disable-preinline", cl::init(false), cl::Hidden,
246 cl::desc("Disable pre-instrumentation inliner"));
247
249 "preinline-threshold", cl::Hidden, cl::init(75),
250 cl::desc("Control the amount of inlining in pre-instrumentation inliner "
251 "(default = 75)"));
252
/// Flag for GVN hoisting: boolean command-line option `enable-gvn-hoist`,
/// declared without cl::Hidden or an explicit cl::init (its description states
/// the default is off). When set, the function simplification pipeline in this
/// file adds GVNHoistPass.
253static cl::opt<bool>
254 EnableGVNHoist("enable-gvn-hoist",
255 cl::desc("Enable the GVN hoisting pass (default = off)"));
256
/// Flag for GVN sinking: boolean command-line option `enable-gvn-sink`,
/// declared without cl::Hidden or an explicit cl::init (its description states
/// the default is off). When set, the function simplification pipeline in this
/// file adds GVNSinkPass followed by a SimplifyCFGPass.
257static cl::opt<bool>
258 EnableGVNSink("enable-gvn-sink",
259 cl::desc("Enable the GVN sinking pass (default = off)"));
260
262 "enable-jump-table-to-switch",
263 cl::desc("Enable JumpTableToSwitch pass (default = off)"));
264
265// This option is used in simplifying testing SampleFDO optimizations for
266// profile loading.
// NOTE(review): the sentence above talks about SampleFDO profile loading, while
// the option defined here controls control height reduction (CHR); it may
// belong to a neighbouring option instead -- confirm.
/// Flag for control height reduction (CHR): hidden boolean command-line option
/// `enable-chr`, initialized to true.
267static cl::opt<bool>
268 EnableCHR("enable-chr", cl::init(true), cl::Hidden,
269 cl::desc("Enable control height reduction optimization (CHR)"));
270
272 "flattened-profile-used", cl::init(false), cl::Hidden,
273 cl::desc("Indicate the sample profile being used is flattened, i.e., "
274 "no inline hierarchy exists in the profile"));
275
/// Flag for lowering the matrix intrinsics: hidden boolean command-line option
/// `enable-matrix`, initialized to false.
276static cl::opt<bool>
277 EnableMatrix("enable-matrix", cl::init(false), cl::Hidden,
278 cl::desc("Enable lowering of the matrix intrinsics"));
279
281 "enable-constraint-elimination", cl::init(true), cl::Hidden,
282 cl::desc(
283 "Enable pass to eliminate conditions based on linear constraints"));
284
286 "attributor-enable", cl::Hidden, cl::init(AttributorRunOption::NONE),
287 cl::desc("Enable the attributor inter-procedural deduction pass"),
289 "enable all attributor runs"),
291 "enable module-wide attributor runs"),
293 "enable call graph SCC attributor runs"),
294 clEnumValN(AttributorRunOption::NONE, "none",
295 "disable attributor runs")));
296
298 "enable-sampled-instrumentation", cl::init(false), cl::Hidden,
299 cl::desc("Enable profile instrumentation sampling (default = off)"));
301 "enable-loop-versioning-licm", cl::init(false), cl::Hidden,
302 cl::desc("Enable the experimental Loop Versioning LICM pass"));
303
305 "instrument-cold-function-only-path", cl::init(""),
306 cl::desc("File path for cold function only instrumentation(requires use "
307 "with --pgo-instrument-cold-function-only)"),
308 cl::Hidden);
309
310// TODO: There is a similar flag in WPD pass, we should consolidate them by
311// parsing the option only once in PassBuilder and share it across both places.
313 "enable-devirtualize-speculatively",
314 cl::desc("Enable speculative devirtualization optimization"),
315 cl::init(false));
316
319
321} // namespace llvm
322
340
341namespace llvm {
343} // namespace llvm
344
346 OptimizationLevel Level) {
347 for (auto &C : PeepholeEPCallbacks)
348 C(FPM, Level);
349}
352 for (auto &C : LateLoopOptimizationsEPCallbacks)
353 C(LPM, Level);
354}
356 OptimizationLevel Level) {
357 for (auto &C : LoopOptimizerEndEPCallbacks)
358 C(LPM, Level);
359}
362 for (auto &C : ScalarOptimizerLateEPCallbacks)
363 C(FPM, Level);
364}
366 OptimizationLevel Level) {
367 for (auto &C : CGSCCOptimizerLateEPCallbacks)
368 C(CGPM, Level);
369}
371 OptimizationLevel Level) {
372 for (auto &C : VectorizerStartEPCallbacks)
373 C(FPM, Level);
374}
376 OptimizationLevel Level) {
377 for (auto &C : VectorizerEndEPCallbacks)
378 C(FPM, Level);
379}
381 OptimizationLevel Level,
383 for (auto &C : OptimizerEarlyEPCallbacks)
384 C(MPM, Level, Phase);
385}
387 OptimizationLevel Level,
389 for (auto &C : OptimizerLastEPCallbacks)
390 C(MPM, Level, Phase);
391}
394 for (auto &C : FullLinkTimeOptimizationEarlyEPCallbacks)
395 C(MPM, Level);
396}
399 for (auto &C : FullLinkTimeOptimizationLastEPCallbacks)
400 C(MPM, Level);
401}
403 OptimizationLevel Level) {
404 for (auto &C : PipelineStartEPCallbacks)
405 C(MPM, Level);
406}
409 for (auto &C : PipelineEarlySimplificationEPCallbacks)
410 C(MPM, Level, Phase);
411}
412
413// Helper to add AnnotationRemarksPass.
416 // Count the stats for InstCount and FunctionPropertiesAnalysis
417 if (AreStatisticsEnabled()) {
419 MPM.addPass(
421 }
422}
423
424// Helper to check if the current compilation phase is preparing for LTO
429
430// Helper to check if the current compilation phase is LTO backend
435
436// Helper to conditionally wrap Coro passes.
438 // TODO: Skip passes according to Phase.
439 ModulePassManager CoroPM;
440 CoroPM.addPass(CoroEarlyPass());
441 CGSCCPassManager CGPM;
442 CGPM.addPass(CoroSplitPass());
443 CoroPM.addPass(createModuleToPostOrderCGSCCPassAdaptor(std::move(CGPM)));
444 CoroPM.addPass(CoroCleanupPass());
445 CoroPM.addPass(GlobalDCEPass());
446 return CoroConditionalWrapper(std::move(CoroPM));
447}
448
449// TODO: Investigate the cost/benefit of tail call elimination on debugging.
451PassBuilder::buildO1FunctionSimplificationPipeline(OptimizationLevel Level,
453
455
457 FPM.addPass(CountVisitsPass());
458
459 // Form SSA out of local memory accesses after breaking apart aggregates into
460 // scalars.
461 FPM.addPass(SROAPass(SROAOptions::ModifyCFG));
462
463 // Catch trivial redundancies
464 FPM.addPass(EarlyCSEPass(true /* Enable mem-ssa. */));
465
466 // Hoisting of scalars and load expressions.
467 FPM.addPass(
468 SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(true)));
469 FPM.addPass(InstCombinePass());
470
471 FPM.addPass(LibCallsShrinkWrapPass());
472
473 invokePeepholeEPCallbacks(FPM, Level);
474
475 FPM.addPass(
476 SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(true)));
477
478 // Form canonically associated expression trees, and simplify the trees using
479 // basic mathematical properties. For example, this will form (nearly)
480 // minimal multiplication trees.
481 FPM.addPass(ReassociatePass());
482
483 // Add the primary loop simplification pipeline.
484 // FIXME: Currently this is split into two loop pass pipelines because we run
485 // some function passes in between them. These can and should be removed
486 // and/or replaced by scheduling the loop pass equivalents in the correct
487 // positions. But those equivalent passes aren't powerful enough yet.
488 // Specifically, `SimplifyCFGPass` and `InstCombinePass` are currently still
488 // used. We have `LoopSimplifyCFGPass` which isn't yet powerful enough to
490 // fully replace `SimplifyCFGPass`, and the closest to the other we have is
491 // `LoopInstSimplify`.
492 LoopPassManager LPM1, LPM2;
493
494 // Simplify the loop body. We do this initially to clean up after other loop
495 // passes run, either when iterating on a loop or on inner loops with
496 // implications on the outer loop.
497 LPM1.addPass(LoopInstSimplifyPass());
498 LPM1.addPass(LoopSimplifyCFGPass());
499
500 // Try to remove as much code from the loop header as possible,
501 // to reduce amount of IR that will have to be duplicated. However,
502 // do not perform speculative hoisting the first time as LICM
503 // will destroy metadata that may not need to be destroyed if run
504 // after loop rotation.
505 // TODO: Investigate promotion cap for O1.
506 LPM1.addPass(LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap,
507 /*AllowSpeculation=*/false));
508
509 LPM1.addPass(LoopRotatePass(/* Disable header duplication */ true,
511 // TODO: Investigate promotion cap for O1.
512 LPM1.addPass(LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap,
513 /*AllowSpeculation=*/true));
514 LPM1.addPass(SimpleLoopUnswitchPass());
516 LPM1.addPass(LoopFlattenPass());
517
518 LPM2.addPass(LoopIdiomRecognizePass());
519 LPM2.addPass(IndVarSimplifyPass());
520
522
523 LPM2.addPass(LoopDeletionPass());
524
525 // Do not enable unrolling in PreLinkThinLTO phase during sample PGO
526 // because it changes IR, which makes profile annotation in the backend compile
527 // inaccurate. The normal unroller doesn't pay attention to forced full unroll
528 // attributes so we need to make sure and allow the full unroll pass to pay
529 // attention to it.
530 if (Phase != ThinOrFullLTOPhase::ThinLTOPreLink || !PGOOpt ||
531 PGOOpt->Action != PGOOptions::SampleUse)
532 LPM2.addPass(LoopFullUnrollPass(Level.getSpeedupLevel(),
533 /* OnlyWhenForced= */ !PTO.LoopUnrolling,
534 PTO.ForgetAllSCEVInLoopUnroll));
535
537
538 FPM.addPass(createFunctionToLoopPassAdaptor(std::move(LPM1),
539 /*UseMemorySSA=*/true));
540 FPM.addPass(
541 SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(true)));
542 FPM.addPass(InstCombinePass());
543 // The loop passes in LPM2 (LoopFullUnrollPass) do not preserve MemorySSA.
544 // *All* loop passes must preserve it, in order to be able to use it.
545 FPM.addPass(createFunctionToLoopPassAdaptor(std::move(LPM2),
546 /*UseMemorySSA=*/false));
547
548 // Delete small array after loop unroll.
549 FPM.addPass(SROAPass(SROAOptions::ModifyCFG));
550
551 // Specially optimize memory movement as it doesn't look like dataflow in SSA.
552 FPM.addPass(MemCpyOptPass());
553
554 // Sparse conditional constant propagation.
555 // FIXME: It isn't clear why we do this *after* loop passes rather than
556 // before...
557 FPM.addPass(SCCPPass());
558
559 // Delete dead bit computations (instcombine runs after to fold away the dead
560 // computations, and then ADCE will run later to exploit any new DCE
561 // opportunities that creates).
562 FPM.addPass(BDCEPass());
563
564 // Run instcombine after redundancy and dead bit elimination to exploit
565 // opportunities opened up by them.
566 FPM.addPass(InstCombinePass());
567 invokePeepholeEPCallbacks(FPM, Level);
568
569 FPM.addPass(CoroElidePass());
570
572
573 // Finally, do an expensive DCE pass to catch all the dead code exposed by
574 // the simplifications and basic cleanup after all the simplifications.
575 // TODO: Investigate if this is too expensive.
576 FPM.addPass(ADCEPass());
577 FPM.addPass(
578 SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(true)));
579 FPM.addPass(InstCombinePass());
580 invokePeepholeEPCallbacks(FPM, Level);
581
582 return FPM;
583}
584
588 assert(Level != OptimizationLevel::O0 && "Must request optimizations!");
589
590 // The O1 pipeline has a separate pipeline creation function to simplify
591 // construction readability.
592 if (Level.getSpeedupLevel() == 1)
593 return buildO1FunctionSimplificationPipeline(Level, Phase);
594
596
599
600 // Form SSA out of local memory accesses after breaking apart aggregates into
601 // scalars.
603
604 // Catch trivial redundancies
605 FPM.addPass(EarlyCSEPass(true /* Enable mem-ssa. */));
608
609 // Hoisting of scalars and load expressions.
610 if (EnableGVNHoist)
611 FPM.addPass(GVNHoistPass());
612
613 // Global value numbering based sinking.
614 if (EnableGVNSink) {
615 FPM.addPass(GVNSinkPass());
616 FPM.addPass(
617 SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(true)));
618 }
619
620 // Speculative execution if the target has divergent branches; otherwise nop.
621 FPM.addPass(SpeculativeExecutionPass(/* OnlyIfDivergentTarget =*/true));
622
623 // Optimize based on known information about branches, and cleanup afterward.
626
627 // Jump table to switch conversion.
632
633 FPM.addPass(
634 SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(true)));
637
638 if (!Level.isOptimizingForSize())
640
641 invokePeepholeEPCallbacks(FPM, Level);
642
643 // For PGO use pipeline, try to optimize memory intrinsics such as memcpy
644 // using the size value profile. Don't perform this when optimizing for size.
645 if (PGOOpt && PGOOpt->Action == PGOOptions::IRUse &&
646 !Level.isOptimizingForSize())
648
649 FPM.addPass(TailCallElimPass(/*UpdateFunctionEntryCount=*/
650 isInstrumentedPGOUse()));
651 FPM.addPass(
652 SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(true)));
653
654 // Form canonically associated expression trees, and simplify the trees using
655 // basic mathematical properties. For example, this will form (nearly)
656 // minimal multiplication trees.
658
661
662 // Add the primary loop simplification pipeline.
663 // FIXME: Currently this is split into two loop pass pipelines because we run
664 // some function passes in between them. These can and should be removed
665 // and/or replaced by scheduling the loop pass equivalents in the correct
666 // positions. But those equivalent passes aren't powerful enough yet.
667 // Specifically, `SimplifyCFGPass` and `InstCombinePass` are currently still
668 // used. We have `LoopSimplifyCFGPass` which isn't yet powerful enough to
669 // fully replace `SimplifyCFGPass`, and the closest to the other we have is
670 // `LoopInstSimplify`.
671 LoopPassManager LPM1, LPM2;
672
673 // Simplify the loop body. We do this initially to clean up after other loop
674 // passes run, either when iterating on a loop or on inner loops with
675 // implications on the outer loop.
676 LPM1.addPass(LoopInstSimplifyPass());
677 LPM1.addPass(LoopSimplifyCFGPass());
678
679 // Try to remove as much code from the loop header as possible,
680 // to reduce amount of IR that will have to be duplicated. However,
681 // do not perform speculative hoisting the first time as LICM
682 // will destroy metadata that may not need to be destroyed if run
683 // after loop rotation.
684 // TODO: Investigate promotion cap for O1.
685 LPM1.addPass(LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap,
686 /*AllowSpeculation=*/false));
687
688 // Disable header duplication in loop rotation at -Oz.
690 Level != OptimizationLevel::Oz,
692 // TODO: Investigate promotion cap for O1.
693 LPM1.addPass(LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap,
694 /*AllowSpeculation=*/true));
695 LPM1.addPass(
696 SimpleLoopUnswitchPass(/* NonTrivial */ Level == OptimizationLevel::O3));
698 LPM1.addPass(LoopFlattenPass());
699
700 LPM2.addPass(LoopIdiomRecognizePass());
701 LPM2.addPass(IndVarSimplifyPass());
702
703 {
705 ExtraPasses.addPass(SimpleLoopUnswitchPass(/* NonTrivial */ Level ==
707 LPM2.addPass(std::move(ExtraPasses));
708 }
709
711
712 LPM2.addPass(LoopDeletionPass());
713
714 // Do not enable unrolling in PreLinkThinLTO phase during sample PGO
715 // because it changes IR, which makes profile annotation in the backend compile
716 // inaccurate. The normal unroller doesn't pay attention to forced full unroll
717 // attributes so we need to make sure and allow the full unroll pass to pay
718 // attention to it.
719 if (Phase != ThinOrFullLTOPhase::ThinLTOPreLink || !PGOOpt ||
720 PGOOpt->Action != PGOOptions::SampleUse)
721 LPM2.addPass(LoopFullUnrollPass(Level.getSpeedupLevel(),
722 /* OnlyWhenForced= */ !PTO.LoopUnrolling,
723 PTO.ForgetAllSCEVInLoopUnroll));
724
726
727 FPM.addPass(createFunctionToLoopPassAdaptor(std::move(LPM1),
728 /*UseMemorySSA=*/true));
729 FPM.addPass(
730 SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(true)));
732 // The loop passes in LPM2 (LoopIdiomRecognizePass, IndVarSimplifyPass,
733 // LoopDeletionPass and LoopFullUnrollPass) do not preserve MemorySSA.
734 // *All* loop passes must preserve it, in order to be able to use it.
735 FPM.addPass(createFunctionToLoopPassAdaptor(std::move(LPM2),
736 /*UseMemorySSA=*/false));
737
738 // Delete small array after loop unroll.
740
741 // Try vectorization/scalarization transforms that are both improvements
742 // themselves and can allow further folds with GVN and InstCombine.
743 FPM.addPass(VectorCombinePass(/*TryEarlyFoldsOnly=*/true));
744
745 // Eliminate redundancies.
747 if (RunNewGVN)
748 FPM.addPass(NewGVNPass());
749 else
750 FPM.addPass(GVNPass());
751
752 // Sparse conditional constant propagation.
753 // FIXME: It isn't clear why we do this *after* loop passes rather than
754 // before...
755 FPM.addPass(SCCPPass());
756
757 // Delete dead bit computations (instcombine runs after to fold away the dead
758 // computations, and then ADCE will run later to exploit any new DCE
759 // opportunities that creates).
760 FPM.addPass(BDCEPass());
761
762 // Run instcombine after redundancy and dead bit elimination to exploit
763 // opportunities opened up by them.
765 invokePeepholeEPCallbacks(FPM, Level);
766
767 // Re-consider control flow based optimizations after redundancy elimination,
768 // redo DCE, etc.
771
774
775 // Finally, do an expensive DCE pass to catch all the dead code exposed by
776 // the simplifications and basic cleanup after all the simplifications.
777 // TODO: Investigate if this is too expensive.
778 FPM.addPass(ADCEPass());
779
780 // Specially optimize memory movement as it doesn't look like dataflow in SSA.
781 FPM.addPass(MemCpyOptPass());
782
783 FPM.addPass(DSEPass());
785
787 LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap,
788 /*AllowSpeculation=*/true),
789 /*UseMemorySSA=*/true));
790
791 FPM.addPass(CoroElidePass());
792
794
796 .convertSwitchRangeToICmp(true)
797 .convertSwitchToArithmetic(true)
798 .hoistCommonInsts(true)
799 .sinkCommonInsts(true)));
801 invokePeepholeEPCallbacks(FPM, Level);
802
803 return FPM;
804}
805
806void PassBuilder::addRequiredLTOPreLinkPasses(ModulePassManager &MPM) {
809}
810
811void PassBuilder::addPreInlinerPasses(ModulePassManager &MPM,
812 OptimizationLevel Level,
813 ThinOrFullLTOPhase LTOPhase) {
814 assert(Level != OptimizationLevel::O0 && "Not expecting O0 here!");
816 return;
817 InlineParams IP;
818
820
821 // FIXME: The hint threshold has the same value used by the regular inliner
822 // when not optimizing for size. This should probably be lowered after
823 // performance testing.
824 // FIXME: this comment is cargo culted from the old pass manager, revisit.
825 IP.HintThreshold = Level.isOptimizingForSize() ? PreInlineThreshold : 325;
827 IP, /* MandatoryFirst */ true,
829 CGSCCPassManager &CGPipeline = MIWP.getPM();
830
832 FPM.addPass(SROAPass(SROAOptions::ModifyCFG));
833 FPM.addPass(EarlyCSEPass()); // Catch trivial redundancies.
834 FPM.addPass(SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(
835 true))); // Merge & remove basic blocks.
836 FPM.addPass(InstCombinePass()); // Combine silly sequences.
837 invokePeepholeEPCallbacks(FPM, Level);
838
839 CGPipeline.addPass(createCGSCCToFunctionPassAdaptor(
840 std::move(FPM), PTO.EagerlyInvalidateAnalyses));
841
842 MPM.addPass(std::move(MIWP));
843
844 // Delete anything that is now dead to make sure that we don't instrument
845 // dead code. Instrumentation can end up keeping dead code around and
846 // dramatically increase code size.
847 MPM.addPass(GlobalDCEPass());
848}
849
850void PassBuilder::addPostPGOLoopRotation(ModulePassManager &MPM,
851 OptimizationLevel Level) {
853 // Disable header duplication in loop rotation at -Oz.
856 LoopRotatePass(EnableLoopHeaderDuplication ||
857 Level != OptimizationLevel::Oz),
858 /*UseMemorySSA=*/false),
859 PTO.EagerlyInvalidateAnalyses));
860 }
861}
862
/// Add the IR-level PGO passes to \p MPM for a non-O0 pipeline.
///
/// When \p RunProfileGen is false this is the profile-use path: it adds
/// PGOInstrumentationUse followed by a RequireAnalysisPass for
/// ProfileSummaryAnalysis, then returns. Otherwise it is the profile-generation
/// path: it adds PGOInstrumentationGen, calls addPostPGOLoopRotation, and
/// finally adds InstrProfilingLoweringPass configured through InstrProfOptions.
///
/// \param MPM                  Module pass manager the passes are appended to.
/// \param Level                Optimization level; asserted not to be O0.
/// \param RunProfileGen        True to instrument, false to consume a profile.
/// \param IsCS                 Forwarded to the use, gen and lowering passes,
///                             and copied into Options.UseBFIInPromotion.
/// \param AtomicCounterUpdate  Copied into Options.Atomic.
/// \param ProfileFile          Profile to read on the use path (asserted
///                             non-empty there); on the generation path, used
///                             as Options.InstrProfileOutput when non-empty.
/// \param ProfileRemappingFile Forwarded to PGOInstrumentationUse.
863void PassBuilder::addPGOInstrPasses(ModulePassManager &MPM,
864 OptimizationLevel Level, bool RunProfileGen,
865 bool IsCS, bool AtomicCounterUpdate,
866 std::string ProfileFile,
867 std::string ProfileRemappingFile) {
868 assert(Level != OptimizationLevel::O0 && "Not expecting O0 here!");
869
// Profile-use path: annotate the IR from the profile and stop.
870 if (!RunProfileGen) {
871 assert(!ProfileFile.empty() && "Profile use expecting a profile file!");
// NOTE(review): `FS` is not declared in this function; presumably it is a
// PassBuilder member -- confirm.
872 MPM.addPass(
873 PGOInstrumentationUse(ProfileFile, ProfileRemappingFile, IsCS, FS));
874 // Cache ProfileSummaryAnalysis once to avoid the potential need to insert
875 // RequireAnalysisPass for PSI before subsequent non-module passes.
876 MPM.addPass(RequireAnalysisPass<ProfileSummaryAnalysis, Module>());
877 return;
878 }
879
880 // Perform PGO instrumentation.
// NOTE(review): the remainder of this statement (the ':' arm of the
// conditional and the closing parentheses) is not visible in this listing.
881 MPM.addPass(PGOInstrumentationGen(IsCS ? PGOInstrumentationType::CSFDO
883
884 addPostPGOLoopRotation(MPM, Level);
885 // Add the profile lowering pass.
886 InstrProfOptions Options;
887 if (!ProfileFile.empty())
888 Options.InstrProfileOutput = ProfileFile;
889 // Do counter promotion at Level greater than O0.
890 Options.DoCounterPromotion = true;
891 Options.UseBFIInPromotion = IsCS;
// The sampled-instrumentation flag turns sampling on and overrides the
// promotion default set above.
892 if (EnableSampledInstr) {
893 Options.Sampling = true;
894 // With sampling, there is little benefit to enabling counter promotion.
895 // But note that sampling does work with counter promotion.
896 Options.DoCounterPromotion = false;
897 }
898 Options.Atomic = AtomicCounterUpdate;
899 MPM.addPass(InstrProfilingLoweringPass(Options, IsCS));
900}
901
903 bool RunProfileGen, bool IsCS,
904 bool AtomicCounterUpdate,
905 std::string ProfileFile,
906 std::string ProfileRemappingFile) {
907 if (!RunProfileGen) {
908 assert(!ProfileFile.empty() && "Profile use expecting a profile file!");
909 MPM.addPass(
910 PGOInstrumentationUse(ProfileFile, ProfileRemappingFile, IsCS, FS));
911 // Cache ProfileSummaryAnalysis once to avoid the potential need to insert
912 // RequireAnalysisPass for PSI before subsequent non-module passes.
914 return;
915 }
916
917 // Perform PGO instrumentation.
920 // Add the profile lowering pass.
922 if (!ProfileFile.empty())
923 Options.InstrProfileOutput = ProfileFile;
924 // Do not do counter promotion at O0.
925 Options.DoCounterPromotion = false;
926 Options.UseBFIInPromotion = IsCS;
927 Options.Atomic = AtomicCounterUpdate;
929}
930
932 return getInlineParams(Level.getSpeedupLevel(), Level.getSizeLevel());
933}
934
938 InlineParams IP;
939 if (PTO.InlinerThreshold == -1)
940 IP = getInlineParamsFromOptLevel(Level);
941 else
942 IP = getInlineParams(PTO.InlinerThreshold);
943 // For PreLinkThinLTO + SamplePGO or PreLinkFullLTO + SamplePGO,
944 // set hot-caller threshold to 0 to disable hot
945 // callsite inline (as much as possible [1]) because it makes
946 // profile annotation in the backend inaccurate.
947 //
948 // [1] Note the cost of a function could be below zero due to erased
949 // prologue / epilogue.
950 if (isLTOPreLink(Phase) && PGOOpt && PGOOpt->Action == PGOOptions::SampleUse)
952
953 if (PGOOpt)
955
959
960 // Require the GlobalsAA analysis for the module so we can query it within
961 // the CGSCC pipeline.
963 MIWP.addModulePass(RequireAnalysisPass<GlobalsAA, Module>());
964 // Invalidate AAManager so it can be recreated and pick up the newly
965 // available GlobalsAA.
966 MIWP.addModulePass(
968 }
969
970 // Require the ProfileSummaryAnalysis for the module so we can query it within
971 // the inliner pass.
973
974 // Now begin the main postorder CGSCC pipeline.
975 // FIXME: The current CGSCC pipeline has its origins in the legacy pass
976 // manager and trying to emulate its precise behavior. Much of this doesn't
977 // make a lot of sense and we should revisit the core CGSCC structure.
978 CGSCCPassManager &MainCGPipeline = MIWP.getPM();
979
980 // Note: historically, the PruneEH pass was run first to deduce nounwind and
981 // generally clean up exception handling overhead. It isn't clear this is
982 // valuable as the inliner doesn't currently care whether it is inlining an
983 // invoke or a call.
984
986 MainCGPipeline.addPass(AttributorCGSCCPass());
987
988 // Deduce function attributes. We do another run of this after the function
989 // simplification pipeline, so this only needs to run when it could affect the
990 // function simplification pipeline, which is only the case with recursive
991 // functions.
992 MainCGPipeline.addPass(PostOrderFunctionAttrsPass(/*SkipNonRecursive*/ true));
993
994 // When at O3 add argument promotion to the pass pipeline.
995 // FIXME: It isn't at all clear why this should be limited to O3.
996 if (Level == OptimizationLevel::O3)
997 MainCGPipeline.addPass(ArgumentPromotionPass());
998
999 // Try to perform OpenMP specific optimizations. This is a (quick!) no-op if
1000 // there are no OpenMP runtime calls present in the module.
1001 if (Level == OptimizationLevel::O2 || Level == OptimizationLevel::O3)
1002 MainCGPipeline.addPass(OpenMPOptCGSCCPass(Phase));
1003
1004 invokeCGSCCOptimizerLateEPCallbacks(MainCGPipeline, Level);
1005
1006 // Add the core function simplification pipeline nested inside the
1007 // CGSCC walk.
1010 PTO.EagerlyInvalidateAnalyses, /*NoRerun=*/true));
1011
1012 // Finally, deduce any function attributes based on the fully simplified
1013 // function.
1014 MainCGPipeline.addPass(PostOrderFunctionAttrsPass());
1015
1016 // Mark that the function is fully simplified and that it shouldn't be
1017 // simplified again if we somehow revisit it due to CGSCC mutations unless
1018 // it's been modified since.
1021
1023 MainCGPipeline.addPass(CoroSplitPass(Level != OptimizationLevel::O0));
1024 MainCGPipeline.addPass(CoroAnnotationElidePass());
1025 }
1026
1027 // Make sure we don't affect potential future NoRerun CGSCC adaptors.
1028 MIWP.addLateModulePass(createModuleToFunctionPassAdaptor(
1030
1031 return MIWP;
1032}
1033
1038
1040 // For PreLinkThinLTO + SamplePGO or PreLinkFullLTO + SamplePGO,
1041 // set hot-caller threshold to 0 to disable hot
1042 // callsite inline (as much as possible [1]) because it makes
1043 // profile annotation in the backend inaccurate.
1044 //
1045 // [1] Note the cost of a function could be below zero due to erased
1046 // prologue / epilogue.
1047 if (isLTOPreLink(Phase) && PGOOpt && PGOOpt->Action == PGOOptions::SampleUse)
1048 IP.HotCallSiteThreshold = 0;
1049
1050 if (PGOOpt)
1052
1053 // The inline deferral logic is used to avoid losing some
1054 // inlining chance in future. It is helpful in SCC inliner, in which
1055 // inlining is processed in bottom-up order.
1056 // While in module inliner, the inlining order is a priority-based order
1057 // by default. The inline deferral is unnecessary there. So we disable the
1058 // inline deferral logic in module inliner.
1059 IP.EnableDeferral = false;
1060
1063 MPM.addPass(GlobalOptPass());
1064 MPM.addPass(GlobalDCEPass());
1065 MPM.addPass(PGOCtxProfFlatteningPass(/*IsPreThinlink=*/false));
1066 }
1067
1070 PTO.EagerlyInvalidateAnalyses));
1071
1075 MPM.addPass(
1077 }
1078
1079 return MPM;
1080}
1081
1085 assert(Level != OptimizationLevel::O0 &&
1086 "Should not be used for O0 pipeline");
1087
1089 "FullLTOPostLink shouldn't call buildModuleSimplificationPipeline!");
1090
1092
1093 // Place pseudo probe instrumentation as the first pass of the pipeline to
1094 // minimize the impact of optimization changes.
1095 if (PGOOpt && PGOOpt->PseudoProbeForProfiling &&
1098
1099 bool HasSampleProfile = PGOOpt && (PGOOpt->Action == PGOOptions::SampleUse);
1100
1101 // In ThinLTO mode, when flattened profile is used, all the available
1102 // profile information will be annotated in PreLink phase so there is
1103 // no need to load the profile again in PostLink.
1104 bool LoadSampleProfile =
1105 HasSampleProfile &&
1107
1108 // During the ThinLTO backend phase we perform early indirect call promotion
1109 // here, before globalopt. Otherwise imported available_externally functions
1110 // look unreferenced and are removed. If we are going to load the sample
1111 // profile then defer until later.
1112 // TODO: See if we can move later and consolidate with the location where
1113 // we perform ICP when we are loading a sample profile.
1114 // TODO: We pass HasSampleProfile (whether there was a sample profile file
1115 // passed to the compile) to the SamplePGO flag of ICP. This is used to
1116 // determine whether the new direct calls are annotated with prof metadata.
1117 // Ideally this should be determined from whether the IR is annotated with
1118 // sample profile, and not whether a sample profile was provided on the
1119 // command line. E.g. for flattened profiles where we will not be reloading
1120 // the sample profile in the ThinLTO backend, we ideally shouldn't have to
1121 // provide the sample profile file.
1122 if (Phase == ThinOrFullLTOPhase::ThinLTOPostLink && !LoadSampleProfile)
1123 MPM.addPass(PGOIndirectCallPromotion(true /* InLTO */, HasSampleProfile));
1124
1125 // Create an early function pass manager to cleanup the output of the
1126 // frontend. Not necessary with LTO post link pipelines since the pre link
1127 // pipeline already cleaned up the frontend output.
1129 // Do basic inference of function attributes from known properties of system
1130 // libraries and other oracles.
1132 MPM.addPass(CoroEarlyPass());
1133
1134 FunctionPassManager EarlyFPM;
1135 EarlyFPM.addPass(EntryExitInstrumenterPass(/*PostInlining=*/false));
1136 // Lower llvm.expect to metadata before attempting transforms.
1137 // Compare/branch metadata may alter the behavior of passes like
1138 // SimplifyCFG.
1140 EarlyFPM.addPass(SimplifyCFGPass());
1142 EarlyFPM.addPass(EarlyCSEPass());
1143 if (Level == OptimizationLevel::O3)
1144 EarlyFPM.addPass(CallSiteSplittingPass());
1146 std::move(EarlyFPM), PTO.EagerlyInvalidateAnalyses));
1147 }
1148
1149 if (LoadSampleProfile) {
1150 // Annotate sample profile right after early FPM to ensure freshness of
1151 // the debug info.
1153 PGOOpt->ProfileFile, PGOOpt->ProfileRemappingFile, Phase, FS));
1154 // Cache ProfileSummaryAnalysis once to avoid the potential need to insert
1155 // RequireAnalysisPass for PSI before subsequent non-module passes.
1157 // Do not invoke ICP in the LTOPrelink phase as it makes it hard
1158 // for the profile annotation to be accurate in the LTO backend.
1159 if (!isLTOPreLink(Phase))
1160 // We perform early indirect call promotion here, before globalopt.
1161 // This is important for the ThinLTO backend phase because otherwise
1162 // imported available_externally functions look unreferenced and are
1163 // removed.
1164 MPM.addPass(
1165 PGOIndirectCallPromotion(true /* IsInLTO */, true /* SamplePGO */));
1166 }
1167
1168 // Try to perform OpenMP specific optimizations on the module. This is a
1169 // (quick!) no-op if there are no OpenMP runtime calls present in the module.
1171
1173 MPM.addPass(AttributorPass());
1174
1175 // Lower type metadata and the type.test intrinsic in the ThinLTO
1176 // post link pipeline after ICP. This is to enable usage of the type
1177 // tests in ICP sequences.
1179 MPM.addPass(LowerTypeTestsPass(nullptr, nullptr,
1181
1183
1184 // Interprocedural constant propagation now that basic cleanup has occurred
1185 // and prior to optimizing globals.
1186 // FIXME: This position in the pipeline hasn't been carefully considered in
1187 // years, it should be re-analyzed.
1188 MPM.addPass(IPSCCPPass(
1189 IPSCCPOptions(/*AllowFuncSpec=*/
1190 Level != OptimizationLevel::Os &&
1191 Level != OptimizationLevel::Oz &&
1192 !isLTOPreLink(Phase))));
1193
1194 // Attach metadata to indirect call sites indicating the set of functions
1195 // they may target at run-time. This should follow IPSCCP.
1197
1198 // Optimize globals to try and fold them into constants.
1199 MPM.addPass(GlobalOptPass());
1200
1201 // Create a small function pass pipeline to cleanup after all the global
1202 // optimizations.
1203 FunctionPassManager GlobalCleanupPM;
1204 // FIXME: Should this instead be a run of SROA?
1205 GlobalCleanupPM.addPass(PromotePass());
1206 GlobalCleanupPM.addPass(InstCombinePass());
1207 invokePeepholeEPCallbacks(GlobalCleanupPM, Level);
1208 GlobalCleanupPM.addPass(
1209 SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(true)));
1210 MPM.addPass(createModuleToFunctionPassAdaptor(std::move(GlobalCleanupPM),
1211 PTO.EagerlyInvalidateAnalyses));
1212
1213 // We already asserted this happens in non-FullLTOPostLink earlier.
1214 const bool IsPreLink = Phase != ThinOrFullLTOPhase::ThinLTOPostLink;
1215 // Enable contextual profiling instrumentation.
1216 const bool IsCtxProfGen =
1218 const bool IsPGOPreLink = !IsCtxProfGen && PGOOpt && IsPreLink;
1219 const bool IsPGOInstrGen =
1220 IsPGOPreLink && PGOOpt->Action == PGOOptions::IRInstr;
1221 const bool IsPGOInstrUse =
1222 IsPGOPreLink && PGOOpt->Action == PGOOptions::IRUse;
1223 const bool IsMemprofUse = IsPGOPreLink && !PGOOpt->MemoryProfile.empty();
1224 // We don't want to mix pgo ctx gen and pgo gen; we also don't currently
1225 // enable ctx profiling from the frontend.
1227 "Enabling both instrumented PGO and contextual instrumentation is not "
1228 "supported.");
1229 const bool IsCtxProfUse =
1231
1232 assert(
1234 "--instrument-cold-function-only-path is provided but "
1235 "--pgo-instrument-cold-function-only is not enabled");
1236 const bool IsColdFuncOnlyInstrGen = PGOInstrumentColdFunctionOnly &&
1237 IsPGOPreLink &&
1239
1240 if (IsPGOInstrGen || IsPGOInstrUse || IsMemprofUse || IsCtxProfGen ||
1241 IsCtxProfUse || IsColdFuncOnlyInstrGen)
1242 addPreInlinerPasses(MPM, Level, Phase);
1243
1244 // Add all the requested passes for instrumentation PGO, if requested.
1245 if (IsPGOInstrGen || IsPGOInstrUse) {
1246 addPGOInstrPasses(MPM, Level,
1247 /*RunProfileGen=*/IsPGOInstrGen,
1248 /*IsCS=*/false, PGOOpt->AtomicCounterUpdate,
1249 PGOOpt->ProfileFile, PGOOpt->ProfileRemappingFile);
1250 } else if (IsCtxProfGen || IsCtxProfUse) {
1252 // In pre-link, we just want the instrumented IR. We use the contextual
1253 // profile in the post-thinlink phase.
1254 // The instrumentation will be removed in post-thinlink after IPO.
1255 // FIXME(mtrofin): move AssignGUIDPass if there is agreement to use this
1256 // mechanism for GUIDs.
1257 MPM.addPass(AssignGUIDPass());
1258 if (IsCtxProfUse) {
1259 MPM.addPass(PGOCtxProfFlatteningPass(/*IsPreThinlink=*/true));
1260 return MPM;
1261 }
1262 // Block further inlining in the instrumented ctxprof case. This avoids
1263 // confusingly collecting profiles for the same GUID corresponding to
1264 // different variants of the function. We could do like PGO and identify
1265 // functions by a (GUID, Hash) tuple, but since the ctxprof "use" waits for
1266 // thinlto to happen before performing any further optimizations, it's
1267 // unnecessary to collect profiles for non-prevailing copies.
1269 addPostPGOLoopRotation(MPM, Level);
1271 } else if (IsColdFuncOnlyInstrGen) {
1272 addPGOInstrPasses(MPM, Level, /* RunProfileGen */ true, /* IsCS */ false,
1273 /* AtomicCounterUpdate */ false,
1275 /* ProfileRemappingFile */ "");
1276 }
1277
1278 if (IsPGOInstrGen || IsPGOInstrUse || IsCtxProfGen)
1279 MPM.addPass(PGOIndirectCallPromotion(false, false));
1280
1281 if (IsPGOPreLink && PGOOpt->CSAction == PGOOptions::CSIRInstr)
1282 MPM.addPass(PGOInstrumentationGenCreateVar(PGOOpt->CSProfileGenFile,
1284
1285 if (IsMemprofUse)
1286 MPM.addPass(MemProfUsePass(PGOOpt->MemoryProfile, FS));
1287
1288 if (PGOOpt && (PGOOpt->Action == PGOOptions::IRUse ||
1289 PGOOpt->Action == PGOOptions::SampleUse))
1290 MPM.addPass(PGOForceFunctionAttrsPass(PGOOpt->ColdOptType));
1291
1292 MPM.addPass(AlwaysInlinerPass(/*InsertLifetimeIntrinsics=*/true));
1293
1296 else
1297 MPM.addPass(buildInlinerPipeline(Level, Phase));
1298
1299 // Remove any dead arguments exposed by cleanups, constant folding globals,
1300 // and argument promotion.
1302
1305
1307 MPM.addPass(CoroCleanupPass());
1308
1309 // Optimize globals now that functions are fully simplified.
1310 MPM.addPass(GlobalOptPass());
1311 MPM.addPass(GlobalDCEPass());
1312
1313 return MPM;
1314}
1315
/// Appends the vectorization-stage function passes visible below to FPM:
/// cleanup after the loop optimization passes, optional extra cleanup passes,
/// SLP vectorization (when PTO.SLPVectorization is set), VectorCombine, late
/// loop unrolling, and a final InstCombine/LICM/alignment cleanup.
///
/// \param Level    Optimization level; its speedup level gates the extra
///                 vectorizer cleanup passes and is forwarded to the unroll
///                 passes.
/// \param LTOPhase Current LTO phase. FullLTOPostLink performs the unrolling
///                 before SLP vectorization (instead of after VectorCombine)
///                 and adds SCCP/InstCombine/BDCE; LTO pre-link phases skip
///                 dropping dereferenceable assumes.
///
/// NOTE(review): the parameter line that declares FPM (1318) and several
/// statement lines are absent from this listing (see the gaps in the embedded
/// line numbering) -- confirm against the real source before relying on it.
1316/// TODO: Should LTO cause any differences to this set of passes?
1317void PassBuilder::addVectorPasses(OptimizationLevel Level,
1319 ThinOrFullLTOPhase LTOPhase) {
1320 const bool IsFullLTO = LTOPhase == ThinOrFullLTOPhase::FullLTOPostLink;
1321
1324
1325 // Drop dereferenceable assumes after vectorization, as they are no longer
1326 // needed and can inhibit further optimization.
1327 if (!isLTOPreLink(LTOPhase))
1328 FPM.addPass(DropUnnecessaryAssumesPass(/*DropDereferenceable=*/true));
1329
1331 if (IsFullLTO) {
1332 // The vectorizer may have significantly shortened a loop body; unroll
1333 // again. Unroll small loops to hide loop backedge latency and saturate any
1334 // parallel execution resources of an out-of-order processor. We also then
1335 // need to clean up redundancies and loop invariant code.
1336 // FIXME: It would be really good to use a loop-integrated instruction
1337 // combiner for cleanup here so that the unrolling and LICM can be pipelined
1338 // across the loop nests.
1339 // We do UnrollAndJam in a separate LPM to ensure it happens before unroll.
1342 LoopUnrollAndJamPass(Level.getSpeedupLevel())));
1344 Level.getSpeedupLevel(), /*OnlyWhenForced=*/!PTO.LoopUnrolling,
1347 // Now that we are done with loop unrolling, be it either by LoopVectorizer,
1348 // or LoopUnroll passes, some variable-offset GEPs into allocas could have
1349 // become constant-offset, thus enabling SROA and alloca promotion. Do so.
1350 // NOTE: we are very late in the pipeline, and we don't have any LICM
1351 // or SimplifyCFG passes scheduled after us that would clean up
1352 // the CFG mess this may create if allowed to modify the CFG, so forbid that.
1354 }
1355
1356 if (!IsFullLTO) {
1357 // Eliminate loads by forwarding stores from the previous iteration to loads
1358 // of the current iteration.
1360 }
1361 // Cleanup after the loop optimization passes.
1362 FPM.addPass(InstCombinePass());
1363
1364 if (Level.getSpeedupLevel() > 1 && ExtraVectorizerPasses) {
1365 ExtraFunctionPassManager<ShouldRunExtraVectorPasses> ExtraPasses;
1366 // At higher optimization levels, try to clean up any runtime overlap and
1367 // alignment checks inserted by the vectorizer. We want to track correlated
1368 // runtime checks for two inner loops in the same outer loop, fold any
1369 // common computations, hoist loop-invariant aspects out of any outer loop,
1370 // and unswitch the runtime checks if possible. Once hoisted, we may have
1371 // dead (or speculatable) control flows or more combining opportunities.
1372 ExtraPasses.addPass(EarlyCSEPass());
1373 ExtraPasses.addPass(CorrelatedValuePropagationPass());
1374 ExtraPasses.addPass(InstCombinePass());
1375 LoopPassManager LPM;
1376 LPM.addPass(LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap,
1377 /*AllowSpeculation=*/true));
1378 LPM.addPass(SimpleLoopUnswitchPass(/* NonTrivial */ Level ==
1380 ExtraPasses.addPass(
1381 createFunctionToLoopPassAdaptor(std::move(LPM), /*UseMemorySSA=*/true));
1382 ExtraPasses.addPass(
1383 SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(true)));
1384 ExtraPasses.addPass(InstCombinePass());
1385 FPM.addPass(std::move(ExtraPasses));
1386 }
1387
1388 // Now that we've formed fast to execute loop structures, we do further
1389 // optimizations. These are run afterward as they might block doing complex
1390 // analyses and transforms such as what are needed for loop vectorization.
1391
1392 // Cleanup after loop vectorization, etc. Simplification passes like CVP and
1393 // GVN, loop transforms, and others have already run, so it's now better to
1394 // convert to more optimized IR using more aggressive simplify CFG options.
1395 // The extra sinking transform can create larger basic blocks, so do this
1396 // before SLP vectorization.
1397 FPM.addPass(SimplifyCFGPass(SimplifyCFGOptions()
1398 .forwardSwitchCondToPhi(true)
1399 .convertSwitchRangeToICmp(true)
1400 .convertSwitchToArithmetic(true)
1401 .convertSwitchToLookupTable(true)
1402 .needCanonicalLoops(false)
1403 .hoistCommonInsts(true)
1404 .sinkCommonInsts(true)));
1405
 // Full LTO post-link only: extra SCCP + InstCombine + BDCE round before SLP.
1406 if (IsFullLTO) {
1407 FPM.addPass(SCCPPass());
1408 FPM.addPass(InstCombinePass());
1409 FPM.addPass(BDCEPass());
1410 }
1411
1412 // Optimize parallel scalar instruction chains into SIMD instructions.
1413 if (PTO.SLPVectorization) {
1414 FPM.addPass(SLPVectorizerPass());
 // Same gate as the extra vectorizer passes above: follow SLP with EarlyCSE.
1415 if (Level.getSpeedupLevel() > 1 && ExtraVectorizerPasses) {
1416 FPM.addPass(EarlyCSEPass());
1417 }
1418 }
1419 // Enhance/cleanup vector code.
1420 FPM.addPass(VectorCombinePass());
1421
1422 if (!IsFullLTO) {
1423 FPM.addPass(InstCombinePass());
1424 // Unroll small loops to hide loop backedge latency and saturate any
1425 // parallel execution resources of an out-of-order processor. We also then
1426 // need to clean up redundancies and loop invariant code.
1427 // FIXME: It would be really good to use a loop-integrated instruction
1428 // combiner for cleanup here so that the unrolling and LICM can be pipelined
1429 // across the loop nests.
1430 // We do UnrollAndJam in a separate LPM to ensure it happens before unroll.
1431 if (EnableUnrollAndJam && PTO.LoopUnrolling) {
1433 LoopUnrollAndJamPass(Level.getSpeedupLevel())));
1434 }
1435 FPM.addPass(LoopUnrollPass(LoopUnrollOptions(
1436 Level.getSpeedupLevel(), /*OnlyWhenForced=*/!PTO.LoopUnrolling,
1437 PTO.ForgetAllSCEVInLoopUnroll)));
1438 FPM.addPass(WarnMissedTransformationsPass());
1439 // Now that we are done with loop unrolling, be it either by LoopVectorizer,
1440 // or LoopUnroll passes, some variable-offset GEPs into allocas could have
1441 // become constant-offset, thus enabling SROA and alloca promotion. Do so.
1442 // NOTE: we are very late in the pipeline, and we don't have any LICM
1443 // or SimplifyCFG passes scheduled after us that would clean up
1444 // the CFG mess this may create if allowed to modify the CFG, so forbid that.
1445 FPM.addPass(SROAPass(SROAOptions::PreserveCFG));
1446 }
1447
1448 FPM.addPass(InferAlignmentPass());
1449 FPM.addPass(InstCombinePass());
1450
1451 // This is needed for two reasons:
1452 // 1. It works around problems that instcombine introduces, such as sinking
1453 // expensive FP divides into loops containing multiplications using the
1454 // divide result.
1455 // 2. It helps to clean up some loop-invariant code created by the loop
1456 // unroll pass when IsFullLTO=false.
1458 LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap,
1459 /*AllowSpeculation=*/true),
1460 /*UseMemorySSA=*/true));
1461
1462 // Now that we've vectorized and unrolled loops, we may have more refined
1463 // alignment information, try to re-derive it here.
1464 FPM.addPass(AlignmentFromAssumptionsPass());
1465}
1466
1469 ThinOrFullLTOPhase LTOPhase) {
 // Builds the module-level optimization pipeline: module passes are added to
 // MPM, the function-level work is collected in OptimizePM (including the
 // passes from addVectorPasses) and adapted into MPM, and MPM is returned.
 // Several steps are skipped when LTOPhase is an LTO pre-link phase.
 // NOTE(review): the start of the signature and the declaration of MPM are
 // absent from this listing (gaps in the embedded line numbering) -- confirm
 // against the real source.
1470 const bool LTOPreLink = isLTOPreLink(LTOPhase);
1472
1473 // Run partial inlining pass to partially inline functions that have
1474 // large bodies.
1477
1478 // Remove available_externally functions and global definitions since we
1479 // aren't compiling an object file for later LTO. For LTO we want to preserve these so they
1480 // are eligible for inlining at link-time. Note if they are unreferenced they
1481 // will be removed by GlobalDCE later, so this only impacts referenced
1482 // available externally globals. Eventually they will be suppressed during
1483 // codegen, but eliminating here enables more opportunity for GlobalDCE as it
1484 // may make globals referenced by available external functions dead and saves
1485 // running remaining passes on the eliminated functions. These should be
1486 // preserved during prelinking for link-time inlining decisions.
1487 if (!LTOPreLink)
1489
1490 // Do RPO function attribute inference across the module to forward-propagate
1491 // attributes where applicable.
1492 // FIXME: Is this really an optimization rather than a canonicalization?
1494
1495 // Do a post inline PGO instrumentation and use pass. This is a context
1496 // sensitive PGO pass. We don't want to do this in the LTOPreLink phase as
1497 // cross-module inlining has not been done yet. The context sensitive
1498 // instrumentation is after all the inlines are done.
1499 if (!LTOPreLink && PGOOpt) {
1500 if (PGOOpt->CSAction == PGOOptions::CSIRInstr)
1501 addPGOInstrPasses(MPM, Level, /*RunProfileGen=*/true,
1502 /*IsCS=*/true, PGOOpt->AtomicCounterUpdate,
1503 PGOOpt->CSProfileGenFile, PGOOpt->ProfileRemappingFile);
1504 else if (PGOOpt->CSAction == PGOOptions::CSIRUse)
1505 addPGOInstrPasses(MPM, Level, /*RunProfileGen=*/false,
1506 /*IsCS=*/true, PGOOpt->AtomicCounterUpdate,
1507 PGOOpt->ProfileFile, PGOOpt->ProfileRemappingFile);
1508 }
1509
1510 // Re-compute GlobalsAA here prior to function passes. This is particularly
1511 // useful as the above will have inlined, DCE'ed, and function-attr
1512 // propagated everything. We should at this point have a reasonably minimal
1513 // and richly annotated call graph. By computing aliasing and mod/ref
1514 // information for all local globals here, the late loop passes and notably
1515 // the vectorizer will be able to use them to help recognize vectorizable
1516 // memory operations.
1519
1520 invokeOptimizerEarlyEPCallbacks(MPM, Level, LTOPhase);
1521
1522 FunctionPassManager OptimizePM;
1523
1524 // Only drop unnecessary assumes post-inline and post-link, as otherwise
1525 // additional uses of the affected value may be introduced through inlining
1526 // and CSE.
1527 if (!isLTOPreLink(LTOPhase))
1528 OptimizePM.addPass(DropUnnecessaryAssumesPass());
1529
1530 // Schedule LoopVersioningLICM when inlining is over, because after that
1531 // we may see more accurate aliasing. The reason to run this late is that too
1532 // early versioning may prevent further inlining due to the increase in code
1533 // size. Other optimizations which run later might benefit from the no-alias
1534 // assumption in the cloned loop.
1536 OptimizePM.addPass(
1538 // LoopVersioningLICM pass might increase new LICM opportunities.
1540 LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap,
1541 /*AllowSpeculation=*/true),
1542 /*UseMemorySSA=*/true));
1543 }
1544
1545 OptimizePM.addPass(Float2IntPass());
1547
 // When matrix support is enabled, lower the matrix intrinsics and follow up
 // with EarlyCSE.
1548 if (EnableMatrix) {
1549 OptimizePM.addPass(LowerMatrixIntrinsicsPass());
1550 OptimizePM.addPass(EarlyCSEPass());
1551 }
1552
1553 // CHR pass should only be applied with the profile information.
1554 // The profile summary information is checked inside CHR itself.
1555 if (EnableCHR && Level == OptimizationLevel::O3)
1556 OptimizePM.addPass(ControlHeightReductionPass());
1557
1558 // FIXME: We need to run some loop optimizations to re-rotate loops after
1559 // simplifycfg and others undo their rotation.
1560
1561 // Optimize the loop execution. These passes operate on entire loop nests
1562 // rather than on each loop in an inside-out manner, and so they are actually
1563 // function passes.
1564
1565 invokeVectorizerStartEPCallbacks(OptimizePM, Level);
1566
1567 LoopPassManager LPM;
1568 // First rotate loops that may have been un-rotated by prior passes.
1569 // Disable header duplication at -Oz.
1571 Level != OptimizationLevel::Oz,
1572 LTOPreLink));
1573 // Some loops may have become dead by now. Try to delete them.
1574 // FIXME: see discussion in https://reviews.llvm.org/D112851,
1575 // this may need to be revisited once we run GVN before loop deletion
1576 // in the simplification pipeline.
1577 LPM.addPass(LoopDeletionPass());
1578
1579 if (PTO.LoopInterchange)
1580 LPM.addPass(LoopInterchangePass());
1581
1582 OptimizePM.addPass(
1583 createFunctionToLoopPassAdaptor(std::move(LPM), /*UseMemorySSA=*/false));
1584
1585 // FIXME: This may not be the right place in the pipeline.
1586 // We need to have the data to support the right place.
1587 if (PTO.LoopFusion)
1588 OptimizePM.addPass(LoopFusePass());
1589
1590 // Distribute loops to allow partial vectorization. I.e. isolate dependences
1591 // into separate loop that would otherwise inhibit vectorization. This is
1592 // currently only performed for loops marked with the metadata
1593 // llvm.loop.distribute=true or when -enable-loop-distribute is specified.
1594 OptimizePM.addPass(LoopDistributePass());
1595
1596 // Populates the VFABI attribute with the scalar-to-vector mappings
1597 // from the TargetLibraryInfo.
1598 OptimizePM.addPass(InjectTLIMappings());
1599
1600 addVectorPasses(Level, OptimizePM, LTOPhase);
1601
1602 invokeVectorizerEndEPCallbacks(OptimizePM, Level);
1603
1604 // LoopSink pass sinks instructions hoisted by LICM, which serves as a
1605 // canonicalization pass that enables other optimizations. As a result,
1606 // LoopSink pass needs to be a very late IR pass to avoid undoing LICM
1607 // result too early.
1608 OptimizePM.addPass(LoopSinkPass());
1609
1610 // And finally clean up LCSSA form before generating code.
1611 OptimizePM.addPass(InstSimplifyPass());
1612
1613 // This hoists/decomposes div/rem ops. It should run after other sink/hoist
1614 // passes to avoid re-sinking, but before SimplifyCFG because it can allow
1615 // flattening of blocks.
1616 OptimizePM.addPass(DivRemPairsPass());
1617
1618 // Try to annotate calls that were created during optimization.
1619 OptimizePM.addPass(
1620 TailCallElimPass(/*UpdateFunctionEntryCount=*/isInstrumentedPGOUse()));
1621
1622 // LoopSink (and other loop passes since the last simplifyCFG) might have
1623 // resulted in single-entry-single-exit or empty blocks. Clean up the CFG.
1624 OptimizePM.addPass(
1626 .convertSwitchRangeToICmp(true)
1627 .convertSwitchToArithmetic(true)
1628 .speculateUnpredictables(true)
1629 .hoistLoadsStoresWithCondFaulting(true)));
1630
1631 // Add the core optimizing pipeline.
1632 MPM.addPass(createModuleToFunctionPassAdaptor(std::move(OptimizePM),
1633 PTO.EagerlyInvalidateAnalyses));
1634
1635 // AllocToken transforms heap allocation calls; this needs to run late after
1636 // other allocation call transformations (such as those in InstCombine).
1637 if (!LTOPreLink)
1638 MPM.addPass(AllocTokenPass());
1639
1640 invokeOptimizerLastEPCallbacks(MPM, Level, LTOPhase);
1641
1642 // Split out cold code. Splitting is done late to avoid hiding context from
1643 // other optimizations and inadvertently regressing performance. The tradeoff
1644 // is that this has a higher code size cost than splitting early.
1645 if (EnableHotColdSplit && !LTOPreLink)
1647
1648 // Search the code for similar regions of code. If enough similar regions can
1649 // be found where extracting the regions into their own function will decrease
1650 // the size of the program, we extract the regions and deduplicate the
1651 // structurally similar regions.
1652 if (EnableIROutliner)
1653 MPM.addPass(IROutlinerPass());
1654
1655 // Now we need to do some global optimization transforms.
1656 // FIXME: It would seem like these should come first in the optimization
1657 // pipeline and maybe be the bottom of the canonicalization pipeline? Weird
1658 // ordering here.
1659 MPM.addPass(GlobalDCEPass());
1661
1662 // Merge functions if requested. It has a better chance to merge functions
1663 // after ConstantMerge folded jump tables.
1664 if (PTO.MergeFunctions)
1666
1667 if (PTO.CallGraphProfile && !LTOPreLink)
1668 MPM.addPass(CGProfilePass(isLTOPostLink(LTOPhase)));
1669
1670 // RelLookupTableConverterPass runs later in LTO post-link pipeline.
1671 if (!LTOPreLink)
1673
1674 // Add devirtualization pass only when LTO is not enabled, as otherwise
1675 // the pass is already enabled in the LTO pipeline.
1676 if (PTO.DevirtualizeSpeculatively && LTOPhase == ThinOrFullLTOPhase::None) {
1677 // TODO: explore a better pipeline configuration that can improve
1678 // compilation time overhead.
1680 /*ExportSummary*/ nullptr,
1681 /*ImportSummary*/ nullptr,
1682 /*DevirtSpeculatively*/ PTO.DevirtualizeSpeculatively));
1683 MPM.addPass(LowerTypeTestsPass(nullptr, nullptr,
1685 // Given that the devirtualization creates more opportunities for inlining,
1686 // we run the Inliner again here to maximize the optimization gain we
1687 // get from devirtualization.
1688 // Also, we can't run devirtualization before inlining because the
1689 // devirtualization depends on the passes optimizing/eliminating vtable GVs
1690 // and those passes are only effective after inlining.
1691 if (EnableModuleInliner) {
1695 } else {
1698 /* MandatoryFirst */ true,
1700 }
1701 }
1702 return MPM;
1703}
1704
// Body of a default pipeline builder taking (Level, Phase): -O0 is delegated
// to buildO0DefaultPipeline; otherwise the steps described by the comments
// below are assembled into MPM, the required LTO pre-link passes are appended
// when Phase is an LTO pre-link phase, and MPM is returned.
// NOTE(review): the signature and most statement lines are absent from this
// listing (gaps in the embedded line numbering); presumably this is
// PassBuilder::buildPerModuleDefaultPipeline -- confirm against the real source.
1708 if (Level == OptimizationLevel::O0)
1709 return buildO0DefaultPipeline(Level, Phase);
1710
1712
1713 // Currently this pipeline is only invoked in an LTO pre link pass or when we
1714 // are not running LTO. If that changes the below checks may need updating.
1716
1717 // If we are invoking this in non-LTO mode, remove any MemProf related
1718 // attributes and metadata, as we don't know whether we are linking with
1719 // a library containing the necessary interfaces.
1722
1723 // Convert @llvm.global.annotations to !annotation metadata.
1725
1726 // Force any function attributes we want the rest of the pipeline to observe.
1728
1729 if (PGOOpt && PGOOpt->DebugInfoForProfiling)
1731
1732 // Apply module pipeline start EP callback.
1734
1735 // Add the core simplification pipeline.
1737
1738 // Now add the optimization pipeline.
1740
1741 if (PGOOpt && PGOOpt->PseudoProbeForProfiling &&
1742 PGOOpt->Action == PGOOptions::SampleUse)
1744
1745 // Emit annotation remarks.
1747
 // LTO pre-link compiles must also carry the passes required for LTO.
1748 if (isLTOPreLink(Phase))
1749 addRequiredLTOPreLinkPasses(MPM);
1750 return MPM;
1751}
1752
1755 bool EmitSummary) {
 // FatLTO pipeline: embed the bitcode (EmbedBitcodePass), clean up the IR
 // (FatLtoCleanup), then either run the ThinLTO post-link pipeline (ThinLTO
 // with a sample profile) or the module optimization path, and return MPM.
 // NOTE(review): the start of the signature and several statement lines are
 // absent from this listing (gaps in the embedded line numbering) -- confirm
 // against the real source.
1757 if (ThinLTO)
1759 else
1761 MPM.addPass(EmbedBitcodePass(ThinLTO, EmitSummary));
1762
1763 // Perform any cleanups to the IR that aren't suitable for per TU compilation,
1764 // like removing CFI/WPD related instructions. Note, we reuse
1765 // LowerTypeTestsPass to clean up type tests rather than duplicate that logic
1766 // in FatLtoCleanup.
1767 MPM.addPass(FatLtoCleanup());
1768
1769 // If we're doing FatLTO w/ CFI enabled, we don't want the type tests in the
1770 // object code, only in the bitcode section, so drop it before we run
1771 // module optimization and generate machine code. If llvm.type.test() isn't in
1772 // the IR, this won't do anything.
1773 MPM.addPass(
1775
1776 // Use the ThinLTO post-link pipeline with sample profiling.
1777 if (ThinLTO && PGOOpt && PGOOpt->Action == PGOOptions::SampleUse)
1778 MPM.addPass(buildThinLTODefaultPipeline(Level, /*ImportSummary=*/nullptr));
1779 else {
1780 // ModuleSimplification does not run the coroutine passes for
1781 // ThinLTOPreLink, so we need the coroutine passes to run for ThinLTO
1782 // builds, otherwise they will miscompile.
1783 if (ThinLTO) {
1784 // TODO: replace w/ buildCoroWrapper() when it takes phase and level into
1785 // consideration.
1786 CGSCCPassManager CGPM;
1790 MPM.addPass(CoroCleanupPass());
1791 }
1792
1793 // Otherwise, just use module optimization.
1794 MPM.addPass(
1796 // Emit annotation remarks.
1798 }
1799 return MPM;
1800}
1801
// Body of a ThinLTO pre-link pipeline builder: the module is only simplified
// here (see the comments below), the required LTO pre-link passes are always
// appended before returning MPM, and the pipeline stops early when
// UseCtxProfile is non-empty.
// NOTE(review): the signature and most statement lines are absent from this
// listing (gaps in the embedded line numbering); presumably this is
// PassBuilder::buildThinLTOPreLinkDefaultPipeline -- confirm against the real
// source.
1804 if (Level == OptimizationLevel::O0)
1806
1808
1809 // Convert @llvm.global.annotations to !annotation metadata.
1811
1812 // Force any function attributes we want the rest of the pipeline to observe.
1814
1815 if (PGOOpt && PGOOpt->DebugInfoForProfiling)
1817
1818 // Apply module pipeline start EP callback.
1820
1821 // If we are planning to perform ThinLTO later, we don't bloat the code with
1822 // unrolling/vectorization/... now. Just simplify the module as much as we
1823 // can.
1826 // In pre-link, for ctx prof use, we stop here with an instrumented IR. We let
1827 // thinlto use the contextual info to perform imports; then use the contextual
1828 // profile in the post-thinlink phase.
1829 if (!UseCtxProfile.empty()) {
1830 addRequiredLTOPreLinkPasses(MPM);
1831 return MPM;
1832 }
1833
1834 // Run partial inlining pass to partially inline functions that have
1835 // large bodies.
1836 // FIXME: It isn't clear whether this is really the right place to run this
1837 // in ThinLTO. Because there is another canonicalization and simplification
1838 // phase that will run after the thin link, running this here ends up with
1839 // less information than will be available later and it may grow functions in
1840 // ways that aren't beneficial.
1843
1844 if (PGOOpt && PGOOpt->PseudoProbeForProfiling &&
1845 PGOOpt->Action == PGOOptions::SampleUse)
1847
1848 // Handle Optimizer{Early,Last}EPCallbacks added by clang on PreLink. Actual
1849 // optimization is going to be done in PostLink stage, but clang can't add
1850 // callbacks there in case of in-process ThinLTO called by linker.
1855
1856 // Emit annotation remarks.
1858
1859 addRequiredLTOPreLinkPasses(MPM);
1860
1861 return MPM;
1862}
1863
1865 OptimizationLevel Level, const ModuleSummaryIndex *ImportSummary) {
 // ThinLTO post-link pipeline. When ImportSummary is non-null, the
 // summary-driven passes (WholeProgramDevirt and LowerTypeTests in import
 // mode) run first. At -O0 only the lowering/cleanup passes below are added
 // and MPM is returned early; otherwise the simplification and optimization
 // pipelines follow.
 // NOTE(review): the start of the signature and several statement lines are
 // absent from this listing (gaps in the embedded line numbering) -- confirm
 // against the real source.
1867
1868 // If we are invoking this without a summary index noting that we are linking
1869 // with a library containing the necessary APIs, remove any MemProf related
1870 // attributes and metadata.
1871 if (!ImportSummary || !ImportSummary->withSupportsHotColdNew())
1873
1874 if (ImportSummary) {
1875 // For ThinLTO we must apply the context disambiguation decisions early, to
1876 // ensure we can correctly match the callsites to summary data.
1879 ImportSummary, PGOOpt && PGOOpt->Action == PGOOptions::SampleUse));
1880
1881 // These passes import type identifier resolutions for whole-program
1882 // devirtualization and CFI. They must run early because other passes may
1883 // disturb the specific instruction patterns that these passes look for,
1884 // creating dependencies on resolutions that may not appear in the summary.
1885 //
1886 // For example, GVN may transform the pattern assume(type.test) appearing in
1887 // two basic blocks into assume(phi(type.test, type.test)), which would
1888 // transform a dependency on a WPD resolution into a dependency on a type
1889 // identifier resolution for CFI.
1890 //
1891 // Also, WPD has access to more precise information than ICP and can
1892 // devirtualize more effectively, so it should operate on the IR first.
1893 //
1894 // The WPD and LowerTypeTest passes need to run at -O0 to lower type
1895 // metadata and intrinsics.
1896 MPM.addPass(WholeProgramDevirtPass(nullptr, ImportSummary));
1897 MPM.addPass(LowerTypeTestsPass(nullptr, ImportSummary));
1898 }
1899
1900 if (Level == OptimizationLevel::O0) {
1901 // Run a second time to clean up any type tests left behind by WPD for use
1902 // in ICP.
1903 MPM.addPass(LowerTypeTestsPass(nullptr, nullptr,
1906
1907 // AllocToken transforms heap allocation calls; this needs to run late after
1908 // other allocation call transformations (such as those in InstCombine).
1909 MPM.addPass(AllocTokenPass());
1910
1911 // Drop available_externally and unreferenced globals. This is necessary
1912 // with ThinLTO in order to avoid leaving undefined references to dead
1913 // globals in the object file.
1915 MPM.addPass(GlobalDCEPass());
1916 return MPM;
1917 }
1918 if (!UseCtxProfile.empty()) {
1919 MPM.addPass(
1921 } else {
1922 // Add the core simplification pipeline.
1925 }
1926 // Now add the optimization pipeline.
1929
1930 // Emit annotation remarks.
1932
1933 return MPM;
1934}
1935
1938 // FIXME: We should use a customized pre-link pipeline!
 // Until then, delegate to buildPerModuleDefaultPipeline.
 // NOTE(review): the signature and the second argument line are absent from
 // this listing (gaps in the embedded line numbering) -- confirm against the
 // real source.
1939 return buildPerModuleDefaultPipeline(Level,
1941}
1942
1945 ModuleSummaryIndex *ExportSummary) {
1947
1949
1950 // If we are invoking this without a summary index noting that we are linking
1951 // with a library containing the necessary APIs, remove any MemProf related
1952 // attributes and metadata.
1953 if (!ExportSummary || !ExportSummary->withSupportsHotColdNew())
1955
1956 // Create a function that performs CFI checks for cross-DSO calls with targets
1957 // in the current module.
1958 MPM.addPass(CrossDSOCFIPass());
1959
1960 if (Level == OptimizationLevel::O0) {
1961 // The WPD and LowerTypeTest passes need to run at -O0 to lower type
1962 // metadata and intrinsics.
1963 MPM.addPass(WholeProgramDevirtPass(ExportSummary, nullptr));
1964 MPM.addPass(LowerTypeTestsPass(ExportSummary, nullptr));
1965 // Run a second time to clean up any type tests left behind by WPD for use
1966 // in ICP.
1967 MPM.addPass(LowerTypeTestsPass(nullptr, nullptr,
1969
1971
1972 // AllocToken transforms heap allocation calls; this needs to run late after
1973 // other allocation call transformations (such as those in InstCombine).
1974 MPM.addPass(AllocTokenPass());
1975
1977
1978 // Emit annotation remarks.
1980
1981 return MPM;
1982 }
1983
1984 if (PGOOpt && PGOOpt->Action == PGOOptions::SampleUse) {
1985 // Load sample profile before running the LTO optimization pipeline.
1986 MPM.addPass(SampleProfileLoaderPass(PGOOpt->ProfileFile,
1987 PGOOpt->ProfileRemappingFile,
1989 // Cache ProfileSummaryAnalysis once to avoid the potential need to insert
1990 // RequireAnalysisPass for PSI before subsequent non-module passes.
1992 }
1993
1994 // Try to run OpenMP optimizations, quick no-op if no OpenMP metadata present.
1996
1997 // Remove unused virtual tables to improve the quality of code generated by
1998 // whole-program devirtualization and bitset lowering.
1999 MPM.addPass(GlobalDCEPass(/*InLTOPostLink=*/true));
2000
2001 // Do basic inference of function attributes from known properties of system
2002 // libraries and other oracles.
2004
2005 if (Level.getSpeedupLevel() > 1) {
2007 CallSiteSplittingPass(), PTO.EagerlyInvalidateAnalyses));
2008
2009 // Indirect call promotion. This should promote all the targets that are
2010 // left by the earlier promotion pass that promotes intra-module targets.
2011 // This two-step promotion is to save the compile time. For LTO, it should
2012 // produce the same result as if we only do promotion here.
2014 true /* InLTO */, PGOOpt && PGOOpt->Action == PGOOptions::SampleUse));
2015
2016 // Promoting by-reference arguments to by-value exposes more constants to
2017 // IPSCCP.
2018 CGSCCPassManager CGPM;
2021 CGPM.addPass(
2024
2025 // Propagate constants at call sites into the functions they call. This
2026 // opens opportunities for globalopt (and inlining) by substituting function
2027 // pointers passed as arguments to direct uses of functions.
2028 MPM.addPass(IPSCCPPass(IPSCCPOptions(/*AllowFuncSpec=*/
2029 Level != OptimizationLevel::Os &&
2030 Level != OptimizationLevel::Oz)));
2031
2032 // Attach metadata to indirect call sites indicating the set of functions
2033 // they may target at run-time. This should follow IPSCCP.
2035 }
2036
2037 // Do RPO function attribute inference across the module to forward-propagate
2038 // attributes where applicable.
2039 // FIXME: Is this really an optimization rather than a canonicalization?
2041
2042 // Use in-range annotations on GEP indices to split globals where beneficial.
2043 MPM.addPass(GlobalSplitPass());
2044
2045 // Run whole program optimization of virtual call when the list of callees
2046 // is fixed.
2047 MPM.addPass(WholeProgramDevirtPass(ExportSummary, nullptr));
2048
2050 // Stop here at -O1.
2051 if (Level == OptimizationLevel::O1) {
2052 // The LowerTypeTestsPass needs to run to lower type metadata and the
2053 // type.test intrinsics. The pass does nothing if CFI is disabled.
2054 MPM.addPass(LowerTypeTestsPass(ExportSummary, nullptr));
2055 // Run a second time to clean up any type tests left behind by WPD for use
2056 // in ICP (which is performed earlier than this in the regular LTO
2057 // pipeline).
2058 MPM.addPass(LowerTypeTestsPass(nullptr, nullptr,
2060
2062
2063 // AllocToken transforms heap allocation calls; this needs to run late after
2064 // other allocation call transformations (such as those in InstCombine).
2065 MPM.addPass(AllocTokenPass());
2066
2068
2069 // Emit annotation remarks.
2071
2072 return MPM;
2073 }
2074
2075 // TODO: Skip to match buildCoroWrapper.
2076 MPM.addPass(CoroEarlyPass());
2077
2078 // Optimize globals to try and fold them into constants.
2079 MPM.addPass(GlobalOptPass());
2080
2081 // Promote any localized globals to SSA registers.
2083
2084 // Linking modules together can lead to duplicate global constants; only
2085 // keep one copy of each constant.
2087
2088 // Remove unused arguments from functions.
2090
2091 // Reduce the code after globalopt and ipsccp. Both can open up significant
2092 // simplification opportunities, and both can propagate functions through
2093 // function pointers. When this happens, we often have to resolve varargs
2094 // calls, etc, so let instcombine do this.
2095 FunctionPassManager PeepholeFPM;
2096 PeepholeFPM.addPass(InstCombinePass());
2097 if (Level.getSpeedupLevel() > 1)
2098 PeepholeFPM.addPass(AggressiveInstCombinePass());
2099 invokePeepholeEPCallbacks(PeepholeFPM, Level);
2100
2101 MPM.addPass(createModuleToFunctionPassAdaptor(std::move(PeepholeFPM),
2102 PTO.EagerlyInvalidateAnalyses));
2103
2104 // Lower variadic functions for supported targets prior to inlining.
2106
2107 // Note: historically, the PruneEH pass was run first to deduce nounwind and
2108 // generally clean up exception handling overhead. It isn't clear this is
2109 // valuable as the inliner doesn't currently care whether it is inlining an
2110 // invoke or a call.
2111 // Run the inliner now.
2112 if (EnableModuleInliner) {
2116 } else {
2119 /* MandatoryFirst */ true,
2122 }
2123
2124 // Perform context disambiguation after inlining, since that would reduce the
2125 // amount of additional cloning required to distinguish the allocation
2126 // contexts.
2129 /*Summary=*/nullptr,
2130 PGOOpt && PGOOpt->Action == PGOOptions::SampleUse));
2131
2132 // Optimize globals again after we ran the inliner.
2133 MPM.addPass(GlobalOptPass());
2134
2135 // Run the OpenMPOpt pass again after global optimizations.
2137
2138 // Garbage collect dead functions.
2139 MPM.addPass(GlobalDCEPass(/*InLTOPostLink=*/true));
2140
2141 // If we didn't decide to inline a function, check to see if we can
2142 // transform it to pass arguments by value instead of by reference.
2143 CGSCCPassManager CGPM;
2148
2150 // The IPO Passes may leave cruft around. Clean up after them.
2151 FPM.addPass(InstCombinePass());
2152 invokePeepholeEPCallbacks(FPM, Level);
2153
2156
2158
2159 // Do a post inline PGO instrumentation and use pass. This is a context
2160 // sensitive PGO pass.
2161 if (PGOOpt) {
2162 if (PGOOpt->CSAction == PGOOptions::CSIRInstr)
2163 addPGOInstrPasses(MPM, Level, /*RunProfileGen=*/true,
2164 /*IsCS=*/true, PGOOpt->AtomicCounterUpdate,
2165 PGOOpt->CSProfileGenFile, PGOOpt->ProfileRemappingFile);
2166 else if (PGOOpt->CSAction == PGOOptions::CSIRUse)
2167 addPGOInstrPasses(MPM, Level, /*RunProfileGen=*/false,
2168 /*IsCS=*/true, PGOOpt->AtomicCounterUpdate,
2169 PGOOpt->ProfileFile, PGOOpt->ProfileRemappingFile);
2170 }
2171
2172 // Break up allocas
2174
2175 // LTO provides additional opportunities for tailcall elimination due to
2176 // link-time inlining, and visibility of nocapture attribute.
2177 FPM.addPass(
2178 TailCallElimPass(/*UpdateFunctionEntryCount=*/isInstrumentedPGOUse()));
2179
2180 // Run a few AA driver optimizations here and now to cleanup the code.
2181 MPM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM),
2182 PTO.EagerlyInvalidateAnalyses));
2183
2184 MPM.addPass(
2186
2187 // Require the GlobalsAA analysis for the module so we can query it within
2188 // MainFPM.
2191 // Invalidate AAManager so it can be recreated and pick up the newly
2192 // available GlobalsAA.
2193 MPM.addPass(
2195 }
2196
2197 FunctionPassManager MainFPM;
2199 LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap,
2200 /*AllowSpeculation=*/true),
2201 /*USeMemorySSA=*/true));
2202
2203 if (RunNewGVN)
2204 MainFPM.addPass(NewGVNPass());
2205 else
2206 MainFPM.addPass(GVNPass());
2207
2208 // Remove dead memcpy()'s.
2209 MainFPM.addPass(MemCpyOptPass());
2210
2211 // Nuke dead stores.
2212 MainFPM.addPass(DSEPass());
2213 MainFPM.addPass(MoveAutoInitPass());
2215
2216 invokeVectorizerStartEPCallbacks(MainFPM, Level);
2217
2218 LoopPassManager LPM;
2219 if (EnableLoopFlatten && Level.getSpeedupLevel() > 1)
2220 LPM.addPass(LoopFlattenPass());
2221 LPM.addPass(IndVarSimplifyPass());
2222 LPM.addPass(LoopDeletionPass());
2223 // FIXME: Add loop interchange.
2224
2225 // Unroll small loops and perform peeling.
2226 LPM.addPass(LoopFullUnrollPass(Level.getSpeedupLevel(),
2227 /* OnlyWhenForced= */ !PTO.LoopUnrolling,
2228 PTO.ForgetAllSCEVInLoopUnroll));
2229 // The loop passes in LPM (LoopFullUnrollPass) do not preserve MemorySSA.
2230 // *All* loop passes must preserve it, in order to be able to use it.
2231 MainFPM.addPass(
2232 createFunctionToLoopPassAdaptor(std::move(LPM), /*UseMemorySSA=*/false));
2233
2234 MainFPM.addPass(LoopDistributePass());
2235
2236 addVectorPasses(Level, MainFPM, ThinOrFullLTOPhase::FullLTOPostLink);
2237
2238 invokeVectorizerEndEPCallbacks(MainFPM, Level);
2239
2240 // Run the OpenMPOpt CGSCC pass again late.
2243
2244 invokePeepholeEPCallbacks(MainFPM, Level);
2245 MainFPM.addPass(JumpThreadingPass());
2246 MPM.addPass(createModuleToFunctionPassAdaptor(std::move(MainFPM),
2247 PTO.EagerlyInvalidateAnalyses));
2248
2249 // Lower type metadata and the type.test intrinsic. This pass supports
2250 // clang's control flow integrity mechanisms (-fsanitize=cfi*) and needs
2251 // to be run at link time if CFI is enabled. This pass does nothing if
2252 // CFI is disabled.
2253 MPM.addPass(LowerTypeTestsPass(ExportSummary, nullptr));
2254 // Run a second time to clean up any type tests left behind by WPD for use
2255 // in ICP (which is performed earlier than this in the regular LTO pipeline).
2256 MPM.addPass(LowerTypeTestsPass(nullptr, nullptr,
2258
2259 // Enable splitting late in the FullLTO post-link pipeline.
2262
2263 // Add late LTO optimization passes.
2264 FunctionPassManager LateFPM;
2265
2266 // LoopSink pass sinks instructions hoisted by LICM, which serves as a
2267 // canonicalization pass that enables other optimizations. As a result,
2268 // LoopSink pass needs to be a very late IR pass to avoid undoing LICM
2269 // result too early.
2270 LateFPM.addPass(LoopSinkPass());
2271
2272 // This hoists/decomposes div/rem ops. It should run after other sink/hoist
2273 // passes to avoid re-sinking, but before SimplifyCFG because it can allow
2274 // flattening of blocks.
2275 LateFPM.addPass(DivRemPairsPass());
2276
2277 // Delete basic blocks, which optimization passes may have killed.
2279 .convertSwitchRangeToICmp(true)
2280 .convertSwitchToArithmetic(true)
2281 .hoistCommonInsts(true)
2282 .speculateUnpredictables(true)));
2283 MPM.addPass(createModuleToFunctionPassAdaptor(std::move(LateFPM)));
2284
2285 // Drop bodies of available externally objects to improve GlobalDCE.
2287
2288 // Now that we have optimized the program, discard unreachable functions.
2289 MPM.addPass(GlobalDCEPass(/*InLTOPostLink=*/true));
2290
2291 if (PTO.MergeFunctions)
2293
2295
2296 if (PTO.CallGraphProfile)
2297 MPM.addPass(CGProfilePass(/*InLTOPostLink=*/true));
2298
2299 MPM.addPass(CoroCleanupPass());
2300
2301 // AllocToken transforms heap allocation calls; this needs to run late after
2302 // other allocation call transformations (such as those in InstCombine).
2303 MPM.addPass(AllocTokenPass());
2304
2306
2307 // Emit annotation remarks.
2309
2310 return MPM;
2311}
2312
2316 assert(Level == OptimizationLevel::O0 &&
2317 "buildO0DefaultPipeline should only be used with O0");
2318
2320
2321 // Perform pseudo probe instrumentation in O0 mode. This is for the
2322 // consistency between different build modes. For example, a LTO build can be
2323 // mixed with an O0 prelink and an O2 postlink. Loading a sample profile in
2324 // the postlink will require pseudo probe instrumentation in the prelink.
2325 if (PGOOpt && PGOOpt->PseudoProbeForProfiling)
2327
2328 if (PGOOpt && (PGOOpt->Action == PGOOptions::IRInstr ||
2329 PGOOpt->Action == PGOOptions::IRUse))
2331 MPM,
2332 /*RunProfileGen=*/(PGOOpt->Action == PGOOptions::IRInstr),
2333 /*IsCS=*/false, PGOOpt->AtomicCounterUpdate, PGOOpt->ProfileFile,
2334 PGOOpt->ProfileRemappingFile);
2335
2336 // Instrument function entry and exit before all inlining.
2338 EntryExitInstrumenterPass(/*PostInlining=*/false)));
2339
2341
2342 if (PGOOpt && PGOOpt->DebugInfoForProfiling)
2344
2345 if (PGOOpt && PGOOpt->Action == PGOOptions::SampleUse) {
2346 // Explicitly disable sample loader inlining and use flattened profile in O0
2347 // pipeline.
2348 MPM.addPass(SampleProfileLoaderPass(PGOOpt->ProfileFile,
2349 PGOOpt->ProfileRemappingFile,
2351 /*DisableSampleProfileInlining=*/true,
2352 /*UseFlattenedProfile=*/true));
2353 // Cache ProfileSummaryAnalysis once to avoid the potential need to insert
2354 // RequireAnalysisPass for PSI before subsequent non-module passes.
2356 }
2357
2359
2360 // Build a minimal pipeline based on the semantics required by LLVM,
2361 // which is just that always inlining occurs. Further, disable generating
2362 // lifetime intrinsics to avoid enabling further optimizations during
2363 // code generation.
2365 /*InsertLifetimeIntrinsics=*/false));
2366
2367 if (PTO.MergeFunctions)
2369
2370 if (EnableMatrix)
2371 MPM.addPass(
2373
2374 if (!CGSCCOptimizerLateEPCallbacks.empty()) {
2375 CGSCCPassManager CGPM;
2377 if (!CGPM.isEmpty())
2379 }
2380 if (!LateLoopOptimizationsEPCallbacks.empty()) {
2381 LoopPassManager LPM;
2383 if (!LPM.isEmpty()) {
2385 createFunctionToLoopPassAdaptor(std::move(LPM))));
2386 }
2387 }
2388 if (!LoopOptimizerEndEPCallbacks.empty()) {
2389 LoopPassManager LPM;
2391 if (!LPM.isEmpty()) {
2393 createFunctionToLoopPassAdaptor(std::move(LPM))));
2394 }
2395 }
2396 if (!ScalarOptimizerLateEPCallbacks.empty()) {
2399 if (!FPM.isEmpty())
2400 MPM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM)));
2401 }
2402
2404
2405 if (!VectorizerStartEPCallbacks.empty()) {
2408 if (!FPM.isEmpty())
2409 MPM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM)));
2410 }
2411
2412 if (!VectorizerEndEPCallbacks.empty()) {
2415 if (!FPM.isEmpty())
2416 MPM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM)));
2417 }
2418
2420
2421 // AllocToken transforms heap allocation calls; this needs to run late after
2422 // other allocation call transformations (such as those in InstCombine).
2423 if (!isLTOPreLink(Phase))
2424 MPM.addPass(AllocTokenPass());
2425
2427
2428 if (isLTOPreLink(Phase))
2429 addRequiredLTOPreLinkPasses(MPM);
2430
2431 // Emit annotation remarks.
2433
2434 return MPM;
2435}
2436
2438 AAManager AA;
2439
2440 // The order in which these are registered determines their priority when
2441 // being queried.
2442
2443 // Add any target-specific alias analyses that should be run early.
2444 if (TM)
2445 TM->registerEarlyDefaultAliasAnalyses(AA);
2446
2447 // First we register the basic alias analysis that provides the majority of
2448 // per-function local AA logic. This is a stateless, on-demand local set of
2449 // AA techniques.
2450 AA.registerFunctionAnalysis<BasicAA>();
2451
2452 // Next we query fast, specialized alias analyses that wrap IR-embedded
2453 // information about aliasing.
2454 AA.registerFunctionAnalysis<ScopedNoAliasAA>();
2455 AA.registerFunctionAnalysis<TypeBasedAA>();
2456
2457 // Add support for querying global aliasing information when available.
2458 // Because the `AAManager` is a function analysis and `GlobalsAA` is a module
2459 // analysis, all that the `AAManager` can do is query for any *cached*
2460 // results from `GlobalsAA` through a readonly proxy.
2462 AA.registerModuleAnalysis<GlobalsAA>();
2463
2464 // Add target-specific alias analyses.
2465 if (TM)
2466 TM->registerDefaultAliasAnalyses(AA);
2467
2468 return AA;
2469}
2470
2471bool PassBuilder::isInstrumentedPGOUse() const {
2472 return (PGOOpt && PGOOpt->Action == PGOOptions::IRUse) ||
2473 !UseCtxProfile.empty();
2474}
aarch64 falkor hwpf fix Falkor HW Prefetch Fix Late Phase
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
AggressiveInstCombiner - Combine expression patterns to form expressions with fewer,...
Provides passes to inlining "always_inline" functions.
This is the interface for LLVM's primary stateless and local alias analysis.
This file provides the interface for LLVM's Call Graph Profile pass.
This header provides classes for managing passes over SCCs of the call graph.
#define clEnumValN(ENUMVAL, FLAGNAME, DESC)
This file provides the interface for a simple, fast CSE pass.
This file provides a pass which clones the current module and runs the provided pass pipeline on the ...
This file provides a pass manager that only runs its passes if the provided marker analysis has been ...
Super simple passes to force specific function attrs from the commandline into the IR for debugging p...
Provides passes for computing function attributes based on interprocedural analyses.
This file provides the interface for LLVM's Global Value Numbering pass which eliminates fully redund...
This is the interface for a simple mod/ref and alias analysis over globals.
AcceleratorCodeSelection - Identify all functions reachable from a kernel, removing those that are un...
This header defines various interfaces for pass management in LLVM.
Interfaces for passes which infer implicit function attributes from the name and signature of functio...
This file provides the primary interface to the instcombine pass.
Defines passes for running instruction simplification across chunks of IR.
This file provides the interface for LLVM's PGO Instrumentation lowering pass.
See the comments on JumpThreadingPass.
static LVOptions Options
Definition LVOptions.cpp:25
This file implements the Loop Fusion pass.
This header defines the LoopLoadEliminationPass object.
This header provides classes for managing a pipeline of passes over loops in LLVM IR.
The header file for the LowerConstantIntrinsics pass as used by the new pass manager.
The header file for the LowerExpectIntrinsic pass as used by the new pass manager.
This pass performs merges of loads and stores on both sides of a.
This file provides the interface for LLVM's Global Value Numbering pass.
This header enumerates the LLVM-provided high-level optimization levels.
This file provides the interface for IR based instrumentation passes ( (profile-gen,...
Define option tunables for PGO.
static void addAnnotationRemarksPass(ModulePassManager &MPM)
static InlineParams getInlineParamsFromOptLevel(OptimizationLevel Level)
static CoroConditionalWrapper buildCoroWrapper(ThinOrFullLTOPhase Phase)
static bool isLTOPreLink(ThinOrFullLTOPhase Phase)
static bool isLTOPostLink(ThinOrFullLTOPhase Phase)
This file implements relative lookup table converter that converts lookup tables to relative lookup t...
This file provides the interface for LLVM's Scalar Replacement of Aggregates pass.
This file provides the interface for the pseudo probe implementation for AutoFDO.
This file provides the interface for the sampled PGO loader pass.
This is the interface for a metadata-based scoped no-alias analysis.
This file provides the interface for the pass responsible for both simplifying and canonicalizing the...
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
This is the interface for a metadata-based TBAA.
Defines the virtual file system interface vfs::FileSystem.
A manager for alias analyses.
A module pass that rewrites heap allocations to use token-enabled allocation functions based on vario...
Definition AllocToken.h:36
Inlines functions marked as "always_inline".
Argument promotion pass.
Assign a GUID to functions as metadata.
Analysis pass providing a never-invalidated alias analysis result.
Simple pass that canonicalizes aliases.
A pass that merges duplicate global constants into a single constant.
This class implements a trivial dead store elimination.
Eliminate dead arguments (and return values) from functions.
A pass that transforms external global definitions into declarations.
Pass embeds a copy of the module optimized with the provided pass pipeline into a global variable.
A pass manager to run a set of extra loop passes if the MarkerTy analysis is present.
The core GVN pass object.
Definition GVN.h:128
Pass to remove unused function declarations.
Definition GlobalDCE.h:38
Optimize globals that never have their address taken.
Definition GlobalOpt.h:25
Pass to perform split of global variables.
Definition GlobalSplit.h:26
Analysis pass providing a never-invalidated alias analysis result.
Pass to outline cold regions.
Pass to perform interprocedural constant propagation.
Definition SCCP.h:48
Pass to outline similar regions.
Definition IROutliner.h:444
Run instruction simplification across each instruction in the function.
Instrumentation based profiling lowering pass.
This pass performs 'jump threading', which looks at blocks that have multiple predecessors and multip...
Performs Loop Invariant Code Motion Pass.
Definition LICM.h:66
Loop unroll pass that only does full loop unrolling and peeling.
Performs Loop Idiom Recognize Pass.
Performs Loop Inst Simplify Pass.
A simple loop rotation transformation.
Performs basic CFG simplifications to assist other loop passes.
A pass that does profile-guided sinking of instructions into loops.
Definition LoopSink.h:33
A simple loop rotation transformation.
Loop unroll pass that will support both full and partial unrolling.
Strips MemProf attributes and metadata.
Merge identical functions.
The module inliner pass for the new pass manager.
Module pass, wrapping the inliner pass.
Definition Inliner.h:65
void addModulePass(T Pass)
Add a module pass that runs before the CGSCC passes.
Definition Inliner.h:81
Class to hold module path string table and global value map, and encapsulate methods for operating on...
Simple pass that provides a name to every anonymous globals.
Additional 'norecurse' attribute deduction during postlink LTO phase.
OpenMP optimizations pass.
Definition OpenMPOpt.h:42
static LLVM_ABI const OptimizationLevel O3
Optimize for fast execution as much as possible.
static LLVM_ABI const OptimizationLevel Oz
A very specialized mode that will optimize for code size at any and all costs.
static LLVM_ABI const OptimizationLevel O0
Disable as many optimizations as possible.
static LLVM_ABI const OptimizationLevel Os
Similar to O2 but tries to optimize for small code size instead of fast execution without triggering ...
static LLVM_ABI const OptimizationLevel O2
Optimize for fast execution as much as possible without triggering significant incremental compile ti...
static LLVM_ABI const OptimizationLevel O1
Optimize quickly without destroying debuggability.
The indirect function call promotion pass.
The instrumentation (profile-instr-gen) pass for IR based PGO.
The instrumentation (profile-instr-gen) pass for IR based PGO.
The profile annotation (profile-instr-use) pass for IR based PGO.
The profile size based optimization pass for memory intrinsics.
Pass to remove unused function declarations.
LLVM_ABI void invokeFullLinkTimeOptimizationLastEPCallbacks(ModulePassManager &MPM, OptimizationLevel Level)
LLVM_ABI ModuleInlinerWrapperPass buildInlinerPipeline(OptimizationLevel Level, ThinOrFullLTOPhase Phase)
Construct the module pipeline that performs inlining as well as the inlining-driven cleanups.
LLVM_ABI void invokeOptimizerEarlyEPCallbacks(ModulePassManager &MPM, OptimizationLevel Level, ThinOrFullLTOPhase Phase)
LLVM_ABI void invokeVectorizerStartEPCallbacks(FunctionPassManager &FPM, OptimizationLevel Level)
LLVM_ABI AAManager buildDefaultAAPipeline()
Build the default AAManager with the default alias analysis pipeline registered.
LLVM_ABI void invokeCGSCCOptimizerLateEPCallbacks(CGSCCPassManager &CGPM, OptimizationLevel Level)
LLVM_ABI ModulePassManager buildThinLTOPreLinkDefaultPipeline(OptimizationLevel Level)
Build a pre-link, ThinLTO-targeting default optimization pipeline to a pass manager.
LLVM_ABI void addPGOInstrPassesForO0(ModulePassManager &MPM, bool RunProfileGen, bool IsCS, bool AtomicCounterUpdate, std::string ProfileFile, std::string ProfileRemappingFile)
Add PGOInstrumentation passes for O0 only.
LLVM_ABI void invokeScalarOptimizerLateEPCallbacks(FunctionPassManager &FPM, OptimizationLevel Level)
LLVM_ABI ModulePassManager buildPerModuleDefaultPipeline(OptimizationLevel Level, ThinOrFullLTOPhase Phase=ThinOrFullLTOPhase::None)
Build a per-module default optimization pipeline.
LLVM_ABI void invokePipelineStartEPCallbacks(ModulePassManager &MPM, OptimizationLevel Level)
LLVM_ABI void invokeVectorizerEndEPCallbacks(FunctionPassManager &FPM, OptimizationLevel Level)
LLVM_ABI ModulePassManager buildO0DefaultPipeline(OptimizationLevel Level, ThinOrFullLTOPhase Phase=ThinOrFullLTOPhase::None)
Build an O0 pipeline with the minimal semantically required passes.
LLVM_ABI FunctionPassManager buildFunctionSimplificationPipeline(OptimizationLevel Level, ThinOrFullLTOPhase Phase)
Construct the core LLVM function canonicalization and simplification pipeline.
LLVM_ABI void invokePeepholeEPCallbacks(FunctionPassManager &FPM, OptimizationLevel Level)
LLVM_ABI void invokePipelineEarlySimplificationEPCallbacks(ModulePassManager &MPM, OptimizationLevel Level, ThinOrFullLTOPhase Phase)
LLVM_ABI void invokeLoopOptimizerEndEPCallbacks(LoopPassManager &LPM, OptimizationLevel Level)
LLVM_ABI ModulePassManager buildLTODefaultPipeline(OptimizationLevel Level, ModuleSummaryIndex *ExportSummary)
Build an LTO default optimization pipeline to a pass manager.
LLVM_ABI ModulePassManager buildModuleInlinerPipeline(OptimizationLevel Level, ThinOrFullLTOPhase Phase)
Construct the module pipeline that performs inlining with module inliner pass.
LLVM_ABI ModulePassManager buildThinLTODefaultPipeline(OptimizationLevel Level, const ModuleSummaryIndex *ImportSummary)
Build a ThinLTO default optimization pipeline to a pass manager.
LLVM_ABI void invokeLateLoopOptimizationsEPCallbacks(LoopPassManager &LPM, OptimizationLevel Level)
LLVM_ABI void invokeFullLinkTimeOptimizationEarlyEPCallbacks(ModulePassManager &MPM, OptimizationLevel Level)
LLVM_ABI ModulePassManager buildFatLTODefaultPipeline(OptimizationLevel Level, bool ThinLTO, bool EmitSummary)
Build a fat object default optimization pipeline.
LLVM_ABI ModulePassManager buildModuleSimplificationPipeline(OptimizationLevel Level, ThinOrFullLTOPhase Phase)
Construct the core LLVM module canonicalization and simplification pipeline.
LLVM_ABI ModulePassManager buildModuleOptimizationPipeline(OptimizationLevel Level, ThinOrFullLTOPhase LTOPhase)
Construct the core LLVM module optimization pipeline.
LLVM_ABI void invokeOptimizerLastEPCallbacks(ModulePassManager &MPM, OptimizationLevel Level, ThinOrFullLTOPhase Phase)
LLVM_ABI ModulePassManager buildLTOPreLinkDefaultPipeline(OptimizationLevel Level)
Build a pre-link, LTO-targeting default optimization pipeline to a pass manager.
LLVM_ATTRIBUTE_MINSIZE std::enable_if_t<!std::is_same_v< PassT, PassManager > > addPass(PassT &&Pass)
bool isEmpty() const
Returns if the pass manager contains any passes.
unsigned LicmMssaNoAccForPromotionCap
Tuning option to disable promotion to scalars in LICM with MemorySSA, if the number of access is too ...
Definition PassBuilder.h:78
bool SLPVectorization
Tuning option to enable/disable slp loop vectorization, set based on opt level.
Definition PassBuilder.h:56
int InlinerThreshold
Tuning option to override the default inliner threshold.
Definition PassBuilder.h:92
bool LoopFusion
Tuning option to enable/disable loop fusion. Its default value is false.
Definition PassBuilder.h:66
bool CallGraphProfile
Tuning option to enable/disable call graph profile.
Definition PassBuilder.h:82
bool MergeFunctions
Tuning option to enable/disable function merging.
Definition PassBuilder.h:89
bool ForgetAllSCEVInLoopUnroll
Tuning option to forget all SCEV loops in LoopUnroll.
Definition PassBuilder.h:70
unsigned LicmMssaOptCap
Tuning option to cap the number of calls to retrieve clobbering accesses in MemorySSA,...
Definition PassBuilder.h:74
bool LoopInterleaving
Tuning option to set loop interleaving on/off, set based on opt level.
Definition PassBuilder.h:48
LLVM_ABI PipelineTuningOptions()
Constructor sets pipeline tuning defaults based on cl::opts.
bool LoopUnrolling
Tuning option to enable/disable loop unrolling. Its default value is true.
Definition PassBuilder.h:59
bool LoopInterchange
Tuning option to enable/disable loop interchange.
Definition PassBuilder.h:63
bool LoopVectorization
Tuning option to enable/disable loop vectorization, set based on opt level.
Definition PassBuilder.h:52
Reassociate commutative expressions.
Definition Reassociate.h:74
A pass to do RPO deduction and propagation of function attributes.
This pass performs function-level constant propagation and merging.
Definition SCCP.h:30
The sample profiler data loader pass.
Analysis pass providing a never-invalidated alias analysis result.
This pass transforms loops that contain branches or switches on loop- invariant conditions to have mu...
A pass to simplify and canonicalize the CFG of a function.
Definition SimplifyCFG.h:30
Analysis pass providing a never-invalidated alias analysis result.
Optimize scalar/vector interactions in IR using target cost models.
Interfaces for registering analysis passes, producing common pass manager configurations,...
Abstract Attribute helper functions.
Definition Attributor.h:165
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
ValuesClass values(OptsTy... Options)
Helper to build a ValuesClass by forwarding a variable number of arguments as an initializer list to ...
initializer< Ty > init(const Ty &Val)
@ Assume
Do not drop type tests (default).
@ All
Drop only llvm.assumes using type test value.
This is an optimization pass for GlobalISel generic memory operations.
Definition Types.h:26
LLVM_ABI cl::opt< bool > EnableKnowledgeRetention
static cl::opt< bool > RunNewGVN("enable-newgvn", cl::init(false), cl::Hidden, cl::desc("Run the NewGVN pass"))
static cl::opt< bool > DisablePreInliner("disable-preinline", cl::init(false), cl::Hidden, cl::desc("Disable pre-instrumentation inliner"))
static cl::opt< bool > EnableJumpTableToSwitch("enable-jump-table-to-switch", cl::desc("Enable JumpTableToSwitch pass (default = off)"))
static cl::opt< bool > PerformMandatoryInliningsFirst("mandatory-inlining-first", cl::init(false), cl::Hidden, cl::desc("Perform mandatory inlinings module-wide, before performing " "inlining"))
static cl::opt< bool > RunPartialInlining("enable-partial-inlining", cl::init(false), cl::Hidden, cl::desc("Run Partial inlining pass"))
static cl::opt< bool > EnableGVNSink("enable-gvn-sink", cl::desc("Enable the GVN sinking pass (default = off)"))
static cl::opt< bool > EnableLoopHeaderDuplication("enable-loop-header-duplication", cl::init(false), cl::Hidden, cl::desc("Enable loop header duplication at any optimization level"))
static cl::opt< bool > EnableModuleInliner("enable-module-inliner", cl::init(false), cl::Hidden, cl::desc("Enable module inliner"))
static cl::opt< bool > EnableEagerlyInvalidateAnalyses("eagerly-invalidate-analyses", cl::init(true), cl::Hidden, cl::desc("Eagerly invalidate more analyses in default pipelines"))
static cl::opt< bool > EnableMatrix("enable-matrix", cl::init(false), cl::Hidden, cl::desc("Enable lowering of the matrix intrinsics"))
ModuleToFunctionPassAdaptor createModuleToFunctionPassAdaptor(FunctionPassT &&Pass, bool EagerlyInvalidate=false)
A function to deduce a function pass type and wrap it in the templated adaptor.
cl::opt< std::string > UseCtxProfile("use-ctx-profile", cl::init(""), cl::Hidden, cl::desc("Use the specified contextual profile file"))
static cl::opt< bool > EnableSampledInstr("enable-sampled-instrumentation", cl::init(false), cl::Hidden, cl::desc("Enable profile instrumentation sampling (default = off)"))
static cl::opt< bool > EnableLoopFlatten("enable-loop-flatten", cl::init(false), cl::Hidden, cl::desc("Enable the LoopFlatten Pass"))
static cl::opt< InliningAdvisorMode > UseInlineAdvisor("enable-ml-inliner", cl::init(InliningAdvisorMode::Default), cl::Hidden, cl::desc("Enable ML policy for inliner. Currently trained for -Oz only"), cl::values(clEnumValN(InliningAdvisorMode::Default, "default", "Heuristics-based inliner version"), clEnumValN(InliningAdvisorMode::Development, "development", "Use development mode (runtime-loadable model)"), clEnumValN(InliningAdvisorMode::Release, "release", "Use release mode (AOT-compiled model)")))
PassManager< LazyCallGraph::SCC, CGSCCAnalysisManager, LazyCallGraph &, CGSCCUpdateResult & > CGSCCPassManager
The CGSCC pass manager.
static cl::opt< bool > EnableUnrollAndJam("enable-unroll-and-jam", cl::init(false), cl::Hidden, cl::desc("Enable Unroll And Jam Pass"))
ThinOrFullLTOPhase
This enumerates the LLVM full LTO or ThinLTO optimization phases.
Definition Pass.h:77
@ FullLTOPreLink
Full LTO prelink phase.
Definition Pass.h:85
@ ThinLTOPostLink
ThinLTO postlink (backend compile) phase.
Definition Pass.h:83
@ None
No LTO/ThinLTO behavior needed.
Definition Pass.h:79
@ FullLTOPostLink
Full LTO postlink (backend compile) phase.
Definition Pass.h:87
@ ThinLTOPreLink
ThinLTO prelink (summary) phase.
Definition Pass.h:81
PassManager< Loop, LoopAnalysisManager, LoopStandardAnalysisResults &, LPMUpdater & > LoopPassManager
The Loop pass manager.
static cl::opt< bool > EnableConstraintElimination("enable-constraint-elimination", cl::init(true), cl::Hidden, cl::desc("Enable pass to eliminate conditions based on linear constraints"))
ModuleToPostOrderCGSCCPassAdaptor createModuleToPostOrderCGSCCPassAdaptor(CGSCCPassT &&Pass)
A function to deduce a CGSCC pass type and wrap it in the templated adaptor.
static cl::opt< bool > EnablePGOInlineDeferral("enable-npm-pgo-inline-deferral", cl::init(true), cl::Hidden, cl::desc("Enable inline deferral during PGO"))
Flag to enable inline deferral during PGO.
FunctionToLoopPassAdaptor createFunctionToLoopPassAdaptor(LoopPassT &&Pass, bool UseMemorySSA=false)
A function to deduce a loop pass type and wrap it in the templated adaptor.
CGSCCToFunctionPassAdaptor createCGSCCToFunctionPassAdaptor(FunctionPassT &&Pass, bool EagerlyInvalidate=false, bool NoRerun=false)
A function to deduce a function pass type and wrap it in the templated adaptor.
cl::opt< bool > ForgetSCEVInLoopUnroll
PassManager< Module > ModulePassManager
Convenience typedef for a pass manager over modules.
static cl::opt< bool > EnablePostPGOLoopRotation("enable-post-pgo-loop-rotation", cl::init(true), cl::Hidden, cl::desc("Run the loop rotation transformation after PGO instrumentation"))
LLVM_ABI bool AreStatisticsEnabled()
Check if statistics are enabled.
static cl::opt< std::string > InstrumentColdFuncOnlyPath("instrument-cold-function-only-path", cl::init(""), cl::desc("File path for cold function only instrumentation(requires use " "with --pgo-instrument-cold-function-only)"), cl::Hidden)
static cl::opt< bool > EnableGlobalAnalyses("enable-global-analyses", cl::init(true), cl::Hidden, cl::desc("Enable inter-procedural analyses"))
static cl::opt< bool > EnableDFAJumpThreading("enable-dfa-jump-thread", cl::desc("Enable DFA jump threading"), cl::init(false), cl::Hidden)
static cl::opt< bool > FlattenedProfileUsed("flattened-profile-used", cl::init(false), cl::Hidden, cl::desc("Indicate the sample profile being used is flattened, i.e., " "no inline hierarchy exists in the profile"))
static cl::opt< bool > ExtraVectorizerPasses("extra-vectorizer-passes", cl::init(false), cl::Hidden, cl::desc("Run cleanup optimization passes after vectorization"))
static cl::opt< bool > EnableHotColdSplit("hot-cold-split", cl::desc("Enable hot-cold splitting pass"))
cl::opt< bool > EnableMemProfContextDisambiguation
Enable MemProf context disambiguation for thin link.
PassManager< Function > FunctionPassManager
Convenience typedef for a pass manager over functions.
LLVM_ABI InlineParams getInlineParams()
Generate the parameters to tune the inline cost analysis based only on the command-line options.
cl::opt< bool > PGOInstrumentColdFunctionOnly
static cl::opt< bool > EnableLoopInterchange("enable-loopinterchange", cl::init(false), cl::Hidden, cl::desc("Enable the LoopInterchange Pass"))
static cl::opt< bool > EnableCHR("enable-chr", cl::init(true), cl::Hidden, cl::desc("Enable control height reduction optimization (CHR)"))
static cl::opt< bool > EnableMergeFunctions("enable-merge-functions", cl::init(false), cl::Hidden, cl::desc("Enable function merging as part of the optimization pipeline"))
static cl::opt< bool > EnableDevirtualizeSpeculatively("enable-devirtualize-speculatively", cl::desc("Enable speculative devirtualization optimization"), cl::init(false))
static cl::opt< bool > EnableGVNHoist("enable-gvn-hoist", cl::desc("Enable the GVN hoisting pass (default = off)"))
cl::opt< unsigned > SetLicmMssaNoAccForPromotionCap
static cl::opt< bool > EnableIROutliner("ir-outliner", cl::init(false), cl::Hidden, cl::desc("Enable ir outliner pass"))
static cl::opt< AttributorRunOption > AttributorRun("attributor-enable", cl::Hidden, cl::init(AttributorRunOption::NONE), cl::desc("Enable the attributor inter-procedural deduction pass"), cl::values(clEnumValN(AttributorRunOption::ALL, "all", "enable all attributor runs"), clEnumValN(AttributorRunOption::MODULE, "module", "enable module-wide attributor runs"), clEnumValN(AttributorRunOption::CGSCC, "cgscc", "enable call graph SCC attributor runs"), clEnumValN(AttributorRunOption::NONE, "none", "disable attributor runs")))
static cl::opt< int > PreInlineThreshold("preinline-threshold", cl::Hidden, cl::init(75), cl::desc("Control the amount of inlining in pre-instrumentation inliner " "(default = 75)"))
static cl::opt< bool > UseLoopVersioningLICM("enable-loop-versioning-licm", cl::init(false), cl::Hidden, cl::desc("Enable the experimental Loop Versioning LICM pass"))
cl::opt< unsigned > MaxDevirtIterations("max-devirt-iterations", cl::ReallyHidden, cl::init(4))
cl::opt< unsigned > SetLicmMssaOptCap
A DCE pass that assumes instructions are dead until proven otherwise.
Definition ADCE.h:31
Pass to convert @llvm.global.annotations to !annotation metadata.
This pass attempts to minimize the number of assumes without losing any information.
Hoist/decompose integer division and remainder instructions to enable CFG improvements and better codegen.
Definition DivRemPairs.h:23
A simple and fast domtree-based CSE pass.
Definition EarlyCSE.h:31
Pass which forces specific function attributes into the IR, primarily as a debugging tool.
Statistics pass for the FunctionPropertiesAnalysis results.
A simple and fast domtree-based GVN pass to hoist common expressions from sibling branches.
Definition GVN.h:415
Uses an "inverted" value numbering to decide the similarity of expressions and sinks similar expressions into successors.
Definition GVN.h:422
A set of parameters to control various transforms performed by IPSCCP pass.
Definition SCCP.h:35
A pass which infers function attributes from the names and signatures of function declarations in a module.
Provides context on when an inline advisor is constructed in the pipeline (e.g., link phase, inline driver).
Thresholds to tune inline cost analysis.
Definition InlineCost.h:207
std::optional< int > HotCallSiteThreshold
Threshold to use when the callsite is considered hot.
Definition InlineCost.h:224
int DefaultThreshold
The default threshold to start with for a callee.
Definition InlineCost.h:209
std::optional< bool > EnableDeferral
Indicate whether we should allow inline deferral.
Definition InlineCost.h:237
std::optional< int > HintThreshold
Threshold to use for callees with inline hint.
Definition InlineCost.h:212
Options for the frontend instrumentation based profiling pass.
A no-op pass template which simply forces a specific analysis result to be invalidated.
Pass to forward loads in a loop around the backedge to subsequent iterations.
A set of parameters used to control various transforms performed by the LoopUnroll pass.
The LoopVectorize Pass.
Computes function attributes in post-order over the call graph.
A utility pass template to force an analysis result to be available.