LLVM 22.0.0git
PassBuilderPipelines.cpp
Go to the documentation of this file.
1//===- Construction of pass pipelines -------------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8/// \file
9///
10/// This file provides the implementation of the PassBuilder based on our
11/// static pass registry as well as related functionality. It also provides
12/// helpers to aid in analyzing, debugging, and testing passes and pass
13/// pipelines.
14///
15//===----------------------------------------------------------------------===//
16
17#include "llvm/ADT/Statistic.h"
27#include "llvm/IR/PassManager.h"
28#include "llvm/Pass.h"
150
151using namespace llvm;
152
153namespace llvm {
154
156 "enable-ml-inliner", cl::init(InliningAdvisorMode::Default), cl::Hidden,
157 cl::desc("Enable ML policy for inliner. Currently trained for -Oz only"),
159 "Heuristics-based inliner version"),
161 "Use development mode (runtime-loadable model)"),
163 "Use release mode (AOT-compiled model)")));
164
165/// Flag to enable inline deferral during PGO.
166static cl::opt<bool>
167 EnablePGOInlineDeferral("enable-npm-pgo-inline-deferral", cl::init(true),
169 cl::desc("Enable inline deferral during PGO"));
170
171static cl::opt<bool> EnableModuleInliner("enable-module-inliner",
172 cl::init(false), cl::Hidden,
173 cl::desc("Enable module inliner"));
174
176 "mandatory-inlining-first", cl::init(false), cl::Hidden,
177 cl::desc("Perform mandatory inlinings module-wide, before performing "
178 "inlining"));
179
181 "eagerly-invalidate-analyses", cl::init(true), cl::Hidden,
182 cl::desc("Eagerly invalidate more analyses in default pipelines"));
183
185 "enable-merge-functions", cl::init(false), cl::Hidden,
186 cl::desc("Enable function merging as part of the optimization pipeline"));
187
189 "enable-post-pgo-loop-rotation", cl::init(true), cl::Hidden,
190 cl::desc("Run the loop rotation transformation after PGO instrumentation"));
191
193 "enable-global-analyses", cl::init(true), cl::Hidden,
194 cl::desc("Enable inter-procedural analyses"));
195
196static cl::opt<bool> RunPartialInlining("enable-partial-inlining",
197 cl::init(false), cl::Hidden,
198 cl::desc("Run Partial inlining pass"));
199
201 "extra-vectorizer-passes", cl::init(false), cl::Hidden,
202 cl::desc("Run cleanup optimization passes after vectorization"));
203
204static cl::opt<bool> RunNewGVN("enable-newgvn", cl::init(false), cl::Hidden,
205 cl::desc("Run the NewGVN pass"));
206
207static cl::opt<bool>
208 EnableLoopInterchange("enable-loopinterchange", cl::init(false), cl::Hidden,
209 cl::desc("Enable the LoopInterchange Pass"));
210
211static cl::opt<bool> EnableUnrollAndJam("enable-unroll-and-jam",
212 cl::init(false), cl::Hidden,
213 cl::desc("Enable Unroll And Jam Pass"));
214
215static cl::opt<bool> EnableLoopFlatten("enable-loop-flatten", cl::init(false),
217 cl::desc("Enable the LoopFlatten Pass"));
218
219// Experimentally allow loop header duplication. This should allow for better
220// optimization at Oz, since loop-idiom recognition can then recognize things
221// like memcpy. If this ends up being useful for many targets, we should drop
222// this flag and make a code generation option that can be controlled
223// independent of the opt level and exposed through the frontend.
225 "enable-loop-header-duplication", cl::init(false), cl::Hidden,
226 cl::desc("Enable loop header duplication at any optimization level"));
227
228static cl::opt<bool>
229 EnableDFAJumpThreading("enable-dfa-jump-thread",
230 cl::desc("Enable DFA jump threading"),
231 cl::init(false), cl::Hidden);
232
233static cl::opt<bool>
234 EnableHotColdSplit("hot-cold-split",
235 cl::desc("Enable hot-cold splitting pass"));
236
237static cl::opt<bool> EnableIROutliner("ir-outliner", cl::init(false),
239 cl::desc("Enable ir outliner pass"));
240
241static cl::opt<bool>
242 DisablePreInliner("disable-preinline", cl::init(false), cl::Hidden,
243 cl::desc("Disable pre-instrumentation inliner"));
244
246 "preinline-threshold", cl::Hidden, cl::init(75),
247 cl::desc("Control the amount of inlining in pre-instrumentation inliner "
248 "(default = 75)"));
249
250static cl::opt<bool>
251 EnableGVNHoist("enable-gvn-hoist",
252 cl::desc("Enable the GVN hoisting pass (default = off)"));
253
254static cl::opt<bool>
255 EnableGVNSink("enable-gvn-sink",
256 cl::desc("Enable the GVN sinking pass (default = off)"));
257
259 "enable-jump-table-to-switch",
260 cl::desc("Enable JumpTableToSwitch pass (default = off)"));
261
262// This option is used in simplifying testing SampleFDO optimizations for
263// profile loading.
264static cl::opt<bool>
265 EnableCHR("enable-chr", cl::init(true), cl::Hidden,
266 cl::desc("Enable control height reduction optimization (CHR)"));
267
269 "flattened-profile-used", cl::init(false), cl::Hidden,
270 cl::desc("Indicate the sample profile being used is flattened, i.e., "
271 "no inline hierarchy exists in the profile"));
272
273static cl::opt<bool>
274 EnableMatrix("enable-matrix", cl::init(false), cl::Hidden,
275 cl::desc("Enable lowering of the matrix intrinsics"));
276
278 "enable-constraint-elimination", cl::init(true), cl::Hidden,
279 cl::desc(
280 "Enable pass to eliminate conditions based on linear constraints"));
281
283 "attributor-enable", cl::Hidden, cl::init(AttributorRunOption::NONE),
284 cl::desc("Enable the attributor inter-procedural deduction pass"),
286 "enable all attributor runs"),
288 "enable module-wide attributor runs"),
290 "enable call graph SCC attributor runs"),
291 clEnumValN(AttributorRunOption::NONE, "none",
292 "disable attributor runs")));
293
295 "enable-sampled-instrumentation", cl::init(false), cl::Hidden,
296 cl::desc("Enable profile instrumentation sampling (default = off)"));
298 "enable-loop-versioning-licm", cl::init(false), cl::Hidden,
299 cl::desc("Enable the experimental Loop Versioning LICM pass"));
300
302 "instrument-cold-function-only-path", cl::init(""),
303 cl::desc("File path for cold function only instrumentation(requires use "
304 "with --pgo-instrument-cold-function-only)"),
305 cl::Hidden);
306
309
311} // namespace llvm
312
329
330namespace llvm {
332} // namespace llvm
333
335 OptimizationLevel Level) {
336 for (auto &C : PeepholeEPCallbacks)
337 C(FPM, Level);
338}
341 for (auto &C : LateLoopOptimizationsEPCallbacks)
342 C(LPM, Level);
343}
345 OptimizationLevel Level) {
346 for (auto &C : LoopOptimizerEndEPCallbacks)
347 C(LPM, Level);
348}
351 for (auto &C : ScalarOptimizerLateEPCallbacks)
352 C(FPM, Level);
353}
355 OptimizationLevel Level) {
356 for (auto &C : CGSCCOptimizerLateEPCallbacks)
357 C(CGPM, Level);
358}
360 OptimizationLevel Level) {
361 for (auto &C : VectorizerStartEPCallbacks)
362 C(FPM, Level);
363}
365 OptimizationLevel Level) {
366 for (auto &C : VectorizerEndEPCallbacks)
367 C(FPM, Level);
368}
370 OptimizationLevel Level,
372 for (auto &C : OptimizerEarlyEPCallbacks)
373 C(MPM, Level, Phase);
374}
376 OptimizationLevel Level,
378 for (auto &C : OptimizerLastEPCallbacks)
379 C(MPM, Level, Phase);
380}
383 for (auto &C : FullLinkTimeOptimizationEarlyEPCallbacks)
384 C(MPM, Level);
385}
388 for (auto &C : FullLinkTimeOptimizationLastEPCallbacks)
389 C(MPM, Level);
390}
392 OptimizationLevel Level) {
393 for (auto &C : PipelineStartEPCallbacks)
394 C(MPM, Level);
395}
398 for (auto &C : PipelineEarlySimplificationEPCallbacks)
399 C(MPM, Level, Phase);
400}
401
402// Helper to add AnnotationRemarksPass.
406
407// Helper to check if the current compilation phase is preparing for LTO
412
413// Helper to check if the current compilation phase is LTO backend
418
419// Helper to conditionally wrap Coro passes.
421 // TODO: Skip passes according to Phase.
422 ModulePassManager CoroPM;
423 CoroPM.addPass(CoroEarlyPass());
424 CGSCCPassManager CGPM;
425 CGPM.addPass(CoroSplitPass());
426 CoroPM.addPass(createModuleToPostOrderCGSCCPassAdaptor(std::move(CGPM)));
427 CoroPM.addPass(CoroCleanupPass());
428 CoroPM.addPass(GlobalDCEPass());
429 return CoroConditionalWrapper(std::move(CoroPM));
430}
431
432// TODO: Investigate the cost/benefit of tail call elimination on debugging.
434PassBuilder::buildO1FunctionSimplificationPipeline(OptimizationLevel Level,
436
438
440 FPM.addPass(CountVisitsPass());
441
442 // Form SSA out of local memory accesses after breaking apart aggregates into
443 // scalars.
444 FPM.addPass(SROAPass(SROAOptions::ModifyCFG));
445
446 // Catch trivial redundancies
447 FPM.addPass(EarlyCSEPass(true /* Enable mem-ssa. */));
448
449 // Hoisting of scalars and load expressions.
450 FPM.addPass(
451 SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(true)));
452 FPM.addPass(InstCombinePass());
453
454 FPM.addPass(LibCallsShrinkWrapPass());
455
456 invokePeepholeEPCallbacks(FPM, Level);
457
458 FPM.addPass(
459 SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(true)));
460
461 // Form canonically associated expression trees, and simplify the trees using
462 // basic mathematical properties. For example, this will form (nearly)
463 // minimal multiplication trees.
464 FPM.addPass(ReassociatePass());
465
466 // Add the primary loop simplification pipeline.
467 // FIXME: Currently this is split into two loop pass pipelines because we run
468 // some function passes in between them. These can and should be removed
469 // and/or replaced by scheduling the loop pass equivalents in the correct
470 // positions. But those equivalent passes aren't powerful enough yet.
471 // Specifically, `SimplifyCFGPass` and `InstCombinePass` are currently still
472 // used. We have `LoopSimplifyCFGPass` which isn't powerful enough yet to
473 // fully replace `SimplifyCFGPass`, and the closest to the other we have is
474 // `LoopInstSimplify`.
475 LoopPassManager LPM1, LPM2;
476
477 // Simplify the loop body. We do this initially to clean up after other loop
478 // passes run, either when iterating on a loop or on inner loops with
479 // implications on the outer loop.
480 LPM1.addPass(LoopInstSimplifyPass());
481 LPM1.addPass(LoopSimplifyCFGPass());
482
483 // Try to remove as much code from the loop header as possible,
484 // to reduce amount of IR that will have to be duplicated. However,
485 // do not perform speculative hoisting the first time as LICM
486 // will destroy metadata that may not need to be destroyed if run
487 // after loop rotation.
488 // TODO: Investigate promotion cap for O1.
489 LPM1.addPass(LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap,
490 /*AllowSpeculation=*/false));
491
492 LPM1.addPass(LoopRotatePass(/* Disable header duplication */ true,
494 // TODO: Investigate promotion cap for O1.
495 LPM1.addPass(LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap,
496 /*AllowSpeculation=*/true));
497 LPM1.addPass(SimpleLoopUnswitchPass());
499 LPM1.addPass(LoopFlattenPass());
500
501 LPM2.addPass(LoopIdiomRecognizePass());
502 LPM2.addPass(IndVarSimplifyPass());
503
505
506 LPM2.addPass(LoopDeletionPass());
507
508 // Do not enable unrolling in PreLinkThinLTO phase during sample PGO
509 // because it changes the IR and makes profile annotation in back compile
510 // inaccurate. The normal unroller doesn't pay attention to forced full unroll
511 // attributes so we need to make sure and allow the full unroll pass to pay
512 // attention to it.
513 if (Phase != ThinOrFullLTOPhase::ThinLTOPreLink || !PGOOpt ||
514 PGOOpt->Action != PGOOptions::SampleUse)
515 LPM2.addPass(LoopFullUnrollPass(Level.getSpeedupLevel(),
516 /* OnlyWhenForced= */ !PTO.LoopUnrolling,
517 PTO.ForgetAllSCEVInLoopUnroll));
518
520
521 FPM.addPass(createFunctionToLoopPassAdaptor(std::move(LPM1),
522 /*UseMemorySSA=*/true,
523 /*UseBlockFrequencyInfo=*/true));
524 FPM.addPass(
525 SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(true)));
526 FPM.addPass(InstCombinePass());
527 // The loop passes in LPM2 (LoopFullUnrollPass) do not preserve MemorySSA.
528 // *All* loop passes must preserve it, in order to be able to use it.
529 FPM.addPass(createFunctionToLoopPassAdaptor(std::move(LPM2),
530 /*UseMemorySSA=*/false,
531 /*UseBlockFrequencyInfo=*/false));
532
533 // Delete small array after loop unroll.
534 FPM.addPass(SROAPass(SROAOptions::ModifyCFG));
535
536 // Specially optimize memory movement as it doesn't look like dataflow in SSA.
537 FPM.addPass(MemCpyOptPass());
538
539 // Sparse conditional constant propagation.
540 // FIXME: It isn't clear why we do this *after* loop passes rather than
541 // before...
542 FPM.addPass(SCCPPass());
543
544 // Delete dead bit computations (instcombine runs after to fold away the dead
545 // computations, and then ADCE will run later to exploit any new DCE
546 // opportunities that creates).
547 FPM.addPass(BDCEPass());
548
549 // Run instcombine after redundancy and dead bit elimination to exploit
550 // opportunities opened up by them.
551 FPM.addPass(InstCombinePass());
552 invokePeepholeEPCallbacks(FPM, Level);
553
554 FPM.addPass(CoroElidePass());
555
557
558 // Finally, do an expensive DCE pass to catch all the dead code exposed by
559 // the simplifications and basic cleanup after all the simplifications.
560 // TODO: Investigate if this is too expensive.
561 FPM.addPass(ADCEPass());
562 FPM.addPass(
563 SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(true)));
564 FPM.addPass(InstCombinePass());
565 invokePeepholeEPCallbacks(FPM, Level);
566
567 return FPM;
568}
569
573 assert(Level != OptimizationLevel::O0 && "Must request optimizations!");
574
575 // The O1 pipeline has a separate pipeline creation function to simplify
576 // construction readability.
577 if (Level.getSpeedupLevel() == 1)
578 return buildO1FunctionSimplificationPipeline(Level, Phase);
579
581
584
585 // Form SSA out of local memory accesses after breaking apart aggregates into
586 // scalars.
588
589 // Catch trivial redundancies
590 FPM.addPass(EarlyCSEPass(true /* Enable mem-ssa. */));
593
594 // Hoisting of scalars and load expressions.
595 if (EnableGVNHoist)
596 FPM.addPass(GVNHoistPass());
597
598 // Global value numbering based sinking.
599 if (EnableGVNSink) {
600 FPM.addPass(GVNSinkPass());
601 FPM.addPass(
602 SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(true)));
603 }
604
605 // Speculative execution if the target has divergent branches; otherwise nop.
606 FPM.addPass(SpeculativeExecutionPass(/* OnlyIfDivergentTarget =*/true));
607
608 // Optimize based on known information about branches, and cleanup afterward.
611
612 // Jump table to switch conversion.
617
618 FPM.addPass(
619 SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(true)));
622
623 if (!Level.isOptimizingForSize())
625
626 invokePeepholeEPCallbacks(FPM, Level);
627
628 // For PGO use pipeline, try to optimize memory intrinsics such as memcpy
629 // using the size value profile. Don't perform this when optimizing for size.
630 if (PGOOpt && PGOOpt->Action == PGOOptions::IRUse &&
631 !Level.isOptimizingForSize())
633
634 FPM.addPass(TailCallElimPass(/*UpdateFunctionEntryCount=*/
635 isInstrumentedPGOUse()));
636 FPM.addPass(
637 SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(true)));
638
639 // Form canonically associated expression trees, and simplify the trees using
640 // basic mathematical properties. For example, this will form (nearly)
641 // minimal multiplication trees.
643
646
647 // Add the primary loop simplification pipeline.
648 // FIXME: Currently this is split into two loop pass pipelines because we run
649 // some function passes in between them. These can and should be removed
650 // and/or replaced by scheduling the loop pass equivalents in the correct
651 // positions. But those equivalent passes aren't powerful enough yet.
652 // Specifically, `SimplifyCFGPass` and `InstCombinePass` are currently still
653 // used. We have `LoopSimplifyCFGPass` which isn't powerful enough yet to
654 // fully replace `SimplifyCFGPass`, and the closest to the other we have is
655 // `LoopInstSimplify`.
656 LoopPassManager LPM1, LPM2;
657
658 // Simplify the loop body. We do this initially to clean up after other loop
659 // passes run, either when iterating on a loop or on inner loops with
660 // implications on the outer loop.
661 LPM1.addPass(LoopInstSimplifyPass());
662 LPM1.addPass(LoopSimplifyCFGPass());
663
664 // Try to remove as much code from the loop header as possible,
665 // to reduce amount of IR that will have to be duplicated. However,
666 // do not perform speculative hoisting the first time as LICM
667 // will destroy metadata that may not need to be destroyed if run
668 // after loop rotation.
669 // TODO: Investigate promotion cap for O1.
670 LPM1.addPass(LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap,
671 /*AllowSpeculation=*/false));
672
673 // Disable header duplication in loop rotation at -Oz.
675 Level != OptimizationLevel::Oz,
677 // TODO: Investigate promotion cap for O1.
678 LPM1.addPass(LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap,
679 /*AllowSpeculation=*/true));
680 LPM1.addPass(
681 SimpleLoopUnswitchPass(/* NonTrivial */ Level == OptimizationLevel::O3));
683 LPM1.addPass(LoopFlattenPass());
684
685 LPM2.addPass(LoopIdiomRecognizePass());
686 LPM2.addPass(IndVarSimplifyPass());
687
688 {
690 ExtraPasses.addPass(SimpleLoopUnswitchPass(/* NonTrivial */ Level ==
692 LPM2.addPass(std::move(ExtraPasses));
693 }
694
696
697 LPM2.addPass(LoopDeletionPass());
698
699 // Do not enable unrolling in PreLinkThinLTO phase during sample PGO
700 // because it changes the IR and makes profile annotation in back compile
701 // inaccurate. The normal unroller doesn't pay attention to forced full unroll
702 // attributes so we need to make sure and allow the full unroll pass to pay
703 // attention to it.
704 if (Phase != ThinOrFullLTOPhase::ThinLTOPreLink || !PGOOpt ||
705 PGOOpt->Action != PGOOptions::SampleUse)
706 LPM2.addPass(LoopFullUnrollPass(Level.getSpeedupLevel(),
707 /* OnlyWhenForced= */ !PTO.LoopUnrolling,
708 PTO.ForgetAllSCEVInLoopUnroll));
709
711
712 FPM.addPass(createFunctionToLoopPassAdaptor(std::move(LPM1),
713 /*UseMemorySSA=*/true,
714 /*UseBlockFrequencyInfo=*/true));
715 FPM.addPass(
716 SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(true)));
718 // The loop passes in LPM2 (LoopIdiomRecognizePass, IndVarSimplifyPass,
719 // LoopDeletionPass and LoopFullUnrollPass) do not preserve MemorySSA.
720 // *All* loop passes must preserve it, in order to be able to use it.
721 FPM.addPass(createFunctionToLoopPassAdaptor(std::move(LPM2),
722 /*UseMemorySSA=*/false,
723 /*UseBlockFrequencyInfo=*/false));
724
725 // Delete small array after loop unroll.
727
728 // Try vectorization/scalarization transforms that are both improvements
729 // themselves and can allow further folds with GVN and InstCombine.
730 FPM.addPass(VectorCombinePass(/*TryEarlyFoldsOnly=*/true));
731
732 // Eliminate redundancies.
734 if (RunNewGVN)
735 FPM.addPass(NewGVNPass());
736 else
737 FPM.addPass(GVNPass());
738
739 // Sparse conditional constant propagation.
740 // FIXME: It isn't clear why we do this *after* loop passes rather than
741 // before...
742 FPM.addPass(SCCPPass());
743
744 // Delete dead bit computations (instcombine runs after to fold away the dead
745 // computations, and then ADCE will run later to exploit any new DCE
746 // opportunities that creates).
747 FPM.addPass(BDCEPass());
748
749 // Run instcombine after redundancy and dead bit elimination to exploit
750 // opportunities opened up by them.
752 invokePeepholeEPCallbacks(FPM, Level);
753
754 // Re-consider control flow based optimizations after redundancy elimination,
755 // redo DCE, etc.
758
761
762 // Finally, do an expensive DCE pass to catch all the dead code exposed by
763 // the simplifications and basic cleanup after all the simplifications.
764 // TODO: Investigate if this is too expensive.
765 FPM.addPass(ADCEPass());
766
767 // Specially optimize memory movement as it doesn't look like dataflow in SSA.
768 FPM.addPass(MemCpyOptPass());
769
770 FPM.addPass(DSEPass());
772
774 LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap,
775 /*AllowSpeculation=*/true),
776 /*UseMemorySSA=*/true, /*UseBlockFrequencyInfo=*/false));
777
778 FPM.addPass(CoroElidePass());
779
781
783 .convertSwitchRangeToICmp(true)
784 .convertSwitchToArithmetic(true)
785 .hoistCommonInsts(true)
786 .sinkCommonInsts(true)));
788 invokePeepholeEPCallbacks(FPM, Level);
789
790 return FPM;
791}
792
/// Appends passes to \p MPM for the LTO pre-link pipelines.
///
/// NOTE(review): the statements of this function's body are not visible in
/// this listing; presumably they add the module passes that every LTO pre-link
/// pipeline must run -- confirm against the full source.
793void PassBuilder::addRequiredLTOPreLinkPasses(ModulePassManager &MPM) {
796}
797
/// Appends the pre-instrumentation inlining stage to \p MPM.
///
/// The stage consists of an inliner wrapper (MIWP) whose CGSCC pipeline runs a
/// small function cleanup pipeline (SROA, EarlyCSE, SimplifyCFG, InstCombine
/// and the registered peephole extension-point callbacks), followed by a
/// module-level GlobalDCE.
///
/// \param MPM      Module pass manager that receives the passes.
/// \param Level    Optimization level; must not be O0 (asserted). It selects
///                 the inline hint threshold and is forwarded to the peephole
///                 callbacks.
/// \param LTOPhase Not referenced on any line visible in this listing.
///
/// NOTE(review): several lines of this function (the condition guarding the
/// early return, and the declarations of MIWP and FPM) are missing from this
/// listing; the description above covers only the visible statements.
798void PassBuilder::addPreInlinerPasses(ModulePassManager &MPM,
799 OptimizationLevel Level,
800 ThinOrFullLTOPhase LTOPhase) {
801 assert(Level != OptimizationLevel::O0 && "Not expecting O0 here!");
// NOTE(review): the condition for this early return is on a line not shown
// here; presumably it is the DisablePreInliner flag -- confirm.
803 return;
804 InlineParams IP;
805
807
808 // FIXME: The hint threshold has the same value used by the regular inliner
809 // when not optimizing for size. This should probably be lowered after
810 // performance testing.
811 // FIXME: this comment is cargo-culted from the old pass manager; revisit.
// When optimizing for size the hint threshold comes from the
// PreInlineThreshold option; otherwise the fixed value 325 is used.
812 IP.HintThreshold = Level.isOptimizingForSize() ? PreInlineThreshold : 325;
814 IP, /* MandatoryFirst */ true,
// Passes added to CGPipeline run inside the inliner wrapper's CGSCC pipeline.
816 CGSCCPassManager &CGPipeline = MIWP.getPM();
817
819 FPM.addPass(SROAPass(SROAOptions::ModifyCFG));
820 FPM.addPass(EarlyCSEPass()); // Catch trivial redundancies.
821 FPM.addPass(SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(
822 true))); // Merge & remove basic blocks.
823 FPM.addPass(InstCombinePass()); // Combine silly sequences.
824 invokePeepholeEPCallbacks(FPM, Level);
825
// Nest the function cleanup pipeline inside the CGSCC pipeline.
826 CGPipeline.addPass(createCGSCCToFunctionPassAdaptor(
827 std::move(FPM), PTO.EagerlyInvalidateAnalyses));
828
829 MPM.addPass(std::move(MIWP));
830
831 // Delete anything that is now dead to make sure that we don't instrument
832 // dead code. Instrumentation can end up keeping dead code around and
833 // dramatically increase code size.
834 MPM.addPass(GlobalDCEPass());
835}
836
/// Appends a LoopRotatePass, wrapped in loop and function adaptors, to \p MPM.
///
/// The loop adaptor is created without MemorySSA and without block frequency
/// info. The argument passed to LoopRotatePass is true when the
/// EnableLoopHeaderDuplication option is set or when \p Level is not Oz.
///
/// \param MPM   Module pass manager that receives the pass.
/// \param Level Optimization level; only compared against Oz here.
///
/// NOTE(review): the `if` that guards this block and the opening of the
/// addPass call are on lines missing from this listing; presumably the guard
/// is the EnablePostPGOLoopRotation option -- confirm.
837void PassBuilder::addPostPGOLoopRotation(ModulePassManager &MPM,
838 OptimizationLevel Level) {
840 // Disable header duplication in loop rotation at -Oz.
843 LoopRotatePass(EnableLoopHeaderDuplication ||
844 Level != OptimizationLevel::Oz),
845 /*UseMemorySSA=*/false,
846 /*UseBlockFrequencyInfo=*/false),
847 PTO.EagerlyInvalidateAnalyses));
848 }
849}
850
/// Appends the IR-level PGO passes to \p MPM, for either profile use or
/// profile generation.
///
/// Profile use (\p RunProfileGen is false): adds PGOInstrumentationUse, then
/// requires ProfileSummaryAnalysis so that it is cached, and returns.
///
/// Profile generation (\p RunProfileGen is true): adds PGOInstrumentationGen,
/// the post-PGO loop rotation, and finally InstrProfilingLoweringPass
/// configured through an InstrProfOptions value.
///
/// \param MPM                  Module pass manager that receives the passes.
/// \param Level                Optimization level; must not be O0 (asserted).
/// \param RunProfileGen        Selects generation (true) or use (false).
/// \param IsCS                 Forwarded to the use/gen/lowering passes and
///                             used as the value of UseBFIInPromotion.
/// \param AtomicCounterUpdate  Copied into InstrProfOptions::Atomic.
/// \param ProfileFile          Profile to read in use mode (must be non-empty,
///                             asserted); in generation mode, if non-empty, it
///                             becomes InstrProfOptions::InstrProfileOutput.
/// \param ProfileRemappingFile Forwarded to PGOInstrumentationUse only.
851void PassBuilder::addPGOInstrPasses(ModulePassManager &MPM,
852 OptimizationLevel Level, bool RunProfileGen,
853 bool IsCS, bool AtomicCounterUpdate,
854 std::string ProfileFile,
855 std::string ProfileRemappingFile) {
856 assert(Level != OptimizationLevel::O0 && "Not expecting O0 here!");
857
858 if (!RunProfileGen) {
859 assert(!ProfileFile.empty() && "Profile use expecting a profile file!");
860 MPM.addPass(
861 PGOInstrumentationUse(ProfileFile, ProfileRemappingFile, IsCS, FS));
862 // Cache ProfileSummaryAnalysis once to avoid the potential need to insert
863 // RequireAnalysisPass for PSI before subsequent non-module passes.
864 MPM.addPass(RequireAnalysisPass<ProfileSummaryAnalysis, Module>());
865 return;
866 }
867
868 // Perform PGO instrumentation.
// NOTE(review): the rest of this call expression is on a line missing from
// this listing.
869 MPM.addPass(PGOInstrumentationGen(IsCS ? PGOInstrumentationType::CSFDO
871
872 addPostPGOLoopRotation(MPM, Level);
873 // Add the profile lowering pass.
874 InstrProfOptions Options;
875 if (!ProfileFile.empty())
876 Options.InstrProfileOutput = ProfileFile;
877 // Do counter promotion at Level greater than O0.
878 Options.DoCounterPromotion = true;
879 Options.UseBFIInPromotion = IsCS;
880 if (EnableSampledInstr) {
881 Options.Sampling = true;
882 // With sampling, there is little benefit to enable counter promotion.
883 // But note that sampling does work with counter promotion.
884 Options.DoCounterPromotion = false;
885 }
886 Options.Atomic = AtomicCounterUpdate;
887 MPM.addPass(InstrProfilingLoweringPass(Options, IsCS));
888}
889
891 bool RunProfileGen, bool IsCS,
892 bool AtomicCounterUpdate,
893 std::string ProfileFile,
894 std::string ProfileRemappingFile) {
895 if (!RunProfileGen) {
896 assert(!ProfileFile.empty() && "Profile use expecting a profile file!");
897 MPM.addPass(
898 PGOInstrumentationUse(ProfileFile, ProfileRemappingFile, IsCS, FS));
899 // Cache ProfileSummaryAnalysis once to avoid the potential need to insert
900 // RequireAnalysisPass for PSI before subsequent non-module passes.
902 return;
903 }
904
905 // Perform PGO instrumentation.
908 // Add the profile lowering pass.
910 if (!ProfileFile.empty())
911 Options.InstrProfileOutput = ProfileFile;
912 // Do not do counter promotion at O0.
913 Options.DoCounterPromotion = false;
914 Options.UseBFIInPromotion = IsCS;
915 Options.Atomic = AtomicCounterUpdate;
917}
918
920 return getInlineParams(Level.getSpeedupLevel(), Level.getSizeLevel());
921}
922
926 InlineParams IP;
927 if (PTO.InlinerThreshold == -1)
928 IP = getInlineParamsFromOptLevel(Level);
929 else
930 IP = getInlineParams(PTO.InlinerThreshold);
931 // For PreLinkThinLTO + SamplePGO or PreLinkFullLTO + SamplePGO,
932 // set hot-caller threshold to 0 to disable hot
933 // callsite inline (as much as possible [1]) because it makes
934 // profile annotation in the backend inaccurate.
935 //
936 // [1] Note the cost of a function could be below zero due to erased
937 // prologue / epilogue.
938 if (isLTOPreLink(Phase) && PGOOpt && PGOOpt->Action == PGOOptions::SampleUse)
940
941 if (PGOOpt)
943
947
948 // Require the GlobalsAA analysis for the module so we can query it within
949 // the CGSCC pipeline.
951 MIWP.addModulePass(RequireAnalysisPass<GlobalsAA, Module>());
952 // Invalidate AAManager so it can be recreated and pick up the newly
953 // available GlobalsAA.
954 MIWP.addModulePass(
956 }
957
958 // Require the ProfileSummaryAnalysis for the module so we can query it within
959 // the inliner pass.
961
962 // Now begin the main postorder CGSCC pipeline.
963 // FIXME: The current CGSCC pipeline has its origins in the legacy pass
964 // manager and trying to emulate its precise behavior. Much of this doesn't
965 // make a lot of sense and we should revisit the core CGSCC structure.
966 CGSCCPassManager &MainCGPipeline = MIWP.getPM();
967
968 // Note: historically, the PruneEH pass was run first to deduce nounwind and
969 // generally clean up exception handling overhead. It isn't clear this is
970 // valuable as the inliner doesn't currently care whether it is inlining an
971 // invoke or a call.
972
974 MainCGPipeline.addPass(AttributorCGSCCPass());
975
976 // Deduce function attributes. We do another run of this after the function
977 // simplification pipeline, so this only needs to run when it could affect the
978 // function simplification pipeline, which is only the case with recursive
979 // functions.
980 MainCGPipeline.addPass(PostOrderFunctionAttrsPass(/*SkipNonRecursive*/ true));
981
982 // When at O3 add argument promotion to the pass pipeline.
983 // FIXME: It isn't at all clear why this should be limited to O3.
984 if (Level == OptimizationLevel::O3)
985 MainCGPipeline.addPass(ArgumentPromotionPass());
986
987 // Try to perform OpenMP specific optimizations. This is a (quick!) no-op if
988 // there are no OpenMP runtime calls present in the module.
989 if (Level == OptimizationLevel::O2 || Level == OptimizationLevel::O3)
990 MainCGPipeline.addPass(OpenMPOptCGSCCPass(Phase));
991
992 invokeCGSCCOptimizerLateEPCallbacks(MainCGPipeline, Level);
993
994 // Add the core function simplification pipeline nested inside the
995 // CGSCC walk.
998 PTO.EagerlyInvalidateAnalyses, /*NoRerun=*/true));
999
1000 // Finally, deduce any function attributes based on the fully simplified
1001 // function.
1002 MainCGPipeline.addPass(PostOrderFunctionAttrsPass());
1003
1004 // Mark that the function is fully simplified and that it shouldn't be
1005 // simplified again if we somehow revisit it due to CGSCC mutations unless
1006 // it's been modified since.
1009
1011 MainCGPipeline.addPass(CoroSplitPass(Level != OptimizationLevel::O0));
1012 MainCGPipeline.addPass(CoroAnnotationElidePass());
1013 }
1014
1015 // Make sure we don't affect potential future NoRerun CGSCC adaptors.
1016 MIWP.addLateModulePass(createModuleToFunctionPassAdaptor(
1018
1019 return MIWP;
1020}
1021
1026
1028 // For PreLinkThinLTO + SamplePGO or PreLinkFullLTO + SamplePGO,
1029 // set hot-caller threshold to 0 to disable hot
1030 // callsite inline (as much as possible [1]) because it makes
1031 // profile annotation in the backend inaccurate.
1032 //
1033 // [1] Note the cost of a function could be below zero due to erased
1034 // prologue / epilogue.
1035 if (isLTOPreLink(Phase) && PGOOpt && PGOOpt->Action == PGOOptions::SampleUse)
1036 IP.HotCallSiteThreshold = 0;
1037
1038 if (PGOOpt)
1040
1041 // The inline deferral logic is used to avoid losing some
1042 // inlining chance in future. It is helpful in SCC inliner, in which
1043 // inlining is processed in bottom-up order.
1044 // While in module inliner, the inlining order is a priority-based order
1045 // by default. The inline deferral is unnecessary there. So we disable the
1046 // inline deferral logic in module inliner.
1047 IP.EnableDeferral = false;
1048
1051 MPM.addPass(GlobalOptPass());
1052 MPM.addPass(GlobalDCEPass());
1053 MPM.addPass(PGOCtxProfFlatteningPass(/*IsPreThinlink=*/false));
1054 }
1055
1058 PTO.EagerlyInvalidateAnalyses));
1059
1063 MPM.addPass(
1065 }
1066
1067 return MPM;
1068}
1069
1073 assert(Level != OptimizationLevel::O0 &&
1074 "Should not be used for O0 pipeline");
1075
1077 "FullLTOPostLink shouldn't call buildModuleSimplificationPipeline!");
1078
1080
1081 // Place pseudo probe instrumentation as the first pass of the pipeline to
1082 // minimize the impact of optimization changes.
1083 if (PGOOpt && PGOOpt->PseudoProbeForProfiling &&
1086
1087 bool HasSampleProfile = PGOOpt && (PGOOpt->Action == PGOOptions::SampleUse);
1088
1089 // In ThinLTO mode, when flattened profile is used, all the available
1090 // profile information will be annotated in PreLink phase so there is
1091 // no need to load the profile again in PostLink.
1092 bool LoadSampleProfile =
1093 HasSampleProfile &&
1095
1096 // During the ThinLTO backend phase we perform early indirect call promotion
1097 // here, before globalopt. Otherwise imported available_externally functions
1098 // look unreferenced and are removed. If we are going to load the sample
1099 // profile then defer until later.
1100 // TODO: See if we can move later and consolidate with the location where
1101 // we perform ICP when we are loading a sample profile.
1102 // TODO: We pass HasSampleProfile (whether there was a sample profile file
1103 // passed to the compile) to the SamplePGO flag of ICP. This is used to
1104 // determine whether the new direct calls are annotated with prof metadata.
1105 // Ideally this should be determined from whether the IR is annotated with
1106 // sample profile, and not whether a sample profile was provided on the
1107 // command line. E.g. for flattened profiles where we will not be reloading
1108 // the sample profile in the ThinLTO backend, we ideally shouldn't have to
1109 // provide the sample profile file.
1110 if (Phase == ThinOrFullLTOPhase::ThinLTOPostLink && !LoadSampleProfile)
1111 MPM.addPass(PGOIndirectCallPromotion(true /* InLTO */, HasSampleProfile));
1112
1113 // Create an early function pass manager to cleanup the output of the
1114 // frontend. Not necessary with LTO post link pipelines since the pre link
1115 // pipeline already cleaned up the frontend output.
1117 // Do basic inference of function attributes from known properties of system
1118 // libraries and other oracles.
1120 MPM.addPass(CoroEarlyPass());
1121
1122 FunctionPassManager EarlyFPM;
1123 EarlyFPM.addPass(EntryExitInstrumenterPass(/*PostInlining=*/false));
1124 // Lower llvm.expect to metadata before attempting transforms.
1125 // Compare/branch metadata may alter the behavior of passes like
1126 // SimplifyCFG.
1128 EarlyFPM.addPass(SimplifyCFGPass());
1130 EarlyFPM.addPass(EarlyCSEPass());
1131 if (Level == OptimizationLevel::O3)
1132 EarlyFPM.addPass(CallSiteSplittingPass());
1134 std::move(EarlyFPM), PTO.EagerlyInvalidateAnalyses));
1135 }
1136
1137 if (LoadSampleProfile) {
1138 // Annotate sample profile right after early FPM to ensure freshness of
1139 // the debug info.
1141 PGOOpt->ProfileFile, PGOOpt->ProfileRemappingFile, Phase, FS));
1142 // Cache ProfileSummaryAnalysis once to avoid the potential need to insert
1143 // RequireAnalysisPass for PSI before subsequent non-module passes.
1145 // Do not invoke ICP in the LTOPrelink phase as it makes it hard
1146 // for the profile annotation to be accurate in the LTO backend.
1147 if (!isLTOPreLink(Phase))
1148 // We perform early indirect call promotion here, before globalopt.
1149 // This is important for the ThinLTO backend phase because otherwise
1150 // imported available_externally functions look unreferenced and are
1151 // removed.
1152 MPM.addPass(
1153 PGOIndirectCallPromotion(true /* IsInLTO */, true /* SamplePGO */));
1154 }
1155
1156 // Try to perform OpenMP specific optimizations on the module. This is a
1157 // (quick!) no-op if there are no OpenMP runtime calls present in the module.
1159
1161 MPM.addPass(AttributorPass());
1162
1163 // Lower type metadata and the type.test intrinsic in the ThinLTO
1164 // post link pipeline after ICP. This is to enable usage of the type
1165 // tests in ICP sequences.
1167 MPM.addPass(LowerTypeTestsPass(nullptr, nullptr,
1169
1171
1172 // Interprocedural constant propagation now that basic cleanup has occurred
1173 // and prior to optimizing globals.
1174 // FIXME: This position in the pipeline hasn't been carefully considered in
1175 // years, it should be re-analyzed.
1176 MPM.addPass(IPSCCPPass(
1177 IPSCCPOptions(/*AllowFuncSpec=*/
1178 Level != OptimizationLevel::Os &&
1179 Level != OptimizationLevel::Oz &&
1180 !isLTOPreLink(Phase))));
1181
1182 // Attach metadata to indirect call sites indicating the set of functions
1183 // they may target at run-time. This should follow IPSCCP.
1185
1186 // Optimize globals to try and fold them into constants.
1187 MPM.addPass(GlobalOptPass());
1188
1189 // Create a small function pass pipeline to cleanup after all the global
1190 // optimizations.
1191 FunctionPassManager GlobalCleanupPM;
1192 // FIXME: Should this instead be a run of SROA?
1193 GlobalCleanupPM.addPass(PromotePass());
1194 GlobalCleanupPM.addPass(InstCombinePass());
1195 invokePeepholeEPCallbacks(GlobalCleanupPM, Level);
1196 GlobalCleanupPM.addPass(
1197 SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(true)));
1198 MPM.addPass(createModuleToFunctionPassAdaptor(std::move(GlobalCleanupPM),
1199 PTO.EagerlyInvalidateAnalyses));
1200
1201 // We already asserted this happens in non-FullLTOPostLink earlier.
1202 const bool IsPreLink = Phase != ThinOrFullLTOPhase::ThinLTOPostLink;
1203 // Enable contextual profiling instrumentation.
1204 const bool IsCtxProfGen =
1206 const bool IsPGOPreLink = !IsCtxProfGen && PGOOpt && IsPreLink;
1207 const bool IsPGOInstrGen =
1208 IsPGOPreLink && PGOOpt->Action == PGOOptions::IRInstr;
1209 const bool IsPGOInstrUse =
1210 IsPGOPreLink && PGOOpt->Action == PGOOptions::IRUse;
1211 const bool IsMemprofUse = IsPGOPreLink && !PGOOpt->MemoryProfile.empty();
1212 // We don't want to mix pgo ctx gen and pgo gen; we also don't currently
1213 // enable ctx profiling from the frontend.
1215 "Enabling both instrumented PGO and contextual instrumentation is not "
1216 "supported.");
1217 const bool IsCtxProfUse =
1219
1220 assert(
1222 "--instrument-cold-function-only-path is provided but "
1223 "--pgo-instrument-cold-function-only is not enabled");
1224 const bool IsColdFuncOnlyInstrGen = PGOInstrumentColdFunctionOnly &&
1225 IsPGOPreLink &&
1227
1228 if (IsPGOInstrGen || IsPGOInstrUse || IsMemprofUse || IsCtxProfGen ||
1229 IsCtxProfUse || IsColdFuncOnlyInstrGen)
1230 addPreInlinerPasses(MPM, Level, Phase);
1231
1232 // Add all the requested passes for instrumentation PGO, if requested.
1233 if (IsPGOInstrGen || IsPGOInstrUse) {
1234 addPGOInstrPasses(MPM, Level,
1235 /*RunProfileGen=*/IsPGOInstrGen,
1236 /*IsCS=*/false, PGOOpt->AtomicCounterUpdate,
1237 PGOOpt->ProfileFile, PGOOpt->ProfileRemappingFile);
1238 } else if (IsCtxProfGen || IsCtxProfUse) {
1240 // In pre-link, we just want the instrumented IR. We use the contextual
1241 // profile in the post-thinlink phase.
1242 // The instrumentation will be removed in post-thinlink after IPO.
1243 // FIXME(mtrofin): move AssignGUIDPass if there is agreement to use this
1244 // mechanism for GUIDs.
1245 MPM.addPass(AssignGUIDPass());
1246 if (IsCtxProfUse) {
1247 MPM.addPass(PGOCtxProfFlatteningPass(/*IsPreThinlink=*/true));
1248 return MPM;
1249 }
1250 // Block further inlining in the instrumented ctxprof case. This avoids
1251 // confusingly collecting profiles for the same GUID corresponding to
1252 // different variants of the function. We could do like PGO and identify
1253 // functions by a (GUID, Hash) tuple, but since the ctxprof "use" waits for
1254 // thinlto to happen before performing any further optimizations, it's
1255 // unnecessary to collect profiles for non-prevailing copies.
1257 addPostPGOLoopRotation(MPM, Level);
1259 } else if (IsColdFuncOnlyInstrGen) {
1260 addPGOInstrPasses(MPM, Level, /* RunProfileGen */ true, /* IsCS */ false,
1261 /* AtomicCounterUpdate */ false,
1263 /* ProfileRemappingFile */ "");
1264 }
1265
1266 if (IsPGOInstrGen || IsPGOInstrUse || IsCtxProfGen)
1267 MPM.addPass(PGOIndirectCallPromotion(false, false));
1268
1269 if (IsPGOPreLink && PGOOpt->CSAction == PGOOptions::CSIRInstr)
1270 MPM.addPass(PGOInstrumentationGenCreateVar(PGOOpt->CSProfileGenFile,
1272
1273 if (IsMemprofUse)
1274 MPM.addPass(MemProfUsePass(PGOOpt->MemoryProfile, FS));
1275
1276 if (PGOOpt && (PGOOpt->Action == PGOOptions::IRUse ||
1277 PGOOpt->Action == PGOOptions::SampleUse))
1278 MPM.addPass(PGOForceFunctionAttrsPass(PGOOpt->ColdOptType));
1279
1280 MPM.addPass(AlwaysInlinerPass(/*InsertLifetimeIntrinsics=*/true));
1281
1284 else
1285 MPM.addPass(buildInlinerPipeline(Level, Phase));
1286
1287 // Remove any dead arguments exposed by cleanups, constant folding globals,
1288 // and argument promotion.
1290
1293
1295 MPM.addPass(CoroCleanupPass());
1296
1297 // Optimize globals now that functions are fully simplified.
1298 MPM.addPass(GlobalOptPass());
1299 MPM.addPass(GlobalDCEPass());
1300
1301 return MPM;
1302}
1303
/// Append the vectorization-stage function passes to \p FPM.
///
/// \param Level     Optimization level. Its speedup level is forwarded to the
///                  unroll passes and, together with ExtraVectorizerPasses,
///                  gates the extra post-vectorization cleanup passes.
/// \param FPM       Function pass manager that receives the passes.
/// \param IsFullLTO Selects the pass ordering. When true, the unroll step is
///                  scheduled in the leading branch and SCCP, InstCombine and
///                  BDCE follow the SimplifyCFG cleanup. When false,
///                  InstCombine, loop unrolling, WarnMissedTransformations and
///                  a CFG-preserving SROA run after VectorCombine instead.
///
1304/// TODO: Should LTO cause any differences to this set of passes?
1305void PassBuilder::addVectorPasses(OptimizationLevel Level,
1306 FunctionPassManager &FPM, bool IsFullLTO) {
1309
1311 if (IsFullLTO) {
1312 // The vectorizer may have significantly shortened a loop body; unroll
1313 // again. Unroll small loops to hide loop backedge latency and saturate any
1314 // parallel execution resources of an out-of-order processor. We also then
1315 // need to clean up redundancies and loop invariant code.
1316 // FIXME: It would be really good to use a loop-integrated instruction
1317 // combiner for cleanup here so that the unrolling and LICM can be pipelined
1318 // across the loop nests.
1319 // We do UnrollAndJam in a separate LPM to ensure it happens before unroll
1322 LoopUnrollAndJamPass(Level.getSpeedupLevel())));
1324 Level.getSpeedupLevel(), /*OnlyWhenForced=*/!PTO.LoopUnrolling,
1327 // Now that we are done with loop unrolling, be it either by LoopVectorizer,
1328 // or LoopUnroll passes, some variable-offset GEP's into alloca's could have
1329 // become constant-offset, thus enabling SROA and alloca promotion. Do so.
1330 // NOTE: we are very late in the pipeline, and we don't have any LICM
1331 // or SimplifyCFG passes scheduled after us, that would cleanup
1332 // the CFG mess this may create if allowed to modify CFG, so forbid that.
1334 }
1335
1336 if (!IsFullLTO) {
1337 // Eliminate loads by forwarding stores from the previous iteration to loads
1338 // of the current iteration.
1340 }
1341 // Cleanup after the loop optimization passes.
1342 FPM.addPass(InstCombinePass());
1343
1344 if (Level.getSpeedupLevel() > 1 && ExtraVectorizerPasses) {
1345 ExtraFunctionPassManager<ShouldRunExtraVectorPasses> ExtraPasses;
1346 // At higher optimization levels, try to clean up any runtime overlap and
1347 // alignment checks inserted by the vectorizer. We want to track correlated
1348 // runtime checks for two inner loops in the same outer loop, fold any
1349 // common computations, hoist loop-invariant aspects out of any outer loop,
1350 // and unswitch the runtime checks if possible. Once hoisted, we may have
1351 // dead (or speculatable) control flows or more combining opportunities.
1352 ExtraPasses.addPass(EarlyCSEPass());
1353 ExtraPasses.addPass(CorrelatedValuePropagationPass());
1354 ExtraPasses.addPass(InstCombinePass());
1355 LoopPassManager LPM;
1356 LPM.addPass(LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap,
1357 /*AllowSpeculation=*/true));
1358 LPM.addPass(SimpleLoopUnswitchPass(/* NonTrivial */ Level ==
1360 ExtraPasses.addPass(
1361 createFunctionToLoopPassAdaptor(std::move(LPM), /*UseMemorySSA=*/true,
1362 /*UseBlockFrequencyInfo=*/true));
1363 ExtraPasses.addPass(
1364 SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(true)));
1365 ExtraPasses.addPass(InstCombinePass());
1366 FPM.addPass(std::move(ExtraPasses));
1367 }
1368
1369 // Now that we've formed fast to execute loop structures, we do further
1370 // optimizations. These are run afterward as they might block doing complex
1371 // analyses and transforms such as what are needed for loop vectorization.
1372
1373 // Cleanup after loop vectorization, etc. Simplification passes like CVP and
1374 // GVN, loop transforms, and others have already run, so it's now better to
1375 // convert to more optimized IR using more aggressive simplify CFG options.
1376 // The extra sinking transform can create larger basic blocks, so do this
1377 // before SLP vectorization.
1378 FPM.addPass(SimplifyCFGPass(SimplifyCFGOptions()
1379 .forwardSwitchCondToPhi(true)
1380 .convertSwitchRangeToICmp(true)
1381 .convertSwitchToArithmetic(true)
1382 .convertSwitchToLookupTable(true)
1383 .needCanonicalLoops(false)
1384 .hoistCommonInsts(true)
1385 .sinkCommonInsts(true)));
1386
1387 if (IsFullLTO) {
1388 FPM.addPass(SCCPPass());
1389 FPM.addPass(InstCombinePass());
1390 FPM.addPass(BDCEPass());
1391 }
1392
1393 // Optimize parallel scalar instruction chains into SIMD instructions.
1394 if (PTO.SLPVectorization) {
1395 FPM.addPass(SLPVectorizerPass());
1396 if (Level.getSpeedupLevel() > 1 && ExtraVectorizerPasses) {
1397 FPM.addPass(EarlyCSEPass());
1398 }
1399 }
1400 // Enhance/cleanup vector code.
1401 FPM.addPass(VectorCombinePass());
1402
1403 if (!IsFullLTO) {
1404 FPM.addPass(InstCombinePass());
1405 // Unroll small loops to hide loop backedge latency and saturate any
1406 // parallel execution resources of an out-of-order processor. We also then
1407 // need to clean up redundancies and loop invariant code.
1408 // FIXME: It would be really good to use a loop-integrated instruction
1409 // combiner for cleanup here so that the unrolling and LICM can be pipelined
1410 // across the loop nests.
1411 // We do UnrollAndJam in a separate LPM to ensure it happens before unroll
1412 if (EnableUnrollAndJam && PTO.LoopUnrolling) {
1414 LoopUnrollAndJamPass(Level.getSpeedupLevel())));
1415 }
1416 FPM.addPass(LoopUnrollPass(LoopUnrollOptions(
1417 Level.getSpeedupLevel(), /*OnlyWhenForced=*/!PTO.LoopUnrolling,
1418 PTO.ForgetAllSCEVInLoopUnroll)));
1419 FPM.addPass(WarnMissedTransformationsPass());
1420 // Now that we are done with loop unrolling, be it either by LoopVectorizer,
1421 // or LoopUnroll passes, some variable-offset GEP's into alloca's could have
1422 // become constant-offset, thus enabling SROA and alloca promotion. Do so.
1423 // NOTE: we are very late in the pipeline, and we don't have any LICM
1424 // or SimplifyCFG passes scheduled after us, that would cleanup
1425 // the CFG mess this may create if allowed to modify CFG, so forbid that.
1426 FPM.addPass(SROAPass(SROAOptions::PreserveCFG));
1427 }
1428
1429 FPM.addPass(InferAlignmentPass());
1430 FPM.addPass(InstCombinePass());
1431
1432 // This is needed for two reasons:
1433 // 1. It works around problems that instcombine introduces, such as sinking
1434 // expensive FP divides into loops containing multiplications using the
1435 // divide result.
1436 // 2. It helps to clean up some loop-invariant code created by the loop
1437 // unroll pass when IsFullLTO=false.
1439 LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap,
1440 /*AllowSpeculation=*/true),
1441 /*UseMemorySSA=*/true, /*UseBlockFrequencyInfo=*/false));
1442
1443 // Now that we've vectorized and unrolled loops, we may have more refined
1444 // alignment information, try to re-derive it here.
1445 FPM.addPass(AlignmentFromAssumptionsPass());
1446}
1447
1450 ThinOrFullLTOPhase LTOPhase) {
1451 const bool LTOPreLink = isLTOPreLink(LTOPhase);
1453
1454 // Run partial inlining pass to partially inline functions that have
1455 // large bodies.
1458
1459 // Remove avail extern fns and globals definitions since we aren't compiling
1460 // an object file for later LTO. For LTO we want to preserve these so they
1461 // are eligible for inlining at link-time. Note if they are unreferenced they
1462 // will be removed by GlobalDCE later, so this only impacts referenced
1463 // available externally globals. Eventually they will be suppressed during
1464 // codegen, but eliminating here enables more opportunity for GlobalDCE as it
1465 // may make globals referenced by available external functions dead and saves
1466 // running remaining passes on the eliminated functions. These should be
1467 // preserved during prelinking for link-time inlining decisions.
1468 if (!LTOPreLink)
1470
1471 // Do RPO function attribute inference across the module to forward-propagate
1472 // attributes where applicable.
1473 // FIXME: Is this really an optimization rather than a canonicalization?
1475
1476 // Do a post inline PGO instrumentation and use pass. This is a context
1477 // sensitive PGO pass. We don't want to do this in LTOPreLink phase as
1478 // cross-module inline has not been done yet. The context sensitive
1479 // instrumentation is after all the inlines are done.
1480 if (!LTOPreLink && PGOOpt) {
1481 if (PGOOpt->CSAction == PGOOptions::CSIRInstr)
1482 addPGOInstrPasses(MPM, Level, /*RunProfileGen=*/true,
1483 /*IsCS=*/true, PGOOpt->AtomicCounterUpdate,
1484 PGOOpt->CSProfileGenFile, PGOOpt->ProfileRemappingFile);
1485 else if (PGOOpt->CSAction == PGOOptions::CSIRUse)
1486 addPGOInstrPasses(MPM, Level, /*RunProfileGen=*/false,
1487 /*IsCS=*/true, PGOOpt->AtomicCounterUpdate,
1488 PGOOpt->ProfileFile, PGOOpt->ProfileRemappingFile);
1489 }
1490
1491 // Re-compute GlobalsAA here prior to function passes. This is particularly
1492 // useful as the above will have inlined, DCE'ed, and function-attr
1493 // propagated everything. We should at this point have a reasonably minimal
1494 // and richly annotated call graph. By computing aliasing and mod/ref
1495 // information for all local globals here, the late loop passes and notably
1496 // the vectorizer will be able to use them to help recognize vectorizable
1497 // memory operations.
1500
1501 invokeOptimizerEarlyEPCallbacks(MPM, Level, LTOPhase);
1502
1503 FunctionPassManager OptimizePM;
1504
1505 // Only drop unnecessary assumes post-inline and post-link, as otherwise
1506 // additional uses of the affected value may be introduced through inlining
1507 // and CSE.
1508 if (!isLTOPreLink(LTOPhase))
1509 OptimizePM.addPass(DropUnnecessaryAssumesPass());
1510
1511 // Scheduling LoopVersioningLICM when inlining is over, because after that
1512 // we may see more accurate aliasing. Reason to run this late is that too
1513 // early versioning may prevent further inlining due to increase of code
1514 // size. Other optimizations which run later might benefit from the no-alias
1515 // assumption in the cloned loop.
1517 OptimizePM.addPass(
1519 // LoopVersioningLICM pass might increase new LICM opportunities.
1521 LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap,
1522 /*AllowSpeculation=*/true),
1523 /*USeMemorySSA=*/true, /*UseBlockFrequencyInfo=*/false));
1524 }
1525
1526 OptimizePM.addPass(Float2IntPass());
1528
1529 if (EnableMatrix) {
1530 OptimizePM.addPass(LowerMatrixIntrinsicsPass());
1531 OptimizePM.addPass(EarlyCSEPass());
1532 }
1533
1534 // CHR pass should only be applied with the profile information.
1535 // The check is to check the profile summary information in CHR.
1536 if (EnableCHR && Level == OptimizationLevel::O3)
1537 OptimizePM.addPass(ControlHeightReductionPass());
1538
1539 // FIXME: We need to run some loop optimizations to re-rotate loops after
1540 // simplifycfg and others undo their rotation.
1541
1542 // Optimize the loop execution. These passes operate on entire loop nests
1543 // rather than on each loop in an inside-out manner, and so they are actually
1544 // function passes.
1545
1546 invokeVectorizerStartEPCallbacks(OptimizePM, Level);
1547
1548 LoopPassManager LPM;
1549 // First rotate loops that may have been un-rotated by prior passes.
1550 // Disable header duplication at -Oz.
1552 Level != OptimizationLevel::Oz,
1553 LTOPreLink));
1554 // Some loops may have become dead by now. Try to delete them.
1555 // FIXME: see discussion in https://reviews.llvm.org/D112851,
1556 // this may need to be revisited once we run GVN before loop deletion
1557 // in the simplification pipeline.
1558 LPM.addPass(LoopDeletionPass());
1559
1560 if (PTO.LoopInterchange)
1561 LPM.addPass(LoopInterchangePass());
1562
1564 std::move(LPM), /*UseMemorySSA=*/false, /*UseBlockFrequencyInfo=*/false));
1565
1566 // FIXME: This may not be the right place in the pipeline.
1567 // We need to have the data to support the right place.
1568 if (PTO.LoopFusion)
1569 OptimizePM.addPass(LoopFusePass());
1570
1571 // Distribute loops to allow partial vectorization. I.e. isolate dependences
1572 // into separate loop that would otherwise inhibit vectorization. This is
1573 // currently only performed for loops marked with the metadata
1574 // llvm.loop.distribute=true or when -enable-loop-distribute is specified.
1575 OptimizePM.addPass(LoopDistributePass());
1576
1577 // Populates the VFABI attribute with the scalar-to-vector mappings
1578 // from the TargetLibraryInfo.
1579 OptimizePM.addPass(InjectTLIMappings());
1580
1581 addVectorPasses(Level, OptimizePM, /* IsFullLTO */ false);
1582
1583 invokeVectorizerEndEPCallbacks(OptimizePM, Level);
1584
1585 // LoopSink pass sinks instructions hoisted by LICM, which serves as a
1586 // canonicalization pass that enables other optimizations. As a result,
1587 // LoopSink pass needs to be a very late IR pass to avoid undoing LICM
1588 // result too early.
1589 OptimizePM.addPass(LoopSinkPass());
1590
1591 // And finally clean up LCSSA form before generating code.
1592 OptimizePM.addPass(InstSimplifyPass());
1593
1594 // This hoists/decomposes div/rem ops. It should run after other sink/hoist
1595 // passes to avoid re-sinking, but before SimplifyCFG because it can allow
1596 // flattening of blocks.
1597 OptimizePM.addPass(DivRemPairsPass());
1598
1599 // Try to annotate calls that were created during optimization.
1600 OptimizePM.addPass(
1601 TailCallElimPass(/*UpdateFunctionEntryCount=*/isInstrumentedPGOUse()));
1602
1603 // LoopSink (and other loop passes since the last simplifyCFG) might have
1604 // resulted in single-entry-single-exit or empty blocks. Clean up the CFG.
1605 OptimizePM.addPass(
1607 .convertSwitchRangeToICmp(true)
1608 .convertSwitchToArithmetic(true)
1609 .speculateUnpredictables(true)
1610 .hoistLoadsStoresWithCondFaulting(true)));
1611
1612 // Add the core optimizing pipeline.
1613 MPM.addPass(createModuleToFunctionPassAdaptor(std::move(OptimizePM),
1614 PTO.EagerlyInvalidateAnalyses));
1615
1616 invokeOptimizerLastEPCallbacks(MPM, Level, LTOPhase);
1617
1618 // Split out cold code. Splitting is done late to avoid hiding context from
1619 // other optimizations and inadvertently regressing performance. The tradeoff
1620 // is that this has a higher code size cost than splitting early.
1621 if (EnableHotColdSplit && !LTOPreLink)
1623
1624 // Search the code for similar regions of code. If enough similar regions can
1625 // be found where extracting the regions into their own function will decrease
1626 // the size of the program, we extract the regions, and deduplicate the
1627 // structurally similar regions.
1628 if (EnableIROutliner)
1629 MPM.addPass(IROutlinerPass());
1630
1631 // Now we need to do some global optimization transforms.
1632 // FIXME: It would seem like these should come first in the optimization
1633 // pipeline and maybe be the bottom of the canonicalization pipeline? Weird
1634 // ordering here.
1635 MPM.addPass(GlobalDCEPass());
1637
1638 // Merge functions if requested. It has a better chance to merge functions
1639 // after ConstantMerge folded jump tables.
1640 if (PTO.MergeFunctions)
1642
1643 if (PTO.CallGraphProfile && !LTOPreLink)
1644 MPM.addPass(CGProfilePass(isLTOPostLink(LTOPhase)));
1645
1646 // RelLookupTableConverterPass runs later in LTO post-link pipeline.
1647 if (!LTOPreLink)
1649
1650 return MPM;
1651}
1652
1656 if (Level == OptimizationLevel::O0)
1657 return buildO0DefaultPipeline(Level, Phase);
1658
1660
1661 // Convert @llvm.global.annotations to !annotation metadata.
1663
1664 // Force any function attributes we want the rest of the pipeline to observe.
1666
1667 if (PGOOpt && PGOOpt->DebugInfoForProfiling)
1669
1670 // Apply module pipeline start EP callback.
1672
1673 // Add the core simplification pipeline.
1675
1676 // Now add the optimization pipeline.
1678
1679 if (PGOOpt && PGOOpt->PseudoProbeForProfiling &&
1680 PGOOpt->Action == PGOOptions::SampleUse)
1682
1683 // Emit annotation remarks.
1685
1686 if (isLTOPreLink(Phase))
1687 addRequiredLTOPreLinkPasses(MPM);
1688 return MPM;
1689}
1690
1693 bool EmitSummary) {
1695 if (ThinLTO)
1697 else
1699 MPM.addPass(EmbedBitcodePass(ThinLTO, EmitSummary));
1700
1701 // Perform any cleanups to the IR that aren't suitable for per TU compilation,
1702 // like removing CFI/WPD related instructions. Note, we reuse
1703 // LowerTypeTestsPass to clean up type tests rather than duplicate that logic
1704 // in FatLtoCleanup.
1705 MPM.addPass(FatLtoCleanup());
1706
1707 // If we're doing FatLTO w/ CFI enabled, we don't want the type tests in the
1708 // object code, only in the bitcode section, so drop it before we run
1709 // module optimization and generate machine code. If llvm.type.test() isn't in
1710 // the IR, this won't do anything.
1711 MPM.addPass(
1713
1714 // Use the ThinLTO post-link pipeline with sample profiling
1715 if (ThinLTO && PGOOpt && PGOOpt->Action == PGOOptions::SampleUse)
1716 MPM.addPass(buildThinLTODefaultPipeline(Level, /*ImportSummary=*/nullptr));
1717 else {
1718 // ModuleSimplification does not run the coroutine passes for
1719 // ThinLTOPreLink, so we need the coroutine passes to run for ThinLTO
1720 // builds, otherwise they will miscompile.
1721 if (ThinLTO) {
1722 // TODO: replace w/ buildCoroWrapper() when it takes phase and level into
1723 // consideration.
1724 CGSCCPassManager CGPM;
1728 MPM.addPass(CoroCleanupPass());
1729 }
1730
1731 // otherwise, just use module optimization
1732 MPM.addPass(
1734 // Emit annotation remarks.
1736 }
1737 return MPM;
1738}
1739
1742 if (Level == OptimizationLevel::O0)
1744
1746
1747 // Convert @llvm.global.annotations to !annotation metadata.
1749
1750 // Force any function attributes we want the rest of the pipeline to observe.
1752
1753 if (PGOOpt && PGOOpt->DebugInfoForProfiling)
1755
1756 // Apply module pipeline start EP callback.
1758
1759 // If we are planning to perform ThinLTO later, we don't bloat the code with
1760 // unrolling/vectorization/... now. Just simplify the module as much as we
1761 // can.
1764 // In pre-link, for ctx prof use, we stop here with an instrumented IR. We let
1765 // thinlto use the contextual info to perform imports; then use the contextual
1766 // profile in the post-thinlink phase.
1767 if (!UseCtxProfile.empty()) {
1768 addRequiredLTOPreLinkPasses(MPM);
1769 return MPM;
1770 }
1771
1772 // Run partial inlining pass to partially inline functions that have
1773 // large bodies.
1774 // FIXME: It isn't clear whether this is really the right place to run this
1775 // in ThinLTO. Because there is another canonicalization and simplification
1776 // phase that will run after the thin link, running this here ends up with
1777 // less information than will be available later and it may grow functions in
1778 // ways that aren't beneficial.
1781
1782 if (PGOOpt && PGOOpt->PseudoProbeForProfiling &&
1783 PGOOpt->Action == PGOOptions::SampleUse)
1785
1786 // Handle Optimizer{Early,Last}EPCallbacks added by clang on PreLink. Actual
1787 // optimization is going to be done in PostLink stage, but clang can't add
1788 // callbacks there in case of in-process ThinLTO called by linker.
1793
1794 // Emit annotation remarks.
1796
1797 addRequiredLTOPreLinkPasses(MPM);
1798
1799 return MPM;
1800}
1801
1803 OptimizationLevel Level, const ModuleSummaryIndex *ImportSummary) {
1805
1806 if (ImportSummary) {
1807 // For ThinLTO we must apply the context disambiguation decisions early, to
1808 // ensure we can correctly match the callsites to summary data.
1811 ImportSummary, PGOOpt && PGOOpt->Action == PGOOptions::SampleUse));
1812
1813 // These passes import type identifier resolutions for whole-program
1814 // devirtualization and CFI. They must run early because other passes may
1815 // disturb the specific instruction patterns that these passes look for,
1816 // creating dependencies on resolutions that may not appear in the summary.
1817 //
1818 // For example, GVN may transform the pattern assume(type.test) appearing in
1819 // two basic blocks into assume(phi(type.test, type.test)), which would
1820 // transform a dependency on a WPD resolution into a dependency on a type
1821 // identifier resolution for CFI.
1822 //
1823 // Also, WPD has access to more precise information than ICP and can
1824 // devirtualize more effectively, so it should operate on the IR first.
1825 //
1826 // The WPD and LowerTypeTest passes need to run at -O0 to lower type
1827 // metadata and intrinsics.
1828 MPM.addPass(WholeProgramDevirtPass(nullptr, ImportSummary));
1829 MPM.addPass(LowerTypeTestsPass(nullptr, ImportSummary));
1830 }
1831
1832 if (Level == OptimizationLevel::O0) {
1833 // Run a second time to clean up any type tests left behind by WPD for use
1834 // in ICP.
1835 MPM.addPass(LowerTypeTestsPass(nullptr, nullptr,
1838 // Drop available_externally and unreferenced globals. This is necessary
1839 // with ThinLTO in order to avoid leaving undefined references to dead
1840 // globals in the object file.
1842 MPM.addPass(GlobalDCEPass());
1843 return MPM;
1844 }
1845 if (!UseCtxProfile.empty()) {
1846 MPM.addPass(
1848 } else {
1849 // Add the core simplification pipeline.
1852 }
1853 // Now add the optimization pipeline.
1856
1857 // Emit annotation remarks.
1859
1860 return MPM;
1861}
1862
1865 // FIXME: We should use a customized pre-link pipeline!
1866 return buildPerModuleDefaultPipeline(Level,
1868}
1869
1872 ModuleSummaryIndex *ExportSummary) {
1874
1876
1877 // Create a function that performs CFI checks for cross-DSO calls with targets
1878 // in the current module.
1879 MPM.addPass(CrossDSOCFIPass());
1880
1881 if (Level == OptimizationLevel::O0) {
1882 // The WPD and LowerTypeTest passes need to run at -O0 to lower type
1883 // metadata and intrinsics.
1884 MPM.addPass(WholeProgramDevirtPass(ExportSummary, nullptr));
1885 MPM.addPass(LowerTypeTestsPass(ExportSummary, nullptr));
1886 // Run a second time to clean up any type tests left behind by WPD for use
1887 // in ICP.
1888 MPM.addPass(LowerTypeTestsPass(nullptr, nullptr,
1890
1892
1894
1895 // Emit annotation remarks.
1897
1898 return MPM;
1899 }
1900
1901 if (PGOOpt && PGOOpt->Action == PGOOptions::SampleUse) {
1902 // Load sample profile before running the LTO optimization pipeline.
1903 MPM.addPass(SampleProfileLoaderPass(PGOOpt->ProfileFile,
1904 PGOOpt->ProfileRemappingFile,
1906 // Cache ProfileSummaryAnalysis once to avoid the potential need to insert
1907 // RequireAnalysisPass for PSI before subsequent non-module passes.
1909 }
1910
1911 // Try to run OpenMP optimizations, quick no-op if no OpenMP metadata present.
1913
1914 // Remove unused virtual tables to improve the quality of code generated by
1915 // whole-program devirtualization and bitset lowering.
1916 MPM.addPass(GlobalDCEPass(/*InLTOPostLink=*/true));
1917
1918 // Do basic inference of function attributes from known properties of system
1919 // libraries and other oracles.
1921
1922 if (Level.getSpeedupLevel() > 1) {
1924 CallSiteSplittingPass(), PTO.EagerlyInvalidateAnalyses));
1925
1926 // Indirect call promotion. This should promote all the targets that are
1927 // left by the earlier promotion pass that promotes intra-module targets.
1928 // This two-step promotion is to save the compile time. For LTO, it should
1929 // produce the same result as if we only do promotion here.
1931 true /* InLTO */, PGOOpt && PGOOpt->Action == PGOOptions::SampleUse));
1932
1933 // Promoting by-reference arguments to by-value exposes more constants to
1934 // IPSCCP.
1935 CGSCCPassManager CGPM;
1938 CGPM.addPass(
1941
1942 // Propagate constants at call sites into the functions they call. This
1943 // opens opportunities for globalopt (and inlining) by substituting function
1944 // pointers passed as arguments to direct uses of functions.
1945 MPM.addPass(IPSCCPPass(IPSCCPOptions(/*AllowFuncSpec=*/
1946 Level != OptimizationLevel::Os &&
1947 Level != OptimizationLevel::Oz)));
1948
1949 // Attach metadata to indirect call sites indicating the set of functions
1950 // they may target at run-time. This should follow IPSCCP.
1952 }
1953
1954 // Do RPO function attribute inference across the module to forward-propagate
1955 // attributes where applicable.
1956 // FIXME: Is this really an optimization rather than a canonicalization?
1958
1959 // Use in-range annotations on GEP indices to split globals where beneficial.
1960 MPM.addPass(GlobalSplitPass());
1961
1962 // Run whole program optimization of virtual call when the list of callees
1963 // is fixed.
1964 MPM.addPass(WholeProgramDevirtPass(ExportSummary, nullptr));
1965
1967 // Stop here at -O1.
1968 if (Level == OptimizationLevel::O1) {
1969 // The LowerTypeTestsPass needs to run to lower type metadata and the
1970 // type.test intrinsics. The pass does nothing if CFI is disabled.
1971 MPM.addPass(LowerTypeTestsPass(ExportSummary, nullptr));
1972 // Run a second time to clean up any type tests left behind by WPD for use
1973 // in ICP (which is performed earlier than this in the regular LTO
1974 // pipeline).
1975 MPM.addPass(LowerTypeTestsPass(nullptr, nullptr,
1977
1979
1981
1982 // Emit annotation remarks.
1984
1985 return MPM;
1986 }
1987
1988 // TODO: Skip to match buildCoroWrapper.
1989 MPM.addPass(CoroEarlyPass());
1990
1991 // Optimize globals to try and fold them into constants.
1992 MPM.addPass(GlobalOptPass());
1993
1994 // Promote any localized globals to SSA registers.
1996
1997 // Linking modules together can lead to duplicate global constants, only
1998 // keep one copy of each constant.
2000
2001 // Remove unused arguments from functions.
2003
2004 // Reduce the code after globalopt and ipsccp. Both can open up significant
2005 // simplification opportunities, and both can propagate functions through
2006 // function pointers. When this happens, we often have to resolve varargs
2007 // calls, etc, so let instcombine do this.
2008 FunctionPassManager PeepholeFPM;
2009 PeepholeFPM.addPass(InstCombinePass());
2010 if (Level.getSpeedupLevel() > 1)
2011 PeepholeFPM.addPass(AggressiveInstCombinePass());
2012 invokePeepholeEPCallbacks(PeepholeFPM, Level);
2013
2014 MPM.addPass(createModuleToFunctionPassAdaptor(std::move(PeepholeFPM),
2015 PTO.EagerlyInvalidateAnalyses));
2016
2017 // Lower variadic functions for supported targets prior to inlining.
2019
2020 // Note: historically, the PruneEH pass was run first to deduce nounwind and
2021 // generally clean up exception handling overhead. It isn't clear this is
2022 // valuable as the inliner doesn't currently care whether it is inlining an
2023 // invoke or a call.
2024 // Run the inliner now.
2025 if (EnableModuleInliner) {
2029 } else {
2032 /* MandatoryFirst */ true,
2035 }
2036
2037 // Perform context disambiguation after inlining, since that would reduce the
2038 // amount of additional cloning required to distinguish the allocation
2039 // contexts.
2042 /*Summary=*/nullptr,
2043 PGOOpt && PGOOpt->Action == PGOOptions::SampleUse));
2044
2045 // Optimize globals again after we ran the inliner.
2046 MPM.addPass(GlobalOptPass());
2047
2048 // Run the OpenMPOpt pass again after global optimizations.
2050
2051 // Garbage collect dead functions.
2052 MPM.addPass(GlobalDCEPass(/*InLTOPostLink=*/true));
2053
2054 // If we didn't decide to inline a function, check to see if we can
2055 // transform it to pass arguments by value instead of by reference.
2056 CGSCCPassManager CGPM;
2061
2063 // The IPO Passes may leave cruft around. Clean up after them.
2064 FPM.addPass(InstCombinePass());
2065 invokePeepholeEPCallbacks(FPM, Level);
2066
2069
2071
2072 // Do a post inline PGO instrumentation and use pass. This is a context
2073 // sensitive PGO pass.
2074 if (PGOOpt) {
2075 if (PGOOpt->CSAction == PGOOptions::CSIRInstr)
2076 addPGOInstrPasses(MPM, Level, /*RunProfileGen=*/true,
2077 /*IsCS=*/true, PGOOpt->AtomicCounterUpdate,
2078 PGOOpt->CSProfileGenFile, PGOOpt->ProfileRemappingFile);
2079 else if (PGOOpt->CSAction == PGOOptions::CSIRUse)
2080 addPGOInstrPasses(MPM, Level, /*RunProfileGen=*/false,
2081 /*IsCS=*/true, PGOOpt->AtomicCounterUpdate,
2082 PGOOpt->ProfileFile, PGOOpt->ProfileRemappingFile);
2083 }
2084
2085 // Break up allocas
2087
2088 // LTO provides additional opportunities for tailcall elimination due to
2089 // link-time inlining, and visibility of nocapture attribute.
2090 FPM.addPass(
2091 TailCallElimPass(/*UpdateFunctionEntryCount=*/isInstrumentedPGOUse()));
2092
2093 // Run a few AA driver optimizations here and now to cleanup the code.
2094 MPM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM),
2095 PTO.EagerlyInvalidateAnalyses));
2096
2097 MPM.addPass(
2099
2100 // Require the GlobalsAA analysis for the module so we can query it within
2101 // MainFPM.
2104 // Invalidate AAManager so it can be recreated and pick up the newly
2105 // available GlobalsAA.
2106 MPM.addPass(
2108 }
2109
2110 FunctionPassManager MainFPM;
2112 LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap,
2113 /*AllowSpeculation=*/true),
2114 /*UseMemorySSA=*/true, /*UseBlockFrequencyInfo=*/false));
2115
2116 if (RunNewGVN)
2117 MainFPM.addPass(NewGVNPass());
2118 else
2119 MainFPM.addPass(GVNPass());
2120
2121 // Remove dead memcpy()'s.
2122 MainFPM.addPass(MemCpyOptPass());
2123
2124 // Nuke dead stores.
2125 MainFPM.addPass(DSEPass());
2126 MainFPM.addPass(MoveAutoInitPass());
2128
2129 invokeVectorizerStartEPCallbacks(MainFPM, Level);
2130
2131 LoopPassManager LPM;
2132 if (EnableLoopFlatten && Level.getSpeedupLevel() > 1)
2133 LPM.addPass(LoopFlattenPass());
2134 LPM.addPass(IndVarSimplifyPass());
2135 LPM.addPass(LoopDeletionPass());
2136 // FIXME: Add loop interchange.
2137
2138 // Unroll small loops and perform peeling.
2139 LPM.addPass(LoopFullUnrollPass(Level.getSpeedupLevel(),
2140 /* OnlyWhenForced= */ !PTO.LoopUnrolling,
2141 PTO.ForgetAllSCEVInLoopUnroll));
2142 // The loop passes in LPM (LoopFullUnrollPass) do not preserve MemorySSA.
2143 // *All* loop passes must preserve it, in order to be able to use it.
2145 std::move(LPM), /*UseMemorySSA=*/false, /*UseBlockFrequencyInfo=*/true));
2146
2147 MainFPM.addPass(LoopDistributePass());
2148
2149 addVectorPasses(Level, MainFPM, /* IsFullLTO */ true);
2150
2151 invokeVectorizerEndEPCallbacks(MainFPM, Level);
2152
2153 // Run the OpenMPOpt CGSCC pass again late.
2156
2157 invokePeepholeEPCallbacks(MainFPM, Level);
2158 MainFPM.addPass(JumpThreadingPass());
2159 MPM.addPass(createModuleToFunctionPassAdaptor(std::move(MainFPM),
2160 PTO.EagerlyInvalidateAnalyses));
2161
2162 // Lower type metadata and the type.test intrinsic. This pass supports
2163 // clang's control flow integrity mechanisms (-fsanitize=cfi*) and needs
2164 // to be run at link time if CFI is enabled. This pass does nothing if
2165 // CFI is disabled.
2166 MPM.addPass(LowerTypeTestsPass(ExportSummary, nullptr));
2167 // Run a second time to clean up any type tests left behind by WPD for use
2168 // in ICP (which is performed earlier than this in the regular LTO pipeline).
2169 MPM.addPass(LowerTypeTestsPass(nullptr, nullptr,
2171
2172 // Enable splitting late in the FullLTO post-link pipeline.
2175
2176 // Add late LTO optimization passes.
2177 FunctionPassManager LateFPM;
2178
2179 // LoopSink pass sinks instructions hoisted by LICM, which serves as a
2180 // canonicalization pass that enables other optimizations. As a result,
2181 // LoopSink pass needs to be a very late IR pass to avoid undoing LICM
2182 // result too early.
2183 LateFPM.addPass(LoopSinkPass());
2184
2185 // This hoists/decomposes div/rem ops. It should run after other sink/hoist
2186 // passes to avoid re-sinking, but before SimplifyCFG because it can allow
2187 // flattening of blocks.
2188 LateFPM.addPass(DivRemPairsPass());
2189
2190 // Delete basic blocks, which optimization passes may have killed.
2192 .convertSwitchRangeToICmp(true)
2193 .convertSwitchToArithmetic(true)
2194 .hoistCommonInsts(true)
2195 .speculateUnpredictables(true)));
2196 MPM.addPass(createModuleToFunctionPassAdaptor(std::move(LateFPM)));
2197
2198 // Drop bodies of available-externally objects to improve GlobalDCE.
2200
2201 // Now that we have optimized the program, discard unreachable functions.
2202 MPM.addPass(GlobalDCEPass(/*InLTOPostLink=*/true));
2203
2204 if (PTO.MergeFunctions)
2206
2208
2209 if (PTO.CallGraphProfile)
2210 MPM.addPass(CGProfilePass(/*InLTOPostLink=*/true));
2211
2212 MPM.addPass(CoroCleanupPass());
2213
2215
2216 // Emit annotation remarks.
2218
2219 return MPM;
2220}
2221
2225 assert(Level == OptimizationLevel::O0 &&
2226 "buildO0DefaultPipeline should only be used with O0");
2227
2229
2230 // Perform pseudo probe instrumentation in O0 mode. This is for the
2231 // consistency between different build modes. For example, a LTO build can be
2232 // mixed with an O0 prelink and an O2 postlink. Loading a sample profile in
2233 // the postlink will require pseudo probe instrumentation in the prelink.
2234 if (PGOOpt && PGOOpt->PseudoProbeForProfiling)
2236
2237 if (PGOOpt && (PGOOpt->Action == PGOOptions::IRInstr ||
2238 PGOOpt->Action == PGOOptions::IRUse))
2240 MPM,
2241 /*RunProfileGen=*/(PGOOpt->Action == PGOOptions::IRInstr),
2242 /*IsCS=*/false, PGOOpt->AtomicCounterUpdate, PGOOpt->ProfileFile,
2243 PGOOpt->ProfileRemappingFile);
2244
2245 // Instrument function entry and exit before all inlining.
2247 EntryExitInstrumenterPass(/*PostInlining=*/false)));
2248
2250
2251 if (PGOOpt && PGOOpt->DebugInfoForProfiling)
2253
2254 if (PGOOpt && PGOOpt->Action == PGOOptions::SampleUse) {
2255 // Explicitly disable sample loader inlining and use flattened profile in O0
2256 // pipeline.
2257 MPM.addPass(SampleProfileLoaderPass(PGOOpt->ProfileFile,
2258 PGOOpt->ProfileRemappingFile,
2259 ThinOrFullLTOPhase::None, nullptr,
2260 /*DisableSampleProfileInlining=*/true,
2261 /*UseFlattenedProfile=*/true));
2262 // Cache ProfileSummaryAnalysis once to avoid the potential need to insert
2263 // RequireAnalysisPass for PSI before subsequent non-module passes.
2265 }
2266
2268
2269 // Build a minimal pipeline based on the semantics required by LLVM,
2270 // which is just that always inlining occurs. Further, disable generating
2271 // lifetime intrinsics to avoid enabling further optimizations during
2272 // code generation.
2274 /*InsertLifetimeIntrinsics=*/false));
2275
2276 if (PTO.MergeFunctions)
2278
2279 if (EnableMatrix)
2280 MPM.addPass(
2282
2283 if (!CGSCCOptimizerLateEPCallbacks.empty()) {
2284 CGSCCPassManager CGPM;
2286 if (!CGPM.isEmpty())
2288 }
2289 if (!LateLoopOptimizationsEPCallbacks.empty()) {
2290 LoopPassManager LPM;
2292 if (!LPM.isEmpty()) {
2294 createFunctionToLoopPassAdaptor(std::move(LPM))));
2295 }
2296 }
2297 if (!LoopOptimizerEndEPCallbacks.empty()) {
2298 LoopPassManager LPM;
2300 if (!LPM.isEmpty()) {
2302 createFunctionToLoopPassAdaptor(std::move(LPM))));
2303 }
2304 }
2305 if (!ScalarOptimizerLateEPCallbacks.empty()) {
2308 if (!FPM.isEmpty())
2309 MPM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM)));
2310 }
2311
2313
2314 if (!VectorizerStartEPCallbacks.empty()) {
2317 if (!FPM.isEmpty())
2318 MPM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM)));
2319 }
2320
2321 if (!VectorizerEndEPCallbacks.empty()) {
2324 if (!FPM.isEmpty())
2325 MPM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM)));
2326 }
2327
2329
2331
2332 if (isLTOPreLink(Phase))
2333 addRequiredLTOPreLinkPasses(MPM);
2334
2336
2337 return MPM;
2338}
2339
2341 AAManager AA;
2342
2343 // The order in which these are registered determines their priority when
2344 // being queried.
2345
2346 // Add any target-specific alias analyses that should be run early.
2347 if (TM)
2348 TM->registerEarlyDefaultAliasAnalyses(AA);
2349
2350 // First we register the basic alias analysis that provides the majority of
2351 // per-function local AA logic. This is a stateless, on-demand local set of
2352 // AA techniques.
2353 AA.registerFunctionAnalysis<BasicAA>();
2354
2355 // Next we query fast, specialized alias analyses that wrap IR-embedded
2356 // information about aliasing.
2357 AA.registerFunctionAnalysis<ScopedNoAliasAA>();
2358 AA.registerFunctionAnalysis<TypeBasedAA>();
2359
2360 // Add support for querying global aliasing information when available.
2361 // Because the `AAManager` is a function analysis and `GlobalsAA` is a module
2362 // analysis, all that the `AAManager` can do is query for any *cached*
2363 // results from `GlobalsAA` through a readonly proxy.
2365 AA.registerModuleAnalysis<GlobalsAA>();
2366
2367 // Add target-specific alias analyses.
2368 if (TM)
2369 TM->registerDefaultAliasAnalyses(AA);
2370
2371 return AA;
2372}
2373
// Returns true when either of two conditions holds: PGOOpt is set and its
// Action equals PGOOptions::IRUse, or the UseCtxProfile command-line option
// holds a non-empty string. Otherwise returns false. Does not modify state
// (const member).
2374bool PassBuilder::isInstrumentedPGOUse() const {
2375 return (PGOOpt && PGOOpt->Action == PGOOptions::IRUse) ||
2376 !UseCtxProfile.empty();
2377}
aarch64 falkor hwpf fix Falkor HW Prefetch Fix Late Phase
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
AggressiveInstCombiner - Combine expression patterns to form expressions with fewer,...
Provides passes to inlining "always_inline" functions.
This is the interface for LLVM's primary stateless and local alias analysis.
This file provides the interface for LLVM's Call Graph Profile pass.
This header provides classes for managing passes over SCCs of the call graph.
#define clEnumValN(ENUMVAL, FLAGNAME, DESC)
This file provides the interface for a simple, fast CSE pass.
This file provides a pass which clones the current module and runs the provided pass pipeline on the ...
This file provides a pass manager that only runs its passes if the provided marker analysis has been ...
Super simple passes to force specific function attrs from the commandline into the IR for debugging p...
Provides passes for computing function attributes based on interprocedural analyses.
This file provides the interface for LLVM's Global Value Numbering pass which eliminates fully redund...
This is the interface for a simple mod/ref and alias analysis over globals.
AcceleratorCodeSelection - Identify all functions reachable from a kernel, removing those that are un...
This header defines various interfaces for pass management in LLVM.
Interfaces for passes which infer implicit function attributes from the name and signature of functio...
This file provides the primary interface to the instcombine pass.
Defines passes for running instruction simplification across chunks of IR.
This file provides the interface for LLVM's PGO Instrumentation lowering pass.
See the comments on JumpThreadingPass.
static LVOptions Options
Definition LVOptions.cpp:25
This file implements the Loop Fusion pass.
This header defines the LoopLoadEliminationPass object.
This header provides classes for managing a pipeline of passes over loops in LLVM IR.
The header file for the LowerConstantIntrinsics pass as used by the new pass manager.
The header file for the LowerExpectIntrinsic pass as used by the new pass manager.
This pass performs merges of loads and stores on both sides of a.
This file provides the interface for LLVM's Global Value Numbering pass.
This header enumerates the LLVM-provided high-level optimization levels.
This file provides the interface for IR based instrumentation passes ( (profile-gen,...
Define option tunables for PGO.
static void addAnnotationRemarksPass(ModulePassManager &MPM)
static InlineParams getInlineParamsFromOptLevel(OptimizationLevel Level)
static CoroConditionalWrapper buildCoroWrapper(ThinOrFullLTOPhase Phase)
static bool isLTOPreLink(ThinOrFullLTOPhase Phase)
static bool isLTOPostLink(ThinOrFullLTOPhase Phase)
This file implements relative lookup table converter that converts lookup tables to relative lookup t...
This file provides the interface for LLVM's Scalar Replacement of Aggregates pass.
This file provides the interface for the pseudo probe implementation for AutoFDO.
This file provides the interface for the sampled PGO loader pass.
This is the interface for a metadata-based scoped no-alias analysis.
This file provides the interface for the pass responsible for both simplifying and canonicalizing the...
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
This is the interface for a metadata-based TBAA.
Defines the virtual file system interface vfs::FileSystem.
A manager for alias analyses.
Inlines functions marked as "always_inline".
Argument promotion pass.
Assign a GUID to functions as metadata.
Analysis pass providing a never-invalidated alias analysis result.
Simple pass that canonicalizes aliases.
A pass that merges duplicate global constants into a single constant.
This class implements a trivial dead store elimination.
Eliminate dead arguments (and return values) from functions.
A pass that transforms external global definitions into declarations.
Pass embeds a copy of the module optimized with the provided pass pipeline into a global variable.
A pass manager to run a set of extra loop passes if the MarkerTy analysis is present.
The core GVN pass object.
Definition GVN.h:128
Pass to remove unused function declarations.
Definition GlobalDCE.h:38
Optimize globals that never have their address taken.
Definition GlobalOpt.h:25
Pass to perform split of global variables.
Definition GlobalSplit.h:26
Analysis pass providing a never-invalidated alias analysis result.
Pass to outline cold regions.
Pass to perform interprocedural constant propagation.
Definition SCCP.h:48
Pass to outline similar regions.
Definition IROutliner.h:444
Run instruction simplification across each instruction in the function.
Instrumentation based profiling lowering pass.
This pass performs 'jump threading', which looks at blocks that have multiple predecessors and multip...
Performs Loop Invariant Code Motion Pass.
Definition LICM.h:66
Loop unroll pass that only does full loop unrolling and peeling.
Performs Loop Idiom Recognize Pass.
Performs Loop Inst Simplify Pass.
A simple loop rotation transformation.
Performs basic CFG simplifications to assist other loop passes.
A pass that does profile-guided sinking of instructions into loops.
Definition LoopSink.h:33
A simple loop rotation transformation.
Loop unroll pass that will support both full and partial unrolling.
Merge identical functions.
The module inliner pass for the new pass manager.
Module pass, wrapping the inliner pass.
Definition Inliner.h:65
void addModulePass(T Pass)
Add a module pass that runs before the CGSCC passes.
Definition Inliner.h:81
Class to hold module path string table and global value map, and encapsulate methods for operating on...
Simple pass that provides a name to every anonymous globals.
Additional 'norecurse' attribute deduction during postlink LTO phase.
OpenMP optimizations pass.
Definition OpenMPOpt.h:42
static LLVM_ABI const OptimizationLevel O3
Optimize for fast execution as much as possible.
static LLVM_ABI const OptimizationLevel Oz
A very specialized mode that will optimize for code size at any and all costs.
static LLVM_ABI const OptimizationLevel O0
Disable as many optimizations as possible.
static LLVM_ABI const OptimizationLevel Os
Similar to O2 but tries to optimize for small code size instead of fast execution without triggering ...
static LLVM_ABI const OptimizationLevel O2
Optimize for fast execution as much as possible without triggering significant incremental compile ti...
static LLVM_ABI const OptimizationLevel O1
Optimize quickly without destroying debuggability.
The indirect function call promotion pass.
The instrumentation (profile-instr-gen) pass for IR based PGO.
The instrumentation (profile-instr-gen) pass for IR based PGO.
The profile annotation (profile-instr-use) pass for IR based PGO.
The profile size based optimization pass for memory intrinsics.
Pass to remove unused function declarations.
LLVM_ABI void invokeFullLinkTimeOptimizationLastEPCallbacks(ModulePassManager &MPM, OptimizationLevel Level)
LLVM_ABI ModuleInlinerWrapperPass buildInlinerPipeline(OptimizationLevel Level, ThinOrFullLTOPhase Phase)
Construct the module pipeline that performs inlining as well as the inlining-driven cleanups.
LLVM_ABI void invokeOptimizerEarlyEPCallbacks(ModulePassManager &MPM, OptimizationLevel Level, ThinOrFullLTOPhase Phase)
LLVM_ABI void invokeVectorizerStartEPCallbacks(FunctionPassManager &FPM, OptimizationLevel Level)
LLVM_ABI AAManager buildDefaultAAPipeline()
Build the default AAManager with the default alias analysis pipeline registered.
LLVM_ABI void invokeCGSCCOptimizerLateEPCallbacks(CGSCCPassManager &CGPM, OptimizationLevel Level)
LLVM_ABI ModulePassManager buildThinLTOPreLinkDefaultPipeline(OptimizationLevel Level)
Build a pre-link, ThinLTO-targeting default optimization pipeline to a pass manager.
LLVM_ABI void addPGOInstrPassesForO0(ModulePassManager &MPM, bool RunProfileGen, bool IsCS, bool AtomicCounterUpdate, std::string ProfileFile, std::string ProfileRemappingFile)
Add PGO instrumentation passes for O0 only.
LLVM_ABI void invokeScalarOptimizerLateEPCallbacks(FunctionPassManager &FPM, OptimizationLevel Level)
LLVM_ABI ModulePassManager buildPerModuleDefaultPipeline(OptimizationLevel Level, ThinOrFullLTOPhase Phase=ThinOrFullLTOPhase::None)
Build a per-module default optimization pipeline.
LLVM_ABI void invokePipelineStartEPCallbacks(ModulePassManager &MPM, OptimizationLevel Level)
LLVM_ABI void invokeVectorizerEndEPCallbacks(FunctionPassManager &FPM, OptimizationLevel Level)
LLVM_ABI ModulePassManager buildO0DefaultPipeline(OptimizationLevel Level, ThinOrFullLTOPhase Phase=ThinOrFullLTOPhase::None)
Build an O0 pipeline with the minimal semantically required passes.
LLVM_ABI FunctionPassManager buildFunctionSimplificationPipeline(OptimizationLevel Level, ThinOrFullLTOPhase Phase)
Construct the core LLVM function canonicalization and simplification pipeline.
LLVM_ABI void invokePeepholeEPCallbacks(FunctionPassManager &FPM, OptimizationLevel Level)
LLVM_ABI void invokePipelineEarlySimplificationEPCallbacks(ModulePassManager &MPM, OptimizationLevel Level, ThinOrFullLTOPhase Phase)
LLVM_ABI void invokeLoopOptimizerEndEPCallbacks(LoopPassManager &LPM, OptimizationLevel Level)
LLVM_ABI ModulePassManager buildLTODefaultPipeline(OptimizationLevel Level, ModuleSummaryIndex *ExportSummary)
Build an LTO default optimization pipeline to a pass manager.
LLVM_ABI ModulePassManager buildModuleInlinerPipeline(OptimizationLevel Level, ThinOrFullLTOPhase Phase)
Construct the module pipeline that performs inlining with module inliner pass.
LLVM_ABI ModulePassManager buildThinLTODefaultPipeline(OptimizationLevel Level, const ModuleSummaryIndex *ImportSummary)
Build a ThinLTO default optimization pipeline to a pass manager.
LLVM_ABI void invokeLateLoopOptimizationsEPCallbacks(LoopPassManager &LPM, OptimizationLevel Level)
LLVM_ABI void invokeFullLinkTimeOptimizationEarlyEPCallbacks(ModulePassManager &MPM, OptimizationLevel Level)
LLVM_ABI ModulePassManager buildFatLTODefaultPipeline(OptimizationLevel Level, bool ThinLTO, bool EmitSummary)
Build a fat object default optimization pipeline.
LLVM_ABI ModulePassManager buildModuleSimplificationPipeline(OptimizationLevel Level, ThinOrFullLTOPhase Phase)
Construct the core LLVM module canonicalization and simplification pipeline.
LLVM_ABI ModulePassManager buildModuleOptimizationPipeline(OptimizationLevel Level, ThinOrFullLTOPhase LTOPhase)
Construct the core LLVM module optimization pipeline.
LLVM_ABI void invokeOptimizerLastEPCallbacks(ModulePassManager &MPM, OptimizationLevel Level, ThinOrFullLTOPhase Phase)
LLVM_ABI ModulePassManager buildLTOPreLinkDefaultPipeline(OptimizationLevel Level)
Build a pre-link, LTO-targeting default optimization pipeline to a pass manager.
LLVM_ATTRIBUTE_MINSIZE std::enable_if_t<!std::is_same_v< PassT, PassManager > > addPass(PassT &&Pass)
bool isEmpty() const
Returns if the pass manager contains any passes.
unsigned LicmMssaNoAccForPromotionCap
Tuning option to disable promotion to scalars in LICM with MemorySSA, if the number of access is too ...
Definition PassBuilder.h:78
bool SLPVectorization
Tuning option to enable/disable slp loop vectorization, set based on opt level.
Definition PassBuilder.h:56
int InlinerThreshold
Tuning option to override the default inliner threshold.
Definition PassBuilder.h:92
bool LoopFusion
Tuning option to enable/disable loop fusion. Its default value is false.
Definition PassBuilder.h:66
bool CallGraphProfile
Tuning option to enable/disable call graph profile.
Definition PassBuilder.h:82
bool MergeFunctions
Tuning option to enable/disable function merging.
Definition PassBuilder.h:89
bool ForgetAllSCEVInLoopUnroll
Tuning option to forget all SCEV loops in LoopUnroll.
Definition PassBuilder.h:70
unsigned LicmMssaOptCap
Tuning option to cap the number of calls to retrieve clobbering accesses in MemorySSA,...
Definition PassBuilder.h:74
bool LoopInterleaving
Tuning option to set loop interleaving on/off, set based on opt level.
Definition PassBuilder.h:48
LLVM_ABI PipelineTuningOptions()
Constructor sets pipeline tuning defaults based on cl::opts.
bool LoopUnrolling
Tuning option to enable/disable loop unrolling. Its default value is true.
Definition PassBuilder.h:59
bool LoopInterchange
Tuning option to enable/disable loop interchange.
Definition PassBuilder.h:63
bool LoopVectorization
Tuning option to enable/disable loop vectorization, set based on opt level.
Definition PassBuilder.h:52
Reassociate commutative expressions.
Definition Reassociate.h:74
A pass to do RPO deduction and propagation of function attributes.
This pass performs function-level constant propagation and merging.
Definition SCCP.h:30
The sample profiler data loader pass.
Analysis pass providing a never-invalidated alias analysis result.
This pass transforms loops that contain branches or switches on loop- invariant conditions to have mu...
A pass to simplify and canonicalize the CFG of a function.
Definition SimplifyCFG.h:30
Analysis pass providing a never-invalidated alias analysis result.
Optimize scalar/vector interactions in IR using target cost models.
Interfaces for registering analysis passes, producing common pass manager configurations,...
Abstract Attribute helper functions.
Definition Attributor.h:165
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
ValuesClass values(OptsTy... Options)
Helper to build a ValuesClass by forwarding a variable number of arguments as an initializer list to ...
initializer< Ty > init(const Ty &Val)
@ Assume
Do not drop type tests (default).
@ All
Drop only llvm.assumes using type test value.
This is an optimization pass for GlobalISel generic memory operations.
LLVM_ABI cl::opt< bool > EnableKnowledgeRetention
static cl::opt< bool > RunNewGVN("enable-newgvn", cl::init(false), cl::Hidden, cl::desc("Run the NewGVN pass"))
static cl::opt< bool > DisablePreInliner("disable-preinline", cl::init(false), cl::Hidden, cl::desc("Disable pre-instrumentation inliner"))
static cl::opt< bool > EnableJumpTableToSwitch("enable-jump-table-to-switch", cl::desc("Enable JumpTableToSwitch pass (default = off)"))
static cl::opt< bool > PerformMandatoryInliningsFirst("mandatory-inlining-first", cl::init(false), cl::Hidden, cl::desc("Perform mandatory inlinings module-wide, before performing " "inlining"))
static cl::opt< bool > RunPartialInlining("enable-partial-inlining", cl::init(false), cl::Hidden, cl::desc("Run Partial inlining pass"))
static cl::opt< bool > EnableGVNSink("enable-gvn-sink", cl::desc("Enable the GVN sinking pass (default = off)"))
static cl::opt< bool > EnableLoopHeaderDuplication("enable-loop-header-duplication", cl::init(false), cl::Hidden, cl::desc("Enable loop header duplication at any optimization level"))
static cl::opt< bool > EnableModuleInliner("enable-module-inliner", cl::init(false), cl::Hidden, cl::desc("Enable module inliner"))
static cl::opt< bool > EnableEagerlyInvalidateAnalyses("eagerly-invalidate-analyses", cl::init(true), cl::Hidden, cl::desc("Eagerly invalidate more analyses in default pipelines"))
static cl::opt< bool > EnableMatrix("enable-matrix", cl::init(false), cl::Hidden, cl::desc("Enable lowering of the matrix intrinsics"))
ModuleToFunctionPassAdaptor createModuleToFunctionPassAdaptor(FunctionPassT &&Pass, bool EagerlyInvalidate=false)
A function to deduce a function pass type and wrap it in the templated adaptor.
cl::opt< std::string > UseCtxProfile("use-ctx-profile", cl::init(""), cl::Hidden, cl::desc("Use the specified contextual profile file"))
static cl::opt< bool > EnableSampledInstr("enable-sampled-instrumentation", cl::init(false), cl::Hidden, cl::desc("Enable profile instrumentation sampling (default = off)"))
static cl::opt< bool > EnableLoopFlatten("enable-loop-flatten", cl::init(false), cl::Hidden, cl::desc("Enable the LoopFlatten Pass"))
static cl::opt< InliningAdvisorMode > UseInlineAdvisor("enable-ml-inliner", cl::init(InliningAdvisorMode::Default), cl::Hidden, cl::desc("Enable ML policy for inliner. Currently trained for -Oz only"), cl::values(clEnumValN(InliningAdvisorMode::Default, "default", "Heuristics-based inliner version"), clEnumValN(InliningAdvisorMode::Development, "development", "Use development mode (runtime-loadable model)"), clEnumValN(InliningAdvisorMode::Release, "release", "Use release mode (AOT-compiled model)")))
PassManager< LazyCallGraph::SCC, CGSCCAnalysisManager, LazyCallGraph &, CGSCCUpdateResult & > CGSCCPassManager
The CGSCC pass manager.
static cl::opt< bool > EnableUnrollAndJam("enable-unroll-and-jam", cl::init(false), cl::Hidden, cl::desc("Enable Unroll And Jam Pass"))
ThinOrFullLTOPhase
This enumerates the LLVM full LTO or ThinLTO optimization phases.
Definition Pass.h:77
@ FullLTOPreLink
Full LTO prelink phase.
Definition Pass.h:85
@ ThinLTOPostLink
ThinLTO postlink (backend compile) phase.
Definition Pass.h:83
@ None
No LTO/ThinLTO behavior needed.
Definition Pass.h:79
@ FullLTOPostLink
Full LTO postlink (backend compile) phase.
Definition Pass.h:87
@ ThinLTOPreLink
ThinLTO prelink (summary) phase.
Definition Pass.h:81
PassManager< Loop, LoopAnalysisManager, LoopStandardAnalysisResults &, LPMUpdater & > LoopPassManager
The Loop pass manager.
static cl::opt< bool > EnableConstraintElimination("enable-constraint-elimination", cl::init(true), cl::Hidden, cl::desc("Enable pass to eliminate conditions based on linear constraints"))
ModuleToPostOrderCGSCCPassAdaptor createModuleToPostOrderCGSCCPassAdaptor(CGSCCPassT &&Pass)
A function to deduce a function pass type and wrap it in the templated adaptor.
FunctionToLoopPassAdaptor createFunctionToLoopPassAdaptor(LoopPassT &&Pass, bool UseMemorySSA=false, bool UseBlockFrequencyInfo=false)
A function to deduce a loop pass type and wrap it in the templated adaptor.
static cl::opt< bool > EnablePGOInlineDeferral("enable-npm-pgo-inline-deferral", cl::init(true), cl::Hidden, cl::desc("Enable inline deferral during PGO"))
Flag to enable inline deferral during PGO.
CGSCCToFunctionPassAdaptor createCGSCCToFunctionPassAdaptor(FunctionPassT &&Pass, bool EagerlyInvalidate=false, bool NoRerun=false)
A function to deduce a function pass type and wrap it in the templated adaptor.
cl::opt< bool > ForgetSCEVInLoopUnroll
PassManager< Module > ModulePassManager
Convenience typedef for a pass manager over modules.
static cl::opt< bool > EnablePostPGOLoopRotation("enable-post-pgo-loop-rotation", cl::init(true), cl::Hidden, cl::desc("Run the loop rotation transformation after PGO instrumentation"))
LLVM_ABI bool AreStatisticsEnabled()
Check if statistics are enabled.
static cl::opt< std::string > InstrumentColdFuncOnlyPath("instrument-cold-function-only-path", cl::init(""), cl::desc("File path for cold function only instrumentation(requires use " "with --pgo-instrument-cold-function-only)"), cl::Hidden)
static cl::opt< bool > EnableGlobalAnalyses("enable-global-analyses", cl::init(true), cl::Hidden, cl::desc("Enable inter-procedural analyses"))
static cl::opt< bool > EnableDFAJumpThreading("enable-dfa-jump-thread", cl::desc("Enable DFA jump threading"), cl::init(false), cl::Hidden)
static cl::opt< bool > FlattenedProfileUsed("flattened-profile-used", cl::init(false), cl::Hidden, cl::desc("Indicate the sample profile being used is flattened, i.e., " "no inline hierarchy exists in the profile"))
static cl::opt< bool > ExtraVectorizerPasses("extra-vectorizer-passes", cl::init(false), cl::Hidden, cl::desc("Run cleanup optimization passes after vectorization"))
static cl::opt< bool > EnableHotColdSplit("hot-cold-split", cl::desc("Enable hot-cold splitting pass"))
cl::opt< bool > EnableMemProfContextDisambiguation
Enable MemProf context disambiguation for thin link.
PassManager< Function > FunctionPassManager
Convenience typedef for a pass manager over functions.
LLVM_ABI InlineParams getInlineParams()
Generate the parameters to tune the inline cost analysis based only on the commandline options.
cl::opt< bool > PGOInstrumentColdFunctionOnly
static cl::opt< bool > EnableLoopInterchange("enable-loopinterchange", cl::init(false), cl::Hidden, cl::desc("Enable the LoopInterchange Pass"))
static cl::opt< bool > EnableCHR("enable-chr", cl::init(true), cl::Hidden, cl::desc("Enable control height reduction optimization (CHR)"))
static cl::opt< bool > EnableMergeFunctions("enable-merge-functions", cl::init(false), cl::Hidden, cl::desc("Enable function merging as part of the optimization pipeline"))
static cl::opt< bool > EnableGVNHoist("enable-gvn-hoist", cl::desc("Enable the GVN hoisting pass (default = off)"))
cl::opt< unsigned > SetLicmMssaNoAccForPromotionCap
static cl::opt< bool > EnableIROutliner("ir-outliner", cl::init(false), cl::Hidden, cl::desc("Enable ir outliner pass"))
static cl::opt< AttributorRunOption > AttributorRun("attributor-enable", cl::Hidden, cl::init(AttributorRunOption::NONE), cl::desc("Enable the attributor inter-procedural deduction pass"), cl::values(clEnumValN(AttributorRunOption::ALL, "all", "enable all attributor runs"), clEnumValN(AttributorRunOption::MODULE, "module", "enable module-wide attributor runs"), clEnumValN(AttributorRunOption::CGSCC, "cgscc", "enable call graph SCC attributor runs"), clEnumValN(AttributorRunOption::NONE, "none", "disable attributor runs")))
static cl::opt< int > PreInlineThreshold("preinline-threshold", cl::Hidden, cl::init(75), cl::desc("Control the amount of inlining in pre-instrumentation inliner " "(default = 75)"))
static cl::opt< bool > UseLoopVersioningLICM("enable-loop-versioning-licm", cl::init(false), cl::Hidden, cl::desc("Enable the experimental Loop Versioning LICM pass"))
cl::opt< unsigned > MaxDevirtIterations("max-devirt-iterations", cl::ReallyHidden, cl::init(4))
cl::opt< unsigned > SetLicmMssaOptCap
A DCE pass that assumes instructions are dead until proven otherwise.
Definition ADCE.h:31
Pass to convert @llvm.global.annotations to !annotation metadata.
This pass attempts to minimize the number of assumes without losing any information.
Hoist/decompose integer division and remainder instructions to enable CFG improvements and better codegen.
Definition DivRemPairs.h:23
A simple and fast domtree-based CSE pass.
Definition EarlyCSE.h:31
Pass which forces specific function attributes into the IR, primarily as a debugging tool.
A simple and fast domtree-based GVN pass to hoist common expressions from sibling branches.
Definition GVN.h:415
Uses an "inverted" value numbering to decide the similarity of expressions and sinks similar expressions into successors.
Definition GVN.h:422
A set of parameters to control various transforms performed by IPSCCP pass.
Definition SCCP.h:35
A pass which infers function attributes from the names and signatures of function declarations in a module.
Provides context on when an inline advisor is constructed in the pipeline (e.g., link phase, inline driver).
Thresholds to tune inline cost analysis.
Definition InlineCost.h:207
std::optional< int > HotCallSiteThreshold
Threshold to use when the callsite is considered hot.
Definition InlineCost.h:224
int DefaultThreshold
The default threshold to start with for a callee.
Definition InlineCost.h:209
std::optional< bool > EnableDeferral
Indicate whether we should allow inline deferral.
Definition InlineCost.h:237
std::optional< int > HintThreshold
Threshold to use for callees with inline hint.
Definition InlineCost.h:212
Options for the frontend instrumentation based profiling pass.
A no-op pass template which simply forces a specific analysis result to be invalidated.
Pass to forward loads in a loop around the backedge to subsequent iterations.
A set of parameters used to control various transforms performed by the LoopUnroll pass.
The LoopVectorize Pass.
Computes function attributes in post-order over the call graph.
A utility pass template to force an analysis result to be available.