LLVM 18.0.0git
PassBuilderPipelines.cpp
Go to the documentation of this file.
1//===- Construction of pass pipelines -------------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8/// \file
9///
10/// This file provides the implementation of the PassBuilder based on our
11/// static pass registry as well as related functionality. It also provides
12/// helpers to aid in analyzing, debugging, and testing passes and pass
13/// pipelines.
14///
15//===----------------------------------------------------------------------===//
16
17#include "llvm/ADT/Statistic.h"
26#include "llvm/IR/PassManager.h"
137
138using namespace llvm;
139
// Command-line flags that toggle optional passes/behaviors in the default
// pass pipelines built by PassBuilder.
// NOTE(review): this listing elides the declarator line (the
// "static cl::opt<...> Name(" part) for several of the flags below — only
// their argument lists are visible; confirm names against the full file.
141 "enable-ml-inliner", cl::init(InliningAdvisorMode::Default), cl::Hidden,
142 cl::desc("Enable ML policy for inliner. Currently trained for -Oz only"),
143 cl::values(clEnumValN(InliningAdvisorMode::Default, "default",
144 "Heuristics-based inliner version"),
145 clEnumValN(InliningAdvisorMode::Development, "development",
146 "Use development mode (runtime-loadable model)"),
147 clEnumValN(InliningAdvisorMode::Release, "release",
148 "Use release mode (AOT-compiled model)")));
149
151 "enable-npm-synthetic-counts", cl::Hidden,
152 cl::desc("Run synthetic function entry count generation "
153 "pass"));
154
155/// Flag to enable inline deferral during PGO.
156static cl::opt<bool>
157 EnablePGOInlineDeferral("enable-npm-pgo-inline-deferral", cl::init(true),
159 cl::desc("Enable inline deferral during PGO"));
160
161static cl::opt<bool> EnableModuleInliner("enable-module-inliner",
162 cl::init(false), cl::Hidden,
163 cl::desc("Enable module inliner"));
164
166 "mandatory-inlining-first", cl::init(true), cl::Hidden,
167 cl::desc("Perform mandatory inlinings module-wide, before performing "
168 "inlining"));
169
171 "eagerly-invalidate-analyses", cl::init(true), cl::Hidden,
172 cl::desc("Eagerly invalidate more analyses in default pipelines"));
173
175 "enable-merge-functions", cl::init(false), cl::Hidden,
176 cl::desc("Enable function merging as part of the optimization pipeline"));
177
179 "enable-post-pgo-loop-rotation", cl::init(true), cl::Hidden,
180 cl::desc("Run the loop rotation transformation after PGO instrumentation"));
181
183 "enable-global-analyses", cl::init(true), cl::Hidden,
184 cl::desc("Enable inter-procedural analyses"));
185
186static cl::opt<bool>
187 RunPartialInlining("enable-partial-inlining", cl::init(false), cl::Hidden,
188 cl::desc("Run Partial inlinining pass"));
189
191 "extra-vectorizer-passes", cl::init(false), cl::Hidden,
192 cl::desc("Run cleanup optimization passes after vectorization"));
193
194static cl::opt<bool> RunNewGVN("enable-newgvn", cl::init(false), cl::Hidden,
195 cl::desc("Run the NewGVN pass"));
196
198 "enable-loopinterchange", cl::init(false), cl::Hidden,
199 cl::desc("Enable the experimental LoopInterchange Pass"));
200
201static cl::opt<bool> EnableUnrollAndJam("enable-unroll-and-jam",
202 cl::init(false), cl::Hidden,
203 cl::desc("Enable Unroll And Jam Pass"));
204
205static cl::opt<bool> EnableLoopFlatten("enable-loop-flatten", cl::init(false),
207 cl::desc("Enable the LoopFlatten Pass"));
208
209static cl::opt<bool>
210 EnableDFAJumpThreading("enable-dfa-jump-thread",
211 cl::desc("Enable DFA jump threading"),
212 cl::init(false), cl::Hidden);
213
214static cl::opt<bool>
215 EnableHotColdSplit("hot-cold-split",
216 cl::desc("Enable hot-cold splitting pass"));
217
218static cl::opt<bool> EnableIROutliner("ir-outliner", cl::init(false),
220 cl::desc("Enable ir outliner pass"));
221
222static cl::opt<bool>
223 DisablePreInliner("disable-preinline", cl::init(false), cl::Hidden,
224 cl::desc("Disable pre-instrumentation inliner"));
225
227 "preinline-threshold", cl::Hidden, cl::init(75),
228 cl::desc("Control the amount of inlining in pre-instrumentation inliner "
229 "(default = 75)"));
230
231static cl::opt<bool>
232 EnableGVNHoist("enable-gvn-hoist",
233 cl::desc("Enable the GVN hoisting pass (default = off)"));
234
235static cl::opt<bool>
236 EnableGVNSink("enable-gvn-sink",
237 cl::desc("Enable the GVN sinking pass (default = off)"));
238
239// This option is used in simplifying testing SampleFDO optimizations for
240// profile loading.
241static cl::opt<bool>
242 EnableCHR("enable-chr", cl::init(true), cl::Hidden,
243 cl::desc("Enable control height reduction optimization (CHR)"));
244
246 "flattened-profile-used", cl::init(false), cl::Hidden,
247 cl::desc("Indicate the sample profile being used is flattened, i.e., "
248 "no inline hierachy exists in the profile"));
249
251 "enable-order-file-instrumentation", cl::init(false), cl::Hidden,
252 cl::desc("Enable order file instrumentation (default = off)"));
253
254static cl::opt<bool>
255 EnableMatrix("enable-matrix", cl::init(false), cl::Hidden,
256 cl::desc("Enable lowering of the matrix intrinsics"));
257
259 "enable-constraint-elimination", cl::init(true), cl::Hidden,
260 cl::desc(
261 "Enable pass to eliminate conditions based on linear constraints"));
262
// Controls where (if anywhere) the Attributor inter-procedural deduction
// runs; defaults to NONE (disabled).
264 "attributor-enable", cl::Hidden, cl::init(AttributorRunOption::NONE),
265 cl::desc("Enable the attributor inter-procedural deduction pass"),
266 cl::values(clEnumValN(AttributorRunOption::ALL, "all",
267 "enable all attributor runs"),
268 clEnumValN(AttributorRunOption::MODULE, "module",
269 "enable module-wide attributor runs"),
270 clEnumValN(AttributorRunOption::CGSCC, "cgscc",
271 "enable call graph SCC attributor runs"),
272 clEnumValN(AttributorRunOption::NONE, "none",
273 "disable attributor runs")));
274
276 "enable-loop-versioning-licm", cl::init(false), cl::Hidden,
277 cl::desc("Enable the experimental Loop Versioning LICM pass"));
278
280 "enable-memprof-context-disambiguation", cl::init(false), cl::Hidden,
281 cl::ZeroOrMore, cl::desc("Enable MemProf context disambiguation"));
282
284
// NOTE(review): the declarator line is elided in this listing; these
// assignments appear to be the PipelineTuningOptions constructor setting
// the pipeline tuning defaults — confirm against the full file.
// Defaults visible here: interleaving/vectorization/unrolling on, SLP off,
// call-graph profile on, unified LTO off, inliner threshold sentinel -1
// (meaning "use the optimization-level default", per its use in
// buildInlinerPipeline below).
286 LoopInterleaving = true;
287 LoopVectorization = true;
288 SLPVectorization = false;
289 LoopUnrolling = true;
293 CallGraphProfile = true;
294 UnifiedLTO = false;
296 InlinerThreshold = -1;
298}
299
300namespace llvm {
303} // namespace llvm
304
306 OptimizationLevel Level) {
307 for (auto &C : PeepholeEPCallbacks)
308 C(FPM, Level);
309}
// The helpers below each invoke all user-registered extension-point (EP)
// callbacks for one pipeline position, forwarding the relevant pass manager
// and the optimization level. The callback-list name identifies the
// extension point.
// NOTE(review): several signature lines are elided in this listing (gaps in
// the left-hand numbering); the bodies are otherwise intact.
312 for (auto &C : LateLoopOptimizationsEPCallbacks)
313 C(LPM, Level);
314}
316 OptimizationLevel Level) {
317 for (auto &C : LoopOptimizerEndEPCallbacks)
318 C(LPM, Level);
319}
322 for (auto &C : ScalarOptimizerLateEPCallbacks)
323 C(FPM, Level);
324}
326 OptimizationLevel Level) {
327 for (auto &C : CGSCCOptimizerLateEPCallbacks)
328 C(CGPM, Level);
329}
331 OptimizationLevel Level) {
332 for (auto &C : VectorizerStartEPCallbacks)
333 C(FPM, Level);
334}
336 OptimizationLevel Level) {
337 for (auto &C : OptimizerEarlyEPCallbacks)
338 C(MPM, Level);
339}
341 OptimizationLevel Level) {
342 for (auto &C : OptimizerLastEPCallbacks)
343 C(MPM, Level);
344}
347 for (auto &C : FullLinkTimeOptimizationEarlyEPCallbacks)
348 C(MPM, Level);
349}
352 for (auto &C : FullLinkTimeOptimizationLastEPCallbacks)
353 C(MPM, Level);
354}
356 OptimizationLevel Level) {
357 for (auto &C : PipelineStartEPCallbacks)
358 C(MPM, Level);
359}
362 for (auto &C : PipelineEarlySimplificationEPCallbacks)
363 C(MPM, Level);
364}
365
366// Helper to add AnnotationRemarksPass.
// NOTE(review): body elided in this listing (lines 367-368 of the original).
369}
370
371// Helper to check if the current compilation phase is preparing for LTO
// NOTE(review): body elided in this listing (lines 372-374 of the original);
// the predicate isLTOPreLink(Phase) is used throughout the file below.
375}
376
376
377// TODO: Investigate the cost/benefit of tail call elimination on debugging.
/// Build the function simplification pipeline used at -O1.
/// Populates a FunctionPassManager with scalar cleanup (EarlyCSE,
/// SimplifyCFG, peephole EP callbacks), two nested loop pipelines (LPM1 run
/// under MemorySSA, LPM2 without), and late SCCP/BDCE/ADCE cleanup, then
/// returns the FPM by value.
/// NOTE(review): this listing elides a number of lines (declarations and
/// some addPass calls) — gaps in the left-hand numbering mark the omissions.
379PassBuilder::buildO1FunctionSimplificationPipeline(OptimizationLevel Level,
381
383
386
387 // Form SSA out of local memory accesses after breaking apart aggregates into
388 // scalars.
390
391 // Catch trivial redundancies
392 FPM.addPass(EarlyCSEPass(true /* Enable mem-ssa. */));
393
394 // Hoisting of scalars and load expressions.
395 FPM.addPass(
396 SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(true)));
398
400
401 invokePeepholeEPCallbacks(FPM, Level);
402
403 FPM.addPass(
404 SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(true)));
405
406 // Form canonically associated expression trees, and simplify the trees using
407 // basic mathematical properties. For example, this will form (nearly)
408 // minimal multiplication trees.
410
411 // Add the primary loop simplification pipeline.
412 // FIXME: Currently this is split into two loop pass pipelines because we run
413 // some function passes in between them. These can and should be removed
414 // and/or replaced by scheduling the loop pass equivalents in the correct
415 // positions. But those equivalent passes aren't powerful enough yet.
416 // Specifically, `SimplifyCFGPass` and `InstCombinePass` are currently still
417 // used. We have `LoopSimplifyCFGPass` which isn't yet powerful enough to
418 // fully replace `SimplifyCFGPass`, and the closest to the other we have is
419 // `LoopInstSimplify`.
420 LoopPassManager LPM1, LPM2;
421
422 // Simplify the loop body. We do this initially to clean up after other loop
423 // passes run, either when iterating on a loop or on inner loops with
424 // implications on the outer loop.
427
428 // Try to remove as much code from the loop header as possible,
429 // to reduce amount of IR that will have to be duplicated. However,
430 // do not perform speculative hoisting the first time as LICM
431 // will destroy metadata that may not need to be destroyed if run
432 // after loop rotation.
433 // TODO: Investigate promotion cap for O1.
435 /*AllowSpeculation=*/false));
436
437 LPM1.addPass(LoopRotatePass(/* Disable header duplication */ true,
439 // TODO: Investigate promotion cap for O1.
441 /*AllowSpeculation=*/true));
444 LPM1.addPass(LoopFlattenPass());
445
448
450
452
455
456 // Do not enable unrolling in PreLinkThinLTO phase during sample PGO
457 // because it changes IR to make profile annotation in back compile
458 // inaccurate. The normal unroller doesn't pay attention to forced full unroll
459 // attributes so we need to make sure and allow the full unroll pass to pay
460 // attention to it.
461 if (Phase != ThinOrFullLTOPhase::ThinLTOPreLink || !PGOOpt ||
462 PGOOpt->Action != PGOOptions::SampleUse)
463 LPM2.addPass(LoopFullUnrollPass(Level.getSpeedupLevel(),
464 /* OnlyWhenForced= */ !PTO.LoopUnrolling,
466
468
469 FPM.addPass(createFunctionToLoopPassAdaptor(std::move(LPM1),
470 /*UseMemorySSA=*/true,
471 /*UseBlockFrequencyInfo=*/true));
472 FPM.addPass(
473 SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(true)));
475 // The loop passes in LPM2 (LoopFullUnrollPass) do not preserve MemorySSA.
476 // *All* loop passes must preserve it, in order to be able to use it.
477 FPM.addPass(createFunctionToLoopPassAdaptor(std::move(LPM2),
478 /*UseMemorySSA=*/false,
479 /*UseBlockFrequencyInfo=*/false));
480
481 // Delete small array after loop unroll.
483
484 // Specially optimize memory movement as it doesn't look like dataflow in SSA.
485 FPM.addPass(MemCpyOptPass());
486
487 // Sparse conditional constant propagation.
488 // FIXME: It isn't clear why we do this *after* loop passes rather than
489 // before...
490 FPM.addPass(SCCPPass());
491
492 // Delete dead bit computations (instcombine runs after to fold away the dead
493 // computations, and then ADCE will run later to exploit any new DCE
494 // opportunities that creates).
495 FPM.addPass(BDCEPass());
496
497 // Run instcombine after redundancy and dead bit elimination to exploit
498 // opportunities opened up by them.
500 invokePeepholeEPCallbacks(FPM, Level);
501
502 FPM.addPass(CoroElidePass());
503
505
506 // Finally, do an expensive DCE pass to catch all the dead code exposed by
507 // the simplifications and basic cleanup after all the simplifications.
508 // TODO: Investigate if this is too expensive.
509 FPM.addPass(ADCEPass());
510 FPM.addPass(
511 SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(true)));
513 invokePeepholeEPCallbacks(FPM, Level);
514
515 return FPM;
516}
517
// NOTE(review): the signature line is elided in this listing. Given the
// dispatch to buildO1FunctionSimplificationPipeline for speedup level 1,
// this is the general (-O2/-O3/-Os/-Oz) function simplification pipeline
// builder; it returns the populated FunctionPassManager by value.
521 assert(Level != OptimizationLevel::O0 && "Must request optimizations!");
522
523 // The O1 pipeline has a separate pipeline creation function to simplify
524 // construction readability.
525 if (Level.getSpeedupLevel() == 1)
526 return buildO1FunctionSimplificationPipeline(Level, Phase);
527
529
532
533 // Form SSA out of local memory accesses after breaking apart aggregates into
534 // scalars.
536
537 // Catch trivial redundancies
538 FPM.addPass(EarlyCSEPass(true /* Enable mem-ssa. */));
541
542 // Hoisting of scalars and load expressions.
543 if (EnableGVNHoist)
544 FPM.addPass(GVNHoistPass());
545
546 // Global value numbering based sinking.
547 if (EnableGVNSink) {
548 FPM.addPass(GVNSinkPass());
549 FPM.addPass(
550 SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(true)));
551 }
552
553 // Speculative execution if the target has divergent branches; otherwise nop.
554 FPM.addPass(SpeculativeExecutionPass(/* OnlyIfDivergentTarget =*/true));
555
556 // Optimize based on known information about branches, and cleanup afterward.
559
560 FPM.addPass(
561 SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(true)));
564
565 if (!Level.isOptimizingForSize())
567
568 invokePeepholeEPCallbacks(FPM, Level);
569
570 // For PGO use pipeline, try to optimize memory intrinsics such as memcpy
571 // using the size value profile. Don't perform this when optimizing for size.
572 if (PGOOpt && PGOOpt->Action == PGOOptions::IRUse &&
573 !Level.isOptimizingForSize())
575
577 FPM.addPass(
578 SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(true)));
579
580 // Form canonically associated expression trees, and simplify the trees using
581 // basic mathematical properties. For example, this will form (nearly)
582 // minimal multiplication trees.
584
587
588 // Add the primary loop simplification pipeline.
589 // FIXME: Currently this is split into two loop pass pipelines because we run
590 // some function passes in between them. These can and should be removed
591 // and/or replaced by scheduling the loop pass equivalents in the correct
592 // positions. But those equivalent passes aren't powerful enough yet.
593 // Specifically, `SimplifyCFGPass` and `InstCombinePass` are currently still
594 // used. We have `LoopSimplifyCFGPass` which isn't yet powerful enough to
595 // fully replace `SimplifyCFGPass`, and the closest to the other we have is
596 // `LoopInstSimplify`.
597 LoopPassManager LPM1, LPM2;
598
599 // Simplify the loop body. We do this initially to clean up after other loop
600 // passes run, either when iterating on a loop or on inner loops with
601 // implications on the outer loop.
604
605 // Try to remove as much code from the loop header as possible,
606 // to reduce amount of IR that will have to be duplicated. However,
607 // do not perform speculative hoisting the first time as LICM
608 // will destroy metadata that may not need to be destroyed if run
609 // after loop rotation.
610 // TODO: Investigate promotion cap for O1.
612 /*AllowSpeculation=*/false));
613
614 // Disable header duplication in loop rotation at -Oz.
615 LPM1.addPass(
617 // TODO: Investigate promotion cap for O1.
619 /*AllowSpeculation=*/true));
620 LPM1.addPass(
621 SimpleLoopUnswitchPass(/* NonTrivial */ Level == OptimizationLevel::O3));
623 LPM1.addPass(LoopFlattenPass());
624
627
629
631
634
635 // Do not enable unrolling in PreLinkThinLTO phase during sample PGO
636 // because it changes IR to make profile annotation in back compile
637 // inaccurate. The normal unroller doesn't pay attention to forced full unroll
638 // attributes so we need to make sure and allow the full unroll pass to pay
639 // attention to it.
640 if (Phase != ThinOrFullLTOPhase::ThinLTOPreLink || !PGOOpt ||
641 PGOOpt->Action != PGOOptions::SampleUse)
642 LPM2.addPass(LoopFullUnrollPass(Level.getSpeedupLevel(),
643 /* OnlyWhenForced= */ !PTO.LoopUnrolling,
645
647
648 FPM.addPass(createFunctionToLoopPassAdaptor(std::move(LPM1),
649 /*UseMemorySSA=*/true,
650 /*UseBlockFrequencyInfo=*/true));
651 FPM.addPass(
652 SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(true)));
654 // The loop passes in LPM2 (LoopIdiomRecognizePass, IndVarSimplifyPass,
655 // LoopDeletionPass and LoopFullUnrollPass) do not preserve MemorySSA.
656 // *All* loop passes must preserve it, in order to be able to use it.
657 FPM.addPass(createFunctionToLoopPassAdaptor(std::move(LPM2),
658 /*UseMemorySSA=*/false,
659 /*UseBlockFrequencyInfo=*/false));
660
661 // Delete small array after loop unroll.
663
664 // Try vectorization/scalarization transforms that are both improvements
665 // themselves and can allow further folds with GVN and InstCombine.
666 FPM.addPass(VectorCombinePass(/*TryEarlyFoldsOnly=*/true));
667
668 // Eliminate redundancies.
670 if (RunNewGVN)
671 FPM.addPass(NewGVNPass());
672 else
673 FPM.addPass(GVNPass());
674
675 // Sparse conditional constant propagation.
676 // FIXME: It isn't clear why we do this *after* loop passes rather than
677 // before...
678 FPM.addPass(SCCPPass());
679
680 // Delete dead bit computations (instcombine runs after to fold away the dead
681 // computations, and then ADCE will run later to exploit any new DCE
682 // opportunities that creates).
683 FPM.addPass(BDCEPass());
684
685 // Run instcombine after redundancy and dead bit elimination to exploit
686 // opportunities opened up by them.
688 invokePeepholeEPCallbacks(FPM, Level);
689
690 // Re-consider control flow based optimizations after redundancy elimination,
691 // redo DCE, etc.
692 if (EnableDFAJumpThreading && Level.getSizeLevel() == 0)
694
697
698 // Finally, do an expensive DCE pass to catch all the dead code exposed by
699 // the simplifications and basic cleanup after all the simplifications.
700 // TODO: Investigate if this is too expensive.
701 FPM.addPass(ADCEPass());
702
703 // Specially optimize memory movement as it doesn't look like dataflow in SSA.
704 FPM.addPass(MemCpyOptPass());
705
706 FPM.addPass(DSEPass());
708
711 /*AllowSpeculation=*/true),
712 /*UseMemorySSA=*/true, /*UseBlockFrequencyInfo=*/false));
713
714 FPM.addPass(CoroElidePass());
715
717
719 .convertSwitchRangeToICmp(true)
720 .hoistCommonInsts(true)
721 .sinkCommonInsts(true)));
723 invokePeepholeEPCallbacks(FPM, Level);
724
725 return FPM;
726}
727
// Adds the passes required before LTO pre-link emission.
// NOTE(review): the body (lines 729-730 of the original) is elided in this
// listing.
728void PassBuilder::addRequiredLTOPreLinkPasses(ModulePassManager &MPM) {
731}
732
/// Add PGO instrumentation or profile-use passes to \p MPM.
/// When \p RunProfileGen is false this is the profile-use path
/// (PGOInstrumentationUse); otherwise instrumentation plus profile lowering
/// is added. A pre-instrumentation inliner/cleanup pipeline runs first
/// unless this is a context-sensitive (IsCS) run or -disable-preinline.
/// NOTE(review): several lines are elided in this listing (e.g. the FPM
/// declaration and some addPass calls) — gaps in the numbering mark them.
733void PassBuilder::addPGOInstrPasses(ModulePassManager &MPM,
734 OptimizationLevel Level, bool RunProfileGen,
735 bool IsCS, bool AtomicCounterUpdate,
736 std::string ProfileFile,
737 std::string ProfileRemappingFile,
738 ThinOrFullLTOPhase LTOPhase,
740 assert(Level != OptimizationLevel::O0 && "Not expecting O0 here!");
741 if (!IsCS && !DisablePreInliner) {
742 InlineParams IP;
743
745
746 // FIXME: The hint threshold has the same value used by the regular inliner
747 // when not optimizing for size. This should probably be lowered after
748 // performance testing.
749 // FIXME: this comment is cargo culted from the old pass manager, revisit).
750 IP.HintThreshold = Level.isOptimizingForSize() ? PreInlineThreshold : 325;
752 IP, /* MandatoryFirst */ true,
754 CGSCCPassManager &CGPipeline = MIWP.getPM();
755
758 FPM.addPass(EarlyCSEPass()); // Catch trivial redundancies.
759 FPM.addPass(SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(
760 true))); // Merge & remove basic blocks.
761 FPM.addPass(InstCombinePass()); // Combine silly sequences.
762 invokePeepholeEPCallbacks(FPM, Level);
763
764 CGPipeline.addPass(createCGSCCToFunctionPassAdaptor(
765 std::move(FPM), PTO.EagerlyInvalidateAnalyses));
766
767 MPM.addPass(std::move(MIWP));
768
769 // Delete anything that is now dead to make sure that we don't instrument
770 // dead code. Instrumentation can end up keeping dead code around and
771 // dramatically increase code size.
773 }
774
775 if (!RunProfileGen) {
776 assert(!ProfileFile.empty() && "Profile use expecting a profile file!");
777 MPM.addPass(
778 PGOInstrumentationUse(ProfileFile, ProfileRemappingFile, IsCS, FS));
779 // Cache ProfileSummaryAnalysis once to avoid the potential need to insert
780 // RequireAnalysisPass for PSI before subsequent non-module passes.
782 return;
783 }
784
785 // Perform PGO instrumentation.
787
789 // Disable header duplication in loop rotation at -Oz.
793 /*UseMemorySSA=*/false,
794 /*UseBlockFrequencyInfo=*/false),
796 }
797
798 // Add the profile lowering pass.
800 if (!ProfileFile.empty())
801 Options.InstrProfileOutput = ProfileFile;
802 // Do counter promotion at Level greater than O0.
803 Options.DoCounterPromotion = true;
804 Options.UseBFIInPromotion = IsCS;
805 Options.Atomic = AtomicCounterUpdate;
807}
808
// NOTE(review): the line carrying this overload's name is elided in this
// listing. Judging by "Do not do counter promotion at O0" below, this is
// the O0 variant of addPGOInstrPasses: profile-use or instrumentation with
// counter promotion disabled.
810 ModulePassManager &MPM, bool RunProfileGen, bool IsCS,
811 bool AtomicCounterUpdate, std::string ProfileFile,
812 std::string ProfileRemappingFile, IntrusiveRefCntPtr<vfs::FileSystem> FS) {
813 if (!RunProfileGen) {
814 assert(!ProfileFile.empty() && "Profile use expecting a profile file!");
815 MPM.addPass(
816 PGOInstrumentationUse(ProfileFile, ProfileRemappingFile, IsCS, FS));
817 // Cache ProfileSummaryAnalysis once to avoid the potential need to insert
818 // RequireAnalysisPass for PSI before subsequent non-module passes.
820 return;
821 }
822
823 // Perform PGO instrumentation.
825 // Add the profile lowering pass.
827 if (!ProfileFile.empty())
828 Options.InstrProfileOutput = ProfileFile;
829 // Do not do counter promotion at O0.
830 Options.DoCounterPromotion = false;
831 Options.UseBFIInPromotion = IsCS;
832 Options.Atomic = AtomicCounterUpdate;
834}
835
// NOTE(review): signature elided in this listing; this body derives
// InlineParams from the optimization level's speedup and size components.
837 return getInlineParams(Level.getSpeedupLevel(), Level.getSizeLevel());
838}
839
// NOTE(review): signature elided in this listing. This builder configures
// InlineParams (honoring PTO.InlinerThreshold == -1 as "use opt-level
// defaults"), then assembles the main post-order CGSCC pipeline inside a
// module-inliner wrapper (MIWP), which it returns.
843 InlineParams IP;
844 if (PTO.InlinerThreshold == -1)
845 IP = getInlineParamsFromOptLevel(Level);
846 else
848 // For PreLinkThinLTO + SamplePGO, set hot-caller threshold to 0 to
849 // disable hot callsite inline (as much as possible [1]) because it makes
850 // profile annotation in the backend inaccurate.
851 //
852 // [1] Note the cost of a function could be below zero due to erased
853 // prologue / epilogue.
854 if (Phase == ThinOrFullLTOPhase::ThinLTOPreLink && PGOOpt &&
855 PGOOpt->Action == PGOOptions::SampleUse)
857
858 if (PGOOpt)
860
864
865 // Require the GlobalsAA analysis for the module so we can query it within
866 // the CGSCC pipeline.
869 // Invalidate AAManager so it can be recreated and pick up the newly
870 // available GlobalsAA.
871 MIWP.addModulePass(
873 }
874
875 // Require the ProfileSummaryAnalysis for the module so we can query it within
876 // the inliner pass.
878
879 // Now begin the main postorder CGSCC pipeline.
880 // FIXME: The current CGSCC pipeline has its origins in the legacy pass
881 // manager and trying to emulate its precise behavior. Much of this doesn't
882 // make a lot of sense and we should revisit the core CGSCC structure.
883 CGSCCPassManager &MainCGPipeline = MIWP.getPM();
884
885 // Note: historically, the PruneEH pass was run first to deduce nounwind and
886 // generally clean up exception handling overhead. It isn't clear this is
887 // valuable as the inliner doesn't currently care whether it is inlining an
888 // invoke or a call.
889
891 MainCGPipeline.addPass(AttributorCGSCCPass());
892
893 // Deduce function attributes. We do another run of this after the function
894 // simplification pipeline, so this only needs to run when it could affect the
895 // function simplification pipeline, which is only the case with recursive
896 // functions.
897 MainCGPipeline.addPass(PostOrderFunctionAttrsPass(/*SkipNonRecursive*/ true));
898
899 // When at O3 add argument promotion to the pass pipeline.
900 // FIXME: It isn't at all clear why this should be limited to O3.
901 if (Level == OptimizationLevel::O3)
902 MainCGPipeline.addPass(ArgumentPromotionPass());
903
904 // Try to perform OpenMP specific optimizations. This is a (quick!) no-op if
905 // there are no OpenMP runtime calls present in the module.
906 if (Level == OptimizationLevel::O2 || Level == OptimizationLevel::O3)
907 MainCGPipeline.addPass(OpenMPOptCGSCCPass());
908
909 invokeCGSCCOptimizerLateEPCallbacks(MainCGPipeline, Level);
910
911 // Add the core function simplification pipeline nested inside the
912 // CGSCC walk.
915 PTO.EagerlyInvalidateAnalyses, /*NoRerun=*/true));
916
917 // Finally, deduce any function attributes based on the fully simplified
918 // function.
919 MainCGPipeline.addPass(PostOrderFunctionAttrsPass());
920
921 // Mark that the function is fully simplified and that it shouldn't be
922 // simplified again if we somehow revisit it due to CGSCC mutations unless
923 // it's been modified since.
926
927 MainCGPipeline.addPass(CoroSplitPass(Level != OptimizationLevel::O0));
928
929 // Make sure we don't affect potential future NoRerun CGSCC adaptors.
930 MIWP.addLateModulePass(createModuleToFunctionPassAdaptor(
932
933 return MIWP;
934}
935
940
// NOTE(review): signature elided in this listing. This appears to be the
// module-inliner variant of the inliner pipeline builder (used when
// -enable-module-inliner is set): it configures InlineParams, disables
// inline deferral (unneeded with priority-ordered module inlining), and
// returns a ModulePassManager.
942 // For PreLinkThinLTO + SamplePGO, set hot-caller threshold to 0 to
943 // disable hot callsite inline (as much as possible [1]) because it makes
944 // profile annotation in the backend inaccurate.
945 //
946 // [1] Note the cost of a function could be below zero due to erased
947 // prologue / epilogue.
948 if (Phase == ThinOrFullLTOPhase::ThinLTOPreLink && PGOOpt &&
949 PGOOpt->Action == PGOOptions::SampleUse)
951
952 if (PGOOpt)
954
955 // The inline deferral logic is used to avoid losing some
956 // inlining chance in future. It is helpful in SCC inliner, in which
957 // inlining is processed in bottom-up order.
958 // While in module inliner, the inlining order is a priority-based order
959 // by default. The inline deferral is unnecessary there. So we disable the
960 // inline deferral logic in module inliner.
961 IP.EnableDeferral = false;
962
964
968
971
972 return MPM;
973}
974
// Module simplification pipeline (named in its own assert below): early
// cleanup of frontend output, sample/IR PGO handling, indirect-call
// promotion, IPSCCP, global optimization, and the inliner pipeline; returns
// the populated ModulePassManager.
// NOTE(review): the signature and a number of interior lines are elided in
// this listing — gaps in the left-hand numbering mark the omissions.
978 assert(Level != OptimizationLevel::O0 &&
979 "Should not be used for O0 pipeline");
980
982 "FullLTOPostLink shouldn't call buildModuleSimplificationPipeline!");
983
985
986 // Place pseudo probe instrumentation as the first pass of the pipeline to
987 // minimize the impact of optimization changes.
988 if (PGOOpt && PGOOpt->PseudoProbeForProfiling &&
991
992 bool HasSampleProfile = PGOOpt && (PGOOpt->Action == PGOOptions::SampleUse);
993
994 // In ThinLTO mode, when flattened profile is used, all the available
995 // profile information will be annotated in PreLink phase so there is
996 // no need to load the profile again in PostLink.
997 bool LoadSampleProfile =
998 HasSampleProfile &&
1000
1001 // During the ThinLTO backend phase we perform early indirect call promotion
1002 // here, before globalopt. Otherwise imported available_externally functions
1003 // look unreferenced and are removed. If we are going to load the sample
1004 // profile then defer until later.
1005 // TODO: See if we can move later and consolidate with the location where
1006 // we perform ICP when we are loading a sample profile.
1007 // TODO: We pass HasSampleProfile (whether there was a sample profile file
1008 // passed to the compile) to the SamplePGO flag of ICP. This is used to
1009 // determine whether the new direct calls are annotated with prof metadata.
1010 // Ideally this should be determined from whether the IR is annotated with
1011 // sample profile, and not whether a sample profile was provided on the
1012 // command line. E.g. for flattened profiles where we will not be reloading
1013 // the sample profile in the ThinLTO backend, we ideally shouldn't have to
1014 // provide the sample profile file.
1015 if (Phase == ThinOrFullLTOPhase::ThinLTOPostLink && !LoadSampleProfile)
1016 MPM.addPass(PGOIndirectCallPromotion(true /* InLTO */, HasSampleProfile));
1017
1018 // Create an early function pass manager to cleanup the output of the
1019 // frontend. Not necessary with LTO post link pipelines since the pre link
1020 // pipeline already cleaned up the frontend output.
1022 // Do basic inference of function attributes from known properties of system
1023 // libraries and other oracles.
1026
1027 FunctionPassManager EarlyFPM;
1028 // Lower llvm.expect to metadata before attempting transforms.
1029 // Compare/branch metadata may alter the behavior of passes like
1030 // SimplifyCFG.
1032 EarlyFPM.addPass(SimplifyCFGPass());
1034 EarlyFPM.addPass(EarlyCSEPass());
1035 if (Level == OptimizationLevel::O3)
1036 EarlyFPM.addPass(CallSiteSplittingPass());
1038 std::move(EarlyFPM), PTO.EagerlyInvalidateAnalyses));
1039 }
1040
1041 if (LoadSampleProfile) {
1042 // Annotate sample profile right after early FPM to ensure freshness of
1043 // the debug info.
1044 MPM.addPass(SampleProfileLoaderPass(PGOOpt->ProfileFile,
1045 PGOOpt->ProfileRemappingFile, Phase));
1046 // Cache ProfileSummaryAnalysis once to avoid the potential need to insert
1047 // RequireAnalysisPass for PSI before subsequent non-module passes.
1049 // Do not invoke ICP in the LTOPrelink phase as it makes it hard
1050 // for the profile annotation to be accurate in the LTO backend.
1051 if (!isLTOPreLink(Phase))
1052 // We perform early indirect call promotion here, before globalopt.
1053 // This is important for the ThinLTO backend phase because otherwise
1054 // imported available_externally functions look unreferenced and are
1055 // removed.
1056 MPM.addPass(
1057 PGOIndirectCallPromotion(true /* IsInLTO */, true /* SamplePGO */));
1058 }
1059
1060 // Try to perform OpenMP specific optimizations on the module. This is a
1061 // (quick!) no-op if there are no OpenMP runtime calls present in the module.
1063
1066
1067 // Lower type metadata and the type.test intrinsic in the ThinLTO
1068 // post link pipeline after ICP. This is to enable usage of the type
1069 // tests in ICP sequences.
1071 MPM.addPass(LowerTypeTestsPass(nullptr, nullptr, true));
1072
1074
1075 // Interprocedural constant propagation now that basic cleanup has occurred
1076 // and prior to optimizing globals.
1077 // FIXME: This position in the pipeline hasn't been carefully considered in
1078 // years, it should be re-analyzed.
1080 IPSCCPOptions(/*AllowFuncSpec=*/
1081 Level != OptimizationLevel::Os &&
1082 Level != OptimizationLevel::Oz &&
1083 !isLTOPreLink(Phase))));
1084
1085 // Attach metadata to indirect call sites indicating the set of functions
1086 // they may target at run-time. This should follow IPSCCP.
1088
1089 // Optimize globals to try and fold them into constants.
1091
1092 // Create a small function pass pipeline to cleanup after all the global
1093 // optimizations.
1094 FunctionPassManager GlobalCleanupPM;
1095 // FIXME: Should this instead be a run of SROA?
1096 GlobalCleanupPM.addPass(PromotePass());
1097 GlobalCleanupPM.addPass(InstCombinePass());
1098 invokePeepholeEPCallbacks(GlobalCleanupPM, Level);
1099 GlobalCleanupPM.addPass(
1100 SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(true)));
1101 MPM.addPass(createModuleToFunctionPassAdaptor(std::move(GlobalCleanupPM),
1103
1104 // Add all the requested passes for instrumentation PGO, if requested.
1105 if (PGOOpt && Phase != ThinOrFullLTOPhase::ThinLTOPostLink &&
1106 (PGOOpt->Action == PGOOptions::IRInstr ||
1107 PGOOpt->Action == PGOOptions::IRUse)) {
1108 addPGOInstrPasses(MPM, Level,
1109 /*RunProfileGen=*/PGOOpt->Action == PGOOptions::IRInstr,
1110 /*IsCS=*/false, PGOOpt->AtomicCounterUpdate,
1111 PGOOpt->ProfileFile, PGOOpt->ProfileRemappingFile, Phase,
1112 PGOOpt->FS);
1113 MPM.addPass(PGOIndirectCallPromotion(false, false));
1114 }
1115 if (PGOOpt && Phase != ThinOrFullLTOPhase::ThinLTOPostLink &&
1116 PGOOpt->CSAction == PGOOptions::CSIRInstr)
1117 MPM.addPass(PGOInstrumentationGenCreateVar(PGOOpt->CSProfileGenFile));
1118
1119 if (PGOOpt && Phase != ThinOrFullLTOPhase::ThinLTOPostLink &&
1120 !PGOOpt->MemoryProfile.empty())
1121 MPM.addPass(MemProfUsePass(PGOOpt->MemoryProfile, PGOOpt->FS));
1122
1123 // Synthesize function entry counts for non-PGO compilation.
1124 if (EnableSyntheticCounts && !PGOOpt)
1126
1129 else
1131
1132 // Remove any dead arguments exposed by cleanups, constant folding globals,
1133 // and argument promotion.
1135
1137
1138 // Optimize globals now that functions are fully simplified.
1141
1142 return MPM;
1143}
1144
// NOTE(review): doxygen-HTML extraction — the fused leading numbers are the
// original file's line numbers, and gaps in them mark statements (mostly
// FPM.addPass(...) calls) dropped by the extraction. The surviving comments
// describe passes whose add-statements are often the missing lines.
1145/// TODO: Should LTO cause any differences to this set of passes?
1146void PassBuilder::addVectorPasses(OptimizationLevel Level,
1147 FunctionPassManager &FPM, bool IsFullLTO) {
// NOTE(review): lines 1148-1149, 1151-1152 missing.
1150
1153 if (IsFullLTO) {
1154 // The vectorizer may have significantly shortened a loop body; unroll
1155 // again. Unroll small loops to hide loop backedge latency and saturate any
1156 // parallel execution resources of an out-of-order processor. We also then
1157 // need to clean up redundancies and loop invariant code.
1158 // FIXME: It would be really good to use a loop-integrated instruction
1159 // combiner for cleanup here so that the unrolling and LICM can be pipelined
1160 // across the loop nests.
1161 // We do UnrollAndJam in a separate LPM to ensure it happens before unroll
// NOTE(review): lines 1162-1163, 1165, 1167-1168 missing — the adaptor calls
// wrapping LoopUnrollAndJamPass and the LoopUnrollPass argument list.
1164 LoopUnrollAndJamPass(Level.getSpeedupLevel())));
1166 Level.getSpeedupLevel(), /*OnlyWhenForced=*/!PTO.LoopUnrolling,
1169 // Now that we are done with loop unrolling, be it either by LoopVectorizer,
1170 // or LoopUnroll passes, some variable-offset GEP's into alloca's could have
1171 // become constant-offset, thus enabling SROA and alloca promotion. Do so.
1172 // NOTE: we are very late in the pipeline, and we don't have any LICM
1173 // or SimplifyCFG passes scheduled after us, that would cleanup
1174 // the CFG mess this may have created if allowed to modify CFG, so forbid that.
// NOTE(review): line 1175 missing — the SROA pass-add this comment describes.
1176 }
1177
1178 if (!IsFullLTO) {
1179 // Eliminate loads by forwarding stores from the previous iteration to loads
1180 // of the current iteration.
// NOTE(review): line 1181 missing — the LoopLoadElimination-style pass-add.
1182 }
1183 // Cleanup after the loop optimization passes.
1184 FPM.addPass(InstCombinePass());
1185
1186 if (Level.getSpeedupLevel() > 1 && ExtraVectorizerPasses) {
1187 ExtraVectorPassManager ExtraPasses;
1188 // At higher optimization levels, try to clean up any runtime overlap and
1189 // alignment checks inserted by the vectorizer. We want to track correlated
1190 // runtime checks for two inner loops in the same outer loop, fold any
1191 // common computations, hoist loop-invariant aspects out of any outer loop,
1192 // and unswitch the runtime checks if possible. Once hoisted, we may have
1193 // dead (or speculatable) control flows or more combining opportunities.
1194 ExtraPasses.addPass(EarlyCSEPass());
// NOTE(review): line 1195 missing.
1196 ExtraPasses.addPass(InstCombinePass());
1197 LoopPassManager LPM;
// NOTE(review): line 1198 missing — the LICM pass-add whose arguments follow.
1199 /*AllowSpeculation=*/true));
1200 LPM.addPass(SimpleLoopUnswitchPass(/* NonTrivial */ Level ==
// NOTE(review): line 1201 missing — the remainder of the unswitch condition.
1202 ExtraPasses.addPass(
1203 createFunctionToLoopPassAdaptor(std::move(LPM), /*UseMemorySSA=*/true,
1204 /*UseBlockFrequencyInfo=*/true));
1205 ExtraPasses.addPass(
1206 SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(true)));
1207 ExtraPasses.addPass(InstCombinePass());
1208 FPM.addPass(std::move(ExtraPasses));
1209 }
1210
1211 // Now that we've formed fast to execute loop structures, we do further
1212 // optimizations. These are run afterward as they might block doing complex
1213 // analyses and transforms such as what are needed for loop vectorization.
1214
1215 // Cleanup after loop vectorization, etc. Simplification passes like CVP and
1216 // GVN, loop transforms, and others have already run, so it's now better to
1217 // convert to more optimized IR using more aggressive simplify CFG options.
1218 // The extra sinking transform can create larger basic blocks, so do this
1219 // before SLP vectorization.
// NOTE(review): line 1220 missing — the `FPM.addPass(SimplifyCFGPass(
// SimplifyCFGOptions()` opener for the option chain below; TODO confirm.
1221 .forwardSwitchCondToPhi(true)
1222 .convertSwitchRangeToICmp(true)
1223 .convertSwitchToLookupTable(true)
1224 .needCanonicalLoops(false)
1225 .hoistCommonInsts(true)
1226 .sinkCommonInsts(true)));
1227
1228 if (IsFullLTO) {
1229 FPM.addPass(SCCPPass());
1230 FPM.addPass(InstCombinePass());
1231 FPM.addPass(BDCEPass());
1232 }
1233
1234 // Optimize parallel scalar instruction chains into SIMD instructions.
1235 if (PTO.SLPVectorization) {
// NOTE(review): line 1236 missing — the SLPVectorizer pass-add.
1237 if (Level.getSpeedupLevel() > 1 && ExtraVectorizerPasses) {
1238 FPM.addPass(EarlyCSEPass());
1239 }
1240 }
1241 // Enhance/cleanup vector code.
// NOTE(review): line 1242 missing — the vector-combine style pass-add.
1243
1244 if (!IsFullLTO) {
1245 FPM.addPass(InstCombinePass());
1246 // Unroll small loops to hide loop backedge latency and saturate any
1247 // parallel execution resources of an out-of-order processor. We also then
1248 // need to clean up redundancies and loop invariant code.
1249 // FIXME: It would be really good to use a loop-integrated instruction
1250 // combiner for cleanup here so that the unrolling and LICM can be pipelined
1251 // across the loop nests.
1252 // We do UnrollAndJam in a separate LPM to ensure it happens before unroll
1253 if (EnableUnrollAndJam && PTO.LoopUnrolling) {
// NOTE(review): line 1254 missing — the adaptor opener for the call below.
1255 LoopUnrollAndJamPass(Level.getSpeedupLevel())));
1256 }
// NOTE(review): lines 1257, 1259-1260 missing — the LoopUnrollPass add and
// the tail of its argument list.
1258 Level.getSpeedupLevel(), /*OnlyWhenForced=*/!PTO.LoopUnrolling,
1261 // Now that we are done with loop unrolling, be it either by LoopVectorizer,
1262 // or LoopUnroll passes, some variable-offset GEP's into alloca's could have
1263 // become constant-offset, thus enabling SROA and alloca promotion. Do so.
1264 // NOTE: we are very late in the pipeline, and we don't have any LICM
1265 // or SimplifyCFG passes scheduled after us, that would cleanup
1266 // the CFG mess this may have created if allowed to modify CFG, so forbid that.
// NOTE(review): line 1267 missing — the SROA pass-add this comment describes.
1268 }
1269
// NOTE(review): lines 1270-1271 missing.
1272 FPM.addPass(InstCombinePass());
1273
1274 // This is needed for two reasons:
1275 // 1. It works around problems that instcombine introduces, such as sinking
1276 // expensive FP divides into loops containing multiplications using the
1277 // divide result.
1278 // 2. It helps to clean up some loop-invariant code created by the loop
1279 // unroll pass when IsFullLTO=false.
// NOTE(review): lines 1280-1281 missing — the LICM adaptor opener for the
// argument tail below.
1282 /*AllowSpeculation=*/true),
1283 /*UseMemorySSA=*/true, /*UseBlockFrequencyInfo=*/false));
1284
1285 // Now that we've vectorized and unrolled loops, we may have more refined
1286 // alignment information, try to re-derive it here.
// NOTE(review): line 1287 missing — the alignment-inference pass-add.
1288}
1289
// NOTE(review): buildModuleOptimizationPipeline — the signature's opening
// line(s) (original lines 1290-1291) were dropped by the doxygen extraction,
// as were many pass-add statements (visible as gaps in the fused numbering).
// Recover them from the original PassBuilderPipelines.cpp before editing.
1292 ThinOrFullLTOPhase LTOPhase) {
1293 const bool LTOPreLink = isLTOPreLink(LTOPhase);
1295
1296 // Run partial inlining pass to partially inline functions that have
1297 // large bodies.
// NOTE(review): lines 1298-1300 missing — the guarded PartialInliner add.
1300
1301 // Remove avail extern fns and globals definitions since we aren't compiling
1302 // an object file for later LTO. For LTO we want to preserve these so they
1303 // are eligible for inlining at link-time. Note if they are unreferenced they
1304 // will be removed by GlobalDCE later, so this only impacts referenced
1305 // available externally globals. Eventually they will be suppressed during
1306 // codegen, but eliminating here enables more opportunity for GlobalDCE as it
1307 // may make globals referenced by available external functions dead and saves
1308 // running remaining passes on the eliminated functions. These should be
1309 // preserved during prelinking for link-time inlining decisions.
1310 if (!LTOPreLink)
// NOTE(review): line 1311 (the pass added under this condition) missing.
1312
1315
1316 // Do RPO function attribute inference across the module to forward-propagate
1317 // attributes where applicable.
1318 // FIXME: Is this really an optimization rather than a canonicalization?
// NOTE(review): line 1319 missing — the pass-add this comment describes.
1320
1321 // Do a post inline PGO instrumentation and use pass. This is a context
1322 // sensitive PGO pass. We don't want to do this in LTOPreLink phrase as
1323 // cross-module inline has not been done yet. The context sensitive
1324 // instrumentation is after all the inlines are done.
1325 if (!LTOPreLink && PGOOpt) {
1326 if (PGOOpt->CSAction == PGOOptions::CSIRInstr)
1327 addPGOInstrPasses(MPM, Level, /*RunProfileGen=*/true,
1328 /*IsCS=*/true, PGOOpt->AtomicCounterUpdate,
1329 PGOOpt->CSProfileGenFile, PGOOpt->ProfileRemappingFile,
1330 LTOPhase, PGOOpt->FS);
1331 else if (PGOOpt->CSAction == PGOOptions::CSIRUse)
1332 addPGOInstrPasses(MPM, Level, /*RunProfileGen=*/false,
1333 /*IsCS=*/true, PGOOpt->AtomicCounterUpdate,
1334 PGOOpt->ProfileFile, PGOOpt->ProfileRemappingFile,
1335 LTOPhase, PGOOpt->FS);
1336 }
1337
1338 // Re-compute GlobalsAA here prior to function passes. This is particularly
1339 // useful as the above will have inlined, DCE'ed, and function-attr
1340 // propagated everything. We should at this point have a reasonably minimal
1341 // and richly annotated call graph. By computing aliasing and mod/ref
1342 // information for all local globals here, the late loop passes and notably
1343 // the vectorizer will be able to use them to help recognize vectorizable
1344 // memory operations.
// NOTE(review): lines 1345-1348 missing — the GlobalsAA recompute block.
1347
1349
1350 FunctionPassManager OptimizePM;
1351 // Scheduling LoopVersioningLICM when inlining is over, because after that
1352 // we may see more accurate aliasing. Reason to run this late is that too
1353 // early versioning may prevent further inlining due to increase of code
1354 // size. Other optimizations which runs later might get benefit of no-alias
1355 // assumption in clone loop.
// NOTE(review): lines 1356, 1358, 1360-1361 missing — the enclosing `if` and
// the adaptor/pass openers whose argument tails survive below.
1357 OptimizePM.addPass(
1359 // LoopVersioningLICM pass might increase new LICM opportunities.
1362 /*AllowSpeculation=*/true),
1363 /*USeMemorySSA=*/true, /*UseBlockFrequencyInfo=*/false));
1364 }
1365
1366 OptimizePM.addPass(Float2IntPass());
// NOTE(review): line 1367 missing.
1368
1369 if (EnableMatrix) {
1370 OptimizePM.addPass(LowerMatrixIntrinsicsPass());
1371 OptimizePM.addPass(EarlyCSEPass());
1372 }
1373
1374 // CHR pass should only be applied with the profile information.
1375 // The check is to check the profile summary information in CHR.
1376 if (EnableCHR && Level == OptimizationLevel::O3)
1377 OptimizePM.addPass(ControlHeightReductionPass());
1378
1379 // FIXME: We need to run some loop optimizations to re-rotate loops after
1380 // simplifycfg and others undo their rotation.
1381
1382 // Optimize the loop execution. These passes operate on entire loop nests
1383 // rather than on each loop in an inside-out manner, and so they are actually
1384 // function passes.
1385
1386 invokeVectorizerStartEPCallbacks(OptimizePM, Level);
1387
1388 LoopPassManager LPM;
1389 // First rotate loops that may have been un-rotated by prior passes.
1390 // Disable header duplication at -Oz.
1391 LPM.addPass(LoopRotatePass(Level != OptimizationLevel::Oz, LTOPreLink));
1392 // Some loops may have become dead by now. Try to delete them.
1393 // FIXME: see discussion in https://reviews.llvm.org/D112851,
1394 // this may need to be revisited once we run GVN before loop deletion
1395 // in the simplification pipeline.
// NOTE(review): lines 1396-1397 missing — the LoopDeletion add and the
// adaptor opener whose argument tail survives below.
1398 std::move(LPM), /*UseMemorySSA=*/false, /*UseBlockFrequencyInfo=*/false));
1399
1400 // Distribute loops to allow partial vectorization. I.e. isolate dependences
1401 // into separate loop that would otherwise inhibit vectorization. This is
1402 // currently only performed for loops marked with the metadata
1403 // llvm.loop.distribute=true or when -enable-loop-distribute is specified.
1404 OptimizePM.addPass(LoopDistributePass());
1405
1406 // Populates the VFABI attribute with the scalar-to-vector mappings
1407 // from the TargetLibraryInfo.
1408 OptimizePM.addPass(InjectTLIMappings());
1409
1410 addVectorPasses(Level, OptimizePM, /* IsFullLTO */ false);
1411
1412 // LoopSink pass sinks instructions hoisted by LICM, which serves as a
1413 // canonicalization pass that enables other optimizations. As a result,
1414 // LoopSink pass needs to be a very late IR pass to avoid undoing LICM
1415 // result too early.
1416 OptimizePM.addPass(LoopSinkPass());
1417
1418 // And finally clean up LCSSA form before generating code.
1419 OptimizePM.addPass(InstSimplifyPass());
1420
1421 // This hoists/decomposes div/rem ops. It should run after other sink/hoist
1422 // passes to avoid re-sinking, but before SimplifyCFG because it can allow
1423 // flattening of blocks.
1424 OptimizePM.addPass(DivRemPairsPass());
1425
1426 // Try to annotate calls that were created during optimization.
1427 OptimizePM.addPass(TailCallElimPass());
1428
1429 // LoopSink (and other loop passes since the last simplifyCFG) might have
1430 // resulted in single-entry-single-exit or empty blocks. Clean up the CFG.
1431 OptimizePM.addPass(
1432 SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(true)));
1433
1434 // Add the core optimizing pipeline.
// NOTE(review): lines 1436, 1438 missing — the second adaptor argument and a
// following statement were dropped.
1435 MPM.addPass(createModuleToFunctionPassAdaptor(std::move(OptimizePM),
1437
1439
1440 // Split out cold code. Splitting is done late to avoid hiding context from
1441 // other optimizations and inadvertently regressing performance. The tradeoff
1442 // is that this has a higher code size cost than splitting early.
1443 if (EnableHotColdSplit && !LTOPreLink)
// NOTE(review): line 1444 (the pass added under this condition) missing.
1445
1446 // Search the code for similar regions of code. If enough similar regions can
1447 // be found where extracting the regions into their own function will decrease
1448 // the size of the program, we extract the regions, and deduplicate the
1449 // structurally similar regions.
1450 if (EnableIROutliner)
// NOTE(review): line 1451 (the IROutliner pass-add) missing.
1452
1453 // Merge functions if requested.
1454 if (PTO.MergeFunctions)
// NOTE(review): line 1455 (the MergeFunctions pass-add) missing.
1456
1457 // Now we need to do some global optimization transforms.
1458 // FIXME: It would seem like these should come first in the optimization
1459 // pipeline and maybe be the bottom of the canonicalization pipeline? Weird
1460 // ordering here.
// NOTE(review): lines 1461-1462, 1465, 1471 missing — the global transforms
// and the passes guarded by the two conditions below.
1463
1464 if (PTO.CallGraphProfile && !LTOPreLink)
1466
1467 // TODO: Relative look table converter pass caused an issue when full lto is
1468 // enabled. See https://reviews.llvm.org/D94355 for more details.
1469 // Until the issue fixed, disable this pass during pre-linking phase.
1470 if (!LTOPreLink)
1472
1473 return MPM;
1474}
1475
// NOTE(review): buildPerModuleDefaultPipeline — the signature's opening
// line(s) (original 1476-1477) and most pass-add statements were dropped by
// the doxygen extraction (visible as gaps in the fused line numbers). Only
// the control-flow skeleton and comments survive; recover the missing
// statements from the original PassBuilderPipelines.cpp before editing.
1478 bool LTOPreLink) {
1479 if (Level == OptimizationLevel::O0)
1480 return buildO0DefaultPipeline(Level, LTOPreLink);
1481
1483
1484 // Convert @llvm.global.annotations to !annotation metadata.
1486
1487 // Force any function attributes we want the rest of the pipeline to observe.
1489
1490 if (PGOOpt && PGOOpt->DebugInfoForProfiling)
// NOTE(review): the pass added under this condition (line 1491) is missing.
1492
1493 // Apply module pipeline start EP callback.
1495
1496 const ThinOrFullLTOPhase LTOPhase = LTOPreLink
// NOTE(review): lines 1497-1498 missing — the two arms of this conditional
// initializer were dropped.
1499 // Add the core simplification pipeline.
1501
1502 // Now add the optimization pipeline.
1504
1505 if (PGOOpt && PGOOpt->PseudoProbeForProfiling &&
1506 PGOOpt->Action == PGOOptions::SampleUse)
// NOTE(review): the pass added under this condition (line 1507) is missing.
1508
1509 // Emit annotation remarks.
1511
1512 if (LTOPreLink)
1513 addRequiredLTOPreLinkPasses(MPM);
1514 return MPM;
1515}
1516
// NOTE(review): buildFatLTODefaultPipeline — the signature opener (original
// lines 1517-1518) and the middle of the EmbedBitcodePass argument list
// (lines 1520, 1523-1525) were dropped by the doxygen extraction. Recover
// them from the original PassBuilderPipelines.cpp before editing.
1519 bool EmitSummary) {
1521 MPM.addPass(EmbedBitcodePass(ThinLTO, EmitSummary,
1523 ThinLTO
1526 return MPM;
1527}
1528
// NOTE(review): buildThinLTOPreLinkDefaultPipeline — the signature opener
// (original lines 1529-1530) and most pass-add statements were dropped by
// the doxygen extraction; gaps in the fused numbering mark where. Only the
// control flow, comments, and a few calls survive.
1531 if (Level == OptimizationLevel::O0)
1532 return buildO0DefaultPipeline(Level, /*LTOPreLink*/true);
1533
1535
1536 // Convert @llvm.global.annotations to !annotation metadata.
1538
1539 // Force any function attributes we want the rest of the pipeline to observe.
1541
1542 if (PGOOpt && PGOOpt->DebugInfoForProfiling)
// NOTE(review): the pass added under this condition (line 1543) is missing.
1544
1545 // Apply module pipeline start EP callback.
1547
1548 // If we are planning to perform ThinLTO later, we don't bloat the code with
1549 // unrolling/vectorization/... now. Just simplify the module as much as we
1550 // can.
// NOTE(review): lines 1551-1552 missing — presumably the simplification
// pipeline add this comment describes; TODO confirm against the original.
1553
1554 // Run partial inlining pass to partially inline functions that have
1555 // large bodies.
1556 // FIXME: It isn't clear whether this is really the right place to run this
1557 // in ThinLTO. Because there is another canonicalization and simplification
1558 // phase that will run after the thin link, running this here ends up with
1559 // less information than will be available later and it may grow functions in
1560 // ways that aren't beneficial.
// NOTE(review): lines 1561-1562 missing.
1563
1564 if (PGOOpt && PGOOpt->PseudoProbeForProfiling &&
1565 PGOOpt->Action == PGOOptions::SampleUse)
// NOTE(review): the pass added under this condition (line 1566) is missing.
1567
1568 // Handle Optimizer{Early,Last}EPCallbacks added by clang on PreLink. Actual
1569 // optimization is going to be done in PostLink stage, but clang can't add
1570 // callbacks there in case of in-process ThinLTO called by linker.
// NOTE(review): lines 1571-1572, 1575 missing.
1573
1574 // Emit annotation remarks.
1576
1577 addRequiredLTOPreLinkPasses(MPM);
1578
1579 return MPM;
1580}
1581
// NOTE(review): buildThinLTODefaultPipeline — the signature opener (original
// line 1582) and several pass-add statements were dropped by the doxygen
// extraction; gaps in the fused numbering mark where.
1583 OptimizationLevel Level, const ModuleSummaryIndex *ImportSummary) {
1585
1586 if (ImportSummary) {
1587 // For ThinLTO we must apply the context disambiguation decisions early, to
1588 // ensure we can correctly match the callsites to summary data.
// NOTE(review): lines 1589-1590 missing — the pass-add this comment
// describes.
1591
1592 // These passes import type identifier resolutions for whole-program
1593 // devirtualization and CFI. They must run early because other passes may
1594 // disturb the specific instruction patterns that these passes look for,
1595 // creating dependencies on resolutions that may not appear in the summary.
1596 //
1597 // For example, GVN may transform the pattern assume(type.test) appearing in
1598 // two basic blocks into assume(phi(type.test, type.test)), which would
1599 // transform a dependency on a WPD resolution into a dependency on a type
1600 // identifier resolution for CFI.
1601 //
1602 // Also, WPD has access to more precise information than ICP and can
1603 // devirtualize more effectively, so it should operate on the IR first.
1604 //
1605 // The WPD and LowerTypeTest passes need to run at -O0 to lower type
1606 // metadata and intrinsics.
1607 MPM.addPass(WholeProgramDevirtPass(nullptr, ImportSummary));
1608 MPM.addPass(LowerTypeTestsPass(nullptr, ImportSummary));
1609 }
1610
1611 if (Level == OptimizationLevel::O0) {
1612 // Run a second time to clean up any type tests left behind by WPD for use
1613 // in ICP.
1614 MPM.addPass(LowerTypeTestsPass(nullptr, nullptr, true));
1615 // Drop available_externally and unreferenced globals. This is necessary
1616 // with ThinLTO in order to avoid leaving undefined references to dead
1617 // globals in the object file.
// NOTE(review): lines 1618-1619 missing — the pass-adds this comment
// describes.
1620 return MPM;
1621 }
1622
1623 // Add the core simplification pipeline.
// NOTE(review): lines 1624-1625, 1628-1629, 1632 missing — the simplification
// pipeline, optimization pipeline, and annotation-remarks adds.
1626
1627 // Now add the optimization pipeline.
1630
1631 // Emit annotation remarks.
1633
1634 return MPM;
1635}
1636
// NOTE(review): buildLTOPreLinkDefaultPipeline — the signature opener
// (original lines 1637-1638) was dropped by the doxygen extraction; only the
// body survives. It simply delegates to the per-module pipeline in pre-link
// mode.
1639 // FIXME: We should use a customized pre-link pipeline!
1640 return buildPerModuleDefaultPipeline(Level,
1641 /* LTOPreLink */ true);
1642}
1643
// NOTE(review): buildLTODefaultPipeline — the signature opener (original
// lines 1644-1645) and many pass-add statements were dropped by the doxygen
// extraction; gaps in the fused numbering mark every dropped span. Recover
// the missing statements from the original PassBuilderPipelines.cpp before
// making code changes here.
1646 ModuleSummaryIndex *ExportSummary) {
1648
1650
1651 // Create a function that performs CFI checks for cross-DSO calls with targets
1652 // in the current module.
// NOTE(review): line 1653 missing — the pass-add this comment describes.
1654
1655 if (Level == OptimizationLevel::O0) {
1656 // The WPD and LowerTypeTest passes need to run at -O0 to lower type
1657 // metadata and intrinsics.
1658 MPM.addPass(WholeProgramDevirtPass(ExportSummary, nullptr));
1659 MPM.addPass(LowerTypeTestsPass(ExportSummary, nullptr));
1660 // Run a second time to clean up any type tests left behind by WPD for use
1661 // in ICP.
1662 MPM.addPass(LowerTypeTestsPass(nullptr, nullptr, true));
1663
// NOTE(review): lines 1664, 1667 missing.
1665
1666 // Emit annotation remarks.
1668
1669 return MPM;
1670 }
1671
1672 if (PGOOpt && PGOOpt->Action == PGOOptions::SampleUse) {
1673 // Load sample profile before running the LTO optimization pipeline.
1674 MPM.addPass(SampleProfileLoaderPass(PGOOpt->ProfileFile,
1675 PGOOpt->ProfileRemappingFile,
// NOTE(review): line 1676 missing — the tail of this argument list.
1677 // Cache ProfileSummaryAnalysis once to avoid the potential need to insert
1678 // RequireAnalysisPass for PSI before subsequent non-module passes.
// NOTE(review): line 1679 missing — the require-analysis add described above.
1680 }
1681
1682 // Try to run OpenMP optimizations, quick no-op if no OpenMP metadata present.
// NOTE(review): line 1683 missing — the OpenMPOpt pass-add.
1684
1685 // Remove unused virtual tables to improve the quality of code generated by
1686 // whole-program devirtualization and bitset lowering.
1687 MPM.addPass(GlobalDCEPass(/*InLTOPostLink=*/true));
1688
1689 // Do basic inference of function attributes from known properties of system
1690 // libraries and other oracles.
// NOTE(review): lines 1691, 1694-1695, 1701 missing.
1692
1693 if (Level.getSpeedupLevel() > 1) {
1696
1697 // Indirect call promotion. This should promote all the targets that are
1698 // left by the earlier promotion pass that promotes intra-module targets.
1699 // This two-step promotion is to save the compile time. For LTO, it should
1700 // produce the same result as if we only do promotion here.
1702 true /* InLTO */, PGOOpt && PGOOpt->Action == PGOOptions::SampleUse));
1703
1704 // Propagate constants at call sites into the functions they call. This
1705 // opens opportunities for globalopt (and inlining) by substituting function
1706 // pointers passed as arguments to direct uses of functions.
1707 MPM.addPass(IPSCCPPass(IPSCCPOptions(/*AllowFuncSpec=*/
1708 Level != OptimizationLevel::Os &&
1709 Level != OptimizationLevel::Oz)));
1710
1711 // Attach metadata to indirect call sites indicating the set of functions
1712 // they may target at run-time. This should follow IPSCCP.
// NOTE(review): line 1713 missing — the pass-add this comment describes.
1714 }
1715
1716 // Now deduce any function attributes based in the current code.
1717 MPM.addPass(
// NOTE(review): line 1718 missing — the argument of this addPass call.
1719
1720 // Do RPO function attribute inference across the module to forward-propagate
1721 // attributes where applicable.
1722 // FIXME: Is this really an optimization rather than a canonicalization?
// NOTE(review): lines 1723, 1726 missing.
1724
1725 // Use in-range annotations on GEP indices to split globals where beneficial.
1727
1728 // Run whole program optimization of virtual call when the list of callees
1729 // is fixed.
1730 MPM.addPass(WholeProgramDevirtPass(ExportSummary, nullptr));
1731
1732 // Stop here at -O1.
1733 if (Level == OptimizationLevel::O1) {
1734 // The LowerTypeTestsPass needs to run to lower type metadata and the
1735 // type.test intrinsics. The pass does nothing if CFI is disabled.
1736 MPM.addPass(LowerTypeTestsPass(ExportSummary, nullptr));
1737 // Run a second time to clean up any type tests left behind by WPD for use
1738 // in ICP (which is performed earlier than this in the regular LTO
1739 // pipeline).
1740 MPM.addPass(LowerTypeTestsPass(nullptr, nullptr, true));
1741
// NOTE(review): lines 1742, 1745 missing.
1743
1744 // Emit annotation remarks.
1746
1747 return MPM;
1748 }
1749
1750 // Optimize globals to try and fold them into constants.
// NOTE(review): lines 1751, 1754, 1758, 1761 missing — the pass-adds the
// four comments below describe.
1752
1753 // Promote any localized globals to SSA registers.
1755
1756 // Linking modules together can lead to duplicate global constant, only
1757 // keep one copy of each constant.
1759
1760 // Remove unused arguments from functions.
1762
1763 // Reduce the code after globalopt and ipsccp. Both can open up significant
1764 // simplification opportunities, and both can propagate functions through
1765 // function pointers. When this happens, we often have to resolve varargs
1766 // calls, etc, so let instcombine do this.
1767 FunctionPassManager PeepholeFPM;
1768 PeepholeFPM.addPass(InstCombinePass());
1769 if (Level.getSpeedupLevel() > 1)
1770 PeepholeFPM.addPass(AggressiveInstCombinePass());
1771 invokePeepholeEPCallbacks(PeepholeFPM, Level);
1772
// NOTE(review): line 1774 missing — the second adaptor argument.
1773 MPM.addPass(createModuleToFunctionPassAdaptor(std::move(PeepholeFPM),
1775
1776 // Note: historically, the PruneEH pass was run first to deduce nounwind and
1777 // generally clean up exception handling overhead. It isn't clear this is
1778 // valuable as the inliner doesn't currently care whether it is inlining an
1779 // invoke or a call.
1780 // Run the inliner now.
1781 if (EnableModuleInliner) {
// NOTE(review): lines 1782-1784, 1786-1787, 1789-1790 missing — both inliner
// branches lost their pass-add statements.
1785 } else {
1788 /* MandatoryFirst */ true,
1791 }
1792
1793 // Perform context disambiguation after inlining, since that would reduce the
1794 // amount of additional cloning required to distinguish the allocation
1795 // contexts.
// NOTE(review): lines 1796-1797, 1800, 1803 missing.
1798
1799 // Optimize globals again after we ran the inliner.
1801
1802 // Run the OpenMPOpt pass again after global optimizations.
1804
1805 // Garbage collect dead functions.
1806 MPM.addPass(GlobalDCEPass(/*InLTOPostLink=*/true));
1807
1808 // If we didn't decide to inline a function, check to see if we can
1809 // transform it to pass arguments by value instead of by reference.
// NOTE(review): lines 1810, 1812 missing — including, presumably, the
// `FunctionPassManager FPM;` declaration used just below; TODO confirm.
1811
1813 // The IPO Passes may leave cruft around. Clean up after them.
1814 FPM.addPass(InstCombinePass());
1815 invokePeepholeEPCallbacks(FPM, Level);
1816
// NOTE(review): lines 1817-1818, 1820 missing.
1819
1821
1822 // Do a post inline PGO instrumentation and use pass. This is a context
1823 // sensitive PGO pass.
1824 if (PGOOpt) {
1825 if (PGOOpt->CSAction == PGOOptions::CSIRInstr)
1826 addPGOInstrPasses(MPM, Level, /*RunProfileGen=*/true,
1827 /*IsCS=*/true, PGOOpt->AtomicCounterUpdate,
1828 PGOOpt->CSProfileGenFile, PGOOpt->ProfileRemappingFile,
// NOTE(review): lines 1829, 1834 missing — the final arguments of both
// addPGOInstrPasses calls.
1830 else if (PGOOpt->CSAction == PGOOptions::CSIRUse)
1831 addPGOInstrPasses(MPM, Level, /*RunProfileGen=*/false,
1832 /*IsCS=*/true, PGOOpt->AtomicCounterUpdate,
1833 PGOOpt->ProfileFile, PGOOpt->ProfileRemappingFile,
1835 }
1836
1837 // Break up allocas
// NOTE(review): lines 1838, 1842, 1845-1846 missing.
1839
1840 // LTO provides additional opportunities for tailcall elimination due to
1841 // link-time inlining, and visibility of nocapture attribute.
1843
1844 // Run a few AA driver optimizations here and now to cleanup the code.
1847
1848 MPM.addPass(
// NOTE(review): line 1849 missing — the argument of this addPass call; lines
// 1853-1854 and 1858 below are also missing, including the `if` that the
// closing brace at 1859 terminates.
1850
1851 // Require the GlobalsAA analysis for the module so we can query it within
1852 // MainFPM.
1855 // Invalidate AAManager so it can be recreated and pick up the newly
1856 // available GlobalsAA.
1857 MPM.addPass(
1859 }
1860
1861 FunctionPassManager MainFPM;
// NOTE(review): lines 1862-1863 missing — the LICM adaptor opener whose
// argument tail survives below.
1864 /*AllowSpeculation=*/true),
1865 /*USeMemorySSA=*/true, /*UseBlockFrequencyInfo=*/false));
1866
1867 if (RunNewGVN)
1868 MainFPM.addPass(NewGVNPass());
1869 else
1870 MainFPM.addPass(GVNPass());
1871
1872 // Remove dead memcpy()'s.
1873 MainFPM.addPass(MemCpyOptPass());
1874
1875 // Nuke dead stores.
1876 MainFPM.addPass(DSEPass());
1877 MainFPM.addPass(MoveAutoInitPass());
// NOTE(review): line 1878 missing.
1879
1880 LoopPassManager LPM;
1881 if (EnableLoopFlatten && Level.getSpeedupLevel() > 1)
1882 LPM.addPass(LoopFlattenPass());
// NOTE(review): lines 1883-1884 missing.
1885 // FIXME: Add loop interchange.
1886
1887 // Unroll small loops and perform peeling.
1888 LPM.addPass(LoopFullUnrollPass(Level.getSpeedupLevel(),
1889 /* OnlyWhenForced= */ !PTO.LoopUnrolling,
// NOTE(review): line 1890 missing — the tail of this argument list.
1891 // The loop passes in LPM (LoopFullUnrollPass) do not preserve MemorySSA.
1892 // *All* loop passes must preserve it, in order to be able to use it.
// NOTE(review): line 1893 missing — the adaptor opener whose argument tail
// survives below.
1894 std::move(LPM), /*UseMemorySSA=*/false, /*UseBlockFrequencyInfo=*/true));
1895
1896 MainFPM.addPass(LoopDistributePass());
1897
1898 addVectorPasses(Level, MainFPM, /* IsFullLTO */ true);
1899
1900 // Run the OpenMPOpt CGSCC pass again late.
// NOTE(review): lines 1901-1902 missing.
1903
1904 invokePeepholeEPCallbacks(MainFPM, Level);
1905 MainFPM.addPass(JumpThreadingPass());
// NOTE(review): lines 1906-1907 missing — presumably the adaptor that adds
// MainFPM to MPM; TODO confirm against the original file.
1908
1909 // Lower type metadata and the type.test intrinsic. This pass supports
1910 // clang's control flow integrity mechanisms (-fsanitize=cfi*) and needs
1911 // to be run at link time if CFI is enabled. This pass does nothing if
1912 // CFI is disabled.
1913 MPM.addPass(LowerTypeTestsPass(ExportSummary, nullptr));
1914 // Run a second time to clean up any type tests left behind by WPD for use
1915 // in ICP (which is performed earlier than this in the regular LTO pipeline).
1916 MPM.addPass(LowerTypeTestsPass(nullptr, nullptr, true));
1917
1918 // Enable splitting late in the FullLTO post-link pipeline.
// NOTE(review): lines 1919-1920 missing.
1921
1922 // Add late LTO optimization passes.
1923 FunctionPassManager LateFPM;
1924
1925 // LoopSink pass sinks instructions hoisted by LICM, which serves as a
1926 // canonicalization pass that enables other optimizations. As a result,
1927 // LoopSink pass needs to be a very late IR pass to avoid undoing LICM
1928 // result too early.
1929 LateFPM.addPass(LoopSinkPass());
1930
1931 // This hoists/decomposes div/rem ops. It should run after other sink/hoist
1932 // passes to avoid re-sinking, but before SimplifyCFG because it can allow
1933 // flattening of blocks.
1934 LateFPM.addPass(DivRemPairsPass());
1935
1936 // Delete basic blocks, which optimization passes may have killed.
1937 LateFPM.addPass(SimplifyCFGPass(
1938 SimplifyCFGOptions().convertSwitchRangeToICmp(true).hoistCommonInsts(
1939 true)));
1940 MPM.addPass(createModuleToFunctionPassAdaptor(std::move(LateFPM)));
1941
1942 // Drop bodies of available externally objects to improve GlobalDCE.
// NOTE(review): line 1943 missing — the pass-add this comment describes.
1944
1945 // Now that we have optimized the program, discard unreachable functions.
1946 MPM.addPass(GlobalDCEPass(/*InLTOPostLink=*/true));
1947
1948 if (PTO.MergeFunctions)
// NOTE(review): lines 1949, 1952, 1954, 1957 missing — the passes guarded by
// these conditions and the annotation-remarks add.
1950
1951 if (PTO.CallGraphProfile)
1953
1955
1956 // Emit annotation remarks.
1958
1959 return MPM;
1960}
1961
// NOTE(review): buildO0DefaultPipeline — the signature opener (original
// line 1962) and many pass-add statements were dropped by the doxygen
// extraction; gaps in the fused numbering mark every dropped span.
1963 bool LTOPreLink) {
1964 assert(Level == OptimizationLevel::O0 &&
1965 "buildO0DefaultPipeline should only be used with O0");
1966
1968
1969 // Perform pseudo probe instrumentation in O0 mode. This is for the
1970 // consistency between different build modes. For example, a LTO build can be
1971 // mixed with an O0 prelink and an O2 postlink. Loading a sample profile in
1972 // the postlink will require pseudo probe instrumentation in the prelink.
1973 if (PGOOpt && PGOOpt->PseudoProbeForProfiling)
// NOTE(review): line 1974 missing — the pass added under this condition.
1975
1976 if (PGOOpt && (PGOOpt->Action == PGOOptions::IRInstr ||
1977 PGOOpt->Action == PGOOptions::IRUse))
// NOTE(review): line 1978 missing — presumably the function call whose
// argument list follows (`addPGOInstrPassesForO0(` — TODO confirm).
1979 MPM,
1980 /*RunProfileGen=*/(PGOOpt->Action == PGOOptions::IRInstr),
1981 /*IsCS=*/false, PGOOpt->AtomicCounterUpdate, PGOOpt->ProfileFile,
1982 PGOOpt->ProfileRemappingFile, PGOOpt->FS);
1983
// NOTE(review): lines 1984, 1987, 1989 missing.
1985
1986 if (PGOOpt && PGOOpt->DebugInfoForProfiling)
1988
1990
1991 // Build a minimal pipeline based on the semantics required by LLVM,
1992 // which is just that always inlining occurs. Further, disable generating
1993 // lifetime intrinsics to avoid enabling further optimizations during
1994 // code generation.
// NOTE(review): line 1995 missing — the pass-add opener whose final argument
// survives below.
1996 /*InsertLifetimeIntrinsics=*/false));
1997
1998 if (PTO.MergeFunctions)
// NOTE(review): lines 1999, 2003 missing — the MergeFunctions add and the
// argument of the addPass call under EnableMatrix.
2000
2001 if (EnableMatrix)
2002 MPM.addPass(
2004
2005 if (!CGSCCOptimizerLateEPCallbacks.empty()) {
2006 CGSCCPassManager CGPM;
// NOTE(review): lines 2007, 2009 missing — the callback invocation and the
// statement guarded by the isEmpty check.
2008 if (!CGPM.isEmpty())
2010 }
2011 if (!LateLoopOptimizationsEPCallbacks.empty()) {
2012 LoopPassManager LPM;
// NOTE(review): lines 2013, 2015 missing — same dropped pattern as above.
2014 if (!LPM.isEmpty()) {
2016 createFunctionToLoopPassAdaptor(std::move(LPM))));
2017 }
2018 }
2019 if (!LoopOptimizerEndEPCallbacks.empty()) {
2020 LoopPassManager LPM;
// NOTE(review): lines 2021, 2023 missing.
2022 if (!LPM.isEmpty()) {
2024 createFunctionToLoopPassAdaptor(std::move(LPM))));
2025 }
2026 }
2027 if (!ScalarOptimizerLateEPCallbacks.empty()) {
// NOTE(review): lines 2028-2029, 2031 missing — including, presumably, the
// `FunctionPassManager FPM;` declaration; TODO confirm.
2030 if (!FPM.isEmpty())
2032 }
2033
// NOTE(review): lines 2034, 2037-2038, 2040 missing.
2035
2036 if (!VectorizerStartEPCallbacks.empty()) {
2039 if (!FPM.isEmpty())
2041 }
2042
2043 ModulePassManager CoroPM;
2044 CoroPM.addPass(CoroEarlyPass());
2045 CGSCCPassManager CGPM;
2046 CGPM.addPass(CoroSplitPass());
2047 CoroPM.addPass(createModuleToPostOrderCGSCCPassAdaptor(std::move(CGPM)));
2048 CoroPM.addPass(CoroCleanupPass());
2049 CoroPM.addPass(GlobalDCEPass());
2050 MPM.addPass(CoroConditionalWrapper(std::move(CoroPM)));
2051
// NOTE(review): lines 2052, 2057 missing.
2053
2054 if (LTOPreLink)
2055 addRequiredLTOPreLinkPasses(MPM);
2056
2058
2059 return MPM;
2060}
2061
// NOTE(review): buildDefaultAAPipeline — the signature line (original 2062)
// and the AA registration statements (2070-2071, 2075-2076, 2082-2083, 2087)
// were dropped by the doxygen extraction; only the comments describing the
// registration order survive.
2063 AAManager AA;
2064
2065 // The order in which these are registered determines their priority when
2066 // being queried.
2067
2068 // First we register the basic alias analysis that provides the majority of
2069 // per-function local AA logic. This is a stateless, on-demand local set of
2070 // AA techniques.
// NOTE(review): registration statement missing here.
2072
2073 // Next we query fast, specialized alias analyses that wrap IR-embedded
2074 // information about aliasing.
// NOTE(review): registration statements missing here.
2077
2078 // Add support for querying global aliasing information when available.
2079 // Because the `AAManager` is a function analysis and `GlobalsAA` is a module
2080 // analysis, all that the `AAManager` can do is query for any *cached*
2081 // results from `GlobalsAA` through a readonly proxy.
// NOTE(review): registration statements missing here.
2084
2085 // Add target-specific alias analyses.
2086 if (TM)
// NOTE(review): line 2087 (the statement guarded by this condition) missing.
2088
2089 return AA;
2090}
aarch64 falkor hwpf fix Falkor HW Prefetch Fix Late Phase
AggressiveInstCombiner - Combine expression patterns to form expressions with fewer,...
Provides passes to inlining "always_inline" functions.
This is the interface for LLVM's primary stateless and local alias analysis.
This file provides the interface for LLVM's Call Graph Profile pass.
This header provides classes for managing passes over SCCs of the call graph.
#define clEnumValN(ENUMVAL, FLAGNAME, DESC)
Definition: CommandLine.h:680
This file provides the interface for a simple, fast CSE pass.
This file provides a pass which clones the current module and runs the provided pass pipeline on the ...
Super simple passes to force specific function attrs from the commandline into the IR for debugging p...
Provides passes for computing function attributes based on interprocedural analyses.
This file provides the interface for LLVM's Global Value Numbering pass which eliminates fully redund...
This is the interface for a simple mod/ref and alias analysis over globals.
Interfaces for passes which infer implicit function attributes from the name and signature of functio...
This file provides the primary interface to the instcombine pass.
Defines passes for running instruction simplification across chunks of IR.
This file provides the interface for LLVM's PGO Instrumentation lowering pass.
See the comments on JumpThreadingPass.
cl::opt< bool > EnableMemProfContextDisambiguation
Enable MemProf context disambiguation for thin link.
static LVOptions Options
Definition: LVOptions.cpp:25
This header defines the LoopLoadEliminationPass object.
This header provides classes for managing a pipeline of passes over loops in LLVM IR.
The header file for the LowerConstantIntrinsics pass as used by the new pass manager.
The header file for the LowerExpectIntrinsic pass as used by the new pass manager.
This pass performs merges of loads and stores on both sides of a.
This file provides the interface for LLVM's Global Value Numbering pass.
This header enumerates the LLVM-provided high-level optimization levels.
This file provides the interface for IR based instrumentation passes ( (profile-gen,...
Define option tunables for PGO.
ModulePassManager MPM
static cl::opt< bool > EnableMergeFunctions("enable-merge-functions", cl::init(false), cl::Hidden, cl::desc("Enable function merging as part of the optimization pipeline"))
static cl::opt< bool > EnableGlobalAnalyses("enable-global-analyses", cl::init(true), cl::Hidden, cl::desc("Enable inter-procedural analyses"))
static cl::opt< bool > EnableIROutliner("ir-outliner", cl::init(false), cl::Hidden, cl::desc("Enable ir outliner pass"))
static cl::opt< bool > RunNewGVN("enable-newgvn", cl::init(false), cl::Hidden, cl::desc("Run the NewGVN pass"))
cl::opt< bool > EnableInferAlignmentPass
static cl::opt< bool > DisablePreInliner("disable-preinline", cl::init(false), cl::Hidden, cl::desc("Disable pre-instrumentation inliner"))
static cl::opt< bool > EnableEagerlyInvalidateAnalyses("eagerly-invalidate-analyses", cl::init(true), cl::Hidden, cl::desc("Eagerly invalidate more analyses in default pipelines"))
static cl::opt< bool > ExtraVectorizerPasses("extra-vectorizer-passes", cl::init(false), cl::Hidden, cl::desc("Run cleanup optimization passes after vectorization"))
cl::opt< bool > EnableMemProfContextDisambiguation("enable-memprof-context-disambiguation", cl::init(false), cl::Hidden, cl::ZeroOrMore, cl::desc("Enable MemProf context disambiguation"))
static void addAnnotationRemarksPass(ModulePassManager &MPM)
static cl::opt< bool > EnablePostPGOLoopRotation("enable-post-pgo-loop-rotation", cl::init(true), cl::Hidden, cl::desc("Run the loop rotation transformation after PGO instrumentation"))
static InlineParams getInlineParamsFromOptLevel(OptimizationLevel Level)
static cl::opt< bool > EnableGVNSink("enable-gvn-sink", cl::desc("Enable the GVN sinking pass (default = off)"))
static cl::opt< bool > RunPartialInlining("enable-partial-inlining", cl::init(false), cl::Hidden, cl::desc("Run Partial inlining pass"))
static cl::opt< bool > EnableGVNHoist("enable-gvn-hoist", cl::desc("Enable the GVN hoisting pass (default = off)"))
static cl::opt< bool > EnableDFAJumpThreading("enable-dfa-jump-thread", cl::desc("Enable DFA jump threading"), cl::init(false), cl::Hidden)
static cl::opt< bool > EnableCHR("enable-chr", cl::init(true), cl::Hidden, cl::desc("Enable control height reduction optimization (CHR)"))
static cl::opt< bool > EnableHotColdSplit("hot-cold-split", cl::desc("Enable hot-cold splitting pass"))
static cl::opt< bool > EnableLoopInterchange("enable-loopinterchange", cl::init(false), cl::Hidden, cl::desc("Enable the experimental LoopInterchange Pass"))
static cl::opt< bool > PerformMandatoryInliningsFirst("mandatory-inlining-first", cl::init(true), cl::Hidden, cl::desc("Perform mandatory inlinings module-wide, before performing " "inlining"))
static cl::opt< int > PreInlineThreshold("preinline-threshold", cl::Hidden, cl::init(75), cl::desc("Control the amount of inlining in pre-instrumentation inliner " "(default = 75)"))
static cl::opt< bool > EnableUnrollAndJam("enable-unroll-and-jam", cl::init(false), cl::Hidden, cl::desc("Enable Unroll And Jam Pass"))
static cl::opt< bool > EnableModuleInliner("enable-module-inliner", cl::init(false), cl::Hidden, cl::desc("Enable module inliner"))
static cl::opt< bool > EnableMatrix("enable-matrix", cl::init(false), cl::Hidden, cl::desc("Enable lowering of the matrix intrinsics"))
static cl::opt< AttributorRunOption > AttributorRun("attributor-enable", cl::Hidden, cl::init(AttributorRunOption::NONE), cl::desc("Enable the attributor inter-procedural deduction pass"), cl::values(clEnumValN(AttributorRunOption::ALL, "all", "enable all attributor runs"), clEnumValN(AttributorRunOption::MODULE, "module", "enable module-wide attributor runs"), clEnumValN(AttributorRunOption::CGSCC, "cgscc", "enable call graph SCC attributor runs"), clEnumValN(AttributorRunOption::NONE, "none", "disable attributor runs")))
static cl::opt< bool > EnableOrderFileInstrumentation("enable-order-file-instrumentation", cl::init(false), cl::Hidden, cl::desc("Enable order file instrumentation (default = off)"))
static cl::opt< bool > UseLoopVersioningLICM("enable-loop-versioning-licm", cl::init(false), cl::Hidden, cl::desc("Enable the experimental Loop Versioning LICM pass"))
static cl::opt< bool > EnableSyntheticCounts("enable-npm-synthetic-counts", cl::Hidden, cl::desc("Run synthetic function entry count generation " "pass"))
static bool isLTOPreLink(ThinOrFullLTOPhase Phase)
static cl::opt< bool > EnablePGOInlineDeferral("enable-npm-pgo-inline-deferral", cl::init(true), cl::Hidden, cl::desc("Enable inline deferral during PGO"))
Flag to enable inline deferral during PGO.
static cl::opt< InliningAdvisorMode > UseInlineAdvisor("enable-ml-inliner", cl::init(InliningAdvisorMode::Default), cl::Hidden, cl::desc("Enable ML policy for inliner. Currently trained for -Oz only"), cl::values(clEnumValN(InliningAdvisorMode::Default, "default", "Heuristics-based inliner version"), clEnumValN(InliningAdvisorMode::Development, "development", "Use development mode (runtime-loadable model)"), clEnumValN(InliningAdvisorMode::Release, "release", "Use release mode (AOT-compiled model)")))
static cl::opt< bool > FlattenedProfileUsed("flattened-profile-used", cl::init(false), cl::Hidden, cl::desc("Indicate the sample profile being used is flattened, i.e., " "no inline hierarchy exists in the profile"))
static cl::opt< bool > EnableConstraintElimination("enable-constraint-elimination", cl::init(true), cl::Hidden, cl::desc("Enable pass to eliminate conditions based on linear constraints"))
static cl::opt< bool > EnableLoopFlatten("enable-loop-flatten", cl::init(false), cl::Hidden, cl::desc("Enable the LoopFlatten Pass"))
This header defines various interfaces for pass management in LLVM.
This file implements relative lookup table converter that converts lookup tables to relative lookup t...
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
This file provides the interface for LLVM's Scalar Replacement of Aggregates pass.
This file provides the interface for the pseudo probe implementation for AutoFDO.
This file provides the interface for the sampled PGO loader pass.
This is the interface for a metadata-based scoped no-alias analysis.
This file provides the interface for the pass responsible for both simplifying and canonicalizing the...
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
This is the interface for a metadata-based TBAA.
Defines the virtual file system interface vfs::FileSystem.
A manager for alias analyses.
void registerFunctionAnalysis()
Register a specific AA result.
void registerModuleAnalysis()
Register a specific AA result.
Inlines functions marked as "always_inline".
Definition: AlwaysInliner.h:32
Argument promotion pass.
Analysis pass providing a never-invalidated alias analysis result.
Simple pass that canonicalizes aliases.
A pass that merges duplicate global constants into a single constant.
Definition: ConstantMerge.h:29
This class implements a trivial dead store elimination.
Eliminate dead arguments (and return values) from functions.
A pass that transforms external global definitions into declarations.
Pass embeds a copy of the module optimized with the provided pass pipeline into a global variable.
The core GVN pass object.
Definition: GVN.h:117
Pass to remove unused function declarations.
Definition: GlobalDCE.h:36
Optimize globals that never have their address taken.
Definition: GlobalOpt.h:25
Pass to perform split of global variables.
Definition: GlobalSplit.h:26
Analysis pass providing a never-invalidated alias analysis result.
Pass to outline cold regions.
Pass to perform interprocedural constant propagation.
Definition: SCCP.h:48
Pass to outline similar regions.
Definition: IROutliner.h:444
Run instruction simplification across each instruction in the function.
The instrumentation pass for recording function order.
Instrumentation based profiling lowering pass.
A smart pointer to a reference-counted object that inherits from RefCountedBase or ThreadSafeRefCount...
This pass performs 'jump threading', which looks at blocks that have multiple predecessors and multip...
Definition: JumpThreading.h:78
Performs Loop Invariant Code Motion Pass.
Definition: LICM.h:66
Loop unroll pass that only does full loop unrolling and peeling.
Performs Loop Idiom Recognize Pass.
Performs Loop Inst Simplify Pass.
A simple loop rotation transformation.
Definition: LoopRotation.h:24
Performs basic CFG simplifications to assist other loop passes.
A pass that does profile-guided sinking of instructions into loops.
Definition: LoopSink.h:33
A simple loop rotation transformation.
Loop unroll pass that will support both full and partial unrolling.
Merge identical functions.
The module inliner pass for the new pass manager.
Definition: ModuleInliner.h:27
Module pass, wrapping the inliner pass.
Definition: Inliner.h:62
void addModulePass(T Pass)
Add a module pass that runs before the CGSCC passes.
Definition: Inliner.h:78
Class to hold module path string table and global value map, and encapsulate methods for operating on...
Simple pass that provides a name to every anonymous globals.
OpenMP optimizations pass.
Definition: OpenMPOpt.h:42
static const OptimizationLevel O3
Optimize for fast execution as much as possible.
static const OptimizationLevel Oz
A very specialized mode that will optimize for code size at any and all costs.
static const OptimizationLevel O0
Disable as many optimizations as possible.
static const OptimizationLevel Os
Similar to O2 but tries to optimize for small code size instead of fast execution without triggering ...
static const OptimizationLevel O2
Optimize for fast execution as much as possible without triggering significant incremental compile ti...
static const OptimizationLevel O1
Optimize quickly without destroying debuggability.
The indirect function call promotion pass.
The instrumentation (profile-instr-gen) pass for IR based PGO.
The instrumentation (profile-instr-gen) pass for IR based PGO.
The profile annotation (profile-instr-use) pass for IR based PGO.
The profile size based optimization pass for memory intrinsics.
Pass to remove unused function declarations.
ModulePassManager buildO0DefaultPipeline(OptimizationLevel Level, bool LTOPreLink=false)
Build an O0 pipeline with the minimal semantically required passes.
void invokeFullLinkTimeOptimizationLastEPCallbacks(ModulePassManager &MPM, OptimizationLevel Level)
ModuleInlinerWrapperPass buildInlinerPipeline(OptimizationLevel Level, ThinOrFullLTOPhase Phase)
Construct the module pipeline that performs inlining as well as the inlining-driven cleanups.
void invokeOptimizerLastEPCallbacks(ModulePassManager &MPM, OptimizationLevel Level)
void invokeVectorizerStartEPCallbacks(FunctionPassManager &FPM, OptimizationLevel Level)
AAManager buildDefaultAAPipeline()
Build the default AAManager with the default alias analysis pipeline registered.
void invokeCGSCCOptimizerLateEPCallbacks(CGSCCPassManager &CGPM, OptimizationLevel Level)
ModulePassManager buildThinLTOPreLinkDefaultPipeline(OptimizationLevel Level)
Build a pre-link, ThinLTO-targeting default optimization pipeline to a pass manager.
void invokeScalarOptimizerLateEPCallbacks(FunctionPassManager &FPM, OptimizationLevel Level)
ModulePassManager buildPerModuleDefaultPipeline(OptimizationLevel Level, bool LTOPreLink=false)
Build a per-module default optimization pipeline.
void invokePipelineStartEPCallbacks(ModulePassManager &MPM, OptimizationLevel Level)
FunctionPassManager buildFunctionSimplificationPipeline(OptimizationLevel Level, ThinOrFullLTOPhase Phase)
Construct the core LLVM function canonicalization and simplification pipeline.
void invokePeepholeEPCallbacks(FunctionPassManager &FPM, OptimizationLevel Level)
void invokeLoopOptimizerEndEPCallbacks(LoopPassManager &LPM, OptimizationLevel Level)
ModulePassManager buildLTODefaultPipeline(OptimizationLevel Level, ModuleSummaryIndex *ExportSummary)
Build an LTO default optimization pipeline to a pass manager.
ModulePassManager buildModuleInlinerPipeline(OptimizationLevel Level, ThinOrFullLTOPhase Phase)
Construct the module pipeline that performs inlining with module inliner pass.
ModulePassManager buildThinLTODefaultPipeline(OptimizationLevel Level, const ModuleSummaryIndex *ImportSummary)
Build an ThinLTO default optimization pipeline to a pass manager.
void invokeLateLoopOptimizationsEPCallbacks(LoopPassManager &LPM, OptimizationLevel Level)
void invokeOptimizerEarlyEPCallbacks(ModulePassManager &MPM, OptimizationLevel Level)
void invokePipelineEarlySimplificationEPCallbacks(ModulePassManager &MPM, OptimizationLevel Level)
void invokeFullLinkTimeOptimizationEarlyEPCallbacks(ModulePassManager &MPM, OptimizationLevel Level)
ModulePassManager buildFatLTODefaultPipeline(OptimizationLevel Level, bool ThinLTO, bool EmitSummary)
Build a fat object default optimization pipeline.
ModulePassManager buildModuleSimplificationPipeline(OptimizationLevel Level, ThinOrFullLTOPhase Phase)
Construct the core LLVM module canonicalization and simplification pipeline.
ModulePassManager buildModuleOptimizationPipeline(OptimizationLevel Level, ThinOrFullLTOPhase LTOPhase)
Construct the core LLVM module optimization pipeline.
void addPGOInstrPassesForO0(ModulePassManager &MPM, bool RunProfileGen, bool IsCS, bool AtomicCounterUpdate, std::string ProfileFile, std::string ProfileRemappingFile, IntrusiveRefCntPtr< vfs::FileSystem > FS)
Add PGOInstrumenation passes for O0 only.
ModulePassManager buildLTOPreLinkDefaultPipeline(OptimizationLevel Level)
Build a pre-link, LTO-targeting default optimization pipeline to a pass manager.
LLVM_ATTRIBUTE_MINSIZE std::enable_if_t< is_detected< HasRunOnLoopT, PassT >::value > addPass(PassT &&Pass)
LLVM_ATTRIBUTE_MINSIZE std::enable_if_t<!std::is_same< PassT, PassManager >::value > addPass(PassT &&Pass)
Definition: PassManager.h:544
bool isEmpty() const
Returns if the pass manager contains any passes.
Definition: PassManager.h:568
unsigned LicmMssaNoAccForPromotionCap
Tuning option to disable promotion to scalars in LICM with MemorySSA, if the number of access is too ...
Definition: PassBuilder.h:71
bool SLPVectorization
Tuning option to enable/disable slp loop vectorization, set based on opt level.
Definition: PassBuilder.h:56
int InlinerThreshold
Tuning option to override the default inliner threshold.
Definition: PassBuilder.h:85
bool CallGraphProfile
Tuning option to enable/disable call graph profile.
Definition: PassBuilder.h:75
bool MergeFunctions
Tuning option to enable/disable function merging.
Definition: PassBuilder.h:82
bool ForgetAllSCEVInLoopUnroll
Tuning option to forget all SCEV loops in LoopUnroll.
Definition: PassBuilder.h:63
unsigned LicmMssaOptCap
Tuning option to cap the number of calls to retrieve clobbering accesses in MemorySSA,...
Definition: PassBuilder.h:67
bool LoopInterleaving
Tuning option to set loop interleaving on/off, set based on opt level.
Definition: PassBuilder.h:48
PipelineTuningOptions()
Constructor sets pipeline tuning defaults based on cl::opts.
bool LoopUnrolling
Tuning option to enable/disable loop unrolling. Its default value is true.
Definition: PassBuilder.h:59
bool LoopVectorization
Tuning option to enable/disable loop vectorization, set based on opt level.
Definition: PassBuilder.h:52
Reassociate commutative expressions.
Definition: Reassociate.h:71
A pass to do RPO deduction and propagation of function attributes.
Definition: FunctionAttrs.h:73
This pass performs function-level constant propagation and merging.
Definition: SCCP.h:29
An optimization pass providing Scalar Replacement of Aggregates.
Definition: SROA.h:96
The sample profiler data loader pass.
Definition: SampleProfile.h:39
Analysis pass providing a never-invalidated alias analysis result.
This pass transforms loops that contain branches or switches on loop- invariant conditions to have mu...
A pass to simplify and canonicalize the CFG of a function.
Definition: SimplifyCFG.h:29
virtual void registerDefaultAliasAnalyses(AAManager &)
Allow the target to register alias analyses with the AAManager for use with the new pass manager.
Analysis pass providing a never-invalidated alias analysis result.
Optimize scalar/vector interactions in IR using target cost models.
Definition: VectorCombine.h:23
Interfaces for registering analysis passes, producing common pass manager configurations,...
@ C
The default llvm calling convention, compatible with C.
Definition: CallingConv.h:34
ValuesClass values(OptsTy... Options)
Helper to build a ValuesClass by forwarding a variable number of arguments as an initializer list to ...
Definition: CommandLine.h:705
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:445
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
ModuleToFunctionPassAdaptor createModuleToFunctionPassAdaptor(FunctionPassT &&Pass, bool EagerlyInvalidate=false)
A function to deduce a function pass type and wrap it in the templated adaptor.
Definition: PassManager.h:1218
@ MODULE
Definition: Attributor.h:6386
@ CGSCC
Definition: Attributor.h:6387
ThinOrFullLTOPhase
This enumerates the LLVM full LTO or ThinLTO optimization phases.
Definition: Pass.h:76
@ FullLTOPreLink
Full LTO prelink phase.
@ ThinLTOPostLink
ThinLTO postlink (backend compile) phase.
@ None
No LTO/ThinLTO behavior needed.
@ FullLTOPostLink
Full LTO postlink (backend compile) phase.
@ ThinLTOPreLink
ThinLTO prelink (summary) phase.
ModuleToPostOrderCGSCCPassAdaptor createModuleToPostOrderCGSCCPassAdaptor(CGSCCPassT &&Pass)
A function to deduce a function pass type and wrap it in the templated adaptor.
CGSCCToFunctionPassAdaptor createCGSCCToFunctionPassAdaptor(FunctionPassT &&Pass, bool EagerlyInvalidate=false, bool NoRerun=false)
A function to deduce a function pass type and wrap it in the templated adaptor.
cl::opt< bool > ForgetSCEVInLoopUnroll
bool AreStatisticsEnabled()
Check if statistics are enabled.
Definition: Statistic.cpp:139
InlineParams getInlineParams()
Generate the parameters to tune the inline cost analysis based only on the commandline options.
cl::opt< bool > EnableKnowledgeRetention
enable preservation of attributes in assume like: call void @llvm.assume(i1 true) [ "nonnull"(i32* PT...
cl::opt< unsigned > SetLicmMssaNoAccForPromotionCap
std::enable_if_t< is_detected< HasRunOnLoopT, LoopPassT >::value, FunctionToLoopPassAdaptor > createFunctionToLoopPassAdaptor(LoopPassT &&Pass, bool UseMemorySSA=false, bool UseBlockFrequencyInfo=false, bool UseBranchProbabilityInfo=false)
A function to deduce a loop pass type and wrap it in the templated adaptor.
cl::opt< unsigned > MaxDevirtIterations("max-devirt-iterations", cl::ReallyHidden, cl::init(4))
cl::opt< unsigned > SetLicmMssaOptCap
A DCE pass that assumes instructions are dead until proven otherwise.
Definition: ADCE.h:31
Pass to convert @llvm.global.annotations to !annotation metadata.
This pass attempts to minimize the number of assume without losing any information.
Hoist/decompose integer division and remainder instructions to enable CFG improvements and better cod...
Definition: DivRemPairs.h:23
A simple and fast domtree-based CSE pass.
Definition: EarlyCSE.h:30
A pass manager to run a set of extra function simplification passes after vectorization,...
Pass which forces specific function attributes into the IR, primarily as a debugging tool.
A simple and fast domtree-based GVN pass to hoist common expressions from sibling branches.
Definition: GVN.h:383
Uses an "inverted" value numbering to decide the similarity of expressions and sinks similar expressi...
Definition: GVN.h:390
A set of parameters to control various transforms performed by IPSCCP pass.
Definition: SCCP.h:35
A pass which infers function attributes from the names and signatures of function declarations in a m...
Provides context on when an inline advisor is constructed in the pipeline (e.g., link phase,...
Definition: InlineAdvisor.h:60
Thresholds to tune inline cost analysis.
Definition: InlineCost.h:205
std::optional< int > HotCallSiteThreshold
Threshold to use when the callsite is considered hot.
Definition: InlineCost.h:222
int DefaultThreshold
The default threshold to start with for a callee.
Definition: InlineCost.h:207
std::optional< bool > EnableDeferral
Indicate whether we should allow inline deferral.
Definition: InlineCost.h:235
std::optional< int > HintThreshold
Threshold to use for callees with inline hint.
Definition: InlineCost.h:210
Options for the frontend instrumentation based profiling pass.
A no-op pass template which simply forces a specific analysis result to be invalidated.
Definition: PassManager.h:1272
Pass to forward loads in a loop around the backedge to subsequent iterations.
A set of parameters used to control various transforms performed by the LoopUnroll pass.
The LoopVectorize Pass.
Computes function attributes in post-order over the call graph.
Definition: FunctionAttrs.h:49
A utility pass template to force an analysis result to be available.
Definition: PassManager.h:1245