LLVM 19.0.0git
PassBuilderPipelines.cpp
1//===- Construction of pass pipelines -------------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8/// \file
9///
10/// This file provides the implementation of the PassBuilder based on our
11/// static pass registry as well as related functionality. It also provides
12/// helpers to aid in analyzing, debugging, and testing passes and pass
13/// pipelines.
14///
15//===----------------------------------------------------------------------===//
16
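// Illustrative usage sketch (not part of the upstream source): the default
// pipelines defined below are typically driven from client code along these
// lines, assuming the usual PassBuilder analysis registration:
//
//   llvm::LoopAnalysisManager LAM;
//   llvm::FunctionAnalysisManager FAM;
//   llvm::CGSCCAnalysisManager CGAM;
//   llvm::ModuleAnalysisManager MAM;
//   llvm::PassBuilder PB;
//   PB.registerModuleAnalyses(MAM);
//   PB.registerCGSCCAnalyses(CGAM);
//   PB.registerFunctionAnalyses(FAM);
//   PB.registerLoopAnalyses(LAM);
//   PB.crossRegisterProxies(LAM, FAM, CGAM, MAM);
//   llvm::ModulePassManager MPM =
//       PB.buildPerModuleDefaultPipeline(llvm::OptimizationLevel::O2);
//   MPM.run(M, MAM); // M is the llvm::Module being optimized.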
17#include "llvm/ADT/Statistic.h"
26#include "llvm/IR/PassManager.h"
142
143using namespace llvm;
144
146 "enable-ml-inliner", cl::init(InliningAdvisorMode::Default), cl::Hidden,
147 cl::desc("Enable ML policy for inliner. Currently trained for -Oz only"),
148 cl::values(clEnumValN(InliningAdvisorMode::Default, "default",
149 "Heuristics-based inliner version"),
150 clEnumValN(InliningAdvisorMode::Development, "development",
151 "Use development mode (runtime-loadable model)"),
152 clEnumValN(InliningAdvisorMode::Release, "release",
153 "Use release mode (AOT-compiled model)")));
154
156 "enable-npm-synthetic-counts", cl::Hidden,
157 cl::desc("Run synthetic function entry count generation "
158 "pass"));
159
160/// Flag to enable inline deferral during PGO.
161static cl::opt<bool>
162 EnablePGOInlineDeferral("enable-npm-pgo-inline-deferral", cl::init(true),
164 cl::desc("Enable inline deferral during PGO"));
165
166static cl::opt<bool> EnableModuleInliner("enable-module-inliner",
167 cl::init(false), cl::Hidden,
168 cl::desc("Enable module inliner"));
169
171 "mandatory-inlining-first", cl::init(false), cl::Hidden,
172 cl::desc("Perform mandatory inlinings module-wide, before performing "
173 "inlining"));
174
176 "eagerly-invalidate-analyses", cl::init(true), cl::Hidden,
177 cl::desc("Eagerly invalidate more analyses in default pipelines"));
178
180 "enable-merge-functions", cl::init(false), cl::Hidden,
181 cl::desc("Enable function merging as part of the optimization pipeline"));
182
184 "enable-post-pgo-loop-rotation", cl::init(true), cl::Hidden,
185 cl::desc("Run the loop rotation transformation after PGO instrumentation"));
186
188 "enable-global-analyses", cl::init(true), cl::Hidden,
189 cl::desc("Enable inter-procedural analyses"));
190
191static cl::opt<bool>
192 RunPartialInlining("enable-partial-inlining", cl::init(false), cl::Hidden,
193 cl::desc("Run partial inlining pass"));
194
196 "extra-vectorizer-passes", cl::init(false), cl::Hidden,
197 cl::desc("Run cleanup optimization passes after vectorization"));
198
199static cl::opt<bool> RunNewGVN("enable-newgvn", cl::init(false), cl::Hidden,
200 cl::desc("Run the NewGVN pass"));
201
203 "enable-loopinterchange", cl::init(false), cl::Hidden,
204 cl::desc("Enable the experimental LoopInterchange Pass"));
205
206static cl::opt<bool> EnableUnrollAndJam("enable-unroll-and-jam",
207 cl::init(false), cl::Hidden,
208 cl::desc("Enable Unroll And Jam Pass"));
209
210static cl::opt<bool> EnableLoopFlatten("enable-loop-flatten", cl::init(false),
212 cl::desc("Enable the LoopFlatten Pass"));
213
214// Experimentally allow loop header duplication. This should allow for better
215// optimization at Oz, since loop-idiom recognition can then recognize things
216// like memcpy. If this ends up being useful for many targets, we should drop
217// this flag and make a code generation option that can be controlled
218// independently of the opt level and exposed through the frontend.
220 "enable-loop-header-duplication", cl::init(false), cl::Hidden,
221 cl::desc("Enable loop header duplication at any optimization level"));
222
223static cl::opt<bool>
224 EnableDFAJumpThreading("enable-dfa-jump-thread",
225 cl::desc("Enable DFA jump threading"),
226 cl::init(false), cl::Hidden);
227
228// TODO: turn on and remove flag
230 "enable-pgo-force-function-attrs",
231 cl::desc("Enable pass to set function attributes based on PGO profiles"),
232 cl::init(false));
233
234static cl::opt<bool>
235 EnableHotColdSplit("hot-cold-split",
236 cl::desc("Enable hot-cold splitting pass"));
237
238static cl::opt<bool> EnableIROutliner("ir-outliner", cl::init(false),
240 cl::desc("Enable ir outliner pass"));
241
242static cl::opt<bool>
243 DisablePreInliner("disable-preinline", cl::init(false), cl::Hidden,
244 cl::desc("Disable pre-instrumentation inliner"));
245
247 "preinline-threshold", cl::Hidden, cl::init(75),
248 cl::desc("Control the amount of inlining in pre-instrumentation inliner "
249 "(default = 75)"));
250
251static cl::opt<bool>
252 EnableGVNHoist("enable-gvn-hoist",
253 cl::desc("Enable the GVN hoisting pass (default = off)"));
254
255static cl::opt<bool>
256 EnableGVNSink("enable-gvn-sink",
257 cl::desc("Enable the GVN sinking pass (default = off)"));
258
260 "enable-jump-table-to-switch",
261 cl::desc("Enable JumpTableToSwitch pass (default = off)"));
262
263// This option is used to simplify testing of SampleFDO optimizations for
264// profile loading.
265static cl::opt<bool>
266 EnableCHR("enable-chr", cl::init(true), cl::Hidden,
267 cl::desc("Enable control height reduction optimization (CHR)"));
268
270 "flattened-profile-used", cl::init(false), cl::Hidden,
271 cl::desc("Indicate the sample profile being used is flattened, i.e., "
272 "no inline hierarchy exists in the profile"));
273
275 "enable-order-file-instrumentation", cl::init(false), cl::Hidden,
276 cl::desc("Enable order file instrumentation (default = off)"));
277
278static cl::opt<bool>
279 EnableMatrix("enable-matrix", cl::init(false), cl::Hidden,
280 cl::desc("Enable lowering of the matrix intrinsics"));
281
283 "enable-constraint-elimination", cl::init(true), cl::Hidden,
284 cl::desc(
285 "Enable pass to eliminate conditions based on linear constraints"));
286
288 "attributor-enable", cl::Hidden, cl::init(AttributorRunOption::NONE),
289 cl::desc("Enable the attributor inter-procedural deduction pass"),
290 cl::values(clEnumValN(AttributorRunOption::ALL, "all",
291 "enable all attributor runs"),
292 clEnumValN(AttributorRunOption::MODULE, "module",
293 "enable module-wide attributor runs"),
294 clEnumValN(AttributorRunOption::CGSCC, "cgscc",
295 "enable call graph SCC attributor runs"),
296 clEnumValN(AttributorRunOption::NONE, "none",
297 "disable attributor runs")));
298
300 "enable-loop-versioning-licm", cl::init(false), cl::Hidden,
301 cl::desc("Enable the experimental Loop Versioning LICM pass"));
302
303namespace llvm {
305
307} // namespace llvm
308
310 LoopInterleaving = true;
311 LoopVectorization = true;
312 SLPVectorization = false;
313 LoopUnrolling = true;
317 CallGraphProfile = true;
318 UnifiedLTO = false;
320 InlinerThreshold = -1;
322}
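// Illustrative sketch (not part of the upstream source): clients can override
// these tuning defaults before constructing a PassBuilder, e.g.
//
//   llvm::PipelineTuningOptions PTO;
//   PTO.LoopUnrolling = false;   // keep loops intact
//   PTO.InlinerThreshold = 500;  // hypothetical custom threshold
//   llvm::PassBuilder PB(/*TM=*/nullptr, PTO);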
323
324namespace llvm {
326} // namespace llvm
327
329 OptimizationLevel Level) {
330 for (auto &C : PeepholeEPCallbacks)
331 C(FPM, Level);
332}
335 for (auto &C : LateLoopOptimizationsEPCallbacks)
336 C(LPM, Level);
337}
339 OptimizationLevel Level) {
340 for (auto &C : LoopOptimizerEndEPCallbacks)
341 C(LPM, Level);
342}
345 for (auto &C : ScalarOptimizerLateEPCallbacks)
346 C(FPM, Level);
347}
349 OptimizationLevel Level) {
350 for (auto &C : CGSCCOptimizerLateEPCallbacks)
351 C(CGPM, Level);
352}
354 OptimizationLevel Level) {
355 for (auto &C : VectorizerStartEPCallbacks)
356 C(FPM, Level);
357}
359 OptimizationLevel Level) {
360 for (auto &C : OptimizerEarlyEPCallbacks)
361 C(MPM, Level);
362}
364 OptimizationLevel Level) {
365 for (auto &C : OptimizerLastEPCallbacks)
366 C(MPM, Level);
367}
370 for (auto &C : FullLinkTimeOptimizationEarlyEPCallbacks)
371 C(MPM, Level);
372}
375 for (auto &C : FullLinkTimeOptimizationLastEPCallbacks)
376 C(MPM, Level);
377}
379 OptimizationLevel Level) {
380 for (auto &C : PipelineStartEPCallbacks)
381 C(MPM, Level);
382}
385 for (auto &C : PipelineEarlySimplificationEPCallbacks)
386 C(MPM, Level);
387}
388
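// Illustrative sketch (not part of the upstream source): the extension-point
// callbacks invoked above are registered by clients on the PassBuilder, e.g.
//
//   PB.registerPeepholeEPCallback(
//       [](llvm::FunctionPassManager &FPM, llvm::OptimizationLevel Level) {
//         if (Level != llvm::OptimizationLevel::O0)
//           FPM.addPass(llvm::InstCombinePass()); // example extra pass
//       });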
389// Helper to add AnnotationRemarksPass.
392}
393
394// Helper to check if the current compilation phase is preparing for LTO
398}
399
400// TODO: Investigate the cost/benefit of tail call elimination on debugging.
402PassBuilder::buildO1FunctionSimplificationPipeline(OptimizationLevel Level,
404
406
409
410 // Form SSA out of local memory accesses after breaking apart aggregates into
411 // scalars.
413
414 // Catch trivial redundancies
415 FPM.addPass(EarlyCSEPass(true /* Enable mem-ssa. */));
416
417 // Hoisting of scalars and load expressions.
418 FPM.addPass(
419 SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(true)));
421
423
424 invokePeepholeEPCallbacks(FPM, Level);
425
426 FPM.addPass(
427 SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(true)));
428
429 // Form canonically associated expression trees, and simplify the trees using
430 // basic mathematical properties. For example, this will form (nearly)
431 // minimal multiplication trees.
433
434 // Add the primary loop simplification pipeline.
435 // FIXME: Currently this is split into two loop pass pipelines because we run
436 // some function passes in between them. These can and should be removed
437 // and/or replaced by scheduling the loop pass equivalents in the correct
438 // positions. But those equivalent passes aren't powerful enough yet.
439 // Specifically, `SimplifyCFGPass` and `InstCombinePass` are currently still
440 // used. We have `LoopSimplifyCFGPass` which isn't yet powerful enough to
441 // fully replace `SimplifyCFGPass`, and the closest to the other we have is
442 // `LoopInstSimplify`.
443 LoopPassManager LPM1, LPM2;
444
445 // Simplify the loop body. We do this initially to clean up after other loop
446 // passes run, either when iterating on a loop or on inner loops with
447 // implications on the outer loop.
450
451 // Try to remove as much code from the loop header as possible,
452 // to reduce the amount of IR that will have to be duplicated. However,
453 // do not perform speculative hoisting the first time as LICM
454 // will destroy metadata that may not need to be destroyed if run
455 // after loop rotation.
456 // TODO: Investigate promotion cap for O1.
458 /*AllowSpeculation=*/false));
459
460 LPM1.addPass(LoopRotatePass(/* Disable header duplication */ true,
462 // TODO: Investigate promotion cap for O1.
464 /*AllowSpeculation=*/true));
467 LPM1.addPass(LoopFlattenPass());
468
471
473
475
478
479 // Do not enable unrolling in PreLinkThinLTO phase during sample PGO
480 // because it changes the IR and makes profile annotation in the backend compile
481 // inaccurate. The normal unroller doesn't pay attention to forced full unroll
482 // attributes, so we need to make sure to allow the full unroll pass to pay
483 // attention to it.
484 if (Phase != ThinOrFullLTOPhase::ThinLTOPreLink || !PGOOpt ||
485 PGOOpt->Action != PGOOptions::SampleUse)
486 LPM2.addPass(LoopFullUnrollPass(Level.getSpeedupLevel(),
487 /* OnlyWhenForced= */ !PTO.LoopUnrolling,
489
491
492 FPM.addPass(createFunctionToLoopPassAdaptor(std::move(LPM1),
493 /*UseMemorySSA=*/true,
494 /*UseBlockFrequencyInfo=*/true));
495 FPM.addPass(
496 SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(true)));
498 // The loop passes in LPM2 (LoopFullUnrollPass) do not preserve MemorySSA.
499 // *All* loop passes must preserve it, in order to be able to use it.
500 FPM.addPass(createFunctionToLoopPassAdaptor(std::move(LPM2),
501 /*UseMemorySSA=*/false,
502 /*UseBlockFrequencyInfo=*/false));
503
504 // Delete small array after loop unroll.
506
507 // Specially optimize memory movement as it doesn't look like dataflow in SSA.
508 FPM.addPass(MemCpyOptPass());
509
510 // Sparse conditional constant propagation.
511 // FIXME: It isn't clear why we do this *after* loop passes rather than
512 // before...
513 FPM.addPass(SCCPPass());
514
515 // Delete dead bit computations (instcombine runs after to fold away the dead
516 // computations, and then ADCE will run later to exploit any new DCE
517 // opportunities that this creates).
518 FPM.addPass(BDCEPass());
519
520 // Run instcombine after redundancy and dead bit elimination to exploit
521 // opportunities opened up by them.
523 invokePeepholeEPCallbacks(FPM, Level);
524
525 FPM.addPass(CoroElidePass());
526
528
529 // Finally, do an expensive DCE pass to catch all the dead code exposed by
530 // the simplifications and basic cleanup after all the simplifications.
531 // TODO: Investigate if this is too expensive.
532 FPM.addPass(ADCEPass());
533 FPM.addPass(
534 SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(true)));
536 invokePeepholeEPCallbacks(FPM, Level);
537
538 return FPM;
539}
540
544 assert(Level != OptimizationLevel::O0 && "Must request optimizations!");
545
546 // The O1 pipeline has a separate pipeline creation function to simplify
547 // construction readability.
548 if (Level.getSpeedupLevel() == 1)
549 return buildO1FunctionSimplificationPipeline(Level, Phase);
550
552
555
556 // Form SSA out of local memory accesses after breaking apart aggregates into
557 // scalars.
559
560 // Catch trivial redundancies
561 FPM.addPass(EarlyCSEPass(true /* Enable mem-ssa. */));
564
565 // Hoisting of scalars and load expressions.
566 if (EnableGVNHoist)
567 FPM.addPass(GVNHoistPass());
568
569 // Global value numbering based sinking.
570 if (EnableGVNSink) {
571 FPM.addPass(GVNSinkPass());
572 FPM.addPass(
573 SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(true)));
574 }
575
576 // Speculative execution if the target has divergent branches; otherwise nop.
577 FPM.addPass(SpeculativeExecutionPass(/* OnlyIfDivergentTarget =*/true));
578
579 // Optimize based on known information about branches, and cleanup afterward.
582
583 // Jump table to switch conversion.
586
587 FPM.addPass(
588 SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(true)));
591
592 if (!Level.isOptimizingForSize())
594
595 invokePeepholeEPCallbacks(FPM, Level);
596
597 // For PGO use pipeline, try to optimize memory intrinsics such as memcpy
598 // using the size value profile. Don't perform this when optimizing for size.
599 if (PGOOpt && PGOOpt->Action == PGOOptions::IRUse &&
600 !Level.isOptimizingForSize())
602
604 FPM.addPass(
605 SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(true)));
606
607 // Form canonically associated expression trees, and simplify the trees using
608 // basic mathematical properties. For example, this will form (nearly)
609 // minimal multiplication trees.
611
614
615 // Add the primary loop simplification pipeline.
616 // FIXME: Currently this is split into two loop pass pipelines because we run
617 // some function passes in between them. These can and should be removed
618 // and/or replaced by scheduling the loop pass equivalents in the correct
619 // positions. But those equivalent passes aren't powerful enough yet.
620 // Specifically, `SimplifyCFGPass` and `InstCombinePass` are currently still
621 // used. We have `LoopSimplifyCFGPass` which isn't yet powerful enough to
622 // fully replace `SimplifyCFGPass`, and the closest to the other we have is
623 // `LoopInstSimplify`.
624 LoopPassManager LPM1, LPM2;
625
626 // Simplify the loop body. We do this initially to clean up after other loop
627 // passes run, either when iterating on a loop or on inner loops with
628 // implications on the outer loop.
631
632 // Try to remove as much code from the loop header as possible,
633 // to reduce the amount of IR that will have to be duplicated. However,
634 // do not perform speculative hoisting the first time as LICM
635 // will destroy metadata that may not need to be destroyed if run
636 // after loop rotation.
637 // TODO: Investigate promotion cap for O1.
639 /*AllowSpeculation=*/false));
640
641 // Disable header duplication in loop rotation at -Oz.
643 Level != OptimizationLevel::Oz,
645 // TODO: Investigate promotion cap for O1.
647 /*AllowSpeculation=*/true));
648 LPM1.addPass(
649 SimpleLoopUnswitchPass(/* NonTrivial */ Level == OptimizationLevel::O3));
651 LPM1.addPass(LoopFlattenPass());
652
655
656 {
658 ExtraPasses.addPass(SimpleLoopUnswitchPass(/* NonTrivial */ Level ==
660 LPM2.addPass(std::move(ExtraPasses));
661 }
662
664
666
669
670 // Do not enable unrolling in PreLinkThinLTO phase during sample PGO
671 // because it changes the IR and makes profile annotation in the backend compile
672 // inaccurate. The normal unroller doesn't pay attention to forced full unroll
673 // attributes, so we need to make sure to allow the full unroll pass to pay
674 // attention to it.
675 if (Phase != ThinOrFullLTOPhase::ThinLTOPreLink || !PGOOpt ||
676 PGOOpt->Action != PGOOptions::SampleUse)
677 LPM2.addPass(LoopFullUnrollPass(Level.getSpeedupLevel(),
678 /* OnlyWhenForced= */ !PTO.LoopUnrolling,
680
682
683 FPM.addPass(createFunctionToLoopPassAdaptor(std::move(LPM1),
684 /*UseMemorySSA=*/true,
685 /*UseBlockFrequencyInfo=*/true));
686 FPM.addPass(
687 SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(true)));
689 // The loop passes in LPM2 (LoopIdiomRecognizePass, IndVarSimplifyPass,
690 // LoopDeletionPass and LoopFullUnrollPass) do not preserve MemorySSA.
691 // *All* loop passes must preserve it, in order to be able to use it.
692 FPM.addPass(createFunctionToLoopPassAdaptor(std::move(LPM2),
693 /*UseMemorySSA=*/false,
694 /*UseBlockFrequencyInfo=*/false));
695
696 // Delete small array after loop unroll.
698
699 // Try vectorization/scalarization transforms that are both improvements
700 // themselves and can allow further folds with GVN and InstCombine.
701 FPM.addPass(VectorCombinePass(/*TryEarlyFoldsOnly=*/true));
702
703 // Eliminate redundancies.
705 if (RunNewGVN)
706 FPM.addPass(NewGVNPass());
707 else
708 FPM.addPass(GVNPass());
709
710 // Sparse conditional constant propagation.
711 // FIXME: It isn't clear why we do this *after* loop passes rather than
712 // before...
713 FPM.addPass(SCCPPass());
714
715 // Delete dead bit computations (instcombine runs after to fold away the dead
716 // computations, and then ADCE will run later to exploit any new DCE
717 // opportunities that this creates).
718 FPM.addPass(BDCEPass());
719
720 // Run instcombine after redundancy and dead bit elimination to exploit
721 // opportunities opened up by them.
723 invokePeepholeEPCallbacks(FPM, Level);
724
725 // Re-consider control flow based optimizations after redundancy elimination,
726 // redo DCE, etc.
729
732
733 // Finally, do an expensive DCE pass to catch all the dead code exposed by
734 // the simplifications and basic cleanup after all the simplifications.
735 // TODO: Investigate if this is too expensive.
736 FPM.addPass(ADCEPass());
737
738 // Specially optimize memory movement as it doesn't look like dataflow in SSA.
739 FPM.addPass(MemCpyOptPass());
740
741 FPM.addPass(DSEPass());
743
746 /*AllowSpeculation=*/true),
747 /*UseMemorySSA=*/true, /*UseBlockFrequencyInfo=*/false));
748
749 FPM.addPass(CoroElidePass());
750
752
754 .convertSwitchRangeToICmp(true)
755 .hoistCommonInsts(true)
756 .sinkCommonInsts(true)));
758 invokePeepholeEPCallbacks(FPM, Level);
759
760 return FPM;
761}
762
763void PassBuilder::addRequiredLTOPreLinkPasses(ModulePassManager &MPM) {
766}
767
768void PassBuilder::addPreInlinerPasses(ModulePassManager &MPM,
769 OptimizationLevel Level,
770 ThinOrFullLTOPhase LTOPhase) {
771 assert(Level != OptimizationLevel::O0 && "Not expecting O0 here!");
773 return;
774 InlineParams IP;
775
777
778 // FIXME: The hint threshold has the same value used by the regular inliner
779 // when not optimizing for size. This should probably be lowered after
780 // performance testing.
781 // FIXME: this comment is cargo-culted from the old pass manager; revisit.
782 IP.HintThreshold = Level.isOptimizingForSize() ? PreInlineThreshold : 325;
784 IP, /* MandatoryFirst */ true,
786 CGSCCPassManager &CGPipeline = MIWP.getPM();
787
790 FPM.addPass(EarlyCSEPass()); // Catch trivial redundancies.
791 FPM.addPass(SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(
792 true))); // Merge & remove basic blocks.
793 FPM.addPass(InstCombinePass()); // Combine silly sequences.
794 invokePeepholeEPCallbacks(FPM, Level);
795
796 CGPipeline.addPass(createCGSCCToFunctionPassAdaptor(
797 std::move(FPM), PTO.EagerlyInvalidateAnalyses));
798
799 MPM.addPass(std::move(MIWP));
800
801 // Delete anything that is now dead to make sure that we don't instrument
802 // dead code. Instrumentation can end up keeping dead code around and
803 // dramatically increasing code size.
805}
806
807void PassBuilder::addPostPGOLoopRotation(ModulePassManager &MPM,
808 OptimizationLevel Level) {
810 // Disable header duplication in loop rotation at -Oz.
814 Level != OptimizationLevel::Oz),
815 /*UseMemorySSA=*/false,
816 /*UseBlockFrequencyInfo=*/false),
818 }
819}
820
821void PassBuilder::addPGOInstrPasses(ModulePassManager &MPM,
822 OptimizationLevel Level, bool RunProfileGen,
823 bool IsCS, bool AtomicCounterUpdate,
824 std::string ProfileFile,
825 std::string ProfileRemappingFile,
827 assert(Level != OptimizationLevel::O0 && "Not expecting O0 here!");
828
829 if (!RunProfileGen) {
830 assert(!ProfileFile.empty() && "Profile use expecting a profile file!");
831 MPM.addPass(
832 PGOInstrumentationUse(ProfileFile, ProfileRemappingFile, IsCS, FS));
833 // Cache ProfileSummaryAnalysis once to avoid the potential need to insert
834 // RequireAnalysisPass for PSI before subsequent non-module passes.
836 return;
837 }
838
839 // Perform PGO instrumentation.
841
842 addPostPGOLoopRotation(MPM, Level);
843 // Add the profile lowering pass.
845 if (!ProfileFile.empty())
846 Options.InstrProfileOutput = ProfileFile;
847 // Do counter promotion at Level greater than O0.
848 Options.DoCounterPromotion = true;
849 Options.UseBFIInPromotion = IsCS;
850 Options.Atomic = AtomicCounterUpdate;
852}
853
855 ModulePassManager &MPM, bool RunProfileGen, bool IsCS,
856 bool AtomicCounterUpdate, std::string ProfileFile,
857 std::string ProfileRemappingFile, IntrusiveRefCntPtr<vfs::FileSystem> FS) {
858 if (!RunProfileGen) {
859 assert(!ProfileFile.empty() && "Profile use expecting a profile file!");
860 MPM.addPass(
861 PGOInstrumentationUse(ProfileFile, ProfileRemappingFile, IsCS, FS));
862 // Cache ProfileSummaryAnalysis once to avoid the potential need to insert
863 // RequireAnalysisPass for PSI before subsequent non-module passes.
865 return;
866 }
867
868 // Perform PGO instrumentation.
870 // Add the profile lowering pass.
872 if (!ProfileFile.empty())
873 Options.InstrProfileOutput = ProfileFile;
874 // Do not do counter promotion at O0.
875 Options.DoCounterPromotion = false;
876 Options.UseBFIInPromotion = IsCS;
877 Options.Atomic = AtomicCounterUpdate;
879}
880
882 return getInlineParams(Level.getSpeedupLevel(), Level.getSizeLevel());
883}
884
888 InlineParams IP;
889 if (PTO.InlinerThreshold == -1)
890 IP = getInlineParamsFromOptLevel(Level);
891 else
893 // For PreLinkThinLTO + SamplePGO, set hot-caller threshold to 0 to
894 // disable hot callsite inlining (as much as possible [1]) because it makes
895 // profile annotation in the backend inaccurate.
896 //
897 // [1] Note the cost of a function could be below zero due to erased
898 // prologue / epilogue.
899 if (Phase == ThinOrFullLTOPhase::ThinLTOPreLink && PGOOpt &&
900 PGOOpt->Action == PGOOptions::SampleUse)
902
903 if (PGOOpt)
905
909
910 // Require the GlobalsAA analysis for the module so we can query it within
911 // the CGSCC pipeline.
914 // Invalidate AAManager so it can be recreated and pick up the newly
915 // available GlobalsAA.
916 MIWP.addModulePass(
918 }
919
920 // Require the ProfileSummaryAnalysis for the module so we can query it within
921 // the inliner pass.
923
924 // Now begin the main postorder CGSCC pipeline.
925 // FIXME: The current CGSCC pipeline has its origins in the legacy pass
926 // manager and trying to emulate its precise behavior. Much of this doesn't
927 // make a lot of sense and we should revisit the core CGSCC structure.
928 CGSCCPassManager &MainCGPipeline = MIWP.getPM();
929
930 // Note: historically, the PruneEH pass was run first to deduce nounwind and
931 // generally clean up exception handling overhead. It isn't clear this is
932 // valuable as the inliner doesn't currently care whether it is inlining an
933 // invoke or a call.
934
936 MainCGPipeline.addPass(AttributorCGSCCPass());
937
938 // Deduce function attributes. We do another run of this after the function
939 // simplification pipeline, so this only needs to run when it could affect the
940 // function simplification pipeline, which is only the case with recursive
941 // functions.
942 MainCGPipeline.addPass(PostOrderFunctionAttrsPass(/*SkipNonRecursive*/ true));
943
944 // When at O3 add argument promotion to the pass pipeline.
945 // FIXME: It isn't at all clear why this should be limited to O3.
946 if (Level == OptimizationLevel::O3)
947 MainCGPipeline.addPass(ArgumentPromotionPass());
948
949 // Try to perform OpenMP specific optimizations. This is a (quick!) no-op if
950 // there are no OpenMP runtime calls present in the module.
951 if (Level == OptimizationLevel::O2 || Level == OptimizationLevel::O3)
952 MainCGPipeline.addPass(OpenMPOptCGSCCPass());
953
954 invokeCGSCCOptimizerLateEPCallbacks(MainCGPipeline, Level);
955
956 // Add the core function simplification pipeline nested inside the
957 // CGSCC walk.
960 PTO.EagerlyInvalidateAnalyses, /*NoRerun=*/true));
961
962 // Finally, deduce any function attributes based on the fully simplified
963 // function.
964 MainCGPipeline.addPass(PostOrderFunctionAttrsPass());
965
966 // Mark that the function is fully simplified and that it shouldn't be
967 // simplified again if we somehow revisit it due to CGSCC mutations unless
968 // it's been modified since.
971
972 MainCGPipeline.addPass(CoroSplitPass(Level != OptimizationLevel::O0));
973
974 // Make sure we don't affect potential future NoRerun CGSCC adaptors.
975 MIWP.addLateModulePass(createModuleToFunctionPassAdaptor(
977
978 return MIWP;
979}
980
985
987 // For PreLinkThinLTO + SamplePGO, set hot-caller threshold to 0 to
988 // disable hot callsite inlining (as much as possible [1]) because it makes
989 // profile annotation in the backend inaccurate.
990 //
991 // [1] Note the cost of a function could be below zero due to erased
992 // prologue / epilogue.
993 if (Phase == ThinOrFullLTOPhase::ThinLTOPreLink && PGOOpt &&
994 PGOOpt->Action == PGOOptions::SampleUse)
996
997 if (PGOOpt)
999
1000 // The inline deferral logic is used to avoid losing inlining chances in
1001 // the future. It is helpful for the SCC inliner, in which inlining is
1002 // processed in bottom-up order.
1003 // In the module inliner, the inlining order is priority-based by default,
1004 // so inline deferral is unnecessary there. We therefore disable the
1005 // inline deferral logic in the module inliner.
1006 IP.EnableDeferral = false;
1007
1009
1013
1016
1017 return MPM;
1018}
1019
1023 assert(Level != OptimizationLevel::O0 &&
1024 "Should not be used for O0 pipeline");
1025
1027 "FullLTOPostLink shouldn't call buildModuleSimplificationPipeline!");
1028
1030
1031 // Place pseudo probe instrumentation as the first pass of the pipeline to
1032 // minimize the impact of optimization changes.
1033 if (PGOOpt && PGOOpt->PseudoProbeForProfiling &&
1036
1037 bool HasSampleProfile = PGOOpt && (PGOOpt->Action == PGOOptions::SampleUse);
1038
1039 // In ThinLTO mode, when a flattened profile is used, all the available
1040 // profile information will be annotated in the PreLink phase, so there is
1041 // no need to load the profile again in PostLink.
1042 bool LoadSampleProfile =
1043 HasSampleProfile &&
1045
1046 // During the ThinLTO backend phase we perform early indirect call promotion
1047 // here, before globalopt. Otherwise imported available_externally functions
1048 // look unreferenced and are removed. If we are going to load the sample
1049 // profile then defer until later.
1050 // TODO: See if we can move later and consolidate with the location where
1051 // we perform ICP when we are loading a sample profile.
1052 // TODO: We pass HasSampleProfile (whether there was a sample profile file
1053 // passed to the compile) to the SamplePGO flag of ICP. This is used to
1054 // determine whether the new direct calls are annotated with prof metadata.
1055 // Ideally this should be determined from whether the IR is annotated with
1056 // sample profile, and not whether a sample profile was provided on the
1057 // command line. E.g. for flattened profiles where we will not be reloading
1058 // the sample profile in the ThinLTO backend, we ideally shouldn't have to
1059 // provide the sample profile file.
1060 if (Phase == ThinOrFullLTOPhase::ThinLTOPostLink && !LoadSampleProfile)
1061 MPM.addPass(PGOIndirectCallPromotion(true /* InLTO */, HasSampleProfile));
1062
1063 // Create an early function pass manager to clean up the output of the
1064 // frontend. Not necessary with LTO post link pipelines since the pre link
1065 // pipeline already cleaned up the frontend output.
1067 // Do basic inference of function attributes from known properties of system
1068 // libraries and other oracles.
1071
1072 FunctionPassManager EarlyFPM;
1073 EarlyFPM.addPass(EntryExitInstrumenterPass(/*PostInlining=*/false));
1074 // Lower llvm.expect to metadata before attempting transforms.
1075 // Compare/branch metadata may alter the behavior of passes like
1076 // SimplifyCFG.
1078 EarlyFPM.addPass(SimplifyCFGPass());
1080 EarlyFPM.addPass(EarlyCSEPass());
1081 if (Level == OptimizationLevel::O3)
1082 EarlyFPM.addPass(CallSiteSplittingPass());
1084 std::move(EarlyFPM), PTO.EagerlyInvalidateAnalyses));
1085 }
1086
1087 if (LoadSampleProfile) {
1088 // Annotate sample profile right after early FPM to ensure freshness of
1089 // the debug info.
1090 MPM.addPass(SampleProfileLoaderPass(PGOOpt->ProfileFile,
1091 PGOOpt->ProfileRemappingFile, Phase));
1092 // Cache ProfileSummaryAnalysis once to avoid the potential need to insert
1093 // RequireAnalysisPass for PSI before subsequent non-module passes.
1095 // Do not invoke ICP in the LTOPrelink phase as it makes it hard
1096 // for the profile annotation to be accurate in the LTO backend.
1097 if (!isLTOPreLink(Phase))
1098 // We perform early indirect call promotion here, before globalopt.
1099 // This is important for the ThinLTO backend phase because otherwise
1100 // imported available_externally functions look unreferenced and are
1101 // removed.
1102 MPM.addPass(
1103 PGOIndirectCallPromotion(true /* IsInLTO */, true /* SamplePGO */));
1104 }
1105
1106 // Try to perform OpenMP specific optimizations on the module. This is a
1107 // (quick!) no-op if there are no OpenMP runtime calls present in the module.
1109
1112
1113 // Lower type metadata and the type.test intrinsic in the ThinLTO
1114 // post link pipeline after ICP. This is to enable usage of the type
1115 // tests in ICP sequences.
1117 MPM.addPass(LowerTypeTestsPass(nullptr, nullptr, true));
1118
1120
1121 // Interprocedural constant propagation now that basic cleanup has occurred
1122 // and prior to optimizing globals.
1123 // FIXME: This position in the pipeline hasn't been carefully considered in
1124 // years; it should be re-analyzed.
1126 IPSCCPOptions(/*AllowFuncSpec=*/
1127 Level != OptimizationLevel::Os &&
1128 Level != OptimizationLevel::Oz &&
1129 !isLTOPreLink(Phase))));
1130
1131 // Attach metadata to indirect call sites indicating the set of functions
1132 // they may target at run-time. This should follow IPSCCP.
1134
1135 // Optimize globals to try and fold them into constants.
1137
1138 // Create a small function pass pipeline to cleanup after all the global
1139 // optimizations.
1140 FunctionPassManager GlobalCleanupPM;
1141 // FIXME: Should this instead be a run of SROA?
1142 GlobalCleanupPM.addPass(PromotePass());
1143 GlobalCleanupPM.addPass(InstCombinePass());
1144 invokePeepholeEPCallbacks(GlobalCleanupPM, Level);
1145 GlobalCleanupPM.addPass(
1146 SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(true)));
1147 MPM.addPass(createModuleToFunctionPassAdaptor(std::move(GlobalCleanupPM),
1149
1150 // We already asserted this happens in non-FullLTOPostLink earlier.
1151 const bool IsPreLink = Phase != ThinOrFullLTOPhase::ThinLTOPostLink;
1152 const bool IsPGOPreLink = PGOOpt && IsPreLink;
1153 const bool IsPGOInstrGen =
1154 IsPGOPreLink && PGOOpt->Action == PGOOptions::IRInstr;
1155 const bool IsPGOInstrUse =
1156 IsPGOPreLink && PGOOpt->Action == PGOOptions::IRUse;
1157 const bool IsMemprofUse = IsPGOPreLink && !PGOOpt->MemoryProfile.empty();
1158 // We don't want to mix pgo ctx gen and pgo gen; we also don't currently
1159 // enable ctx profiling from the frontend.
1160 assert(
1162 "Enabling both instrumented FDO and contextual instrumentation is not "
1163 "supported.");
1164 // Enable contextual profiling instrumentation.
1165 const bool IsCtxProfGen = !IsPGOInstrGen && IsPreLink &&
1167
1168 if (IsPGOInstrGen || IsPGOInstrUse || IsMemprofUse || IsCtxProfGen)
1169 addPreInlinerPasses(MPM, Level, Phase);
1170
1171 // Add all the requested passes for instrumentation PGO, if requested.
1172 if (IsPGOInstrGen || IsPGOInstrUse) {
1173 addPGOInstrPasses(MPM, Level,
1174 /*RunProfileGen=*/IsPGOInstrGen,
1175 /*IsCS=*/false, PGOOpt->AtomicCounterUpdate,
1176 PGOOpt->ProfileFile, PGOOpt->ProfileRemappingFile,
1177 PGOOpt->FS);
1178 } else if (IsCtxProfGen) {
1180 addPostPGOLoopRotation(MPM, Level);
1182 }
1183
1184 if (IsPGOInstrGen || IsPGOInstrUse || IsCtxProfGen)
1185 MPM.addPass(PGOIndirectCallPromotion(false, false));
1186
1187 if (IsPGOPreLink && PGOOpt->CSAction == PGOOptions::CSIRInstr)
1188 MPM.addPass(PGOInstrumentationGenCreateVar(PGOOpt->CSProfileGenFile));
1189
1190 if (IsMemprofUse)
1191 MPM.addPass(MemProfUsePass(PGOOpt->MemoryProfile, PGOOpt->FS));
1192
1193 // Synthesize function entry counts for non-PGO compilation.
1194 if (EnableSyntheticCounts && !PGOOpt)
1196
1197 if (EnablePGOForceFunctionAttrs && PGOOpt)
1198 MPM.addPass(PGOForceFunctionAttrsPass(PGOOpt->ColdOptType));
1199
1200 MPM.addPass(AlwaysInlinerPass(/*InsertLifetimeIntrinsics=*/true));
1201
1204 else
1206
1207 // Remove any dead arguments exposed by cleanups, constant folding globals,
1208 // and argument promotion.
1210
1212
1213 // Optimize globals now that functions are fully simplified.
1216
1217 return MPM;
1218}
1219
1220/// TODO: Should LTO cause any differences to this set of passes?
1221void PassBuilder::addVectorPasses(OptimizationLevel Level,
1222 FunctionPassManager &FPM, bool IsFullLTO) {
1225
1228 if (IsFullLTO) {
1229 // The vectorizer may have significantly shortened a loop body; unroll
1230 // again. Unroll small loops to hide loop backedge latency and saturate any
1231 // parallel execution resources of an out-of-order processor. We also then
1232 // need to clean up redundancies and loop invariant code.
1233 // FIXME: It would be really good to use a loop-integrated instruction
1234 // combiner for cleanup here so that the unrolling and LICM can be pipelined
1235 // across the loop nests.
1236 // We do UnrollAndJam in a separate LPM to ensure it happens before unroll
1239 LoopUnrollAndJamPass(Level.getSpeedupLevel())));
1241 Level.getSpeedupLevel(), /*OnlyWhenForced=*/!PTO.LoopUnrolling,
1244 // Now that we are done with loop unrolling, be it either by LoopVectorizer,
1245 // or LoopUnroll passes, some variable-offset GEP's into alloca's could have
1246 // become constant-offset, thus enabling SROA and alloca promotion. Do so.
1247 // NOTE: we are very late in the pipeline, and we don't have any LICM
1248 // or SimplifyCFG passes scheduled after us that would clean up
1249 // the CFG mess this may have created if allowed to modify the CFG, so forbid that.
1251 }
1252
1253 if (!IsFullLTO) {
1254 // Eliminate loads by forwarding stores from the previous iteration to loads
1255 // of the current iteration.
1257 }
1258 // Cleanup after the loop optimization passes.
1259 FPM.addPass(InstCombinePass());
1260
1261 if (Level.getSpeedupLevel() > 1 && ExtraVectorizerPasses) {
1262 ExtraVectorPassManager ExtraPasses;
1263 // At higher optimization levels, try to clean up any runtime overlap and
1264 // alignment checks inserted by the vectorizer. We want to track correlated
1265 // runtime checks for two inner loops in the same outer loop, fold any
1266 // common computations, hoist loop-invariant aspects out of any outer loop,
1267 // and unswitch the runtime checks if possible. Once hoisted, we may have
1268 // dead (or speculatable) control flows or more combining opportunities.
1269 ExtraPasses.addPass(EarlyCSEPass());
1271 ExtraPasses.addPass(InstCombinePass());
1272 LoopPassManager LPM;
1274 /*AllowSpeculation=*/true));
1275 LPM.addPass(SimpleLoopUnswitchPass(/* NonTrivial */ Level ==
1277 ExtraPasses.addPass(
1278 createFunctionToLoopPassAdaptor(std::move(LPM), /*UseMemorySSA=*/true,
1279 /*UseBlockFrequencyInfo=*/true));
1280 ExtraPasses.addPass(
1281 SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(true)));
1282 ExtraPasses.addPass(InstCombinePass());
1283 FPM.addPass(std::move(ExtraPasses));
1284 }
1285
1286 // Now that we've formed fast-to-execute loop structures, we do further
1287 // optimizations. These are run afterward as they might block doing complex
1288 // analyses and transforms such as what are needed for loop vectorization.
1289
1290 // Cleanup after loop vectorization, etc. Simplification passes like CVP and
1291 // GVN, loop transforms, and others have already run, so it's now better to
1292 // convert to more optimized IR using more aggressive simplify CFG options.
1293 // The extra sinking transform can create larger basic blocks, so do this
1294 // before SLP vectorization.
1296 .forwardSwitchCondToPhi(true)
1297 .convertSwitchRangeToICmp(true)
1298 .convertSwitchToLookupTable(true)
1299 .needCanonicalLoops(false)
1300 .hoistCommonInsts(true)
1301 .sinkCommonInsts(true)));
1302
1303 if (IsFullLTO) {
1304 FPM.addPass(SCCPPass());
1305 FPM.addPass(InstCombinePass());
1306 FPM.addPass(BDCEPass());
1307 }
1308
1309 // Optimize parallel scalar instruction chains into SIMD instructions.
1310 if (PTO.SLPVectorization) {
1312 if (Level.getSpeedupLevel() > 1 && ExtraVectorizerPasses) {
1313 FPM.addPass(EarlyCSEPass());
1314 }
1315 }
1316 // Enhance/cleanup vector code.
1318
1319 if (!IsFullLTO) {
1320 FPM.addPass(InstCombinePass());
1321 // Unroll small loops to hide loop backedge latency and saturate any
1322 // parallel execution resources of an out-of-order processor. We also then
1323 // need to clean up redundancies and loop invariant code.
1324 // FIXME: It would be really good to use a loop-integrated instruction
1325 // combiner for cleanup here so that the unrolling and LICM can be pipelined
1326 // across the loop nests.
1327 // We do UnrollAndJam in a separate LPM to ensure it happens before unroll
1328 if (EnableUnrollAndJam && PTO.LoopUnrolling) {
1330 LoopUnrollAndJamPass(Level.getSpeedupLevel())));
1331 }
1333 Level.getSpeedupLevel(), /*OnlyWhenForced=*/!PTO.LoopUnrolling,
1336 // Now that we are done with loop unrolling, be it either by LoopVectorizer,
1337 // or LoopUnroll passes, some variable-offset GEP's into alloca's could have
1338 // become constant-offset, thus enabling SROA and alloca promotion. Do so.
1339 // NOTE: we are very late in the pipeline, and we don't have any LICM
1340 // or SimplifyCFG passes scheduled after us that would clean up
1341 // the CFG mess this may have created if allowed to modify the CFG, so forbid that.
1343 }
1344
1347 FPM.addPass(InstCombinePass());
1348
1349 // This is needed for two reasons:
1350 // 1. It works around problems that instcombine introduces, such as sinking
1351 // expensive FP divides into loops containing multiplications using the
1352 // divide result.
1353 // 2. It helps to clean up some loop-invariant code created by the loop
1354 // unroll pass when IsFullLTO=false.
1357 /*AllowSpeculation=*/true),
1358 /*UseMemorySSA=*/true, /*UseBlockFrequencyInfo=*/false));
1359
1360 // Now that we've vectorized and unrolled loops, we may have more refined
1361 // alignment information, try to re-derive it here.
1363}
1364
1367 ThinOrFullLTOPhase LTOPhase) {
1368 const bool LTOPreLink = isLTOPreLink(LTOPhase);
1370
1371 // Run partial inlining pass to partially inline functions that have
1372 // large bodies.
1375
1376 // Remove avail extern fns and globals definitions since we aren't compiling
1377 // an object file for later LTO. For LTO we want to preserve these so they
1378 // are eligible for inlining at link-time. Note if they are unreferenced they
1379 // will be removed by GlobalDCE later, so this only impacts referenced
1380 // available externally globals. Eventually they will be suppressed during
1381 // codegen, but eliminating here enables more opportunity for GlobalDCE as it
1382 // may make globals referenced by available external functions dead and saves
1383 // running remaining passes on the eliminated functions. These should be
1384 // preserved during prelinking for link-time inlining decisions.
1385 if (!LTOPreLink)
1387
1390
1391 // Do RPO function attribute inference across the module to forward-propagate
1392 // attributes where applicable.
1393 // FIXME: Is this really an optimization rather than a canonicalization?
1395
1396 // Do a post-inline PGO instrumentation and use pass. This is a
1397 // context-sensitive PGO pass. We don't want to do this in the LTOPreLink
1398 // phase, as cross-module inlining has not been done yet. The context
1399 // sensitive instrumentation runs after all the inlining is done.
1400 if (!LTOPreLink && PGOOpt) {
1401 if (PGOOpt->CSAction == PGOOptions::CSIRInstr)
1402 addPGOInstrPasses(MPM, Level, /*RunProfileGen=*/true,
1403 /*IsCS=*/true, PGOOpt->AtomicCounterUpdate,
1404 PGOOpt->CSProfileGenFile, PGOOpt->ProfileRemappingFile,
1405 PGOOpt->FS);
1406 else if (PGOOpt->CSAction == PGOOptions::CSIRUse)
1407 addPGOInstrPasses(MPM, Level, /*RunProfileGen=*/false,
1408 /*IsCS=*/true, PGOOpt->AtomicCounterUpdate,
1409 PGOOpt->ProfileFile, PGOOpt->ProfileRemappingFile,
1410 PGOOpt->FS);
1411 }
1412
1413 // Re-compute GlobalsAA here prior to function passes. This is particularly
1414 // useful as the above will have inlined, DCE'ed, and function-attr
1415 // propagated everything. We should at this point have a reasonably minimal
1416 // and richly annotated call graph. By computing aliasing and mod/ref
1417 // information for all local globals here, the late loop passes and notably
1418 // the vectorizer will be able to use them to help recognize vectorizable
1419 // memory operations.
1422
1424
1425 FunctionPassManager OptimizePM;
1426 // Schedule LoopVersioningLICM once inlining is over, because after that
1427 // we may see more accurate aliasing. The reason to run this late is that
1428 // versioning too early may prevent further inlining due to the increase in
1429 // code size. Other optimizations which run later might benefit from the
1430 // no-alias assumption in the cloned loop.
1432 OptimizePM.addPass(
1434 // LoopVersioningLICM pass might increase new LICM opportunities.
1437 /*AllowSpeculation=*/true),
1438 /*UseMemorySSA=*/true, /*UseBlockFrequencyInfo=*/false));
1439 }
1440
1441 OptimizePM.addPass(Float2IntPass());
1443
1444 if (EnableMatrix) {
1445 OptimizePM.addPass(LowerMatrixIntrinsicsPass());
1446 OptimizePM.addPass(EarlyCSEPass());
1447 }
1448
1449 // The CHR pass should only be applied when profile information is present.
1450 // The profile summary information is checked within CHR itself.
1451 if (EnableCHR && Level == OptimizationLevel::O3)
1452 OptimizePM.addPass(ControlHeightReductionPass());
1453
1454 // FIXME: We need to run some loop optimizations to re-rotate loops after
1455 // simplifycfg and others undo their rotation.
1456
1457 // Optimize the loop execution. These passes operate on entire loop nests
1458 // rather than on each loop in an inside-out manner, and so they are actually
1459 // function passes.
1460
1461 invokeVectorizerStartEPCallbacks(OptimizePM, Level);
1462
1463 LoopPassManager LPM;
1464 // First rotate loops that may have been un-rotated by prior passes.
1465 // Disable header duplication at -Oz.
1467 Level != OptimizationLevel::Oz,
1468 LTOPreLink));
1469 // Some loops may have become dead by now. Try to delete them.
1470 // FIXME: see discussion in https://reviews.llvm.org/D112851,
1471 // this may need to be revisited once we run GVN before loop deletion
1472 // in the simplification pipeline.
1475 std::move(LPM), /*UseMemorySSA=*/false, /*UseBlockFrequencyInfo=*/false));
1476
1477 // Distribute loops to allow partial vectorization, i.e. isolate dependences
1478 // into a separate loop that would otherwise inhibit vectorization. This is
1479 // currently only performed for loops marked with the metadata
1480 // llvm.loop.distribute=true or when -enable-loop-distribute is specified.
1481 OptimizePM.addPass(LoopDistributePass());
1482
1483 // Populates the VFABI attribute with the scalar-to-vector mappings
1484 // from the TargetLibraryInfo.
1485 OptimizePM.addPass(InjectTLIMappings());
1486
1487 addVectorPasses(Level, OptimizePM, /* IsFullLTO */ false);
1488
1489 // LoopSink pass sinks instructions hoisted by LICM, which serves as a
1490 // canonicalization pass that enables other optimizations. As a result,
1491 // the LoopSink pass needs to run very late in the IR pipeline to avoid
1492 // undoing the LICM results too early.
1493 OptimizePM.addPass(LoopSinkPass());
1494
1495 // And finally clean up LCSSA form before generating code.
1496 OptimizePM.addPass(InstSimplifyPass());
1497
1498 // This hoists/decomposes div/rem ops. It should run after other sink/hoist
1499 // passes to avoid re-sinking, but before SimplifyCFG because it can allow
1500 // flattening of blocks.
1501 OptimizePM.addPass(DivRemPairsPass());
1502
1503 // Try to annotate calls that were created during optimization.
1504 OptimizePM.addPass(TailCallElimPass());
1505
1506 // LoopSink (and other loop passes since the last simplifyCFG) might have
1507 // resulted in single-entry-single-exit or empty blocks. Clean up the CFG.
1508 OptimizePM.addPass(
1509 SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(true)));
1510
1511 // Add the core optimizing pipeline.
1512 MPM.addPass(createModuleToFunctionPassAdaptor(std::move(OptimizePM),
1514
1516
1517 // Split out cold code. Splitting is done late to avoid hiding context from
1518 // other optimizations and inadvertently regressing performance. The tradeoff
1519 // is that this has a higher code size cost than splitting early.
1520 if (EnableHotColdSplit && !LTOPreLink)
1522
1523 // Search the code for similar regions of code. If enough similar regions can
1524 // be found where extracting the regions into their own function will decrease
1525 // the size of the program, we extract the regions and deduplicate the
1526 // structurally similar regions.
1527 if (EnableIROutliner)
1529
1530 // Merge functions if requested.
1531 if (PTO.MergeFunctions)
1533
1534 // Now we need to do some global optimization transforms.
1535 // FIXME: It would seem like these should come first in the optimization
1536 // pipeline and maybe be the bottom of the canonicalization pipeline? Weird
1537 // ordering here.
1540
1541 if (PTO.CallGraphProfile && !LTOPreLink)
1544
1545 // TODO: The relative lookup table converter pass caused an issue when full
1546 // LTO is enabled. See https://reviews.llvm.org/D94355 for more details.
1547 // Until the issue is fixed, disable this pass during the pre-linking phase.
1548 if (!LTOPreLink)
1550
1551 return MPM;
1552}
1553
1556 bool LTOPreLink) {
1557 if (Level == OptimizationLevel::O0)
1558 return buildO0DefaultPipeline(Level, LTOPreLink);
1559
1561
1562 // Convert @llvm.global.annotations to !annotation metadata.
1564
1565 // Force any function attributes we want the rest of the pipeline to observe.
1567
1568 if (PGOOpt && PGOOpt->DebugInfoForProfiling)
1570
1571 // Apply module pipeline start EP callback.
1573
1574 const ThinOrFullLTOPhase LTOPhase = LTOPreLink
1577 // Add the core simplification pipeline.
1579
1580 // Now add the optimization pipeline.
1582
1583 if (PGOOpt && PGOOpt->PseudoProbeForProfiling &&
1584 PGOOpt->Action == PGOOptions::SampleUse)
1586
1587 // Emit annotation remarks.
1589
1590 if (LTOPreLink)
1591 addRequiredLTOPreLinkPasses(MPM);
1592 return MPM;
1593}
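// Illustrative sketch (not part of the upstream source): the same default
// pipeline can also be requested through the textual pipeline parser, e.g.
//
//   llvm::ModulePassManager MPM;
//   if (llvm::Error Err = PB.parsePassPipeline(MPM, "default<O2>"))
//     llvm::logAllUnhandledErrors(std::move(Err), llvm::errs());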
1594
1597 bool EmitSummary) {
1599 if (ThinLTO)
1601 else
1603 MPM.addPass(EmbedBitcodePass(ThinLTO, EmitSummary));
1604
1605 // Use the ThinLTO post-link pipeline with sample profiling
1606 if (ThinLTO && PGOOpt && PGOOpt->Action == PGOOptions::SampleUse)
1607 MPM.addPass(buildThinLTODefaultPipeline(Level, /*ImportSummary=*/nullptr));
1608 else {
1609 // otherwise, just use module optimization
1610 MPM.addPass(
1612 // Emit annotation remarks.
1614 }
1615 return MPM;
1616}
1617
1620 if (Level == OptimizationLevel::O0)
1621 return buildO0DefaultPipeline(Level, /*LTOPreLink*/true);
1622
1624
1625 // Convert @llvm.global.annotations to !annotation metadata.
1627
1628 // Force any function attributes we want the rest of the pipeline to observe.
1630
1631 if (PGOOpt && PGOOpt->DebugInfoForProfiling)
1633
1634 // Apply module pipeline start EP callback.
1636
1637 // If we are planning to perform ThinLTO later, we don't bloat the code with
1638 // unrolling/vectorization/... now. Just simplify the module as much as we
1639 // can.
1642
1643 // Run partial inlining pass to partially inline functions that have
1644 // large bodies.
1645 // FIXME: It isn't clear whether this is really the right place to run this
1646 // in ThinLTO. Because there is another canonicalization and simplification
1647 // phase that will run after the thin link, running this here ends up with
1648 // less information than will be available later and it may grow functions in
1649 // ways that aren't beneficial.
1652
1653 if (PGOOpt && PGOOpt->PseudoProbeForProfiling &&
1654 PGOOpt->Action == PGOOptions::SampleUse)
1656
1657 // Handle Optimizer{Early,Last}EPCallbacks added by clang on PreLink. Actual
1658 // optimization is going to be done in the PostLink stage, but clang can't add
1659 // callbacks there in the case of in-process ThinLTO called by the linker.
1662
1663 // Emit annotation remarks.
1665
1666 addRequiredLTOPreLinkPasses(MPM);
1667
1668 return MPM;
1669}
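// Illustrative sketch (not part of the upstream source): a ThinLTO build
// typically pairs this pre-link pipeline (per-TU compile step) with the
// post-link pipeline below (ThinLTO backend, using the combined summary), e.g.
//
//   MPM = PB.buildThinLTOPreLinkDefaultPipeline(llvm::OptimizationLevel::O2);
//   ...
//   MPM = PB.buildThinLTODefaultPipeline(llvm::OptimizationLevel::O2,
//                                        ImportSummary);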
1670
1672 OptimizationLevel Level, const ModuleSummaryIndex *ImportSummary) {
1674
1675 if (ImportSummary) {
1676 // For ThinLTO we must apply the context disambiguation decisions early, to
1677 // ensure we can correctly match the callsites to summary data.
1680
1681 // These passes import type identifier resolutions for whole-program
1682 // devirtualization and CFI. They must run early because other passes may
1683 // disturb the specific instruction patterns that these passes look for,
1684 // creating dependencies on resolutions that may not appear in the summary.
1685 //
1686 // For example, GVN may transform the pattern assume(type.test) appearing in
1687 // two basic blocks into assume(phi(type.test, type.test)), which would
1688 // transform a dependency on a WPD resolution into a dependency on a type
1689 // identifier resolution for CFI.
1690 //
1691 // Also, WPD has access to more precise information than ICP and can
1692 // devirtualize more effectively, so it should operate on the IR first.
1693 //
1694 // The WPD and LowerTypeTest passes need to run at -O0 to lower type
1695 // metadata and intrinsics.
1696 MPM.addPass(WholeProgramDevirtPass(nullptr, ImportSummary));
1697 MPM.addPass(LowerTypeTestsPass(nullptr, ImportSummary));
1698 }
1699
1700 if (Level == OptimizationLevel::O0) {
1701 // Run a second time to clean up any type tests left behind by WPD for use
1702 // in ICP.
1703 MPM.addPass(LowerTypeTestsPass(nullptr, nullptr, true));
1704 // Drop available_externally and unreferenced globals. This is necessary
1705 // with ThinLTO in order to avoid leaving undefined references to dead
1706 // globals in the object file.
1709 return MPM;
1710 }
1711
1712 // Add the core simplification pipeline.
1715
1716 // Now add the optimization pipeline.
1719
1720 // Emit annotation remarks.
1722
1723 return MPM;
1724}
1725
1728 // FIXME: We should use a customized pre-link pipeline!
1729 return buildPerModuleDefaultPipeline(Level,
1730 /* LTOPreLink */ true);
1731}
1732
1735 ModuleSummaryIndex *ExportSummary) {
1737
1739
1740 // Create a function that performs CFI checks for cross-DSO calls with targets
1741 // in the current module.
1743
1744 if (Level == OptimizationLevel::O0) {
1745 // The WPD and LowerTypeTest passes need to run at -O0 to lower type
1746 // metadata and intrinsics.
1747 MPM.addPass(WholeProgramDevirtPass(ExportSummary, nullptr));
1748 MPM.addPass(LowerTypeTestsPass(ExportSummary, nullptr));
1749 // Run a second time to clean up any type tests left behind by WPD for use
1750 // in ICP.
1751 MPM.addPass(LowerTypeTestsPass(nullptr, nullptr, true));
1752
1754
1755 // Emit annotation remarks.
1757
1758 return MPM;
1759 }
1760
1761 if (PGOOpt && PGOOpt->Action == PGOOptions::SampleUse) {
1762 // Load sample profile before running the LTO optimization pipeline.
1763 MPM.addPass(SampleProfileLoaderPass(PGOOpt->ProfileFile,
1764 PGOOpt->ProfileRemappingFile,
1766 // Cache ProfileSummaryAnalysis once to avoid the potential need to insert
1767 // RequireAnalysisPass for PSI before subsequent non-module passes.
1769 }
1770
1771 // Try to run OpenMP optimizations, quick no-op if no OpenMP metadata present.
1773
1774 // Remove unused virtual tables to improve the quality of code generated by
1775 // whole-program devirtualization and bitset lowering.
1776 MPM.addPass(GlobalDCEPass(/*InLTOPostLink=*/true));
1777
1778 // Do basic inference of function attributes from known properties of system
1779 // libraries and other oracles.
1781
1782 if (Level.getSpeedupLevel() > 1) {
1785
1786 // Indirect call promotion. This should promote all the targets that are
1787 // left by the earlier promotion pass that promotes intra-module targets.
1788 // This two-step promotion is to save compile time. For LTO, it should
1789 // produce the same result as if we only do promotion here.
1791 true /* InLTO */, PGOOpt && PGOOpt->Action == PGOOptions::SampleUse));
1792
1793 // Propagate constants at call sites into the functions they call. This
1794 // opens opportunities for globalopt (and inlining) by substituting function
1795 // pointers passed as arguments to direct uses of functions.
1796 MPM.addPass(IPSCCPPass(IPSCCPOptions(/*AllowFuncSpec=*/
1797 Level != OptimizationLevel::Os &&
1798 Level != OptimizationLevel::Oz)));
1799
1800 // Attach metadata to indirect call sites indicating the set of functions
1801 // they may target at run-time. This should follow IPSCCP.
1803 }
1804
1805 // Now deduce any function attributes based on the current code.
1806 MPM.addPass(
1808
1809 // Do RPO function attribute inference across the module to forward-propagate
1810 // attributes where applicable.
1811 // FIXME: Is this really an optimization rather than a canonicalization?
1813
1814 // Use in-range annotations on GEP indices to split globals where beneficial.
1816
1817 // Run whole program optimization of virtual call when the list of callees
1818 // is fixed.
1819 MPM.addPass(WholeProgramDevirtPass(ExportSummary, nullptr));
1820
1821 // Stop here at -O1.
1822 if (Level == OptimizationLevel::O1) {
1823 // The LowerTypeTestsPass needs to run to lower type metadata and the
1824 // type.test intrinsics. The pass does nothing if CFI is disabled.
1825 MPM.addPass(LowerTypeTestsPass(ExportSummary, nullptr));
1826 // Run a second time to clean up any type tests left behind by WPD for use
1827 // in ICP (which is performed earlier than this in the regular LTO
1828 // pipeline).
1829 MPM.addPass(LowerTypeTestsPass(nullptr, nullptr, true));
1830
1832
1833 // Emit annotation remarks.
1835
1836 return MPM;
1837 }
1838
1839 // Optimize globals to try and fold them into constants.
1841
1842 // Promote any localized globals to SSA registers.
1844
1845 // Linking modules together can lead to duplicate global constants; only
1846 // keep one copy of each constant.
1848
1849 // Remove unused arguments from functions.
1851
1852 // Reduce the code after globalopt and ipsccp. Both can open up significant
1853 // simplification opportunities, and both can propagate functions through
1854 // function pointers. When this happens, we often have to resolve varargs
1855 // calls, etc, so let instcombine do this.
1856 FunctionPassManager PeepholeFPM;
1857 PeepholeFPM.addPass(InstCombinePass());
1858 if (Level.getSpeedupLevel() > 1)
1859 PeepholeFPM.addPass(AggressiveInstCombinePass());
1860 invokePeepholeEPCallbacks(PeepholeFPM, Level);
1861
1862 MPM.addPass(createModuleToFunctionPassAdaptor(std::move(PeepholeFPM),
1864
1865 // Note: historically, the PruneEH pass was run first to deduce nounwind and
1866 // generally clean up exception handling overhead. It isn't clear this is
1867 // valuable as the inliner doesn't currently care whether it is inlining an
1868 // invoke or a call.
1869 // Run the inliner now.
1870 if (EnableModuleInliner) {
1874 } else {
1877 /* MandatoryFirst */ true,
1880 }
1881
1882 // Perform context disambiguation after inlining, since that would reduce the
1883 // amount of additional cloning required to distinguish the allocation
1884 // contexts.
1887
1888 // Optimize globals again after we ran the inliner.
1890
1891 // Run the OpenMPOpt pass again after global optimizations.
1893
1894 // Garbage collect dead functions.
1895 MPM.addPass(GlobalDCEPass(/*InLTOPostLink=*/true));
1896
1897 // If we didn't decide to inline a function, check to see if we can
1898 // transform it to pass arguments by value instead of by reference.
1900
1902 // The IPO Passes may leave cruft around. Clean up after them.
1903 FPM.addPass(InstCombinePass());
1904 invokePeepholeEPCallbacks(FPM, Level);
1905
1908
1910
1911 // Do a post-inline PGO instrumentation and use pass. This is a context-
1912 // sensitive PGO pass.
1913 if (PGOOpt) {
1914 if (PGOOpt->CSAction == PGOOptions::CSIRInstr)
1915 addPGOInstrPasses(MPM, Level, /*RunProfileGen=*/true,
1916 /*IsCS=*/true, PGOOpt->AtomicCounterUpdate,
1917 PGOOpt->CSProfileGenFile, PGOOpt->ProfileRemappingFile,
1918 PGOOpt->FS);
1919 else if (PGOOpt->CSAction == PGOOptions::CSIRUse)
1920 addPGOInstrPasses(MPM, Level, /*RunProfileGen=*/false,
1921 /*IsCS=*/true, PGOOpt->AtomicCounterUpdate,
1922 PGOOpt->ProfileFile, PGOOpt->ProfileRemappingFile,
1923 PGOOpt->FS);
1924 }
1925
1926 // Break up allocas
1928
1929 // LTO provides additional opportunities for tailcall elimination due to
1930 // link-time inlining and the visibility of the nocapture attribute.
1932
1933 // Run a few AA driver optimizations here and now to clean up the code.
1936
1937 MPM.addPass(
1939
1940 // Require the GlobalsAA analysis for the module so we can query it within
1941 // MainFPM.
1944 // Invalidate AAManager so it can be recreated and pick up the newly
1945 // available GlobalsAA.
1946 MPM.addPass(
1948 }
1949
1950 FunctionPassManager MainFPM;
1953 /*AllowSpeculation=*/true),
1954 /*UseMemorySSA=*/true, /*UseBlockFrequencyInfo=*/false));
1955
1956 if (RunNewGVN)
1957 MainFPM.addPass(NewGVNPass());
1958 else
1959 MainFPM.addPass(GVNPass());
1960
1961 // Remove dead memcpy() calls.
1962 MainFPM.addPass(MemCpyOptPass());
1963
1964 // Nuke dead stores.
1965 MainFPM.addPass(DSEPass());
1966 MainFPM.addPass(MoveAutoInitPass());
1968
1969 LoopPassManager LPM;
1970 if (EnableLoopFlatten && Level.getSpeedupLevel() > 1)
1971 LPM.addPass(LoopFlattenPass());
1974 // FIXME: Add loop interchange.
1975
1976 // Unroll small loops and perform peeling.
1977 LPM.addPass(LoopFullUnrollPass(Level.getSpeedupLevel(),
1978 /* OnlyWhenForced= */ !PTO.LoopUnrolling,
1980 // The loop passes in LPM (LoopFullUnrollPass) do not preserve MemorySSA.
1981 // *All* loop passes must preserve it, in order to be able to use it.
1983 std::move(LPM), /*UseMemorySSA=*/false, /*UseBlockFrequencyInfo=*/true));
1984
1985 MainFPM.addPass(LoopDistributePass());
1986
1987 addVectorPasses(Level, MainFPM, /* IsFullLTO */ true);
1988
1989 // Run the OpenMPOpt CGSCC pass again late.
1992
1993 invokePeepholeEPCallbacks(MainFPM, Level);
1994 MainFPM.addPass(JumpThreadingPass());
1997
1998 // Lower type metadata and the type.test intrinsic. This pass supports
1999 // clang's control flow integrity mechanisms (-fsanitize=cfi*) and needs
2000 // to be run at link time if CFI is enabled. This pass does nothing if
2001 // CFI is disabled.
2002 MPM.addPass(LowerTypeTestsPass(ExportSummary, nullptr));
2003 // Run a second time to clean up any type tests left behind by WPD for use
2004 // in ICP (which is performed earlier than this in the regular LTO pipeline).
2005 MPM.addPass(LowerTypeTestsPass(nullptr, nullptr, true));
2006
2007 // Enable splitting late in the FullLTO post-link pipeline.
2010
2011 // Add late LTO optimization passes.
2012 FunctionPassManager LateFPM;
2013
2014 // The LoopSink pass sinks instructions hoisted by LICM, serving as a
2015 // canonicalization that enables other optimizations. As a result, LoopSink
2016 // needs to run as a very late IR pass to avoid undoing LICM's results too
2017 // early.
2018 LateFPM.addPass(LoopSinkPass());
2019
2020 // This hoists/decomposes div/rem ops. It should run after other sink/hoist
2021 // passes to avoid re-sinking, but before SimplifyCFG because it can allow
2022 // flattening of blocks.
2023 LateFPM.addPass(DivRemPairsPass());
2024
2025 // Delete basic blocks, which optimization passes may have killed.
2026 LateFPM.addPass(SimplifyCFGPass(
2027 SimplifyCFGOptions().convertSwitchRangeToICmp(true).hoistCommonInsts(
2028 true)));
2029 MPM.addPass(createModuleToFunctionPassAdaptor(std::move(LateFPM)));
2030
2031 // Drop bodies of available-externally objects to improve GlobalDCE.
2033
2034 // Now that we have optimized the program, discard unreachable functions.
2035 MPM.addPass(GlobalDCEPass(/*InLTOPostLink=*/true));
2036
2037 if (PTO.MergeFunctions)
2039
2040 if (PTO.CallGraphProfile)
2041 MPM.addPass(CGProfilePass(/*InLTOPostLink=*/true));
2042
2044
2045 // Emit annotation remarks.
2047
2048 return MPM;
2049}
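
Editor's sketch (not part of the source file): the pipeline assembled above is normally consumed through the standard PassBuilder setup. The snippet below is a minimal, hedged example of driving buildLTODefaultPipeline over a linked module; the function name runFullLTOPostLink is hypothetical, and M, TM, and ExportSummary stand for a caller-provided Module, TargetMachine, and ModuleSummaryIndex (error handling omitted).

#include "llvm/Analysis/CGSCCPassManager.h"
#include "llvm/Analysis/LoopAnalysisManager.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/ModuleSummaryIndex.h"
#include "llvm/IR/PassManager.h"
#include "llvm/Passes/PassBuilder.h"
#include "llvm/Target/TargetMachine.h"

// Build and run the full-LTO post-link pipeline over a linked module.
// M, TM, and ExportSummary are assumed to be supplied by the LTO driver.
void runFullLTOPostLink(llvm::Module &M, llvm::TargetMachine *TM,
                        llvm::ModuleSummaryIndex *ExportSummary) {
  llvm::LoopAnalysisManager LAM;
  llvm::FunctionAnalysisManager FAM;
  llvm::CGSCCAnalysisManager CGAM;
  llvm::ModuleAnalysisManager MAM;

  // Register all analyses and cross-register the proxies between managers.
  llvm::PassBuilder PB(TM);
  PB.registerModuleAnalyses(MAM);
  PB.registerCGSCCAnalyses(CGAM);
  PB.registerFunctionAnalyses(FAM);
  PB.registerLoopAnalyses(LAM);
  PB.crossRegisterProxies(LAM, FAM, CGAM, MAM);

  // Build the LTO pipeline defined above and run it over the module.
  llvm::ModulePassManager MPM =
      PB.buildLTODefaultPipeline(llvm::OptimizationLevel::O2, ExportSummary);
  MPM.run(M, MAM);
}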
2050
2052 bool LTOPreLink) {
2053 assert(Level == OptimizationLevel::O0 &&
2054 "buildO0DefaultPipeline should only be used with O0");
2055
2057
2058 // Perform pseudo-probe instrumentation in O0 mode. This is for consistency
2059 // between different build modes. For example, an LTO build can be mixed with
2060 // an O0 prelink and an O2 postlink. Loading a sample profile in the postlink
2061 // will require pseudo-probe instrumentation in the prelink.
2062 if (PGOOpt && PGOOpt->PseudoProbeForProfiling)
2064
2065 if (PGOOpt && (PGOOpt->Action == PGOOptions::IRInstr ||
2066 PGOOpt->Action == PGOOptions::IRUse))
2068 MPM,
2069 /*RunProfileGen=*/(PGOOpt->Action == PGOOptions::IRInstr),
2070 /*IsCS=*/false, PGOOpt->AtomicCounterUpdate, PGOOpt->ProfileFile,
2071 PGOOpt->ProfileRemappingFile, PGOOpt->FS);
2072
2073 // Instrument function entry and exit before all inlining.
2075 EntryExitInstrumenterPass(/*PostInlining=*/false)));
2076
2078
2079 if (PGOOpt && PGOOpt->DebugInfoForProfiling)
2081
2083
2084 // Build a minimal pipeline based on the semantics required by LLVM,
2085 // which is just that always_inline functions are inlined. Further, disable
2086 // generating lifetime intrinsics to avoid enabling further optimizations
2087 // during code generation.
2089 /*InsertLifetimeIntrinsics=*/false));
2090
2091 if (PTO.MergeFunctions)
2093
2094 if (EnableMatrix)
2095 MPM.addPass(
2097
2098 if (!CGSCCOptimizerLateEPCallbacks.empty()) {
2099 CGSCCPassManager CGPM;
2101 if (!CGPM.isEmpty())
2103 }
2104 if (!LateLoopOptimizationsEPCallbacks.empty()) {
2105 LoopPassManager LPM;
2107 if (!LPM.isEmpty()) {
2109 createFunctionToLoopPassAdaptor(std::move(LPM))));
2110 }
2111 }
2112 if (!LoopOptimizerEndEPCallbacks.empty()) {
2113 LoopPassManager LPM;
2115 if (!LPM.isEmpty()) {
2117 createFunctionToLoopPassAdaptor(std::move(LPM))));
2118 }
2119 }
2120 if (!ScalarOptimizerLateEPCallbacks.empty()) {
2123 if (!FPM.isEmpty())
2125 }
2126
2128
2129 if (!VectorizerStartEPCallbacks.empty()) {
2132 if (!FPM.isEmpty())
2134 }
2135
2136 ModulePassManager CoroPM;
2137 CoroPM.addPass(CoroEarlyPass());
2138 CGSCCPassManager CGPM;
2139 CGPM.addPass(CoroSplitPass());
2140 CoroPM.addPass(createModuleToPostOrderCGSCCPassAdaptor(std::move(CGPM)));
2141 CoroPM.addPass(CoroCleanupPass());
2142 CoroPM.addPass(GlobalDCEPass());
2143 MPM.addPass(CoroConditionalWrapper(std::move(CoroPM)));
2144
2146
2147 if (LTOPreLink)
2148 addRequiredLTOPreLinkPasses(MPM);
2149
2151
2152 return MPM;
2153}
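
Editor's sketch (not part of the source file): buildO0DefaultPipeline is the entry point a frontend uses for -O0 builds, optionally as the prelink stage of an LTO build. Assuming the same PassBuilder/analysis-manager setup (PB, M, MAM) as in the sketch after buildLTODefaultPipeline above, usage reduces to the following; the LTOPreLink value shown is illustrative.

  // Run the minimal O0 pipeline; pass LTOPreLink=true when this module will
  // later be fed into an LTO link so the required pre-link passes are added.
  llvm::ModulePassManager MPM =
      PB.buildO0DefaultPipeline(llvm::OptimizationLevel::O0,
                                /*LTOPreLink=*/false);
  MPM.run(M, MAM);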
2154
2156 AAManager AA;
2157
2158 // The order in which these are registered determines their priority when
2159 // being queried.
2160
2161 // First we register the basic alias analysis that provides the majority of
2162 // per-function local AA logic. This is a stateless, on-demand local set of
2163 // AA techniques.
2165
2166 // Next we query fast, specialized alias analyses that wrap IR-embedded
2167 // information about aliasing.
2170
2171 // Add support for querying global aliasing information when available.
2172 // Because the `AAManager` is a function analysis and `GlobalsAA` is a module
2173 // analysis, all that the `AAManager` can do is query for any *cached*
2174 // results from `GlobalsAA` through a readonly proxy.
2177
2178 // Add target-specific alias analyses.
2179 if (TM)
2181
2182 return AA;
2183}
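
Editor's sketch (not part of the source file): buildDefaultAAPipeline returns the AAManager assembled above. A tool that wants this default alias-analysis stack can register it into its FunctionAnalysisManager before calling registerFunctionAnalyses, which ensures this is the AAManager the pipelines query. TM is assumed to be a caller-provided TargetMachine pointer and may be null.

  llvm::FunctionAnalysisManager FAM;
  llvm::PassBuilder PB(TM); // With a null TM, target-specific AAs are skipped.
  // Register the default AA stack (BasicAA, ScopedNoAliasAA, TypeBasedAA,
  // a GlobalsAA proxy, plus any target AAs) for use by AAManager queries.
  FAM.registerPass([&] { return PB.buildDefaultAAPipeline(); });
  PB.registerFunctionAnalyses(FAM);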
aarch64 falkor hwpf fix Falkor HW Prefetch Fix Late Phase
AggressiveInstCombiner - Combine expression patterns to form expressions with fewer,...
Provides passes to inlining "always_inline" functions.
This is the interface for LLVM's primary stateless and local alias analysis.
This file provides the interface for LLVM's Call Graph Profile pass.
This header provides classes for managing passes over SCCs of the call graph.
#define clEnumValN(ENUMVAL, FLAGNAME, DESC)
Definition: CommandLine.h:686
This file provides the interface for a simple, fast CSE pass.
This file provides a pass which clones the current module and runs the provided pass pipeline on the ...
Super simple passes to force specific function attrs from the commandline into the IR for debugging p...
Provides passes for computing function attributes based on interprocedural analyses.
This file provides the interface for LLVM's Global Value Numbering pass which eliminates fully redund...
This is the interface for a simple mod/ref and alias analysis over globals.
AcceleratorCodeSelection - Identify all functions reachable from a kernel, removing those that are un...
Interfaces for passes which infer implicit function attributes from the name and signature of functio...
This file provides the primary interface to the instcombine pass.
Defines passes for running instruction simplification across chunks of IR.
This file provides the interface for LLVM's PGO Instrumentation lowering pass.
See the comments on JumpThreadingPass.
static LVOptions Options
Definition: LVOptions.cpp:25
This header defines the LoopLoadEliminationPass object.
This header provides classes for managing a pipeline of passes over loops in LLVM IR.
The header file for the LowerConstantIntrinsics pass as used by the new pass manager.
The header file for the LowerExpectIntrinsic pass as used by the new pass manager.
This pass performs merges of loads and stores on both sides of a.
This file provides the interface for LLVM's Global Value Numbering pass.
This header enumerates the LLVM-provided high-level optimization levels.
This file provides the interface for IR-based instrumentation passes (profile-gen,...
Define option tunables for PGO.
ModulePassManager MPM
static cl::opt< bool > EnableMergeFunctions("enable-merge-functions", cl::init(false), cl::Hidden, cl::desc("Enable function merging as part of the optimization pipeline"))
static cl::opt< bool > EnableGlobalAnalyses("enable-global-analyses", cl::init(true), cl::Hidden, cl::desc("Enable inter-procedural analyses"))
static cl::opt< bool > EnableIROutliner("ir-outliner", cl::init(false), cl::Hidden, cl::desc("Enable ir outliner pass"))
static cl::opt< bool > RunNewGVN("enable-newgvn", cl::init(false), cl::Hidden, cl::desc("Run the NewGVN pass"))
static cl::opt< bool > DisablePreInliner("disable-preinline", cl::init(false), cl::Hidden, cl::desc("Disable pre-instrumentation inliner"))
static cl::opt< bool > EnableEagerlyInvalidateAnalyses("eagerly-invalidate-analyses", cl::init(true), cl::Hidden, cl::desc("Eagerly invalidate more analyses in default pipelines"))
static cl::opt< bool > ExtraVectorizerPasses("extra-vectorizer-passes", cl::init(false), cl::Hidden, cl::desc("Run cleanup optimization passes after vectorization"))
static void addAnnotationRemarksPass(ModulePassManager &MPM)
static cl::opt< bool > EnablePostPGOLoopRotation("enable-post-pgo-loop-rotation", cl::init(true), cl::Hidden, cl::desc("Run the loop rotation transformation after PGO instrumentation"))
static InlineParams getInlineParamsFromOptLevel(OptimizationLevel Level)
static cl::opt< bool > EnableGVNSink("enable-gvn-sink", cl::desc("Enable the GVN sinking pass (default = off)"))
static cl::opt< bool > PerformMandatoryInliningsFirst("mandatory-inlining-first", cl::init(false), cl::Hidden, cl::desc("Perform mandatory inlinings module-wide, before performing " "inlining"))
static cl::opt< bool > RunPartialInlining("enable-partial-inlining", cl::init(false), cl::Hidden, cl::desc("Run Partial inlining pass"))
static cl::opt< bool > EnableGVNHoist("enable-gvn-hoist", cl::desc("Enable the GVN hoisting pass (default = off)"))
static cl::opt< bool > EnableDFAJumpThreading("enable-dfa-jump-thread", cl::desc("Enable DFA jump threading"), cl::init(false), cl::Hidden)
static cl::opt< bool > EnableCHR("enable-chr", cl::init(true), cl::Hidden, cl::desc("Enable control height reduction optimization (CHR)"))
static cl::opt< bool > EnableHotColdSplit("hot-cold-split", cl::desc("Enable hot-cold splitting pass"))
static cl::opt< bool > EnableLoopInterchange("enable-loopinterchange", cl::init(false), cl::Hidden, cl::desc("Enable the experimental LoopInterchange Pass"))
static cl::opt< int > PreInlineThreshold("preinline-threshold", cl::Hidden, cl::init(75), cl::desc("Control the amount of inlining in pre-instrumentation inliner " "(default = 75)"))
static cl::opt< bool > EnableLoopHeaderDuplication("enable-loop-header-duplication", cl::init(false), cl::Hidden, cl::desc("Enable loop header duplication at any optimization level"))
static cl::opt< bool > EnablePGOForceFunctionAttrs("enable-pgo-force-function-attrs", cl::desc("Enable pass to set function attributes based on PGO profiles"), cl::init(false))
static cl::opt< bool > EnableUnrollAndJam("enable-unroll-and-jam", cl::init(false), cl::Hidden, cl::desc("Enable Unroll And Jam Pass"))
static cl::opt< bool > EnableModuleInliner("enable-module-inliner", cl::init(false), cl::Hidden, cl::desc("Enable module inliner"))
static cl::opt< bool > EnableMatrix("enable-matrix", cl::init(false), cl::Hidden, cl::desc("Enable lowering of the matrix intrinsics"))
static cl::opt< AttributorRunOption > AttributorRun("attributor-enable", cl::Hidden, cl::init(AttributorRunOption::NONE), cl::desc("Enable the attributor inter-procedural deduction pass"), cl::values(clEnumValN(AttributorRunOption::ALL, "all", "enable all attributor runs"), clEnumValN(AttributorRunOption::MODULE, "module", "enable module-wide attributor runs"), clEnumValN(AttributorRunOption::CGSCC, "cgscc", "enable call graph SCC attributor runs"), clEnumValN(AttributorRunOption::NONE, "none", "disable attributor runs")))
static cl::opt< bool > EnableOrderFileInstrumentation("enable-order-file-instrumentation", cl::init(false), cl::Hidden, cl::desc("Enable order file instrumentation (default = off)"))
static cl::opt< bool > UseLoopVersioningLICM("enable-loop-versioning-licm", cl::init(false), cl::Hidden, cl::desc("Enable the experimental Loop Versioning LICM pass"))
static cl::opt< bool > EnableSyntheticCounts("enable-npm-synthetic-counts", cl::Hidden, cl::desc("Run synthetic function entry count generation " "pass"))
static bool isLTOPreLink(ThinOrFullLTOPhase Phase)
static cl::opt< bool > EnablePGOInlineDeferral("enable-npm-pgo-inline-deferral", cl::init(true), cl::Hidden, cl::desc("Enable inline deferral during PGO"))
Flag to enable inline deferral during PGO.
static cl::opt< bool > EnableJumpTableToSwitch("enable-jump-table-to-switch", cl::desc("Enable JumpTableToSwitch pass (default = off)"))
static cl::opt< InliningAdvisorMode > UseInlineAdvisor("enable-ml-inliner", cl::init(InliningAdvisorMode::Default), cl::Hidden, cl::desc("Enable ML policy for inliner. Currently trained for -Oz only"), cl::values(clEnumValN(InliningAdvisorMode::Default, "default", "Heuristics-based inliner version"), clEnumValN(InliningAdvisorMode::Development, "development", "Use development mode (runtime-loadable model)"), clEnumValN(InliningAdvisorMode::Release, "release", "Use release mode (AOT-compiled model)")))
static cl::opt< bool > FlattenedProfileUsed("flattened-profile-used", cl::init(false), cl::Hidden, cl::desc("Indicate the sample profile being used is flattened, i.e., " "no inline hierarchy exists in the profile"))
static cl::opt< bool > EnableConstraintElimination("enable-constraint-elimination", cl::init(true), cl::Hidden, cl::desc("Enable pass to eliminate conditions based on linear constraints"))
static cl::opt< bool > EnableLoopFlatten("enable-loop-flatten", cl::init(false), cl::Hidden, cl::desc("Enable the LoopFlatten Pass"))
This header defines various interfaces for pass management in LLVM.
This file implements relative lookup table converter that converts lookup tables to relative lookup t...
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
This file provides the interface for LLVM's Scalar Replacement of Aggregates pass.
This file provides the interface for the pseudo probe implementation for AutoFDO.
This file provides the interface for the sampled PGO loader pass.
This is the interface for a metadata-based scoped no-alias analysis.
This file provides the interface for the pass responsible for both simplifying and canonicalizing the...
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
This is the interface for a metadata-based TBAA.
Defines the virtual file system interface vfs::FileSystem.
A manager for alias analyses.
void registerFunctionAnalysis()
Register a specific AA result.
void registerModuleAnalysis()
Register a specific AA result.
Inlines functions marked as "always_inline".
Definition: AlwaysInliner.h:32
Argument promotion pass.
Analysis pass providing a never-invalidated alias analysis result.
Simple pass that canonicalizes aliases.
A pass that merges duplicate global constants into a single constant.
Definition: ConstantMerge.h:29
This class implements a trivial dead store elimination.
Eliminate dead arguments (and return values) from functions.
A pass that transforms external global definitions into declarations.
Pass embeds a copy of the module optimized with the provided pass pipeline into a global variable.
The core GVN pass object.
Definition: GVN.h:117
Pass to remove unused function declarations.
Definition: GlobalDCE.h:36
Optimize globals that never have their address taken.
Definition: GlobalOpt.h:25
Pass to perform split of global variables.
Definition: GlobalSplit.h:26
Analysis pass providing a never-invalidated alias analysis result.
Pass to outline cold regions.
Pass to perform interprocedural constant propagation.
Definition: SCCP.h:48
Pass to outline similar regions.
Definition: IROutliner.h:444
Run instruction simplification across each instruction in the function.
The instrumentation pass for recording function order.
Instrumentation based profiling lowering pass.
A smart pointer to a reference-counted object that inherits from RefCountedBase or ThreadSafeRefCount...
This pass performs 'jump threading', which looks at blocks that have multiple predecessors and multip...
Definition: JumpThreading.h:79
Performs Loop Invariant Code Motion Pass.
Definition: LICM.h:66
Loop unroll pass that only does full loop unrolling and peeling.
Performs Loop Idiom Recognize Pass.
Performs Loop Inst Simplify Pass.
A simple loop rotation transformation.
Definition: LoopRotation.h:24
Performs basic CFG simplifications to assist other loop passes.
A pass that does profile-guided sinking of instructions into loops.
Definition: LoopSink.h:33
A simple loop rotation transformation.
Loop unroll pass that will support both full and partial unrolling.
Merge identical functions.
The module inliner pass for the new pass manager.
Definition: ModuleInliner.h:27
Module pass, wrapping the inliner pass.
Definition: Inliner.h:62
void addModulePass(T Pass)
Add a module pass that runs before the CGSCC passes.
Definition: Inliner.h:78
Class to hold module path string table and global value map, and encapsulate methods for operating on...
Simple pass that provides a name to every anonymous globals.
OpenMP optimizations pass.
Definition: OpenMPOpt.h:42
static const OptimizationLevel O3
Optimize for fast execution as much as possible.
static const OptimizationLevel Oz
A very specialized mode that will optimize for code size at any and all costs.
static const OptimizationLevel O0
Disable as many optimizations as possible.
static const OptimizationLevel Os
Similar to O2 but tries to optimize for small code size instead of fast execution without triggering ...
static const OptimizationLevel O2
Optimize for fast execution as much as possible without triggering significant incremental compile ti...
static const OptimizationLevel O1
Optimize quickly without destroying debuggability.
The indirect function call promotion pass.
The instrumentation (profile-instr-gen) pass for IR based PGO.
The instrumentation (profile-instr-gen) pass for IR based PGO.
The profile annotation (profile-instr-use) pass for IR based PGO.
The profile size based optimization pass for memory intrinsics.
Pass to remove unused function declarations.
ModulePassManager buildO0DefaultPipeline(OptimizationLevel Level, bool LTOPreLink=false)
Build an O0 pipeline with the minimal semantically required passes.
void invokeFullLinkTimeOptimizationLastEPCallbacks(ModulePassManager &MPM, OptimizationLevel Level)
ModuleInlinerWrapperPass buildInlinerPipeline(OptimizationLevel Level, ThinOrFullLTOPhase Phase)
Construct the module pipeline that performs inlining as well as the inlining-driven cleanups.
void invokeOptimizerLastEPCallbacks(ModulePassManager &MPM, OptimizationLevel Level)
void invokeVectorizerStartEPCallbacks(FunctionPassManager &FPM, OptimizationLevel Level)
AAManager buildDefaultAAPipeline()
Build the default AAManager with the default alias analysis pipeline registered.
void invokeCGSCCOptimizerLateEPCallbacks(CGSCCPassManager &CGPM, OptimizationLevel Level)
ModulePassManager buildThinLTOPreLinkDefaultPipeline(OptimizationLevel Level)
Build a pre-link, ThinLTO-targeting default optimization pipeline to a pass manager.
void invokeScalarOptimizerLateEPCallbacks(FunctionPassManager &FPM, OptimizationLevel Level)
ModulePassManager buildPerModuleDefaultPipeline(OptimizationLevel Level, bool LTOPreLink=false)
Build a per-module default optimization pipeline.
void invokePipelineStartEPCallbacks(ModulePassManager &MPM, OptimizationLevel Level)
FunctionPassManager buildFunctionSimplificationPipeline(OptimizationLevel Level, ThinOrFullLTOPhase Phase)
Construct the core LLVM function canonicalization and simplification pipeline.
void invokePeepholeEPCallbacks(FunctionPassManager &FPM, OptimizationLevel Level)
void invokeLoopOptimizerEndEPCallbacks(LoopPassManager &LPM, OptimizationLevel Level)
ModulePassManager buildLTODefaultPipeline(OptimizationLevel Level, ModuleSummaryIndex *ExportSummary)
Build an LTO default optimization pipeline to a pass manager.
ModulePassManager buildModuleInlinerPipeline(OptimizationLevel Level, ThinOrFullLTOPhase Phase)
Construct the module pipeline that performs inlining with module inliner pass.
ModulePassManager buildThinLTODefaultPipeline(OptimizationLevel Level, const ModuleSummaryIndex *ImportSummary)
Build a ThinLTO default optimization pipeline to a pass manager.
void invokeLateLoopOptimizationsEPCallbacks(LoopPassManager &LPM, OptimizationLevel Level)
void invokeOptimizerEarlyEPCallbacks(ModulePassManager &MPM, OptimizationLevel Level)
void invokePipelineEarlySimplificationEPCallbacks(ModulePassManager &MPM, OptimizationLevel Level)
void invokeFullLinkTimeOptimizationEarlyEPCallbacks(ModulePassManager &MPM, OptimizationLevel Level)
ModulePassManager buildFatLTODefaultPipeline(OptimizationLevel Level, bool ThinLTO, bool EmitSummary)
Build a fat object default optimization pipeline.
ModulePassManager buildModuleSimplificationPipeline(OptimizationLevel Level, ThinOrFullLTOPhase Phase)
Construct the core LLVM module canonicalization and simplification pipeline.
ModulePassManager buildModuleOptimizationPipeline(OptimizationLevel Level, ThinOrFullLTOPhase LTOPhase)
Construct the core LLVM module optimization pipeline.
void addPGOInstrPassesForO0(ModulePassManager &MPM, bool RunProfileGen, bool IsCS, bool AtomicCounterUpdate, std::string ProfileFile, std::string ProfileRemappingFile, IntrusiveRefCntPtr< vfs::FileSystem > FS)
Add PGOInstrumenation passes for O0 only.
ModulePassManager buildLTOPreLinkDefaultPipeline(OptimizationLevel Level)
Build a pre-link, LTO-targeting default optimization pipeline to a pass manager.
LLVM_ATTRIBUTE_MINSIZE std::enable_if_t< is_detected< HasRunOnLoopT, PassT >::value > addPass(PassT &&Pass)
LLVM_ATTRIBUTE_MINSIZE std::enable_if_t<!std::is_same_v< PassT, PassManager > > addPass(PassT &&Pass)
Definition: PassManager.h:195
bool isEmpty() const
Returns if the pass manager contains any passes.
Definition: PassManager.h:217
unsigned LicmMssaNoAccForPromotionCap
Tuning option to disable promotion to scalars in LICM with MemorySSA, if the number of access is too ...
Definition: PassBuilder.h:74
bool SLPVectorization
Tuning option to enable/disable slp loop vectorization, set based on opt level.
Definition: PassBuilder.h:59
int InlinerThreshold
Tuning option to override the default inliner threshold.
Definition: PassBuilder.h:88
bool CallGraphProfile
Tuning option to enable/disable call graph profile.
Definition: PassBuilder.h:78
bool MergeFunctions
Tuning option to enable/disable function merging.
Definition: PassBuilder.h:85
bool ForgetAllSCEVInLoopUnroll
Tuning option to forget all SCEV loops in LoopUnroll.
Definition: PassBuilder.h:66
unsigned LicmMssaOptCap
Tuning option to cap the number of calls to retrieve clobbering accesses in MemorySSA,...
Definition: PassBuilder.h:70
bool LoopInterleaving
Tuning option to set loop interleaving on/off, set based on opt level.
Definition: PassBuilder.h:51
PipelineTuningOptions()
Constructor sets pipeline tuning defaults based on cl::opts.
bool LoopUnrolling
Tuning option to enable/disable loop unrolling. Its default value is true.
Definition: PassBuilder.h:62
bool LoopVectorization
Tuning option to enable/disable loop vectorization, set based on opt level.
Definition: PassBuilder.h:55
Reassociate commutative expressions.
Definition: Reassociate.h:82
A pass to do RPO deduction and propagation of function attributes.
Definition: FunctionAttrs.h:73
This pass performs function-level constant propagation and merging.
Definition: SCCP.h:29
The sample profiler data loader pass.
Definition: SampleProfile.h:39
Analysis pass providing a never-invalidated alias analysis result.
This pass transforms loops that contain branches or switches on loop- invariant conditions to have mu...
A pass to simplify and canonicalize the CFG of a function.
Definition: SimplifyCFG.h:29
virtual void registerDefaultAliasAnalyses(AAManager &)
Allow the target to register alias analyses with the AAManager for use with the new pass manager.
Analysis pass providing a never-invalidated alias analysis result.
Optimize scalar/vector interactions in IR using target cost models.
Definition: VectorCombine.h:23
Interfaces for registering analysis passes, producing common pass manager configurations,...
@ C
The default llvm calling convention, compatible with C.
Definition: CallingConv.h:34
ValuesClass values(OptsTy... Options)
Helper to build a ValuesClass by forwarding a variable number of arguments as an initializer list to ...
Definition: CommandLine.h:711
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:443
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
cl::opt< bool > EnableKnowledgeRetention
ModuleToFunctionPassAdaptor createModuleToFunctionPassAdaptor(FunctionPassT &&Pass, bool EagerlyInvalidate=false)
A function to deduce a function pass type and wrap it in the templated adaptor.
Definition: PassManager.h:848
@ MODULE
Definition: Attributor.h:6426
@ CGSCC
Definition: Attributor.h:6427
ThinOrFullLTOPhase
This enumerates the LLVM full LTO or ThinLTO optimization phases.
Definition: Pass.h:76
@ FullLTOPreLink
Full LTO prelink phase.
@ ThinLTOPostLink
ThinLTO postlink (backend compile) phase.
@ None
No LTO/ThinLTO behavior needed.
@ FullLTOPostLink
Full LTO postlink (backend compile) phase.
@ ThinLTOPreLink
ThinLTO prelink (summary) phase.
ModuleToPostOrderCGSCCPassAdaptor createModuleToPostOrderCGSCCPassAdaptor(CGSCCPassT &&Pass)
A function to deduce a function pass type and wrap it in the templated adaptor.
CGSCCToFunctionPassAdaptor createCGSCCToFunctionPassAdaptor(FunctionPassT &&Pass, bool EagerlyInvalidate=false, bool NoRerun=false)
A function to deduce a function pass type and wrap it in the templated adaptor.
cl::opt< bool > ForgetSCEVInLoopUnroll
bool AreStatisticsEnabled()
Check if statistics are enabled.
Definition: Statistic.cpp:139
cl::opt< bool > EnableInferAlignmentPass
cl::opt< bool > EnableMemProfContextDisambiguation
Enable MemProf context disambiguation for thin link.
InlineParams getInlineParams()
Generate the parameters to tune the inline cost analysis based only on the commandline options.
cl::opt< unsigned > SetLicmMssaNoAccForPromotionCap
std::enable_if_t< is_detected< HasRunOnLoopT, LoopPassT >::value, FunctionToLoopPassAdaptor > createFunctionToLoopPassAdaptor(LoopPassT &&Pass, bool UseMemorySSA=false, bool UseBlockFrequencyInfo=false, bool UseBranchProbabilityInfo=false)
A function to deduce a loop pass type and wrap it in the templated adaptor.
cl::opt< unsigned > MaxDevirtIterations("max-devirt-iterations", cl::ReallyHidden, cl::init(4))
cl::opt< unsigned > SetLicmMssaOptCap
A DCE pass that assumes instructions are dead until proven otherwise.
Definition: ADCE.h:31
Pass to convert @llvm.global.annotations to !annotation metadata.
This pass attempts to minimize the number of assumes without losing any information.
Hoist/decompose integer division and remainder instructions to enable CFG improvements and better cod...
Definition: DivRemPairs.h:23
A simple and fast domtree-based CSE pass.
Definition: EarlyCSE.h:30
A pass manager to run a set of extra function simplification passes after vectorization,...
Pass which forces specific function attributes into the IR, primarily as a debugging tool.
A simple and fast domtree-based GVN pass to hoist common expressions from sibling branches.
Definition: GVN.h:392
Uses an "inverted" value numbering to decide the similarity of expressions and sinks similar expressi...
Definition: GVN.h:399
A set of parameters to control various transforms performed by IPSCCP pass.
Definition: SCCP.h:35
A pass which infers function attributes from the names and signatures of function declarations in a m...
Provides context on when an inline advisor is constructed in the pipeline (e.g., link phase,...
Definition: InlineAdvisor.h:59
Thresholds to tune inline cost analysis.
Definition: InlineCost.h:206
std::optional< int > HotCallSiteThreshold
Threshold to use when the callsite is considered hot.
Definition: InlineCost.h:223
int DefaultThreshold
The default threshold to start with for a callee.
Definition: InlineCost.h:208
std::optional< bool > EnableDeferral
Indicate whether we should allow inline deferral.
Definition: InlineCost.h:236
std::optional< int > HintThreshold
Threshold to use for callees with inline hint.
Definition: InlineCost.h:211
Options for the frontend instrumentation based profiling pass.
A no-op pass template which simply forces a specific analysis result to be invalidated.
Definition: PassManager.h:901
Pass to forward loads in a loop around the backedge to subsequent iterations.
A set of parameters used to control various transforms performed by the LoopUnroll pass.
The LoopVectorize Pass.
Computes function attributes in post-order over the call graph.
Definition: FunctionAttrs.h:49
A utility pass template to force an analysis result to be available.
Definition: PassManager.h:874