LLVM 19.0.0git
PassBuilderPipelines.cpp
1//===- Construction of pass pipelines -------------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8/// \file
9///
10/// This file provides the implementation of the PassBuilder based on our
11/// static pass registry as well as related functionality. It also provides
12/// helpers to aid in analyzing, debugging, and testing passes and pass
13/// pipelines.
14///
15//===----------------------------------------------------------------------===//
16
17#include "llvm/ADT/Statistic.h"
26#include "llvm/IR/PassManager.h"
140
141using namespace llvm;
142
144 "enable-ml-inliner", cl::init(InliningAdvisorMode::Default), cl::Hidden,
145 cl::desc("Enable ML policy for inliner. Currently trained for -Oz only"),
146 cl::values(clEnumValN(InliningAdvisorMode::Default, "default",
147 "Heuristics-based inliner version"),
148 clEnumValN(InliningAdvisorMode::Development, "development",
149 "Use development mode (runtime-loadable model)"),
150 clEnumValN(InliningAdvisorMode::Release, "release",
151 "Use release mode (AOT-compiled model)")));
152
154 "enable-npm-synthetic-counts", cl::Hidden,
155 cl::desc("Run synthetic function entry count generation "
156 "pass"));
157
158/// Flag to enable inline deferral during PGO.
159static cl::opt<bool>
160 EnablePGOInlineDeferral("enable-npm-pgo-inline-deferral", cl::init(true),
162 cl::desc("Enable inline deferral during PGO"));
163
164static cl::opt<bool> EnableModuleInliner("enable-module-inliner",
165 cl::init(false), cl::Hidden,
166 cl::desc("Enable module inliner"));
167
169 "mandatory-inlining-first", cl::init(false), cl::Hidden,
170 cl::desc("Perform mandatory inlinings module-wide, before performing "
171 "inlining"));
172
174 "eagerly-invalidate-analyses", cl::init(true), cl::Hidden,
175 cl::desc("Eagerly invalidate more analyses in default pipelines"));
176
178 "enable-merge-functions", cl::init(false), cl::Hidden,
179 cl::desc("Enable function merging as part of the optimization pipeline"));
180
182 "enable-post-pgo-loop-rotation", cl::init(true), cl::Hidden,
183 cl::desc("Run the loop rotation transformation after PGO instrumentation"));
184
186 "enable-global-analyses", cl::init(true), cl::Hidden,
187 cl::desc("Enable inter-procedural analyses"));
188
189static cl::opt<bool>
190 RunPartialInlining("enable-partial-inlining", cl::init(false), cl::Hidden,
191 cl::desc("Run partial inlining pass"));
192
194 "extra-vectorizer-passes", cl::init(false), cl::Hidden,
195 cl::desc("Run cleanup optimization passes after vectorization"));
196
197static cl::opt<bool> RunNewGVN("enable-newgvn", cl::init(false), cl::Hidden,
198 cl::desc("Run the NewGVN pass"));
199
201 "enable-loopinterchange", cl::init(false), cl::Hidden,
202 cl::desc("Enable the experimental LoopInterchange Pass"));
203
204static cl::opt<bool> EnableUnrollAndJam("enable-unroll-and-jam",
205 cl::init(false), cl::Hidden,
206 cl::desc("Enable Unroll And Jam Pass"));
207
208static cl::opt<bool> EnableLoopFlatten("enable-loop-flatten", cl::init(false),
210 cl::desc("Enable the LoopFlatten Pass"));
211
212// Experimentally allow loop header duplication. This should allow for better
213// optimization at Oz, since loop-idiom recognition can then recognize things
214// like memcpy. If this ends up being useful for many targets, we should drop
215// this flag and make a code generation option that can be controlled
216// independent of the opt level and exposed through the frontend.
218 "enable-loop-header-duplication", cl::init(false), cl::Hidden,
219 cl::desc("Enable loop header duplication at any optimization level"));
220
221static cl::opt<bool>
222 EnableDFAJumpThreading("enable-dfa-jump-thread",
223 cl::desc("Enable DFA jump threading"),
224 cl::init(false), cl::Hidden);
225
226// TODO: turn on and remove flag
228 "enable-pgo-force-function-attrs",
229 cl::desc("Enable pass to set function attributes based on PGO profiles"),
230 cl::init(false));
231
232static cl::opt<bool>
233 EnableHotColdSplit("hot-cold-split",
234 cl::desc("Enable hot-cold splitting pass"));
235
236static cl::opt<bool> EnableIROutliner("ir-outliner", cl::init(false),
238 cl::desc("Enable ir outliner pass"));
239
240static cl::opt<bool>
241 DisablePreInliner("disable-preinline", cl::init(false), cl::Hidden,
242 cl::desc("Disable pre-instrumentation inliner"));
243
245 "preinline-threshold", cl::Hidden, cl::init(75),
246 cl::desc("Control the amount of inlining in pre-instrumentation inliner "
247 "(default = 75)"));
248
249static cl::opt<bool>
250 EnableGVNHoist("enable-gvn-hoist",
251 cl::desc("Enable the GVN hoisting pass (default = off)"));
252
253static cl::opt<bool>
254 EnableGVNSink("enable-gvn-sink",
255 cl::desc("Enable the GVN sinking pass (default = off)"));
256
258 "enable-jump-table-to-switch",
259 cl::desc("Enable JumpTableToSwitch pass (default = off)"));
260
261// This option is used to simplify testing of SampleFDO optimizations for
262// profile loading.
263static cl::opt<bool>
264 EnableCHR("enable-chr", cl::init(true), cl::Hidden,
265 cl::desc("Enable control height reduction optimization (CHR)"));
266
268 "flattened-profile-used", cl::init(false), cl::Hidden,
269 cl::desc("Indicate the sample profile being used is flattened, i.e., "
270 "no inline hierarchy exists in the profile"));
271
273 "enable-order-file-instrumentation", cl::init(false), cl::Hidden,
274 cl::desc("Enable order file instrumentation (default = off)"));
275
276static cl::opt<bool>
277 EnableMatrix("enable-matrix", cl::init(false), cl::Hidden,
278 cl::desc("Enable lowering of the matrix intrinsics"));
279
281 "enable-constraint-elimination", cl::init(true), cl::Hidden,
282 cl::desc(
283 "Enable pass to eliminate conditions based on linear constraints"));
284
286 "attributor-enable", cl::Hidden, cl::init(AttributorRunOption::NONE),
287 cl::desc("Enable the attributor inter-procedural deduction pass"),
288 cl::values(clEnumValN(AttributorRunOption::ALL, "all",
289 "enable all attributor runs"),
290 clEnumValN(AttributorRunOption::MODULE, "module",
291 "enable module-wide attributor runs"),
292 clEnumValN(AttributorRunOption::CGSCC, "cgscc",
293 "enable call graph SCC attributor runs"),
294 clEnumValN(AttributorRunOption::NONE, "none",
295 "disable attributor runs")));
296
298 "enable-loop-versioning-licm", cl::init(false), cl::Hidden,
299 cl::desc("Enable the experimental Loop Versioning LICM pass"));
300
301namespace llvm {
303
305} // namespace llvm
306
308 LoopInterleaving = true;
309 LoopVectorization = true;
310 SLPVectorization = false;
311 LoopUnrolling = true;
315 CallGraphProfile = true;
316 UnifiedLTO = false;
318 InlinerThreshold = -1;
320}
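// Illustrative sketch (not part of the upstream file): clients can override
// the defaults set in this constructor by passing their own
// PipelineTuningOptions to PassBuilder. A minimal example:
//
//   PipelineTuningOptions PTO;
//   PTO.SLPVectorization = true;   // defaults to false above
//   PTO.LoopUnrolling = false;     // consulted as PTO.LoopUnrolling below
//   PassBuilder PB(/*TM=*/nullptr, PTO);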
321
322namespace llvm {
324} // namespace llvm
325
327 OptimizationLevel Level) {
328 for (auto &C : PeepholeEPCallbacks)
329 C(FPM, Level);
330}
333 for (auto &C : LateLoopOptimizationsEPCallbacks)
334 C(LPM, Level);
335}
337 OptimizationLevel Level) {
338 for (auto &C : LoopOptimizerEndEPCallbacks)
339 C(LPM, Level);
340}
343 for (auto &C : ScalarOptimizerLateEPCallbacks)
344 C(FPM, Level);
345}
347 OptimizationLevel Level) {
348 for (auto &C : CGSCCOptimizerLateEPCallbacks)
349 C(CGPM, Level);
350}
352 OptimizationLevel Level) {
353 for (auto &C : VectorizerStartEPCallbacks)
354 C(FPM, Level);
355}
357 OptimizationLevel Level) {
358 for (auto &C : OptimizerEarlyEPCallbacks)
359 C(MPM, Level);
360}
362 OptimizationLevel Level) {
363 for (auto &C : OptimizerLastEPCallbacks)
364 C(MPM, Level);
365}
368 for (auto &C : FullLinkTimeOptimizationEarlyEPCallbacks)
369 C(MPM, Level);
370}
373 for (auto &C : FullLinkTimeOptimizationLastEPCallbacks)
374 C(MPM, Level);
375}
377 OptimizationLevel Level) {
378 for (auto &C : PipelineStartEPCallbacks)
379 C(MPM, Level);
380}
383 for (auto &C : PipelineEarlySimplificationEPCallbacks)
384 C(MPM, Level);
385}
386
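// Illustrative sketch (not part of the upstream file): the invoke*EPCallbacks
// helpers above fire callbacks registered through the corresponding
// PassBuilder::register*EPCallback APIs. For example, to append an extra
// function pass at the peephole extension point (MyExtraPeepholePass is a
// hypothetical placeholder):
//
//   PB.registerPeepholeEPCallback(
//       [](FunctionPassManager &FPM, OptimizationLevel Level) {
//         if (Level != OptimizationLevel::O0)
//           FPM.addPass(MyExtraPeepholePass());
//       });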
387// Helper to add AnnotationRemarksPass.
390}
391
392// Helper to check if the current compilation phase is preparing for LTO
396}
397
398// TODO: Investigate the cost/benefit of tail call elimination on debugging.
400PassBuilder::buildO1FunctionSimplificationPipeline(OptimizationLevel Level,
402
404
407
408 // Form SSA out of local memory accesses after breaking apart aggregates into
409 // scalars.
411
412 // Catch trivial redundancies
413 FPM.addPass(EarlyCSEPass(true /* Enable mem-ssa. */));
414
415 // Hoisting of scalars and load expressions.
416 FPM.addPass(
417 SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(true)));
419
421
422 invokePeepholeEPCallbacks(FPM, Level);
423
424 FPM.addPass(
425 SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(true)));
426
427 // Form canonically associated expression trees, and simplify the trees using
428 // basic mathematical properties. For example, this will form (nearly)
429 // minimal multiplication trees.
431
432 // Add the primary loop simplification pipeline.
433 // FIXME: Currently this is split into two loop pass pipelines because we run
434 // some function passes in between them. These can and should be removed
435 // and/or replaced by scheduling the loop pass equivalents in the correct
436 // positions. But those equivalent passes aren't powerful enough yet.
437 // Specifically, `SimplifyCFGPass` and `InstCombinePass` are currently still
438// used. We have `LoopSimplifyCFGPass` which isn't yet powerful enough to
439 // fully replace `SimplifyCFGPass`, and the closest to the other we have is
440 // `LoopInstSimplify`.
441 LoopPassManager LPM1, LPM2;
442
443 // Simplify the loop body. We do this initially to clean up after other loop
444 // passes run, either when iterating on a loop or on inner loops with
445 // implications on the outer loop.
448
449 // Try to remove as much code from the loop header as possible,
450 // to reduce the amount of IR that will have to be duplicated. However,
451 // do not perform speculative hoisting the first time as LICM
452 // will destroy metadata that may not need to be destroyed if run
453 // after loop rotation.
454 // TODO: Investigate promotion cap for O1.
456 /*AllowSpeculation=*/false));
457
458 LPM1.addPass(LoopRotatePass(/* Disable header duplication */ true,
460 // TODO: Investigate promotion cap for O1.
462 /*AllowSpeculation=*/true));
465 LPM1.addPass(LoopFlattenPass());
466
469
471
473
476
477 // Do not enable unrolling in the PreLinkThinLTO phase during sample PGO
478 // because it changes the IR in a way that makes profile annotation in the
479 // backend compile inaccurate. The normal unroller doesn't pay attention to
480 // forced full unroll attributes, so we need to make sure the full unroll
481 // pass pays attention to it.
482 if (Phase != ThinOrFullLTOPhase::ThinLTOPreLink || !PGOOpt ||
483 PGOOpt->Action != PGOOptions::SampleUse)
484 LPM2.addPass(LoopFullUnrollPass(Level.getSpeedupLevel(),
485 /* OnlyWhenForced= */ !PTO.LoopUnrolling,
487
489
490 FPM.addPass(createFunctionToLoopPassAdaptor(std::move(LPM1),
491 /*UseMemorySSA=*/true,
492 /*UseBlockFrequencyInfo=*/true));
493 FPM.addPass(
494 SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(true)));
496 // The loop passes in LPM2 (LoopFullUnrollPass) do not preserve MemorySSA.
497 // *All* loop passes must preserve it, in order to be able to use it.
498 FPM.addPass(createFunctionToLoopPassAdaptor(std::move(LPM2),
499 /*UseMemorySSA=*/false,
500 /*UseBlockFrequencyInfo=*/false));
501
502 // Delete small arrays after loop unrolling.
504
505 // Specially optimize memory movement as it doesn't look like dataflow in SSA.
506 FPM.addPass(MemCpyOptPass());
507
508 // Sparse conditional constant propagation.
509 // FIXME: It isn't clear why we do this *after* loop passes rather than
510 // before...
511 FPM.addPass(SCCPPass());
512
513 // Delete dead bit computations (instcombine runs after to fold away the dead
514 // computations, and then ADCE will run later to exploit any new DCE
515 // opportunities that creates).
516 FPM.addPass(BDCEPass());
517
518 // Run instcombine after redundancy and dead bit elimination to exploit
519 // opportunities opened up by them.
521 invokePeepholeEPCallbacks(FPM, Level);
522
523 FPM.addPass(CoroElidePass());
524
526
527 // Finally, do an expensive DCE pass to catch all the dead code exposed by
528 // the simplifications and basic cleanup after all the simplifications.
529 // TODO: Investigate if this is too expensive.
530 FPM.addPass(ADCEPass());
531 FPM.addPass(
532 SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(true)));
534 invokePeepholeEPCallbacks(FPM, Level);
535
536 return FPM;
537}
538
542 assert(Level != OptimizationLevel::O0 && "Must request optimizations!");
543
544 // The O1 pipeline has a separate pipeline creation function to simplify
545 // construction readability.
546 if (Level.getSpeedupLevel() == 1)
547 return buildO1FunctionSimplificationPipeline(Level, Phase);
548
550
553
554 // Form SSA out of local memory accesses after breaking apart aggregates into
555 // scalars.
557
558 // Catch trivial redundancies
559 FPM.addPass(EarlyCSEPass(true /* Enable mem-ssa. */));
562
563 // Hoisting of scalars and load expressions.
564 if (EnableGVNHoist)
565 FPM.addPass(GVNHoistPass());
566
567 // Global value numbering based sinking.
568 if (EnableGVNSink) {
569 FPM.addPass(GVNSinkPass());
570 FPM.addPass(
571 SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(true)));
572 }
573
574 // Speculative execution if the target has divergent branches; otherwise nop.
575 FPM.addPass(SpeculativeExecutionPass(/* OnlyIfDivergentTarget =*/true));
576
577 // Optimize based on known information about branches, and cleanup afterward.
580
581 // Jump table to switch conversion.
584
585 FPM.addPass(
586 SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(true)));
589
590 if (!Level.isOptimizingForSize())
592
593 invokePeepholeEPCallbacks(FPM, Level);
594
595 // For PGO use pipeline, try to optimize memory intrinsics such as memcpy
596 // using the size value profile. Don't perform this when optimizing for size.
597 if (PGOOpt && PGOOpt->Action == PGOOptions::IRUse &&
598 !Level.isOptimizingForSize())
600
602 FPM.addPass(
603 SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(true)));
604
605 // Form canonically associated expression trees, and simplify the trees using
606 // basic mathematical properties. For example, this will form (nearly)
607 // minimal multiplication trees.
609
612
613 // Add the primary loop simplification pipeline.
614 // FIXME: Currently this is split into two loop pass pipelines because we run
615 // some function passes in between them. These can and should be removed
616 // and/or replaced by scheduling the loop pass equivalents in the correct
617 // positions. But those equivalent passes aren't powerful enough yet.
618 // Specifically, `SimplifyCFGPass` and `InstCombinePass` are currently still
619// used. We have `LoopSimplifyCFGPass` which isn't yet powerful enough to
620 // fully replace `SimplifyCFGPass`, and the closest to the other we have is
621 // `LoopInstSimplify`.
622 LoopPassManager LPM1, LPM2;
623
624 // Simplify the loop body. We do this initially to clean up after other loop
625 // passes run, either when iterating on a loop or on inner loops with
626 // implications on the outer loop.
629
630 // Try to remove as much code from the loop header as possible,
631 // to reduce the amount of IR that will have to be duplicated. However,
632 // do not perform speculative hoisting the first time as LICM
633 // will destroy metadata that may not need to be destroyed if run
634 // after loop rotation.
635 // TODO: Investigate promotion cap for O1.
637 /*AllowSpeculation=*/false));
638
639 // Disable header duplication in loop rotation at -Oz.
641 Level != OptimizationLevel::Oz,
643 // TODO: Investigate promotion cap for O1.
645 /*AllowSpeculation=*/true));
646 LPM1.addPass(
647 SimpleLoopUnswitchPass(/* NonTrivial */ Level == OptimizationLevel::O3));
649 LPM1.addPass(LoopFlattenPass());
650
653
654 {
656 ExtraPasses.addPass(SimpleLoopUnswitchPass(/* NonTrivial */ Level ==
658 LPM2.addPass(std::move(ExtraPasses));
659 }
660
662
664
667
668 // Do not enable unrolling in the PreLinkThinLTO phase during sample PGO
669 // because it changes the IR in a way that makes profile annotation in the
670 // backend compile inaccurate. The normal unroller doesn't pay attention to
671 // forced full unroll attributes, so we need to make sure the full unroll
672 // pass pays attention to it.
673 if (Phase != ThinOrFullLTOPhase::ThinLTOPreLink || !PGOOpt ||
674 PGOOpt->Action != PGOOptions::SampleUse)
675 LPM2.addPass(LoopFullUnrollPass(Level.getSpeedupLevel(),
676 /* OnlyWhenForced= */ !PTO.LoopUnrolling,
678
680
681 FPM.addPass(createFunctionToLoopPassAdaptor(std::move(LPM1),
682 /*UseMemorySSA=*/true,
683 /*UseBlockFrequencyInfo=*/true));
684 FPM.addPass(
685 SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(true)));
687 // The loop passes in LPM2 (LoopIdiomRecognizePass, IndVarSimplifyPass,
688 // LoopDeletionPass and LoopFullUnrollPass) do not preserve MemorySSA.
689 // *All* loop passes must preserve it, in order to be able to use it.
690 FPM.addPass(createFunctionToLoopPassAdaptor(std::move(LPM2),
691 /*UseMemorySSA=*/false,
692 /*UseBlockFrequencyInfo=*/false));
693
694 // Delete small arrays after loop unrolling.
696
697 // Try vectorization/scalarization transforms that are both improvements
698 // themselves and can allow further folds with GVN and InstCombine.
699 FPM.addPass(VectorCombinePass(/*TryEarlyFoldsOnly=*/true));
700
701 // Eliminate redundancies.
703 if (RunNewGVN)
704 FPM.addPass(NewGVNPass());
705 else
706 FPM.addPass(GVNPass());
707
708 // Sparse conditional constant propagation.
709 // FIXME: It isn't clear why we do this *after* loop passes rather than
710 // before...
711 FPM.addPass(SCCPPass());
712
713 // Delete dead bit computations (instcombine runs after to fold away the dead
714 // computations, and then ADCE will run later to exploit any new DCE
715 // opportunities that creates).
716 FPM.addPass(BDCEPass());
717
718 // Run instcombine after redundancy and dead bit elimination to exploit
719 // opportunities opened up by them.
721 invokePeepholeEPCallbacks(FPM, Level);
722
723 // Re-consider control flow based optimizations after redundancy elimination,
724 // redo DCE, etc.
727
730
731 // Finally, do an expensive DCE pass to catch all the dead code exposed by
732 // the simplifications and basic cleanup after all the simplifications.
733 // TODO: Investigate if this is too expensive.
734 FPM.addPass(ADCEPass());
735
736 // Specially optimize memory movement as it doesn't look like dataflow in SSA.
737 FPM.addPass(MemCpyOptPass());
738
739 FPM.addPass(DSEPass());
741
744 /*AllowSpeculation=*/true),
745 /*UseMemorySSA=*/true, /*UseBlockFrequencyInfo=*/false));
746
747 FPM.addPass(CoroElidePass());
748
750
752 .convertSwitchRangeToICmp(true)
753 .hoistCommonInsts(true)
754 .sinkCommonInsts(true)));
756 invokePeepholeEPCallbacks(FPM, Level);
757
758 return FPM;
759}
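// Illustrative note (not part of the upstream file): an approximate textual
// counterpart of a slice of this simplification pipeline can be requested
// directly from `opt`, which is handy when bisecting it. A sketch, assuming
// the current textual pass-name spellings:
//
//   opt -passes='function(sroa<modify-cfg>,early-cse<memssa>,simplifycfg,instcombine,reassociate)' \
//       input.ll -S -o output.ll
//
// This is only a rough subset; the real pipeline above also schedules the two
// loop pass managers, GVN, SCCP, BDCE, and the extension-point callbacks.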
760
761void PassBuilder::addRequiredLTOPreLinkPasses(ModulePassManager &MPM) {
764}
765
766void PassBuilder::addPreInlinerPasses(ModulePassManager &MPM,
767 OptimizationLevel Level,
768 ThinOrFullLTOPhase LTOPhase) {
769 assert(Level != OptimizationLevel::O0 && "Not expecting O0 here!");
771 return;
772 InlineParams IP;
773
775
776 // FIXME: The hint threshold has the same value used by the regular inliner
777 // when not optimizing for size. This should probably be lowered after
778 // performance testing.
779 // FIXME: this comment is cargo-culted from the old pass manager; revisit.
780 IP.HintThreshold = Level.isOptimizingForSize() ? PreInlineThreshold : 325;
782 IP, /* MandatoryFirst */ true,
784 CGSCCPassManager &CGPipeline = MIWP.getPM();
785
788 FPM.addPass(EarlyCSEPass()); // Catch trivial redundancies.
789 FPM.addPass(SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(
790 true))); // Merge & remove basic blocks.
791 FPM.addPass(InstCombinePass()); // Combine silly sequences.
792 invokePeepholeEPCallbacks(FPM, Level);
793
794 CGPipeline.addPass(createCGSCCToFunctionPassAdaptor(
795 std::move(FPM), PTO.EagerlyInvalidateAnalyses));
796
797 MPM.addPass(std::move(MIWP));
798
799 // Delete anything that is now dead to make sure that we don't instrument
800 // dead code. Instrumentation can end up keeping dead code around and
801 // dramatically increase code size.
803}
804
805void PassBuilder::addPGOInstrPasses(ModulePassManager &MPM,
806 OptimizationLevel Level, bool RunProfileGen,
807 bool IsCS, bool AtomicCounterUpdate,
808 std::string ProfileFile,
809 std::string ProfileRemappingFile,
811 assert(Level != OptimizationLevel::O0 && "Not expecting O0 here!");
812
813 if (!RunProfileGen) {
814 assert(!ProfileFile.empty() && "Profile use expecting a profile file!");
815 MPM.addPass(
816 PGOInstrumentationUse(ProfileFile, ProfileRemappingFile, IsCS, FS));
817 // Cache ProfileSummaryAnalysis once to avoid the potential need to insert
818 // RequireAnalysisPass for PSI before subsequent non-module passes.
820 return;
821 }
822
823 // Perform PGO instrumentation.
825
827 // Disable header duplication in loop rotation at -Oz.
831 Level != OptimizationLevel::Oz),
832 /*UseMemorySSA=*/false,
833 /*UseBlockFrequencyInfo=*/false),
835 }
836
837 // Add the profile lowering pass.
839 if (!ProfileFile.empty())
840 Options.InstrProfileOutput = ProfileFile;
841 // Do counter promotion at Level greater than O0.
842 Options.DoCounterPromotion = true;
843 Options.UseBFIInPromotion = IsCS;
844 Options.Atomic = AtomicCounterUpdate;
846}
847
849 ModulePassManager &MPM, bool RunProfileGen, bool IsCS,
850 bool AtomicCounterUpdate, std::string ProfileFile,
851 std::string ProfileRemappingFile, IntrusiveRefCntPtr<vfs::FileSystem> FS) {
852 if (!RunProfileGen) {
853 assert(!ProfileFile.empty() && "Profile use expecting a profile file!");
854 MPM.addPass(
855 PGOInstrumentationUse(ProfileFile, ProfileRemappingFile, IsCS, FS));
856 // Cache ProfileSummaryAnalysis once to avoid the potential need to insert
857 // RequireAnalysisPass for PSI before subsequent non-module passes.
859 return;
860 }
861
862 // Perform PGO instrumentation.
864 // Add the profile lowering pass.
866 if (!ProfileFile.empty())
867 Options.InstrProfileOutput = ProfileFile;
868 // Do not do counter promotion at O0.
869 Options.DoCounterPromotion = false;
870 Options.UseBFIInPromotion = IsCS;
871 Options.Atomic = AtomicCounterUpdate;
873}
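// Illustrative note (not part of the upstream file): in practice these PGO
// helpers are reached through the frontend rather than called directly. A
// typical clang workflow that ends up exercising them might look like:
//
//   clang -O2 -fprofile-generate=prof/ foo.c -o foo       # instrumented build
//   ./foo && llvm-profdata merge -o foo.profdata prof/    # collect and merge
//   clang -O2 -fprofile-use=foo.profdata foo.c -o foo.opt # profile-guided build
//
// The -fcs-profile-generate variant drives the context-sensitive (IsCS)
// instrumentation path handled in addPGOInstrPasses above; the resulting
// profile is consumed through the regular -fprofile-use path.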
874
876 return getInlineParams(Level.getSpeedupLevel(), Level.getSizeLevel());
877}
878
882 InlineParams IP;
883 if (PTO.InlinerThreshold == -1)
884 IP = getInlineParamsFromOptLevel(Level);
885 else
887 // For PreLinkThinLTO + SamplePGO, set hot-caller threshold to 0 to
888 // disable hot callsite inline (as much as possible [1]) because it makes
889 // profile annotation in the backend inaccurate.
890 //
891 // [1] Note the cost of a function could be below zero due to erased
892 // prologue / epilogue.
893 if (Phase == ThinOrFullLTOPhase::ThinLTOPreLink && PGOOpt &&
894 PGOOpt->Action == PGOOptions::SampleUse)
896
897 if (PGOOpt)
899
903
904 // Require the GlobalsAA analysis for the module so we can query it within
905 // the CGSCC pipeline.
908 // Invalidate AAManager so it can be recreated and pick up the newly
909 // available GlobalsAA.
910 MIWP.addModulePass(
912 }
913
914 // Require the ProfileSummaryAnalysis for the module so we can query it within
915 // the inliner pass.
917
918 // Now begin the main postorder CGSCC pipeline.
919 // FIXME: The current CGSCC pipeline has its origins in the legacy pass
920 // manager and trying to emulate its precise behavior. Much of this doesn't
921 // make a lot of sense and we should revisit the core CGSCC structure.
922 CGSCCPassManager &MainCGPipeline = MIWP.getPM();
923
924 // Note: historically, the PruneEH pass was run first to deduce nounwind and
925 // generally clean up exception handling overhead. It isn't clear this is
926 // valuable as the inliner doesn't currently care whether it is inlining an
927 // invoke or a call.
928
930 MainCGPipeline.addPass(AttributorCGSCCPass());
931
932 // Deduce function attributes. We do another run of this after the function
933 // simplification pipeline, so this only needs to run when it could affect the
934 // function simplification pipeline, which is only the case with recursive
935 // functions.
936 MainCGPipeline.addPass(PostOrderFunctionAttrsPass(/*SkipNonRecursive*/ true));
937
938 // When at O3 add argument promotion to the pass pipeline.
939 // FIXME: It isn't at all clear why this should be limited to O3.
940 if (Level == OptimizationLevel::O3)
941 MainCGPipeline.addPass(ArgumentPromotionPass());
942
943 // Try to perform OpenMP specific optimizations. This is a (quick!) no-op if
944 // there are no OpenMP runtime calls present in the module.
945 if (Level == OptimizationLevel::O2 || Level == OptimizationLevel::O3)
946 MainCGPipeline.addPass(OpenMPOptCGSCCPass());
947
948 invokeCGSCCOptimizerLateEPCallbacks(MainCGPipeline, Level);
949
950 // Add the core function simplification pipeline nested inside the
951 // CGSCC walk.
954 PTO.EagerlyInvalidateAnalyses, /*NoRerun=*/true));
955
956 // Finally, deduce any function attributes based on the fully simplified
957 // function.
958 MainCGPipeline.addPass(PostOrderFunctionAttrsPass());
959
960 // Mark that the function is fully simplified and that it shouldn't be
961 // simplified again if we somehow revisit it due to CGSCC mutations unless
962 // it's been modified since.
965
967 MainCGPipeline.addPass(CoroSplitPass(Level != OptimizationLevel::O0));
968
969 // Make sure we don't affect potential future NoRerun CGSCC adaptors.
970 MIWP.addLateModulePass(createModuleToFunctionPassAdaptor(
972
973 return MIWP;
974}
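// Illustrative note (not part of the upstream file): which inliner variant the
// default pipelines use is controlled by the flags declared at the top of this
// file. A sketch of toggling them through `opt` (the release-mode ML advisor
// additionally requires a compiler built with the AOT-compiled model):
//
//   opt -passes='default<O2>' -enable-module-inliner input.ll -S -o out.ll
//   opt -passes='default<Oz>' -enable-ml-inliner=release input.ll -S -o out.ll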
975
980
982 // For PreLinkThinLTO + SamplePGO, set hot-caller threshold to 0 to
983 // disable hot callsite inline (as much as possible [1]) because it makes
984 // profile annotation in the backend inaccurate.
985 //
986 // [1] Note the cost of a function could be below zero due to erased
987 // prologue / epilogue.
988 if (Phase == ThinOrFullLTOPhase::ThinLTOPreLink && PGOOpt &&
989 PGOOpt->Action == PGOOptions::SampleUse)
991
992 if (PGOOpt)
994
995 // The inline deferral logic is used to avoid losing inlining opportunities
996 // in the future. It is helpful in the SCC inliner, where inlining is
997 // processed in bottom-up order.
998 // In the module inliner, the inlining order is a priority-based order by
999 // default, so the inline deferral is unnecessary there. Hence we disable
1000 // the inline deferral logic in the module inliner.
1001 IP.EnableDeferral = false;
1002
1004
1008
1012
1013 return MPM;
1014}
1015
1019 assert(Level != OptimizationLevel::O0 &&
1020 "Should not be used for O0 pipeline");
1021
1023 "FullLTOPostLink shouldn't call buildModuleSimplificationPipeline!");
1024
1026
1027 // Place pseudo probe instrumentation as the first pass of the pipeline to
1028 // minimize the impact of optimization changes.
1029 if (PGOOpt && PGOOpt->PseudoProbeForProfiling &&
1032
1033 bool HasSampleProfile = PGOOpt && (PGOOpt->Action == PGOOptions::SampleUse);
1034
1035 // In ThinLTO mode, when flattened profile is used, all the available
1036 // profile information will be annotated in PreLink phase so there is
1037 // no need to load the profile again in PostLink.
1038 bool LoadSampleProfile =
1039 HasSampleProfile &&
1041
1042 // During the ThinLTO backend phase we perform early indirect call promotion
1043 // here, before globalopt. Otherwise imported available_externally functions
1044 // look unreferenced and are removed. If we are going to load the sample
1045 // profile then defer until later.
1046 // TODO: See if we can move later and consolidate with the location where
1047 // we perform ICP when we are loading a sample profile.
1048 // TODO: We pass HasSampleProfile (whether there was a sample profile file
1049 // passed to the compile) to the SamplePGO flag of ICP. This is used to
1050 // determine whether the new direct calls are annotated with prof metadata.
1051 // Ideally this should be determined from whether the IR is annotated with
1052 // sample profile, and not whether the a sample profile was provided on the
1053 // command line. E.g. for flattened profiles where we will not be reloading
1054 // the sample profile in the ThinLTO backend, we ideally shouldn't have to
1055 // provide the sample profile file.
1056 if (Phase == ThinOrFullLTOPhase::ThinLTOPostLink && !LoadSampleProfile)
1057 MPM.addPass(PGOIndirectCallPromotion(true /* InLTO */, HasSampleProfile));
1058
1059 // Create an early function pass manager to cleanup the output of the
1060 // frontend. Not necessary with LTO post link pipelines since the pre link
1061 // pipeline already cleaned up the frontend output.
1063 // Do basic inference of function attributes from known properties of system
1064 // libraries and other oracles.
1067
1068 FunctionPassManager EarlyFPM;
1069 // Lower llvm.expect to metadata before attempting transforms.
1070 // Compare/branch metadata may alter the behavior of passes like
1071 // SimplifyCFG.
1073 EarlyFPM.addPass(SimplifyCFGPass());
1075 EarlyFPM.addPass(EarlyCSEPass());
1076 if (Level == OptimizationLevel::O3)
1077 EarlyFPM.addPass(CallSiteSplittingPass());
1079 std::move(EarlyFPM), PTO.EagerlyInvalidateAnalyses));
1080 }
1081
1082 if (LoadSampleProfile) {
1083 // Annotate sample profile right after early FPM to ensure freshness of
1084 // the debug info.
1085 MPM.addPass(SampleProfileLoaderPass(PGOOpt->ProfileFile,
1086 PGOOpt->ProfileRemappingFile, Phase));
1087 // Cache ProfileSummaryAnalysis once to avoid the potential need to insert
1088 // RequireAnalysisPass for PSI before subsequent non-module passes.
1090 // Do not invoke ICP in the LTOPrelink phase as it makes it hard
1091 // for the profile annotation to be accurate in the LTO backend.
1092 if (!isLTOPreLink(Phase))
1093 // We perform early indirect call promotion here, before globalopt.
1094 // This is important for the ThinLTO backend phase because otherwise
1095 // imported available_externally functions look unreferenced and are
1096 // removed.
1097 MPM.addPass(
1098 PGOIndirectCallPromotion(true /* IsInLTO */, true /* SamplePGO */));
1099 }
1100
1101 // Try to perform OpenMP specific optimizations on the module. This is a
1102 // (quick!) no-op if there are no OpenMP runtime calls present in the module.
1104
1107
1108 // Lower type metadata and the type.test intrinsic in the ThinLTO
1109 // post link pipeline after ICP. This is to enable usage of the type
1110 // tests in ICP sequences.
1112 MPM.addPass(LowerTypeTestsPass(nullptr, nullptr, true));
1113
1115
1116 // Interprocedural constant propagation now that basic cleanup has occurred
1117 // and prior to optimizing globals.
1118 // FIXME: This position in the pipeline hasn't been carefully considered in
1119 // years, it should be re-analyzed.
1121 IPSCCPOptions(/*AllowFuncSpec=*/
1122 Level != OptimizationLevel::Os &&
1123 Level != OptimizationLevel::Oz &&
1124 !isLTOPreLink(Phase))));
1125
1126 // Attach metadata to indirect call sites indicating the set of functions
1127 // they may target at run-time. This should follow IPSCCP.
1129
1130 // Optimize globals to try and fold them into constants.
1132
1133 // Create a small function pass pipeline to cleanup after all the global
1134 // optimizations.
1135 FunctionPassManager GlobalCleanupPM;
1136 // FIXME: Should this instead by a run of SROA?
1137 GlobalCleanupPM.addPass(PromotePass());
1138 GlobalCleanupPM.addPass(InstCombinePass());
1139 invokePeepholeEPCallbacks(GlobalCleanupPM, Level);
1140 GlobalCleanupPM.addPass(
1141 SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(true)));
1142 MPM.addPass(createModuleToFunctionPassAdaptor(std::move(GlobalCleanupPM),
1144
1145 // Invoke the pre-inliner passes for instrumentation PGO or MemProf.
1146 if (PGOOpt && Phase != ThinOrFullLTOPhase::ThinLTOPostLink &&
1147 (PGOOpt->Action == PGOOptions::IRInstr ||
1148 PGOOpt->Action == PGOOptions::IRUse || !PGOOpt->MemoryProfile.empty()))
1149 addPreInlinerPasses(MPM, Level, Phase);
1150
1151 // Add all the requested passes for instrumentation PGO, if requested.
1152 if (PGOOpt && Phase != ThinOrFullLTOPhase::ThinLTOPostLink &&
1153 (PGOOpt->Action == PGOOptions::IRInstr ||
1154 PGOOpt->Action == PGOOptions::IRUse)) {
1155 addPGOInstrPasses(MPM, Level,
1156 /*RunProfileGen=*/PGOOpt->Action == PGOOptions::IRInstr,
1157 /*IsCS=*/false, PGOOpt->AtomicCounterUpdate,
1158 PGOOpt->ProfileFile, PGOOpt->ProfileRemappingFile,
1159 PGOOpt->FS);
1160 MPM.addPass(PGOIndirectCallPromotion(false, false));
1161 }
1162 if (PGOOpt && Phase != ThinOrFullLTOPhase::ThinLTOPostLink &&
1163 PGOOpt->CSAction == PGOOptions::CSIRInstr)
1164 MPM.addPass(PGOInstrumentationGenCreateVar(PGOOpt->CSProfileGenFile));
1165
1166 if (PGOOpt && Phase != ThinOrFullLTOPhase::ThinLTOPostLink &&
1167 !PGOOpt->MemoryProfile.empty())
1168 MPM.addPass(MemProfUsePass(PGOOpt->MemoryProfile, PGOOpt->FS));
1169
1170 // Synthesize function entry counts for non-PGO compilation.
1171 if (EnableSyntheticCounts && !PGOOpt)
1173
1174 if (EnablePGOForceFunctionAttrs && PGOOpt)
1175 MPM.addPass(PGOForceFunctionAttrsPass(PGOOpt->ColdOptType));
1176
1177 MPM.addPass(AlwaysInlinerPass(/*InsertLifetimeIntrinsics=*/true));
1178
1181 else
1183
1184 // Remove any dead arguments exposed by cleanups, constant folding globals,
1185 // and argument promotion.
1187
1190
1191 // Optimize globals now that functions are fully simplified.
1194
1195 return MPM;
1196}
1197
1198/// TODO: Should LTO cause any differences to this set of passes?
1199void PassBuilder::addVectorPasses(OptimizationLevel Level,
1200 FunctionPassManager &FPM, bool IsFullLTO) {
1203
1206 if (IsFullLTO) {
1207 // The vectorizer may have significantly shortened a loop body; unroll
1208 // again. Unroll small loops to hide loop backedge latency and saturate any
1209 // parallel execution resources of an out-of-order processor. We also then
1210 // need to clean up redundancies and loop invariant code.
1211 // FIXME: It would be really good to use a loop-integrated instruction
1212 // combiner for cleanup here so that the unrolling and LICM can be pipelined
1213 // across the loop nests.
1214 // We do UnrollAndJam in a separate LPM to ensure it happens before unroll
1217 LoopUnrollAndJamPass(Level.getSpeedupLevel())));
1219 Level.getSpeedupLevel(), /*OnlyWhenForced=*/!PTO.LoopUnrolling,
1222 // Now that we are done with loop unrolling, be it either by LoopVectorizer,
1223 // or LoopUnroll passes, some variable-offset GEP's into alloca's could have
1224 // become constant-offset, thus enabling SROA and alloca promotion. Do so.
1225 // NOTE: we are very late in the pipeline, and we don't have any LICM
1226 // or SimplifyCFG passes scheduled after us that would clean up
1227 // the CFG mess this may create if allowed to modify the CFG, so forbid that.
1229 }
1230
1231 if (!IsFullLTO) {
1232 // Eliminate loads by forwarding stores from the previous iteration to loads
1233 // of the current iteration.
1235 }
1236 // Cleanup after the loop optimization passes.
1237 FPM.addPass(InstCombinePass());
1238
1239 if (Level.getSpeedupLevel() > 1 && ExtraVectorizerPasses) {
1240 ExtraVectorPassManager ExtraPasses;
1241 // At higher optimization levels, try to clean up any runtime overlap and
1242 // alignment checks inserted by the vectorizer. We want to track correlated
1243 // runtime checks for two inner loops in the same outer loop, fold any
1244 // common computations, hoist loop-invariant aspects out of any outer loop,
1245 // and unswitch the runtime checks if possible. Once hoisted, we may have
1246 // dead (or speculatable) control flows or more combining opportunities.
1247 ExtraPasses.addPass(EarlyCSEPass());
1249 ExtraPasses.addPass(InstCombinePass());
1250 LoopPassManager LPM;
1252 /*AllowSpeculation=*/true));
1253 LPM.addPass(SimpleLoopUnswitchPass(/* NonTrivial */ Level ==
1255 ExtraPasses.addPass(
1256 createFunctionToLoopPassAdaptor(std::move(LPM), /*UseMemorySSA=*/true,
1257 /*UseBlockFrequencyInfo=*/true));
1258 ExtraPasses.addPass(
1259 SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(true)));
1260 ExtraPasses.addPass(InstCombinePass());
1261 FPM.addPass(std::move(ExtraPasses));
1262 }
1263
1264 // Now that we've formed fast to execute loop structures, we do further
1265 // optimizations. These are run afterward as they might block doing complex
1266 // analyses and transforms such as what are needed for loop vectorization.
1267
1268 // Cleanup after loop vectorization, etc. Simplification passes like CVP and
1269 // GVN, loop transforms, and others have already run, so it's now better to
1270 // convert to more optimized IR using more aggressive simplify CFG options.
1271 // The extra sinking transform can create larger basic blocks, so do this
1272 // before SLP vectorization.
1274 .forwardSwitchCondToPhi(true)
1275 .convertSwitchRangeToICmp(true)
1276 .convertSwitchToLookupTable(true)
1277 .needCanonicalLoops(false)
1278 .hoistCommonInsts(true)
1279 .sinkCommonInsts(true)));
1280
1281 if (IsFullLTO) {
1282 FPM.addPass(SCCPPass());
1283 FPM.addPass(InstCombinePass());
1284 FPM.addPass(BDCEPass());
1285 }
1286
1287 // Optimize parallel scalar instruction chains into SIMD instructions.
1288 if (PTO.SLPVectorization) {
1290 if (Level.getSpeedupLevel() > 1 && ExtraVectorizerPasses) {
1291 FPM.addPass(EarlyCSEPass());
1292 }
1293 }
1294 // Enhance/cleanup vector code.
1296
1297 if (!IsFullLTO) {
1298 FPM.addPass(InstCombinePass());
1299 // Unroll small loops to hide loop backedge latency and saturate any
1300 // parallel execution resources of an out-of-order processor. We also then
1301 // need to clean up redundancies and loop invariant code.
1302 // FIXME: It would be really good to use a loop-integrated instruction
1303 // combiner for cleanup here so that the unrolling and LICM can be pipelined
1304 // across the loop nests.
1305 // We do UnrollAndJam in a separate LPM to ensure it happens before unroll
1306 if (EnableUnrollAndJam && PTO.LoopUnrolling) {
1308 LoopUnrollAndJamPass(Level.getSpeedupLevel())));
1309 }
1311 Level.getSpeedupLevel(), /*OnlyWhenForced=*/!PTO.LoopUnrolling,
1314 // Now that we are done with loop unrolling, be it either by LoopVectorizer,
1315 // or LoopUnroll passes, some variable-offset GEP's into alloca's could have
1316 // become constant-offset, thus enabling SROA and alloca promotion. Do so.
1317 // NOTE: we are very late in the pipeline, and we don't have any LICM
1318 // or SimplifyCFG passes scheduled after us that would clean up
1319 // the CFG mess this may create if allowed to modify the CFG, so forbid that.
1321 }
1322
1325 FPM.addPass(InstCombinePass());
1326
1327 // This is needed for two reasons:
1328 // 1. It works around problems that instcombine introduces, such as sinking
1329 // expensive FP divides into loops containing multiplications using the
1330 // divide result.
1331 // 2. It helps to clean up some loop-invariant code created by the loop
1332 // unroll pass when IsFullLTO=false.
1335 /*AllowSpeculation=*/true),
1336 /*UseMemorySSA=*/true, /*UseBlockFrequencyInfo=*/false));
1337
1338 // Now that we've vectorized and unrolled loops, we may have more refined
1339 // alignment information, try to re-derive it here.
1341}
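// Illustrative note (not part of the upstream file): the shape of this vector
// pipeline can be tuned from the command line using flags defined earlier in
// this file, e.g. a sketch for experimenting at -O3:
//
//   opt -passes='default<O3>' -enable-unroll-and-jam -extra-vectorizer-passes \
//       input.ll -S -o output.ll
//
// Disabling SLP or loop vectorization entirely is done by the embedding client
// through PipelineTuningOptions (PTO.SLPVectorization / PTO.LoopVectorization)
// rather than through flags in this file.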
1342
1345 ThinOrFullLTOPhase LTOPhase) {
1346 const bool LTOPreLink = isLTOPreLink(LTOPhase);
1348
1349 // Run partial inlining pass to partially inline functions that have
1350 // large bodies.
1353
1354 // Remove avail extern fns and globals definitions since we aren't compiling
1355 // an object file for later LTO. For LTO we want to preserve these so they
1356 // are eligible for inlining at link-time. Note if they are unreferenced they
1357 // will be removed by GlobalDCE later, so this only impacts referenced
1358 // available externally globals. Eventually they will be suppressed during
1359 // codegen, but eliminating here enables more opportunity for GlobalDCE as it
1360 // may make globals referenced by available external functions dead and saves
1361 // running remaining passes on the eliminated functions. These should be
1362 // preserved during prelinking for link-time inlining decisions.
1363 if (!LTOPreLink)
1365
1368
1369 // Do RPO function attribute inference across the module to forward-propagate
1370 // attributes where applicable.
1371 // FIXME: Is this really an optimization rather than a canonicalization?
1373
1374 // Do a post inline PGO instrumentation and use pass. This is a context
1375 // sensitive PGO pass. We don't want to do this in the LTOPreLink phase as
1376 // cross-module inline has not been done yet. The context sensitive
1377 // instrumentation is after all the inlines are done.
1378 if (!LTOPreLink && PGOOpt) {
1379 if (PGOOpt->CSAction == PGOOptions::CSIRInstr)
1380 addPGOInstrPasses(MPM, Level, /*RunProfileGen=*/true,
1381 /*IsCS=*/true, PGOOpt->AtomicCounterUpdate,
1382 PGOOpt->CSProfileGenFile, PGOOpt->ProfileRemappingFile,
1383 PGOOpt->FS);
1384 else if (PGOOpt->CSAction == PGOOptions::CSIRUse)
1385 addPGOInstrPasses(MPM, Level, /*RunProfileGen=*/false,
1386 /*IsCS=*/true, PGOOpt->AtomicCounterUpdate,
1387 PGOOpt->ProfileFile, PGOOpt->ProfileRemappingFile,
1388 PGOOpt->FS);
1389 }
1390
1391 // Re-compute GlobalsAA here prior to function passes. This is particularly
1392 // useful as the above will have inlined, DCE'ed, and function-attr
1393 // propagated everything. We should at this point have a reasonably minimal
1394 // and richly annotated call graph. By computing aliasing and mod/ref
1395 // information for all local globals here, the late loop passes and notably
1396 // the vectorizer will be able to use them to help recognize vectorizable
1397 // memory operations.
1400
1402
1403 FunctionPassManager OptimizePM;
1404 // Schedule LoopVersioningLICM once inlining is over, because after that we
1405 // may see more accurate aliasing. The reason to run this late is that
1406 // versioning too early may prevent further inlining due to the increase in
1407 // code size. Other optimizations that run later may benefit from the
1408 // no-alias assumption in the cloned loop.
1410 OptimizePM.addPass(
1412 // LoopVersioningLICM pass might increase new LICM opportunities.
1415 /*AllowSpeculation=*/true),
1416 /*UseMemorySSA=*/true, /*UseBlockFrequencyInfo=*/false));
1417 }
1418
1419 OptimizePM.addPass(Float2IntPass());
1421
1422 if (EnableMatrix) {
1423 OptimizePM.addPass(LowerMatrixIntrinsicsPass());
1424 OptimizePM.addPass(EarlyCSEPass());
1425 }
1426
1427 // CHR pass should only be applied with the profile information.
1428 // The check is to check the profile summary information in CHR.
1429 if (EnableCHR && Level == OptimizationLevel::O3)
1430 OptimizePM.addPass(ControlHeightReductionPass());
1431
1432 // FIXME: We need to run some loop optimizations to re-rotate loops after
1433 // simplifycfg and others undo their rotation.
1434
1435 // Optimize the loop execution. These passes operate on entire loop nests
1436 // rather than on each loop in an inside-out manner, and so they are actually
1437 // function passes.
1438
1439 invokeVectorizerStartEPCallbacks(OptimizePM, Level);
1440
1441 LoopPassManager LPM;
1442 // First rotate loops that may have been un-rotated by prior passes.
1443 // Disable header duplication at -Oz.
1445 Level != OptimizationLevel::Oz,
1446 LTOPreLink));
1447 // Some loops may have become dead by now. Try to delete them.
1448 // FIXME: see discussion in https://reviews.llvm.org/D112851,
1449 // this may need to be revisited once we run GVN before loop deletion
1450 // in the simplification pipeline.
1453 std::move(LPM), /*UseMemorySSA=*/false, /*UseBlockFrequencyInfo=*/false));
1454
1455 // Distribute loops to allow partial vectorization. I.e. isolate dependences
1456 // into separate loop that would otherwise inhibit vectorization. This is
1457 // currently only performed for loops marked with the metadata
1458 // llvm.loop.distribute=true or when -enable-loop-distribute is specified.
1459 OptimizePM.addPass(LoopDistributePass());
1460
1461 // Populates the VFABI attribute with the scalar-to-vector mappings
1462 // from the TargetLibraryInfo.
1463 OptimizePM.addPass(InjectTLIMappings());
1464
1465 addVectorPasses(Level, OptimizePM, /* IsFullLTO */ false);
1466
1467 // LoopSink pass sinks instructions hoisted by LICM, which serves as a
1468 // canonicalization pass that enables other optimizations. As a result,
1469 // LoopSink pass needs to be a very late IR pass to avoid undoing LICM
1470 // result too early.
1471 OptimizePM.addPass(LoopSinkPass());
1472
1473 // And finally clean up LCSSA form before generating code.
1474 OptimizePM.addPass(InstSimplifyPass());
1475
1476 // This hoists/decomposes div/rem ops. It should run after other sink/hoist
1477 // passes to avoid re-sinking, but before SimplifyCFG because it can allow
1478 // flattening of blocks.
1479 OptimizePM.addPass(DivRemPairsPass());
1480
1481 // Try to annotate calls that were created during optimization.
1482 OptimizePM.addPass(TailCallElimPass());
1483
1484 // LoopSink (and other loop passes since the last simplifyCFG) might have
1485 // resulted in single-entry-single-exit or empty blocks. Clean up the CFG.
1486 OptimizePM.addPass(
1487 SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(true)));
1488
1489 // Add the core optimizing pipeline.
1490 MPM.addPass(createModuleToFunctionPassAdaptor(std::move(OptimizePM),
1492
1494
1495 // Split out cold code. Splitting is done late to avoid hiding context from
1496 // other optimizations and inadvertently regressing performance. The tradeoff
1497 // is that this has a higher code size cost than splitting early.
1498 if (EnableHotColdSplit && !LTOPreLink)
1500
1501 // Search the code for similar regions of code. If enough similar regions can
1502 // be found where extracting the regions into their own function will decrease
1503 // the size of the program, we extract the regions and deduplicate the
1504 // structurally similar regions.
1505 if (EnableIROutliner)
1507
1508 // Merge functions if requested.
1509 if (PTO.MergeFunctions)
1511
1512 // Now we need to do some global optimization transforms.
1513 // FIXME: It would seem like these should come first in the optimization
1514 // pipeline and maybe be the bottom of the canonicalization pipeline? Weird
1515 // ordering here.
1518
1519 if (PTO.CallGraphProfile && !LTOPreLink)
1522
1523 // TODO: The relative lookup table converter pass caused an issue when full LTO
1524 // is enabled. See https://reviews.llvm.org/D94355 for more details.
1525 // Until the issue is fixed, disable this pass during the pre-linking phase.
1526 if (!LTOPreLink)
1528
1529 return MPM;
1530}
1531
1534 bool LTOPreLink) {
1535 if (Level == OptimizationLevel::O0)
1536 return buildO0DefaultPipeline(Level, LTOPreLink);
1537
1539
1540 // Convert @llvm.global.annotations to !annotation metadata.
1542
1543 // Force any function attributes we want the rest of the pipeline to observe.
1545
1546 if (PGOOpt && PGOOpt->DebugInfoForProfiling)
1548
1549 // Apply module pipeline start EP callback.
1551
1552 const ThinOrFullLTOPhase LTOPhase = LTOPreLink
1555 // Add the core simplification pipeline.
1557
1558 // Now add the optimization pipeline.
1560
1561 if (PGOOpt && PGOOpt->PseudoProbeForProfiling &&
1562 PGOOpt->Action == PGOOptions::SampleUse)
1564
1565 // Emit annotation remarks.
1567
1568 if (LTOPreLink)
1569 addRequiredLTOPreLinkPasses(MPM);
1570 return MPM;
1571}
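// Illustrative sketch (not part of the upstream file): the canonical way a
// client drives this pipeline with the new pass manager. `M` is an existing
// llvm::Module; error handling is omitted:
//
//   LoopAnalysisManager LAM;
//   FunctionAnalysisManager FAM;
//   CGSCCAnalysisManager CGAM;
//   ModuleAnalysisManager MAM;
//   PassBuilder PB;
//   PB.registerModuleAnalyses(MAM);
//   PB.registerCGSCCAnalyses(CGAM);
//   PB.registerFunctionAnalyses(FAM);
//   PB.registerLoopAnalyses(LAM);
//   PB.crossRegisterProxies(LAM, FAM, CGAM, MAM);
//   ModulePassManager MPM =
//       PB.buildPerModuleDefaultPipeline(OptimizationLevel::O2);
//   MPM.run(M, MAM);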
1572
1575 bool EmitSummary) {
1577 if (ThinLTO)
1579 else
1581 MPM.addPass(EmbedBitcodePass(ThinLTO, EmitSummary));
1582
1583 // Use the ThinLTO post-link pipeline with sample profiling
1584 if (ThinLTO && PGOOpt && PGOOpt->Action == PGOOptions::SampleUse)
1585 MPM.addPass(buildThinLTODefaultPipeline(Level, /*ImportSummary=*/nullptr));
1586 else {
1587 // Otherwise, just use the module optimization pipeline.
1588 MPM.addPass(
1590 // Emit annotation remarks.
1592 }
1593 return MPM;
1594}
1595
1598 if (Level == OptimizationLevel::O0)
1599 return buildO0DefaultPipeline(Level, /*LTOPreLink*/true);
1600
1602
1603 // Convert @llvm.global.annotations to !annotation metadata.
1605
1606 // Force any function attributes we want the rest of the pipeline to observe.
1608
1609 if (PGOOpt && PGOOpt->DebugInfoForProfiling)
1611
1612 // Apply module pipeline start EP callback.
1614
1615 // If we are planning to perform ThinLTO later, we don't bloat the code with
1616 // unrolling/vectorization/... now. Just simplify the module as much as we
1617 // can.
1620
1621 // Run partial inlining pass to partially inline functions that have
1622 // large bodies.
1623 // FIXME: It isn't clear whether this is really the right place to run this
1624 // in ThinLTO. Because there is another canonicalization and simplification
1625 // phase that will run after the thin link, running this here ends up with
1626 // less information than will be available later and it may grow functions in
1627 // ways that aren't beneficial.
1630
1631 if (PGOOpt && PGOOpt->PseudoProbeForProfiling &&
1632 PGOOpt->Action == PGOOptions::SampleUse)
1634
1635 // Handle Optimizer{Early,Last}EPCallbacks added by clang on PreLink. Actual
1636 // optimization is going to be done in the PostLink stage, but clang can't add
1637 // callbacks there in the case of in-process ThinLTO invoked by the linker.
1640
1641 // Emit annotation remarks.
1643
1644 addRequiredLTOPreLinkPasses(MPM);
1645
1646 return MPM;
1647}
1648
1650 OptimizationLevel Level, const ModuleSummaryIndex *ImportSummary) {
1652
1653 if (ImportSummary) {
1654 // For ThinLTO we must apply the context disambiguation decisions early, to
1655 // ensure we can correctly match the callsites to summary data.
1658
1659 // These passes import type identifier resolutions for whole-program
1660 // devirtualization and CFI. They must run early because other passes may
1661 // disturb the specific instruction patterns that these passes look for,
1662 // creating dependencies on resolutions that may not appear in the summary.
1663 //
1664 // For example, GVN may transform the pattern assume(type.test) appearing in
1665 // two basic blocks into assume(phi(type.test, type.test)), which would
1666 // transform a dependency on a WPD resolution into a dependency on a type
1667 // identifier resolution for CFI.
1668 //
1669 // Also, WPD has access to more precise information than ICP and can
1670 // devirtualize more effectively, so it should operate on the IR first.
1671 //
1672 // The WPD and LowerTypeTest passes need to run at -O0 to lower type
1673 // metadata and intrinsics.
1674 MPM.addPass(WholeProgramDevirtPass(nullptr, ImportSummary));
1675 MPM.addPass(LowerTypeTestsPass(nullptr, ImportSummary));
1676 }
1677
1678 if (Level == OptimizationLevel::O0) {
1679 // Run a second time to clean up any type tests left behind by WPD for use
1680 // in ICP.
1681 MPM.addPass(LowerTypeTestsPass(nullptr, nullptr, true));
1682 // Drop available_externally and unreferenced globals. This is necessary
1683 // with ThinLTO in order to avoid leaving undefined references to dead
1684 // globals in the object file.
1687 return MPM;
1688 }
1689
1690 // Add the core simplification pipeline.
1693
1694 // Now add the optimization pipeline.
1697
1698 // Emit annotation remarks.
1700
1701 return MPM;
1702}
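// Illustrative note (not part of the upstream file): the two ThinLTO phases
// built above are also reachable as textual pipeline aliases, which is useful
// for reproducing them in isolation with `opt` (a sketch; spellings assume the
// standard alias names):
//
//   opt -passes='thinlto-pre-link<O2>' input.ll -S -o prelink.ll
//   opt -passes='thinlto<O2>' prelink.ll -S -o postlink.ll
//
// The pre-link alias corresponds to buildThinLTOPreLinkDefaultPipeline and the
// post-link alias to buildThinLTODefaultPipeline (normally run with a module
// summary supplied by the thin link).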
1703
1706 // FIXME: We should use a customized pre-link pipeline!
1707 return buildPerModuleDefaultPipeline(Level,
1708 /* LTOPreLink */ true);
1709}
1710
1713 ModuleSummaryIndex *ExportSummary) {
1715
1717
1718 // Create a function that performs CFI checks for cross-DSO calls with targets
1719 // in the current module.
1721
1722 if (Level == OptimizationLevel::O0) {
1723 // The WPD and LowerTypeTest passes need to run at -O0 to lower type
1724 // metadata and intrinsics.
1725 MPM.addPass(WholeProgramDevirtPass(ExportSummary, nullptr));
1726 MPM.addPass(LowerTypeTestsPass(ExportSummary, nullptr));
1727 // Run a second time to clean up any type tests left behind by WPD for use
1728 // in ICP.
1729 MPM.addPass(LowerTypeTestsPass(nullptr, nullptr, true));
1730
1732
1733 // Emit annotation remarks.
1735
1736 return MPM;
1737 }
1738
1739 if (PGOOpt && PGOOpt->Action == PGOOptions::SampleUse) {
1740 // Load sample profile before running the LTO optimization pipeline.
1741 MPM.addPass(SampleProfileLoaderPass(PGOOpt->ProfileFile,
1742 PGOOpt->ProfileRemappingFile,
1744 // Cache ProfileSummaryAnalysis once to avoid the potential need to insert
1745 // RequireAnalysisPass for PSI before subsequent non-module passes.
1747 }
1748
1749 // Try to run OpenMP optimizations, quick no-op if no OpenMP metadata present.
1751
1752 // Remove unused virtual tables to improve the quality of code generated by
1753 // whole-program devirtualization and bitset lowering.
1754 MPM.addPass(GlobalDCEPass(/*InLTOPostLink=*/true));
1755
1756 // Do basic inference of function attributes from known properties of system
1757 // libraries and other oracles.
1759
1760 if (Level.getSpeedupLevel() > 1) {
1763
1764 // Indirect call promotion. This should promote all the targets that are
1765 // left by the earlier promotion pass that promotes intra-module targets.
1766 // This two-step promotion is to save the compile time. For LTO, it should
1767 // produce the same result as if we only do promotion here.
1769 true /* InLTO */, PGOOpt && PGOOpt->Action == PGOOptions::SampleUse));
1770
1771 // Propagate constants at call sites into the functions they call. This
1772 // opens opportunities for globalopt (and inlining) by substituting function
1773 // pointers passed as arguments to direct uses of functions.
1774 MPM.addPass(IPSCCPPass(IPSCCPOptions(/*AllowFuncSpec=*/
1775 Level != OptimizationLevel::Os &&
1776 Level != OptimizationLevel::Oz)));
1777
1778 // Attach metadata to indirect call sites indicating the set of functions
1779 // they may target at run-time. This should follow IPSCCP.
1781 }
1782
1783 // Now deduce any function attributes based on the current code.
1784 MPM.addPass(
1786
1787 // Do RPO function attribute inference across the module to forward-propagate
1788 // attributes where applicable.
1789 // FIXME: Is this really an optimization rather than a canonicalization?
1791
1792 // Use in-range annotations on GEP indices to split globals where beneficial.
1794
1795 // Run whole program optimization of virtual call when the list of callees
1796 // is fixed.
1797 MPM.addPass(WholeProgramDevirtPass(ExportSummary, nullptr));
1798
1799 // Stop here at -O1.
1800 if (Level == OptimizationLevel::O1) {
1801 // The LowerTypeTestsPass needs to run to lower type metadata and the
1802 // type.test intrinsics. The pass does nothing if CFI is disabled.
1803 MPM.addPass(LowerTypeTestsPass(ExportSummary, nullptr));
1804 // Run a second time to clean up any type tests left behind by WPD for use
1805 // in ICP (which is performed earlier than this in the regular LTO
1806 // pipeline).
1807 MPM.addPass(LowerTypeTestsPass(nullptr, nullptr, true));
1808
1810
1811 // Emit annotation remarks.
1813
1814 return MPM;
1815 }
1816
1817 // Optimize globals to try and fold them into constants.
1819
1820 // Promote any localized globals to SSA registers.
1822
1823 // Linking modules together can lead to duplicate global constants; only
1824 // keep one copy of each constant.
1826
1827 // Remove unused arguments from functions.
1829
1830 // Reduce the code after globalopt and ipsccp. Both can open up significant
1831 // simplification opportunities, and both can propagate functions through
1832 // function pointers. When this happens, we often have to resolve varargs
1833 // calls, etc, so let instcombine do this.
1834 FunctionPassManager PeepholeFPM;
1835 PeepholeFPM.addPass(InstCombinePass());
1836 if (Level.getSpeedupLevel() > 1)
1837 PeepholeFPM.addPass(AggressiveInstCombinePass());
1838 invokePeepholeEPCallbacks(PeepholeFPM, Level);
1839
1840 MPM.addPass(createModuleToFunctionPassAdaptor(std::move(PeepholeFPM),
1842
1843 // Note: historically, the PruneEH pass was run first to deduce nounwind and
1844 // generally clean up exception handling overhead. It isn't clear this is
1845 // valuable as the inliner doesn't currently care whether it is inlining an
1846 // invoke or a call.
1847 // Run the inliner now.
1848 if (EnableModuleInliner) {
1852 } else {
1855 /* MandatoryFirst */ true,
1858 }
1859
1860 // Perform context disambiguation after inlining, since that would reduce the
1861 // amount of additional cloning required to distinguish the allocation
1862 // contexts.
1863 if (EnableMemProfContextDisambiguation)
1864 MPM.addPass(MemProfContextDisambiguation());
1865
1866 // Optimize globals again after we ran the inliner.
1867 MPM.addPass(GlobalOptPass());
1868
1869 // Run the OpenMPOpt pass again after global optimizations.
1870 MPM.addPass(OpenMPOptPass(ThinOrFullLTOPhase::FullLTOPostLink));
1871
1872 // Garbage collect dead functions.
1873 MPM.addPass(GlobalDCEPass(/*InLTOPostLink=*/true));
1874
1875 // If we didn't decide to inline a function, check to see if we can
1876 // transform it to pass arguments by value instead of by reference.
1877 MPM.addPass(createModuleToPostOrderCGSCCPassAdaptor(ArgumentPromotionPass()));
1878
1879 FunctionPassManager FPM;
1880 // The IPO Passes may leave cruft around. Clean up after them.
1881 FPM.addPass(InstCombinePass());
1882 invokePeepholeEPCallbacks(FPM, Level);
1883
1884 if (EnableConstraintElimination)
1885 FPM.addPass(ConstraintEliminationPass());
1886
1888
1889 // Do a post-inline PGO instrumentation and use pass. This is a
1890 // context-sensitive PGO pass.
1891 if (PGOOpt) {
1892 if (PGOOpt->CSAction == PGOOptions::CSIRInstr)
1893 addPGOInstrPasses(MPM, Level, /*RunProfileGen=*/true,
1894 /*IsCS=*/true, PGOOpt->AtomicCounterUpdate,
1895 PGOOpt->CSProfileGenFile, PGOOpt->ProfileRemappingFile,
1896 PGOOpt->FS);
1897 else if (PGOOpt->CSAction == PGOOptions::CSIRUse)
1898 addPGOInstrPasses(MPM, Level, /*RunProfileGen=*/false,
1899 /*IsCS=*/true, PGOOpt->AtomicCounterUpdate,
1900 PGOOpt->ProfileFile, PGOOpt->ProfileRemappingFile,
1901 PGOOpt->FS);
1902 }
1903
1904 // Break up allocas.
1905 FPM.addPass(SROAPass(SROAOptions::ModifyCFG));
1906
1907 // LTO provides additional opportunities for tail call elimination due to
1908 // link-time inlining and visibility of the nocapture attribute.
1909 FPM.addPass(TailCallElimPass());
1910
1911 // Run a few AA driver optimizations here and now to cleanup the code.
1912 MPM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM),
1913 PTO.EagerlyInvalidateAnalyses));
1914
1915 MPM.addPass(
1916 createModuleToPostOrderCGSCCPassAdaptor(PostOrderFunctionAttrsPass()));
1917
1918 // Require the GlobalsAA analysis for the module so we can query it within
1919 // MainFPM.
1920 if (EnableGlobalAnalyses) {
1921 MPM.addPass(RequireAnalysisPass<GlobalsAA, Module>());
1922 // Invalidate AAManager so it can be recreated and pick up the newly
1923 // available GlobalsAA.
1924 MPM.addPass(
1925 createModuleToFunctionPassAdaptor(InvalidateAnalysisPass<AAManager>()));
1926 }
1927
1928 FunctionPassManager MainFPM;
1929 MainFPM.addPass(createFunctionToLoopPassAdaptor(
1930 LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap,
1931 /*AllowSpeculation=*/true),
1932 /*UseMemorySSA=*/true, /*UseBlockFrequencyInfo=*/false));
1933
1934 if (RunNewGVN)
1935 MainFPM.addPass(NewGVNPass());
1936 else
1937 MainFPM.addPass(GVNPass());
1938
1939 // Remove dead memcpy() calls.
1940 MainFPM.addPass(MemCpyOptPass());
1941
1942 // Nuke dead stores.
1943 MainFPM.addPass(DSEPass());
1944 MainFPM.addPass(MoveAutoInitPass());
1945 MainFPM.addPass(MergedLoadStoreMotionPass());
1946
1947 LoopPassManager LPM;
1948 if (EnableLoopFlatten && Level.getSpeedupLevel() > 1)
1949 LPM.addPass(LoopFlattenPass());
1950 LPM.addPass(IndVarSimplifyPass());
1951 LPM.addPass(LoopDeletionPass());
1952 // FIXME: Add loop interchange.
1953
1954 // Unroll small loops and perform peeling.
1955 LPM.addPass(LoopFullUnrollPass(Level.getSpeedupLevel(),
1956 /* OnlyWhenForced= */ !PTO.LoopUnrolling,
1957 PTO.ForgetAllSCEVInLoopUnroll));
1958 // The loop passes in LPM (LoopFullUnrollPass) do not preserve MemorySSA.
1959 // *All* loop passes must preserve it, in order to be able to use it.
1960 MainFPM.addPass(createFunctionToLoopPassAdaptor(
1961 std::move(LPM), /*UseMemorySSA=*/false, /*UseBlockFrequencyInfo=*/true));
1962
1963 MainFPM.addPass(LoopDistributePass());
1964
1965 addVectorPasses(Level, MainFPM, /* IsFullLTO */ true);
1966
1967 // Run the OpenMPOpt CGSCC pass again late.
1968 MPM.addPass(createModuleToPostOrderCGSCCPassAdaptor(
1969 OpenMPOptCGSCCPass(ThinOrFullLTOPhase::FullLTOPostLink)));
1970
1971 invokePeepholeEPCallbacks(MainFPM, Level);
1972 MainFPM.addPass(JumpThreadingPass());
1973 MPM.addPass(createModuleToFunctionPassAdaptor(std::move(MainFPM),
1974 PTO.EagerlyInvalidateAnalyses));
1975
1976 // Lower type metadata and the type.test intrinsic. This pass supports
1977 // clang's control flow integrity mechanisms (-fsanitize=cfi*) and needs
1978 // to be run at link time if CFI is enabled. This pass does nothing if
1979 // CFI is disabled.
1980 MPM.addPass(LowerTypeTestsPass(ExportSummary, nullptr));
1981 // Run a second time to clean up any type tests left behind by WPD for use
1982 // in ICP (which is performed earlier than this in the regular LTO pipeline).
1983 MPM.addPass(LowerTypeTestsPass(nullptr, nullptr, true));
1984
1985 // Enable splitting late in the FullLTO post-link pipeline.
1986 if (EnableHotColdSplit)
1987 MPM.addPass(HotColdSplittingPass());
1988
1989 // Add late LTO optimization passes.
1990 FunctionPassManager LateFPM;
1991
1992 // The LoopSink pass sinks instructions hoisted by LICM, which serves as a
1993 // canonicalization pass that enables other optimizations. As a result,
1994 // LoopSink needs to run as a very late IR pass to avoid undoing LICM
1995 // results too early.
1996 LateFPM.addPass(LoopSinkPass());
1997
1998 // This hoists/decomposes div/rem ops. It should run after other sink/hoist
1999 // passes to avoid re-sinking, but before SimplifyCFG because it can allow
2000 // flattening of blocks.
2001 LateFPM.addPass(DivRemPairsPass());
2002
2003 // Delete basic blocks, which optimization passes may have killed.
2004 LateFPM.addPass(SimplifyCFGPass(
2005 SimplifyCFGOptions().convertSwitchRangeToICmp(true).hoistCommonInsts(
2006 true)));
2007 MPM.addPass(createModuleToFunctionPassAdaptor(std::move(LateFPM)));
2008
2009 // Drop bodies of available-externally objects to improve GlobalDCE.
2010 MPM.addPass(EliminateAvailableExternallyPass());
2011
2012 // Now that we have optimized the program, discard unreachable functions.
2013 MPM.addPass(GlobalDCEPass(/*InLTOPostLink=*/true));
2014
2015 if (PTO.MergeFunctions)
2016 MPM.addPass(MergeFunctionsPass());
2017
2018 if (PTO.CallGraphProfile)
2019 MPM.addPass(CGProfilePass(/*InLTOPostLink=*/true));
2020
2021 invokeFullLinkTimeOptimizationLastEPCallbacks(MPM, Level);
2022
2023 // Emit annotation remarks.
2024 addAnnotationRemarksPass(MPM);
2025
2026 return MPM;
2027}
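// A minimal usage sketch (not part of PassBuilderPipelines.cpp): how an LTO
// client might drive the pipeline assembled above. The function name
// `runRegularLTOPasses`, the choice of O2, and the assumption that a combined
// ModuleSummaryIndex is already available are illustrative only; the
// analysis-manager wiring is the standard new-pass-manager setup.
#include "llvm/IR/Module.h"
#include "llvm/IR/ModuleSummaryIndex.h"
#include "llvm/Passes/PassBuilder.h"
#include "llvm/Target/TargetMachine.h"

static void runRegularLTOPasses(llvm::Module &M,
                                llvm::ModuleSummaryIndex &CombinedIndex,
                                llvm::TargetMachine *TM) {
  using namespace llvm;
  LoopAnalysisManager LAM;
  FunctionAnalysisManager FAM;
  CGSCCAnalysisManager CGAM;
  ModuleAnalysisManager MAM;

  PassBuilder PB(TM);
  // Register the AA stack assembled by buildDefaultAAPipeline() below so
  // GVN, DSE, LICM, etc. can query aliasing information.
  FAM.registerPass([&] { return PB.buildDefaultAAPipeline(); });
  PB.registerModuleAnalyses(MAM);
  PB.registerCGSCCAnalyses(CGAM);
  PB.registerFunctionAnalyses(FAM);
  PB.registerLoopAnalyses(LAM);
  PB.crossRegisterProxies(LAM, FAM, CGAM, MAM);

  // Build and run the full LTO post-link pipeline defined above.
  ModulePassManager MPM =
      PB.buildLTODefaultPipeline(OptimizationLevel::O2, &CombinedIndex);
  MPM.run(M, MAM);
}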
2028
2029 ModulePassManager PassBuilder::buildO0DefaultPipeline(OptimizationLevel Level,
2030 bool LTOPreLink) {
2031 assert(Level == OptimizationLevel::O0 &&
2032 "buildO0DefaultPipeline should only be used with O0");
2033
2034 ModulePassManager MPM;
2035
2036 // Perform pseudo probe instrumentation in O0 mode. This is for
2037 // consistency between different build modes. For example, an LTO build can be
2038 // mixed with an O0 prelink and an O2 postlink. Loading a sample profile in
2039 // the postlink will require pseudo probe instrumentation in the prelink.
2040 if (PGOOpt && PGOOpt->PseudoProbeForProfiling)
2041 MPM.addPass(SampleProfileProbePass(TM));
2042
2043 if (PGOOpt && (PGOOpt->Action == PGOOptions::IRInstr ||
2044 PGOOpt->Action == PGOOptions::IRUse))
2045 addPGOInstrPassesForO0(
2046 MPM,
2047 /*RunProfileGen=*/(PGOOpt->Action == PGOOptions::IRInstr),
2048 /*IsCS=*/false, PGOOpt->AtomicCounterUpdate, PGOOpt->ProfileFile,
2049 PGOOpt->ProfileRemappingFile, PGOOpt->FS);
2050
2051 invokePipelineStartEPCallbacks(MPM, Level);
2052
2053 if (PGOOpt && PGOOpt->DebugInfoForProfiling)
2054 MPM.addPass(AddDiscriminatorsPass());
2055
2056 invokePipelineEarlySimplificationEPCallbacks(MPM, Level);
2057
2058 // Build a minimal pipeline based on the semantics required by LLVM,
2059 // which is just that always inlining occurs. Further, disable generating
2060 // lifetime intrinsics to avoid enabling further optimizations during
2061 // code generation.
2062 MPM.addPass(AlwaysInlinerPass(
2063 /*InsertLifetimeIntrinsics=*/false));
2064
2065 if (PTO.MergeFunctions)
2066 MPM.addPass(MergeFunctionsPass());
2067
2068 if (EnableMatrix)
2069 MPM.addPass(
2070 createModuleToFunctionPassAdaptor(LowerMatrixIntrinsicsPass(true)));
2071
2072 if (!CGSCCOptimizerLateEPCallbacks.empty()) {
2073 CGSCCPassManager CGPM;
2074 invokeCGSCCOptimizerLateEPCallbacks(CGPM, Level);
2075 if (!CGPM.isEmpty())
2076 MPM.addPass(createModuleToPostOrderCGSCCPassAdaptor(std::move(CGPM)));
2077 }
2078 if (!LateLoopOptimizationsEPCallbacks.empty()) {
2079 LoopPassManager LPM;
2080 invokeLateLoopOptimizationsEPCallbacks(LPM, Level);
2081 if (!LPM.isEmpty()) {
2082 MPM.addPass(createModuleToFunctionPassAdaptor(
2083 createFunctionToLoopPassAdaptor(std::move(LPM))));
2084 }
2085 }
2086 if (!LoopOptimizerEndEPCallbacks.empty()) {
2087 LoopPassManager LPM;
2088 invokeLoopOptimizerEndEPCallbacks(LPM, Level);
2089 if (!LPM.isEmpty()) {
2090 MPM.addPass(createModuleToFunctionPassAdaptor(
2091 createFunctionToLoopPassAdaptor(std::move(LPM))));
2092 }
2093 }
2094 if (!ScalarOptimizerLateEPCallbacks.empty()) {
2095 FunctionPassManager FPM;
2096 invokeScalarOptimizerLateEPCallbacks(FPM, Level);
2097 if (!FPM.isEmpty())
2098 MPM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM)));
2099 }
2100
2101 invokeOptimizerEarlyEPCallbacks(MPM, Level);
2102
2103 if (!VectorizerStartEPCallbacks.empty()) {
2104 FunctionPassManager FPM;
2105 invokeVectorizerStartEPCallbacks(FPM, Level);
2106 if (!FPM.isEmpty())
2107 MPM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM)));
2108 }
2109
2110 ModulePassManager CoroPM;
2111 CoroPM.addPass(CoroEarlyPass());
2112 CGSCCPassManager CGPM;
2113 CGPM.addPass(CoroSplitPass());
2114 CoroPM.addPass(createModuleToPostOrderCGSCCPassAdaptor(std::move(CGPM)));
2115 CoroPM.addPass(CoroCleanupPass());
2116 CoroPM.addPass(GlobalDCEPass());
2117 MPM.addPass(CoroConditionalWrapper(std::move(CoroPM)));
2118
2119 invokeOptimizerLastEPCallbacks(MPM, Level);
2120
2121 if (LTOPreLink)
2122 addRequiredLTOPreLinkPasses(MPM);
2123
2125
2126 return MPM;
2127}
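// A minimal usage sketch (not part of PassBuilderPipelines.cpp): the
// extension-point blocks above mean that even the O0 pipeline honors
// callbacks registered by a frontend or plugin. The helper name
// `buildO0WithExtraPass` and the choice of InstCombinePass are illustrative
// assumptions.
#include "llvm/Passes/PassBuilder.h"
#include "llvm/Transforms/InstCombine/InstCombine.h"

static llvm::ModulePassManager buildO0WithExtraPass(llvm::PassBuilder &PB) {
  using namespace llvm;
  // Any pass added here is spliced in by the VectorizerStartEPCallbacks
  // block above, wrapped in a module-to-function adaptor.
  PB.registerVectorizerStartEPCallback(
      [](FunctionPassManager &FPM, OptimizationLevel /*Level*/) {
        FPM.addPass(InstCombinePass());
      });
  return PB.buildO0DefaultPipeline(OptimizationLevel::O0);
}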
2128
2129 AAManager PassBuilder::buildDefaultAAPipeline() {
2130 AAManager AA;
2131
2132 // The order in which these are registered determines their priority when
2133 // being queried.
2134
2135 // First we register the basic alias analysis that provides the majority of
2136 // per-function local AA logic. This is a stateless, on-demand local set of
2137 // AA techniques.
2138 AA.registerFunctionAnalysis<BasicAA>();
2139
2140 // Next we query fast, specialized alias analyses that wrap IR-embedded
2141 // information about aliasing.
2142 AA.registerFunctionAnalysis<ScopedNoAliasAA>();
2143 AA.registerFunctionAnalysis<TypeBasedAA>();
2144
2145 // Add support for querying global aliasing information when available.
2146 // Because the `AAManager` is a function analysis and `GlobalsAA` is a module
2147 // analysis, all that the `AAManager` can do is query for any *cached*
2148 // results from `GlobalsAA` through a readonly proxy.
2149 if (EnableGlobalAnalyses)
2150 AA.registerModuleAnalysis<GlobalsAA>();
2151
2152 // Add target-specific alias analyses.
2153 if (TM)
2154 TM->registerDefaultAliasAnalyses(AA);
2155
2156 return AA;
2157}
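// A minimal usage sketch (not part of PassBuilderPipelines.cpp): the stack
// registered above is what the textual AA pipeline name "default" maps to,
// e.g. via `opt -aa-pipeline=default`. The helper name `parseDefaultAA` is an
// illustrative assumption.
#include "llvm/Passes/PassBuilder.h"
#include "llvm/Support/Error.h"

static llvm::AAManager parseDefaultAA(llvm::PassBuilder &PB) {
  llvm::AAManager AA;
  // "default" expands to buildDefaultAAPipeline(); individual analyses can
  // also be listed by name in priority order.
  if (llvm::Error Err = PB.parseAAPipeline(AA, "default"))
    llvm::report_fatal_error(std::move(Err));
  return AA;
}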