LLVM 20.0.0git
PassBuilderPipelines.cpp
1//===- Construction of pass pipelines -------------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8/// \file
9///
10/// This file provides the implementation of the PassBuilder based on our
11/// static pass registry as well as related functionality. It also provides
12/// helpers to aid in analyzing, debugging, and testing passes and pass
13/// pipelines.
14///
15//===----------------------------------------------------------------------===//
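//
// Illustrative sketch (simplified, and assuming a Module M plus the usual
// PassBuilder.h / analysis-manager setup; not code from this file): a client
// typically drives the pipelines defined here roughly like so:
//
//   LoopAnalysisManager LAM;
//   FunctionAnalysisManager FAM;
//   CGSCCAnalysisManager CGAM;
//   ModuleAnalysisManager MAM;
//   PassBuilder PB;
//   PB.registerModuleAnalyses(MAM);
//   PB.registerCGSCCAnalyses(CGAM);
//   PB.registerFunctionAnalyses(FAM);
//   PB.registerLoopAnalyses(LAM);
//   PB.crossRegisterProxies(LAM, FAM, CGAM, MAM);
//   ModulePassManager MPM =
//       PB.buildPerModuleDefaultPipeline(OptimizationLevel::O2);
//   MPM.run(M, MAM);  // M is the llvm::Module being optimized.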
16
17#include "llvm/ADT/Statistic.h"
28#include "llvm/IR/PassManager.h"
29#include "llvm/Pass.h"
148
149using namespace llvm;
150
152 "enable-ml-inliner", cl::init(InliningAdvisorMode::Default), cl::Hidden,
153 cl::desc("Enable ML policy for inliner. Currently trained for -Oz only"),
154 cl::values(clEnumValN(InliningAdvisorMode::Default, "default",
155 "Heuristics-based inliner version"),
156 clEnumValN(InliningAdvisorMode::Development, "development",
157 "Use development mode (runtime-loadable model)"),
158 clEnumValN(InliningAdvisorMode::Release, "release",
159 "Use release mode (AOT-compiled model)")));
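// Illustrative usage (a sketch; assumes the flag is reached through the opt
// tool or clang's -mllvm forwarding, and that release mode is built with the
// AOT-compiled inliner model):
//
//   opt -passes='default<Oz>' -enable-ml-inliner=release in.ll -S -o out.ll
//   clang -Oz -mllvm -enable-ml-inliner=release -c foo.c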
160
161/// Flag to enable inline deferral during PGO.
162static cl::opt<bool>
163 EnablePGOInlineDeferral("enable-npm-pgo-inline-deferral", cl::init(true),
165 cl::desc("Enable inline deferral during PGO"));
166
167static cl::opt<bool> EnableModuleInliner("enable-module-inliner",
168 cl::init(false), cl::Hidden,
169 cl::desc("Enable module inliner"));
170
172 "mandatory-inlining-first", cl::init(false), cl::Hidden,
173 cl::desc("Perform mandatory inlinings module-wide, before performing "
174 "inlining"));
175
177 "eagerly-invalidate-analyses", cl::init(true), cl::Hidden,
178 cl::desc("Eagerly invalidate more analyses in default pipelines"));
179
181 "enable-merge-functions", cl::init(false), cl::Hidden,
182 cl::desc("Enable function merging as part of the optimization pipeline"));
183
185 "enable-post-pgo-loop-rotation", cl::init(true), cl::Hidden,
186 cl::desc("Run the loop rotation transformation after PGO instrumentation"));
187
189 "enable-global-analyses", cl::init(true), cl::Hidden,
190 cl::desc("Enable inter-procedural analyses"));
191
192static cl::opt<bool>
193 RunPartialInlining("enable-partial-inlining", cl::init(false), cl::Hidden,
194                       cl::desc("Run Partial inlining pass"));
195
197 "extra-vectorizer-passes", cl::init(false), cl::Hidden,
198 cl::desc("Run cleanup optimization passes after vectorization"));
199
200static cl::opt<bool> RunNewGVN("enable-newgvn", cl::init(false), cl::Hidden,
201 cl::desc("Run the NewGVN pass"));
202
204 "enable-loopinterchange", cl::init(false), cl::Hidden,
205 cl::desc("Enable the experimental LoopInterchange Pass"));
206
207static cl::opt<bool> EnableUnrollAndJam("enable-unroll-and-jam",
208 cl::init(false), cl::Hidden,
209 cl::desc("Enable Unroll And Jam Pass"));
210
211static cl::opt<bool> EnableLoopFlatten("enable-loop-flatten", cl::init(false),
213 cl::desc("Enable the LoopFlatten Pass"));
214
215// Experimentally allow loop header duplication. This should allow for better
216// optimization at Oz, since loop-idiom recognition can then recognize things
217// like memcpy. If this ends up being useful for many targets, we should drop
218// this flag and make a code generation option that can be controlled
219// independent of the opt level and exposed through the frontend.
221 "enable-loop-header-duplication", cl::init(false), cl::Hidden,
222 cl::desc("Enable loop header duplication at any optimization level"));
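// Illustrative example of the point above (hypothetical user code, not from
// this file): once the header is duplicated/rotated, a plain byte-copy loop
// such as
//
//   for (unsigned i = 0; i != n; ++i)
//     dst[i] = src[i];
//
// becomes recognizable to loop-idiom recognition, which can replace it with a
// memcpy call even at -Oz.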
223
224static cl::opt<bool>
225 EnableDFAJumpThreading("enable-dfa-jump-thread",
226 cl::desc("Enable DFA jump threading"),
227 cl::init(false), cl::Hidden);
228
229static cl::opt<bool>
230 EnableHotColdSplit("hot-cold-split",
231 cl::desc("Enable hot-cold splitting pass"));
232
233static cl::opt<bool> EnableIROutliner("ir-outliner", cl::init(false),
235 cl::desc("Enable ir outliner pass"));
236
237static cl::opt<bool>
238 DisablePreInliner("disable-preinline", cl::init(false), cl::Hidden,
239 cl::desc("Disable pre-instrumentation inliner"));
240
242 "preinline-threshold", cl::Hidden, cl::init(75),
243 cl::desc("Control the amount of inlining in pre-instrumentation inliner "
244 "(default = 75)"));
245
246static cl::opt<bool>
247 EnableGVNHoist("enable-gvn-hoist",
248 cl::desc("Enable the GVN hoisting pass (default = off)"));
249
250static cl::opt<bool>
251 EnableGVNSink("enable-gvn-sink",
252 cl::desc("Enable the GVN sinking pass (default = off)"));
253
255 "enable-jump-table-to-switch",
256 cl::desc("Enable JumpTableToSwitch pass (default = off)"));
257
258// This option is used to simplify testing of SampleFDO optimizations for
259// profile loading.
260static cl::opt<bool>
261 EnableCHR("enable-chr", cl::init(true), cl::Hidden,
262 cl::desc("Enable control height reduction optimization (CHR)"));
263
265 "flattened-profile-used", cl::init(false), cl::Hidden,
266 cl::desc("Indicate the sample profile being used is flattened, i.e., "
267              "no inline hierarchy exists in the profile"));
268
270 "enable-order-file-instrumentation", cl::init(false), cl::Hidden,
271 cl::desc("Enable order file instrumentation (default = off)"));
272
273static cl::opt<bool>
274 EnableMatrix("enable-matrix", cl::init(false), cl::Hidden,
275 cl::desc("Enable lowering of the matrix intrinsics"));
276
278 "enable-constraint-elimination", cl::init(true), cl::Hidden,
279 cl::desc(
280 "Enable pass to eliminate conditions based on linear constraints"));
281
283 "attributor-enable", cl::Hidden, cl::init(AttributorRunOption::NONE),
284 cl::desc("Enable the attributor inter-procedural deduction pass"),
285 cl::values(clEnumValN(AttributorRunOption::ALL, "all",
286 "enable all attributor runs"),
287 clEnumValN(AttributorRunOption::MODULE, "module",
288 "enable module-wide attributor runs"),
289 clEnumValN(AttributorRunOption::CGSCC, "cgscc",
290 "enable call graph SCC attributor runs"),
291 clEnumValN(AttributorRunOption::NONE, "none",
292 "disable attributor runs")));
293
295 "enable-sampled-instrumentation", cl::init(false), cl::Hidden,
296 cl::desc("Enable profile instrumentation sampling (default = off)"));
298 "enable-loop-versioning-licm", cl::init(false), cl::Hidden,
299 cl::desc("Enable the experimental Loop Versioning LICM pass"));
300
302 "instrument-cold-function-only-path", cl::init(""),
303    cl::desc("File path for cold function only instrumentation (requires use "
304 "with --pgo-instrument-cold-function-only)"),
305 cl::Hidden);
306
309
310namespace llvm {
312} // namespace llvm
313
315 LoopInterleaving = true;
316 LoopVectorization = true;
317 SLPVectorization = false;
318 LoopUnrolling = true;
322 CallGraphProfile = true;
323 UnifiedLTO = false;
325 InlinerThreshold = -1;
327}
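// Illustrative sketch (assumes the PassBuilder constructor overload that takes
// a PipelineTuningOptions, declared in PassBuilder.h): frontends can override
// the defaults set above before building a pipeline, e.g.
//
//   PipelineTuningOptions PTO;
//   PTO.LoopUnrolling = false;
//   PTO.SLPVectorization = true;
//   PassBuilder PB(/*TM=*/nullptr, PTO);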
328
329namespace llvm {
331} // namespace llvm
332
334 OptimizationLevel Level) {
335 for (auto &C : PeepholeEPCallbacks)
336 C(FPM, Level);
337}
340 for (auto &C : LateLoopOptimizationsEPCallbacks)
341 C(LPM, Level);
342}
344 OptimizationLevel Level) {
345 for (auto &C : LoopOptimizerEndEPCallbacks)
346 C(LPM, Level);
347}
350 for (auto &C : ScalarOptimizerLateEPCallbacks)
351 C(FPM, Level);
352}
354 OptimizationLevel Level) {
355 for (auto &C : CGSCCOptimizerLateEPCallbacks)
356 C(CGPM, Level);
357}
359 OptimizationLevel Level) {
360 for (auto &C : VectorizerStartEPCallbacks)
361 C(FPM, Level);
362}
364 OptimizationLevel Level,
366 for (auto &C : OptimizerEarlyEPCallbacks)
367 C(MPM, Level, Phase);
368}
370 OptimizationLevel Level,
372 for (auto &C : OptimizerLastEPCallbacks)
373 C(MPM, Level, Phase);
374}
377 for (auto &C : FullLinkTimeOptimizationEarlyEPCallbacks)
378 C(MPM, Level);
379}
382 for (auto &C : FullLinkTimeOptimizationLastEPCallbacks)
383 C(MPM, Level);
384}
386 OptimizationLevel Level) {
387 for (auto &C : PipelineStartEPCallbacks)
388 C(MPM, Level);
389}
392 for (auto &C : PipelineEarlySimplificationEPCallbacks)
393 C(MPM, Level, Phase);
394}
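// Illustrative sketch (PB is assumed to be a PassBuilder instance): the
// callback vectors invoked above are populated through the corresponding
// register*EPCallback APIs, e.g. by frontends or pass plugins:
//
//   PB.registerPeepholeEPCallback(
//       [](FunctionPassManager &FPM, OptimizationLevel Level) {
//         FPM.addPass(InstCombinePass());
//       });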
395
396// Helper to add AnnotationRemarksPass.
399}
400
401// Helper to check if the current compilation phase is preparing for LTO
405}
406
407// TODO: Investigate the cost/benefit of tail call elimination on debugging.
409PassBuilder::buildO1FunctionSimplificationPipeline(OptimizationLevel Level,
411
413
416
417 // Form SSA out of local memory accesses after breaking apart aggregates into
418 // scalars.
420
421 // Catch trivial redundancies
422 FPM.addPass(EarlyCSEPass(true /* Enable mem-ssa. */));
423
424 // Hoisting of scalars and load expressions.
425 FPM.addPass(
426 SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(true)));
428
430
431 invokePeepholeEPCallbacks(FPM, Level);
432
433 FPM.addPass(
434 SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(true)));
435
436 // Form canonically associated expression trees, and simplify the trees using
437 // basic mathematical properties. For example, this will form (nearly)
438 // minimal multiplication trees.
440
441 // Add the primary loop simplification pipeline.
442 // FIXME: Currently this is split into two loop pass pipelines because we run
443 // some function passes in between them. These can and should be removed
444 // and/or replaced by scheduling the loop pass equivalents in the correct
445 // positions. But those equivalent passes aren't powerful enough yet.
446 // Specifically, `SimplifyCFGPass` and `InstCombinePass` are currently still
447  // used. We have `LoopSimplifyCFGPass` which isn't yet powerful enough to
448 // fully replace `SimplifyCFGPass`, and the closest to the other we have is
449 // `LoopInstSimplify`.
450 LoopPassManager LPM1, LPM2;
451
452 // Simplify the loop body. We do this initially to clean up after other loop
453 // passes run, either when iterating on a loop or on inner loops with
454 // implications on the outer loop.
457
458 // Try to remove as much code from the loop header as possible,
459 // to reduce amount of IR that will have to be duplicated. However,
460 // do not perform speculative hoisting the first time as LICM
461 // will destroy metadata that may not need to be destroyed if run
462 // after loop rotation.
463 // TODO: Investigate promotion cap for O1.
465 /*AllowSpeculation=*/false));
466
467 LPM1.addPass(LoopRotatePass(/* Disable header duplication */ true,
469 // TODO: Investigate promotion cap for O1.
471 /*AllowSpeculation=*/true));
474 LPM1.addPass(LoopFlattenPass());
475
478
480
482
485
486 // Do not enable unrolling in PreLinkThinLTO phase during sample PGO
487  // because it changes the IR and makes profile annotation in the backend
488  // compile inaccurate. The normal unroller doesn't pay attention to forced
489  // full unroll attributes, so we need to make sure the full unroll pass is
490  // still allowed to pay attention to them.
491 if (Phase != ThinOrFullLTOPhase::ThinLTOPreLink || !PGOOpt ||
492 PGOOpt->Action != PGOOptions::SampleUse)
493 LPM2.addPass(LoopFullUnrollPass(Level.getSpeedupLevel(),
494 /* OnlyWhenForced= */ !PTO.LoopUnrolling,
496
498
499 FPM.addPass(createFunctionToLoopPassAdaptor(std::move(LPM1),
500 /*UseMemorySSA=*/true,
501 /*UseBlockFrequencyInfo=*/true));
502 FPM.addPass(
503 SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(true)));
505 // The loop passes in LPM2 (LoopFullUnrollPass) do not preserve MemorySSA.
506 // *All* loop passes must preserve it, in order to be able to use it.
507 FPM.addPass(createFunctionToLoopPassAdaptor(std::move(LPM2),
508 /*UseMemorySSA=*/false,
509 /*UseBlockFrequencyInfo=*/false));
510
511  // Delete small arrays after loop unrolling.
513
514 // Specially optimize memory movement as it doesn't look like dataflow in SSA.
515 FPM.addPass(MemCpyOptPass());
516
517 // Sparse conditional constant propagation.
518 // FIXME: It isn't clear why we do this *after* loop passes rather than
519 // before...
520 FPM.addPass(SCCPPass());
521
522 // Delete dead bit computations (instcombine runs after to fold away the dead
523 // computations, and then ADCE will run later to exploit any new DCE
524 // opportunities that creates).
525 FPM.addPass(BDCEPass());
526
527 // Run instcombine after redundancy and dead bit elimination to exploit
528 // opportunities opened up by them.
530 invokePeepholeEPCallbacks(FPM, Level);
531
532 FPM.addPass(CoroElidePass());
533
535
536 // Finally, do an expensive DCE pass to catch all the dead code exposed by
537 // the simplifications and basic cleanup after all the simplifications.
538 // TODO: Investigate if this is too expensive.
539 FPM.addPass(ADCEPass());
540 FPM.addPass(
541 SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(true)));
543 invokePeepholeEPCallbacks(FPM, Level);
544
545 return FPM;
546}
547
551 assert(Level != OptimizationLevel::O0 && "Must request optimizations!");
552
553 // The O1 pipeline has a separate pipeline creation function to simplify
554 // construction readability.
555 if (Level.getSpeedupLevel() == 1)
556 return buildO1FunctionSimplificationPipeline(Level, Phase);
557
559
562
563 // Form SSA out of local memory accesses after breaking apart aggregates into
564 // scalars.
566
567 // Catch trivial redundancies
568 FPM.addPass(EarlyCSEPass(true /* Enable mem-ssa. */));
571
572 // Hoisting of scalars and load expressions.
573 if (EnableGVNHoist)
574 FPM.addPass(GVNHoistPass());
575
576 // Global value numbering based sinking.
577 if (EnableGVNSink) {
578 FPM.addPass(GVNSinkPass());
579 FPM.addPass(
580 SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(true)));
581 }
582
583 // Speculative execution if the target has divergent branches; otherwise nop.
584 FPM.addPass(SpeculativeExecutionPass(/* OnlyIfDivergentTarget =*/true));
585
586 // Optimize based on known information about branches, and cleanup afterward.
589
590 // Jump table to switch conversion.
593
594 FPM.addPass(
595 SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(true)));
598
599 if (!Level.isOptimizingForSize())
601
602 invokePeepholeEPCallbacks(FPM, Level);
603
604 // For PGO use pipeline, try to optimize memory intrinsics such as memcpy
605 // using the size value profile. Don't perform this when optimizing for size.
606 if (PGOOpt && PGOOpt->Action == PGOOptions::IRUse &&
607 !Level.isOptimizingForSize())
609
611 FPM.addPass(
612 SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(true)));
613
614 // Form canonically associated expression trees, and simplify the trees using
615 // basic mathematical properties. For example, this will form (nearly)
616 // minimal multiplication trees.
618
621
622 // Add the primary loop simplification pipeline.
623 // FIXME: Currently this is split into two loop pass pipelines because we run
624 // some function passes in between them. These can and should be removed
625 // and/or replaced by scheduling the loop pass equivalents in the correct
626 // positions. But those equivalent passes aren't powerful enough yet.
627 // Specifically, `SimplifyCFGPass` and `InstCombinePass` are currently still
628  // used. We have `LoopSimplifyCFGPass` which isn't yet powerful enough to
629 // fully replace `SimplifyCFGPass`, and the closest to the other we have is
630 // `LoopInstSimplify`.
631 LoopPassManager LPM1, LPM2;
632
633 // Simplify the loop body. We do this initially to clean up after other loop
634 // passes run, either when iterating on a loop or on inner loops with
635 // implications on the outer loop.
638
639 // Try to remove as much code from the loop header as possible,
640 // to reduce amount of IR that will have to be duplicated. However,
641 // do not perform speculative hoisting the first time as LICM
642 // will destroy metadata that may not need to be destroyed if run
643 // after loop rotation.
644 // TODO: Investigate promotion cap for O1.
646 /*AllowSpeculation=*/false));
647
648 // Disable header duplication in loop rotation at -Oz.
650 Level != OptimizationLevel::Oz,
652 // TODO: Investigate promotion cap for O1.
654 /*AllowSpeculation=*/true));
655 LPM1.addPass(
656 SimpleLoopUnswitchPass(/* NonTrivial */ Level == OptimizationLevel::O3));
658 LPM1.addPass(LoopFlattenPass());
659
662
663 {
665 ExtraPasses.addPass(SimpleLoopUnswitchPass(/* NonTrivial */ Level ==
667 LPM2.addPass(std::move(ExtraPasses));
668 }
669
671
673
676
677 // Do not enable unrolling in PreLinkThinLTO phase during sample PGO
678  // because it changes the IR and makes profile annotation in the backend
679  // compile inaccurate. The normal unroller doesn't pay attention to forced
680  // full unroll attributes, so we need to make sure the full unroll pass is
681  // still allowed to pay attention to them.
682 if (Phase != ThinOrFullLTOPhase::ThinLTOPreLink || !PGOOpt ||
683 PGOOpt->Action != PGOOptions::SampleUse)
684 LPM2.addPass(LoopFullUnrollPass(Level.getSpeedupLevel(),
685 /* OnlyWhenForced= */ !PTO.LoopUnrolling,
687
689
690 FPM.addPass(createFunctionToLoopPassAdaptor(std::move(LPM1),
691 /*UseMemorySSA=*/true,
692 /*UseBlockFrequencyInfo=*/true));
693 FPM.addPass(
694 SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(true)));
696 // The loop passes in LPM2 (LoopIdiomRecognizePass, IndVarSimplifyPass,
697 // LoopDeletionPass and LoopFullUnrollPass) do not preserve MemorySSA.
698 // *All* loop passes must preserve it, in order to be able to use it.
699 FPM.addPass(createFunctionToLoopPassAdaptor(std::move(LPM2),
700 /*UseMemorySSA=*/false,
701 /*UseBlockFrequencyInfo=*/false));
702
703  // Delete small arrays after loop unrolling.
705
706 // Try vectorization/scalarization transforms that are both improvements
707 // themselves and can allow further folds with GVN and InstCombine.
708 FPM.addPass(VectorCombinePass(/*TryEarlyFoldsOnly=*/true));
709
710 // Eliminate redundancies.
712 if (RunNewGVN)
713 FPM.addPass(NewGVNPass());
714 else
715 FPM.addPass(GVNPass());
716
717 // Sparse conditional constant propagation.
718 // FIXME: It isn't clear why we do this *after* loop passes rather than
719 // before...
720 FPM.addPass(SCCPPass());
721
722 // Delete dead bit computations (instcombine runs after to fold away the dead
723 // computations, and then ADCE will run later to exploit any new DCE
724 // opportunities that creates).
725 FPM.addPass(BDCEPass());
726
727 // Run instcombine after redundancy and dead bit elimination to exploit
728 // opportunities opened up by them.
730 invokePeepholeEPCallbacks(FPM, Level);
731
732 // Re-consider control flow based optimizations after redundancy elimination,
733 // redo DCE, etc.
736
739
740 // Finally, do an expensive DCE pass to catch all the dead code exposed by
741 // the simplifications and basic cleanup after all the simplifications.
742 // TODO: Investigate if this is too expensive.
743 FPM.addPass(ADCEPass());
744
745 // Specially optimize memory movement as it doesn't look like dataflow in SSA.
746 FPM.addPass(MemCpyOptPass());
747
748 FPM.addPass(DSEPass());
750
753 /*AllowSpeculation=*/true),
754 /*UseMemorySSA=*/true, /*UseBlockFrequencyInfo=*/false));
755
756 FPM.addPass(CoroElidePass());
757
759
761 .convertSwitchRangeToICmp(true)
762 .hoistCommonInsts(true)
763 .sinkCommonInsts(true)));
765 invokePeepholeEPCallbacks(FPM, Level);
766
767 return FPM;
768}
769
770void PassBuilder::addRequiredLTOPreLinkPasses(ModulePassManager &MPM) {
773}
774
775void PassBuilder::addPreInlinerPasses(ModulePassManager &MPM,
776 OptimizationLevel Level,
777 ThinOrFullLTOPhase LTOPhase) {
778 assert(Level != OptimizationLevel::O0 && "Not expecting O0 here!");
780 return;
781 InlineParams IP;
782
784
785 // FIXME: The hint threshold has the same value used by the regular inliner
786  // when not optimizing for size. This should probably be lowered after
787 // performance testing.
788  // FIXME: this comment is cargo-culted from the old pass manager; revisit.
789 IP.HintThreshold = Level.isOptimizingForSize() ? PreInlineThreshold : 325;
791 IP, /* MandatoryFirst */ true,
793 CGSCCPassManager &CGPipeline = MIWP.getPM();
794
797 FPM.addPass(EarlyCSEPass()); // Catch trivial redundancies.
798 FPM.addPass(SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(
799 true))); // Merge & remove basic blocks.
800 FPM.addPass(InstCombinePass()); // Combine silly sequences.
801 invokePeepholeEPCallbacks(FPM, Level);
802
803 CGPipeline.addPass(createCGSCCToFunctionPassAdaptor(
804 std::move(FPM), PTO.EagerlyInvalidateAnalyses));
805
806 MPM.addPass(std::move(MIWP));
807
808 // Delete anything that is now dead to make sure that we don't instrument
809 // dead code. Instrumentation can end up keeping dead code around and
810 // dramatically increase code size.
811 MPM.addPass(GlobalDCEPass());
812}
813
814void PassBuilder::addPostPGOLoopRotation(ModulePassManager &MPM,
815 OptimizationLevel Level) {
817 // Disable header duplication in loop rotation at -Oz.
821 Level != OptimizationLevel::Oz),
822 /*UseMemorySSA=*/false,
823 /*UseBlockFrequencyInfo=*/false),
825 }
826}
827
828void PassBuilder::addPGOInstrPasses(ModulePassManager &MPM,
829 OptimizationLevel Level, bool RunProfileGen,
830 bool IsCS, bool AtomicCounterUpdate,
831 std::string ProfileFile,
832 std::string ProfileRemappingFile,
834 assert(Level != OptimizationLevel::O0 && "Not expecting O0 here!");
835
836 if (!RunProfileGen) {
837 assert(!ProfileFile.empty() && "Profile use expecting a profile file!");
838 MPM.addPass(
839 PGOInstrumentationUse(ProfileFile, ProfileRemappingFile, IsCS, FS));
840 // Cache ProfileSummaryAnalysis once to avoid the potential need to insert
841 // RequireAnalysisPass for PSI before subsequent non-module passes.
843 return;
844 }
845
846 // Perform PGO instrumentation.
849
850 addPostPGOLoopRotation(MPM, Level);
851 // Add the profile lowering pass.
853 if (!ProfileFile.empty())
854 Options.InstrProfileOutput = ProfileFile;
855 // Do counter promotion at Level greater than O0.
856 Options.DoCounterPromotion = true;
857 Options.UseBFIInPromotion = IsCS;
858 if (EnableSampledInstr) {
859 Options.Sampling = true;
860    // With sampling, there is little benefit to enabling counter promotion.
861 // But note that sampling does work with counter promotion.
862 Options.DoCounterPromotion = false;
863 }
864 Options.Atomic = AtomicCounterUpdate;
866}
867
869 ModulePassManager &MPM, bool RunProfileGen, bool IsCS,
870 bool AtomicCounterUpdate, std::string ProfileFile,
871 std::string ProfileRemappingFile, IntrusiveRefCntPtr<vfs::FileSystem> FS) {
872 if (!RunProfileGen) {
873 assert(!ProfileFile.empty() && "Profile use expecting a profile file!");
874 MPM.addPass(
875 PGOInstrumentationUse(ProfileFile, ProfileRemappingFile, IsCS, FS));
876 // Cache ProfileSummaryAnalysis once to avoid the potential need to insert
877 // RequireAnalysisPass for PSI before subsequent non-module passes.
879 return;
880 }
881
882 // Perform PGO instrumentation.
885 // Add the profile lowering pass.
887 if (!ProfileFile.empty())
888 Options.InstrProfileOutput = ProfileFile;
889 // Do not do counter promotion at O0.
890 Options.DoCounterPromotion = false;
891 Options.UseBFIInPromotion = IsCS;
892 Options.Atomic = AtomicCounterUpdate;
894}
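// Illustrative usage (a sketch of the clang-driven IR-level PGO workflow that
// these helpers back; exact file names are assumptions):
//
//   clang -O2 -fprofile-generate foo.c -o foo          # instrumented build
//   ./foo                                              # produces *.profraw
//   llvm-profdata merge -o foo.profdata default_*.profraw
//   clang -O2 -fprofile-use=foo.profdata foo.c -o foo  # profile-guided build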
895
897 return getInlineParams(Level.getSpeedupLevel(), Level.getSizeLevel());
898}
899
903 InlineParams IP;
904 if (PTO.InlinerThreshold == -1)
905 IP = getInlineParamsFromOptLevel(Level);
906 else
908 // For PreLinkThinLTO + SamplePGO, set hot-caller threshold to 0 to
909  // disable hot callsite inlining (as much as possible [1]) because it makes
910 // profile annotation in the backend inaccurate.
911 //
912 // [1] Note the cost of a function could be below zero due to erased
913 // prologue / epilogue.
914 if (Phase == ThinOrFullLTOPhase::ThinLTOPreLink && PGOOpt &&
915 PGOOpt->Action == PGOOptions::SampleUse)
917
918 if (PGOOpt)
920
924
925 // Require the GlobalsAA analysis for the module so we can query it within
926 // the CGSCC pipeline.
929 // Invalidate AAManager so it can be recreated and pick up the newly
930 // available GlobalsAA.
931 MIWP.addModulePass(
933 }
934
935 // Require the ProfileSummaryAnalysis for the module so we can query it within
936 // the inliner pass.
938
939 // Now begin the main postorder CGSCC pipeline.
940 // FIXME: The current CGSCC pipeline has its origins in the legacy pass
941 // manager and trying to emulate its precise behavior. Much of this doesn't
942 // make a lot of sense and we should revisit the core CGSCC structure.
943 CGSCCPassManager &MainCGPipeline = MIWP.getPM();
944
945 // Note: historically, the PruneEH pass was run first to deduce nounwind and
946 // generally clean up exception handling overhead. It isn't clear this is
947 // valuable as the inliner doesn't currently care whether it is inlining an
948 // invoke or a call.
949
951 MainCGPipeline.addPass(AttributorCGSCCPass());
952
953 // Deduce function attributes. We do another run of this after the function
954 // simplification pipeline, so this only needs to run when it could affect the
955 // function simplification pipeline, which is only the case with recursive
956 // functions.
957 MainCGPipeline.addPass(PostOrderFunctionAttrsPass(/*SkipNonRecursive*/ true));
958
959 // When at O3 add argument promotion to the pass pipeline.
960 // FIXME: It isn't at all clear why this should be limited to O3.
961 if (Level == OptimizationLevel::O3)
962 MainCGPipeline.addPass(ArgumentPromotionPass());
963
964 // Try to perform OpenMP specific optimizations. This is a (quick!) no-op if
965 // there are no OpenMP runtime calls present in the module.
966 if (Level == OptimizationLevel::O2 || Level == OptimizationLevel::O3)
967 MainCGPipeline.addPass(OpenMPOptCGSCCPass());
968
969 invokeCGSCCOptimizerLateEPCallbacks(MainCGPipeline, Level);
970
971 // Add the core function simplification pipeline nested inside the
972 // CGSCC walk.
975 PTO.EagerlyInvalidateAnalyses, /*NoRerun=*/true));
976
977 // Finally, deduce any function attributes based on the fully simplified
978 // function.
979 MainCGPipeline.addPass(PostOrderFunctionAttrsPass());
980
981 // Mark that the function is fully simplified and that it shouldn't be
982 // simplified again if we somehow revisit it due to CGSCC mutations unless
983 // it's been modified since.
986
988 MainCGPipeline.addPass(CoroSplitPass(Level != OptimizationLevel::O0));
989 MainCGPipeline.addPass(CoroAnnotationElidePass());
990 }
991
992 // Make sure we don't affect potential future NoRerun CGSCC adaptors.
993 MIWP.addLateModulePass(createModuleToFunctionPassAdaptor(
995
996 return MIWP;
997}
998
1003
1005 // For PreLinkThinLTO + SamplePGO, set hot-caller threshold to 0 to
1006  // disable hot callsite inlining (as much as possible [1]) because it makes
1007 // profile annotation in the backend inaccurate.
1008 //
1009 // [1] Note the cost of a function could be below zero due to erased
1010 // prologue / epilogue.
1011 if (Phase == ThinOrFullLTOPhase::ThinLTOPreLink && PGOOpt &&
1012 PGOOpt->Action == PGOOptions::SampleUse)
1013 IP.HotCallSiteThreshold = 0;
1014
1015 if (PGOOpt)
1017
1018  // The inline deferral logic is used to avoid losing some
1019  // inlining opportunities in the future. It is helpful in the SCC inliner,
1020  // where inlining is processed in bottom-up order.
1021  // In the module inliner, however, inlining follows a priority-based order
1022  // by default, so inline deferral is unnecessary there, and we disable the
1023  // inline deferral logic in the module inliner.
1024 IP.EnableDeferral = false;
1025
1028 MPM.addPass(GlobalOptPass());
1029 MPM.addPass(GlobalDCEPass());
1031 }
1032
1036
1040 MPM.addPass(
1042 }
1043
1044 return MPM;
1045}
1046
1050 assert(Level != OptimizationLevel::O0 &&
1051 "Should not be used for O0 pipeline");
1052
1054 "FullLTOPostLink shouldn't call buildModuleSimplificationPipeline!");
1055
1057
1058 // Place pseudo probe instrumentation as the first pass of the pipeline to
1059 // minimize the impact of optimization changes.
1060 if (PGOOpt && PGOOpt->PseudoProbeForProfiling &&
1063
1064 bool HasSampleProfile = PGOOpt && (PGOOpt->Action == PGOOptions::SampleUse);
1065
1066  // In ThinLTO mode, when a flattened profile is used, all the available
1067  // profile information will be annotated in the PreLink phase, so there is
1068  // no need to load the profile again in PostLink.
1069 bool LoadSampleProfile =
1070 HasSampleProfile &&
1072
1073 // During the ThinLTO backend phase we perform early indirect call promotion
1074 // here, before globalopt. Otherwise imported available_externally functions
1075 // look unreferenced and are removed. If we are going to load the sample
1076 // profile then defer until later.
1077 // TODO: See if we can move later and consolidate with the location where
1078 // we perform ICP when we are loading a sample profile.
1079 // TODO: We pass HasSampleProfile (whether there was a sample profile file
1080 // passed to the compile) to the SamplePGO flag of ICP. This is used to
1081 // determine whether the new direct calls are annotated with prof metadata.
1082 // Ideally this should be determined from whether the IR is annotated with
1083  // sample profile, and not whether a sample profile was provided on the
1084 // command line. E.g. for flattened profiles where we will not be reloading
1085 // the sample profile in the ThinLTO backend, we ideally shouldn't have to
1086 // provide the sample profile file.
1087 if (Phase == ThinOrFullLTOPhase::ThinLTOPostLink && !LoadSampleProfile)
1088 MPM.addPass(PGOIndirectCallPromotion(true /* InLTO */, HasSampleProfile));
1089
1090  // Create an early function pass manager to clean up the output of the
1091 // frontend. Not necessary with LTO post link pipelines since the pre link
1092 // pipeline already cleaned up the frontend output.
1094 // Do basic inference of function attributes from known properties of system
1095 // libraries and other oracles.
1097 MPM.addPass(CoroEarlyPass());
1098
1099 FunctionPassManager EarlyFPM;
1100 EarlyFPM.addPass(EntryExitInstrumenterPass(/*PostInlining=*/false));
1101 // Lower llvm.expect to metadata before attempting transforms.
1102 // Compare/branch metadata may alter the behavior of passes like
1103 // SimplifyCFG.
1105 EarlyFPM.addPass(SimplifyCFGPass());
1107 EarlyFPM.addPass(EarlyCSEPass());
1108 if (Level == OptimizationLevel::O3)
1109 EarlyFPM.addPass(CallSiteSplittingPass());
1111 std::move(EarlyFPM), PTO.EagerlyInvalidateAnalyses));
1112 }
1113
1114 if (LoadSampleProfile) {
1115 // Annotate sample profile right after early FPM to ensure freshness of
1116 // the debug info.
1117 MPM.addPass(SampleProfileLoaderPass(PGOOpt->ProfileFile,
1118 PGOOpt->ProfileRemappingFile, Phase));
1119 // Cache ProfileSummaryAnalysis once to avoid the potential need to insert
1120 // RequireAnalysisPass for PSI before subsequent non-module passes.
1122 // Do not invoke ICP in the LTOPrelink phase as it makes it hard
1123 // for the profile annotation to be accurate in the LTO backend.
1124 if (!isLTOPreLink(Phase))
1125 // We perform early indirect call promotion here, before globalopt.
1126 // This is important for the ThinLTO backend phase because otherwise
1127 // imported available_externally functions look unreferenced and are
1128 // removed.
1129 MPM.addPass(
1130 PGOIndirectCallPromotion(true /* IsInLTO */, true /* SamplePGO */));
1131 }
1132
1133 // Try to perform OpenMP specific optimizations on the module. This is a
1134 // (quick!) no-op if there are no OpenMP runtime calls present in the module.
1135 MPM.addPass(OpenMPOptPass());
1136
1138 MPM.addPass(AttributorPass());
1139
1140 // Lower type metadata and the type.test intrinsic in the ThinLTO
1141 // post link pipeline after ICP. This is to enable usage of the type
1142 // tests in ICP sequences.
1144 MPM.addPass(LowerTypeTestsPass(nullptr, nullptr,
1146
1148
1149 // Interprocedural constant propagation now that basic cleanup has occurred
1150 // and prior to optimizing globals.
1151 // FIXME: This position in the pipeline hasn't been carefully considered in
1152 // years, it should be re-analyzed.
1153 MPM.addPass(IPSCCPPass(
1154 IPSCCPOptions(/*AllowFuncSpec=*/
1155 Level != OptimizationLevel::Os &&
1156 Level != OptimizationLevel::Oz &&
1157 !isLTOPreLink(Phase))));
1158
1159 // Attach metadata to indirect call sites indicating the set of functions
1160 // they may target at run-time. This should follow IPSCCP.
1162
1163 // Optimize globals to try and fold them into constants.
1164 MPM.addPass(GlobalOptPass());
1165
1166 // Create a small function pass pipeline to cleanup after all the global
1167 // optimizations.
1168 FunctionPassManager GlobalCleanupPM;
1169  // FIXME: Should this instead be a run of SROA?
1170 GlobalCleanupPM.addPass(PromotePass());
1171 GlobalCleanupPM.addPass(InstCombinePass());
1172 invokePeepholeEPCallbacks(GlobalCleanupPM, Level);
1173 GlobalCleanupPM.addPass(
1174 SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(true)));
1175 MPM.addPass(createModuleToFunctionPassAdaptor(std::move(GlobalCleanupPM),
1177
1178 // We already asserted this happens in non-FullLTOPostLink earlier.
1179 const bool IsPreLink = Phase != ThinOrFullLTOPhase::ThinLTOPostLink;
1180 const bool IsPGOPreLink = PGOOpt && IsPreLink;
1181 const bool IsPGOInstrGen =
1182 IsPGOPreLink && PGOOpt->Action == PGOOptions::IRInstr;
1183 const bool IsPGOInstrUse =
1184 IsPGOPreLink && PGOOpt->Action == PGOOptions::IRUse;
1185 const bool IsMemprofUse = IsPGOPreLink && !PGOOpt->MemoryProfile.empty();
1186 // We don't want to mix pgo ctx gen and pgo gen; we also don't currently
1187 // enable ctx profiling from the frontend.
1189 "Enabling both instrumented PGO and contextual instrumentation is not "
1190 "supported.");
1191 // Enable contextual profiling instrumentation.
1192 const bool IsCtxProfGen = !IsPGOInstrGen && IsPreLink &&
1194 const bool IsCtxProfUse =
1196
1197 assert(
1199 "--instrument-cold-function-only-path is provided but "
1200 "--pgo-instrument-cold-function-only is not enabled");
1201 const bool IsColdFuncOnlyInstrGen = PGOInstrumentColdFunctionOnly &&
1202 IsPGOPreLink &&
1204
1205 if (IsPGOInstrGen || IsPGOInstrUse || IsMemprofUse || IsCtxProfGen ||
1206 IsCtxProfUse || IsColdFuncOnlyInstrGen)
1207 addPreInlinerPasses(MPM, Level, Phase);
1208
1209 // Add all the requested passes for instrumentation PGO, if requested.
1210 if (IsPGOInstrGen || IsPGOInstrUse) {
1211 addPGOInstrPasses(MPM, Level,
1212 /*RunProfileGen=*/IsPGOInstrGen,
1213 /*IsCS=*/false, PGOOpt->AtomicCounterUpdate,
1214 PGOOpt->ProfileFile, PGOOpt->ProfileRemappingFile,
1215 PGOOpt->FS);
1216 } else if (IsCtxProfGen || IsCtxProfUse) {
1218 // In pre-link, we just want the instrumented IR. We use the contextual
1219 // profile in the post-thinlink phase.
1220 // The instrumentation will be removed in post-thinlink after IPO.
1221 // FIXME(mtrofin): move AssignGUIDPass if there is agreement to use this
1222 // mechanism for GUIDs.
1223 MPM.addPass(AssignGUIDPass());
1224 if (IsCtxProfUse)
1225 return MPM;
1226 addPostPGOLoopRotation(MPM, Level);
1228 } else if (IsColdFuncOnlyInstrGen) {
1229 addPGOInstrPasses(
1230 MPM, Level, /* RunProfileGen */ true, /* IsCS */ false,
1231 /* AtomicCounterUpdate */ false, InstrumentColdFuncOnlyPath,
1232 /* ProfileRemappingFile */ "", IntrusiveRefCntPtr<vfs::FileSystem>());
1233 }
1234
1235 if (IsPGOInstrGen || IsPGOInstrUse || IsCtxProfGen)
1236 MPM.addPass(PGOIndirectCallPromotion(false, false));
1237
1238 if (IsPGOPreLink && PGOOpt->CSAction == PGOOptions::CSIRInstr)
1239 MPM.addPass(PGOInstrumentationGenCreateVar(PGOOpt->CSProfileGenFile,
1241
1242 if (IsMemprofUse)
1243 MPM.addPass(MemProfUsePass(PGOOpt->MemoryProfile, PGOOpt->FS));
1244
1245 if (PGOOpt && (PGOOpt->Action == PGOOptions::IRUse ||
1246 PGOOpt->Action == PGOOptions::SampleUse))
1247 MPM.addPass(PGOForceFunctionAttrsPass(PGOOpt->ColdOptType));
1248
1249 MPM.addPass(AlwaysInlinerPass(/*InsertLifetimeIntrinsics=*/true));
1250
1253 else
1254 MPM.addPass(buildInlinerPipeline(Level, Phase));
1255
1256 // Remove any dead arguments exposed by cleanups, constant folding globals,
1257 // and argument promotion.
1259
1261 MPM.addPass(CoroCleanupPass());
1262
1263 // Optimize globals now that functions are fully simplified.
1264 MPM.addPass(GlobalOptPass());
1265 MPM.addPass(GlobalDCEPass());
1266
1267 return MPM;
1268}
1269
1270/// TODO: Should LTO cause any differences to this set of passes?
1271void PassBuilder::addVectorPasses(OptimizationLevel Level,
1272 FunctionPassManager &FPM, bool IsFullLTO) {
1275
1277 if (IsFullLTO) {
1278 // The vectorizer may have significantly shortened a loop body; unroll
1279 // again. Unroll small loops to hide loop backedge latency and saturate any
1280 // parallel execution resources of an out-of-order processor. We also then
1281 // need to clean up redundancies and loop invariant code.
1282 // FIXME: It would be really good to use a loop-integrated instruction
1283 // combiner for cleanup here so that the unrolling and LICM can be pipelined
1284 // across the loop nests.
1285 // We do UnrollAndJam in a separate LPM to ensure it happens before unroll
1288 LoopUnrollAndJamPass(Level.getSpeedupLevel())));
1290 Level.getSpeedupLevel(), /*OnlyWhenForced=*/!PTO.LoopUnrolling,
1293 // Now that we are done with loop unrolling, be it either by LoopVectorizer,
1294 // or LoopUnroll passes, some variable-offset GEP's into alloca's could have
1295 // become constant-offset, thus enabling SROA and alloca promotion. Do so.
1296 // NOTE: we are very late in the pipeline, and we don't have any LICM
1297    // or SimplifyCFG passes scheduled after us that would clean up the CFG
1298    // mess this may have created if allowed to modify the CFG, so forbid that.
1300 }
1301
1302 if (!IsFullLTO) {
1303 // Eliminate loads by forwarding stores from the previous iteration to loads
1304 // of the current iteration.
1306 }
1307 // Cleanup after the loop optimization passes.
1308 FPM.addPass(InstCombinePass());
1309
1310 if (Level.getSpeedupLevel() > 1 && ExtraVectorizerPasses) {
1312 // At higher optimization levels, try to clean up any runtime overlap and
1313 // alignment checks inserted by the vectorizer. We want to track correlated
1314 // runtime checks for two inner loops in the same outer loop, fold any
1315 // common computations, hoist loop-invariant aspects out of any outer loop,
1316 // and unswitch the runtime checks if possible. Once hoisted, we may have
1317 // dead (or speculatable) control flows or more combining opportunities.
1318 ExtraPasses.addPass(EarlyCSEPass());
1320 ExtraPasses.addPass(InstCombinePass());
1321 LoopPassManager LPM;
1323 /*AllowSpeculation=*/true));
1324 LPM.addPass(SimpleLoopUnswitchPass(/* NonTrivial */ Level ==
1326 ExtraPasses.addPass(
1327 createFunctionToLoopPassAdaptor(std::move(LPM), /*UseMemorySSA=*/true,
1328 /*UseBlockFrequencyInfo=*/true));
1329 ExtraPasses.addPass(
1330 SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(true)));
1331 ExtraPasses.addPass(InstCombinePass());
1332 FPM.addPass(std::move(ExtraPasses));
1333 }
1334
1335  // Now that we've formed fast-to-execute loop structures, we do further
1336 // optimizations. These are run afterward as they might block doing complex
1337 // analyses and transforms such as what are needed for loop vectorization.
1338
1339 // Cleanup after loop vectorization, etc. Simplification passes like CVP and
1340 // GVN, loop transforms, and others have already run, so it's now better to
1341 // convert to more optimized IR using more aggressive simplify CFG options.
1342 // The extra sinking transform can create larger basic blocks, so do this
1343 // before SLP vectorization.
1345 .forwardSwitchCondToPhi(true)
1346 .convertSwitchRangeToICmp(true)
1347 .convertSwitchToLookupTable(true)
1348 .needCanonicalLoops(false)
1349 .hoistCommonInsts(true)
1350 .sinkCommonInsts(true)));
1351
1352 if (IsFullLTO) {
1353 FPM.addPass(SCCPPass());
1354 FPM.addPass(InstCombinePass());
1355 FPM.addPass(BDCEPass());
1356 }
1357
1358 // Optimize parallel scalar instruction chains into SIMD instructions.
1359 if (PTO.SLPVectorization) {
1361 if (Level.getSpeedupLevel() > 1 && ExtraVectorizerPasses) {
1362 FPM.addPass(EarlyCSEPass());
1363 }
1364 }
1365 // Enhance/cleanup vector code.
1367
1368 if (!IsFullLTO) {
1369 FPM.addPass(InstCombinePass());
1370 // Unroll small loops to hide loop backedge latency and saturate any
1371 // parallel execution resources of an out-of-order processor. We also then
1372 // need to clean up redundancies and loop invariant code.
1373 // FIXME: It would be really good to use a loop-integrated instruction
1374 // combiner for cleanup here so that the unrolling and LICM can be pipelined
1375 // across the loop nests.
1376 // We do UnrollAndJam in a separate LPM to ensure it happens before unroll
1377 if (EnableUnrollAndJam && PTO.LoopUnrolling) {
1379 LoopUnrollAndJamPass(Level.getSpeedupLevel())));
1380 }
1382 Level.getSpeedupLevel(), /*OnlyWhenForced=*/!PTO.LoopUnrolling,
1385 // Now that we are done with loop unrolling, be it either by LoopVectorizer,
1386 // or LoopUnroll passes, some variable-offset GEP's into alloca's could have
1387 // become constant-offset, thus enabling SROA and alloca promotion. Do so.
1388 // NOTE: we are very late in the pipeline, and we don't have any LICM
1389    // or SimplifyCFG passes scheduled after us that would clean up the CFG
1390    // mess this may have created if allowed to modify the CFG, so forbid that.
1392 }
1393
1395 FPM.addPass(InstCombinePass());
1396
1397 // This is needed for two reasons:
1398 // 1. It works around problems that instcombine introduces, such as sinking
1399 // expensive FP divides into loops containing multiplications using the
1400 // divide result.
1401 // 2. It helps to clean up some loop-invariant code created by the loop
1402 // unroll pass when IsFullLTO=false.
1405 /*AllowSpeculation=*/true),
1406 /*UseMemorySSA=*/true, /*UseBlockFrequencyInfo=*/false));
1407
1408 // Now that we've vectorized and unrolled loops, we may have more refined
1409 // alignment information, try to re-derive it here.
1411}
1412
1415 ThinOrFullLTOPhase LTOPhase) {
1416 const bool LTOPreLink = isLTOPreLink(LTOPhase);
1418
1419 // Run partial inlining pass to partially inline functions that have
1420 // large bodies.
1423
1424 // Remove avail extern fns and globals definitions since we aren't compiling
1425 // an object file for later LTO. For LTO we want to preserve these so they
1426 // are eligible for inlining at link-time. Note if they are unreferenced they
1427 // will be removed by GlobalDCE later, so this only impacts referenced
1428 // available externally globals. Eventually they will be suppressed during
1429 // codegen, but eliminating here enables more opportunity for GlobalDCE as it
1430 // may make globals referenced by available external functions dead and saves
1431 // running remaining passes on the eliminated functions. These should be
1432 // preserved during prelinking for link-time inlining decisions.
1433 if (!LTOPreLink)
1435
1438
1439 // Do RPO function attribute inference across the module to forward-propagate
1440 // attributes where applicable.
1441 // FIXME: Is this really an optimization rather than a canonicalization?
1443
1444 // Do a post inline PGO instrumentation and use pass. This is a context
1445  // sensitive PGO pass. We don't want to do this in the LTOPreLink phase as
1446 // cross-module inline has not been done yet. The context sensitive
1447 // instrumentation is after all the inlines are done.
1448 if (!LTOPreLink && PGOOpt) {
1449 if (PGOOpt->CSAction == PGOOptions::CSIRInstr)
1450 addPGOInstrPasses(MPM, Level, /*RunProfileGen=*/true,
1451 /*IsCS=*/true, PGOOpt->AtomicCounterUpdate,
1452 PGOOpt->CSProfileGenFile, PGOOpt->ProfileRemappingFile,
1453 PGOOpt->FS);
1454 else if (PGOOpt->CSAction == PGOOptions::CSIRUse)
1455 addPGOInstrPasses(MPM, Level, /*RunProfileGen=*/false,
1456 /*IsCS=*/true, PGOOpt->AtomicCounterUpdate,
1457 PGOOpt->ProfileFile, PGOOpt->ProfileRemappingFile,
1458 PGOOpt->FS);
1459 }
1460
1461 // Re-compute GlobalsAA here prior to function passes. This is particularly
1462 // useful as the above will have inlined, DCE'ed, and function-attr
1463 // propagated everything. We should at this point have a reasonably minimal
1464 // and richly annotated call graph. By computing aliasing and mod/ref
1465 // information for all local globals here, the late loop passes and notably
1466 // the vectorizer will be able to use them to help recognize vectorizable
1467 // memory operations.
1470
1471 invokeOptimizerEarlyEPCallbacks(MPM, Level, LTOPhase);
1472
1473 FunctionPassManager OptimizePM;
1474  // Schedule LoopVersioningLICM once inlining is over, because after that
1475  // we may see more accurate aliasing. The reason to run this late is that
1476  // versioning too early may prevent further inlining due to the increase in
1477  // code size. Other optimizations that run later might benefit from the
1478  // no-alias assumption in the cloned loop.
1480 OptimizePM.addPass(
1482 // LoopVersioningLICM pass might increase new LICM opportunities.
1485 /*AllowSpeculation=*/true),
1486        /*UseMemorySSA=*/true, /*UseBlockFrequencyInfo=*/false));
1487 }
1488
1489 OptimizePM.addPass(Float2IntPass());
1491
1492 if (EnableMatrix) {
1493 OptimizePM.addPass(LowerMatrixIntrinsicsPass());
1494 OptimizePM.addPass(EarlyCSEPass());
1495 }
1496
1497  // The CHR pass should only be applied when profile information is available;
1498  // the profile summary information is checked inside CHR itself.
1499 if (EnableCHR && Level == OptimizationLevel::O3)
1500 OptimizePM.addPass(ControlHeightReductionPass());
1501
1502 // FIXME: We need to run some loop optimizations to re-rotate loops after
1503 // simplifycfg and others undo their rotation.
1504
1505 // Optimize the loop execution. These passes operate on entire loop nests
1506 // rather than on each loop in an inside-out manner, and so they are actually
1507 // function passes.
1508
1509 invokeVectorizerStartEPCallbacks(OptimizePM, Level);
1510
1511 LoopPassManager LPM;
1512 // First rotate loops that may have been un-rotated by prior passes.
1513 // Disable header duplication at -Oz.
1515 Level != OptimizationLevel::Oz,
1516 LTOPreLink));
1517 // Some loops may have become dead by now. Try to delete them.
1518 // FIXME: see discussion in https://reviews.llvm.org/D112851,
1519 // this may need to be revisited once we run GVN before loop deletion
1520 // in the simplification pipeline.
1523 std::move(LPM), /*UseMemorySSA=*/false, /*UseBlockFrequencyInfo=*/false));
1524
1525 // Distribute loops to allow partial vectorization. I.e. isolate dependences
1526  // into a separate loop that would otherwise inhibit vectorization. This is
1527 // currently only performed for loops marked with the metadata
1528 // llvm.loop.distribute=true or when -enable-loop-distribute is specified.
1529 OptimizePM.addPass(LoopDistributePass());
1530
1531 // Populates the VFABI attribute with the scalar-to-vector mappings
1532 // from the TargetLibraryInfo.
1533 OptimizePM.addPass(InjectTLIMappings());
1534
1535 addVectorPasses(Level, OptimizePM, /* IsFullLTO */ false);
1536
1537 // LoopSink pass sinks instructions hoisted by LICM, which serves as a
1538 // canonicalization pass that enables other optimizations. As a result,
1539 // LoopSink pass needs to be a very late IR pass to avoid undoing LICM
1540 // result too early.
1541 OptimizePM.addPass(LoopSinkPass());
1542
1543 // And finally clean up LCSSA form before generating code.
1544 OptimizePM.addPass(InstSimplifyPass());
1545
1546 // This hoists/decomposes div/rem ops. It should run after other sink/hoist
1547 // passes to avoid re-sinking, but before SimplifyCFG because it can allow
1548 // flattening of blocks.
1549 OptimizePM.addPass(DivRemPairsPass());
1550
1551 // Try to annotate calls that were created during optimization.
1552 OptimizePM.addPass(TailCallElimPass());
1553
1554 // LoopSink (and other loop passes since the last simplifyCFG) might have
1555 // resulted in single-entry-single-exit or empty blocks. Clean up the CFG.
1556 OptimizePM.addPass(
1558 .convertSwitchRangeToICmp(true)
1559 .speculateUnpredictables(true)
1560 .hoistLoadsStoresWithCondFaulting(true)));
1561
1562 // Add the core optimizing pipeline.
1563 MPM.addPass(createModuleToFunctionPassAdaptor(std::move(OptimizePM),
1565
1566 invokeOptimizerLastEPCallbacks(MPM, Level, LTOPhase);
1567
1568 // Split out cold code. Splitting is done late to avoid hiding context from
1569 // other optimizations and inadvertently regressing performance. The tradeoff
1570 // is that this has a higher code size cost than splitting early.
1571 if (EnableHotColdSplit && !LTOPreLink)
1573
1574 // Search the code for similar regions of code. If enough similar regions can
1575 // be found where extracting the regions into their own function will decrease
1576  // the size of the program, we extract the regions and deduplicate the
1577 // structurally similar regions.
1578 if (EnableIROutliner)
1579 MPM.addPass(IROutlinerPass());
1580
1581 // Now we need to do some global optimization transforms.
1582 // FIXME: It would seem like these should come first in the optimization
1583 // pipeline and maybe be the bottom of the canonicalization pipeline? Weird
1584 // ordering here.
1585 MPM.addPass(GlobalDCEPass());
1587
1588 // Merge functions if requested. It has a better chance to merge functions
1589 // after ConstantMerge folded jump tables.
1590 if (PTO.MergeFunctions)
1592
1593 if (PTO.CallGraphProfile && !LTOPreLink)
1596
1597  // TODO: The relative lookup table converter pass caused an issue when full
1598  // LTO is enabled. See https://reviews.llvm.org/D94355 for more details.
1599  // Until the issue is fixed, disable this pass during the pre-linking phase.
1600 if (!LTOPreLink)
1602
1603 return MPM;
1604}
1605
1609 if (Level == OptimizationLevel::O0)
1610 return buildO0DefaultPipeline(Level, Phase);
1611
1613
1614 // Convert @llvm.global.annotations to !annotation metadata.
1616
1617 // Force any function attributes we want the rest of the pipeline to observe.
1619
1620 if (PGOOpt && PGOOpt->DebugInfoForProfiling)
1622
1623 // Apply module pipeline start EP callback.
1625
1626 // Add the core simplification pipeline.
1628
1629 // Now add the optimization pipeline.
1631
1632 if (PGOOpt && PGOOpt->PseudoProbeForProfiling &&
1633 PGOOpt->Action == PGOOptions::SampleUse)
1635
1636 // Emit annotation remarks.
1638
1639 if (isLTOPreLink(Phase))
1640 addRequiredLTOPreLinkPasses(MPM);
1641 return MPM;
1642}
1643
1646 bool EmitSummary) {
1648 if (ThinLTO)
1650 else
1652 MPM.addPass(EmbedBitcodePass(ThinLTO, EmitSummary));
1653
1654 // If we're doing FatLTO w/ CFI enabled, we don't want the type tests in the
1655  // object code, only in the bitcode section, so drop them before we run
1656 // module optimization and generate machine code. If llvm.type.test() isn't in
1657 // the IR, this won't do anything.
1658 MPM.addPass(
1660
1661 // Use the ThinLTO post-link pipeline with sample profiling
1662 if (ThinLTO && PGOOpt && PGOOpt->Action == PGOOptions::SampleUse)
1663 MPM.addPass(buildThinLTODefaultPipeline(Level, /*ImportSummary=*/nullptr));
1664 else {
1665    // Otherwise, just use module optimization.
1666 MPM.addPass(
1668 // Emit annotation remarks.
1670 }
1671 return MPM;
1672}
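// Illustrative usage (a sketch; assumes fat LTO objects are requested from
// clang):
//
//   clang -O2 -flto=thin -ffat-lto-objects -c foo.c -o foo.o
//
// which emits regular object code plus an embedded (Thin)LTO bitcode section
// that this pipeline prepares.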
1673
1676 if (Level == OptimizationLevel::O0)
1678
1680
1681 // Convert @llvm.global.annotations to !annotation metadata.
1683
1684 // Force any function attributes we want the rest of the pipeline to observe.
1686
1687 if (PGOOpt && PGOOpt->DebugInfoForProfiling)
1689
1690 // Apply module pipeline start EP callback.
1692
1693 // If we are planning to perform ThinLTO later, we don't bloat the code with
1694 // unrolling/vectorization/... now. Just simplify the module as much as we
1695 // can.
1698 // In pre-link, for ctx prof use, we stop here with an instrumented IR. We let
1699 // thinlto use the contextual info to perform imports; then use the contextual
1700 // profile in the post-thinlink phase.
1701 if (!UseCtxProfile.empty()) {
1702 addRequiredLTOPreLinkPasses(MPM);
1703 return MPM;
1704 }
1705
1706 // Run partial inlining pass to partially inline functions that have
1707 // large bodies.
1708 // FIXME: It isn't clear whether this is really the right place to run this
1709 // in ThinLTO. Because there is another canonicalization and simplification
1710 // phase that will run after the thin link, running this here ends up with
1711 // less information than will be available later and it may grow functions in
1712 // ways that aren't beneficial.
1715
1716 if (PGOOpt && PGOOpt->PseudoProbeForProfiling &&
1717 PGOOpt->Action == PGOOptions::SampleUse)
1719
1720 // Handle Optimizer{Early,Last}EPCallbacks added by clang on PreLink. Actual
1721 // optimization is going to be done in PostLink stage, but clang can't add
1722 // callbacks there in case of in-process ThinLTO called by linker.
1727
1728 // Emit annotation remarks.
1730
1731 addRequiredLTOPreLinkPasses(MPM);
1732
1733 return MPM;
1734}
1735
1737 OptimizationLevel Level, const ModuleSummaryIndex *ImportSummary) {
1739
1740 if (ImportSummary) {
1741 // For ThinLTO we must apply the context disambiguation decisions early, to
1742 // ensure we can correctly match the callsites to summary data.
1745 ImportSummary, PGOOpt && PGOOpt->Action == PGOOptions::SampleUse));
1746
1747 // These passes import type identifier resolutions for whole-program
1748 // devirtualization and CFI. They must run early because other passes may
1749 // disturb the specific instruction patterns that these passes look for,
1750 // creating dependencies on resolutions that may not appear in the summary.
1751 //
1752 // For example, GVN may transform the pattern assume(type.test) appearing in
1753 // two basic blocks into assume(phi(type.test, type.test)), which would
1754 // transform a dependency on a WPD resolution into a dependency on a type
1755 // identifier resolution for CFI.
1756 //
1757 // Also, WPD has access to more precise information than ICP and can
1758 // devirtualize more effectively, so it should operate on the IR first.
1759 //
1760 // The WPD and LowerTypeTest passes need to run at -O0 to lower type
1761 // metadata and intrinsics.
1762 MPM.addPass(WholeProgramDevirtPass(nullptr, ImportSummary));
1763 MPM.addPass(LowerTypeTestsPass(nullptr, ImportSummary));
1764 }
1765
1766 if (Level == OptimizationLevel::O0) {
1767 // Run a second time to clean up any type tests left behind by WPD for use
1768 // in ICP.
1769 MPM.addPass(LowerTypeTestsPass(nullptr, nullptr,
1771 // Drop available_externally and unreferenced globals. This is necessary
1772 // with ThinLTO in order to avoid leaving undefined references to dead
1773 // globals in the object file.
1775 MPM.addPass(GlobalDCEPass());
1776 return MPM;
1777 }
1778 if (!UseCtxProfile.empty()) {
1779 MPM.addPass(
1781 } else {
1782 // Add the core simplification pipeline.
1785 }
1786 // Now add the optimization pipeline.
1789
1790 // Emit annotation remarks.
1792
1793 return MPM;
1794}
1795
1798 // FIXME: We should use a customized pre-link pipeline!
1799 return buildPerModuleDefaultPipeline(Level,
1801}
1802
1805 ModuleSummaryIndex *ExportSummary) {
1807
1809
1810 // Create a function that performs CFI checks for cross-DSO calls with targets
1811 // in the current module.
1812 MPM.addPass(CrossDSOCFIPass());
1813
1814 if (Level == OptimizationLevel::O0) {
1815 // The WPD and LowerTypeTest passes need to run at -O0 to lower type
1816 // metadata and intrinsics.
1817 MPM.addPass(WholeProgramDevirtPass(ExportSummary, nullptr));
1818 MPM.addPass(LowerTypeTestsPass(ExportSummary, nullptr));
1819 // Run a second time to clean up any type tests left behind by WPD for use
1820 // in ICP.
1821 MPM.addPass(LowerTypeTestsPass(nullptr, nullptr,
1823
1825
1826 // Emit annotation remarks.
1828
1829 return MPM;
1830 }
1831
1832 if (PGOOpt && PGOOpt->Action == PGOOptions::SampleUse) {
1833 // Load sample profile before running the LTO optimization pipeline.
1834 MPM.addPass(SampleProfileLoaderPass(PGOOpt->ProfileFile,
1835 PGOOpt->ProfileRemappingFile,
1837 // Cache ProfileSummaryAnalysis once to avoid the potential need to insert
1838 // RequireAnalysisPass for PSI before subsequent non-module passes.
1840 }
1841
1842 // Try to run OpenMP optimizations, quick no-op if no OpenMP metadata present.
1844
1845 // Remove unused virtual tables to improve the quality of code generated by
1846 // whole-program devirtualization and bitset lowering.
1847 MPM.addPass(GlobalDCEPass(/*InLTOPostLink=*/true));
1848
1849 // Do basic inference of function attributes from known properties of system
1850 // libraries and other oracles.
1852
1853 if (Level.getSpeedupLevel() > 1) {
1856
1857 // Indirect call promotion. This should promote all the targets that are
1858 // left by the earlier promotion pass that promotes intra-module targets.
1859 // This two-step promotion saves compile time. For LTO, it should
1860 // produce the same result as if we only did the promotion here.
1862 true /* InLTO */, PGOOpt && PGOOpt->Action == PGOOptions::SampleUse));
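// As an illustrative sketch only (assumed IR, hypothetical callee @callee):
// promotion rewrites a profiled indirect call
//   %r = call i32 %fp(i32 %x), !prof !<value profile naming @callee as hot>
// into a guarded direct call that later inlining and IPSCCP can see through:
//   %is.callee = icmp eq ptr %fp, @callee
//   br i1 %is.callee, label %direct, label %fallback
//   direct:   %r.d = call i32 @callee(i32 %x)
//   fallback: %r.f = call i32 %fp(i32 %x)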
1863
1864 // Promoting by-reference arguments to by-value exposes more constants to
1865 // IPSCCP.
1866 CGSCCPassManager CGPM;
1869 CGPM.addPass(
1872
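// Source-level sketch of the effect (hypothetical C++ function, for
// exposition only):
//   int limit(const Config &c) { return c.max; }   // passed by reference
// can be rewritten by argument promotion to
//   int limit(int c_max)       { return c_max; }   // passed by value
// so the constant each caller supplies becomes visible to IPSCCP below.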
1873 // Propagate constants at call sites into the functions they call. This
1874 // opens opportunities for globalopt (and inlining) by substituting function
1875 // pointers passed as arguments to direct uses of functions.
1876 MPM.addPass(IPSCCPPass(IPSCCPOptions(/*AllowFuncSpec=*/
1877 Level != OptimizationLevel::Os &&
1878 Level != OptimizationLevel::Oz)));
1879
1880 // Attach metadata to indirect call sites indicating the set of functions
1881 // they may target at run-time. This should follow IPSCCP.
1882 MPM.addPass(CalledValuePropagationPass());
1883 }
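// The metadata added by the call-site annotation step above looks roughly
// like the following (illustrative IR; function names are made up):
//   call void %fp(i32 %x), !callees !0
//   !0 = !{ptr @impl_a, ptr @impl_b}
// giving later passes a conservative set of possible targets.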
1884
1885 // Do RPO function attribute inference across the module to forward-propagate
1886 // attributes where applicable.
1887 // FIXME: Is this really an optimization rather than a canonicalization?
1888 MPM.addPass(ReversePostOrderFunctionAttrsPass());
1889
1890 // Use in-range annotations on GEP indices to split globals where beneficial.
1891 MPM.addPass(GlobalSplitPass());
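// Rough sketch of the splitting (illustrative IR): a combined constant such as
//   @vt = constant { [3 x ptr], [3 x ptr] } { ... }, !type !...
// whose uses are all inrange GEPs into one half can be split into @vt.0 and
// @vt.1, so each piece carries its own type metadata for WPD and CFI below.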
1892
1893 // Run whole-program optimization of virtual calls when the list of callees
1894 // is fixed.
1895 MPM.addPass(WholeProgramDevirtPass(ExportSummary, nullptr));
1896
1897 // Stop here at -O1.
1898 if (Level == OptimizationLevel::O1) {
1899 // The LowerTypeTestsPass needs to run to lower type metadata and the
1900 // type.test intrinsics. The pass does nothing if CFI is disabled.
1901 MPM.addPass(LowerTypeTestsPass(ExportSummary, nullptr));
1902 // Run a second time to clean up any type tests left behind by WPD for use
1903 // in ICP (which is performed earlier than this in the regular LTO
1904 // pipeline).
1905 MPM.addPass(LowerTypeTestsPass(nullptr, nullptr,
1906 lowertypetests::DropTestKind::Assume));
1907
1909
1910 // Emit annotation remarks.
1911 addAnnotationRemarksPass(MPM);
1912
1913 return MPM;
1914 }
1915
1916 // Optimize globals to try and fold them into constants.
1917 MPM.addPass(GlobalOptPass());
1918
1919 // Promote any localized globals to SSA registers.
1920 MPM.addPass(createModuleToFunctionPassAdaptor(PromotePass()));
1921
1922 // Linking modules together can lead to duplicate global constants; only
1923 // keep one copy of each constant.
1924 MPM.addPass(ConstantMergePass());
1925
1926 // Remove unused arguments from functions.
1927 MPM.addPass(DeadArgumentEliminationPass());
1928
1929 // Reduce the code after globalopt and ipsccp. Both can open up significant
1930 // simplification opportunities, and both can propagate functions through
1931 // function pointers. When this happens, we often have to resolve varargs
1932 // calls, etc., so let instcombine do this.
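// For exposition (made-up IR): once a propagated function pointer is known
// to be a specific callee, a mismatched call such as
//   call void (...) %fp()        ; %fp now known to be @f : void ()
// can be resolved into the direct, correctly prototyped call
//   call void @f()
// which is the kind of cleanup instcombine performs here.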
1933 FunctionPassManager PeepholeFPM;
1934 PeepholeFPM.addPass(InstCombinePass());
1935 if (Level.getSpeedupLevel() > 1)
1936 PeepholeFPM.addPass(AggressiveInstCombinePass());
1937 invokePeepholeEPCallbacks(PeepholeFPM, Level);
1938
1939 MPM.addPass(createModuleToFunctionPassAdaptor(std::move(PeepholeFPM),
1941
1942 // Lower variadic functions for supported targets prior to inlining.
1944
1945 // Note: historically, the PruneEH pass was run first to deduce nounwind and
1946 // generally clean up exception handling overhead. It isn't clear this is
1947 // valuable as the inliner doesn't currently care whether it is inlining an
1948 // invoke or a call.
1949 // Run the inliner now.
1950 if (EnableModuleInliner) {
1954 } else {
1957 /* MandatoryFirst */ true,
1960 }
1961
1962 // Perform context disambiguation after inlining, since that would reduce the
1963 // amount of additional cloning required to distinguish the allocation
1964 // contexts.
1965 if (EnableMemProfContextDisambiguation)
1966 MPM.addPass(MemProfContextDisambiguation(
1967 /*Summary=*/nullptr,
1968 PGOOpt && PGOOpt->Action == PGOOptions::SampleUse));
1969
1970 // Optimize globals again after we ran the inliner.
1971 MPM.addPass(GlobalOptPass());
1972
1973 // Run the OpenMPOpt pass again after global optimizations.
1975
1976 // Garbage collect dead functions.
1977 MPM.addPass(GlobalDCEPass(/*InLTOPostLink=*/true));
1978
1979 // If we didn't decide to inline a function, check to see if we can
1980 // transform it to pass arguments by value instead of by reference.
1982
1983 FunctionPassManager FPM;
1984 // The IPO passes may leave cruft around. Clean up after them.
1985 FPM.addPass(InstCombinePass());
1986 invokePeepholeEPCallbacks(FPM, Level);
1987
1990
1992
1993 // Do a post-inline PGO instrumentation and use pass. This is a
1994 // context-sensitive PGO pass.
1995 if (PGOOpt) {
1996 if (PGOOpt->CSAction == PGOOptions::CSIRInstr)
1997 addPGOInstrPasses(MPM, Level, /*RunProfileGen=*/true,
1998 /*IsCS=*/true, PGOOpt->AtomicCounterUpdate,
1999 PGOOpt->CSProfileGenFile, PGOOpt->ProfileRemappingFile,
2000 PGOOpt->FS);
2001 else if (PGOOpt->CSAction == PGOOptions::CSIRUse)
2002 addPGOInstrPasses(MPM, Level, /*RunProfileGen=*/false,
2003 /*IsCS=*/true, PGOOpt->AtomicCounterUpdate,
2004 PGOOpt->ProfileFile, PGOOpt->ProfileRemappingFile,
2005 PGOOpt->FS);
2006 }
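// Unlike the pre-link IR PGO counters, these context-sensitive counters are
// inserted after LTO inlining, so a callee inlined into several call sites is
// profiled separately in each inlined context rather than sharing one
// aggregate count.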
2007
2008 // Break up allocas
2010
2011 // LTO provides additional opportunities for tailcall elimination due to
2012 // link-time inlining and visibility of the nocapture attribute.
2014
2015 // Run a few AA-driven optimizations here and now to clean up the code.
2016 MPM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM),
2018
2019 MPM.addPass(
2021
2022 // Require the GlobalsAA analysis for the module so we can query it within
2023 // MainFPM.
2026 // Invalidate AAManager so it can be recreated and pick up the newly
2027 // available GlobalsAA.
2028 MPM.addPass(
2030 }
2031
2032 FunctionPassManager MainFPM;
2033 MainFPM.addPass(createFunctionToLoopPassAdaptor(
2034 LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap,
2035 /*AllowSpeculation=*/true),
2036 /*UseMemorySSA=*/true, /*UseBlockFrequencyInfo=*/false));
2037
2038 if (RunNewGVN)
2039 MainFPM.addPass(NewGVNPass());
2040 else
2041 MainFPM.addPass(GVNPass());
2042
2043 // Remove dead memcpy()'s.
2044 MainFPM.addPass(MemCpyOptPass());
2045
2046 // Nuke dead stores.
2047 MainFPM.addPass(DSEPass());
2048 MainFPM.addPass(MoveAutoInitPass());
2050
2051 LoopPassManager LPM;
2052 if (EnableLoopFlatten && Level.getSpeedupLevel() > 1)
2053 LPM.addPass(LoopFlattenPass());
2056 // FIXME: Add loop interchange.
2057
2058 // Unroll small loops and perform peeling.
2059 LPM.addPass(LoopFullUnrollPass(Level.getSpeedupLevel(),
2060 /* OnlyWhenForced= */ !PTO.LoopUnrolling,
2062 // The loop passes in LPM (LoopFullUnrollPass) do not preserve MemorySSA.
2063 // *All* loop passes must preserve it, in order to be able to use it.
2065 std::move(LPM), /*UseMemorySSA=*/false, /*UseBlockFrequencyInfo=*/true));
2066
2067 MainFPM.addPass(LoopDistributePass());
2068
2069 addVectorPasses(Level, MainFPM, /* IsFullLTO */ true);
2070
2071 // Run the OpenMPOpt CGSCC pass again late.
2074
2075 invokePeepholeEPCallbacks(MainFPM, Level);
2076 MainFPM.addPass(JumpThreadingPass());
2077 MPM.addPass(createModuleToFunctionPassAdaptor(std::move(MainFPM),
2079
2080 // Lower type metadata and the type.test intrinsic. This pass supports
2081 // clang's control flow integrity mechanisms (-fsanitize=cfi*) and needs
2082 // to be run at link time if CFI is enabled. This pass does nothing if
2083 // CFI is disabled.
2084 MPM.addPass(LowerTypeTestsPass(ExportSummary, nullptr));
2085 // Run a second time to clean up any type tests left behind by WPD for use
2086 // in ICP (which is performed earlier than this in the regular LTO pipeline).
2087 MPM.addPass(LowerTypeTestsPass(nullptr, nullptr,
2088 lowertypetests::DropTestKind::Assume));
2089
2090 // Enable splitting late in the FullLTO post-link pipeline.
2093
2094 // Add late LTO optimization passes.
2095 FunctionPassManager LateFPM;
2096
2097 // The LoopSink pass sinks instructions hoisted by LICM; LICM serves as a
2098 // canonicalization pass that enables other optimizations. As a result,
2099 // LoopSink needs to be a very late IR pass to avoid undoing LICM's
2100 // results too early.
2101 LateFPM.addPass(LoopSinkPass());
2102
2103 // This hoists/decomposes div/rem ops. It should run after other sink/hoist
2104 // passes to avoid re-sinking, but before SimplifyCFG because it can allow
2105 // flattening of blocks.
2106 LateFPM.addPass(DivRemPairsPass());
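// Sketch of the effect (pseudo C, for exposition): for a pair like
//   q = a / b;  r = a % b;
// the two operations are placed together so targets with a combined divrem
// instruction emit one division; targets without one get the decomposition
//   q = a / b;  r = a - q * b;
// leaving a single expensive divide either way.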
2107
2108 // Delete basic blocks, which optimization passes may have killed.
2110 .convertSwitchRangeToICmp(true)
2111 .hoistCommonInsts(true)
2112 .speculateUnpredictables(true)));
2113 MPM.addPass(createModuleToFunctionPassAdaptor(std::move(LateFPM)));
2114
2115 // Drop bodies of available_externally objects to improve GlobalDCE.
2117
2118 // Now that we have optimized the program, discard unreachable functions.
2119 MPM.addPass(GlobalDCEPass(/*InLTOPostLink=*/true));
2120
2121 if (PTO.MergeFunctions)
2123
2124 if (PTO.CallGraphProfile)
2125 MPM.addPass(CGProfilePass(/*InLTOPostLink=*/true));
2126
2128
2129 // Emit annotation remarks.
2130 addAnnotationRemarksPass(MPM);
2131
2132 return MPM;
2133}
2134
2135 ModulePassManager
2136 PassBuilder::buildO0DefaultPipeline(OptimizationLevel Level,
2137 ThinOrFullLTOPhase Phase) {
2138 assert(Level == OptimizationLevel::O0 &&
2139 "buildO0DefaultPipeline should only be used with O0");
2140
2141 ModulePassManager MPM;
2142
2143 // Perform pseudo-probe instrumentation in O0 mode. This is for consistency
2144 // between different build modes. For example, an LTO build can mix an O0
2145 // prelink with an O2 postlink. Loading a sample profile in the postlink
2146 // will require pseudo-probe instrumentation in the prelink.
2147 if (PGOOpt && PGOOpt->PseudoProbeForProfiling)
2149
2150 if (PGOOpt && (PGOOpt->Action == PGOOptions::IRInstr ||
2151 PGOOpt->Action == PGOOptions::IRUse))
2153 MPM,
2154 /*RunProfileGen=*/(PGOOpt->Action == PGOOptions::IRInstr),
2155 /*IsCS=*/false, PGOOpt->AtomicCounterUpdate, PGOOpt->ProfileFile,
2156 PGOOpt->ProfileRemappingFile, PGOOpt->FS);
2157
2158 // Instrument function entry and exit before all inlining.
2160 EntryExitInstrumenterPass(/*PostInlining=*/false)));
2161
2163
2164 if (PGOOpt && PGOOpt->DebugInfoForProfiling)
2166
2167 if (PGOOpt && PGOOpt->Action == PGOOptions::SampleUse) {
2168 // Explicitly disable sample loader inlining and use a flattened profile in
2169 // the O0 pipeline.
2170 MPM.addPass(SampleProfileLoaderPass(PGOOpt->ProfileFile,
2171 PGOOpt->ProfileRemappingFile,
2172 ThinOrFullLTOPhase::None, nullptr,
2173 /*DisableSampleProfileInlining=*/true,
2174 /*UseFlattenedProfile=*/true));
2175 // Cache ProfileSummaryAnalysis once to avoid the potential need to insert
2176 // RequireAnalysisPass for PSI before subsequent non-module passes.
2178 }
2179
2181
2182 // Build a minimal pipeline based on the semantics required by LLVM, which
2183 // is just that always_inline inlining occurs. Further, disable generating
2184 // lifetime intrinsics to avoid enabling further optimizations during
2185 // code generation.
2186 MPM.addPass(AlwaysInlinerPass(
2187 /*InsertLifetimeIntrinsics=*/false));
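// For exposition (hypothetical C++ source): this preserves the guarantee that
//   [[gnu::always_inline]] inline int twice(int x) { return 2 * x; }
// is inlined into its callers even at -O0; it is the only inlining the O0
// pipeline performs.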
2188
2189 if (PTO.MergeFunctions)
2191
2192 if (EnableMatrix)
2193 MPM.addPass(
2195
2196 if (!CGSCCOptimizerLateEPCallbacks.empty()) {
2197 CGSCCPassManager CGPM;
2199 if (!CGPM.isEmpty())
2201 }
2202 if (!LateLoopOptimizationsEPCallbacks.empty()) {
2203 LoopPassManager LPM;
2205 if (!LPM.isEmpty()) {
2207 createFunctionToLoopPassAdaptor(std::move(LPM))));
2208 }
2209 }
2210 if (!LoopOptimizerEndEPCallbacks.empty()) {
2211 LoopPassManager LPM;
2213 if (!LPM.isEmpty()) {
2215 createFunctionToLoopPassAdaptor(std::move(LPM))));
2216 }
2217 }
2218 if (!ScalarOptimizerLateEPCallbacks.empty()) {
2221 if (!FPM.isEmpty())
2222 MPM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM)));
2223 }
2224
2226
2227 if (!VectorizerStartEPCallbacks.empty()) {
2230 if (!FPM.isEmpty())
2231 MPM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM)));
2232 }
2233
2234 ModulePassManager CoroPM;
2235 CoroPM.addPass(CoroEarlyPass());
2236 CGSCCPassManager CGPM;
2237 CGPM.addPass(CoroSplitPass());
2238 CoroPM.addPass(createModuleToPostOrderCGSCCPassAdaptor(std::move(CGPM)));
2239 CoroPM.addPass(CoroCleanupPass());
2240 CoroPM.addPass(GlobalDCEPass());
2241 MPM.addPass(CoroConditionalWrapper(std::move(CoroPM)));
2242
2244
2245 if (isLTOPreLink(Phase))
2246 addRequiredLTOPreLinkPasses(MPM);
2247
2249
2250 return MPM;
2251}
2252
2253 AAManager PassBuilder::buildDefaultAAPipeline() {
2254 AAManager AA;
2255
2256 // The order in which these are registered determines their priority when
2257 // being queried.
2258
2259 // First we register the basic alias analysis that provides the majority of
2260 // per-function local AA logic. This is a stateless, on-demand local set of
2261 // AA techniques.
2263
2264 // Next we query fast, specialized alias analyses that wrap IR-embedded
2265 // information about aliasing.
2268
2269 // Add support for querying global aliasing information when available.
2270 // Because the `AAManager` is a function analysis and `GlobalsAA` is a module
2271 // analysis, all that the `AAManager` can do is query for any *cached*
2272 // results from `GlobalsAA` through a readonly proxy.
2275
2276 // Add target-specific alias analyses.
2277 if (TM)
2279
2280 return AA;
2281}