LLVM 23.0.0git
PassBuilderPipelines.cpp
Go to the documentation of this file.
1//===- Construction of pass pipelines -------------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8/// \file
9///
10/// This file provides the implementation of the PassBuilder based on our
11/// static pass registry as well as related functionality. It also provides
12/// helpers to aid in analyzing, debugging, and testing passes and pass
13/// pipelines.
14///
15//===----------------------------------------------------------------------===//
16
17#include "llvm/ADT/Statistic.h"
29#include "llvm/IR/PassManager.h"
30#include "llvm/Pass.h"
155
156using namespace llvm;
157
158namespace llvm {
159
161 "enable-ml-inliner", cl::init(InliningAdvisorMode::Default), cl::Hidden,
162 cl::desc("Enable ML policy for inliner. Currently trained for -Oz only"),
164 "Heuristics-based inliner version"),
166 "Use development mode (runtime-loadable model)"),
168 "Use release mode (AOT-compiled model)")));
169
170/// Flag to enable inline deferral during PGO.
171static cl::opt<bool>
172 EnablePGOInlineDeferral("enable-npm-pgo-inline-deferral", cl::init(true),
174 cl::desc("Enable inline deferral during PGO"));
175
/// Hidden boolean command-line option `enable-module-inliner`; initialized to
/// false.
176static cl::opt<bool> EnableModuleInliner("enable-module-inliner",
177 cl::init(false), cl::Hidden,
178 cl::desc("Enable module inliner"));
179
181 "mandatory-inlining-first", cl::init(false), cl::Hidden,
182 cl::desc("Perform mandatory inlinings module-wide, before performing "
183 "inlining"));
184
186 "eagerly-invalidate-analyses", cl::init(true), cl::Hidden,
187 cl::desc("Eagerly invalidate more analyses in default pipelines"));
188
190 "enable-merge-functions", cl::init(false), cl::Hidden,
191 cl::desc("Enable function merging as part of the optimization pipeline"));
192
194 "enable-post-pgo-loop-rotation", cl::init(true), cl::Hidden,
195 cl::desc("Run the loop rotation transformation after PGO instrumentation"));
196
198 "enable-global-analyses", cl::init(true), cl::Hidden,
199 cl::desc("Enable inter-procedural analyses"));
200
/// Hidden boolean command-line option `enable-partial-inlining`; initialized to
/// false.
201static cl::opt<bool> RunPartialInlining("enable-partial-inlining",
202 cl::init(false), cl::Hidden,
203 cl::desc("Run Partial inlining pass"));
204
206 "extra-vectorizer-passes", cl::init(false), cl::Hidden,
207 cl::desc("Run cleanup optimization passes after vectorization"));
208
/// Hidden boolean command-line option `enable-newgvn`; initialized to false.
/// When set, pipeline code later in this file adds NewGVNPass where it would
/// otherwise add GVNPass.
209static cl::opt<bool> RunNewGVN("enable-newgvn", cl::init(false), cl::Hidden,
210 cl::desc("Run the NewGVN pass"));
211
/// Hidden boolean command-line option `enable-loopinterchange`; initialized to
/// false.
212static cl::opt<bool>
213 EnableLoopInterchange("enable-loopinterchange", cl::init(false), cl::Hidden,
214 cl::desc("Enable the LoopInterchange Pass"));
215
/// Hidden boolean command-line option `enable-unroll-and-jam`; initialized to
/// false.
216static cl::opt<bool> EnableUnrollAndJam("enable-unroll-and-jam",
217 cl::init(false), cl::Hidden,
218 cl::desc("Enable Unroll And Jam Pass"));
219
220static cl::opt<bool> EnableLoopFlatten("enable-loop-flatten", cl::init(false),
222 cl::desc("Enable the LoopFlatten Pass"));
223
/// Hidden boolean command-line option `enable-dfa-jump-thread`; initialized to
/// false.
224static cl::opt<bool>
225 EnableDFAJumpThreading("enable-dfa-jump-thread",
226 cl::desc("Enable DFA jump threading"),
227 cl::init(false), cl::Hidden);
228
/// Boolean command-line option `hot-cold-split`. Unlike most options in this
/// file, it is declared without an explicit cl::init value and without
/// cl::Hidden.
229static cl::opt<bool>
230 EnableHotColdSplit("hot-cold-split",
231 cl::desc("Enable hot-cold splitting pass"));
232
233static cl::opt<bool> EnableIROutliner("ir-outliner", cl::init(false),
235 cl::desc("Enable ir outliner pass"));
236
/// Hidden boolean command-line option `disable-preinline`; initialized to
/// false.
237static cl::opt<bool>
238 DisablePreInliner("disable-preinline", cl::init(false), cl::Hidden,
239 cl::desc("Disable pre-instrumentation inliner"));
240
242 "preinline-threshold", cl::Hidden, cl::init(75),
243 cl::desc("Control the amount of inlining in pre-instrumentation inliner "
244 "(default = 75)"));
245
/// Boolean command-line option `enable-gvn-hoist`, declared without an explicit
/// cl::init value. When set, pipeline code later in this file adds
/// GVNHoistPass to a function pass manager.
246static cl::opt<bool>
247 EnableGVNHoist("enable-gvn-hoist",
248 cl::desc("Enable the GVN hoisting pass (default = off)"));
249
/// Boolean command-line option `enable-gvn-sink`, declared without an explicit
/// cl::init value. When set, pipeline code later in this file adds GVNSinkPass
/// followed by a SimplifyCFGPass to a function pass manager.
250static cl::opt<bool>
251 EnableGVNSink("enable-gvn-sink",
252 cl::desc("Enable the GVN sinking pass (default = off)"));
253
255 "enable-jump-table-to-switch",
256 cl::desc("Enable JumpTableToSwitch pass (default = off)"));
257
258// This option is used in simplifying testing SampleFDO optimizations for
259// profile loading.
/// Hidden boolean command-line option `enable-chr`; initialized to true.
// NOTE(review): the comment immediately preceding this declaration talks about
// SampleFDO profile loading, which does not obviously match this option's
// description -- confirm which option that comment is meant for.
260static cl::opt<bool>
261 EnableCHR("enable-chr", cl::init(true), cl::Hidden,
262 cl::desc("Enable control height reduction optimization (CHR)"));
263
265 "flattened-profile-used", cl::init(false), cl::Hidden,
266 cl::desc("Indicate the sample profile being used is flattened, i.e., "
267 "no inline hierarchy exists in the profile"));
268
/// Hidden boolean command-line option `enable-matrix`; initialized to false.
269static cl::opt<bool>
270 EnableMatrix("enable-matrix", cl::init(false), cl::Hidden,
271 cl::desc("Enable lowering of the matrix intrinsics"));
272
274 "enable-mergeicmps", cl::init(true), cl::Hidden,
275 cl::desc("Enable MergeICmps pass in the optimization pipeline"));
276
278 "enable-constraint-elimination", cl::init(true), cl::Hidden,
279 cl::desc(
280 "Enable pass to eliminate conditions based on linear constraints"));
281
283 "attributor-enable", cl::Hidden, cl::init(AttributorRunOption::NONE),
284 cl::desc("Enable the attributor inter-procedural deduction pass"),
286 "enable all full attributor runs"),
288 "enable all attributor-light runs"),
290 "enable module-wide attributor runs"),
292 "enable module-wide attributor-light runs"),
294 "enable call graph SCC attributor runs"),
296 "enable call graph SCC attributor-light runs"),
297 clEnumValN(AttributorRunOption::NONE, "none",
298 "disable attributor runs")));
299
301 "enable-sampled-instrumentation", cl::init(false), cl::Hidden,
302 cl::desc("Enable profile instrumentation sampling (default = off)"));
304 "enable-loop-versioning-licm", cl::init(false), cl::Hidden,
305 cl::desc("Enable the experimental Loop Versioning LICM pass"));
306
308 "instrument-cold-function-only-path", cl::init(""),
309 cl::desc("File path for cold function only instrumentation(requires use "
310 "with --pgo-instrument-cold-function-only)"),
311 cl::Hidden);
312
313// TODO: There is a similar flag in WPD pass, we should consolidate them by
314// parsing the option only once in PassBuilder and share it across both places.
316 "enable-devirtualize-speculatively",
317 cl::desc("Enable speculative devirtualization optimization"),
318 cl::init(false));
319
322
324} // namespace llvm
325
343
344namespace llvm {
346} // namespace llvm
347
349 OptimizationLevel Level) {
350 for (auto &C : PeepholeEPCallbacks)
351 C(FPM, Level);
352}
355 for (auto &C : LateLoopOptimizationsEPCallbacks)
356 C(LPM, Level);
357}
359 OptimizationLevel Level) {
360 for (auto &C : LoopOptimizerEndEPCallbacks)
361 C(LPM, Level);
362}
365 for (auto &C : ScalarOptimizerLateEPCallbacks)
366 C(FPM, Level);
367}
369 OptimizationLevel Level) {
370 for (auto &C : CGSCCOptimizerLateEPCallbacks)
371 C(CGPM, Level);
372}
374 OptimizationLevel Level) {
375 for (auto &C : VectorizerStartEPCallbacks)
376 C(FPM, Level);
377}
379 OptimizationLevel Level) {
380 for (auto &C : VectorizerEndEPCallbacks)
381 C(FPM, Level);
382}
384 OptimizationLevel Level,
386 for (auto &C : OptimizerEarlyEPCallbacks)
387 C(MPM, Level, Phase);
388}
390 OptimizationLevel Level,
392 for (auto &C : OptimizerLastEPCallbacks)
393 C(MPM, Level, Phase);
394}
397 for (auto &C : FullLinkTimeOptimizationEarlyEPCallbacks)
398 C(MPM, Level);
399}
402 for (auto &C : FullLinkTimeOptimizationLastEPCallbacks)
403 C(MPM, Level);
404}
406 OptimizationLevel Level) {
407 for (auto &C : PipelineStartEPCallbacks)
408 C(MPM, Level);
409}
412 for (auto &C : PipelineEarlySimplificationEPCallbacks)
413 C(MPM, Level, Phase);
414}
415
416// Helper to add AnnotationRemarksPass.
419 // Count the stats for InstCount and FunctionPropertiesAnalysis
420 if (AreStatisticsEnabled()) {
422 MPM.addPass(
424 }
425}
426
427// Helper to check if the current compilation phase is preparing for LTO
432
433// Helper to check if the current compilation phase is LTO backend
438
439// Helper to wrap conditionally Coro passes.
441 // TODO: Skip passes according to Phase.
442 ModulePassManager CoroPM;
443 CoroPM.addPass(CoroEarlyPass());
444 CGSCCPassManager CGPM;
445 CGPM.addPass(CoroSplitPass());
446 CoroPM.addPass(createModuleToPostOrderCGSCCPassAdaptor(std::move(CGPM)));
447 CoroPM.addPass(CoroCleanupPass());
448 CoroPM.addPass(GlobalDCEPass());
449 return CoroConditionalWrapper(std::move(CoroPM));
450}
451
452// TODO: Investigate the cost/benefit of tail call elimination on debugging.
454PassBuilder::buildO1FunctionSimplificationPipeline(OptimizationLevel Level,
456
458
460 FPM.addPass(CountVisitsPass());
461
462 // Form SSA out of local memory accesses after breaking apart aggregates into
463 // scalars.
464 FPM.addPass(SROAPass(SROAOptions::ModifyCFG));
465
466 // Catch trivial redundancies
467 FPM.addPass(EarlyCSEPass(true /* Enable mem-ssa. */));
468
469 // Hoisting of scalars and load expressions.
470 FPM.addPass(
471 SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(true)));
472 FPM.addPass(InstCombinePass());
473
474 FPM.addPass(LibCallsShrinkWrapPass());
475
476 invokePeepholeEPCallbacks(FPM, Level);
477
478 FPM.addPass(
479 SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(true)));
480
481 // Form canonically associated expression trees, and simplify the trees using
482 // basic mathematical properties. For example, this will form (nearly)
483 // minimal multiplication trees.
484 FPM.addPass(ReassociatePass());
485
486 // Add the primary loop simplification pipeline.
487 // FIXME: Currently this is split into two loop pass pipelines because we run
488 // some function passes in between them. These can and should be removed
489 // and/or replaced by scheduling the loop pass equivalents in the correct
490 // positions. But those equivalent passes aren't powerful enough yet.
491 // Specifically, `SimplifyCFGPass` and `InstCombinePass` are currently still
492 // used. We have `LoopSimplifyCFGPass` which isn't powerful enough yet to
493 // fully replace `SimplifyCFGPass`, and the closest to the other we have is
494 // `LoopInstSimplify`.
495 LoopPassManager LPM1, LPM2;
496
497 // Simplify the loop body. We do this initially to clean up after other loop
498 // passes run, either when iterating on a loop or on inner loops with
499 // implications on the outer loop.
500 LPM1.addPass(LoopInstSimplifyPass());
501 LPM1.addPass(LoopSimplifyCFGPass());
502
503 // Try to remove as much code from the loop header as possible,
504 // to reduce amount of IR that will have to be duplicated. However,
505 // do not perform speculative hoisting the first time as LICM
506 // will destroy metadata that may not need to be destroyed if run
507 // after loop rotation.
508 // TODO: Investigate promotion cap for O1.
509 LPM1.addPass(LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap,
510 /*AllowSpeculation=*/false));
511
512 LPM1.addPass(
513 LoopRotatePass(/*EnableHeaderDuplication=*/true, isLTOPreLink(Phase)));
514 // TODO: Investigate promotion cap for O1.
515 LPM1.addPass(LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap,
516 /*AllowSpeculation=*/true));
517 LPM1.addPass(SimpleLoopUnswitchPass());
519 LPM1.addPass(LoopFlattenPass());
520
521 LPM2.addPass(LoopIdiomRecognizePass());
522 LPM2.addPass(IndVarSimplifyPass());
523
525
526 LPM2.addPass(LoopDeletionPass());
527
528 // Do not enable unrolling in PreLinkThinLTO phase during sample PGO
529 // because it changes IR and makes profile annotation in back compile
530 // inaccurate. The normal unroller doesn't pay attention to forced full unroll
531 // attributes so we need to make sure and allow the full unroll pass to pay
532 // attention to it.
533 if (Phase != ThinOrFullLTOPhase::ThinLTOPreLink || !PGOOpt ||
534 PGOOpt->Action != PGOOptions::SampleUse)
535 LPM2.addPass(LoopFullUnrollPass(Level.getSpeedupLevel(),
536 /* OnlyWhenForced= */ !PTO.LoopUnrolling,
537 PTO.ForgetAllSCEVInLoopUnroll));
538
540
541 FPM.addPass(createFunctionToLoopPassAdaptor(std::move(LPM1),
542 /*UseMemorySSA=*/true));
543 FPM.addPass(
544 SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(true)));
545 FPM.addPass(InstCombinePass());
546 // The loop passes in LPM2 (LoopFullUnrollPass) do not preserve MemorySSA.
547 // *All* loop passes must preserve it, in order to be able to use it.
548 FPM.addPass(createFunctionToLoopPassAdaptor(std::move(LPM2),
549 /*UseMemorySSA=*/false));
550
551 // Delete small array after loop unroll.
552 FPM.addPass(SROAPass(SROAOptions::ModifyCFG));
553
554 // Specially optimize memory movement as it doesn't look like dataflow in SSA.
555 FPM.addPass(MemCpyOptPass());
556
557 // Sparse conditional constant propagation.
558 // FIXME: It isn't clear why we do this *after* loop passes rather than
559 // before...
560 FPM.addPass(SCCPPass());
561
562 // Delete dead bit computations (instcombine runs after to fold away the dead
563 // computations, and then ADCE will run later to exploit any new DCE
564 // opportunities that creates).
565 FPM.addPass(BDCEPass());
566
567 // Run instcombine after redundancy and dead bit elimination to exploit
568 // opportunities opened up by them.
569 FPM.addPass(InstCombinePass());
570 invokePeepholeEPCallbacks(FPM, Level);
571
572 FPM.addPass(CoroElidePass());
573
575
576 // Finally, do an expensive DCE pass to catch all the dead code exposed by
577 // the simplifications and basic cleanup after all the simplifications.
578 // TODO: Investigate if this is too expensive.
579 FPM.addPass(ADCEPass());
580 FPM.addPass(
581 SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(true)));
582 FPM.addPass(InstCombinePass());
583 invokePeepholeEPCallbacks(FPM, Level);
584
585 return FPM;
586}
587
591 assert(Level != OptimizationLevel::O0 && "Must request optimizations!");
592
593 // The O1 pipeline has a separate pipeline creation function to simplify
594 // construction readability.
595 if (Level.getSpeedupLevel() == 1)
596 return buildO1FunctionSimplificationPipeline(Level, Phase);
597
599
602
603 // Form SSA out of local memory accesses after breaking apart aggregates into
604 // scalars.
606
607 // Catch trivial redundancies
608 FPM.addPass(EarlyCSEPass(true /* Enable mem-ssa. */));
611
612 // Hoisting of scalars and load expressions.
613 if (EnableGVNHoist)
614 FPM.addPass(GVNHoistPass());
615
616 // Global value numbering based sinking.
617 if (EnableGVNSink) {
618 FPM.addPass(GVNSinkPass());
619 FPM.addPass(
620 SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(true)));
621 }
622
623 // Speculative execution if the target has divergent branches; otherwise nop.
624 FPM.addPass(SpeculativeExecutionPass(/* OnlyIfDivergentTarget =*/true));
625
626 // Optimize based on known information about branches, and cleanup afterward.
629
630 // Jump table to switch conversion.
635
636 FPM.addPass(
637 SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(true)));
641
642 invokePeepholeEPCallbacks(FPM, Level);
643
644 // For PGO use pipeline, try to optimize memory intrinsics such as memcpy
645 // using the size value profile. Don't perform this when optimizing for size.
646 if (PGOOpt && PGOOpt->Action == PGOOptions::IRUse)
648
649 FPM.addPass(TailCallElimPass(/*UpdateFunctionEntryCount=*/
650 isInstrumentedPGOUse()));
651 FPM.addPass(
652 SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(true)));
653
654 // Form canonically associated expression trees, and simplify the trees using
655 // basic mathematical properties. For example, this will form (nearly)
656 // minimal multiplication trees.
658
661
662 // Add the primary loop simplification pipeline.
663 // FIXME: Currently this is split into two loop pass pipelines because we run
664 // some function passes in between them. These can and should be removed
665 // and/or replaced by scheduling the loop pass equivalents in the correct
666 // positions. But those equivalent passes aren't powerful enough yet.
667 // Specifically, `SimplifyCFGPass` and `InstCombinePass` are currently still
668 // used. We have `LoopSimplifyCFGPass` which isn't powerful enough yet to
669 // fully replace `SimplifyCFGPass`, and the closest to the other we have is
670 // `LoopInstSimplify`.
671 LoopPassManager LPM1, LPM2;
672
673 // Simplify the loop body. We do this initially to clean up after other loop
674 // passes run, either when iterating on a loop or on inner loops with
675 // implications on the outer loop.
676 LPM1.addPass(LoopInstSimplifyPass());
677 LPM1.addPass(LoopSimplifyCFGPass());
678
679 // Try to remove as much code from the loop header as possible,
680 // to reduce amount of IR that will have to be duplicated. However,
681 // do not perform speculative hoisting the first time as LICM
682 // will destroy metadata that may not need to be destroyed if run
683 // after loop rotation.
684 // TODO: Investigate promotion cap for O1.
685 LPM1.addPass(LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap,
686 /*AllowSpeculation=*/false));
687
688 LPM1.addPass(
689 LoopRotatePass(/*EnableHeaderDuplication=*/true, isLTOPreLink(Phase)));
690 // TODO: Investigate promotion cap for O1.
691 LPM1.addPass(LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap,
692 /*AllowSpeculation=*/true));
693 LPM1.addPass(
694 SimpleLoopUnswitchPass(/* NonTrivial */ Level == OptimizationLevel::O3));
696 LPM1.addPass(LoopFlattenPass());
697
698 LPM2.addPass(LoopIdiomRecognizePass());
699 LPM2.addPass(IndVarSimplifyPass());
700
701 {
703 ExtraPasses.addPass(SimpleLoopUnswitchPass(/* NonTrivial */ Level ==
705 LPM2.addPass(std::move(ExtraPasses));
706 }
707
709
710 LPM2.addPass(LoopDeletionPass());
711
712 // Do not enable unrolling in PreLinkThinLTO phase during sample PGO
713 // because it changes IR to makes profile annotation in back compile
714 // inaccurate. The normal unroller doesn't pay attention to forced full unroll
715 // attributes so we need to make sure and allow the full unroll pass to pay
716 // attention to it.
717 if (Phase != ThinOrFullLTOPhase::ThinLTOPreLink || !PGOOpt ||
718 PGOOpt->Action != PGOOptions::SampleUse)
719 LPM2.addPass(LoopFullUnrollPass(Level.getSpeedupLevel(),
720 /* OnlyWhenForced= */ !PTO.LoopUnrolling,
721 PTO.ForgetAllSCEVInLoopUnroll));
722
724
725 FPM.addPass(createFunctionToLoopPassAdaptor(std::move(LPM1),
726 /*UseMemorySSA=*/true));
727 FPM.addPass(
728 SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(true)));
730 // The loop passes in LPM2 (LoopIdiomRecognizePass, IndVarSimplifyPass,
731 // LoopDeletionPass and LoopFullUnrollPass) do not preserve MemorySSA.
732 // *All* loop passes must preserve it, in order to be able to use it.
733 FPM.addPass(createFunctionToLoopPassAdaptor(std::move(LPM2),
734 /*UseMemorySSA=*/false));
735
736 // Delete small array after loop unroll.
738
739 // Try vectorization/scalarization transforms that are both improvements
740 // themselves and can allow further folds with GVN and InstCombine.
741 FPM.addPass(VectorCombinePass(/*TryEarlyFoldsOnly=*/true));
742
743 // Eliminate redundancies.
745 if (RunNewGVN)
746 FPM.addPass(NewGVNPass());
747 else
748 FPM.addPass(GVNPass());
749
750 // Sparse conditional constant propagation.
751 // FIXME: It isn't clear why we do this *after* loop passes rather than
752 // before...
753 FPM.addPass(SCCPPass());
754
755 // Delete dead bit computations (instcombine runs after to fold away the dead
756 // computations, and then ADCE will run later to exploit any new DCE
757 // opportunities that creates).
758 FPM.addPass(BDCEPass());
759
760 // Run instcombine after redundancy and dead bit elimination to exploit
761 // opportunities opened up by them.
763 invokePeepholeEPCallbacks(FPM, Level);
764
765 // Re-consider control flow based optimizations after redundancy elimination,
766 // redo DCE, etc.
769
772
773 // Finally, do an expensive DCE pass to catch all the dead code exposed by
774 // the simplifications and basic cleanup after all the simplifications.
775 // TODO: Investigate if this is too expensive.
776 FPM.addPass(ADCEPass());
777
778 // Specially optimize memory movement as it doesn't look like dataflow in SSA.
779 FPM.addPass(MemCpyOptPass());
780
781 FPM.addPass(DSEPass());
783
785 LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap,
786 /*AllowSpeculation=*/true),
787 /*UseMemorySSA=*/true));
788
789 FPM.addPass(CoroElidePass());
790
792
794 .convertSwitchRangeToICmp(true)
795 .convertSwitchToArithmetic(true)
796 .hoistCommonInsts(true)
797 .sinkCommonInsts(true)));
799 invokePeepholeEPCallbacks(FPM, Level);
800
801 return FPM;
802}
803
804void PassBuilder::addRequiredLTOPreLinkPasses(ModulePassManager &MPM) {
807}
808
/// Adds the pre-instrumentation inlining stage to \p MPM.
///
/// From the statements visible here: asserts that \p Level is not O0, sets an
/// inline hint threshold of 325, builds a function pipeline of SROA, EarlyCSE,
/// SimplifyCFG and InstCombine (followed by the peephole extension-point
/// callbacks), nests that pipeline in the CGSCC pipeline obtained from
/// MIWP.getPM(), adds MIWP to \p MPM, and finally adds GlobalDCEPass.
///
/// NOTE(review): several statements are absent from this listing (the
/// condition guarding the early return, the remaining InlineParams setup, the
/// construction of MIWP and the declaration of FPM), so this summary is
/// limited to what is shown -- confirm against the full source.
809void PassBuilder::addPreInlinerPasses(ModulePassManager &MPM,
810 OptimizationLevel Level,
811 ThinOrFullLTOPhase LTOPhase) {
812 assert(Level != OptimizationLevel::O0 && "Not expecting O0 here!");
814 return;
815 InlineParams IP;
816
818
819 // FIXME: The hint threshold has the same value used by the regular inliner
820 // when not optimizing for size. This should probably be lowered after
821 // performance testing.
822 // FIXME: this comment is cargo culted from the old pass manager; revisit.
823 IP.HintThreshold = 325;
826 IP, /* MandatoryFirst */ true,
828 CGSCCPassManager &CGPipeline = MIWP.getPM();
829
831 FPM.addPass(SROAPass(SROAOptions::ModifyCFG));
832 FPM.addPass(EarlyCSEPass()); // Catch trivial redundancies.
833 FPM.addPass(SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(
834 true))); // Merge & remove basic blocks.
835 FPM.addPass(InstCombinePass()); // Combine silly sequences.
836 invokePeepholeEPCallbacks(FPM, Level);
837
838 CGPipeline.addPass(createCGSCCToFunctionPassAdaptor(
839 std::move(FPM), PTO.EagerlyInvalidateAnalyses));
840
841 MPM.addPass(std::move(MIWP));
842
843 // Delete anything that is now dead to make sure that we don't instrument
844 // dead code. Instrumentation can end up keeping dead code around and
845 // dramatically increase code size.
846 MPM.addPass(GlobalDCEPass());
847}
848
/// Schedules a default-constructed LoopRotatePass, wrapped in a
/// function-to-loop adaptor that does not use MemorySSA, honoring
/// PTO.EagerlyInvalidateAnalyses.
///
/// NOTE(review): the guarding condition and the enclosing addPass call are
/// absent from this listing; presumably the pass is added to \p MPM only when
/// post-PGO loop rotation is enabled -- confirm against the full source.
849void PassBuilder::addPostPGOLoopRotation(ModulePassManager &MPM,
850 OptimizationLevel Level) {
852 // Disable header duplication in loop rotation at -Oz.
854 createFunctionToLoopPassAdaptor(LoopRotatePass(),
855 /*UseMemorySSA=*/false),
856 PTO.EagerlyInvalidateAnalyses));
857 }
858}
859
/// Adds either the PGO profile-use passes or the PGO instrumentation passes to
/// \p MPM. Must not be called at O0 (asserted).
///
/// \param RunProfileGen  When false, adds PGOInstrumentationUse for
///        \p ProfileFile (which must be non-empty) plus a
///        RequireAnalysisPass for ProfileSummaryAnalysis, then returns.
///        When true, adds PGOInstrumentationGen, post-PGO loop rotation and
///        InstrProfilingLoweringPass.
/// \param IsCS  Forwarded to the PGO passes; also used as the value of
///        InstrProfOptions::UseBFIInPromotion.
/// \param AtomicCounterUpdate  Stored in InstrProfOptions::Atomic.
/// \param ProfileFile  Profile to read in use mode; in generation mode, if
///        non-empty, stored in InstrProfOptions::InstrProfileOutput.
/// \param ProfileRemappingFile  Forwarded to PGOInstrumentationUse.
860void PassBuilder::addPGOInstrPasses(ModulePassManager &MPM,
861 OptimizationLevel Level, bool RunProfileGen,
862 bool IsCS, bool AtomicCounterUpdate,
863 std::string ProfileFile,
864 std::string ProfileRemappingFile) {
865 assert(Level != OptimizationLevel::O0 && "Not expecting O0 here!");
866
867 if (!RunProfileGen) {
868 assert(!ProfileFile.empty() && "Profile use expecting a profile file!");
869 MPM.addPass(
870 PGOInstrumentationUse(ProfileFile, ProfileRemappingFile, IsCS, FS));
871 // Cache ProfileSummaryAnalysis once to avoid the potential need to insert
872 // RequireAnalysisPass for PSI before subsequent non-module passes.
873 MPM.addPass(RequireAnalysisPass<ProfileSummaryAnalysis, Module>());
874 return;
875 }
876
877 // Perform PGO instrumentation.
878 MPM.addPass(PGOInstrumentationGen(IsCS ? PGOInstrumentationType::CSFDO
880
881 addPostPGOLoopRotation(MPM, Level);
882 // Add the profile lowering pass.
883 InstrProfOptions Options;
884 if (!ProfileFile.empty())
885 Options.InstrProfileOutput = ProfileFile;
886 // Do counter promotion at Level greater than O0.
887 Options.DoCounterPromotion = true;
888 Options.UseBFIInPromotion = IsCS;
889 if (EnableSampledInstr) {
890 Options.Sampling = true;
891 // With sampling, there is little benefit to enabling counter promotion.
892 // But note that sampling does work with counter promotion.
893 Options.DoCounterPromotion = false;
894 }
895 Options.Atomic = AtomicCounterUpdate;
896 MPM.addPass(InstrProfilingLoweringPass(Options, IsCS));
897}
898
900 bool RunProfileGen, bool IsCS,
901 bool AtomicCounterUpdate,
902 std::string ProfileFile,
903 std::string ProfileRemappingFile) {
904 if (!RunProfileGen) {
905 assert(!ProfileFile.empty() && "Profile use expecting a profile file!");
906 MPM.addPass(
907 PGOInstrumentationUse(ProfileFile, ProfileRemappingFile, IsCS, FS));
908 // Cache ProfileSummaryAnalysis once to avoid the potential need to insert
909 // RequireAnalysisPass for PSI before subsequent non-module passes.
911 return;
912 }
913
914 // Perform PGO instrumentation.
917 // Add the profile lowering pass.
919 if (!ProfileFile.empty())
920 Options.InstrProfileOutput = ProfileFile;
921 // Do not do counter promotion at O0.
922 Options.DoCounterPromotion = false;
923 Options.UseBFIInPromotion = IsCS;
924 Options.Atomic = AtomicCounterUpdate;
926}
927
929 return getInlineParamsFromOptLevel(Level.getSpeedupLevel());
930}
931
935 InlineParams IP;
936 if (PTO.InlinerThreshold == -1)
938 else
939 IP = getInlineParams(PTO.InlinerThreshold);
940 // For PreLinkThinLTO + SamplePGO or PreLinkFullLTO + SamplePGO,
941 // set hot-caller threshold to 0 to disable hot
942 // callsite inline (as much as possible [1]) because it makes
943 // profile annotation in the backend inaccurate.
944 //
945 // [1] Note the cost of a function could be below zero due to erased
946 // prologue / epilogue.
947 if (isLTOPreLink(Phase) && PGOOpt && PGOOpt->Action == PGOOptions::SampleUse)
949
950 if (PGOOpt)
952
956
957 // Require the GlobalsAA analysis for the module so we can query it within
958 // the CGSCC pipeline.
960 MIWP.addModulePass(RequireAnalysisPass<GlobalsAA, Module>());
961 // Invalidate AAManager so it can be recreated and pick up the newly
962 // available GlobalsAA.
963 MIWP.addModulePass(
965 }
966
967 // Require the ProfileSummaryAnalysis for the module so we can query it within
968 // the inliner pass.
970
971 // Now begin the main postorder CGSCC pipeline.
972 // FIXME: The current CGSCC pipeline has its origins in the legacy pass
973 // manager and trying to emulate its precise behavior. Much of this doesn't
974 // make a lot of sense and we should revisit the core CGSCC structure.
975 CGSCCPassManager &MainCGPipeline = MIWP.getPM();
976
977 // Note: historically, the PruneEH pass was run first to deduce nounwind and
978 // generally clean up exception handling overhead. It isn't clear this is
979 // valuable as the inliner doesn't currently care whether it is inlining an
980 // invoke or a call.
981
983 MainCGPipeline.addPass(AttributorCGSCCPass());
985 MainCGPipeline.addPass(AttributorLightCGSCCPass());
986
987 // Deduce function attributes. We do another run of this after the function
988 // simplification pipeline, so this only needs to run when it could affect the
989 // function simplification pipeline, which is only the case with recursive
990 // functions.
991 MainCGPipeline.addPass(PostOrderFunctionAttrsPass(/*SkipNonRecursive*/ true));
992
993 // When at O3 add argument promotion to the pass pipeline.
994 // FIXME: It isn't at all clear why this should be limited to O3.
995 if (Level == OptimizationLevel::O3)
996 MainCGPipeline.addPass(ArgumentPromotionPass());
997
998 // Try to perform OpenMP specific optimizations. This is a (quick!) no-op if
999 // there are no OpenMP runtime calls present in the module.
1000 if (Level == OptimizationLevel::O2 || Level == OptimizationLevel::O3)
1001 MainCGPipeline.addPass(OpenMPOptCGSCCPass(Phase));
1002
1003 invokeCGSCCOptimizerLateEPCallbacks(MainCGPipeline, Level);
1004
1005 // Add the core function simplification pipeline nested inside the
1006 // CGSCC walk.
1009 PTO.EagerlyInvalidateAnalyses, /*NoRerun=*/true));
1010
1011 // Finally, deduce any function attributes based on the fully simplified
1012 // function.
1013 MainCGPipeline.addPass(PostOrderFunctionAttrsPass());
1014
1015 // Mark that the function is fully simplified and that it shouldn't be
1016 // simplified again if we somehow revisit it due to CGSCC mutations unless
1017 // it's been modified since.
1020
1022 MainCGPipeline.addPass(CoroSplitPass(Level != OptimizationLevel::O0));
1023 MainCGPipeline.addPass(CoroAnnotationElidePass());
1024 }
1025
1026 // Make sure we don't affect potential future NoRerun CGSCC adaptors.
1027 MIWP.addLateModulePass(createModuleToFunctionPassAdaptor(
1029
1030 return MIWP;
1031}
1032
1037
1039 // For PreLinkThinLTO + SamplePGO or PreLinkFullLTO + SamplePGO,
1040 // set hot-caller threshold to 0 to disable hot
1041 // callsite inline (as much as possible [1]) because it makes
1042 // profile annotation in the backend inaccurate.
1043 //
1044 // [1] Note the cost of a function could be below zero due to erased
1045 // prologue / epilogue.
1046 if (isLTOPreLink(Phase) && PGOOpt && PGOOpt->Action == PGOOptions::SampleUse)
1047 IP.HotCallSiteThreshold = 0;
1048
1049 if (PGOOpt)
1051
1052 // The inline deferral logic is used to avoid losing some
1053 // inlining chance in future. It is helpful in SCC inliner, in which
1054 // inlining is processed in bottom-up order.
1055 // While in module inliner, the inlining order is a priority-based order
1056 // by default. The inline deferral is unnecessary there. So we disable the
1057 // inline deferral logic in module inliner.
1058 IP.EnableDeferral = false;
1059
1062 MPM.addPass(GlobalOptPass());
1063 MPM.addPass(GlobalDCEPass());
1064 MPM.addPass(PGOCtxProfFlatteningPass(/*IsPreThinlink=*/false));
1065 }
1066
1069 PTO.EagerlyInvalidateAnalyses));
1070
1074 MPM.addPass(
1076 }
1077
1078 return MPM;
1079}
1080
1084 assert(Level != OptimizationLevel::O0 &&
1085 "Should not be used for O0 pipeline");
1086
1088 "FullLTOPostLink shouldn't call buildModuleSimplificationPipeline!");
1089
1091
1092 // Place pseudo probe instrumentation as the first pass of the pipeline to
1093 // minimize the impact of optimization changes.
1094 if (PGOOpt && PGOOpt->PseudoProbeForProfiling &&
1097
1098 bool HasSampleProfile = PGOOpt && (PGOOpt->Action == PGOOptions::SampleUse);
1099
1100 // In ThinLTO mode, when flattened profile is used, all the available
1101 // profile information will be annotated in PreLink phase so there is
1102 // no need to load the profile again in PostLink.
1103 bool LoadSampleProfile =
1104 HasSampleProfile &&
1106
1107 // During the ThinLTO backend phase we perform early indirect call promotion
1108 // here, before globalopt. Otherwise imported available_externally functions
1109 // look unreferenced and are removed. If we are going to load the sample
1110 // profile then defer until later.
1111 // TODO: See if we can move later and consolidate with the location where
1112 // we perform ICP when we are loading a sample profile.
1113 // TODO: We pass HasSampleProfile (whether there was a sample profile file
1114 // passed to the compile) to the SamplePGO flag of ICP. This is used to
1115 // determine whether the new direct calls are annotated with prof metadata.
1116 // Ideally this should be determined from whether the IR is annotated with
1117 // sample profile, and not whether a sample profile was provided on the
1118 // command line. E.g. for flattened profiles where we will not be reloading
1119 // the sample profile in the ThinLTO backend, we ideally shouldn't have to
1120 // provide the sample profile file.
1121 if (Phase == ThinOrFullLTOPhase::ThinLTOPostLink && !LoadSampleProfile)
1122 MPM.addPass(PGOIndirectCallPromotion(true /* InLTO */, HasSampleProfile));
1123
1124 // Create an early function pass manager to cleanup the output of the
1125 // frontend. Not necessary with LTO post link pipelines since the pre link
1126 // pipeline already cleaned up the frontend output.
1128 // Do basic inference of function attributes from known properties of system
1129 // libraries and other oracles.
1131 MPM.addPass(CoroEarlyPass());
1132
1133 FunctionPassManager EarlyFPM;
1134 EarlyFPM.addPass(EntryExitInstrumenterPass(/*PostInlining=*/false));
1135 // Lower llvm.expect to metadata before attempting transforms.
1136 // Compare/branch metadata may alter the behavior of passes like
1137 // SimplifyCFG.
1139 EarlyFPM.addPass(SimplifyCFGPass());
1141 EarlyFPM.addPass(EarlyCSEPass());
1142 if (Level == OptimizationLevel::O3)
1143 EarlyFPM.addPass(CallSiteSplittingPass());
1145 std::move(EarlyFPM), PTO.EagerlyInvalidateAnalyses));
1146 }
1147
1148 if (LoadSampleProfile) {
1149 // Annotate sample profile right after early FPM to ensure freshness of
1150 // the debug info.
1152 PGOOpt->ProfileFile, PGOOpt->ProfileRemappingFile, Phase, FS));
1153 // Cache ProfileSummaryAnalysis once to avoid the potential need to insert
1154 // RequireAnalysisPass for PSI before subsequent non-module passes.
1156 // Do not invoke ICP in the LTOPrelink phase as it makes it hard
1157 // for the profile annotation to be accurate in the LTO backend.
1158 if (!isLTOPreLink(Phase))
1159 // We perform early indirect call promotion here, before globalopt.
1160 // This is important for the ThinLTO backend phase because otherwise
1161 // imported available_externally functions look unreferenced and are
1162 // removed.
1163 MPM.addPass(
1164 PGOIndirectCallPromotion(true /* IsInLTO */, true /* SamplePGO */));
1165 }
1166
1167 // Try to perform OpenMP specific optimizations on the module. This is a
1168 // (quick!) no-op if there are no OpenMP runtime calls present in the module.
1170
1172 MPM.addPass(AttributorPass());
1175
1176 // Lower type metadata and the type.test intrinsic in the ThinLTO
1177 // post link pipeline after ICP. This is to enable usage of the type
1178 // tests in ICP sequences.
1181
1183
1184 // Interprocedural constant propagation now that basic cleanup has occurred
1185 // and prior to optimizing globals.
1186 // FIXME: This position in the pipeline hasn't been carefully considered in
1187 // years, it should be re-analyzed.
1188 MPM.addPass(
1189 IPSCCPPass(IPSCCPOptions(/*AllowFuncSpec=*/!isLTOPreLink(Phase))));
1190
1191 // Attach metadata to indirect call sites indicating the set of functions
1192 // they may target at run-time. This should follow IPSCCP.
1194
1195 // Optimize globals to try and fold them into constants.
1196 MPM.addPass(GlobalOptPass());
1197
1198 // Create a small function pass pipeline to cleanup after all the global
1199 // optimizations.
1200 FunctionPassManager GlobalCleanupPM;
1201 // FIXME: Should this instead be a run of SROA?
1202 GlobalCleanupPM.addPass(PromotePass());
1203 GlobalCleanupPM.addPass(InstCombinePass());
1204 invokePeepholeEPCallbacks(GlobalCleanupPM, Level);
1205 GlobalCleanupPM.addPass(
1206 SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(true)));
1207 MPM.addPass(createModuleToFunctionPassAdaptor(std::move(GlobalCleanupPM),
1208 PTO.EagerlyInvalidateAnalyses));
1209
1210 // We already asserted this happens in non-FullLTOPostLink earlier.
1211 const bool IsPreLink = Phase != ThinOrFullLTOPhase::ThinLTOPostLink;
1212 // Enable contextual profiling instrumentation.
1213 const bool IsCtxProfGen =
1215 const bool IsPGOPreLink = !IsCtxProfGen && PGOOpt && IsPreLink;
1216 const bool IsPGOInstrGen =
1217 IsPGOPreLink && PGOOpt->Action == PGOOptions::IRInstr;
1218 const bool IsPGOInstrUse =
1219 IsPGOPreLink && PGOOpt->Action == PGOOptions::IRUse;
1220 const bool IsMemprofUse = IsPGOPreLink && !PGOOpt->MemoryProfile.empty();
1221 // We don't want to mix pgo ctx gen and pgo gen; we also don't currently
1222 // enable ctx profiling from the frontend.
1224 "Enabling both instrumented PGO and contextual instrumentation is not "
1225 "supported.");
1226 const bool IsCtxProfUse =
1228
1229 assert(
1231 "--instrument-cold-function-only-path is provided but "
1232 "--pgo-instrument-cold-function-only is not enabled");
1233 const bool IsColdFuncOnlyInstrGen = PGOInstrumentColdFunctionOnly &&
1234 IsPGOPreLink &&
1236
1237 if (IsPGOInstrGen || IsPGOInstrUse || IsMemprofUse || IsCtxProfGen ||
1238 IsCtxProfUse || IsColdFuncOnlyInstrGen)
1239 addPreInlinerPasses(MPM, Level, Phase);
1240
1241 // Add all the requested passes for instrumentation PGO, if requested.
1242 if (IsPGOInstrGen || IsPGOInstrUse) {
1243 addPGOInstrPasses(MPM, Level,
1244 /*RunProfileGen=*/IsPGOInstrGen,
1245 /*IsCS=*/false, PGOOpt->AtomicCounterUpdate,
1246 PGOOpt->ProfileFile, PGOOpt->ProfileRemappingFile);
1247 } else if (IsCtxProfGen || IsCtxProfUse) {
1249 // In pre-link, we just want the instrumented IR. We use the contextual
1250 // profile in the post-thinlink phase.
1251 // The instrumentation will be removed in post-thinlink after IPO.
1252 // FIXME(mtrofin): move AssignGUIDPass if there is agreement to use this
1253 // mechanism for GUIDs.
1254 MPM.addPass(AssignGUIDPass());
1255 if (IsCtxProfUse) {
1256 MPM.addPass(PGOCtxProfFlatteningPass(/*IsPreThinlink=*/true));
1257 return MPM;
1258 }
1259 // Block further inlining in the instrumented ctxprof case. This avoids
1260 // confusingly collecting profiles for the same GUID corresponding to
1261 // different variants of the function. We could do like PGO and identify
1262 // functions by a (GUID, Hash) tuple, but since the ctxprof "use" waits for
1263 // thinlto to happen before performing any further optimizations, it's
1264 // unnecessary to collect profiles for non-prevailing copies.
1266 addPostPGOLoopRotation(MPM, Level);
1268 } else if (IsColdFuncOnlyInstrGen) {
1269 addPGOInstrPasses(MPM, Level, /* RunProfileGen */ true, /* IsCS */ false,
1270 /* AtomicCounterUpdate */ false,
1272 /* ProfileRemappingFile */ "");
1273 }
1274
1275 if (IsPGOInstrGen || IsPGOInstrUse || IsCtxProfGen)
1276 MPM.addPass(PGOIndirectCallPromotion(false, false));
1277
1278 if (IsPGOPreLink && PGOOpt->CSAction == PGOOptions::CSIRInstr)
1279 MPM.addPass(PGOInstrumentationGenCreateVar(PGOOpt->CSProfileGenFile,
1281
1282 if (IsMemprofUse)
1283 MPM.addPass(MemProfUsePass(PGOOpt->MemoryProfile, FS));
1284
1285 if (PGOOpt && (PGOOpt->Action == PGOOptions::IRUse ||
1286 PGOOpt->Action == PGOOptions::SampleUse))
1287 MPM.addPass(PGOForceFunctionAttrsPass(PGOOpt->ColdOptType));
1288
1289 MPM.addPass(AlwaysInlinerPass(/*InsertLifetimeIntrinsics=*/true));
1290
1293 else
1294 MPM.addPass(buildInlinerPipeline(Level, Phase));
1295
1296 // Remove any dead arguments exposed by cleanups, constant folding globals,
1297 // and argument promotion.
1299
1302
1304 MPM.addPass(CoroCleanupPass());
1305
1306 // Optimize globals now that functions are fully simplified.
1307 MPM.addPass(GlobalOptPass());
1308 MPM.addPass(GlobalDCEPass());
1309
1310 return MPM;
1311}
1312
/// Appends the vectorization-stage function passes to FPM. The exact sequence
/// depends on Level, on the PTO tuning options, and on whether LTOPhase is the
/// full-LTO post-link phase.
/// NOTE(review): the embedded listing numbers skip 1315, where FPM (used
/// throughout the body) is presumably declared as a parameter -- confirm
/// against the upstream file.
1313/// TODO: Should LTO cause any differences to this set of passes?
1314void PassBuilder::addVectorPasses(OptimizationLevel Level,
1316 ThinOrFullLTOPhase LTOPhase) {
// In the full-LTO post-link phase the unrolling sequence is scheduled early
// (first branch below); in every other phase the equivalent sequence runs
// after VectorCombine further down.
1317 const bool IsFullLTO = LTOPhase == ThinOrFullLTOPhase::FullLTOPostLink;
1318
1321
1322 // Drop dereferenceable assumes after vectorization, as they are no longer
1323 // needed and can inhibit further optimization.
1324 if (!isLTOPreLink(LTOPhase))
1325 FPM.addPass(DropUnnecessaryAssumesPass(/*DropDereferenceable=*/true));
1326
1328 if (IsFullLTO) {
1329 // The vectorizer may have significantly shortened a loop body; unroll
1330 // again. Unroll small loops to hide loop backedge latency and saturate any
1331 // parallel execution resources of an out-of-order processor. We also then
1332 // need to clean up redundancies and loop invariant code.
1333 // FIXME: It would be really good to use a loop-integrated instruction
1334 // combiner for cleanup here so that the unrolling and LICM can be pipelined
1335 // across the loop nests.
1336 // We do UnrollAndJam in a separate LPM to ensure it happens before unroll
1339 LoopUnrollAndJamPass(Level.getSpeedupLevel())));
1341 Level.getSpeedupLevel(), /*OnlyWhenForced=*/!PTO.LoopUnrolling,
1344 // Now that we are done with loop unrolling, be it either by LoopVectorizer,
1345 // or LoopUnroll passes, some variable-offset GEP's into alloca's could have
1346 // become constant-offset, thus enabling SROA and alloca promotion. Do so.
1347 // NOTE: we are very late in the pipeline, and we don't have any LICM
1348 // or SimplifyCFG passes scheduled after us, that would cleanup
1349 // the CFG mess this may have created if allowed to modify CFG, so forbid that.
1351 }
1352
1353 if (!IsFullLTO) {
1354 // Eliminate loads by forwarding stores from the previous iteration to loads
1355 // of the current iteration.
1357 }
1358 // Cleanup after the loop optimization passes.
1359 FPM.addPass(InstCombinePass());
1360
// The extra cleanup passes are only added above speedup level 1 and only when
// the ExtraVectorizerPasses option is set.
1361 if (Level.getSpeedupLevel() > 1 && ExtraVectorizerPasses) {
1362 ExtraFunctionPassManager<ShouldRunExtraVectorPasses> ExtraPasses;
1363 // At higher optimization levels, try to clean up any runtime overlap and
1364 // alignment checks inserted by the vectorizer. We want to track correlated
1365 // runtime checks for two inner loops in the same outer loop, fold any
1366 // common computations, hoist loop-invariant aspects out of any outer loop,
1367 // and unswitch the runtime checks if possible. Once hoisted, we may have
1368 // dead (or speculatable) control flows or more combining opportunities.
1369 ExtraPasses.addPass(EarlyCSEPass());
1370 ExtraPasses.addPass(CorrelatedValuePropagationPass());
1371 ExtraPasses.addPass(InstCombinePass());
1372 LoopPassManager LPM;
1373 LPM.addPass(LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap,
1374 /*AllowSpeculation=*/true));
1375 LPM.addPass(SimpleLoopUnswitchPass(/* NonTrivial */ Level ==
1377 ExtraPasses.addPass(
1378 createFunctionToLoopPassAdaptor(std::move(LPM), /*UseMemorySSA=*/true));
1379 ExtraPasses.addPass(
1380 SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(true)));
1381 ExtraPasses.addPass(InstCombinePass());
1382 FPM.addPass(std::move(ExtraPasses));
1383 }
1384
1385 // Now that we've formed fast to execute loop structures, we do further
1386 // optimizations. These are run afterward as they might block doing complex
1387 // analyses and transforms such as what are needed for loop vectorization.
1388
1389 // Cleanup after loop vectorization, etc. Simplification passes like CVP and
1390 // GVN, loop transforms, and others have already run, so it's now better to
1391 // convert to more optimized IR using more aggressive simplify CFG options.
1392 // The extra sinking transform can create larger basic blocks, so do this
1393 // before SLP vectorization.
1394 FPM.addPass(SimplifyCFGPass(SimplifyCFGOptions()
1395 .forwardSwitchCondToPhi(true)
1396 .convertSwitchRangeToICmp(true)
1397 .convertSwitchToArithmetic(true)
1398 .convertSwitchToLookupTable(true)
1399 .needCanonicalLoops(false)
1400 .hoistCommonInsts(true)
1401 .sinkCommonInsts(true)));
1402
1403 if (IsFullLTO) {
1404 FPM.addPass(SCCPPass());
1405 FPM.addPass(InstCombinePass());
1406 FPM.addPass(BDCEPass());
1407 }
1408
1409 // Optimize parallel scalar instruction chains into SIMD instructions.
1410 if (PTO.SLPVectorization) {
1411 FPM.addPass(SLPVectorizerPass());
1412 if (Level.getSpeedupLevel() > 1 && ExtraVectorizerPasses) {
1413 FPM.addPass(EarlyCSEPass());
1414 }
1415 }
1416 // Enhance/cleanup vector code.
1417 FPM.addPass(VectorCombinePass());
1418
1419 if (!IsFullLTO) {
1420 FPM.addPass(InstCombinePass());
1421 // Unroll small loops to hide loop backedge latency and saturate any
1422 // parallel execution resources of an out-of-order processor. We also then
1423 // need to clean up redundancies and loop invariant code.
1424 // FIXME: It would be really good to use a loop-integrated instruction
1425 // combiner for cleanup here so that the unrolling and LICM can be pipelined
1426 // across the loop nests.
1427 // We do UnrollAndJam in a separate LPM to ensure it happens before unroll
1428 if (EnableUnrollAndJam && PTO.LoopUnrolling) {
1430 LoopUnrollAndJamPass(Level.getSpeedupLevel())));
1431 }
1432 FPM.addPass(LoopUnrollPass(LoopUnrollOptions(
1433 Level.getSpeedupLevel(), /*OnlyWhenForced=*/!PTO.LoopUnrolling,
1434 PTO.ForgetAllSCEVInLoopUnroll)));
1435 FPM.addPass(WarnMissedTransformationsPass());
1436 // Now that we are done with loop unrolling, be it either by LoopVectorizer,
1437 // or LoopUnroll passes, some variable-offset GEP's into alloca's could have
1438 // become constant-offset, thus enabling SROA and alloca promotion. Do so.
1439 // NOTE: we are very late in the pipeline, and we don't have any LICM
1440 // or SimplifyCFG passes scheduled after us, that would cleanup
1441 // the CFG mess this may have created if allowed to modify CFG, so forbid that.
1442 FPM.addPass(SROAPass(SROAOptions::PreserveCFG));
1443 }
1444
1445 FPM.addPass(InferAlignmentPass());
1446 FPM.addPass(InstCombinePass());
1447
1448 // This is needed for two reasons:
1449 // 1. It works around problems that instcombine introduces, such as sinking
1450 // expensive FP divides into loops containing multiplications using the
1451 // divide result.
1452 // 2. It helps to clean up some loop-invariant code created by the loop
1453 // unroll pass when IsFullLTO=false.
1455 LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap,
1456 /*AllowSpeculation=*/true),
1457 /*UseMemorySSA=*/true));
1458
1459 // Now that we've vectorized and unrolled loops, we may have more refined
1460 // alignment information, try to re-derive it here.
1461 FPM.addPass(AlignmentFromAssumptionsPass());
1462}
1463
// Tail of a pipeline-builder definition whose opening signature line(s) are
// missing from this listing (embedded numbers jump from 1463 to 1466).
// NOTE(review): presumably PassBuilder::buildModuleOptimizationPipeline(Level,
// LTOPhase) -- confirm against the upstream file.
// The visible body builds a function-level pipeline (OptimizePM: loop
// rotation/deletion, optional interchange and fusion, loop distribution, the
// vectorization passes, late sinking and CFG cleanup), adapts it into MPM,
// appends late module-level passes, and returns MPM. Several addPass lines are
// absent from this extract, so some comments below have no visible statement.
1466 ThinOrFullLTOPhase LTOPhase) {
1467 const bool LTOPreLink = isLTOPreLink(LTOPhase);
1469
1470 // Run partial inlining pass to partially inline functions that have
1471 // large bodies.
1474
1475 // Remove avail extern fns and globals definitions since we aren't compiling
1476 // an object file for later LTO. For LTO we want to preserve these so they
1477 // are eligible for inlining at link-time. Note if they are unreferenced they
1478 // will be removed by GlobalDCE later, so this only impacts referenced
1479 // available externally globals. Eventually they will be suppressed during
1480 // codegen, but eliminating here enables more opportunity for GlobalDCE as it
1481 // may make globals referenced by available external functions dead and saves
1482 // running remaining passes on the eliminated functions. These should be
1483 // preserved during prelinking for link-time inlining decisions.
1484 if (!LTOPreLink)
1486
1487 // Do RPO function attribute inference across the module to forward-propagate
1488 // attributes where applicable.
1489 // FIXME: Is this really an optimization rather than a canonicalization?
1491
1492 // Do a post inline PGO instrumentation and use pass. This is a context
1493 // sensitive PGO pass. We don't want to do this in LTOPreLink phase as
1494 // cross-module inline has not been done yet. The context sensitive
1495 // instrumentation is after all the inlines are done.
1496 if (!LTOPreLink && PGOOpt) {
1497 if (PGOOpt->CSAction == PGOOptions::CSIRInstr)
1498 addPGOInstrPasses(MPM, Level, /*RunProfileGen=*/true,
1499 /*IsCS=*/true, PGOOpt->AtomicCounterUpdate,
1500 PGOOpt->CSProfileGenFile, PGOOpt->ProfileRemappingFile);
1501 else if (PGOOpt->CSAction == PGOOptions::CSIRUse)
1502 addPGOInstrPasses(MPM, Level, /*RunProfileGen=*/false,
1503 /*IsCS=*/true, PGOOpt->AtomicCounterUpdate,
1504 PGOOpt->ProfileFile, PGOOpt->ProfileRemappingFile);
1505 }
1506
1507 // Re-compute GlobalsAA here prior to function passes. This is particularly
1508 // useful as the above will have inlined, DCE'ed, and function-attr
1509 // propagated everything. We should at this point have a reasonably minimal
1510 // and richly annotated call graph. By computing aliasing and mod/ref
1511 // information for all local globals here, the late loop passes and notably
1512 // the vectorizer will be able to use them to help recognize vectorizable
1513 // memory operations.
1516
1517 invokeOptimizerEarlyEPCallbacks(MPM, Level, LTOPhase);
1518
1519 FunctionPassManager OptimizePM;
1520
1521 // Only drop unnecessary assumes post-inline and post-link, as otherwise
1522 // additional uses of the affected value may be introduced through inlining
1523 // and CSE.
1524 if (!isLTOPreLink(LTOPhase))
1525 OptimizePM.addPass(DropUnnecessaryAssumesPass());
1526
1527 // Scheduling LoopVersioningLICM when inlining is over, because after that
1528 // we may see more accurate aliasing. Reason to run this late is that too
1529 // early versioning may prevent further inlining due to increase of code
1530 // size. Other optimizations which run later might benefit from the no-alias
1531 // assumption in the cloned loop.
1533 OptimizePM.addPass(
1535 // LoopVersioningLICM pass might increase new LICM opportunities.
1537 LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap,
1538 /*AllowSpeculation=*/true),
1539 /*UseMemorySSA=*/true));
1540 }
1541
1542 OptimizePM.addPass(Float2IntPass());
1544
1545 if (EnableMatrix) {
1546 OptimizePM.addPass(LowerMatrixIntrinsicsPass());
1547 OptimizePM.addPass(EarlyCSEPass());
1548 }
1549
1550 // CHR pass should only be applied with the profile information.
1551 // The check is to check the profile summary information in CHR.
1552 if (EnableCHR && Level == OptimizationLevel::O3)
1553 OptimizePM.addPass(ControlHeightReductionPass());
1554
1555 // FIXME: We need to run some loop optimizations to re-rotate loops after
1556 // simplifycfg and others undo their rotation.
1557
1558 // Optimize the loop execution. These passes operate on entire loop nests
1559 // rather than on each loop in an inside-out manner, and so they are actually
1560 // function passes.
1561
1562 invokeVectorizerStartEPCallbacks(OptimizePM, Level);
1563
1564 LoopPassManager LPM;
1565 // First rotate loops that may have been un-rotated by prior passes.
1566 // Disable header duplication at -Oz.
// NOTE(review): the call below passes EnableLoopHeaderDuplication=true
// unconditionally, so the -Oz remark above may be stale -- confirm.
1567 LPM.addPass(LoopRotatePass(/*EnableLoopHeaderDuplication=*/true, LTOPreLink,
1568 /*CheckExitCount=*/true));
1569 // Some loops may have become dead by now. Try to delete them.
1570 // FIXME: see discussion in https://reviews.llvm.org/D112851,
1571 // this may need to be revisited once we run GVN before loop deletion
1572 // in the simplification pipeline.
1573 LPM.addPass(LoopDeletionPass());
1574
1575 if (PTO.LoopInterchange)
1576 LPM.addPass(LoopInterchangePass());
1577
1578 OptimizePM.addPass(
1579 createFunctionToLoopPassAdaptor(std::move(LPM), /*UseMemorySSA=*/false));
1580
1581 // FIXME: This may not be the right place in the pipeline.
1582 // We need to have the data to support the right place.
1583 if (PTO.LoopFusion)
1584 OptimizePM.addPass(LoopFusePass());
1585
1586 // Distribute loops to allow partial vectorization. I.e. isolate dependences
1587 // into separate loop that would otherwise inhibit vectorization. This is
1588 // currently only performed for loops marked with the metadata
1589 // llvm.loop.distribute=true or when -enable-loop-distribute is specified.
1590 OptimizePM.addPass(LoopDistributePass());
1591
1592 // Populates the VFABI attribute with the scalar-to-vector mappings
1593 // from the TargetLibraryInfo.
1594 OptimizePM.addPass(InjectTLIMappings());
1595
1596 addVectorPasses(Level, OptimizePM, LTOPhase);
1597
1598 invokeVectorizerEndEPCallbacks(OptimizePM, Level);
1599
1600 // LoopSink pass sinks instructions hoisted by LICM, which serves as a
1601 // canonicalization pass that enables other optimizations. As a result,
1602 // LoopSink pass needs to be a very late IR pass to avoid undoing LICM
1603 // result too early.
1604 OptimizePM.addPass(LoopSinkPass());
1605
1606 // And finally clean up LCSSA form before generating code.
1607 OptimizePM.addPass(InstSimplifyPass());
1608
1609 // This hoists/decomposes div/rem ops. It should run after other sink/hoist
1610 // passes to avoid re-sinking, but before SimplifyCFG because it can allow
1611 // flattening of blocks.
1612 OptimizePM.addPass(DivRemPairsPass());
1613
1614 // Merge adjacent icmps into memcmp, then expand memcmp to loads/compares.
1615 // TODO: move this further up so that it can be optimized by GVN, etc.
1616 if (EnableMergeICmps)
1617 OptimizePM.addPass(MergeICmpsPass());
1618 OptimizePM.addPass(ExpandMemCmpPass());
1619
1620 // Try to annotate calls that were created during optimization.
1621 OptimizePM.addPass(
1622 TailCallElimPass(/*UpdateFunctionEntryCount=*/isInstrumentedPGOUse()));
1623
1624 // LoopSink (and other loop passes since the last simplifyCFG) might have
1625 // resulted in single-entry-single-exit or empty blocks. Clean up the CFG.
1626 OptimizePM.addPass(
1628 .convertSwitchRangeToICmp(true)
1629 .convertSwitchToArithmetic(true)
1630 .speculateUnpredictables(true)
1631 .hoistLoadsStoresWithCondFaulting(true)));
1632
1633 // Add the core optimizing pipeline.
1634 MPM.addPass(createModuleToFunctionPassAdaptor(std::move(OptimizePM),
1635 PTO.EagerlyInvalidateAnalyses));
1636
1637 // AllocToken transforms heap allocation calls; this needs to run late after
1638 // other allocation call transformations (such as those in InstCombine).
1639 if (!LTOPreLink)
1640 MPM.addPass(AllocTokenPass());
1641
1642 invokeOptimizerLastEPCallbacks(MPM, Level, LTOPhase);
1643
1644 // Split out cold code. Splitting is done late to avoid hiding context from
1645 // other optimizations and inadvertently regressing performance. The tradeoff
1646 // is that this has a higher code size cost than splitting early.
1647 if (EnableHotColdSplit && !LTOPreLink)
1649
1650 // Search the code for similar regions of code. If enough similar regions can
1651 // be found where extracting the regions into their own function will decrease
1652 // the size of the program, we extract the regions, and deduplicate the
1653 // structurally similar regions.
1654 if (EnableIROutliner)
1655 MPM.addPass(IROutlinerPass());
1656
1657 // Now we need to do some global optimization transforms.
1658 // FIXME: It would seem like these should come first in the optimization
1659 // pipeline and maybe be the bottom of the canonicalization pipeline? Weird
1660 // ordering here.
1661 MPM.addPass(GlobalDCEPass());
1663
1664 // Merge functions if requested. It has a better chance to merge functions
1665 // after ConstantMerge folded jump tables.
1666 if (PTO.MergeFunctions)
1668
1669 if (PTO.CallGraphProfile && !LTOPreLink)
1670 MPM.addPass(CGProfilePass(isLTOPostLink(LTOPhase)));
1671
1672 // RelLookupTableConverterPass runs later in LTO post-link pipeline.
1673 if (!LTOPreLink)
1675
1676 // Add devirtualization pass only when LTO is not enabled, as otherwise
1677 // the pass is already enabled in the LTO pipeline.
1678 if (PTO.DevirtualizeSpeculatively && LTOPhase == ThinOrFullLTOPhase::None) {
1679 // TODO: explore a better pipeline configuration that can improve
1680 // compilation time overhead.
1682 /*ExportSummary*/ nullptr,
1683 /*ImportSummary*/ nullptr,
1684 /*DevirtSpeculatively*/ PTO.DevirtualizeSpeculatively));
1686 // Given that the devirtualization creates more opportunities for inlining,
1687 // we run the Inliner again here to maximize the optimization gain we
1688 // get from devirtualization.
1689 // Also, we can't run devirtualization before inlining because the
1690 // devirtualization depends on the passes optimizing/eliminating vtable GVs
1691 // and those passes are only effective after inlining.
1692 if (EnableModuleInliner) {
1696 } else {
1699 /* MandatoryFirst */ true,
1701 }
1702 }
1703 return MPM;
1704}
1705
// Body of a pipeline builder whose signature lines (1706-1708) are missing
// from this listing. NOTE(review): presumably
// PassBuilder::buildPerModuleDefaultPipeline(Level, Phase) -- confirm against
// the upstream file.
// At O0 it delegates to buildO0DefaultPipeline. Otherwise it returns MPM,
// adding the required LTO pre-link passes first when Phase is an LTO pre-link
// phase. Most addPass lines between the comments below are absent from this
// extract.
1709 if (Level == OptimizationLevel::O0)
1710 return buildO0DefaultPipeline(Level, Phase);
1711
1713
1714 // Currently this pipeline is only invoked in an LTO pre link pass or when we
1715 // are not running LTO. If that changes the below checks may need updating.
1717
1718 // If we are invoking this in non-LTO mode, remove any MemProf related
1719 // attributes and metadata, as we don't know whether we are linking with
1720 // a library containing the necessary interfaces.
1723
1724 // Convert @llvm.global.annotations to !annotation metadata.
1726
1727 // Force any function attributes we want the rest of the pipeline to observe.
1729
1730 if (PGOOpt && PGOOpt->DebugInfoForProfiling)
1732
1733 // Apply module pipeline start EP callback.
1735
1736 // Add the core simplification pipeline.
1738
1739 // Now add the optimization pipeline.
1741
1742 if (PGOOpt && PGOOpt->PseudoProbeForProfiling &&
1743 PGOOpt->Action == PGOOptions::SampleUse)
1745
1746 // Emit annotation remarks.
1748
1749 if (isLTOPreLink(Phase))
1750 addRequiredLTOPreLinkPasses(MPM);
1751 return MPM;
1752}
1753
// Tail of a pipeline-builder signature plus its body; lines 1754-1755 (the
// start of the signature) are missing from this listing.
// NOTE(review): presumably PassBuilder::buildFatLTODefaultPipeline(Level,
// ThinLTO, EmitSummary) -- confirm against the upstream file.
// The visible body embeds bitcode (EmbedBitcodePass), runs FatLtoCleanup, then
// either reuses the ThinLTO post-link pipeline (ThinLTO with a sample-use
// profile) or falls back to module optimization, and returns MPM.
1756 bool EmitSummary) {
1758 if (ThinLTO)
1760 else
1762 MPM.addPass(EmbedBitcodePass(ThinLTO, EmitSummary));
1763
1764 // Perform any cleanups to the IR that aren't suitable for per TU compilation,
1765 // like removing CFI/WPD related instructions. Note, we reuse
1766 // DropTypeTestsPass to clean up type tests rather than duplicate that logic
1767 // in FatLtoCleanup.
1768 MPM.addPass(FatLtoCleanup());
1769
1770 // If we're doing FatLTO w/ CFI enabled, we don't want the type tests in the
1771 // object code, only in the bitcode section, so drop it before we run
1772 // module optimization and generate machine code. If llvm.type.test() isn't in
1773 // the IR, this won't do anything.
1775
1776 // Use the ThinLTO post-link pipeline with sample profiling
1777 if (ThinLTO && PGOOpt && PGOOpt->Action == PGOOptions::SampleUse)
1778 MPM.addPass(buildThinLTODefaultPipeline(Level, /*ImportSummary=*/nullptr));
1779 else {
1780 // ModuleSimplification does not run the coroutine passes for
1781 // ThinLTOPreLink, so we need the coroutine passes to run for ThinLTO
1782 // builds, otherwise they will miscompile.
1783 if (ThinLTO) {
1784 // TODO: replace w/ buildCoroWrapper() when it takes phase and level into
1785 // consideration.
1786 CGSCCPassManager CGPM;
1790 MPM.addPass(CoroCleanupPass());
1791 }
1792
1793 // otherwise, just use module optimization
1794 MPM.addPass(
1796 // Emit annotation remarks.
1798 }
1799 return MPM;
1800}
1801
// Body of a pipeline builder whose signature lines (1802-1803) are missing
// from this listing. NOTE(review): presumably
// PassBuilder::buildThinLTOPreLinkDefaultPipeline(Level) -- confirm against
// the upstream file.
// The visible body has an O0 special case (its statement is on a missing
// line), stops early with only the required LTO pre-link passes when
// UseCtxProfile is non-empty, and otherwise adds the required LTO pre-link
// passes at the end before returning MPM.
1804 if (Level == OptimizationLevel::O0)
1806
1808
1809 // Convert @llvm.global.annotations to !annotation metadata.
1811
1812 // Force any function attributes we want the rest of the pipeline to observe.
1814
1815 if (PGOOpt && PGOOpt->DebugInfoForProfiling)
1817
1818 // Apply module pipeline start EP callback.
1820
1821 // If we are planning to perform ThinLTO later, we don't bloat the code with
1822 // unrolling/vectorization/... now. Just simplify the module as much as we
1823 // can.
1826 // In pre-link, for ctx prof use, we stop here with an instrumented IR. We let
1827 // thinlto use the contextual info to perform imports; then use the contextual
1828 // profile in the post-thinlink phase.
1829 if (!UseCtxProfile.empty()) {
1830 addRequiredLTOPreLinkPasses(MPM);
1831 return MPM;
1832 }
1833
1834 // Run partial inlining pass to partially inline functions that have
1835 // large bodies.
1836 // FIXME: It isn't clear whether this is really the right place to run this
1837 // in ThinLTO. Because there is another canonicalization and simplification
1838 // phase that will run after the thin link, running this here ends up with
1839 // less information than will be available later and it may grow functions in
1840 // ways that aren't beneficial.
1843
1844 if (PGOOpt && PGOOpt->PseudoProbeForProfiling &&
1845 PGOOpt->Action == PGOOptions::SampleUse)
1847
1848 // Handle Optimizer{Early,Last}EPCallbacks added by clang on PreLink. Actual
1849 // optimization is going to be done in PostLink stage, but clang can't add
1850 // callbacks there in case of in-process ThinLTO called by linker.
1855
1856 // Emit annotation remarks.
1858
1859 addRequiredLTOPreLinkPasses(MPM);
1860
1861 return MPM;
1862}
1863
// Parameter list and body of a pipeline builder; line 1864 (the start of the
// signature) is missing from this listing.
// NOTE(review): presumably PassBuilder::buildThinLTODefaultPipeline, which is
// called elsewhere in this file with (Level, /*ImportSummary=*/nullptr) --
// confirm against the upstream file.
// With a non-null ImportSummary it adds WholeProgramDevirtPass and
// LowerTypeTestsPass up front. At O0 it then adds AllocTokenPass and
// GlobalDCEPass and returns early; otherwise it continues with the
// simplification/optimization steps named in the comments below (their
// statements are on missing lines) and returns MPM.
1865 OptimizationLevel Level, const ModuleSummaryIndex *ImportSummary) {
1867
1868 // If we are invoking this without a summary index noting that we are linking
1869 // with a library containing the necessary APIs, remove any MemProf related
1870 // attributes and metadata.
1871 if (!ImportSummary || !ImportSummary->withSupportsHotColdNew())
1873
1874 if (ImportSummary) {
1875 // For ThinLTO we must apply the context disambiguation decisions early, to
1876 // ensure we can correctly match the callsites to summary data.
1879 ImportSummary, PGOOpt && PGOOpt->Action == PGOOptions::SampleUse));
1880
1881 // These passes import type identifier resolutions for whole-program
1882 // devirtualization and CFI. They must run early because other passes may
1883 // disturb the specific instruction patterns that these passes look for,
1884 // creating dependencies on resolutions that may not appear in the summary.
1885 //
1886 // For example, GVN may transform the pattern assume(type.test) appearing in
1887 // two basic blocks into assume(phi(type.test, type.test)), which would
1888 // transform a dependency on a WPD resolution into a dependency on a type
1889 // identifier resolution for CFI.
1890 //
1891 // Also, WPD has access to more precise information than ICP and can
1892 // devirtualize more effectively, so it should operate on the IR first.
1893 //
1894 // The WPD and LowerTypeTest passes need to run at -O0 to lower type
1895 // metadata and intrinsics.
1896 MPM.addPass(WholeProgramDevirtPass(nullptr, ImportSummary));
1897 MPM.addPass(LowerTypeTestsPass(nullptr, ImportSummary));
1898 }
1899
1900 if (Level == OptimizationLevel::O0) {
1901 // Run a second time to clean up any type tests left behind by WPD for use
1902 // in ICP.
1905
1906 // AllocToken transforms heap allocation calls; this needs to run late after
1907 // other allocation call transformations (such as those in InstCombine).
1908 MPM.addPass(AllocTokenPass());
1909
1910 // Drop available_externally and unreferenced globals. This is necessary
1911 // with ThinLTO in order to avoid leaving undefined references to dead
1912 // globals in the object file.
1914 MPM.addPass(GlobalDCEPass());
1915 return MPM;
1916 }
1917 if (!UseCtxProfile.empty()) {
1918 MPM.addPass(
1920 } else {
1921 // Add the core simplification pipeline.
1924 }
1925 // Now add the optimization pipeline.
1928
1929 // Emit annotation remarks.
1931
1932 return MPM;
1933}
1934
// Tail of a definition whose signature (lines 1935-1936) and the remaining
// argument(s) of the call (line 1939) are missing from this listing.
// NOTE(review): presumably PassBuilder::buildLTOPreLinkDefaultPipeline --
// confirm against the upstream file.
// The visible code forwards to buildPerModuleDefaultPipeline and returns its
// result.
1937 // FIXME: We should use a customized pre-link pipeline!
1938 return buildPerModuleDefaultPipeline(Level,
1940}
1941
1944 ModuleSummaryIndex *ExportSummary) {
1946
1948
1949 // If we are invoking this without a summary index noting that we are linking
1950 // with a library containing the necessary APIs, remove any MemProf related
1951 // attributes and metadata.
1952 if (!ExportSummary || !ExportSummary->withSupportsHotColdNew())
1954
1955 // Create a function that performs CFI checks for cross-DSO calls with targets
1956 // in the current module.
1957 MPM.addPass(CrossDSOCFIPass());
1958
1959 if (Level == OptimizationLevel::O0) {
1960 // The WPD and LowerTypeTest passes need to run at -O0 to lower type
1961 // metadata and intrinsics.
1962 MPM.addPass(WholeProgramDevirtPass(ExportSummary, nullptr));
1963 MPM.addPass(LowerTypeTestsPass(ExportSummary, nullptr));
1964 // Run a second time to clean up any type tests left behind by WPD for use
1965 // in ICP.
1967
1969
1970 // AllocToken transforms heap allocation calls; this needs to run late after
1971 // other allocation call transformations (such as those in InstCombine).
1972 MPM.addPass(AllocTokenPass());
1973
1975
1976 // Emit annotation remarks.
1978
1979 return MPM;
1980 }
1981
1982 if (PGOOpt && PGOOpt->Action == PGOOptions::SampleUse) {
1983 // Load sample profile before running the LTO optimization pipeline.
1984 MPM.addPass(SampleProfileLoaderPass(PGOOpt->ProfileFile,
1985 PGOOpt->ProfileRemappingFile,
1987 // Cache ProfileSummaryAnalysis once to avoid the potential need to insert
1988 // RequireAnalysisPass for PSI before subsequent non-module passes.
1990 }
1991
1992 // Try to run OpenMP optimizations, quick no-op if no OpenMP metadata present.
1994
1995 // Remove unused virtual tables to improve the quality of code generated by
1996 // whole-program devirtualization and bitset lowering.
1997 MPM.addPass(GlobalDCEPass(/*InLTOPostLink=*/true));
1998
1999 // Do basic inference of function attributes from known properties of system
2000 // libraries and other oracles.
2002
2003 if (Level.getSpeedupLevel() > 1) {
2005 CallSiteSplittingPass(), PTO.EagerlyInvalidateAnalyses));
2006
2007 // Indirect call promotion. This should promote all the targets that are
2008 // left by the earlier promotion pass that promotes intra-module targets.
2009 // This two-step promotion is to save the compile time. For LTO, it should
2010 // produce the same result as if we only do promotion here.
2012 true /* InLTO */, PGOOpt && PGOOpt->Action == PGOOptions::SampleUse));
2013
2014 // Promoting by-reference arguments to by-value exposes more constants to
2015 // IPSCCP.
2016 CGSCCPassManager CGPM;
2019 CGPM.addPass(
2022
2023 // Propagate constants at call sites into the functions they call. This
2024 // opens opportunities for globalopt (and inlining) by substituting function
2025 // pointers passed as arguments to direct uses of functions.
2026 MPM.addPass(IPSCCPPass(IPSCCPOptions(/*AllowFuncSpec=*/true)));
2027
2028 // Attach metadata to indirect call sites indicating the set of functions
2029 // they may target at run-time. This should follow IPSCCP.
2031 }
2032
2033 // Do RPO function attribute inference across the module to forward-propagate
2034 // attributes where applicable.
2035 // FIXME: Is this really an optimization rather than a canonicalization?
2037
2038 // Use in-range annotations on GEP indices to split globals where beneficial.
2039 MPM.addPass(GlobalSplitPass());
2040
2041 // Run whole program optimization of virtual call when the list of callees
2042 // is fixed.
2043 MPM.addPass(WholeProgramDevirtPass(ExportSummary, nullptr));
2044
2046 // Stop here at -O1.
2047 if (Level == OptimizationLevel::O1) {
2048 // The LowerTypeTestsPass needs to run to lower type metadata and the
2049 // type.test intrinsics. The pass does nothing if CFI is disabled.
2050 MPM.addPass(LowerTypeTestsPass(ExportSummary, nullptr));
2051 // Run a second time to clean up any type tests left behind by WPD for use
2052 // in ICP (which is performed earlier than this in the regular LTO
2053 // pipeline).
2055
2057
2058 // AllocToken transforms heap allocation calls; this needs to run late after
2059 // other allocation call transformations (such as those in InstCombine).
2060 MPM.addPass(AllocTokenPass());
2061
2063
2064 // Emit annotation remarks.
2066
2067 return MPM;
2068 }
2069
2070 // TODO: Skip to match buildCoroWrapper.
2071 MPM.addPass(CoroEarlyPass());
2072
2073 // Optimize globals to try and fold them into constants.
2074 MPM.addPass(GlobalOptPass());
2075
2076 // Promote any localized globals to SSA registers.
2078
2079 // Linking modules together can lead to duplicate global constants; only
2080 // keep one copy of each constant.
2082
2083 // Remove unused arguments from functions.
2085
2086 // Reduce the code after globalopt and ipsccp. Both can open up significant
2087 // simplification opportunities, and both can propagate functions through
2088 // function pointers. When this happens, we often have to resolve varargs
2089 // calls, etc, so let instcombine do this.
2090 FunctionPassManager PeepholeFPM;
2091 PeepholeFPM.addPass(InstCombinePass());
2092 if (Level.getSpeedupLevel() > 1)
2093 PeepholeFPM.addPass(AggressiveInstCombinePass());
2094 invokePeepholeEPCallbacks(PeepholeFPM, Level);
2095
2096 MPM.addPass(createModuleToFunctionPassAdaptor(std::move(PeepholeFPM),
2097 PTO.EagerlyInvalidateAnalyses));
2098
2099 // Lower variadic functions for supported targets prior to inlining.
2101
2102 // Note: historically, the PruneEH pass was run first to deduce nounwind and
2103 // generally clean up exception handling overhead. It isn't clear this is
2104 // valuable as the inliner doesn't currently care whether it is inlining an
2105 // invoke or a call.
2106 // Run the inliner now.
2107 if (EnableModuleInliner) {
2111 } else {
2114 /* MandatoryFirst */ true,
2117 }
2118
2119 // Perform context disambiguation after inlining, since that would reduce the
2120 // amount of additional cloning required to distinguish the allocation
2121 // contexts.
2124 /*Summary=*/nullptr,
2125 PGOOpt && PGOOpt->Action == PGOOptions::SampleUse));
2126
2127 // Optimize globals again after we ran the inliner.
2128 MPM.addPass(GlobalOptPass());
2129
2130 // Run the OpenMPOpt pass again after global optimizations.
2132
2133 // Garbage collect dead functions.
2134 MPM.addPass(GlobalDCEPass(/*InLTOPostLink=*/true));
2135
2136 // If we didn't decide to inline a function, check to see if we can
2137 // transform it to pass arguments by value instead of by reference.
2138 CGSCCPassManager CGPM;
2143
2145 // The IPO Passes may leave cruft around. Clean up after them.
2146 FPM.addPass(InstCombinePass());
2147 invokePeepholeEPCallbacks(FPM, Level);
2148
2151
2153
2154 // Do a post inline PGO instrumentation and use pass. This is a context
2155 // sensitive PGO pass.
2156 if (PGOOpt) {
2157 if (PGOOpt->CSAction == PGOOptions::CSIRInstr)
2158 addPGOInstrPasses(MPM, Level, /*RunProfileGen=*/true,
2159 /*IsCS=*/true, PGOOpt->AtomicCounterUpdate,
2160 PGOOpt->CSProfileGenFile, PGOOpt->ProfileRemappingFile);
2161 else if (PGOOpt->CSAction == PGOOptions::CSIRUse)
2162 addPGOInstrPasses(MPM, Level, /*RunProfileGen=*/false,
2163 /*IsCS=*/true, PGOOpt->AtomicCounterUpdate,
2164 PGOOpt->ProfileFile, PGOOpt->ProfileRemappingFile);
2165 }
2166
2167 // Break up allocas
2169
2170 // LTO provides additional opportunities for tailcall elimination due to
2171 // link-time inlining, and visibility of nocapture attribute.
2172 FPM.addPass(
2173 TailCallElimPass(/*UpdateFunctionEntryCount=*/isInstrumentedPGOUse()));
2174
2175 // Run a few AA driver optimizations here and now to cleanup the code.
2176 MPM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM),
2177 PTO.EagerlyInvalidateAnalyses));
2178
2179 MPM.addPass(
2181
2182 // Require the GlobalsAA analysis for the module so we can query it within
2183 // MainFPM.
2186 // Invalidate AAManager so it can be recreated and pick up the newly
2187 // available GlobalsAA.
2188 MPM.addPass(
2190 }
2191
2192 FunctionPassManager MainFPM;
2194 LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap,
2195 /*AllowSpeculation=*/true),
2196 /*USeMemorySSA=*/true));
2197
2198 if (RunNewGVN)
2199 MainFPM.addPass(NewGVNPass());
2200 else
2201 MainFPM.addPass(GVNPass());
2202
2203 // Remove dead memcpy()'s.
2204 MainFPM.addPass(MemCpyOptPass());
2205
2206 // Nuke dead stores.
2207 MainFPM.addPass(DSEPass());
2208 MainFPM.addPass(MoveAutoInitPass());
2210
2211 invokeVectorizerStartEPCallbacks(MainFPM, Level);
2212
2213 LoopPassManager LPM;
2214 if (EnableLoopFlatten && Level.getSpeedupLevel() > 1)
2215 LPM.addPass(LoopFlattenPass());
2216 LPM.addPass(IndVarSimplifyPass());
2217 LPM.addPass(LoopDeletionPass());
2218 // FIXME: Add loop interchange.
2219
2220 // Unroll small loops and perform peeling.
2221 LPM.addPass(LoopFullUnrollPass(Level.getSpeedupLevel(),
2222 /* OnlyWhenForced= */ !PTO.LoopUnrolling,
2223 PTO.ForgetAllSCEVInLoopUnroll));
2224 // The loop passes in LPM (LoopFullUnrollPass) do not preserve MemorySSA.
2225 // *All* loop passes must preserve it, in order to be able to use it.
2226 MainFPM.addPass(
2227 createFunctionToLoopPassAdaptor(std::move(LPM), /*UseMemorySSA=*/false));
2228
2229 MainFPM.addPass(LoopDistributePass());
2230
2231 addVectorPasses(Level, MainFPM, ThinOrFullLTOPhase::FullLTOPostLink);
2232
2233 invokeVectorizerEndEPCallbacks(MainFPM, Level);
2234
2235 // Run the OpenMPOpt CGSCC pass again late.
2238
2239 invokePeepholeEPCallbacks(MainFPM, Level);
2240 MainFPM.addPass(JumpThreadingPass());
2241 MPM.addPass(createModuleToFunctionPassAdaptor(std::move(MainFPM),
2242 PTO.EagerlyInvalidateAnalyses));
2243
2244 // Lower type metadata and the type.test intrinsic. This pass supports
2245 // clang's control flow integrity mechanisms (-fsanitize=cfi*) and needs
2246 // to be run at link time if CFI is enabled. This pass does nothing if
2247 // CFI is disabled.
2248 MPM.addPass(LowerTypeTestsPass(ExportSummary, nullptr));
2249 // Run a second time to clean up any type tests left behind by WPD for use
2250 // in ICP (which is performed earlier than this in the regular LTO pipeline).
2252
2253 // Enable splitting late in the FullLTO post-link pipeline.
2256
2257 // Add late LTO optimization passes.
2258 FunctionPassManager LateFPM;
2259
2260 // LoopSink pass sinks instructions hoisted by LICM, which serves as a
2261 // canonicalization pass that enables other optimizations. As a result,
2262 // LoopSink pass needs to be a very late IR pass to avoid undoing LICM
2263 // result too early.
2264 LateFPM.addPass(LoopSinkPass());
2265
2266 // This hoists/decomposes div/rem ops. It should run after other sink/hoist
2267 // passes to avoid re-sinking, but before SimplifyCFG because it can allow
2268 // flattening of blocks.
2269 LateFPM.addPass(DivRemPairsPass());
2270
2271 // Delete basic blocks, which optimization passes may have killed.
2273 .convertSwitchRangeToICmp(true)
2274 .convertSwitchToArithmetic(true)
2275 .hoistCommonInsts(true)
2276 .speculateUnpredictables(true)));
2277 MPM.addPass(createModuleToFunctionPassAdaptor(std::move(LateFPM)));
2278
2279 // Drop bodies of available externally objects to improve GlobalDCE.
2281
2282 // Now that we have optimized the program, discard unreachable functions.
2283 MPM.addPass(GlobalDCEPass(/*InLTOPostLink=*/true));
2284
2285 if (PTO.MergeFunctions)
2287
2289
2290 if (PTO.CallGraphProfile)
2291 MPM.addPass(CGProfilePass(/*InLTOPostLink=*/true));
2292
2293 MPM.addPass(CoroCleanupPass());
2294
2295 // AllocToken transforms heap allocation calls; this needs to run late after
2296 // other allocation call transformations (such as those in InstCombine).
2297 MPM.addPass(AllocTokenPass());
2298
2300
2301 // Emit annotation remarks.
2303
2304 return MPM;
2305}
2306
2310 assert(Level == OptimizationLevel::O0 &&
2311 "buildO0DefaultPipeline should only be used with O0");
2312
2314
2315 // Perform pseudo probe instrumentation in O0 mode. This is for the
2316 // consistency between different build modes. For example, a LTO build can be
2317 // mixed with an O0 prelink and an O2 postlink. Loading a sample profile in
2318 // the postlink will require pseudo probe instrumentation in the prelink.
2319 if (PGOOpt && PGOOpt->PseudoProbeForProfiling)
2321
2322 if (PGOOpt && (PGOOpt->Action == PGOOptions::IRInstr ||
2323 PGOOpt->Action == PGOOptions::IRUse))
2325 MPM,
2326 /*RunProfileGen=*/(PGOOpt->Action == PGOOptions::IRInstr),
2327 /*IsCS=*/false, PGOOpt->AtomicCounterUpdate, PGOOpt->ProfileFile,
2328 PGOOpt->ProfileRemappingFile);
2329
2330 // Instrument function entry and exit before all inlining.
2332 EntryExitInstrumenterPass(/*PostInlining=*/false)));
2333
2335
2336 if (PGOOpt && PGOOpt->DebugInfoForProfiling)
2338
2339 if (PGOOpt && PGOOpt->Action == PGOOptions::SampleUse) {
2340 // Explicitly disable sample loader inlining and use flattened profile in O0
2341 // pipeline.
2342 MPM.addPass(SampleProfileLoaderPass(PGOOpt->ProfileFile,
2343 PGOOpt->ProfileRemappingFile,
2345 /*DisableSampleProfileInlining=*/true,
2346 /*UseFlattenedProfile=*/true));
2347 // Cache ProfileSummaryAnalysis once to avoid the potential need to insert
2348 // RequireAnalysisPass for PSI before subsequent non-module passes.
2350 }
2351
2353
2354 // Build a minimal pipeline based on the semantics required by LLVM,
2355 // which is just that always inlining occurs. Further, disable generating
2356 // lifetime intrinsics to avoid enabling further optimizations during
2357 // code generation.
2359 /*InsertLifetimeIntrinsics=*/false));
2360
2361 if (PTO.MergeFunctions)
2363
2364 if (EnableMatrix)
2365 MPM.addPass(
2367
2368 if (!CGSCCOptimizerLateEPCallbacks.empty()) {
2369 CGSCCPassManager CGPM;
2371 if (!CGPM.isEmpty())
2373 }
2374 if (!LateLoopOptimizationsEPCallbacks.empty()) {
2375 LoopPassManager LPM;
2377 if (!LPM.isEmpty()) {
2379 createFunctionToLoopPassAdaptor(std::move(LPM))));
2380 }
2381 }
2382 if (!LoopOptimizerEndEPCallbacks.empty()) {
2383 LoopPassManager LPM;
2385 if (!LPM.isEmpty()) {
2387 createFunctionToLoopPassAdaptor(std::move(LPM))));
2388 }
2389 }
2390 if (!ScalarOptimizerLateEPCallbacks.empty()) {
2393 if (!FPM.isEmpty())
2394 MPM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM)));
2395 }
2396
2398
2399 if (!VectorizerStartEPCallbacks.empty()) {
2402 if (!FPM.isEmpty())
2403 MPM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM)));
2404 }
2405
2406 if (!VectorizerEndEPCallbacks.empty()) {
2409 if (!FPM.isEmpty())
2410 MPM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM)));
2411 }
2412
2414
2415 // AllocToken transforms heap allocation calls; this needs to run late after
2416 // other allocation call transformations (such as those in InstCombine).
2417 if (!isLTOPreLink(Phase))
2418 MPM.addPass(AllocTokenPass());
2419
2421
2422 if (isLTOPreLink(Phase))
2423 addRequiredLTOPreLinkPasses(MPM);
2424
2425 // Emit annotation remarks.
2427
2428 return MPM;
2429}
2430
2432 AAManager AA;
2433
2434 // The order in which these are registered determines their priority when
2435 // being queried.
2436
2437 // Add any target-specific alias analyses that should be run early.
2438 if (TM)
2439 TM->registerEarlyDefaultAliasAnalyses(AA);
2440
2441 // First we register the basic alias analysis that provides the majority of
2442 // per-function local AA logic. This is a stateless, on-demand local set of
2443 // AA techniques.
2444 AA.registerFunctionAnalysis<BasicAA>();
2445
2446 // Next we query fast, specialized alias analyses that wrap IR-embedded
2447 // information about aliasing.
2448 AA.registerFunctionAnalysis<ScopedNoAliasAA>();
2449 AA.registerFunctionAnalysis<TypeBasedAA>();
2450
2451 // Add support for querying global aliasing information when available.
2452 // Because the `AAManager` is a function analysis and `GlobalsAA` is a module
2453 // analysis, all that the `AAManager` can do is query for any *cached*
2454 // results from `GlobalsAA` through a readonly proxy.
2456 AA.registerModuleAnalysis<GlobalsAA>();
2457
2458 // Add target-specific alias analyses.
2459 if (TM)
2460 TM->registerDefaultAliasAnalyses(AA);
2461
2462 return AA;
2463}
2464
2465bool PassBuilder::isInstrumentedPGOUse() const {
2466 return (PGOOpt && PGOOpt->Action == PGOOptions::IRUse) ||
2467 !UseCtxProfile.empty();
2468}
aarch64 falkor hwpf fix Falkor HW Prefetch Fix Late Phase
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
AggressiveInstCombiner - Combine expression patterns to form expressions with fewer,...
Provides passes to inlining "always_inline" functions.
This is the interface for LLVM's primary stateless and local alias analysis.
This file provides the interface for LLVM's Call Graph Profile pass.
This header provides classes for managing passes over SCCs of the call graph.
#define clEnumValN(ENUMVAL, FLAGNAME, DESC)
This file provides the interface for a simple, fast CSE pass.
This file provides a pass which clones the current module and runs the provided pass pipeline on the ...
This file provides a pass manager that only runs its passes if the provided marker analysis has been ...
Super simple passes to force specific function attrs from the commandline into the IR for debugging p...
Provides passes for computing function attributes based on interprocedural analyses.
This file provides the interface for LLVM's Global Value Numbering pass which eliminates fully redund...
This is the interface for a simple mod/ref and alias analysis over globals.
AcceleratorCodeSelection - Identify all functions reachable from a kernel, removing those that are un...
This header defines various interfaces for pass management in LLVM.
Interfaces for passes which infer implicit function attributes from the name and signature of functio...
This file provides the primary interface to the instcombine pass.
Defines passes for running instruction simplification across chunks of IR.
This file provides the interface for LLVM's PGO Instrumentation lowering pass.
See the comments on JumpThreadingPass.
static LVOptions Options
Definition LVOptions.cpp:25
This file implements the Loop Fusion pass.
This header defines the LoopLoadEliminationPass object.
This header provides classes for managing a pipeline of passes over loops in LLVM IR.
The header file for the LowerConstantIntrinsics pass as used by the new pass manager.
The header file for the LowerExpectIntrinsic pass as used by the new pass manager.
This pass performs merges of loads and stores on both sides of a diamond (hammock).
This file provides the interface for LLVM's Global Value Numbering pass.
This header enumerates the LLVM-provided high-level optimization levels.
This file provides the interface for IR based instrumentation passes ( (profile-gen,...
Define option tunables for PGO.
static void addAnnotationRemarksPass(ModulePassManager &MPM)
static CoroConditionalWrapper buildCoroWrapper(ThinOrFullLTOPhase Phase)
static bool isLTOPreLink(ThinOrFullLTOPhase Phase)
static bool isLTOPostLink(ThinOrFullLTOPhase Phase)
This file implements relative lookup table converter that converts lookup tables to relative lookup t...
This file provides the interface for LLVM's Scalar Replacement of Aggregates pass.
This file provides the interface for the pseudo probe implementation for AutoFDO.
This file provides the interface for the sampled PGO loader pass.
This is the interface for a metadata-based scoped no-alias analysis.
This file provides the interface for the pass responsible for both simplifying and canonicalizing the...
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
This is the interface for a metadata-based TBAA.
Defines the virtual file system interface vfs::FileSystem.
A manager for alias analyses.
A module pass that rewrites heap allocations to use token-enabled allocation functions based on vario...
Definition AllocToken.h:36
Inlines functions marked as "always_inline".
Argument promotion pass.
Assign a GUID to functions as metadata.
Analysis pass providing a never-invalidated alias analysis result.
Simple pass that canonicalizes aliases.
A pass that merges duplicate global constants into a single constant.
This class implements a trivial dead store elimination.
Eliminate dead arguments (and return values) from functions.
A pass that transforms external global definitions into declarations.
Pass embeds a copy of the module optimized with the provided pass pipeline into a global variable.
A pass manager to run a set of extra loop passes if the MarkerTy analysis is present.
The core GVN pass object.
Definition GVN.h:128
Pass to remove unused function declarations.
Definition GlobalDCE.h:38
Optimize globals that never have their address taken.
Definition GlobalOpt.h:25
Pass to perform split of global variables.
Definition GlobalSplit.h:26
Analysis pass providing a never-invalidated alias analysis result.
Pass to outline cold regions.
Pass to perform interprocedural constant propagation.
Definition SCCP.h:48
Pass to outline similar regions.
Definition IROutliner.h:468
Run instruction simplification across each instruction in the function.
Instrumentation based profiling lowering pass.
This pass performs 'jump threading', which looks at blocks that have multiple predecessors and multip...
Performs Loop Invariant Code Motion Pass.
Definition LICM.h:66
Loop unroll pass that only does full loop unrolling and peeling.
Performs Loop Idiom Recognize Pass.
Performs Loop Inst Simplify Pass.
A simple loop rotation transformation.
Performs basic CFG simplifications to assist other loop passes.
A pass that does profile-guided sinking of instructions into loops.
Definition LoopSink.h:33
A simple loop rotation transformation.
Loop unroll pass that will support both full and partial unrolling.
Strips MemProf attributes and metadata.
Merge identical functions.
The module inliner pass for the new pass manager.
Module pass, wrapping the inliner pass.
Definition Inliner.h:65
void addModulePass(T Pass)
Add a module pass that runs before the CGSCC passes.
Definition Inliner.h:81
Class to hold module path string table and global value map, and encapsulate methods for operating on...
Simple pass that provides a name to every anonymous globals.
Additional 'norecurse' attribute deduction during postlink LTO phase.
OpenMP optimizations pass.
Definition OpenMPOpt.h:42
static LLVM_ABI const OptimizationLevel O3
Optimize for fast execution as much as possible.
static LLVM_ABI const OptimizationLevel O0
Disable as many optimizations as possible.
static LLVM_ABI const OptimizationLevel O2
Optimize for fast execution as much as possible without triggering significant incremental compile ti...
static LLVM_ABI const OptimizationLevel O1
Optimize quickly without destroying debuggability.
The indirect function call promotion pass.
The instrumentation (profile-instr-gen) pass for IR based PGO.
The instrumentation (profile-instr-gen) pass for IR based PGO.
The profile annotation (profile-instr-use) pass for IR based PGO.
The profile size based optimization pass for memory intrinsics.
Pass to remove unused function declarations.
LLVM_ABI void invokeFullLinkTimeOptimizationLastEPCallbacks(ModulePassManager &MPM, OptimizationLevel Level)
LLVM_ABI ModuleInlinerWrapperPass buildInlinerPipeline(OptimizationLevel Level, ThinOrFullLTOPhase Phase)
Construct the module pipeline that performs inlining as well as the inlining-driven cleanups.
LLVM_ABI void invokeOptimizerEarlyEPCallbacks(ModulePassManager &MPM, OptimizationLevel Level, ThinOrFullLTOPhase Phase)
LLVM_ABI void invokeVectorizerStartEPCallbacks(FunctionPassManager &FPM, OptimizationLevel Level)
LLVM_ABI AAManager buildDefaultAAPipeline()
Build the default AAManager with the default alias analysis pipeline registered.
LLVM_ABI void invokeCGSCCOptimizerLateEPCallbacks(CGSCCPassManager &CGPM, OptimizationLevel Level)
LLVM_ABI ModulePassManager buildThinLTOPreLinkDefaultPipeline(OptimizationLevel Level)
Build a pre-link, ThinLTO-targeting default optimization pipeline to a pass manager.
LLVM_ABI void addPGOInstrPassesForO0(ModulePassManager &MPM, bool RunProfileGen, bool IsCS, bool AtomicCounterUpdate, std::string ProfileFile, std::string ProfileRemappingFile)
Add PGOInstrumentation passes for O0 only.
LLVM_ABI void invokeScalarOptimizerLateEPCallbacks(FunctionPassManager &FPM, OptimizationLevel Level)
LLVM_ABI ModulePassManager buildPerModuleDefaultPipeline(OptimizationLevel Level, ThinOrFullLTOPhase Phase=ThinOrFullLTOPhase::None)
Build a per-module default optimization pipeline.
LLVM_ABI void invokePipelineStartEPCallbacks(ModulePassManager &MPM, OptimizationLevel Level)
LLVM_ABI void invokeVectorizerEndEPCallbacks(FunctionPassManager &FPM, OptimizationLevel Level)
LLVM_ABI ModulePassManager buildO0DefaultPipeline(OptimizationLevel Level, ThinOrFullLTOPhase Phase=ThinOrFullLTOPhase::None)
Build an O0 pipeline with the minimal semantically required passes.
LLVM_ABI FunctionPassManager buildFunctionSimplificationPipeline(OptimizationLevel Level, ThinOrFullLTOPhase Phase)
Construct the core LLVM function canonicalization and simplification pipeline.
LLVM_ABI void invokePeepholeEPCallbacks(FunctionPassManager &FPM, OptimizationLevel Level)
LLVM_ABI void invokePipelineEarlySimplificationEPCallbacks(ModulePassManager &MPM, OptimizationLevel Level, ThinOrFullLTOPhase Phase)
LLVM_ABI void invokeLoopOptimizerEndEPCallbacks(LoopPassManager &LPM, OptimizationLevel Level)
LLVM_ABI ModulePassManager buildLTODefaultPipeline(OptimizationLevel Level, ModuleSummaryIndex *ExportSummary)
Build an LTO default optimization pipeline to a pass manager.
LLVM_ABI ModulePassManager buildModuleInlinerPipeline(OptimizationLevel Level, ThinOrFullLTOPhase Phase)
Construct the module pipeline that performs inlining with module inliner pass.
LLVM_ABI ModulePassManager buildThinLTODefaultPipeline(OptimizationLevel Level, const ModuleSummaryIndex *ImportSummary)
Build a ThinLTO default optimization pipeline to a pass manager.
LLVM_ABI void invokeLateLoopOptimizationsEPCallbacks(LoopPassManager &LPM, OptimizationLevel Level)
LLVM_ABI void invokeFullLinkTimeOptimizationEarlyEPCallbacks(ModulePassManager &MPM, OptimizationLevel Level)
LLVM_ABI ModulePassManager buildFatLTODefaultPipeline(OptimizationLevel Level, bool ThinLTO, bool EmitSummary)
Build a fat object default optimization pipeline.
LLVM_ABI ModulePassManager buildModuleSimplificationPipeline(OptimizationLevel Level, ThinOrFullLTOPhase Phase)
Construct the core LLVM module canonicalization and simplification pipeline.
LLVM_ABI ModulePassManager buildModuleOptimizationPipeline(OptimizationLevel Level, ThinOrFullLTOPhase LTOPhase)
Construct the core LLVM module optimization pipeline.
LLVM_ABI void invokeOptimizerLastEPCallbacks(ModulePassManager &MPM, OptimizationLevel Level, ThinOrFullLTOPhase Phase)
LLVM_ABI ModulePassManager buildLTOPreLinkDefaultPipeline(OptimizationLevel Level)
Build a pre-link, LTO-targeting default optimization pipeline to a pass manager.
LLVM_ATTRIBUTE_MINSIZE std::enable_if_t<!std::is_same_v< PassT, PassManager > > addPass(PassT &&Pass)
bool isEmpty() const
Returns if the pass manager contains any passes.
unsigned LicmMssaNoAccForPromotionCap
Tuning option to disable promotion to scalars in LICM with MemorySSA, if the number of access is too ...
Definition PassBuilder.h:78
bool SLPVectorization
Tuning option to enable/disable slp loop vectorization, set based on opt level.
Definition PassBuilder.h:56
int InlinerThreshold
Tuning option to override the default inliner threshold.
Definition PassBuilder.h:92
bool LoopFusion
Tuning option to enable/disable loop fusion. Its default value is false.
Definition PassBuilder.h:66
bool CallGraphProfile
Tuning option to enable/disable call graph profile.
Definition PassBuilder.h:82
bool MergeFunctions
Tuning option to enable/disable function merging.
Definition PassBuilder.h:89
bool ForgetAllSCEVInLoopUnroll
Tuning option to forget all SCEV loops in LoopUnroll.
Definition PassBuilder.h:70
unsigned LicmMssaOptCap
Tuning option to cap the number of calls to retrieve clobbering accesses in MemorySSA,...
Definition PassBuilder.h:74
bool LoopInterleaving
Tuning option to set loop interleaving on/off, set based on opt level.
Definition PassBuilder.h:48
LLVM_ABI PipelineTuningOptions()
Constructor sets pipeline tuning defaults based on cl::opts.
bool LoopUnrolling
Tuning option to enable/disable loop unrolling. Its default value is true.
Definition PassBuilder.h:59
bool LoopInterchange
Tuning option to enable/disable loop interchange.
Definition PassBuilder.h:63
bool LoopVectorization
Tuning option to enable/disable loop vectorization, set based on opt level.
Definition PassBuilder.h:52
Reassociate commutative expressions.
Definition Reassociate.h:74
A pass to do RPO deduction and propagation of function attributes.
This pass performs function-level constant propagation and merging.
Definition SCCP.h:30
The sample profiler data loader pass.
Analysis pass providing a never-invalidated alias analysis result.
This pass transforms loops that contain branches or switches on loop- invariant conditions to have mu...
A pass to simplify and canonicalize the CFG of a function.
Definition SimplifyCFG.h:30
Analysis pass providing a never-invalidated alias analysis result.
Optimize scalar/vector interactions in IR using target cost models.
Interfaces for registering analysis passes, producing common pass manager configurations,...
Abstract Attribute helper functions.
Definition Attributor.h:165
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
ValuesClass values(OptsTy... Options)
Helper to build a ValuesClass by forwarding a variable number of arguments as an initializer list to ...
initializer< Ty > init(const Ty &Val)
@ All
Drop only llvm.assumes using type test value.
This is an optimization pass for GlobalISel generic memory operations.
LLVM_ABI cl::opt< bool > EnableKnowledgeRetention
static cl::opt< bool > RunNewGVN("enable-newgvn", cl::init(false), cl::Hidden, cl::desc("Run the NewGVN pass"))
static cl::opt< bool > DisablePreInliner("disable-preinline", cl::init(false), cl::Hidden, cl::desc("Disable pre-instrumentation inliner"))
static cl::opt< bool > EnableJumpTableToSwitch("enable-jump-table-to-switch", cl::desc("Enable JumpTableToSwitch pass (default = off)"))
static cl::opt< bool > PerformMandatoryInliningsFirst("mandatory-inlining-first", cl::init(false), cl::Hidden, cl::desc("Perform mandatory inlinings module-wide, before performing " "inlining"))
static cl::opt< bool > RunPartialInlining("enable-partial-inlining", cl::init(false), cl::Hidden, cl::desc("Run Partial inlining pass"))
static cl::opt< bool > EnableGVNSink("enable-gvn-sink", cl::desc("Enable the GVN sinking pass (default = off)"))
static cl::opt< bool > EnableModuleInliner("enable-module-inliner", cl::init(false), cl::Hidden, cl::desc("Enable module inliner"))
static cl::opt< bool > EnableEagerlyInvalidateAnalyses("eagerly-invalidate-analyses", cl::init(true), cl::Hidden, cl::desc("Eagerly invalidate more analyses in default pipelines"))
static cl::opt< bool > EnableMatrix("enable-matrix", cl::init(false), cl::Hidden, cl::desc("Enable lowering of the matrix intrinsics"))
ModuleToFunctionPassAdaptor createModuleToFunctionPassAdaptor(FunctionPassT &&Pass, bool EagerlyInvalidate=false)
A function to deduce a function pass type and wrap it in the templated adaptor.
cl::opt< std::string > UseCtxProfile("use-ctx-profile", cl::init(""), cl::Hidden, cl::desc("Use the specified contextual profile file"))
static cl::opt< bool > EnableSampledInstr("enable-sampled-instrumentation", cl::init(false), cl::Hidden, cl::desc("Enable profile instrumentation sampling (default = off)"))
static cl::opt< bool > EnableLoopFlatten("enable-loop-flatten", cl::init(false), cl::Hidden, cl::desc("Enable the LoopFlatten Pass"))
static cl::opt< InliningAdvisorMode > UseInlineAdvisor("enable-ml-inliner", cl::init(InliningAdvisorMode::Default), cl::Hidden, cl::desc("Enable ML policy for inliner. Currently trained for -Oz only"), cl::values(clEnumValN(InliningAdvisorMode::Default, "default", "Heuristics-based inliner version"), clEnumValN(InliningAdvisorMode::Development, "development", "Use development mode (runtime-loadable model)"), clEnumValN(InliningAdvisorMode::Release, "release", "Use release mode (AOT-compiled model)")))
PassManager< LazyCallGraph::SCC, CGSCCAnalysisManager, LazyCallGraph &, CGSCCUpdateResult & > CGSCCPassManager
The CGSCC pass manager.
static cl::opt< bool > EnableUnrollAndJam("enable-unroll-and-jam", cl::init(false), cl::Hidden, cl::desc("Enable Unroll And Jam Pass"))
@ CGSCC_LIGHT
@ MODULE_LIGHT
ThinOrFullLTOPhase
This enumerates the LLVM full LTO or ThinLTO optimization phases.
Definition Pass.h:77
@ FullLTOPreLink
Full LTO prelink phase.
Definition Pass.h:85
@ ThinLTOPostLink
ThinLTO postlink (backend compile) phase.
Definition Pass.h:83
@ None
No LTO/ThinLTO behavior needed.
Definition Pass.h:79
@ FullLTOPostLink
Full LTO postlink (backend compile) phase.
Definition Pass.h:87
@ ThinLTOPreLink
ThinLTO prelink (summary) phase.
Definition Pass.h:81
PassManager< Loop, LoopAnalysisManager, LoopStandardAnalysisResults &, LPMUpdater & > LoopPassManager
The Loop pass manager.
static cl::opt< bool > EnableConstraintElimination("enable-constraint-elimination", cl::init(true), cl::Hidden, cl::desc("Enable pass to eliminate conditions based on linear constraints"))
ModuleToPostOrderCGSCCPassAdaptor createModuleToPostOrderCGSCCPassAdaptor(CGSCCPassT &&Pass)
A function to deduce a function pass type and wrap it in the templated adaptor.
static cl::opt< bool > EnablePGOInlineDeferral("enable-npm-pgo-inline-deferral", cl::init(true), cl::Hidden, cl::desc("Enable inline deferral during PGO"))
Flag to enable inline deferral during PGO.
FunctionToLoopPassAdaptor createFunctionToLoopPassAdaptor(LoopPassT &&Pass, bool UseMemorySSA=false)
A function to deduce a loop pass type and wrap it in the templated adaptor.
CGSCCToFunctionPassAdaptor createCGSCCToFunctionPassAdaptor(FunctionPassT &&Pass, bool EagerlyInvalidate=false, bool NoRerun=false)
A function to deduce a function pass type and wrap it in the templated adaptor.
cl::opt< bool > ForgetSCEVInLoopUnroll
PassManager< Module > ModulePassManager
Convenience typedef for a pass manager over modules.
static cl::opt< bool > EnablePostPGOLoopRotation("enable-post-pgo-loop-rotation", cl::init(true), cl::Hidden, cl::desc("Run the loop rotation transformation after PGO instrumentation"))
LLVM_ABI bool AreStatisticsEnabled()
Check if statistics are enabled.
static cl::opt< std::string > InstrumentColdFuncOnlyPath("instrument-cold-function-only-path", cl::init(""), cl::desc("File path for cold function only instrumentation(requires use " "with --pgo-instrument-cold-function-only)"), cl::Hidden)
static cl::opt< bool > EnableGlobalAnalyses("enable-global-analyses", cl::init(true), cl::Hidden, cl::desc("Enable inter-procedural analyses"))
static cl::opt< bool > EnableDFAJumpThreading("enable-dfa-jump-thread", cl::desc("Enable DFA jump threading"), cl::init(false), cl::Hidden)
static cl::opt< bool > FlattenedProfileUsed("flattened-profile-used", cl::init(false), cl::Hidden, cl::desc("Indicate the sample profile being used is flattened, i.e., " "no inline hierarchy exists in the profile"))
static cl::opt< AttributorRunOption > AttributorRun("attributor-enable", cl::Hidden, cl::init(AttributorRunOption::NONE), cl::desc("Enable the attributor inter-procedural deduction pass"), cl::values(clEnumValN(AttributorRunOption::FULL, "full", "enable all full attributor runs"), clEnumValN(AttributorRunOption::LIGHT, "light", "enable all attributor-light runs"), clEnumValN(AttributorRunOption::MODULE, "module", "enable module-wide attributor runs"), clEnumValN(AttributorRunOption::MODULE_LIGHT, "module-light", "enable module-wide attributor-light runs"), clEnumValN(AttributorRunOption::CGSCC, "cgscc", "enable call graph SCC attributor runs"), clEnumValN(AttributorRunOption::CGSCC_LIGHT, "cgscc-light", "enable call graph SCC attributor-light runs"), clEnumValN(AttributorRunOption::NONE, "none", "disable attributor runs")))
static cl::opt< bool > ExtraVectorizerPasses("extra-vectorizer-passes", cl::init(false), cl::Hidden, cl::desc("Run cleanup optimization passes after vectorization"))
static cl::opt< bool > EnableHotColdSplit("hot-cold-split", cl::desc("Enable hot-cold splitting pass"))
cl::opt< bool > EnableMemProfContextDisambiguation
Enable MemProf context disambiguation for thin link.
PassManager< Function > FunctionPassManager
Convenience typedef for a pass manager over functions.
LLVM_ABI InlineParams getInlineParams()
Generate the parameters to tune the inline cost analysis based only on the commandline options.
cl::opt< bool > PGOInstrumentColdFunctionOnly
static cl::opt< bool > EnableLoopInterchange("enable-loopinterchange", cl::init(false), cl::Hidden, cl::desc("Enable the LoopInterchange Pass"))
static cl::opt< bool > EnableCHR("enable-chr", cl::init(true), cl::Hidden, cl::desc("Enable control height reduction optimization (CHR)"))
static cl::opt< bool > EnableMergeFunctions("enable-merge-functions", cl::init(false), cl::Hidden, cl::desc("Enable function merging as part of the optimization pipeline"))
static cl::opt< bool > EnableDevirtualizeSpeculatively("enable-devirtualize-speculatively", cl::desc("Enable speculative devirtualization optimization"), cl::init(false))
static cl::opt< bool > EnableGVNHoist("enable-gvn-hoist", cl::desc("Enable the GVN hoisting pass (default = off)"))
cl::opt< unsigned > SetLicmMssaNoAccForPromotionCap
LLVM_ABI InlineParams getInlineParamsFromOptLevel(unsigned OptLevel)
Generate the parameters to tune the inline cost analysis based on command line options.
static cl::opt< bool > EnableIROutliner("ir-outliner", cl::init(false), cl::Hidden, cl::desc("Enable ir outliner pass"))
static cl::opt< int > PreInlineThreshold("preinline-threshold", cl::Hidden, cl::init(75), cl::desc("Control the amount of inlining in pre-instrumentation inliner " "(default = 75)"))
static cl::opt< bool > UseLoopVersioningLICM("enable-loop-versioning-licm", cl::init(false), cl::Hidden, cl::desc("Enable the experimental Loop Versioning LICM pass"))
cl::opt< unsigned > MaxDevirtIterations("max-devirt-iterations", cl::ReallyHidden, cl::init(4))
cl::opt< unsigned > SetLicmMssaOptCap
static cl::opt< bool > EnableMergeICmps("enable-mergeicmps", cl::init(true), cl::Hidden, cl::desc("Enable MergeICmps pass in the optimization pipeline"))
A DCE pass that assumes instructions are dead until proven otherwise.
Definition ADCE.h:31
Pass to convert @llvm.global.annotations to !annotation metadata.
This pass attempts to minimize the number of assumes without losing any information.
A more lightweight version of the Attributor which only runs attribute inference but no simplificatio...
A more lightweight version of the Attributor which only runs attribute inference but no simplificatio...
Hoist/decompose integer division and remainder instructions to enable CFG improvements and better cod...
Definition DivRemPairs.h:23
A simple and fast domtree-based CSE pass.
Definition EarlyCSE.h:31
Pass which forces specific function attributes into the IR, primarily as a debugging tool.
Statistics pass for the FunctionPropertiesAnalysis results.
A simple and fast domtree-based GVN pass to hoist common expressions from sibling branches.
Definition GVN.h:431
Uses an "inverted" value numbering to decide the similarity of expressions and sinks similar expressi...
Definition GVN.h:438
A set of parameters to control various transforms performed by IPSCCP pass.
Definition SCCP.h:35
A pass which infers function attributes from the names and signatures of function declarations in a m...
Provides context on when an inline advisor is constructed in the pipeline (e.g., link phase,...
Thresholds to tune inline cost analysis.
Definition InlineCost.h:207
std::optional< int > OptSizeHintThreshold
Threshold to use for callees with inline hint, when the caller is optimized for size.
Definition InlineCost.h:216
std::optional< int > HotCallSiteThreshold
Threshold to use when the callsite is considered hot.
Definition InlineCost.h:228
int DefaultThreshold
The default threshold to start with for a callee.
Definition InlineCost.h:209
std::optional< bool > EnableDeferral
Indicate whether we should allow inline deferral.
Definition InlineCost.h:241
std::optional< int > HintThreshold
Threshold to use for callees with inline hint.
Definition InlineCost.h:212
Options for the frontend instrumentation based profiling pass.
A no-op pass template which simply forces a specific analysis result to be invalidated.
Pass to forward loads in a loop around the backedge to subsequent iterations.
A set of parameters used to control various transforms performed by the LoopUnroll pass.
The LoopVectorize Pass.
Computes function attributes in post-order over the call graph.
A utility pass template to force an analysis result to be available.