LLVM 17.0.0git
PassBuilderPipelines.cpp
Go to the documentation of this file.
1//===- Construction of pass pipelines -------------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8/// \file
9///
10/// This file provides the implementation of the PassBuilder based on our
11/// static pass registry as well as related functionality. It also provides
12/// helpers to aid in analyzing, debugging, and testing passes and pass
13/// pipelines.
14///
15//===----------------------------------------------------------------------===//
16
17#include "llvm/ADT/Statistic.h"
27#include "llvm/IR/PassManager.h"
134
135using namespace llvm;
136
138 "enable-ml-inliner", cl::init(InliningAdvisorMode::Default), cl::Hidden,
139 cl::desc("Enable ML policy for inliner. Currently trained for -Oz only"),
140 cl::values(clEnumValN(InliningAdvisorMode::Default, "default",
141 "Heuristics-based inliner version"),
142 clEnumValN(InliningAdvisorMode::Development, "development",
143 "Use development mode (runtime-loadable model)"),
144 clEnumValN(InliningAdvisorMode::Release, "release",
145 "Use release mode (AOT-compiled model)")));
146
148 "enable-npm-synthetic-counts", cl::Hidden,
149 cl::desc("Run synthetic function entry count generation "
150 "pass"));
151
152/// Flag to enable inline deferral during PGO.
153static cl::opt<bool>
154 EnablePGOInlineDeferral("enable-npm-pgo-inline-deferral", cl::init(true),
156 cl::desc("Enable inline deferral during PGO"));
157
159 cl::desc("Enable memory profiler"));
160
/// Flag to enable the module inliner. Defaults to false and is marked
/// cl::Hidden.
161static cl::opt<bool> EnableModuleInliner("enable-module-inliner",
162 cl::init(false), cl::Hidden,
163 cl::desc("Enable module inliner"));
164
166 "mandatory-inlining-first", cl::init(true), cl::Hidden,
167 cl::desc("Perform mandatory inlinings module-wide, before performing "
168 "inlining"));
169
171 "eagerly-invalidate-analyses", cl::init(true), cl::Hidden,
172 cl::desc("Eagerly invalidate more analyses in default pipelines"));
173
175 "enable-merge-functions", cl::init(false), cl::Hidden,
176 cl::desc("Enable function merging as part of the optimization pipeline"));
177
179 "enable-post-pgo-loop-rotation", cl::init(true), cl::Hidden,
180 cl::desc("Run the loop rotation transformation after PGO instrumentation"));
181
183 "enable-global-analyses", cl::init(true), cl::Hidden,
184 cl::desc("Enable inter-procedural analyses"));
185
/// Flag to run the partial inlining pass. Defaults to false and is marked
/// cl::Hidden.
// NOTE(review): the description string below misspells "inlining". It is a
// runtime help string, so it is left untouched here.
186static cl::opt<bool>
187 RunPartialInlining("enable-partial-inlining", cl::init(false), cl::Hidden,
188 cl::desc("Run Partial inlinining pass"));
189
191 "extra-vectorizer-passes", cl::init(false), cl::Hidden,
192 cl::desc("Run cleanup optimization passes after vectorization"));
193
/// Flag selecting which redundancy-elimination pass is scheduled: when set,
/// NewGVNPass is added instead of GVNPass (see the `if (RunNewGVN)` check
/// later in this file). Defaults to false and is marked cl::Hidden.
194static cl::opt<bool> RunNewGVN("enable-newgvn", cl::init(false), cl::Hidden,
195 cl::desc("Run the NewGVN pass"));
196
198 "enable-loopinterchange", cl::init(false), cl::Hidden,
199 cl::desc("Enable the experimental LoopInterchange Pass"));
200
/// Flag to enable the unroll-and-jam pass. Defaults to false and is marked
/// cl::Hidden.
201static cl::opt<bool> EnableUnrollAndJam("enable-unroll-and-jam",
202 cl::init(false), cl::Hidden,
203 cl::desc("Enable Unroll And Jam Pass"));
204
205static cl::opt<bool> EnableLoopFlatten("enable-loop-flatten", cl::init(false),
207 cl::desc("Enable the LoopFlatten Pass"));
208
/// Flag to enable DFA jump threading. Defaults to false and is marked
/// cl::Hidden. Where it is consulted later in this file, it is combined with
/// a `Level.getSizeLevel() == 0` check.
209static cl::opt<bool>
210 EnableDFAJumpThreading("enable-dfa-jump-thread",
211 cl::desc("Enable DFA jump threading"),
212 cl::init(false), cl::Hidden);
213
/// Flag to enable the hot-cold splitting pass. No explicit cl::init or
/// cl::Hidden modifier is given for this option.
214static cl::opt<bool>
215 EnableHotColdSplit("hot-cold-split",
216 cl::desc("Enable hot-cold splitting pass"));
217
218static cl::opt<bool> EnableIROutliner("ir-outliner", cl::init(false),
220 cl::desc("Enable ir outliner pass"));
221
/// Flag to disable the pre-instrumentation inliner. Defaults to false and is
/// marked cl::Hidden. addPGOInstrPasses only enters its pre-inliner setup
/// block when this flag is false and IsCS is false.
222static cl::opt<bool>
223 DisablePreInliner("disable-preinline", cl::init(false), cl::Hidden,
224 cl::desc("Disable pre-instrumentation inliner"));
225
227 "preinline-threshold", cl::Hidden, cl::init(75),
228 cl::desc("Control the amount of inlining in pre-instrumentation inliner "
229 "(default = 75)"));
230
/// Flag to enable the GVN hoisting pass; the description states the default
/// is off. When set, a GVNHoistPass is added to the function pass manager
/// (see the `if (EnableGVNHoist)` check later in this file).
231static cl::opt<bool>
232 EnableGVNHoist("enable-gvn-hoist",
233 cl::desc("Enable the GVN hoisting pass (default = off)"));
234
/// Flag to enable the GVN sinking pass; the description states the default
/// is off. When set, a GVNSinkPass followed by a SimplifyCFGPass is added to
/// the function pass manager (see the `if (EnableGVNSink)` block later in
/// this file).
235static cl::opt<bool>
236 EnableGVNSink("enable-gvn-sink",
237 cl::desc("Enable the GVN sinking pass (default = off)"));
238
239// This option is used in simplifying testing SampleFDO optimizations for
240// profile loading.
// NOTE(review): the comment above talks about SampleFDO profile loading, while
// the option's own description is about control height reduction (CHR).
// Confirm that the comment really belongs to this option.
/// Flag to enable the control height reduction optimization. Defaults to true
/// and is marked cl::Hidden.
241static cl::opt<bool>
242 EnableCHR("enable-chr", cl::init(true), cl::Hidden,
243 cl::desc("Enable control height reduction optimization (CHR)"));
244
246 "flattened-profile-used", cl::init(false), cl::Hidden,
247 cl::desc("Indicate the sample profile being used is flattened, i.e., "
248 "no inline hierachy exists in the profile"));
249
251 "enable-order-file-instrumentation", cl::init(false), cl::Hidden,
252 cl::desc("Enable order file instrumentation (default = off)"));
253
/// Flag to enable lowering of the matrix intrinsics. Defaults to false and is
/// marked cl::Hidden.
254static cl::opt<bool>
255 EnableMatrix("enable-matrix", cl::init(false), cl::Hidden,
256 cl::desc("Enable lowering of the matrix intrinsics"));
257
259 "enable-constraint-elimination", cl::init(true), cl::Hidden,
260 cl::desc(
261 "Enable pass to eliminate conditions based on linear constraints"));
262
264 "attributor-enable", cl::Hidden, cl::init(AttributorRunOption::NONE),
265 cl::desc("Enable the attributor inter-procedural deduction pass"),
266 cl::values(clEnumValN(AttributorRunOption::ALL, "all",
267 "enable all attributor runs"),
268 clEnumValN(AttributorRunOption::MODULE, "module",
269 "enable module-wide attributor runs"),
270 clEnumValN(AttributorRunOption::CGSCC, "cgscc",
271 "enable call graph SCC attributor runs"),
272 clEnumValN(AttributorRunOption::NONE, "none",
273 "disable attributor runs")));
274
276 "enable-memprof-context-disambiguation", cl::init(false), cl::Hidden,
277 cl::ZeroOrMore, cl::desc("Enable MemProf context disambiguation"));
278
280 LoopInterleaving = true;
281 LoopVectorization = true;
282 SLPVectorization = false;
283 LoopUnrolling = true;
287 CallGraphProfile = true;
289 InlinerThreshold = -1;
291}
292
293namespace llvm {
296} // namespace llvm
297
/// Invokes every callback stored in PeepholeEPCallbacks, in iteration order.
///
/// \param FPM   The function pass manager handed to each callback.
/// \param Level The optimization level handed to each callback.
298void PassBuilder::invokePeepholeEPCallbacks(FunctionPassManager &FPM,
299 OptimizationLevel Level) {
// Each callback receives the same pass manager and level.
300 for (auto &C : PeepholeEPCallbacks)
301 C(FPM, Level);
302}
303
304// Helper to add AnnotationRemarksPass.
307}
308
309// Helper to check if the current compilation phase is preparing for LTO
313}
314
315// TODO: Investigate the cost/benefit of tail call elimination on debugging.
317PassBuilder::buildO1FunctionSimplificationPipeline(OptimizationLevel Level,
319
321
324
325 // Form SSA out of local memory accesses after breaking apart aggregates into
326 // scalars.
328
329 // Catch trivial redundancies
330 FPM.addPass(EarlyCSEPass(true /* Enable mem-ssa. */));
331
332 // Hoisting of scalars and load expressions.
333 FPM.addPass(
334 SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(true)));
336
338
339 invokePeepholeEPCallbacks(FPM, Level);
340
341 FPM.addPass(
342 SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(true)));
343
344 // Form canonically associated expression trees, and simplify the trees using
345 // basic mathematical properties. For example, this will form (nearly)
346 // minimal multiplication trees.
348
349 // Add the primary loop simplification pipeline.
350 // FIXME: Currently this is split into two loop pass pipelines because we run
351 // some function passes in between them. These can and should be removed
352 // and/or replaced by scheduling the loop pass equivalents in the correct
353 // positions. But those equivalent passes aren't powerful enough yet.
354 // Specifically, `SimplifyCFGPass` and `InstCombinePass` are currently still
355 // used. We have `LoopSimplifyCFGPass` which isn't powerful enough yet to
356 // fully replace `SimplifyCFGPass`, and the closest to the other we have is
357 // `LoopInstSimplify`.
358 LoopPassManager LPM1, LPM2;
359
360 // Simplify the loop body. We do this initially to clean up after other loop
361 // passes run, either when iterating on a loop or on inner loops with
362 // implications on the outer loop.
365
366 // Try to remove as much code from the loop header as possible,
367 // to reduce amount of IR that will have to be duplicated. However,
368 // do not perform speculative hoisting the first time as LICM
369 // will destroy metadata that may not need to be destroyed if run
370 // after loop rotation.
371 // TODO: Investigate promotion cap for O1.
373 /*AllowSpeculation=*/false));
374
375 LPM1.addPass(LoopRotatePass(/* Disable header duplication */ true,
377 // TODO: Investigate promotion cap for O1.
379 /*AllowSpeculation=*/true));
382 LPM1.addPass(LoopFlattenPass());
383
386
387 for (auto &C : LateLoopOptimizationsEPCallbacks)
388 C(LPM2, Level);
389
391
394
395 // Do not enable unrolling in PreLinkThinLTO phase during sample PGO
396 // because it changes the IR, making profile annotation in the backend compile
397 // inaccurate. The normal unroller doesn't pay attention to forced full unroll
398 // attributes so we need to make sure and allow the full unroll pass to pay
399 // attention to it.
400 if (Phase != ThinOrFullLTOPhase::ThinLTOPreLink || !PGOOpt ||
401 PGOOpt->Action != PGOOptions::SampleUse)
402 LPM2.addPass(LoopFullUnrollPass(Level.getSpeedupLevel(),
403 /* OnlyWhenForced= */ !PTO.LoopUnrolling,
405
406 for (auto &C : LoopOptimizerEndEPCallbacks)
407 C(LPM2, Level);
408
409 // We provide the opt remark emitter pass for LICM to use. We only need to do
410 // this once as it is immutable.
411 FPM.addPass(
413 FPM.addPass(createFunctionToLoopPassAdaptor(std::move(LPM1),
414 /*UseMemorySSA=*/true,
415 /*UseBlockFrequencyInfo=*/true));
416 FPM.addPass(
417 SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(true)));
419 // The loop passes in LPM2 (LoopFullUnrollPass) do not preserve MemorySSA.
420 // *All* loop passes must preserve it, in order to be able to use it.
421 FPM.addPass(createFunctionToLoopPassAdaptor(std::move(LPM2),
422 /*UseMemorySSA=*/false,
423 /*UseBlockFrequencyInfo=*/false));
424
425 // Delete small array after loop unroll.
427
428 // Specially optimize memory movement as it doesn't look like dataflow in SSA.
429 FPM.addPass(MemCpyOptPass());
430
431 // Sparse conditional constant propagation.
432 // FIXME: It isn't clear why we do this *after* loop passes rather than
433 // before...
434 FPM.addPass(SCCPPass());
435
436 // Delete dead bit computations (instcombine runs after to fold away the dead
437 // computations, and then ADCE will run later to exploit any new DCE
438 // opportunities that creates).
439 FPM.addPass(BDCEPass());
440
441 // Run instcombine after redundancy and dead bit elimination to exploit
442 // opportunities opened up by them.
444 invokePeepholeEPCallbacks(FPM, Level);
445
446 FPM.addPass(CoroElidePass());
447
448 for (auto &C : ScalarOptimizerLateEPCallbacks)
449 C(FPM, Level);
450
451 // Finally, do an expensive DCE pass to catch all the dead code exposed by
452 // the simplifications and basic cleanup after all the simplifications.
453 // TODO: Investigate if this is too expensive.
454 FPM.addPass(ADCEPass());
455 FPM.addPass(
456 SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(true)));
458 invokePeepholeEPCallbacks(FPM, Level);
459
460 return FPM;
461}
462
466 assert(Level != OptimizationLevel::O0 && "Must request optimizations!");
467
468 // The O1 pipeline has a separate pipeline creation function to simplify
469 // construction readability.
470 if (Level.getSpeedupLevel() == 1)
471 return buildO1FunctionSimplificationPipeline(Level, Phase);
472
474
477
478 // Form SSA out of local memory accesses after breaking apart aggregates into
479 // scalars.
481
482 // Catch trivial redundancies
483 FPM.addPass(EarlyCSEPass(true /* Enable mem-ssa. */));
486
487 // Hoisting of scalars and load expressions.
488 if (EnableGVNHoist)
489 FPM.addPass(GVNHoistPass());
490
491 // Global value numbering based sinking.
492 if (EnableGVNSink) {
493 FPM.addPass(GVNSinkPass());
494 FPM.addPass(
495 SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(true)));
496 }
497
498 // Speculative execution if the target has divergent branches; otherwise nop.
499 FPM.addPass(SpeculativeExecutionPass(/* OnlyIfDivergentTarget =*/true));
500
501 // Optimize based on known information about branches, and cleanup afterward.
504
505 FPM.addPass(
506 SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(true)));
508 if (Level == OptimizationLevel::O3)
510
513
514 if (!Level.isOptimizingForSize())
516
517 invokePeepholeEPCallbacks(FPM, Level);
518
519 // For PGO use pipeline, try to optimize memory intrinsics such as memcpy
520 // using the size value profile. Don't perform this when optimizing for size.
521 if (PGOOpt && PGOOpt->Action == PGOOptions::IRUse &&
522 !Level.isOptimizingForSize())
524
526 FPM.addPass(
527 SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(true)));
528
529 // Form canonically associated expression trees, and simplify the trees using
530 // basic mathematical properties. For example, this will form (nearly)
531 // minimal multiplication trees.
533
534 // Add the primary loop simplification pipeline.
535 // FIXME: Currently this is split into two loop pass pipelines because we run
536 // some function passes in between them. These can and should be removed
537 // and/or replaced by scheduling the loop pass equivalents in the correct
538 // positions. But those equivalent passes aren't powerful enough yet.
539 // Specifically, `SimplifyCFGPass` and `InstCombinePass` are currently still
540 // used. We have `LoopSimplifyCFGPass` which isn't powerful enough yet to
541 // fully replace `SimplifyCFGPass`, and the closest to the other we have is
542 // `LoopInstSimplify`.
543 LoopPassManager LPM1, LPM2;
544
545 // Simplify the loop body. We do this initially to clean up after other loop
546 // passes run, either when iterating on a loop or on inner loops with
547 // implications on the outer loop.
550
551 // Try to remove as much code from the loop header as possible,
552 // to reduce amount of IR that will have to be duplicated. However,
553 // do not perform speculative hoisting the first time as LICM
554 // will destroy metadata that may not need to be destroyed if run
555 // after loop rotation.
556 // TODO: Investigate promotion cap for O1.
558 /*AllowSpeculation=*/false));
559
560 // Disable header duplication in loop rotation at -Oz.
561 LPM1.addPass(
563 // TODO: Investigate promotion cap for O1.
565 /*AllowSpeculation=*/true));
566 LPM1.addPass(
567 SimpleLoopUnswitchPass(/* NonTrivial */ Level == OptimizationLevel::O3));
569 LPM1.addPass(LoopFlattenPass());
570
573
574 for (auto &C : LateLoopOptimizationsEPCallbacks)
575 C(LPM2, Level);
576
578
581
582 // Do not enable unrolling in PreLinkThinLTO phase during sample PGO
583 // because it changes the IR, making profile annotation in the backend compile
584 // inaccurate. The normal unroller doesn't pay attention to forced full unroll
585 // attributes so we need to make sure and allow the full unroll pass to pay
586 // attention to it.
587 if (Phase != ThinOrFullLTOPhase::ThinLTOPreLink || !PGOOpt ||
588 PGOOpt->Action != PGOOptions::SampleUse)
589 LPM2.addPass(LoopFullUnrollPass(Level.getSpeedupLevel(),
590 /* OnlyWhenForced= */ !PTO.LoopUnrolling,
592
593 for (auto &C : LoopOptimizerEndEPCallbacks)
594 C(LPM2, Level);
595
596 // We provide the opt remark emitter pass for LICM to use. We only need to do
597 // this once as it is immutable.
598 FPM.addPass(
600 FPM.addPass(createFunctionToLoopPassAdaptor(std::move(LPM1),
601 /*UseMemorySSA=*/true,
602 /*UseBlockFrequencyInfo=*/true));
603 FPM.addPass(
604 SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(true)));
606 // The loop passes in LPM2 (LoopIdiomRecognizePass, IndVarSimplifyPass,
607 // LoopDeletionPass and LoopFullUnrollPass) do not preserve MemorySSA.
608 // *All* loop passes must preserve it, in order to be able to use it.
609 FPM.addPass(createFunctionToLoopPassAdaptor(std::move(LPM2),
610 /*UseMemorySSA=*/false,
611 /*UseBlockFrequencyInfo=*/false));
612
613 // Delete small array after loop unroll.
615
616 // Try vectorization/scalarization transforms that are both improvements
617 // themselves and can allow further folds with GVN and InstCombine.
618 FPM.addPass(VectorCombinePass(/*TryEarlyFoldsOnly=*/true));
619
620 // Eliminate redundancies.
622 if (RunNewGVN)
623 FPM.addPass(NewGVNPass());
624 else
625 FPM.addPass(GVNPass());
626
627 // Sparse conditional constant propagation.
628 // FIXME: It isn't clear why we do this *after* loop passes rather than
629 // before...
630 FPM.addPass(SCCPPass());
631
632 // Delete dead bit computations (instcombine runs after to fold away the dead
633 // computations, and then ADCE will run later to exploit any new DCE
634 // opportunities that creates).
635 FPM.addPass(BDCEPass());
636
637 // Run instcombine after redundancy and dead bit elimination to exploit
638 // opportunities opened up by them.
640 invokePeepholeEPCallbacks(FPM, Level);
641
642 // Re-consider control flow based optimizations after redundancy elimination,
643 // redo DCE, etc.
644 if (EnableDFAJumpThreading && Level.getSizeLevel() == 0)
646
649
650 // Finally, do an expensive DCE pass to catch all the dead code exposed by
651 // the simplifications and basic cleanup after all the simplifications.
652 // TODO: Investigate if this is too expensive.
653 FPM.addPass(ADCEPass());
654
655 // Specially optimize memory movement as it doesn't look like dataflow in SSA.
656 FPM.addPass(MemCpyOptPass());
657
658 FPM.addPass(DSEPass());
661 /*AllowSpeculation=*/true),
662 /*UseMemorySSA=*/true, /*UseBlockFrequencyInfo=*/true));
663
664 FPM.addPass(CoroElidePass());
665
666 for (auto &C : ScalarOptimizerLateEPCallbacks)
667 C(FPM, Level);
668
670 .convertSwitchRangeToICmp(true)
671 .hoistCommonInsts(true)
672 .sinkCommonInsts(true)));
674 invokePeepholeEPCallbacks(FPM, Level);
675
676 return FPM;
677}
678
679void PassBuilder::addRequiredLTOPreLinkPasses(ModulePassManager &MPM) {
682}
683
684void PassBuilder::addPGOInstrPasses(ModulePassManager &MPM,
685 OptimizationLevel Level, bool RunProfileGen,
686 bool IsCS, std::string ProfileFile,
687 std::string ProfileRemappingFile,
688 ThinOrFullLTOPhase LTOPhase,
690 assert(Level != OptimizationLevel::O0 && "Not expecting O0 here!");
691 if (!IsCS && !DisablePreInliner) {
692 InlineParams IP;
693
695
696 // FIXME: The hint threshold has the same value used by the regular inliner
697 // when not optimizing for size. This should probably be lowered after
698 // performance testing.
699 // FIXME: This comment is cargo-culted from the old pass manager; revisit.
700 IP.HintThreshold = Level.isOptimizingForSize() ? PreInlineThreshold : 325;
702 IP, /* MandatoryFirst */ true,
704 CGSCCPassManager &CGPipeline = MIWP.getPM();
705
708 FPM.addPass(EarlyCSEPass()); // Catch trivial redundancies.
709 FPM.addPass(SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(
710 true))); // Merge & remove basic blocks.
711 FPM.addPass(InstCombinePass()); // Combine silly sequences.
712 invokePeepholeEPCallbacks(FPM, Level);
713
714 CGPipeline.addPass(createCGSCCToFunctionPassAdaptor(
715 std::move(FPM), PTO.EagerlyInvalidateAnalyses));
716
717 MPM.addPass(std::move(MIWP));
718
719 // Delete anything that is now dead to make sure that we don't instrument
720 // dead code. Instrumentation can end up keeping dead code around and
721 // dramatically increase code size.
723 }
724
725 if (!RunProfileGen) {
726 assert(!ProfileFile.empty() && "Profile use expecting a profile file!");
727 MPM.addPass(
728 PGOInstrumentationUse(ProfileFile, ProfileRemappingFile, IsCS, FS));
729 // Cache ProfileSummaryAnalysis once to avoid the potential need to insert
730 // RequireAnalysisPass for PSI before subsequent non-module passes.
732 return;
733 }
734
735 // Perform PGO instrumentation.
737
739 // Disable header duplication in loop rotation at -Oz.
743 /*UseMemorySSA=*/false,
744 /*UseBlockFrequencyInfo=*/false),
746 }
747
748 // Add the profile lowering pass.
750 if (!ProfileFile.empty())
751 Options.InstrProfileOutput = ProfileFile;
752 // Do counter promotion at Level greater than O0.
753 Options.DoCounterPromotion = true;
754 Options.UseBFIInPromotion = IsCS;
756}
757
759 ModulePassManager &MPM, bool RunProfileGen, bool IsCS,
760 std::string ProfileFile, std::string ProfileRemappingFile,
762 if (!RunProfileGen) {
763 assert(!ProfileFile.empty() && "Profile use expecting a profile file!");
764 MPM.addPass(
765 PGOInstrumentationUse(ProfileFile, ProfileRemappingFile, IsCS, FS));
766 // Cache ProfileSummaryAnalysis once to avoid the potential need to insert
767 // RequireAnalysisPass for PSI before subsequent non-module passes.
769 return;
770 }
771
772 // Perform PGO instrumentation.
774 // Add the profile lowering pass.
776 if (!ProfileFile.empty())
777 Options.InstrProfileOutput = ProfileFile;
778 // Do not do counter promotion at O0.
779 Options.DoCounterPromotion = false;
780 Options.UseBFIInPromotion = IsCS;
782}
783
785 return getInlineParams(Level.getSpeedupLevel(), Level.getSizeLevel());
786}
787
791 InlineParams IP;
792 if (PTO.InlinerThreshold == -1)
793 IP = getInlineParamsFromOptLevel(Level);
794 else
796 // For PreLinkThinLTO + SamplePGO, set hot-caller threshold to 0 to
797 // disable hot callsite inline (as much as possible [1]) because it makes
798 // profile annotation in the backend inaccurate.
799 //
800 // [1] Note the cost of a function could be below zero due to erased
801 // prologue / epilogue.
802 if (Phase == ThinOrFullLTOPhase::ThinLTOPreLink && PGOOpt &&
803 PGOOpt->Action == PGOOptions::SampleUse)
805
806 if (PGOOpt)
808
812
813 // Require the GlobalsAA analysis for the module so we can query it within
814 // the CGSCC pipeline.
816 // Invalidate AAManager so it can be recreated and pick up the newly available
817 // GlobalsAA.
818 MIWP.addModulePass(
820
821 // Require the ProfileSummaryAnalysis for the module so we can query it within
822 // the inliner pass.
824
825 // Now begin the main postorder CGSCC pipeline.
826 // FIXME: The current CGSCC pipeline has its origins in the legacy pass
827 // manager and trying to emulate its precise behavior. Much of this doesn't
828 // make a lot of sense and we should revisit the core CGSCC structure.
829 CGSCCPassManager &MainCGPipeline = MIWP.getPM();
830
831 // Note: historically, the PruneEH pass was run first to deduce nounwind and
832 // generally clean up exception handling overhead. It isn't clear this is
833 // valuable as the inliner doesn't currently care whether it is inlining an
834 // invoke or a call.
835
837 MainCGPipeline.addPass(AttributorCGSCCPass());
838
839 // Deduce function attributes. We do another run of this after the function
840 // simplification pipeline, so this only needs to run when it could affect the
841 // function simplification pipeline, which is only the case with recursive
842 // functions.
843 MainCGPipeline.addPass(PostOrderFunctionAttrsPass(/*SkipNonRecursive*/ true));
844
845 // When at O3 add argument promotion to the pass pipeline.
846 // FIXME: It isn't at all clear why this should be limited to O3.
847 if (Level == OptimizationLevel::O3)
848 MainCGPipeline.addPass(ArgumentPromotionPass());
849
850 // Try to perform OpenMP specific optimizations. This is a (quick!) no-op if
851 // there are no OpenMP runtime calls present in the module.
852 if (Level == OptimizationLevel::O2 || Level == OptimizationLevel::O3)
853 MainCGPipeline.addPass(OpenMPOptCGSCCPass());
854
855 for (auto &C : CGSCCOptimizerLateEPCallbacks)
856 C(MainCGPipeline, Level);
857
858 // Add the core function simplification pipeline nested inside the
859 // CGSCC walk.
862 PTO.EagerlyInvalidateAnalyses, /*NoRerun=*/true));
863
864 // Finally, deduce any function attributes based on the fully simplified
865 // function.
866 MainCGPipeline.addPass(PostOrderFunctionAttrsPass());
867
868 // Mark that the function is fully simplified and that it shouldn't be
869 // simplified again if we somehow revisit it due to CGSCC mutations unless
870 // it's been modified since.
873
874 MainCGPipeline.addPass(CoroSplitPass(Level != OptimizationLevel::O0));
875
876 // Make sure we don't affect potential future NoRerun CGSCC adaptors.
877 MIWP.addLateModulePass(createModuleToFunctionPassAdaptor(
879
880 return MIWP;
881}
882
887
889 // For PreLinkThinLTO + SamplePGO, set hot-caller threshold to 0 to
890 // disable hot callsite inline (as much as possible [1]) because it makes
891 // profile annotation in the backend inaccurate.
892 //
893 // [1] Note the cost of a function could be below zero due to erased
894 // prologue / epilogue.
895 if (Phase == ThinOrFullLTOPhase::ThinLTOPreLink && PGOOpt &&
896 PGOOpt->Action == PGOOptions::SampleUse)
898
899 if (PGOOpt)
901
902 // The inline deferral logic is used to avoid losing inlining
903 // opportunities in the future. It is helpful in the SCC inliner, in which
904 // inlining is processed in bottom-up order.
905 // In the module inliner, however, the inlining order is priority-based
906 // by default, so inline deferral is unnecessary there and we disable the
907 // inline deferral logic in the module inliner.
908 IP.EnableDeferral = false;
909
911
915
918
919 return MPM;
920}
921
926
927 // Place pseudo probe instrumentation as the first pass of the pipeline to
928 // minimize the impact of optimization changes.
929 if (PGOOpt && PGOOpt->PseudoProbeForProfiling &&
932
933 bool HasSampleProfile = PGOOpt && (PGOOpt->Action == PGOOptions::SampleUse);
934
935 // In ThinLTO mode, when flattened profile is used, all the available
936 // profile information will be annotated in PreLink phase so there is
937 // no need to load the profile again in PostLink.
938 bool LoadSampleProfile =
939 HasSampleProfile &&
941
942 // During the ThinLTO backend phase we perform early indirect call promotion
943 // here, before globalopt. Otherwise imported available_externally functions
944 // look unreferenced and are removed. If we are going to load the sample
945 // profile then defer until later.
946 // TODO: See if we can move later and consolidate with the location where
947 // we perform ICP when we are loading a sample profile.
948 // TODO: We pass HasSampleProfile (whether there was a sample profile file
949 // passed to the compile) to the SamplePGO flag of ICP. This is used to
950 // determine whether the new direct calls are annotated with prof metadata.
951 // Ideally this should be determined from whether the IR is annotated with
952 // sample profile, and not whether a sample profile was provided on the
953 // command line. E.g. for flattened profiles where we will not be reloading
954 // the sample profile in the ThinLTO backend, we ideally shouldn't have to
955 // provide the sample profile file.
956 if (Phase == ThinOrFullLTOPhase::ThinLTOPostLink && !LoadSampleProfile)
957 MPM.addPass(PGOIndirectCallPromotion(true /* InLTO */, HasSampleProfile));
958
959 // Do basic inference of function attributes from known properties of system
960 // libraries and other oracles.
963
964 // Create an early function pass manager to cleanup the output of the
965 // frontend.
966 FunctionPassManager EarlyFPM;
967 // Lower llvm.expect to metadata before attempting transforms.
968 // Compare/branch metadata may alter the behavior of passes like SimplifyCFG.
970 EarlyFPM.addPass(SimplifyCFGPass());
972 EarlyFPM.addPass(EarlyCSEPass());
973 if (Level == OptimizationLevel::O3)
974 EarlyFPM.addPass(CallSiteSplittingPass());
975
978
979 if (LoadSampleProfile) {
980 // Annotate sample profile right after early FPM to ensure freshness of
981 // the debug info.
982 MPM.addPass(SampleProfileLoaderPass(PGOOpt->ProfileFile,
983 PGOOpt->ProfileRemappingFile, Phase));
984 // Cache ProfileSummaryAnalysis once to avoid the potential need to insert
985 // RequireAnalysisPass for PSI before subsequent non-module passes.
987 // Do not invoke ICP in the LTOPrelink phase as it makes it hard
988 // for the profile annotation to be accurate in the LTO backend.
991 // We perform early indirect call promotion here, before globalopt.
992 // This is important for the ThinLTO backend phase because otherwise
993 // imported available_externally functions look unreferenced and are
994 // removed.
995 MPM.addPass(
996 PGOIndirectCallPromotion(true /* IsInLTO */, true /* SamplePGO */));
997 }
998
999 // Try to perform OpenMP specific optimizations on the module. This is a
1000 // (quick!) no-op if there are no OpenMP runtime calls present in the module.
1001 if (Level != OptimizationLevel::O0)
1003
1006
1007 // Lower type metadata and the type.test intrinsic in the ThinLTO
1008 // post link pipeline after ICP. This is to enable usage of the type
1009 // tests in ICP sequences.
1011 MPM.addPass(LowerTypeTestsPass(nullptr, nullptr, true));
1012
1013 for (auto &C : PipelineEarlySimplificationEPCallbacks)
1014 C(MPM, Level);
1015
1016 // Interprocedural constant propagation now that basic cleanup has occurred
1017 // and prior to optimizing globals.
1018 // FIXME: This position in the pipeline hasn't been carefully considered in
1019 // years, it should be re-analyzed.
1021 IPSCCPOptions(/*AllowFuncSpec=*/
1022 Level != OptimizationLevel::Os &&
1023 Level != OptimizationLevel::Oz &&
1026
1027 // Attach metadata to indirect call sites indicating the set of functions
1028 // they may target at run-time. This should follow IPSCCP.
1030
1031 // Optimize globals to try and fold them into constants.
1033
1034 // Create a small function pass pipeline to cleanup after all the global
1035 // optimizations.
1036 FunctionPassManager GlobalCleanupPM;
1037 // FIXME: Should this instead be a run of SROA?
1038 GlobalCleanupPM.addPass(PromotePass());
1039 GlobalCleanupPM.addPass(InstCombinePass());
1040 invokePeepholeEPCallbacks(GlobalCleanupPM, Level);
1041 GlobalCleanupPM.addPass(
1042 SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(true)));
1043 MPM.addPass(createModuleToFunctionPassAdaptor(std::move(GlobalCleanupPM),
1045
1046 // Add all the requested passes for instrumentation PGO, if requested.
1047 if (PGOOpt && Phase != ThinOrFullLTOPhase::ThinLTOPostLink &&
1048 (PGOOpt->Action == PGOOptions::IRInstr ||
1049 PGOOpt->Action == PGOOptions::IRUse)) {
1050 addPGOInstrPasses(MPM, Level,
1051 /* RunProfileGen */ PGOOpt->Action == PGOOptions::IRInstr,
1052 /* IsCS */ false, PGOOpt->ProfileFile,
1053 PGOOpt->ProfileRemappingFile, Phase, PGOOpt->FS);
1054 MPM.addPass(PGOIndirectCallPromotion(false, false));
1055 }
1056 if (PGOOpt && Phase != ThinOrFullLTOPhase::ThinLTOPostLink &&
1057 PGOOpt->CSAction == PGOOptions::CSIRInstr)
1058 MPM.addPass(PGOInstrumentationGenCreateVar(PGOOpt->CSProfileGenFile));
1059
1060 // Synthesize function entry counts for non-PGO compilation.
1061 if (EnableSyntheticCounts && !PGOOpt)
1063
1066 else
1068
1069 // Remove any dead arguments exposed by cleanups, constant folding globals,
1070 // and argument promotion.
1072
1074
1075 // Optimize globals now that functions are fully simplified.
1077
1078 // Remove dead code, except in the ThinLTO pre-link pipeline where we may want
1079 // to keep available_externally functions.
1082
1086 }
1087
1088 return MPM;
1089}
1090
1091/// TODO: Should LTO cause any differences to this set of passes?
///
/// Appends the late, vectorization-related cleanup and unrolling passes to
/// \p FPM.
///
/// \param Level     Optimization level. Its speedup level is forwarded to the
///                  unroll passes and, together with ExtraVectorizerPasses,
///                  gates the optional extra cleanup passes.
/// \param FPM       Function pass manager that receives the passes.
/// \param IsFullLTO Selects the full-LTO ordering: the unrolling section is
///                  placed near the top of the function instead of near the
///                  end, an SCCP/InstCombine/BDCE sequence is added, and a
///                  final InstCombine run is appended.
///
/// NOTE(review): this listing is missing several source lines (the embedded
/// line numbers skip), so some addPass calls below are only partially
/// visible — confirm against the complete file before changing any code.
1092void PassBuilder::addVectorPasses(OptimizationLevel Level,
1093 FunctionPassManager &FPM, bool IsFullLTO) {
1096
1097 if (IsFullLTO) {
1098 // The vectorizer may have significantly shortened a loop body; unroll
1099 // again. Unroll small loops to hide loop backedge latency and saturate any
1100 // parallel execution resources of an out-of-order processor. We also then
1101 // need to clean up redundancies and loop invariant code.
1102 // FIXME: It would be really good to use a loop-integrated instruction
1103 // combiner for cleanup here so that the unrolling and LICM can be pipelined
1104 // across the loop nests.
1105 // We do UnrollAndJam in a separate LPM to ensure it happens before unroll
1108 LoopUnrollAndJamPass(Level.getSpeedupLevel())));
1110 Level.getSpeedupLevel(), /*OnlyWhenForced=*/!PTO.LoopUnrolling,
1113 // Now that we are done with loop unrolling, be it either by LoopVectorizer,
1114 // or LoopUnroll passes, some variable-offset GEP's into alloca's could have
1115 // become constant-offset, thus enabling SROA and alloca promotion. Do so.
1116 // NOTE: we are very late in the pipeline, and we don't have any LICM
1117 // or SimplifyCFG passes scheduled after us, that would cleanup
1118 // the CFG mess this may create if allowed to modify CFG, so forbid that.
1120 }
1121
1122 if (!IsFullLTO) {
1123 // Eliminate loads by forwarding stores from the previous iteration to loads
1124 // of the current iteration.
1126 }
1127 // Cleanup after the loop optimization passes.
1128 FPM.addPass(InstCombinePass());
1129
1130 if (Level.getSpeedupLevel() > 1 && ExtraVectorizerPasses) {
1131 ExtraVectorPassManager ExtraPasses;
1132 // At higher optimization levels, try to clean up any runtime overlap and
1133 // alignment checks inserted by the vectorizer. We want to track correlated
1134 // runtime checks for two inner loops in the same outer loop, fold any
1135 // common computations, hoist loop-invariant aspects out of any outer loop,
1136 // and unswitch the runtime checks if possible. Once hoisted, we may have
1137 // dead (or speculatable) control flows or more combining opportunities.
1138 ExtraPasses.addPass(EarlyCSEPass());
1140 ExtraPasses.addPass(InstCombinePass());
1141 LoopPassManager LPM;
1143 /*AllowSpeculation=*/true));
1144 LPM.addPass(SimpleLoopUnswitchPass(/* NonTrivial */ Level ==
1146 ExtraPasses.addPass(
1148 ExtraPasses.addPass(
1149 createFunctionToLoopPassAdaptor(std::move(LPM), /*UseMemorySSA=*/true,
1150 /*UseBlockFrequencyInfo=*/true));
1151 ExtraPasses.addPass(
1152 SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(true)));
1153 ExtraPasses.addPass(InstCombinePass());
// The whole cleanup sequence is handed to FPM as a single nested manager.
1154 FPM.addPass(std::move(ExtraPasses));
1155 }
1156
1157 // Now that we've formed fast to execute loop structures, we do further
1158 // optimizations. These are run afterward as they might block doing complex
1159 // analyses and transforms such as what are needed for loop vectorization.
1160
1161 // Cleanup after loop vectorization, etc. Simplification passes like CVP and
1162 // GVN, loop transforms, and others have already run, so it's now better to
1163 // convert to more optimized IR using more aggressive simplify CFG options.
1164 // The extra sinking transform can create larger basic blocks, so do this
1165 // before SLP vectorization.
1167 .forwardSwitchCondToPhi(true)
1168 .convertSwitchRangeToICmp(true)
1169 .convertSwitchToLookupTable(true)
1170 .needCanonicalLoops(false)
1171 .hoistCommonInsts(true)
1172 .sinkCommonInsts(true)));
1173
// Full LTO only: an extra SCCP / InstCombine / BDCE sequence.
1174 if (IsFullLTO) {
1175 FPM.addPass(SCCPPass());
1176 FPM.addPass(InstCombinePass());
1177 FPM.addPass(BDCEPass());
1178 }
1179
1180 // Optimize parallel scalar instruction chains into SIMD instructions.
1181 if (PTO.SLPVectorization) {
// Same gate as the extra cleanup passes above; adds an EarlyCSE run here.
1183 if (Level.getSpeedupLevel() > 1 && ExtraVectorizerPasses) {
1184 FPM.addPass(EarlyCSEPass());
1185 }
1186 }
1187 // Enhance/cleanup vector code.
1189
1190 if (!IsFullLTO) {
1191 FPM.addPass(InstCombinePass());
1192 // Unroll small loops to hide loop backedge latency and saturate any
1193 // parallel execution resources of an out-of-order processor. We also then
1194 // need to clean up redundancies and loop invariant code.
1195 // FIXME: It would be really good to use a loop-integrated instruction
1196 // combiner for cleanup here so that the unrolling and LICM can be pipelined
1197 // across the loop nests.
1198 // We do UnrollAndJam in a separate LPM to ensure it happens before unroll
1199 if (EnableUnrollAndJam && PTO.LoopUnrolling) {
1201 LoopUnrollAndJamPass(Level.getSpeedupLevel())));
1202 }
1204 Level.getSpeedupLevel(), /*OnlyWhenForced=*/!PTO.LoopUnrolling,
1207 // Now that we are done with loop unrolling, be it either by LoopVectorizer,
1208 // or LoopUnroll passes, some variable-offset GEP's into alloca's could have
1209 // become constant-offset, thus enabling SROA and alloca promotion. Do so.
1210 // NOTE: we are very late in the pipeline, and we don't have any LICM
1211 // or SimplifyCFG passes scheduled after us, that would cleanup
1212 // the CFG mess this may create if allowed to modify CFG, so forbid that.
1214 FPM.addPass(InstCombinePass());
1215 FPM.addPass(
1219 /*AllowSpeculation=*/true),
1220 /*UseMemorySSA=*/true, /*UseBlockFrequencyInfo=*/true));
1221 }
1222
1223 // Now that we've vectorized and unrolled loops, we may have more refined
1224 // alignment information, try to re-derive it here.
1226
// Full LTO gets one more InstCombine run at the very end.
1227 if (IsFullLTO)
1228 FPM.addPass(InstCombinePass());
1229}
1230
1233 ThinOrFullLTOPhase LTOPhase) {
1234 const bool LTOPreLink = (LTOPhase == ThinOrFullLTOPhase::ThinLTOPreLink ||
1237
1238 // Run partial inlining pass to partially inline functions that have
1239 // large bodies.
1242
1243 // Remove avail extern fns and globals definitions since we aren't compiling
1244 // an object file for later LTO. For LTO we want to preserve these so they
1245 // are eligible for inlining at link-time. Note if they are unreferenced they
1246 // will be removed by GlobalDCE later, so this only impacts referenced
1247 // available externally globals. Eventually they will be suppressed during
1248 // codegen, but eliminating here enables more opportunity for GlobalDCE as it
1249 // may make globals referenced by available external functions dead and saves
1250 // running remaining passes on the eliminated functions. These should be
1251 // preserved during prelinking for link-time inlining decisions.
1252 if (!LTOPreLink)
1254
1257
1258 // Do RPO function attribute inference across the module to forward-propagate
1259 // attributes where applicable.
1260 // FIXME: Is this really an optimization rather than a canonicalization?
1262
1263 // Do a post inline PGO instrumentation and use pass. This is a context
1264 // sensitive PGO pass. We don't want to do this in LTOPreLink phase as
1265 // cross-module inline has not been done yet. The context sensitive
1266 // instrumentation is after all the inlines are done.
1267 if (!LTOPreLink && PGOOpt) {
1268 if (PGOOpt->CSAction == PGOOptions::CSIRInstr)
1269 addPGOInstrPasses(MPM, Level, /* RunProfileGen */ true,
1270 /* IsCS */ true, PGOOpt->CSProfileGenFile,
1271 PGOOpt->ProfileRemappingFile, LTOPhase, PGOOpt->FS);
1272 else if (PGOOpt->CSAction == PGOOptions::CSIRUse)
1273 addPGOInstrPasses(MPM, Level, /* RunProfileGen */ false,
1274 /* IsCS */ true, PGOOpt->ProfileFile,
1275 PGOOpt->ProfileRemappingFile, LTOPhase, PGOOpt->FS);
1276 }
1277
1278 // Re-compute GlobalsAA here prior to function passes. This is particularly
1279 // useful as the above will have inlined, DCE'ed, and function-attr
1280 // propagated everything. We should at this point have a reasonably minimal
1281 // and richly annotated call graph. By computing aliasing and mod/ref
1282 // information for all local globals here, the late loop passes and notably
1283 // the vectorizer will be able to use them to help recognize vectorizable
1284 // memory operations.
1286
1287 for (auto &C : OptimizerEarlyEPCallbacks)
1288 C(MPM, Level);
1289
1290 FunctionPassManager OptimizePM;
1291 OptimizePM.addPass(Float2IntPass());
1293
1294 if (EnableMatrix) {
1295 OptimizePM.addPass(LowerMatrixIntrinsicsPass());
1296 OptimizePM.addPass(EarlyCSEPass());
1297 }
1298
1299 // CHR pass should only be applied with the profile information.
1300 // The check is to check the profile summary information in CHR.
1301 if (EnableCHR && Level == OptimizationLevel::O3)
1302 OptimizePM.addPass(ControlHeightReductionPass());
1303
1304 // FIXME: We need to run some loop optimizations to re-rotate loops after
1305 // simplifycfg and others undo their rotation.
1306
1307 // Optimize the loop execution. These passes operate on entire loop nests
1308 // rather than on each loop in an inside-out manner, and so they are actually
1309 // function passes.
1310
1311 for (auto &C : VectorizerStartEPCallbacks)
1312 C(OptimizePM, Level);
1313
1314 LoopPassManager LPM;
1315 // First rotate loops that may have been un-rotated by prior passes.
1316 // Disable header duplication at -Oz.
1317 LPM.addPass(LoopRotatePass(Level != OptimizationLevel::Oz, LTOPreLink));
1318 // Some loops may have become dead by now. Try to delete them.
1319 // FIXME: see discussion in https://reviews.llvm.org/D112851,
1320 // this may need to be revisited once we run GVN before loop deletion
1321 // in the simplification pipeline.
1324 std::move(LPM), /*UseMemorySSA=*/false, /*UseBlockFrequencyInfo=*/false));
1325
1326 // Distribute loops to allow partial vectorization. I.e. isolate dependences
1327 // into separate loop that would otherwise inhibit vectorization. This is
1328 // currently only performed for loops marked with the metadata
1329 // llvm.loop.distribute=true or when -enable-loop-distribute is specified.
1330 OptimizePM.addPass(LoopDistributePass());
1331
1332 // Populates the VFABI attribute with the scalar-to-vector mappings
1333 // from the TargetLibraryInfo.
1334 OptimizePM.addPass(InjectTLIMappings());
1335
1336 addVectorPasses(Level, OptimizePM, /* IsFullLTO */ false);
1337
1338 // LoopSink pass sinks instructions hoisted by LICM, which serves as a
1339 // canonicalization pass that enables other optimizations. As a result,
1340 // LoopSink pass needs to be a very late IR pass to avoid undoing LICM
1341 // result too early.
1342 OptimizePM.addPass(LoopSinkPass());
1343
1344 // And finally clean up LCSSA form before generating code.
1345 OptimizePM.addPass(InstSimplifyPass());
1346
1347 // This hoists/decomposes div/rem ops. It should run after other sink/hoist
1348 // passes to avoid re-sinking, but before SimplifyCFG because it can allow
1349 // flattening of blocks.
1350 OptimizePM.addPass(DivRemPairsPass());
1351
1352 // Try to annotate calls that were created during optimization.
1353 OptimizePM.addPass(TailCallElimPass());
1354
1355 // LoopSink (and other loop passes since the last simplifyCFG) might have
1356 // resulted in single-entry-single-exit or empty blocks. Clean up the CFG.
1357 OptimizePM.addPass(
1358 SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(true)));
1359
1360 // Add the core optimizing pipeline.
1361 MPM.addPass(createModuleToFunctionPassAdaptor(std::move(OptimizePM),
1363
1364 for (auto &C : OptimizerLastEPCallbacks)
1365 C(MPM, Level);
1366
1367 // Split out cold code. Splitting is done late to avoid hiding context from
1368 // other optimizations and inadvertently regressing performance. The tradeoff
1369 // is that this has a higher code size cost than splitting early.
1370 if (EnableHotColdSplit && !LTOPreLink)
1372
1373 // Search the code for similar regions of code. If enough similar regions can
1374 // be found where extracting the regions into their own function will decrease
1375 // the size of the program, we extract the regions, and deduplicate the
1376 // structurally similar regions.
1377 if (EnableIROutliner)
1379
1380 // Merge functions if requested.
1381 if (PTO.MergeFunctions)
1383
1384 // Now we need to do some global optimization transforms.
1385 // FIXME: It would seem like these should come first in the optimization
1386 // pipeline and maybe be the bottom of the canonicalization pipeline? Weird
1387 // ordering here.
1390
1391 if (PTO.CallGraphProfile && !LTOPreLink)
1393
1394 // TODO: Relative lookup table converter pass caused an issue when full LTO is
1395 // enabled. See https://reviews.llvm.org/D94355 for more details.
1396 // Until the issue is fixed, disable this pass during the pre-linking phase.
1397 if (!LTOPreLink)
1399
1400 return MPM;
1401}
1402
1405 bool LTOPreLink) {
1406 if (Level == OptimizationLevel::O0)
1407 return buildO0DefaultPipeline(Level, LTOPreLink);
1408
1410
1411 // Convert @llvm.global.annotations to !annotation metadata.
1413
1414 // Force any function attributes we want the rest of the pipeline to observe.
1416
1417 // Apply module pipeline start EP callback.
1418 for (auto &C : PipelineStartEPCallbacks)
1419 C(MPM, Level);
1420
1421 if (PGOOpt && PGOOpt->DebugInfoForProfiling)
1423
1424 const ThinOrFullLTOPhase LTOPhase = LTOPreLink
1427 // Add the core simplification pipeline.
1429
1430 // Now add the optimization pipeline.
1432
1433 if (PGOOpt && PGOOpt->PseudoProbeForProfiling &&
1434 PGOOpt->Action == PGOOptions::SampleUse)
1436
1437 // Emit annotation remarks.
1439
1440 if (LTOPreLink)
1441 addRequiredLTOPreLinkPasses(MPM);
1442
1443 return MPM;
1444}
1445
1448 if (Level == OptimizationLevel::O0)
1449 return buildO0DefaultPipeline(Level, /*LTOPreLink*/true);
1450
1452
1453 // Convert @llvm.global.annotations to !annotation metadata.
1455
1456 // Force any function attributes we want the rest of the pipeline to observe.
1458
1459 if (PGOOpt && PGOOpt->DebugInfoForProfiling)
1461
1462 // Apply module pipeline start EP callback.
1463 for (auto &C : PipelineStartEPCallbacks)
1464 C(MPM, Level);
1465
1466 // If we are planning to perform ThinLTO later, we don't bloat the code with
1467 // unrolling/vectorization/... now. Just simplify the module as much as we
1468 // can.
1471
1472 // Run partial inlining pass to partially inline functions that have
1473 // large bodies.
1474 // FIXME: It isn't clear whether this is really the right place to run this
1475 // in ThinLTO. Because there is another canonicalization and simplification
1476 // phase that will run after the thin link, running this here ends up with
1477 // less information than will be available later and it may grow functions in
1478 // ways that aren't beneficial.
1481
1482 if (PGOOpt && PGOOpt->PseudoProbeForProfiling &&
1483 PGOOpt->Action == PGOOptions::SampleUse)
1485
1486 // Handle Optimizer{Early,Last}EPCallbacks added by clang on PreLink. Actual
1487 // optimization is going to be done in PostLink stage, but clang can't add
1488 // callbacks there in case of in-process ThinLTO called by linker.
1489 for (auto &C : OptimizerEarlyEPCallbacks)
1490 C(MPM, Level);
1491 for (auto &C : OptimizerLastEPCallbacks)
1492 C(MPM, Level);
1493
1494 // Emit annotation remarks.
1496
1497 addRequiredLTOPreLinkPasses(MPM);
1498
1499 return MPM;
1500}
1501
1503 OptimizationLevel Level, const ModuleSummaryIndex *ImportSummary) {
1505
1506 // Convert @llvm.global.annotations to !annotation metadata.
1508
1509 if (ImportSummary) {
1510 // These passes import type identifier resolutions for whole-program
1511 // devirtualization and CFI. They must run early because other passes may
1512 // disturb the specific instruction patterns that these passes look for,
1513 // creating dependencies on resolutions that may not appear in the summary.
1514 //
1515 // For example, GVN may transform the pattern assume(type.test) appearing in
1516 // two basic blocks into assume(phi(type.test, type.test)), which would
1517 // transform a dependency on a WPD resolution into a dependency on a type
1518 // identifier resolution for CFI.
1519 //
1520 // Also, WPD has access to more precise information than ICP and can
1521 // devirtualize more effectively, so it should operate on the IR first.
1522 //
1523 // The WPD and LowerTypeTest passes need to run at -O0 to lower type
1524 // metadata and intrinsics.
1525 MPM.addPass(WholeProgramDevirtPass(nullptr, ImportSummary));
1526 MPM.addPass(LowerTypeTestsPass(nullptr, ImportSummary));
1527 }
1528
1529 if (Level == OptimizationLevel::O0) {
1530 // Run a second time to clean up any type tests left behind by WPD for use
1531 // in ICP.
1532 MPM.addPass(LowerTypeTestsPass(nullptr, nullptr, true));
1533 // Drop available_externally and unreferenced globals. This is necessary
1534 // with ThinLTO in order to avoid leaving undefined references to dead
1535 // globals in the object file.
1538 return MPM;
1539 }
1540
1541 // Force any function attributes we want the rest of the pipeline to observe.
1543
1544 // Add the core simplification pipeline.
1547
1548 // Now add the optimization pipeline.
1551
1552 // Emit annotation remarks.
1554
1555 return MPM;
1556}
1557
1560 // FIXME: We should use a customized pre-link pipeline!
1561 return buildPerModuleDefaultPipeline(Level,
1562 /* LTOPreLink */ true);
1563}
1564
1567 ModuleSummaryIndex *ExportSummary) {
1569
1570 // Convert @llvm.global.annotations to !annotation metadata.
1572
1573 for (auto &C : FullLinkTimeOptimizationEarlyEPCallbacks)
1574 C(MPM, Level);
1575
1576 // Create a function that performs CFI checks for cross-DSO calls with targets
1577 // in the current module.
1579
1580 if (Level == OptimizationLevel::O0) {
1581 // The WPD and LowerTypeTest passes need to run at -O0 to lower type
1582 // metadata and intrinsics.
1583 MPM.addPass(WholeProgramDevirtPass(ExportSummary, nullptr));
1584 MPM.addPass(LowerTypeTestsPass(ExportSummary, nullptr));
1585 // Run a second time to clean up any type tests left behind by WPD for use
1586 // in ICP.
1587 MPM.addPass(LowerTypeTestsPass(nullptr, nullptr, true));
1588
1589 for (auto &C : FullLinkTimeOptimizationLastEPCallbacks)
1590 C(MPM, Level);
1591
1592 // Emit annotation remarks.
1594
1595 return MPM;
1596 }
1597
1598 if (PGOOpt && PGOOpt->Action == PGOOptions::SampleUse) {
1599 // Load sample profile before running the LTO optimization pipeline.
1600 MPM.addPass(SampleProfileLoaderPass(PGOOpt->ProfileFile,
1601 PGOOpt->ProfileRemappingFile,
1603 // Cache ProfileSummaryAnalysis once to avoid the potential need to insert
1604 // RequireAnalysisPass for PSI before subsequent non-module passes.
1606 }
1607
1608 // Try to run OpenMP optimizations, quick no-op if no OpenMP metadata present.
1610
1611 // Remove unused virtual tables to improve the quality of code generated by
1612 // whole-program devirtualization and bitset lowering.
1614
1615 // Force any function attributes we want the rest of the pipeline to observe.
1617
1618 // Do basic inference of function attributes from known properties of system
1619 // libraries and other oracles.
1621
1622 if (Level.getSpeedupLevel() > 1) {
1625
1626 // Indirect call promotion. This should promote all the targets that are
1627 // left by the earlier promotion pass that promotes intra-module targets.
1628 // This two-step promotion is to save the compile time. For LTO, it should
1629 // produce the same result as if we only do promotion here.
1631 true /* InLTO */, PGOOpt && PGOOpt->Action == PGOOptions::SampleUse));
1632
1633 // Propagate constants at call sites into the functions they call. This
1634 // opens opportunities for globalopt (and inlining) by substituting function
1635 // pointers passed as arguments to direct uses of functions.
1636 MPM.addPass(IPSCCPPass(IPSCCPOptions(/*AllowFuncSpec=*/
1637 Level != OptimizationLevel::Os &&
1638 Level != OptimizationLevel::Oz)));
1639
1640 // Attach metadata to indirect call sites indicating the set of functions
1641 // they may target at run-time. This should follow IPSCCP.
1643 }
1644
1645 // Now deduce any function attributes based on the current code.
1646 MPM.addPass(
1648
1649 // Do RPO function attribute inference across the module to forward-propagate
1650 // attributes where applicable.
1651 // FIXME: Is this really an optimization rather than a canonicalization?
1653
1654 // Use in-range annotations on GEP indices to split globals where beneficial.
1656
1657 // Run whole program optimization of virtual call when the list of callees
1658 // is fixed.
1659 MPM.addPass(WholeProgramDevirtPass(ExportSummary, nullptr));
1660
1661 // Stop here at -O1.
1662 if (Level == OptimizationLevel::O1) {
1663 // The LowerTypeTestsPass needs to run to lower type metadata and the
1664 // type.test intrinsics. The pass does nothing if CFI is disabled.
1665 MPM.addPass(LowerTypeTestsPass(ExportSummary, nullptr));
1666 // Run a second time to clean up any type tests left behind by WPD for use
1667 // in ICP (which is performed earlier than this in the regular LTO
1668 // pipeline).
1669 MPM.addPass(LowerTypeTestsPass(nullptr, nullptr, true));
1670
1671 for (auto &C : FullLinkTimeOptimizationLastEPCallbacks)
1672 C(MPM, Level);
1673
1674 // Emit annotation remarks.
1676
1677 return MPM;
1678 }
1679
1680 // Optimize globals to try and fold them into constants.
1682
1683 // Promote any localized globals to SSA registers.
1685
1686 // Linking modules together can lead to duplicate global constants; only
1687 // keep one copy of each constant.
1689
1690 // Remove unused arguments from functions.
1692
1693 // Reduce the code after globalopt and ipsccp. Both can open up significant
1694 // simplification opportunities, and both can propagate functions through
1695 // function pointers. When this happens, we often have to resolve varargs
1696 // calls, etc, so let instcombine do this.
1697 FunctionPassManager PeepholeFPM;
1698 PeepholeFPM.addPass(InstCombinePass());
1699 if (Level == OptimizationLevel::O3)
1700 PeepholeFPM.addPass(AggressiveInstCombinePass());
1701 invokePeepholeEPCallbacks(PeepholeFPM, Level);
1702
1703 MPM.addPass(createModuleToFunctionPassAdaptor(std::move(PeepholeFPM),
1705
1706 // Note: historically, the PruneEH pass was run first to deduce nounwind and
1707 // generally clean up exception handling overhead. It isn't clear this is
1708 // valuable as the inliner doesn't currently care whether it is inlining an
1709 // invoke or a call.
1710 // Run the inliner now.
1713 /* MandatoryFirst */ true,
1716
1717 // Perform context disambiguation after inlining, since that would reduce the
1718 // amount of additional cloning required to distinguish the allocation
1719 // contexts.
1722
1723 // Optimize globals again after we ran the inliner.
1725
1726 // Run the OpenMPOpt pass again after global optimizations.
1728
1729 // Garbage collect dead functions.
1731
1732 // If we didn't decide to inline a function, check to see if we can
1733 // transform it to pass arguments by value instead of by reference.
1735
1737 // The IPO Passes may leave cruft around. Clean up after them.
1738 FPM.addPass(InstCombinePass());
1739 invokePeepholeEPCallbacks(FPM, Level);
1740
1743
1745
1746 // Do a post inline PGO instrumentation and use pass. This is a context
1747 // sensitive PGO pass.
1748 if (PGOOpt) {
1749 if (PGOOpt->CSAction == PGOOptions::CSIRInstr)
1750 addPGOInstrPasses(MPM, Level, /* RunProfileGen */ true,
1751 /* IsCS */ true, PGOOpt->CSProfileGenFile,
1752 PGOOpt->ProfileRemappingFile,
1754 else if (PGOOpt->CSAction == PGOOptions::CSIRUse)
1755 addPGOInstrPasses(MPM, Level, /* RunProfileGen */ false,
1756 /* IsCS */ true, PGOOpt->ProfileFile,
1757 PGOOpt->ProfileRemappingFile,
1759 }
1760
1761 // Break up allocas
1763
1764 // LTO provides additional opportunities for tailcall elimination due to
1765 // link-time inlining, and visibility of nocapture attribute.
1767
1768 // Run a few AA driver optimizations here and now to cleanup the code.
1771
1772 MPM.addPass(
1774
1775 // Require the GlobalsAA analysis for the module so we can query it within
1776 // MainFPM.
1778 // Invalidate AAManager so it can be recreated and pick up the newly available
1779 // GlobalsAA.
1780 MPM.addPass(
1782
1783 FunctionPassManager MainFPM;
1786 /*AllowSpeculation=*/true),
1787 /*USeMemorySSA=*/true, /*UseBlockFrequencyInfo=*/true));
1788
1789 if (RunNewGVN)
1790 MainFPM.addPass(NewGVNPass());
1791 else
1792 MainFPM.addPass(GVNPass());
1793
1794 // Remove dead memcpy()'s.
1795 MainFPM.addPass(MemCpyOptPass());
1796
1797 // Nuke dead stores.
1798 MainFPM.addPass(DSEPass());
1800
1801 LoopPassManager LPM;
1802 if (EnableLoopFlatten && Level.getSpeedupLevel() > 1)
1803 LPM.addPass(LoopFlattenPass());
1806 // FIXME: Add loop interchange.
1807
1808 // Unroll small loops and perform peeling.
1809 LPM.addPass(LoopFullUnrollPass(Level.getSpeedupLevel(),
1810 /* OnlyWhenForced= */ !PTO.LoopUnrolling,
1812 // The loop passes in LPM (LoopFullUnrollPass) do not preserve MemorySSA.
1813 // *All* loop passes must preserve it, in order to be able to use it.
1815 std::move(LPM), /*UseMemorySSA=*/false, /*UseBlockFrequencyInfo=*/true));
1816
1817 MainFPM.addPass(LoopDistributePass());
1818
1819 addVectorPasses(Level, MainFPM, /* IsFullLTO */ true);
1820
1821 // Run the OpenMPOpt CGSCC pass again late.
1824
1825 invokePeepholeEPCallbacks(MainFPM, Level);
1826 MainFPM.addPass(JumpThreadingPass());
1829
1830 // Lower type metadata and the type.test intrinsic. This pass supports
1831 // clang's control flow integrity mechanisms (-fsanitize=cfi*) and needs
1832 // to be run at link time if CFI is enabled. This pass does nothing if
1833 // CFI is disabled.
1834 MPM.addPass(LowerTypeTestsPass(ExportSummary, nullptr));
1835 // Run a second time to clean up any type tests left behind by WPD for use
1836 // in ICP (which is performed earlier than this in the regular LTO pipeline).
1837 MPM.addPass(LowerTypeTestsPass(nullptr, nullptr, true));
1838
1839 // Enable splitting late in the FullLTO post-link pipeline.
1842
1843 // Add late LTO optimization passes.
1844 // Delete basic blocks, which optimization passes may have killed.
1846 SimplifyCFGOptions().convertSwitchRangeToICmp(true).hoistCommonInsts(
1847 true))));
1848
1849 // Drop bodies of available externally objects to improve GlobalDCE.
1851
1852 // Now that we have optimized the program, discard unreachable functions.
1854
1855 if (PTO.MergeFunctions)
1857
1858 if (PTO.CallGraphProfile)
1860
1861 for (auto &C : FullLinkTimeOptimizationLastEPCallbacks)
1862 C(MPM, Level);
1863
1864 // Emit annotation remarks.
1866
1867 return MPM;
1868}
1869
1871 bool LTOPreLink) {
1872 assert(Level == OptimizationLevel::O0 &&
1873 "buildO0DefaultPipeline should only be used with O0");
1874
1876
1877 // Perform pseudo probe instrumentation in O0 mode. This is for the
1878 // consistency between different build modes. For example, an LTO build can be
1879 // mixed with an O0 prelink and an O2 postlink. Loading a sample profile in
1880 // the postlink will require pseudo probe instrumentation in the prelink.
1881 if (PGOOpt && PGOOpt->PseudoProbeForProfiling)
1883
1884 if (PGOOpt && (PGOOpt->Action == PGOOptions::IRInstr ||
1885 PGOOpt->Action == PGOOptions::IRUse))
1887 MPM,
1888 /* RunProfileGen */ (PGOOpt->Action == PGOOptions::IRInstr),
1889 /* IsCS */ false, PGOOpt->ProfileFile, PGOOpt->ProfileRemappingFile,
1890 PGOOpt->FS);
1891
1892 for (auto &C : PipelineStartEPCallbacks)
1893 C(MPM, Level);
1894
1895 if (PGOOpt && PGOOpt->DebugInfoForProfiling)
1897
1898 for (auto &C : PipelineEarlySimplificationEPCallbacks)
1899 C(MPM, Level);
1900
1901 // Build a minimal pipeline based on the semantics required by LLVM,
1902 // which is just that always inlining occurs. Further, disable generating
1903 // lifetime intrinsics to avoid enabling further optimizations during
1904 // code generation.
1906 /*InsertLifetimeIntrinsics=*/false));
1907
1908 if (PTO.MergeFunctions)
1910
1911 if (EnableMatrix)
1912 MPM.addPass(
1914
1915 if (!CGSCCOptimizerLateEPCallbacks.empty()) {
1916 CGSCCPassManager CGPM;
1917 for (auto &C : CGSCCOptimizerLateEPCallbacks)
1918 C(CGPM, Level);
1919 if (!CGPM.isEmpty())
1921 }
1922 if (!LateLoopOptimizationsEPCallbacks.empty()) {
1923 LoopPassManager LPM;
1924 for (auto &C : LateLoopOptimizationsEPCallbacks)
1925 C(LPM, Level);
1926 if (!LPM.isEmpty()) {
1928 createFunctionToLoopPassAdaptor(std::move(LPM))));
1929 }
1930 }
1931 if (!LoopOptimizerEndEPCallbacks.empty()) {
1932 LoopPassManager LPM;
1933 for (auto &C : LoopOptimizerEndEPCallbacks)
1934 C(LPM, Level);
1935 if (!LPM.isEmpty()) {
1937 createFunctionToLoopPassAdaptor(std::move(LPM))));
1938 }
1939 }
1940 if (!ScalarOptimizerLateEPCallbacks.empty()) {
1942 for (auto &C : ScalarOptimizerLateEPCallbacks)
1943 C(FPM, Level);
1944 if (!FPM.isEmpty())
1946 }
1947
1948 for (auto &C : OptimizerEarlyEPCallbacks)
1949 C(MPM, Level);
1950
1951 if (!VectorizerStartEPCallbacks.empty()) {
1953 for (auto &C : VectorizerStartEPCallbacks)
1954 C(FPM, Level);
1955 if (!FPM.isEmpty())
1957 }
1958
1959 ModulePassManager CoroPM;
1960 CoroPM.addPass(CoroEarlyPass());
1961 CGSCCPassManager CGPM;
1962 CGPM.addPass(CoroSplitPass());
1963 CoroPM.addPass(createModuleToPostOrderCGSCCPassAdaptor(std::move(CGPM)));
1964 CoroPM.addPass(CoroCleanupPass());
1965 CoroPM.addPass(GlobalDCEPass());
1966 MPM.addPass(CoroConditionalWrapper(std::move(CoroPM)));
1967
1968 for (auto &C : OptimizerLastEPCallbacks)
1969 C(MPM, Level);
1970
1971 if (LTOPreLink)
1972 addRequiredLTOPreLinkPasses(MPM);
1973
1975
1976 return MPM;
1977}
1978
1980 AAManager AA;
1981
1982 // The order in which these are registered determines their priority when
1983 // being queried.
1984
1985 // First we register the basic alias analysis that provides the majority of
1986 // per-function local AA logic. This is a stateless, on-demand local set of
1987 // AA techniques.
1989
1990 // Next we query fast, specialized alias analyses that wrap IR-embedded
1991 // information about aliasing.
1994
1995 // Add support for querying global aliasing information when available.
1996 // Because the `AAManager` is a function analysis and `GlobalsAA` is a module
1997 // analysis, all that the `AAManager` can do is query for any *cached*
1998 // results from `GlobalsAA` through a readonly proxy.
2001
2002 // Add target-specific alias analyses.
2003 if (TM)
2005
2006 return AA;
2007}
aarch64 falkor hwpf fix Falkor HW Prefetch Fix Late Phase
AggressiveInstCombiner - Combine expression patterns to form expressions with fewer,...
Provides passes to inlining "always_inline" functions.
This is the interface for LLVM's primary stateless and local alias analysis.
This file provides the interface for LLVM's Call Graph Profile pass.
This header provides classes for managing passes over SCCs of the call graph.
#define clEnumValN(ENUMVAL, FLAGNAME, DESC)
Definition: CommandLine.h:678
This file provides the interface for a simple, fast CSE pass.
Super simple passes to force specific function attrs from the commandline into the IR for debugging p...
Provides passes for computing function attributes based on interprocedural analyses.
This file provides the interface for LLVM's Global Value Numbering pass which eliminates fully redund...
This is the interface for a simple mod/ref and alias analysis over globals.
Interfaces for passes which infer implicit function attributes from the name and signature of functio...
This file provides the primary interface to the instcombine pass.
Defines passes for running instruction simplification across chunks of IR.
This file provides the interface for LLVM's PGO Instrumentation lowering pass.
See the comments on JumpThreadingPass.
cl::opt< bool > EnableMemProfContextDisambiguation
Enable MemProf context disambiguation for thin link.
static LVOptions Options
Definition: LVOptions.cpp:25
This header defines the LoopLoadEliminationPass object.
This header provides classes for managing a pipeline of passes over loops in LLVM IR.
The header file for the LowerConstantIntrinsics pass as used by the new pass manager.
The header file for the LowerExpectIntrinsic pass as used by the new pass manager.
This pass performs merges of loads and stores on both sides of a.
This file provides the interface for LLVM's Global Value Numbering pass.
This header enumerates the LLVM-provided high-level optimization levels.
This file provides the interface for IR based instrumentation passes ( (profile-gen,...
Define option tunables for PGO.
ModulePassManager MPM
static cl::opt< bool > EnableMergeFunctions("enable-merge-functions", cl::init(false), cl::Hidden, cl::desc("Enable function merging as part of the optimization pipeline"))
static cl::opt< bool > EnableGlobalAnalyses("enable-global-analyses", cl::init(true), cl::Hidden, cl::desc("Enable inter-procedural analyses"))
static cl::opt< bool > EnableMemProfiler("enable-mem-prof", cl::Hidden, cl::desc("Enable memory profiler"))
static cl::opt< bool > EnableIROutliner("ir-outliner", cl::init(false), cl::Hidden, cl::desc("Enable ir outliner pass"))
static cl::opt< bool > RunNewGVN("enable-newgvn", cl::init(false), cl::Hidden, cl::desc("Run the NewGVN pass"))
static cl::opt< bool > DisablePreInliner("disable-preinline", cl::init(false), cl::Hidden, cl::desc("Disable pre-instrumentation inliner"))
static cl::opt< bool > EnableEagerlyInvalidateAnalyses("eagerly-invalidate-analyses", cl::init(true), cl::Hidden, cl::desc("Eagerly invalidate more analyses in default pipelines"))
static cl::opt< bool > ExtraVectorizerPasses("extra-vectorizer-passes", cl::init(false), cl::Hidden, cl::desc("Run cleanup optimization passes after vectorization"))
cl::opt< bool > EnableMemProfContextDisambiguation("enable-memprof-context-disambiguation", cl::init(false), cl::Hidden, cl::ZeroOrMore, cl::desc("Enable MemProf context disambiguation"))
static void addAnnotationRemarksPass(ModulePassManager &MPM)
static cl::opt< bool > EnablePostPGOLoopRotation("enable-post-pgo-loop-rotation", cl::init(true), cl::Hidden, cl::desc("Run the loop rotation transformation after PGO instrumentation"))
static InlineParams getInlineParamsFromOptLevel(OptimizationLevel Level)
static cl::opt< bool > EnableGVNSink("enable-gvn-sink", cl::desc("Enable the GVN sinking pass (default = off)"))
static cl::opt< bool > RunPartialInlining("enable-partial-inlining", cl::init(false), cl::Hidden, cl::desc("Run Partial inlinining pass"))
static cl::opt< bool > EnableGVNHoist("enable-gvn-hoist", cl::desc("Enable the GVN hoisting pass (default = off)"))
static cl::opt< bool > EnableDFAJumpThreading("enable-dfa-jump-thread", cl::desc("Enable DFA jump threading"), cl::init(false), cl::Hidden)
static cl::opt< bool > EnableCHR("enable-chr", cl::init(true), cl::Hidden, cl::desc("Enable control height reduction optimization (CHR)"))
static cl::opt< bool > EnableHotColdSplit("hot-cold-split", cl::desc("Enable hot-cold splitting pass"))
static cl::opt< bool > EnableLoopInterchange("enable-loopinterchange", cl::init(false), cl::Hidden, cl::desc("Enable the experimental LoopInterchange Pass"))
static cl::opt< bool > PerformMandatoryInliningsFirst("mandatory-inlining-first", cl::init(true), cl::Hidden, cl::desc("Perform mandatory inlinings module-wide, before performing " "inlining"))
static cl::opt< int > PreInlineThreshold("preinline-threshold", cl::Hidden, cl::init(75), cl::desc("Control the amount of inlining in pre-instrumentation inliner " "(default = 75)"))
static cl::opt< bool > EnableUnrollAndJam("enable-unroll-and-jam", cl::init(false), cl::Hidden, cl::desc("Enable Unroll And Jam Pass"))
static cl::opt< bool > EnableModuleInliner("enable-module-inliner", cl::init(false), cl::Hidden, cl::desc("Enable module inliner"))
static cl::opt< bool > EnableMatrix("enable-matrix", cl::init(false), cl::Hidden, cl::desc("Enable lowering of the matrix intrinsics"))
static cl::opt< AttributorRunOption > AttributorRun("attributor-enable", cl::Hidden, cl::init(AttributorRunOption::NONE), cl::desc("Enable the attributor inter-procedural deduction pass"), cl::values(clEnumValN(AttributorRunOption::ALL, "all", "enable all attributor runs"), clEnumValN(AttributorRunOption::MODULE, "module", "enable module-wide attributor runs"), clEnumValN(AttributorRunOption::CGSCC, "cgscc", "enable call graph SCC attributor runs"), clEnumValN(AttributorRunOption::NONE, "none", "disable attributor runs")))
static cl::opt< bool > EnableOrderFileInstrumentation("enable-order-file-instrumentation", cl::init(false), cl::Hidden, cl::desc("Enable order file instrumentation (default = off)"))
static cl::opt< bool > EnableSyntheticCounts("enable-npm-synthetic-counts", cl::Hidden, cl::desc("Run synthetic function entry count generation " "pass"))
static bool isLTOPreLink(ThinOrFullLTOPhase Phase)
static cl::opt< bool > EnablePGOInlineDeferral("enable-npm-pgo-inline-deferral", cl::init(true), cl::Hidden, cl::desc("Enable inline deferral during PGO"))
Flag to enable inline deferral during PGO.
static cl::opt< InliningAdvisorMode > UseInlineAdvisor("enable-ml-inliner", cl::init(InliningAdvisorMode::Default), cl::Hidden, cl::desc("Enable ML policy for inliner. Currently trained for -Oz only"), cl::values(clEnumValN(InliningAdvisorMode::Default, "default", "Heuristics-based inliner version"), clEnumValN(InliningAdvisorMode::Development, "development", "Use development mode (runtime-loadable model)"), clEnumValN(InliningAdvisorMode::Release, "release", "Use release mode (AOT-compiled model)")))
static cl::opt< bool > FlattenedProfileUsed("flattened-profile-used", cl::init(false), cl::Hidden, cl::desc("Indicate the sample profile being used is flattened, i.e., " "no inline hierachy exists in the profile"))
static cl::opt< bool > EnableConstraintElimination("enable-constraint-elimination", cl::init(true), cl::Hidden, cl::desc("Enable pass to eliminate conditions based on linear constraints"))
static cl::opt< bool > EnableLoopFlatten("enable-loop-flatten", cl::init(false), cl::Hidden, cl::desc("Enable the LoopFlatten Pass"))
This header defines various interfaces for pass management in LLVM.
This file implements relative lookup table converter that converts lookup tables to relative lookup t...
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
This file provides the interface for LLVM's Scalar Replacement of Aggregates pass.
This file provides the interface for the pseudo probe implementation for AutoFDO.
This file provides the interface for the sampled PGO loader pass.
This is the interface for a metadata-based scoped no-alias analysis.
This file provides the interface for the pass responsible for both simplifying and canonicalizing the...
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
This is the interface for a metadata-based TBAA.
Defines the virtual file system interface vfs::FileSystem.
A manager for alias analyses.
void registerFunctionAnalysis()
Register a specific AA result.
void registerModuleAnalysis()
Register a specific AA result.
Inlines functions marked as "always_inline".
Definition: AlwaysInliner.h:32
Argument promotion pass.
Analysis pass providing a never-invalidated alias analysis result.
Simple pass that canonicalizes aliases.
A pass that merges duplicate global constants into a single constant.
Definition: ConstantMerge.h:29
This class implements a trivial dead store elimination.
Eliminate dead arguments (and return values) from functions.
A pass that transforms external global definitions into declarations.
The core GVN pass object.
Definition: GVN.h:117
Pass to remove unused function declarations.
Definition: GlobalDCE.h:36
Optimize globals that never have their address taken.
Definition: GlobalOpt.h:25
Pass to perform split of global variables.
Definition: GlobalSplit.h:26
Analysis pass providing a never-invalidated alias analysis result.
Pass to outline cold regions.
Pass to perform interprocedural constant propagation.
Definition: SCCP.h:48
Pass to outline similar regions.
Definition: IROutliner.h:444
Run instruction simplification across each instruction in the function.
The instrumentation pass for recording function order.
Instrumentation based profiling lowering pass.
A smart pointer to a reference-counted object that inherits from RefCountedBase or ThreadSafeRefCount...
This pass performs 'jump threading', which looks at blocks that have multiple predecessors and multip...
Definition: JumpThreading.h:78
Performs Loop Invariant Code Motion Pass.
Definition: LICM.h:66
Loop unroll pass that only does full loop unrolling and peeling.
Performs Loop Idiom Recognize Pass.
Performs Loop Inst Simplify Pass.
A simple loop rotation transformation.
Definition: LoopRotation.h:24
Performs basic CFG simplifications to assist other loop passes.
A pass that does profile-guided sinking of instructions into loops.
Definition: LoopSink.h:33
A simple loop rotation transformation.
Loop unroll pass that will support both full and partial unrolling.
Public interface to the memory profiler pass for instrumenting code to profile memory accesses.
Definition: MemProfiler.h:30
Merge identical functions.
The module inliner pass for the new pass manager.
Definition: ModuleInliner.h:27
Module pass, wrapping the inliner pass.
Definition: Inliner.h:67
void addModulePass(T Pass)
Add a module pass that runs before the CGSCC passes.
Definition: Inliner.h:83
Public interface to the memory profiler module pass for instrumenting code to profile memory allocati...
Definition: MemProfiler.h:39
Class to hold module path string table and global value map, and encapsulate methods for operating on...
Simple pass that provides a name to every anonymous globals.
OpenMP optimizations pass.
Definition: OpenMPOpt.h:38
static const OptimizationLevel O3
Optimize for fast execution as much as possible.
static const OptimizationLevel Oz
A very specialized mode that will optimize for code size at any and all costs.
static const OptimizationLevel O0
Disable as many optimizations as possible.
static const OptimizationLevel Os
Similar to O2 but tries to optimize for small code size instead of fast execution without triggering ...
static const OptimizationLevel O2
Optimize for fast execution as much as possible without triggering significant incremental compile ti...
static const OptimizationLevel O1
Optimize quickly without destroying debuggability.
The indirect function call promotion pass.
The instrumentation (profile-instr-gen) pass for IR based PGO.
The instrumentation (profile-instr-gen) pass for IR based PGO.
The profile annotation (profile-instr-use) pass for IR based PGO.
The profile size based optimization pass for memory intrinsics.
Pass to remove unused function declarations.
ModulePassManager buildO0DefaultPipeline(OptimizationLevel Level, bool LTOPreLink=false)
Build an O0 pipeline with the minimal semantically required passes.
ModuleInlinerWrapperPass buildInlinerPipeline(OptimizationLevel Level, ThinOrFullLTOPhase Phase)
Construct the module pipeline that performs inlining as well as the inlining-driven cleanups.
AAManager buildDefaultAAPipeline()
Build the default AAManager with the default alias analysis pipeline registered.
ModulePassManager buildThinLTOPreLinkDefaultPipeline(OptimizationLevel Level)
Build a pre-link, ThinLTO-targeting default optimization pipeline to a pass manager.
ModulePassManager buildPerModuleDefaultPipeline(OptimizationLevel Level, bool LTOPreLink=false)
Build a per-module default optimization pipeline.
FunctionPassManager buildFunctionSimplificationPipeline(OptimizationLevel Level, ThinOrFullLTOPhase Phase)
Construct the core LLVM function canonicalization and simplification pipeline.
ModulePassManager buildLTODefaultPipeline(OptimizationLevel Level, ModuleSummaryIndex *ExportSummary)
Build an LTO default optimization pipeline to a pass manager.
ModulePassManager buildModuleInlinerPipeline(OptimizationLevel Level, ThinOrFullLTOPhase Phase)
Construct the module pipeline that performs inlining with module inliner pass.
ModulePassManager buildThinLTODefaultPipeline(OptimizationLevel Level, const ModuleSummaryIndex *ImportSummary)
Build a ThinLTO default optimization pipeline to a pass manager.
void addPGOInstrPassesForO0(ModulePassManager &MPM, bool RunProfileGen, bool IsCS, std::string ProfileFile, std::string ProfileRemappingFile, IntrusiveRefCntPtr< vfs::FileSystem > FS)
Add PGOInstrumentation passes for O0 only.
ModulePassManager buildModuleSimplificationPipeline(OptimizationLevel Level, ThinOrFullLTOPhase Phase)
Construct the core LLVM module canonicalization and simplification pipeline.
ModulePassManager buildModuleOptimizationPipeline(OptimizationLevel Level, ThinOrFullLTOPhase LTOPhase)
Construct the core LLVM module optimization pipeline.
ModulePassManager buildLTOPreLinkDefaultPipeline(OptimizationLevel Level)
Build a pre-link, LTO-targeting default optimization pipeline to a pass manager.
LLVM_ATTRIBUTE_MINSIZE std::enable_if_t< is_detected< HasRunOnLoopT, PassT >::value > addPass(PassT &&Pass)
LLVM_ATTRIBUTE_MINSIZE std::enable_if_t<!std::is_same< PassT, PassManager >::value > addPass(PassT &&Pass)
Definition: PassManager.h:544
bool isEmpty() const
Returns if the pass manager contains any passes.
Definition: PassManager.h:568
unsigned LicmMssaNoAccForPromotionCap
Tuning option to disable promotion to scalars in LICM with MemorySSA, if the number of access is too ...
Definition: PassBuilder.h:71
bool SLPVectorization
Tuning option to enable/disable slp loop vectorization, set based on opt level.
Definition: PassBuilder.h:56
int InlinerThreshold
Tuning option to override the default inliner threshold.
Definition: PassBuilder.h:82
bool CallGraphProfile
Tuning option to enable/disable call graph profile.
Definition: PassBuilder.h:75
bool MergeFunctions
Tuning option to enable/disable function merging.
Definition: PassBuilder.h:79
bool ForgetAllSCEVInLoopUnroll
Tuning option to forget all SCEV loops in LoopUnroll.
Definition: PassBuilder.h:63
unsigned LicmMssaOptCap
Tuning option to cap the number of calls to retrieve clobbering accesses in MemorySSA,...
Definition: PassBuilder.h:67
bool LoopInterleaving
Tuning option to set loop interleaving on/off, set based on opt level.
Definition: PassBuilder.h:48
PipelineTuningOptions()
Constructor sets pipeline tuning defaults based on cl::opts.
bool LoopUnrolling
Tuning option to enable/disable loop unrolling. Its default value is true.
Definition: PassBuilder.h:59
bool LoopVectorization
Tuning option to enable/disable loop vectorization, set based on opt level.
Definition: PassBuilder.h:52
Reassociate commutative expressions.
Definition: Reassociate.h:71
A pass to do RPO deduction and propagation of function attributes.
Definition: FunctionAttrs.h:74
This pass performs function-level constant propagation and merging.
Definition: SCCP.h:38
An optimization pass providing Scalar Replacement of Aggregates.
Definition: SROA.h:95
The sample profiler data loader pass.
Definition: SampleProfile.h:31
Analysis pass providing a never-invalidated alias analysis result.
This pass transforms loops that contain branches or switches on loop- invariant conditions to have mu...
A pass to simplify and canonicalize the CFG of a function.
Definition: SimplifyCFG.h:29
virtual void registerDefaultAliasAnalyses(AAManager &)
Allow the target to register alias analyses with the AAManager for use with the new pass manager.
Analysis pass providing a never-invalidated alias analysis result.
Optimize scalar/vector interactions in IR using target cost models.
Definition: VectorCombine.h:23
Interfaces for registering analysis passes, producing common pass manager configurations,...
@ C
The default llvm calling convention, compatible with C.
Definition: CallingConv.h:34
ValuesClass values(OptsTy... Options)
Helper to build a ValuesClass by forwarding a variable number of arguments as an initializer list to ...
Definition: CommandLine.h:703
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:445
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
ModuleToFunctionPassAdaptor createModuleToFunctionPassAdaptor(FunctionPassT &&Pass, bool EagerlyInvalidate=false)
A function to deduce a function pass type and wrap it in the templated adaptor.
Definition: PassManager.h:1218
@ MODULE
Definition: Attributor.h:5637
@ CGSCC
Definition: Attributor.h:5638
ThinOrFullLTOPhase
This enumerates the LLVM full LTO or ThinLTO optimization phases.
Definition: Pass.h:73
@ FullLTOPreLink
Full LTO prelink phase.
@ ThinLTOPostLink
ThinLTO postlink (backend compile) phase.
@ None
No LTO/ThinLTO behavior needed.
@ FullLTOPostLink
Full LTO postlink (backend compile) phase.
@ ThinLTOPreLink
ThinLTO prelink (summary) phase.
ModuleToPostOrderCGSCCPassAdaptor createModuleToPostOrderCGSCCPassAdaptor(CGSCCPassT &&Pass)
A function to deduce a CGSCC pass type and wrap it in the templated adaptor.
CGSCCToFunctionPassAdaptor createCGSCCToFunctionPassAdaptor(FunctionPassT &&Pass, bool EagerlyInvalidate=false, bool NoRerun=false)
A function to deduce a function pass type and wrap it in the templated adaptor.
cl::opt< bool > ForgetSCEVInLoopUnroll
bool AreStatisticsEnabled()
Check if statistics are enabled.
Definition: Statistic.cpp:139
InlineParams getInlineParams()
Generate the parameters to tune the inline cost analysis based only on the commandline options.
cl::opt< bool > EnableKnowledgeRetention
enable preservation of attributes in assume like: call void @llvm.assume(i1 true) [ "nonnull"(i32* PT...
cl::opt< unsigned > SetLicmMssaNoAccForPromotionCap
std::enable_if_t< is_detected< HasRunOnLoopT, LoopPassT >::value, FunctionToLoopPassAdaptor > createFunctionToLoopPassAdaptor(LoopPassT &&Pass, bool UseMemorySSA=false, bool UseBlockFrequencyInfo=false, bool UseBranchProbabilityInfo=false)
A function to deduce a loop pass type and wrap it in the templated adaptor.
cl::opt< unsigned > MaxDevirtIterations("max-devirt-iterations", cl::ReallyHidden, cl::init(4))
cl::opt< unsigned > SetLicmMssaOptCap
A DCE pass that assumes instructions are dead until proven otherwise.
Definition: ADCE.h:31
Pass to convert @llvm.global.annotations to !annotation metadata.
This pass attempts to minimize the number of assumes without losing any information.
Hoist/decompose integer division and remainder instructions to enable CFG improvements and better cod...
Definition: DivRemPairs.h:23
A simple and fast domtree-based CSE pass.
Definition: EarlyCSE.h:30
A pass manager to run a set of extra function simplification passes after vectorization,...
Pass which forces specific function attributes into the IR, primarily as a debugging tool.
A simple and fast domtree-based GVN pass to hoist common expressions from sibling branches.
Definition: GVN.h:376
Uses an "inverted" value numbering to decide the similarity of expressions and sinks similar expressi...
Definition: GVN.h:383
A set of parameters to control various transforms performed by IPSCCP pass.
Definition: SCCP.h:35
A pass which infers function attributes from the names and signatures of function declarations in a m...
Provides context on when an inline advisor is constructed in the pipeline (e.g., link phase,...
Definition: InlineAdvisor.h:60
Thresholds to tune inline cost analysis.
Definition: InlineCost.h:205
std::optional< int > HotCallSiteThreshold
Threshold to use when the callsite is considered hot.
Definition: InlineCost.h:222
int DefaultThreshold
The default threshold to start with for a callee.
Definition: InlineCost.h:207
std::optional< bool > EnableDeferral
Indicate whether we should allow inline deferral.
Definition: InlineCost.h:235
std::optional< int > HintThreshold
Threshold to use for callees with inline hint.
Definition: InlineCost.h:210
Options for the frontend instrumentation based profiling pass.
A no-op pass template which simply forces a specific analysis result to be invalidated.
Definition: PassManager.h:1272
Pass to forward loads in a loop around the backedge to subsequent iterations.
A set of parameters used to control various transforms performed by the LoopUnroll pass.
The LoopVectorize Pass.
Computes function attributes in post-order over the call graph.
Definition: FunctionAttrs.h:50
A utility pass template to force an analysis result to be available.
Definition: PassManager.h:1245