LLVM 20.0.0git
PassBuilderPipelines.cpp
Go to the documentation of this file.
1//===- Construction of pass pipelines -------------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8/// \file
9///
10/// This file provides the implementation of the PassBuilder based on our
11/// static pass registry as well as related functionality. It also provides
12/// helpers to aid in analyzing, debugging, and testing passes and pass
13/// pipelines.
14///
15//===----------------------------------------------------------------------===//
16
17#include "llvm/ADT/Statistic.h"
27#include "llvm/IR/PassManager.h"
144
145using namespace llvm;
146
// NOTE(review): this region declares the file-scope llvm::cl command-line
// flags that tune the default optimization pipelines. Several spans below
// are continuation lines of a `static cl::opt<...> Name(` header that is
// elided in this excerpt -- confirm the variable names against the full
// source before relying on them.
148 "enable-ml-inliner", cl::init(InliningAdvisorMode::Default), cl::Hidden,
149 cl::desc("Enable ML policy for inliner. Currently trained for -Oz only"),
150 cl::values(clEnumValN(InliningAdvisorMode::Default, "default",
151 "Heuristics-based inliner version"),
152 clEnumValN(InliningAdvisorMode::Development, "development",
153 "Use development mode (runtime-loadable model)"),
154 clEnumValN(InliningAdvisorMode::Release, "release",
155 "Use release mode (AOT-compiled model)")));
156
158 "enable-npm-synthetic-counts", cl::Hidden,
159 cl::desc("Run synthetic function entry count generation "
160 "pass"));
161
162/// Flag to enable inline deferral during PGO.
163static cl::opt<bool>
164 EnablePGOInlineDeferral("enable-npm-pgo-inline-deferral", cl::init(true),
166 cl::desc("Enable inline deferral during PGO"));
167
168static cl::opt<bool> EnableModuleInliner("enable-module-inliner",
169 cl::init(false), cl::Hidden,
170 cl::desc("Enable module inliner"));
171
173 "mandatory-inlining-first", cl::init(false), cl::Hidden,
174 cl::desc("Perform mandatory inlinings module-wide, before performing "
175 "inlining"));
176
178 "eagerly-invalidate-analyses", cl::init(true), cl::Hidden,
179 cl::desc("Eagerly invalidate more analyses in default pipelines"));
180
182 "enable-merge-functions", cl::init(false), cl::Hidden,
183 cl::desc("Enable function merging as part of the optimization pipeline"));
184
186 "enable-post-pgo-loop-rotation", cl::init(true), cl::Hidden,
187 cl::desc("Run the loop rotation transformation after PGO instrumentation"));
188
190 "enable-global-analyses", cl::init(true), cl::Hidden,
191 cl::desc("Enable inter-procedural analyses"));
192
193static cl::opt<bool>
194 RunPartialInlining("enable-partial-inlining", cl::init(false), cl::Hidden,
195 cl::desc("Run Partial inlinining pass"));
196
198 "extra-vectorizer-passes", cl::init(false), cl::Hidden,
199 cl::desc("Run cleanup optimization passes after vectorization"));
200
201static cl::opt<bool> RunNewGVN("enable-newgvn", cl::init(false), cl::Hidden,
202 cl::desc("Run the NewGVN pass"));
203
205 "enable-loopinterchange", cl::init(false), cl::Hidden,
206 cl::desc("Enable the experimental LoopInterchange Pass"));
207
208static cl::opt<bool> EnableUnrollAndJam("enable-unroll-and-jam",
209 cl::init(false), cl::Hidden,
210 cl::desc("Enable Unroll And Jam Pass"));
211
212static cl::opt<bool> EnableLoopFlatten("enable-loop-flatten", cl::init(false),
214 cl::desc("Enable the LoopFlatten Pass"));
215
216// Experimentally allow loop header duplication. This should allow for better
217// optimization at Oz, since loop-idiom recognition can then recognize things
218// like memcpy. If this ends up being useful for many targets, we should drop
219// this flag and make a code generation option that can be controlled
220// independent of the opt level and exposed through the frontend.
222 "enable-loop-header-duplication", cl::init(false), cl::Hidden,
223 cl::desc("Enable loop header duplication at any optimization level"));
224
225static cl::opt<bool>
226 EnableDFAJumpThreading("enable-dfa-jump-thread",
227 cl::desc("Enable DFA jump threading"),
228 cl::init(false), cl::Hidden);
229
230// TODO: turn on and remove flag
232 "enable-pgo-force-function-attrs",
233 cl::desc("Enable pass to set function attributes based on PGO profiles"),
234 cl::init(false));
235
236static cl::opt<bool>
237 EnableHotColdSplit("hot-cold-split",
238 cl::desc("Enable hot-cold splitting pass"));
239
240static cl::opt<bool> EnableIROutliner("ir-outliner", cl::init(false),
242 cl::desc("Enable ir outliner pass"));
243
244static cl::opt<bool>
245 DisablePreInliner("disable-preinline", cl::init(false), cl::Hidden,
246 cl::desc("Disable pre-instrumentation inliner"));
247
249 "preinline-threshold", cl::Hidden, cl::init(75),
250 cl::desc("Control the amount of inlining in pre-instrumentation inliner "
251 "(default = 75)"));
252
253static cl::opt<bool>
254 EnableGVNHoist("enable-gvn-hoist",
255 cl::desc("Enable the GVN hoisting pass (default = off)"));
256
257static cl::opt<bool>
258 EnableGVNSink("enable-gvn-sink",
259 cl::desc("Enable the GVN sinking pass (default = off)"));
260
262 "enable-jump-table-to-switch",
263 cl::desc("Enable JumpTableToSwitch pass (default = off)"));
264
265// This option is used in simplifying testing SampleFDO optimizations for
266// profile loading.
267static cl::opt<bool>
268 EnableCHR("enable-chr", cl::init(true), cl::Hidden,
269 cl::desc("Enable control height reduction optimization (CHR)"));
270
272 "flattened-profile-used", cl::init(false), cl::Hidden,
273 cl::desc("Indicate the sample profile being used is flattened, i.e., "
274 "no inline hierachy exists in the profile"));
275
277 "enable-order-file-instrumentation", cl::init(false), cl::Hidden,
278 cl::desc("Enable order file instrumentation (default = off)"));
279
280static cl::opt<bool>
281 EnableMatrix("enable-matrix", cl::init(false), cl::Hidden,
282 cl::desc("Enable lowering of the matrix intrinsics"));
283
285 "enable-constraint-elimination", cl::init(true), cl::Hidden,
286 cl::desc(
287 "Enable pass to eliminate conditions based on linear constraints"));
288
290 "attributor-enable", cl::Hidden, cl::init(AttributorRunOption::NONE),
291 cl::desc("Enable the attributor inter-procedural deduction pass"),
292 cl::values(clEnumValN(AttributorRunOption::ALL, "all",
293 "enable all attributor runs"),
294 clEnumValN(AttributorRunOption::MODULE, "module",
295 "enable module-wide attributor runs"),
296 clEnumValN(AttributorRunOption::CGSCC, "cgscc",
297 "enable call graph SCC attributor runs"),
298 clEnumValN(AttributorRunOption::NONE, "none",
299 "disable attributor runs")));
300
302 "enable-sampled-instrumentation", cl::init(false), cl::Hidden,
303 cl::desc("Enable profile instrumentation sampling (default = off)"));
305 "enable-loop-versioning-licm", cl::init(false), cl::Hidden,
306 cl::desc("Enable the experimental Loop Versioning LICM pass"));
307
309
310namespace llvm {
312
314} // namespace llvm
315
// NOTE(review): the assignments below appear to be the body of the
// PipelineTuningOptions default constructor (its signature is elided in
// this excerpt); they establish the default tuning values used by the
// default pipelines -- confirm against the full source.
317 LoopInterleaving = true;
318 LoopVectorization = true;
319 SLPVectorization = false;
320 LoopUnrolling = true;
324 CallGraphProfile = true;
325 UnifiedLTO = false;
// -1 means "use the level-derived inline threshold" (see the
// PTO.InlinerThreshold == -1 check later in this file).
327 InlinerThreshold = -1;
329}
330
331namespace llvm {
333} // namespace llvm
334
// Extension-point (EP) callback invokers. Each helper below iterates the
// corresponding list of user-registered callbacks and hands it the relevant
// pass manager (function, loop, CGSCC, or module) plus the optimization
// level, so external clients can inject passes at well-defined pipeline
// points. NOTE(review): most of the signatures are elided in this excerpt;
// the receiving pass-manager parameter names (FPM/LPM/CGPM/MPM) indicate
// the extension point's granularity.
336 OptimizationLevel Level) {
337 for (auto &C : PeepholeEPCallbacks)
338 C(FPM, Level);
339}
342 for (auto &C : LateLoopOptimizationsEPCallbacks)
343 C(LPM, Level);
344}
346 OptimizationLevel Level) {
347 for (auto &C : LoopOptimizerEndEPCallbacks)
348 C(LPM, Level);
349}
352 for (auto &C : ScalarOptimizerLateEPCallbacks)
353 C(FPM, Level);
354}
356 OptimizationLevel Level) {
357 for (auto &C : CGSCCOptimizerLateEPCallbacks)
358 C(CGPM, Level);
359}
361 OptimizationLevel Level) {
362 for (auto &C : VectorizerStartEPCallbacks)
363 C(FPM, Level);
364}
366 OptimizationLevel Level) {
367 for (auto &C : OptimizerEarlyEPCallbacks)
368 C(MPM, Level);
369}
371 OptimizationLevel Level) {
372 for (auto &C : OptimizerLastEPCallbacks)
373 C(MPM, Level);
374}
377 for (auto &C : FullLinkTimeOptimizationEarlyEPCallbacks)
378 C(MPM, Level);
379}
382 for (auto &C : FullLinkTimeOptimizationLastEPCallbacks)
383 C(MPM, Level);
384}
386 OptimizationLevel Level) {
387 for (auto &C : PipelineStartEPCallbacks)
388 C(MPM, Level);
389}
392 for (auto &C : PipelineEarlySimplificationEPCallbacks)
393 C(MPM, Level);
394}
395
396// Helper to add AnnotationRemarksPass.
// NOTE(review): helper body elided in this excerpt.
399}
400
401// Helper to check if the current compilation phase is preparing for LTO
// NOTE(review): helper body elided in this excerpt.
405}
406
407// TODO: Investigate the cost/benefit of tail call elimination on debugging.
// Build the function simplification pipeline used at -O1. Per the visible
// passes it runs scalar-promotion cleanup (the "form SSA" comment below;
// the pass itself is elided in this excerpt), EarlyCSE with MemorySSA, CFG
// simplification, two loop pipelines (LPM1 run with MemorySSA/BFI, LPM2
// without), then MemCpyOpt, SCCP, BDCE, CoroElide, ADCE, and a final CFG
// cleanup, invoking the peephole EP callbacks at several points.
409PassBuilder::buildO1FunctionSimplificationPipeline(OptimizationLevel Level,
411
413
416
417 // Form SSA out of local memory accesses after breaking apart aggregates into
418 // scalars.
420
421 // Catch trivial redundancies
422 FPM.addPass(EarlyCSEPass(true /* Enable mem-ssa. */));
423
424 // Hoisting of scalars and load expressions.
425 FPM.addPass(
426 SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(true)));
428
430
431 invokePeepholeEPCallbacks(FPM, Level);
432
433 FPM.addPass(
434 SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(true)));
435
436 // Form canonically associated expression trees, and simplify the trees using
437 // basic mathematical properties. For example, this will form (nearly)
438 // minimal multiplication trees.
440
441 // Add the primary loop simplification pipeline.
442 // FIXME: Currently this is split into two loop pass pipelines because we run
443 // some function passes in between them. These can and should be removed
444 // and/or replaced by scheduling the loop pass equivalents in the correct
445 // positions. But those equivalent passes aren't powerful enough yet.
446 // Specifically, `SimplifyCFGPass` and `InstCombinePass` are currently still
447 // used. We have `LoopSimplifyCFGPass` which isn't yet powerful enough yet to
448 // fully replace `SimplifyCFGPass`, and the closest to the other we have is
449 // `LoopInstSimplify`.
450 LoopPassManager LPM1, LPM2;
451
452 // Simplify the loop body. We do this initially to clean up after other loop
453 // passes run, either when iterating on a loop or on inner loops with
454 // implications on the outer loop.
457
458 // Try to remove as much code from the loop header as possible,
459 // to reduce amount of IR that will have to be duplicated. However,
460 // do not perform speculative hoisting the first time as LICM
461 // will destroy metadata that may not need to be destroyed if run
462 // after loop rotation.
463 // TODO: Investigate promotion cap for O1.
465 /*AllowSpeculation=*/false));
466
// At -O1 loop rotation never duplicates the header (first argument true).
467 LPM1.addPass(LoopRotatePass(/* Disable header duplication */ true,
469 // TODO: Investigate promotion cap for O1.
471 /*AllowSpeculation=*/true));
474 LPM1.addPass(LoopFlattenPass());
475
478
480
482
485
486 // Do not enable unrolling in PreLinkThinLTO phase during sample PGO
487 // because it changes IR to make profile annotation in back compile
488 // inaccurate. The normal unroller doesn't pay attention to forced full unroll
489 // attributes so we need to make sure and allow the full unroll pass to pay
490 // attention to it.
491 if (Phase != ThinOrFullLTOPhase::ThinLTOPreLink || !PGOOpt ||
492 PGOOpt->Action != PGOOptions::SampleUse)
493 LPM2.addPass(LoopFullUnrollPass(Level.getSpeedupLevel(),
494 /* OnlyWhenForced= */ !PTO.LoopUnrolling,
496
498
// LPM1 passes (LICM/LoopRotate) require MemorySSA and block frequency info.
499 FPM.addPass(createFunctionToLoopPassAdaptor(std::move(LPM1),
500 /*UseMemorySSA=*/true,
501 /*UseBlockFrequencyInfo=*/true));
502 FPM.addPass(
503 SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(true)));
505 // The loop passes in LPM2 (LoopFullUnrollPass) do not preserve MemorySSA.
506 // *All* loop passes must preserve it, in order to be able to use it.
507 FPM.addPass(createFunctionToLoopPassAdaptor(std::move(LPM2),
508 /*UseMemorySSA=*/false,
509 /*UseBlockFrequencyInfo=*/false));
510
511 // Delete small array after loop unroll.
513
514 // Specially optimize memory movement as it doesn't look like dataflow in SSA.
515 FPM.addPass(MemCpyOptPass());
516
517 // Sparse conditional constant propagation.
518 // FIXME: It isn't clear why we do this *after* loop passes rather than
519 // before...
520 FPM.addPass(SCCPPass());
521
522 // Delete dead bit computations (instcombine runs after to fold away the dead
523 // computations, and then ADCE will run later to exploit any new DCE
524 // opportunities that creates).
525 FPM.addPass(BDCEPass());
526
527 // Run instcombine after redundancy and dead bit elimination to exploit
528 // opportunities opened up by them.
530 invokePeepholeEPCallbacks(FPM, Level);
531
532 FPM.addPass(CoroElidePass());
533
535
536 // Finally, do an expensive DCE pass to catch all the dead code exposed by
537 // the simplifications and basic cleanup after all the simplifications.
538 // TODO: Investigate if this is too expensive.
539 FPM.addPass(ADCEPass());
540 FPM.addPass(
541 SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(true)));
543 invokePeepholeEPCallbacks(FPM, Level);
544
545 return FPM;
546}
547
// NOTE(review): this is the body of buildFunctionSimplificationPipeline
// (the -O2/-O3/-Os/-Oz function simplification pipeline); its signature is
// elided in this excerpt. It delegates to the O1 variant for speedup level
// 1 and otherwise builds a richer pipeline: optional GVN hoist/sink,
// speculative execution, jump-threading-style cleanups, the two-stage loop
// pipeline, VectorCombine, (New)GVN, SCCP, BDCE, DSE, and a final
// hoist/sink-enabled CFG simplification.
551 assert(Level != OptimizationLevel::O0 && "Must request optimizations!");
552
553 // The O1 pipeline has a separate pipeline creation function to simplify
554 // construction readability.
555 if (Level.getSpeedupLevel() == 1)
556 return buildO1FunctionSimplificationPipeline(Level, Phase);
557
559
562
563 // Form SSA out of local memory accesses after breaking apart aggregates into
564 // scalars.
566
567 // Catch trivial redundancies
568 FPM.addPass(EarlyCSEPass(true /* Enable mem-ssa. */));
571
572 // Hoisting of scalars and load expressions.
573 if (EnableGVNHoist)
574 FPM.addPass(GVNHoistPass());
575
576 // Global value numbering based sinking.
577 if (EnableGVNSink) {
578 FPM.addPass(GVNSinkPass());
579 FPM.addPass(
580 SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(true)));
581 }
582
583 // Speculative execution if the target has divergent branches; otherwise nop.
584 FPM.addPass(SpeculativeExecutionPass(/* OnlyIfDivergentTarget =*/true));
585
586 // Optimize based on known information about branches, and cleanup afterward.
589
590 // Jump table to switch conversion.
593
594 FPM.addPass(
595 SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(true)));
598
599 if (!Level.isOptimizingForSize())
601
602 invokePeepholeEPCallbacks(FPM, Level);
603
604 // For PGO use pipeline, try to optimize memory intrinsics such as memcpy
605 // using the size value profile. Don't perform this when optimizing for size.
606 if (PGOOpt && PGOOpt->Action == PGOOptions::IRUse &&
607 !Level.isOptimizingForSize())
609
611 FPM.addPass(
612 SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(true)));
613
614 // Form canonically associated expression trees, and simplify the trees using
615 // basic mathematical properties. For example, this will form (nearly)
616 // minimal multiplication trees.
618
621
622 // Add the primary loop simplification pipeline.
623 // FIXME: Currently this is split into two loop pass pipelines because we run
624 // some function passes in between them. These can and should be removed
625 // and/or replaced by scheduling the loop pass equivalents in the correct
626 // positions. But those equivalent passes aren't powerful enough yet.
627 // Specifically, `SimplifyCFGPass` and `InstCombinePass` are currently still
628 // used. We have `LoopSimplifyCFGPass` which isn't yet powerful enough yet to
629 // fully replace `SimplifyCFGPass`, and the closest to the other we have is
630 // `LoopInstSimplify`.
631 LoopPassManager LPM1, LPM2;
632
633 // Simplify the loop body. We do this initially to clean up after other loop
634 // passes run, either when iterating on a loop or on inner loops with
635 // implications on the outer loop.
638
639 // Try to remove as much code from the loop header as possible,
640 // to reduce amount of IR that will have to be duplicated. However,
641 // do not perform speculative hoisting the first time as LICM
642 // will destroy metadata that may not need to be destroyed if run
643 // after loop rotation.
644 // TODO: Investigate promotion cap for O1.
646 /*AllowSpeculation=*/false));
647
648 // Disable header duplication in loop rotation at -Oz.
650 Level != OptimizationLevel::Oz,
652 // TODO: Investigate promotion cap for O1.
654 /*AllowSpeculation=*/true));
// Non-trivial loop unswitching is only enabled at -O3.
655 LPM1.addPass(
656 SimpleLoopUnswitchPass(/* NonTrivial */ Level == OptimizationLevel::O3));
658 LPM1.addPass(LoopFlattenPass());
659
662
663 {
665 ExtraPasses.addPass(SimpleLoopUnswitchPass(/* NonTrivial */ Level ==
667 LPM2.addPass(std::move(ExtraPasses));
668 }
669
671
673
676
677 // Do not enable unrolling in PreLinkThinLTO phase during sample PGO
678 // because it changes IR to make profile annotation in back compile
679 // inaccurate. The normal unroller doesn't pay attention to forced full unroll
680 // attributes so we need to make sure and allow the full unroll pass to pay
681 // attention to it.
682 if (Phase != ThinOrFullLTOPhase::ThinLTOPreLink || !PGOOpt ||
683 PGOOpt->Action != PGOOptions::SampleUse)
684 LPM2.addPass(LoopFullUnrollPass(Level.getSpeedupLevel(),
685 /* OnlyWhenForced= */ !PTO.LoopUnrolling,
687
689
// LPM1 passes (LICM/rotation/unswitch) require MemorySSA and BFI.
690 FPM.addPass(createFunctionToLoopPassAdaptor(std::move(LPM1),
691 /*UseMemorySSA=*/true,
692 /*UseBlockFrequencyInfo=*/true));
693 FPM.addPass(
694 SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(true)));
696 // The loop passes in LPM2 (LoopIdiomRecognizePass, IndVarSimplifyPass,
697 // LoopDeletionPass and LoopFullUnrollPass) do not preserve MemorySSA.
698 // *All* loop passes must preserve it, in order to be able to use it.
699 FPM.addPass(createFunctionToLoopPassAdaptor(std::move(LPM2),
700 /*UseMemorySSA=*/false,
701 /*UseBlockFrequencyInfo=*/false));
702
703 // Delete small array after loop unroll.
705
706 // Try vectorization/scalarization transforms that are both improvements
707 // themselves and can allow further folds with GVN and InstCombine.
708 FPM.addPass(VectorCombinePass(/*TryEarlyFoldsOnly=*/true));
709
710 // Eliminate redundancies.
712 if (RunNewGVN)
713 FPM.addPass(NewGVNPass());
714 else
715 FPM.addPass(GVNPass());
716
717 // Sparse conditional constant propagation.
718 // FIXME: It isn't clear why we do this *after* loop passes rather than
719 // before...
720 FPM.addPass(SCCPPass());
721
722 // Delete dead bit computations (instcombine runs after to fold away the dead
723 // computations, and then ADCE will run later to exploit any new DCE
724 // opportunities that creates).
725 FPM.addPass(BDCEPass());
726
727 // Run instcombine after redundancy and dead bit elimination to exploit
728 // opportunities opened up by them.
730 invokePeepholeEPCallbacks(FPM, Level);
731
732 // Re-consider control flow based optimizations after redundancy elimination,
733 // redo DCE, etc.
736
739
740 // Finally, do an expensive DCE pass to catch all the dead code exposed by
741 // the simplifications and basic cleanup after all the simplifications.
742 // TODO: Investigate if this is too expensive.
743 FPM.addPass(ADCEPass());
744
745 // Specially optimize memory movement as it doesn't look like dataflow in SSA.
746 FPM.addPass(MemCpyOptPass());
747
748 FPM.addPass(DSEPass());
750
753 /*AllowSpeculation=*/true),
754 /*UseMemorySSA=*/true, /*UseBlockFrequencyInfo=*/false));
755
756 FPM.addPass(CoroElidePass());
757
759
// Final CFG cleanup also hoists/sinks common instructions across blocks.
761 .convertSwitchRangeToICmp(true)
762 .hoistCommonInsts(true)
763 .sinkCommonInsts(true)));
765 invokePeepholeEPCallbacks(FPM, Level);
766
767 return FPM;
768}
769
// Append the passes that must run at the end of an LTO pre-link pipeline.
// NOTE(review): the body is elided in this excerpt -- confirm which passes
// are added against the full source.
770void PassBuilder::addRequiredLTOPreLinkPasses(ModulePassManager &MPM) {
773}
774
// Append the pre-instrumentation inliner pipeline: a mandatory-first module
// inliner wrapper whose per-function cleanup runs EarlyCSE, SimplifyCFG and
// InstCombine, followed by GlobalDCE so that instrumentation does not keep
// (and grow) dead code. Only meaningful before PGO instrumentation; asserts
// that it is not used at -O0.
775void PassBuilder::addPreInlinerPasses(ModulePassManager &MPM,
776 OptimizationLevel Level,
777 ThinOrFullLTOPhase LTOPhase) {
778 assert(Level != OptimizationLevel::O0 && "Not expecting O0 here!");
// NOTE(review): the guard condition for this early return (presumably the
// DisablePreInliner flag and/or LTO-phase checks) is elided here.
780 return;
781 InlineParams IP;
782
784
785 // FIXME: The hint threshold has the same value used by the regular inliner
786 // when not optimizing for size. This should probably be lowered after
787 // performance testing.
788 // FIXME: this comment is cargo culted from the old pass manager, revisit).
789 IP.HintThreshold = Level.isOptimizingForSize() ? PreInlineThreshold : 325;
791 IP, /* MandatoryFirst */ true,
793 CGSCCPassManager &CGPipeline = MIWP.getPM();
794
797 FPM.addPass(EarlyCSEPass()); // Catch trivial redundancies.
798 FPM.addPass(SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(
799 true))); // Merge & remove basic blocks.
800 FPM.addPass(InstCombinePass()); // Combine silly sequences.
801 invokePeepholeEPCallbacks(FPM, Level);
802
803 CGPipeline.addPass(createCGSCCToFunctionPassAdaptor(
804 std::move(FPM), PTO.EagerlyInvalidateAnalyses));
805
806 MPM.addPass(std::move(MIWP));
807
808 // Delete anything that is now dead to make sure that we don't instrument
809 // dead code. Instrumentation can end up keeping dead code around and
810 // dramatically increase code size.
811 MPM.addPass(GlobalDCEPass());
812}
813
// Run loop rotation after PGO instrumentation (guarded by the
// enable-post-pgo-loop-rotation flag, per its declaration above). Header
// duplication is disabled at -Oz; the adaptor runs without MemorySSA or
// block frequency info. NOTE(review): the guard and the LoopRotatePass
// construction lines are partially elided in this excerpt.
814void PassBuilder::addPostPGOLoopRotation(ModulePassManager &MPM,
815 OptimizationLevel Level) {
817 // Disable header duplication in loop rotation at -Oz.
821 Level != OptimizationLevel::Oz),
822 /*UseMemorySSA=*/false,
823 /*UseBlockFrequencyInfo=*/false),
825 }
826}
827
// Append PGO passes for optimized builds (asserts Level != O0). In
// profile-use mode (!RunProfileGen) this adds PGOInstrumentationUse and
// returns early; in profile-generation mode it adds instrumentation, a
// post-PGO loop rotation, and the instrprof lowering pass configured with
// counter promotion, optional sampling, and atomic counter updates.
828void PassBuilder::addPGOInstrPasses(ModulePassManager &MPM,
829 OptimizationLevel Level, bool RunProfileGen,
830 bool IsCS, bool AtomicCounterUpdate,
831 std::string ProfileFile,
832 std::string ProfileRemappingFile,
834 assert(Level != OptimizationLevel::O0 && "Not expecting O0 here!");
835
836 if (!RunProfileGen) {
837 assert(!ProfileFile.empty() && "Profile use expecting a profile file!");
838 MPM.addPass(
839 PGOInstrumentationUse(ProfileFile, ProfileRemappingFile, IsCS, FS));
840 // Cache ProfileSummaryAnalysis once to avoid the potential need to insert
841 // RequireAnalysisPass for PSI before subsequent non-module passes.
843 return;
844 }
845
846 // Perform PGO instrumentation.
849
850 addPostPGOLoopRotation(MPM, Level);
851 // Add the profile lowering pass.
853 if (!ProfileFile.empty())
854 Options.InstrProfileOutput = ProfileFile;
855 // Do counter promotion at Level greater than O0.
856 Options.DoCounterPromotion = true;
857 Options.UseBFIInPromotion = IsCS;
858 if (EnableSampledInstr) {
859 Options.Sampling = true;
860 // With sampling, there is little benefit to enable counter promotion.
861 // But note that sampling does work with counter promotion.
862 Options.DoCounterPromotion = false;
863 }
864 Options.Atomic = AtomicCounterUpdate;
866}
867
// NOTE(review): this is the -O0 variant of addPGOInstrPasses (the signature
// start is elided in this excerpt). It mirrors the optimized variant but
// never performs counter promotion, since promotion is only done above O0.
869 ModulePassManager &MPM, bool RunProfileGen, bool IsCS,
870 bool AtomicCounterUpdate, std::string ProfileFile,
871 std::string ProfileRemappingFile, IntrusiveRefCntPtr<vfs::FileSystem> FS) {
872 if (!RunProfileGen) {
873 assert(!ProfileFile.empty() && "Profile use expecting a profile file!");
874 MPM.addPass(
875 PGOInstrumentationUse(ProfileFile, ProfileRemappingFile, IsCS, FS));
876 // Cache ProfileSummaryAnalysis once to avoid the potential need to insert
877 // RequireAnalysisPass for PSI before subsequent non-module passes.
879 return;
880 }
881
882 // Perform PGO instrumentation.
885 // Add the profile lowering pass.
887 if (!ProfileFile.empty())
888 Options.InstrProfileOutput = ProfileFile;
889 // Do not do counter promotion at O0.
890 Options.DoCounterPromotion = false;
891 Options.UseBFIInPromotion = IsCS;
892 Options.Atomic = AtomicCounterUpdate;
894}
895
// Map an OptimizationLevel onto InlineParams using its speedup and size
// levels. NOTE(review): the function signature line is elided in this
// excerpt (presumably getInlineParamsFromOptLevel -- confirm).
897 return getInlineParams(Level.getSpeedupLevel(), Level.getSizeLevel());
898}
899
// NOTE(review): body of buildInlinerPipeline (signature elided in this
// excerpt). Builds the ModuleInlinerWrapperPass (MIWP): selects inline
// parameters (level-derived unless PTO.InlinerThreshold overrides them),
// optionally requires GlobalsAA and ProfileSummaryAnalysis, then populates
// the postorder CGSCC pipeline with attribute deduction, optional argument
// promotion (O3) and OpenMP optimization (O2/O3), the nested function
// simplification pipeline, coroutine splitting, and a late module pass.
903 InlineParams IP;
904 if (PTO.InlinerThreshold == -1)
905 IP = getInlineParamsFromOptLevel(Level);
906 else
908 // For PreLinkThinLTO + SamplePGO, set hot-caller threshold to 0 to
909 // disable hot callsite inline (as much as possible [1]) because it makes
910 // profile annotation in the backend inaccurate.
911 //
912 // [1] Note the cost of a function could be below zero due to erased
913 // prologue / epilogue.
914 if (Phase == ThinOrFullLTOPhase::ThinLTOPreLink && PGOOpt &&
915 PGOOpt->Action == PGOOptions::SampleUse)
917
918 if (PGOOpt)
920
924
925 // Require the GlobalsAA analysis for the module so we can query it within
926 // the CGSCC pipeline.
929 // Invalidate AAManager so it can be recreated and pick up the newly
930 // available GlobalsAA.
931 MIWP.addModulePass(
933 }
934
935 // Require the ProfileSummaryAnalysis for the module so we can query it within
936 // the inliner pass.
938
939 // Now begin the main postorder CGSCC pipeline.
940 // FIXME: The current CGSCC pipeline has its origins in the legacy pass
941 // manager and trying to emulate its precise behavior. Much of this doesn't
942 // make a lot of sense and we should revisit the core CGSCC structure.
943 CGSCCPassManager &MainCGPipeline = MIWP.getPM();
944
945 // Note: historically, the PruneEH pass was run first to deduce nounwind and
946 // generally clean up exception handling overhead. It isn't clear this is
947 // valuable as the inliner doesn't currently care whether it is inlining an
948 // invoke or a call.
949
951 MainCGPipeline.addPass(AttributorCGSCCPass());
952
953 // Deduce function attributes. We do another run of this after the function
954 // simplification pipeline, so this only needs to run when it could affect the
955 // function simplification pipeline, which is only the case with recursive
956 // functions.
957 MainCGPipeline.addPass(PostOrderFunctionAttrsPass(/*SkipNonRecursive*/ true));
958
959 // When at O3 add argument promotion to the pass pipeline.
960 // FIXME: It isn't at all clear why this should be limited to O3.
961 if (Level == OptimizationLevel::O3)
962 MainCGPipeline.addPass(ArgumentPromotionPass());
963
964 // Try to perform OpenMP specific optimizations. This is a (quick!) no-op if
965 // there are no OpenMP runtime calls present in the module.
966 if (Level == OptimizationLevel::O2 || Level == OptimizationLevel::O3)
967 MainCGPipeline.addPass(OpenMPOptCGSCCPass());
968
969 invokeCGSCCOptimizerLateEPCallbacks(MainCGPipeline, Level);
970
971 // Add the core function simplification pipeline nested inside the
972 // CGSCC walk.
975 PTO.EagerlyInvalidateAnalyses, /*NoRerun=*/true));
976
977 // Finally, deduce any function attributes based on the fully simplified
978 // function.
979 MainCGPipeline.addPass(PostOrderFunctionAttrsPass());
980
981 // Mark that the function is fully simplified and that it shouldn't be
982 // simplified again if we somehow revisit it due to CGSCC mutations unless
983 // it's been modified since.
986
988 MainCGPipeline.addPass(CoroSplitPass(Level != OptimizationLevel::O0));
989
990 // Make sure we don't affect potential future NoRerun CGSCC adaptors.
991 MIWP.addLateModulePass(createModuleToFunctionPassAdaptor(
993
994 return MIWP;
995}
996
// NOTE(review): body of buildModuleInlinerPipeline (signature elided in
// this excerpt). Unlike the CGSCC inliner wrapper above, the module inliner
// visits call sites in a priority-based order, so inline deferral is
// explicitly disabled here.
1001
1003 // For PreLinkThinLTO + SamplePGO, set hot-caller threshold to 0 to
1004 // disable hot callsite inline (as much as possible [1]) because it makes
1005 // profile annotation in the backend inaccurate.
1006 //
1007 // [1] Note the cost of a function could be below zero due to erased
1008 // prologue / epilogue.
1009 if (Phase == ThinOrFullLTOPhase::ThinLTOPreLink && PGOOpt &&
1010 PGOOpt->Action == PGOOptions::SampleUse)
1011 IP.HotCallSiteThreshold = 0;
1012
1013 if (PGOOpt)
1015
1016 // The inline deferral logic is used to avoid losing some
1017 // inlining chance in future. It is helpful in SCC inliner, in which
1018 // inlining is processed in bottom-up order.
1019 // While in module inliner, the inlining order is a priority-based order
1020 // by default. The inline deferral is unnecessary there. So we disable the
1021 // inline deferral logic in module inliner.
1022 IP.EnableDeferral = false;
1023
1025
1029
1033
1034 return MPM;
1035}
1036
1040 assert(Level != OptimizationLevel::O0 &&
1041 "Should not be used for O0 pipeline");
1042
1044 "FullLTOPostLink shouldn't call buildModuleSimplificationPipeline!");
1045
1047
1048 // Place pseudo probe instrumentation as the first pass of the pipeline to
1049 // minimize the impact of optimization changes.
1050 if (PGOOpt && PGOOpt->PseudoProbeForProfiling &&
1053
1054 bool HasSampleProfile = PGOOpt && (PGOOpt->Action == PGOOptions::SampleUse);
1055
1056 // In ThinLTO mode, when flattened profile is used, all the available
1057 // profile information will be annotated in PreLink phase so there is
1058 // no need to load the profile again in PostLink.
1059 bool LoadSampleProfile =
1060 HasSampleProfile &&
1062
1063 // During the ThinLTO backend phase we perform early indirect call promotion
1064 // here, before globalopt. Otherwise imported available_externally functions
1065 // look unreferenced and are removed. If we are going to load the sample
1066 // profile then defer until later.
1067 // TODO: See if we can move later and consolidate with the location where
1068 // we perform ICP when we are loading a sample profile.
1069 // TODO: We pass HasSampleProfile (whether there was a sample profile file
1070 // passed to the compile) to the SamplePGO flag of ICP. This is used to
1071 // determine whether the new direct calls are annotated with prof metadata.
1072 // Ideally this should be determined from whether the IR is annotated with
1073 // sample profile, and not whether the a sample profile was provided on the
1074 // command line. E.g. for flattened profiles where we will not be reloading
1075 // the sample profile in the ThinLTO backend, we ideally shouldn't have to
1076 // provide the sample profile file.
1077 if (Phase == ThinOrFullLTOPhase::ThinLTOPostLink && !LoadSampleProfile)
1078 MPM.addPass(PGOIndirectCallPromotion(true /* InLTO */, HasSampleProfile));
1079
1080 // Create an early function pass manager to cleanup the output of the
1081 // frontend. Not necessary with LTO post link pipelines since the pre link
1082 // pipeline already cleaned up the frontend output.
1084 // Do basic inference of function attributes from known properties of system
1085 // libraries and other oracles.
1087 MPM.addPass(CoroEarlyPass());
1088
1089 FunctionPassManager EarlyFPM;
1090 EarlyFPM.addPass(EntryExitInstrumenterPass(/*PostInlining=*/false));
1091 // Lower llvm.expect to metadata before attempting transforms.
1092 // Compare/branch metadata may alter the behavior of passes like
1093 // SimplifyCFG.
1095 EarlyFPM.addPass(SimplifyCFGPass());
1097 EarlyFPM.addPass(EarlyCSEPass());
1098 if (Level == OptimizationLevel::O3)
1099 EarlyFPM.addPass(CallSiteSplittingPass());
1101 std::move(EarlyFPM), PTO.EagerlyInvalidateAnalyses));
1102 }
1103
1104 if (LoadSampleProfile) {
1105 // Annotate sample profile right after early FPM to ensure freshness of
1106 // the debug info.
1107 MPM.addPass(SampleProfileLoaderPass(PGOOpt->ProfileFile,
1108 PGOOpt->ProfileRemappingFile, Phase));
1109 // Cache ProfileSummaryAnalysis once to avoid the potential need to insert
1110 // RequireAnalysisPass for PSI before subsequent non-module passes.
1112 // Do not invoke ICP in the LTOPrelink phase as it makes it hard
1113 // for the profile annotation to be accurate in the LTO backend.
1114 if (!isLTOPreLink(Phase))
1115 // We perform early indirect call promotion here, before globalopt.
1116 // This is important for the ThinLTO backend phase because otherwise
1117 // imported available_externally functions look unreferenced and are
1118 // removed.
1119 MPM.addPass(
1120 PGOIndirectCallPromotion(true /* IsInLTO */, true /* SamplePGO */));
1121 }
1122
1123 // Try to perform OpenMP specific optimizations on the module. This is a
1124 // (quick!) no-op if there are no OpenMP runtime calls present in the module.
1125 MPM.addPass(OpenMPOptPass());
1126
1128 MPM.addPass(AttributorPass());
1129
1130 // Lower type metadata and the type.test intrinsic in the ThinLTO
1131 // post link pipeline after ICP. This is to enable usage of the type
1132 // tests in ICP sequences.
1134 MPM.addPass(LowerTypeTestsPass(nullptr, nullptr, true));
1135
1137
1138 // Interprocedural constant propagation now that basic cleanup has occurred
1139 // and prior to optimizing globals.
1140 // FIXME: This position in the pipeline hasn't been carefully considered in
1141 // years, it should be re-analyzed.
1142 MPM.addPass(IPSCCPPass(
1143 IPSCCPOptions(/*AllowFuncSpec=*/
1144 Level != OptimizationLevel::Os &&
1145 Level != OptimizationLevel::Oz &&
1146 !isLTOPreLink(Phase))));
1147
1148 // Attach metadata to indirect call sites indicating the set of functions
1149 // they may target at run-time. This should follow IPSCCP.
1151
1152 // Optimize globals to try and fold them into constants.
1153 MPM.addPass(GlobalOptPass());
1154
1155 // Create a small function pass pipeline to cleanup after all the global
1156 // optimizations.
1157 FunctionPassManager GlobalCleanupPM;
1158 // FIXME: Should this instead by a run of SROA?
1159 GlobalCleanupPM.addPass(PromotePass());
1160 GlobalCleanupPM.addPass(InstCombinePass());
1161 invokePeepholeEPCallbacks(GlobalCleanupPM, Level);
1162 GlobalCleanupPM.addPass(
1163 SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(true)));
1164 MPM.addPass(createModuleToFunctionPassAdaptor(std::move(GlobalCleanupPM),
1166
1167 // We already asserted this happens in non-FullLTOPostLink earlier.
1168 const bool IsPreLink = Phase != ThinOrFullLTOPhase::ThinLTOPostLink;
1169 const bool IsPGOPreLink = PGOOpt && IsPreLink;
1170 const bool IsPGOInstrGen =
1171 IsPGOPreLink && PGOOpt->Action == PGOOptions::IRInstr;
1172 const bool IsPGOInstrUse =
1173 IsPGOPreLink && PGOOpt->Action == PGOOptions::IRUse;
1174 const bool IsMemprofUse = IsPGOPreLink && !PGOOpt->MemoryProfile.empty();
1175 // We don't want to mix pgo ctx gen and pgo gen; we also don't currently
1176 // enable ctx profiling from the frontend.
1178 "Enabling both instrumented PGO and contextual instrumentation is not "
1179 "supported.");
1180 // Enable contextual profiling instrumentation.
1181 const bool IsCtxProfGen = !IsPGOInstrGen && IsPreLink &&
1183 const bool IsCtxProfUse = !UseCtxProfile.empty() && !PGOOpt &&
1185
1186 if (IsPGOInstrGen || IsPGOInstrUse || IsMemprofUse || IsCtxProfGen ||
1187 IsCtxProfUse)
1188 addPreInlinerPasses(MPM, Level, Phase);
1189
1190 // Add all the requested passes for instrumentation PGO, if requested.
1191 if (IsPGOInstrGen || IsPGOInstrUse) {
1192 addPGOInstrPasses(MPM, Level,
1193 /*RunProfileGen=*/IsPGOInstrGen,
1194 /*IsCS=*/false, PGOOpt->AtomicCounterUpdate,
1195 PGOOpt->ProfileFile, PGOOpt->ProfileRemappingFile,
1196 PGOOpt->FS);
1197 } else if (IsCtxProfGen || IsCtxProfUse) {
1199 // In pre-link, we just want the instrumented IR. We use the contextual
1200 // profile in the post-thinlink phase.
1201 // The instrumentation will be removed in post-thinlink after IPO.
1202 // FIXME(mtrofin): move AssignGUIDPass if there is agreement to use this
1203 // mechanism for GUIDs.
1204 MPM.addPass(AssignGUIDPass());
1205 if (IsCtxProfUse)
1206 return MPM;
1207 addPostPGOLoopRotation(MPM, Level);
1209 }
1210
1211 if (IsPGOInstrGen || IsPGOInstrUse || IsCtxProfGen)
1212 MPM.addPass(PGOIndirectCallPromotion(false, false));
1213
1214 if (IsPGOPreLink && PGOOpt->CSAction == PGOOptions::CSIRInstr)
1215 MPM.addPass(PGOInstrumentationGenCreateVar(PGOOpt->CSProfileGenFile,
1217
1218 if (IsMemprofUse)
1219 MPM.addPass(MemProfUsePass(PGOOpt->MemoryProfile, PGOOpt->FS));
1220
1221 // Synthesize function entry counts for non-PGO compilation.
1222 if (EnableSyntheticCounts && !PGOOpt)
1224
1225 if (EnablePGOForceFunctionAttrs && PGOOpt)
1226 MPM.addPass(PGOForceFunctionAttrsPass(PGOOpt->ColdOptType));
1227
1228 MPM.addPass(AlwaysInlinerPass(/*InsertLifetimeIntrinsics=*/true));
1229
1232 else
1233 MPM.addPass(buildInlinerPipeline(Level, Phase));
1234
1235 // Remove any dead arguments exposed by cleanups, constant folding globals,
1236 // and argument promotion.
1238
1240 MPM.addPass(CoroCleanupPass());
1241
1242 // Optimize globals now that functions are fully simplified.
1243 MPM.addPass(GlobalOptPass());
1244 MPM.addPass(GlobalDCEPass());
1245
1246 return MPM;
1247}
1248
// NOTE(review): This listing is a Doxygen-rendered capture of
// llvm/lib/Passes/PassBuilderPipelines.cpp. Each line carries its upstream
// line number, and several upstream lines are absent from the capture (see
// the gaps in the embedded numbering, e.g. 1252-1256, 1266-1272, 1279).
// The elided lines are mostly pass-manager adaptor calls; consult the
// upstream file for the authoritative text before editing.
1249/// TODO: Should LTO cause any differences to this set of passes?
// Appends the post-loop-optimization vectorization/unrolling/cleanup
// sequence to FPM. IsFullLTO selects between the full-LTO post-link
// ordering and the per-module ordering; the two arms share the unroll +
// cleanup structure but differ in where unrolling happens relative to
// load elimination (see the IsFullLTO / !IsFullLTO branches below).
1250void PassBuilder::addVectorPasses(OptimizationLevel Level,
1251 FunctionPassManager &FPM, bool IsFullLTO) {
1254
1257 if (IsFullLTO) {
1258 // The vectorizer may have significantly shortened a loop body; unroll
1259 // again. Unroll small loops to hide loop backedge latency and saturate any
1260 // parallel execution resources of an out-of-order processor. We also then
1261 // need to clean up redundancies and loop invariant code.
1262 // FIXME: It would be really good to use a loop-integrated instruction
1263 // combiner for cleanup here so that the unrolling and LICM can be pipelined
1264 // across the loop nests.
1265 // We do UnrollAndJam in a separate LPM to ensure it happens before unroll
1268 LoopUnrollAndJamPass(Level.getSpeedupLevel())));
1270 Level.getSpeedupLevel(), /*OnlyWhenForced=*/!PTO.LoopUnrolling,
1273 // Now that we are done with loop unrolling, be it either by LoopVectorizer,
1274 // or LoopUnroll passes, some variable-offset GEP's into alloca's could have
1275 // become constant-offset, thus enabling SROA and alloca promotion. Do so.
1276 // NOTE: we are very late in the pipeline, and we don't have any LICM
1277 // or SimplifyCFG passes scheduled after us, that would cleanup
1278 // the CFG mess this may have created if allowed to modify CFG, so forbid that.
1280 }
1281
1282 if (!IsFullLTO) {
1283 // Eliminate loads by forwarding stores from the previous iteration to loads
1284 // of the current iteration.
1286 }
1287 // Cleanup after the loop optimization passes.
1288 FPM.addPass(InstCombinePass());
1289
1290 if (Level.getSpeedupLevel() > 1 && ExtraVectorizerPasses) {
1291 ExtraVectorPassManager ExtraPasses;
1292 // At higher optimization levels, try to clean up any runtime overlap and
1293 // alignment checks inserted by the vectorizer. We want to track correlated
1294 // runtime checks for two inner loops in the same outer loop, fold any
1295 // common computations, hoist loop-invariant aspects out of any outer loop,
1296 // and unswitch the runtime checks if possible. Once hoisted, we may have
1297 // dead (or speculatable) control flows or more combining opportunities.
1298 ExtraPasses.addPass(EarlyCSEPass());
1300 ExtraPasses.addPass(InstCombinePass());
1301 LoopPassManager LPM;
1303 /*AllowSpeculation=*/true));
1304 LPM.addPass(SimpleLoopUnswitchPass(/* NonTrivial */ Level ==
1306 ExtraPasses.addPass(
1307 createFunctionToLoopPassAdaptor(std::move(LPM), /*UseMemorySSA=*/true,
1308 /*UseBlockFrequencyInfo=*/true));
1309 ExtraPasses.addPass(
1310 SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(true)));
1311 ExtraPasses.addPass(InstCombinePass());
1312 FPM.addPass(std::move(ExtraPasses));
1313 }
1314
1315 // Now that we've formed fast to execute loop structures, we do further
1316 // optimizations. These are run afterward as they might block doing complex
1317 // analyses and transforms such as what are needed for loop vectorization.
1318
1319 // Cleanup after loop vectorization, etc. Simplification passes like CVP and
1320 // GVN, loop transforms, and others have already run, so it's now better to
1321 // convert to more optimized IR using more aggressive simplify CFG options.
1322 // The extra sinking transform can create larger basic blocks, so do this
1323 // before SLP vectorization.
1325 .forwardSwitchCondToPhi(true)
1326 .convertSwitchRangeToICmp(true)
1327 .convertSwitchToLookupTable(true)
1328 .needCanonicalLoops(false)
1329 .hoistCommonInsts(true)
1330 .sinkCommonInsts(true)));
1331
1332 if (IsFullLTO) {
1333 FPM.addPass(SCCPPass());
1334 FPM.addPass(InstCombinePass());
1335 FPM.addPass(BDCEPass());
1336 }
1337
1338 // Optimize parallel scalar instruction chains into SIMD instructions.
1339 if (PTO.SLPVectorization) {
1341 if (Level.getSpeedupLevel() > 1 && ExtraVectorizerPasses) {
1342 FPM.addPass(EarlyCSEPass());
1343 }
1344 }
1345 // Enhance/cleanup vector code.
1347
1348 if (!IsFullLTO) {
1349 FPM.addPass(InstCombinePass());
1350 // Unroll small loops to hide loop backedge latency and saturate any
1351 // parallel execution resources of an out-of-order processor. We also then
1352 // need to clean up redundancies and loop invariant code.
1353 // FIXME: It would be really good to use a loop-integrated instruction
1354 // combiner for cleanup here so that the unrolling and LICM can be pipelined
1355 // across the loop nests.
1356 // We do UnrollAndJam in a separate LPM to ensure it happens before unroll
1357 if (EnableUnrollAndJam && PTO.LoopUnrolling) {
1359 LoopUnrollAndJamPass(Level.getSpeedupLevel())));
1360 }
1362 Level.getSpeedupLevel(), /*OnlyWhenForced=*/!PTO.LoopUnrolling,
1365 // Now that we are done with loop unrolling, be it either by LoopVectorizer,
1366 // or LoopUnroll passes, some variable-offset GEP's into alloca's could have
1367 // become constant-offset, thus enabling SROA and alloca promotion. Do so.
1368 // NOTE: we are very late in the pipeline, and we don't have any LICM
1369 // or SimplifyCFG passes scheduled after us, that would cleanup
1370 // the CFG mess this may have created if allowed to modify CFG, so forbid that.
1372 }
1373
1376 FPM.addPass(InstCombinePass())

;
1377
1378 // This is needed for two reasons:
1379 // 1. It works around problems that instcombine introduces, such as sinking
1380 // expensive FP divides into loops containing multiplications using the
1381 // divide result.
1382 // 2. It helps to clean up some loop-invariant code created by the loop
1383 // unroll pass when IsFullLTO=false.
1386 /*AllowSpeculation=*/true),
1387 /*UseMemorySSA=*/true, /*UseBlockFrequencyInfo=*/false));
1388
1389 // Now that we've vectorized and unrolled loops, we may have more refined
1390 // alignment information, try to re-derive it here.
1392}
1393
// NOTE(review): Doxygen-rendered capture — each line carries its upstream
// line number and many upstream lines are elided (gaps in the embedded
// numbering, including the first line of this function's signature at
// upstream lines 1394-1395: presumably
// `ModulePassManager PassBuilder::buildModuleOptimizationPipeline(...)` —
// confirm against the upstream file).
// Builds the module-level *optimization* half of the default pipeline:
// late IPO cleanup, CSPGO instrumentation/use, the per-function OptimizePM
// sequence (loop opts, vectorization via addVectorPasses, late CFG cleanup),
// then module-level finishers (hot/cold splitting, outlining, GlobalDCE,
// function merging).
1396 ThinOrFullLTOPhase LTOPhase) {
1397 const bool LTOPreLink = isLTOPreLink(LTOPhase);
1399
1400 // Run partial inlining pass to partially inline functions that have
1401 // large bodies.
1404
1405 // Remove avail extern fns and globals definitions since we aren't compiling
1406 // an object file for later LTO. For LTO we want to preserve these so they
1407 // are eligible for inlining at link-time. Note if they are unreferenced they
1408 // will be removed by GlobalDCE later, so this only impacts referenced
1409 // available externally globals. Eventually they will be suppressed during
1410 // codegen, but eliminating here enables more opportunity for GlobalDCE as it
1411 // may make globals referenced by available external functions dead and saves
1412 // running remaining passes on the eliminated functions. These should be
1413 // preserved during prelinking for link-time inlining decisions.
1414 if (!LTOPreLink)
1416
1419
1420 // Do RPO function attribute inference across the module to forward-propagate
1421 // attributes where applicable.
1422 // FIXME: Is this really an optimization rather than a canonicalization?
1424
1425 // Do a post inline PGO instrumentation and use pass. This is a context
1426 // sensitive PGO pass. We don't want to do this in LTOPreLink phase as
1427 // cross-module inline has not been done yet. The context sensitive
1428 // instrumentation is after all the inlines are done.
1429 if (!LTOPreLink && PGOOpt) {
1430 if (PGOOpt->CSAction == PGOOptions::CSIRInstr)
1431 addPGOInstrPasses(MPM, Level, /*RunProfileGen=*/true,
1432 /*IsCS=*/true, PGOOpt->AtomicCounterUpdate,
1433 PGOOpt->CSProfileGenFile, PGOOpt->ProfileRemappingFile,
1434 PGOOpt->FS);
1435 else if (PGOOpt->CSAction == PGOOptions::CSIRUse)
1436 addPGOInstrPasses(MPM, Level, /*RunProfileGen=*/false,
1437 /*IsCS=*/true, PGOOpt->AtomicCounterUpdate,
1438 PGOOpt->ProfileFile, PGOOpt->ProfileRemappingFile,
1439 PGOOpt->FS);
1440 }
1441
1442 // Re-compute GlobalsAA here prior to function passes. This is particularly
1443 // useful as the above will have inlined, DCE'ed, and function-attr
1444 // propagated everything. We should at this point have a reasonably minimal
1445 // and richly annotated call graph. By computing aliasing and mod/ref
1446 // information for all local globals here, the late loop passes and notably
1447 // the vectorizer will be able to use them to help recognize vectorizable
1448 // memory operations.
1451
1453
1454 FunctionPassManager OptimizePM;
1455 // Scheduling LoopVersioningLICM when inlining is over, because after that
1456 // we may see more accurate aliasing. Reason to run this late is that too
1457 // early versioning may prevent further inlining due to increase of code
1458 // size. Other optimizations which runs later might get benefit of no-alias
1459 // assumption in clone loop.
1461 OptimizePM.addPass(
1463 // LoopVersioningLICM pass might increase new LICM opportunities.
1466 /*AllowSpeculation=*/true),
1467 /*UseMemorySSA=*/true, /*UseBlockFrequencyInfo=*/false));
1468 }
1469
1470 OptimizePM.addPass(Float2IntPass());
1472
1473 if (EnableMatrix) {
1474 OptimizePM.addPass(LowerMatrixIntrinsicsPass());
1475 OptimizePM.addPass(EarlyCSEPass());
1476 }
1477
1478 // CHR pass should only be applied with the profile information.
1479 // The check is to check the profile summary information in CHR.
1480 if (EnableCHR && Level == OptimizationLevel::O3)
1481 OptimizePM.addPass(ControlHeightReductionPass());
1482
1483 // FIXME: We need to run some loop optimizations to re-rotate loops after
1484 // simplifycfg and others undo their rotation.
1485
1486 // Optimize the loop execution. These passes operate on entire loop nests
1487 // rather than on each loop in an inside-out manner, and so they are actually
1488 // function passes.
1489
1490 invokeVectorizerStartEPCallbacks(OptimizePM, Level);
1491
1492 LoopPassManager LPM;
1493 // First rotate loops that may have been un-rotated by prior passes.
1494 // Disable header duplication at -Oz.
1496 Level != OptimizationLevel::Oz,
1497 LTOPreLink));
1498 // Some loops may have become dead by now. Try to delete them.
1499 // FIXME: see discussion in https://reviews.llvm.org/D112851,
1500 // this may need to be revisited once we run GVN before loop deletion
1501 // in the simplification pipeline.
1504 std::move(LPM), /*UseMemorySSA=*/false, /*UseBlockFrequencyInfo=*/false));
1505
1506 // Distribute loops to allow partial vectorization. I.e. isolate dependences
1507 // into separate loop that would otherwise inhibit vectorization. This is
1508 // currently only performed for loops marked with the metadata
1509 // llvm.loop.distribute=true or when -enable-loop-distribute is specified.
1510 OptimizePM.addPass(LoopDistributePass());
1511
1512 // Populates the VFABI attribute with the scalar-to-vector mappings
1513 // from the TargetLibraryInfo.
1514 OptimizePM.addPass(InjectTLIMappings());
1515
1516 addVectorPasses(Level, OptimizePM, /* IsFullLTO */ false);
1517
1518 // LoopSink pass sinks instructions hoisted by LICM, which serves as a
1519 // canonicalization pass that enables other optimizations. As a result,
1520 // LoopSink pass needs to be a very late IR pass to avoid undoing LICM
1521 // result too early.
1522 OptimizePM.addPass(LoopSinkPass());
1523
1524 // And finally clean up LCSSA form before generating code.
1525 OptimizePM.addPass(InstSimplifyPass());
1526
1527 // This hoists/decomposes div/rem ops. It should run after other sink/hoist
1528 // passes to avoid re-sinking, but before SimplifyCFG because it can allow
1529 // flattening of blocks.
1530 OptimizePM.addPass(DivRemPairsPass());
1531
1532 // Try to annotate calls that were created during optimization.
1533 OptimizePM.addPass(TailCallElimPass());
1534
1535 // LoopSink (and other loop passes since the last simplifyCFG) might have
1536 // resulted in single-entry-single-exit or empty blocks. Clean up the CFG.
1538 .convertSwitchRangeToICmp(true)
1539 .speculateUnpredictables(true)));
1540
1541 // Add the core optimizing pipeline.
1542 MPM.addPass(createModuleToFunctionPassAdaptor(std::move(OptimizePM),
1544
1546
1547 // Split out cold code. Splitting is done late to avoid hiding context from
1548 // other optimizations and inadvertently regressing performance. The tradeoff
1549 // is that this has a higher code size cost than splitting early.
1550 if (EnableHotColdSplit && !LTOPreLink)
1552
1553 // Search the code for similar regions of code. If enough similar regions can
1554 // be found where extracting the regions into their own function will decrease
1555 // the size of the program, we extract the regions, and deduplicate the
1556 // structurally similar regions.
1557 if (EnableIROutliner)
1558 MPM.addPass(IROutlinerPass());
1559
1560 // Now we need to do some global optimization transforms.
1561 // FIXME: It would seem like these should come first in the optimization
1562 // pipeline and maybe be the bottom of the canonicalization pipeline? Weird
1563 // ordering here.
1564 MPM.addPass(GlobalDCEPass());
1566
1567 // Merge functions if requested. It has a better chance to merge functions
1568 // after ConstantMerge folded jump tables.
1569 if (PTO.MergeFunctions)
1571
1572 if (PTO.CallGraphProfile && !LTOPreLink)
1575
1576 // TODO: Relative lookup table converter pass caused an issue when full lto is
1577 // enabled. See https://reviews.llvm.org/D94355 for more details.
1578 // Until the issue fixed, disable this pass during pre-linking phase.
1579 if (!LTOPreLink)
1581
1582 return MPM;
1583}
1584
// NOTE(review): Doxygen-rendered capture — the first line of this
// function's signature (upstream lines 1585-1586, presumably
// `ModulePassManager PassBuilder::buildPerModuleDefaultPipeline(
//      OptimizationLevel Level,`) is missing from the capture.
// Builds the standard per-module (non-LTO, or LTO pre-link) -O[123sz]
// pipeline: O0 short-circuits to buildO0DefaultPipeline; otherwise the
// simplification pipeline is followed by the optimization pipeline, with
// required pre-link passes appended when LTOPreLink is set.
1587 bool LTOPreLink) {
1588 if (Level == OptimizationLevel::O0)
1589 return buildO0DefaultPipeline(Level, LTOPreLink);
1590
1592
1593 // Convert @llvm.global.annotations to !annotation metadata.
1595
1596 // Force any function attributes we want the rest of the pipeline to observe.
1598
1599 if (PGOOpt && PGOOpt->DebugInfoForProfiling)
1601
1602 // Apply module pipeline start EP callback.
1604
1605 const ThinOrFullLTOPhase LTOPhase = LTOPreLink
1608 // Add the core simplification pipeline.
1609 MPM.addPass(buildModuleSimplificationPipeline(Level, LTOPhase));
1610
1611 // Now add the optimization pipeline.
1612 MPM.addPass(buildModuleOptimizationPipeline(Level, LTOPhase));
1613
1614 if (PGOOpt && PGOOpt->PseudoProbeForProfiling &&
1615 PGOOpt->Action == PGOOptions::SampleUse)
1617
1618 // Emit annotation remarks.
1620
1621 if (LTOPreLink)
1622 addRequiredLTOPreLinkPasses(MPM);
1623 return MPM;
1624}
1625
// NOTE(review): Doxygen-rendered capture — the head of this function's
// signature (upstream lines 1626-1627) is missing; from the visible body
// this appears to take (OptimizationLevel Level, bool ThinLTO,
// bool EmitSummary) — confirm upstream.
// Builds the FatLTO pipeline: embeds bitcode for the (Thin)LTO link while
// also running a regular per-module optimization for the native object.
1628 bool EmitSummary) {
1630 if (ThinLTO)
1632 else
1634 MPM.addPass(EmbedBitcodePass(ThinLTO, EmitSummary));
1635
1636 // Use the ThinLTO post-link pipeline with sample profiling
1637 if (ThinLTO && PGOOpt && PGOOpt->Action == PGOOptions::SampleUse)
1638 MPM.addPass(buildThinLTODefaultPipeline(Level, /*ImportSummary=*/nullptr));
1639 else {
1640 // otherwise, just use module optimization
1641 MPM.addPass(
1643 // Emit annotation remarks.
1645 }
1646 return MPM;
1647}
1648
// NOTE(review): Doxygen-rendered capture — this function's signature
// (upstream lines 1649-1650, the ThinLTO pre-link pipeline builder) is
// missing from the capture; confirm the exact prototype upstream.
// Builds the ThinLTO pre-link pipeline: simplification only (no
// unrolling/vectorization — those run post-link), with an early exit for
// contextual-profile instrumentation builds.
1651 if (Level == OptimizationLevel::O0)
1652 return buildO0DefaultPipeline(Level, /*LTOPreLink*/true);
1653
1655
1656 // Convert @llvm.global.annotations to !annotation metadata.
1658
1659 // Force any function attributes we want the rest of the pipeline to observe.
1661
1662 if (PGOOpt && PGOOpt->DebugInfoForProfiling)
1664
1665 // Apply module pipeline start EP callback.
1667
1668 // If we are planning to perform ThinLTO later, we don't bloat the code with
1669 // unrolling/vectorization/... now. Just simplify the module as much as we
1670 // can.
1673 // In pre-link, for ctx prof use, we stop here with an instrumented IR. We let
1674 // thinlto use the contextual info to perform imports; then use the contextual
1675 // profile in the post-thinlink phase.
1676 if (!UseCtxProfile.empty() && !PGOOpt) {
1677 addRequiredLTOPreLinkPasses(MPM);
1678 return MPM;
1679 }
1680
1681 // Run partial inlining pass to partially inline functions that have
1682 // large bodies.
1683 // FIXME: It isn't clear whether this is really the right place to run this
1684 // in ThinLTO. Because there is another canonicalization and simplification
1685 // phase that will run after the thin link, running this here ends up with
1686 // less information than will be available later and it may grow functions in
1687 // ways that aren't beneficial.
1690
1691 if (PGOOpt && PGOOpt->PseudoProbeForProfiling &&
1692 PGOOpt->Action == PGOOptions::SampleUse)
1694
1695 // Handle Optimizer{Early,Last}EPCallbacks added by clang on PreLink. Actual
1696 // optimization is going to be done in PostLink stage, but clang can't add
1697 // callbacks there in case of in-process ThinLTO called by linker.
1700
1701 // Emit annotation remarks.
1703
1704 addRequiredLTOPreLinkPasses(MPM);
1705
1706 return MPM;
1707}
1708
// NOTE(review): Doxygen-rendered capture — the first line of this
// function's signature (upstream line 1709, presumably
// `ModulePassManager PassBuilder::buildThinLTODefaultPipeline(`) is
// missing from the capture.
// Builds the ThinLTO post-link (backend) pipeline. When an import summary
// is provided, context disambiguation and WPD/LowerTypeTests run first so
// later passes cannot disturb the instruction patterns they match; at O0
// only the minimal type-test cleanup and GlobalDCE are run.
1710 OptimizationLevel Level, const ModuleSummaryIndex *ImportSummary) {
1712
1713 if (ImportSummary) {
1714 // For ThinLTO we must apply the context disambiguation decisions early, to
1715 // ensure we can correctly match the callsites to summary data.
1717 MPM.addPass(MemProfContextDisambiguation(ImportSummary));
1718
1719 // These passes import type identifier resolutions for whole-program
1720 // devirtualization and CFI. They must run early because other passes may
1721 // disturb the specific instruction patterns that these passes look for,
1722 // creating dependencies on resolutions that may not appear in the summary.
1723 //
1724 // For example, GVN may transform the pattern assume(type.test) appearing in
1725 // two basic blocks into assume(phi(type.test, type.test)), which would
1726 // transform a dependency on a WPD resolution into a dependency on a type
1727 // identifier resolution for CFI.
1728 //
1729 // Also, WPD has access to more precise information than ICP and can
1730 // devirtualize more effectively, so it should operate on the IR first.
1731 //
1732 // The WPD and LowerTypeTest passes need to run at -O0 to lower type
1733 // metadata and intrinsics.
1734 MPM.addPass(WholeProgramDevirtPass(nullptr, ImportSummary));
1735 MPM.addPass(LowerTypeTestsPass(nullptr, ImportSummary));
1736 }
1737
1738 if (Level == OptimizationLevel::O0) {
1739 // Run a second time to clean up any type tests left behind by WPD for use
1740 // in ICP.
1741 MPM.addPass(LowerTypeTestsPass(nullptr, nullptr, true));
1742 // Drop available_externally and unreferenced globals. This is necessary
1743 // with ThinLTO in order to avoid leaving undefined references to dead
1744 // globals in the object file.
1746 MPM.addPass(GlobalDCEPass());
1747 return MPM;
1748 }
1749
1750 // Add the core simplification pipeline.
1753
1754 // Now add the optimization pipeline.
1757
1758 // Emit annotation remarks.
1760
1761 return MPM;
1762}
1763
// NOTE(review): Doxygen-rendered capture — this function's signature
// (upstream lines 1764-1765, the full-LTO pre-link pipeline builder) is
// missing from the capture. The visible body simply delegates to the
// per-module pipeline with LTOPreLink enabled.
1766 // FIXME: We should use a customized pre-link pipeline!
1767 return buildPerModuleDefaultPipeline(Level,
1768 /* LTOPreLink */ true);
1769}
1770
// NOTE(review): Doxygen-rendered capture — each line carries its upstream
// line number and many upstream lines are elided (e.g. 1774-1776, 1791,
// 1803, 1806, and most pass-manager adaptor lines). The head of the
// signature (upstream ~1771-1772, presumably
// `ModulePassManager PassBuilder::buildLTODefaultPipeline(
//      OptimizationLevel Level,`) is also missing — confirm upstream.
// Builds the full-LTO post-link pipeline: cross-DSO CFI, (at -O0/-O1
// reduced) whole-program devirtualization and type-test lowering, global
// IPO (IPSCCP, globalopt, inlining, MemProf context disambiguation),
// CSPGO, the main function-level optimization loop (GVN, DSE, unrolling,
// vectorization via addVectorPasses), and late cleanup/DCE.
1773 ModuleSummaryIndex *ExportSummary) {
1775
1777
1778 // Create a function that performs CFI checks for cross-DSO calls with targets
1779 // in the current module.
1780 MPM.addPass(CrossDSOCFIPass());
1781
1782 if (Level == OptimizationLevel::O0) {
1783 // The WPD and LowerTypeTest passes need to run at -O0 to lower type
1784 // metadata and intrinsics.
1785 MPM.addPass(WholeProgramDevirtPass(ExportSummary, nullptr));
1786 MPM.addPass(LowerTypeTestsPass(ExportSummary, nullptr));
1787 // Run a second time to clean up any type tests left behind by WPD for use
1788 // in ICP.
1789 MPM.addPass(LowerTypeTestsPass(nullptr, nullptr, true));
1790
1792
1793 // Emit annotation remarks.
1795
1796 return MPM;
1797 }
1798
1799 if (PGOOpt && PGOOpt->Action == PGOOptions::SampleUse) {
1800 // Load sample profile before running the LTO optimization pipeline.
1801 MPM.addPass(SampleProfileLoaderPass(PGOOpt->ProfileFile,
1802 PGOOpt->ProfileRemappingFile,
1804 // Cache ProfileSummaryAnalysis once to avoid the potential need to insert
1805 // RequireAnalysisPass for PSI before subsequent non-module passes.
1807 }
1808
1809 // Try to run OpenMP optimizations, quick no-op if no OpenMP metadata present.
1811
1812 // Remove unused virtual tables to improve the quality of code generated by
1813 // whole-program devirtualization and bitset lowering.
1814 MPM.addPass(GlobalDCEPass(/*InLTOPostLink=*/true));
1815
1816 // Do basic inference of function attributes from known properties of system
1817 // libraries and other oracles.
1819
1820 if (Level.getSpeedupLevel() > 1) {
1823
1824 // Indirect call promotion. This should promote all the targets that are
1825 // left by the earlier promotion pass that promotes intra-module targets.
1826 // This two-step promotion is to save the compile time. For LTO, it should
1827 // produce the same result as if we only do promotion here.
1829 true /* InLTO */, PGOOpt && PGOOpt->Action == PGOOptions::SampleUse));
1830
1831 // Propagate constants at call sites into the functions they call. This
1832 // opens opportunities for globalopt (and inlining) by substituting function
1833 // pointers passed as arguments to direct uses of functions.
1834 MPM.addPass(IPSCCPPass(IPSCCPOptions(/*AllowFuncSpec=*/
1835 Level != OptimizationLevel::Os &&
1836 Level != OptimizationLevel::Oz)));
1837
1838 // Attach metadata to indirect call sites indicating the set of functions
1839 // they may target at run-time. This should follow IPSCCP.
1841 }
1842
1843 // Now deduce any function attributes based on the current code.
1844 MPM.addPass(
1846
1847 // Do RPO function attribute inference across the module to forward-propagate
1848 // attributes where applicable.
1849 // FIXME: Is this really an optimization rather than a canonicalization?
1851
1852 // Use in-range annotations on GEP indices to split globals where beneficial.
1853 MPM.addPass(GlobalSplitPass());
1854
1855 // Run whole program optimization of virtual call when the list of callees
1856 // is fixed.
1857 MPM.addPass(WholeProgramDevirtPass(ExportSummary, nullptr));
1858
1859 // Stop here at -O1.
1860 if (Level == OptimizationLevel::O1) {
1861 // The LowerTypeTestsPass needs to run to lower type metadata and the
1862 // type.test intrinsics. The pass does nothing if CFI is disabled.
1863 MPM.addPass(LowerTypeTestsPass(ExportSummary, nullptr));
1864 // Run a second time to clean up any type tests left behind by WPD for use
1865 // in ICP (which is performed earlier than this in the regular LTO
1866 // pipeline).
1867 MPM.addPass(LowerTypeTestsPass(nullptr, nullptr, true));
1868
1870
1871 // Emit annotation remarks.
1873
1874 return MPM;
1875 }
1876
1877 // Optimize globals to try and fold them into constants.
1878 MPM.addPass(GlobalOptPass());
1879
1880 // Promote any localized globals to SSA registers.
1882
1883 // Linking modules together can lead to duplicate global constants, only
1884 // keep one copy of each constant.
1886
1887 // Remove unused arguments from functions.
1889
1890 // Reduce the code after globalopt and ipsccp. Both can open up significant
1891 // simplification opportunities, and both can propagate functions through
1892 // function pointers. When this happens, we often have to resolve varargs
1893 // calls, etc, so let instcombine do this.
1894 FunctionPassManager PeepholeFPM;
1895 PeepholeFPM.addPass(InstCombinePass());
1896 if (Level.getSpeedupLevel() > 1)
1897 PeepholeFPM.addPass(AggressiveInstCombinePass());
1898 invokePeepholeEPCallbacks(PeepholeFPM, Level);
1899
1900 MPM.addPass(createModuleToFunctionPassAdaptor(std::move(PeepholeFPM),
1902
1903 // Lower variadic functions for supported targets prior to inlining.
1905
1906 // Note: historically, the PruneEH pass was run first to deduce nounwind and
1907 // generally clean up exception handling overhead. It isn't clear this is
1908 // valuable as the inliner doesn't currently care whether it is inlining an
1909 // invoke or a call.
1910 // Run the inliner now.
1911 if (EnableModuleInliner) {
1915 } else {
1918 /* MandatoryFirst */ true,
1921 }
1922
1923 // Perform context disambiguation after inlining, since that would reduce the
1924 // amount of additional cloning required to distinguish the allocation
1925 // contexts.
1928
1929 // Optimize globals again after we ran the inliner.
1930 MPM.addPass(GlobalOptPass());
1931
1932 // Run the OpenMPOpt pass again after global optimizations.
1934
1935 // Garbage collect dead functions.
1936 MPM.addPass(GlobalDCEPass(/*InLTOPostLink=*/true));
1937
1938 // If we didn't decide to inline a function, check to see if we can
1939 // transform it to pass arguments by value instead of by reference.
1941
1943 // The IPO Passes may leave cruft around. Clean up after them.
1944 FPM.addPass(InstCombinePass());
1945 invokePeepholeEPCallbacks(FPM, Level);
1946
1949
1951
1952 // Do a post inline PGO instrumentation and use pass. This is a context
1953 // sensitive PGO pass.
1954 if (PGOOpt) {
1955 if (PGOOpt->CSAction == PGOOptions::CSIRInstr)
1956 addPGOInstrPasses(MPM, Level, /*RunProfileGen=*/true,
1957 /*IsCS=*/true, PGOOpt->AtomicCounterUpdate,
1958 PGOOpt->CSProfileGenFile, PGOOpt->ProfileRemappingFile,
1959 PGOOpt->FS);
1960 else if (PGOOpt->CSAction == PGOOptions::CSIRUse)
1961 addPGOInstrPasses(MPM, Level, /*RunProfileGen=*/false,
1962 /*IsCS=*/true, PGOOpt->AtomicCounterUpdate,
1963 PGOOpt->ProfileFile, PGOOpt->ProfileRemappingFile,
1964 PGOOpt->FS);
1965 }
1966
1967 // Break up allocas
1969
1970 // LTO provides additional opportunities for tailcall elimination due to
1971 // link-time inlining, and visibility of nocapture attribute.
1973
1974 // Run a few AA driver optimizations here and now to cleanup the code.
1975 MPM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM),
1977
1978 MPM.addPass(
1980
1981 // Require the GlobalsAA analysis for the module so we can query it within
1982 // MainFPM.
1985 // Invalidate AAManager so it can be recreated and pick up the newly
1986 // available GlobalsAA.
1987 MPM.addPass(
1989 }
1990
1991 FunctionPassManager MainFPM;
1994 /*AllowSpeculation=*/true),
1995 /*UseMemorySSA=*/true, /*UseBlockFrequencyInfo=*/false));
1996
1997 if (RunNewGVN)
1998 MainFPM.addPass(NewGVNPass());
1999 else
2000 MainFPM.addPass(GVNPass());
2001
2002 // Remove dead memcpy()'s.
2003 MainFPM.addPass(MemCpyOptPass());
2004
2005 // Nuke dead stores.
2006 MainFPM.addPass(DSEPass());
2007 MainFPM.addPass(MoveAutoInitPass());
2009
2010 LoopPassManager LPM;
2011 if (EnableLoopFlatten && Level.getSpeedupLevel() > 1)
2012 LPM.addPass(LoopFlattenPass());
2015 // FIXME: Add loop interchange.
2016
2017 // Unroll small loops and perform peeling.
2018 LPM.addPass(LoopFullUnrollPass(Level.getSpeedupLevel(),
2019 /* OnlyWhenForced= */ !PTO.LoopUnrolling,
2021 // The loop passes in LPM (LoopFullUnrollPass) do not preserve MemorySSA.
2022 // *All* loop passes must preserve it, in order to be able to use it.
2024 std::move(LPM), /*UseMemorySSA=*/false, /*UseBlockFrequencyInfo=*/true));
2025
2026 MainFPM.addPass(LoopDistributePass());
2027
2028 addVectorPasses(Level, MainFPM, /* IsFullLTO */ true);
2029
2030 // Run the OpenMPOpt CGSCC pass again late.
2033
2034 invokePeepholeEPCallbacks(MainFPM, Level);
2035 MainFPM.addPass(JumpThreadingPass());
2036 MPM.addPass(createModuleToFunctionPassAdaptor(std::move(MainFPM),
2038
2039 // Lower type metadata and the type.test intrinsic. This pass supports
2040 // clang's control flow integrity mechanisms (-fsanitize=cfi*) and needs
2041 // to be run at link time if CFI is enabled. This pass does nothing if
2042 // CFI is disabled.
2043 MPM.addPass(LowerTypeTestsPass(ExportSummary, nullptr));
2044 // Run a second time to clean up any type tests left behind by WPD for use
2045 // in ICP (which is performed earlier than this in the regular LTO pipeline).
2046 MPM.addPass(LowerTypeTestsPass(nullptr, nullptr, true));
2047
2048 // Enable splitting late in the FullLTO post-link pipeline.
2051
2052 // Add late LTO optimization passes.
2053 FunctionPassManager LateFPM;
2054
2055 // LoopSink pass sinks instructions hoisted by LICM, which serves as a
2056 // canonicalization pass that enables other optimizations. As a result,
2057 // LoopSink pass needs to be a very late IR pass to avoid undoing LICM
2058 // result too early.
2059 LateFPM.addPass(LoopSinkPass());
2060
2061 // This hoists/decomposes div/rem ops. It should run after other sink/hoist
2062 // passes to avoid re-sinking, but before SimplifyCFG because it can allow
2063 // flattening of blocks.
2064 LateFPM.addPass(DivRemPairsPass());
2065
2066 // Delete basic blocks, which optimization passes may have killed.
2068 .convertSwitchRangeToICmp(true)
2069 .hoistCommonInsts(true)
2070 .speculateUnpredictables(true)));
2071 MPM.addPass(createModuleToFunctionPassAdaptor(std::move(LateFPM)));
2072
2073 // Drop bodies of available_externally objects to improve GlobalDCE.
2075
2076 // Now that we have optimized the program, discard unreachable functions.
2077 MPM.addPass(GlobalDCEPass(/*InLTOPostLink=*/true));
2078
2079 if (PTO.MergeFunctions)
2081
2082 if (PTO.CallGraphProfile)
2083 MPM.addPass(CGProfilePass(/*InLTOPostLink=*/true));
2084
2086
2087 // Emit annotation remarks.
2089
2090 return MPM;
2091}
2092
// buildO0DefaultPipeline: assembles the minimal -O0 module pipeline. The only
// semantically required transformation is mandatory (always_inline) inlining;
// everything else here is instrumentation (PGO, pseudo probes, entry/exit
// instrumentation), coroutine lowering, and user-registered extension-point
// callbacks.
// NOTE(review): this is a Doxygen-scraped listing — the signature line (2093)
// and several statement lines (e.g. 2098 MPM declaration, 2105/2109/2116
// addPass calls) are missing from this view; code text below is preserved
// byte-for-byte.
2094 bool LTOPreLink) {
// Guard: this builder is only valid at -O0; other levels use the full
// per-module pipeline builders.
2095 assert(Level == OptimizationLevel::O0 &&
2096 "buildO0DefaultPipeline should only be used with O0");
2097
2099
2100 // Perform pseudo probe instrumentation in O0 mode. This is for the
2101 // consistency between different build modes. For example, a LTO build can be
2102 // mixed with an O0 prelink and an O2 postlink. Loading a sample profile in
2103 // the postlink will require pseudo probe instrumentation in the prelink.
2104 if (PGOOpt && PGOOpt->PseudoProbeForProfiling)
2106
// IR-based PGO (instrumentation generation or profile use) still runs at O0
// so that profiles stay consistent across build modes.
2107 if (PGOOpt && (PGOOpt->Action == PGOOptions::IRInstr ||
2108 PGOOpt->Action == PGOOptions::IRUse))
2110 MPM,
2111 /*RunProfileGen=*/(PGOOpt->Action == PGOOptions::IRInstr),
2112 /*IsCS=*/false, PGOOpt->AtomicCounterUpdate, PGOOpt->ProfileFile,
2113 PGOOpt->ProfileRemappingFile, PGOOpt->FS);
2114
2115 // Instrument function entry and exit before all inlining.
2117 EntryExitInstrumenterPass(/*PostInlining=*/false)));
2118
2120
2121 if (PGOOpt && PGOOpt->DebugInfoForProfiling)
2123
2125
2126 // Build a minimal pipeline based on the semantics required by LLVM,
2127 // which is just that always inlining occurs. Further, disable generating
2128 // lifetime intrinsics to avoid enabling further optimizations during
2129 // code generation.
2131 /*InsertLifetimeIntrinsics=*/false));
2132
// Optional function merging and matrix-intrinsic lowering, gated on tuning
// options / command-line flags.
2133 if (PTO.MergeFunctions)
2135
2136 if (EnableMatrix)
2137 MPM.addPass(
2139
// Run each registered extension-point callback group, wrapping the resulting
// pass managers in the appropriate adaptors; empty managers are not added so
// the O0 pipeline stays minimal.
2140 if (!CGSCCOptimizerLateEPCallbacks.empty()) {
2141 CGSCCPassManager CGPM;
2143 if (!CGPM.isEmpty())
2145 }
2146 if (!LateLoopOptimizationsEPCallbacks.empty()) {
2147 LoopPassManager LPM;
2149 if (!LPM.isEmpty()) {
2151 createFunctionToLoopPassAdaptor(std::move(LPM))));
2152 }
2153 }
2154 if (!LoopOptimizerEndEPCallbacks.empty()) {
2155 LoopPassManager LPM;
2157 if (!LPM.isEmpty()) {
2159 createFunctionToLoopPassAdaptor(std::move(LPM))));
2160 }
2161 }
2162 if (!ScalarOptimizerLateEPCallbacks.empty()) {
2165 if (!FPM.isEmpty())
2166 MPM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM)));
2167 }
2168
2170
2171 if (!VectorizerStartEPCallbacks.empty()) {
2174 if (!FPM.isEmpty())
2175 MPM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM)));
2176 }
2177
// Coroutine lowering must happen even at O0: early lowering, CGSCC-driven
// split, cleanup, then GlobalDCE to drop the now-dead coroutine scaffolding.
// CoroConditionalWrapper skips the whole sub-pipeline when the module has no
// coroutine intrinsics.
2178 ModulePassManager CoroPM;
2179 CoroPM.addPass(CoroEarlyPass());
2180 CGSCCPassManager CGPM;
2181 CGPM.addPass(CoroSplitPass());
2182 CoroPM.addPass(createModuleToPostOrderCGSCCPassAdaptor(std::move(CGPM)));
2183 CoroPM.addPass(CoroCleanupPass());
2184 CoroPM.addPass(GlobalDCEPass());
2185 MPM.addPass(CoroConditionalWrapper(std::move(CoroPM)));
2186
2188
// When this O0 compile feeds a later LTO link, append the passes the LTO
// backend requires in the prelink output.
2189 if (LTOPreLink)
2190 addRequiredLTOPreLinkPasses(MPM);
2191
2193
2194 return MPM;
2195}
2196
// buildDefaultAAPipeline: constructs the default AAManager. Registration
// order below is significant — it fixes the priority in which the individual
// alias analyses are queried.
// NOTE(review): Doxygen-scraped listing — the signature line (2197) and the
// concrete AA.register… calls (lines 2206, 2210-2211, 2217-2218, 2222) are
// missing from this view; code text below is preserved byte-for-byte.
2198 AAManager AA;
2199
2200 // The order in which these are registered determines their priority when
2201 // being queried.
2202
2203 // First we register the basic alias analysis that provides the majority of
2204 // per-function local AA logic. This is a stateless, on-demand local set of
2205 // AA techniques.
2207
2208 // Next we query fast, specialized alias analyses that wrap IR-embedded
2209 // information about aliasing.
2212
2213 // Add support for querying global aliasing information when available.
2214 // Because the `AAManager` is a function analysis and `GlobalsAA` is a module
2215 // analysis, all that the `AAManager` can do is query for any *cached*
2216 // results from `GlobalsAA` through a readonly proxy.
2219
2220 // Add target-specific alias analyses.
// Only when a TargetMachine is available; it may register AAs of its own
// (see TargetMachine::registerDefaultAliasAnalyses).
2221 if (TM)
2223
2224 return AA;
2225}
aarch64 falkor hwpf fix Falkor HW Prefetch Fix Late Phase
AggressiveInstCombiner - Combine expression patterns to form expressions with fewer,...
Provides passes to inline "always_inline" functions.
This is the interface for LLVM's primary stateless and local alias analysis.
This file provides the interface for LLVM's Call Graph Profile pass.
This header provides classes for managing passes over SCCs of the call graph.
#define clEnumValN(ENUMVAL, FLAGNAME, DESC)
Definition: CommandLine.h:686
cl::opt< std::string > UseCtxProfile("use-ctx-profile", cl::init(""), cl::Hidden, cl::desc("Use the specified contextual profile file"))
This file provides the interface for a simple, fast CSE pass.
This file provides a pass which clones the current module and runs the provided pass pipeline on the ...
Super simple passes to force specific function attrs from the commandline into the IR for debugging p...
Provides passes for computing function attributes based on interprocedural analyses.
This file provides the interface for LLVM's Global Value Numbering pass which eliminates fully redund...
This is the interface for a simple mod/ref and alias analysis over globals.
AcceleratorCodeSelection - Identify all functions reachable from a kernel, removing those that are un...
Interfaces for passes which infer implicit function attributes from the name and signature of functio...
This file provides the primary interface to the instcombine pass.
Defines passes for running instruction simplification across chunks of IR.
This file provides the interface for LLVM's PGO Instrumentation lowering pass.
See the comments on JumpThreadingPass.
static LVOptions Options
Definition: LVOptions.cpp:25
This header defines the LoopLoadEliminationPass object.
This header provides classes for managing a pipeline of passes over loops in LLVM IR.
The header file for the LowerConstantIntrinsics pass as used by the new pass manager.
The header file for the LowerExpectIntrinsic pass as used by the new pass manager.
This pass performs merges of loads and stores on both sides of a diamond (hammock).
This file provides the interface for LLVM's Global Value Numbering pass.
This header enumerates the LLVM-provided high-level optimization levels.
This file provides the interface for IR based instrumentation passes ( (profile-gen,...
Define option tunables for PGO.
static cl::opt< bool > EnableMergeFunctions("enable-merge-functions", cl::init(false), cl::Hidden, cl::desc("Enable function merging as part of the optimization pipeline"))
static cl::opt< bool > EnableGlobalAnalyses("enable-global-analyses", cl::init(true), cl::Hidden, cl::desc("Enable inter-procedural analyses"))
static cl::opt< bool > EnableIROutliner("ir-outliner", cl::init(false), cl::Hidden, cl::desc("Enable ir outliner pass"))
static cl::opt< bool > RunNewGVN("enable-newgvn", cl::init(false), cl::Hidden, cl::desc("Run the NewGVN pass"))
static cl::opt< bool > DisablePreInliner("disable-preinline", cl::init(false), cl::Hidden, cl::desc("Disable pre-instrumentation inliner"))
static cl::opt< bool > EnableEagerlyInvalidateAnalyses("eagerly-invalidate-analyses", cl::init(true), cl::Hidden, cl::desc("Eagerly invalidate more analyses in default pipelines"))
static cl::opt< bool > ExtraVectorizerPasses("extra-vectorizer-passes", cl::init(false), cl::Hidden, cl::desc("Run cleanup optimization passes after vectorization"))
static void addAnnotationRemarksPass(ModulePassManager &MPM)
static cl::opt< bool > EnablePostPGOLoopRotation("enable-post-pgo-loop-rotation", cl::init(true), cl::Hidden, cl::desc("Run the loop rotation transformation after PGO instrumentation"))
static InlineParams getInlineParamsFromOptLevel(OptimizationLevel Level)
static cl::opt< bool > EnableGVNSink("enable-gvn-sink", cl::desc("Enable the GVN sinking pass (default = off)"))
static cl::opt< bool > PerformMandatoryInliningsFirst("mandatory-inlining-first", cl::init(false), cl::Hidden, cl::desc("Perform mandatory inlinings module-wide, before performing " "inlining"))
static cl::opt< bool > RunPartialInlining("enable-partial-inlining", cl::init(false), cl::Hidden, cl::desc("Run Partial inlinining pass"))
static cl::opt< bool > EnableGVNHoist("enable-gvn-hoist", cl::desc("Enable the GVN hoisting pass (default = off)"))
cl::opt< std::string > UseCtxProfile
static cl::opt< bool > EnableDFAJumpThreading("enable-dfa-jump-thread", cl::desc("Enable DFA jump threading"), cl::init(false), cl::Hidden)
static cl::opt< bool > EnableCHR("enable-chr", cl::init(true), cl::Hidden, cl::desc("Enable control height reduction optimization (CHR)"))
static cl::opt< bool > EnableHotColdSplit("hot-cold-split", cl::desc("Enable hot-cold splitting pass"))
static cl::opt< bool > EnableLoopInterchange("enable-loopinterchange", cl::init(false), cl::Hidden, cl::desc("Enable the experimental LoopInterchange Pass"))
static cl::opt< bool > EnableSampledInstr("enable-sampled-instrumentation", cl::init(false), cl::Hidden, cl::desc("Enable profile instrumentation sampling (default = off)"))
static cl::opt< int > PreInlineThreshold("preinline-threshold", cl::Hidden, cl::init(75), cl::desc("Control the amount of inlining in pre-instrumentation inliner " "(default = 75)"))
static cl::opt< bool > EnableLoopHeaderDuplication("enable-loop-header-duplication", cl::init(false), cl::Hidden, cl::desc("Enable loop header duplication at any optimization level"))
static cl::opt< bool > EnablePGOForceFunctionAttrs("enable-pgo-force-function-attrs", cl::desc("Enable pass to set function attributes based on PGO profiles"), cl::init(false))
static cl::opt< bool > EnableUnrollAndJam("enable-unroll-and-jam", cl::init(false), cl::Hidden, cl::desc("Enable Unroll And Jam Pass"))
static cl::opt< bool > EnableModuleInliner("enable-module-inliner", cl::init(false), cl::Hidden, cl::desc("Enable module inliner"))
static cl::opt< bool > EnableMatrix("enable-matrix", cl::init(false), cl::Hidden, cl::desc("Enable lowering of the matrix intrinsics"))
static cl::opt< AttributorRunOption > AttributorRun("attributor-enable", cl::Hidden, cl::init(AttributorRunOption::NONE), cl::desc("Enable the attributor inter-procedural deduction pass"), cl::values(clEnumValN(AttributorRunOption::ALL, "all", "enable all attributor runs"), clEnumValN(AttributorRunOption::MODULE, "module", "enable module-wide attributor runs"), clEnumValN(AttributorRunOption::CGSCC, "cgscc", "enable call graph SCC attributor runs"), clEnumValN(AttributorRunOption::NONE, "none", "disable attributor runs")))
static cl::opt< bool > EnableOrderFileInstrumentation("enable-order-file-instrumentation", cl::init(false), cl::Hidden, cl::desc("Enable order file instrumentation (default = off)"))
static cl::opt< bool > UseLoopVersioningLICM("enable-loop-versioning-licm", cl::init(false), cl::Hidden, cl::desc("Enable the experimental Loop Versioning LICM pass"))
static cl::opt< bool > EnableSyntheticCounts("enable-npm-synthetic-counts", cl::Hidden, cl::desc("Run synthetic function entry count generation " "pass"))
static bool isLTOPreLink(ThinOrFullLTOPhase Phase)
static cl::opt< bool > EnablePGOInlineDeferral("enable-npm-pgo-inline-deferral", cl::init(true), cl::Hidden, cl::desc("Enable inline deferral during PGO"))
Flag to enable inline deferral during PGO.
static cl::opt< bool > EnableJumpTableToSwitch("enable-jump-table-to-switch", cl::desc("Enable JumpTableToSwitch pass (default = off)"))
static cl::opt< InliningAdvisorMode > UseInlineAdvisor("enable-ml-inliner", cl::init(InliningAdvisorMode::Default), cl::Hidden, cl::desc("Enable ML policy for inliner. Currently trained for -Oz only"), cl::values(clEnumValN(InliningAdvisorMode::Default, "default", "Heuristics-based inliner version"), clEnumValN(InliningAdvisorMode::Development, "development", "Use development mode (runtime-loadable model)"), clEnumValN(InliningAdvisorMode::Release, "release", "Use release mode (AOT-compiled model)")))
static cl::opt< bool > FlattenedProfileUsed("flattened-profile-used", cl::init(false), cl::Hidden, cl::desc("Indicate the sample profile being used is flattened, i.e., " "no inline hierachy exists in the profile"))
static cl::opt< bool > EnableConstraintElimination("enable-constraint-elimination", cl::init(true), cl::Hidden, cl::desc("Enable pass to eliminate conditions based on linear constraints"))
static cl::opt< bool > EnableLoopFlatten("enable-loop-flatten", cl::init(false), cl::Hidden, cl::desc("Enable the LoopFlatten Pass"))
This header defines various interfaces for pass management in LLVM.
This file implements relative lookup table converter that converts lookup tables to relative lookup t...
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
This file provides the interface for LLVM's Scalar Replacement of Aggregates pass.
This file provides the interface for the pseudo probe implementation for AutoFDO.
This file provides the interface for the sampled PGO loader pass.
This is the interface for a metadata-based scoped no-alias analysis.
This file provides the interface for the pass responsible for both simplifying and canonicalizing the...
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
This is the interface for a metadata-based TBAA.
Defines the virtual file system interface vfs::FileSystem.
A manager for alias analyses.
void registerFunctionAnalysis()
Register a specific AA result.
void registerModuleAnalysis()
Register a specific AA result.
Inlines functions marked as "always_inline".
Definition: AlwaysInliner.h:32
Argument promotion pass.
Assign a GUID to functions as metadata.
Analysis pass providing a never-invalidated alias analysis result.
Simple pass that canonicalizes aliases.
A pass that merges duplicate global constants into a single constant.
Definition: ConstantMerge.h:29
This class implements a trivial dead store elimination.
Eliminate dead arguments (and return values) from functions.
A pass that transforms external global definitions into declarations.
Pass embeds a copy of the module optimized with the provided pass pipeline into a global variable.
The core GVN pass object.
Definition: GVN.h:117
Pass to remove unused function declarations.
Definition: GlobalDCE.h:36
Optimize globals that never have their address taken.
Definition: GlobalOpt.h:25
Pass to perform split of global variables.
Definition: GlobalSplit.h:26
Analysis pass providing a never-invalidated alias analysis result.
Pass to outline cold regions.
Pass to perform interprocedural constant propagation.
Definition: SCCP.h:48
Pass to outline similar regions.
Definition: IROutliner.h:444
Run instruction simplification across each instruction in the function.
The instrumentation pass for recording function order.
Instrumentation based profiling lowering pass.
A smart pointer to a reference-counted object that inherits from RefCountedBase or ThreadSafeRefCount...
This pass performs 'jump threading', which looks at blocks that have multiple predecessors and multip...
Definition: JumpThreading.h:79
Performs Loop Invariant Code Motion Pass.
Definition: LICM.h:66
Loop unroll pass that only does full loop unrolling and peeling.
Performs Loop Idiom Recognize Pass.
Performs Loop Inst Simplify Pass.
A simple loop rotation transformation.
Definition: LoopRotation.h:24
Performs basic CFG simplifications to assist other loop passes.
A pass that does profile-guided sinking of instructions into loops.
Definition: LoopSink.h:33
A simple loop rotation transformation.
Loop unroll pass that will support both full and partial unrolling.
Merge identical functions.
The module inliner pass for the new pass manager.
Definition: ModuleInliner.h:27
Module pass, wrapping the inliner pass.
Definition: Inliner.h:62
void addModulePass(T Pass)
Add a module pass that runs before the CGSCC passes.
Definition: Inliner.h:78
Class to hold module path string table and global value map, and encapsulate methods for operating on...
Simple pass that provides a name to every anonymous globals.
OpenMP optimizations pass.
Definition: OpenMPOpt.h:42
static const OptimizationLevel O3
Optimize for fast execution as much as possible.
static const OptimizationLevel Oz
A very specialized mode that will optimize for code size at any and all costs.
static const OptimizationLevel O0
Disable as many optimizations as possible.
static const OptimizationLevel Os
Similar to O2 but tries to optimize for small code size instead of fast execution without triggering ...
static const OptimizationLevel O2
Optimize for fast execution as much as possible without triggering significant incremental compile ti...
static const OptimizationLevel O1
Optimize quickly without destroying debuggability.
The indirect function call promotion pass.
The instrumentation (profile-instr-gen) pass for IR based PGO.
The instrumentation (profile-instr-gen) pass for IR based PGO.
The profile annotation (profile-instr-use) pass for IR based PGO.
The profile size based optimization pass for memory intrinsics.
Pass to remove unused function declarations.
ModulePassManager buildO0DefaultPipeline(OptimizationLevel Level, bool LTOPreLink=false)
Build an O0 pipeline with the minimal semantically required passes.
void invokeFullLinkTimeOptimizationLastEPCallbacks(ModulePassManager &MPM, OptimizationLevel Level)
ModuleInlinerWrapperPass buildInlinerPipeline(OptimizationLevel Level, ThinOrFullLTOPhase Phase)
Construct the module pipeline that performs inlining as well as the inlining-driven cleanups.
void invokeOptimizerLastEPCallbacks(ModulePassManager &MPM, OptimizationLevel Level)
void invokeVectorizerStartEPCallbacks(FunctionPassManager &FPM, OptimizationLevel Level)
AAManager buildDefaultAAPipeline()
Build the default AAManager with the default alias analysis pipeline registered.
void invokeCGSCCOptimizerLateEPCallbacks(CGSCCPassManager &CGPM, OptimizationLevel Level)
ModulePassManager buildThinLTOPreLinkDefaultPipeline(OptimizationLevel Level)
Build a pre-link, ThinLTO-targeting default optimization pipeline to a pass manager.
void invokeScalarOptimizerLateEPCallbacks(FunctionPassManager &FPM, OptimizationLevel Level)
ModulePassManager buildPerModuleDefaultPipeline(OptimizationLevel Level, bool LTOPreLink=false)
Build a per-module default optimization pipeline.
void invokePipelineStartEPCallbacks(ModulePassManager &MPM, OptimizationLevel Level)
FunctionPassManager buildFunctionSimplificationPipeline(OptimizationLevel Level, ThinOrFullLTOPhase Phase)
Construct the core LLVM function canonicalization and simplification pipeline.
void invokePeepholeEPCallbacks(FunctionPassManager &FPM, OptimizationLevel Level)
void invokeLoopOptimizerEndEPCallbacks(LoopPassManager &LPM, OptimizationLevel Level)
ModulePassManager buildLTODefaultPipeline(OptimizationLevel Level, ModuleSummaryIndex *ExportSummary)
Build an LTO default optimization pipeline to a pass manager.
ModulePassManager buildModuleInlinerPipeline(OptimizationLevel Level, ThinOrFullLTOPhase Phase)
Construct the module pipeline that performs inlining with module inliner pass.
ModulePassManager buildThinLTODefaultPipeline(OptimizationLevel Level, const ModuleSummaryIndex *ImportSummary)
Build a ThinLTO default optimization pipeline to a pass manager.
void invokeLateLoopOptimizationsEPCallbacks(LoopPassManager &LPM, OptimizationLevel Level)
void invokeOptimizerEarlyEPCallbacks(ModulePassManager &MPM, OptimizationLevel Level)
void invokePipelineEarlySimplificationEPCallbacks(ModulePassManager &MPM, OptimizationLevel Level)
void invokeFullLinkTimeOptimizationEarlyEPCallbacks(ModulePassManager &MPM, OptimizationLevel Level)
ModulePassManager buildFatLTODefaultPipeline(OptimizationLevel Level, bool ThinLTO, bool EmitSummary)
Build a fat object default optimization pipeline.
ModulePassManager buildModuleSimplificationPipeline(OptimizationLevel Level, ThinOrFullLTOPhase Phase)
Construct the core LLVM module canonicalization and simplification pipeline.
ModulePassManager buildModuleOptimizationPipeline(OptimizationLevel Level, ThinOrFullLTOPhase LTOPhase)
Construct the core LLVM module optimization pipeline.
void addPGOInstrPassesForO0(ModulePassManager &MPM, bool RunProfileGen, bool IsCS, bool AtomicCounterUpdate, std::string ProfileFile, std::string ProfileRemappingFile, IntrusiveRefCntPtr< vfs::FileSystem > FS)
Add PGOInstrumenation passes for O0 only.
ModulePassManager buildLTOPreLinkDefaultPipeline(OptimizationLevel Level)
Build a pre-link, LTO-targeting default optimization pipeline to a pass manager.
LLVM_ATTRIBUTE_MINSIZE std::enable_if_t< is_detected< HasRunOnLoopT, PassT >::value > addPass(PassT &&Pass)
LLVM_ATTRIBUTE_MINSIZE std::enable_if_t<!std::is_same_v< PassT, PassManager > > addPass(PassT &&Pass)
Definition: PassManager.h:195
bool isEmpty() const
Returns if the pass manager contains any passes.
Definition: PassManager.h:217
unsigned LicmMssaNoAccForPromotionCap
Tuning option to disable promotion to scalars in LICM with MemorySSA, if the number of access is too ...
Definition: PassBuilder.h:74
bool SLPVectorization
Tuning option to enable/disable slp loop vectorization, set based on opt level.
Definition: PassBuilder.h:59
int InlinerThreshold
Tuning option to override the default inliner threshold.
Definition: PassBuilder.h:88
bool CallGraphProfile
Tuning option to enable/disable call graph profile.
Definition: PassBuilder.h:78
bool MergeFunctions
Tuning option to enable/disable function merging.
Definition: PassBuilder.h:85
bool ForgetAllSCEVInLoopUnroll
Tuning option to forget all SCEV loops in LoopUnroll.
Definition: PassBuilder.h:66
unsigned LicmMssaOptCap
Tuning option to cap the number of calls to retrieve clobbering accesses in MemorySSA,...
Definition: PassBuilder.h:70
bool LoopInterleaving
Tuning option to set loop interleaving on/off, set based on opt level.
Definition: PassBuilder.h:51
PipelineTuningOptions()
Constructor sets pipeline tuning defaults based on cl::opts.
bool LoopUnrolling
Tuning option to enable/disable loop unrolling. Its default value is true.
Definition: PassBuilder.h:62
bool LoopVectorization
Tuning option to enable/disable loop vectorization, set based on opt level.
Definition: PassBuilder.h:55
Reassociate commutative expressions.
Definition: Reassociate.h:85
A pass to do RPO deduction and propagation of function attributes.
Definition: FunctionAttrs.h:73
This pass performs function-level constant propagation and merging.
Definition: SCCP.h:29
The sample profiler data loader pass.
Definition: SampleProfile.h:39
Analysis pass providing a never-invalidated alias analysis result.
This pass transforms loops that contain branches or switches on loop- invariant conditions to have mu...
A pass to simplify and canonicalize the CFG of a function.
Definition: SimplifyCFG.h:29
virtual void registerDefaultAliasAnalyses(AAManager &)
Allow the target to register alias analyses with the AAManager for use with the new pass manager.
Analysis pass providing a never-invalidated alias analysis result.
Optimize scalar/vector interactions in IR using target cost models.
Definition: VectorCombine.h:23
Interfaces for registering analysis passes, producing common pass manager configurations,...
@ C
The default llvm calling convention, compatible with C.
Definition: CallingConv.h:34
ValuesClass values(OptsTy... Options)
Helper to build a ValuesClass by forwarding a variable number of arguments as an initializer list to ...
Definition: CommandLine.h:711
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:443
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
cl::opt< bool > EnableKnowledgeRetention
ModuleToFunctionPassAdaptor createModuleToFunctionPassAdaptor(FunctionPassT &&Pass, bool EagerlyInvalidate=false)
A function to deduce a function pass type and wrap it in the templated adaptor.
Definition: PassManager.h:848
@ MODULE
Definition: Attributor.h:6419
@ CGSCC
Definition: Attributor.h:6420
ThinOrFullLTOPhase
This enumerates the LLVM full LTO or ThinLTO optimization phases.
Definition: Pass.h:76
@ FullLTOPreLink
Full LTO prelink phase.
@ ThinLTOPostLink
ThinLTO postlink (backend compile) phase.
@ None
No LTO/ThinLTO behavior needed.
@ FullLTOPostLink
Full LTO postlink (backend compile) phase.
@ ThinLTOPreLink
ThinLTO prelink (summary) phase.
ModuleToPostOrderCGSCCPassAdaptor createModuleToPostOrderCGSCCPassAdaptor(CGSCCPassT &&Pass)
A function to deduce a function pass type and wrap it in the templated adaptor.
CGSCCToFunctionPassAdaptor createCGSCCToFunctionPassAdaptor(FunctionPassT &&Pass, bool EagerlyInvalidate=false, bool NoRerun=false)
A function to deduce a function pass type and wrap it in the templated adaptor.
cl::opt< bool > ForgetSCEVInLoopUnroll
bool AreStatisticsEnabled()
Check if statistics are enabled.
Definition: Statistic.cpp:139
cl::opt< bool > EnableInferAlignmentPass
cl::opt< bool > EnableMemProfContextDisambiguation
Enable MemProf context disambiguation for thin link.
InlineParams getInlineParams()
Generate the parameters to tune the inline cost analysis based only on the commandline options.
cl::opt< unsigned > SetLicmMssaNoAccForPromotionCap
std::enable_if_t< is_detected< HasRunOnLoopT, LoopPassT >::value, FunctionToLoopPassAdaptor > createFunctionToLoopPassAdaptor(LoopPassT &&Pass, bool UseMemorySSA=false, bool UseBlockFrequencyInfo=false, bool UseBranchProbabilityInfo=false)
A function to deduce a loop pass type and wrap it in the templated adaptor.
cl::opt< unsigned > MaxDevirtIterations("max-devirt-iterations", cl::ReallyHidden, cl::init(4))
cl::opt< unsigned > SetLicmMssaOptCap
A DCE pass that assumes instructions are dead until proven otherwise.
Definition: ADCE.h:31
Pass to convert @llvm.global.annotations to !annotation metadata.
This pass attempts to minimize the number of assume intrinsics without losing any information.
Hoist/decompose integer division and remainder instructions to enable CFG improvements and better cod...
Definition: DivRemPairs.h:23
A simple and fast domtree-based CSE pass.
Definition: EarlyCSE.h:30
A pass manager to run a set of extra function simplification passes after vectorization,...
Pass which forces specific function attributes into the IR, primarily as a debugging tool.
A simple and fast domtree-based GVN pass to hoist common expressions from sibling branches.
Definition: GVN.h:392
Uses an "inverted" value numbering to decide the similarity of expressions and sinks similar expressi...
Definition: GVN.h:399
A set of parameters to control various transforms performed by IPSCCP pass.
Definition: SCCP.h:35
A pass which infers function attributes from the names and signatures of function declarations in a m...
Provides context on when an inline advisor is constructed in the pipeline (e.g., link phase,...
Definition: InlineAdvisor.h:58
Thresholds to tune inline cost analysis.
Definition: InlineCost.h:206
std::optional< int > HotCallSiteThreshold
Threshold to use when the callsite is considered hot.
Definition: InlineCost.h:223
int DefaultThreshold
The default threshold to start with for a callee.
Definition: InlineCost.h:208
std::optional< bool > EnableDeferral
Indicate whether we should allow inline deferral.
Definition: InlineCost.h:236
std::optional< int > HintThreshold
Threshold to use for callees with inline hint.
Definition: InlineCost.h:211
Options for the frontend instrumentation based profiling pass.
A no-op pass template which simply forces a specific analysis result to be invalidated.
Definition: PassManager.h:901
Pass to forward loads in a loop around the backedge to subsequent iterations.
A set of parameters used to control various transforms performed by the LoopUnroll pass.
The LoopVectorize Pass.
Computes function attributes in post-order over the call graph.
Definition: FunctionAttrs.h:49
A utility pass template to force an analysis result to be available.
Definition: PassManager.h:874