48#define DEBUG_TYPE "code-layout"
53 cl::desc(
"Enable machine block placement based on the ext-tsp model, "
54 "optimizing I-cache utilization."));
57 "ext-tsp-apply-without-profile",
58 cl::desc(
"Whether to apply ext-tsp placement for instances w/o profile"),
66 cl::desc(
"The weight of conditional forward jumps for ExtTSP value"));
70 cl::desc(
"The weight of unconditional forward jumps for ExtTSP value"));
74 cl::desc(
"The weight of conditonal backward jumps for ExtTSP value"));
78 cl::desc(
"The weight of unconditonal backward jumps for ExtTSP value"));
82 cl::desc(
"The weight of conditional fallthrough jumps for ExtTSP value"));
86 cl::desc(
"The weight of unconditional fallthrough jumps for ExtTSP value"));
90 cl::desc(
"The maximum distance (in bytes) of a forward jump for ExtTSP"));
94 cl::desc(
"The maximum distance (in bytes) of a backward jump for ExtTSP"));
100 cl::desc(
"The maximum size of a chain to create."));
106 cl::desc(
"The maximum size of a chain to apply splitting"));
112 cl::desc(
"The maximum size of a chain to apply splitting"));
117constexpr double EPS = 1e-8;
122 if (JumpDist > JumpMaxDist)
124 double Prob = 1.0 -
static_cast<double>(JumpDist) / JumpMaxDist;
125 return Weight * Prob * Count;
131 uint64_t Count,
bool IsConditional) {
133 if (SrcAddr + SrcSize == DstAddr) {
134 return jumpExtTSPScore(0, 1, Count,
139 if (SrcAddr + SrcSize < DstAddr) {
140 const uint64_t Dist = DstAddr - (SrcAddr + SrcSize);
146 const uint64_t Dist = SrcAddr + SrcSize - DstAddr;
154enum class MergeTypeTy :
int { X_Y, X1_Y_X2, Y_X2_X1, X2_X1_Y };
160 explicit MergeGainTy() =
default;
161 explicit MergeGainTy(
double Score,
size_t MergeOffset, MergeTypeTy MergeType)
162 : Score(Score), MergeOffset(MergeOffset), MergeType(MergeType) {}
164 double score()
const {
return Score; }
166 size_t mergeOffset()
const {
return MergeOffset; }
168 MergeTypeTy mergeType()
const {
return MergeType; }
172 return (
Other.Score > EPS &&
Other.Score > Score + EPS);
176 void updateIfLessThan(
const MergeGainTy &
Other) {
183 size_t MergeOffset{0};
184 MergeTypeTy MergeType{MergeTypeTy::X_Y};
194 Block(
const Block &) =
delete;
195 Block(Block &&) =
default;
196 Block &operator=(
const Block &) =
delete;
197 Block &operator=(Block &&) =
default;
208 Chain *CurChain{
nullptr};
212 Block *ForcedSucc{
nullptr};
214 Block *ForcedPred{
nullptr};
216 std::vector<Jump *> OutJumps;
218 std::vector<Jump *> InJumps;
223 bool isEntry()
const {
return Index == 0; }
229 Jump(
const Jump &) =
delete;
230 Jump(Jump &&) =
default;
231 Jump &operator=(
const Jump &) =
delete;
232 Jump &operator=(Jump &&) =
default;
241 bool IsConditional{
false};
244 explicit Jump(Block *Source, Block *
Target,
uint64_t ExecutionCount)
251 Chain(
const Chain &) =
delete;
252 Chain(Chain &&) =
default;
253 Chain &operator=(
const Chain &) =
delete;
254 Chain &operator=(Chain &&) =
default;
256 explicit Chain(
uint64_t Id, Block *Block)
261 bool isEntry()
const {
return Blocks[0]->Index == 0; }
263 bool isCold()
const {
264 for (
auto *Block : Blocks) {
265 if (
Block->ExecutionCount > 0)
271 double score()
const {
return Score; }
273 void setScore(
double NewScore) { Score = NewScore; }
275 const std::vector<Block *> &blocks()
const {
return Blocks; }
277 size_t numBlocks()
const {
return Blocks.size(); }
279 const std::vector<std::pair<Chain *, ChainEdge *>> &edges()
const {
283 ChainEdge *getEdge(Chain *
Other)
const {
284 for (
auto It : Edges) {
285 if (It.first ==
Other)
291 void removeEdge(Chain *
Other) {
292 auto It = Edges.begin();
293 while (It != Edges.end()) {
294 if (It->first ==
Other) {
303 Edges.push_back(std::make_pair(
Other, Edge));
306 void merge(Chain *
Other,
const std::vector<Block *> &MergedBlocks) {
315 void mergeEdges(Chain *
Other);
321 Edges.shrink_to_fit();
330 std::vector<Block *>
Blocks;
332 std::vector<std::pair<Chain *, ChainEdge *>> Edges;
340 ChainEdge(
const ChainEdge &) =
delete;
341 ChainEdge(ChainEdge &&) =
default;
342 ChainEdge &operator=(
const ChainEdge &) =
delete;
343 ChainEdge &operator=(ChainEdge &&) =
default;
345 explicit ChainEdge(Jump *Jump)
346 : SrcChain(Jump->
Source->CurChain), DstChain(Jump->
Target->CurChain),
349 const std::vector<Jump *> &jumps()
const {
return Jumps; }
351 void changeEndpoint(Chain *
From, Chain *To) {
352 if (
From == SrcChain)
354 if (
From == DstChain)
358 void appendJump(Jump *Jump) { Jumps.push_back(Jump); }
360 void moveJumps(ChainEdge *
Other) {
361 Jumps.insert(Jumps.end(),
Other->Jumps.begin(),
Other->Jumps.end());
362 Other->Jumps.clear();
363 Other->Jumps.shrink_to_fit();
366 bool hasCachedMergeGain(Chain *Src, Chain *Dst)
const {
367 return Src == SrcChain ? CacheValidForward : CacheValidBackward;
370 MergeGainTy getCachedMergeGain(Chain *Src, Chain *Dst)
const {
371 return Src == SrcChain ? CachedGainForward : CachedGainBackward;
374 void setCachedMergeGain(Chain *Src, Chain *Dst, MergeGainTy MergeGain) {
375 if (Src == SrcChain) {
376 CachedGainForward = MergeGain;
377 CacheValidForward =
true;
379 CachedGainBackward = MergeGain;
380 CacheValidBackward =
true;
384 void invalidateCache() {
385 CacheValidForward =
false;
386 CacheValidBackward =
false;
391 Chain *SrcChain{
nullptr};
393 Chain *DstChain{
nullptr};
395 std::vector<Jump *> Jumps;
399 MergeGainTy CachedGainForward;
400 MergeGainTy CachedGainBackward;
402 bool CacheValidForward{
false};
403 bool CacheValidBackward{
false};
406void Chain::mergeEdges(Chain *
Other) {
407 assert(
this !=
Other &&
"cannot merge a chain with itself");
410 for (
auto EdgeIt :
Other->Edges) {
411 Chain *DstChain = EdgeIt.first;
412 ChainEdge *DstEdge = EdgeIt.second;
413 Chain *TargetChain = DstChain ==
Other ? this : DstChain;
414 ChainEdge *CurEdge = getEdge(TargetChain);
415 if (CurEdge ==
nullptr) {
416 DstEdge->changeEndpoint(
Other,
this);
417 this->
addEdge(TargetChain, DstEdge);
418 if (DstChain !=
this && DstChain !=
Other) {
419 DstChain->addEdge(
this, DstEdge);
422 CurEdge->moveJumps(DstEdge);
425 if (DstChain !=
Other) {
426 DstChain->removeEdge(
Other);
431using BlockIter = std::vector<Block *>::const_iterator;
437 MergedChain(BlockIter Begin1, BlockIter End1, BlockIter Begin2 = BlockIter(),
438 BlockIter End2 = BlockIter(), BlockIter Begin3 = BlockIter(),
439 BlockIter End3 = BlockIter())
440 : Begin1(Begin1), End1(End1), Begin2(Begin2), End2(End2), Begin3(Begin3),
443 template <
typename F>
void forEach(
const F &Func)
const {
444 for (
auto It = Begin1; It != End1; It++)
446 for (
auto It = Begin2; It != End2; It++)
448 for (
auto It = Begin3; It != End3; It++)
452 std::vector<Block *> getBlocks()
const {
453 std::vector<Block *>
Result;
454 Result.reserve(std::distance(Begin1, End1) + std::distance(Begin2, End2) +
455 std::distance(Begin3, End3));
462 const Block *getFirstBlock()
const {
return *Begin1; }
475 using EdgeT = std::pair<uint64_t, uint64_t>;
476 using EdgeCountMap = std::vector<std::pair<EdgeT, uint64_t>>;
479 ExtTSPImpl(
size_t NumNodes,
const std::vector<uint64_t> &NodeSizes,
480 const std::vector<uint64_t> &NodeCounts,
481 const EdgeCountMap &EdgeCounts)
482 : NumNodes(NumNodes) {
483 initialize(NodeSizes, NodeCounts, EdgeCounts);
487 void run(std::vector<uint64_t> &Result) {
498 concatChains(Result);
503 void initialize(
const std::vector<uint64_t> &NodeSizes,
504 const std::vector<uint64_t> &NodeCounts,
505 const EdgeCountMap &EdgeCounts) {
507 AllBlocks.reserve(NumNodes);
512 if (
Node == 0 && ExecutionCount == 0)
514 AllBlocks.emplace_back(
Node,
Size, ExecutionCount);
518 SuccNodes.resize(NumNodes);
519 PredNodes.resize(NumNodes);
520 std::vector<uint64_t> OutDegree(NumNodes, 0);
521 AllJumps.reserve(EdgeCounts.size());
522 for (
auto It : EdgeCounts) {
523 auto Pred = It.first.first;
524 auto Succ = It.first.second;
530 SuccNodes[Pred].push_back(Succ);
531 PredNodes[Succ].push_back(Pred);
532 auto ExecutionCount = It.second;
533 if (ExecutionCount > 0) {
534 auto &
Block = AllBlocks[Pred];
535 auto &SuccBlock = AllBlocks[Succ];
536 AllJumps.emplace_back(&Block, &SuccBlock, ExecutionCount);
537 SuccBlock.InJumps.push_back(&AllJumps.back());
538 Block.OutJumps.push_back(&AllJumps.back());
541 for (
auto &Jump : AllJumps) {
542 assert(OutDegree[Jump.Source->Index] > 0);
543 Jump.IsConditional = OutDegree[Jump.Source->Index] > 1;
547 AllChains.reserve(NumNodes);
548 HotChains.reserve(NumNodes);
549 for (Block &Block : AllBlocks) {
550 AllChains.emplace_back(
Block.Index, &Block);
551 Block.CurChain = &AllChains.back();
552 if (
Block.ExecutionCount > 0) {
553 HotChains.push_back(&AllChains.back());
558 AllEdges.reserve(AllJumps.size());
559 for (Block &Block : AllBlocks) {
560 for (
auto &Jump :
Block.OutJumps) {
561 auto SuccBlock = Jump->Target;
562 ChainEdge *CurEdge =
Block.CurChain->getEdge(SuccBlock->CurChain);
564 if (CurEdge !=
nullptr) {
565 assert(SuccBlock->CurChain->getEdge(
Block.CurChain) !=
nullptr);
566 CurEdge->appendJump(Jump);
570 AllEdges.emplace_back(Jump);
571 Block.CurChain->addEdge(SuccBlock->CurChain, &AllEdges.back());
572 SuccBlock->CurChain->addEdge(
Block.CurChain, &AllEdges.back());
581 void mergeForcedPairs() {
583 for (
auto &Block : AllBlocks) {
584 if (SuccNodes[
Block.Index].size() == 1 &&
585 PredNodes[SuccNodes[
Block.Index][0]].size() == 1 &&
586 SuccNodes[
Block.Index][0] != 0) {
587 size_t SuccIndex = SuccNodes[
Block.Index][0];
588 Block.ForcedSucc = &AllBlocks[SuccIndex];
589 AllBlocks[SuccIndex].ForcedPred = &
Block;
599 for (
auto &Block : AllBlocks) {
600 if (
Block.ForcedSucc ==
nullptr ||
Block.ForcedPred ==
nullptr)
603 auto SuccBlock =
Block.ForcedSucc;
604 while (SuccBlock !=
nullptr && SuccBlock != &Block) {
605 SuccBlock = SuccBlock->ForcedSucc;
607 if (SuccBlock ==
nullptr)
610 AllBlocks[
Block.ForcedPred->Index].ForcedSucc =
nullptr;
611 Block.ForcedPred =
nullptr;
615 for (
auto &Block : AllBlocks) {
616 if (
Block.ForcedPred ==
nullptr &&
Block.ForcedSucc !=
nullptr) {
617 auto CurBlock = &
Block;
618 while (CurBlock->ForcedSucc !=
nullptr) {
619 const auto NextBlock = CurBlock->ForcedSucc;
620 mergeChains(
Block.CurChain, NextBlock->CurChain, 0, MergeTypeTy::X_Y);
621 CurBlock = NextBlock;
628 void mergeChainPairs() {
630 auto compareChainPairs = [](
const Chain *A1,
const Chain *B1,
631 const Chain *A2,
const Chain *B2) {
633 return A1->id() < A2->id();
634 return B1->id() < B2->id();
637 while (HotChains.size() > 1) {
638 Chain *BestChainPred =
nullptr;
639 Chain *BestChainSucc =
nullptr;
640 auto BestGain = MergeGainTy();
642 for (Chain *ChainPred : HotChains) {
644 for (
auto EdgeIter : ChainPred->edges()) {
645 Chain *ChainSucc = EdgeIter.first;
646 class ChainEdge *ChainEdge = EdgeIter.second;
648 if (ChainPred == ChainSucc)
652 if (ChainPred->numBlocks() + ChainSucc->numBlocks() >=
MaxChainSize)
656 MergeGainTy CurGain =
657 getBestMergeGain(ChainPred, ChainSucc, ChainEdge);
658 if (CurGain.score() <= EPS)
661 if (BestGain < CurGain ||
662 (std::abs(CurGain.score() - BestGain.score()) < EPS &&
663 compareChainPairs(ChainPred, ChainSucc, BestChainPred,
666 BestChainPred = ChainPred;
667 BestChainSucc = ChainSucc;
673 if (BestGain.score() <= EPS)
677 mergeChains(BestChainPred, BestChainSucc, BestGain.mergeOffset(),
678 BestGain.mergeType());
685 void mergeColdChains() {
686 for (
size_t SrcBB = 0; SrcBB < NumNodes; SrcBB++) {
689 size_t NumSuccs = SuccNodes[SrcBB].size();
690 for (
size_t Idx = 0;
Idx < NumSuccs;
Idx++) {
691 auto DstBB = SuccNodes[SrcBB][NumSuccs -
Idx - 1];
692 auto SrcChain = AllBlocks[SrcBB].CurChain;
693 auto DstChain = AllBlocks[DstBB].CurChain;
694 if (SrcChain != DstChain && !DstChain->isEntry() &&
695 SrcChain->blocks().back()->Index == SrcBB &&
696 DstChain->blocks().front()->Index == DstBB &&
697 SrcChain->isCold() == DstChain->isCold()) {
698 mergeChains(SrcChain, DstChain, 0, MergeTypeTy::X_Y);
705 double extTSPScore(
const MergedChain &MergedBlocks,
706 const std::vector<Jump *> &Jumps)
const {
710 MergedBlocks.forEach([&](
const Block *BB) {
711 BB->EstimatedAddr = CurAddr;
716 for (
auto &Jump : Jumps) {
717 const Block *SrcBlock = Jump->Source;
718 const Block *DstBlock = Jump->Target;
719 Score += ::extTSPScore(SrcBlock->EstimatedAddr, SrcBlock->Size,
720 DstBlock->EstimatedAddr, Jump->ExecutionCount,
721 Jump->IsConditional);
732 MergeGainTy getBestMergeGain(Chain *ChainPred, Chain *ChainSucc,
733 ChainEdge *Edge)
const {
734 if (Edge->hasCachedMergeGain(ChainPred, ChainSucc)) {
735 return Edge->getCachedMergeGain(ChainPred, ChainSucc);
739 auto Jumps = Edge->jumps();
740 ChainEdge *EdgePP = ChainPred->getEdge(ChainPred);
741 if (EdgePP !=
nullptr) {
742 Jumps.insert(Jumps.end(), EdgePP->jumps().begin(), EdgePP->jumps().end());
744 assert(!Jumps.empty() &&
"trying to merge chains w/o jumps");
747 MergeGainTy Gain = MergeGainTy();
751 auto tryChainMerging = [&](
size_t Offset,
752 const std::vector<MergeTypeTy> &MergeTypes) {
754 if (
Offset == 0 ||
Offset == ChainPred->blocks().size())
757 auto BB = ChainPred->blocks()[
Offset - 1];
758 if (BB->ForcedSucc !=
nullptr)
762 for (
const auto &MergeType : MergeTypes) {
763 Gain.updateIfLessThan(
764 computeMergeGain(ChainPred, ChainSucc, Jumps,
Offset, MergeType));
769 Gain.updateIfLessThan(
770 computeMergeGain(ChainPred, ChainSucc, Jumps, 0, MergeTypeTy::X_Y));
774 for (
auto &Jump : ChainSucc->blocks().front()->InJumps) {
775 const auto SrcBlock = Jump->Source;
776 if (SrcBlock->CurChain != ChainPred)
778 size_t Offset = SrcBlock->CurIndex + 1;
779 tryChainMerging(
Offset, {MergeTypeTy::X1_Y_X2, MergeTypeTy::X2_X1_Y});
783 for (
auto &Jump : ChainSucc->blocks().back()->OutJumps) {
784 const auto DstBlock = Jump->Source;
785 if (DstBlock->CurChain != ChainPred)
787 size_t Offset = DstBlock->CurIndex;
788 tryChainMerging(
Offset, {MergeTypeTy::X1_Y_X2, MergeTypeTy::Y_X2_X1});
798 tryChainMerging(
Offset, {MergeTypeTy::X1_Y_X2, MergeTypeTy::Y_X2_X1,
799 MergeTypeTy::X2_X1_Y});
802 Edge->setCachedMergeGain(ChainPred, ChainSucc, Gain);
810 MergeGainTy computeMergeGain(
const Chain *ChainPred,
const Chain *ChainSucc,
811 const std::vector<Jump *> &Jumps,
813 MergeTypeTy MergeType)
const {
814 auto MergedBlocks = mergeBlocks(ChainPred->blocks(), ChainSucc->blocks(),
815 MergeOffset, MergeType);
818 if ((ChainPred->isEntry() || ChainSucc->isEntry()) &&
819 !MergedBlocks.getFirstBlock()->isEntry())
820 return MergeGainTy();
823 auto NewGainScore = extTSPScore(MergedBlocks, Jumps) - ChainPred->score();
824 return MergeGainTy(NewGainScore, MergeOffset, MergeType);
832 MergedChain mergeBlocks(
const std::vector<Block *> &
X,
833 const std::vector<Block *> &
Y,
size_t MergeOffset,
834 MergeTypeTy MergeType)
const {
836 BlockIter BeginX1 =
X.begin();
837 BlockIter EndX1 =
X.begin() + MergeOffset;
838 BlockIter BeginX2 =
X.begin() + MergeOffset;
839 BlockIter EndX2 =
X.end();
840 BlockIter BeginY =
Y.begin();
841 BlockIter EndY =
Y.end();
845 case MergeTypeTy::X_Y:
846 return MergedChain(BeginX1, EndX2, BeginY, EndY);
847 case MergeTypeTy::X1_Y_X2:
848 return MergedChain(BeginX1, EndX1, BeginY, EndY, BeginX2, EndX2);
849 case MergeTypeTy::Y_X2_X1:
850 return MergedChain(BeginY, EndY, BeginX2, EndX2, BeginX1, EndX1);
851 case MergeTypeTy::X2_X1_Y:
852 return MergedChain(BeginX2, EndX2, BeginX1, EndX1, BeginY, EndY);
859 void mergeChains(Chain *Into, Chain *
From,
size_t MergeOffset,
860 MergeTypeTy MergeType) {
861 assert(Into !=
From &&
"a chain cannot be merged with itself");
864 MergedChain MergedBlocks =
865 mergeBlocks(Into->blocks(),
From->blocks(), MergeOffset, MergeType);
866 Into->merge(
From, MergedBlocks.getBlocks());
867 Into->mergeEdges(
From);
871 ChainEdge *SelfEdge = Into->getEdge(Into);
872 if (SelfEdge !=
nullptr) {
873 MergedBlocks = MergedChain(Into->blocks().begin(), Into->blocks().end());
874 Into->setScore(extTSPScore(MergedBlocks, SelfEdge->jumps()));
881 for (
auto EdgeIter : Into->edges()) {
882 EdgeIter.second->invalidateCache();
887 void concatChains(std::vector<uint64_t> &Order) {
889 std::vector<Chain *> SortedChains;
891 for (
auto &Chain : AllChains) {
892 if (!Chain.blocks().empty()) {
893 SortedChains.push_back(&Chain);
896 double ExecutionCount = 0;
897 for (
auto *Block : Chain.blocks()) {
898 Size +=
static_cast<double>(
Block->Size);
899 ExecutionCount +=
static_cast<double>(
Block->ExecutionCount);
902 ChainDensity[&Chain] = ExecutionCount /
Size;
907 std::stable_sort(SortedChains.begin(), SortedChains.end(),
908 [&](
const Chain *C1,
const Chain *C2) {
911 if (C1->isEntry() != C2->isEntry()) {
912 return C1->isEntry();
915 const double D1 = ChainDensity[C1];
916 const double D2 = ChainDensity[C2];
918 return (D1 != D2) ? (D1 > D2) : (C1->id() < C2->id());
922 Order.reserve(NumNodes);
923 for (Chain *Chain : SortedChains) {
924 for (Block *Block : Chain->blocks()) {
925 Order.push_back(
Block->Index);
932 const size_t NumNodes;
935 std::vector<std::vector<uint64_t>> SuccNodes;
938 std::vector<std::vector<uint64_t>> PredNodes;
941 std::vector<Block> AllBlocks;
944 std::vector<Jump> AllJumps;
947 std::vector<Chain> AllChains;
950 std::vector<ChainEdge> AllEdges;
953 std::vector<Chain *> HotChains;
959 const std::vector<uint64_t> &NodeSizes,
960 const std::vector<uint64_t> &NodeCounts,
961 const std::vector<std::pair<EdgeT, uint64_t>> &EdgeCounts) {
962 size_t NumNodes = NodeSizes.size();
965 assert(NodeCounts.size() == NodeSizes.size() &&
"Incorrect input");
966 assert(NumNodes > 2 &&
"Incorrect input");
969 auto Alg = ExtTSPImpl(NumNodes, NodeSizes, NodeCounts, EdgeCounts);
970 std::vector<uint64_t>
Result;
974 assert(
Result.front() == 0 &&
"Original entry point is not preserved");
975 assert(
Result.size() == NumNodes &&
"Incorrect size of reordered layout");
980 const std::vector<uint64_t> &Order,
const std::vector<uint64_t> &NodeSizes,
981 const std::vector<uint64_t> &NodeCounts,
982 const std::vector<std::pair<EdgeT, uint64_t>> &EdgeCounts) {
984 std::vector<uint64_t>
Addr(NodeSizes.size(), 0);
985 for (
size_t Idx = 1;
Idx < Order.size();
Idx++) {
988 std::vector<uint64_t> OutDegree(NodeSizes.size(), 0);
989 for (
auto It : EdgeCounts) {
990 auto Pred = It.first.first;
996 for (
auto It : EdgeCounts) {
997 auto Pred = It.first.first;
998 auto Succ = It.first.second;
1000 bool IsConditional = OutDegree[Pred] > 1;
1001 Score += ::extTSPScore(
Addr[Pred], NodeSizes[Pred],
Addr[Succ], Count,
1008 const std::vector<uint64_t> &NodeSizes,
1009 const std::vector<uint64_t> &NodeCounts,
1010 const std::vector<std::pair<EdgeT, uint64_t>> &EdgeCounts) {
1011 std::vector<uint64_t> Order(NodeSizes.size());
1012 for (
size_t Idx = 0;
Idx < NodeSizes.size();
Idx++) {
BlockVerifier::State From
static cl::opt< double > BackwardWeightCond("ext-tsp-backward-weight-cond", cl::ReallyHidden, cl::init(0.1), cl::desc("The weight of conditonal backward jumps for ExtTSP value"))
static cl::opt< double > BackwardWeightUncond("ext-tsp-backward-weight-uncond", cl::ReallyHidden, cl::init(0.1), cl::desc("The weight of unconditonal backward jumps for ExtTSP value"))
static cl::opt< bool > EnableChainSplitAlongJumps("ext-tsp-enable-chain-split-along-jumps", cl::ReallyHidden, cl::init(true), cl::desc("The maximum size of a chain to apply splitting"))
static cl::opt< unsigned > ForwardDistance("ext-tsp-forward-distance", cl::ReallyHidden, cl::init(1024), cl::desc("The maximum distance (in bytes) of a forward jump for ExtTSP"))
static cl::opt< unsigned > BackwardDistance("ext-tsp-backward-distance", cl::ReallyHidden, cl::init(640), cl::desc("The maximum distance (in bytes) of a backward jump for ExtTSP"))
static cl::opt< double > ForwardWeightUncond("ext-tsp-forward-weight-uncond", cl::ReallyHidden, cl::init(0.1), cl::desc("The weight of unconditional forward jumps for ExtTSP value"))
static cl::opt< unsigned > ChainSplitThreshold("ext-tsp-chain-split-threshold", cl::ReallyHidden, cl::init(128), cl::desc("The maximum size of a chain to apply splitting"))
static cl::opt< unsigned > MaxChainSize("ext-tsp-max-chain-size", cl::ReallyHidden, cl::init(4096), cl::desc("The maximum size of a chain to create."))
static cl::opt< double > FallthroughWeightUncond("ext-tsp-fallthrough-weight-uncond", cl::ReallyHidden, cl::init(1.05), cl::desc("The weight of unconditional fallthrough jumps for ExtTSP value"))
static cl::opt< double > ForwardWeightCond("ext-tsp-forward-weight-cond", cl::ReallyHidden, cl::init(0.1), cl::desc("The weight of conditional forward jumps for ExtTSP value"))
static cl::opt< double > FallthroughWeightCond("ext-tsp-fallthrough-weight-cond", cl::ReallyHidden, cl::init(1.0), cl::desc("The weight of conditional fallthrough jumps for ExtTSP value"))
Declares methods and data structures for code layout algorithms.
static void clear(coro::Shape &Shape)
Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
std::optional< std::vector< StOtherPiece > > Other
DenseMap< Block *, BlockRelaxAux > Blocks
static GCMetadataPrinterRegistry::Add< ErlangGCPrinter > X("erlang", "erlang-compatible garbage collector")
static void addEdge(SmallVectorImpl< LazyCallGraph::Edge > &Edges, DenseMap< LazyCallGraph::Node *, int > &EdgeIndexMap, LazyCallGraph::Node &N, LazyCallGraph::Edge::Kind EK)
static LoopDeletionResult merge(LoopDeletionResult A, LoopDeletionResult B)
static GCMetadataPrinterRegistry::Add< OcamlGCMetadataPrinter > Y("ocaml", "ocaml 3.10-compatible collector")
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
static void initialize(TargetLibraryInfoImpl &TLI, const Triple &T, ArrayRef< StringLiteral > StandardNames)
Initialize the set of available library functions based on the specified target triple.
Target - Wrapper for Target specific information.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
initializer< Ty > init(const Ty &Val)
PointerTypeMap run(const Module &M)
Compute the PointerTypeMap for the module M.
This is an optimization pass for GlobalISel generic memory operations.
bool operator<(int64_t V1, const APSInt &V2)
auto size(R &&Range, std::enable_if_t< std::is_base_of< std::random_access_iterator_tag, typename std::iterator_traits< decltype(Range.begin())>::iterator_category >::value, void > *=nullptr)
Get the size of a range.
std::pair< uint64_t, uint64_t > EdgeT
cl::opt< bool > ApplyExtTspWithoutProfile
std::vector< uint64_t > applyExtTspLayout(const std::vector< uint64_t > &NodeSizes, const std::vector< uint64_t > &NodeCounts, const std::vector< EdgeCountT > &EdgeCounts)
Find a layout of nodes (basic blocks) of a given CFG optimizing jump locality and thus processor I-ca...
double calcExtTspScore(const std::vector< uint64_t > &Order, const std::vector< uint64_t > &NodeSizes, const std::vector< uint64_t > &NodeCounts, const std::vector< EdgeCountT > &EdgeCounts)
Estimate the "quality" of a given node order in CFG.
void erase_value(Container &C, ValueType V)
Wrapper function to remove a value from a container:
cl::opt< bool > EnableExtTspBlockPlacement