#if defined(LLVM_HAVE_TF_AOT_REGALLOCEVICTMODEL) || defined(LLVM_HAVE_TFLITE)

#define DEBUG_TYPE "ml-regalloc"

#if defined(LLVM_HAVE_TF_AOT_REGALLOCEVICTMODEL)
#include "RegallocEvictModel.h"

#ifdef LLVM_HAVE_TFLITE
    cl::desc("Training log for the register allocator eviction model"));

    cl::desc("The model being trained for register allocation eviction"));

    "regalloc-enable-development-features", cl::Hidden,
    cl::desc("Whether or not to enable features under development for the ML "
             "regalloc advisor"));

#endif // #ifdef LLVM_HAVE_TFLITE
  return "Register Allocation Pass Scoring";
116 "Register Allocation Scoring Pass",
false,
false)
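// The development-mode model consumes raw instruction opcodes; values at or
// above this cutoff are mapped to 0 when instruction features are extracted
// (see extractInstructionFeatures below). The value presumably corresponds to
// the opcode vocabulary the model was trained with.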
static const int OpcodeValueCutoff = 17716;
#define RA_EVICT_FEATURES_LIST(M)                                              \
  M(int64_t, mask, PerLiveRangeShape,                                          \
    "boolean values, 0 for unavailable candidates (i.e. if a position is 0, "  \
    "it can't be evicted)")                                                    \
  M(int64_t, is_free, PerLiveRangeShape,                                       \
    "boolean values, 1 if this phys reg is actually free (no interferences)")  \
  M(float, nr_urgent, PerLiveRangeShape,                                       \
    "number of 'urgent' intervals, normalized. Urgent are those that are OK "  \
    "to break cascades")                                                       \
  M(float, nr_broken_hints, PerLiveRangeShape,                                 \
    "if this position were evicted, how many broken hints would there be")     \
  M(int64_t, is_hint, PerLiveRangeShape,                                       \
    "is this a preferred phys reg for the candidate")                          \
  M(int64_t, is_local, PerLiveRangeShape,                                      \
    "is this live range local to a basic block")                               \
  M(float, nr_rematerializable, PerLiveRangeShape,                             \
    "nr rematerializable ranges")                                              \
  M(float, nr_defs_and_uses, PerLiveRangeShape,                                \
    "bb freq - weighed nr defs and uses")                                      \
  M(float, weighed_reads_by_max, PerLiveRangeShape,                            \
    "bb freq - weighed nr of reads, normalized")                               \
  M(float, weighed_writes_by_max, PerLiveRangeShape,                           \
    "bb freq - weighed nr of writes, normalized")                              \
  M(float, weighed_read_writes_by_max, PerLiveRangeShape,                      \
    "bb freq - weighed nr of uses that are both read and writes, normalized")  \
  M(float, weighed_indvars_by_max, PerLiveRangeShape,                          \
    "bb freq - weighed nr of uses that are indvars, normalized")               \
  M(float, hint_weights_by_max, PerLiveRangeShape,                             \
    "bb freq - weighed nr of uses that are hints, normalized")                 \
  M(float, start_bb_freq_by_max, PerLiveRangeShape,                            \
    "the freq in the start block, normalized")                                 \
  M(float, end_bb_freq_by_max, PerLiveRangeShape,                              \
    "freq of end block, normalized")                                           \
  M(float, hottest_bb_freq_by_max, PerLiveRangeShape,                          \
    "hottest BB freq, normalized")                                             \
  M(float, liverange_size, PerLiveRangeShape,                                  \
    "size (instr index diff) of the LR")                                       \
  M(float, use_def_density, PerLiveRangeShape,                                 \
    "the max weight, as computed by the manual heuristic")                     \
  M(int64_t, max_stage, PerLiveRangeShape,                                     \
    "largest stage of an interval in this LR")                                 \
  M(int64_t, min_stage, PerLiveRangeShape,                                     \
    "lowest stage of an interval in this LR")                                  \
  M(float, progress, {1}, "ratio of current queue size to initial size")
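// The list above is an X-macro: passing a functor macro M expands it once per
// feature. A minimal sketch with a hypothetical PRINT macro (illustrative only):
//   #define PRINT(TYPE, NAME, SHAPE, DESC) #NAME ", "
//   const char *AllFeatureNames = RA_EVICT_FEATURES_LIST(PRINT);
// The real consumers below (_FEATURE_IDX, _DECL_FEATURES, _RESET, SET) follow
// the same pattern to build the feature enum, the TensorSpecs, and the
// reset/assignment code without repeating the feature list.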
#ifdef LLVM_HAVE_TFLITE
#define RA_EVICT_FIRST_DEVELOPMENT_FEATURE(M)                                  \
  M(int64_t, instructions, InstructionsShape,                                  \
    "Opcodes of the instructions covered by the eviction problem")
#define RA_EVICT_REST_DEVELOPMENT_FEATURES(M)                                  \
  M(int64_t, instructions_mapping, InstructionsMappingShape,                   \
    "A binary matrix mapping LRs to instruction opcodes")                      \
  M(float, mbb_frequencies, MBBFrequencyShape,                                 \
    "A vector of machine basic block frequencies")                             \
  M(int64_t, mbb_mapping, InstructionsShape,                                   \
    "A vector of indices mapping instructions to MBBs")
#else
#define RA_EVICT_FIRST_DEVELOPMENT_FEATURE(M)
#define RA_EVICT_REST_DEVELOPMENT_FEATURES(M)
#define DecisionName "index_to_evict"
#define _FEATURE_IDX_SIMPLE(_, name, __, ___) name
#define _FEATURE_IDX(A, B, C, D) _FEATURE_IDX_SIMPLE(A, B, C, D),

#ifdef LLVM_HAVE_TFLITE
#endif // #ifdef LLVM_HAVE_TFLITE

#undef _FEATURE_IDX_SIMPLE
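// Sketch of how the index macros above are typically consumed (the actual enum
// definition is elided here): the feature list expands into enumerators, with
// FeatureCount marking the end of the release-mode features:
//   enum FeatureIDs {
//     RA_EVICT_FEATURES_LIST(_FEATURE_IDX) FeatureCount,
//     ...
//   };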
template <typename T>
size_t getTotalSize(const std::vector<int64_t> &Shape) {
  size_t Ret = sizeof(T);
  for (const auto V : Shape)
    Ret *= V;
  return Ret;
}
#define _RESET(TYPE, NAME, SHAPE, __)                                          \
  std::memset(Runner.getTensorUntyped(FeatureIDs::NAME), 0,                    \
              getTotalSize<TYPE>(SHAPE));
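// _RESET is expanded over the feature list to zero every input tensor before a
// new eviction problem is presented to the model; Runner here is presumably the
// MLModelRunner argument of the (elided) resetInputs helper used below.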
struct LIFeatureComponents {
  double R = 0.0;
  double W = 0.0;
  double RW = 0.0;
  double IndVarUpdates = 0.0;
  double HintWeights = 0.0;
  int64_t NrDefsAndUses = 0;
  float HottestBlockFreq = 0.0;
  bool IsRemat = false;
};

using CandidateRegList =
    std::array<std::pair<MCRegister, bool>, NumberOfInterferences>;
using FeaturesListNormalizer =
    llvm::SmallVector<float, FeatureIDs::FeatureCount>;
  tryFindEvictionCandidatePosition(const LiveInterval &VirtReg,
                                   unsigned OrderLimit, uint8_t CostPerUseLimit,

  uint8_t CostPerUseLimit,

      int64_t IsHint, int64_t LocalIntfsCount, float NrUrgent,

  bool canEvictHintInterference(
    return getDefaultAdvisor().canEvictHintInterference(VirtReg, PhysReg,
  const LIFeatureComponents &getLIFeatureComponents(const LiveInterval &LI) const;

  std::bitset<FeatureIDs::FeatureCount> DoNotNormalize;
  const float InitialQSize;

  using RegID = unsigned;
#define _DECL_FEATURES(type, name, shape, _)                                   \
  TensorSpec::createSpec<type>(#name, shape),
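// _DECL_FEATURES turns each feature list entry into a TensorSpec. A sketch of
// how the model's input spec vector can be assembled (names illustrative):
//   static const std::vector<TensorSpec> InputFeatures{
//       RA_EVICT_FEATURES_LIST(_DECL_FEATURES)
//   };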
class ReleaseModeEvictionAdvisorAnalysis final
  ReleaseModeEvictionAdvisorAnalysis()
    return R->getAdvisorMode() == AdvisorMode::Release;
  std::unique_ptr<RegAllocEvictionAdvisor>
      Runner = std::make_unique<ReleaseModeModelRunner<CompiledModelType>>(
    return std::make_unique<MLEvictAdvisor>(
        MF, RA, Runner.get(), getAnalysis<MachineBlockFrequencyInfo>(),
        getAnalysis<MachineLoopInfo>());

  std::unique_ptr<ReleaseModeModelRunner<CompiledModelType>> Runner;
#ifdef LLVM_HAVE_TFLITE

static const TensorSpec Reward = TensorSpec::createSpec<float>("reward", {1});

#define _DECL_TRAIN_FEATURES(type, name, shape, _)                             \
  TensorSpec::createSpec<type>(std::string("action_") + #name, shape),
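// In development (training) mode the same input features are logged again under
// an "action_" prefix, alongside the discount/step_type/reward tensors declared
// below; the naming appears to follow the TF Agents trajectory convention.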
class DevelopmentModeEvictAdvisor : public MLEvictAdvisor {
      : MLEvictAdvisor(MF, RA, Runner, MBFI, Loops), Log(Log) {}
  int64_t tryFindEvictionCandidatePosition(
      unsigned OrderLimit, uint8_t CostPerUseLimit,
class DevelopmentModeEvictionAdvisorAnalysis final
  DevelopmentModeEvictionAdvisorAnalysis()
      TrainingInputFeatures = {
          TensorSpec::createSpec<float>("action_discount", {1}),
          TensorSpec::createSpec<int32_t>("action_step_type", {1}),
          TensorSpec::createSpec<float>("action_reward", {1})};
      TrainingInputFeatures = {
          TensorSpec::createSpec<float>("action_discount", {1}),
          TensorSpec::createSpec<int32_t>("action_step_type", {1}),
          TensorSpec::createSpec<float>("action_reward", {1})};
    return R->getAdvisorMode() == AdvisorMode::Development;
    if (Log->currentContext() != MF.getName()) {
          "The training log context shouldn't have changed.");
    if (Log->hasObservationInProgress())
      Log->logReward<float>(GetReward());
  std::vector<TensorSpec> TrainingInputFeatures;
  bool doInitialization(Module &M) override {
    if (ModelUnderTraining.empty() && TrainingLog.empty()) {
      Ctx.emitError("Regalloc development mode should be requested with at "
                    "least logging enabled and/or a training model");
    if (ModelUnderTraining.empty())
      Runner = std::make_unique<NoInferenceModelRunner>(Ctx, InputFeatures);
      Runner = ModelUnderTrainingRunner::createAndEnsureValid(
          Ctx, ModelUnderTraining, DecisionName, TrainingInputFeatures);
      Ctx.emitError("Regalloc: could not set up the model runner");
    if (TrainingLog.empty())

    auto OS = std::make_unique<raw_fd_ostream>(TrainingLog, EC);
      M.getContext().emitError(EC.message() + ":" + TrainingLog);

    if (auto *MUTR = dyn_cast<ModelUnderTrainingRunner>(Runner.get()))
        LFS.push_back(Output);

    Log = std::make_unique<Logger>(std::move(OS), LFS, Reward,
  std::unique_ptr<RegAllocEvictionAdvisor>
    Log->switchContext(MF.getName());
    return std::make_unique<DevelopmentModeEvictAdvisor>(
        MF, RA, Runner.get(), getAnalysis<MachineBlockFrequencyInfo>(),
        getAnalysis<MachineLoopInfo>(), Log.get());

  std::unique_ptr<MLModelRunner> Runner;
  std::unique_ptr<Logger> Log;
#endif // #ifdef LLVM_HAVE_TFLITE
      InitialQSize(MLEvictAdvisor::getInitialQueueSize(MF)) {
  DoNotNormalize.set(FeatureIDs::is_free);
  DoNotNormalize.set(FeatureIDs::is_hint);
  DoNotNormalize.set(FeatureIDs::min_stage);
  DoNotNormalize.set(FeatureIDs::max_stage);
  DoNotNormalize.set(FeatureIDs::progress);
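  // The features flagged above are booleans, categorical stages, or an already
  // normalized ratio, so they are left out of the max-based normalization
  // applied to the remaining per-live-range features before inference.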
int64_t MLEvictAdvisor::tryFindEvictionCandidatePosition(
bool MLEvictAdvisor::loadInterferenceFeatures(
  if (Matrix->checkInterference(VirtReg, PhysReg) > LiveRegMatrix::IK_VirtReg) {

  const bool IsLocal = LIS->intervalIsInOneMBB(VirtReg);
  int64_t LocalIntfs = 0;
  float NrUrgent = 0.0f;

  unsigned Cascade = RA.getExtraInfo().getCascadeOrCurrentNext(VirtReg.reg());
    if (IFIntervals.empty() && InterferingIntervals.empty())

    InterferingIntervals.append(IFIntervals.begin(), IFIntervals.end());
      assert(Intf->reg().isVirtual() &&
             "Only expecting virtual register interference from query");

      if (FixedRegisters.count(Intf->reg()))
      if (RA.getExtraInfo().getStage(*Intf) == RS_Done)

          (Intf->isSpillable() ||
           RegClassInfo.getNumAllocatableRegs(

      unsigned IntfCascade = RA.getExtraInfo().getCascade(Intf->reg());
      if (Cascade <= IntfCascade) {

      LocalIntfs += (IsLocal && LIS->intervalIsInOneMBB(*Intf) &&
                     (!EnableLocalReassign || !canReassign(*Intf, PhysReg)));
  extractFeatures(InterferingIntervals, Largest, Pos, IsHint, LocalIntfs,
                  NrUrgent, LRPosInfo);
MCRegister MLEvictAdvisor::tryFindEvictionCandidate(
    uint8_t CostPerUseLimit, const SmallVirtRegSet &FixedRegisters) const {
  auto MaybeOrderLimit = getOrderLimit(VirtReg, Order, CostPerUseLimit);
  if (!MaybeOrderLimit)
    return MCRegister::NoRegister;
  unsigned OrderLimit = *MaybeOrderLimit;
  const bool MustFindEviction =
      (!VirtReg.isSpillable() && CostPerUseLimit == static_cast<uint8_t>(~0u));

  resetInputs(*Runner);

  CandidateRegList Regs;
  Regs.fill({0, false});
    assert(!Regs[Pos].second);
    if (!canAllocatePhysReg(CostPerUseLimit, PhysReg)) {
    if (loadInterferenceFeatures(VirtReg, PhysReg, I.isHint(), FixedRegisters,
                                 Largest, Pos, LRPosInfo)) {
      Regs[Pos] = std::make_pair(PhysReg, true);
  if (Available == 0) {
    assert(!MustFindEviction);
    return MCRegister::NoRegister;
  const size_t ValidPosLimit = Pos;
  if (!MustFindEviction)

  assert(InitialQSize > 0.0 && "We couldn't have gotten here if we had "
                               "nothing to allocate initially.");
#ifdef LLVM_HAVE_TFLITE
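  // The lambdas below adapt LiveIntervals lookups for extractInstructionFeatures:
  // given a SlotIndex they return, respectively, the instruction's opcode (-1
  // when no instruction maps to that index), the frequency of its parent MBB
  // (via the block frequency info), and the parent MBB itself.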
    auto *CurrentMachineInstruction = LIS->getInstructionFromIndex(InputIndex);
    if (!CurrentMachineInstruction) {
    return CurrentMachineInstruction->getOpcode();

    auto *CurrentMachineInstruction = LIS->getInstructionFromIndex(InputIndex);
        CurrentMachineInstruction->getParent());

    auto *CurrentMachineInstruction = LIS->getInstructionFromIndex(InputIndex);
    return CurrentMachineInstruction->getParent();

      FeatureIDs::mbb_frequencies, FeatureIDs::mbb_mapping,
      LIS->getSlotIndexes()->getLastIndex());
#endif // #ifdef LLVM_HAVE_TFLITE
  for (auto &V : Largest)

  *Runner->getTensor<float>(FeatureIDs::progress) =
      static_cast<float>(RA.getQueueSize()) / InitialQSize;
  size_t CandidatePos = tryFindEvictionCandidatePosition(
      VirtReg, Order, OrderLimit, CostPerUseLimit, FixedRegisters);

  assert(Regs[CandidatePos].second);
    assert(!MustFindEviction);
    return MCRegister::NoRegister;
  assert(CandidatePos < ValidPosLimit);
  return Regs[CandidatePos].first;
const LIFeatureComponents &
MLEvictAdvisor::getLIFeatureComponents(const LiveInterval &LI) const {
  LIFeatureComponents Empty;
  auto I = CachedFeatures.insert(std::make_pair(ID, Empty));
  LIFeatureComponents &Ret = I.first->getSecond();
    if (MI->isIdentityCopy() || MI->isImplicitDef())

    std::tie(Reads, Writes) = MI->readsWritesVirtualRegister(LI.reg());

    Ret.R += (Reads && !Writes) * Freq;
    Ret.W += (!Reads && Writes) * Freq;
    Ret.RW += (Reads && Writes) * Freq;

    auto *MBB = MI->getParent();

    if (Writes && IsExiting && LIS->isLiveOutOfMBB(LI, MBB))
      Ret.IndVarUpdates += Freq;

      Ret.HintWeights += Freq;

  Ret.IsRemat = VirtRegAuxInfo::isRematerializable(
void MLEvictAdvisor::extractFeatures(
    int64_t LocalIntfsCount, float NrUrgent,
  int64_t NrDefsAndUses = 0;
  int64_t NrBrokenHints = 0;
  double IndVarUpdates = 0.0;
  double HintWeights = 0.0;
  float StartBBFreq = 0.0;
  float EndBBFreq = 0.0;
  float HottestBlockFreq = 0.0;
  int32_t NrRematerializable = 0;
  float TotalWeight = 0.0;

  SlotIndex EndSI = LIS->getSlotIndexes()->getZeroIndex();
  SlotIndex StartSI = LIS->getSlotIndexes()->getLastIndex();
  int64_t MaxStage = 0;
  for (const auto *L : Intervals) {
    MaxStage = std::max<int64_t>(
        MaxStage, static_cast<int64_t>(RA.getExtraInfo().getStage(LI)));
    MinStage = std::min<int64_t>(
        MinStage, static_cast<int64_t>(RA.getExtraInfo().getStage(LI)));

    const LIFeatureComponents &LIFC = getLIFeatureComponents(LI);
    NrBrokenHints += VRM->hasPreferredPhys(LI.reg());
    NrDefsAndUses += LIFC.NrDefsAndUses;
    HottestBlockFreq = std::max(HottestBlockFreq, LIFC.HottestBlockFreq);
    IndVarUpdates += LIFC.IndVarUpdates;
    HintWeights += LIFC.HintWeights;
    NrRematerializable += LIFC.IsRemat;
    for (auto CurrentSegment : LI) {

  if (!Intervals.empty()) {
    if (EndSI >= LIS->getSlotIndexes()->getLastIndex())
      EndSI = LIS->getSlotIndexes()->getLastIndex().getPrevIndex();
#define SET(ID, TYPE, VAL)                                                     \
  do {                                                                         \
    Runner->getTensor<TYPE>(FeatureIDs::ID)[Pos] = static_cast<TYPE>(VAL);     \
    if (!DoNotNormalize.test(FeatureIDs::ID))                                  \
      Largest[FeatureIDs::ID] =                                                \
          std::max(Largest[FeatureIDs::ID], static_cast<float>(VAL));          \
  } while (false)
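  // SET writes one feature value into the model's input tensor at position Pos
  // and, unless the feature is in DoNotNormalize, folds it into Largest so the
  // per-feature maxima can be used for normalization afterwards.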
  SET(is_free, int64_t, Intervals.empty());
  SET(nr_urgent, float, NrUrgent);
  SET(nr_broken_hints, float, NrBrokenHints);
  SET(is_hint, int64_t, IsHint);
  SET(nr_rematerializable, float, NrRematerializable);
  SET(nr_defs_and_uses, float, NrDefsAndUses);
  SET(weighed_reads_by_max, float, R);
  SET(weighed_writes_by_max, float, W);
  SET(weighed_read_writes_by_max, float, RW);
  SET(weighed_indvars_by_max, float, IndVarUpdates);
  SET(hint_weights_by_max, float, HintWeights);
  SET(start_bb_freq_by_max, float, StartBBFreq);
  SET(end_bb_freq_by_max, float, EndBBFreq);
  SET(hottest_bb_freq_by_max, float, HottestBlockFreq);
  SET(liverange_size, float, Size);
  SET(use_def_density, float, TotalWeight);
  SET(max_stage, int64_t, MaxStage);
  SET(min_stage, int64_t, MinStage);
    const int InstructionsIndex, const int InstructionsMappingIndex,
    const int MBBFreqIndex, const int MBBMappingIndex,

      LRPosInfo.begin(), LRPosInfo.end(),
  size_t InstructionIndex = 0;
  size_t CurrentSegmentIndex = 0;
  SlotIndex CurrentIndex = LRPosInfo[0].Begin;
  std::map<MachineBasicBlock *, size_t> VisitedMBBs;
  size_t CurrentMBBIndex = 0;
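  // Walk the candidate segments (presumably sorted by start index just above)
  // one instruction at a time: for each SlotIndex, record the opcode, mark in
  // the instruction/LR mapping matrix which live ranges cover it, and log the
  // owning MBB's frequency, then either step to the next index or jump to the
  // start of the next segment when a gap is reached.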
  while (CurrentIndex <= LRPosInfo[CurrentSegmentIndex].End &&
    int CurrentOpcode = GetOpcode(CurrentIndex);
    if (CurrentOpcode == -1) {
      if (CurrentIndex >= LastIndex) {

    if (VisitedMBBs.count(CurrentMBBReference) == 0) {
      VisitedMBBs[CurrentMBBReference] = CurrentMBBIndex;
          GetMBBFreq, CurrentMBBReference, RegallocRunner,
          MBBFreqIndex, MBBMappingIndex);
    assert(LRPosInfo[CurrentSegmentIndex].Begin <= CurrentIndex);
    RegallocRunner->getTensor<int64_t>(InstructionsIndex)[InstructionIndex] =
        CurrentOpcode < OpcodeValueCutoff ? CurrentOpcode : 0;

    auto CurrentSegmentPosition = LRPosInfo[CurrentSegmentIndex].Pos;
    RegallocRunner->getTensor<int64_t>(
        InstructionsMappingIndex)[CurrentSegmentPosition *
                                      ModelMaxSupportedInstructionCount +
                                  InstructionIndex] = 1;
    size_t OverlapCheckCurrentSegment = CurrentSegmentIndex + 1;
    while (OverlapCheckCurrentSegment < LRPosInfo.size() &&
           LRPosInfo[OverlapCheckCurrentSegment].Begin <= CurrentIndex) {
      auto OverlapCurrentSegmentPosition =
          LRPosInfo[OverlapCheckCurrentSegment].Pos;
      if (LRPosInfo[OverlapCheckCurrentSegment].End >= CurrentIndex) {
        RegallocRunner->getTensor<int64_t>(
            InstructionsMappingIndex)[OverlapCurrentSegmentPosition *
                                          ModelMaxSupportedInstructionCount +
                                      InstructionIndex] = 1;
      }
      ++OverlapCheckCurrentSegment;
    if (CurrentIndex >= LastIndex) {

    if (CurrentSegmentIndex == LRPosInfo.size() - 1 ||

    if (LRPosInfo[CurrentSegmentIndex + 1].Begin >
        LRPosInfo[CurrentSegmentIndex].End) {
      CurrentIndex = LRPosInfo[CurrentSegmentIndex + 1].Begin;
    ++CurrentSegmentIndex;
    const size_t CurrentInstructionIndex,
    std::map<MachineBasicBlock *, size_t> &VisitedMBBs,
    const int MBBMappingIndex) {
  size_t CurrentMBBIndex = VisitedMBBs[CurrentMBBReference];
  float CurrentMBBFreq = GetMBBFreq(CurrentIndex);
  RegallocRunner->getTensor<float>(MBBFreqIndex)[CurrentMBBIndex] =
      CurrentMBBFreq;
  RegallocRunner->getTensor<int64_t>(
      MBBMappingIndex)[CurrentInstructionIndex] = CurrentMBBIndex;
#ifdef LLVM_HAVE_TFLITE
  return new DevelopmentModeEvictionAdvisorAnalysis();
int64_t DevelopmentModeEvictAdvisor::tryFindEvictionCandidatePosition(
    unsigned OrderLimit, uint8_t CostPerUseLimit,
  if (isa<ModelUnderTrainingRunner>(getRunner())) {
    Ret = MLEvictAdvisor::tryFindEvictionCandidatePosition(
        VirtReg, Order, OrderLimit, CostPerUseLimit, FixedRegisters);
    MCRegister PhysReg = getDefaultAdvisor().tryFindEvictionCandidate(
        VirtReg, Order, CostPerUseLimit, FixedRegisters);
  if (TrainingLog.empty())

  if (Log->hasObservationInProgress())
    Log->logReward<float>(0.0);

  Log->startObservation();
  size_t CurrentFeature = 0;
      ? FeatureIDs::FeaturesWithDevelopmentCount
  for (; CurrentFeature < FeatureCount; ++CurrentFeature) {
    Log->logTensorValue(CurrentFeature,
                        reinterpret_cast<const char *>(
                            getRunner().getTensorUntyped(CurrentFeature)));
  if (auto *MUTR = dyn_cast<ModelUnderTrainingRunner>(&getRunner()))
    for (size_t I = 0; I < MUTR->extraOutputsForLoggingSpecs().size();
         ++I, ++CurrentFeature)
      Log->logTensorValue(
          CurrentFeature,
          reinterpret_cast<const char *>(MUTR->getUntypedExtraOutputValue(I)));

  Log->logTensorValue(CurrentFeature, reinterpret_cast<const char *>(&Ret));
  Log->endObservation();
  std::optional<float> CachedReward;
  auto GetReward = [&]() {
      CachedReward = static_cast<float>(
    return *CachedReward;
  getAnalysis<RegAllocEvictionAdvisorAnalysis>().logRewardIfNeeded(MF,
  getAnalysis<RegAllocPriorityAdvisorAnalysis>().logRewardIfNeeded(MF,

#endif // #ifdef LLVM_HAVE_TFLITE

  return new ReleaseModeEvictionAdvisorAnalysis();

#if !defined(LLVM_HAVE_TFLITE)