25 std::istringstream SS(Str);
26 return Parse(SS, ParseCoverage);
30 if (!
Parse(IS, ParseCoverage)) {
31 Printf(
"MERGE: failed to parse the control file (unexpected error)\n");
53 if (!std::getline(IS, Line,
'\n'))
return false;
54 std::istringstream L1(Line);
57 if (NumFiles == 0 || NumFiles > 10000000)
return false;
60 if (!std::getline(IS, Line,
'\n'))
return false;
61 std::istringstream L2(Line);
64 if (NumFilesInFirstCorpus > NumFiles)
return false;
67 Files.resize(NumFiles);
68 for (
size_t i = 0;
i < NumFiles;
i++)
73 size_t ExpectedStartMarker = 0;
74 const size_t kInvalidStartMarker = -1;
75 size_t LastSeenStartMarker = kInvalidStartMarker;
76 while (std::getline(IS, Line,
'\n')) {
77 std::istringstream ISS1(Line);
82 if (Marker ==
"STARTED") {
84 if (ExpectedStartMarker != N)
86 ISS1 >>
Files[ExpectedStartMarker].Size;
87 LastSeenStartMarker = ExpectedStartMarker;
89 ExpectedStartMarker++;
90 }
else if (Marker ==
"DONE") {
92 size_t CurrentFileIdx =
N;
93 if (CurrentFileIdx != LastSeenStartMarker)
95 LastSeenStartMarker = kInvalidStartMarker;
97 auto &V =
Files[CurrentFileIdx].Features;
99 while (ISS1 >> std::hex >> N)
101 std::sort(V.begin(), V.end());
107 if (LastSeenStartMarker != kInvalidStartMarker)
119 std::set<uint32_t> AllFeatures;
123 auto &Cur =
Files[
i].Features;
124 AllFeatures.insert(Cur.begin(), Cur.end());
126 size_t InitialNumFeatures = AllFeatures.size();
129 for (
size_t i = NumFilesInFirstCorpus;
i <
Files.size();
i++) {
130 auto &Cur =
Files[
i].Features;
131 std::vector<uint32_t> Tmp;
133 AllFeatures.end(), std::inserter(Tmp, Tmp.begin()));
142 if (a.
Size != b.Size)
143 return a.
Size < b.Size;
144 return a.
Features.size() > b.Features.size();
149 for (
size_t i = NumFilesInFirstCorpus;
i <
Files.size();
i++) {
150 auto &Cur =
Files[
i].Features;
153 size_t OldSize = AllFeatures.size();
154 AllFeatures.insert(Cur.begin(), Cur.end());
155 if (AllFeatures.size() > OldSize)
156 NewFiles->push_back(
Files[
i].Name);
158 return AllFeatures.size() - InitialNumFeatures;
163 Printf(
"MERGE-INNER: using the control file '%s'\n", CFPath.c_str());
165 std::ifstream IF(CFPath);
166 M.ParseOrExit(IF,
false);
168 if (!M.LastFailure.empty())
169 Printf(
"MERGE-INNER: '%s' caused a failure at the previous merge step\n",
170 M.LastFailure.c_str());
172 Printf(
"MERGE-INNER: %zd total files;"
173 " %zd processed earlier; will process %zd files now\n",
174 M.Files.size(), M.FirstNotProcessedFile,
175 M.Files.size() - M.FirstNotProcessedFile);
177 std::ofstream OF(CFPath, std::ofstream::out | std::ofstream::app);
178 for (
size_t i = M.FirstNotProcessedFile;
i < M.Files.size();
i++) {
180 if (U.size() > MaxInputLen) {
181 U.resize(MaxInputLen);
184 std::ostringstream StartedLine;
186 OF <<
"STARTED " << std::dec <<
i <<
" " << U.size() <<
"\n";
194 Features.insert(Feature);
199 if (!(TotalNumberOfRuns & (TotalNumberOfRuns - 1)))
200 PrintStats(
"pulse ");
203 for (
size_t F : Features)
204 OF <<
" " << std::hex <<
F;
211 const std::vector<std::string> &Corpora) {
212 if (Corpora.size() <= 1) {
213 Printf(
"Merge requires two or more corpus dirs\n");
216 std::vector<std::string> AllFiles;
218 size_t NumFilesInFirstCorpus = AllFiles.size();
219 for (
size_t i = 1;
i < Corpora.size();
i++)
221 Printf(
"MERGE-OUTER: %zd files, %zd in the initial corpus\n",
222 AllFiles.size(), NumFilesInFirstCorpus);
227 std::ofstream ControlFile(CFPath);
228 ControlFile << AllFiles.size() <<
"\n";
229 ControlFile << NumFilesInFirstCorpus <<
"\n";
230 for (
auto &Path: AllFiles)
231 ControlFile << Path <<
"\n";
233 Printf(
"MERGE-OUTER: failed to write to the control file: %s\n",
242 for (
size_t i = 1; i <= AllFiles.size(); i++) {
243 Printf(
"MERGE-OUTER: attempt %zd\n", i);
247 Printf(
"MERGE-OUTER: succesfull in %zd attempt(s)\n", i);
253 std::ifstream IF(CFPath);
255 Printf(
"MERGE-OUTER: the control file has %zd bytes\n", (
size_t)IF.tellg());
259 std::vector<std::string> NewFiles;
260 size_t NumNewFeatures = M.
Merge(&NewFiles);
261 Printf(
"MERGE-OUTER: %zd new files with %zd new features added\n",
262 NewFiles.size(), NumNewFeatures);
263 for (
auto &
F: NewFiles)
std::vector< uint32_t > Features
void CrashResistantMergeInternalStep(const std::string &ControlFilePath)
void ListFilesInDirRecursive(const std::string &Dir, long *Epoch, std::vector< std::string > *V, bool TopDir)
size_t Merge(std::vector< std::string > *NewFiles)
size_t NumFilesInFirstCorpus
bool Parse(std::istream &IS, bool ParseCoverage)
void ParseOrExit(std::istream &IS, bool ParseCoverage)
S1Ty set_difference(const S1Ty &S1, const S2Ty &S2)
set_difference(A, B) - Return A - B
void Printf(const char *Fmt,...)
void ExecuteCallback(const uint8_t *Data, size_t Size)
std::string DirPlusFile(const std::string &DirPath, const std::string &FileName)
size_t FirstNotProcessedFile
Unit FileToVector(const std::string &Path, size_t MaxSize, bool ExitOnError)
void CrashResistantMerge(const std::vector< std::string > &Args, const std::vector< std::string > &Corpora)
void RemoveFile(const std::string &Path)
const std::string to_string(const T &Value)
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
std::vector< MergeFileInfo > Files
const FeatureBitset Features
size_t CollectFeatures(Callback CB)
int ExecuteCommand(const std::string &Command)
std::string CloneArgsWithoutX(const std::vector< std::string > &Args, const char *X1, const char *X2)