Line data Source code
1 : //===- FileCheck.cpp - Check that File's Contents match what is expected --===//
2 : //
3 : // The LLVM Compiler Infrastructure
4 : //
5 : // This file is distributed under the University of Illinois Open Source
6 : // License. See LICENSE.TXT for details.
7 : //
8 : //===----------------------------------------------------------------------===//
9 : //
10 : // FileCheck does a line-by-line check of a file that validates whether it
11 : // contains the expected content. This is useful for regression tests etc.
12 : //
13 : // This file implements most of the API that will be used by the FileCheck utility
14 : // as well as various unittests.
15 : //===----------------------------------------------------------------------===//
16 :
17 : #include "llvm/Support/FileCheck.h"
18 : #include "llvm/ADT/StringSet.h"
19 : #include <list>
20 : #include <map>
21 :
22 : using namespace llvm;
23 :
24 : /// Parses the given string into the Pattern.
25 : ///
26 : /// \p Prefix provides which prefix is being matched, \p SM provides the
27 : /// SourceMgr used for error reports, and \p LineNumber is the line number in
28 : /// the input file from which the pattern string was read. Returns true in
29 : /// case of an error, false otherwise.
30 2700526 : bool FileCheckPattern::ParsePattern(StringRef PatternStr, StringRef Prefix,
31 : SourceMgr &SM, unsigned LineNumber,
32 : const FileCheckRequest &Req) {
33 2700526 : bool MatchFullLinesHere = Req.MatchFullLines && CheckTy != Check::CheckNot;
34 :
35 2700526 : this->LineNumber = LineNumber;
36 2700526 : PatternLoc = SMLoc::getFromPointer(PatternStr.data());
37 :
38 2700526 : if (!(Req.NoCanonicalizeWhiteSpace && Req.MatchFullLines))
39 : // Ignore trailing whitespace.
40 2776357 : while (!PatternStr.empty() &&
41 2776300 : (PatternStr.back() == ' ' || PatternStr.back() == '\t'))
42 75854 : PatternStr = PatternStr.substr(0, PatternStr.size() - 1);
43 :
44 : // Check that there is something on the line.
45 2700526 : if (PatternStr.empty() && CheckTy != Check::CheckEmpty) {
46 0 : SM.PrintMessage(PatternLoc, SourceMgr::DK_Error,
47 0 : "found empty check string with prefix '" + Prefix + ":'");
48 0 : return true;
49 : }
50 :
51 2700526 : if (!PatternStr.empty() && CheckTy == Check::CheckEmpty) {
52 2 : SM.PrintMessage(
53 : PatternLoc, SourceMgr::DK_Error,
54 2 : "found non-empty check string for empty check with prefix '" + Prefix +
55 2 : ":'");
56 2 : return true;
57 : }
58 :
59 2700524 : if (CheckTy == Check::CheckEmpty) {
60 57 : RegExStr = "(\n$)";
61 57 : return false;
62 : }
63 :
64 : // Check to see if this is a fixed string, or if it has regex pieces.
65 2700467 : if (!MatchFullLinesHere &&
66 4971041 : (PatternStr.size() < 2 || (PatternStr.find("{{") == StringRef::npos &&
67 2316812 : PatternStr.find("[[") == StringRef::npos))) {
68 2011301 : FixedStr = PatternStr;
69 2011301 : return false;
70 : }
71 :
72 689166 : if (MatchFullLinesHere) {
73 20152 : RegExStr += '^';
74 20152 : if (!Req.NoCanonicalizeWhiteSpace)
75 : RegExStr += " *";
76 : }
77 :
78 : // Paren value #0 is for the fully matched string. Any new parenthesized
79 : // values add from there.
80 689166 : unsigned CurParen = 1;
81 :
82 : // Otherwise, there is at least one regex piece. Build up the regex pattern
83 : // by escaping scary characters in fixed strings, building up one big regex.
84 3188876 : while (!PatternStr.empty()) {
85 : // RegEx matches.
86 : if (PatternStr.startswith("{{")) {
87 : // This is the start of a regex match. Scan for the }}.
88 490619 : size_t End = PatternStr.find("}}");
89 490619 : if (End == StringRef::npos) {
90 0 : SM.PrintMessage(SMLoc::getFromPointer(PatternStr.data()),
91 : SourceMgr::DK_Error,
92 : "found start of regex string with no end '}}'");
93 1 : return true;
94 : }
95 :
96 : // Enclose {{}} patterns in parens just like [[]] even though we're not
97 : // capturing the result for any purpose. This is required in case the
98 : // expression contains an alternation like: CHECK: abc{{x|z}}def. We
99 : // want this to turn into: "abc(x|z)def" not "abcx|zdef".
100 490619 : RegExStr += '(';
101 490619 : ++CurParen;
102 :
103 981238 : if (AddRegExToRegEx(PatternStr.substr(2, End - 2), CurParen, SM))
104 : return true;
105 : RegExStr += ')';
106 :
107 490619 : PatternStr = PatternStr.substr(End + 2);
108 947776 : continue;
109 : }
110 :
111 : // Named RegEx matches. These are of two forms: [[foo:.*]] which matches .*
112 : // (or some other regex) and assigns it to the FileCheck variable 'foo'. The
113 : // second form is [[foo]] which is a reference to foo. The variable name
114 : // itself must be of the form "[a-zA-Z_][0-9a-zA-Z_]*", otherwise we reject
115 : // it. This is to catch some common errors.
116 : if (PatternStr.startswith("[[")) {
117 : // Find the closing bracket pair ending the match. End is going to be an
118 : // offset relative to the beginning of the match string.
119 780028 : size_t End = FindRegexVarEnd(PatternStr.substr(2), SM);
120 :
121 780028 : if (End == StringRef::npos) {
122 0 : SM.PrintMessage(SMLoc::getFromPointer(PatternStr.data()),
123 : SourceMgr::DK_Error,
124 : "invalid named regex reference, no ]] found");
125 1 : return true;
126 : }
127 :
128 780028 : StringRef MatchStr = PatternStr.substr(2, End);
129 1560056 : PatternStr = PatternStr.substr(End + 4);
130 :
131 : // Get the regex name (e.g. "foo").
132 780028 : size_t NameEnd = MatchStr.find(':');
133 780028 : StringRef Name = MatchStr.substr(0, NameEnd);
134 :
135 780028 : if (Name.empty()) {
136 0 : SM.PrintMessage(SMLoc::getFromPointer(Name.data()), SourceMgr::DK_Error,
137 : "invalid name in named regex: empty name");
138 0 : return true;
139 : }
140 :
141 : // Verify that the name/expression is well formed. FileCheck currently
142 : // supports @LINE, @LINE+number, @LINE-number expressions. The check here
143 : // is relaxed, more strict check is performed in \c EvaluateExpression.
144 : bool IsExpression = false;
145 5197513 : for (unsigned i = 0, e = Name.size(); i != e; ++i) {
146 4417486 : if (i == 0) {
147 780028 : if (Name[i] == '$') // Global vars start with '$'
148 : continue;
149 779780 : if (Name[i] == '@') {
150 24757 : if (NameEnd != StringRef::npos) {
151 1 : SM.PrintMessage(SMLoc::getFromPointer(Name.data()),
152 : SourceMgr::DK_Error,
153 : "invalid name in named regex definition");
154 1 : return true;
155 : }
156 : IsExpression = true;
157 : continue;
158 : }
159 : }
160 8784962 : if (Name[i] != '_' && !isalnum(Name[i]) &&
161 19514 : (!IsExpression || (Name[i] != '+' && Name[i] != '-'))) {
162 0 : SM.PrintMessage(SMLoc::getFromPointer(Name.data() + i),
163 : SourceMgr::DK_Error, "invalid name in named regex");
164 0 : return true;
165 : }
166 : }
167 :
168 : // Name can't start with a digit.
169 780027 : if (isdigit(static_cast<unsigned char>(Name[0]))) {
170 0 : SM.PrintMessage(SMLoc::getFromPointer(Name.data()), SourceMgr::DK_Error,
171 : "invalid name in named regex");
172 0 : return true;
173 : }
174 :
175 : // Handle [[foo]].
176 780027 : if (NameEnd == StringRef::npos) {
177 : // Handle variables that were defined earlier on the same line by
178 : // emitting a backreference.
179 457157 : if (VariableDefs.find(Name) != VariableDefs.end()) {
180 772 : unsigned VarParenNum = VariableDefs[Name];
181 772 : if (VarParenNum < 1 || VarParenNum > 9) {
182 0 : SM.PrintMessage(SMLoc::getFromPointer(Name.data()),
183 : SourceMgr::DK_Error,
184 : "Can't back-reference more than 9 variables");
185 0 : return true;
186 : }
187 772 : AddBackrefToRegEx(VarParenNum);
188 : } else {
189 456385 : VariableUses.push_back(std::make_pair(Name, RegExStr.size()));
190 : }
191 457157 : continue;
192 : }
193 :
194 : // Handle [[foo:.*]].
195 322870 : VariableDefs[Name] = CurParen;
196 322870 : RegExStr += '(';
197 322870 : ++CurParen;
198 :
199 645740 : if (AddRegExToRegEx(MatchStr.substr(NameEnd + 1), CurParen, SM))
200 : return true;
201 :
202 : RegExStr += ')';
203 : }
204 :
205 : // Handle fixed string matches.
206 : // Find the end, which is the start of the next regex.
207 1551934 : size_t FixedMatchEnd = PatternStr.find("{{");
208 2104690 : FixedMatchEnd = std::min(FixedMatchEnd, PatternStr.find("[["));
209 3103868 : RegExStr += Regex::escape(PatternStr.substr(0, FixedMatchEnd));
210 1551934 : PatternStr = PatternStr.substr(FixedMatchEnd);
211 : }
212 :
213 689165 : if (MatchFullLinesHere) {
214 20152 : if (!Req.NoCanonicalizeWhiteSpace)
215 20130 : RegExStr += " *";
216 20152 : RegExStr += '$';
217 : }
218 :
219 : return false;
220 : }
221 :
222 813489 : bool FileCheckPattern::AddRegExToRegEx(StringRef RS, unsigned &CurParen, SourceMgr &SM) {
223 1626978 : Regex R(RS);
224 : std::string Error;
225 813489 : if (!R.isValid(Error)) {
226 0 : SM.PrintMessage(SMLoc::getFromPointer(RS.data()), SourceMgr::DK_Error,
227 0 : "invalid regex: " + Error);
228 0 : return true;
229 : }
230 :
231 813489 : RegExStr += RS.str();
232 813489 : CurParen += R.getNumMatches();
233 813489 : return false;
234 : }
235 :
236 772 : void FileCheckPattern::AddBackrefToRegEx(unsigned BackrefNum) {
237 : assert(BackrefNum >= 1 && BackrefNum <= 9 && "Invalid backref number");
238 1544 : std::string Backref = std::string("\\") + std::string(1, '0' + BackrefNum);
239 772 : RegExStr += Backref;
240 772 : }
241 :
242 : /// Evaluates expression and stores the result to \p Value.
243 : ///
244 : /// Returns true on success and false when the expression has invalid syntax.
245 24709 : bool FileCheckPattern::EvaluateExpression(StringRef Expr, std::string &Value) const {
246 : // The only supported expression is @LINE([\+-]\d+)?
247 : if (!Expr.startswith("@LINE"))
248 : return false;
249 24709 : Expr = Expr.substr(StringRef("@LINE").size());
250 : int Offset = 0;
251 24709 : if (!Expr.empty()) {
252 19514 : if (Expr[0] == '+')
253 4334 : Expr = Expr.substr(1);
254 15180 : else if (Expr[0] != '-')
255 : return false;
256 19514 : if (Expr.getAsInteger(10, Offset))
257 0 : return false;
258 : }
259 24709 : Value = llvm::itostr(LineNumber + Offset);
260 24709 : return true;
261 : }
262 :
263 : /// Matches the pattern string against the input buffer \p Buffer
264 : ///
265 : /// This returns the position that is matched or npos if there is no match. If
266 : /// there is a match, the size of the matched string is returned in \p
267 : /// MatchLen.
268 : ///
269 : /// The \p VariableTable StringMap provides the current values of filecheck
270 : /// variables and is updated if this match defines new values.
271 2992636 : size_t FileCheckPattern::Match(StringRef Buffer, size_t &MatchLen,
272 : StringMap<StringRef> &VariableTable) const {
273 : // If this is the EOF pattern, match it immediately.
274 2992636 : if (CheckTy == Check::CheckEOF) {
275 9333 : MatchLen = 0;
276 9333 : return Buffer.size();
277 : }
278 :
279 : // If this is a fixed string pattern, just match it now.
280 2983303 : if (!FixedStr.empty()) {
281 2252573 : MatchLen = FixedStr.size();
282 2252573 : return Buffer.find(FixedStr);
283 : }
284 :
285 : // Regex match.
286 :
287 : // If there are variable uses, we need to create a temporary string with the
288 : // actual value.
289 : StringRef RegExToMatch = RegExStr;
290 : std::string TmpStr;
291 730730 : if (!VariableUses.empty()) {
292 333721 : TmpStr = RegExStr;
293 :
294 : unsigned InsertOffset = 0;
295 795748 : for (const auto &VariableUse : VariableUses) {
296 : std::string Value;
297 :
298 924294 : if (VariableUse.first[0] == '@') {
299 24697 : if (!EvaluateExpression(VariableUse.first, Value))
300 : return StringRef::npos;
301 : } else {
302 : StringMap<StringRef>::iterator it =
303 437450 : VariableTable.find(VariableUse.first);
304 : // If the variable is undefined, return an error.
305 874900 : if (it == VariableTable.end())
306 : return StringRef::npos;
307 :
308 : // Look up the value and escape it so that we can put it into the regex.
309 874660 : Value += Regex::escape(it->second);
310 : }
311 :
312 : // Plop it into the regex at the adjusted offset.
313 462027 : TmpStr.insert(TmpStr.begin() + VariableUse.second + InsertOffset,
314 : Value.begin(), Value.end());
315 462027 : InsertOffset += Value.size();
316 : }
317 :
318 : // Match the newly constructed regex.
319 333601 : RegExToMatch = TmpStr;
320 : }
321 :
322 : SmallVector<StringRef, 4> MatchInfo;
323 730610 : if (!Regex(RegExToMatch, Regex::Newline).match(Buffer, &MatchInfo))
324 : return StringRef::npos;
325 :
326 : // Successful regex match.
327 : assert(!MatchInfo.empty() && "Didn't get any match");
328 713598 : StringRef FullMatch = MatchInfo[0];
329 :
330 : // If this defines any variables, remember their values.
331 1045955 : for (const auto &VariableDef : VariableDefs) {
332 : assert(VariableDef.second < MatchInfo.size() && "Internal paren error");
333 332357 : VariableTable[VariableDef.first] = MatchInfo[VariableDef.second];
334 : }
335 :
336 : // Like CHECK-NEXT, CHECK-EMPTY's match range is considered to start after
337 : // the required preceding newline, which is consumed by the pattern in the
338 : // case of CHECK-EMPTY but not CHECK-NEXT.
339 713598 : size_t MatchStartSkip = CheckTy == Check::CheckEmpty;
340 713598 : MatchLen = FullMatch.size() - MatchStartSkip;
341 713598 : return FullMatch.data() - Buffer.data() + MatchStartSkip;
342 : }
343 :
344 :
345 : /// Computes an arbitrary estimate for the quality of matching this pattern at
346 : /// the start of \p Buffer; a distance of zero should correspond to a perfect
347 : /// match.
348 : unsigned
349 148790 : FileCheckPattern::ComputeMatchDistance(StringRef Buffer,
350 : const StringMap<StringRef> &VariableTable) const {
351 : // Just compute the number of matching characters. For regular expressions, we
352 : // just compare against the regex itself and hope for the best.
353 : //
354 : // FIXME: One easy improvement here is have the regex lib generate a single
355 : // example regular expression which matches, and use that as the example
356 : // string.
357 148790 : StringRef ExampleString(FixedStr);
358 148790 : if (ExampleString.empty())
359 46335 : ExampleString = RegExStr;
360 :
361 : // Only compare up to the first line in the buffer, or the string size.
362 148790 : StringRef BufferPrefix = Buffer.substr(0, ExampleString.size());
363 148790 : BufferPrefix = BufferPrefix.split('\n').first;
364 148790 : return BufferPrefix.edit_distance(ExampleString);
365 : }
366 :
367 306 : void FileCheckPattern::PrintVariableUses(const SourceMgr &SM, StringRef Buffer,
368 : const StringMap<StringRef> &VariableTable,
369 : SMRange MatchRange) const {
370 : // If this was a regular expression using variables, print the current
371 : // variable values.
372 306 : if (!VariableUses.empty()) {
373 41 : for (const auto &VariableUse : VariableUses) {
374 : SmallString<256> Msg;
375 : raw_svector_ostream OS(Msg);
376 22 : StringRef Var = VariableUse.first;
377 22 : if (Var[0] == '@') {
378 : std::string Value;
379 12 : if (EvaluateExpression(Var, Value)) {
380 12 : OS << "with expression \"";
381 12 : OS.write_escaped(Var) << "\" equal to \"";
382 12 : OS.write_escaped(Value) << "\"";
383 : } else {
384 0 : OS << "uses incorrect expression \"";
385 0 : OS.write_escaped(Var) << "\"";
386 : }
387 : } else {
388 10 : StringMap<StringRef>::const_iterator it = VariableTable.find(Var);
389 :
390 : // Check for undefined variable references.
391 20 : if (it == VariableTable.end()) {
392 1 : OS << "uses undefined variable \"";
393 1 : OS.write_escaped(Var) << "\"";
394 : } else {
395 9 : OS << "with variable \"";
396 9 : OS.write_escaped(Var) << "\" equal to \"";
397 9 : OS.write_escaped(it->second) << "\"";
398 : }
399 : }
400 :
401 22 : if (MatchRange.isValid())
402 4 : SM.PrintMessage(MatchRange.Start, SourceMgr::DK_Note, OS.str(),
403 : {MatchRange});
404 : else
405 21 : SM.PrintMessage(SMLoc::getFromPointer(Buffer.data()),
406 42 : SourceMgr::DK_Note, OS.str());
407 : }
408 : }
409 306 : }
410 :
411 251 : void FileCheckPattern::PrintFuzzyMatch(
412 : const SourceMgr &SM, StringRef Buffer,
413 : const StringMap<StringRef> &VariableTable) const {
414 : // Attempt to find the closest/best fuzzy match. Usually an error happens
415 : // because some string in the output didn't exactly match. In these cases, we
416 : // would like to show the user a best guess at what "should have" matched, to
417 : // save them having to actually check the input manually.
418 : size_t NumLinesForward = 0;
419 : size_t Best = StringRef::npos;
420 : double BestQuality = 0;
421 :
422 : // Use an arbitrary 4k limit on how far we will search.
423 171221 : for (size_t i = 0, e = std::min(size_t(4096), Buffer.size()); i != e; ++i) {
424 341456 : if (Buffer[i] == '\n')
425 6262 : ++NumLinesForward;
426 :
427 : // Patterns have leading whitespace stripped, so skip whitespace when
428 : // looking for something which looks like a pattern.
429 170728 : if (Buffer[i] == ' ' || Buffer[i] == '\t')
430 : continue;
431 :
432 : // Compute the "quality" of this match as an arbitrary combination of the
433 : // match distance and the number of lines skipped to get to this match.
434 148790 : unsigned Distance = ComputeMatchDistance(Buffer.substr(i), VariableTable);
435 148790 : double Quality = Distance + (NumLinesForward / 100.);
436 :
437 148790 : if (Quality < BestQuality || Best == StringRef::npos) {
438 : Best = i;
439 : BestQuality = Quality;
440 : }
441 : }
442 :
443 : // Print the "possible intended match here" line if we found something
444 : // reasonable and not equal to what we showed in the "scanning from here"
445 : // line.
446 251 : if (Best && Best != StringRef::npos && BestQuality < 50) {
447 215 : SM.PrintMessage(SMLoc::getFromPointer(Buffer.data() + Best),
448 : SourceMgr::DK_Note, "possible intended match here");
449 :
450 : // FIXME: If we wanted to be really friendly we would show why the match
451 : // failed, as it can be hard to spot simple one character differences.
452 : }
453 251 : }
454 :
455 : /// Finds the closing sequence of a regex variable usage or definition.
456 : ///
457 : /// \p Str has to point in the beginning of the definition (right after the
458 : /// opening sequence). Returns the offset of the closing sequence within Str,
459 : /// or npos if it was not found.
460 780028 : size_t FileCheckPattern::FindRegexVarEnd(StringRef Str, SourceMgr &SM) {
461 : // Offset keeps track of the current offset within the input Str
462 : size_t Offset = 0;
463 : // [...] Nesting depth
464 : size_t BracketDepth = 0;
465 :
466 6915694 : while (!Str.empty()) {
467 782866 : if (Str.startswith("]]") && BracketDepth == 0)
468 780028 : return Offset;
469 12271332 : if (Str[0] == '\\') {
470 : // Backslash escapes the next char within regexes, so skip them both.
471 11962 : Str = Str.substr(2);
472 11962 : Offset += 2;
473 : } else {
474 6123704 : switch (Str[0]) {
475 : default:
476 : break;
477 101262 : case '[':
478 101262 : BracketDepth++;
479 101262 : break;
480 101262 : case ']':
481 101262 : if (BracketDepth == 0) {
482 0 : SM.PrintMessage(SMLoc::getFromPointer(Str.data()),
483 : SourceMgr::DK_Error,
484 : "missing closing \"]\" for regex variable");
485 0 : exit(1);
486 : }
487 101262 : BracketDepth--;
488 101262 : break;
489 : }
490 6123704 : Str = Str.substr(1);
491 6123704 : Offset++;
492 : }
493 : }
494 :
495 : return StringRef::npos;
496 : }
497 :
498 : /// Canonicalize whitespaces in the file. Line endings are replaced with
499 : /// UNIX-style '\n'.
500 : StringRef
501 122249 : llvm::FileCheck::CanonicalizeFile(MemoryBuffer &MB,
502 : SmallVectorImpl<char> &OutputBuffer) {
503 122249 : OutputBuffer.reserve(MB.getBufferSize());
504 :
505 1733564294 : for (const char *Ptr = MB.getBufferStart(), *End = MB.getBufferEnd();
506 1733564294 : Ptr != End; ++Ptr) {
507 : // Eliminate trailing dosish \r.
508 1733442045 : if (Ptr <= End - 2 && Ptr[0] == '\r' && Ptr[1] == '\n') {
509 : continue;
510 : }
511 :
512 : // If current char is not a horizontal whitespace or if horizontal
513 : // whitespace canonicalization is disabled, dump it to output as is.
514 1733425543 : if (Req.NoCanonicalizeWhiteSpace || (*Ptr != ' ' && *Ptr != '\t')) {
515 1528481280 : OutputBuffer.push_back(*Ptr);
516 1528481280 : continue;
517 : }
518 :
519 : // Otherwise, add one space and advance over neighboring space.
520 204944263 : OutputBuffer.push_back(' ');
521 319884081 : while (Ptr + 1 != End && (Ptr[1] == ' ' || Ptr[1] == '\t'))
522 : ++Ptr;
523 : }
524 :
525 : // Add a null byte and then return all but that byte.
526 122249 : OutputBuffer.push_back('\0');
527 244498 : return StringRef(OutputBuffer.data(), OutputBuffer.size() - 1);
528 : }
529 :
/// Returns true when \p c can be part of a check-prefix-like word: an
/// alphanumeric character, '-' or '_'.
static bool IsPartOfWord(char c) {
  // Cast through unsigned char: passing a negative char value to isalnum is
  // undefined behavior.
  return (isalnum(static_cast<unsigned char>(c)) || c == '-' || c == '_');
}
533 :
534 : // Get the size of the prefix extension.
535 2699512 : static size_t CheckTypeSize(Check::FileCheckType Ty) {
536 2699512 : switch (Ty) {
537 : case Check::CheckNone:
538 : case Check::CheckBadNot:
539 : return 0;
540 :
541 1347584 : case Check::CheckPlain:
542 1347584 : return sizeof(":") - 1;
543 :
544 969555 : case Check::CheckNext:
545 969555 : return sizeof("-NEXT:") - 1;
546 :
547 3733 : case Check::CheckSame:
548 3733 : return sizeof("-SAME:") - 1;
549 :
550 44578 : case Check::CheckNot:
551 44578 : return sizeof("-NOT:") - 1;
552 :
553 99792 : case Check::CheckDAG:
554 99792 : return sizeof("-DAG:") - 1;
555 :
556 234203 : case Check::CheckLabel:
557 234203 : return sizeof("-LABEL:") - 1;
558 :
559 59 : case Check::CheckEmpty:
560 59 : return sizeof("-EMPTY:") - 1;
561 :
562 : case Check::CheckEOF:
563 : llvm_unreachable("Should not be using EOF size");
564 : }
565 :
566 0 : llvm_unreachable("Bad check type");
567 : }
568 :
569 : // Get a description of the type.
570 306 : static std::string CheckTypeName(StringRef Prefix, Check::FileCheckType Ty) {
571 306 : switch (Ty) {
572 : case Check::CheckNone:
573 0 : return "invalid";
574 : case Check::CheckPlain:
575 : return Prefix;
576 19 : case Check::CheckNext:
577 38 : return Prefix.str() + "-NEXT";
578 0 : case Check::CheckSame:
579 0 : return Prefix.str() + "-SAME";
580 37 : case Check::CheckNot:
581 74 : return Prefix.str() + "-NOT";
582 22 : case Check::CheckDAG:
583 44 : return Prefix.str() + "-DAG";
584 4 : case Check::CheckLabel:
585 8 : return Prefix.str() + "-LABEL";
586 3 : case Check::CheckEmpty:
587 6 : return Prefix.str() + "-EMPTY";
588 : case Check::CheckEOF:
589 1 : return "implicit EOF";
590 : case Check::CheckBadNot:
591 0 : return "bad NOT";
592 : }
593 0 : llvm_unreachable("unknown FileCheckType");
594 : }
595 :
596 0 : static Check::FileCheckType FindCheckType(StringRef Buffer, StringRef Prefix) {
597 0 : if (Buffer.size() <= Prefix.size())
598 0 : return Check::CheckNone;
599 :
600 0 : char NextChar = Buffer[Prefix.size()];
601 :
602 : // Verify that the : is present after the prefix.
603 0 : if (NextChar == ':')
604 0 : return Check::CheckPlain;
605 :
606 0 : if (NextChar != '-')
607 0 : return Check::CheckNone;
608 :
609 0 : StringRef Rest = Buffer.drop_front(Prefix.size() + 1);
610 : if (Rest.startswith("NEXT:"))
611 0 : return Check::CheckNext;
612 :
613 : if (Rest.startswith("SAME:"))
614 0 : return Check::CheckSame;
615 :
616 : if (Rest.startswith("NOT:"))
617 0 : return Check::CheckNot;
618 :
619 : if (Rest.startswith("DAG:"))
620 0 : return Check::CheckDAG;
621 :
622 : if (Rest.startswith("LABEL:"))
623 0 : return Check::CheckLabel;
624 :
625 : if (Rest.startswith("EMPTY:"))
626 0 : return Check::CheckEmpty;
627 :
628 : // You can't combine -NOT with another suffix.
629 : if (Rest.startswith("DAG-NOT:") || Rest.startswith("NOT-DAG:") ||
630 : Rest.startswith("NEXT-NOT:") || Rest.startswith("NOT-NEXT:") ||
631 : Rest.startswith("SAME-NOT:") || Rest.startswith("NOT-SAME:") ||
632 : Rest.startswith("EMPTY-NOT:") || Rest.startswith("NOT-EMPTY:"))
633 0 : return Check::CheckBadNot;
634 :
635 : return Check::CheckNone;
636 : }
637 :
638 : // From the given position, find the next character after the word.
639 924717 : static size_t SkipWord(StringRef Str, size_t Loc) {
640 6213157 : while (Loc < Str.size() && IsPartOfWord(Str[Loc]))
641 5288440 : ++Loc;
642 924717 : return Loc;
643 : }
644 :
645 : /// Search the buffer for the first prefix in the prefix regular expression.
646 : ///
647 : /// This searches the buffer using the provided regular expression, however it
648 : /// enforces constraints beyond that:
649 : /// 1) The found prefix must not be a suffix of something that looks like
650 : /// a valid prefix.
651 : /// 2) The found prefix must be followed by a valid check type suffix using \c
652 : /// FindCheckType above.
653 : ///
654 : /// The first match of the regular expression to satisfy these two is returned,
655 : /// otherwise an empty StringRef is returned to indicate failure.
656 : ///
657 : /// If this routine returns a valid prefix, it will also shrink \p Buffer to
658 : /// start at the beginning of the returned prefix, increment \p LineNumber for
659 : /// each new line consumed from \p Buffer, and set \p CheckTy to the type of
660 : /// check found by examining the suffix.
661 : ///
662 : /// If no valid prefix is found, the state of Buffer, LineNumber, and CheckTy
663 : /// is unspecified.
664 2760651 : static StringRef FindFirstMatchingPrefix(Regex &PrefixRE, StringRef &Buffer,
665 : unsigned &LineNumber,
666 : Check::FileCheckType &CheckTy) {
667 : SmallVector<StringRef, 2> Matches;
668 :
669 3685368 : while (!Buffer.empty()) {
670 : // Find the first (longest) match using the RE.
671 3685300 : if (!PrefixRE.match(Buffer, &Matches))
672 : // No match at all, bail.
673 61071 : return StringRef();
674 :
675 3624229 : StringRef Prefix = Matches[0];
676 : Matches.clear();
677 :
678 : assert(Prefix.data() >= Buffer.data() &&
679 : Prefix.data() < Buffer.data() + Buffer.size() &&
680 : "Prefix doesn't start inside of buffer!");
681 3624229 : size_t Loc = Prefix.data() - Buffer.data();
682 3624229 : StringRef Skipped = Buffer.substr(0, Loc);
683 3624229 : Buffer = Buffer.drop_front(Loc);
684 3624229 : LineNumber += Skipped.count('\n');
685 :
686 : // Check that the matched prefix isn't a suffix of some other check-like
687 : // word.
688 : // FIXME: This is a very ad-hoc check. it would be better handled in some
689 : // other way. Among other things it seems hard to distinguish between
690 : // intentional and unintentional uses of this feature.
691 3624229 : if (Skipped.empty() || !IsPartOfWord(Skipped.back())) {
692 : // Now extract the type.
693 3495604 : CheckTy = FindCheckType(Buffer, Prefix);
694 :
695 : // If we've found a valid check type for this prefix, we're done.
696 3495604 : if (CheckTy != Check::CheckNone)
697 2699512 : return Prefix;
698 : }
699 :
700 : // If we didn't successfully find a prefix, we need to skip this invalid
701 : // prefix and continue scanning. We directly skip the prefix that was
702 : // matched and any additional parts of that check-like word.
703 1849434 : Buffer = Buffer.drop_front(SkipWord(Buffer, Prefix.size()));
704 : }
705 :
706 : // We ran out of buffer while skipping partial matches so give up.
707 68 : return StringRef();
708 : }
709 :
710 : /// Read the check file, which specifies the sequence of expected strings.
711 : ///
712 : /// The strings are added to the CheckStrings vector. Returns true in case of
713 : /// an error, false otherwise.
714 61151 : bool llvm::FileCheck::ReadCheckFile(SourceMgr &SM, StringRef Buffer,
715 : Regex &PrefixRE,
716 : std::vector<FileCheckString> &CheckStrings) {
717 61151 : std::vector<FileCheckPattern> ImplicitNegativeChecks;
718 62173 : for (const auto &PatternString : Req.ImplicitCheckNot) {
719 : // Create a buffer with fake command line content in order to display the
720 : // command line option responsible for the specific implicit CHECK-NOT.
721 1022 : std::string Prefix = "-implicit-check-not='";
722 1022 : std::string Suffix = "'";
723 : std::unique_ptr<MemoryBuffer> CmdLine = MemoryBuffer::getMemBufferCopy(
724 2044 : Prefix + PatternString + Suffix, "command line");
725 :
726 : StringRef PatternInBuffer =
727 1022 : CmdLine->getBuffer().substr(Prefix.size(), PatternString.size());
728 3066 : SM.AddNewSourceBuffer(std::move(CmdLine), SMLoc());
729 :
730 1022 : ImplicitNegativeChecks.push_back(FileCheckPattern(Check::CheckNot));
731 1022 : ImplicitNegativeChecks.back().ParsePattern(PatternInBuffer,
732 1022 : "IMPLICIT-CHECK", SM, 0, Req);
733 : }
734 :
735 122302 : std::vector<FileCheckPattern> DagNotMatches = ImplicitNegativeChecks;
736 :
737 : // LineNumber keeps track of the line on which CheckPrefix instances are
738 : // found.
739 61151 : unsigned LineNumber = 1;
740 :
741 : while (1) {
742 : Check::FileCheckType CheckTy;
743 :
744 : // See if a prefix occurs in the memory buffer.
745 : StringRef UsedPrefix = FindFirstMatchingPrefix(PrefixRE, Buffer, LineNumber,
746 2760651 : CheckTy);
747 2760651 : if (UsedPrefix.empty())
748 : break;
749 : assert(UsedPrefix.data() == Buffer.data() &&
750 : "Failed to move Buffer's start forward, or pointed prefix outside "
751 : "of the buffer!");
752 :
753 : // Location to use for error messages.
754 : const char *UsedPrefixStart = UsedPrefix.data();
755 :
756 : // Skip the buffer to the end.
757 2699512 : Buffer = Buffer.drop_front(UsedPrefix.size() + CheckTypeSize(CheckTy));
758 :
759 : // Complain about useful-looking but unsupported suffixes.
760 2699512 : if (CheckTy == Check::CheckBadNot) {
761 8 : SM.PrintMessage(SMLoc::getFromPointer(Buffer.data()), SourceMgr::DK_Error,
762 8 : "unsupported -NOT combo on prefix '" + UsedPrefix + "'");
763 12 : return true;
764 : }
765 :
766 : // Okay, we found the prefix, yay. Remember the rest of the line, but ignore
767 : // leading whitespace.
768 2699504 : if (!(Req.NoCanonicalizeWhiteSpace && Req.MatchFullLines))
769 5398962 : Buffer = Buffer.substr(Buffer.find_first_not_of(" \t"));
770 :
771 : // Scan ahead to the end of line.
772 2699504 : size_t EOL = Buffer.find_first_of("\n\r");
773 :
774 : // Remember the location of the start of the pattern, for diagnostics.
775 2699504 : SMLoc PatternLoc = SMLoc::getFromPointer(Buffer.data());
776 :
777 : // Parse the pattern.
778 5254634 : FileCheckPattern P(CheckTy);
779 2699571 : if (P.ParsePattern(Buffer.substr(0, EOL), UsedPrefix, SM, LineNumber, Req))
780 4 : return true;
781 :
782 : // Verify that CHECK-LABEL lines do not define or use variables
783 2699501 : if ((CheckTy == Check::CheckLabel) && P.hasVariable()) {
784 0 : SM.PrintMessage(
785 : SMLoc::getFromPointer(UsedPrefixStart), SourceMgr::DK_Error,
786 0 : "found '" + UsedPrefix + "-LABEL:'"
787 0 : " with variable definition or use");
788 0 : return true;
789 : }
790 :
791 2699501 : Buffer = Buffer.substr(EOL);
792 :
793 : // Verify that CHECK-NEXT/SAME/EMPTY lines have at least one CHECK line before them.
794 2699501 : if ((CheckTy == Check::CheckNext || CheckTy == Check::CheckSame ||
795 2699501 : CheckTy == Check::CheckEmpty) &&
796 : CheckStrings.empty()) {
797 : StringRef Type = CheckTy == Check::CheckNext
798 : ? "NEXT"
799 1 : : CheckTy == Check::CheckEmpty ? "EMPTY" : "SAME";
800 1 : SM.PrintMessage(SMLoc::getFromPointer(UsedPrefixStart),
801 : SourceMgr::DK_Error,
802 1 : "found '" + UsedPrefix + "-" + Type +
803 1 : "' without previous '" + UsedPrefix + ": line");
804 : return true;
805 : }
806 :
807 : // Handle CHECK-DAG/-NOT.
808 2699500 : if (CheckTy == Check::CheckDAG || CheckTy == Check::CheckNot) {
809 144370 : DagNotMatches.push_back(P);
810 144370 : continue;
811 : }
812 :
813 : // Okay, add the string we captured to the output vector and move on.
814 2555130 : CheckStrings.emplace_back(P, UsedPrefix, PatternLoc);
815 : std::swap(DagNotMatches, CheckStrings.back().DagNotStrings);
816 2555130 : DagNotMatches = ImplicitNegativeChecks;
817 : }
818 :
819 : // Add an EOF pattern for any trailing CHECK-DAG/-NOTs, and use the first
820 : // prefix as a filler for the error message.
821 61139 : if (!DagNotMatches.empty()) {
822 9351 : CheckStrings.emplace_back(FileCheckPattern(Check::CheckEOF), *Req.CheckPrefixes.begin(),
823 18702 : SMLoc::getFromPointer(Buffer.data()));
824 : std::swap(DagNotMatches, CheckStrings.back().DagNotStrings);
825 : }
826 :
827 61139 : if (CheckStrings.empty()) {
828 7 : errs() << "error: no check strings found with prefix"
829 18 : << (Req.CheckPrefixes.size() > 1 ? "es " : " ");
830 : auto I = Req.CheckPrefixes.begin();
831 : auto E = Req.CheckPrefixes.end();
832 7 : if (I != E) {
833 14 : errs() << "\'" << *I << ":'";
834 : ++I;
835 : }
836 10 : for (; I != E; ++I)
837 6 : errs() << ", \'" << *I << ":'";
838 :
839 7 : errs() << '\n';
840 : return true;
841 : }
842 :
843 : return false;
844 : }
845 :
846 2896509 : static void PrintMatch(bool ExpectedMatch, const SourceMgr &SM,
847 : StringRef Prefix, SMLoc Loc, const FileCheckPattern &Pat,
848 : StringRef Buffer, StringMap<StringRef> &VariableTable,
849 : size_t MatchPos, size_t MatchLen,
850 : const FileCheckRequest &Req) {
851 2896509 : if (ExpectedMatch) {
852 2896474 : if (!Req.Verbose)
853 : return;
854 19 : if (!Req.VerboseVerbose && Pat.getCheckTy() == Check::CheckEOF)
855 : return;
856 : }
857 53 : SMLoc MatchStart = SMLoc::getFromPointer(Buffer.data() + MatchPos);
858 53 : SMLoc MatchEnd = SMLoc::getFromPointer(Buffer.data() + MatchPos + MatchLen);
859 : SMRange MatchRange(MatchStart, MatchEnd);
860 88 : SM.PrintMessage(
861 : Loc, ExpectedMatch ? SourceMgr::DK_Remark : SourceMgr::DK_Error,
862 141 : CheckTypeName(Prefix, Pat.getCheckTy()) + ": " +
863 106 : (ExpectedMatch ? "expected" : "excluded") +
864 : " string found in input");
865 106 : SM.PrintMessage(MatchStart, SourceMgr::DK_Note, "found here", {MatchRange});
866 53 : Pat.PrintVariableUses(SM, Buffer, VariableTable, MatchRange);
867 : }
868 :
869 : static void PrintMatch(bool ExpectedMatch, const SourceMgr &SM,
870 : const FileCheckString &CheckStr, StringRef Buffer,
871 : StringMap<StringRef> &VariableTable, size_t MatchPos,
872 : size_t MatchLen, FileCheckRequest &Req) {
873 2796824 : PrintMatch(ExpectedMatch, SM, CheckStr.Prefix, CheckStr.Loc, CheckStr.Pat,
874 : Buffer, VariableTable, MatchPos, MatchLen, Req);
875 : }
876 :
877 75917 : static void PrintNoMatch(bool ExpectedMatch, const SourceMgr &SM,
878 : StringRef Prefix, SMLoc Loc, const FileCheckPattern &Pat,
879 : StringRef Buffer,
880 : StringMap<StringRef> &VariableTable,
881 : bool VerboseVerbose) {
882 75917 : if (!ExpectedMatch && !VerboseVerbose)
883 : return;
884 :
885 : // Otherwise, we have an error, emit an error message.
886 255 : SM.PrintMessage(Loc,
887 : ExpectedMatch ? SourceMgr::DK_Error : SourceMgr::DK_Remark,
888 508 : CheckTypeName(Prefix, Pat.getCheckTy()) + ": " +
889 506 : (ExpectedMatch ? "expected" : "excluded") +
890 : " string not found in input");
891 :
892 : // Print the "scanning from here" line. If the current position is at the
893 : // end of a line, advance to the start of the next line.
894 506 : Buffer = Buffer.substr(Buffer.find_first_not_of(" \t\n\r"));
895 :
896 253 : SM.PrintMessage(SMLoc::getFromPointer(Buffer.data()), SourceMgr::DK_Note,
897 : "scanning from here");
898 :
899 : // Allow the pattern to print additional information if desired.
900 253 : Pat.PrintVariableUses(SM, Buffer, VariableTable);
901 253 : if (ExpectedMatch)
902 251 : Pat.PrintFuzzyMatch(SM, Buffer, VariableTable);
903 : }
904 :
905 : static void PrintNoMatch(bool ExpectedMatch, const SourceMgr &SM,
906 : const FileCheckString &CheckStr, StringRef Buffer,
907 : StringMap<StringRef> &VariableTable,
908 : bool VerboseVerbose) {
909 234 : PrintNoMatch(ExpectedMatch, SM, CheckStr.Prefix, CheckStr.Loc, CheckStr.Pat,
910 : Buffer, VariableTable, VerboseVerbose);
911 : }
912 :
913 : /// Count the number of newlines in the specified range.
914 972982 : static unsigned CountNumNewlinesBetween(StringRef Range,
915 : const char *&FirstNewLine) {
916 : unsigned NumNewLines = 0;
917 : while (1) {
918 : // Scan for newline.
919 1942313 : Range = Range.substr(Range.find_first_of("\n\r"));
920 1942313 : if (Range.empty())
921 972982 : return NumNewLines;
922 :
923 969331 : ++NumNewLines;
924 :
925 : // Handle \n\r and \r\n as a single newline.
926 969331 : if (Range.size() > 1 && (Range[1] == '\n' || Range[1] == '\r') &&
927 : (Range[0] != Range[1]))
928 0 : Range = Range.substr(1);
929 969331 : Range = Range.substr(1);
930 :
931 969331 : if (NumNewLines == 1)
932 969268 : FirstNewLine = Range.begin();
933 : }
934 : }
935 :
936 : /// Match check string and its "not strings" and/or "dag strings".
937 2797078 : size_t FileCheckString::Check(const SourceMgr &SM, StringRef Buffer,
938 : bool IsLabelScanMode, size_t &MatchLen,
939 : StringMap<StringRef> &VariableTable,
940 : FileCheckRequest &Req) const {
941 : size_t LastPos = 0;
942 : std::vector<const FileCheckPattern *> NotStrings;
943 :
944 : // IsLabelScanMode is true when we are scanning forward to find CHECK-LABEL
945 : // bounds; we have not processed variable definitions within the bounded block
946 : // yet so cannot handle any final CHECK-DAG yet; this is handled when going
947 : // over the block again (including the last CHECK-LABEL) in normal mode.
948 2797078 : if (!IsLabelScanMode) {
949 : // Match "dag strings" (with mixed "not strings" if any).
950 2562928 : LastPos = CheckDag(SM, Buffer, NotStrings, VariableTable, Req);
951 2562928 : if (LastPos == StringRef::npos)
952 : return StringRef::npos;
953 : }
954 :
955 : // Match itself from the last position after matching CHECK-DAG.
956 2797058 : StringRef MatchBuffer = Buffer.substr(LastPos);
957 2797058 : size_t MatchPos = Pat.Match(MatchBuffer, MatchLen, VariableTable);
958 2797058 : if (MatchPos == StringRef::npos) {
959 234 : PrintNoMatch(true, SM, *this, MatchBuffer, VariableTable, Req.VerboseVerbose);
960 234 : return StringRef::npos;
961 : }
962 2796824 : PrintMatch(true, SM, *this, MatchBuffer, VariableTable, MatchPos, MatchLen, Req);
963 :
964 : // Similar to the above, in "label-scan mode" we can't yet handle CHECK-NEXT
965 : // or CHECK-NOT
966 2796824 : if (!IsLabelScanMode) {
967 2562678 : StringRef SkippedRegion = Buffer.substr(LastPos, MatchPos);
968 :
969 : // If this check is a "CHECK-NEXT", verify that the previous match was on
970 : // the previous line (i.e. that there is one newline between them).
971 2562678 : if (CheckNext(SM, SkippedRegion))
972 45 : return StringRef::npos;
973 :
974 : // If this check is a "CHECK-SAME", verify that the previous match was on
975 : // the same line (i.e. that there is no newline between them).
976 2562666 : if (CheckSame(SM, SkippedRegion))
977 : return StringRef::npos;
978 :
979 : // If this match had "not strings", verify that they don't exist in the
980 : // skipped region.
981 2562665 : if (CheckNot(SM, SkippedRegion, NotStrings, VariableTable, Req))
982 : return StringRef::npos;
983 : }
984 :
985 2796779 : return LastPos + MatchPos;
986 : }
987 :
988 : /// Verify there is a single line in the given buffer.
989 2562678 : bool FileCheckString::CheckNext(const SourceMgr &SM, StringRef Buffer) const {
990 2562678 : if (Pat.getCheckTy() != Check::CheckNext &&
991 : Pat.getCheckTy() != Check::CheckEmpty)
992 : return false;
993 :
994 : Twine CheckName =
995 969267 : Prefix +
996 969267 : Twine(Pat.getCheckTy() == Check::CheckEmpty ? "-EMPTY" : "-NEXT");
997 :
998 : // Count the number of newlines between the previous match and this one.
999 : assert(Buffer.data() !=
1000 : SM.getMemoryBuffer(SM.FindBufferContainingLoc(
1001 : SMLoc::getFromPointer(Buffer.data())))
1002 : ->getBufferStart() &&
1003 : "CHECK-NEXT and CHECK-EMPTY can't be the first check in a file");
1004 :
1005 969267 : const char *FirstNewLine = nullptr;
1006 969267 : unsigned NumNewLines = CountNumNewlinesBetween(Buffer, FirstNewLine);
1007 :
1008 969267 : if (NumNewLines == 0) {
1009 0 : SM.PrintMessage(Loc, SourceMgr::DK_Error,
1010 0 : CheckName + ": is on the same line as previous match");
1011 0 : SM.PrintMessage(SMLoc::getFromPointer(Buffer.end()), SourceMgr::DK_Note,
1012 : "'next' match was here");
1013 0 : SM.PrintMessage(SMLoc::getFromPointer(Buffer.data()), SourceMgr::DK_Note,
1014 : "previous match ended here");
1015 0 : return true;
1016 : }
1017 :
1018 969267 : if (NumNewLines != 1) {
1019 12 : SM.PrintMessage(Loc, SourceMgr::DK_Error,
1020 12 : CheckName +
1021 12 : ": is not on the line after the previous match");
1022 12 : SM.PrintMessage(SMLoc::getFromPointer(Buffer.end()), SourceMgr::DK_Note,
1023 : "'next' match was here");
1024 12 : SM.PrintMessage(SMLoc::getFromPointer(Buffer.data()), SourceMgr::DK_Note,
1025 : "previous match ended here");
1026 12 : SM.PrintMessage(SMLoc::getFromPointer(FirstNewLine), SourceMgr::DK_Note,
1027 : "non-matching line after previous match is here");
1028 12 : return true;
1029 : }
1030 :
1031 : return false;
1032 : }
1033 :
1034 : /// Verify there is no newline in the given buffer.
1035 2562666 : bool FileCheckString::CheckSame(const SourceMgr &SM, StringRef Buffer) const {
1036 2562666 : if (Pat.getCheckTy() != Check::CheckSame)
1037 : return false;
1038 :
1039 : // Count the number of newlines between the previous match and this one.
1040 : assert(Buffer.data() !=
1041 : SM.getMemoryBuffer(SM.FindBufferContainingLoc(
1042 : SMLoc::getFromPointer(Buffer.data())))
1043 : ->getBufferStart() &&
1044 : "CHECK-SAME can't be the first check in a file");
1045 :
1046 3715 : const char *FirstNewLine = nullptr;
1047 3715 : unsigned NumNewLines = CountNumNewlinesBetween(Buffer, FirstNewLine);
1048 :
1049 3715 : if (NumNewLines != 0) {
1050 1 : SM.PrintMessage(Loc, SourceMgr::DK_Error,
1051 1 : Prefix +
1052 : "-SAME: is not on the same line as the previous match");
1053 2 : SM.PrintMessage(SMLoc::getFromPointer(Buffer.end()), SourceMgr::DK_Note,
1054 : "'next' match was here");
1055 1 : SM.PrintMessage(SMLoc::getFromPointer(Buffer.data()), SourceMgr::DK_Note,
1056 : "previous match ended here");
1057 1 : return true;
1058 : }
1059 :
1060 : return false;
1061 : }
1062 :
1063 : /// Verify there's no "not strings" in the given buffer.
1064 2563351 : bool FileCheckString::CheckNot(const SourceMgr &SM, StringRef Buffer,
1065 : const std::vector<const FileCheckPattern *> &NotStrings,
1066 : StringMap<StringRef> &VariableTable,
1067 : const FileCheckRequest &Req) const {
1068 2639017 : for (const FileCheckPattern *Pat : NotStrings) {
1069 : assert((Pat->getCheckTy() == Check::CheckNot) && "Expect CHECK-NOT!");
1070 :
1071 75701 : size_t MatchLen = 0;
1072 75701 : size_t Pos = Pat->Match(Buffer, MatchLen, VariableTable);
1073 :
1074 75701 : if (Pos == StringRef::npos) {
1075 75666 : PrintNoMatch(false, SM, Prefix, Pat->getLoc(), *Pat, Buffer,
1076 75666 : VariableTable, Req.VerboseVerbose);
1077 75666 : continue;
1078 : }
1079 :
1080 35 : PrintMatch(false, SM, Prefix, Pat->getLoc(), *Pat, Buffer, VariableTable,
1081 : Pos, MatchLen, Req);
1082 :
1083 35 : return true;
1084 : }
1085 :
1086 : return false;
1087 : }
1088 :
1089 : /// Match "dag strings" and their mixed "not strings".
1090 2562928 : size_t FileCheckString::CheckDag(const SourceMgr &SM, StringRef Buffer,
1091 : std::vector<const FileCheckPattern *> &NotStrings,
1092 : StringMap<StringRef> &VariableTable,
1093 : const FileCheckRequest &Req) const {
1094 2562928 : if (DagNotStrings.empty())
1095 : return 0;
1096 :
1097 : // The start of the search range.
1098 : size_t StartPos = 0;
1099 :
1100 : struct MatchRange {
1101 : size_t Pos;
1102 : size_t End;
1103 : };
1104 : // A sorted list of ranges for non-overlapping CHECK-DAG matches. Match
1105 : // ranges are erased from this list once they are no longer in the search
1106 : // range.
1107 : std::list<MatchRange> MatchRanges;
1108 :
1109 : // We need PatItr and PatEnd later for detecting the end of a CHECK-DAG
1110 : // group, so we don't use a range-based for loop here.
1111 : for (auto PatItr = DagNotStrings.begin(), PatEnd = DagNotStrings.end();
1112 244951 : PatItr != PatEnd; ++PatItr) {
1113 : const FileCheckPattern &Pat = *PatItr;
1114 : assert((Pat.getCheckTy() == Check::CheckDAG ||
1115 : Pat.getCheckTy() == Check::CheckNot) &&
1116 : "Invalid CHECK-DAG or CHECK-NOT!");
1117 :
1118 175415 : if (Pat.getCheckTy() == Check::CheckNot) {
1119 75749 : NotStrings.push_back(&Pat);
1120 75749 : continue;
1121 : }
1122 :
1123 : assert((Pat.getCheckTy() == Check::CheckDAG) && "Expect CHECK-DAG!");
1124 :
1125 : // CHECK-DAG always matches from the start.
1126 99666 : size_t MatchLen = 0, MatchPos = StartPos;
1127 :
1128 : // Search for a match that doesn't overlap a previous match in this
1129 : // CHECK-DAG group.
1130 99666 : for (auto MI = MatchRanges.begin(), ME = MatchRanges.end(); true; ++MI) {
1131 119877 : StringRef MatchBuffer = Buffer.substr(MatchPos);
1132 119877 : size_t MatchPosBuf = Pat.Match(MatchBuffer, MatchLen, VariableTable);
1133 : // With a group of CHECK-DAGs, a single mismatching means the match on
1134 : // that group of CHECK-DAGs fails immediately.
1135 119877 : if (MatchPosBuf == StringRef::npos) {
1136 17 : PrintNoMatch(true, SM, Prefix, Pat.getLoc(), Pat, MatchBuffer,
1137 17 : VariableTable, Req.VerboseVerbose);
1138 17 : return StringRef::npos;
1139 : }
1140 : // Re-calc it as the offset relative to the start of the original string.
1141 119860 : MatchPos += MatchPosBuf;
1142 119860 : if (Req.VerboseVerbose)
1143 3 : PrintMatch(true, SM, Prefix, Pat.getLoc(), Pat, Buffer, VariableTable,
1144 : MatchPos, MatchLen, Req);
1145 119860 : MatchRange M{MatchPos, MatchPos + MatchLen};
1146 119860 : if (Req.AllowDeprecatedDagOverlap) {
1147 : // We don't need to track all matches in this mode, so we just maintain
1148 : // one match range that encompasses the current CHECK-DAG group's
1149 : // matches.
1150 29998 : if (MatchRanges.empty())
1151 2627 : MatchRanges.insert(MatchRanges.end(), M);
1152 : else {
1153 : auto Block = MatchRanges.begin();
1154 27371 : Block->Pos = std::min(Block->Pos, M.Pos);
1155 33832 : Block->End = std::max(Block->End, M.End);
1156 : }
1157 99649 : break;
1158 : }
1159 : // Iterate previous matches until overlapping match or insertion point.
1160 : bool Overlap = false;
1161 549628 : for (; MI != ME; ++MI) {
1162 497350 : if (M.Pos < MI->End) {
1163 : // !Overlap => New match has no overlap and is before this old match.
1164 : // Overlap => New match overlaps this old match.
1165 37584 : Overlap = MI->Pos < M.End;
1166 37584 : break;
1167 : }
1168 : }
1169 89862 : if (!Overlap) {
1170 : // Insert non-overlapping match into list.
1171 69651 : MatchRanges.insert(MI, M);
1172 69651 : break;
1173 : }
1174 20211 : if (Req.VerboseVerbose) {
1175 1 : SMLoc OldStart = SMLoc::getFromPointer(Buffer.data() + MI->Pos);
1176 1 : SMLoc OldEnd = SMLoc::getFromPointer(Buffer.data() + MI->End);
1177 : SMRange OldRange(OldStart, OldEnd);
1178 2 : SM.PrintMessage(OldStart, SourceMgr::DK_Note,
1179 : "match discarded, overlaps earlier DAG match here",
1180 : {OldRange});
1181 : }
1182 20211 : MatchPos = MI->End;
1183 : }
1184 99649 : if (!Req.VerboseVerbose)
1185 99647 : PrintMatch(true, SM, Prefix, Pat.getLoc(), Pat, Buffer, VariableTable,
1186 : MatchPos, MatchLen, Req);
1187 :
1188 : // Handle the end of a CHECK-DAG group.
1189 99649 : if (std::next(PatItr) == PatEnd ||
1190 82747 : std::next(PatItr)->getCheckTy() == Check::CheckNot) {
1191 17704 : if (!NotStrings.empty()) {
1192 : // If there are CHECK-NOTs between two CHECK-DAGs or from CHECK to
1193 : // CHECK-DAG, verify that there are no 'not' strings occurred in that
1194 : // region.
1195 : StringRef SkippedRegion =
1196 686 : Buffer.slice(StartPos, MatchRanges.begin()->Pos);
1197 686 : if (CheckNot(SM, SkippedRegion, NotStrings, VariableTable, Req))
1198 3 : return StringRef::npos;
1199 : // Clear "not strings".
1200 : NotStrings.clear();
1201 : }
1202 : // All subsequent CHECK-DAGs and CHECK-NOTs should be matched from the
1203 : // end of this CHECK-DAG group's match range.
1204 17701 : StartPos = MatchRanges.rbegin()->End;
1205 : // Don't waste time checking for (impossible) overlaps before that.
1206 : MatchRanges.clear();
1207 : }
1208 : }
1209 :
1210 : return StartPos;
1211 : }
1212 :
1213 : // A check prefix must contain only alphanumeric, hyphens and underscores.
1214 49704 : static bool ValidateCheckPrefix(StringRef CheckPrefix) {
1215 99408 : Regex Validator("^[a-zA-Z0-9_-]*$");
1216 49704 : return Validator.match(CheckPrefix);
1217 : }
1218 :
1219 61142 : bool llvm::FileCheck::ValidateCheckPrefixes() {
1220 : StringSet<> PrefixSet;
1221 :
1222 110844 : for (StringRef Prefix : Req.CheckPrefixes) {
1223 : // Reject empty prefixes.
1224 : if (Prefix == "")
1225 : return false;
1226 :
1227 49705 : if (!PrefixSet.insert(Prefix).second)
1228 : return false;
1229 :
1230 49704 : if (!ValidateCheckPrefix(Prefix))
1231 : return false;
1232 : }
1233 :
1234 : return true;
1235 : }
1236 :
1237 : // Combines the check prefixes into a single regex so that we can efficiently
1238 : // scan for any of the set.
1239 : //
1240 : // The semantics are that the longest-match wins which matches our regex
1241 : // library.
1242 61151 : Regex llvm::FileCheck::buildCheckPrefixRegex() {
1243 : // I don't think there's a way to specify an initial value for cl::list,
1244 : // so if nothing was specified, add the default
1245 61151 : if (Req.CheckPrefixes.empty())
1246 52194 : Req.CheckPrefixes.push_back("CHECK");
1247 :
1248 : // We already validated the contents of CheckPrefixes so just concatenate
1249 : // them as alternatives.
1250 : SmallString<32> PrefixRegexStr;
1251 136948 : for (StringRef Prefix : Req.CheckPrefixes) {
1252 : if (Prefix != Req.CheckPrefixes.front())
1253 14646 : PrefixRegexStr.push_back('|');
1254 :
1255 : PrefixRegexStr.append(Prefix);
1256 : }
1257 :
1258 61151 : return Regex(PrefixRegexStr);
1259 : }
1260 :
1261 : // Remove local variables from \p VariableTable. Global variables
1262 : // (start with '$') are preserved.
1263 9136 : static void ClearLocalVars(StringMap<StringRef> &VariableTable) {
1264 : SmallVector<StringRef, 16> LocalVars;
1265 29712 : for (const auto &Var : VariableTable)
1266 11440 : if (Var.first()[0] != '$')
1267 11299 : LocalVars.push_back(Var.first());
1268 :
1269 20435 : for (const auto &Var : LocalVars)
1270 11299 : VariableTable.erase(Var);
1271 9136 : }
1272 :
1273 : /// Check the input to FileCheck provided in the \p Buffer against the \p
1274 : /// CheckStrings read from the check file.
1275 : ///
1276 : /// Returns false if the input fails to satisfy the checks.
1277 61111 : bool llvm::FileCheck::CheckInput(SourceMgr &SM, StringRef Buffer,
1278 : ArrayRef<FileCheckString> CheckStrings) {
1279 : bool ChecksFailed = false;
1280 :
1281 : /// VariableTable - This holds all the current filecheck variables.
1282 61111 : StringMap<StringRef> VariableTable;
1283 :
1284 61146 : for (const auto& Def : Req.GlobalDefines)
1285 35 : VariableTable.insert(StringRef(Def).split('='));
1286 :
1287 61111 : unsigned i = 0, j = 0, e = CheckStrings.size();
1288 : while (true) {
1289 2624591 : StringRef CheckRegion;
1290 2624591 : if (j == e) {
1291 60290 : CheckRegion = Buffer;
1292 : } else {
1293 2564301 : const FileCheckString &CheckLabelStr = CheckStrings[j];
1294 2564301 : if (CheckLabelStr.Pat.getCheckTy() != Check::CheckLabel) {
1295 2330151 : ++j;
1296 2330151 : continue;
1297 : }
1298 :
1299 : // Scan to next CHECK-LABEL match, ignoring CHECK-NOT and CHECK-DAG
1300 234150 : size_t MatchLabelLen = 0;
1301 : size_t MatchLabelPos =
1302 234150 : CheckLabelStr.Check(SM, Buffer, true, MatchLabelLen, VariableTable,
1303 234150 : Req);
1304 234150 : if (MatchLabelPos == StringRef::npos)
1305 : // Immediately bail of CHECK-LABEL fails, nothing else we can do.
1306 4 : return false;
1307 :
1308 234146 : CheckRegion = Buffer.substr(0, MatchLabelPos + MatchLabelLen);
1309 234146 : Buffer = Buffer.substr(MatchLabelPos + MatchLabelLen);
1310 234146 : ++j;
1311 : }
1312 :
1313 294436 : if (Req.EnableVarScope)
1314 9136 : ClearLocalVars(VariableTable);
1315 :
1316 2857069 : for (; i != j; ++i) {
1317 2562928 : const FileCheckString &CheckStr = CheckStrings[i];
1318 :
1319 : // Check each string within the scanned region, including a second check
1320 : // of any final CHECK-LABEL (to verify CHECK-NOT and CHECK-DAG)
1321 2562928 : size_t MatchLen = 0;
1322 : size_t MatchPos =
1323 2562928 : CheckStr.Check(SM, CheckRegion, false, MatchLen, VariableTable, Req);
1324 :
1325 2562928 : if (MatchPos == StringRef::npos) {
1326 : ChecksFailed = true;
1327 : i = j;
1328 295 : break;
1329 : }
1330 :
1331 5125266 : CheckRegion = CheckRegion.substr(MatchPos + MatchLen);
1332 : }
1333 :
1334 294436 : if (j == e)
1335 : break;
1336 : }
1337 :
1338 : // Success if no checks failed.
1339 61107 : return !ChecksFailed;
1340 : }
|