Line data Source code
1 : //===- StringMatcher.cpp - Generate a matcher for input strings -----------===//
2 : //
3 : // The LLVM Compiler Infrastructure
4 : //
5 : // This file is distributed under the University of Illinois Open Source
6 : // License. See LICENSE.TXT for details.
7 : //
8 : //===----------------------------------------------------------------------===//
9 : //
10 : // This file implements the StringMatcher class.
11 : //
12 : //===----------------------------------------------------------------------===//
13 :
14 : #include "llvm/TableGen/StringMatcher.h"
15 : #include "llvm/ADT/StringRef.h"
16 : #include "llvm/Support/raw_ostream.h"
17 : #include <cassert>
18 : #include <map>
19 : #include <string>
20 : #include <utility>
21 : #include <vector>
22 :
23 : using namespace llvm;
24 :
25 : /// FindFirstNonCommonLetter - Find the first character in the keys of the
26 : /// string pairs that is not shared across the whole set of strings. All
27 : /// strings are assumed to have the same length.
28 : static unsigned
29 3269 : FindFirstNonCommonLetter(const std::vector<const
30 : StringMatcher::StringPair*> &Matches) {
31 : assert(!Matches.empty());
32 40618 : for (unsigned i = 0, e = Matches[0]->first.size(); i != e; ++i) {
33 : // Check to see if letter i is the same across the set.
34 38023 : char Letter = Matches[0]->first[i];
35 :
36 130254 : for (unsigned str = 0, e = Matches.size(); str != e; ++str)
37 164646 : if (Matches[str]->first[i] != Letter)
38 674 : return i;
39 : }
40 :
41 : return Matches[0]->first.size();
42 : }
43 :
44 : /// EmitStringMatcherForChar - Given a set of strings that are known to be the
45 : /// same length and whose characters leading up to CharNo are the same, emit
46 : /// code to verify that CharNo and later are the same.
47 : ///
48 : /// \return - True if control can leave the emitted code fragment.
49 8854 : bool StringMatcher::EmitStringMatcherForChar(
50 : const std::vector<const StringPair *> &Matches, unsigned CharNo,
51 : unsigned IndentCount, bool IgnoreDuplicates) const {
52 : assert(!Matches.empty() && "Must have at least one string to match!");
53 8854 : std::string Indent(IndentCount * 2 + 4, ' ');
54 :
55 : // If we have verified that the entire string matches, we're done: output the
56 : // matching code.
57 8854 : if (CharNo == Matches[0]->first.size()) {
58 7954 : if (Matches.size() > 1 && !IgnoreDuplicates)
59 0 : report_fatal_error("Had duplicate keys to match on");
60 :
61 : // If the to-execute code has \n's in it, indent each subsequent line.
62 : StringRef Code = Matches[0]->second;
63 :
64 3977 : std::pair<StringRef, StringRef> Split = Code.split('\n');
65 3977 : OS << Indent << Split.first << "\t // \"" << Matches[0]->first << "\"\n";
66 :
67 3977 : Code = Split.second;
68 5596 : while (!Code.empty()) {
69 1619 : Split = Code.split('\n');
70 1619 : OS << Indent << Split.first << "\n";
71 1619 : Code = Split.second;
72 : }
73 : return false;
74 : }
75 :
76 : // Bucket the matches by the character we are comparing.
77 : std::map<char, std::vector<const StringPair*>> MatchesByLetter;
78 :
79 26929 : for (unsigned i = 0, e = Matches.size(); i != e; ++i)
80 51525 : MatchesByLetter[Matches[i]->first[CharNo]].push_back(Matches[i]);
81 :
82 :
83 : // If we have exactly one bucket to match, see how many characters are common
84 : // across the whole set and match all of them at once.
85 4877 : if (MatchesByLetter.size() == 1) {
86 3269 : unsigned FirstNonCommonLetter = FindFirstNonCommonLetter(Matches);
87 3269 : unsigned NumChars = FirstNonCommonLetter-CharNo;
88 :
89 : // Emit code to break out if the prefix doesn't match.
90 3269 : if (NumChars == 1) {
91 : // Do the comparison with if (Str[1] != 'f')
92 : // FIXME: Need to escape general characters.
93 896 : OS << Indent << "if (" << StrVariableName << "[" << CharNo << "] != '"
94 1344 : << Matches[0]->first[CharNo] << "')\n";
95 448 : OS << Indent << " break;\n";
96 : } else {
97 : // Do the comparison with if memcmp(Str.data()+1, "foo", 3).
98 : // FIXME: Need to escape general strings.
99 2821 : OS << Indent << "if (memcmp(" << StrVariableName << ".data()+" << CharNo
100 5642 : << ", \"" << Matches[0]->first.substr(CharNo, NumChars) << "\", "
101 2821 : << NumChars << ") != 0)\n";
102 2821 : OS << Indent << " break;\n";
103 : }
104 :
105 3269 : return EmitStringMatcherForChar(Matches, FirstNonCommonLetter, IndentCount,
106 3269 : IgnoreDuplicates);
107 : }
108 :
109 : // Otherwise, we have multiple possible things, emit a switch on the
110 : // character.
111 3216 : OS << Indent << "switch (" << StrVariableName << "[" << CharNo << "]) {\n";
112 1608 : OS << Indent << "default: break;\n";
113 :
114 : for (std::map<char, std::vector<const StringPair*>>::iterator LI =
115 6853 : MatchesByLetter.begin(), E = MatchesByLetter.end(); LI != E; ++LI) {
116 : // TODO: escape hard stuff (like \n) if we ever care about it.
117 10490 : OS << Indent << "case '" << LI->first << "':\t // "
118 10490 : << LI->second.size() << " string";
119 10490 : if (LI->second.size() != 1) OS << 's';
120 5245 : OS << " to match.\n";
121 5245 : if (EmitStringMatcherForChar(LI->second, CharNo + 1, IndentCount + 1,
122 : IgnoreDuplicates))
123 1349 : OS << Indent << " break;\n";
124 : }
125 :
126 1608 : OS << Indent << "}\n";
127 1608 : return true;
128 : }
129 :
130 : /// Emit - Top level entry point.
131 : ///
132 50 : void StringMatcher::Emit(unsigned Indent, bool IgnoreDuplicates) const {
133 : // If nothing to match, just fall through.
134 100 : if (Matches.empty()) return;
135 :
136 : // First level categorization: group strings by length.
137 : std::map<unsigned, std::vector<const StringPair*>> MatchesByLength;
138 :
139 4031 : for (unsigned i = 0, e = Matches.size(); i != e; ++i)
140 7970 : MatchesByLength[Matches[i].first.size()].push_back(&Matches[i]);
141 :
142 : // Output a switch statement on length and categorize the elements within each
143 : // bin.
144 46 : OS.indent(Indent*2+2) << "switch (" << StrVariableName << ".size()) {\n";
145 46 : OS.indent(Indent*2+2) << "default: break;\n";
146 :
147 : for (std::map<unsigned, std::vector<const StringPair*>>::iterator LI =
148 386 : MatchesByLength.begin(), E = MatchesByLength.end(); LI != E; ++LI) {
149 340 : OS.indent(Indent*2+2) << "case " << LI->first << ":\t // "
150 680 : << LI->second.size()
151 599 : << " string" << (LI->second.size() == 1 ? "" : "s") << " to match.\n";
152 340 : if (EmitStringMatcherForChar(LI->second, 0, Indent, IgnoreDuplicates))
153 259 : OS.indent(Indent*2+4) << "break;\n";
154 : }
155 :
156 46 : OS.indent(Indent*2+2) << "}\n";
157 : }
|