LCOV - code coverage report
Current view: top level - lib/TableGen - TGLexer.cpp (source / functions) Hit Total Coverage
Test: llvm-toolchain.info Lines: 192 221 86.9 %
Date: 2018-07-13 00:08:38 Functions: 14 15 93.3 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : //===- TGLexer.cpp - Lexer for TableGen -----------------------------------===//
       2             : //
       3             : //                     The LLVM Compiler Infrastructure
       4             : //
       5             : // This file is distributed under the University of Illinois Open Source
       6             : // License. See LICENSE.TXT for details.
       7             : //
       8             : //===----------------------------------------------------------------------===//
       9             : //
      10             : // Implement the Lexer for TableGen.
      11             : //
      12             : //===----------------------------------------------------------------------===//
      13             : 
      14             : #include "TGLexer.h"
      15             : #include "llvm/ADT/StringSwitch.h"
      16             : #include "llvm/ADT/Twine.h"
      17             : #include "llvm/Config/config.h" // for strtoull()/strtoll() define
      18             : #include "llvm/Support/Compiler.h"
      19             : #include "llvm/Support/MemoryBuffer.h"
      20             : #include "llvm/Support/SourceMgr.h"
      21             : #include "llvm/TableGen/Error.h"
      22             : #include <cctype>
      23             : #include <cerrno>
      24             : #include <cstdint>
      25             : #include <cstdio>
      26             : #include <cstdlib>
      27             : #include <cstring>
      28             : 
      29             : using namespace llvm;
      30             : 
      31         348 : TGLexer::TGLexer(SourceMgr &SM) : SrcMgr(SM) {
      32         348 :   CurBuffer = SrcMgr.getMainFileID();
      33         348 :   CurBuf = SrcMgr.getMemoryBuffer(CurBuffer)->getBuffer();
      34         348 :   CurPtr = CurBuf.begin();
      35         348 :   TokStart = nullptr;
      36         348 : }
      37             : 
      38    21371156 : SMLoc TGLexer::getLoc() const {
      39    21371156 :   return SMLoc::getFromPointer(TokStart);
      40             : }
      41             : 
      42             : /// ReturnError - Set the error to the specified string at the specified
      43             : /// location.  This is defined to always return tgtok::Error.
      44           0 : tgtok::TokKind TGLexer::ReturnError(const char *Loc, const Twine &Msg) {
      45           0 :   PrintError(Loc, Msg);
      46           0 :   return tgtok::Error;
      47             : }
      48             : 
      49   135045458 : int TGLexer::getNextChar() {
      50   135053633 :   char CurChar = *CurPtr++;
      51   135053633 :   switch (CurChar) {
      52   124975691 :   default:
      53   124975691 :     return (unsigned char)CurChar;
      54        8519 :   case 0: {
      55             :     // A nul character in the stream is either the end of the current buffer or
      56             :     // a random nul in the file.  Disambiguate that here.
      57        8519 :     if (CurPtr-1 != CurBuf.end())
      58             :       return 0;  // Just whitespace.
      59             : 
      60             :     // If this is the end of an included file, pop the parent file off the
      61             :     // include stack.
      62        8519 :     SMLoc ParentIncludeLoc = SrcMgr.getParentIncludeLoc(CurBuffer);
      63        8519 :     if (ParentIncludeLoc != SMLoc()) {
      64        8175 :       CurBuffer = SrcMgr.FindBufferContainingLoc(ParentIncludeLoc);
      65       16350 :       CurBuf = SrcMgr.getMemoryBuffer(CurBuffer)->getBuffer();
      66        8175 :       CurPtr = ParentIncludeLoc.getPointer();
      67        8175 :       return getNextChar();
      68             :     }
      69             : 
      70             :     // Otherwise, return end of file.
      71         344 :     --CurPtr;  // Another call to lex will return EOF again.
      72         344 :     return EOF;
      73             :   }
      74    10069423 :   case '\n':
      75             :   case '\r':
      76             :     // Handle the newline character by ignoring it and incrementing the line
      77             :     // count.  However, be careful about 'dos style' files with \r\n in them.
      78             :     // Only treat a \n\r or \r\n as a single line.
      79    10069423 :     if ((*CurPtr == '\n' || (*CurPtr == '\r')) &&
      80             :         *CurPtr != CurChar)
      81           0 :       ++CurPtr;  // Eat the two char newline sequence.
      82             :     return '\n';
      83             :   }
      84             : }
      85             : 
      86     2571761 : int TGLexer::peekNextChar(int Index) {
      87     2571761 :   return *(CurPtr + Index);
      88             : }
      89             : 
      90   131327195 : tgtok::TokKind TGLexer::LexToken() {
      91   131327195 :   TokStart = CurPtr;
      92             :   // This always consumes at least one character.
      93   131327195 :   int CurChar = getNextChar();
      94             : 
      95   131327195 :   switch (CurChar) {
      96    19077455 :   default:
      97             :     // Handle letters: [a-zA-Z_]
      98    19077455 :     if (isalpha(CurChar) || CurChar == '_')
      99    19077455 :       return LexIdentifier();
     100             : 
     101             :     // Unknown character, emit an error.
     102           0 :     return ReturnError(TokStart, "Unexpected character");
     103             :   case EOF: return tgtok::Eof;
     104     3408150 :   case ':': return tgtok::colon;
     105     3060192 :   case ';': return tgtok::semi;
     106      114173 :   case '.': return tgtok::period;
     107     8032284 :   case ',': return tgtok::comma;
     108     3534533 :   case '<': return tgtok::less;
     109     3534533 :   case '>': return tgtok::greater;
     110     2961649 :   case ']': return tgtok::r_square;
     111      578785 :   case '{': return tgtok::l_brace;
     112      578785 :   case '}': return tgtok::r_brace;
     113     1166334 :   case '(': return tgtok::l_paren;
     114     1166334 :   case ')': return tgtok::r_paren;
     115      951828 :   case '=': return tgtok::equal;
     116       24269 :   case '?': return tgtok::question;
     117      107609 :   case '#': return tgtok::paste;
     118             : 
     119    72662162 :   case 0:
     120             :   case ' ':
     121             :   case '\t':
     122             :   case '\n':
     123             :   case '\r':
     124             :     // Ignore whitespace.
     125    72662162 :     return LexToken();
     126     2179591 :   case '/':
     127             :     // If this is the start of a // comment, skip until the end of the line or
     128             :     // the end of the buffer.
     129     2179591 :     if (*CurPtr == '/')
     130     2166836 :       SkipBCPLComment();
     131       12755 :     else if (*CurPtr == '*') {
     132       12755 :       if (SkipCComment())
     133             :         return tgtok::Error;
     134             :     } else // Otherwise, this is an error.
     135           0 :       return ReturnError(TokStart, "Unexpected character");
     136     2179590 :     return LexToken();
     137     2064751 :   case '-': case '+':
     138             :   case '0': case '1': case '2': case '3': case '4': case '5': case '6':
     139             :   case '7': case '8': case '9': {
     140             :     int NextChar = 0;
     141     2064751 :     if (isdigit(CurChar)) {
     142             :       // Allow identifiers to start with a number if it is followed by
     143             :       // an identifier.  This can happen with paste operations like
     144             :       // foo#8i.
     145             :       int i = 0;
     146             :       do {
     147     2212993 :         NextChar = peekNextChar(i++);
     148     2212993 :       } while (isdigit(NextChar));
     149             : 
     150     1857104 :       if (NextChar == 'x' || NextChar == 'b') {
     151             :         // If this is [0-9]b[01] or [0-9]x[0-9a-fA-F] this is most
     152             :         // likely a number.
     153      358768 :         int NextNextChar = peekNextChar(i);
     154             :         switch (NextNextChar) {
     155             :         default:
     156             :           break;
     157      276999 :         case '0': case '1':
     158      276999 :           if (NextChar == 'b')
     159      250545 :             return LexNumber();
     160             :           LLVM_FALLTHROUGH;
     161             :         case '2': case '3': case '4': case '5':
     162             :         case '6': case '7': case '8': case '9':
     163             :         case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
     164             :         case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
     165      108223 :           if (NextChar == 'x')
     166      108223 :             return LexNumber();
     167             :           break;
     168             :         }
     169             :       }
     170             :     }
     171             : 
     172     1705983 :     if (isalpha(NextChar) || NextChar == '_')
     173        2119 :       return LexIdentifier();
     174             : 
     175     1703864 :     return LexNumber();
     176             :   }
     177     1999420 :   case '"': return LexString();
     178     1059051 :   case '$': return LexVarName();
     179     2981380 :   case '[': return LexBracket();
     180       83584 :   case '!': return LexExclaim();
     181             :   }
     182             : }
     183             : 
     184             : /// LexString - Lex "[^"]*"
     185     1999420 : tgtok::TokKind TGLexer::LexString() {
     186     1999420 :   const char *StrStart = CurPtr;
     187             : 
     188     1999420 :   CurStrVal = "";
     189             : 
     190    40843658 :   while (*CurPtr != '"') {
     191             :     // If we hit the end of the buffer, report an error.
     192    38844238 :     if (*CurPtr == 0 && CurPtr == CurBuf.end())
     193           0 :       return ReturnError(StrStart, "End of file in string literal");
     194             : 
     195    38844238 :     if (*CurPtr == '\n' || *CurPtr == '\r')
     196           0 :       return ReturnError(StrStart, "End of line in string literal");
     197             : 
     198    77610272 :     if (*CurPtr != '\\') {
     199    38766034 :       CurStrVal += *CurPtr++;
     200    38766034 :       continue;
     201             :     }
     202             : 
     203       78204 :     ++CurPtr;
     204             : 
     205       78204 :     switch (*CurPtr) {
     206       13646 :     case '\\': case '\'': case '"':
     207             :       // These turn into their literal character.
     208       13646 :       CurStrVal += *CurPtr++;
     209             :       break;
     210       63756 :     case 't':
     211             :       CurStrVal += '\t';
     212       63756 :       ++CurPtr;
     213       63756 :       break;
     214         802 :     case 'n':
     215             :       CurStrVal += '\n';
     216         802 :       ++CurPtr;
     217         802 :       break;
     218             : 
     219             :     case '\n':
     220             :     case '\r':
     221           0 :       return ReturnError(CurPtr, "escaped newlines not supported in tblgen");
     222             : 
     223             :     // If we hit the end of the buffer, report an error.
     224           0 :     case '\0':
     225           0 :       if (CurPtr == CurBuf.end())
     226           0 :         return ReturnError(StrStart, "End of file in string literal");
     227             :       LLVM_FALLTHROUGH;
     228             :     default:
     229           0 :       return ReturnError(CurPtr, "invalid escape in string literal");
     230             :     }
     231             :   }
     232             : 
     233     1999420 :   ++CurPtr;
     234     1999420 :   return tgtok::StrVal;
     235             : }
     236             : 
     237     1059051 : tgtok::TokKind TGLexer::LexVarName() {
     238     1059051 :   if (!isalpha(CurPtr[0]) && CurPtr[0] != '_')
     239           0 :     return ReturnError(TokStart, "Invalid variable name");
     240             : 
     241             :   // Otherwise, we're ok, consume the rest of the characters.
     242     1059051 :   const char *VarNameStart = CurPtr++;
     243             : 
     244     5484429 :   while (isalpha(*CurPtr) || isdigit(*CurPtr) || *CurPtr == '_')
     245     2212689 :     ++CurPtr;
     246             : 
     247     1059051 :   CurStrVal.assign(VarNameStart, CurPtr);
     248     1059051 :   return tgtok::VarName;
     249             : }
     250             : 
     251    19079574 : tgtok::TokKind TGLexer::LexIdentifier() {
     252             :   // The first character is [a-zA-Z_], or a digit for identifiers such as 8i.
     253    19079574 :   const char *IdentStart = TokStart;
     254             : 
     255             :   // Match the rest of the identifier regex: [0-9a-zA-Z_]*
     256   343646096 :   while (isalpha(*CurPtr) || isdigit(*CurPtr) || *CurPtr == '_')
     257   162283261 :     ++CurPtr;
     258             : 
     259             :   // Check to see if this identifier is a keyword.
     260    19079574 :   StringRef Str(IdentStart, CurPtr-IdentStart);
     261             : 
     262             :   if (Str == "include") {
     263        8175 :     if (LexInclude()) return tgtok::Error;
     264        8175 :     return Lex();
     265             :   }
     266             : 
     267    19071399 :   tgtok::TokKind Kind = StringSwitch<tgtok::TokKind>(Str)
     268             :     .Case("int", tgtok::Int)
     269             :     .Case("bit", tgtok::Bit)
     270             :     .Case("bits", tgtok::Bits)
     271             :     .Case("string", tgtok::String)
     272             :     .Case("list", tgtok::List)
     273             :     .Case("code", tgtok::Code)
     274             :     .Case("dag", tgtok::Dag)
     275             :     .Case("class", tgtok::Class)
     276             :     .Case("def", tgtok::Def)
     277             :     .Case("foreach", tgtok::Foreach)
     278             :     .Case("defm", tgtok::Defm)
     279             :     .Case("defset", tgtok::Defset)
     280             :     .Case("multiclass", tgtok::MultiClass)
     281             :     .Case("field", tgtok::Field)
     282             :     .Case("let", tgtok::Let)
     283             :     .Case("in", tgtok::In)
     284             :     .Default(tgtok::Id);
     285             : 
     286     3792539 :   if (Kind == tgtok::Id)
     287    15278860 :     CurStrVal.assign(Str.begin(), Str.end());
     288             :   return Kind;
     289             : }
     290             : 
     291             : /// LexInclude - We just read the "include" token.  Get the string token that
     292             : /// comes next and enter the include.
     293        8175 : bool TGLexer::LexInclude() {
     294             :   // The token after the include must be a string.
     295        8175 :   tgtok::TokKind Tok = LexToken();
     296        8175 :   if (Tok == tgtok::Error) return true;
     297        8175 :   if (Tok != tgtok::StrVal) {
     298           0 :     PrintError(getLoc(), "Expected filename after include");
     299           0 :     return true;
     300             :   }
     301             : 
     302             :   // Get the string.
     303             :   std::string Filename = CurStrVal;
     304             :   std::string IncludedFile;
     305             : 
     306        8175 :   CurBuffer = SrcMgr.AddIncludeFile(Filename, SMLoc::getFromPointer(CurPtr),
     307             :                                     IncludedFile);
     308        8175 :   if (!CurBuffer) {
     309           0 :     PrintError(getLoc(), "Could not find include file '" + Filename + "'");
     310           0 :     return true;
     311             :   }
     312             : 
     313             :   DependenciesMapTy::const_iterator Found = Dependencies.find(IncludedFile);
     314        8175 :   if (Found != Dependencies.end()) {
     315           0 :     PrintError(getLoc(),
     316           0 :                "File '" + IncludedFile + "' has already been included.");
     317           0 :     SrcMgr.PrintMessage(Found->second, SourceMgr::DK_Note,
     318             :                         "previously included here");
     319           0 :     return true;
     320             :   }
     321       16350 :   Dependencies.insert(std::make_pair(IncludedFile, getLoc()));
     322             :   // Switch lexing to the start of the included file's buffer.
     323       16350 :   CurBuf = SrcMgr.getMemoryBuffer(CurBuffer)->getBuffer();
     324        8175 :   CurPtr = CurBuf.begin();
     325        8175 :   return false;
     326             : }
     327             : 
     328     2166836 : void TGLexer::SkipBCPLComment() {
     329     2166836 :   ++CurPtr;  // skip the second slash.
     330    69400450 :   while (true) {
     331    71567286 :     switch (*CurPtr) {
     332             :     case '\n':
     333             :     case '\r':
     334             :       return;  // Newline is end of comment.
     335           1 :     case 0:
     336             :       // If this is the end of the buffer, end the comment.
     337           1 :       if (CurPtr == CurBuf.end())
     338             :         return;
     339             :       break;
     340             :     }
     341             :     // Otherwise, skip the character.
     342    69400450 :     ++CurPtr;
     343             :   }
     344             : }
     345             : 
     346             : /// SkipCComment - This skips C-style /**/ comments.  The only difference from C
     347             : /// is that we allow nesting.
     348       12755 : bool TGLexer::SkipCComment() {
     349       12755 :   ++CurPtr;  // skip the star.
     350             :   unsigned CommentDepth = 1;
     351             : 
     352             :   while (true) {
     353      261444 :     int CurChar = getNextChar();
     354      261444 :     switch (CurChar) {
     355             :     case EOF:
     356           1 :       PrintError(TokStart, "Unterminated comment!");
     357           1 :       return true;
     358       18753 :     case '*':
     359             :       // End of the comment?
     360       18753 :       if (CurPtr[0] != '/') break;
     361             : 
     362       12757 :       ++CurPtr;   // End the */.
     363       12757 :       if (--CommentDepth == 0)
     364             :         return false;
     365             :       break;
     366         128 :     case '/':
     367             :       // Start of a nested comment?
     368         128 :       if (CurPtr[0] != '*') break;
     369           4 :       ++CurPtr;
     370           4 :       ++CommentDepth;
     371           4 :       break;
     372             :     }
     373             :   }
     374             : }
     375             : 
     376             : /// LexNumber - Lex:
     377             : ///    [-+]?[0-9]+
     378             : ///    0x[0-9a-fA-F]+
     379             : ///    0b[01]+
     380     2062632 : tgtok::TokKind TGLexer::LexNumber() {
     381     2062632 :   if (CurPtr[-1] == '0') {
     382      698958 :     if (CurPtr[0] == 'x') {
     383      108223 :       ++CurPtr;
     384             :       const char *NumStart = CurPtr;
     385      634499 :       while (isxdigit(CurPtr[0]))
     386      263138 :         ++CurPtr;
     387             : 
     388             :       // Requires at least one hex digit.
     389      108223 :       if (CurPtr == NumStart)
     390           0 :         return ReturnError(TokStart, "Invalid hexadecimal number");
     391             : 
     392      108223 :       errno = 0;
     393      108223 :       CurIntVal = strtoll(NumStart, nullptr, 16);
     394      108223 :       if (errno == EINVAL)
     395           0 :         return ReturnError(TokStart, "Invalid hexadecimal number");
     396      108223 :       if (errno == ERANGE) {
     397          94 :         errno = 0;
     398          94 :         CurIntVal = (int64_t)strtoull(NumStart, nullptr, 16);
     399          94 :         if (errno == EINVAL)
     400           0 :           return ReturnError(TokStart, "Invalid hexadecimal number");
     401          94 :         if (errno == ERANGE)
     402           0 :           return ReturnError(TokStart, "Hexadecimal number out of range");
     403             :       }
     404             :       return tgtok::IntVal;
     405      590735 :     } else if (CurPtr[0] == 'b') {
     406      250545 :       ++CurPtr;
     407             :       const char *NumStart = CurPtr;
     408     2331529 :       while (CurPtr[0] == '0' || CurPtr[0] == '1')
     409     1040492 :         ++CurPtr;
     410             : 
     411             :       // Requires at least one binary digit.
     412      250545 :       if (CurPtr == NumStart)
     413           0 :         return ReturnError(CurPtr-2, "Invalid binary number");
     414      250545 :       CurIntVal = strtoll(NumStart, nullptr, 2);
     415      250545 :       return tgtok::BinaryIntVal;
     416             :     }
     417             :   }
     418             : 
     419             :   // Check for a sign without a digit.
     420     1703864 :   if (!isdigit(CurPtr[0])) {
     421     1182463 :     if (CurPtr[-1] == '-')
     422             :       return tgtok::minus;
     423     1182013 :     else if (CurPtr[-1] == '+')
     424             :       return tgtok::plus;
     425             :   }
     426             : 
     427     3039372 :   while (isdigit(CurPtr[0]))
     428      667979 :     ++CurPtr;
     429     1703414 :   CurIntVal = strtoll(TokStart, nullptr, 10);
     430     1703414 :   return tgtok::IntVal;
     431             : }
     432             : 
     433             : /// LexBracket - We just read '['.  If this is a code block, return it,
     434             : /// otherwise return the bracket.  Match: '[' and '[{ ( [^}]+ | }[^]] )* }]'
     435     2981380 : tgtok::TokKind TGLexer::LexBracket() {
     436     2981380 :   if (CurPtr[0] != '{')
     437             :     return tgtok::l_square;
     438       19731 :   ++CurPtr;
     439             :   const char *CodeStart = CurPtr;
     440             :   while (true) {
     441     3434166 :     int Char = getNextChar();
     442     3434166 :     if (Char == EOF) break;
     443             : 
     444     3434166 :     if (Char != '}') continue;
     445             : 
     446       22653 :     Char = getNextChar();
     447       22653 :     if (Char == EOF) break;
     448       22653 :     if (Char == ']') {
     449       19731 :       CurStrVal.assign(CodeStart, CurPtr-2);
     450       19731 :       return tgtok::CodeFragment;
     451             :     }
     452             :   }
     453             : 
     454           0 :   return ReturnError(CodeStart-2, "Unterminated Code Block");
     455             : }
     456             : 
     457             : /// LexExclaim - Lex '![a-zA-Z]+'.  A '!' that is not followed by a letter is an error.
     458       83584 : tgtok::TokKind TGLexer::LexExclaim() {
     459       83584 :   if (!isalpha(*CurPtr))
     460           0 :     return ReturnError(CurPtr - 1, "Invalid \"!operator\"");
     461             : 
     462       83584 :   const char *Start = CurPtr++;
     463      771172 :   while (isalpha(*CurPtr))
     464      343794 :     ++CurPtr;
     465             : 
     466             :   // Check to see which operator this is.
     467             :   tgtok::TokKind Kind =
     468      167168 :     StringSwitch<tgtok::TokKind>(StringRef(Start, CurPtr - Start))
     469             :     .Case("eq", tgtok::XEq)
     470             :     .Case("ne", tgtok::XNe)
     471             :     .Case("le", tgtok::XLe)
     472             :     .Case("lt", tgtok::XLt)
     473             :     .Case("ge", tgtok::XGe)
     474             :     .Case("gt", tgtok::XGt)
     475             :     .Case("if", tgtok::XIf)
     476             :     .Case("isa", tgtok::XIsA)
     477             :     .Case("head", tgtok::XHead)
     478             :     .Case("tail", tgtok::XTail)
     479             :     .Case("size", tgtok::XSize)
     480             :     .Case("con", tgtok::XConcat)
     481             :     .Case("dag", tgtok::XDag)
     482             :     .Case("add", tgtok::XADD)
     483             :     .Case("and", tgtok::XAND)
     484             :     .Case("or", tgtok::XOR)
     485             :     .Case("shl", tgtok::XSHL)
     486             :     .Case("sra", tgtok::XSRA)
     487             :     .Case("srl", tgtok::XSRL)
     488             :     .Case("cast", tgtok::XCast)
     489             :     .Case("empty", tgtok::XEmpty)
     490             :     .Case("subst", tgtok::XSubst)
     491             :     .Case("foldl", tgtok::XFoldl)
     492             :     .Case("foreach", tgtok::XForEach)
     493             :     .Case("listconcat", tgtok::XListConcat)
     494             :     .Case("strconcat", tgtok::XStrConcat)
     495             :     .Default(tgtok::Error);
     496             : 
     497       83584 :   return Kind != tgtok::Error ? Kind : ReturnError(Start-1, "Unknown operator");
     498             : }

Generated by: LCOV version 1.13