LCOV - code coverage report
Current view: top level - lib/TableGen - TGLexer.cpp (source / functions) Hit Total Coverage
Test: llvm-toolchain.info Lines: 239 268 89.2 %
Date: 2017-09-14 15:23:50 Functions: 14 15 93.3 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : //===- TGLexer.cpp - Lexer for TableGen -----------------------------------===//
       2             : //
       3             : //                     The LLVM Compiler Infrastructure
       4             : //
       5             : // This file is distributed under the University of Illinois Open Source
       6             : // License. See LICENSE.TXT for details.
       7             : //
       8             : //===----------------------------------------------------------------------===//
       9             : //
      10             : // Implement the Lexer for TableGen.
      11             : //
      12             : //===----------------------------------------------------------------------===//
      13             : 
      14             : #include "TGLexer.h"
      15             : #include "llvm/ADT/StringSwitch.h"
      16             : #include "llvm/ADT/Twine.h"
      17             : #include "llvm/Config/config.h" // for strtoull()/strtoll() define
      18             : #include "llvm/Support/Compiler.h"
      19             : #include "llvm/Support/MemoryBuffer.h"
      20             : #include "llvm/Support/SourceMgr.h"
      21             : #include "llvm/TableGen/Error.h"
      22             : #include <cctype>
      23             : #include <cerrno>
      24             : #include <cstdint>
      25             : #include <cstdio>
      26             : #include <cstdlib>
      27             : #include <cstring>
      28             : 
      29             : using namespace llvm;
      30             : /// Set up the lexer to scan SM's main file buffer from its first character.
      31        1100 : TGLexer::TGLexer(SourceMgr &SM) : SrcMgr(SM) {
      32         275 :   CurBuffer = SrcMgr.getMainFileID();
      33         825 :   CurBuf = SrcMgr.getMemoryBuffer(CurBuffer)->getBuffer();
      34         275 :   CurPtr = CurBuf.begin();  // Scanning starts at the beginning of the buffer.
      35         275 :   TokStart = nullptr;  // No token start is recorded until LexToken() runs.
      36         275 : }
      37             : /// Return TokStart (set by LexToken() to where the current token began) as an SMLoc.
      38    17998873 : SMLoc TGLexer::getLoc() const {
      39    17998873 :   return SMLoc::getFromPointer(TokStart);
      40             : }
      41             : 
      42             : /// ReturnError - Report Msg at source position Loc through PrintError.
      43             : /// Always returns tgtok::Error, so callers can `return ReturnError(...)`.
      44           0 : tgtok::TokKind TGLexer::ReturnError(const char *Loc, const Twine &Msg) {
      45           0 :   PrintError(Loc, Msg);
      46           0 :   return tgtok::Error;
      47             : }
      48             : /// Consume and return the next character, or EOF once the outermost buffer is exhausted.
      49   112242831 : int TGLexer::getNextChar() {
      50   112249218 :   char CurChar = *CurPtr++;
      51   112249218 :   switch (CurChar) {
      52   104093662 :   default:
      53   104093662 :     return (unsigned char)CurChar;  // Cast so the result is non-negative and cannot equal EOF.
      54        6662 :   case 0: {
      55             :     // A nul character in the stream is either the end of the current buffer or
      56             :     // a random nul in the file.  Disambiguate that here.
      57       13324 :     if (CurPtr-1 != CurBuf.end())  // CurPtr was already advanced; CurPtr-1 is the nul itself.
      58             :       return 0;  // Embedded nul; LexToken() treats it as whitespace.
      59             :     
      60             :     // If this is the end of an included file, pop the parent file off the
      61             :     // include stack.
      62       13324 :     SMLoc ParentIncludeLoc = SrcMgr.getParentIncludeLoc(CurBuffer);
      63        6662 :     if (ParentIncludeLoc != SMLoc()) {  // A default SMLoc means there is no parent buffer.
      64        6387 :       CurBuffer = SrcMgr.FindBufferContainingLoc(ParentIncludeLoc);
      65       19161 :       CurBuf = SrcMgr.getMemoryBuffer(CurBuffer)->getBuffer();
      66        6387 :       CurPtr = ParentIncludeLoc.getPointer();  // Resume the parent at the recorded include location.
      67        6387 :       return getNextChar();
      68             :     }
      69             :     
      70             :     // Otherwise, return end of file.
      71         275 :     --CurPtr;  // Stay on the terminating nul so every later call returns EOF again.
      72         275 :     return EOF;
      73             :   }
      74     8148894 :   case '\n':
      75             :   case '\r':
      76             :     // Fold every line ending into a single '\n'.  A two-character pair made
      77             :     // of one '\n' and one '\r' (in either order) is consumed as one newline;
      78             :     // two identical characters in a row remain two separate newlines.
      79     8148894 :     if ((*CurPtr == '\n' || (*CurPtr == '\r')) &&
      80             :         *CurPtr != CurChar)
      81       11947 :       ++CurPtr;  // Eat the two char newline sequence.
      82             :     return '\n';
      83             :   }  
      84             : }
      85             : /// Return the character Index positions past CurPtr without consuming anything.
      86     2155914 : int TGLexer::peekNextChar(int Index) {
      87     2155914 :   return *(CurPtr + Index);  // NOTE(review): no bounds check; result may be negative if char is signed.
      88             : }
      89             : /// Lex the token starting at CurPtr and return its kind; whitespace and comments are skipped.
      90   108083479 : tgtok::TokKind TGLexer::LexToken() {
      91   108083479 :   TokStart = CurPtr;
      92             :   // Consumes one character, except at end of input where getNextChar() backs up.
      93   108083479 :   int CurChar = getNextChar();
      94             : 
      95   108083479 :   switch (CurChar) {
      96    15853696 :   default:
      97             :     // Handle letters: [a-zA-Z_]
      98    15853696 :     if (isalpha(CurChar) || CurChar == '_')
      99    15853696 :       return LexIdentifier();
     100             : 
     101             :     // Unknown character, emit an error.
     102           0 :     return ReturnError(TokStart, "Unexpected character");
     103             :   case EOF: return tgtok::Eof;
     104     2903530 :   case ':': return tgtok::colon;
     105     2536945 :   case ';': return tgtok::semi;
     106       74607 :   case '.': return tgtok::period;
     107     6729793 :   case ',': return tgtok::comma;
     108     2901441 :   case '<': return tgtok::less;
     109     2901441 :   case '>': return tgtok::greater;
     110     2436854 :   case ']': return tgtok::r_square;
     111      480536 :   case '{': return tgtok::l_brace;
     112      480536 :   case '}': return tgtok::r_brace;
     113     1078299 :   case '(': return tgtok::l_paren;
     114     1078299 :   case ')': return tgtok::r_paren;
     115      788389 :   case '=': return tgtok::equal;
     116       17801 :   case '?': return tgtok::question;
     117       79937 :   case '#': return tgtok::paste;
     118             :       
     119    59203955 :   case 0:
     120             :   case ' ':
     121             :   case '\t':
     122             :   case '\n':
     123             :   case '\r':
     124             :     // Ignore whitespace (an embedded nul counts too) and lex what follows.
     125    59203955 :     return LexToken();
     126     1642223 :   case '/':
     127             :     // If this is the start of a // comment, skip until the end of the line or
     128             :     // the end of the buffer.
     129     1642223 :     if (*CurPtr == '/')
     130     1633274 :       SkipBCPLComment();
     131        8949 :     else if (*CurPtr == '*') {
     132        8949 :       if (SkipCComment())  // true means the comment was never closed.
     133             :         return tgtok::Error;
     134             :     } else // Otherwise, this is an error.
     135           0 :       return ReturnError(TokStart, "Unexpected character");
     136     1642222 :     return LexToken();  // Comment skipped; lex what follows it.
     137     1714859 :   case '-': case '+':
     138             :   case '0': case '1': case '2': case '3': case '4': case '5': case '6':
     139             :   case '7': case '8': case '9': {
     140     1714859 :     int NextChar = 0;  // Stays 0 for '-' and '+', which sends them to LexNumber() below.
     141     1714859 :     if (isdigit(CurChar)) {
     142             :       // Allow identifiers to start with a number if it is followed by
     143             :       // an identifier.  This can happen with paste operations like
     144             :       // foo#8i.
     145             :       int i = 0;  // Lookahead offset from CurPtr.
     146             :       do {
     147     1836829 :         NextChar = peekNextChar(i++);
     148     1836829 :       } while (isdigit(NextChar));  // NOTE(review): NextChar may be negative here (see peekNextChar) - UB for isdigit.
     149             : 
     150     1527028 :       if (NextChar == 'x' || NextChar == 'b') {
     151             :         // If this is [0-9]+b[01] or [0-9]+x[0-9a-fA-F] this is most
     152             :         // likely a number.
     153      319085 :         int NextNextChar = peekNextChar(i);
     154             :         switch (NextNextChar) {
     155             :         default:
     156             :           break;
     157      246130 :         case '0': case '1': 
     158      246130 :           if (NextChar == 'b')
     159      223610 :             return LexNumber();
     160             :           LLVM_FALLTHROUGH;
     161             :         case '2': case '3': case '4': case '5':
     162             :         case '6': case '7': case '8': case '9':
     163             :         case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
     164             :         case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
     165       95475 :           if (NextChar == 'x')
     166       95475 :             return LexNumber();
     167             :           break;
     168             :         }
     169             :       }
     170             :     }
     171             : 
     172     1395774 :     if (isalpha(NextChar) || NextChar == '_')  // Digits followed by a letter or '_': an identifier.
     173        2119 :       return LexIdentifier();
     174             : 
     175     1393655 :     return LexNumber();
     176             :   }
     177     1692937 :   case '"': return LexString();
     178      965817 :   case '$': return LexVarName();
     179     2464771 :   case '[': return LexBracket();
     180       56539 :   case '!': return LexExclaim();
     181             :   }
     182             : }
     183             : /// LexString - The opening '"' is already consumed.  Lex the rest of "[^"]*",
     184             : /// decoding escapes into CurStrVal; returns tgtok::StrVal, or tgtok::Error.
     185     1692937 : tgtok::TokKind TGLexer::LexString() {
     186     1692937 :   const char *StrStart = CurPtr;  // First character after the opening quote; used for error locations.
     187             :   
     188     1692937 :   CurStrVal = "";
     189             :   
     190    34580811 :   while (*CurPtr != '"') {
     191             :     // If we hit the end of the buffer, report an error.
     192    32887874 :     if (*CurPtr == 0 && CurPtr == CurBuf.end())  // A nul elsewhere is kept as an ordinary character.
     193           0 :       return ReturnError(StrStart, "End of file in string literal");
     194             :     
     195    32887874 :     if (*CurPtr == '\n' || *CurPtr == '\r')
     196           0 :       return ReturnError(StrStart, "End of line in string literal");
     197             :     
     198    65705595 :     if (*CurPtr != '\\') {
     199    65635442 :       CurStrVal += *CurPtr++;
     200    32817721 :       continue;
     201             :     }
     202             : 
     203       70153 :     ++CurPtr;  // Skip the backslash; *CurPtr is now the escape selector.
     204             :     
     205       70153 :     switch (*CurPtr) {
     206       13264 :     case '\\': case '\'': case '"':
     207             :       // These turn into their literal character.
     208       13264 :       CurStrVal += *CurPtr++;
     209             :       break;
     210       56184 :     case 't':
     211      112368 :       CurStrVal += '\t';
     212       56184 :       ++CurPtr;
     213       56184 :       break;
     214         705 :     case 'n':
     215        1410 :       CurStrVal += '\n';
     216         705 :       ++CurPtr;
     217         705 :       break;
     218             :         
     219           0 :     case '\n':
     220             :     case '\r':
     221           0 :       return ReturnError(CurPtr, "escaped newlines not supported in tblgen");
     222             : 
     223             :     // If we hit the end of the buffer, report an error.
     224           0 :     case '\0':
     225           0 :       if (CurPtr == CurBuf.end())
     226           0 :         return ReturnError(StrStart, "End of file in string literal");
     227             :       LLVM_FALLTHROUGH;
     228             :     default:
     229           0 :       return ReturnError(CurPtr, "invalid escape in string literal");
     230             :     }
     231             :   }
     232             :   
     233     1692937 :   ++CurPtr;  // Consume the closing quote.
     234     1692937 :   return tgtok::StrVal;
     235             : }
     236             : /// LexVarName - The '$' is already consumed.  Lex [a-zA-Z_][0-9a-zA-Z_]* into CurStrVal.
     237      965817 : tgtok::TokKind TGLexer::LexVarName() {
     238      965817 :   if (!isalpha(CurPtr[0]) && CurPtr[0] != '_')  // NOTE(review): ctype call on a plain char; UB if negative.
     239           0 :     return ReturnError(TokStart, "Invalid variable name");
     240             :   
     241             :   // Otherwise, we're ok, consume the rest of the characters.
     242      965817 :   const char *VarNameStart = CurPtr++;
     243             :   
     244     5044145 :   while (isalpha(*CurPtr) || isdigit(*CurPtr) || *CurPtr == '_')
     245     2039164 :     ++CurPtr;
     246             : 
     247     1931634 :   CurStrVal.assign(VarNameStart, CurPtr);  // The stored name excludes the leading '$'.
     248      965817 :   return tgtok::VarName;
     249             : }
     250             : /// LexIdentifier - Lex an identifier or keyword starting at TokStart; handles "include" itself.
     251    15855815 : tgtok::TokKind TGLexer::LexIdentifier() {
     252             :   // LexToken() calls this for tokens starting with [a-zA-Z_] or with digits.
     253    15855815 :   const char *IdentStart = TokStart;
     254             : 
     255             :   // Match the rest of the identifier regex: [0-9a-zA-Z_]*
     256   281729743 :   while (isalpha(*CurPtr) || isdigit(*CurPtr) || *CurPtr == '_')  // NOTE(review): plain char passed to ctype.
     257   132936964 :     ++CurPtr;
     258             : 
     259             :   // Check to see if this identifier is a keyword.
     260    31711630 :   StringRef Str(IdentStart, CurPtr-IdentStart);
     261             : 
     262    15862202 :   if (Str == "include") {
     263        6387 :     if (LexInclude()) return tgtok::Error;
     264        6387 :     return Lex();  // Lex() is not defined in this listing; presumably lexes the next token - confirm in TGLexer.h.
     265             :   }
     266             : 
     267    15849428 :   tgtok::TokKind Kind = StringSwitch<tgtok::TokKind>(Str)
     268    47548284 :     .Case("int", tgtok::Int)
     269    47548284 :     .Case("bit", tgtok::Bit)
     270    47548284 :     .Case("bits", tgtok::Bits)
     271    47548284 :     .Case("string", tgtok::String)
     272    47548284 :     .Case("list", tgtok::List)
     273    47548284 :     .Case("code", tgtok::Code)
     274    47548284 :     .Case("dag", tgtok::Dag)
     275    47548284 :     .Case("class", tgtok::Class)
     276    47548284 :     .Case("def", tgtok::Def)
     277    47548284 :     .Case("foreach", tgtok::Foreach)
     278    47548284 :     .Case("defm", tgtok::Defm)
     279    47548284 :     .Case("multiclass", tgtok::MultiClass)
     280    47548284 :     .Case("field", tgtok::Field)
     281    47548284 :     .Case("let", tgtok::Let)
     282    47548284 :     .Case("in", tgtok::In)
     283    31698856 :     .Default(tgtok::Id);
     284             : 
     285     3105972 :   if (Kind == tgtok::Id)  // Only plain identifiers store their spelling; keywords leave CurStrVal untouched.
     286    12743456 :     CurStrVal.assign(Str.begin(), Str.end());
     287             :   return Kind;
     288             : }
     289             : 
     290             : /// LexInclude - We just read the "include" token.  Lex the filename string that
     291             : /// must follow and switch to that file's buffer.  Returns true on any error.
     292        6387 : bool TGLexer::LexInclude() {
     293             :   // The token after the include must be a string.
     294        6387 :   tgtok::TokKind Tok = LexToken();
     295        6387 :   if (Tok == tgtok::Error) return true;
     296        6387 :   if (Tok != tgtok::StrVal) {
     297           0 :     PrintError(getLoc(), "Expected filename after include");
     298           0 :     return true;
     299             :   }
     300             : 
     301             :   // Get the string.
     302       12774 :   std::string Filename = CurStrVal;
     303       12774 :   std::string IncludedFile;  // Passed to AddIncludeFile; used below as the key for duplicate detection.
     304             : 
     305       12774 :   CurBuffer = SrcMgr.AddIncludeFile(Filename, SMLoc::getFromPointer(CurPtr),  // CurPtr is just past the closing quote.
     306             :                                     IncludedFile);
     307        6387 :   if (!CurBuffer) {  // A zero buffer id is treated as "file not found".
     308           0 :     PrintError(getLoc(), "Could not find include file '" + Filename + "'");
     309           0 :     return true;
     310             :   }
     311             :   
     312       19161 :   DependenciesMapTy::const_iterator Found = Dependencies.find(IncludedFile);
     313       12774 :   if (Found != Dependencies.end()) {  // Including the same file twice is an error.
     314           0 :     PrintError(getLoc(),
     315           0 :                "File '" + IncludedFile + "' has already been included.");
     316           0 :     SrcMgr.PrintMessage(Found->second, SourceMgr::DK_Note,
     317             :                         "previously included here");
     318           0 :     return true;
     319             :   }
     320       25548 :   Dependencies.insert(std::make_pair(IncludedFile, getLoc()));
     321             :   // Switch the lexer to the beginning of the included file's buffer.
     322       19161 :   CurBuf = SrcMgr.getMemoryBuffer(CurBuffer)->getBuffer();
     323        6387 :   CurPtr = CurBuf.begin();
     324        6387 :   return false;
     325             : }
     326             : /// SkipBCPLComment - Skip a // comment, stopping at (not past) the newline or end of buffer.
     327     1633274 : void TGLexer::SkipBCPLComment() {
     328     1633274 :   ++CurPtr;  // skip the second slash.
     329    51101090 :   while (true) {
     330    52734364 :     switch (*CurPtr) {
     331             :     case '\n':
     332             :     case '\r':
     333             :       return;  // Newline is end of comment.
     334           1 :     case 0:
     335             :       // If this is the end of the buffer, end the comment.
     336           2 :       if (CurPtr == CurBuf.end())
     337             :         return;
     338             :       break;  // An embedded nul is skipped like any other comment character.
     339             :     }
     340             :     // Otherwise, skip the character.
     341    51101090 :     ++CurPtr;
     342             :   }
     343             : }
     344             : 
     345             : /// SkipCComment - Skip a C-style /* */ comment; unlike C, these may nest.
     346             : /// Returns true, after printing an error, if input ends inside the comment.
     347        8949 : bool TGLexer::SkipCComment() {
     348        8949 :   ++CurPtr;  // skip the star.
     349        8949 :   unsigned CommentDepth = 1;  // Number of comment openers not yet closed.
     350             :   
     351             :   while (true) {
     352      223967 :     int CurChar = getNextChar();
     353      223967 :     switch (CurChar) {
     354           1 :     case EOF:
     355           1 :       PrintError(TokStart, "Unterminated comment!");  // TokStart is the '/' that opened the outermost comment.
     356           1 :       return true;
     357       14204 :     case '*':
     358             :       // End of the comment?
     359       14204 :       if (CurPtr[0] != '/') break;
     360             :       
     361        8951 :       ++CurPtr;   // End the */.
     362        8951 :       if (--CommentDepth == 0)
     363             :         return false;
     364             :       break;
     365         126 :     case '/':
     366             :       // Start of a nested comment?
     367         126 :       if (CurPtr[0] != '*') break;
     368           4 :       ++CurPtr;
     369           4 :       ++CommentDepth;
     370           4 :       break;
     371             :     }
     372             :   }
     373             : }
     374             : 
     375             : /// LexNumber - The first character (a sign or a digit) is already consumed.  Lex:
     376             : ///    [-+]?[0-9]+     -> IntVal (a '-' or '+' with no digit after it -> minus / plus)
     377             : ///    0x[0-9a-fA-F]+  -> IntVal
     378             : ///    0b[01]+         -> BinaryIntVal
     379     1712740 : tgtok::TokKind TGLexer::LexNumber() {
     380     1712740 :   if (CurPtr[-1] == '0') {  // A radix prefix is only recognised right after a just-consumed '0'.
     381      603069 :     if (CurPtr[0] == 'x') {
     382       95475 :       ++CurPtr;
     383       95475 :       const char *NumStart = CurPtr;
     384      563523 :       while (isxdigit(CurPtr[0]))
     385      234024 :         ++CurPtr;
     386             :       
     387             :       // Requires at least one hex digit.
     388       95475 :       if (CurPtr == NumStart)
     389           0 :         return ReturnError(TokStart, "Invalid hexadecimal number");
     390             : 
     391       95475 :       errno = 0;  // Cleared so a value left by earlier calls is not misread after strtoll.
     392       95475 :       CurIntVal = strtoll(NumStart, nullptr, 16);
     393       95475 :       if (errno == EINVAL)
     394           0 :         return ReturnError(TokStart, "Invalid hexadecimal number");
     395       95475 :       if (errno == ERANGE) {  // Out of strtoll's range: reparse as unsigned and keep the bits.
     396          84 :         errno = 0;
     397          84 :         CurIntVal = (int64_t)strtoull(NumStart, nullptr, 16);
     398          84 :         if (errno == EINVAL)
     399           0 :           return ReturnError(TokStart, "Invalid hexadecimal number");
     400          84 :         if (errno == ERANGE)
     401           0 :           return ReturnError(TokStart, "Hexadecimal number out of range");
     402             :       }
     403             :       return tgtok::IntVal;
     404      507594 :     } else if (CurPtr[0] == 'b') {
     405      223610 :       ++CurPtr;
     406      223610 :       const char *NumStart = CurPtr;
     407     2136664 :       while (CurPtr[0] == '0' || CurPtr[0] == '1')
     408      956527 :         ++CurPtr;
     409             : 
     410             :       // Requires at least one binary digit.
     411      223610 :       if (CurPtr == NumStart)
     412           0 :         return ReturnError(CurPtr-2, "Invalid binary number");  // CurPtr-2 is the '0' of the "0b" prefix.
     413      223610 :       CurIntVal = strtoll(NumStart, nullptr, 2);  // NOTE(review): errno unchecked, unlike the hex path; overflow goes undiagnosed.
     414      223610 :       return tgtok::BinaryIntVal;
     415             :     }
     416             :   }
     417             : 
     418             :   // Check for a sign without a digit.
     419     1393655 :   if (!isdigit(CurPtr[0])) {  // NOTE(review): plain char passed to isdigit; UB if negative.
     420      931578 :     if (CurPtr[-1] == '-')
     421             :       return tgtok::minus;
     422      931128 :     else if (CurPtr[-1] == '+')
     423             :       return tgtok::plus;
     424             :   }
     425             :   
     426     2583395 :   while (isdigit(CurPtr[0]))
     427      595095 :     ++CurPtr;
     428     1393205 :   CurIntVal = strtoll(TokStart, nullptr, 10);  // Parses from TokStart so a sign is included. NOTE(review): errno unchecked.
     429     1393205 :   return tgtok::IntVal;
     430             : }
     431             : 
     432             : /// LexBracket - We just read '['.  If '{' follows, lex a code fragment up to the
     433             : /// closing "}]" into CurStrVal; otherwise return tgtok::l_square.
     434     2464771 : tgtok::TokKind TGLexer::LexBracket() {
     435     2464771 :   if (CurPtr[0] != '{')
     436             :     return tgtok::l_square;
     437       27917 :   ++CurPtr;
     438       27917 :   const char *CodeStart = CurPtr;  // First character after "[{".
     439             :   while (true) {
     440     3904920 :     int Char = getNextChar();
     441     3904920 :     if (Char == EOF) break;
     442             :     
     443     3904920 :     if (Char != '}') continue;
     444             :     
     445       30465 :     Char = getNextChar();  // NOTE(review): consumed even when it is another '}', so "}}]" does not end the fragment.
     446       30465 :     if (Char == EOF) break;
     447       30465 :     if (Char == ']') {
     448       55834 :       CurStrVal.assign(CodeStart, CurPtr-2);  // CurPtr-2 excludes the closing "}]".
     449       27917 :       return tgtok::CodeFragment;
     450             :     }
     451             :   }
     452             :   
     453           0 :   return ReturnError(CodeStart-2, "Unterminated Code Block");  // CodeStart-2 is the opening '['.
     454             : }
     455             : 
     456             : /// LexExclaim - The '!' is already consumed.  Lex ![a-zA-Z]+ as a bang operator; a bare '!' is an error.
     457       56539 : tgtok::TokKind TGLexer::LexExclaim() {
     458       56539 :   if (!isalpha(*CurPtr))  // NOTE(review): plain char passed to isalpha; UB if negative.
     459           0 :     return ReturnError(CurPtr - 1, "Invalid \"!operator\"");
     460             :   
     461       56539 :   const char *Start = CurPtr++;  // First letter of the operator name.
     462      556611 :   while (isalpha(*CurPtr))
     463      250036 :     ++CurPtr;
     464             :   
     465             :   // Check to see which operator this is.
     466             :   tgtok::TokKind Kind =
     467       56539 :     StringSwitch<tgtok::TokKind>(StringRef(Start, CurPtr - Start))
     468      169617 :     .Case("eq", tgtok::XEq)
     469      169617 :     .Case("if", tgtok::XIf)
     470      169617 :     .Case("head", tgtok::XHead)
     471      169617 :     .Case("tail", tgtok::XTail)
     472      169617 :     .Case("con", tgtok::XConcat)
     473      169617 :     .Case("add", tgtok::XADD)
     474      169617 :     .Case("and", tgtok::XAND)
     475      169617 :     .Case("or", tgtok::XOR)
     476      169617 :     .Case("shl", tgtok::XSHL)
     477      169617 :     .Case("sra", tgtok::XSRA)
     478      169617 :     .Case("srl", tgtok::XSRL)
     479      169617 :     .Case("cast", tgtok::XCast)
     480      169617 :     .Case("empty", tgtok::XEmpty)
     481      169617 :     .Case("subst", tgtok::XSubst)
     482      169617 :     .Case("foreach", tgtok::XForEach)
     483      169617 :     .Case("listconcat", tgtok::XListConcat)
     484      169617 :     .Case("strconcat", tgtok::XStrConcat)
     485      113078 :     .Default(tgtok::Error);  // tgtok::Error doubles as the "no such operator" marker.
     486             : 
     487       56539 :   return Kind != tgtok::Error ? Kind : ReturnError(Start-1, "Unknown operator");  // Start-1 is the '!'.
     488             : }

Generated by: LCOV version 1.13