LCOV - code coverage report
Current view: top level - lib/TableGen - TGLexer.cpp (source / functions) Hit Total Coverage
Test: llvm-toolchain.info Lines: 223 252 88.5 %
Date: 2018-02-18 03:11:45 Functions: 14 15 93.3 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : //===- TGLexer.cpp - Lexer for TableGen -----------------------------------===//
       2             : //
       3             : //                     The LLVM Compiler Infrastructure
       4             : //
       5             : // This file is distributed under the University of Illinois Open Source
       6             : // License. See LICENSE.TXT for details.
       7             : //
       8             : //===----------------------------------------------------------------------===//
       9             : //
      10             : // Implement the Lexer for TableGen.
      11             : //
      12             : //===----------------------------------------------------------------------===//
      13             : 
      14             : #include "TGLexer.h"
      15             : #include "llvm/ADT/StringSwitch.h"
      16             : #include "llvm/ADT/Twine.h"
      17             : #include "llvm/Config/config.h" // for strtoull()/strtoll() define
      18             : #include "llvm/Support/Compiler.h"
      19             : #include "llvm/Support/MemoryBuffer.h"
      20             : #include "llvm/Support/SourceMgr.h"
      21             : #include "llvm/TableGen/Error.h"
      22             : #include <cctype>
      23             : #include <cerrno>
      24             : #include <cstdint>
      25             : #include <cstdio>
      26             : #include <cstdlib>
      27             : #include <cstring>
      28             : 
      29             : using namespace llvm;
      30             : 
      31         288 : TGLexer::TGLexer(SourceMgr &SM) : SrcMgr(SM) {
      32         288 :   CurBuffer = SrcMgr.getMainFileID();
      33         288 :   CurBuf = SrcMgr.getMemoryBuffer(CurBuffer)->getBuffer();
      34         288 :   CurPtr = CurBuf.begin();
      35         288 :   TokStart = nullptr;
      36         288 : }
      37             : 
      38    19842799 : SMLoc TGLexer::getLoc() const {
      39    19842799 :   return SMLoc::getFromPointer(TokStart);
      40             : }
      41             : 
      42             : /// ReturnError - Set the error to the specified string at the specified
      43             : /// location.  This is defined to always return tgtok::Error.
      44           0 : tgtok::TokKind TGLexer::ReturnError(const char *Loc, const Twine &Msg) {
      45           0 :   PrintError(Loc, Msg);
      46           0 :   return tgtok::Error;
      47             : }
      48             : 
      49   121862836 : int TGLexer::getNextChar() {
      50   121869731 :   char CurChar = *CurPtr++;
      51   121869731 :   switch (CurChar) {
      52   112934971 :   default:
      53   112934971 :     return (unsigned char)CurChar;
      54        7183 :   case 0: {
      55             :     // A nul character in the stream is either the end of the current buffer or
      56             :     // a random nul in the file.  Disambiguate that here.
      57        7183 :     if (CurPtr-1 != CurBuf.end())
      58             :       return 0;  // Just whitespace.
      59             :     
      60             :     // If this is the end of an included file, pop the parent file off the
      61             :     // include stack.
      62        7183 :     SMLoc ParentIncludeLoc = SrcMgr.getParentIncludeLoc(CurBuffer);
      63        7183 :     if (ParentIncludeLoc != SMLoc()) {
      64        6895 :       CurBuffer = SrcMgr.FindBufferContainingLoc(ParentIncludeLoc);
      65       13790 :       CurBuf = SrcMgr.getMemoryBuffer(CurBuffer)->getBuffer();
      66        6895 :       CurPtr = ParentIncludeLoc.getPointer();
      67        6895 :       return getNextChar();
      68             :     }
      69             :     
      70             :     // Otherwise, return end of file.
      71         288 :     --CurPtr;  // Another call to lex will return EOF again.  
      72         288 :     return EOF;
      73             :   }
      74     8927577 :   case '\n':
      75             :   case '\r':
      76             :     // Handle the newline character by ignoring it and incrementing the line
      77             :     // count.  However, be careful about 'dos style' files with \n\r in them.
      78             :     // Only treat a \n\r or \r\n as a single line.
      79     8927577 :     if ((*CurPtr == '\n' || (*CurPtr == '\r')) &&
      80             :         *CurPtr != CurChar)
      81           0 :       ++CurPtr;  // Eat the two char newline sequence.
      82             :     return '\n';
      83             :   }  
      84             : }
      85             : 
      86     2293300 : int TGLexer::peekNextChar(int Index) {
      87     2293300 :   return *(CurPtr + Index);
      88             : }
      89             : 
      90   118343940 : tgtok::TokKind TGLexer::LexToken() {
      91   118343940 :   TokStart = CurPtr;
      92             :   // This always consumes at least one character.
      93   118343940 :   int CurChar = getNextChar();
      94             : 
      95   118343940 :   switch (CurChar) {
      96    17466983 :   default:
      97             :     // Handle letters: [a-zA-Z_]
      98    17466983 :     if (isalpha(CurChar) || CurChar == '_')
      99    17466983 :       return LexIdentifier();
     100             : 
     101             :     // Unknown character, emit an error.
     102           0 :     return ReturnError(TokStart, "Unexpected character");
     103             :   case EOF: return tgtok::Eof;
     104     3162780 :   case ':': return tgtok::colon;
     105     2829522 :   case ';': return tgtok::semi;
     106       87255 :   case '.': return tgtok::period;
     107     7317213 :   case ',': return tgtok::comma;
     108     3226735 :   case '<': return tgtok::less;
     109     3226735 :   case '>': return tgtok::greater;
     110     2754819 :   case ']': return tgtok::r_square;
     111      522051 :   case '{': return tgtok::l_brace;
     112      522051 :   case '}': return tgtok::r_brace;
     113     1211667 :   case '(': return tgtok::l_paren;
     114     1211667 :   case ')': return tgtok::r_paren;
     115      860596 :   case '=': return tgtok::equal;
     116       21148 :   case '?': return tgtok::question;
     117       89090 :   case '#': return tgtok::paste;
     118             :       
     119    64479346 :   case 0:
     120             :   case ' ':
     121             :   case '\t':
     122             :   case '\n':
     123             :   case '\r':
     124             :     // Ignore whitespace.
     125    64479346 :     return LexToken();
     126     1798809 :   case '/':
     127             :     // If this is the start of a // comment, skip until the end of the line or
     128             :     // the end of the buffer.
     129     1798809 :     if (*CurPtr == '/')
     130     1783815 :       SkipBCPLComment();
     131       14994 :     else if (*CurPtr == '*') {
     132       14994 :       if (SkipCComment())
     133             :         return tgtok::Error;
     134             :     } else // Otherwise, this is an error.
     135           0 :       return ReturnError(TokStart, "Unexpected character");
     136     1798808 :     return LexToken();
     137     1842973 :   case '-': case '+':
     138             :   case '0': case '1': case '2': case '3': case '4': case '5': case '6':
     139             :   case '7': case '8': case '9': {
     140             :     int NextChar = 0;
     141     1842973 :     if (isdigit(CurChar)) {
     142             :       // Allow identifiers to start with a number if it is followed by
     143             :       // an identifier.  This can happen with paste operations like
     144             :       // foo#8i.
     145             :       int i = 0;
     146             :       do {
     147     1977418 :         NextChar = peekNextChar(i++);
     148     1977418 :       } while (isdigit(NextChar));
     149             : 
     150     1651224 :       if (NextChar == 'x' || NextChar == 'b') {
     151             :         // If this is [0-9]b[01] or [0-9]x[0-9A-fa-f] this is most
     152             :         // likely a number.
     153      315882 :         int NextNextChar = peekNextChar(i);
     154             :         switch (NextNextChar) {
     155             :         default:
     156             :           break;
     157      241897 :         case '0': case '1': 
     158      241897 :           if (NextChar == 'b')
     159      218464 :             return LexNumber();
     160             :           LLVM_FALLTHROUGH;
     161             :         case '2': case '3': case '4': case '5':
     162             :         case '6': case '7': case '8': case '9':
     163             :         case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
     164             :         case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
     165       97418 :           if (NextChar == 'x')
     166       97418 :             return LexNumber();
     167             :           break;
     168             :         }
     169             :       }
     170             :     }
     171             : 
     172     1527091 :     if (isalpha(NextChar) || NextChar == '_')
     173        2119 :       return LexIdentifier();
     174             : 
     175     1524972 :     return LexNumber();
     176             :   }
     177     1903903 :   case '"': return LexString();
     178      974209 :   case '$': return LexVarName();
     179     2771874 :   case '[': return LexBracket();
     180       62227 :   case '!': return LexExclaim();
     181             :   }
     182             : }
     183             : 
     184             : /// LexString - Lex "[^"]*"
     185     1903903 : tgtok::TokKind TGLexer::LexString() {
     186     1903903 :   const char *StrStart = CurPtr;
     187             :   
     188     1903903 :   CurStrVal = "";
     189             :   
     190    38832043 :   while (*CurPtr != '"') {
     191             :     // If we hit the end of the buffer, report an error.
     192    36928140 :     if (*CurPtr == 0 && CurPtr == CurBuf.end())
     193           0 :       return ReturnError(StrStart, "End of file in string literal");
     194             :     
     195    36928140 :     if (*CurPtr == '\n' || *CurPtr == '\r')
     196           0 :       return ReturnError(StrStart, "End of line in string literal");
     197             :     
     198    73785187 :     if (*CurPtr != '\\') {
     199    36857047 :       CurStrVal += *CurPtr++;
     200    36857047 :       continue;
     201             :     }
     202             : 
     203       71093 :     ++CurPtr;
     204             :     
     205       71093 :     switch (*CurPtr) {
     206       13538 :     case '\\': case '\'': case '"':
     207             :       // These turn into their literal character.
     208       13538 :       CurStrVal += *CurPtr++;
     209             :       break;
     210       56825 :     case 't':
     211             :       CurStrVal += '\t';
     212       56825 :       ++CurPtr;
     213       56825 :       break;
     214         730 :     case 'n':
     215             :       CurStrVal += '\n';
     216         730 :       ++CurPtr;
     217         730 :       break;
     218             :         
     219             :     case '\n':
     220             :     case '\r':
     221           0 :       return ReturnError(CurPtr, "escaped newlines not supported in tblgen");
     222             : 
     223             :     // If we hit the end of the buffer, report an error.
     224           0 :     case '\0':
     225           0 :       if (CurPtr == CurBuf.end())
     226           0 :         return ReturnError(StrStart, "End of file in string literal");
     227             :       LLVM_FALLTHROUGH;
     228             :     default:
     229           0 :       return ReturnError(CurPtr, "invalid escape in string literal");
     230             :     }
     231             :   }
     232             :   
     233     1903903 :   ++CurPtr;
     234     1903903 :   return tgtok::StrVal;
     235             : }
     236             : 
     237      974209 : tgtok::TokKind TGLexer::LexVarName() {
     238      974209 :   if (!isalpha(CurPtr[0]) && CurPtr[0] != '_')
     239           0 :     return ReturnError(TokStart, "Invalid variable name");
     240             :   
     241             :   // Otherwise, we're ok, consume the rest of the characters.
     242      974209 :   const char *VarNameStart = CurPtr++;
     243             :   
     244     5044279 :   while (isalpha(*CurPtr) || isdigit(*CurPtr) || *CurPtr == '_')
     245     2035035 :     ++CurPtr;
     246             : 
     247      974209 :   CurStrVal.assign(VarNameStart, CurPtr);
     248      974209 :   return tgtok::VarName;
     249             : }
     250             : 
     251    17469102 : tgtok::TokKind TGLexer::LexIdentifier() {
     252             :   // The first letter is [a-zA-Z_#].
     253    17469102 :   const char *IdentStart = TokStart;
     254             : 
     255             :   // Match the rest of the identifier regex: [0-9a-zA-Z_#]*
     256   312049932 :   while (isalpha(*CurPtr) || isdigit(*CurPtr) || *CurPtr == '_')
     257   147290415 :     ++CurPtr;
     258             : 
     259             :   // Check to see if this identifier is a keyword.
     260    17469102 :   StringRef Str(IdentStart, CurPtr-IdentStart);
     261             : 
     262             :   if (Str == "include") {
     263        6895 :     if (LexInclude()) return tgtok::Error;
     264        6895 :     return Lex();
     265             :   }
     266             : 
     267             :   tgtok::TokKind Kind = StringSwitch<tgtok::TokKind>(Str)
     268    34924414 :     .Case("int", tgtok::Int)
     269    34924414 :     .Case("bit", tgtok::Bit)
     270    34924414 :     .Case("bits", tgtok::Bits)
     271    34924414 :     .Case("string", tgtok::String)
     272    34924414 :     .Case("list", tgtok::List)
     273    34924414 :     .Case("code", tgtok::Code)
     274    34924414 :     .Case("dag", tgtok::Dag)
     275    34924414 :     .Case("class", tgtok::Class)
     276    34924414 :     .Case("def", tgtok::Def)
     277    34924414 :     .Case("foreach", tgtok::Foreach)
     278    34924414 :     .Case("defm", tgtok::Defm)
     279    34924414 :     .Case("multiclass", tgtok::MultiClass)
     280    34924414 :     .Case("field", tgtok::Field)
     281    34924414 :     .Case("let", tgtok::Let)
     282    34924414 :     .Case("in", tgtok::In)
     283             :     .Default(tgtok::Id);
     284             : 
     285     3462156 :   if (Kind == tgtok::Id)
     286    14000051 :     CurStrVal.assign(Str.begin(), Str.end());
     287             :   return Kind;
     288             : }
     289             : 
     290             : /// LexInclude - We just read the "include" token.  Get the string token that
     291             : /// comes next and enter the include.
     292        6895 : bool TGLexer::LexInclude() {
     293             :   // The token after the include must be a string.
     294        6895 :   tgtok::TokKind Tok = LexToken();
     295        6895 :   if (Tok == tgtok::Error) return true;
     296        6895 :   if (Tok != tgtok::StrVal) {
     297           0 :     PrintError(getLoc(), "Expected filename after include");
     298           0 :     return true;
     299             :   }
     300             : 
     301             :   // Get the string.
     302             :   std::string Filename = CurStrVal;
     303             :   std::string IncludedFile;
     304             : 
     305        6895 :   CurBuffer = SrcMgr.AddIncludeFile(Filename, SMLoc::getFromPointer(CurPtr),
     306             :                                     IncludedFile);
     307        6895 :   if (!CurBuffer) {
     308           0 :     PrintError(getLoc(), "Could not find include file '" + Filename + "'");
     309           0 :     return true;
     310             :   }
     311             :   
     312             :   DependenciesMapTy::const_iterator Found = Dependencies.find(IncludedFile);
     313        6895 :   if (Found != Dependencies.end()) {
     314           0 :     PrintError(getLoc(),
     315           0 :                "File '" + IncludedFile + "' has already been included.");
     316           0 :     SrcMgr.PrintMessage(Found->second, SourceMgr::DK_Note,
     317             :                         "previously included here");
     318           0 :     return true;
     319             :   }
     320       13790 :   Dependencies.insert(std::make_pair(IncludedFile, getLoc()));
     321             :   // Save the line number and lex buffer of the includer.
     322       13790 :   CurBuf = SrcMgr.getMemoryBuffer(CurBuffer)->getBuffer();
     323        6895 :   CurPtr = CurBuf.begin();
     324        6895 :   return false;
     325             : }
     326             : 
     327     1783815 : void TGLexer::SkipBCPLComment() {
     328     1783815 :   ++CurPtr;  // skip the second slash.
     329    56330521 :   while (true) {
     330    58114336 :     switch (*CurPtr) {
     331             :     case '\n':
     332             :     case '\r':
     333             :       return;  // Newline is end of comment.
     334           1 :     case 0:
     335             :       // If this is the end of the buffer, end the comment.
     336           1 :       if (CurPtr == CurBuf.end())
     337             :         return;
     338             :       break;
     339             :     }
     340             :     // Otherwise, skip the character.
     341    56330521 :     ++CurPtr;
     342             :   }
     343             : }
     344             : 
     345             : /// SkipCComment - This skips C-style /**/ comments.  The only difference from C
     346             : /// is that we allow nesting.
     347       14994 : bool TGLexer::SkipCComment() {
     348       14994 :   ++CurPtr;  // skip the star.
     349             :   unsigned CommentDepth = 1;
     350             :   
     351             :   while (true) {
     352      279877 :     int CurChar = getNextChar();
     353      279877 :     switch (CurChar) {
     354             :     case EOF:
     355           1 :       PrintError(TokStart, "Unterminated comment!");
     356           1 :       return true;
     357       20249 :     case '*':
     358             :       // End of the comment?
     359       20249 :       if (CurPtr[0] != '/') break;
     360             :       
     361       14996 :       ++CurPtr;   // End the */.
     362       14996 :       if (--CommentDepth == 0)
     363             :         return false;
     364             :       break;
     365         126 :     case '/':
     366             :       // Start of a nested comment?
     367         126 :       if (CurPtr[0] != '*') break;
     368           4 :       ++CurPtr;
     369           4 :       ++CommentDepth;
     370           4 :       break;
     371             :     }
     372             :   }
     373             : }
     374             : 
     375             : /// LexNumber - Lex:
     376             : ///    [-+]?[0-9]+
     377             : ///    0x[0-9a-fA-F]+
     378             : ///    0b[01]+
     379     1840854 : tgtok::TokKind TGLexer::LexNumber() {
     380     1840854 :   if (CurPtr[-1] == '0') {
     381      611166 :     if (CurPtr[0] == 'x') {
     382       97418 :       ++CurPtr;
     383             :       const char *NumStart = CurPtr;
     384      572138 :       while (isxdigit(CurPtr[0]))
     385      237360 :         ++CurPtr;
     386             :       
     387             :       // Requires at least one hex digit.
     388       97418 :       if (CurPtr == NumStart)
     389           0 :         return ReturnError(TokStart, "Invalid hexadecimal number");
     390             : 
     391       97418 :       errno = 0;
     392       97418 :       CurIntVal = strtoll(NumStart, nullptr, 16);
     393       97418 :       if (errno == EINVAL)
     394           0 :         return ReturnError(TokStart, "Invalid hexadecimal number");
     395       97418 :       if (errno == ERANGE) {
     396          84 :         errno = 0;
     397          84 :         CurIntVal = (int64_t)strtoull(NumStart, nullptr, 16);
     398          84 :         if (errno == EINVAL)
     399           0 :           return ReturnError(TokStart, "Invalid hexadecimal number");
     400          84 :         if (errno == ERANGE)
     401           0 :           return ReturnError(TokStart, "Hexadecimal number out of range");
     402             :       }
     403             :       return tgtok::IntVal;
     404      513748 :     } else if (CurPtr[0] == 'b') {
     405      218464 :       ++CurPtr;
     406             :       const char *NumStart = CurPtr;
     407     2065774 :       while (CurPtr[0] == '0' || CurPtr[0] == '1')
     408      923655 :         ++CurPtr;
     409             : 
     410             :       // Requires at least one binary digit.
     411      218464 :       if (CurPtr == NumStart)
     412           0 :         return ReturnError(CurPtr-2, "Invalid binary number");
     413      218464 :       CurIntVal = strtoll(NumStart, nullptr, 2);
     414      218464 :       return tgtok::BinaryIntVal;
     415             :     }
     416             :   }
     417             : 
     418             :   // Check for a sign without a digit.
     419     1524972 :   if (!isdigit(CurPtr[0])) {
     420     1044886 :     if (CurPtr[-1] == '-')
     421             :       return tgtok::minus;
     422     1044436 :     else if (CurPtr[-1] == '+')
     423             :       return tgtok::plus;
     424             :   }
     425             :   
     426     2752212 :   while (isdigit(CurPtr[0]))
     427      613845 :     ++CurPtr;
     428     1524522 :   CurIntVal = strtoll(TokStart, nullptr, 10);
     429     1524522 :   return tgtok::IntVal;
     430             : }
     431             : 
     432             : /// LexBracket - We just read '['.  If this is a code block, return it,
     433             : /// otherwise return the bracket.  Match: '[' and '[{ ( [^}]+ | }[^]] )* }]'
     434     2771874 : tgtok::TokKind TGLexer::LexBracket() {
     435     2771874 :   if (CurPtr[0] != '{')
     436             :     return tgtok::l_square;
     437       17055 :   ++CurPtr;
     438             :   const char *CodeStart = CurPtr;
     439             :   while (true) {
     440     3219238 :     int Char = getNextChar();
     441     3219238 :     if (Char == EOF) break;
     442             :     
     443     3219238 :     if (Char != '}') continue;
     444             :     
     445       19781 :     Char = getNextChar();
     446       19781 :     if (Char == EOF) break;
     447       19781 :     if (Char == ']') {
     448       17055 :       CurStrVal.assign(CodeStart, CurPtr-2);
     449       17055 :       return tgtok::CodeFragment;
     450             :     }
     451             :   }
     452             :   
     453           0 :   return ReturnError(CodeStart-2, "Unterminated Code Block");
     454             : }
     455             : 
     456             : /// LexExclaim - Lex '!' and '![a-zA-Z]+'.
     457       62227 : tgtok::TokKind TGLexer::LexExclaim() {
     458       62227 :   if (!isalpha(*CurPtr))
     459           0 :     return ReturnError(CurPtr - 1, "Invalid \"!operator\"");
     460             :   
     461       62227 :   const char *Start = CurPtr++;
     462      581401 :   while (isalpha(*CurPtr))
     463      259587 :     ++CurPtr;
     464             :   
     465             :   // Check to see which operator this is.
     466             :   tgtok::TokKind Kind =
     467       62227 :     StringSwitch<tgtok::TokKind>(StringRef(Start, CurPtr - Start))
     468      124454 :     .Case("eq", tgtok::XEq)
     469      124454 :     .Case("if", tgtok::XIf)
     470      124454 :     .Case("head", tgtok::XHead)
     471      124454 :     .Case("tail", tgtok::XTail)
     472      124454 :     .Case("con", tgtok::XConcat)
     473      124454 :     .Case("add", tgtok::XADD)
     474      124454 :     .Case("and", tgtok::XAND)
     475      124454 :     .Case("or", tgtok::XOR)
     476      124454 :     .Case("shl", tgtok::XSHL)
     477      124454 :     .Case("sra", tgtok::XSRA)
     478      124454 :     .Case("srl", tgtok::XSRL)
     479      124454 :     .Case("cast", tgtok::XCast)
     480      124454 :     .Case("empty", tgtok::XEmpty)
     481      124454 :     .Case("subst", tgtok::XSubst)
     482      124454 :     .Case("foreach", tgtok::XForEach)
     483      124454 :     .Case("listconcat", tgtok::XListConcat)
     484      124454 :     .Case("strconcat", tgtok::XStrConcat)
     485             :     .Default(tgtok::Error);
     486             : 
     487       62227 :   return Kind != tgtok::Error ? Kind : ReturnError(Start-1, "Unknown operator");
     488             : }

Generated by: LCOV version 1.13