LCOV - llvm-toolchain.info - lib/TableGen/TGLexer.cpp

LCOV - code coverage report

Current view:	top level - lib/TableGen - TGLexer.cpp (source / functions)		Hit	Total	Coverage
Test:	llvm-toolchain.info	Lines:	218	247	88.3 %
Date:	2018-10-20 13:21:21	Functions:	14	15	93.3 %
Legend:	Lines: hit not hit

          Line data    Source code

       1             : //===- TGLexer.cpp - Lexer for TableGen -----------------------------------===//
       2             : //
       3             : //                     The LLVM Compiler Infrastructure
       4             : //
       5             : // This file is distributed under the University of Illinois Open Source
       6             : // License. See LICENSE.TXT for details.
       7             : //
       8             : //===----------------------------------------------------------------------===//
       9             : //
      10             : // Implement the Lexer for TableGen.
      11             : //
      12             : //===----------------------------------------------------------------------===//
      13             : 
      14             : #include "TGLexer.h"
      15             : #include "llvm/ADT/StringSwitch.h"
      16             : #include "llvm/ADT/Twine.h"
      17             : #include "llvm/Config/config.h" // for strtoull()/strtoll() define
      18             : #include "llvm/Support/Compiler.h"
      19             : #include "llvm/Support/MemoryBuffer.h"
      20             : #include "llvm/Support/SourceMgr.h"
      21             : #include "llvm/TableGen/Error.h"
      22             : #include <cctype>
      23             : #include <cerrno>
      24             : #include <cstdint>
      25             : #include <cstdio>
      26             : #include <cstdlib>
      27             : #include <cstring>
      28             : 
      29             : using namespace llvm;
      30             : 
      31         352 : TGLexer::TGLexer(SourceMgr &SM) : SrcMgr(SM) {
      32         352 :   CurBuffer = SrcMgr.getMainFileID();
      33         352 :   CurBuf = SrcMgr.getMemoryBuffer(CurBuffer)->getBuffer();
      34         352 :   CurPtr = CurBuf.begin();
      35         352 :   TokStart = nullptr;
      36         352 : }
      37             : 
      38    21844850 : SMLoc TGLexer::getLoc() const {
      39    21844850 :   return SMLoc::getFromPointer(TokStart);
      40             : }
      41             : 
      42             : /// ReturnError - Set the error to the specified string at the specified
      43             : /// location.  This is defined to always return tgtok::Error.
      44           0 : tgtok::TokKind TGLexer::ReturnError(const char *Loc, const Twine &Msg) {
      45           0 :   PrintError(Loc, Msg);
      46           0 :   return tgtok::Error;
      47             : }
      48             : 
      49   137885815 : int TGLexer::getNextChar() {
      50   137894267 :   char CurChar = *CurPtr++;
      51   137894267 :   switch (CurChar) {
      52   127573537 :   default:
      53   127573537 :     return (unsigned char)CurChar;
      54        8800 :   case 0: {
      55             :     // A nul character in the stream is either the end of the current buffer or
      56             :     // a random nul in the file.  Disambiguate that here.
      57       17600 :     if (CurPtr-1 != CurBuf.end())
      58             :       return 0;  // Just whitespace.
      59             : 
      60             :     // If this is the end of an included file, pop the parent file off the
      61             :     // include stack.
      62        8800 :     SMLoc ParentIncludeLoc = SrcMgr.getParentIncludeLoc(CurBuffer);
      63        8800 :     if (ParentIncludeLoc != SMLoc()) {
      64        8452 :       CurBuffer = SrcMgr.FindBufferContainingLoc(ParentIncludeLoc);
      65        8452 :       CurBuf = SrcMgr.getMemoryBuffer(CurBuffer)->getBuffer();
      66        8452 :       CurPtr = ParentIncludeLoc.getPointer();
      67        8452 :       return getNextChar();
      68             :     }
      69             : 
      70             :     // Otherwise, return end of file.
      71         348 :     --CurPtr;  // Another call to lex will return EOF again.
      72         348 :     return EOF;
      73             :   }
      74    10311930 :   case '\n':
      75             :   case '\r':
      76             :     // Handle the newline character by ignoring it and incrementing the line
      77             :     // count.  However, be careful about 'dos style' files with \n\r in them.
      78             :     // Only treat a \n\r or \r\n as a single line.
      79    10311930 :     if ((*CurPtr == '\n' || (*CurPtr == '\r')) &&
      80             :         *CurPtr != CurChar)
      81           0 :       ++CurPtr;  // Eat the two char newline sequence.
      82             :     return '\n';
      83             :   }
      84             : }
      85             : 
      86     2646843 : int TGLexer::peekNextChar(int Index) {
      87     2646843 :   return *(CurPtr + Index);
      88             : }
      89             : 
      90   133946215 : tgtok::TokKind TGLexer::LexToken() {
      91   133946215 :   TokStart = CurPtr;
      92             :   // This always consumes at least one character.
      93   133946215 :   int CurChar = getNextChar();
      94             : 
      95   133946215 :   switch (CurChar) {
      96    19523396 :   default:
      97             :     // Handle letters: [a-zA-Z_]
      98    19523396 :     if (isalpha(CurChar) || CurChar == '_')
      99    19523396 :       return LexIdentifier();
     100             : 
     101             :     // Unknown character, emit an error.
     102           0 :     return ReturnError(TokStart, "Unexpected character");
     103             :   case EOF: return tgtok::Eof;
     104     3481361 :   case ':': return tgtok::colon;
     105     3127853 :   case ';': return tgtok::semi;
     106      123623 :   case '.': return tgtok::period;
     107     8214572 :   case ',': return tgtok::comma;
     108     3609746 :   case '<': return tgtok::less;
     109     3609746 :   case '>': return tgtok::greater;
     110     3005986 :   case ']': return tgtok::r_square;
     111      593268 :   case '{': return tgtok::l_brace;
     112      593268 :   case '}': return tgtok::r_brace;
     113     1194006 :   case '(': return tgtok::l_paren;
     114     1194006 :   case ')': return tgtok::r_paren;
     115      976359 :   case '=': return tgtok::equal;
     116       24561 :   case '?': return tgtok::question;
     117      112116 :   case '#': return tgtok::paste;
     118             : 
     119    73963432 :   case 0:
     120             :   case ' ':
     121             :   case '\t':
     122             :   case '\n':
     123             :   case '\r':
     124             :     // Ignore whitespace.
     125    73963432 :     return LexToken();
     126     2250944 :   case '/':
     127             :     // If this is the start of a // comment, skip until the end of the line or
     128             :     // the end of the buffer.
     129     2250944 :     if (*CurPtr == '/')
     130     2238193 :       SkipBCPLComment();
     131       12751 :     else if (*CurPtr == '*') {
     132       12751 :       if (SkipCComment())
     133             :         return tgtok::Error;
     134             :     } else // Otherwise, this is an error.
     135           0 :       return ReturnError(TokStart, "Unexpected character");
     136     2250943 :     return LexToken();
     137     2124691 :   case '-': case '+':
     138             :   case '0': case '1': case '2': case '3': case '4': case '5': case '6':
     139             :   case '7': case '8': case '9': {
     140             :     int NextChar = 0;
     141     2124691 :     if (isdigit(CurChar)) {
     142             :       // Allow identifiers to start with a number if it is followed by
     143             :       // an identifier.  This can happen with paste operations like
     144             :       // foo#8i.
     145             :       int i = 0;
     146             :       do {
     147     2276596 :         NextChar = peekNextChar(i++);
     148     2276596 :       } while (isdigit(NextChar));
     149             : 
     150     1911391 :       if (NextChar == 'x' || NextChar == 'b') {
     151             :         // If this is [0-9]b[01] or [0-9]x[0-9A-fa-f] this is most
     152             :         // likely a number.
     153      370247 :         int NextNextChar = peekNextChar(i);
     154             :         switch (NextNextChar) {
     155             :         default:
     156             :           break;
     157      288315 :         case '0': case '1':
     158      288315 :           if (NextChar == 'b')
     159      261719 :             return LexNumber();
     160             :           LLVM_FALLTHROUGH;
     161             :         case '2': case '3': case '4': case '5':
     162             :         case '6': case '7': case '8': case '9':
     163             :         case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
     164             :         case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
     165      108528 :           if (NextChar == 'x')
     166      108528 :             return LexNumber();
     167             :           break;
     168             :         }
     169             :       }
     170             :     }
     171             : 
     172     1754444 :     if (isalpha(NextChar) || NextChar == '_')
     173        2041 :       return LexIdentifier();
     174             : 
     175     1752403 :     return LexNumber();
     176             :   }
     177     2024271 :   case '"': return LexString();
     178     1085439 :   case '$': return LexVarName();
     179     3026716 :   case '[': return LexBracket();
     180       86508 :   case '!': return LexExclaim();
     181             :   }
     182             : }
     183             : 
     184             : /// LexString - Lex "[^"]*"
     185     2024271 : tgtok::TokKind TGLexer::LexString() {
     186     2024271 :   const char *StrStart = CurPtr;
     187             : 
     188     2024271 :   CurStrVal = "";
     189             : 
     190    41228815 :   while (*CurPtr != '"') {
     191             :     // If we hit the end of the buffer, report an error.
     192    39204544 :     if (*CurPtr == 0 && CurPtr == CurBuf.end())
     193           0 :       return ReturnError(StrStart, "End of file in string literal");
     194             : 
     195    39204544 :     if (*CurPtr == '\n' || *CurPtr == '\r')
     196           0 :       return ReturnError(StrStart, "End of line in string literal");
     197             : 
     198    39204544 :     if (*CurPtr != '\\') {
     199    39124954 :       CurStrVal += *CurPtr++;
     200    39124954 :       continue;
     201             :     }
     202             : 
     203       79590 :     ++CurPtr;
     204             : 
     205       79590 :     switch (*CurPtr) {
     206       13646 :     case '\\': case '\'': case '"':
     207             :       // These turn into their literal character.
     208       13646 :       CurStrVal += *CurPtr++;
     209             :       break;
     210       65142 :     case 't':
     211             :       CurStrVal += '\t';
     212       65142 :       ++CurPtr;
     213       65142 :       break;
     214         802 :     case 'n':
     215             :       CurStrVal += '\n';
     216         802 :       ++CurPtr;
     217         802 :       break;
     218             : 
     219             :     case '\n':
     220             :     case '\r':
     221           0 :       return ReturnError(CurPtr, "escaped newlines not supported in tblgen");
     222             : 
     223             :     // If we hit the end of the buffer, report an error.
     224           0 :     case '\0':
     225           0 :       if (CurPtr == CurBuf.end())
     226           0 :         return ReturnError(StrStart, "End of file in string literal");
     227             :       LLVM_FALLTHROUGH;
     228             :     default:
     229           0 :       return ReturnError(CurPtr, "invalid escape in string literal");
     230             :     }
     231             :   }
     232             : 
     233     2024271 :   ++CurPtr;
     234     2024271 :   return tgtok::StrVal;
     235             : }
     236             : 
     237     1085439 : tgtok::TokKind TGLexer::LexVarName() {
     238     1085439 :   if (!isalpha(CurPtr[0]) && CurPtr[0] != '_')
     239           0 :     return ReturnError(TokStart, "Invalid variable name");
     240             : 
     241             :   // Otherwise, we're ok, consume the rest of the characters.
     242     1085439 :   const char *VarNameStart = CurPtr++;
     243             : 
     244     3360273 :   while (isalpha(*CurPtr) || isdigit(*CurPtr) || *CurPtr == '_')
     245     2274834 :     ++CurPtr;
     246             : 
     247     1085439 :   CurStrVal.assign(VarNameStart, CurPtr);
     248     1085439 :   return tgtok::VarName;
     249             : }
     250             : 
     251    19525437 : tgtok::TokKind TGLexer::LexIdentifier() {
     252             :   // The first letter is [a-zA-Z_#].
     253    19525437 :   const char *IdentStart = TokStart;
     254             : 
     255             :   // Match the rest of the identifier regex: [0-9a-zA-Z_#]*
     256   185477364 :   while (isalpha(*CurPtr) || isdigit(*CurPtr) || *CurPtr == '_')
     257   165951927 :     ++CurPtr;
     258             : 
     259             :   // Check to see if this identifier is a keyword.
     260    19525437 :   StringRef Str(IdentStart, CurPtr-IdentStart);
     261             : 
     262             :   if (Str == "include") {
     263        8452 :     if (LexInclude()) return tgtok::Error;
     264        8452 :     return Lex();
     265             :   }
     266             : 
     267    15635308 :   tgtok::TokKind Kind = StringSwitch<tgtok::TokKind>(Str)
     268             :     .Case("int", tgtok::Int)
     269             :     .Case("bit", tgtok::Bit)
     270             :     .Case("bits", tgtok::Bits)
     271             :     .Case("string", tgtok::String)
     272             :     .Case("list", tgtok::List)
     273             :     .Case("code", tgtok::Code)
     274             :     .Case("dag", tgtok::Dag)
     275             :     .Case("class", tgtok::Class)
     276             :     .Case("def", tgtok::Def)
     277             :     .Case("foreach", tgtok::Foreach)
     278             :     .Case("defm", tgtok::Defm)
     279             :     .Case("defset", tgtok::Defset)
     280             :     .Case("multiclass", tgtok::MultiClass)
     281             :     .Case("field", tgtok::Field)
     282             :     .Case("let", tgtok::Let)
     283             :     .Case("in", tgtok::In)
     284             :     .Default(tgtok::Id);
     285             : 
     286     3881677 :   if (Kind == tgtok::Id)
     287    15635308 :     CurStrVal.assign(Str.begin(), Str.end());
     288             :   return Kind;
     289             : }
     290             : 
     291             : /// LexInclude - We just read the "include" token.  Get the string token that
     292             : /// comes next and enter the include.
     293        8452 : bool TGLexer::LexInclude() {
     294             :   // The token after the include must be a string.
     295        8452 :   tgtok::TokKind Tok = LexToken();
     296        8452 :   if (Tok == tgtok::Error) return true;
     297        8452 :   if (Tok != tgtok::StrVal) {
     298           0 :     PrintError(getLoc(), "Expected filename after include");
     299           0 :     return true;
     300             :   }
     301             : 
     302             :   // Get the string.
     303             :   std::string Filename = CurStrVal;
     304             :   std::string IncludedFile;
     305             : 
     306        8452 :   CurBuffer = SrcMgr.AddIncludeFile(Filename, SMLoc::getFromPointer(CurPtr),
     307             :                                     IncludedFile);
     308        8452 :   if (!CurBuffer) {
     309           0 :     PrintError(getLoc(), "Could not find include file '" + Filename + "'");
     310           0 :     return true;
     311             :   }
     312             : 
     313             :   DependenciesMapTy::const_iterator Found = Dependencies.find(IncludedFile);
     314        8452 :   if (Found != Dependencies.end()) {
     315           0 :     PrintError(getLoc(),
     316           0 :                "File '" + IncludedFile + "' has already been included.");
     317           0 :     SrcMgr.PrintMessage(Found->second, SourceMgr::DK_Note,
     318             :                         "previously included here");
     319           0 :     return true;
     320             :   }
     321        8452 :   Dependencies.insert(std::make_pair(IncludedFile, getLoc()));
     322             :   // Save the line number and lex buffer of the includer.
     323        8452 :   CurBuf = SrcMgr.getMemoryBuffer(CurBuffer)->getBuffer();
     324        8452 :   CurPtr = CurBuf.begin();
     325        8452 :   return false;
     326             : }
     327             : 
     328     2238193 : void TGLexer::SkipBCPLComment() {
     329     2238193 :   ++CurPtr;  // skip the second slash.
     330             :   while (true) {
     331    74788217 :     switch (*CurPtr) {
     332             :     case '\n':
     333             :     case '\r':
     334             :       return;  // Newline is end of comment.
     335           1 :     case 0:
     336             :       // If this is the end of the buffer, end the comment.
     337           2 :       if (CurPtr == CurBuf.end())
     338             :         return;
     339             :       break;
     340             :     }
     341             :     // Otherwise, skip the character.
     342    72550024 :     ++CurPtr;
     343             :   }
     344             : }
     345             : 
     346             : /// SkipCComment - This skips C-style /**/ comments.  The only difference from C
     347             : /// is that we allow nesting.
     348       12751 : bool TGLexer::SkipCComment() {
     349       12751 :   ++CurPtr;  // skip the star.
     350             :   unsigned CommentDepth = 1;
     351             : 
     352             :   while (true) {
     353      254846 :     int CurChar = getNextChar();
     354      254846 :     switch (CurChar) {
     355             :     case EOF:
     356           1 :       PrintError(TokStart, "Unterminated comment!");
     357           1 :       return true;
     358       18749 :     case '*':
     359             :       // End of the comment?
     360       18749 :       if (CurPtr[0] != '/') break;
     361             : 
     362       12753 :       ++CurPtr;   // End the */.
     363       12753 :       if (--CommentDepth == 0)
     364             :         return false;
     365             :       break;
     366         102 :     case '/':
     367             :       // Start of a nested comment?
     368         102 :       if (CurPtr[0] != '*') break;
     369           4 :       ++CurPtr;
     370           4 :       ++CommentDepth;
     371           4 :       break;
     372             :     }
     373             :   }
     374             : }
     375             : 
     376             : /// LexNumber - Lex:
     377             : ///    [-+]?[0-9]+
     378             : ///    0x[0-9a-fA-F]+
     379             : ///    0b[01]+
     380     2122650 : tgtok::TokKind TGLexer::LexNumber() {
     381     2122650 :   if (CurPtr[-1] == '0') {
     382      725752 :     if (CurPtr[0] == 'x') {
     383      108528 :       ++CurPtr;
     384             :       const char *NumStart = CurPtr;
     385      373425 :       while (isxdigit(CurPtr[0]))
     386      264897 :         ++CurPtr;
     387             : 
     388             :       // Requires at least one hex digit.
     389      108528 :       if (CurPtr == NumStart)
     390           0 :         return ReturnError(TokStart, "Invalid hexadecimal number");
     391             : 
     392      108528 :       errno = 0;
     393      108528 :       CurIntVal = strtoll(NumStart, nullptr, 16);
     394      108528 :       if (errno == EINVAL)
     395           0 :         return ReturnError(TokStart, "Invalid hexadecimal number");
     396      108528 :       if (errno == ERANGE) {
     397          94 :         errno = 0;
     398          94 :         CurIntVal = (int64_t)strtoull(NumStart, nullptr, 16);
     399          94 :         if (errno == EINVAL)
     400           0 :           return ReturnError(TokStart, "Invalid hexadecimal number");
     401          94 :         if (errno == ERANGE)
     402           0 :           return ReturnError(TokStart, "Hexadecimal number out of range");
     403             :       }
     404      108528 :       return tgtok::IntVal;
     405      617224 :     } else if (CurPtr[0] == 'b') {
     406      261719 :       ++CurPtr;
     407             :       const char *NumStart = CurPtr;
     408     1337245 :       while (CurPtr[0] == '0' || CurPtr[0] == '1')
     409     1075526 :         ++CurPtr;
     410             : 
     411             :       // Requires at least one binary digit.
     412      261719 :       if (CurPtr == NumStart)
     413           0 :         return ReturnError(CurPtr-2, "Invalid binary number");
     414      261719 :       CurIntVal = strtoll(NumStart, nullptr, 2);
     415      261719 :       return tgtok::BinaryIntVal;
     416             :     }
     417             :   }
     418             : 
     419             :   // Check for a sign without a digit.
     420     1752403 :   if (!isdigit(CurPtr[0])) {
     421     1217566 :     if (CurPtr[-1] == '-')
     422             :       return tgtok::minus;
     423     1217098 :     else if (CurPtr[-1] == '+')
     424             :       return tgtok::plus;
     425             :   }
     426             : 
     427     2438479 :   while (isdigit(CurPtr[0]))
     428      686544 :     ++CurPtr;
     429     1751935 :   CurIntVal = strtoll(TokStart, nullptr, 10);
     430     1751935 :   return tgtok::IntVal;
     431             : }
     432             : 
     433             : /// LexBracket - We just read '['.  If this is a code block, return it,
     434             : /// otherwise return the bracket.  Match: '[' and '[{ ( [^}]+ | }[^]] )* }]'
     435     3026716 : tgtok::TokKind TGLexer::LexBracket() {
     436     3026716 :   if (CurPtr[0] != '{')
     437             :     return tgtok::l_square;
     438       20730 :   ++CurPtr;
     439             :   const char *CodeStart = CurPtr;
     440             :   while (true) {
     441     3660623 :     int Char = getNextChar();
     442     3660623 :     if (Char == EOF) break;
     443             : 
     444     3660623 :     if (Char != '}') continue;
     445             : 
     446       24131 :     Char = getNextChar();
     447       24131 :     if (Char == EOF) break;
     448       24131 :     if (Char == ']') {
     449       20730 :       CurStrVal.assign(CodeStart, CurPtr-2);
     450       20730 :       return tgtok::CodeFragment;
     451             :     }
     452             :   }
     453             : 
     454           0 :   return ReturnError(CodeStart-2, "Unterminated Code Block");
     455             : }
     456             : 
     457             : /// LexExclaim - Lex '!' and '![a-zA-Z]+'.
     458       86508 : tgtok::TokKind TGLexer::LexExclaim() {
     459       86508 :   if (!isalpha(*CurPtr))
     460           0 :     return ReturnError(CurPtr - 1, "Invalid \"!operator\"");
     461             : 
     462       86508 :   const char *Start = CurPtr++;
     463      437966 :   while (isalpha(*CurPtr))
     464      351458 :     ++CurPtr;
     465             : 
     466             :   // Check to see which operator this is.
     467             :   tgtok::TokKind Kind =
     468       86508 :     StringSwitch<tgtok::TokKind>(StringRef(Start, CurPtr - Start))
     469       86508 :     .Case("eq", tgtok::XEq)
     470       86508 :     .Case("ne", tgtok::XNe)
     471       86508 :     .Case("le", tgtok::XLe)
     472       86508 :     .Case("lt", tgtok::XLt)
     473       86508 :     .Case("ge", tgtok::XGe)
     474       86508 :     .Case("gt", tgtok::XGt)
     475       86508 :     .Case("if", tgtok::XIf)
     476       86508 :     .Case("isa", tgtok::XIsA)
     477       86508 :     .Case("head", tgtok::XHead)
     478       86508 :     .Case("tail", tgtok::XTail)
     479       86508 :     .Case("size", tgtok::XSize)
     480       86508 :     .Case("con", tgtok::XConcat)
     481       86508 :     .Case("dag", tgtok::XDag)
     482       86508 :     .Case("add", tgtok::XADD)
     483       86508 :     .Case("and", tgtok::XAND)
     484       86508 :     .Case("or", tgtok::XOR)
     485       86508 :     .Case("shl", tgtok::XSHL)
     486       86508 :     .Case("sra", tgtok::XSRA)
     487       86508 :     .Case("srl", tgtok::XSRL)
     488       86508 :     .Case("cast", tgtok::XCast)
     489       86508 :     .Case("empty", tgtok::XEmpty)
     490       86508 :     .Case("subst", tgtok::XSubst)
     491       86508 :     .Case("foldl", tgtok::XFoldl)
     492       86508 :     .Case("foreach", tgtok::XForEach)
     493       86508 :     .Case("listconcat", tgtok::XListConcat)
     494       86508 :     .Case("strconcat", tgtok::XStrConcat)
     495             :     .Default(tgtok::Error);
     496             : 
     497       86508 :   return Kind != tgtok::Error ? Kind : ReturnError(Start-1, "Unknown operator");
     498             : }

Generated by: LCOV version 1.13