LLVM API Documentation
00001 //===- LLLexer.cpp - Lexer for .ll Files ----------------------------------===// 00002 // 00003 // The LLVM Compiler Infrastructure 00004 // 00005 // This file is distributed under the University of Illinois Open Source 00006 // License. See LICENSE.TXT for details. 00007 // 00008 //===----------------------------------------------------------------------===// 00009 // 00010 // Implement the Lexer for .ll files. 00011 // 00012 //===----------------------------------------------------------------------===// 00013 00014 #include "LLLexer.h" 00015 #include "llvm/ADT/StringExtras.h" 00016 #include "llvm/ADT/Twine.h" 00017 #include "llvm/Assembly/Parser.h" 00018 #include "llvm/IR/DerivedTypes.h" 00019 #include "llvm/IR/Instruction.h" 00020 #include "llvm/IR/LLVMContext.h" 00021 #include "llvm/Support/ErrorHandling.h" 00022 #include "llvm/Support/MathExtras.h" 00023 #include "llvm/Support/MemoryBuffer.h" 00024 #include "llvm/Support/SourceMgr.h" 00025 #include "llvm/Support/raw_ostream.h" 00026 #include <cctype> 00027 #include <cstdio> 00028 #include <cstdlib> 00029 #include <cstring> 00030 using namespace llvm; 00031 00032 bool LLLexer::Error(LocTy ErrorLoc, const Twine &Msg) const { 00033 ErrorInfo = SM.GetMessage(ErrorLoc, SourceMgr::DK_Error, Msg); 00034 return true; 00035 } 00036 00037 //===----------------------------------------------------------------------===// 00038 // Helper functions. 00039 //===----------------------------------------------------------------------===// 00040 00041 // atoull - Convert an ascii string of decimal digits into the unsigned long 00042 // long representation... this does not have to do input error checking, 00043 // because we know that the input will be matched by a suitable regex... 00044 // 00045 uint64_t LLLexer::atoull(const char *Buffer, const char *End) { 00046 uint64_t Result = 0; 00047 for (; Buffer != End; Buffer++) { 00048 uint64_t OldRes = Result; 00049 Result *= 10; 00050 Result += *Buffer-'0'; 00051 if (Result < OldRes) { // Uh, oh, overflow detected!!! 00052 Error("constant bigger than 64 bits detected!"); 00053 return 0; 00054 } 00055 } 00056 return Result; 00057 } 00058 00059 uint64_t LLLexer::HexIntToVal(const char *Buffer, const char *End) { 00060 uint64_t Result = 0; 00061 for (; Buffer != End; ++Buffer) { 00062 uint64_t OldRes = Result; 00063 Result *= 16; 00064 Result += hexDigitValue(*Buffer); 00065 00066 if (Result < OldRes) { // Uh, oh, overflow detected!!! 00067 Error("constant bigger than 64 bits detected!"); 00068 return 0; 00069 } 00070 } 00071 return Result; 00072 } 00073 00074 void LLLexer::HexToIntPair(const char *Buffer, const char *End, 00075 uint64_t Pair[2]) { 00076 Pair[0] = 0; 00077 for (int i=0; i<16; i++, Buffer++) { 00078 assert(Buffer != End); 00079 Pair[0] *= 16; 00080 Pair[0] += hexDigitValue(*Buffer); 00081 } 00082 Pair[1] = 0; 00083 for (int i=0; i<16 && Buffer != End; i++, Buffer++) { 00084 Pair[1] *= 16; 00085 Pair[1] += hexDigitValue(*Buffer); 00086 } 00087 if (Buffer != End) 00088 Error("constant bigger than 128 bits detected!"); 00089 } 00090 00091 /// FP80HexToIntPair - translate an 80 bit FP80 number (20 hexits) into 00092 /// { low64, high16 } as usual for an APInt. 00093 void LLLexer::FP80HexToIntPair(const char *Buffer, const char *End, 00094 uint64_t Pair[2]) { 00095 Pair[1] = 0; 00096 for (int i=0; i<4 && Buffer != End; i++, Buffer++) { 00097 assert(Buffer != End); 00098 Pair[1] *= 16; 00099 Pair[1] += hexDigitValue(*Buffer); 00100 } 00101 Pair[0] = 0; 00102 for (int i=0; i<16; i++, Buffer++) { 00103 Pair[0] *= 16; 00104 Pair[0] += hexDigitValue(*Buffer); 00105 } 00106 if (Buffer != End) 00107 Error("constant bigger than 128 bits detected!"); 00108 } 00109 00110 // UnEscapeLexed - Run through the specified buffer and change \xx codes to the 00111 // appropriate character. 00112 static void UnEscapeLexed(std::string &Str) { 00113 if (Str.empty()) return; 00114 00115 char *Buffer = &Str[0], *EndBuffer = Buffer+Str.size(); 00116 char *BOut = Buffer; 00117 for (char *BIn = Buffer; BIn != EndBuffer; ) { 00118 if (BIn[0] == '\\') { 00119 if (BIn < EndBuffer-1 && BIn[1] == '\\') { 00120 *BOut++ = '\\'; // Two \ becomes one 00121 BIn += 2; 00122 } else if (BIn < EndBuffer-2 && 00123 isxdigit(static_cast<unsigned char>(BIn[1])) && 00124 isxdigit(static_cast<unsigned char>(BIn[2]))) { 00125 *BOut = hexDigitValue(BIn[1]) * 16 + hexDigitValue(BIn[2]); 00126 BIn += 3; // Skip over handled chars 00127 ++BOut; 00128 } else { 00129 *BOut++ = *BIn++; 00130 } 00131 } else { 00132 *BOut++ = *BIn++; 00133 } 00134 } 00135 Str.resize(BOut-Buffer); 00136 } 00137 00138 /// isLabelChar - Return true for [-a-zA-Z$._0-9]. 00139 static bool isLabelChar(char C) { 00140 return isalnum(static_cast<unsigned char>(C)) || C == '-' || C == '$' || 00141 C == '.' || C == '_'; 00142 } 00143 00144 00145 /// isLabelTail - Return true if this pointer points to a valid end of a label. 00146 static const char *isLabelTail(const char *CurPtr) { 00147 while (1) { 00148 if (CurPtr[0] == ':') return CurPtr+1; 00149 if (!isLabelChar(CurPtr[0])) return 0; 00150 ++CurPtr; 00151 } 00152 } 00153 00154 00155 00156 //===----------------------------------------------------------------------===// 00157 // Lexer definition. 00158 //===----------------------------------------------------------------------===// 00159 00160 LLLexer::LLLexer(MemoryBuffer *StartBuf, SourceMgr &sm, SMDiagnostic &Err, 00161 LLVMContext &C) 00162 : CurBuf(StartBuf), ErrorInfo(Err), SM(sm), Context(C), APFloatVal(0.0) { 00163 CurPtr = CurBuf->getBufferStart(); 00164 } 00165 00166 std::string LLLexer::getFilename() const { 00167 return CurBuf->getBufferIdentifier(); 00168 } 00169 00170 int LLLexer::getNextChar() { 00171 char CurChar = *CurPtr++; 00172 switch (CurChar) { 00173 default: return (unsigned char)CurChar; 00174 case 0: 00175 // A nul character in the stream is either the end of the current buffer or 00176 // a random nul in the file. Disambiguate that here. 00177 if (CurPtr-1 != CurBuf->getBufferEnd()) 00178 return 0; // Just whitespace. 00179 00180 // Otherwise, return end of file. 00181 --CurPtr; // Another call to lex will return EOF again. 00182 return EOF; 00183 } 00184 } 00185 00186 00187 lltok::Kind LLLexer::LexToken() { 00188 TokStart = CurPtr; 00189 00190 int CurChar = getNextChar(); 00191 switch (CurChar) { 00192 default: 00193 // Handle letters: [a-zA-Z_] 00194 if (isalpha(static_cast<unsigned char>(CurChar)) || CurChar == '_') 00195 return LexIdentifier(); 00196 00197 return lltok::Error; 00198 case EOF: return lltok::Eof; 00199 case 0: 00200 case ' ': 00201 case '\t': 00202 case '\n': 00203 case '\r': 00204 // Ignore whitespace. 00205 return LexToken(); 00206 case '+': return LexPositive(); 00207 case '@': return LexAt(); 00208 case '%': return LexPercent(); 00209 case '"': return LexQuote(); 00210 case '.': 00211 if (const char *Ptr = isLabelTail(CurPtr)) { 00212 CurPtr = Ptr; 00213 StrVal.assign(TokStart, CurPtr-1); 00214 return lltok::LabelStr; 00215 } 00216 if (CurPtr[0] == '.' && CurPtr[1] == '.') { 00217 CurPtr += 2; 00218 return lltok::dotdotdot; 00219 } 00220 return lltok::Error; 00221 case '$': 00222 if (const char *Ptr = isLabelTail(CurPtr)) { 00223 CurPtr = Ptr; 00224 StrVal.assign(TokStart, CurPtr-1); 00225 return lltok::LabelStr; 00226 } 00227 return lltok::Error; 00228 case ';': 00229 SkipLineComment(); 00230 return LexToken(); 00231 case '!': return LexExclaim(); 00232 case '#': return LexHash(); 00233 case '0': case '1': case '2': case '3': case '4': 00234 case '5': case '6': case '7': case '8': case '9': 00235 case '-': 00236 return LexDigitOrNegative(); 00237 case '=': return lltok::equal; 00238 case '[': return lltok::lsquare; 00239 case ']': return lltok::rsquare; 00240 case '{': return lltok::lbrace; 00241 case '}': return lltok::rbrace; 00242 case '<': return lltok::less; 00243 case '>': return lltok::greater; 00244 case '(': return lltok::lparen; 00245 case ')': return lltok::rparen; 00246 case ',': return lltok::comma; 00247 case '*': return lltok::star; 00248 case '\\': return lltok::backslash; 00249 } 00250 } 00251 00252 void LLLexer::SkipLineComment() { 00253 while (1) { 00254 if (CurPtr[0] == '\n' || CurPtr[0] == '\r' || getNextChar() == EOF) 00255 return; 00256 } 00257 } 00258 00259 /// LexAt - Lex all tokens that start with an @ character: 00260 /// GlobalVar @\"[^\"]*\" 00261 /// GlobalVar @[-a-zA-Z$._][-a-zA-Z$._0-9]* 00262 /// GlobalVarID @[0-9]+ 00263 lltok::Kind LLLexer::LexAt() { 00264 // Handle AtStringConstant: @\"[^\"]*\" 00265 if (CurPtr[0] == '"') { 00266 ++CurPtr; 00267 00268 while (1) { 00269 int CurChar = getNextChar(); 00270 00271 if (CurChar == EOF) { 00272 Error("end of file in global variable name"); 00273 return lltok::Error; 00274 } 00275 if (CurChar == '"') { 00276 StrVal.assign(TokStart+2, CurPtr-1); 00277 UnEscapeLexed(StrVal); 00278 return lltok::GlobalVar; 00279 } 00280 } 00281 } 00282 00283 // Handle GlobalVarName: @[-a-zA-Z$._][-a-zA-Z$._0-9]* 00284 if (ReadVarName()) 00285 return lltok::GlobalVar; 00286 00287 // Handle GlobalVarID: @[0-9]+ 00288 if (isdigit(static_cast<unsigned char>(CurPtr[0]))) { 00289 for (++CurPtr; isdigit(static_cast<unsigned char>(CurPtr[0])); ++CurPtr) 00290 /*empty*/; 00291 00292 uint64_t Val = atoull(TokStart+1, CurPtr); 00293 if ((unsigned)Val != Val) 00294 Error("invalid value number (too large)!"); 00295 UIntVal = unsigned(Val); 00296 return lltok::GlobalID; 00297 } 00298 00299 return lltok::Error; 00300 } 00301 00302 /// ReadString - Read a string until the closing quote. 00303 lltok::Kind LLLexer::ReadString(lltok::Kind kind) { 00304 const char *Start = CurPtr; 00305 while (1) { 00306 int CurChar = getNextChar(); 00307 00308 if (CurChar == EOF) { 00309 Error("end of file in string constant"); 00310 return lltok::Error; 00311 } 00312 if (CurChar == '"') { 00313 StrVal.assign(Start, CurPtr-1); 00314 UnEscapeLexed(StrVal); 00315 return kind; 00316 } 00317 } 00318 } 00319 00320 /// ReadVarName - Read the rest of a token containing a variable name. 00321 bool LLLexer::ReadVarName() { 00322 const char *NameStart = CurPtr; 00323 if (isalpha(static_cast<unsigned char>(CurPtr[0])) || 00324 CurPtr[0] == '-' || CurPtr[0] == '$' || 00325 CurPtr[0] == '.' || CurPtr[0] == '_') { 00326 ++CurPtr; 00327 while (isalnum(static_cast<unsigned char>(CurPtr[0])) || 00328 CurPtr[0] == '-' || CurPtr[0] == '$' || 00329 CurPtr[0] == '.' || CurPtr[0] == '_') 00330 ++CurPtr; 00331 00332 StrVal.assign(NameStart, CurPtr); 00333 return true; 00334 } 00335 return false; 00336 } 00337 00338 /// LexPercent - Lex all tokens that start with a % character: 00339 /// LocalVar ::= %\"[^\"]*\" 00340 /// LocalVar ::= %[-a-zA-Z$._][-a-zA-Z$._0-9]* 00341 /// LocalVarID ::= %[0-9]+ 00342 lltok::Kind LLLexer::LexPercent() { 00343 // Handle LocalVarName: %\"[^\"]*\" 00344 if (CurPtr[0] == '"') { 00345 ++CurPtr; 00346 return ReadString(lltok::LocalVar); 00347 } 00348 00349 // Handle LocalVarName: %[-a-zA-Z$._][-a-zA-Z$._0-9]* 00350 if (ReadVarName()) 00351 return lltok::LocalVar; 00352 00353 // Handle LocalVarID: %[0-9]+ 00354 if (isdigit(static_cast<unsigned char>(CurPtr[0]))) { 00355 for (++CurPtr; isdigit(static_cast<unsigned char>(CurPtr[0])); ++CurPtr) 00356 /*empty*/; 00357 00358 uint64_t Val = atoull(TokStart+1, CurPtr); 00359 if ((unsigned)Val != Val) 00360 Error("invalid value number (too large)!"); 00361 UIntVal = unsigned(Val); 00362 return lltok::LocalVarID; 00363 } 00364 00365 return lltok::Error; 00366 } 00367 00368 /// LexQuote - Lex all tokens that start with a " character: 00369 /// QuoteLabel "[^"]+": 00370 /// StringConstant "[^"]*" 00371 lltok::Kind LLLexer::LexQuote() { 00372 lltok::Kind kind = ReadString(lltok::StringConstant); 00373 if (kind == lltok::Error || kind == lltok::Eof) 00374 return kind; 00375 00376 if (CurPtr[0] == ':') { 00377 ++CurPtr; 00378 kind = lltok::LabelStr; 00379 } 00380 00381 return kind; 00382 } 00383 00384 /// LexExclaim: 00385 /// !foo 00386 /// ! 00387 lltok::Kind LLLexer::LexExclaim() { 00388 // Lex a metadata name as a MetadataVar. 00389 if (isalpha(static_cast<unsigned char>(CurPtr[0])) || 00390 CurPtr[0] == '-' || CurPtr[0] == '$' || 00391 CurPtr[0] == '.' || CurPtr[0] == '_' || CurPtr[0] == '\\') { 00392 ++CurPtr; 00393 while (isalnum(static_cast<unsigned char>(CurPtr[0])) || 00394 CurPtr[0] == '-' || CurPtr[0] == '$' || 00395 CurPtr[0] == '.' || CurPtr[0] == '_' || CurPtr[0] == '\\') 00396 ++CurPtr; 00397 00398 StrVal.assign(TokStart+1, CurPtr); // Skip ! 00399 UnEscapeLexed(StrVal); 00400 return lltok::MetadataVar; 00401 } 00402 return lltok::exclaim; 00403 } 00404 00405 /// LexHash - Lex all tokens that start with a # character: 00406 /// AttrGrpID ::= #[0-9]+ 00407 lltok::Kind LLLexer::LexHash() { 00408 // Handle AttrGrpID: #[0-9]+ 00409 if (isdigit(static_cast<unsigned char>(CurPtr[0]))) { 00410 for (++CurPtr; isdigit(static_cast<unsigned char>(CurPtr[0])); ++CurPtr) 00411 /*empty*/; 00412 00413 uint64_t Val = atoull(TokStart+1, CurPtr); 00414 if ((unsigned)Val != Val) 00415 Error("invalid value number (too large)!"); 00416 UIntVal = unsigned(Val); 00417 return lltok::AttrGrpID; 00418 } 00419 00420 return lltok::Error; 00421 } 00422 00423 /// LexIdentifier: Handle several related productions: 00424 /// Label [-a-zA-Z$._0-9]+: 00425 /// IntegerType i[0-9]+ 00426 /// Keyword sdiv, float, ... 00427 /// HexIntConstant [us]0x[0-9A-Fa-f]+ 00428 lltok::Kind LLLexer::LexIdentifier() { 00429 const char *StartChar = CurPtr; 00430 const char *IntEnd = CurPtr[-1] == 'i' ? 0 : StartChar; 00431 const char *KeywordEnd = 0; 00432 00433 for (; isLabelChar(*CurPtr); ++CurPtr) { 00434 // If we decide this is an integer, remember the end of the sequence. 00435 if (!IntEnd && !isdigit(static_cast<unsigned char>(*CurPtr))) 00436 IntEnd = CurPtr; 00437 if (!KeywordEnd && !isalnum(static_cast<unsigned char>(*CurPtr)) && 00438 *CurPtr != '_') 00439 KeywordEnd = CurPtr; 00440 } 00441 00442 // If we stopped due to a colon, this really is a label. 00443 if (*CurPtr == ':') { 00444 StrVal.assign(StartChar-1, CurPtr++); 00445 return lltok::LabelStr; 00446 } 00447 00448 // Otherwise, this wasn't a label. If this was valid as an integer type, 00449 // return it. 00450 if (IntEnd == 0) IntEnd = CurPtr; 00451 if (IntEnd != StartChar) { 00452 CurPtr = IntEnd; 00453 uint64_t NumBits = atoull(StartChar, CurPtr); 00454 if (NumBits < IntegerType::MIN_INT_BITS || 00455 NumBits > IntegerType::MAX_INT_BITS) { 00456 Error("bitwidth for integer type out of range!"); 00457 return lltok::Error; 00458 } 00459 TyVal = IntegerType::get(Context, NumBits); 00460 return lltok::Type; 00461 } 00462 00463 // Otherwise, this was a letter sequence. See which keyword this is. 00464 if (KeywordEnd == 0) KeywordEnd = CurPtr; 00465 CurPtr = KeywordEnd; 00466 --StartChar; 00467 unsigned Len = CurPtr-StartChar; 00468 #define KEYWORD(STR) \ 00469 do { \ 00470 if (Len == strlen(#STR) && !memcmp(StartChar, #STR, strlen(#STR))) \ 00471 return lltok::kw_##STR; \ 00472 } while (0) 00473 00474 KEYWORD(true); KEYWORD(false); 00475 KEYWORD(declare); KEYWORD(define); 00476 KEYWORD(global); KEYWORD(constant); 00477 00478 KEYWORD(private); 00479 KEYWORD(linker_private); 00480 KEYWORD(linker_private_weak); 00481 KEYWORD(linker_private_weak_def_auto); // FIXME: For backwards compatibility. 00482 KEYWORD(internal); 00483 KEYWORD(available_externally); 00484 KEYWORD(linkonce); 00485 KEYWORD(linkonce_odr); 00486 KEYWORD(linkonce_odr_auto_hide); 00487 KEYWORD(weak); 00488 KEYWORD(weak_odr); 00489 KEYWORD(appending); 00490 KEYWORD(dllimport); 00491 KEYWORD(dllexport); 00492 KEYWORD(common); 00493 KEYWORD(default); 00494 KEYWORD(hidden); 00495 KEYWORD(protected); 00496 KEYWORD(unnamed_addr); 00497 KEYWORD(externally_initialized); 00498 KEYWORD(extern_weak); 00499 KEYWORD(external); 00500 KEYWORD(thread_local); 00501 KEYWORD(localdynamic); 00502 KEYWORD(initialexec); 00503 KEYWORD(localexec); 00504 KEYWORD(zeroinitializer); 00505 KEYWORD(undef); 00506 KEYWORD(null); 00507 KEYWORD(to); 00508 KEYWORD(tail); 00509 KEYWORD(target); 00510 KEYWORD(triple); 00511 KEYWORD(unwind); 00512 KEYWORD(deplibs); // FIXME: Remove in 4.0. 00513 KEYWORD(datalayout); 00514 KEYWORD(volatile); 00515 KEYWORD(atomic); 00516 KEYWORD(unordered); 00517 KEYWORD(monotonic); 00518 KEYWORD(acquire); 00519 KEYWORD(release); 00520 KEYWORD(acq_rel); 00521 KEYWORD(seq_cst); 00522 KEYWORD(singlethread); 00523 00524 KEYWORD(nnan); 00525 KEYWORD(ninf); 00526 KEYWORD(nsz); 00527 KEYWORD(arcp); 00528 KEYWORD(fast); 00529 KEYWORD(nuw); 00530 KEYWORD(nsw); 00531 KEYWORD(exact); 00532 KEYWORD(inbounds); 00533 KEYWORD(align); 00534 KEYWORD(addrspace); 00535 KEYWORD(section); 00536 KEYWORD(alias); 00537 KEYWORD(module); 00538 KEYWORD(asm); 00539 KEYWORD(sideeffect); 00540 KEYWORD(alignstack); 00541 KEYWORD(inteldialect); 00542 KEYWORD(gc); 00543 00544 KEYWORD(ccc); 00545 KEYWORD(fastcc); 00546 KEYWORD(coldcc); 00547 KEYWORD(x86_stdcallcc); 00548 KEYWORD(x86_fastcallcc); 00549 KEYWORD(x86_thiscallcc); 00550 KEYWORD(arm_apcscc); 00551 KEYWORD(arm_aapcscc); 00552 KEYWORD(arm_aapcs_vfpcc); 00553 KEYWORD(msp430_intrcc); 00554 KEYWORD(ptx_kernel); 00555 KEYWORD(ptx_device); 00556 KEYWORD(spir_kernel); 00557 KEYWORD(spir_func); 00558 KEYWORD(intel_ocl_bicc); 00559 00560 KEYWORD(cc); 00561 KEYWORD(c); 00562 00563 KEYWORD(attributes); 00564 00565 KEYWORD(alwaysinline); 00566 KEYWORD(byval); 00567 KEYWORD(cold); 00568 KEYWORD(inlinehint); 00569 KEYWORD(inreg); 00570 KEYWORD(minsize); 00571 KEYWORD(naked); 00572 KEYWORD(nest); 00573 KEYWORD(noalias); 00574 KEYWORD(nobuiltin); 00575 KEYWORD(nocapture); 00576 KEYWORD(noduplicate); 00577 KEYWORD(noimplicitfloat); 00578 KEYWORD(noinline); 00579 KEYWORD(nonlazybind); 00580 KEYWORD(noredzone); 00581 KEYWORD(noreturn); 00582 KEYWORD(nounwind); 00583 KEYWORD(optsize); 00584 KEYWORD(readnone); 00585 KEYWORD(readonly); 00586 KEYWORD(returned); 00587 KEYWORD(returns_twice); 00588 KEYWORD(signext); 00589 KEYWORD(sret); 00590 KEYWORD(ssp); 00591 KEYWORD(sspreq); 00592 KEYWORD(sspstrong); 00593 KEYWORD(sanitize_address); 00594 KEYWORD(sanitize_thread); 00595 KEYWORD(sanitize_memory); 00596 KEYWORD(uwtable); 00597 KEYWORD(zeroext); 00598 00599 KEYWORD(type); 00600 KEYWORD(opaque); 00601 00602 KEYWORD(eq); KEYWORD(ne); KEYWORD(slt); KEYWORD(sgt); KEYWORD(sle); 00603 KEYWORD(sge); KEYWORD(ult); KEYWORD(ugt); KEYWORD(ule); KEYWORD(uge); 00604 KEYWORD(oeq); KEYWORD(one); KEYWORD(olt); KEYWORD(ogt); KEYWORD(ole); 00605 KEYWORD(oge); KEYWORD(ord); KEYWORD(uno); KEYWORD(ueq); KEYWORD(une); 00606 00607 KEYWORD(xchg); KEYWORD(nand); KEYWORD(max); KEYWORD(min); KEYWORD(umax); 00608 KEYWORD(umin); 00609 00610 KEYWORD(x); 00611 KEYWORD(blockaddress); 00612 00613 KEYWORD(personality); 00614 KEYWORD(cleanup); 00615 KEYWORD(catch); 00616 KEYWORD(filter); 00617 #undef KEYWORD 00618 00619 // Keywords for types. 00620 #define TYPEKEYWORD(STR, LLVMTY) \ 00621 if (Len == strlen(STR) && !memcmp(StartChar, STR, strlen(STR))) { \ 00622 TyVal = LLVMTY; return lltok::Type; } 00623 TYPEKEYWORD("void", Type::getVoidTy(Context)); 00624 TYPEKEYWORD("half", Type::getHalfTy(Context)); 00625 TYPEKEYWORD("float", Type::getFloatTy(Context)); 00626 TYPEKEYWORD("double", Type::getDoubleTy(Context)); 00627 TYPEKEYWORD("x86_fp80", Type::getX86_FP80Ty(Context)); 00628 TYPEKEYWORD("fp128", Type::getFP128Ty(Context)); 00629 TYPEKEYWORD("ppc_fp128", Type::getPPC_FP128Ty(Context)); 00630 TYPEKEYWORD("label", Type::getLabelTy(Context)); 00631 TYPEKEYWORD("metadata", Type::getMetadataTy(Context)); 00632 TYPEKEYWORD("x86_mmx", Type::getX86_MMXTy(Context)); 00633 #undef TYPEKEYWORD 00634 00635 // Keywords for instructions. 00636 #define INSTKEYWORD(STR, Enum) \ 00637 if (Len == strlen(#STR) && !memcmp(StartChar, #STR, strlen(#STR))) { \ 00638 UIntVal = Instruction::Enum; return lltok::kw_##STR; } 00639 00640 INSTKEYWORD(add, Add); INSTKEYWORD(fadd, FAdd); 00641 INSTKEYWORD(sub, Sub); INSTKEYWORD(fsub, FSub); 00642 INSTKEYWORD(mul, Mul); INSTKEYWORD(fmul, FMul); 00643 INSTKEYWORD(udiv, UDiv); INSTKEYWORD(sdiv, SDiv); INSTKEYWORD(fdiv, FDiv); 00644 INSTKEYWORD(urem, URem); INSTKEYWORD(srem, SRem); INSTKEYWORD(frem, FRem); 00645 INSTKEYWORD(shl, Shl); INSTKEYWORD(lshr, LShr); INSTKEYWORD(ashr, AShr); 00646 INSTKEYWORD(and, And); INSTKEYWORD(or, Or); INSTKEYWORD(xor, Xor); 00647 INSTKEYWORD(icmp, ICmp); INSTKEYWORD(fcmp, FCmp); 00648 00649 INSTKEYWORD(phi, PHI); 00650 INSTKEYWORD(call, Call); 00651 INSTKEYWORD(trunc, Trunc); 00652 INSTKEYWORD(zext, ZExt); 00653 INSTKEYWORD(sext, SExt); 00654 INSTKEYWORD(fptrunc, FPTrunc); 00655 INSTKEYWORD(fpext, FPExt); 00656 INSTKEYWORD(uitofp, UIToFP); 00657 INSTKEYWORD(sitofp, SIToFP); 00658 INSTKEYWORD(fptoui, FPToUI); 00659 INSTKEYWORD(fptosi, FPToSI); 00660 INSTKEYWORD(inttoptr, IntToPtr); 00661 INSTKEYWORD(ptrtoint, PtrToInt); 00662 INSTKEYWORD(bitcast, BitCast); 00663 INSTKEYWORD(select, Select); 00664 INSTKEYWORD(va_arg, VAArg); 00665 INSTKEYWORD(ret, Ret); 00666 INSTKEYWORD(br, Br); 00667 INSTKEYWORD(switch, Switch); 00668 INSTKEYWORD(indirectbr, IndirectBr); 00669 INSTKEYWORD(invoke, Invoke); 00670 INSTKEYWORD(resume, Resume); 00671 INSTKEYWORD(unreachable, Unreachable); 00672 00673 INSTKEYWORD(alloca, Alloca); 00674 INSTKEYWORD(load, Load); 00675 INSTKEYWORD(store, Store); 00676 INSTKEYWORD(cmpxchg, AtomicCmpXchg); 00677 INSTKEYWORD(atomicrmw, AtomicRMW); 00678 INSTKEYWORD(fence, Fence); 00679 INSTKEYWORD(getelementptr, GetElementPtr); 00680 00681 INSTKEYWORD(extractelement, ExtractElement); 00682 INSTKEYWORD(insertelement, InsertElement); 00683 INSTKEYWORD(shufflevector, ShuffleVector); 00684 INSTKEYWORD(extractvalue, ExtractValue); 00685 INSTKEYWORD(insertvalue, InsertValue); 00686 INSTKEYWORD(landingpad, LandingPad); 00687 #undef INSTKEYWORD 00688 00689 // Check for [us]0x[0-9A-Fa-f]+ which are Hexadecimal constant generated by 00690 // the CFE to avoid forcing it to deal with 64-bit numbers. 00691 if ((TokStart[0] == 'u' || TokStart[0] == 's') && 00692 TokStart[1] == '0' && TokStart[2] == 'x' && 00693 isxdigit(static_cast<unsigned char>(TokStart[3]))) { 00694 int len = CurPtr-TokStart-3; 00695 uint32_t bits = len * 4; 00696 APInt Tmp(bits, StringRef(TokStart+3, len), 16); 00697 uint32_t activeBits = Tmp.getActiveBits(); 00698 if (activeBits > 0 && activeBits < bits) 00699 Tmp = Tmp.trunc(activeBits); 00700 APSIntVal = APSInt(Tmp, TokStart[0] == 'u'); 00701 return lltok::APSInt; 00702 } 00703 00704 // If this is "cc1234", return this as just "cc". 00705 if (TokStart[0] == 'c' && TokStart[1] == 'c') { 00706 CurPtr = TokStart+2; 00707 return lltok::kw_cc; 00708 } 00709 00710 // Finally, if this isn't known, return an error. 00711 CurPtr = TokStart+1; 00712 return lltok::Error; 00713 } 00714 00715 00716 /// Lex0x: Handle productions that start with 0x, knowing that it matches and 00717 /// that this is not a label: 00718 /// HexFPConstant 0x[0-9A-Fa-f]+ 00719 /// HexFP80Constant 0xK[0-9A-Fa-f]+ 00720 /// HexFP128Constant 0xL[0-9A-Fa-f]+ 00721 /// HexPPC128Constant 0xM[0-9A-Fa-f]+ 00722 /// HexHalfConstant 0xH[0-9A-Fa-f]+ 00723 lltok::Kind LLLexer::Lex0x() { 00724 CurPtr = TokStart + 2; 00725 00726 char Kind; 00727 if ((CurPtr[0] >= 'K' && CurPtr[0] <= 'M') || CurPtr[0] == 'H') { 00728 Kind = *CurPtr++; 00729 } else { 00730 Kind = 'J'; 00731 } 00732 00733 if (!isxdigit(static_cast<unsigned char>(CurPtr[0]))) { 00734 // Bad token, return it as an error. 00735 CurPtr = TokStart+1; 00736 return lltok::Error; 00737 } 00738 00739 while (isxdigit(static_cast<unsigned char>(CurPtr[0]))) 00740 ++CurPtr; 00741 00742 if (Kind == 'J') { 00743 // HexFPConstant - Floating point constant represented in IEEE format as a 00744 // hexadecimal number for when exponential notation is not precise enough. 00745 // Half, Float, and double only. 00746 APFloatVal = APFloat(BitsToDouble(HexIntToVal(TokStart+2, CurPtr))); 00747 return lltok::APFloat; 00748 } 00749 00750 uint64_t Pair[2]; 00751 switch (Kind) { 00752 default: llvm_unreachable("Unknown kind!"); 00753 case 'K': 00754 // F80HexFPConstant - x87 long double in hexadecimal format (10 bytes) 00755 FP80HexToIntPair(TokStart+3, CurPtr, Pair); 00756 APFloatVal = APFloat(APFloat::x87DoubleExtended, APInt(80, Pair)); 00757 return lltok::APFloat; 00758 case 'L': 00759 // F128HexFPConstant - IEEE 128-bit in hexadecimal format (16 bytes) 00760 HexToIntPair(TokStart+3, CurPtr, Pair); 00761 APFloatVal = APFloat(APFloat::IEEEquad, APInt(128, Pair)); 00762 return lltok::APFloat; 00763 case 'M': 00764 // PPC128HexFPConstant - PowerPC 128-bit in hexadecimal format (16 bytes) 00765 HexToIntPair(TokStart+3, CurPtr, Pair); 00766 APFloatVal = APFloat(APFloat::PPCDoubleDouble, APInt(128, Pair)); 00767 return lltok::APFloat; 00768 case 'H': 00769 APFloatVal = APFloat(APFloat::IEEEhalf, 00770 APInt(16,HexIntToVal(TokStart+3, CurPtr))); 00771 return lltok::APFloat; 00772 } 00773 } 00774 00775 /// LexIdentifier: Handle several related productions: 00776 /// Label [-a-zA-Z$._0-9]+: 00777 /// NInteger -[0-9]+ 00778 /// FPConstant [-+]?[0-9]+[.][0-9]*([eE][-+]?[0-9]+)? 00779 /// PInteger [0-9]+ 00780 /// HexFPConstant 0x[0-9A-Fa-f]+ 00781 /// HexFP80Constant 0xK[0-9A-Fa-f]+ 00782 /// HexFP128Constant 0xL[0-9A-Fa-f]+ 00783 /// HexPPC128Constant 0xM[0-9A-Fa-f]+ 00784 lltok::Kind LLLexer::LexDigitOrNegative() { 00785 // If the letter after the negative is not a number, this is probably a label. 00786 if (!isdigit(static_cast<unsigned char>(TokStart[0])) && 00787 !isdigit(static_cast<unsigned char>(CurPtr[0]))) { 00788 // Okay, this is not a number after the -, it's probably a label. 00789 if (const char *End = isLabelTail(CurPtr)) { 00790 StrVal.assign(TokStart, End-1); 00791 CurPtr = End; 00792 return lltok::LabelStr; 00793 } 00794 00795 return lltok::Error; 00796 } 00797 00798 // At this point, it is either a label, int or fp constant. 00799 00800 // Skip digits, we have at least one. 00801 for (; isdigit(static_cast<unsigned char>(CurPtr[0])); ++CurPtr) 00802 /*empty*/; 00803 00804 // Check to see if this really is a label afterall, e.g. "-1:". 00805 if (isLabelChar(CurPtr[0]) || CurPtr[0] == ':') { 00806 if (const char *End = isLabelTail(CurPtr)) { 00807 StrVal.assign(TokStart, End-1); 00808 CurPtr = End; 00809 return lltok::LabelStr; 00810 } 00811 } 00812 00813 // If the next character is a '.', then it is a fp value, otherwise its 00814 // integer. 00815 if (CurPtr[0] != '.') { 00816 if (TokStart[0] == '0' && TokStart[1] == 'x') 00817 return Lex0x(); 00818 unsigned Len = CurPtr-TokStart; 00819 uint32_t numBits = ((Len * 64) / 19) + 2; 00820 APInt Tmp(numBits, StringRef(TokStart, Len), 10); 00821 if (TokStart[0] == '-') { 00822 uint32_t minBits = Tmp.getMinSignedBits(); 00823 if (minBits > 0 && minBits < numBits) 00824 Tmp = Tmp.trunc(minBits); 00825 APSIntVal = APSInt(Tmp, false); 00826 } else { 00827 uint32_t activeBits = Tmp.getActiveBits(); 00828 if (activeBits > 0 && activeBits < numBits) 00829 Tmp = Tmp.trunc(activeBits); 00830 APSIntVal = APSInt(Tmp, true); 00831 } 00832 return lltok::APSInt; 00833 } 00834 00835 ++CurPtr; 00836 00837 // Skip over [0-9]*([eE][-+]?[0-9]+)? 00838 while (isdigit(static_cast<unsigned char>(CurPtr[0]))) ++CurPtr; 00839 00840 if (CurPtr[0] == 'e' || CurPtr[0] == 'E') { 00841 if (isdigit(static_cast<unsigned char>(CurPtr[1])) || 00842 ((CurPtr[1] == '-' || CurPtr[1] == '+') && 00843 isdigit(static_cast<unsigned char>(CurPtr[2])))) { 00844 CurPtr += 2; 00845 while (isdigit(static_cast<unsigned char>(CurPtr[0]))) ++CurPtr; 00846 } 00847 } 00848 00849 APFloatVal = APFloat(std::atof(TokStart)); 00850 return lltok::APFloat; 00851 } 00852 00853 /// FPConstant [-+]?[0-9]+[.][0-9]*([eE][-+]?[0-9]+)? 00854 lltok::Kind LLLexer::LexPositive() { 00855 // If the letter after the negative is a number, this is probably not a 00856 // label. 00857 if (!isdigit(static_cast<unsigned char>(CurPtr[0]))) 00858 return lltok::Error; 00859 00860 // Skip digits. 00861 for (++CurPtr; isdigit(static_cast<unsigned char>(CurPtr[0])); ++CurPtr) 00862 /*empty*/; 00863 00864 // At this point, we need a '.'. 00865 if (CurPtr[0] != '.') { 00866 CurPtr = TokStart+1; 00867 return lltok::Error; 00868 } 00869 00870 ++CurPtr; 00871 00872 // Skip over [0-9]*([eE][-+]?[0-9]+)? 00873 while (isdigit(static_cast<unsigned char>(CurPtr[0]))) ++CurPtr; 00874 00875 if (CurPtr[0] == 'e' || CurPtr[0] == 'E') { 00876 if (isdigit(static_cast<unsigned char>(CurPtr[1])) || 00877 ((CurPtr[1] == '-' || CurPtr[1] == '+') && 00878 isdigit(static_cast<unsigned char>(CurPtr[2])))) { 00879 CurPtr += 2; 00880 while (isdigit(static_cast<unsigned char>(CurPtr[0]))) ++CurPtr; 00881 } 00882 } 00883 00884 APFloatVal = APFloat(std::atof(TokStart)); 00885 return lltok::APFloat; 00886 }