LLVM  3.7.0
LLLexer.cpp
Go to the documentation of this file.
1 //===- LLLexer.cpp - Lexer for .ll Files ----------------------------------===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // Implement the Lexer for .ll files.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "LLLexer.h"
15 #include "llvm/ADT/StringExtras.h"
16 #include "llvm/ADT/Twine.h"
17 #include "llvm/AsmParser/Parser.h"
18 #include "llvm/IR/DerivedTypes.h"
19 #include "llvm/IR/Instruction.h"
20 #include "llvm/IR/LLVMContext.h"
24 #include "llvm/Support/SourceMgr.h"
26 #include <cctype>
27 #include <cstdio>
28 #include <cstdlib>
29 #include <cstring>
30 using namespace llvm;
31 
32 bool LLLexer::Error(LocTy ErrorLoc, const Twine &Msg) const {
33  ErrorInfo = SM.GetMessage(ErrorLoc, SourceMgr::DK_Error, Msg);
34  return true;
35 }
36 
37 void LLLexer::Warning(LocTy WarningLoc, const Twine &Msg) const {
38  SM.PrintMessage(WarningLoc, SourceMgr::DK_Warning, Msg);
39 }
40 
41 //===----------------------------------------------------------------------===//
42 // Helper functions.
43 //===----------------------------------------------------------------------===//
44 
45 // atoull - Convert an ascii string of decimal digits into the unsigned long
46 // long representation... this does not have to do input error checking,
47 // because we know that the input will be matched by a suitable regex...
48 //
49 uint64_t LLLexer::atoull(const char *Buffer, const char *End) {
50  uint64_t Result = 0;
51  for (; Buffer != End; Buffer++) {
52  uint64_t OldRes = Result;
53  Result *= 10;
54  Result += *Buffer-'0';
55  if (Result < OldRes) { // Uh, oh, overflow detected!!!
56  Error("constant bigger than 64 bits detected!");
57  return 0;
58  }
59  }
60  return Result;
61 }
62 
63 uint64_t LLLexer::HexIntToVal(const char *Buffer, const char *End) {
64  uint64_t Result = 0;
65  for (; Buffer != End; ++Buffer) {
66  uint64_t OldRes = Result;
67  Result *= 16;
68  Result += hexDigitValue(*Buffer);
69 
70  if (Result < OldRes) { // Uh, oh, overflow detected!!!
71  Error("constant bigger than 64 bits detected!");
72  return 0;
73  }
74  }
75  return Result;
76 }
77 
78 void LLLexer::HexToIntPair(const char *Buffer, const char *End,
79  uint64_t Pair[2]) {
80  Pair[0] = 0;
81  if (End - Buffer >= 16) {
82  for (int i = 0; i < 16; i++, Buffer++) {
83  assert(Buffer != End);
84  Pair[0] *= 16;
85  Pair[0] += hexDigitValue(*Buffer);
86  }
87  }
88  Pair[1] = 0;
89  for (int i = 0; i < 16 && Buffer != End; i++, Buffer++) {
90  Pair[1] *= 16;
91  Pair[1] += hexDigitValue(*Buffer);
92  }
93  if (Buffer != End)
94  Error("constant bigger than 128 bits detected!");
95 }
96 
97 /// FP80HexToIntPair - translate an 80 bit FP80 number (20 hexits) into
98 /// { low64, high16 } as usual for an APInt.
99 void LLLexer::FP80HexToIntPair(const char *Buffer, const char *End,
100  uint64_t Pair[2]) {
101  Pair[1] = 0;
102  for (int i=0; i<4 && Buffer != End; i++, Buffer++) {
103  assert(Buffer != End);
104  Pair[1] *= 16;
105  Pair[1] += hexDigitValue(*Buffer);
106  }
107  Pair[0] = 0;
108  for (int i=0; i<16; i++, Buffer++) {
109  Pair[0] *= 16;
110  Pair[0] += hexDigitValue(*Buffer);
111  }
112  if (Buffer != End)
113  Error("constant bigger than 128 bits detected!");
114 }
115 
116 // UnEscapeLexed - Run through the specified buffer and change \xx codes to the
117 // appropriate character.
118 static void UnEscapeLexed(std::string &Str) {
119  if (Str.empty()) return;
120 
121  char *Buffer = &Str[0], *EndBuffer = Buffer+Str.size();
122  char *BOut = Buffer;
123  for (char *BIn = Buffer; BIn != EndBuffer; ) {
124  if (BIn[0] == '\\') {
125  if (BIn < EndBuffer-1 && BIn[1] == '\\') {
126  *BOut++ = '\\'; // Two \ becomes one
127  BIn += 2;
128  } else if (BIn < EndBuffer-2 &&
129  isxdigit(static_cast<unsigned char>(BIn[1])) &&
130  isxdigit(static_cast<unsigned char>(BIn[2]))) {
131  *BOut = hexDigitValue(BIn[1]) * 16 + hexDigitValue(BIn[2]);
132  BIn += 3; // Skip over handled chars
133  ++BOut;
134  } else {
135  *BOut++ = *BIn++;
136  }
137  } else {
138  *BOut++ = *BIn++;
139  }
140  }
141  Str.resize(BOut-Buffer);
142 }
143 
/// isLabelChar - Return true for characters matching [-a-zA-Z$._0-9].
static bool isLabelChar(char C) {
  switch (C) {
  case '-':
  case '$':
  case '.':
  case '_':
    return true;
  default:
    // Cast first so that a negative char is never passed to isalnum.
    return isalnum(static_cast<unsigned char>(C)) != 0;
  }
}
149 
150 
151 /// isLabelTail - Return true if this pointer points to a valid end of a label.
152 static const char *isLabelTail(const char *CurPtr) {
153  while (1) {
154  if (CurPtr[0] == ':') return CurPtr+1;
155  if (!isLabelChar(CurPtr[0])) return nullptr;
156  ++CurPtr;
157  }
158 }
159 
160 
161 
162 //===----------------------------------------------------------------------===//
163 // Lexer definition.
164 //===----------------------------------------------------------------------===//
165 
167  LLVMContext &C)
168  : CurBuf(StartBuf), ErrorInfo(Err), SM(sm), Context(C), APFloatVal(0.0) {
169  CurPtr = CurBuf.begin();
170 }
171 
172 int LLLexer::getNextChar() {
173  char CurChar = *CurPtr++;
174  switch (CurChar) {
175  default: return (unsigned char)CurChar;
176  case 0:
177  // A nul character in the stream is either the end of the current buffer or
178  // a random nul in the file. Disambiguate that here.
179  if (CurPtr-1 != CurBuf.end())
180  return 0; // Just whitespace.
181 
182  // Otherwise, return end of file.
183  --CurPtr; // Another call to lex will return EOF again.
184  return EOF;
185  }
186 }
187 
188 
189 lltok::Kind LLLexer::LexToken() {
190  TokStart = CurPtr;
191 
192  int CurChar = getNextChar();
193  switch (CurChar) {
194  default:
195  // Handle letters: [a-zA-Z_]
196  if (isalpha(static_cast<unsigned char>(CurChar)) || CurChar == '_')
197  return LexIdentifier();
198 
199  return lltok::Error;
200  case EOF: return lltok::Eof;
201  case 0:
202  case ' ':
203  case '\t':
204  case '\n':
205  case '\r':
206  // Ignore whitespace.
207  return LexToken();
208  case '+': return LexPositive();
209  case '@': return LexAt();
210  case '$': return LexDollar();
211  case '%': return LexPercent();
212  case '"': return LexQuote();
213  case '.':
214  if (const char *Ptr = isLabelTail(CurPtr)) {
215  CurPtr = Ptr;
216  StrVal.assign(TokStart, CurPtr-1);
217  return lltok::LabelStr;
218  }
219  if (CurPtr[0] == '.' && CurPtr[1] == '.') {
220  CurPtr += 2;
221  return lltok::dotdotdot;
222  }
223  return lltok::Error;
224  case ';':
225  SkipLineComment();
226  return LexToken();
227  case '!': return LexExclaim();
228  case '#': return LexHash();
229  case '0': case '1': case '2': case '3': case '4':
230  case '5': case '6': case '7': case '8': case '9':
231  case '-':
232  return LexDigitOrNegative();
233  case '=': return lltok::equal;
234  case '[': return lltok::lsquare;
235  case ']': return lltok::rsquare;
236  case '{': return lltok::lbrace;
237  case '}': return lltok::rbrace;
238  case '<': return lltok::less;
239  case '>': return lltok::greater;
240  case '(': return lltok::lparen;
241  case ')': return lltok::rparen;
242  case ',': return lltok::comma;
243  case '*': return lltok::star;
244  case '|': return lltok::bar;
245  }
246 }
247 
248 void LLLexer::SkipLineComment() {
249  while (1) {
250  if (CurPtr[0] == '\n' || CurPtr[0] == '\r' || getNextChar() == EOF)
251  return;
252  }
253 }
254 
255 /// Lex all tokens that start with an @ character.
256 /// GlobalVar @\"[^\"]*\"
257 /// GlobalVar @[-a-zA-Z$._][-a-zA-Z$._0-9]*
258 /// GlobalVarID @[0-9]+
259 lltok::Kind LLLexer::LexAt() {
260  return LexVar(lltok::GlobalVar, lltok::GlobalID);
261 }
262 
263 lltok::Kind LLLexer::LexDollar() {
264  if (const char *Ptr = isLabelTail(TokStart)) {
265  CurPtr = Ptr;
266  StrVal.assign(TokStart, CurPtr - 1);
267  return lltok::LabelStr;
268  }
269 
270  // Handle DollarStringConstant: $\"[^\"]*\"
271  if (CurPtr[0] == '"') {
272  ++CurPtr;
273 
274  while (1) {
275  int CurChar = getNextChar();
276 
277  if (CurChar == EOF) {
278  Error("end of file in COMDAT variable name");
279  return lltok::Error;
280  }
281  if (CurChar == '"') {
282  StrVal.assign(TokStart + 2, CurPtr - 1);
283  UnEscapeLexed(StrVal);
284  if (StringRef(StrVal).find_first_of(0) != StringRef::npos) {
285  Error("Null bytes are not allowed in names");
286  return lltok::Error;
287  }
288  return lltok::ComdatVar;
289  }
290  }
291  }
292 
293  // Handle ComdatVarName: $[-a-zA-Z$._][-a-zA-Z$._0-9]*
294  if (ReadVarName())
295  return lltok::ComdatVar;
296 
297  return lltok::Error;
298 }
299 
300 /// ReadString - Read a string until the closing quote.
301 lltok::Kind LLLexer::ReadString(lltok::Kind kind) {
302  const char *Start = CurPtr;
303  while (1) {
304  int CurChar = getNextChar();
305 
306  if (CurChar == EOF) {
307  Error("end of file in string constant");
308  return lltok::Error;
309  }
310  if (CurChar == '"') {
311  StrVal.assign(Start, CurPtr-1);
312  UnEscapeLexed(StrVal);
313  return kind;
314  }
315  }
316 }
317 
318 /// ReadVarName - Read the rest of a token containing a variable name.
319 bool LLLexer::ReadVarName() {
320  const char *NameStart = CurPtr;
321  if (isalpha(static_cast<unsigned char>(CurPtr[0])) ||
322  CurPtr[0] == '-' || CurPtr[0] == '$' ||
323  CurPtr[0] == '.' || CurPtr[0] == '_') {
324  ++CurPtr;
325  while (isalnum(static_cast<unsigned char>(CurPtr[0])) ||
326  CurPtr[0] == '-' || CurPtr[0] == '$' ||
327  CurPtr[0] == '.' || CurPtr[0] == '_')
328  ++CurPtr;
329 
330  StrVal.assign(NameStart, CurPtr);
331  return true;
332  }
333  return false;
334 }
335 
336 lltok::Kind LLLexer::LexVar(lltok::Kind Var, lltok::Kind VarID) {
337  // Handle StringConstant: \"[^\"]*\"
338  if (CurPtr[0] == '"') {
339  ++CurPtr;
340 
341  while (1) {
342  int CurChar = getNextChar();
343 
344  if (CurChar == EOF) {
345  Error("end of file in global variable name");
346  return lltok::Error;
347  }
348  if (CurChar == '"') {
349  StrVal.assign(TokStart+2, CurPtr-1);
350  UnEscapeLexed(StrVal);
351  if (StringRef(StrVal).find_first_of(0) != StringRef::npos) {
352  Error("Null bytes are not allowed in names");
353  return lltok::Error;
354  }
355  return Var;
356  }
357  }
358  }
359 
360  // Handle VarName: [-a-zA-Z$._][-a-zA-Z$._0-9]*
361  if (ReadVarName())
362  return Var;
363 
364  // Handle VarID: [0-9]+
365  if (isdigit(static_cast<unsigned char>(CurPtr[0]))) {
366  for (++CurPtr; isdigit(static_cast<unsigned char>(CurPtr[0])); ++CurPtr)
367  /*empty*/;
368 
369  uint64_t Val = atoull(TokStart+1, CurPtr);
370  if ((unsigned)Val != Val)
371  Error("invalid value number (too large)!");
372  UIntVal = unsigned(Val);
373  return VarID;
374  }
375  return lltok::Error;
376 }
377 
378 /// Lex all tokens that start with a % character.
379 /// LocalVar ::= %\"[^\"]*\"
380 /// LocalVar ::= %[-a-zA-Z$._][-a-zA-Z$._0-9]*
381 /// LocalVarID ::= %[0-9]+
382 lltok::Kind LLLexer::LexPercent() {
383  return LexVar(lltok::LocalVar, lltok::LocalVarID);
384 }
385 
386 /// Lex all tokens that start with a " character.
387 /// QuoteLabel "[^"]+":
388 /// StringConstant "[^"]*"
389 lltok::Kind LLLexer::LexQuote() {
390  lltok::Kind kind = ReadString(lltok::StringConstant);
391  if (kind == lltok::Error || kind == lltok::Eof)
392  return kind;
393 
394  if (CurPtr[0] == ':') {
395  ++CurPtr;
396  if (StringRef(StrVal).find_first_of(0) != StringRef::npos) {
397  Error("Null bytes are not allowed in names");
398  kind = lltok::Error;
399  } else {
400  kind = lltok::LabelStr;
401  }
402  }
403 
404  return kind;
405 }
406 
407 /// Lex all tokens that start with a ! character.
408 /// !foo
409 /// !
410 lltok::Kind LLLexer::LexExclaim() {
411  // Lex a metadata name as a MetadataVar.
412  if (isalpha(static_cast<unsigned char>(CurPtr[0])) ||
413  CurPtr[0] == '-' || CurPtr[0] == '$' ||
414  CurPtr[0] == '.' || CurPtr[0] == '_' || CurPtr[0] == '\\') {
415  ++CurPtr;
416  while (isalnum(static_cast<unsigned char>(CurPtr[0])) ||
417  CurPtr[0] == '-' || CurPtr[0] == '$' ||
418  CurPtr[0] == '.' || CurPtr[0] == '_' || CurPtr[0] == '\\')
419  ++CurPtr;
420 
421  StrVal.assign(TokStart+1, CurPtr); // Skip !
422  UnEscapeLexed(StrVal);
423  return lltok::MetadataVar;
424  }
425  return lltok::exclaim;
426 }
427 
428 /// Lex all tokens that start with a # character.
429 /// AttrGrpID ::= #[0-9]+
430 lltok::Kind LLLexer::LexHash() {
431  // Handle AttrGrpID: #[0-9]+
432  if (isdigit(static_cast<unsigned char>(CurPtr[0]))) {
433  for (++CurPtr; isdigit(static_cast<unsigned char>(CurPtr[0])); ++CurPtr)
434  /*empty*/;
435 
436  uint64_t Val = atoull(TokStart+1, CurPtr);
437  if ((unsigned)Val != Val)
438  Error("invalid value number (too large)!");
439  UIntVal = unsigned(Val);
440  return lltok::AttrGrpID;
441  }
442 
443  return lltok::Error;
444 }
445 
446 /// Lex a label, integer type, keyword, or hexadecimal integer constant.
447 /// Label [-a-zA-Z$._0-9]+:
448 /// IntegerType i[0-9]+
449 /// Keyword sdiv, float, ...
450 /// HexIntConstant [us]0x[0-9A-Fa-f]+
///
/// On entry the first character of the token has already been consumed, so
/// TokStart (and StartChar - 1) point at it.  The checks run in this order:
/// label, integer type, plain keywords, type keywords, instruction keywords,
/// DW_* / DIFlag* names, [us]0x hex integers, and finally a "cc" prefix.
/// Anything else is returned as lltok::Error with CurPtr reset to just after
/// the first character.
451 lltok::Kind LLLexer::LexIdentifier() {
452  const char *StartChar = CurPtr;
 // IntEnd starts out null only when the token began with 'i'; otherwise it is
 // pinned to StartChar, which makes the integer-type check below fail.
453  const char *IntEnd = CurPtr[-1] == 'i' ? nullptr : StartChar;
454  const char *KeywordEnd = nullptr;
455 
 // Scan the longest run of label characters, noting where the digit run
 // (IntEnd) and the [a-zA-Z0-9_] run (KeywordEnd) stop.
456  for (; isLabelChar(*CurPtr); ++CurPtr) {
457  // If we decide this is an integer, remember the end of the sequence.
458  if (!IntEnd && !isdigit(static_cast<unsigned char>(*CurPtr)))
459  IntEnd = CurPtr;
460  if (!KeywordEnd && !isalnum(static_cast<unsigned char>(*CurPtr)) &&
461  *CurPtr != '_')
462  KeywordEnd = CurPtr;
463  }
464 
465  // If we stopped due to a colon, this really is a label.
466  if (*CurPtr == ':') {
467  StrVal.assign(StartChar-1, CurPtr++);
468  return lltok::LabelStr;
469  }
470 
471  // Otherwise, this wasn't a label. If this was valid as an integer type,
472  // return it.
473  if (!IntEnd) IntEnd = CurPtr;
474  if (IntEnd != StartChar) {
475  CurPtr = IntEnd;
476  uint64_t NumBits = atoull(StartChar, CurPtr);
477  if (NumBits < IntegerType::MIN_INT_BITS ||
478  NumBits > IntegerType::MAX_INT_BITS) {
479  Error("bitwidth for integer type out of range!");
480  return lltok::Error;
481  }
482  TyVal = IntegerType::get(Context, NumBits);
483  return lltok::Type;
484  }
485 
486  // Otherwise, this was a letter sequence. See which keyword this is.
487  if (!KeywordEnd) KeywordEnd = CurPtr;
488  CurPtr = KeywordEnd;
 // Step back onto the first character so that Keyword covers the whole word.
489  --StartChar;
490  StringRef Keyword(StartChar, CurPtr - StartChar);
 // KEYWORD(x) returns lltok::kw_x when the scanned word is exactly "x".
491 #define KEYWORD(STR) \
492  do { \
493  if (Keyword == #STR) \
494  return lltok::kw_##STR; \
495  } while (0)
496 
497  KEYWORD(true); KEYWORD(false);
498  KEYWORD(declare); KEYWORD(define);
499  KEYWORD(global); KEYWORD(constant);
500 
501  KEYWORD(private);
502  KEYWORD(internal);
503  KEYWORD(available_externally);
504  KEYWORD(linkonce);
505  KEYWORD(linkonce_odr);
506  KEYWORD(weak); // Use as a linkage, and a modifier for "cmpxchg".
507  KEYWORD(weak_odr);
508  KEYWORD(appending);
509  KEYWORD(dllimport);
510  KEYWORD(dllexport);
511  KEYWORD(common);
512  KEYWORD(default);
513  KEYWORD(hidden);
514  KEYWORD(protected);
515  KEYWORD(unnamed_addr);
516  KEYWORD(externally_initialized);
517  KEYWORD(extern_weak);
518  KEYWORD(external);
519  KEYWORD(thread_local);
520  KEYWORD(localdynamic);
521  KEYWORD(initialexec);
522  KEYWORD(localexec);
523  KEYWORD(zeroinitializer);
524  KEYWORD(undef);
525  KEYWORD(null);
526  KEYWORD(to);
527  KEYWORD(tail);
528  KEYWORD(musttail);
529  KEYWORD(target);
530  KEYWORD(triple);
531  KEYWORD(unwind);
532  KEYWORD(deplibs); // FIXME: Remove in 4.0.
533  KEYWORD(datalayout);
534  KEYWORD(volatile);
535  KEYWORD(atomic);
536  KEYWORD(unordered);
537  KEYWORD(monotonic);
538  KEYWORD(acquire);
539  KEYWORD(release);
540  KEYWORD(acq_rel);
541  KEYWORD(seq_cst);
542  KEYWORD(singlethread);
543 
544  KEYWORD(nnan);
545  KEYWORD(ninf);
546  KEYWORD(nsz);
547  KEYWORD(arcp);
548  KEYWORD(fast);
549  KEYWORD(nuw);
550  KEYWORD(nsw);
551  KEYWORD(exact);
552  KEYWORD(inbounds);
553  KEYWORD(align);
554  KEYWORD(addrspace);
555  KEYWORD(section);
556  KEYWORD(alias);
557  KEYWORD(module);
558  KEYWORD(asm);
559  KEYWORD(sideeffect);
560  KEYWORD(alignstack);
561  KEYWORD(inteldialect);
562  KEYWORD(gc);
563  KEYWORD(prefix);
564  KEYWORD(prologue);
565 
566  KEYWORD(ccc);
567  KEYWORD(fastcc);
568  KEYWORD(coldcc);
569  KEYWORD(x86_stdcallcc);
570  KEYWORD(x86_fastcallcc);
571  KEYWORD(x86_thiscallcc);
572  KEYWORD(x86_vectorcallcc);
573  KEYWORD(arm_apcscc);
574  KEYWORD(arm_aapcscc);
575  KEYWORD(arm_aapcs_vfpcc);
576  KEYWORD(msp430_intrcc);
577  KEYWORD(ptx_kernel);
578  KEYWORD(ptx_device);
579  KEYWORD(spir_kernel);
580  KEYWORD(spir_func);
581  KEYWORD(intel_ocl_bicc);
582  KEYWORD(x86_64_sysvcc);
583  KEYWORD(x86_64_win64cc);
584  KEYWORD(webkit_jscc);
585  KEYWORD(anyregcc);
586  KEYWORD(preserve_mostcc);
587  KEYWORD(preserve_allcc);
588  KEYWORD(ghccc);
589 
590  KEYWORD(cc);
591  KEYWORD(c);
592 
 // NOTE(review): the embedded listing numbers jump from 592 to 594 here, so
 // one line (presumably another KEYWORD entry) appears to be missing from
 // this copy -- confirm against the upstream file.
594 
595  KEYWORD(alwaysinline);
596  KEYWORD(argmemonly);
597  KEYWORD(builtin);
598  KEYWORD(byval);
599  KEYWORD(inalloca);
600  KEYWORD(cold);
601  KEYWORD(convergent);
602  KEYWORD(dereferenceable);
603  KEYWORD(dereferenceable_or_null);
604  KEYWORD(inlinehint);
605  KEYWORD(inreg);
606  KEYWORD(jumptable);
607  KEYWORD(minsize);
608  KEYWORD(naked);
609  KEYWORD(nest);
610  KEYWORD(noalias);
611  KEYWORD(nobuiltin);
612  KEYWORD(nocapture);
613  KEYWORD(noduplicate);
614  KEYWORD(noimplicitfloat);
615  KEYWORD(noinline);
616  KEYWORD(nonlazybind);
617  KEYWORD(nonnull);
618  KEYWORD(noredzone);
619  KEYWORD(noreturn);
620  KEYWORD(nounwind);
621  KEYWORD(optnone);
622  KEYWORD(optsize);
623  KEYWORD(readnone);
624  KEYWORD(readonly);
625  KEYWORD(returned);
626  KEYWORD(returns_twice);
627  KEYWORD(signext);
628  KEYWORD(sret);
629  KEYWORD(ssp);
630  KEYWORD(sspreq);
631  KEYWORD(sspstrong);
632  KEYWORD(safestack);
633  KEYWORD(sanitize_address);
634  KEYWORD(sanitize_thread);
635  KEYWORD(sanitize_memory);
636  KEYWORD(uwtable);
637  KEYWORD(zeroext);
638 
639  KEYWORD(type);
640  KEYWORD(opaque);
641 
642  KEYWORD(comdat);
643 
644  // Comdat types
645  KEYWORD(any);
646  KEYWORD(exactmatch);
647  KEYWORD(largest);
648  KEYWORD(noduplicates);
649  KEYWORD(samesize);
650 
651  KEYWORD(eq); KEYWORD(ne); KEYWORD(slt); KEYWORD(sgt); KEYWORD(sle);
652  KEYWORD(sge); KEYWORD(ult); KEYWORD(ugt); KEYWORD(ule); KEYWORD(uge);
653  KEYWORD(oeq); KEYWORD(one); KEYWORD(olt); KEYWORD(ogt); KEYWORD(ole);
654  KEYWORD(oge); KEYWORD(ord); KEYWORD(uno); KEYWORD(ueq); KEYWORD(une);
655 
656  KEYWORD(xchg); KEYWORD(nand); KEYWORD(max); KEYWORD(min); KEYWORD(umax);
657  KEYWORD(umin);
658 
659  KEYWORD(x);
660  KEYWORD(blockaddress);
661 
662  // Metadata types.
663  KEYWORD(distinct);
664 
665  // Use-list order directives.
666  KEYWORD(uselistorder);
667  KEYWORD(uselistorder_bb);
668 
669  KEYWORD(personality);
670  KEYWORD(cleanup);
671  KEYWORD(catch);
672  KEYWORD(filter);
673 #undef KEYWORD
674 
675  // Keywords for types.
 // TYPEKEYWORD stores the matching type in TyVal and returns lltok::Type.
676 #define TYPEKEYWORD(STR, LLVMTY) \
677  do { \
678  if (Keyword == STR) { \
679  TyVal = LLVMTY; \
680  return lltok::Type; \
681  } \
682  } while (false)
683  TYPEKEYWORD("void", Type::getVoidTy(Context));
684  TYPEKEYWORD("half", Type::getHalfTy(Context));
685  TYPEKEYWORD("float", Type::getFloatTy(Context));
686  TYPEKEYWORD("double", Type::getDoubleTy(Context));
687  TYPEKEYWORD("x86_fp80", Type::getX86_FP80Ty(Context));
688  TYPEKEYWORD("fp128", Type::getFP128Ty(Context));
689  TYPEKEYWORD("ppc_fp128", Type::getPPC_FP128Ty(Context));
690  TYPEKEYWORD("label", Type::getLabelTy(Context));
691  TYPEKEYWORD("metadata", Type::getMetadataTy(Context));
692  TYPEKEYWORD("x86_mmx", Type::getX86_MMXTy(Context));
693 #undef TYPEKEYWORD
694 
695  // Keywords for instructions.
 // INSTKEYWORD stores the Instruction opcode in UIntVal and returns the
 // matching kw_ token.
696 #define INSTKEYWORD(STR, Enum) \
697  do { \
698  if (Keyword == #STR) { \
699  UIntVal = Instruction::Enum; \
700  return lltok::kw_##STR; \
701  } \
702  } while (false)
703 
704  INSTKEYWORD(add, Add); INSTKEYWORD(fadd, FAdd);
705  INSTKEYWORD(sub, Sub); INSTKEYWORD(fsub, FSub);
706  INSTKEYWORD(mul, Mul); INSTKEYWORD(fmul, FMul);
707  INSTKEYWORD(udiv, UDiv); INSTKEYWORD(sdiv, SDiv); INSTKEYWORD(fdiv, FDiv);
708  INSTKEYWORD(urem, URem); INSTKEYWORD(srem, SRem); INSTKEYWORD(frem, FRem);
709  INSTKEYWORD(shl, Shl); INSTKEYWORD(lshr, LShr); INSTKEYWORD(ashr, AShr);
710  INSTKEYWORD(and, And); INSTKEYWORD(or, Or); INSTKEYWORD(xor, Xor);
711  INSTKEYWORD(icmp, ICmp); INSTKEYWORD(fcmp, FCmp);
712 
713  INSTKEYWORD(phi, PHI);
714  INSTKEYWORD(call, Call);
715  INSTKEYWORD(trunc, Trunc);
716  INSTKEYWORD(zext, ZExt);
717  INSTKEYWORD(sext, SExt);
718  INSTKEYWORD(fptrunc, FPTrunc);
719  INSTKEYWORD(fpext, FPExt);
720  INSTKEYWORD(uitofp, UIToFP);
721  INSTKEYWORD(sitofp, SIToFP);
722  INSTKEYWORD(fptoui, FPToUI);
723  INSTKEYWORD(fptosi, FPToSI);
724  INSTKEYWORD(inttoptr, IntToPtr);
725  INSTKEYWORD(ptrtoint, PtrToInt);
726  INSTKEYWORD(bitcast, BitCast);
727  INSTKEYWORD(addrspacecast, AddrSpaceCast);
728  INSTKEYWORD(select, Select);
729  INSTKEYWORD(va_arg, VAArg);
730  INSTKEYWORD(ret, Ret);
731  INSTKEYWORD(br, Br);
732  INSTKEYWORD(switch, Switch);
733  INSTKEYWORD(indirectbr, IndirectBr);
734  INSTKEYWORD(invoke, Invoke);
735  INSTKEYWORD(resume, Resume);
736  INSTKEYWORD(unreachable, Unreachable);
737 
738  INSTKEYWORD(alloca, Alloca);
739  INSTKEYWORD(load, Load);
740  INSTKEYWORD(store, Store);
741  INSTKEYWORD(cmpxchg, AtomicCmpXchg);
742  INSTKEYWORD(atomicrmw, AtomicRMW);
743  INSTKEYWORD(fence, Fence);
744  INSTKEYWORD(getelementptr, GetElementPtr);
745 
746  INSTKEYWORD(extractelement, ExtractElement);
747  INSTKEYWORD(insertelement, InsertElement);
748  INSTKEYWORD(shufflevector, ShuffleVector);
749  INSTKEYWORD(extractvalue, ExtractValue);
750  INSTKEYWORD(insertvalue, InsertValue);
751  INSTKEYWORD(landingpad, LandingPad);
752 #undef INSTKEYWORD
753 
 // DWKEYWORD matches on the "DW_<TYPE>_" prefix only; the full word is kept
 // in StrVal.
754 #define DWKEYWORD(TYPE, TOKEN) \
755  do { \
756  if (Keyword.startswith("DW_" #TYPE "_")) { \
757  StrVal.assign(Keyword.begin(), Keyword.end()); \
758  return lltok::TOKEN; \
759  } \
760  } while (false)
761  DWKEYWORD(TAG, DwarfTag);
 // NOTE(review): the embedded listing numbers jump from 761 to 763 here, so
 // one line (presumably another DWKEYWORD entry) appears to be missing from
 // this copy -- confirm against the upstream file.
763  DWKEYWORD(VIRTUALITY, DwarfVirtuality);
764  DWKEYWORD(LANG, DwarfLang);
765  DWKEYWORD(OP, DwarfOp);
766 #undef DWKEYWORD
767 
 // Any word starting with "DIFlag" is returned whole in StrVal.
768  if (Keyword.startswith("DIFlag")) {
769  StrVal.assign(Keyword.begin(), Keyword.end());
770  return lltok::DIFlag;
771  }
772 
773  // Check for [us]0x[0-9A-Fa-f]+ which are Hexadecimal constant generated by
774  // the CFE to avoid forcing it to deal with 64-bit numbers.
775  if ((TokStart[0] == 'u' || TokStart[0] == 's') &&
776  TokStart[1] == '0' && TokStart[2] == 'x' &&
777  isxdigit(static_cast<unsigned char>(TokStart[3]))) {
 // len is the number of characters after the three-character [us]0x prefix;
 // each hex digit contributes four bits.
778  int len = CurPtr-TokStart-3;
779  uint32_t bits = len * 4;
780  StringRef HexStr(TokStart + 3, len);
781  if (!std::all_of(HexStr.begin(), HexStr.end(), isxdigit)) {
782  // Bad token, return it as an error.
783  CurPtr = TokStart+3;
784  return lltok::Error;
785  }
786  APInt Tmp(bits, HexStr, 16);
 // Shrink the value to its active bits unless it is zero or already minimal.
787  uint32_t activeBits = Tmp.getActiveBits();
788  if (activeBits > 0 && activeBits < bits)
789  Tmp = Tmp.trunc(activeBits);
790  APSIntVal = APSInt(Tmp, TokStart[0] == 'u');
791  return lltok::APSInt;
792  }
793 
794  // If this is "cc1234", return this as just "cc".
795  if (TokStart[0] == 'c' && TokStart[1] == 'c') {
796  CurPtr = TokStart+2;
797  return lltok::kw_cc;
798  }
799 
800  // Finally, if this isn't known, return an error.
801  CurPtr = TokStart+1;
802  return lltok::Error;
803 }
804 
805 /// Lex all tokens that start with a 0x prefix, knowing they match and are not
806 /// labels.
807 /// HexFPConstant 0x[0-9A-Fa-f]+
808 /// HexFP80Constant 0xK[0-9A-Fa-f]+
809 /// HexFP128Constant 0xL[0-9A-Fa-f]+
810 /// HexPPC128Constant 0xM[0-9A-Fa-f]+
811 /// HexHalfConstant 0xH[0-9A-Fa-f]+
812 lltok::Kind LLLexer::Lex0x() {
813  CurPtr = TokStart + 2;
814 
815  char Kind;
816  if ((CurPtr[0] >= 'K' && CurPtr[0] <= 'M') || CurPtr[0] == 'H') {
817  Kind = *CurPtr++;
818  } else {
819  Kind = 'J';
820  }
821 
822  if (!isxdigit(static_cast<unsigned char>(CurPtr[0]))) {
823  // Bad token, return it as an error.
824  CurPtr = TokStart+1;
825  return lltok::Error;
826  }
827 
828  while (isxdigit(static_cast<unsigned char>(CurPtr[0])))
829  ++CurPtr;
830 
831  if (Kind == 'J') {
832  // HexFPConstant - Floating point constant represented in IEEE format as a
833  // hexadecimal number for when exponential notation is not precise enough.
834  // Half, Float, and double only.
835  APFloatVal = APFloat(BitsToDouble(HexIntToVal(TokStart+2, CurPtr)));
836  return lltok::APFloat;
837  }
838 
839  uint64_t Pair[2];
840  switch (Kind) {
841  default: llvm_unreachable("Unknown kind!");
842  case 'K':
843  // F80HexFPConstant - x87 long double in hexadecimal format (10 bytes)
844  FP80HexToIntPair(TokStart+3, CurPtr, Pair);
845  APFloatVal = APFloat(APFloat::x87DoubleExtended, APInt(80, Pair));
846  return lltok::APFloat;
847  case 'L':
848  // F128HexFPConstant - IEEE 128-bit in hexadecimal format (16 bytes)
849  HexToIntPair(TokStart+3, CurPtr, Pair);
850  APFloatVal = APFloat(APFloat::IEEEquad, APInt(128, Pair));
851  return lltok::APFloat;
852  case 'M':
853  // PPC128HexFPConstant - PowerPC 128-bit in hexadecimal format (16 bytes)
854  HexToIntPair(TokStart+3, CurPtr, Pair);
855  APFloatVal = APFloat(APFloat::PPCDoubleDouble, APInt(128, Pair));
856  return lltok::APFloat;
857  case 'H':
858  APFloatVal = APFloat(APFloat::IEEEhalf,
859  APInt(16,HexIntToVal(TokStart+3, CurPtr)));
860  return lltok::APFloat;
861  }
862 }
863 
864 /// Lex tokens for a label or a numeric constant, possibly starting with -.
865 /// Label [-a-zA-Z$._0-9]+:
866 /// NInteger -[0-9]+
867 /// FPConstant [-+]?[0-9]+[.][0-9]*([eE][-+]?[0-9]+)?
868 /// PInteger [0-9]+
869 /// HexFPConstant 0x[0-9A-Fa-f]+
870 /// HexFP80Constant 0xK[0-9A-Fa-f]+
871 /// HexFP128Constant 0xL[0-9A-Fa-f]+
872 /// HexPPC128Constant 0xM[0-9A-Fa-f]+
873 lltok::Kind LLLexer::LexDigitOrNegative() {
874  // If the letter after the negative is not a number, this is probably a label.
875  if (!isdigit(static_cast<unsigned char>(TokStart[0])) &&
876  !isdigit(static_cast<unsigned char>(CurPtr[0]))) {
877  // Okay, this is not a number after the -, it's probably a label.
878  if (const char *End = isLabelTail(CurPtr)) {
879  StrVal.assign(TokStart, End-1);
880  CurPtr = End;
881  return lltok::LabelStr;
882  }
883 
884  return lltok::Error;
885  }
886 
887  // At this point, it is either a label, int or fp constant.
888 
889  // Skip digits, we have at least one.
890  for (; isdigit(static_cast<unsigned char>(CurPtr[0])); ++CurPtr)
891  /*empty*/;
892 
893  // Check to see if this really is a label afterall, e.g. "-1:".
894  if (isLabelChar(CurPtr[0]) || CurPtr[0] == ':') {
895  if (const char *End = isLabelTail(CurPtr)) {
896  StrVal.assign(TokStart, End-1);
897  CurPtr = End;
898  return lltok::LabelStr;
899  }
900  }
901 
902  // If the next character is a '.', then it is a fp value, otherwise its
903  // integer.
904  if (CurPtr[0] != '.') {
905  if (TokStart[0] == '0' && TokStart[1] == 'x')
906  return Lex0x();
907  APSIntVal = APSInt(StringRef(TokStart, CurPtr - TokStart));
908  return lltok::APSInt;
909  }
910 
911  ++CurPtr;
912 
913  // Skip over [0-9]*([eE][-+]?[0-9]+)?
914  while (isdigit(static_cast<unsigned char>(CurPtr[0]))) ++CurPtr;
915 
916  if (CurPtr[0] == 'e' || CurPtr[0] == 'E') {
917  if (isdigit(static_cast<unsigned char>(CurPtr[1])) ||
918  ((CurPtr[1] == '-' || CurPtr[1] == '+') &&
919  isdigit(static_cast<unsigned char>(CurPtr[2])))) {
920  CurPtr += 2;
921  while (isdigit(static_cast<unsigned char>(CurPtr[0]))) ++CurPtr;
922  }
923  }
924 
925  APFloatVal = APFloat(std::atof(TokStart));
926  return lltok::APFloat;
927 }
928 
929 /// Lex a floating point constant starting with +.
930 /// FPConstant [-+]?[0-9]+[.][0-9]*([eE][-+]?[0-9]+)?
931 lltok::Kind LLLexer::LexPositive() {
932  // If the letter after the negative is a number, this is probably not a
933  // label.
934  if (!isdigit(static_cast<unsigned char>(CurPtr[0])))
935  return lltok::Error;
936 
937  // Skip digits.
938  for (++CurPtr; isdigit(static_cast<unsigned char>(CurPtr[0])); ++CurPtr)
939  /*empty*/;
940 
941  // At this point, we need a '.'.
942  if (CurPtr[0] != '.') {
943  CurPtr = TokStart+1;
944  return lltok::Error;
945  }
946 
947  ++CurPtr;
948 
949  // Skip over [0-9]*([eE][-+]?[0-9]+)?
950  while (isdigit(static_cast<unsigned char>(CurPtr[0]))) ++CurPtr;
951 
952  if (CurPtr[0] == 'e' || CurPtr[0] == 'E') {
953  if (isdigit(static_cast<unsigned char>(CurPtr[1])) ||
954  ((CurPtr[1] == '-' || CurPtr[1] == '+') &&
955  isdigit(static_cast<unsigned char>(CurPtr[2])))) {
956  CurPtr += 2;
957  while (isdigit(static_cast<unsigned char>(CurPtr[0]))) ++CurPtr;
958  }
959  }
960 
961  APFloatVal = APFloat(std::atof(TokStart));
962  return lltok::APFloat;
963 }
static Type * getDoubleTy(LLVMContext &C)
Definition: Type.cpp:229
static const char * isLabelTail(const char *CurPtr)
isLabelTail - Return true if this pointer points to a valid end of a label.
Definition: LLLexer.cpp:152
#define DWKEYWORD(TYPE, TOKEN)
static void cleanup(BlockFrequencyInfoImplBase &BFI)
Clear all memory not needed downstream.
static Type * getMetadataTy(LLVMContext &C)
Definition: Type.cpp:230
static Type * getX86_MMXTy(LLVMContext &C)
Definition: Type.cpp:234
static Type * getX86_FP80Ty(LLVMContext &C)
Definition: Type.cpp:231
Minimum number of bits that can be specified.
Definition: DerivedTypes.h:47
static const fltSemantics x87DoubleExtended
Definition: APFloat.h:136
void Warning(LocTy WarningLoc, const Twine &Msg) const
Definition: LLLexer.cpp:37
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition: Twine.h:79
static Type * getFloatTy(LLVMContext &C)
Definition: Type.cpp:228
APInt urem(const APInt &LHS, const APInt &RHS)
Function for unsigned remainder operation.
Definition: APInt.h:1869
#define TYPEKEYWORD(STR, LLVMTY)
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
Definition: ErrorHandling.h:98
Number of individual test Apply this number of consecutive mutations to each input exit after the first new interesting input is found the minimized corpus is saved into the first input directory Number of jobs to run If min(jobs, NumberOfCpuCores()/2)\" is used.") FUZZER_FLAG_INT(reload
static const fltSemantics IEEEquad
Definition: APFloat.h:134
APInt umax(const APInt &A, const APInt &B)
Determine the larger of two APInts considered to be unsigned.
Definition: APInt.h:1755
static ConstantInt * ExtractElement(Constant *V, Constant *Idx)
static Type * getPPC_FP128Ty(LLVMContext &C)
Definition: Type.cpp:233
LLLexer(StringRef StartBuf, SourceMgr &SM, SMDiagnostic &, LLVMContext &C)
Definition: LLLexer.cpp:166
static bool isLabelChar(char C)
isLabelChar - Return true for [-a-zA-Z$._0-9].
Definition: LLLexer.cpp:145
APInt lshr(const APInt &LHS, unsigned shiftAmt)
Logical right-shift function.
Definition: APInt.h:1840
static Type * getLabelTy(LLVMContext &C)
Definition: Type.cpp:226
static bool sub(uint64_t *dest, const uint64_t *x, const uint64_t *y, unsigned len)
Subtracts the integer array y from the integer array x.
Definition: APInt.cpp:265
static void mul(uint64_t dest[], uint64_t x[], unsigned xlen, uint64_t y[], unsigned ylen)
Multiplies integer array x by integer array y and stores the result into the integer array dest...
Definition: APInt.cpp:326
APInt udiv(const APInt &LHS, const APInt &RHS)
Unsigned division function for APInt.
Definition: APInt.h:1859
static bool add(uint64_t *dest, const uint64_t *x, const uint64_t *y, unsigned len)
This function adds the integer array x to the integer array Y and places the result in dest...
Definition: APInt.cpp:238
APInt sdiv(const APInt &LHS, const APInt &RHS)
Signed division function for APInt.
Definition: APInt.h:1854
APInt ashr(const APInt &LHS, unsigned shiftAmt)
Arithmetic right-shift function.
Definition: APInt.h:1833
iterator begin() const
Definition: StringRef.h:90
APInt srem(const APInt &LHS, const APInt &RHS)
Function for signed remainder operation.
Definition: APInt.h:1864
This is an important class for using LLVM in a threaded context.
Definition: LLVMContext.h:41
APInt Or(const APInt &LHS, const APInt &RHS)
Bitwise OR function for APInt.
Definition: APInt.h:1895
APInt Xor(const APInt &LHS, const APInt &RHS)
Bitwise XOR function for APInt.
Definition: APInt.h:1900
static Type * getVoidTy(LLVMContext &C)
Definition: Type.cpp:225
rewrite statepoints for gc
This owns the files read by a parser, handles include stacks, and handles diagnostic wrangling...
Definition: SourceMgr.h:35
bool Error(LocTy L, const Twine &Msg) const
Definition: LLLexer.cpp:32
static const fltSemantics IEEEhalf
Definition: APFloat.h:131
static Type * getFP128Ty(LLVMContext &C)
Definition: Type.cpp:232
static Type * getHalfTy(LLVMContext &C)
Definition: Type.cpp:227
static IntegerType * get(LLVMContext &C, unsigned NumBits)
This static method is the primary way of constructing an IntegerType.
Definition: Type.cpp:304
static const fltSemantics PPCDoubleDouble
Definition: APFloat.h:135
Maximum number of bits that can be specified.
Definition: DerivedTypes.h:48
APInt umin(const APInt &A, const APInt &B)
Determine the smaller of two APInts considered to be unsigned.
Definition: APInt.h:1752
double BitsToDouble(uint64_t Bits)
BitsToDouble - This function takes a 64-bit integer and returns the bit equivalent double...
Definition: MathExtras.h:504
Class for arbitrary precision integers.
Definition: APInt.h:73
APInt And(const APInt &LHS, const APInt &RHS)
Bitwise AND function for APInt.
Definition: APInt.h:1890
static const size_t npos
Definition: StringRef.h:44
Deduce function attributes
bool all_of(R &&Range, UnaryPredicate &&P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly...
Definition: STLExtras.h:334
static void UnEscapeLexed(std::string &Str)
Definition: LLLexer.cpp:118
const ARM::ArchExtKind Kind
static unsigned hexDigitValue(char C)
Interpret the given character C as a hexadecimal digit and return its value.
Definition: StringExtras.h:40
APInt shl(const APInt &LHS, unsigned shiftAmt)
Left-shift function.
Definition: APInt.h:1847
iterator end() const
Definition: StringRef.h:92
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:40
SMDiagnostic GetMessage(SMLoc Loc, DiagKind Kind, const Twine &Msg, ArrayRef< SMRange > Ranges=None, ArrayRef< SMFixIt > FixIts=None) const
Return an SMDiagnostic at the specified location with the specified string.
Definition: SourceMgr.cpp:135
#define KEYWORD(STR)
void PrintMessage(raw_ostream &OS, SMLoc Loc, DiagKind Kind, const Twine &Msg, ArrayRef< SMRange > Ranges=None, ArrayRef< SMFixIt > FixIts=None, bool ShowColors=true) const
Emit a message about the specified location with the specified string.
Definition: SourceMgr.cpp:215
Represents a location in source code.
Definition: SMLoc.h:23
#define OP(n)
Definition: regex2.h:70
#define INSTKEYWORD(STR, Enum)
Instances of this class encapsulate one diagnostic report, allowing printing to a raw_ostream as a ca...
Definition: SourceMgr.h:233