LLVM  4.0.0
LLLexer.cpp
Go to the documentation of this file.
1 //===- LLLexer.cpp - Lexer for .ll Files ----------------------------------===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // Implement the Lexer for .ll files.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "LLLexer.h"
15 #include "llvm/ADT/APInt.h"
16 #include "llvm/ADT/STLExtras.h"
17 #include "llvm/ADT/StringExtras.h"
18 #include "llvm/ADT/Twine.h"
19 #include "llvm/IR/DerivedTypes.h"
20 #include "llvm/IR/Instruction.h"
22 #include "llvm/Support/SourceMgr.h"
23 #include <cassert>
24 #include <cctype>
25 #include <cstdio>
26 
27 using namespace llvm;
28 
29 bool LLLexer::Error(LocTy ErrorLoc, const Twine &Msg) const {
30  ErrorInfo = SM.GetMessage(ErrorLoc, SourceMgr::DK_Error, Msg);
31  return true;
32 }
33 
34 void LLLexer::Warning(LocTy WarningLoc, const Twine &Msg) const {
35  SM.PrintMessage(WarningLoc, SourceMgr::DK_Warning, Msg);
36 }
37 
38 //===----------------------------------------------------------------------===//
39 // Helper functions.
40 //===----------------------------------------------------------------------===//
41 
42 // atoull - Convert an ascii string of decimal digits into the unsigned long
43 // long representation... this does not have to do input error checking,
44 // because we know that the input will be matched by a suitable regex...
45 //
46 uint64_t LLLexer::atoull(const char *Buffer, const char *End) {
47  uint64_t Result = 0;
48  for (; Buffer != End; Buffer++) {
49  uint64_t OldRes = Result;
50  Result *= 10;
51  Result += *Buffer-'0';
52  if (Result < OldRes) { // Uh, oh, overflow detected!!!
53  Error("constant bigger than 64 bits detected!");
54  return 0;
55  }
56  }
57  return Result;
58 }
59 
60 uint64_t LLLexer::HexIntToVal(const char *Buffer, const char *End) {
61  uint64_t Result = 0;
62  for (; Buffer != End; ++Buffer) {
63  uint64_t OldRes = Result;
64  Result *= 16;
65  Result += hexDigitValue(*Buffer);
66 
67  if (Result < OldRes) { // Uh, oh, overflow detected!!!
68  Error("constant bigger than 64 bits detected!");
69  return 0;
70  }
71  }
72  return Result;
73 }
74 
75 void LLLexer::HexToIntPair(const char *Buffer, const char *End,
76  uint64_t Pair[2]) {
77  Pair[0] = 0;
78  if (End - Buffer >= 16) {
79  for (int i = 0; i < 16; i++, Buffer++) {
80  assert(Buffer != End);
81  Pair[0] *= 16;
82  Pair[0] += hexDigitValue(*Buffer);
83  }
84  }
85  Pair[1] = 0;
86  for (int i = 0; i < 16 && Buffer != End; i++, Buffer++) {
87  Pair[1] *= 16;
88  Pair[1] += hexDigitValue(*Buffer);
89  }
90  if (Buffer != End)
91  Error("constant bigger than 128 bits detected!");
92 }
93 
94 /// FP80HexToIntPair - translate an 80 bit FP80 number (20 hexits) into
95 /// { low64, high16 } as usual for an APInt.
96 void LLLexer::FP80HexToIntPair(const char *Buffer, const char *End,
97  uint64_t Pair[2]) {
98  Pair[1] = 0;
99  for (int i=0; i<4 && Buffer != End; i++, Buffer++) {
100  assert(Buffer != End);
101  Pair[1] *= 16;
102  Pair[1] += hexDigitValue(*Buffer);
103  }
104  Pair[0] = 0;
105  for (int i = 0; i < 16 && Buffer != End; i++, Buffer++) {
106  Pair[0] *= 16;
107  Pair[0] += hexDigitValue(*Buffer);
108  }
109  if (Buffer != End)
110  Error("constant bigger than 128 bits detected!");
111 }
112 
113 // UnEscapeLexed - Run through the specified buffer and change \xx codes to the
114 // appropriate character.
115 static void UnEscapeLexed(std::string &Str) {
116  if (Str.empty()) return;
117 
118  char *Buffer = &Str[0], *EndBuffer = Buffer+Str.size();
119  char *BOut = Buffer;
120  for (char *BIn = Buffer; BIn != EndBuffer; ) {
121  if (BIn[0] == '\\') {
122  if (BIn < EndBuffer-1 && BIn[1] == '\\') {
123  *BOut++ = '\\'; // Two \ becomes one
124  BIn += 2;
125  } else if (BIn < EndBuffer-2 &&
126  isxdigit(static_cast<unsigned char>(BIn[1])) &&
127  isxdigit(static_cast<unsigned char>(BIn[2]))) {
128  *BOut = hexDigitValue(BIn[1]) * 16 + hexDigitValue(BIn[2]);
129  BIn += 3; // Skip over handled chars
130  ++BOut;
131  } else {
132  *BOut++ = *BIn++;
133  }
134  } else {
135  *BOut++ = *BIn++;
136  }
137  }
138  Str.resize(BOut-Buffer);
139 }
140 
/// isLabelChar - Return true for [-a-zA-Z$._0-9].
static bool isLabelChar(char C) {
  switch (C) {
  case '-':
  case '$':
  case '.':
  case '_':
    return true;
  default:
    // Go through unsigned char so a negative char value is never handed to
    // the ctype function.
    return isalnum(static_cast<unsigned char>(C)) != 0;
  }
}
146 
147 /// isLabelTail - Return true if this pointer points to a valid end of a label.
148 static const char *isLabelTail(const char *CurPtr) {
149  while (true) {
150  if (CurPtr[0] == ':') return CurPtr+1;
151  if (!isLabelChar(CurPtr[0])) return nullptr;
152  ++CurPtr;
153  }
154 }
155 
156 //===----------------------------------------------------------------------===//
157 // Lexer definition.
158 //===----------------------------------------------------------------------===//
159 
161  LLVMContext &C)
162  : CurBuf(StartBuf), ErrorInfo(Err), SM(sm), Context(C), APFloatVal(0.0) {
163  CurPtr = CurBuf.begin();
164 }
165 
166 int LLLexer::getNextChar() {
167  char CurChar = *CurPtr++;
168  switch (CurChar) {
169  default: return (unsigned char)CurChar;
170  case 0:
171  // A nul character in the stream is either the end of the current buffer or
172  // a random nul in the file. Disambiguate that here.
173  if (CurPtr-1 != CurBuf.end())
174  return 0; // Just whitespace.
175 
176  // Otherwise, return end of file.
177  --CurPtr; // Another call to lex will return EOF again.
178  return EOF;
179  }
180 }
181 
182 lltok::Kind LLLexer::LexToken() {
183  while (true) {
184  TokStart = CurPtr;
185 
186  int CurChar = getNextChar();
187  switch (CurChar) {
188  default:
189  // Handle letters: [a-zA-Z_]
190  if (isalpha(static_cast<unsigned char>(CurChar)) || CurChar == '_')
191  return LexIdentifier();
192 
193  return lltok::Error;
194  case EOF: return lltok::Eof;
195  case 0:
196  case ' ':
197  case '\t':
198  case '\n':
199  case '\r':
200  // Ignore whitespace.
201  continue;
202  case '+': return LexPositive();
203  case '@': return LexAt();
204  case '$': return LexDollar();
205  case '%': return LexPercent();
206  case '"': return LexQuote();
207  case '.':
208  if (const char *Ptr = isLabelTail(CurPtr)) {
209  CurPtr = Ptr;
210  StrVal.assign(TokStart, CurPtr-1);
211  return lltok::LabelStr;
212  }
213  if (CurPtr[0] == '.' && CurPtr[1] == '.') {
214  CurPtr += 2;
215  return lltok::dotdotdot;
216  }
217  return lltok::Error;
218  case ';':
219  SkipLineComment();
220  continue;
221  case '!': return LexExclaim();
222  case '#': return LexHash();
223  case '0': case '1': case '2': case '3': case '4':
224  case '5': case '6': case '7': case '8': case '9':
225  case '-':
226  return LexDigitOrNegative();
227  case '=': return lltok::equal;
228  case '[': return lltok::lsquare;
229  case ']': return lltok::rsquare;
230  case '{': return lltok::lbrace;
231  case '}': return lltok::rbrace;
232  case '<': return lltok::less;
233  case '>': return lltok::greater;
234  case '(': return lltok::lparen;
235  case ')': return lltok::rparen;
236  case ',': return lltok::comma;
237  case '*': return lltok::star;
238  case '|': return lltok::bar;
239  }
240  }
241 }
242 
243 void LLLexer::SkipLineComment() {
244  while (true) {
245  if (CurPtr[0] == '\n' || CurPtr[0] == '\r' || getNextChar() == EOF)
246  return;
247  }
248 }
249 
250 /// Lex all tokens that start with an @ character.
251 /// GlobalVar @\"[^\"]*\"
252 /// GlobalVar @[-a-zA-Z$._][-a-zA-Z$._0-9]*
253 /// GlobalVarID @[0-9]+
254 lltok::Kind LLLexer::LexAt() {
255  return LexVar(lltok::GlobalVar, lltok::GlobalID);
256 }
257 
258 lltok::Kind LLLexer::LexDollar() {
259  if (const char *Ptr = isLabelTail(TokStart)) {
260  CurPtr = Ptr;
261  StrVal.assign(TokStart, CurPtr - 1);
262  return lltok::LabelStr;
263  }
264 
265  // Handle DollarStringConstant: $\"[^\"]*\"
266  if (CurPtr[0] == '"') {
267  ++CurPtr;
268 
269  while (true) {
270  int CurChar = getNextChar();
271 
272  if (CurChar == EOF) {
273  Error("end of file in COMDAT variable name");
274  return lltok::Error;
275  }
276  if (CurChar == '"') {
277  StrVal.assign(TokStart + 2, CurPtr - 1);
278  UnEscapeLexed(StrVal);
279  if (StringRef(StrVal).find_first_of(0) != StringRef::npos) {
280  Error("Null bytes are not allowed in names");
281  return lltok::Error;
282  }
283  return lltok::ComdatVar;
284  }
285  }
286  }
287 
288  // Handle ComdatVarName: $[-a-zA-Z$._][-a-zA-Z$._0-9]*
289  if (ReadVarName())
290  return lltok::ComdatVar;
291 
292  return lltok::Error;
293 }
294 
295 /// ReadString - Read a string until the closing quote.
296 lltok::Kind LLLexer::ReadString(lltok::Kind kind) {
297  const char *Start = CurPtr;
298  while (true) {
299  int CurChar = getNextChar();
300 
301  if (CurChar == EOF) {
302  Error("end of file in string constant");
303  return lltok::Error;
304  }
305  if (CurChar == '"') {
306  StrVal.assign(Start, CurPtr-1);
307  UnEscapeLexed(StrVal);
308  return kind;
309  }
310  }
311 }
312 
313 /// ReadVarName - Read the rest of a token containing a variable name.
314 bool LLLexer::ReadVarName() {
315  const char *NameStart = CurPtr;
316  if (isalpha(static_cast<unsigned char>(CurPtr[0])) ||
317  CurPtr[0] == '-' || CurPtr[0] == '$' ||
318  CurPtr[0] == '.' || CurPtr[0] == '_') {
319  ++CurPtr;
320  while (isalnum(static_cast<unsigned char>(CurPtr[0])) ||
321  CurPtr[0] == '-' || CurPtr[0] == '$' ||
322  CurPtr[0] == '.' || CurPtr[0] == '_')
323  ++CurPtr;
324 
325  StrVal.assign(NameStart, CurPtr);
326  return true;
327  }
328  return false;
329 }
330 
331 lltok::Kind LLLexer::LexVar(lltok::Kind Var, lltok::Kind VarID) {
332  // Handle StringConstant: \"[^\"]*\"
333  if (CurPtr[0] == '"') {
334  ++CurPtr;
335 
336  while (true) {
337  int CurChar = getNextChar();
338 
339  if (CurChar == EOF) {
340  Error("end of file in global variable name");
341  return lltok::Error;
342  }
343  if (CurChar == '"') {
344  StrVal.assign(TokStart+2, CurPtr-1);
345  UnEscapeLexed(StrVal);
346  if (StringRef(StrVal).find_first_of(0) != StringRef::npos) {
347  Error("Null bytes are not allowed in names");
348  return lltok::Error;
349  }
350  return Var;
351  }
352  }
353  }
354 
355  // Handle VarName: [-a-zA-Z$._][-a-zA-Z$._0-9]*
356  if (ReadVarName())
357  return Var;
358 
359  // Handle VarID: [0-9]+
360  if (isdigit(static_cast<unsigned char>(CurPtr[0]))) {
361  for (++CurPtr; isdigit(static_cast<unsigned char>(CurPtr[0])); ++CurPtr)
362  /*empty*/;
363 
364  uint64_t Val = atoull(TokStart+1, CurPtr);
365  if ((unsigned)Val != Val)
366  Error("invalid value number (too large)!");
367  UIntVal = unsigned(Val);
368  return VarID;
369  }
370  return lltok::Error;
371 }
372 
373 /// Lex all tokens that start with a % character.
374 /// LocalVar ::= %\"[^\"]*\"
375 /// LocalVar ::= %[-a-zA-Z$._][-a-zA-Z$._0-9]*
376 /// LocalVarID ::= %[0-9]+
377 lltok::Kind LLLexer::LexPercent() {
378  return LexVar(lltok::LocalVar, lltok::LocalVarID);
379 }
380 
381 /// Lex all tokens that start with a " character.
382 /// QuoteLabel "[^"]+":
383 /// StringConstant "[^"]*"
384 lltok::Kind LLLexer::LexQuote() {
385  lltok::Kind kind = ReadString(lltok::StringConstant);
386  if (kind == lltok::Error || kind == lltok::Eof)
387  return kind;
388 
389  if (CurPtr[0] == ':') {
390  ++CurPtr;
391  if (StringRef(StrVal).find_first_of(0) != StringRef::npos) {
392  Error("Null bytes are not allowed in names");
393  kind = lltok::Error;
394  } else {
395  kind = lltok::LabelStr;
396  }
397  }
398 
399  return kind;
400 }
401 
402 /// Lex all tokens that start with a ! character.
403 /// !foo
404 /// !
405 lltok::Kind LLLexer::LexExclaim() {
406  // Lex a metadata name as a MetadataVar.
407  if (isalpha(static_cast<unsigned char>(CurPtr[0])) ||
408  CurPtr[0] == '-' || CurPtr[0] == '$' ||
409  CurPtr[0] == '.' || CurPtr[0] == '_' || CurPtr[0] == '\\') {
410  ++CurPtr;
411  while (isalnum(static_cast<unsigned char>(CurPtr[0])) ||
412  CurPtr[0] == '-' || CurPtr[0] == '$' ||
413  CurPtr[0] == '.' || CurPtr[0] == '_' || CurPtr[0] == '\\')
414  ++CurPtr;
415 
416  StrVal.assign(TokStart+1, CurPtr); // Skip !
417  UnEscapeLexed(StrVal);
418  return lltok::MetadataVar;
419  }
420  return lltok::exclaim;
421 }
422 
423 /// Lex all tokens that start with a # character.
424 /// AttrGrpID ::= #[0-9]+
425 lltok::Kind LLLexer::LexHash() {
426  // Handle AttrGrpID: #[0-9]+
427  if (isdigit(static_cast<unsigned char>(CurPtr[0]))) {
428  for (++CurPtr; isdigit(static_cast<unsigned char>(CurPtr[0])); ++CurPtr)
429  /*empty*/;
430 
431  uint64_t Val = atoull(TokStart+1, CurPtr);
432  if ((unsigned)Val != Val)
433  Error("invalid value number (too large)!");
434  UIntVal = unsigned(Val);
435  return lltok::AttrGrpID;
436  }
437 
438  return lltok::Error;
439 }
440 
441 /// Lex a label, integer type, keyword, or hexadecimal integer constant.
442 /// Label [-a-zA-Z$._0-9]+:
443 /// IntegerType i[0-9]+
444 /// Keyword sdiv, float, ...
445 /// HexIntConstant [us]0x[0-9A-Fa-f]+
446 lltok::Kind LLLexer::LexIdentifier() {
447  const char *StartChar = CurPtr;
448  const char *IntEnd = CurPtr[-1] == 'i' ? nullptr : StartChar;
449  const char *KeywordEnd = nullptr;
450 
451  for (; isLabelChar(*CurPtr); ++CurPtr) {
452  // If we decide this is an integer, remember the end of the sequence.
453  if (!IntEnd && !isdigit(static_cast<unsigned char>(*CurPtr)))
454  IntEnd = CurPtr;
455  if (!KeywordEnd && !isalnum(static_cast<unsigned char>(*CurPtr)) &&
456  *CurPtr != '_')
457  KeywordEnd = CurPtr;
458  }
459 
460  // If we stopped due to a colon, this really is a label.
461  if (*CurPtr == ':') {
462  StrVal.assign(StartChar-1, CurPtr++);
463  return lltok::LabelStr;
464  }
465 
466  // Otherwise, this wasn't a label. If this was valid as an integer type,
467  // return it.
468  if (!IntEnd) IntEnd = CurPtr;
469  if (IntEnd != StartChar) {
470  CurPtr = IntEnd;
471  uint64_t NumBits = atoull(StartChar, CurPtr);
472  if (NumBits < IntegerType::MIN_INT_BITS ||
473  NumBits > IntegerType::MAX_INT_BITS) {
474  Error("bitwidth for integer type out of range!");
475  return lltok::Error;
476  }
477  TyVal = IntegerType::get(Context, NumBits);
478  return lltok::Type;
479  }
480 
481  // Otherwise, this was a letter sequence. See which keyword this is.
482  if (!KeywordEnd) KeywordEnd = CurPtr;
483  CurPtr = KeywordEnd;
484  --StartChar;
485  StringRef Keyword(StartChar, CurPtr - StartChar);
486 
487 #define KEYWORD(STR) \
488  do { \
489  if (Keyword == #STR) \
490  return lltok::kw_##STR; \
491  } while (false)
492 
493  KEYWORD(true); KEYWORD(false);
494  KEYWORD(declare); KEYWORD(define);
495  KEYWORD(global); KEYWORD(constant);
496 
497  KEYWORD(private);
498  KEYWORD(internal);
499  KEYWORD(available_externally);
500  KEYWORD(linkonce);
501  KEYWORD(linkonce_odr);
502  KEYWORD(weak); // Use as a linkage, and a modifier for "cmpxchg".
503  KEYWORD(weak_odr);
504  KEYWORD(appending);
505  KEYWORD(dllimport);
506  KEYWORD(dllexport);
507  KEYWORD(common);
508  KEYWORD(default);
509  KEYWORD(hidden);
510  KEYWORD(protected);
511  KEYWORD(unnamed_addr);
512  KEYWORD(local_unnamed_addr);
513  KEYWORD(externally_initialized);
514  KEYWORD(extern_weak);
515  KEYWORD(external);
516  KEYWORD(thread_local);
517  KEYWORD(localdynamic);
518  KEYWORD(initialexec);
519  KEYWORD(localexec);
520  KEYWORD(zeroinitializer);
521  KEYWORD(undef);
522  KEYWORD(null);
523  KEYWORD(none);
524  KEYWORD(to);
525  KEYWORD(caller);
526  KEYWORD(within);
527  KEYWORD(from);
528  KEYWORD(tail);
529  KEYWORD(musttail);
530  KEYWORD(notail);
531  KEYWORD(target);
532  KEYWORD(triple);
533  KEYWORD(source_filename);
534  KEYWORD(unwind);
535  KEYWORD(deplibs); // FIXME: Remove in 4.0.
536  KEYWORD(datalayout);
537  KEYWORD(volatile);
538  KEYWORD(atomic);
539  KEYWORD(unordered);
540  KEYWORD(monotonic);
541  KEYWORD(acquire);
542  KEYWORD(release);
543  KEYWORD(acq_rel);
544  KEYWORD(seq_cst);
545  KEYWORD(singlethread);
546 
547  KEYWORD(nnan);
548  KEYWORD(ninf);
549  KEYWORD(nsz);
550  KEYWORD(arcp);
551  KEYWORD(fast);
552  KEYWORD(nuw);
553  KEYWORD(nsw);
554  KEYWORD(exact);
555  KEYWORD(inbounds);
556  KEYWORD(inrange);
557  KEYWORD(align);
558  KEYWORD(addrspace);
559  KEYWORD(section);
560  KEYWORD(alias);
561  KEYWORD(ifunc);
562  KEYWORD(module);
563  KEYWORD(asm);
564  KEYWORD(sideeffect);
565  KEYWORD(alignstack);
566  KEYWORD(inteldialect);
567  KEYWORD(gc);
568  KEYWORD(prefix);
569  KEYWORD(prologue);
570 
571  KEYWORD(ccc);
572  KEYWORD(fastcc);
573  KEYWORD(coldcc);
574  KEYWORD(x86_stdcallcc);
575  KEYWORD(x86_fastcallcc);
576  KEYWORD(x86_thiscallcc);
577  KEYWORD(x86_vectorcallcc);
578  KEYWORD(arm_apcscc);
579  KEYWORD(arm_aapcscc);
580  KEYWORD(arm_aapcs_vfpcc);
581  KEYWORD(msp430_intrcc);
582  KEYWORD(avr_intrcc);
583  KEYWORD(avr_signalcc);
584  KEYWORD(ptx_kernel);
585  KEYWORD(ptx_device);
586  KEYWORD(spir_kernel);
587  KEYWORD(spir_func);
588  KEYWORD(intel_ocl_bicc);
589  KEYWORD(x86_64_sysvcc);
590  KEYWORD(x86_64_win64cc);
591  KEYWORD(x86_regcallcc);
592  KEYWORD(webkit_jscc);
593  KEYWORD(swiftcc);
594  KEYWORD(anyregcc);
595  KEYWORD(preserve_mostcc);
596  KEYWORD(preserve_allcc);
597  KEYWORD(ghccc);
598  KEYWORD(x86_intrcc);
599  KEYWORD(hhvmcc);
600  KEYWORD(hhvm_ccc);
601  KEYWORD(cxx_fast_tlscc);
602  KEYWORD(amdgpu_vs);
603  KEYWORD(amdgpu_gs);
604  KEYWORD(amdgpu_ps);
605  KEYWORD(amdgpu_cs);
606  KEYWORD(amdgpu_kernel);
607 
608  KEYWORD(cc);
609  KEYWORD(c);
610 
612 
613  KEYWORD(alwaysinline);
614  KEYWORD(allocsize);
615  KEYWORD(argmemonly);
616  KEYWORD(builtin);
617  KEYWORD(byval);
618  KEYWORD(inalloca);
619  KEYWORD(cold);
620  KEYWORD(convergent);
621  KEYWORD(dereferenceable);
622  KEYWORD(dereferenceable_or_null);
623  KEYWORD(inaccessiblememonly);
624  KEYWORD(inaccessiblemem_or_argmemonly);
625  KEYWORD(inlinehint);
626  KEYWORD(inreg);
627  KEYWORD(jumptable);
628  KEYWORD(minsize);
629  KEYWORD(naked);
630  KEYWORD(nest);
631  KEYWORD(noalias);
632  KEYWORD(nobuiltin);
633  KEYWORD(nocapture);
634  KEYWORD(noduplicate);
635  KEYWORD(noimplicitfloat);
636  KEYWORD(noinline);
637  KEYWORD(norecurse);
638  KEYWORD(nonlazybind);
639  KEYWORD(nonnull);
640  KEYWORD(noredzone);
641  KEYWORD(noreturn);
642  KEYWORD(nounwind);
643  KEYWORD(optnone);
644  KEYWORD(optsize);
645  KEYWORD(readnone);
646  KEYWORD(readonly);
647  KEYWORD(returned);
648  KEYWORD(returns_twice);
649  KEYWORD(signext);
650  KEYWORD(sret);
651  KEYWORD(ssp);
652  KEYWORD(sspreq);
653  KEYWORD(sspstrong);
654  KEYWORD(safestack);
655  KEYWORD(sanitize_address);
656  KEYWORD(sanitize_thread);
657  KEYWORD(sanitize_memory);
658  KEYWORD(swifterror);
659  KEYWORD(swiftself);
660  KEYWORD(uwtable);
661  KEYWORD(writeonly);
662  KEYWORD(zeroext);
663 
664  KEYWORD(type);
665  KEYWORD(opaque);
666 
667  KEYWORD(comdat);
668 
669  // Comdat types
670  KEYWORD(any);
671  KEYWORD(exactmatch);
672  KEYWORD(largest);
673  KEYWORD(noduplicates);
674  KEYWORD(samesize);
675 
676  KEYWORD(eq); KEYWORD(ne); KEYWORD(slt); KEYWORD(sgt); KEYWORD(sle);
677  KEYWORD(sge); KEYWORD(ult); KEYWORD(ugt); KEYWORD(ule); KEYWORD(uge);
678  KEYWORD(oeq); KEYWORD(one); KEYWORD(olt); KEYWORD(ogt); KEYWORD(ole);
679  KEYWORD(oge); KEYWORD(ord); KEYWORD(uno); KEYWORD(ueq); KEYWORD(une);
680 
681  KEYWORD(xchg); KEYWORD(nand); KEYWORD(max); KEYWORD(min); KEYWORD(umax);
682  KEYWORD(umin);
683 
684  KEYWORD(x);
685  KEYWORD(blockaddress);
686 
687  // Metadata types.
688  KEYWORD(distinct);
689 
690  // Use-list order directives.
691  KEYWORD(uselistorder);
692  KEYWORD(uselistorder_bb);
693 
694  KEYWORD(personality);
695  KEYWORD(cleanup);
696  KEYWORD(catch);
697  KEYWORD(filter);
698 
699 #undef KEYWORD
700 
701  // Keywords for types.
702 #define TYPEKEYWORD(STR, LLVMTY) \
703  do { \
704  if (Keyword == STR) { \
705  TyVal = LLVMTY; \
706  return lltok::Type; \
707  } \
708  } while (false)
709 
710  TYPEKEYWORD("void", Type::getVoidTy(Context));
711  TYPEKEYWORD("half", Type::getHalfTy(Context));
712  TYPEKEYWORD("float", Type::getFloatTy(Context));
713  TYPEKEYWORD("double", Type::getDoubleTy(Context));
714  TYPEKEYWORD("x86_fp80", Type::getX86_FP80Ty(Context));
715  TYPEKEYWORD("fp128", Type::getFP128Ty(Context));
716  TYPEKEYWORD("ppc_fp128", Type::getPPC_FP128Ty(Context));
717  TYPEKEYWORD("label", Type::getLabelTy(Context));
718  TYPEKEYWORD("metadata", Type::getMetadataTy(Context));
719  TYPEKEYWORD("x86_mmx", Type::getX86_MMXTy(Context));
720  TYPEKEYWORD("token", Type::getTokenTy(Context));
721 
722 #undef TYPEKEYWORD
723 
724  // Keywords for instructions.
725 #define INSTKEYWORD(STR, Enum) \
726  do { \
727  if (Keyword == #STR) { \
728  UIntVal = Instruction::Enum; \
729  return lltok::kw_##STR; \
730  } \
731  } while (false)
732 
733  INSTKEYWORD(add, Add); INSTKEYWORD(fadd, FAdd);
734  INSTKEYWORD(sub, Sub); INSTKEYWORD(fsub, FSub);
735  INSTKEYWORD(mul, Mul); INSTKEYWORD(fmul, FMul);
736  INSTKEYWORD(udiv, UDiv); INSTKEYWORD(sdiv, SDiv); INSTKEYWORD(fdiv, FDiv);
737  INSTKEYWORD(urem, URem); INSTKEYWORD(srem, SRem); INSTKEYWORD(frem, FRem);
738  INSTKEYWORD(shl, Shl); INSTKEYWORD(lshr, LShr); INSTKEYWORD(ashr, AShr);
739  INSTKEYWORD(and, And); INSTKEYWORD(or, Or); INSTKEYWORD(xor, Xor);
740  INSTKEYWORD(icmp, ICmp); INSTKEYWORD(fcmp, FCmp);
741 
742  INSTKEYWORD(phi, PHI);
743  INSTKEYWORD(call, Call);
744  INSTKEYWORD(trunc, Trunc);
745  INSTKEYWORD(zext, ZExt);
746  INSTKEYWORD(sext, SExt);
747  INSTKEYWORD(fptrunc, FPTrunc);
748  INSTKEYWORD(fpext, FPExt);
749  INSTKEYWORD(uitofp, UIToFP);
750  INSTKEYWORD(sitofp, SIToFP);
751  INSTKEYWORD(fptoui, FPToUI);
752  INSTKEYWORD(fptosi, FPToSI);
753  INSTKEYWORD(inttoptr, IntToPtr);
754  INSTKEYWORD(ptrtoint, PtrToInt);
755  INSTKEYWORD(bitcast, BitCast);
756  INSTKEYWORD(addrspacecast, AddrSpaceCast);
757  INSTKEYWORD(select, Select);
758  INSTKEYWORD(va_arg, VAArg);
759  INSTKEYWORD(ret, Ret);
760  INSTKEYWORD(br, Br);
761  INSTKEYWORD(switch, Switch);
762  INSTKEYWORD(indirectbr, IndirectBr);
763  INSTKEYWORD(invoke, Invoke);
764  INSTKEYWORD(resume, Resume);
765  INSTKEYWORD(unreachable, Unreachable);
766 
767  INSTKEYWORD(alloca, Alloca);
768  INSTKEYWORD(load, Load);
769  INSTKEYWORD(store, Store);
770  INSTKEYWORD(cmpxchg, AtomicCmpXchg);
771  INSTKEYWORD(atomicrmw, AtomicRMW);
772  INSTKEYWORD(fence, Fence);
773  INSTKEYWORD(getelementptr, GetElementPtr);
774 
775  INSTKEYWORD(extractelement, ExtractElement);
776  INSTKEYWORD(insertelement, InsertElement);
777  INSTKEYWORD(shufflevector, ShuffleVector);
778  INSTKEYWORD(extractvalue, ExtractValue);
779  INSTKEYWORD(insertvalue, InsertValue);
780  INSTKEYWORD(landingpad, LandingPad);
781  INSTKEYWORD(cleanupret, CleanupRet);
782  INSTKEYWORD(catchret, CatchRet);
783  INSTKEYWORD(catchswitch, CatchSwitch);
784  INSTKEYWORD(catchpad, CatchPad);
785  INSTKEYWORD(cleanuppad, CleanupPad);
786 
787 #undef INSTKEYWORD
788 
789 #define DWKEYWORD(TYPE, TOKEN) \
790  do { \
791  if (Keyword.startswith("DW_" #TYPE "_")) { \
792  StrVal.assign(Keyword.begin(), Keyword.end()); \
793  return lltok::TOKEN; \
794  } \
795  } while (false)
796 
797  DWKEYWORD(TAG, DwarfTag);
799  DWKEYWORD(VIRTUALITY, DwarfVirtuality);
800  DWKEYWORD(LANG, DwarfLang);
801  DWKEYWORD(CC, DwarfCC);
802  DWKEYWORD(OP, DwarfOp);
803  DWKEYWORD(MACINFO, DwarfMacinfo);
804 
805 #undef DWKEYWORD
806 
807  if (Keyword.startswith("DIFlag")) {
808  StrVal.assign(Keyword.begin(), Keyword.end());
809  return lltok::DIFlag;
810  }
811 
812  if (Keyword.startswith("CSK_")) {
813  StrVal.assign(Keyword.begin(), Keyword.end());
814  return lltok::ChecksumKind;
815  }
816 
817  if (Keyword == "NoDebug" || Keyword == "FullDebug" ||
818  Keyword == "LineTablesOnly") {
819  StrVal.assign(Keyword.begin(), Keyword.end());
820  return lltok::EmissionKind;
821  }
822 
823  // Check for [us]0x[0-9A-Fa-f]+ which are Hexadecimal constant generated by
824  // the CFE to avoid forcing it to deal with 64-bit numbers.
825  if ((TokStart[0] == 'u' || TokStart[0] == 's') &&
826  TokStart[1] == '0' && TokStart[2] == 'x' &&
827  isxdigit(static_cast<unsigned char>(TokStart[3]))) {
828  int len = CurPtr-TokStart-3;
829  uint32_t bits = len * 4;
830  StringRef HexStr(TokStart + 3, len);
831  if (!all_of(HexStr, isxdigit)) {
832  // Bad token, return it as an error.
833  CurPtr = TokStart+3;
834  return lltok::Error;
835  }
836  APInt Tmp(bits, HexStr, 16);
837  uint32_t activeBits = Tmp.getActiveBits();
838  if (activeBits > 0 && activeBits < bits)
839  Tmp = Tmp.trunc(activeBits);
840  APSIntVal = APSInt(Tmp, TokStart[0] == 'u');
841  return lltok::APSInt;
842  }
843 
844  // If this is "cc1234", return this as just "cc".
845  if (TokStart[0] == 'c' && TokStart[1] == 'c') {
846  CurPtr = TokStart+2;
847  return lltok::kw_cc;
848  }
849 
850  // Finally, if this isn't known, return an error.
851  CurPtr = TokStart+1;
852  return lltok::Error;
853 }
854 
855 /// Lex all tokens that start with a 0x prefix, knowing they match and are not
856 /// labels.
857 /// HexFPConstant 0x[0-9A-Fa-f]+
858 /// HexFP80Constant 0xK[0-9A-Fa-f]+
859 /// HexFP128Constant 0xL[0-9A-Fa-f]+
860 /// HexPPC128Constant 0xM[0-9A-Fa-f]+
861 /// HexHalfConstant 0xH[0-9A-Fa-f]+
862 lltok::Kind LLLexer::Lex0x() {
863  CurPtr = TokStart + 2;
864 
865  char Kind;
866  if ((CurPtr[0] >= 'K' && CurPtr[0] <= 'M') || CurPtr[0] == 'H') {
867  Kind = *CurPtr++;
868  } else {
869  Kind = 'J';
870  }
871 
872  if (!isxdigit(static_cast<unsigned char>(CurPtr[0]))) {
873  // Bad token, return it as an error.
874  CurPtr = TokStart+1;
875  return lltok::Error;
876  }
877 
878  while (isxdigit(static_cast<unsigned char>(CurPtr[0])))
879  ++CurPtr;
880 
881  if (Kind == 'J') {
882  // HexFPConstant - Floating point constant represented in IEEE format as a
883  // hexadecimal number for when exponential notation is not precise enough.
884  // Half, Float, and double only.
885  APFloatVal = APFloat(APFloat::IEEEdouble(),
886  APInt(64, HexIntToVal(TokStart + 2, CurPtr)));
887  return lltok::APFloat;
888  }
889 
890  uint64_t Pair[2];
891  switch (Kind) {
892  default: llvm_unreachable("Unknown kind!");
893  case 'K':
894  // F80HexFPConstant - x87 long double in hexadecimal format (10 bytes)
895  FP80HexToIntPair(TokStart+3, CurPtr, Pair);
896  APFloatVal = APFloat(APFloat::x87DoubleExtended(), APInt(80, Pair));
897  return lltok::APFloat;
898  case 'L':
899  // F128HexFPConstant - IEEE 128-bit in hexadecimal format (16 bytes)
900  HexToIntPair(TokStart+3, CurPtr, Pair);
901  APFloatVal = APFloat(APFloat::IEEEquad(), APInt(128, Pair));
902  return lltok::APFloat;
903  case 'M':
904  // PPC128HexFPConstant - PowerPC 128-bit in hexadecimal format (16 bytes)
905  HexToIntPair(TokStart+3, CurPtr, Pair);
906  APFloatVal = APFloat(APFloat::PPCDoubleDouble(), APInt(128, Pair));
907  return lltok::APFloat;
908  case 'H':
909  APFloatVal = APFloat(APFloat::IEEEhalf(),
910  APInt(16,HexIntToVal(TokStart+3, CurPtr)));
911  return lltok::APFloat;
912  }
913 }
914 
915 /// Lex tokens for a label or a numeric constant, possibly starting with -.
916 /// Label [-a-zA-Z$._0-9]+:
917 /// NInteger -[0-9]+
918 /// FPConstant [-+]?[0-9]+[.][0-9]*([eE][-+]?[0-9]+)?
919 /// PInteger [0-9]+
920 /// HexFPConstant 0x[0-9A-Fa-f]+
921 /// HexFP80Constant 0xK[0-9A-Fa-f]+
922 /// HexFP128Constant 0xL[0-9A-Fa-f]+
923 /// HexPPC128Constant 0xM[0-9A-Fa-f]+
924 lltok::Kind LLLexer::LexDigitOrNegative() {
925  // If the letter after the negative is not a number, this is probably a label.
926  if (!isdigit(static_cast<unsigned char>(TokStart[0])) &&
927  !isdigit(static_cast<unsigned char>(CurPtr[0]))) {
928  // Okay, this is not a number after the -, it's probably a label.
929  if (const char *End = isLabelTail(CurPtr)) {
930  StrVal.assign(TokStart, End-1);
931  CurPtr = End;
932  return lltok::LabelStr;
933  }
934 
935  return lltok::Error;
936  }
937 
938  // At this point, it is either a label, int or fp constant.
939 
940  // Skip digits, we have at least one.
941  for (; isdigit(static_cast<unsigned char>(CurPtr[0])); ++CurPtr)
942  /*empty*/;
943 
944  // Check to see if this really is a label afterall, e.g. "-1:".
945  if (isLabelChar(CurPtr[0]) || CurPtr[0] == ':') {
946  if (const char *End = isLabelTail(CurPtr)) {
947  StrVal.assign(TokStart, End-1);
948  CurPtr = End;
949  return lltok::LabelStr;
950  }
951  }
952 
953  // If the next character is a '.', then it is a fp value, otherwise its
954  // integer.
955  if (CurPtr[0] != '.') {
956  if (TokStart[0] == '0' && TokStart[1] == 'x')
957  return Lex0x();
958  APSIntVal = APSInt(StringRef(TokStart, CurPtr - TokStart));
959  return lltok::APSInt;
960  }
961 
962  ++CurPtr;
963 
964  // Skip over [0-9]*([eE][-+]?[0-9]+)?
965  while (isdigit(static_cast<unsigned char>(CurPtr[0]))) ++CurPtr;
966 
967  if (CurPtr[0] == 'e' || CurPtr[0] == 'E') {
968  if (isdigit(static_cast<unsigned char>(CurPtr[1])) ||
969  ((CurPtr[1] == '-' || CurPtr[1] == '+') &&
970  isdigit(static_cast<unsigned char>(CurPtr[2])))) {
971  CurPtr += 2;
972  while (isdigit(static_cast<unsigned char>(CurPtr[0]))) ++CurPtr;
973  }
974  }
975 
976  APFloatVal = APFloat(APFloat::IEEEdouble(),
977  StringRef(TokStart, CurPtr - TokStart));
978  return lltok::APFloat;
979 }
980 
981 /// Lex a floating point constant starting with +.
982 /// FPConstant [-+]?[0-9]+[.][0-9]*([eE][-+]?[0-9]+)?
983 lltok::Kind LLLexer::LexPositive() {
984  // If the letter after the negative is a number, this is probably not a
985  // label.
986  if (!isdigit(static_cast<unsigned char>(CurPtr[0])))
987  return lltok::Error;
988 
989  // Skip digits.
990  for (++CurPtr; isdigit(static_cast<unsigned char>(CurPtr[0])); ++CurPtr)
991  /*empty*/;
992 
993  // At this point, we need a '.'.
994  if (CurPtr[0] != '.') {
995  CurPtr = TokStart+1;
996  return lltok::Error;
997  }
998 
999  ++CurPtr;
1000 
1001  // Skip over [0-9]*([eE][-+]?[0-9]+)?
1002  while (isdigit(static_cast<unsigned char>(CurPtr[0]))) ++CurPtr;
1003 
1004  if (CurPtr[0] == 'e' || CurPtr[0] == 'E') {
1005  if (isdigit(static_cast<unsigned char>(CurPtr[1])) ||
1006  ((CurPtr[1] == '-' || CurPtr[1] == '+') &&
1007  isdigit(static_cast<unsigned char>(CurPtr[2])))) {
1008  CurPtr += 2;
1009  while (isdigit(static_cast<unsigned char>(CurPtr[0]))) ++CurPtr;
1010  }
1011  }
1012 
1013  APFloatVal = APFloat(APFloat::IEEEdouble(),
1014  StringRef(TokStart, CurPtr - TokStart));
1015  return lltok::APFloat;
1016 }
Maximum number of bits that can be specified.
Definition: DerivedTypes.h:51
static Type * getDoubleTy(LLVMContext &C)
Definition: Type.cpp:158
LLVMContext & Context
Minimum number of bits that can be specified.
Definition: DerivedTypes.h:50
size_t i
static const char * isLabelTail(const char *CurPtr)
isLabelTail - Return true if this pointer points to a valid end of a label.
Definition: LLLexer.cpp:148
demanded bits
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly...
Definition: STLExtras.h:736
#define DWKEYWORD(TYPE, TOKEN)
static void cleanup(BlockFrequencyInfoImplBase &BFI)
Clear all memory not needed downstream.
static Type * getMetadataTy(LLVMContext &C)
Definition: Type.cpp:159
static Type * getX86_MMXTy(LLVMContext &C)
Definition: Type.cpp:164
static Type * getX86_FP80Ty(LLVMContext &C)
Definition: Type.cpp:161
void Warning(LocTy WarningLoc, const Twine &Msg) const
Definition: LLLexer.cpp:34
static Type * getTokenTy(LLVMContext &C)
Definition: Type.cpp:160
static const fltSemantics & x87DoubleExtended()
Definition: APFloat.cpp:109
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition: Twine.h:81
static Type * getFloatTy(LLVMContext &C)
Definition: Type.cpp:157
APInt urem(const APInt &LHS, const APInt &RHS)
Function for unsigned remainder operation.
Definition: APInt.h:1921
#define TYPEKEYWORD(STR, LLVMTY)
static Type * getPPC_FP128Ty(LLVMContext &C)
Definition: Type.cpp:163
LLLexer(StringRef StartBuf, SourceMgr &SM, SMDiagnostic &, LLVMContext &C)
Definition: LLLexer.cpp:160
static bool isLabelChar(char C)
isLabelChar - Return true for [-a-zA-Z$._0-9].
Definition: LLLexer.cpp:142
This file implements a class to represent arbitrary precision integral constant values and operations...
APInt lshr(const APInt &LHS, unsigned shiftAmt)
Logical right-shift function.
Definition: APInt.h:1892
static Type * getLabelTy(LLVMContext &C)
Definition: Type.cpp:155
static bool sub(uint64_t *dest, const uint64_t *x, const uint64_t *y, unsigned len)
Subtracts the integer array y from the integer array x.
Definition: APInt.cpp:274
static void mul(uint64_t dest[], uint64_t x[], unsigned xlen, uint64_t y[], unsigned ylen)
Multiplies integer array x by integer array y and stores the result into the integer array dest...
Definition: APInt.cpp:343
APInt udiv(const APInt &LHS, const APInt &RHS)
Unsigned division function for APInt.
Definition: APInt.h:1911
static bool add(uint64_t *dest, const uint64_t *x, const uint64_t *y, unsigned len)
This function adds the integer array x to the integer array y and places the result in dest...
Definition: APInt.cpp:239
APInt sdiv(const APInt &LHS, const APInt &RHS)
Signed division function for APInt.
Definition: APInt.h:1906
APInt ashr(const APInt &LHS, unsigned shiftAmt)
Arithmetic right-shift function.
Definition: APInt.h:1885
int Switch(int a)
Definition: Switch2Test.cpp:11
Maximum length of the test input libFuzzer tries to guess a good value based on the corpus and reports it always prefer smaller inputs during the corpus shuffle When libFuzzer itself reports a bug this exit code will be used If indicates the maximal total time in seconds to run the fuzzer minimizes the provided crash input Use with etc Experimental Use value profile to guide fuzzing Number of simultaneous worker processes to run the jobs If min(jobs, NumberOfCpuCores()/2)\" is used.") FUZZER_FLAG_INT(reload
iterator begin() const
Definition: StringRef.h:103
APInt srem(const APInt &LHS, const APInt &RHS)
Function for signed remainder operation.
Definition: APInt.h:1916
This is an important class for using LLVM in a threaded context.
Definition: LLVMContext.h:48
APInt Or(const APInt &LHS, const APInt &RHS)
Bitwise OR function for APInt.
Definition: APInt.h:1947
APInt Xor(const APInt &LHS, const APInt &RHS)
Bitwise XOR function for APInt.
Definition: APInt.h:1952
static const fltSemantics & IEEEhalf()
Definition: APFloat.cpp:97
static Type * getVoidTy(LLVMContext &C)
Definition: Type.cpp:154
static const unsigned End
rewrite statepoints for gc
This owns the files read by a parser, handles include stacks, and handles diagnostic wrangling...
Definition: SourceMgr.h:35
bool Error(LocTy L, const Twine &Msg) const
Definition: LLLexer.cpp:29
static const fltSemantics & IEEEquad()
Definition: APFloat.cpp:106
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
static Type * getFP128Ty(LLVMContext &C)
Definition: Type.cpp:162
const APInt & umin(const APInt &A, const APInt &B)
Determine the smaller of two APInts considered to be unsigned.
Definition: APInt.h:1793
static Type * getHalfTy(LLVMContext &C)
Definition: Type.cpp:156
static IntegerType * get(LLVMContext &C, unsigned NumBits)
This static method is the primary way of constructing an IntegerType.
Definition: Type.cpp:234
static GCRegistry::Add< ShadowStackGC > C("shadow-stack","Very portable GC for uncooperative code generators")
Class for arbitrary precision integers.
Definition: APInt.h:77
Base class for user error types.
const APInt & umax(const APInt &A, const APInt &B)
Determine the larger of two APInts considered to be unsigned.
Definition: APInt.h:1798
static const fltSemantics & IEEEdouble()
Definition: APFloat.cpp:103
APInt And(const APInt &LHS, const APInt &RHS)
Bitwise AND function for APInt.
Definition: APInt.h:1942
static const size_t npos
Definition: StringRef.h:51
Deduce function attributes
static const fltSemantics & PPCDoubleDouble()
Definition: APFloat.cpp:115
static void UnEscapeLexed(std::string &Str)
Definition: LLLexer.cpp:115
const unsigned Kind
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
static unsigned hexDigitValue(char C)
Interpret the given character C as a hexadecimal digit and return its value.
Definition: StringExtras.h:41
APInt shl(const APInt &LHS, unsigned shiftAmt)
Left-shift function.
Definition: APInt.h:1899
iterator end() const
Definition: StringRef.h:105
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:47
SMDiagnostic GetMessage(SMLoc Loc, DiagKind Kind, const Twine &Msg, ArrayRef< SMRange > Ranges=None, ArrayRef< SMFixIt > FixIts=None) const
Return an SMDiagnostic at the specified location with the specified string.
Definition: SourceMgr.cpp:136
#define KEYWORD(STR)
int * Ptr
void PrintMessage(raw_ostream &OS, SMLoc Loc, DiagKind Kind, const Twine &Msg, ArrayRef< SMRange > Ranges=None, ArrayRef< SMFixIt > FixIts=None, bool ShowColors=true) const
Emit a message about the specified location with the specified string.
Definition: SourceMgr.cpp:216
Represents a location in source code.
Definition: SMLoc.h:24
#define OP(n)
Definition: regex2.h:70
#define INSTKEYWORD(STR, Enum)
Instances of this class encapsulate one diagnostic report, allowing printing to a raw_ostream as a ca...
Definition: SourceMgr.h:228