/build/source/clang/lib/Lex/LiteralSupport.cpp

Bug Summary

File:	build/source/clang/lib/Lex/LiteralSupport.cpp
Warning:	line 1031, column 11 Value stored to 'HasSize' is never read

Annotated Source Code

Press '?' to see keyboard shortcuts

Show analyzer invocation

clang -cc1 -cc1 -triple x86_64-pc-linux-gnu -analyze -disable-free -clear-ast-before-backend -disable-llvm-verifier -discard-value-names -main-file-name LiteralSupport.cpp -analyzer-checker=core -analyzer-checker=apiModeling -analyzer-checker=unix -analyzer-checker=deadcode -analyzer-checker=cplusplus -analyzer-checker=security.insecureAPI.UncheckedReturn -analyzer-checker=security.insecureAPI.getpw -analyzer-checker=security.insecureAPI.gets -analyzer-checker=security.insecureAPI.mktemp -analyzer-checker=security.insecureAPI.mkstemp -analyzer-checker=security.insecureAPI.vfork -analyzer-checker=nullability.NullPassedToNonnull -analyzer-checker=nullability.NullReturnedFromNonnull -analyzer-output plist -w -setup-static-analyzer -analyzer-config-compatibility-mode=true -mrelocation-model pic -pic-level 2 -mframe-pointer=none -relaxed-aliasing -fmath-errno -ffp-contract=on -fno-rounding-math -mconstructor-aliases -funwind-tables=2 -target-cpu x86-64 -tune-cpu generic -debugger-tuning=gdb -ffunction-sections -fdata-sections -fcoverage-compilation-dir=/build/source/build-llvm/tools/clang/stage2-bins -resource-dir /usr/lib/llvm-16/lib/clang/16 -I tools/clang/lib/Lex -I /build/source/clang/lib/Lex -I /build/source/clang/include -I tools/clang/include -I include -I /build/source/llvm/include -D CLANG_REPOSITORY_STRING="++20230112100753+3c5b1f2d94d0-1~exp1~20230112220902.1029" -D _DEBUG -D _GNU_SOURCE -D __STDC_CONSTANT_MACROS -D __STDC_FORMAT_MACROS -D __STDC_LIMIT_MACROS -D _FORTIFY_SOURCE=2 -D NDEBUG -U NDEBUG -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/10/../../../../include/c++/10 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/10/../../../../include/x86_64-linux-gnu/c++/10 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/10/../../../../include/c++/10/backward -internal-isystem /usr/lib/llvm-16/lib/clang/16/include -internal-isystem /usr/local/include -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/10/../../../../x86_64-linux-gnu/include -internal-externc-isystem 
/usr/include/x86_64-linux-gnu -internal-externc-isystem /include -internal-externc-isystem /usr/include -fmacro-prefix-map=/build/source/build-llvm/tools/clang/stage2-bins=build-llvm/tools/clang/stage2-bins -fmacro-prefix-map=/build/source/= -fcoverage-prefix-map=/build/source/build-llvm/tools/clang/stage2-bins=build-llvm/tools/clang/stage2-bins -fcoverage-prefix-map=/build/source/= -source-date-epoch 1673561342 -O2 -Wno-unused-command-line-argument -Wno-unused-parameter -Wwrite-strings -Wno-missing-field-initializers -Wno-long-long -Wno-maybe-uninitialized -Wno-class-memaccess -Wno-redundant-move -Wno-pessimizing-move -Wno-noexcept-type -Wno-comment -Wno-misleading-indentation -std=c++17 -fdeprecated-macro -fdebug-compilation-dir=/build/source/build-llvm/tools/clang/stage2-bins -fdebug-prefix-map=/build/source/build-llvm/tools/clang/stage2-bins=build-llvm/tools/clang/stage2-bins -fdebug-prefix-map=/build/source/= -ferror-limit 19 -fvisibility-inlines-hidden -stack-protector 2 -fgnuc-version=4.2.1 -fcolor-diagnostics -vectorize-loops -vectorize-slp -analyzer-output=html -analyzer-config stable-report-filename=true -faddrsig -D__GCC_HAVE_DWARF2_CFI_ASM=1 -o /tmp/scan-build-2023-01-13-042150-16221-1 -x c++ /build/source/clang/lib/Lex/LiteralSupport.cpp

1	//===--- LiteralSupport.cpp - Code to parse and process literals ----------===//
2	//
3	// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4	// See https://llvm.org/LICENSE.txt for license information.
5	// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6	//
7	//===----------------------------------------------------------------------===//
8	//
9	// This file implements the NumericLiteralParser, CharLiteralParser, and
10	// StringLiteralParser interfaces.
11	//
12	//===----------------------------------------------------------------------===//
13
14	#include "clang/Lex/LiteralSupport.h"
15	#include "clang/Basic/CharInfo.h"
16	#include "clang/Basic/LangOptions.h"
17	#include "clang/Basic/SourceLocation.h"
18	#include "clang/Basic/TargetInfo.h"
19	#include "clang/Lex/LexDiagnostic.h"
20	#include "clang/Lex/Lexer.h"
21	#include "clang/Lex/Preprocessor.h"
22	#include "clang/Lex/Token.h"
23	#include "llvm/ADT/APInt.h"
24	#include "llvm/ADT/SmallVector.h"
25	#include "llvm/ADT/StringExtras.h"
26	#include "llvm/ADT/StringSwitch.h"
27	#include "llvm/Support/ConvertUTF.h"
28	#include "llvm/Support/Error.h"
29	#include "llvm/Support/ErrorHandling.h"
30	#include "llvm/Support/Unicode.h"
31	#include <algorithm>
32	#include <cassert>
33	#include <cstddef>
34	#include <cstdint>
35	#include <cstring>
36	#include <string>
37
38	using namespace clang;
39
40	static unsigned getCharWidth(tok::TokenKind kind, const TargetInfo &Target) {
41	switch (kind) {
42	default: llvm_unreachable("Unknown token type!")::llvm::llvm_unreachable_internal("Unknown token type!", "clang/lib/Lex/LiteralSupport.cpp" , 42);
43	case tok::char_constant:
44	case tok::string_literal:
45	case tok::utf8_char_constant:
46	case tok::utf8_string_literal:
47	return Target.getCharWidth();
48	case tok::wide_char_constant:
49	case tok::wide_string_literal:
50	return Target.getWCharWidth();
51	case tok::utf16_char_constant:
52	case tok::utf16_string_literal:
53	return Target.getChar16Width();
54	case tok::utf32_char_constant:
55	case tok::utf32_string_literal:
56	return Target.getChar32Width();
57	}
58	}
59
60	static CharSourceRange MakeCharSourceRange(const LangOptions &Features,
61	FullSourceLoc TokLoc,
62	const char *TokBegin,
63	const char *TokRangeBegin,
64	const char *TokRangeEnd) {
65	SourceLocation Begin =
66	Lexer::AdvanceToTokenCharacter(TokLoc, TokRangeBegin - TokBegin,
67	TokLoc.getManager(), Features);
68	SourceLocation End =
69	Lexer::AdvanceToTokenCharacter(Begin, TokRangeEnd - TokRangeBegin,
70	TokLoc.getManager(), Features);
71	return CharSourceRange::getCharRange(Begin, End);
72	}
73
74	/// Produce a diagnostic highlighting some portion of a literal.
75	///
76	/// Emits the diagnostic \p DiagID, highlighting the range of characters from
77	/// \p TokRangeBegin (inclusive) to \p TokRangeEnd (exclusive), which must be
78	/// a substring of a spelling buffer for the token beginning at \p TokBegin.
79	static DiagnosticBuilder Diag(DiagnosticsEngine *Diags,
80	const LangOptions &Features, FullSourceLoc TokLoc,
81	const char TokBegin, const char TokRangeBegin,
82	const char *TokRangeEnd, unsigned DiagID) {
83	SourceLocation Begin =
84	Lexer::AdvanceToTokenCharacter(TokLoc, TokRangeBegin - TokBegin,
85	TokLoc.getManager(), Features);
86	return Diags->Report(Begin, DiagID) <<
87	MakeCharSourceRange(Features, TokLoc, TokBegin, TokRangeBegin, TokRangeEnd);
88	}
89
90	/// ProcessCharEscape - Parse a standard C escape sequence, which can occur in
91	/// either a character or a string literal.
92	static unsigned ProcessCharEscape(const char *ThisTokBegin,
93	const char *&ThisTokBuf,
94	const char *ThisTokEnd, bool &HadError,
95	FullSourceLoc Loc, unsigned CharWidth,
96	DiagnosticsEngine *Diags,
97	const LangOptions &Features) {
98	const char *EscapeBegin = ThisTokBuf;
99	bool Delimited = false;
100	bool EndDelimiterFound = false;
101
102	// Skip the '\' char.
103	++ThisTokBuf;
104
105	// We know that this character can't be off the end of the buffer, because
106	// that would have been \", which would not have been the end of string.
107	unsigned ResultChar = *ThisTokBuf++;
108	switch (ResultChar) {
109	// These map to themselves.
110	case '\\': case '\'': case '"': case '?': break;
111
112	// These have fixed mappings.
113	case 'a':
114	// TODO: K&R: the meaning of '\\a' is different in traditional C
115	ResultChar = 7;
116	break;
117	case 'b':
118	ResultChar = 8;
119	break;
120	case 'e':
121	if (Diags)
122	Diag(Diags, Features, Loc, ThisTokBegin, EscapeBegin, ThisTokBuf,
123	diag::ext_nonstandard_escape) << "e";
124	ResultChar = 27;
125	break;
126	case 'E':
127	if (Diags)
128	Diag(Diags, Features, Loc, ThisTokBegin, EscapeBegin, ThisTokBuf,
129	diag::ext_nonstandard_escape) << "E";
130	ResultChar = 27;
131	break;
132	case 'f':
133	ResultChar = 12;
134	break;
135	case 'n':
136	ResultChar = 10;
137	break;
138	case 'r':
139	ResultChar = 13;
140	break;
141	case 't':
142	ResultChar = 9;
143	break;
144	case 'v':
145	ResultChar = 11;
146	break;
147	case 'x': { // Hex escape.
148	ResultChar = 0;
149	if (ThisTokBuf != ThisTokEnd && *ThisTokBuf == '{') {
150	Delimited = true;
151	ThisTokBuf++;
152	if (*ThisTokBuf == '}') {
153	Diag(Diags, Features, Loc, ThisTokBegin, EscapeBegin, ThisTokBuf,
154	diag::err_delimited_escape_empty);
155	return ResultChar;
156	}
157	} else if (ThisTokBuf == ThisTokEnd \|\| !isHexDigit(*ThisTokBuf)) {
158	if (Diags)
159	Diag(Diags, Features, Loc, ThisTokBegin, EscapeBegin, ThisTokBuf,
160	diag::err_hex_escape_no_digits) << "x";
161	return ResultChar;
162	}
163
164	// Hex escapes are a maximal series of hex digits.
165	bool Overflow = false;
166	for (; ThisTokBuf != ThisTokEnd; ++ThisTokBuf) {
167	if (Delimited && *ThisTokBuf == '}') {
168	ThisTokBuf++;
169	EndDelimiterFound = true;
170	break;
171	}
172	int CharVal = llvm::hexDigitValue(*ThisTokBuf);
173	if (CharVal == -1) {
174	// Non delimited hex escape sequences stop at the first non-hex digit.
175	if (!Delimited)
176	break;
177	HadError = true;
178	if (Diags)
179	Diag(Diags, Features, Loc, ThisTokBegin, EscapeBegin, ThisTokBuf,
180	diag::err_delimited_escape_invalid)
181	<< StringRef(ThisTokBuf, 1);
182	continue;
183	}
184	// About to shift out a digit?
185	if (ResultChar & 0xF0000000)
186	Overflow = true;
187	ResultChar <<= 4;
188	ResultChar \|= CharVal;
189	}
190	// See if any bits will be truncated when evaluated as a character.
191	if (CharWidth != 32 && (ResultChar >> CharWidth) != 0) {
192	Overflow = true;
193	ResultChar &= ~0U >> (32-CharWidth);
194	}
195
196	// Check for overflow.
197	if (!HadError && Overflow) { // Too many digits to fit in
198	HadError = true;
199	if (Diags)
200	Diag(Diags, Features, Loc, ThisTokBegin, EscapeBegin, ThisTokBuf,
201	diag::err_escape_too_large)
202	<< 0;
203	}
204	break;
205	}
206	case '0': case '1': case '2': case '3':
207	case '4': case '5': case '6': case '7': {
208	// Octal escapes.
209	--ThisTokBuf;
210	ResultChar = 0;
211
212	// Octal escapes are a series of octal digits with maximum length 3.
213	// "\0123" is a two digit sequence equal to "\012" "3".
214	unsigned NumDigits = 0;
215	do {
216	ResultChar <<= 3;
217	ResultChar \|= *ThisTokBuf++ - '0';
218	++NumDigits;
219	} while (ThisTokBuf != ThisTokEnd && NumDigits < 3 &&
220	ThisTokBuf[0] >= '0' && ThisTokBuf[0] <= '7');
221
222	// Check for overflow. Reject '\777', but not L'\777'.
223	if (CharWidth != 32 && (ResultChar >> CharWidth) != 0) {
224	if (Diags)
225	Diag(Diags, Features, Loc, ThisTokBegin, EscapeBegin, ThisTokBuf,
226	diag::err_escape_too_large) << 1;
227	ResultChar &= ~0U >> (32-CharWidth);
228	}
229	break;
230	}
231	case 'o': {
232	bool Overflow = false;
233	if (ThisTokBuf == ThisTokEnd \|\| *ThisTokBuf != '{') {
234	HadError = true;
235	if (Diags)
236	Diag(Diags, Features, Loc, ThisTokBegin, EscapeBegin, ThisTokBuf,
237	diag::err_delimited_escape_missing_brace)
238	<< "o";
239
240	break;
241	}
242	ResultChar = 0;
243	Delimited = true;
244	++ThisTokBuf;
245	if (*ThisTokBuf == '}') {
246	Diag(Diags, Features, Loc, ThisTokBegin, EscapeBegin, ThisTokBuf,
247	diag::err_delimited_escape_empty);
248	return ResultChar;
249	}
250
251	while (ThisTokBuf != ThisTokEnd) {
252	if (*ThisTokBuf == '}') {
253	EndDelimiterFound = true;
254	ThisTokBuf++;
255	break;
256	}
257	if (ThisTokBuf < '0' \|\| ThisTokBuf > '7') {
258	HadError = true;
259	if (Diags)
260	Diag(Diags, Features, Loc, ThisTokBegin, EscapeBegin, ThisTokBuf,
261	diag::err_delimited_escape_invalid)
262	<< StringRef(ThisTokBuf, 1);
263	ThisTokBuf++;
264	continue;
265	}
266	if (ResultChar & 0x020000000)
267	Overflow = true;
268
269	ResultChar <<= 3;
270	ResultChar \|= *ThisTokBuf++ - '0';
271	}
272	// Check for overflow. Reject '\777', but not L'\777'.
273	if (!HadError &&
274	(Overflow \|\| (CharWidth != 32 && (ResultChar >> CharWidth) != 0))) {
275	HadError = true;
276	if (Diags)
277	Diag(Diags, Features, Loc, ThisTokBegin, EscapeBegin, ThisTokBuf,
278	diag::err_escape_too_large)
279	<< 1;
280	ResultChar &= ~0U >> (32 - CharWidth);
281	}
282	break;
283	}
284	// Otherwise, these are not valid escapes.
285	case '(': case '{': case '[': case '%':
286	// GCC accepts these as extensions. We warn about them as such though.
287	if (Diags)
288	Diag(Diags, Features, Loc, ThisTokBegin, EscapeBegin, ThisTokBuf,
289	diag::ext_nonstandard_escape)
290	<< std::string(1, ResultChar);
291	break;
292	default:
293	if (!Diags)
294	break;
295
296	if (isPrintable(ResultChar))
297	Diag(Diags, Features, Loc, ThisTokBegin, EscapeBegin, ThisTokBuf,
298	diag::ext_unknown_escape)
299	<< std::string(1, ResultChar);
300	else
301	Diag(Diags, Features, Loc, ThisTokBegin, EscapeBegin, ThisTokBuf,
302	diag::ext_unknown_escape)
303	<< "x" + llvm::utohexstr(ResultChar);
304	break;
305	}
306
307	if (Delimited && Diags) {
308	if (!EndDelimiterFound)
309	Diag(Diags, Features, Loc, ThisTokBegin, EscapeBegin, ThisTokBuf,
310	diag::err_expected)
311	<< tok::r_brace;
312	else if (!HadError) {
313	Diag(Diags, Features, Loc, ThisTokBegin, EscapeBegin, ThisTokBuf,
314	Features.CPlusPlus2b ? diag::warn_cxx2b_delimited_escape_sequence
315	: diag::ext_delimited_escape_sequence)
316	<< /delimited/ 0 << (Features.CPlusPlus ? 1 : 0);
317	}
318	}
319
320	return ResultChar;
321	}
322
323	static void appendCodePoint(unsigned Codepoint,
324	llvm::SmallVectorImpl<char> &Str) {
325	char ResultBuf[4];
326	char *ResultPtr = ResultBuf;
327	if (llvm::ConvertCodePointToUTF8(Codepoint, ResultPtr))
328	Str.append(ResultBuf, ResultPtr);
329	}
330
331	void clang::expandUCNs(SmallVectorImpl<char> &Buf, StringRef Input) {
332	for (StringRef::iterator I = Input.begin(), E = Input.end(); I != E; ++I) {
333	if (*I != '\\') {
334	Buf.push_back(*I);
335	continue;
336	}
337
338	++I;
339	char Kind = *I;
340	++I;
341
342	assert(Kind == 'u' \|\| Kind == 'U' \|\| Kind == 'N')(static_cast <bool> (Kind == 'u' \|\| Kind == 'U' \|\| Kind == 'N') ? void (0) : __assert_fail ("Kind == 'u' \|\| Kind == 'U' \|\| Kind == 'N'" , "clang/lib/Lex/LiteralSupport.cpp", 342, __extension__ __PRETTY_FUNCTION__ ));
343	uint32_t CodePoint = 0;
344
345	if (Kind == 'u' && *I == '{') {
346	for (++I; *I != '}'; ++I) {
347	unsigned Value = llvm::hexDigitValue(*I);
348	assert(Value != -1U)(static_cast <bool> (Value != -1U) ? void (0) : __assert_fail ("Value != -1U", "clang/lib/Lex/LiteralSupport.cpp", 348, __extension__ __PRETTY_FUNCTION__));
349	CodePoint <<= 4;
350	CodePoint += Value;
351	}
352	appendCodePoint(CodePoint, Buf);
353	continue;
354	}
355
356	if (Kind == 'N') {
357	assert(I == '{')(static_cast <bool> (I == '{') ? void (0) : __assert_fail ("*I == '{'", "clang/lib/Lex/LiteralSupport.cpp", 357, __extension__ __PRETTY_FUNCTION__));
358	++I;
359	auto Delim = std::find(I, Input.end(), '}');
360	assert(Delim != Input.end())(static_cast <bool> (Delim != Input.end()) ? void (0) : __assert_fail ("Delim != Input.end()", "clang/lib/Lex/LiteralSupport.cpp" , 360, __extension__ __PRETTY_FUNCTION__));
361	std::optional<llvm::sys::unicode::LooseMatchingResult> Res =
362	llvm::sys::unicode::nameToCodepointLooseMatching(
363	StringRef(I, std::distance(I, Delim)));
364	assert(Res)(static_cast <bool> (Res) ? void (0) : __assert_fail ("Res" , "clang/lib/Lex/LiteralSupport.cpp", 364, __extension__ __PRETTY_FUNCTION__ ));
365	CodePoint = Res->CodePoint;
366	assert(CodePoint != 0xFFFFFFFF)(static_cast <bool> (CodePoint != 0xFFFFFFFF) ? void (0 ) : __assert_fail ("CodePoint != 0xFFFFFFFF", "clang/lib/Lex/LiteralSupport.cpp" , 366, __extension__ __PRETTY_FUNCTION__));
367	appendCodePoint(CodePoint, Buf);
368	I = Delim;
369	continue;
370	}
371
372	unsigned NumHexDigits;
373	if (Kind == 'u')
374	NumHexDigits = 4;
375	else
376	NumHexDigits = 8;
377
378	assert(I + NumHexDigits <= E)(static_cast <bool> (I + NumHexDigits <= E) ? void ( 0) : __assert_fail ("I + NumHexDigits <= E", "clang/lib/Lex/LiteralSupport.cpp" , 378, __extension__ __PRETTY_FUNCTION__));
379
380	for (; NumHexDigits != 0; ++I, --NumHexDigits) {
381	unsigned Value = llvm::hexDigitValue(*I);
382	assert(Value != -1U)(static_cast <bool> (Value != -1U) ? void (0) : __assert_fail ("Value != -1U", "clang/lib/Lex/LiteralSupport.cpp", 382, __extension__ __PRETTY_FUNCTION__));
383
384	CodePoint <<= 4;
385	CodePoint += Value;
386	}
387
388	appendCodePoint(CodePoint, Buf);
389	--I;
390	}
391	}
392
393	static bool ProcessNumericUCNEscape(const char *ThisTokBegin,
394	const char *&ThisTokBuf,
395	const char *ThisTokEnd, uint32_t &UcnVal,
396	unsigned short &UcnLen, bool &Delimited,
397	FullSourceLoc Loc, DiagnosticsEngine *Diags,
398	const LangOptions &Features,
399	bool in_char_string_literal = false) {
400	const char *UcnBegin = ThisTokBuf;
401	bool HasError = false;
402	bool EndDelimiterFound = false;
403
404	// Skip the '\u' char's.
405	ThisTokBuf += 2;
406	Delimited = false;
407	if (UcnBegin[1] == 'u' && in_char_string_literal &&
408	ThisTokBuf != ThisTokEnd && *ThisTokBuf == '{') {
409	Delimited = true;
410	ThisTokBuf++;
411	} else if (ThisTokBuf == ThisTokEnd \|\| !isHexDigit(*ThisTokBuf)) {
412	if (Diags)
413	Diag(Diags, Features, Loc, ThisTokBegin, UcnBegin, ThisTokBuf,
414	diag::err_hex_escape_no_digits)
415	<< StringRef(&ThisTokBuf[-1], 1);
416	return false;
417	}
418	UcnLen = (ThisTokBuf[-1] == 'u' ? 4 : 8);
419
420	bool Overflow = false;
421	unsigned short Count = 0;
422	for (; ThisTokBuf != ThisTokEnd && (Delimited \|\| Count != UcnLen);
423	++ThisTokBuf) {
424	if (Delimited && *ThisTokBuf == '}') {
425	++ThisTokBuf;
426	EndDelimiterFound = true;
427	break;
428	}
429	int CharVal = llvm::hexDigitValue(*ThisTokBuf);
430	if (CharVal == -1) {
431	HasError = true;
432	if (!Delimited)
433	break;
434	if (Diags) {
435	Diag(Diags, Features, Loc, ThisTokBegin, UcnBegin, ThisTokBuf,
436	diag::err_delimited_escape_invalid)
437	<< StringRef(ThisTokBuf, 1);
438	}
439	Count++;
440	continue;
441	}
442	if (UcnVal & 0xF0000000) {
443	Overflow = true;
444	continue;
445	}
446	UcnVal <<= 4;
447	UcnVal \|= CharVal;
448	Count++;
449	}
450
451	if (Overflow) {
452	if (Diags)
453	Diag(Diags, Features, Loc, ThisTokBegin, UcnBegin, ThisTokBuf,
454	diag::err_escape_too_large)
455	<< 0;
456	return false;
457	}
458
459	if (Delimited && !EndDelimiterFound) {
460	if (Diags) {
461	Diag(Diags, Features, Loc, ThisTokBegin, UcnBegin, ThisTokBuf,
462	diag::err_expected)
463	<< tok::r_brace;
464	}
465	return false;
466	}
467
468	// If we didn't consume the proper number of digits, there is a problem.
469	if (Count == 0 \|\| (!Delimited && Count != UcnLen)) {
470	if (Diags)
471	Diag(Diags, Features, Loc, ThisTokBegin, UcnBegin, ThisTokBuf,
472	Delimited ? diag::err_delimited_escape_empty
473	: diag::err_ucn_escape_incomplete);
474	return false;
475	}
476	return !HasError;
477	}
478
479	static void DiagnoseInvalidUnicodeCharacterName(
480	DiagnosticsEngine *Diags, const LangOptions &Features, FullSourceLoc Loc,
481	const char TokBegin, const char TokRangeBegin, const char *TokRangeEnd,
482	llvm::StringRef Name) {
483
484	Diag(Diags, Features, Loc, TokBegin, TokRangeBegin, TokRangeEnd,
485	diag::err_invalid_ucn_name)
486	<< Name;
487
488	namespace u = llvm::sys::unicode;
489
490	std::optional<u::LooseMatchingResult> Res =
491	u::nameToCodepointLooseMatching(Name);
492	if (Res) {
493	Diag(Diags, Features, Loc, TokBegin, TokRangeBegin, TokRangeEnd,
494	diag::note_invalid_ucn_name_loose_matching)
495	<< FixItHint::CreateReplacement(
496	MakeCharSourceRange(Features, Loc, TokBegin, TokRangeBegin,
497	TokRangeEnd),
498	Res->Name);
499	return;
500	}
501
502	unsigned Distance = 0;
503	SmallVector<u::MatchForCodepointName> Matches =
504	u::nearestMatchesForCodepointName(Name, 5);
505	assert(!Matches.empty() && "No unicode characters found")(static_cast <bool> (!Matches.empty() && "No unicode characters found" ) ? void (0) : __assert_fail ("!Matches.empty() && \"No unicode characters found\"" , "clang/lib/Lex/LiteralSupport.cpp", 505, __extension__ __PRETTY_FUNCTION__ ));
506
507	for (const auto &Match : Matches) {
508	if (Distance == 0)
509	Distance = Match.Distance;
510	if (std::max(Distance, Match.Distance) -
511	std::min(Distance, Match.Distance) >
512	3)
513	break;
514	Distance = Match.Distance;
515
516	std::string Str;
517	llvm::UTF32 V = Match.Value;
518	bool Converted =
519	llvm::convertUTF32ToUTF8String(llvm::ArrayRef<llvm::UTF32>(&V, 1), Str);
520	(void)Converted;
521	assert(Converted && "Found a match wich is not a unicode character")(static_cast <bool> (Converted && "Found a match wich is not a unicode character" ) ? void (0) : __assert_fail ("Converted && \"Found a match wich is not a unicode character\"" , "clang/lib/Lex/LiteralSupport.cpp", 521, __extension__ __PRETTY_FUNCTION__ ));
522
523	Diag(Diags, Features, Loc, TokBegin, TokRangeBegin, TokRangeEnd,
524	diag::note_invalid_ucn_name_candidate)
525	<< Match.Name << llvm::utohexstr(Match.Value)
526	<< Str // FIXME: Fix the rendering of non printable characters
527	<< FixItHint::CreateReplacement(
528	MakeCharSourceRange(Features, Loc, TokBegin, TokRangeBegin,
529	TokRangeEnd),
530	Match.Name);
531	}
532	}
533
534	static bool ProcessNamedUCNEscape(const char *ThisTokBegin,
535	const char *&ThisTokBuf,
536	const char *ThisTokEnd, uint32_t &UcnVal,
537	unsigned short &UcnLen, FullSourceLoc Loc,
538	DiagnosticsEngine *Diags,
539	const LangOptions &Features) {
540	const char *UcnBegin = ThisTokBuf;
541	assert(UcnBegin[0] == '\\' && UcnBegin[1] == 'N')(static_cast <bool> (UcnBegin[0] == '\\' && UcnBegin [1] == 'N') ? void (0) : __assert_fail ("UcnBegin[0] == '\\\\' && UcnBegin[1] == 'N'" , "clang/lib/Lex/LiteralSupport.cpp", 541, __extension__ __PRETTY_FUNCTION__ ));
542	ThisTokBuf += 2;
543	if (ThisTokBuf == ThisTokEnd \|\| *ThisTokBuf != '{') {
544	if (Diags) {
545	Diag(Diags, Features, Loc, ThisTokBegin, UcnBegin, ThisTokBuf,
546	diag::err_delimited_escape_missing_brace)
547	<< StringRef(&ThisTokBuf[-1], 1);
548	}
549	return false;
550	}
551	ThisTokBuf++;
552	const char *ClosingBrace = std::find_if(ThisTokBuf, ThisTokEnd, [](char C) {
553	return C == '}' \|\| isVerticalWhitespace(C);
554	});
555	bool Incomplete = ClosingBrace == ThisTokEnd;
556	bool Empty = ClosingBrace == ThisTokBuf;
557	if (Incomplete \|\| Empty) {
558	if (Diags) {
559	Diag(Diags, Features, Loc, ThisTokBegin, UcnBegin, ThisTokBuf,
560	Incomplete ? diag::err_ucn_escape_incomplete
561	: diag::err_delimited_escape_empty)
562	<< StringRef(&UcnBegin[1], 1);
563	}
564	ThisTokBuf = ClosingBrace == ThisTokEnd ? ClosingBrace : ClosingBrace + 1;
565	return false;
566	}
567	StringRef Name(ThisTokBuf, ClosingBrace - ThisTokBuf);
568	ThisTokBuf = ClosingBrace + 1;
569	std::optional<char32_t> Res = llvm::sys::unicode::nameToCodepointStrict(Name);
570	if (!Res) {
571	if (Diags)
572	DiagnoseInvalidUnicodeCharacterName(Diags, Features, Loc, ThisTokBegin,
573	&UcnBegin[3], ClosingBrace, Name);
574	return false;
575	}
576	UcnVal = *Res;
577	UcnLen = UcnVal > 0xFFFF ? 8 : 4;
578	return true;
579	}
580
581	/// ProcessUCNEscape - Read the Universal Character Name, check constraints and
582	/// return the UTF32.
583	static bool ProcessUCNEscape(const char ThisTokBegin, const char &ThisTokBuf,
584	const char *ThisTokEnd, uint32_t &UcnVal,
585	unsigned short &UcnLen, FullSourceLoc Loc,
586	DiagnosticsEngine *Diags,
587	const LangOptions &Features,
588	bool in_char_string_literal = false) {
589
590	bool HasError;
591	const char *UcnBegin = ThisTokBuf;
592	bool IsDelimitedEscapeSequence = false;
593	bool IsNamedEscapeSequence = false;
594	if (ThisTokBuf[1] == 'N') {
595	IsNamedEscapeSequence = true;
596	HasError = !ProcessNamedUCNEscape(ThisTokBegin, ThisTokBuf, ThisTokEnd,
597	UcnVal, UcnLen, Loc, Diags, Features);
598	} else {
599	HasError =
600	!ProcessNumericUCNEscape(ThisTokBegin, ThisTokBuf, ThisTokEnd, UcnVal,
601	UcnLen, IsDelimitedEscapeSequence, Loc, Diags,
602	Features, in_char_string_literal);
603	}
604	if (HasError)
605	return false;
606
607	// Check UCN constraints (C99 6.4.3p2) [C++11 lex.charset p2]
608	if ((0xD800 <= UcnVal && UcnVal <= 0xDFFF) \|\| // surrogate codepoints
609	UcnVal > 0x10FFFF) { // maximum legal UTF32 value
610	if (Diags)
611	Diag(Diags, Features, Loc, ThisTokBegin, UcnBegin, ThisTokBuf,
612	diag::err_ucn_escape_invalid);
613	return false;
614	}
615
616	// C++11 allows UCNs that refer to control characters and basic source
617	// characters inside character and string literals
618	if (UcnVal < 0xa0 &&
619	(UcnVal != 0x24 && UcnVal != 0x40 && UcnVal != 0x60)) { // $, @, `
620	bool IsError = (!Features.CPlusPlus11 \|\| !in_char_string_literal);
621	if (Diags) {
622	char BasicSCSChar = UcnVal;
623	if (UcnVal >= 0x20 && UcnVal < 0x7f)
624	Diag(Diags, Features, Loc, ThisTokBegin, UcnBegin, ThisTokBuf,
625	IsError ? diag::err_ucn_escape_basic_scs :
626	diag::warn_cxx98_compat_literal_ucn_escape_basic_scs)
627	<< StringRef(&BasicSCSChar, 1);
628	else
629	Diag(Diags, Features, Loc, ThisTokBegin, UcnBegin, ThisTokBuf,
630	IsError ? diag::err_ucn_control_character :
631	diag::warn_cxx98_compat_literal_ucn_control_character);
632	}
633	if (IsError)
634	return false;
635	}
636
637	if (!Features.CPlusPlus && !Features.C99 && Diags)
638	Diag(Diags, Features, Loc, ThisTokBegin, UcnBegin, ThisTokBuf,
639	diag::warn_ucn_not_valid_in_c89_literal);
640
641	if ((IsDelimitedEscapeSequence \|\| IsNamedEscapeSequence) && Diags)
642	Diag(Diags, Features, Loc, ThisTokBegin, UcnBegin, ThisTokBuf,
643	Features.CPlusPlus2b ? diag::warn_cxx2b_delimited_escape_sequence
644	: diag::ext_delimited_escape_sequence)
645	<< (IsNamedEscapeSequence ? 1 : 0) << (Features.CPlusPlus ? 1 : 0);
646
647	return true;
648	}
649
650	/// MeasureUCNEscape - Determine the number of bytes within the resulting string
651	/// which this UCN will occupy.
652	static int MeasureUCNEscape(const char ThisTokBegin, const char &ThisTokBuf,
653	const char *ThisTokEnd, unsigned CharByteWidth,
654	const LangOptions &Features, bool &HadError) {
655	// UTF-32: 4 bytes per escape.
656	if (CharByteWidth == 4)
657	return 4;
658
659	uint32_t UcnVal = 0;
660	unsigned short UcnLen = 0;
661	FullSourceLoc Loc;
662
663	if (!ProcessUCNEscape(ThisTokBegin, ThisTokBuf, ThisTokEnd, UcnVal,
664	UcnLen, Loc, nullptr, Features, true)) {
665	HadError = true;
666	return 0;
667	}
668
669	// UTF-16: 2 bytes for BMP, 4 bytes otherwise.
670	if (CharByteWidth == 2)
671	return UcnVal <= 0xFFFF ? 2 : 4;
672
673	// UTF-8.
674	if (UcnVal < 0x80)
675	return 1;
676	if (UcnVal < 0x800)
677	return 2;
678	if (UcnVal < 0x10000)
679	return 3;
680	return 4;
681	}
682
683	/// EncodeUCNEscape - Read the Universal Character Name, check constraints and
684	/// convert the UTF32 to UTF8 or UTF16. This is a subroutine of
685	/// StringLiteralParser. When we decide to implement UCN's for identifiers,
686	/// we will likely rework our support for UCN's.
687	static void EncodeUCNEscape(const char ThisTokBegin, const char &ThisTokBuf,
688	const char *ThisTokEnd,
689	char *&ResultBuf, bool &HadError,
690	FullSourceLoc Loc, unsigned CharByteWidth,
691	DiagnosticsEngine *Diags,
692	const LangOptions &Features) {
693	typedef uint32_t UTF32;
694	UTF32 UcnVal = 0;
695	unsigned short UcnLen = 0;
696	if (!ProcessUCNEscape(ThisTokBegin, ThisTokBuf, ThisTokEnd, UcnVal, UcnLen,
697	Loc, Diags, Features, true)) {
698	HadError = true;
699	return;
700	}
701
702	assert((CharByteWidth == 1 \|\| CharByteWidth == 2 \|\| CharByteWidth == 4) &&(static_cast <bool> ((CharByteWidth == 1 \|\| CharByteWidth == 2 \|\| CharByteWidth == 4) && "only character widths of 1, 2, or 4 bytes supported" ) ? void (0) : __assert_fail ("(CharByteWidth == 1 \|\| CharByteWidth == 2 \|\| CharByteWidth == 4) && \"only character widths of 1, 2, or 4 bytes supported\"" , "clang/lib/Lex/LiteralSupport.cpp", 703, __extension__ __PRETTY_FUNCTION__ ))
703	"only character widths of 1, 2, or 4 bytes supported")(static_cast <bool> ((CharByteWidth == 1 \|\| CharByteWidth == 2 \|\| CharByteWidth == 4) && "only character widths of 1, 2, or 4 bytes supported" ) ? void (0) : __assert_fail ("(CharByteWidth == 1 \|\| CharByteWidth == 2 \|\| CharByteWidth == 4) && \"only character widths of 1, 2, or 4 bytes supported\"" , "clang/lib/Lex/LiteralSupport.cpp", 703, __extension__ __PRETTY_FUNCTION__ ));
704
705	(void)UcnLen;
706	assert((UcnLen== 4 \|\| UcnLen== 8) && "only ucn length of 4 or 8 supported")(static_cast <bool> ((UcnLen== 4 \|\| UcnLen== 8) && "only ucn length of 4 or 8 supported") ? void (0) : __assert_fail ("(UcnLen== 4 \|\| UcnLen== 8) && \"only ucn length of 4 or 8 supported\"" , "clang/lib/Lex/LiteralSupport.cpp", 706, __extension__ __PRETTY_FUNCTION__ ));
707
708	if (CharByteWidth == 4) {
709	// FIXME: Make the type of the result buffer correct instead of
710	// using reinterpret_cast.
711	llvm::UTF32 ResultPtr = reinterpret_cast<llvm::UTF32>(ResultBuf);
712	*ResultPtr = UcnVal;
713	ResultBuf += 4;
714	return;
715	}
716
717	if (CharByteWidth == 2) {
718	// FIXME: Make the type of the result buffer correct instead of
719	// using reinterpret_cast.
720	llvm::UTF16 ResultPtr = reinterpret_cast<llvm::UTF16>(ResultBuf);
721
722	if (UcnVal <= (UTF32)0xFFFF) {
723	*ResultPtr = UcnVal;
724	ResultBuf += 2;
725	return;
726	}
727
728	// Convert to UTF16.
729	UcnVal -= 0x10000;
730	*ResultPtr = 0xD800 + (UcnVal >> 10);
731	*(ResultPtr+1) = 0xDC00 + (UcnVal & 0x3FF);
732	ResultBuf += 4;
733	return;
734	}
735
736	assert(CharByteWidth == 1 && "UTF-8 encoding is only for 1 byte characters")(static_cast <bool> (CharByteWidth == 1 && "UTF-8 encoding is only for 1 byte characters" ) ? void (0) : __assert_fail ("CharByteWidth == 1 && \"UTF-8 encoding is only for 1 byte characters\"" , "clang/lib/Lex/LiteralSupport.cpp", 736, __extension__ __PRETTY_FUNCTION__ ));
737
738	// Now that we've parsed/checked the UCN, we convert from UTF32->UTF8.
739	// The conversion below was inspired by:
740	// http://www.unicode.org/Public/PROGRAMS/CVTUTF/ConvertUTF.c
741	// First, we determine how many bytes the result will require.
742	typedef uint8_t UTF8;
743
744	unsigned short bytesToWrite = 0;
745	if (UcnVal < (UTF32)0x80)
746	bytesToWrite = 1;
747	else if (UcnVal < (UTF32)0x800)
748	bytesToWrite = 2;
749	else if (UcnVal < (UTF32)0x10000)
750	bytesToWrite = 3;
751	else
752	bytesToWrite = 4;
753
754	const unsigned byteMask = 0xBF;
755	const unsigned byteMark = 0x80;
756
757	// Once the bits are split out into bytes of UTF8, this is a mask OR-ed
758	// into the first byte, depending on how many bytes follow.
759	static const UTF8 firstByteMark[5] = {
760	0x00, 0x00, 0xC0, 0xE0, 0xF0
761	};
762	// Finally, we write the bytes into ResultBuf.
763	ResultBuf += bytesToWrite;
764	switch (bytesToWrite) { // note: everything falls through.
765	case 4:
766	*--ResultBuf = (UTF8)((UcnVal \| byteMark) & byteMask); UcnVal >>= 6;
767	[[fallthrough]];
768	case 3:
769	*--ResultBuf = (UTF8)((UcnVal \| byteMark) & byteMask); UcnVal >>= 6;
770	[[fallthrough]];
771	case 2:
772	*--ResultBuf = (UTF8)((UcnVal \| byteMark) & byteMask); UcnVal >>= 6;
773	[[fallthrough]];
774	case 1:
775	*--ResultBuf = (UTF8) (UcnVal \| firstByteMark[bytesToWrite]);
776	}
777	// Update the buffer.
778	ResultBuf += bytesToWrite;
779	}
780
781	/// integer-constant: [C99 6.4.4.1]
782	/// decimal-constant integer-suffix
783	/// octal-constant integer-suffix
784	/// hexadecimal-constant integer-suffix
785	/// binary-literal integer-suffix [GNU, C++1y]
786	/// user-defined-integer-literal: [C++11 lex.ext]
787	/// decimal-literal ud-suffix
788	/// octal-literal ud-suffix
789	/// hexadecimal-literal ud-suffix
790	/// binary-literal ud-suffix [GNU, C++1y]
791	/// decimal-constant:
792	/// nonzero-digit
793	/// decimal-constant digit
794	/// octal-constant:
795	/// 0
796	/// octal-constant octal-digit
797	/// hexadecimal-constant:
798	/// hexadecimal-prefix hexadecimal-digit
799	/// hexadecimal-constant hexadecimal-digit
800	/// hexadecimal-prefix: one of
801	/// 0x 0X
802	/// binary-literal:
803	/// 0b binary-digit
804	/// 0B binary-digit
805	/// binary-literal binary-digit
806	/// integer-suffix:
807	/// unsigned-suffix [long-suffix]
808	/// unsigned-suffix [long-long-suffix]
809	/// long-suffix [unsigned-suffix]
810	/// long-long-suffix [unsigned-sufix]
811	/// nonzero-digit:
812	/// 1 2 3 4 5 6 7 8 9
813	/// octal-digit:
814	/// 0 1 2 3 4 5 6 7
815	/// hexadecimal-digit:
816	/// 0 1 2 3 4 5 6 7 8 9
817	/// a b c d e f
818	/// A B C D E F
819	/// binary-digit:
820	/// 0
821	/// 1
822	/// unsigned-suffix: one of
823	/// u U
824	/// long-suffix: one of
825	/// l L
826	/// long-long-suffix: one of
827	/// ll LL
828	///
829	/// floating-constant: [C99 6.4.4.2]
830	/// TODO: add rules...
831	///
832	NumericLiteralParser::NumericLiteralParser(StringRef TokSpelling,
833	SourceLocation TokLoc,
834	const SourceManager &SM,
835	const LangOptions &LangOpts,
836	const TargetInfo &Target,
837	DiagnosticsEngine &Diags)
838	: SM(SM), LangOpts(LangOpts), Diags(Diags),
839	ThisTokBegin(TokSpelling.begin()), ThisTokEnd(TokSpelling.end()) {
840
841	s = DigitsBegin = ThisTokBegin;
842	saw_exponent = false;
843	saw_period = false;
844	saw_ud_suffix = false;
845	saw_fixed_point_suffix = false;
846	isLong = false;
847	isUnsigned = false;
848	isLongLong = false;
849	isSizeT = false;
850	isHalf = false;
851	isFloat = false;
852	isImaginary = false;
853	isFloat16 = false;
854	isFloat128 = false;
855	MicrosoftInteger = 0;
856	isFract = false;
857	isAccum = false;
858	hadError = false;
859	isBitInt = false;
860
861	// This routine assumes that the range begin/end matches the regex for integer
862	// and FP constants (specifically, the 'pp-number' regex), and assumes that
863	// the byte at "*end" is both valid and not part of the regex. Because of
864	// this, it doesn't have to check for 'overscan' in various places.
865	if (isPreprocessingNumberBody(*ThisTokEnd)) {
866	Diags.Report(TokLoc, diag::err_lexing_numeric);
867	hadError = true;
868	return;
869	}
870
871	if (*s == '0') { // parse radix
872	ParseNumberStartingWithZero(TokLoc);
873	if (hadError)
874	return;
875	} else { // the first digit is non-zero
876	radix = 10;
877	s = SkipDigits(s);
878	if (s == ThisTokEnd) {
879	// Done.
880	} else {
881	ParseDecimalOrOctalCommon(TokLoc);
882	if (hadError)
883	return;
884	}
885	}
886
887	SuffixBegin = s;
888	checkSeparator(TokLoc, s, CSK_AfterDigits);
889
890	// Initial scan to lookahead for fixed point suffix.
891	if (LangOpts.FixedPoint) {
892	for (const char *c = s; c != ThisTokEnd; ++c) {
893	if (c == 'r' \|\| c == 'k' \|\| c == 'R' \|\| c == 'K') {
894	saw_fixed_point_suffix = true;
895	break;
896	}
897	}
898	}
899
900	// Parse the suffix. At this point we can classify whether we have an FP or
901	// integer constant.
902	bool isFixedPointConstant = isFixedPointLiteral();
903	bool isFPConstant = isFloatingLiteral();
904	bool HasSize = false;
905
906	// Loop over all of the characters of the suffix. If we see something bad,
907	// we break out of the loop.
908	for (; s != ThisTokEnd; ++s) {
909	switch (*s) {
910	case 'R':
911	case 'r':
912	if (!LangOpts.FixedPoint)
913	break;
914	if (isFract \|\| isAccum) break;
915	if (!(saw_period \|\| saw_exponent)) break;
916	isFract = true;
917	continue;
918	case 'K':
919	case 'k':
920	if (!LangOpts.FixedPoint)
921	break;
922	if (isFract \|\| isAccum) break;
923	if (!(saw_period \|\| saw_exponent)) break;
924	isAccum = true;
925	continue;
926	case 'h': // FP Suffix for "half".
927	case 'H':
928	// OpenCL Extension v1.2 s9.5 - h or H suffix for half type.
929	if (!(LangOpts.Half \|\| LangOpts.FixedPoint))
930	break;
931	if (isIntegerLiteral()) break; // Error for integer constant.
932	if (HasSize)
933	break;
934	HasSize = true;
935	isHalf = true;
936	continue; // Success.
937	case 'f': // FP Suffix for "float"
938	case 'F':
939	if (!isFPConstant) break; // Error for integer constant.
940	if (HasSize)
941	break;
942	HasSize = true;
943
944	// CUDA host and device may have different _Float16 support, therefore
945	// allows f16 literals to avoid false alarm.
946	// ToDo: more precise check for CUDA.
947	if ((Target.hasFloat16Type() \|\| LangOpts.CUDA) && s + 2 < ThisTokEnd &&
948	s[1] == '1' && s[2] == '6') {
949	s += 2; // success, eat up 2 characters.
950	isFloat16 = true;
951	continue;
952	}
953
954	isFloat = true;
955	continue; // Success.
956	case 'q': // FP Suffix for "__float128"
957	case 'Q':
958	if (!isFPConstant) break; // Error for integer constant.
959	if (HasSize)
960	break;
961	HasSize = true;
962	isFloat128 = true;
963	continue; // Success.
964	case 'u':
965	case 'U':
966	if (isFPConstant) break; // Error for floating constant.
967	if (isUnsigned) break; // Cannot be repeated.
968	isUnsigned = true;
969	continue; // Success.
970	case 'l':
971	case 'L':
972	if (HasSize)
973	break;
974	HasSize = true;
975
976	// Check for long long. The L's need to be adjacent and the same case.
977	if (s[1] == s[0]) {
978	assert(s + 1 < ThisTokEnd && "didn't maximally munch?")(static_cast <bool> (s + 1 < ThisTokEnd && "didn't maximally munch?" ) ? void (0) : __assert_fail ("s + 1 < ThisTokEnd && \"didn't maximally munch?\"" , "clang/lib/Lex/LiteralSupport.cpp", 978, __extension__ __PRETTY_FUNCTION__ ));
979	if (isFPConstant) break; // long long invalid for floats.
980	isLongLong = true;
981	++s; // Eat both of them.
982	} else {
983	isLong = true;
984	}
985	continue; // Success.
986	case 'z':
987	case 'Z':
988	if (isFPConstant)
989	break; // Invalid for floats.
990	if (HasSize)
991	break;
992	HasSize = true;
993	isSizeT = true;
994	continue;
995	case 'i':
996	case 'I':
997	if (LangOpts.MicrosoftExt && !isFPConstant) {
998	// Allow i8, i16, i32, and i64. First, look ahead and check if
999	// suffixes are Microsoft integers and not the imaginary unit.
1000	uint8_t Bits = 0;
1001	size_t ToSkip = 0;
1002	switch (s[1]) {
1003	case '8': // i8 suffix
1004	Bits = 8;
1005	ToSkip = 2;
1006	break;
1007	case '1':
1008	if (s[2] == '6') { // i16 suffix
1009	Bits = 16;
1010	ToSkip = 3;
1011	}
1012	break;
1013	case '3':
1014	if (s[2] == '2') { // i32 suffix
1015	Bits = 32;
1016	ToSkip = 3;
1017	}
1018	break;
1019	case '6':
1020	if (s[2] == '4') { // i64 suffix
1021	Bits = 64;
1022	ToSkip = 3;
1023	}
1024	break;
1025	default:
1026	break;
1027	}
1028	if (Bits) {
1029	if (HasSize)
1030	break;
1031	HasSize = true;
	Value stored to 'HasSize' is never read
1032	MicrosoftInteger = Bits;
1033	s += ToSkip;
1034	assert(s <= ThisTokEnd && "didn't maximally munch?")(static_cast <bool> (s <= ThisTokEnd && "didn't maximally munch?" ) ? void (0) : __assert_fail ("s <= ThisTokEnd && \"didn't maximally munch?\"" , "clang/lib/Lex/LiteralSupport.cpp", 1034, __extension__ __PRETTY_FUNCTION__ ));
1035	break;
1036	}
1037	}
1038	[[fallthrough]];
1039	case 'j':
1040	case 'J':
1041	if (isImaginary) break; // Cannot be repeated.
1042	isImaginary = true;
1043	continue; // Success.
1044	case 'w':
1045	case 'W':
1046	if (isFPConstant)
1047	break; // Invalid for floats.
1048	if (HasSize)
1049	break; // Invalid if we already have a size for the literal.
1050
1051	// wb and WB are allowed, but a mixture of cases like Wb or wB is not. We
1052	// explicitly do not support the suffix in C++ as an extension because a
1053	// library-based UDL that resolves to a library type may be more
1054	// appropriate there.
1055	if (!LangOpts.CPlusPlus && ((s[0] == 'w' && s[1] == 'b') \|\|
1056	(s[0] == 'W' && s[1] == 'B'))) {
1057	isBitInt = true;
1058	HasSize = true;
1059	++s; // Skip both characters (2nd char skipped on continue).
1060	continue; // Success.
1061	}
1062	}
1063	// If we reached here, there was an error or a ud-suffix.
1064	break;
1065	}
1066
1067	// "i", "if", and "il" are user-defined suffixes in C++1y.
1068	if (s != ThisTokEnd \|\| isImaginary) {
1069	// FIXME: Don't bother expanding UCNs if !tok.hasUCN().
1070	expandUCNs(UDSuffixBuf, StringRef(SuffixBegin, ThisTokEnd - SuffixBegin));
1071	if (isValidUDSuffix(LangOpts, UDSuffixBuf)) {
1072	if (!isImaginary) {
1073	// Any suffix pieces we might have parsed are actually part of the
1074	// ud-suffix.
1075	isLong = false;
1076	isUnsigned = false;
1077	isLongLong = false;
1078	isSizeT = false;
1079	isFloat = false;
1080	isFloat16 = false;
1081	isHalf = false;
1082	isImaginary = false;
1083	isBitInt = false;
1084	MicrosoftInteger = 0;
1085	saw_fixed_point_suffix = false;
1086	isFract = false;
1087	isAccum = false;
1088	}
1089
1090	saw_ud_suffix = true;
1091	return;
1092	}
1093
1094	if (s != ThisTokEnd) {
1095	// Report an error if there are any.
1096	Diags.Report(Lexer::AdvanceToTokenCharacter(
1097	TokLoc, SuffixBegin - ThisTokBegin, SM, LangOpts),
1098	diag::err_invalid_suffix_constant)
1099	<< StringRef(SuffixBegin, ThisTokEnd - SuffixBegin)
1100	<< (isFixedPointConstant ? 2 : isFPConstant);
1101	hadError = true;
1102	}
1103	}
1104
1105	if (!hadError && saw_fixed_point_suffix) {
1106	assert(isFract \|\| isAccum)(static_cast <bool> (isFract \|\| isAccum) ? void (0) : __assert_fail ("isFract \|\| isAccum", "clang/lib/Lex/LiteralSupport.cpp", 1106 , __extension__ __PRETTY_FUNCTION__));
1107	}
1108	}
1109
1110	/// ParseDecimalOrOctalCommon - This method is called for decimal or octal
1111	/// numbers. It issues an error for illegal digits, and handles floating point
1112	/// parsing. If it detects a floating point number, the radix is set to 10.
1113	void NumericLiteralParser::ParseDecimalOrOctalCommon(SourceLocation TokLoc){
1114	assert((radix == 8 \|\| radix == 10) && "Unexpected radix")(static_cast <bool> ((radix == 8 \|\| radix == 10) && "Unexpected radix") ? void (0) : __assert_fail ("(radix == 8 \|\| radix == 10) && \"Unexpected radix\"" , "clang/lib/Lex/LiteralSupport.cpp", 1114, __extension__ __PRETTY_FUNCTION__ ));
1115
1116	// If we have a hex digit other than 'e' (which denotes a FP exponent) then
1117	// the code is using an incorrect base.
1118	if (isHexDigit(s) && s != 'e' && *s != 'E' &&
1119	!isValidUDSuffix(LangOpts, StringRef(s, ThisTokEnd - s))) {
1120	Diags.Report(
1121	Lexer::AdvanceToTokenCharacter(TokLoc, s - ThisTokBegin, SM, LangOpts),
1122	diag::err_invalid_digit)
1123	<< StringRef(s, 1) << (radix == 8 ? 1 : 0);
1124	hadError = true;
1125	return;
1126	}
1127
1128	if (*s == '.') {
1129	checkSeparator(TokLoc, s, CSK_AfterDigits);
1130	s++;
1131	radix = 10;
1132	saw_period = true;
1133	checkSeparator(TokLoc, s, CSK_BeforeDigits);
1134	s = SkipDigits(s); // Skip suffix.
1135	}
1136	if (s == 'e' \|\| s == 'E') { // exponent
1137	checkSeparator(TokLoc, s, CSK_AfterDigits);
1138	const char *Exponent = s;
1139	s++;
1140	radix = 10;
1141	saw_exponent = true;
1142	if (s != ThisTokEnd && (s == '+' \|\| s == '-')) s++; // sign
1143	const char *first_non_digit = SkipDigits(s);
1144	if (containsDigits(s, first_non_digit)) {
1145	checkSeparator(TokLoc, s, CSK_BeforeDigits);
1146	s = first_non_digit;
1147	} else {
1148	if (!hadError) {
1149	Diags.Report(Lexer::AdvanceToTokenCharacter(
1150	TokLoc, Exponent - ThisTokBegin, SM, LangOpts),
1151	diag::err_exponent_has_no_digits);
1152	hadError = true;
1153	}
1154	return;
1155	}
1156	}
1157	}
1158
1159	/// Determine whether a suffix is a valid ud-suffix. We avoid treating reserved
1160	/// suffixes as ud-suffixes, because the diagnostic experience is better if we
1161	/// treat it as an invalid suffix.
1162	bool NumericLiteralParser::isValidUDSuffix(const LangOptions &LangOpts,
1163	StringRef Suffix) {
1164	if (!LangOpts.CPlusPlus11 \|\| Suffix.empty())
1165	return false;
1166
1167	// By C++11 [lex.ext]p10, ud-suffixes starting with an '_' are always valid.
1168	if (Suffix[0] == '_')
1169	return true;
1170
1171	// In C++11, there are no library suffixes.
1172	if (!LangOpts.CPlusPlus14)
1173	return false;
1174
1175	// In C++14, "s", "h", "min", "ms", "us", and "ns" are used in the library.
1176	// Per tweaked N3660, "il", "i", and "if" are also used in the library.
1177	// In C++2a "d" and "y" are used in the library.
1178	return llvm::StringSwitch<bool>(Suffix)
1179	.Cases("h", "min", "s", true)
1180	.Cases("ms", "us", "ns", true)
1181	.Cases("il", "i", "if", true)
1182	.Cases("d", "y", LangOpts.CPlusPlus20)
1183	.Default(false);
1184	}
1185
1186	void NumericLiteralParser::checkSeparator(SourceLocation TokLoc,
1187	const char *Pos,
1188	CheckSeparatorKind IsAfterDigits) {
1189	if (IsAfterDigits == CSK_AfterDigits) {
1190	if (Pos == ThisTokBegin)
1191	return;
1192	--Pos;
1193	} else if (Pos == ThisTokEnd)
1194	return;
1195
1196	if (isDigitSeparator(*Pos)) {
1197	Diags.Report(Lexer::AdvanceToTokenCharacter(TokLoc, Pos - ThisTokBegin, SM,
1198	LangOpts),
1199	diag::err_digit_separator_not_between_digits)
1200	<< IsAfterDigits;
1201	hadError = true;
1202	}
1203	}
1204
1205	/// ParseNumberStartingWithZero - This method is called when the first character
1206	/// of the number is found to be a zero. This means it is either an octal
1207	/// number (like '04') or a hex number ('0x123a') a binary number ('0b1010') or
1208	/// a floating point number (01239.123e4). Eat the prefix, determining the
1209	/// radix etc.
1210	void NumericLiteralParser::ParseNumberStartingWithZero(SourceLocation TokLoc) {
1211	assert(s[0] == '0' && "Invalid method call")(static_cast <bool> (s[0] == '0' && "Invalid method call" ) ? void (0) : __assert_fail ("s[0] == '0' && \"Invalid method call\"" , "clang/lib/Lex/LiteralSupport.cpp", 1211, __extension__ __PRETTY_FUNCTION__ ));
1212	s++;
1213
1214	int c1 = s[0];
1215
1216	// Handle a hex number like 0x1234.
1217	if ((c1 == 'x' \|\| c1 == 'X') && (isHexDigit(s[1]) \|\| s[1] == '.')) {
1218	s++;
1219	assert(s < ThisTokEnd && "didn't maximally munch?")(static_cast <bool> (s < ThisTokEnd && "didn't maximally munch?" ) ? void (0) : __assert_fail ("s < ThisTokEnd && \"didn't maximally munch?\"" , "clang/lib/Lex/LiteralSupport.cpp", 1219, __extension__ __PRETTY_FUNCTION__ ));
1220	radix = 16;
1221	DigitsBegin = s;
1222	s = SkipHexDigits(s);
1223	bool HasSignificandDigits = containsDigits(DigitsBegin, s);
1224	if (s == ThisTokEnd) {
1225	// Done.
1226	} else if (*s == '.') {
1227	s++;
1228	saw_period = true;
1229	const char *floatDigitsBegin = s;
1230	s = SkipHexDigits(s);
1231	if (containsDigits(floatDigitsBegin, s))
1232	HasSignificandDigits = true;
1233	if (HasSignificandDigits)
1234	checkSeparator(TokLoc, floatDigitsBegin, CSK_BeforeDigits);
1235	}
1236
1237	if (!HasSignificandDigits) {
1238	Diags.Report(Lexer::AdvanceToTokenCharacter(TokLoc, s - ThisTokBegin, SM,
1239	LangOpts),
1240	diag::err_hex_constant_requires)
1241	<< LangOpts.CPlusPlus << 1;
1242	hadError = true;
1243	return;
1244	}
1245
1246	// A binary exponent can appear with or with a '.'. If dotted, the
1247	// binary exponent is required.
1248	if (s == 'p' \|\| s == 'P') {
1249	checkSeparator(TokLoc, s, CSK_AfterDigits);
1250	const char *Exponent = s;
1251	s++;
1252	saw_exponent = true;
1253	if (s != ThisTokEnd && (s == '+' \|\| s == '-')) s++; // sign
1254	const char *first_non_digit = SkipDigits(s);
1255	if (!containsDigits(s, first_non_digit)) {
1256	if (!hadError) {
1257	Diags.Report(Lexer::AdvanceToTokenCharacter(
1258	TokLoc, Exponent - ThisTokBegin, SM, LangOpts),
1259	diag::err_exponent_has_no_digits);
1260	hadError = true;
1261	}
1262	return;
1263	}
1264	checkSeparator(TokLoc, s, CSK_BeforeDigits);
1265	s = first_non_digit;
1266
1267	if (!LangOpts.HexFloats)
1268	Diags.Report(TokLoc, LangOpts.CPlusPlus
1269	? diag::ext_hex_literal_invalid
1270	: diag::ext_hex_constant_invalid);
1271	else if (LangOpts.CPlusPlus17)
1272	Diags.Report(TokLoc, diag::warn_cxx17_hex_literal);
1273	} else if (saw_period) {
1274	Diags.Report(Lexer::AdvanceToTokenCharacter(TokLoc, s - ThisTokBegin, SM,
1275	LangOpts),
1276	diag::err_hex_constant_requires)
1277	<< LangOpts.CPlusPlus << 0;
1278	hadError = true;
1279	}
1280	return;
1281	}
1282
1283	// Handle simple binary numbers 0b01010
1284	if ((c1 == 'b' \|\| c1 == 'B') && (s[1] == '0' \|\| s[1] == '1')) {
1285	// 0b101010 is a C++1y / GCC extension.
1286	Diags.Report(TokLoc, LangOpts.CPlusPlus14
1287	? diag::warn_cxx11_compat_binary_literal
1288	: LangOpts.CPlusPlus ? diag::ext_binary_literal_cxx14
1289	: diag::ext_binary_literal);
1290	++s;
1291	assert(s < ThisTokEnd && "didn't maximally munch?")(static_cast <bool> (s < ThisTokEnd && "didn't maximally munch?" ) ? void (0) : __assert_fail ("s < ThisTokEnd && \"didn't maximally munch?\"" , "clang/lib/Lex/LiteralSupport.cpp", 1291, __extension__ __PRETTY_FUNCTION__ ));
1292	radix = 2;
1293	DigitsBegin = s;
1294	s = SkipBinaryDigits(s);
1295	if (s == ThisTokEnd) {
1296	// Done.
1297	} else if (isHexDigit(*s) &&
1298	!isValidUDSuffix(LangOpts, StringRef(s, ThisTokEnd - s))) {
1299	Diags.Report(Lexer::AdvanceToTokenCharacter(TokLoc, s - ThisTokBegin, SM,
1300	LangOpts),
1301	diag::err_invalid_digit)
1302	<< StringRef(s, 1) << 2;
1303	hadError = true;
1304	}
1305	// Other suffixes will be diagnosed by the caller.
1306	return;
1307	}
1308
1309	// For now, the radix is set to 8. If we discover that we have a
1310	// floating point constant, the radix will change to 10. Octal floating
1311	// point constants are not permitted (only decimal and hexadecimal).
1312	radix = 8;
1313	const char *PossibleNewDigitStart = s;
1314	s = SkipOctalDigits(s);
1315	// When the value is 0 followed by a suffix (like 0wb), we want to leave 0
1316	// as the start of the digits. So if skipping octal digits does not skip
1317	// anything, we leave the digit start where it was.
1318	if (s != PossibleNewDigitStart)
1319	DigitsBegin = PossibleNewDigitStart;
1320
1321	if (s == ThisTokEnd)
1322	return; // Done, simple octal number like 01234
1323
1324	// If we have some other non-octal digit that is a decimal digit, see if
1325	// this is part of a floating point number like 094.123 or 09e1.
1326	if (isDigit(*s)) {
1327	const char *EndDecimal = SkipDigits(s);
1328	if (EndDecimal[0] == '.' \|\| EndDecimal[0] == 'e' \|\| EndDecimal[0] == 'E') {
1329	s = EndDecimal;
1330	radix = 10;
1331	}
1332	}
1333
1334	ParseDecimalOrOctalCommon(TokLoc);
1335	}
1336
1337	static bool alwaysFitsInto64Bits(unsigned Radix, unsigned NumDigits) {
1338	switch (Radix) {
1339	case 2:
1340	return NumDigits <= 64;
1341	case 8:
1342	return NumDigits <= 64 / 3; // Digits are groups of 3 bits.
1343	case 10:
1344	return NumDigits <= 19; // floor(log10(2^64))
1345	case 16:
1346	return NumDigits <= 64 / 4; // Digits are groups of 4 bits.
1347	default:
1348	llvm_unreachable("impossible Radix")::llvm::llvm_unreachable_internal("impossible Radix", "clang/lib/Lex/LiteralSupport.cpp" , 1348);
1349	}
1350	}
1351
1352	/// GetIntegerValue - Convert this numeric literal value to an APInt that
1353	/// matches Val's input width. If there is an overflow, set Val to the low bits
1354	/// of the result and return true. Otherwise, return false.
1355	bool NumericLiteralParser::GetIntegerValue(llvm::APInt &Val) {
1356	// Fast path: Compute a conservative bound on the maximum number of
1357	// bits per digit in this radix. If we can't possibly overflow a
1358	// uint64 based on that bound then do the simple conversion to
1359	// integer. This avoids the expensive overflow checking below, and
1360	// handles the common cases that matter (small decimal integers and
1361	// hex/octal values which don't overflow).
1362	const unsigned NumDigits = SuffixBegin - DigitsBegin;
1363	if (alwaysFitsInto64Bits(radix, NumDigits)) {
1364	uint64_t N = 0;
1365	for (const char *Ptr = DigitsBegin; Ptr != SuffixBegin; ++Ptr)
1366	if (!isDigitSeparator(*Ptr))
1367	N = N * radix + llvm::hexDigitValue(*Ptr);
1368
1369	// This will truncate the value to Val's input width. Simply check
1370	// for overflow by comparing.
1371	Val = N;
1372	return Val.getZExtValue() != N;
1373	}
1374
1375	Val = 0;
1376	const char *Ptr = DigitsBegin;
1377
1378	llvm::APInt RadixVal(Val.getBitWidth(), radix);
1379	llvm::APInt CharVal(Val.getBitWidth(), 0);
1380	llvm::APInt OldVal = Val;
1381
1382	bool OverflowOccurred = false;
1383	while (Ptr < SuffixBegin) {
1384	if (isDigitSeparator(*Ptr)) {
1385	++Ptr;
1386	continue;
1387	}
1388
1389	unsigned C = llvm::hexDigitValue(*Ptr++);
1390
1391	// If this letter is out of bound for this radix, reject it.
1392	assert(C < radix && "NumericLiteralParser ctor should have rejected this")(static_cast <bool> (C < radix && "NumericLiteralParser ctor should have rejected this" ) ? void (0) : __assert_fail ("C < radix && \"NumericLiteralParser ctor should have rejected this\"" , "clang/lib/Lex/LiteralSupport.cpp", 1392, __extension__ __PRETTY_FUNCTION__ ));
1393
1394	CharVal = C;
1395
1396	// Add the digit to the value in the appropriate radix. If adding in digits
1397	// made the value smaller, then this overflowed.
1398	OldVal = Val;
1399
1400	// Multiply by radix, did overflow occur on the multiply?
1401	Val *= RadixVal;
1402	OverflowOccurred \|= Val.udiv(RadixVal) != OldVal;
1403
1404	// Add value, did overflow occur on the value?
1405	// (a + b) ult b <=> overflow
1406	Val += CharVal;
1407	OverflowOccurred \|= Val.ult(CharVal);
1408	}
1409	return OverflowOccurred;
1410	}
1411
1412	llvm::APFloat::opStatus
1413	NumericLiteralParser::GetFloatValue(llvm::APFloat &Result) {
1414	using llvm::APFloat;
1415
1416	unsigned n = std::min(SuffixBegin - ThisTokBegin, ThisTokEnd - ThisTokBegin);
1417
1418	llvm::SmallString<16> Buffer;
1419	StringRef Str(ThisTokBegin, n);
1420	if (Str.contains('\'')) {
1421	Buffer.reserve(n);
1422	std::remove_copy_if(Str.begin(), Str.end(), std::back_inserter(Buffer),
1423	&isDigitSeparator);
1424	Str = Buffer;
1425	}
1426
1427	auto StatusOrErr =
1428	Result.convertFromString(Str, APFloat::rmNearestTiesToEven);
1429	assert(StatusOrErr && "Invalid floating point representation")(static_cast <bool> (StatusOrErr && "Invalid floating point representation" ) ? void (0) : __assert_fail ("StatusOrErr && \"Invalid floating point representation\"" , "clang/lib/Lex/LiteralSupport.cpp", 1429, __extension__ __PRETTY_FUNCTION__ ));
1430	return !errorToBool(StatusOrErr.takeError()) ? *StatusOrErr
1431	: APFloat::opInvalidOp;
1432	}
1433
/// Return true if \p c is an exponent marker: 'e'/'E' or 'p'/'P'.
static inline bool IsExponentPart(char c) {
  switch (c) {
  case 'e':
  case 'E':
  case 'p':
  case 'P':
    return true;
  default:
    return false;
  }
}
1437
1438	bool NumericLiteralParser::GetFixedPointValue(llvm::APInt &StoreVal, unsigned Scale) {
1439	assert(radix == 16 \|\| radix == 10)(static_cast <bool> (radix == 16 \|\| radix == 10) ? void (0) : __assert_fail ("radix == 16 \|\| radix == 10", "clang/lib/Lex/LiteralSupport.cpp" , 1439, __extension__ __PRETTY_FUNCTION__));
1440
1441	// Find how many digits are needed to store the whole literal.
1442	unsigned NumDigits = SuffixBegin - DigitsBegin;
1443	if (saw_period) --NumDigits;
1444
1445	// Initial scan of the exponent if it exists
1446	bool ExpOverflowOccurred = false;
1447	bool NegativeExponent = false;
1448	const char *ExponentBegin;
1449	uint64_t Exponent = 0;
1450	int64_t BaseShift = 0;
1451	if (saw_exponent) {
1452	const char *Ptr = DigitsBegin;
1453
1454	while (!IsExponentPart(*Ptr)) ++Ptr;
1455	ExponentBegin = Ptr;
1456	++Ptr;
1457	NegativeExponent = *Ptr == '-';
1458	if (NegativeExponent) ++Ptr;
1459
1460	unsigned NumExpDigits = SuffixBegin - Ptr;
1461	if (alwaysFitsInto64Bits(radix, NumExpDigits)) {
1462	llvm::StringRef ExpStr(Ptr, NumExpDigits);
1463	llvm::APInt ExpInt(/numBits=/64, ExpStr, /radix=/10);
1464	Exponent = ExpInt.getZExtValue();
1465	} else {
1466	ExpOverflowOccurred = true;
1467	}
1468
1469	if (NegativeExponent) BaseShift -= Exponent;
1470	else BaseShift += Exponent;
1471	}
1472
1473	// Number of bits needed for decimal literal is
1474	// ceil(NumDigits * log2(10)) Integral part
1475	// + Scale Fractional part
1476	// + ceil(Exponent * log2(10)) Exponent
1477	// --------------------------------------------------
1478	// ceil((NumDigits + Exponent) * log2(10)) + Scale
1479	//
1480	// But for simplicity in handling integers, we can round up log2(10) to 4,
1481	// making:
1482	// 4 * (NumDigits + Exponent) + Scale
1483	//
1484	// Number of digits needed for hexadecimal literal is
1485	// 4 * NumDigits Integral part
1486	// + Scale Fractional part
1487	// + Exponent Exponent
1488	// --------------------------------------------------
1489	// (4 * NumDigits) + Scale + Exponent
1490	uint64_t NumBitsNeeded;
1491	if (radix == 10)
1492	NumBitsNeeded = 4 * (NumDigits + Exponent) + Scale;
1493	else
1494	NumBitsNeeded = 4 * NumDigits + Exponent + Scale;
1495
1496	if (NumBitsNeeded > std::numeric_limits<unsigned>::max())
1497	ExpOverflowOccurred = true;
1498	llvm::APInt Val(static_cast<unsigned>(NumBitsNeeded), 0, /isSigned=/false);
1499
1500	bool FoundDecimal = false;
1501
1502	int64_t FractBaseShift = 0;
1503	const char *End = saw_exponent ? ExponentBegin : SuffixBegin;
1504	for (const char *Ptr = DigitsBegin; Ptr < End; ++Ptr) {
1505	if (*Ptr == '.') {
1506	FoundDecimal = true;
1507	continue;
1508	}
1509
1510	// Normal reading of an integer
1511	unsigned C = llvm::hexDigitValue(*Ptr);
1512	assert(C < radix && "NumericLiteralParser ctor should have rejected this")(static_cast <bool> (C < radix && "NumericLiteralParser ctor should have rejected this" ) ? void (0) : __assert_fail ("C < radix && \"NumericLiteralParser ctor should have rejected this\"" , "clang/lib/Lex/LiteralSupport.cpp", 1512, __extension__ __PRETTY_FUNCTION__ ));
1513
1514	Val *= radix;
1515	Val += C;
1516
1517	if (FoundDecimal)
1518	// Keep track of how much we will need to adjust this value by from the
1519	// number of digits past the radix point.
1520	--FractBaseShift;
1521	}
1522
1523	// For a radix of 16, we will be multiplying by 2 instead of 16.
1524	if (radix == 16) FractBaseShift *= 4;
1525	BaseShift += FractBaseShift;
1526
1527	Val <<= Scale;
1528
1529	uint64_t Base = (radix == 16) ? 2 : 10;
1530	if (BaseShift > 0) {
1531	for (int64_t i = 0; i < BaseShift; ++i) {
1532	Val *= Base;
1533	}
1534	} else if (BaseShift < 0) {
1535	for (int64_t i = BaseShift; i < 0 && !Val.isZero(); ++i)
1536	Val = Val.udiv(Base);
1537	}
1538
1539	bool IntOverflowOccurred = false;
1540	auto MaxVal = llvm::APInt::getMaxValue(StoreVal.getBitWidth());
1541	if (Val.getBitWidth() > StoreVal.getBitWidth()) {
1542	IntOverflowOccurred \|= Val.ugt(MaxVal.zext(Val.getBitWidth()));
1543	StoreVal = Val.trunc(StoreVal.getBitWidth());
1544	} else if (Val.getBitWidth() < StoreVal.getBitWidth()) {
1545	IntOverflowOccurred \|= Val.zext(MaxVal.getBitWidth()).ugt(MaxVal);
1546	StoreVal = Val.zext(StoreVal.getBitWidth());
1547	} else {
1548	StoreVal = Val;
1549	}
1550
1551	return IntOverflowOccurred \|\| ExpOverflowOccurred;
1552	}
1553
1554	/// \verbatim
1555	/// user-defined-character-literal: [C++11 lex.ext]
1556	/// character-literal ud-suffix
1557	/// ud-suffix:
1558	/// identifier
1559	/// character-literal: [C++11 lex.ccon]
1560	/// ' c-char-sequence '
1561	/// u' c-char-sequence '
1562	/// U' c-char-sequence '
1563	/// L' c-char-sequence '
1564	/// u8' c-char-sequence ' [C++1z lex.ccon]
1565	/// c-char-sequence:
1566	/// c-char
1567	/// c-char-sequence c-char
1568	/// c-char:
1569	/// any member of the source character set except the single-quote ',
1570	/// backslash \, or new-line character
1571	/// escape-sequence
1572	/// universal-character-name
1573	/// escape-sequence:
1574	/// simple-escape-sequence
1575	/// octal-escape-sequence
1576	/// hexadecimal-escape-sequence
1577	/// simple-escape-sequence:
1578	/// one of \' \" \? \\ \a \b \f \n \r \t \v
1579	/// octal-escape-sequence:
1580	/// \ octal-digit
1581	/// \ octal-digit octal-digit
1582	/// \ octal-digit octal-digit octal-digit
1583	/// hexadecimal-escape-sequence:
1584	/// \x hexadecimal-digit
1585	/// hexadecimal-escape-sequence hexadecimal-digit
1586	/// universal-character-name: [C++11 lex.charset]
1587	/// \u hex-quad
1588	/// \U hex-quad hex-quad
1589	/// hex-quad:
1590	/// hex-digit hex-digit hex-digit hex-digit
1591	/// \endverbatim
1592	///
1593	CharLiteralParser::CharLiteralParser(const char begin, const char end,
1594	SourceLocation Loc, Preprocessor &PP,
1595	tok::TokenKind kind) {
1596	// At this point we know that the character matches the regex "(L\|u\|U)?'.*'".
1597	HadError = false;
1598
1599	Kind = kind;
1600
1601	const char *TokBegin = begin;
1602
1603	// Skip over wide character determinant.
1604	if (Kind != tok::char_constant)
1605	++begin;
1606	if (Kind == tok::utf8_char_constant)
1607	++begin;
1608
1609	// Skip over the entry quote.
1610	if (begin[0] != '\'') {
1611	PP.Diag(Loc, diag::err_lexing_char);
1612	HadError = true;
1613	return;
1614	}
1615
1616	++begin;
1617
1618	// Remove an optional ud-suffix.
1619	if (end[-1] != '\'') {
1620	const char *UDSuffixEnd = end;
1621	do {
1622	--end;
1623	} while (end[-1] != '\'');
1624	// FIXME: Don't bother with this if !tok.hasUCN().
1625	expandUCNs(UDSuffixBuf, StringRef(end, UDSuffixEnd - end));
1626	UDSuffixOffset = end - TokBegin;
1627	}
1628
1629	// Trim the ending quote.
1630	assert(end != begin && "Invalid token lexed")(static_cast <bool> (end != begin && "Invalid token lexed" ) ? void (0) : __assert_fail ("end != begin && \"Invalid token lexed\"" , "clang/lib/Lex/LiteralSupport.cpp", 1630, __extension__ __PRETTY_FUNCTION__ ));
1631	--end;
1632
1633	// FIXME: The "Value" is an uint64_t so we can handle char literals of
1634	// up to 64-bits.
1635	// FIXME: This extensively assumes that 'char' is 8-bits.
1636	assert(PP.getTargetInfo().getCharWidth() == 8 &&(static_cast <bool> (PP.getTargetInfo().getCharWidth() == 8 && "Assumes char is 8 bits") ? void (0) : __assert_fail ("PP.getTargetInfo().getCharWidth() == 8 && \"Assumes char is 8 bits\"" , "clang/lib/Lex/LiteralSupport.cpp", 1637, __extension__ __PRETTY_FUNCTION__ ))
1637	"Assumes char is 8 bits")(static_cast <bool> (PP.getTargetInfo().getCharWidth() == 8 && "Assumes char is 8 bits") ? void (0) : __assert_fail ("PP.getTargetInfo().getCharWidth() == 8 && \"Assumes char is 8 bits\"" , "clang/lib/Lex/LiteralSupport.cpp", 1637, __extension__ __PRETTY_FUNCTION__ ));
1638	assert(PP.getTargetInfo().getIntWidth() <= 64 &&(static_cast <bool> (PP.getTargetInfo().getIntWidth() <= 64 && (PP.getTargetInfo().getIntWidth() & 7) == 0 && "Assumes sizeof(int) on target is <= 64 and a multiple of char" ) ? void (0) : __assert_fail ("PP.getTargetInfo().getIntWidth() <= 64 && (PP.getTargetInfo().getIntWidth() & 7) == 0 && \"Assumes sizeof(int) on target is <= 64 and a multiple of char\"" , "clang/lib/Lex/LiteralSupport.cpp", 1640, __extension__ __PRETTY_FUNCTION__ ))
1639	(PP.getTargetInfo().getIntWidth() & 7) == 0 &&(static_cast <bool> (PP.getTargetInfo().getIntWidth() <= 64 && (PP.getTargetInfo().getIntWidth() & 7) == 0 && "Assumes sizeof(int) on target is <= 64 and a multiple of char" ) ? void (0) : __assert_fail ("PP.getTargetInfo().getIntWidth() <= 64 && (PP.getTargetInfo().getIntWidth() & 7) == 0 && \"Assumes sizeof(int) on target is <= 64 and a multiple of char\"" , "clang/lib/Lex/LiteralSupport.cpp", 1640, __extension__ __PRETTY_FUNCTION__ ))
1640	"Assumes sizeof(int) on target is <= 64 and a multiple of char")(static_cast <bool> (PP.getTargetInfo().getIntWidth() <= 64 && (PP.getTargetInfo().getIntWidth() & 7) == 0 && "Assumes sizeof(int) on target is <= 64 and a multiple of char" ) ? void (0) : __assert_fail ("PP.getTargetInfo().getIntWidth() <= 64 && (PP.getTargetInfo().getIntWidth() & 7) == 0 && \"Assumes sizeof(int) on target is <= 64 and a multiple of char\"" , "clang/lib/Lex/LiteralSupport.cpp", 1640, __extension__ __PRETTY_FUNCTION__ ));
1641	assert(PP.getTargetInfo().getWCharWidth() <= 64 &&(static_cast <bool> (PP.getTargetInfo().getWCharWidth() <= 64 && "Assumes sizeof(wchar) on target is <= 64" ) ? void (0) : __assert_fail ("PP.getTargetInfo().getWCharWidth() <= 64 && \"Assumes sizeof(wchar) on target is <= 64\"" , "clang/lib/Lex/LiteralSupport.cpp", 1642, __extension__ __PRETTY_FUNCTION__ ))
1642	"Assumes sizeof(wchar) on target is <= 64")(static_cast <bool> (PP.getTargetInfo().getWCharWidth() <= 64 && "Assumes sizeof(wchar) on target is <= 64" ) ? void (0) : __assert_fail ("PP.getTargetInfo().getWCharWidth() <= 64 && \"Assumes sizeof(wchar) on target is <= 64\"" , "clang/lib/Lex/LiteralSupport.cpp", 1642, __extension__ __PRETTY_FUNCTION__ ));
1643
1644	SmallVector<uint32_t, 4> codepoint_buffer;
1645	codepoint_buffer.resize(end - begin);
1646	uint32_t *buffer_begin = &codepoint_buffer.front();
1647	uint32_t *buffer_end = buffer_begin + codepoint_buffer.size();
1648
1649	// Unicode escapes representing characters that cannot be correctly
1650	// represented in a single code unit are disallowed in character literals
1651	// by this implementation.
1652	uint32_t largest_character_for_kind;
1653	if (tok::wide_char_constant == Kind) {
1654	largest_character_for_kind =
1655	0xFFFFFFFFu >> (32-PP.getTargetInfo().getWCharWidth());
1656	} else if (tok::utf8_char_constant == Kind) {
1657	largest_character_for_kind = 0x7F;
1658	} else if (tok::utf16_char_constant == Kind) {
1659	largest_character_for_kind = 0xFFFF;
1660	} else if (tok::utf32_char_constant == Kind) {
1661	largest_character_for_kind = 0x10FFFF;
1662	} else {
1663	largest_character_for_kind = 0x7Fu;
1664	}
1665
1666	while (begin != end) {
1667	// Is this a span of non-escape characters?
1668	if (begin[0] != '\\') {
1669	char const *start = begin;
1670	do {
1671	++begin;
1672	} while (begin != end && *begin != '\\');
1673
1674	char const *tmp_in_start = start;
1675	uint32_t *tmp_out_start = buffer_begin;
1676	llvm::ConversionResult res =
1677	llvm::ConvertUTF8toUTF32(reinterpret_cast<llvm::UTF8 const **>(&start),
1678	reinterpret_cast<llvm::UTF8 const *>(begin),
1679	&buffer_begin, buffer_end, llvm::strictConversion);
1680	if (res != llvm::conversionOK) {
1681	// If we see bad encoding for unprefixed character literals, warn and
1682	// simply copy the byte values, for compatibility with gcc and
1683	// older versions of clang.
1684	bool NoErrorOnBadEncoding = isOrdinary();
1685	unsigned Msg = diag::err_bad_character_encoding;
1686	if (NoErrorOnBadEncoding)
1687	Msg = diag::warn_bad_character_encoding;
1688	PP.Diag(Loc, Msg);
1689	if (NoErrorOnBadEncoding) {
1690	start = tmp_in_start;
1691	buffer_begin = tmp_out_start;
1692	for (; start != begin; ++start, ++buffer_begin)
1693	buffer_begin = static_cast<uint8_t>(start);
1694	} else {
1695	HadError = true;
1696	}
1697	} else {
1698	for (; tmp_out_start < buffer_begin; ++tmp_out_start) {
1699	if (*tmp_out_start > largest_character_for_kind) {
1700	HadError = true;
1701	PP.Diag(Loc, diag::err_character_too_large);
1702	}
1703	}
1704	}
1705
1706	continue;
1707	}
1708	// Is this a Universal Character Name escape?
1709	if (begin[1] == 'u' \|\| begin[1] == 'U' \|\| begin[1] == 'N') {
1710	unsigned short UcnLen = 0;
1711	if (!ProcessUCNEscape(TokBegin, begin, end, *buffer_begin, UcnLen,
1712	FullSourceLoc(Loc, PP.getSourceManager()),
1713	&PP.getDiagnostics(), PP.getLangOpts(), true)) {
1714	HadError = true;
1715	} else if (*buffer_begin > largest_character_for_kind) {
1716	HadError = true;
1717	PP.Diag(Loc, diag::err_character_too_large);
1718	}
1719
1720	++buffer_begin;
1721	continue;
1722	}
1723	unsigned CharWidth = getCharWidth(Kind, PP.getTargetInfo());
1724	uint64_t result =
1725	ProcessCharEscape(TokBegin, begin, end, HadError,
1726	FullSourceLoc(Loc,PP.getSourceManager()),
1727	CharWidth, &PP.getDiagnostics(), PP.getLangOpts());
1728	*buffer_begin++ = result;
1729	}
1730
1731	unsigned NumCharsSoFar = buffer_begin - &codepoint_buffer.front();
1732
1733	if (NumCharsSoFar > 1) {
1734	if (isOrdinary() && NumCharsSoFar == 4)
1735	PP.Diag(Loc, diag::warn_four_char_character_literal);
1736	else if (isOrdinary())
1737	PP.Diag(Loc, diag::warn_multichar_character_literal);
1738	else {
1739	PP.Diag(Loc, diag::err_multichar_character_literal) << (isWide() ? 0 : 1);
1740	HadError = true;
1741	}
1742	IsMultiChar = true;
1743	} else {
1744	IsMultiChar = false;
1745	}
1746
1747	llvm::APInt LitVal(PP.getTargetInfo().getIntWidth(), 0);
1748
1749	// Narrow character literals act as though their value is concatenated
1750	// in this implementation, but warn on overflow.
1751	bool multi_char_too_long = false;
1752	if (isOrdinary() && isMultiChar()) {
1753	LitVal = 0;
1754	for (size_t i = 0; i < NumCharsSoFar; ++i) {
1755	// check for enough leading zeros to shift into
1756	multi_char_too_long \|= (LitVal.countLeadingZeros() < 8);
1757	LitVal <<= 8;
1758	LitVal = LitVal + (codepoint_buffer[i] & 0xFF);
1759	}
1760	} else if (NumCharsSoFar > 0) {
1761	// otherwise just take the last character
1762	LitVal = buffer_begin[-1];
1763	}
1764
1765	if (!HadError && multi_char_too_long) {
1766	PP.Diag(Loc, diag::warn_char_constant_too_large);
1767	}
1768
1769	// Transfer the value from APInt to uint64_t
1770	Value = LitVal.getZExtValue();
1771
1772	// If this is a single narrow character, sign extend it (e.g. '\xFF' is "-1")
1773	// if 'char' is signed for this target (C99 6.4.4.4p10). Note that multiple
1774	// character constants are not sign extended in the this implementation:
1775	// '\xFF\xFF' = 65536 and '\x0\xFF' = 255, which matches GCC.
1776	if (isOrdinary() && NumCharsSoFar == 1 && (Value & 128) &&
1777	PP.getLangOpts().CharIsSigned)
1778	Value = (signed char)Value;
1779	}
1780
1781	/// \verbatim
1782	/// string-literal: [C++0x lex.string]
1783	/// encoding-prefix " [s-char-sequence] "
1784	/// encoding-prefix R raw-string
1785	/// encoding-prefix:
1786	/// u8
1787	/// u
1788	/// U
1789	/// L
1790	/// s-char-sequence:
1791	/// s-char
1792	/// s-char-sequence s-char
1793	/// s-char:
1794	/// any member of the source character set except the double-quote ",
1795	/// backslash \, or new-line character
1796	/// escape-sequence
1797	/// universal-character-name
1798	/// raw-string:
1799	/// " d-char-sequence ( r-char-sequence ) d-char-sequence "
1800	/// r-char-sequence:
1801	/// r-char
1802	/// r-char-sequence r-char
1803	/// r-char:
1804	/// any member of the source character set, except a right parenthesis )
1805	/// followed by the initial d-char-sequence (which may be empty)
1806	/// followed by a double quote ".
1807	/// d-char-sequence:
1808	/// d-char
1809	/// d-char-sequence d-char
1810	/// d-char:
1811	/// any member of the basic source character set except:
1812	/// space, the left parenthesis (, the right parenthesis ),
1813	/// the backslash \, and the control characters representing horizontal
1814	/// tab, vertical tab, form feed, and newline.
1815	/// escape-sequence: [C++0x lex.ccon]
1816	/// simple-escape-sequence
1817	/// octal-escape-sequence
1818	/// hexadecimal-escape-sequence
1819	/// simple-escape-sequence:
1820	/// one of \' \" \? \\ \a \b \f \n \r \t \v
1821	/// octal-escape-sequence:
1822	/// \ octal-digit
1823	/// \ octal-digit octal-digit
1824	/// \ octal-digit octal-digit octal-digit
1825	/// hexadecimal-escape-sequence:
1826	/// \x hexadecimal-digit
1827	/// hexadecimal-escape-sequence hexadecimal-digit
1828	/// universal-character-name:
1829	/// \u hex-quad
1830	/// \U hex-quad hex-quad
1831	/// hex-quad:
1832	/// hex-digit hex-digit hex-digit hex-digit
1833	/// \endverbatim
1834	///
1835	StringLiteralParser::
1836	StringLiteralParser(ArrayRef<Token> StringToks,
1837	Preprocessor &PP)
1838	: SM(PP.getSourceManager()), Features(PP.getLangOpts()),
1839	Target(PP.getTargetInfo()), Diags(&PP.getDiagnostics()),
1840	MaxTokenLength(0), SizeBound(0), CharByteWidth(0), Kind(tok::unknown),
1841	ResultPtr(ResultBuf.data()), hadError(false), Pascal(false) {
1842	init(StringToks);
1843	}
1844
1845	void StringLiteralParser::init(ArrayRef<Token> StringToks){
1846	// The literal token may have come from an invalid source location (e.g. due
1847	// to a PCH error), in which case the token length will be 0.
1848	if (StringToks.empty() \|\| StringToks[0].getLength() < 2)
1849	return DiagnoseLexingError(SourceLocation());
1850
1851	// Scan all of the string portions, remember the max individual token length,
1852	// computing a bound on the concatenated string length, and see whether any
1853	// piece is a wide-string. If any of the string portions is a wide-string
1854	// literal, the result is a wide-string literal [C99 6.4.5p4].
1855	assert(!StringToks.empty() && "expected at least one token")(static_cast <bool> (!StringToks.empty() && "expected at least one token" ) ? void (0) : __assert_fail ("!StringToks.empty() && \"expected at least one token\"" , "clang/lib/Lex/LiteralSupport.cpp", 1855, __extension__ __PRETTY_FUNCTION__ ));
1856	MaxTokenLength = StringToks[0].getLength();
1857	assert(StringToks[0].getLength() >= 2 && "literal token is invalid!")(static_cast <bool> (StringToks[0].getLength() >= 2 && "literal token is invalid!") ? void (0) : __assert_fail ("StringToks[0].getLength() >= 2 && \"literal token is invalid!\"" , "clang/lib/Lex/LiteralSupport.cpp", 1857, __extension__ __PRETTY_FUNCTION__ ));
1858	SizeBound = StringToks[0].getLength()-2; // -2 for "".
1859	Kind = StringToks[0].getKind();
1860
1861	hadError = false;
1862
1863	// Implement Translation Phase #6: concatenation of string literals
1864	/// (C99 5.1.1.2p1). The common case is only one string fragment.
1865	for (unsigned i = 1; i != StringToks.size(); ++i) {
1866	if (StringToks[i].getLength() < 2)
1867	return DiagnoseLexingError(StringToks[i].getLocation());
1868
1869	// The string could be shorter than this if it needs cleaning, but this is a
1870	// reasonable bound, which is all we need.
1871	assert(StringToks[i].getLength() >= 2 && "literal token is invalid!")(static_cast <bool> (StringToks[i].getLength() >= 2 && "literal token is invalid!") ? void (0) : __assert_fail ("StringToks[i].getLength() >= 2 && \"literal token is invalid!\"" , "clang/lib/Lex/LiteralSupport.cpp", 1871, __extension__ __PRETTY_FUNCTION__ ));
1872	SizeBound += StringToks[i].getLength()-2; // -2 for "".
1873
1874	// Remember maximum string piece length.
1875	if (StringToks[i].getLength() > MaxTokenLength)
1876	MaxTokenLength = StringToks[i].getLength();
1877
1878	// Remember if we see any wide or utf-8/16/32 strings.
1879	// Also check for illegal concatenations.
1880	if (StringToks[i].isNot(Kind) && StringToks[i].isNot(tok::string_literal)) {
1881	if (isOrdinary()) {
1882	Kind = StringToks[i].getKind();
1883	} else {
1884	if (Diags)
1885	Diags->Report(StringToks[i].getLocation(),
1886	diag::err_unsupported_string_concat);
1887	hadError = true;
1888	}
1889	}
1890	}
1891
1892	// Include space for the null terminator.
1893	++SizeBound;
1894
1895	// TODO: K&R warning: "traditional C rejects string constant concatenation"
1896
1897	// Get the width in bytes of char/wchar_t/char16_t/char32_t
1898	CharByteWidth = getCharWidth(Kind, Target);
1899	assert((CharByteWidth & 7) == 0 && "Assumes character size is byte multiple")(static_cast <bool> ((CharByteWidth & 7) == 0 && "Assumes character size is byte multiple") ? void (0) : __assert_fail ("(CharByteWidth & 7) == 0 && \"Assumes character size is byte multiple\"" , "clang/lib/Lex/LiteralSupport.cpp", 1899, __extension__ __PRETTY_FUNCTION__ ));
1900	CharByteWidth /= 8;
1901
1902	// The output buffer size needs to be large enough to hold wide characters.
1903	// This is a worst-case assumption which basically corresponds to L"" "long".
1904	SizeBound *= CharByteWidth;
1905
1906	// Size the temporary buffer to hold the result string data.
1907	ResultBuf.resize(SizeBound);
1908
1909	// Likewise, but for each string piece.
1910	SmallString<512> TokenBuf;
1911	TokenBuf.resize(MaxTokenLength);
1912
1913	// Loop over all the strings, getting their spelling, and expanding them to
1914	// wide strings as appropriate.
1915	ResultPtr = &ResultBuf[0]; // Next byte to fill in.
1916
1917	Pascal = false;
1918
1919	SourceLocation UDSuffixTokLoc;
1920
1921	for (unsigned i = 0, e = StringToks.size(); i != e; ++i) {
1922	const char *ThisTokBuf = &TokenBuf[0];
1923	// Get the spelling of the token, which eliminates trigraphs, etc. We know
1924	// that ThisTokBuf points to a buffer that is big enough for the whole token
1925	// and 'spelled' tokens can only shrink.
1926	bool StringInvalid = false;
1927	unsigned ThisTokLen =
1928	Lexer::getSpelling(StringToks[i], ThisTokBuf, SM, Features,
1929	&StringInvalid);
1930	if (StringInvalid)
1931	return DiagnoseLexingError(StringToks[i].getLocation());
1932
1933	const char *ThisTokBegin = ThisTokBuf;
1934	const char *ThisTokEnd = ThisTokBuf+ThisTokLen;
1935
1936	// Remove an optional ud-suffix.
1937	if (ThisTokEnd[-1] != '"') {
1938	const char *UDSuffixEnd = ThisTokEnd;
1939	do {
1940	--ThisTokEnd;
1941	} while (ThisTokEnd[-1] != '"');
1942
1943	StringRef UDSuffix(ThisTokEnd, UDSuffixEnd - ThisTokEnd);
1944
1945	if (UDSuffixBuf.empty()) {
1946	if (StringToks[i].hasUCN())
1947	expandUCNs(UDSuffixBuf, UDSuffix);
1948	else
1949	UDSuffixBuf.assign(UDSuffix);
1950	UDSuffixToken = i;
1951	UDSuffixOffset = ThisTokEnd - ThisTokBuf;
1952	UDSuffixTokLoc = StringToks[i].getLocation();
1953	} else {
1954	SmallString<32> ExpandedUDSuffix;
1955	if (StringToks[i].hasUCN()) {
1956	expandUCNs(ExpandedUDSuffix, UDSuffix);
1957	UDSuffix = ExpandedUDSuffix;
1958	}
1959
1960	// C++11 [lex.ext]p8: At the end of phase 6, if a string literal is the
1961	// result of a concatenation involving at least one user-defined-string-
1962	// literal, all the participating user-defined-string-literals shall
1963	// have the same ud-suffix.
1964	if (UDSuffixBuf != UDSuffix) {
1965	if (Diags) {
1966	SourceLocation TokLoc = StringToks[i].getLocation();
1967	Diags->Report(TokLoc, diag::err_string_concat_mixed_suffix)
1968	<< UDSuffixBuf << UDSuffix
1969	<< SourceRange(UDSuffixTokLoc, UDSuffixTokLoc)
1970	<< SourceRange(TokLoc, TokLoc);
1971	}
1972	hadError = true;
1973	}
1974	}
1975	}
1976
1977	// Strip the end quote.
1978	--ThisTokEnd;
1979
1980	// TODO: Input character set mapping support.
1981
1982	// Skip marker for wide or unicode strings.
1983	if (ThisTokBuf[0] == 'L' \|\| ThisTokBuf[0] == 'u' \|\| ThisTokBuf[0] == 'U') {
1984	++ThisTokBuf;
1985	// Skip 8 of u8 marker for utf8 strings.
1986	if (ThisTokBuf[0] == '8')
1987	++ThisTokBuf;
1988	}
1989
1990	// Check for raw string
1991	if (ThisTokBuf[0] == 'R') {
1992	if (ThisTokBuf[1] != '"') {
1993	// The file may have come from PCH and then changed after loading the
1994	// PCH; Fail gracefully.
1995	return DiagnoseLexingError(StringToks[i].getLocation());
1996	}
1997	ThisTokBuf += 2; // skip R"
1998
1999	// C++11 [lex.string]p2: A `d-char-sequence` shall consist of at most 16
2000	// characters.
2001	constexpr unsigned MaxRawStrDelimLen = 16;
2002
2003	const char *Prefix = ThisTokBuf;
2004	while (static_cast<unsigned>(ThisTokBuf - Prefix) < MaxRawStrDelimLen &&
2005	ThisTokBuf[0] != '(')
2006	++ThisTokBuf;
2007	if (ThisTokBuf[0] != '(')
2008	return DiagnoseLexingError(StringToks[i].getLocation());
2009	++ThisTokBuf; // skip '('
2010
2011	// Remove same number of characters from the end
2012	ThisTokEnd -= ThisTokBuf - Prefix;
2013	if (ThisTokEnd < ThisTokBuf)
2014	return DiagnoseLexingError(StringToks[i].getLocation());
2015
2016	// C++14 [lex.string]p4: A source-file new-line in a raw string literal
2017	// results in a new-line in the resulting execution string-literal.
2018	StringRef RemainingTokenSpan(ThisTokBuf, ThisTokEnd - ThisTokBuf);
2019	while (!RemainingTokenSpan.empty()) {
2020	// Split the string literal on \r\n boundaries.
2021	size_t CRLFPos = RemainingTokenSpan.find("\r\n");
2022	StringRef BeforeCRLF = RemainingTokenSpan.substr(0, CRLFPos);
2023	StringRef AfterCRLF = RemainingTokenSpan.substr(CRLFPos);
2024
2025	// Copy everything before the \r\n sequence into the string literal.
2026	if (CopyStringFragment(StringToks[i], ThisTokBegin, BeforeCRLF))
2027	hadError = true;
2028
2029	// Point into the \n inside the \r\n sequence and operate on the
2030	// remaining portion of the literal.
2031	RemainingTokenSpan = AfterCRLF.substr(1);
2032	}
2033	} else {
2034	if (ThisTokBuf[0] != '"') {
2035	// The file may have come from PCH and then changed after loading the
2036	// PCH; Fail gracefully.
2037	return DiagnoseLexingError(StringToks[i].getLocation());
2038	}
2039	++ThisTokBuf; // skip "
2040
2041	// Check if this is a pascal string
2042	if (Features.PascalStrings && ThisTokBuf + 1 != ThisTokEnd &&
2043	ThisTokBuf[0] == '\\' && ThisTokBuf[1] == 'p') {
2044
2045	// If the \p sequence is found in the first token, we have a pascal string
2046	// Otherwise, if we already have a pascal string, ignore the first \p
2047	if (i == 0) {
2048	++ThisTokBuf;
2049	Pascal = true;
2050	} else if (Pascal)
2051	ThisTokBuf += 2;
2052	}
2053
2054	while (ThisTokBuf != ThisTokEnd) {
2055	// Is this a span of non-escape characters?
2056	if (ThisTokBuf[0] != '\\') {
2057	const char *InStart = ThisTokBuf;
2058	do {
2059	++ThisTokBuf;
2060	} while (ThisTokBuf != ThisTokEnd && ThisTokBuf[0] != '\\');
2061
2062	// Copy the character span over.
2063	if (CopyStringFragment(StringToks[i], ThisTokBegin,
2064	StringRef(InStart, ThisTokBuf - InStart)))
2065	hadError = true;
2066	continue;
2067	}
2068	// Is this a Universal Character Name escape?
2069	if (ThisTokBuf[1] == 'u' \|\| ThisTokBuf[1] == 'U' \|\|
2070	ThisTokBuf[1] == 'N') {
2071	EncodeUCNEscape(ThisTokBegin, ThisTokBuf, ThisTokEnd,
2072	ResultPtr, hadError,
2073	FullSourceLoc(StringToks[i].getLocation(), SM),
2074	CharByteWidth, Diags, Features);
2075	continue;
2076	}
2077	// Otherwise, this is a non-UCN escape character. Process it.
2078	unsigned ResultChar =
2079	ProcessCharEscape(ThisTokBegin, ThisTokBuf, ThisTokEnd, hadError,
2080	FullSourceLoc(StringToks[i].getLocation(), SM),
2081	CharByteWidth*8, Diags, Features);
2082
2083	if (CharByteWidth == 4) {
2084	// FIXME: Make the type of the result buffer correct instead of
2085	// using reinterpret_cast.
2086	llvm::UTF32 ResultWidePtr = reinterpret_cast<llvm::UTF32>(ResultPtr);
2087	*ResultWidePtr = ResultChar;
2088	ResultPtr += 4;
2089	} else if (CharByteWidth == 2) {
2090	// FIXME: Make the type of the result buffer correct instead of
2091	// using reinterpret_cast.
2092	llvm::UTF16 ResultWidePtr = reinterpret_cast<llvm::UTF16>(ResultPtr);
2093	*ResultWidePtr = ResultChar & 0xFFFF;
2094	ResultPtr += 2;
2095	} else {
2096	assert(CharByteWidth == 1 && "Unexpected char width")(static_cast <bool> (CharByteWidth == 1 && "Unexpected char width" ) ? void (0) : __assert_fail ("CharByteWidth == 1 && \"Unexpected char width\"" , "clang/lib/Lex/LiteralSupport.cpp", 2096, __extension__ __PRETTY_FUNCTION__ ));
2097	*ResultPtr++ = ResultChar & 0xFF;
2098	}
2099	}
2100	}
2101	}
2102
2103	if (Pascal) {
2104	if (CharByteWidth == 4) {
2105	// FIXME: Make the type of the result buffer correct instead of
2106	// using reinterpret_cast.
2107	llvm::UTF32 ResultWidePtr = reinterpret_cast<llvm::UTF32>(ResultBuf.data());
2108	ResultWidePtr[0] = GetNumStringChars() - 1;
2109	} else if (CharByteWidth == 2) {
2110	// FIXME: Make the type of the result buffer correct instead of
2111	// using reinterpret_cast.
2112	llvm::UTF16 ResultWidePtr = reinterpret_cast<llvm::UTF16>(ResultBuf.data());
2113	ResultWidePtr[0] = GetNumStringChars() - 1;
2114	} else {
2115	assert(CharByteWidth == 1 && "Unexpected char width")(static_cast <bool> (CharByteWidth == 1 && "Unexpected char width" ) ? void (0) : __assert_fail ("CharByteWidth == 1 && \"Unexpected char width\"" , "clang/lib/Lex/LiteralSupport.cpp", 2115, __extension__ __PRETTY_FUNCTION__ ));
2116	ResultBuf[0] = GetNumStringChars() - 1;
2117	}
2118
2119	// Verify that pascal strings aren't too large.
2120	if (GetStringLength() > 256) {
2121	if (Diags)
2122	Diags->Report(StringToks.front().getLocation(),
2123	diag::err_pascal_string_too_long)
2124	<< SourceRange(StringToks.front().getLocation(),
2125	StringToks.back().getLocation());
2126	hadError = true;
2127	return;
2128	}
2129	} else if (Diags) {
2130	// Complain if this string literal has too many characters.
2131	unsigned MaxChars = Features.CPlusPlus? 65536 : Features.C99 ? 4095 : 509;
2132
2133	if (GetNumStringChars() > MaxChars)
2134	Diags->Report(StringToks.front().getLocation(),
2135	diag::ext_string_too_long)
2136	<< GetNumStringChars() << MaxChars
2137	<< (Features.CPlusPlus ? 2 : Features.C99 ? 1 : 0)
2138	<< SourceRange(StringToks.front().getLocation(),
2139	StringToks.back().getLocation());
2140	}
2141	}
2142
2143	static const char resyncUTF8(const char Err, const char *End) {
2144	if (Err == End)
2145	return End;
2146	End = Err + std::min<unsigned>(llvm::getNumBytesForUTF8(*Err), End-Err);
2147	while (++Err != End && (*Err & 0xC0) == 0x80)
2148	;
2149	return Err;
2150	}
2151
2152	/// This function copies from Fragment, which is a sequence of bytes
2153	/// within Tok's contents (which begin at TokBegin) into ResultPtr.
2154	/// Performs widening for multi-byte characters.
2155	bool StringLiteralParser::CopyStringFragment(const Token &Tok,
2156	const char *TokBegin,
2157	StringRef Fragment) {
2158	const llvm::UTF8 *ErrorPtrTmp;
2159	if (ConvertUTF8toWide(CharByteWidth, Fragment, ResultPtr, ErrorPtrTmp))
2160	return false;
2161
2162	// If we see bad encoding for unprefixed string literals, warn and
2163	// simply copy the byte values, for compatibility with gcc and older
2164	// versions of clang.
2165	bool NoErrorOnBadEncoding = isOrdinary();
2166	if (NoErrorOnBadEncoding) {
2167	memcpy(ResultPtr, Fragment.data(), Fragment.size());
2168	ResultPtr += Fragment.size();
2169	}
2170
2171	if (Diags) {
2172	const char ErrorPtr = reinterpret_cast<const char >(ErrorPtrTmp);
2173
2174	FullSourceLoc SourceLoc(Tok.getLocation(), SM);
2175	const DiagnosticBuilder &Builder =
2176	Diag(Diags, Features, SourceLoc, TokBegin,
2177	ErrorPtr, resyncUTF8(ErrorPtr, Fragment.end()),
2178	NoErrorOnBadEncoding ? diag::warn_bad_string_encoding
2179	: diag::err_bad_string_encoding);
2180
2181	const char *NextStart = resyncUTF8(ErrorPtr, Fragment.end());
2182	StringRef NextFragment(NextStart, Fragment.end()-NextStart);
2183
2184	// Decode into a dummy buffer.
2185	SmallString<512> Dummy;
2186	Dummy.reserve(Fragment.size() * CharByteWidth);
2187	char *Ptr = Dummy.data();
2188
2189	while (!ConvertUTF8toWide(CharByteWidth, NextFragment, Ptr, ErrorPtrTmp)) {
2190	const char ErrorPtr = reinterpret_cast<const char >(ErrorPtrTmp);
2191	NextStart = resyncUTF8(ErrorPtr, Fragment.end());
2192	Builder << MakeCharSourceRange(Features, SourceLoc, TokBegin,
2193	ErrorPtr, NextStart);
2194	NextFragment = StringRef(NextStart, Fragment.end()-NextStart);
2195	}
2196	}
2197	return !NoErrorOnBadEncoding;
2198	}
2199
2200	void StringLiteralParser::DiagnoseLexingError(SourceLocation Loc) {
2201	hadError = true;
2202	if (Diags)
2203	Diags->Report(Loc, diag::err_lexing_string);
2204	}
2205
2206	/// getOffsetOfStringByte - This function returns the offset of the
2207	/// specified byte of the string data represented by Token. This handles
2208	/// advancing over escape sequences in the string.
2209	unsigned StringLiteralParser::getOffsetOfStringByte(const Token &Tok,
2210	unsigned ByteNo) const {
2211	// Get the spelling of the token.
2212	SmallString<32> SpellingBuffer;
2213	SpellingBuffer.resize(Tok.getLength());
2214
2215	bool StringInvalid = false;
2216	const char *SpellingPtr = &SpellingBuffer[0];
2217	unsigned TokLen = Lexer::getSpelling(Tok, SpellingPtr, SM, Features,
2218	&StringInvalid);
2219	if (StringInvalid)
2220	return 0;
2221
2222	const char *SpellingStart = SpellingPtr;
2223	const char *SpellingEnd = SpellingPtr+TokLen;
2224
2225	// Handle UTF-8 strings just like narrow strings.
2226	if (SpellingPtr[0] == 'u' && SpellingPtr[1] == '8')
2227	SpellingPtr += 2;
2228
2229	assert(SpellingPtr[0] != 'L' && SpellingPtr[0] != 'u' &&(static_cast <bool> (SpellingPtr[0] != 'L' && SpellingPtr [0] != 'u' && SpellingPtr[0] != 'U' && "Doesn't handle wide or utf strings yet" ) ? void (0) : __assert_fail ("SpellingPtr[0] != 'L' && SpellingPtr[0] != 'u' && SpellingPtr[0] != 'U' && \"Doesn't handle wide or utf strings yet\"" , "clang/lib/Lex/LiteralSupport.cpp", 2230, __extension__ __PRETTY_FUNCTION__ ))
2230	SpellingPtr[0] != 'U' && "Doesn't handle wide or utf strings yet")(static_cast <bool> (SpellingPtr[0] != 'L' && SpellingPtr [0] != 'u' && SpellingPtr[0] != 'U' && "Doesn't handle wide or utf strings yet" ) ? void (0) : __assert_fail ("SpellingPtr[0] != 'L' && SpellingPtr[0] != 'u' && SpellingPtr[0] != 'U' && \"Doesn't handle wide or utf strings yet\"" , "clang/lib/Lex/LiteralSupport.cpp", 2230, __extension__ __PRETTY_FUNCTION__ ));
2231
2232	// For raw string literals, this is easy.
2233	if (SpellingPtr[0] == 'R') {
2234	assert(SpellingPtr[1] == '"' && "Should be a raw string literal!")(static_cast <bool> (SpellingPtr[1] == '"' && "Should be a raw string literal!" ) ? void (0) : __assert_fail ("SpellingPtr[1] == '\"' && \"Should be a raw string literal!\"" , "clang/lib/Lex/LiteralSupport.cpp", 2234, __extension__ __PRETTY_FUNCTION__ ));
2235	// Skip 'R"'.
2236	SpellingPtr += 2;
2237	while (*SpellingPtr != '(') {
2238	++SpellingPtr;
2239	assert(SpellingPtr < SpellingEnd && "Missing ( for raw string literal")(static_cast <bool> (SpellingPtr < SpellingEnd && "Missing ( for raw string literal") ? void (0) : __assert_fail ("SpellingPtr < SpellingEnd && \"Missing ( for raw string literal\"" , "clang/lib/Lex/LiteralSupport.cpp", 2239, __extension__ __PRETTY_FUNCTION__ ));
2240	}
2241	// Skip '('.
2242	++SpellingPtr;
2243	return SpellingPtr - SpellingStart + ByteNo;
2244	}
2245
2246	// Skip over the leading quote
2247	assert(SpellingPtr[0] == '"' && "Should be a string literal!")(static_cast <bool> (SpellingPtr[0] == '"' && "Should be a string literal!" ) ? void (0) : __assert_fail ("SpellingPtr[0] == '\"' && \"Should be a string literal!\"" , "clang/lib/Lex/LiteralSupport.cpp", 2247, __extension__ __PRETTY_FUNCTION__ ));
2248	++SpellingPtr;
2249
2250	// Skip over bytes until we find the offset we're looking for.
2251	while (ByteNo) {
2252	assert(SpellingPtr < SpellingEnd && "Didn't find byte offset!")(static_cast <bool> (SpellingPtr < SpellingEnd && "Didn't find byte offset!") ? void (0) : __assert_fail ("SpellingPtr < SpellingEnd && \"Didn't find byte offset!\"" , "clang/lib/Lex/LiteralSupport.cpp", 2252, __extension__ __PRETTY_FUNCTION__ ));
2253
2254	// Step over non-escapes simply.
2255	if (*SpellingPtr != '\\') {
2256	++SpellingPtr;
2257	--ByteNo;
2258	continue;
2259	}
2260
2261	// Otherwise, this is an escape character. Advance over it.
2262	bool HadError = false;
2263	if (SpellingPtr[1] == 'u' \|\| SpellingPtr[1] == 'U' \|\|
2264	SpellingPtr[1] == 'N') {
2265	const char *EscapePtr = SpellingPtr;
2266	unsigned Len = MeasureUCNEscape(SpellingStart, SpellingPtr, SpellingEnd,
2267	1, Features, HadError);
2268	if (Len > ByteNo) {
2269	// ByteNo is somewhere within the escape sequence.
2270	SpellingPtr = EscapePtr;
2271	break;
2272	}
2273	ByteNo -= Len;
2274	} else {
2275	ProcessCharEscape(SpellingStart, SpellingPtr, SpellingEnd, HadError,
2276	FullSourceLoc(Tok.getLocation(), SM),
2277	CharByteWidth*8, Diags, Features);
2278	--ByteNo;
2279	}
2280	assert(!HadError && "This method isn't valid on erroneous strings")(static_cast <bool> (!HadError && "This method isn't valid on erroneous strings" ) ? void (0) : __assert_fail ("!HadError && \"This method isn't valid on erroneous strings\"" , "clang/lib/Lex/LiteralSupport.cpp", 2280, __extension__ __PRETTY_FUNCTION__ ));
2281	}
2282
2283	return SpellingPtr-SpellingStart;
2284	}
2285
2286	/// Determine whether a suffix is a valid ud-suffix. We avoid treating reserved
2287	/// suffixes as ud-suffixes, because the diagnostic experience is better if we
2288	/// treat it as an invalid suffix.
2289	bool StringLiteralParser::isValidUDSuffix(const LangOptions &LangOpts,
2290	StringRef Suffix) {
2291	return NumericLiteralParser::isValidUDSuffix(LangOpts, Suffix) \|\|
2292	Suffix == "sv";
2293	}