/build/source/clang/lib/Lex/LiteralSupport.cpp

Bug Summary

File:	build/source/clang/lib/Lex/LiteralSupport.cpp
Warning:	line 1036, column 11 Value stored to 'HasSize' is never read

Annotated Source Code

Press '?' to see keyboard shortcuts

Show analyzer invocation

clang -cc1 -cc1 -triple x86_64-pc-linux-gnu -analyze -disable-free -clear-ast-before-backend -disable-llvm-verifier -discard-value-names -main-file-name LiteralSupport.cpp -analyzer-checker=core -analyzer-checker=apiModeling -analyzer-checker=unix -analyzer-checker=deadcode -analyzer-checker=cplusplus -analyzer-checker=security.insecureAPI.UncheckedReturn -analyzer-checker=security.insecureAPI.getpw -analyzer-checker=security.insecureAPI.gets -analyzer-checker=security.insecureAPI.mktemp -analyzer-checker=security.insecureAPI.mkstemp -analyzer-checker=security.insecureAPI.vfork -analyzer-checker=nullability.NullPassedToNonnull -analyzer-checker=nullability.NullReturnedFromNonnull -analyzer-output plist -w -setup-static-analyzer -analyzer-config-compatibility-mode=true -mrelocation-model pic -pic-level 2 -mframe-pointer=none -relaxed-aliasing -fmath-errno -ffp-contract=on -fno-rounding-math -mconstructor-aliases -funwind-tables=2 -target-cpu x86-64 -tune-cpu generic -debugger-tuning=gdb -ffunction-sections -fdata-sections -fcoverage-compilation-dir=/build/source/build-llvm/tools/clang/stage2-bins -resource-dir /usr/lib/llvm-17/lib/clang/17 -D CLANG_REPOSITORY_STRING="++20230510111145+7df43bdb42ae-1~exp1~20230510111303.1288" -D _DEBUG -D _GLIBCXX_ASSERTIONS -D _GNU_SOURCE -D _LIBCPP_ENABLE_ASSERTIONS -D __STDC_CONSTANT_MACROS -D __STDC_FORMAT_MACROS -D __STDC_LIMIT_MACROS -I tools/clang/lib/Lex -I /build/source/clang/lib/Lex -I /build/source/clang/include -I tools/clang/include -I include -I /build/source/llvm/include -D _FORTIFY_SOURCE=2 -D NDEBUG -U NDEBUG -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/10/../../../../include/c++/10 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/10/../../../../include/x86_64-linux-gnu/c++/10 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/10/../../../../include/c++/10/backward -internal-isystem /usr/lib/llvm-17/lib/clang/17/include -internal-isystem /usr/local/include -internal-isystem 
/usr/lib/gcc/x86_64-linux-gnu/10/../../../../x86_64-linux-gnu/include -internal-externc-isystem /usr/include/x86_64-linux-gnu -internal-externc-isystem /include -internal-externc-isystem /usr/include -fmacro-prefix-map=/build/source/build-llvm/tools/clang/stage2-bins=build-llvm/tools/clang/stage2-bins -fmacro-prefix-map=/build/source/= -fcoverage-prefix-map=/build/source/build-llvm/tools/clang/stage2-bins=build-llvm/tools/clang/stage2-bins -fcoverage-prefix-map=/build/source/= -source-date-epoch 1683717183 -O2 -Wno-unused-command-line-argument -Wno-unused-parameter -Wwrite-strings -Wno-missing-field-initializers -Wno-long-long -Wno-maybe-uninitialized -Wno-class-memaccess -Wno-redundant-move -Wno-pessimizing-move -Wno-noexcept-type -Wno-comment -Wno-misleading-indentation -std=c++17 -fdeprecated-macro -fdebug-compilation-dir=/build/source/build-llvm/tools/clang/stage2-bins -fdebug-prefix-map=/build/source/build-llvm/tools/clang/stage2-bins=build-llvm/tools/clang/stage2-bins -fdebug-prefix-map=/build/source/= -ferror-limit 19 -fvisibility-inlines-hidden -stack-protector 2 -fgnuc-version=4.2.1 -fcolor-diagnostics -vectorize-loops -vectorize-slp -analyzer-output=html -analyzer-config stable-report-filename=true -faddrsig -D__GCC_HAVE_DWARF2_CFI_ASM=1 -o /tmp/scan-build-2023-05-10-133810-16478-1 -x c++ /build/source/clang/lib/Lex/LiteralSupport.cpp

1	//===--- LiteralSupport.cpp - Code to parse and process literals ----------===//
2	//
3	// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4	// See https://llvm.org/LICENSE.txt for license information.
5	// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6	//
7	//===----------------------------------------------------------------------===//
8	//
9	// This file implements the NumericLiteralParser, CharLiteralParser, and
10	// StringLiteralParser interfaces.
11	//
12	//===----------------------------------------------------------------------===//
13
14	#include "clang/Lex/LiteralSupport.h"
15	#include "clang/Basic/CharInfo.h"
16	#include "clang/Basic/LangOptions.h"
17	#include "clang/Basic/SourceLocation.h"
18	#include "clang/Basic/TargetInfo.h"
19	#include "clang/Lex/LexDiagnostic.h"
20	#include "clang/Lex/Lexer.h"
21	#include "clang/Lex/Preprocessor.h"
22	#include "clang/Lex/Token.h"
23	#include "llvm/ADT/APInt.h"
24	#include "llvm/ADT/SmallVector.h"
25	#include "llvm/ADT/StringExtras.h"
26	#include "llvm/ADT/StringSwitch.h"
27	#include "llvm/Support/ConvertUTF.h"
28	#include "llvm/Support/Error.h"
29	#include "llvm/Support/ErrorHandling.h"
30	#include "llvm/Support/Unicode.h"
31	#include <algorithm>
32	#include <cassert>
33	#include <cstddef>
34	#include <cstdint>
35	#include <cstring>
36	#include <string>
37
38	using namespace clang;
39
40	static unsigned getCharWidth(tok::TokenKind kind, const TargetInfo &Target) {
41	switch (kind) {
42	default: llvm_unreachable("Unknown token type!")::llvm::llvm_unreachable_internal("Unknown token type!", "clang/lib/Lex/LiteralSupport.cpp" , 42);
43	case tok::char_constant:
44	case tok::string_literal:
45	case tok::utf8_char_constant:
46	case tok::utf8_string_literal:
47	return Target.getCharWidth();
48	case tok::wide_char_constant:
49	case tok::wide_string_literal:
50	return Target.getWCharWidth();
51	case tok::utf16_char_constant:
52	case tok::utf16_string_literal:
53	return Target.getChar16Width();
54	case tok::utf32_char_constant:
55	case tok::utf32_string_literal:
56	return Target.getChar32Width();
57	}
58	}
59
60	static CharSourceRange MakeCharSourceRange(const LangOptions &Features,
61	FullSourceLoc TokLoc,
62	const char *TokBegin,
63	const char *TokRangeBegin,
64	const char *TokRangeEnd) {
65	SourceLocation Begin =
66	Lexer::AdvanceToTokenCharacter(TokLoc, TokRangeBegin - TokBegin,
67	TokLoc.getManager(), Features);
68	SourceLocation End =
69	Lexer::AdvanceToTokenCharacter(Begin, TokRangeEnd - TokRangeBegin,
70	TokLoc.getManager(), Features);
71	return CharSourceRange::getCharRange(Begin, End);
72	}
73
74	/// Produce a diagnostic highlighting some portion of a literal.
75	///
76	/// Emits the diagnostic \p DiagID, highlighting the range of characters from
77	/// \p TokRangeBegin (inclusive) to \p TokRangeEnd (exclusive), which must be
78	/// a substring of a spelling buffer for the token beginning at \p TokBegin.
79	static DiagnosticBuilder Diag(DiagnosticsEngine *Diags,
80	const LangOptions &Features, FullSourceLoc TokLoc,
81	const char TokBegin, const char TokRangeBegin,
82	const char *TokRangeEnd, unsigned DiagID) {
83	SourceLocation Begin =
84	Lexer::AdvanceToTokenCharacter(TokLoc, TokRangeBegin - TokBegin,
85	TokLoc.getManager(), Features);
86	return Diags->Report(Begin, DiagID) <<
87	MakeCharSourceRange(Features, TokLoc, TokBegin, TokRangeBegin, TokRangeEnd);
88	}
89
90	/// ProcessCharEscape - Parse a standard C escape sequence, which can occur in
91	/// either a character or a string literal.
92	static unsigned ProcessCharEscape(const char *ThisTokBegin,
93	const char *&ThisTokBuf,
94	const char *ThisTokEnd, bool &HadError,
95	FullSourceLoc Loc, unsigned CharWidth,
96	DiagnosticsEngine *Diags,
97	const LangOptions &Features) {
98	const char *EscapeBegin = ThisTokBuf;
99	bool Delimited = false;
100	bool EndDelimiterFound = false;
101
102	// Skip the '\' char.
103	++ThisTokBuf;
104
105	// We know that this character can't be off the end of the buffer, because
106	// that would have been \", which would not have been the end of string.
107	unsigned ResultChar = *ThisTokBuf++;
108	switch (ResultChar) {
109	// These map to themselves.
110	case '\\': case '\'': case '"': case '?': break;
111
112	// These have fixed mappings.
113	case 'a':
114	// TODO: K&R: the meaning of '\\a' is different in traditional C
115	ResultChar = 7;
116	break;
117	case 'b':
118	ResultChar = 8;
119	break;
120	case 'e':
121	if (Diags)
122	Diag(Diags, Features, Loc, ThisTokBegin, EscapeBegin, ThisTokBuf,
123	diag::ext_nonstandard_escape) << "e";
124	ResultChar = 27;
125	break;
126	case 'E':
127	if (Diags)
128	Diag(Diags, Features, Loc, ThisTokBegin, EscapeBegin, ThisTokBuf,
129	diag::ext_nonstandard_escape) << "E";
130	ResultChar = 27;
131	break;
132	case 'f':
133	ResultChar = 12;
134	break;
135	case 'n':
136	ResultChar = 10;
137	break;
138	case 'r':
139	ResultChar = 13;
140	break;
141	case 't':
142	ResultChar = 9;
143	break;
144	case 'v':
145	ResultChar = 11;
146	break;
147	case 'x': { // Hex escape.
148	ResultChar = 0;
149	if (ThisTokBuf != ThisTokEnd && *ThisTokBuf == '{') {
150	Delimited = true;
151	ThisTokBuf++;
152	if (*ThisTokBuf == '}') {
153	Diag(Diags, Features, Loc, ThisTokBegin, EscapeBegin, ThisTokBuf,
154	diag::err_delimited_escape_empty);
155	return ResultChar;
156	}
157	} else if (ThisTokBuf == ThisTokEnd \|\| !isHexDigit(*ThisTokBuf)) {
158	if (Diags)
159	Diag(Diags, Features, Loc, ThisTokBegin, EscapeBegin, ThisTokBuf,
160	diag::err_hex_escape_no_digits) << "x";
161	return ResultChar;
162	}
163
164	// Hex escapes are a maximal series of hex digits.
165	bool Overflow = false;
166	for (; ThisTokBuf != ThisTokEnd; ++ThisTokBuf) {
167	if (Delimited && *ThisTokBuf == '}') {
168	ThisTokBuf++;
169	EndDelimiterFound = true;
170	break;
171	}
172	int CharVal = llvm::hexDigitValue(*ThisTokBuf);
173	if (CharVal == -1) {
174	// Non delimited hex escape sequences stop at the first non-hex digit.
175	if (!Delimited)
176	break;
177	HadError = true;
178	if (Diags)
179	Diag(Diags, Features, Loc, ThisTokBegin, EscapeBegin, ThisTokBuf,
180	diag::err_delimited_escape_invalid)
181	<< StringRef(ThisTokBuf, 1);
182	continue;
183	}
184	// About to shift out a digit?
185	if (ResultChar & 0xF0000000)
186	Overflow = true;
187	ResultChar <<= 4;
188	ResultChar \|= CharVal;
189	}
190	// See if any bits will be truncated when evaluated as a character.
191	if (CharWidth != 32 && (ResultChar >> CharWidth) != 0) {
192	Overflow = true;
193	ResultChar &= ~0U >> (32-CharWidth);
194	}
195
196	// Check for overflow.
197	if (!HadError && Overflow) { // Too many digits to fit in
198	HadError = true;
199	if (Diags)
200	Diag(Diags, Features, Loc, ThisTokBegin, EscapeBegin, ThisTokBuf,
201	diag::err_escape_too_large)
202	<< 0;
203	}
204	break;
205	}
206	case '0': case '1': case '2': case '3':
207	case '4': case '5': case '6': case '7': {
208	// Octal escapes.
209	--ThisTokBuf;
210	ResultChar = 0;
211
212	// Octal escapes are a series of octal digits with maximum length 3.
213	// "\0123" is a two digit sequence equal to "\012" "3".
214	unsigned NumDigits = 0;
215	do {
216	ResultChar <<= 3;
217	ResultChar \|= *ThisTokBuf++ - '0';
218	++NumDigits;
219	} while (ThisTokBuf != ThisTokEnd && NumDigits < 3 &&
220	ThisTokBuf[0] >= '0' && ThisTokBuf[0] <= '7');
221
222	// Check for overflow. Reject '\777', but not L'\777'.
223	if (CharWidth != 32 && (ResultChar >> CharWidth) != 0) {
224	if (Diags)
225	Diag(Diags, Features, Loc, ThisTokBegin, EscapeBegin, ThisTokBuf,
226	diag::err_escape_too_large) << 1;
227	ResultChar &= ~0U >> (32-CharWidth);
228	}
229	break;
230	}
231	case 'o': {
232	bool Overflow = false;
233	if (ThisTokBuf == ThisTokEnd \|\| *ThisTokBuf != '{') {
234	HadError = true;
235	if (Diags)
236	Diag(Diags, Features, Loc, ThisTokBegin, EscapeBegin, ThisTokBuf,
237	diag::err_delimited_escape_missing_brace)
238	<< "o";
239
240	break;
241	}
242	ResultChar = 0;
243	Delimited = true;
244	++ThisTokBuf;
245	if (*ThisTokBuf == '}') {
246	Diag(Diags, Features, Loc, ThisTokBegin, EscapeBegin, ThisTokBuf,
247	diag::err_delimited_escape_empty);
248	return ResultChar;
249	}
250
251	while (ThisTokBuf != ThisTokEnd) {
252	if (*ThisTokBuf == '}') {
253	EndDelimiterFound = true;
254	ThisTokBuf++;
255	break;
256	}
257	if (ThisTokBuf < '0' \|\| ThisTokBuf > '7') {
258	HadError = true;
259	if (Diags)
260	Diag(Diags, Features, Loc, ThisTokBegin, EscapeBegin, ThisTokBuf,
261	diag::err_delimited_escape_invalid)
262	<< StringRef(ThisTokBuf, 1);
263	ThisTokBuf++;
264	continue;
265	}
266	// Check if one of the top three bits is set before shifting them out.
267	if (ResultChar & 0xE0000000)
268	Overflow = true;
269
270	ResultChar <<= 3;
271	ResultChar \|= *ThisTokBuf++ - '0';
272	}
273	// Check for overflow. Reject '\777', but not L'\777'.
274	if (!HadError &&
275	(Overflow \|\| (CharWidth != 32 && (ResultChar >> CharWidth) != 0))) {
276	HadError = true;
277	if (Diags)
278	Diag(Diags, Features, Loc, ThisTokBegin, EscapeBegin, ThisTokBuf,
279	diag::err_escape_too_large)
280	<< 1;
281	ResultChar &= ~0U >> (32 - CharWidth);
282	}
283	break;
284	}
285	// Otherwise, these are not valid escapes.
286	case '(': case '{': case '[': case '%':
287	// GCC accepts these as extensions. We warn about them as such though.
288	if (Diags)
289	Diag(Diags, Features, Loc, ThisTokBegin, EscapeBegin, ThisTokBuf,
290	diag::ext_nonstandard_escape)
291	<< std::string(1, ResultChar);
292	break;
293	default:
294	if (!Diags)
295	break;
296
297	if (isPrintable(ResultChar))
298	Diag(Diags, Features, Loc, ThisTokBegin, EscapeBegin, ThisTokBuf,
299	diag::ext_unknown_escape)
300	<< std::string(1, ResultChar);
301	else
302	Diag(Diags, Features, Loc, ThisTokBegin, EscapeBegin, ThisTokBuf,
303	diag::ext_unknown_escape)
304	<< "x" + llvm::utohexstr(ResultChar);
305	break;
306	}
307
308	if (Delimited && Diags) {
309	if (!EndDelimiterFound)
310	Diag(Diags, Features, Loc, ThisTokBegin, EscapeBegin, ThisTokBuf,
311	diag::err_expected)
312	<< tok::r_brace;
313	else if (!HadError) {
314	Diag(Diags, Features, Loc, ThisTokBegin, EscapeBegin, ThisTokBuf,
315	Features.CPlusPlus23 ? diag::warn_cxx23_delimited_escape_sequence
316	: diag::ext_delimited_escape_sequence)
317	<< /delimited/ 0 << (Features.CPlusPlus ? 1 : 0);
318	}
319	}
320
321	return ResultChar;
322	}
323
324	static void appendCodePoint(unsigned Codepoint,
325	llvm::SmallVectorImpl<char> &Str) {
326	char ResultBuf[4];
327	char *ResultPtr = ResultBuf;
328	if (llvm::ConvertCodePointToUTF8(Codepoint, ResultPtr))
329	Str.append(ResultBuf, ResultPtr);
330	}
331
332	void clang::expandUCNs(SmallVectorImpl<char> &Buf, StringRef Input) {
333	for (StringRef::iterator I = Input.begin(), E = Input.end(); I != E; ++I) {
334	if (*I != '\\') {
335	Buf.push_back(*I);
336	continue;
337	}
338
339	++I;
340	char Kind = *I;
341	++I;
342
343	assert(Kind == 'u' \|\| Kind == 'U' \|\| Kind == 'N')(static_cast <bool> (Kind == 'u' \|\| Kind == 'U' \|\| Kind == 'N') ? void (0) : __assert_fail ("Kind == 'u' \|\| Kind == 'U' \|\| Kind == 'N'" , "clang/lib/Lex/LiteralSupport.cpp", 343, __extension__ __PRETTY_FUNCTION__ ));
344	uint32_t CodePoint = 0;
345
346	if (Kind == 'u' && *I == '{') {
347	for (++I; *I != '}'; ++I) {
348	unsigned Value = llvm::hexDigitValue(*I);
349	assert(Value != -1U)(static_cast <bool> (Value != -1U) ? void (0) : __assert_fail ("Value != -1U", "clang/lib/Lex/LiteralSupport.cpp", 349, __extension__ __PRETTY_FUNCTION__));
350	CodePoint <<= 4;
351	CodePoint += Value;
352	}
353	appendCodePoint(CodePoint, Buf);
354	continue;
355	}
356
357	if (Kind == 'N') {
358	assert(I == '{')(static_cast <bool> (I == '{') ? void (0) : __assert_fail ("*I == '{'", "clang/lib/Lex/LiteralSupport.cpp", 358, __extension__ __PRETTY_FUNCTION__));
359	++I;
360	auto Delim = std::find(I, Input.end(), '}');
361	assert(Delim != Input.end())(static_cast <bool> (Delim != Input.end()) ? void (0) : __assert_fail ("Delim != Input.end()", "clang/lib/Lex/LiteralSupport.cpp" , 361, __extension__ __PRETTY_FUNCTION__));
362	std::optional<llvm::sys::unicode::LooseMatchingResult> Res =
363	llvm::sys::unicode::nameToCodepointLooseMatching(
364	StringRef(I, std::distance(I, Delim)));
365	assert(Res)(static_cast <bool> (Res) ? void (0) : __assert_fail ("Res" , "clang/lib/Lex/LiteralSupport.cpp", 365, __extension__ __PRETTY_FUNCTION__ ));
366	CodePoint = Res->CodePoint;
367	assert(CodePoint != 0xFFFFFFFF)(static_cast <bool> (CodePoint != 0xFFFFFFFF) ? void (0 ) : __assert_fail ("CodePoint != 0xFFFFFFFF", "clang/lib/Lex/LiteralSupport.cpp" , 367, __extension__ __PRETTY_FUNCTION__));
368	appendCodePoint(CodePoint, Buf);
369	I = Delim;
370	continue;
371	}
372
373	unsigned NumHexDigits;
374	if (Kind == 'u')
375	NumHexDigits = 4;
376	else
377	NumHexDigits = 8;
378
379	assert(I + NumHexDigits <= E)(static_cast <bool> (I + NumHexDigits <= E) ? void ( 0) : __assert_fail ("I + NumHexDigits <= E", "clang/lib/Lex/LiteralSupport.cpp" , 379, __extension__ __PRETTY_FUNCTION__));
380
381	for (; NumHexDigits != 0; ++I, --NumHexDigits) {
382	unsigned Value = llvm::hexDigitValue(*I);
383	assert(Value != -1U)(static_cast <bool> (Value != -1U) ? void (0) : __assert_fail ("Value != -1U", "clang/lib/Lex/LiteralSupport.cpp", 383, __extension__ __PRETTY_FUNCTION__));
384
385	CodePoint <<= 4;
386	CodePoint += Value;
387	}
388
389	appendCodePoint(CodePoint, Buf);
390	--I;
391	}
392	}
393
394	static bool ProcessNumericUCNEscape(const char *ThisTokBegin,
395	const char *&ThisTokBuf,
396	const char *ThisTokEnd, uint32_t &UcnVal,
397	unsigned short &UcnLen, bool &Delimited,
398	FullSourceLoc Loc, DiagnosticsEngine *Diags,
399	const LangOptions &Features,
400	bool in_char_string_literal = false) {
401	const char *UcnBegin = ThisTokBuf;
402	bool HasError = false;
403	bool EndDelimiterFound = false;
404
405	// Skip the '\u' char's.
406	ThisTokBuf += 2;
407	Delimited = false;
408	if (UcnBegin[1] == 'u' && in_char_string_literal &&
409	ThisTokBuf != ThisTokEnd && *ThisTokBuf == '{') {
410	Delimited = true;
411	ThisTokBuf++;
412	} else if (ThisTokBuf == ThisTokEnd \|\| !isHexDigit(*ThisTokBuf)) {
413	if (Diags)
414	Diag(Diags, Features, Loc, ThisTokBegin, UcnBegin, ThisTokBuf,
415	diag::err_hex_escape_no_digits)
416	<< StringRef(&ThisTokBuf[-1], 1);
417	return false;
418	}
419	UcnLen = (ThisTokBuf[-1] == 'u' ? 4 : 8);
420
421	bool Overflow = false;
422	unsigned short Count = 0;
423	for (; ThisTokBuf != ThisTokEnd && (Delimited \|\| Count != UcnLen);
424	++ThisTokBuf) {
425	if (Delimited && *ThisTokBuf == '}') {
426	++ThisTokBuf;
427	EndDelimiterFound = true;
428	break;
429	}
430	int CharVal = llvm::hexDigitValue(*ThisTokBuf);
431	if (CharVal == -1) {
432	HasError = true;
433	if (!Delimited)
434	break;
435	if (Diags) {
436	Diag(Diags, Features, Loc, ThisTokBegin, UcnBegin, ThisTokBuf,
437	diag::err_delimited_escape_invalid)
438	<< StringRef(ThisTokBuf, 1);
439	}
440	Count++;
441	continue;
442	}
443	if (UcnVal & 0xF0000000) {
444	Overflow = true;
445	continue;
446	}
447	UcnVal <<= 4;
448	UcnVal \|= CharVal;
449	Count++;
450	}
451
452	if (Overflow) {
453	if (Diags)
454	Diag(Diags, Features, Loc, ThisTokBegin, UcnBegin, ThisTokBuf,
455	diag::err_escape_too_large)
456	<< 0;
457	return false;
458	}
459
460	if (Delimited && !EndDelimiterFound) {
461	if (Diags) {
462	Diag(Diags, Features, Loc, ThisTokBegin, UcnBegin, ThisTokBuf,
463	diag::err_expected)
464	<< tok::r_brace;
465	}
466	return false;
467	}
468
469	// If we didn't consume the proper number of digits, there is a problem.
470	if (Count == 0 \|\| (!Delimited && Count != UcnLen)) {
471	if (Diags)
472	Diag(Diags, Features, Loc, ThisTokBegin, UcnBegin, ThisTokBuf,
473	Delimited ? diag::err_delimited_escape_empty
474	: diag::err_ucn_escape_incomplete);
475	return false;
476	}
477	return !HasError;
478	}
479
480	static void DiagnoseInvalidUnicodeCharacterName(
481	DiagnosticsEngine *Diags, const LangOptions &Features, FullSourceLoc Loc,
482	const char TokBegin, const char TokRangeBegin, const char *TokRangeEnd,
483	llvm::StringRef Name) {
484
485	Diag(Diags, Features, Loc, TokBegin, TokRangeBegin, TokRangeEnd,
486	diag::err_invalid_ucn_name)
487	<< Name;
488
489	namespace u = llvm::sys::unicode;
490
491	std::optional<u::LooseMatchingResult> Res =
492	u::nameToCodepointLooseMatching(Name);
493	if (Res) {
494	Diag(Diags, Features, Loc, TokBegin, TokRangeBegin, TokRangeEnd,
495	diag::note_invalid_ucn_name_loose_matching)
496	<< FixItHint::CreateReplacement(
497	MakeCharSourceRange(Features, Loc, TokBegin, TokRangeBegin,
498	TokRangeEnd),
499	Res->Name);
500	return;
501	}
502
503	unsigned Distance = 0;
504	SmallVector<u::MatchForCodepointName> Matches =
505	u::nearestMatchesForCodepointName(Name, 5);
506	assert(!Matches.empty() && "No unicode characters found")(static_cast <bool> (!Matches.empty() && "No unicode characters found" ) ? void (0) : __assert_fail ("!Matches.empty() && \"No unicode characters found\"" , "clang/lib/Lex/LiteralSupport.cpp", 506, __extension__ __PRETTY_FUNCTION__ ));
507
508	for (const auto &Match : Matches) {
509	if (Distance == 0)
510	Distance = Match.Distance;
511	if (std::max(Distance, Match.Distance) -
512	std::min(Distance, Match.Distance) >
513	3)
514	break;
515	Distance = Match.Distance;
516
517	std::string Str;
518	llvm::UTF32 V = Match.Value;
519	bool Converted =
520	llvm::convertUTF32ToUTF8String(llvm::ArrayRef<llvm::UTF32>(&V, 1), Str);
521	(void)Converted;
522	assert(Converted && "Found a match wich is not a unicode character")(static_cast <bool> (Converted && "Found a match wich is not a unicode character" ) ? void (0) : __assert_fail ("Converted && \"Found a match wich is not a unicode character\"" , "clang/lib/Lex/LiteralSupport.cpp", 522, __extension__ __PRETTY_FUNCTION__ ));
523
524	Diag(Diags, Features, Loc, TokBegin, TokRangeBegin, TokRangeEnd,
525	diag::note_invalid_ucn_name_candidate)
526	<< Match.Name << llvm::utohexstr(Match.Value)
527	<< Str // FIXME: Fix the rendering of non printable characters
528	<< FixItHint::CreateReplacement(
529	MakeCharSourceRange(Features, Loc, TokBegin, TokRangeBegin,
530	TokRangeEnd),
531	Match.Name);
532	}
533	}
534
535	static bool ProcessNamedUCNEscape(const char *ThisTokBegin,
536	const char *&ThisTokBuf,
537	const char *ThisTokEnd, uint32_t &UcnVal,
538	unsigned short &UcnLen, FullSourceLoc Loc,
539	DiagnosticsEngine *Diags,
540	const LangOptions &Features) {
541	const char *UcnBegin = ThisTokBuf;
542	assert(UcnBegin[0] == '\\' && UcnBegin[1] == 'N')(static_cast <bool> (UcnBegin[0] == '\\' && UcnBegin [1] == 'N') ? void (0) : __assert_fail ("UcnBegin[0] == '\\\\' && UcnBegin[1] == 'N'" , "clang/lib/Lex/LiteralSupport.cpp", 542, __extension__ __PRETTY_FUNCTION__ ));
543	ThisTokBuf += 2;
544	if (ThisTokBuf == ThisTokEnd \|\| *ThisTokBuf != '{') {
545	if (Diags) {
546	Diag(Diags, Features, Loc, ThisTokBegin, UcnBegin, ThisTokBuf,
547	diag::err_delimited_escape_missing_brace)
548	<< StringRef(&ThisTokBuf[-1], 1);
549	}
550	return false;
551	}
552	ThisTokBuf++;
553	const char *ClosingBrace = std::find_if(ThisTokBuf, ThisTokEnd, [](char C) {
554	return C == '}' \|\| isVerticalWhitespace(C);
555	});
556	bool Incomplete = ClosingBrace == ThisTokEnd;
557	bool Empty = ClosingBrace == ThisTokBuf;
558	if (Incomplete \|\| Empty) {
559	if (Diags) {
560	Diag(Diags, Features, Loc, ThisTokBegin, UcnBegin, ThisTokBuf,
561	Incomplete ? diag::err_ucn_escape_incomplete
562	: diag::err_delimited_escape_empty)
563	<< StringRef(&UcnBegin[1], 1);
564	}
565	ThisTokBuf = ClosingBrace == ThisTokEnd ? ClosingBrace : ClosingBrace + 1;
566	return false;
567	}
568	StringRef Name(ThisTokBuf, ClosingBrace - ThisTokBuf);
569	ThisTokBuf = ClosingBrace + 1;
570	std::optional<char32_t> Res = llvm::sys::unicode::nameToCodepointStrict(Name);
571	if (!Res) {
572	if (Diags)
573	DiagnoseInvalidUnicodeCharacterName(Diags, Features, Loc, ThisTokBegin,
574	&UcnBegin[3], ClosingBrace, Name);
575	return false;
576	}
577	UcnVal = *Res;
578	UcnLen = UcnVal > 0xFFFF ? 8 : 4;
579	return true;
580	}
581
582	/// ProcessUCNEscape - Read the Universal Character Name, check constraints and
583	/// return the UTF32.
584	static bool ProcessUCNEscape(const char ThisTokBegin, const char &ThisTokBuf,
585	const char *ThisTokEnd, uint32_t &UcnVal,
586	unsigned short &UcnLen, FullSourceLoc Loc,
587	DiagnosticsEngine *Diags,
588	const LangOptions &Features,
589	bool in_char_string_literal = false) {
590
591	bool HasError;
592	const char *UcnBegin = ThisTokBuf;
593	bool IsDelimitedEscapeSequence = false;
594	bool IsNamedEscapeSequence = false;
595	if (ThisTokBuf[1] == 'N') {
596	IsNamedEscapeSequence = true;
597	HasError = !ProcessNamedUCNEscape(ThisTokBegin, ThisTokBuf, ThisTokEnd,
598	UcnVal, UcnLen, Loc, Diags, Features);
599	} else {
600	HasError =
601	!ProcessNumericUCNEscape(ThisTokBegin, ThisTokBuf, ThisTokEnd, UcnVal,
602	UcnLen, IsDelimitedEscapeSequence, Loc, Diags,
603	Features, in_char_string_literal);
604	}
605	if (HasError)
606	return false;
607
608	// Check UCN constraints (C99 6.4.3p2) [C++11 lex.charset p2]
609	if ((0xD800 <= UcnVal && UcnVal <= 0xDFFF) \|\| // surrogate codepoints
610	UcnVal > 0x10FFFF) { // maximum legal UTF32 value
611	if (Diags)
612	Diag(Diags, Features, Loc, ThisTokBegin, UcnBegin, ThisTokBuf,
613	diag::err_ucn_escape_invalid);
614	return false;
615	}
616
617	// C++11 allows UCNs that refer to control characters and basic source
618	// characters inside character and string literals
619	if (UcnVal < 0xa0 &&
620	(UcnVal != 0x24 && UcnVal != 0x40 && UcnVal != 0x60)) { // $, @, `
621	bool IsError = (!Features.CPlusPlus11 \|\| !in_char_string_literal);
622	if (Diags) {
623	char BasicSCSChar = UcnVal;
624	if (UcnVal >= 0x20 && UcnVal < 0x7f)
625	Diag(Diags, Features, Loc, ThisTokBegin, UcnBegin, ThisTokBuf,
626	IsError ? diag::err_ucn_escape_basic_scs :
627	diag::warn_cxx98_compat_literal_ucn_escape_basic_scs)
628	<< StringRef(&BasicSCSChar, 1);
629	else
630	Diag(Diags, Features, Loc, ThisTokBegin, UcnBegin, ThisTokBuf,
631	IsError ? diag::err_ucn_control_character :
632	diag::warn_cxx98_compat_literal_ucn_control_character);
633	}
634	if (IsError)
635	return false;
636	}
637
638	if (!Features.CPlusPlus && !Features.C99 && Diags)
639	Diag(Diags, Features, Loc, ThisTokBegin, UcnBegin, ThisTokBuf,
640	diag::warn_ucn_not_valid_in_c89_literal);
641
642	if ((IsDelimitedEscapeSequence \|\| IsNamedEscapeSequence) && Diags)
643	Diag(Diags, Features, Loc, ThisTokBegin, UcnBegin, ThisTokBuf,
644	Features.CPlusPlus23 ? diag::warn_cxx23_delimited_escape_sequence
645	: diag::ext_delimited_escape_sequence)
646	<< (IsNamedEscapeSequence ? 1 : 0) << (Features.CPlusPlus ? 1 : 0);
647
648	return true;
649	}
650
651	/// MeasureUCNEscape - Determine the number of bytes within the resulting string
652	/// which this UCN will occupy.
653	static int MeasureUCNEscape(const char ThisTokBegin, const char &ThisTokBuf,
654	const char *ThisTokEnd, unsigned CharByteWidth,
655	const LangOptions &Features, bool &HadError) {
656	// UTF-32: 4 bytes per escape.
657	if (CharByteWidth == 4)
658	return 4;
659
660	uint32_t UcnVal = 0;
661	unsigned short UcnLen = 0;
662	FullSourceLoc Loc;
663
664	if (!ProcessUCNEscape(ThisTokBegin, ThisTokBuf, ThisTokEnd, UcnVal,
665	UcnLen, Loc, nullptr, Features, true)) {
666	HadError = true;
667	return 0;
668	}
669
670	// UTF-16: 2 bytes for BMP, 4 bytes otherwise.
671	if (CharByteWidth == 2)
672	return UcnVal <= 0xFFFF ? 2 : 4;
673
674	// UTF-8.
675	if (UcnVal < 0x80)
676	return 1;
677	if (UcnVal < 0x800)
678	return 2;
679	if (UcnVal < 0x10000)
680	return 3;
681	return 4;
682	}
683
684	/// EncodeUCNEscape - Read the Universal Character Name, check constraints and
685	/// convert the UTF32 to UTF8 or UTF16. This is a subroutine of
686	/// StringLiteralParser. When we decide to implement UCN's for identifiers,
687	/// we will likely rework our support for UCN's.
688	static void EncodeUCNEscape(const char ThisTokBegin, const char &ThisTokBuf,
689	const char *ThisTokEnd,
690	char *&ResultBuf, bool &HadError,
691	FullSourceLoc Loc, unsigned CharByteWidth,
692	DiagnosticsEngine *Diags,
693	const LangOptions &Features) {
694	typedef uint32_t UTF32;
695	UTF32 UcnVal = 0;
696	unsigned short UcnLen = 0;
697	if (!ProcessUCNEscape(ThisTokBegin, ThisTokBuf, ThisTokEnd, UcnVal, UcnLen,
698	Loc, Diags, Features, true)) {
699	HadError = true;
700	return;
701	}
702
703	assert((CharByteWidth == 1 \|\| CharByteWidth == 2 \|\| CharByteWidth == 4) &&(static_cast <bool> ((CharByteWidth == 1 \|\| CharByteWidth == 2 \|\| CharByteWidth == 4) && "only character widths of 1, 2, or 4 bytes supported" ) ? void (0) : __assert_fail ("(CharByteWidth == 1 \|\| CharByteWidth == 2 \|\| CharByteWidth == 4) && \"only character widths of 1, 2, or 4 bytes supported\"" , "clang/lib/Lex/LiteralSupport.cpp", 704, __extension__ __PRETTY_FUNCTION__ ))
704	"only character widths of 1, 2, or 4 bytes supported")(static_cast <bool> ((CharByteWidth == 1 \|\| CharByteWidth == 2 \|\| CharByteWidth == 4) && "only character widths of 1, 2, or 4 bytes supported" ) ? void (0) : __assert_fail ("(CharByteWidth == 1 \|\| CharByteWidth == 2 \|\| CharByteWidth == 4) && \"only character widths of 1, 2, or 4 bytes supported\"" , "clang/lib/Lex/LiteralSupport.cpp", 704, __extension__ __PRETTY_FUNCTION__ ));
705
706	(void)UcnLen;
707	assert((UcnLen== 4 \|\| UcnLen== 8) && "only ucn length of 4 or 8 supported")(static_cast <bool> ((UcnLen== 4 \|\| UcnLen== 8) && "only ucn length of 4 or 8 supported") ? void (0) : __assert_fail ("(UcnLen== 4 \|\| UcnLen== 8) && \"only ucn length of 4 or 8 supported\"" , "clang/lib/Lex/LiteralSupport.cpp", 707, __extension__ __PRETTY_FUNCTION__ ));
708
709	if (CharByteWidth == 4) {
710	// FIXME: Make the type of the result buffer correct instead of
711	// using reinterpret_cast.
712	llvm::UTF32 ResultPtr = reinterpret_cast<llvm::UTF32>(ResultBuf);
713	*ResultPtr = UcnVal;
714	ResultBuf += 4;
715	return;
716	}
717
718	if (CharByteWidth == 2) {
719	// FIXME: Make the type of the result buffer correct instead of
720	// using reinterpret_cast.
721	llvm::UTF16 ResultPtr = reinterpret_cast<llvm::UTF16>(ResultBuf);
722
723	if (UcnVal <= (UTF32)0xFFFF) {
724	*ResultPtr = UcnVal;
725	ResultBuf += 2;
726	return;
727	}
728
729	// Convert to UTF16.
730	UcnVal -= 0x10000;
731	*ResultPtr = 0xD800 + (UcnVal >> 10);
732	*(ResultPtr+1) = 0xDC00 + (UcnVal & 0x3FF);
733	ResultBuf += 4;
734	return;
735	}
736
737	assert(CharByteWidth == 1 && "UTF-8 encoding is only for 1 byte characters")(static_cast <bool> (CharByteWidth == 1 && "UTF-8 encoding is only for 1 byte characters" ) ? void (0) : __assert_fail ("CharByteWidth == 1 && \"UTF-8 encoding is only for 1 byte characters\"" , "clang/lib/Lex/LiteralSupport.cpp", 737, __extension__ __PRETTY_FUNCTION__ ));
738
739	// Now that we've parsed/checked the UCN, we convert from UTF32->UTF8.
740	// The conversion below was inspired by:
741	// http://www.unicode.org/Public/PROGRAMS/CVTUTF/ConvertUTF.c
742	// First, we determine how many bytes the result will require.
743	typedef uint8_t UTF8;
744
745	unsigned short bytesToWrite = 0;
746	if (UcnVal < (UTF32)0x80)
747	bytesToWrite = 1;
748	else if (UcnVal < (UTF32)0x800)
749	bytesToWrite = 2;
750	else if (UcnVal < (UTF32)0x10000)
751	bytesToWrite = 3;
752	else
753	bytesToWrite = 4;
754
755	const unsigned byteMask = 0xBF;
756	const unsigned byteMark = 0x80;
757
758	// Once the bits are split out into bytes of UTF8, this is a mask OR-ed
759	// into the first byte, depending on how many bytes follow.
760	static const UTF8 firstByteMark[5] = {
761	0x00, 0x00, 0xC0, 0xE0, 0xF0
762	};
763	// Finally, we write the bytes into ResultBuf.
764	ResultBuf += bytesToWrite;
765	switch (bytesToWrite) { // note: everything falls through.
766	case 4:
767	*--ResultBuf = (UTF8)((UcnVal \| byteMark) & byteMask); UcnVal >>= 6;
768	[[fallthrough]];
769	case 3:
770	*--ResultBuf = (UTF8)((UcnVal \| byteMark) & byteMask); UcnVal >>= 6;
771	[[fallthrough]];
772	case 2:
773	*--ResultBuf = (UTF8)((UcnVal \| byteMark) & byteMask); UcnVal >>= 6;
774	[[fallthrough]];
775	case 1:
776	*--ResultBuf = (UTF8) (UcnVal \| firstByteMark[bytesToWrite]);
777	}
778	// Update the buffer.
779	ResultBuf += bytesToWrite;
780	}
781
782	/// integer-constant: [C99 6.4.4.1]
783	/// decimal-constant integer-suffix
784	/// octal-constant integer-suffix
785	/// hexadecimal-constant integer-suffix
786	/// binary-literal integer-suffix [GNU, C++1y]
787	/// user-defined-integer-literal: [C++11 lex.ext]
788	/// decimal-literal ud-suffix
789	/// octal-literal ud-suffix
790	/// hexadecimal-literal ud-suffix
791	/// binary-literal ud-suffix [GNU, C++1y]
792	/// decimal-constant:
793	/// nonzero-digit
794	/// decimal-constant digit
795	/// octal-constant:
796	/// 0
797	/// octal-constant octal-digit
798	/// hexadecimal-constant:
799	/// hexadecimal-prefix hexadecimal-digit
800	/// hexadecimal-constant hexadecimal-digit
801	/// hexadecimal-prefix: one of
802	/// 0x 0X
803	/// binary-literal:
804	/// 0b binary-digit
805	/// 0B binary-digit
806	/// binary-literal binary-digit
807	/// integer-suffix:
808	/// unsigned-suffix [long-suffix]
809	/// unsigned-suffix [long-long-suffix]
810	/// long-suffix [unsigned-suffix]
811	/// long-long-suffix [unsigned-suffix]
812	/// nonzero-digit:
813	/// 1 2 3 4 5 6 7 8 9
814	/// octal-digit:
815	/// 0 1 2 3 4 5 6 7
816	/// hexadecimal-digit:
817	/// 0 1 2 3 4 5 6 7 8 9
818	/// a b c d e f
819	/// A B C D E F
820	/// binary-digit:
821	/// 0
822	/// 1
823	/// unsigned-suffix: one of
824	/// u U
825	/// long-suffix: one of
826	/// l L
827	/// long-long-suffix: one of
828	/// ll LL
829	///
830	/// floating-constant: [C99 6.4.4.2]
831	/// TODO: add rules...
832	///
833	NumericLiteralParser::NumericLiteralParser(StringRef TokSpelling,
834	SourceLocation TokLoc,
835	const SourceManager &SM,
836	const LangOptions &LangOpts,
837	const TargetInfo &Target,
838	DiagnosticsEngine &Diags)
839	: SM(SM), LangOpts(LangOpts), Diags(Diags),
840	ThisTokBegin(TokSpelling.begin()), ThisTokEnd(TokSpelling.end()) {
841
842	s = DigitsBegin = ThisTokBegin;
843	saw_exponent = false;
844	saw_period = false;
845	saw_ud_suffix = false;
846	saw_fixed_point_suffix = false;
847	isLong = false;
848	isUnsigned = false;
849	isLongLong = false;
850	isSizeT = false;
851	isHalf = false;
852	isFloat = false;
853	isImaginary = false;
854	isFloat16 = false;
855	isFloat128 = false;
856	MicrosoftInteger = 0;
857	isFract = false;
858	isAccum = false;
859	hadError = false;
860	isBitInt = false;
861
862	// This routine assumes that the range begin/end matches the regex for integer
863	// and FP constants (specifically, the 'pp-number' regex), and assumes that
864	// the byte at "*end" is both valid and not part of the regex. Because of
865	// this, it doesn't have to check for 'overscan' in various places.
866	if (isPreprocessingNumberBody(*ThisTokEnd)) {
867	Diags.Report(TokLoc, diag::err_lexing_numeric);
868	hadError = true;
869	return;
870	}
871
872	if (*s == '0') { // parse radix
873	ParseNumberStartingWithZero(TokLoc);
874	if (hadError)
875	return;
876	} else { // the first digit is non-zero
877	radix = 10;
878	s = SkipDigits(s);
879	if (s == ThisTokEnd) {
880	// Done.
881	} else {
882	ParseDecimalOrOctalCommon(TokLoc);
883	if (hadError)
884	return;
885	}
886	}
887
888	SuffixBegin = s;
889	checkSeparator(TokLoc, s, CSK_AfterDigits);
890
891	// Initial scan to lookahead for fixed point suffix.
892	if (LangOpts.FixedPoint) {
893	for (const char *c = s; c != ThisTokEnd; ++c) {
894	if (c == 'r' \|\| c == 'k' \|\| c == 'R' \|\| c == 'K') {
895	saw_fixed_point_suffix = true;
896	break;
897	}
898	}
899	}
900
901	// Parse the suffix. At this point we can classify whether we have an FP or
902	// integer constant.
903	bool isFixedPointConstant = isFixedPointLiteral();
904	bool isFPConstant = isFloatingLiteral();
905	bool HasSize = false;
906
907	// Loop over all of the characters of the suffix. If we see something bad,
908	// we break out of the loop.
909	for (; s != ThisTokEnd; ++s) {
910	switch (*s) {
911	case 'R':
912	case 'r':
913	if (!LangOpts.FixedPoint)
914	break;
915	if (isFract \|\| isAccum) break;
916	if (!(saw_period \|\| saw_exponent)) break;
917	isFract = true;
918	continue;
919	case 'K':
920	case 'k':
921	if (!LangOpts.FixedPoint)
922	break;
923	if (isFract \|\| isAccum) break;
924	if (!(saw_period \|\| saw_exponent)) break;
925	isAccum = true;
926	continue;
927	case 'h': // FP Suffix for "half".
928	case 'H':
929	// OpenCL Extension v1.2 s9.5 - h or H suffix for half type.
930	if (!(LangOpts.Half \|\| LangOpts.FixedPoint))
931	break;
932	if (isIntegerLiteral()) break; // Error for integer constant.
933	if (HasSize)
934	break;
935	HasSize = true;
936	isHalf = true;
937	continue; // Success.
938	case 'f': // FP Suffix for "float"
939	case 'F':
940	if (!isFPConstant) break; // Error for integer constant.
941	if (HasSize)
942	break;
943	HasSize = true;
944
945	// CUDA host and device may have different _Float16 support, therefore
946	// allows f16 literals to avoid false alarm.
947	// When we compile for OpenMP target offloading on NVPTX, f16 suffix
948	// should also be supported.
949	// ToDo: more precise check for CUDA.
950	// TODO: AMDGPU might also support it in the future.
951	if ((Target.hasFloat16Type() \|\| LangOpts.CUDA \|\|
952	(LangOpts.OpenMPIsDevice && Target.getTriple().isNVPTX())) &&
953	s + 2 < ThisTokEnd && s[1] == '1' && s[2] == '6') {
954	s += 2; // success, eat up 2 characters.
955	isFloat16 = true;
956	continue;
957	}
958
959	isFloat = true;
960	continue; // Success.
961	case 'q': // FP Suffix for "__float128"
962	case 'Q':
963	if (!isFPConstant) break; // Error for integer constant.
964	if (HasSize)
965	break;
966	HasSize = true;
967	isFloat128 = true;
968	continue; // Success.
969	case 'u':
970	case 'U':
971	if (isFPConstant) break; // Error for floating constant.
972	if (isUnsigned) break; // Cannot be repeated.
973	isUnsigned = true;
974	continue; // Success.
975	case 'l':
976	case 'L':
977	if (HasSize)
978	break;
979	HasSize = true;
980
981	// Check for long long. The L's need to be adjacent and the same case.
982	if (s[1] == s[0]) {
983	assert(s + 1 < ThisTokEnd && "didn't maximally munch?")(static_cast <bool> (s + 1 < ThisTokEnd && "didn't maximally munch?" ) ? void (0) : __assert_fail ("s + 1 < ThisTokEnd && \"didn't maximally munch?\"" , "clang/lib/Lex/LiteralSupport.cpp", 983, __extension__ __PRETTY_FUNCTION__ ));
984	if (isFPConstant) break; // long long invalid for floats.
985	isLongLong = true;
986	++s; // Eat both of them.
987	} else {
988	isLong = true;
989	}
990	continue; // Success.
991	case 'z':
992	case 'Z':
993	if (isFPConstant)
994	break; // Invalid for floats.
995	if (HasSize)
996	break;
997	HasSize = true;
998	isSizeT = true;
999	continue;
1000	case 'i':
1001	case 'I':
1002	if (LangOpts.MicrosoftExt && !isFPConstant) {
1003	// Allow i8, i16, i32, and i64. First, look ahead and check if
1004	// suffixes are Microsoft integers and not the imaginary unit.
1005	uint8_t Bits = 0;
1006	size_t ToSkip = 0;
1007	switch (s[1]) {
1008	case '8': // i8 suffix
1009	Bits = 8;
1010	ToSkip = 2;
1011	break;
1012	case '1':
1013	if (s[2] == '6') { // i16 suffix
1014	Bits = 16;
1015	ToSkip = 3;
1016	}
1017	break;
1018	case '3':
1019	if (s[2] == '2') { // i32 suffix
1020	Bits = 32;
1021	ToSkip = 3;
1022	}
1023	break;
1024	case '6':
1025	if (s[2] == '4') { // i64 suffix
1026	Bits = 64;
1027	ToSkip = 3;
1028	}
1029	break;
1030	default:
1031	break;
1032	}
1033	if (Bits) {
1034	if (HasSize)
1035	break;
1036	HasSize = true;
	Value stored to 'HasSize' is never read
1037	MicrosoftInteger = Bits;
1038	s += ToSkip;
1039	assert(s <= ThisTokEnd && "didn't maximally munch?")(static_cast <bool> (s <= ThisTokEnd && "didn't maximally munch?" ) ? void (0) : __assert_fail ("s <= ThisTokEnd && \"didn't maximally munch?\"" , "clang/lib/Lex/LiteralSupport.cpp", 1039, __extension__ __PRETTY_FUNCTION__ ));
1040	break;
1041	}
1042	}
1043	[[fallthrough]];
1044	case 'j':
1045	case 'J':
1046	if (isImaginary) break; // Cannot be repeated.
1047	isImaginary = true;
1048	continue; // Success.
1049	case 'w':
1050	case 'W':
1051	if (isFPConstant)
1052	break; // Invalid for floats.
1053	if (HasSize)
1054	break; // Invalid if we already have a size for the literal.
1055
1056	// wb and WB are allowed, but a mixture of cases like Wb or wB is not. We
1057	// explicitly do not support the suffix in C++ as an extension because a
1058	// library-based UDL that resolves to a library type may be more
1059	// appropriate there.
1060	if (!LangOpts.CPlusPlus && ((s[0] == 'w' && s[1] == 'b') \|\|
1061	(s[0] == 'W' && s[1] == 'B'))) {
1062	isBitInt = true;
1063	HasSize = true;
1064	++s; // Skip both characters (2nd char skipped on continue).
1065	continue; // Success.
1066	}
1067	}
1068	// If we reached here, there was an error or a ud-suffix.
1069	break;
1070	}
1071
1072	// "i", "if", and "il" are user-defined suffixes in C++1y.
1073	if (s != ThisTokEnd \|\| isImaginary) {
1074	// FIXME: Don't bother expanding UCNs if !tok.hasUCN().
1075	expandUCNs(UDSuffixBuf, StringRef(SuffixBegin, ThisTokEnd - SuffixBegin));
1076	if (isValidUDSuffix(LangOpts, UDSuffixBuf)) {
1077	if (!isImaginary) {
1078	// Any suffix pieces we might have parsed are actually part of the
1079	// ud-suffix.
1080	isLong = false;
1081	isUnsigned = false;
1082	isLongLong = false;
1083	isSizeT = false;
1084	isFloat = false;
1085	isFloat16 = false;
1086	isHalf = false;
1087	isImaginary = false;
1088	isBitInt = false;
1089	MicrosoftInteger = 0;
1090	saw_fixed_point_suffix = false;
1091	isFract = false;
1092	isAccum = false;
1093	}
1094
1095	saw_ud_suffix = true;
1096	return;
1097	}
1098
1099	if (s != ThisTokEnd) {
1100	// Report an error if there are any.
1101	Diags.Report(Lexer::AdvanceToTokenCharacter(
1102	TokLoc, SuffixBegin - ThisTokBegin, SM, LangOpts),
1103	diag::err_invalid_suffix_constant)
1104	<< StringRef(SuffixBegin, ThisTokEnd - SuffixBegin)
1105	<< (isFixedPointConstant ? 2 : isFPConstant);
1106	hadError = true;
1107	}
1108	}
1109
1110	if (!hadError && saw_fixed_point_suffix) {
1111	assert(isFract \|\| isAccum)(static_cast <bool> (isFract \|\| isAccum) ? void (0) : __assert_fail ("isFract \|\| isAccum", "clang/lib/Lex/LiteralSupport.cpp", 1111 , __extension__ __PRETTY_FUNCTION__));
1112	}
1113	}
1114
1115	/// ParseDecimalOrOctalCommon - This method is called for decimal or octal
1116	/// numbers. It issues an error for illegal digits, and handles floating point
1117	/// parsing. If it detects a floating point number, the radix is set to 10.
1118	void NumericLiteralParser::ParseDecimalOrOctalCommon(SourceLocation TokLoc){
1119	assert((radix == 8 \|\| radix == 10) && "Unexpected radix")(static_cast <bool> ((radix == 8 \|\| radix == 10) && "Unexpected radix") ? void (0) : __assert_fail ("(radix == 8 \|\| radix == 10) && \"Unexpected radix\"" , "clang/lib/Lex/LiteralSupport.cpp", 1119, __extension__ __PRETTY_FUNCTION__ ));
1120
1121	// If we have a hex digit other than 'e' (which denotes a FP exponent) then
1122	// the code is using an incorrect base.
1123	if (isHexDigit(s) && s != 'e' && *s != 'E' &&
1124	!isValidUDSuffix(LangOpts, StringRef(s, ThisTokEnd - s))) {
1125	Diags.Report(
1126	Lexer::AdvanceToTokenCharacter(TokLoc, s - ThisTokBegin, SM, LangOpts),
1127	diag::err_invalid_digit)
1128	<< StringRef(s, 1) << (radix == 8 ? 1 : 0);
1129	hadError = true;
1130	return;
1131	}
1132
1133	if (*s == '.') {
1134	checkSeparator(TokLoc, s, CSK_AfterDigits);
1135	s++;
1136	radix = 10;
1137	saw_period = true;
1138	checkSeparator(TokLoc, s, CSK_BeforeDigits);
1139	s = SkipDigits(s); // Skip suffix.
1140	}
1141	if (s == 'e' \|\| s == 'E') { // exponent
1142	checkSeparator(TokLoc, s, CSK_AfterDigits);
1143	const char *Exponent = s;
1144	s++;
1145	radix = 10;
1146	saw_exponent = true;
1147	if (s != ThisTokEnd && (s == '+' \|\| s == '-')) s++; // sign
1148	const char *first_non_digit = SkipDigits(s);
1149	if (containsDigits(s, first_non_digit)) {
1150	checkSeparator(TokLoc, s, CSK_BeforeDigits);
1151	s = first_non_digit;
1152	} else {
1153	if (!hadError) {
1154	Diags.Report(Lexer::AdvanceToTokenCharacter(
1155	TokLoc, Exponent - ThisTokBegin, SM, LangOpts),
1156	diag::err_exponent_has_no_digits);
1157	hadError = true;
1158	}
1159	return;
1160	}
1161	}
1162	}
1163
1164	/// Determine whether a suffix is a valid ud-suffix. We avoid treating reserved
1165	/// suffixes as ud-suffixes, because the diagnostic experience is better if we
1166	/// treat it as an invalid suffix.
1167	bool NumericLiteralParser::isValidUDSuffix(const LangOptions &LangOpts,
1168	StringRef Suffix) {
1169	if (!LangOpts.CPlusPlus11 \|\| Suffix.empty())
1170	return false;
1171
1172	// By C++11 [lex.ext]p10, ud-suffixes starting with an '_' are always valid.
1173	if (Suffix[0] == '_')
1174	return true;
1175
1176	// In C++11, there are no library suffixes.
1177	if (!LangOpts.CPlusPlus14)
1178	return false;
1179
1180	// In C++14, "s", "h", "min", "ms", "us", and "ns" are used in the library.
1181	// Per tweaked N3660, "il", "i", and "if" are also used in the library.
1182	// In C++2a "d" and "y" are used in the library.
1183	return llvm::StringSwitch<bool>(Suffix)
1184	.Cases("h", "min", "s", true)
1185	.Cases("ms", "us", "ns", true)
1186	.Cases("il", "i", "if", true)
1187	.Cases("d", "y", LangOpts.CPlusPlus20)
1188	.Default(false);
1189	}
1190
1191	void NumericLiteralParser::checkSeparator(SourceLocation TokLoc,
1192	const char *Pos,
1193	CheckSeparatorKind IsAfterDigits) {
1194	if (IsAfterDigits == CSK_AfterDigits) {
1195	if (Pos == ThisTokBegin)
1196	return;
1197	--Pos;
1198	} else if (Pos == ThisTokEnd)
1199	return;
1200
1201	if (isDigitSeparator(*Pos)) {
1202	Diags.Report(Lexer::AdvanceToTokenCharacter(TokLoc, Pos - ThisTokBegin, SM,
1203	LangOpts),
1204	diag::err_digit_separator_not_between_digits)
1205	<< IsAfterDigits;
1206	hadError = true;
1207	}
1208	}
1209
1210	/// ParseNumberStartingWithZero - This method is called when the first character
1211	/// of the number is found to be a zero. This means it is either an octal
1212	/// number (like '04') or a hex number ('0x123a') a binary number ('0b1010') or
1213	/// a floating point number (01239.123e4). Eat the prefix, determining the
1214	/// radix etc.
1215	void NumericLiteralParser::ParseNumberStartingWithZero(SourceLocation TokLoc) {
1216	assert(s[0] == '0' && "Invalid method call")(static_cast <bool> (s[0] == '0' && "Invalid method call" ) ? void (0) : __assert_fail ("s[0] == '0' && \"Invalid method call\"" , "clang/lib/Lex/LiteralSupport.cpp", 1216, __extension__ __PRETTY_FUNCTION__ ));
1217	s++;
1218
1219	int c1 = s[0];
1220
1221	// Handle a hex number like 0x1234.
1222	if ((c1 == 'x' \|\| c1 == 'X') && (isHexDigit(s[1]) \|\| s[1] == '.')) {
1223	s++;
1224	assert(s < ThisTokEnd && "didn't maximally munch?")(static_cast <bool> (s < ThisTokEnd && "didn't maximally munch?" ) ? void (0) : __assert_fail ("s < ThisTokEnd && \"didn't maximally munch?\"" , "clang/lib/Lex/LiteralSupport.cpp", 1224, __extension__ __PRETTY_FUNCTION__ ));
1225	radix = 16;
1226	DigitsBegin = s;
1227	s = SkipHexDigits(s);
1228	bool HasSignificandDigits = containsDigits(DigitsBegin, s);
1229	if (s == ThisTokEnd) {
1230	// Done.
1231	} else if (*s == '.') {
1232	s++;
1233	saw_period = true;
1234	const char *floatDigitsBegin = s;
1235	s = SkipHexDigits(s);
1236	if (containsDigits(floatDigitsBegin, s))
1237	HasSignificandDigits = true;
1238	if (HasSignificandDigits)
1239	checkSeparator(TokLoc, floatDigitsBegin, CSK_BeforeDigits);
1240	}
1241
1242	if (!HasSignificandDigits) {
1243	Diags.Report(Lexer::AdvanceToTokenCharacter(TokLoc, s - ThisTokBegin, SM,
1244	LangOpts),
1245	diag::err_hex_constant_requires)
1246	<< LangOpts.CPlusPlus << 1;
1247	hadError = true;
1248	return;
1249	}
1250
1251	// A binary exponent can appear with or with a '.'. If dotted, the
1252	// binary exponent is required.
1253	if (s == 'p' \|\| s == 'P') {
1254	checkSeparator(TokLoc, s, CSK_AfterDigits);
1255	const char *Exponent = s;
1256	s++;
1257	saw_exponent = true;
1258	if (s != ThisTokEnd && (s == '+' \|\| s == '-')) s++; // sign
1259	const char *first_non_digit = SkipDigits(s);
1260	if (!containsDigits(s, first_non_digit)) {
1261	if (!hadError) {
1262	Diags.Report(Lexer::AdvanceToTokenCharacter(
1263	TokLoc, Exponent - ThisTokBegin, SM, LangOpts),
1264	diag::err_exponent_has_no_digits);
1265	hadError = true;
1266	}
1267	return;
1268	}
1269	checkSeparator(TokLoc, s, CSK_BeforeDigits);
1270	s = first_non_digit;
1271
1272	if (!LangOpts.HexFloats)
1273	Diags.Report(TokLoc, LangOpts.CPlusPlus
1274	? diag::ext_hex_literal_invalid
1275	: diag::ext_hex_constant_invalid);
1276	else if (LangOpts.CPlusPlus17)
1277	Diags.Report(TokLoc, diag::warn_cxx17_hex_literal);
1278	} else if (saw_period) {
1279	Diags.Report(Lexer::AdvanceToTokenCharacter(TokLoc, s - ThisTokBegin, SM,
1280	LangOpts),
1281	diag::err_hex_constant_requires)
1282	<< LangOpts.CPlusPlus << 0;
1283	hadError = true;
1284	}
1285	return;
1286	}
1287
1288	// Handle simple binary numbers 0b01010
1289	if ((c1 == 'b' \|\| c1 == 'B') && (s[1] == '0' \|\| s[1] == '1')) {
1290	// 0b101010 is a C++1y / GCC extension.
1291	Diags.Report(TokLoc, LangOpts.CPlusPlus14
1292	? diag::warn_cxx11_compat_binary_literal
1293	: LangOpts.CPlusPlus ? diag::ext_binary_literal_cxx14
1294	: diag::ext_binary_literal);
1295	++s;
1296	assert(s < ThisTokEnd && "didn't maximally munch?")(static_cast <bool> (s < ThisTokEnd && "didn't maximally munch?" ) ? void (0) : __assert_fail ("s < ThisTokEnd && \"didn't maximally munch?\"" , "clang/lib/Lex/LiteralSupport.cpp", 1296, __extension__ __PRETTY_FUNCTION__ ));
1297	radix = 2;
1298	DigitsBegin = s;
1299	s = SkipBinaryDigits(s);
1300	if (s == ThisTokEnd) {
1301	// Done.
1302	} else if (isHexDigit(*s) &&
1303	!isValidUDSuffix(LangOpts, StringRef(s, ThisTokEnd - s))) {
1304	Diags.Report(Lexer::AdvanceToTokenCharacter(TokLoc, s - ThisTokBegin, SM,
1305	LangOpts),
1306	diag::err_invalid_digit)
1307	<< StringRef(s, 1) << 2;
1308	hadError = true;
1309	}
1310	// Other suffixes will be diagnosed by the caller.
1311	return;
1312	}
1313
1314	// For now, the radix is set to 8. If we discover that we have a
1315	// floating point constant, the radix will change to 10. Octal floating
1316	// point constants are not permitted (only decimal and hexadecimal).
1317	radix = 8;
1318	const char *PossibleNewDigitStart = s;
1319	s = SkipOctalDigits(s);
1320	// When the value is 0 followed by a suffix (like 0wb), we want to leave 0
1321	// as the start of the digits. So if skipping octal digits does not skip
1322	// anything, we leave the digit start where it was.
1323	if (s != PossibleNewDigitStart)
1324	DigitsBegin = PossibleNewDigitStart;
1325
1326	if (s == ThisTokEnd)
1327	return; // Done, simple octal number like 01234
1328
1329	// If we have some other non-octal digit that is a decimal digit, see if
1330	// this is part of a floating point number like 094.123 or 09e1.
1331	if (isDigit(*s)) {
1332	const char *EndDecimal = SkipDigits(s);
1333	if (EndDecimal[0] == '.' \|\| EndDecimal[0] == 'e' \|\| EndDecimal[0] == 'E') {
1334	s = EndDecimal;
1335	radix = 10;
1336	}
1337	}
1338
1339	ParseDecimalOrOctalCommon(TokLoc);
1340	}
1341
1342	static bool alwaysFitsInto64Bits(unsigned Radix, unsigned NumDigits) {
1343	switch (Radix) {
1344	case 2:
1345	return NumDigits <= 64;
1346	case 8:
1347	return NumDigits <= 64 / 3; // Digits are groups of 3 bits.
1348	case 10:
1349	return NumDigits <= 19; // floor(log10(2^64))
1350	case 16:
1351	return NumDigits <= 64 / 4; // Digits are groups of 4 bits.
1352	default:
1353	llvm_unreachable("impossible Radix")::llvm::llvm_unreachable_internal("impossible Radix", "clang/lib/Lex/LiteralSupport.cpp" , 1353);
1354	}
1355	}
1356
1357	/// GetIntegerValue - Convert this numeric literal value to an APInt that
1358	/// matches Val's input width. If there is an overflow, set Val to the low bits
1359	/// of the result and return true. Otherwise, return false.
1360	bool NumericLiteralParser::GetIntegerValue(llvm::APInt &Val) {
1361	// Fast path: Compute a conservative bound on the maximum number of
1362	// bits per digit in this radix. If we can't possibly overflow a
1363	// uint64 based on that bound then do the simple conversion to
1364	// integer. This avoids the expensive overflow checking below, and
1365	// handles the common cases that matter (small decimal integers and
1366	// hex/octal values which don't overflow).
1367	const unsigned NumDigits = SuffixBegin - DigitsBegin;
1368	if (alwaysFitsInto64Bits(radix, NumDigits)) {
1369	uint64_t N = 0;
1370	for (const char *Ptr = DigitsBegin; Ptr != SuffixBegin; ++Ptr)
1371	if (!isDigitSeparator(*Ptr))
1372	N = N * radix + llvm::hexDigitValue(*Ptr);
1373
1374	// This will truncate the value to Val's input width. Simply check
1375	// for overflow by comparing.
1376	Val = N;
1377	return Val.getZExtValue() != N;
1378	}
1379
1380	Val = 0;
1381	const char *Ptr = DigitsBegin;
1382
1383	llvm::APInt RadixVal(Val.getBitWidth(), radix);
1384	llvm::APInt CharVal(Val.getBitWidth(), 0);
1385	llvm::APInt OldVal = Val;
1386
1387	bool OverflowOccurred = false;
1388	while (Ptr < SuffixBegin) {
1389	if (isDigitSeparator(*Ptr)) {
1390	++Ptr;
1391	continue;
1392	}
1393
1394	unsigned C = llvm::hexDigitValue(*Ptr++);
1395
1396	// If this letter is out of bound for this radix, reject it.
1397	assert(C < radix && "NumericLiteralParser ctor should have rejected this")(static_cast <bool> (C < radix && "NumericLiteralParser ctor should have rejected this" ) ? void (0) : __assert_fail ("C < radix && \"NumericLiteralParser ctor should have rejected this\"" , "clang/lib/Lex/LiteralSupport.cpp", 1397, __extension__ __PRETTY_FUNCTION__ ));
1398
1399	CharVal = C;
1400
1401	// Add the digit to the value in the appropriate radix. If adding in digits
1402	// made the value smaller, then this overflowed.
1403	OldVal = Val;
1404
1405	// Multiply by radix, did overflow occur on the multiply?
1406	Val *= RadixVal;
1407	OverflowOccurred \|= Val.udiv(RadixVal) != OldVal;
1408
1409	// Add value, did overflow occur on the value?
1410	// (a + b) ult b <=> overflow
1411	Val += CharVal;
1412	OverflowOccurred \|= Val.ult(CharVal);
1413	}
1414	return OverflowOccurred;
1415	}
1416
1417	llvm::APFloat::opStatus
1418	NumericLiteralParser::GetFloatValue(llvm::APFloat &Result) {
1419	using llvm::APFloat;
1420
1421	unsigned n = std::min(SuffixBegin - ThisTokBegin, ThisTokEnd - ThisTokBegin);
1422
1423	llvm::SmallString<16> Buffer;
1424	StringRef Str(ThisTokBegin, n);
1425	if (Str.contains('\'')) {
1426	Buffer.reserve(n);
1427	std::remove_copy_if(Str.begin(), Str.end(), std::back_inserter(Buffer),
1428	&isDigitSeparator);
1429	Str = Buffer;
1430	}
1431
1432	auto StatusOrErr =
1433	Result.convertFromString(Str, APFloat::rmNearestTiesToEven);
1434	assert(StatusOrErr && "Invalid floating point representation")(static_cast <bool> (StatusOrErr && "Invalid floating point representation" ) ? void (0) : __assert_fail ("StatusOrErr && \"Invalid floating point representation\"" , "clang/lib/Lex/LiteralSupport.cpp", 1434, __extension__ __PRETTY_FUNCTION__ ));
1435	return !errorToBool(StatusOrErr.takeError()) ? *StatusOrErr
1436	: APFloat::opInvalidOp;
1437	}
1438
/// Returns true if \p c is one of the exponent-introducing characters
/// 'p', 'P', 'e' or 'E'.
static inline bool IsExponentPart(char c) {
  return c == 'p' || c == 'P' || c == 'e' || c == 'E';
}
1442
1443	bool NumericLiteralParser::GetFixedPointValue(llvm::APInt &StoreVal, unsigned Scale) {
1444	assert(radix == 16 \|\| radix == 10)(static_cast <bool> (radix == 16 \|\| radix == 10) ? void (0) : __assert_fail ("radix == 16 \|\| radix == 10", "clang/lib/Lex/LiteralSupport.cpp" , 1444, __extension__ __PRETTY_FUNCTION__));
1445
1446	// Find how many digits are needed to store the whole literal.
1447	unsigned NumDigits = SuffixBegin - DigitsBegin;
1448	if (saw_period) --NumDigits;
1449
1450	// Initial scan of the exponent if it exists
1451	bool ExpOverflowOccurred = false;
1452	bool NegativeExponent = false;
1453	const char *ExponentBegin;
1454	uint64_t Exponent = 0;
1455	int64_t BaseShift = 0;
1456	if (saw_exponent) {
1457	const char *Ptr = DigitsBegin;
1458
1459	while (!IsExponentPart(*Ptr)) ++Ptr;
1460	ExponentBegin = Ptr;
1461	++Ptr;
1462	NegativeExponent = *Ptr == '-';
1463	if (NegativeExponent) ++Ptr;
1464
1465	unsigned NumExpDigits = SuffixBegin - Ptr;
1466	if (alwaysFitsInto64Bits(radix, NumExpDigits)) {
1467	llvm::StringRef ExpStr(Ptr, NumExpDigits);
1468	llvm::APInt ExpInt(/numBits=/64, ExpStr, /radix=/10);
1469	Exponent = ExpInt.getZExtValue();
1470	} else {
1471	ExpOverflowOccurred = true;
1472	}
1473
1474	if (NegativeExponent) BaseShift -= Exponent;
1475	else BaseShift += Exponent;
1476	}
1477
1478	// Number of bits needed for decimal literal is
1479	// ceil(NumDigits * log2(10)) Integral part
1480	// + Scale Fractional part
1481	// + ceil(Exponent * log2(10)) Exponent
1482	// --------------------------------------------------
1483	// ceil((NumDigits + Exponent) * log2(10)) + Scale
1484	//
1485	// But for simplicity in handling integers, we can round up log2(10) to 4,
1486	// making:
1487	// 4 * (NumDigits + Exponent) + Scale
1488	//
1489	// Number of digits needed for hexadecimal literal is
1490	// 4 * NumDigits Integral part
1491	// + Scale Fractional part
1492	// + Exponent Exponent
1493	// --------------------------------------------------
1494	// (4 * NumDigits) + Scale + Exponent
1495	uint64_t NumBitsNeeded;
1496	if (radix == 10)
1497	NumBitsNeeded = 4 * (NumDigits + Exponent) + Scale;
1498	else
1499	NumBitsNeeded = 4 * NumDigits + Exponent + Scale;
1500
1501	if (NumBitsNeeded > std::numeric_limits<unsigned>::max())
1502	ExpOverflowOccurred = true;
1503	llvm::APInt Val(static_cast<unsigned>(NumBitsNeeded), 0, /isSigned=/false);
1504
1505	bool FoundDecimal = false;
1506
1507	int64_t FractBaseShift = 0;
1508	const char *End = saw_exponent ? ExponentBegin : SuffixBegin;
1509	for (const char *Ptr = DigitsBegin; Ptr < End; ++Ptr) {
1510	if (*Ptr == '.') {
1511	FoundDecimal = true;
1512	continue;
1513	}
1514
1515	// Normal reading of an integer
1516	unsigned C = llvm::hexDigitValue(*Ptr);
1517	assert(C < radix && "NumericLiteralParser ctor should have rejected this")(static_cast <bool> (C < radix && "NumericLiteralParser ctor should have rejected this" ) ? void (0) : __assert_fail ("C < radix && \"NumericLiteralParser ctor should have rejected this\"" , "clang/lib/Lex/LiteralSupport.cpp", 1517, __extension__ __PRETTY_FUNCTION__ ));
1518
1519	Val *= radix;
1520	Val += C;
1521
1522	if (FoundDecimal)
1523	// Keep track of how much we will need to adjust this value by from the
1524	// number of digits past the radix point.
1525	--FractBaseShift;
1526	}
1527
1528	// For a radix of 16, we will be multiplying by 2 instead of 16.
1529	if (radix == 16) FractBaseShift *= 4;
1530	BaseShift += FractBaseShift;
1531
1532	Val <<= Scale;
1533
1534	uint64_t Base = (radix == 16) ? 2 : 10;
1535	if (BaseShift > 0) {
1536	for (int64_t i = 0; i < BaseShift; ++i) {
1537	Val *= Base;
1538	}
1539	} else if (BaseShift < 0) {
1540	for (int64_t i = BaseShift; i < 0 && !Val.isZero(); ++i)
1541	Val = Val.udiv(Base);
1542	}
1543
1544	bool IntOverflowOccurred = false;
1545	auto MaxVal = llvm::APInt::getMaxValue(StoreVal.getBitWidth());
1546	if (Val.getBitWidth() > StoreVal.getBitWidth()) {
1547	IntOverflowOccurred \|= Val.ugt(MaxVal.zext(Val.getBitWidth()));
1548	StoreVal = Val.trunc(StoreVal.getBitWidth());
1549	} else if (Val.getBitWidth() < StoreVal.getBitWidth()) {
1550	IntOverflowOccurred \|= Val.zext(MaxVal.getBitWidth()).ugt(MaxVal);
1551	StoreVal = Val.zext(StoreVal.getBitWidth());
1552	} else {
1553	StoreVal = Val;
1554	}
1555
1556	return IntOverflowOccurred \|\| ExpOverflowOccurred;
1557	}
1558
1559	/// \verbatim
1560	/// user-defined-character-literal: [C++11 lex.ext]
1561	/// character-literal ud-suffix
1562	/// ud-suffix:
1563	/// identifier
1564	/// character-literal: [C++11 lex.ccon]
1565	/// ' c-char-sequence '
1566	/// u' c-char-sequence '
1567	/// U' c-char-sequence '
1568	/// L' c-char-sequence '
1569	/// u8' c-char-sequence ' [C++1z lex.ccon]
1570	/// c-char-sequence:
1571	/// c-char
1572	/// c-char-sequence c-char
1573	/// c-char:
1574	/// any member of the source character set except the single-quote ',
1575	/// backslash \, or new-line character
1576	/// escape-sequence
1577	/// universal-character-name
1578	/// escape-sequence:
1579	/// simple-escape-sequence
1580	/// octal-escape-sequence
1581	/// hexadecimal-escape-sequence
1582	/// simple-escape-sequence:
1583	/// one of \' \" \? \\ \a \b \f \n \r \t \v
1584	/// octal-escape-sequence:
1585	/// \ octal-digit
1586	/// \ octal-digit octal-digit
1587	/// \ octal-digit octal-digit octal-digit
1588	/// hexadecimal-escape-sequence:
1589	/// \x hexadecimal-digit
1590	/// hexadecimal-escape-sequence hexadecimal-digit
1591	/// universal-character-name: [C++11 lex.charset]
1592	/// \u hex-quad
1593	/// \U hex-quad hex-quad
1594	/// hex-quad:
1595	/// hex-digit hex-digit hex-digit hex-digit
1596	/// \endverbatim
1597	///
1598	CharLiteralParser::CharLiteralParser(const char begin, const char end,
1599	SourceLocation Loc, Preprocessor &PP,
1600	tok::TokenKind kind) {
1601	// At this point we know that the character matches the regex "(L\|u\|U)?'.*'".
1602	HadError = false;
1603
1604	Kind = kind;
1605
1606	const char *TokBegin = begin;
1607
1608	// Skip over wide character determinant.
1609	if (Kind != tok::char_constant)
1610	++begin;
1611	if (Kind == tok::utf8_char_constant)
1612	++begin;
1613
1614	// Skip over the entry quote.
1615	if (begin[0] != '\'') {
1616	PP.Diag(Loc, diag::err_lexing_char);
1617	HadError = true;
1618	return;
1619	}
1620
1621	++begin;
1622
1623	// Remove an optional ud-suffix.
1624	if (end[-1] != '\'') {
1625	const char *UDSuffixEnd = end;
1626	do {
1627	--end;
1628	} while (end[-1] != '\'');
1629	// FIXME: Don't bother with this if !tok.hasUCN().
1630	expandUCNs(UDSuffixBuf, StringRef(end, UDSuffixEnd - end));
1631	UDSuffixOffset = end - TokBegin;
1632	}
1633
1634	// Trim the ending quote.
1635	assert(end != begin && "Invalid token lexed")(static_cast <bool> (end != begin && "Invalid token lexed" ) ? void (0) : __assert_fail ("end != begin && \"Invalid token lexed\"" , "clang/lib/Lex/LiteralSupport.cpp", 1635, __extension__ __PRETTY_FUNCTION__ ));
1636	--end;
1637
1638	// FIXME: The "Value" is an uint64_t so we can handle char literals of
1639	// up to 64-bits.
1640	// FIXME: This extensively assumes that 'char' is 8-bits.
1641	assert(PP.getTargetInfo().getCharWidth() == 8 &&(static_cast <bool> (PP.getTargetInfo().getCharWidth() == 8 && "Assumes char is 8 bits") ? void (0) : __assert_fail ("PP.getTargetInfo().getCharWidth() == 8 && \"Assumes char is 8 bits\"" , "clang/lib/Lex/LiteralSupport.cpp", 1642, __extension__ __PRETTY_FUNCTION__ ))
1642	"Assumes char is 8 bits")(static_cast <bool> (PP.getTargetInfo().getCharWidth() == 8 && "Assumes char is 8 bits") ? void (0) : __assert_fail ("PP.getTargetInfo().getCharWidth() == 8 && \"Assumes char is 8 bits\"" , "clang/lib/Lex/LiteralSupport.cpp", 1642, __extension__ __PRETTY_FUNCTION__ ));
1643	assert(PP.getTargetInfo().getIntWidth() <= 64 &&(static_cast <bool> (PP.getTargetInfo().getIntWidth() <= 64 && (PP.getTargetInfo().getIntWidth() & 7) == 0 && "Assumes sizeof(int) on target is <= 64 and a multiple of char" ) ? void (0) : __assert_fail ("PP.getTargetInfo().getIntWidth() <= 64 && (PP.getTargetInfo().getIntWidth() & 7) == 0 && \"Assumes sizeof(int) on target is <= 64 and a multiple of char\"" , "clang/lib/Lex/LiteralSupport.cpp", 1645, __extension__ __PRETTY_FUNCTION__ ))
1644	(PP.getTargetInfo().getIntWidth() & 7) == 0 &&(static_cast <bool> (PP.getTargetInfo().getIntWidth() <= 64 && (PP.getTargetInfo().getIntWidth() & 7) == 0 && "Assumes sizeof(int) on target is <= 64 and a multiple of char" ) ? void (0) : __assert_fail ("PP.getTargetInfo().getIntWidth() <= 64 && (PP.getTargetInfo().getIntWidth() & 7) == 0 && \"Assumes sizeof(int) on target is <= 64 and a multiple of char\"" , "clang/lib/Lex/LiteralSupport.cpp", 1645, __extension__ __PRETTY_FUNCTION__ ))
1645	"Assumes sizeof(int) on target is <= 64 and a multiple of char")(static_cast <bool> (PP.getTargetInfo().getIntWidth() <= 64 && (PP.getTargetInfo().getIntWidth() & 7) == 0 && "Assumes sizeof(int) on target is <= 64 and a multiple of char" ) ? void (0) : __assert_fail ("PP.getTargetInfo().getIntWidth() <= 64 && (PP.getTargetInfo().getIntWidth() & 7) == 0 && \"Assumes sizeof(int) on target is <= 64 and a multiple of char\"" , "clang/lib/Lex/LiteralSupport.cpp", 1645, __extension__ __PRETTY_FUNCTION__ ));
1646	assert(PP.getTargetInfo().getWCharWidth() <= 64 &&(static_cast <bool> (PP.getTargetInfo().getWCharWidth() <= 64 && "Assumes sizeof(wchar) on target is <= 64" ) ? void (0) : __assert_fail ("PP.getTargetInfo().getWCharWidth() <= 64 && \"Assumes sizeof(wchar) on target is <= 64\"" , "clang/lib/Lex/LiteralSupport.cpp", 1647, __extension__ __PRETTY_FUNCTION__ ))
1647	"Assumes sizeof(wchar) on target is <= 64")(static_cast <bool> (PP.getTargetInfo().getWCharWidth() <= 64 && "Assumes sizeof(wchar) on target is <= 64" ) ? void (0) : __assert_fail ("PP.getTargetInfo().getWCharWidth() <= 64 && \"Assumes sizeof(wchar) on target is <= 64\"" , "clang/lib/Lex/LiteralSupport.cpp", 1647, __extension__ __PRETTY_FUNCTION__ ));
1648
1649	SmallVector<uint32_t, 4> codepoint_buffer;
1650	codepoint_buffer.resize(end - begin);
1651	uint32_t *buffer_begin = &codepoint_buffer.front();
1652	uint32_t *buffer_end = buffer_begin + codepoint_buffer.size();
1653
1654	// Unicode escapes representing characters that cannot be correctly
1655	// represented in a single code unit are disallowed in character literals
1656	// by this implementation.
1657	uint32_t largest_character_for_kind;
1658	if (tok::wide_char_constant == Kind) {
1659	largest_character_for_kind =
1660	0xFFFFFFFFu >> (32-PP.getTargetInfo().getWCharWidth());
1661	} else if (tok::utf8_char_constant == Kind) {
1662	largest_character_for_kind = 0x7F;
1663	} else if (tok::utf16_char_constant == Kind) {
1664	largest_character_for_kind = 0xFFFF;
1665	} else if (tok::utf32_char_constant == Kind) {
1666	largest_character_for_kind = 0x10FFFF;
1667	} else {
1668	largest_character_for_kind = 0x7Fu;
1669	}
1670
1671	while (begin != end) {
1672	// Is this a span of non-escape characters?
1673	if (begin[0] != '\\') {
1674	char const *start = begin;
1675	do {
1676	++begin;
1677	} while (begin != end && *begin != '\\');
1678
1679	char const *tmp_in_start = start;
1680	uint32_t *tmp_out_start = buffer_begin;
1681	llvm::ConversionResult res =
1682	llvm::ConvertUTF8toUTF32(reinterpret_cast<llvm::UTF8 const **>(&start),
1683	reinterpret_cast<llvm::UTF8 const *>(begin),
1684	&buffer_begin, buffer_end, llvm::strictConversion);
1685	if (res != llvm::conversionOK) {
1686	// If we see bad encoding for unprefixed character literals, warn and
1687	// simply copy the byte values, for compatibility with gcc and
1688	// older versions of clang.
1689	bool NoErrorOnBadEncoding = isOrdinary();
1690	unsigned Msg = diag::err_bad_character_encoding;
1691	if (NoErrorOnBadEncoding)
1692	Msg = diag::warn_bad_character_encoding;
1693	PP.Diag(Loc, Msg);
1694	if (NoErrorOnBadEncoding) {
1695	start = tmp_in_start;
1696	buffer_begin = tmp_out_start;
1697	for (; start != begin; ++start, ++buffer_begin)
1698	buffer_begin = static_cast<uint8_t>(start);
1699	} else {
1700	HadError = true;
1701	}
1702	} else {
1703	for (; tmp_out_start < buffer_begin; ++tmp_out_start) {
1704	if (*tmp_out_start > largest_character_for_kind) {
1705	HadError = true;
1706	PP.Diag(Loc, diag::err_character_too_large);
1707	}
1708	}
1709	}
1710
1711	continue;
1712	}
1713	// Is this a Universal Character Name escape?
1714	if (begin[1] == 'u' \|\| begin[1] == 'U' \|\| begin[1] == 'N') {
1715	unsigned short UcnLen = 0;
1716	if (!ProcessUCNEscape(TokBegin, begin, end, *buffer_begin, UcnLen,
1717	FullSourceLoc(Loc, PP.getSourceManager()),
1718	&PP.getDiagnostics(), PP.getLangOpts(), true)) {
1719	HadError = true;
1720	} else if (*buffer_begin > largest_character_for_kind) {
1721	HadError = true;
1722	PP.Diag(Loc, diag::err_character_too_large);
1723	}
1724
1725	++buffer_begin;
1726	continue;
1727	}
1728	unsigned CharWidth = getCharWidth(Kind, PP.getTargetInfo());
1729	uint64_t result =
1730	ProcessCharEscape(TokBegin, begin, end, HadError,
1731	FullSourceLoc(Loc,PP.getSourceManager()),
1732	CharWidth, &PP.getDiagnostics(), PP.getLangOpts());
1733	*buffer_begin++ = result;
1734	}
1735
1736	unsigned NumCharsSoFar = buffer_begin - &codepoint_buffer.front();
1737
1738	if (NumCharsSoFar > 1) {
1739	if (isOrdinary() && NumCharsSoFar == 4)
1740	PP.Diag(Loc, diag::warn_four_char_character_literal);
1741	else if (isOrdinary())
1742	PP.Diag(Loc, diag::warn_multichar_character_literal);
1743	else {
1744	PP.Diag(Loc, diag::err_multichar_character_literal) << (isWide() ? 0 : 1);
1745	HadError = true;
1746	}
1747	IsMultiChar = true;
1748	} else {
1749	IsMultiChar = false;
1750	}
1751
1752	llvm::APInt LitVal(PP.getTargetInfo().getIntWidth(), 0);
1753
1754	// Narrow character literals act as though their value is concatenated
1755	// in this implementation, but warn on overflow.
1756	bool multi_char_too_long = false;
1757	if (isOrdinary() && isMultiChar()) {
1758	LitVal = 0;
1759	for (size_t i = 0; i < NumCharsSoFar; ++i) {
1760	// check for enough leading zeros to shift into
1761	multi_char_too_long \|= (LitVal.countl_zero() < 8);
1762	LitVal <<= 8;
1763	LitVal = LitVal + (codepoint_buffer[i] & 0xFF);
1764	}
1765	} else if (NumCharsSoFar > 0) {
1766	// otherwise just take the last character
1767	LitVal = buffer_begin[-1];
1768	}
1769
1770	if (!HadError && multi_char_too_long) {
1771	PP.Diag(Loc, diag::warn_char_constant_too_large);
1772	}
1773
1774	// Transfer the value from APInt to uint64_t
1775	Value = LitVal.getZExtValue();
1776
1777	// If this is a single narrow character, sign extend it (e.g. '\xFF' is "-1")
1778	// if 'char' is signed for this target (C99 6.4.4.4p10). Note that multiple
1779	// character constants are not sign extended in the this implementation:
1780	// '\xFF\xFF' = 65536 and '\x0\xFF' = 255, which matches GCC.
1781	if (isOrdinary() && NumCharsSoFar == 1 && (Value & 128) &&
1782	PP.getLangOpts().CharIsSigned)
1783	Value = (signed char)Value;
1784	}
1785
1786	/// \verbatim
1787	/// string-literal: [C++0x lex.string]
1788	/// encoding-prefix " [s-char-sequence] "
1789	/// encoding-prefix R raw-string
1790	/// encoding-prefix:
1791	/// u8
1792	/// u
1793	/// U
1794	/// L
1795	/// s-char-sequence:
1796	/// s-char
1797	/// s-char-sequence s-char
1798	/// s-char:
1799	/// any member of the source character set except the double-quote ",
1800	/// backslash \, or new-line character
1801	/// escape-sequence
1802	/// universal-character-name
1803	/// raw-string:
1804	/// " d-char-sequence ( r-char-sequence ) d-char-sequence "
1805	/// r-char-sequence:
1806	/// r-char
1807	/// r-char-sequence r-char
1808	/// r-char:
1809	/// any member of the source character set, except a right parenthesis )
1810	/// followed by the initial d-char-sequence (which may be empty)
1811	/// followed by a double quote ".
1812	/// d-char-sequence:
1813	/// d-char
1814	/// d-char-sequence d-char
1815	/// d-char:
1816	/// any member of the basic source character set except:
1817	/// space, the left parenthesis (, the right parenthesis ),
1818	/// the backslash \, and the control characters representing horizontal
1819	/// tab, vertical tab, form feed, and newline.
1820	/// escape-sequence: [C++0x lex.ccon]
1821	/// simple-escape-sequence
1822	/// octal-escape-sequence
1823	/// hexadecimal-escape-sequence
1824	/// simple-escape-sequence:
1825	/// one of \' \" \? \\ \a \b \f \n \r \t \v
1826	/// octal-escape-sequence:
1827	/// \ octal-digit
1828	/// \ octal-digit octal-digit
1829	/// \ octal-digit octal-digit octal-digit
1830	/// hexadecimal-escape-sequence:
1831	/// \x hexadecimal-digit
1832	/// hexadecimal-escape-sequence hexadecimal-digit
1833	/// universal-character-name:
1834	/// \u hex-quad
1835	/// \U hex-quad hex-quad
1836	/// hex-quad:
1837	/// hex-digit hex-digit hex-digit hex-digit
1838	/// \endverbatim
1839	///
1840	StringLiteralParser::
1841	StringLiteralParser(ArrayRef<Token> StringToks,
1842	Preprocessor &PP)
1843	: SM(PP.getSourceManager()), Features(PP.getLangOpts()),
1844	Target(PP.getTargetInfo()), Diags(&PP.getDiagnostics()),
1845	MaxTokenLength(0), SizeBound(0), CharByteWidth(0), Kind(tok::unknown),
1846	ResultPtr(ResultBuf.data()), hadError(false), Pascal(false) {
1847	init(StringToks);
1848	}
1849
1850	void StringLiteralParser::init(ArrayRef<Token> StringToks){
1851	// The literal token may have come from an invalid source location (e.g. due
1852	// to a PCH error), in which case the token length will be 0.
1853	if (StringToks.empty() \|\| StringToks[0].getLength() < 2)
1854	return DiagnoseLexingError(SourceLocation());
1855
1856	// Scan all of the string portions, remember the max individual token length,
1857	// computing a bound on the concatenated string length, and see whether any
1858	// piece is a wide-string. If any of the string portions is a wide-string
1859	// literal, the result is a wide-string literal [C99 6.4.5p4].
1860	assert(!StringToks.empty() && "expected at least one token")(static_cast <bool> (!StringToks.empty() && "expected at least one token" ) ? void (0) : __assert_fail ("!StringToks.empty() && \"expected at least one token\"" , "clang/lib/Lex/LiteralSupport.cpp", 1860, __extension__ __PRETTY_FUNCTION__ ));
1861	MaxTokenLength = StringToks[0].getLength();
1862	assert(StringToks[0].getLength() >= 2 && "literal token is invalid!")(static_cast <bool> (StringToks[0].getLength() >= 2 && "literal token is invalid!") ? void (0) : __assert_fail ("StringToks[0].getLength() >= 2 && \"literal token is invalid!\"" , "clang/lib/Lex/LiteralSupport.cpp", 1862, __extension__ __PRETTY_FUNCTION__ ));
1863	SizeBound = StringToks[0].getLength()-2; // -2 for "".
1864	Kind = StringToks[0].getKind();
1865
1866	hadError = false;
1867
1868	// Implement Translation Phase #6: concatenation of string literals
1869	/// (C99 5.1.1.2p1). The common case is only one string fragment.
1870	for (const Token &Tok : StringToks) {
1871	if (Tok.getLength() < 2)
1872	return DiagnoseLexingError(Tok.getLocation());
1873
1874	// The string could be shorter than this if it needs cleaning, but this is a
1875	// reasonable bound, which is all we need.
1876	assert(Tok.getLength() >= 2 && "literal token is invalid!")(static_cast <bool> (Tok.getLength() >= 2 && "literal token is invalid!") ? void (0) : __assert_fail ("Tok.getLength() >= 2 && \"literal token is invalid!\"" , "clang/lib/Lex/LiteralSupport.cpp", 1876, __extension__ __PRETTY_FUNCTION__ ));
1877	SizeBound += Tok.getLength() - 2; // -2 for "".
1878
1879	// Remember maximum string piece length.
1880	if (Tok.getLength() > MaxTokenLength)
1881	MaxTokenLength = Tok.getLength();
1882
1883	// Remember if we see any wide or utf-8/16/32 strings.
1884	// Also check for illegal concatenations.
1885	if (Tok.isNot(Kind) && Tok.isNot(tok::string_literal)) {
1886	if (isOrdinary()) {
1887	Kind = Tok.getKind();
1888	} else {
1889	if (Diags)
1890	Diags->Report(Tok.getLocation(), diag::err_unsupported_string_concat);
1891	hadError = true;
1892	}
1893	}
1894	}
1895
1896	// Include space for the null terminator.
1897	++SizeBound;
1898
1899	// TODO: K&R warning: "traditional C rejects string constant concatenation"
1900
1901	// Get the width in bytes of char/wchar_t/char16_t/char32_t
1902	CharByteWidth = getCharWidth(Kind, Target);
1903	assert((CharByteWidth & 7) == 0 && "Assumes character size is byte multiple")(static_cast <bool> ((CharByteWidth & 7) == 0 && "Assumes character size is byte multiple") ? void (0) : __assert_fail ("(CharByteWidth & 7) == 0 && \"Assumes character size is byte multiple\"" , "clang/lib/Lex/LiteralSupport.cpp", 1903, __extension__ __PRETTY_FUNCTION__ ));
1904	CharByteWidth /= 8;
1905
1906	// The output buffer size needs to be large enough to hold wide characters.
1907	// This is a worst-case assumption which basically corresponds to L"" "long".
1908	SizeBound *= CharByteWidth;
1909
1910	// Size the temporary buffer to hold the result string data.
1911	ResultBuf.resize(SizeBound);
1912
1913	// Likewise, but for each string piece.
1914	SmallString<512> TokenBuf;
1915	TokenBuf.resize(MaxTokenLength);
1916
1917	// Loop over all the strings, getting their spelling, and expanding them to
1918	// wide strings as appropriate.
1919	ResultPtr = &ResultBuf[0]; // Next byte to fill in.
1920
1921	Pascal = false;
1922
1923	SourceLocation UDSuffixTokLoc;
1924
1925	for (unsigned i = 0, e = StringToks.size(); i != e; ++i) {
1926	const char *ThisTokBuf = &TokenBuf[0];
1927	// Get the spelling of the token, which eliminates trigraphs, etc. We know
1928	// that ThisTokBuf points to a buffer that is big enough for the whole token
1929	// and 'spelled' tokens can only shrink.
1930	bool StringInvalid = false;
1931	unsigned ThisTokLen =
1932	Lexer::getSpelling(StringToks[i], ThisTokBuf, SM, Features,
1933	&StringInvalid);
1934	if (StringInvalid)
1935	return DiagnoseLexingError(StringToks[i].getLocation());
1936
1937	const char *ThisTokBegin = ThisTokBuf;
1938	const char *ThisTokEnd = ThisTokBuf+ThisTokLen;
1939
1940	// Remove an optional ud-suffix.
1941	if (ThisTokEnd[-1] != '"') {
1942	const char *UDSuffixEnd = ThisTokEnd;
1943	do {
1944	--ThisTokEnd;
1945	} while (ThisTokEnd[-1] != '"');
1946
1947	StringRef UDSuffix(ThisTokEnd, UDSuffixEnd - ThisTokEnd);
1948
1949	if (UDSuffixBuf.empty()) {
1950	if (StringToks[i].hasUCN())
1951	expandUCNs(UDSuffixBuf, UDSuffix);
1952	else
1953	UDSuffixBuf.assign(UDSuffix);
1954	UDSuffixToken = i;
1955	UDSuffixOffset = ThisTokEnd - ThisTokBuf;
1956	UDSuffixTokLoc = StringToks[i].getLocation();
1957	} else {
1958	SmallString<32> ExpandedUDSuffix;
1959	if (StringToks[i].hasUCN()) {
1960	expandUCNs(ExpandedUDSuffix, UDSuffix);
1961	UDSuffix = ExpandedUDSuffix;
1962	}
1963
1964	// C++11 [lex.ext]p8: At the end of phase 6, if a string literal is the
1965	// result of a concatenation involving at least one user-defined-string-
1966	// literal, all the participating user-defined-string-literals shall
1967	// have the same ud-suffix.
1968	if (UDSuffixBuf != UDSuffix) {
1969	if (Diags) {
1970	SourceLocation TokLoc = StringToks[i].getLocation();
1971	Diags->Report(TokLoc, diag::err_string_concat_mixed_suffix)
1972	<< UDSuffixBuf << UDSuffix
1973	<< SourceRange(UDSuffixTokLoc, UDSuffixTokLoc)
1974	<< SourceRange(TokLoc, TokLoc);
1975	}
1976	hadError = true;
1977	}
1978	}
1979	}
1980
1981	// Strip the end quote.
1982	--ThisTokEnd;
1983
1984	// TODO: Input character set mapping support.
1985
1986	// Skip marker for wide or unicode strings.
1987	if (ThisTokBuf[0] == 'L' \|\| ThisTokBuf[0] == 'u' \|\| ThisTokBuf[0] == 'U') {
1988	++ThisTokBuf;
1989	// Skip 8 of u8 marker for utf8 strings.
1990	if (ThisTokBuf[0] == '8')
1991	++ThisTokBuf;
1992	}
1993
1994	// Check for raw string
1995	if (ThisTokBuf[0] == 'R') {
1996	if (ThisTokBuf[1] != '"') {
1997	// The file may have come from PCH and then changed after loading the
1998	// PCH; Fail gracefully.
1999	return DiagnoseLexingError(StringToks[i].getLocation());
2000	}
2001	ThisTokBuf += 2; // skip R"
2002
2003	// C++11 [lex.string]p2: A `d-char-sequence` shall consist of at most 16
2004	// characters.
2005	constexpr unsigned MaxRawStrDelimLen = 16;
2006
2007	const char *Prefix = ThisTokBuf;
2008	while (static_cast<unsigned>(ThisTokBuf - Prefix) < MaxRawStrDelimLen &&
2009	ThisTokBuf[0] != '(')
2010	++ThisTokBuf;
2011	if (ThisTokBuf[0] != '(')
2012	return DiagnoseLexingError(StringToks[i].getLocation());
2013	++ThisTokBuf; // skip '('
2014
2015	// Remove same number of characters from the end
2016	ThisTokEnd -= ThisTokBuf - Prefix;
2017	if (ThisTokEnd < ThisTokBuf)
2018	return DiagnoseLexingError(StringToks[i].getLocation());
2019
2020	// C++14 [lex.string]p4: A source-file new-line in a raw string literal
2021	// results in a new-line in the resulting execution string-literal.
2022	StringRef RemainingTokenSpan(ThisTokBuf, ThisTokEnd - ThisTokBuf);
2023	while (!RemainingTokenSpan.empty()) {
2024	// Split the string literal on \r\n boundaries.
2025	size_t CRLFPos = RemainingTokenSpan.find("\r\n");
2026	StringRef BeforeCRLF = RemainingTokenSpan.substr(0, CRLFPos);
2027	StringRef AfterCRLF = RemainingTokenSpan.substr(CRLFPos);
2028
2029	// Copy everything before the \r\n sequence into the string literal.
2030	if (CopyStringFragment(StringToks[i], ThisTokBegin, BeforeCRLF))
2031	hadError = true;
2032
2033	// Point into the \n inside the \r\n sequence and operate on the
2034	// remaining portion of the literal.
2035	RemainingTokenSpan = AfterCRLF.substr(1);
2036	}
2037	} else {
2038	if (ThisTokBuf[0] != '"') {
2039	// The file may have come from PCH and then changed after loading the
2040	// PCH; Fail gracefully.
2041	return DiagnoseLexingError(StringToks[i].getLocation());
2042	}
2043	++ThisTokBuf; // skip "
2044
2045	// Check if this is a pascal string
2046	if (Features.PascalStrings && ThisTokBuf + 1 != ThisTokEnd &&
2047	ThisTokBuf[0] == '\\' && ThisTokBuf[1] == 'p') {
2048
2049	// If the \p sequence is found in the first token, we have a pascal string
2050	// Otherwise, if we already have a pascal string, ignore the first \p
2051	if (i == 0) {
2052	++ThisTokBuf;
2053	Pascal = true;
2054	} else if (Pascal)
2055	ThisTokBuf += 2;
2056	}
2057
2058	while (ThisTokBuf != ThisTokEnd) {
2059	// Is this a span of non-escape characters?
2060	if (ThisTokBuf[0] != '\\') {
2061	const char *InStart = ThisTokBuf;
2062	do {
2063	++ThisTokBuf;
2064	} while (ThisTokBuf != ThisTokEnd && ThisTokBuf[0] != '\\');
2065
2066	// Copy the character span over.
2067	if (CopyStringFragment(StringToks[i], ThisTokBegin,
2068	StringRef(InStart, ThisTokBuf - InStart)))
2069	hadError = true;
2070	continue;
2071	}
2072	// Is this a Universal Character Name escape?
2073	if (ThisTokBuf[1] == 'u' \|\| ThisTokBuf[1] == 'U' \|\|
2074	ThisTokBuf[1] == 'N') {
2075	EncodeUCNEscape(ThisTokBegin, ThisTokBuf, ThisTokEnd,
2076	ResultPtr, hadError,
2077	FullSourceLoc(StringToks[i].getLocation(), SM),
2078	CharByteWidth, Diags, Features);
2079	continue;
2080	}
2081	// Otherwise, this is a non-UCN escape character. Process it.
2082	unsigned ResultChar =
2083	ProcessCharEscape(ThisTokBegin, ThisTokBuf, ThisTokEnd, hadError,
2084	FullSourceLoc(StringToks[i].getLocation(), SM),
2085	CharByteWidth*8, Diags, Features);
2086
2087	if (CharByteWidth == 4) {
2088	// FIXME: Make the type of the result buffer correct instead of
2089	// using reinterpret_cast.
2090	llvm::UTF32 ResultWidePtr = reinterpret_cast<llvm::UTF32>(ResultPtr);
2091	*ResultWidePtr = ResultChar;
2092	ResultPtr += 4;
2093	} else if (CharByteWidth == 2) {
2094	// FIXME: Make the type of the result buffer correct instead of
2095	// using reinterpret_cast.
2096	llvm::UTF16 ResultWidePtr = reinterpret_cast<llvm::UTF16>(ResultPtr);
2097	*ResultWidePtr = ResultChar & 0xFFFF;
2098	ResultPtr += 2;
2099	} else {
2100	assert(CharByteWidth == 1 && "Unexpected char width")(static_cast <bool> (CharByteWidth == 1 && "Unexpected char width" ) ? void (0) : __assert_fail ("CharByteWidth == 1 && \"Unexpected char width\"" , "clang/lib/Lex/LiteralSupport.cpp", 2100, __extension__ __PRETTY_FUNCTION__ ));
2101	*ResultPtr++ = ResultChar & 0xFF;
2102	}
2103	}
2104	}
2105	}
2106
2107	if (Pascal) {
2108	if (CharByteWidth == 4) {
2109	// FIXME: Make the type of the result buffer correct instead of
2110	// using reinterpret_cast.
2111	llvm::UTF32 ResultWidePtr = reinterpret_cast<llvm::UTF32>(ResultBuf.data());
2112	ResultWidePtr[0] = GetNumStringChars() - 1;
2113	} else if (CharByteWidth == 2) {
2114	// FIXME: Make the type of the result buffer correct instead of
2115	// using reinterpret_cast.
2116	llvm::UTF16 ResultWidePtr = reinterpret_cast<llvm::UTF16>(ResultBuf.data());
2117	ResultWidePtr[0] = GetNumStringChars() - 1;
2118	} else {
2119	assert(CharByteWidth == 1 && "Unexpected char width")(static_cast <bool> (CharByteWidth == 1 && "Unexpected char width" ) ? void (0) : __assert_fail ("CharByteWidth == 1 && \"Unexpected char width\"" , "clang/lib/Lex/LiteralSupport.cpp", 2119, __extension__ __PRETTY_FUNCTION__ ));
2120	ResultBuf[0] = GetNumStringChars() - 1;
2121	}
2122
2123	// Verify that pascal strings aren't too large.
2124	if (GetStringLength() > 256) {
2125	if (Diags)
2126	Diags->Report(StringToks.front().getLocation(),
2127	diag::err_pascal_string_too_long)
2128	<< SourceRange(StringToks.front().getLocation(),
2129	StringToks.back().getLocation());
2130	hadError = true;
2131	return;
2132	}
2133	} else if (Diags) {
2134	// Complain if this string literal has too many characters.
2135	unsigned MaxChars = Features.CPlusPlus? 65536 : Features.C99 ? 4095 : 509;
2136
2137	if (GetNumStringChars() > MaxChars)
2138	Diags->Report(StringToks.front().getLocation(),
2139	diag::ext_string_too_long)
2140	<< GetNumStringChars() << MaxChars
2141	<< (Features.CPlusPlus ? 2 : Features.C99 ? 1 : 0)
2142	<< SourceRange(StringToks.front().getLocation(),
2143	StringToks.back().getLocation());
2144	}
2145	}
2146
2147	static const char resyncUTF8(const char Err, const char *End) {
2148	if (Err == End)
2149	return End;
2150	End = Err + std::min<unsigned>(llvm::getNumBytesForUTF8(*Err), End-Err);
2151	while (++Err != End && (*Err & 0xC0) == 0x80)
2152	;
2153	return Err;
2154	}
2155
2156	/// This function copies from Fragment, which is a sequence of bytes
2157	/// within Tok's contents (which begin at TokBegin) into ResultPtr.
2158	/// Performs widening for multi-byte characters.
2159	bool StringLiteralParser::CopyStringFragment(const Token &Tok,
2160	const char *TokBegin,
2161	StringRef Fragment) {
2162	const llvm::UTF8 *ErrorPtrTmp;
2163	if (ConvertUTF8toWide(CharByteWidth, Fragment, ResultPtr, ErrorPtrTmp))
2164	return false;
2165
2166	// If we see bad encoding for unprefixed string literals, warn and
2167	// simply copy the byte values, for compatibility with gcc and older
2168	// versions of clang.
2169	bool NoErrorOnBadEncoding = isOrdinary();
2170	if (NoErrorOnBadEncoding) {
2171	memcpy(ResultPtr, Fragment.data(), Fragment.size());
2172	ResultPtr += Fragment.size();
2173	}
2174
2175	if (Diags) {
2176	const char ErrorPtr = reinterpret_cast<const char >(ErrorPtrTmp);
2177
2178	FullSourceLoc SourceLoc(Tok.getLocation(), SM);
2179	const DiagnosticBuilder &Builder =
2180	Diag(Diags, Features, SourceLoc, TokBegin,
2181	ErrorPtr, resyncUTF8(ErrorPtr, Fragment.end()),
2182	NoErrorOnBadEncoding ? diag::warn_bad_string_encoding
2183	: diag::err_bad_string_encoding);
2184
2185	const char *NextStart = resyncUTF8(ErrorPtr, Fragment.end());
2186	StringRef NextFragment(NextStart, Fragment.end()-NextStart);
2187
2188	// Decode into a dummy buffer.
2189	SmallString<512> Dummy;
2190	Dummy.reserve(Fragment.size() * CharByteWidth);
2191	char *Ptr = Dummy.data();
2192
2193	while (!ConvertUTF8toWide(CharByteWidth, NextFragment, Ptr, ErrorPtrTmp)) {
2194	const char ErrorPtr = reinterpret_cast<const char >(ErrorPtrTmp);
2195	NextStart = resyncUTF8(ErrorPtr, Fragment.end());
2196	Builder << MakeCharSourceRange(Features, SourceLoc, TokBegin,
2197	ErrorPtr, NextStart);
2198	NextFragment = StringRef(NextStart, Fragment.end()-NextStart);
2199	}
2200	}
2201	return !NoErrorOnBadEncoding;
2202	}
2203
2204	void StringLiteralParser::DiagnoseLexingError(SourceLocation Loc) {
2205	hadError = true;
2206	if (Diags)
2207	Diags->Report(Loc, diag::err_lexing_string);
2208	}
2209
2210	/// getOffsetOfStringByte - This function returns the offset of the
2211	/// specified byte of the string data represented by Token. This handles
2212	/// advancing over escape sequences in the string.
2213	unsigned StringLiteralParser::getOffsetOfStringByte(const Token &Tok,
2214	unsigned ByteNo) const {
2215	// Get the spelling of the token.
2216	SmallString<32> SpellingBuffer;
2217	SpellingBuffer.resize(Tok.getLength());
2218
2219	bool StringInvalid = false;
2220	const char *SpellingPtr = &SpellingBuffer[0];
2221	unsigned TokLen = Lexer::getSpelling(Tok, SpellingPtr, SM, Features,
2222	&StringInvalid);
2223	if (StringInvalid)
2224	return 0;
2225
2226	const char *SpellingStart = SpellingPtr;
2227	const char *SpellingEnd = SpellingPtr+TokLen;
2228
2229	// Handle UTF-8 strings just like narrow strings.
2230	if (SpellingPtr[0] == 'u' && SpellingPtr[1] == '8')
2231	SpellingPtr += 2;
2232
2233	assert(SpellingPtr[0] != 'L' && SpellingPtr[0] != 'u' &&(static_cast <bool> (SpellingPtr[0] != 'L' && SpellingPtr [0] != 'u' && SpellingPtr[0] != 'U' && "Doesn't handle wide or utf strings yet" ) ? void (0) : __assert_fail ("SpellingPtr[0] != 'L' && SpellingPtr[0] != 'u' && SpellingPtr[0] != 'U' && \"Doesn't handle wide or utf strings yet\"" , "clang/lib/Lex/LiteralSupport.cpp", 2234, __extension__ __PRETTY_FUNCTION__ ))
2234	SpellingPtr[0] != 'U' && "Doesn't handle wide or utf strings yet")(static_cast <bool> (SpellingPtr[0] != 'L' && SpellingPtr [0] != 'u' && SpellingPtr[0] != 'U' && "Doesn't handle wide or utf strings yet" ) ? void (0) : __assert_fail ("SpellingPtr[0] != 'L' && SpellingPtr[0] != 'u' && SpellingPtr[0] != 'U' && \"Doesn't handle wide or utf strings yet\"" , "clang/lib/Lex/LiteralSupport.cpp", 2234, __extension__ __PRETTY_FUNCTION__ ));
2235
2236	// For raw string literals, this is easy.
2237	if (SpellingPtr[0] == 'R') {
2238	assert(SpellingPtr[1] == '"' && "Should be a raw string literal!")(static_cast <bool> (SpellingPtr[1] == '"' && "Should be a raw string literal!" ) ? void (0) : __assert_fail ("SpellingPtr[1] == '\"' && \"Should be a raw string literal!\"" , "clang/lib/Lex/LiteralSupport.cpp", 2238, __extension__ __PRETTY_FUNCTION__ ));
2239	// Skip 'R"'.
2240	SpellingPtr += 2;
2241	while (*SpellingPtr != '(') {
2242	++SpellingPtr;
2243	assert(SpellingPtr < SpellingEnd && "Missing ( for raw string literal")(static_cast <bool> (SpellingPtr < SpellingEnd && "Missing ( for raw string literal") ? void (0) : __assert_fail ("SpellingPtr < SpellingEnd && \"Missing ( for raw string literal\"" , "clang/lib/Lex/LiteralSupport.cpp", 2243, __extension__ __PRETTY_FUNCTION__ ));
2244	}
2245	// Skip '('.
2246	++SpellingPtr;
2247	return SpellingPtr - SpellingStart + ByteNo;
2248	}
2249
2250	// Skip over the leading quote
2251	assert(SpellingPtr[0] == '"' && "Should be a string literal!")(static_cast <bool> (SpellingPtr[0] == '"' && "Should be a string literal!" ) ? void (0) : __assert_fail ("SpellingPtr[0] == '\"' && \"Should be a string literal!\"" , "clang/lib/Lex/LiteralSupport.cpp", 2251, __extension__ __PRETTY_FUNCTION__ ));
2252	++SpellingPtr;
2253
2254	// Skip over bytes until we find the offset we're looking for.
2255	while (ByteNo) {
2256	assert(SpellingPtr < SpellingEnd && "Didn't find byte offset!")(static_cast <bool> (SpellingPtr < SpellingEnd && "Didn't find byte offset!") ? void (0) : __assert_fail ("SpellingPtr < SpellingEnd && \"Didn't find byte offset!\"" , "clang/lib/Lex/LiteralSupport.cpp", 2256, __extension__ __PRETTY_FUNCTION__ ));
2257
2258	// Step over non-escapes simply.
2259	if (*SpellingPtr != '\\') {
2260	++SpellingPtr;
2261	--ByteNo;
2262	continue;
2263	}
2264
2265	// Otherwise, this is an escape character. Advance over it.
2266	bool HadError = false;
2267	if (SpellingPtr[1] == 'u' \|\| SpellingPtr[1] == 'U' \|\|
2268	SpellingPtr[1] == 'N') {
2269	const char *EscapePtr = SpellingPtr;
2270	unsigned Len = MeasureUCNEscape(SpellingStart, SpellingPtr, SpellingEnd,
2271	1, Features, HadError);
2272	if (Len > ByteNo) {
2273	// ByteNo is somewhere within the escape sequence.
2274	SpellingPtr = EscapePtr;
2275	break;
2276	}
2277	ByteNo -= Len;
2278	} else {
2279	ProcessCharEscape(SpellingStart, SpellingPtr, SpellingEnd, HadError,
2280	FullSourceLoc(Tok.getLocation(), SM),
2281	CharByteWidth*8, Diags, Features);
2282	--ByteNo;
2283	}
2284	assert(!HadError && "This method isn't valid on erroneous strings")(static_cast <bool> (!HadError && "This method isn't valid on erroneous strings" ) ? void (0) : __assert_fail ("!HadError && \"This method isn't valid on erroneous strings\"" , "clang/lib/Lex/LiteralSupport.cpp", 2284, __extension__ __PRETTY_FUNCTION__ ));
2285	}
2286
2287	return SpellingPtr-SpellingStart;
2288	}
2289
2290	/// Determine whether a suffix is a valid ud-suffix. We avoid treating reserved
2291	/// suffixes as ud-suffixes, because the diagnostic experience is better if we
2292	/// treat it as an invalid suffix.
2293	bool StringLiteralParser::isValidUDSuffix(const LangOptions &LangOpts,
2294	StringRef Suffix) {
2295	return NumericLiteralParser::isValidUDSuffix(LangOpts, Suffix) \|\|
2296	Suffix == "sv";
2297	}