Bug Summary

File: projects/compiler-rt/lib/xray/xray_fdr_logging.cc
Warning: line 445, column 7
Called C++ object pointer is null

Annotated Source Code

Press '?' to see keyboard shortcuts

clang -cc1 -triple x86_64-pc-linux-gnu -analyze -disable-free -disable-llvm-verifier -discard-value-names -main-file-name xray_fdr_logging.cc -analyzer-store=region -analyzer-opt-analyze-nested-blocks -analyzer-checker=core -analyzer-checker=apiModeling -analyzer-checker=unix -analyzer-checker=deadcode -analyzer-checker=cplusplus -analyzer-checker=security.insecureAPI.UncheckedReturn -analyzer-checker=security.insecureAPI.getpw -analyzer-checker=security.insecureAPI.gets -analyzer-checker=security.insecureAPI.mktemp -analyzer-checker=security.insecureAPI.mkstemp -analyzer-checker=security.insecureAPI.vfork -analyzer-checker=nullability.NullPassedToNonnull -analyzer-checker=nullability.NullReturnedFromNonnull -analyzer-output plist -w -mrelocation-model pic -pic-level 2 -mthread-model posix -fmath-errno -masm-verbose -mconstructor-aliases -munwind-tables -fuse-init-array -target-cpu x86-64 -dwarf-column-info -debugger-tuning=gdb -momit-leaf-frame-pointer -ffunction-sections -fdata-sections -resource-dir /usr/lib/llvm-8/lib/clang/8.0.0 -D XRAY_HAS_EXCEPTIONS=1 -D _DEBUG -D _GNU_SOURCE -D __STDC_CONSTANT_MACROS -D __STDC_FORMAT_MACROS -D __STDC_LIMIT_MACROS -I /build/llvm-toolchain-snapshot-8~svn345461/build-llvm/projects/compiler-rt/lib/xray -I /build/llvm-toolchain-snapshot-8~svn345461/projects/compiler-rt/lib/xray -I /build/llvm-toolchain-snapshot-8~svn345461/build-llvm/include -I /build/llvm-toolchain-snapshot-8~svn345461/include -I /build/llvm-toolchain-snapshot-8~svn345461/projects/compiler-rt/lib/xray/.. 
-I /build/llvm-toolchain-snapshot-8~svn345461/projects/compiler-rt/lib/xray/../../include -U NDEBUG -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/6.3.0/../../../../include/c++/6.3.0 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/6.3.0/../../../../include/x86_64-linux-gnu/c++/6.3.0 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/6.3.0/../../../../include/x86_64-linux-gnu/c++/6.3.0 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/6.3.0/../../../../include/c++/6.3.0/backward -internal-isystem /usr/include/clang/8.0.0/include/ -internal-isystem /usr/local/include -internal-isystem /usr/lib/llvm-8/lib/clang/8.0.0/include -internal-externc-isystem /usr/include/x86_64-linux-gnu -internal-externc-isystem /include -internal-externc-isystem /usr/include -O3 -Wno-unused-parameter -Wwrite-strings -Wno-missing-field-initializers -Wno-long-long -Wno-maybe-uninitialized -Wno-comment -Wno-unused-parameter -Wno-variadic-macros -Wno-non-virtual-dtor -std=c++11 -fdeprecated-macro -fdebug-compilation-dir /build/llvm-toolchain-snapshot-8~svn345461/build-llvm/projects/compiler-rt/lib/xray -ferror-limit 19 -fmessage-length 0 -fvisibility hidden -fvisibility-inlines-hidden -fno-builtin -fno-rtti -fobjc-runtime=gcc -fdiagnostics-show-option -vectorize-loops -vectorize-slp -analyzer-output=html -analyzer-config stable-report-filename=true -o /tmp/scan-build-2018-10-27-211344-32123-1 -x c++ /build/llvm-toolchain-snapshot-8~svn345461/projects/compiler-rt/lib/xray/xray_fdr_logging.cc -faddrsig
1//===-- xray_fdr_logging.cc ------------------------------------*- C++ -*-===//
2//
3// The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// This file is a part of XRay, a dynamic runtime instrumentation system.
11//
12// Here we implement the Flight Data Recorder mode for XRay, where we use
13// compact structures to store records in memory as well as when writing out the
14// data to files.
15//
16//===----------------------------------------------------------------------===//
#include "xray_fdr_logging.h"
#include <cassert>
#include <errno.h>
#include <limits>
#include <memory>
#include <pthread.h>
#include <sys/syscall.h>
#include <sys/time.h>
#include <time.h>
#include <unistd.h>

#include "sanitizer_common/sanitizer_allocator_internal.h"
#include "sanitizer_common/sanitizer_atomic.h"
#include "sanitizer_common/sanitizer_common.h"
#include "xray/xray_interface.h"
#include "xray/xray_records.h"
#include "xray_allocator.h"
#include "xray_buffer_queue.h"
#include "xray_defs.h"
#include "xray_fdr_flags.h"
#include "xray_fdr_log_writer.h"
#include "xray_flags.h"
#include "xray_recursion_guard.h"
#include "xray_tsc.h"
#include "xray_utils.h"
42
43namespace __xray {
44
45static atomic_sint32_t LoggingStatus = {
46 XRayLogInitStatus::XRAY_LOG_UNINITIALIZED};
47
48namespace {
49// Group together thread-local-data in a struct, then hide it behind a function
50// call so that it can be initialized on first use instead of as a global. We
51// force the alignment to 64-bytes for x86 cache line alignment, as this
52// structure is used in the hot path of implementation.
53struct alignas(64) ThreadLocalData {
54 BufferQueue::Buffer Buffer{};
55 char *RecordPtr = nullptr;
56 // The number of FunctionEntry records immediately preceding RecordPtr.
57 uint8_t NumConsecutiveFnEnters = 0;
58
59 // The number of adjacent, consecutive pairs of FunctionEntry, Tail Exit
60 // records preceding RecordPtr.
61 uint8_t NumTailCalls = 0;
62
63 // We use a thread_local variable to keep track of which CPUs we've already
64 // run, and the TSC times for these CPUs. This allows us to stop repeating the
65 // CPU field in the function records.
66 //
67 // We assume that we'll support only 65536 CPUs for x86_64.
68 uint16_t CurrentCPU = std::numeric_limits<uint16_t>::max();
69 uint64_t LastTSC = 0;
70 uint64_t LastFunctionEntryTSC = 0;
71
72 // Make sure a thread that's ever called handleArg0 has a thread-local
73 // live reference to the buffer queue for this particular instance of
74 // FDRLogging, and that we're going to clean it up when the thread exits.
75 BufferQueue *BQ = nullptr;
76};
77} // namespace
78
79static_assert(std::is_trivially_destructible<ThreadLocalData>::value,
80 "ThreadLocalData must be trivially destructible");
81
82static constexpr auto MetadataRecSize = sizeof(MetadataRecord);
83static constexpr auto FunctionRecSize = sizeof(FunctionRecord);
84
85// Use a global pthread key to identify thread-local data for logging.
86static pthread_key_t Key;
87
88// Global BufferQueue.
89static std::aligned_storage<sizeof(BufferQueue)>::type BufferQueueStorage;
90static BufferQueue *BQ = nullptr;
91
92static atomic_sint32_t LogFlushStatus = {
93 XRayLogFlushStatus::XRAY_LOG_NOT_FLUSHING};
94
95// This function will initialize the thread-local data structure used by the FDR
96// logging implementation and return a reference to it. The implementation
97// details require a bit of care to maintain.
98//
99// First, some requirements on the implementation in general:
100//
101// - XRay handlers should not call any memory allocation routines that may
102// delegate to an instrumented implementation. This means functions like
103// malloc() and free() should not be called while instrumenting.
104//
105// - We would like to use some thread-local data initialized on first-use of
106// the XRay instrumentation. These allow us to implement unsynchronized
107// routines that access resources associated with the thread.
108//
109// The implementation here uses a few mechanisms that allow us to provide both
110// the requirements listed above. We do this by:
111//
112// 1. Using a thread-local aligned storage buffer for representing the
113// ThreadLocalData struct. This data will be uninitialized memory by
114// design.
115//
116// 2. Not requiring a thread exit handler/implementation, keeping the
117// thread-local as purely a collection of references/data that do not
118// require cleanup.
119//
120// We're doing this to avoid using a `thread_local` object that has a
121// non-trivial destructor, because the C++ runtime might call std::malloc(...)
122// to register calls to destructors. Deadlocks may arise when, for example, an
123// externally provided malloc implementation is XRay instrumented, and
124// initializing the thread-locals involves calling into malloc. A malloc
125// implementation that does global synchronization might be holding a lock for a
126// critical section, calling a function that might be XRay instrumented (and
127// thus in turn calling into malloc by virtue of registration of the
128// thread_local's destructor).
129static_assert(alignof(ThreadLocalData) >= 64,
130 "ThreadLocalData must be cache line aligned.");
131static ThreadLocalData &getThreadLocalData() {
132 thread_local typename std::aligned_storage<
133 sizeof(ThreadLocalData), alignof(ThreadLocalData)>::type TLDStorage{};
134
135 if (pthread_getspecific(Key) == NULL__null) {
4
Assuming the condition is true
5
Taking true branch
136 new (reinterpret_cast<ThreadLocalData *>(&TLDStorage)) ThreadLocalData{};
137 pthread_setspecific(Key, &TLDStorage);
6
Value assigned to field 'BQ'
138 }
139
140 return *reinterpret_cast<ThreadLocalData *>(&TLDStorage);
141}
142
143static void writeNewBufferPreamble(tid_t Tid, timespec TS,
144 pid_t Pid) XRAY_NEVER_INSTRUMENT {
145 static_assert(sizeof(time_t) <= 8, "time_t needs to be at most 8 bytes");
146 auto &TLD = getThreadLocalData();
147 MetadataRecord Metadata[] = {
148 // Write out a MetadataRecord to signify that this is the start of a new
149 // buffer, associated with a particular thread, with a new CPU. For the
150 // data, we have 15 bytes to squeeze as much information as we can. At
151 // this point we only write down the following bytes:
152 // - Thread ID (tid_t, cast to 4 bytes type due to Darwin being 8 bytes)
153 createMetadataRecord<MetadataRecord::RecordKinds::NewBuffer>(
154 static_cast<int32_t>(Tid)),
155
156 // Also write the WalltimeMarker record. We only really need microsecond
157 // precision here, and enforce across platforms that we need 64-bit
158 // seconds and 32-bit microseconds encoded in the Metadata record.
159 createMetadataRecord<MetadataRecord::RecordKinds::WalltimeMarker>(
160 static_cast<int64_t>(TS.tv_sec),
161 static_cast<int32_t>(TS.tv_nsec / 1000)),
162
163 // Also write the Pid record.
164 createMetadataRecord<MetadataRecord::RecordKinds::Pid>(
165 static_cast<int32_t>(Pid)),
166 };
167
168 TLD.NumConsecutiveFnEnters = 0;
169 TLD.NumTailCalls = 0;
170 if (TLD.BQ == nullptr || TLD.BQ->finalizing())
171 return;
172 FDRLogWriter Writer(TLD.Buffer);
173 TLD.RecordPtr += Writer.writeMetadataRecords(Metadata);
174}
175
176static void setupNewBuffer(int (*wall_clock_reader)(
177 clockid_t, struct timespec *)) XRAY_NEVER_INSTRUMENT {
178 auto &TLD = getThreadLocalData();
179 auto &B = TLD.Buffer;
180 TLD.RecordPtr = static_cast<char *>(B.Data);
181 atomic_store(&B.Extents, 0, memory_order_release);
182 tid_t Tid = GetTid();
183 timespec TS{0, 0};
184 pid_t Pid = internal_getpid();
185 // This is typically clock_gettime, but callers have injection ability.
186 wall_clock_reader(CLOCK_MONOTONIC1, &TS);
187 writeNewBufferPreamble(Tid, TS, Pid);
188 TLD.NumConsecutiveFnEnters = 0;
189 TLD.NumTailCalls = 0;
190}
191
192static void incrementExtents(size_t Add) {
193 auto &TLD = getThreadLocalData();
194 atomic_fetch_add(&TLD.Buffer.Extents, Add, memory_order_acq_rel);
195}
196
197static void decrementExtents(size_t Subtract) {
198 auto &TLD = getThreadLocalData();
199 atomic_fetch_sub(&TLD.Buffer.Extents, Subtract, memory_order_acq_rel);
200}
201
202static void writeNewCPUIdMetadata(uint16_t CPU,
203 uint64_t TSC) XRAY_NEVER_INSTRUMENT {
204 auto &TLD = getThreadLocalData();
205 FDRLogWriter W(TLD.Buffer, TLD.RecordPtr);
206
207 // The data for the New CPU will contain the following bytes:
208 // - CPU ID (uint16_t, 2 bytes)
209 // - Full TSC (uint64_t, 8 bytes)
210 // Total = 10 bytes.
211 W.writeMetadata<MetadataRecord::RecordKinds::NewCPUId>(CPU, TSC);
212 TLD.RecordPtr = W.getNextRecord();
213 TLD.NumConsecutiveFnEnters = 0;
214 TLD.NumTailCalls = 0;
215}
216
217static void writeTSCWrapMetadata(uint64_t TSC) XRAY_NEVER_INSTRUMENT {
218 auto &TLD = getThreadLocalData();
219 FDRLogWriter W(TLD.Buffer, TLD.RecordPtr);
220
221 // The data for the TSCWrap record contains the following bytes:
222 // - Full TSC (uint64_t, 8 bytes)
223 // Total = 8 bytes.
224 W.writeMetadata<MetadataRecord::RecordKinds::TSCWrap>(TSC);
225 TLD.RecordPtr = W.getNextRecord();
226 TLD.NumConsecutiveFnEnters = 0;
227 TLD.NumTailCalls = 0;
228}
229
230// Call Argument metadata records store the arguments to a function in the
231// order of their appearance; holes are not supported by the buffer format.
232static void writeCallArgumentMetadata(uint64_t A) XRAY_NEVER_INSTRUMENT {
233 auto &TLD = getThreadLocalData();
234 FDRLogWriter W(TLD.Buffer, TLD.RecordPtr);
235 W.writeMetadata<MetadataRecord::RecordKinds::CallArgument>(A);
236 TLD.RecordPtr = W.getNextRecord();
237}
238
239static void writeFunctionRecord(int32_t FuncId, uint32_t TSCDelta,
240 XRayEntryType EntryType) XRAY_NEVER_INSTRUMENT {
241 constexpr int32_t MaxFuncId = (1 << 29) - 1;
242 if (UNLIKELY(FuncId > MaxFuncId)__builtin_expect(!!(FuncId > MaxFuncId), 0)) {
243 if (Verbosity())
244 Report("Warning: Function ID '%d' > max function id: '%d'", FuncId,
245 MaxFuncId);
246 return;
247 }
248
249 auto &TLD = getThreadLocalData();
250 FDRLogWriter W(TLD.Buffer, TLD.RecordPtr);
251
252 // Only take 28 bits of the function id.
253 //
254 // We need to be careful about the sign bit and the bitwise operations being
255 // performed here. In effect, we want to truncate the value of the function id
256 // to the first 28 bits. To do this properly, this means we need to mask the
257 // function id with (2 ^ 28) - 1 == 0x0fffffff.
258 //
259 auto TruncatedId = FuncId & MaxFuncId;
260 auto Kind = FDRLogWriter::FunctionRecordKind::Enter;
261
262 switch (EntryType) {
263 case XRayEntryType::ENTRY:
264 ++TLD.NumConsecutiveFnEnters;
265 break;
266 case XRayEntryType::LOG_ARGS_ENTRY:
267 // We should not rewind functions with logged args.
268 TLD.NumConsecutiveFnEnters = 0;
269 TLD.NumTailCalls = 0;
270 Kind = FDRLogWriter::FunctionRecordKind::EnterArg;
271 break;
272 case XRayEntryType::EXIT:
273 // If we've decided to log the function exit, we will never erase the log
274 // before it.
275 TLD.NumConsecutiveFnEnters = 0;
276 TLD.NumTailCalls = 0;
277 Kind = FDRLogWriter::FunctionRecordKind::Exit;
278 break;
279 case XRayEntryType::TAIL:
280 // If we just entered the function we're tail exiting from or erased every
281 // invocation since then, this function entry tail pair is a candidate to
282 // be erased when the child function exits.
283 if (TLD.NumConsecutiveFnEnters > 0) {
284 ++TLD.NumTailCalls;
285 TLD.NumConsecutiveFnEnters = 0;
286 } else {
287 // We will never be able to erase this tail call since we have logged
288 // something in between the function entry and tail exit.
289 TLD.NumTailCalls = 0;
290 TLD.NumConsecutiveFnEnters = 0;
291 }
292 Kind = FDRLogWriter::FunctionRecordKind::TailExit;
293 break;
294 case XRayEntryType::CUSTOM_EVENT: {
295 // This is a bug in patching, so we'll report it once and move on.
296 static atomic_uint8_t ErrorLatch{0};
297 if (!atomic_exchange(&ErrorLatch, 1, memory_order_acq_rel))
298 Report("Internal error: patched an XRay custom event call as a function; "
299 "func id = %d\n",
300 FuncId);
301 return;
302 }
303 case XRayEntryType::TYPED_EVENT: {
304 static atomic_uint8_t ErrorLatch{0};
305 if (!atomic_exchange(&ErrorLatch, 1, memory_order_acq_rel))
306 Report("Internal error: patched an XRay typed event call as a function; "
307 "func id = %d\n",
308 FuncId);
309 return;
310 }
311 }
312
313 W.writeFunction(Kind, TruncatedId, TSCDelta);
314 TLD.RecordPtr = W.getNextRecord();
315}
316
317static atomic_uint64_t TicksPerSec{0};
318static atomic_uint64_t ThresholdTicks{0};
319
320// Re-point the thread local pointer into this thread's Buffer before the recent
321// "Function Entry" record and any "Tail Call Exit" records after that.
322static void rewindRecentCall(uint64_t TSC, uint64_t &LastTSC,
323 uint64_t &LastFunctionEntryTSC,
324 int32_t FuncId) XRAY_NEVER_INSTRUMENT {
325 auto &TLD = getThreadLocalData();
326 TLD.RecordPtr -= FunctionRecSize;
327 decrementExtents(FunctionRecSize);
328 FunctionRecord FuncRecord;
329 internal_memcpy(&FuncRecord, TLD.RecordPtr, FunctionRecSize);
330 DCHECK(FuncRecord.RecordKind ==
331 uint8_t(FunctionRecord::RecordKinds::FunctionEnter) &&
332 "Expected to find function entry recording when rewinding.");
333 DCHECK(FuncRecord.FuncId == (FuncId & ~(0x0F << 28)) &&
334 "Expected matching function id when rewinding Exit");
335 --TLD.NumConsecutiveFnEnters;
336 LastTSC -= FuncRecord.TSCDelta;
337
338 // We unwound one call. Update the state and return without writing a log.
339 if (TLD.NumConsecutiveFnEnters != 0) {
340 LastFunctionEntryTSC -= FuncRecord.TSCDelta;
341 return;
342 }
343
344 // Otherwise we've rewound the stack of all function entries, we might be
345 // able to rewind further by erasing tail call functions that are being
346 // exited from via this exit.
347 LastFunctionEntryTSC = 0;
348 auto RewindingTSC = LastTSC;
349 auto RewindingRecordPtr = TLD.RecordPtr - FunctionRecSize;
350 while (TLD.NumTailCalls > 0) {
351 // Rewind the TSC back over the TAIL EXIT record.
352 FunctionRecord ExpectedTailExit;
353 internal_memcpy(&ExpectedTailExit, RewindingRecordPtr, FunctionRecSize);
354
355 DCHECK(ExpectedTailExit.RecordKind ==
356 uint8_t(FunctionRecord::RecordKinds::FunctionTailExit) &&
357 "Expected to find tail exit when rewinding.");
358 RewindingRecordPtr -= FunctionRecSize;
359 RewindingTSC -= ExpectedTailExit.TSCDelta;
360 FunctionRecord ExpectedFunctionEntry;
361 internal_memcpy(&ExpectedFunctionEntry, RewindingRecordPtr,
362 FunctionRecSize);
363 DCHECK(ExpectedFunctionEntry.RecordKind ==
364 uint8_t(FunctionRecord::RecordKinds::FunctionEnter) &&
365 "Expected to find function entry when rewinding tail call.");
366 DCHECK(ExpectedFunctionEntry.FuncId == ExpectedTailExit.FuncId &&
367 "Expected funcids to match when rewinding tail call.");
368
369 // This tail call exceeded the threshold duration. It will not be erased.
370 if ((TSC - RewindingTSC) >= atomic_load_relaxed(&ThresholdTicks)) {
371 TLD.NumTailCalls = 0;
372 return;
373 }
374
375 // We can erase a tail exit pair that we're exiting through since
376 // its duration is under threshold.
377 --TLD.NumTailCalls;
378 RewindingRecordPtr -= FunctionRecSize;
379 RewindingTSC -= ExpectedFunctionEntry.TSCDelta;
380 TLD.RecordPtr -= 2 * FunctionRecSize;
381 LastTSC = RewindingTSC;
382 decrementExtents(2 * FunctionRecSize);
383 }
384}
385
386static bool releaseThreadLocalBuffer(BufferQueue &BQArg) {
387 auto &TLD = getThreadLocalData();
388 auto EC = BQArg.releaseBuffer(TLD.Buffer);
389 if (TLD.Buffer.Data == nullptr)
390 return true;
391
392 if (EC != BufferQueue::ErrorCode::Ok) {
393 Report("Failed to release buffer at %p; error=%s\n", TLD.Buffer.Data,
394 BufferQueue::getErrorString(EC));
395 return false;
396 }
397 return true;
398}
399
400static bool prepareBuffer(uint64_t TSC, unsigned char CPU,
401 int (*wall_clock_reader)(clockid_t,
402 struct timespec *),
403 size_t MaxSize) XRAY_NEVER_INSTRUMENT {
404 auto &TLD = getThreadLocalData();
405 char *BufferStart = static_cast<char *>(TLD.Buffer.Data);
406 if ((TLD.RecordPtr + MaxSize) > (BufferStart + TLD.Buffer.Size)) {
407 if (!releaseThreadLocalBuffer(*TLD.BQ))
408 return false;
409 auto EC = TLD.BQ->getBuffer(TLD.Buffer);
410 if (EC != BufferQueue::ErrorCode::Ok) {
411 Report("Failed to prepare a buffer; error = '%s'\n",
412 BufferQueue::getErrorString(EC));
413 return false;
414 }
415 setupNewBuffer(wall_clock_reader);
416
417 // Always write the CPU metadata as the first record in the buffer.
418 writeNewCPUIdMetadata(CPU, TSC);
419 }
420 return true;
421}
422
423static bool
424isLogInitializedAndReady(BufferQueue *LBQ, uint64_t TSC, unsigned char CPU,
425 int (*wall_clock_reader)(clockid_t, struct timespec *))
426 XRAY_NEVER_INSTRUMENT {
427 // Bail out right away if logging is not initialized yet.
428 // We should take the opportunity to release the buffer though.
429 auto Status = atomic_load(&LoggingStatus, memory_order_acquire);
430 auto &TLD = getThreadLocalData();
431 if (Status != XRayLogInitStatus::XRAY_LOG_INITIALIZED) {
14
Assuming 'Status' is equal to XRAY_LOG_INITIALIZED
15
Taking false branch
432 if (TLD.RecordPtr != nullptr &&
433 (Status == XRayLogInitStatus::XRAY_LOG_FINALIZING ||
434 Status == XRayLogInitStatus::XRAY_LOG_FINALIZED)) {
435 if (!releaseThreadLocalBuffer(*LBQ))
436 return false;
437 TLD.RecordPtr = nullptr;
438 return false;
439 }
440 return false;
441 }
442
443 if (atomic_load(&LoggingStatus, memory_order_acquire) !=
444 XRayLogInitStatus::XRAY_LOG_INITIALIZED ||
445 LBQ->finalizing()) {
16
Called C++ object pointer is null
446 if (!releaseThreadLocalBuffer(*LBQ))
447 return false;
448 TLD.RecordPtr = nullptr;
449 }
450
451 if (TLD.Buffer.Data == nullptr) {
452 auto EC = LBQ->getBuffer(TLD.Buffer);
453 if (EC != BufferQueue::ErrorCode::Ok) {
454 auto LS = atomic_load(&LoggingStatus, memory_order_acquire);
455 if (LS != XRayLogInitStatus::XRAY_LOG_FINALIZING &&
456 LS != XRayLogInitStatus::XRAY_LOG_FINALIZED)
457 Report("Failed to acquire a buffer; error = '%s'\n",
458 BufferQueue::getErrorString(EC));
459 return false;
460 }
461
462 setupNewBuffer(wall_clock_reader);
463
464 // Always write the CPU metadata as the first record in the buffer.
465 writeNewCPUIdMetadata(CPU, TSC);
466 }
467
468 if (TLD.CurrentCPU == std::numeric_limits<uint16_t>::max()) {
469 // This means this is the first CPU this thread has ever run on. We set
470 // the current CPU and record this as the first TSC we've seen.
471 TLD.CurrentCPU = CPU;
472 writeNewCPUIdMetadata(CPU, TSC);
473 }
474
475 return true;
476}
477
478// Compute the TSC difference between the time of measurement and the previous
479// event. There are a few interesting situations we need to account for:
480//
481// - The thread has migrated to a different CPU. If this is the case, then
482// we write down the following records:
483//
484// 1. A 'NewCPUId' Metadata record.
485// 2. A FunctionRecord with a 0 for the TSCDelta field.
486//
487// - The TSC delta is greater than the 32 bits we can store in a
488// FunctionRecord. In this case we write down the following records:
489//
490// 1. A 'TSCWrap' Metadata record.
491// 2. A FunctionRecord with a 0 for the TSCDelta field.
492//
493// - The TSC delta is representable within the 32 bits we can store in a
494// FunctionRecord. In this case we write down just a FunctionRecord with
495// the correct TSC delta.
496static uint32_t writeCurrentCPUTSC(ThreadLocalData &TLD, uint64_t TSC,
497 uint8_t CPU) {
498 if (CPU != TLD.CurrentCPU) {
499 // We've moved to a new CPU.
500 writeNewCPUIdMetadata(CPU, TSC);
501 return 0;
502 }
503
504 // If the delta is greater than the range for a uint32_t, then we write out
505 // the TSC wrap metadata entry with the full TSC, and the TSC for the
506 // function record be 0.
507 uint64_t Delta = TSC - TLD.LastTSC;
508 if (Delta <= std::numeric_limits<uint32_t>::max())
509 return Delta;
510
511 writeTSCWrapMetadata(TSC);
512 return 0;
513}
514
515static void endBufferIfFull() XRAY_NEVER_INSTRUMENT {
516 auto &TLD = getThreadLocalData();
517 auto BufferStart = static_cast<char *>(TLD.Buffer.Data);
518 if ((TLD.RecordPtr + MetadataRecSize) - BufferStart <=
519 ptrdiff_t{MetadataRecSize}) {
520 if (!releaseThreadLocalBuffer(*TLD.BQ))
521 return;
522 TLD.RecordPtr = nullptr;
523 }
524}
525
526thread_local atomic_uint8_t Running{0};
527
528/// Here's where the meat of the processing happens. The writer captures
529/// function entry, exit and tail exit points with a time and will create
530/// TSCWrap, NewCPUId and Function records as necessary. The writer might
531/// walk backward through its buffer and erase trivial functions to avoid
532/// polluting the log and may use the buffer queue to obtain or release a
533/// buffer.
534static void processFunctionHook(int32_t FuncId, XRayEntryType Entry,
535 uint64_t TSC, unsigned char CPU, uint64_t Arg1,
536 int (*wall_clock_reader)(clockid_t,
537 struct timespec *))
538 XRAY_NEVER_INSTRUMENT {
539 __asm volatile("# LLVM-MCA-BEGIN processFunctionHook");
540 // Prevent signal handler recursion, so in case we're already in a log writing
541 // mode and the signal handler comes in (and is also instrumented) then we
542 // don't want to be clobbering potentially partial writes already happening in
543 // the thread. We use a simple thread_local latch to only allow one on-going
544 // handleArg0 to happen at any given time.
545 RecursionGuard Guard{Running};
546 if (!Guard) {
2
Taking false branch
547 DCHECK(atomic_load_relaxed(&Running) && "RecursionGuard is buggy!");
548 return;
549 }
550
551 auto &TLD = getThreadLocalData();
3
Calling 'getThreadLocalData'
7
Returning from 'getThreadLocalData'
552
553 if (TLD.BQ == nullptr && BQ != nullptr)
8
Assuming pointer value is null
9
Assuming the condition is true
10
Assuming the condition is false
11
Taking false branch
554 TLD.BQ = BQ;
555
556 if (!isLogInitializedAndReady(TLD.BQ, TSC, CPU, wall_clock_reader))
12
Passing null pointer value via 1st parameter 'LBQ'
13
Calling 'isLogInitializedAndReady'
557 return;
558
559 // Before we go setting up writing new function entries, we need to be really
560 // careful about the pointer math we're doing. This means we need to ensure
561 // that the record we are about to write is going to fit into the buffer,
562 // without overflowing the buffer.
563 //
564 // To do this properly, we use the following assumptions:
565 //
566 // - The least number of bytes we will ever write is 8
567 // (sizeof(FunctionRecord)) only if the delta between the previous entry
568 // and this entry is within 32 bits.
569 // - The most number of bytes we will ever write is 8 + 16 + 16 = 40.
570 // This is computed by:
571 //
572 // MaxSize = sizeof(FunctionRecord) + 2 * sizeof(MetadataRecord)
573 //
574 // These arise in the following cases:
575 //
576 // 1. When the delta between the TSC we get and the previous TSC for the
577 // same CPU is outside of the uint32_t range, we end up having to
578 // write a MetadataRecord to indicate a "tsc wrap" before the actual
579 // FunctionRecord. This means we have: 1 MetadataRecord + 1 Function
580 // Record.
581 // 2. When we learn that we've moved CPUs, we need to write a
582 // MetadataRecord to indicate a "cpu change", and thus write out the
583 // current TSC for that CPU before writing out the actual
584 // FunctionRecord. This means we have: 1 MetadataRecord + 1 Function
585 // Record.
586 // 3. Given the previous two cases, in addition we can add at most one
587 // function argument record. This means we have: 2 MetadataRecord + 1
588 // Function Record.
589 //
590 // So the math we need to do is to determine whether writing 40 bytes past the
591 // current pointer exceeds the buffer's maximum size. If we don't have enough
592 // space to write 40 bytes in the buffer, we need get a new Buffer, set it up
593 // properly before doing any further writing.
594 size_t MaxSize = FunctionRecSize + 2 * MetadataRecSize;
595 if (!prepareBuffer(TSC, CPU, wall_clock_reader, MaxSize)) {
596 TLD.BQ = nullptr;
597 return;
598 }
599
600 auto RecordTSCDelta = writeCurrentCPUTSC(TLD, TSC, CPU);
601 TLD.LastTSC = TSC;
602 TLD.CurrentCPU = CPU;
603 switch (Entry) {
604 case XRayEntryType::ENTRY:
605 // Update the thread local state for the next invocation.
606 TLD.LastFunctionEntryTSC = TSC;
607 break;
608 case XRayEntryType::LOG_ARGS_ENTRY:
609 // Update the thread local state for the next invocation, but also prevent
610 // rewinding when we have arguments logged.
611 TLD.LastFunctionEntryTSC = TSC;
612 TLD.NumConsecutiveFnEnters = 0;
613 TLD.NumTailCalls = 0;
614 break;
615 case XRayEntryType::TAIL:
616 case XRayEntryType::EXIT:
617 // Break out and write the exit record if we can't erase any functions.
618 if (TLD.NumConsecutiveFnEnters == 0 ||
619 (TSC - TLD.LastFunctionEntryTSC) >=
620 atomic_load_relaxed(&ThresholdTicks))
621 break;
622 rewindRecentCall(TSC, TLD.LastTSC, TLD.LastFunctionEntryTSC, FuncId);
623 return; // without writing log.
624 case XRayEntryType::CUSTOM_EVENT: {
625 // This is a bug in patching, so we'll report it once and move on.
626 static atomic_uint8_t ErrorLatch{0};
627 if (!atomic_exchange(&ErrorLatch, 1, memory_order_acq_rel))
628 Report("Internal error: patched an XRay custom event call as a function; "
629 "func id = %d\n",
630 FuncId);
631 return;
632 }
633 case XRayEntryType::TYPED_EVENT: {
634 static atomic_uint8_t ErrorLatch{0};
635 if (!atomic_exchange(&ErrorLatch, 1, memory_order_acq_rel))
636 Report("Internal error: patched an XRay typed event call as a function; "
637 "func id = %d\n",
638 FuncId);
639 return;
640 }
641 }
642
643 writeFunctionRecord(FuncId, RecordTSCDelta, Entry);
644 if (Entry == XRayEntryType::LOG_ARGS_ENTRY)
645 writeCallArgumentMetadata(Arg1);
646
647 // If we've exhausted the buffer by this time, we then release the buffer to
648 // make sure that other threads may start using this buffer.
649 endBufferIfFull();
650 __asm volatile("# LLVM-MCA-END");
651}
652
653static XRayFileHeader &fdrCommonHeaderInfo() {
654 static std::aligned_storage<sizeof(XRayFileHeader)>::type HStorage;
655 static pthread_once_t OnceInit = PTHREAD_ONCE_INIT0;
656 static bool TSCSupported = true;
657 static uint64_t CycleFrequency = NanosecondsPerSecond;
658 pthread_once(&OnceInit, +[] {
659 XRayFileHeader &H = reinterpret_cast<XRayFileHeader &>(HStorage);
660 // Version 2 of the log writes the extents of the buffer, instead of
661 // relying on an end-of-buffer record.
662 // Version 3 includes PID metadata record
663 H.Version = 3;
664 H.Type = FileTypes::FDR_LOG;
665
666 // Test for required CPU features and cache the cycle frequency
667 TSCSupported = probeRequiredCPUFeatures();
668 if (TSCSupported)
669 CycleFrequency = getTSCFrequency();
670 H.CycleFrequency = CycleFrequency;
671
672 // FIXME: Actually check whether we have 'constant_tsc' and
673 // 'nonstop_tsc' before setting the values in the header.
674 H.ConstantTSC = 1;
675 H.NonstopTSC = 1;
676 });
677 return reinterpret_cast<XRayFileHeader &>(HStorage);
678}
679
680// This is the iterator implementation, which knows how to handle FDR-mode
681// specific buffers. This is used as an implementation of the iterator function
682// needed by __xray_set_buffer_iterator(...). It maintains a global state of the
683// buffer iteration for the currently installed FDR mode buffers. In particular:
684//
685// - If the argument represents the initial state of XRayBuffer ({nullptr, 0})
686// then the iterator returns the header information.
687// - If the argument represents the header information ({address of header
688// info, size of the header info}) then it returns the first FDR buffer's
689// address and extents.
690// - It will keep returning the next buffer and extents as there are more
691// buffers to process. When the input represents the last buffer, it will
692// return the initial state to signal completion ({nullptr, 0}).
693//
694// See xray/xray_log_interface.h for more details on the requirements for the
695// implementations of __xray_set_buffer_iterator(...) and
696// __xray_log_process_buffers(...).
697XRayBuffer fdrIterator(const XRayBuffer B) {
698 DCHECK(internal_strcmp(__xray_log_get_current_mode(), "xray-fdr") == 0);
699 DCHECK(BQ->finalizing());
700
701 if (BQ == nullptr || !BQ->finalizing()) {
702 if (Verbosity())
703 Report(
704 "XRay FDR: Failed global buffer queue is null or not finalizing!\n");
705 return {nullptr, 0};
706 }
707
708 // We use a global scratch-pad for the header information, which only gets
709 // initialized the first time this function is called. We'll update one part
710 // of this information with some relevant data (in particular the number of
711 // buffers to expect).
712 static std::aligned_storage<sizeof(XRayFileHeader)>::type HeaderStorage;
713 static pthread_once_t HeaderOnce = PTHREAD_ONCE_INIT0;
714 pthread_once(&HeaderOnce, +[] {
715 reinterpret_cast<XRayFileHeader &>(HeaderStorage) = fdrCommonHeaderInfo();
716 });
717
718 // We use a convenience alias for code referring to Header from here on out.
719 auto &Header = reinterpret_cast<XRayFileHeader &>(HeaderStorage);
720 if (B.Data == nullptr && B.Size == 0) {
721 Header.FdrData = FdrAdditionalHeaderData{BQ->ConfiguredBufferSize()};
722 return XRayBuffer{static_cast<void *>(&Header), sizeof(Header)};
723 }
724
725 static BufferQueue::const_iterator It{};
726 static BufferQueue::const_iterator End{};
727 static uint8_t *CurrentBuffer{nullptr};
728 static size_t SerializedBufferSize = 0;
729 if (B.Data == static_cast<void *>(&Header) && B.Size == sizeof(Header)) {
730 // From this point on, we provide raw access to the raw buffer we're getting
731 // from the BufferQueue. We're relying on the iterators from the current
732 // Buffer queue.
733 It = BQ->cbegin();
734 End = BQ->cend();
735 }
736
737 if (CurrentBuffer != nullptr) {
738 deallocateBuffer(CurrentBuffer, SerializedBufferSize);
739 CurrentBuffer = nullptr;
740 }
741
742 if (It == End)
743 return {nullptr, 0};
744
745 // Set up the current buffer to contain the extents like we would when writing
746 // out to disk. The difference here would be that we still write "empty"
747 // buffers, or at least go through the iterators faithfully to let the
748 // handlers see the empty buffers in the queue.
749 auto BufferSize = atomic_load(&It->Extents, memory_order_acquire);
750 SerializedBufferSize = BufferSize + sizeof(MetadataRecord);
751 CurrentBuffer = allocateBuffer(SerializedBufferSize);
752 if (CurrentBuffer == nullptr)
753 return {nullptr, 0};
754
755 // Write out the extents as a Metadata Record into the CurrentBuffer.
756 MetadataRecord ExtentsRecord;
757 ExtentsRecord.Type = uint8_t(RecordType::Metadata);
758 ExtentsRecord.RecordKind =
759 uint8_t(MetadataRecord::RecordKinds::BufferExtents);
760 internal_memcpy(ExtentsRecord.Data, &BufferSize, sizeof(BufferSize));
761 auto AfterExtents =
762 static_cast<char *>(internal_memcpy(CurrentBuffer, &ExtentsRecord,
763 sizeof(MetadataRecord))) +
764 sizeof(MetadataRecord);
765 internal_memcpy(AfterExtents, It->Data, BufferSize);
766
767 XRayBuffer Result;
768 Result.Data = CurrentBuffer;
769 Result.Size = SerializedBufferSize;
770 ++It;
771 return Result;
772}
773
774// Must finalize before flushing.
// Flush the FDR log to its destination (file, or nothing when
// 'no_file_flush=true'). Requires fdrLoggingFinalize() to have completed;
// returns XRAY_LOG_NOT_FLUSHING otherwise. On success the implementation is
// reset to "uninitialized" so FDR logging can be re-initialized for another
// run.
XRayLogFlushStatus fdrLoggingFlush() XRAY_NEVER_INSTRUMENT {
  // Refuse to flush unless finalization has completed.
  if (atomic_load(&LoggingStatus, memory_order_acquire) !=
      XRayLogInitStatus::XRAY_LOG_FINALIZED) {
    if (Verbosity())
      Report("Not flushing log, implementation is not finalized.\n");
    return XRayLogFlushStatus::XRAY_LOG_NOT_FLUSHING;
  }

  // Claim the flush: transition NOT_FLUSHING -> FLUSHING exactly once; a
  // concurrent flusher sees the CAS fail and gets the observed status back.
  s32 Result = XRayLogFlushStatus::XRAY_LOG_NOT_FLUSHING;
  if (!atomic_compare_exchange_strong(&LogFlushStatus, &Result,
                                      XRayLogFlushStatus::XRAY_LOG_FLUSHING,
                                      memory_order_release)) {
    if (Verbosity())
      Report("Not flushing log, implementation is still finalizing.\n");
    return static_cast<XRayLogFlushStatus>(Result);
  }

  if (BQ == nullptr) {
    if (Verbosity())
      Report("Cannot flush when global buffer queue is null.\n");
    return XRayLogFlushStatus::XRAY_LOG_NOT_FLUSHING;
  }

  // We wait a number of milliseconds to allow threads to see that we've
  // finalised before attempting to flush the log.
  SleepForMillis(fdrFlags()->grace_period_ms);

  // At this point, we're going to uninstall the iterator implementation, before
  // we decide to do anything further with the global buffer queue.
  __xray_log_remove_buffer_iterator();

  // Once flushed, we should set the global status of the logging implementation
  // to "uninitialized" to allow for FDR-logging multiple runs.
  auto ResetToUnitialized = at_scope_exit([] {
    atomic_store(&LoggingStatus, XRayLogInitStatus::XRAY_LOG_UNINITIALIZED,
                 memory_order_release);
  });

  // Destroy the global buffer queue (and release this thread's buffer first)
  // on every exit path below. Declared after ResetToUnitialized so, assuming
  // at_scope_exit runs at destruction, buffers are cleaned up before the
  // status is reset (reverse declaration order) -- NOTE(review): confirm
  // at_scope_exit semantics match this.
  auto CleanupBuffers = at_scope_exit([] {
    if (BQ != nullptr) {
      auto &TLD = getThreadLocalData();
      if (TLD.RecordPtr != nullptr && TLD.BQ != nullptr)
        releaseThreadLocalBuffer(*TLD.BQ);
      BQ->~BufferQueue();
      BQ = nullptr;
    }
  });

  // Skip writing to disk entirely when requested; still report FLUSHED so the
  // state machine completes.
  if (fdrFlags()->no_file_flush) {
    if (Verbosity())
      Report("XRay FDR: Not flushing to file, 'no_file_flush=true'.\n");

    atomic_store(&LogFlushStatus, XRayLogFlushStatus::XRAY_LOG_FLUSHED,
                 memory_order_release);
    return XRayLogFlushStatus::XRAY_LOG_FLUSHED;
  }

  // We write out the file in the following format:
  //
  //   1) We write down the XRay file header with version 1, type FDR_LOG.
  //   2) Then we use the 'apply' member of the BufferQueue that's live, to
  //      ensure that at this point in time we write down the buffers that have
  //      been released (and marked "used") -- we dump the full buffer for now
  //      (fixed-sized) and let the tools reading the buffers deal with the data
  //      afterwards.
  //
  LogWriter* LW = LogWriter::Open();
  if (LW == nullptr) {
    auto Result = XRayLogFlushStatus::XRAY_LOG_NOT_FLUSHING;
    atomic_store(&LogFlushStatus, Result, memory_order_release);
    return Result;
  }

  XRayFileHeader Header = fdrCommonHeaderInfo();
  Header.FdrData = FdrAdditionalHeaderData{BQ->ConfiguredBufferSize()};
  LW->WriteAll(reinterpret_cast<char *>(&Header),
               reinterpret_cast<char *>(&Header) + sizeof(Header));

  // Release the current thread's buffer before we attempt to write out all the
  // buffers. This ensures that in case we had only a single thread going, that
  // we are able to capture the data nonetheless.
  auto &TLD = getThreadLocalData();
  if (TLD.RecordPtr != nullptr && TLD.BQ != nullptr)
    releaseThreadLocalBuffer(*TLD.BQ);

  BQ->apply([&](const BufferQueue::Buffer &B) {
    // Starting at version 2 of the FDR logging implementation, we only write
    // the records identified by the extents of the buffer. We use the Extents
    // from the Buffer and write that out as the first record in the buffer. We
    // still use a Metadata record, but fill in the extents instead for the
    // data.
    MetadataRecord ExtentsRecord;
    auto BufferExtents = atomic_load(&B.Extents, memory_order_acquire);
    DCHECK(BufferExtents <= B.Size);
    ExtentsRecord.Type = uint8_t(RecordType::Metadata);
    ExtentsRecord.RecordKind =
        uint8_t(MetadataRecord::RecordKinds::BufferExtents);
    internal_memcpy(ExtentsRecord.Data, &BufferExtents, sizeof(BufferExtents));
    // Empty buffers produce no output at all (not even an extents record).
    if (BufferExtents > 0) {
      LW->WriteAll(reinterpret_cast<char *>(&ExtentsRecord),
                   reinterpret_cast<char *>(&ExtentsRecord) +
                       sizeof(MetadataRecord));
      LW->WriteAll(reinterpret_cast<char *>(B.Data),
                   reinterpret_cast<char *>(B.Data) + BufferExtents);
    }
  });

  atomic_store(&LogFlushStatus, XRayLogFlushStatus::XRAY_LOG_FLUSHED,
               memory_order_release);
  return XRayLogFlushStatus::XRAY_LOG_FLUSHED;
}
886
887XRayLogInitStatus fdrLoggingFinalize() XRAY_NEVER_INSTRUMENT {
888 s32 CurrentStatus = XRayLogInitStatus::XRAY_LOG_INITIALIZED;
889 if (!atomic_compare_exchange_strong(&LoggingStatus, &CurrentStatus,
890 XRayLogInitStatus::XRAY_LOG_FINALIZING,
891 memory_order_release)) {
892 if (Verbosity())
893 Report("Cannot finalize log, implementation not initialized.\n");
894 return static_cast<XRayLogInitStatus>(CurrentStatus);
895 }
896
897 // Do special things to make the log finalize itself, and not allow any more
898 // operations to be performed until re-initialized.
899 if (BQ == nullptr) {
900 if (Verbosity())
901 Report("Attempting to finalize an uninitialized global buffer!\n");
902 } else {
903 BQ->finalize();
904 }
905
906 atomic_store(&LoggingStatus, XRayLogInitStatus::XRAY_LOG_FINALIZED,
907 memory_order_release);
908 return XRayLogInitStatus::XRAY_LOG_FINALIZED;
909}
910
// A timestamp value paired with the CPU it was read on, as returned by
// getTimestamp() below.
struct TSCAndCPU {
  uint64_t TSC = 0;       // cycle count (or nanosecond-derived fallback value)
  unsigned char CPU = 0;  // CPU id the TSC was read on; 0 on the fallback path
};
915
916static TSCAndCPU getTimestamp() XRAY_NEVER_INSTRUMENT {
917 // We want to get the TSC as early as possible, so that we can check whether
918 // we've seen this CPU before. We also do it before we load anything else,
919 // to allow for forward progress with the scheduling.
920 TSCAndCPU Result;
921
922 // Test once for required CPU features
923 static pthread_once_t OnceProbe = PTHREAD_ONCE_INIT0;
924 static bool TSCSupported = true;
925 pthread_once(&OnceProbe, +[] { TSCSupported = probeRequiredCPUFeatures(); });
926
927 if (TSCSupported) {
928 Result.TSC = __xray::readTSC(Result.CPU);
929 } else {
930 // FIXME: This code needs refactoring as it appears in multiple locations
931 timespec TS;
932 int result = clock_gettime(CLOCK_REALTIME0, &TS);
933 if (result != 0) {
934 Report("clock_gettime(2) return %d, errno=%d", result, int(errno(*__errno_location ())));
935 TS = {0, 0};
936 }
937 Result.CPU = 0;
938 Result.TSC = TS.tv_sec * __xray::NanosecondsPerSecond + TS.tv_nsec;
939 }
940 return Result;
941}
942
943void fdrLoggingHandleArg0(int32_t FuncId,
944 XRayEntryType Entry) XRAY_NEVER_INSTRUMENT {
945 auto TC = getTimestamp();
946 processFunctionHook(FuncId, Entry, TC.TSC, TC.CPU, 0, clock_gettime);
947}
948
949void fdrLoggingHandleArg1(int32_t FuncId, XRayEntryType Entry,
950 uint64_t Arg) XRAY_NEVER_INSTRUMENT {
951 auto TC = getTimestamp();
952 processFunctionHook(FuncId, Entry, TC.TSC, TC.CPU, Arg, clock_gettime);
1
Calling 'processFunctionHook'
953}
954
955void fdrLoggingHandleCustomEvent(void *Event,
956 std::size_t EventSize) XRAY_NEVER_INSTRUMENT {
957 auto TC = getTimestamp();
958 auto &TSC = TC.TSC;
959 auto &CPU = TC.CPU;
960 RecursionGuard Guard{Running};
961 if (!Guard)
962 return;
963 if (EventSize > std::numeric_limits<int32_t>::max()) {
964 static pthread_once_t Once = PTHREAD_ONCE_INIT0;
965 pthread_once(&Once, +[] { Report("Event size too large.\n"); });
966 }
967 int32_t ReducedEventSize = static_cast<int32_t>(EventSize);
968 auto &TLD = getThreadLocalData();
969 if (!isLogInitializedAndReady(TLD.BQ, TSC, CPU, clock_gettime))
970 return;
971
972 // Here we need to prepare the log to handle:
973 // - The metadata record we're going to write. (16 bytes)
974 // - The additional data we're going to write. Currently, that's the size
975 // of the event we're going to dump into the log as free-form bytes.
976 if (!prepareBuffer(TSC, CPU, clock_gettime,
977 MetadataRecSize + ReducedEventSize)) {
978 TLD.BQ = nullptr;
979 return;
980 }
981
982 // We need to reset the counts for the number of functions we're able to
983 // rewind.
984 TLD.NumConsecutiveFnEnters = 0;
985 TLD.NumTailCalls = 0;
986
987 // Write the custom event metadata record, which consists of the following
988 // information:
989 // - 8 bytes (64-bits) for the full TSC when the event started.
990 // - 4 bytes (32-bits) for the length of the data.
991 MetadataRecord CustomEvent;
992 CustomEvent.Type = uint8_t(RecordType::Metadata);
993 CustomEvent.RecordKind =
994 uint8_t(MetadataRecord::RecordKinds::CustomEventMarker);
995 constexpr auto TSCSize = sizeof(TC.TSC);
996 internal_memcpy(&CustomEvent.Data, &ReducedEventSize, sizeof(int32_t));
997 internal_memcpy(&CustomEvent.Data + sizeof(int32_t), &TSC, TSCSize);
998 internal_memcpy(TLD.RecordPtr, &CustomEvent, sizeof(CustomEvent));
999 TLD.RecordPtr += sizeof(CustomEvent);
1000 internal_memcpy(TLD.RecordPtr, Event, ReducedEventSize);
1001 TLD.RecordPtr += ReducedEventSize;
1002 incrementExtents(MetadataRecSize + ReducedEventSize);
1003 endBufferIfFull();
1004}
1005
1006void fdrLoggingHandleTypedEvent(
1007 uint16_t EventType, const void *Event,
1008 std::size_t EventSize) noexcept XRAY_NEVER_INSTRUMENT {
1009 auto TC = getTimestamp();
1010 auto &TSC = TC.TSC;
1011 auto &CPU = TC.CPU;
1012 RecursionGuard Guard{Running};
1013 if (!Guard)
1014 return;
1015 if (EventSize > std::numeric_limits<int32_t>::max()) {
1016 static pthread_once_t Once = PTHREAD_ONCE_INIT0;
1017 pthread_once(&Once, +[] { Report("Event size too large.\n"); });
1018 }
1019 int32_t ReducedEventSize = static_cast<int32_t>(EventSize);
1020 auto &TLD = getThreadLocalData();
1021 if (!isLogInitializedAndReady(TLD.BQ, TSC, CPU, clock_gettime))
1022 return;
1023
1024 // Here we need to prepare the log to handle:
1025 // - The metadata record we're going to write. (16 bytes)
1026 // - The additional data we're going to write. Currently, that's the size
1027 // of the event we're going to dump into the log as free-form bytes.
1028 if (!prepareBuffer(TSC, CPU, clock_gettime,
1029 MetadataRecSize + ReducedEventSize)) {
1030 TLD.BQ = nullptr;
1031 return;
1032 }
1033 // Write the custom event metadata record, which consists of the following
1034 // information:
1035 // - 8 bytes (64-bits) for the full TSC when the event started.
1036 // - 4 bytes (32-bits) for the length of the data.
1037 // - 2 bytes (16-bits) for the event type. 3 bytes remain since one of the
1038 // bytes has the record type (Metadata Record) and kind (TypedEvent).
1039 // We'll log the error if the event type is greater than 2 bytes.
1040 // Event types are generated sequentially, so 2^16 is enough.
1041 MetadataRecord TypedEvent;
1042 TypedEvent.Type = uint8_t(RecordType::Metadata);
1043 TypedEvent.RecordKind =
1044 uint8_t(MetadataRecord::RecordKinds::TypedEventMarker);
1045 constexpr auto TSCSize = sizeof(TC.TSC);
1046 internal_memcpy(&TypedEvent.Data, &ReducedEventSize, sizeof(int32_t));
1047 internal_memcpy(&TypedEvent.Data[sizeof(int32_t)], &TSC, TSCSize);
1048 internal_memcpy(&TypedEvent.Data[sizeof(int32_t) + TSCSize], &EventType,
1049 sizeof(EventType));
1050 internal_memcpy(TLD.RecordPtr, &TypedEvent, sizeof(TypedEvent));
1051
1052 TLD.RecordPtr += sizeof(TypedEvent);
1053 internal_memcpy(TLD.RecordPtr, Event, ReducedEventSize);
1054 TLD.RecordPtr += ReducedEventSize;
1055 incrementExtents(MetadataRecSize + EventSize);
1056 endBufferIfFull();
1057}
1058
1059XRayLogInitStatus fdrLoggingInit(size_t, size_t, void *Options,
1060 size_t OptionsSize) XRAY_NEVER_INSTRUMENT {
1061 if (Options == nullptr)
1062 return XRayLogInitStatus::XRAY_LOG_UNINITIALIZED;
1063
1064 s32 CurrentStatus = XRayLogInitStatus::XRAY_LOG_UNINITIALIZED;
1065 if (!atomic_compare_exchange_strong(&LoggingStatus, &CurrentStatus,
1066 XRayLogInitStatus::XRAY_LOG_INITIALIZING,
1067 memory_order_release)) {
1068 if (Verbosity())
1069 Report("Cannot initialize already initialized implementation.\n");
1070 return static_cast<XRayLogInitStatus>(CurrentStatus);
1071 }
1072
1073 if (Verbosity())
1074 Report("Initializing FDR mode with options: %s\n",
1075 static_cast<const char *>(Options));
1076
1077 // TODO: Factor out the flags specific to the FDR mode implementation. For
1078 // now, use the global/single definition of the flags, since the FDR mode
1079 // flags are already defined there.
1080 FlagParser FDRParser;
1081 FDRFlags FDRFlags;
1082 registerXRayFDRFlags(&FDRParser, &FDRFlags);
1083 FDRFlags.setDefaults();
1084
1085 // Override first from the general XRAY_DEFAULT_OPTIONS compiler-provided
1086 // options until we migrate everyone to use the XRAY_FDR_OPTIONS
1087 // compiler-provided options.
1088 FDRParser.ParseString(useCompilerDefinedFlags());
1089 FDRParser.ParseString(useCompilerDefinedFDRFlags());
1090 auto *EnvOpts = GetEnv("XRAY_FDR_OPTIONS");
1091 if (EnvOpts == nullptr)
1092 EnvOpts = "";
1093 FDRParser.ParseString(EnvOpts);
1094
1095 // FIXME: Remove this when we fully remove the deprecated flags.
1096 if (internal_strlen(EnvOpts) == 0) {
1097 FDRFlags.func_duration_threshold_us =
1098 flags()->xray_fdr_log_func_duration_threshold_us;
1099 FDRFlags.grace_period_ms = flags()->xray_fdr_log_grace_period_ms;
1100 }
1101
1102 // The provided options should always override the compiler-provided and
1103 // environment-variable defined options.
1104 FDRParser.ParseString(static_cast<const char *>(Options));
1105 *fdrFlags() = FDRFlags;
1106 auto BufferSize = FDRFlags.buffer_size;
1107 auto BufferMax = FDRFlags.buffer_max;
1108 bool Success = false;
1109
1110 if (BQ != nullptr) {
1111 BQ->~BufferQueue();
1112 BQ = nullptr;
1113 }
1114
1115 if (BQ == nullptr) {
1116 BQ = reinterpret_cast<BufferQueue *>(&BufferQueueStorage);
1117 new (BQ) BufferQueue(BufferSize, BufferMax, Success);
1118 }
1119
1120 if (!Success) {
1121 Report("BufferQueue init failed.\n");
1122 if (BQ != nullptr) {
1123 BQ->~BufferQueue();
1124 BQ = nullptr;
1125 }
1126 return XRayLogInitStatus::XRAY_LOG_UNINITIALIZED;
1127 }
1128
1129 static pthread_once_t OnceInit = PTHREAD_ONCE_INIT0;
1130 pthread_once(&OnceInit, +[] {
1131 atomic_store(&TicksPerSec,
1132 probeRequiredCPUFeatures() ? getTSCFrequency()
1133 : __xray::NanosecondsPerSecond,
1134 memory_order_release);
1135 pthread_key_create(&Key, +[](void *TLDPtr) {
1136 if (TLDPtr == nullptr)
1137 return;
1138 auto &TLD = *reinterpret_cast<ThreadLocalData *>(TLDPtr);
1139 if (TLD.BQ == nullptr)
1140 return;
1141 if (TLD.Buffer.Data == nullptr)
1142 return;
1143 auto EC = TLD.BQ->releaseBuffer(TLD.Buffer);
1144 if (EC != BufferQueue::ErrorCode::Ok)
1145 Report("At thread exit, failed to release buffer at %p; error=%s\n",
1146 TLD.Buffer.Data, BufferQueue::getErrorString(EC));
1147 });
1148 });
1149
1150 atomic_store(&ThresholdTicks,
1151 atomic_load_relaxed(&TicksPerSec) *
1152 fdrFlags()->func_duration_threshold_us / 1000000,
1153 memory_order_release);
1154 // Arg1 handler should go in first to avoid concurrent code accidentally
1155 // falling back to arg0 when it should have ran arg1.
1156 __xray_set_handler_arg1(fdrLoggingHandleArg1);
1157 // Install the actual handleArg0 handler after initialising the buffers.
1158 __xray_set_handler(fdrLoggingHandleArg0);
1159 __xray_set_customevent_handler(fdrLoggingHandleCustomEvent);
1160 __xray_set_typedevent_handler(fdrLoggingHandleTypedEvent);
1161
1162 // Install the buffer iterator implementation.
1163 __xray_log_set_buffer_iterator(fdrIterator);
1164
1165 atomic_store(&LoggingStatus, XRayLogInitStatus::XRAY_LOG_INITIALIZED,
1166 memory_order_release);
1167
1168 if (Verbosity())
1169 Report("XRay FDR init successful.\n");
1170 return XRayLogInitStatus::XRAY_LOG_INITIALIZED;
1171}
1172
1173bool fdrLogDynamicInitializer() XRAY_NEVER_INSTRUMENT {
1174 XRayLogImpl Impl{
1175 fdrLoggingInit,
1176 fdrLoggingFinalize,
1177 fdrLoggingHandleArg0,
1178 fdrLoggingFlush,
1179 };
1180 auto RegistrationResult = __xray_log_register_mode("xray-fdr", Impl);
1181 if (RegistrationResult != XRayLogRegisterStatus::XRAY_REGISTRATION_OK &&
1182 Verbosity()) {
1183 Report("Cannot register XRay FDR mode to 'xray-fdr'; error = %d\n",
1184 RegistrationResult);
1185 return false;
1186 }
1187
1188 if (flags()->xray_fdr_log ||
1189 !internal_strcmp(flags()->xray_mode, "xray-fdr")) {
1190 auto SelectResult = __xray_log_select_mode("xray-fdr");
1191 if (SelectResult != XRayLogRegisterStatus::XRAY_REGISTRATION_OK &&
1192 Verbosity()) {
1193 Report("Cannot select XRay FDR mode as 'xray-fdr'; error = %d\n",
1194 SelectResult);
1195 return false;
1196 }
1197 }
1198 return true;
1199}
1200
1201} // namespace __xray
1202
1203static auto UNUSED__attribute__((unused)) Unused = __xray::fdrLogDynamicInitializer();