LLVM 19.0.0git
Threading.inc
Go to the documentation of this file.
1//===- Unix/Threading.inc - Unix Threading Implementation ----- -*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file provides the Unix specific implementation of Threading functions.
10//
11//===----------------------------------------------------------------------===//
12
#include "Unix.h"
#include "llvm/ADT/ScopeExit.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/Twine.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/raw_ostream.h"
21
22#if defined(__APPLE__)
23#include <mach/mach_init.h>
24#include <mach/mach_port.h>
25#include <pthread/qos.h>
26#include <sys/sysctl.h>
27#include <sys/types.h>
28#endif
29
30#include <pthread.h>
31
32#if defined(__FreeBSD__) || defined(__OpenBSD__)
33#include <pthread_np.h> // For pthread_getthreadid_np() / pthread_set_name_np()
34#endif
35
36#if defined(__FreeBSD__) || defined(__FreeBSD_kernel__)
37#include <errno.h>
38#include <sys/cpuset.h>
39#include <sys/sysctl.h>
40#include <sys/user.h>
41#include <unistd.h>
42#endif
43
44#if defined(__NetBSD__)
45#include <lwp.h> // For _lwp_self()
46#endif
47
48#if defined(__OpenBSD__)
49#include <unistd.h> // For getthrid()
50#endif
51
52#if defined(__linux__)
53#include <sched.h> // For sched_getaffinity
54#include <sys/syscall.h> // For syscall codes
55#include <unistd.h> // For syscall()
56#endif
57
58namespace llvm {
59pthread_t
60llvm_execute_on_thread_impl(void *(*ThreadFunc)(void *), void *Arg,
61 std::optional<unsigned> StackSizeInBytes) {
62 int errnum;
63
64 // Construct the attributes object.
65 pthread_attr_t Attr;
66 if ((errnum = ::pthread_attr_init(&Attr)) != 0) {
67 ReportErrnumFatal("pthread_attr_init failed", errnum);
68 }
69
70 auto AttrGuard = llvm::make_scope_exit([&] {
71 if ((errnum = ::pthread_attr_destroy(&Attr)) != 0) {
72 ReportErrnumFatal("pthread_attr_destroy failed", errnum);
73 }
74 });
75
76 // Set the requested stack size, if given.
77 if (StackSizeInBytes) {
78 if ((errnum = ::pthread_attr_setstacksize(&Attr, *StackSizeInBytes)) != 0) {
79 ReportErrnumFatal("pthread_attr_setstacksize failed", errnum);
80 }
81 }
82
83 // Construct and execute the thread.
84 pthread_t Thread;
85 if ((errnum = ::pthread_create(&Thread, &Attr, ThreadFunc, Arg)) != 0)
86 ReportErrnumFatal("pthread_create failed", errnum);
87
88 return Thread;
89}
90
91void llvm_thread_detach_impl(pthread_t Thread) {
92 int errnum;
93
94 if ((errnum = ::pthread_detach(Thread)) != 0) {
95 ReportErrnumFatal("pthread_detach failed", errnum);
96 }
97}
98
99void llvm_thread_join_impl(pthread_t Thread) {
100 int errnum;
101
102 if ((errnum = ::pthread_join(Thread, nullptr)) != 0) {
103 ReportErrnumFatal("pthread_join failed", errnum);
104 }
105}
106
/// Returns the identifier of \p Thread. With pthreads the handle doubles as
/// the id, so the argument is returned unchanged.
pthread_t llvm_thread_get_id_impl(pthread_t Thread) {
  const pthread_t Id = Thread;
  return Id;
}
108
/// Returns the pthread handle of the calling thread.
pthread_t llvm_thread_get_current_id_impl() {
  const pthread_t Self = ::pthread_self();
  return Self;
}
110
111} // namespace llvm
112
114#if defined(__APPLE__)
115 // Calling "mach_thread_self()" bumps the reference count on the thread
116 // port, so we need to deallocate it. mach_task_self() doesn't bump the ref
117 // count.
118 thread_port_t Self = mach_thread_self();
119 mach_port_deallocate(mach_task_self(), Self);
120 return Self;
121#elif defined(__FreeBSD__)
122 return uint64_t(pthread_getthreadid_np());
123#elif defined(__NetBSD__)
124 return uint64_t(_lwp_self());
125#elif defined(__OpenBSD__)
126 return uint64_t(getthrid());
127#elif defined(__ANDROID__)
128 return uint64_t(gettid());
129#elif defined(__linux__)
130 return uint64_t(syscall(SYS_gettid));
131#else
132 return uint64_t(pthread_self());
133#endif
134}
135
/// Compile-time maximum length of a thread name on the target platform.
/// set_thread_name treats this length as including the null terminator.
/// Branches without a known limit yield 0.
static constexpr uint32_t get_max_thread_name_length_impl() {
#if defined(__NetBSD__)
  constexpr uint32_t MaxLen = PTHREAD_MAX_NAMELEN_NP;
#elif defined(__APPLE__)
  constexpr uint32_t MaxLen = 64;
#elif defined(__linux__) && HAVE_PTHREAD_SETNAME_NP
  constexpr uint32_t MaxLen = 16;
#elif defined(__linux__)
  // Linux without pthread_setname_np: thread names are not supported.
  constexpr uint32_t MaxLen = 0;
#elif defined(__FreeBSD__) || defined(__FreeBSD_kernel__)
  constexpr uint32_t MaxLen = 16;
#elif defined(__OpenBSD__)
  constexpr uint32_t MaxLen = 32;
#else
  constexpr uint32_t MaxLen = 0;
#endif
  return MaxLen;
}
155
157 return get_max_thread_name_length_impl();
158}
159
160void llvm::set_thread_name(const Twine &Name) {
161 // Make sure the input is null terminated.
162 SmallString<64> Storage;
163 StringRef NameStr = Name.toNullTerminatedStringRef(Storage);
164
165 // Truncate from the beginning, not the end, if the specified name is too
166 // long. For one, this ensures that the resulting string is still null
167 // terminated, but additionally the end of a long thread name will usually
168 // be more unique than the beginning, since a common pattern is for similar
169 // threads to share a common prefix.
170 // Note that the name length includes the null terminator.
172 NameStr = NameStr.take_back(get_max_thread_name_length() - 1);
173 (void)NameStr;
174#if defined(__linux__)
175#if (defined(__GLIBC__) && defined(_GNU_SOURCE)) || defined(__ANDROID__)
176#if HAVE_PTHREAD_SETNAME_NP
177 ::pthread_setname_np(::pthread_self(), NameStr.data());
178#endif
179#endif
180#elif defined(__FreeBSD__) || defined(__OpenBSD__)
181 ::pthread_set_name_np(::pthread_self(), NameStr.data());
182#elif defined(__NetBSD__)
183 ::pthread_setname_np(::pthread_self(), "%s",
184 const_cast<char *>(NameStr.data()));
185#elif defined(__APPLE__)
186 ::pthread_setname_np(NameStr.data());
187#endif
188}
189
190void llvm::get_thread_name(SmallVectorImpl<char> &Name) {
191 Name.clear();
192
193#if defined(__FreeBSD__) || defined(__FreeBSD_kernel__)
194 int pid = ::getpid();
195 uint64_t tid = get_threadid();
196
197 struct kinfo_proc *kp = nullptr, *nkp;
198 size_t len = 0;
199 int error;
200 int ctl[4] = {CTL_KERN, KERN_PROC, KERN_PROC_PID | KERN_PROC_INC_THREAD,
201 (int)pid};
202
203 while (1) {
204 error = sysctl(ctl, 4, kp, &len, nullptr, 0);
205 if (kp == nullptr || (error != 0 && errno == ENOMEM)) {
206 // Add extra space in case threads are added before next call.
207 len += sizeof(*kp) + len / 10;
208 nkp = (struct kinfo_proc *)::realloc(kp, len);
209 if (nkp == nullptr) {
210 free(kp);
211 return;
212 }
213 kp = nkp;
214 continue;
215 }
216 if (error != 0)
217 len = 0;
218 break;
219 }
220
221 for (size_t i = 0; i < len / sizeof(*kp); i++) {
222 if (kp[i].ki_tid == (lwpid_t)tid) {
223 Name.append(kp[i].ki_tdname, kp[i].ki_tdname + strlen(kp[i].ki_tdname));
224 break;
225 }
226 }
227 free(kp);
228 return;
229#elif defined(__NetBSD__)
230 constexpr uint32_t len = get_max_thread_name_length_impl();
231 char buf[len];
232 ::pthread_getname_np(::pthread_self(), buf, len);
233
234 Name.append(buf, buf + strlen(buf));
235#elif defined(__OpenBSD__)
236 constexpr uint32_t len = get_max_thread_name_length_impl();
237 char buf[len];
238 ::pthread_get_name_np(::pthread_self(), buf, len);
239
240 Name.append(buf, buf + strlen(buf));
241#elif defined(__linux__)
242#if HAVE_PTHREAD_GETNAME_NP
243 constexpr uint32_t len = get_max_thread_name_length_impl();
244 char Buffer[len] = {'\0'}; // FIXME: working around MSan false positive.
245 if (0 == ::pthread_getname_np(::pthread_self(), Buffer, len))
246 Name.append(Buffer, Buffer + strlen(Buffer));
247#endif
248#endif
249}
250
251SetThreadPriorityResult llvm::set_thread_priority(ThreadPriority Priority) {
252#if defined(__linux__) && defined(SCHED_IDLE)
253 // Some *really* old glibcs are missing SCHED_IDLE.
254 // http://man7.org/linux/man-pages/man3/pthread_setschedparam.3.html
255 // http://man7.org/linux/man-pages/man2/sched_setscheduler.2.html
256 sched_param priority;
257 // For each of the above policies, param->sched_priority must be 0.
258 priority.sched_priority = 0;
259 // SCHED_IDLE for running very low priority background jobs.
260 // SCHED_OTHER the standard round-robin time-sharing policy;
261 return !pthread_setschedparam(
262 pthread_self(),
263 // FIXME: consider SCHED_BATCH for Low
264 Priority == ThreadPriority::Default ? SCHED_OTHER : SCHED_IDLE,
265 &priority)
266 ? SetThreadPriorityResult::SUCCESS
267 : SetThreadPriorityResult::FAILURE;
268#elif defined(__APPLE__)
269 // https://developer.apple.com/documentation/apple-silicon/tuning-your-code-s-performance-for-apple-silicon
270 //
271 // Background - Applies to work that isn’t visible to the user and may take
272 // significant time to complete. Examples include indexing, backing up, or
273 // synchronizing data. This class emphasizes energy efficiency.
274 //
275 // Utility - Applies to work that takes anywhere from a few seconds to a few
276 // minutes to complete. Examples include downloading a document or importing
277 // data. This class offers a balance between responsiveness, performance, and
278 // energy efficiency.
279 const auto qosClass = [&]() {
280 switch (Priority) {
281 case ThreadPriority::Background:
282 return QOS_CLASS_BACKGROUND;
283 case ThreadPriority::Low:
284 return QOS_CLASS_UTILITY;
285 case ThreadPriority::Default:
286 return QOS_CLASS_DEFAULT;
287 }
288 }();
289 return !pthread_set_qos_class_self_np(qosClass, 0)
290 ? SetThreadPriorityResult::SUCCESS
291 : SetThreadPriorityResult::FAILURE;
292#endif
293 return SetThreadPriorityResult::FAILURE;
294}
295
296#include <thread>
297
/// Number of hardware threads usable by the caller.
///
/// On FreeBSD and Linux the size of the affinity mask is used when it can be
/// queried. Otherwise std::thread::hardware_concurrency() is consulted, with
/// 1 as the last resort.
static int computeHostNumHardwareThreads() {
#if defined(__FreeBSD__)
  cpuset_t AffinityMask;
  CPU_ZERO(&AffinityMask);
  const int Rc = cpuset_getaffinity(CPU_LEVEL_WHICH, CPU_WHICH_TID, -1,
                                    sizeof(AffinityMask), &AffinityMask);
  if (Rc == 0)
    return CPU_COUNT(&AffinityMask);
#elif defined(__linux__)
  cpu_set_t AffinitySet;
  const int Rc = sched_getaffinity(0, sizeof(AffinitySet), &AffinitySet);
  if (Rc == 0)
    return CPU_COUNT(&AffinitySet);
#endif
  // std::thread::hardware_concurrency() may return 0; never report fewer
  // than one thread.
  const unsigned Detected = std::thread::hardware_concurrency();
  return Detected != 0 ? Detected : 1;
}
315
317 unsigned ThreadPoolNum) const {}
318
320 // FIXME: Implement
321 llvm_unreachable("Not implemented!");
322}
323
324unsigned llvm::get_cpus() { return 1; }
325
326#if defined(__linux__) && (defined(__i386__) || defined(__x86_64__))
327// On Linux, the number of physical cores can be computed from /proc/cpuinfo,
328// using the number of unique physical/core id pairs. The following
329// implementation reads the /proc/cpuinfo format on an x86_64 system.
330static int computeHostNumPhysicalCores() {
331 // Enabled represents the number of physical id/core id pairs with at least
332 // one processor id enabled by the CPU affinity mask.
333 cpu_set_t Affinity, Enabled;
334 if (sched_getaffinity(0, sizeof(Affinity), &Affinity) != 0)
335 return -1;
336 CPU_ZERO(&Enabled);
337
338 // Read /proc/cpuinfo as a stream (until EOF reached). It cannot be
339 // mmapped because it appears to have 0 size.
342 if (std::error_code EC = Text.getError()) {
343 llvm::errs() << "Can't read "
344 << "/proc/cpuinfo: " << EC.message() << "\n";
345 return -1;
346 }
347 SmallVector<StringRef, 8> strs;
348 (*Text)->getBuffer().split(strs, "\n", /*MaxSplit=*/-1,
349 /*KeepEmpty=*/false);
350 int CurProcessor = -1;
351 int CurPhysicalId = -1;
352 int CurSiblings = -1;
353 int CurCoreId = -1;
354 for (StringRef Line : strs) {
355 std::pair<StringRef, StringRef> Data = Line.split(':');
356 auto Name = Data.first.trim();
357 auto Val = Data.second.trim();
358 // These fields are available if the kernel is configured with CONFIG_SMP.
359 if (Name == "processor")
360 Val.getAsInteger(10, CurProcessor);
361 else if (Name == "physical id")
362 Val.getAsInteger(10, CurPhysicalId);
363 else if (Name == "siblings")
364 Val.getAsInteger(10, CurSiblings);
365 else if (Name == "core id") {
366 Val.getAsInteger(10, CurCoreId);
367 // The processor id corresponds to an index into cpu_set_t.
368 if (CPU_ISSET(CurProcessor, &Affinity))
369 CPU_SET(CurPhysicalId * CurSiblings + CurCoreId, &Enabled);
370 }
371 }
372 return CPU_COUNT(&Enabled);
373}
374#elif (defined(__linux__) && defined(__s390x__)) || defined(_AIX)
// Reports the value of sysconf(_SC_NPROCESSORS_ONLN) as the core count.
static int computeHostNumPhysicalCores() {
  const long OnlineProcessors = sysconf(_SC_NPROCESSORS_ONLN);
  return OnlineProcessors;
}
378#elif defined(__linux__) && !defined(__ANDROID__)
// Counts the CPUs in the caller's affinity mask. Returns -1 when the mask
// cannot be obtained.
static int computeHostNumPhysicalCores() {
  cpu_set_t Affinity;
  if (sched_getaffinity(0, sizeof(Affinity), &Affinity) == 0)
    return CPU_COUNT(&Affinity);

  // The call to sched_getaffinity() may have failed because the Affinity
  // mask is too small for the number of CPU's on the system (i.e. the
  // system has more than 1024 CPUs). Allocate a mask large enough for
  // twice as many CPUs.
  constexpr int MaxCPUs = 2048;
  cpu_set_t *DynAffinity = CPU_ALLOC(MaxCPUs);
  if (DynAffinity == nullptr)
    return -1;

  const size_t DynSize = CPU_ALLOC_SIZE(MaxCPUs);
  int NumCPUs = -1;
  // A dynamically sized set must be counted with CPU_COUNT_S; plain CPU_COUNT
  // only looks at the first sizeof(cpu_set_t) bytes.
  if (sched_getaffinity(0, DynSize, DynAffinity) == 0)
    NumCPUs = CPU_COUNT_S(DynSize, DynAffinity);
  // Release the mask on both the success and the failure path.
  CPU_FREE(DynAffinity);
  return NumCPUs;
}
397#elif defined(__APPLE__)
398// Gets the number of *physical cores* on the machine.
399static int computeHostNumPhysicalCores() {
401 size_t len = sizeof(count);
402 sysctlbyname("hw.physicalcpu", &count, &len, NULL, 0);
403 if (count < 1) {
404 int nm[2];
405 nm[0] = CTL_HW;
406 nm[1] = HW_AVAILCPU;
407 sysctl(nm, 2, &count, &len, NULL, 0);
408 if (count < 1)
409 return -1;
410 }
411 return count;
412}
413#elif defined(__MVS__)
// Reads the count of online standard CPs by following two pointers through
// fixed-offset system tables, starting at address 0 (PSA -> CVT -> CSD).
static int computeHostNumPhysicalCores() {
  // Byte offset of the pointer to the Communications Vector Table (CVT) in
  // the Prefixed Save Area (PSA). The table entry is a 31-bit pointer and
  // will be zero-extended to uintptr_t.
  constexpr int FLCCVT = 16;
  // Byte offset of the pointer to the Common System Data Area (CSD) in the
  // CVT. The table entry is a 31-bit pointer and will be zero-extended to
  // uintptr_t.
  constexpr int CVTCSD = 660;
  // Byte offset to the number of live CPs in the LPAR, stored as a signed
  // 32-bit value in the table.
  constexpr int CSD_NUMBER_ONLINE_STANDARD_CPS = 264;

  char *PrefixedSaveArea = nullptr;

  const unsigned int CVTAddress =
      reinterpret_cast<unsigned int &>(PrefixedSaveArea[FLCCVT]);
  char *CommVectorTable =
      reinterpret_cast<char *>(static_cast<uintptr_t>(CVTAddress));

  const unsigned int CSDAddress =
      reinterpret_cast<unsigned int &>(CommVectorTable[CVTCSD]);
  char *CommonSystemData =
      reinterpret_cast<char *>(static_cast<uintptr_t>(CSDAddress));

  return reinterpret_cast<int &>(
      CommonSystemData[CSD_NUMBER_ONLINE_STANDARD_CPS]);
}
435#else
// Fallback for every other system: the physical core count is unknown,
// which is signalled by -1.
static int computeHostNumPhysicalCores() {
  constexpr int UnknownCoreCount = -1;
  return UnknownCoreCount;
}
438#endif
439
441 static int NumCores = computeHostNumPhysicalCores();
442 return NumCores;
443}
std::string Name
uint64_t Thread
Definition: Profile.cpp:48
This file defines the make_scope_exit function, which executes user-defined cleanup logic at scope ex...
This file defines the SmallString class.
This file defines the SmallVector class.
static bool Enabled
Definition: Statistic.cpp:46
#define error(X)
static void ReportErrnumFatal(const char *Msg, int errnum)
Definition: Unix.h:70
Represents either an error or a value T.
Definition: ErrorOr.h:56
static ErrorOr< std::unique_ptr< MemoryBuffer > > getFileAsStream(const Twine &Filename)
Read all of the specified file into a MemoryBuffer as a stream (i.e.
void apply_thread_strategy(unsigned ThreadPoolNum) const
Assign the current thread to an ideal hardware CPU or NUMA node.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
SetThreadPriorityResult set_thread_priority(ThreadPriority Priority)
detail::scope_exit< std::decay_t< Callable > > make_scope_exit(Callable &&F)
Definition: ScopeExit.h:59
llvm::BitVector get_thread_affinity_mask()
Returns a mask that represents on which hardware thread, core, CPU, NUMA group, the calling thread ca...
Definition: Threading.cpp:41
uint32_t get_max_thread_name_length()
Get the maximum length of a thread name on this platform.
Definition: Threading.cpp:35
unsigned get_cpus()
Returns how many physical CPUs or NUMA groups the system has.
void set_thread_name(const Twine &Name)
Set the name of the current thread.
Definition: Threading.cpp:37
SetThreadPriorityResult
Definition: Threading.h:255
raw_fd_ostream & errs()
This returns a reference to a raw_ostream for standard error.
void get_thread_name(SmallVectorImpl< char > &Name)
Get the name of the current thread.
Definition: Threading.cpp:39
int get_physical_cores()
Returns how many physical cores (as opposed to logical cores returned from thread::hardware_concurren...
Definition: Threading.cpp:49
uint64_t get_threadid()
Return the current thread id, as used in various OS system calls.
Definition: Threading.cpp:33
auto count(R &&Range, const E &Element)
Wrapper function around std::count to count the number of times an element Element occurs in the give...
Definition: STLExtras.h:1923
auto mask(ShuffFunc S, unsigned Length, OptArgs... args) -> MaskT