LLVM 20.0.0git
Threading.inc
Go to the documentation of this file.
1//===- Windows/Threading.inc - Win32 Threading Implementation - -*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file provides the Win32 specific implementation of Threading functions.
10//
11//===----------------------------------------------------------------------===//
12
14#include "llvm/ADT/Twine.h"
15#include "llvm/Support/thread.h"
16
18#include <process.h>
19
20#include <bitset>
21
22// Windows will at times define MemoryFence.
23#ifdef MemoryFence
24#undef MemoryFence
25#endif
26
27namespace llvm {
28HANDLE
29llvm_execute_on_thread_impl(unsigned(__stdcall *ThreadFunc)(void *), void *Arg,
30 std::optional<unsigned> StackSizeInBytes) {
31 HANDLE hThread = (HANDLE)::_beginthreadex(NULL, StackSizeInBytes.value_or(0),
32 ThreadFunc, Arg, 0, NULL);
33
34 if (!hThread) {
35 ReportLastErrorFatal("_beginthreadex failed");
36 }
37
38 return hThread;
39}
40
41void llvm_thread_join_impl(HANDLE hThread) {
42 if (::WaitForSingleObject(hThread, INFINITE) == WAIT_FAILED) {
43 ReportLastErrorFatal("WaitForSingleObject failed");
44 }
45}
46
47void llvm_thread_detach_impl(HANDLE hThread) {
48 if (::CloseHandle(hThread) == FALSE) {
49 ReportLastErrorFatal("CloseHandle failed");
50 }
51}
52
53DWORD llvm_thread_get_id_impl(HANDLE hThread) { return ::GetThreadId(hThread); }
54
55DWORD llvm_thread_get_current_id_impl() { return ::GetCurrentThreadId(); }
56
57} // namespace llvm
58
59uint64_t llvm::get_threadid() { return uint64_t(::GetCurrentThreadId()); }
60
62
#if defined(_MSC_VER)
/// Announces \p Name as the name of thread \p Id by raising the
/// MS_VC_EXCEPTION (0x406D1388) with a THREADNAME_INFO payload. The exception
/// is swallowed by the local SEH handler, so nothing propagates to the caller.
static void SetThreadName(DWORD Id, LPCSTR Name) {
  constexpr DWORD MS_VC_EXCEPTION = 0x406D1388;

#pragma pack(push, 8)
  struct THREADNAME_INFO {
    DWORD dwType;     // Must be 0x1000.
    LPCSTR szName;    // Pointer to thread name
    DWORD dwThreadId; // Thread ID (-1 == current thread)
    DWORD dwFlags;    // Reserved. Do not use.
  };
#pragma pack(pop)

  // Fields in declaration order: type tag, name, thread id, reserved flags.
  THREADNAME_INFO info = {0x1000, Name, Id, 0};

  // The payload is passed as an array of ULONG_PTR-sized arguments.
  constexpr DWORD NumArguments = sizeof(info) / sizeof(ULONG_PTR);

  __try {
    ::RaiseException(MS_VC_EXCEPTION, 0, NumArguments,
                     reinterpret_cast<ULONG_PTR *>(&info));
  } __except (EXCEPTION_EXECUTE_HANDLER) {
  }
}
#endif
89
90void llvm::set_thread_name(const Twine &Name) {
91#if defined(_MSC_VER)
92 // Make sure the input is null terminated.
93 SmallString<64> Storage;
94 StringRef NameStr = Name.toNullTerminatedStringRef(Storage);
95 SetThreadName(::GetCurrentThreadId(), NameStr.data());
96#endif
97}
98
99void llvm::get_thread_name(SmallVectorImpl<char> &Name) {
100 // "Name" is not an inherent property of a thread on Windows. In fact, when
101 // you "set" the name, you are only firing a one-time message to a debugger
102 // which it interprets as a program setting its threads' name. We may be
103 // able to get fancy by creating a TLS entry when someone calls
104 // set_thread_name so that subsequent calls to get_thread_name return this
105 // value.
106 Name.clear();
107}
108
109SetThreadPriorityResult llvm::set_thread_priority(ThreadPriority Priority) {
110 // https://docs.microsoft.com/en-us/windows/desktop/api/processthreadsapi/nf-processthreadsapi-setthreadpriority
111 // Begin background processing mode. The system lowers the resource scheduling
112 // priorities of the thread so that it can perform background work without
113 // significantly affecting activity in the foreground.
114 // End background processing mode. The system restores the resource scheduling
115 // priorities of the thread as they were before the thread entered background
116 // processing mode.
117 //
118 // FIXME: consider THREAD_PRIORITY_BELOW_NORMAL for Low
119 return SetThreadPriority(GetCurrentThread(),
120 Priority != ThreadPriority::Default
121 ? THREAD_MODE_BACKGROUND_BEGIN
122 : THREAD_MODE_BACKGROUND_END)
123 ? SetThreadPriorityResult::SUCCESS
124 : SetThreadPriorityResult::FAILURE;
125}
126
/// Per-processor-group bookkeeping used when counting cores and threads and
/// when distributing threads across groups.
///
/// Every member has a default so that a default-constructed group is fully
/// defined. In particular ThreadsPerCore defaults to 1, because it is used as
/// a divisor and must never be read while indeterminate.
struct ProcessorGroup {
  unsigned ID = 0;             // Identifier of the group.
  unsigned AllThreads = 0;     // All hardware threads in the group.
  unsigned UsableThreads = 0;  // Hardware threads this process may use.
  unsigned ThreadsPerCore = 1; // Hardware threads sharing one core.
  uint64_t Affinity = 0;       // Bit mask of the usable hardware threads.

  /// Number of usable cores, i.e. UsableThreads / ThreadsPerCore, but never
  /// less than one.
  unsigned useableCores() const {
    return std::max(1U, UsableThreads / ThreadsPerCore);
  }
};
138
139template <typename F>
140static bool IterateProcInfo(LOGICAL_PROCESSOR_RELATIONSHIP Relationship, F Fn) {
141 DWORD Len = 0;
142 BOOL R = ::GetLogicalProcessorInformationEx(Relationship, NULL, &Len);
143 if (R || GetLastError() != ERROR_INSUFFICIENT_BUFFER) {
144 return false;
145 }
146 auto *Info = (SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX *)calloc(1, Len);
147 R = ::GetLogicalProcessorInformationEx(Relationship, Info, &Len);
148 if (R) {
149 auto *End =
150 (SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX *)((uint8_t *)Info + Len);
151 for (auto *Curr = Info; Curr < End;
152 Curr = (SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX *)((uint8_t *)Curr +
153 Curr->Size)) {
154 if (Curr->Relationship != Relationship)
155 continue;
156 Fn(Curr);
157 }
158 }
159 free(Info);
160 return true;
161}
162
163static std::optional<std::vector<USHORT>> getActiveGroups() {
164 USHORT Count = 0;
165 if (::GetProcessGroupAffinity(GetCurrentProcess(), &Count, nullptr))
166 return std::nullopt;
167
168 if (GetLastError() != ERROR_INSUFFICIENT_BUFFER)
169 return std::nullopt;
170
171 std::vector<USHORT> Groups;
172 Groups.resize(Count);
173 if (!::GetProcessGroupAffinity(GetCurrentProcess(), &Count, Groups.data()))
174 return std::nullopt;
175
176 return Groups;
177}
178
179static ArrayRef<ProcessorGroup> getProcessorGroups() {
180 auto computeGroups = []() {
181 SmallVector<ProcessorGroup, 4> Groups;
182
183 auto HandleGroup = [&](SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX *ProcInfo) {
184 GROUP_RELATIONSHIP &El = ProcInfo->Group;
185 for (unsigned J = 0; J < El.ActiveGroupCount; ++J) {
186 ProcessorGroup G;
187 G.ID = Groups.size();
188 G.AllThreads = El.GroupInfo[J].MaximumProcessorCount;
189 G.UsableThreads = El.GroupInfo[J].ActiveProcessorCount;
190 assert(G.UsableThreads <= 64);
191 G.Affinity = El.GroupInfo[J].ActiveProcessorMask;
192 Groups.push_back(G);
193 }
194 };
195
196 if (!IterateProcInfo(RelationGroup, HandleGroup))
197 return std::vector<ProcessorGroup>();
198
199 auto HandleProc = [&](SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX *ProcInfo) {
200 PROCESSOR_RELATIONSHIP &El = ProcInfo->Processor;
201 assert(El.GroupCount == 1);
202 unsigned NumHyperThreads = 1;
203 // If the flag is set, each core supports more than one hyper-thread.
204 if (El.Flags & LTP_PC_SMT)
205 NumHyperThreads = std::bitset<64>(El.GroupMask[0].Mask).count();
206 unsigned I = El.GroupMask[0].Group;
207 Groups[I].ThreadsPerCore = NumHyperThreads;
208 };
209
210 if (!IterateProcInfo(RelationProcessorCore, HandleProc))
211 return std::vector<ProcessorGroup>();
212
213 auto ActiveGroups = getActiveGroups();
214 if (!ActiveGroups)
215 return std::vector<ProcessorGroup>();
216
217 // If there's an affinity mask set, assume the user wants to constrain the
218 // current process to only a single CPU group. On Windows, it is not
219 // possible for affinity masks to cross CPU group boundaries.
220 DWORD_PTR ProcessAffinityMask = 0, SystemAffinityMask = 0;
221 if (::GetProcessAffinityMask(GetCurrentProcess(), &ProcessAffinityMask,
222 &SystemAffinityMask)) {
223
224 if (ProcessAffinityMask != SystemAffinityMask) {
225 if (llvm::RunningWindows11OrGreater() && ActiveGroups->size() > 1) {
226 // The process affinity mask is spurious, due to an OS bug, ignore it.
227 return std::vector<ProcessorGroup>(Groups.begin(), Groups.end());
228 }
229
230 assert(ActiveGroups->size() == 1 &&
231 "When an affinity mask is set, the process is expected to be "
232 "assigned to a single processor group!");
233
234 unsigned CurrentGroupID = (*ActiveGroups)[0];
235 ProcessorGroup NewG{Groups[CurrentGroupID]};
236 NewG.Affinity = ProcessAffinityMask;
237 NewG.UsableThreads = llvm::popcount(ProcessAffinityMask);
238 Groups.clear();
239 Groups.push_back(NewG);
240 }
241 }
242 return std::vector<ProcessorGroup>(Groups.begin(), Groups.end());
243 };
244 static auto Groups = computeGroups();
245 return ArrayRef<ProcessorGroup>(Groups);
246}
247
/// Sums P(Element) over every element of \p Range and returns the total as an
/// unsigned value. An empty range yields 0.
template <typename R, typename UnaryPredicate>
static unsigned aggregate(R &&Range, UnaryPredicate P) {
  unsigned Total = 0;
  for (auto Pos = std::begin(Range), Last = std::end(Range); Pos != Last;
       ++Pos) {
    const auto &Element = *Pos;
    Total += P(Element);
  }
  return Total;
}
255
257 static unsigned Cores =
258 aggregate(getProcessorGroups(), [](const ProcessorGroup &G) {
259 return G.UsableThreads / G.ThreadsPerCore;
260 });
261 return Cores;
262}
263
264static int computeHostNumHardwareThreads() {
265 static unsigned Threads =
266 aggregate(getProcessorGroups(),
267 [](const ProcessorGroup &G) { return G.UsableThreads; });
268 return Threads;
269}
270
271// Finds the proper CPU socket where a thread number should go. Returns
272// 'std::nullopt' if the thread shall remain on the actual CPU socket.
273std::optional<unsigned>
274llvm::ThreadPoolStrategy::compute_cpu_socket(unsigned ThreadPoolNum) const {
275 ArrayRef<ProcessorGroup> Groups = getProcessorGroups();
276 // Only one CPU socket in the system or process affinity was set, no need to
277 // move the thread(s) to another CPU socket.
278 if (Groups.size() <= 1)
279 return std::nullopt;
280
281 // We ask for less threads than there are hardware threads per CPU socket, no
282 // need to dispatch threads to other CPU sockets.
283 unsigned MaxThreadsPerSocket =
284 UseHyperThreads ? Groups[0].UsableThreads : Groups[0].useableCores();
285 if (compute_thread_count() <= MaxThreadsPerSocket)
286 return std::nullopt;
287
288 assert(ThreadPoolNum < compute_thread_count() &&
289 "The thread index is not within thread strategy's range!");
290
291 // Assumes the same number of hardware threads per CPU socket.
292 return (ThreadPoolNum * Groups.size()) / compute_thread_count();
293}
294
295// Assign the current thread to a more appropriate CPU socket or CPU group
297 unsigned ThreadPoolNum) const {
298
299 // After Windows 11 and Windows Server 2022, let the OS do the scheduling,
300 // since a process automatically gains access to all processor groups.
302 return;
303
304 std::optional<unsigned> Socket = compute_cpu_socket(ThreadPoolNum);
305 if (!Socket)
306 return;
307 ArrayRef<ProcessorGroup> Groups = getProcessorGroups();
308 GROUP_AFFINITY Affinity{};
309 Affinity.Group = Groups[*Socket].ID;
310 Affinity.Mask = Groups[*Socket].Affinity;
311 SetThreadGroupAffinity(GetCurrentThread(), &Affinity, nullptr);
312}
313
315 GROUP_AFFINITY Affinity{};
316 GetThreadGroupAffinity(GetCurrentThread(), &Affinity);
317
318 static unsigned All =
319 aggregate(getProcessorGroups(),
320 [](const ProcessorGroup &G) { return G.AllThreads; });
321
322 unsigned StartOffset =
323 aggregate(getProcessorGroups(), [&](const ProcessorGroup &G) {
324 return G.ID < Affinity.Group ? G.AllThreads : 0;
325 });
326
328 V.resize(All);
329 for (unsigned I = 0; I < sizeof(KAFFINITY) * 8; ++I) {
330 if ((Affinity.Mask >> I) & 1)
331 V.set(StartOffset + I);
332 }
333 return V;
334}
335
336unsigned llvm::get_cpus() { return getProcessorGroups().size(); }
Analysis containing CSE Info
Definition: CSEInfo.cpp:27
std::string Name
bool End
Definition: ELF_riscv.cpp:480
lazy value info
#define F(x, y, z)
Definition: MD5.cpp:55
#define I(x, y, z)
Definition: MD5.cpp:58
#define G(x, y, z)
Definition: MD5.cpp:56
ConstantRange Range(APInt(BitWidth, Low), APInt(BitWidth, High))
#define P(N)
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
This file defines the SmallString class.
static const X86InstrFMA3Group Groups[]
std::optional< unsigned > compute_cpu_socket(unsigned ThreadPoolNum) const
Finds the CPU socket where a thread should go.
void apply_thread_strategy(unsigned ThreadPoolNum) const
Assign the current thread to an ideal hardware CPU or NUMA node.
unsigned compute_thread_count() const
Retrieves the max available threads for the current strategy.
Definition: Threading.cpp:41
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
SetThreadPriorityResult set_thread_priority(ThreadPriority Priority)
int popcount(T Value) noexcept
Count the number of set bits in a value.
Definition: bit.h:385
void ReportLastErrorFatal(const char *Msg)
llvm::BitVector get_thread_affinity_mask()
Returns a mask that represents on which hardware thread, core, CPU, NUMA group, the calling thread can be executed.
Definition: Threading.cpp:39
uint32_t get_max_thread_name_length()
Get the maximum length of a thread name on this platform.
Definition: Threading.cpp:33
unsigned get_cpus()
Returns how many physical CPUs or NUMA groups the system has.
void set_thread_name(const Twine &Name)
Set the name of the current thread.
Definition: Threading.cpp:35
SetThreadPriorityResult
Definition: Threading.h:267
void get_thread_name(SmallVectorImpl< char > &Name)
Get the name of the current thread.
Definition: Threading.cpp:37
int get_physical_cores()
Returns how many physical cores (as opposed to logical cores returned from thread::hardware_concurrency(), which includes hyperthreads).
Definition: Threading.cpp:47
uint64_t get_threadid()
Return the current thread id, as used in various OS system calls.
Definition: Threading.cpp:31
bool RunningWindows11OrGreater()
Determines if the program is running on Windows 11 or Windows Server 2022.