28llvm_execute_on_thread_impl(
unsigned(__stdcall *ThreadFunc)(
void *),
void *Arg,
29 std::optional<unsigned> StackSizeInBytes) {
30 HANDLE hThread = (HANDLE)::_beginthreadex(NULL, StackSizeInBytes.value_or(0),
31 ThreadFunc, Arg, 0, NULL);
40void llvm_thread_join_impl(HANDLE hThread) {
41 if (::WaitForSingleObject(hThread, INFINITE) == WAIT_FAILED) {
46void llvm_thread_detach_impl(HANDLE hThread) {
47 if (::CloseHandle(hThread) == FALSE) {
// Thin wrapper over Win32 ::GetThreadId: forwards the thread handle unchanged
// and returns whatever DWORD the API yields for it.
52DWORD llvm_thread_get_id_impl(HANDLE hThread) { return ::GetThreadId(hThread); }
// Thin wrapper over Win32 ::GetCurrentThreadId: takes no arguments and returns
// the identifier the API reports for the calling thread.
54DWORD llvm_thread_get_current_id_impl() { return ::GetCurrentThreadId(); }
63static void SetThreadName(DWORD Id, LPCSTR
Name) {
64 constexpr DWORD MS_VC_EXCEPTION = 0x406D1388;
67 struct THREADNAME_INFO {
82 ::RaiseException(MS_VC_EXCEPTION, 0,
sizeof(
info) /
sizeof(ULONG_PTR),
84 } __except (EXCEPTION_EXECUTE_HANDLER) {
92 SmallString<64> Storage;
93 StringRef NameStr =
Name.toNullTerminatedStringRef(Storage);
94 SetThreadName(::GetCurrentThreadId(), NameStr.data());
// Applies the request to the calling thread (GetCurrentThread()): any priority
// other than ThreadPriority::Default passes THREAD_MODE_BACKGROUND_BEGIN, while
// Default passes THREAD_MODE_BACKGROUND_END. A nonzero SetThreadPriority result
// is reported as SUCCESS, zero as FAILURE.
118 return SetThreadPriority(GetCurrentThread(),
119 Priority != ThreadPriority::Default
120 ? THREAD_MODE_BACKGROUND_BEGIN
121 : THREAD_MODE_BACKGROUND_END)
122 ? SetThreadPriorityResult::SUCCESS
123 : SetThreadPriorityResult::FAILURE;
126struct ProcessorGroup {
129 unsigned UsableThreads;
130 unsigned ThreadsPerCore;
133 unsigned useableCores()
const {
134 return std::max(1U, UsableThreads / ThreadsPerCore);
139static bool IterateProcInfo(LOGICAL_PROCESSOR_RELATIONSHIP Relationship,
F Fn) {
141 BOOL
R = ::GetLogicalProcessorInformationEx(Relationship, NULL, &Len);
142 if (R || GetLastError() != ERROR_INSUFFICIENT_BUFFER) {
145 auto *
Info = (SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX *)calloc(1, Len);
146 R = ::GetLogicalProcessorInformationEx(Relationship,
Info, &Len);
149 (SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX *)((uint8_t *)
Info +
Len);
150 for (
auto *Curr =
Info; Curr <
End;
151 Curr = (SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX *)((uint8_t *)Curr +
153 if (Curr->Relationship != Relationship)
162static std::optional<std::vector<USHORT>> getActiveGroups() {
164 if (::GetProcessGroupAffinity(GetCurrentProcess(), &Count,
nullptr))
167 if (GetLastError() != ERROR_INSUFFICIENT_BUFFER)
170 std::vector<USHORT>
Groups;
172 if (!::GetProcessGroupAffinity(GetCurrentProcess(), &Count,
Groups.data()))
178static ArrayRef<ProcessorGroup> getProcessorGroups() {
179 auto computeGroups = []() {
180 SmallVector<ProcessorGroup, 4>
Groups;
182 auto HandleGroup = [&](SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX *ProcInfo) {
183 GROUP_RELATIONSHIP &El = ProcInfo->Group;
184 for (
unsigned J = 0; J < El.ActiveGroupCount; ++J) {
187 G.AllThreads = El.GroupInfo[J].MaximumProcessorCount;
188 G.UsableThreads = El.GroupInfo[J].ActiveProcessorCount;
190 G.Affinity = El.GroupInfo[J].ActiveProcessorMask;
195 if (!IterateProcInfo(RelationGroup, HandleGroup))
196 return std::vector<ProcessorGroup>();
198 auto HandleProc = [&](SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX *ProcInfo) {
199 PROCESSOR_RELATIONSHIP &El = ProcInfo->Processor;
200 assert(El.GroupCount == 1);
201 unsigned NumHyperThreads = 1;
203 if (El.Flags & LTP_PC_SMT)
204 NumHyperThreads = std::bitset<64>(El.GroupMask[0].Mask).count();
205 unsigned I = El.GroupMask[0].Group;
206 Groups[
I].ThreadsPerCore = NumHyperThreads;
209 if (!IterateProcInfo(RelationProcessorCore, HandleProc))
210 return std::vector<ProcessorGroup>();
212 auto ActiveGroups = getActiveGroups();
214 return std::vector<ProcessorGroup>();
219 DWORD_PTR ProcessAffinityMask = 0, SystemAffinityMask = 0;
220 if (::GetProcessAffinityMask(GetCurrentProcess(), &ProcessAffinityMask,
221 &SystemAffinityMask)) {
223 if (ProcessAffinityMask != SystemAffinityMask) {
226 return std::vector<ProcessorGroup>(
Groups.begin(),
Groups.end());
229 assert(ActiveGroups->size() == 1 &&
230 "When an affinity mask is set, the process is expected to be "
231 "assigned to a single processor group!");
233 unsigned CurrentGroupID = (*ActiveGroups)[0];
234 ProcessorGroup NewG{
Groups[CurrentGroupID]};
235 NewG.Affinity = ProcessAffinityMask;
241 return std::vector<ProcessorGroup>(
Groups.begin(),
Groups.end());
243 static auto Groups = computeGroups();
244 return ArrayRef<ProcessorGroup>(
Groups);
247template <
typename R,
typename UnaryPredicate>
248static unsigned aggregate(R &&
Range, UnaryPredicate
P) {
250 for (
const auto &It :
Range)
256 static unsigned Cores =
257 aggregate(getProcessorGroups(), [](
const ProcessorGroup &
G) {
258 return G.UsableThreads /
G.ThreadsPerCore;
263static int computeHostNumHardwareThreads() {
264 static unsigned Threads =
265 aggregate(getProcessorGroups(),
266 [](
const ProcessorGroup &
G) {
return G.UsableThreads; });
272std::optional<unsigned>
274 ArrayRef<ProcessorGroup>
Groups = getProcessorGroups();
282 unsigned MaxThreadsPerSocket =
288 "The thread index is not within thread strategy's range!");
296 unsigned ThreadPoolNum)
const {
306 ArrayRef<ProcessorGroup>
Groups = getProcessorGroups();
307 GROUP_AFFINITY Affinity{};
308 Affinity.Group =
Groups[*Socket].ID;
309 Affinity.Mask =
Groups[*Socket].Affinity;
310 SetThreadGroupAffinity(GetCurrentThread(), &Affinity,
nullptr);
314 GROUP_AFFINITY Affinity{};
315 GetThreadGroupAffinity(GetCurrentThread(), &Affinity);
317 static unsigned All =
318 aggregate(getProcessorGroups(),
319 [](
const ProcessorGroup &
G) {
return G.AllThreads; });
321 unsigned StartOffset =
322 aggregate(getProcessorGroups(), [&](
const ProcessorGroup &
G) {
323 return G.ID < Affinity.Group ?
G.AllThreads : 0;
328 for (
unsigned I = 0;
I <
sizeof(KAFFINITY) * 8; ++
I) {
329 if ((Affinity.Mask >>
I) & 1)
330 V.set(StartOffset +
I);
Analysis containing CSE Info
ConstantRange Range(APInt(BitWidth, Low), APInt(BitWidth, High))
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
This file defines the SmallString class.
static const X86InstrFMA3Group Groups[]
std::optional< unsigned > compute_cpu_socket(unsigned ThreadPoolNum) const
Finds the CPU socket where a thread should go.
void apply_thread_strategy(unsigned ThreadPoolNum) const
Assign the current thread to an ideal hardware CPU or NUMA node.
unsigned compute_thread_count() const
Retrieves the max available threads for the current strategy.
This is an optimization pass for GlobalISel generic memory operations.
SetThreadPriorityResult set_thread_priority(ThreadPriority Priority)
int popcount(T Value) noexcept
Count the number of set bits in a value.
void ReportLastErrorFatal(const char *Msg)
llvm::BitVector get_thread_affinity_mask()
Returns a mask that represents on which hardware thread, core, CPU, or NUMA group the calling thread can be executed.
uint32_t get_max_thread_name_length()
Get the maximum length of a thread name on this platform.
unsigned get_cpus()
Returns how many physical CPUs or NUMA groups the system has.
void set_thread_name(const Twine &Name)
Set the name of the current thread.
void get_thread_name(SmallVectorImpl< char > &Name)
Get the name of the current thread.
int get_physical_cores()
Returns how many physical cores (as opposed to logical cores returned from thread::hardware_concurrency(), which includes hyperthreads).
uint64_t get_threadid()
Return the current thread id, as used in various OS system calls.
bool RunningWindows11OrGreater()
Determines if the program is running on Windows 11 or Windows Server 2022.