29llvm_execute_on_thread_impl(
unsigned(__stdcall *ThreadFunc)(
void *),
void *Arg,
30 std::optional<unsigned> StackSizeInBytes) {
31 HANDLE hThread = (HANDLE)::_beginthreadex(NULL, StackSizeInBytes.value_or(0),
32 ThreadFunc, Arg, 0, NULL);
41void llvm_thread_join_impl(HANDLE hThread) {
42 if (::WaitForSingleObject(hThread, INFINITE) == WAIT_FAILED) {
47void llvm_thread_detach_impl(HANDLE hThread) {
48 if (::CloseHandle(hThread) == FALSE) {
// Returns the thread identifier that ::GetThreadId reports for the thread
// handle hThread. The result of the Win32 call is passed through unchanged.
53DWORD llvm_thread_get_id_impl(HANDLE hThread) { return ::GetThreadId(hThread); }
// Returns the identifier of the calling thread, as reported by
// ::GetCurrentThreadId. Takes no arguments and forwards the result unchanged.
55DWORD llvm_thread_get_current_id_impl() { return ::GetCurrentThreadId(); }
64static void SetThreadName(DWORD Id, LPCSTR
Name) {
65 constexpr DWORD MS_VC_EXCEPTION = 0x406D1388;
68 struct THREADNAME_INFO {
83 ::RaiseException(MS_VC_EXCEPTION, 0,
sizeof(
info) /
sizeof(ULONG_PTR),
85 } __except (EXCEPTION_EXECUTE_HANDLER) {
93 SmallString<64> Storage;
94 StringRef NameStr =
Name.toNullTerminatedStringRef(Storage);
95 SetThreadName(::GetCurrentThreadId(), NameStr.data());
119 return SetThreadPriority(GetCurrentThread(),
120 Priority != ThreadPriority::Default
121 ? THREAD_MODE_BACKGROUND_BEGIN
122 : THREAD_MODE_BACKGROUND_END)
123 ? SetThreadPriorityResult::SUCCESS
124 : SetThreadPriorityResult::FAILURE;
127struct ProcessorGroup {
130 unsigned UsableThreads;
131 unsigned ThreadsPerCore;
134 unsigned useableCores()
const {
135 return std::max(1U, UsableThreads / ThreadsPerCore);
140static bool IterateProcInfo(LOGICAL_PROCESSOR_RELATIONSHIP Relationship,
F Fn) {
142 BOOL
R = ::GetLogicalProcessorInformationEx(Relationship, NULL, &Len);
143 if (R || GetLastError() != ERROR_INSUFFICIENT_BUFFER) {
146 auto *
Info = (SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX *)calloc(1, Len);
147 R = ::GetLogicalProcessorInformationEx(Relationship,
Info, &Len);
150 (SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX *)((
uint8_t *)
Info +
Len);
151 for (
auto *Curr =
Info; Curr <
End;
152 Curr = (SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX *)((
uint8_t *)Curr +
154 if (Curr->Relationship != Relationship)
163static std::optional<std::vector<USHORT>> getActiveGroups() {
165 if (::GetProcessGroupAffinity(GetCurrentProcess(), &Count,
nullptr))
168 if (GetLastError() != ERROR_INSUFFICIENT_BUFFER)
171 std::vector<USHORT>
Groups;
173 if (!::GetProcessGroupAffinity(GetCurrentProcess(), &Count,
Groups.data()))
179static ArrayRef<ProcessorGroup> getProcessorGroups() {
180 auto computeGroups = []() {
181 SmallVector<ProcessorGroup, 4>
Groups;
183 auto HandleGroup = [&](SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX *ProcInfo) {
184 GROUP_RELATIONSHIP &El = ProcInfo->Group;
185 for (
unsigned J = 0; J < El.ActiveGroupCount; ++J) {
188 G.AllThreads = El.GroupInfo[J].MaximumProcessorCount;
189 G.UsableThreads = El.GroupInfo[J].ActiveProcessorCount;
191 G.Affinity = El.GroupInfo[J].ActiveProcessorMask;
196 if (!IterateProcInfo(RelationGroup, HandleGroup))
197 return std::vector<ProcessorGroup>();
199 auto HandleProc = [&](SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX *ProcInfo) {
200 PROCESSOR_RELATIONSHIP &El = ProcInfo->Processor;
201 assert(El.GroupCount == 1);
202 unsigned NumHyperThreads = 1;
204 if (El.Flags & LTP_PC_SMT)
205 NumHyperThreads = std::bitset<64>(El.GroupMask[0].Mask).count();
206 unsigned I = El.GroupMask[0].Group;
207 Groups[
I].ThreadsPerCore = NumHyperThreads;
210 if (!IterateProcInfo(RelationProcessorCore, HandleProc))
211 return std::vector<ProcessorGroup>();
213 auto ActiveGroups = getActiveGroups();
215 return std::vector<ProcessorGroup>();
220 DWORD_PTR ProcessAffinityMask = 0, SystemAffinityMask = 0;
221 if (::GetProcessAffinityMask(GetCurrentProcess(), &ProcessAffinityMask,
222 &SystemAffinityMask)) {
224 if (ProcessAffinityMask != SystemAffinityMask) {
227 return std::vector<ProcessorGroup>(
Groups.begin(),
Groups.end());
230 assert(ActiveGroups->size() == 1 &&
231 "When an affinity mask is set, the process is expected to be "
232 "assigned to a single processor group!");
234 unsigned CurrentGroupID = (*ActiveGroups)[0];
235 ProcessorGroup NewG{
Groups[CurrentGroupID]};
236 NewG.Affinity = ProcessAffinityMask;
242 return std::vector<ProcessorGroup>(
Groups.begin(),
Groups.end());
244 static auto Groups = computeGroups();
245 return ArrayRef<ProcessorGroup>(
Groups);
248template <
typename R,
typename UnaryPredicate>
249static unsigned aggregate(R &&
Range, UnaryPredicate
P) {
251 for (
const auto &It :
Range)
257 static unsigned Cores =
258 aggregate(getProcessorGroups(), [](
const ProcessorGroup &
G) {
259 return G.UsableThreads /
G.ThreadsPerCore;
264static int computeHostNumHardwareThreads() {
265 static unsigned Threads =
266 aggregate(getProcessorGroups(),
267 [](
const ProcessorGroup &
G) {
return G.UsableThreads; });
273std::optional<unsigned>
275 ArrayRef<ProcessorGroup>
Groups = getProcessorGroups();
283 unsigned MaxThreadsPerSocket =
289 "The thread index is not within thread strategy's range!");
297 unsigned ThreadPoolNum)
const {
307 ArrayRef<ProcessorGroup>
Groups = getProcessorGroups();
308 GROUP_AFFINITY Affinity{};
309 Affinity.Group =
Groups[*Socket].ID;
310 Affinity.Mask =
Groups[*Socket].Affinity;
311 SetThreadGroupAffinity(GetCurrentThread(), &Affinity,
nullptr);
315 GROUP_AFFINITY Affinity{};
316 GetThreadGroupAffinity(GetCurrentThread(), &Affinity);
318 static unsigned All =
319 aggregate(getProcessorGroups(),
320 [](
const ProcessorGroup &
G) {
return G.AllThreads; });
322 unsigned StartOffset =
323 aggregate(getProcessorGroups(), [&](
const ProcessorGroup &
G) {
324 return G.ID < Affinity.Group ?
G.AllThreads : 0;
329 for (
unsigned I = 0;
I <
sizeof(KAFFINITY) * 8; ++
I) {
330 if ((Affinity.Mask >>
I) & 1)
331 V.set(StartOffset +
I);
Analysis containing CSE Info
ConstantRange Range(APInt(BitWidth, Low), APInt(BitWidth, High))
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
This file defines the SmallString class.
static const X86InstrFMA3Group Groups[]
std::optional< unsigned > compute_cpu_socket(unsigned ThreadPoolNum) const
Finds the CPU socket where a thread should go.
void apply_thread_strategy(unsigned ThreadPoolNum) const
Assign the current thread to an ideal hardware CPU or NUMA node.
unsigned compute_thread_count() const
Retrieves the max available threads for the current strategy.
This is an optimization pass for GlobalISel generic memory operations.
SetThreadPriorityResult set_thread_priority(ThreadPriority Priority)
int popcount(T Value) noexcept
Count the number of set bits in a value.
void ReportLastErrorFatal(const char *Msg)
llvm::BitVector get_thread_affinity_mask()
Returns a mask that represents on which hardware thread, core, CPU, NUMA group, the calling thread can be executed.
uint32_t get_max_thread_name_length()
Get the maximum length of a thread name on this platform.
unsigned get_cpus()
Returns how many physical CPUs or NUMA groups the system has.
void set_thread_name(const Twine &Name)
Set the name of the current thread.
void get_thread_name(SmallVectorImpl< char > &Name)
Get the name of the current thread.
int get_physical_cores()
Returns how many physical cores (as opposed to logical cores returned from thread::hardware_concurrency(), which includes hyperthreads).
uint64_t get_threadid()
Return the current thread id, as used in various OS system calls.
bool RunningWindows11OrGreater()
Determines if the program is running on Windows 11 or Windows Server 2022.