doxygen/Windows_2Threading_8inc_source.html

//===- Windows/Threading.inc - Win32 Threading Implementation - -*- C++ -*-===//

//

// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.

// See https://llvm.org/LICENSE.txt for license information.

// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception

//

//===----------------------------------------------------------------------===//

//

// This file provides the Win32 specific implementation of Threading functions.

//

//===----------------------------------------------------------------------===//


#include "llvm/ADT/SmallString.h"

#include "llvm/ADT/Twine.h"

#include "llvm/Support/thread.h"


#include "llvm/Support/Windows/WindowsSupport.h"

#include <process.h>


#include <bitset>


// Windows will at times define MemoryFence.

#ifdef MemoryFence

#undef MemoryFence

#endif


namespace llvm {

// Starts a native thread that runs ThreadFunc(Arg) and returns its handle.
// When no stack size is requested, 0 is forwarded to _beginthreadex. A failed
// thread creation is reported through ReportLastErrorFatal.
HANDLE
llvm_execute_on_thread_impl(unsigned(__stdcall *ThreadFunc)(void *), void *Arg,
                            std::optional<unsigned> StackSizeInBytes) {
  const unsigned RequestedStackSize = StackSizeInBytes.value_or(0);
  const auto RawHandle = ::_beginthreadex(nullptr, RequestedStackSize,
                                          ThreadFunc, Arg, 0, nullptr);
  HANDLE hThread = reinterpret_cast<HANDLE>(RawHandle);

  if (hThread == nullptr)
    ReportLastErrorFatal("_beginthreadex failed");

  return hThread;
}

// Blocks until the thread behind hThread is signaled, then closes the handle.
// Either step failing is reported through ReportLastErrorFatal.
void llvm_thread_join_impl(HANDLE hThread) {
  const DWORD WaitStatus = ::WaitForSingleObject(hThread, INFINITE);
  if (WaitStatus == WAIT_FAILED)
    ReportLastErrorFatal("WaitForSingleObject failed");

  if (!::CloseHandle(hThread))
    ReportLastErrorFatal("CloseHandle failed");
}

// Gives up the handle without waiting for the thread.
void llvm_thread_detach_impl(HANDLE hThread) {
  if (!::CloseHandle(hThread))
    ReportLastErrorFatal("CloseHandle failed");
}

// Returns the OS thread identifier associated with hThread.
DWORD llvm_thread_get_id_impl(HANDLE hThread) {
  const DWORD ThreadId = ::GetThreadId(hThread);
  return ThreadId;
}

// Returns the OS thread identifier of the calling thread.
DWORD llvm_thread_get_current_id_impl() {
  const DWORD ThreadId = ::GetCurrentThreadId();
  return ThreadId;
}

} // namespace llvm


uint64_t llvm::get_threadid() { return uint64_t(::GetCurrentThreadId()); }


// The Windows implementation always reports 0 here.
uint32_t llvm::get_max_thread_name_length() {
  constexpr uint32_t MaxNameLength = 0;
  return MaxNameLength;
}


#if defined(_MSC_VER)

// Announces a name for thread Id by raising the MS_VC_EXCEPTION (0x406D1388)
// exception with a THREADNAME_INFO payload. The exception is swallowed by the
// local handler, so the call has no other visible effect in this function.
static void SetThreadName(DWORD Id, LPCSTR Name) {
  constexpr DWORD MS_VC_EXCEPTION = 0x406D1388;

#pragma pack(push, 8)
  struct THREADNAME_INFO {
    DWORD dwType;     // Must be 0x1000.
    LPCSTR szName;    // Pointer to thread name
    DWORD dwThreadId; // Thread ID (-1 == current thread)
    DWORD dwFlags;    // Reserved.  Do not use.
  };
#pragma pack(pop)

  // Field order: dwType, szName, dwThreadId, dwFlags.
  THREADNAME_INFO Payload = {0x1000, Name, Id, 0};

  // RaiseException takes the payload as an array of ULONG_PTR-sized arguments.
  constexpr DWORD NumArguments = sizeof(Payload) / sizeof(ULONG_PTR);

  __try {
    ::RaiseException(MS_VC_EXCEPTION, 0, NumArguments,
                     reinterpret_cast<const ULONG_PTR *>(&Payload));
  } __except (EXCEPTION_EXECUTE_HANDLER) {
    // Nothing to do: the exception only exists to carry the payload.
  }
}

#endif


// Names the calling thread. This is a no-op unless building with MSVC.
void llvm::set_thread_name(const Twine &Name) {
#if defined(_MSC_VER)
  // SetThreadName needs a C string, so render the Twine into a
  // null-terminated buffer first.
  SmallString<64> Buffer;
  const char *CName = Name.toNullTerminatedStringRef(Buffer).data();
  SetThreadName(::GetCurrentThreadId(), CName);
#endif
}


// Always yields an empty name.
void llvm::get_thread_name(SmallVectorImpl<char> &Name) {
  // A thread has no inherent "name" property on Windows. "Setting" a name
  // merely fires a one-time message that a debugger interprets as the program
  // naming its thread, so there is nothing to read back here. A possible
  // refinement would be for set_thread_name to stash the value in a TLS entry
  // so that later get_thread_name calls could return it.
  Name.clear();
}


namespace llvm::sys::windows {

// Returns the handle of the already-loaded module named lpModuleName, looked
// up by its full path inside the system directory. Returns NULL when the
// system directory cannot be queried, or whatever GetModuleHandleW returns for
// the composed path.
HMODULE loadSystemModuleSecure(LPCWSTR lpModuleName) {
  // Make sure the module we look up really lives in the system32 path.
  // The GetModuleHandle documentation states:
  // "If lpModuleName does not include a path and there is more than one loaded
  // module with the same base name and extension, you cannot predict which
  // module handle will be returned.". Using a full path mitigates
  // https://learn.microsoft.com/en-us/security-updates/securityadvisories/2010/2269637
  SmallVector<wchar_t, MAX_PATH> Path;
  size_t Length = MAX_PATH;
  for (;;) {
    Path.resize_for_overwrite(Length);
    SetLastError(NO_ERROR);
    Length = ::GetSystemDirectoryW(Path.data(), Path.size());
    if (Length == 0)
      return NULL;

    // The result fits; otherwise go around again with the larger size.
    if (Length <= Path.size())
      break;
  }

  // Compose "<system directory>\<module name>" followed by a terminating 0.
  Path.truncate(Length);
  Path.push_back(L'\\');
  const wchar_t *NameEnd = lpModuleName + std::wcslen(lpModuleName);
  Path.append(lpModuleName, NameEnd);
  Path.push_back(0);

  return ::GetModuleHandleW(Path.data());
}

} // namespace llvm::sys::windows


#ifdef THREAD_POWER_THROTTLING_CURRENT_VERSION
// Best-effort adjustment of the calling thread's power throttling state:
// execution-speed throttling is switched on for ThreadPriority::Background and
// off for every other priority. The call is silently skipped when kernel32 or
// SetThreadInformation cannot be resolved, and its result is not inspected.
static void applyThreadPowerThrottling(llvm::ThreadPriority Priority) {
  HMODULE Kernel32 =
      llvm::sys::windows::loadSystemModuleSecure(L"kernel32.dll");
  if (!Kernel32)
    return;

  // SetThreadInformation is only available on Windows 8 and later. Since we
  // still support compilation on Windows 7, we load the function dynamically.
  using SetThreadInformation_t = BOOL(WINAPI *)(
      HANDLE hThread, THREAD_INFORMATION_CLASS ThreadInformationClass,
      _In_reads_bytes_(ThreadInformationSize) PVOID ThreadInformation,
      ULONG ThreadInformationSize);
  static const auto pfnSetThreadInformation =
      (SetThreadInformation_t)::GetProcAddress(Kernel32,
                                               "SetThreadInformation");
  if (!pfnSetThreadInformation)
    return;

  // Use EcoQoS for ThreadPriority::Background when available (running on the
  // most efficient cores at the most efficient cpu frequency):
  // https://learn.microsoft.com/en-us/windows/win32/api/processthreadsapi/nf-processthreadsapi-setthreadinformation
  // https://learn.microsoft.com/en-us/windows/win32/procthread/quality-of-service
  ULONG ControlMaskAndStateMask = 0;
  if (Priority == llvm::ThreadPriority::Background)
    ControlMaskAndStateMask = THREAD_POWER_THROTTLING_EXECUTION_SPEED;

  THREAD_POWER_THROTTLING_STATE State{};
  State.Version = THREAD_POWER_THROTTLING_CURRENT_VERSION;
  State.ControlMask = ControlMaskAndStateMask;
  State.StateMask = ControlMaskAndStateMask;
  pfnSetThreadInformation(::GetCurrentThread(), ThreadPowerThrottling, &State,
                          sizeof(State));
}
#endif

// Applies Priority to the calling thread and reports whether the
// SetThreadPriority call succeeded.
SetThreadPriorityResult llvm::set_thread_priority(ThreadPriority Priority) {
#ifdef THREAD_POWER_THROTTLING_CURRENT_VERSION
  applyThreadPowerThrottling(Priority);
#endif

  // https://docs.microsoft.com/en-us/windows/desktop/api/processthreadsapi/nf-processthreadsapi-setthreadpriority
  // THREAD_MODE_BACKGROUND_BEGIN: begin background processing mode. The system
  // lowers the resource scheduling priorities of the thread so that it can
  // perform background work without significantly affecting activity in the
  // foreground.
  // THREAD_MODE_BACKGROUND_END: end background processing mode. The system
  // restores the resource scheduling priorities of the thread as they were
  // before the thread entered background processing mode.
  //
  // FIXME: consider THREAD_PRIORITY_BELOW_NORMAL for Low
  const int Mode = Priority == ThreadPriority::Default
                       ? THREAD_MODE_BACKGROUND_END
                       : THREAD_MODE_BACKGROUND_BEGIN;
  if (SetThreadPriority(GetCurrentThread(), Mode))
    return SetThreadPriorityResult::SUCCESS;
  return SetThreadPriorityResult::FAILURE;
}


// Describes one Windows processor group as seen by this process.
//
// Every member has a default so that a freshly constructed group is in a
// well-defined state. In particular ThreadsPerCore defaults to 1, because it
// is used as a divisor and is only filled in after the group itself has been
// recorded.
struct ProcessorGroup {
  // Index of the group in enumeration order.
  unsigned ID = 0;
  // Maximum number of logical processors the group can hold.
  unsigned AllThreads = 0;
  // Number of logical processors this process may actually use.
  unsigned UsableThreads = 0;
  // Number of logical processors sharing one physical core.
  unsigned ThreadsPerCore = 1;
  // Bit mask of the usable logical processors within the group.
  uint64_t Affinity = 0;

  // Returns the number of usable physical cores, never less than one. A zero
  // ThreadsPerCore is treated as 1 so that the division cannot trap.
  unsigned useableCores() const {
    return std::max(1U, UsableThreads / std::max(1U, ThreadsPerCore));
  }
};


// Queries the logical processor information for Relationship and invokes
// Fn(Entry) for every returned entry whose relationship matches.
//
// Returns true only when the information was retrieved successfully. Returns
// false when the size probe does not fail with ERROR_INSUFFICIENT_BUFFER, when
// the buffer cannot be allocated, or when the actual query fails; Fn is not
// invoked in any of these cases.
template <typename F>
static bool IterateProcInfo(LOGICAL_PROCESSOR_RELATIONSHIP Relationship, F Fn) {
  // First call with no buffer: expected to fail and report the needed size.
  DWORD Len = 0;
  BOOL R = ::GetLogicalProcessorInformationEx(Relationship, NULL, &Len);
  if (R || GetLastError() != ERROR_INSUFFICIENT_BUFFER)
    return false;

  auto *Info = (SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX *)calloc(1, Len);
  if (!Info)
    return false;

  R = ::GetLogicalProcessorInformationEx(Relationship, Info, &Len);
  if (R) {
    // Entries are variable-sized; each one records its own byte length.
    auto *End =
        (SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX *)((uint8_t *)Info + Len);
    for (auto *Curr = Info; Curr < End;
         Curr = (SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX *)((uint8_t *)Curr +
                                                            Curr->Size)) {
      if (Curr->Relationship != Relationship)
        continue;
      Fn(Curr);
    }
  }
  free(Info);

  // Do not report success when the query itself failed.
  return R != FALSE;
}


static std::optional<std::vector<USHORT>> getActiveGroups() {

  USHORT Count = 0;

  if (::GetProcessGroupAffinity(GetCurrentProcess(), &Count, nullptr))

    return std::nullopt;


  if (GetLastError() != ERROR_INSUFFICIENT_BUFFER)

    return std::nullopt;


  std::vector<USHORT> Groups;

  Groups.resize(Count);

  if (!::GetProcessGroupAffinity(GetCurrentProcess(), &Count, Groups.data()))

    return std::nullopt;


  return Groups;

}


// Returns the processor groups this process can run on. The list is computed
// once, on first use, and cached for the lifetime of the process. An empty
// list is returned when the system information cannot be queried.
//
// When the process affinity mask differs from the system mask, the list is
// reduced to a single group describing only the processors in that mask
// (except on Windows 11 or greater with several active groups, where the mask
// is ignored).
static ArrayRef<ProcessorGroup> getProcessorGroups() {
  auto computeGroups = []() {
    SmallVector<ProcessorGroup, 4> Groups;

    // Record one ProcessorGroup per active group, numbered in enumeration
    // order.
    auto HandleGroup = [&](SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX *ProcInfo) {
      GROUP_RELATIONSHIP &El = ProcInfo->Group;
      for (unsigned J = 0; J < El.ActiveGroupCount; ++J) {
        ProcessorGroup G;
        G.ID = Groups.size();
        G.AllThreads = El.GroupInfo[J].MaximumProcessorCount;
        G.UsableThreads = El.GroupInfo[J].ActiveProcessorCount;
        assert(G.UsableThreads <= 64);
        // ThreadsPerCore is used as a divisor later on. Give it a sane value
        // in case no core entry ever refers to this group.
        G.ThreadsPerCore = 1;
        G.Affinity = El.GroupInfo[J].ActiveProcessorMask;
        Groups.push_back(G);
      }
    };

    if (!IterateProcInfo(RelationGroup, HandleGroup))
      return std::vector<ProcessorGroup>();

    // Fill in the number of hardware threads per core for each group.
    auto HandleProc = [&](SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX *ProcInfo) {
      PROCESSOR_RELATIONSHIP &El = ProcInfo->Processor;
      assert(El.GroupCount == 1);
      unsigned NumHyperThreads = 1;
      // If the flag is set, each core supports more than one hyper-thread.
      if (El.Flags & LTP_PC_SMT)
        NumHyperThreads = std::bitset<64>(El.GroupMask[0].Mask).count();
      unsigned I = El.GroupMask[0].Group;
      // The group number comes from the OS; never index past what was
      // recorded above.
      if (I < Groups.size())
        Groups[I].ThreadsPerCore = NumHyperThreads;
    };

    if (!IterateProcInfo(RelationProcessorCore, HandleProc))
      return std::vector<ProcessorGroup>();

    auto ActiveGroups = getActiveGroups();
    if (!ActiveGroups)
      return std::vector<ProcessorGroup>();

    // If there's an affinity mask set, assume the user wants to constrain the
    // current process to only a single CPU group. On Windows, it is not
    // possible for affinity masks to cross CPU group boundaries.
    DWORD_PTR ProcessAffinityMask = 0, SystemAffinityMask = 0;
    if (::GetProcessAffinityMask(GetCurrentProcess(), &ProcessAffinityMask,
                                 &SystemAffinityMask)) {

      if (ProcessAffinityMask != SystemAffinityMask) {
        if (llvm::RunningWindows11OrGreater() && ActiveGroups->size() > 1) {
          // The process affinity mask is spurious, due to an OS bug, ignore it.
          return std::vector<ProcessorGroup>(Groups.begin(), Groups.end());
        }

        assert(ActiveGroups->size() == 1 &&
               "When an affinity mask is set, the process is expected to be "
               "assigned to a single processor group!");

        // Only narrow the list when the active group is one we know about;
        // otherwise keep the unconstrained list rather than read out of
        // bounds.
        if (!ActiveGroups->empty() && (*ActiveGroups)[0] < Groups.size()) {
          unsigned CurrentGroupID = (*ActiveGroups)[0];
          ProcessorGroup NewG{Groups[CurrentGroupID]};
          NewG.Affinity = ProcessAffinityMask;
          NewG.UsableThreads = llvm::popcount(ProcessAffinityMask);
          Groups.clear();
          Groups.push_back(NewG);
        }
      }
    }
    return std::vector<ProcessorGroup>(Groups.begin(), Groups.end());
  };
  static auto Groups = computeGroups();
  return ArrayRef<ProcessorGroup>(Groups);
}


// Sums P(Element) over every element of Range and returns the total as an
// unsigned value. An empty range yields 0.
template <typename R, typename UnaryPredicate>
static unsigned aggregate(R &&Range, UnaryPredicate P) {
  unsigned Total = 0;
  for (const auto &Element : Range) {
    Total = Total + P(Element);
  }
  return Total;
}


// Returns the number of physical cores usable by this process, summed over all
// processor groups. The value is computed once and cached.
//
// Each group contributes ProcessorGroup::useableCores() rather than a raw
// UsableThreads / ThreadsPerCore. This way a group restricted to fewer logical
// processors than one core's SMT width still counts as one core instead of
// zero, matching how compute_cpu_socket counts cores.
int llvm::get_physical_cores() {
  static unsigned Cores =
      aggregate(getProcessorGroups(),
                [](const ProcessorGroup &G) { return G.useableCores(); });
  return Cores;
}


static int computeHostNumHardwareThreads() {

  static unsigned Threads =

      aggregate(getProcessorGroups(),

                [](const ProcessorGroup &G) { return G.UsableThreads; });

  return Threads;

}


// Finds the proper CPU socket where a thread number should go. Returns

// 'std::nullopt' if the thread shall remain on the actual CPU socket.

std::optional<unsigned>

llvm::ThreadPoolStrategy::compute_cpu_socket(unsigned ThreadPoolNum) const {

  ArrayRef<ProcessorGroup> Groups = getProcessorGroups();

  // Only one CPU socket in the system or process affinity was set, no need to

  // move the thread(s) to another CPU socket.

  if (Groups.size() <= 1)

    return std::nullopt;


  // We ask for less threads than there are hardware threads per CPU socket, no

  // need to dispatch threads to other CPU sockets.

  unsigned MaxThreadsPerSocket =

      UseHyperThreads ? Groups[0].UsableThreads : Groups[0].useableCores();

  if (compute_thread_count() <= MaxThreadsPerSocket)

    return std::nullopt;


  assert(ThreadPoolNum < compute_thread_count() &&

         "The thread index is not within thread strategy's range!");


  // Assumes the same number of hardware threads per CPU socket.

  return (ThreadPoolNum * Groups.size()) / compute_thread_count();

}


// Assign the current thread to a more appropriate CPU socket or CPU group

void llvm::ThreadPoolStrategy::apply_thread_strategy(

    unsigned ThreadPoolNum) const {


  // After Windows 11 and Windows Server 2022, let the OS do the scheduling,

  // since a process automatically gains access to all processor groups.

  if (llvm::RunningWindows11OrGreater())

    return;


  std::optional<unsigned> Socket = compute_cpu_socket(ThreadPoolNum);

  if (!Socket)

    return;

  ArrayRef<ProcessorGroup> Groups = getProcessorGroups();

  GROUP_AFFINITY Affinity{};

  Affinity.Group = Groups[*Socket].ID;

  Affinity.Mask = Groups[*Socket].Affinity;

  SetThreadGroupAffinity(GetCurrentThread(), &Affinity, nullptr);

}


// Returns a bit vector with one bit per hardware thread slot across all
// processor groups (the sum of every group's AllThreads). A bit is set for
// each processor in the calling thread's group affinity mask, offset by the
// slots of all groups with a smaller ID.
//
// If the thread's affinity cannot be queried, the returned vector has no bits
// set. Bits that would fall outside the vector are skipped instead of written.
llvm::BitVector llvm::get_thread_affinity_mask() {
  static unsigned All =
      aggregate(getProcessorGroups(),
                [](const ProcessorGroup &G) { return G.AllThreads; });

  llvm::BitVector V;
  V.resize(All);

  GROUP_AFFINITY Affinity{};
  if (!GetThreadGroupAffinity(GetCurrentThread(), &Affinity))
    return V;

  // Slots of the groups that precede the thread's own group.
  unsigned StartOffset =
      aggregate(getProcessorGroups(), [&](const ProcessorGroup &G) {
        return G.ID < Affinity.Group ? G.AllThreads : 0;
      });

  for (unsigned I = 0; I < sizeof(KAFFINITY) * 8; ++I) {
    if (!((Affinity.Mask >> I) & 1))
      continue;
    // The group list may describe fewer slots than the mask has bits (it is
    // empty when discovery failed), so guard the index.
    unsigned Bit = StartOffset + I;
    if (Bit < V.size())
      V.set(Bit);
  }
  return V;
}


unsigned llvm::get_cpus() { return getProcessorGroups().size(); }

assert
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")

Info
Analysis containing CSE Info
Definition CSEInfo.cpp:27

InlinePriorityMode::Size
@ Size
Definition InlineOrder.cpp:25

info
lazy value info
Definition LazyValueInfo.cpp:59

F
#define F(x, y, z)
Definition MD5.cpp:54

I
#define I(x, y, z)
Definition MD5.cpp:57

G
#define G(x, y, z)
Definition MD5.cpp:55

Range
ConstantRange Range(APInt(BitWidth, Low), APInt(BitWidth, High))

P
#define P(N)

SmallString.h
This file defines the SmallString class.

Twine.h

WindowsSupport.h

Groups
static const X86InstrFMA3Group Groups[]
Definition X86InstrFMA3Info.cpp:81

llvm::BitVector
Definition BitVector.h:101

llvm::ThreadPoolStrategy::UseHyperThreads
bool UseHyperThreads
Definition Threading.h:125

llvm::ThreadPoolStrategy::apply_thread_strategy
LLVM_ABI void apply_thread_strategy(unsigned ThreadPoolNum) const
Assign the current thread to an ideal hardware CPU or NUMA node.

llvm::ThreadPoolStrategy::compute_cpu_socket
LLVM_ABI std::optional< unsigned > compute_cpu_socket(unsigned ThreadPoolNum) const
Finds the CPU socket where a thread should go.

llvm::ThreadPoolStrategy::compute_thread_count
LLVM_ABI unsigned compute_thread_count() const
Retrieves the max available threads for the current strategy.
Definition Threading.cpp:42

uint32_t

uint64_t

uint8_t

llvm::AArch64PACKey::ID
ID
Definition AArch64BaseInfo.h:968

llvm::AMDGPU::SDWA::DWORD
@ DWORD
Definition SIDefines.h:926

llvm::AMDGPU::VGPRIndexMode::Id
Id
Definition SIDefines.h:295

llvm::GraphProgram::Name
Name
Definition GraphWriter.h:51

llvm::M68k::MemAddrModeKind::V
@ V
Definition M68kBaseInfo.h:63

llvm::RISCVFenceField::R
@ R
Definition RISCVBaseInfo.h:474

llvm::omp::RTLDependInfoFields::Len
@ Len
Definition OMPConstants.h:286

llvm::sys::windows
Definition WindowsSupport.h:234

llvm::sys::windows::loadSystemModuleSecure
LLVM_ABI HMODULE loadSystemModuleSecure(LPCWSTR lpModuleName)
Retrieves the handle to a in-memory system module such as ntdll.dll, while ensuring we're not retriev...

llvm
This is an optimization pass for GlobalISel generic memory operations.
Definition Types.h:26

llvm::ReportLastErrorFatal
void ReportLastErrorFatal(const char *Msg)
Definition WindowsSupport.h:72

llvm::AllocationType::All
@ All
Definition ModuleSummaryIndex.h:402

llvm::get_thread_affinity_mask
LLVM_ABI llvm::BitVector get_thread_affinity_mask()
Returns a mask that represents on which hardware thread, core, CPU, NUMA group, the calling thread ca...
Definition Threading.cpp:40

llvm::get_max_thread_name_length
LLVM_ABI uint32_t get_max_thread_name_length()
Get the maximum length of a thread name on this platform.
Definition Threading.cpp:34

llvm::popcount
constexpr int popcount(T Value) noexcept
Count the number of set bits in a value.
Definition bit.h:154

llvm::set_thread_priority
LLVM_ABI SetThreadPriorityResult set_thread_priority(ThreadPriority Priority)

llvm::get_cpus
LLVM_ABI unsigned get_cpus()
Returns how many physical CPUs or NUMA groups the system has.

llvm::RunningWindows11OrGreater
LLVM_ABI bool RunningWindows11OrGreater()
Determines if the program is running on Windows 11 or Windows Server 2022.

llvm::Count
FunctionAddr VTableAddr Count
Definition InstrProf.h:139

llvm::set_thread_name
LLVM_ABI void set_thread_name(const Twine &Name)
Set the name of the current thread.
Definition Threading.cpp:36

llvm::SmallVector
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
Definition SmallVector.h:1129

llvm::SetThreadPriorityResult
SetThreadPriorityResult
Definition Threading.h:285

llvm::get_thread_name
LLVM_ABI void get_thread_name(SmallVectorImpl< char > &Name)
Get the name of the current thread.
Definition Threading.cpp:38

llvm::get_physical_cores
LLVM_ABI int get_physical_cores()
Returns how many physical cores (as opposed to logical cores returned from thread::hardware_concurren...
Definition Threading.cpp:48

llvm::get_threadid
LLVM_ABI uint64_t get_threadid()
Return the current thread id, as used in various OS system calls.
Definition Threading.cpp:32

llvm::ArrayRef
ArrayRef(const T &OneElt) -> ArrayRef< T >

thread.h