From 9c4763887deaed73059d390dbf4343e26e519873 Mon Sep 17 00:00:00 2001
From: German Semenov
Date: Fri, 10 Nov 2023 18:33:53 +0300
Subject: [PATCH] Added NUMA architecture support for multiprocessing system

---
 src/xrCore/Threading/TaskManager.cpp | 57 ++++++++++++++++++++++++++++
 src/xrCore/Threading/TaskManager.hpp |  3 ++
 src/xrCore/_math.cpp                 |  5 +++
 3 files changed, 65 insertions(+)

diff --git a/src/xrCore/Threading/TaskManager.cpp b/src/xrCore/Threading/TaskManager.cpp
index 54859dcc900..53aa99c8ed5 100644
--- a/src/xrCore/Threading/TaskManager.cpp
+++ b/src/xrCore/Threading/TaskManager.cpp
@@ -213,12 +213,69 @@ void CalcIterations()
     ttapi_dwFastIter = u32((iterations * frequency) / ((end - start) * 50000));
 }
 
+// Lainon: counts the logical processors of every processor group, so multi-cpu
+// systems on NUMA architecture are reported in full. Other platforms, and a
+// failed query, fall back to std::thread::hardware_concurrency(), so a failed
+// query does not by itself yield 0.
+size_t NUMAHardwareConcurrency() noexcept
+{
+    const size_t fallback = std::thread::hardware_concurrency();
+#if defined(XR_PLATFORM_WINDOWS)
+    // Size query: expected to fail with ERROR_INSUFFICIENT_BUFFER and set length.
+    DWORD length = 0;
+    if (GetLogicalProcessorInformationEx(RelationProcessorCore, nullptr, &length) != FALSE ||
+        GetLastError() != ERROR_INSUFFICIENT_BUFFER)
+    {
+        return fallback;
+    }
+
+    std::unique_ptr<void, void (*)(void*)> buffer(std::malloc(length), std::free);
+    if (!buffer)
+        return fallback;
+
+    auto* mem = static_cast<char*>(buffer.get());
+    if (GetLogicalProcessorInformationEx(RelationProcessorCore,
+            reinterpret_cast<PSYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX>(mem), &length) == FALSE)
+    {
+        return fallback;
+    }
+
+    size_t concurrency = 0;
+    for (DWORD offset = 0; offset < length;)
+    {
+        const auto* proc =
+            reinterpret_cast<const SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX*>(mem + offset);
+        if (proc->Size == 0)
+            break; // malformed record: the offset would never advance
+
+        // The Processor member is meaningful only for RelationProcessorCore records.
+        if (proc->Relationship == RelationProcessorCore)
+        {
+            for (WORD group = 0; group < proc->Processor.GroupCount; ++group)
+            {
+                // Each set bit is one logical processor of this core.
+                for (KAFFINITY mask = proc->Processor.GroupMask[group].Mask; mask != 0; mask >>= 1)
+                    concurrency += mask & 1;
+            }
+        }
+        offset += proc->Size;
+    }
+    return concurrency != 0 ? concurrency : fallback;
+#else
+    return fallback;
+#endif
+}
+
 TaskManager::TaskManager()
 {
     s_main_thread_worker = &s_tl_worker;
     s_main_thread_worker->id = 0;
 
+#if defined(XR_PLATFORM_WINDOWS)
+    const u32 threads = static_cast<u32>(NUMAHardwareConcurrency() - OTHER_THREADS_COUNT);
+#else
     const u32 threads = std::thread::hardware_concurrency() - OTHER_THREADS_COUNT;
+#endif
     workers.reserve(threads);
     for (u32 i = 0; i < threads; ++i)
     {
diff --git a/src/xrCore/Threading/TaskManager.hpp b/src/xrCore/Threading/TaskManager.hpp
index d8fab683b0c..c54f5d107d7 100644
--- a/src/xrCore/Threading/TaskManager.hpp
+++ b/src/xrCore/Threading/TaskManager.hpp
@@ -18,6 +18,9 @@
 #include "Event.hpp"
 #include "Task.hpp"
 
+// Logical processor count across all processor groups (defined in TaskManager.cpp).
+size_t NUMAHardwareConcurrency() noexcept;
+
 class TaskWorker;
 
 class XRCORE_API TaskManager final
diff --git a/src/xrCore/_math.cpp b/src/xrCore/_math.cpp
index 4414234e0a5..be0ab1e680a 100644
--- a/src/xrCore/_math.cpp
+++ b/src/xrCore/_math.cpp
@@ -1,6 +1,7 @@
 #include "stdafx.h"
 
 #if defined(XR_PLATFORM_WINDOWS)
+#    include "Threading/TaskManager.hpp"
 #    include <float.h> // _controlfp
 #    if defined(_M_FP_PRECISE)
 #        pragma fenv_access(on)
@@ -236,7 +237,11 @@ void _initialize_cpu()
     listFeature("AltiVec", SDL_HasAltiVec());
 
     Msg("* CPU features: %s", features);
+#if defined(XR_PLATFORM_WINDOWS)
+    Msg("* CPU threads: %u", static_cast<unsigned>(NUMAHardwareConcurrency()));
+#else
     Msg("* CPU threads: %d", std::thread::hardware_concurrency());
+#endif
 
     CPU::HasSSE = SDL_HasSSE(); // just in case, not sure if needed
 