Skip to content

Commit

Permalink
Added NUMA architecture support for multiprocessing systems
Browse files Browse the repository at this point in the history
  • Loading branch information
German Semenov committed Nov 10, 2023
1 parent dfc96a1 commit 9c47638
Show file tree
Hide file tree
Showing 3 changed files with 58 additions and 0 deletions.
51 changes: 51 additions & 0 deletions src/xrCore/Threading/TaskManager.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -213,12 +213,63 @@ void CalcIterations()
ttapi_dwFastIter = u32((iterations * frequency) / ((end - start) * 50000));
}

// Lainon: This implementation supports both conventional single-CPU PC configurations
// and multi-CPU systems on NUMA architecture.
//
// Returns the number of logical processors available in the system.
//
// On Windows the count is obtained by enumerating every processor core with
// GetLogicalProcessorInformationEx and summing the set bits of each core's
// group affinity mask, so logical processors in every processor group are
// counted. On other platforms, or whenever the Windows query cannot be
// completed (API failure, allocation failure, malformed or empty result),
// the value of std::thread::hardware_concurrency() is returned instead, so
// callers never receive a spurious 0 from a failed query.
//
// Note: std::thread::hardware_concurrency() itself is allowed to return 0
// when the value is not computable, so callers that subtract from the result
// should still guard against underflow.
size_t NUMAHardwareConcurrency() noexcept
{
    const size_t fallback = std::thread::hardware_concurrency();

#if defined(XR_PLATFORM_WINDOWS)
    // First call with a null buffer only probes for the required size; it is
    // expected to fail with ERROR_INSUFFICIENT_BUFFER and fill in `length`.
    DWORD length = 0;
    if (GetLogicalProcessorInformationEx(RelationProcessorCore, nullptr, &length) != FALSE)
    {
        return fallback;
    }
    if (GetLastError() != ERROR_INSUFFICIENT_BUFFER)
    {
        return fallback;
    }

    // malloc is used (rather than new) so that an allocation failure can be
    // handled without throwing from this noexcept function.
    std::unique_ptr<void, void (*)(void*)> buffer(std::malloc(length), std::free);
    if (!buffer)
    {
        return fallback;
    }

    auto* mem = static_cast<unsigned char*>(buffer.get());
    if (GetLogicalProcessorInformationEx(RelationProcessorCore,
            reinterpret_cast<PSYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX>(mem), &length) == FALSE)
    {
        return fallback;
    }

    size_t concurrency = 0;
    DWORD offset = 0;
    while (offset < length)
    {
        // Records are variable-sized; each one states its own size in bytes.
        const auto* proc = reinterpret_cast<PSYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX>(mem + offset);
        if (proc->Size == 0)
        {
            break; // malformed record: avoid spinning forever
        }

        // The `Processor` union member is only meaningful for processor
        // records, so the relationship must be checked before it is read.
        if (proc->Relationship == RelationProcessorCore)
        {
            for (WORD group = 0; group < proc->Processor.GroupCount; ++group)
            {
                // Every set bit in the mask is one logical processor of this core.
                for (KAFFINITY mask = proc->Processor.GroupMask[group].Mask; mask != 0; mask >>= 1)
                {
                    concurrency += mask & 1;
                }
            }
        }
        offset += proc->Size;
    }

    return concurrency != 0 ? concurrency : fallback;
#else
    return fallback;
#endif
}

TaskManager::TaskManager()
{
s_main_thread_worker = &s_tl_worker;
s_main_thread_worker->id = 0;

#if defined(XR_PLATFORM_WINDOWS)
const u32 threads = NUMAHardwareConcurrency() - OTHER_THREADS_COUNT;
#else
const u32 threads = std::thread::hardware_concurrency() - OTHER_THREADS_COUNT;
#endif
workers.reserve(threads);
for (u32 i = 0; i < threads; ++i)
{
Expand Down
2 changes: 2 additions & 0 deletions src/xrCore/Threading/TaskManager.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,8 @@
#include "Event.hpp"
#include "Task.hpp"

// Logical processor count used on Windows builds to size the task worker pool
// and to report the CPU thread count in the log; defined in Threading/TaskManager.cpp.
size_t NUMAHardwareConcurrency() noexcept;

class TaskWorker;

class XRCORE_API TaskManager final
Expand Down
5 changes: 5 additions & 0 deletions src/xrCore/_math.cpp
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
#include "stdafx.h"

#if defined(XR_PLATFORM_WINDOWS)
# include "Threading/TaskManager.hpp"
# include <float.h> // _controlfp
# if defined(_M_FP_PRECISE)
# pragma fenv_access(on)
Expand Down Expand Up @@ -236,7 +237,11 @@ void _initialize_cpu()
listFeature("AltiVec", SDL_HasAltiVec());

Msg("* CPU features: %s", features);
#if defined(XR_PLATFORM_WINDOWS)
Msg("* CPU threads: %d", NUMAHardwareConcurrency());
#else
Msg("* CPU threads: %d", std::thread::hardware_concurrency());
#endif

CPU::HasSSE = SDL_HasSSE(); // just in case, not sure if needed

Expand Down

0 comments on commit 9c47638

Please sign in to comment.