diff --git a/HeterogeneousCore/CUDAServices/bin/BuildFile.xml b/HeterogeneousCore/CUDAServices/bin/BuildFile.xml deleted file mode 100644 index ae3d37a5f6b4f..0000000000000 --- a/HeterogeneousCore/CUDAServices/bin/BuildFile.xml +++ /dev/null @@ -1,9 +0,0 @@ - - - - - - - - - diff --git a/HeterogeneousCore/CUDAUtilities/bin/BuildFile.xml b/HeterogeneousCore/CUDAUtilities/bin/BuildFile.xml new file mode 100644 index 0000000000000..fa33a5a6280f0 --- /dev/null +++ b/HeterogeneousCore/CUDAUtilities/bin/BuildFile.xml @@ -0,0 +1,8 @@ + + + + + + + + diff --git a/HeterogeneousCore/CUDAServices/bin/cudaComputeCapabilities.cpp b/HeterogeneousCore/CUDAUtilities/bin/cudaComputeCapabilities.cpp similarity index 100% rename from HeterogeneousCore/CUDAServices/bin/cudaComputeCapabilities.cpp rename to HeterogeneousCore/CUDAUtilities/bin/cudaComputeCapabilities.cpp diff --git a/HeterogeneousCore/CUDAUtilities/bin/cudaComputeCapabilities_fallback.cpp b/HeterogeneousCore/CUDAUtilities/bin/cudaComputeCapabilities_fallback.cpp new file mode 100644 index 0000000000000..dc3861a53bd49 --- /dev/null +++ b/HeterogeneousCore/CUDAUtilities/bin/cudaComputeCapabilities_fallback.cpp @@ -0,0 +1,9 @@ +// C/C++ headers +#include +#include + +// always returns EXIT_FAILURE +int main() { + std::cerr << "cudaComputeCapabilities: CUDA is not supported on this architecture" << std::endl; + return EXIT_FAILURE; +} diff --git a/HeterogeneousCore/CUDAServices/bin/cudaIsEnabled.cpp b/HeterogeneousCore/CUDAUtilities/bin/cudaIsEnabled.cpp similarity index 100% rename from HeterogeneousCore/CUDAServices/bin/cudaIsEnabled.cpp rename to HeterogeneousCore/CUDAUtilities/bin/cudaIsEnabled.cpp diff --git a/HeterogeneousCore/CUDAUtilities/bin/cudaIsEnabled_fallback.cpp b/HeterogeneousCore/CUDAUtilities/bin/cudaIsEnabled_fallback.cpp new file mode 100644 index 0000000000000..92785c12af592 --- /dev/null +++ b/HeterogeneousCore/CUDAUtilities/bin/cudaIsEnabled_fallback.cpp @@ -0,0 +1,5 @@ +// C/C++ headers +#include + +// always returns EXIT_FAILURE +int main() { return EXIT_FAILURE; } diff --git a/HeterogeneousCore/CUDAServices/bin/isCudaDeviceSupported.cu b/HeterogeneousCore/CUDAUtilities/bin/isCudaDeviceSupported.cu similarity index 100% rename from HeterogeneousCore/CUDAServices/bin/isCudaDeviceSupported.cu rename to HeterogeneousCore/CUDAUtilities/bin/isCudaDeviceSupported.cu diff --git a/HeterogeneousCore/CUDAServices/bin/isCudaDeviceSupported.h b/HeterogeneousCore/CUDAUtilities/bin/isCudaDeviceSupported.h similarity index 100% rename from HeterogeneousCore/CUDAServices/bin/isCudaDeviceSupported.h rename to HeterogeneousCore/CUDAUtilities/bin/isCudaDeviceSupported.h diff --git a/HeterogeneousCore/CUDAUtilities/test/BuildFile.xml b/HeterogeneousCore/CUDAUtilities/test/BuildFile.xml index bb453aa1662e3..5c1146014c0aa 100644 --- a/HeterogeneousCore/CUDAUtilities/test/BuildFile.xml +++ b/HeterogeneousCore/CUDAUtilities/test/BuildFile.xml @@ -133,4 +133,14 @@ + + + + + + + + + + diff --git a/HeterogeneousCore/CUDAUtilities/test/testCudaCheck.cpp b/HeterogeneousCore/CUDAUtilities/test/testCudaCheck.cpp new file mode 100644 index 0000000000000..c812cd06904ef --- /dev/null +++ b/HeterogeneousCore/CUDAUtilities/test/testCudaCheck.cpp @@ -0,0 +1,20 @@ +// Catch2 headers +#define CATCH_CONFIG_MAIN +#include + +// CUDA headers +#include + +// CMSSW headers +#include "HeterogeneousCore/CUDAUtilities/interface/cudaCheck.h" + +TEST_CASE("HeterogeneousCore/CUDAUtilities testCudaCheck", "[testCudaCheck]") { + SECTION("Test cudaCheck() driver API") { + REQUIRE_NOTHROW(cudaCheck(CUDA_SUCCESS)); + REQUIRE_THROWS(cudaCheck(CUDA_ERROR_UNKNOWN)); + } + SECTION("Test cudaCheck() runtime API") { + REQUIRE_NOTHROW(cudaCheck(cudaSuccess)); + REQUIRE_THROWS(cudaCheck(cudaErrorUnknown)); + } +} diff --git a/HeterogeneousCore/CUDAUtilities/test/testRequireCUDADevices.cpp b/HeterogeneousCore/CUDAUtilities/test/testRequireCUDADevices.cpp new file mode 100644 index 0000000000000..d513de0591447 --- /dev/null +++ b/HeterogeneousCore/CUDAUtilities/test/testRequireCUDADevices.cpp @@ -0,0 +1,21 @@ +// Catch2 headers +#define CATCH_CONFIG_MAIN +#include + +// CUDA headers +#include + +// CMSSW headers +#include "HeterogeneousCore/CUDAUtilities/interface/cudaCheck.h" +#include "HeterogeneousCore/CUDAUtilities/interface/requireDevices.h" + +TEST_CASE("HeterogeneousCore/CUDAUtilities testRequireCUDADevices", "[testRequireCUDADevices]") { + SECTION("Test requireDevices()") { + cms::cudatest::requireDevices(); + + int devices = 0; + cudaCheck(cudaGetDeviceCount(&devices)); + + REQUIRE(devices > 0); + } +} diff --git a/HeterogeneousCore/ROCmUtilities/BuildFile.xml b/HeterogeneousCore/ROCmUtilities/BuildFile.xml new file mode 100644 index 0000000000000..8b78ba26b6e35 --- /dev/null +++ b/HeterogeneousCore/ROCmUtilities/BuildFile.xml @@ -0,0 +1,7 @@ + + + + + + + diff --git a/HeterogeneousCore/ROCmUtilities/bin/BuildFile.xml b/HeterogeneousCore/ROCmUtilities/bin/BuildFile.xml new file mode 100644 index 0000000000000..46fe21d388c3e --- /dev/null +++ b/HeterogeneousCore/ROCmUtilities/bin/BuildFile.xml @@ -0,0 +1,8 @@ + + + + + + + + diff --git a/HeterogeneousCore/ROCmUtilities/bin/isRocmDeviceSupported.h b/HeterogeneousCore/ROCmUtilities/bin/isRocmDeviceSupported.h new file mode 100644 index 0000000000000..b345b13b032b3 --- /dev/null +++ b/HeterogeneousCore/ROCmUtilities/bin/isRocmDeviceSupported.h @@ -0,0 +1,6 @@ +#ifndef HeterogeneousCore_ROCmServices_bin_isRocmDeviceSupported_h +#define HeterogeneousCore_ROCmServices_bin_isRocmDeviceSupported_h + +bool isRocmDeviceSupported(int device); + +#endif // HeterogeneousCore_ROCmServices_bin_isRocmDeviceSupported_h diff --git a/HeterogeneousCore/ROCmUtilities/bin/isRocmDeviceSupported.hip.cc b/HeterogeneousCore/ROCmUtilities/bin/isRocmDeviceSupported.hip.cc new file mode 100644 index 0000000000000..11e78bdff5f15 --- /dev/null +++ b/HeterogeneousCore/ROCmUtilities/bin/isRocmDeviceSupported.hip.cc @@ -0,0 +1,57 @@ +#include + +#include "isRocmDeviceSupported.h" + +namespace { + __global__ void setSupported(bool* result) { *result = true; } +} // namespace + +bool isRocmDeviceSupported(int device) { + bool supported = false; + bool* supported_d; + + // select the requested device - will fail if the index is invalid + hipError_t status = hipSetDevice(device); + if (status != hipSuccess) + return false; + + // allocate memory for the flag on the device + status = hipMalloc(&supported_d, sizeof(bool)); + if (status != hipSuccess) + return false; + + // initialise the flag on the device + status = hipMemset(supported_d, 0x00, sizeof(bool)); + if (status != hipSuccess) + return false; + + // try to set the flag on the device + setSupported<<<1, 1>>>(supported_d); + + // check for an eventual error from launching the kernel on an unsupported device + status = hipGetLastError(); + if (status != hipSuccess) + return false; + + // wait for the kernelto run + status = hipDeviceSynchronize(); + if (status != hipSuccess) + return false; + + // copy the flag back to the host + status = hipMemcpy(&supported, supported_d, sizeof(bool), hipMemcpyDeviceToHost); + if (status != hipSuccess) + return false; + + // free the device memory + status = hipFree(supported_d); + if (status != hipSuccess) + return false; + + // reset the device + status = hipDeviceReset(); + if (status != hipSuccess) + return false; + + return supported; +} diff --git a/HeterogeneousCore/ROCmUtilities/bin/rocmComputeCapabilities.cpp b/HeterogeneousCore/ROCmUtilities/bin/rocmComputeCapabilities.cpp new file mode 100644 index 0000000000000..06a1ce18fdbb6 --- /dev/null +++ b/HeterogeneousCore/ROCmUtilities/bin/rocmComputeCapabilities.cpp @@ -0,0 +1,34 @@ +// C/C++ standard headers +#include +#include +#include + +// ROCm headers +#include + +// CMSSW headers +#include "HeterogeneousCore/ROCmUtilities/interface/hipCheck.h" +#include "isRocmDeviceSupported.h" + +int main() { + int devices = 0; + hipError_t status = hipGetDeviceCount(&devices); + if (status != hipSuccess) { + std::cerr << "rocmComputeCapabilities: " << hipGetErrorString(status) << std::endl; + return EXIT_FAILURE; + } + + for (int i = 0; i < devices; ++i) { + hipDeviceProp_t properties; + hipCheck(hipGetDeviceProperties(&properties, i)); + std::stringstream arch; + arch << "gfx" << properties.gcnArch; + std::cout << std::setw(4) << i << " " << std::setw(8) << arch.str() << " " << properties.name; + if (not isRocmDeviceSupported(i)) { + std::cout << " (unsupported)"; + } + std::cout << std::endl; + } + + return EXIT_SUCCESS; +} diff --git a/HeterogeneousCore/ROCmUtilities/bin/rocmComputeCapabilities_fallback.cpp b/HeterogeneousCore/ROCmUtilities/bin/rocmComputeCapabilities_fallback.cpp new file mode 100644 index 0000000000000..e754a7187b49e --- /dev/null +++ b/HeterogeneousCore/ROCmUtilities/bin/rocmComputeCapabilities_fallback.cpp @@ -0,0 +1,9 @@ +// C/C++ headers +#include +#include + +// always returns EXIT_FAILURE +int main() { + std::cerr << "rocmComputeCapabilities: ROCm is not supported on this architecture" << std::endl; + return EXIT_FAILURE; +} diff --git a/HeterogeneousCore/ROCmUtilities/bin/rocmIsEnabled.cpp b/HeterogeneousCore/ROCmUtilities/bin/rocmIsEnabled.cpp new file mode 100644 index 0000000000000..9fb97efbcc745 --- /dev/null +++ b/HeterogeneousCore/ROCmUtilities/bin/rocmIsEnabled.cpp @@ -0,0 +1,26 @@ +// C/C++ headers +#include + +// ROCm headers +#include + +// local headers +#include "isRocmDeviceSupported.h" + +// returns EXIT_SUCCESS if at least one visible ROCm device can be used, or EXIT_FAILURE otherwise +int main() { + int devices = 0; + auto status = hipGetDeviceCount(&devices); + if (status != hipSuccess) { + return EXIT_FAILURE; + } + + // check that at least one visible ROCm device can be used + for (int i = 0; i < devices; ++i) { + if (isRocmDeviceSupported(i)) + return EXIT_SUCCESS; + } + + // no visible usable devices + return EXIT_FAILURE; +} diff --git a/HeterogeneousCore/ROCmUtilities/bin/rocmIsEnabled_fallback.cpp b/HeterogeneousCore/ROCmUtilities/bin/rocmIsEnabled_fallback.cpp new file mode 100644 index 0000000000000..92785c12af592 --- /dev/null +++ b/HeterogeneousCore/ROCmUtilities/bin/rocmIsEnabled_fallback.cpp @@ -0,0 +1,5 @@ +// C/C++ headers +#include + +// always returns EXIT_FAILURE +int main() { return EXIT_FAILURE; } diff --git a/HeterogeneousCore/ROCmUtilities/interface/hipCheck.h b/HeterogeneousCore/ROCmUtilities/interface/hipCheck.h new file mode 100644 index 0000000000000..a376194bcb470 --- /dev/null +++ b/HeterogeneousCore/ROCmUtilities/interface/hipCheck.h @@ -0,0 +1,54 @@ +#ifndef HeterogeneousCore_ROCmUtilities_hipCheck_h +#define HeterogeneousCore_ROCmUtilities_hipCheck_h + +// C++ standard headers +#include +#include +#include +#include +#include + +// ROCm headers +#include + +// CMSSW headers +#include "FWCore/Utilities/interface/Likely.h" + +namespace cms { + namespace rocm { + + [[noreturn]] inline void abortOnError(const char* file, + int line, + const char* cmd, + const char* error, + const char* message, + std::string_view description = std::string_view()) { + std::ostringstream out; + out << "\n"; + out << file << ", line " << line << ":\n"; + out << "hipCheck(" << cmd << ");\n"; + out << error << ": " << message << "\n"; + if (!description.empty()) + out << description << "\n"; + throw std::runtime_error(out.str()); + } + + inline bool hipCheck_(const char* file, + int line, + const char* cmd, + hipError_t result, + std::string_view description = std::string_view()) { + if (LIKELY(result == hipSuccess)) + return true; + + const char* error = hipGetErrorName(result); + const char* message = hipGetErrorString(result); + abortOnError(file, line, cmd, error, message, description); + return false; + } + } // namespace rocm +} // namespace cms + +#define hipCheck(ARG, ...) (cms::rocm::hipCheck_(__FILE__, __LINE__, #ARG, (ARG), ##__VA_ARGS__)) + +#endif // HeterogeneousCore_ROCmUtilities_hipCheck_h diff --git a/HeterogeneousCore/ROCmUtilities/interface/requireDevices.h b/HeterogeneousCore/ROCmUtilities/interface/requireDevices.h new file mode 100644 index 0000000000000..d09e262f861ec --- /dev/null +++ b/HeterogeneousCore/ROCmUtilities/interface/requireDevices.h @@ -0,0 +1,19 @@ +#ifndef HeterogeneousCore_ROCmUtilities_interface_requireDevices_h +#define HeterogeneousCore_ROCmUtilities_interface_requireDevices_h + +/** + * These functions are meant to be called only from unit tests. + */ +namespace cms { + namespace rocmtest { + + /// In presence of ROCm devices, return true; otherwise print message and return false + bool testDevices(); + + /// Print message and exit if there are no ROCm devices + void requireDevices(); + + } // namespace rocmtest +} // namespace cms + +#endif // HeterogeneousCore_ROCmUtilities_interface_requireDevices_h diff --git a/HeterogeneousCore/ROCmUtilities/src/requireDevices.cc b/HeterogeneousCore/ROCmUtilities/src/requireDevices.cc new file mode 100644 index 0000000000000..b62d48d383d07 --- /dev/null +++ b/HeterogeneousCore/ROCmUtilities/src/requireDevices.cc @@ -0,0 +1,30 @@ +#include +#include + +#include + +#include "HeterogeneousCore/ROCmUtilities/interface/requireDevices.h" + +namespace cms::rocmtest { + + bool testDevices() { + int devices = 0; + auto status = hipGetDeviceCount(&devices); + if (status != hipSuccess) { + std::cerr << "Failed to initialise the ROCm runtime, the test will be skipped.\n"; + return false; + } + if (devices == 0) { + std::cerr << "No ROCm devices available, the test will be skipped.\n"; + return false; + } + return true; + } + + void requireDevices() { + if (not testDevices()) { + exit(EXIT_SUCCESS); + } + } + +} // namespace cms::rocmtest diff --git a/HeterogeneousCore/ROCmUtilities/test/BuildFile.xml b/HeterogeneousCore/ROCmUtilities/test/BuildFile.xml new file mode 100644 index 0000000000000..516b14b94cbd9 --- /dev/null +++ b/HeterogeneousCore/ROCmUtilities/test/BuildFile.xml @@ -0,0 +1,13 @@ + + + + + + + + + + + + + diff --git a/HeterogeneousCore/ROCmUtilities/test/testHipCheck.cpp b/HeterogeneousCore/ROCmUtilities/test/testHipCheck.cpp new file mode 100644 index 0000000000000..b08d5213d07ef --- /dev/null +++ b/HeterogeneousCore/ROCmUtilities/test/testHipCheck.cpp @@ -0,0 +1,16 @@ +// Catch2 headers +#define CATCH_CONFIG_MAIN +#include + +// ROCm headers +#include + +// CMSSW headers +#include "HeterogeneousCore/ROCmUtilities/interface/hipCheck.h" + +TEST_CASE("HeterogeneousCore/ROCmUtilities testHipCheck", "[testHipCheck]") { + SECTION("Test hipCheck() API") { + REQUIRE_NOTHROW(hipCheck(hipSuccess)); + REQUIRE_THROWS(hipCheck(hipErrorUnknown)); + } +} diff --git a/HeterogeneousCore/ROCmUtilities/test/testRequireROCmDevices.cpp b/HeterogeneousCore/ROCmUtilities/test/testRequireROCmDevices.cpp new file mode 100644 index 0000000000000..edc2eff9672ea --- /dev/null +++ b/HeterogeneousCore/ROCmUtilities/test/testRequireROCmDevices.cpp @@ -0,0 +1,21 @@ +// Catch2 headers +#define CATCH_CONFIG_MAIN +#include + +// ROCm headers +#include + +// CMSSW headers +#include "HeterogeneousCore/ROCmUtilities/interface/hipCheck.h" +#include "HeterogeneousCore/ROCmUtilities/interface/requireDevices.h" + +TEST_CASE("HeterogeneousCore/ROCmUtilities testRequireROCmDevices", "[testRequireROCmDevices]") { + SECTION("Test requireDevices()") { + cms::rocmtest::requireDevices(); + + int devices = 0; + hipCheck(hipGetDeviceCount(&devices)); + + REQUIRE(devices > 0); + } +}