diff --git a/src/variorum/Nvidia_GPU/Volta.c b/src/variorum/Nvidia_GPU/Volta.c
index 9250b5474..32fe44141 100644
--- a/src/variorum/Nvidia_GPU/Volta.c
+++ b/src/variorum/Nvidia_GPU/Volta.c
@@ -218,3 +218,50 @@ int volta_get_power_json(json_t *get_power_obj)
     return 0;
 }
 
+// Print the total energy consumption of each GPU on every socket to stdout.
+// long_ver is forwarded as the verbose flag of nvidia_gpu_get_energy_data().
+// Always returns 0.
+int volta_get_energy(int long_ver)
+{
+    char *val = getenv("VARIORUM_LOG");
+    if (val != NULL && atoi(val) == 1)
+    {
+        printf("Running %s\n", __FUNCTION__);
+    }
+
+    unsigned iter = 0;
+    unsigned nsockets = 0;
+#ifdef VARIORUM_WITH_NVIDIA_GPU
+    variorum_get_topology(&nsockets, NULL, NULL, P_NVIDIA_GPU_IDX);
+#endif
+    for (iter = 0; iter < nsockets; iter++)
+    {
+        nvidia_gpu_get_energy_data(iter, long_ver, stdout);
+    }
+    return 0;
+}
+
+// Add the per-GPU energy of every socket to get_energy_obj.
+// Always returns 0.
+int volta_get_energy_json(json_t *get_energy_obj)
+{
+    char *val = getenv("VARIORUM_LOG");
+    if (val != NULL && atoi(val) == 1)
+    {
+        printf("Running %s\n", __FUNCTION__);
+    }
+
+    unsigned iter = 0;
+    // Start from zero so the loop is skipped if the topology query does not
+    // fill in the socket count.
+    unsigned nsockets = 0;
+    variorum_get_topology(&nsockets, NULL, NULL, P_NVIDIA_GPU_IDX);
+
+    for (iter = 0; iter < nsockets; iter++)
+    {
+        nvidia_gpu_get_energy_json(iter, get_energy_obj);
+    }
+
+    return 0;
+}
+
diff --git a/src/variorum/Nvidia_GPU/Volta.h b/src/variorum/Nvidia_GPU/Volta.h
index ce3d7ccd6..384707033 100644
--- a/src/variorum/Nvidia_GPU/Volta.h
+++ b/src/variorum/Nvidia_GPU/Volta.h
@@ -48,4 +48,11 @@ int volta_get_gpu_utilization_json(
     char **get_gpu_util_obj_str
 );
 
+int volta_get_energy(
+    int long_ver
+);
+
+int volta_get_energy_json(
+    json_t *get_energy_obj
+);
 #endif
diff --git a/src/variorum/Nvidia_GPU/config_nvidia.c b/src/variorum/Nvidia_GPU/config_nvidia.c
index 4e98a6cbb..be34ce48a 100644
--- a/src/variorum/Nvidia_GPU/config_nvidia.c
+++ b/src/variorum/Nvidia_GPU/config_nvidia.c
@@ -38,6 +38,8 @@ int set_nvidia_func_ptrs(int idx)
         g_platform[idx].variorum_cap_each_gpu_power_limit =
             volta_cap_each_gpu_power_limit;
         g_platform[idx].variorum_get_power_json = volta_get_power_json;
+        g_platform[idx].variorum_print_energy = volta_get_energy;
+        g_platform[idx].variorum_get_energy_json = volta_get_energy_json;
     }
     else
     {
diff --git a/src/variorum/Nvidia_GPU/nvidia_gpu_power_features.c b/src/variorum/Nvidia_GPU/nvidia_gpu_power_features.c
index 3d5e9d5c3..f43da2a75 100644
--- a/src/variorum/Nvidia_GPU/nvidia_gpu_power_features.c
+++ b/src/variorum/Nvidia_GPU/nvidia_gpu_power_features.c
@@ -540,3 +540,119 @@ void nvidia_gpu_get_power_json(int chipid, json_t *get_power_obj)
 
 }
 
+// Print the total energy consumption, in Joules, of every GPU attached to
+// socket chipid. With verbose set, each reading is a self-describing line;
+// otherwise a header row is printed once, followed by one row per GPU.
+void nvidia_gpu_get_energy_data(int chipid, int verbose, FILE *output)
+{
+    unsigned long long energy;
+    double value = 0.0;
+    int d;
+    static int init_output = 0;
+
+    // Iterate over all GPU device handles for this socket and print energy
+    for (d = chipid * (int)m_gpus_per_socket;
+         d < (chipid + 1) * (int)m_gpus_per_socket; ++d)
+    {
+        // Report 0 J rather than an uninitialized value if the query fails.
+        if (nvmlDeviceGetTotalEnergyConsumption(m_unit_devices_file_desc[d],
+                                                &energy) != NVML_SUCCESS)
+        {
+            energy = 0;
+        }
+        // Convert from milliJoules to Joules
+        value = (double)energy / 1000.0;
+
+        if (verbose)
+        {
+            fprintf(output, "%s: %s, %s: %d, %s: %d, %s: %lf J\n",
+                    "_NVIDIA_GPU_ENERGY_USAGE Host", m_hostname,
+                    "Socket", chipid,
+                    "DeviceID", d, "Energy", value);
+        }
+        else
+        {
+            if (!init_output)
+            {
+#ifdef LIBJUSTIFY_FOUND
+                cfprintf(output, "%s %s %s %s %s\n",
+                         "_NVIDIA_GPU_ENERGY_USAGE", "Host",
+                         "Socket", "DeviceID", "Energy");
+#else
+                fprintf(output, "%s %s %s %s %s\n",
+                        "_NVIDIA_GPU_ENERGY_USAGE", "Host",
+                        "Socket", "DeviceID", "Energy");
+#endif
+                init_output = 1;
+            }
+#ifdef LIBJUSTIFY_FOUND
+            cfprintf(output, "%s %s %d %d %lf\n",
+                     "_NVIDIA_GPU_ENERGY_USAGE", m_hostname, chipid, d, value);
+#else
+            fprintf(output, "%s %s %d %d %lf\n",
+                    "_NVIDIA_GPU_ENERGY_USAGE", m_hostname, chipid, d, value);
+#endif
+        }
+    }
+}
+
+// Add the total energy consumption, in Joules, of every GPU attached to
+// socket chipid to get_energy_obj under "socket_<chipid>" ->
+// "energy_gpu_joules" -> "GPU_<id>".
+void nvidia_gpu_get_energy_json(int chipid, json_t *get_energy_obj)
+{
+    unsigned long long gpu_energy;
+    double value = 0.0;
+    double total_gpu_energy = 0.0;
+    int d;
+    char devID[24]; // Long enough to avoid format truncation.
+    char socket_id[12];
+    snprintf(socket_id, sizeof(socket_id), "socket_%d", chipid);
+
+    json_object_set_new(get_energy_obj, "num_gpus_per_socket",
+                        json_integer(m_gpus_per_socket));
+
+    // Try to find socket object in node object, set new object if not found
+    json_t *socket_obj = json_object_get(get_energy_obj, socket_id);
+    if (socket_obj == NULL)
+    {
+        socket_obj = json_object();
+        json_object_set_new(get_energy_obj, socket_id, socket_obj);
+    }
+
+    // Create new json object for GPU
+    json_t *gpu_obj = json_object();
+    json_object_set_new(socket_obj, "energy_gpu_joules", gpu_obj);
+
+    for (d = chipid * (int)m_gpus_per_socket;
+         d < (chipid + 1) * (int)m_gpus_per_socket; ++d)
+    {
+        // Report 0 J rather than an uninitialized value if the query fails.
+        if (nvmlDeviceGetTotalEnergyConsumption(m_unit_devices_file_desc[d],
+                                                &gpu_energy) != NVML_SUCCESS)
+        {
+            gpu_energy = 0;
+        }
+        // Convert from milliJoules to Joules
+        value = (double)gpu_energy / 1000.0;
+        snprintf(devID, sizeof(devID), "GPU_%d", d);
+        json_object_set_new(gpu_obj, devID, json_real(value));
+        total_gpu_energy += value;
+    }
+
+    // If we have an existing CPU object with energy_node_joules, update its
+    // value. Except on IBM Power9 systems, as they report node power with
+    // PWRSYS directly. So we don't need to add in the GPU values separately.
+#ifndef VARIORUM_WITH_IBM_CPU
+    if (json_object_get(get_energy_obj, "energy_node_joules") != NULL)
+    {
+        double energy_node;
+        energy_node = json_real_value(json_object_get(get_energy_obj,
+                                      "energy_node_joules"));
+        // json_object_set_new() takes ownership of the new value;
+        // json_object_set() would leak its reference.
+        json_object_set_new(get_energy_obj, "energy_node_joules",
+                            json_real(energy_node + total_gpu_energy));
+    }
+#endif
+}
diff --git a/src/variorum/Nvidia_GPU/nvidia_gpu_power_features.h b/src/variorum/Nvidia_GPU/nvidia_gpu_power_features.h
index a824f4abc..34f801295 100644
--- a/src/variorum/Nvidia_GPU/nvidia_gpu_power_features.h
+++ b/src/variorum/Nvidia_GPU/nvidia_gpu_power_features.h
@@ -82,4 +82,15 @@ void nvidia_gpu_get_power_json(
     json_t *output
 );
 
+void nvidia_gpu_get_energy_data(
+    int chipid,
+    int verbose,
+    FILE *output
+);
+
+void nvidia_gpu_get_energy_json(
+    int chipid,
+    json_t *output
+);
+
 #endif
diff --git a/src/variorum/variorum.c b/src/variorum/variorum.c
index 92edea46a..64ce713ee 100644
--- a/src/variorum/variorum.c
+++ b/src/variorum/variorum.c
@@ -1573,8 +1573,6 @@ int variorum_print_energy(void)
 {
     int err = 0;
     int i;
-    int has_cpu = 0;
-    int has_gpu = 0;
     err = variorum_enter(__FILE__, __FUNCTION__, __LINE__);
     if (err)
     {
@@ -1585,41 +1583,21 @@ int variorum_print_energy(void)
     // If we have a CPU-only or CPU+GPU multi-platform build, we should print
     // the node-level energy.
     // First check if we have a CPU platform, then check for a GPU platform
-
-#if defined(VARIORUM_WITH_INTEL_CPU) || defined(VARIORUM_WITH_AMD_CPU) || defined(VARIORUM_WITH_IBM_CPU)
-    has_cpu = 1;
-#endif
-#if defined(VARIORUM_WITH_NVIDIA_GPU) || defined(VARIORUM_WITH_AMD_GPU) || defined(VARIORUM_WITH_INTEL_GPU)
-    has_gpu = 1;
-#endif
-
-    // CPU-only or multi-platform build
-    if ((has_cpu && has_gpu) || (has_cpu))
+    for (i = 0; i < P_NUM_PLATFORMS; i++)
     {
-        for (i = 0; i < P_NUM_PLATFORMS; i++)
+        if (g_platform[i].variorum_print_energy == NULL)
         {
-            if (g_platform[i].variorum_print_energy == NULL)
-            {
-                variorum_error_handler("Feature not yet implemented or is not supported",
-                                       VARIORUM_ERROR_FEATURE_NOT_IMPLEMENTED, getenv("HOSTNAME"), __FILE__,
-                                       __FUNCTION__, __LINE__);
-                return 0;
-            }
-            err = g_platform[i].variorum_print_energy(0);
-            if (err)
-            {
-                return -1;
-            }
+            variorum_error_handler("Feature not yet implemented or is not supported",
+                                   VARIORUM_ERROR_FEATURE_NOT_IMPLEMENTED, getenv("HOSTNAME"), __FILE__,
+                                   __FUNCTION__, __LINE__);
+            return 0;
+        }
+        err = g_platform[i].variorum_print_energy(0);
+        if (err)
+        {
+            return -1;
         }
     }
-    else
-    {
-        // We have a GPU-only build, currently doesn't support get_energy
-        variorum_error_handler("Feature not yet implemented or is not supported",
-                               VARIORUM_ERROR_FEATURE_NOT_IMPLEMENTED, getenv("HOSTNAME"), __FILE__,
-                               __FUNCTION__, __LINE__);
-        return 0;
-    }
 
     err = variorum_exit(__FILE__, __FUNCTION__, __LINE__);
     if (err)
@@ -1633,8 +1611,6 @@ int variorum_print_verbose_energy(void)
 {
     int err = 0;
     int i;
-    int has_cpu = 0;
-    int has_gpu = 0;
     err = variorum_enter(__FILE__, __FUNCTION__, __LINE__);
     if (err)
     {
@@ -1645,41 +1621,21 @@ int variorum_print_verbose_energy(void)
     // If we have a CPU-only or CPU+GPU multi-platform build, we should print
     // the node-level energy.
     // First check if we have a CPU platform, then check for a GPU platform
-
-#if defined(VARIORUM_WITH_INTEL_CPU) || defined(VARIORUM_WITH_AMD_CPU) || defined(VARIORUM_WITH_IBM_CPU)
-    has_cpu = 1;
-#endif
-#if defined(VARIORUM_WITH_NVIDIA_GPU) || defined(VARIORUM_WITH_AMD_GPU) || defined(VARIORUM_WITH_INTEL_GPU)
-    has_gpu = 1;
-#endif
-
-    // CPU-only or multi-platform build
-    if ((has_cpu && has_gpu) || (has_cpu))
+    for (i = 0; i < P_NUM_PLATFORMS; i++)
     {
-        for (i = 0; i < P_NUM_PLATFORMS; i++)
+        if (g_platform[i].variorum_print_energy == NULL)
         {
-            if (g_platform[i].variorum_print_energy == NULL)
-            {
-                variorum_error_handler("Feature not yet implemented or is not supported",
-                                       VARIORUM_ERROR_FEATURE_NOT_IMPLEMENTED, getenv("HOSTNAME"), __FILE__,
-                                       __FUNCTION__, __LINE__);
-                return 0;
-            }
-            err = g_platform[i].variorum_print_energy(1);
-            if (err)
-            {
-                return -1;
-            }
+            variorum_error_handler("Feature not yet implemented or is not supported",
+                                   VARIORUM_ERROR_FEATURE_NOT_IMPLEMENTED, getenv("HOSTNAME"), __FILE__,
+                                   __FUNCTION__, __LINE__);
+            return 0;
+        }
+        err = g_platform[i].variorum_print_energy(1);
+        if (err)
+        {
+            return -1;
         }
     }
-    else
-    {
-        // We have a GPU-only build, currently doesn't support get_energy
-        variorum_error_handler("Feature not yet implemented or is not supported",
-                               VARIORUM_ERROR_FEATURE_NOT_IMPLEMENTED, getenv("HOSTNAME"), __FILE__,
-                               __FUNCTION__, __LINE__);
-        return 0;
-    }
     err = variorum_exit(__FILE__, __FUNCTION__, __LINE__);
     if (err)
     {
@@ -1692,8 +1648,6 @@ int variorum_get_energy_json(char **get_energy_obj_str)
 {
     int err = 0;
     int i;
-    int has_cpu = 0;
-    int has_gpu = 0;
     char hostname[1024];
     uint64_t ts;
     struct timeval tv;
@@ -1717,44 +1671,23 @@ int variorum_get_energy_json(char **get_energy_obj_str)
     // If we have a CPU-only or CPU+GPU multi-platform build, we should print
     // the node-level energy.
     // First check if we have a CPU platform, then check for a GPU platform
-
-#if defined(VARIORUM_WITH_INTEL_CPU) || defined(VARIORUM_WITH_AMD_CPU) || defined(VARIORUM_WITH_IBM_CPU)
-    has_cpu = 1;
-#endif
-#if defined(VARIORUM_WITH_NVIDIA_GPU) || defined(VARIORUM_WITH_AMD_GPU) || defined(VARIORUM_WITH_INTEL_GPU)
-    has_gpu = 1;
-#endif
-
-    // CPU-only or multi-platform build
-    if ((has_cpu && has_gpu) || (has_cpu))
+    for (i = 0; i < P_NUM_PLATFORMS; i++)
     {
-        for (i = 0; i < P_NUM_PLATFORMS; i++)
+        if (g_platform[i].variorum_get_energy_json == NULL)
         {
-            if (g_platform[i].variorum_get_energy_json == NULL)
-            {
-                variorum_error_handler("Feature not yet implemented or is not supported",
-                                       VARIORUM_ERROR_FEATURE_NOT_IMPLEMENTED,
-                                       getenv("HOSTNAME"), __FILE__,
-                                       __FUNCTION__, __LINE__);
-                return 0;
-            }
-            err = g_platform[i].variorum_get_energy_json(node_obj);
-            if (err)
-            {
-                printf("Error with variorum get frequency json platform %d\n", i);
-            }
-            *get_energy_obj_str = json_dumps(get_energy_obj, JSON_INDENT(4));
+            variorum_error_handler("Feature not yet implemented or is not supported",
+                                   VARIORUM_ERROR_FEATURE_NOT_IMPLEMENTED,
+                                   getenv("HOSTNAME"), __FILE__,
+                                   __FUNCTION__, __LINE__);
+            return 0;
+        }
+        err = g_platform[i].variorum_get_energy_json(node_obj);
+        if (err)
+        {
+            printf("Error with variorum get energy json platform %d\n", i);
         }
-    }
-    else
-    {
-        // We have a GPU-only build, currently doesn't support get_energy
-        variorum_error_handler("Feature not yet implemented or is not supported",
-                               VARIORUM_ERROR_FEATURE_NOT_IMPLEMENTED, getenv("HOSTNAME"), __FILE__,
-                               __FUNCTION__, __LINE__);
         *get_energy_obj_str = json_dumps(get_energy_obj, JSON_INDENT(4));
-        return 0;
     }
 
     json_decref(get_energy_obj);
 