From 0ae41a2a6e98a7957ff626d5414bccb68c9fee06 Mon Sep 17 00:00:00 2001
From: Pau Ruiz Safont <psafont@users.noreply.github.com>
Date: Thu, 1 Dec 2022 13:16:56 +0000
Subject: [PATCH] CP-33044 replace gpumon shutdown with NVML detach/attach

We want to avoid stopping gpumon while starting VMs with vGPUs because
it limits scalability and a daemon should run continuously. Use the
recently added capability to detach/attach the NVML library instead.

Introduce feature flag nvidia-gpumon-detach; when true, we use the new
attach/detach feature to keep gpumon running over the start of a VM.

This does not yet work with all GPUs but it would help cross-team
development to make this code available.

Signed-off-by: Christian Lindig <christian.lindig@cloud.com>
---
 ocaml/xapi/xapi_globs.ml  |  9 +++++++++
 ocaml/xapi/xapi_gpumon.ml | 33 ++++++++++++++++++++++++++++++++-
 2 files changed, 41 insertions(+), 1 deletion(-)

diff --git a/ocaml/xapi/xapi_globs.ml b/ocaml/xapi/xapi_globs.ml
index 62e373590e9..3bfd8af9a1d 100644
--- a/ocaml/xapi/xapi_globs.ml
+++ b/ocaml/xapi/xapi_globs.ml
@@ -1159,6 +1159,10 @@ type nvidia_t4_sriov = Nvidia_T4_SRIOV | Nvidia_LEGACY | Nvidia_DEFAULT
 
 let nvidia_t4_sriov = ref Nvidia_DEFAULT
 
+(** CP-41126. When [true], gpumon detaches the NVML library on VM start;
+    when [false], gpumon is stopped instead. *)
+let nvidia_gpumon_detach = ref false
+
 let failed_login_alert_freq = ref 3600
 
 let other_options =
@@ -1470,6 +1474,11 @@ let other_options =
     , (fun () -> string_of_int !max_observer_file_size)
     , "The maximum size of log files for saving spans"
     )
+  ; ( "nvidia-gpumon-detach"
+    , Arg.Set nvidia_gpumon_detach
+    , (fun () -> string_of_bool !nvidia_gpumon_detach)
+    , "On VM start, detach the NVML library rather than stopping gpumon"
+    )
   ]
 
 (* The options can be set with the variable xapiflags in /etc/sysconfig/xapi.
diff --git a/ocaml/xapi/xapi_gpumon.ml b/ocaml/xapi/xapi_gpumon.ml
index 384ab68f822..fa71fce96b5 100644
--- a/ocaml/xapi/xapi_gpumon.ml
+++ b/ocaml/xapi/xapi_gpumon.ml
@@ -18,6 +18,8 @@ open D
 
 let gpumon = "xcp-rrdd-gpumon"
 
+let with_lock = Xapi_stdext_threads.Threadext.Mutex.execute
+
 module Gpumon = Daemon_manager.Make (struct
   let check =
     Daemon_manager.Function
@@ -40,7 +42,36 @@ module Gpumon = Daemon_manager.Make (struct
     Xapi_systemctl.stop ~wait_until_success:false gpumon
 end)
 
-let with_gpumon_stopped = Gpumon.with_daemon_stopped
+let gpumon_m = Mutex.create () (* taken via with_lock on the NVML path below *)
+
+let with_gpumon_stopped ?(timeout = 30.0) f =
+  match !Xapi_globs.nvidia_gpumon_detach with
+  | false ->
+      Gpumon.with_daemon_stopped ~timeout f
+  | true -> ( (* flag set: use NVML detach/attach; [timeout] is unused here *)
+      debug "%s: about to acquire lock" __FUNCTION__ ;
+      with_lock gpumon_m @@ fun () ->
+      let module GPU = Gpumon_client.Client.Nvidia in
+      match GPU.nvml_is_attached __FUNCTION__ with
+      | false ->
+          (* NVML is not attached: no detach/re-attach needed, just run f *)
+          debug "%s: NVML is detached; nothing to do" __FUNCTION__ ;
+          f ()
+      | true ->
+          (* detach NVML, run f, then re-attach whether or not f raises.
+             Be aware that both xenopsd and xapi call
+             /usr/lib/nvidia/sriov-manage, which may stop gpumon *)
+          Fun.protect
+            (fun () ->
+              debug "%s: about to detach NVML" __FUNCTION__ ;
+              GPU.nvml_detach __FUNCTION__ ;
+              f ()
+            )
+            ~finally:(fun () ->
+              debug "%s: about to attach NVML" __FUNCTION__ ;
+              GPU.nvml_attach __FUNCTION__
+            )
+    )
 
 module Nvidia = struct
   let key = "nvidia"