diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index f3f7e00a69f..580b27f6288 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -9,7 +9,7 @@ on: merge_group: concurrency: # On new push, cancel old workflows from the same PR, branch or tag: - group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }} + group: ${{ github.workflow }}-${{github.event_name}}-${{ github.event.pull_request.number || github.ref }} cancel-in-progress: true jobs: diff --git a/.github/workflows/other.yml b/.github/workflows/other.yml index 52c73729594..d6ad9c849a6 100644 --- a/.github/workflows/other.yml +++ b/.github/workflows/other.yml @@ -9,7 +9,7 @@ on: merge_group: concurrency: # On new push, cancel old workflows from the same PR, branch or tag: - group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }} + group: ${{ github.workflow }}-${{github.event_name}}-${{ github.event.pull_request.number || github.ref }} cancel-in-progress: true jobs: diff --git a/.github/workflows/shellcheck.yaml b/.github/workflows/shellcheck.yaml index 8be332ada23..b078eaba549 100644 --- a/.github/workflows/shellcheck.yaml +++ b/.github/workflows/shellcheck.yaml @@ -5,7 +5,7 @@ on: merge_group: concurrency: # On new push, cancel old workflows from the same PR, branch or tag: - group: sc-${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }} + group: sc-${{ github.workflow }}-${{github.event_name}}-${{ github.event.pull_request.number || github.ref }} cancel-in-progress: true jobs: diff --git a/ocaml/libs/tracing/tracing.ml b/ocaml/libs/tracing/tracing.ml index 22d1e942288..ab097253dcb 100644 --- a/ocaml/libs/tracing/tracing.ml +++ b/ocaml/libs/tracing/tracing.ml @@ -18,6 +18,14 @@ open D let fail fmt = Printf.ksprintf failwith fmt +let failures = Atomic.make 0 + +let not_throttled () = + let old = Atomic.fetch_and_add failures 1 in + old < 2 + +let reset_throttled () = Atomic.set failures 0 + module W3CBaggage = struct module Key = struct let is_valid_key str = @@ -86,12 +94,6 @@ let validate_attribute (key, value) = && Re.execp attribute_key_regex key && W3CBaggage.Key.is_valid_key key -let observe = Atomic.make false - -let set_observe mode = Atomic.set observe mode - -let get_observe () = Atomic.get observe - module SpanKind = struct type t = Server | Consumer | Client | Producer | Internal [@@deriving rpcty] @@ -133,6 +135,10 @@ end module Attributes = struct include Map.Make (String) + let merge_element map (key, value) = add key value map + + let merge_into into list = List.fold_left merge_element into list + let of_list list = List.to_seq list |> of_seq let to_assoc_list attr = to_seq attr |> List.of_seq @@ -148,18 +154,80 @@ module SpanEvent = struct type t = {name: string; time: float; attributes: string Attributes.t} end +module Trace_id : sig + type t + + val make : unit -> t + + val compare : t -> t -> int + + val of_string : string -> t + + val to_string : t -> string +end = struct + type t = int64 * int64 + + let make () = (Random.bits64 (), Random.bits64 ()) + + let of_string s = + try Scanf.sscanf s "%016Lx%016Lx" (fun a b -> (a, b)) + with e -> + D.debug "Failed to parse trace id %s: %s" s (Printexc.to_string e) ; + (* don't cause XAPI to fail *) + (0L, 0L) + + let to_string (a, b) = Printf.sprintf "%016Lx%016Lx" a b + + let compare (a1, a2) (b1, b2) = + match Int64.compare a1 b1 with 0 -> Int64.compare a2 b2 | n -> n +end + +module Span_id : sig + type t + + val make : unit -> t + + val compare : t -> t -> int + + val of_string : string -> t + + val to_string : t -> string +end = struct + type t = int64 + + let make = Random.bits64 + + let of_string s = + try Scanf.sscanf s "%Lx" Fun.id + with e -> + D.debug "Failed to parse span id %s: %s" s (Printexc.to_string e) ; + (* don't cause XAPI to fail *) + 0L + + let to_string = Printf.sprintf "%016Lx" + + let compare = Int64.compare +end + module SpanContext = struct - type t = {trace_id: string; span_id: string} [@@deriving rpcty] + type t = {trace_id: Trace_id.t; span_id: Span_id.t} [@@deriving rpcty] let context trace_id span_id = {trace_id; span_id} - let to_traceparent t = Printf.sprintf "00-%s-%s-01" t.trace_id t.span_id + let to_traceparent t = + Printf.sprintf "00-%s-%s-01" + (Trace_id.to_string t.trace_id) + (Span_id.to_string t.span_id) let of_traceparent traceparent = let elements = String.split_on_char '-' traceparent in match elements with | ["00"; trace_id; span_id; _] -> - Some {trace_id; span_id} + Some + { + trace_id= Trace_id.of_string trace_id + ; span_id= Span_id.of_string span_id + } | _ -> None @@ -195,17 +263,15 @@ module Span = struct let get_context t = t.context - let generate_id n = String.init n (fun _ -> "0123456789abcdef".[Random.int 16]) - let start ?(attributes = Attributes.empty) ~name ~parent ~span_kind () = let trace_id = match parent with | None -> - generate_id 32 + Trace_id.make () | Some span_parent -> span_parent.context.trace_id in - let span_id = generate_id 16 in + let span_id = Span_id.make () in let context : SpanContext.t = {trace_id; span_id} in (* Using gettimeofday over Mtime as it is better for sharing timestamps between the systems *) let begin_time = Unix.gettimeofday () in @@ -303,16 +369,25 @@ module Span = struct span end -module Spans = struct - let lock = Mutex.create () - - let spans = Hashtbl.create 100 +module TraceMap = Map.Make (Trace_id) +module SpanMap = Map.Make (Span_id) - let span_count () = - Xapi_stdext_threads.Threadext.Mutex.execute lock (fun () -> - Hashtbl.length spans +module Spans = struct + let spans = Atomic.make TraceMap.empty + + let rec update_spans f arg = + let old = Atomic.get spans in + let next = f old arg in + if Atomic.compare_and_set spans old next then + () + else ( + (* TODO: should use Kcas.update, or Saturn skip_lists for domains *) + Thread.yield () ; + (update_spans [@tailcall]) f arg ) + let span_count () = TraceMap.cardinal (Atomic.get spans) + let max_spans = Atomic.make 2500 let set_max_spans x = Atomic.set max_spans x @@ -321,140 +396,117 @@ module Spans = struct let set_max_traces x = Atomic.set max_traces x - let finished_spans = Hashtbl.create 100 + let finished_spans = Atomic.make ([], 0) - let span_hashtbl_is_empty () = - Xapi_stdext_threads.Threadext.Mutex.execute lock (fun () -> - Hashtbl.length spans = 0 - ) + let span_hashtbl_is_empty () = TraceMap.is_empty (Atomic.get spans) - let finished_span_hashtbl_is_empty () = - Xapi_stdext_threads.Threadext.Mutex.execute lock (fun () -> - Hashtbl.length finished_spans = 0 - ) + let finished_span_hashtbl_is_empty () = Atomic.get finished_spans |> snd = 0 - let add_to_spans ~(span : Span.t) = + let add_to_spans_unlocked spans (span : Span.t) = let key = span.context.trace_id in - Xapi_stdext_threads.Threadext.Mutex.execute lock (fun () -> - match Hashtbl.find_opt spans key with - | None -> - if Hashtbl.length spans < Atomic.get max_traces then - Hashtbl.add spans key [span] - else - debug "%s exceeded max traces when adding to span table" - __FUNCTION__ - | Some span_list -> - if List.length span_list < Atomic.get max_spans then - Hashtbl.replace spans key (span :: span_list) - else - debug "%s exceeded max traces when adding to span table" - __FUNCTION__ - ) + match TraceMap.find_opt key spans with + | None -> + if TraceMap.cardinal spans < Atomic.get max_traces then + TraceMap.add key (SpanMap.singleton span.context.span_id span) spans + else ( + if not_throttled () then + debug "%s exceeded max traces when adding to span table" + __FUNCTION__ ; + spans + ) + | Some span_list -> + if SpanMap.cardinal span_list < Atomic.get max_spans then + TraceMap.add key + (SpanMap.add span.context.span_id span span_list) + spans + else ( + if not_throttled () then + debug "%s exceeded max traces when adding to span table" + __FUNCTION__ ; + spans + ) - let remove_from_spans span = - let key = span.Span.context.trace_id in - Xapi_stdext_threads.Threadext.Mutex.execute lock (fun () -> - match Hashtbl.find_opt spans key with - | None -> - debug "%s span does not exist or already finished" __FUNCTION__ ; - None - | Some span_list -> - ( match - List.filter (fun x -> x.Span.context <> span.context) span_list - with - | [] -> - Hashtbl.remove spans key - | filtered_list -> - Hashtbl.replace spans key filtered_list - ) ; - Some span - ) + let add_to_spans ~span = update_spans add_to_spans_unlocked span - let add_to_finished span = + let remove_from_spans_unlocked spans span = let key = span.Span.context.trace_id in - Xapi_stdext_threads.Threadext.Mutex.execute lock (fun () -> - match Hashtbl.find_opt finished_spans key with - | None -> - if Hashtbl.length finished_spans < Atomic.get max_traces then - Hashtbl.add finished_spans key [span] - else - debug "%s exceeded max traces when adding to finished span table" - __FUNCTION__ - | Some span_list -> - if List.length span_list < Atomic.get max_spans then - Hashtbl.replace finished_spans key (span :: span_list) - else - debug "%s exceeded max traces when adding to finished span table" - __FUNCTION__ - ) + match TraceMap.find_opt key spans with + | None -> + if not_throttled () then + debug "%s span does not exist or already finished" __FUNCTION__ ; + spans + | Some span_list -> + let span_list = SpanMap.remove span.Span.context.span_id span_list in + if SpanMap.is_empty span_list then + TraceMap.remove key spans + else + TraceMap.add key span_list spans + + let remove_from_spans span = + update_spans remove_from_spans_unlocked span ; + Some span + + let rec add_to_finished span = + let ((spans, n) as old) = Atomic.get finished_spans in + if n < Atomic.get max_spans then + let next = (span :: spans, n + 1) in + if Atomic.compare_and_set finished_spans old next then + () + else ( + Thread.yield () ; + (add_to_finished [@tailcall]) span + ) + else if not_throttled () then + debug "%s exceeded max traces when adding to finished span table" + __FUNCTION__ let mark_finished span = Option.iter add_to_finished (remove_from_spans span) - let span_is_finished x = - match x with - | None -> - false - | Some (span : Span.t) -> - Xapi_stdext_threads.Threadext.Mutex.execute lock (fun () -> - match Hashtbl.find_opt finished_spans span.context.trace_id with - | None -> - false - | Some span_list -> - List.mem span span_list - ) + let empty_finished = ([], 0) (** since copies the existing finished spans and then clears the existing spans as to only export them once *) let since () = - Xapi_stdext_threads.Threadext.Mutex.execute lock (fun () -> - let copy = Hashtbl.copy finished_spans in - Hashtbl.clear finished_spans ; - copy - ) + let copy = Atomic.exchange finished_spans empty_finished in + reset_throttled () ; copy - let dump () = - Xapi_stdext_threads.Threadext.Mutex.execute lock (fun () -> - Hashtbl.(copy spans, Hashtbl.copy finished_spans) - ) + let dump () = (Atomic.get spans, Atomic.get finished_spans) module GC = struct - let lock = Mutex.create () - let span_timeout = Atomic.make 86400. (* one day in seconds *) let span_timeout_thread = ref None - let gc_inactive_spans () = - Xapi_stdext_threads.Threadext.Mutex.execute lock (fun () -> - Hashtbl.filter_map_inplace - (fun _ spanlist -> - let filtered = - List.filter_map - (fun span -> - let elapsed = - Unix.gettimeofday () -. span.Span.begin_time - in - if elapsed > Atomic.get span_timeout *. 1000000. then ( - debug "Tracing: Span %s timed out, forcibly finishing now" - span.Span.context.span_id ; - let span = - Span.finish ~span - ~attributes: - (Attributes.singleton "gc_inactive_span_timeout" - (string_of_float elapsed) - ) - () - in - add_to_finished span ; None - ) else - Some span - ) - spanlist - in - match filtered with [] -> None | spans -> Some spans - ) - spans - ) + let gc_inactive_spans_unlocked spans () = + TraceMap.filter_map + (fun _ spanlist -> + let filtered = + SpanMap.filter_map + (fun _ span -> + let elapsed = Unix.gettimeofday () -. span.Span.begin_time in + if elapsed > Atomic.get span_timeout *. 1000000. then ( + if not_throttled () then + debug "Tracing: Span %s timed out, forcibly finishing now" + (Span_id.to_string span.Span.context.span_id) ; + let span = + Span.finish ~span + ~attributes: + (Attributes.singleton "gc_inactive_span_timeout" + (string_of_float elapsed) + ) + () + in + add_to_finished span ; None + ) else + Some span + ) + spanlist + in + if SpanMap.is_empty filtered then None else Some filtered + ) + spans + + let gc_inactive_spans () = update_spans gc_inactive_spans_unlocked () let initialise_thread ~timeout = Atomic.set span_timeout timeout ; @@ -481,6 +533,18 @@ module TracerProvider = struct ; enabled: bool } + let no_op = + { + name_label= "" + ; attributes= Attributes.empty + ; endpoints= [] + ; enabled= false + } + + let current = Atomic.make no_op + + let get_current () = Atomic.get current + let get_name_label t = t.name_label let get_attributes t = Attributes.to_assoc_list t.attributes @@ -510,7 +574,7 @@ module TracerProvider = struct might not be aware that a TracerProvider has already been created.*) error "Tracing : TracerProvider %s already exists" name_label ) ; - if enabled then set_observe true + if enabled then Atomic.set current provider ) let get_tracer_providers_unlocked () = @@ -520,6 +584,16 @@ module TracerProvider = struct Xapi_stdext_threads.Threadext.Mutex.execute lock get_tracer_providers_unlocked + let update_providers_unlocked () = + let providers = get_tracer_providers_unlocked () in + match List.find_opt (fun provider -> provider.enabled) providers with + | None -> + Atomic.set current no_op ; + Atomic.set Spans.spans TraceMap.empty ; + Atomic.set Spans.finished_spans Spans.empty_finished + | Some enabled -> + Atomic.set current enabled + let set ?enabled ?attributes ?endpoints ~uuid () = let update_provider (provider : t) enabled attributes endpoints = let enabled = Option.value ~default:provider.enabled enabled in @@ -544,58 +618,22 @@ module TracerProvider = struct fail "The TracerProvider : %s does not exist" uuid in Hashtbl.replace tracer_providers uuid provider ; - if - List.for_all - (fun provider -> not provider.enabled) - (get_tracer_providers_unlocked ()) - then ( - set_observe false ; - Xapi_stdext_threads.Threadext.Mutex.execute Spans.lock (fun () -> - Hashtbl.clear Spans.spans ; - Hashtbl.clear Spans.finished_spans - ) - ) else - set_observe true + update_providers_unlocked () ) let destroy ~uuid = Xapi_stdext_threads.Threadext.Mutex.execute lock (fun () -> let _ = Hashtbl.remove tracer_providers uuid in - if Hashtbl.length tracer_providers = 0 then set_observe false else () + update_providers_unlocked () ) end -module Tracer = struct - type t = {_name: string; provider: TracerProvider.t} - - let create ~name ~provider = {_name= name; provider} - - let no_op = - let provider : TracerProvider.t = - { - name_label= "" - ; attributes= Attributes.empty - ; endpoints= [] - ; enabled= false - } - in - {_name= ""; provider} +let get_observe () = TracerProvider.(get_current ()).enabled - let get_tracer ~name = - if Atomic.get observe then ( - let providers = - Xapi_stdext_threads.Threadext.Mutex.execute TracerProvider.lock - TracerProvider.get_tracer_providers_unlocked - in +module Tracer = struct + type t = TracerProvider.t - match List.find_opt TracerProvider.get_enabled providers with - | Some provider -> - create ~name ~provider - | None -> - warn "No provider found for tracing %s" name ; - no_op - ) else - no_op + let get_tracer ~name:_ = TracerProvider.get_current () let span_of_span_context context name : Span.t = { @@ -613,21 +651,17 @@ module Tracer = struct let start ~tracer:t ?(attributes = []) ?(span_kind = SpanKind.Internal) ~name ~parent () : (Span.t option, exn) result = - (* Do not start span if the TracerProvider is diabled*) - if not t.provider.enabled then + let open TracerProvider in + (* Do not start span if the TracerProvider is disabled*) + if not t.enabled then ok_none else - let attributes = Attributes.of_list attributes in - let attributes = - Attributes.union - (fun _k a _b -> Some a) - attributes t.provider.attributes - in + let attributes = Attributes.merge_into t.attributes attributes in let span = Span.start ~attributes ~name ~parent ~span_kind () in Spans.add_to_spans ~span ; Ok (Some span) let update_span_with_parent span (parent : Span.t option) = - if Atomic.get observe then + if (TracerProvider.get_current ()).enabled then match parent with | None -> Some span @@ -667,8 +701,6 @@ module Tracer = struct span ) - let span_is_finished x = Spans.span_is_finished x - let span_hashtbl_is_empty () = Spans.span_hashtbl_is_empty () let finished_span_hashtbl_is_empty () = @@ -679,8 +711,8 @@ let enable_span_garbage_collector ?(timeout = 86400.) () = Spans.GC.initialise_thread ~timeout let with_tracing ?(attributes = []) ?(parent = None) ~name f = - if Atomic.get observe then ( - let tracer = Tracer.get_tracer ~name in + let tracer = Tracer.get_tracer ~name in + if tracer.enabled then ( match Tracer.start ~tracer ~attributes ~name ~parent () with | Ok span -> ( try diff --git a/ocaml/libs/tracing/tracing.mli b/ocaml/libs/tracing/tracing.mli index 42b700ebb51..e78153c9790 100644 --- a/ocaml/libs/tracing/tracing.mli +++ b/ocaml/libs/tracing/tracing.mli @@ -54,18 +54,40 @@ module SpanEvent : sig type t = {name: string; time: float; attributes: string Attributes.t} end -module SpanContext : sig +module Span_id : sig type t - val context : string -> string -> t + val make : unit -> t + + val compare : t -> t -> int + + val of_string : string -> t + + val to_string : t -> string +end + +module Trace_id : sig + type t + + val make : unit -> t + + val compare : t -> t -> int + + val of_string : string -> t + + val to_string : t -> string +end + +module SpanContext : sig + type t val to_traceparent : t -> string val of_traceparent : string -> t option - val trace_id_of_span_context : t -> string + val trace_id_of_span_context : t -> Trace_id.t - val span_id_of_span_context : t -> string + val span_id_of_span_context : t -> Span_id.t end module Span : sig @@ -98,6 +120,10 @@ module Span : sig val get_attributes : t -> (string * string) list end +module TraceMap : module type of Map.Make (Trace_id) + +module SpanMap : module type of Map.Make (Span_id) + module Spans : sig val set_max_spans : int -> unit @@ -105,10 +131,9 @@ module Spans : sig val span_count : unit -> int - val since : unit -> (string, Span.t list) Hashtbl.t + val since : unit -> Span.t list * int - val dump : - unit -> (string, Span.t list) Hashtbl.t * (string, Span.t list) Hashtbl.t + val dump : unit -> Span.t SpanMap.t TraceMap.t * (Span.t list * int) end module Tracer : sig @@ -140,8 +165,6 @@ module Tracer : sig val finish : ?error:exn * string -> Span.t option -> (Span.t option, exn) result - val span_is_finished : Span.t option -> bool - val span_hashtbl_is_empty : unit -> bool val finished_span_hashtbl_is_empty : unit -> bool diff --git a/ocaml/libs/tracing/tracing_export.ml b/ocaml/libs/tracing/tracing_export.ml index 5bb154d20c2..43761cdde1c 100644 --- a/ocaml/libs/tracing/tracing_export.ml +++ b/ocaml/libs/tracing/tracing_export.ml @@ -83,13 +83,24 @@ module Content = struct ) in { - id= s |> Span.get_context |> SpanContext.span_id_of_span_context - ; traceId= s |> Span.get_context |> SpanContext.trace_id_of_span_context + id= + s + |> Span.get_context + |> SpanContext.span_id_of_span_context + |> Span_id.to_string + ; traceId= + s + |> Span.get_context + |> SpanContext.trace_id_of_span_context + |> Trace_id.to_string ; parentId= s |> Span.get_parent |> Option.map (fun x -> - x |> Span.get_context |> SpanContext.span_id_of_span_context + x + |> Span.get_context + |> SpanContext.span_id_of_span_context + |> Span_id.to_string ) ; name= s |> Span.get_name ; timestamp= int_of_float (Span.get_begin_time s *. 1000000.) @@ -248,9 +259,7 @@ module Destination = struct | Bugtool -> (file_export, "Tracing.File.export") in - let all_spans = - Hashtbl.fold (fun _ spans acc -> spans @ acc) traces [] - in + let all_spans, count = traces in let attributes = [ ("export.span.count", all_spans |> List.length |> string_of_int) @@ -258,9 +267,7 @@ module Destination = struct ; ( "xs.tracing.spans_table.count" , Spans.span_count () |> string_of_int ) - ; ( "xs.tracing.finished_spans_table.count" - , traces |> Hashtbl.length |> string_of_int - ) + ; ("xs.tracing.finished_spans_table.count", string_of_int count) ] in let@ _ = with_tracing ~parent ~attributes ~name in @@ -273,17 +280,15 @@ module Destination = struct debug "Tracing: unable to export span : %s" (Printexc.to_string exn) let flush_spans () = - let span_list = Spans.since () in - let attributes = - [("export.traces.count", Hashtbl.length span_list |> string_of_int)] - in + let ((_span_list, span_count) as span_info) = Spans.since () in + let attributes = [("export.traces.count", string_of_int span_count)] in let@ parent = with_tracing ~parent:None ~attributes ~name:"Tracing.flush_spans" in TracerProvider.get_tracer_providers () |> List.filter TracerProvider.get_enabled |> List.concat_map TracerProvider.get_endpoints - |> List.iter (export_to_endpoint parent span_list) + |> List.iter (export_to_endpoint parent span_info) let delay = Delay.make () diff --git a/ocaml/tests/bench/bechamel_simple_cli.ml b/ocaml/tests/bench/bechamel_simple_cli.ml new file mode 100644 index 00000000000..e40399cf04d --- /dev/null +++ b/ocaml/tests/bench/bechamel_simple_cli.ml @@ -0,0 +1,153 @@ +open Bechamel +open Toolkit + +(* Bechamel doesn't provide before/after hooks, just allocate/free, but those are done outside the place where + Bechamel checks for GC live words stabilization. +*) +let before_after ~before ~after ~get ~label ~unit = + let shared_state = Atomic.make None and called = Atomic.make 0 in + let module BeforeAfter = struct + type witness = int Atomic.t + + let make () = Atomic.make 0 + + let load t = Atomic.set t 0 + + let unload _ = () + + let label _ = label + + let unit _ = unit + + let get _ = + (* + We get added to the instances both at the beginning and the end, so we get called 4 times: + + get () - 0: None -> state := before () + time () + get () - 1 + + benchmark_loop () + + get () - 2 + time () + get () - 3, after state, state := None + + We want the time measurement to be as close to the benchmark loop as possible, + so we perform operations only on call 1 and 4 + *) + let phase = Atomic.fetch_and_add called 1 mod 4 in + let old = Atomic.get shared_state in + match (old, phase) with + | None, 0 -> + before () |> Option.some |> Atomic.set shared_state ; + 0. + | Some state, (1 | 2) -> + get state + | Some state, 3 -> + let r = get state in + Atomic.set shared_state None ; + after state ; + r + | None, _ -> + assert false + | Some _, _ -> + assert false + end in + let measure = Measure.register (module BeforeAfter) in + Measure.instance (module BeforeAfter) measure + +let skip_label = "workload" + +let thread_workload ~before ~run ~after = + let before () = + let state = before () + and stop = Atomic.make false + and loops = Atomic.make 0 in + let thread_worker () = + while not (Atomic.get stop) do + Sys.opaque_identity (run state : unit) ; + Atomic.incr loops + done + in + let t = Thread.create thread_worker () in + (state, stop, loops, t) + and after (state, stop, _loops, worker) = + Atomic.set stop true ; Thread.join worker ; after state + and get (_, _, loops, _) = Atomic.fetch_and_add loops 1 |> float_of_int in + before_after ~before ~after ~get ~label:skip_label ~unit:"loops" + +(* based on bechamel example code *) + +(* For very short benchmarks ensure that they get to run long enough to switch threads + a few times. + Bechamel has both an iteration count and time limit, so this won't be a problem for slower benchmarks. +*) +let limit = 10_000_000 + +let benchmark ~instances tests = + let cfg = Benchmark.cfg ~limit ~quota:(Time.second 10.0) () in + Benchmark.all cfg instances tests + +let analyze ~instances raw_results = + let ols ~bootstrap = + Analyze.ols ~bootstrap ~r_square:true ~predictors:[|Measure.run|] + in + let results = + List.map + (fun instance -> + let f bootstrap = Analyze.all (ols ~bootstrap) instance raw_results in + try f 3000 with _ -> f 0 + ) + instances + in + (Analyze.merge (ols ~bootstrap:3000) instances results, raw_results) + +open Notty_unix + +let img (window, results) = + Bechamel_notty.Multiple.image_of_ols_results ~rect:window + ~predictor:Measure.run results + |> eol + +let not_workload measure = not (Measure.label measure = skip_label) + +let run_and_print instances tests = + let results, _ = + tests + |> benchmark ~instances + |> analyze ~instances:(List.filter not_workload instances) + in + let window = + match winsize Unix.stdout with + | Some (w, h) -> + {Bechamel_notty.w; h} + | None -> + {Bechamel_notty.w= 80; h= 1} + in + img (window, results) |> eol |> output_image ; + results + |> Hashtbl.iter @@ fun label results -> + if label = Measure.label Instance.monotonic_clock then + let units = Bechamel_notty.Unit.unit_of_label label in + results + |> Hashtbl.iter @@ fun name ols -> + Format.printf "%s (%s):@, %a@." name units Analyze.OLS.pp ols + +let cli ?(always = []) ?(workloads = []) tests = + let instances = + always + @ Instance.[monotonic_clock; minor_allocated; major_allocated] + @ always + in + List.iter (fun i -> Bechamel_notty.Unit.add i (Measure.unit i)) instances ; + Format.printf "@,Running benchmarks (no workloads)@." ; + run_and_print instances tests ; + + if workloads <> [] then ( + Format.printf "@,Running benchmarks (workloads)@." ; + List.iter (fun i -> Bechamel_notty.Unit.add i (Measure.unit i)) workloads ; + (* workloads come first, so that we unpause them in time *) + let instances = workloads @ instances @ workloads in + run_and_print instances tests + ) diff --git a/ocaml/tests/bench/bench_tracing.ml b/ocaml/tests/bench/bench_tracing.ml new file mode 100644 index 00000000000..eebe6e6aef2 --- /dev/null +++ b/ocaml/tests/bench/bench_tracing.ml @@ -0,0 +1,87 @@ +open Bechamel + +let ( let@ ) f x = f x + +(* TODO: before *) + +let trace_test_inner span = + let@ span = + Tracing.with_child_trace + ~attributes:[("foo", "testing")] + span ~name:__FUNCTION__ + in + let@ _ = + Tracing.with_child_trace ~attributes:[("bar", "val")] span ~name:"test" + in + Sys.opaque_identity ignore () + +let trace_test_span _ = Tracing.with_tracing ~name:__FUNCTION__ trace_test_inner + +let trace_test_off _ = trace_test_inner None + +let uuid = "TEST" + +let export_thread = + (* need to ensure this isn't running outside the benchmarked section, + or bechamel might fail with 'Failed to stabilize GC' + *) + let after _ = Tracing_export.flush_and_exit () in + Bechamel_simple_cli.thread_workload ~before:Tracing_export.main ~after + ~run:ignore + +let workload1 = + Bechamel_simple_cli.thread_workload ~before:ignore ~after:ignore + ~run:trace_test_span + +let create_gc_work = + let a = Array.make 1_000 "" in + fun () -> + (* create work for the GC by continously creating a lot of short lived strings *) + Sys.opaque_identity (Array.iteri (fun i _ -> a.(i) <- String.make 2 'x') a) + +let workload2 = + Bechamel_simple_cli.thread_workload ~before:ignore ~after:ignore + ~run:create_gc_work + +let workloads = [workload1; workload2] + +let allocate () = + Tracing.TracerProvider.create ~enabled:true ~attributes:[] ~endpoints:[] + ~name_label:__MODULE__ ~uuid ; + Tracing_export.main () + +let free t = + Tracing.TracerProvider.destroy ~uuid ; + Tracing_export.flush_and_exit () ; + Thread.join t + +let test_tracing_on ?(overflow = false) ~name f = + let allocate () = + if overflow then ( + Tracing.Spans.set_max_spans 10 ; + Tracing.Spans.set_max_traces 10 + ) ; + allocate () + and free t = + if overflow then ( + Tracing.Spans.set_max_spans Bechamel_simple_cli.limit ; + Tracing.Spans.set_max_traces Bechamel_simple_cli.limit + ) ; + free t + in + Test.make_with_resource ~name ~allocate ~free Test.uniq f + +let benchmarks = + Tracing.Spans.set_max_spans Bechamel_simple_cli.limit ; + Tracing.Spans.set_max_traces Bechamel_simple_cli.limit ; + Test.make_grouped ~name:"tracing" + [ + Test.make ~name:"overhead(off)" (Staged.stage trace_test_off) + ; test_tracing_on ~name:"overhead(on, no span)" (Staged.stage trace_test_off) + ; test_tracing_on ~name:"overhead(on, create span)" + (Staged.stage trace_test_span) + ; test_tracing_on ~overflow:true ~name:"max span overflow" + (Staged.stage trace_test_span) + ] + +let () = Bechamel_simple_cli.cli ~always:[export_thread] ~workloads benchmarks diff --git a/ocaml/tests/bench/dune b/ocaml/tests/bench/dune new file mode 100644 index 00000000000..0d11700e285 --- /dev/null +++ b/ocaml/tests/bench/dune @@ -0,0 +1,4 @@ +(executable + (name bench_tracing) + (libraries tracing bechamel bechamel-notty notty.unix tracing_export threads.posix fmt notty) +) diff --git a/ocaml/tests/test_observer.ml b/ocaml/tests/test_observer.ml index 322c586cb20..7ea23a05939 100644 --- a/ocaml/tests/test_observer.ml +++ b/ocaml/tests/test_observer.ml @@ -385,19 +385,15 @@ let test_all_spans_finish () = let _ = List.map (fun span -> Tracer.finish span) trace_spans in let remaining_spans, finished_spans = Spans.dump () in let result = - Hashtbl.fold - (fun k v acc -> - Option.fold ~none:0 ~some:List.length (Hashtbl.find_opt finished_spans k) - = List.length v - && acc - ) + TraceMap.fold + (fun _k v acc -> snd finished_spans = SpanMap.cardinal v && acc) active_spans true in Alcotest.(check bool) "All spans that are finished are moved to finished_spans" true result ; Alcotest.(check int) "traces with no spans are removed from the hashtable" 0 - (Hashtbl.length remaining_spans) ; + (TraceMap.cardinal remaining_spans) ; test_destroy ~__context ~self () let test_hashtbl_leaks () = @@ -440,8 +436,8 @@ let test_hashtbl_leaks () = let _, finished_spans = Spans.dump () in let filtered_spans_count = finished_spans - |> Hashtbl.to_seq_values - |> Seq.concat_map List.to_seq + |> fst + |> List.to_seq |> Seq.filter filter_export_spans |> Seq.length in @@ -587,6 +583,46 @@ let test_observed_components_of () = List.iter test_exp_comp expected_components_given_config_value ; observer_experimental_components := original_value +module type Id = sig + type t + + val compare : t -> t -> int + + val to_string : t -> string +end + +let testable_of_id (type a) (module I : Id with type t = a) = + let equal a b = I.compare a b = 0 and pp = Fmt.of_to_string I.to_string in + Alcotest.V1.testable pp equal + +let trace_id = testable_of_id (module Trace_id) + +let span_id = testable_of_id (module Span_id) + +let test_traceid () = + let expected = Trace_id.make () in + let str = expected |> Trace_id.to_string in + let actual = str |> Trace_id.of_string in + Alcotest.V1.check' trace_id ~expected ~actual ~msg:"roundtrip" ; + Alcotest.V1.(check' int ~expected:32 ~actual:(String.length str) ~msg:"length") + +let test_traceid' () = + let expected = "00000000000000010000000000000001" in + let actual = expected |> Trace_id.of_string |> Trace_id.to_string in + Alcotest.V1.(check' string ~expected ~actual ~msg:"roundtrip(str)") + +let test_spanid () = + let expected = Span_id.make () in + let str = expected |> Span_id.to_string in + let actual = str |> Span_id.of_string in + Alcotest.V1.check' span_id ~expected ~actual ~msg:"roundtrip" ; + Alcotest.V1.(check' int ~expected:16 ~actual:(String.length str) ~msg:"length") + +let test_spanid' () = + let expected = "0000000000000001" in + let actual = expected |> Span_id.of_string |> Span_id.to_string in + Alcotest.V1.(check' string ~expected ~actual ~msg:"roundtrip(str)") + let test = [ ( "test_observer_create_and_destroy" @@ -601,6 +637,10 @@ let test = ; ("test_tracing_exn_backtraces", `Quick, test_tracing_exn_backtraces) ; ("test_attribute_validation", `Quick, test_attribute_validation) ; ("test_observed_components_of", `Quick, test_observed_components_of) + ; ("test span_id", `Quick, test_spanid) + ; ("test trace_id", `Quick, test_traceid) + ; ("test span_id", `Quick, test_spanid') + ; ("test trace_id", `Quick, test_traceid') ] let () = diff --git a/ocaml/xapi/xapi_globs.ml b/ocaml/xapi/xapi_globs.ml index 56fbce47edd..cbaa7430e88 100644 --- a/ocaml/xapi/xapi_globs.ml +++ b/ocaml/xapi/xapi_globs.ml @@ -1019,7 +1019,7 @@ let trace_log_dir = ref "/var/log/dt/zipkinv2/json" let export_interval = ref 30. -let max_spans = ref 1000 +let max_spans = ref 10000 let max_traces = ref 10000