From 0326d2785b0b643492e9d42b42a44f4c8a01ee7e Mon Sep 17 00:00:00 2001 From: Andrii Sultanov Date: Wed, 15 Jan 2025 16:01:45 +0000 Subject: [PATCH 1/2] CA-404597: rrd/lib_test - Verify that RRD handles non-rate data sources correctly Other unit tests only verify the interoperability of the RRDs - dumping them to JSON/XML and reading back in, verifying that the same data was decoded. We're now seeing a problem where Gauge data sources, which should be absolute values provided by the plugin, fluctuate wildly when processed by the RRD library. Ensure we have an easy way to test this for both Gauge and Absolute data sources - these values should be passed as-is by the RRD library, without any time-based transformations. This test currently fails and will be passing with the fix commits. Signed-off-by: Andrii Sultanov --- ocaml/libs/xapi-rrd/lib_test/unit_tests.ml | 109 ++++++++++++++++++++- 1 file changed, 108 insertions(+), 1 deletion(-) diff --git a/ocaml/libs/xapi-rrd/lib_test/unit_tests.ml b/ocaml/libs/xapi-rrd/lib_test/unit_tests.ml index f9cb5765b9f..1bcd65ad7d3 100644 --- a/ocaml/libs/xapi-rrd/lib_test/unit_tests.ml +++ b/ocaml/libs/xapi-rrd/lib_test/unit_tests.ml @@ -107,6 +107,110 @@ let test_length_invariants rrd () = let check_length dss rra = check_length_of_fring dss rra.rra_data in Array.iter (check_length rrd.rrd_dss) rrd.rrd_rras +let absolute_rrd = + let rra = rra_create CF_Average 100 1 0.5 in + let rra2 = rra_create CF_Average 100 10 0.5 in + let rra3 = rra_create CF_Average 100 100 0.5 in + let rra4 = rra_create CF_Average 100 1000 0.5 in + let ts = 1000000000.0 in + let ds = ds_create "foo" Absolute ~mrhb:10.0 (VT_Float 0.0) in + let ds2 = ds_create "bar" Absolute ~mrhb:10.0 (VT_Float 0.0) in + let ds3 = ds_create "baz" Absolute ~mrhb:10.0 (VT_Float 0.0) in + let ds4 = ds_create "boo" Absolute ~mrhb:10.0 (VT_Float 0.0) in + let rrd = rrd_create [|ds; ds2; ds3; ds4|] [|rra; rra2; rra3; rra4|] 1L ts in + let id = Identity in + for i = 1 to 100000 do + let t = 1000000.0 +. (0.7 *. float_of_int i) in + let v1 = + (0, {value= VT_Float (0.5 +. (0.5 *. sin (t /. 10.0))); transform= id}) + in + let v2 = + (1, {value= VT_Float (1.5 +. (0.5 *. cos (t /. 80.0))); transform= id}) + in + let v3 = + (2, {value= VT_Float (3.5 +. (0.5 *. sin (t /. 700.0))); transform= id}) + in + let v4 = + (3, {value= VT_Float (6.5 +. (0.5 *. cos (t /. 5000.0))); transform= id}) + in + ds_update rrd t [|v1; v2; v3; v4|] false + done ; + rrd + +let absolute_rrd_CA_404597 () = + let rra = rra_create CF_Average 100 1 0.5 in + let rra2 = rra_create CF_Average 100 10 0.5 in + let rra3 = rra_create CF_Average 100 100 0.5 in + let rra4 = rra_create CF_Average 100 1000 0.5 in + let ts = 1000000000.0 in + let ds = ds_create "foo" Absolute ~mrhb:10.0 (VT_Float 0.0) in + let ds2 = ds_create "bar" Absolute ~mrhb:10.0 (VT_Float 0.0) in + let ds3 = ds_create "baz" Absolute ~mrhb:10.0 (VT_Float 0.0) in + let ds4 = ds_create "boo" Absolute ~mrhb:10.0 (VT_Float 0.0) in + let rrd = rrd_create [|ds; ds2; ds3; ds4|] [|rra; rra2; rra3; rra4|] 1L ts in + let id = Identity in + for i = 1 to 100000 do + let t = 1000000.0 +. (0.7 *. float_of_int i) in + let ((_, val1) as v1) = + (0, {value= VT_Float (0.5 +. (0.5 *. sin (t /. 10.0))); transform= id}) + in + let ((_, val2) as v2) = + (1, {value= VT_Float (1.5 +. (0.5 *. cos (t /. 80.0))); transform= id}) + in + let ((_, val3) as v3) = + (2, {value= VT_Float (3.5 +. (0.5 *. sin (t /. 700.0))); transform= id}) + in + let ((_, val4) as v4) = + (3, {value= VT_Float (6.5 +. (0.5 *. cos (t /. 5000.0))); transform= id}) + in + ds_update rrd t [|v1; v2; v3; v4|] false ; + + Array.iter2 + (fun ds value -> + compare_float __LOC__ ds.ds_value + (float_of_string (ds_value_to_string value.value)) + ) + rrd.rrd_dss [|val1; val2; val3; val4|] + done + +(** Verify that Gauge data soruce values are correctly handled by the RRD lib + and that timestamps do not cause absolute values to fluctuate *) +let gauge_rrd_CA_404597 () = + let rra = rra_create CF_Average 100 1 0.5 in + let rra2 = rra_create CF_Average 100 10 0.5 in + let rra3 = rra_create CF_Average 100 100 0.5 in + let rra4 = rra_create CF_Average 100 1000 0.5 in + let ts = 1000000000.0 in + let ds = ds_create "foo" Gauge ~mrhb:10.0 (VT_Float 0.0) in + let ds2 = ds_create "bar" Gauge ~mrhb:10.0 (VT_Float 0.0) in + let ds3 = ds_create "baz" Gauge ~mrhb:10.0 (VT_Float 0.0) in + let ds4 = ds_create "boo" Gauge ~mrhb:10.0 (VT_Float 0.0) in + let rrd = rrd_create [|ds; ds2; ds3; ds4|] [|rra; rra2; rra3; rra4|] 1L ts in + let id = Identity in + for i = 1 to 100000 do + let t = 1000000.0 +. (0.7 *. float_of_int i) in + let ((_, val1) as v1) = + (0, {value= VT_Float (0.5 +. (0.5 *. sin (t /. 10.0))); transform= id}) + in + let ((_, val2) as v2) = + (1, {value= VT_Float (1.5 +. (0.5 *. cos (t /. 80.0))); transform= id}) + in + let ((_, val3) as v3) = + (2, {value= VT_Float (3.5 +. (0.5 *. sin (t /. 700.0))); transform= id}) + in + let ((_, val4) as v4) = + (3, {value= VT_Float (6.5 +. (0.5 *. cos (t /. 5000.0))); transform= id}) + in + ds_update rrd t [|v1; v2; v3; v4|] false ; + + Array.iter2 + (fun ds value -> + compare_float __LOC__ ds.ds_value + (float_of_string (ds_value_to_string value.value)) + ) + rrd.rrd_dss [|val1; val2; val3; val4|] + done + let gauge_rrd = let rra = rra_create CF_Average 100 1 0.5 in let rra2 = rra_create CF_Average 100 10 0.5 in @@ -328,12 +432,15 @@ let regression_suite = ; ("CA-329043 (1)", `Quick, test_ranges ca_329043_rrd_1) ; ("CA-329043 (2)", `Quick, test_ranges ca_329043_rrd_2) ; ("CA-329813", `Quick, test_ranges ca_329813_rrd) + ; ("CA-404597 (1)", `Quick, gauge_rrd_CA_404597) + ; ("CA-404597 (2)", `Quick, absolute_rrd_CA_404597) ] let () = Alcotest.run "Test RRD library" [ - ("Gauge RRD", rrd_suite gauge_rrd) + ("Absolute RRD", rrd_suite absolute_rrd) + ; ("Gauge RRD", rrd_suite gauge_rrd) ; ("RRD for CA-322008", rrd_suite ca_322008_rrd) ; ("RRD for CA-329043", rrd_suite ca_329043_rrd_1) ; ("RRD for CA-329813", rrd_suite ca_329813_rrd) From 73ca3cca49fd604a2cd408c332078535f0f694fe Mon Sep 17 00:00:00 2001 From: Andrii Sultanov Date: Thu, 16 Jan 2025 13:08:45 +0000 Subject: [PATCH 2/2] CA-404597: rrd - Pass Gauge and Absolute data source values as-is Some recent changes related to RRDs likely exposed a long-standing latent issue where the RRD library would process the passed-in values for Gauge and Absolute data sources incorrectly leading to constant values changing from update to update, for example: ``` $ rrd2csv memory_total_kib timestamp, AVERAGE:host:8b533333-91e1-4698-bd17-95b9732ffbb6:memory_total_kib 2025-01-15T08:41:40Z, 33351000 2025-01-15T08:41:45Z, 33350000 2025-01-15T08:41:50Z, 33346000 2025-01-15T08:41:55Z, 33352000 ``` Instead of treating Gauge and Absolute data sources as a variation on the rate-based Derive data source type, expecting time-based calculations to cancel each other out, do not undertake any calculations on non-rate data sources at all. This makes the unit test added in the previous commit pass. Signed-off-by: Andrii Sultanov --- ocaml/libs/xapi-rrd/lib/rrd.ml | 36 ++++++++++++++++++++++++---------- 1 file changed, 26 insertions(+), 10 deletions(-) diff --git a/ocaml/libs/xapi-rrd/lib/rrd.ml b/ocaml/libs/xapi-rrd/lib/rrd.ml index 75610964fc1..6667f2a4f5c 100644 --- a/ocaml/libs/xapi-rrd/lib/rrd.ml +++ b/ocaml/libs/xapi-rrd/lib/rrd.ml @@ -341,9 +341,10 @@ let rra_update rrd proc_pdp_st elapsed_pdp_st pdps = Array.iter updatefn rrd.rrd_rras (* We assume that the data being given is of the form of a rate; that is, - it's dependent on the time interval between updates. To be able to - deal with gauge DSs, we multiply by the interval so that it cancels - the subsequent divide by interval later on *) + it's dependent on the time interval between updates. + Gauge and Absolute data sources are simply kept as is without any + time-based calculations, while Derive data sources will be changed according + to the time passed since the last measurement. (see CA-404597) *) let process_ds_value ds value interval new_rrd = if interval > ds.ds_mrhb then nan @@ -360,10 +361,8 @@ let process_ds_value ds value interval new_rrd = let rate = match (ds.ds_ty, new_rrd) with - | Absolute, _ | Derive, true -> + | Absolute, _ | Derive, true | Gauge, _ -> value_raw - | Gauge, _ -> - value_raw *. interval | Derive, false -> ( match (ds.ds_last, value) with | VT_Int64 x, VT_Int64 y -> @@ -433,7 +432,14 @@ let ds_update rrd timestamp valuesandtransforms new_rrd = if Utils.isnan value then ds.ds_unknown_sec <- pre_int else - ds.ds_value <- ds.ds_value +. (pre_int *. value /. interval) + (* CA-404597 - Gauge and Absolute values should be passed as-is, + without being involved in time-based calculations at all. + This applies to calculations below as well *) + match ds.ds_ty with + | Gauge | Absolute -> + ds.ds_value <- value + | Derive -> + ds.ds_value <- ds.ds_value +. (pre_int *. value /. interval) ) v2s ; @@ -450,7 +456,13 @@ let ds_update rrd timestamp valuesandtransforms new_rrd = let raw = let proc_pdp_st = get_float_time last_updated rrd.timestep in let occu_pdp_st = get_float_time timestamp rrd.timestep in - ds.ds_value /. (occu_pdp_st -. proc_pdp_st -. ds.ds_unknown_sec) + + match ds.ds_ty with + | Gauge | Absolute -> + ds.ds_value + | Derive -> + ds.ds_value + /. (occu_pdp_st -. proc_pdp_st -. ds.ds_unknown_sec) in (* Apply the transform after the raw value has been calculated *) let raw = apply_transform_function transform raw in @@ -473,8 +485,12 @@ let ds_update rrd timestamp valuesandtransforms new_rrd = ds.ds_value <- 0.0 ; ds.ds_unknown_sec <- post_int ) else ( - ds.ds_value <- post_int *. value /. interval ; - ds.ds_unknown_sec <- 0.0 + ds.ds_unknown_sec <- 0.0 ; + match ds.ds_ty with + | Gauge | Absolute -> + ds.ds_value <- value + | Derive -> + ds.ds_value <- post_int *. value /. interval ) ) v2s