From f5d9ac943c5d07e02a9bd619c2bc6a0d6f6a92a1 Mon Sep 17 00:00:00 2001 From: Lord Hepipud Date: Wed, 29 Jan 2025 14:45:53 +0100 Subject: [PATCH] Adds improved handling for Metrics over Time (#772) This adds new and improved handling for Metrics over Time. The overall execution time of the background tasks has been reduced, and memory management is now considerably more efficient. In addition to the improved core handling of the feature, performance metrics for Metrics over Time will NO LONGER BE WRITTEN. This significantly improves the performance of graphing solutions like InfluxDB, while monitoring with the "-ThresholdInterval" argument remains possible. ```powershell PS> Invoke-IcingaCheckCPU -Warning '5%' -ThresholdInterval '10m'; [WARNING] CPU Load [WARNING] Overall Load, Socket #0 \_ [WARNING] Overall Load: Value 6.546175% is greater than threshold 5% (10m Avg.) \_ [WARNING] Socket #0 \_ [WARNING] Core 0: Value 18.391566% is greater than threshold 5% (10m Avg.) \_ [WARNING] Core 1: Value 14.100505% is greater than threshold 5% (10m Avg.) \_ [WARNING] Core Total: Value 6.546175% is greater than threshold 5% (10m Avg.) 
| totalload::ifw_cpu::load=5.804053;5;;0;100 0_0::ifw_cpu::load=18.03764;5;;0;100 0_1::ifw_cpu::load=9.36611;5;;0;100 0_2::ifw_cpu::load=5.830669;5;;0;100 0_3::ifw_cpu::load=0.646737;5;;0;100 0_4::ifw_cpu::load=0.926955;5;;0;100 0_5::ifw_cpu::load=0.016205;5;;0;100 0_total::ifw_cpu::load=5.804053;5;;0;100 ``` --- doc/100-General/10-Changelog.md | 4 + .../New-IcingaEnvironmentVariable.psm1 | 7 +- .../task/Add-IcingaServiceCheckTask.psm1 | 140 +++++++++--------- ...w-IcingaServiceCheckDaemonEnvironment.psm1 | 2 +- .../Compare-IcingaPluginThresholds.psm1 | 16 +- .../Compare-IcingaPluginValueToThreshold.psm1 | 52 +++++-- .../ConvertTo-IcingaMetricsOverTime.psm1 | 59 ++++++++ .../Get-IcingaMetricsOverTimePerfData.psm1 | 38 +++++ lib/icinga/plugin/New-IcingaCheck.psm1 | 49 ++++-- .../plugin/New-IcingaCheckBaseObject.psm1 | 3 +- lib/icinga/plugin/New-IcingaCheckResult.psm1 | 4 + .../plugin/Write-IcingaPluginPerfData.psm1 | 10 +- 12 files changed, 271 insertions(+), 113 deletions(-) create mode 100644 lib/icinga/plugin/ConvertTo-IcingaMetricsOverTime.psm1 create mode 100644 lib/icinga/plugin/Get-IcingaMetricsOverTimePerfData.psm1 diff --git a/doc/100-General/10-Changelog.md b/doc/100-General/10-Changelog.md index 0e401a97..6e8e6caa 100644 --- a/doc/100-General/10-Changelog.md +++ b/doc/100-General/10-Changelog.md @@ -13,6 +13,10 @@ Released closed milestones can be found on [GitHub](https://github.com/Icinga/ic * [#759](https://github.com/Icinga/icinga-powershell-framework/pull/759) Fixes maximum cache duration for service daemons to the right value +### Enhancements + +* [#772](https://github.com/Icinga/icinga-powershell-framework/pull/772) Adds new Metric over Time handling + ## 1.13.0 Beta-2 (2024-09-19) [Issues and PRs](https://github.com/Icinga/icinga-powershell-framework/milestone/36) diff --git a/lib/core/framework/New-IcingaEnvironmentVariable.psm1 b/lib/core/framework/New-IcingaEnvironmentVariable.psm1 index fe174b4d..b34c2160 100644 --- 
a/lib/core/framework/New-IcingaEnvironmentVariable.psm1 +++ b/lib/core/framework/New-IcingaEnvironmentVariable.psm1 @@ -37,6 +37,7 @@ function New-IcingaEnvironmentVariable() $Global:Icinga.Private.Add( 'Scheduler', @{ + 'CheckCommand' = ''; 'CheckData' = @{ }; 'ThresholdCache' = @{ }; 'CheckResults' = @(); @@ -44,8 +45,10 @@ function New-IcingaEnvironmentVariable() 'PluginException' = $null; 'ExitCode' = $null; 'PerfDataWriter' = @{ - 'Cache' = @{}; - 'Storage' = (New-Object System.Text.StringBuilder); + 'Cache' = @{ }; + 'Storage' = (New-Object System.Text.StringBuilder); + 'Daemon' = @{ }; + 'MetricsOverTime' = ''; } } ); diff --git a/lib/daemons/ServiceCheckDaemon/task/Add-IcingaServiceCheckTask.psm1 b/lib/daemons/ServiceCheckDaemon/task/Add-IcingaServiceCheckTask.psm1 index 50573bfc..4546a4ae 100644 --- a/lib/daemons/ServiceCheckDaemon/task/Add-IcingaServiceCheckTask.psm1 +++ b/lib/daemons/ServiceCheckDaemon/task/Add-IcingaServiceCheckTask.psm1 @@ -17,7 +17,14 @@ function Add-IcingaServiceCheckTask() # Read our check result store data from disk for this service check Read-IcingaCheckResultStore -CheckCommand $CheckCommand; - [int]$CheckInterval = ConvertTo-Seconds $Interval; + $MetricCacheFile = Join-Path -Path (Join-Path -Path (Join-Path -Path (Get-IcingaCacheDir) -ChildPath 'service_check_cache') -ChildPath 'metrics') -ChildPath ([string]::Format('{0}.xml', $CheckCommand)); + [int]$CheckInterval = ConvertTo-Seconds $Interval; + [hashtable]$CheckDataCache = @{ }; + [array]$PerfDataEntries = @(); + + if (Test-Path -Path $MetricCacheFile) { + $CheckDataCache = [System.Management.Automation.PSSerializer]::Deserialize((Get-Content -Path $MetricCacheFile -Raw -Encoding UTF8)); + } while ($TRUE) { if ($Global:Icinga.Private.Daemons.ServiceCheck.PassedTime -lt $CheckInterval) { @@ -29,6 +36,9 @@ function Add-IcingaServiceCheckTask() $Global:Icinga.Private.Daemons.ServiceCheck.PassedTime = 0; + # Clear possible previous performance data from the daemon cache + 
$Global:Icinga.Private.Scheduler.PerfDataWriter.Daemon.Clear(); + # Execute our check with possible arguments try { & $CheckCommand @Arguments | Out-Null; @@ -45,93 +55,79 @@ function Add-IcingaServiceCheckTask() $UnixTime = Get-IcingaUnixTime; - try { - foreach ($result in $global:Icinga.Private.Scheduler.CheckData[$CheckCommand]['results'].Keys) { - [string]$HashIndex = $result; - $Global:Icinga.Private.Daemons.ServiceCheck.SortedResult = $global:Icinga.Private.Scheduler.CheckData[$CheckCommand]['results'][$HashIndex].GetEnumerator() | Sort-Object name -Descending; - - Add-IcingaHashtableItem ` - -Hashtable $Global:Icinga.Private.Daemons.ServiceCheck.PerformanceCache ` - -Key $HashIndex ` - -Value @{ } | Out-Null; + foreach ($PerfLabel in $Global:Icinga.Private.Scheduler.PerfDataWriter.Daemon.Keys) { + $PerfValue = $Global:Icinga.Private.Scheduler.PerfDataWriter.Daemon[$PerfLabel].Value; + $PerfUnit = $Global:Icinga.Private.Scheduler.PerfDataWriter.Daemon[$PerfLabel].Unit; - foreach ($timeEntry in $Global:Icinga.Private.Daemons.ServiceCheck.SortedResult) { - - if ((Test-Numeric $timeEntry.Value) -eq $FALSE) { - continue; - } + if ($CheckDataCache.ContainsKey($PerfLabel) -eq $FALSE) { + $CheckDataCache.Add($PerfLabel, (New-Object System.Collections.ArrayList)); + } - foreach ($calc in $Global:Icinga.Private.Daemons.ServiceCheck.AverageCalculation.Keys) { - if (($UnixTime - $Global:Icinga.Private.Daemons.ServiceCheck.AverageCalculation[$calc].Time) -le [int]$timeEntry.Key) { - $Global:Icinga.Private.Daemons.ServiceCheck.AverageCalculation[$calc].Sum += $timeEntry.Value; - $Global:Icinga.Private.Daemons.ServiceCheck.AverageCalculation[$calc].Count += 1; - } - } - if (($UnixTime - $Global:Icinga.Private.Daemons.ServiceCheck.MaxTimeInSeconds) -le [int]$timeEntry.Key) { - Add-IcingaHashtableItem ` - -Hashtable $Global:Icinga.Private.Daemons.ServiceCheck.PerformanceCache[$HashIndex] ` - -Key ([string]$timeEntry.Key) ` - -Value ([string]$timeEntry.Value) | Out-Null; - } 
+ $CheckDataCache[$PerfLabel].Add( + @{ + 'Time' = $UnixTime; + 'Value' = $PerfValue; + 'Unit' = $PerfUnit; + } + ) | Out-Null; + + [int]$IndexCount = $CheckDataCache[$PerfLabel].Count; + [int]$RemoveIndex = 0; + for ($i = 0; $i -lt $IndexCount; $i++) { + # In case we store more values than we require for our max time range, remove the oldest one + if (($UnixTime - $Global:Icinga.Private.Daemons.ServiceCheck.MaxTimeInSeconds) -gt [int]($CheckDataCache[$PerfLabel][$i].Time)) { + $RemoveIndex += 1; + continue; } + # Calculate the average value for our performance data based on the remaining data foreach ($calc in $Global:Icinga.Private.Daemons.ServiceCheck.AverageCalculation.Keys) { - if ($Global:Icinga.Private.Daemons.ServiceCheck.AverageCalculation[$calc].Count -ne 0) { - $AverageValue = ($Global:Icinga.Private.Daemons.ServiceCheck.AverageCalculation[$calc].Sum / $Global:Icinga.Private.Daemons.ServiceCheck.AverageCalculation[$calc].Count); - [string]$MetricMultiName = [string]::Format('::{0}::Interval{1}', (Format-IcingaPerfDataLabel -PerfData $HashIndex -MultiOutput), $Global:Icinga.Private.Daemons.ServiceCheck.AverageCalculation[$calc].Time); - $Global:Icinga.Private.Scheduler.CheckData[$CheckCommand]['average'] | Add-Member -MemberType NoteProperty -Name $MetricMultiName -Value $AverageValue -Force; + if (($UnixTime - $Global:Icinga.Private.Daemons.ServiceCheck.AverageCalculation[$calc].Time) -le [int]($CheckDataCache[$PerfLabel][$i].Time)) { + $Global:Icinga.Private.Daemons.ServiceCheck.AverageCalculation[$calc].Sum += $CheckDataCache[$PerfLabel][$i].Value; + $Global:Icinga.Private.Daemons.ServiceCheck.AverageCalculation[$calc].Count += 1; } + } + } - $Global:Icinga.Private.Daemons.ServiceCheck.AverageCalculation[$calc].Sum = 0; - $Global:Icinga.Private.Daemons.ServiceCheck.AverageCalculation[$calc].Count = 0; + # Remove older entries more efficiently. 
As we store the data in an ArrayList, the oldest entries are at the beginning + # Therefore we can just remove a range of entries from the beginning of the list or clear the list if we need to remove all entries + if ($RemoveIndex -gt 0) { + if ($RemoveIndex -ge $IndexCount) { + $CheckDataCache[$PerfLabel].Clear() | Out-Null; + } else { + $CheckDataCache[$PerfLabel].RemoveRange(0, $RemoveIndex) | Out-Null; } + $RemoveIndex = 0; } - Write-IcingaDebugMessage ` - -Message 'Object dump of service check daemon' ` - -Objects @( - $CheckCommand, - 'Average Calc', - ($Global:Icinga.Private.Daemons.ServiceCheck.AverageCalculation | Out-String), - 'PerformanceCache', - $Global:Icinga.Private.Daemons.ServiceCheck.PerformanceCache, - 'Max Time in Seconds', - $Global:Icinga.Private.Daemons.ServiceCheck.MaxTimeInSeconds, - 'Unix Time', - $UnixTime - ); - - # Flush data we no longer require in our cache to free memory - [array]$CheckStores = $Global:Icinga.Private.Scheduler.CheckData[$CheckCommand]['results'].Keys; - - foreach ($CheckStore in $CheckStores) { - [string]$CheckKey = $CheckStore; - [array]$CheckTimeStamps = $global:Icinga.Private.Scheduler.CheckData[$CheckCommand]['results'][$CheckKey].Keys; - - foreach ($TimeSample in $CheckTimeStamps) { - if (($UnixTime - $Global:Icinga.Private.Daemons.ServiceCheck.MaxTimeInSeconds) -gt [int]$TimeSample) { - Remove-IcingaHashtableItem -Hashtable $global:Icinga.Private.Scheduler.CheckData[$CheckCommand]['results'][$CheckKey] -Key ([string]$TimeSample); - } + # Now calculate the average values for our performance data + foreach ($calc in $Global:Icinga.Private.Daemons.ServiceCheck.AverageCalculation.Keys) { + if ($Global:Icinga.Private.Daemons.ServiceCheck.AverageCalculation[$calc].Count -ne 0) { + $AverageValue = ($Global:Icinga.Private.Daemons.ServiceCheck.AverageCalculation[$calc].Sum / $Global:Icinga.Private.Daemons.ServiceCheck.AverageCalculation[$calc].Count); + [string]$MetricMultiName = 
[string]::Format('{0}::Interval{1}={2}{3}', $PerfLabel, $Global:Icinga.Private.Daemons.ServiceCheck.AverageCalculation[$calc].Time, (Format-IcingaPerfDataValue $AverageValue), $PerfUnit); + # Write our performance data label + $PerfDataEntries += $MetricMultiName; } + + $Global:Icinga.Private.Daemons.ServiceCheck.AverageCalculation[$calc].Sum = 0; + $Global:Icinga.Private.Daemons.ServiceCheck.AverageCalculation[$calc].Count = 0; } + } - Set-IcingaCacheData -Space 'sc_daemon' -CacheStore 'checkresult' -KeyName $CheckCommand -Value $global:Icinga.Private.Scheduler.CheckData[$CheckCommand]['average']; + $Global:Icinga.Public.Daemons.ServiceCheck.PerformanceDataCache[$CheckCommand] = $PerfDataEntries -Join ' '; + $PerfDataEntries = @(); - # Make the performance data available for all threads - $Global:Icinga.Public.Daemons.ServiceCheck.PerformanceDataCache[$CheckCommand] = $global:Icinga.Private.Scheduler.CheckData[$CheckCommand]['average']; - # Write collected metrics to disk in case we reload the daemon. 
We will load them back into the module after reload then - Set-IcingaCacheData -Space 'sc_daemon' -CacheStore 'checkresult_store' -KeyName $CheckCommand -Value $Global:Icinga.Private.Daemons.ServiceCheck.PerformanceCache; + $PerformanceLabelFile = Join-Path -Path (Join-Path -Path (Join-Path -Path (Get-IcingaCacheDir) -ChildPath 'service_check_cache') -ChildPath 'performance_labels') -ChildPath ([string]::Format('{0}.db', $CheckCommand)); + $CheckCacheXMLObj = [System.Management.Automation.PSSerializer]::Serialize($CheckDataCache); - } catch { - Write-IcingaEventMessage -EventId 1452 -Namespace 'Framework' -ExceptionObject $_ -Objects $CheckCommand, ($Arguments | Out-String), (Get-IcingaInternalPluginOutput); + if ((Test-Path -Path $PerformanceLabelFile) -eq $FALSE) { + New-Item -Path $PerformanceLabelFile -ItemType File -Force | Out-Null; + } + if ((Test-Path -Path $MetricCacheFile) -eq $FALSE) { + New-Item -Path $MetricCacheFile -ItemType File -Force | Out-Null; } - # Always ensure our check data is cleared regardless of possible - # exceptions which might occur - Clear-IcingaCheckSchedulerEnvironment; - # Reset certain values from the scheduler environment - Clear-IcingaServiceCheckDaemonEnvironment; - # Force Icinga for Windows Garbage Collection - Optimize-IcingaForWindowsMemory -ClearErrorStack -SmartGC; + Set-Content -Path $PerformanceLabelFile -Value $Global:Icinga.Public.Daemons.ServiceCheck.PerformanceDataCache[$CheckCommand] -Force -Encoding UTF8; + Set-Content -Path $MetricCacheFile -Value $CheckCacheXMLObj -Force -Encoding UTF8; } } diff --git a/lib/daemons/ServiceCheckDaemon/tools/New-IcingaServiceCheckDaemonEnvironment.psm1 b/lib/daemons/ServiceCheckDaemon/tools/New-IcingaServiceCheckDaemonEnvironment.psm1 index 34266e21..97863d65 100644 --- a/lib/daemons/ServiceCheckDaemon/tools/New-IcingaServiceCheckDaemonEnvironment.psm1 +++ b/lib/daemons/ServiceCheckDaemon/tools/New-IcingaServiceCheckDaemonEnvironment.psm1 @@ -8,7 +8,7 @@ function 
New-IcingaServiceCheckDaemonEnvironment() if ($Global:Icinga.Public.Daemons.ServiceCheck.PerformanceDataCache.ContainsKey($CheckCommand) -eq $FALSE) { $Global:Icinga.Public.Daemons.ServiceCheck.PerformanceDataCache.Add( - $CheckCommand, @{ } + $CheckCommand, '' ); } diff --git a/lib/icinga/plugin/Compare-IcingaPluginThresholds.psm1 b/lib/icinga/plugin/Compare-IcingaPluginThresholds.psm1 index 234e3b58..5b98ba32 100644 --- a/lib/icinga/plugin/Compare-IcingaPluginThresholds.psm1 +++ b/lib/icinga/plugin/Compare-IcingaPluginThresholds.psm1 @@ -104,6 +104,10 @@ function Compare-IcingaPluginThresholds() # Fix possible numeric value comparison issues $TestInput = Test-IcingaDecimal $InputValue; $BaseInput = Test-IcingaDecimal $BaseValue; + $MoTData = @{ + 'Label' = $PerfDataLabel; + 'Interval' = $TimeInterval; + }; if ($TestInput.Decimal) { [decimal]$InputValue = [decimal]$TestInput.Value; @@ -132,17 +136,17 @@ function Compare-IcingaPluginThresholds() $CheckResult = $null; if ($Matches) { - $CheckResult = Compare-IcingaPluginValueToThreshold -Value $InputValue -BaseValue $IcingaThresholds.BaseValue -Threshold $IcingaThresholds.Threshold -Unit $Unit -Translation $Translation -OverrideMode $IcingaEnums.IcingaThresholdMethod.Matches; + $CheckResult = Compare-IcingaPluginValueToThreshold -Value $InputValue -BaseValue $IcingaThresholds.BaseValue -Threshold $IcingaThresholds.Threshold -Unit $Unit -Translation $Translation -OverrideMode $IcingaEnums.IcingaThresholdMethod.Matches -MetricsOverTime $MoTData; } elseif ($NotMatches) { - $CheckResult = Compare-IcingaPluginValueToThreshold -Value $InputValue -BaseValue $IcingaThresholds.BaseValue -Threshold $IcingaThresholds.Threshold -Unit $Unit -Translation $Translation -OverrideMode $IcingaEnums.IcingaThresholdMethod.NotMatches; + $CheckResult = Compare-IcingaPluginValueToThreshold -Value $InputValue -BaseValue $IcingaThresholds.BaseValue -Threshold $IcingaThresholds.Threshold -Unit $Unit -Translation $Translation -OverrideMode 
$IcingaEnums.IcingaThresholdMethod.NotMatches -MetricsOverTime $MoTData; } elseif ($IsBetween) { - $CheckResult = Compare-IcingaPluginValueToThreshold -Value $InputValue -BaseValue $IcingaThresholds.BaseValue -Threshold $IcingaThresholds.Threshold -Unit $Unit -Translation $Translation -OverrideMode $IcingaEnums.IcingaThresholdMethod.Between; + $CheckResult = Compare-IcingaPluginValueToThreshold -Value $InputValue -BaseValue $IcingaThresholds.BaseValue -Threshold $IcingaThresholds.Threshold -Unit $Unit -Translation $Translation -OverrideMode $IcingaEnums.IcingaThresholdMethod.Between -MetricsOverTime $MoTData; } elseif ($IsLowerEqual) { - $CheckResult = Compare-IcingaPluginValueToThreshold -Value $InputValue -BaseValue $IcingaThresholds.BaseValue -Threshold $IcingaThresholds.Threshold -Unit $Unit -Translation $Translation -OverrideMode $IcingaEnums.IcingaThresholdMethod.LowerEqual; + $CheckResult = Compare-IcingaPluginValueToThreshold -Value $InputValue -BaseValue $IcingaThresholds.BaseValue -Threshold $IcingaThresholds.Threshold -Unit $Unit -Translation $Translation -OverrideMode $IcingaEnums.IcingaThresholdMethod.LowerEqual -MetricsOverTime $MoTData; } elseif ($IsGreaterEqual) { - $CheckResult = Compare-IcingaPluginValueToThreshold -Value $InputValue -BaseValue $IcingaThresholds.BaseValue -Threshold $IcingaThresholds.Threshold -Unit $Unit -Translation $Translation -OverrideMode $IcingaEnums.IcingaThresholdMethod.GreaterEqual; + $CheckResult = Compare-IcingaPluginValueToThreshold -Value $InputValue -BaseValue $IcingaThresholds.BaseValue -Threshold $IcingaThresholds.Threshold -Unit $Unit -Translation $Translation -OverrideMode $IcingaEnums.IcingaThresholdMethod.GreaterEqual -MetricsOverTime $MoTData; } else { - $CheckResult = Compare-IcingaPluginValueToThreshold -Value $InputValue -BaseValue $IcingaThresholds.BaseValue -Threshold $IcingaThresholds.Threshold -Unit $Unit -Translation $Translation; + $CheckResult = Compare-IcingaPluginValueToThreshold -Value 
$InputValue -BaseValue $IcingaThresholds.BaseValue -Threshold $IcingaThresholds.Threshold -Unit $Unit -Translation $Translation -MetricsOverTime $MoTData; } $IcingaThresholds.Message = $CheckResult.Message; diff --git a/lib/icinga/plugin/Compare-IcingaPluginValueToThreshold.psm1 b/lib/icinga/plugin/Compare-IcingaPluginValueToThreshold.psm1 index 0f29de70..45cf8da0 100644 --- a/lib/icinga/plugin/Compare-IcingaPluginValueToThreshold.psm1 +++ b/lib/icinga/plugin/Compare-IcingaPluginValueToThreshold.psm1 @@ -54,12 +54,13 @@ function Compare-IcingaPluginValueToThreshold() { param ( - $Value = $null, - $BaseValue = $null, - $Unit = $null, - $Translation = $null, - $Threshold = $null, - $OverrideMode = $null + $Value = $null, + $BaseValue = $null, + $Unit = $null, + $Translation = $null, + $Threshold = $null, + $OverrideMode = $null, + $MetricsOverTime = $null ); [hashtable]$RetValue = @{ @@ -67,6 +68,25 @@ function Compare-IcingaPluginValueToThreshold() 'IsOk' = $FALSE; 'HasError' = $FALSE; } + + # This will properly handle metrics over time + $MoTObject = ConvertTo-IcingaMetricsOverTime -MetricsOverTime $MetricsOverTime; + $OriginalValue = $Value; + if ($MoTObject.Error -eq $FALSE) { + if ($MoTObject.Apply) { + $Value = $MoTObject.Value; + } + } else { + $RetValue.Message = $MoTObject.Message; + $RetValue.HasError = $TRUE; + + return $RetValue; + } + + # The MoT message is by default empty and will do nothing. 
In case we use checks for + # Metrics over Time, this will return something like "(15m Avg.)" and expand it to the + # final output message + $MoTMessage = $MoTObject.Message; $HumanReadableValue = $Value; $PercentValue = $null; $TranslatedValue = $Value; @@ -114,12 +134,12 @@ function Compare-IcingaPluginValueToThreshold() $IcingaEnums.IcingaThresholdMethod.Default { if ($Value -lt 0 -Or $Value -gt $Threshold.Value) { if ($Value -lt 0) { - $RetValue.Message = [string]::Format('Value {0} is lower than 0', $HumanReadableValue); + $RetValue.Message = [string]::Format('Value {0} is lower than 0{1}', $HumanReadableValue, $MoTMessage); return $RetValue; } if ($Value -gt $Threshold.Value) { - $RetValue.Message = [string]::Format('Value {0} is greater than threshold {1}', $HumanReadableValue, (Convert-IcingaPluginValueToString -Value $Threshold.Value -BaseValue $BaseValue -Unit $Threshold.Unit -OriginalUnit $Unit -UsePercent:$UsePercent -IsThreshold)); + $RetValue.Message = [string]::Format('Value {0} is greater than threshold {1}{2}', $HumanReadableValue, (Convert-IcingaPluginValueToString -Value $Threshold.Value -BaseValue $BaseValue -Unit $Threshold.Unit -OriginalUnit $Unit -UsePercent:$UsePercent -IsThreshold), $MoTMessage); return $RetValue; } } @@ -127,56 +147,56 @@ function Compare-IcingaPluginValueToThreshold() }; $IcingaEnums.IcingaThresholdMethod.Lower { if ($Value -lt $Threshold.Value) { - $RetValue.Message = [string]::Format('Value {0} is lower than threshold {1}', $HumanReadableValue, (Convert-IcingaPluginValueToString -Value $Threshold.Value -BaseValue $BaseValue -Unit $Threshold.Unit -OriginalUnit $Unit -UsePercent:$UsePercent -IsThreshold)); + $RetValue.Message = [string]::Format('Value {0} is lower than threshold {1}{2}', $HumanReadableValue, (Convert-IcingaPluginValueToString -Value $Threshold.Value -BaseValue $BaseValue -Unit $Threshold.Unit -OriginalUnit $Unit -UsePercent:$UsePercent -IsThreshold), $MoTMessage); return $RetValue; } break; }; 
$IcingaEnums.IcingaThresholdMethod.LowerEqual { if ($Value -le $Threshold.Value) { - $RetValue.Message = [string]::Format('Value {0} is lower or equal than threshold {1}', $HumanReadableValue, (Convert-IcingaPluginValueToString -Value $Threshold.Value -BaseValue $BaseValue -Unit $Threshold.Unit -OriginalUnit $Unit -UsePercent:$UsePercent -IsThreshold)); + $RetValue.Message = [string]::Format('Value {0} is lower or equal than threshold {1}{2}', $HumanReadableValue, (Convert-IcingaPluginValueToString -Value $Threshold.Value -BaseValue $BaseValue -Unit $Threshold.Unit -OriginalUnit $Unit -UsePercent:$UsePercent -IsThreshold), $MoTMessage); return $RetValue; } break; }; $IcingaEnums.IcingaThresholdMethod.Greater { if ($Value -gt $Threshold.Value) { - $RetValue.Message = [string]::Format('Value {0} is greater than threshold {1}', $HumanReadableValue, (Convert-IcingaPluginValueToString -Value $Threshold.Value -BaseValue $BaseValue -Unit $Threshold.Unit -OriginalUnit $Unit -UsePercent:$UsePercent -IsThreshold)); + $RetValue.Message = [string]::Format('Value {0} is greater than threshold {1}{2}', $HumanReadableValue, (Convert-IcingaPluginValueToString -Value $Threshold.Value -BaseValue $BaseValue -Unit $Threshold.Unit -OriginalUnit $Unit -UsePercent:$UsePercent -IsThreshold), $MoTMessage); return $RetValue; } break; }; $IcingaEnums.IcingaThresholdMethod.GreaterEqual { if ($Value -gt $Threshold.Value) { - $RetValue.Message = [string]::Format('Value {0} is greater or equal than threshold {1}', $HumanReadableValue, (Convert-IcingaPluginValueToString -Value $Threshold.Value -BaseValue $BaseValue -Unit $Threshold.Unit -OriginalUnit $Unit -UsePercent:$UsePercent -IsThreshold)); + $RetValue.Message = [string]::Format('Value {0} is greater or equal than threshold {1}{2}', $HumanReadableValue, (Convert-IcingaPluginValueToString -Value $Threshold.Value -BaseValue $BaseValue -Unit $Threshold.Unit -OriginalUnit $Unit -UsePercent:$UsePercent -IsThreshold), $MoTMessage); return 
$RetValue; } break; }; $IcingaEnums.IcingaThresholdMethod.Between { if ($Value -lt $Threshold.StartRange -Or $Value -gt $Threshold.EndRange) { - $RetValue.Message = [string]::Format('Value {0} is not between thresholds <{1} or >{2}', $HumanReadableValue, (Convert-IcingaPluginValueToString -Value $Threshold.StartRange -BaseValue $BaseValue -Unit $Threshold.Unit -OriginalUnit $Unit -UsePercent:$UsePercent -IsThreshold), (Convert-IcingaPluginValueToString -Value $Threshold.EndRange -BaseValue $BaseValue -Unit $Threshold.Unit -OriginalUnit $Unit -UsePercent:$UsePercent -IsThreshold)); + $RetValue.Message = [string]::Format('Value {0} is not between thresholds <{1} or >{2}{3}', $HumanReadableValue, (Convert-IcingaPluginValueToString -Value $Threshold.StartRange -BaseValue $BaseValue -Unit $Threshold.Unit -OriginalUnit $Unit -UsePercent:$UsePercent -IsThreshold), (Convert-IcingaPluginValueToString -Value $Threshold.EndRange -BaseValue $BaseValue -Unit $Threshold.Unit -OriginalUnit $Unit -UsePercent:$UsePercent -IsThreshold), $MoTMessage); return $RetValue; } break; }; $IcingaEnums.IcingaThresholdMethod.Outside { if ($Value -ge $Threshold.StartRange -And $Value -le $Threshold.EndRange) { - $RetValue.Message = [string]::Format('Value {0} is between thresholds >={1} and <={2}', $HumanReadableValue, (Convert-IcingaPluginValueToString -Value $Threshold.StartRange -BaseValue $BaseValue -Unit $Threshold.Unit -OriginalUnit $Unit -UsePercent:$UsePercent -IsThreshold), (Convert-IcingaPluginValueToString -Value $Threshold.EndRange -BaseValue $BaseValue -Unit $Threshold.Unit -OriginalUnit $Unit -UsePercent:$UsePercent -IsThreshold)); + $RetValue.Message = [string]::Format('Value {0} is between thresholds >={1} and <={2}{3}', $HumanReadableValue, (Convert-IcingaPluginValueToString -Value $Threshold.StartRange -BaseValue $BaseValue -Unit $Threshold.Unit -OriginalUnit $Unit -UsePercent:$UsePercent -IsThreshold), (Convert-IcingaPluginValueToString -Value $Threshold.EndRange -BaseValue 
$BaseValue -Unit $Threshold.Unit -OriginalUnit $Unit -UsePercent:$UsePercent -IsThreshold), $MoTMessage); return $RetValue; } break; }; $IcingaEnums.IcingaThresholdMethod.Matches { if ($Value -Like $Threshold.Value ) { - $RetValue.Message = [string]::Format('Value {0} is matching threshold {1}', $TranslatedValue, (ConvertTo-IcingaPluginOutputTranslation -Translation $Translation -Value $Threshold.Value)); + $RetValue.Message = [string]::Format('Value {0} is matching threshold {1}{2}', $TranslatedValue, (ConvertTo-IcingaPluginOutputTranslation -Translation $Translation -Value $Threshold.Value), $MoTMessage); return $RetValue; } break; }; $IcingaEnums.IcingaThresholdMethod.NotMatches { if ($Value -NotLike $Threshold.Value ) { - $RetValue.Message = [string]::Format('Value {0} is not matching threshold {1}', $TranslatedValue, (ConvertTo-IcingaPluginOutputTranslation -Translation $Translation -Value $Threshold.Value)); + $RetValue.Message = [string]::Format('Value {0} is not matching threshold {1}{2}', $TranslatedValue, (ConvertTo-IcingaPluginOutputTranslation -Translation $Translation -Value $Threshold.Value), $MoTMessage); return $RetValue; } break; diff --git a/lib/icinga/plugin/ConvertTo-IcingaMetricsOverTime.psm1 b/lib/icinga/plugin/ConvertTo-IcingaMetricsOverTime.psm1 new file mode 100644 index 00000000..65f66e22 --- /dev/null +++ b/lib/icinga/plugin/ConvertTo-IcingaMetricsOverTime.psm1 @@ -0,0 +1,59 @@ +function ConvertTo-IcingaMetricsOverTime() +{ + param ( + $MetricsOverTime = $null + ); + + $MoTObj = @{ + 'Value' = $null; + 'Message' = ''; + 'Apply' = $FALSE; + 'Error' = $FALSE; + } + + if ($MetricsOverTime -eq $null) { + return $MoTObj; + } + + if ([string]::IsNullOrEmpty($MetricsOverTime.Interval)) { + return $MoTObj; + } + + try { + [int]$IntervalInSeconds = ConvertTo-Seconds -Value $MetricsOverTime.Interval; + $MoTPerfData = Get-IcingaMetricsOverTimePerfData; + [array]$MoTToArray = $MoTPerfData.Split(' '); + [string]$SearchLabel = 
[string]::Format('{0}::Interval{1}', $MetricsOverTime.Label, $IntervalInSeconds); + [hashtable]$AvailableMoT = @{ }; + + foreach ($mot in $MoTToArray) { + if ([string]::IsNullOrEmpty($mot) -Or $mot.Contains('=') -eq $FALSE) { + continue; + } + + $MoTPerfData = $mot.Split('='); + $TimeIndexName = [string]::Format('{0}s', $MoTPerfData[0].Split('::')[-1].Replace('Interval', '')); + + if ($AvailableMoT.ContainsKey($TimeIndexName) -eq $FALSE) { + $AvailableMoT.Add($TimeIndexName, $TRUE); + } + + if ($MoTPerfData[0] -eq $SearchLabel) { + $MoTObj.Value = $MoTPerfData[1]; + $MoTObj.Apply = $TRUE; + $MoTObj.Message = [string]::Format(' ({0} Avg.)', $MetricsOverTime.Interval); + break; + } + } + + if ($MoTObj.Apply -eq $FALSE) { + $MoTObj.Message = [string]::Format('[Failed to parse metrics over time with -ThresholdInterval "{0}": No data found matching the requested time index. Available indexes: [{1}]]', $MetricsOverTime.Interval, ($AvailableMoT.Keys -Join ', ')); + $MoTObj.Error = $TRUE; + } + } catch { + $MoTObj.Message = [string]::Format('[Failed to parse metrics over time with -ThresholdInterval "{0}": {1}]', $MetricsOverTime.Interval, $_.Exception.Message); + $MoTObj.Error = $TRUE; + } + + return $MoTObj; +} diff --git a/lib/icinga/plugin/Get-IcingaMetricsOverTimePerfData.psm1 b/lib/icinga/plugin/Get-IcingaMetricsOverTimePerfData.psm1 new file mode 100644 index 00000000..b8287f01 --- /dev/null +++ b/lib/icinga/plugin/Get-IcingaMetricsOverTimePerfData.psm1 @@ -0,0 +1,38 @@ +function Get-IcingaMetricsOverTimePerfData() +{ + param ( + [switch]$AddWhiteSpace = $FALSE + ); + + [string]$MetricsOverTime = ''; + [bool]$IsDaemonWorker = $FALSE; + + if ([string]::IsNullOrEmpty($Global:Icinga.Private.Scheduler.PerfDataWriter.MetricsOverTime) -eq $FALSE) { + if ($AddWhiteSpace) { + return (' ' + $Global:Icinga.Private.Scheduler.PerfDataWriter.MetricsOverTime); + } + + return $Global:Icinga.Private.Scheduler.PerfDataWriter.MetricsOverTime; + } + + if 
($Global:Icinga.Public.Daemons.ContainsKey('ServiceCheck') -And $Global:Icinga.Public.Daemons.ServiceCheck.ContainsKey('PerformanceDataCache') -And $Global:Icinga.Public.Daemons.ServiceCheck.PerformanceDataCache.ContainsKey($Global:Icinga.Private.Scheduler.CheckCommand)) { + $IsDaemonWorker = $TRUE; + } + + if ($IsDaemonWorker) { + $MetricsOverTime = $Global:Icinga.Public.Daemons.ServiceCheck.PerformanceDataCache[$Global:Icinga.Private.Scheduler.CheckCommand]; + } else { + $PerformanceLabelFile = Join-Path -Path (Join-Path -Path (Join-Path -Path (Get-IcingaCacheDir) -ChildPath 'service_check_cache') -ChildPath 'performance_labels') -ChildPath ([string]::Format('{0}.db', $Global:Icinga.Private.Scheduler.CheckCommand)); + if (Test-Path -Path $PerformanceLabelFile) { + $MetricsOverTime = Get-Content -Path $PerformanceLabelFile -Raw; + } + } + + $Global:Icinga.Private.Scheduler.PerfDataWriter.MetricsOverTime = $MetricsOverTime; + + if ([string]::IsNullOrEmpty($MetricsOverTime) -eq $FALSE -And $AddWhiteSpace) { + $MetricsOverTime = ' ' + $MetricsOverTime; + } + + return $MetricsOverTime; +} diff --git a/lib/icinga/plugin/New-IcingaCheck.psm1 b/lib/icinga/plugin/New-IcingaCheck.psm1 index 3434a4d2..bbefe8d0 100644 --- a/lib/icinga/plugin/New-IcingaCheck.psm1 +++ b/lib/icinga/plugin/New-IcingaCheck.psm1 @@ -191,6 +191,23 @@ function New-IcingaCheck() return $this.__CriticalValue; } + $IcingaCheck | Add-Member -MemberType ScriptMethod -Name '__CreatePerfDataLabel' -Value { + $PerfDataTemplate = ($this.__CheckCommand.Replace('Invoke-IcingaCheck', '')); + + if ([string]::IsNullOrEmpty($this.MetricTemplate) -eq $FALSE) { + $PerfDataTemplate = $this.MetricTemplate; + } + + [string]$PerfDataName = [string]::Format( + '{0}::ifw_{1}::{2}', + $this.MetricIndex, + $PerfDataTemplate.ToLower(), + $this.MetricName + ); + + return $PerfDataName; + } + $IcingaCheck | Add-Member -MemberType ScriptMethod -Name '__SetPerformanceData' -Value { if ($null -eq $this.__ThresholdObject -Or 
$this.NoPerfData) { return; @@ -230,18 +247,7 @@ function New-IcingaCheck() } } - $PerfDataTemplate = ($this.__CheckCommand.Replace('Invoke-IcingaCheck', '')); - - if ([string]::IsNullOrEmpty($this.MetricTemplate) -eq $FALSE) { - $PerfDataTemplate = $this.MetricTemplate; - } - - [string]$PerfDataName = [string]::Format( - '{0}::ifw_{1}::{2}', - $this.MetricIndex, - $PerfDataTemplate.ToLower(), - $this.MetricName - ); + [string]$PerfDataName = $this.__CreatePerfDataLabel(); # Ensure we only add a label with identical name once if ($Global:Icinga.Private.Scheduler.PerfDataWriter.Cache.ContainsKey($PerfDataName) -eq $FALSE) { @@ -266,6 +272,23 @@ function New-IcingaCheck() $Global:Icinga.Private.Scheduler.PerfDataWriter.Storage.Append(' ') | Out-Null; } + # This is just to make sure the background daemon has data to work with and also ensure we don't increase + # memory in case we don't have the daemon running + if ($Global:Icinga.Private.Scheduler.PerfDataWriter.Daemon.ContainsKey($PerfDataName) -eq $FALSE) { + $Global:Icinga.Private.Scheduler.PerfDataWriter.Daemon.Add( + $PerfDataName, + @{ + 'Value' = $value; + 'Unit' = $this.__ThresholdObject.PerfUnit + } + ); + } else { + $Global:Icinga.Private.Scheduler.PerfDataWriter.Daemon[$PerfDataName] = @{ + 'Value' = $value; + 'Unit' = $this.__ThresholdObject.PerfUnit + } + } + $Global:Icinga.Private.Scheduler.PerfDataWriter.Storage.Append($PerfDataLabel.ToLower()) | Out-Null; } @@ -463,7 +486,7 @@ function New-IcingaCheck() '-BaseValue' = $this.BaseValue; '-Unit' = $this.Unit; '-CheckName' = $this.__GetName(); - '-PerfDataLabel' = $this.LabelName; + '-PerfDataLabel' = $this.__CreatePerfDataLabel(); '-ThresholdCache' = (Get-IcingaThresholdCache -CheckCommand $this.__CheckCommand); '-Translation' = $this.Translation; '-TimeInterval' = $this.__TimeInterval; diff --git a/lib/icinga/plugin/New-IcingaCheckBaseObject.psm1 b/lib/icinga/plugin/New-IcingaCheckBaseObject.psm1 index ca8f3005..520dbeba 100644 --- 
a/lib/icinga/plugin/New-IcingaCheckBaseObject.psm1 +++ b/lib/icinga/plugin/New-IcingaCheckBaseObject.psm1 @@ -20,7 +20,8 @@ function New-IcingaCheckBaseObject() foreach ($entry in $CallStack) { [string]$CheckCommand = $entry.Command; if ($CheckCommand.ToLower() -Like 'invoke-icingacheck*') { - $this.__CheckCommand = $CheckCommand; + $this.__CheckCommand = $CheckCommand; + $Global:Icinga.Private.Scheduler.CheckCommand = $CheckCommand; break; } } diff --git a/lib/icinga/plugin/New-IcingaCheckResult.psm1 b/lib/icinga/plugin/New-IcingaCheckResult.psm1 index ec4ce55e..d64c265a 100644 --- a/lib/icinga/plugin/New-IcingaCheckResult.psm1 +++ b/lib/icinga/plugin/New-IcingaCheckResult.psm1 @@ -28,6 +28,10 @@ function New-IcingaCheckResult() Write-IcingaPluginPerfData -IcingaCheck $this.Check; } + # Clear our metrics over time cache, as we need to load them again for the next + # plugin execution + $Global:Icinga.Private.Scheduler.PerfDataWriter.MetricsOverTime = ''; + # Ensure we reset our internal cache once the plugin was executed $CheckCommand = $this.Check.__GetCheckCommand(); if ([string]::IsNullOrEmpty($CheckCommand) -eq $FALSE -And $Global:Icinga.Private.Scheduler.ThresholdCache.ContainsKey($CheckCommand)) { diff --git a/lib/icinga/plugin/Write-IcingaPluginPerfData.psm1 b/lib/icinga/plugin/Write-IcingaPluginPerfData.psm1 index c2f2f017..89f12fd4 100644 --- a/lib/icinga/plugin/Write-IcingaPluginPerfData.psm1 +++ b/lib/icinga/plugin/Write-IcingaPluginPerfData.psm1 @@ -1,11 +1,17 @@ function Write-IcingaPluginPerfData() { + # We shouldn't write all Metrics over Time to Icinga, as this will just cause a massive + # overload of Performance Metrics written and processed. 
We leave this code for now + # allowing us to enable it later again or make it user configurable + #[string]$MetricsOverTime = Get-IcingaMetricsOverTimePerfData -AddWhiteSpace; + [string]$MetricsOverTime = ''; + if ($Global:Icinga.Protected.RunAsDaemon -eq $FALSE -And $Global:Icinga.Protected.JEAContext -eq $FALSE) { if ($Global:Icinga.Private.Scheduler.PerfDataWriter.Storage.Length -ne 0) { - Write-IcingaConsolePlain ([string]::Format('| {0}', ($Global:Icinga.Private.Scheduler.PerfDataWriter.Storage.ToString()))); + Write-IcingaConsolePlain ([string]::Format('| {0}{1}', ($Global:Icinga.Private.Scheduler.PerfDataWriter.Storage.ToString()), $MetricsOverTime)); } } else { - $Global:Icinga.Private.Scheduler.PerformanceData = $Global:Icinga.Private.Scheduler.PerfDataWriter.Storage.ToString(); + $Global:Icinga.Private.Scheduler.PerformanceData = $Global:Icinga.Private.Scheduler.PerfDataWriter.Storage.ToString() + $MetricsOverTime; } # Ensure we clear our cache after writing the data