diff --git a/Dashboard/Services/DatabaseService.Overview.cs b/Dashboard/Services/DatabaseService.Overview.cs index 9b767e03..f218a223 100644 --- a/Dashboard/Services/DatabaseService.Overview.cs +++ b/Dashboard/Services/DatabaseService.Overview.cs @@ -32,7 +32,12 @@ public async Task> GetDailySummaryAsync(DateTime? summary // CPU column for the High CPU events count + critical-health check (PM#1004). // The view at report.daily_summary always uses total_cpu_utilization (no per-user prefs available there). // This date-parameterized path additionally honors the user's CpuAlertMode. - string cpuColumn = cpuAlertMode == CpuAlertMode.SqlOnly ? "sqlserver_cpu_utilization" : "total_cpu_utilization"; + // Total mode coalesces to the SQL-only figure because total_cpu_utilization is + // NULL on SQL Server on Linux, where host CPU is not derivable (Issue #1048). + // Expressions are fully cus.-qualified so they drop straight into the predicates below. Both alternatives are constant string literals chosen by the enum comparison (no caller-supplied text), so substituting {cpuColumn} into the SQL text cannot inject input. + string cpuColumn = cpuAlertMode == CpuAlertMode.SqlOnly + ? 
"cus.sqlserver_cpu_utilization" + : "ISNULL(cus.total_cpu_utilization, cus.sqlserver_cpu_utilization)"; // If no date provided, use the view directly (today's summary) // Otherwise, replicate the view logic with the specified date @@ -111,7 +116,7 @@ AND mpe.collection_time < @day_end SELECT COUNT_BIG(*) FROM collect.cpu_utilization_stats AS cus - WHERE cus.{cpuColumn} >= 80 + WHERE {cpuColumn} >= 80 AND cus.collection_time >= @day_start AND cus.collection_time < @day_end ), @@ -138,7 +143,7 @@ WHEN EXISTS SELECT 1/0 FROM collect.cpu_utilization_stats AS cus - WHERE cus.{cpuColumn} >= 90 + WHERE {cpuColumn} >= 90 AND cus.collection_time >= @day_start AND cus.collection_time < @day_end ) diff --git a/Dashboard/Services/DatabaseService.ResourceMetrics.cs b/Dashboard/Services/DatabaseService.ResourceMetrics.cs index 25758069..28007f4b 100644 --- a/Dashboard/Services/DatabaseService.ResourceMetrics.cs +++ b/Dashboard/Services/DatabaseService.ResourceMetrics.cs @@ -244,7 +244,9 @@ public async Task> GetCpuDataAsync(int hoursBack = 24, DateTi sample_time, sqlserver_cpu_utilization, other_process_cpu_utilization, - total_cpu_utilization + /* total is NULL on SQL Server on Linux (host CPU not derivable, Issue #1048); + degrade to the correct SQL-only figure so the chart never plots a total below SQL. */ + total_cpu_utilization = ISNULL(total_cpu_utilization, sqlserver_cpu_utilization) FROM collect.cpu_utilization_stats WHERE collection_time >= @from_date AND collection_time <= @to_date @@ -260,7 +262,9 @@ ORDER BY sample_time, sqlserver_cpu_utilization, other_process_cpu_utilization, - total_cpu_utilization + /* total is NULL on SQL Server on Linux (host CPU not derivable, Issue #1048); + degrade to the correct SQL-only figure so the chart never plots a total below SQL. 
*/ + total_cpu_utilization = ISNULL(total_cpu_utilization, sqlserver_cpu_utilization) FROM collect.cpu_utilization_stats WHERE collection_time >= DATEADD(HOUR, @hours_back, SYSDATETIME()) ORDER BY @@ -1580,7 +1584,9 @@ public async Task> GetCpuUtilizationAsync(int hoursBack cu.sample_time, cu.sqlserver_cpu_utilization, cu.other_process_cpu_utilization, - cu.total_cpu_utilization + /* total is NULL on SQL Server on Linux (host CPU not derivable, Issue #1048); + degrade to the correct SQL-only figure rather than reporting 0. */ + total_cpu_utilization = ISNULL(cu.total_cpu_utilization, cu.sqlserver_cpu_utilization) FROM collect.cpu_utilization_stats AS cu {dateFilter} ORDER BY diff --git a/Lite/Services/RemoteCollectorService.Cpu.cs b/Lite/Services/RemoteCollectorService.Cpu.cs index fbf3afd9..81e3d56c 100644 --- a/Lite/Services/RemoteCollectorService.Cpu.cs +++ b/Lite/Services/RemoteCollectorService.Cpu.cs @@ -49,18 +49,31 @@ drs.end_time DESC DECLARE @ms_ticks bigint, - @start_time datetime2(7); + @start_time datetime2(7), + @is_linux bit = 0; SELECT @ms_ticks = dosi.ms_ticks, @start_time = dosi.sqlserver_start_time FROM sys.dm_os_sys_info AS dosi; +/* Detect SQL Server on Linux. SystemIdle is always 0 in the SCHEDULER_MONITOR + ring buffer on Linux, so 100 - SystemIdle - ProcessUtilization fabricates a + host figure that pins total CPU at 100% (Issue #1048). No DMV exposes true host + CPU on Linux, so other_process is stored as NULL there. sys.dm_os_host_info is + 2017+; referenced via sp_executesql so SQL 2016 never binds it (@is_linux = 0). 
*/ +IF OBJECT_ID(N'sys.dm_os_host_info', N'V') IS NOT NULL + EXEC sys.sp_executesql + N'SELECT @linux = CASE WHEN hi.host_platform = N''Linux'' THEN 1 ELSE 0 END FROM sys.dm_os_host_info AS hi;', + N'@linux bit OUTPUT', @linux = @is_linux OUTPUT; + SELECT TOP (60) sample_time = DATEADD(SECOND, -((@ms_ticks - t.timestamp) / 1000), SYSDATETIME()), sqlserver_cpu_utilization = x.process_utilization, other_process_cpu_utilization = CASE + WHEN @is_linux = 1 + THEN NULL WHEN (100 - x.system_idle - x.process_utilization) < 0 THEN 0 ELSE 100 - x.system_idle - x.process_utilization @@ -158,7 +171,8 @@ drs.end_time DESC .AppendValue(GetServerNameForStorage(server)) .AppendValue(sampleTime) .AppendValue(reader.IsDBNull(1) ? 0 : reader.GetInt32(1)) - .AppendValue(reader.IsDBNull(2) ? 0 : reader.GetInt32(2)) + /* NULL = host/other CPU not derivable (SQL on Linux, Issue #1048). NOTE(review): appending NULL assumes the destination column accepts NULL; that column's definition is not part of this change - confirm. */ + .AppendValue(reader.IsDBNull(2) ? (int?)null : reader.GetInt32(2)) .EndRow(); rowsCollected++; diff --git a/install/02_create_tables.sql b/install/02_create_tables.sql index 7bd208c0..ee016039 100644 --- a/install/02_create_tables.sql +++ b/install/02_create_tables.sql @@ -941,7 +941,13 @@ BEGIN collection_time datetime2(7) NOT NULL DEFAULT SYSDATETIME(), sample_time datetime2(7) NOT NULL, sqlserver_cpu_utilization integer NOT NULL, - other_process_cpu_utilization integer NOT NULL, + /* + Nullable: NULL means host/other-process CPU is not derivable. On SQL Server + on Linux the SCHEDULER_MONITOR ring buffer reports SystemIdle = 0, so the + collector cannot compute a real host figure and stores NULL here rather than + a false 100% (Issue #1048). NULL propagates to total_cpu_utilization below. 
+ */ + other_process_cpu_utilization integer NULL, total_cpu_utilization AS (sqlserver_cpu_utilization + other_process_cpu_utilization) PERSISTED, CONSTRAINT PK_cpu_utilization_stats PRIMARY KEY CLUSTERED (collection_time, collection_id) WITH (DATA_COMPRESSION = PAGE) ); diff --git a/install/18_collect_cpu_utilization_stats.sql b/install/18_collect_cpu_utilization_stats.sql index df528289..1c3081b6 100644 --- a/install/18_collect_cpu_utilization_stats.sql +++ b/install/18_collect_cpu_utilization_stats.sql @@ -52,8 +52,28 @@ BEGIN FROM sys.dm_os_sys_info AS osi ), @max_sample_time datetime2(7) = NULL, + @is_linux bit = 0, @error_message nvarchar(4000); + /* + Detect SQL Server on Linux. On Linux the SCHEDULER_MONITOR ring buffer + reports SystemIdle = 0, so 100 - SystemIdle - ProcessUtilization fabricates + a host figure that pins total CPU at 100% forever (Issue #1048). There is no + DMV that exposes true host CPU on Linux, so on Linux we store NULL for + other_process_cpu_utilization instead of a false value. + + sys.dm_os_host_info exists only on SQL Server 2017+. It is referenced through + sp_executesql so SQL Server 2016 (which has no Linux build) never binds it and + simply leaves @is_linux = 0. 
+ */ + IF OBJECT_ID(N'sys.dm_os_host_info', N'V') IS NOT NULL + BEGIN + EXECUTE sys.sp_executesql + N'SELECT @linux = CASE WHEN hi.host_platform = N''Linux'' THEN 1 ELSE 0 END FROM sys.dm_os_host_info AS hi;', + N'@linux bit OUTPUT', + @linux = @is_linux OUTPUT; + END; + BEGIN TRY BEGIN TRANSACTION; @@ -134,6 +154,8 @@ BEGIN x.process_utilization, other_process_cpu_utilization = CASE + WHEN @is_linux = 1 + THEN NULL /*SystemIdle is always 0 on Linux; host CPU is not derivable (Issue #1048)*/ WHEN (100 - x.system_idle - x.process_utilization) < 0 THEN 0 ELSE 100 - x.system_idle - x.process_utilization diff --git a/install/47_create_reporting_views.sql b/install/47_create_reporting_views.sql index d9740203..41e5a300 100644 --- a/install/47_create_reporting_views.sql +++ b/install/47_create_reporting_views.sql @@ -243,8 +243,14 @@ AS SELECT event_time = cus.sample_time, sql_server_cpu = cus.sqlserver_cpu_utilization, + /* + other_process_cpu / total_cpu are NULL on SQL Server on Linux, where host CPU + is not derivable (Issue #1048). Fall the total back to the (correct) SQL-only + figure so consumers degrade to SQL CPU rather than NULL; severity is already + driven by sqlserver_cpu_utilization, so it is unaffected on Linux. + */ other_process_cpu = cus.other_process_cpu_utilization, - total_cpu = cus.total_cpu_utilization, + total_cpu = ISNULL(cus.total_cpu_utilization, cus.sqlserver_cpu_utilization), severity = CASE WHEN cus.sqlserver_cpu_utilization >= 90 @@ -350,7 +356,11 @@ SELECT SELECT COUNT_BIG(*) FROM collect.cpu_utilization_stats AS cus - WHERE cus.total_cpu_utilization >= 80 + /* + On Linux total_cpu_utilization is NULL (host CPU not derivable, Issue #1048); + fall back to the correct SQL-only figure so high-CPU events are still detected. 
+ */ + WHERE ISNULL(cus.total_cpu_utilization, cus.sqlserver_cpu_utilization) >= 80 AND cus.collection_time >= DATEADD(DAY, 0, CONVERT(date, SYSDATETIME())) ), collectors_failing = diff --git a/upgrades/2.11.0-to-2.12.0/03_make_other_process_cpu_nullable.sql b/upgrades/2.11.0-to-2.12.0/03_make_other_process_cpu_nullable.sql new file mode 100644 index 00000000..3b66ba45 --- /dev/null +++ b/upgrades/2.11.0-to-2.12.0/03_make_other_process_cpu_nullable.sql @@ -0,0 +1,101 @@ +/* +Copyright 2026 Darling Data, LLC +https://www.erikdarling.com/ + +Upgrade from 2.11.0 to 2.12.0 +Issue #1048: Linux host CPU pinned at 100%. + +On SQL Server on Linux the RING_BUFFER_SCHEDULER_MONITOR ring buffer reports +SystemIdle = 0 (a documented platform limitation — Microsoft's own sample query +carries the comment "SystemIdle on Linux will be 0"). The collector derives +other_process_cpu_utilization as 100 - SystemIdle - ProcessUtilization, so on +Linux that becomes 100 - 0 - sqlcpu and the host total (sqlserver + other) pins +at 100% forever. SQL Server's own CPU number (ProcessUtilization) is correct; +only the host/other figure is fabricated. No DMV exposes true host CPU on Linux, +so the collector now stores NULL for other_process_cpu_utilization on Linux and +the Dashboard renders the host figure as unavailable rather than a false 100%. + +That requires other_process_cpu_utilization to be nullable. It cannot be ALTERed +in place because the PERSISTED computed column total_cpu_utilization references +it (ALTER COLUMN is blocked while a computed column depends on the column), so we +drop the computed column, widen the base column to NULL, then re-add the computed +column. NULL in the base column propagates to NULL in total_cpu_utilization. + +Idempotent and partial-failure safe: each step is guarded independently, so a +re-run (or a re-run after an interrupted run) converges to the target shape. 
+*/ + +SET ANSI_NULLS ON; +SET ANSI_PADDING ON; +SET ANSI_WARNINGS ON; +SET ARITHABORT ON; +SET CONCAT_NULL_YIELDS_NULL ON; +SET QUOTED_IDENTIFIER ON; +SET NUMERIC_ROUNDABORT OFF; +SET IMPLICIT_TRANSACTIONS OFF; +SET STATISTICS TIME, IO OFF; +GO + +USE PerformanceMonitor; +GO + +IF OBJECT_ID(N'collect.cpu_utilization_stats', N'U') IS NULL +BEGIN + PRINT 'collect.cpu_utilization_stats does not exist — no action taken'; +END; +ELSE +BEGIN + DECLARE + @is_nullable bit = + ( + SELECT + c.is_nullable + FROM sys.columns AS c + WHERE c.object_id = OBJECT_ID(N'collect.cpu_utilization_stats') + AND c.name = N'other_process_cpu_utilization' + ); + + IF @is_nullable = 0 + BEGIN + /* + 1. Drop the persisted computed column that depends on the base column. + ALTER COLUMN below is blocked while this dependency exists. + */ + IF COL_LENGTH(N'collect.cpu_utilization_stats', N'total_cpu_utilization') IS NOT NULL + BEGIN + ALTER TABLE + collect.cpu_utilization_stats + DROP COLUMN total_cpu_utilization; + + PRINT 'Dropped computed column collect.cpu_utilization_stats.total_cpu_utilization'; + END; + + /* + 2. Widen the base column to allow NULL (Linux host CPU is not derivable). + */ + ALTER TABLE + collect.cpu_utilization_stats + ALTER COLUMN other_process_cpu_utilization integer NULL; + + PRINT 'Made collect.cpu_utilization_stats.other_process_cpu_utilization nullable'; + END; + ELSE IF @is_nullable = 1 /* @is_nullable is NULL when the column was not found; do not report that case as already nullable */ + BEGIN + PRINT 'collect.cpu_utilization_stats.other_process_cpu_utilization already nullable — no action taken'; + END; + + /* + 3. Ensure the computed column exists. Guarded separately so an interrupted + prior run (computed dropped, then a failure before re-add) still recovers. + NULL in other_process_cpu_utilization propagates to NULL in total. If the base column is missing altogether (@is_nullable IS NULL), neither branch above runs, and the ADD below is expected to fail on the unknown column rather than the script claiming success. 
+ */ + IF COL_LENGTH(N'collect.cpu_utilization_stats', N'total_cpu_utilization') IS NULL + BEGIN + ALTER TABLE + collect.cpu_utilization_stats + ADD total_cpu_utilization AS (sqlserver_cpu_utilization + other_process_cpu_utilization) PERSISTED; + + PRINT 'Re-added computed column collect.cpu_utilization_stats.total_cpu_utilization'; + END; +END; +GO diff --git a/upgrades/2.11.0-to-2.12.0/upgrade.txt b/upgrades/2.11.0-to-2.12.0/upgrade.txt index 2053d64e..4218c2d6 100644 --- a/upgrades/2.11.0-to-2.12.0/upgrade.txt +++ b/upgrades/2.11.0-to-2.12.0/upgrade.txt @@ -1,2 +1,3 @@ 01_extend_blocked_process_report_columns.sql 02_create_remediation_action_log.sql +03_make_other_process_cpu_nullable.sql