From c2c241b8ac96c5441a917794bdc9f022c9a0d719 Mon Sep 17 00:00:00 2001 From: jangjang0401 Date: Thu, 14 May 2026 14:21:23 +0900 Subject: [PATCH] gpstate: fix false "Acting as Primary" report for hot standby mirrors With hot_standby=on, mirrors accept SQL connections and return PQPING_OK from pg_isready. The previous code unconditionally mapped PQPING_OK + role=mirror to "Acting as Primary", causing gpstate -s to show a spurious warning on every mirror. Fix this in clsSystemState.__buildGpStateData() by cross-checking pg_stat_replication on the primary: if the mirror has an active WAL receiver connection in streaming or catchup state, it is a legitimate hot standby and the status is corrected to "Up". Only fall through to "Acting as Primary" when no such replication connection exists, meaning the segment truly promoted itself to primary. This approach reuses the existing primary connection already established by _add_replication_info(), so no additional database connections are required. _add_replication_info() is updated to return the raw replication state string to make this information available to the caller. --- gpMgmt/bin/gppylib/programs/clsSystemState.py | 29 +++++++++++++++---- 1 file changed, 24 insertions(+), 5 deletions(-) diff --git a/gpMgmt/bin/gppylib/programs/clsSystemState.py b/gpMgmt/bin/gppylib/programs/clsSystemState.py index e234b633122..896e78f3ad5 100644 --- a/gpMgmt/bin/gppylib/programs/clsSystemState.py +++ b/gpMgmt/bin/gppylib/programs/clsSystemState.py @@ -1039,6 +1039,9 @@ def _add_replication_info(data, primary, mirror): If segment is a mirror, peer should be set to its corresponding primary. Otherwise, peer is ignored. + + Returns the raw replication state string from pg_stat_replication + (e.g. 'streaming', 'catchup'), or None if unavailable. """ # Preload the mirror's replication info with Unknowns so that we'll # still have usable UI information on an early exit from this function. @@ -1047,7 +1050,7 @@ def _add_replication_info(data, primary, mirror): # Even though this information is considered part of the mirror's state, # we have to connect to the primary to get it. if not primary.isSegmentUp(): - return + return None # Query pg_stat_replication for the info we want. rows = [] @@ -1102,7 +1105,7 @@ def _add_replication_info(data, primary, mirror): logger.warning('could not query segment {} ({}:{})'.format( primary.dbid, primary.hostname, primary.port )) - return + return None # Successfully queried pg_stat_replication. If there are any backup # or pg_rewind connections, mention them in the primary status. @@ -1134,10 +1137,10 @@ def _add_replication_info(data, primary, mirror): standby_connections = [r for r in rows if r[0] == 'gp_walreceiver'] if not standby_connections: logger.warning('pg_stat_replication shows no standby connections') - return + return None elif len(standby_connections) > 1: logger.warning('pg_stat_replication shows more than one standby connection') - return + return None row = standby_connections[0] @@ -1154,6 +1157,8 @@ def _add_replication_info(data, primary, mirror): replay_left=row[6], ) + return row[1] + @staticmethod def _set_mirror_replication_values(data, mirror, **kwargs): """ @@ -1217,11 +1222,16 @@ def __buildGpStateData(self, gpArray, hostNameToResults): data.addValue(VALUE__MIRROR_STATUS, "Physical replication not configured") # Add replication info on a per-pair basis. + # mirrorReplState maps mirror dbid -> raw replication state from pg_stat_replication + # (e.g. 'streaming', 'catchup'), used below to detect hot standby false positives. + mirrorReplState = {} if gpArray.hasMirrors: for pair in gpArray.segmentPairs: primary, mirror = pair.primaryDB, pair.mirrorDB data.switchSegment(mirror) - self._add_replication_info(data, primary, mirror) + replState = self._add_replication_info(data, primary, mirror) + if replState is not None: + mirrorReplState[mirror.getSegmentDbId()] = replState for seg in segments: data.switchSegment(seg) @@ -1283,6 +1293,15 @@ def __buildGpStateData(self, gpArray, hostNameToResults): databaseStatusIsWarning = True else: databaseStatus = segmentData[gp.SEGMENT_STATUS__GET_MIRROR_STATUS]["databaseStatus"] + # With hot_standby=on, a mirror in recovery returns PQPING_OK, causing + # gpgetstatususingtransition to report "Acting as Primary". Correct this + # by checking pg_stat_replication on the primary: if the mirror has an + # active WAL receiver connection (streaming/catchup), it is a legitimate + # hot standby and should be reported as "Up". + if (databaseStatus == "Acting as Primary" + and seg.isSegmentMirror(current_role=True) + and mirrorReplState.get(seg.getSegmentDbId()) in ('streaming', 'catchup')): + databaseStatus = "Up" databaseStatusIsWarning = databaseStatus != "Up" if seg.isSegmentMirror(current_role=True):