From 167d4441f17ee4abd8e534a2a33115e24855d0a6 Mon Sep 17 00:00:00 2001 From: Aleksey Shein Date: Thu, 25 Jun 2026 21:38:57 +0200 Subject: [PATCH] feat: MariaDB GTID support gh-ost hard-coded the go-mysql binlog syncer to MySQLFlavor and parsed every GTID set as MySQL, so --gtid could not be used against MariaDB. go-mysql already speaks the MariaDB GTID dialect; this wires it up. - Detect server flavor from the version string (IsMariaDB / FlavorFor) and set it on the BinlogSyncerConfig. - Make GTIDBinlogCoordinates hold the flavor-agnostic gomysql.GTIDSet interface; parse via ParseGTIDSet(flavor, ...). - Handle MariadbGTIDEvent alongside GTIDEvent in the streamer, and read the committed GTID set from XIDEvent.GSet without a MySQL-only cast. - Read MariaDB GTID positions from @@global.gtid_binlog_pos, Gtid_IO_Pos and @@global.gtid_slave_pos (MariaDB has no Executed_Gtid_Set column nor gtid_mode / enforce_gtid_consistency). - Skip the gtid_mode / enforce_gtid_consistency validation on MariaDB, where GTIDs are always recorded when binary logging is enabled. localtests: - Detect the server version once and reuse it; run gtid_mode=ON tests on MariaDB (normalize its current_gtid_mode to ON) and trim the GTID diagnostics that don't exist on MariaDB. - Enable gtid_strict_mode on the MariaDB test servers. Because --test-on-replica makes gh-ost write locally on the replica, give the replica its own GTID domain so its writes never collide with the primary's domain-0 stream under strict mode. - Add the gtid-resume case: interrupt a --gtid migration mid-copy via the interactive 'panic' command, then --resume, exercising the GTID checkpoint round-trip (WriteCheckpoint persists the GTID set, ReadLastCheckpoint parses it back via NewGTIDBinlogCoordinates(flavor)). Passes across the MySQL, MariaDB and Percona CI matrix. Verified: full localtests suite passes on MySQL 5.7/8.0/8.4 and on the MariaDB matrix (10.5, 10.6, 10.11, 11.4, 11.8) with gtid_strict_mode on. 
--- doc/command-line-flags.md | 4 +- go/binlog/gomysql_reader.go | 28 +++-- go/logic/applier.go | 2 +- go/logic/inspect.go | 7 ++ go/logic/migrator_test.go | 4 +- go/logic/streamer.go | 27 +++- go/mysql/binlog_file_test.go | 38 +++++- go/mysql/binlog_gtid.go | 43 +++++-- go/mysql/replica_terminology_map.go | 9 +- go/mysql/utils.go | 39 +++++- localtests/gtid-resume/create.sql | 23 ++++ localtests/gtid-resume/extra_args | 1 + localtests/gtid-resume/gtid_mode | 1 + localtests/gtid-resume/ignore_versions | 1 + localtests/gtid-resume/test.sh | 133 ++++++++++++++++++++ localtests/test.sh | 45 ++++--- script/docker/mariadb/common.cnf | 3 + script/docker/mariadb/start_replication.sql | 11 ++ 18 files changed, 368 insertions(+), 51 deletions(-) create mode 100644 localtests/gtid-resume/create.sql create mode 100644 localtests/gtid-resume/extra_args create mode 100644 localtests/gtid-resume/gtid_mode create mode 100644 localtests/gtid-resume/ignore_versions create mode 100755 localtests/gtid-resume/test.sh diff --git a/doc/command-line-flags.md b/doc/command-line-flags.md index d017c7c25..f2938dc52 100644 --- a/doc/command-line-flags.md +++ b/doc/command-line-flags.md @@ -218,7 +218,9 @@ Add this flag when executing on a 1st generation Google Cloud Platform (GCP). ### gtid -Add this flag to enable support for [MySQL replication GTIDs](https://dev.mysql.com/doc/refman/5.7/en/replication-gtids-concepts.html) for replication positioning. This requires `gtid_mode` and `enforce_gtid_consistency` to be set to `ON`. +Add this flag to enable support for [MySQL replication GTIDs](https://dev.mysql.com/doc/refman/5.7/en/replication-gtids-concepts.html) for replication positioning. On MySQL this requires `gtid_mode` and `enforce_gtid_consistency` to be set to `ON`. + +[MariaDB GTIDs](https://mariadb.com/kb/en/gtid/) are also supported: gh-ost detects the server flavor automatically and uses the appropriate GTID dialect. 
MariaDB has no `gtid_mode`/`enforce_gtid_consistency` settings — GTIDs are always recorded when binary logging is enabled, so no extra configuration is required beyond `--gtid`. ### heartbeat-interval-millis diff --git a/go/binlog/gomysql_reader.go b/go/binlog/gomysql_reader.go index f8819589a..0a7630129 100644 --- a/go/binlog/gomysql_reader.go +++ b/go/binlog/gomysql_reader.go @@ -19,7 +19,6 @@ import ( gomysql "github.com/go-mysql-org/go-mysql/mysql" "github.com/go-mysql-org/go-mysql/replication" - uuid "github.com/google/uuid" ) type RowsEventFilterFunc func(databaseName, tableName string) bool @@ -60,7 +59,7 @@ func NewGoMySQLReader(migrationContext *base.MigrationContext, rowsEventFilters } config := replication.BinlogSyncerConfig{ ServerID: uint32(migrationContext.ReplicaServerId), - Flavor: gomysql.MySQLFlavor, + Flavor: mysql.FlavorFor(migrationContext.InspectorMySQLVersion), Host: connectionConfig.Key.Hostname, Port: uint16(connectionConfig.Key.Port), User: connectionConfig.User, @@ -177,11 +176,19 @@ func (gmr *GoMySQLReader) StreamEvents(canStopStreaming func() bool, entriesChan } switch event := ev.Event.(type) { - case *replication.GTIDEvent: + case *replication.GTIDEvent, *replication.MariadbGTIDEvent: + // MySQL emits *GTIDEvent, MariaDB emits *MariadbGTIDEvent; both + // implement BinlogGTIDEvent.GTIDNext() returning the GTID about to + // be applied. We advance currentCoordinates by merging it into the + // running GTID set, regardless of flavor. 
if !gmr.migrationContext.UseGTIDs { continue } - sid, err := uuid.FromBytes(event.SID) + gtidEvent, ok := ev.Event.(gomysql.BinlogGTIDEvent) + if !ok { + return fmt.Errorf("unexpected GTID event type: %T", ev.Event) + } + nextGTID, err := gtidEvent.GTIDNext() if err != nil { return err } @@ -191,10 +198,11 @@ func (gmr *GoMySQLReader) StreamEvents(canStopStreaming func() bool, entriesChan } coords := gmr.currentCoordinates.(*mysql.GTIDBinlogCoordinates) if coords.GTIDSet == nil { - gtidSet := gomysql.NewMysqlGTIDSet() - coords.GTIDSet = &gtidSet + coords.GTIDSet = nextGTID + } else if err := coords.GTIDSet.Update(nextGTID.String()); err != nil { + gmr.currentCoordinatesMutex.Unlock() + return err } - coords.GTIDSet.AddGTID(sid, event.GNO) gmr.currentCoordinatesMutex.Unlock() case *replication.RotateEvent: if gmr.migrationContext.UseGTIDs { @@ -207,7 +215,11 @@ func (gmr *GoMySQLReader) StreamEvents(canStopStreaming func() bool, entriesChan gmr.currentCoordinatesMutex.Unlock() case *replication.XIDEvent: if gmr.migrationContext.UseGTIDs { - gmr.LastTrxCoords = &mysql.GTIDBinlogCoordinates{GTIDSet: event.GSet.(*gomysql.MysqlGTIDSet)} + // event.GSet is the full executed GTID set maintained by the + // syncer (MysqlGTIDSet or MariadbGTIDSet depending on flavor).
+ if event.GSet != nil { + gmr.LastTrxCoords = &mysql.GTIDBinlogCoordinates{GTIDSet: event.GSet} + } } else { gmr.LastTrxCoords = gmr.currentCoordinates.Clone() } diff --git a/go/logic/applier.go b/go/logic/applier.go index 50ad45ce2..b1559a678 100644 --- a/go/logic/applier.go +++ b/go/logic/applier.go @@ -849,7 +849,7 @@ func (apl *Applier) ReadLastCheckpoint() (*Checkpoint, error) { } chk.Timestamp = time.Unix(timestamp, 0) if apl.migrationContext.UseGTIDs { - gtidCoords, err := mysql.NewGTIDBinlogCoordinates(coordStr) + gtidCoords, err := mysql.NewGTIDBinlogCoordinates(mysql.FlavorFor(apl.migrationContext.ApplierMySQLVersion), coordStr) if err != nil { return nil, err } diff --git a/go/logic/inspect.go b/go/logic/inspect.go index f010f6e9a..05d6b67b5 100644 --- a/go/logic/inspect.go +++ b/go/logic/inspect.go @@ -427,6 +427,13 @@ func (isp *Inspector) validateBinlogs() error { // validateGTIDConfig checks that the GTID configuration is good to go func (isp *Inspector) validateGTIDConfig() error { + if mysql.IsMariaDB(isp.dbVersion) { + // MariaDB has no @@gtid_mode / @@enforce_gtid_consistency: GTIDs are + // always recorded in the binary log when binary logging is enabled + // (which is validated separately). Nothing else to check. + isp.migrationContext.Log.Infof("MariaDB GTID config validated on %s", isp.connectionConfig.Key.String()) + return nil + } var gtidMode, enforceGtidConsistency string query := `select @@global.gtid_mode, @@global.enforce_gtid_consistency` if err := isp.db.QueryRow(query).Scan(&gtidMode, &enforceGtidConsistency); err != nil { diff --git a/go/logic/migrator_test.go b/go/logic/migrator_test.go index 2b76a2360..f4d458235 100644 --- a/go/logic/migrator_test.go +++ b/go/logic/migrator_test.go @@ -209,7 +209,7 @@ func TestMigratorHeartbeatDoesNotAdvancePastUnappliedDML(t *testing.T) { // A DML on the original table at GTID :100 is observed and enqueued, but // not yet applied.
- dmlCoords, err := mysql.NewGTIDBinlogCoordinates(srcUUID + ":1-100") + dmlCoords, err := mysql.NewGTIDBinlogCoordinates(mysql.MySQLFlavor, srcUUID+":1-100") require.NoError(t, err) migrator.applyEventsQueue <- newApplyEventStructByDML(&binlog.BinlogEntry{ DmlEvent: &binlog.BinlogDMLEvent{ @@ -224,7 +224,7 @@ func TestMigratorHeartbeatDoesNotAdvancePastUnappliedDML(t *testing.T) { // A heartbeat row is then written; its GTID set includes the un-applied // DML plus a few additional transactions. - heartbeatCoords, err := mysql.NewGTIDBinlogCoordinates(srcUUID + ":1-105") + heartbeatCoords, err := mysql.NewGTIDBinlogCoordinates(mysql.MySQLFlavor, srcUUID+":1-105") require.NoError(t, err) heartbeatColumnValues := sql.ToColumnValues([]interface{}{ 123, diff --git a/go/logic/streamer.go b/go/logic/streamer.go index f43dd2217..7d3d00120 100644 --- a/go/logic/streamer.go +++ b/go/logic/streamer.go @@ -16,7 +16,6 @@ import ( "github.com/github/gh-ost/go/binlog" "github.com/github/gh-ost/go/mysql" - gomysql "github.com/go-mysql-org/go-mysql/mysql" "github.com/openark/golib/sqlutils" ) @@ -162,17 +161,22 @@ func (es *EventsStreamer) GetCurrentBinlogCoordinates() mysql.BinlogCoordinates // readCurrentBinlogCoordinates reads master status from hooked server func (es *EventsStreamer) readCurrentBinlogCoordinates() error { + // MariaDB exposes no GTID column in SHOW MASTER STATUS; its current binlog + // GTID position lives in @@global.gtid_binlog_pos. 
+ if es.migrationContext.UseGTIDs && mysql.IsMariaDB(es.dbVersion) { + return es.readCurrentMariaDBGTIDCoordinates() + } binaryLogStatusTerm := mysql.ReplicaTermFor(es.dbVersion, "master status") query := fmt.Sprintf("show /* gh-ost readCurrentBinlogCoordinates */ %s", binaryLogStatusTerm) foundMasterStatus := false err := sqlutils.QueryRowsMap(es.db, query, func(m sqlutils.RowMap) error { if es.migrationContext.UseGTIDs { execGtidSet := m.GetString("Executed_Gtid_Set") - gtidSet, err := gomysql.ParseMysqlGTIDSet(execGtidSet) + coords, err := mysql.NewGTIDBinlogCoordinates(mysql.MySQLFlavor, execGtidSet) if err != nil { return err } - es.initialBinlogCoordinates = &mysql.GTIDBinlogCoordinates{GTIDSet: gtidSet.(*gomysql.MysqlGTIDSet)} + es.initialBinlogCoordinates = coords } else { es.initialBinlogCoordinates = &mysql.FileBinlogCoordinates{ LogFile: m.GetString("File"), @@ -192,6 +196,23 @@ func (es *EventsStreamer) readCurrentBinlogCoordinates() error { return nil } +// readCurrentMariaDBGTIDCoordinates reads the current binlog GTID position from +// a MariaDB server, which is exposed via @@global.gtid_binlog_pos rather than a +// column in SHOW MASTER STATUS. +func (es *EventsStreamer) readCurrentMariaDBGTIDCoordinates() error { + var gtidBinlogPos string + if err := es.db.QueryRow(`select @@global.gtid_binlog_pos`).Scan(&gtidBinlogPos); err != nil { + return err + } + coords, err := mysql.NewGTIDBinlogCoordinates(mysql.MariaDBFlavor, gtidBinlogPos) + if err != nil { + return err + } + es.initialBinlogCoordinates = coords + es.migrationContext.Log.Debugf("Streamer binlog coordinates: %+v", es.initialBinlogCoordinates) + return nil +} + // StreamEvents will begin streaming events.
It will be blocking, so should be // executed by a goroutine func (es *EventsStreamer) StreamEvents(canStopStreaming func() bool) error { diff --git a/go/mysql/binlog_file_test.go b/go/mysql/binlog_file_test.go index 50d513698..2728a4419 100644 --- a/go/mysql/binlog_file_test.go +++ b/go/mysql/binlog_file_test.go @@ -48,12 +48,12 @@ func TestBinlogCoordinates(t *testing.T) { 48e2bc1d-d66d-11e8-bf56-a0369f9437b8:1, 492e2980-4518-11e9-92c6-e4434b3eca94:1-4926754399`) - c5 := GTIDBinlogCoordinates{GTIDSet: gtidSet1.(*gomysql.MysqlGTIDSet)} - c6 := GTIDBinlogCoordinates{GTIDSet: gtidSet1.(*gomysql.MysqlGTIDSet)} - c7 := GTIDBinlogCoordinates{GTIDSet: gtidSet2.(*gomysql.MysqlGTIDSet)} - c8 := GTIDBinlogCoordinates{GTIDSet: gtidSet3.(*gomysql.MysqlGTIDSet)} - c9 := GTIDBinlogCoordinates{GTIDSet: gtidSetBig1.(*gomysql.MysqlGTIDSet)} - c10 := GTIDBinlogCoordinates{GTIDSet: gtidSetBig2.(*gomysql.MysqlGTIDSet)} + c5 := GTIDBinlogCoordinates{GTIDSet: gtidSet1} + c6 := GTIDBinlogCoordinates{GTIDSet: gtidSet1} + c7 := GTIDBinlogCoordinates{GTIDSet: gtidSet2} + c8 := GTIDBinlogCoordinates{GTIDSet: gtidSet3} + c9 := GTIDBinlogCoordinates{GTIDSet: gtidSetBig1} + c10 := GTIDBinlogCoordinates{GTIDSet: gtidSetBig2} require.True(t, c5.Equals(&c6)) require.True(t, c1.Equals(&c2)) @@ -76,6 +76,32 @@ func TestBinlogCoordinates(t *testing.T) { require.True(t, c9.SmallerThanOrEquals(&c10)) } +func TestMariaDBGTIDBinlogCoordinates(t *testing.T) { + // MariaDB GTID sets use domain-server-sequence format. 
+ c1, err := NewGTIDBinlogCoordinates(MariaDBFlavor, "0-1-100") + require.NoError(t, err) + c2, err := NewGTIDBinlogCoordinates(MariaDBFlavor, "0-1-100") + require.NoError(t, err) + c3, err := NewGTIDBinlogCoordinates(MariaDBFlavor, "0-1-150") + require.NoError(t, err) + + require.True(t, c1.Equals(c2)) + require.False(t, c1.Equals(c3)) + require.True(t, c1.SmallerThan(c3)) + require.False(t, c3.SmallerThan(c1)) + require.True(t, c1.SmallerThanOrEquals(c3)) + + clone := c1.Clone() + require.True(t, c1.Equals(clone)) + require.False(t, c1.IsEmpty()) +} + +func TestFlavorFor(t *testing.T) { + require.Equal(t, MariaDBFlavor, FlavorFor("10.6.18-MariaDB-log")) + require.Equal(t, MySQLFlavor, FlavorFor("8.0.36")) + require.Equal(t, MySQLFlavor, FlavorFor("8.4.0")) +} + func TestBinlogCoordinatesAsKey(t *testing.T) { m := make(map[BinlogCoordinates]bool) diff --git a/go/mysql/binlog_gtid.go b/go/mysql/binlog_gtid.go index 505ac92d1..5119f8000 100644 --- a/go/mysql/binlog_gtid.go +++ b/go/mysql/binlog_gtid.go @@ -9,17 +9,39 @@ import ( gomysql "github.com/go-mysql-org/go-mysql/mysql" ) -// GTIDBinlogCoordinates describe binary log coordinates in MySQL GTID format. +// Re-exported go-mysql flavor identifiers so the rest of gh-ost doesn't have to +// import go-mysql directly to talk about flavors. +const ( + MySQLFlavor = gomysql.MySQLFlavor + MariaDBFlavor = gomysql.MariaDBFlavor +) + +// FlavorFor returns the go-mysql flavor identifier for the given server version +// string. It is used to parse GTID sets and to configure the binlog syncer in +// the correct (MySQL vs MariaDB) GTID dialect. +func FlavorFor(mysqlVersion string) string { + if IsMariaDB(mysqlVersion) { + return MariaDBFlavor + } + return MySQLFlavor +} + +// GTIDBinlogCoordinates describe binary log coordinates as a GTID set. 
The +// underlying set is either a MySQL or a MariaDB GTID set depending on the +// flavor it was parsed with; all operations go through the gomysql.GTIDSet +// interface so the two flavors are handled uniformly. type GTIDBinlogCoordinates struct { - GTIDSet *gomysql.MysqlGTIDSet + GTIDSet gomysql.GTIDSet } -// NewGTIDBinlogCoordinates parses a MySQL GTID set into a *GTIDBinlogCoordinates struct. -func NewGTIDBinlogCoordinates(gtidSet string) (*GTIDBinlogCoordinates, error) { - set, err := gomysql.ParseMysqlGTIDSet(gtidSet) - return &GTIDBinlogCoordinates{ - GTIDSet: set.(*gomysql.MysqlGTIDSet), - }, err +// NewGTIDBinlogCoordinates parses a GTID set string (in the given flavor's +// dialect) into a *GTIDBinlogCoordinates struct. +func NewGTIDBinlogCoordinates(flavor, gtidSet string) (*GTIDBinlogCoordinates, error) { + set, err := gomysql.ParseGTIDSet(flavor, gtidSet) + if err != nil { + return nil, err + } + return &GTIDBinlogCoordinates{GTIDSet: set}, nil } // DisplayString returns a user-friendly string representation of these current UUID set or the full GTID set. @@ -29,6 +51,9 @@ func (coord *GTIDBinlogCoordinates) DisplayString() string { // String returns a user-friendly string representation of these full GTID set.
func (coord GTIDBinlogCoordinates) String() string { + if coord.GTIDSet == nil { + return "" + } return coord.GTIDSet.String() } @@ -74,7 +99,7 @@ func (coord *GTIDBinlogCoordinates) SmallerThanOrEquals(other BinlogCoordinates) func (coord *GTIDBinlogCoordinates) Clone() BinlogCoordinates { out := &GTIDBinlogCoordinates{} if coord.GTIDSet != nil { - out.GTIDSet = coord.GTIDSet.Clone().(*gomysql.MysqlGTIDSet) + out.GTIDSet = coord.GTIDSet.Clone() } return out } diff --git a/go/mysql/replica_terminology_map.go b/go/mysql/replica_terminology_map.go index 140fcbaca..53110b498 100644 --- a/go/mysql/replica_terminology_map.go +++ b/go/mysql/replica_terminology_map.go @@ -26,11 +26,18 @@ var MysqlReplicaTermMap = map[string]string{ "slave": "replica", } +// IsMariaDB reports whether the given server version string identifies a +// MariaDB server (as opposed to Oracle MySQL). MariaDB reports versions >= 10 +// and differs from MySQL in replica terminology and GTID handling. +func IsMariaDB(mysqlVersion string) bool { + return strings.Contains(strings.ToLower(mysqlVersion), "mariadb") +} + func ReplicaTermFor(mysqlVersion string, term string) string { // MariaDB reports versions >= 10, which compare greater than the 8.4 // cutoff, but it never adopted the new replica/source terminology. Keep // the legacy terms for it.
- if strings.Contains(strings.ToLower(mysqlVersion), "mariadb") { + if IsMariaDB(mysqlVersion) { return term } diff --git a/go/mysql/utils.go b/go/mysql/utils.go index 2ad200bdd..ad3c09487 100644 --- a/go/mysql/utils.go +++ b/go/mysql/utils.go @@ -217,14 +217,17 @@ func GetMasterConnectionConfigSafe(dbVersion string, connectionConfig *Connectio } func GetReplicationBinlogCoordinates(dbVersion string, db *gosql.DB, gtid bool) (readBinlogCoordinates, executeBinlogCoordinates BinlogCoordinates, err error) { + if gtid && IsMariaDB(dbVersion) { + return getMariaDBReplicationGTIDCoordinates(db) + } showReplicaStatusQuery := fmt.Sprintf("show %s", ReplicaTermFor(dbVersion, `slave status`)) err = sqlutils.QueryRowsMap(db, showReplicaStatusQuery, func(m sqlutils.RowMap) error { if gtid { - executeBinlogCoordinates, err = NewGTIDBinlogCoordinates(m.GetString("Executed_Gtid_Set")) + executeBinlogCoordinates, err = NewGTIDBinlogCoordinates(MySQLFlavor, m.GetString("Executed_Gtid_Set")) if err != nil { return err } - readBinlogCoordinates, err = NewGTIDBinlogCoordinates(m.GetString("Retrieved_Gtid_Set")) + readBinlogCoordinates, err = NewGTIDBinlogCoordinates(MySQLFlavor, m.GetString("Retrieved_Gtid_Set")) if err != nil { return err } @@ -244,10 +247,20 @@ func GetReplicationBinlogCoordinates(dbVersion string, db *gosql.DB, gtid bool) } func GetSelfBinlogCoordinates(dbVersion string, db *gosql.DB, gtid bool) (selfBinlogCoordinates BinlogCoordinates, err error) { + if gtid && IsMariaDB(dbVersion) { + // MariaDB does not expose a GTID column in SHOW MASTER STATUS; the + // executed GTID position of this server's own binary log is in + // @@global.gtid_binlog_pos. 
+ var gtidBinlogPos string + if err = db.QueryRow(`select @@global.gtid_binlog_pos`).Scan(&gtidBinlogPos); err != nil { + return nil, err + } + return NewGTIDBinlogCoordinates(MariaDBFlavor, gtidBinlogPos) + } binaryLogStatusTerm := ReplicaTermFor(dbVersion, "master status") err = sqlutils.QueryRowsMap(db, fmt.Sprintf("show %s", binaryLogStatusTerm), func(m sqlutils.RowMap) error { if gtid { - selfBinlogCoordinates, err = NewGTIDBinlogCoordinates(m.GetString("Executed_Gtid_Set")) + selfBinlogCoordinates, err = NewGTIDBinlogCoordinates(MySQLFlavor, m.GetString("Executed_Gtid_Set")) } else { selfBinlogCoordinates = NewFileBinlogCoordinates( m.GetString("File"), @@ -259,6 +272,26 @@ func GetSelfBinlogCoordinates(dbVersion string, db *gosql.DB, gtid bool) (selfBi return selfBinlogCoordinates, err } +// getMariaDBReplicationGTIDCoordinates reports the IO/SQL thread GTID positions +// of a MariaDB replica. MariaDB has no Executed_Gtid_Set/Retrieved_Gtid_Set +// columns: the IO thread position is in SHOW SLAVE STATUS's Gtid_IO_Pos, and the +// applied position is in @@global.gtid_slave_pos.
+func getMariaDBReplicationGTIDCoordinates(db *gosql.DB) (readBinlogCoordinates, executeBinlogCoordinates BinlogCoordinates, err error) { + err = sqlutils.QueryRowsMap(db, "show slave status", func(m sqlutils.RowMap) error { + readBinlogCoordinates, err = NewGTIDBinlogCoordinates(MariaDBFlavor, m.GetString("Gtid_IO_Pos")) + return err + }) + if err != nil { + return readBinlogCoordinates, executeBinlogCoordinates, err + } + var gtidSlavePos string + if err = db.QueryRow(`select @@global.gtid_slave_pos`).Scan(&gtidSlavePos); err != nil { + return readBinlogCoordinates, executeBinlogCoordinates, err + } + executeBinlogCoordinates, err = NewGTIDBinlogCoordinates(MariaDBFlavor, gtidSlavePos) + return readBinlogCoordinates, executeBinlogCoordinates, err +} + // GetInstanceKey reads hostname and port on given DB func GetInstanceKey(db *gosql.DB) (instanceKey *InstanceKey, err error) { instanceKey = &InstanceKey{} diff --git a/localtests/gtid-resume/create.sql b/localtests/gtid-resume/create.sql new file mode 100644 index 000000000..964c24052 --- /dev/null +++ b/localtests/gtid-resume/create.sql @@ -0,0 +1,23 @@ +drop table if exists gh_ost_test; +create table gh_ost_test ( + id int auto_increment, + t varchar(128) charset utf8mb4, + primary key(id) +) auto_increment=1; + +-- Seed ~8k rows. Small enough that the resume copies quickly, while a high +-- --nice-ratio (see extra_args) stretches the first run's row-copy so it still +-- outlasts the first checkpoint (>=10s) and can be interrupted mid-copy. +-- Doubling INSERT ... SELECT is portable across MySQL 5.7/8.x and MariaDB.
+insert into gh_ost_test (t) values (md5(rand())), (md5(rand())), (md5(rand())), (md5(rand())); +insert into gh_ost_test (t) select md5(rand()) from gh_ost_test; +insert into gh_ost_test (t) select md5(rand()) from gh_ost_test; +insert into gh_ost_test (t) select md5(rand()) from gh_ost_test; +insert into gh_ost_test (t) select md5(rand()) from gh_ost_test; +insert into gh_ost_test (t) select md5(rand()) from gh_ost_test; +insert into gh_ost_test (t) select md5(rand()) from gh_ost_test; +insert into gh_ost_test (t) select md5(rand()) from gh_ost_test; +insert into gh_ost_test (t) select md5(rand()) from gh_ost_test; +insert into gh_ost_test (t) select md5(rand()) from gh_ost_test; +insert into gh_ost_test (t) select md5(rand()) from gh_ost_test; +insert into gh_ost_test (t) select md5(rand()) from gh_ost_test; diff --git a/localtests/gtid-resume/extra_args b/localtests/gtid-resume/extra_args new file mode 100644 index 000000000..3904fa9dd --- /dev/null +++ b/localtests/gtid-resume/extra_args @@ -0,0 +1 @@ +--gtid --checkpoint-seconds=10 --nice-ratio=25 \ No newline at end of file diff --git a/localtests/gtid-resume/gtid_mode b/localtests/gtid-resume/gtid_mode new file mode 100644 index 000000000..8cefb6a89 --- /dev/null +++ b/localtests/gtid-resume/gtid_mode @@ -0,0 +1 @@ +ON \ No newline at end of file diff --git a/localtests/gtid-resume/ignore_versions b/localtests/gtid-resume/ignore_versions new file mode 100644 index 000000000..b3862aade --- /dev/null +++ b/localtests/gtid-resume/ignore_versions @@ -0,0 +1 @@ +(5.5) \ No newline at end of file diff --git a/localtests/gtid-resume/test.sh b/localtests/gtid-resume/test.sh new file mode 100755 index 000000000..52ca34bdc --- /dev/null +++ b/localtests/gtid-resume/test.sh @@ -0,0 +1,133 @@ +#!/bin/bash +# Custom test: interrupt a --gtid migration mid-copy, then --resume it. 
+# +# Purpose: exercise the GTID checkpoint round-trip end-to-end -- +# WriteCheckpoint persists the binlog coordinates as a GTID set, and on resume +# ReadLastCheckpoint parses them back via mysql.NewGTIDBinlogCoordinates(flavor, +# coords). On the MariaDB image this covers the MariaDB GTID dialect of that +# read path; on MySQL images it covers the MySQL dialect. +# +# Gated to GTID-on servers via the gtid_mode=ON file. Runs on all GTID flavors. +# +# Flow: run gh-ost with --checkpoint in the background; --nice-ratio stretches +# the row-copy so it reliably outlasts the first checkpoint (gh-ost enforces +# --checkpoint-seconds>=10), so a checkpoint is written mid-copy; abort with the +# interactive 'panic' command (aborts without cleanup, leaving the ghost/ +# changelog/checkpoint tables in place); then start a second gh-ost with +# --resume and let it finish; finally checksum original vs ghost (the framework +# skips its own checksum for custom test.sh cases). +# +# Note: the copy must NOT be throttled while waiting for the checkpoint -- +# throttling pauses DML apply, the backlog grows, and the checkpoint (2s +# internal timeout) never reaches a consistent point. --nice-ratio slows the +# copy by sleeping between chunks (it does not pause DML apply), so checkpoints +# still succeed. + +# Force the harness throttle-query to never throttle (THROTTLE_SECONDS=0 -> its +# condition becomes "< 0"). Throttling pauses DML apply and makes checkpoints +# time out, so it must stay fully off for this test. Must be set before +# build_ghost_command bakes the throttle-query in. +export THROTTLE_SECONDS=0 + +table_name="gh_ost_test" +ghost_table_name="_gh_ost_test_gho" +ghost_socket="/tmp/gh-ost.test.sock" + +stop_ghost() { + # Abort via the interactive 'panic' command (no cleanup -> tables remain for + # resume). Fall back to SIGKILL if the process does not exit. 
+ echo panic | nc -U "$ghost_socket" >/dev/null 2>&1 + for i in $(seq 1 30); do + ps -p $ghost_pid >/dev/null || return 0 + sleep 0.5 + done + kill -9 $ghost_pid 2>/dev/null + pkill -9 -f "$ghost_binary" 2>/dev/null +} + +build_ghost_command + +# --- first run: interrupt mid-copy ----------------------------------------- +echo >$test_logfile +echo_dot +bash -c "$cmd" >>$test_logfile 2>&1 & +ghost_pid=$! + +# Wait for a checkpoint to be written while the row-copy is still in progress. +checkpoint_written=false +for i in $(seq 1 120); do + if grep -q "checkpoint success at coords=" "$test_logfile"; then + checkpoint_written=true + break + fi + if grep -q "Row copy complete" "$test_logfile"; then + echo + echo "ERROR row copy completed before a checkpoint was written; increase --nice-ratio (extra_args) or the seed size in create.sql" + stop_ghost + return 1 + fi + if ! ps -p $ghost_pid >/dev/null; then + echo + echo "ERROR gh-ost exited early during the first (pre-resume) run" + print_log_excerpt + return 1 + fi + sleep 0.5 + echo_dot +done + +if [ "$checkpoint_written" != true ]; then + echo + echo "ERROR no checkpoint was written within the wait window" + print_log_excerpt + stop_ghost + return 1 +fi + +# Abort the migration to simulate an interruption. 'panic' leaves the ghost, +# changelog and checkpoint tables in place for the resume. +stop_ghost +wait $ghost_pid 2>/dev/null +echo_dot + +# --- second run: resume from checkpoint ------------------------------------ +# --initially-drop-ghost-table (in $cmd) is ignored under --resume: gh-ost keeps +# the partially-copied ghost table and continues from the checkpoint. +# Drop --nice-ratio (only needed to stretch the first run past the checkpoint) +# so the resume copies the remaining rows at full speed. +resume_cmd="$(echo "$cmd" | sed -E 's/--nice-ratio=[0-9.]+//g') --resume" +bash -c "$resume_cmd" >>$test_logfile 2>&1 & +resume_pid=$! +wait $resume_pid +execution_result=$? 
+ +if [ $execution_result -ne 0 ]; then + echo + echo "ERROR resume run failed (exit ${execution_result})" + print_log_excerpt + return 1 +fi + +if ! grep -q "Resuming from checkpoint" "$test_logfile"; then + echo + echo "ERROR resume run did not read a checkpoint (no 'Resuming from checkpoint' in log)" + print_log_excerpt + return 1 +fi + +# --- validate: original vs ghost table checksum on the replica -------------- +echo_dot +gh-ost-test-mysql-replica --default-character-set=utf8mb4 test -e "select * from ${table_name} order by id" -ss >$orig_content_output_file +gh-ost-test-mysql-replica --default-character-set=utf8mb4 test -e "select * from ${ghost_table_name} order by id" -ss >$ghost_content_output_file +orig_checksum=$(cat $orig_content_output_file | md5sum) +ghost_checksum=$(cat $ghost_content_output_file | md5sum) + +if [ "$orig_checksum" != "$ghost_checksum" ]; then + echo + echo "ERROR ${test_name}: checksum mismatch after resume" + echo "---" + diff $orig_content_output_file $ghost_content_output_file | head -50 + return 1 +fi + +return 0 diff --git a/localtests/test.sh b/localtests/test.sh index 49ed780d6..dfc9bee6a 100755 --- a/localtests/test.sh +++ b/localtests/test.sh @@ -72,12 +72,24 @@ verify_master_and_replica() { original_sql_mode="$(gh-ost-test-mysql-master -e "select @@global.sql_mode" -s -s)" echo "sql_mode on master is ${original_sql_mode}" + # Detect the server version once (master and replica run the same image); + # reused below for GTID handling and replica/source terminology. 
+ mysql_version="$(gh-ost-test-mysql-master -s -s -e "select @@version")" + current_gtid_mode=$(gh-ost-test-mysql-master -s -s -e "select @@global.gtid_mode" 2>/dev/null || echo unsupported) - current_enforce_gtid_consistency=$(gh-ost-test-mysql-master -s -s -e "select @@global.enforce_gtid_consistency" 2>/dev/null || echo unsupported) - current_master_server_uuid=$(gh-ost-test-mysql-master -s -s -e "select @@global.server_uuid" 2>/dev/null || echo unsupported) - current_replica_server_uuid=$(gh-ost-test-mysql-replica -s -s -e "select @@global.server_uuid" 2>/dev/null || echo unsupported) - echo "gtid_mode on master is ${current_gtid_mode} with enforce_gtid_consistency=${current_enforce_gtid_consistency}" - echo "server_uuid on master is ${current_master_server_uuid}, replica is ${current_replica_server_uuid}" + if [[ "$mysql_version" == *MariaDB* ]]; then + # MariaDB has no @@gtid_mode, @@enforce_gtid_consistency or @@server_uuid, + # but always records GTIDs in the binary log when binary logging is on. + # Treat it as gtid_mode=ON so GTID tests run on it. 
+ current_gtid_mode="ON" + echo "gtid_mode on master is ${current_gtid_mode}" + else + current_enforce_gtid_consistency=$(gh-ost-test-mysql-master -s -s -e "select @@global.enforce_gtid_consistency" 2>/dev/null || echo unsupported) + current_master_server_uuid=$(gh-ost-test-mysql-master -s -s -e "select @@global.server_uuid" 2>/dev/null || echo unsupported) + current_replica_server_uuid=$(gh-ost-test-mysql-replica -s -s -e "select @@global.server_uuid" 2>/dev/null || echo unsupported) + echo "gtid_mode on master is ${current_gtid_mode} with enforce_gtid_consistency=${current_enforce_gtid_consistency}" + echo "server_uuid on master is ${current_master_server_uuid}, replica is ${current_replica_server_uuid}" + fi if [ "$(gh-ost-test-mysql-replica -e "select 1" -ss)" != "1" ]; then echo "Cannot verify gh-ost-test-mysql-replica" @@ -91,9 +103,8 @@ verify_master_and_replica() { [ "$replica_host" == "$(hostname)" ] && replica_host="127.0.0.1" echo "# replica verified at $replica_host:$replica_port" - # Detect the server version once; no need to re-query it per test. - # Cache replica_terminology and seconds_behind_source values to avoid later checks. - mysql_version="$(gh-ost-test-mysql-replica -s -s -e "select @@version")" + # mysql_version was detected above; cache replica_terminology and + # seconds_behind_source here to avoid re-checking per test. mysql_version_comment="$(gh-ost-test-mysql-master -s -s -e "select @@version_comment")" if [[ "$mysql_version" == *MariaDB* ]]; then # MariaDB reports versions >= 10 but never adopted the replica/source @@ -308,16 +319,16 @@ test_single() { if [ -f $tests_path/$test_name/gtid_mode ]; then target_gtid_mode=$(cat $tests_path/$test_name/gtid_mode) if [ "$current_gtid_mode" != "$target_gtid_mode" ]; then - # MariaDB has no @@gtid_mode (reports "unsupported"); these tests - # target MySQL GTID behaviour, so skip rather than abort the run. 
- if [ "$current_gtid_mode" = "unsupported" ]; then - echo -n " skipping (gtid_mode unsupported)" - return 0 - fi - echo "gtid_mode is ${current_gtid_mode}, expected ${target_gtid_mode}" - exit 1 + # "unsupported" here means a MySQL build without GTID; skip rather + # than abort the whole run. + if [ "$current_gtid_mode" = "unsupported" ]; then + echo -n " skipping (gtid_mode unsupported)" + return 0 + fi + echo "gtid_mode is ${current_gtid_mode}, expected ${target_gtid_mode}" + exit 1 + fi fi - fi if [ -f $tests_path/$test_name/sql_mode ]; then gh-ost-test-mysql-master --default-character-set=utf8mb4 test -e "set @@global.sql_mode='$(cat $tests_path/$test_name/sql_mode)'" diff --git a/script/docker/mariadb/common.cnf b/script/docker/mariadb/common.cnf index a0023d628..af6a2acf5 100644 --- a/script/docker/mariadb/common.cnf +++ b/script/docker/mariadb/common.cnf @@ -8,3 +8,6 @@ character-set-server=utf8mb4 log-bin=mariadb-bin binlog-format=ROW log-slave-updates=ON +# Enforce strictly in-order GTIDs (MariaDB's recommended production setting) so +# the tests exercise gh-ost's MariaDB GTID handling under strict mode. +gtid-strict-mode=ON diff --git a/script/docker/mariadb/start_replication.sql b/script/docker/mariadb/start_replication.sql index 94820bb28..bad9f643b 100644 --- a/script/docker/mariadb/start_replication.sql +++ b/script/docker/mariadb/start_replication.sql @@ -1,2 +1,13 @@ +-- gtid_strict_mode enforces strictly in-order sequence numbers per GTID domain. +-- Under --test-on-replica gh-ost writes directly to this replica (ghost table +-- DDL/DML, changelog, cut-over), which would otherwise land in the primary's +-- domain 0 and collide with the replicated 0-1-N stream as out-of-order. Give +-- this server its own domain for locally-originated writes so the two never +-- interleave. 
Also reset the binlog GTID state left by the local init scripts +-- (create_user.sql ran here too, in domain 0) and start from an empty slave +-- position so the replica cleanly replays the primary's history. +reset master; +set global gtid_slave_pos=''; +set global gtid_domain_id=1; change master to master_host='mysql-primary', master_port=3307, master_user='repl', master_password='repl', master_use_gtid=slave_pos; start slave;