Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 8 additions & 0 deletions doc/command-line-flags.md
Original file line number Diff line number Diff line change
Expand Up @@ -62,10 +62,18 @@ MySQL 8.0 supports "instant DDL" for some operations. If an alter statement can

It is not reliable to parse the `ALTER` statement to determine if it is instant or not. This is because the table might be in an older row format, or have some other incompatibility that is difficult to identify.

When `--attempt-instant-ddl` is enabled, `gh-ost` will attempt `ALGORITHM=INSTANT` **early**, right after connecting to the inspector and before creating ghost tables or starting binlog streaming. If instant DDL succeeds, the migration completes immediately without any of the normal setup overhead. This is especially beneficial for large tables where the ghost table creation and binlog streaming setup would otherwise add significant time.

`--attempt-instant-ddl` is disabled by default, but the risks of enabling it are relatively minor: `gh-ost` may need to acquire a metadata lock at the start of the operation. This is not a problem for most scenarios, but it could be a problem for users who start the DDL during a period with long-running transactions.

`gh-ost` will automatically fall back to the normal DDL process if the attempt to use instant DDL is unsuccessful.

### force-instant-ddl

`--force-instant-ddl` requires instant DDL to succeed. If `ALGORITHM=INSTANT` is not supported for the given operation, `gh-ost` will abort the migration immediately instead of falling back to a regular row-copy migration. This is useful when you intend an instant metadata-only change and want to prevent accidental multi-hour migrations on large tables.

Implies `--attempt-instant-ddl`. Ignored with `--revert`.

### binlogsyncer-max-reconnect-attempts
`--binlogsyncer-max-reconnect-attempts=0`: the maximum number of attempts to re-establish a broken inspector connection for binlog syncing. `0` or a negative number means retry indefinitely. Default: `0`.

Expand Down
1 change: 1 addition & 0 deletions go/base/context.go
Original file line number Diff line number Diff line change
Expand Up @@ -104,6 +104,7 @@ type MigrationContext struct {
GoogleCloudPlatform bool
AzureMySQL bool
AttemptInstantDDL bool
ForceInstantDDL bool
Resume bool
Revert bool
OldTableName string
Expand Down
7 changes: 7 additions & 0 deletions go/cmd/gh-ost/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,7 @@ func main() {
flag.StringVar(&migrationContext.OriginalTableName, "table", "", "table name (mandatory)")
flag.StringVar(&migrationContext.AlterStatement, "alter", "", "alter statement (mandatory)")
flag.BoolVar(&migrationContext.AttemptInstantDDL, "attempt-instant-ddl", false, "Attempt to use instant DDL for this migration first")
flag.BoolVar(&migrationContext.ForceInstantDDL, "force-instant-ddl", false, "Require instant DDL; abort if the operation cannot be completed instantly (do not fall back to regular migration)")
storageEngine := flag.String("storage-engine", "innodb", "Specify table storage engine (default: 'innodb'). When 'rocksdb': the session transaction isolation level is changed from REPEATABLE_READ to READ_COMMITTED.")

flag.BoolVar(&migrationContext.CountTableRows, "exact-rowcount", false, "actually count table rows as opposed to estimate them (results in more accurate progress estimation)")
Expand Down Expand Up @@ -230,6 +231,9 @@ func main() {
if migrationContext.AttemptInstantDDL {
log.Warning("--attempt-instant-ddl was provided with --revert, it will be ignored")
}
if migrationContext.ForceInstantDDL {
log.Warning("--force-instant-ddl was provided with --revert, it will be ignored")
}
if migrationContext.IncludeTriggers {
log.Warning("--include-triggers was provided with --revert, it will be ignored")
}
Expand Down Expand Up @@ -270,6 +274,9 @@ func main() {
if migrationContext.SwitchToRowBinlogFormat && migrationContext.AssumeRBR {
migrationContext.Log.Fatal("--switch-to-rbr and --assume-rbr are mutually exclusive")
}
if migrationContext.ForceInstantDDL {
migrationContext.AttemptInstantDDL = true
}
if migrationContext.TestOnReplicaSkipReplicaStop {
if !migrationContext.TestOnReplica {
migrationContext.Log.Fatal("--test-on-replica-skip-replica-stop requires --test-on-replica to be enabled")
Expand Down
30 changes: 0 additions & 30 deletions go/logic/applier.go
Original file line number Diff line number Diff line change
Expand Up @@ -279,36 +279,6 @@ func (this *Applier) ValidateOrDropExistingTables() error {
return nil
}

// AttemptInstantDDL attempts to use instant DDL (from MySQL 8.0, and earlier in Aurora and some others).
// If successful, the operation is only a meta-data change so a lot of time is saved!
// The risk of attempting to instant DDL when not supported is that a metadata lock may be acquired.
// This is minor, since gh-ost will eventually require a metadata lock anyway, but at the cut-over stage.
// Instant operations include:
// - Adding a column
// - Dropping a column
// - Dropping an index
// - Extending a VARCHAR column
// - Adding a virtual generated column
// It is not reliable to parse the `alter` statement to determine if it is instant or not.
// This is because the table might be in an older row format, or have some other incompatibility
// that is difficult to identify.
//
// Returns nil when the ALTER succeeded, otherwise the error from opening the
// pinned connection, setting the lock timeout, or executing the ALTER.
func (this *Applier) AttemptInstantDDL() error {
	query := this.generateInstantDDLQuery()
	this.migrationContext.Log.Infof("INSTANT DDL query is: %s", query)

	// Reuse cut-over-lock-timeout from regular migration process to reduce risk
	// in situations where there may be long-running transactions.
	tableLockTimeoutSeconds := this.migrationContext.CutOverLockTimeoutSeconds * 2
	this.migrationContext.Log.Infof("Setting LOCK timeout as %d seconds", tableLockTimeoutSeconds)
	lockTimeoutQuery := fmt.Sprintf(`set /* gh-ost */ session lock_wait_timeout:=%d`, tableLockTimeoutSeconds)

	// The lock timeout is a session variable, so it only protects the ALTER if
	// both statements run on the same connection. Two independent Exec calls on
	// a connection pool give no such guarantee; a transaction handle pins one
	// connection for both. The transaction is used purely for pinning: the SQL
	// mode doesn't matter for instant DDL.
	// NOTE(review): assumes this.db is a *sql.DB (or exposes Begin) - confirm.
	tx, err := this.db.Begin()
	if err != nil {
		return err
	}
	if _, err := tx.Exec(lockTimeoutQuery); err != nil {
		// Best effort: the original error is the one worth reporting.
		_ = tx.Rollback()
		return err
	}
	if _, err := tx.Exec(query); err != nil {
		// Best effort: the original error is the one worth reporting.
		_ = tx.Rollback()
		return err
	}
	// ALTER TABLE commits implicitly, so the DDL is already applied here.
	// Commit only releases the pinned connection; its error must not turn a
	// successful instant DDL into a reported failure.
	_ = tx.Commit()
	return nil
}

// CreateGhostTable creates the ghost table on the applier host
func (this *Applier) CreateGhostTable() error {
query := fmt.Sprintf(`create /* gh-ost */ table %s.%s like %s.%s`,
Expand Down
89 changes: 68 additions & 21 deletions go/logic/migrator.go
Original file line number Diff line number Diff line change
Expand Up @@ -430,6 +430,25 @@
if err := this.checkAbort(); err != nil {
return err
}

// In MySQL 8.0 (and possibly earlier) some DDL statements can be applied instantly.
// Attempt this EARLY, before creating ghost tables or starting binlog streaming,
// to avoid unnecessary overhead for large tables when instant DDL is possible.
// Skip during resume (the DDL may have already been applied) and noop mode.
if this.migrationContext.AttemptInstantDDL && !this.migrationContext.Resume {
if this.migrationContext.Noop {
this.migrationContext.Log.Debugf("Noop operation; not really attempting instant DDL")
} else {
if err := this.attemptInstantDDLEarly(); err == nil {
return nil
} else if this.migrationContext.ForceInstantDDL {
return fmt.Errorf("--force-instant-ddl enabled but ALGORITHM=INSTANT is not supported for this operation: %s", err)

Check failure on line 445 in go/logic/migrator.go

View workflow job for this annotation

GitHub Actions / lint

non-wrapping format verb for fmt.Errorf. Use `%w` to format errors (errorlint)

Check failure on line 445 in go/logic/migrator.go

View workflow job for this annotation

GitHub Actions / lint

non-wrapping format verb for fmt.Errorf. Use `%w` to format errors (errorlint)
} else {
this.migrationContext.Log.Infof("ALGORITHM=INSTANT not supported for this operation, proceeding with original algorithm")
}
}
}

// If we are resuming, we will initiateStreaming later when we know
// the binlog coordinates to resume streaming from.
// If not resuming, the streamer must be initiated before the applier,
Expand All @@ -451,27 +470,6 @@
if err := this.createFlagFiles(); err != nil {
return err
}
// In MySQL 8.0 (and possibly earlier) some DDL statements can be applied instantly.
// Attempt to do this if AttemptInstantDDL is set.
if this.migrationContext.AttemptInstantDDL {
if this.migrationContext.Noop {
this.migrationContext.Log.Debugf("Noop operation; not really attempting instant DDL")
} else {
this.migrationContext.Log.Infof("Attempting to execute alter with ALGORITHM=INSTANT")
if err := this.applier.AttemptInstantDDL(); err == nil {
if err := this.finalCleanup(); err != nil {
return nil
}
if err := this.hooksExecutor.onSuccess(); err != nil {
return err
}
this.migrationContext.Log.Infof("Success! table %s.%s migrated instantly", sql.EscapeName(this.migrationContext.DatabaseName), sql.EscapeName(this.migrationContext.OriginalTableName))
return nil
} else {
this.migrationContext.Log.Infof("ALGORITHM=INSTANT not supported for this operation, proceeding with original algorithm: %s", err)
}
}
}

initialLag, _ := this.inspector.getReplicationLag()
if !this.migrationContext.Resume {
Expand Down Expand Up @@ -1030,6 +1028,55 @@
return nil
}

// attemptInstantDDLEarly attempts to execute the ALTER with ALGORITHM=INSTANT
// before any ghost table or binlog streaming setup. This avoids the overhead of
// creating ghost tables, changelog tables, and streaming binlog events for
// operations that MySQL 8.0+ can execute as instant metadata-only changes.
// If instant DDL succeeds, the migration is complete. If it fails, the caller
// should proceed with the normal migration flow.
//
// Returns nil only when the ALTER succeeded and the on-success hook ran without
// error. Any other outcome (connection failure, lock-timeout setup failure,
// ALTER rejected, hook failure) is returned as an error.
//
// NOTE(review): an on-success hook failure is returned *after* the DDL has
// already been applied. A caller that treats every error as "instant DDL not
// supported" will then retry the same ALTER via the regular flow - confirm
// this is the intended handling.
func (this *Migrator) attemptInstantDDLEarly() error {
	this.migrationContext.Log.Infof("Attempting to execute alter with ALGORITHM=INSTANT before full migration setup")

	// Open a connection to the master for the instant DDL attempt.
	// This avoids initializing the full Applier (ghost table, changelog, etc.).
	// The handle is deliberately not closed here.
	// NOTE(review): presumably mysql.GetDB hands out a shared/cached pool - confirm.
	connConfig := this.migrationContext.ApplierConnectionConfig
	uri := connConfig.GetDBUri(this.migrationContext.DatabaseName)
	db, _, err := mysql.GetDB(this.migrationContext.Uuid, uri)
	if err != nil {
		this.migrationContext.Log.Infof("Could not open connection for instant DDL attempt: %s", err)
		return err
	}

	// lock_wait_timeout is a session variable, so it only protects the ALTER if
	// both statements run on the same connection. Independent Exec calls on a
	// connection pool give no such guarantee; a transaction handle pins one
	// connection for both statements.
	tx, err := db.Begin()
	if err != nil {
		this.migrationContext.Log.Infof("Could not open connection for instant DDL attempt: %s", err)
		return err
	}

	// Twice the cut-over lock timeout bounds how long the ALTER may wait for
	// its metadata lock behind long-running transactions.
	tableLockTimeoutSeconds := this.migrationContext.CutOverLockTimeoutSeconds * 2
	this.migrationContext.Log.Infof("Setting LOCK timeout as %d seconds for instant DDL attempt", tableLockTimeoutSeconds)
	lockTimeoutQuery := fmt.Sprintf(`set /* gh-ost */ session lock_wait_timeout:=%d`, tableLockTimeoutSeconds)
	if _, err := tx.Exec(lockTimeoutQuery); err != nil {
		// Best effort: the original error is the one worth reporting.
		_ = tx.Rollback()
		this.migrationContext.Log.Infof("Could not set lock timeout for instant DDL: %s", err)
		return err
	}

	query := fmt.Sprintf(`ALTER /* gh-ost */ TABLE %s.%s %s, ALGORITHM=INSTANT`,
		sql.EscapeName(this.migrationContext.DatabaseName),
		sql.EscapeName(this.migrationContext.OriginalTableName),
		this.migrationContext.AlterStatementOptions,
	)
	this.migrationContext.Log.Infof("INSTANT DDL query: %s", query)

	if _, err := tx.Exec(query); err != nil {
		// Best effort: the original error is the one worth reporting.
		_ = tx.Rollback()
		this.migrationContext.Log.Infof("ALGORITHM=INSTANT is not supported for this operation, proceeding with regular algorithm: %s", err)
		return err
	}
	// ALTER TABLE commits implicitly, so the DDL is already applied here.
	// Commit only releases the pinned connection; its error must not turn a
	// successful instant DDL into a reported failure.
	_ = tx.Commit()

	if err := this.hooksExecutor.onSuccess(); err != nil {
		return err
	}
	this.migrationContext.Log.Infof("Successfully executed instant DDL on %s.%s (no ghost table was needed)",
		sql.EscapeName(this.migrationContext.DatabaseName),
		sql.EscapeName(this.migrationContext.OriginalTableName),
	)
	return nil
}

// initiateInspector connects, validates and inspects the "inspector" server.
// The "inspector" server is typically a replica; it is where we issue some
// queries such as:
Expand Down
70 changes: 70 additions & 0 deletions go/logic/migrator_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -386,6 +386,76 @@ func (suite *MigratorTestSuite) TestMigrateEmpty() {
suite.Require().Equal("_testing_del", tableName)
}

// TestMigrateInstantDDLEarly runs a migration with AttemptInstantDDL enabled
// for an ALTER that MySQL 8.0+ can apply instantly, then checks that the column
// exists on the original table and that neither a ghost table nor a changelog
// table was left behind.
func (suite *MigratorTestSuite) TestMigrateInstantDDLEarly() {
	ctx := context.Background()

	createQuery := fmt.Sprintf("CREATE TABLE %s (id INT PRIMARY KEY, name VARCHAR(64))", getTestTableName())
	_, err := suite.db.ExecContext(ctx, createQuery)
	suite.Require().NoError(err)

	connConfig, err := getTestConnectionConfig(ctx, suite.mysqlContainer)
	suite.Require().NoError(err)

	mctx := newTestMigrationContext()
	mctx.ApplierConnectionConfig = connConfig
	mctx.InspectorConnectionConfig = connConfig
	mctx.SetConnectionConfig("innodb")
	mctx.AttemptInstantDDL = true
	// Adding a column is an instant DDL operation in MySQL 8.0+.
	mctx.AlterStatementOptions = "ADD COLUMN instant_col VARCHAR(255)"

	err = NewMigrator(mctx, "0.0.0").Migrate()
	suite.Require().NoError(err)

	// The original table must now carry the new column.
	var tableName, createTableSQL string
	showCreateQuery := "SHOW CREATE TABLE " + getTestTableName()
	//nolint:execinquery
	err = suite.db.QueryRow(showCreateQuery).Scan(&tableName, &createTableSQL)
	suite.Require().NoError(err)
	suite.Require().Contains(createTableSQL, "instant_col")

	// Instant DDL must skip the regular setup: no ghost table, no changelog table.
	absentTables := []struct {
		query   string
		failMsg string
	}{
		{"SHOW TABLES IN test LIKE '_testing_gho'", "ghost table should not exist after instant DDL"},
		{"SHOW TABLES IN test LIKE '_testing_ghc'", "changelog table should not exist after instant DDL"},
	}
	for _, absent := range absentTables {
		//nolint:execinquery
		err = suite.db.QueryRow(absent.query).Scan(&tableName)
		suite.Require().Error(err, absent.failMsg)
		suite.Require().Equal(gosql.ErrNoRows, err)
	}
}

// TestForceInstantDDLFailsForNonInstantOp runs a migration with ForceInstantDDL
// enabled for an ALTER that cannot be applied instantly, and expects Migrate to
// fail with an error that mentions the --force-instant-ddl flag.
func (suite *MigratorTestSuite) TestForceInstantDDLFailsForNonInstantOp() {
	ctx := context.Background()

	createQuery := fmt.Sprintf("CREATE TABLE %s (id INT PRIMARY KEY, name VARCHAR(64))", getTestTableName())
	_, err := suite.db.ExecContext(ctx, createQuery)
	suite.Require().NoError(err)

	connConfig, err := getTestConnectionConfig(ctx, suite.mysqlContainer)
	suite.Require().NoError(err)

	mctx := newTestMigrationContext()
	mctx.ApplierConnectionConfig = connConfig
	mctx.InspectorConnectionConfig = connConfig
	mctx.SetConnectionConfig("innodb")
	mctx.AttemptInstantDDL = true
	mctx.ForceInstantDDL = true
	// Changing a column type from VARCHAR to INT is NOT an instant DDL operation.
	mctx.AlterStatementOptions = "MODIFY COLUMN name INT"

	err = NewMigrator(mctx, "0.0.0").Migrate()
	suite.Require().Error(err)
	suite.Require().Contains(err.Error(), "--force-instant-ddl")
}

func (suite *MigratorTestSuite) TestRetryBatchCopyWithHooks() {
ctx := context.Background()

Expand Down
25 changes: 25 additions & 0 deletions localtests/force-instant-ddl/create.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
-- Fixture for the force-instant-ddl local test: recreates gh_ost_test from
-- scratch and seeds it with three rows.
drop table if exists gh_ost_test;

-- id is the auto-increment primary key; numbering starts at 1.
create table gh_ost_test (
id int auto_increment,
i int not null,
color varchar(32),
primary key(id)
) auto_increment = 1;

-- Each row passes null for id so the auto-increment counter assigns it.
insert into
gh_ost_test
values
(null, 11, 'red');

insert into
gh_ost_test
values
(null, 13, 'green');

insert into
gh_ost_test
values
(null, 17, 'blue');

-- Remove any event of the same name; this fixture does not create one.
drop event if exists gh_ost_test;
1 change: 1 addition & 0 deletions localtests/force-instant-ddl/extra_args
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
--force-instant-ddl --alter='ADD COLUMN extra_col VARCHAR(255)'
Loading