diff --git a/internal/hcs/migration.go b/internal/hcs/migration.go index f9597ae9ba..5bb02effa2 100644 --- a/internal/hcs/migration.go +++ b/internal/hcs/migration.go @@ -48,7 +48,7 @@ func migrationCallbackHandler(eventPtr uintptr, ctx uintptr) uintptr { } e := (*computecore.HcsEvent)(unsafe.Pointer(eventPtr)) - ch := *(*chan string)(unsafe.Pointer(ctx)) + ch := *(*chan hcsschema.OperationSystemMigrationNotificationInfo)(unsafe.Pointer(ctx)) eventData := "" if e.EventData != nil { @@ -60,9 +60,21 @@ func migrationCallbackHandler(eventPtr uintptr, ctx uintptr) uintptr { "event-data": eventData, }).Debug("HCS migration notification") + var info hcsschema.OperationSystemMigrationNotificationInfo + if eventData != "" { + if err := json.Unmarshal([]byte(eventData), &info); err != nil { + logrus.WithFields(logrus.Fields{ + "event-type": e.Type.String(), + "event-data": eventData, + logrus.ErrorKey: err, + }).Warn("failed to unmarshal migration notification payload, dropping event") + return 0 + } + } + // Non-blocking send to avoid blocking the HCS callback thread. select { - case ch <- eventData: + case ch <- info: default: logrus.WithField("event-type", e.Type.String()).Warn("migration notification channel full, dropping event") } @@ -94,7 +106,7 @@ func (computeSystem *System) openMigrationHandle(ctx context.Context) error { // Create the notification channel and store it on the struct. computeSystem.migrationHandle = handle - computeSystem.migrationNotifyCh = make(chan string, migrationNotificationBufferSize) + computeSystem.migrationNotifyCh = make(chan hcsschema.OperationSystemMigrationNotificationInfo, migrationNotificationBufferSize) // Pin the address of the notification channel field so it stays visible // to the GC while HCS holds it as a uintptr callback context. Without @@ -372,8 +384,8 @@ func (computeSystem *System) FinalizeLiveMigration(ctx context.Context, resume b } // MigrationNotifications returns a read-only channel that receives live migration -// event data strings. Returns an error if no migration handle is open. -func (computeSystem *System) MigrationNotifications() (<-chan string, error) { +// event payloads. Returns an error if no migration handle is open. +func (computeSystem *System) MigrationNotifications() (<-chan hcsschema.OperationSystemMigrationNotificationInfo, error) { computeSystem.handleLock.RLock() defer computeSystem.handleLock.RUnlock() diff --git a/internal/hcs/migration_test.go b/internal/hcs/migration_test.go new file mode 100644 index 0000000000..d6144dda71 --- /dev/null +++ b/internal/hcs/migration_test.go @@ -0,0 +1,236 @@ +//go:build windows + +package hcs + +import ( + "testing" + "unsafe" + + "github.com/Microsoft/hcsshim/internal/computecore" + hcsschema "github.com/Microsoft/hcsshim/internal/hcs/schema2" + "golang.org/x/sys/windows" +) + +// ───────────────────────────────────────────────────────────────────────────── +// Test helpers +// +// The handler under test reads its arguments as raw uintptrs that originate +// outside the Go heap (HCS hands them to us via a syscall callback). To +// faithfully exercise that contract — and the cgo pointer-passing rules it +// implies — the helpers below allocate the HcsEvent, the UTF-16 EventData +// buffer, and the channel context out of process heap memory via LocalAlloc. +// All allocations are bound to the test's lifetime through t.Cleanup, so the +// individual tests stay free of teardown bookkeeping. +// ───────────────────────────────────────────────────────────────────────────── + +// allocCEvent returns a uintptr to a LocalAlloc'd HcsEvent. If payload is +// non-empty it is encoded as UTF-16 into a second LocalAlloc'd buffer and +// wired up as EventData; otherwise EventData is left nil. +func allocCEvent(t *testing.T, payload string) uintptr { + t.Helper() + + evtAddr, err := windows.LocalAlloc(windows.LPTR, uint32(unsafe.Sizeof(computecore.HcsEvent{}))) + if err != nil { + t.Fatalf("LocalAlloc(event): %v", err) + } + t.Cleanup(func() { _, _ = windows.LocalFree(windows.Handle(evtAddr)) }) + + e := (*computecore.HcsEvent)(unsafe.Pointer(evtAddr)) + e.Type = computecore.HcsEventTypeGroupLiveMigration + + if payload == "" { + return evtAddr + } + + utf16, err := windows.UTF16FromString(payload) + if err != nil { + t.Fatalf("UTF16FromString: %v", err) + } + // UTF-16 code units are 2 bytes by definition. + dataAddr, err := windows.LocalAlloc(windows.LPTR, uint32(len(utf16)*2)) + if err != nil { + t.Fatalf("LocalAlloc(data): %v", err) + } + t.Cleanup(func() { _, _ = windows.LocalFree(windows.Handle(dataAddr)) }) + + // Copy the UTF-16 sequence (including the trailing NUL from UTF16FromString) + // into the C buffer. + copy(unsafe.Slice((*uint16)(unsafe.Pointer(dataAddr)), len(utf16)), utf16) + e.EventData = (*uint16)(unsafe.Pointer(dataAddr)) + return evtAddr +} + +// allocCChanCtx stores ch in a LocalAlloc'd buffer and returns its address, +// so the handler reads the chan header out of C memory rather than the Go heap +// (matching how HCS delivers the registered callback context). +func allocCChanCtx(t *testing.T, ch chan hcsschema.OperationSystemMigrationNotificationInfo) uintptr { + t.Helper() + addr, err := windows.LocalAlloc(windows.LPTR, uint32(unsafe.Sizeof(ch))) + if err != nil { + t.Fatalf("LocalAlloc(ctx): %v", err) + } + t.Cleanup(func() { _, _ = windows.LocalFree(windows.Handle(addr)) }) + + *(*chan hcsschema.OperationSystemMigrationNotificationInfo)(unsafe.Pointer(addr)) = ch + return addr +} + +// expectNotification fails the test unless want is the next queued value on ch. +func expectNotification(t *testing.T, ch <-chan hcsschema.OperationSystemMigrationNotificationInfo, want hcsschema.OperationSystemMigrationNotificationInfo) { + t.Helper() + select { + case got := <-ch: + if got != want { + t.Fatalf("notification mismatch: got %+v want %+v", got, want) + } + default: + t.Fatal("expected a notification on the channel") + } +} + +// expectNoNotification fails the test if a notification is queued on ch. +func expectNoNotification(t *testing.T, ch <-chan hcsschema.OperationSystemMigrationNotificationInfo) { + t.Helper() + select { + case got := <-ch: + t.Fatalf("did not expect a notification, got %+v", got) + default: + } +} + +// ───────────────────────────────────────────────────────────────────────────── +// Nil-argument guards +// ───────────────────────────────────────────────────────────────────────────── + +// TestMigrationCallbackHandler_NilArgs verifies that the handler is a no-op +// (returns 0, sends nothing on the channel) when either argument is zero. +func TestMigrationCallbackHandler_NilArgs(t *testing.T) { + ch := make(chan hcsschema.OperationSystemMigrationNotificationInfo, 1) + + cases := []struct { + name string + event, ctx uintptr + }{ + {"BothZero", 0, 0}, + {"EventZero", 0, allocCChanCtx(t, ch)}, + {"CtxZero", allocCEvent(t, ""), 0}, + } + + for _, tc := range cases { + t.Run(tc.name, func(t *testing.T) { + if ret := migrationCallbackHandler(tc.event, tc.ctx); ret != 0 { + t.Fatalf("expected 0, got %d", ret) + } + }) + } + expectNoNotification(t, ch) +} + +// ───────────────────────────────────────────────────────────────────────────── +// Payload decoding +// ───────────────────────────────────────────────────────────────────────────── + +// TestMigrationCallbackHandler_Payloads verifies that real-world HCS +// GroupLiveMigration JSON payloads — including a nil EventData pointer — are +// decoded and forwarded on the notification channel. +func TestMigrationCallbackHandler_Payloads(t *testing.T) { + cases := []struct { + name string + payload string + want hcsschema.OperationSystemMigrationNotificationInfo + }{ + { + name: "NilEventData", + // payload "" => EventData pointer is nil; want is the zero value. + }, + { + name: "SetupDone", + payload: `{"Event":"SetupDone"}`, + want: hcsschema.OperationSystemMigrationNotificationInfo{Event: hcsschema.MigrationEventSetupDone}, + }, + { + name: "BlackoutStarted", + payload: `{"Event":"BlackoutStarted"}`, + want: hcsschema.OperationSystemMigrationNotificationInfo{Event: hcsschema.MigrationEventBlackoutStarted}, + }, + { + name: "OfflineDoneSuccess", + payload: `{"Event":"OfflineDone","Result":"Success"}`, + want: hcsschema.OperationSystemMigrationNotificationInfo{ + Event: hcsschema.MigrationEventOfflineDone, + Result: hcsschema.MigrationResultSuccess, + }, + }, + { + name: "MigrationDoneSuccess", + payload: `{"Event":"MigrationDone","Result":"Success"}`, + want: hcsschema.OperationSystemMigrationNotificationInfo{ + Event: hcsschema.MigrationEventMigrationDone, + Result: hcsschema.MigrationResultSuccess, + }, + }, + { + name: "WithOrigin", + payload: `{"Origin":"Source","Event":"MigrationDone","Result":"Success"}`, + want: hcsschema.OperationSystemMigrationNotificationInfo{ + Origin: hcsschema.MigrationOriginSource, + Event: hcsschema.MigrationEventMigrationDone, + Result: hcsschema.MigrationResultSuccess, + }, + }, + } + + for _, tc := range cases { + t.Run(tc.name, func(t *testing.T) { + ch := make(chan hcsschema.OperationSystemMigrationNotificationInfo, 1) + evt := allocCEvent(t, tc.payload) + ctx := allocCChanCtx(t, ch) + + if ret := migrationCallbackHandler(evt, ctx); ret != 0 { + t.Fatalf("expected 0, got %d", ret) + } + expectNotification(t, ch, tc.want) + }) + } +} + +// TestMigrationCallbackHandler_InvalidJSONDropped verifies that an +// unparseable EventData payload is logged and dropped without sending. +func TestMigrationCallbackHandler_InvalidJSONDropped(t *testing.T) { + ch := make(chan hcsschema.OperationSystemMigrationNotificationInfo, 1) + evt := allocCEvent(t, "not-json") + ctx := allocCChanCtx(t, ch) + + if ret := migrationCallbackHandler(evt, ctx); ret != 0 { + t.Fatalf("expected 0, got %d", ret) + } + expectNoNotification(t, ch) +} + +// ───────────────────────────────────────────────────────────────────────────── +// Backpressure +// ───────────────────────────────────────────────────────────────────────────── + +// TestMigrationCallbackHandler_FullChannelDropsEvent verifies that when the +// notification channel is full the handler drops the new event rather than +// blocking the HCS callback thread. +func TestMigrationCallbackHandler_FullChannelDropsEvent(t *testing.T) { + ch := make(chan hcsschema.OperationSystemMigrationNotificationInfo, 1) + + // Pre-fill the channel so the next send would block. + prefill := hcsschema.OperationSystemMigrationNotificationInfo{Event: hcsschema.MigrationEventSetupDone} + ch <- prefill + + evt := allocCEvent(t, `{"Event":"MigrationDone"}`) + ctx := allocCChanCtx(t, ch) + + if ret := migrationCallbackHandler(evt, ctx); ret != 0 { + t.Fatalf("expected 0, got %d", ret) + } + + // The original prefill must still be the only entry (new event dropped). + if got := <-ch; got != prefill { + t.Fatalf("expected prefill to remain, got %+v", got) + } + expectNoNotification(t, ch) +} diff --git a/internal/hcs/schema2/migration.go b/internal/hcs/schema2/migration.go index e459ae1c18..ee35fe0771 100644 --- a/internal/hcs/schema2/migration.go +++ b/internal/hcs/schema2/migration.go @@ -116,3 +116,66 @@ type MigrationNetworkSettings struct { // SessionID is the session ID associated with the socket connection between source and destination. SessionID uint32 `json:"SessionId,omitempty"` } + +// OperationSystemMigrationNotificationInfo is a notification payload describing +// the current state of an in-progress live migration operation. It is emitted +// by HCS over the migration notification channel as the workflow progresses. +type OperationSystemMigrationNotificationInfo struct { + // Origin indicates which side of the live migration this notification + // pertains to (source or destination). + Origin MigrationOrigin `json:"Origin,omitempty"` + // Event is the type of live migration event being reported. + Event MigrationEvent `json:"Event,omitempty"` + // Result is an optional outcome accompanying the event. It is typically + // populated for terminal events. + Result MigrationResult `json:"Result,omitempty"` + // AdditionalDetails carries extra event-specific information whose schema + // depends on the event being reported. Modeled as the HCS schema `Any` type. + AdditionalDetails *interface{} `json:"AdditionalDetails,omitempty"` +} + +// MigrationEvent describes a live migration event reported by HCS. +type MigrationEvent string + +const ( + // MigrationEventUnknown indicates an unspecified or unrecognized event. + MigrationEventUnknown MigrationEvent = "Unknown" + // MigrationEventMigrationDone indicates that migration has completed. + MigrationEventMigrationDone MigrationEvent = "MigrationDone" + // MigrationEventBlackoutStarted indicates that the VM has entered the blackout phase. + MigrationEventBlackoutStarted MigrationEvent = "BlackoutStarted" + // MigrationEventOfflineDone indicates that taking the VM offline has completed. + MigrationEventOfflineDone MigrationEvent = "OfflineDone" + // MigrationEventBlackoutExited indicates that the VM has successfully started + // again after the blackout phase. + MigrationEventBlackoutExited MigrationEvent = "BlackoutExited" + // MigrationEventSetupDone indicates that the live migration setup has completed. + MigrationEventSetupDone MigrationEvent = "SetupDone" + // MigrationEventTransferInProgress indicates that the VM is still transferring + // memory and other necessary state. + MigrationEventTransferInProgress MigrationEvent = "TransferInProgress" + // MigrationEventMigrationRecoveryDone indicates that migration recovery has been performed. + MigrationEventMigrationRecoveryDone MigrationEvent = "MigrationRecoveryDone" + // MigrationEventMigrationFailed indicates that migration failed. + MigrationEventMigrationFailed MigrationEvent = "MigrationFailed" +) + +// MigrationResult describes the possible result of a migration operation. +type MigrationResult string + +const ( + // MigrationResultInvalid indicates an invalid or unspecified result. + MigrationResultInvalid MigrationResult = "Invalid" + // MigrationResultSuccess indicates the migration operation succeeded. + MigrationResultSuccess MigrationResult = "Success" + // MigrationResultMigrationCancelled indicates the migration was cancelled. + MigrationResultMigrationCancelled MigrationResult = "MigrationCancelled" + // MigrationResultGuestInitiatedCancellation indicates the guest initiated the cancellation. + MigrationResultGuestInitiatedCancellation MigrationResult = "GuestInitiatedCancellation" + // MigrationResultSourceMigrationFailed indicates the migration failed on the source side. + MigrationResultSourceMigrationFailed MigrationResult = "SourceMigrationFailed" + // MigrationResultDestinationMigrationFailed indicates the migration failed on the destination side. + MigrationResultDestinationMigrationFailed MigrationResult = "DestinationMigrationFailed" + // MigrationResultMigrationRecoveryFailed indicates the migration recovery failed. + MigrationResultMigrationRecoveryFailed MigrationResult = "MigrationRecoveryFailed" +) diff --git a/internal/hcs/system.go b/internal/hcs/system.go index 762b49786a..3d9fcce1bd 100644 --- a/internal/hcs/system.go +++ b/internal/hcs/system.go @@ -43,7 +43,7 @@ type System struct { // Live Migration specific fields. migrationHandle computecore.HcsSystem - migrationNotifyCh chan string + migrationNotifyCh chan hcsschema.OperationSystemMigrationNotificationInfo // migrationPinner pins &migrationNotifyCh while it is registered as the // callback context with HCS, so the GC sees the cgo-held uintptr as a // live reference. Unpinned in closeMigrationHandle after HCS guarantees