Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
22 changes: 17 additions & 5 deletions internal/hcs/migration.go
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@ func migrationCallbackHandler(eventPtr uintptr, ctx uintptr) uintptr {
}

e := (*computecore.HcsEvent)(unsafe.Pointer(eventPtr))
ch := *(*chan string)(unsafe.Pointer(ctx))
ch := *(*chan hcsschema.OperationSystemMigrationNotificationInfo)(unsafe.Pointer(ctx))

eventData := ""
if e.EventData != nil {
Expand All @@ -60,9 +60,21 @@ func migrationCallbackHandler(eventPtr uintptr, ctx uintptr) uintptr {
"event-data": eventData,
}).Debug("HCS migration notification")

var info hcsschema.OperationSystemMigrationNotificationInfo
if eventData != "" {
if err := json.Unmarshal([]byte(eventData), &info); err != nil {
logrus.WithFields(logrus.Fields{
"event-type": e.Type.String(),
"event-data": eventData,
logrus.ErrorKey: err,
}).Warn("failed to unmarshal migration notification payload, dropping event")
return 0
}
}

// Non-blocking send to avoid blocking the HCS callback thread.
select {
case ch <- eventData:
case ch <- info:
default:
logrus.WithField("event-type", e.Type.String()).Warn("migration notification channel full, dropping event")
}
Expand Down Expand Up @@ -94,7 +106,7 @@ func (computeSystem *System) openMigrationHandle(ctx context.Context) error {

// Create the notification channel and store it on the struct.
computeSystem.migrationHandle = handle
computeSystem.migrationNotifyCh = make(chan string, migrationNotificationBufferSize)
computeSystem.migrationNotifyCh = make(chan hcsschema.OperationSystemMigrationNotificationInfo, migrationNotificationBufferSize)

// Pin the address of the notification channel field so it stays visible
// to the GC while HCS holds it as a uintptr callback context. Without
Expand Down Expand Up @@ -372,8 +384,8 @@ func (computeSystem *System) FinalizeLiveMigration(ctx context.Context, resume b
}

// MigrationNotifications returns a read-only channel that receives live migration
// event data strings. Returns an error if no migration handle is open.
func (computeSystem *System) MigrationNotifications() (<-chan string, error) {
// event payloads. Returns an error if no migration handle is open.
func (computeSystem *System) MigrationNotifications() (<-chan hcsschema.OperationSystemMigrationNotificationInfo, error) {
computeSystem.handleLock.RLock()
defer computeSystem.handleLock.RUnlock()

Expand Down
236 changes: 236 additions & 0 deletions internal/hcs/migration_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,236 @@
//go:build windows

package hcs

import (
"testing"
"unsafe"

"github.com/Microsoft/hcsshim/internal/computecore"
hcsschema "github.com/Microsoft/hcsshim/internal/hcs/schema2"
"golang.org/x/sys/windows"
)

// ─────────────────────────────────────────────────────────────────────────────
// Test helpers
//
// The handler under test reads its arguments as raw uintptrs that originate
// outside the Go heap (HCS hands them to us via a syscall callback). To
// faithfully exercise that contract — and the cgo pointer-passing rules it
// implies — the helpers below allocate the HcsEvent, the UTF-16 EventData
// buffer, and the channel context out of process heap memory via LocalAlloc.
// All allocations are bound to the test's lifetime through t.Cleanup, so the
// individual tests stay free of teardown bookkeeping.
// ─────────────────────────────────────────────────────────────────────────────

// allocCEvent returns a uintptr to a LocalAlloc'd HcsEvent. If payload is
// non-empty it is encoded as UTF-16 into a second LocalAlloc'd buffer and
// wired up as EventData; otherwise EventData is left nil.
func allocCEvent(t *testing.T, payload string) uintptr {
t.Helper()

evtAddr, err := windows.LocalAlloc(windows.LPTR, uint32(unsafe.Sizeof(computecore.HcsEvent{})))
if err != nil {
t.Fatalf("LocalAlloc(event): %v", err)
}
t.Cleanup(func() { _, _ = windows.LocalFree(windows.Handle(evtAddr)) })

e := (*computecore.HcsEvent)(unsafe.Pointer(evtAddr))
e.Type = computecore.HcsEventTypeGroupLiveMigration

if payload == "" {
return evtAddr
}

utf16, err := windows.UTF16FromString(payload)
if err != nil {
t.Fatalf("UTF16FromString: %v", err)
}
// UTF-16 code units are 2 bytes by definition.
dataAddr, err := windows.LocalAlloc(windows.LPTR, uint32(len(utf16)*2))
if err != nil {
t.Fatalf("LocalAlloc(data): %v", err)
}
t.Cleanup(func() { _, _ = windows.LocalFree(windows.Handle(dataAddr)) })

// Copy the UTF-16 sequence (including the trailing NUL from UTF16FromString)
// into the C buffer.
copy(unsafe.Slice((*uint16)(unsafe.Pointer(dataAddr)), len(utf16)), utf16)
e.EventData = (*uint16)(unsafe.Pointer(dataAddr))
return evtAddr
}

// allocCChanCtx stores ch in a LocalAlloc'd buffer and returns its address,
// so the handler reads the chan header out of C memory rather than the Go heap
// (matching how HCS delivers the registered callback context).
func allocCChanCtx(t *testing.T, ch chan hcsschema.OperationSystemMigrationNotificationInfo) uintptr {
t.Helper()
addr, err := windows.LocalAlloc(windows.LPTR, uint32(unsafe.Sizeof(ch)))
if err != nil {
t.Fatalf("LocalAlloc(ctx): %v", err)
}
t.Cleanup(func() { _, _ = windows.LocalFree(windows.Handle(addr)) })

*(*chan hcsschema.OperationSystemMigrationNotificationInfo)(unsafe.Pointer(addr)) = ch
return addr
}

// expectNotification fails the test unless want is the next queued value on ch.
func expectNotification(t *testing.T, ch <-chan hcsschema.OperationSystemMigrationNotificationInfo, want hcsschema.OperationSystemMigrationNotificationInfo) {
t.Helper()
select {
case got := <-ch:
if got != want {
t.Fatalf("notification mismatch: got %+v want %+v", got, want)
}
default:
t.Fatal("expected a notification on the channel")
}
}

// expectNoNotification fails the test if a notification is queued on ch.
func expectNoNotification(t *testing.T, ch <-chan hcsschema.OperationSystemMigrationNotificationInfo) {
t.Helper()
select {
case got := <-ch:
t.Fatalf("did not expect a notification, got %+v", got)
default:
}
}

// ─────────────────────────────────────────────────────────────────────────────
// Nil-argument guards
// ─────────────────────────────────────────────────────────────────────────────

// TestMigrationCallbackHandler_NilArgs verifies that the handler is a no-op
// (returns 0, sends nothing on the channel) when either argument is zero.
func TestMigrationCallbackHandler_NilArgs(t *testing.T) {
ch := make(chan hcsschema.OperationSystemMigrationNotificationInfo, 1)

cases := []struct {
name string
event, ctx uintptr
}{
{"BothZero", 0, 0},
{"EventZero", 0, allocCChanCtx(t, ch)},
{"CtxZero", allocCEvent(t, ""), 0},
}

for _, tc := range cases {
t.Run(tc.name, func(t *testing.T) {
if ret := migrationCallbackHandler(tc.event, tc.ctx); ret != 0 {
t.Fatalf("expected 0, got %d", ret)
}
})
}
expectNoNotification(t, ch)
}

// ─────────────────────────────────────────────────────────────────────────────
// Payload decoding
// ─────────────────────────────────────────────────────────────────────────────

// TestMigrationCallbackHandler_Payloads verifies that real-world HCS
// GroupLiveMigration JSON payloads — including a nil EventData pointer — are
// decoded and forwarded on the notification channel.
func TestMigrationCallbackHandler_Payloads(t *testing.T) {
cases := []struct {
name string
payload string
want hcsschema.OperationSystemMigrationNotificationInfo
}{
{
name: "NilEventData",
// payload "" => EventData pointer is nil; want is the zero value.
},
{
name: "SetupDone",
payload: `{"Event":"SetupDone"}`,
want: hcsschema.OperationSystemMigrationNotificationInfo{Event: hcsschema.MigrationEventSetupDone},
},
{
name: "BlackoutStarted",
payload: `{"Event":"BlackoutStarted"}`,
want: hcsschema.OperationSystemMigrationNotificationInfo{Event: hcsschema.MigrationEventBlackoutStarted},
},
{
name: "OfflineDoneSuccess",
payload: `{"Event":"OfflineDone","Result":"Success"}`,
want: hcsschema.OperationSystemMigrationNotificationInfo{
Event: hcsschema.MigrationEventOfflineDone,
Result: hcsschema.MigrationResultSuccess,
},
},
{
name: "MigrationDoneSuccess",
payload: `{"Event":"MigrationDone","Result":"Success"}`,
want: hcsschema.OperationSystemMigrationNotificationInfo{
Event: hcsschema.MigrationEventMigrationDone,
Result: hcsschema.MigrationResultSuccess,
},
},
{
name: "WithOrigin",
payload: `{"Origin":"Source","Event":"MigrationDone","Result":"Success"}`,
want: hcsschema.OperationSystemMigrationNotificationInfo{
Origin: hcsschema.MigrationOriginSource,
Event: hcsschema.MigrationEventMigrationDone,
Result: hcsschema.MigrationResultSuccess,
},
},
}

for _, tc := range cases {
t.Run(tc.name, func(t *testing.T) {
ch := make(chan hcsschema.OperationSystemMigrationNotificationInfo, 1)
evt := allocCEvent(t, tc.payload)
ctx := allocCChanCtx(t, ch)

if ret := migrationCallbackHandler(evt, ctx); ret != 0 {
t.Fatalf("expected 0, got %d", ret)
}
expectNotification(t, ch, tc.want)
})
}
}

// TestMigrationCallbackHandler_InvalidJSONDropped verifies that an
// unparseable EventData payload is logged and dropped without sending.
func TestMigrationCallbackHandler_InvalidJSONDropped(t *testing.T) {
ch := make(chan hcsschema.OperationSystemMigrationNotificationInfo, 1)
evt := allocCEvent(t, "not-json")
ctx := allocCChanCtx(t, ch)

if ret := migrationCallbackHandler(evt, ctx); ret != 0 {
t.Fatalf("expected 0, got %d", ret)
}
expectNoNotification(t, ch)
}

// ─────────────────────────────────────────────────────────────────────────────
// Backpressure
// ─────────────────────────────────────────────────────────────────────────────

// TestMigrationCallbackHandler_FullChannelDropsEvent verifies that when the
// notification channel is full the handler drops the new event rather than
// blocking the HCS callback thread.
func TestMigrationCallbackHandler_FullChannelDropsEvent(t *testing.T) {
ch := make(chan hcsschema.OperationSystemMigrationNotificationInfo, 1)

// Pre-fill the channel so the next send would block.
prefill := hcsschema.OperationSystemMigrationNotificationInfo{Event: hcsschema.MigrationEventSetupDone}
ch <- prefill

evt := allocCEvent(t, `{"Event":"MigrationDone"}`)
ctx := allocCChanCtx(t, ch)

if ret := migrationCallbackHandler(evt, ctx); ret != 0 {
t.Fatalf("expected 0, got %d", ret)
}

// The original prefill must still be the only entry (new event dropped).
if got := <-ch; got != prefill {
t.Fatalf("expected prefill to remain, got %+v", got)
}
expectNoNotification(t, ch)
}
63 changes: 63 additions & 0 deletions internal/hcs/schema2/migration.go
Original file line number Diff line number Diff line change
Expand Up @@ -116,3 +116,66 @@ type MigrationNetworkSettings struct {
// SessionID is the session ID associated with the socket connection between source and destination.
SessionID uint32 `json:"SessionId,omitempty"`
}

// OperationSystemMigrationNotificationInfo is a notification payload describing
// the current state of an in-progress live migration operation. It is emitted
// by HCS over the migration notification channel as the workflow progresses.
type OperationSystemMigrationNotificationInfo struct {
// Origin indicates which side of the live migration this notification
// pertains to (source or destination).
Origin MigrationOrigin `json:"Origin,omitempty"`
// Event is the type of live migration event being reported.
Event MigrationEvent `json:"Event,omitempty"`
// Result is an optional outcome accompanying the event. It is typically
// populated for terminal events.
Result MigrationResult `json:"Result,omitempty"`
// AdditionalDetails carries extra event-specific information whose schema
// depends on the event being reported. Modeled as the HCS schema `Any` type.
AdditionalDetails *interface{} `json:"AdditionalDetails,omitempty"`
}

// MigrationEvent describes a live migration event reported by HCS.
type MigrationEvent string

const (
// MigrationEventUnknown indicates an unspecified or unrecognized event.
MigrationEventUnknown MigrationEvent = "Unknown"
// MigrationEventMigrationDone indicates that migration has completed.
MigrationEventMigrationDone MigrationEvent = "MigrationDone"
// MigrationEventBlackoutStarted indicates that the VM has entered the blackout phase.
MigrationEventBlackoutStarted MigrationEvent = "BlackoutStarted"
// MigrationEventOfflineDone indicates that taking the VM offline has completed.
MigrationEventOfflineDone MigrationEvent = "OfflineDone"
// MigrationEventBlackoutExited indicates that the VM has successfully started
// again after the blackout phase.
MigrationEventBlackoutExited MigrationEvent = "BlackoutExited"
// MigrationEventSetupDone indicates that the live migration setup has completed.
MigrationEventSetupDone MigrationEvent = "SetupDone"
// MigrationEventTransferInProgress indicates that the VM is still transferring
// memory and other necessary state.
MigrationEventTransferInProgress MigrationEvent = "TransferInProgress"
// MigrationEventMigrationRecoveryDone indicates that migration recovery has been performed.
MigrationEventMigrationRecoveryDone MigrationEvent = "MigrationRecoveryDone"
// MigrationEventMigrationFailed indicates that migration failed.
MigrationEventMigrationFailed MigrationEvent = "MigrationFailed"
)

// MigrationResult describes the possible result of a migration operation.
type MigrationResult string

const (
// MigrationResultInvalid indicates an invalid or unspecified result.
MigrationResultInvalid MigrationResult = "Invalid"
// MigrationResultSuccess indicates the migration operation succeeded.
MigrationResultSuccess MigrationResult = "Success"
// MigrationResultMigrationCancelled indicates the migration was cancelled.
MigrationResultMigrationCancelled MigrationResult = "MigrationCancelled"
// MigrationResultGuestInitiatedCancellation indicates the guest initiated the cancellation.
MigrationResultGuestInitiatedCancellation MigrationResult = "GuestInitiatedCancellation"
// MigrationResultSourceMigrationFailed indicates the migration failed on the source side.
MigrationResultSourceMigrationFailed MigrationResult = "SourceMigrationFailed"
// MigrationResultDestinationMigrationFailed indicates the migration failed on the destination side.
MigrationResultDestinationMigrationFailed MigrationResult = "DestinationMigrationFailed"
// MigrationResultMigrationRecoveryFailed indicates the migration recovery failed.
MigrationResultMigrationRecoveryFailed MigrationResult = "MigrationRecoveryFailed"
)
2 changes: 1 addition & 1 deletion internal/hcs/system.go
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@ type System struct {

// Live Migration specific fields.
migrationHandle computecore.HcsSystem
migrationNotifyCh chan string
migrationNotifyCh chan hcsschema.OperationSystemMigrationNotificationInfo
// migrationPinner pins &migrationNotifyCh while it is registered as the
// callback context with HCS, so the GC sees the cgo-held uintptr as a
// live reference. Unpinned in closeMigrationHandle after HCS guarantees
Expand Down
Loading