Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions .golangci.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ linters:
enable:
- godoclint
- unparam
- goheader
disable:
- errcheck
settings:
Expand All @@ -16,6 +17,11 @@ linters:
options:
max-len:
length: 100
goheader:
template: |-
This Source Code Form is subject to the terms of the Mozilla Public
License, v. 2.0. If a copy of the MPL was not distributed with this
file, You can obtain one at https://mozilla.org/MPL/2.0/.

formatters:
enable:
Expand Down
4 changes: 4 additions & 0 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -96,8 +96,12 @@ helm-push: helm-package
fmt:
@ echo "+ Formatting Go code..."
@ go tool -modfile tools/go.mod golangci-lint fmt
@ go tool -modfile tools/go.mod golangci-lint run --enable-only=goheader --fix

.PHONY: lint
lint:
@ echo "+ Running Go linters..."
@ go tool -modfile tools/go.mod golangci-lint run

.PHONY: dev
dev: fmt lint test
79 changes: 41 additions & 38 deletions internal/provider/instances_v2.go
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,6 @@ import (

"github.com/oxidecomputer/oxide.go/oxide"
v1 "k8s.io/api/core/v1"
"k8s.io/client-go/kubernetes"
cloudprovider "k8s.io/cloud-provider"
)

Expand All @@ -21,41 +20,46 @@ var _ cloudprovider.InstancesV2 = (*InstancesV2)(nil)
// gibibyte is the number of bytes in a gibibyte.
const gibibyte = 1024 * 1024 * 1024

type oxideInstanceClient interface {
InstanceNetworkInterfaceList(
context.Context,
oxide.InstanceNetworkInterfaceListParams,
) (*oxide.InstanceNetworkInterfaceResultsPage, error)
InstanceExternalIpList(
context.Context,
oxide.InstanceExternalIpListParams,
) (*oxide.ExternalIpResultsPage, error)
InstanceView(context.Context, oxide.InstanceViewParams) (*oxide.Instance, error)
}

// InstancesV2 implements [cloudprovider.InstancesV2] to provide Oxide specific
// instance functionality.
type InstancesV2 struct {
client *oxide.Client
client oxideInstanceClient
project string

k8sClient kubernetes.Interface
}

// InstanceExists checks whether the provided Kubernetes node exists as instance
// in Oxide.
// InstanceExists checks whether the provided Kubernetes node exists as an instance
// in Oxide. The cloud node lifecycle controller uses this information to determine
// if it can delete the Node object.
func (i *InstancesV2) InstanceExists(ctx context.Context, node *v1.Node) (bool, error) {
ctx, cancel := context.WithTimeout(ctx, 30*time.Second)
defer cancel()

instanceID, err := InstanceIDFromProviderID(node.Spec.ProviderID)
// Get the instance, either from the provider ID or by looking up by name.
_, err := i.getInstance(ctx, node)
if err != nil {
return false, fmt.Errorf("failed retrieving instance id from provider id: %w", err)
}

if _, err := i.client.InstanceView(ctx, oxide.InstanceViewParams{
Instance: oxide.NameOrId(instanceID),
}); err != nil {
if errors.Is(err, oxide.ErrObjectNotFound) {
return false, nil
}

return false, fmt.Errorf("failed viewing oxide instance %s: %w", instanceID, err)
return false, err
}

return true, nil
}

// InstanceMetadata populates the metadata for the provided node, notably
// setting its provider ID.
// InstanceMetadata is called by the cloud node controller to initialize nodes with
// the node.cloudprovider.kubernetes.io/uninitialized:NoSchedule taint. It returns
// metadata for the provided node, notably its provider ID.
func (i *InstancesV2) InstanceMetadata(
ctx context.Context,
node *v1.Node,
Expand Down Expand Up @@ -146,6 +150,25 @@ func (i *InstancesV2) InstanceMetadata(
}, nil
}

// InstanceShutdown checks whether the provided node is shut down in Oxide.
// The cloud node lifecycle controller uses this to determine if the
// node.cloudprovider.kubernetes.io/shutdown:NoSchedule taint should
// be applied to the Node object.
func (i *InstancesV2) InstanceShutdown(ctx context.Context, node *v1.Node) (bool, error) {
ctx, cancel := context.WithTimeout(ctx, 30*time.Second)
defer cancel()

// Get the instance, either from the provider ID or by looking up by name.
instance, err := i.getInstance(ctx, node)
if err != nil {
if errors.Is(err, oxide.ErrObjectNotFound) {
return true, nil
}
return false, err
Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Either here or in getInstance we should decorate the error with the instance ID we tried to fetch. I don't remember if the outer Kubernetes calling code has that information already.

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

getInstance returns what we try to look up in the wrapped error. If it can't parse the provider ID, it's printed. If it uses the fallback path when the providerID is not set, it wraps the error returned from the SDK. The wrapped SDK error is admittedly ugly, so will need to figure out how to format that better into a structured line...

│ oxide-ccm-oxide-cloud-controller-manager-776766d446-2zmhd E0529 21:35:34.515314       1 node_controller.go:288] Error getting instance metadata for node addresses: failed viewing oxide instance: GET https://oxide<blah>/v1/instances/46221934-d503-43ea-825f-730ada60d85c                             │
│ oxide-ccm-oxide-cloud-controller-manager-776766d446-2zmhd ----------- RESPONSE -----------                                                                                                                                                                                                                                  │
│ oxide-ccm-oxide-cloud-controller-manager-776766d446-2zmhd Status: 404 ObjectNotFound                                                                                                                                                                                                                                        │
│ oxide-ccm-oxide-cloud-controller-manager-776766d446-2zmhd Message: not found: instance with id "46221934-d503-43ea-825f-730ada60d85c"                                                                                                                                                                                       │
│ oxide-ccm-oxide-cloud-controller-manager-776766d446-2zmhd RequestID: 9163f53f-95b7-4c6b-8d61-538df4c4c408                                                                                                                                                                                                                   │
│ oxide-ccm-oxide-cloud-controller-manager-776766d446-2zmhd ------- RESPONSE HEADERS -------                                                                                                                                                                                                                                  │
│ oxide-ccm-oxide-cloud-controller-manager-776766d446-2zmhd Content-Type: [application/json]                                                                                                                                                                                                                                  │
│ oxide-ccm-oxide-cloud-controller-manager-776766d446-2zmhd X-Request-Id: [9163f53f-95b7-4c6b-8d61-538df4c4c408]                                                                                                                                                                                                              │
│ oxide-ccm-oxide-cloud-controller-manager-776766d446-2zmhd Date: [Fri, 29 May 2026 21:35:34 GMT]                                                                                                                                                                                                                             │
│ oxide-ccm-oxide-cloud-controller-manager-776766d446-2zmhd Content-Length: [177]

Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It's coming from oxide.go here: https://github.com/oxidecomputer/oxide.go/blob/93d8204aae4d37f5c56c90ea94764f5d4b0cce3a/oxide/errors.go#L96-L133

We have RFD-614 to discuss how we'd like to improve custom errors but we've stalled a bit on finishing it since we've implemented some of the stuff discussed there already. Ideally we'd pass the error to some structured logging package and it calls some structured printing method instead of Error() string. Either way that's another discussion for oxide.go.

}
return instance.RunState == oxide.InstanceStateStopped, nil
}

// getInstance retrieves the instance either from the node's provider ID
// or by looking up the instance by name.
func (i *InstancesV2) getInstance(ctx context.Context, node *v1.Node) (*oxide.Instance, error) {
Expand All @@ -170,23 +193,3 @@ func (i *InstancesV2) getInstance(ctx context.Context, node *v1.Node) (*oxide.In

return instance, nil
}

// InstanceShutdown checks whether the provided node is shut down in Oxide.
func (i *InstancesV2) InstanceShutdown(ctx context.Context, node *v1.Node) (bool, error) {
ctx, cancel := context.WithTimeout(ctx, 30*time.Second)
defer cancel()

instanceID, err := InstanceIDFromProviderID(node.Spec.ProviderID)
if err != nil {
return false, fmt.Errorf("failed retrieving instance id from provider id: %w", err)
}

instance, err := i.client.InstanceView(ctx, oxide.InstanceViewParams{
Instance: oxide.NameOrId(instanceID),
})
if err != nil {
return false, fmt.Errorf("failed viewing oxide instance %s: %w", instanceID, err)
}

return instance.RunState == oxide.InstanceStateStopped, nil
}
202 changes: 202 additions & 0 deletions internal/provider/instances_v2_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,202 @@
// This Source Code Form is subject to the terms of the Mozilla Public
// License, v. 2.0. If a copy of the MPL was not distributed with this
// file, You can obtain one at https://mozilla.org/MPL/2.0/.

package provider
Comment thread
sudomateo marked this conversation as resolved.

import (
"context"
"testing"

"github.com/oxidecomputer/oxide.go/oxide"
v1 "k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
)

type mockOxideClient struct {
InstanceNetworkInterfaceListOutput *oxide.InstanceNetworkInterfaceResultsPage
InstanceNetworkInterfaceListError error

InstanceExternalIpListOutput *oxide.ExternalIpResultsPage
InstanceExternalIpListError error

InstanceViewOutput *oxide.Instance
InstanceViewError error
}

var (
nodeWithoutProviderID = v1.Node{
ObjectMeta: metav1.ObjectMeta{Name: "node-1"},
Spec: v1.NodeSpec{},
}
nodeWithProviderID = v1.Node{
ObjectMeta: metav1.ObjectMeta{Name: "node-1"},
Spec: v1.NodeSpec{
ProviderID: "oxide://12345678-1234-1234-1234-123456789abc",
},
}
nodeDoesNotExistInOxide = v1.Node{
ObjectMeta: metav1.ObjectMeta{Name: "node-2"},
Spec: v1.NodeSpec{
ProviderID: "oxide://87654321-1234-1234-1234-123456789abc",
},
}

instanceRunning = oxide.Instance{
Name: oxide.Name("node-1"),
Id: "12345678-1234-1234-1234-123456789abc",
RunState: oxide.InstanceStateRunning,
}

instanceStopped = oxide.Instance{
Name: oxide.Name("node-1"),
Id: "12345678-1234-1234-1234-123456789abc",
RunState: oxide.InstanceStateStopped,
}
)

func TestInstanceExists(t *testing.T) {
t.Run("WithProviderID", func(t *testing.T) {
instancesV2 := InstancesV2{
client: &mockOxideClient{
InstanceViewOutput: &instanceRunning,
},
project: "test",
}
exists, err := instancesV2.InstanceExists(t.Context(), &nodeWithProviderID)
if err != nil {
t.Fatalf("unexpected error: %v", err)
}
if !exists {
t.Fatal("expected instance to exist via the Oxide API")
}
})

t.Run("NoProviderID", func(t *testing.T) {
instancesV2 := InstancesV2{
client: &mockOxideClient{
InstanceViewOutput: &instanceRunning,
},
project: "test",
}
exists, err := instancesV2.InstanceExists(t.Context(), &nodeWithoutProviderID)
if err != nil {
t.Fatalf("unexpected error: %v", err)
}
if !exists {
t.Fatal("expected instance to exist via the Oxide API")
}
})

t.Run("DoesNotExistInOxide", func(t *testing.T) {
instancesV2 := InstancesV2{
client: &mockOxideClient{
InstanceViewError: oxide.ErrObjectNotFound,
},
project: "test",
}
exists, err := instancesV2.InstanceExists(t.Context(), &nodeDoesNotExistInOxide)
if err != nil {
t.Fatalf("unexpected error: %v", err)
}
if exists {
t.Fatal("expected instance to NOT exist via the Oxide API")
}
})
}

func TestShutdown(t *testing.T) {
t.Run("RunningWithProviderID", func(t *testing.T) {
instancesV2 := InstancesV2{
client: &mockOxideClient{
InstanceViewOutput: &instanceRunning,
},
project: "test",
}
shutdown, err := instancesV2.InstanceShutdown(t.Context(), &nodeWithProviderID)
if err != nil {
t.Fatalf("unexpected error: %v", err)
}
if shutdown {
t.Fatal("expected instance to be running via the Oxide API")
}
})

t.Run("RunningNoProviderID", func(t *testing.T) {
instancesV2 := InstancesV2{
client: &mockOxideClient{
InstanceViewOutput: &instanceRunning,
},
project: "test",
}
shutdown, err := instancesV2.InstanceShutdown(t.Context(), &nodeWithoutProviderID)
if err != nil {
t.Fatalf("unexpected error: %v", err)
}
if shutdown {
t.Fatal("expected instance to be running via the Oxide API")
}
})

t.Run("InstanceStopped", func(t *testing.T) {
instancesV2 := InstancesV2{
client: &mockOxideClient{
InstanceViewOutput: &instanceStopped,
},
project: "test",
}
shutdown, err := instancesV2.InstanceShutdown(t.Context(), &nodeDoesNotExistInOxide)
if err != nil {
t.Fatalf("unexpected error: %v", err)
}
if !shutdown {
t.Fatal("expected instance to be stopped via the Oxide API")
}
})

t.Run("DoesNotExistInOxide", func(t *testing.T) {
instancesV2 := InstancesV2{
client: &mockOxideClient{
InstanceViewError: oxide.ErrObjectNotFound,
},
project: "test",
}
shutdown, err := instancesV2.InstanceShutdown(t.Context(), &nodeDoesNotExistInOxide)
if err != nil {
t.Fatalf("unexpected error: %v", err)
}
if !shutdown {
t.Fatal("expected instance to NOT exist via the Oxide API")
}
})
}

func (c *mockOxideClient) InstanceNetworkInterfaceList(
context.Context,
oxide.InstanceNetworkInterfaceListParams,
) (*oxide.InstanceNetworkInterfaceResultsPage, error) {
if c.InstanceNetworkInterfaceListError != nil {
return nil, c.InstanceNetworkInterfaceListError
}
return c.InstanceNetworkInterfaceListOutput, nil
}

func (c *mockOxideClient) InstanceExternalIpList(
context.Context,
oxide.InstanceExternalIpListParams,
) (*oxide.ExternalIpResultsPage, error) {
if c.InstanceExternalIpListError != nil {
return nil, c.InstanceExternalIpListError
}
return c.InstanceExternalIpListOutput, nil
}

func (c *mockOxideClient) InstanceView(
context.Context,
oxide.InstanceViewParams,
) (*oxide.Instance, error) {
if c.InstanceViewError != nil {
return nil, c.InstanceViewError
}
return c.InstanceViewOutput, nil
}
5 changes: 2 additions & 3 deletions internal/provider/provider.go
Original file line number Diff line number Diff line change
Expand Up @@ -104,9 +104,8 @@ func (o *Oxide) Instances() (cloudprovider.Instances, bool) {
// metadata, and determine whether they exists to facilitate cleanup.
func (o *Oxide) InstancesV2() (cloudprovider.InstancesV2, bool) {
return &InstancesV2{
client: o.client,
project: o.project,
k8sClient: o.k8sClient,
client: o.client,
project: o.project,
}, true
}

Expand Down