Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

VM Instance Initialization: Driver.InitializeMachine #898

Merged
merged 10 commits into from
Apr 3, 2024
38 changes: 37 additions & 1 deletion pkg/util/provider/driver/driver.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,14 +20,24 @@ package driver
import (
"context"

"github.com/gardener/machine-controller-manager/pkg/apis/machine/v1alpha1"
corev1 "k8s.io/api/core/v1"

"github.com/gardener/machine-controller-manager/pkg/apis/machine/v1alpha1"
)

// Driver is the common interface for creation/deletion of the VMs over different cloud-providers.
type Driver interface {
// CreateMachine call is responsible for VM creation on the provider
CreateMachine(context.Context, *CreateMachineRequest) (*CreateMachineResponse, error)
// InitializeMachine call is responsible for VM initialization on the provider.
// This method should only be invoked as a post VM creation initialization to configure network configuration etc.
//
// In case of an error, this operation should return an error with one of the following status codes
// - codes.Unimplemented if the provider does not support VM instance initialization.
// - codes.Uninitialized initialization of VM instance failed due to errors
// - codes.NotFound if VM instance was not found.
// - codes.Aborted if VM instance was aborted by the provider.
rishabh-11 marked this conversation as resolved.
Show resolved Hide resolved
InitializeMachine(context.Context, *InitializeMachineRequest) (*InitializeMachineResponse, error)
// DeleteMachine call is responsible for VM deletion/termination on the provider
DeleteMachine(context.Context, *DeleteMachineRequest) (*DeleteMachineResponse, error)
// GetMachineStatus call get's the status of the VM backing the machine object on the provider
Expand Down Expand Up @@ -64,6 +74,32 @@ type CreateMachineResponse struct {
LastKnownState string
}

// InitializeMachineRequest encapsulates params for the VM Initialization operation (Driver.InitializeMachine).
type InitializeMachineRequest struct {
// Machine object representing VM that must be initialized
Machine *v1alpha1.Machine

// MachineClass backing the machine object
MachineClass *v1alpha1.MachineClass

// Secret backing the machineClass object
Secret *corev1.Secret
}

// InitializeMachineResponse is the response for VM instance initialization (Driver.InitializeMachine).
type InitializeMachineResponse struct {
// ProviderID is the unique identification of the VM at the cloud provider.
// ProviderID typically matches with the node.Spec.ProviderID on the node object.
// Eg: gce://project-name/region/vm-ID
ProviderID string

// NodeName is the name of the node-object registered to kubernetes.
NodeName string

// LastKnownState represents the last state of the VM instance after initialization operation.
LastKnownState string
unmarshall marked this conversation as resolved.
Show resolved Hide resolved
}

// DeleteMachineRequest is the delete request for VM deletion
type DeleteMachineRequest struct {
// Machine object from whom VM is to be deleted
Expand Down
23 changes: 22 additions & 1 deletion pkg/util/provider/driver/fake.go
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,28 @@ func (d *FakeDriver) CreateMachine(ctx context.Context, createMachineRequest *Cr
return nil, d.Err
}

// InitializeMachine makes a call to the driver to initialize the VM instance of machine.
func (d *FakeDriver) InitializeMachine(ctx context.Context, initMachineRequest *InitializeMachineRequest) (*InitializeMachineResponse, error) {
sErr, ok := status.FromError(d.Err)
if ok && sErr != nil {
switch sErr.Code() {
case codes.NotFound:
d.VMExists = false
return nil, d.Err
case codes.Unimplemented:
break
default:
return nil, d.Err
}
}
d.VMExists = true
return &InitializeMachineResponse{
ProviderID: d.ProviderID,
NodeName: d.NodeName,
LastKnownState: d.LastKnownState,
}, d.Err
}

// DeleteMachine make a call to the driver to delete the machine.
func (d *FakeDriver) DeleteMachine(ctx context.Context, deleteMachineRequest *DeleteMachineRequest) (*DeleteMachineResponse, error) {
d.VMExists = false
Expand All @@ -92,7 +114,6 @@ func (d *FakeDriver) GetMachineStatus(ctx context.Context, getMachineStatusReque
case d.Err != nil:
return nil, d.Err
}

return &GetMachineStatusResponse{
ProviderID: d.ProviderID,
NodeName: d.NodeName,
Expand Down
2 changes: 2 additions & 0 deletions pkg/util/provider/machinecodes/codes/code_string.go
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,8 @@ func (c Code) String() string {
return "DataLoss"
case Unauthenticated:
return "Unauthenticated"
case Uninitialized:
unmarshall marked this conversation as resolved.
Show resolved Hide resolved
return "Uninitialized"
default:
return "Code(" + strconv.FormatInt(int64(c), 10) + ")"
}
Expand Down
6 changes: 6 additions & 0 deletions pkg/util/provider/machinecodes/codes/codes.go
Original file line number Diff line number Diff line change
Expand Up @@ -144,6 +144,11 @@ const (
// Unauthenticated indicates the request does not have valid
// authentication credentials for the operation.
Unauthenticated Code = 16

// Uninitialized indicates that the VM instance initialization was not performed.
// This is meant to be used by providers in implementation of
// [github.com/gardener/machine-controller-manager/pkg/util/provider/driver.Driver.GetMachineStatus]
Uninitialized Code = 17
)

var strToCode = map[string]Code{
Expand All @@ -164,6 +169,7 @@ var strToCode = map[string]Code{
"Unavailable": Unavailable,
"DataLoss": DataLoss,
"Unauthenticated": Unauthenticated,
"Uninitialized": Uninitialized,
}

// StringToCode coverts string into the Code.
Expand Down
68 changes: 66 additions & 2 deletions pkg/util/provider/machinecontroller/machine.go
Original file line number Diff line number Diff line change
Expand Up @@ -360,8 +360,10 @@ func (c *controller) triggerCreationFlow(ctx context.Context, createMachineReque
nodeName, providerID string

// Initializations
machine = createMachineRequest.Machine
machineName = createMachineRequest.Machine.Name
machine = createMachineRequest.Machine
machineName = createMachineRequest.Machine.Name
newlyCreatedMachine = false
uninitializedMachine = false
)

// we should avoid mutating Secret, since it goes all the way into the Informer's store
Expand Down Expand Up @@ -390,6 +392,7 @@ func (c *controller) triggerCreationFlow(ctx context.Context, createMachineReque
klog.Errorf("Error occurred while decoding machine error for machine %q: %s", machine.Name, err)
return machineutils.MediumRetry, err
}
klog.Warningf("For machine %q, obtained VM error status as: %s", machineErr)

// Decoding machine error code
switch machineErr.Code() {
Expand All @@ -407,6 +410,7 @@ func (c *controller) triggerCreationFlow(ctx context.Context, createMachineReque
klog.Errorf("Error while creating machine %s: %s", machine.Name, err.Error())
return c.machineCreateErrorHandler(ctx, machine, createMachineResponse, err)
}
newlyCreatedMachine = true
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is set to true after call to driver.CreateMachine has successfully returned. However soon after the following check is made:

if _, err := c.nodeLister.Get(nodeName); err == nil && nodeName != machineName

which if true deletes the machine. However in this case newlyCreatedMachine is not reset to false.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes. I would suggest to move newlyCreatedMachine = true to the end of the block.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We should remove the newlyCreatedMachine variable and just use uninitializedMachine. Set uninitializedMachine to true after the VM creation is successful.


nodeName = createMachineResponse.NodeName
providerID = createMachineResponse.ProviderID
Expand Down Expand Up @@ -496,6 +500,10 @@ func (c *controller) triggerCreationFlow(ctx context.Context, createMachineReque

return machineutils.ShortRetry, err

case codes.Uninitialized:
uninitializedMachine = true
klog.Infof("VM instance associated with machine %s was created but not initialized.", machine.Name)

default:
updateRetryPeriod, updateErr := c.machineStatusUpdate(
ctx,
Expand Down Expand Up @@ -526,6 +534,13 @@ func (c *controller) triggerCreationFlow(ctx context.Context, createMachineReque
providerID = getMachineStatusResponse.ProviderID
}

if newlyCreatedMachine || uninitializedMachine {
rishabh-11 marked this conversation as resolved.
Show resolved Hide resolved
retryPeriod, err := c.initializeMachine(ctx, createMachineRequest.Machine, createMachineRequest.MachineClass, createMachineRequest.Secret)
if err != nil {
return retryPeriod, err
}
}

_, machineNodeLabelPresent := createMachineRequest.Machine.Labels[v1alpha1.NodeLabelKey]
_, machinePriorityAnnotationPresent := createMachineRequest.Machine.Annotations[machineutils.MachinePriority]

Expand Down Expand Up @@ -585,6 +600,55 @@ func (c *controller) triggerCreationFlow(ctx context.Context, createMachineReque
return machineutils.LongRetry, nil
}

func (c *controller) initializeMachine(ctx context.Context, machine *v1alpha1.Machine, machineClass *v1alpha1.MachineClass, secret *corev1.Secret) (machineutils.RetryPeriod, error) {
req := &driver.InitializeMachineRequest{
Machine: machine,
MachineClass: machineClass,
Secret: secret,
}
klog.V(3).Infof("Initializing VM instance for Machine %q", machine.Name)
resp, err := c.driver.InitializeMachine(ctx, req)
if err != nil {
errStatus, ok := status.FromError(err)
if !ok {
klog.Errorf("Error occurred while decoding machine error for machine %q: %s", machine.Name, err)
return machineutils.MediumRetry, err
rishabh-11 marked this conversation as resolved.
Show resolved Hide resolved
}
klog.Errorf("Error occurred while initializing VM instance for machine %q: %s", machine.Name, err)
switch errStatus.Code() {
case codes.Unimplemented:
klog.V(3).Infof("Provider does not support Driver.InitializeMachine - skipping VM instance initialization for %q.", machine.Name)
return 0, nil
case codes.NotFound:
rishabh-11 marked this conversation as resolved.
Show resolved Hide resolved
klog.Warningf("No VM instance found for machine %q. Skipping VM instance initialization.", machine.Name)
return 0, nil
case codes.Canceled, codes.Aborted:
elankath marked this conversation as resolved.
Show resolved Hide resolved
klog.Warningf("VM instance initialization for machine %q was aborted/cancelled.", machine.Name)
return 0, nil
}
retryPeriod, _ := c.machineStatusUpdate(
ctx,
machine,
v1alpha1.LastOperation{
Description: fmt.Sprintf("Provider error: %s. %s", err.Error(), machineutils.InstanceInitialization),
ErrorCode: errStatus.Code().String(),
State: v1alpha1.MachineStateFailed,
unmarshall marked this conversation as resolved.
Show resolved Hide resolved
Type: v1alpha1.MachineOperationCreate,
LastUpdateTime: metav1.Now(),
},
v1alpha1.CurrentStatus{
Phase: c.getCreateFailurePhase(machine),
LastUpdateTime: metav1.Now(),
},
machine.Status.LastKnownState,
)
return retryPeriod, err
}

klog.V(3).Infof("VM instance %q for machine %q was initialized with last known state: %q", resp.ProviderID, machine.Name, resp.LastKnownState)
return 0, nil
}

func (c *controller) triggerDeletionFlow(ctx context.Context, deleteMachineRequest *driver.DeleteMachineRequest) (machineutils.RetryPeriod, error) {
var (
machine = deleteMachineRequest.Machine
Expand Down
67 changes: 67 additions & 0 deletions pkg/util/provider/machinecontroller/machine_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -508,6 +508,9 @@ var _ = Describe("machine", func() {
} else {
Expect(actual.Status.LastOperation.ErrorCode).To(Equal(""))
}
if data.expect.machine.Status.LastOperation.Description != "" {
Expect(actual.Status.LastOperation.Description).To(Equal(data.expect.machine.Status.LastOperation.Description))
}
},

Entry("Machine creation succeeds with object UPDATE", &data{
Expand Down Expand Up @@ -1029,6 +1032,70 @@ var _ = Describe("machine", func() {
retry: machineutils.ShortRetry,
},
}),
Entry("Machine initialization failed due to VM instance initialization error", &data{
setup: setup{
secrets: []*corev1.Secret{
{
ObjectMeta: *newObjectMeta(objMeta, 0),
Data: map[string][]byte{"userData": []byte("test")},
},
},
machineClasses: []*v1alpha1.MachineClass{
{
ObjectMeta: *newObjectMeta(objMeta, 0),
SecretRef: newSecretReference(objMeta, 0),
},
},
machines: newMachines(1, &v1alpha1.MachineTemplateSpec{
ObjectMeta: *newObjectMeta(objMeta, 0),
Spec: v1alpha1.MachineSpec{
Class: v1alpha1.ClassSpec{
Kind: "MachineClass",
Name: "machine-0",
},
},
}, nil, nil, nil, nil, true, metav1.Now()),
nodes: []*corev1.Node{
{
ObjectMeta: metav1.ObjectMeta{
Name: "fakeNode-0",
},
},
},
},
action: action{
machine: "machine-0",
fakeDriver: &driver.FakeDriver{
VMExists: true,
ProviderID: "fakeID-0",
NodeName: "fakeNode-0",
Err: status.Error(codes.Uninitialized, "VM instance could not be initialized"),
},
},
expect: expect{
machine: newMachine(&v1alpha1.MachineTemplateSpec{
ObjectMeta: *newObjectMeta(objMeta, 0),
Spec: v1alpha1.MachineSpec{
Class: v1alpha1.ClassSpec{
Kind: "MachineClass",
Name: "machineClass",
},
},
}, &v1alpha1.MachineStatus{
CurrentStatus: v1alpha1.CurrentStatus{
Phase: v1alpha1.MachineCrashLoopBackOff,
},
LastOperation: v1alpha1.LastOperation{
Description: fmt.Sprintf("Provider error: %s. %s", status.Error(codes.Uninitialized, "VM instance could not be initialized").Error(), machineutils.InstanceInitialization),
ErrorCode: codes.Uninitialized.String(),
State: v1alpha1.MachineStateFailed,
Type: v1alpha1.MachineOperationCreate,
},
}, nil, nil, nil, true, metav1.Now()),
err: status.Error(codes.Uninitialized, "VM instance could not be initialized"),
retry: machineutils.ShortRetry,
},
}),
/*
Entry("Machine creation success even on temporary APIServer disruption", &data{
setup: setup{
Expand Down
3 changes: 3 additions & 0 deletions pkg/util/provider/machineutils/utils.go
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,9 @@ const (
// GetVMStatus sets machine status to terminating and specifies next step as getting VMs
GetVMStatus = "Set machine status to termination. Now, getting VM Status"

// InstanceInitialization is a step that represents initialization of a VM instance (post-creation).
InstanceInitialization = "Initialize VM Instance"

// InitiateDrain specifies next step as initiate node drain
InitiateDrain = "Initiate node drain"

Expand Down