Ability to set VM's power state and retrieve backing Tart VM's name (#373)
* Ability to set VM's power state and retrieve backing Tart VM's name
* Validate user-provided "powerState" field
* Introduce TestSpecUpdatePowerStateSuspend
* Introduce TestSpecUpdatePowerStateStopped
* OpenAPI specification: add note about suspended VMs to "tartName" desc.
* Sometimes we need to wait more than 30 seconds
parent eac2c33e88
commit 76a552bade
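As a usage illustration (not part of the commit): a minimal Go sketch of how a client could drive the new `powerState` field and read back `tartName`, modeled on the `devClient.VMs().Get`/`Update` calls in the integration tests below. The `example` package, the `vmsAPI` interface and the `suspendVM` helper are assumptions introduced only for this sketch, not the real client API.

package example

import (
    "context"

    v1 "github.com/cirruslabs/orchard/pkg/resource/v1"
)

// vmsAPI is a stand-in for the Orchard client's VMs service, narrowed to the
// two calls exercised by the integration tests in this commit (an assumption,
// not the actual client interface).
type vmsAPI interface {
    Get(ctx context.Context, name string) (*v1.VM, error)
    Update(ctx context.Context, vm v1.VM) (*v1.VM, error)
}

// suspendVM asks the controller to transition an existing VM into the
// "suspended" power state and returns the name of the backing Tart VM,
// exposed through the read-only "tartName" field. The transition itself is
// applied asynchronously by the worker (observable via observedGeneration).
func suspendVM(ctx context.Context, vms vmsAPI, name string) (string, error) {
    vm, err := vms.Get(ctx, name)
    if err != nil {
        return "", err
    }

    vm.PowerState = v1.PowerStateSuspended

    updatedVM, err := vms.Update(ctx, *vm)
    if err != nil {
        return "", err
    }

    return updatedVM.TartName, nil
}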
@@ -282,7 +282,7 @@ paths:
                properties:
                  type:
                    type: string
-                   enum: [ADDED, MODIFIED, DELETED]
+                   enum: [ ADDED, MODIFIED, DELETED ]
                  object:
                    $ref: '#/components/schemas/VM'
        '404':
@@ -463,7 +463,7 @@ components:
          type: string
          description: VM image pull policy
          default: IfNotPresent
-         enum: [IfNotPresent, Always]
+         enum: [ IfNotPresent, Always ]
        cpu:
          type: number
          description: Number of CPUs assigned to this VM
@@ -567,7 +567,7 @@ components:
          description: |
            VM restart policy: specify "Never" to never restart or "OnFailure" to only restart when the VM fails
          default: Never
-         enum: [Never, OnFailure]
+         enum: [ Never, OnFailure ]
        resources:
          type: object
          description: Resources required by this VM on the worker
@@ -599,6 +599,39 @@ components:
            - path: /path/on/host/to/sources
              ro: true
            - path: /path/on/host/to/builds
+        powerState:
+          type: string
+          description: |
+            Desired power state of the VM.
+
+            When set to `stopped` or `suspended`, the VM does not consume any `resources`
+            and can serve as a source for creating new Orchard VMs on the same worker. See
+            `tartName` for more details.
+
+            Note that, at the moment, you can only transition into `stopped` or `suspended`
+            once.
+          default: running
+          enum: [ running, stopped, suspended ]
+        tartName:
+          type: string
+          description: |
+            Name of the Tart VM backing this VM resource.
+
+            `tartName` is specific to a worker, whereas `name` is cluster-wide.
+
+            `tartName` is useful in combination with `powerState` for creating stopped or suspended VMs
+            that can be used to start or resume new VMs on the same worker.
+
+            However, with great power comes great responsibility. You need to make sure:
+
+            * that these new VMs will target the same worker using `labels` or `resources`,
+              otherwise they will fail with the "the specified VM does not exist" error
+
+            * that there's only one cloned new VM for each suspended VM at a time;
+              if you clone more new VMs from a single suspended VM, Tart will give them
+              new MAC addresses automatically, which will stop them from booting,
+              since the suspend-resume machinery expects the same MAC address
+          readOnly: true
        VMState:
          title: Virtual Machine State
          type: object
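For illustration only (not part of the commit): since `VMSpec` is embedded into the `VM` resource, the new `powerState` field appears at the top level of the VM object on the wire. Below is a minimal Go sketch of the resulting JSON, using only the fields and constants introduced in this commit; the `example` package and function name are assumptions.

package example

import (
    "encoding/json"
    "fmt"

    v1 "github.com/cirruslabs/orchard/pkg/resource/v1"
)

// PrintPowerStateJSON marshals a VMSpec that requests suspension and prints
// {"suspendable":true,"powerState":"suspended"}, matching the schema above.
func PrintPowerStateJSON() {
    spec := v1.VMSpec{
        Suspendable: true,
        PowerState:  v1.PowerStateSuspended,
    }

    out, err := json.Marshal(spec)
    if err != nil {
        panic(err)
    }

    fmt.Println(string(out))
}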
@@ -686,7 +719,7 @@ components:
              type: string
            ro:
              type: boolean
-      default: []
+      default: [ ]
      example:
        - pathPrefix: /Users/ci/src
          ro: true
@@ -10,6 +10,7 @@ import (
    storepkg "github.com/cirruslabs/orchard/internal/controller/store"
    "github.com/cirruslabs/orchard/internal/responder"
    "github.com/cirruslabs/orchard/internal/simplename"
+   "github.com/cirruslabs/orchard/internal/worker/ondiskname"
    "github.com/cirruslabs/orchard/pkg/resource/v1"
    "github.com/gin-gonic/gin"
    "github.com/google/go-cmp/cmp"
@@ -43,6 +44,16 @@ func (controller *Controller) createVM(ctx *gin.Context) responder.Responder {
    vm.RestartedAt = time.Time{}
    vm.RestartCount = 0
    vm.UID = uuid.New().String()
+   vm.PowerState = v1.PowerStateRunning
+   vm.TartName = ondiskname.New(vm.Name, vm.UID, vm.RestartCount).String()
+   vm.Generation = 0
+   vm.ObservedGeneration = 0
+   vm.Conditions = []v1.Condition{
+       {
+           Type:  v1.ConditionTypeScheduled,
+           State: v1.ConditionStateFalse,
+       },
+   }

    // Softnet-specific logic: automatically enable Softnet when NetSoftnetAllow or NetSoftnetBlock are set
    // and propagate deprecated and non-deprecated boolean fields into each other
@@ -150,6 +161,20 @@ func (controller *Controller) updateVMSpec(ctx *gin.Context) responder.Responder
            "toggled for suspendable VMs"))
    }

+   // Power state-specific sanity checks
+   if !userVM.PowerState.Valid() {
+       return responder.JSON(http.StatusPreconditionFailed, NewErrorResponse("invalid \"powerState\" "+
+           "value: %s", userVM.PowerState))
+   }
+   if dbVM.PowerState.TerminalState() {
+       return responder.JSON(http.StatusPreconditionFailed, NewErrorResponse("invalid \"powerState\" "+
+           "transition: cannot transition from a terminal power state"))
+   }
+   if !dbVM.Suspendable && userVM.PowerState == v1.PowerStateSuspended {
+       return responder.JSON(http.StatusPreconditionFailed, NewErrorResponse("invalid \"powerState\" "+
+           "transition: only suspendable VMs can be suspended"))
+   }
+
    if cmp.Equal(dbVM.VMSpec, userVM.VMSpec) {
        // Nothing was changed
        return responder.JSON(http.StatusOK, dbVM)
@@ -12,6 +12,7 @@ import (
    "github.com/cirruslabs/orchard/internal/controller/notifier"
    storepkg "github.com/cirruslabs/orchard/internal/controller/store"
    "github.com/cirruslabs/orchard/internal/opentelemetry"
+   "github.com/cirruslabs/orchard/internal/worker/ondiskname"
    "github.com/cirruslabs/orchard/pkg/resource/v1"
    "github.com/cirruslabs/orchard/rpc"
    mapset "github.com/deckarep/golang-set/v2"
@@ -267,13 +268,22 @@ NextVM:
            return ErrVMSchedulingSkipped
        }

-       if currentUnscheduledVM.Status != v1.VMStatusPending ||
-           currentUnscheduledVM.Worker != "" {
+       if unscheduledVM.IsScheduled() {
            // Unscheduled VM is not unscheduled anymore,
            // so there's nothing to do
            return ErrVMSchedulingSkipped
        }

+       if unscheduledVM.TerminalState() {
+           // We don't support re-scheduling of VMs in terminal state at the moment
+           return ErrVMSchedulingSkipped
+       }
+
+       if unscheduledVM.PowerState.TerminalState() {
+           // We don't support re-scheduling of stopped/suspended VMs at the moment
+           return ErrVMSchedulingSkipped
+       }
+
        currentWorker, err := txn.GetWorker(worker.Name)
        if err != nil {
            if errors.Is(err, storepkg.ErrNotFound) {
@@ -298,6 +308,10 @@ NextVM:

        unscheduledVM.Worker = worker.Name
        unscheduledVM.ScheduledAt = time.Now()
+       v1.ConditionsSet(&unscheduledVM.Conditions, v1.Condition{
+           Type:  v1.ConditionTypeScheduled,
+           State: v1.ConditionStateTrue,
+       })

        // Fill out the actual CPU allocation
        if unscheduledVM.CPU == 0 {
@@ -376,10 +390,10 @@ func ProcessVMs(vms []v1.VM) ([]v1.VM, WorkerInfos) {
    workerToResources := make(WorkerInfos)

    for _, vm := range vms {
-       if vm.Worker == "" {
-           unscheduledVMs = append(unscheduledVMs, vm)
-       } else if !vm.TerminalState() {
+       if vm.IsScheduled() {
            workerToResources.AddVM(vm.Worker, vm.Resources)
+       } else {
+           unscheduledVMs = append(unscheduledVMs, vm)
        }
    }

@@ -427,7 +441,7 @@ func (scheduler *Scheduler) healthCheckingLoopIteration() (int, int, error) {
    // transaction, re-checking that the VM still exists
    // and it is still scheduled
    for _, vm := range vms {
-       if vm.Worker == "" {
+       if !vm.IsScheduled() {
            // Not a scheduled VM
            //
            // We'll re-check this below, but this allows us
@@ -447,7 +461,7 @@ func (scheduler *Scheduler) healthCheckingLoopIteration() (int, int, error) {
            return err
        }

-       if currentVM.Worker == "" {
+       if !vm.IsScheduled() {
            // Not a scheduled VM, nothing to do
            return nil
        }
@@ -483,6 +497,14 @@ func (scheduler *Scheduler) healthCheckVM(txn storepkg.Transaction, vm v1.VM) er
        vm.RestartCount++
        vm.ScheduledAt = time.Time{}
        vm.StartedAt = time.Time{}
+       vm.PowerState = v1.PowerStateRunning
+       vm.TartName = ondiskname.New(vm.Name, vm.UID, vm.RestartCount).String()
+       vm.Conditions = []v1.Condition{
+           {
+               Type:  v1.ConditionTypeScheduled,
+               State: v1.ConditionStateFalse,
+           },
+       }

        return txn.SetVM(vm)
    }
@@ -508,5 +530,33 @@ func (scheduler *Scheduler) healthCheckVM(txn storepkg.Transaction, vm v1.VM) er
        return txn.SetVM(vm)
    }

+   if vm.PowerState.TerminalState() && v1.ConditionIsFalse(vm.Conditions, v1.ConditionTypeRunning) {
+       // VM has entered a terminal power state and stopped running,
+       // de-schedule it to free up resources
+       v1.ConditionsSet(&vm.Conditions, v1.Condition{
+           Type:  v1.ConditionTypeScheduled,
+           State: v1.ConditionStateFalse,
+       })
+
+       return txn.SetVM(vm)
+   }
+
+   if vm.TerminalState() {
+       // VM has entered a terminal state,
+       // de-schedule it to free up resources
+       v1.ConditionsSet(&vm.Conditions, v1.Condition{
+           Type:  v1.ConditionTypeScheduled,
+           State: v1.ConditionStateFalse,
+       })
+
+       // Also correct the conditions for the worker
+       v1.ConditionsSet(&vm.Conditions, v1.Condition{
+           Type:  v1.ConditionTypeRunning,
+           State: v1.ConditionStateFalse,
+       })
+
+       return txn.SetVM(vm)
+   }
+
    return nil
}
@@ -10,10 +10,12 @@ import (
    "github.com/cirruslabs/orchard/internal/tests/devcontroller"
    "github.com/cirruslabs/orchard/internal/tests/wait"
+   "github.com/cirruslabs/orchard/internal/worker/ondiskname"
+   "github.com/cirruslabs/orchard/internal/worker/tart"
    v1 "github.com/cirruslabs/orchard/pkg/resource/v1"
    "github.com/samber/lo"
    "github.com/shirou/gopsutil/v4/process"
    "github.com/stretchr/testify/require"
    "go.uber.org/zap"
)

func TestSpecUpdateSoftnet(t *testing.T) {
@@ -63,7 +65,7 @@ func TestSpecUpdateSoftnet(t *testing.T) {
    require.EqualValues(t, 1, vm.Generation)
    require.EqualValues(t, 0, vm.ObservedGeneration)

-   require.True(t, wait.Wait(30*time.Second, func() bool {
+   require.True(t, wait.Wait(2*time.Minute, func() bool {
        vm, err = devClient.VMs().Get(context.Background(), vmName)
        require.NoError(t, err)

@@ -129,7 +131,7 @@ func TestSpecUpdateSoftnetSuspendable(t *testing.T) {
    require.EqualValues(t, 1, vm.Generation)
    require.EqualValues(t, 0, vm.ObservedGeneration)

-   require.True(t, wait.Wait(30*time.Second, func() bool {
+   require.True(t, wait.Wait(2*time.Minute, func() bool {
        vm, err = devClient.VMs().Get(context.Background(), vmName)
        require.NoError(t, err)

@@ -147,6 +149,148 @@
    require.True(t, sliceContainsAnotherSlice(tartRunCmdline, []string{"--net-softnet-block", "0.0.0.0/0"}))
}

+func TestSpecUpdatePowerStateSuspend(t *testing.T) {
+   devClient, _, _ := devcontroller.StartIntegrationTestEnvironment(t)
+
+   // Create a suspendable VM with Softnet enabled
+   vmName := "test"
+
+   err := devClient.VMs().Create(t.Context(), &v1.VM{
+       Meta: v1.Meta{
+           Name: vmName,
+       },
+       Image:    imageconstant.DefaultMacosImage,
+       CPU:      4,
+       Memory:   8 * 1024,
+       Headless: true,
+       VMSpec: v1.VMSpec{
+           Suspendable: true,
+           NetSoftnet:  true,
+       },
+   })
+   require.NoError(t, err)
+
+   // Wait for the VM to start
+   var vm *v1.VM
+
+   require.True(t, wait.Wait(2*time.Minute, func() bool {
+       vm, err = devClient.VMs().Get(context.Background(), vmName)
+       require.NoError(t, err)
+
+       t.Logf("Waiting for the VM to start. Current status: %s", vm.Status)
+
+       return vm.Status == v1.VMStatusRunning
+   }), "failed to start a VM")
+
+   // Ensure that the VM is running
+   tartVMName := ondiskname.New(vmName, vm.UID, vm.RestartCount).String()
+
+   _, err = tartRunProcessCmdline(tartVMName)
+   require.NoError(t, err)
+
+   // Update the VM's specification and change its power state
+   vm.PowerState = v1.PowerStateSuspended
+
+   vm, err = devClient.VMs().Update(t.Context(), *vm)
+   require.NoError(t, err)
+   require.EqualValues(t, 1, vm.Generation)
+   require.EqualValues(t, 0, vm.ObservedGeneration)
+
+   require.True(t, wait.Wait(2*time.Minute, func() bool {
+       vm, err = devClient.VMs().Get(context.Background(), vmName)
+       require.NoError(t, err)
+
+       t.Logf("Waiting for the VM's observed generation to be updated...")
+
+       return vm.ObservedGeneration == 1
+   }), "failed to wait for the VM's observed generation to be updated")
+
+   // Ensure that the VM is not running
+   _, err = tartRunProcessCmdline(tartVMName)
+   require.Error(t, err)
+
+   // Ensure that the VM is present and is suspended
+   tartVMs, err := tart.List(t.Context(), zap.NewNop().Sugar())
+   require.NoError(t, err)
+   require.Contains(t, tartVMs, tart.VMInfo{
+       Name:    vm.TartName,
+       Source:  "local",
+       State:   "suspended",
+       Running: false,
+   })
+}
+
+func TestSpecUpdatePowerStateStopped(t *testing.T) {
+   devClient, _, _ := devcontroller.StartIntegrationTestEnvironment(t)
+
+   // Create a suspendable VM with Softnet enabled
+   vmName := "test"
+
+   err := devClient.VMs().Create(t.Context(), &v1.VM{
+       Meta: v1.Meta{
+           Name: vmName,
+       },
+       Image:    imageconstant.DefaultMacosImage,
+       CPU:      4,
+       Memory:   8 * 1024,
+       Headless: true,
+       VMSpec: v1.VMSpec{
+           Suspendable: true,
+           NetSoftnet:  true,
+       },
+   })
+   require.NoError(t, err)
+
+   // Wait for the VM to start
+   var vm *v1.VM
+
+   require.True(t, wait.Wait(2*time.Minute, func() bool {
+       vm, err = devClient.VMs().Get(context.Background(), vmName)
+       require.NoError(t, err)
+
+       t.Logf("Waiting for the VM to start. Current status: %s", vm.Status)
+
+       return vm.Status == v1.VMStatusRunning
+   }), "failed to start a VM")
+
+   // Ensure that the VM is running
+   tartVMName := ondiskname.New(vmName, vm.UID, vm.RestartCount).String()
+
+   _, err = tartRunProcessCmdline(tartVMName)
+   require.NoError(t, err)
+
+   // Update the VM's specification and change its power state
+   vm.PowerState = v1.PowerStateStopped
+
+   vm, err = devClient.VMs().Update(t.Context(), *vm)
+   require.NoError(t, err)
+   require.EqualValues(t, 1, vm.Generation)
+   require.EqualValues(t, 0, vm.ObservedGeneration)
+
+   require.True(t, wait.Wait(2*time.Minute, func() bool {
+       vm, err = devClient.VMs().Get(context.Background(), vmName)
+       require.NoError(t, err)
+
+       t.Logf("Waiting for the VM's observed generation to be updated...")
+
+       return vm.ObservedGeneration == 1
+   }), "failed to wait for the VM's observed generation to be updated")
+
+   // Ensure that the VM is not running
+   _, err = tartRunProcessCmdline(tartVMName)
+   require.Error(t, err)
+
+   // Ensure that the VM is present and is stopped
+   tartVMs, err := tart.List(t.Context(), zap.NewNop().Sugar())
+   require.NoError(t, err)
+   require.Contains(t, tartVMs, tart.VMInfo{
+       Name:    vm.TartName,
+       Source:  "local",
+       State:   "stopped",
+       Running: false,
+   })
+}
+
func tartRunProcessCmdline(vmName string) ([]string, error) {
    processes, err := process.Processes()
    if err != nil {
@@ -6,9 +6,10 @@ import (
    "encoding/json"
    "errors"
    "fmt"
-   "go.uber.org/zap"
    "os/exec"
    "strings"
+
+   "go.uber.org/zap"
)

const tartCommandName = "tart"
@@ -20,6 +21,8 @@ var (

type VMInfo struct {
    Name    string
+   Source  string
+   State   string
    Running bool
}

@@ -1,10 +0,0 @@
-package vmmanager
-
-type Condition string
-
-const (
-   ConditionCloning    Condition = "cloning"
-   ConditionReady      Condition = "ready"
-   ConditionSuspending Condition = "suspending"
-   ConditionStopping   Condition = "stopping"
-)
@@ -47,7 +47,7 @@ type VM struct {
    // ConditionStopping states for a short time. This way in run() we know
    // that we're in a process of rebooting a VM, so we can avoid throwing
    // an error about unexpected VM termination.
-   conditions mapset.Set[Condition]
+   conditions mapset.Set[v1.ConditionType]

    // Image FQN feature, see https://github.com/cirruslabs/orchard/issues/164
    imageFQN atomic.Pointer[string]
@@ -81,7 +81,7 @@ func NewVM(
        "vm_restart_count", vmResource.RestartCount,
    ),

-   conditions: mapset.NewSet(ConditionCloning),
+   conditions: mapset.NewSet(v1.ConditionTypeCloning),

    ctx:    vmContext,
    cancel: vmContextCancel,
@@ -133,7 +133,7 @@ func NewVM(
        // Backward compatibility with v1.VM specification's "Status" field
        vm.started.Store(true)

-       vm.conditions.Add(ConditionReady)
+       vm.conditions.Add(v1.ConditionTypeRunning)

        vm.run(vm.ctx, eventStreamer)
    }()
@@ -194,8 +194,27 @@ func (vm *VM) setErr(err error) {
    }
}

-func (vm *VM) Conditions() mapset.Set[Condition] {
-   return vm.conditions.Clone()
+func (vm *VM) Conditions() []v1.Condition {
+   // Only expose a minimum amount of conditions necessary
+   // for the Orchard Controller to make decisions
+   return []v1.Condition{
+       vm.conditionTypeToCondition(v1.ConditionTypeRunning),
+   }
+}
+
+func (vm *VM) conditionTypeToCondition(conditionType v1.ConditionType) v1.Condition {
+   var conditionState v1.ConditionState
+
+   if vm.conditions.ContainsOne(conditionType) {
+       conditionState = v1.ConditionStateTrue
+   } else {
+       conditionState = v1.ConditionStateFalse
+   }
+
+   return v1.Condition{
+       Type:  conditionType,
+       State: conditionState,
+   }
}

func (vm *VM) cloneAndConfigure(ctx context.Context) error {
@@ -206,7 +225,7 @@ func (vm *VM) cloneAndConfigure(ctx context.Context) error {
        return err
    }

-   vm.conditions.Remove(ConditionCloning)
+   vm.conditions.Remove(v1.ConditionTypeCloning)

    // Image FQN feature, see https://github.com/cirruslabs/orchard/issues/164
    fqnRaw, _, err := tart.Tart(ctx, vm.logger, "fqn", vm.Resource.Image)
@@ -329,7 +348,7 @@ func (vm *VM) cloneAndConfigure(ctx context.Context) error {
}

func (vm *VM) run(ctx context.Context, eventStreamer *client.EventStreamer) {
-   defer vm.conditions.RemoveAll(ConditionReady, ConditionSuspending, ConditionStopping)
+   defer vm.conditions.RemoveAll(v1.ConditionTypeRunning, v1.ConditionTypeSuspending, v1.ConditionTypeStopping)

    // Launch the startup script goroutine as close as possible
    // to the VM startup (below) to avoid "tart ip" timing out
@@ -389,7 +408,7 @@ func (vm *VM) run(ctx context.Context, eventStreamer *client.EventStreamer) {
    case <-vm.ctx.Done():
        // Do not return an error because it's the user's intent to cancel this VM
    default:
-       if !vm.conditions.ContainsAny(ConditionSuspending, ConditionStopping) {
+       if !vm.conditions.ContainsAny(v1.ConditionTypeSuspending, v1.ConditionTypeStopping) {
            vm.setErr(fmt.Errorf("%w: VM exited unexpectedly", ErrVMFailed))
        }
    }
@@ -436,7 +455,7 @@ func (vm *VM) Suspend() <-chan error {
    }

    vm.setStatusMessage("Suspending VM")
-   vm.conditions.Add(ConditionSuspending)
+   vm.conditions.Add(v1.ConditionTypeSuspending)

    go func() {
        _, _, err := tart.Tart(context.Background(), zap.NewNop().Sugar(), "suspend", vm.id())
@@ -468,7 +487,7 @@ func (vm *VM) Stop() <-chan error {
    }

    vm.setStatusMessage("Stopping VM")
-   vm.conditions.Add(ConditionStopping)
+   vm.conditions.Add(v1.ConditionTypeStopping)

    go func() {
        // Try to gracefully terminate the VM
@@ -485,12 +504,14 @@ func (vm *VM) Stop() <-chan error {
    return errCh
}

-func (vm *VM) Start(vmResource v1.VM, eventStreamer *client.EventStreamer) {
+func (vm *VM) UpdateSpec(vmResource v1.VM) {
    vm.Resource = vmResource
    vm.Resource.ObservedGeneration = vmResource.Generation
+}
+
+func (vm *VM) Start(eventStreamer *client.EventStreamer) {
    vm.setStatusMessage("Starting VM")
-   vm.conditions.Add(ConditionReady)
+   vm.conditions.Add(v1.ConditionTypeRunning)

    vm.cancel()

@@ -509,7 +530,7 @@ func (vm *VM) Delete() error {
    // (e.g. "tart clone", "tart run", etc.)
    vm.cancel()

-   if vm.conditions.Contains(ConditionCloning) {
+   if vm.conditions.Contains(v1.ConditionTypeCloning) {
        // Not cloned yet, nothing to delete
        return nil
    }
@@ -6,7 +6,6 @@ import (
    "fmt"
    "os"
    "slices"
-   "strings"
    "time"

    "github.com/avast/retry-go/v4"
@@ -321,20 +320,17 @@ func (worker *Worker) syncVMs(ctx context.Context, updateVM func(context.Context
        }

        localState := mo.None[v1.VMStatus]()
-       var localConditions []string
+       var localConditions []v1.Condition
        if vm != nil {
            localState = mo.Some(vm.Status())
-
-           for condition := range vm.Conditions().Iter() {
-               localConditions = append(localConditions, string(condition))
-           }
+           localConditions = vm.Conditions()
        }

        action := transitions[remoteState][localState]

        worker.logger.Debugf("processing VM: %s, remote state: %s, local state: %s, "+
            "local conditions: [%s], action: %v\n", onDiskName, optionToString(remoteState),
-           optionToString(localState), strings.Join(localConditions, ", "), action)
+           optionToString(localState), v1.ConditionsHumanize(localConditions), action)

        switch action {
        case ActionCreate:
@@ -368,17 +364,30 @@ func (worker *Worker) syncVMs(ctx context.Context, updateVM func(context.Context
        case ActionMonitorRunning:
            if vmResource.Generation != vm.Resource.Generation {
                // VM specification changed, reboot the VM for the changes to take effect
-               if vm.Conditions().Contains(vmmanager.ConditionReady) {
+               if v1.ConditionIsTrue(vm.Conditions(), v1.ConditionTypeRunning) {
                    // VM is running, suspend or stop it first
-                   if vm.Resource.Suspendable {
+                   shouldStop := vmResource.PowerState == v1.PowerStateStopped || !vm.Resource.Suspendable
+
+                   if shouldStop {
+                       vm.Stop()
+                   } else {
+                       vm.Suspend()
+                   }
+
+                   if vm.Resource.Suspendable && vmResource.PowerState != v1.PowerStateStopped {
                        vm.Suspend()
                    } else {
                        vm.Stop()
                    }
-               } else {
+               } else if vmResource.PowerState == v1.PowerStateRunning {
                    // VM stopped, start it with the new specification
+                   vm.UpdateSpec(*vmResource)
+
                    eventStreamer := worker.client.VMs().StreamEvents(vmResource.Name)
-                   vm.Start(*vmResource, eventStreamer)
+                   vm.Start(eventStreamer)
+               } else if vmResource.PowerState == v1.PowerStateSuspended || vmResource.PowerState == v1.PowerStateStopped {
+                   // VM stopped, just update its specification
+                   vm.UpdateSpec(*vmResource)
                }
            }

@@ -396,6 +405,13 @@ func (worker *Worker) syncVMs(ctx context.Context, updateVM func(context.Context
                updateNeeded = true
            }

+           // Propagate VM's conditions to the Orchard Controller
+           for _, condition := range vm.Conditions() {
+               if v1.ConditionsSet(&vmResource.Conditions, condition) {
+                   updateNeeded = true
+               }
+           }
+
            if updateNeeded {
                if _, err := updateVM(ctx, *vmResource); err != nil {
                    return err
@@ -32,6 +32,7 @@ type VM struct {
    Nested bool `json:"nested,omitempty"`

    VMSpec
+   VMSpecReadOnly
    VMState

    // Status field is used to track the lifecycle of the VM associated with this resource.
@@ -101,18 +102,54 @@ func (vm *VM) SetVersion(version uint64) {
    vm.Version = version
}

+func (vm *VM) IsScheduled() bool {
+   if ConditionExists(vm.Conditions, ConditionTypeScheduled) {
+       return ConditionIsTrue(vm.Conditions, ConditionTypeScheduled)
+   } else {
+       return vm.Worker != ""
+   }
+}
+
type VMSpec struct {
-   NetSoftnetDeprecated bool     `json:"net-softnet,omitempty"`
-   NetSoftnet           bool     `json:"netSoftnet,omitempty"`
-   NetSoftnetAllow      []string `json:"netSoftnetAllow,omitempty"`
-   NetSoftnetBlock      []string `json:"netSoftnetBlock,omitempty"`
-   Suspendable          bool     `json:"suspendable,omitempty"`
+   NetSoftnetDeprecated bool       `json:"net-softnet,omitempty"`
+   NetSoftnet           bool       `json:"netSoftnet,omitempty"`
+   NetSoftnetAllow      []string   `json:"netSoftnetAllow,omitempty"`
+   NetSoftnetBlock      []string   `json:"netSoftnetBlock,omitempty"`
+   Suspendable          bool       `json:"suspendable,omitempty"`
+   PowerState           PowerState `json:"powerState,omitempty"`
}

+type VMSpecReadOnly struct {
+   TartName string `json:"tartName,omitempty"`
+}
+
type VMState struct {
    // ObservedGeneration corresponds to the Generation of VM specification
    // on which the worker had acted upon.
    ObservedGeneration uint64 `json:"observedGeneration"`
+
+   Conditions []Condition `json:"conditions,omitempty"`
}

+type PowerState string
+
+const (
+   PowerStateRunning   PowerState = "running"
+   PowerStateStopped   PowerState = "stopped"
+   PowerStateSuspended PowerState = "suspended"
+)
+
+func (powerState PowerState) Valid() bool {
+   switch powerState {
+   case PowerStateRunning, PowerStateStopped, PowerStateSuspended:
+       return true
+   default:
+       return false
+   }
+}
+
+func (powerState PowerState) TerminalState() bool {
+   return powerState != PowerStateRunning
+}
+
type Event struct {
@@ -0,0 +1,106 @@
+package v1
+
+import (
+   "fmt"
+   "strings"
+)
+
+type Condition struct {
+   Type  ConditionType  `json:"type"`
+   State ConditionState `json:"state"`
+}
+
+type ConditionType string
+
+const (
+   ConditionTypeScheduled ConditionType = "scheduled"
+   ConditionTypeRunning   ConditionType = "running"
+
+   ConditionTypeCloning    ConditionType = "cloning"
+   ConditionTypeSuspending ConditionType = "suspending"
+   ConditionTypeStopping   ConditionType = "stopping"
+)
+
+type ConditionState string
+
+const (
+   ConditionStateTrue  ConditionState = "true"
+   ConditionStateFalse ConditionState = "false"
+)
+
+func ConditionsSet(conditions *[]Condition, newCondition Condition) bool {
+   for i := range *conditions {
+       condition := &(*conditions)[i]
+
+       if condition.Type != newCondition.Type {
+           continue
+       }
+
+       if condition.State == newCondition.State {
+           return false
+       }
+
+       condition.State = newCondition.State
+
+       return true
+   }
+
+   *conditions = append(*conditions, newCondition)
+
+   return true
+}
+
+func ConditionsHumanize(conditions []Condition) string {
+   var conditionHumanized []string
+
+   for _, condition := range conditions {
+       var pre string
+
+       switch condition.State {
+       case ConditionStateTrue:
+           // Nothing needs to be set
+       case ConditionStateFalse:
+           pre = "not "
+       default:
+           pre = "unknown "
+       }
+
+       conditionHumanized = append(conditionHumanized, fmt.Sprintf("%s%s", pre, condition.Type))
+   }
+
+   return strings.Join(conditionHumanized, ", ")
+}
+
+func ConditionExists(conditions []Condition, conditionType ConditionType) bool {
+   for _, condition := range conditions {
+       if condition.Type == conditionType {
+           return true
+       }
+   }
+
+   return false
+}
+
+func ConditionIsTrue(conditions []Condition, conditionType ConditionType) bool {
+   return conditionIsTypeAndState(conditions, conditionType, ConditionStateTrue)
+}
+
+func ConditionIsFalse(conditions []Condition, conditionType ConditionType) bool {
+   return conditionIsTypeAndState(conditions, conditionType, ConditionStateFalse)
+}
+
+func conditionIsTypeAndState(conditions []Condition, conditionType ConditionType, state ConditionState) bool {
+   for _, condition := range conditions {
+       if condition.Type != conditionType {
+           continue
+       }
+
+       if condition.State != state {
+           return false
+       }
+
+       return true
+   }
+
+   return false
+}
@@ -0,0 +1,134 @@
+package v1_test
+
+import (
+   "testing"
+
+   v1 "github.com/cirruslabs/orchard/pkg/resource/v1"
+   "github.com/stretchr/testify/require"
+)
+
+func TestConditionsSet(t *testing.T) {
+   var conditions []v1.Condition
+
+   // Ensure that a new condition is added
+   v1.ConditionsSet(&conditions, v1.Condition{
+       Type:  v1.ConditionTypeScheduled,
+       State: v1.ConditionStateFalse,
+   })
+   require.Equal(t, []v1.Condition{
+       {
+           Type:  v1.ConditionTypeScheduled,
+           State: v1.ConditionStateFalse,
+       },
+   }, conditions)
+
+   // Ensure that an existing condition is updated
+   v1.ConditionsSet(&conditions, v1.Condition{
+       Type:  v1.ConditionTypeScheduled,
+       State: v1.ConditionStateTrue,
+   })
+   require.Equal(t, []v1.Condition{
+       {
+           Type:  v1.ConditionTypeScheduled,
+           State: v1.ConditionStateTrue,
+       },
+   }, conditions)
+
+   // Ensure that other conditions can be added
+   v1.ConditionsSet(&conditions, v1.Condition{
+       Type:  v1.ConditionTypeRunning,
+       State: v1.ConditionStateFalse,
+   })
+   require.Equal(t, []v1.Condition{
+       {
+           Type:  v1.ConditionTypeScheduled,
+           State: v1.ConditionStateTrue,
+       },
+       {
+           Type:  v1.ConditionTypeRunning,
+           State: v1.ConditionStateFalse,
+       },
+   }, conditions)
+
+   // Ensure that other conditions can be updated
+   v1.ConditionsSet(&conditions, v1.Condition{
+       Type:  v1.ConditionTypeRunning,
+       State: v1.ConditionStateTrue,
+   })
+   require.Equal(t, []v1.Condition{
+       {
+           Type:  v1.ConditionTypeScheduled,
+           State: v1.ConditionStateTrue,
+       },
+       {
+           Type:  v1.ConditionTypeRunning,
+           State: v1.ConditionStateTrue,
+       },
+   }, conditions)
+}
+
+func TestConditionsHumanize(t *testing.T) {
+   conditions := []v1.Condition{
+       {
+           Type:  v1.ConditionTypeScheduled,
+           State: v1.ConditionStateTrue,
+       },
+       {
+           Type:  v1.ConditionTypeRunning,
+           State: v1.ConditionStateFalse,
+       },
+   }
+
+   require.Equal(t, "scheduled, not running", v1.ConditionsHumanize(conditions))
+
+   conditions = []v1.Condition{
+       {
+           Type: v1.ConditionTypeScheduled,
+       },
+       {
+           Type: v1.ConditionTypeRunning,
+       },
+   }
+
+   require.Equal(t, "unknown scheduled, unknown running", v1.ConditionsHumanize(conditions))
+}
+
+func TestConditionMembershipChecks(t *testing.T) {
+   // Condition does not exist
+   var conditions []v1.Condition
+   require.False(t, v1.ConditionExists(conditions, v1.ConditionTypeScheduled))
+   require.False(t, v1.ConditionIsTrue(conditions, v1.ConditionTypeScheduled))
+   require.False(t, v1.ConditionIsFalse(conditions, v1.ConditionTypeScheduled))
+
+   // Condition exists, but its state is unknown
+   conditions = []v1.Condition{
+       {
+           Type: v1.ConditionTypeScheduled,
+       },
+   }
+   require.True(t, v1.ConditionExists(conditions, v1.ConditionTypeScheduled))
+   require.False(t, v1.ConditionIsTrue(conditions, v1.ConditionTypeScheduled))
+   require.False(t, v1.ConditionIsFalse(conditions, v1.ConditionTypeScheduled))
+
+   // Condition exists and its state is true
+   conditions = []v1.Condition{
+       {
+           Type:  v1.ConditionTypeScheduled,
+           State: v1.ConditionStateTrue,
+       },
+   }
+   require.True(t, v1.ConditionExists(conditions, v1.ConditionTypeScheduled))
+   require.True(t, v1.ConditionIsTrue(conditions, v1.ConditionTypeScheduled))
+   require.False(t, v1.ConditionIsFalse(conditions, v1.ConditionTypeScheduled))
+
+   // Condition exists and its state is false
+   conditions = []v1.Condition{
+       {
+           Type:  v1.ConditionTypeScheduled,
+           State: v1.ConditionStateFalse,
+       },
+   }
+   require.True(t, v1.ConditionExists(conditions, v1.ConditionTypeScheduled))
+   require.False(t, v1.ConditionIsTrue(conditions, v1.ConditionTypeScheduled))
+   require.True(t, v1.ConditionIsFalse(conditions, v1.ConditionTypeScheduled))
+}