Controller: emit lifecycle events when the VM gets restarted or deleted (#208)

* Controller: emit lifecycle events when the VM gets restarted or deleted

* vm_{scheduling,run}_time → vm_{scheduling,run}_duration for clarity

* Update VM endpoint: only update VM started time when zero
This commit is contained in:
Nikolay Edigaryev 2024-09-24 17:53:10 +04:00 committed by GitHub
parent 1730eaf67c
commit 2a2ddea62a
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
4 changed files with 50 additions and 0 deletions

View File

@ -2,6 +2,7 @@ package controller
import (
"errors"
"github.com/cirruslabs/orchard/internal/controller/lifecycle"
storepkg "github.com/cirruslabs/orchard/internal/controller/store"
"github.com/cirruslabs/orchard/internal/responder"
"github.com/cirruslabs/orchard/internal/simplename"
@ -130,6 +131,10 @@ func (controller *Controller) updateVM(ctx *gin.Context) responder.Responder {
NewErrorResponse("cannot update status for a VM in a terminal state"))
}
if userVM.Status == v1.VMStatusRunning && dbVM.StartedAt.IsZero() {
dbVM.StartedAt = time.Now()
}
dbVM.Status = userVM.Status
dbVM.StatusMessage = userVM.StatusMessage
dbVM.ImageFQN = userVM.ImageFQN
@ -197,6 +202,8 @@ func (controller *Controller) deleteVM(ctx *gin.Context) responder.Responder {
return responder.Error(err)
}
lifecycle.Report(vm, "VM deleted", controller.logger)
return responder.Code(http.StatusOK)
})
}

View File

@ -0,0 +1,34 @@
package lifecycle
import (
v1 "github.com/cirruslabs/orchard/pkg/resource/v1"
"go.uber.org/zap"
"time"
)
// Report writes an info-level log entry carrying message, annotated with
// the "lifecycle" component tag and a snapshot of the VM's identity
// (UID, name, image), status and restart count, plus two derived durations:
//
//   - vm_scheduling_duration: time between the VM's creation and its
//     scheduling, or the time elapsed since creation if it was never scheduled.
//   - vm_run_duration: time elapsed since the VM was started, or zero if it
//     was never started.
func Report(vm *v1.VM, message string, logger *zap.SugaredLogger) {
	var schedulingDuration, runDuration time.Duration

	if !vm.ScheduledAt.IsZero() {
		schedulingDuration = vm.ScheduledAt.Sub(vm.CreatedAt)
	} else {
		// Never scheduled: report how long the VM has been waiting so far.
		schedulingDuration = time.Since(vm.CreatedAt)
	}

	// A VM that was never started keeps the zero run duration.
	if !vm.StartedAt.IsZero() {
		runDuration = time.Since(vm.StartedAt)
	}

	logger.With(
		"component", "lifecycle",
		"vm_uid", vm.UID,
		"vm_name", vm.Name,
		"vm_restart_count", vm.RestartCount,
		"vm_image", vm.Image,
		"vm_status", vm.Status,
		"vm_scheduling_duration", schedulingDuration,
		"vm_run_duration", runDuration,
	).Info(message)
}

View File

@ -2,6 +2,7 @@ package scheduler
import (
"context"
"github.com/cirruslabs/orchard/internal/controller/lifecycle"
"github.com/cirruslabs/orchard/internal/controller/notifier"
storepkg "github.com/cirruslabs/orchard/internal/controller/store"
"github.com/cirruslabs/orchard/internal/opentelemetry"
@ -141,6 +142,7 @@ func (scheduler *Scheduler) schedulingLoopIteration() error {
time.Since(unscheduledVM.CreatedAt).Seconds())
unscheduledVM.Worker = worker.Name
unscheduledVM.ScheduledAt = time.Now()
if err := txn.SetVM(unscheduledVM); err != nil {
return err
@ -240,11 +242,15 @@ func (scheduler *Scheduler) healthCheckVM(txn storepkg.Transaction, nameToWorker
if needsRestart {
logger.Debugf("restarting VM")
lifecycle.Report(&vm, "VM restarted", scheduler.logger)
vm.Status = v1.VMStatusPending
vm.StatusMessage = ""
vm.Worker = ""
vm.RestartedAt = time.Now()
vm.RestartCount++
vm.ScheduledAt = time.Time{}
vm.StartedAt = time.Time{}
return txn.SetVM(vm)
}

View File

@ -57,6 +57,9 @@ type VM struct {
// by the worker using "tart fqn" command after it had pulled the image.
ImageFQN string `json:"image_fqn,omitempty"`
ScheduledAt time.Time `json:"scheduled_at,omitempty"`
StartedAt time.Time `json:"started_at,omitempty"`
Meta
}