Controller: emit lifecycle events when the VM gets restarted or deleted (#208)
* Controller: emit lifecycle events when the VM gets restarted or deleted
* vm_{scheduling,run}_time → vm_{scheduling,run}_duration for clarity
* Update VM endpoint: only set the VM's started time when it is still zero (i.e. has not been set yet)
This commit is contained in:
parent
1730eaf67c
commit
2a2ddea62a
|
|
@ -2,6 +2,7 @@ package controller
|
|||
|
||||
import (
|
||||
"errors"
|
||||
"github.com/cirruslabs/orchard/internal/controller/lifecycle"
|
||||
storepkg "github.com/cirruslabs/orchard/internal/controller/store"
|
||||
"github.com/cirruslabs/orchard/internal/responder"
|
||||
"github.com/cirruslabs/orchard/internal/simplename"
|
||||
|
|
@ -130,6 +131,10 @@ func (controller *Controller) updateVM(ctx *gin.Context) responder.Responder {
|
|||
NewErrorResponse("cannot update status for a VM in a terminal state"))
|
||||
}
|
||||
|
||||
if userVM.Status == v1.VMStatusRunning && dbVM.StartedAt.IsZero() {
|
||||
dbVM.StartedAt = time.Now()
|
||||
}
|
||||
|
||||
dbVM.Status = userVM.Status
|
||||
dbVM.StatusMessage = userVM.StatusMessage
|
||||
dbVM.ImageFQN = userVM.ImageFQN
|
||||
|
|
@ -197,6 +202,8 @@ func (controller *Controller) deleteVM(ctx *gin.Context) responder.Responder {
|
|||
return responder.Error(err)
|
||||
}
|
||||
|
||||
lifecycle.Report(vm, "VM deleted", controller.logger)
|
||||
|
||||
return responder.Code(http.StatusOK)
|
||||
})
|
||||
}
|
||||
|
|
|
|||
|
|
@ -0,0 +1,34 @@
|
|||
package lifecycle
|
||||
|
||||
import (
|
||||
v1 "github.com/cirruslabs/orchard/pkg/resource/v1"
|
||||
"go.uber.org/zap"
|
||||
"time"
|
||||
)
|
||||
|
||||
func Report(vm *v1.VM, message string, logger *zap.SugaredLogger) {
|
||||
args := []interface{}{
|
||||
"component", "lifecycle",
|
||||
"vm_uid", vm.UID,
|
||||
"vm_name", vm.Name,
|
||||
"vm_restart_count", vm.RestartCount,
|
||||
"vm_image", vm.Image,
|
||||
"vm_status", vm.Status,
|
||||
}
|
||||
|
||||
if vm.ScheduledAt.IsZero() {
|
||||
// VM was never scheduled
|
||||
args = append(args, "vm_scheduling_duration", time.Since(vm.CreatedAt))
|
||||
} else {
|
||||
args = append(args, "vm_scheduling_duration", vm.ScheduledAt.Sub(vm.CreatedAt))
|
||||
}
|
||||
|
||||
if vm.StartedAt.IsZero() {
|
||||
// VM was never started
|
||||
args = append(args, "vm_run_duration", time.Duration(0))
|
||||
} else {
|
||||
args = append(args, "vm_run_duration", time.Since(vm.StartedAt))
|
||||
}
|
||||
|
||||
logger.With(args...).Info(message)
|
||||
}
|
||||
|
|
@ -2,6 +2,7 @@ package scheduler
|
|||
|
||||
import (
|
||||
"context"
|
||||
"github.com/cirruslabs/orchard/internal/controller/lifecycle"
|
||||
"github.com/cirruslabs/orchard/internal/controller/notifier"
|
||||
storepkg "github.com/cirruslabs/orchard/internal/controller/store"
|
||||
"github.com/cirruslabs/orchard/internal/opentelemetry"
|
||||
|
|
@ -141,6 +142,7 @@ func (scheduler *Scheduler) schedulingLoopIteration() error {
|
|||
time.Since(unscheduledVM.CreatedAt).Seconds())
|
||||
|
||||
unscheduledVM.Worker = worker.Name
|
||||
unscheduledVM.ScheduledAt = time.Now()
|
||||
|
||||
if err := txn.SetVM(unscheduledVM); err != nil {
|
||||
return err
|
||||
|
|
@ -240,11 +242,15 @@ func (scheduler *Scheduler) healthCheckVM(txn storepkg.Transaction, nameToWorker
|
|||
if needsRestart {
|
||||
logger.Debugf("restarting VM")
|
||||
|
||||
lifecycle.Report(&vm, "VM restarted", scheduler.logger)
|
||||
|
||||
vm.Status = v1.VMStatusPending
|
||||
vm.StatusMessage = ""
|
||||
vm.Worker = ""
|
||||
vm.RestartedAt = time.Now()
|
||||
vm.RestartCount++
|
||||
vm.ScheduledAt = time.Time{}
|
||||
vm.StartedAt = time.Time{}
|
||||
|
||||
return txn.SetVM(vm)
|
||||
}
|
||||
|
|
|
|||
|
|
@ -57,6 +57,9 @@ type VM struct {
|
|||
// by the worker using "tart fqn" command after it had pulled the image.
|
||||
ImageFQN string `json:"image_fqn,omitempty"`
|
||||
|
||||
ScheduledAt time.Time `json:"scheduled_at,omitempty"`
|
||||
StartedAt time.Time `json:"started_at,omitempty"`
|
||||
|
||||
Meta
|
||||
}
|
||||
|
||||
|
|
|
|||
Loading…
Reference in New Issue