initial implementation
This commit is contained in:
parent
51909204fd
commit
e9486f8325
|
|
@ -62,6 +62,8 @@ spec:
|
|||
type: string
|
||||
enable_crd_validation:
|
||||
type: boolean
|
||||
enable_lazy_image_upgrade:
|
||||
type: boolean
|
||||
enable_shm_volume:
|
||||
type: boolean
|
||||
etcd_host:
|
||||
|
|
|
|||
|
|
@ -429,6 +429,10 @@ from numerous escape characters in the latter log entry, view it in CLI with
|
|||
`PodTemplate` used by the operator is yet to be updated with the default values
|
||||
used internally in K8s.
|
||||
|
||||
The operator also supports lazy updates of the Spilo image. That means the pod template of a
|
||||
PG cluster's stateful set is updated immediately with the new image, but no rolling upgrade follows. This feature saves you
|
||||
some downtime when you know pods are re-started after the update anyway, for instance due to the node rotation.
|
||||
|
||||
## Logical backups
|
||||
|
||||
The operator can manage K8s cron jobs to run logical backups of Postgres
|
||||
|
|
|
|||
|
|
@ -75,6 +75,10 @@ Those are top-level keys, containing both leaf keys and groups.
|
|||
[OpenAPI v3 schema validation](https://kubernetes.io/docs/tasks/access-kubernetes-api/custom-resources/custom-resource-definitions/#validation)
|
||||
The default is `true`.
|
||||
|
||||
* **enable_lazy_image_upgrade**
|
||||
Instructs the operator to update only the statefulsets with the new image without immediately doing the rolling update. The assumption is that pods will be re-started later with the new image, for example due to the node rotation.
|
||||
The default is `false`.
|
||||
|
||||
* **etcd_host**
|
||||
Etcd connection string for Patroni defined as `host:port`. Not required when
|
||||
Patroni native Kubernetes support is used. The default is empty (use
|
||||
|
|
|
|||
|
|
@ -346,6 +346,61 @@ class EndToEndTestCase(unittest.TestCase):
|
|||
}
|
||||
k8s.update_config(unpatch_custom_service_annotations)
|
||||
|
||||
@timeout_decorator.timeout(TEST_TIMEOUT_SEC)
def test_lazy_image_update(self):
    '''
    Test the lazy update for the Spilo image: the operator changes a stateful
    set but lets pods run with the old image until they are re-created for
    reasons other than the operator's own activity. That works because the
    operator uses the "onDelete" pod update policy for stateful sets.

    The test covers:
    1) enabling lazy upgrade in an existing operator deployment
    2) forcing the normal rolling upgrade by changing the operator configmap
       and restarting its pod
    '''
    k8s = self.k8s
    pod0 = "acid-minimal-cluster-0"
    pod1 = "acid-minimal-cluster-1"

    # enable lazy update together with a new target image
    patch_lazy_image_upgrade = {
        "data": {
            "enable_lazy_image_upgrade": "true",
            "docker_image": "registry.opensource.zalan.do/acid/spilo-cdp-12:1.6-p16"
        }
    }
    k8s.update_config(patch_lazy_image_upgrade)

    # wait for the statefulset update to be applied
    # NOTE(review): fixed sleeps are fragile; consider polling the sts image instead
    time.sleep(60)

    # restart one pod to get a container with the new image
    k8s.api.core_v1.delete_namespaced_pod(pod0, "default")
    time.sleep(60)

    # lazy update works if the restarted pod and the older pod have different
    # Spilo versions, i.e. the update did not immediately affect all pods
    new_image = k8s.get_effective_pod_image(pod0)
    old_image = k8s.get_effective_pod_image(pod1)
    self.assertNotEqual(old_image, new_image,
                        "Lazy update failed: pods have the same image {}".format(new_image))

    # clean up
    unpatch_lazy_image_upgrade = {
        "data": {
            "enable_lazy_image_upgrade": "false",
        }
    }
    k8s.update_config(unpatch_lazy_image_upgrade)

    # at this point the operator will complete the normal rolling update,
    # so we additionally test whether disabling the lazy update (forcing the
    # normal rolling update) works
    time.sleep(60)

    image0 = k8s.get_effective_pod_image(pod0)
    image1 = k8s.get_effective_pod_image(pod1)

    self.assertEqual(image0, image1,
                     "Disabling lazy update failed: pods still have different images {} and {}".format(image0, image1))
|
||||
|
||||
def assert_master_is_unique(self, namespace='default', clusterName="acid-minimal-cluster"):
|
||||
'''
|
||||
Check that there is a single pod in the k8s cluster with the label "spilo-role=master"
|
||||
|
|
@ -481,6 +536,14 @@ class K8s:
|
|||
def create_with_kubectl(self, path):
|
||||
subprocess.run(["kubectl", "create", "-f", path])
|
||||
|
||||
def get_effective_pod_image(self, pod_name, namespace = 'default'):
    '''
    Return the Spilo image the given pod is actually running.

    With lazy rolling updates the running image may differ from the one
    declared in the pod's stateful set.
    '''
    selector = "statefulset.kubernetes.io/pod-name=" + pod_name
    pod_list = self.api.core_v1.list_namespaced_pod(namespace, label_selector=selector)
    return pod_list.items[0].spec.containers[0].image
|
||||
|
||||
if __name__ == '__main__':
|
||||
unittest.main()
|
||||
|
|
|
|||
|
|
@ -24,6 +24,7 @@ data:
|
|||
# enable_crd_validation: "true"
|
||||
# enable_database_access: "true"
|
||||
# enable_init_containers: "true"
|
||||
# enable_lazy_image_upgrade: "true"
|
||||
enable_master_load_balancer: "false"
|
||||
# enable_pod_antiaffinity: "false"
|
||||
# enable_pod_disruption_budget: "true"
|
||||
|
|
|
|||
|
|
@ -38,6 +38,8 @@ spec:
|
|||
type: string
|
||||
enable_crd_validation:
|
||||
type: boolean
|
||||
enable_lazy_image_upgrade:
|
||||
type: boolean
|
||||
enable_shm_volume:
|
||||
type: boolean
|
||||
etcd_host:
|
||||
|
|
|
|||
|
|
@ -4,6 +4,7 @@ metadata:
|
|||
name: postgresql-operator-default-configuration
|
||||
configuration:
|
||||
# enable_crd_validation: true
|
||||
# enable_lazy_image_upgrade: true
|
||||
etcd_host: ""
|
||||
docker_image: registry.opensource.zalan.do/acid/spilo-12:1.6-p2
|
||||
# enable_shm_volume: true
|
||||
|
|
|
|||
|
|
@ -409,6 +409,14 @@ func (c *Cluster) compareStatefulSetWith(statefulSet *appsv1.StatefulSet) *compa
|
|||
}
|
||||
}
|
||||
|
||||
// lazy Spilo update: modify the image in the statefulset itself but let its pods run with the old image
|
||||
// until they are re-created for other reasons, for example node rotation
|
||||
if c.OpConfig.EnableLazyImageUpgrade && !reflect.DeepEqual(c.Statefulset.Spec.Template.Spec.Containers[0].Image, statefulSet.Spec.Template.Spec.Containers[0].Image) {
|
||||
needsReplace = true
|
||||
needsRollUpdate = false
|
||||
reasons = append(reasons, "lazy Spilo update: new statefulset's pod image doesn't match the current one")
|
||||
}
|
||||
|
||||
if needsRollUpdate || needsReplace {
|
||||
match = false
|
||||
}
|
||||
|
|
@ -440,8 +448,6 @@ func (c *Cluster) compareContainers(description string, setA, setB []v1.Containe
|
|||
checks := []containerCheck{
|
||||
newCheck("new statefulset %s's %s (index %d) name doesn't match the current one",
|
||||
func(a, b v1.Container) bool { return a.Name != b.Name }),
|
||||
newCheck("new statefulset %s's %s (index %d) image doesn't match the current one",
|
||||
func(a, b v1.Container) bool { return a.Image != b.Image }),
|
||||
newCheck("new statefulset %s's %s (index %d) ports don't match the current one",
|
||||
func(a, b v1.Container) bool { return !reflect.DeepEqual(a.Ports, b.Ports) }),
|
||||
newCheck("new statefulset %s's %s (index %d) resources don't match the current ones",
|
||||
|
|
@ -452,6 +458,11 @@ func (c *Cluster) compareContainers(description string, setA, setB []v1.Containe
|
|||
func(a, b v1.Container) bool { return !reflect.DeepEqual(a.EnvFrom, b.EnvFrom) }),
|
||||
}
|
||||
|
||||
if !c.OpConfig.EnableLazyImageUpgrade {
|
||||
checks = append(checks, newCheck("new statefulset %s's %s (index %d) image doesn't match the current one",
|
||||
func(a, b v1.Container) bool { return a.Image != b.Image }))
|
||||
}
|
||||
|
||||
for index, containerA := range setA {
|
||||
containerB := setB[index]
|
||||
for _, check := range checks {
|
||||
|
|
|
|||
|
|
@ -3,17 +3,17 @@ package cluster
|
|||
import (
|
||||
"fmt"
|
||||
|
||||
batchv1beta1 "k8s.io/api/batch/v1beta1"
|
||||
v1 "k8s.io/api/core/v1"
|
||||
policybeta1 "k8s.io/api/policy/v1beta1"
|
||||
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
|
||||
|
||||
acidv1 "github.com/zalando/postgres-operator/pkg/apis/acid.zalan.do/v1"
|
||||
"github.com/zalando/postgres-operator/pkg/spec"
|
||||
"github.com/zalando/postgres-operator/pkg/util"
|
||||
"github.com/zalando/postgres-operator/pkg/util/constants"
|
||||
"github.com/zalando/postgres-operator/pkg/util/k8sutil"
|
||||
"github.com/zalando/postgres-operator/pkg/util/volumes"
|
||||
appsv1 "k8s.io/api/apps/v1"
|
||||
batchv1beta1 "k8s.io/api/batch/v1beta1"
|
||||
v1 "k8s.io/api/core/v1"
|
||||
policybeta1 "k8s.io/api/policy/v1beta1"
|
||||
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
|
||||
)
|
||||
|
||||
// Sync syncs the cluster, making sure the actual Kubernetes objects correspond to what is defined in the manifest.
|
||||
|
|
@ -244,6 +244,32 @@ func (c *Cluster) syncPodDisruptionBudget(isUpdate bool) error {
|
|||
return nil
|
||||
}
|
||||
|
||||
func (c *Cluster) mustUpdatePodsAfterLazyUpdate(desiredSset *appsv1.StatefulSet) (bool, error) {
|
||||
|
||||
if c.OpConfig.EnableLazyImageUpgrade {
|
||||
return false, nil
|
||||
}
|
||||
|
||||
pods, err := c.listPods()
|
||||
if err != nil {
|
||||
return false, fmt.Errorf("could not list pods of the statefulset: %v", err)
|
||||
}
|
||||
|
||||
for _, pod := range pods {
|
||||
|
||||
effectivePodImage := pod.Spec.Containers[0].Image
|
||||
ssImage := desiredSset.Spec.Template.Spec.Containers[0].Image
|
||||
|
||||
if ssImage != effectivePodImage {
|
||||
c.logger.Infof("not all pods were re-started when the lazy upgrade was enabled; forcing the rolling upgrade now")
|
||||
return true, nil
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
return false, nil
|
||||
}
|
||||
|
||||
func (c *Cluster) syncStatefulSet() error {
|
||||
var (
|
||||
podsRollingUpdateRequired bool
|
||||
|
|
@ -310,6 +336,19 @@ func (c *Cluster) syncStatefulSet() error {
|
|||
}
|
||||
}
|
||||
}
|
||||
|
||||
if !podsRollingUpdateRequired {
|
||||
// even if desired and actual statefulsets match
|
||||
// there still may be not up-to-date pods on condition
|
||||
// (a) the lazy update was just disabled
|
||||
// and
|
||||
// (b) some of the pods were not restarted when the lazy update was still in place
|
||||
podsRollingUpdateRequired, err = c.mustUpdatePodsAfterLazyUpdate(desiredSS)
|
||||
if err != nil {
|
||||
return fmt.Errorf("could not list pods of the statefulset: %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
// Apply special PostgreSQL parameters that can only be set via the Patroni API.
|
||||
|
|
|
|||
|
|
@ -137,6 +137,7 @@ type Config struct {
|
|||
ProtectedRoles []string `name:"protected_role_names" default:"admin"`
|
||||
PostgresSuperuserTeams []string `name:"postgres_superuser_teams" default:""`
|
||||
SetMemoryRequestToLimit bool `name:"set_memory_request_to_limit" default:"false"`
|
||||
EnableLazyImageUpgrade bool `name:"enable_lazy_image_upgrade" default:"false"`
|
||||
}
|
||||
|
||||
// MustMarshal marshals the config or panics
|
||||
|
|
|
|||
Loading…
Reference in New Issue