initial implementation
This commit is contained in:
parent
51909204fd
commit
e9486f8325
|
|
@ -62,6 +62,8 @@ spec:
|
||||||
type: string
|
type: string
|
||||||
enable_crd_validation:
|
enable_crd_validation:
|
||||||
type: boolean
|
type: boolean
|
||||||
|
enable_lazy_image_upgrade:
|
||||||
|
type: boolean
|
||||||
enable_shm_volume:
|
enable_shm_volume:
|
||||||
type: boolean
|
type: boolean
|
||||||
etcd_host:
|
etcd_host:
|
||||||
|
|
|
||||||
|
|
@ -429,6 +429,10 @@ from numerous escape characters in the latter log entry, view it in CLI with
|
||||||
`PodTemplate` used by the operator is yet to be updated with the default values
|
`PodTemplate` used by the operator is yet to be updated with the default values
|
||||||
used internally in K8s.
|
used internally in K8s.
|
||||||
|
|
||||||
|
The operator also supports lazy updates of the Spilo image. That means the pod template of a
|
||||||
|
PG cluster's stateful set is updated immediately with the new image, but no rolling upgrade follows. This feature saves you
|
||||||
|
some downtime when you know pods will be restarted after the update anyway, for instance due to node rotation.
|
||||||
|
|
||||||
## Logical backups
|
## Logical backups
|
||||||
|
|
||||||
The operator can manage K8s cron jobs to run logical backups of Postgres
|
The operator can manage K8s cron jobs to run logical backups of Postgres
|
||||||
|
|
|
||||||
|
|
@ -75,6 +75,10 @@ Those are top-level keys, containing both leaf keys and groups.
|
||||||
[OpenAPI v3 schema validation](https://kubernetes.io/docs/tasks/access-kubernetes-api/custom-resources/custom-resource-definitions/#validation)
|
[OpenAPI v3 schema validation](https://kubernetes.io/docs/tasks/access-kubernetes-api/custom-resources/custom-resource-definitions/#validation)
|
||||||
The default is `true`.
|
The default is `true`.
|
||||||
|
|
||||||
|
* **enable_lazy_image_upgrade**
|
||||||
|
Instructs the operator to update only the statefulsets with the new image without immediately doing a rolling update. The assumption is that pods will be restarted later with the new image, for example due to node rotation.
|
||||||
|
The default is `false`.
|
||||||
|
|
||||||
* **etcd_host**
|
* **etcd_host**
|
||||||
Etcd connection string for Patroni defined as `host:port`. Not required when
|
Etcd connection string for Patroni defined as `host:port`. Not required when
|
||||||
Patroni native Kubernetes support is used. The default is empty (use
|
Patroni native Kubernetes support is used. The default is empty (use
|
||||||
|
|
|
||||||
|
|
@ -346,6 +346,61 @@ class EndToEndTestCase(unittest.TestCase):
|
||||||
}
|
}
|
||||||
k8s.update_config(unpatch_custom_service_annotations)
|
k8s.update_config(unpatch_custom_service_annotations)
|
||||||
|
|
||||||
|
@timeout_decorator.timeout(TEST_TIMEOUT_SEC)
def test_lazy_image_update(self):
    '''
       Test lazy update for the Spilo image: operator changes a stateful set but lets pods run with the old image
       until they are recreated for reasons other than operator's activity. That works because the operator uses
       "onDelete" pod update policy for stateful sets.

       The test covers:
       1) enabling lazy upgrade in existing operator deployment
       2) forcing the normal rolling upgrade by changing the operator configmap and restarting its pod
    '''

    k8s = self.k8s
    pod0 = "acid-minimal-cluster-0"
    pod1 = "acid-minimal-cluster-1"

    # enable lazy update and switch to a different Spilo image in one config patch
    patch_lazy_image_upgrade = {
        "data": {
            "enable_lazy_image_upgrade": "true",
            "docker_image": "registry.opensource.zalan.do/acid/spilo-cdp-12:1.6-p16"
        }
    }
    k8s.update_config(patch_lazy_image_upgrade)

    # wait for sts update
    time.sleep(60)

    # restart only the first pod to get a container with the new image;
    # the second pod is left untouched and serves as the "old image" reference
    k8s.api.core_v1.delete_namespaced_pod(pod0, "default")
    time.sleep(60)

    # lazy update works if the restarted pod and older pods have different Spilo versions
    # i.e. the update did not immediately affect all pods
    new_image = k8s.get_effective_pod_image(pod0)
    old_image = k8s.get_effective_pod_image(pod1)
    self.assertNotEqual(old_image, new_image,
                        "Lazy update failed: pods have the same image {}".format(new_image))

    # clean up: only the lazy flag is reverted, the new docker_image stays configured
    unpatch_lazy_image_upgrade = {
        "data": {
            "enable_lazy_image_upgrade": "false",
        }
    }
    k8s.update_config(unpatch_lazy_image_upgrade)

    # at this point operator will complete the normal rolling update
    # so we additionally test if disabling the lazy update (forcing the normal rolling update) works
    time.sleep(60)

    image0 = k8s.get_effective_pod_image(pod0)
    image1 = k8s.get_effective_pod_image(pod1)

    self.assertEqual(image0, image1,
                     "Disabling lazy update failed: pods still have different images {} and {}".format(image0, image1))
|
||||||
|
|
||||||
def assert_master_is_unique(self, namespace='default', clusterName="acid-minimal-cluster"):
|
def assert_master_is_unique(self, namespace='default', clusterName="acid-minimal-cluster"):
|
||||||
'''
|
'''
|
||||||
Check that there is a single pod in the k8s cluster with the label "spilo-role=master"
|
Check that there is a single pod in the k8s cluster with the label "spilo-role=master"
|
||||||
|
|
@ -481,6 +536,14 @@ class K8s:
|
||||||
def create_with_kubectl(self, path):
|
def create_with_kubectl(self, path):
|
||||||
subprocess.run(["kubectl", "create", "-f", path])
|
subprocess.run(["kubectl", "create", "-f", path])
|
||||||
|
|
||||||
|
def get_effective_pod_image(self, pod_name, namespace='default'):
    '''
       Get the Spilo image pod currently uses. In case of lazy rolling updates
       it may differ from the one specified in the stateful set.

       The pod is looked up in `namespace` via the
       "statefulset.kubernetes.io/pod-name" label and the image of its first
       container is returned.

       Raises IndexError if no pod matches `pod_name` in `namespace`.
    '''
    pods = self.api.core_v1.list_namespaced_pod(
        namespace, label_selector="statefulset.kubernetes.io/pod-name=" + pod_name)

    # fail with a readable message instead of a bare "list index out of range"
    if not pods.items:
        raise IndexError("no pod named {} found in namespace {}".format(pod_name, namespace))

    return pods.items[0].spec.containers[0].image
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
unittest.main()
|
unittest.main()
|
||||||
|
|
|
||||||
|
|
@ -24,6 +24,7 @@ data:
|
||||||
# enable_crd_validation: "true"
|
# enable_crd_validation: "true"
|
||||||
# enable_database_access: "true"
|
# enable_database_access: "true"
|
||||||
# enable_init_containers: "true"
|
# enable_init_containers: "true"
|
||||||
|
# enable_lazy_image_upgrade: "true"
|
||||||
enable_master_load_balancer: "false"
|
enable_master_load_balancer: "false"
|
||||||
# enable_pod_antiaffinity: "false"
|
# enable_pod_antiaffinity: "false"
|
||||||
# enable_pod_disruption_budget: "true"
|
# enable_pod_disruption_budget: "true"
|
||||||
|
|
|
||||||
|
|
@ -38,6 +38,8 @@ spec:
|
||||||
type: string
|
type: string
|
||||||
enable_crd_validation:
|
enable_crd_validation:
|
||||||
type: boolean
|
type: boolean
|
||||||
|
enable_lazy_image_upgrade:
|
||||||
|
type: boolean
|
||||||
enable_shm_volume:
|
enable_shm_volume:
|
||||||
type: boolean
|
type: boolean
|
||||||
etcd_host:
|
etcd_host:
|
||||||
|
|
|
||||||
|
|
@ -4,6 +4,7 @@ metadata:
|
||||||
name: postgresql-operator-default-configuration
|
name: postgresql-operator-default-configuration
|
||||||
configuration:
|
configuration:
|
||||||
# enable_crd_validation: true
|
# enable_crd_validation: true
|
||||||
|
# enable_lazy_image_upgrade: true
|
||||||
etcd_host: ""
|
etcd_host: ""
|
||||||
docker_image: registry.opensource.zalan.do/acid/spilo-12:1.6-p2
|
docker_image: registry.opensource.zalan.do/acid/spilo-12:1.6-p2
|
||||||
# enable_shm_volume: true
|
# enable_shm_volume: true
|
||||||
|
|
|
||||||
|
|
@ -409,6 +409,14 @@ func (c *Cluster) compareStatefulSetWith(statefulSet *appsv1.StatefulSet) *compa
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// lazy Spilo update: modify the image in the statefulset itself but let its pods run with the old image
|
||||||
|
// until they are re-created for other reasons, for example node rotation
|
||||||
|
if c.OpConfig.EnableLazyImageUpgrade && !reflect.DeepEqual(c.Statefulset.Spec.Template.Spec.Containers[0].Image, statefulSet.Spec.Template.Spec.Containers[0].Image) {
|
||||||
|
needsReplace = true
|
||||||
|
needsRollUpdate = false
|
||||||
|
reasons = append(reasons, "lazy Spilo update: new statefulset's pod image doesn't match the current one")
|
||||||
|
}
|
||||||
|
|
||||||
if needsRollUpdate || needsReplace {
|
if needsRollUpdate || needsReplace {
|
||||||
match = false
|
match = false
|
||||||
}
|
}
|
||||||
|
|
@ -440,8 +448,6 @@ func (c *Cluster) compareContainers(description string, setA, setB []v1.Containe
|
||||||
checks := []containerCheck{
|
checks := []containerCheck{
|
||||||
newCheck("new statefulset %s's %s (index %d) name doesn't match the current one",
|
newCheck("new statefulset %s's %s (index %d) name doesn't match the current one",
|
||||||
func(a, b v1.Container) bool { return a.Name != b.Name }),
|
func(a, b v1.Container) bool { return a.Name != b.Name }),
|
||||||
newCheck("new statefulset %s's %s (index %d) image doesn't match the current one",
|
|
||||||
func(a, b v1.Container) bool { return a.Image != b.Image }),
|
|
||||||
newCheck("new statefulset %s's %s (index %d) ports don't match the current one",
|
newCheck("new statefulset %s's %s (index %d) ports don't match the current one",
|
||||||
func(a, b v1.Container) bool { return !reflect.DeepEqual(a.Ports, b.Ports) }),
|
func(a, b v1.Container) bool { return !reflect.DeepEqual(a.Ports, b.Ports) }),
|
||||||
newCheck("new statefulset %s's %s (index %d) resources don't match the current ones",
|
newCheck("new statefulset %s's %s (index %d) resources don't match the current ones",
|
||||||
|
|
@ -452,6 +458,11 @@ func (c *Cluster) compareContainers(description string, setA, setB []v1.Containe
|
||||||
func(a, b v1.Container) bool { return !reflect.DeepEqual(a.EnvFrom, b.EnvFrom) }),
|
func(a, b v1.Container) bool { return !reflect.DeepEqual(a.EnvFrom, b.EnvFrom) }),
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if !c.OpConfig.EnableLazyImageUpgrade {
|
||||||
|
checks = append(checks, newCheck("new statefulset %s's %s (index %d) image doesn't match the current one",
|
||||||
|
func(a, b v1.Container) bool { return a.Image != b.Image }))
|
||||||
|
}
|
||||||
|
|
||||||
for index, containerA := range setA {
|
for index, containerA := range setA {
|
||||||
containerB := setB[index]
|
containerB := setB[index]
|
||||||
for _, check := range checks {
|
for _, check := range checks {
|
||||||
|
|
|
||||||
|
|
@ -3,17 +3,17 @@ package cluster
|
||||||
import (
|
import (
|
||||||
"fmt"
|
"fmt"
|
||||||
|
|
||||||
batchv1beta1 "k8s.io/api/batch/v1beta1"
|
|
||||||
v1 "k8s.io/api/core/v1"
|
|
||||||
policybeta1 "k8s.io/api/policy/v1beta1"
|
|
||||||
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
|
|
||||||
|
|
||||||
acidv1 "github.com/zalando/postgres-operator/pkg/apis/acid.zalan.do/v1"
|
acidv1 "github.com/zalando/postgres-operator/pkg/apis/acid.zalan.do/v1"
|
||||||
"github.com/zalando/postgres-operator/pkg/spec"
|
"github.com/zalando/postgres-operator/pkg/spec"
|
||||||
"github.com/zalando/postgres-operator/pkg/util"
|
"github.com/zalando/postgres-operator/pkg/util"
|
||||||
"github.com/zalando/postgres-operator/pkg/util/constants"
|
"github.com/zalando/postgres-operator/pkg/util/constants"
|
||||||
"github.com/zalando/postgres-operator/pkg/util/k8sutil"
|
"github.com/zalando/postgres-operator/pkg/util/k8sutil"
|
||||||
"github.com/zalando/postgres-operator/pkg/util/volumes"
|
"github.com/zalando/postgres-operator/pkg/util/volumes"
|
||||||
|
appsv1 "k8s.io/api/apps/v1"
|
||||||
|
batchv1beta1 "k8s.io/api/batch/v1beta1"
|
||||||
|
v1 "k8s.io/api/core/v1"
|
||||||
|
policybeta1 "k8s.io/api/policy/v1beta1"
|
||||||
|
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
|
||||||
)
|
)
|
||||||
|
|
||||||
// Sync syncs the cluster, making sure the actual Kubernetes objects correspond to what is defined in the manifest.
|
// Sync syncs the cluster, making sure the actual Kubernetes objects correspond to what is defined in the manifest.
|
||||||
|
|
@ -244,6 +244,32 @@ func (c *Cluster) syncPodDisruptionBudget(isUpdate bool) error {
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func (c *Cluster) mustUpdatePodsAfterLazyUpdate(desiredSset *appsv1.StatefulSet) (bool, error) {
|
||||||
|
|
||||||
|
if c.OpConfig.EnableLazyImageUpgrade {
|
||||||
|
return false, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
pods, err := c.listPods()
|
||||||
|
if err != nil {
|
||||||
|
return false, fmt.Errorf("could not list pods of the statefulset: %v", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, pod := range pods {
|
||||||
|
|
||||||
|
effectivePodImage := pod.Spec.Containers[0].Image
|
||||||
|
ssImage := desiredSset.Spec.Template.Spec.Containers[0].Image
|
||||||
|
|
||||||
|
if ssImage != effectivePodImage {
|
||||||
|
c.logger.Infof("not all pods were re-started when the lazy upgrade was enabled; forcing the rolling upgrade now")
|
||||||
|
return true, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
return false, nil
|
||||||
|
}
|
||||||
|
|
||||||
func (c *Cluster) syncStatefulSet() error {
|
func (c *Cluster) syncStatefulSet() error {
|
||||||
var (
|
var (
|
||||||
podsRollingUpdateRequired bool
|
podsRollingUpdateRequired bool
|
||||||
|
|
@ -310,6 +336,19 @@ func (c *Cluster) syncStatefulSet() error {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if !podsRollingUpdateRequired {
|
||||||
|
// even if desired and actual statefulsets match
|
||||||
|
// there may still be outdated pods, provided that
|
||||||
|
// (a) the lazy update was just disabled
|
||||||
|
// and
|
||||||
|
// (b) some of the pods were not restarted when the lazy update was still in place
|
||||||
|
podsRollingUpdateRequired, err = c.mustUpdatePodsAfterLazyUpdate(desiredSS)
|
||||||
|
if err != nil {
|
||||||
|
return fmt.Errorf("could not list pods of the statefulset: %v", err)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// Apply special PostgreSQL parameters that can only be set via the Patroni API.
|
// Apply special PostgreSQL parameters that can only be set via the Patroni API.
|
||||||
|
|
|
||||||
|
|
@ -137,6 +137,7 @@ type Config struct {
|
||||||
ProtectedRoles []string `name:"protected_role_names" default:"admin"`
|
ProtectedRoles []string `name:"protected_role_names" default:"admin"`
|
||||||
PostgresSuperuserTeams []string `name:"postgres_superuser_teams" default:""`
|
PostgresSuperuserTeams []string `name:"postgres_superuser_teams" default:""`
|
||||||
SetMemoryRequestToLimit bool `name:"set_memory_request_to_limit" default:"false"`
|
SetMemoryRequestToLimit bool `name:"set_memory_request_to_limit" default:"false"`
|
||||||
|
EnableLazyImageUpgrade bool `name:"enable_lazy_image_upgrade" default:"false"`
|
||||||
}
|
}
|
||||||
|
|
||||||
// MustMarshal marshals the config or panics
|
// MustMarshal marshals the config or panics
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue