initial implementation

This commit is contained in:
Sergey Dudoladov 2020-03-09 15:47:57 +01:00
parent 51909204fd
commit e9486f8325
10 changed files with 135 additions and 7 deletions

View File

@ -62,6 +62,8 @@ spec:
type: string
enable_crd_validation:
type: boolean
enable_lazy_image_upgrade:
type: boolean
enable_shm_volume:
type: boolean
etcd_host:

View File

@ -429,6 +429,10 @@ from numerous escape characters in the latter log entry, view it in CLI with
`PodTemplate` used by the operator is yet to be updated with the default values
used internally in K8s.
The operator also supports lazy updates of the Spilo image. In that case, the pod template of a
PG cluster's stateful set is updated immediately with the new image, but no rolling upgrade follows. This feature saves
some downtime when you know pods will be restarted after the update anyway, for instance due to node rotation.
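To confirm that a lazy update behaved as expected, compare the image recorded in the stateful set's pod template with the images the running pods report. The sketch below is illustrative only, assuming a client-go version of roughly this commit's vintage (pre-context call signatures); the `default` namespace, the `acid-minimal-cluster` name, and the `application=spilo,cluster-name=...` label selector reflect the operator's defaults and the example cluster, not anything introduced by this change.

```go
package main

import (
	"fmt"

	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"k8s.io/client-go/kubernetes"
	"k8s.io/client-go/tools/clientcmd"
)

func main() {
	// build a client from the local kubeconfig (assumption: ~/.kube/config exists)
	cfg, err := clientcmd.BuildConfigFromFlags("", clientcmd.RecommendedHomeFile)
	if err != nil {
		panic(err)
	}
	client, err := kubernetes.NewForConfig(cfg)
	if err != nil {
		panic(err)
	}

	// image already written to the stateful set's pod template by the lazy update
	sts, err := client.AppsV1().StatefulSets("default").Get("acid-minimal-cluster", metav1.GetOptions{})
	if err != nil {
		panic(err)
	}
	fmt.Println("template image:", sts.Spec.Template.Spec.Containers[0].Image)

	// images the pods still run with until they are recreated
	pods, err := client.CoreV1().Pods("default").List(metav1.ListOptions{
		LabelSelector: "application=spilo,cluster-name=acid-minimal-cluster",
	})
	if err != nil {
		panic(err)
	}
	for _, pod := range pods.Items {
		fmt.Println(pod.Name, "runs", pod.Spec.Containers[0].Image)
	}
}
```

Until the pods are recreated, the template image and the pod images differ; once every pod has been restarted, the two outputs match again.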
## Logical backups
The operator can manage K8s cron jobs to run logical backups of Postgres

View File

@ -75,6 +75,10 @@ Those are top-level keys, containing both leaf keys and groups.
[OpenAPI v3 schema validation](https://kubernetes.io/docs/tasks/access-kubernetes-api/custom-resources/custom-resource-definitions/#validation)
The default is `true`.
* **enable_lazy_image_upgrade**
Instructs the operator to update only the statefulsets with the new image, without immediately triggering the rolling update. The assumption is that pods will be restarted later with the new image, for example due to node rotation.
The default is `false`.
* **etcd_host**
Etcd connection string for Patroni defined as `host:port`. Not required when
Patroni native Kubernetes support is used. The default is empty (use

View File

@ -346,6 +346,61 @@ class EndToEndTestCase(unittest.TestCase):
}
k8s.update_config(unpatch_custom_service_annotations)
@timeout_decorator.timeout(TEST_TIMEOUT_SEC)
def test_lazy_image_update(self):
'''
Test lazy update for the Spilo image: the operator changes a stateful set but lets pods run with the old image
until they are recreated for reasons other than the operator's activity. That works because the operator uses
the "onDelete" pod update policy for stateful sets.
The test covers:
1) enabling lazy upgrade in existing operator deployment
2) forcing the normal rolling upgrade by changing the operator configmap and restarting its pod
'''
k8s = self.k8s
pod0 = "acid-minimal-cluster-0"
pod1 = "acid-minimal-cluster-1"
# enable lazy update
patch_lazy_image_upgrade = {
"data": {
"enable_lazy_image_upgrade": "true",
"docker_image": "registry.opensource.zalan.do/acid/spilo-cdp-12:1.6-p16"
}
}
k8s.update_config(patch_lazy_image_upgrade)
# wait for sts update
time.sleep(60)
# restart the pod to get a container with the new image
k8s.api.core_v1.delete_namespaced_pod(pod0, "default")
time.sleep(60)
# lazy update works if the restarted pod and older pods have different Spilo versions
# i.e. the update did not immediately affect all pods
new_image = k8s.get_effective_pod_image(pod0)
old_image = k8s.get_effective_pod_image(pod1)
self.assertNotEqual(old_image, new_image, "Lazy update failed: pods have the same image {}".format(new_image))
# clean up
unpatch_lazy_image_upgrade = {
"data": {
"enable_lazy_image_upgrade": "false",
}
}
k8s.update_config(unpatch_lazy_image_upgrade)
# at this point operator will complete the normal rolling update
# so we additionally test if disabling the lazy update (forcing the normal rolling update) works
time.sleep(60)
image0 = k8s.get_effective_pod_image(pod0)
image1 = k8s.get_effective_pod_image(pod1)
self.assertEqual(image0, image1, "Disabling the lazy update failed: pods still have different images {} and {}".format(image0, image1))
def assert_master_is_unique(self, namespace='default', clusterName="acid-minimal-cluster"):
'''
Check that there is a single pod in the k8s cluster with the label "spilo-role=master"
@ -481,6 +536,14 @@ class K8s:
def create_with_kubectl(self, path):
subprocess.run(["kubectl", "create", "-f", path])
def get_effective_pod_image(self, pod_name, namespace = 'default'):
'''
Get the Spilo image the pod currently uses. In case of lazy rolling updates
it may differ from the one specified in the stateful set.
'''
pod = self.api.core_v1.list_namespaced_pod(
namespace, label_selector="statefulset.kubernetes.io/pod-name=" + pod_name)
return pod.items[0].spec.containers[0].image
if __name__ == '__main__':
unittest.main()

View File

@ -24,6 +24,7 @@ data:
# enable_crd_validation: "true"
# enable_database_access: "true"
# enable_init_containers: "true"
# enable_lazy_image_upgrade: "true"
enable_master_load_balancer: "false"
# enable_pod_antiaffinity: "false"
# enable_pod_disruption_budget: "true"

View File

@ -38,6 +38,8 @@ spec:
type: string
enable_crd_validation:
type: boolean
enable_lazy_image_upgrade:
type: boolean
enable_shm_volume:
type: boolean
etcd_host:

View File

@ -4,6 +4,7 @@ metadata:
name: postgresql-operator-default-configuration
configuration:
# enable_crd_validation: true
# enable_lazy_image_upgrade: true
etcd_host: ""
docker_image: registry.opensource.zalan.do/acid/spilo-12:1.6-p2
# enable_shm_volume: true

View File

@ -409,6 +409,14 @@ func (c *Cluster) compareStatefulSetWith(statefulSet *appsv1.StatefulSet) *compa
}
}
// lazy Spilo update: modify the image in the statefulset itself but let its pods run with the old image
// until they are re-created for other reasons, for example node rotation
if c.OpConfig.EnableLazyImageUpgrade && !reflect.DeepEqual(c.Statefulset.Spec.Template.Spec.Containers[0].Image, statefulSet.Spec.Template.Spec.Containers[0].Image) {
needsReplace = true
needsRollUpdate = false
reasons = append(reasons, "lazy Spilo update: new statefulset's pod image doesn't match the current one")
}
if needsRollUpdate || needsReplace {
match = false
}
@ -440,8 +448,6 @@ func (c *Cluster) compareContainers(description string, setA, setB []v1.Containe
checks := []containerCheck{
newCheck("new statefulset %s's %s (index %d) name doesn't match the current one",
func(a, b v1.Container) bool { return a.Name != b.Name }),
newCheck("new statefulset %s's %s (index %d) image doesn't match the current one",
func(a, b v1.Container) bool { return a.Image != b.Image }),
newCheck("new statefulset %s's %s (index %d) ports don't match the current one", newCheck("new statefulset %s's %s (index %d) ports don't match the current one",
func(a, b v1.Container) bool { return !reflect.DeepEqual(a.Ports, b.Ports) }), func(a, b v1.Container) bool { return !reflect.DeepEqual(a.Ports, b.Ports) }),
newCheck("new statefulset %s's %s (index %d) resources don't match the current ones", newCheck("new statefulset %s's %s (index %d) resources don't match the current ones",
@ -452,6 +458,11 @@ func (c *Cluster) compareContainers(description string, setA, setB []v1.Containe
func(a, b v1.Container) bool { return !reflect.DeepEqual(a.EnvFrom, b.EnvFrom) }),
}
if !c.OpConfig.EnableLazyImageUpgrade {
checks = append(checks, newCheck("new statefulset %s's %s (index %d) image doesn't match the current one",
func(a, b v1.Container) bool { return a.Image != b.Image }))
}
for index, containerA := range setA {
containerB := setB[index]
for _, check := range checks {
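Taken together, the two changes above remove the unconditional image check from `compareContainers` and handle the image difference in `compareStatefulSetWith` instead: with the lazy upgrade enabled, an image-only change replaces the statefulset but suppresses the rolling update. Below is a minimal, self-contained sketch of that decision, using simplified stand-in types rather than the operator's real ones.

```go
package main

import "fmt"

// simplified stand-in for the operator's compare result (hypothetical types)
type compareResult struct {
	needsReplace    bool
	needsRollUpdate bool
	reason          string
}

// compareImages mimics the decision taken above for an image-only change
func compareImages(lazyUpgrade bool, currentImage, desiredImage string) compareResult {
	if currentImage == desiredImage {
		return compareResult{}
	}
	if lazyUpgrade {
		// statefulset spec is replaced, but its pods keep running the old image
		return compareResult{
			needsReplace: true,
			reason:       "lazy Spilo update: new statefulset's pod image doesn't match the current one",
		}
	}
	// default path: replace the statefulset and roll its pods immediately
	return compareResult{
		needsReplace:    true,
		needsRollUpdate: true,
		reason:          "new statefulset's pod image doesn't match the current one",
	}
}

func main() {
	fmt.Printf("%+v\n", compareImages(true, "spilo-12:1.6-p2", "spilo-cdp-12:1.6-p16"))
	fmt.Printf("%+v\n", compareImages(false, "spilo-12:1.6-p2", "spilo-cdp-12:1.6-p16"))
}
```

Running it prints a result with only `needsReplace` set in the lazy case, and both flags set otherwise.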

View File

@ -3,17 +3,17 @@ package cluster
import (
"fmt"
batchv1beta1 "k8s.io/api/batch/v1beta1"
v1 "k8s.io/api/core/v1"
policybeta1 "k8s.io/api/policy/v1beta1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
acidv1 "github.com/zalando/postgres-operator/pkg/apis/acid.zalan.do/v1" acidv1 "github.com/zalando/postgres-operator/pkg/apis/acid.zalan.do/v1"
"github.com/zalando/postgres-operator/pkg/spec" "github.com/zalando/postgres-operator/pkg/spec"
"github.com/zalando/postgres-operator/pkg/util" "github.com/zalando/postgres-operator/pkg/util"
"github.com/zalando/postgres-operator/pkg/util/constants" "github.com/zalando/postgres-operator/pkg/util/constants"
"github.com/zalando/postgres-operator/pkg/util/k8sutil" "github.com/zalando/postgres-operator/pkg/util/k8sutil"
"github.com/zalando/postgres-operator/pkg/util/volumes" "github.com/zalando/postgres-operator/pkg/util/volumes"
appsv1 "k8s.io/api/apps/v1"
batchv1beta1 "k8s.io/api/batch/v1beta1"
v1 "k8s.io/api/core/v1"
policybeta1 "k8s.io/api/policy/v1beta1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
)
// Sync syncs the cluster, making sure the actual Kubernetes objects correspond to what is defined in the manifest.
@ -244,6 +244,32 @@ func (c *Cluster) syncPodDisruptionBudget(isUpdate bool) error {
return nil
}
func (c *Cluster) mustUpdatePodsAfterLazyUpdate(desiredSset *appsv1.StatefulSet) (bool, error) {
if c.OpConfig.EnableLazyImageUpgrade {
return false, nil
}
pods, err := c.listPods()
if err != nil {
return false, fmt.Errorf("could not list pods of the statefulset: %v", err)
}
for _, pod := range pods {
effectivePodImage := pod.Spec.Containers[0].Image
ssImage := desiredSset.Spec.Template.Spec.Containers[0].Image
if ssImage != effectivePodImage {
c.logger.Infof("not all pods were re-started when the lazy upgrade was enabled; forcing the rolling upgrade now")
return true, nil
}
}
return false, nil
}
func (c *Cluster) syncStatefulSet() error {
var (
podsRollingUpdateRequired bool
@ -310,6 +336,19 @@ func (c *Cluster) syncStatefulSet() error {
}
}
}
if !podsRollingUpdateRequired {
// even if the desired and actual statefulsets match,
// there may still be outdated pods if
// (a) the lazy update was just disabled
// and
// (b) some pods were not restarted while the lazy update was still in place
podsRollingUpdateRequired, err = c.mustUpdatePodsAfterLazyUpdate(desiredSS)
if err != nil {
return fmt.Errorf("could not list pods of the statefulset: %v", err)
}
}
}
// Apply special PostgreSQL parameters that can only be set via the Patroni API.

View File

@ -137,6 +137,7 @@ type Config struct {
ProtectedRoles []string `name:"protected_role_names" default:"admin"`
PostgresSuperuserTeams []string `name:"postgres_superuser_teams" default:""`
SetMemoryRequestToLimit bool `name:"set_memory_request_to_limit" default:"false"`
EnableLazyImageUpgrade bool `name:"enable_lazy_image_upgrade" default:"false"`
}
// MustMarshal marshals the config or panics