From 176c6e8b19820d4779b25ef4d35ccbe546f35b4d Mon Sep 17 00:00:00 2001 From: Oleksii Kliukin Date: Mon, 10 Apr 2017 11:50:26 +0200 Subject: [PATCH] Avoid passing the role into the recreatePod. Conceptually, the operator's task is just to change the pod. As it has no influence over the role the pod will take (either the master or a replica), it shouldn't wait for the specific role. This fixes at least one issue, where the pod running in a single-pod cluster has been waited for forever by the operator expecting it to have a wrong role (since Patroni callback assiging it the original replica role has been killed after a quick promote by the next callback.) --- pkg/cluster/pod.go | 8 ++++---- pkg/cluster/sync.go | 5 ++--- pkg/cluster/util.go | 7 +++++-- 3 files changed, 11 insertions(+), 9 deletions(-) diff --git a/pkg/cluster/pod.go b/pkg/cluster/pod.go index 5c5409937..60106834c 100644 --- a/pkg/cluster/pod.go +++ b/pkg/cluster/pod.go @@ -112,7 +112,7 @@ func (c *Cluster) registerPodSubscriber(podName spec.PodName) chan spec.PodEvent return ch } -func (c *Cluster) recreatePod(pod v1.Pod, spiloRole string) error { +func (c *Cluster) recreatePod(pod v1.Pod) error { podName := spec.PodName{ Namespace: pod.Namespace, Name: pod.Name, @@ -133,7 +133,7 @@ func (c *Cluster) recreatePod(pod v1.Pod, spiloRole string) error { if err := c.waitForPodDeletion(ch); err != nil { return err } - if err := c.waitForPodLabel(ch, spiloRole); err != nil { + if err := c.waitForPodLabel(ch); err != nil { return err } c.logger.Infof("Pod '%s' is ready", podName) @@ -184,7 +184,7 @@ func (c *Cluster) recreatePods() error { continue } - err = c.recreatePod(pod, "replica") + err = c.recreatePod(pod) if err != nil { return fmt.Errorf("Can't recreate replica Pod '%s': %s", util.NameFromMeta(pod.ObjectMeta), err) } @@ -197,7 +197,7 @@ func (c *Cluster) recreatePods() error { //TODO: specify master, leave new master empty c.logger.Infof("Recreating master Pod '%s'", util.NameFromMeta(masterPod.ObjectMeta)) - if err := c.recreatePod(masterPod, "replica"); err != nil { + if err := c.recreatePod(masterPod); err != nil { return fmt.Errorf("Can't recreate master Pod '%s': %s", util.NameFromMeta(masterPod.ObjectMeta), err) } diff --git a/pkg/cluster/sync.go b/pkg/cluster/sync.go index 24db120fd..e81eec8d1 100644 --- a/pkg/cluster/sync.go +++ b/pkg/cluster/sync.go @@ -152,7 +152,6 @@ func (c *Cluster) syncPods() error { //First check if we have left overs from the previous rolling update for _, pod := range pods.Items { - podRole := util.PodSpiloRole(&pod) podName := spec.PodName{ Namespace: pod.Namespace, Name: pod.Name, @@ -161,7 +160,7 @@ func (c *Cluster) syncPods() error { if match && pod.Status.Phase == v1.PodPending { c.logger.Infof("Waiting for left over Pod '%s'", podName) ch := c.registerPodSubscriber(podName) - c.waitForPodLabel(ch, podRole) + c.waitForPodLabel(ch) c.unregisterPodSubscriber(podName) } } @@ -177,7 +176,7 @@ func (c *Cluster) syncPods() error { if util.PodSpiloRole(&pod) == "master" { //TODO: do manual failover first } - err = c.recreatePod(pod, "replica") // newly created pods are always "replica"s + err = c.recreatePod(pod) c.logger.Infof("Pod '%s' has been successfully recreated", util.NameFromMeta(pod.ObjectMeta)) } diff --git a/pkg/cluster/util.go b/pkg/cluster/util.go index 1223c2ba6..899432f91 100644 --- a/pkg/cluster/util.go +++ b/pkg/cluster/util.go @@ -146,12 +146,15 @@ func (c *Cluster) getTeamMembers() ([]string, error) { return teamInfo.Members, nil } -func (c *Cluster) waitForPodLabel(podEvents chan spec.PodEvent, spiloRole string) error { +func (c *Cluster) waitForPodLabel(podEvents chan spec.PodEvent) error { for { select { case podEvent := <-podEvents: role := util.PodSpiloRole(podEvent.CurPod) - if role == spiloRole { // TODO: newly-created Pods are always replicas => check against empty string only + // We cannot assume any role of the newly created pod. Normally, for a multi-pod cluster + // we should observe the 'replica' value, but it could be that some pods are not allowed + // to promote, therefore, the new pod could be a master as well. + if role == "master" || role == "replica" { return nil } case <-time.After(c.OpConfig.PodLabelWaitTimeout):