diff --git a/pkg/cluster/pod.go b/pkg/cluster/pod.go index 959bacb54..6658ba414 100644 --- a/pkg/cluster/pod.go +++ b/pkg/cluster/pod.go @@ -376,6 +376,36 @@ func (c *Cluster) getPatroniMemberData(pod *v1.Pod) (patroni.MemberData, error) return memberData, nil } +// podIsNotRunning returns true if a pod is known to be in a non-running state, +// e.g. stuck in CreateContainerConfigError, CrashLoopBackOff, ImagePullBackOff, etc. +// Pods with no status information are not considered non-running, as they may +// simply not have reported status yet. +func podIsNotRunning(pod *v1.Pod) bool { + if pod.Status.Phase == "" { + // No status reported yet — don't treat as non-running + return false + } + if pod.Status.Phase != v1.PodRunning { + return true + } + for _, cs := range pod.Status.ContainerStatuses { + if cs.State.Waiting != nil || cs.State.Terminated != nil { + return true + } + } + return false +} + +// allPodsRunning returns true only if every pod in the list is in a healthy running state. +func (c *Cluster) allPodsRunning(pods []v1.Pod) bool { + for i := range pods { + if podIsNotRunning(&pods[i]) { + return false + } + } + return true +} + func (c *Cluster) recreatePod(podName spec.NamespacedName) (*v1.Pod, error) { stopCh := make(chan struct{}) ch := c.registerPodSubscriber(podName) @@ -444,7 +474,8 @@ func (c *Cluster) recreatePods(pods []v1.Pod, switchoverCandidates []spec.Namesp // switchover if // 1. we have not observed a new master pod when re-creating former replicas // 2. we know possible switchover targets even when no replicas were recreated - if newMasterPod == nil && len(replicas) > 0 { + // 3. the master pod is actually running (can't switchover a dead master) + if newMasterPod == nil && len(replicas) > 0 && !podIsNotRunning(masterPod) { masterCandidate, err := c.getSwitchoverCandidate(masterPod) if err != nil { // do not recreate master now so it will keep the update flag and switchover will be retried on next sync @@ -455,6 +486,9 @@ func (c *Cluster) recreatePods(pods []v1.Pod, switchoverCandidates []spec.Namesp } } else if newMasterPod == nil && len(replicas) == 0 { c.logger.Warningf("cannot perform switch over before re-creating the pod: no replicas") + } else if podIsNotRunning(masterPod) { + c.logger.Warningf("master pod %q is not running, skipping switchover and recreating directly", + util.NameFromMeta(masterPod.ObjectMeta)) } c.logger.Infof("recreating old master pod %q", util.NameFromMeta(masterPod.ObjectMeta)) diff --git a/pkg/cluster/pod_test.go b/pkg/cluster/pod_test.go index 6816b4d7a..6ab3f9207 100644 --- a/pkg/cluster/pod_test.go +++ b/pkg/cluster/pod_test.go @@ -15,6 +15,7 @@ import ( "github.com/zalando/postgres-operator/pkg/util/config" "github.com/zalando/postgres-operator/pkg/util/k8sutil" "github.com/zalando/postgres-operator/pkg/util/patroni" + v1 "k8s.io/api/core/v1" ) func TestGetSwitchoverCandidate(t *testing.T) { @@ -112,3 +113,302 @@ func TestGetSwitchoverCandidate(t *testing.T) { } } } + +func TestPodIsNotRunning(t *testing.T) { + tests := []struct { + subtest string + pod v1.Pod + expected bool + }{ + { + subtest: "pod with no status reported yet", + pod: v1.Pod{ + Status: v1.PodStatus{}, + }, + expected: false, + }, + { + subtest: "pod running with all containers ready", + pod: v1.Pod{ + Status: v1.PodStatus{ + Phase: v1.PodRunning, + ContainerStatuses: []v1.ContainerStatus{ + { + State: v1.ContainerState{ + Running: &v1.ContainerStateRunning{}, + }, + }, + }, + }, + }, + expected: false, + }, + { + subtest: "pod in pending phase", + pod: v1.Pod{ + Status: v1.PodStatus{ + Phase: v1.PodPending, + }, + }, + expected: true, + }, + { + subtest: "pod running but container in CreateContainerConfigError", + pod: v1.Pod{ + Status: v1.PodStatus{ + Phase: v1.PodRunning, + ContainerStatuses: []v1.ContainerStatus{ + { + State: v1.ContainerState{ + Waiting: &v1.ContainerStateWaiting{ + Reason: "CreateContainerConfigError", + Message: `secret "some-secret" not found`, + }, + }, + }, + }, + }, + }, + expected: true, + }, + { + subtest: "pod running but container in CrashLoopBackOff", + pod: v1.Pod{ + Status: v1.PodStatus{ + Phase: v1.PodRunning, + ContainerStatuses: []v1.ContainerStatus{ + { + State: v1.ContainerState{ + Waiting: &v1.ContainerStateWaiting{ + Reason: "CrashLoopBackOff", + }, + }, + }, + }, + }, + }, + expected: true, + }, + { + subtest: "pod running but container terminated", + pod: v1.Pod{ + Status: v1.PodStatus{ + Phase: v1.PodRunning, + ContainerStatuses: []v1.ContainerStatus{ + { + State: v1.ContainerState{ + Terminated: &v1.ContainerStateTerminated{ + ExitCode: 137, + }, + }, + }, + }, + }, + }, + expected: true, + }, + { + subtest: "pod running with mixed container states - one healthy one broken", + pod: v1.Pod{ + Status: v1.PodStatus{ + Phase: v1.PodRunning, + ContainerStatuses: []v1.ContainerStatus{ + { + State: v1.ContainerState{ + Running: &v1.ContainerStateRunning{}, + }, + }, + { + State: v1.ContainerState{ + Waiting: &v1.ContainerStateWaiting{ + Reason: "CreateContainerConfigError", + }, + }, + }, + }, + }, + }, + expected: true, + }, + { + subtest: "pod in failed phase", + pod: v1.Pod{ + Status: v1.PodStatus{ + Phase: v1.PodFailed, + }, + }, + expected: true, + }, + { + subtest: "pod running with multiple healthy containers", + pod: v1.Pod{ + Status: v1.PodStatus{ + Phase: v1.PodRunning, + ContainerStatuses: []v1.ContainerStatus{ + { + State: v1.ContainerState{ + Running: &v1.ContainerStateRunning{}, + }, + }, + { + State: v1.ContainerState{ + Running: &v1.ContainerStateRunning{}, + }, + }, + }, + }, + }, + expected: false, + }, + { + subtest: "pod running with ImagePullBackOff", + pod: v1.Pod{ + Status: v1.PodStatus{ + Phase: v1.PodRunning, + ContainerStatuses: []v1.ContainerStatus{ + { + State: v1.ContainerState{ + Waiting: &v1.ContainerStateWaiting{ + Reason: "ImagePullBackOff", + }, + }, + }, + }, + }, + }, + expected: true, + }, + } + + for _, tt := range tests { + t.Run(tt.subtest, func(t *testing.T) { + result := podIsNotRunning(&tt.pod) + if result != tt.expected { + t.Errorf("podIsNotRunning() = %v, expected %v", result, tt.expected) + } + }) + } +} + +func TestAllPodsRunning(t *testing.T) { + client, _ := newFakeK8sSyncClient() + + var cluster = New( + Config{ + OpConfig: config.Config{ + Resources: config.Resources{ + ClusterLabels: map[string]string{"application": "spilo"}, + ClusterNameLabel: "cluster-name", + PodRoleLabel: "spilo-role", + }, + }, + }, client, acidv1.Postgresql{}, logger, eventRecorder) + + tests := []struct { + subtest string + pods []v1.Pod + expected bool + }{ + { + subtest: "all pods running", + pods: []v1.Pod{ + { + Status: v1.PodStatus{ + Phase: v1.PodRunning, + ContainerStatuses: []v1.ContainerStatus{ + {State: v1.ContainerState{Running: &v1.ContainerStateRunning{}}}, + }, + }, + }, + { + Status: v1.PodStatus{ + Phase: v1.PodRunning, + ContainerStatuses: []v1.ContainerStatus{ + {State: v1.ContainerState{Running: &v1.ContainerStateRunning{}}}, + }, + }, + }, + }, + expected: true, + }, + { + subtest: "one pod not running", + pods: []v1.Pod{ + { + Status: v1.PodStatus{ + Phase: v1.PodRunning, + ContainerStatuses: []v1.ContainerStatus{ + {State: v1.ContainerState{Running: &v1.ContainerStateRunning{}}}, + }, + }, + }, + { + Status: v1.PodStatus{ + Phase: v1.PodRunning, + ContainerStatuses: []v1.ContainerStatus{ + { + State: v1.ContainerState{ + Waiting: &v1.ContainerStateWaiting{ + Reason: "CreateContainerConfigError", + }, + }, + }, + }, + }, + }, + }, + expected: false, + }, + { + subtest: "all pods not running", + pods: []v1.Pod{ + { + Status: v1.PodStatus{ + Phase: v1.PodPending, + }, + }, + { + Status: v1.PodStatus{ + Phase: v1.PodRunning, + ContainerStatuses: []v1.ContainerStatus{ + { + State: v1.ContainerState{ + Waiting: &v1.ContainerStateWaiting{ + Reason: "CrashLoopBackOff", + }, + }, + }, + }, + }, + }, + }, + expected: false, + }, + { + subtest: "empty pod list", + pods: []v1.Pod{}, + expected: true, + }, + { + subtest: "pods with no status reported yet", + pods: []v1.Pod{ + { + Status: v1.PodStatus{}, + }, + { + Status: v1.PodStatus{}, + }, + }, + expected: true, + }, + } + + for _, tt := range tests { + t.Run(tt.subtest, func(t *testing.T) { + result := cluster.allPodsRunning(tt.pods) + if result != tt.expected { + t.Errorf("allPodsRunning() = %v, expected %v", result, tt.expected) + } + }) + } +} diff --git a/pkg/cluster/sync.go b/pkg/cluster/sync.go index ffebd306c..771965eba 100644 --- a/pkg/cluster/sync.go +++ b/pkg/cluster/sync.go @@ -718,14 +718,26 @@ func (c *Cluster) syncStatefulSet() error { if configPatched, restartPrimaryFirst, restartWait, err = c.syncPatroniConfig(pods, c.Spec.Patroni, requiredPgParameters); err != nil { c.logger.Warningf("Patroni config updated? %v - errors during config sync: %v", configPatched, err) postponeReasons = append(postponeReasons, "errors during Patroni config sync") - isSafeToRecreatePods = false + // Only mark unsafe if all pods are running. If some pods are not running, + // Patroni API errors are expected and should not block pod recreation, + // which is the only way to fix non-running pods. + if c.allPodsRunning(pods) { + isSafeToRecreatePods = false + } else { + c.logger.Warningf("ignoring Patroni config sync errors because some pods are not running") + } } // restart Postgres where it is still pending if err = c.restartInstances(pods, restartWait, restartPrimaryFirst); err != nil { c.logger.Errorf("errors while restarting Postgres in pods via Patroni API: %v", err) postponeReasons = append(postponeReasons, "errors while restarting Postgres via Patroni API") - isSafeToRecreatePods = false + // Same logic: don't let unreachable non-running pods block recreation. + if c.allPodsRunning(pods) { + isSafeToRecreatePods = false + } else { + c.logger.Warningf("ignoring Patroni restart errors because some pods are not running") + } } // if we get here we also need to re-create the pods (either leftovers from the old