add fix to recreate non running pods in syncStatefulsets

This commit is contained in:
Annie Li 2026-03-06 11:27:31 -08:00
parent d495825f4b
commit c5142ee5ec
No known key found for this signature in database
GPG Key ID: 03ABEE8E6189E333
4 changed files with 456 additions and 3 deletions

View File

@ -376,6 +376,30 @@ func (c *Cluster) getPatroniMemberData(pod *v1.Pod) (patroni.MemberData, error)
return memberData, nil
}
// podIsNotRunning returns true if a pod is not in a healthy running state,
// e.g. stuck in CreateContainerConfigError, CrashLoopBackOff, ImagePullBackOff, etc.
func podIsNotRunning(pod *v1.Pod) bool {
if pod.Status.Phase != v1.PodRunning {
return true
}
for _, cs := range pod.Status.ContainerStatuses {
if cs.State.Waiting != nil || cs.State.Terminated != nil {
return true
}
}
return false
}
// allPodsRunning returns true only if every pod in the list is in a healthy running state.
func (c *Cluster) allPodsRunning(pods []v1.Pod) bool {
for i := range pods {
if podIsNotRunning(&pods[i]) {
return false
}
}
return true
}
func (c *Cluster) recreatePod(podName spec.NamespacedName) (*v1.Pod, error) {
stopCh := make(chan struct{})
ch := c.registerPodSubscriber(podName)
@ -444,7 +468,8 @@ func (c *Cluster) recreatePods(pods []v1.Pod, switchoverCandidates []spec.Namesp
// switchover if
// 1. we have not observed a new master pod when re-creating former replicas
// 2. we know possible switchover targets even when no replicas were recreated
if newMasterPod == nil && len(replicas) > 0 {
// 3. the master pod is actually running (can't switchover a dead master)
if newMasterPod == nil && len(replicas) > 0 && !podIsNotRunning(masterPod) {
masterCandidate, err := c.getSwitchoverCandidate(masterPod)
if err != nil {
// do not recreate master now so it will keep the update flag and switchover will be retried on next sync
@ -455,6 +480,9 @@ func (c *Cluster) recreatePods(pods []v1.Pod, switchoverCandidates []spec.Namesp
}
} else if newMasterPod == nil && len(replicas) == 0 {
c.logger.Warningf("cannot perform switch over before re-creating the pod: no replicas")
} else if podIsNotRunning(masterPod) {
c.logger.Warningf("master pod %q is not running, skipping switchover and recreating directly",
util.NameFromMeta(masterPod.ObjectMeta))
}
c.logger.Infof("recreating old master pod %q", util.NameFromMeta(masterPod.ObjectMeta))

View File

@ -112,3 +112,283 @@ func TestGetSwitchoverCandidate(t *testing.T) {
}
}
}
func TestPodIsNotRunning(t *testing.T) {
tests := []struct {
subtest string
pod v1.Pod
expected bool
}{
{
subtest: "pod running with all containers ready",
pod: v1.Pod{
Status: v1.PodStatus{
Phase: v1.PodRunning,
ContainerStatuses: []v1.ContainerStatus{
{
State: v1.ContainerState{
Running: &v1.ContainerStateRunning{},
},
},
},
},
},
expected: false,
},
{
subtest: "pod in pending phase",
pod: v1.Pod{
Status: v1.PodStatus{
Phase: v1.PodPending,
},
},
expected: true,
},
{
subtest: "pod running but container in CreateContainerConfigError",
pod: v1.Pod{
Status: v1.PodStatus{
Phase: v1.PodRunning,
ContainerStatuses: []v1.ContainerStatus{
{
State: v1.ContainerState{
Waiting: &v1.ContainerStateWaiting{
Reason: "CreateContainerConfigError",
Message: `secret "some-secret" not found`,
},
},
},
},
},
},
expected: true,
},
{
subtest: "pod running but container in CrashLoopBackOff",
pod: v1.Pod{
Status: v1.PodStatus{
Phase: v1.PodRunning,
ContainerStatuses: []v1.ContainerStatus{
{
State: v1.ContainerState{
Waiting: &v1.ContainerStateWaiting{
Reason: "CrashLoopBackOff",
},
},
},
},
},
},
expected: true,
},
{
subtest: "pod running but container terminated",
pod: v1.Pod{
Status: v1.PodStatus{
Phase: v1.PodRunning,
ContainerStatuses: []v1.ContainerStatus{
{
State: v1.ContainerState{
Terminated: &v1.ContainerStateTerminated{
ExitCode: 137,
},
},
},
},
},
},
expected: true,
},
{
subtest: "pod running with mixed container states - one healthy one broken",
pod: v1.Pod{
Status: v1.PodStatus{
Phase: v1.PodRunning,
ContainerStatuses: []v1.ContainerStatus{
{
State: v1.ContainerState{
Running: &v1.ContainerStateRunning{},
},
},
{
State: v1.ContainerState{
Waiting: &v1.ContainerStateWaiting{
Reason: "CreateContainerConfigError",
},
},
},
},
},
},
expected: true,
},
{
subtest: "pod in failed phase",
pod: v1.Pod{
Status: v1.PodStatus{
Phase: v1.PodFailed,
},
},
expected: true,
},
{
subtest: "pod running with multiple healthy containers",
pod: v1.Pod{
Status: v1.PodStatus{
Phase: v1.PodRunning,
ContainerStatuses: []v1.ContainerStatus{
{
State: v1.ContainerState{
Running: &v1.ContainerStateRunning{},
},
},
{
State: v1.ContainerState{
Running: &v1.ContainerStateRunning{},
},
},
},
},
},
expected: false,
},
{
subtest: "pod running with ImagePullBackOff",
pod: v1.Pod{
Status: v1.PodStatus{
Phase: v1.PodRunning,
ContainerStatuses: []v1.ContainerStatus{
{
State: v1.ContainerState{
Waiting: &v1.ContainerStateWaiting{
Reason: "ImagePullBackOff",
},
},
},
},
},
},
expected: true,
},
}
for _, tt := range tests {
t.Run(tt.subtest, func(t *testing.T) {
result := podIsNotRunning(&tt.pod)
if result != tt.expected {
t.Errorf("podIsNotRunning() = %v, expected %v", result, tt.expected)
}
})
}
}
func TestAllPodsRunning(t *testing.T) {
client, _ := newFakeK8sSyncClient()
var cluster = New(
Config{
OpConfig: config.Config{
Resources: config.Resources{
ClusterLabels: map[string]string{"application": "spilo"},
ClusterNameLabel: "cluster-name",
PodRoleLabel: "spilo-role",
},
},
}, client, acidv1.Postgresql{}, logger, eventRecorder)
tests := []struct {
subtest string
pods []v1.Pod
expected bool
}{
{
subtest: "all pods running",
pods: []v1.Pod{
{
Status: v1.PodStatus{
Phase: v1.PodRunning,
ContainerStatuses: []v1.ContainerStatus{
{State: v1.ContainerState{Running: &v1.ContainerStateRunning{}}},
},
},
},
{
Status: v1.PodStatus{
Phase: v1.PodRunning,
ContainerStatuses: []v1.ContainerStatus{
{State: v1.ContainerState{Running: &v1.ContainerStateRunning{}}},
},
},
},
},
expected: true,
},
{
subtest: "one pod not running",
pods: []v1.Pod{
{
Status: v1.PodStatus{
Phase: v1.PodRunning,
ContainerStatuses: []v1.ContainerStatus{
{State: v1.ContainerState{Running: &v1.ContainerStateRunning{}}},
},
},
},
{
Status: v1.PodStatus{
Phase: v1.PodRunning,
ContainerStatuses: []v1.ContainerStatus{
{
State: v1.ContainerState{
Waiting: &v1.ContainerStateWaiting{
Reason: "CreateContainerConfigError",
},
},
},
},
},
},
},
expected: false,
},
{
subtest: "all pods not running",
pods: []v1.Pod{
{
Status: v1.PodStatus{
Phase: v1.PodPending,
},
},
{
Status: v1.PodStatus{
Phase: v1.PodRunning,
ContainerStatuses: []v1.ContainerStatus{
{
State: v1.ContainerState{
Waiting: &v1.ContainerStateWaiting{
Reason: "CrashLoopBackOff",
},
},
},
},
},
},
},
expected: false,
},
{
subtest: "empty pod list",
pods: []v1.Pod{},
expected: true,
},
}
for _, tt := range tests {
t.Run(tt.subtest, func(t *testing.T) {
result := cluster.allPodsRunning(tt.pods)
if result != tt.expected {
t.Errorf("allPodsRunning() = %v, expected %v", result, tt.expected)
}
})
}
}

View File

@ -718,14 +718,26 @@ func (c *Cluster) syncStatefulSet() error {
if configPatched, restartPrimaryFirst, restartWait, err = c.syncPatroniConfig(pods, c.Spec.Patroni, requiredPgParameters); err != nil {
c.logger.Warningf("Patroni config updated? %v - errors during config sync: %v", configPatched, err)
postponeReasons = append(postponeReasons, "errors during Patroni config sync")
isSafeToRecreatePods = false
// Only mark unsafe if all pods are running. If some pods are not running,
// Patroni API errors are expected and should not block pod recreation,
// which is the only way to fix non-running pods.
if c.allPodsRunning(pods) {
isSafeToRecreatePods = false
} else {
c.logger.Warningf("ignoring Patroni config sync errors because some pods are not running")
}
}
// restart Postgres where it is still pending
if err = c.restartInstances(pods, restartWait, restartPrimaryFirst); err != nil {
c.logger.Errorf("errors while restarting Postgres in pods via Patroni API: %v", err)
postponeReasons = append(postponeReasons, "errors while restarting Postgres via Patroni API")
isSafeToRecreatePods = false
// Same logic: don't let unreachable non-running pods block recreation.
if c.allPodsRunning(pods) {
isSafeToRecreatePods = false
} else {
c.logger.Warningf("ignoring Patroni restart errors because some pods are not running")
}
}
// if we get here we also need to re-create the pods (either leftovers from the old

View File

@ -1053,3 +1053,136 @@ func TestUpdateSecretNameConflict(t *testing.T) {
expectedError := fmt.Sprintf("syncing secret %s failed: error while checking for password rotation: could not update secret because of user name mismatch", "default/prepared-owner-user.acid-test-cluster.credentials")
assert.Contains(t, err.Error(), expectedError)
}
func TestSyncStatefulSetNonRunningPodsDoNotBlockRecreation(t *testing.T) {
testName := "test that non-running pods do not block rolling update"
client, _ := newFakeK8sSyncClient()
clusterName := "acid-test-cluster"
namespace := "default"
ctrl := gomock.NewController(t)
defer ctrl.Finish()
pg := acidv1.Postgresql{
ObjectMeta: metav1.ObjectMeta{
Name: clusterName,
Namespace: namespace,
},
Spec: acidv1.PostgresSpec{
NumberOfInstances: 1,
Volume: acidv1.Volume{
Size: "1Gi",
},
},
}
var cluster = New(
Config{
OpConfig: config.Config{
PatroniAPICheckInterval: time.Duration(1),
PatroniAPICheckTimeout: time.Duration(5),
PodManagementPolicy: "ordered_ready",
Resources: config.Resources{
ClusterLabels: map[string]string{"application": "spilo"},
ClusterNameLabel: "cluster-name",
DefaultCPURequest: "300m",
DefaultCPULimit: "300m",
DefaultMemoryRequest: "300Mi",
DefaultMemoryLimit: "300Mi",
PodRoleLabel: "spilo-role",
ResourceCheckInterval: time.Duration(3),
ResourceCheckTimeout: time.Duration(10),
},
},
}, client, pg, logger, eventRecorder)
cluster.Name = clusterName
cluster.Namespace = namespace
// mock Patroni API that always fails (simulates unreachable pod)
mockClient := mocks.NewMockHTTPClient(ctrl)
mockClient.EXPECT().Do(gomock.Any()).Return(nil, fmt.Errorf("connection refused")).AnyTimes()
mockClient.EXPECT().Get(gomock.Any()).Return(nil, fmt.Errorf("connection refused")).AnyTimes()
cluster.patroni = patroni.New(patroniLogger, mockClient)
// test allPodsRunning with non-running pods returns false
nonRunningPods := []v1.Pod{
{
Status: v1.PodStatus{
Phase: v1.PodRunning,
ContainerStatuses: []v1.ContainerStatus{
{
State: v1.ContainerState{
Waiting: &v1.ContainerStateWaiting{
Reason: "CreateContainerConfigError",
Message: `secret "old-secret" not found`,
},
},
},
},
},
},
}
if cluster.allPodsRunning(nonRunningPods) {
t.Errorf("%s: allPodsRunning should return false for pods in CreateContainerConfigError", testName)
}
// test allPodsRunning with running pods returns true
runningPods := []v1.Pod{
{
Status: v1.PodStatus{
Phase: v1.PodRunning,
ContainerStatuses: []v1.ContainerStatus{
{
State: v1.ContainerState{
Running: &v1.ContainerStateRunning{},
},
},
},
},
},
}
if !cluster.allPodsRunning(runningPods) {
t.Errorf("%s: allPodsRunning should return true for running pods", testName)
}
// test mixed: 3-node cluster with 1 broken replica
mixedPods := []v1.Pod{
{
Status: v1.PodStatus{
Phase: v1.PodRunning,
ContainerStatuses: []v1.ContainerStatus{
{State: v1.ContainerState{Running: &v1.ContainerStateRunning{}}},
},
},
},
{
Status: v1.PodStatus{
Phase: v1.PodRunning,
ContainerStatuses: []v1.ContainerStatus{
{State: v1.ContainerState{Running: &v1.ContainerStateRunning{}}},
},
},
},
{
Status: v1.PodStatus{
Phase: v1.PodRunning,
ContainerStatuses: []v1.ContainerStatus{
{
State: v1.ContainerState{
Waiting: &v1.ContainerStateWaiting{
Reason: "CreateContainerConfigError",
},
},
},
},
},
},
}
if cluster.allPodsRunning(mixedPods) {
t.Errorf("%s: allPodsRunning should return false when one pod is in CreateContainerConfigError", testName)
}
}