Add fix to recreate non-running pods in syncStatefulSet
This commit is contained in:
parent
d495825f4b
commit
c5142ee5ec
|
|
@ -376,6 +376,30 @@ func (c *Cluster) getPatroniMemberData(pod *v1.Pod) (patroni.MemberData, error)
|
|||
return memberData, nil
|
||||
}
|
||||
|
||||
// podIsNotRunning returns true if a pod is not in a healthy running state,
|
||||
// e.g. stuck in CreateContainerConfigError, CrashLoopBackOff, ImagePullBackOff, etc.
|
||||
func podIsNotRunning(pod *v1.Pod) bool {
|
||||
if pod.Status.Phase != v1.PodRunning {
|
||||
return true
|
||||
}
|
||||
for _, cs := range pod.Status.ContainerStatuses {
|
||||
if cs.State.Waiting != nil || cs.State.Terminated != nil {
|
||||
return true
|
||||
}
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
// allPodsRunning returns true only if every pod in the list is in a healthy running state.
|
||||
func (c *Cluster) allPodsRunning(pods []v1.Pod) bool {
|
||||
for i := range pods {
|
||||
if podIsNotRunning(&pods[i]) {
|
||||
return false
|
||||
}
|
||||
}
|
||||
return true
|
||||
}
|
||||
|
||||
func (c *Cluster) recreatePod(podName spec.NamespacedName) (*v1.Pod, error) {
|
||||
stopCh := make(chan struct{})
|
||||
ch := c.registerPodSubscriber(podName)
|
||||
|
|
@ -444,7 +468,8 @@ func (c *Cluster) recreatePods(pods []v1.Pod, switchoverCandidates []spec.Namesp
|
|||
// switchover if
|
||||
// 1. we have not observed a new master pod when re-creating former replicas
|
||||
// 2. we know possible switchover targets even when no replicas were recreated
|
||||
if newMasterPod == nil && len(replicas) > 0 {
|
||||
// 3. the master pod is actually running (can't switchover a dead master)
|
||||
if newMasterPod == nil && len(replicas) > 0 && !podIsNotRunning(masterPod) {
|
||||
masterCandidate, err := c.getSwitchoverCandidate(masterPod)
|
||||
if err != nil {
|
||||
// do not recreate master now so it will keep the update flag and switchover will be retried on next sync
|
||||
|
|
@ -455,6 +480,9 @@ func (c *Cluster) recreatePods(pods []v1.Pod, switchoverCandidates []spec.Namesp
|
|||
}
|
||||
} else if newMasterPod == nil && len(replicas) == 0 {
|
||||
c.logger.Warningf("cannot perform switch over before re-creating the pod: no replicas")
|
||||
} else if podIsNotRunning(masterPod) {
|
||||
c.logger.Warningf("master pod %q is not running, skipping switchover and recreating directly",
|
||||
util.NameFromMeta(masterPod.ObjectMeta))
|
||||
}
|
||||
c.logger.Infof("recreating old master pod %q", util.NameFromMeta(masterPod.ObjectMeta))
|
||||
|
||||
|
|
|
|||
|
|
@ -112,3 +112,283 @@ func TestGetSwitchoverCandidate(t *testing.T) {
|
|||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestPodIsNotRunning(t *testing.T) {
|
||||
tests := []struct {
|
||||
subtest string
|
||||
pod v1.Pod
|
||||
expected bool
|
||||
}{
|
||||
{
|
||||
subtest: "pod running with all containers ready",
|
||||
pod: v1.Pod{
|
||||
Status: v1.PodStatus{
|
||||
Phase: v1.PodRunning,
|
||||
ContainerStatuses: []v1.ContainerStatus{
|
||||
{
|
||||
State: v1.ContainerState{
|
||||
Running: &v1.ContainerStateRunning{},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
expected: false,
|
||||
},
|
||||
{
|
||||
subtest: "pod in pending phase",
|
||||
pod: v1.Pod{
|
||||
Status: v1.PodStatus{
|
||||
Phase: v1.PodPending,
|
||||
},
|
||||
},
|
||||
expected: true,
|
||||
},
|
||||
{
|
||||
subtest: "pod running but container in CreateContainerConfigError",
|
||||
pod: v1.Pod{
|
||||
Status: v1.PodStatus{
|
||||
Phase: v1.PodRunning,
|
||||
ContainerStatuses: []v1.ContainerStatus{
|
||||
{
|
||||
State: v1.ContainerState{
|
||||
Waiting: &v1.ContainerStateWaiting{
|
||||
Reason: "CreateContainerConfigError",
|
||||
Message: `secret "some-secret" not found`,
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
expected: true,
|
||||
},
|
||||
{
|
||||
subtest: "pod running but container in CrashLoopBackOff",
|
||||
pod: v1.Pod{
|
||||
Status: v1.PodStatus{
|
||||
Phase: v1.PodRunning,
|
||||
ContainerStatuses: []v1.ContainerStatus{
|
||||
{
|
||||
State: v1.ContainerState{
|
||||
Waiting: &v1.ContainerStateWaiting{
|
||||
Reason: "CrashLoopBackOff",
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
expected: true,
|
||||
},
|
||||
{
|
||||
subtest: "pod running but container terminated",
|
||||
pod: v1.Pod{
|
||||
Status: v1.PodStatus{
|
||||
Phase: v1.PodRunning,
|
||||
ContainerStatuses: []v1.ContainerStatus{
|
||||
{
|
||||
State: v1.ContainerState{
|
||||
Terminated: &v1.ContainerStateTerminated{
|
||||
ExitCode: 137,
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
expected: true,
|
||||
},
|
||||
{
|
||||
subtest: "pod running with mixed container states - one healthy one broken",
|
||||
pod: v1.Pod{
|
||||
Status: v1.PodStatus{
|
||||
Phase: v1.PodRunning,
|
||||
ContainerStatuses: []v1.ContainerStatus{
|
||||
{
|
||||
State: v1.ContainerState{
|
||||
Running: &v1.ContainerStateRunning{},
|
||||
},
|
||||
},
|
||||
{
|
||||
State: v1.ContainerState{
|
||||
Waiting: &v1.ContainerStateWaiting{
|
||||
Reason: "CreateContainerConfigError",
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
expected: true,
|
||||
},
|
||||
{
|
||||
subtest: "pod in failed phase",
|
||||
pod: v1.Pod{
|
||||
Status: v1.PodStatus{
|
||||
Phase: v1.PodFailed,
|
||||
},
|
||||
},
|
||||
expected: true,
|
||||
},
|
||||
{
|
||||
subtest: "pod running with multiple healthy containers",
|
||||
pod: v1.Pod{
|
||||
Status: v1.PodStatus{
|
||||
Phase: v1.PodRunning,
|
||||
ContainerStatuses: []v1.ContainerStatus{
|
||||
{
|
||||
State: v1.ContainerState{
|
||||
Running: &v1.ContainerStateRunning{},
|
||||
},
|
||||
},
|
||||
{
|
||||
State: v1.ContainerState{
|
||||
Running: &v1.ContainerStateRunning{},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
expected: false,
|
||||
},
|
||||
{
|
||||
subtest: "pod running with ImagePullBackOff",
|
||||
pod: v1.Pod{
|
||||
Status: v1.PodStatus{
|
||||
Phase: v1.PodRunning,
|
||||
ContainerStatuses: []v1.ContainerStatus{
|
||||
{
|
||||
State: v1.ContainerState{
|
||||
Waiting: &v1.ContainerStateWaiting{
|
||||
Reason: "ImagePullBackOff",
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
expected: true,
|
||||
},
|
||||
}
|
||||
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.subtest, func(t *testing.T) {
|
||||
result := podIsNotRunning(&tt.pod)
|
||||
if result != tt.expected {
|
||||
t.Errorf("podIsNotRunning() = %v, expected %v", result, tt.expected)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestAllPodsRunning(t *testing.T) {
|
||||
client, _ := newFakeK8sSyncClient()
|
||||
|
||||
var cluster = New(
|
||||
Config{
|
||||
OpConfig: config.Config{
|
||||
Resources: config.Resources{
|
||||
ClusterLabels: map[string]string{"application": "spilo"},
|
||||
ClusterNameLabel: "cluster-name",
|
||||
PodRoleLabel: "spilo-role",
|
||||
},
|
||||
},
|
||||
}, client, acidv1.Postgresql{}, logger, eventRecorder)
|
||||
|
||||
tests := []struct {
|
||||
subtest string
|
||||
pods []v1.Pod
|
||||
expected bool
|
||||
}{
|
||||
{
|
||||
subtest: "all pods running",
|
||||
pods: []v1.Pod{
|
||||
{
|
||||
Status: v1.PodStatus{
|
||||
Phase: v1.PodRunning,
|
||||
ContainerStatuses: []v1.ContainerStatus{
|
||||
{State: v1.ContainerState{Running: &v1.ContainerStateRunning{}}},
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
Status: v1.PodStatus{
|
||||
Phase: v1.PodRunning,
|
||||
ContainerStatuses: []v1.ContainerStatus{
|
||||
{State: v1.ContainerState{Running: &v1.ContainerStateRunning{}}},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
expected: true,
|
||||
},
|
||||
{
|
||||
subtest: "one pod not running",
|
||||
pods: []v1.Pod{
|
||||
{
|
||||
Status: v1.PodStatus{
|
||||
Phase: v1.PodRunning,
|
||||
ContainerStatuses: []v1.ContainerStatus{
|
||||
{State: v1.ContainerState{Running: &v1.ContainerStateRunning{}}},
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
Status: v1.PodStatus{
|
||||
Phase: v1.PodRunning,
|
||||
ContainerStatuses: []v1.ContainerStatus{
|
||||
{
|
||||
State: v1.ContainerState{
|
||||
Waiting: &v1.ContainerStateWaiting{
|
||||
Reason: "CreateContainerConfigError",
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
expected: false,
|
||||
},
|
||||
{
|
||||
subtest: "all pods not running",
|
||||
pods: []v1.Pod{
|
||||
{
|
||||
Status: v1.PodStatus{
|
||||
Phase: v1.PodPending,
|
||||
},
|
||||
},
|
||||
{
|
||||
Status: v1.PodStatus{
|
||||
Phase: v1.PodRunning,
|
||||
ContainerStatuses: []v1.ContainerStatus{
|
||||
{
|
||||
State: v1.ContainerState{
|
||||
Waiting: &v1.ContainerStateWaiting{
|
||||
Reason: "CrashLoopBackOff",
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
expected: false,
|
||||
},
|
||||
{
|
||||
subtest: "empty pod list",
|
||||
pods: []v1.Pod{},
|
||||
expected: true,
|
||||
},
|
||||
}
|
||||
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.subtest, func(t *testing.T) {
|
||||
result := cluster.allPodsRunning(tt.pods)
|
||||
if result != tt.expected {
|
||||
t.Errorf("allPodsRunning() = %v, expected %v", result, tt.expected)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -718,14 +718,26 @@ func (c *Cluster) syncStatefulSet() error {
|
|||
if configPatched, restartPrimaryFirst, restartWait, err = c.syncPatroniConfig(pods, c.Spec.Patroni, requiredPgParameters); err != nil {
|
||||
c.logger.Warningf("Patroni config updated? %v - errors during config sync: %v", configPatched, err)
|
||||
postponeReasons = append(postponeReasons, "errors during Patroni config sync")
|
||||
isSafeToRecreatePods = false
|
||||
// Only mark unsafe if all pods are running. If some pods are not running,
|
||||
// Patroni API errors are expected and should not block pod recreation,
|
||||
// which is the only way to fix non-running pods.
|
||||
if c.allPodsRunning(pods) {
|
||||
isSafeToRecreatePods = false
|
||||
} else {
|
||||
c.logger.Warningf("ignoring Patroni config sync errors because some pods are not running")
|
||||
}
|
||||
}
|
||||
|
||||
// restart Postgres where it is still pending
|
||||
if err = c.restartInstances(pods, restartWait, restartPrimaryFirst); err != nil {
|
||||
c.logger.Errorf("errors while restarting Postgres in pods via Patroni API: %v", err)
|
||||
postponeReasons = append(postponeReasons, "errors while restarting Postgres via Patroni API")
|
||||
isSafeToRecreatePods = false
|
||||
// Same logic: don't let unreachable non-running pods block recreation.
|
||||
if c.allPodsRunning(pods) {
|
||||
isSafeToRecreatePods = false
|
||||
} else {
|
||||
c.logger.Warningf("ignoring Patroni restart errors because some pods are not running")
|
||||
}
|
||||
}
|
||||
|
||||
// if we get here we also need to re-create the pods (either leftovers from the old
|
||||
|
|
|
|||
|
|
@ -1053,3 +1053,136 @@ func TestUpdateSecretNameConflict(t *testing.T) {
|
|||
expectedError := fmt.Sprintf("syncing secret %s failed: error while checking for password rotation: could not update secret because of user name mismatch", "default/prepared-owner-user.acid-test-cluster.credentials")
|
||||
assert.Contains(t, err.Error(), expectedError)
|
||||
}
|
||||
|
||||
// TestSyncStatefulSetNonRunningPodsDoNotBlockRecreation sets up a cluster with
// an always-failing Patroni HTTP client and asserts allPodsRunning's verdict
// for non-running, running, and mixed pod lists.
//
// NOTE(review): the test never calls syncStatefulSet, so the Patroni mock and
// most of the cluster config below are not actually exercised by the
// assertions — confirm whether a syncStatefulSet invocation was intended.
func TestSyncStatefulSetNonRunningPodsDoNotBlockRecreation(t *testing.T) {
	testName := "test that non-running pods do not block rolling update"
	client, _ := newFakeK8sSyncClient()
	clusterName := "acid-test-cluster"
	namespace := "default"

	ctrl := gomock.NewController(t)
	defer ctrl.Finish()

	pg := acidv1.Postgresql{
		ObjectMeta: metav1.ObjectMeta{
			Name:      clusterName,
			Namespace: namespace,
		},
		Spec: acidv1.PostgresSpec{
			NumberOfInstances: 1,
			Volume: acidv1.Volume{
				Size: "1Gi",
			},
		},
	}

	var cluster = New(
		Config{
			OpConfig: config.Config{
				PatroniAPICheckInterval: time.Duration(1),
				PatroniAPICheckTimeout:  time.Duration(5),
				PodManagementPolicy:     "ordered_ready",
				Resources: config.Resources{
					ClusterLabels:         map[string]string{"application": "spilo"},
					ClusterNameLabel:      "cluster-name",
					DefaultCPURequest:     "300m",
					DefaultCPULimit:       "300m",
					DefaultMemoryRequest:  "300Mi",
					DefaultMemoryLimit:    "300Mi",
					PodRoleLabel:          "spilo-role",
					ResourceCheckInterval: time.Duration(3),
					ResourceCheckTimeout:  time.Duration(10),
				},
			},
		}, client, pg, logger, eventRecorder)

	cluster.Name = clusterName
	cluster.Namespace = namespace

	// mock Patroni API that always fails (simulates unreachable pod)
	mockClient := mocks.NewMockHTTPClient(ctrl)
	mockClient.EXPECT().Do(gomock.Any()).Return(nil, fmt.Errorf("connection refused")).AnyTimes()
	mockClient.EXPECT().Get(gomock.Any()).Return(nil, fmt.Errorf("connection refused")).AnyTimes()
	cluster.patroni = patroni.New(patroniLogger, mockClient)

	// test allPodsRunning with non-running pods returns false
	// (single pod in Running phase but its container is stuck waiting)
	nonRunningPods := []v1.Pod{
		{
			Status: v1.PodStatus{
				Phase: v1.PodRunning,
				ContainerStatuses: []v1.ContainerStatus{
					{
						State: v1.ContainerState{
							Waiting: &v1.ContainerStateWaiting{
								Reason:  "CreateContainerConfigError",
								Message: `secret "old-secret" not found`,
							},
						},
					},
				},
			},
		},
	}

	if cluster.allPodsRunning(nonRunningPods) {
		t.Errorf("%s: allPodsRunning should return false for pods in CreateContainerConfigError", testName)
	}

	// test allPodsRunning with running pods returns true
	runningPods := []v1.Pod{
		{
			Status: v1.PodStatus{
				Phase: v1.PodRunning,
				ContainerStatuses: []v1.ContainerStatus{
					{
						State: v1.ContainerState{
							Running: &v1.ContainerStateRunning{},
						},
					},
				},
			},
		},
	}

	if !cluster.allPodsRunning(runningPods) {
		t.Errorf("%s: allPodsRunning should return true for running pods", testName)
	}

	// test mixed: 3-node cluster with 1 broken replica
	mixedPods := []v1.Pod{
		{
			Status: v1.PodStatus{
				Phase: v1.PodRunning,
				ContainerStatuses: []v1.ContainerStatus{
					{State: v1.ContainerState{Running: &v1.ContainerStateRunning{}}},
				},
			},
		},
		{
			Status: v1.PodStatus{
				Phase: v1.PodRunning,
				ContainerStatuses: []v1.ContainerStatus{
					{State: v1.ContainerState{Running: &v1.ContainerStateRunning{}}},
				},
			},
		},
		{
			Status: v1.PodStatus{
				Phase: v1.PodRunning,
				ContainerStatuses: []v1.ContainerStatus{
					{
						State: v1.ContainerState{
							Waiting: &v1.ContainerStateWaiting{
								Reason: "CreateContainerConfigError",
							},
						},
					},
				},
			},
		},
	}

	if cluster.allPodsRunning(mixedPods) {
		t.Errorf("%s: allPodsRunning should return false when one pod is in CreateContainerConfigError", testName)
	}
}
|
||||
|
|
|
|||
Loading…
Reference in New Issue