diff --git a/pkg/cluster/resources.go b/pkg/cluster/resources.go index 23ac3f348..3e8f73916 100644 --- a/pkg/cluster/resources.go +++ b/pkg/cluster/resources.go @@ -13,7 +13,6 @@ import ( "k8s.io/apimachinery/pkg/types" "github.com/zalando/postgres-operator/pkg/util" - "github.com/zalando/postgres-operator/pkg/util/constants" "github.com/zalando/postgres-operator/pkg/util/k8sutil" "github.com/zalando/postgres-operator/pkg/util/retryutil" ) @@ -278,7 +277,8 @@ func (c *Cluster) replaceStatefulSet(newStatefulSet *appsv1.StatefulSet) error { oldStatefulset := c.Statefulset options := metav1.DeleteOptions{PropagationPolicy: &deletePropagationPolicy} - if err := c.KubeClient.StatefulSets(oldStatefulset.Namespace).Delete(oldStatefulset.Name, &options); err != nil { + err := c.KubeClient.StatefulSets(oldStatefulset.Namespace).Delete(oldStatefulset.Name, &options) + if err != nil { return fmt.Errorf("could not delete statefulset %q: %v", statefulSetName, err) } // make sure we clear the stored statefulset status if the subsequent create fails. @@ -286,11 +286,16 @@ func (c *Cluster) replaceStatefulSet(newStatefulSet *appsv1.StatefulSet) error { // wait until the statefulset is truly deleted c.logger.Debugf("waiting for the statefulset to be deleted") - err := retryutil.Retry(constants.StatefulsetDeletionInterval, constants.StatefulsetDeletionTimeout, + err = retryutil.Retry(c.OpConfig.ResourceCheckInterval, c.OpConfig.ResourceCheckTimeout, func() (bool, error) { - _, err := c.KubeClient.StatefulSets(oldStatefulset.Namespace).Get(oldStatefulset.Name, metav1.GetOptions{}) - - return err != nil, nil + _, err2 := c.KubeClient.StatefulSets(oldStatefulset.Namespace).Get(oldStatefulset.Name, metav1.GetOptions{}) + if err2 == nil { + return false, nil + } + if k8sutil.ResourceNotFound(err2) { + return true, nil + } + return false, err2 }) if err != nil { return fmt.Errorf("could not delete statefulset: %v", err) @@ -380,13 +385,27 @@ func (c *Cluster) updateService(role PostgresRole, newService *v1.Service) error return fmt.Errorf("could not delete service %q: %v", serviceName, err) } - c.Endpoints[role] = nil - svc, err := c.KubeClient.Services(serviceName.Namespace).Create(newService) + // wait until the service is truly deleted + c.logger.Debugf("waiting for service to be deleted") + + err = retryutil.Retry(c.OpConfig.ResourceCheckInterval, c.OpConfig.ResourceCheckTimeout, + func() (bool, error) { + _, err2 := c.KubeClient.Services(serviceName.Namespace).Get(serviceName.Name, metav1.GetOptions{}) + if err2 == nil { + return false, nil + } + if k8sutil.ResourceNotFound(err2) { + return true, nil + } + return false, err2 + }) if err != nil { - return fmt.Errorf("could not create service %q: %v", serviceName, err) + return fmt.Errorf("could not delete service %q: %v", serviceName, err) } - c.Services[role] = svc + // make sure we clear the stored service and endpoint status if the subsequent create fails. + c.Services[role] = nil + c.Endpoints[role] = nil if role == Master { // create the new endpoint using the addresses obtained from the previous one endpointSpec := c.generateEndpoint(role, currentEndpoint.Subsets) @@ -398,6 +417,13 @@ func (c *Cluster) updateService(role PostgresRole, newService *v1.Service) error c.Endpoints[role] = ep } + svc, err := c.KubeClient.Services(serviceName.Namespace).Create(newService) + if err != nil { + return fmt.Errorf("could not create service %q: %v", serviceName, err) + } + + c.Services[role] = svc + return nil } diff --git a/pkg/util/constants/kubernetes.go b/pkg/util/constants/kubernetes.go index a4ea73e80..be79687eb 100644 --- a/pkg/util/constants/kubernetes.go +++ b/pkg/util/constants/kubernetes.go @@ -4,11 +4,9 @@ import "time" // General kubernetes-related constants const ( - PostgresContainerName = "postgres" - PostgresContainerIdx = 0 - K8sAPIPath = "/apis" - StatefulsetDeletionInterval = 1 * time.Second - StatefulsetDeletionTimeout = 30 * time.Second + PostgresContainerName = "postgres" + PostgresContainerIdx = 0 + K8sAPIPath = "/apis" QueueResyncPeriodPod = 5 * time.Minute QueueResyncPeriodTPR = 5 * time.Minute