Retry moving master pods (#463)
* Retry moving master pods * bump up master pod wait timeout
This commit is contained in:
		
							parent
							
								
									587d9091e7
								
							
						
					
					
						commit
						f400539b69
					
				|  | @ -213,6 +213,10 @@ configuration they are grouped under the `kubernetes` key. | ||||||
|   that should be assigned to the Postgres pods. The priority class itself must be defined in advance. |   that should be assigned to the Postgres pods. The priority class itself must be defined in advance. | ||||||
|   Default is empty (use the default priority class). |   Default is empty (use the default priority class). | ||||||
|    |    | ||||||
|  |  * **master_pod_move_timeout** | ||||||
|  |    The period of time to wait for master pods to be migrated off an unschedulable node. | ||||||
|  |    The migration includes Patroni switchovers to the respective replicas on healthy nodes. If master pods still remain on the old node after this timeout expires, the situation has to be resolved manually. The default is 20 minutes. | ||||||
|  | 
 | ||||||
| * **enable_pod_antiaffinity** | * **enable_pod_antiaffinity** | ||||||
|   toggles [pod anti affinity](https://kubernetes.io/docs/concepts/configuration/assign-pod-node/) on the Postgres pods, to avoid multiple pods |   toggles [pod anti affinity](https://kubernetes.io/docs/concepts/configuration/assign-pod-node/) on the Postgres pods, to avoid multiple pods | ||||||
|   of the same Postgres cluster in the same topology, e.g. node. The default is `false`. |   of the same Postgres cluster in the same topology, e.g. node. The default is `false`. | ||||||
|  |  | ||||||
|  | @ -46,6 +46,7 @@ data: | ||||||
|   pod_label_wait_timeout: 10m |   pod_label_wait_timeout: 10m | ||||||
|   ready_wait_interval: 3s |   ready_wait_interval: 3s | ||||||
|   ready_wait_timeout: 30s |   ready_wait_timeout: 30s | ||||||
|  |   #  master_pod_move_timeout: 20m | ||||||
|   replication_username: standby |   replication_username: standby | ||||||
|   resource_check_interval: 3s |   resource_check_interval: 3s | ||||||
|   resource_check_timeout: 10m |   resource_check_timeout: 10m | ||||||
|  |  | ||||||
|  | @ -60,6 +60,7 @@ type KubernetesMetaConfiguration struct { | ||||||
| 	// TODO: use namespacedname
 | 	// TODO: use namespacedname
 | ||||||
| 	PodEnvironmentConfigMap string `json:"pod_environment_configmap,omitempty"` | 	PodEnvironmentConfigMap string `json:"pod_environment_configmap,omitempty"` | ||||||
| 	PodPriorityClassName    string `json:"pod_priority_class_name,omitempty"` | 	PodPriorityClassName    string `json:"pod_priority_class_name,omitempty"` | ||||||
|  | 	MasterPodMoveTimeout    time.Duration `json:"master_pod_move_timeout,omitempty"` | ||||||
| 	EnablePodAntiAffinity                  bool                  `json:"enable_pod_antiaffinity" default:"false"` | 	EnablePodAntiAffinity                  bool                  `json:"enable_pod_antiaffinity" default:"false"` | ||||||
| 	PodAntiAffinityTopologyKey			   string                `name:"pod_antiaffinity_topology_key" default:"kubernetes.io/hostname"` | 	PodAntiAffinityTopologyKey			   string                `name:"pod_antiaffinity_topology_key" default:"kubernetes.io/hostname"` | ||||||
| } | } | ||||||
|  |  | ||||||
|  | @ -1,6 +1,10 @@ | ||||||
| package controller | package controller | ||||||
| 
 | 
 | ||||||
| import ( | import ( | ||||||
|  | 	"fmt" | ||||||
|  | 	"time" | ||||||
|  | 
 | ||||||
|  | 	"github.com/zalando/postgres-operator/pkg/util/retryutil" | ||||||
| 	"k8s.io/api/core/v1" | 	"k8s.io/api/core/v1" | ||||||
| 	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" | 	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" | ||||||
| 	"k8s.io/apimachinery/pkg/labels" | 	"k8s.io/apimachinery/pkg/labels" | ||||||
|  | @ -38,6 +42,7 @@ func (c *Controller) nodeAdd(obj interface{}) { | ||||||
| 	} | 	} | ||||||
| 
 | 
 | ||||||
| 	c.logger.Debugf("new node has been added: %q (%s)", util.NameFromMeta(node.ObjectMeta), node.Spec.ProviderID) | 	c.logger.Debugf("new node has been added: %q (%s)", util.NameFromMeta(node.ObjectMeta), node.Spec.ProviderID) | ||||||
|  | 
 | ||||||
| 	// check if the node became not ready while the operator was down (otherwise we would have caught it in nodeUpdate)
 | 	// check if the node became not ready while the operator was down (otherwise we would have caught it in nodeUpdate)
 | ||||||
| 	if !c.nodeIsReady(node) { | 	if !c.nodeIsReady(node) { | ||||||
| 		c.moveMasterPodsOffNode(node) | 		c.moveMasterPodsOffNode(node) | ||||||
|  | @ -64,7 +69,9 @@ func (c *Controller) nodeUpdate(prev, cur interface{}) { | ||||||
| 	if !c.nodeIsReady(nodePrev) || c.nodeIsReady(nodeCur) { | 	if !c.nodeIsReady(nodePrev) || c.nodeIsReady(nodeCur) { | ||||||
| 		return | 		return | ||||||
| 	} | 	} | ||||||
|  | 
 | ||||||
| 	c.moveMasterPodsOffNode(nodeCur) | 	c.moveMasterPodsOffNode(nodeCur) | ||||||
|  | 
 | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| func (c *Controller) nodeIsReady(node *v1.Node) bool { | func (c *Controller) nodeIsReady(node *v1.Node) bool { | ||||||
|  | @ -72,7 +79,7 @@ func (c *Controller) nodeIsReady(node *v1.Node) bool { | ||||||
| 		util.MapContains(node.Labels, map[string]string{"master": "true"})) | 		util.MapContains(node.Labels, map[string]string{"master": "true"})) | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| func (c *Controller) moveMasterPodsOffNode(node *v1.Node) { | func (c *Controller) attemptToMoveMasterPodsOffNode(node *v1.Node) error { | ||||||
| 	nodeName := util.NameFromMeta(node.ObjectMeta) | 	nodeName := util.NameFromMeta(node.ObjectMeta) | ||||||
| 	c.logger.Infof("moving pods: node %q became unschedulable and does not have a ready label: %q", | 	c.logger.Infof("moving pods: node %q became unschedulable and does not have a ready label: %q", | ||||||
| 		nodeName, c.opConfig.NodeReadinessLabel) | 		nodeName, c.opConfig.NodeReadinessLabel) | ||||||
|  | @ -83,7 +90,7 @@ func (c *Controller) moveMasterPodsOffNode(node *v1.Node) { | ||||||
| 	podList, err := c.KubeClient.Pods(c.opConfig.WatchedNamespace).List(opts) | 	podList, err := c.KubeClient.Pods(c.opConfig.WatchedNamespace).List(opts) | ||||||
| 	if err != nil { | 	if err != nil { | ||||||
| 		c.logger.Errorf("could not fetch list of the pods: %v", err) | 		c.logger.Errorf("could not fetch list of the pods: %v", err) | ||||||
| 		return | 		return err | ||||||
| 	} | 	} | ||||||
| 
 | 
 | ||||||
| 	nodePods := make([]*v1.Pod, 0) | 	nodePods := make([]*v1.Pod, 0) | ||||||
|  | @ -148,9 +155,11 @@ func (c *Controller) moveMasterPodsOffNode(node *v1.Node) { | ||||||
| 		movedPods, totalPods, nodeName) | 		movedPods, totalPods, nodeName) | ||||||
| 
 | 
 | ||||||
| 	if leftPods := totalPods - movedPods; leftPods > 0 { | 	if leftPods := totalPods - movedPods; leftPods > 0 { | ||||||
| 		c.logger.Warnf("could not move master %d/%d pods from the %q node", | 		return fmt.Errorf("could not move master %d/%d pods from the %q node", | ||||||
| 			leftPods, totalPods, nodeName) | 			leftPods, totalPods, nodeName) | ||||||
| 	} | 	} | ||||||
|  | 
 | ||||||
|  | 	return nil | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| func (c *Controller) nodeDelete(obj interface{}) { | func (c *Controller) nodeDelete(obj interface{}) { | ||||||
|  | @ -161,3 +170,21 @@ func (c *Controller) nodeDelete(obj interface{}) { | ||||||
| 
 | 
 | ||||||
| 	c.logger.Debugf("node has been deleted: %q (%s)", util.NameFromMeta(node.ObjectMeta), node.Spec.ProviderID) | 	c.logger.Debugf("node has been deleted: %q (%s)", util.NameFromMeta(node.ObjectMeta), node.Spec.ProviderID) | ||||||
| } | } | ||||||
|  | 
 | ||||||
|  | func (c *Controller) moveMasterPodsOffNode(node *v1.Node) { | ||||||
|  | 
 | ||||||
|  | 	err := retryutil.Retry(1*time.Minute, c.opConfig.MasterPodMoveTimeout, | ||||||
|  | 		func() (bool, error) { | ||||||
|  | 			err := c.attemptToMoveMasterPodsOffNode(node) | ||||||
|  | 			if err != nil { | ||||||
|  | 				return false, fmt.Errorf("unable to move master pods off the unschedulable node; will retry after delay of 1 minute") | ||||||
|  | 			} | ||||||
|  | 			return true, nil | ||||||
|  | 		}, | ||||||
|  | 	) | ||||||
|  | 
 | ||||||
|  | 	if err != nil { | ||||||
|  | 		c.logger.Warning("failed to move master pods from the node %q: timeout of %v minutes expired", node.Name, c.opConfig.MasterPodMoveTimeout) | ||||||
|  | 	} | ||||||
|  | 
 | ||||||
|  | } | ||||||
|  |  | ||||||
|  | @ -52,6 +52,7 @@ func (c *Controller) importConfigurationFromCRD(fromCRD *acidv1.OperatorConfigur | ||||||
| 	result.ClusterNameLabel = fromCRD.Kubernetes.ClusterNameLabel | 	result.ClusterNameLabel = fromCRD.Kubernetes.ClusterNameLabel | ||||||
| 	result.NodeReadinessLabel = fromCRD.Kubernetes.NodeReadinessLabel | 	result.NodeReadinessLabel = fromCRD.Kubernetes.NodeReadinessLabel | ||||||
| 	result.PodPriorityClassName = fromCRD.Kubernetes.PodPriorityClassName | 	result.PodPriorityClassName = fromCRD.Kubernetes.PodPriorityClassName | ||||||
|  | 	result.MasterPodMoveTimeout = fromCRD.Kubernetes.MasterPodMoveTimeout | ||||||
| 
 | 
 | ||||||
| 	result.EnablePodAntiAffinity = fromCRD.Kubernetes.EnablePodAntiAffinity; | 	result.EnablePodAntiAffinity = fromCRD.Kubernetes.EnablePodAntiAffinity; | ||||||
| 	result.PodAntiAffinityTopologyKey = fromCRD.Kubernetes.PodAntiAffinityTopologyKey; | 	result.PodAntiAffinityTopologyKey = fromCRD.Kubernetes.PodAntiAffinityTopologyKey; | ||||||
|  |  | ||||||
|  | @ -81,6 +81,7 @@ type Config struct { | ||||||
| 	// value of this string must be valid JSON or YAML; see initPodServiceAccount
 | 	// value of this string must be valid JSON or YAML; see initPodServiceAccount
 | ||||||
| 	PodServiceAccountDefinition            string `name:"pod_service_account_definition" default:""` | 	PodServiceAccountDefinition            string `name:"pod_service_account_definition" default:""` | ||||||
| 	PodServiceAccountRoleBindingDefinition string `name:"pod_service_account_role_binding_definition" default:""` | 	PodServiceAccountRoleBindingDefinition string `name:"pod_service_account_role_binding_definition" default:""` | ||||||
|  | 	MasterPodMoveTimeout                   time.Duration `name:"master_pod_move_timeout" default:"20m"` | ||||||
| 	DbHostedZone                           string `name:"db_hosted_zone" default:"db.example.com"` | 	DbHostedZone                           string `name:"db_hosted_zone" default:"db.example.com"` | ||||||
| 	AWSRegion                              string `name:"aws_region" default:"eu-central-1"` | 	AWSRegion                              string `name:"aws_region" default:"eu-central-1"` | ||||||
| 	WALES3Bucket                           string `name:"wal_s3_bucket"` | 	WALES3Bucket                           string `name:"wal_s3_bucket"` | ||||||
|  |  | ||||||
		Loading…
	
		Reference in New Issue