sketch the docs and configuration

This commit is contained in:
Sergey Dudoladov 2019-01-28 16:41:18 +01:00
parent 3a6ab485a6
commit c6d36bb7ef
6 changed files with 16 additions and 4 deletions

View File

@ -205,6 +205,11 @@ configuration they are grouped under the `kubernetes` key.
that should be assigned to the Postgres pods. The priority class itself must be defined in advance.
Default is empty (use the default priority class).
* **master_pod_move_timeout**
The maximum period of time to wait for Patroni to switch over from master pods on an unschedulable node
to their respective replicas on healthy nodes. If master pods still remain on the old node
after this timeout expires, the situation has to be resolved manually. The default is 10 minutes.
## Kubernetes resource requests

View File

@ -46,6 +46,7 @@ data:
pod_label_wait_timeout: 10m
ready_wait_interval: 3s
ready_wait_timeout: 30s
# master_pod_move_timeout: 10m
replication_username: standby
resource_check_interval: 3s
resource_check_timeout: 10m

View File

@ -59,6 +59,7 @@ type KubernetesMetaConfiguration struct {
// TODO: use namespacedname
PodEnvironmentConfigMap string `json:"pod_environment_configmap,omitempty"`
PodPriorityClassName string `json:"pod_priority_class_name,omitempty"`
MasterPodMoveTimeout time.Duration `json:"master_pod_move_timeout,omitempty"`
}
// PostgresPodResourcesDefaults defines the spec of default resources

View File

@ -41,18 +41,21 @@ func (c *Controller) nodeAdd(obj interface{}) {
}
c.logger.Debugf("new node has been added: %q (%s)", util.NameFromMeta(node.ObjectMeta), node.Spec.ProviderID)
// check if the node became not ready while the operator was down (otherwise we would have caught it in nodeUpdate)
if !c.nodeIsReady(node) {
err := retryutil.Retry(2 * time.Minute, 10 * time.Minute,
err := retryutil.Retry(1 * time.Minute, c.opConfig.MasterPodMoveTimeout,
func() (bool, error) {
err := c.moveMasterPodsOffNode(node)
if err != nil {
return false, fmt.Errorf(("Unable to move master pods off the unschedulable node. Will retry after delay"))
return false, fmt.Errorf("unable to move master pods off the unschedulable node; will retry after delay")
}
return true, nil
} )
if err != nil {
c.logger.Warning("Unable to move maser pods")
c.logger.Warning("failed to move master pods from the node %q: timeout expired", node.Name)
}
}
}
@ -174,7 +177,7 @@ func (c *Controller) moveMasterPodsOffNode(node *v1.Node) error {
return fmt.Errorf("could not move master %d/%d pods from the %q node",
leftPods, totalPods, nodeName)
}
return nil
}

View File

@ -50,6 +50,7 @@ func (c *Controller) importConfigurationFromCRD(fromCRD *acidv1.OperatorConfigur
result.ClusterNameLabel = fromCRD.Kubernetes.ClusterNameLabel
result.NodeReadinessLabel = fromCRD.Kubernetes.NodeReadinessLabel
result.PodPriorityClassName = fromCRD.Kubernetes.PodPriorityClassName
result.MasterPodMoveTimeout = fromCRD.Kubernetes.MasterPodMoveTimeout
result.DefaultCPURequest = fromCRD.PostgresPodResources.DefaultCPURequest
result.DefaultMemoryRequest = fromCRD.PostgresPodResources.DefaultMemoryRequest

View File

@ -80,6 +80,7 @@ type Config struct {
// value of this string must be valid JSON or YAML; see initPodServiceAccount
PodServiceAccountDefinition string `name:"pod_service_account_definition" default:""`
PodServiceAccountRoleBindingDefinition string `name:"pod_service_account_role_binding_definition" default:""`
MasterPodMoveTimeout time.Duration `name:"master_pod_move_timeout" default:"10m"`
DbHostedZone string `name:"db_hosted_zone" default:"db.example.com"`
AWSRegion string `name:"aws_region" default:"eu-central-1"`
WALES3Bucket string `name:"wal_s3_bucket"`