diff --git a/charts/postgres-operator/crds/operatorconfigurations.yaml b/charts/postgres-operator/crds/operatorconfigurations.yaml index 4c5f7a7bf..3fbc6d8a4 100644 --- a/charts/postgres-operator/crds/operatorconfigurations.yaml +++ b/charts/postgres-operator/crds/operatorconfigurations.yaml @@ -263,6 +263,139 @@ spec: type: array items: type: string + liveness_probe: + description: Probe describes a health check to be performed against + a container to determine whether it is alive or ready to receive + traffic. + properties: + exec: + description: One and only one of the following should be specified. + Exec specifies the action to take. + properties: + command: + description: Command is the command line to execute inside + the container, the working directory for the command is + root ('/') in the container's filesystem. The command + is simply exec'd, it is not run inside a shell, so traditional + shell instructions ('|', etc) won't work. To use a shell, + you need to explicitly call out to that shell. Exit + status of 0 is treated as live/healthy and non-zero + is unhealthy. + items: + type: string + type: array + type: object + failureThreshold: + description: Minimum consecutive failures for the probe to + be considered failed after having succeeded. Defaults to + 3. Minimum value is 1. + format: int32 + type: integer + httpGet: + description: HTTPGet specifies the http request to perform. + properties: + host: + description: Host name to connect to, defaults to the + pod IP. You probably want to set "Host" in httpHeaders + instead. + type: string + httpHeaders: + description: Custom headers to set in the request. HTTP + allows repeated headers. + items: + description: HTTPHeader describes a custom header to + be used in HTTP probes + properties: + name: + description: The header field name + type: string + value: + description: The header field value + type: string + required: + - name + - value + type: object + type: array + path: + description: Path to access on the HTTP server. + type: string + port: + anyOf: + - type: integer + - type: string + description: Name or number of the port to access on the + container. Number must be in the range 1 to 65535. Name + must be an IANA_SVC_NAME. + x-kubernetes-int-or-string: true + scheme: + description: Scheme to use for connecting to the host. + Defaults to HTTP. + type: string + required: + - port + type: object + initialDelaySeconds: + description: 'Number of seconds after the container has started + before liveness probes are initiated. More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes' + format: int32 + type: integer + periodSeconds: + description: How often (in seconds) to perform the probe. + Default to 10 seconds. Minimum value is 1. + format: int32 + type: integer + successThreshold: + description: Minimum consecutive successes for the probe to + be considered successful after having failed. Defaults to + 1. Must be 1 for liveness and startup. Minimum value is + 1. + format: int32 + type: integer + tcpSocket: + description: 'TCPSocket specifies an action involving a TCP + port. TCP hooks not yet supported TODO: implement a realistic + TCP lifecycle hook' + properties: + host: + description: 'Optional: Host name to connect to, defaults + to the pod IP.' + type: string + port: + anyOf: + - type: integer + - type: string + description: Number or name of the port to access on the + container. Number must be in the range 1 to 65535. Name + must be an IANA_SVC_NAME. + x-kubernetes-int-or-string: true + required: + - port + type: object + terminationGracePeriodSeconds: + description: Optional duration in seconds the pod needs to + terminate gracefully upon probe failure. The grace period + is the duration in seconds after the processes running in + the pod are sent a termination signal and the time when + the processes are forcibly halted with a kill signal. Set + this value longer than the expected cleanup time for your + process. If this value is nil, the pod's terminationGracePeriodSeconds + will be used. Otherwise, this value overrides the value + provided by the pod spec. Value must be non-negative integer. + The value zero indicates stop immediately via the kill signal + (no opportunity to shut down). This is a beta field and + requires enabling ProbeTerminationGracePeriod feature gate. + Minimum value is 1. spec.terminationGracePeriodSeconds is + used if unset. + format: int64 + type: integer + timeoutSeconds: + description: 'Number of seconds after which the probe times + out. Defaults to 1 second. Minimum value is 1. More info: + https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes' + format: int32 + type: integer + type: object master_pod_move_timeout: type: string default: "20m" diff --git a/charts/postgres-operator/crds/postgresqls.yaml b/charts/postgres-operator/crds/postgresqls.yaml index e304aa340..4b88d2237 100644 --- a/charts/postgres-operator/crds/postgresqls.yaml +++ b/charts/postgres-operator/crds/postgresqls.yaml @@ -202,6 +202,140 @@ spec: items: type: object x-kubernetes-preserve-unknown-fields: true + livenessProbe: + description: 'Periodic probe of container liveness. Container + will be restarted if the probe fails. Cannot be updated. More + info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes' + properties: + exec: + description: One and only one of the following should be + specified. Exec specifies the action to take. + properties: + command: + description: Command is the command line to execute + inside the container, the working directory for the + command is root ('/') in the container's filesystem. + The command is simply exec'd, it is not run inside + a shell, so traditional shell instructions ('|', etc) + won't work. To use a shell, you need to explicitly + call out to that shell. Exit status of 0 is treated + as live/healthy and non-zero is unhealthy. + items: + type: string + type: array + type: object + failureThreshold: + description: Minimum consecutive failures for the probe + to be considered failed after having succeeded. Defaults + to 3. Minimum value is 1. + format: int32 + type: integer + httpGet: + description: HTTPGet specifies the http request to perform. + properties: + host: + description: Host name to connect to, defaults to the + pod IP. You probably want to set "Host" in httpHeaders + instead. + type: string + httpHeaders: + description: Custom headers to set in the request. HTTP + allows repeated headers. + items: + description: HTTPHeader describes a custom header + to be used in HTTP probes + properties: + name: + description: The header field name + type: string + value: + description: The header field value + type: string + required: + - name + - value + type: object + type: array + path: + description: Path to access on the HTTP server. + type: string + port: + anyOf: + - type: integer + - type: string + description: Name or number of the port to access on + the container. Number must be in the range 1 to 65535. + Name must be an IANA_SVC_NAME. + x-kubernetes-int-or-string: true + scheme: + description: Scheme to use for connecting to the host. + Defaults to HTTP. + type: string + required: + - port + type: object + initialDelaySeconds: + description: 'Number of seconds after the container has + started before liveness probes are initiated. More info: + https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes' + format: int32 + type: integer + periodSeconds: + description: How often (in seconds) to perform the probe. + Default to 10 seconds. Minimum value is 1. + format: int32 + type: integer + successThreshold: + description: Minimum consecutive successes for the probe + to be considered successful after having failed. Defaults + to 1. Must be 1 for liveness and startup. Minimum value + is 1. + format: int32 + type: integer + tcpSocket: + description: 'TCPSocket specifies an action involving a + TCP port. TCP hooks not yet supported TODO: implement + a realistic TCP lifecycle hook' + properties: + host: + description: 'Optional: Host name to connect to, defaults + to the pod IP.' + type: string + port: + anyOf: + - type: integer + - type: string + description: Number or name of the port to access on + the container. Number must be in the range 1 to 65535. + Name must be an IANA_SVC_NAME. + x-kubernetes-int-or-string: true + required: + - port + type: object + terminationGracePeriodSeconds: + description: Optional duration in seconds the pod needs + to terminate gracefully upon probe failure. The grace + period is the duration in seconds after the processes + running in the pod are sent a termination signal and the + time when the processes are forcibly halted with a kill + signal. Set this value longer than the expected cleanup + time for your process. If this value is nil, the pod's + terminationGracePeriodSeconds will be used. Otherwise, + this value overrides the value provided by the pod spec. + Value must be non-negative integer. The value zero indicates + stop immediately via the kill signal (no opportunity to + shut down). This is a beta field and requires enabling + ProbeTerminationGracePeriod feature gate. Minimum value + is 1. spec.terminationGracePeriodSeconds is used if unset. + format: int64 + type: integer + timeoutSeconds: + description: 'Number of seconds after which the probe times + out. Defaults to 1 second. Minimum value is 1. More info: + https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes' + format: int32 + type: integer + type: object init_containers: type: array description: deprecated diff --git a/charts/postgres-operator/values.yaml b/charts/postgres-operator/values.yaml index bf44b765f..6951fdd0d 100644 --- a/charts/postgres-operator/values.yaml +++ b/charts/postgres-operator/values.yaml @@ -213,6 +213,19 @@ configKubernetes: # whether the Spilo container should run with additional permissions other than parent. # required by cron which needs setuid spilo_allow_privilege_escalation: true + + # liveness probe for the spilo pod + # liveness_probe: + # httpGet: + # scheme: HTTP + # path: /liveness + # port: 8008 + # initialDelaySeconds: 10 + # periodSeconds: 10 + # timeoutSeconds: 5 + # successThreshold: 1 + # failureThreshold: 3 + # storage resize strategy, available options are: ebs, pvc, off or mixed storage_resize_mode: pvc # pod toleration assigned to instances of every Postgres cluster diff --git a/docs/reference/cluster_manifest.md b/docs/reference/cluster_manifest.md index 0f4f0bab6..c0f951c87 100644 --- a/docs/reference/cluster_manifest.md +++ b/docs/reference/cluster_manifest.md @@ -85,6 +85,10 @@ These parameters are grouped directly under the `spec` key in the manifest. requires a custom Spilo image. Note the FSGroup of a Pod cannot be changed without recreating a new Pod. Optional. +* **livenessProbe** + Allows for adding a liveness probe to the Spilo container to detect if it's + running properly. + * **enableMasterLoadBalancer** boolean flag to override the operator defaults (set by the `enable_master_load_balancer` parameter) to define whether to enable the load diff --git a/docs/reference/operator_parameters.md b/docs/reference/operator_parameters.md index 861ea842e..675307364 100644 --- a/docs/reference/operator_parameters.md +++ b/docs/reference/operator_parameters.md @@ -482,6 +482,10 @@ configuration they are grouped under the `kubernetes` key. process. Required by cron which needs setuid. Without this parameter, certification rotation & backups will not be done. The default is `true`. +* **liveness_probe** + Allows for adding a liveness probe to the Spilo container to detect if it's + running properly. + * **additional_pod_capabilities** list of additional capabilities to be added to the postgres container's SecurityContext (e.g. SYS_NICE etc.). Please, make sure first that the diff --git a/manifests/configmap.yaml b/manifests/configmap.yaml index 390cf1046..98b63c26f 100644 --- a/manifests/configmap.yaml +++ b/manifests/configmap.yaml @@ -154,6 +154,16 @@ data: # spilo_runasgroup: 103 # spilo_fsgroup: 103 spilo_privileged: "false" + # liveness_probe: |- + # httpGet: + # scheme: HTTP + # path: /liveness + # port: 8008 + # initialDelaySeconds: 10 + # periodSeconds: 10 + # timeoutSeconds: 5 + # successThreshold: 1 + # failureThreshold: 3 storage_resize_mode: "pvc" super_username: postgres # target_major_version: "15" diff --git a/manifests/operatorconfiguration.crd.yaml b/manifests/operatorconfiguration.crd.yaml index d0b30ef37..7767ac902 100644 --- a/manifests/operatorconfiguration.crd.yaml +++ b/manifests/operatorconfiguration.crd.yaml @@ -258,6 +258,139 @@ spec: type: array items: type: string + liveness_probe: + description: Probe describes a health check to be performed against + a container to determine whether it is alive or ready to receive + traffic. + properties: + exec: + description: One and only one of the following should be specified. + Exec specifies the action to take. + properties: + command: + description: Command is the command line to execute inside + the container, the working directory for the command is + root ('/') in the container's filesystem. The command + is simply exec'd, it is not run inside a shell, so traditional + shell instructions ('|', etc) won't work. To use a shell, + you need to explicitly call out to that shell. Exit + status of 0 is treated as live/healthy and non-zero + is unhealthy. + items: + type: string + type: array + type: object + failureThreshold: + description: Minimum consecutive failures for the probe to + be considered failed after having succeeded. Defaults to + 3. Minimum value is 1. + format: int32 + type: integer + httpGet: + description: HTTPGet specifies the http request to perform. + properties: + host: + description: Host name to connect to, defaults to the + pod IP. You probably want to set "Host" in httpHeaders + instead. + type: string + httpHeaders: + description: Custom headers to set in the request. HTTP + allows repeated headers. + items: + description: HTTPHeader describes a custom header to + be used in HTTP probes + properties: + name: + description: The header field name + type: string + value: + description: The header field value + type: string + required: + - name + - value + type: object + type: array + path: + description: Path to access on the HTTP server. + type: string + port: + anyOf: + - type: integer + - type: string + description: Name or number of the port to access on the + container. Number must be in the range 1 to 65535. Name + must be an IANA_SVC_NAME. + x-kubernetes-int-or-string: true + scheme: + description: Scheme to use for connecting to the host. + Defaults to HTTP. + type: string + required: + - port + type: object + initialDelaySeconds: + description: 'Number of seconds after the container has started + before liveness probes are initiated. More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes' + format: int32 + type: integer + periodSeconds: + description: How often (in seconds) to perform the probe. + Default to 10 seconds. Minimum value is 1. + format: int32 + type: integer + successThreshold: + description: Minimum consecutive successes for the probe to + be considered successful after having failed. Defaults to + 1. Must be 1 for liveness and startup. Minimum value is + 1. + format: int32 + type: integer + tcpSocket: + description: 'TCPSocket specifies an action involving a TCP + port. TCP hooks not yet supported TODO: implement a realistic + TCP lifecycle hook' + properties: + host: + description: 'Optional: Host name to connect to, defaults + to the pod IP.' + type: string + port: + anyOf: + - type: integer + - type: string + description: Number or name of the port to access on the + container. Number must be in the range 1 to 65535. Name + must be an IANA_SVC_NAME. + x-kubernetes-int-or-string: true + required: + - port + type: object + terminationGracePeriodSeconds: + description: Optional duration in seconds the pod needs to + terminate gracefully upon probe failure. The grace period + is the duration in seconds after the processes running in + the pod are sent a termination signal and the time when + the processes are forcibly halted with a kill signal. Set + this value longer than the expected cleanup time for your + process. If this value is nil, the pod's terminationGracePeriodSeconds + will be used. Otherwise, this value overrides the value + provided by the pod spec. Value must be non-negative integer. + The value zero indicates stop immediately via the kill signal + (no opportunity to shut down). This is a beta field and + requires enabling ProbeTerminationGracePeriod feature gate. + Minimum value is 1. spec.terminationGracePeriodSeconds is + used if unset. + format: int64 + type: integer + timeoutSeconds: + description: 'Number of seconds after which the probe times + out. Defaults to 1 second. Minimum value is 1. More info: + https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes' + format: int32 + type: integer + type: object master_pod_move_timeout: type: string default: "20m" diff --git a/manifests/postgresql-operator-default-configuration.yaml b/manifests/postgresql-operator-default-configuration.yaml index d2bb99644..5efc31311 100644 --- a/manifests/postgresql-operator-default-configuration.yaml +++ b/manifests/postgresql-operator-default-configuration.yaml @@ -78,6 +78,16 @@ configuration: # inherited_labels: # - application # - environment + # liveness_probe: + # httpGet: + # scheme: HTTP + # path: /liveness + # port: 8008 + # initialDelaySeconds: 10 + # periodSeconds: 10 + # timeoutSeconds: 5 + # successThreshold: 1 + # failureThreshold: 3 master_pod_move_timeout: 20m # node_readiness_label: # status: ready diff --git a/manifests/postgresql.crd.yaml b/manifests/postgresql.crd.yaml index f0174c80f..d8321c6f6 100644 --- a/manifests/postgresql.crd.yaml +++ b/manifests/postgresql.crd.yaml @@ -200,6 +200,140 @@ spec: items: type: object x-kubernetes-preserve-unknown-fields: true + livenessProbe: + description: 'Periodic probe of container liveness. Container + will be restarted if the probe fails. Cannot be updated. More + info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes' + properties: + exec: + description: One and only one of the following should be + specified. Exec specifies the action to take. + properties: + command: + description: Command is the command line to execute + inside the container, the working directory for the + command is root ('/') in the container's filesystem. + The command is simply exec'd, it is not run inside + a shell, so traditional shell instructions ('|', etc) + won't work. To use a shell, you need to explicitly + call out to that shell. Exit status of 0 is treated + as live/healthy and non-zero is unhealthy. + items: + type: string + type: array + type: object + failureThreshold: + description: Minimum consecutive failures for the probe + to be considered failed after having succeeded. Defaults + to 3. Minimum value is 1. + format: int32 + type: integer + httpGet: + description: HTTPGet specifies the http request to perform. + properties: + host: + description: Host name to connect to, defaults to the + pod IP. You probably want to set "Host" in httpHeaders + instead. + type: string + httpHeaders: + description: Custom headers to set in the request. HTTP + allows repeated headers. + items: + description: HTTPHeader describes a custom header + to be used in HTTP probes + properties: + name: + description: The header field name + type: string + value: + description: The header field value + type: string + required: + - name + - value + type: object + type: array + path: + description: Path to access on the HTTP server. + type: string + port: + anyOf: + - type: integer + - type: string + description: Name or number of the port to access on + the container. Number must be in the range 1 to 65535. + Name must be an IANA_SVC_NAME. + x-kubernetes-int-or-string: true + scheme: + description: Scheme to use for connecting to the host. + Defaults to HTTP. + type: string + required: + - port + type: object + initialDelaySeconds: + description: 'Number of seconds after the container has + started before liveness probes are initiated. More info: + https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes' + format: int32 + type: integer + periodSeconds: + description: How often (in seconds) to perform the probe. + Default to 10 seconds. Minimum value is 1. + format: int32 + type: integer + successThreshold: + description: Minimum consecutive successes for the probe + to be considered successful after having failed. Defaults + to 1. Must be 1 for liveness and startup. Minimum value + is 1. + format: int32 + type: integer + tcpSocket: + description: 'TCPSocket specifies an action involving a + TCP port. TCP hooks not yet supported TODO: implement + a realistic TCP lifecycle hook' + properties: + host: + description: 'Optional: Host name to connect to, defaults + to the pod IP.' + type: string + port: + anyOf: + - type: integer + - type: string + description: Number or name of the port to access on + the container. Number must be in the range 1 to 65535. + Name must be an IANA_SVC_NAME. + x-kubernetes-int-or-string: true + required: + - port + type: object + terminationGracePeriodSeconds: + description: Optional duration in seconds the pod needs + to terminate gracefully upon probe failure. The grace + period is the duration in seconds after the processes + running in the pod are sent a termination signal and the + time when the processes are forcibly halted with a kill + signal. Set this value longer than the expected cleanup + time for your process. If this value is nil, the pod's + terminationGracePeriodSeconds will be used. Otherwise, + this value overrides the value provided by the pod spec. + Value must be non-negative integer. The value zero indicates + stop immediately via the kill signal (no opportunity to + shut down). This is a beta field and requires enabling + ProbeTerminationGracePeriod feature gate. Minimum value + is 1. spec.terminationGracePeriodSeconds is used if unset. + format: int64 + type: integer + timeoutSeconds: + description: 'Number of seconds after which the probe times + out. Defaults to 1 second. Minimum value is 1. More info: + https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes' + format: int32 + type: integer + type: object init_containers: type: array description: deprecated diff --git a/pkg/apis/acid.zalan.do/v1/crds.go b/pkg/apis/acid.zalan.do/v1/crds.go index 0c66b4b12..1cda575a5 100644 --- a/pkg/apis/acid.zalan.do/v1/crds.go +++ b/pkg/apis/acid.zalan.do/v1/crds.go @@ -364,6 +364,132 @@ var PostgresCRDResourceValidation = apiextv1.CustomResourceValidation{ }, }, }, + "livenessProbe": { + Description: "Periodic probe of container liveness. Container will be restarted if the probe fails. Cannot be updated. More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes", + Type: "object", + Properties: map[string]apiextv1.JSONSchemaProps{ + "exec": { + Description: "One and only one of the following should be specified. Exec specifies the action to take.", + Type: "object", + Properties: map[string]apiextv1.JSONSchemaProps{ + "command": { + Description: "Command is the command line to execute inside the container, the working directory for the command is root ('/') in the container's filesystem. The command is simply exec'd, it is not run inside a shell, so traditional shell instructions ('|', etc) won't work. To use a shell, you need to explicitly call out to that shell. Exit status of 0 is treated as live/healthy and non-zero is unhealthy.", + Type: "array", + Items: &apiextv1.JSONSchemaPropsOrArray{ + Schema: &apiextv1.JSONSchemaProps{ + Type: "string", + }, + }, + }, + }, + }, + "failureThreshold": { + Description: "Minimum consecutive failures for the probe to be considered failed after having succeeded. Defaults to 3. Minimum value is 1.", + Type: "integer", + Format: "int32", + }, + "httpGet": { + Description: "HTTPGet specifies the http request to perform.", + Type: "object", + Required: []string{"port"}, + Properties: map[string]apiextv1.JSONSchemaProps{ + "host": { + Description: "Host name to connect to, defaults to the pod IP. You probably want to set \"Host\" in httpHeaders instead.", + Type: "string", + }, + "httpHeaders": { + Description: "Custom headers to set in the request. HTTP allows repeated headers.", + Type: "array", + Items: &apiextv1.JSONSchemaPropsOrArray{ + Schema: &apiextv1.JSONSchemaProps{ + Description: "HTTPHeader describes a custom header to be used in HTTP probes", + Type: "object", + Required: []string{"name", "value"}, + Properties: map[string]apiextv1.JSONSchemaProps{ + "name": { + Description: "The header field name", + Type: "string", + }, + "value": { + Description: "The header field value", + Type: "string", + }, + }, + }, + }, + }, + "path": { + Description: "Path to access on the HTTP server.", + Type: "string", + }, + "port": { + Description: "Name or number of the port to access on the container. Number must be in the range 1 to 65535. Name must be an IANA_SVC_NAME.", + AnyOf: []apiextv1.JSONSchemaProps{ + { + Type: "integer", + }, + { + Type: "string", + }, + }, + XIntOrString: true, + }, + "scheme": { + Description: "Scheme to use for connecting to the host. Defaults to HTTP.", + Type: "string", + }, + }, + }, + "initialDelaySeconds": { + Description: "Number of seconds after the container has started before liveness probes are initiated. More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes", + Type: "integer", + Format: "int32", + }, + "periodSeconds": { + Description: "How often (in seconds) to perform the probe. Default to 10 seconds. Minimum value is 1.", + Type: "integer", + Format: "int32", + }, + "successThreshold": { + Description: "Minimum consecutive successes for the probe to be considered successful after having failed. Defaults to 1. Must be 1 for liveness and startup. Minimum value is 1.", + Type: "integer", + Format: "int32", + }, + "tcpSocket": { + Description: "TCPSocket specifies an action involving a TCP port. TCP hooks not yet supported TODO: implement a realistic TCP lifecycle hook", + Type: "object", + Required: []string{"port"}, + Properties: map[string]apiextv1.JSONSchemaProps{ + "host": { + Description: "Optional: Host name to connect to, defaults to the pod IP.", + Type: "string", + }, + "port": { + Description: "Number or name of the port to access on the container. Number must be in the range 1 to 65535. Name must be an IANA_SVC_NAME.", + XIntOrString: true, + AnyOf: []apiextv1.JSONSchemaProps{ + { + Type: "integer", + }, + { + Type: "string", + }, + }, + }, + }, + }, + "terminationGracePeriodSeconds": { + Description: "Optional duration in seconds the pod needs to terminate gracefully upon probe failure. The grace period is the duration in seconds after the processes running in the pod are sent a termination signal and the time when the processes are forcibly halted with a kill signal. Set this value longer than the expected cleanup time for your process. If this value is nil, the pod's terminationGracePeriodSeconds will be used. Otherwise, this value overrides the value provided by the pod spec. Value must be non-negative integer. The value zero indicates stop immediately via the kill signal (no opportunity to shut down). This is a beta field and requires enabling ProbeTerminationGracePeriod feature gate. Minimum value is 1. spec.terminationGracePeriodSeconds is used if unset.", + Type: "integer", + Format: "int64", + }, + "timeoutSeconds": { + Description: "Number of seconds after which the probe times out. Defaults to 1 second. Minimum value is 1. More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes", + Type: "integer", + Format: "int32", + }, + }, + }, "nodeAffinity": { Type: "object", Properties: map[string]apiextv1.JSONSchemaProps{ @@ -1383,6 +1509,132 @@ var OperatorConfigCRDResourceValidation = apiextv1.CustomResourceValidation{ }, }, }, + "liveness_probe": { + Description: "Periodic probe of container liveness. Container will be restarted if the probe fails. Cannot be updated. More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes", + Type: "object", + Properties: map[string]apiextv1.JSONSchemaProps{ + "exec": { + Description: "One and only one of the following should be specified. Exec specifies the action to take.", + Type: "object", + Properties: map[string]apiextv1.JSONSchemaProps{ + "command": { + Description: "Command is the command line to execute inside the container, the working directory for the command is root ('/') in the container's filesystem. The command is simply exec'd, it is not run inside a shell, so traditional shell instructions ('|', etc) won't work. To use a shell, you need to explicitly call out to that shell. Exit status of 0 is treated as live/healthy and non-zero is unhealthy.", + Type: "array", + Items: &apiextv1.JSONSchemaPropsOrArray{ + Schema: &apiextv1.JSONSchemaProps{ + Type: "string", + }, + }, + }, + }, + }, + "failureThreshold": { + Description: "Minimum consecutive failures for the probe to be considered failed after having succeeded. Defaults to 3. Minimum value is 1.", + Type: "integer", + Format: "int32", + }, + "httpGet": { + Description: "HTTPGet specifies the http request to perform.", + Type: "object", + Required: []string{"port"}, + Properties: map[string]apiextv1.JSONSchemaProps{ + "host": { + Description: "Host name to connect to, defaults to the pod IP. You probably want to set \"Host\" in httpHeaders instead.", + Type: "string", + }, + "httpHeaders": { + Description: "Custom headers to set in the request. HTTP allows repeated headers.", + Type: "array", + Items: &apiextv1.JSONSchemaPropsOrArray{ + Schema: &apiextv1.JSONSchemaProps{ + Description: "HTTPHeader describes a custom header to be used in HTTP probes", + Type: "object", + Required: []string{"name", "value"}, + Properties: map[string]apiextv1.JSONSchemaProps{ + "name": { + Description: "The header field name", + Type: "string", + }, + "value": { + Description: "The header field value", + Type: "string", + }, + }, + }, + }, + }, + "path": { + Description: "Path to access on the HTTP server.", + Type: "string", + }, + "port": { + Description: "Name or number of the port to access on the container. Number must be in the range 1 to 65535. Name must be an IANA_SVC_NAME.", + AnyOf: []apiextv1.JSONSchemaProps{ + { + Type: "integer", + }, + { + Type: "string", + }, + }, + XIntOrString: true, + }, + "scheme": { + Description: "Scheme to use for connecting to the host. Defaults to HTTP.", + Type: "string", + }, + }, + }, + "initialDelaySeconds": { + Description: "Number of seconds after the container has started before liveness probes are initiated. More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes", + Type: "integer", + Format: "int32", + }, + "periodSeconds": { + Description: "How often (in seconds) to perform the probe. Default to 10 seconds. Minimum value is 1.", + Type: "integer", + Format: "int32", + }, + "successThreshold": { + Description: "Minimum consecutive successes for the probe to be considered successful after having failed. Defaults to 1. Must be 1 for liveness and startup. Minimum value is 1.", + Type: "integer", + Format: "int32", + }, + "tcpSocket": { + Description: "TCPSocket specifies an action involving a TCP port. TCP hooks not yet supported TODO: implement a realistic TCP lifecycle hook", + Type: "object", + Required: []string{"port"}, + Properties: map[string]apiextv1.JSONSchemaProps{ + "host": { + Description: "Optional: Host name to connect to, defaults to the pod IP.", + Type: "string", + }, + "port": { + Description: "Number or name of the port to access on the container. Number must be in the range 1 to 65535. Name must be an IANA_SVC_NAME.", + XIntOrString: true, + AnyOf: []apiextv1.JSONSchemaProps{ + { + Type: "integer", + }, + { + Type: "string", + }, + }, + }, + }, + }, + "terminationGracePeriodSeconds": { + Description: "Optional duration in seconds the pod needs to terminate gracefully upon probe failure. The grace period is the duration in seconds after the processes running in the pod are sent a termination signal and the time when the processes are forcibly halted with a kill signal. Set this value longer than the expected cleanup time for your process. If this value is nil, the pod's terminationGracePeriodSeconds will be used. Otherwise, this value overrides the value provided by the pod spec. Value must be non-negative integer. The value zero indicates stop immediately via the kill signal (no opportunity to shut down). This is a beta field and requires enabling ProbeTerminationGracePeriod feature gate. Minimum value is 1. spec.terminationGracePeriodSeconds is used if unset.", + Type: "integer", + Format: "int64", + }, + "timeoutSeconds": { + Description: "Number of seconds after which the probe times out. Defaults to 1 second. Minimum value is 1. More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes", + Type: "integer", + Format: "int32", + }, + }, + }, "master_pod_move_timeout": { Type: "string", }, diff --git a/pkg/apis/acid.zalan.do/v1/operator_configuration_type.go b/pkg/apis/acid.zalan.do/v1/operator_configuration_type.go index 69d591e42..c805d5ba8 100644 --- a/pkg/apis/acid.zalan.do/v1/operator_configuration_type.go +++ b/pkg/apis/acid.zalan.do/v1/operator_configuration_type.go @@ -105,6 +105,7 @@ type KubernetesMetaConfiguration struct { EnableReadinessProbe bool `json:"enable_readiness_probe,omitempty"` EnableCrossNamespaceSecret bool `json:"enable_cross_namespace_secret,omitempty"` EnableFinalizers *bool `json:"enable_finalizers,omitempty"` + LivenessProbe *v1.Probe `json:"liveness_probe,omitempty"` } // PostgresPodResourcesDefaults defines the spec of default resources diff --git a/pkg/apis/acid.zalan.do/v1/postgresql_type.go b/pkg/apis/acid.zalan.do/v1/postgresql_type.go index edce944f1..a1a4c3399 100644 --- a/pkg/apis/acid.zalan.do/v1/postgresql_type.go +++ b/pkg/apis/acid.zalan.do/v1/postgresql_type.go @@ -87,6 +87,7 @@ type PostgresSpec struct { AdditionalVolumes []AdditionalVolume `json:"additionalVolumes,omitempty"` Streams []Stream `json:"streams,omitempty"` Env []v1.EnvVar `json:"env,omitempty"` + LivenessProbe *v1.Probe `json:"livenessProbe,omitempty"` // deprecated json tags InitContainersOld []v1.Container `json:"init_containers,omitempty"` diff --git a/pkg/apis/acid.zalan.do/v1/zz_generated.deepcopy.go b/pkg/apis/acid.zalan.do/v1/zz_generated.deepcopy.go index 69ce4fc9a..821c701e7 100644 --- a/pkg/apis/acid.zalan.do/v1/zz_generated.deepcopy.go +++ b/pkg/apis/acid.zalan.do/v1/zz_generated.deepcopy.go @@ -277,6 +277,11 @@ func (in *KubernetesMetaConfiguration) DeepCopyInto(out *KubernetesMetaConfigura *out = new(bool) **out = **in } + if in.LivenessProbe != nil { + in, out := &in.LivenessProbe, &out.LivenessProbe + *out = new(corev1.Probe) + (*in).DeepCopyInto(*out) + } return } @@ -849,6 +854,11 @@ func (in *PostgresSpec) DeepCopyInto(out *PostgresSpec) { (*in)[i].DeepCopyInto(&(*out)[i]) } } + if in.LivenessProbe != nil { + in, out := &in.LivenessProbe, &out.LivenessProbe + *out = new(corev1.Probe) + (*in).DeepCopyInto(*out) + } if in.InitContainersOld != nil { in, out := &in.InitContainersOld, &out.InitContainersOld *out = make([]corev1.Container, len(*in)) diff --git a/pkg/cluster/cluster.go b/pkg/cluster/cluster.go index b00d4b6b7..08dc4bb07 100644 --- a/pkg/cluster/cluster.go +++ b/pkg/cluster/cluster.go @@ -580,6 +580,10 @@ func (c *Cluster) compareContainers(description string, setA, setB []v1.Containe func(a, b v1.Container) bool { return !reflect.DeepEqual(a.SecurityContext, b.SecurityContext) }), newCheck("new statefulset %s's %s (index %d) volume mounts do not match the current one", func(a, b v1.Container) bool { return !reflect.DeepEqual(a.VolumeMounts, b.VolumeMounts) }), + newCheck("new statefulset %s's %s (index %d) readiness probe do not match the current one", + func(a, b v1.Container) bool { return !reflect.DeepEqual(a.ReadinessProbe, b.ReadinessProbe) }), + newCheck("new statefulset %s's %s (index %d) liveness probe do not match the current one", + func(a, b v1.Container) bool { return !reflect.DeepEqual(a.LivenessProbe, b.LivenessProbe) }), } if !c.OpConfig.EnableLazySpiloUpgrade { diff --git a/pkg/cluster/k8sres.go b/pkg/cluster/k8sres.go index b60c4ecb3..5c3ca128c 100644 --- a/pkg/cluster/k8sres.go +++ b/pkg/cluster/k8sres.go @@ -1244,6 +1244,19 @@ func generateSpiloReadinessProbe() *v1.Probe { } } +func generateSpiloLivenessProbe(probe, defaultProbe *v1.Probe) *v1.Probe { + + if probe != nil { + return probe + } + + if defaultProbe != nil { + return defaultProbe + } + + return nil +} + func (c *Cluster) generateStatefulSet(spec *acidv1.PostgresSpec) (*appsv1.StatefulSet, error) { var ( @@ -1363,6 +1376,8 @@ func (c *Cluster) generateStatefulSet(spec *acidv1.PostgresSpec) (*appsv1.Statef spiloContainer.ReadinessProbe = generateSpiloReadinessProbe() } + spiloContainer.LivenessProbe = generateSpiloLivenessProbe(spec.LivenessProbe, c.OpConfig.LivenessProbe) + // generate container specs for sidecars specified in the cluster manifest clusterSpecificSidecars := []v1.Container{} if spec.Sidecars != nil && len(spec.Sidecars) > 0 { diff --git a/pkg/controller/operator_config.go b/pkg/controller/operator_config.go index 7c964e292..f66dc3d7b 100644 --- a/pkg/controller/operator_config.go +++ b/pkg/controller/operator_config.go @@ -128,6 +128,7 @@ func (c *Controller) importConfigurationFromCRD(fromCRD *acidv1.OperatorConfigur result.PodAntiAffinityTopologyKey = util.Coalesce(fromCRD.Kubernetes.PodAntiAffinityTopologyKey, "kubernetes.io/hostname") result.PodAntiAffinityPreferredDuringScheduling = fromCRD.Kubernetes.PodAntiAffinityPreferredDuringScheduling result.PodToleration = fromCRD.Kubernetes.PodToleration + result.LivenessProbe = fromCRD.Kubernetes.LivenessProbe // Postgres Pod resources result.DefaultCPURequest = util.Coalesce(fromCRD.PostgresPodResources.DefaultCPURequest, "100m") diff --git a/pkg/util/config/config.go b/pkg/util/config/config.go index c0fda940b..4f3ae6c08 100644 --- a/pkg/util/config/config.go +++ b/pkg/util/config/config.go @@ -61,6 +61,7 @@ type Resources struct { NodeReadinessLabel map[string]string `name:"node_readiness_label" default:""` NodeReadinessLabelMerge string `name:"node_readiness_label_merge" default:"OR"` ShmVolume *bool `name:"enable_shm_volume" default:"true"` + LivenessProbe *v1.Probe `name:"liveness_probe"` MaxInstances int32 `name:"max_instances" default:"-1"` MinInstances int32 `name:"min_instances" default:"-1"` @@ -248,6 +249,7 @@ type Config struct { TargetMajorVersion string `name:"target_major_version" default:"15"` PatroniAPICheckInterval time.Duration `name:"patroni_api_check_interval" default:"1s"` PatroniAPICheckTimeout time.Duration `name:"patroni_api_check_timeout" default:"5s"` + LivenessProbe *v1.Probe `json:"liveness_probe,omitempty"` EnablePatroniFailsafeMode *bool `name:"enable_patroni_failsafe_mode" default:"false"` PersistentVolumeClaimRetentionPolicy map[string]string `name:"persistent_volume_claim_retention_policy" default:"when_deleted:retain,when_scaled:retain"` }