Add shm_volume option (#427)

Add possibility to mount a tmpfs volume to /dev/shm to avoid issues like [this](https://github.com/docker-library/postgres/issues/416). To achieve that two new options were introduced: * `enableShmVolume` to PostgreSQL manifest, to specify whether or not mount this volume per database cluster * `enable_shm_volume` to operator configuration, to specify whether or not mount per operator. The first one, `enableShmVolume` takes precedence to allow us to be more flexible.
2018-12-21 16:22:30 +01:00 · 2018-12-21 16:22:30 +01:00 · d6e6b00770
parent ff5c63ddf1
commit d6e6b00770
10 changed files with 137 additions and 7 deletions
--- a/docs/reference/cluster_manifest.md
+++ b/docs/reference/cluster_manifest.md
@ -97,6 +97,18 @@ Those are parameters grouped directly under  the `spec` key in the manifest.
   is taken from the `pod_priority_class_name` operator parameter, if not set
   then the default priority class is taken. The priority class itself must be defined in advance.
 * **enableShmVolume**
  Start a database pod without limitations on shm memory. By default docker
  limits `/dev/shm` to `64M` (see e.g. the [docker
  issue](https://github.com/docker-library/postgres/issues/416)), which might
  not be enough if PostgreSQL uses parallel workers heavily. If this option is
  present and its value is `true`, a new tmpfs volume will be mounted to the
  target database pod to remove this limitation. If it's not present, the
  decision about mounting a volume will be made based on the operator
  configuration (`enable_shm_volume`, which is `true` by default). If it's
  present and its value is `false`, then no volume will be mounted no matter
  how the operator was configured (so you can override the operator
  configuration).
 ## Postgres parameters
 Those parameters are grouped under the `postgresql` top-level key.
@ -112,6 +124,7 @@ Those parameters are grouped under the `postgresql` top-level key.
  cluster. Optional (Spilo automatically sets reasonable defaults for
  parameters like work_mem or max_connections).
 ## Patroni parameters
 Those parameters are grouped under the `patroni` top-level key. See the [patroni
--- a/docs/reference/operator_parameters.md
+++ b/docs/reference/operator_parameters.md
@ -224,6 +224,14 @@ CRD-based configuration.
 * **set_memory_request_to_limit**
  Set `memory_request` to `memory_limit` for all Postgres clusters (the default value is also increased). This prevents certain cases of memory overcommitment at the cost of overprovisioning memory and potential scheduling problems for containers with high memory limits due to the lack of memory on Kubernetes cluster nodes. This affects all containers (Postgres, Scalyr sidecar, and other sidecars). The default is `false`.
 * **enable_shm_volume**
  Instruct operator to start any new database pod without limitations on shm
  memory. If this option is enabled, a new tmpfs volume will be mounted to the
  target database pod to remove the shm memory limitation (see e.g. the [docker
  issue](https://github.com/docker-library/postgres/issues/416)). This option
  is global for an operator object, and can be overridden by the
  `enableShmVolume` parameter from the Postgres manifest. The default is `true`.
 ## Operator timeouts
 This set of parameters define various timeouts related to some operator
--- a/manifests/complete-postgres-manifest.yaml
+++ b/manifests/complete-postgres-manifest.yaml
@ -19,6 +19,7 @@ spec:
  databases:
    foo: zalando
 #Expert section
  enableShmVolume: true
  postgresql:
    version: "10"
    parameters:
--- a/pkg/apis/acid.zalan.do/v1/postgresql_type.go
+++ b/pkg/apis/acid.zalan.do/v1/postgresql_type.go
@ -51,6 +51,7 @@ type PostgresSpec struct {
 	Tolerations          []v1.Toleration      `json:"tolerations,omitempty"`
 	Sidecars             []Sidecar            `json:"sidecars,omitempty"`
 	PodPriorityClassName string               `json:"pod_priority_class_name,omitempty"`
 	ShmVolume            *bool                `json:"enableShmVolume,omitempty"`
 }
 // +k8s:deepcopy-gen:interfaces=k8s.io/apimachinery/pkg/runtime.Object
--- a/pkg/apis/acid.zalan.do/v1/util_test.go
+++ b/pkg/apis/acid.zalan.do/v1/util_test.go
@ -499,7 +499,7 @@ func TestMarshal(t *testing.T) {
 			t.Errorf("Marshal error: %v", err)
 		}
 		if !bytes.Equal(m, tt.marshal) {
-			t.Errorf("Marshal Postgresql expected: %q, got: %q", string(tt.marshal), string(m))
+			t.Errorf("Marshal Postgresql \nexpected: %q, \ngot:      %q", string(tt.marshal), string(m))
 		}
 	}
 }
@ -507,11 +507,11 @@ func TestMarshal(t *testing.T) {
 func TestPostgresMeta(t *testing.T) {
 	for _, tt := range unmarshalCluster {
 		if a := tt.out.GetObjectKind(); a != &tt.out.TypeMeta {
-			t.Errorf("GetObjectKindMeta expected: %v, got: %v", tt.out.TypeMeta, a)
+			t.Errorf("GetObjectKindMeta \nexpected: %v, \ngot:       %v", tt.out.TypeMeta, a)
 		}
 		if a := tt.out.GetObjectMeta(); reflect.DeepEqual(a, tt.out.ObjectMeta) {
-			t.Errorf("GetObjectMeta expected: %v, got: %v", tt.out.ObjectMeta, a)
+			t.Errorf("GetObjectMeta \nexpected: %v, \ngot:       %v", tt.out.ObjectMeta, a)
 		}
 	}
 }
--- a/pkg/cluster/k8sres.go
+++ b/pkg/cluster/k8sres.go
@ -18,6 +18,7 @@ import (
 	acidv1 "github.com/zalando-incubator/postgres-operator/pkg/apis/acid.zalan.do/v1"
 	"github.com/zalando-incubator/postgres-operator/pkg/spec"
 	"github.com/zalando-incubator/postgres-operator/pkg/util"
 	"github.com/zalando-incubator/postgres-operator/pkg/util/config"
 	"github.com/zalando-incubator/postgres-operator/pkg/util/constants"
 	"k8s.io/apimachinery/pkg/labels"
 )
@ -396,6 +397,16 @@ func generateSidecarContainers(sidecars []acidv1.Sidecar,
 	return nil, nil
 }
 // mountShmVolumeNeeded reports whether an shm volume has to be mounted.
 // The operator-wide setting acts as the default; an explicit value of
 // ShmVolume in the PostgreSQL manifest, when present, overrides it.
 func mountShmVolumeNeeded(opConfig config.Config, pgSpec *acidv1.PostgresSpec) bool {
 	needed := opConfig.ShmVolume
 	if manifestValue := pgSpec.ShmVolume; manifestValue != nil {
 		needed = *manifestValue
 	}
 	return needed
 }
 func generatePodTemplate(
 	namespace string,
 	labels labels.Set,
@ -407,6 +418,7 @@ func generatePodTemplate(
 	podServiceAccountName string,
 	kubeIAMRole string,
 	priorityClassName string,
 	shmVolume bool,
 ) (*v1.PodTemplateSpec, error) {
 	terminateGracePeriodSeconds := terminateGracePeriod
@ -420,6 +432,10 @@ func generatePodTemplate(
 		Tolerations:                   *tolerationsSpec,
 	}
 	if shmVolume {
 		addShmVolume(&podSpec)
 	}
 	if nodeAffinity != nil {
 		podSpec.Affinity = nodeAffinity
 	}
@ -733,7 +749,12 @@ func (c *Cluster) generateStatefulSet(spec *acidv1.PostgresSpec) (*v1beta1.State
 	volumeMounts := generateVolumeMounts()
 	// generate the spilo container
-	spiloContainer := generateSpiloContainer(c.containerName(), &effectiveDockerImage, resourceRequirements, spiloEnvVars, volumeMounts)
+	spiloContainer := generateSpiloContainer(c.containerName(),
 		&effectiveDockerImage,
 		resourceRequirements,
 		spiloEnvVars,
 		volumeMounts,
 	)
 	// resolve conflicts between operator-global and per-cluster sidecards
 	sideCars := c.mergeSidecars(spec.Sidecars)
@ -775,7 +796,8 @@ func (c *Cluster) generateStatefulSet(spec *acidv1.PostgresSpec) (*v1beta1.State
 		int64(c.OpConfig.PodTerminateGracePeriod.Seconds()),
 		c.OpConfig.PodServiceAccountName,
 		c.OpConfig.KubeIAMRole,
-		effectivePodPriorityClassName); err != nil {
+		effectivePodPriorityClassName,
 		mountShmVolumeNeeded(c.OpConfig, spec)); err != nil {
 		return nil, fmt.Errorf("could not generate pod template: %v", err)
 	}
@ -882,6 +904,32 @@ func (c *Cluster) getNumberOfInstances(spec *acidv1.PostgresSpec) int32 {
 	return newcur
 }
 // addShmVolume appends a memory-backed emptyDir volume to podSpec and mounts
 // it into the PostgreSQL container at constants.ShmVolumePath.
 //
 // To avoid issues with limited /dev/shm inside docker environment, when
 // PostgreSQL can't allocate enough of dsa segments from it, we can
 // mount an extra memory volume
 //
 // see https://docs.okd.io/latest/dev_guide/shared_memory.html
 //
 // podSpec is modified in place and must already contain a container at
 // index constants.PostgresContainerIdx.
 func addShmVolume(podSpec *v1.PodSpec) {
 	// Read and write the mounts through the same container index, so the
 	// mount always lands in the PostgreSQL container.
 	pgContainer := &podSpec.Containers[constants.PostgresContainerIdx]
 	pgContainer.VolumeMounts = append(pgContainer.VolumeMounts,
 		v1.VolumeMount{
 			Name:      constants.ShmVolumeName,
 			MountPath: constants.ShmVolumePath,
 		})
 	podSpec.Volumes = append(podSpec.Volumes, v1.Volume{
 		Name: constants.ShmVolumeName,
 		VolumeSource: v1.VolumeSource{
 			EmptyDir: &v1.EmptyDirVolumeSource{
 				Medium: v1.StorageMediumMemory,
 			},
 		},
 	})
 }
 func generatePersistentVolumeClaimTemplate(volumeSize, volumeStorageClass string) (*v1.PersistentVolumeClaim, error) {
 	var storageClassName *string
--- a/pkg/cluster/k8sres_test.go
+++ b/pkg/cluster/k8sres_test.go
@ -1,8 +1,11 @@
 package cluster
 import (
 	"k8s.io/api/core/v1"
 	acidv1 "github.com/zalando-incubator/postgres-operator/pkg/apis/acid.zalan.do/v1"
 	"github.com/zalando-incubator/postgres-operator/pkg/util/config"
 	"github.com/zalando-incubator/postgres-operator/pkg/util/constants"
 	"github.com/zalando-incubator/postgres-operator/pkg/util/k8sutil"
 	"testing"
 )
@ -75,3 +78,54 @@ func TestCreateLoadBalancerLogic(t *testing.T) {
 		}
 	}
 }
 // TestShmVolume verifies that addShmVolume appends the shm volume to the pod
 // spec and the matching mount to the PostgreSQL container, both for an empty
 // PodSpec and for one that already has a volume and a mount.
 func TestShmVolume(t *testing.T) {
 	testName := "TestShmVolume"
 	tests := []struct {
 		subTest string
 		podSpec *v1.PodSpec
 		shmPos  int // index at which the appended volume and mount are expected
 	}{
 		{
 			subTest: "empty PodSpec",
 			podSpec: &v1.PodSpec{
 				Volumes: []v1.Volume{},
 				Containers: []v1.Container{
 					{
 						VolumeMounts: []v1.VolumeMount{},
 					},
 				},
 			},
 			shmPos: 0,
 		},
 		{
 			subTest: "non empty PodSpec",
 			podSpec: &v1.PodSpec{
 				Volumes: []v1.Volume{{}},
 				Containers: []v1.Container{
 					{
 						VolumeMounts: []v1.VolumeMount{
 							{},
 						},
 					},
 				},
 			},
 			shmPos: 1,
 		},
 	}
 	for _, tt := range tests {
 		addShmVolume(tt.podSpec)
 		volumes := tt.podSpec.Volumes
 		mounts := tt.podSpec.Containers[constants.PostgresContainerIdx].VolumeMounts
 		// Guard the index accesses below so a regression is reported as a
 		// test failure instead of an index-out-of-range panic.
 		if len(volumes) <= tt.shmPos || len(mounts) <= tt.shmPos {
 			t.Errorf("%s %s: Expected volume and mount at position %d, have %d volumes and %d mounts",
 				testName, tt.subTest, tt.shmPos, len(volumes), len(mounts))
 			continue
 		}
 		volumeName := volumes[tt.shmPos].Name
 		volumeMountName := mounts[tt.shmPos].Name
 		volumeMountPath := mounts[tt.shmPos].MountPath
 		if volumeName != constants.ShmVolumeName {
 			t.Errorf("%s %s: Expected volume %s was not created, have %s instead",
 				testName, tt.subTest, constants.ShmVolumeName, volumeName)
 		}
 		if volumeMountName != constants.ShmVolumeName {
 			t.Errorf("%s %s: Expected mount %s was not created, have %s instead",
 				testName, tt.subTest, constants.ShmVolumeName, volumeMountName)
 		}
 		if volumeMountPath != constants.ShmVolumePath {
 			t.Errorf("%s %s: Expected mount path %s, have %s instead",
 				testName, tt.subTest, constants.ShmVolumePath, volumeMountPath)
 		}
 	}
 }
--- a/pkg/util/config/config.go
+++ b/pkg/util/config/config.go
@ -38,6 +38,7 @@ type Resources struct {
 	NodeReadinessLabel      map[string]string `name:"node_readiness_label" default:""`
 	MaxInstances            int32             `name:"max_instances" default:"-1"`
 	MinInstances            int32             `name:"min_instances" default:"-1"`
 	ShmVolume               bool              `name:"enable_shm_volume" default:"true"`
 }
 // Auth describes authentication specific configuration parameters
--- a/pkg/util/constants/kubernetes.go
+++ b/pkg/util/constants/kubernetes.go
@ -5,6 +5,7 @@ import "time"
 // General kubernetes-related constants
 const (
 	PostgresContainerName       = "postgres"
 	PostgresContainerIdx        = 0
 	K8sAPIPath                  = "/apis"
 	StatefulsetDeletionInterval = 1 * time.Second
 	StatefulsetDeletionTimeout  = 30 * time.Second
--- a/pkg/util/constants/postgresql.go
+++ b/pkg/util/constants/postgresql.go
@ -10,4 +10,7 @@ const (
 	PostgresConnectRetryTimeout = 2 * time.Minute
 	PostgresConnectTimeout      = 15 * time.Second
 	ShmVolumeName = "dshm"
 	ShmVolumePath = "/dev/shm"
 )