choose switchover candidate based on lag and role (#1700)
* choose switchover candidate based on lowest lag in MB and role (in synchronous mode)
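
In short, the operator now asks Patroni for the cluster members and picks the switchover target by role and replication lag instead of choosing a replica at random. Below is a condensed, self-contained sketch of the selection rule (simplified types, for illustration only, not the operator's own code): running non-leader members are candidates, sync_standby members take precedence when present, and the member with the lowest lag in MB wins.

package main

import (
	"fmt"
	"sort"
)

// member mirrors the patroni.ClusterMember fields that matter for selection.
type member struct {
	Name    string
	Role    string // "leader", "standby_leader", "sync_standby" or "replica"
	State   string // only "running" members are eligible
	LagInMb int
}

// pickCandidate applies the rule introduced by this commit: skip leaders and
// non-running members, prefer sync_standby members (synchronous mode), and
// take the member with the lowest replication lag in MB.
func pickCandidate(members []member) (string, error) {
	var candidates, syncCandidates []member
	for _, m := range members {
		if m.Role == "leader" || m.Role == "standby_leader" || m.State != "running" {
			continue
		}
		candidates = append(candidates, m)
		if m.Role == "sync_standby" {
			syncCandidates = append(syncCandidates, m)
		}
	}
	if len(syncCandidates) > 0 {
		candidates = syncCandidates
	}
	if len(candidates) == 0 {
		return "", fmt.Errorf("no switchover candidate found")
	}
	sort.Slice(candidates, func(i, j int) bool { return candidates[i].LagInMb < candidates[j].LagInMb })
	return candidates[0].Name, nil
}

func main() {
	members := []member{
		{Name: "acid-test-cluster-0", Role: "leader", State: "running"},
		{Name: "acid-test-cluster-1", Role: "replica", State: "running", LagInMb: 5},
		{Name: "acid-test-cluster-2", Role: "replica", State: "running", LagInMb: 2},
	}
	candidate, _ := pickCandidate(members)
	fmt.Println(candidate) // prints "acid-test-cluster-2", the replica with the lowest lag
}
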
This commit is contained in:
parent 8959618471
commit 07fd4ec00b

@@ -4,6 +4,7 @@ import (
 	"context"
 	"fmt"
 	"math/rand"
+	"sort"
 	"strconv"
 	"time"
 
@@ -459,8 +460,13 @@ func (c *Cluster) recreatePods(pods []v1.Pod, switchoverCandidates []spec.Namesp
 		// 1. we have not observed a new master pod when re-creating former replicas
 		// 2. we know possible switchover targets even when no replicas were recreated
 		if newMasterPod == nil && len(replicas) > 0 {
-			if err := c.Switchover(masterPod, masterCandidate(replicas)); err != nil {
-				c.logger.Warningf("could not perform switch over: %v", err)
+			masterCandidate, err := c.getSwitchoverCandidate(masterPod)
+			if err != nil {
+				// do not recreate master now so it will keep the update flag and switchover will be retried on next sync
+				return fmt.Errorf("skipping switchover: %v", err)
+			}
+			if err := c.Switchover(masterPod, masterCandidate); err != nil {
+				return fmt.Errorf("could not perform switch over: %v", err)
 			}
 		} else if newMasterPod == nil && len(replicas) == 0 {
 			c.logger.Warningf("cannot perform switch over before re-creating the pod: no replicas")
@@ -475,6 +481,50 @@ func (c *Cluster) recreatePods(pods []v1.Pod, switchoverCandidates []spec.Namesp
 	return nil
 }
 
+func (c *Cluster) getSwitchoverCandidate(master *v1.Pod) (spec.NamespacedName, error) {
+
+	var members []patroni.ClusterMember
+	candidates := make([]patroni.ClusterMember, 0)
+	syncCandidates := make([]patroni.ClusterMember, 0)
+
+	err := retryutil.Retry(1*time.Second, 5*time.Second,
+		func() (bool, error) {
+			var err error
+			members, err = c.patroni.GetClusterMembers(master)
+
+			if err != nil {
+				return false, err
+			}
+			return true, nil
+		},
+	)
+	if err != nil {
+		return spec.NamespacedName{}, fmt.Errorf("failed to get Patroni cluster members: %s", err)
+	}
+
+	for _, member := range members {
+		if PostgresRole(member.Role) != Leader && PostgresRole(member.Role) != StandbyLeader && member.State == "running" {
+			candidates = append(candidates, member)
+			if PostgresRole(member.Role) == SyncStandby {
+				syncCandidates = append(syncCandidates, member)
+			}
+		}
+	}
+
+	// pick the candidate with the lowest lag
+	// if sync_standby replicas were found, assume synchronous_mode is enabled and only consider them
+	if len(syncCandidates) > 0 {
+		sort.Slice(syncCandidates, func(i, j int) bool { return syncCandidates[i].LagInMb < syncCandidates[j].LagInMb })
+		return spec.NamespacedName{Namespace: master.Namespace, Name: syncCandidates[0].Name}, nil
+	}
+	if len(candidates) > 0 {
+		sort.Slice(candidates, func(i, j int) bool { return candidates[i].LagInMb < candidates[j].LagInMb })
+		return spec.NamespacedName{Namespace: master.Namespace, Name: candidates[0].Name}, nil
+	}
+
+	return spec.NamespacedName{}, fmt.Errorf("no switchover candidate found")
+}
+
 func (c *Cluster) podIsEndOfLife(pod *v1.Pod) (bool, error) {
 	node, err := c.KubeClient.Nodes().Get(context.TODO(), pod.Spec.NodeName, metav1.GetOptions{})
 	if err != nil {

@@ -0,0 +1,86 @@
+package cluster
+
+import (
+	"bytes"
+	"fmt"
+	"io/ioutil"
+	"net/http"
+	"testing"
+
+	"github.com/golang/mock/gomock"
+	"github.com/zalando/postgres-operator/mocks"
+	acidv1 "github.com/zalando/postgres-operator/pkg/apis/acid.zalan.do/v1"
+	"github.com/zalando/postgres-operator/pkg/spec"
+	"github.com/zalando/postgres-operator/pkg/util/k8sutil"
+	"github.com/zalando/postgres-operator/pkg/util/patroni"
+)
+
+func TestGetSwitchoverCandidate(t *testing.T) {
+	testName := "test getting right switchover candidate"
+	namespace := "default"
+
+	ctrl := gomock.NewController(t)
+	defer ctrl.Finish()
+
+	var cluster = New(Config{}, k8sutil.KubernetesClient{}, acidv1.Postgresql{}, logger, eventRecorder)
+
+	// simulate different member scenarios
+	tests := []struct {
+		subtest           string
+		clusterJson       string
+		expectedCandidate spec.NamespacedName
+		expectedError     error
+	}{
+		{
+			subtest:           "choose sync_standby over replica",
+			clusterJson:       `{"members": [{"name": "acid-test-cluster-0", "role": "leader", "state": "running", "api_url": "http://192.168.100.1:8008/patroni", "host": "192.168.100.1", "port": 5432, "timeline": 1}, {"name": "acid-test-cluster-1", "role": "sync_standby", "state": "running", "api_url": "http://192.168.100.2:8008/patroni", "host": "192.168.100.2", "port": 5432, "timeline": 1, "lag": 0}, {"name": "acid-test-cluster-2", "role": "replica", "state": "running", "api_url": "http://192.168.100.3:8008/patroni", "host": "192.168.100.3", "port": 5432, "timeline": 1, "lag": 0}]}`,
+			expectedCandidate: spec.NamespacedName{Namespace: namespace, Name: "acid-test-cluster-1"},
+			expectedError:     nil,
+		},
+		{
+			subtest:           "choose replica with lowest lag",
+			clusterJson:       `{"members": [{"name": "acid-test-cluster-0", "role": "leader", "state": "running", "api_url": "http://192.168.100.1:8008/patroni", "host": "192.168.100.1", "port": 5432, "timeline": 1}, {"name": "acid-test-cluster-1", "role": "replica", "state": "running", "api_url": "http://192.168.100.2:8008/patroni", "host": "192.168.100.2", "port": 5432, "timeline": 1, "lag": 5}, {"name": "acid-test-cluster-2", "role": "replica", "state": "running", "api_url": "http://192.168.100.3:8008/patroni", "host": "192.168.100.3", "port": 5432, "timeline": 1, "lag": 2}]}`,
+			expectedCandidate: spec.NamespacedName{Namespace: namespace, Name: "acid-test-cluster-2"},
+			expectedError:     nil,
+		},
+		{
+			subtest:           "choose first replica when lag is equal everywhere",
+			clusterJson:       `{"members": [{"name": "acid-test-cluster-0", "role": "leader", "state": "running", "api_url": "http://192.168.100.1:8008/patroni", "host": "192.168.100.1", "port": 5432, "timeline": 1}, {"name": "acid-test-cluster-1", "role": "replica", "state": "running", "api_url": "http://192.168.100.2:8008/patroni", "host": "192.168.100.2", "port": 5432, "timeline": 1, "lag": 5}, {"name": "acid-test-cluster-2", "role": "replica", "state": "running", "api_url": "http://192.168.100.3:8008/patroni", "host": "192.168.100.3", "port": 5432, "timeline": 1, "lag": 5}]}`,
+			expectedCandidate: spec.NamespacedName{Namespace: namespace, Name: "acid-test-cluster-1"},
+			expectedError:     nil,
+		},
+		{
+			subtest:           "no running replica available",
+			clusterJson:       `{"members": [{"name": "acid-test-cluster-0", "role": "leader", "state": "running", "api_url": "http://192.168.100.1:8008/patroni", "host": "192.168.100.1", "port": 5432, "timeline": 2}, {"name": "acid-test-cluster-1", "role": "replica", "state": "starting", "api_url": "http://192.168.100.2:8008/patroni", "host": "192.168.100.2", "port": 5432, "timeline": 2}]}`,
+			expectedCandidate: spec.NamespacedName{},
+			expectedError:     fmt.Errorf("no switchover candidate found"),
+		},
+	}
+
+	for _, tt := range tests {
+		// mocking cluster members
+		r := ioutil.NopCloser(bytes.NewReader([]byte(tt.clusterJson)))
+
+		response := http.Response{
+			StatusCode: 200,
+			Body:       r,
+		}
+
+		mockClient := mocks.NewMockHTTPClient(ctrl)
+		mockClient.EXPECT().Get(gomock.Any()).Return(&response, nil).AnyTimes()
+
+		p := patroni.New(patroniLogger, mockClient)
+		cluster.patroni = p
+		mockMasterPod := newMockPod("192.168.100.1")
+		mockMasterPod.Namespace = namespace
+
+		candidate, err := cluster.getSwitchoverCandidate(mockMasterPod)
+		if err != nil && err.Error() != tt.expectedError.Error() {
+			t.Errorf("%s - %s: unexpected error, %v", testName, tt.subtest, err)
+		}
+
+		if candidate != tt.expectedCandidate {
+			t.Errorf("%s - %s: unexpected switchover candidate, got %s, expected %s", testName, tt.subtest, candidate, tt.expectedCandidate)
+		}
+	}
+}

@@ -14,11 +14,14 @@ import (
 type PostgresRole string
 
 const (
-	// Master role
-	Master PostgresRole = "master"
-
-	// Replica role
+	// spilo roles
+	Master  PostgresRole = "master"
 	Replica PostgresRole = "replica"
+
+	// roles returned by Patroni cluster endpoint
+	Leader        PostgresRole = "leader"
+	StandbyLeader PostgresRole = "standby_leader"
+	SyncStandby   PostgresRole = "sync_standby"
 )
 
 // PodEventType represents the type of a pod-related event

@@ -6,7 +6,6 @@ import (
 	"encoding/gob"
 	"encoding/json"
 	"fmt"
-	"math/rand"
 	"reflect"
 	"sort"
 	"strings"
@@ -525,10 +524,6 @@ func (c *Cluster) credentialSecretNameForCluster(username string, clusterName st
 		"tprgroup", acidzalando.GroupName)
 }
 
-func masterCandidate(replicas []spec.NamespacedName) spec.NamespacedName {
-	return replicas[rand.Intn(len(replicas))]
-}
-
 func cloneSpec(from *acidv1.Postgresql) (*acidv1.Postgresql, error) {
 	var (
 		buf    bytes.Buffer

@@ -21,6 +21,7 @@ import (
 const (
 	failoverPath = "/failover"
 	configPath   = "/config"
+	clusterPath  = "/cluster"
 	statusPath   = "/patroni"
 	restartPath  = "/restart"
 	apiPort      = 8008
@@ -29,6 +30,7 @@ const (
 
 // Interface describe patroni methods
 type Interface interface {
+	GetClusterMembers(master *v1.Pod) ([]ClusterMember, error)
 	Switchover(master *v1.Pod, candidate string) error
 	SetPostgresParameters(server *v1.Pod, options map[string]string) error
 	GetMemberData(server *v1.Pod) (MemberData, error)
@@ -175,6 +177,20 @@ func (p *Patroni) SetConfig(server *v1.Pod, config map[string]interface{}) error
 	return p.httpPostOrPatch(http.MethodPatch, apiURLString+configPath, buf)
 }
 
+// ClusterMembers array of cluster members from Patroni API
+type ClusterMembers struct {
+	Members []ClusterMember `json:"members"`
+}
+
+// ClusterMember cluster member data from Patroni API
+type ClusterMember struct {
+	Name     string `json:"name"`
+	Role     string `json:"role"`
+	State    string `json:"state"`
+	Timeline int    `json:"timeline"`
+	LagInMb  int    `json:"lag"`
+}
+
 // MemberDataPatroni child element
 type MemberDataPatroni struct {
 	Version string `json:"version"`
@@ -246,6 +262,27 @@ func (p *Patroni) Restart(server *v1.Pod) error {
 	return nil
 }
 
+// GetClusterMembers read cluster data from patroni API
+func (p *Patroni) GetClusterMembers(server *v1.Pod) ([]ClusterMember, error) {
+
+	apiURLString, err := apiURL(server)
+	if err != nil {
+		return []ClusterMember{}, err
+	}
+	body, err := p.httpGet(apiURLString + clusterPath)
+	if err != nil {
+		return []ClusterMember{}, err
+	}
+
+	data := ClusterMembers{}
+	err = json.Unmarshal([]byte(body), &data)
+	if err != nil {
+		return []ClusterMember{}, err
+	}
+
+	return data.Members, nil
+}
+
 // GetMemberData read member data from patroni API
 func (p *Patroni) GetMemberData(server *v1.Pod) (MemberData, error) {
 

@@ -85,6 +85,55 @@ func TestApiURL(t *testing.T) {
 	}
 }
 
+func TestGetClusterMembers(t *testing.T) {
+	ctrl := gomock.NewController(t)
+	defer ctrl.Finish()
+
+	expectedClusterMemberData := []ClusterMember{
+		{
+			Name:     "acid-test-cluster-0",
+			Role:     "leader",
+			State:    "running",
+			Timeline: 1,
+			LagInMb:  0,
+		}, {
+			Name:     "acid-test-cluster-1",
+			Role:     "sync_standby",
+			State:    "running",
+			Timeline: 1,
+			LagInMb:  0,
+		}, {
+			Name:     "acid-test-cluster-2",
+			Role:     "replica",
+			State:    "running",
+			Timeline: 1,
+			LagInMb:  0,
+		}}
+
+	json := `{"members": [{"name": "acid-test-cluster-0", "role": "leader", "state": "running", "api_url": "http://192.168.100.1:8008/patroni", "host": "192.168.100.1", "port": 5432, "timeline": 1}, {"name": "acid-test-cluster-1", "role": "sync_standby", "state": "running", "api_url": "http://192.168.100.2:8008/patroni", "host": "192.168.100.2", "port": 5432, "timeline": 1, "lag": 0}, {"name": "acid-test-cluster-2", "role": "replica", "state": "running", "api_url": "http://192.168.100.3:8008/patroni", "host": "192.168.100.3", "port": 5432, "timeline": 1, "lag": 0}]}`
+	r := ioutil.NopCloser(bytes.NewReader([]byte(json)))
+
+	response := http.Response{
+		StatusCode: 200,
+		Body:       r,
+	}
+
+	mockClient := mocks.NewMockHTTPClient(ctrl)
+	mockClient.EXPECT().Get(gomock.Any()).Return(&response, nil)
+
+	p := New(logger, mockClient)
+
+	clusterMemberData, err := p.GetClusterMembers(newMockPod("192.168.100.1"))
+
+	if !reflect.DeepEqual(expectedClusterMemberData, clusterMemberData) {
+		t.Errorf("Patroni cluster members differ: expected: %#v, got: %#v", expectedClusterMemberData, clusterMemberData)
+	}
+
+	if err != nil {
+		t.Errorf("Could not read Patroni data: %v", err)
+	}
+}
+
 func TestGetMemberData(t *testing.T) {
 	ctrl := gomock.NewController(t)
 	defer ctrl.Finish()
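
For reference, a tiny standalone sketch (not part of the commit) of how the new ClusterMember mapping consumes a Patroni /cluster payload like the one used in the tests above; the "lag" field maps to LagInMb and stays 0 when Patroni omits it, as it does for the leader.

package main

import (
	"encoding/json"
	"fmt"
)

// Mirrors the ClusterMember/ClusterMembers types added in this commit.
type ClusterMember struct {
	Name     string `json:"name"`
	Role     string `json:"role"`
	State    string `json:"state"`
	Timeline int    `json:"timeline"`
	LagInMb  int    `json:"lag"`
}

type ClusterMembers struct {
	Members []ClusterMember `json:"members"`
}

func main() {
	payload := `{"members": [
		{"name": "acid-test-cluster-0", "role": "leader", "state": "running", "timeline": 1},
		{"name": "acid-test-cluster-1", "role": "sync_standby", "state": "running", "timeline": 1, "lag": 0}]}`

	var data ClusterMembers
	if err := json.Unmarshal([]byte(payload), &data); err != nil {
		panic(err)
	}
	for _, m := range data.Members {
		// the leader entry has no "lag" key, so LagInMb keeps its zero value
		fmt.Printf("%s role=%s state=%s lag=%dMB\n", m.Name, m.Role, m.State, m.LagInMb)
	}
}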