removing inner goroutine in cluster.Switchover and resolve race between processPodEvent and unregisterPodSubscriber
This commit is contained in:
		
							parent
							
								
									a77d5df158
								
							
						
					
					
						commit
						3641911786
					
				|  | @ -1034,10 +1034,10 @@ func (c *Cluster) processPodEvent(obj interface{}) error { | ||||||
| 
 | 
 | ||||||
| 	c.podSubscribersMu.RLock() | 	c.podSubscribersMu.RLock() | ||||||
| 	subscriber, ok := c.podSubscribers[spec.NamespacedName(event.PodName)] | 	subscriber, ok := c.podSubscribers[spec.NamespacedName(event.PodName)] | ||||||
| 	c.podSubscribersMu.RUnlock() |  | ||||||
| 	if ok { | 	if ok { | ||||||
| 		subscriber <- event | 		subscriber <- event | ||||||
| 	} | 	} | ||||||
|  | 	c.podSubscribersMu.RUnlock() | ||||||
| 
 | 
 | ||||||
| 	return nil | 	return nil | ||||||
| } | } | ||||||
|  | @ -1501,34 +1501,16 @@ func (c *Cluster) Switchover(curMaster *v1.Pod, candidate spec.NamespacedName) e | ||||||
| 	var err error | 	var err error | ||||||
| 	c.logger.Debugf("switching over from %q to %q", curMaster.Name, candidate) | 	c.logger.Debugf("switching over from %q to %q", curMaster.Name, candidate) | ||||||
| 	c.eventRecorder.Eventf(c.GetReference(), v1.EventTypeNormal, "Switchover", "Switching over from %q to %q", curMaster.Name, candidate) | 	c.eventRecorder.Eventf(c.GetReference(), v1.EventTypeNormal, "Switchover", "Switching over from %q to %q", curMaster.Name, candidate) | ||||||
| 
 |  | ||||||
| 	var wg sync.WaitGroup |  | ||||||
| 
 |  | ||||||
| 	podLabelErr := make(chan error) |  | ||||||
| 	stopCh := make(chan struct{}) | 	stopCh := make(chan struct{}) | ||||||
| 
 | 	ch := c.registerPodSubscriber(candidate) | ||||||
| 	wg.Add(1) | 	defer c.unregisterPodSubscriber(candidate) | ||||||
| 
 | 	defer close(stopCh) | ||||||
| 	go func() { |  | ||||||
| 		defer wg.Done() |  | ||||||
| 		ch := c.registerPodSubscriber(candidate) |  | ||||||
| 		defer c.unregisterPodSubscriber(candidate) |  | ||||||
| 
 |  | ||||||
| 		role := Master |  | ||||||
| 
 |  | ||||||
| 		select { |  | ||||||
| 		case <-stopCh: |  | ||||||
| 		case podLabelErr <- func() (err2 error) { |  | ||||||
| 			_, err2 = c.waitForPodLabel(ch, stopCh, &role) |  | ||||||
| 			return |  | ||||||
| 		}(): |  | ||||||
| 		} |  | ||||||
| 	}() |  | ||||||
| 
 | 
 | ||||||
| 	if err = c.patroni.Switchover(curMaster, candidate.Name); err == nil { | 	if err = c.patroni.Switchover(curMaster, candidate.Name); err == nil { | ||||||
| 		c.logger.Debugf("successfully switched over from %q to %q", curMaster.Name, candidate) | 		c.logger.Debugf("successfully switched over from %q to %q", curMaster.Name, candidate) | ||||||
| 		c.eventRecorder.Eventf(c.GetReference(), v1.EventTypeNormal, "Switchover", "Successfully switched over from %q to %q", curMaster.Name, candidate) | 		c.eventRecorder.Eventf(c.GetReference(), v1.EventTypeNormal, "Switchover", "Successfully switched over from %q to %q", curMaster.Name, candidate) | ||||||
| 		if err = <-podLabelErr; err != nil { | 		_, err = c.waitForPodLabel(ch, stopCh, nil) | ||||||
|  | 		if err != nil { | ||||||
| 			err = fmt.Errorf("could not get master pod label: %v", err) | 			err = fmt.Errorf("could not get master pod label: %v", err) | ||||||
| 		} | 		} | ||||||
| 	} else { | 	} else { | ||||||
|  | @ -1536,14 +1518,6 @@ func (c *Cluster) Switchover(curMaster *v1.Pod, candidate spec.NamespacedName) e | ||||||
| 		c.eventRecorder.Eventf(c.GetReference(), v1.EventTypeNormal, "Switchover", "Switchover from %q to %q FAILED: %v", curMaster.Name, candidate, err) | 		c.eventRecorder.Eventf(c.GetReference(), v1.EventTypeNormal, "Switchover", "Switchover from %q to %q FAILED: %v", curMaster.Name, candidate, err) | ||||||
| 	} | 	} | ||||||
| 
 | 
 | ||||||
| 	// signal the role label waiting goroutine to close the shop and go home
 |  | ||||||
| 	close(stopCh) |  | ||||||
| 	// wait until the goroutine terminates, since unregisterPodSubscriber
 |  | ||||||
| 	// must be called before the outer return; otherwise we risk subscribing to the same pod twice.
 |  | ||||||
| 	wg.Wait() |  | ||||||
| 	// close the label waiting channel no sooner than the waiting goroutine terminates.
 |  | ||||||
| 	close(podLabelErr) |  | ||||||
| 
 |  | ||||||
| 	return err | 	return err | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
|  |  | ||||||
|  | @ -67,7 +67,7 @@ func (c *Cluster) markRollingUpdateFlagForPod(pod *v1.Pod, msg string) error { | ||||||
| 		return fmt.Errorf("could not form patch for pod's rolling update flag: %v", err) | 		return fmt.Errorf("could not form patch for pod's rolling update flag: %v", err) | ||||||
| 	} | 	} | ||||||
| 
 | 
 | ||||||
| 	err = retryutil.Retry(c.OpConfig.PatroniAPICheckInterval, c.OpConfig.PatroniAPICheckTimeout, | 	err = retryutil.Retry(c.OpConfig.ResourceCheckInterval, c.OpConfig.ResourceCheckTimeout, | ||||||
| 		func() (bool, error) { | 		func() (bool, error) { | ||||||
| 			_, err2 := c.KubeClient.Pods(pod.Namespace).Patch( | 			_, err2 := c.KubeClient.Pods(pod.Namespace).Patch( | ||||||
| 				context.TODO(), | 				context.TODO(), | ||||||
|  | @ -149,14 +149,14 @@ func (c *Cluster) deletePod(podName spec.NamespacedName) error { | ||||||
| func (c *Cluster) unregisterPodSubscriber(podName spec.NamespacedName) { | func (c *Cluster) unregisterPodSubscriber(podName spec.NamespacedName) { | ||||||
| 	c.logger.Debugf("unsubscribing from pod %q events", podName) | 	c.logger.Debugf("unsubscribing from pod %q events", podName) | ||||||
| 	c.podSubscribersMu.Lock() | 	c.podSubscribersMu.Lock() | ||||||
| 	defer c.podSubscribersMu.Unlock() | 	ch, ok := c.podSubscribers[podName] | ||||||
| 
 | 	if !ok { | ||||||
| 	if _, ok := c.podSubscribers[podName]; !ok { |  | ||||||
| 		panic("subscriber for pod '" + podName.String() + "' is not found") | 		panic("subscriber for pod '" + podName.String() + "' is not found") | ||||||
| 	} | 	} | ||||||
| 
 | 
 | ||||||
| 	close(c.podSubscribers[podName]) |  | ||||||
| 	delete(c.podSubscribers, podName) | 	delete(c.podSubscribers, podName) | ||||||
|  | 	c.podSubscribersMu.Unlock() | ||||||
|  | 	close(ch) | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| func (c *Cluster) registerPodSubscriber(podName spec.NamespacedName) chan PodEvent { | func (c *Cluster) registerPodSubscriber(podName spec.NamespacedName) chan PodEvent { | ||||||
|  | @ -399,11 +399,12 @@ func (c *Cluster) getPatroniMemberData(pod *v1.Pod) (patroni.MemberData, error) | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| func (c *Cluster) recreatePod(podName spec.NamespacedName) (*v1.Pod, error) { | func (c *Cluster) recreatePod(podName spec.NamespacedName) (*v1.Pod, error) { | ||||||
|  | 	stopCh := make(chan struct{}) | ||||||
| 	ch := c.registerPodSubscriber(podName) | 	ch := c.registerPodSubscriber(podName) | ||||||
| 	defer c.unregisterPodSubscriber(podName) | 	defer c.unregisterPodSubscriber(podName) | ||||||
| 	stopChan := make(chan struct{}) | 	defer close(stopCh) | ||||||
| 
 | 
 | ||||||
| 	err := retryutil.Retry(c.OpConfig.PatroniAPICheckInterval, c.OpConfig.PatroniAPICheckTimeout, | 	err := retryutil.Retry(c.OpConfig.ResourceCheckInterval, c.OpConfig.PodDeletionWaitTimeout, | ||||||
| 		func() (bool, error) { | 		func() (bool, error) { | ||||||
| 			err2 := c.KubeClient.Pods(podName.Namespace).Delete( | 			err2 := c.KubeClient.Pods(podName.Namespace).Delete( | ||||||
| 				context.TODO(), | 				context.TODO(), | ||||||
|  | @ -421,7 +422,7 @@ func (c *Cluster) recreatePod(podName spec.NamespacedName) (*v1.Pod, error) { | ||||||
| 	if err := c.waitForPodDeletion(ch); err != nil { | 	if err := c.waitForPodDeletion(ch); err != nil { | ||||||
| 		return nil, err | 		return nil, err | ||||||
| 	} | 	} | ||||||
| 	pod, err := c.waitForPodLabel(ch, stopChan, nil) | 	pod, err := c.waitForPodLabel(ch, stopCh, nil) | ||||||
| 	if err != nil { | 	if err != nil { | ||||||
| 		return nil, err | 		return nil, err | ||||||
| 	} | 	} | ||||||
|  | @ -446,7 +447,7 @@ func (c *Cluster) recreatePods(pods []v1.Pod, switchoverCandidates []spec.Namesp | ||||||
| 			continue | 			continue | ||||||
| 		} | 		} | ||||||
| 
 | 
 | ||||||
| 		podName := util.NameFromMeta(pod.ObjectMeta) | 		podName := util.NameFromMeta(pods[i].ObjectMeta) | ||||||
| 		newPod, err := c.recreatePod(podName) | 		newPod, err := c.recreatePod(podName) | ||||||
| 		if err != nil { | 		if err != nil { | ||||||
| 			return fmt.Errorf("could not recreate replica pod %q: %v", util.NameFromMeta(pod.ObjectMeta), err) | 			return fmt.Errorf("could not recreate replica pod %q: %v", util.NameFromMeta(pod.ObjectMeta), err) | ||||||
|  |  | ||||||
|  | @ -451,17 +451,18 @@ func (c *Controller) Run(stopCh <-chan struct{}, wg *sync.WaitGroup) { | ||||||
| 		panic("could not acquire initial list of clusters") | 		panic("could not acquire initial list of clusters") | ||||||
| 	} | 	} | ||||||
| 
 | 
 | ||||||
| 	wg.Add(5) | 	wg.Add(5 + util.Bool2Int(c.opConfig.EnablePostgresTeamCRD)) | ||||||
| 	go c.runPodInformer(stopCh, wg) | 	go c.runPodInformer(stopCh, wg) | ||||||
| 	go c.runPostgresqlInformer(stopCh, wg) | 	go c.runPostgresqlInformer(stopCh, wg) | ||||||
| 	go c.clusterResync(stopCh, wg) | 	go c.clusterResync(stopCh, wg) | ||||||
| 	go c.apiserver.Run(stopCh, wg) |  | ||||||
| 	go c.kubeNodesInformer(stopCh, wg) | 	go c.kubeNodesInformer(stopCh, wg) | ||||||
| 
 | 
 | ||||||
| 	if c.opConfig.EnablePostgresTeamCRD { | 	if c.opConfig.EnablePostgresTeamCRD { | ||||||
| 		go c.runPostgresTeamInformer(stopCh, wg) | 		go c.runPostgresTeamInformer(stopCh, wg) | ||||||
| 	} | 	} | ||||||
| 
 | 
 | ||||||
|  | 	go c.apiserver.Run(stopCh, wg) | ||||||
|  | 
 | ||||||
| 	c.logger.Info("started working in background") | 	c.logger.Info("started working in background") | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
|  |  | ||||||
|  | @ -324,12 +324,20 @@ func testNil(values ...*int32) bool { | ||||||
| 	return false | 	return false | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| // Convert int to IntOrString type
 | // ToIntStr convert int to IntOrString type
 | ||||||
| func ToIntStr(val int) *intstr.IntOrString { | func ToIntStr(val int) *intstr.IntOrString { | ||||||
| 	b := intstr.FromInt(val) | 	b := intstr.FromInt(val) | ||||||
| 	return &b | 	return &b | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
|  | // Bool2Int converts bool to int
 | ||||||
|  | func Bool2Int(flag bool) int { | ||||||
|  | 	if flag { | ||||||
|  | 		return 1 | ||||||
|  | 	} | ||||||
|  | 	return 0 | ||||||
|  | } | ||||||
|  | 
 | ||||||
| // Get int from IntOrString and return max int if string
 | // Get int from IntOrString and return max int if string
 | ||||||
| func IntFromIntStr(intOrStr intstr.IntOrString) int { | func IntFromIntStr(intOrStr intstr.IntOrString) int { | ||||||
| 	if intOrStr.Type == 1 { | 	if intOrStr.Type == 1 { | ||||||
|  |  | ||||||
		Loading…
	
		Reference in New Issue