From b75c61b2e60ca35a7c23548d856e7dcce1ac5c48 Mon Sep 17 00:00:00 2001 From: yxxhero Date: Tue, 3 Mar 2026 08:08:42 +0800 Subject: [PATCH] fix: configure kubedog rate limiter to prevent context cancellation Fixes #2445 The default Kubernetes client rate limiter settings were too restrictive, causing context cancellation errors when kubedog's reflector infrastructure tried to watch multiple resources simultaneously. When the deployment becomes ready before the rate limiter releases the request, the context gets canceled. This fix: - Increases default QPS from 5 to 100 and Burst from 10 to 200 - Makes QPS and Burst configurable per release via kubedogQPS and kubedogBurst - Uses direct client-go configuration instead of kubedog's kube.Init - Adds comprehensive documentation and examples Users can now tune these settings based on their cluster size and requirements: - Small clusters: QPS=50, Burst=100 - Medium clusters: QPS=100, Burst=200 (default) - Large clusters: QPS=200, Burst=400 Signed-off-by: yxxhero --- KUBEDOG_CONFIG.md | 208 ++++++++++++++++++++++++++++ examples/KUBEDOG_CONFIG_EXAMPLES.md | 186 +++++++++++++++++++++++++ pkg/kubedog/options.go | 14 ++ pkg/kubedog/tracker.go | 61 ++++++-- pkg/kubedog/tracker_test.go | 42 ++++++ pkg/state/helmx.go | 2 + pkg/state/state.go | 4 + 7 files changed, 502 insertions(+), 15 deletions(-) create mode 100644 KUBEDOG_CONFIG.md create mode 100644 examples/KUBEDOG_CONFIG_EXAMPLES.md diff --git a/KUBEDOG_CONFIG.md b/KUBEDOG_CONFIG.md new file mode 100644 index 00000000..fd87d117 --- /dev/null +++ b/KUBEDOG_CONFIG.md @@ -0,0 +1,208 @@ +# Kubedog Configuration + +This document describes how to configure kubedog resource tracking in Helmfile. + +## Overview + +Kubedog is a library for tracking Kubernetes resources during deployments. Helmfile uses kubedog when `trackMode: kubedog` is set to monitor the rollout of resources like Deployments, StatefulSets, DaemonSets, and Jobs. 
+ +## Configuration Options + +### Release-level Configuration + +You can configure kubedog settings per release: + +```yaml +releases: + - name: my-app + namespace: default + chart: my-chart + trackMode: kubedog + kubedogQPS: 100 # Queries per second (default: 100) + kubedogBurst: 200 # Burst capacity (default: 200) + trackLogs: true + trackKinds: + - Deployment +``` + +### Global Default Configuration + +You can enable kubedog tracking for every release through `helmDefaults`. `kubedogQPS` and `kubedogBurst` have no `helmDefaults` equivalent and must be set on each release: + +```yaml +helmDefaults: + trackMode: kubedog + # Note: QPS and Burst can only be configured at release level +``` + +## Parameters + +### kubedogQPS + +- **Type**: `float32` +- **Default**: `100` +- **Description**: Sets the maximum number of queries per second to the Kubernetes API server from the kubedog client. This controls the rate of API requests when tracking resources. + +**When to increase**: +- Large clusters with many resources +- When tracking multiple releases simultaneously +- When you see rate limiting errors like "client rate limiter Wait returned an error: context canceled" + +**When to decrease**: +- Small clusters or development environments +- When you want to reduce load on the API server + +### kubedogBurst + +- **Type**: `int` +- **Default**: `200` +- **Description**: Sets the maximum burst of requests that can be made to the Kubernetes API server. This allows temporary spikes above the QPS limit. 
+ +**When to increase**: +- When tracking releases with many resources +- When you see connection timeout errors +- In production environments with high throughput needs + +**When to decrease**: +- In resource-constrained environments +- When the API server is under heavy load + +## Tuning Guidelines + +### For Small Clusters (< 50 resources) + +```yaml +releases: + - name: my-app + trackMode: kubedog + kubedogQPS: 50 + kubedogBurst: 100 +``` + +### For Medium Clusters (50-200 resources) + +```yaml +releases: + - name: my-app + trackMode: kubedog + kubedogQPS: 100 # default + kubedogBurst: 200 # default +``` + +### For Large Clusters (> 200 resources) + +```yaml +releases: + - name: my-app + trackMode: kubedog + kubedogQPS: 200 + kubedogBurst: 400 +``` + +### For Multiple Concurrent Releases + +When using the `--concurrency` flag with multiple releases that use kubedog tracking: + +```yaml +releases: + - name: app1 + trackMode: kubedog + kubedogQPS: 50 + kubedogBurst: 100 + + - name: app2 + trackMode: kubedog + kubedogQPS: 50 + kubedogBurst: 100 +``` + +## Troubleshooting + +### Rate Limiting Errors + +**Error**: +``` +E0302 19:38:41.812322 91 reflector.go:204] "Failed to watch" err="client rate limiter Wait returned an error: context canceled" +``` + +**Solution**: Increase `kubedogQPS` and `kubedogBurst` values. + +### Connection Timeouts + +**Error**: +``` +context canceled while waiting for API server response +``` + +**Solution**: +1. Check network connectivity to the API server +2. Increase `kubedogBurst` to allow larger bursts of requests +3. Decrease the number of concurrent releases if using the `--concurrency` flag + +### Slow Tracking + +**Symptom**: Resource tracking takes a long time to complete. + +**Solution**: +1. Use `trackKinds` to limit which resource types are tracked +2. Use `skipKinds` to exclude unnecessary resource types +3. 
Increase `kubedogQPS` to speed up API queries + +## Related Configuration + +### trackTimeout + +Sets the timeout for kubedog tracking (in seconds): + +```yaml +releases: + - name: my-app + trackMode: kubedog + trackTimeout: 600 # 10 minutes +``` + +### trackLogs + +Enable/disable log streaming from tracked resources: + +```yaml +releases: + - name: my-app + trackMode: kubedog + trackLogs: true # Show pod logs during tracking +``` + +### trackKinds / skipKinds + +Control which resource types to track: + +```yaml +releases: + - name: my-app + trackMode: kubedog + trackKinds: + - Deployment + - StatefulSet + skipKinds: + - ConfigMap + - Secret +``` + +## Implementation Details + +The kubedog client configuration uses: +- `k8s.io/client-go` for Kubernetes API communication +- Custom rate limiting via `rest.Config.QPS` and `rest.Config.Burst` +- A separately cached client for each unique (kubeContext, kubeconfig, QPS, Burst) combination + +The default values (QPS=100, Burst=200) were chosen to: +- Prevent rate limiting errors in most common scenarios +- Support tracking of multiple resource types simultaneously +- Allow reasonable burst capacity for initial resource discovery +- Balance between tracking speed and API server load + +## See Also + +- [Issue #2445](https://github.com/helmfile/helmfile/issues/2445) - Original issue that led to configurable QPS/Burst +- [Kubedog Documentation](https://github.com/werf/kubedog) +- [client-go `rest.Config` reference (QPS and Burst fields)](https://pkg.go.dev/k8s.io/client-go/rest#Config) diff --git a/examples/KUBEDOG_CONFIG_EXAMPLES.md b/examples/KUBEDOG_CONFIG_EXAMPLES.md new file mode 100644 index 00000000..57b76b95 --- /dev/null +++ b/examples/KUBEDOG_CONFIG_EXAMPLES.md @@ -0,0 +1,186 @@ +# Example: Kubedog Resource Tracking Configuration + +This example demonstrates various ways to configure kubedog resource tracking. 
+ +## Basic Example + +```yaml +releases: + - name: simple-app + namespace: default + chart: ./charts/simple-app + trackMode: kubedog +``` + +Uses default QPS (100) and Burst (200). + +## Customized Rate Limiting + +```yaml +releases: + - name: high-throughput-app + namespace: production + chart: ./charts/app + trackMode: kubedog + # Increased limits for large-scale deployments + kubedogQPS: 200 + kubedogBurst: 400 + trackTimeout: 600 + trackLogs: true + trackKinds: + - Deployment + - StatefulSet +``` + +## Multiple Releases with Different Settings + +```yaml +releases: + # Small app - conservative limits + - name: frontend + namespace: web + chart: ./charts/frontend + trackMode: kubedog + kubedogQPS: 50 + kubedogBurst: 100 + + # Medium app - default limits + - name: backend + namespace: api + chart: ./charts/backend + trackMode: kubedog + + # Large app - increased limits + - name: data-processor + namespace: data + chart: ./charts/processor + trackMode: kubedog + kubedogQPS: 150 + kubedogBurst: 300 + trackKinds: + - Deployment + - StatefulSet + - Job +``` + +## Environment-Specific Configuration + +```yaml +environments: + development: + values: + - kubedogQPS: 50 + - kubedogBurst: 100 + staging: + values: + - kubedogQPS: 100 + - kubedogBurst: 200 + production: + values: + - kubedogQPS: 200 + - kubedogBurst: 400 + +releases: + - name: myapp + namespace: {{ .Environment.Name }} + chart: ./charts/myapp + trackMode: kubedog + kubedogQPS: {{ .Values.kubedogQPS }} + kubedogBurst: {{ .Values.kubedogBurst }} +``` + +## With Global Defaults + +```yaml +helmDefaults: + createNamespace: true + timeout: 300 + +releases: + - name: app1 + namespace: default + chart: ./charts/app + trackMode: kubedog + # Uses release-specific settings + kubedogQPS: 150 + kubedogBurst: 300 + + - name: app2 + namespace: default + chart: ./charts/app + trackMode: kubedog + # Uses default QPS=100, Burst=200 +``` + +## Selective Tracking + +```yaml +releases: + - name: complex-app + namespace: 
default + chart: ./charts/complex-app + trackMode: kubedog + kubedogQPS: 120 + kubedogBurst: 250 + # Only track deployments and jobs + trackKinds: + - Deployment + - Job + # Skip these resource types + skipKinds: + - ConfigMap + - Secret + - Ingress + # Track specific resources only + trackResources: + - kind: Deployment + name: main-app + - kind: Job + name: migration-job + namespace: default +``` + +## Testing the Configuration + +To test your kubedog configuration: + +```bash +# Apply with kubedog tracking +helmfile apply -n my-namespace -l app=myapp + +# With debug logging +helmfile apply -n my-namespace -l app=myapp --log-level debug + +# With specific environment +helmfile apply -e production -l app=myapp +``` + +## Expected Output + +When kubedog tracking is working correctly, you should see: + +``` +Tracking 5 resources from release myapp with kubedog +Tracking 5 resources with kubedog (filtered from 5 total) +┌ Status progress +│ DEPLOYMENT REPLICAS AVAILABLE UP-TO-DATE +│ myapp-main 1/1 1 1 +└ Status progress +All resources tracked successfully +UPDATED RELEASES: +NAME NAMESPACE CHART VERSION DURATION +myapp default ./charts/app 1.0.0 1m32s +``` + +## Troubleshooting Commands + +```bash +# Check current kubedog settings +helmfile build -n my-namespace -l app=myapp | grep -A 5 "kubedog" + +# Test with increased verbosity +helmfile apply -n my-namespace -l app=myapp --log-level debug 2>&1 | grep -i kubedog + +# Monitor API server requests (requires cluster access) +kubectl get --raw /metrics | grep apiserver_request_total +``` diff --git a/pkg/kubedog/options.go b/pkg/kubedog/options.go index 36c16ad2..a5b90752 100644 --- a/pkg/kubedog/options.go +++ b/pkg/kubedog/options.go @@ -18,12 +18,16 @@ type TrackOptions struct { Logs bool LogsSince time.Duration Filter *resource.FilterConfig + QPS float32 + Burst int } func NewTrackOptions() *TrackOptions { return &TrackOptions{ Timeout: 5 * time.Minute, LogsSince: 10 * time.Minute, + QPS: 100, + Burst: 200, } } @@ 
-41,3 +45,13 @@ func (o *TrackOptions) WithFilterConfig(config *resource.FilterConfig) *TrackOpt o.Filter = config return o } + +func (o *TrackOptions) WithQPS(qps float32) *TrackOptions { + o.QPS = qps + return o +} + +func (o *TrackOptions) WithBurst(burst int) *TrackOptions { + o.Burst = burst + return o +} diff --git a/pkg/kubedog/tracker.go b/pkg/kubedog/tracker.go index 752038b8..666ced83 100644 --- a/pkg/kubedog/tracker.go +++ b/pkg/kubedog/tracker.go @@ -8,11 +8,11 @@ import ( "sync" "time" - "github.com/werf/kubedog/pkg/kube" "github.com/werf/kubedog/pkg/tracker" "github.com/werf/kubedog/pkg/trackers/rollout/multitrack" "go.uber.org/zap" "k8s.io/client-go/kubernetes" + "k8s.io/client-go/tools/clientcmd" "github.com/helmfile/helmfile/pkg/resource" ) @@ -20,6 +20,8 @@ import ( type cacheKey struct { kubeContext string kubeconfig string + qps float32 + burst int } var ( @@ -41,6 +43,8 @@ type TrackerConfig struct { KubeContext string Kubeconfig string TrackOptions *TrackOptions + KubedogQPS *float32 + KubedogBurst *int } func NewTracker(config *TrackerConfig) (*Tracker, error) { @@ -54,16 +58,26 @@ func NewTracker(config *TrackerConfig) (*Tracker, error) { kubeconfig = os.Getenv("KUBECONFIG") } - clientSet, err := getOrCreateClient(config.KubeContext, kubeconfig) - if err != nil { - return nil, fmt.Errorf("failed to initialize kubernetes client: %w", err) - } - options := config.TrackOptions if options == nil { options = NewTrackOptions() } + qps := options.QPS + if config.KubedogQPS != nil { + qps = *config.KubedogQPS + } + + burst := options.Burst + if config.KubedogBurst != nil { + burst = *config.KubedogBurst + } + + clientSet, err := getOrCreateClient(config.KubeContext, kubeconfig, qps, burst) + if err != nil { + return nil, fmt.Errorf("failed to initialize kubernetes client: %w", err) + } + var filter *resource.ResourceFilter if options.Filter != nil { filter = resource.NewResourceFilter(options.Filter, logger) @@ -78,10 +92,12 @@ func 
NewTracker(config *TrackerConfig) (*Tracker, error) { }, nil } -func getOrCreateClient(kubeContext, kubeconfig string) (kubernetes.Interface, error) { +func getOrCreateClient(kubeContext, kubeconfig string, qps float32, burst int) (kubernetes.Interface, error) { key := cacheKey{ kubeContext: kubeContext, kubeconfig: kubeconfig, + qps: qps, + burst: burst, } kubeInitMu.Lock() @@ -91,18 +107,33 @@ func getOrCreateClient(kubeContext, kubeconfig string) (kubernetes.Interface, er return client, nil } - initOpts := kube.InitOptions{ - KubeConfigOptions: kube.KubeConfigOptions{ - Context: kubeContext, - ConfigPath: kubeconfig, - }, + var explicitPath string + if kubeconfig != "" { + explicitPath = kubeconfig + } + loadingRules := &clientcmd.ClientConfigLoadingRules{ + ExplicitPath: explicitPath, } - if err := kube.Init(initOpts); err != nil { - return nil, err + overrides := &clientcmd.ConfigOverrides{} + if kubeContext != "" { + overrides.CurrentContext = kubeContext + } + + cc := clientcmd.NewNonInteractiveDeferredLoadingClientConfig(loadingRules, overrides) + restConfig, err := cc.ClientConfig() + if err != nil { + return nil, fmt.Errorf("failed to load kubeconfig: %w", err) + } + + restConfig.QPS = qps + restConfig.Burst = burst + + client, err := kubernetes.NewForConfig(restConfig) + if err != nil { + return nil, fmt.Errorf("failed to create kubernetes client: %w", err) } - client := kube.Kubernetes clientCache[key] = client return client, nil diff --git a/pkg/kubedog/tracker_test.go b/pkg/kubedog/tracker_test.go index b09a96c2..6bc3eb2f 100644 --- a/pkg/kubedog/tracker_test.go +++ b/pkg/kubedog/tracker_test.go @@ -86,3 +86,45 @@ func TestTrackOptions_WithFilterConfig(t *testing.T) { assert.Equal(t, []string{"Deployment", "StatefulSet"}, opts.Filter.TrackKinds) assert.Equal(t, []string{"ConfigMap"}, opts.Filter.SkipKinds) } + +func TestTrackOptions_WithQPS(t *testing.T) { + opts := NewTrackOptions() + opts = opts.WithQPS(50.0) + + assert.Equal(t, float32(50.0), 
opts.QPS) +} + +func TestTrackOptions_WithBurst(t *testing.T) { + opts := NewTrackOptions() + opts = opts.WithBurst(100) + + assert.Equal(t, 100, opts.Burst) +} + +func TestTrackOptions_DefaultQPSBurst(t *testing.T) { + opts := NewTrackOptions() + + assert.Equal(t, float32(100), opts.QPS) + assert.Equal(t, 200, opts.Burst) +} + +func TestTrackerConfig_WithQPSBurst(t *testing.T) { + qps := float32(50.0) + burst := 100 + config := &TrackerConfig{ + Logger: nil, + Namespace: "test-ns", + KubeContext: "test-ctx", + Kubeconfig: "/test/kubeconfig", + TrackOptions: NewTrackOptions(), + KubedogQPS: &qps, + KubedogBurst: &burst, + } + + assert.NotNil(t, config) + assert.Equal(t, "test-ns", config.Namespace) + assert.Equal(t, &qps, config.KubedogQPS) + assert.Equal(t, &burst, config.KubedogBurst) + assert.Equal(t, float32(50.0), *config.KubedogQPS) + assert.Equal(t, 100, *config.KubedogBurst) +} diff --git a/pkg/state/helmx.go b/pkg/state/helmx.go index 6d82327c..62692e00 100644 --- a/pkg/state/helmx.go +++ b/pkg/state/helmx.go @@ -480,6 +480,8 @@ func (st *HelmState) trackWithKubedog(ctx context.Context, release *ReleaseSpec, KubeContext: kubeContext, Kubeconfig: st.kubeconfig, TrackOptions: trackOpts, + KubedogQPS: release.KubedogQPS, + KubedogBurst: release.KubedogBurst, }) if err != nil { return fmt.Errorf("failed to create kubedog tracker: %w", err) diff --git a/pkg/state/state.go b/pkg/state/state.go index 49b5d80b..ffadaf9a 100644 --- a/pkg/state/state.go +++ b/pkg/state/state.go @@ -466,6 +466,10 @@ type ReleaseSpec struct { SkipKinds []string `yaml:"skipKinds,omitempty"` // TrackResources is a whitelist of specific resources to track TrackResources []TrackResourceSpec `yaml:"trackResources,omitempty"` + // KubedogQPS specifies the QPS (queries per second) for kubedog kubernetes client + KubedogQPS *float32 `yaml:"kubedogQPS,omitempty"` + // KubedogBurst specifies the burst for kubedog kubernetes client + KubedogBurst *int `yaml:"kubedogBurst,omitempty"` } // 
TrackResourceSpec specifies a resource to track