diff --git a/KUBEDOG_CONFIG.md b/KUBEDOG_CONFIG.md new file mode 100644 index 00000000..fd87d117 --- /dev/null +++ b/KUBEDOG_CONFIG.md @@ -0,0 +1,208 @@ +# Kubedog Configuration + +This document describes how to configure kubedog resource tracking in Helmfile. + +## Overview + +Kubedog is a library for tracking Kubernetes resources during deployments. When `trackMode: kubedog` is set, Helmfile uses kubedog to monitor the rollout of resources like Deployments, StatefulSets, DaemonSets, and Jobs. + +## Configuration Options + +### Release-level Configuration + +You can configure kubedog settings per release: + +```yaml +releases: + - name: my-app + namespace: default + chart: my-chart + trackMode: kubedog + kubedogQPS: 100 # Queries per second (default: 100) + kubedogBurst: 200 # Burst capacity (default: 200) + trackLogs: true + trackKinds: + - Deployment +``` + +### Global Default Configuration + +You can also set defaults in `helmDefaults`: + +```yaml +helmDefaults: + trackMode: kubedog + # Note: QPS and Burst can only be configured at release level +``` + +## Parameters + +### kubedogQPS + +- **Type**: `float32` +- **Default**: `100` +- **Description**: Sets the maximum number of queries per second to the Kubernetes API server from the kubedog client. This controls the rate of API requests when tracking resources. + +**When to increase**: +- Large clusters with many resources +- When tracking multiple releases simultaneously +- When you see rate limiting errors like "client rate limiter Wait returned an error: context canceled" + +**When to decrease**: +- Small clusters or development environments +- When you want to reduce load on the API server + +### kubedogBurst + +- **Type**: `int` +- **Default**: `200` +- **Description**: Sets the maximum burst of requests that can be made to the Kubernetes API server. This allows temporary spikes above the QPS limit. 
+ +**When to increase**: +- When tracking releases with many resources +- When you see connection timeout errors +- In production environments with high throughput needs + +**When to decrease**: +- In resource-constrained environments +- When API server is under heavy load + +## Tuning Guidelines + +### For Small Clusters (< 50 resources) + +```yaml +releases: + - name: my-app + trackMode: kubedog + kubedogQPS: 50 + kubedogBurst: 100 +``` + +### For Medium Clusters (50-200 resources) + +```yaml +releases: + - name: my-app + trackMode: kubedog + kubedogQPS: 100 # default + kubedogBurst: 200 # default +``` + +### For Large Clusters (> 200 resources) + +```yaml +releases: + - name: my-app + trackMode: kubedog + kubedogQPS: 200 + kubedogBurst: 400 +``` + +### For Multiple Concurrent Releases + +When using the `--concurrency` flag with multiple releases that use kubedog tracking: + +```yaml +releases: + - name: app1 + trackMode: kubedog + kubedogQPS: 50 + kubedogBurst: 100 + + - name: app2 + trackMode: kubedog + kubedogQPS: 50 + kubedogBurst: 100 +``` + +## Troubleshooting + +### Rate Limiting Errors + +**Error**: +``` +E0302 19:38:41.812322 91 reflector.go:204] "Failed to watch" err="client rate limiter Wait returned an error: context canceled" +``` + +**Solution**: Increase `kubedogQPS` and `kubedogBurst` values. + +### Connection Timeouts + +**Error**: +``` +context canceled while waiting for API server response +``` + +**Solution**: +1. Check network connectivity to the API server +2. Increase `kubedogBurst` to allow more concurrent requests +3. Decrease the number of concurrent releases if using the `--concurrency` flag + +### Slow Tracking + +**Symptom**: Resource tracking takes a long time to complete. + +**Solution**: +1. Use `trackKinds` to limit which resource types are tracked +2. Use `skipKinds` to exclude unnecessary resource types +3. 
Increase `kubedogQPS` to speed up API queries + +## Related Configuration + +### trackTimeout + +Sets the timeout for kubedog tracking (in seconds): + +```yaml +releases: + - name: my-app + trackMode: kubedog + trackTimeout: 600 # 10 minutes +``` + +### trackLogs + +Enable/disable log streaming from tracked resources: + +```yaml +releases: + - name: my-app + trackMode: kubedog + trackLogs: true # Show pod logs during tracking +``` + +### trackKinds / skipKinds + +Control which resource types to track: + +```yaml +releases: + - name: my-app + trackMode: kubedog + trackKinds: + - Deployment + - StatefulSet + skipKinds: + - ConfigMap + - Secret +``` + +## Implementation Details + +The kubedog client configuration uses: +- `k8s.io/client-go` for Kubernetes API communication +- Custom rate limiting via `rest.Config.QPS` and `rest.Config.Burst` +- A separate cached client per unique (kubeContext, kubeconfig, QPS, Burst) combination + +The default values (QPS=100, Burst=200) were chosen to: +- Prevent rate limiting errors in most common scenarios +- Support tracking of multiple resource types simultaneously +- Allow reasonable burst capacity for initial resource discovery +- Balance between tracking speed and API server load + +## See Also + +- [Issue #2445](https://github.com/helmfile/helmfile/issues/2445) - Original issue that led to configurable QPS/Burst +- [Kubedog Documentation](https://github.com/werf/kubedog) +- [Kubernetes Client Go Rate Limiting (`rest.Config` QPS/Burst)](https://pkg.go.dev/k8s.io/client-go/rest#Config) diff --git a/examples/KUBEDOG_CONFIG_EXAMPLES.md b/examples/KUBEDOG_CONFIG_EXAMPLES.md new file mode 100644 index 00000000..57b76b95 --- /dev/null +++ b/examples/KUBEDOG_CONFIG_EXAMPLES.md @@ -0,0 +1,186 @@ +# Example: Kubedog Resource Tracking Configuration + +This example demonstrates various ways to configure kubedog resource tracking. 
+ +## Basic Example + +```yaml +releases: + - name: simple-app + namespace: default + chart: ./charts/simple-app + trackMode: kubedog +``` + +Uses default QPS (100) and Burst (200). + +## Customized Rate Limiting + +```yaml +releases: + - name: high-throughput-app + namespace: production + chart: ./charts/app + trackMode: kubedog + # Increased limits for large-scale deployments + kubedogQPS: 200 + kubedogBurst: 400 + trackTimeout: 600 + trackLogs: true + trackKinds: + - Deployment + - StatefulSet +``` + +## Multiple Releases with Different Settings + +```yaml +releases: + # Small app - conservative limits + - name: frontend + namespace: web + chart: ./charts/frontend + trackMode: kubedog + kubedogQPS: 50 + kubedogBurst: 100 + + # Medium app - default limits + - name: backend + namespace: api + chart: ./charts/backend + trackMode: kubedog + + # Large app - increased limits + - name: data-processor + namespace: data + chart: ./charts/processor + trackMode: kubedog + kubedogQPS: 150 + kubedogBurst: 300 + trackKinds: + - Deployment + - StatefulSet + - Job +``` + +## Environment-Specific Configuration + +```yaml +environments: + development: + values: + - kubedogQPS: 50 + - kubedogBurst: 100 + staging: + values: + - kubedogQPS: 100 + - kubedogBurst: 200 + production: + values: + - kubedogQPS: 200 + - kubedogBurst: 400 + +releases: + - name: myapp + namespace: {{ .Environment.Name }} + chart: ./charts/myapp + trackMode: kubedog + kubedogQPS: {{ .Values.kubedogQPS }} + kubedogBurst: {{ .Values.kubedogBurst }} +``` + +## With Global Defaults + +```yaml +helmDefaults: + createNamespace: true + timeout: 300 + +releases: + - name: app1 + namespace: default + chart: ./charts/app + trackMode: kubedog + # Uses release-specific settings + kubedogQPS: 150 + kubedogBurst: 300 + + - name: app2 + namespace: default + chart: ./charts/app + trackMode: kubedog + # Uses default QPS=100, Burst=200 +``` + +## Selective Tracking + +```yaml +releases: + - name: complex-app + namespace: 
default + chart: ./charts/complex-app + trackMode: kubedog + kubedogQPS: 120 + kubedogBurst: 250 + # Only track deployments and jobs + trackKinds: + - Deployment + - Job + # Skip these resource types + skipKinds: + - ConfigMap + - Secret + - Ingress + # Track specific resources only + trackResources: + - kind: Deployment + name: main-app + - kind: Job + name: migration-job + namespace: default +``` + +## Testing the Configuration + +To test your kubedog configuration: + +```bash +# Apply with kubedog tracking +helmfile apply -n my-namespace -l app=myapp + +# With debug logging +helmfile apply -n my-namespace -l app=myapp --log-level debug + +# With specific environment +helmfile apply -e production -l app=myapp +``` + +## Expected Output + +When kubedog tracking is working correctly, you should see: + +``` +Tracking 5 resources from release myapp with kubedog +Tracking 5 resources with kubedog (filtered from 5 total) +┌ Status progress +│ DEPLOYMENT REPLICAS AVAILABLE UP-TO-DATE +│ myapp-main 1/1 1 1 +└ Status progress +All resources tracked successfully +UPDATED RELEASES: +NAME NAMESPACE CHART VERSION DURATION +myapp default ./charts/app 1.0.0 1m32s +``` + +## Troubleshooting Commands + +```bash +# Check current kubedog settings +helmfile build -n my-namespace -l app=myapp | grep -A 5 "kubedog" + +# Test with increased verbosity +helmfile apply -n my-namespace -l app=myapp --log-level debug 2>&1 | grep -i kubedog + +# Monitor API server requests (requires cluster access) +kubectl get --raw /metrics | grep apiserver_request_total +``` diff --git a/pkg/kubedog/options.go b/pkg/kubedog/options.go index 36c16ad2..a5b90752 100644 --- a/pkg/kubedog/options.go +++ b/pkg/kubedog/options.go @@ -18,12 +18,16 @@ type TrackOptions struct { Logs bool LogsSince time.Duration Filter *resource.FilterConfig + QPS float32 + Burst int } func NewTrackOptions() *TrackOptions { return &TrackOptions{ Timeout: 5 * time.Minute, LogsSince: 10 * time.Minute, + QPS: 100, + Burst: 200, } } @@ 
-41,3 +45,13 @@ func (o *TrackOptions) WithFilterConfig(config *resource.FilterConfig) *TrackOpt o.Filter = config return o } + +func (o *TrackOptions) WithQPS(qps float32) *TrackOptions { + o.QPS = qps + return o +} + +func (o *TrackOptions) WithBurst(burst int) *TrackOptions { + o.Burst = burst + return o +} diff --git a/pkg/kubedog/tracker.go b/pkg/kubedog/tracker.go index 752038b8..666ced83 100644 --- a/pkg/kubedog/tracker.go +++ b/pkg/kubedog/tracker.go @@ -8,11 +8,11 @@ import ( "sync" "time" - "github.com/werf/kubedog/pkg/kube" "github.com/werf/kubedog/pkg/tracker" "github.com/werf/kubedog/pkg/trackers/rollout/multitrack" "go.uber.org/zap" "k8s.io/client-go/kubernetes" + "k8s.io/client-go/tools/clientcmd" "github.com/helmfile/helmfile/pkg/resource" ) @@ -20,6 +20,8 @@ import ( type cacheKey struct { kubeContext string kubeconfig string + qps float32 + burst int } var ( @@ -41,6 +43,8 @@ type TrackerConfig struct { KubeContext string Kubeconfig string TrackOptions *TrackOptions + KubedogQPS *float32 + KubedogBurst *int } func NewTracker(config *TrackerConfig) (*Tracker, error) { @@ -54,16 +58,26 @@ func NewTracker(config *TrackerConfig) (*Tracker, error) { kubeconfig = os.Getenv("KUBECONFIG") } - clientSet, err := getOrCreateClient(config.KubeContext, kubeconfig) - if err != nil { - return nil, fmt.Errorf("failed to initialize kubernetes client: %w", err) - } - options := config.TrackOptions if options == nil { options = NewTrackOptions() } + qps := options.QPS + if config.KubedogQPS != nil { + qps = *config.KubedogQPS + } + + burst := options.Burst + if config.KubedogBurst != nil { + burst = *config.KubedogBurst + } + + clientSet, err := getOrCreateClient(config.KubeContext, kubeconfig, qps, burst) + if err != nil { + return nil, fmt.Errorf("failed to initialize kubernetes client: %w", err) + } + var filter *resource.ResourceFilter if options.Filter != nil { filter = resource.NewResourceFilter(options.Filter, logger) @@ -78,10 +92,12 @@ func 
NewTracker(config *TrackerConfig) (*Tracker, error) { }, nil } -func getOrCreateClient(kubeContext, kubeconfig string) (kubernetes.Interface, error) { +func getOrCreateClient(kubeContext, kubeconfig string, qps float32, burst int) (kubernetes.Interface, error) { key := cacheKey{ kubeContext: kubeContext, kubeconfig: kubeconfig, + qps: qps, + burst: burst, } kubeInitMu.Lock() @@ -91,18 +107,33 @@ func getOrCreateClient(kubeContext, kubeconfig string) (kubernetes.Interface, er return client, nil } - initOpts := kube.InitOptions{ - KubeConfigOptions: kube.KubeConfigOptions{ - Context: kubeContext, - ConfigPath: kubeconfig, - }, + var explicitPath string + if kubeconfig != "" { + explicitPath = kubeconfig + } + loadingRules := &clientcmd.ClientConfigLoadingRules{ + ExplicitPath: explicitPath, } - if err := kube.Init(initOpts); err != nil { - return nil, err + overrides := &clientcmd.ConfigOverrides{} + if kubeContext != "" { + overrides.CurrentContext = kubeContext + } + + cc := clientcmd.NewNonInteractiveDeferredLoadingClientConfig(loadingRules, overrides) + restConfig, err := cc.ClientConfig() + if err != nil { + return nil, fmt.Errorf("failed to load kubeconfig: %w", err) + } + + restConfig.QPS = qps + restConfig.Burst = burst + + client, err := kubernetes.NewForConfig(restConfig) + if err != nil { + return nil, fmt.Errorf("failed to create kubernetes client: %w", err) } - client := kube.Kubernetes clientCache[key] = client return client, nil diff --git a/pkg/kubedog/tracker_test.go b/pkg/kubedog/tracker_test.go index b09a96c2..6bc3eb2f 100644 --- a/pkg/kubedog/tracker_test.go +++ b/pkg/kubedog/tracker_test.go @@ -86,3 +86,45 @@ func TestTrackOptions_WithFilterConfig(t *testing.T) { assert.Equal(t, []string{"Deployment", "StatefulSet"}, opts.Filter.TrackKinds) assert.Equal(t, []string{"ConfigMap"}, opts.Filter.SkipKinds) } + +func TestTrackOptions_WithQPS(t *testing.T) { + opts := NewTrackOptions() + opts = opts.WithQPS(50.0) + + assert.Equal(t, float32(50.0), 
opts.QPS) +} + +func TestTrackOptions_WithBurst(t *testing.T) { + opts := NewTrackOptions() + opts = opts.WithBurst(100) + + assert.Equal(t, 100, opts.Burst) +} + +func TestTrackOptions_DefaultQPSBurst(t *testing.T) { + opts := NewTrackOptions() + + assert.Equal(t, float32(100), opts.QPS) + assert.Equal(t, 200, opts.Burst) +} + +func TestTrackerConfig_WithQPSBurst(t *testing.T) { + qps := float32(50.0) + burst := 100 + config := &TrackerConfig{ + Logger: nil, + Namespace: "test-ns", + KubeContext: "test-ctx", + Kubeconfig: "/test/kubeconfig", + TrackOptions: NewTrackOptions(), + KubedogQPS: &qps, + KubedogBurst: &burst, + } + + assert.NotNil(t, config) + assert.Equal(t, "test-ns", config.Namespace) + assert.Equal(t, &qps, config.KubedogQPS) + assert.Equal(t, &burst, config.KubedogBurst) + assert.Equal(t, float32(50.0), *config.KubedogQPS) + assert.Equal(t, 100, *config.KubedogBurst) +} diff --git a/pkg/state/helmx.go b/pkg/state/helmx.go index 6d82327c..62692e00 100644 --- a/pkg/state/helmx.go +++ b/pkg/state/helmx.go @@ -480,6 +480,8 @@ func (st *HelmState) trackWithKubedog(ctx context.Context, release *ReleaseSpec, KubeContext: kubeContext, Kubeconfig: st.kubeconfig, TrackOptions: trackOpts, + KubedogQPS: release.KubedogQPS, + KubedogBurst: release.KubedogBurst, }) if err != nil { return fmt.Errorf("failed to create kubedog tracker: %w", err) diff --git a/pkg/state/state.go b/pkg/state/state.go index 49b5d80b..ffadaf9a 100644 --- a/pkg/state/state.go +++ b/pkg/state/state.go @@ -466,6 +466,10 @@ type ReleaseSpec struct { SkipKinds []string `yaml:"skipKinds,omitempty"` // TrackResources is a whitelist of specific resources to track TrackResources []TrackResourceSpec `yaml:"trackResources,omitempty"` + // KubedogQPS specifies the QPS (queries per second) for kubedog kubernetes client + KubedogQPS *float32 `yaml:"kubedogQPS,omitempty"` + // KubedogBurst specifies the burst for kubedog kubernetes client + KubedogBurst *int `yaml:"kubedogBurst,omitempty"` } // 
TrackResourceSpec specifies a resource to track