284 lines
9.0 KiB
Go
284 lines
9.0 KiB
Go
// Copyright 2024 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
|
|
|
|
package storage
|
|
|
|
import (
|
|
"context"
|
|
"errors"
|
|
"fmt"
|
|
"strings"
|
|
"time"
|
|
|
|
mexporter "github.com/GoogleCloudPlatform/opentelemetry-operations-go/exporter/metric"
|
|
"github.com/google/uuid"
|
|
"go.opentelemetry.io/contrib/detectors/gcp"
|
|
"go.opentelemetry.io/otel/attribute"
|
|
"go.opentelemetry.io/otel/sdk/metric"
|
|
"go.opentelemetry.io/otel/sdk/metric/metricdata"
|
|
"go.opentelemetry.io/otel/sdk/resource"
|
|
"google.golang.org/api/option"
|
|
"google.golang.org/grpc"
|
|
"google.golang.org/grpc/experimental/stats"
|
|
"google.golang.org/grpc/stats/opentelemetry"
|
|
)
|
|
|
|
// Identifiers used when exporting client-side metrics.
const (
	// monitoredResourceName is the monitored resource type that the exporter
	// is configured with and that is recorded as the "gcp.resource_type"
	// resource attribute.
	monitoredResourceName = "storage.googleapis.com/Client"
	// metricPrefix is prepended to every metric name by metricFormatter.
	metricPrefix = "storage.googleapis.com/client/"
)
|
|
|
|
// Added to help with tests
|
|
type storageMonitoredResource struct {
|
|
project string
|
|
api string
|
|
location string
|
|
instance string
|
|
cloudPlatform string
|
|
host string
|
|
resource *resource.Resource
|
|
}
|
|
|
|
func (smr *storageMonitoredResource) exporter() (metric.Exporter, error) {
|
|
exporter, err := mexporter.New(
|
|
mexporter.WithProjectID(smr.project),
|
|
mexporter.WithMetricDescriptorTypeFormatter(metricFormatter),
|
|
mexporter.WithCreateServiceTimeSeries(),
|
|
mexporter.WithMonitoredResourceDescription(monitoredResourceName, []string{"project_id", "location", "cloud_platform", "host_id", "instance_id", "api"}),
|
|
)
|
|
if err != nil {
|
|
return nil, fmt.Errorf("storage: creating metrics exporter: %w", err)
|
|
}
|
|
return exporter, nil
|
|
}
|
|
|
|
func newStorageMonitoredResource(ctx context.Context, project, api string, opts ...resource.Option) (*storageMonitoredResource, error) {
|
|
detectedAttrs, err := resource.New(ctx, opts...)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
smr := &storageMonitoredResource{
|
|
instance: uuid.New().String(),
|
|
api: api,
|
|
project: project,
|
|
}
|
|
s := detectedAttrs.Set()
|
|
// Attempt to use resource detector project id if project id wasn't
|
|
// identified using ADC as a last resort. Otherwise metrics cannot be started.
|
|
if p, present := s.Value("cloud.account.id"); present && smr.project == "" {
|
|
smr.project = p.AsString()
|
|
} else if !present && smr.project == "" {
|
|
return nil, errors.New("google cloud project is required to start client-side metrics")
|
|
}
|
|
if v, ok := s.Value("cloud.region"); ok {
|
|
smr.location = v.AsString()
|
|
} else {
|
|
smr.location = "global"
|
|
}
|
|
if v, ok := s.Value("cloud.platform"); ok {
|
|
smr.cloudPlatform = v.AsString()
|
|
} else {
|
|
smr.cloudPlatform = "unknown"
|
|
}
|
|
if v, ok := s.Value("host.id"); ok {
|
|
smr.host = v.AsString()
|
|
} else if v, ok := s.Value("faas.id"); ok {
|
|
smr.host = v.AsString()
|
|
} else {
|
|
smr.host = "unknown"
|
|
}
|
|
smr.resource, err = resource.New(ctx, resource.WithAttributes([]attribute.KeyValue{
|
|
{Key: "gcp.resource_type", Value: attribute.StringValue(monitoredResourceName)},
|
|
{Key: "project_id", Value: attribute.StringValue(smr.project)},
|
|
{Key: "api", Value: attribute.StringValue(smr.api)},
|
|
{Key: "instance_id", Value: attribute.StringValue(smr.instance)},
|
|
{Key: "location", Value: attribute.StringValue(smr.location)},
|
|
{Key: "cloud_platform", Value: attribute.StringValue(smr.cloudPlatform)},
|
|
{Key: "host_id", Value: attribute.StringValue(smr.host)},
|
|
}...))
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
return smr, nil
|
|
}
|
|
|
|
type metricsContext struct {
|
|
// client options passed to gRPC channels
|
|
clientOpts []option.ClientOption
|
|
// instance of metric reader used by gRPC client-side metrics
|
|
provider *metric.MeterProvider
|
|
// clean func to call when closing gRPC client
|
|
close func()
|
|
}
|
|
|
|
type metricsConfig struct {
|
|
project string
|
|
interval time.Duration
|
|
customExporter *metric.Exporter
|
|
manualReader *metric.ManualReader // used by tests
|
|
disableExporter bool // used by tests disables exports
|
|
resourceOpts []resource.Option // used by tests
|
|
}
|
|
|
|
func newGRPCMetricContext(ctx context.Context, cfg metricsConfig) (*metricsContext, error) {
|
|
var exporter metric.Exporter
|
|
meterOpts := []metric.Option{}
|
|
if cfg.customExporter == nil {
|
|
var ropts []resource.Option
|
|
if cfg.resourceOpts != nil {
|
|
ropts = cfg.resourceOpts
|
|
} else {
|
|
ropts = []resource.Option{resource.WithDetectors(gcp.NewDetector())}
|
|
}
|
|
smr, err := newStorageMonitoredResource(ctx, cfg.project, "grpc", ropts...)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
exporter, err = smr.exporter()
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
meterOpts = append(meterOpts, metric.WithResource(smr.resource))
|
|
} else {
|
|
exporter = *cfg.customExporter
|
|
}
|
|
interval := time.Minute
|
|
if cfg.interval > 0 {
|
|
interval = cfg.interval
|
|
}
|
|
meterOpts = append(meterOpts,
|
|
// Metric views update histogram boundaries to be relevant to GCS
|
|
// otherwise default OTel histogram boundaries are used.
|
|
metric.WithView(
|
|
createHistogramView("grpc.client.attempt.duration", latencyHistogramBoundaries()),
|
|
createHistogramView("grpc.client.attempt.rcvd_total_compressed_message_size", sizeHistogramBoundaries()),
|
|
createHistogramView("grpc.client.attempt.sent_total_compressed_message_size", sizeHistogramBoundaries())),
|
|
)
|
|
if cfg.manualReader != nil {
|
|
meterOpts = append(meterOpts, metric.WithReader(cfg.manualReader))
|
|
}
|
|
if !cfg.disableExporter {
|
|
meterOpts = append(meterOpts, metric.WithReader(
|
|
metric.NewPeriodicReader(&exporterLogSuppressor{Exporter: exporter}, metric.WithInterval(interval))))
|
|
}
|
|
provider := metric.NewMeterProvider(meterOpts...)
|
|
mo := opentelemetry.MetricsOptions{
|
|
MeterProvider: provider,
|
|
Metrics: stats.NewMetrics(
|
|
"grpc.client.attempt.started",
|
|
"grpc.client.attempt.duration",
|
|
"grpc.client.attempt.sent_total_compressed_message_size",
|
|
"grpc.client.attempt.rcvd_total_compressed_message_size",
|
|
"grpc.client.call.duration",
|
|
"grpc.lb.wrr.rr_fallback",
|
|
"grpc.lb.wrr.endpoint_weight_not_yet_usable",
|
|
"grpc.lb.wrr.endpoint_weight_stale",
|
|
"grpc.lb.wrr.endpoint_weights",
|
|
"grpc.lb.rls.cache_entries",
|
|
"grpc.lb.rls.cache_size",
|
|
"grpc.lb.rls.default_target_picks",
|
|
"grpc.lb.rls.target_picks",
|
|
"grpc.lb.rls.failed_picks",
|
|
),
|
|
OptionalLabels: []string{"grpc.lb.locality"},
|
|
}
|
|
opts := []option.ClientOption{
|
|
option.WithGRPCDialOption(
|
|
opentelemetry.DialOption(opentelemetry.Options{MetricsOptions: mo})),
|
|
option.WithGRPCDialOption(
|
|
grpc.WithDefaultCallOptions(grpc.StaticMethodCallOption{})),
|
|
}
|
|
return &metricsContext{
|
|
clientOpts: opts,
|
|
provider: provider,
|
|
close: func() {
|
|
provider.Shutdown(ctx)
|
|
},
|
|
}, nil
|
|
}
|
|
|
|
// Silences permission errors after initial error is emitted to prevent
|
|
// chatty logs.
|
|
type exporterLogSuppressor struct {
|
|
metric.Exporter
|
|
emittedFailure bool
|
|
}
|
|
|
|
// Implements OTel SDK metric.Exporter interface to prevent noisy logs from
|
|
// lack of credentials after initial failure.
|
|
// https://pkg.go.dev/go.opentelemetry.io/otel/sdk/metric@v1.28.0#Exporter
|
|
func (e *exporterLogSuppressor) Export(ctx context.Context, rm *metricdata.ResourceMetrics) error {
|
|
if err := e.Exporter.Export(ctx, rm); err != nil && !e.emittedFailure {
|
|
if strings.Contains(err.Error(), "PermissionDenied") {
|
|
e.emittedFailure = true
|
|
return fmt.Errorf("gRPC metrics failed due permission issue: %w", err)
|
|
}
|
|
return err
|
|
}
|
|
return nil
|
|
}
|
|
|
|
// latencyHistogramBoundaries returns the explicit bucket boundaries, in
// seconds, used for latency histograms.
//
// The first 100ms are covered by 2ms buckets. After that the bucket width
// starts at 10ms and doubles every 10 buckets (10 x 10ms, 10 x 20ms, ...),
// stopping before the boundary reaches 5 minutes (300s).
func latencyHistogramBoundaries() []float64 {
	// At most 50 + 150 boundaries are produced by the two loops below.
	boundaries := make([]float64, 0, 200)
	boundary := 0.0
	increment := 0.002
	// 2ms buckets for first 100ms, so we can have higher resolution for
	// uploads and downloads in the 100 KiB range.
	for i := 0; i < 50; i++ {
		boundaries = append(boundaries, boundary)
		// increment by 2ms
		boundary += increment
	}
	// For the remaining buckets do 10 10ms, 10 20ms, and so on, up until
	// 5 minutes. The width must be reset here; otherwise this phase would
	// keep using the 2ms width from the loop above.
	increment = 0.01
	for i := 0; i < 150 && boundary < 300; i++ {
		boundaries = append(boundaries, boundary)
		if i != 0 && i%10 == 0 {
			increment *= 2
		}
		boundary += increment
	}
	return boundaries
}
|
|
|
|
// sizeHistogramBoundaries returns the explicit bucket boundaries, in bytes,
// used for message-size histograms: 128 KiB steps until 4 MiB is reached,
// after which the step doubles on every bucket. Generation stops once a
// boundary would exceed 16 GiB or 200 boundaries have been produced.
func sizeHistogramBoundaries() []float64 {
	const (
		kib = 1024.0
		mib = 1024.0 * kib
		gib = 1024.0 * mib

		maxBoundaries = 200
		linearLimit   = 4 * mib
		upperLimit    = 16 * gib
	)

	var bounds []float64
	step := 128 * kib
	for edge := 0.0; len(bounds) < maxBoundaries && edge <= upperLimit; {
		bounds = append(bounds, edge)
		edge += step
		// From 4 MiB onwards the step doubles after every boundary.
		if edge >= linearLimit {
			step *= 2
		}
	}
	return bounds
}
|
|
|
|
func createHistogramView(name string, boundaries []float64) metric.View {
|
|
return metric.NewView(metric.Instrument{
|
|
Name: name,
|
|
Kind: metric.InstrumentKindHistogram,
|
|
}, metric.Stream{
|
|
Name: name,
|
|
Aggregation: metric.AggregationExplicitBucketHistogram{Boundaries: boundaries},
|
|
})
|
|
}
|
|
|
|
func metricFormatter(m metricdata.Metrics) string {
|
|
return metricPrefix + strings.ReplaceAll(string(m.Name), ".", "/")
|
|
}
|