feat(telemetry): migrate from Sentry to OpenTelemetry

- Initialize OTEL only when OTEL_EXPORTER_OTLP_ENDPOINT is set
- Support both OTLP/gRPC (host:port) and OTLP/HTTP (http[s]://...)
- Map CIRRUS_SENTRY_TAGS to OTEL resource attributes
- Replace Sentry transactions/events with OTEL spans + attributes
- Add Telemetry wrapper and update code to record errors + flush
- Remove sentry-cocoa dependency; add opentelemetry-swift + NIO

This preserves existing tags and measurements under OTEL and keeps telemetry disabled by default.
This commit is contained in:
Fedor Korotkov 2025-09-15 07:07:47 -04:00
parent 02bf5651e7
commit c60d08bf43
9 changed files with 278 additions and 81 deletions

View File

@ -1,5 +1,5 @@
{
"originHash" : "668bad809d4882f75f097e66a12a6dbc8e61ec998f1800a7e09439c854fadda1",
"originHash" : "aa0a5df26b9e35d1908d6876d045af7ce1899086d641507e5faa9d1f9bd29787",
"pins" : [
{
"identity" : "antlr4",
@ -46,6 +46,24 @@
"version" : "1.24.2"
}
},
{
"identity" : "opentelemetry-swift",
"kind" : "remoteSourceControl",
"location" : "https://github.com/open-telemetry/opentelemetry-swift",
"state" : {
"revision" : "6a2c29d53ff0b543b551b2221538bd3d0206c6d6",
"version" : "1.15.0"
}
},
{
"identity" : "opentracing-objc",
"kind" : "remoteSourceControl",
"location" : "https://github.com/undefinedlabs/opentracing-objc",
"state" : {
"revision" : "18c1a35ca966236cee0c5a714a51a73ff33384c1",
"version" : "0.5.2"
}
},
{
"identity" : "semaphore",
"kind" : "remoteSourceControl",
@ -55,15 +73,6 @@
"version" : "0.1.0"
}
},
{
"identity" : "sentry-cocoa",
"kind" : "remoteSourceControl",
"location" : "https://github.com/getsentry/sentry-cocoa",
"state" : {
"revision" : "65b3d2a7608685e8d4a37c68fa2c64f28d0b537e",
"version" : "8.51.1"
}
},
{
"identity" : "swift-algorithms",
"kind" : "remoteSourceControl",
@ -127,6 +136,15 @@
"version" : "1.6.1"
}
},
{
"identity" : "swift-metrics",
"kind" : "remoteSourceControl",
"location" : "https://github.com/apple/swift-metrics.git",
"state" : {
"revision" : "4c83e1cdf4ba538ef6e43a9bbd0bcc33a0ca46e3",
"version" : "2.7.0"
}
},
{
"identity" : "swift-nio",
"kind" : "remoteSourceControl",
@ -261,6 +279,15 @@
"branch" : "master",
"revision" : "e03289289155b4e7aa565e32862f9cb42140596a"
}
},
{
"identity" : "thrift-swift",
"kind" : "remoteSourceControl",
"location" : "https://github.com/undefinedlabs/Thrift-Swift",
"state" : {
"revision" : "18ff09e6b30e589ed38f90a1af23e193b8ecef8e",
"version" : "1.1.2"
}
}
],
"version" : 3

View File

@ -17,7 +17,7 @@ let package = Package(
.package(url: "https://github.com/antlr/antlr4", exact: "4.13.2"),
.package(url: "https://github.com/apple/swift-atomics.git", .upToNextMajor(from: "1.2.0")),
.package(url: "https://github.com/nicklockwood/SwiftFormat", from: "0.53.6"),
.package(url: "https://github.com/getsentry/sentry-cocoa", from: "8.51.1"),
.package(url: "https://github.com/open-telemetry/opentelemetry-swift", from: "1.7.0"),
.package(url: "https://github.com/cfilipov/TextTable", branch: "master"),
.package(url: "https://github.com/sersoft-gmbh/swift-sysctl.git", from: "1.8.0"),
.package(url: "https://github.com/orchetect/SwiftRadix", from: "1.3.1"),
@ -25,6 +25,7 @@ let package = Package(
.package(url: "https://github.com/fumoboy007/swift-retry", from: "0.2.3"),
.package(url: "https://github.com/jozefizso/swift-xattr", from: "3.0.0"),
.package(url: "https://github.com/grpc/grpc-swift.git", .upToNextMajor(from: "1.24.2")),
.package(url: "https://github.com/apple/swift-nio.git", from: "2.83.0"),
.package(url: "https://buf.build/gen/swift/git/1.24.2-00000000000000-17d7dedafb88.1/cirruslabs_tart-guest-agent_grpc_swift.git", revision: "1.24.2-00000000000000-17d7dedafb88.1"),
],
targets: [
@ -35,7 +36,11 @@ let package = Package(
.product(name: "SwiftDate", package: "SwiftDate"),
.product(name: "Antlr4Static", package: "Antlr4"),
.product(name: "Atomics", package: "swift-atomics"),
.product(name: "Sentry", package: "sentry-cocoa"),
.product(name: "OpenTelemetryApi", package: "opentelemetry-swift"),
.product(name: "OpenTelemetrySdk", package: "opentelemetry-swift"),
.product(name: "OpenTelemetryProtocolExporter", package: "opentelemetry-swift"),
.product(name: "OpenTelemetryProtocolExporterHTTP", package: "opentelemetry-swift"),
.product(name: "NIO", package: "swift-nio"),
.product(name: "TextTable", package: "TextTable"),
.product(name: "Sysctl", package: "swift-sysctl"),
.product(name: "SwiftRadix", package: "SwiftRadix"),

View File

@ -2,7 +2,6 @@ import ArgumentParser
import Foundation
import Network
import SystemConfiguration
import Sentry
enum IPResolutionStrategy: String, ExpressibleByArgument, CaseIterable {
case dhcp, arp, agent

View File

@ -1,6 +1,6 @@
import ArgumentParser
import Dispatch
import Sentry
import OpenTelemetryApi
import SwiftUI
import SwiftDate
@ -109,9 +109,10 @@ struct Prune: AsyncParsableCommand {
return
}
SentrySDK.configureScope { scope in
scope.setContext(value: ["requiredBytes": requiredBytes], key: "Prune")
}
// Record desired reclaim size as an event context
Telemetry.addEvent("Prune.required", attributes: [
"requiredBytes": .int(Int(requiredBytes))
])
// Figure out how much disk space is available
let attrs = try Config().tartCacheDir.resourceValues(forKeys: [
@ -123,18 +124,17 @@ struct Prune: AsyncParsableCommand {
UInt64(attrs.volumeAvailableCapacityForImportantUsage!)
)
SentrySDK.configureScope { scope in
scope.setContext(value: [
"volumeAvailableCapacity": attrs.volumeAvailableCapacity!,
"volumeAvailableCapacityForImportantUsage": attrs.volumeAvailableCapacityForImportantUsage!,
"volumeAvailableCapacityCalculated": volumeAvailableCapacityCalculated
], key: "Prune")
}
Telemetry.addEvent("Prune.capacity", attributes: [
"volumeAvailableCapacity": .int(Int(attrs.volumeAvailableCapacity!)),
"volumeAvailableCapacityForImportantUsage": .int(Int(attrs.volumeAvailableCapacityForImportantUsage!)),
"volumeAvailableCapacityCalculated": .int(Int(volumeAvailableCapacityCalculated))
])
if volumeAvailableCapacityCalculated <= 0 {
SentrySDK.capture(message: "Zero volume capacity reported") { scope in
scope.setLevel(.warning)
}
Telemetry.addEvent("Prune.warning", attributes: [
"message": .string("Zero volume capacity reported"),
"level": .string("warning")
])
return
}
@ -149,7 +149,7 @@ struct Prune: AsyncParsableCommand {
}
private static func reclaimIfPossible(_ reclaimBytes: UInt64, _ initiator: Prunable? = nil) throws {
let transaction = SentrySDK.startTransaction(name: "Pruning cache", operation: "prune", bindToScope: true)
let transaction = Telemetry.startTransaction(name: "Pruning cache", operation: "prune", bindToScope: true)
defer { transaction.finish() }
let prunableStorages: [PrunableStorage] = [VMStorageOCI(), try IPSWCache()]
@ -177,13 +177,16 @@ struct Prune: AsyncParsableCommand {
continue
}
try SentrySDK.span?.setData(value: prunable.allocatedSizeBytes(), key: prunable.url.path)
Telemetry.addEvent("Prune.prunable", attributes: [
"path": .string(prunable.url.path),
"size_bytes": .int(try prunable.allocatedSizeBytes())
])
cacheReclaimedBytes += try prunable.allocatedSizeBytes()
try prunable.delete()
}
SentrySDK.span?.setMeasurement(name: "gc_disk_reclaimed", value: cacheReclaimedBytes as NSNumber, unit: MeasurementUnitInformation.byte);
Telemetry.setAttribute("gc_disk_reclaimed", .int(cacheReclaimedBytes))
}
}

View File

@ -4,7 +4,7 @@ import Darwin
import Dispatch
import SwiftUI
import Virtualization
import Sentry
import OpenTelemetryApi
import System
var vm: VM?
@ -498,9 +498,9 @@ struct Run: AsyncParsableCommand {
Foundation.exit(0)
} catch {
// Capture the error into Sentry
SentrySDK.capture(error: error)
SentrySDK.flush(timeout: 2.seconds.timeInterval)
// Record the error into OpenTelemetry
Telemetry.recordError(error)
Telemetry.flush()
fputs("\(error)\n", stderr)

View File

@ -1,7 +1,7 @@
import ArgumentParser
import Darwin
import Foundation
import Sentry
import OpenTelemetryApi
@main
struct Root: AsyncParsableCommand {
@ -54,38 +54,9 @@ struct Root: AsyncParsableCommand {
// Parse command
var command = try parseAsRoot()
// Initialize Sentry
if let dsn = ProcessInfo.processInfo.environment["SENTRY_DSN"] {
SentrySDK.start { options in
options.dsn = dsn
options.releaseName = CI.release
options.tracesSampleRate = Float(
ProcessInfo.processInfo.environment["SENTRY_TRACES_SAMPLE_RATE"] ?? "1.0"
) as NSNumber?
// By default only 5XX are captured
// Let's capture everything but 401 (unauthorized)
options.enableCaptureFailedRequests = true
options.failedRequestStatusCodes = [
HttpStatusCodeRange(min: 400, max: 400),
HttpStatusCodeRange(min: 402, max: 599)
]
}
}
defer { SentrySDK.flush(timeout: 2.seconds.timeInterval) }
SentrySDK.configureScope { scope in
scope.setExtra(value: ProcessInfo.processInfo.arguments, key: "Command-line arguments")
}
// Enrich future events with Cirrus CI-specific tags
if let tags = ProcessInfo.processInfo.environment["CIRRUS_SENTRY_TAGS"] {
SentrySDK.configureScope { scope in
for (key, value) in tags.split(separator: ",").compactMap({ parseCirrusSentryTag($0) }) {
scope.setTag(value: value, key: key)
}
}
}
// Initialize OpenTelemetry if configured
Telemetry.bootstrapFromEnv()
defer { Telemetry.flush() }
// Run garbage-collection before each command (shouldn't take too long)
if type(of: command) != type(of: Pull()) && type(of: command) != type(of: Clone()){
@ -108,9 +79,9 @@ struct Root: AsyncParsableCommand {
Foundation.exit(execCustomExitCodeError.exitCode)
}
// Capture the error into Sentry
SentrySDK.capture(error: error)
SentrySDK.flush(timeout: 2.seconds.timeInterval)
// Record the error into OpenTelemetry
Telemetry.recordError(error)
Telemetry.flush()
// Handle a non-ArgumentParser's exception that requires a specific exit code to be set
if let errorWithExitCode = error as? HasExitCode {

View File

@ -0,0 +1,191 @@
import Foundation
#if canImport(OpenTelemetryApi)
import OpenTelemetryApi
import OpenTelemetrySdk
import OpenTelemetryProtocolExporterCommon
import OpenTelemetryProtocolExporterGrpc
import OpenTelemetryProtocolExporterHttp
import GRPC
import NIO
#endif
/// Terminal status a caller can attach to a span when finishing it through
/// `TelemetrySpan.finish(status:)`.
enum TelemetrySpanStatus {
  /// The traced operation was cancelled before it completed.
  case cancelled
}
/// Handle returned by `Telemetry.startTransaction` that lets callers end the
/// underlying span without depending on OpenTelemetry types directly.
///
/// When the OpenTelemetry modules cannot be imported the handle holds nothing
/// and `finish(status:)` does nothing.
final class TelemetrySpan {
  #if canImport(OpenTelemetryApi)
  private let span: Span?
  #else
  private let span: Any? = nil
  #endif

  /// Wraps `span`; anything that is not an OpenTelemetry `Span` is stored as `nil`.
  init(_ span: Any?) {
    #if canImport(OpenTelemetryApi)
    self.span = span as? Span
    #endif
  }

  /// Ends the wrapped span, optionally marking it with `status` first.
  ///
  /// If the wrapped span is the one currently bound to `Telemetry.currentSpan`,
  /// that binding is cleared as well.
  ///
  /// - Parameter status: Optional terminal status; `.cancelled` is recorded as
  ///   an error status with the description "cancelled".
  func finish(status: TelemetrySpanStatus? = nil) {
    #if canImport(OpenTelemetryApi)
    guard let underlying = span else {
      return
    }

    switch status {
    case .cancelled?:
      underlying.status = .error(description: "cancelled")
    case nil:
      break
    }

    underlying.end()

    // Only unbind when this exact span instance is the bound one.
    if Telemetry.currentSpan === underlying {
      Telemetry.currentSpan = nil
    }
    #endif
  }
}
/// Facade over OpenTelemetry tracing used by the rest of the CLI.
///
/// Nothing is exported unless `bootstrapFromEnv()` finds a usable
/// `OTEL_EXPORTER_OTLP_ENDPOINT`; until then the span helpers operate on
/// whatever tracer provider OpenTelemetry has registered by default.
enum Telemetry {
  #if canImport(OpenTelemetryApi)
  static var tracer: Tracer = OpenTelemetry.instance.tracerProvider.get(instrumentationName: "tart", instrumentationVersion: CI.version)
  /// Span that `addEvent`, `setAttribute` and `recordError` attach to, if any.
  static var currentSpan: Span?
  private static var eventLoopGroup: EventLoopGroup?
  private static var providerSdk: TracerProviderSdk?
  #else
  static var currentSpan: Any?
  #endif

  /// Port used for OTLP/gRPC endpoints that omit a port or carry an unparsable one.
  private static let defaultGrpcPort = 4317

  /// Configures OpenTelemetry when `OTEL_EXPORTER_OTLP_ENDPOINT` is set.
  ///
  /// Endpoints starting with `http://` or `https://` use the OTLP/HTTP exporter;
  /// anything else is parsed as `host[:port]` for an insecure OTLP/gRPC channel.
  /// A malformed endpoint leaves telemetry disabled and prints a warning to
  /// stderr instead of crashing the command.
  static func bootstrapFromEnv() {
    guard let endpoint = ProcessInfo.processInfo.environment["OTEL_EXPORTER_OTLP_ENDPOINT"], !endpoint.isEmpty else {
      return
    }

    #if canImport(OpenTelemetryApi)
    let resource = buildResource()

    // Both exporters share the same configuration.
    let headerList = parseHeaders(ProcessInfo.processInfo.environment["OTEL_EXPORTER_OTLP_HEADERS"])
    let config = OtlpConfiguration(timeout: 10, headers: headerList, exportAsJson: false)

    let exporter: SpanExporter
    let normalized = endpoint.lowercased()

    if normalized.hasPrefix("http://") || normalized.hasPrefix("https://") {
      guard let url = URL(string: endpoint), url.host?.isEmpty == false else {
        warn("OTEL_EXPORTER_OTLP_ENDPOINT is not a valid URL, telemetry is disabled")
        return
      }

      // NOTE(review): the URL is passed to the exporter verbatim. The OTLP
      // exporter specification treats OTEL_EXPORTER_OTLP_ENDPOINT as a base URL
      // to which "/v1/traces" is appended; confirm whether that is wanted here.
      exporter = OtlpHttpTraceExporter(endpoint: url, config: config, envVarHeaders: nil)
    } else {
      // gRPC: parse host[:port]. Empty pieces are kept so that an endpoint such
      // as ":4317" is rejected instead of treating "4317" as the host name.
      let parts = endpoint.split(separator: ":", maxSplits: 1, omittingEmptySubsequences: false)

      guard let hostPart = parts.first, !hostPart.isEmpty else {
        warn("OTEL_EXPORTER_OTLP_ENDPOINT has no host, telemetry is disabled")
        return
      }

      let host = String(hostPart)
      let port = parts.count > 1 ? Int(parts[1]) ?? defaultGrpcPort : defaultGrpcPort

      // The event loop group is created only after validation so that an early
      // return cannot leave a running group behind.
      let group = MultiThreadedEventLoopGroup(numberOfThreads: 1)
      eventLoopGroup = group

      let channel = ClientConnection.insecure(group: group).connect(host: host, port: port)
      exporter = OtlpTraceExporter(channel: channel, config: config, envVarHeaders: nil)
    }

    let spanProcessor = BatchSpanProcessor(spanExporter: exporter)
    let provider = TracerProviderBuilder()
      .add(spanProcessor: spanProcessor)
      .with(resource: resource)
      .build()

    providerSdk = provider
    OpenTelemetry.registerTracerProvider(tracerProvider: provider)

    // Re-fetch the tracer so it comes from the provider registered above.
    tracer = OpenTelemetry.instance.tracerProvider.get(instrumentationName: "tart", instrumentationVersion: CI.version)
    #endif
  }

  /// Flushes pending spans and shuts down the gRPC event loop group, if one was created.
  ///
  /// Calling this more than once is harmless: the group is released after the
  /// first call, and later calls only repeat the flush.
  static func flush() {
    #if canImport(OpenTelemetryApi)
    providerSdk?.forceFlush(timeout: 5)

    if let group = eventLoopGroup {
      // Best effort: a failed shutdown must not mask the command's own result.
      try? group.syncShutdownGracefully()
      eventLoopGroup = nil
    }
    #endif
  }

  /// Starts a span named `name` and returns a handle used to finish it.
  ///
  /// - Parameters:
  ///   - name: Span name.
  ///   - operation: When given, stored in the `operation` attribute; the span
  ///     kind is then set to `.internal`.
  ///   - bindToScope: When `true`, the new span becomes `currentSpan`,
  ///     replacing any span bound before.
  /// - Returns: A handle wrapping the started span; it wraps nothing when the
  ///   OpenTelemetry modules are unavailable.
  static func startTransaction(name: String, operation: String? = nil, bindToScope: Bool = false) -> TelemetrySpan {
    #if canImport(OpenTelemetryApi)
    let builder = tracer.spanBuilder(spanName: name)

    if let op = operation {
      builder.setSpanKind(spanKind: .internal)
      builder.setAttribute(key: "operation", value: op)
    }

    let span = builder.startSpan()

    if bindToScope {
      currentSpan = span
    }

    return TelemetrySpan(span)
    #else
    return TelemetrySpan(nil)
    #endif
  }

  /// Records `error` on the current span, or on a short-lived span named
  /// "error" when no span is bound.
  static func recordError(_ error: Error) {
    #if canImport(OpenTelemetryApi)
    let boundSpan = currentSpan
    let span = boundSpan ?? tracer.spanBuilder(spanName: "error").startSpan()

    span.recordException(error)
    span.status = .error(description: String(describing: error))

    // A span created just for this error has no other owner, so end it here.
    if boundSpan == nil {
      span.end()
    }
    #endif
  }

  /// Adds an event to the current span; does nothing when no span is bound.
  static func addEvent(_ name: String, attributes: [String: AttributeValue] = [:]) {
    #if canImport(OpenTelemetryApi)
    currentSpan?.addEvent(name: name, attributes: attributes)
    #endif
  }

  /// Sets an attribute on the current span; does nothing when no span is bound.
  static func setAttribute(_ key: String, _ value: AttributeValue) {
    #if canImport(OpenTelemetryApi)
    currentSpan?.setAttribute(key: key, value: value)
    #endif
  }

  /// Builds the resource describing this process: service name and version,
  /// command-line arguments, and any `CIRRUS_SENTRY_TAGS` entries.
  private static func buildResource() -> Resource {
    #if canImport(OpenTelemetryApi)
    var attributes: [String: AttributeValue] = [
      "service.name": .string("tart"),
      "service.version": .string(CI.version),
      "process.command_args": AttributeValue(ProcessInfo.processInfo.arguments)
    ]

    // Migrate Sentry tags to resource attributes if present. A tag that reuses
    // one of the keys above overwrites it.
    if let tags = ProcessInfo.processInfo.environment["CIRRUS_SENTRY_TAGS"] {
      for (key, value) in parseTags(tags) {
        attributes[key] = .string(value)
      }
    }

    return Resource(attributes: attributes)
    #else
    return Resource()
    #endif
  }

  /// Parses `CIRRUS_SENTRY_TAGS` (`key=value,key=value`) into pairs.
  private static func parseTags(_ raw: String) -> [(String, String)] {
    parseKeyValuePairs(raw)
  }

  /// Parses `OTEL_EXPORTER_OTLP_HEADERS` (`key=value,key=value`) into pairs;
  /// an unset variable yields no headers.
  private static func parseHeaders(_ raw: String?) -> [(String, String)] {
    guard let raw = raw else {
      return []
    }

    return parseKeyValuePairs(raw)
  }

  /// Splits a comma-separated list of `key=value` entries.
  ///
  /// Entries without both a key and a value are skipped. Whitespace around keys
  /// and values is trimmed so that "a=1, b=2" yields the key "b" rather than " b".
  /// Only the first "=" separates key from value.
  private static func parseKeyValuePairs(_ raw: String) -> [(String, String)] {
    raw.split(separator: ",").compactMap { entry -> (String, String)? in
      let pieces = entry.split(separator: "=", maxSplits: 1)

      guard pieces.count == 2 else {
        return nil
      }

      let key = pieces[0].trimmingCharacters(in: .whitespaces)
      let value = pieces[1].trimmingCharacters(in: .whitespaces)

      guard !key.isEmpty else {
        return nil
      }

      return (key, value)
    }
  }

  /// Writes a telemetry configuration warning to stderr. The endpoint value is
  /// deliberately left out of the message because it may embed credentials.
  private static func warn(_ message: String) {
    fputs("warning: \(message)\n", stderr)
  }
}

View File

@ -1,6 +1,6 @@
import Compression
import Foundation
import Sentry
import OpenTelemetryApi
enum OCIError: Error {
case ShouldBeExactlyOneLayer
@ -43,7 +43,7 @@ extension VMDirectory {
}
let diskCompressedSize = layers.map { Int64($0.size) }.reduce(0, +)
SentrySDK.span?.setMeasurement(name: "compressed_disk_size", value: diskCompressedSize as NSNumber, unit: MeasurementUnitInformation.byte)
Telemetry.setAttribute("compressed_disk_size", .int(Int(diskCompressedSize)))
let prettyDiskSize = String(format: "%.1f", Double(diskCompressedSize) / 1_000_000_000.0)
defaultLogger.appendNewLine("pulling disk (\(prettyDiskSize) GB compressed)...")

View File

@ -1,5 +1,5 @@
import Foundation
import Sentry
import OpenTelemetryApi
import Retry
class VMStorageOCI: PrunableStorage {
@ -141,9 +141,10 @@ class VMStorageOCI: PrunableStorage {
}
func pull(_ name: RemoteName, registry: Registry, concurrency: UInt, deduplicate: Bool) async throws {
SentrySDK.configureScope { scope in
scope.setContext(value: ["imageName": name.description], key: "OCI")
}
// Record image name for diagnostics
Telemetry.addEvent("OCI.pull.start", attributes: [
"imageName": .string(name.description)
])
defaultLogger.appendNewLine("pulling manifest...")
@ -177,7 +178,7 @@ class VMStorageOCI: PrunableStorage {
}
if !exists(digestName) {
let transaction = SentrySDK.startTransaction(name: name.description, operation: "pull", bindToScope: true)
let transaction = Telemetry.startTransaction(name: name.description, operation: "pull", bindToScope: true)
let tmpVMDir = try VMDirectory.temporaryDeterministic(key: name.description)
// Open an existing VM directory corresponding to this name, if any,
@ -190,9 +191,9 @@ class VMStorageOCI: PrunableStorage {
// Try to reclaim some cache space if we know the VM size in advance
if let uncompressedDiskSize = manifest.uncompressedDiskSize() {
SentrySDK.configureScope { scope in
scope.setContext(value: ["imageUncompressedDiskSize": uncompressedDiskSize], key: "OCI")
}
Telemetry.addEvent("OCI.pull.uncompressed_size", attributes: [
"bytes": .int(Int(uncompressedDiskSize))
])
let otherVMFilesSize: UInt64 = 128 * 1024 * 1024
@ -227,7 +228,7 @@ class VMStorageOCI: PrunableStorage {
try move(digestName, from: tmpVMDir)
transaction.finish()
}, onCancel: {
transaction.finish(status: SentrySpanStatus.cancelled)
transaction.finish(status: .cancelled)
try? FileManager.default.removeItem(at: tmpVMDir.baseURL)
})
} else {