Мониторинг GraphQL через Apollo Studio и альтернативы
GraphQL-мониторинг отличается от мониторинга REST: недостаточно смотреть на эндпоинт /graphql — нужно видеть производительность каждой операции, каждого поля и каждого резолвера в отдельности. Apollo Studio, GraphQL Hive и самостоятельные решения на OpenTelemetry дают такую детализацию.
Apollo Studio (GraphOS)
# Install. The usage-reporting plugin ships inside @apollo/server and is
# imported from the '@apollo/server/plugin/usageReporting' subpath, so it is
# not a separate package; graphql is a required peer dependency.
npm install @apollo/server graphql
import { ApolloServer } from '@apollo/server'
import { ApolloServerPluginUsageReporting } from '@apollo/server/plugin/usageReporting'
// Options for the Apollo Studio (GraphOS) usage-reporting plugin.
// APOLLO_KEY is taken from the environment variables.
const usageReportingOptions = {
  // Do not send sensitive variables
  sendVariableValues: { exceptNames: ['password', 'token', 'secret'] },
  sendHeaders: { exceptNames: ['Authorization', 'Cookie'] },
  // Trace sampling (100% is expensive, 10% is enough for analysis)
  fieldLevelInstrumentation: 0.1
}

// Apollo Server instance with usage reporting enabled.
const server = new ApolloServer({
  typeDefs,
  resolvers,
  plugins: [ApolloServerPluginUsageReporting(usageReportingOptions)]
})
Apollo Studio предоставляет:
- Разбивку p50/p95/p99 latency по операциям
- Field usage — какие поля реально используют клиенты (помогает безопасно удалять deprecated-поля)
- Schema checks — сравнение изменений схемы с реальными клиентскими запросами
- Alerts по деградации производительности
GraphQL Hive (self-hosted альтернатива)
# Start the GraphQL Hive CLI image with `docker run` in detached mode,
# passing the token and the target through environment variables.
docker run --detach \
  --env HIVE_TOKEN=your-token \
  --env TARGET=your-org/your-project/production \
  ghcr.io/kamilkisiela/graphql-hive/cli:latest
import { useHive } from '@graphql-hive/client'
import { envelop, useSchema } from '@envelop/core'
// Settings for the Hive plugin: usage collection plus schema-reporting metadata.
const hiveOptions = {
  enabled: true,
  token: process.env.HIVE_TOKEN,
  // Report every operation except introspection queries
  usage: { sampleRate: 1.0, exclude: ['IntrospectionQuery'] },
  // Metadata attached to schema reports
  reporting: { author: 'CI Pipeline', commit: process.env.GIT_SHA }
}

// Envelop pipeline: the schema plugin first, then Hive reporting.
const getEnveloped = envelop({
  plugins: [useSchema(schema), useHive(hiveOptions)]
})
OpenTelemetry трассировка резолверов
Для self-hosted мониторинга с Jaeger/Tempo:
import { ApolloServer } from '@apollo/server'
import { context, trace, SpanStatusCode } from '@opentelemetry/api'
// Plugin that traces every resolver: one span per GraphQL operation plus one
// child span per resolved field.
const tracingPlugin = {
  /**
   * Opens the operation span and returns the request lifecycle hooks.
   * @param {{ request: { operationName?: string } }} requestContext
   * @returns {Promise<object>} hooks: didResolveOperation, executionDidStart, willSendResponse
   */
  async requestDidStart({ request }) {
    const tracer = trace.getTracer('graphql')
    const operationName = request.operationName || 'anonymous'
    const span = tracer.startSpan('graphql.operation', {
      attributes: {
        'graphql.operation.name': operationName
      }
    })
    // `startSpan` has no `parent` option: the parent is taken from the context
    // passed as the third argument, so build a context carrying the operation span.
    const operationContext = trace.setSpan(context.active(), span)

    return {
      // The real operation type (query / mutation / subscription) is only
      // known once the operation has been resolved, so set the attribute here
      // instead of hard-coding 'query' when the span is created.
      async didResolveOperation({ operation }) {
        if (operation?.operation) {
          span.setAttribute('graphql.operation.type', operation.operation)
        }
      },
      async executionDidStart() {
        return {
          willResolveField({ info }) {
            const fieldSpan = tracer.startSpan(
              `graphql.resolve.${info.parentType.name}.${info.fieldName}`,
              {},
              operationContext
            )
            // The returned callback runs when the resolver finishes;
            // `error` is set when the resolver failed.
            return (error) => {
              if (error) {
                fieldSpan.setStatus({ code: SpanStatusCode.ERROR, message: error.message })
              }
              fieldSpan.end()
            }
          }
        }
      },
      async willSendResponse({ response }) {
        if (response.body.kind === 'single' && response.body.singleResult.errors) {
          span.setStatus({ code: SpanStatusCode.ERROR })
        }
        span.end()
      }
    }
  }
}
Prometheus метрики операций
import { Counter, Histogram, register } from 'prom-client'
// Histogram bucket boundaries, in seconds.
const operationBuckets = [0.01, 0.05, 0.1, 0.5, 1, 2, 5]
const fieldResolveBuckets = [0.001, 0.005, 0.01, 0.05, 0.1, 0.5]

// Duration of a whole GraphQL operation, labelled by name, type and outcome.
const gqlOperationDuration = new Histogram({
  buckets: operationBuckets,
  labelNames: ['operation_name', 'operation_type', 'status'],
  help: 'GraphQL operation execution time',
  name: 'graphql_operation_duration_seconds'
})

// Duration of a single field resolver, labelled by parent type and field.
const gqlFieldResolveDuration = new Histogram({
  buckets: fieldResolveBuckets,
  labelNames: ['type_name', 'field_name'],
  help: 'GraphQL field resolver execution time',
  name: 'graphql_field_resolve_duration_seconds'
})

// Number of GraphQL errors, labelled by operation name and error code.
const gqlErrors = new Counter({
  labelNames: ['operation_name', 'error_code'],
  help: 'GraphQL errors count',
  name: 'graphql_errors_total'
})
// Seconds elapsed since `start`, a value from process.hrtime.bigint().
// The high-resolution monotonic clock is used because Date.now() has only
// millisecond resolution (too coarse for the 1 ms resolver bucket) and can
// jump when the wall clock is adjusted.
const secondsSince = (start) => Number(process.hrtime.bigint() - start) / 1e9

// Apollo Server plugin that records Prometheus metrics: operation duration,
// per-field resolver duration and an error counter.
const metricsPlugin = {
  /**
   * Starts timing the request and returns the request lifecycle hooks.
   * @param {{ request: { operationName?: string } }} requestContext
   * @returns {Promise<object>} hooks: didResolveOperation, executionDidStart, willSendResponse
   */
  async requestDidStart({ request }) {
    const startTime = process.hrtime.bigint()
    // NOTE(review): operationName comes from the client; unbounded values can
    // inflate label cardinality. Confirm whether an allow-list is needed.
    const operationName = request.operationName || 'anonymous'
    // Stays 'unknown' when the request fails before the operation is resolved.
    let operationType = 'unknown'

    return {
      // Record the real operation type (query / mutation / subscription)
      // instead of labelling every request as 'query'.
      async didResolveOperation({ operation }) {
        operationType = operation?.operation ?? operationType
      },
      async executionDidStart() {
        return {
          willResolveField({ info }) {
            const fieldStart = process.hrtime.bigint()
            // The returned callback runs when the resolver finishes.
            return () => {
              gqlFieldResolveDuration
                .labels(info.parentType.name, info.fieldName)
                .observe(secondsSince(fieldStart))
            }
          }
        }
      },
      async willSendResponse({ response }) {
        const duration = secondsSince(startTime)
        const errors = response.body?.singleResult?.errors ?? []
        const hasErrors = errors.length > 0
        gqlOperationDuration
          .labels(operationName, operationType, hasErrors ? 'error' : 'success')
          .observe(duration)
        for (const err of errors) {
          gqlErrors.labels(operationName, err.extensions?.code || 'UNKNOWN').inc()
        }
      }
    }
  }
}
Grafana дашборд запросов
# grafana/dashboards/graphql.json (panels fragment)
# PromQL note: `by (...)` is only valid on aggregation operators, so the
# per-bucket rates are aggregated with `sum by (...)`; the `le` label must be
# kept for histogram_quantile to work.
panels:
  - title: "Top Slow Operations (p95)"
    type: table
    targets:
      - expr: |
          topk(10,
            histogram_quantile(0.95,
              sum by (le, operation_name) (
                rate(graphql_operation_duration_seconds_bucket[5m])
              )
            )
          )
  - title: "Error Rate by Operation"
    type: timeseries
    targets:
      - expr: |
          sum by (operation_name) (rate(graphql_errors_total[5m]))
  - title: "Slowest Resolvers (p99)"
    type: table
    targets:
      - expr: |
          topk(20,
            histogram_quantile(0.99,
              sum by (le, type_name, field_name) (
                rate(graphql_field_resolve_duration_seconds_bucket[5m])
              )
            )
          )
Schema Change Alerts
// Автоматическая проверка обратной совместимости при деплое
// rover schema check (Apollo) или @graphql-inspector/core
import { diff, CriticalityLevel } from '@graphql-inspector/core'
/**
 * Compares two schemas and stops the process with exit code 1 when the diff
 * contains breaking changes; each breaking change is printed to stderr first.
 * @param oldSchema schema passed as the first argument to `diff`
 * @param newSchema schema passed as the second argument to `diff`
 * @returns {Promise<void>}
 */
async function checkSchemaCompatibility(oldSchema, newSchema) {
  const allChanges = await diff(oldSchema, newSchema)

  const breakingChanges = []
  for (const change of allChanges) {
    if (change.criticality.level === CriticalityLevel.Breaking) {
      breakingChanges.push(change)
    }
  }

  // Nothing breaking: let the deploy continue.
  if (breakingChanges.length === 0) {
    return
  }

  console.error('Breaking schema changes detected:')
  for (const change of breakingChanges) {
    console.error(` - ${change.message}`)
  }
  process.exit(1)
}
Срок выполнения
Настройка GraphQL мониторинга с Apollo Studio или OpenTelemetry + Prometheus/Grafana — 1–2 рабочих дня.







