Add opentelemetry-based tracing

This adds tracing to fastapi, redis, and sqlalchemy. It uses the
recommended OTLP exporter to send the tracing data.
Sven-Hendrik Haase 2024-08-03 13:20:04 +02:00
parent a54b6935a1
commit 1049102991
6 changed files with 960 additions and 616 deletions
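
For orientation, the changes below boil down to the following bootstrap: one tracer provider and OTLP exporter for the service, plus the FastAPI, Redis, and SQLAlchemy instrumentors. This is a condensed sketch, not a verbatim copy of the diff; it assumes an OTLP/HTTP collector on localhost:4318, and the FastAPI app and SQLite engine are stand-ins for aurweb's own objects.

from fastapi import FastAPI
from opentelemetry import trace
from opentelemetry.exporter.otlp.proto.http.trace_exporter import OTLPSpanExporter
from opentelemetry.instrumentation.fastapi import FastAPIInstrumentor
from opentelemetry.instrumentation.redis import RedisInstrumentor
from opentelemetry.instrumentation.sqlalchemy import SQLAlchemyInstrumentor
from opentelemetry.sdk.resources import Resource
from opentelemetry.sdk.trace import TracerProvider
from opentelemetry.sdk.trace.export import BatchSpanProcessor
from sqlalchemy import create_engine

# One provider and exporter for the whole service.
provider = TracerProvider(resource=Resource(attributes={"service.name": "aurweb"}))
provider.add_span_processor(
    BatchSpanProcessor(OTLPSpanExporter(endpoint="http://localhost:4318/v1/traces"))
)
trace.set_tracer_provider(provider)

# Library instrumentation: FastAPI per app, Redis globally, SQLAlchemy per engine.
app = FastAPI()                 # stand-in for aurweb's app
engine = create_engine("sqlite://")  # stand-in for aurweb's engine
FastAPIInstrumentor.instrument_app(app)
RedisInstrumentor().instrument()
SQLAlchemyInstrumentor().instrument(engine=engine)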

@@ -14,6 +14,12 @@ from fastapi import FastAPI, HTTPException, Request, Response
from fastapi.responses import RedirectResponse
from fastapi.staticfiles import StaticFiles
from jinja2 import TemplateNotFound
from opentelemetry import trace
from opentelemetry.exporter.otlp.proto.http.trace_exporter import OTLPSpanExporter
from opentelemetry.instrumentation.fastapi import FastAPIInstrumentor
from opentelemetry.sdk.resources import Resource
from opentelemetry.sdk.trace import TracerProvider
from opentelemetry.sdk.trace.export import BatchSpanProcessor
from sqlalchemy import and_
from starlette.exceptions import HTTPException as StarletteHTTPException
from starlette.middleware.authentication import AuthenticationMiddleware
@@ -54,6 +60,17 @@ instrumentator().add(prometheus.http_requests_total())
instrumentator().instrument(app)
# Instrument FastAPI for tracing
FastAPIInstrumentor.instrument_app(app)
resource = Resource(attributes={"service.name": "aurweb"})
otlp_endpoint = aurweb.config.get("tracing", "otlp_endpoint")
otlp_exporter = OTLPSpanExporter(endpoint=otlp_endpoint)
span_processor = BatchSpanProcessor(otlp_exporter)
trace.set_tracer_provider(TracerProvider(resource=resource))
trace.get_tracer_provider().add_span_processor(span_processor)
async def app_startup():
# https://stackoverflow.com/questions/67054759/about-the-maximum-recursion-error-in-fastapi
# Test failures have been observed by internal starlette code when
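
Once the tracer provider is registered, request handlers can attach custom child spans underneath the automatic server span that FastAPIInstrumentor creates per request. A minimal illustration, not code from this commit (it assumes the module-level app from this file; the route and attribute names are made up):

from opentelemetry import trace

tracer = trace.get_tracer(__name__)

@app.get("/hello/{name}")  # hypothetical route, for illustration only
async def hello(name: str):
    # Nested under the automatic server span for this request.
    with tracer.start_as_current_span("greet") as span:
        span.set_attribute("example.name", name)
        return {"hello": name}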

@@ -1,4 +1,5 @@
import fakeredis
from opentelemetry.instrumentation.redis import RedisInstrumentor
from redis import ConnectionPool, Redis
import aurweb.config
@@ -7,6 +8,8 @@ from aurweb import aur_logging
logger = aur_logging.get_logger(__name__)
pool = None
RedisInstrumentor().instrument()
class FakeConnectionPool:
"""A fake ConnectionPool class which holds an internal reference

@@ -298,9 +298,12 @@ def get_engine(dbname: str = None, echo: bool = False):
connect_args["check_same_thread"] = False
kwargs = {"echo": echo, "connect_args": connect_args}
from opentelemetry.instrumentation.sqlalchemy import SQLAlchemyInstrumentor
from sqlalchemy import create_engine
_engines[dbname] = create_engine(get_sqlalchemy_url(), **kwargs)
engine = create_engine(get_sqlalchemy_url(), **kwargs)
SQLAlchemyInstrumentor().instrument(engine=engine)
_engines[dbname] = engine
if is_sqlite: # pragma: no cover
setup_sqlite(_engines.get(dbname))
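
Unlike the Redis instrumentation, SQLAlchemyInstrumentor is applied per engine, so only engines created through get_engine() are traced. A standalone sketch of the same pattern, using an in-memory SQLite engine rather than aurweb's database (not part of the commit):

from opentelemetry.instrumentation.sqlalchemy import SQLAlchemyInstrumentor
from sqlalchemy import create_engine, text

engine = create_engine("sqlite://")
SQLAlchemyInstrumentor().instrument(engine=engine)

with engine.connect() as conn:
    conn.execute(text("SELECT 1"))  # emitted as a database client span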

@@ -177,3 +177,6 @@ expiry_time_search = 600
expiry_time_statistics = 300
; number of seconds after a cache entry for rss queries expires, default is 5 minutes
expiry_time_rss = 300
[tracing]
otlp_endpoint = http://localhost:4318/v1/traces
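
If no OTLP collector is running at the configured endpoint, spans can still be inspected locally by swapping in the SDK's console exporter. A quick smoke test, illustrative only and not part of the commit or of aurweb's configuration:

from opentelemetry import trace
from opentelemetry.sdk.trace import TracerProvider
from opentelemetry.sdk.trace.export import ConsoleSpanExporter, SimpleSpanProcessor

provider = TracerProvider()
provider.add_span_processor(SimpleSpanProcessor(ConsoleSpanExporter()))
trace.set_tracer_provider(provider)

with trace.get_tracer("smoke-test").start_as_current_span("hello"):
    pass  # the finished span is printed to stdout as JSON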

poetry.lock (generated): 1537 changed lines; diff suppressed because it is too large.

@@ -69,7 +69,6 @@ httpx = "^0.27.0"
itsdangerous = "^2.1.2"
lxml = "^5.2.1"
orjson = "^3.10.0"
protobuf = "^5.26.1"
pygit2 = "^1.14.1"
python-multipart = "^0.0.9"
redis = "^5.0.3"
@@ -89,7 +88,6 @@ SQLAlchemy = "^1.4.52"
uvicorn = "^0.29.0"
gunicorn = "^21.2.0"
Hypercorn = "^0.16.0"
prometheus-fastapi-instrumentator = "^7.0.0"
pytest-xdist = "^3.5.0"
filelock = "^3.13.3"
posix-ipc = "^1.1.1"
@@ -98,6 +96,15 @@ fastapi = "^0.110.0"
srcinfo = "^0.1.2"
tomlkit = "^0.12.0"
# Tracing
prometheus-fastapi-instrumentator = "^7.0.0"
opentelemetry-api = "^1.26.0"
opentelemetry-sdk = "^1.26.0"
opentelemetry-exporter-otlp-proto-http = "^1.26.0"
opentelemetry-instrumentation-fastapi = "^0.47b0"
opentelemetry-instrumentation-redis = "^0.47b0"
opentelemetry-instrumentation-sqlalchemy = "^0.47b0"
[tool.poetry.dev-dependencies]
coverage = "^7.4.4"
pytest = "^8.1.1"
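
The opentelemetry-instrumentation packages are versioned on their own track; the 0.47b0 releases are the companions of API/SDK 1.26.0, so future bumps should move the two groups together. A quick way to check what is actually installed (illustrative helper, not part of the commit):

from importlib.metadata import version

for pkg in (
    "opentelemetry-sdk",
    "opentelemetry-exporter-otlp-proto-http",
    "opentelemetry-instrumentation-fastapi",
    "opentelemetry-instrumentation-redis",
    "opentelemetry-instrumentation-sqlalchemy",
):
    print(pkg, version(pkg))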