fix: Add OpenTelemetry-based tracing

This adds tracing to FastAPI, Redis, and SQLAlchemy. It uses the
recommended OTLP exporter to send the tracing data.
This commit is contained in:
Sven-Hendrik Haase 2024-08-03 13:20:04 +02:00
parent a54b6935a1
commit a2cbf01eef
No known key found for this signature in database
GPG key ID: 39E4B877E62EB915
6 changed files with 960 additions and 616 deletions

View file

@ -14,6 +14,12 @@ from fastapi import FastAPI, HTTPException, Request, Response
from fastapi.responses import RedirectResponse from fastapi.responses import RedirectResponse
from fastapi.staticfiles import StaticFiles from fastapi.staticfiles import StaticFiles
from jinja2 import TemplateNotFound from jinja2 import TemplateNotFound
from opentelemetry import trace
from opentelemetry.exporter.otlp.proto.http.trace_exporter import OTLPSpanExporter
from opentelemetry.instrumentation.fastapi import FastAPIInstrumentor
from opentelemetry.sdk.resources import Resource
from opentelemetry.sdk.trace import TracerProvider
from opentelemetry.sdk.trace.export import BatchSpanProcessor
from sqlalchemy import and_ from sqlalchemy import and_
from starlette.exceptions import HTTPException as StarletteHTTPException from starlette.exceptions import HTTPException as StarletteHTTPException
from starlette.middleware.authentication import AuthenticationMiddleware from starlette.middleware.authentication import AuthenticationMiddleware
@ -54,6 +60,17 @@ instrumentator().add(prometheus.http_requests_total())
instrumentator().instrument(app) instrumentator().instrument(app)
# Instrument FastAPI for tracing
FastAPIInstrumentor.instrument_app(app)
# Identify this service in exported spans.
resource = Resource(attributes={"service.name": "aurweb"})
# Read the collector endpoint from the [tracing] config section.
otlp_endpoint = aurweb.config.get("tracing", "otlp_endpoint")
# The OTLP/HTTP exporter takes no `insecure` argument (that flag belongs to
# the gRPC exporter); whether TLS is used follows from the endpoint's scheme.
otlp_exporter = OTLPSpanExporter(endpoint=otlp_endpoint)
# Batch spans before export rather than sending each one individually.
span_processor = BatchSpanProcessor(otlp_exporter)
tracer_provider = TracerProvider(resource=resource)
tracer_provider.add_span_processor(span_processor)
# Register the fully configured provider as the global tracer provider.
trace.set_tracer_provider(tracer_provider)
async def app_startup(): async def app_startup():
# https://stackoverflow.com/questions/67054759/about-the-maximum-recursion-error-in-fastapi # https://stackoverflow.com/questions/67054759/about-the-maximum-recursion-error-in-fastapi
# Test failures have been observed by internal starlette code when # Test failures have been observed by internal starlette code when

View file

@ -1,4 +1,5 @@
import fakeredis import fakeredis
from opentelemetry.instrumentation.redis import RedisInstrumentor
from redis import ConnectionPool, Redis from redis import ConnectionPool, Redis
import aurweb.config import aurweb.config
@ -7,6 +8,8 @@ from aurweb import aur_logging
logger = aur_logging.get_logger(__name__) logger = aur_logging.get_logger(__name__)
pool = None pool = None
# Enable OpenTelemetry instrumentation of the redis client library.
RedisInstrumentor().instrument()
class FakeConnectionPool: class FakeConnectionPool:
"""A fake ConnectionPool class which holds an internal reference """A fake ConnectionPool class which holds an internal reference

View file

@ -298,9 +298,12 @@ def get_engine(dbname: str = None, echo: bool = False):
connect_args["check_same_thread"] = False connect_args["check_same_thread"] = False
kwargs = {"echo": echo, "connect_args": connect_args} kwargs = {"echo": echo, "connect_args": connect_args}
from opentelemetry.instrumentation.sqlalchemy import SQLAlchemyInstrumentor
from sqlalchemy import create_engine from sqlalchemy import create_engine
_engines[dbname] = create_engine(get_sqlalchemy_url(), **kwargs) engine = create_engine(get_sqlalchemy_url(), **kwargs)
SQLAlchemyInstrumentor().instrument(engine=engine)
_engines[dbname] = engine
if is_sqlite: # pragma: no cover if is_sqlite: # pragma: no cover
setup_sqlite(_engines.get(dbname)) setup_sqlite(_engines.get(dbname))

View file

@ -177,3 +177,6 @@ expiry_time_search = 600
expiry_time_statistics = 300 expiry_time_statistics = 300
; number of seconds after a cache entry for rss queries expires, default is 5 minutes ; number of seconds after a cache entry for rss queries expires, default is 5 minutes
expiry_time_rss = 300 expiry_time_rss = 300
[tracing]
; OTLP/HTTP endpoint that trace spans are exported to
otlp_endpoint = http://localhost:4318/v1/traces

1537
poetry.lock generated

File diff suppressed because it is too large Load diff

View file

@ -69,7 +69,6 @@ httpx = "^0.27.0"
itsdangerous = "^2.1.2" itsdangerous = "^2.1.2"
lxml = "^5.2.1" lxml = "^5.2.1"
orjson = "^3.10.0" orjson = "^3.10.0"
protobuf = "^5.26.1"
pygit2 = "^1.14.1" pygit2 = "^1.14.1"
python-multipart = "^0.0.9" python-multipart = "^0.0.9"
redis = "^5.0.3" redis = "^5.0.3"
@ -89,7 +88,6 @@ SQLAlchemy = "^1.4.52"
uvicorn = "^0.29.0" uvicorn = "^0.29.0"
gunicorn = "^21.2.0" gunicorn = "^21.2.0"
Hypercorn = "^0.16.0" Hypercorn = "^0.16.0"
prometheus-fastapi-instrumentator = "^7.0.0"
pytest-xdist = "^3.5.0" pytest-xdist = "^3.5.0"
filelock = "^3.13.3" filelock = "^3.13.3"
posix-ipc = "^1.1.1" posix-ipc = "^1.1.1"
@ -98,6 +96,15 @@ fastapi = "^0.110.0"
srcinfo = "^0.1.2" srcinfo = "^0.1.2"
tomlkit = "^0.12.0" tomlkit = "^0.12.0"
# Tracing
prometheus-fastapi-instrumentator = "^7.0.0"
opentelemetry-api = "^1.26.0"
opentelemetry-sdk = "^1.26.0"
opentelemetry-exporter-otlp-proto-http = "^1.26.0"
opentelemetry-instrumentation-fastapi = "^0.47b0"
opentelemetry-instrumentation-redis = "^0.47b0"
opentelemetry-instrumentation-sqlalchemy = "^0.47b0"
[tool.poetry.dev-dependencies] [tool.poetry.dev-dependencies]
coverage = "^7.4.4" coverage = "^7.4.4"
pytest = "^8.1.1" pytest = "^8.1.1"