fix(rpc): improve type=search performance

This patch uses .with_entities on our RPC search query so
that only the columns the RPC needs are selected. This primarily
fixes performance issues we were seeing with large queries.

That being said, we do still see a slight slowdown on
RPC queries with large record counts, but it is negligible
at this point.

We still aim to perform better than the older PHP
implementation, so this is by no means the final
patch.

Signed-off-by: Kevin Morris <kevr@0cost.org>
This commit is contained in:
Kevin Morris 2022-01-13 23:27:10 -08:00
parent d31a51742b
commit b4495a49bf
No known key found for this signature in database
GPG key ID: F7E46DED420788F3
2 changed files with 45 additions and 28 deletions

View file

@ -1,6 +1,6 @@
from sqlalchemy import and_, case, or_, orm
from aurweb import db, models, util
from aurweb import db, models
from aurweb.models import Package, PackageBase, User
from aurweb.models.dependency_type import CHECKDEPENDS_ID, DEPENDS_ID, MAKEDEPENDS_ID, OPTDEPENDS_ID
from aurweb.models.package_comaintainer import PackageComaintainer
@ -257,8 +257,10 @@ class RPCSearch(PackageSearch):
# Fix-up inherited search_by_cb to reflect RPC-specific by params.
# We keep: "nd", "n" and "m". We also overlay four new by params
# on top: "depends", "makedepends", "optdepends" and "checkdepends".
util.apply_all(RPCSearch.keys_removed,
lambda k: self.search_by_cb.pop(k))
self.search_by_cb = {
k: v for k, v in self.search_by_cb.items()
if k not in RPCSearch.keys_removed
}
self.search_by_cb.update({
"depends": self._search_by_depends,
"makedepends": self._search_by_makedepends,
@ -266,6 +268,9 @@ class RPCSearch(PackageSearch):
"checkdepends": self._search_by_checkdepends
})
# We always want an optional Maintainer in the RPC.
self._join_user()
def _join_depends(self, dep_type_id: int) -> orm.Query:
""" Join Package with PackageDependency and filter results
based on `dep_type_id`.

View file

@ -4,7 +4,7 @@ from collections import defaultdict
from typing import Any, Callable, Dict, List, NewType, Union
from fastapi.responses import HTMLResponse
from sqlalchemy import and_, literal
from sqlalchemy import and_, literal, orm
import aurweb.config as config
@ -123,34 +123,27 @@ class RPC:
# Produce RPC API compatible Popularity: If zero, it's an integer
# 0, otherwise, it's formatted to the 6th decimal place.
pop = package.PackageBase.Popularity
pop = package.Popularity
pop = 0 if not pop else float(util.number_format(pop, 6))
snapshot_uri = config.get("options", "snapshot_uri")
data = defaultdict(list)
data.update({
return {
"ID": package.ID,
"Name": package.Name,
"PackageBaseID": package.PackageBaseID,
"PackageBase": package.PackageBase.Name,
"PackageBase": package.PackageBaseName,
# Maintainer should be set following this update if one exists.
"Maintainer": None,
"Maintainer": package.Maintainer,
"Version": package.Version,
"Description": package.Description,
"URL": package.URL,
"URLPath": snapshot_uri % package.Name,
"NumVotes": package.PackageBase.NumVotes,
"NumVotes": package.NumVotes,
"Popularity": pop,
"OutOfDate": package.PackageBase.OutOfDateTS,
"FirstSubmitted": package.PackageBase.SubmittedTS,
"LastModified": package.PackageBase.ModifiedTS
})
if package.PackageBase.Maintainer is not None:
# We do have a maintainer: set the Maintainer key.
data["Maintainer"] = package.PackageBase.Maintainer.Username
return data
"OutOfDate": package.OutOfDateTS,
"FirstSubmitted": package.SubmittedTS,
"LastModified": package.ModifiedTS
}
def _get_info_json_data(self, package: models.Package) -> Dict[str, Any]:
data = self._get_json_data(package)
@ -178,19 +171,38 @@ class RPC:
:param packages: A list of Package instances or a Package ORM query
:param data_generator: Generator callable of single-Package JSON data
"""
output = []
for pkg in packages:
db.refresh(pkg)
output.append(data_generator(pkg))
return output
return [data_generator(pkg) for pkg in packages]
def _entities(self, query: orm.Query) -> orm.Query:
    """ Narrow `query` down to the columns used for RPC output.

    The selected entities are the Package and PackageBase columns
    listed below, with PackageBase.Name labelled `PackageBaseName`
    and User.Username labelled `Maintainer`. The resulting query is
    grouped by Package.ID.

    :param query: Query to select RPC columns on
    :return: `query` with its entities replaced and a group_by applied
    """
    rpc_columns = (
        # Package columns.
        models.Package.ID,
        models.Package.Name,
        models.Package.Version,
        models.Package.Description,
        models.Package.URL,
        models.Package.PackageBaseID,
        # PackageBase columns; Name is labelled so that it does not
        # share a key with Package.Name in the result rows.
        models.PackageBase.Name.label("PackageBaseName"),
        models.PackageBase.NumVotes,
        models.PackageBase.Popularity,
        models.PackageBase.OutOfDateTS,
        models.PackageBase.SubmittedTS,
        models.PackageBase.ModifiedTS,
        # The maintainer's username, exposed as `Maintainer`.
        models.User.Username.label("Maintainer"),
    )
    selected = query.with_entities(*rpc_columns)
    return selected.group_by(models.Package.ID)
def _handle_multiinfo_type(self, args: List[str] = [], **kwargs) \
-> List[Dict[str, Any]]:
self._enforce_args(args)
args = set(args)
packages = db.query(models.Package).join(models.PackageBase).filter(
models.Package.Name.in_(args))
packages = db.query(models.Package).join(models.PackageBase).join(
models.User,
models.User.ID == models.PackageBase.MaintainerUID,
isouter=True
).filter(models.Package.Name.in_(args))
packages = self._entities(packages)
ids = {pkg.ID for pkg in packages}
# Aliases for 80-width.
@ -293,7 +305,7 @@ class RPC:
search.search_by(by, arg)
max_results = config.getint("options", "max_rpc_results")
results = search.results().limit(max_results)
results = self._entities(search.results()).limit(max_results)
return self._assemble_json_data(results, self._get_json_data)
def _handle_msearch_type(self, args: List[str] = [], **kwargs)\