fix(rpc): improve type=search performance

This patch uses SQLAlchemy's `.with_entities` on our RPC search
query so that only the columns the RPC needs are selected. This
primarily fixes performance issues we were seeing with large queries.

That being said, we do see a slight slowdown on RPC queries
with a large record count, but it is negligible
at this point.

We still aim to perform better than the older PHP
implementation, so this is by no means the final
patch.

Signed-off-by: Kevin Morris <kevr@0cost.org>
This commit is contained in:
Kevin Morris 2022-01-13 23:27:10 -08:00
parent d31a51742b
commit b4495a49bf
No known key found for this signature in database
GPG key ID: F7E46DED420788F3
2 changed files with 45 additions and 28 deletions

View file

@ -1,6 +1,6 @@
from sqlalchemy import and_, case, or_, orm from sqlalchemy import and_, case, or_, orm
from aurweb import db, models, util from aurweb import db, models
from aurweb.models import Package, PackageBase, User from aurweb.models import Package, PackageBase, User
from aurweb.models.dependency_type import CHECKDEPENDS_ID, DEPENDS_ID, MAKEDEPENDS_ID, OPTDEPENDS_ID from aurweb.models.dependency_type import CHECKDEPENDS_ID, DEPENDS_ID, MAKEDEPENDS_ID, OPTDEPENDS_ID
from aurweb.models.package_comaintainer import PackageComaintainer from aurweb.models.package_comaintainer import PackageComaintainer
@ -257,8 +257,10 @@ class RPCSearch(PackageSearch):
# Fix-up inherited search_by_cb to reflect RPC-specific by params. # Fix-up inherited search_by_cb to reflect RPC-specific by params.
# We keep: "nd", "n" and "m". We also overlay four new by params # We keep: "nd", "n" and "m". We also overlay four new by params
# on top: "depends", "makedepends", "optdepends" and "checkdepends". # on top: "depends", "makedepends", "optdepends" and "checkdepends".
util.apply_all(RPCSearch.keys_removed, self.search_by_cb = {
lambda k: self.search_by_cb.pop(k)) k: v for k, v in self.search_by_cb.items()
if k not in RPCSearch.keys_removed
}
self.search_by_cb.update({ self.search_by_cb.update({
"depends": self._search_by_depends, "depends": self._search_by_depends,
"makedepends": self._search_by_makedepends, "makedepends": self._search_by_makedepends,
@ -266,6 +268,9 @@ class RPCSearch(PackageSearch):
"checkdepends": self._search_by_checkdepends "checkdepends": self._search_by_checkdepends
}) })
# We always want an optional Maintainer in the RPC.
self._join_user()
def _join_depends(self, dep_type_id: int) -> orm.Query: def _join_depends(self, dep_type_id: int) -> orm.Query:
""" Join Package with PackageDependency and filter results """ Join Package with PackageDependency and filter results
based on `dep_type_id`. based on `dep_type_id`.

View file

@ -4,7 +4,7 @@ from collections import defaultdict
from typing import Any, Callable, Dict, List, NewType, Union from typing import Any, Callable, Dict, List, NewType, Union
from fastapi.responses import HTMLResponse from fastapi.responses import HTMLResponse
from sqlalchemy import and_, literal from sqlalchemy import and_, literal, orm
import aurweb.config as config import aurweb.config as config
@ -123,34 +123,27 @@ class RPC:
# Produce RPC API compatible Popularity: If zero, it's an integer # Produce RPC API compatible Popularity: If zero, it's an integer
# 0, otherwise, it's formatted to the 6th decimal place. # 0, otherwise, it's formatted to the 6th decimal place.
pop = package.PackageBase.Popularity pop = package.Popularity
pop = 0 if not pop else float(util.number_format(pop, 6)) pop = 0 if not pop else float(util.number_format(pop, 6))
snapshot_uri = config.get("options", "snapshot_uri") snapshot_uri = config.get("options", "snapshot_uri")
data = defaultdict(list) return {
data.update({
"ID": package.ID, "ID": package.ID,
"Name": package.Name, "Name": package.Name,
"PackageBaseID": package.PackageBaseID, "PackageBaseID": package.PackageBaseID,
"PackageBase": package.PackageBase.Name, "PackageBase": package.PackageBaseName,
# Maintainer should be set following this update if one exists. # Maintainer should be set following this update if one exists.
"Maintainer": None, "Maintainer": package.Maintainer,
"Version": package.Version, "Version": package.Version,
"Description": package.Description, "Description": package.Description,
"URL": package.URL, "URL": package.URL,
"URLPath": snapshot_uri % package.Name, "URLPath": snapshot_uri % package.Name,
"NumVotes": package.PackageBase.NumVotes, "NumVotes": package.NumVotes,
"Popularity": pop, "Popularity": pop,
"OutOfDate": package.PackageBase.OutOfDateTS, "OutOfDate": package.OutOfDateTS,
"FirstSubmitted": package.PackageBase.SubmittedTS, "FirstSubmitted": package.SubmittedTS,
"LastModified": package.PackageBase.ModifiedTS "LastModified": package.ModifiedTS
}) }
if package.PackageBase.Maintainer is not None:
# We do have a maintainer: set the Maintainer key.
data["Maintainer"] = package.PackageBase.Maintainer.Username
return data
def _get_info_json_data(self, package: models.Package) -> Dict[str, Any]: def _get_info_json_data(self, package: models.Package) -> Dict[str, Any]:
data = self._get_json_data(package) data = self._get_json_data(package)
@ -178,19 +171,38 @@ class RPC:
:param packages: A list of Package instances or a Package ORM query :param packages: A list of Package instances or a Package ORM query
:param data_generator: Generator callable of single-Package JSON data :param data_generator: Generator callable of single-Package JSON data
""" """
output = [] return [data_generator(pkg) for pkg in packages]
for pkg in packages:
db.refresh(pkg) def _entities(self, query: orm.Query) -> orm.Query:
output.append(data_generator(pkg)) """ Select specific RPC columns on `query`. """
return output return query.with_entities(
models.Package.ID,
models.Package.Name,
models.Package.Version,
models.Package.Description,
models.Package.URL,
models.Package.PackageBaseID,
models.PackageBase.Name.label("PackageBaseName"),
models.PackageBase.NumVotes,
models.PackageBase.Popularity,
models.PackageBase.OutOfDateTS,
models.PackageBase.SubmittedTS,
models.PackageBase.ModifiedTS,
models.User.Username.label("Maintainer"),
).group_by(models.Package.ID)
def _handle_multiinfo_type(self, args: List[str] = [], **kwargs) \ def _handle_multiinfo_type(self, args: List[str] = [], **kwargs) \
-> List[Dict[str, Any]]: -> List[Dict[str, Any]]:
self._enforce_args(args) self._enforce_args(args)
args = set(args) args = set(args)
packages = db.query(models.Package).join(models.PackageBase).filter( packages = db.query(models.Package).join(models.PackageBase).join(
models.Package.Name.in_(args)) models.User,
models.User.ID == models.PackageBase.MaintainerUID,
isouter=True
).filter(models.Package.Name.in_(args))
packages = self._entities(packages)
ids = {pkg.ID for pkg in packages} ids = {pkg.ID for pkg in packages}
# Aliases for 80-width. # Aliases for 80-width.
@ -293,7 +305,7 @@ class RPC:
search.search_by(by, arg) search.search_by(by, arg)
max_results = config.getint("options", "max_rpc_results") max_results = config.getint("options", "max_rpc_results")
results = search.results().limit(max_results) results = self._entities(search.results()).limit(max_results)
return self._assemble_json_data(results, self._get_json_data) return self._assemble_json_data(results, self._get_json_data)
def _handle_msearch_type(self, args: List[str] = [], **kwargs)\ def _handle_msearch_type(self, args: List[str] = [], **kwargs)\