From b4495a49bf2cd75adeb02746217cd7b92df7a834 Mon Sep 17 00:00:00 2001 From: Kevin Morris Date: Thu, 13 Jan 2022 23:27:10 -0800 Subject: [PATCH] fix(rpc): improve type=search performance This patch brings in the use of .with_entities on our RPC search query. This primarily fixes performance issues we were seeing with large queries. That being said, we do see a bit of a slowdown on large record count rpc queries, but it's quite negligible at this point. We still do aim to perform better than the older PHP implementation, so this is not a finishing patch by any means. Signed-off-by: Kevin Morris --- aurweb/packages/search.py | 11 +++++-- aurweb/rpc.py | 62 +++++++++++++++++++++++---------------- 2 files changed, 45 insertions(+), 28 deletions(-) diff --git a/aurweb/packages/search.py b/aurweb/packages/search.py index 1bca732a..b4af5aab 100644 --- a/aurweb/packages/search.py +++ b/aurweb/packages/search.py @@ -1,6 +1,6 @@ from sqlalchemy import and_, case, or_, orm -from aurweb import db, models, util +from aurweb import db, models from aurweb.models import Package, PackageBase, User from aurweb.models.dependency_type import CHECKDEPENDS_ID, DEPENDS_ID, MAKEDEPENDS_ID, OPTDEPENDS_ID from aurweb.models.package_comaintainer import PackageComaintainer @@ -257,8 +257,10 @@ class RPCSearch(PackageSearch): # Fix-up inherited search_by_cb to reflect RPC-specific by params. # We keep: "nd", "n" and "m". We also overlay four new by params # on top: "depends", "makedepends", "optdepends" and "checkdepends". - util.apply_all(RPCSearch.keys_removed, - lambda k: self.search_by_cb.pop(k)) + self.search_by_cb = { + k: v for k, v in self.search_by_cb.items() + if k not in RPCSearch.keys_removed + } self.search_by_cb.update({ "depends": self._search_by_depends, "makedepends": self._search_by_makedepends, @@ -266,6 +268,9 @@ class RPCSearch(PackageSearch): "checkdepends": self._search_by_checkdepends }) + # We always want an optional Maintainer in the RPC. + self._join_user() + def _join_depends(self, dep_type_id: int) -> orm.Query: """ Join Package with PackageDependency and filter results based on `dep_type_id`. diff --git a/aurweb/rpc.py b/aurweb/rpc.py index 30fd2fbd..8757d9f9 100644 --- a/aurweb/rpc.py +++ b/aurweb/rpc.py @@ -4,7 +4,7 @@ from collections import defaultdict from typing import Any, Callable, Dict, List, NewType, Union from fastapi.responses import HTMLResponse -from sqlalchemy import and_, literal +from sqlalchemy import and_, literal, orm import aurweb.config as config @@ -123,34 +123,27 @@ class RPC: # Produce RPC API compatible Popularity: If zero, it's an integer # 0, otherwise, it's formatted to the 6th decimal place. - pop = package.PackageBase.Popularity + pop = package.Popularity pop = 0 if not pop else float(util.number_format(pop, 6)) snapshot_uri = config.get("options", "snapshot_uri") - data = defaultdict(list) - data.update({ + return { "ID": package.ID, "Name": package.Name, "PackageBaseID": package.PackageBaseID, - "PackageBase": package.PackageBase.Name, + "PackageBase": package.PackageBaseName, # Maintainer should be set following this update if one exists. - "Maintainer": None, + "Maintainer": package.Maintainer, "Version": package.Version, "Description": package.Description, "URL": package.URL, "URLPath": snapshot_uri % package.Name, - "NumVotes": package.PackageBase.NumVotes, + "NumVotes": package.NumVotes, "Popularity": pop, - "OutOfDate": package.PackageBase.OutOfDateTS, - "FirstSubmitted": package.PackageBase.SubmittedTS, - "LastModified": package.PackageBase.ModifiedTS - }) - - if package.PackageBase.Maintainer is not None: - # We do have a maintainer: set the Maintainer key. - data["Maintainer"] = package.PackageBase.Maintainer.Username - - return data + "OutOfDate": package.OutOfDateTS, + "FirstSubmitted": package.SubmittedTS, + "LastModified": package.ModifiedTS + } def _get_info_json_data(self, package: models.Package) -> Dict[str, Any]: data = self._get_json_data(package) @@ -178,19 +171,38 @@ class RPC: :param packages: A list of Package instances or a Package ORM query :param data_generator: Generator callable of single-Package JSON data """ - output = [] - for pkg in packages: - db.refresh(pkg) - output.append(data_generator(pkg)) - return output + return [data_generator(pkg) for pkg in packages] + + def _entities(self, query: orm.Query) -> orm.Query: + """ Select specific RPC columns on `query`. """ + return query.with_entities( + models.Package.ID, + models.Package.Name, + models.Package.Version, + models.Package.Description, + models.Package.URL, + models.Package.PackageBaseID, + models.PackageBase.Name.label("PackageBaseName"), + models.PackageBase.NumVotes, + models.PackageBase.Popularity, + models.PackageBase.OutOfDateTS, + models.PackageBase.SubmittedTS, + models.PackageBase.ModifiedTS, + models.User.Username.label("Maintainer"), + ).group_by(models.Package.ID) def _handle_multiinfo_type(self, args: List[str] = [], **kwargs) \ -> List[Dict[str, Any]]: self._enforce_args(args) args = set(args) - packages = db.query(models.Package).join(models.PackageBase).filter( - models.Package.Name.in_(args)) + packages = db.query(models.Package).join(models.PackageBase).join( + models.User, + models.User.ID == models.PackageBase.MaintainerUID, + isouter=True + ).filter(models.Package.Name.in_(args)) + packages = self._entities(packages) + ids = {pkg.ID for pkg in packages} # Aliases for 80-width. @@ -293,7 +305,7 @@ class RPC: search.search_by(by, arg) max_results = config.getint("options", "max_rpc_results") - results = search.results().limit(max_results) + results = self._entities(search.results()).limit(max_results) return self._assemble_json_data(results, self._get_json_data) def _handle_msearch_type(self, args: List[str] = [], **kwargs)\