From 56bd60559cdaa2f394e7743b82cb1692319ed8fc Mon Sep 17 00:00:00 2001 From: Kevin Morris Date: Thu, 23 Dec 2021 19:20:10 -0800 Subject: [PATCH] fix(packages.search): fix default ordering & improve performance - Use queries more closely aligned to PHP's implementation; removes the need for separate vote/notification queries. - Default sort by popularity Closes #214 Signed-off-by: Kevin Morris --- aurweb/packages/search.py | 163 +++++++++++------- aurweb/packages/util.py | 4 +- aurweb/routers/packages.py | 31 +++- templates/packages.html | 4 +- .../partials/packages/search_results.html | 17 +- test/test_packages_routes.py | 27 +++ test/test_rpc.py | 5 +- 7 files changed, 161 insertions(+), 90 deletions(-) diff --git a/aurweb/packages/search.py b/aurweb/packages/search.py index a14fe19b..c4ab36fe 100644 --- a/aurweb/packages/search.py +++ b/aurweb/packages/search.py @@ -1,9 +1,12 @@ from sqlalchemy import and_, case, or_, orm -from aurweb import config, db, models, util +from aurweb import db, models, util +from aurweb.models import Package, PackageBase, User from aurweb.models.dependency_type import CHECKDEPENDS_ID, DEPENDS_ID, MAKEDEPENDS_ID, OPTDEPENDS_ID - -DEFAULT_MAX_RESULTS = 2500 +from aurweb.models.package_comaintainer import PackageComaintainer +from aurweb.models.package_keyword import PackageKeyword +from aurweb.models.package_notification import PackageNotification +from aurweb.models.package_vote import PackageVote class PackageSearch: @@ -13,23 +16,21 @@ class PackageSearch: FULL_SORT_ORDER = {"d": "desc", "a": "asc"} def __init__(self, user: models.User = None): + self.query = db.query(Package).join(PackageBase) + self.user = user - self.query = db.query(models.Package).join(models.PackageBase) - if self.user: - PackageVote = models.PackageVote - join_vote_on = and_( - PackageVote.PackageBaseID == models.PackageBase.ID, - PackageVote.UsersID == self.user.ID) - - PackageNotification = models.PackageNotification - join_notif_on = and_( - PackageNotification.PackageBaseID == models.PackageBase.ID, - PackageNotification.UserID == self.user.ID) - self.query = self.query.join( - models.PackageVote, join_vote_on, isouter=True - ).join(models.PackageNotification, join_notif_on, isouter=True) + PackageVote, + and_(PackageVote.PackageBaseID == PackageBase.ID, + PackageVote.UsersID == self.user.ID), + isouter=True + ).join( + PackageNotification, + and_(PackageNotification.PackageBaseID == PackageBase.ID, + PackageNotification.UserID == self.user.ID), + isouter=True + ) self.ordering = "d" @@ -58,70 +59,92 @@ class PackageSearch: "l": self._sort_by_last_modified } + def _join_user(self, outer: bool = True) -> orm.Query: + """ Centralized joining of a package base's maintainer. """ + self.query = self.query.join( + User, + User.ID == PackageBase.MaintainerUID, + isouter=outer + ) + return self.query + def _search_by_namedesc(self, keywords: str) -> orm.Query: + self._join_user() self.query = self.query.filter( - or_(models.Package.Name.like(f"%{keywords}%"), - models.Package.Description.like(f"%{keywords}%")) + or_(Package.Name.like(f"%{keywords}%"), + Package.Description.like(f"%{keywords}%")) ) return self def _search_by_name(self, keywords: str) -> orm.Query: - self.query = self.query.filter( - models.Package.Name.like(f"%{keywords}%")) + self._join_user() + self.query = self.query.filter(Package.Name.like(f"%{keywords}%")) return self def _search_by_exact_name(self, keywords: str) -> orm.Query: - self.query = self.query.filter( - models.Package.Name == keywords) + self._join_user() + self.query = self.query.filter(Package.Name == keywords) return self def _search_by_pkgbase(self, keywords: str) -> orm.Query: - self.query = self.query.filter( - models.PackageBase.Name.like(f"%{keywords}%")) + self._join_user() + self.query = self.query.filter(PackageBase.Name.like(f"%{keywords}%")) return self def _search_by_exact_pkgbase(self, keywords: str) -> orm.Query: - self.query = self.query.filter( - models.PackageBase.Name == keywords) + self._join_user() + self.query = self.query.filter(PackageBase.Name == keywords) return self def _search_by_keywords(self, keywords: str) -> orm.Query: - self.query = self.query.join(models.PackageKeyword).filter( - models.PackageKeyword.Keyword == keywords + self._join_user() + self.query = self.query.join(PackageKeyword).filter( + PackageKeyword.Keyword == keywords ) return self def _search_by_maintainer(self, keywords: str) -> orm.Query: + self._join_user() if keywords: - self.query = self.query.join( - models.User, models.User.ID == models.PackageBase.MaintainerUID - ).filter(models.User.Username == keywords) - else: self.query = self.query.filter( - models.PackageBase.MaintainerUID.is_(None)) + and_(User.Username == keywords, + User.ID == PackageBase.MaintainerUID) + ) + else: + self.query = self.query.filter(PackageBase.MaintainerUID.is_(None)) return self def _search_by_comaintainer(self, keywords: str) -> orm.Query: - self.query = self.query.join(models.PackageComaintainer).join( - models.User, models.User.ID == models.PackageComaintainer.UsersID - ).filter(models.User.Username == keywords) + self._join_user() + exists_subq = db.query(PackageComaintainer).join(User).filter( + and_(PackageComaintainer.PackageBaseID == PackageBase.ID, + User.Username == keywords) + ).exists() + self.query = self.query.filter(db.query(exists_subq).scalar_subquery()) return self def _search_by_co_or_maintainer(self, keywords: str) -> orm.Query: - self.query = self.query.join( - models.PackageComaintainer, - isouter=True - ).join( - models.User, - or_(models.User.ID == models.PackageBase.MaintainerUID, - models.User.ID == models.PackageComaintainer.UsersID) - ).filter(models.User.Username == keywords) + self._join_user() + exists_subq = db.query(PackageComaintainer).join(User).filter( + and_(PackageComaintainer.PackageBaseID == PackageBase.ID, + User.Username == keywords) + ).exists() + self.query = self.query.filter( + or_(and_(User.Username == keywords, + User.ID == PackageBase.MaintainerUID), + db.query(exists_subq).scalar_subquery()) + ) return self def _search_by_submitter(self, keywords: str) -> orm.Query: - self.query = self.query.join( - models.User, models.User.ID == models.PackageBase.SubmitterUID - ).filter(models.User.Username == keywords) + self._join_user() + + uid = 0 + user = db.query(User).filter(User.Username == keywords).first() + if user: + uid = user.ID + + self.query = self.query.filter(PackageBase.SubmitterUID == uid) return self def search_by(self, search_by: str, keywords: str) -> orm.Query: @@ -138,12 +161,14 @@ class PackageSearch: def _sort_by_votes(self, order: str): column = getattr(models.PackageBase.NumVotes, order) - self.query = self.query.order_by(column()) + name = getattr(models.Package.Name, order) + self.query = self.query.order_by(column(), name()) return self def _sort_by_popularity(self, order: str): column = getattr(models.PackageBase.Popularity, order) - self.query = self.query.order_by(column()) + name = getattr(models.Package.Name, order) + self.query = self.query.order_by(column(), name()) return self def _sort_by_voted(self, order: str): @@ -154,7 +179,8 @@ class PackageSearch: case([(models.PackageVote.UsersID == self.user.ID, 1)], else_=0), order ) - self.query = self.query.order_by(column(), models.Package.Name.desc()) + name = getattr(models.Package.Name, order) + self.query = self.query.order_by(column(), name()) return self def _sort_by_notify(self, order: str): @@ -166,39 +192,42 @@ class PackageSearch: else_=0), order ) - self.query = self.query.order_by(column(), models.Package.Name.desc()) + name = getattr(models.Package.Name, order) + self.query = self.query.order_by(column(), name()) return self def _sort_by_maintainer(self, order: str): column = getattr(models.User.Username, order) - self.query = self.query.join( - models.User, - models.User.ID == models.PackageBase.MaintainerUID, - isouter=True - ).order_by(column()) + name = getattr(models.Package.Name, order) + self.query = self.query.order_by(column(), name()) return self def _sort_by_last_modified(self, order: str): column = getattr(models.PackageBase.ModifiedTS, order) - self.query = self.query.order_by(column()) + name = getattr(models.Package.Name, order) + self.query = self.query.order_by(column(), name()) return self def sort_by(self, sort_by: str, ordering: str = "d") -> orm.Query: if sort_by not in self.sort_by_cb: - sort_by = "n" # Default: Name. + sort_by = "p" # Default: Popularity callback = self.sort_by_cb.get(sort_by) if ordering not in self.FULL_SORT_ORDER: - ordering = "d" # Default: Descending. + ordering = "d" # Default: Descending ordering = self.FULL_SORT_ORDER.get(ordering) return callback(ordering) - def results(self) -> orm.Query: - # Store the total count of all records found up to limit. - limit = (config.getint("options", "max_search_results") - or DEFAULT_MAX_RESULTS) - self.total_count = self.query.limit(limit).count() + def count(self, limit: int) -> int: + """ + Return internal query's count up to `limit`. - # Return the query to the user. + :param limit: Upper bound + :return: Database count up to `limit` + """ + return self.query.limit(limit).count() + + def results(self) -> orm.Query: + """ Return internal query. """ return self.query @@ -279,4 +308,4 @@ class RPCSearch(PackageSearch): return result def results(self) -> orm.Query: - return self.query + return self.query.filter(models.PackageBase.PackagerUID.isnot(None)) diff --git a/aurweb/packages/util.py b/aurweb/packages/util.py index b6739c98..ffbc5eb5 100644 --- a/aurweb/packages/util.py +++ b/aurweb/packages/util.py @@ -200,7 +200,7 @@ def query_voted(query: List[models.Package], :return: Vote state dict (PackageBase.ID: int -> bool) """ output = defaultdict(bool) - query_set = {pkg.PackageBase.ID for pkg in query} + query_set = {pkg.PackageBaseID for pkg in query} voted = db.query(models.PackageVote).join( models.PackageBase, models.PackageBase.ID.in_(query_set) @@ -223,7 +223,7 @@ def query_notified(query: List[models.Package], :return: Notification state dict (PackageBase.ID: int -> bool) """ output = defaultdict(bool) - query_set = {pkg.PackageBase.ID for pkg in query} + query_set = {pkg.PackageBaseID for pkg in query} notified = db.query(models.PackageNotification).join( models.PackageBase, models.PackageBase.ID.in_(query_set) diff --git a/aurweb/routers/packages.py b/aurweb/routers/packages.py index 1ffd30a3..de4e32b6 100644 --- a/aurweb/routers/packages.py +++ b/aurweb/routers/packages.py @@ -20,7 +20,7 @@ from aurweb.packages import util as pkgutil from aurweb.packages import validate from aurweb.packages.requests import handle_request, update_closure_comment from aurweb.packages.search import PackageSearch -from aurweb.packages.util import get_pkg_or_base, get_pkgbase_comment, get_pkgreq_by_id, query_notified, query_voted +from aurweb.packages.util import get_pkg_or_base, get_pkgbase_comment, get_pkgreq_by_id from aurweb.scripts import notify, popupdate from aurweb.scripts.rendercomment import update_comment_render_fastapi from aurweb.templates import make_context, make_variable_context, render_raw_template, render_template @@ -45,7 +45,7 @@ async def packages_get(request: Request, context: Dict[str, Any], search_by = context["SeB"] = request.query_params.get("SeB", "nd") # Query sort by. - sort_by = context["SB"] = request.query_params.get("SB", "n") + sort_by = context["SB"] = request.query_params.get("SB", "p") # Query sort order. sort_order = request.query_params.get("SO", None) @@ -61,6 +61,12 @@ async def packages_get(request: Request, context: Dict[str, Any], for keyword in keywords: search.search_by(search_by, keyword) + # Collect search result count here; we've applied our keywords. + # Including more query operations below, like ordering, will + # increase the amount of time required to collect a count. + limit = config.getint("options", "max_search_results") + num_packages = search.count(limit) + flagged = request.query_params.get("outdated", None) if flagged: # If outdated was given, set it up in the context. @@ -96,16 +102,23 @@ async def packages_get(request: Request, context: Dict[str, Any], context["SO"] = sort_order # Insert search results into the context. - results = search.results() + results = search.results().with_entities( + models.Package.ID, + models.Package.Name, + models.Package.PackageBaseID, + models.Package.Version, + models.Package.Description, + models.PackageBase.Popularity, + models.PackageBase.NumVotes, + models.PackageBase.OutOfDateTS, + models.User.Username.label("Maintainer"), + models.PackageVote.PackageBaseID.label("Voted"), + models.PackageNotification.PackageBaseID.label("Notify") + ) packages = results.limit(per_page).offset(offset) - util.apply_all(packages, db.refresh) context["packages"] = packages - context["packages_voted"] = query_voted( - context.get("packages"), request.user) - context["packages_notified"] = query_notified( - context.get("packages"), request.user) - context["packages_count"] = search.total_count + context["packages_count"] = num_packages return render_template(request, "packages.html", context, status_code=status_code) diff --git a/templates/packages.html b/templates/packages.html index 9e4a71d1..61533524 100644 --- a/templates/packages.html +++ b/templates/packages.html @@ -45,7 +45,7 @@ {# /packages does things a bit roundabout-wise: If SeB is not given, "nd" is the default. - If SB is not given, "n" is the default. + If SB is not given, "p" is the default. If SO is not given, "d" is the default. However, we depend on flipping SO for column sorting. @@ -56,7 +56,7 @@ {% set SeB = "nd" %} {% endif %} {% if not SB %} - {% set SB = "n" %} + {% set SB = "p" %} {% endif %} {% if not SO %} {% set SO = "d" %} diff --git a/templates/partials/packages/search_results.html b/templates/partials/packages/search_results.html index 28cf0b48..680891c4 100644 --- a/templates/partials/packages/search_results.html +++ b/templates/partials/packages/search_results.html @@ -64,7 +64,7 @@ {% for pkg in packages %} - {% set flagged = pkg.PackageBase.OutOfDateTS %} + {% set flagged = pkg.OutOfDateTS %} {% if request.user.is_authenticated() %} @@ -81,28 +81,27 @@ {% else %} {{ pkg.Version }} {% endif %} - {{ pkg.PackageBase.NumVotes }} + {{ pkg.NumVotes }} - {{ pkg.PackageBase.Popularity | number_format(2) }} + {{ pkg.Popularity | number_format(2) }} {% if request.user.is_authenticated() %} - {% if pkg.PackageBase.ID in voted %} + {% if pkg.Voted %} {{ "Yes" | tr }} {% endif %} - {% if pkg.PackageBase.ID in notified %} + {% if pkg.Notify %} {{ "Yes" | tr }} {% endif %} {% endif %} {{ pkg.Description or '' }} - {% set maintainer = pkg.PackageBase.Maintainer %} - {% if maintainer %} - - {{ maintainer.Username }} + {% if pkg.Maintainer %} + + {{ pkg.Maintainer }} {% else %} {{ "orphan" | tr }} diff --git a/test/test_packages_routes.py b/test/test_packages_routes.py index ffe05248..1f590a1e 100644 --- a/test/test_packages_routes.py +++ b/test/test_packages_routes.py @@ -816,6 +816,33 @@ def test_packages_search_by_submitter(client: TestClient, assert len(rows) == 1 +def test_packages_sort_by_name(client: TestClient, packages: List[Package]): + with client as request: + response = request.get("/packages", params={ + "SB": "n", # Name + "SO": "a", # Ascending + "PP": "150" + }) + assert response.status_code == int(HTTPStatus.OK) + + root = parse_root(response.text) + rows = root.xpath('//table[@class="results"]/tbody/tr') + rows = [row.xpath('./td/a')[0].text.strip() for row in rows] + + with client as request: + response2 = request.get("/packages", params={ + "SB": "n", # Name + "SO": "d", # Ascending + "PP": "150" + }) + assert response2.status_code == int(HTTPStatus.OK) + + root = parse_root(response2.text) + rows2 = root.xpath('//table[@class="results"]/tbody/tr') + rows2 = [row.xpath('./td/a')[0].text.strip() for row in rows2] + assert rows == list(reversed(rows2)) + + def test_packages_sort_by_votes(client: TestClient, maintainer: User, packages: List[Package]): diff --git a/test/test_rpc.py b/test/test_rpc.py index acb82cad..6540b4a5 100644 --- a/test/test_rpc.py +++ b/test/test_rpc.py @@ -656,6 +656,9 @@ def test_rpc_msearch(client: TestClient, user: User, packages: List[Package]): data = response.json() assert data.get("resultcount") == 0 + with db.begin(): + packages[0].PackageBase.Maintainer = None + # A missing arg still succeeds, but it returns all orphans. # Just verify that we receive no error and the orphaned result. params.pop("arg") @@ -663,7 +666,7 @@ def test_rpc_msearch(client: TestClient, user: User, packages: List[Package]): data = response.json() assert data.get("resultcount") == 1 result = data.get("results")[0] - assert result.get("Name") == "woogly-chungus" + assert result.get("Name") == "big-chungus" def test_rpc_search_depends(client: TestClient, packages: List[Package],