fix(packages.search): fix default ordering & improve performance

- Use queries more closely aligned to PHP's implementation; removes
  the need for separate vote/notification queries.
- Default sort by popularity

Closes #214

Signed-off-by: Kevin Morris <kevr@0cost.org>
This commit is contained in:
Kevin Morris 2021-12-23 19:20:10 -08:00
parent e75aa386ea
commit 56bd60559c
No known key found for this signature in database
GPG key ID: F7E46DED420788F3
7 changed files with 161 additions and 90 deletions

View file

@ -1,9 +1,12 @@
from sqlalchemy import and_, case, or_, orm
from aurweb import config, db, models, util
from aurweb import db, models, util
from aurweb.models import Package, PackageBase, User
from aurweb.models.dependency_type import CHECKDEPENDS_ID, DEPENDS_ID, MAKEDEPENDS_ID, OPTDEPENDS_ID
DEFAULT_MAX_RESULTS = 2500
from aurweb.models.package_comaintainer import PackageComaintainer
from aurweb.models.package_keyword import PackageKeyword
from aurweb.models.package_notification import PackageNotification
from aurweb.models.package_vote import PackageVote
class PackageSearch:
@ -13,23 +16,21 @@ class PackageSearch:
FULL_SORT_ORDER = {"d": "desc", "a": "asc"}
def __init__(self, user: models.User = None):
self.query = db.query(Package).join(PackageBase)
self.user = user
self.query = db.query(models.Package).join(models.PackageBase)
if self.user:
PackageVote = models.PackageVote
join_vote_on = and_(
PackageVote.PackageBaseID == models.PackageBase.ID,
PackageVote.UsersID == self.user.ID)
PackageNotification = models.PackageNotification
join_notif_on = and_(
PackageNotification.PackageBaseID == models.PackageBase.ID,
PackageNotification.UserID == self.user.ID)
self.query = self.query.join(
models.PackageVote, join_vote_on, isouter=True
).join(models.PackageNotification, join_notif_on, isouter=True)
PackageVote,
and_(PackageVote.PackageBaseID == PackageBase.ID,
PackageVote.UsersID == self.user.ID),
isouter=True
).join(
PackageNotification,
and_(PackageNotification.PackageBaseID == PackageBase.ID,
PackageNotification.UserID == self.user.ID),
isouter=True
)
self.ordering = "d"
@ -58,70 +59,92 @@ class PackageSearch:
"l": self._sort_by_last_modified
}
def _join_user(self, outer: bool = True) -> orm.Query:
""" Centralized joining of a package base's maintainer. """
self.query = self.query.join(
User,
User.ID == PackageBase.MaintainerUID,
isouter=outer
)
return self.query
def _search_by_namedesc(self, keywords: str) -> orm.Query:
self._join_user()
self.query = self.query.filter(
or_(models.Package.Name.like(f"%{keywords}%"),
models.Package.Description.like(f"%{keywords}%"))
or_(Package.Name.like(f"%{keywords}%"),
Package.Description.like(f"%{keywords}%"))
)
return self
def _search_by_name(self, keywords: str) -> orm.Query:
self.query = self.query.filter(
models.Package.Name.like(f"%{keywords}%"))
self._join_user()
self.query = self.query.filter(Package.Name.like(f"%{keywords}%"))
return self
def _search_by_exact_name(self, keywords: str) -> orm.Query:
self.query = self.query.filter(
models.Package.Name == keywords)
self._join_user()
self.query = self.query.filter(Package.Name == keywords)
return self
def _search_by_pkgbase(self, keywords: str) -> orm.Query:
self.query = self.query.filter(
models.PackageBase.Name.like(f"%{keywords}%"))
self._join_user()
self.query = self.query.filter(PackageBase.Name.like(f"%{keywords}%"))
return self
def _search_by_exact_pkgbase(self, keywords: str) -> orm.Query:
self.query = self.query.filter(
models.PackageBase.Name == keywords)
self._join_user()
self.query = self.query.filter(PackageBase.Name == keywords)
return self
def _search_by_keywords(self, keywords: str) -> orm.Query:
self.query = self.query.join(models.PackageKeyword).filter(
models.PackageKeyword.Keyword == keywords
self._join_user()
self.query = self.query.join(PackageKeyword).filter(
PackageKeyword.Keyword == keywords
)
return self
def _search_by_maintainer(self, keywords: str) -> orm.Query:
self._join_user()
if keywords:
self.query = self.query.join(
models.User, models.User.ID == models.PackageBase.MaintainerUID
).filter(models.User.Username == keywords)
else:
self.query = self.query.filter(
models.PackageBase.MaintainerUID.is_(None))
and_(User.Username == keywords,
User.ID == PackageBase.MaintainerUID)
)
else:
self.query = self.query.filter(PackageBase.MaintainerUID.is_(None))
return self
def _search_by_comaintainer(self, keywords: str) -> orm.Query:
self.query = self.query.join(models.PackageComaintainer).join(
models.User, models.User.ID == models.PackageComaintainer.UsersID
).filter(models.User.Username == keywords)
self._join_user()
exists_subq = db.query(PackageComaintainer).join(User).filter(
and_(PackageComaintainer.PackageBaseID == PackageBase.ID,
User.Username == keywords)
).exists()
self.query = self.query.filter(db.query(exists_subq).scalar_subquery())
return self
def _search_by_co_or_maintainer(self, keywords: str) -> orm.Query:
self.query = self.query.join(
models.PackageComaintainer,
isouter=True
).join(
models.User,
or_(models.User.ID == models.PackageBase.MaintainerUID,
models.User.ID == models.PackageComaintainer.UsersID)
).filter(models.User.Username == keywords)
self._join_user()
exists_subq = db.query(PackageComaintainer).join(User).filter(
and_(PackageComaintainer.PackageBaseID == PackageBase.ID,
User.Username == keywords)
).exists()
self.query = self.query.filter(
or_(and_(User.Username == keywords,
User.ID == PackageBase.MaintainerUID),
db.query(exists_subq).scalar_subquery())
)
return self
def _search_by_submitter(self, keywords: str) -> orm.Query:
self.query = self.query.join(
models.User, models.User.ID == models.PackageBase.SubmitterUID
).filter(models.User.Username == keywords)
self._join_user()
uid = 0
user = db.query(User).filter(User.Username == keywords).first()
if user:
uid = user.ID
self.query = self.query.filter(PackageBase.SubmitterUID == uid)
return self
def search_by(self, search_by: str, keywords: str) -> orm.Query:
@ -138,12 +161,14 @@ class PackageSearch:
def _sort_by_votes(self, order: str):
column = getattr(models.PackageBase.NumVotes, order)
self.query = self.query.order_by(column())
name = getattr(models.Package.Name, order)
self.query = self.query.order_by(column(), name())
return self
def _sort_by_popularity(self, order: str):
column = getattr(models.PackageBase.Popularity, order)
self.query = self.query.order_by(column())
name = getattr(models.Package.Name, order)
self.query = self.query.order_by(column(), name())
return self
def _sort_by_voted(self, order: str):
@ -154,7 +179,8 @@ class PackageSearch:
case([(models.PackageVote.UsersID == self.user.ID, 1)], else_=0),
order
)
self.query = self.query.order_by(column(), models.Package.Name.desc())
name = getattr(models.Package.Name, order)
self.query = self.query.order_by(column(), name())
return self
def _sort_by_notify(self, order: str):
@ -166,39 +192,42 @@ class PackageSearch:
else_=0),
order
)
self.query = self.query.order_by(column(), models.Package.Name.desc())
name = getattr(models.Package.Name, order)
self.query = self.query.order_by(column(), name())
return self
def _sort_by_maintainer(self, order: str):
column = getattr(models.User.Username, order)
self.query = self.query.join(
models.User,
models.User.ID == models.PackageBase.MaintainerUID,
isouter=True
).order_by(column())
name = getattr(models.Package.Name, order)
self.query = self.query.order_by(column(), name())
return self
def _sort_by_last_modified(self, order: str):
column = getattr(models.PackageBase.ModifiedTS, order)
self.query = self.query.order_by(column())
name = getattr(models.Package.Name, order)
self.query = self.query.order_by(column(), name())
return self
def sort_by(self, sort_by: str, ordering: str = "d") -> orm.Query:
if sort_by not in self.sort_by_cb:
sort_by = "n" # Default: Name.
sort_by = "p" # Default: Popularity
callback = self.sort_by_cb.get(sort_by)
if ordering not in self.FULL_SORT_ORDER:
ordering = "d" # Default: Descending.
ordering = "d" # Default: Descending
ordering = self.FULL_SORT_ORDER.get(ordering)
return callback(ordering)
def results(self) -> orm.Query:
# Store the total count of all records found up to limit.
limit = (config.getint("options", "max_search_results")
or DEFAULT_MAX_RESULTS)
self.total_count = self.query.limit(limit).count()
def count(self, limit: int) -> int:
"""
Return internal query's count up to `limit`.
# Return the query to the user.
:param limit: Upper bound
:return: Database count up to `limit`
"""
return self.query.limit(limit).count()
def results(self) -> orm.Query:
""" Return internal query. """
return self.query
@ -279,4 +308,4 @@ class RPCSearch(PackageSearch):
return result
def results(self) -> orm.Query:
return self.query
return self.query.filter(models.PackageBase.PackagerUID.isnot(None))

View file

@ -200,7 +200,7 @@ def query_voted(query: List[models.Package],
:return: Vote state dict (PackageBase.ID: int -> bool)
"""
output = defaultdict(bool)
query_set = {pkg.PackageBase.ID for pkg in query}
query_set = {pkg.PackageBaseID for pkg in query}
voted = db.query(models.PackageVote).join(
models.PackageBase,
models.PackageBase.ID.in_(query_set)
@ -223,7 +223,7 @@ def query_notified(query: List[models.Package],
:return: Notification state dict (PackageBase.ID: int -> bool)
"""
output = defaultdict(bool)
query_set = {pkg.PackageBase.ID for pkg in query}
query_set = {pkg.PackageBaseID for pkg in query}
notified = db.query(models.PackageNotification).join(
models.PackageBase,
models.PackageBase.ID.in_(query_set)

View file

@ -20,7 +20,7 @@ from aurweb.packages import util as pkgutil
from aurweb.packages import validate
from aurweb.packages.requests import handle_request, update_closure_comment
from aurweb.packages.search import PackageSearch
from aurweb.packages.util import get_pkg_or_base, get_pkgbase_comment, get_pkgreq_by_id, query_notified, query_voted
from aurweb.packages.util import get_pkg_or_base, get_pkgbase_comment, get_pkgreq_by_id
from aurweb.scripts import notify, popupdate
from aurweb.scripts.rendercomment import update_comment_render_fastapi
from aurweb.templates import make_context, make_variable_context, render_raw_template, render_template
@ -45,7 +45,7 @@ async def packages_get(request: Request, context: Dict[str, Any],
search_by = context["SeB"] = request.query_params.get("SeB", "nd")
# Query sort by.
sort_by = context["SB"] = request.query_params.get("SB", "n")
sort_by = context["SB"] = request.query_params.get("SB", "p")
# Query sort order.
sort_order = request.query_params.get("SO", None)
@ -61,6 +61,12 @@ async def packages_get(request: Request, context: Dict[str, Any],
for keyword in keywords:
search.search_by(search_by, keyword)
# Collect search result count here; we've applied our keywords.
# Including more query operations below, like ordering, will
# increase the amount of time required to collect a count.
limit = config.getint("options", "max_search_results")
num_packages = search.count(limit)
flagged = request.query_params.get("outdated", None)
if flagged:
# If outdated was given, set it up in the context.
@ -96,16 +102,23 @@ async def packages_get(request: Request, context: Dict[str, Any],
context["SO"] = sort_order
# Insert search results into the context.
results = search.results()
results = search.results().with_entities(
models.Package.ID,
models.Package.Name,
models.Package.PackageBaseID,
models.Package.Version,
models.Package.Description,
models.PackageBase.Popularity,
models.PackageBase.NumVotes,
models.PackageBase.OutOfDateTS,
models.User.Username.label("Maintainer"),
models.PackageVote.PackageBaseID.label("Voted"),
models.PackageNotification.PackageBaseID.label("Notify")
)
packages = results.limit(per_page).offset(offset)
util.apply_all(packages, db.refresh)
context["packages"] = packages
context["packages_voted"] = query_voted(
context.get("packages"), request.user)
context["packages_notified"] = query_notified(
context.get("packages"), request.user)
context["packages_count"] = search.total_count
context["packages_count"] = num_packages
return render_template(request, "packages.html", context,
status_code=status_code)

View file

@ -45,7 +45,7 @@
{# /packages does things a bit roundabout-wise:
If SeB is not given, "nd" is the default.
If SB is not given, "n" is the default.
If SB is not given, "p" is the default.
If SO is not given, "d" is the default.
However, we depend on flipping SO for column sorting.
@ -56,7 +56,7 @@
{% set SeB = "nd" %}
{% endif %}
{% if not SB %}
{% set SB = "n" %}
{% set SB = "p" %}
{% endif %}
{% if not SO %}
{% set SO = "d" %}

View file

@ -64,7 +64,7 @@
</thead>
<tbody>
{% for pkg in packages %}
{% set flagged = pkg.PackageBase.OutOfDateTS %}
{% set flagged = pkg.OutOfDateTS %}
<tr>
{% if request.user.is_authenticated() %}
<td>
@ -81,28 +81,27 @@
{% else %}
<td>{{ pkg.Version }}</td>
{% endif %}
<td>{{ pkg.PackageBase.NumVotes }}</td>
<td>{{ pkg.NumVotes }}</td>
<td>
{{ pkg.PackageBase.Popularity | number_format(2) }}
{{ pkg.Popularity | number_format(2) }}
</td>
{% if request.user.is_authenticated() %}
<td>
{% if pkg.PackageBase.ID in voted %}
{% if pkg.Voted %}
{{ "Yes" | tr }}
{% endif %}
</td>
<td>
{% if pkg.PackageBase.ID in notified %}
{% if pkg.Notify %}
{{ "Yes" | tr }}
{% endif %}
</td>
{% endif %}
<td class="wrap">{{ pkg.Description or '' }}</td>
<td>
{% set maintainer = pkg.PackageBase.Maintainer %}
{% if maintainer %}
<a href="/account/{{ maintainer.Username }}">
{{ maintainer.Username }}
{% if pkg.Maintainer %}
<a href="/account/{{ pkg.Maintainer }}">
{{ pkg.Maintainer }}
</a>
{% else %}
<span class="error">{{ "orphan" | tr }}</span>

View file

@ -816,6 +816,33 @@ def test_packages_search_by_submitter(client: TestClient,
assert len(rows) == 1
def test_packages_sort_by_name(client: TestClient, packages: List[Package]):
with client as request:
response = request.get("/packages", params={
"SB": "n", # Name
"SO": "a", # Ascending
"PP": "150"
})
assert response.status_code == int(HTTPStatus.OK)
root = parse_root(response.text)
rows = root.xpath('//table[@class="results"]/tbody/tr')
rows = [row.xpath('./td/a')[0].text.strip() for row in rows]
with client as request:
response2 = request.get("/packages", params={
"SB": "n", # Name
"SO": "d", # Ascending
"PP": "150"
})
assert response2.status_code == int(HTTPStatus.OK)
root = parse_root(response2.text)
rows2 = root.xpath('//table[@class="results"]/tbody/tr')
rows2 = [row.xpath('./td/a')[0].text.strip() for row in rows2]
assert rows == list(reversed(rows2))
def test_packages_sort_by_votes(client: TestClient,
maintainer: User,
packages: List[Package]):

View file

@ -656,6 +656,9 @@ def test_rpc_msearch(client: TestClient, user: User, packages: List[Package]):
data = response.json()
assert data.get("resultcount") == 0
with db.begin():
packages[0].PackageBase.Maintainer = None
# A missing arg still succeeds, but it returns all orphans.
# Just verify that we receive no error and the orphaned result.
params.pop("arg")
@ -663,7 +666,7 @@ def test_rpc_msearch(client: TestClient, user: User, packages: List[Package]):
data = response.json()
assert data.get("resultcount") == 1
result = data.get("results")[0]
assert result.get("Name") == "woogly-chungus"
assert result.get("Name") == "big-chungus"
def test_rpc_search_depends(client: TestClient, packages: List[Package],