mirror of
https://gitlab.archlinux.org/archlinux/aurweb.git
synced 2025-02-03 10:43:03 +01:00
feat: archive git repository (experimental)
See doc/git-archive.md for general Git archive specifications. See doc/repos/metadata-repo.md for info and direction related to the new Git metadata archive.
This commit is contained in:
parent
ec3152014b
commit
30e72d2db5
34 changed files with 1104 additions and 50 deletions
1
aurweb/archives/__init__.py
Normal file
1
aurweb/archives/__init__.py
Normal file
|
@ -0,0 +1 @@
|
||||||
|
# aurweb.archives
|
1
aurweb/archives/spec/__init__.py
Normal file
1
aurweb/archives/spec/__init__.py
Normal file
|
@ -0,0 +1 @@
|
||||||
|
# aurweb.archives.spec
|
77
aurweb/archives/spec/base.py
Normal file
77
aurweb/archives/spec/base.py
Normal file
|
@ -0,0 +1,77 @@
|
||||||
|
from pathlib import Path
|
||||||
|
from typing import Any, Dict, Iterable, List, Optional, Set
|
||||||
|
|
||||||
|
|
||||||
|
class GitInfo:
    """Information about a Git repository.

    :param path: Filesystem path of the Git repository; stored as a `Path`
    :param config: Local Git configuration mapping; when omitted, each
                   instance gets its own empty dict
    """

    # Path to Git repository.
    path: Path

    # Local Git repository configuration.
    config: Dict[str, Any]

    def __init__(self, path: str, config: Optional[Dict[str, Any]] = None) -> None:
        self.path = Path(path)
        # A `dict()` default in the signature is created once and shared by
        # every instance that omits `config`; build a fresh one per instance.
        self.config = {} if config is None else config
|
||||||
|
|
||||||
|
|
||||||
|
class SpecOutput:
    """Class used for git_archive.py output details.

    A plain record binding a piece of file content to the repository and
    filename it should be written to.

    :param filename: Filename relative to the Git repository root
    :param git_info: GitInfo instance describing the target repository
    :param data: Binary data bound for `filename`
    """

    # Filename relative to the Git repository root. Stored exactly as
    # passed to the constructor (a str), not converted to a Path.
    filename: str

    # Git repository information.
    git_info: "GitInfo"

    # Bytes bound for `SpecOutput.filename`.
    data: bytes

    def __init__(self, filename: str, git_info: "GitInfo", data: bytes) -> None:
        self.filename = filename
        self.git_info = git_info
        self.data = data
|
||||||
|
|
||||||
|
|
||||||
|
class SpecBase:
    """
    Base for Spec classes defined in git_archive.py --spec modules.

    All supported --spec modules must contain the following classes:
    - Spec(SpecBase)

    Every instance owns its own `outputs` list and `repos` set. They are
    created in `__new__` so that subclasses which define `__init__`
    without calling the base initializer still get per-instance state.
    """

    # A list of SpecOutputs, each of which contain output file data.
    outputs: List["SpecOutput"]

    # A set of repositories (GitInfo.path values) to commit changes to.
    repos: Set[Path]

    def __new__(cls, *args: Any, **kwargs: Any) -> "SpecBase":
        # Mutable containers declared at class level would be shared by
        # every spec instance in the process, so outputs from one spec
        # would leak into the next. Allocate them per instance instead.
        instance = super().__new__(cls)
        instance.outputs = []
        instance.repos = set()
        return instance

    def generate(self) -> Iterable["SpecOutput"]:
        """
        "Pure virtual" output generator.

        `SpecBase.outputs` and `SpecBase.repos` should be populated within an
        overridden version of this function in SpecBase derivatives.

        :raises NotImplementedError: always, unless overridden
        """
        raise NotImplementedError()

    def add_output(self, filename: str, git_info: "GitInfo", data: bytes) -> None:
        """
        Add a SpecOutput instance to the list of outputs and record its
        repository path in `repos`.

        :param filename: Filename relative to the git repository root
        :param git_info: GitInfo instance
        :param data: Binary data bound for `filename`
        """
        # set.add() is a no-op for members already present, so no
        # membership check is needed first.
        self.repos.add(git_info.path)
        self.outputs.append(SpecOutput(filename, git_info, data))
|
85
aurweb/archives/spec/metadata.py
Normal file
85
aurweb/archives/spec/metadata.py
Normal file
|
@ -0,0 +1,85 @@
|
||||||
|
from typing import Iterable
|
||||||
|
|
||||||
|
import orjson
|
||||||
|
|
||||||
|
from aurweb import config, db
|
||||||
|
from aurweb.models import Package, PackageBase, User
|
||||||
|
from aurweb.rpc import RPC
|
||||||
|
|
||||||
|
from .base import GitInfo, SpecBase, SpecOutput
|
||||||
|
|
||||||
|
# orjson serialization options: emit dictionary keys in sorted order and
# pretty-print with two-space indentation.
ORJSON_OPTS = orjson.OPT_SORT_KEYS | orjson.OPT_INDENT_2
|
||||||
|
|
||||||
|
|
||||||
|
class Spec(SpecBase):
    """Spec producing the package metadata archive.

    Emits two JSON documents bound for the repository configured at
    `[git-archive] metadata-repo`:

    - pkgname.json: RPC type=info data keyed by package name, with the
      package base level fields removed
    - pkgbase.json: the package base level fields keyed by package base name
    """

    def __init__(self) -> None:
        super().__init__()
        self.metadata_repo = GitInfo(
            config.get("git-archive", "metadata-repo"),
        )

    def generate(self) -> Iterable[SpecOutput]:
        """Query all packages and add the pkgname/pkgbase JSON outputs.

        :returns: `self.outputs` after both outputs have been added
        """
        # Base query used by the RPC. The User join must be an outer join
        # (as in the RPC's multiinfo query): an inner join would silently
        # drop every package whose PackageBase.MaintainerUID is NULL.
        base_query = (
            db.query(Package)
            .join(PackageBase)
            .join(User, PackageBase.MaintainerUID == User.ID, isouter=True)
        )

        # Create an instance of RPC, use it to get entities from
        # our query and perform a metadata subquery for all packages.
        rpc = RPC(version=5, type="info")
        print("performing package database query")
        packages = rpc.entities(base_query).all()
        print("performing package database subqueries")
        rpc.subquery({pkg.ID for pkg in packages})

        pkgbases, pkgnames = {}, {}
        for package in packages:
            # Produce RPC type=info data for `package`
            data = rpc.get_info_json_data(package)

            # Move the package base level fields out of the package data.
            # This includes NumVotes and Popularity: these fields change
            # quite often, which causes git data modification to explode
            # if they are kept in every package record.
            pkgbase_name = data.get("PackageBase")
            pkgbase_data = {
                "ID": data.pop("PackageBaseID"),
                "URLPath": data.pop("URLPath"),
                "FirstSubmitted": data.pop("FirstSubmitted"),
                "LastModified": data.pop("LastModified"),
                "OutOfDate": data.pop("OutOfDate"),
                "Maintainer": data.pop("Maintainer"),
                "Keywords": data.pop("Keywords"),
                "NumVotes": data.pop("NumVotes"),
                "Popularity": data.pop("Popularity"),
                "PopularityUpdated": package.PopularityUpdated.timestamp(),
            }

            # Store the data in `pkgbases` dict. We do this so we only
            # end up with a single entry per `pkgbase`, even when several
            # packages share the same package base.
            pkgbases[pkgbase_name] = pkgbase_data

            # Remove the ID key from package json.
            data.pop("ID")

            # Store the remaining package data keyed by package name.
            name = data.get("Name")
            pkgnames[name] = data

        # Add metadata outputs
        self.add_output(
            "pkgname.json",
            self.metadata_repo,
            orjson.dumps(pkgnames, option=ORJSON_OPTS),
        )
        self.add_output(
            "pkgbase.json",
            self.metadata_repo,
            orjson.dumps(pkgbases, option=ORJSON_OPTS),
        )

        return self.outputs
|
32
aurweb/archives/spec/pkgbases.py
Normal file
32
aurweb/archives/spec/pkgbases.py
Normal file
|
@ -0,0 +1,32 @@
|
||||||
|
from typing import Iterable
|
||||||
|
|
||||||
|
import orjson
|
||||||
|
|
||||||
|
from aurweb import config, db
|
||||||
|
from aurweb.models import PackageBase
|
||||||
|
|
||||||
|
from .base import GitInfo, SpecBase, SpecOutput
|
||||||
|
|
||||||
|
# orjson serialization options: emit dictionary keys in sorted order and
# pretty-print with two-space indentation.
ORJSON_OPTS = orjson.OPT_SORT_KEYS | orjson.OPT_INDENT_2
|
||||||
|
|
||||||
|
|
||||||
|
class Spec(SpecBase):
    """Spec producing a JSON list of package base names.

    The output, pkgbase.json, is bound for the repository configured at
    `[git-archive] pkgbases-repo`.
    """

    def __init__(self) -> None:
        super().__init__()
        self.pkgbases_repo = GitInfo(config.get("git-archive", "pkgbases-repo"))

    def generate(self) -> Iterable[SpecOutput]:
        """Add pkgbase.json, a name-sorted list of package base names.

        :returns: `self.outputs` after the output has been added
        """
        # Only package bases whose PackagerUID is set are listed.
        filt = PackageBase.PackagerUID.isnot(None)
        query = (
            db.query(PackageBase.Name)
            .filter(filt)
            .order_by(PackageBase.Name.asc())
            .all()
        )
        pkgbases = [pkgbase.Name for pkgbase in query]

        self.add_output(
            "pkgbase.json",
            self.pkgbases_repo,
            orjson.dumps(pkgbases, option=ORJSON_OPTS),
        )
        return self.outputs
|
33
aurweb/archives/spec/pkgnames.py
Normal file
33
aurweb/archives/spec/pkgnames.py
Normal file
|
@ -0,0 +1,33 @@
|
||||||
|
from typing import Iterable
|
||||||
|
|
||||||
|
import orjson
|
||||||
|
|
||||||
|
from aurweb import config, db
|
||||||
|
from aurweb.models import Package, PackageBase
|
||||||
|
|
||||||
|
from .base import GitInfo, SpecBase, SpecOutput
|
||||||
|
|
||||||
|
# orjson serialization options: emit dictionary keys in sorted order and
# pretty-print with two-space indentation.
ORJSON_OPTS = orjson.OPT_SORT_KEYS | orjson.OPT_INDENT_2
|
||||||
|
|
||||||
|
|
||||||
|
class Spec(SpecBase):
    """Spec producing a JSON list of package names.

    The output, pkgname.json, is bound for the repository configured at
    `[git-archive] pkgnames-repo`.
    """

    def __init__(self) -> None:
        super().__init__()
        self.pkgnames_repo = GitInfo(config.get("git-archive", "pkgnames-repo"))

    def generate(self) -> Iterable[SpecOutput]:
        """Add pkgname.json, a name-sorted list of package names.

        :returns: `self.outputs` after the output has been added
        """
        # Only packages whose package base has PackagerUID set are listed.
        filt = PackageBase.PackagerUID.isnot(None)
        query = (
            db.query(Package.Name)
            .join(PackageBase, PackageBase.ID == Package.PackageBaseID)
            .filter(filt)
            .order_by(Package.Name.asc())
            .all()
        )
        pkgnames = [pkg.Name for pkg in query]

        self.add_output(
            "pkgname.json",
            self.pkgnames_repo,
            orjson.dumps(pkgnames, option=ORJSON_OPTS),
        )
        return self.outputs
|
26
aurweb/archives/spec/users.py
Normal file
26
aurweb/archives/spec/users.py
Normal file
|
@ -0,0 +1,26 @@
|
||||||
|
from typing import Iterable
|
||||||
|
|
||||||
|
import orjson
|
||||||
|
|
||||||
|
from aurweb import config, db
|
||||||
|
from aurweb.models import User
|
||||||
|
|
||||||
|
from .base import GitInfo, SpecBase, SpecOutput
|
||||||
|
|
||||||
|
# orjson serialization options: emit dictionary keys in sorted order and
# pretty-print with two-space indentation.
ORJSON_OPTS = orjson.OPT_SORT_KEYS | orjson.OPT_INDENT_2
|
||||||
|
|
||||||
|
|
||||||
|
class Spec(SpecBase):
    """Spec producing a JSON list of usernames.

    The output, users.json, is bound for the repository configured at
    `[git-archive] users-repo`.
    """

    def __init__(self) -> None:
        super().__init__()
        self.users_repo = GitInfo(config.get("git-archive", "users-repo"))

    def generate(self) -> Iterable[SpecOutput]:
        """Add users.json, a sorted list of every User.Username.

        :returns: `self.outputs` after the output has been added
        """
        query = db.query(User.Username).order_by(User.Username.asc()).all()
        users = [user.Username for user in query]

        self.add_output(
            "users.json",
            self.users_repo,
            orjson.dumps(users, option=ORJSON_OPTS),
        )
        return self.outputs
|
|
@ -64,3 +64,13 @@ class PackageBase(Base):
|
||||||
if key in PackageBase.TO_FLOAT and not isinstance(attr, float):
|
if key in PackageBase.TO_FLOAT and not isinstance(attr, float):
|
||||||
return float(attr)
|
return float(attr)
|
||||||
return attr
|
return attr
|
||||||
|
|
||||||
|
|
||||||
|
def popularity_decay(pkgbase: PackageBase, utcnow: int):
    """Return the delta between now and the last popularity update, in days.

    :param pkgbase: Record exposing a `PopularityUpdated` datetime
    :param utcnow: Current time as a timestamp in seconds
    :returns: Elapsed days, with the fractional part truncated by `int()`
    """
    elapsed_seconds = utcnow - pkgbase.PopularityUpdated.timestamp()
    # 86400 seconds per day
    return int(elapsed_seconds / 86400)
|
||||||
|
|
||||||
|
|
||||||
|
def popularity(pkgbase: PackageBase, utcnow: int):
    """Return up-to-date popularity.

    The stored `Popularity` is multiplied by 0.98 for each day counted by
    `popularity_decay` since it was last updated.

    :param pkgbase: Record exposing `Popularity` and `PopularityUpdated`
    :param utcnow: Current time as a timestamp in seconds
    :returns: Decayed popularity as a float
    """
    stored = float(pkgbase.Popularity)
    days_elapsed = popularity_decay(pkgbase, utcnow)
    return stored * (0.98 ** days_elapsed)
|
||||||
|
|
|
@ -3,8 +3,9 @@ from typing import Any
|
||||||
from fastapi import Request
|
from fastapi import Request
|
||||||
from sqlalchemy import and_
|
from sqlalchemy import and_
|
||||||
|
|
||||||
from aurweb import config, db, defaults, l10n, util
|
from aurweb import config, db, defaults, l10n, time, util
|
||||||
from aurweb.models import PackageBase, User
|
from aurweb.models import PackageBase, User
|
||||||
|
from aurweb.models.package_base import popularity
|
||||||
from aurweb.models.package_comaintainer import PackageComaintainer
|
from aurweb.models.package_comaintainer import PackageComaintainer
|
||||||
from aurweb.models.package_comment import PackageComment
|
from aurweb.models.package_comment import PackageComment
|
||||||
from aurweb.models.package_request import PENDING_ID, PackageRequest
|
from aurweb.models.package_request import PENDING_ID, PackageRequest
|
||||||
|
@ -81,6 +82,8 @@ def make_context(
|
||||||
and_(PackageRequest.Status == PENDING_ID, PackageRequest.ClosedTS.is_(None))
|
and_(PackageRequest.Status == PENDING_ID, PackageRequest.ClosedTS.is_(None))
|
||||||
).count()
|
).count()
|
||||||
|
|
||||||
|
context["popularity"] = popularity(pkgbase, time.utcnow())
|
||||||
|
|
||||||
return context
|
return context
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -6,9 +6,10 @@ from fastapi.responses import HTMLResponse
|
||||||
from sqlalchemy import and_, literal, orm
|
from sqlalchemy import and_, literal, orm
|
||||||
|
|
||||||
import aurweb.config as config
|
import aurweb.config as config
|
||||||
from aurweb import db, defaults, models
|
from aurweb import db, defaults, models, time
|
||||||
from aurweb.exceptions import RPCError
|
from aurweb.exceptions import RPCError
|
||||||
from aurweb.filters import number_format
|
from aurweb.filters import number_format
|
||||||
|
from aurweb.models.package_base import popularity
|
||||||
from aurweb.packages.search import RPCSearch
|
from aurweb.packages.search import RPCSearch
|
||||||
|
|
||||||
TYPE_MAPPING = {
|
TYPE_MAPPING = {
|
||||||
|
@ -120,16 +121,15 @@ class RPC:
|
||||||
if not args:
|
if not args:
|
||||||
raise RPCError("No request type/data specified.")
|
raise RPCError("No request type/data specified.")
|
||||||
|
|
||||||
def _get_json_data(self, package: models.Package) -> dict[str, Any]:
|
def get_json_data(self, package: models.Package) -> dict[str, Any]:
|
||||||
"""Produce dictionary data of one Package that can be JSON-serialized.
|
"""Produce dictionary data of one Package that can be JSON-serialized.
|
||||||
|
|
||||||
:param package: Package instance
|
:param package: Package instance
|
||||||
:returns: JSON-serializable dictionary
|
:returns: JSON-serializable dictionary
|
||||||
"""
|
"""
|
||||||
|
|
||||||
# Produce RPC API compatible Popularity: If zero, it's an integer
|
# Normalize Popularity for RPC output to 6 decimal precision
|
||||||
# 0, otherwise, it's formatted to the 6th decimal place.
|
pop = popularity(package, time.utcnow())
|
||||||
pop = package.Popularity
|
|
||||||
pop = 0 if not pop else float(number_format(pop, 6))
|
pop = 0 if not pop else float(number_format(pop, 6))
|
||||||
|
|
||||||
snapshot_uri = config.get("options", "snapshot_uri")
|
snapshot_uri = config.get("options", "snapshot_uri")
|
||||||
|
@ -151,8 +151,8 @@ class RPC:
|
||||||
"LastModified": package.ModifiedTS,
|
"LastModified": package.ModifiedTS,
|
||||||
}
|
}
|
||||||
|
|
||||||
def _get_info_json_data(self, package: models.Package) -> dict[str, Any]:
|
def get_info_json_data(self, package: models.Package) -> dict[str, Any]:
|
||||||
data = self._get_json_data(package)
|
data = self.get_json_data(package)
|
||||||
|
|
||||||
# All info results have _at least_ an empty list of
|
# All info results have _at least_ an empty list of
|
||||||
# License and Keywords.
|
# License and Keywords.
|
||||||
|
@ -176,7 +176,7 @@ class RPC:
|
||||||
"""
|
"""
|
||||||
return [data_generator(pkg) for pkg in packages]
|
return [data_generator(pkg) for pkg in packages]
|
||||||
|
|
||||||
def _entities(self, query: orm.Query) -> orm.Query:
|
def entities(self, query: orm.Query) -> orm.Query:
|
||||||
"""Select specific RPC columns on `query`."""
|
"""Select specific RPC columns on `query`."""
|
||||||
return query.with_entities(
|
return query.with_entities(
|
||||||
models.Package.ID,
|
models.Package.ID,
|
||||||
|
@ -188,38 +188,14 @@ class RPC:
|
||||||
models.PackageBase.Name.label("PackageBaseName"),
|
models.PackageBase.Name.label("PackageBaseName"),
|
||||||
models.PackageBase.NumVotes,
|
models.PackageBase.NumVotes,
|
||||||
models.PackageBase.Popularity,
|
models.PackageBase.Popularity,
|
||||||
|
models.PackageBase.PopularityUpdated,
|
||||||
models.PackageBase.OutOfDateTS,
|
models.PackageBase.OutOfDateTS,
|
||||||
models.PackageBase.SubmittedTS,
|
models.PackageBase.SubmittedTS,
|
||||||
models.PackageBase.ModifiedTS,
|
models.PackageBase.ModifiedTS,
|
||||||
models.User.Username.label("Maintainer"),
|
models.User.Username.label("Maintainer"),
|
||||||
).group_by(models.Package.ID)
|
).group_by(models.Package.ID)
|
||||||
|
|
||||||
def _handle_multiinfo_type(
|
def subquery(self, ids: set[int]):
|
||||||
self, args: list[str] = [], **kwargs
|
|
||||||
) -> list[dict[str, Any]]:
|
|
||||||
self._enforce_args(args)
|
|
||||||
args = set(args)
|
|
||||||
|
|
||||||
packages = (
|
|
||||||
db.query(models.Package)
|
|
||||||
.join(models.PackageBase)
|
|
||||||
.join(
|
|
||||||
models.User,
|
|
||||||
models.User.ID == models.PackageBase.MaintainerUID,
|
|
||||||
isouter=True,
|
|
||||||
)
|
|
||||||
.filter(models.Package.Name.in_(args))
|
|
||||||
)
|
|
||||||
|
|
||||||
max_results = config.getint("options", "max_rpc_results")
|
|
||||||
packages = self._entities(packages).limit(max_results + 1)
|
|
||||||
|
|
||||||
if packages.count() > max_results:
|
|
||||||
raise RPCError("Too many package results.")
|
|
||||||
|
|
||||||
ids = {pkg.ID for pkg in packages}
|
|
||||||
|
|
||||||
# Aliases for 80-width.
|
|
||||||
Package = models.Package
|
Package = models.Package
|
||||||
PackageKeyword = models.PackageKeyword
|
PackageKeyword = models.PackageKeyword
|
||||||
|
|
||||||
|
@ -311,7 +287,33 @@ class RPC:
|
||||||
|
|
||||||
self.extra_info[record.ID][type_].append(name)
|
self.extra_info[record.ID][type_].append(name)
|
||||||
|
|
||||||
return self._assemble_json_data(packages, self._get_info_json_data)
|
def _handle_multiinfo_type(
    self, args: list[str] = [], **kwargs
) -> list[dict[str, Any]]:
    """Produce info-level JSON data for the packages named in `args`.

    :param args: Package names to look up; duplicates are collapsed
    :param kwargs: Accepted but not used by this handler
    :returns: One JSON-serializable dictionary per matching package
    :raises RPCError: If more than `max_rpc_results` packages match
    """
    self._enforce_args(args)
    names = set(args)

    # Outer join on the maintainer so packages without one still match.
    maintainer_join = models.User.ID == models.PackageBase.MaintainerUID
    query = db.query(models.Package).join(models.PackageBase)
    query = query.join(models.User, maintainer_join, isouter=True)
    query = query.filter(models.Package.Name.in_(names))

    # Ask for one row beyond the limit so exceeding it can be detected.
    limit = config.getint("options", "max_rpc_results")
    packages = self.entities(query).limit(limit + 1)
    if packages.count() > limit:
        raise RPCError("Too many package results.")

    # Gather the extra per-package info for every selected package ID.
    self.subquery({pkg.ID for pkg in packages})

    return self._assemble_json_data(packages, self.get_info_json_data)
|
||||||
|
|
||||||
def _handle_search_type(
|
def _handle_search_type(
|
||||||
self, by: str = defaults.RPC_SEARCH_BY, args: list[str] = []
|
self, by: str = defaults.RPC_SEARCH_BY, args: list[str] = []
|
||||||
|
@ -330,12 +332,12 @@ class RPC:
|
||||||
search.search_by(by, arg)
|
search.search_by(by, arg)
|
||||||
|
|
||||||
max_results = config.getint("options", "max_rpc_results")
|
max_results = config.getint("options", "max_rpc_results")
|
||||||
results = self._entities(search.results()).limit(max_results + 1).all()
|
results = self.entities(search.results()).limit(max_results + 1).all()
|
||||||
|
|
||||||
if len(results) > max_results:
|
if len(results) > max_results:
|
||||||
raise RPCError("Too many package results.")
|
raise RPCError("Too many package results.")
|
||||||
|
|
||||||
return self._assemble_json_data(results, self._get_json_data)
|
return self._assemble_json_data(results, self.get_json_data)
|
||||||
|
|
||||||
def _handle_msearch_type(
|
def _handle_msearch_type(
|
||||||
self, args: list[str] = [], **kwargs
|
self, args: list[str] = [], **kwargs
|
||||||
|
|
|
@ -155,6 +155,12 @@ PackageBases = Table(
|
||||||
nullable=False,
|
nullable=False,
|
||||||
server_default=text("0"),
|
server_default=text("0"),
|
||||||
),
|
),
|
||||||
|
Column(
|
||||||
|
"PopularityUpdated",
|
||||||
|
TIMESTAMP,
|
||||||
|
nullable=False,
|
||||||
|
server_default=text("'1970-01-01 00:00:01.000000'"),
|
||||||
|
),
|
||||||
Column("OutOfDateTS", BIGINT(unsigned=True)),
|
Column("OutOfDateTS", BIGINT(unsigned=True)),
|
||||||
Column("FlaggerComment", Text, nullable=False),
|
Column("FlaggerComment", Text, nullable=False),
|
||||||
Column("SubmittedTS", BIGINT(unsigned=True), nullable=False),
|
Column("SubmittedTS", BIGINT(unsigned=True), nullable=False),
|
||||||
|
|
125
aurweb/scripts/git_archive.py
Normal file
125
aurweb/scripts/git_archive.py
Normal file
|
@ -0,0 +1,125 @@
|
||||||
|
import argparse
|
||||||
|
import importlib
|
||||||
|
import os
|
||||||
|
import sys
|
||||||
|
import traceback
|
||||||
|
from datetime import datetime
|
||||||
|
|
||||||
|
import orjson
|
||||||
|
import pygit2
|
||||||
|
|
||||||
|
from aurweb import config
|
||||||
|
|
||||||
|
# Constants
|
||||||
|
# Git reference that archive commits are created on (the reference name
# passed to repo.create_commit in update_repository).
REF = "refs/heads/master"
|
||||||
|
# orjson serialization options: emit dictionary keys in sorted order and
# pretty-print with two-space indentation.
# NOTE(review): not referenced by any function visible in this script.
ORJSON_OPTS = orjson.OPT_SORT_KEYS | orjson.OPT_INDENT_2
|
||||||
|
|
||||||
|
|
||||||
|
def init_repository(git_info) -> None:
    """Create a Git repository at `git_info.path` and apply its config.

    :param git_info: Object exposing `path` (repository location) and
                     `config` (mapping of Git configuration keys to values)
    """
    repo_path = git_info.path
    pygit2.init_repository(repo_path)

    # Open the freshly initialized repository to write its local config.
    repository = pygit2.Repository(repo_path)
    for key, value in git_info.config.items():
        repository.config[key] = value
|
||||||
|
|
||||||
|
|
||||||
|
def parse_args():
    """Parse the script's command-line arguments.

    :returns: Namespace with a single `spec` attribute, taken from the
              required `--spec` option
    """
    spec_option = {
        "type": str,
        "required": True,
        "help": "name of spec module in the aurweb.archives.spec package",
    }
    cli = argparse.ArgumentParser()
    cli.add_argument("--spec", **spec_option)
    return cli.parse_args()
|
||||||
|
|
||||||
|
|
||||||
|
def update_repository(repo: pygit2.Repository):
    """Commit all pending working tree changes in `repo` to `REF`.

    If `repo.status()` reports no entry other than GIT_STATUS_CURRENT,
    nothing is committed. Otherwise the whole tree is staged and a single
    commit titled "update - <UTC timestamp>" is created, authored by the
    `[git-archive] author` / `author-email` configuration.

    :param repo: Repository to inspect and commit to
    """
    # Local import: this module only imports `datetime` from the
    # datetime package at file level.
    from datetime import timezone

    # Use git status to determine file changes
    has_changes = any(
        flags != pygit2.GIT_STATUS_CURRENT for flags in repo.status().values()
    )
    if not has_changes:
        print("no diff detected")
        return

    print("diff detected, committing")
    # Add everything in the tree.
    print("adding files to git tree")

    # Add the tree to staging
    repo.index.read()
    repo.index.add_all()
    repo.index.write()
    tree = repo.index.write_tree()

    # Determine base commit; if repo.head.target raises GitError,
    # we have no current commits
    try:
        base = [repo.head.target]
    except pygit2.GitError:
        base = []

    # Use a timezone-aware UTC datetime: calling .timestamp() on the
    # naive value returned by datetime.utcnow() interprets it as local
    # time, which skews the commit time on hosts not running in UTC.
    utcnow = datetime.now(timezone.utc)
    author = pygit2.Signature(
        config.get("git-archive", "author"),
        config.get("git-archive", "author-email"),
        int(utcnow.timestamp()),
        0,  # offset from UTC, in minutes
    )

    # Commit the changes
    timestamp = utcnow.strftime("%Y-%m-%d %H:%M:%S")
    title = f"update - {timestamp}"
    repo.create_commit(REF, author, author, title, tree, base)

    print("committed changes")
|
||||||
|
|
||||||
|
|
||||||
|
def main() -> int:
    """Run one spec module and commit what it produced.

    Loads `aurweb.archives.spec.<--spec>`, instantiates its `Spec` class,
    writes every generated output into its repository's working tree
    (initializing the repository first when it has no `.git`), then calls
    `update_repository` once for each repository listed in `spec.repos`.

    :returns: 0 on completion
    """
    args = parse_args()

    print(f"loading '{args.spec}' spec")
    spec_package = "aurweb.archives.spec"
    module_path = f"{spec_package}.{args.spec}"
    spec_module = importlib.import_module(module_path)
    print(f"loaded '{args.spec}'")

    print(f"running '{args.spec}' spec...")
    spec = spec_module.Spec()
    for output in spec.generate():
        repo_path = output.git_info.path
        if not os.path.exists(repo_path / ".git"):
            init_repository(output.git_info)

        with open(repo_path / output.filename, "wb") as f:
            f.write(output.data)

    print(f"done running '{args.spec}' spec")

    # The spec tracks the repositories it wrote to in `spec.repos`; after
    # all outputs are written, each of those repositories gets a single
    # commit if it contains changes.
    print("processing repositories")
    for path in spec.repos:
        print(f"processing repository: {path}")
        update_repository(pygit2.Repository(path))

    return 0
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
    # Script entry point: exit with main()'s return value, exit 0 when
    # interrupted from the keyboard, and exit 1 after printing the
    # traceback for any other exception.
    try:
        exit_code = main()
    except KeyboardInterrupt:
        exit_code = 0
    except Exception:
        traceback.print_exc()
        exit_code = 1
    sys.exit(exit_code)
|
|
@ -188,6 +188,7 @@ def _main():
|
||||||
USERS = aurweb.config.get("mkpkglists", "userfile")
|
USERS = aurweb.config.get("mkpkglists", "userfile")
|
||||||
|
|
||||||
bench = Benchmark()
|
bench = Benchmark()
|
||||||
|
logger.warning(f"{sys.argv[0]} is deprecated and will be soon be removed")
|
||||||
logger.info("Started re-creating archives, wait a while...")
|
logger.info("Started re-creating archives, wait a while...")
|
||||||
|
|
||||||
query = (
|
query = (
|
||||||
|
|
|
@ -1,9 +1,10 @@
|
||||||
#!/usr/bin/env python3
|
#!/usr/bin/env python3
|
||||||
|
from datetime import datetime
|
||||||
|
|
||||||
from sqlalchemy import and_, func
|
from sqlalchemy import and_, func
|
||||||
from sqlalchemy.sql.functions import coalesce, sum as _sum
|
from sqlalchemy.sql.functions import coalesce, sum as _sum
|
||||||
|
|
||||||
from aurweb import db, time
|
from aurweb import config, db, time
|
||||||
from aurweb.models import PackageBase, PackageVote
|
from aurweb.models import PackageBase, PackageVote
|
||||||
|
|
||||||
|
|
||||||
|
@ -46,13 +47,24 @@ def run_variable(pkgbases: list[PackageBase] = []) -> None:
|
||||||
|
|
||||||
ids = set()
|
ids = set()
|
||||||
if pkgbases:
|
if pkgbases:
|
||||||
|
# If `pkgbases` were given, we should forcefully update the given
|
||||||
|
# package base records' popularities.
|
||||||
ids = {pkgbase.ID for pkgbase in pkgbases}
|
ids = {pkgbase.ID for pkgbase in pkgbases}
|
||||||
query = query.filter(PackageBase.ID.in_(ids))
|
query = query.filter(PackageBase.ID.in_(ids))
|
||||||
|
else:
|
||||||
|
# Otherwise, we should only update popularities which have exceeded
|
||||||
|
# the popularity interval length.
|
||||||
|
interval = config.getint("git-archive", "popularity-interval")
|
||||||
|
query = query.filter(
|
||||||
|
PackageBase.PopularityUpdated
|
||||||
|
<= datetime.fromtimestamp((now - interval))
|
||||||
|
)
|
||||||
|
|
||||||
query.update(
|
query.update(
|
||||||
{
|
{
|
||||||
"NumVotes": votes_subq.scalar_subquery(),
|
"NumVotes": votes_subq.scalar_subquery(),
|
||||||
"Popularity": pop_subq.scalar_subquery(),
|
"Popularity": pop_subq.scalar_subquery(),
|
||||||
|
"PopularityUpdated": datetime.fromtimestamp(now),
|
||||||
}
|
}
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
|
@ -1,6 +1,4 @@
|
||||||
import os
|
import os
|
||||||
import shlex
|
|
||||||
from subprocess import PIPE, Popen
|
|
||||||
from typing import Tuple
|
from typing import Tuple
|
||||||
|
|
||||||
import py
|
import py
|
||||||
|
@ -8,6 +6,7 @@ import py
|
||||||
from aurweb.models import Package
|
from aurweb.models import Package
|
||||||
from aurweb.templates import base_template
|
from aurweb.templates import base_template
|
||||||
from aurweb.testing.filelock import FileLock
|
from aurweb.testing.filelock import FileLock
|
||||||
|
from aurweb.util import shell_exec
|
||||||
|
|
||||||
|
|
||||||
class GitRepository:
|
class GitRepository:
|
||||||
|
@ -24,10 +23,7 @@ class GitRepository:
|
||||||
self.file_lock.lock(on_create=self._setup)
|
self.file_lock.lock(on_create=self._setup)
|
||||||
|
|
||||||
def _exec(self, cmdline: str, cwd: str) -> Tuple[int, str, str]:
|
def _exec(self, cmdline: str, cwd: str) -> Tuple[int, str, str]:
|
||||||
args = shlex.split(cmdline)
|
return shell_exec(cmdline, cwd)
|
||||||
proc = Popen(args, cwd=cwd, stdout=PIPE, stderr=PIPE)
|
|
||||||
out, err = proc.communicate()
|
|
||||||
return (proc.returncode, out.decode().strip(), err.decode().strip())
|
|
||||||
|
|
||||||
def _exec_repository(self, cmdline: str) -> Tuple[int, str, str]:
|
def _exec_repository(self, cmdline: str) -> Tuple[int, str, str]:
|
||||||
return self._exec(cmdline, cwd=str(self.file_lock.path))
|
return self._exec(cmdline, cwd=str(self.file_lock.path))
|
||||||
|
|
|
@ -1,6 +1,7 @@
|
||||||
import math
|
import math
|
||||||
import re
|
import re
|
||||||
import secrets
|
import secrets
|
||||||
|
import shlex
|
||||||
import string
|
import string
|
||||||
from datetime import datetime
|
from datetime import datetime
|
||||||
from http import HTTPStatus
|
from http import HTTPStatus
|
||||||
|
@ -192,3 +193,10 @@ def parse_ssh_key(string: str) -> Tuple[str, str]:
|
||||||
def parse_ssh_keys(string: str) -> list[Tuple[str, str]]:
|
def parse_ssh_keys(string: str) -> list[Tuple[str, str]]:
|
||||||
"""Parse a list of SSH public keys."""
|
"""Parse a list of SSH public keys."""
|
||||||
return [parse_ssh_key(e) for e in string.splitlines()]
|
return [parse_ssh_key(e) for e in string.splitlines()]
|
||||||
|
|
||||||
|
|
||||||
|
def shell_exec(cmdline: str, cwd: str) -> Tuple[int, str, str]:
    """Run `cmdline` in directory `cwd` and capture its output.

    The command line is tokenized with `shlex.split` and executed
    directly, without going through a shell.

    :param cmdline: Command line to execute
    :param cwd: Working directory for the child process
    :returns: Tuple of (return code, stdout, stderr); both streams are
              decoded and stripped of surrounding whitespace
    """
    argv = shlex.split(cmdline)
    process = Popen(argv, cwd=cwd, stdout=PIPE, stderr=PIPE)
    raw_out, raw_err = process.communicate()
    stdout = raw_out.decode().strip()
    stderr = raw_err.decode().strip()
    return (process.returncode, stdout, stderr)
|
||||||
|
|
|
@ -131,6 +131,18 @@ packagesmetaextfile = /srv/http/aurweb/web/html/packages-meta-ext-v1.json.gz
|
||||||
pkgbasefile = /srv/http/aurweb/web/html/pkgbase.gz
|
pkgbasefile = /srv/http/aurweb/web/html/pkgbase.gz
|
||||||
userfile = /srv/http/aurweb/web/html/users.gz
|
userfile = /srv/http/aurweb/web/html/users.gz
|
||||||
|
|
||||||
|
[git-archive]
|
||||||
|
author = git_archive.py
|
||||||
|
author-email = no-reply@archlinux.org
|
||||||
|
|
||||||
|
; One week's worth of seconds (86400 * 7)
|
||||||
|
popularity-interval = 604800
|
||||||
|
|
||||||
|
metadata-repo = /srv/http/aurweb/metadata.git
|
||||||
|
users-repo = /srv/http/aurweb/users.git
|
||||||
|
pkgbases-repo = /srv/http/aurweb/pkgbases.git
|
||||||
|
pkgnames-repo = /srv/http/aurweb/pkgnames.git
|
||||||
|
|
||||||
[devel]
|
[devel]
|
||||||
; commit_url is a format string used to produce a link to a commit hash.
|
; commit_url is a format string used to produce a link to a commit hash.
|
||||||
commit_url = https://gitlab.archlinux.org/archlinux/aurweb/-/commits/%s
|
commit_url = https://gitlab.archlinux.org/archlinux/aurweb/-/commits/%s
|
||||||
|
|
|
@ -76,5 +76,11 @@ packagesmetaextfile = /var/lib/aurweb/archives/packages-meta-ext-v1.json.gz
|
||||||
pkgbasefile = /var/lib/aurweb/archives/pkgbase.gz
|
pkgbasefile = /var/lib/aurweb/archives/pkgbase.gz
|
||||||
userfile = /var/lib/aurweb/archives/users.gz
|
userfile = /var/lib/aurweb/archives/users.gz
|
||||||
|
|
||||||
|
[git-archive]
|
||||||
|
metadata-repo = metadata.git
|
||||||
|
users-repo = users.git
|
||||||
|
pkgbases-repo = pkgbases.git
|
||||||
|
pkgnames-repo = pkgnames.git
|
||||||
|
|
||||||
[aurblup]
|
[aurblup]
|
||||||
db-path = YOUR_AUR_ROOT/aurblup/
|
db-path = YOUR_AUR_ROOT/aurblup/
|
||||||
|
|
75
doc/git-archive.md
Normal file
75
doc/git-archive.md
Normal file
|
@ -0,0 +1,75 @@
|
||||||
|
# aurweb Git Archive Specification
|
||||||
|
|
||||||
|
<span style="color: red">
|
||||||
|
WARNING: This aurweb Git Archive implementation is
|
||||||
|
experimental and may be changed.
|
||||||
|
</span>
|
||||||
|
|
||||||
|
## Overview
|
||||||
|
|
||||||
|
This git archive specification refers to the archive git repositories
|
||||||
|
created by [aurweb/scripts/git_archive.py](aurweb/scripts/git_archive.py)
|
||||||
|
using [spec modules](#spec-modules).
|
||||||
|
|
||||||
|
## Configuration
|
||||||
|
|
||||||
|
- `[git-archive]`
|
||||||
|
- `author`
|
||||||
|
- Git commit author
|
||||||
|
- `author-email`
|
||||||
|
- Git commit author email
|
||||||
|
|
||||||
|
See an [official spec](#official-specs)'s documentation for spec-specific
|
||||||
|
configurations.
|
||||||
|
|
||||||
|
## Fetch/Update Archives
|
||||||
|
|
||||||
|
When a client has not yet fetched any initial archives, they should clone
|
||||||
|
the repository:
|
||||||
|
|
||||||
|
$ git clone https://aur.archlinux.org/archive.git aurweb-archive
|
||||||
|
|
||||||
|
When updating, the repository is already cloned and changes need to be pulled
|
||||||
|
from remote:
|
||||||
|
|
||||||
|
# To update:
|
||||||
|
$ cd aurweb-archive && git pull
|
||||||
|
|
||||||
|
For end-user production applications, see
|
||||||
|
[Minimize Disk Space](#minimize-disk-space).
|
||||||
|
|
||||||
|
## Minimize Disk Space
|
||||||
|
|
||||||
|
Using `git gc` on the repository will compress revisions and remove
|
||||||
|
unreachable objects which grow the repository a considerable amount
|
||||||
|
each commit. It is recommended that the following command is used
|
||||||
|
after cloning the archive or pulling updates:
|
||||||
|
|
||||||
|
$ cd aurweb-archive && git gc --aggressive
|
||||||
|
|
||||||
|
## Spec Modules
|
||||||
|
|
||||||
|
Each aurweb spec module belongs to the `aurweb.archives.spec` package. For
|
||||||
|
example: a spec named "example" would be located at
|
||||||
|
`aurweb.archives.spec.example`.
|
||||||
|
|
||||||
|
[Official spec listings](#official-specs) use the following format:
|
||||||
|
|
||||||
|
- `spec_name`
|
||||||
|
- Spec description; what this spec produces
|
||||||
|
- `<link to repo documentation>`
|
||||||
|
|
||||||
|
### Official Specs
|
||||||
|
|
||||||
|
- [metadata](specs/metadata.md)
|
||||||
|
- Package RPC `type=info` metadata
|
||||||
|
- [metadata-repo](repos/metadata-repo.md)
|
||||||
|
- [users](specs/users.md)
|
||||||
|
- List of users found in the database
|
||||||
|
- [users-repo](repos/users-repo.md)
|
||||||
|
- [pkgbases](specs/pkgbases.md)
|
||||||
|
- List of package bases found in the database
|
||||||
|
- [pkgbases-repo](repos/pkgbases-repo.md)
|
||||||
|
- [pkgnames](specs/pkgnames.md)
|
||||||
|
- List of package names found in the database
|
||||||
|
- [pkgnames-repo](repos/pkgnames-repo.md)
|
|
@ -70,20 +70,48 @@ computations and clean up the database:
|
||||||
* aurweb-pkgmaint automatically removes empty repositories that were created
|
* aurweb-pkgmaint automatically removes empty repositories that were created
|
||||||
within the last 24 hours but never populated.
|
within the last 24 hours but never populated.
|
||||||
|
|
||||||
* aurweb-mkpkglists generates the package list files; it takes an optional
|
* [Deprecated] aurweb-mkpkglists generates the package list files; it takes
|
||||||
--extended flag, which additionally produces multiinfo metadata. It also
|
an optional --extended flag, which additionally produces multiinfo metadata.
|
||||||
generates {archive.gz}.sha256 files that should be located within
|
It also generates {archive.gz}.sha256 files that should be located within
|
||||||
mkpkglists.archivedir which contain a SHA-256 hash of their matching
|
mkpkglists.archivedir which contain a SHA-256 hash of their matching
|
||||||
.gz counterpart.
|
.gz counterpart.
|
||||||
|
|
||||||
* aurweb-usermaint removes the last login IP address of all users that did not
|
* aurweb-usermaint removes the last login IP address of all users that did not
|
||||||
login within the past seven days.
|
login within the past seven days.
|
||||||
|
|
||||||
|
* aurweb-git-archive generates Git repository archives based on a --spec.
|
||||||
|
This script is a new generation of aurweb-mkpkglists, which creates and
|
||||||
|
maintains Git repository versions of the archives produced by
|
||||||
|
aurweb-mkpkglists. See doc/git-archive.md for detailed documentation.
|
||||||
|
|
||||||
These scripts can be installed by running `poetry install` and are
|
These scripts can be installed by running `poetry install` and are
|
||||||
usually scheduled using Cron. The current setup is:
|
usually scheduled using Cron. The current setup is:
|
||||||
|
|
||||||
----
|
----
|
||||||
*/5 * * * * poetry run aurweb-mkpkglists [--extended]
|
# Run aurweb-git-archive --spec metadata directly after
|
||||||
|
# aurweb-mkpkglists so that they are executed sequentially, since
|
||||||
|
# both scripts are quite heavy. `aurweb-mkpkglists` should be removed
|
||||||
|
# from here once its deprecation period has ended.
|
||||||
|
*/5 * * * * poetry run aurweb-mkpkglists [--extended] && poetry run aurweb-git-archive --spec metadata
|
||||||
|
|
||||||
|
# Update popularity once an hour. This is done to reduce the amount
|
||||||
|
# of changes caused by popularity data. Even if a package is otherwise
|
||||||
|
# unchanged, popularity is recalculated every 5 minutes via aurweb-popupdate,
|
||||||
|
# which causes changes for a large chunk of packages.
|
||||||
|
#
|
||||||
|
# At this interval, clients can still take advantage of popularity
|
||||||
|
# data, but its updates are guarded behind hour-long intervals.
|
||||||
|
0 * * * * poetry run aurweb-git-archive --spec popularity
|
||||||
|
|
||||||
|
# Usernames
|
||||||
|
*/5 * * * * poetry run aurweb-git-archive --spec users
|
||||||
|
|
||||||
|
# Package base names
|
||||||
|
*/5 * * * * poetry run aurweb-git-archive --spec pkgbases
|
||||||
|
|
||||||
|
# Package names
|
||||||
|
*/5 * * * * poetry run aurweb-git-archive --spec pkgnames
|
||||||
|
|
||||||
1 */2 * * * poetry run aurweb-popupdate
|
1 */2 * * * poetry run aurweb-popupdate
|
||||||
2 */2 * * * poetry run aurweb-aurblup
|
2 */2 * * * poetry run aurweb-aurblup
|
||||||
3 */2 * * * poetry run aurweb-pkgmaint
|
3 */2 * * * poetry run aurweb-pkgmaint
|
||||||
|
|
121
doc/repos/metadata-repo.md
Normal file
121
doc/repos/metadata-repo.md
Normal file
|
@ -0,0 +1,121 @@
|
||||||
|
# Repository: metadata-repo
|
||||||
|
|
||||||
|
## Overview
|
||||||
|
|
||||||
|
The resulting repository contains RPC `type=info` JSON data for packages,
|
||||||
|
split into two different files:
|
||||||
|
|
||||||
|
- `pkgbase.json` contains details about each package base in the AUR
|
||||||
|
- `pkgname.json` contains details about each package in the AUR
|
||||||
|
|
||||||
|
See [Data](#data) for a breakdown of how data is presented in this
|
||||||
|
repository based on an RPC `type=info` base.
|
||||||
|
|
||||||
|
See [File Layout](#file-layout) for a detailed summary of the layout
|
||||||
|
of these files and the data contained within.
|
||||||
|
|
||||||
|
**NOTE: `Popularity` now requires a client-side calculation, see [Popularity Calculation](#popularity-calculation).**
|
||||||
|
|
||||||
|
## Data
|
||||||
|
|
||||||
|
This repository contains RPC `type=info` data for all packages found
|
||||||
|
in AUR's database, reorganized to be suitable for Git repository
|
||||||
|
changes.
|
||||||
|
|
||||||
|
- `pkgname.json` holds Package-specific metadata
|
||||||
|
- Some fields have been removed from `pkgname.json` objects
|
||||||
|
- `ID`
|
||||||
|
- `PackageBaseID -> ID` (moved to `pkgbase.json`)
|
||||||
|
- `NumVotes` (moved to `pkgbase.json`)
|
||||||
|
- `Popularity` (moved to `pkgbase.json`)
|
||||||
|
- `pkgbase.json` holds PackageBase-specific metadata
|
||||||
|
- Package Base fields from `pkgname.json` have been moved over to
|
||||||
|
`pkgbase.json`
|
||||||
|
- `ID`
|
||||||
|
- `Keywords`
|
||||||
|
- `FirstSubmitted`
|
||||||
|
- `LastModified`
|
||||||
|
- `OutOfDate`
|
||||||
|
- `Maintainer`
|
||||||
|
- `URLPath`
|
||||||
|
- `NumVotes`
|
||||||
|
- `Popularity`
|
||||||
|
- `PopularityUpdated`
|
||||||
|
|
||||||
|
## Popularity Calculation
|
||||||
|
|
||||||
|
Clients intending to use popularity data from this archive **must**
|
||||||
|
perform a decay calculation on their end to reflect a close approximation
|
||||||
|
of up-to-date popularity.
|
||||||
|
|
||||||
|
Putting this step onto the client allows the server to maintain
|
||||||
|
fewer popularity record updates, dramatically improving archiving
|
||||||
|
of popularity data. The same calculation is done on the server-side
|
||||||
|
when producing outputs for RPC `type=info` and package pages.
|
||||||
|
|
||||||
|
```
|
||||||
|
Let T = Current UTC timestamp in seconds
|
||||||
|
Let PU = PopularityUpdated timestamp in seconds
|
||||||
|
|
||||||
|
# The delta between now and PU in days
|
||||||
|
Let D = (T - PU) / 86400
|
||||||
|
|
||||||
|
# Calculate up-to-date popularity:
|
||||||
|
P = Popularity * (0.98^D)
|
||||||
|
```
|
||||||
|
|
||||||
|
We can see that the resulting up-to-date popularity value decays as
|
||||||
|
the exponent is increased:
|
||||||
|
- `1.0 * (0.98^1) = 0.98`
|
||||||
|
- `1.0 * (0.98^2) = 0.9604`
|
||||||
|
- ...
|
||||||
|
|
||||||
|
This decay calculation is essentially pushing back the date found for
|
||||||
|
votes by the exponent, which takes into account the time-factor. However,
|
||||||
|
since this calculation is based off of decimals and exponents, it
|
||||||
|
eventually becomes imprecise. The AUR updates these records on a forced
|
||||||
|
interval and whenever a vote is added to or removed from a particular package
|
||||||
|
to keep imprecision from becoming an issue for clients.
|
||||||
|
|
||||||
|
## File Layout
|
||||||
|
|
||||||
|
#### pkgbase.json:
|
||||||
|
|
||||||
|
{
|
||||||
|
"pkgbase1": {
|
||||||
|
"FirstSubmitted": 123456,
|
||||||
|
"ID": 1,
|
||||||
|
"LastModified": 123456,
|
||||||
|
"Maintainer": "kevr",
|
||||||
|
"OutOfDate": null,
|
||||||
|
"URLPath": "/cgit/aur.git/snapshot/pkgbase1.tar.gz",
|
||||||
|
"NumVotes": 1,
|
||||||
|
"Popularity": 1.0,
|
||||||
|
"PopularityUpdated": 12345567753.0
|
||||||
|
},
|
||||||
|
...
|
||||||
|
}
|
||||||
|
|
||||||
|
#### pkgname.json:
|
||||||
|
|
||||||
|
{
|
||||||
|
"pkg1": {
|
||||||
|
"CheckDepends": [], # Only included if a check dependency exists
|
||||||
|
"Conflicts": [], # Only included if a conflict exists
|
||||||
|
"Depends": [], # Only included if a dependency exists
|
||||||
|
"Description": "some description",
|
||||||
|
"Groups": [], # Only included if a group exists
|
||||||
|
"ID": 1,
|
||||||
|
"Keywords": [],
|
||||||
|
"License": [],
|
||||||
|
"MakeDepends": [], # Only included if a make dependency exists
|
||||||
|
"Name": "pkg1",
|
||||||
|
"OptDepends": [], # Only included if an opt dependency exists
|
||||||
|
"PackageBase": "pkgbase1",
|
||||||
|
"Provides": [], # Only included if `provides` is defined
|
||||||
|
"Replaces": [], # Only included if `replaces` is defined
|
||||||
|
"URL": "https://some_url.com",
|
||||||
|
"Version": "1.0-1"
|
||||||
|
},
|
||||||
|
...
|
||||||
|
}
|
15
doc/repos/pkgbases-repo.md
Normal file
15
doc/repos/pkgbases-repo.md
Normal file
|
@ -0,0 +1,15 @@
|
||||||
|
# Repository: pkgbases-repo
|
||||||
|
|
||||||
|
## Overview
|
||||||
|
|
||||||
|
- `pkgbase.json` contains a list of package base names
|
||||||
|
|
||||||
|
## File Layout
|
||||||
|
|
||||||
|
### pkgbase.json:
|
||||||
|
|
||||||
|
[
|
||||||
|
"pkgbase1",
|
||||||
|
"pkgbase2",
|
||||||
|
...
|
||||||
|
]
|
15
doc/repos/pkgnames-repo.md
Normal file
15
doc/repos/pkgnames-repo.md
Normal file
|
@ -0,0 +1,15 @@
|
||||||
|
# Repository: pkgnames-repo
|
||||||
|
|
||||||
|
## Overview
|
||||||
|
|
||||||
|
- `pkgname.json` contains a list of package names
|
||||||
|
|
||||||
|
## File Layout
|
||||||
|
|
||||||
|
### pkgname.json:
|
||||||
|
|
||||||
|
[
|
||||||
|
"pkgname1",
|
||||||
|
"pkgname2",
|
||||||
|
...
|
||||||
|
]
|
15
doc/repos/users-repo.md
Normal file
15
doc/repos/users-repo.md
Normal file
|
@ -0,0 +1,15 @@
|
||||||
|
# Repository: users-repo
|
||||||
|
|
||||||
|
## Overview
|
||||||
|
|
||||||
|
- `users.json` contains a list of usernames
|
||||||
|
|
||||||
|
## File Layout
|
||||||
|
|
||||||
|
### users.json:
|
||||||
|
|
||||||
|
[
|
||||||
|
"user1",
|
||||||
|
"user2",
|
||||||
|
...
|
||||||
|
]
|
14
doc/specs/metadata.md
Normal file
14
doc/specs/metadata.md
Normal file
|
@ -0,0 +1,14 @@
|
||||||
|
# Git Archive Spec: metadata
|
||||||
|
|
||||||
|
## Configuration
|
||||||
|
|
||||||
|
- `[git-archive]`
|
||||||
|
- `metadata-repo`
|
||||||
|
- Path to package metadata git repository location
|
||||||
|
|
||||||
|
## Repositories
|
||||||
|
|
||||||
|
For documentation on each one of these repositories, follow their link,
|
||||||
|
which brings you to a topical markdown for that repository.
|
||||||
|
|
||||||
|
- [metadata-repo](../repos/metadata-repo.md)
|
14
doc/specs/pkgbases.md
Normal file
14
doc/specs/pkgbases.md
Normal file
|
@ -0,0 +1,14 @@
|
||||||
|
# Git Archive Spec: pkgbases
|
||||||
|
|
||||||
|
## Configuration
|
||||||
|
|
||||||
|
- `[git-archive]`
|
||||||
|
- `pkgbases-repo`
|
||||||
|
- Path to pkgbases git repository location
|
||||||
|
|
||||||
|
## Repositories
|
||||||
|
|
||||||
|
For documentation on each one of these repositories, follow their link,
|
||||||
|
which brings you to a topical markdown for that repository.
|
||||||
|
|
||||||
|
- [pkgbases-repo](../repos/pkgbases-repo.md)
|
14
doc/specs/pkgnames.md
Normal file
14
doc/specs/pkgnames.md
Normal file
|
@ -0,0 +1,14 @@
|
||||||
|
# Git Archive Spec: pkgnames
|
||||||
|
|
||||||
|
## Configuration
|
||||||
|
|
||||||
|
- `[git-archive]`
|
||||||
|
- `pkgnames-repo`
|
||||||
|
- Path to pkgnames git repository location
|
||||||
|
|
||||||
|
## Repositories
|
||||||
|
|
||||||
|
For documentation on each one of these repositories, follow their link,
|
||||||
|
which brings you to a topical markdown for that repository.
|
||||||
|
|
||||||
|
- [pkgnames-repo](../repos/pkgnames-repo.md)
|
14
doc/specs/popularity.md
Normal file
14
doc/specs/popularity.md
Normal file
|
@ -0,0 +1,14 @@
|
||||||
|
# Git Archive Spec: popularity
|
||||||
|
|
||||||
|
## Configuration
|
||||||
|
|
||||||
|
- `[git-archive]`
|
||||||
|
- `popularity-repo`
|
||||||
|
- Path to popularity git repository location
|
||||||
|
|
||||||
|
## Repositories
|
||||||
|
|
||||||
|
For documentation on each one of these repositories, follow their link,
|
||||||
|
which brings you to a topical markdown for that repository.
|
||||||
|
|
||||||
|
- [popularity-repo](../repos/popularity-repo.md)
|
14
doc/specs/users.md
Normal file
14
doc/specs/users.md
Normal file
|
@ -0,0 +1,14 @@
|
||||||
|
# Git Archive Spec: users
|
||||||
|
|
||||||
|
## Configuration
|
||||||
|
|
||||||
|
- `[git-archive]`
|
||||||
|
- `users-repo`
|
||||||
|
- Path to users git repository location
|
||||||
|
|
||||||
|
## Repositories
|
||||||
|
|
||||||
|
For documentation on each one of these repositories, follow their link,
|
||||||
|
which brings you to a topical markdown for that repository.
|
||||||
|
|
||||||
|
- [users-repo](../repos/users-repo.md)
|
|
@ -0,0 +1,33 @@
|
||||||
|
"""add PopularityUpdated to PackageBase
|
||||||
|
|
||||||
|
Revision ID: 6441d3b65270
|
||||||
|
Revises: d64e5571bc8d
|
||||||
|
Create Date: 2022-09-22 18:08:03.280664
|
||||||
|
|
||||||
|
"""
|
||||||
|
from alembic import op
|
||||||
|
from sqlalchemy.exc import OperationalError
|
||||||
|
|
||||||
|
from aurweb.models.package_base import PackageBase
|
||||||
|
from aurweb.scripts import popupdate
|
||||||
|
|
||||||
|
# revision identifiers, used by Alembic.
revision = "6441d3b65270"
down_revision = "d64e5571bc8d"
branch_labels = None
depends_on = None

# Table metadata is taken from the ORM model, so the functions below refer
# to the table name and the PopularityUpdated column through the model.
table = PackageBase.__table__
|
||||||
|
|
||||||
|
|
||||||
|
def upgrade():
    """Add the `PopularityUpdated` column to the PackageBase table.

    If adding the column raises `OperationalError`, the migration step is
    skipped with a message instead of aborting. `popupdate.run_variable()`
    is called afterwards in either case.
    """
    try:
        op.add_column(table.name, table.c.PopularityUpdated)
    except OperationalError:
        # The failed operation is a column addition, so report the column
        # (not the table) as the thing that already exists.
        # NOTE(review): assumes the error means the column is already
        # present; other OperationalError causes are reported the same way.
        print(
            f"column 'PopularityUpdated' already exists in table "
            f"'{table.name}', skipping migration"
        )

    # NOTE(review): presumably refreshes popularity data so the new column
    # is populated -- confirm against aurweb.scripts.popupdate.
    popupdate.run_variable()
|
||||||
|
|
||||||
|
|
||||||
|
def downgrade():
    """Drop the `PopularityUpdated` column from the PackageBase table."""
    column_name = "PopularityUpdated"
    op.drop_column(table.name, column_name)
|
|
@ -117,3 +117,4 @@ aurweb-tuvotereminder = "aurweb.scripts.tuvotereminder:main"
|
||||||
aurweb-usermaint = "aurweb.scripts.usermaint:main"
|
aurweb-usermaint = "aurweb.scripts.usermaint:main"
|
||||||
aurweb-config = "aurweb.scripts.config:main"
|
aurweb-config = "aurweb.scripts.config:main"
|
||||||
aurweb-adduser = "aurweb.scripts.adduser:main"
|
aurweb-adduser = "aurweb.scripts.adduser:main"
|
||||||
|
aurweb-git-archive = "aurweb.scripts.git_archive:main"
|
||||||
|
|
|
@ -149,7 +149,7 @@
|
||||||
</tr>
|
</tr>
|
||||||
<tr>
|
<tr>
|
||||||
<th>{{ "Popularity" | tr }}:</th>
|
<th>{{ "Popularity" | tr }}:</th>
|
||||||
<td>{{ pkgbase.Popularity | number_format(6 if pkgbase.Popularity <= 0.2 else 2) }}</td>
|
<td>{{ popularity | number_format(6 if popularity <= 0.2 else 2) }}</td>
|
||||||
</tr>
|
</tr>
|
||||||
<tr>
|
<tr>
|
||||||
<th>{{ "First Submitted" | tr }}:</th>
|
<th>{{ "First Submitted" | tr }}:</th>
|
||||||
|
|
241
test/test_git_archives.py
Normal file
241
test/test_git_archives.py
Normal file
|
@ -0,0 +1,241 @@
|
||||||
|
from http import HTTPStatus
|
||||||
|
from typing import Tuple
|
||||||
|
from unittest import mock
|
||||||
|
|
||||||
|
import py
|
||||||
|
import pygit2
|
||||||
|
import pytest
|
||||||
|
from fastapi.testclient import TestClient
|
||||||
|
|
||||||
|
from aurweb import asgi, config, db
|
||||||
|
from aurweb.archives.spec.base import GitInfo, SpecBase
|
||||||
|
from aurweb.models import Package, PackageBase, User
|
||||||
|
from aurweb.scripts import git_archive
|
||||||
|
from aurweb.testing.requests import Request
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.fixture
def mock_metadata_archive(
    tmp_path: py.path.local,
) -> Tuple[py.path.local, py.path.local]:
    """Yield a temporary `metadata.git` path served as `metadata-repo`.

    While the fixture is active, `aurweb.config.get` is patched so that
    `[git-archive] metadata-repo` resolves to the temporary path; every
    other lookup is forwarded to the real `config.get`.
    """
    # NOTE(review): the return annotation says Tuple, but a single path is
    # yielded -- confirm and align with the sibling fixtures.
    repo_path = tmp_path / "metadata.git"
    real_get = config.get

    def fake_get(section: str, option: str) -> str:
        if (section, option) == ("git-archive", "metadata-repo"):
            return str(repo_path)
        return real_get(section, option)

    with mock.patch("aurweb.config.get", side_effect=fake_get):
        yield repo_path
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.fixture
def mock_users_archive(tmp_path: py.path.local) -> py.path.local:
    """Yield a temporary `users.git` path served as `users-repo`.

    While the fixture is active, `aurweb.config.get` is patched so that
    `[git-archive] users-repo` resolves to the temporary path; every other
    lookup is forwarded to the real `config.get`.
    """
    repo_path = tmp_path / "users.git"
    real_get = config.get

    def fake_get(section: str, option: str) -> str:
        if (section, option) == ("git-archive", "users-repo"):
            return str(repo_path)
        return real_get(section, option)

    with mock.patch("aurweb.config.get", side_effect=fake_get):
        yield repo_path
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.fixture
def mock_pkgbases_archive(tmp_path: py.path.local) -> py.path.local:
    """Yield a temporary `pkgbases.git` path served as `pkgbases-repo`.

    While the fixture is active, `aurweb.config.get` is patched so that
    `[git-archive] pkgbases-repo` resolves to the temporary path; every
    other lookup is forwarded to the real `config.get`.
    """
    repo_path = tmp_path / "pkgbases.git"
    real_get = config.get

    def fake_get(section: str, option: str) -> str:
        if (section, option) == ("git-archive", "pkgbases-repo"):
            return str(repo_path)
        return real_get(section, option)

    with mock.patch("aurweb.config.get", side_effect=fake_get):
        yield repo_path
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.fixture
def mock_pkgnames_archive(tmp_path: py.path.local) -> py.path.local:
    """Yield a temporary `pkgnames.git` path served as `pkgnames-repo`.

    While the fixture is active, `aurweb.config.get` is patched so that
    `[git-archive] pkgnames-repo` resolves to the temporary path; every
    other lookup is forwarded to the real `config.get`.
    """
    repo_path = tmp_path / "pkgnames.git"
    real_get = config.get

    def fake_get(section: str, option: str) -> str:
        if (section, option) == ("git-archive", "pkgnames-repo"):
            return str(repo_path)
        return real_get(section, option)

    with mock.patch("aurweb.config.get", side_effect=fake_get):
        yield repo_path
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.fixture
def metadata(mock_metadata_archive: py.path.local) -> py.path.local:
    """Yield the metadata repo path with `sys.argv` set to `--spec metadata`."""
    with mock.patch("sys.argv", [__name__, "--spec", "metadata"]):
        yield mock_metadata_archive
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.fixture
def users(mock_users_archive: py.path.local) -> py.path.local:
    """Yield the users repo path with `sys.argv` set to `--spec users`."""
    with mock.patch("sys.argv", [__name__, "--spec", "users"]):
        yield mock_users_archive
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.fixture
def pkgbases(mock_pkgbases_archive: py.path.local) -> py.path.local:
    """Yield the pkgbases repo path with `sys.argv` set to `--spec pkgbases`."""
    with mock.patch("sys.argv", [__name__, "--spec", "pkgbases"]):
        yield mock_pkgbases_archive
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.fixture
def pkgnames(mock_pkgnames_archive: py.path.local) -> py.path.local:
    """Yield the pkgnames repo path with `sys.argv` set to `--spec pkgnames`."""
    with mock.patch("sys.argv", [__name__, "--spec", "pkgnames"]):
        yield mock_pkgnames_archive
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.fixture
def client() -> TestClient:
    """Yield a FastAPI `TestClient` bound to the aurweb ASGI application."""
    test_client = TestClient(app=asgi.app)
    yield test_client
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.fixture
def user(db_test: None) -> User:
    """Create and yield a `User` record named "test".

    The record is created inside a `db.begin()` block and yielded once
    that block has exited.
    """
    with db.begin():
        account = db.create(
            User,
            Username="test",
            Email="test@example.org",
            Passwd="testPassword",
        )

    yield account
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.fixture
def package(user: User) -> Package:
    """Create a "test" `PackageBase` with one "test" `Package`; yield the package.

    Both records are created in a single `db.begin()` block, with `user`
    as the package base's maintainer and packager.
    """
    with db.begin():
        base = db.create(
            PackageBase,
            Name="test",
            Maintainer=user,
            Packager=user,
        )
        pkg = db.create(
            Package,
            PackageBase=base,
            Name="test",
        )

    yield pkg
|
||||||
|
|
||||||
|
|
||||||
|
def commit_count(repo: pygit2.Repository) -> int:
    """Return how many commits `repo.walk` yields starting from HEAD's target."""
    return sum(1 for _ in repo.walk(repo.head.target))
|
||||||
|
|
||||||
|
|
||||||
|
def test_specbase_raises_notimplementederror():
    """`SpecBase.generate()` must raise `NotImplementedError`."""
    base_spec = SpecBase()
    with pytest.raises(NotImplementedError):
        base_spec.generate()
|
||||||
|
|
||||||
|
|
||||||
|
def test_gitinfo_config(tmpdir: py.path.local):
    """Config passed through `GitInfo` is readable from the initialized repo."""
    repo_path = tmpdir / "test.git"
    info = GitInfo(repo_path, {"user.name": "Test Person"})
    git_archive.init_repository(info)

    configured_name = pygit2.Repository(repo_path).config["user.name"]
    assert configured_name == "Test Person"
|
||||||
|
|
||||||
|
|
||||||
|
def test_metadata(metadata: py.path.local, package: Package):
    """A first run commits once; an unchanged second run adds no commit."""
    # First run: creates the mocked metadata archive and commits the
    # current package data ("diff detected, committing" path).
    exit_code = git_archive.main()
    assert exit_code == 0
    assert commit_count(pygit2.Repository(metadata)) == 1

    # Second run with nothing changed ("no diff detected" path).
    exit_code = git_archive.main()
    assert exit_code == 0
    assert commit_count(pygit2.Repository(metadata)) == 1
|
||||||
|
|
||||||
|
|
||||||
|
def test_metadata_change(
    client: TestClient, metadata: py.path.local, user: User, package: Package
):
    """Keyword changes made through aurweb result in a new archive commit."""
    # First run: creates the mocked metadata archive and commits the
    # current package data ("diff detected, committing" path).
    assert git_archive.main() == 0
    assert commit_count(pygit2.Repository(metadata)) == 1

    # Change `package`-related metadata through an aurweb POST request.
    base_name = package.PackageBase.Name
    session_id = user.login(Request(), "testPassword")
    cookies = {"AURSID": session_id}

    with client as http:
        resp = http.post(
            f"/pkgbase/{base_name}/keywords",
            data={"keywords": "abc def"},
            cookies=cookies,
            allow_redirects=True,
        )
    assert resp.status_code == HTTPStatus.OK

    # The keyword change should now be picked up as a second commit.
    assert git_archive.main() == 0
    assert commit_count(pygit2.Repository(metadata)) == 2
|
||||||
|
|
||||||
|
|
||||||
|
def test_metadata_delete(client: TestClient, metadata: py.path.local, package: Package):
    """Deleting a package results in a new archive commit."""
    # First run: creates the mocked metadata archive and commits the
    # current package data ("diff detected, committing" path).
    assert git_archive.main() == 0
    assert commit_count(pygit2.Repository(metadata)) == 1

    with db.begin():
        db.delete(package)

    # The deletion should show up as a diff, hence a second commit.
    assert git_archive.main() == 0
    assert commit_count(pygit2.Repository(metadata)) == 2
|
||||||
|
|
||||||
|
|
||||||
|
def test_users(users: py.path.local, user: User):
    """Running the users spec produces exactly one commit."""
    exit_code = git_archive.main()
    assert exit_code == 0
    assert commit_count(pygit2.Repository(users)) == 1
|
||||||
|
|
||||||
|
|
||||||
|
def test_pkgbases(pkgbases: py.path.local, package: Package):
    """Running the pkgbases spec produces exactly one commit."""
    exit_code = git_archive.main()
    assert exit_code == 0
    assert commit_count(pygit2.Repository(pkgbases)) == 1
|
||||||
|
|
||||||
|
|
||||||
|
def test_pkgnames(pkgnames: py.path.local, package: Package):
    """Running the pkgnames spec produces exactly one commit."""
    exit_code = git_archive.main()
    assert exit_code == 0
    assert commit_count(pygit2.Repository(pkgnames)) == 1
|
|
@ -9,6 +9,7 @@ from aurweb.filters import as_timezone, number_format, timestamp_to_datetime as
|
||||||
from aurweb.models import Package, PackageBase, User
|
from aurweb.models import Package, PackageBase, User
|
||||||
from aurweb.models.account_type import USER_ID
|
from aurweb.models.account_type import USER_ID
|
||||||
from aurweb.models.license import License
|
from aurweb.models.license import License
|
||||||
|
from aurweb.models.package_base import popularity
|
||||||
from aurweb.models.package_license import PackageLicense
|
from aurweb.models.package_license import PackageLicense
|
||||||
from aurweb.models.package_relation import PackageRelation
|
from aurweb.models.package_relation import PackageRelation
|
||||||
from aurweb.models.relation_type import PROVIDES_ID, REPLACES_ID
|
from aurweb.models.relation_type import PROVIDES_ID, REPLACES_ID
|
||||||
|
@ -287,12 +288,14 @@ def test_package_details(user: User, package: Package):
|
||||||
"""Test package details with most fields populated, but not all."""
|
"""Test package details with most fields populated, but not all."""
|
||||||
request = Request(user=user, authenticated=True)
|
request = Request(user=user, authenticated=True)
|
||||||
context = make_context(request, "Test Details")
|
context = make_context(request, "Test Details")
|
||||||
|
|
||||||
context.update(
|
context.update(
|
||||||
{
|
{
|
||||||
"request": request,
|
"request": request,
|
||||||
"git_clone_uri_anon": GIT_CLONE_URI_ANON,
|
"git_clone_uri_anon": GIT_CLONE_URI_ANON,
|
||||||
"git_clone_uri_priv": GIT_CLONE_URI_PRIV,
|
"git_clone_uri_priv": GIT_CLONE_URI_PRIV,
|
||||||
"pkgbase": package.PackageBase,
|
"pkgbase": package.PackageBase,
|
||||||
|
"popularity": popularity(package.PackageBase, time.utcnow()),
|
||||||
"package": package,
|
"package": package,
|
||||||
"comaintainers": [],
|
"comaintainers": [],
|
||||||
}
|
}
|
||||||
|
@ -329,6 +332,7 @@ def test_package_details_filled(user: User, package: Package):
|
||||||
"git_clone_uri_anon": GIT_CLONE_URI_ANON,
|
"git_clone_uri_anon": GIT_CLONE_URI_ANON,
|
||||||
"git_clone_uri_priv": GIT_CLONE_URI_PRIV,
|
"git_clone_uri_priv": GIT_CLONE_URI_PRIV,
|
||||||
"pkgbase": package.PackageBase,
|
"pkgbase": package.PackageBase,
|
||||||
|
"popularity": popularity(package.PackageBase, time.utcnow()),
|
||||||
"package": package,
|
"package": package,
|
||||||
"comaintainers": [],
|
"comaintainers": [],
|
||||||
"licenses": package.package_licenses,
|
"licenses": package.package_licenses,
|
||||||
|
|
Loading…
Add table
Reference in a new issue