feat: archive git repository (experimental)

See doc/git-archive.md for general Git archive specifications
See doc/repos/metadata-repo.md for info and direction related to the new Git metadata archive
This commit is contained in:
Kevin Morris 2022-09-24 16:51:25 +00:00
parent ec3152014b
commit 30e72d2db5
34 changed files with 1104 additions and 50 deletions

View file

@ -0,0 +1 @@
# aurweb.archives

View file

@ -0,0 +1 @@
# aurweb.archives.spec

View file

@ -0,0 +1,77 @@
from pathlib import Path
from typing import Any, Dict, Iterable, List, Optional, Set
class GitInfo:
    """Information about a Git repository."""

    # Path to Git repository.
    path: Path

    # Local Git repository configuration.
    config: Dict[str, Any]

    def __init__(self, path: str, config: Optional[Dict[str, Any]] = None) -> None:
        """
        Describe the Git repository located at `path`.

        :param path: Filesystem path to the Git repository; stored as a
                     `pathlib.Path`
        :param config: Local Git repository configuration; when omitted,
                       a new empty dict is created for this instance
        """
        self.path = Path(path)
        # A `dict()` default in the signature is evaluated only once, so
        # every GitInfo built without an explicit config would share (and
        # mutate) the very same dict. Build a fresh one per instance.
        self.config = {} if config is None else config
class SpecOutput:
    """Class used for git_archive.py output details."""

    # Filename relative to the Git repository root. Stored exactly as
    # passed to the constructor (a str).
    filename: str

    # Git repository information.
    git_info: "GitInfo"

    # Bytes bound for `SpecOutput.filename`.
    data: bytes

    def __init__(self, filename: str, git_info: "GitInfo", data: bytes) -> None:
        """
        Bundle one output file together with its destination repository.

        :param filename: Filename relative to the Git repository root
        :param git_info: GitInfo instance of the destination repository
        :param data: Binary data bound for `filename`
        """
        self.filename = filename
        self.git_info = git_info
        self.data = data
class SpecBase:
    """
    Base for Spec classes defined in git_archive.py --spec modules.

    All supported --spec modules must contain the following classes:
        - Spec(SpecBase)
    """

    # A list of SpecOutputs, each of which contain output file data.
    outputs: List["SpecOutput"]

    # A set of repository paths (`GitInfo.path`) to commit changes to.
    repos: Set[Path]

    def __new__(cls, *args: Any, **kwargs: Any) -> "SpecBase":
        """
        Create the instance with its own `outputs` list and `repos` set.

        The containers are set up here rather than in `__init__` so that
        derivatives which define `__init__` without chaining to the base
        still get per-instance state; as class-level mutables they would
        be shared by every Spec instance and accumulate each other's data.
        """
        inst = super().__new__(cls)
        inst.outputs = []
        inst.repos = set()
        return inst

    def generate(self) -> Iterable["SpecOutput"]:
        """
        "Pure virtual" output generator.

        `SpecBase.outputs` and `SpecBase.repos` should be populated within an
        overridden version of this function in SpecBase derivatives.

        :raises NotImplementedError: Always, unless overridden
        """
        raise NotImplementedError()

    def add_output(self, filename: str, git_info: "GitInfo", data: bytes) -> None:
        """
        Add a SpecOutput instance to the list of outputs.

        The repository path of `git_info` is recorded in `repos` as well.

        :param filename: Filename relative to the git repository root
        :param git_info: GitInfo instance
        :param data: Binary data bound for `filename`
        """
        # Adding to a set is idempotent; no membership test is required.
        self.repos.add(git_info.path)
        self.outputs.append(SpecOutput(filename, git_info, data))

View file

@ -0,0 +1,85 @@
from typing import Iterable
import orjson
from aurweb import config, db
from aurweb.models import Package, PackageBase, User
from aurweb.rpc import RPC
from .base import GitInfo, SpecBase, SpecOutput
# orjson serialization options used for every output of this spec:
# emit object keys in sorted order and indent the document by 2 spaces.
ORJSON_OPTS = orjson.OPT_SORT_KEYS | orjson.OPT_INDENT_2
class Spec(SpecBase):
    """
    Spec producing the package metadata archive.

    Emits `pkgname.json` and `pkgbase.json` into the repository configured
    by the `metadata-repo` option of the `git-archive` section.
    """

    def __init__(self) -> None:
        """Resolve the metadata repository from the configuration."""
        super().__init__()
        self.metadata_repo = GitInfo(
            config.get("git-archive", "metadata-repo"),
        )

    def generate(self) -> Iterable[SpecOutput]:
        """
        Query all packages and build the two metadata JSON documents.

        `pkgname.json` maps each package name to its RPC type=info data
        with the package base fields and the package ID removed;
        `pkgbase.json` maps each package base name to those removed
        package base fields plus `PopularityUpdated`.

        :return: The outputs collected on this Spec
        """
        # Base query used by the RPC. The User join is an outer join so
        # that packages whose package base has no maintainer are still
        # selected; an inner join silently leaves them out of the archive.
        base_query = (
            db.query(Package)
            .join(PackageBase)
            .join(User, PackageBase.MaintainerUID == User.ID, isouter=True)
        )

        # Create an instance of RPC, use it to get entities from
        # our query and perform a metadata subquery for all packages.
        rpc = RPC(version=5, type="info")
        print("performing package database query")
        packages = rpc.entities(base_query).all()
        print("performing package database subqueries")
        rpc.subquery({pkg.ID for pkg in packages})

        pkgbases, pkgnames = {}, {}
        for package in packages:
            # Produce RPC type=info data for `package`
            data = rpc.get_info_json_data(package)

            pkgbase_name = data.get("PackageBase")

            # Move the package base fields out of the package data.
            # NumVotes and Popularity are among them: these fields change
            # quite often, which causes git data modification to explode
            # if they are repeated in every package entry.
            pkgbase_data = {
                "ID": data.pop("PackageBaseID"),
                "URLPath": data.pop("URLPath"),
                "FirstSubmitted": data.pop("FirstSubmitted"),
                "LastModified": data.pop("LastModified"),
                "OutOfDate": data.pop("OutOfDate"),
                "Maintainer": data.pop("Maintainer"),
                "Keywords": data.pop("Keywords"),
                "NumVotes": data.pop("NumVotes"),
                "Popularity": data.pop("Popularity"),
                "PopularityUpdated": package.PopularityUpdated.timestamp(),
            }

            # Store the data in the `pkgbases` dict, keyed by package base
            # name, so a package base shared by several packages ends up
            # with a single entry.
            pkgbases[pkgbase_name] = pkgbase_data

            # Remove the ID key from package json.
            data.pop("ID")

            # Store the remaining package data under the package's Name.
            name = data.get("Name")
            pkgnames[name] = data

        # Add metadata outputs
        self.add_output(
            "pkgname.json",
            self.metadata_repo,
            orjson.dumps(pkgnames, option=ORJSON_OPTS),
        )
        self.add_output(
            "pkgbase.json",
            self.metadata_repo,
            orjson.dumps(pkgbases, option=ORJSON_OPTS),
        )

        return self.outputs

View file

@ -0,0 +1,32 @@
from typing import Iterable
import orjson
from aurweb import config, db
from aurweb.models import PackageBase
from .base import GitInfo, SpecBase, SpecOutput
# orjson serialization options used for every output of this spec:
# emit object keys in sorted order and indent the document by 2 spaces.
ORJSON_OPTS = orjson.OPT_SORT_KEYS | orjson.OPT_INDENT_2
class Spec(SpecBase):
    """
    Spec producing the package base name archive.

    Emits `pkgbase.json` into the repository configured by the
    `pkgbases-repo` option of the `git-archive` section.
    """

    def __init__(self) -> None:
        """Resolve the pkgbases repository from the configuration."""
        super().__init__()
        self.pkgbases_repo = GitInfo(config.get("git-archive", "pkgbases-repo"))

    def generate(self) -> Iterable[SpecOutput]:
        """
        Build `pkgbase.json`: a JSON list of package base names.

        Only package bases with a non-NULL `PackagerUID` are listed, in
        ascending name order.

        :return: The outputs collected on this Spec
        """
        has_packager = PackageBase.PackagerUID.isnot(None)
        rows = (
            db.query(PackageBase.Name)
            .filter(has_packager)
            .order_by(PackageBase.Name.asc())
            .all()
        )
        pkgbases = [row.Name for row in rows]

        self.add_output(
            "pkgbase.json",
            self.pkgbases_repo,
            orjson.dumps(pkgbases, option=ORJSON_OPTS),
        )
        return self.outputs

View file

@ -0,0 +1,33 @@
from typing import Iterable
import orjson
from aurweb import config, db
from aurweb.models import Package, PackageBase
from .base import GitInfo, SpecBase, SpecOutput
# orjson serialization options used for every output of this spec:
# emit object keys in sorted order and indent the document by 2 spaces.
ORJSON_OPTS = orjson.OPT_SORT_KEYS | orjson.OPT_INDENT_2
class Spec(SpecBase):
    """
    Spec producing the package name archive.

    Emits `pkgname.json` into the repository configured by the
    `pkgnames-repo` option of the `git-archive` section.
    """

    def __init__(self) -> None:
        """Resolve the pkgnames repository from the configuration."""
        super().__init__()
        self.pkgnames_repo = GitInfo(config.get("git-archive", "pkgnames-repo"))

    def generate(self) -> Iterable[SpecOutput]:
        """
        Build `pkgname.json`: a JSON list of package names.

        Only packages whose package base has a non-NULL `PackagerUID`
        are listed, in ascending name order.

        :return: The outputs collected on this Spec
        """
        has_packager = PackageBase.PackagerUID.isnot(None)
        rows = (
            db.query(Package.Name)
            .join(PackageBase, PackageBase.ID == Package.PackageBaseID)
            .filter(has_packager)
            .order_by(Package.Name.asc())
            .all()
        )
        pkgnames = [row.Name for row in rows]

        self.add_output(
            "pkgname.json",
            self.pkgnames_repo,
            orjson.dumps(pkgnames, option=ORJSON_OPTS),
        )
        return self.outputs

View file

@ -0,0 +1,26 @@
from typing import Iterable
import orjson
from aurweb import config, db
from aurweb.models import User
from .base import GitInfo, SpecBase, SpecOutput
# orjson serialization options used for every output of this spec:
# emit object keys in sorted order and indent the document by 2 spaces.
ORJSON_OPTS = orjson.OPT_SORT_KEYS | orjson.OPT_INDENT_2
class Spec(SpecBase):
    """
    Spec producing the username archive.

    Emits `users.json` into the repository configured by the
    `users-repo` option of the `git-archive` section.
    """

    def __init__(self) -> None:
        """Resolve the users repository from the configuration."""
        super().__init__()
        self.users_repo = GitInfo(config.get("git-archive", "users-repo"))

    def generate(self) -> Iterable[SpecOutput]:
        """
        Build `users.json`: a JSON list of usernames in ascending order.

        :return: The outputs collected on this Spec
        """
        rows = db.query(User.Username).order_by(User.Username.asc()).all()
        users = [row.Username for row in rows]

        self.add_output(
            "users.json",
            self.users_repo,
            orjson.dumps(users, option=ORJSON_OPTS),
        )
        return self.outputs