mirror of
https://gitlab.archlinux.org/archlinux/aurweb.git
synced 2025-02-03 10:43:03 +01:00
feat: archive git repository (experimental)
See doc/git-archive.md for general Git archive specifications. See doc/repos/metadata-repo.md for info and direction related to the new Git metadata archive.
This commit is contained in:
parent
ec3152014b
commit
30e72d2db5
34 changed files with 1104 additions and 50 deletions
1
aurweb/archives/__init__.py
Normal file
1
aurweb/archives/__init__.py
Normal file
|
@ -0,0 +1 @@
|
|||
# aurweb.archives
|
1
aurweb/archives/spec/__init__.py
Normal file
1
aurweb/archives/spec/__init__.py
Normal file
|
@ -0,0 +1 @@
|
|||
# aurweb.archives.spec
|
77
aurweb/archives/spec/base.py
Normal file
77
aurweb/archives/spec/base.py
Normal file
|
@ -0,0 +1,77 @@
|
|||
from pathlib import Path
from typing import Any, Dict, Iterable, List, Optional, Set
|
||||
|
||||
|
||||
class GitInfo:
    """Information about a Git repository."""

    # Path to the Git repository; always stored as a `pathlib.Path`.
    path: Path

    # Local Git repository configuration.
    config: Dict[str, Any]

    def __init__(self, path: str, config: Optional[Dict[str, Any]] = None) -> None:
        """
        Create a GitInfo for the repository located at `path`.

        :param path: Path to the Git repository; converted to a `Path`
        :param config: Optional local Git repository configuration; when
                       omitted (or None), a new empty dict is used
        """
        self.path = Path(path)
        # Build a fresh dict per instance. A `dict()` default argument is
        # evaluated once at definition time and would be shared (and
        # mutated) by every GitInfo created without an explicit config.
        self.config = {} if config is None else config
|
||||
|
||||
|
||||
class SpecOutput:
    """Class used for git_archive.py output details."""

    # Filename relative to the Git repository root. Stored exactly as
    # passed to the constructor (no conversion is performed).
    filename: str

    # Git repository information.
    git_info: "GitInfo"

    # Bytes bound for `SpecOutput.filename`.
    data: bytes

    def __init__(self, filename: str, git_info: "GitInfo", data: bytes) -> None:
        """
        Bundle one output file with its target repository.

        :param filename: Filename relative to the Git repository root
        :param git_info: GitInfo instance describing the repository
        :param data: Binary data bound for `filename`
        """
        self.filename = filename
        self.git_info = git_info
        self.data = data
|
||||
|
||||
|
||||
class SpecBase:
    """
    Base for Spec classes defined in git_archive.py --spec modules.

    All supported --spec modules must contain the following classes:
    - Spec(SpecBase)

    Every instance owns its own `outputs` list and `repos` set.
    """

    # A list of SpecOutputs, each of which contain output file data.
    outputs: List["SpecOutput"]

    # A set of repository paths (`GitInfo.path` values) to commit changes to.
    repos: Set[Path]

    def __new__(cls, *args, **kwargs) -> "SpecBase":
        """
        Allocate an instance with its own empty `outputs` and `repos`.

        The containers are created here rather than in `__init__` so that
        derivatives which override `__init__` without chaining to the base
        still get per-instance state. Class-level `list()`/`set()` defaults
        would be shared by every instance of every derivative, causing
        outputs to accumulate across Spec objects.
        """
        instance = super().__new__(cls)
        instance.outputs = []
        instance.repos = set()
        return instance

    def generate(self) -> Iterable["SpecOutput"]:
        """
        "Pure virtual" output generator.

        `SpecBase.outputs` and `SpecBase.repos` should be populated within an
        overridden version of this function in SpecBase derivatives.

        :raises NotImplementedError: always; derivatives must override this
        """
        raise NotImplementedError()

    def add_output(self, filename: str, git_info: "GitInfo", data: bytes) -> None:
        """
        Add a SpecOutput instance to the list of outputs.

        The repository path of `git_info` is also recorded in `repos`.

        :param filename: Filename relative to the git repository root
        :param git_info: GitInfo instance
        :param data: Binary data bound for `filename`
        """
        # set.add() is already a no-op for members, so no membership
        # test is needed beforehand.
        self.repos.add(git_info.path)
        self.outputs.append(SpecOutput(filename, git_info, data))
|
85
aurweb/archives/spec/metadata.py
Normal file
85
aurweb/archives/spec/metadata.py
Normal file
|
@ -0,0 +1,85 @@
|
|||
from typing import Iterable
|
||||
|
||||
import orjson
|
||||
|
||||
from aurweb import config, db
|
||||
from aurweb.models import Package, PackageBase, User
|
||||
from aurweb.rpc import RPC
|
||||
|
||||
from .base import GitInfo, SpecBase, SpecOutput
|
||||
|
||||
ORJSON_OPTS = orjson.OPT_SORT_KEYS | orjson.OPT_INDENT_2
|
||||
|
||||
|
||||
class Spec(SpecBase):
    """
    Spec producing `pkgname.json` and `pkgbase.json` for the repository
    configured as `metadata-repo` in the `git-archive` config section.
    """

    def __init__(self) -> None:
        """Read the metadata repository location from configuration."""
        super().__init__()
        self.metadata_repo = GitInfo(
            config.get("git-archive", "metadata-repo"),
        )

    def generate(self) -> Iterable[SpecOutput]:
        """
        Query all packages and emit two JSON outputs:

        - `pkgname.json`: RPC type=info data keyed by package name, with
          the package base level fields and `ID` removed
        - `pkgbase.json`: package base level fields keyed by package base name

        :return: `self.outputs` after both outputs have been added
        """
        # Base query used by the RPC.
        # The User join is an outer join so that packages whose package
        # base has no maintainer (NULL MaintainerUID) are still included;
        # an inner join would silently drop them from the archive.
        base_query = (
            db.query(Package)
            .join(PackageBase)
            .join(User, PackageBase.MaintainerUID == User.ID, isouter=True)
        )

        # Create an instance of RPC, use it to get entities from
        # our query and perform a metadata subquery for all packages.
        rpc = RPC(version=5, type="info")
        print("performing package database query")
        packages = rpc.entities(base_query).all()
        print("performing package database subqueries")
        rpc.subquery({pkg.ID for pkg in packages})

        pkgbases, pkgnames = {}, {}
        for package in packages:
            # Produce RPC type=info data for `package`
            data = rpc.get_info_json_data(package)

            # Move the package base level fields out of the package data.
            # This includes NumVotes and Popularity, so they end up only
            # in pkgbase.json and never in pkgname.json.
            pkgbase_name = data.get("PackageBase")
            pkgbase_data = {
                "ID": data.pop("PackageBaseID"),
                "URLPath": data.pop("URLPath"),
                "FirstSubmitted": data.pop("FirstSubmitted"),
                "LastModified": data.pop("LastModified"),
                "OutOfDate": data.pop("OutOfDate"),
                "Maintainer": data.pop("Maintainer"),
                "Keywords": data.pop("Keywords"),
                "NumVotes": data.pop("NumVotes"),
                "Popularity": data.pop("Popularity"),
                "PopularityUpdated": package.PopularityUpdated.timestamp(),
            }

            # Store the data in `pkgbases` dict keyed by package base name,
            # so a package base shared by several packages yields a
            # single entry (the last one processed wins).
            pkgbases[pkgbase_name] = pkgbase_data

            # Remove the ID key from package json.
            data.pop("ID")

            # Key the remaining package data by the package's Name.
            name = data.get("Name")
            pkgnames[name] = data

        # Add metadata outputs
        self.add_output(
            "pkgname.json",
            self.metadata_repo,
            orjson.dumps(pkgnames, option=ORJSON_OPTS),
        )
        self.add_output(
            "pkgbase.json",
            self.metadata_repo,
            orjson.dumps(pkgbases, option=ORJSON_OPTS),
        )

        return self.outputs
|
32
aurweb/archives/spec/pkgbases.py
Normal file
32
aurweb/archives/spec/pkgbases.py
Normal file
|
@ -0,0 +1,32 @@
|
|||
from typing import Iterable
|
||||
|
||||
import orjson
|
||||
|
||||
from aurweb import config, db
|
||||
from aurweb.models import PackageBase
|
||||
|
||||
from .base import GitInfo, SpecBase, SpecOutput
|
||||
|
||||
ORJSON_OPTS = orjson.OPT_SORT_KEYS | orjson.OPT_INDENT_2
|
||||
|
||||
|
||||
class Spec(SpecBase):
    """
    Spec producing `pkgbase.json`, a sorted JSON list of package base
    names, for the repository configured as `pkgbases-repo` in the
    `git-archive` config section.
    """

    def __init__(self) -> None:
        """Read the pkgbases repository location from configuration."""
        super().__init__()
        self.pkgbases_repo = GitInfo(config.get("git-archive", "pkgbases-repo"))

    def generate(self) -> Iterable[SpecOutput]:
        """
        Collect the names of all package bases whose PackagerUID is not
        NULL, ordered ascending by name, and add them as `pkgbase.json`.

        :return: `self.outputs` after the output has been added
        """
        rows = (
            db.query(PackageBase.Name)
            .filter(PackageBase.PackagerUID.isnot(None))
            .order_by(PackageBase.Name.asc())
            .all()
        )
        names = [row.Name for row in rows]

        self.add_output(
            "pkgbase.json",
            self.pkgbases_repo,
            orjson.dumps(names, option=ORJSON_OPTS),
        )
        return self.outputs
|
33
aurweb/archives/spec/pkgnames.py
Normal file
33
aurweb/archives/spec/pkgnames.py
Normal file
|
@ -0,0 +1,33 @@
|
|||
from typing import Iterable
|
||||
|
||||
import orjson
|
||||
|
||||
from aurweb import config, db
|
||||
from aurweb.models import Package, PackageBase
|
||||
|
||||
from .base import GitInfo, SpecBase, SpecOutput
|
||||
|
||||
ORJSON_OPTS = orjson.OPT_SORT_KEYS | orjson.OPT_INDENT_2
|
||||
|
||||
|
||||
class Spec(SpecBase):
    """
    Spec producing `pkgname.json`, a sorted JSON list of package names,
    for the repository configured as `pkgnames-repo` in the
    `git-archive` config section.
    """

    def __init__(self) -> None:
        """Read the pkgnames repository location from configuration."""
        super().__init__()
        self.pkgnames_repo = GitInfo(config.get("git-archive", "pkgnames-repo"))

    def generate(self) -> Iterable[SpecOutput]:
        """
        Collect the names of all packages whose package base has a
        PackagerUID that is not NULL, ordered ascending by name, and add
        them as `pkgname.json`.

        :return: `self.outputs` after the output has been added
        """
        rows = (
            db.query(Package.Name)
            .join(PackageBase, PackageBase.ID == Package.PackageBaseID)
            .filter(PackageBase.PackagerUID.isnot(None))
            .order_by(Package.Name.asc())
            .all()
        )
        names = [row.Name for row in rows]

        self.add_output(
            "pkgname.json",
            self.pkgnames_repo,
            orjson.dumps(names, option=ORJSON_OPTS),
        )
        return self.outputs
|
26
aurweb/archives/spec/users.py
Normal file
26
aurweb/archives/spec/users.py
Normal file
|
@ -0,0 +1,26 @@
|
|||
from typing import Iterable
|
||||
|
||||
import orjson
|
||||
|
||||
from aurweb import config, db
|
||||
from aurweb.models import User
|
||||
|
||||
from .base import GitInfo, SpecBase, SpecOutput
|
||||
|
||||
ORJSON_OPTS = orjson.OPT_SORT_KEYS | orjson.OPT_INDENT_2
|
||||
|
||||
|
||||
class Spec(SpecBase):
    """
    Spec producing `users.json`, a sorted JSON list of usernames, for the
    repository configured as `users-repo` in the `git-archive` config
    section.
    """

    def __init__(self) -> None:
        """Read the users repository location from configuration."""
        super().__init__()
        self.users_repo = GitInfo(config.get("git-archive", "users-repo"))

    def generate(self) -> Iterable[SpecOutput]:
        """
        Collect every username, ordered ascending, and add the list as
        `users.json`.

        :return: `self.outputs` after the output has been added
        """
        rows = db.query(User.Username).order_by(User.Username.asc()).all()
        usernames = [row.Username for row in rows]

        self.add_output(
            "users.json",
            self.users_repo,
            orjson.dumps(usernames, option=ORJSON_OPTS),
        )
        return self.outputs
|
Loading…
Add table
Add a link
Reference in a new issue