diff --git a/aurweb/routers/html.py b/aurweb/routers/html.py
index 52e3f818..8d32089a 100644
--- a/aurweb/routers/html.py
+++ b/aurweb/routers/html.py
@@ -213,6 +213,19 @@ async def index(request: Request):
return render_template(request, "index.html", context)
+@router.get("/{archive}.sha256")
+async def archive_sha256(request: Request, archive: str):
+ archivedir = aurweb.config.get("mkpkglists", "archivedir")
+ hashfile = os.path.join(archivedir, f"{archive}.sha256")
+ if not os.path.exists(hashfile):
+ raise HTTPException(status_code=HTTPStatus.NOT_FOUND)
+
+ with open(hashfile) as f:
+ hash_value = f.read()
+ headers = {"Content-Type": "text/plain"}
+ return Response(hash_value, headers=headers)
+
+
@router.get("/metrics")
async def metrics(request: Request):
registry = CollectorRegistry()
diff --git a/aurweb/scripts/mkpkglists.py b/aurweb/scripts/mkpkglists.py
index dd2e9e86..00096d74 100755
--- a/aurweb/scripts/mkpkglists.py
+++ b/aurweb/scripts/mkpkglists.py
@@ -19,8 +19,12 @@ on the following, right-hand side fields are added to each item.
"""
import gzip
+import hashlib
+import io
import os
+import shutil
import sys
+import tempfile
from collections import defaultdict
from typing import Any, Dict
@@ -37,15 +41,6 @@ from aurweb.models import Package, PackageBase, User
logger = logging.get_logger("aurweb.scripts.mkpkglists")
-archivedir = aurweb.config.get("mkpkglists", "archivedir")
-os.makedirs(archivedir, exist_ok=True)
-
-PACKAGES = aurweb.config.get('mkpkglists', 'packagesfile')
-META = aurweb.config.get('mkpkglists', 'packagesmetafile')
-META_EXT = aurweb.config.get('mkpkglists', 'packagesmetaextfile')
-PKGBASE = aurweb.config.get('mkpkglists', 'pkgbasefile')
-USERS = aurweb.config.get('mkpkglists', 'userfile')
-
TYPE_MAP = {
"depends": "Depends",
@@ -174,7 +169,24 @@ def as_dict(package: Package) -> Dict[str, Any]:
}
+def sha256sum(file_path: str) -> str:
+ hash = hashlib.sha256()
+ with open(file_path, "rb") as f:
+ while chunk := f.read(io.DEFAULT_BUFFER_SIZE):
+ hash.update(chunk)
+ return hash.hexdigest()
+
+
def _main():
+ archivedir = aurweb.config.get("mkpkglists", "archivedir")
+ os.makedirs(archivedir, exist_ok=True)
+
+ PACKAGES = aurweb.config.get('mkpkglists', 'packagesfile')
+ META = aurweb.config.get('mkpkglists', 'packagesmetafile')
+ META_EXT = aurweb.config.get('mkpkglists', 'packagesmetaextfile')
+ PKGBASE = aurweb.config.get('mkpkglists', 'pkgbasefile')
+ USERS = aurweb.config.get('mkpkglists', 'userfile')
+
bench = Benchmark()
logger.info("Started re-creating archives, wait a while...")
@@ -204,9 +216,14 @@ def _main():
# Produce packages-meta-v1.json.gz
output = list()
snapshot_uri = aurweb.config.get("options", "snapshot_uri")
+
+ tmpdir = tempfile.mkdtemp()
+ tmp_packages = os.path.join(tmpdir, os.path.basename(PACKAGES))
+ tmp_meta = os.path.join(tmpdir, os.path.basename(META))
+ tmp_metaext = os.path.join(tmpdir, os.path.basename(META_EXT))
gzips = {
- "packages": gzip.open(PACKAGES, "wt"),
- "meta": gzip.open(META, "wb"),
+ "packages": gzip.open(tmp_packages, "wt"),
+ "meta": gzip.open(tmp_meta, "wb"),
}
# Append list opening to the metafile.
@@ -215,7 +232,7 @@ def _main():
# Produce packages.gz + packages-meta-ext-v1.json.gz
extended = False
if len(sys.argv) > 1 and sys.argv[1] in EXTENDED_FIELD_HANDLERS:
- gzips["meta_ext"] = gzip.open(META_EXT, "wb")
+ gzips["meta_ext"] = gzip.open(tmp_metaext, "wb")
# Append list opening to the meta_ext file.
gzips.get("meta_ext").write(b"[\n")
f = EXTENDED_FIELD_HANDLERS.get(sys.argv[1])
@@ -258,14 +275,38 @@ def _main():
# Produce pkgbase.gz
query = db.query(PackageBase.Name).filter(
PackageBase.PackagerUID.isnot(None)).all()
- with gzip.open(PKGBASE, "wt") as f:
+ tmp_pkgbase = os.path.join(tmpdir, os.path.basename(PKGBASE))
+ with gzip.open(tmp_pkgbase, "wt") as f:
f.writelines([f"{base.Name}\n" for i, base in enumerate(query)])
# Produce users.gz
query = db.query(User.Username).all()
- with gzip.open(USERS, "wt") as f:
+ tmp_users = os.path.join(tmpdir, os.path.basename(USERS))
+ with gzip.open(tmp_users, "wt") as f:
f.writelines([f"{user.Username}\n" for i, user in enumerate(query)])
+ files = [
+ (tmp_packages, PACKAGES),
+ (tmp_meta, META),
+ (tmp_pkgbase, PKGBASE),
+ (tmp_users, USERS),
+ ]
+ if len(sys.argv) > 1 and sys.argv[1] in EXTENDED_FIELD_HANDLERS:
+ files.append((tmp_metaext, META_EXT))
+
+ for src, dst in files:
+ checksum = sha256sum(src)
+ base = os.path.basename(src)
+ checksum_formatted = f"SHA256 ({base}) = {checksum}"
+
+ checksum_file = f"{dst}.sha256"
+ with open(checksum_file, "w") as f:
+ f.write(checksum_formatted)
+
+ # Move the new archive into its rightful place.
+ shutil.move(src, dst)
+
+ os.removedirs(tmpdir)
seconds = filters.number_format(bench.end(), 4)
logger.info(f"Completed in {seconds} seconds.")
diff --git a/doc/maintenance.txt b/doc/maintenance.txt
index fbde1fff..c52cf76f 100644
--- a/doc/maintenance.txt
+++ b/doc/maintenance.txt
@@ -71,7 +71,10 @@ computations and clean up the database:
within the last 24 hours but never populated.
* aurweb-mkpkglists generates the package list files; it takes an optional
- --extended flag, which additionally produces multiinfo metadata.
+    --extended flag, which additionally produces multiinfo metadata. It also
+    generates a .sha256 file alongside each archive in mkpkglists.archivedir;
+    each such file contains the SHA-256 hash of its matching
+    .gz counterpart.
* aurweb-usermaint removes the last login IP address of all users that did not
login within the past seven days.
diff --git a/docker-compose.yml b/docker-compose.yml
index 5046e89a..a56cbe72 100644
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -237,6 +237,7 @@ services:
cron:
condition: service_started
volumes:
+ - archives:/var/lib/aurweb/archives
- mariadb_run:/var/run/mysqld
ports:
- "127.0.0.1:18000:8000"
diff --git a/test/test_html.py b/test/test_html.py
index df018a03..25834987 100644
--- a/test/test_html.py
+++ b/test/test_html.py
@@ -1,5 +1,10 @@
""" A test suite used to test HTML renders in different cases. """
+import hashlib
+import os
+import tempfile
+
from http import HTTPStatus
+from unittest import mock
import fastapi
import pytest
@@ -7,7 +12,7 @@ import pytest
from fastapi import HTTPException
from fastapi.testclient import TestClient
-from aurweb import asgi, db
+from aurweb import asgi, config, db
from aurweb.models import PackageBase
from aurweb.models.account_type import TRUSTED_USER_ID, USER_ID
from aurweb.models.user import User
@@ -125,6 +130,29 @@ def test_get_successes():
assert successes[0].text.strip() == "Test"
+def test_archive_sig(client: TestClient):
+ hash_value = hashlib.sha256(b'test').hexdigest()
+
+ with tempfile.TemporaryDirectory() as tmpdir:
+ packages_sha256 = os.path.join(tmpdir, "packages.gz.sha256")
+ with open(packages_sha256, "w") as f:
+ f.write(hash_value)
+
+ config_get = config.get
+
+ def mock_config(section: str, key: str):
+ if key == "archivedir":
+ return tmpdir
+ return config_get(section, key)
+
+ with mock.patch("aurweb.config.get", side_effect=mock_config):
+ with client as request:
+ resp = request.get("/packages.gz.sha256")
+
+ assert resp.status_code == int(HTTPStatus.OK)
+ assert resp.text == hash_value
+
+
def test_metrics(client: TestClient):
with client as request:
resp = request.get("/metrics")
diff --git a/test/test_mkpkglists.py b/test/test_mkpkglists.py
index ee66e4e1..7b538e02 100644
--- a/test/test_mkpkglists.py
+++ b/test/test_mkpkglists.py
@@ -1,58 +1,34 @@
+import gzip
import json
+import os
-from typing import List, Union
+from typing import List
from unittest import mock
+import py
import pytest
-from aurweb import config, db, util
+from aurweb import config, db
from aurweb.models import License, Package, PackageBase, PackageDependency, PackageLicense, User
from aurweb.models.account_type import USER_ID
from aurweb.models.dependency_type import DEPENDS_ID
-from aurweb.testing import noop
-
-class FakeFile:
- data = str()
- __exit__ = noop
-
- def __init__(self, modes: str) -> "FakeFile":
- self.modes = modes
-
- def __enter__(self, *args, **kwargs) -> "FakeFile":
- return self
-
- def write(self, data: Union[str, bytes]) -> None:
- if isinstance(data, bytes):
- data = data.decode()
- self.data += data
-
- def writelines(self, dataset: List[Union[str, bytes]]) -> None:
- util.apply_all(dataset, self.write)
-
- def close(self) -> None:
- return
-
-
-class MockGzipOpen:
- def __init__(self):
- self.gzips = dict()
-
- def open(self, archive: str, modes: str):
- self.gzips[archive] = FakeFile(modes)
- return self.gzips.get(archive)
-
- def get(self, key: str) -> FakeFile:
- return self.gzips.get(key)
-
- def __getitem__(self, key: str) -> FakeFile:
- return self.get(key)
-
- def __contains__(self, key: str) -> bool:
- return key in self.gzips
-
- def data(self, archive: str):
- return self.get(archive).data
+META_KEYS = [
+ "ID",
+ "Name",
+ "PackageBaseID",
+ "PackageBase",
+ "Version",
+ "Description",
+ "URL",
+ "NumVotes",
+ "Popularity",
+ "OutOfDate",
+ "Maintainer",
+ "FirstSubmitted",
+ "LastModified",
+ "URLPath",
+]
@pytest.fixture(autouse=True)
@@ -95,121 +71,154 @@ def packages(user: User) -> List[Package]:
yield sorted(output, key=lambda k: k.Name)
-@mock.patch("os.makedirs", side_effect=noop)
-def test_mkpkglists_empty(makedirs: mock.MagicMock):
- gzips = MockGzipOpen()
- with mock.patch("gzip.open", side_effect=gzips.open):
- from aurweb.scripts import mkpkglists
- mkpkglists.main()
+@pytest.fixture
+def config_mock(tmpdir: py.path.local) -> None:
+ config_get = config.get
+ archivedir = config.get("mkpkglists", "archivedir")
- archives = config.get_section("mkpkglists")
- archives.pop("archivedir")
- archives.pop("packagesmetaextfile")
+ def mock_config(section: str, key: str) -> str:
+ if section == "mkpkglists":
+ if key == "archivedir":
+ return str(tmpdir)
+ return config_get(section, key).replace(archivedir, str(tmpdir))
+ return config_get(section, key)
- for archive in archives.values():
- assert archive in gzips
+ with mock.patch("aurweb.config.get", side_effect=mock_config):
+ config.rehash()
+ yield
+ config.rehash()
- # Expect that packagesfile got created, but is empty because
- # we have no DB records.
- packages_file = archives.get("packagesfile")
- assert gzips.data(packages_file) == str()
- # Expect that pkgbasefile got created, but is empty because
- # we have no DB records.
- users_file = archives.get("pkgbasefile")
- assert gzips.data(users_file) == str()
+def test_mkpkglists(tmpdir: py.path.local, config_mock: None, user: User, packages: List[Package]):
+ from aurweb.scripts import mkpkglists
+ mkpkglists.main()
- # Expect that userfile got created, but is empty because
- # we have no DB records.
- users_file = archives.get("userfile")
- assert gzips.data(users_file) == str()
+ PACKAGES = config.get("mkpkglists", "packagesfile")
+ META = config.get("mkpkglists", "packagesmetafile")
+ PKGBASE = config.get("mkpkglists", "pkgbasefile")
+ USERS = config.get("mkpkglists", "userfile")
- # Expect that packagesmetafile got created, but is empty because
- # we have no DB records; it's still a valid empty JSON list.
- meta_file = archives.get("packagesmetafile")
- assert gzips.data(meta_file) == "[\n]"
+ expectations = [
+ (
+ PACKAGES,
+ "pkg_0\npkg_1\npkg_2\npkg_3\npkg_4\n",
+ ),
+ (
+ PKGBASE,
+ "pkgbase_0\npkgbase_1\npkgbase_2\npkgbase_3\npkgbase_4\n",
+ ),
+ (
+ USERS,
+ "test\n"
+ ),
+ ]
+
+ for (file, expected_content) in expectations:
+ with gzip.open(file, "r") as f:
+ file_content = f.read().decode()
+ assert file_content == expected_content
+
+ with gzip.open(META) as f:
+ metadata = json.load(f)
+
+ assert len(metadata) == len(packages)
+ for pkg in metadata:
+ for key in META_KEYS:
+ assert key in pkg, f"{pkg=} record does not have {key=}"
+
+ for file in (PACKAGES, PKGBASE, USERS, META):
+ with open(f"{file}.sha256") as f:
+ file_sig_content = f.read()
+ expected_prefix = f"SHA256 ({os.path.basename(file)}) = "
+ assert file_sig_content.startswith(expected_prefix)
+ assert len(file_sig_content) == len(expected_prefix) + 64
@mock.patch("sys.argv", ["mkpkglists", "--extended"])
-@mock.patch("os.makedirs", side_effect=noop)
-def test_mkpkglists_extended_empty(makedirs: mock.MagicMock):
- gzips = MockGzipOpen()
- with mock.patch("gzip.open", side_effect=gzips.open):
- from aurweb.scripts import mkpkglists
- mkpkglists.main()
+def test_mkpkglists_extended_empty(config_mock: None):
+ from aurweb.scripts import mkpkglists
+ mkpkglists.main()
- archives = config.get_section("mkpkglists")
- archives.pop("archivedir")
+ PACKAGES = config.get("mkpkglists", "packagesfile")
+ META = config.get("mkpkglists", "packagesmetafile")
+ META_EXT = config.get("mkpkglists", "packagesmetaextfile")
+ PKGBASE = config.get("mkpkglists", "pkgbasefile")
+ USERS = config.get("mkpkglists", "userfile")
- for archive in archives.values():
- assert archive in gzips
+ expectations = [
+ (PACKAGES, ""),
+ (PKGBASE, ""),
+ (USERS, ""),
+ (META, "[\n]"),
+ (META_EXT, "[\n]"),
+ ]
- # Expect that packagesfile got created, but is empty because
- # we have no DB records.
- packages_file = archives.get("packagesfile")
- assert gzips.data(packages_file) == str()
+ for (file, expected_content) in expectations:
+ with gzip.open(file, "r") as f:
+ file_content = f.read().decode()
+ assert file_content == expected_content, f"{file=} contents malformed"
- # Expect that pkgbasefile got created, but is empty because
- # we have no DB records.
- users_file = archives.get("pkgbasefile")
- assert gzips.data(users_file) == str()
-
- # Expect that userfile got created, but is empty because
- # we have no DB records.
- users_file = archives.get("userfile")
- assert gzips.data(users_file) == str()
-
- # Expect that packagesmetafile got created, but is empty because
- # we have no DB records; it's still a valid empty JSON list.
- meta_file = archives.get("packagesmetafile")
- assert gzips.data(meta_file) == "[\n]"
-
- # Expect that packagesmetafile got created, but is empty because
- # we have no DB records; it's still a valid empty JSON list.
- meta_file = archives.get("packagesmetaextfile")
- assert gzips.data(meta_file) == "[\n]"
+ for file in (PACKAGES, PKGBASE, USERS, META, META_EXT):
+ with open(f"{file}.sha256") as f:
+ file_sig_content = f.read()
+ expected_prefix = f"SHA256 ({os.path.basename(file)}) = "
+ assert file_sig_content.startswith(expected_prefix)
+ assert len(file_sig_content) == len(expected_prefix) + 64
@mock.patch("sys.argv", ["mkpkglists", "--extended"])
-@mock.patch("os.makedirs", side_effect=noop)
-def test_mkpkglists_extended(makedirs: mock.MagicMock, user: User,
+def test_mkpkglists_extended(config_mock: None, user: User,
packages: List[Package]):
- gzips = MockGzipOpen()
- with mock.patch("gzip.open", side_effect=gzips.open):
- from aurweb.scripts import mkpkglists
- mkpkglists.main()
+ from aurweb.scripts import mkpkglists
+ mkpkglists.main()
- archives = config.get_section("mkpkglists")
- archives.pop("archivedir")
+ PACKAGES = config.get("mkpkglists", "packagesfile")
+ META = config.get("mkpkglists", "packagesmetafile")
+ META_EXT = config.get("mkpkglists", "packagesmetaextfile")
+ PKGBASE = config.get("mkpkglists", "pkgbasefile")
+ USERS = config.get("mkpkglists", "userfile")
- for archive in archives.values():
- assert archive in gzips
+ expectations = [
+ (
+ PACKAGES,
+ "pkg_0\npkg_1\npkg_2\npkg_3\npkg_4\n",
+ ),
+ (
+ PKGBASE,
+ "pkgbase_0\npkgbase_1\npkgbase_2\npkgbase_3\npkgbase_4\n",
+ ),
+ (
+ USERS,
+ "test\n"
+ ),
+ ]
- # Expect that packagesfile got created, but is empty because
- # we have no DB records.
- packages_file = archives.get("packagesfile")
- expected = "\n".join([p.Name for p in packages]) + "\n"
- assert gzips.data(packages_file) == expected
+ for (file, expected_content) in expectations:
+ with gzip.open(file, "r") as f:
+ file_content = f.read().decode()
+ assert file_content == expected_content
- # Expect that pkgbasefile got created, but is empty because
- # we have no DB records.
- users_file = archives.get("pkgbasefile")
- expected = "\n".join([p.PackageBase.Name for p in packages]) + "\n"
- assert gzips.data(users_file) == expected
+ with gzip.open(META) as f:
+ metadata = json.load(f)
- # Expect that userfile got created, but is empty because
- # we have no DB records.
- users_file = archives.get("userfile")
- assert gzips.data(users_file) == "test\n"
+ assert len(metadata) == len(packages)
+ for pkg in metadata:
+ for key in META_KEYS:
+ assert key in pkg, f"{pkg=} record does not have {key=}"
- # Expect that packagesmetafile got created, but is empty because
- # we have no DB records; it's still a valid empty JSON list.
- meta_file = archives.get("packagesmetafile")
- data = json.loads(gzips.data(meta_file))
- assert len(data) == 5
+ with gzip.open(META_EXT) as f:
+ extended_metadata = json.load(f)
- # Expect that packagesmetafile got created, but is empty because
- # we have no DB records; it's still a valid empty JSON list.
- meta_file = archives.get("packagesmetaextfile")
- data = json.loads(gzips.data(meta_file))
- assert len(data) == 5
+ assert len(extended_metadata) == len(packages)
+ for pkg in extended_metadata:
+ for key in META_KEYS:
+ assert key in pkg, f"{pkg=} record does not have {key=}"
+ assert isinstance(pkg["Depends"], list)
+ assert isinstance(pkg["License"], list)
+
+ for file in (PACKAGES, PKGBASE, USERS, META, META_EXT):
+ with open(f"{file}.sha256") as f:
+ file_sig_content = f.read()
+ expected_prefix = f"SHA256 ({os.path.basename(file)}) = "
+ assert file_sig_content.startswith(expected_prefix)
+ assert len(file_sig_content) == len(expected_prefix) + 64