diff --git a/aurweb/routers/html.py b/aurweb/routers/html.py index 52e3f818..8d32089a 100644 --- a/aurweb/routers/html.py +++ b/aurweb/routers/html.py @@ -213,6 +213,19 @@ async def index(request: Request): return render_template(request, "index.html", context) +@router.get("/{archive}.sha256") +async def archive_sha256(request: Request, archive: str): + archivedir = aurweb.config.get("mkpkglists", "archivedir") + hashfile = os.path.join(archivedir, f"{archive}.sha256") + if not os.path.exists(hashfile): + raise HTTPException(status_code=HTTPStatus.NOT_FOUND) + + with open(hashfile) as f: + hash_value = f.read() + headers = {"Content-Type": "text/plain"} + return Response(hash_value, headers=headers) + + @router.get("/metrics") async def metrics(request: Request): registry = CollectorRegistry() diff --git a/aurweb/scripts/mkpkglists.py b/aurweb/scripts/mkpkglists.py index dd2e9e86..00096d74 100755 --- a/aurweb/scripts/mkpkglists.py +++ b/aurweb/scripts/mkpkglists.py @@ -19,8 +19,12 @@ on the following, right-hand side fields are added to each item. 
""" import gzip +import hashlib +import io import os +import shutil import sys +import tempfile from collections import defaultdict from typing import Any, Dict @@ -37,15 +41,6 @@ from aurweb.models import Package, PackageBase, User logger = logging.get_logger("aurweb.scripts.mkpkglists") -archivedir = aurweb.config.get("mkpkglists", "archivedir") -os.makedirs(archivedir, exist_ok=True) - -PACKAGES = aurweb.config.get('mkpkglists', 'packagesfile') -META = aurweb.config.get('mkpkglists', 'packagesmetafile') -META_EXT = aurweb.config.get('mkpkglists', 'packagesmetaextfile') -PKGBASE = aurweb.config.get('mkpkglists', 'pkgbasefile') -USERS = aurweb.config.get('mkpkglists', 'userfile') - TYPE_MAP = { "depends": "Depends", @@ -174,7 +169,24 @@ def as_dict(package: Package) -> Dict[str, Any]: } +def sha256sum(file_path: str) -> str: + hash = hashlib.sha256() + with open(file_path, "rb") as f: + while chunk := f.read(io.DEFAULT_BUFFER_SIZE): + hash.update(chunk) + return hash.hexdigest() + + def _main(): + archivedir = aurweb.config.get("mkpkglists", "archivedir") + os.makedirs(archivedir, exist_ok=True) + + PACKAGES = aurweb.config.get('mkpkglists', 'packagesfile') + META = aurweb.config.get('mkpkglists', 'packagesmetafile') + META_EXT = aurweb.config.get('mkpkglists', 'packagesmetaextfile') + PKGBASE = aurweb.config.get('mkpkglists', 'pkgbasefile') + USERS = aurweb.config.get('mkpkglists', 'userfile') + bench = Benchmark() logger.info("Started re-creating archives, wait a while...") @@ -204,9 +216,14 @@ def _main(): # Produce packages-meta-v1.json.gz output = list() snapshot_uri = aurweb.config.get("options", "snapshot_uri") + + tmpdir = tempfile.mkdtemp() + tmp_packages = os.path.join(tmpdir, os.path.basename(PACKAGES)) + tmp_meta = os.path.join(tmpdir, os.path.basename(META)) + tmp_metaext = os.path.join(tmpdir, os.path.basename(META_EXT)) gzips = { - "packages": gzip.open(PACKAGES, "wt"), - "meta": gzip.open(META, "wb"), + "packages": gzip.open(tmp_packages, "wt"), + 
"meta": gzip.open(tmp_meta, "wb"), } # Append list opening to the metafile. @@ -215,7 +232,7 @@ def _main(): # Produce packages.gz + packages-meta-ext-v1.json.gz extended = False if len(sys.argv) > 1 and sys.argv[1] in EXTENDED_FIELD_HANDLERS: - gzips["meta_ext"] = gzip.open(META_EXT, "wb") + gzips["meta_ext"] = gzip.open(tmp_metaext, "wb") # Append list opening to the meta_ext file. gzips.get("meta_ext").write(b"[\n") f = EXTENDED_FIELD_HANDLERS.get(sys.argv[1]) @@ -258,14 +275,38 @@ def _main(): # Produce pkgbase.gz query = db.query(PackageBase.Name).filter( PackageBase.PackagerUID.isnot(None)).all() - with gzip.open(PKGBASE, "wt") as f: + tmp_pkgbase = os.path.join(tmpdir, os.path.basename(PKGBASE)) + with gzip.open(tmp_pkgbase, "wt") as f: f.writelines([f"{base.Name}\n" for i, base in enumerate(query)]) # Produce users.gz query = db.query(User.Username).all() - with gzip.open(USERS, "wt") as f: + tmp_users = os.path.join(tmpdir, os.path.basename(USERS)) + with gzip.open(tmp_users, "wt") as f: f.writelines([f"{user.Username}\n" for i, user in enumerate(query)]) + files = [ + (tmp_packages, PACKAGES), + (tmp_meta, META), + (tmp_pkgbase, PKGBASE), + (tmp_users, USERS), + ] + if len(sys.argv) > 1 and sys.argv[1] in EXTENDED_FIELD_HANDLERS: + files.append((tmp_metaext, META_EXT)) + + for src, dst in files: + checksum = sha256sum(src) + base = os.path.basename(src) + checksum_formatted = f"SHA256 ({base}) = {checksum}" + + checksum_file = f"{dst}.sha256" + with open(checksum_file, "w") as f: + f.write(checksum_formatted) + + # Move the new archive into its rightful place. + shutil.move(src, dst) + + os.removedirs(tmpdir) seconds = filters.number_format(bench.end(), 4) logger.info(f"Completed in {seconds} seconds.") diff --git a/doc/maintenance.txt b/doc/maintenance.txt index fbde1fff..c52cf76f 100644 --- a/doc/maintenance.txt +++ b/doc/maintenance.txt @@ -71,7 +71,10 @@ computations and clean up the database: within the last 24 hours but never populated. 
* aurweb-mkpkglists generates the package list files; it takes an optional - --extended flag, which additionally produces multiinfo metadata. + --extended flag, which additionally produces multiinfo metadata. It also + generates {archive.gz}.sha256 files that should be located within + mkpkglists.archivedir, each containing the SHA-256 hash of its matching + .gz counterpart. * aurweb-usermaint removes the last login IP address of all users that did not login within the past seven days. diff --git a/docker-compose.yml b/docker-compose.yml index 5046e89a..a56cbe72 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -237,6 +237,7 @@ services: cron: condition: service_started volumes: + - archives:/var/lib/aurweb/archives - mariadb_run:/var/run/mysqld ports: - "127.0.0.1:18000:8000" diff --git a/test/test_html.py b/test/test_html.py index df018a03..25834987 100644 --- a/test/test_html.py +++ b/test/test_html.py @@ -1,5 +1,10 @@ """ A test suite used to test HTML renders in different cases. 
""" +import hashlib +import os +import tempfile + from http import HTTPStatus +from unittest import mock import fastapi import pytest @@ -7,7 +12,7 @@ import pytest from fastapi import HTTPException from fastapi.testclient import TestClient -from aurweb import asgi, db +from aurweb import asgi, config, db from aurweb.models import PackageBase from aurweb.models.account_type import TRUSTED_USER_ID, USER_ID from aurweb.models.user import User @@ -125,6 +130,29 @@ def test_get_successes(): assert successes[0].text.strip() == "Test" +def test_archive_sig(client: TestClient): + hash_value = hashlib.sha256(b'test').hexdigest() + + with tempfile.TemporaryDirectory() as tmpdir: + packages_sha256 = os.path.join(tmpdir, "packages.gz.sha256") + with open(packages_sha256, "w") as f: + f.write(hash_value) + + config_get = config.get + + def mock_config(section: str, key: str): + if key == "archivedir": + return tmpdir + return config_get(section, key) + + with mock.patch("aurweb.config.get", side_effect=mock_config): + with client as request: + resp = request.get("/packages.gz.sha256") + + assert resp.status_code == int(HTTPStatus.OK) + assert resp.text == hash_value + + def test_metrics(client: TestClient): with client as request: resp = request.get("/metrics") diff --git a/test/test_mkpkglists.py b/test/test_mkpkglists.py index ee66e4e1..7b538e02 100644 --- a/test/test_mkpkglists.py +++ b/test/test_mkpkglists.py @@ -1,58 +1,34 @@ +import gzip import json +import os -from typing import List, Union +from typing import List from unittest import mock +import py import pytest -from aurweb import config, db, util +from aurweb import config, db from aurweb.models import License, Package, PackageBase, PackageDependency, PackageLicense, User from aurweb.models.account_type import USER_ID from aurweb.models.dependency_type import DEPENDS_ID -from aurweb.testing import noop - -class FakeFile: - data = str() - __exit__ = noop - - def __init__(self, modes: str) -> "FakeFile": - self.modes 
= modes - - def __enter__(self, *args, **kwargs) -> "FakeFile": - return self - - def write(self, data: Union[str, bytes]) -> None: - if isinstance(data, bytes): - data = data.decode() - self.data += data - - def writelines(self, dataset: List[Union[str, bytes]]) -> None: - util.apply_all(dataset, self.write) - - def close(self) -> None: - return - - -class MockGzipOpen: - def __init__(self): - self.gzips = dict() - - def open(self, archive: str, modes: str): - self.gzips[archive] = FakeFile(modes) - return self.gzips.get(archive) - - def get(self, key: str) -> FakeFile: - return self.gzips.get(key) - - def __getitem__(self, key: str) -> FakeFile: - return self.get(key) - - def __contains__(self, key: str) -> bool: - return key in self.gzips - - def data(self, archive: str): - return self.get(archive).data +META_KEYS = [ + "ID", + "Name", + "PackageBaseID", + "PackageBase", + "Version", + "Description", + "URL", + "NumVotes", + "Popularity", + "OutOfDate", + "Maintainer", + "FirstSubmitted", + "LastModified", + "URLPath", +] @pytest.fixture(autouse=True) @@ -95,121 +71,154 @@ def packages(user: User) -> List[Package]: yield sorted(output, key=lambda k: k.Name) -@mock.patch("os.makedirs", side_effect=noop) -def test_mkpkglists_empty(makedirs: mock.MagicMock): - gzips = MockGzipOpen() - with mock.patch("gzip.open", side_effect=gzips.open): - from aurweb.scripts import mkpkglists - mkpkglists.main() +@pytest.fixture +def config_mock(tmpdir: py.path.local) -> None: + config_get = config.get + archivedir = config.get("mkpkglists", "archivedir") - archives = config.get_section("mkpkglists") - archives.pop("archivedir") - archives.pop("packagesmetaextfile") + def mock_config(section: str, key: str) -> str: + if section == "mkpkglists": + if key == "archivedir": + return str(tmpdir) + return config_get(section, key).replace(archivedir, str(tmpdir)) + return config_get(section, key) - for archive in archives.values(): - assert archive in gzips + with 
mock.patch("aurweb.config.get", side_effect=mock_config): + config.rehash() + yield + config.rehash() - # Expect that packagesfile got created, but is empty because - # we have no DB records. - packages_file = archives.get("packagesfile") - assert gzips.data(packages_file) == str() - # Expect that pkgbasefile got created, but is empty because - # we have no DB records. - users_file = archives.get("pkgbasefile") - assert gzips.data(users_file) == str() +def test_mkpkglists(tmpdir: py.path.local, config_mock: None, user: User, packages: List[Package]): + from aurweb.scripts import mkpkglists + mkpkglists.main() - # Expect that userfile got created, but is empty because - # we have no DB records. - users_file = archives.get("userfile") - assert gzips.data(users_file) == str() + PACKAGES = config.get("mkpkglists", "packagesfile") + META = config.get("mkpkglists", "packagesmetafile") + PKGBASE = config.get("mkpkglists", "pkgbasefile") + USERS = config.get("mkpkglists", "userfile") - # Expect that packagesmetafile got created, but is empty because - # we have no DB records; it's still a valid empty JSON list. 
- meta_file = archives.get("packagesmetafile") - assert gzips.data(meta_file) == "[\n]" + expectations = [ + ( + PACKAGES, + "pkg_0\npkg_1\npkg_2\npkg_3\npkg_4\n", + ), + ( + PKGBASE, + "pkgbase_0\npkgbase_1\npkgbase_2\npkgbase_3\npkgbase_4\n", + ), + ( + USERS, + "test\n" + ), + ] + + for (file, expected_content) in expectations: + with gzip.open(file, "r") as f: + file_content = f.read().decode() + assert file_content == expected_content + + with gzip.open(META) as f: + metadata = json.load(f) + + assert len(metadata) == len(packages) + for pkg in metadata: + for key in META_KEYS: + assert key in pkg, f"{pkg=} record does not have {key=}" + + for file in (PACKAGES, PKGBASE, USERS, META): + with open(f"{file}.sha256") as f: + file_sig_content = f.read() + expected_prefix = f"SHA256 ({os.path.basename(file)}) = " + assert file_sig_content.startswith(expected_prefix) + assert len(file_sig_content) == len(expected_prefix) + 64 @mock.patch("sys.argv", ["mkpkglists", "--extended"]) -@mock.patch("os.makedirs", side_effect=noop) -def test_mkpkglists_extended_empty(makedirs: mock.MagicMock): - gzips = MockGzipOpen() - with mock.patch("gzip.open", side_effect=gzips.open): - from aurweb.scripts import mkpkglists - mkpkglists.main() +def test_mkpkglists_extended_empty(config_mock: None): + from aurweb.scripts import mkpkglists + mkpkglists.main() - archives = config.get_section("mkpkglists") - archives.pop("archivedir") + PACKAGES = config.get("mkpkglists", "packagesfile") + META = config.get("mkpkglists", "packagesmetafile") + META_EXT = config.get("mkpkglists", "packagesmetaextfile") + PKGBASE = config.get("mkpkglists", "pkgbasefile") + USERS = config.get("mkpkglists", "userfile") - for archive in archives.values(): - assert archive in gzips + expectations = [ + (PACKAGES, ""), + (PKGBASE, ""), + (USERS, ""), + (META, "[\n]"), + (META_EXT, "[\n]"), + ] - # Expect that packagesfile got created, but is empty because - # we have no DB records. 
- packages_file = archives.get("packagesfile") - assert gzips.data(packages_file) == str() + for (file, expected_content) in expectations: + with gzip.open(file, "r") as f: + file_content = f.read().decode() + assert file_content == expected_content, f"{file=} contents malformed" - # Expect that pkgbasefile got created, but is empty because - # we have no DB records. - users_file = archives.get("pkgbasefile") - assert gzips.data(users_file) == str() - - # Expect that userfile got created, but is empty because - # we have no DB records. - users_file = archives.get("userfile") - assert gzips.data(users_file) == str() - - # Expect that packagesmetafile got created, but is empty because - # we have no DB records; it's still a valid empty JSON list. - meta_file = archives.get("packagesmetafile") - assert gzips.data(meta_file) == "[\n]" - - # Expect that packagesmetafile got created, but is empty because - # we have no DB records; it's still a valid empty JSON list. - meta_file = archives.get("packagesmetaextfile") - assert gzips.data(meta_file) == "[\n]" + for file in (PACKAGES, PKGBASE, USERS, META, META_EXT): + with open(f"{file}.sha256") as f: + file_sig_content = f.read() + expected_prefix = f"SHA256 ({os.path.basename(file)}) = " + assert file_sig_content.startswith(expected_prefix) + assert len(file_sig_content) == len(expected_prefix) + 64 @mock.patch("sys.argv", ["mkpkglists", "--extended"]) -@mock.patch("os.makedirs", side_effect=noop) -def test_mkpkglists_extended(makedirs: mock.MagicMock, user: User, +def test_mkpkglists_extended(config_mock: None, user: User, packages: List[Package]): - gzips = MockGzipOpen() - with mock.patch("gzip.open", side_effect=gzips.open): - from aurweb.scripts import mkpkglists - mkpkglists.main() + from aurweb.scripts import mkpkglists + mkpkglists.main() - archives = config.get_section("mkpkglists") - archives.pop("archivedir") + PACKAGES = config.get("mkpkglists", "packagesfile") + META = config.get("mkpkglists", 
"packagesmetafile") + META_EXT = config.get("mkpkglists", "packagesmetaextfile") + PKGBASE = config.get("mkpkglists", "pkgbasefile") + USERS = config.get("mkpkglists", "userfile") - for archive in archives.values(): - assert archive in gzips + expectations = [ + ( + PACKAGES, + "pkg_0\npkg_1\npkg_2\npkg_3\npkg_4\n", + ), + ( + PKGBASE, + "pkgbase_0\npkgbase_1\npkgbase_2\npkgbase_3\npkgbase_4\n", + ), + ( + USERS, + "test\n" + ), + ] - # Expect that packagesfile got created, but is empty because - # we have no DB records. - packages_file = archives.get("packagesfile") - expected = "\n".join([p.Name for p in packages]) + "\n" - assert gzips.data(packages_file) == expected + for (file, expected_content) in expectations: + with gzip.open(file, "r") as f: + file_content = f.read().decode() + assert file_content == expected_content - # Expect that pkgbasefile got created, but is empty because - # we have no DB records. - users_file = archives.get("pkgbasefile") - expected = "\n".join([p.PackageBase.Name for p in packages]) + "\n" - assert gzips.data(users_file) == expected + with gzip.open(META) as f: + metadata = json.load(f) - # Expect that userfile got created, but is empty because - # we have no DB records. - users_file = archives.get("userfile") - assert gzips.data(users_file) == "test\n" + assert len(metadata) == len(packages) + for pkg in metadata: + for key in META_KEYS: + assert key in pkg, f"{pkg=} record does not have {key=}" - # Expect that packagesmetafile got created, but is empty because - # we have no DB records; it's still a valid empty JSON list. - meta_file = archives.get("packagesmetafile") - data = json.loads(gzips.data(meta_file)) - assert len(data) == 5 + with gzip.open(META_EXT) as f: + extended_metadata = json.load(f) - # Expect that packagesmetafile got created, but is empty because - # we have no DB records; it's still a valid empty JSON list. 
- meta_file = archives.get("packagesmetaextfile") - data = json.loads(gzips.data(meta_file)) - assert len(data) == 5 + assert len(extended_metadata) == len(packages) + for pkg in extended_metadata: + for key in META_KEYS: + assert key in pkg, f"{pkg=} record does not have {key=}" + assert isinstance(pkg["Depends"], list) + assert isinstance(pkg["License"], list) + + for file in (PACKAGES, PKGBASE, USERS, META, META_EXT): + with open(f"{file}.sha256") as f: + file_sig_content = f.read() + expected_prefix = f"SHA256 ({os.path.basename(file)}) = " + assert file_sig_content.startswith(expected_prefix) + assert len(file_sig_content) == len(expected_prefix) + 64