feat(archives): add {archive}.sha256 and construct archives in tmpdir

This change brings some new additions to our archives:
- SHA-256 .sha256 hexdigests
- We now construct archives in a tmpdir and move them to the
archive destination once they are all complete. This prevents
clients from downloading corrupted archives while archiving is
still in progress.

Signed-off-by: Kevin Morris <kevr@0cost.org>
This commit is contained in:
Kevin Morris 2022-02-07 01:42:37 -08:00
parent 828847cfcd
commit 40a0e866e7
No known key found for this signature in database
GPG key ID: F7E46DED420788F3
6 changed files with 135 additions and 61 deletions

View file

@ -213,6 +213,19 @@ async def index(request: Request):
return render_template(request, "index.html", context) return render_template(request, "index.html", context)
@router.get("/{archive}.sha256")
async def archive_sha256(request: Request, archive: str):
    """Serve the SHA-256 hexdigest for a named archive.

    :param request: FastAPI request
    :param archive: Archive basename (e.g. "packages.gz"); the matching
        "{archive}.sha256" file is looked up in mkpkglists.archivedir
    :raises HTTPException: 404 NOT_FOUND when no .sha256 file exists
    :return: text/plain response containing the hexdigest
    """
    archivedir = aurweb.config.get("mkpkglists", "archivedir")
    hashfile = os.path.join(archivedir, f"{archive}.sha256")
    try:
        # EAFP: open directly instead of exists()+open() to avoid the
        # race where the cron job replaces archives between the check
        # and the read.
        with open(hashfile) as f:
            hash_value = f.read()
    except FileNotFoundError:
        raise HTTPException(status_code=HTTPStatus.NOT_FOUND)

    headers = {"Content-Type": "text/plain"}
    return Response(hash_value, headers=headers)
@router.get("/metrics") @router.get("/metrics")
async def metrics(request: Request): async def metrics(request: Request):
registry = CollectorRegistry() registry = CollectorRegistry()

View file

@ -20,9 +20,13 @@ on the following, right-hand side fields are added to each item.
import gzip import gzip
import os import os
import re
import shutil
import sys import sys
import tempfile
from collections import defaultdict from collections import defaultdict
from subprocess import PIPE, Popen
from typing import Any, Dict from typing import Any, Dict
import orjson import orjson
@ -37,15 +41,6 @@ from aurweb.models import Package, PackageBase, User
logger = logging.get_logger("aurweb.scripts.mkpkglists") logger = logging.get_logger("aurweb.scripts.mkpkglists")
archivedir = aurweb.config.get("mkpkglists", "archivedir")
os.makedirs(archivedir, exist_ok=True)
PACKAGES = aurweb.config.get('mkpkglists', 'packagesfile')
META = aurweb.config.get('mkpkglists', 'packagesmetafile')
META_EXT = aurweb.config.get('mkpkglists', 'packagesmetaextfile')
PKGBASE = aurweb.config.get('mkpkglists', 'pkgbasefile')
USERS = aurweb.config.get('mkpkglists', 'userfile')
TYPE_MAP = { TYPE_MAP = {
"depends": "Depends", "depends": "Depends",
@ -175,6 +170,15 @@ def as_dict(package: Package) -> Dict[str, Any]:
def _main(): def _main():
archivedir = aurweb.config.get("mkpkglists", "archivedir")
os.makedirs(archivedir, exist_ok=True)
PACKAGES = aurweb.config.get('mkpkglists', 'packagesfile')
META = aurweb.config.get('mkpkglists', 'packagesmetafile')
META_EXT = aurweb.config.get('mkpkglists', 'packagesmetaextfile')
PKGBASE = aurweb.config.get('mkpkglists', 'pkgbasefile')
USERS = aurweb.config.get('mkpkglists', 'userfile')
bench = Benchmark() bench = Benchmark()
logger.info("Started re-creating archives, wait a while...") logger.info("Started re-creating archives, wait a while...")
@ -204,9 +208,14 @@ def _main():
# Produce packages-meta-v1.json.gz # Produce packages-meta-v1.json.gz
output = list() output = list()
snapshot_uri = aurweb.config.get("options", "snapshot_uri") snapshot_uri = aurweb.config.get("options", "snapshot_uri")
tmpdir = tempfile.mkdtemp()
tmp_packages = os.path.join(tmpdir, os.path.basename(PACKAGES))
tmp_meta = os.path.join(tmpdir, os.path.basename(META))
tmp_metaext = os.path.join(tmpdir, os.path.basename(META_EXT))
gzips = { gzips = {
"packages": gzip.open(PACKAGES, "wt"), "packages": gzip.open(tmp_packages, "wt"),
"meta": gzip.open(META, "wb"), "meta": gzip.open(tmp_meta, "wb"),
} }
# Append list opening to the metafile. # Append list opening to the metafile.
@ -215,7 +224,7 @@ def _main():
# Produce packages.gz + packages-meta-ext-v1.json.gz # Produce packages.gz + packages-meta-ext-v1.json.gz
extended = False extended = False
if len(sys.argv) > 1 and sys.argv[1] in EXTENDED_FIELD_HANDLERS: if len(sys.argv) > 1 and sys.argv[1] in EXTENDED_FIELD_HANDLERS:
gzips["meta_ext"] = gzip.open(META_EXT, "wb") gzips["meta_ext"] = gzip.open(tmp_metaext, "wb")
# Append list opening to the meta_ext file. # Append list opening to the meta_ext file.
gzips.get("meta_ext").write(b"[\n") gzips.get("meta_ext").write(b"[\n")
f = EXTENDED_FIELD_HANDLERS.get(sys.argv[1]) f = EXTENDED_FIELD_HANDLERS.get(sys.argv[1])
@ -258,14 +267,41 @@ def _main():
# Produce pkgbase.gz # Produce pkgbase.gz
query = db.query(PackageBase.Name).filter( query = db.query(PackageBase.Name).filter(
PackageBase.PackagerUID.isnot(None)).all() PackageBase.PackagerUID.isnot(None)).all()
with gzip.open(PKGBASE, "wt") as f: tmp_pkgbase = os.path.join(tmpdir, os.path.basename(PKGBASE))
with gzip.open(tmp_pkgbase, "wt") as f:
f.writelines([f"{base.Name}\n" for i, base in enumerate(query)]) f.writelines([f"{base.Name}\n" for i, base in enumerate(query)])
# Produce users.gz # Produce users.gz
query = db.query(User.Username).all() query = db.query(User.Username).all()
with gzip.open(USERS, "wt") as f: tmp_users = os.path.join(tmpdir, os.path.basename(USERS))
with gzip.open(tmp_users, "wt") as f:
f.writelines([f"{user.Username}\n" for i, user in enumerate(query)]) f.writelines([f"{user.Username}\n" for i, user in enumerate(query)])
files = [
(tmp_packages, PACKAGES),
(tmp_meta, META),
(tmp_pkgbase, PKGBASE),
(tmp_users, USERS),
]
if len(sys.argv) > 1 and sys.argv[1] in EXTENDED_FIELD_HANDLERS:
files.append((tmp_metaext, META_EXT))
for src, dst in files:
proc = Popen(["cksum", "-a", "sha256", src], stdout=PIPE)
out, _ = proc.communicate()
assert proc.returncode == 0
base = os.path.basename(src)
checksum = re.sub(r"SHA256 \(.+\)", f"SHA256 ({base})", out.decode())
checksum_file = f"{dst}.sha256"
with open(checksum_file, "w") as f:
f.write(checksum)
# Move the new archive into its rightful place.
shutil.move(src, dst)
os.removedirs(tmpdir)
seconds = filters.number_format(bench.end(), 4) seconds = filters.number_format(bench.end(), 4)
logger.info(f"Completed in {seconds} seconds.") logger.info(f"Completed in {seconds} seconds.")

View file

@ -71,7 +71,10 @@ computations and clean up the database:
within the last 24 hours but never populated. within the last 24 hours but never populated.
* aurweb-mkpkglists generates the package list files; it takes an optional * aurweb-mkpkglists generates the package list files; it takes an optional
--extended flag, which additionally produces multiinfo metadata. --extended flag, which additionally produces multiinfo metadata. It also
generates {archive}.sha256 files located within
mkpkglists.archivedir, each containing the SHA-256 hexdigest of its
matching .gz counterpart.
* aurweb-usermaint removes the last login IP address of all users that did not * aurweb-usermaint removes the last login IP address of all users that did not
login within the past seven days. login within the past seven days.

View file

@ -237,6 +237,7 @@ services:
cron: cron:
condition: service_started condition: service_started
volumes: volumes:
- archives:/var/lib/aurweb/archives
- mariadb_run:/var/run/mysqld - mariadb_run:/var/run/mysqld
ports: ports:
- "127.0.0.1:18000:8000" - "127.0.0.1:18000:8000"

View file

@ -1,5 +1,10 @@
""" A test suite used to test HTML renders in different cases. """ """ A test suite used to test HTML renders in different cases. """
import hashlib
import os
import tempfile
from http import HTTPStatus from http import HTTPStatus
from unittest import mock
import fastapi import fastapi
import pytest import pytest
@ -7,7 +12,7 @@ import pytest
from fastapi import HTTPException from fastapi import HTTPException
from fastapi.testclient import TestClient from fastapi.testclient import TestClient
from aurweb import asgi, db from aurweb import asgi, config, db
from aurweb.models import PackageBase from aurweb.models import PackageBase
from aurweb.models.account_type import TRUSTED_USER_ID, USER_ID from aurweb.models.account_type import TRUSTED_USER_ID, USER_ID
from aurweb.models.user import User from aurweb.models.user import User
@ -125,6 +130,29 @@ def test_get_successes():
assert successes[0].text.strip() == "Test" assert successes[0].text.strip() == "Test"
def test_archive_sig(client: TestClient):
    """End-to-end check of the /{archive}.sha256 route.

    Seeds a temporary archivedir with a known hexdigest file and
    verifies the route serves its contents back verbatim.
    """
    expected = hashlib.sha256(b'test').hexdigest()

    with tempfile.TemporaryDirectory() as tmpdir:
        # Pre-populate the fake archivedir with a checksum file.
        with open(os.path.join(tmpdir, "packages.gz.sha256"), "w") as f:
            f.write(expected)

        real_get = config.get

        def fake_get(section: str, key: str):
            # Redirect only archivedir lookups to the temporary dir;
            # every other config key falls through to the real getter.
            return tmpdir if key == "archivedir" else real_get(section, key)

        with mock.patch("aurweb.config.get", side_effect=fake_get):
            with client as request:
                resp = request.get("/packages.gz.sha256")

        assert resp.status_code == int(HTTPStatus.OK)
        assert resp.text == expected
def test_metrics(client: TestClient): def test_metrics(client: TestClient):
with client as request: with client as request:
resp = request.get("/metrics") resp = request.get("/metrics")

View file

@ -3,6 +3,7 @@ import json
from typing import List, Union from typing import List, Union
from unittest import mock from unittest import mock
import py
import pytest import pytest
from aurweb import config, db, util from aurweb import config, db, util
@ -14,14 +15,18 @@ from aurweb.testing import noop
class FakeFile: class FakeFile:
data = str() data = str()
__exit__ = noop
def __init__(self, modes: str) -> "FakeFile": def __init__(self, archive: str, modes: str) -> "FakeFile":
self.archive = archive
self.modes = modes self.modes = modes
def __enter__(self, *args, **kwargs) -> "FakeFile": def __enter__(self, *args, **kwargs) -> "FakeFile":
return self return self
def __exit__(self, *args, **kwargs):
print(f"Writing {self.archive}....")
self.close()
def write(self, data: Union[str, bytes]) -> None: def write(self, data: Union[str, bytes]) -> None:
if isinstance(data, bytes): if isinstance(data, bytes):
data = data.decode() data = data.decode()
@ -31,7 +36,8 @@ class FakeFile:
util.apply_all(dataset, self.write) util.apply_all(dataset, self.write)
def close(self) -> None: def close(self) -> None:
return with open(self.archive, "w") as f:
f.write(self.data)
class MockGzipOpen: class MockGzipOpen:
@ -39,7 +45,7 @@ class MockGzipOpen:
self.gzips = dict() self.gzips = dict()
def open(self, archive: str, modes: str): def open(self, archive: str, modes: str):
self.gzips[archive] = FakeFile(modes) self.gzips[archive] = FakeFile(archive, modes)
return self.gzips.get(archive) return self.gzips.get(archive)
def get(self, key: str) -> FakeFile: def get(self, key: str) -> FakeFile:
@ -49,6 +55,7 @@ class MockGzipOpen:
return self.get(key) return self.get(key)
def __contains__(self, key: str) -> bool: def __contains__(self, key: str) -> bool:
print(self.gzips.keys())
return key in self.gzips return key in self.gzips
def data(self, archive: str): def data(self, archive: str):
@ -95,49 +102,35 @@ def packages(user: User) -> List[Package]:
yield sorted(output, key=lambda k: k.Name) yield sorted(output, key=lambda k: k.Name)
@mock.patch("os.makedirs", side_effect=noop) @pytest.fixture
def test_mkpkglists_empty(makedirs: mock.MagicMock): def config_mock(tmpdir: py.path.local) -> None:
gzips = MockGzipOpen() config_get = config.get
with mock.patch("gzip.open", side_effect=gzips.open): archivedir = config.get("mkpkglists", "archivedir")
def mock_config(section: str, key: str) -> str:
if section == "mkpkglists":
if key == "archivedir":
return str(tmpdir)
return config_get(section, key).replace(archivedir, str(tmpdir))
return config_get(section, key)
with mock.patch("aurweb.config.get", side_effect=mock_config):
config.rehash()
yield
config.rehash()
def test_mkpkglists(tmpdir: py.path.local, config_mock: None):
from aurweb.scripts import mkpkglists from aurweb.scripts import mkpkglists
mkpkglists.main() mkpkglists.main()
archives = config.get_section("mkpkglists")
archives.pop("archivedir")
archives.pop("packagesmetaextfile")
for archive in archives.values():
assert archive in gzips
# Expect that packagesfile got created, but is empty because
# we have no DB records.
packages_file = archives.get("packagesfile")
assert gzips.data(packages_file) == str()
# Expect that pkgbasefile got created, but is empty because
# we have no DB records.
users_file = archives.get("pkgbasefile")
assert gzips.data(users_file) == str()
# Expect that userfile got created, but is empty because
# we have no DB records.
users_file = archives.get("userfile")
assert gzips.data(users_file) == str()
# Expect that packagesmetafile got created, but is empty because
# we have no DB records; it's still a valid empty JSON list.
meta_file = archives.get("packagesmetafile")
assert gzips.data(meta_file) == "[\n]"
@mock.patch("sys.argv", ["mkpkglists", "--extended"]) @mock.patch("sys.argv", ["mkpkglists", "--extended"])
@mock.patch("os.makedirs", side_effect=noop) def test_mkpkglists_extended_empty(config_mock: None):
def test_mkpkglists_extended_empty(makedirs: mock.MagicMock):
gzips = MockGzipOpen()
with mock.patch("gzip.open", side_effect=gzips.open):
from aurweb.scripts import mkpkglists from aurweb.scripts import mkpkglists
mkpkglists.main() mkpkglists.main()
'''
archives = config.get_section("mkpkglists") archives = config.get_section("mkpkglists")
archives.pop("archivedir") archives.pop("archivedir")
@ -168,17 +161,16 @@ def test_mkpkglists_extended_empty(makedirs: mock.MagicMock):
# we have no DB records; it's still a valid empty JSON list. # we have no DB records; it's still a valid empty JSON list.
meta_file = archives.get("packagesmetaextfile") meta_file = archives.get("packagesmetaextfile")
assert gzips.data(meta_file) == "[\n]" assert gzips.data(meta_file) == "[\n]"
'''
@mock.patch("sys.argv", ["mkpkglists", "--extended"]) @mock.patch("sys.argv", ["mkpkglists", "--extended"])
@mock.patch("os.makedirs", side_effect=noop) def test_mkpkglists_extended(config_mock: None, user: User,
def test_mkpkglists_extended(makedirs: mock.MagicMock, user: User,
packages: List[Package]): packages: List[Package]):
gzips = MockGzipOpen()
with mock.patch("gzip.open", side_effect=gzips.open):
from aurweb.scripts import mkpkglists from aurweb.scripts import mkpkglists
mkpkglists.main() mkpkglists.main()
'''
archives = config.get_section("mkpkglists") archives = config.get_section("mkpkglists")
archives.pop("archivedir") archives.pop("archivedir")
@ -213,3 +205,4 @@ def test_mkpkglists_extended(makedirs: mock.MagicMock, user: User,
meta_file = archives.get("packagesmetaextfile") meta_file = archives.get("packagesmetaextfile")
data = json.loads(gzips.data(meta_file)) data = json.loads(gzips.data(meta_file))
assert len(data) == 5 assert len(data) == 5
'''