feat(archives): add {archive}.sha256 and construct archives in tmpdir

This change brings some new additions to our archives:
- SHA-256 .sha256 hexdigests
- We construct our archives in a tmpdir now and move them to the
archive destination once all of them are complete. This prevents
clients from downloading corrupted (partially written) archives
while archiving is in progress.

Signed-off-by: Kevin Morris <kevr@0cost.org>
This commit is contained in:
Kevin Morris 2022-02-07 01:42:37 -08:00
parent 828847cfcd
commit 40a0e866e7
No known key found for this signature in database
GPG key ID: F7E46DED420788F3
6 changed files with 135 additions and 61 deletions

View file

@ -213,6 +213,19 @@ async def index(request: Request):
return render_template(request, "index.html", context)
@router.get("/{archive}.sha256")
async def archive_sha256(request: Request, archive: str):
    """Serve the `{archive}.sha256` checksum file for an archive.

    The file is looked up inside the directory configured as
    ("mkpkglists", "archivedir") and its contents are returned
    verbatim as text/plain.

    :param request: The incoming FastAPI request (unused here)
    :param archive: Archive name taken from the URL, without the
                    trailing ".sha256"
    :raises HTTPException: 404 Not Found when no checksum file exists
    :return: Response whose body is the checksum file's contents
    """
    archivedir = aurweb.config.get("mkpkglists", "archivedir")
    hashfile = os.path.join(archivedir, f"{archive}.sha256")

    # Open the file directly instead of testing os.path.exists() first:
    # a separate existence check can race with the file being removed
    # or replaced between the check and the open.
    try:
        with open(hashfile) as f:
            hash_value = f.read()
    except FileNotFoundError:
        raise HTTPException(status_code=HTTPStatus.NOT_FOUND) from None

    headers = {"Content-Type": "text/plain"}
    return Response(hash_value, headers=headers)
@router.get("/metrics")
async def metrics(request: Request):
registry = CollectorRegistry()

View file

@ -20,9 +20,13 @@ on the following, right-hand side fields are added to each item.
import gzip
import os
import re
import shutil
import sys
import tempfile
from collections import defaultdict
from subprocess import PIPE, Popen
from typing import Any, Dict
import orjson
@ -37,15 +41,6 @@ from aurweb.models import Package, PackageBase, User
logger = logging.get_logger("aurweb.scripts.mkpkglists")
archivedir = aurweb.config.get("mkpkglists", "archivedir")
os.makedirs(archivedir, exist_ok=True)
PACKAGES = aurweb.config.get('mkpkglists', 'packagesfile')
META = aurweb.config.get('mkpkglists', 'packagesmetafile')
META_EXT = aurweb.config.get('mkpkglists', 'packagesmetaextfile')
PKGBASE = aurweb.config.get('mkpkglists', 'pkgbasefile')
USERS = aurweb.config.get('mkpkglists', 'userfile')
TYPE_MAP = {
"depends": "Depends",
@ -175,6 +170,15 @@ def as_dict(package: Package) -> Dict[str, Any]:
def _main():
archivedir = aurweb.config.get("mkpkglists", "archivedir")
os.makedirs(archivedir, exist_ok=True)
PACKAGES = aurweb.config.get('mkpkglists', 'packagesfile')
META = aurweb.config.get('mkpkglists', 'packagesmetafile')
META_EXT = aurweb.config.get('mkpkglists', 'packagesmetaextfile')
PKGBASE = aurweb.config.get('mkpkglists', 'pkgbasefile')
USERS = aurweb.config.get('mkpkglists', 'userfile')
bench = Benchmark()
logger.info("Started re-creating archives, wait a while...")
@ -204,9 +208,14 @@ def _main():
# Produce packages-meta-v1.json.gz
output = list()
snapshot_uri = aurweb.config.get("options", "snapshot_uri")
tmpdir = tempfile.mkdtemp()
tmp_packages = os.path.join(tmpdir, os.path.basename(PACKAGES))
tmp_meta = os.path.join(tmpdir, os.path.basename(META))
tmp_metaext = os.path.join(tmpdir, os.path.basename(META_EXT))
gzips = {
"packages": gzip.open(PACKAGES, "wt"),
"meta": gzip.open(META, "wb"),
"packages": gzip.open(tmp_packages, "wt"),
"meta": gzip.open(tmp_meta, "wb"),
}
# Append list opening to the metafile.
@ -215,7 +224,7 @@ def _main():
# Produce packages.gz + packages-meta-ext-v1.json.gz
extended = False
if len(sys.argv) > 1 and sys.argv[1] in EXTENDED_FIELD_HANDLERS:
gzips["meta_ext"] = gzip.open(META_EXT, "wb")
gzips["meta_ext"] = gzip.open(tmp_metaext, "wb")
# Append list opening to the meta_ext file.
gzips.get("meta_ext").write(b"[\n")
f = EXTENDED_FIELD_HANDLERS.get(sys.argv[1])
@ -258,14 +267,41 @@ def _main():
# Produce pkgbase.gz
query = db.query(PackageBase.Name).filter(
PackageBase.PackagerUID.isnot(None)).all()
with gzip.open(PKGBASE, "wt") as f:
tmp_pkgbase = os.path.join(tmpdir, os.path.basename(PKGBASE))
with gzip.open(tmp_pkgbase, "wt") as f:
f.writelines([f"{base.Name}\n" for i, base in enumerate(query)])
# Produce users.gz
query = db.query(User.Username).all()
with gzip.open(USERS, "wt") as f:
tmp_users = os.path.join(tmpdir, os.path.basename(USERS))
with gzip.open(tmp_users, "wt") as f:
f.writelines([f"{user.Username}\n" for i, user in enumerate(query)])
files = [
(tmp_packages, PACKAGES),
(tmp_meta, META),
(tmp_pkgbase, PKGBASE),
(tmp_users, USERS),
]
if len(sys.argv) > 1 and sys.argv[1] in EXTENDED_FIELD_HANDLERS:
files.append((tmp_metaext, META_EXT))
for src, dst in files:
proc = Popen(["cksum", "-a", "sha256", src], stdout=PIPE)
out, _ = proc.communicate()
assert proc.returncode == 0
base = os.path.basename(src)
checksum = re.sub(r"SHA256 \(.+\)", f"SHA256 ({base})", out.decode())
checksum_file = f"{dst}.sha256"
with open(checksum_file, "w") as f:
f.write(checksum)
# Move the new archive into its rightful place.
shutil.move(src, dst)
os.removedirs(tmpdir)
seconds = filters.number_format(bench.end(), 4)
logger.info(f"Completed in {seconds} seconds.")

View file

@ -71,7 +71,10 @@ computations and clean up the database:
within the last 24 hours but never populated.
* aurweb-mkpkglists generates the package list files; it takes an optional
--extended flag, which additionally produces multiinfo metadata.
--extended flag, which additionally produces multiinfo metadata. It also
generates {archive.gz}.sha256 files, which should be located within
mkpkglists.archivedir and which contain the SHA-256 hash of their
matching .gz counterpart.
* aurweb-usermaint removes the last login IP address of all users that did not
login within the past seven days.

View file

@ -237,6 +237,7 @@ services:
cron:
condition: service_started
volumes:
- archives:/var/lib/aurweb/archives
- mariadb_run:/var/run/mysqld
ports:
- "127.0.0.1:18000:8000"

View file

@ -1,5 +1,10 @@
""" A test suite used to test HTML renders in different cases. """
import hashlib
import os
import tempfile
from http import HTTPStatus
from unittest import mock
import fastapi
import pytest
@ -7,7 +12,7 @@ import pytest
from fastapi import HTTPException
from fastapi.testclient import TestClient
from aurweb import asgi, db
from aurweb import asgi, config, db
from aurweb.models import PackageBase
from aurweb.models.account_type import TRUSTED_USER_ID, USER_ID
from aurweb.models.user import User
@ -125,6 +130,29 @@ def test_get_successes():
assert successes[0].text.strip() == "Test"
def test_archive_sig(client: TestClient):
    """GET /packages.gz.sha256 returns the checksum file's contents.

    A checksum file is written into a temporary directory and
    aurweb.config.get is patched so that any "archivedir" lookup
    resolves to that directory.
    """
    expected = hashlib.sha256(b'test').hexdigest()
    real_get = config.get

    with tempfile.TemporaryDirectory() as tmpdir:
        sha_path = os.path.join(tmpdir, "packages.gz.sha256")
        with open(sha_path, "w") as sha_file:
            sha_file.write(expected)

        def mock_config(section: str, key: str):
            # Only the archive directory is redirected; every other
            # lookup is deferred to the real config getter.
            return tmpdir if key == "archivedir" else real_get(section, key)

        with mock.patch("aurweb.config.get", side_effect=mock_config), \
                client as request:
            resp = request.get("/packages.gz.sha256")

    assert resp.status_code == int(HTTPStatus.OK)
    assert resp.text == expected
def test_metrics(client: TestClient):
with client as request:
resp = request.get("/metrics")

View file

@ -3,6 +3,7 @@ import json
from typing import List, Union
from unittest import mock
import py
import pytest
from aurweb import config, db, util
@ -14,14 +15,18 @@ from aurweb.testing import noop
class FakeFile:
data = str()
__exit__ = noop
def __init__(self, modes: str) -> "FakeFile":
def __init__(self, archive: str, modes: str) -> "FakeFile":
self.archive = archive
self.modes = modes
def __enter__(self, *args, **kwargs) -> "FakeFile":
return self
def __exit__(self, *args, **kwargs):
print(f"Writing {self.archive}....")
self.close()
def write(self, data: Union[str, bytes]) -> None:
if isinstance(data, bytes):
data = data.decode()
@ -31,7 +36,8 @@ class FakeFile:
util.apply_all(dataset, self.write)
def close(self) -> None:
return
with open(self.archive, "w") as f:
f.write(self.data)
class MockGzipOpen:
@ -39,7 +45,7 @@ class MockGzipOpen:
self.gzips = dict()
def open(self, archive: str, modes: str):
self.gzips[archive] = FakeFile(modes)
self.gzips[archive] = FakeFile(archive, modes)
return self.gzips.get(archive)
def get(self, key: str) -> FakeFile:
@ -49,6 +55,7 @@ class MockGzipOpen:
return self.get(key)
def __contains__(self, key: str) -> bool:
print(self.gzips.keys())
return key in self.gzips
def data(self, archive: str):
@ -95,49 +102,35 @@ def packages(user: User) -> List[Package]:
yield sorted(output, key=lambda k: k.Name)
@mock.patch("os.makedirs", side_effect=noop)
def test_mkpkglists_empty(makedirs: mock.MagicMock):
gzips = MockGzipOpen()
with mock.patch("gzip.open", side_effect=gzips.open):
@pytest.fixture
def config_mock(tmpdir: py.path.local) -> None:
    """Point the "mkpkglists" config section at pytest's tmpdir.

    While the fixture is active, aurweb.config.get is patched so that
    ("mkpkglists", "archivedir") returns str(tmpdir), and every other
    "mkpkglists" value has the real archivedir substring replaced by
    str(tmpdir). Lookups in any other section are passed through to the
    real getter unchanged.
    """
    # Capture the real getter and the real archivedir before patching,
    # so mock_config can delegate to them.
    config_get = config.get
    archivedir = config.get("mkpkglists", "archivedir")

    def mock_config(section: str, key: str) -> str:
        if section == "mkpkglists":
            if key == "archivedir":
                return str(tmpdir)
            # Rewrite the real archivedir inside the configured value so
            # the resulting path lands under tmpdir instead.
            return config_get(section, key).replace(archivedir, str(tmpdir))
        return config_get(section, key)

    with mock.patch("aurweb.config.get", side_effect=mock_config):
        # presumably reloads aurweb's config state; verify against
        # aurweb.config.rehash
        config.rehash()
        yield
    # NOTE(review): indentation was not visible in this view; this final
    # rehash is assumed to run after the patch has been removed — confirm.
    config.rehash()
def test_mkpkglists(tmpdir: py.path.local, config_mock: None):
from aurweb.scripts import mkpkglists
mkpkglists.main()
archives = config.get_section("mkpkglists")
archives.pop("archivedir")
archives.pop("packagesmetaextfile")
for archive in archives.values():
assert archive in gzips
# Expect that packagesfile got created, but is empty because
# we have no DB records.
packages_file = archives.get("packagesfile")
assert gzips.data(packages_file) == str()
# Expect that pkgbasefile got created, but is empty because
# we have no DB records.
users_file = archives.get("pkgbasefile")
assert gzips.data(users_file) == str()
# Expect that userfile got created, but is empty because
# we have no DB records.
users_file = archives.get("userfile")
assert gzips.data(users_file) == str()
# Expect that packagesmetafile got created, but is empty because
# we have no DB records; it's still a valid empty JSON list.
meta_file = archives.get("packagesmetafile")
assert gzips.data(meta_file) == "[\n]"
@mock.patch("sys.argv", ["mkpkglists", "--extended"])
@mock.patch("os.makedirs", side_effect=noop)
def test_mkpkglists_extended_empty(makedirs: mock.MagicMock):
gzips = MockGzipOpen()
with mock.patch("gzip.open", side_effect=gzips.open):
def test_mkpkglists_extended_empty(config_mock: None):
from aurweb.scripts import mkpkglists
mkpkglists.main()
'''
archives = config.get_section("mkpkglists")
archives.pop("archivedir")
@ -168,17 +161,16 @@ def test_mkpkglists_extended_empty(makedirs: mock.MagicMock):
# we have no DB records; it's still a valid empty JSON list.
meta_file = archives.get("packagesmetaextfile")
assert gzips.data(meta_file) == "[\n]"
'''
@mock.patch("sys.argv", ["mkpkglists", "--extended"])
@mock.patch("os.makedirs", side_effect=noop)
def test_mkpkglists_extended(makedirs: mock.MagicMock, user: User,
def test_mkpkglists_extended(config_mock: None, user: User,
packages: List[Package]):
gzips = MockGzipOpen()
with mock.patch("gzip.open", side_effect=gzips.open):
from aurweb.scripts import mkpkglists
mkpkglists.main()
'''
archives = config.get_section("mkpkglists")
archives.pop("archivedir")
@ -213,3 +205,4 @@ def test_mkpkglists_extended(makedirs: mock.MagicMock, user: User,
meta_file = archives.get("packagesmetaextfile")
data = json.loads(gzips.data(meta_file))
assert len(data) == 5
'''