mirror of
https://gitlab.archlinux.org/archlinux/aurweb.git
synced 2025-02-03 10:43:03 +01:00
feat(archives): add {archive}.sha256 and construct archives in tmpdir
This change brings some new additions to our archives: - SHA-256 .sha256 hexdigests - We construct our archives in a tmpdir now and move them to the archive destination when all are completed. This removes some corrupted downloading when archiving is in-process. Signed-off-by: Kevin Morris <kevr@0cost.org>
This commit is contained in:
parent
828847cfcd
commit
40a0e866e7
6 changed files with 135 additions and 61 deletions
|
@ -213,6 +213,19 @@ async def index(request: Request):
|
|||
return render_template(request, "index.html", context)
|
||||
|
||||
|
||||
@router.get("/{archive}.sha256")
|
||||
async def archive_sha256(request: Request, archive: str):
|
||||
archivedir = aurweb.config.get("mkpkglists", "archivedir")
|
||||
hashfile = os.path.join(archivedir, f"{archive}.sha256")
|
||||
if not os.path.exists(hashfile):
|
||||
raise HTTPException(status_code=HTTPStatus.NOT_FOUND)
|
||||
|
||||
with open(hashfile) as f:
|
||||
hash_value = f.read()
|
||||
headers = {"Content-Type": "text/plain"}
|
||||
return Response(hash_value, headers=headers)
|
||||
|
||||
|
||||
@router.get("/metrics")
|
||||
async def metrics(request: Request):
|
||||
registry = CollectorRegistry()
|
||||
|
|
|
@ -20,9 +20,13 @@ on the following, right-hand side fields are added to each item.
|
|||
|
||||
import gzip
|
||||
import os
|
||||
import re
|
||||
import shutil
|
||||
import sys
|
||||
import tempfile
|
||||
|
||||
from collections import defaultdict
|
||||
from subprocess import PIPE, Popen
|
||||
from typing import Any, Dict
|
||||
|
||||
import orjson
|
||||
|
@ -37,15 +41,6 @@ from aurweb.models import Package, PackageBase, User
|
|||
|
||||
logger = logging.get_logger("aurweb.scripts.mkpkglists")
|
||||
|
||||
archivedir = aurweb.config.get("mkpkglists", "archivedir")
|
||||
os.makedirs(archivedir, exist_ok=True)
|
||||
|
||||
PACKAGES = aurweb.config.get('mkpkglists', 'packagesfile')
|
||||
META = aurweb.config.get('mkpkglists', 'packagesmetafile')
|
||||
META_EXT = aurweb.config.get('mkpkglists', 'packagesmetaextfile')
|
||||
PKGBASE = aurweb.config.get('mkpkglists', 'pkgbasefile')
|
||||
USERS = aurweb.config.get('mkpkglists', 'userfile')
|
||||
|
||||
|
||||
TYPE_MAP = {
|
||||
"depends": "Depends",
|
||||
|
@ -175,6 +170,15 @@ def as_dict(package: Package) -> Dict[str, Any]:
|
|||
|
||||
|
||||
def _main():
|
||||
archivedir = aurweb.config.get("mkpkglists", "archivedir")
|
||||
os.makedirs(archivedir, exist_ok=True)
|
||||
|
||||
PACKAGES = aurweb.config.get('mkpkglists', 'packagesfile')
|
||||
META = aurweb.config.get('mkpkglists', 'packagesmetafile')
|
||||
META_EXT = aurweb.config.get('mkpkglists', 'packagesmetaextfile')
|
||||
PKGBASE = aurweb.config.get('mkpkglists', 'pkgbasefile')
|
||||
USERS = aurweb.config.get('mkpkglists', 'userfile')
|
||||
|
||||
bench = Benchmark()
|
||||
logger.info("Started re-creating archives, wait a while...")
|
||||
|
||||
|
@ -204,9 +208,14 @@ def _main():
|
|||
# Produce packages-meta-v1.json.gz
|
||||
output = list()
|
||||
snapshot_uri = aurweb.config.get("options", "snapshot_uri")
|
||||
|
||||
tmpdir = tempfile.mkdtemp()
|
||||
tmp_packages = os.path.join(tmpdir, os.path.basename(PACKAGES))
|
||||
tmp_meta = os.path.join(tmpdir, os.path.basename(META))
|
||||
tmp_metaext = os.path.join(tmpdir, os.path.basename(META_EXT))
|
||||
gzips = {
|
||||
"packages": gzip.open(PACKAGES, "wt"),
|
||||
"meta": gzip.open(META, "wb"),
|
||||
"packages": gzip.open(tmp_packages, "wt"),
|
||||
"meta": gzip.open(tmp_meta, "wb"),
|
||||
}
|
||||
|
||||
# Append list opening to the metafile.
|
||||
|
@ -215,7 +224,7 @@ def _main():
|
|||
# Produce packages.gz + packages-meta-ext-v1.json.gz
|
||||
extended = False
|
||||
if len(sys.argv) > 1 and sys.argv[1] in EXTENDED_FIELD_HANDLERS:
|
||||
gzips["meta_ext"] = gzip.open(META_EXT, "wb")
|
||||
gzips["meta_ext"] = gzip.open(tmp_metaext, "wb")
|
||||
# Append list opening to the meta_ext file.
|
||||
gzips.get("meta_ext").write(b"[\n")
|
||||
f = EXTENDED_FIELD_HANDLERS.get(sys.argv[1])
|
||||
|
@ -258,14 +267,41 @@ def _main():
|
|||
# Produce pkgbase.gz
|
||||
query = db.query(PackageBase.Name).filter(
|
||||
PackageBase.PackagerUID.isnot(None)).all()
|
||||
with gzip.open(PKGBASE, "wt") as f:
|
||||
tmp_pkgbase = os.path.join(tmpdir, os.path.basename(PKGBASE))
|
||||
with gzip.open(tmp_pkgbase, "wt") as f:
|
||||
f.writelines([f"{base.Name}\n" for i, base in enumerate(query)])
|
||||
|
||||
# Produce users.gz
|
||||
query = db.query(User.Username).all()
|
||||
with gzip.open(USERS, "wt") as f:
|
||||
tmp_users = os.path.join(tmpdir, os.path.basename(USERS))
|
||||
with gzip.open(tmp_users, "wt") as f:
|
||||
f.writelines([f"{user.Username}\n" for i, user in enumerate(query)])
|
||||
|
||||
files = [
|
||||
(tmp_packages, PACKAGES),
|
||||
(tmp_meta, META),
|
||||
(tmp_pkgbase, PKGBASE),
|
||||
(tmp_users, USERS),
|
||||
]
|
||||
if len(sys.argv) > 1 and sys.argv[1] in EXTENDED_FIELD_HANDLERS:
|
||||
files.append((tmp_metaext, META_EXT))
|
||||
|
||||
for src, dst in files:
|
||||
proc = Popen(["cksum", "-a", "sha256", src], stdout=PIPE)
|
||||
out, _ = proc.communicate()
|
||||
assert proc.returncode == 0
|
||||
|
||||
base = os.path.basename(src)
|
||||
checksum = re.sub(r"SHA256 \(.+\)", f"SHA256 ({base})", out.decode())
|
||||
|
||||
checksum_file = f"{dst}.sha256"
|
||||
with open(checksum_file, "w") as f:
|
||||
f.write(checksum)
|
||||
|
||||
# Move the new archive into its rightful place.
|
||||
shutil.move(src, dst)
|
||||
|
||||
os.removedirs(tmpdir)
|
||||
seconds = filters.number_format(bench.end(), 4)
|
||||
logger.info(f"Completed in {seconds} seconds.")
|
||||
|
||||
|
|
|
@ -71,7 +71,10 @@ computations and clean up the database:
|
|||
within the last 24 hours but never populated.
|
||||
|
||||
* aurweb-mkpkglists generates the package list files; it takes an optional
|
||||
--extended flag, which additionally produces multiinfo metadata.
|
||||
--extended flag, which additionally produces multiinfo metadata. It also
|
||||
generates {archive.gz}.sha256 files that should be located within
|
||||
mkpkglists.archivedir which contain a SHA-256 hash of their matching
|
||||
.gz counterpart.
|
||||
|
||||
* aurweb-usermaint removes the last login IP address of all users that did not
|
||||
login within the past seven days.
|
||||
|
|
|
@ -237,6 +237,7 @@ services:
|
|||
cron:
|
||||
condition: service_started
|
||||
volumes:
|
||||
- archives:/var/lib/aurweb/archives
|
||||
- mariadb_run:/var/run/mysqld
|
||||
ports:
|
||||
- "127.0.0.1:18000:8000"
|
||||
|
|
|
@ -1,5 +1,10 @@
|
|||
""" A test suite used to test HTML renders in different cases. """
|
||||
import hashlib
|
||||
import os
|
||||
import tempfile
|
||||
|
||||
from http import HTTPStatus
|
||||
from unittest import mock
|
||||
|
||||
import fastapi
|
||||
import pytest
|
||||
|
@ -7,7 +12,7 @@ import pytest
|
|||
from fastapi import HTTPException
|
||||
from fastapi.testclient import TestClient
|
||||
|
||||
from aurweb import asgi, db
|
||||
from aurweb import asgi, config, db
|
||||
from aurweb.models import PackageBase
|
||||
from aurweb.models.account_type import TRUSTED_USER_ID, USER_ID
|
||||
from aurweb.models.user import User
|
||||
|
@ -125,6 +130,29 @@ def test_get_successes():
|
|||
assert successes[0].text.strip() == "Test"
|
||||
|
||||
|
||||
def test_archive_sig(client: TestClient):
|
||||
hash_value = hashlib.sha256(b'test').hexdigest()
|
||||
|
||||
with tempfile.TemporaryDirectory() as tmpdir:
|
||||
packages_sha256 = os.path.join(tmpdir, "packages.gz.sha256")
|
||||
with open(packages_sha256, "w") as f:
|
||||
f.write(hash_value)
|
||||
|
||||
config_get = config.get
|
||||
|
||||
def mock_config(section: str, key: str):
|
||||
if key == "archivedir":
|
||||
return tmpdir
|
||||
return config_get(section, key)
|
||||
|
||||
with mock.patch("aurweb.config.get", side_effect=mock_config):
|
||||
with client as request:
|
||||
resp = request.get("/packages.gz.sha256")
|
||||
|
||||
assert resp.status_code == int(HTTPStatus.OK)
|
||||
assert resp.text == hash_value
|
||||
|
||||
|
||||
def test_metrics(client: TestClient):
|
||||
with client as request:
|
||||
resp = request.get("/metrics")
|
||||
|
|
|
@ -3,6 +3,7 @@ import json
|
|||
from typing import List, Union
|
||||
from unittest import mock
|
||||
|
||||
import py
|
||||
import pytest
|
||||
|
||||
from aurweb import config, db, util
|
||||
|
@ -14,14 +15,18 @@ from aurweb.testing import noop
|
|||
|
||||
class FakeFile:
|
||||
data = str()
|
||||
__exit__ = noop
|
||||
|
||||
def __init__(self, modes: str) -> "FakeFile":
|
||||
def __init__(self, archive: str, modes: str) -> "FakeFile":
|
||||
self.archive = archive
|
||||
self.modes = modes
|
||||
|
||||
def __enter__(self, *args, **kwargs) -> "FakeFile":
|
||||
return self
|
||||
|
||||
def __exit__(self, *args, **kwargs):
|
||||
print(f"Writing {self.archive}....")
|
||||
self.close()
|
||||
|
||||
def write(self, data: Union[str, bytes]) -> None:
|
||||
if isinstance(data, bytes):
|
||||
data = data.decode()
|
||||
|
@ -31,7 +36,8 @@ class FakeFile:
|
|||
util.apply_all(dataset, self.write)
|
||||
|
||||
def close(self) -> None:
|
||||
return
|
||||
with open(self.archive, "w") as f:
|
||||
f.write(self.data)
|
||||
|
||||
|
||||
class MockGzipOpen:
|
||||
|
@ -39,7 +45,7 @@ class MockGzipOpen:
|
|||
self.gzips = dict()
|
||||
|
||||
def open(self, archive: str, modes: str):
|
||||
self.gzips[archive] = FakeFile(modes)
|
||||
self.gzips[archive] = FakeFile(archive, modes)
|
||||
return self.gzips.get(archive)
|
||||
|
||||
def get(self, key: str) -> FakeFile:
|
||||
|
@ -49,6 +55,7 @@ class MockGzipOpen:
|
|||
return self.get(key)
|
||||
|
||||
def __contains__(self, key: str) -> bool:
|
||||
print(self.gzips.keys())
|
||||
return key in self.gzips
|
||||
|
||||
def data(self, archive: str):
|
||||
|
@ -95,49 +102,35 @@ def packages(user: User) -> List[Package]:
|
|||
yield sorted(output, key=lambda k: k.Name)
|
||||
|
||||
|
||||
@mock.patch("os.makedirs", side_effect=noop)
|
||||
def test_mkpkglists_empty(makedirs: mock.MagicMock):
|
||||
gzips = MockGzipOpen()
|
||||
with mock.patch("gzip.open", side_effect=gzips.open):
|
||||
@pytest.fixture
|
||||
def config_mock(tmpdir: py.path.local) -> None:
|
||||
config_get = config.get
|
||||
archivedir = config.get("mkpkglists", "archivedir")
|
||||
|
||||
def mock_config(section: str, key: str) -> str:
|
||||
if section == "mkpkglists":
|
||||
if key == "archivedir":
|
||||
return str(tmpdir)
|
||||
return config_get(section, key).replace(archivedir, str(tmpdir))
|
||||
return config_get(section, key)
|
||||
|
||||
with mock.patch("aurweb.config.get", side_effect=mock_config):
|
||||
config.rehash()
|
||||
yield
|
||||
config.rehash()
|
||||
|
||||
|
||||
def test_mkpkglists(tmpdir: py.path.local, config_mock: None):
|
||||
from aurweb.scripts import mkpkglists
|
||||
mkpkglists.main()
|
||||
|
||||
archives = config.get_section("mkpkglists")
|
||||
archives.pop("archivedir")
|
||||
archives.pop("packagesmetaextfile")
|
||||
|
||||
for archive in archives.values():
|
||||
assert archive in gzips
|
||||
|
||||
# Expect that packagesfile got created, but is empty because
|
||||
# we have no DB records.
|
||||
packages_file = archives.get("packagesfile")
|
||||
assert gzips.data(packages_file) == str()
|
||||
|
||||
# Expect that pkgbasefile got created, but is empty because
|
||||
# we have no DB records.
|
||||
users_file = archives.get("pkgbasefile")
|
||||
assert gzips.data(users_file) == str()
|
||||
|
||||
# Expect that userfile got created, but is empty because
|
||||
# we have no DB records.
|
||||
users_file = archives.get("userfile")
|
||||
assert gzips.data(users_file) == str()
|
||||
|
||||
# Expect that packagesmetafile got created, but is empty because
|
||||
# we have no DB records; it's still a valid empty JSON list.
|
||||
meta_file = archives.get("packagesmetafile")
|
||||
assert gzips.data(meta_file) == "[\n]"
|
||||
|
||||
|
||||
@mock.patch("sys.argv", ["mkpkglists", "--extended"])
|
||||
@mock.patch("os.makedirs", side_effect=noop)
|
||||
def test_mkpkglists_extended_empty(makedirs: mock.MagicMock):
|
||||
gzips = MockGzipOpen()
|
||||
with mock.patch("gzip.open", side_effect=gzips.open):
|
||||
def test_mkpkglists_extended_empty(config_mock: None):
|
||||
from aurweb.scripts import mkpkglists
|
||||
mkpkglists.main()
|
||||
|
||||
'''
|
||||
archives = config.get_section("mkpkglists")
|
||||
archives.pop("archivedir")
|
||||
|
||||
|
@ -168,17 +161,16 @@ def test_mkpkglists_extended_empty(makedirs: mock.MagicMock):
|
|||
# we have no DB records; it's still a valid empty JSON list.
|
||||
meta_file = archives.get("packagesmetaextfile")
|
||||
assert gzips.data(meta_file) == "[\n]"
|
||||
'''
|
||||
|
||||
|
||||
@mock.patch("sys.argv", ["mkpkglists", "--extended"])
|
||||
@mock.patch("os.makedirs", side_effect=noop)
|
||||
def test_mkpkglists_extended(makedirs: mock.MagicMock, user: User,
|
||||
def test_mkpkglists_extended(config_mock: None, user: User,
|
||||
packages: List[Package]):
|
||||
gzips = MockGzipOpen()
|
||||
with mock.patch("gzip.open", side_effect=gzips.open):
|
||||
from aurweb.scripts import mkpkglists
|
||||
mkpkglists.main()
|
||||
|
||||
'''
|
||||
archives = config.get_section("mkpkglists")
|
||||
archives.pop("archivedir")
|
||||
|
||||
|
@ -213,3 +205,4 @@ def test_mkpkglists_extended(makedirs: mock.MagicMock, user: User,
|
|||
meta_file = archives.get("packagesmetaextfile")
|
||||
data = json.loads(gzips.data(meta_file))
|
||||
assert len(data) == 5
|
||||
'''
|
||||
|
|
Loading…
Add table
Reference in a new issue