fixup: feat(archives): add .sha256 and construct archives in tmpdir

Awal Garg 2022-02-09 06:38:02 +05:30
parent 40a0e866e7
commit b119db251b
2 changed files with 140 additions and 119 deletions

View file: aurweb/scripts/mkpkglists.py

@@ -19,14 +19,14 @@ on the following, right-hand side fields are added to each item.
 """
 import gzip
+import hashlib
+import io
 import os
-import re
 import shutil
 import sys
 import tempfile
 from collections import defaultdict
-from subprocess import PIPE, Popen
 from typing import Any, Dict

 import orjson
@@ -169,6 +169,14 @@ def as_dict(package: Package) -> Dict[str, Any]:
     }


+def sha256sum(file_path: str) -> str:
+    hash = hashlib.sha256()
+    with open(file_path, "rb") as f:
+        while chunk := f.read(io.DEFAULT_BUFFER_SIZE):
+            hash.update(chunk)
+    return hash.hexdigest()
+
+
 def _main():
     archivedir = aurweb.config.get("mkpkglists", "archivedir")
     os.makedirs(archivedir, exist_ok=True)
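Note: the new helper streams the archive in io.DEFAULT_BUFFER_SIZE chunks, so memory use stays bounded regardless of archive size; the `:=` assignment expression requires Python 3.8+. A quick sanity check one could run, assuming sha256sum() from the hunk above is in scope (the temporary file is purely illustrative):

import hashlib
import tempfile

# Write a small file, then confirm the streaming digest matches a
# one-shot digest of the same bytes.
with tempfile.NamedTemporaryFile("wb", delete=False) as tmp:
    tmp.write(b"hello world")
    path = tmp.name

assert sha256sum(path) == hashlib.sha256(b"hello world").hexdigest()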
@@ -287,16 +295,13 @@ def _main():
         files.append((tmp_metaext, META_EXT))

     for src, dst in files:
-        proc = Popen(["cksum", "-a", "sha256", src], stdout=PIPE)
-        out, _ = proc.communicate()
-        assert proc.returncode == 0
+        checksum = sha256sum(src)
         base = os.path.basename(src)
-        checksum = re.sub(r"SHA256 \(.+\)", f"SHA256 ({base})", out.decode())
+        checksum_formatted = f"SHA256 ({base}) = {checksum}"
         checksum_file = f"{dst}.sha256"
         with open(checksum_file, "w") as f:
-            f.write(checksum)
+            f.write(checksum_formatted)

         # Move the new archive into its rightful place.
         shutil.move(src, dst)
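For context, each .sha256 sidecar written above holds a single BSD-style line of the form SHA256 (archive-name) = <64 hex digits>, exactly as the f-string in the hunk produces. A minimal sketch of how a downstream consumer might verify an archive against its sidecar; the function name and paths are hypothetical, only the line format comes from the diff:

import hashlib
import os

def verify_archive(archive_path: str, sig_path: str) -> bool:
    # Recompute the digest the same way sha256sum() does, in chunks.
    digest = hashlib.sha256()
    with open(archive_path, "rb") as f:
        while chunk := f.read(65536):
            digest.update(chunk)
    # The sidecar contains one line with no trailing newline.
    with open(sig_path) as f:
        line = f.read().strip()
    expected = f"SHA256 ({os.path.basename(archive_path)}) = {digest.hexdigest()}"
    return line == expected

# Hypothetical usage:
# verify_archive("packages.gz", "packages.gz.sha256")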

View file: test_mkpkglists.py

@@ -1,65 +1,34 @@
+import gzip
 import json
+import os

-from typing import List, Union
+from typing import List
 from unittest import mock

 import py
 import pytest

-from aurweb import config, db, util
+from aurweb import config, db
 from aurweb.models import License, Package, PackageBase, PackageDependency, PackageLicense, User
 from aurweb.models.account_type import USER_ID
 from aurweb.models.dependency_type import DEPENDS_ID
-from aurweb.testing import noop

-
-class FakeFile:
-    data = str()
-
-    def __init__(self, archive: str, modes: str) -> "FakeFile":
-        self.archive = archive
-        self.modes = modes
-
-    def __enter__(self, *args, **kwargs) -> "FakeFile":
-        return self
-
-    def __exit__(self, *args, **kwargs):
-        print(f"Writing {self.archive}....")
-        self.close()
-
-    def write(self, data: Union[str, bytes]) -> None:
-        if isinstance(data, bytes):
-            data = data.decode()
-        self.data += data
-
-    def writelines(self, dataset: List[Union[str, bytes]]) -> None:
-        util.apply_all(dataset, self.write)
-
-    def close(self) -> None:
-        with open(self.archive, "w") as f:
-            f.write(self.data)
-
-
-class MockGzipOpen:
-    def __init__(self):
-        self.gzips = dict()
-
-    def open(self, archive: str, modes: str):
-        self.gzips[archive] = FakeFile(archive, modes)
-        return self.gzips.get(archive)
-
-    def get(self, key: str) -> FakeFile:
-        return self.gzips.get(key)
-
-    def __getitem__(self, key: str) -> FakeFile:
-        return self.get(key)
-
-    def __contains__(self, key: str) -> bool:
-        print(self.gzips.keys())
-        return key in self.gzips
-
-    def data(self, archive: str):
-        return self.get(archive).data
+META_KEYS = [
+    "ID",
+    "Name",
+    "PackageBaseID",
+    "PackageBase",
+    "Version",
+    "Description",
+    "URL",
+    "NumVotes",
+    "Popularity",
+    "OutOfDate",
+    "Maintainer",
+    "FirstSubmitted",
+    "LastModified",
+    "URLPath",
+]


 @pytest.fixture(autouse=True)
@@ -120,48 +89,81 @@ def config_mock(tmpdir: py.path.local) -> None:
     config.rehash()


-def test_mkpkglists(tmpdir: py.path.local, config_mock: None):
+def test_mkpkglists(tmpdir: py.path.local, config_mock: None, user: User, packages: List[Package]):
     from aurweb.scripts import mkpkglists
     mkpkglists.main()

+    PACKAGES = config.get("mkpkglists", "packagesfile")
+    META = config.get("mkpkglists", "packagesmetafile")
+    PKGBASE = config.get("mkpkglists", "pkgbasefile")
+    USERS = config.get("mkpkglists", "userfile")
+
+    expectations = [
+        (
+            PACKAGES,
+            "pkg_0\npkg_1\npkg_2\npkg_3\npkg_4\n",
+        ),
+        (
+            PKGBASE,
+            "pkgbase_0\npkgbase_1\npkgbase_2\npkgbase_3\npkgbase_4\n",
+        ),
+        (
+            USERS,
+            "test\n"
+        ),
+    ]
+
+    for (file, expected_content) in expectations:
+        with gzip.open(file, "r") as f:
+            file_content = f.read().decode()
+            assert file_content == expected_content
+
+    with gzip.open(META) as f:
+        metadata = json.load(f)
+
+    assert len(metadata) == len(packages)
+    for pkg in metadata:
+        for key in META_KEYS:
+            assert key in pkg, f"{pkg=} record does not have {key=}"
+
+    for file in (PACKAGES, PKGBASE, USERS, META):
+        with open(f"{file}.sha256") as f:
+            file_sig_content = f.read()
+        expected_prefix = f"SHA256 ({os.path.basename(file)}) = "
+        assert file_sig_content.startswith(expected_prefix)
+        assert len(file_sig_content) == len(expected_prefix) + 64
+

 @mock.patch("sys.argv", ["mkpkglists", "--extended"])
 def test_mkpkglists_extended_empty(config_mock: None):
     from aurweb.scripts import mkpkglists
     mkpkglists.main()

-    '''
-    archives = config.get_section("mkpkglists")
-    archives.pop("archivedir")
-
-    for archive in archives.values():
-        assert archive in gzips
-
-    # Expect that packagesfile got created, but is empty because
-    # we have no DB records.
-    packages_file = archives.get("packagesfile")
-    assert gzips.data(packages_file) == str()
-
-    # Expect that pkgbasefile got created, but is empty because
-    # we have no DB records.
-    users_file = archives.get("pkgbasefile")
-    assert gzips.data(users_file) == str()
-
-    # Expect that userfile got created, but is empty because
-    # we have no DB records.
-    users_file = archives.get("userfile")
-    assert gzips.data(users_file) == str()
-
-    # Expect that packagesmetafile got created, but is empty because
-    # we have no DB records; it's still a valid empty JSON list.
-    meta_file = archives.get("packagesmetafile")
-    assert gzips.data(meta_file) == "[\n]"
-
-    # Expect that packagesmetafile got created, but is empty because
-    # we have no DB records; it's still a valid empty JSON list.
-    meta_file = archives.get("packagesmetaextfile")
-    assert gzips.data(meta_file) == "[\n]"
-    '''
+    PACKAGES = config.get("mkpkglists", "packagesfile")
+    META = config.get("mkpkglists", "packagesmetafile")
+    META_EXT = config.get("mkpkglists", "packagesmetaextfile")
+    PKGBASE = config.get("mkpkglists", "pkgbasefile")
+    USERS = config.get("mkpkglists", "userfile")
+
+    expectations = [
+        (PACKAGES, ""),
+        (PKGBASE, ""),
+        (USERS, ""),
+        (META, "[\n]"),
+        (META_EXT, "[\n]"),
+    ]
+
+    for (file, expected_content) in expectations:
+        with gzip.open(file, "r") as f:
+            file_content = f.read().decode()
+            assert file_content == expected_content, f"{file=} contents malformed"
+
+    for file in (PACKAGES, PKGBASE, USERS, META, META_EXT):
+        with open(f"{file}.sha256") as f:
+            file_sig_content = f.read()
+        expected_prefix = f"SHA256 ({os.path.basename(file)}) = "
+        assert file_sig_content.startswith(expected_prefix)
+        assert len(file_sig_content) == len(expected_prefix) + 64


 @mock.patch("sys.argv", ["mkpkglists", "--extended"])
@@ -170,39 +172,53 @@ def test_mkpkglists_extended(config_mock: None, user: User,
     from aurweb.scripts import mkpkglists
     mkpkglists.main()

-    '''
-    archives = config.get_section("mkpkglists")
-    archives.pop("archivedir")
-
-    for archive in archives.values():
-        assert archive in gzips
-
-    # Expect that packagesfile got created, but is empty because
-    # we have no DB records.
-    packages_file = archives.get("packagesfile")
-    expected = "\n".join([p.Name for p in packages]) + "\n"
-    assert gzips.data(packages_file) == expected
-
-    # Expect that pkgbasefile got created, but is empty because
-    # we have no DB records.
-    users_file = archives.get("pkgbasefile")
-    expected = "\n".join([p.PackageBase.Name for p in packages]) + "\n"
-    assert gzips.data(users_file) == expected
-
-    # Expect that userfile got created, but is empty because
-    # we have no DB records.
-    users_file = archives.get("userfile")
-    assert gzips.data(users_file) == "test\n"
-
-    # Expect that packagesmetafile got created, but is empty because
-    # we have no DB records; it's still a valid empty JSON list.
-    meta_file = archives.get("packagesmetafile")
-    data = json.loads(gzips.data(meta_file))
-    assert len(data) == 5
-
-    # Expect that packagesmetafile got created, but is empty because
-    # we have no DB records; it's still a valid empty JSON list.
-    meta_file = archives.get("packagesmetaextfile")
-    data = json.loads(gzips.data(meta_file))
-    assert len(data) == 5
-    '''
+    PACKAGES = config.get("mkpkglists", "packagesfile")
+    META = config.get("mkpkglists", "packagesmetafile")
+    META_EXT = config.get("mkpkglists", "packagesmetaextfile")
+    PKGBASE = config.get("mkpkglists", "pkgbasefile")
+    USERS = config.get("mkpkglists", "userfile")
+
+    expectations = [
+        (
+            PACKAGES,
+            "pkg_0\npkg_1\npkg_2\npkg_3\npkg_4\n",
+        ),
+        (
+            PKGBASE,
+            "pkgbase_0\npkgbase_1\npkgbase_2\npkgbase_3\npkgbase_4\n",
+        ),
+        (
+            USERS,
+            "test\n"
+        ),
+    ]
+
+    for (file, expected_content) in expectations:
+        with gzip.open(file, "r") as f:
+            file_content = f.read().decode()
+            assert file_content == expected_content
+
+    with gzip.open(META) as f:
+        metadata = json.load(f)
+
+    assert len(metadata) == len(packages)
+    for pkg in metadata:
+        for key in META_KEYS:
+            assert key in pkg, f"{pkg=} record does not have {key=}"
+
+    with gzip.open(META_EXT) as f:
+        extended_metadata = json.load(f)
+
+    assert len(extended_metadata) == len(packages)
+    for pkg in extended_metadata:
+        for key in META_KEYS:
+            assert key in pkg, f"{pkg=} record does not have {key=}"
+        assert isinstance(pkg["Depends"], list)
+        assert isinstance(pkg["License"], list)
+
+    for file in (PACKAGES, PKGBASE, USERS, META, META_EXT):
+        with open(f"{file}.sha256") as f:
+            file_sig_content = f.read()
+        expected_prefix = f"SHA256 ({os.path.basename(file)}) = "
+        assert file_sig_content.startswith(expected_prefix)
+        assert len(file_sig_content) == len(expected_prefix) + 64
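The rewritten tests take `user` and `packages` fixtures that this commit does not show. From the expected archive contents, `packages` evidently creates five packages named pkg_0 through pkg_4 on bases pkgbase_0 through pkgbase_4, maintained by a user named "test". A rough sketch of what such fixtures might look like, assuming aurweb's db.begin()/db.create() helpers; the exact column values and signatures in the real test module may differ:

@pytest.fixture
def user() -> User:
    # Hypothetical: a single account whose Username appears in the userfile.
    with db.begin():
        user = db.create(User, Username="test", Email="test@example.org",
                         Passwd="testPassword", AccountTypeID=USER_ID)
    yield user

@pytest.fixture
def packages(user: User) -> List[Package]:
    # Hypothetical: five package bases and one package each, matching the
    # pkg_N / pkgbase_N names the expectations above assert against.
    output = []
    with db.begin():
        for i in range(5):
            base = db.create(PackageBase, Name=f"pkgbase_{i}",
                             Maintainer=user, Packager=user)
            pkg = db.create(Package, PackageBase=base, Name=f"pkg_{i}")
            output.append(pkg)
    yield output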