Merge branch 'mr440'

This commit is contained in:
Kevin Morris 2022-02-08 17:49:50 -08:00
commit b6321bbdc5
No known key found for this signature in database
GPG key ID: F7E46DED420788F3
6 changed files with 250 additions and 155 deletions

View file

@ -213,6 +213,19 @@ async def index(request: Request):
return render_template(request, "index.html", context)
@router.get("/{archive}.sha256")
async def archive_sha256(request: Request, archive: str):
archivedir = aurweb.config.get("mkpkglists", "archivedir")
hashfile = os.path.join(archivedir, f"{archive}.sha256")
if not os.path.exists(hashfile):
raise HTTPException(status_code=HTTPStatus.NOT_FOUND)
with open(hashfile) as f:
hash_value = f.read()
headers = {"Content-Type": "text/plain"}
return Response(hash_value, headers=headers)
@router.get("/metrics")
async def metrics(request: Request):
registry = CollectorRegistry()

View file

@ -19,8 +19,12 @@ on the following, right-hand side fields are added to each item.
"""
import gzip
import hashlib
import io
import os
import shutil
import sys
import tempfile
from collections import defaultdict
from typing import Any, Dict
@ -37,15 +41,6 @@ from aurweb.models import Package, PackageBase, User
logger = logging.get_logger("aurweb.scripts.mkpkglists")
archivedir = aurweb.config.get("mkpkglists", "archivedir")
os.makedirs(archivedir, exist_ok=True)
PACKAGES = aurweb.config.get('mkpkglists', 'packagesfile')
META = aurweb.config.get('mkpkglists', 'packagesmetafile')
META_EXT = aurweb.config.get('mkpkglists', 'packagesmetaextfile')
PKGBASE = aurweb.config.get('mkpkglists', 'pkgbasefile')
USERS = aurweb.config.get('mkpkglists', 'userfile')
TYPE_MAP = {
"depends": "Depends",
@ -174,7 +169,24 @@ def as_dict(package: Package) -> Dict[str, Any]:
}
def sha256sum(file_path: str) -> str:
hash = hashlib.sha256()
with open(file_path, "rb") as f:
while chunk := f.read(io.DEFAULT_BUFFER_SIZE):
hash.update(chunk)
return hash.hexdigest()
def _main():
archivedir = aurweb.config.get("mkpkglists", "archivedir")
os.makedirs(archivedir, exist_ok=True)
PACKAGES = aurweb.config.get('mkpkglists', 'packagesfile')
META = aurweb.config.get('mkpkglists', 'packagesmetafile')
META_EXT = aurweb.config.get('mkpkglists', 'packagesmetaextfile')
PKGBASE = aurweb.config.get('mkpkglists', 'pkgbasefile')
USERS = aurweb.config.get('mkpkglists', 'userfile')
bench = Benchmark()
logger.info("Started re-creating archives, wait a while...")
@ -204,9 +216,14 @@ def _main():
# Produce packages-meta-v1.json.gz
output = list()
snapshot_uri = aurweb.config.get("options", "snapshot_uri")
tmpdir = tempfile.mkdtemp()
tmp_packages = os.path.join(tmpdir, os.path.basename(PACKAGES))
tmp_meta = os.path.join(tmpdir, os.path.basename(META))
tmp_metaext = os.path.join(tmpdir, os.path.basename(META_EXT))
gzips = {
"packages": gzip.open(PACKAGES, "wt"),
"meta": gzip.open(META, "wb"),
"packages": gzip.open(tmp_packages, "wt"),
"meta": gzip.open(tmp_meta, "wb"),
}
# Append list opening to the metafile.
@ -215,7 +232,7 @@ def _main():
# Produce packages.gz + packages-meta-ext-v1.json.gz
extended = False
if len(sys.argv) > 1 and sys.argv[1] in EXTENDED_FIELD_HANDLERS:
gzips["meta_ext"] = gzip.open(META_EXT, "wb")
gzips["meta_ext"] = gzip.open(tmp_metaext, "wb")
# Append list opening to the meta_ext file.
gzips.get("meta_ext").write(b"[\n")
f = EXTENDED_FIELD_HANDLERS.get(sys.argv[1])
@ -258,14 +275,38 @@ def _main():
# Produce pkgbase.gz
query = db.query(PackageBase.Name).filter(
PackageBase.PackagerUID.isnot(None)).all()
with gzip.open(PKGBASE, "wt") as f:
tmp_pkgbase = os.path.join(tmpdir, os.path.basename(PKGBASE))
with gzip.open(tmp_pkgbase, "wt") as f:
f.writelines([f"{base.Name}\n" for i, base in enumerate(query)])
# Produce users.gz
query = db.query(User.Username).all()
with gzip.open(USERS, "wt") as f:
tmp_users = os.path.join(tmpdir, os.path.basename(USERS))
with gzip.open(tmp_users, "wt") as f:
f.writelines([f"{user.Username}\n" for i, user in enumerate(query)])
files = [
(tmp_packages, PACKAGES),
(tmp_meta, META),
(tmp_pkgbase, PKGBASE),
(tmp_users, USERS),
]
if len(sys.argv) > 1 and sys.argv[1] in EXTENDED_FIELD_HANDLERS:
files.append((tmp_metaext, META_EXT))
for src, dst in files:
checksum = sha256sum(src)
base = os.path.basename(src)
checksum_formatted = f"SHA256 ({base}) = {checksum}"
checksum_file = f"{dst}.sha256"
with open(checksum_file, "w") as f:
f.write(checksum_formatted)
# Move the new archive into its rightful place.
shutil.move(src, dst)
os.removedirs(tmpdir)
seconds = filters.number_format(bench.end(), 4)
logger.info(f"Completed in {seconds} seconds.")

View file

@ -71,7 +71,10 @@ computations and clean up the database:
within the last 24 hours but never populated.
* aurweb-mkpkglists generates the package list files; it takes an optional
--extended flag, which additionally produces multiinfo metadata.
--extended flag, which additionally produces multiinfo metadata. It also
generates {archive.gz}.sha256 files that should be located within
mkpkglists.archivedir which contain a SHA-256 hash of their matching
.gz counterpart.
* aurweb-usermaint removes the last login IP address of all users that did not
login within the past seven days.

View file

@ -237,6 +237,7 @@ services:
cron:
condition: service_started
volumes:
- archives:/var/lib/aurweb/archives
- mariadb_run:/var/run/mysqld
ports:
- "127.0.0.1:18000:8000"

View file

@ -1,5 +1,10 @@
""" A test suite used to test HTML renders in different cases. """
import hashlib
import os
import tempfile
from http import HTTPStatus
from unittest import mock
import fastapi
import pytest
@ -7,7 +12,7 @@ import pytest
from fastapi import HTTPException
from fastapi.testclient import TestClient
from aurweb import asgi, db
from aurweb import asgi, config, db
from aurweb.models import PackageBase
from aurweb.models.account_type import TRUSTED_USER_ID, USER_ID
from aurweb.models.user import User
@ -125,6 +130,29 @@ def test_get_successes():
assert successes[0].text.strip() == "Test"
def test_archive_sig(client: TestClient):
hash_value = hashlib.sha256(b'test').hexdigest()
with tempfile.TemporaryDirectory() as tmpdir:
packages_sha256 = os.path.join(tmpdir, "packages.gz.sha256")
with open(packages_sha256, "w") as f:
f.write(hash_value)
config_get = config.get
def mock_config(section: str, key: str):
if key == "archivedir":
return tmpdir
return config_get(section, key)
with mock.patch("aurweb.config.get", side_effect=mock_config):
with client as request:
resp = request.get("/packages.gz.sha256")
assert resp.status_code == int(HTTPStatus.OK)
assert resp.text == hash_value
def test_metrics(client: TestClient):
with client as request:
resp = request.get("/metrics")

View file

@ -1,58 +1,34 @@
import gzip
import json
import os
from typing import List, Union
from typing import List
from unittest import mock
import py
import pytest
from aurweb import config, db, util
from aurweb import config, db
from aurweb.models import License, Package, PackageBase, PackageDependency, PackageLicense, User
from aurweb.models.account_type import USER_ID
from aurweb.models.dependency_type import DEPENDS_ID
from aurweb.testing import noop
class FakeFile:
data = str()
__exit__ = noop
def __init__(self, modes: str) -> "FakeFile":
self.modes = modes
def __enter__(self, *args, **kwargs) -> "FakeFile":
return self
def write(self, data: Union[str, bytes]) -> None:
if isinstance(data, bytes):
data = data.decode()
self.data += data
def writelines(self, dataset: List[Union[str, bytes]]) -> None:
util.apply_all(dataset, self.write)
def close(self) -> None:
return
class MockGzipOpen:
def __init__(self):
self.gzips = dict()
def open(self, archive: str, modes: str):
self.gzips[archive] = FakeFile(modes)
return self.gzips.get(archive)
def get(self, key: str) -> FakeFile:
return self.gzips.get(key)
def __getitem__(self, key: str) -> FakeFile:
return self.get(key)
def __contains__(self, key: str) -> bool:
return key in self.gzips
def data(self, archive: str):
return self.get(archive).data
META_KEYS = [
"ID",
"Name",
"PackageBaseID",
"PackageBase",
"Version",
"Description",
"URL",
"NumVotes",
"Popularity",
"OutOfDate",
"Maintainer",
"FirstSubmitted",
"LastModified",
"URLPath",
]
@pytest.fixture(autouse=True)
@ -95,121 +71,154 @@ def packages(user: User) -> List[Package]:
yield sorted(output, key=lambda k: k.Name)
@mock.patch("os.makedirs", side_effect=noop)
def test_mkpkglists_empty(makedirs: mock.MagicMock):
gzips = MockGzipOpen()
with mock.patch("gzip.open", side_effect=gzips.open):
@pytest.fixture
def config_mock(tmpdir: py.path.local) -> None:
config_get = config.get
archivedir = config.get("mkpkglists", "archivedir")
def mock_config(section: str, key: str) -> str:
if section == "mkpkglists":
if key == "archivedir":
return str(tmpdir)
return config_get(section, key).replace(archivedir, str(tmpdir))
return config_get(section, key)
with mock.patch("aurweb.config.get", side_effect=mock_config):
config.rehash()
yield
config.rehash()
def test_mkpkglists(tmpdir: py.path.local, config_mock: None, user: User, packages: List[Package]):
from aurweb.scripts import mkpkglists
mkpkglists.main()
archives = config.get_section("mkpkglists")
archives.pop("archivedir")
archives.pop("packagesmetaextfile")
PACKAGES = config.get("mkpkglists", "packagesfile")
META = config.get("mkpkglists", "packagesmetafile")
PKGBASE = config.get("mkpkglists", "pkgbasefile")
USERS = config.get("mkpkglists", "userfile")
for archive in archives.values():
assert archive in gzips
expectations = [
(
PACKAGES,
"pkg_0\npkg_1\npkg_2\npkg_3\npkg_4\n",
),
(
PKGBASE,
"pkgbase_0\npkgbase_1\npkgbase_2\npkgbase_3\npkgbase_4\n",
),
(
USERS,
"test\n"
),
]
# Expect that packagesfile got created, but is empty because
# we have no DB records.
packages_file = archives.get("packagesfile")
assert gzips.data(packages_file) == str()
for (file, expected_content) in expectations:
with gzip.open(file, "r") as f:
file_content = f.read().decode()
assert file_content == expected_content
# Expect that pkgbasefile got created, but is empty because
# we have no DB records.
users_file = archives.get("pkgbasefile")
assert gzips.data(users_file) == str()
with gzip.open(META) as f:
metadata = json.load(f)
# Expect that userfile got created, but is empty because
# we have no DB records.
users_file = archives.get("userfile")
assert gzips.data(users_file) == str()
assert len(metadata) == len(packages)
for pkg in metadata:
for key in META_KEYS:
assert key in pkg, f"{pkg=} record does not have {key=}"
# Expect that packagesmetafile got created, but is empty because
# we have no DB records; it's still a valid empty JSON list.
meta_file = archives.get("packagesmetafile")
assert gzips.data(meta_file) == "[\n]"
for file in (PACKAGES, PKGBASE, USERS, META):
with open(f"{file}.sha256") as f:
file_sig_content = f.read()
expected_prefix = f"SHA256 ({os.path.basename(file)}) = "
assert file_sig_content.startswith(expected_prefix)
assert len(file_sig_content) == len(expected_prefix) + 64
@mock.patch("sys.argv", ["mkpkglists", "--extended"])
@mock.patch("os.makedirs", side_effect=noop)
def test_mkpkglists_extended_empty(makedirs: mock.MagicMock):
gzips = MockGzipOpen()
with mock.patch("gzip.open", side_effect=gzips.open):
def test_mkpkglists_extended_empty(config_mock: None):
from aurweb.scripts import mkpkglists
mkpkglists.main()
archives = config.get_section("mkpkglists")
archives.pop("archivedir")
PACKAGES = config.get("mkpkglists", "packagesfile")
META = config.get("mkpkglists", "packagesmetafile")
META_EXT = config.get("mkpkglists", "packagesmetaextfile")
PKGBASE = config.get("mkpkglists", "pkgbasefile")
USERS = config.get("mkpkglists", "userfile")
for archive in archives.values():
assert archive in gzips
expectations = [
(PACKAGES, ""),
(PKGBASE, ""),
(USERS, ""),
(META, "[\n]"),
(META_EXT, "[\n]"),
]
# Expect that packagesfile got created, but is empty because
# we have no DB records.
packages_file = archives.get("packagesfile")
assert gzips.data(packages_file) == str()
for (file, expected_content) in expectations:
with gzip.open(file, "r") as f:
file_content = f.read().decode()
assert file_content == expected_content, f"{file=} contents malformed"
# Expect that pkgbasefile got created, but is empty because
# we have no DB records.
users_file = archives.get("pkgbasefile")
assert gzips.data(users_file) == str()
# Expect that userfile got created, but is empty because
# we have no DB records.
users_file = archives.get("userfile")
assert gzips.data(users_file) == str()
# Expect that packagesmetafile got created, but is empty because
# we have no DB records; it's still a valid empty JSON list.
meta_file = archives.get("packagesmetafile")
assert gzips.data(meta_file) == "[\n]"
# Expect that packagesmetafile got created, but is empty because
# we have no DB records; it's still a valid empty JSON list.
meta_file = archives.get("packagesmetaextfile")
assert gzips.data(meta_file) == "[\n]"
for file in (PACKAGES, PKGBASE, USERS, META, META_EXT):
with open(f"{file}.sha256") as f:
file_sig_content = f.read()
expected_prefix = f"SHA256 ({os.path.basename(file)}) = "
assert file_sig_content.startswith(expected_prefix)
assert len(file_sig_content) == len(expected_prefix) + 64
@mock.patch("sys.argv", ["mkpkglists", "--extended"])
@mock.patch("os.makedirs", side_effect=noop)
def test_mkpkglists_extended(makedirs: mock.MagicMock, user: User,
def test_mkpkglists_extended(config_mock: None, user: User,
packages: List[Package]):
gzips = MockGzipOpen()
with mock.patch("gzip.open", side_effect=gzips.open):
from aurweb.scripts import mkpkglists
mkpkglists.main()
archives = config.get_section("mkpkglists")
archives.pop("archivedir")
PACKAGES = config.get("mkpkglists", "packagesfile")
META = config.get("mkpkglists", "packagesmetafile")
META_EXT = config.get("mkpkglists", "packagesmetaextfile")
PKGBASE = config.get("mkpkglists", "pkgbasefile")
USERS = config.get("mkpkglists", "userfile")
for archive in archives.values():
assert archive in gzips
expectations = [
(
PACKAGES,
"pkg_0\npkg_1\npkg_2\npkg_3\npkg_4\n",
),
(
PKGBASE,
"pkgbase_0\npkgbase_1\npkgbase_2\npkgbase_3\npkgbase_4\n",
),
(
USERS,
"test\n"
),
]
# Expect that packagesfile got created, but is empty because
# we have no DB records.
packages_file = archives.get("packagesfile")
expected = "\n".join([p.Name for p in packages]) + "\n"
assert gzips.data(packages_file) == expected
for (file, expected_content) in expectations:
with gzip.open(file, "r") as f:
file_content = f.read().decode()
assert file_content == expected_content
# Expect that pkgbasefile got created, but is empty because
# we have no DB records.
users_file = archives.get("pkgbasefile")
expected = "\n".join([p.PackageBase.Name for p in packages]) + "\n"
assert gzips.data(users_file) == expected
with gzip.open(META) as f:
metadata = json.load(f)
# Expect that userfile got created, but is empty because
# we have no DB records.
users_file = archives.get("userfile")
assert gzips.data(users_file) == "test\n"
assert len(metadata) == len(packages)
for pkg in metadata:
for key in META_KEYS:
assert key in pkg, f"{pkg=} record does not have {key=}"
# Expect that packagesmetafile got created, but is empty because
# we have no DB records; it's still a valid empty JSON list.
meta_file = archives.get("packagesmetafile")
data = json.loads(gzips.data(meta_file))
assert len(data) == 5
with gzip.open(META_EXT) as f:
extended_metadata = json.load(f)
# Expect that packagesmetafile got created, but is empty because
# we have no DB records; it's still a valid empty JSON list.
meta_file = archives.get("packagesmetaextfile")
data = json.loads(gzips.data(meta_file))
assert len(data) == 5
assert len(extended_metadata) == len(packages)
for pkg in extended_metadata:
for key in META_KEYS:
assert key in pkg, f"{pkg=} record does not have {key=}"
assert isinstance(pkg["Depends"], list)
assert isinstance(pkg["License"], list)
for file in (PACKAGES, PKGBASE, USERS, META, META_EXT):
with open(f"{file}.sha256") as f:
file_sig_content = f.read()
expected_prefix = f"SHA256 ({os.path.basename(file)}) = "
assert file_sig_content.startswith(expected_prefix)
assert len(file_sig_content) == len(expected_prefix) + 64