fix(conftest): use synchronization locks for setup_database

We were running into data race issues where the `fn.is_file()`
check would occur twice before writing the file in the `else`
clause. For this reason, a new aurweb.lock.Lock class has been
added which doubles as a thread and process lock. We can use
this elsewhere in the future, but we are also able to use it
to solve this kind of data race issue.

That being said, we still need the lock file state to tell us
when the first caller acquired the lock.

Signed-off-by: Kevin Morris <kevr@0cost.org>
This commit is contained in:
Kevin Morris 2021-11-22 20:06:50 -08:00
parent 155aa47a1a
commit 4b0cb0721d
No known key found for this signature in database
GPG key ID: F7E46DED420788F3
3 changed files with 95 additions and 24 deletions

View file

@ -0,0 +1,32 @@
import hashlib
import os
from typing import Callable
from posix_ipc import O_CREAT, Semaphore
from aurweb import logging
logger = logging.get_logger(__name__)
def default_on_create(path):
logger.info(f"Filelock at {path} acquired.")
class FileLock:
def __init__(self, tmpdir, name: str):
self.root = tmpdir
self.path = str(self.root / name)
self._file = str(self.root / (f"{name}.1"))
def lock(self, on_create: Callable = default_on_create):
hash = hashlib.sha1(self.path.encode()).hexdigest()
with Semaphore(f"/{hash}-lock", flags=O_CREAT, initial_value=1):
retval = os.path.exists(self._file)
if not retval:
with open(self._file, "w") as f:
f.write("1")
on_create(self.path)
return retval

View file

@ -38,10 +38,14 @@ It is done this way because migration has a large cost; migrating
ahead of each function takes too long when compared to this method.
"""
import os
import pathlib
from multiprocessing import Lock
import py
import pytest
from filelock import FileLock
from posix_ipc import O_CREAT, Semaphore
from sqlalchemy import create_engine
from sqlalchemy.engine import URL
from sqlalchemy.engine.base import Engine
@ -53,9 +57,13 @@ import aurweb.db
from aurweb import initdb, logging, testing
from aurweb.testing.email import Email
from aurweb.testing.filelock import FileLock
logger = logging.get_logger(__name__)
# Synchronization lock for database setup.
setup_lock = Lock()
def test_engine() -> Engine:
"""
@ -105,7 +113,12 @@ def _create_database(engine: Engine, dbname: str) -> None:
try:
conn.execute(f"CREATE DATABASE {dbname}")
except ProgrammingError: # pragma: no cover
pass
# The database most likely already existed if we hit
# a ProgrammingError. Just drop the database and try
# again. If at that point things still fail, any
# exception will be propogated up to the caller.
conn.execute(f"DROP DATABASE {dbname}")
conn.execute(f"CREATE DATABASE {dbname}")
conn.close()
initdb.run(AlembicArgs)
@ -124,20 +137,24 @@ def _drop_database(engine: Engine, dbname: str) -> None:
def setup_email():
if not os.path.exists(Email.TEST_DIR):
os.makedirs(Email.TEST_DIR)
# TODO: Fix this data race! This try/catch is ugly; why is it even
# racing here? Perhaps we need to multiproc + multithread lock
# inside of setup_database to block the check?
with Semaphore("/test-emails", flags=O_CREAT, initial_value=1):
if not os.path.exists(Email.TEST_DIR):
# Create the directory.
os.makedirs(Email.TEST_DIR)
# Cleanup all email files for this test suite.
prefix = Email.email_prefix(suite=True)
files = os.listdir(Email.TEST_DIR)
for file in files:
if file.startswith(prefix):
os.remove(os.path.join(Email.TEST_DIR, file))
# Cleanup all email files for this test suite.
prefix = Email.email_prefix(suite=True)
files = os.listdir(Email.TEST_DIR)
for file in files:
if file.startswith(prefix):
os.remove(os.path.join(Email.TEST_DIR, file))
@pytest.fixture(scope="module")
def setup_database(tmp_path_factory: pytest.fixture,
worker_id: pytest.fixture) -> None:
def setup_database(tmp_path_factory: pathlib.Path, worker_id: str) -> None:
""" Create and drop a database for the suite this fixture is used in. """
engine = test_engine()
dbname = aurweb.db.name()
@ -149,19 +166,15 @@ def setup_database(tmp_path_factory: pytest.fixture,
_drop_database(engine, dbname)
return
root_tmp_dir = tmp_path_factory.getbasetemp().parent
fn = root_tmp_dir / dbname
def setup(path):
setup_email()
_create_database(engine, dbname)
with FileLock(str(fn) + ".lock"):
if fn.is_file():
# If the data file exists, skip database creation.
yield
else:
# Otherwise, create the data file and create the database.
fn.write_text("1")
setup_email()
yield _create_database(engine, dbname)
_drop_database(engine, dbname)
tmpdir = tmp_path_factory.getbasetemp().parent
file_lock = FileLock(tmpdir, dbname)
file_lock.lock(on_create=setup)
yield # Run the test function depending on this fixture.
_drop_database(engine, dbname) # Cleanup the database.
@pytest.fixture(scope="module")

26
test/test_filelock.py Normal file
View file

@ -0,0 +1,26 @@
import py
from _pytest.logging import LogCaptureFixture
from aurweb.testing.filelock import FileLock
def test_filelock(tmpdir: py.path.local):
cb_path = None
def setup(path: str):
nonlocal cb_path
cb_path = str(path)
flock = FileLock(tmpdir, "test")
assert not flock.lock(on_create=setup)
assert cb_path == str(tmpdir / "test")
assert flock.lock()
def test_filelock_default(caplog: LogCaptureFixture, tmpdir: py.path.local):
# Test default_on_create here.
flock = FileLock(tmpdir, "test")
assert not flock.lock()
assert caplog.messages[0] == f"Filelock at {flock.path} acquired."
assert flock.lock()