fix(mkpkglists): remove caching

We really need caching for this; however, our current caching
method will cause the script to bypass changes to columns
if they have nothing to do with IDs.

Signed-off-by: Kevin Morris <kevr@0cost.org>
This commit is contained in:
Kevin Morris 2021-11-06 17:13:16 -07:00
parent cdca8bd295
commit 9f1f399957
No known key found for this signature in database
GPG key ID: F7E46DED420788F3

View file

@ -20,60 +20,23 @@ on the following, right-hand side fields are added to each item.
import datetime import datetime
import gzip import gzip
import os
import sys import sys
from collections import defaultdict from collections import defaultdict
from decimal import Decimal from decimal import Decimal
from typing import Tuple
import orjson import orjson
import aurweb.config import aurweb.config
import aurweb.db import aurweb.db
def state_path(archive: str) -> str:
# A hard-coded /tmp state directory.
# TODO: Use Redis cache to store this state after we merge
# FastAPI into master and removed PHP from the tree.
return os.path.join("/tmp", os.path.basename(archive) + ".state")
packagesfile = aurweb.config.get('mkpkglists', 'packagesfile') packagesfile = aurweb.config.get('mkpkglists', 'packagesfile')
packagesmetafile = aurweb.config.get('mkpkglists', 'packagesmetafile') packagesmetafile = aurweb.config.get('mkpkglists', 'packagesmetafile')
packagesmetaextfile = aurweb.config.get('mkpkglists', 'packagesmetaextfile') packagesmetaextfile = aurweb.config.get('mkpkglists', 'packagesmetaextfile')
packages_state = state_path(packagesfile)
pkgbasefile = aurweb.config.get('mkpkglists', 'pkgbasefile') pkgbasefile = aurweb.config.get('mkpkglists', 'pkgbasefile')
pkgbases_state = state_path(pkgbasefile)
userfile = aurweb.config.get('mkpkglists', 'userfile') userfile = aurweb.config.get('mkpkglists', 'userfile')
users_state = state_path(userfile)
def should_update(state: str, tablename: str) -> Tuple[bool, int]:
if aurweb.config.get("database", "backend") != "mysql":
return (False, 0)
db_name = aurweb.config.get("database", "name")
conn = aurweb.db.Connection()
cur = conn.execute("SELECT auto_increment FROM information_schema.tables "
"WHERE table_schema = ? AND table_name = ?",
(db_name, tablename,))
update_time = cur.fetchone()[0]
saved_update_time = 0
if os.path.exists(state):
with open(state) as f:
saved_update_time = int(f.read().strip())
return (saved_update_time == update_time, update_time)
def update_state(state: str, update_time: int) -> None:
with open(state, "w") as f:
f.write(str(update_time))
TYPE_MAP = { TYPE_MAP = {
@ -197,83 +160,69 @@ def main():
pkgbaselist_header = "# AUR package base list, generated on " + datestr pkgbaselist_header = "# AUR package base list, generated on " + datestr
userlist_header = "# AUR user name list, generated on " + datestr userlist_header = "# AUR user name list, generated on " + datestr
updated, update_time = should_update(packages_state, "Packages") # Query columns; copied from RPC.
if not updated: columns = ("Packages.ID, Packages.Name, "
print("Updating Packages...") "PackageBases.ID AS PackageBaseID, "
"PackageBases.Name AS PackageBase, "
"Version, Description, URL, NumVotes, "
"Popularity, OutOfDateTS AS OutOfDate, "
"Users.UserName AS Maintainer, "
"SubmittedTS AS FirstSubmitted, "
"ModifiedTS AS LastModified")
# Query columns; copied from RPC. # Perform query.
columns = ("Packages.ID, Packages.Name, " cur = conn.execute(f"SELECT {columns} FROM Packages "
"PackageBases.ID AS PackageBaseID, " "LEFT JOIN PackageBases "
"PackageBases.Name AS PackageBase, " "ON PackageBases.ID = Packages.PackageBaseID "
"Version, Description, URL, NumVotes, " "LEFT JOIN Users "
"Popularity, OutOfDateTS AS OutOfDate, " "ON PackageBases.MaintainerUID = Users.ID "
"Users.UserName AS Maintainer, " "WHERE PackageBases.PackagerUID IS NOT NULL")
"SubmittedTS AS FirstSubmitted, "
"ModifiedTS AS LastModified")
# Perform query. # Produce packages-meta-v1.json.gz
cur = conn.execute(f"SELECT {columns} FROM Packages " output = list()
"LEFT JOIN PackageBases " snapshot_uri = aurweb.config.get("options", "snapshot_uri")
"ON PackageBases.ID = Packages.PackageBaseID " for result in cur.fetchall():
"LEFT JOIN Users " item = {
"ON PackageBases.MaintainerUID = Users.ID " column[0]: is_decimal(result[i])
"WHERE PackageBases.PackagerUID IS NOT NULL") for i, column in enumerate(cur.description)
}
item["URLPath"] = snapshot_uri % item.get("Name")
output.append(item)
# Produce packages-meta-v1.json.gz write_archive(packagesmetafile, output)
output = list()
snapshot_uri = aurweb.config.get("options", "snapshot_uri")
for result in cur.fetchall():
item = {
column[0]: is_decimal(result[i])
for i, column in enumerate(cur.description)
}
item["URLPath"] = snapshot_uri % item.get("Name")
output.append(item)
write_archive(packagesmetafile, output) # Produce packages-meta-ext-v1.json.gz
if len(sys.argv) > 1 and sys.argv[1] in EXTENDED_FIELD_HANDLERS:
f = EXTENDED_FIELD_HANDLERS.get(sys.argv[1])
data = f()
# Produce packages-meta-ext-v1.json.gz default_ = {"Groups": [], "License": [], "Keywords": []}
if len(sys.argv) > 1 and sys.argv[1] in EXTENDED_FIELD_HANDLERS: for i in range(len(output)):
f = EXTENDED_FIELD_HANDLERS.get(sys.argv[1]) data_ = data.get(output[i].get("ID"), default_)
data = f() output[i].update(data_)
default_ = {"Groups": [], "License": [], "Keywords": []} write_archive(packagesmetaextfile, output)
for i in range(len(output)):
data_ = data.get(output[i].get("ID"), default_)
output[i].update(data_)
write_archive(packagesmetaextfile, output) # Produce packages.gz
with gzip.open(packagesfile, "wb") as f:
f.write(bytes(pkglist_header + "\n", "UTF-8"))
f.writelines([
bytes(x.get("Name") + "\n", "UTF-8")
for x in output
])
# Produce packages.gz # Produce pkgbase.gz
with gzip.open(packagesfile, "wb") as f: with gzip.open(pkgbasefile, "w") as f:
f.write(bytes(pkglist_header + "\n", "UTF-8")) f.write(bytes(pkgbaselist_header + "\n", "UTF-8"))
f.writelines([ cur = conn.execute("SELECT Name FROM PackageBases " +
bytes(x.get("Name") + "\n", "UTF-8") "WHERE PackagerUID IS NOT NULL")
for x in output f.writelines([bytes(x[0] + "\n", "UTF-8") for x in cur.fetchall()])
])
update_state(packages_state, update_time) # Produce users.gz
with gzip.open(userfile, "w") as f:
updated, update_time = should_update(pkgbases_state, "PackageBases") f.write(bytes(userlist_header + "\n", "UTF-8"))
if not updated: cur = conn.execute("SELECT UserName FROM Users")
print("Updating PackageBases...") f.writelines([bytes(x[0] + "\n", "UTF-8") for x in cur.fetchall()])
# Produce pkgbase.gz
with gzip.open(pkgbasefile, "w") as f:
f.write(bytes(pkgbaselist_header + "\n", "UTF-8"))
cur = conn.execute("SELECT Name FROM PackageBases " +
"WHERE PackagerUID IS NOT NULL")
f.writelines([bytes(x[0] + "\n", "UTF-8") for x in cur.fetchall()])
update_state(pkgbases_state, update_time)
updated, update_time = should_update(users_state, "Users")
if not updated:
print("Updating Users...")
# Produce users.gz
with gzip.open(userfile, "w") as f:
f.write(bytes(userlist_header + "\n", "UTF-8"))
cur = conn.execute("SELECT UserName FROM Users")
f.writelines([bytes(x[0] + "\n", "UTF-8") for x in cur.fetchall()])
update_state(users_state, update_time)
conn.close() conn.close()