mirror of
https://gitlab.archlinux.org/archlinux/aurweb.git
synced 2025-02-03 10:43:03 +01:00
fix(mkpkglists): remove caching
Caching is genuinely needed for this script. However, the current caching method only compares each table's auto_increment value against a saved state, so the script skips regeneration when existing rows change in ways that have nothing to do with IDs.

Signed-off-by: Kevin Morris <kevr@0cost.org>
This commit is contained in:
parent
cdca8bd295
commit
9f1f399957
1 changed file with 54 additions and 105 deletions
|
@ -20,60 +20,23 @@ on the following, right-hand side fields are added to each item.
|
||||||
|
|
||||||
import datetime
|
import datetime
|
||||||
import gzip
|
import gzip
|
||||||
import os
|
|
||||||
import sys
|
import sys
|
||||||
|
|
||||||
from collections import defaultdict
|
from collections import defaultdict
|
||||||
from decimal import Decimal
|
from decimal import Decimal
|
||||||
from typing import Tuple
|
|
||||||
|
|
||||||
import orjson
|
import orjson
|
||||||
|
|
||||||
import aurweb.config
|
import aurweb.config
|
||||||
import aurweb.db
|
import aurweb.db
|
||||||
|
|
||||||
|
|
||||||
def state_path(archive: str, state_dir: str = "/tmp") -> str:
    """Return the path of the state file associated with ``archive``.

    The state file is named after the archive's basename with a
    ``.state`` suffix appended and is placed directly in ``state_dir``.

    :param archive: Path of a generated archive; only its basename is used.
    :param state_dir: Directory in which state files are kept. Defaults
        to ``/tmp``, the location that used to be hard-coded here.
    :return: ``<state_dir>/<basename of archive>.state``
    """
    # TODO: Use Redis cache to store this state after we merge
    # FastAPI into master and removed PHP from the tree.
    return os.path.join(state_dir, os.path.basename(archive) + ".state")
|
|
||||||
|
|
||||||
|
|
||||||
packagesfile = aurweb.config.get('mkpkglists', 'packagesfile')
|
packagesfile = aurweb.config.get('mkpkglists', 'packagesfile')
|
||||||
packagesmetafile = aurweb.config.get('mkpkglists', 'packagesmetafile')
|
packagesmetafile = aurweb.config.get('mkpkglists', 'packagesmetafile')
|
||||||
packagesmetaextfile = aurweb.config.get('mkpkglists', 'packagesmetaextfile')
|
packagesmetaextfile = aurweb.config.get('mkpkglists', 'packagesmetaextfile')
|
||||||
packages_state = state_path(packagesfile)
|
|
||||||
|
|
||||||
pkgbasefile = aurweb.config.get('mkpkglists', 'pkgbasefile')
|
pkgbasefile = aurweb.config.get('mkpkglists', 'pkgbasefile')
|
||||||
pkgbases_state = state_path(pkgbasefile)
|
|
||||||
|
|
||||||
userfile = aurweb.config.get('mkpkglists', 'userfile')
|
userfile = aurweb.config.get('mkpkglists', 'userfile')
|
||||||
users_state = state_path(userfile)
|
|
||||||
|
|
||||||
|
|
||||||
def should_update(state: str, tablename: str) -> Tuple[bool, int]:
    """Compare a table's current auto_increment value with the saved state.

    Despite the function's name, the first element of the returned tuple
    is True when the saved value EQUALS the current one, i.e. when the
    archive is considered up to date; callers regenerate when it is False.

    NOTE: auto_increment is the only signal consulted, so modifications
    that do not move that counter are not detected by this check.

    :param state: Path of the state file holding the previously saved value.
    :param tablename: Table whose auto_increment value is looked up in
        ``information_schema.tables``.
    :return: ``(up_to_date, current_value)``. ``(False, 0)`` is returned
        when the configured backend is not ``mysql`` or when no
        auto_increment value can be read for the table.
    """
    if aurweb.config.get("database", "backend") != "mysql":
        return (False, 0)

    db_name = aurweb.config.get("database", "name")
    conn = aurweb.db.Connection()
    try:
        cur = conn.execute("SELECT auto_increment FROM information_schema.tables "
                           "WHERE table_schema = ? AND table_name = ?",
                           (db_name, tablename,))
        row = cur.fetchone()
    finally:
        # This connection is private to the check; do not leak it.
        conn.close()

    # Unknown table or a NULL auto_increment: nothing to compare against,
    # so report "not up to date" the same way the non-mysql path does.
    if row is None or row[0] is None:
        return (False, 0)
    update_time = row[0]

    saved_update_time = 0
    try:
        with open(state) as f:
            saved_update_time = int(f.read().strip())
    except FileNotFoundError:
        # No state saved yet; keep the default of 0.
        pass
    except ValueError:
        # Empty or corrupt state file: treat it as missing so the archive
        # is regenerated and the state rewritten, instead of crashing.
        saved_update_time = 0

    return (saved_update_time == update_time, update_time)
|
|
||||||
|
|
||||||
|
|
||||||
def update_state(state: str, update_time: int) -> None:
    """Persist ``update_time`` as text in the ``state`` file.

    The value is written to a temporary sibling file first and then moved
    over ``state`` with ``os.replace``, so an interrupted run cannot leave
    an empty or partially written state file behind.

    :param state: Path of the state file to (over)write.
    :param update_time: Value to store; written as ``str(update_time)``.
    :raises OSError: If the temporary file cannot be written or moved.
    """
    # Include the PID so concurrent runs do not share a temporary file.
    tmp_path = f"{state}.{os.getpid()}.tmp"
    try:
        with open(tmp_path, "w") as f:
            f.write(str(update_time))
        os.replace(tmp_path, state)
    except OSError:
        # Best-effort cleanup of the temporary file; re-raise the
        # original error either way.
        try:
            os.unlink(tmp_path)
        except OSError:
            pass
        raise
|
|
||||||
|
|
||||||
|
|
||||||
TYPE_MAP = {
|
TYPE_MAP = {
|
||||||
|
@ -197,83 +160,69 @@ def main():
|
||||||
pkgbaselist_header = "# AUR package base list, generated on " + datestr
|
pkgbaselist_header = "# AUR package base list, generated on " + datestr
|
||||||
userlist_header = "# AUR user name list, generated on " + datestr
|
userlist_header = "# AUR user name list, generated on " + datestr
|
||||||
|
|
||||||
updated, update_time = should_update(packages_state, "Packages")
|
# Query columns; copied from RPC.
|
||||||
if not updated:
|
columns = ("Packages.ID, Packages.Name, "
|
||||||
print("Updating Packages...")
|
"PackageBases.ID AS PackageBaseID, "
|
||||||
|
"PackageBases.Name AS PackageBase, "
|
||||||
|
"Version, Description, URL, NumVotes, "
|
||||||
|
"Popularity, OutOfDateTS AS OutOfDate, "
|
||||||
|
"Users.UserName AS Maintainer, "
|
||||||
|
"SubmittedTS AS FirstSubmitted, "
|
||||||
|
"ModifiedTS AS LastModified")
|
||||||
|
|
||||||
# Query columns; copied from RPC.
|
# Perform query.
|
||||||
columns = ("Packages.ID, Packages.Name, "
|
cur = conn.execute(f"SELECT {columns} FROM Packages "
|
||||||
"PackageBases.ID AS PackageBaseID, "
|
"LEFT JOIN PackageBases "
|
||||||
"PackageBases.Name AS PackageBase, "
|
"ON PackageBases.ID = Packages.PackageBaseID "
|
||||||
"Version, Description, URL, NumVotes, "
|
"LEFT JOIN Users "
|
||||||
"Popularity, OutOfDateTS AS OutOfDate, "
|
"ON PackageBases.MaintainerUID = Users.ID "
|
||||||
"Users.UserName AS Maintainer, "
|
"WHERE PackageBases.PackagerUID IS NOT NULL")
|
||||||
"SubmittedTS AS FirstSubmitted, "
|
|
||||||
"ModifiedTS AS LastModified")
|
|
||||||
|
|
||||||
# Perform query.
|
# Produce packages-meta-v1.json.gz
|
||||||
cur = conn.execute(f"SELECT {columns} FROM Packages "
|
output = list()
|
||||||
"LEFT JOIN PackageBases "
|
snapshot_uri = aurweb.config.get("options", "snapshot_uri")
|
||||||
"ON PackageBases.ID = Packages.PackageBaseID "
|
for result in cur.fetchall():
|
||||||
"LEFT JOIN Users "
|
item = {
|
||||||
"ON PackageBases.MaintainerUID = Users.ID "
|
column[0]: is_decimal(result[i])
|
||||||
"WHERE PackageBases.PackagerUID IS NOT NULL")
|
for i, column in enumerate(cur.description)
|
||||||
|
}
|
||||||
|
item["URLPath"] = snapshot_uri % item.get("Name")
|
||||||
|
output.append(item)
|
||||||
|
|
||||||
# Produce packages-meta-v1.json.gz
|
write_archive(packagesmetafile, output)
|
||||||
output = list()
|
|
||||||
snapshot_uri = aurweb.config.get("options", "snapshot_uri")
|
|
||||||
for result in cur.fetchall():
|
|
||||||
item = {
|
|
||||||
column[0]: is_decimal(result[i])
|
|
||||||
for i, column in enumerate(cur.description)
|
|
||||||
}
|
|
||||||
item["URLPath"] = snapshot_uri % item.get("Name")
|
|
||||||
output.append(item)
|
|
||||||
|
|
||||||
write_archive(packagesmetafile, output)
|
# Produce packages-meta-ext-v1.json.gz
|
||||||
|
if len(sys.argv) > 1 and sys.argv[1] in EXTENDED_FIELD_HANDLERS:
|
||||||
|
f = EXTENDED_FIELD_HANDLERS.get(sys.argv[1])
|
||||||
|
data = f()
|
||||||
|
|
||||||
# Produce packages-meta-ext-v1.json.gz
|
default_ = {"Groups": [], "License": [], "Keywords": []}
|
||||||
if len(sys.argv) > 1 and sys.argv[1] in EXTENDED_FIELD_HANDLERS:
|
for i in range(len(output)):
|
||||||
f = EXTENDED_FIELD_HANDLERS.get(sys.argv[1])
|
data_ = data.get(output[i].get("ID"), default_)
|
||||||
data = f()
|
output[i].update(data_)
|
||||||
|
|
||||||
default_ = {"Groups": [], "License": [], "Keywords": []}
|
write_archive(packagesmetaextfile, output)
|
||||||
for i in range(len(output)):
|
|
||||||
data_ = data.get(output[i].get("ID"), default_)
|
|
||||||
output[i].update(data_)
|
|
||||||
|
|
||||||
write_archive(packagesmetaextfile, output)
|
# Produce packages.gz
|
||||||
|
with gzip.open(packagesfile, "wb") as f:
|
||||||
|
f.write(bytes(pkglist_header + "\n", "UTF-8"))
|
||||||
|
f.writelines([
|
||||||
|
bytes(x.get("Name") + "\n", "UTF-8")
|
||||||
|
for x in output
|
||||||
|
])
|
||||||
|
|
||||||
# Produce packages.gz
|
# Produce pkgbase.gz
|
||||||
with gzip.open(packagesfile, "wb") as f:
|
with gzip.open(pkgbasefile, "w") as f:
|
||||||
f.write(bytes(pkglist_header + "\n", "UTF-8"))
|
f.write(bytes(pkgbaselist_header + "\n", "UTF-8"))
|
||||||
f.writelines([
|
cur = conn.execute("SELECT Name FROM PackageBases " +
|
||||||
bytes(x.get("Name") + "\n", "UTF-8")
|
"WHERE PackagerUID IS NOT NULL")
|
||||||
for x in output
|
f.writelines([bytes(x[0] + "\n", "UTF-8") for x in cur.fetchall()])
|
||||||
])
|
|
||||||
|
|
||||||
update_state(packages_state, update_time)
|
# Produce users.gz
|
||||||
|
with gzip.open(userfile, "w") as f:
|
||||||
updated, update_time = should_update(pkgbases_state, "PackageBases")
|
f.write(bytes(userlist_header + "\n", "UTF-8"))
|
||||||
if not updated:
|
cur = conn.execute("SELECT UserName FROM Users")
|
||||||
print("Updating PackageBases...")
|
f.writelines([bytes(x[0] + "\n", "UTF-8") for x in cur.fetchall()])
|
||||||
# Produce pkgbase.gz
|
|
||||||
with gzip.open(pkgbasefile, "w") as f:
|
|
||||||
f.write(bytes(pkgbaselist_header + "\n", "UTF-8"))
|
|
||||||
cur = conn.execute("SELECT Name FROM PackageBases " +
|
|
||||||
"WHERE PackagerUID IS NOT NULL")
|
|
||||||
f.writelines([bytes(x[0] + "\n", "UTF-8") for x in cur.fetchall()])
|
|
||||||
update_state(pkgbases_state, update_time)
|
|
||||||
|
|
||||||
updated, update_time = should_update(users_state, "Users")
|
|
||||||
if not updated:
|
|
||||||
print("Updating Users...")
|
|
||||||
# Produce users.gz
|
|
||||||
with gzip.open(userfile, "w") as f:
|
|
||||||
f.write(bytes(userlist_header + "\n", "UTF-8"))
|
|
||||||
cur = conn.execute("SELECT UserName FROM Users")
|
|
||||||
f.writelines([bytes(x[0] + "\n", "UTF-8") for x in cur.fetchall()])
|
|
||||||
update_state(users_state, update_time)
|
|
||||||
|
|
||||||
conn.close()
|
conn.close()
|
||||||
|
|
||||||
|
|
Loading…
Add table
Reference in a new issue