mirror of
https://gitlab.archlinux.org/archlinux/aurweb.git
synced 2025-02-03 10:43:03 +01:00
Move comments from the Packages table to PackageBases. Sharing comments makes sense since they almost always refer to a source package. Signed-off-by: Lukas Fleischer <archlinux@cryptocrack.de>
306 lines
9.1 KiB
Python
Executable file
306 lines
9.1 KiB
Python
Executable file
#!/usr/bin/python3
|
|
"""
|
|
usage: gendummydata.py outputfilename.sql
|
|
"""
|
|
#
|
|
# This script seeds the AUR database with dummy data for
|
|
# use during development/testing. It uses random entries
|
|
# from /usr/share/dict/words to create user accounts and
|
|
# package names. It generates the SQL statements to
|
|
# insert these users/packages into the AUR database.
|
|
#
|
|
import random
|
|
import time
|
|
import os
|
|
import sys
|
|
import io
|
|
import logging
|
|
|
|
LOG_LEVEL = logging.DEBUG # logging level. set to logging.INFO to reduce output
|
|
SEED_FILE = "/usr/share/dict/words"
|
|
DB_HOST = os.getenv("DB_HOST", "localhost")
|
|
DB_NAME = os.getenv("DB_NAME", "AUR")
|
|
DB_USER = os.getenv("DB_USER", "aur")
|
|
DB_PASS = os.getenv("DB_PASS", "aur")
|
|
USER_ID = 5 # Users.ID of first bogus user
|
|
PKG_ID = 1 # Packages.ID of first package
|
|
MAX_USERS = 300 # how many users to 'register'
|
|
MAX_DEVS = .1 # what percentage of MAX_USERS are Developers
|
|
MAX_TUS = .2 # what percentage of MAX_USERS are Trusted Users
|
|
MAX_PKGS = 900 # how many packages to load
|
|
PKG_DEPS = (1, 5) # min/max depends a package has
|
|
PKG_SRC = (1, 3) # min/max sources a package has
|
|
PKG_CMNTS = (1, 5) # min/max number of comments a package has
|
|
CATEGORIES_COUNT = 17 # the number of categories from aur-schema
|
|
VOTING = (0, .30) # percentage range for package voting
|
|
OPEN_PROPOSALS = 5 # number of open trusted user proposals
|
|
CLOSE_PROPOSALS = 15 # number of closed trusted user proposals
|
|
RANDOM_TLDS = ("edu", "com", "org", "net", "tw", "ru", "pl", "de", "es")
|
|
RANDOM_URL = ("http://www.", "ftp://ftp.", "http://", "ftp://")
|
|
RANDOM_LOCS = ("pub", "release", "files", "downloads", "src")
|
|
FORTUNE_FILE = "/usr/share/fortune/cookie"
|
|
|
|
# setup logging
|
|
logformat = "%(levelname)s: %(message)s"
|
|
logging.basicConfig(format=logformat, level=LOG_LEVEL)
|
|
log = logging.getLogger()
|
|
|
|
if len(sys.argv) != 2:
|
|
log.error("Missing output filename argument")
|
|
raise SystemExit
|
|
|
|
# make sure the seed file exists
|
|
#
|
|
if not os.path.exists(SEED_FILE):
|
|
log.error("Please install the 'words' Arch package")
|
|
raise SystemExit
|
|
|
|
# make sure comments can be created
|
|
#
|
|
if not os.path.exists(FORTUNE_FILE):
|
|
log.error("Please install the 'fortune-mod' Arch package")
|
|
raise SystemExit
|
|
|
|
# track what users/package names have been used
|
|
#
|
|
seen_users = {}
|
|
seen_pkgs = {}
|
|
user_keys = []
|
|
|
|
# some functions to generate random data
|
|
#
|
|
def genVersion():
|
|
ver = []
|
|
ver.append("%d" % random.randrange(0,10))
|
|
ver.append("%d" % random.randrange(0,20))
|
|
if random.randrange(0,2) == 0:
|
|
ver.append("%d" % random.randrange(0,100))
|
|
return ".".join(ver) + "-%d" % random.randrange(1,11)
|
|
def genCategory():
|
|
return random.randrange(1,CATEGORIES_COUNT)
|
|
def genUID():
|
|
return seen_users[user_keys[random.randrange(0,len(user_keys))]]
|
|
def genFortune():
|
|
return fortunes[random.randrange(0,len(fortunes))].replace("'", "")
|
|
|
|
|
|
# load the words, and make sure there are enough words for users/pkgs
|
|
#
|
|
log.debug("Grabbing words from seed file...")
|
|
fp = open(SEED_FILE, "r", encoding="utf-8")
|
|
contents = fp.readlines()
|
|
fp.close()
|
|
if MAX_USERS > len(contents):
|
|
MAX_USERS = len(contents)
|
|
if MAX_PKGS > len(contents):
|
|
MAX_PKGS = len(contents)
|
|
if len(contents) - MAX_USERS > MAX_PKGS:
|
|
need_dupes = 0
|
|
else:
|
|
need_dupes = 1
|
|
|
|
# select random usernames
|
|
#
|
|
log.debug("Generating random user names...")
|
|
user_id = USER_ID
|
|
while len(seen_users) < MAX_USERS:
|
|
user = random.randrange(0, len(contents))
|
|
word = contents[user].replace("'", "").replace(".","").replace(" ", "_")
|
|
word = word.strip().lower()
|
|
if word not in seen_users:
|
|
seen_users[word] = user_id
|
|
user_id += 1
|
|
user_keys = list(seen_users.keys())
|
|
|
|
# select random package names
|
|
#
|
|
log.debug("Generating random package names...")
|
|
num_pkgs = PKG_ID
|
|
while len(seen_pkgs) < MAX_PKGS:
|
|
pkg = random.randrange(0, len(contents))
|
|
word = contents[pkg].replace("'", "").replace(".","").replace(" ", "_")
|
|
word = word.strip().lower()
|
|
if not need_dupes:
|
|
if word not in seen_pkgs and word not in seen_users:
|
|
seen_pkgs[word] = num_pkgs
|
|
num_pkgs += 1
|
|
else:
|
|
if word not in seen_pkgs:
|
|
seen_pkgs[word] = num_pkgs
|
|
num_pkgs += 1
|
|
|
|
# free up contents memory
|
|
#
|
|
contents = None
|
|
|
|
# developer/tu IDs
|
|
#
|
|
developers = []
|
|
trustedusers = []
|
|
has_devs = 0
|
|
has_tus = 0
|
|
|
|
# Just let python throw the errors if any happen
|
|
#
|
|
out = open(sys.argv[1], "w", encoding="utf-8")
|
|
out.write("BEGIN;\n")
|
|
|
|
# Begin by creating the User statements
|
|
#
|
|
log.debug("Creating SQL statements for users.")
|
|
for u in user_keys:
|
|
account_type = 1 # default to normal user
|
|
if not has_devs or not has_tus:
|
|
account_type = random.randrange(1, 4)
|
|
if account_type == 3 and not has_devs:
|
|
# this will be a dev account
|
|
#
|
|
developers.append(seen_users[u])
|
|
if len(developers) >= MAX_DEVS * MAX_USERS:
|
|
has_devs = 1
|
|
elif account_type == 2 and not has_tus:
|
|
# this will be a trusted user account
|
|
#
|
|
trustedusers.append(seen_users[u])
|
|
if len(trustedusers) >= MAX_TUS * MAX_USERS:
|
|
has_tus = 1
|
|
else:
|
|
# a normal user account
|
|
#
|
|
pass
|
|
|
|
s = ("INSERT INTO Users (ID, AccountTypeID, Username, Email, Passwd)"
|
|
" VALUES (%d, %d, '%s', '%s@example.com', MD5('%s'));\n")
|
|
s = s % (seen_users[u], account_type, u, u, u)
|
|
out.write(s)
|
|
|
|
log.debug("Number of developers: %d" % len(developers))
|
|
log.debug("Number of trusted users: %d" % len(trustedusers))
|
|
log.debug("Number of users: %d" % (MAX_USERS-len(developers)-len(trustedusers)))
|
|
log.debug("Number of packages: %d" % MAX_PKGS)
|
|
|
|
log.debug("Gathering text from fortune file...")
|
|
fp = open(FORTUNE_FILE, "r", encoding="utf-8")
|
|
fortunes = fp.read().split("%\n")
|
|
fp.close()
|
|
|
|
# Create the package statements
|
|
#
|
|
log.debug("Creating SQL statements for packages.")
|
|
count = 0
|
|
for p in list(seen_pkgs.keys()):
|
|
NOW = int(time.time())
|
|
if count % 2 == 0:
|
|
muid = developers[random.randrange(0,len(developers))]
|
|
else:
|
|
muid = trustedusers[random.randrange(0,len(trustedusers))]
|
|
if count % 20 == 0: # every so often, there are orphans...
|
|
muid = "NULL"
|
|
|
|
uuid = genUID() # the submitter/user
|
|
|
|
s = ("INSERT INTO PackageBases (ID, Name, CategoryID, SubmittedTS, "
|
|
"SubmitterUID, MaintainerUID) VALUES (%d, '%s', %d, %d, %d, %s);\n")
|
|
s = s % (seen_pkgs[p], p, genCategory(), NOW, uuid, muid)
|
|
out.write(s)
|
|
|
|
s = ("INSERT INTO Packages (ID, PackageBaseID, Name, Version) VALUES "
|
|
"(%d, %d, '%s', '%s');\n")
|
|
s = s % (seen_pkgs[p], seen_pkgs[p], p, genVersion())
|
|
out.write(s)
|
|
|
|
count += 1
|
|
|
|
# create random comments for this package
|
|
#
|
|
num_comments = random.randrange(PKG_CMNTS[0], PKG_CMNTS[1])
|
|
for i in range(0, num_comments):
|
|
now = NOW + random.randrange(400, 86400*3)
|
|
s = ("INSERT INTO PackageComments (PackageBaseID, UsersID,"
|
|
" Comments, CommentTS) VALUES (%d, %d, '%s', %d);\n")
|
|
s = s % (seen_pkgs[p], genUID(), genFortune(), now)
|
|
out.write(s)
|
|
|
|
# Cast votes
|
|
#
|
|
track_votes = {}
|
|
log.debug("Casting votes for packages.")
|
|
for u in user_keys:
|
|
num_votes = random.randrange(int(len(seen_pkgs)*VOTING[0]),
|
|
int(len(seen_pkgs)*VOTING[1]))
|
|
pkgvote = {}
|
|
for v in range(num_votes):
|
|
pkg = random.randrange(1, len(seen_pkgs) + 1)
|
|
if pkg not in pkgvote:
|
|
s = ("INSERT INTO PackageVotes (UsersID, PackageBaseID)"
|
|
" VALUES (%d, %d);\n")
|
|
s = s % (seen_users[u], pkg)
|
|
pkgvote[pkg] = 1
|
|
if pkg not in track_votes:
|
|
track_votes[pkg] = 0
|
|
track_votes[pkg] += 1
|
|
out.write(s)
|
|
|
|
# Update statements for package votes
|
|
#
|
|
for p in list(track_votes.keys()):
|
|
s = "UPDATE PackageBases SET NumVotes = %d WHERE ID = %d;\n"
|
|
s = s % (track_votes[p], p)
|
|
out.write(s)
|
|
|
|
# Create package dependencies and sources
|
|
#
|
|
log.debug("Creating statements for package depends/sources.")
|
|
for p in list(seen_pkgs.keys()):
|
|
num_deps = random.randrange(PKG_DEPS[0], PKG_DEPS[1])
|
|
this_deps = {}
|
|
i = 0
|
|
while i != num_deps:
|
|
dep = random.choice([k for k in seen_pkgs])
|
|
if dep not in this_deps:
|
|
s = "INSERT INTO PackageDepends VALUES (%d, '%s', NULL);\n"
|
|
s = s % (seen_pkgs[p], dep)
|
|
out.write(s)
|
|
i += 1
|
|
|
|
num_sources = random.randrange(PKG_SRC[0], PKG_SRC[1])
|
|
for i in range(num_sources):
|
|
src_file = user_keys[random.randrange(0, len(user_keys))]
|
|
src = "%s%s.%s/%s/%s-%s.tar.gz" % (
|
|
RANDOM_URL[random.randrange(0,len(RANDOM_URL))],
|
|
p, RANDOM_TLDS[random.randrange(0,len(RANDOM_TLDS))],
|
|
RANDOM_LOCS[random.randrange(0,len(RANDOM_LOCS))],
|
|
src_file, genVersion())
|
|
s = "INSERT INTO PackageSources VALUES (%d, '%s');\n"
|
|
s = s % (seen_pkgs[p], src)
|
|
out.write(s)
|
|
|
|
# Create trusted user proposals
|
|
#
|
|
log.debug("Creating SQL statements for trusted user proposals.")
|
|
count=0
|
|
for t in range(0, OPEN_PROPOSALS+CLOSE_PROPOSALS):
|
|
now = int(time.time())
|
|
if count < CLOSE_PROPOSALS:
|
|
start = now - random.randrange(3600*24*7, 3600*24*21)
|
|
end = now - random.randrange(0, 3600*24*7)
|
|
else:
|
|
start = now
|
|
end = now + random.randrange(3600*24, 3600*24*7)
|
|
if count % 5 == 0: # Don't make the vote about anyone once in a while
|
|
user = ""
|
|
else:
|
|
user = user_keys[random.randrange(0,len(user_keys))]
|
|
suid = trustedusers[random.randrange(0,len(trustedusers))]
|
|
s = ("INSERT INTO TU_VoteInfo (Agenda, User, Submitted, End,"
|
|
" SubmitterID) VALUES ('%s', '%s', %d, %d, %d);\n")
|
|
s = s % (genFortune(), user, start, end, suid)
|
|
out.write(s)
|
|
count += 1
|
|
|
|
# close output file
|
|
#
|
|
out.write("COMMIT;\n")
|
|
out.write("\n")
|
|
out.close()
|
|
log.debug("Done.")
|