Compare commits

...

4 Commits

4 changed files with 48 additions and 20 deletions

View File

@ -1,5 +1,7 @@
from bs4 import BeautifulSoup from bs4 import BeautifulSoup
import MySQLdb import MySQLdb
from pebble import ProcessPool
from concurrent.futures import TimeoutError
import markovify import markovify
import requests import requests
from Crypto.PublicKey import RSA from Crypto.PublicKey import RSA
@ -46,7 +48,9 @@ def generate_output(handle):
host = cfg['db_host'], host = cfg['db_host'],
user=cfg['db_user'], user=cfg['db_user'],
passwd=cfg['db_pass'], passwd=cfg['db_pass'],
db=cfg['db_name'] db=cfg['db_name'],
use_unicode=True,
charset="utf8mb4"
) )
# print("Generating post for {}".format(handle)) # print("Generating post for {}".format(handle))
dc = db.cursor(MySQLdb.cursors.DictCursor) dc = db.cursor(MySQLdb.cursors.DictCursor)
@ -88,7 +92,7 @@ def generate_output(handle):
posts = "\n".join(list(sum(c.fetchall(), ()))) posts = "\n".join(list(sum(c.fetchall(), ())))
if len(posts) == 0: if len(posts) == 0:
print("{} - No posts to learn from.".format(handle)) print("{} - No posts to learn from.".format(handle))
return return bot, None
if bot['fake_mentions'] == 'never': if bot['fake_mentions'] == 'never':
# remove all mentions from the training data before the markov model sees it # remove all mentions from the training data before the markov model sees it
@ -99,8 +103,8 @@ def generate_output(handle):
post = None post = None
# even with such a high tries value for markovify, it still sometimes returns none. # even with such a high tries value for markovify, it still sometimes returns none.
# so we implement our own tries function as well, and try ten times. # so we implement our own tries function as well, and try five times.
while post is None and tries < 10: while post is None and tries < 5:
post = model.make_short_sentence(bot['length'], tries = 1000) post = model.make_short_sentence(bot['length'], tries = 1000)
tries += 1 tries += 1
@ -138,6 +142,10 @@ def make_post(args):
bot, post = generate_output(handle) bot, post = generate_output(handle)
# post will be None if there are no posts for the bot to learn from.
# in such a case, we should just exit without doing anything.
if post == None: return
client = Mastodon( client = Mastodon(
client_id = bot['client_id'], client_id = bot['client_id'],
client_secret = bot['client_secret'], client_secret = bot['client_secret'],
@ -149,7 +157,9 @@ def make_post(args):
host = cfg['db_host'], host = cfg['db_host'],
user=cfg['db_user'], user=cfg['db_user'],
passwd=cfg['db_pass'], passwd=cfg['db_pass'],
db=cfg['db_name'] db=cfg['db_name'],
use_unicode=True,
charset="utf8mb4"
) )
c = db.cursor() c = db.cursor()
@ -173,7 +183,7 @@ def make_post(args):
# this needs to be dealt with properly later on, but for now, we'll just disable the bot # this needs to be dealt with properly later on, but for now, we'll just disable the bot
c.execute("UPDATE bots SET enabled = FALSE WHERE handle = %s", (handle,)) c.execute("UPDATE bots SET enabled = FALSE WHERE handle = %s", (handle,))
except: except:
print("Failed to create post for {}".format(handle)) print("Failed to submit post for {}".format(handle))
if id == None: if id == None:
# this wasn't a reply, it was a regular post, so update the last post date # this wasn't a reply, it was a regular post, so update the last post date
@ -181,12 +191,31 @@ def make_post(args):
db.commit() db.commit()
c.close() c.close()
def do_in_pool(function, data, timeout=30, silent=False):
    """Run ``function`` once per item of ``data`` inside a pebble ProcessPool.

    The pool size is taken from ``cfg['service_threads']``. Results are
    consumed and discarded; this helper is used purely for the side effects
    of ``function``.

    Args:
        function: Callable invoked with a single item of ``data``.
        data: Indexable sequence of work items (indexed when reporting
            a timeout).
        timeout: Per-task time limit, in seconds, handed to the pool.
        silent: When true, timed-out items are not reported on stdout.

    Returns:
        None.
    """
    with ProcessPool(max_workers=cfg['service_threads']) as p:
        # The timeout has to be passed to map() itself; without it the pool
        # never interrupts a slow task and the TimeoutError branch below
        # can never be reached.
        future = p.map(function, data, timeout=timeout)
        iterator = future.result()
        index = 0
        while True:
            try:
                # The value itself is not needed; advancing the iterator is
                # what surfaces a per-task timeout as an exception.
                next(iterator)
            except StopIteration:
                # every submitted task has been consumed
                break
            except TimeoutError:
                if not silent:
                    print("Timed out on {}.".format(data[index]))
            finally:
                # keep index aligned with the item the iterator just yielded
                index += 1
def get_key(): def get_key():
db = MySQLdb.connect( db = MySQLdb.connect(
host = cfg['db_host'], host = cfg['db_host'],
user=cfg['db_user'], user=cfg['db_user'],
passwd=cfg['db_pass'], passwd=cfg['db_pass'],
db=cfg['db_name'] db=cfg['db_name'],
use_unicode=True,
charset="utf8mb4"
) )
dc = db.cursor(MySQLdb.cursors.DictCursor) dc = db.cursor(MySQLdb.cursors.DictCursor)
@ -199,9 +228,9 @@ def get_key():
key['private'] = privkey.exportKey('PEM').decode('utf-8') key['private'] = privkey.exportKey('PEM').decode('utf-8')
key['public'] = privkey.publickey().exportKey('PEM').decode('utf-8') key['public'] = privkey.publickey().exportKey('PEM').decode('utf-8')
dc.execute("INSERT INTO http_auth_key (private, public) VALUES (%s, %s)", (key['private'], key['public'])) dc.execute("INSERT INTO http_auth_key (private, public) VALUES (%s, %s)", (key['private'], key['public']))
dc.close() dc.close()
db.commit() db.commit()
@ -243,4 +272,4 @@ def signed_get(url, timeout = 10, additional_headers = {}, request_json = True):
r = requests.Request('GET', url, headers) r = requests.Request('GET', url, headers)
return r.headers return r.headers
# return requests.get(url, timeout = timeout) # return requests.get(url, timeout = timeout)

View File

@ -2,7 +2,6 @@
import MySQLdb import MySQLdb
import requests import requests
from multiprocessing import Pool
import json, re import json, re
import functions import functions
@ -131,7 +130,7 @@ cursor.execute("SELECT `handle`, `outbox` FROM `fedi_accounts` ORDER BY RAND()")
accounts = cursor.fetchall() accounts = cursor.fetchall()
cursor.close() cursor.close()
db.close() db.close()
with Pool(cfg['service_threads']) as p:
p.map(scrape_posts, accounts) functions.do_in_pool(scrape_posts, accounts, timeout=60)
print("Done!") print("Done!")

View File

@ -1,9 +1,10 @@
#!/usr/bin/env python3 #!/usr/bin/env python3
import json
import MySQLdb import MySQLdb
from mastodon import Mastodon from mastodon import Mastodon
from multiprocessing import Pool
import requests import requests
import json
import functions import functions
cfg = json.load(open('config.json')) cfg = json.load(open('config.json'))
@ -73,8 +74,7 @@ cursor.execute("SELECT handle FROM bots WHERE enabled = TRUE AND TIMESTAMPDIFF(M
# cursor.execute("SELECT handle FROM bots WHERE enabled = TRUE") # cursor.execute("SELECT handle FROM bots WHERE enabled = TRUE")
bots = cursor.fetchall() bots = cursor.fetchall()
with Pool(cfg['service_threads']) as p: functions.do_in_pool(functions.make_post, bots, 15)
p.map(functions.make_post, bots)
print("Updating cached icons") print("Updating cached icons")
dc = db.cursor(MySQLdb.cursors.DictCursor) dc = db.cursor(MySQLdb.cursors.DictCursor)
@ -86,7 +86,6 @@ ON bots.credentials_id = credentials.id
WHERE TIMESTAMPDIFF(HOUR, icon_update_time, CURRENT_TIMESTAMP()) > 2""") WHERE TIMESTAMPDIFF(HOUR, icon_update_time, CURRENT_TIMESTAMP()) > 2""")
bots = dc.fetchall() bots = dc.fetchall()
with Pool(cfg['service_threads']) as p: functions.do_in_pool(update_icon, bots)
p.map(update_icon, bots)
db.commit() db.commit()

View File

@ -7,5 +7,6 @@ flask-mysqldb==0.2.0
bcrypt == 3.1.7 bcrypt == 3.1.7
requests==2.23.0 requests==2.23.0
http-ece==1.1.0 http-ece==1.1.0
pycryptodome==3.9.7 pycryptodome==3.9.7
cryptography==2.9.2 cryptography==2.9.2
pebble==4.5.3