mirror of
https://github.com/Lynnesbian/FediBooks/
synced 2024-11-26 00:58:59 +00:00
Compare commits
No commits in common. "954544205ec7f794d4a93f0b8a56ef49da60c816" and "d9d7f751c646984975d8d52615d04f7e3071ad38" have entirely different histories.
954544205e
...
d9d7f751c6
4 changed files with 20 additions and 48 deletions
|
@ -1,7 +1,5 @@
|
||||||
from bs4 import BeautifulSoup
|
from bs4 import BeautifulSoup
|
||||||
import MySQLdb
|
import MySQLdb
|
||||||
from pebble import ProcessPool
|
|
||||||
from concurrent.futures import TimeoutError
|
|
||||||
import markovify
|
import markovify
|
||||||
import requests
|
import requests
|
||||||
from Crypto.PublicKey import RSA
|
from Crypto.PublicKey import RSA
|
||||||
|
@ -48,9 +46,7 @@ def generate_output(handle):
|
||||||
host = cfg['db_host'],
|
host = cfg['db_host'],
|
||||||
user=cfg['db_user'],
|
user=cfg['db_user'],
|
||||||
passwd=cfg['db_pass'],
|
passwd=cfg['db_pass'],
|
||||||
db=cfg['db_name'],
|
db=cfg['db_name']
|
||||||
use_unicode=True,
|
|
||||||
charset="utf8mb4"
|
|
||||||
)
|
)
|
||||||
# print("Generating post for {}".format(handle))
|
# print("Generating post for {}".format(handle))
|
||||||
dc = db.cursor(MySQLdb.cursors.DictCursor)
|
dc = db.cursor(MySQLdb.cursors.DictCursor)
|
||||||
|
@ -92,7 +88,7 @@ def generate_output(handle):
|
||||||
posts = "\n".join(list(sum(c.fetchall(), ())))
|
posts = "\n".join(list(sum(c.fetchall(), ())))
|
||||||
if len(posts) == 0:
|
if len(posts) == 0:
|
||||||
print("{} - No posts to learn from.".format(handle))
|
print("{} - No posts to learn from.".format(handle))
|
||||||
return bot, None
|
return
|
||||||
|
|
||||||
if bot['fake_mentions'] == 'never':
|
if bot['fake_mentions'] == 'never':
|
||||||
# remove all mentions from the training data before the markov model sees it
|
# remove all mentions from the training data before the markov model sees it
|
||||||
|
@ -103,8 +99,8 @@ def generate_output(handle):
|
||||||
post = None
|
post = None
|
||||||
|
|
||||||
# even with such a high tries value for markovify, it still sometimes returns none.
|
# even with such a high tries value for markovify, it still sometimes returns none.
|
||||||
# so we implement our own tries function as well, and try five times.
|
# so we implement our own tries function as well, and try ten times.
|
||||||
while post is None and tries < 5:
|
while post is None and tries < 10:
|
||||||
post = model.make_short_sentence(bot['length'], tries = 1000)
|
post = model.make_short_sentence(bot['length'], tries = 1000)
|
||||||
tries += 1
|
tries += 1
|
||||||
|
|
||||||
|
@ -142,10 +138,6 @@ def make_post(args):
|
||||||
|
|
||||||
bot, post = generate_output(handle)
|
bot, post = generate_output(handle)
|
||||||
|
|
||||||
# post will be None if there's no posts for the bot to learn from.
|
|
||||||
# in such a case, we should just exit without doing anything.
|
|
||||||
if post == None: return
|
|
||||||
|
|
||||||
client = Mastodon(
|
client = Mastodon(
|
||||||
client_id = bot['client_id'],
|
client_id = bot['client_id'],
|
||||||
client_secret = bot['client_secret'],
|
client_secret = bot['client_secret'],
|
||||||
|
@ -157,9 +149,7 @@ def make_post(args):
|
||||||
host = cfg['db_host'],
|
host = cfg['db_host'],
|
||||||
user=cfg['db_user'],
|
user=cfg['db_user'],
|
||||||
passwd=cfg['db_pass'],
|
passwd=cfg['db_pass'],
|
||||||
db=cfg['db_name'],
|
db=cfg['db_name']
|
||||||
use_unicode=True,
|
|
||||||
charset="utf8mb4"
|
|
||||||
)
|
)
|
||||||
c = db.cursor()
|
c = db.cursor()
|
||||||
|
|
||||||
|
@ -183,7 +173,7 @@ def make_post(args):
|
||||||
# this needs to be dealt with properly later on, but for now, we'll just disable the bot
|
# this needs to be dealt with properly later on, but for now, we'll just disable the bot
|
||||||
c.execute("UPDATE bots SET enabled = FALSE WHERE handle = %s", (handle,))
|
c.execute("UPDATE bots SET enabled = FALSE WHERE handle = %s", (handle,))
|
||||||
except:
|
except:
|
||||||
print("Failed to submit post for {}".format(handle))
|
print("Failed to create post for {}".format(handle))
|
||||||
|
|
||||||
if id == None:
|
if id == None:
|
||||||
# this wasn't a reply, it was a regular post, so update the last post date
|
# this wasn't a reply, it was a regular post, so update the last post date
|
||||||
|
@ -191,31 +181,12 @@ def make_post(args):
|
||||||
db.commit()
|
db.commit()
|
||||||
c.close()
|
c.close()
|
||||||
|
|
||||||
def do_in_pool(function, data, timeout=30, silent=False):
    """Apply ``function`` to every item of ``data`` using a pebble ProcessPool.

    The pool is sized from ``cfg['service_threads']``. Return values of
    ``function`` are discarded; this helper is used purely for side effects.

    Args:
        function: callable invoked once per item of ``data``.
        data: indexable sequence of work items (it is indexed to name the
            item that timed out).
        timeout: seconds handed to ``ProcessPool.map`` as its ``timeout``
            argument, so that a task exceeding it surfaces as a
            ``TimeoutError`` when its result is fetched.
        silent: when true, timed-out items are not reported on stdout.

    Returns:
        None.

    Raises:
        Any exception other than ``TimeoutError`` raised while fetching a
        result propagates to the caller unchanged.
    """
    with ProcessPool(max_workers=cfg['service_threads']) as p:
        # Forward the timeout to the pool; without it no task is ever
        # interrupted and the TimeoutError branch below can never run.
        future = p.map(function, data, timeout=timeout)
        iterator = future.result()

        # Results come back in submission order, so `index` tracks which
        # item of `data` the current result (or timeout) belongs to.
        index = 0
        while True:
            try:
                next(iterator)
            except StopIteration:
                # all tasks are done
                break
            except TimeoutError:
                if not silent:
                    print("Timed out on {}.".format(data[index]))
            finally:
                index += 1
|
|
||||||
|
|
||||||
def get_key():
|
def get_key():
|
||||||
db = MySQLdb.connect(
|
db = MySQLdb.connect(
|
||||||
host = cfg['db_host'],
|
host = cfg['db_host'],
|
||||||
user=cfg['db_user'],
|
user=cfg['db_user'],
|
||||||
passwd=cfg['db_pass'],
|
passwd=cfg['db_pass'],
|
||||||
db=cfg['db_name'],
|
db=cfg['db_name']
|
||||||
use_unicode=True,
|
|
||||||
charset="utf8mb4"
|
|
||||||
)
|
)
|
||||||
|
|
||||||
dc = db.cursor(MySQLdb.cursors.DictCursor)
|
dc = db.cursor(MySQLdb.cursors.DictCursor)
|
||||||
|
|
|
@ -2,6 +2,7 @@
|
||||||
|
|
||||||
import MySQLdb
|
import MySQLdb
|
||||||
import requests
|
import requests
|
||||||
|
from multiprocessing import Pool
|
||||||
import json, re
|
import json, re
|
||||||
import functions
|
import functions
|
||||||
|
|
||||||
|
@ -130,7 +131,7 @@ cursor.execute("SELECT `handle`, `outbox` FROM `fedi_accounts` ORDER BY RAND()")
|
||||||
accounts = cursor.fetchall()
|
accounts = cursor.fetchall()
|
||||||
cursor.close()
|
cursor.close()
|
||||||
db.close()
|
db.close()
|
||||||
|
with Pool(cfg['service_threads']) as p:
|
||||||
functions.do_in_pool(scrape_posts, accounts, timeout=60)
|
p.map(scrape_posts, accounts)
|
||||||
|
|
||||||
print("Done!")
|
print("Done!")
|
||||||
|
|
|
@ -1,10 +1,9 @@
|
||||||
#!/usr/bin/env python3
|
#!/usr/bin/env python3
|
||||||
import json
|
|
||||||
|
|
||||||
import MySQLdb
|
import MySQLdb
|
||||||
from mastodon import Mastodon
|
from mastodon import Mastodon
|
||||||
|
from multiprocessing import Pool
|
||||||
import requests
|
import requests
|
||||||
|
import json
|
||||||
import functions
|
import functions
|
||||||
|
|
||||||
cfg = json.load(open('config.json'))
|
cfg = json.load(open('config.json'))
|
||||||
|
@ -74,7 +73,8 @@ cursor.execute("SELECT handle FROM bots WHERE enabled = TRUE AND TIMESTAMPDIFF(M
|
||||||
# cursor.execute("SELECT handle FROM bots WHERE enabled = TRUE")
|
# cursor.execute("SELECT handle FROM bots WHERE enabled = TRUE")
|
||||||
bots = cursor.fetchall()
|
bots = cursor.fetchall()
|
||||||
|
|
||||||
functions.do_in_pool(functions.make_post, bots, 15)
|
with Pool(cfg['service_threads']) as p:
|
||||||
|
p.map(functions.make_post, bots)
|
||||||
|
|
||||||
print("Updating cached icons")
|
print("Updating cached icons")
|
||||||
dc = db.cursor(MySQLdb.cursors.DictCursor)
|
dc = db.cursor(MySQLdb.cursors.DictCursor)
|
||||||
|
@ -86,6 +86,7 @@ ON bots.credentials_id = credentials.id
|
||||||
WHERE TIMESTAMPDIFF(HOUR, icon_update_time, CURRENT_TIMESTAMP()) > 2""")
|
WHERE TIMESTAMPDIFF(HOUR, icon_update_time, CURRENT_TIMESTAMP()) > 2""")
|
||||||
bots = dc.fetchall()
|
bots = dc.fetchall()
|
||||||
|
|
||||||
functions.do_in_pool(update_icon, bots)
|
with Pool(cfg['service_threads']) as p:
|
||||||
|
p.map(update_icon, bots)
|
||||||
|
|
||||||
db.commit()
|
db.commit()
|
||||||
|
|
|
@ -9,4 +9,3 @@ requests==2.23.0
|
||||||
http-ece==1.1.0
|
http-ece==1.1.0
|
||||||
pycryptodome==3.9.7
|
pycryptodome==3.9.7
|
||||||
cryptography==2.9.2
|
cryptography==2.9.2
|
||||||
pebble==4.5.3
|
|
||||||
|
|
Loading…
Reference in a new issue