1
0
Fork 0
mirror of https://github.com/Lynnesbian/FediBooks/ synced 2024-11-25 16:48:58 +00:00

moved make_post to functions.py

This commit is contained in:
Lynne Megido 2019-09-09 13:39:28 +10:00
parent 436a911397
commit 7b5227c3dc
2 changed files with 78 additions and 67 deletions

View file

@ -1,5 +1,14 @@
from bs4 import BeautifulSoup from bs4 import BeautifulSoup
import html, re import MySQLdb
import markovify
from mastodon import Mastodon
import html, re, json
cfg = json.load(open('config.json'))
class nlt_fixed(markovify.NewlineText): # modified version of NewlineText that never rejects sentences
def test_sentence_input(self, sentence):
return True # all sentences are valid <3
def extract_post(post): def extract_post(post):
post = html.unescape(post) # convert HTML escape codes to text post = html.unescape(post) # convert HTML escape codes to text
@ -24,3 +33,70 @@ def extract_post(post):
text = re.sub("https://([^/]+)/users/([^ ]+)", r"@\2@\1", text) # put pleroma-style mentions back in text = re.sub("https://([^/]+)/users/([^ ]+)", r"@\2@\1", text) # put pleroma-style mentions back in
text = text.rstrip("\n") # remove trailing newline(s) text = text.rstrip("\n") # remove trailing newline(s)
return text return text
def make_post(handle):
handle = handle[0]
db = MySQLdb.connect(
host = cfg['db_host'],
user=cfg['db_user'],
passwd=cfg['db_pass'],
db=cfg['db_name']
)
print("Generating post for {}".format(handle))
c = db.cursor()
c.execute("""
SELECT
learn_from_cw, client_id, client_secret, secret
FROM
bots, credentials
WHERE
bots.credentials_id = (SELECT
credentials_id
FROM
bots
WHERE
handle = %s)
""", (handle,))
bot = c.fetchone()
client = Mastodon(
client_id = bot[1],
client_secret = bot[2],
access_token = bot[3],
api_base_url = "https://{}".format(handle.split("@")[2])
)
# by default, only select posts that don't have CWs.
# if learn_from_cw, then also select posts with CWs
cw_list = [False]
if bot[0]:
cw_list = [False, True]
# select 1000 random posts for the bot to learn from
c.execute("SELECT content FROM posts WHERE fedi_id IN (SELECT fedi_id FROM bot_learned_accounts WHERE bot_id = %s) AND cw IN %s ORDER BY RAND() LIMIT 1000", (handle, cw_list))
# this line is a little gross/optimised but here's what it does
# 1. fetch all of the results from the above query
# 2. turn (('this',), ('format')) into ('this', 'format')
# 3. convert the tuple to a list
# 4. join the list into a string separated by newlines
posts = "\n".join(list(sum(c.fetchall(), ())))
model = nlt_fixed(posts)
tries = 0
sentence = None
# even with such a high tries value for markovify, it still sometimes returns none.
# so we implement our own tries function as well, and try ten times.
while sentence is None and tries < 10:
sentence = model.make_short_sentence(500, tries = 10000)
tries += 1
# TODO: mention handling
if sentence == None:
# TODO: send an error email
pass
else:
client.status_post(sentence)
# TODO: update date of last post

View file

@ -9,10 +9,6 @@ import functions
cfg = json.load(open('config.json')) cfg = json.load(open('config.json'))
class nlt_fixed(markovify.NewlineText): # modified version of NewlineText that never rejects sentences
def test_sentence_input(self, sentence):
return True # all sentences are valid <3
def scrape_posts(account): def scrape_posts(account):
handle = account[0] handle = account[0]
outbox = account[1] outbox = account[1]
@ -88,67 +84,6 @@ def scrape_posts(account):
db.commit() db.commit()
c.close() c.close()
def make_post(handle):
handle = handle[0]
print("Generating post for {}".format(handle))
c = db.cursor()
c.execute("""
SELECT
learn_from_cw, client_id, client_secret, secret
FROM
bots, credentials
WHERE
bots.credentials_id = (SELECT
credentials_id
FROM
bots
WHERE
handle = %s)
""", (handle,))
bot = c.fetchone()
client = Mastodon(
client_id = bot[1],
client_secret = bot[2],
access_token = bot[3],
api_base_url = "https://{}".format(handle.split("@")[2])
)
# by default, only select posts that don't have CWs.
# if learn_from_cw, then also select posts with CWs
cw_list = [False]
if bot[0]:
cw_list = [False, True]
# select 1000 random posts for the bot to learn from
c.execute("SELECT content FROM posts WHERE fedi_id IN (SELECT fedi_id FROM bot_learned_accounts WHERE bot_id = %s) AND cw IN %s ORDER BY RAND() LIMIT 1000", (handle, cw_list))
# this line is a little gross/optimised but here's what it does
# 1. fetch all of the results from the above query
# 2. turn (('this',), ('format')) into ('this', 'format')
# 3. convert the tuple to a list
# 4. join the list into a string separated by newlines
posts = "\n".join(list(sum(c.fetchall(), ())))
model = nlt_fixed(posts)
tries = 0
sentence = None
# even with such a high tries value for markovify, it still sometimes returns none.
# so we implement our own tries function as well, and try ten times.
while sentence is None and tries < 10:
sentence = model.make_short_sentence(500, tries = 10000)
tries += 1
# TODO: mention handling
if sentence == None:
# TODO: send an error email
pass
else:
client.status_post(sentence)
# TODO: update date of last post
print("Establishing DB connection") print("Establishing DB connection")
db = MySQLdb.connect( db = MySQLdb.connect(
host = cfg['db_host'], host = cfg['db_host'],
@ -173,6 +108,6 @@ cursor.execute("SELECT handle FROM bots WHERE enabled = TRUE")
bots = cursor.fetchall() bots = cursor.fetchall()
with Pool(8) as p: with Pool(8) as p:
p.map(make_post, bots) p.map(functions.make_post, bots)
#TODO: other cron tasks should be done here, like updating profile pictures #TODO: other cron tasks should be done here, like updating profile pictures