from bs4 import BeautifulSoup
import MySQLdb
import markovify
from mastodon import Mastodon, MastodonUnauthorizedError
import html, re, json
# Load the configuration from config.json once, at import time.
# The context manager closes the file handle as soon as parsing finishes
# instead of leaving it open until garbage collection. The encoding is
# pinned to UTF-8 so parsing does not depend on the platform's locale.
with open('config.json', encoding='utf-8') as _cfg_file:
    cfg = json.load(_cfg_file)
class nlt_fixed(markovify.NewlineText):
    """Modified version of NewlineText that never rejects sentences."""

    def test_sentence_input(self, sentence):
        """Accept every sentence.

        The ``sentence`` argument is ignored; the result is always True.
        """
        return True
def extract_post(post):
post = html.unescape(post) # convert HTML escape codes to text
soup = BeautifulSoup(post, "html.parser")
for lb in soup.select("br"): # replace each br tag with a linebreak
lb.insert_after("\n")
lb.decompose()
for p in soup.select("p"): # ditto for p tags
p.insert_after("\n") p.unwrap() for ht in soup.select("a.hashtag"): # convert hashtags from links to text ht.unwrap() for link in soup.select("a"): #ocnvert