1
0
Fork 0
mirror of https://github.com/Lynnesbian/FediBooks/ synced 2024-11-26 00:58:59 +00:00

Compare commits

..

No commits in common. "0997de535b7fda785cbdd9a6d4813d9a32175c66" and "954544205ec7f794d4a93f0b8a56ef49da60c816" have entirely different histories.

5 changed files with 25 additions and 20 deletions

3
.gitignore vendored
View file

@ -3,5 +3,4 @@ config.json
planning.txt planning.txt
*.pyc *.pyc
/debug /debug
lynnesbian.json lynnesbian.json
test.py

View file

@ -21,10 +21,12 @@ def extract_post(post):
post = html.unescape(post) # convert HTML escape codes to text post = html.unescape(post) # convert HTML escape codes to text
soup = BeautifulSoup(post, "html.parser") soup = BeautifulSoup(post, "html.parser")
for lb in soup.select("br"): # replace <br> with linebreak for lb in soup.select("br"): # replace <br> with linebreak
lb.replace_with("\n") lb.insert_after("\n")
lb.decompose()
for p in soup.select("p"): # ditto for <p> for p in soup.select("p"): # ditto for <p>
p.replace_with("\n") p.insert_after("\n")
p.unwrap()
for ht in soup.select("a.hashtag"): # convert hashtags from links to text for ht in soup.select("a.hashtag"): # convert hashtags from links to text
ht.unwrap() ht.unwrap()
@ -32,7 +34,8 @@ def extract_post(post):
for link in soup.select("a"): #ocnvert <a href='https://example.com>example.com</a> to just https://example.com for link in soup.select("a"): #ocnvert <a href='https://example.com>example.com</a> to just https://example.com
if 'href' in link: if 'href' in link:
# apparently not all a tags have a href, which is understandable if you're doing normal web stuff, but on a social media platform?? # apparently not all a tags have a href, which is understandable if you're doing normal web stuff, but on a social media platform??
link.replace_with(link["href"]) link.insert_after(link["href"])
link.decompose()
text = soup.get_text() text = soup.get_text()
text = re.sub(r"https://([^/]+)/(@[^\s]+)", r"\2@\1", text) # put mastodon-style mentions back in text = re.sub(r"https://([^/]+)/(@[^\s]+)", r"\2@\1", text) # put mastodon-style mentions back in
@ -188,19 +191,22 @@ def make_post(args):
db.commit() db.commit()
c.close() c.close()
def task_done(future):
try:
result = future.result() # blocks until results are ready
except TimeoutError as error:
if not future.silent: print("Timed out on {}.".format(future.function_data))
def do_in_pool(function, data, timeout=30, silent=False): def do_in_pool(function, data, timeout=30, silent=False):
with ProcessPool(max_workers=5, max_tasks=10) as pool: with ProcessPool(max_workers=cfg['service_threads']) as p:
for i in data: index = 0
future = pool.schedule(function, args=[i], timeout=timeout) future = p.map(function, data)
future.silent = silent iterator = future.result()
future.function_data = i
future.add_done_callback(task_done) while True:
try:
result = next(iterator)
except StopIteration:
# all threads are done
break
except TimeoutError as error:
if not silent: print("Timed out on {}.".format(data[index]))
finally:
index += 1
def get_key(): def get_key():
db = MySQLdb.connect( db = MySQLdb.connect(

View file

@ -50,7 +50,7 @@ def scrape_posts(account):
# here we go! # here we go!
# warning: scraping posts from outbox.json is messy stuff # warning: scraping posts from outbox.json is messy stuff
while not done and 'orderedItems' in j and len(j['orderedItems']) > 0: while not done and len(j['orderedItems']) > 0:
for oi in j['orderedItems']: for oi in j['orderedItems']:
if oi['type'] == "Create": if oi['type'] == "Create":
# this is a status/post/toot/florp/whatever # this is a status/post/toot/florp/whatever

View file

@ -30,7 +30,7 @@ def update_icon(bot):
if r.status_code != 200: if r.status_code != 200:
raise raise
except: except:
print("{} is down - can't update icon for {}.".format(url, bot['handle'])) print("{} is down.".format(url))
return return
client = Mastodon( client = Mastodon(

View file

@ -1,6 +1,6 @@
Mastodon.py==1.5.1 Mastodon.py==1.5.1
markovify==0.8.0 markovify==0.8.0
beautifulsoup4==4.9.1 beautifulsoup4==4.9.0
requests==2.23.0 requests==2.23.0
Flask==1.1.2 Flask==1.1.2
flask-mysqldb==0.2.0 flask-mysqldb==0.2.0