1
0
Fork 0
mirror of https://github.com/Lynnesbian/FediBooks/ synced 2024-11-25 08:38:59 +00:00

fixed some more issues that didn't happen before

This commit is contained in:
Lynne Megido 2019-09-11 18:16:36 +10:00
parent 8fceadf93d
commit f982e54a2d

View file

@@ -1,3 +1,5 @@
#!/usr/bin/env python3
import MySQLdb
import requests
from multiprocessing import Pool
@@ -14,7 +16,7 @@ def scrape_posts(account):
last_post = 0
c.execute("SELECT COUNT(*) FROM `posts` WHERE `fedi_id` = %s", (handle,))
count = c.fetchone()
if count is not None and count[0] > 0:
if count is not None and int(count[0]) > 0:
# we've downloaded this user's posts before
# find out the most recently downloaded post of theirs
c.execute("SELECT `post_id` FROM `posts` WHERE `fedi_id` = %s ORDER BY `id` DESC LIMIT 1", (handle,))
@@ -41,7 +43,8 @@ def scrape_posts(account):
# first, check to see if we already have this in the database
post_id = re.search(r"([^\/]+)/?$", oi['object']['id']).group(1) # extract 123 from https://example.com/posts/123/
c.execute("SELECT COUNT(*) FROM `posts` WHERE `fedi_id` = %s AND `post_id` = %s", (handle, post_id))
if c.fetchone()[0] > 0:
count = c.fetchone()
if count is not None and int(count[0]) > 0:
# this post is already in the DB.
# we'll set done to true because we've caught up to where we were last time.
done = True
@@ -80,7 +83,7 @@ def scrape_posts(account):
j = r.json()
db.commit()
c.close()
print("Finished scraping {}".format(handle))
print("Establishing DB connection")