Mirror of https://github.com/Lynnesbian/FediBooks/

use a new db connection for each scrape thread

Lynne Megido 2019-09-13 12:11:01 +10:00
parent af1f8c6d8b
commit a9fcda794c


@@ -9,6 +9,14 @@ import functions
 cfg = json.load(open('config.json'))
 
 def scrape_posts(account):
+	db = MySQLdb.connect(
+		host = cfg['db_host'],
+		user=cfg['db_user'],
+		passwd=cfg['db_pass'],
+		db=cfg['db_name'],
+		use_unicode=True,
+		charset="utf8mb4"
+	)
 	handle = account[0]
 	outbox = account[1]
 	print("Scraping {}".format(handle))
@@ -85,6 +93,7 @@ def scrape_posts(account):
 		db.commit()
 
+	db.commit()
 	print("Finished scraping {}".format(handle))
 
 print("Establishing DB connection")
@@ -102,9 +111,9 @@ cursor = db.cursor()
 print("Downloading posts")
 cursor.execute("SELECT `handle`, `outbox` FROM `fedi_accounts` ORDER BY RAND()")
 accounts = cursor.fetchall()
+cursor.close()
+db.close()
 
 with Pool(cfg['service_threads']) as p:
 	p.map(scrape_posts, accounts)
-
-db.commit()
 print("Done!")