mirror of
https://github.com/Lynnesbian/FediBooks/
synced 2024-11-25 08:38:59 +00:00
use a new db connection for each scrape thread
This commit is contained in:
parent
af1f8c6d8b
commit
a9fcda794c
1 changed file with 11 additions and 2 deletions
13
scrape.py
13
scrape.py
|
@@ -9,6 +9,14 @@ import functions
|
|||
cfg = json.load(open('config.json'))
|
||||
|
||||
def scrape_posts(account):
|
||||
db = MySQLdb.connect(
|
||||
host = cfg['db_host'],
|
||||
user=cfg['db_user'],
|
||||
passwd=cfg['db_pass'],
|
||||
db=cfg['db_name'],
|
||||
use_unicode=True,
|
||||
charset="utf8mb4"
|
||||
)
|
||||
handle = account[0]
|
||||
outbox = account[1]
|
||||
print("Scraping {}".format(handle))
|
||||
|
@@ -85,6 +93,7 @@ def scrape_posts(account):
|
|||
|
||||
db.commit()
|
||||
|
||||
db.commit()
|
||||
print("Finished scraping {}".format(handle))
|
||||
|
||||
print("Establishing DB connection")
|
||||
|
@@ -102,9 +111,9 @@ cursor = db.cursor()
|
|||
print("Downloading posts")
|
||||
cursor.execute("SELECT `handle`, `outbox` FROM `fedi_accounts` ORDER BY RAND()")
|
||||
accounts = cursor.fetchall()
|
||||
cursor.close()
|
||||
db.close()
|
||||
with Pool(cfg['service_threads']) as p:
|
||||
p.map(scrape_posts, accounts)
|
||||
|
||||
db.commit()
|
||||
|
||||
print("Done!")
|
||||
|
|
Loading…
Reference in a new issue