mirror of
https://github.com/Lynnesbian/FediBooks/
synced 2024-11-25 08:38:59 +00:00
use a new db connection for each scrape thread
This commit is contained in:
parent
af1f8c6d8b
commit
a9fcda794c
1 changed file with 11 additions and 2 deletions
13
scrape.py
13
scrape.py
|
@ -9,6 +9,14 @@ import functions
|
||||||
cfg = json.load(open('config.json'))
|
cfg = json.load(open('config.json'))
|
||||||
|
|
||||||
def scrape_posts(account):
|
def scrape_posts(account):
|
||||||
|
db = MySQLdb.connect(
|
||||||
|
host = cfg['db_host'],
|
||||||
|
user=cfg['db_user'],
|
||||||
|
passwd=cfg['db_pass'],
|
||||||
|
db=cfg['db_name'],
|
||||||
|
use_unicode=True,
|
||||||
|
charset="utf8mb4"
|
||||||
|
)
|
||||||
handle = account[0]
|
handle = account[0]
|
||||||
outbox = account[1]
|
outbox = account[1]
|
||||||
print("Scraping {}".format(handle))
|
print("Scraping {}".format(handle))
|
||||||
|
@ -85,6 +93,7 @@ def scrape_posts(account):
|
||||||
|
|
||||||
db.commit()
|
db.commit()
|
||||||
|
|
||||||
|
db.commit()
|
||||||
print("Finished scraping {}".format(handle))
|
print("Finished scraping {}".format(handle))
|
||||||
|
|
||||||
print("Establishing DB connection")
|
print("Establishing DB connection")
|
||||||
|
@ -102,9 +111,9 @@ cursor = db.cursor()
|
||||||
print("Downloading posts")
|
print("Downloading posts")
|
||||||
cursor.execute("SELECT `handle`, `outbox` FROM `fedi_accounts` ORDER BY RAND()")
|
cursor.execute("SELECT `handle`, `outbox` FROM `fedi_accounts` ORDER BY RAND()")
|
||||||
accounts = cursor.fetchall()
|
accounts = cursor.fetchall()
|
||||||
|
cursor.close()
|
||||||
|
db.close()
|
||||||
with Pool(cfg['service_threads']) as p:
|
with Pool(cfg['service_threads']) as p:
|
||||||
p.map(scrape_posts, accounts)
|
p.map(scrape_posts, accounts)
|
||||||
|
|
||||||
db.commit()
|
|
||||||
|
|
||||||
print("Done!")
|
print("Done!")
|
||||||
|
|
Loading…
Reference in a new issue