Compare commits

...

6 Commits

3 changed files with 41 additions and 24 deletions


@@ -25,7 +25,9 @@ def extract_post(post):
         ht.unwrap()
     for link in soup.select("a"): # convert <a href='https://example.com'>example.com</a> to just https://example.com
-        link.insert_after(link["href"])
+        if 'href' in link:
+            # apparently not all a tags have a href, which is understandable if you're doing normal web stuff, but on a social media platform??
+            link.insert_after(link["href"])
         link.decompose()
     text = soup.get_text()
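For context, a self-contained sketch of the link-flattening step this hunk guards (the function name and sample input are invented for illustration). One caveat: for a BeautifulSoup Tag, 'href' in link tests the tag's children, not its attributes, so link.attrs is the dependable membership test:

from bs4 import BeautifulSoup

def flatten_links(html):
    # Replace every <a href="...">text</a> with its bare URL.
    soup = BeautifulSoup(html, "html.parser")
    for link in soup.select("a"):
        # Not every <a> has a href; checking link.attrs is reliable here,
        # whereas `'href' in link` searches the tag's *children* in bs4.
        if 'href' in link.attrs:
            link.insert_after(link["href"])
        link.decompose()
    return soup.get_text()

print(flatten_links("see <a href='https://example.com'>example.com</a> and <a>this</a>"))
# prints: see https://example.com and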

app/scrape.py Normal file → Executable file

@@ -30,22 +30,27 @@ def scrape_posts(account):
     c.execute("SELECT `post_id` FROM `posts` WHERE `fedi_id` = %s ORDER BY `id` DESC LIMIT 1", (handle,))
     last_post = c.fetchone()[0]
-    r = requests.get(outbox)
-    j = r.json()
-    # check for pleroma
-    pleroma = 'next' not in j
-    if pleroma:
-        if 'first' in j:
-            # backwards compatibility for older (pre-v1.0.7) pleroma instances
-            j = j['first']
-    else:
-        uri = "{}&min_id={}".format(outbox, last_post)
-        r = requests.get(uri)
+    done = False
+    try:
+        r = requests.get(outbox, timeout = 10)
+        j = r.json()
+        # check for pleroma
+        pleroma = 'next' not in j
+        if pleroma:
+            if 'first' in j:
+                # backwards compatibility for older (pre-v1.0.7) pleroma instances
+                j = j['first']
+        else:
+            uri = "{}&min_id={}".format(outbox, last_post)
+            r = requests.get(uri, timeout = 10)
+            j = r.json()
+    except:
+        print("Couldn't load or parse outbox at URL {}".format(outbox))
+        done = True
     # here we go!
     # warning: scraping posts from outbox.json is messy stuff
-    done = False
     while not done and len(j['orderedItems']) > 0:
         for oi in j['orderedItems']:
             if oi['type'] == "Create":
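A minimal sketch of the outbox fetch this hunk hardens, factored into a hypothetical helper (fetch_outbox and the narrower exception types are my choices, not the repo's; the diff's bare except: would also swallow KeyboardInterrupt and the like):

import requests

def fetch_outbox(outbox, last_post):
    # Hypothetical helper: load the first page of an ActivityPub outbox.
    try:
        j = requests.get(outbox, timeout=10).json()
        pleroma = 'next' not in j          # Pleroma outboxes lack a 'next' link
        if pleroma:
            if 'first' in j:
                # pre-v1.0.7 Pleroma inlines the first page
                j = j['first']
        else:
            # Mastodon: ask only for posts newer than the last one we stored
            uri = "{}&min_id={}".format(outbox, last_post)
            j = requests.get(uri, timeout=10).json()
        return j, pleroma
    except (requests.RequestException, ValueError):
        # RequestException covers connection errors and timeouts,
        # ValueError covers unparseable JSON
        print("Couldn't load or parse outbox at URL {}".format(outbox))
        return None, False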
@@ -78,19 +83,24 @@ def scrape_posts(account):
                     ))
                 except:
                     #TODO: error handling
-                    raise
+                    print("Failed to insert post {} for user {}".format(post_id, handle))
     if not done:
-        if pleroma:
-            if 'next' in j:
-                r = requests.get(j['next'], timeout = 10)
-            else:
-                done = True
-        else:
-            if 'prev' in j:
-                r = requests.get(j['prev'], timeout = 10)
-            else:
-                done = True
+        try:
+            if pleroma:
+                if 'next' in j:
+                    r = requests.get(j['next'], timeout = 10)
+                else:
+                    done = True
+            else:
+                if 'prev' in j:
+                    r = requests.get(j['prev'], timeout = 10)
+                else:
+                    done = True
+        except requests.Timeout:
+            print("Timed out while loading next page for {}".format(handle))
+        except:
+            print("Encountered unknown error while getting next page for {}".format(handle))
     if r.status_code == 429:
         # we are now being ratelimited, move on to the next user
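A sketch of the pagination step the second half of this hunk wraps in try/except, again as a hypothetical helper (next_page is invented, and the 429 check lives outside this logic in the diff). Note requests.Timeout subclasses RequestException, so the more specific handler must come first:

import requests

def next_page(j, pleroma, handle):
    # Follow outbox pagination: in this scheme Pleroma pages link via
    # 'next' and Mastodon pages via 'prev'. Returns the next page's
    # JSON, or None when there is nothing left or the request fails.
    key = 'next' if pleroma else 'prev'
    if key not in j:
        return None
    try:
        r = requests.get(j[key], timeout=10)
        if r.status_code == 429:
            # rate limited; give up on this user
            print("Rate limited while scraping {}".format(handle))
            return None
        return r.json()
    except requests.Timeout:
        print("Timed out while loading next page for {}".format(handle))
    except requests.RequestException:
        print("Encountered an error while getting next page for {}".format(handle))
    return None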


@@ -1,4 +1,4 @@
-from flask import Flask, render_template, session, request, redirect, url_for, send_file
+from flask import Flask, render_template, session, request, redirect, url_for, send_file, jsonify
 from flask_mysqldb import MySQL
 from mastodon import Mastodon
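The only change here brings jsonify into scope; the route that uses it is not part of this compare. As a generic illustration of what jsonify provides (the route and payload are invented):

from flask import Flask, jsonify

app = Flask(__name__)

@app.route("/api/ping")   # invented route, purely for illustration
def ping():
    # jsonify serialises the dict and sets Content-Type: application/json
    return jsonify({"status": "ok"})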
@@ -370,6 +370,11 @@ def img_bot_generic():
 def favicon():
     return send_file("static/favicon.ico")
 
+# @app.route("/.well-known/webfinger")
+# def webfinger():
+#     return render_template("webfinger.json", base_uri = cfg['base_uri']), 200, {'Content-type':'application/json'}
+
 def bot_check(bot):
     # check to ensure bot is owned by user
     c = mysql.connection.cursor()
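The new commented-out block stubs a WebFinger endpoint. A self-contained sketch of what such a route could look like if enabled; the base_uri value and response fields below are placeholders, and the commented code renders a webfinger.json template instead of building the document inline:

from flask import Flask, request, jsonify

app = Flask(__name__)

@app.route("/.well-known/webfinger")
def webfinger():
    # Placeholder values; the real route would use cfg['base_uri'].
    base_uri = "https://bots.example.com"
    doc = {
        "subject": request.args.get("resource", ""),
        "links": [{
            "rel": "self",
            "type": "application/activity+json",
            "href": "{}/actor".format(base_uri),
        }],
    }
    # WebFinger responses are conventionally served as application/jrd+json
    return jsonify(doc), 200, {'Content-Type': 'application/jrd+json'}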