get auction id from url

This commit is contained in:
Lynne Megido 2020-08-21 20:52:03 +10:00
parent 90b37d6861
commit 1c139f500e
Signed by: lynnesbian
GPG key ID: F0A184B5213D9F90

View file

@ -1,21 +1,27 @@
import requests import requests
import re, json import re, json
from datetime import datetime, timezone, timedelta from datetime import datetime, timezone, timedelta
import functions
JST = timezone(timedelta(hours = 9)) JST = timezone(timedelta(hours = 9))
class YahooAuctionsItem: class YahooAuctionsItem:
def __init__(self, name: str, url: str):
    """Create an item from a display name and an auction page URL.

    The URL is normalised (query string dropped, trailing slashes removed)
    and the auction id is taken from its last path segment, e.g. "x12345"
    from "https://buyee.jp/whatever/blah/x12345/?foo=bar".
    Finishes by calling ``self.update()``.

    Args:
        name: label for the item, stored as-is.
        url: auction page URL. It is NOT validated in any way; if it
            contains no "/", the id is simply the whole normalised string.
    """
    # note - incoming url is not validated in any way!
    self.name = name

    # Drop the query string first and only then the trailing slashes, so
    # that ".../x12345/?foo=bar" also normalises to ".../x12345".
    # Plain str methods are used instead of re.match: the regex calls had
    # their (pattern, string) arguments swapped and stored Match objects
    # rather than the matched text.
    base_url = url.split("?", 1)[0].rstrip("/")
    self.url = base_url

    # extract "x12345" from "https://buyee.jp/whatever/blah/x12345"
    self.id = base_url.rsplit("/", 1)[-1]

    # Epoch placeholder until the first update records a real check time.
    self.last_checked = datetime.fromisoformat('1970-01-01')
    self.available = True
    self.update()
def update(self): def update(self):
try: try:
# the good news is, yahoo japan returns all the data we need in handy json format
# the bad news is that the only way to get that json format is to download the whole auction page and grep it
# r = requests.get("https://page.auctions.yahoo.co.jp/jp/auction/k487846283").text # r = requests.get("https://page.auctions.yahoo.co.jp/jp/auction/k487846283").text
r = open("yahoo.html").read() r = open("yahoo.html").read()
j = json.loads(re.match(r'.*var pageData ?= ?(\{.*?\});', r, re.DOTALL).group(1)) j = json.loads(re.match(r'.*var pageData ?= ?(\{.*?\});', r, re.DOTALL).group(1))