added documentation

This commit is contained in:
Jonas_Jones 2023-12-01 20:47:14 +01:00
parent 61f0c87ef9
commit 01ee0e5b7d

View file

@ -6,8 +6,6 @@ import re
import json import json
import time import time
dotenv.load_dotenv()
def fetch_main_reddit_wiki_page(subreddit_name, page_name): def fetch_main_reddit_wiki_page(subreddit_name, page_name):
try: try:
@ -86,8 +84,13 @@ def convert_monthly_content_to_json(content, year, month):
if parts[5]: if parts[5]:
parts[5] = parts[5].group(1) parts[5] = parts[5].group(1)
if parts[5].startswith("/"): if parts[5].startswith("/"):
# if the link is a relative link, add the domain
parts[5] = "https://www.reddit.com" + parts[5] parts[5] = "https://www.reddit.com" + parts[5]
# if song links are provided, split the cell on " / " and
# extract the actual URL from each markdown-formatted link
if parts[6] != "": if parts[6] != "":
parts[6] = parts[6].split(" / ") parts[6] = parts[6].split(" / ")
links = [] links = []
@ -97,18 +100,25 @@ def convert_monthly_content_to_json(content, year, month):
if link: if link:
link = link.group(1) link = link.group(1)
links.append(link) links.append(link)
# replace the string with the list of links
parts[6] = links parts[6] = links
# remove the last element if it's empty
# sometimes the markdown is messy
if parts[-1] == "": if parts[-1] == "":
parts.pop(-1) parts.pop(-1)
else: else:
# if the song links are not provided, replace the string with an empty list
parts[6] = [] parts[6] = []
# move the reddit link into the list of song links
# (after pop(5) the links list has shifted down to index 5)
reddit = parts.pop(5) reddit = parts.pop(5)
if reddit != "": if reddit != "":
parts[5].append(reddit) parts[5].append(reddit)
# remove the "th", "st", "nd", "rd" from the day
parts[0] = parts[0].replace('th', '').replace('st', '').replace('nd', '').replace('rd', '') parts[0] = parts[0].replace('th', '').replace('st', '').replace('nd', '').replace('rd', '')
# create a json entry from the parsed data
json_entry = { json_entry = {
"date": f"{year}-{month}-{parts[0]}", "date": f"{year}-{month}-{parts[0]}",
"time": parts[1], "time": parts[1],
@ -120,10 +130,13 @@ def convert_monthly_content_to_json(content, year, month):
json_data.append(json_entry) json_data.append(json_entry)
#print(json_entry)
except Exception as e: except Exception as e:
# if the line doesn't start with a pipe, ignore it
# these lines are not part of the table
if not line.startswith("|"): if not line.startswith("|"):
continue continue
# other issues are logged but ignored
else: else:
print("[IGNORED] Error parsing line: '" + line + "'") print("[IGNORED] Error parsing line: '" + line + "'")
print(e) print(e)
@ -138,7 +151,9 @@ def fetch_monthly_page(wiki_link, subreddit_name):
subreddit = reddit.subreddit(subreddit_name) subreddit = reddit.subreddit(subreddit_name)
wiki_page = subreddit.wiki[f"{wiki_link}"].content_md wiki_page = subreddit.wiki[f"{wiki_link}"].content_md
# remove everything on the wiki page before the table's "|--|--|" separator row
wiki_page = wiki_page[wiki_page.find("|--|--|"):] wiki_page = wiki_page[wiki_page.find("|--|--|"):]
# drop the separator row itself (everything up to and including the first newline)
wiki_page = wiki_page[wiki_page.find("\n") + 1:] wiki_page = wiki_page[wiki_page.find("\n") + 1:]
#wiki_page = wiki_page[:wiki_page.find("\n\n")] #wiki_page = wiki_page[:wiki_page.find("\n\n")]
@ -165,20 +180,26 @@ def fetch_monthly_page(wiki_link, subreddit_name):
return None return None
# Example usage: # reddit infos
subreddit_name = "kpop" subreddit_name = "kpop"
wiki_page_name = "upcoming-releases/archive" wiki_page_name = "upcoming-releases/archive"
# reddit instance
dotenv.load_dotenv()
reddit = praw.Reddit( reddit = praw.Reddit(
client_id=os.getenv('REDDIT_CLIENT_ID'), client_id=os.getenv('REDDIT_CLIENT_ID'),
client_secret=os.getenv('REDDIT_CLIENT_SECRET'), client_secret=os.getenv('REDDIT_CLIENT_SECRET'),
user_agent=os.getenv('REDDIT_USER_AGENT') user_agent=os.getenv('REDDIT_USER_AGENT')
) )
# fetch subreddit
try: try:
subreddit = reddit.subreddit(subreddit_name) subreddit = reddit.subreddit(subreddit_name)
except praw.exceptions.PRAWException as e: except praw.exceptions.PRAWException as e:
print(f"Error fetching subreddit: {e}") print(f"Error fetching subreddit: {e}")
# fetch wiki page
content = fetch_main_reddit_wiki_page(subreddit_name, wiki_page_name) content = fetch_main_reddit_wiki_page(subreddit_name, wiki_page_name)
if content: if content:
@ -190,6 +211,7 @@ if content:
print("Fetching monthly page: " + wiki_link) print("Fetching monthly page: " + wiki_link)
try: try:
# fetch the monthly page and parse it
json_data += fetch_monthly_page(wiki_link, subreddit_name) json_data += fetch_monthly_page(wiki_link, subreddit_name)
except Exception as e: except Exception as e:
# write json_data to file # write json_data to file