mirror of
https://github.com/JonasunderscoreJones/turbo-octo-potato.git
synced 2025-10-28 11:39:18 +01:00
added documentation
This commit is contained in:
parent 61f0c87ef9
commit 01ee0e5b7d
1 changed file with 26 additions and 4 deletions
@@ -6,8 +6,6 @@ import re
 import json
 import time
 
-dotenv.load_dotenv()
-
 def fetch_main_reddit_wiki_page(subreddit_name, page_name):
 
     try:
@@ -86,8 +84,13 @@ def convert_monthly_content_to_json(content, year, month):
             if parts[5]:
                 parts[5] = parts[5].group(1)
                 if parts[5].startswith("/"):
+                    # if the link is a relative link, add the domain
                     parts[5] = "https://www.reddit.com" + parts[5]
 
+            # if the song links are provided, parse them
+            # do this by splitting the string by " / "
+            # and then parsing the markdown syntax
+            # to get the actual link
             if parts[6] != "":
                 parts[6] = parts[6].split(" / ")
                 links = []
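The comments added here describe splitting a cell on " / " and pulling each URL out of the markdown link syntax. A minimal sketch of that idea, assuming a cell shaped like "[MV](url) / [Audio](url)" and a capture-everything-in-parentheses regex (the script's actual pattern is not visible in this diff):

import re

cell = "[MV](https://youtu.be/abc123) / [Audio](/r/kpop/comments/xyz)"

links = []
for part in cell.split(" / "):
    match = re.search(r"\((.*?)\)", part)  # capture the URL inside (...)
    if match:
        link = match.group(1)
        if link.startswith("/"):  # relative reddit link: prepend the domain
            link = "https://www.reddit.com" + link
        links.append(link)

print(links)
# ['https://youtu.be/abc123', 'https://www.reddit.com/r/kpop/comments/xyz']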
@@ -97,18 +100,25 @@ def convert_monthly_content_to_json(content, year, month):
                     if link:
                         link = link.group(1)
                         links.append(link)
+                # replace the string with the list of links
                 parts[6] = links
+                # remove the last element if it's empty
+                # sometimes the markdown is messy
                 if parts[-1] == "":
                     parts.pop(-1)
             else:
+                # if the song links are not provided, replace the string with an empty list
                 parts[6] = []
 
+            # add the reddit link to the list of links
             reddit = parts.pop(5)
             if reddit != "":
                 parts[5].append(reddit)
 
+            # remove the "th", "st", "nd", "rd" from the day
             parts[0] = parts[0].replace('th', '').replace('st', '').replace('nd', '').replace('rd', '')
 
+            # create a json entry from the parsed data
             json_entry = {
                 "date": f"{year}-{month}-{parts[0]}",
                 "time": parts[1],
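The chained str.replace() above works for plain day strings such as "21st" or "3rd", but it would also eat any stray "st"/"nd"/"rd"/"th" elsewhere in the field. A hedged alternative sketch (not the commit's code) that anchors the suffix to the digits:

import re

for day in ["1st", "2nd", "3rd", "4th", "21st"]:
    print(re.sub(r"^(\d+)(st|nd|rd|th)$", r"\1", day))
# prints 1, 2, 3, 4, 21 on separate lines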
@@ -120,10 +130,13 @@ def convert_monthly_content_to_json(content, year, month):
 
 
             json_data.append(json_entry)
-            #print(json_entry)
         except Exception as e:
+            # if the line doesn't start with a pipe, ignore it
+            # these lines are not part of the table
             if not line.startswith("|"):
                 continue
+            # other issues are logged but ignored
             else:
                 print("[IGNORED] Error parsing line: '" + line + "'")
                 print(e)
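The except branch treats lines without a leading pipe as expected noise rather than parse errors. An equivalent up-front filter, assuming content holds the raw page text, would be:

# keep only rows that belong to the markdown table before parsing
rows = [line for line in content.splitlines() if line.startswith("|")]

The commit's per-line try/except has the advantage of also logging genuinely malformed table rows instead of silently dropping them.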
@@ -138,7 +151,9 @@ def fetch_monthly_page(wiki_link, subreddit_name):
     subreddit = reddit.subreddit(subreddit_name)
     wiki_page = subreddit.wiki[f"{wiki_link}"].content_md
 
+    # remove the first part of the wiki page before the table
     wiki_page = wiki_page[wiki_page.find("|--|--|"):]
+    # remove the last part of the wiki page after the table
     wiki_page = wiki_page[wiki_page.find("\n") + 1:]
     #wiki_page = wiki_page[:wiki_page.find("\n\n")]
 
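To see what the two slices do, here is a toy run on a miniature wiki page. Note that the second slice actually drops the separator row itself (the new comment describes it loosely):

wiki_page = "intro text\n|Day|Time|Artist|\n|--|--|--|\n|21st|12AM KST|...|\n"

# jump to the separator row that marks the start of the table body
wiki_page = wiki_page[wiki_page.find("|--|--|"):]
# drop the separator row, leaving only the data rows
wiki_page = wiki_page[wiki_page.find("\n") + 1:]

print(wiki_page)  # |21st|12AM KST|...|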
@@ -165,20 +180,26 @@ def fetch_monthly_page(wiki_link, subreddit_name):
         return None
 
-# Example usage:
+# reddit infos
 subreddit_name = "kpop"
 wiki_page_name = "upcoming-releases/archive"
 
+# reddit instance
+dotenv.load_dotenv()
+
 reddit = praw.Reddit(
     client_id=os.getenv('REDDIT_CLIENT_ID'),
     client_secret=os.getenv('REDDIT_CLIENT_SECRET'),
     user_agent=os.getenv('REDDIT_USER_AGENT')
 )
 
+# fetch subreddit
 try:
     subreddit = reddit.subreddit(subreddit_name)
 except praw.exceptions.PRAWException as e:
     print(f"Error fetching subreddit: {e}")
 
+# fetch wiki page
 content = fetch_main_reddit_wiki_page(subreddit_name, wiki_page_name)
 
+
 if content:
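The praw.Reddit call reads its three credentials from the environment that dotenv.load_dotenv() populates. A small sketch of a fail-fast guard, assuming only the variable names already used in this script:

import os
import dotenv

dotenv.load_dotenv()

# abort early if any credential is missing from the .env file
for key in ("REDDIT_CLIENT_ID", "REDDIT_CLIENT_SECRET", "REDDIT_USER_AGENT"):
    if not os.getenv(key):
        raise SystemExit(f"missing environment variable: {key}")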
@@ -190,6 +211,7 @@ if content:
         print("Fetching monthly page: " + wiki_link)
 
         try:
+            # fetch the monthly page and parse it
             json_data += fetch_monthly_page(wiki_link, subreddit_name)
         except Exception as e:
             # write json_data to file
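The diff is truncated at the "# write json_data to file" comment, so the dump itself is not visible here. A minimal sketch of what it might look like, with the filename purely a placeholder:

import json

# hypothetical output path; the real name is not shown in this diff
with open("releases.json", "w", encoding="utf-8") as f:
    json.dump(json_data, f, indent=2, ensure_ascii=False)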