mirror of
https://github.com/JonasunderscoreJones/turbo-octo-potato.git
synced 2025-10-25 11:09:18 +02:00
added some stuff
This commit is contained in:
parent
7b3577206f
commit
7e693d0cda
3 changed files with 306 additions and 0 deletions
111
kprofilesfetch.py
Normal file
111
kprofilesfetch.py
Normal file
|
|
@ -0,0 +1,111 @@
|
||||||
|
import datetime, requests, dotenv, os, sys
|
||||||
|
import top_lib
|
||||||
|
|
||||||
|
# Let python-dotenv populate the process environment before it is read below.
dotenv.load_dotenv()

# Base directory for cached pages; this is None when WORKING_DIR is not set.
WORKING_DIR = os.environ.get('WORKING_DIR')
|
||||||
|
|
||||||
|
|
||||||
|
def getLinks(start_date=None, end_date=None):
    """Build the candidate kprofiles.com monthly comeback-page URLs.

    One URL is generated per calendar month, from the month of
    ``start_date`` through the month of ``end_date`` (both inclusive).
    Only the year and month of the arguments are used.

    Args:
        start_date: ``datetime.date`` inside the first month to cover.
            Defaults to March 2020.
        end_date: ``datetime.date`` inside the last month to cover.
            Defaults to the month following the current one.

    Returns:
        A chronologically ordered list of URLs of the form
        ``https://kprofiles.com/<month>-<year>-comebacks-debuts-releases/``.
        The list is empty when ``start_date`` is later than ``end_date``.
    """
    if start_date is None:
        start_date = datetime.date(2020, 3, 1)

    # Work with a running month index (year * 12 + zero-based month) so the
    # December -> January rollover needs no special casing.
    first_index = start_date.year * 12 + (start_date.month - 1)
    if end_date is None:
        today = datetime.date.today()
        last_index = today.year * 12 + (today.month - 1) + 1  # next month
    else:
        last_index = end_date.year * 12 + (end_date.month - 1)

    links = []
    for month_index in range(first_index, last_index + 1):
        year, zero_based_month = divmod(month_index, 12)
        month_start = datetime.date(year, zero_based_month + 1, 1)
        # Construct the URL based on the current month and year
        links.append(f"https://kprofiles.com/{month_start.strftime('%B').lower()}-{month_start.year}-comebacks-debuts-releases/")

    return links
|
||||||
|
|
||||||
|
def checkLinkExtensions(link, comeback_compilation):
    """Find the spelling of ``link`` that occurs in ``comeback_compilation``.

    The generated link and a fixed list of known slug variations are tried
    in order; the first one that is a substring of ``comeback_compilation``
    is returned. When none matches, a "Link not found" message is printed
    and ``None`` is returned.
    """
    standard_slug = "-comebacks-debuts-releases"
    # Evaluated lazily and in this exact order; the first hit wins.
    variants = (
        lambda url: url,
        lambda url: url.replace("-debuts-releases", ""),
        lambda url: url.replace(standard_slug, ""),
        lambda url: url.replace(standard_slug, "-kpop"),
        lambda url: url[:-1] + "-2/",  # WHY IS OCTOBER 2020 THE ONLY MONTH WITH A -2
        lambda url: url.replace(standard_slug, "-kpop-comebacks-debuts-releases"),
        lambda url: url.replace(standard_slug, "-kpop-comebacks"),
    )
    for make_variant in variants:
        candidate = make_variant(link)
        if candidate in comeback_compilation:
            return candidate

    print("Link not found: " + link)
    return None
|
||||||
|
|
||||||
|
|
||||||
|
def filterValidLinks(links):
    """Keep only the links that the kprofiles comeback index references.

    The paginated index at ``https://kprofiles.com/comebacks/page/<n>`` is
    downloaded for n = 1..99, stopping at the first response whose status
    is not 200. Each candidate link is then resolved against the combined
    page text with ``checkLinkExtensions``.

    Args:
        links: Candidate URLs, e.g. the output of ``getLinks``.

    Returns:
        The resolved URL for every candidate that was found, in input order.

    Raises:
        requests.exceptions.RequestException: On network failure, including
            a timeout while waiting for an index page.
    """
    compilation_link = "https://kprofiles.com/comebacks/page/"

    pages = []
    for page_number in range(1, 100):
        # Without a timeout a stalled connection would block forever; the
        # limit is generous because the site is slow.
        response = requests.get(compilation_link + str(page_number), timeout=60)
        if response.status_code != 200:
            break
        pages.append(response.text)
    comeback_compilation = "".join(pages)

    valid_links = []
    for link in links:
        resolved = checkLinkExtensions(link, comeback_compilation)
        if resolved:
            valid_links.append(resolved)

    return valid_links
|
||||||
|
|
||||||
|
def fetchSite(link):
    """Return the HTML of ``link``, using an on-disk cache when possible.

    The cache file is ``<WORKING_DIR>/kprofiles/<slug>.html`` where the slug
    is the second-to-last ``/``-separated component of ``link``. The cache
    is bypassed when the module-level ``FORCE_REFRESH`` flag is true.

    Args:
        link: Page URL with a trailing slash.

    Returns:
        The page text, or ``None`` when the download does not answer with
        status 200.

    Raises:
        requests.exceptions.RequestException: On network failure or timeout.
    """
    cache_path = WORKING_DIR + "/kprofiles/" + link.split("/")[-2] + ".html"

    # FORCE_REFRESH is only assigned when this file runs as a script; fall
    # back to "use the cache" when the module is imported instead.
    force_refresh = globals().get("FORCE_REFRESH", False)

    # check if file already exists
    if not force_refresh and os.path.isfile(cache_path):
        # read from file
        with open(cache_path, "r", encoding="utf-8") as file:
            return file.read()

    response = requests.get(link, timeout=60)
    if response.status_code != 200:
        return None

    # save to file; explicit UTF-8 so non-ASCII page text round-trips
    # regardless of the platform's default encoding
    with open(cache_path, "w", encoding="utf-8") as file:
        file.write(response.text)
    return response.text
|
||||||
|
|
||||||
|
def fetchHandler(links):
    """Fetch every link through ``fetchSite`` while updating a progress bar.

    Args:
        links: URLs to fetch, in order.

    Returns:
        A list holding the ``fetchSite`` result for each link, in the same
        order as ``links``.

    Raises:
        OSError: When the cache directory ``<WORKING_DIR>/kprofiles/``
            cannot be created.
    """
    data = []
    bar = top_lib.Progressbar(total=len(links))
    bar.print(0)

    cache_dir = WORKING_DIR + "/kprofiles/"
    try:
        os.makedirs(cache_dir, exist_ok=True)
    except OSError as err:
        # Fail here with a clear message instead of later when the first
        # cache file cannot be written.
        raise OSError("Creation of the directory %s failed" % cache_dir) from err

    # enumerate keeps the counter correct even if a link appears twice
    for done, link in enumerate(links, start=1):
        data.append(fetchSite(link))
        bar.print(done)
    return data
|
||||||
|
|
||||||
|
def stripText():
    """Placeholder; performs no work and returns ``None``."""
    return None
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == '__main__':
    # launch args: passing "-f" makes fetchSite ignore its cached files
    FORCE_REFRESH = "-f" in sys.argv

    print("Fetching kprofiles.com... (This may take a while, kprofiles is slow...)")

    # Pipeline: candidate URLs -> URLs present in the index -> page HTML
    links = getLinks()
    valid_links = filterValidLinks(links)
    data = fetchHandler(valid_links)
|
||||||
49
sound-font.py
Normal file
49
sound-font.py
Normal file
|
|
@ -0,0 +1,49 @@
|
||||||
|
from pydub import AudioSegment
|
||||||
|
import fluidsynth
|
||||||
|
import os
|
||||||
|
|
||||||
|
# Load the MP3 file
# NOTE(review): absolute path into one user's home directory; adjust before
# running on another machine.
input_file = "/home/jonas_jones/Downloads/apple-crunch.mp3"
audio = AudioSegment.from_mp3(input_file)

# One step per piano key. The values 1..88 are used both as the semitone
# offset applied to the sample and as the `note` handed to the SoundFont.
piano_keys = 88  # 88 keys on a piano
pitch_range = list(range(1, piano_keys + 1))

# Create a temporary directory to store individual pitch-shifted audio files
temp_dir = "temp_audio"
os.makedirs(temp_dir, exist_ok=True)

try:
    # Export and pitch-shift the audio in different pitches
    for pitch in pitch_range:
        # Calculate the ratio for pitch shift (12 semitones = 1 octave)
        semitone_ratio = 2 ** (pitch / 12.0)
        # Shift the pitch by re-tagging the same raw samples with a scaled
        # frame rate
        shifted_audio = audio._spawn(audio.raw_data, overrides={
            "frame_rate": int(audio.frame_rate * semitone_ratio)
        })
        # Export the shifted audio
        output_file = os.path.join(temp_dir, f"output_pitch_{pitch}.wav")
        shifted_audio.export(output_file, format="wav")

    print("Audio exported in different pitches.")

    # Create an empty SoundFont
    # NOTE(review): confirm that the installed `fluidsynth` binding really
    # provides `SoundFont`, `add_sample` and `write_to_file`; they are used
    # here exactly as in the original script and have not been verified.
    soundfont = fluidsynth.SoundFont()

    # Load the pitch-shifted audio files into the SoundFont
    for pitch in pitch_range:
        audio_file = os.path.join(temp_dir, f"output_pitch_{pitch}.wav")
        soundfont.add_sample(audio_file, preset=0, note=pitch)

    # Save the SoundFont to a file
    soundfont_file = "output_soundfont.sf2"
    soundfont.write_to_file(soundfont_file)

    print(f"SoundFont '{soundfont_file}' created.")
finally:
    # Clean up: Delete temporary audio files and directory. Running this in
    # `finally` keeps a failed run from leaving up to 88 WAV files behind;
    # files that were never written are skipped.
    for pitch in pitch_range:
        audio_file = os.path.join(temp_dir, f"output_pitch_{pitch}.wav")
        if os.path.exists(audio_file):
            os.remove(audio_file)
    os.rmdir(temp_dir)
|
||||||
146
stripText.py
Normal file
146
stripText.py
Normal file
|
|
@ -0,0 +1,146 @@
|
||||||
|
import dotenv, os, re, datetime
|
||||||
|
import html as html_lib
|
||||||
|
|
||||||
|
dotenv.load_dotenv()

# Load the environment variables
WORKING_DIR = os.getenv('WORKING_DIR')

# Read file <WORKING_DIR>/kprofiles/march-2020-comebacks-debuts-releases.html
# The parsing below depends on non-ASCII characters (the bullet and curly
# quotes), so decode explicitly as UTF-8 instead of the platform default.
with open(os.path.join(WORKING_DIR, "kprofiles", "march-2020-comebacks-debuts-releases.html"), "r", encoding="utf-8") as f:
    html = f.read()
|
||||||
|
|
||||||
|
# Ordered pairs of (tag looked for in a line, tag written to the output).
# The first tag found in a line wins and everything after it is dropped.
# NOTE(review): "[Pre-Single Release]" maps to "[Pre-Debut Release]" as in
# the original code; confirm that this is intended.
_RELEASE_TAGS = (
    ("[Comeback]", "[Comeback]"),
    ("[Debut]", "[Debut]"),
    ("[Release]", "[Release]"),
    ("[Solo Debut]", "[Debut]"),
    ("[Solo Release]", "[Release]"),
    ("[Pre-Debut Release]", "[Pre-Debut Release]"),
    ("[Pre-Single Release]", "[Pre-Debut Release]"),
    ("[Japanese Comeback]", "[Japanese Comeback]"),
    ("[Japanese Debut]", "[Japanese Debut]"),
    ("[Project Release]", "[Release]"),
    ("[Pre-Release Single]", "[Pre-Release]"),
    ("[Comeback Single]", "[Comeback]"),
    ("[Collab Release]", "[Release]"),
    ("[Comeback Full Album]", "[Comeback]"),
    ("[Special Release]", "[Release]"),
    ("[Collab]", "[Release]"),
    ("[Mixtape]", "[Mixtape]"),
    ("[Japan Release]", "[Japanese Release]"),
    ("[Single Release]", "[Release]"),
)


def _clean_entry_line(line):
    """Remove inline markup from one bullet chunk and cut it after its tag."""
    for markup in ("<strong>", "</strong>", "<span>", "</span>", "<br/>"):
        line = line.replace(markup, "")
    for source_tag, output_tag in _RELEASE_TAGS:
        if source_tag in line:
            line = line.split(source_tag)[0] + output_tag
            break
    return line.replace("\n", "").replace("‘", "").replace("’", "")


def stripText(html, date:datetime.date=None):
    """Strip a kprofiles page, or parse one day's section of it.

    ``<script>``/``<style>`` sections are removed and HTML entities are
    unescaped. What happens next depends on the input:

    * If ``html`` starts with ``<!DOCTYPE html>`` the resulting text is
      returned as is.
    * Otherwise the text is split on the bullet character and every chunk
      is cleaned (inline tags, newlines and curly quotes removed; text
      after the release tag dropped, with the tag normalized).

    Args:
        html: A full page or a fragment of one.
        date: When given, each cleaned chunk of the form
            ``artist | title [type]`` becomes a tuple. Chunks that lack
            ``|`` or ``[`` are skipped.

    Returns:
        * full page: ``str``
        * fragment without ``date``: ``str`` with one cleaned chunk per line
        * fragment with ``date``: list of
          ``(str(date), artist, title, release_type)`` tuples
    """
    # remove the script and style sections
    text = re.sub(r"<script.*?</script>", "", html, flags=re.DOTALL)
    text = re.sub(r"<style.*?</style>", "", text, flags=re.DOTALL)
    text = html_lib.unescape(text)
    if html.startswith("<!DOCTYPE html>"):
        return text

    cleaned_lines = [_clean_entry_line(chunk) for chunk in text.split("•")]

    if not date:
        return "".join(line + "\n" for line in cleaned_lines)

    entries = []
    for line in cleaned_lines:
        bracket_parts = line.split("[")
        name_parts = bracket_parts[0].strip().split("|")
        # Text before the first bullet and stray whitespace have no
        # "artist | title [type]" shape; skip them rather than fail.
        if len(bracket_parts) < 2 or len(name_parts) < 2:
            continue
        artist = name_parts[0].strip()
        title = name_parts[1].strip()
        release_type = bracket_parts[1].split("]")[0].strip()
        entries.append((str(date), artist, title, release_type))
    return entries
|
||||||
|
|
||||||
|
def formatDate(date:datetime.date):
    """Format ``date`` as month name, day and ordinal suffix ("March 1st")."""
    day = date.day
    # Days whose last two digits fall in 10..20 always take "th"; the rest
    # are decided by the last digit.
    ordinal = 'th' if 10 <= day % 100 <= 20 else {1: 'st', 2: 'nd', 3: 'rd'}.get(day % 10, 'th')
    return f"{date.strftime('%B')} {day}{ordinal}"
|
||||||
|
|
||||||
|
def extract_between_strings(main_string, string1, string2):
    """Return the text between ``string1`` and the next ``string2``.

    ``string1`` is located at its first occurrence; ``string2`` is then
    searched for only after the end of that occurrence, so an earlier
    appearance of ``string2`` cannot produce a wrong or empty slice.

    Args:
        main_string: Text to search.
        string1: Start marker (not included in the result).
        string2: End marker (not included in the result).

    Returns:
        The text between the two markers, or ``None`` when ``string1`` is
        missing or ``string2`` does not occur after it.
    """
    start_index = main_string.find(string1)
    if start_index == -1:
        return None

    content_start = start_index + len(string1)
    end_index = main_string.find(string2, content_start)
    if end_index == -1:
        return None

    # Extract the characters between string1 and string2
    return main_string[content_start:end_index]
|
||||||
|
|
||||||
|
def increaseDateDay(date:datetime.date):
    """Return ``date`` moved forward by one day."""
    one_day = datetime.timedelta(days=1)
    return date + one_day
|
||||||
|
|
||||||
|
|
||||||
|
def do_dates(html, date:datetime.date):
    """Collect parsed entries for 46 consecutive day-to-day windows.

    Each iteration formats the current day and the following day with
    ``formatDate``, extracts the text between those two headers with
    ``extract_between_strings`` and passes it to ``stripText``. When an
    iteration extracts nothing, its start header and date are carried over
    and used by the next iteration instead.

    NOTE(review): a section is dated with the day *after* its start header,
    and a carried-over header is overwritten by each further empty
    iteration. Both are kept exactly as in the original; confirm they are
    intended.

    Args:
        html: Page text that contains the date headers.
        date: First day to look for.

    Returns:
        The concatenation of all ``stripText`` results.
    """
    collected = []
    carried_header = None  # start header kept from an empty iteration
    carried_date = None    # date kept together with carried_header

    for _ in range(46):
        start_header = formatDate(date)
        date = increaseDateDay(date)
        end_header = formatDate(date)

        # A carried-over header replaces the current day's header as anchor.
        anchor = carried_header if carried_header else start_header
        section = extract_between_strings(html, anchor, end_header)

        if not section:
            carried_header = start_header
            carried_date = date
            continue

        carried_header = None
        if carried_date is None:
            carried_date = date
        collected.extend(stripText(section, carried_date))
        carried_date = None

    return collected
|
||||||
|
|
||||||
|
# Strip the page once and reuse the text for both parsing and saving.
page_text = stripText(html)

result = do_dates(page_text, datetime.date(2020, 2, 15))
for i in result:
    print(i)
print(len(result))

# Save output to file <WORKING_DIR>/kprofiles/march-2020-comebacks-debuts-releases.txt
# The name now matches the march-2020 page read above (it previously said
# may-2020). UTF-8 is explicit because the text contains non-ASCII
# characters.
with open(os.path.join(WORKING_DIR, "kprofiles", "march-2020-comebacks-debuts-releases.txt"), "w", encoding="utf-8") as f:
    f.write(page_text)
|
||||||
Loading…
Add table
Add a link
Reference in a new issue