From 7e693d0cda7a799812665b6470c149d745497542 Mon Sep 17 00:00:00 2001
From: Jonas_Jones <91549607+J-onasJones@users.noreply.github.com>
Date: Wed, 1 Nov 2023 21:44:37 +0100
Subject: [PATCH] added some stuff

---
Review notes (placed between the "---" separator and the first
"diff --git" line so they stay out of the commit message):

  Reconstruction
  * This copy of the patch had its line breaks and indentation collapsed.
    Line breaks were restored from the leading "+" markers; the resulting
    line counts match the hunk headers (111, 49 and 146 added lines).
  * Indentation is inferred from Python syntax, not recovered. Verify it
    against blobs e1f2dd7, 0c63992 and feb2335 before applying.
  * stripText.py: both re.compile() patterns, the html.startswith()
    argument and four str.replace() arguments are empty strings in the
    recovered text, and a fifth str.replace() argument held a raw line
    break (written below as "\n"). Presumably HTML tag literals were lost
    when the patch was copied -- TODO confirm against blob feb2335. They
    are reproduced as found rather than guessed.

  kprofilesfetch.py
  * getLinks() builds one kprofiles.com URL per month, from March 2020
    up to and including the month after the current one.
  * checkLinkExtensions() returns the first URL variant that occurs in
    the downloaded listing text; otherwise it prints "Link not found"
    and returns None.
  * filterValidLinks() downloads listing pages 1..99 until the first
    non-200 response. No timeout is passed to requests.get().
  * fetchSite() reads FORCE_REFRESH, which is only assigned under the
    __main__ guard: importing the module and calling fetchSite() for an
    already cached page raises NameError.
  * fetchHandler(): the OSError in the except branch is constructed but
    never raised, so a failed os.makedirs() is silently ignored.
    links.index(link) rescans the list on every iteration and reports
    the first position for a duplicated link.
  * WORKING_DIR is None when the variable is not set in the environment,
    which makes the string concatenations raise TypeError.

  sound-font.py
  * The input path is an absolute path into one user's home directory.
  * semitone_ratio is 2 ** (pitch / 12) for pitch 1..88, so the exported
    frame rate is scaled by up to 2 ** (88 / 12), roughly 161x.
  * NOTE(review): fluidsynth.SoundFont, add_sample and write_to_file are
    used as-is; confirm that the installed fluidsynth binding provides
    them.

  stripText.py
  * The module reads the March 2020 page, prints results and writes a
    file at import time; the output file is named may-2020-... although
    the comment above it says march-2020-....
  * With the empty startswith() argument as found, stripText() always
    returns right after html_lib.unescape(); the parsing loop below it
    is unreachable, and do_dates() then extends its result list with the
    characters of the returned string.
  * title = artist_title.split("|")[1] raises IndexError for an entry
    without a "|" separator.
  * print(line) and print(lines) inside the loop look like leftover
    debug output.

 kprofilesfetch.py | 111 +++++++++++++++++++++++++++++++++++
 sound-font.py     |  49 ++++++++++++++++
 stripText.py      | 146 ++++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 306 insertions(+)
 create mode 100644 kprofilesfetch.py
 create mode 100644 sound-font.py
 create mode 100644 stripText.py

diff --git a/kprofilesfetch.py b/kprofilesfetch.py
new file mode 100644
index 0000000..e1f2dd7
--- /dev/null
+++ b/kprofilesfetch.py
@@ -0,0 +1,111 @@
+import datetime, requests, dotenv, os, sys
+import top_lib
+
+dotenv.load_dotenv()
+
+WORKING_DIR = os.getenv('WORKING_DIR')
+
+
+def getLinks():
+    links = []
+
+    # Starting month and year
+    start_date = datetime.date(2020, 3, 1)
+
+    # End month and year
+    end_date = datetime.date.today().replace(day=1)
+    end_date = end_date.replace(month=end_date.month + 1) if end_date.month != 12 else end_date.replace(year=end_date.year + 1, month=1)
+    current_date = start_date
+    while current_date <= end_date:
+        # Construct the URL based on the current month and year
+        links.append(f"https://kprofiles.com/{current_date.strftime('%B').lower()}-{current_date.year}-comebacks-debuts-releases/")
+
+        # Move to the next month
+        if current_date.month == 12:
+            current_date = current_date.replace(year=current_date.year + 1, month=1)
+        else:
+            current_date = current_date.replace(month=current_date.month + 1)
+
+    return links
+
+def checkLinkExtensions(link, comeback_compilation):
+    if link in comeback_compilation:
+        return link
+    elif link.replace("-debuts-releases", "") in comeback_compilation:
+        return link.replace("-debuts-releases", "")
+    elif link.replace("-comebacks-debuts-releases", "") in comeback_compilation:
+        return link.replace("-comebacks-debuts-releases", "")
+    elif link.replace("-comebacks-debuts-releases", "-kpop") in comeback_compilation:
+        return link.replace("-comebacks-debuts-releases", "-kpop")
+    elif link[:-1] + "-2/" in comeback_compilation:
+        return link[:-1] + "-2/" # WHY IS OCTOBER 2020 THE ONLY MONTH WITH A -2
+    elif link.replace("-comebacks-debuts-releases", "-kpop-comebacks-debuts-releases") in comeback_compilation:
+        return link.replace("-comebacks-debuts-releases", "-kpop-comebacks-debuts-releases")
+    elif link.replace("-comebacks-debuts-releases", "-kpop-comebacks") in comeback_compilation:
+        return link.replace("-comebacks-debuts-releases", "-kpop-comebacks")
+    print("Link not found: " + link)
+
+
+def filterValidLinks(links):
+    # valid_links = []
+    # for link in links:
+    #     if requests.get(link).status_code == 200:
+    #         valid_links.append(link)
+    #     else:
+    #         print(requests.get(link).status_code)
+    # return valid_links
+    valid_links = []
+    compilation_link = "https://kprofiles.com/comebacks/page/"
+    comeback_compilation = ""
+    for i in range(1, 100):
+        request = requests.get(compilation_link + str(i))
+        if request.status_code == 200:
+            comeback_compilation += request.text
+        else:
+            break
+
+    for link in links:
+        is_valid = checkLinkExtensions(link, comeback_compilation)
+        if is_valid:
+            valid_links.append(is_valid)
+
+    return valid_links
+
+def fetchSite(link):
+    #check if file already exists
+    if os.path.isfile(WORKING_DIR + "/kprofiles/" + link.split("/")[-2] + ".html") and not FORCE_REFRESH:
+        # read from file
+        with open(WORKING_DIR + "/kprofiles/" + link.split("/")[-2] + ".html", "r") as file:
+            return file.read()
+    request = requests.get(link)
+    if request.status_code == 200:
+        # save to file
+        with open(WORKING_DIR + "/kprofiles/" + link.split("/")[-2] + ".html", "w") as file:
+            file.write(request.text)
+        return request.text
+
+def fetchHandler(links):
+    data = []
+    bar = top_lib.Progressbar(total=len(links))
+    bar.print(0)
+    try:
+        os.makedirs(WORKING_DIR + "/kprofiles/", exist_ok=True)
+    except OSError:
+        OSError("Creation of the directory %s failed" % WORKING_DIR + "/kprofiles/")
+    for link in links:
+        data.append(fetchSite(link))
+        bar.print(links.index(link) + 1)
+    return data
+
+def stripText():
+    pass
+
+
+if __name__ == '__main__':
+    # launch args
+    FORCE_REFRESH = True if "-f" in sys.argv else False
+
+    print("Fetching kprofiles.com... (This may take a while, kprofiles is slow...)")
+    links = getLinks()
+    valid_links = filterValidLinks(links)
+    data = fetchHandler(valid_links)
diff --git a/sound-font.py b/sound-font.py
new file mode 100644
index 0000000..0c63992
--- /dev/null
+++ b/sound-font.py
@@ -0,0 +1,49 @@
+from pydub import AudioSegment
+import fluidsynth
+import os
+
+# Load the MP3 file
+input_file = "/home/jonas_jones/Downloads/apple-crunch.mp3"
+audio = AudioSegment.from_mp3(input_file)
+
+# Define the piano pitch range (from C1 to C7)
+piano_keys = 88 # 88 keys on a piano
+pitch_range = list(range(1, piano_keys + 1))
+
+# Create a temporary directory to store individual pitch-shifted audio files
+temp_dir = "temp_audio"
+os.makedirs(temp_dir, exist_ok=True)
+
+# Export and pitch-shift the audio in different pitches
+for pitch in pitch_range:
+    # Calculate the ratio for pitch shift (12 semitones = 1 octave)
+    semitone_ratio = 2 ** (pitch / 12.0)
+    # Shift the pitch
+    shifted_audio = audio._spawn(audio.raw_data, overrides={
+        "frame_rate": int(audio.frame_rate * semitone_ratio)
+    })
+    # Export the shifted audio
+    output_file = os.path.join(temp_dir, f"output_pitch_{pitch}.wav")
+    shifted_audio.export(output_file, format="wav")
+
+print("Audio exported in different pitches.")
+
+# Create an empty SoundFont
+soundfont = fluidsynth.SoundFont()
+
+# Load the pitch-shifted audio files into the SoundFont
+for pitch in pitch_range:
+    audio_file = os.path.join(temp_dir, f"output_pitch_{pitch}.wav")
+    soundfont.add_sample(audio_file, preset=0, note=pitch)
+
+# Save the SoundFont to a file
+soundfont_file = "output_soundfont.sf2"
+soundfont.write_to_file(soundfont_file)
+
+print(f"SoundFont '{soundfont_file}' created.")
+
+# Clean up: Delete temporary audio files and directory
+for pitch in pitch_range:
+    audio_file = os.path.join(temp_dir, f"output_pitch_{pitch}.wav")
+    os.remove(audio_file)
+os.rmdir(temp_dir)
diff --git a/stripText.py b/stripText.py
new file mode 100644
index 0000000..feb2335
--- /dev/null
+++ b/stripText.py
@@ -0,0 +1,146 @@
+import dotenv, os, re, datetime
+import html as html_lib
+
+dotenv.load_dotenv()
+
+# Load the environment variables
+WORKING_DIR = os.getenv('WORKING_DIR')
+
+# Read file .working/kprofiles/march-2020-comebacks-debuts-releases.html
+with open(os.path.join(WORKING_DIR, "kprofiles", "march-2020-comebacks-debuts-releases.html"), "r") as f:
+    html = f.read()
+
+def stripText(html, date:datetime.date=None):
+    # remove the script and style sections
+    script_pattern = re.compile('', re.DOTALL)
+    style_pattern = re.compile('', re.DOTALL)
+    text = re.sub(script_pattern, "", html)
+    text = re.sub(style_pattern, "", text)
+    text = html_lib.unescape(text)
+    if html.startswith(""):
+        return text
+    lines = text.split("•")
+
+    if date:
+        result = []
+    else:
+        result = ""
+
+    for line in lines:
+        print(line)
+        print(lines)
+        line = line.replace("", "").replace("", "").replace("", "").replace("", "").replace("\n", "")
+        if "[Comeback]" in line:
+            line = line.split("[Comeback]")[0] + "[Comeback]"
+        elif "[Debut]" in line:
+            line = line.split("[Debut]")[0] + "[Debut]"
+        elif "[Release]" in line:
+            line = line.split("[Release]")[0] + "[Release]"
+        elif "[Solo Debut]" in line:
+            line = line.split("[Solo Debut]")[0] + "[Debut]"
+        elif "[Solo Release]" in line:
+            line = line.split("[Solo Release]")[0] + "[Release]"
+        elif "[Pre-Debut Release]" in line:
+            line = line.split("[Pre-Debut Release]")[0] + "[Pre-Debut Release]"
+        elif "[Pre-Single Release]" in line:
+            line = line.split("[Pre-Single Release]")[0] + "[Pre-Debut Release]"
+        elif "[Japanese Comeback]" in line:
+            line = line.split("[Japanese Comeback]")[0] + "[Japanese Comeback]"
+        elif "[Japanese Debut]" in line:
+            line = line.split("[Japanese Debut]")[0] + "[Japanese Debut]"
+        elif "[Project Release]" in line:
+            line = line.split("[Project Release]")[0] + "[Release]"
+        elif "[Pre-Release Single]" in line:
+            line = line.split("[Pre-Release Single]")[0] + "[Pre-Release]"
+        elif "[Comeback Single]" in line:
+            line = line.split("[Comeback Single]")[0] + "[Comeback]"
+        elif "[Collab Release]" in line:
+            line = line.split("[Collab Release]")[0] + "[Release]"
+        elif "[Comeback Full Album]" in line:
+            line = line.split("[Comeback Full Album]")[0] + "[Comeback]"
+        elif "[Special Release]" in line:
+            line = line.split("[Special Release]")[0] + "[Release]"
+        elif "[Collab]" in line:
+            line = line.split("[Collab]")[0] + "[Release]"
+        elif "[Mixtape]" in line:
+            line = line.split("[Mixtape]")[0] + "[Mixtape]"
+        elif "[Japan Release]" in line:
+            line = line.split("[Japan Release]")[0] + "[Japanese Release]"
+        elif "[Single Release]" in line:
+            line = line.split("[Single Release]")[0] + "[Release]"
+        line = line.replace("\n", "").replace("‘", "").replace("’", "")
+        if date and not line == "" and not line == " ":
+            print(line)
+            artist_title = line.split("[")[0].strip()
+            artist = artist_title.split("|")[0].strip()
+            title = artist_title.split("|")[1].strip().replace("‘", "").replace("’", "")
+            release_type = line.split("[")[1].split("]")[0].strip()
+            line = (str(date), artist, title, release_type)
+            result.append(line)
+            for i in ["\n", " ", ""]:
+                try:
+                    result.remove(i)
+                except ValueError:
+                    pass
+        else:
+            result += line + "\n"
+    return result
+
+def formatDate(date:datetime.date):
+    formatted_date = "{} {}".format(date.strftime("%B"), date.day)
+    if 10 <= date.day % 100 <= 20:
+        suffix = 'th'
+    else:
+        suffix = {1: 'st', 2: 'nd', 3: 'rd'}.get(date.day % 10, 'th')
+    formatted_date += suffix
+    return formatted_date
+
+def extract_between_strings(main_string, string1, string2):
+    start_index = main_string.find(string1)
+    end_index = main_string.find(string2)
+
+    # Check if both strings are found in the main string
+    if start_index != -1 and end_index != -1:
+        # Extract the characters between string1 and string2
+        extracted_text = main_string[start_index + len(string1):end_index]
+        return extracted_text
+    else:
+        # If either string1 or string2 is not found, return None or an empty string
+        return None
+
+def increaseDateDay(date:datetime.date):
+    return date + datetime.timedelta(days=1)
+
+
+def do_dates(html, date:datetime.date):
+    result = []
+    this_date = formatDate(date)
+    this_string = None
+    this_this_date = None
+    for i in range(0, 46):
+        this_date = formatDate(date)
+        date = increaseDateDay(date)
+        date_str = formatDate(date)
+        extract = extract_between_strings(html, this_date, date_str)
+        if this_string:
+            extract = extract_between_strings(html, this_string, date_str)
+        if not extract:
+            this_string = this_date
+            this_this_date = date
+            continue
+        this_string = None
+        #print("---------------------------------------------------")
+        #print(this_date)
+        this_this_date = date if this_this_date == None else this_this_date
+        result += stripText(extract, this_this_date)
+        this_this_date = None
+    return result
+
+result = do_dates(stripText(html), datetime.date(2020, 2, 15))
+for i in result:
+    print(i)
+print(len(result))
+
+#save output to file .working/kprofiles/march-2020-comebacks-debuts-releases.txt
+with open(os.path.join(WORKING_DIR, "kprofiles", "may-2020-comebacks-debuts-releases.txt"), "w") as f:
+    f.write(stripText(html))