mirror of
				https://github.com/JonasunderscoreJones/turbo-octo-potato.git
				synced 2025-10-26 02:29:18 +01:00 
			
		
		
		
	added some stuff
This commit is contained in:
		
							parent
							
								
									7b3577206f
								
							
						
					
					
						commit
						7e693d0cda
					
				
					 3 changed files with 306 additions and 0 deletions
				
			
		
							
								
								
									
										111
									
								
								kprofilesfetch.py
									
										
									
									
									
										Normal file
									
								
							
							
						
						
									
										111
									
								
								kprofilesfetch.py
									
										
									
									
									
										Normal file
									
								
							|  | @ -0,0 +1,111 @@ | |||
| import datetime, requests, dotenv, os, sys | ||||
| import top_lib | ||||
| 
 | ||||
| dotenv.load_dotenv() | ||||
| 
 | ||||
| WORKING_DIR = os.getenv('WORKING_DIR') | ||||
| 
 | ||||
| 
 | ||||
| def getLinks(): | ||||
|     links = [] | ||||
| 
 | ||||
|     # Starting month and year | ||||
|     start_date = datetime.date(2020, 3, 1) | ||||
| 
 | ||||
|     # End month and year | ||||
|     end_date = datetime.date.today().replace(day=1) | ||||
|     end_date = end_date.replace(month=end_date.month + 1) if end_date.month != 12 else end_date.replace(year=end_date.year + 1, month=1) | ||||
|     current_date = start_date | ||||
|     while current_date <= end_date: | ||||
|         # Construct the URL based on the current month and year | ||||
|         links.append(f"https://kprofiles.com/{current_date.strftime('%B').lower()}-{current_date.year}-comebacks-debuts-releases/") | ||||
|          | ||||
|         # Move to the next month | ||||
|         if current_date.month == 12: | ||||
|             current_date = current_date.replace(year=current_date.year + 1, month=1) | ||||
|         else: | ||||
|             current_date = current_date.replace(month=current_date.month + 1) | ||||
|          | ||||
|     return links | ||||
| 
 | ||||
| def checkLinkExtensions(link, comeback_compilation): | ||||
|     if link in comeback_compilation: | ||||
|         return link | ||||
|     elif link.replace("-debuts-releases", "") in comeback_compilation: | ||||
|         return link.replace("-debuts-releases", "") | ||||
|     elif link.replace("-comebacks-debuts-releases", "") in comeback_compilation: | ||||
|         return link.replace("-comebacks-debuts-releases", "") | ||||
|     elif link.replace("-comebacks-debuts-releases", "-kpop") in comeback_compilation: | ||||
|         return link.replace("-comebacks-debuts-releases", "-kpop") | ||||
|     elif link[:-1] + "-2/" in comeback_compilation: | ||||
|         return link[:-1] + "-2/" # WHY IS OCTOBER 2020 THE ONLY MONTH WITH A -2 | ||||
|     elif link.replace("-comebacks-debuts-releases", "-kpop-comebacks-debuts-releases") in comeback_compilation: | ||||
|         return link.replace("-comebacks-debuts-releases", "-kpop-comebacks-debuts-releases") | ||||
|     elif link.replace("-comebacks-debuts-releases", "-kpop-comebacks") in comeback_compilation: | ||||
|         return link.replace("-comebacks-debuts-releases", "-kpop-comebacks") | ||||
|     print("Link not found: " + link) | ||||
|      | ||||
| 
 | ||||
| def filterValidLinks(links): | ||||
|     # valid_links = [] | ||||
|     # for link in links: | ||||
|     #     if requests.get(link).status_code == 200: | ||||
|     #         valid_links.append(link) | ||||
|     #     else: | ||||
|     #         print(requests.get(link).status_code) | ||||
|     # return valid_links | ||||
|     valid_links = [] | ||||
|     compilation_link = "https://kprofiles.com/comebacks/page/" | ||||
|     comeback_compilation = "" | ||||
|     for i in range(1, 100): | ||||
|         request = requests.get(compilation_link + str(i)) | ||||
|         if request.status_code == 200: | ||||
|             comeback_compilation += request.text | ||||
|         else: | ||||
|             break | ||||
|      | ||||
|     for link in links: | ||||
|         is_valid = checkLinkExtensions(link, comeback_compilation) | ||||
|         if is_valid: | ||||
|             valid_links.append(is_valid) | ||||
|      | ||||
|     return valid_links | ||||
| 
 | ||||
| def fetchSite(link): | ||||
|     #check if file already exists | ||||
|     if os.path.isfile(WORKING_DIR + "/kprofiles/" + link.split("/")[-2] + ".html") and not FORCE_REFRESH: | ||||
|         # read from file | ||||
|         with open(WORKING_DIR + "/kprofiles/" + link.split("/")[-2] + ".html", "r") as file: | ||||
|             return file.read() | ||||
|     request = requests.get(link) | ||||
|     if request.status_code == 200: | ||||
|         # save to file | ||||
|         with open(WORKING_DIR + "/kprofiles/" + link.split("/")[-2] + ".html", "w") as file: | ||||
|             file.write(request.text) | ||||
|         return request.text | ||||
| 
 | ||||
| def fetchHandler(links): | ||||
|     data = [] | ||||
|     bar = top_lib.Progressbar(total=len(links)) | ||||
|     bar.print(0) | ||||
|     try: | ||||
|         os.makedirs(WORKING_DIR + "/kprofiles/", exist_ok=True) | ||||
|     except OSError: | ||||
|         OSError("Creation of the directory %s failed" % WORKING_DIR + "/kprofiles/") | ||||
|     for link in links: | ||||
|         data.append(fetchSite(link)) | ||||
|         bar.print(links.index(link) + 1) | ||||
|     return data | ||||
| 
 | ||||
| def stripText(): | ||||
|     pass | ||||
| 
 | ||||
| 
 | ||||
| if __name__ == '__main__': | ||||
|     # launch args | ||||
|     FORCE_REFRESH = True if "-f" in sys.argv else False | ||||
| 
 | ||||
|     print("Fetching kprofiles.com... (This may take a while, kprofiles is slow...)") | ||||
|     links = getLinks() | ||||
|     valid_links = filterValidLinks(links) | ||||
|     data = fetchHandler(valid_links) | ||||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue