diff --git a/src/main/resources/members_of_parliament_image_crawler.py b/src/main/resources/members_of_parliament_image_crawler.py new file mode 100644 index 0000000..9bb2340 --- /dev/null +++ b/src/main/resources/members_of_parliament_image_crawler.py @@ -0,0 +1,846 @@ +import time +import requests +from selenium import webdriver +from selenium.webdriver.chrome.service import Service +from selenium.webdriver.chrome.options import Options +from selenium.webdriver.common.by import By +import os + +members = [ + "Gysi Gregor", + "Hüppe Hubert", + "Kubicki Wolfgang", + "Ramsauer Peter", + "Gröhe Hermann", + "Grund Manfred", + "Laschet Armin", + "Lemke Steffi", + "Meister Michael", + "Merz Friedrich", + "Özdemir Cem", + "Rachel Thomas", + "Röttgen Norbert", + "Straubinger Max", + "Bartsch Dietmar", + "Deligöz Ekin", + "Friedrich Hans-Peter", + "Göring-Eckardt Katrin Dagmar", + "Heil Hubertus", + "Kramme Anette", + "Nietan Dietmar", + "Pau Petra", + "Roth Claudia", + "Roth Michael", + "Schneider Carsten", + "Scholz Olaf", + "Trittin Jürgen", + "Widmann-Mauz Annette", + "Willsch Klaus-Peter", + "Bartol Sören", + "Braun Helge", + "Connemann Gitta", + "Dobrindt Alexander", + "Ferlemann Enak", + "Fricke Otto", + "Grosse-Brömer Michael", + "Grübel Markus", + "Gutting Olav", + "Hagedorn Bettina", + "Hennrich Michael", + "Klöckner Julia", + "Krichbaum Gunther", + "Künast Renate", + "Kurth Markus", + "Lips Patricia", + "Lötzsch Gesine", + "Lührmann Anna", + "Mantel Dorothee", + "Mayer Stephan", + "Mützenich Rolf", + "Raab Daniela", + "Rupprecht Albert", + "Schäfer Axel", + "Scheuer Andreas", + "Silberhorn Thomas", + "Spahn Jens", + "Stetten Christian", + "Storjohann Gero", + "Tillmann Antje", + "Wanderwitz Marco", + "Wellenreuther Ingo", + "Wissing Volker", + "Auernhammer Artur", + "Kofler Bärbel", + "Klingbeil Lars", + "Annen Niels", + "Bareiß Thomas", + "Brand Michael", + "Dağdelen Sevim", + "Ernst Klaus", + "Gehring Kai", + "Gerster Martin", + "Grütters Monika", + "Haßelmann 
Britta", + "Hofreiter Anton", + "Jung Andreas", + "Juratovic Josip", + "Kipping Katja", + "Koeppen Jens", + "Korte Jan", + "Lauterbach Karl", + "Lehrieder Paul", + "Link Michael", + "Mast Katja", + "Miersch Matthias", + "Müller Carsten", + "Müller Detlef", + "Otte Henning", + "Rix Sönke", + "Schäffler Frank", + "Schieder Marianne", + "Schwabe Frank", + "Schwarzelühr-Sutter Rita", + "Toncar Florian", + "Ulrich Alexander", + "Winkelmeier-Becker Elisabeth", + "Nouripour Omid", + "Strengmann-Kuhn Wolfgang", + "Hirte Christian", + "Aschenberg-Dugnus Christine", + "Aumer Peter", + "Bas Bärbel", + "Beyer Peter", + "Bilger Steffen", + "Birkwald Matthias W.", + "Brandl Reinhard", + "Brehmer Heike", + "Brinkhaus Ralph", + "Buschmann Marco", + "Djir-Sarai Bijan", + "Franke Edgar", + "Frieser Michael", + "Gädechens Ingo", + "Gebhart Thomas", + "Gerdes Michael", + "Gohlke Nicole", + "Hahn Florian", + "Hardt Jürgen", + "Heil Mechthild", + "Heveling Ansgar", + "Höferlin Manuel", + "Hunko Andrej", + "Jarzombek Thomas", + "Kaufmann Stefan", + "Keul Katja", + "Kiesewetter Roderich", + "Kindler Sven-Christian", + "Klein Volkmar", + "Klein-Schmeink Maria", + "Knoerig Axel", + "Kober Pascal", + "Krischer Oliver", + "Lange Ulrich", + "Lay Caren", + "Lenkert Ralph", + "Lindemann Lars", + "Lindner Christian", + "Linnemann Carsten", + "Luczak Jan-Marco", + "Luksic Oliver", + "Lutze Thomas", + "Malczak Agnes", + "Mattfeldt Andreas", + "Middelberg Mathias", + "Möhring Cornelia", + "Monstadt Dietrich", + "Müller Nadine", + "Müller-Gemmeke Beate", + "Nestle Ingrid", + "Notz Konstantin", + "Özoğuz Aydan", + "Paus Lisa", + "Rief Josef", + "Rüddel Erwin", + "Schnieder Patrick", + "Schwartze Stefan", + "Seif Detlef", + "Stier Dieter", + "Stracke Stephan", + "Thomae Stephan", + "Vogel Johannes", + "Vogler Kathrin", + "Wadephul Johann", + "Wagenknecht Sahra", + "Weiss Sabine", + "Ebner Harald", + "Lindner Tobias", + "Walter-Rosenheimer Beate", + "Todtenhausen Manfred", + "Hellmich Wolfgang", + 
"Reinhold Hagen", + "Albani Stephan", + "Amtsberg Luise", + "Baehrens Heike", + "Baerbock Annalena", + "Bahr Ulrike", + "Berghegger André", + "Brantner Franziska", + "Castellucci Lars", + "Daldrup Bernhard", + "Diaby Karamba", + "Dittmar Sabine", + "Donth Michael", + "Dröge Katharina", + "Durz Hansjörg", + "Esken Saskia", + "Färber Hermann", + "Fechner Johannes", + "Feiler Uwe", + "Frei Thorsten", + "Gastel Matthias", + "Grötsch Uli", + "Grundmann Oliver", + "Güntzler Fritz", + "Haase Christian", + "Hagl-Kehl Rita", + "Hahn André", + "Hakverdi Metin", + "Hartmann Sebastian", + "Hauer Matthias", + "Heck Stefan", + "Heidenblut Dirk", + "Heinrich Gabriela", + "Helfrich Mark", + "Hitschler Thomas", + "Hoffmann Alexander", + "Hoppenstedt Hendrik", + "Irlstorfer Erich", + "Janecek Dieter", + "Junge Frank", + "Karliczek Anja", + "Kippels Georg", + "Kiziltepe Cansel", + "Koob Markus", + "Körber Carsten", + "Kühn Christian", + "Launert Silke", + "Leikert Katja", + "Lenz Andreas", + "Lindholz Andrea", + "Magwas Yvonne", + "Metzler Jan", + "Mihalic Irene", + "Mittag Susanne", + "Müller Bettina", + "Müntefering Michelle", + "Oellers Wilfried", + "Oßner Florian", + "Özdemir Mahmut", + "Pahlmann Ingrid", + "Poschmann Sabine", + "Post Achim", + "Rabanus Martin", + "Radomski Kerstin", + "Radwan Alexander", + "Rainer Alois", + "Renner Martina", + "De Ridder Daniela", + "Rohde Dennis", + "Rosemann Martin", + "Rüffer Corinna", + "Rützel Bernd", + "Schauws Ulle", + "Scheer Nina", + "Schiefner Udo", + "Schimke Jana", + "Schmidt Dagmar", + "Schulz-Asche Kordula", + "Schwarz Andreas", + "Sorge Tino", + "Stadler Svenja", + "Stamm-Fibich Martina", + "Stefinger Wolfgang", + "Stegemann Albert", + "Steiniger Johannes", + "Tausend Claudia", + "Thews Michael", + "Timmermann-Fechter Astrid", + "Träger Carsten", + "Ullrich Volker", + "Verlinden Julia", + "Vöpel Dirk", + "Warken Nina", + "Weisgerber Anja", + "Westphal Bernd", + "Whittaker Kai", + "Wiese Dirk", + "Yüksel Gülistan", + "Zeulner 
Emmi", + "Zierke Stefan", + "Zimmermann Jens", + "Petry Christian", + "Glöckner Angelika", + "Schmitt Ronja", + "Ryglewski Sarah", + "Coße Jürgen", + "Uhl Markus", + "Akbulut Gökay", + "Alt Renata", + "Altenkamp Norbert Maria", + "Amthor Philipp", + "Badum Lisa", + "Bauer Nicole", + "Baumann Bernd", + "Bayram Canan", + "Beeck Jens", + "In der Beek Olaf", + "Bernhard Marc", + "Bernstein Melanie", + "Biadacz Marc", + "Bleck Andreas", + "Boehringer Peter", + "Brandenburg Jens", + "Brandenburg Mario", + "Brandner Stephan", + "Braun Jürgen", + "Breher Silvia", + "Brehm Sebastian", + "Breymaier Leni", + "Brodesser Carsten", + "Budde Katrin", + "Bühl Marcus", + "Busen Karlheinz", + "Bystron Petr", + "Cezanne Jörg", + "Christmann Anna", + "Chrupalla Tino", + "Cotar Joana", + "Cronenberg Carl-Julius", + "Curio Gottfried", + "Damerow Astrid", + "Dilcher Esther", + "Domscheit-Berg Anke", + "Dürr Christian", + "Ehrhorn Thomas", + "Erndl Thomas", + "Esdar Wiebke", + "Espendiller Michael", + "Faber Marcus", + "Fahimi Yasmin", + "Felser Peter", + "Ferschl Susanne", + "Föst Daniel", + "Friedhoff Dietmar", + "Frohnmaier Markus", + "Frömming Götz", + "Gauland Alexander", + "Gelbhaar Stefan", + "Glaser Albrecht", + "Gottschalk Kay", + "Gremmels Timon", + "Grundl Erhard", + "Hacker Thomas", + "Harder-Kühnel Mariana Iris", + "Haug Jochen", + "Heilmann Thomas", + "Helling-Plahr Katrin", + "Herbrand Markus", + "Herbst Torsten", + "Hess Martin", + "Hessel Katja", + "Hilse Karsten", + "Höchst Nicole", + "Hocker Gero Clemens", + "Hoffmann Bettina", + "Hoffmann Christoph", + "Holm Leif-Erik", + "Holtz Ottmar", + "Houben Reinhard", + "Huber Johannes", + "Jacobi Fabian", + "Jongen Marc", + "Jung Ingmar", + "Kaiser Elisabeth", + "Kappert-Gonther Kirsten", + "Keuter Stefan", + "Kießling Michael", + "Klein Karsten", + "Kleinwächter Norbert", + "Kloke Katharina", + "Kluckert Daniela", + "Köhler Lukas", + "Komning Enrico", + "König Jörn", + "Konrad Carina", + "Kotré Steffen", + "Kraft Rainer", + 
"Kuhle Konstantin", + "Lambsdorff Alexander", + "Lechte Ulrich", + "Lehmann Jens", + "Lehmann Sven", + "Lindh Helge", + "Loos Bernhard", + "Lucassen Rüdiger", + "Maas Heiko", + "Mannes Astrid", + "Meiser Pascal", + "Meyer Christoph", + "Miazga Corinna", + "Mohamed Ali Amira", + "Mohrs Falko", + "Moll Claudia", + "Möller Siemtje", + "Müller Alexander", + "Müller Axel", + "Müller Claudia", + "Müller Sepp", + "Müller-Rosentritt Frank", + "Münzenmaier Sebastian", + "Nastic Zaklin", + "Nicolaisen Petra", + "Nolte Jan Ralf", + "Ortleb Josephine", + "Oster Josef", + "Otten Gerold", + "Pellmann Sören", + "Perli Victor", + "Peterka Tobias Matthias", + "Pilsinger Stephan", + "Ploß Christoph", + "Pohl Jürgen", + "Polat Filiz", + "Protschka Stephan", + "Reichardt Martin", + "Renner Martin Erwin", + "Reuther Bernd", + "Riexinger Bernd", + "Rottmann Manuela", + "Rouenhoff Stefan", + "Sattelberger Thomas", + "Sauter Christian", + "Schielke-Ziesing Ulrike", + "Schmid Nils", + "Schmidt Stefan", + "Schmidt Uwe", + "Schneider Jörg", + "Schraps Johannes", + "Schreiner Felix", + "Schrodi Michael", + "Schulz Uwe", + "Seestern-Pauly Matthias", + "Seitz Thomas", + "Sichert Martin", + "Simon Björn", + "Spaniel Dirk", + "Springer René", + "Staffler Katrin", + "Stark-Watzinger Bettina", + "Stein Mathias", + "Storch Beatrix", + "Strack-Zimmermann Marie-Agnes", + "Strasser Benjamin", + "Tatti Jessica", + "Tebroke Hermann-Josef", + "Teuteberg Linda", + "Theurer Michael", + "Thies Hans-Jürgen", + "Throm Alexander", + "Töns Markus", + "Ullmann Andrew", + "Ullrich Gerald", + "Vieregge Kerstin", + "Vries Christoph", + "Weeser Sandra", + "Weidel Alice", + "Westig Nicole", + "Weyel Harald", + "Wiehle Wolfgang", + "Wiesmann Bettina Margarethe", + "Wirth Christian", + "Witt Uwe", + "Ziemiak Paul", + "Jensen Gyde", + "Völlers Marja-Liisa", + "Baradari Nezahat", + "Heidt Peter", + "Mackensen Isabel", + "Weingarten Joe", + "Hanke Reginald", + "Lehmann Sylvia", + "Bubendorfer-Licht Sandra", + "Martin 
Dorothee", + "Dahmen Janosch", + "Emmerich Marcel", + "Abdi Sanae", + "Abel Valentin", + "Abraham Knut", + "Adler Katja", + "Aeffner Stephanie", + "Ahmetovic Adis", + "Alabali-Radovan Reem", + "Al-Dailami Ali", + "Al-Halak Muhanad", + "Andres Dagmar", + "Arlt Johannes", + "Audretsch Andreas", + "Außendorf Maik", + "Bacherle Tobias", + "Bachmann Carolin", + "Baldy Daniel", + "Banaszak Felix", + "Bär Karl", + "Baum Christina", + "Beck Katharina", + "Beckamp Roger", + "Becker Holger", + "Benner Lukas", + "Berghahn Jürgen", + "Bergt Bengt", + "Blankenburg Jakob", + "Bochmann René", + "Bodtke Ingo", + "Boginski Friedhelm", + "Bollmann Gereon", + "Borchardt Simone", + "Brandes Dirk", + "Breilmann Michael", + "Bröhr Marlon", + "Bsirske Frank", + "Bury Yannick", + "Cademartori Dujisin Isabel", + "Czaja Mario", + "Demir Hakan", + "Detzer Sandra", + "Diedenhofen Martin", + "Dieren Jan", + "Dietz Thomas", + "Döring Felix", + "Droßmann Falko", + "Düring Deborah", + "Echeverria Axel", + "Eckert Leon", + "Edelhäußer Ralph", + "Eichwede Sonja", + "Engelhard Alexander", + "Engelhardt Heike", + "Englhardt-Kopf Martina", + "Farle Robert", + "Fäscher Ariane", + "Fester Emilia", + "Fiedler Sebastian", + "Funke Fabian", + "Funke-Kaiser Maximilian", + "Gambir Schahina", + "Ganserer Tessa", + "Gassner-Herz Martin", + "Gava Manuel", + "Geissler Jonas", + "Gerschau Knut", + "Gesenhues Jan-Niclas", + "Gnauck Hannes", + "Görke Christian", + "Gramling Fabian", + "Gräßle Ingeborg", + "Grau Armin", + "Grützmacher Sabine", + "Güler Serap", + "Gürpinar Ates", + "Habeck Robert", + "Hartewig Philipp", + "Harzer Ulrike", + "Heiligenstadt Frauke", + "Heitmann Linda", + "Helferich Matthias", + "Henneberger Kathrin", + "Hennig Anke", + "Hennig-Wellsow Susanne", + "Herrmann Bernhard", + "Heselhaus Nadine", + "Hierl Susanne", + "Hönel Bruno", + "Hoppermann Franziska", + "Hostert Jasmina", + "Hubertz Verena", + "Hümpfer Markus", + "Huy Gerrit", + "Janich Steffen", + "Janssen Anne", + "Jurisch 
Ann-Veruschka", + "Kaddor Lamya", + "Karaahmetoğlu Macit", + "Kasper Carlos", + "Kassautzki Anna", + "Kaufmann Malte", + "Kaufmann Michael", + "Keller Rainer Johannes", + "Kellner Michael", + "Kersten Franziska", + "Khan Misbah", + "Kleebank Helmut", + "Klein Ottilie", + "Klinck Kristian", + "Klose Annika", + "Klüssendorf Tim", + "König Anne", + "Kopf Chantal", + "Koß Simona", + "Kraft Laura", + "Krämer Philip", + "Kreiser Dunja", + "Kröber Martin", + "Kruse Michael", + "Kuban Tilman", + "Kühnert Kevin", + "Lahrkamp Sarah", + "Lang Ricarda", + "Larem Andreas", + "Latendorf Ina", + "Leiser Kevin", + "Lenders Jürgen", + "Lenk Barbara", + "Leye Christian", + "Licina-Bode Luiza", + "Lieb Thorsten", + "Liebert Anja", + "Limbacher Esra", + "Limburg Helge", + "Loop Denise", + "Lucks Max", + "Lugk Bettina", + "Lütke Kristine", + "Machalet Tanja", + "Mack Klaus", + "Malottki Erik", + "Mann Holger", + "Mansoori Kaweh", + "Martens Zanda", + "Marvi Parsa", + "Mascheck Franziska", + "Mayer Zoe", + "Mayer-Lay Volker", + "Mehltretter Andreas", + "Mehmet Ali Takis", + "Menge Susanne", + "Merten Anikó", + "Mesarosch Robin", + "Michaelsen Swantje Henrike", + "Michel Kathrin", + "Mieves Matthias David", + "Mijatović Boris", + "Moncsek Mike", + "Moosdorf Matthias", + "Mordhorst Maximilian", + "Mörseburg Maximilian", + "Müller Florian", + "Müller Michael", + "Müller Sascha", + "Nacke Stefan", + "Nanni Sara", + "Nasr Rasha", + "Naujok Edgar", + "Nick Ophelia", + "Nickholz Brian", + "Nürnberger Jörg", + "Oehl Lennard", + "Oppelt Moritz", + "Otte Karoline", + "Pahlke Julian", + "Pantazis Christos", + "Papenbrock Wiebke", + "Papendieck Mathias", + "Pawlik Natalie", + "Peick Jens", + "Philippi Andreas", + "Piechotta Paula", + "Plobner Jan", + "Plum Martin", + "Redder Volker", + "Rehbaum Henning", + "Reichel Markus", + "Reichinnek Heidi", + "Reinalter Anja", + "Rhie Ye-One", + "Rinck Frank", + "Rohwer Lars", + "Roloff Sebastian", + "Rosenthal Jessica", + "Röwekamp Thomas", + "Rudolph 
Thorsten", + "Rudolph Tina", + "Saleh Kassem Taher", + "Santos Firnhaber Catarina", + "Schäfer Ingo", + "Schäfer Jamila", + "Schäfer Sebastian", + "Schamber Rebecca", + "Schattner Bernd", + "Schätzl Johannes", + "Schenderlein Christiane", + "Schierenbeck Peggy", + "Schisanowski Timo", + "Schmid Christoph", + "Schmidt Eugen", + "Schmidt Jan Wenzel", + "Schneider Daniel", + "Schönberger Marlene", + "Schreider Christian", + "Schröder Christina-Johanne", + "Schröder Ria", + "Schulz Anja", + "Schulze Svenja", + "Schwarz Armin", + "Seidler Stefan", + "Seiter Stephan", + "Seitzl Lina", + "Sekmen Melis", + "Semet Rainer", + "Slawik Nyke", + "Spallek Anne Monika", + "Spellerberg Merle", + "Stahr Nina", + "Steffen Till", + "Stegner Ralf", + "Steinmüller Hanna", + "Sthamer Nadja", + "Stöber Klaus", + "Stöcker Diana", + "Stockmeier Konrad", + "Stumpp Christina", + "Stüwe Ruppert", + "Tesfaiesus Awet", + "Teutrine Jens", + "Tippelt Nico", + "Troff-Schaffarzyk Anja", + "Türk-Nachbaur Derya", + "Uhlig Katrin", + "Ullrich Frank", + "Vogt Oliver", + "Wagener Niklas", + "Wagener Robin", + "Wagner Carolin", + "Wagner Johannes", + "Wallstein Maja", + "Walter Hannes", + "Wegge Carmen", + "Wegling Melanie", + "Weishaupt Saskia", + "Weiss Maria-Lena", + "Wenzel Stefan", + "Werner Lena", + "Wiener Klaus", + "Winkler Tobias", + "Winklmann Tina", + "Wissler Janine", + "Wittmann Mechthilde", + "Wollmann Herbert", + "Wulf Mareike Lotte", + "Wundrak Joachim", + "Ziegler Kay-Uwe", + "Zippelius Nicolas", + "Zorn Armand", + "Zschau Katrin", + "Raffelhüschen Claudia", + "Bünger Clara", + "Sacher Michael", + "Gründer Nils", + "Rinkert Daniel", + "Bartz Alexander", + "Vontz Emily", + "Mende Dirk-Ulrich", + "Föhr Alexander", + "Rothfuß Rainer", + "Trăsnea Ana-Maria", + "Bartelt Christian", + "Wagner Tim", + "Ruf Nadine", + "Kretz Jürgen", + "Krumwiede-Steiner Franziska", + "Hohmann Angela", + "Heubach Heike", + "Schiller Manfred", + "Griewel Fabian" +] + +# Base URL for querying (with placeholders 
# for last name and first name)
base_url = "https://bilddatenbank.bundestag.de/search/picture-result?query={0}%2C+{1}&filterQuery%5Bereignis%5D%5B%5D=Portr%C3%A4t%2FPortrait&sortVal=2"

# Host that the relative image path found in the result page is prefixed with.
IMAGE_HOST = "https://bilddatenbank.bundestag.de"
# Attribute marker that precedes the relative image path in the result page.
SRCSET_MARKER = "data-srcset="
# Path to the ChromeDriver executable.
CHROMEDRIVER_PATH = "/usr/bin/chromedriver"
# Directory the downloaded portraits are written to.
OUTPUT_DIR = "membersOfParliamentImages"
# Upper bound in seconds for a single image download, so one stalled
# connection cannot block the whole crawl.
DOWNLOAD_TIMEOUT = 30


def fetch_image(lastname, firstname):
    """Search the image database for a member's portrait and save it.

    Opens the search result page for "lastname, firstname" in Chrome, takes
    the first ``data-srcset`` image reference from the page source and
    downloads it to ``membersOfParliamentImages/<lastname>_<firstname>.jpg``.

    Args:
        lastname: Last name used in the search query and the file name.
        firstname: First name used in the search query and the file name.

    Returns:
        None. Progress and failures are reported on stdout; a missing image
        or a failed download does not raise.

    Raises:
        Whatever Selenium raises when the browser cannot be started or the
        page cannot be opened; those errors are not handled here.
    """
    # Local import: the file's top-level import block is left untouched.
    from urllib.parse import quote_plus

    chrome_options = Options()
    # Headless mode is deliberately not enabled: the result images did not
    # load when the browser ran headless.
    chrome_options.add_argument("--disable-gpu")

    service = Service(CHROMEDRIVER_PATH)
    driver = webdriver.Chrome(service=service, options=chrome_options)

    try:
        print(f"Fetching image for {lastname}, {firstname}")
        # Percent-encode the names so umlauts and other non-ASCII characters
        # form a valid query string.
        driver.get(base_url.format(quote_plus(lastname), quote_plus(firstname)))
        page_source = driver.page_source
    finally:
        # Always close the WebDriver session, even if loading the page failed.
        driver.quit()

    img_url = _build_image_url(page_source)
    if img_url is None:
        print("No matching image found.")
        return

    print(f"Found image URL: {img_url}")

    image_filename = f"{OUTPUT_DIR}/{lastname}_{firstname}.jpg"
    try:
        _download_image(img_url, image_filename)
    except (requests.RequestException, OSError) as exc:
        # Best effort: report the failure and let the caller continue with
        # the next member.
        print(f"ERROR: Could not download or save image {img_url}: {exc}")
        return

    print(f"Downloaded image: {image_filename}")


def _build_image_url(page_source):
    """Return the absolute URL of the first image in *page_source*, or None."""
    fragment = extract_until_quote(page_source, substring=SRCSET_MARKER)
    if fragment is None:
        return None
    # The fragment looks like 'data-srcset="/path/to/file.jpg'; swapping the
    # marker and its opening quote for the host yields an absolute URL.
    return fragment.replace(SRCSET_MARKER + '"', IMAGE_HOST)


def _download_image(img_url, image_filename):
    """Download *img_url* and write the response body to *image_filename*."""
    response = requests.get(img_url, timeout=DOWNLOAD_TIMEOUT)
    # Do not store an HTTP error page under a .jpg name.
    response.raise_for_status()

    os.makedirs(OUTPUT_DIR, exist_ok=True)
    with open(image_filename, "wb") as f:
        f.write(response.content)


def extract_until_quote(long_string, substring):
    """Return the text from *substring* through the next ``.jpg`` extension.

    Despite its name, the function does not stop at a quote character: the
    result starts at the first occurrence of *substring* and ends with the
    first ``.jpg`` found at or after that position (extension included).

    Args:
        long_string: Text to search, e.g. the HTML source of a page.
        substring: Marker at which the extracted text begins.

    Returns:
        The extracted text, or None if *substring* does not occur or no
        ``.jpg`` follows it.
    """
    start_index = long_string.find(substring)
    if start_index == -1:
        return None  # Marker not found

    extension = ".jpg"
    end_index = long_string.find(extension, start_index)
    if end_index == -1:
        return None  # No .jpg after the marker

    return long_string[start_index:end_index + len(extension)]


def main():
    """Fetch the portrait of every entry in ``members``.

    Each entry is split on spaces; the first token is used as the last name
    and the second as the first name, any further tokens are ignored.
    """
    # NOTE(review): entries with a multi-word last name (e.g.
    # "De Ridder Daniela") are split after the first word, so the query and
    # the file name are built from the wrong parts - confirm how such names
    # should be handled.
    for member in members:
        name_parts = member.split(" ")
        fetch_image(name_parts[0], name_parts[1])


if __name__ == "__main__":
    main()