added image crawler

This commit is contained in:
s5260822 2025-03-10 18:38:18 +01:00
parent b72f1b9d1e
commit 0907e8623a

View file

@ -0,0 +1,846 @@
import time
import requests
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
import os
# Names of the members of parliament whose portraits are crawled.
# Each entry is one string with the last name first, followed by the first
# name(s), separated by single spaces.
# NOTE(review): several entries have multi-word last names (e.g.
# "In der Beek Olaf", "De Ridder Daniela") or more than one first name; code
# that splits an entry on spaces and takes the first two words will not get
# the intended last/first name for those entries.
members = [
"Gysi Gregor",
"Hüppe Hubert",
"Kubicki Wolfgang",
"Ramsauer Peter",
"Gröhe Hermann",
"Grund Manfred",
"Laschet Armin",
"Lemke Steffi",
"Meister Michael",
"Merz Friedrich",
"Özdemir Cem",
"Rachel Thomas",
"Röttgen Norbert",
"Straubinger Max",
"Bartsch Dietmar",
"Deligöz Ekin",
"Friedrich Hans-Peter",
"Göring-Eckardt Katrin Dagmar",
"Heil Hubertus",
"Kramme Anette",
"Nietan Dietmar",
"Pau Petra",
"Roth Claudia",
"Roth Michael",
"Schneider Carsten",
"Scholz Olaf",
"Trittin Jürgen",
"Widmann-Mauz Annette",
"Willsch Klaus-Peter",
"Bartol Sören",
"Braun Helge",
"Connemann Gitta",
"Dobrindt Alexander",
"Ferlemann Enak",
"Fricke Otto",
"Grosse-Brömer Michael",
"Grübel Markus",
"Gutting Olav",
"Hagedorn Bettina",
"Hennrich Michael",
"Klöckner Julia",
"Krichbaum Gunther",
"Künast Renate",
"Kurth Markus",
"Lips Patricia",
"Lötzsch Gesine",
"Lührmann Anna",
"Mantel Dorothee",
"Mayer Stephan",
"Mützenich Rolf",
"Raab Daniela",
"Rupprecht Albert",
"Schäfer Axel",
"Scheuer Andreas",
"Silberhorn Thomas",
"Spahn Jens",
"Stetten Christian",
"Storjohann Gero",
"Tillmann Antje",
"Wanderwitz Marco",
"Wellenreuther Ingo",
"Wissing Volker",
"Auernhammer Artur",
"Kofler Bärbel",
"Klingbeil Lars",
"Annen Niels",
"Bareiß Thomas",
"Brand Michael",
"Dağdelen Sevim",
"Ernst Klaus",
"Gehring Kai",
"Gerster Martin",
"Grütters Monika",
"Haßelmann Britta",
"Hofreiter Anton",
"Jung Andreas",
"Juratovic Josip",
"Kipping Katja",
"Koeppen Jens",
"Korte Jan",
"Lauterbach Karl",
"Lehrieder Paul",
"Link Michael",
"Mast Katja",
"Miersch Matthias",
"Müller Carsten",
"Müller Detlef",
"Otte Henning",
"Rix Sönke",
"Schäffler Frank",
"Schieder Marianne",
"Schwabe Frank",
"Schwarzelühr-Sutter Rita",
"Toncar Florian",
"Ulrich Alexander",
"Winkelmeier-Becker Elisabeth",
"Nouripour Omid",
"Strengmann-Kuhn Wolfgang",
"Hirte Christian",
"Aschenberg-Dugnus Christine",
"Aumer Peter",
"Bas Bärbel",
"Beyer Peter",
"Bilger Steffen",
"Birkwald Matthias W.",
"Brandl Reinhard",
"Brehmer Heike",
"Brinkhaus Ralph",
"Buschmann Marco",
"Djir-Sarai Bijan",
"Franke Edgar",
"Frieser Michael",
"Gädechens Ingo",
"Gebhart Thomas",
"Gerdes Michael",
"Gohlke Nicole",
"Hahn Florian",
"Hardt Jürgen",
"Heil Mechthild",
"Heveling Ansgar",
"Höferlin Manuel",
"Hunko Andrej",
"Jarzombek Thomas",
"Kaufmann Stefan",
"Keul Katja",
"Kiesewetter Roderich",
"Kindler Sven-Christian",
"Klein Volkmar",
"Klein-Schmeink Maria",
"Knoerig Axel",
"Kober Pascal",
"Krischer Oliver",
"Lange Ulrich",
"Lay Caren",
"Lenkert Ralph",
"Lindemann Lars",
"Lindner Christian",
"Linnemann Carsten",
"Luczak Jan-Marco",
"Luksic Oliver",
"Lutze Thomas",
"Malczak Agnes",
"Mattfeldt Andreas",
"Middelberg Mathias",
"Möhring Cornelia",
"Monstadt Dietrich",
"Müller Nadine",
"Müller-Gemmeke Beate",
"Nestle Ingrid",
"Notz Konstantin",
"Özoğuz Aydan",
"Paus Lisa",
"Rief Josef",
"Rüddel Erwin",
"Schnieder Patrick",
"Schwartze Stefan",
"Seif Detlef",
"Stier Dieter",
"Stracke Stephan",
"Thomae Stephan",
"Vogel Johannes",
"Vogler Kathrin",
"Wadephul Johann",
"Wagenknecht Sahra",
"Weiss Sabine",
"Ebner Harald",
"Lindner Tobias",
"Walter-Rosenheimer Beate",
"Todtenhausen Manfred",
"Hellmich Wolfgang",
"Reinhold Hagen",
"Albani Stephan",
"Amtsberg Luise",
"Baehrens Heike",
"Baerbock Annalena",
"Bahr Ulrike",
"Berghegger André",
"Brantner Franziska",
"Castellucci Lars",
"Daldrup Bernhard",
"Diaby Karamba",
"Dittmar Sabine",
"Donth Michael",
"Dröge Katharina",
"Durz Hansjörg",
"Esken Saskia",
"Färber Hermann",
"Fechner Johannes",
"Feiler Uwe",
"Frei Thorsten",
"Gastel Matthias",
"Grötsch Uli",
"Grundmann Oliver",
"Güntzler Fritz",
"Haase Christian",
"Hagl-Kehl Rita",
"Hahn André",
"Hakverdi Metin",
"Hartmann Sebastian",
"Hauer Matthias",
"Heck Stefan",
"Heidenblut Dirk",
"Heinrich Gabriela",
"Helfrich Mark",
"Hitschler Thomas",
"Hoffmann Alexander",
"Hoppenstedt Hendrik",
"Irlstorfer Erich",
"Janecek Dieter",
"Junge Frank",
"Karliczek Anja",
"Kippels Georg",
"Kiziltepe Cansel",
"Koob Markus",
"Körber Carsten",
"Kühn Christian",
"Launert Silke",
"Leikert Katja",
"Lenz Andreas",
"Lindholz Andrea",
"Magwas Yvonne",
"Metzler Jan",
"Mihalic Irene",
"Mittag Susanne",
"Müller Bettina",
"Müntefering Michelle",
"Oellers Wilfried",
"Oßner Florian",
"Özdemir Mahmut",
"Pahlmann Ingrid",
"Poschmann Sabine",
"Post Achim",
"Rabanus Martin",
"Radomski Kerstin",
"Radwan Alexander",
"Rainer Alois",
"Renner Martina",
"De Ridder Daniela",
"Rohde Dennis",
"Rosemann Martin",
"Rüffer Corinna",
"Rützel Bernd",
"Schauws Ulle",
"Scheer Nina",
"Schiefner Udo",
"Schimke Jana",
"Schmidt Dagmar",
"Schulz-Asche Kordula",
"Schwarz Andreas",
"Sorge Tino",
"Stadler Svenja",
"Stamm-Fibich Martina",
"Stefinger Wolfgang",
"Stegemann Albert",
"Steiniger Johannes",
"Tausend Claudia",
"Thews Michael",
"Timmermann-Fechter Astrid",
"Träger Carsten",
"Ullrich Volker",
"Verlinden Julia",
"Vöpel Dirk",
"Warken Nina",
"Weisgerber Anja",
"Westphal Bernd",
"Whittaker Kai",
"Wiese Dirk",
"Yüksel Gülistan",
"Zeulner Emmi",
"Zierke Stefan",
"Zimmermann Jens",
"Petry Christian",
"Glöckner Angelika",
"Schmitt Ronja",
"Ryglewski Sarah",
"Coße Jürgen",
"Uhl Markus",
"Akbulut Gökay",
"Alt Renata",
"Altenkamp Norbert Maria",
"Amthor Philipp",
"Badum Lisa",
"Bauer Nicole",
"Baumann Bernd",
"Bayram Canan",
"Beeck Jens",
"In der Beek Olaf",
"Bernhard Marc",
"Bernstein Melanie",
"Biadacz Marc",
"Bleck Andreas",
"Boehringer Peter",
"Brandenburg Jens",
"Brandenburg Mario",
"Brandner Stephan",
"Braun Jürgen",
"Breher Silvia",
"Brehm Sebastian",
"Breymaier Leni",
"Brodesser Carsten",
"Budde Katrin",
"Bühl Marcus",
"Busen Karlheinz",
"Bystron Petr",
"Cezanne Jörg",
"Christmann Anna",
"Chrupalla Tino",
"Cotar Joana",
"Cronenberg Carl-Julius",
"Curio Gottfried",
"Damerow Astrid",
"Dilcher Esther",
"Domscheit-Berg Anke",
"Dürr Christian",
"Ehrhorn Thomas",
"Erndl Thomas",
"Esdar Wiebke",
"Espendiller Michael",
"Faber Marcus",
"Fahimi Yasmin",
"Felser Peter",
"Ferschl Susanne",
"Föst Daniel",
"Friedhoff Dietmar",
"Frohnmaier Markus",
"Frömming Götz",
"Gauland Alexander",
"Gelbhaar Stefan",
"Glaser Albrecht",
"Gottschalk Kay",
"Gremmels Timon",
"Grundl Erhard",
"Hacker Thomas",
"Harder-Kühnel Mariana Iris",
"Haug Jochen",
"Heilmann Thomas",
"Helling-Plahr Katrin",
"Herbrand Markus",
"Herbst Torsten",
"Hess Martin",
"Hessel Katja",
"Hilse Karsten",
"Höchst Nicole",
"Hocker Gero Clemens",
"Hoffmann Bettina",
"Hoffmann Christoph",
"Holm Leif-Erik",
"Holtz Ottmar",
"Houben Reinhard",
"Huber Johannes",
"Jacobi Fabian",
"Jongen Marc",
"Jung Ingmar",
"Kaiser Elisabeth",
"Kappert-Gonther Kirsten",
"Keuter Stefan",
"Kießling Michael",
"Klein Karsten",
"Kleinwächter Norbert",
"Kloke Katharina",
"Kluckert Daniela",
"Köhler Lukas",
"Komning Enrico",
"König Jörn",
"Konrad Carina",
"Kotré Steffen",
"Kraft Rainer",
"Kuhle Konstantin",
"Lambsdorff Alexander",
"Lechte Ulrich",
"Lehmann Jens",
"Lehmann Sven",
"Lindh Helge",
"Loos Bernhard",
"Lucassen Rüdiger",
"Maas Heiko",
"Mannes Astrid",
"Meiser Pascal",
"Meyer Christoph",
"Miazga Corinna",
"Mohamed Ali Amira",
"Mohrs Falko",
"Moll Claudia",
"Möller Siemtje",
"Müller Alexander",
"Müller Axel",
"Müller Claudia",
"Müller Sepp",
"Müller-Rosentritt Frank",
"Münzenmaier Sebastian",
"Nastic Zaklin",
"Nicolaisen Petra",
"Nolte Jan Ralf",
"Ortleb Josephine",
"Oster Josef",
"Otten Gerold",
"Pellmann Sören",
"Perli Victor",
"Peterka Tobias Matthias",
"Pilsinger Stephan",
"Ploß Christoph",
"Pohl Jürgen",
"Polat Filiz",
"Protschka Stephan",
"Reichardt Martin",
"Renner Martin Erwin",
"Reuther Bernd",
"Riexinger Bernd",
"Rottmann Manuela",
"Rouenhoff Stefan",
"Sattelberger Thomas",
"Sauter Christian",
"Schielke-Ziesing Ulrike",
"Schmid Nils",
"Schmidt Stefan",
"Schmidt Uwe",
"Schneider Jörg",
"Schraps Johannes",
"Schreiner Felix",
"Schrodi Michael",
"Schulz Uwe",
"Seestern-Pauly Matthias",
"Seitz Thomas",
"Sichert Martin",
"Simon Björn",
"Spaniel Dirk",
"Springer René",
"Staffler Katrin",
"Stark-Watzinger Bettina",
"Stein Mathias",
"Storch Beatrix",
"Strack-Zimmermann Marie-Agnes",
"Strasser Benjamin",
"Tatti Jessica",
"Tebroke Hermann-Josef",
"Teuteberg Linda",
"Theurer Michael",
"Thies Hans-Jürgen",
"Throm Alexander",
"Töns Markus",
"Ullmann Andrew",
"Ullrich Gerald",
"Vieregge Kerstin",
"Vries Christoph",
"Weeser Sandra",
"Weidel Alice",
"Westig Nicole",
"Weyel Harald",
"Wiehle Wolfgang",
"Wiesmann Bettina Margarethe",
"Wirth Christian",
"Witt Uwe",
"Ziemiak Paul",
"Jensen Gyde",
"Völlers Marja-Liisa",
"Baradari Nezahat",
"Heidt Peter",
"Mackensen Isabel",
"Weingarten Joe",
"Hanke Reginald",
"Lehmann Sylvia",
"Bubendorfer-Licht Sandra",
"Martin Dorothee",
"Dahmen Janosch",
"Emmerich Marcel",
"Abdi Sanae",
"Abel Valentin",
"Abraham Knut",
"Adler Katja",
"Aeffner Stephanie",
"Ahmetovic Adis",
"Alabali-Radovan Reem",
"Al-Dailami Ali",
"Al-Halak Muhanad",
"Andres Dagmar",
"Arlt Johannes",
"Audretsch Andreas",
"Außendorf Maik",
"Bacherle Tobias",
"Bachmann Carolin",
"Baldy Daniel",
"Banaszak Felix",
"Bär Karl",
"Baum Christina",
"Beck Katharina",
"Beckamp Roger",
"Becker Holger",
"Benner Lukas",
"Berghahn Jürgen",
"Bergt Bengt",
"Blankenburg Jakob",
"Bochmann René",
"Bodtke Ingo",
"Boginski Friedhelm",
"Bollmann Gereon",
"Borchardt Simone",
"Brandes Dirk",
"Breilmann Michael",
"Bröhr Marlon",
"Bsirske Frank",
"Bury Yannick",
"Cademartori Dujisin Isabel",
"Czaja Mario",
"Demir Hakan",
"Detzer Sandra",
"Diedenhofen Martin",
"Dieren Jan",
"Dietz Thomas",
"Döring Felix",
"Droßmann Falko",
"Düring Deborah",
"Echeverria Axel",
"Eckert Leon",
"Edelhäußer Ralph",
"Eichwede Sonja",
"Engelhard Alexander",
"Engelhardt Heike",
"Englhardt-Kopf Martina",
"Farle Robert",
"Fäscher Ariane",
"Fester Emilia",
"Fiedler Sebastian",
"Funke Fabian",
"Funke-Kaiser Maximilian",
"Gambir Schahina",
"Ganserer Tessa",
"Gassner-Herz Martin",
"Gava Manuel",
"Geissler Jonas",
"Gerschau Knut",
"Gesenhues Jan-Niclas",
"Gnauck Hannes",
"Görke Christian",
"Gramling Fabian",
"Gräßle Ingeborg",
"Grau Armin",
"Grützmacher Sabine",
"Güler Serap",
"Gürpinar Ates",
"Habeck Robert",
"Hartewig Philipp",
"Harzer Ulrike",
"Heiligenstadt Frauke",
"Heitmann Linda",
"Helferich Matthias",
"Henneberger Kathrin",
"Hennig Anke",
"Hennig-Wellsow Susanne",
"Herrmann Bernhard",
"Heselhaus Nadine",
"Hierl Susanne",
"Hönel Bruno",
"Hoppermann Franziska",
"Hostert Jasmina",
"Hubertz Verena",
"Hümpfer Markus",
"Huy Gerrit",
"Janich Steffen",
"Janssen Anne",
"Jurisch Ann-Veruschka",
"Kaddor Lamya",
"Karaahmetoğlu Macit",
"Kasper Carlos",
"Kassautzki Anna",
"Kaufmann Malte",
"Kaufmann Michael",
"Keller Rainer Johannes",
"Kellner Michael",
"Kersten Franziska",
"Khan Misbah",
"Kleebank Helmut",
"Klein Ottilie",
"Klinck Kristian",
"Klose Annika",
"Klüssendorf Tim",
"König Anne",
"Kopf Chantal",
"Koß Simona",
"Kraft Laura",
"Krämer Philip",
"Kreiser Dunja",
"Kröber Martin",
"Kruse Michael",
"Kuban Tilman",
"Kühnert Kevin",
"Lahrkamp Sarah",
"Lang Ricarda",
"Larem Andreas",
"Latendorf Ina",
"Leiser Kevin",
"Lenders Jürgen",
"Lenk Barbara",
"Leye Christian",
"Licina-Bode Luiza",
"Lieb Thorsten",
"Liebert Anja",
"Limbacher Esra",
"Limburg Helge",
"Loop Denise",
"Lucks Max",
"Lugk Bettina",
"Lütke Kristine",
"Machalet Tanja",
"Mack Klaus",
"Malottki Erik",
"Mann Holger",
"Mansoori Kaweh",
"Martens Zanda",
"Marvi Parsa",
"Mascheck Franziska",
"Mayer Zoe",
"Mayer-Lay Volker",
"Mehltretter Andreas",
"Mehmet Ali Takis",
"Menge Susanne",
"Merten Anikó",
"Mesarosch Robin",
"Michaelsen Swantje Henrike",
"Michel Kathrin",
"Mieves Matthias David",
"Mijatović Boris",
"Moncsek Mike",
"Moosdorf Matthias",
"Mordhorst Maximilian",
"Mörseburg Maximilian",
"Müller Florian",
"Müller Michael",
"Müller Sascha",
"Nacke Stefan",
"Nanni Sara",
"Nasr Rasha",
"Naujok Edgar",
"Nick Ophelia",
"Nickholz Brian",
"Nürnberger Jörg",
"Oehl Lennard",
"Oppelt Moritz",
"Otte Karoline",
"Pahlke Julian",
"Pantazis Christos",
"Papenbrock Wiebke",
"Papendieck Mathias",
"Pawlik Natalie",
"Peick Jens",
"Philippi Andreas",
"Piechotta Paula",
"Plobner Jan",
"Plum Martin",
"Redder Volker",
"Rehbaum Henning",
"Reichel Markus",
"Reichinnek Heidi",
"Reinalter Anja",
"Rhie Ye-One",
"Rinck Frank",
"Rohwer Lars",
"Roloff Sebastian",
"Rosenthal Jessica",
"Röwekamp Thomas",
"Rudolph Thorsten",
"Rudolph Tina",
"Saleh Kassem Taher",
"Santos Firnhaber Catarina",
"Schäfer Ingo",
"Schäfer Jamila",
"Schäfer Sebastian",
"Schamber Rebecca",
"Schattner Bernd",
"Schätzl Johannes",
"Schenderlein Christiane",
"Schierenbeck Peggy",
"Schisanowski Timo",
"Schmid Christoph",
"Schmidt Eugen",
"Schmidt Jan Wenzel",
"Schneider Daniel",
"Schönberger Marlene",
"Schreider Christian",
"Schröder Christina-Johanne",
"Schröder Ria",
"Schulz Anja",
"Schulze Svenja",
"Schwarz Armin",
"Seidler Stefan",
"Seiter Stephan",
"Seitzl Lina",
"Sekmen Melis",
"Semet Rainer",
"Slawik Nyke",
"Spallek Anne Monika",
"Spellerberg Merle",
"Stahr Nina",
"Steffen Till",
"Stegner Ralf",
"Steinmüller Hanna",
"Sthamer Nadja",
"Stöber Klaus",
"Stöcker Diana",
"Stockmeier Konrad",
"Stumpp Christina",
"Stüwe Ruppert",
"Tesfaiesus Awet",
"Teutrine Jens",
"Tippelt Nico",
"Troff-Schaffarzyk Anja",
"Türk-Nachbaur Derya",
"Uhlig Katrin",
"Ullrich Frank",
"Vogt Oliver",
"Wagener Niklas",
"Wagener Robin",
"Wagner Carolin",
"Wagner Johannes",
"Wallstein Maja",
"Walter Hannes",
"Wegge Carmen",
"Wegling Melanie",
"Weishaupt Saskia",
"Weiss Maria-Lena",
"Wenzel Stefan",
"Werner Lena",
"Wiener Klaus",
"Winkler Tobias",
"Winklmann Tina",
"Wissler Janine",
"Wittmann Mechthilde",
"Wollmann Herbert",
"Wulf Mareike Lotte",
"Wundrak Joachim",
"Ziegler Kay-Uwe",
"Zippelius Nicolas",
"Zorn Armand",
"Zschau Katrin",
"Raffelhüschen Claudia",
"Bünger Clara",
"Sacher Michael",
"Gründer Nils",
"Rinkert Daniel",
"Bartz Alexander",
"Vontz Emily",
"Mende Dirk-Ulrich",
"Föhr Alexander",
"Rothfuß Rainer",
"Trăsnea Ana-Maria",
"Bartelt Christian",
"Wagner Tim",
"Ruf Nadine",
"Kretz Jürgen",
"Krumwiede-Steiner Franziska",
"Hohmann Angela",
"Heubach Heike",
"Schiller Manfred",
"Griewel Fabian"
]
# URL template for a portrait search in the Bundestag image database.
# {0} is replaced by the last name and {1} by the first name, giving the
# free-text query "<lastname>, <firstname>" ("%2C+" is the URL-encoded ", ").
# The "ereignis" filter restricts the results to "Porträt/Portrait".
# NOTE(review): the meaning of sortVal=2 is not visible here — confirm the
# sort order it selects.
base_url = "https://bilddatenbank.bundestag.de/search/picture-result?query={0}%2C+{1}&filterQuery%5Bereignis%5D%5B%5D=Portr%C3%A4t%2FPortrait&sortVal=2"
# Alternative template that filters on the "name" field instead of using the
# free-text query; currently disabled.
#base_url = "https://bilddatenbank.bundestag.de/search/picture-result?filterQuery%5Bname%5D%5B%5D={0}l%2C+{1}&filterQuery%5Bereignis%5D%5B%5D=Portr%C3%A4t%2FPortrait&sortVal=2"
def fetch_image(lastname, firstname):
    """Download the first portrait found for one member of parliament.

    Opens the search URL built from ``base_url`` in a Chrome session, takes
    the first ``data-srcset`` entry ending in ``.jpg`` from the page source,
    and saves that image as
    ``membersOfParliamentImages/<lastname>_<firstname>.jpg``.

    Args:
        lastname: Last name inserted into the search query and the file name.
        firstname: First name inserted into the search query and the file
            name.

    Returns:
        None. Progress and problems are reported on stdout. A missing image,
        a failed download, or a failed file write is reported and skipped so
        that a batch run can continue with the next member.

    Errors raised while starting Chrome or loading the page are not caught
    here and propagate to the caller.
    """
    chrome_options = Options()
    # Headless mode is deliberately not enabled: the images did not load
    # when the browser ran headless.
    chrome_options.add_argument("--disable-gpu")
    # Path to the ChromeDriver executable.
    driver_path = "/usr/bin/chromedriver"
    service = Service(driver_path)
    driver = webdriver.Chrome(service=service, options=chrome_options)
    try:
        driver.get(base_url.format(lastname, firstname))
        page_source = driver.page_source
    finally:
        # The browser is only needed to render the result page; close it
        # before the (possibly slow) download starts.
        driver.quit()

    print(f"Fetching image for {lastname}, {firstname}")
    srcset_match = extract_until_quote(page_source, substring="data-srcset=")
    if srcset_match is None:
        # Checked before any string handling, so a member without a portrait
        # is reported cleanly instead of failing on None.
        print("No matching image found.")
        return

    # Turn 'data-srcset="/path/x.jpg' into an absolute image URL.
    img_url = srcset_match.replace("data-srcset=\"", "https://bilddatenbank.bundestag.de")
    print(f"Found image URL: {img_url}")

    image_dir = "membersOfParliamentImages"
    image_filename = f"{image_dir}/{lastname}_{firstname}.jpg"
    try:
        # The timeout keeps one unresponsive request from stalling the whole
        # crawl; raise_for_status prevents saving an HTTP error page as .jpg.
        response = requests.get(img_url, timeout=30)
        response.raise_for_status()
        os.makedirs(image_dir, exist_ok=True)
        with open(image_filename, "wb") as f:
            f.write(response.content)
    except (requests.RequestException, OSError) as exc:
        # Only expected I/O failures are handled; KeyboardInterrupt and
        # programming errors are no longer swallowed.
        print(f"ERROR: Could not download or save the image from {img_url}: {exc}")
        return
    print(f"Downloaded image: {image_filename}")
def extract_until_quote(long_string, substring, end_marker=".jpg"):
    """Return the part of ``long_string`` from ``substring`` through ``end_marker``.

    Despite its name, the function does not stop at a quote character: the
    result starts at the first occurrence of ``substring`` and ends with the
    first ``end_marker`` found at or after that position. Both markers are
    included in the result.

    Args:
        long_string: Text to search, for example the HTML source of a page.
        substring: Marker at which the extracted text begins.
        end_marker: Marker at which the extracted text ends. Defaults to
            ``".jpg"``, the value that was previously hard-coded.

    Returns:
        The extracted text, or ``None`` if ``substring`` is not found or no
        ``end_marker`` follows it.
    """
    start_index = long_string.find(substring)
    if start_index == -1:
        return None  # Start marker not found.
    end_index = long_string.find(end_marker, start_index)
    if end_index == -1:
        return None  # No end marker after the start marker.
    # Include the end marker itself in the result.
    return long_string[start_index:end_index + len(end_marker)]
# Sample name pair; the crawl loop below does not read these two variables.
lastname, firstname = 'Merkel', 'Angela'
# Fetch one portrait per entry, using the first word of the entry as the last
# name and the second word as the first name.
# NOTE(review): entries with a multi-word last name (e.g. "In der Beek Olaf")
# are split incorrectly by this rule — confirm how they should be handled.
for member in members:
    name_parts = member.split(" ")
    fetch_image(name_parts[0], name_parts[1])