implemented import of speeches to database

This commit is contained in:
Picman2000 2025-03-04 14:03:40 +01:00
parent ae5c3f17eb
commit 631560a94e
7 changed files with 35 additions and 139 deletions

View file

@ -2,10 +2,7 @@ package org.texttechnologylab.project.gruppe_05_1;
import com.mongodb.client.MongoDatabase;
import org.texttechnologylab.project.gruppe_05_1.database.MongoDBHandler;
import org.texttechnologylab.project.gruppe_05_1.database.MongoObjectFactory;
import org.texttechnologylab.project.gruppe_05_1.database.SpeechIndex;
import org.texttechnologylab.project.gruppe_05_1.database.SpeechIndexFactoryImpl;
import org.texttechnologylab.project.gruppe_05_1.database.*;
import org.texttechnologylab.project.gruppe_05_1.domain.mdb.Mdb;
import org.texttechnologylab.project.gruppe_05_1.domain.mdb.MdbDocument;
import org.texttechnologylab.project.gruppe_05_1.rest.RESTHandler;
@ -23,6 +20,7 @@ import java.io.IOException;
import java.util.List;
import java.util.Properties;
import static java.lang.Boolean.FALSE;
import static java.lang.Boolean.TRUE;
/*
@ -60,38 +58,38 @@ public class Main {
Logger.pink("Parsing XML and inserting data into DB (Uebung 2)...");
SpeechIndexFactoryImpl speechIndexFactory = new SpeechIndexFactoryImpl();
if (MongoPprUtils.getSpeechCollection().countDocuments() != 0) {
System.out.println("Speeches werden nicht gelesen, da sie bereits in der Datenbank stehen");
}
else {
SpeechIndex speechIndex = speechIndexFactory
.parseLegislativePeriods(TRUE)
.builder()
.parseSessions()
.filterForCurrentMembers(FALSE)
.build();
SpeechIndex speechIndex = speechIndexFactory
.builder()
.parseLegislativePeriods(TRUE)
.parseSessions()
.parseMembers()
.parseFractions()
.filterForCurrentMembers(TRUE)
.build();
//speechIndex.printInfo();
//speechIndex.printInfo();
System.out.println("Data retrieved from DB:");
System.out.println("SPEECHCOUNT: " + speechIndex.getSpeeches().size());
System.out.println("SESSIONCOUNT: " + speechIndex.getSessions().size());
System.out.println("AGENDAITEMCOUNT: " + speechIndex.getAgendaItems().size());
System.out.println("Data retrieved from DB:");
System.out.println("MEMBERCOUNT: " + speechIndex.getMembers().size());
System.out.println("FRACTIONCOUNT: " + speechIndex.getFractions().size());
System.out.println("SPEECHCOUNT: " + speechIndex.getSpeeches().size());
System.out.println("SESSIONCOUNT: " + speechIndex.getSessions().size());
System.out.println("AGENDAITEMCOUNT: " + speechIndex.getAgendaItems().size());
MongoDBHandler mongoDBHandler = new MongoDBHandler();
mongoDBHandler.deleteAllDocuments(); // Clear the DB
MongoDBHandler mongoDBHandler = new MongoDBHandler();
mongoDBHandler.deleteAllDocuments(); // Clear the DB
Logger.pink("Adding Sessions to DB...");
mongoDBHandler.insertSessions(speechIndex.getSessions());
Logger.pink("Adding Sessions to DB...");
mongoDBHandler.insertSessions(speechIndex.getSessions());
Logger.pink("Adding Agenda Items to DB...");
mongoDBHandler.insertAgendaItems(speechIndex.getAgendaItems());
Logger.pink("Adding Agenda Items to DB...");
mongoDBHandler.insertAgendaItems(speechIndex.getAgendaItems());
Logger.pink("Adding Speeches to DB...");
mongoDBHandler.insertSpeeches(speechIndex.getSpeeches());
Logger.pink("Adding Speeches to DB...");
mongoDBHandler.insertSpeeches(speechIndex.getSpeeches());
mongoDBHandler.close(); // Close the connection to the DB
mongoDBHandler.close(); // Close the connection to the DB
}
//TEST

View file

@ -52,8 +52,7 @@ public class MongoDBHandler {
private static String databaseName;
private MongoCollection<Document> speechesCollection;
private MongoCollection<Document> membersCollection;
private MongoCollection<Document> fractionsCollection;
private MongoCollection<Document> sessionsCollection;
private MongoCollection<Document> agendaItemsCollection;
private MongoCollection<Document> historyCollection;
@ -90,15 +89,11 @@ public class MongoDBHandler {
mongoClient = MongoClients.create(settings);
database = mongoClient.getDatabase(databaseName);
speechesCollection = database.getCollection("speech");
membersCollection = database.getCollection("members");
fractionsCollection = database.getCollection("fractions");
sessionsCollection = database.getCollection("sessions");
agendaItemsCollection = database.getCollection("agendaItems");
historyCollection = database.getCollection("history");
Logger.info("Connected to MongoDB database: " + databaseName);
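// NOTE(review): intended to create the full-text search index, but new Document("collection", 1) only requests a plain ascending index on the "collection" field, not a text index.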
membersCollection.createIndex(new Document("collection", 1));
}
/**
@ -582,49 +577,6 @@ public class MongoDBHandler {
speechesCollection.insertMany(speechDocuments);
}
/**
 * Creates a new speech for the given speaker, session and agenda item and stores it in the database.
 * <p>
 * The speaker id has to match exactly one member, and both the session and the agenda item have to
 * exist already. The speech id starts at 0 and is replaced by random non-negative ints until an id
 * is found for which no speech is stored yet.
 *
 * @param speakerId    value matched against the "id" field of the members
 * @param sessionId    value matched against the "sessionId" field of the sessions
 * @param agendaItemId value matched against the "id" field of the agenda items
 * @return the newly created speech that was inserted
 * @throws MemberNotFoundException     if no member matches {@code speakerId}
 * @throws SessionNotFoundException    if no session matches {@code sessionId}
 * @throws AgendaItemNotFoundException if no agenda item matches {@code agendaItemId}
 * @throws ServerErrorException        if several members match {@code speakerId} or the member
 *                                     lookup fails with an {@link IOException}
 */
public Speech insertSpeech(int speakerId, int sessionId, int agendaItemId) throws MemberNotFoundException, SessionNotFoundException, AgendaItemNotFoundException, ServerErrorException {
    // Resolve the speaker; anything other than exactly one match is an error.
    MemberOfParliament speaker;
    try {
        List<MemberOfParliament> matches = retrieveAllMembersOfParliament(Filters.eq("id", speakerId));
        if (matches.isEmpty()) {
            Logger.error("No member found with id " + speakerId);
            throw new MemberNotFoundException();
        }
        if (matches.size() > 1) {
            Logger.warn("Multiple members found with id " + speakerId);
            throw new ServerErrorException();
        }
        speaker = matches.get(0);
    } catch (IOException e) {
        Logger.error("Failed to retrieve member with id " + speakerId);
        throw new ServerErrorException();
    }

    // The referenced session must exist.
    if (retrieveAllSessions(Filters.eq("sessionId", sessionId)).isEmpty()) {
        Logger.error("No session found with id " + sessionId);
        throw new SessionNotFoundException();
    }

    // The referenced agenda item must exist.
    if (retrieveAllAgendaItems(Filters.eq("id", agendaItemId)).isEmpty()) {
        Logger.error("No agendaItem found with id " + agendaItemId);
        throw new AgendaItemNotFoundException();
    }

    // Try id 0 first, then keep drawing random ids until one is unused.
    int freeSpeechId = 0;
    boolean idTaken = !retrieveAllSpeeches(Filters.eq("speechId", freeSpeechId)).isEmpty();
    while (idTaken) {
        freeSpeechId = (int) (Math.random() * Integer.MAX_VALUE);
        idTaken = !retrieveAllSpeeches(Filters.eq("speechId", freeSpeechId)).isEmpty();
    }

    // Build the speech from the resolved speaker and persist it.
    Speech created = new Speech_File_Impl(sessionId, agendaItemId, speakerId, freeSpeechId, speaker.getName(), speaker.getParty());
    insertSpeeches(List.of(created));
    return created;
}
public List<Speech> retrieveAllSpeeches() {
List<Document> speeches = speechesCollection.find().into(new ArrayList<>());
@ -686,34 +638,6 @@ public class MongoDBHandler {
return result;
}
/**
 * Loads every document of the members collection and wraps each one in a
 * {@code MemberOfParliament_MongoDB_Impl}.
 *
 * @return a new mutable list with one entry per document, in the order the query delivered them
 */
public List<MemberOfParliament> retrieveAllMembersOfParliament() {
    List<Document> memberDocs = new ArrayList<>();
    membersCollection.find().into(memberDocs);

    List<MemberOfParliament> wrapped = new ArrayList<>();
    for (Document memberDoc : memberDocs) {
        MemberOfParliament entry = new MemberOfParliament_MongoDB_Impl(memberDoc);
        wrapped.add(entry);
    }
    return wrapped;
}
/**
 * Loads the documents of the members collection that match the given filter and wraps each one
 * in a {@code MemberOfParliament_MongoDB_Impl}.
 *
 * @param filter query filter passed to {@code find}
 * @return a new mutable list with one entry per matching document, in the order the query delivered them
 * @throws IOException declared in the signature; no statement in this method visibly raises it
 */
public List<MemberOfParliament> retrieveAllMembersOfParliament(Bson filter) throws IOException {
    List<Document> memberDocs = new ArrayList<>();
    membersCollection.find(filter).into(memberDocs);

    List<MemberOfParliament> wrapped = new ArrayList<>();
    for (Document memberDoc : memberDocs) {
        MemberOfParliament entry = new MemberOfParliament_MongoDB_Impl(memberDoc);
        wrapped.add(entry);
    }
    return wrapped;
}
/**
 * Loads the documents of the members collection that match the given filter, restricted to the
 * given projection, and wraps each one in a {@code MemberOfParliament_MongoDB_Impl}.
 *
 * @param filter     query filter passed to {@code find}
 * @param projection projection applied to the query result
 * @return a new mutable list with one entry per matching document, in the order the query delivered them
 * @throws IOException declared in the signature; no statement in this method visibly raises it
 */
public List<MemberOfParliament> retrieveAllMembersOfParliament(Bson filter, Bson projection) throws IOException {
    List<Document> memberDocs = new ArrayList<>();
    membersCollection.find(filter).projection(projection).into(memberDocs);

    List<MemberOfParliament> wrapped = new ArrayList<>();
    for (Document memberDoc : memberDocs) {
        MemberOfParliament entry = new MemberOfParliament_MongoDB_Impl(memberDoc);
        wrapped.add(entry);
    }
    return wrapped;
}
public void deleteAllDocuments() {
speechesCollection.deleteMany(new Document());

View file

@ -10,14 +10,10 @@ public class SpeechIndex {
private final List<Session> sessions;
private final List<Speech> speeches;
private final List<AgendaItem> agendaItems;
private final List<MemberOfParliament> members;
private final List<Fraction> fractions;
public SpeechIndex(List<Session> sessions, List<Speech> speeches, List<AgendaItem> agendaItems, List<MemberOfParliament> members, List<Fraction> fractions) {
public SpeechIndex(List<Session> sessions, List<Speech> speeches, List<AgendaItem> agendaItems) {
this.sessions = sessions;
this.speeches = speeches;
this.agendaItems = agendaItems;
this.members = members;
this.fractions = fractions;
}
}

View file

@ -27,21 +27,6 @@ public interface SpeechIndexFactory {
*/
SpeechIndexFactory parseSessions();
/**
 * Parses all members of parliament.
 * <p>
 * The result type is the factory itself, so this step can be chained with the other
 * parse steps of this interface.
 *
 * @return the SpeechIndexFactory instance
 * @throws IOException if an I/O error occurs
 * @throws SAXException if a SAX error occurs
 */
SpeechIndexFactory parseMembers() throws IOException, SAXException;
/**
 * Parses all fractions.
 * <p>
 * The result type is the factory itself, so this step can be chained with the other
 * parse steps of this interface.
 *
 * @return the SpeechIndexFactory instance
 * @throws IOException if an I/O error occurs
 * @throws SAXException if a SAX error occurs
 */
SpeechIndexFactory parseFractions() throws IOException, SAXException;
/**
* Builds a new SpeechIndex instance.

View file

@ -39,19 +39,10 @@ public class SpeechIndexFactoryImpl implements SpeechIndexFactory {
return this;
}
/**
 * Placeholder for parsing the members of parliament; no parsing is performed yet.
 *
 * @return this factory, so that further builder calls can be chained
 * @throws IOException  declared by the interface; not raised by this implementation
 * @throws SAXException declared by the interface; not raised by this implementation
 */
@Override
public SpeechIndexFactory parseMembers() throws IOException, SAXException {
    // Returning null would make the next chained call fail with a NullPointerException,
    // so hand back the factory itself as the interface contract promises.
    return this;
}
/**
 * Placeholder for parsing the fractions; no parsing is performed yet.
 *
 * @return this factory, so that further builder calls can be chained
 * @throws IOException  declared by the interface; not raised by this implementation
 * @throws SAXException declared by the interface; not raised by this implementation
 */
@Override
public SpeechIndexFactory parseFractions() throws IOException, SAXException {
    // Returning null would make the next chained call fail with a NullPointerException,
    // so hand back the factory itself as the interface contract promises.
    return this;
}
/**
 * Builds a new SpeechIndex from this factory's {@code sessions}, {@code speeches} and
 * {@code agendaItems}.
 *
 * @return a new SpeechIndex wrapping the three lists
 * @throws IOException  declared by the interface; not raised by this implementation
 * @throws SAXException declared by the interface; not raised by this implementation
 */
@Override
public SpeechIndex build() throws IOException, SAXException {
    // A leading "return null;" made this statement unreachable and discarded the index.
    return new SpeechIndex(sessions, speeches, agendaItems);
}

View file

@ -356,7 +356,6 @@ public abstract class PPRUtils {
int offset = 0;
int limit = 10;
boolean hasMore = true;
while (hasMore) {
String queryUrl = "https://www.bundestag.de/ajax/filterlist/de/services/opendata/866354-866354?limit="
+ limit + "&noFilterSet=true&offset=" + offset;

View file

@ -2,6 +2,7 @@ package org.texttechnologylab.project.gruppe_05_1.xml.speeches;
import lombok.Getter;
import lombok.Setter;
import org.texttechnologylab.project.gruppe_05_1.database.MongoPprUtils;
import org.texttechnologylab.project.gruppe_05_1.util.PPRUtils;
import org.texttechnologylab.project.gruppe_05_1.xml.speeches.Impls.*;
import org.texttechnologylab.project.gruppe_05_1.xml.speeches.Interfaces.AgendaItem;
@ -34,12 +35,13 @@ public class SpeechParser {
private Boolean parseLegislativePeriods;
public List<Session> parseAllSessions() {
List<Session> sessionsEmpty = new ArrayList<>();
List<Session> sessions = new ArrayList<>();
this.speeches = new ArrayList<>();
this.agendaItems = new ArrayList<>();
Set<Document> xmlDocuments = PPRUtils.processXML();
System.out.println("All sessions parsed");
for (org.w3c.dom.Document xmlDoc:xmlDocuments) {
for (org.w3c.dom.Document xmlDoc : xmlDocuments) {
try {
File tempFile = convertDocumentToFile(xmlDoc);
Session session = parseSessionFile(tempFile);
@ -50,8 +52,8 @@ public class SpeechParser {
e.printStackTrace();
}
}
return sessions;
}
private Session parseSessionFile(File file) throws Exception {
@ -168,6 +170,7 @@ public class SpeechParser {
}
return null;
}
/**
* Konvertiert ein org.w3c.dom.Document in eine temporäre Datei.
*/