From ae5c3f17eb476f108dfce72b066b58e89331bc0a Mon Sep 17 00:00:00 2001 From: Picman2000 <65342372+Picman2000@users.noreply.github.com> Date: Tue, 4 Mar 2025 13:09:12 +0100 Subject: [PATCH] Rollback point --- .../AgendaItemNotFoundException.java | 7 + .../FractionAlreadyExistsException.java | 7 + .../exceptions/FractionNotFoundException.java | 7 + .../exceptions/MemberNotFoundException.java | 7 + .../java/exceptions/ServerErrorException.java | 7 + .../exceptions/SessionNotFoundException.java | 7 + .../exceptions/SpeechNotFoundException.java | 7 + .../project/gruppe_05_1/Main.java | 52 ++- .../gruppe_05_1/database/MongoDBHandler.java | 336 +++++++++++++++++- .../gruppe_05_1/database/SpeechIndex.java | 23 ++ .../database/SpeechIndexFactory.java | 60 ++++ .../database/SpeechIndexFactoryImpl.java | 63 ++++ .../speeches/AgendaItem_MongoDB_Impl.java | 15 + .../speeches/Comment_MongoDB_Impl.java | 17 + .../domainimp/speeches/Line_MongoDB_Impl.java | 15 + .../MemberOfParliament_MongoDB_Impl.java | 25 ++ .../speeches/Session_MongoDB_Impl.java | 17 + .../speeches/Speaker_MongoDB_Impl.java | 16 + .../speeches/Speech_MongoDB_Impl.java | 36 ++ .../project/gruppe_05_1/util/Logger.java | 27 ++ .../project/gruppe_05_1/util/PPRUtils.java | 7 +- .../xml/speeches/SpeechParser.java | 3 +- 22 files changed, 751 insertions(+), 10 deletions(-) create mode 100644 src/main/java/exceptions/AgendaItemNotFoundException.java create mode 100644 src/main/java/exceptions/FractionAlreadyExistsException.java create mode 100644 src/main/java/exceptions/FractionNotFoundException.java create mode 100644 src/main/java/exceptions/MemberNotFoundException.java create mode 100644 src/main/java/exceptions/ServerErrorException.java create mode 100644 src/main/java/exceptions/SessionNotFoundException.java create mode 100644 src/main/java/exceptions/SpeechNotFoundException.java create mode 100644 src/main/java/org/texttechnologylab/project/gruppe_05_1/database/SpeechIndex.java create mode 100644 src/main/java/org/texttechnologylab/project/gruppe_05_1/database/SpeechIndexFactory.java create mode 100644 src/main/java/org/texttechnologylab/project/gruppe_05_1/database/SpeechIndexFactoryImpl.java create mode 100644 src/main/java/org/texttechnologylab/project/gruppe_05_1/database/domainimp/speeches/AgendaItem_MongoDB_Impl.java create mode 100644 src/main/java/org/texttechnologylab/project/gruppe_05_1/database/domainimp/speeches/Comment_MongoDB_Impl.java create mode 100644 src/main/java/org/texttechnologylab/project/gruppe_05_1/database/domainimp/speeches/Line_MongoDB_Impl.java create mode 100644 src/main/java/org/texttechnologylab/project/gruppe_05_1/database/domainimp/speeches/MemberOfParliament_MongoDB_Impl.java create mode 100644 src/main/java/org/texttechnologylab/project/gruppe_05_1/database/domainimp/speeches/Session_MongoDB_Impl.java create mode 100644 src/main/java/org/texttechnologylab/project/gruppe_05_1/database/domainimp/speeches/Speaker_MongoDB_Impl.java create mode 100644 src/main/java/org/texttechnologylab/project/gruppe_05_1/database/domainimp/speeches/Speech_MongoDB_Impl.java create mode 100644 src/main/java/org/texttechnologylab/project/gruppe_05_1/util/Logger.java diff --git a/src/main/java/exceptions/AgendaItemNotFoundException.java b/src/main/java/exceptions/AgendaItemNotFoundException.java new file mode 100644 index 0000000..1d5198c --- /dev/null +++ b/src/main/java/exceptions/AgendaItemNotFoundException.java @@ -0,0 +1,7 @@ +package exceptions; + +public class AgendaItemNotFoundException extends RuntimeException { + public AgendaItemNotFoundException() { + super("Agenda Item not found"); + } +} diff --git a/src/main/java/exceptions/FractionAlreadyExistsException.java b/src/main/java/exceptions/FractionAlreadyExistsException.java new file mode 100644 index 0000000..ba3548d --- /dev/null +++ b/src/main/java/exceptions/FractionAlreadyExistsException.java @@ -0,0 +1,7 @@ +package exceptions; + +public class FractionAlreadyExistsException extends RuntimeException { + public FractionAlreadyExistsException() { + super("Fraction already exists"); + } +} diff --git a/src/main/java/exceptions/FractionNotFoundException.java b/src/main/java/exceptions/FractionNotFoundException.java new file mode 100644 index 0000000..770dbbb --- /dev/null +++ b/src/main/java/exceptions/FractionNotFoundException.java @@ -0,0 +1,7 @@ +package exceptions; + +public class FractionNotFoundException extends RuntimeException { + public FractionNotFoundException() { + super("Fraction not found"); + } +} diff --git a/src/main/java/exceptions/MemberNotFoundException.java b/src/main/java/exceptions/MemberNotFoundException.java new file mode 100644 index 0000000..42381ec --- /dev/null +++ b/src/main/java/exceptions/MemberNotFoundException.java @@ -0,0 +1,7 @@ +package exceptions; + +public class MemberNotFoundException extends RuntimeException { + public MemberNotFoundException() { + super("Member not found"); + } +} diff --git a/src/main/java/exceptions/ServerErrorException.java b/src/main/java/exceptions/ServerErrorException.java new file mode 100644 index 0000000..d78372e --- /dev/null +++ b/src/main/java/exceptions/ServerErrorException.java @@ -0,0 +1,7 @@ +package exceptions; + +public class ServerErrorException extends RuntimeException { + public ServerErrorException() { + super("Server error occurred"); + } +} diff --git a/src/main/java/exceptions/SessionNotFoundException.java b/src/main/java/exceptions/SessionNotFoundException.java new file mode 100644 index 0000000..9ace66f --- /dev/null +++ b/src/main/java/exceptions/SessionNotFoundException.java @@ -0,0 +1,7 @@ +package exceptions; + +public class SessionNotFoundException extends RuntimeException { + public SessionNotFoundException() { + super("Session not found"); + } +} diff --git a/src/main/java/exceptions/SpeechNotFoundException.java b/src/main/java/exceptions/SpeechNotFoundException.java new file mode 100644 index 0000000..376a5d4 --- /dev/null +++ b/src/main/java/exceptions/SpeechNotFoundException.java @@ -0,0 +1,7 @@ +package exceptions; + +public class SpeechNotFoundException extends RuntimeException { + public SpeechNotFoundException() { + super("Speech not found"); + } +} diff --git a/src/main/java/org/texttechnologylab/project/gruppe_05_1/Main.java b/src/main/java/org/texttechnologylab/project/gruppe_05_1/Main.java index 33d6ef0..555ccb2 100644 --- a/src/main/java/org/texttechnologylab/project/gruppe_05_1/Main.java +++ b/src/main/java/org/texttechnologylab/project/gruppe_05_1/Main.java @@ -4,19 +4,27 @@ package org.texttechnologylab.project.gruppe_05_1; import com.mongodb.client.MongoDatabase; import org.texttechnologylab.project.gruppe_05_1.database.MongoDBHandler; import org.texttechnologylab.project.gruppe_05_1.database.MongoObjectFactory; +import org.texttechnologylab.project.gruppe_05_1.database.SpeechIndex; +import org.texttechnologylab.project.gruppe_05_1.database.SpeechIndexFactoryImpl; import org.texttechnologylab.project.gruppe_05_1.domain.mdb.Mdb; import org.texttechnologylab.project.gruppe_05_1.domain.mdb.MdbDocument; import org.texttechnologylab.project.gruppe_05_1.rest.RESTHandler; +import org.texttechnologylab.project.gruppe_05_1.util.Logger; import org.texttechnologylab.project.gruppe_05_1.util.PPRUtils; import org.texttechnologylab.project.gruppe_05_1.util.PropertiesUtils; import org.texttechnologylab.project.gruppe_05_1.util.XmlUtils; import org.texttechnologylab.project.gruppe_05_1.xml.FileObjectFactory; import org.texttechnologylab.project.gruppe_05_1.xml.speeches.SpeechParser; import org.w3c.dom.Element; +import org.xml.sax.SAXException; +import javax.xml.parsers.ParserConfigurationException; +import java.io.IOException; import java.util.List; import java.util.Properties; +import static java.lang.Boolean.TRUE; + /* import com.mongodb.client.*; import org.bson.Document; @@ -46,9 +54,49 @@ public class Main { private static final FileObjectFactory xmlFactory = FileObjectFactory.getFactory(); private static final MongoObjectFactory mongoFactory = MongoObjectFactory.getFactory(); private static final SpeechParser speechParser = new SpeechParser(); - public static void main(String[] args) { + public static void main(String[] args) throws ParserConfigurationException, IOException, SAXException { + //TEST - speechParser.parseAllSessions(); + + Logger.pink("Parsing XML and inserting data into DB (Uebung 2)..."); + SpeechIndexFactoryImpl speechIndexFactory = new SpeechIndexFactoryImpl(); + + SpeechIndex speechIndex = speechIndexFactory + .builder() + .parseLegislativePeriods(TRUE) + .parseSessions() + .parseMembers() + .parseFractions() + .filterForCurrentMembers(TRUE) + .build(); + + //speechIndex.printInfo(); + + System.out.println("Data retrieved from DB:"); + System.out.println("MEMBERCOUNT: " + speechIndex.getMembers().size()); + System.out.println("FRACTIONCOUNT: " + speechIndex.getFractions().size()); + System.out.println("SPEECHCOUNT: " + speechIndex.getSpeeches().size()); + System.out.println("SESSIONCOUNT: " + speechIndex.getSessions().size()); + System.out.println("AGENDAITEMCOUNT: " + speechIndex.getAgendaItems().size()); + + MongoDBHandler mongoDBHandler = new MongoDBHandler(); + mongoDBHandler.deleteAllDocuments(); // Clear the DB + + Logger.pink("Adding Sessions to DB..."); + mongoDBHandler.insertSessions(speechIndex.getSessions()); + + Logger.pink("Adding Agenda Items to DB..."); + mongoDBHandler.insertAgendaItems(speechIndex.getAgendaItems()); + + Logger.pink("Adding Speeches to DB..."); + mongoDBHandler.insertSpeeches(speechIndex.getSpeeches()); + + mongoDBHandler.close(); // Close the connection to the DB + + + //TEST + + // Stellt fest, dass alle nötigen Datenbank-Collections existieren PPRUtils.ensureCollectionExist(); diff --git a/src/main/java/org/texttechnologylab/project/gruppe_05_1/database/MongoDBHandler.java b/src/main/java/org/texttechnologylab/project/gruppe_05_1/database/MongoDBHandler.java index 47a73a0..8b684f0 100644 --- a/src/main/java/org/texttechnologylab/project/gruppe_05_1/database/MongoDBHandler.java +++ b/src/main/java/org/texttechnologylab/project/gruppe_05_1/database/MongoDBHandler.java @@ -7,14 +7,29 @@ import com.mongodb.client.MongoClient; import com.mongodb.client.MongoClients; import com.mongodb.client.MongoCollection; import com.mongodb.client.MongoDatabase; +import com.mongodb.client.model.Filters; import com.mongodb.client.model.Indexes; import com.mongodb.client.model.Updates; +import exceptions.AgendaItemNotFoundException; +import exceptions.MemberNotFoundException; +import exceptions.ServerErrorException; +import exceptions.SessionNotFoundException; import org.bson.Document; import org.bson.conversions.Bson; import org.bson.types.ObjectId; +import org.texttechnologylab.DockerUnifiedUIMAInterface.connection.mongodb.MongoDBConfig; +import org.texttechnologylab.project.gruppe_05_1.database.domainimp.speeches.AgendaItem_MongoDB_Impl; +import org.texttechnologylab.project.gruppe_05_1.database.domainimp.speeches.MemberOfParliament_MongoDB_Impl; +import org.texttechnologylab.project.gruppe_05_1.database.domainimp.speeches.Session_MongoDB_Impl; +import org.texttechnologylab.project.gruppe_05_1.database.domainimp.speeches.Speech_MongoDB_Impl; +import org.texttechnologylab.project.gruppe_05_1.util.Logger; import org.texttechnologylab.project.gruppe_05_1.util.PropertiesUtils; +import org.texttechnologylab.project.gruppe_05_1.xml.speeches.Impls.*; +import org.texttechnologylab.project.gruppe_05_1.xml.speeches.Interfaces.*; +import java.io.IOException; import java.util.*; +import java.util.concurrent.TimeUnit; import static com.mongodb.client.model.Filters.eq; @@ -22,7 +37,8 @@ public class MongoDBHandler { public static final String propertiesFileName = "mongoDB.properties"; public static final String DEFAULT_ID_FIELD_NAME = "_id"; - + private final MongoClient mongoClient; + private final MongoDatabase database; private static MongoDatabase mongoDatabase = null; public final static Class DOC_LIST_CLASS = new ArrayList().getClass(); @@ -35,6 +51,56 @@ public class MongoDBHandler { private static String collection; private static String databaseName; + private MongoCollection speechesCollection; + private MongoCollection membersCollection; + private MongoCollection fractionsCollection; + private MongoCollection sessionsCollection; + private MongoCollection agendaItemsCollection; + private MongoCollection historyCollection; + + + public MongoDBHandler() { + // Load the MongoDB configuration from the properties file + String propertiesFilePath = "config/database.properties"; + // Set loglevel for slf4j to avoid spam + System.setProperty("org.slf4j.simpleLogger.defaultLogLevel", "error"); + Properties mongoProperties = PropertiesUtils.readPropertiesFromResource(propertiesFileName); + // Zugangsdaten + localServer = mongoProperties.getProperty("localserver"); + remoteServer = mongoProperties.getProperty("remote_host"); + user = mongoProperties.getProperty("remote_user"); + password = mongoProperties.getProperty("remote_password"); + port = mongoProperties.getProperty("remote_port"); + collection = mongoProperties.getProperty("remote_collection"); + databaseName = mongoProperties.getProperty("remote_database"); + + MongoCredential credential = MongoCredential + .createCredential( + user, + databaseName, + password.toCharArray()); + + MongoClientSettings settings = MongoClientSettings.builder() + .credential(credential) + .timeout(180, TimeUnit.HOURS) // needs increased timeout for the bulk speech inserts + .applyToClusterSettings(builder -> + builder.hosts(List.of(new ServerAddress(remoteServer, Integer.parseInt(port))))) + .build(); + + mongoClient = MongoClients.create(settings); + database = mongoClient.getDatabase(databaseName); + speechesCollection = database.getCollection("speech"); + membersCollection = database.getCollection("members"); + fractionsCollection = database.getCollection("fractions"); + sessionsCollection = database.getCollection("sessions"); + agendaItemsCollection = database.getCollection("agendaItems"); + historyCollection = database.getCollection("history"); + Logger.info("Connected to MongoDB database: " + databaseName); + + // hopeless attempt of creating the fulltext search index :( + membersCollection.createIndex(new Document("collection", 1)); + } + /** * Get the MongoDB according to properties. * If a local server URI is defined, use it. Otherwise, use remote server. @@ -392,4 +458,272 @@ public class MongoDBHandler { collection.deleteOne(deleteQuery); } + /* + * Justus Jonas operations + * ======================= + */ + + public void insertSession(Session session) { + Document sessionDocument = new Document("sessionId", session.getId()) + .append("dateTime", session.getDateTime()) + .append("endTime", session.getEndTime()) + .append("legislativePeriod", session.getLegislativePeriod()); + + sessionsCollection.insertOne(sessionDocument); + } + + public Session insertSession(String dateTime, String endTime, String legislativePeriod) { + // get a new random sessionId that is not already in use + int sessionId = 0; + while (!retrieveAllSessions(Filters.eq("sessionId", sessionId)).isEmpty()) { + // generate random int + sessionId = (int) (Math.random() * Integer.MAX_VALUE); + } + + // create session + Session session = new Session_File_Impl(legislativePeriod, sessionId, dateTime, endTime); + + // insert session into DB + insertSession(session); + + return session; + } + + + public void insertSessions(List sessions) { + for (Session session : sessions) { + insertSession(session); + } + } + + public void insertAgendaItems(List agendaItems) { + List agendaItemDocuments = new ArrayList<>(); + for (AgendaItem agendaItem : agendaItems) { + Document agendaItemDocument = new Document("id", agendaItem.getId()) + .append("sessionId", agendaItem.getSessionId()) + .append("title", agendaItem.getTitle()); + + agendaItemDocuments.add(agendaItemDocument); + } + + agendaItemsCollection.insertMany(agendaItemDocuments); + + } + + public AgendaItem insertAgendaItem(int sessionId, String title) throws SessionNotFoundException, ServerErrorException { + // check if session exists + List sessions = retrieveAllSessions(Filters.eq("sessionId", sessionId)); + if (sessions.isEmpty()) { + Logger.error("No session found with id " + sessionId); + throw new SessionNotFoundException(); + } + // get a new random agendaItemId that is not already in use + int agendaItemId = 0; + while (!retrieveAllAgendaItems(Filters.eq("id", agendaItemId)).isEmpty()) { + // generate random int + agendaItemId = (int) (Math.random() * Integer.MAX_VALUE); + } + + // create agendaItem + AgendaItem agendaItem = new AgendaItem_File_Impl(agendaItemId, sessionId, title); + + // insert agendaItem into DB + insertAgendaItems(List.of(agendaItem)); + + return agendaItem; + } + + public void insertSpeeches(List speeches) { + // Convert each Speech to a Document + List speechDocuments = new ArrayList<>(); + for (Speech speech : speeches) { + Document speechDocument = new Document("sessionId", speech.getSessionId()) + .append("agendaItemId", speech.getAgendaItemId()) + .append("speechId", speech.getSpeechId()) + .append("speakerId", speech.getSpeakerId()) + .append("speakerName", speech.getSpeakerName()) + .append("fraction", speech.getFraction()); + + // Convert speechContents to a list of Documents + List contentDocuments = new ArrayList<>(); + for (Content content : speech.getSpeechContents()) { + if (content instanceof Comment_File_Impl) { + Comment_File_Impl commentContent = (Comment_File_Impl) content; + contentDocuments.add(new Document("type", "comment") + .append("contentId", commentContent.getContentId()) + .append("speechId", commentContent.getSpeechId()) + .append("commentatorName", commentContent.getCommentatorName()) + .append("comment", commentContent.getComment())); + } else if (content instanceof Line_File_Impl) { + Line_File_Impl lineContent = (Line_File_Impl) content; + contentDocuments.add(new Document("type", "line") + .append("contentId", lineContent.getContentId()) + .append("speechId", lineContent.getSpeechId()) + .append("content", lineContent.getContent())); + } else if (content instanceof Speaker_File_Impl) { + Speaker_File_Impl speakerContent = (Speaker_File_Impl) content; + contentDocuments.add(new Document("type", "speaker") + .append("contentId", speakerContent.getContentId()) + .append("speechId", speakerContent.getSpeechId()) + .append("speakerId", speakerContent.getSpeakerId()) + .append("speakerName", speakerContent.getSpeakerName()) + .append("fraction", speakerContent.getFraction())); + } + } + + // Add the speech contents to the speech document + speechDocument.append("speechContents", contentDocuments); + + // Add the speech document to the list + speechDocuments.add(speechDocument); + } + + // Insert all documents at once using insertMany + speechesCollection.insertMany(speechDocuments); + } + + public Speech insertSpeech(int speakerId, int sessionId, int agendaItemId) throws MemberNotFoundException, SessionNotFoundException, AgendaItemNotFoundException, ServerErrorException { + // fetch member by speakerId + MemberOfParliament member; + try { + List members = retrieveAllMembersOfParliament(Filters.eq("id", speakerId)); + if (members.isEmpty()) { + Logger.error("No member found with id " + speakerId); + throw new MemberNotFoundException(); + } else if (members.size() > 1) { + Logger.warn("Multiple members found with id " + speakerId); + throw new ServerErrorException(); + } + member = members.get(0); + } catch (IOException e) { + Logger.error("Failed to retrieve member with id " + speakerId); + throw new ServerErrorException(); + } + // check if session and agendaItem exist + List sessions = retrieveAllSessions(Filters.eq("sessionId", sessionId)); + if (sessions.isEmpty()) { + Logger.error("No session found with id " + sessionId); + throw new SessionNotFoundException(); + } + List agendaItems = retrieveAllAgendaItems(Filters.eq("id", agendaItemId)); + if (agendaItems.isEmpty()) { + Logger.error("No agendaItem found with id " + agendaItemId); + throw new AgendaItemNotFoundException(); + } + // get a new random speechId that is not already in use + int speechId = 0; + while (!retrieveAllSpeeches(Filters.eq("speechId", speechId)).isEmpty()) { + // generate random int + speechId = (int) (Math.random() * Integer.MAX_VALUE); + } + + // create speech + Speech speech = new Speech_File_Impl(sessionId, agendaItemId, speakerId, speechId, member.getName(), member.getParty()); + + // insert speech into DB + insertSpeeches(List.of(speech)); + + return speech; + } + + public List retrieveAllSpeeches() { + List speeches = speechesCollection.find().into(new ArrayList<>()); + List result = new ArrayList<>(); + for (Document speech : speeches) { + result.add(new Speech_MongoDB_Impl(speech)); + } + + return result; + } + + public List retrieveAllSpeeches(Bson filter) { + List speeches = speechesCollection.find(filter).into(new ArrayList<>()); + List result = new ArrayList<>(); + for (Document speech : speeches) { + result.add(new Speech_MongoDB_Impl(speech)); + } + + return result; + } + + public List retrieveAllSessions() { + List sessions = sessionsCollection.find().into(new ArrayList<>()); + List result = new ArrayList<>(); + for (Document session : sessions) { + result.add(new Session_MongoDB_Impl(session)); + } + + return result; + } + + public List retrieveAllSessions(Bson filter) { + List speeches = sessionsCollection.find(filter).into(new ArrayList<>()); + List result = new ArrayList<>(); + for (Document speech : speeches) { + result.add(new Session_MongoDB_Impl(speech)); + } + + return result; + } + + public List retrieveAllAgendaItems() { + List agendaItems = agendaItemsCollection.find().into(new ArrayList<>()); + List result = new ArrayList<>(); + for (Document agendaItem : agendaItems) { + result.add(new AgendaItem_MongoDB_Impl(agendaItem)); + } + + return result; + } + + public List retrieveAllAgendaItems(Bson filter) { + List speeches = agendaItemsCollection.find(filter).into(new ArrayList<>()); + List result = new ArrayList<>(); + for (Document speech : speeches) { + result.add(new AgendaItem_MongoDB_Impl(speech)); + } + + return result; + } + + public List retrieveAllMembersOfParliament() { + List members = membersCollection.find().into(new ArrayList<>()); + List result = new ArrayList<>(); + for (Document member : members) { + result.add(new MemberOfParliament_MongoDB_Impl(member)); + } + + return result; + } + + public List retrieveAllMembersOfParliament(Bson filter) throws IOException { + List speeches = membersCollection.find(filter).into(new ArrayList<>()); + List result = new ArrayList<>(); + for (Document speech : speeches) { + result.add(new MemberOfParliament_MongoDB_Impl(speech)); + } + return result; + } + + public List retrieveAllMembersOfParliament(Bson filter, Bson projection) throws IOException { + List speeches = membersCollection.find(filter).projection(projection).into(new ArrayList<>()); + List result = new ArrayList<>(); + for (Document speech : speeches) { + result.add(new MemberOfParliament_MongoDB_Impl(speech)); + } + + return result; + } + + public void deleteAllDocuments() { + speechesCollection.deleteMany(new Document()); + sessionsCollection.deleteMany(new Document()); + agendaItemsCollection.deleteMany(new Document()); + //historyCollection.deleteMany(new Document()); + } + + public void close() { + mongoClient.close(); + } + } diff --git a/src/main/java/org/texttechnologylab/project/gruppe_05_1/database/SpeechIndex.java b/src/main/java/org/texttechnologylab/project/gruppe_05_1/database/SpeechIndex.java new file mode 100644 index 0000000..5c8e571 --- /dev/null +++ b/src/main/java/org/texttechnologylab/project/gruppe_05_1/database/SpeechIndex.java @@ -0,0 +1,23 @@ +package org.texttechnologylab.project.gruppe_05_1.database; + +import lombok.Getter; +import org.texttechnologylab.project.gruppe_05_1.xml.speeches.Interfaces.*; + +import java.util.List; + +@Getter +public class SpeechIndex { + private final List sessions; + private final List speeches; + private final List agendaItems; + private final List members; + private final List fractions; + + public SpeechIndex(List sessions, List speeches, List agendaItems, List members, List fractions) { + this.sessions = sessions; + this.speeches = speeches; + this.agendaItems = agendaItems; + this.members = members; + this.fractions = fractions; + } +} diff --git a/src/main/java/org/texttechnologylab/project/gruppe_05_1/database/SpeechIndexFactory.java b/src/main/java/org/texttechnologylab/project/gruppe_05_1/database/SpeechIndexFactory.java new file mode 100644 index 0000000..a321f8c --- /dev/null +++ b/src/main/java/org/texttechnologylab/project/gruppe_05_1/database/SpeechIndexFactory.java @@ -0,0 +1,60 @@ +package org.texttechnologylab.project.gruppe_05_1.database; + + +import org.xml.sax.SAXException; + +import javax.xml.parsers.ParserConfigurationException; +import java.io.IOException; + +public interface SpeechIndexFactory { + /** + * Set the parseLegislativePeriods flag. + * @param parseLegislativePeriods that indicates whether the legislative periods should be parsed which takes a lot longer + * @return the SpeechIndexFactory instance + */ + SpeechIndexFactory parseLegislativePeriods(Boolean parseLegislativePeriods); + + /** + * Creates a new SpeechIndexFactory instance. + * @return a new SpeechIndexFactory instance + * @throws ParserConfigurationException if the parser configuration is invalid + */ + SpeechIndexFactory builder() throws ParserConfigurationException; + + /** + * Parses all sessions, speeches and agenda items. + * @return the SpeechIndexFactory instance + */ + SpeechIndexFactory parseSessions(); + + /** + * Parses all members of parliament. + * @return the SpeechIndexFactory instance + * @throws IOException if an I/O error occurs + * @throws SAXException if a SAX error occurs + */ + SpeechIndexFactory parseMembers() throws IOException, SAXException; + + /** + * Parses all fractions. + * @return the SpeechIndexFactory instance + * @throws IOException if an I/O error occurs + * @throws SAXException if a SAX error occurs + */ + SpeechIndexFactory parseFractions() throws IOException, SAXException; + + /** + * Builds a new SpeechIndex instance. + * @return a new SpeechIndex instance + * @throws IOException if an I/O error occurs + * @throws SAXException if a SAX error occurs + */ + SpeechIndex build() throws IOException, SAXException; + + /** + * Filters the members for current members. + * @param filterForCurrentMembers that indicates whether the members should be filtered for current members + * @return the SpeechIndexFactory instance + */ + SpeechIndexFactory filterForCurrentMembers(boolean filterForCurrentMembers); +} diff --git a/src/main/java/org/texttechnologylab/project/gruppe_05_1/database/SpeechIndexFactoryImpl.java b/src/main/java/org/texttechnologylab/project/gruppe_05_1/database/SpeechIndexFactoryImpl.java new file mode 100644 index 0000000..6fb064d --- /dev/null +++ b/src/main/java/org/texttechnologylab/project/gruppe_05_1/database/SpeechIndexFactoryImpl.java @@ -0,0 +1,63 @@ +package org.texttechnologylab.project.gruppe_05_1.database; + + +import org.texttechnologylab.project.gruppe_05_1.xml.speeches.Interfaces.*; +import org.texttechnologylab.project.gruppe_05_1.xml.speeches.SpeechParser; +import org.xml.sax.SAXException; + +import javax.xml.parsers.ParserConfigurationException; +import java.io.IOException; +import java.util.List; +import java.util.stream.Collectors; + +public class SpeechIndexFactoryImpl implements SpeechIndexFactory { + private SpeechParser speechParser; + private List sessions; + private List agendaItems; + private List speeches; + private List members; + private List fractions; + private Boolean parseLegislativePeriods = true; + + + @Override + public SpeechIndexFactoryImpl parseLegislativePeriods(Boolean parseLegislativePeriods) { + this.parseLegislativePeriods = parseLegislativePeriods; + try {this.speechParser.setParseLegislativePeriods(parseLegislativePeriods);} catch (NullPointerException ignored) {} + return this; + } + + public SpeechIndexFactoryImpl builder() throws ParserConfigurationException { + this.speechParser = new SpeechParser(); + return this; + } + + public SpeechIndexFactoryImpl parseSessions() { + this.sessions = this.speechParser.parseAllSessions(); + this.speeches = this.speechParser.getSpeeches(); + this.agendaItems = this.speechParser.getAgendaItems(); + return this; + } + + @Override + public SpeechIndexFactory parseMembers() throws IOException, SAXException { + return null; + } + + @Override + public SpeechIndexFactory parseFractions() throws IOException, SAXException { + return null; + } + + @Override + public SpeechIndex build() throws IOException, SAXException { + return null; + } + + + public SpeechIndexFactoryImpl filterForCurrentMembers(boolean filterForCurrentMembers) { + if (!filterForCurrentMembers) return this; + this.members = this.members.stream().filter(MemberOfParliament::isCurrentMember).collect(Collectors.toList()); + return this; + } +} diff --git a/src/main/java/org/texttechnologylab/project/gruppe_05_1/database/domainimp/speeches/AgendaItem_MongoDB_Impl.java b/src/main/java/org/texttechnologylab/project/gruppe_05_1/database/domainimp/speeches/AgendaItem_MongoDB_Impl.java new file mode 100644 index 0000000..578255d --- /dev/null +++ b/src/main/java/org/texttechnologylab/project/gruppe_05_1/database/domainimp/speeches/AgendaItem_MongoDB_Impl.java @@ -0,0 +1,15 @@ +package org.texttechnologylab.project.gruppe_05_1.database.domainimp.speeches; + +import org.bson.Document; +import org.texttechnologylab.project.gruppe_05_1.xml.speeches.Impls.AgendaItem_File_Impl; +import org.texttechnologylab.project.gruppe_05_1.xml.speeches.Interfaces.AgendaItem; + + +public class AgendaItem_MongoDB_Impl extends AgendaItem_File_Impl implements AgendaItem { + public AgendaItem_MongoDB_Impl(Document mongoDocument) { + super( + mongoDocument.getInteger("id"), + mongoDocument.getInteger("sessionId"), + mongoDocument.getString("title")); + } +} diff --git a/src/main/java/org/texttechnologylab/project/gruppe_05_1/database/domainimp/speeches/Comment_MongoDB_Impl.java b/src/main/java/org/texttechnologylab/project/gruppe_05_1/database/domainimp/speeches/Comment_MongoDB_Impl.java new file mode 100644 index 0000000..64000df --- /dev/null +++ b/src/main/java/org/texttechnologylab/project/gruppe_05_1/database/domainimp/speeches/Comment_MongoDB_Impl.java @@ -0,0 +1,17 @@ +package org.texttechnologylab.project.gruppe_05_1.database.domainimp.speeches; + +import org.bson.Document; +import org.texttechnologylab.project.gruppe_05_1.xml.speeches.Impls.Comment_File_Impl; +import org.texttechnologylab.project.gruppe_05_1.xml.speeches.Interfaces.Comment; + + +public class Comment_MongoDB_Impl extends Comment_File_Impl implements Comment { + + public Comment_MongoDB_Impl(Document mongoDocument) { + super( + mongoDocument.getInteger("contentId"), + mongoDocument.getInteger("speechId"), + mongoDocument.getString("commentatorName"), + mongoDocument.getString("comment")); + } +} diff --git a/src/main/java/org/texttechnologylab/project/gruppe_05_1/database/domainimp/speeches/Line_MongoDB_Impl.java b/src/main/java/org/texttechnologylab/project/gruppe_05_1/database/domainimp/speeches/Line_MongoDB_Impl.java new file mode 100644 index 0000000..ac951aa --- /dev/null +++ b/src/main/java/org/texttechnologylab/project/gruppe_05_1/database/domainimp/speeches/Line_MongoDB_Impl.java @@ -0,0 +1,15 @@ +package org.texttechnologylab.project.gruppe_05_1.database.domainimp.speeches; + +import org.bson.Document; +import org.texttechnologylab.project.gruppe_05_1.xml.speeches.Impls.Line_File_Impl; +import org.texttechnologylab.project.gruppe_05_1.xml.speeches.Interfaces.Line; + + +public class Line_MongoDB_Impl extends Line_File_Impl implements Line { + public Line_MongoDB_Impl(Document mongoDocument) { + super( + mongoDocument.getInteger("contentId"), + mongoDocument.getInteger("speechId"), + mongoDocument.getString("content")); + } +} diff --git a/src/main/java/org/texttechnologylab/project/gruppe_05_1/database/domainimp/speeches/MemberOfParliament_MongoDB_Impl.java b/src/main/java/org/texttechnologylab/project/gruppe_05_1/database/domainimp/speeches/MemberOfParliament_MongoDB_Impl.java new file mode 100644 index 0000000..71ff61b --- /dev/null +++ b/src/main/java/org/texttechnologylab/project/gruppe_05_1/database/domainimp/speeches/MemberOfParliament_MongoDB_Impl.java @@ -0,0 +1,25 @@ +package org.texttechnologylab.project.gruppe_05_1.database.domainimp.speeches; + +import org.bson.Document; +import org.texttechnologylab.project.gruppe_05_1.xml.speeches.Impls.MemberOfParliament_File_Impl; +import org.texttechnologylab.project.gruppe_05_1.xml.speeches.Interfaces.MemberOfParliament; + + +public class MemberOfParliament_MongoDB_Impl extends MemberOfParliament_File_Impl implements MemberOfParliament { + public MemberOfParliament_MongoDB_Impl(Document mongoDocument) {super( + mongoDocument.getString("name"), + mongoDocument.getString("firstName"), + mongoDocument.getString("title"), + mongoDocument.getString("dateOfBirth"), + mongoDocument.getString("dateOfDeath"), + mongoDocument.getString("placeOfBirth"), + mongoDocument.getString("gender"), + mongoDocument.getString("religion"), + mongoDocument.getInteger("id"), + mongoDocument.getString("party"), + null, + mongoDocument.getInteger("firstLegislativePeriod"), + mongoDocument.getInteger("lastLegislativePeriod"), + mongoDocument.getString("image_data")); + } +} diff --git a/src/main/java/org/texttechnologylab/project/gruppe_05_1/database/domainimp/speeches/Session_MongoDB_Impl.java b/src/main/java/org/texttechnologylab/project/gruppe_05_1/database/domainimp/speeches/Session_MongoDB_Impl.java new file mode 100644 index 0000000..855d076 --- /dev/null +++ b/src/main/java/org/texttechnologylab/project/gruppe_05_1/database/domainimp/speeches/Session_MongoDB_Impl.java @@ -0,0 +1,17 @@ +package org.texttechnologylab.project.gruppe_05_1.database.domainimp.speeches; + +import org.bson.Document; +import org.texttechnologylab.project.gruppe_05_1.xml.speeches.Impls.Session_File_Impl; +import org.texttechnologylab.project.gruppe_05_1.xml.speeches.Interfaces.Session; + + +public class Session_MongoDB_Impl extends Session_File_Impl implements Session { + + public Session_MongoDB_Impl(Document mongoDocument) { + super( + mongoDocument.getString("legislativePeriod"), + mongoDocument.getInteger("sessionId"), + mongoDocument.getString("dateTime"), + mongoDocument.getString("endTime")); + } +} diff --git a/src/main/java/org/texttechnologylab/project/gruppe_05_1/database/domainimp/speeches/Speaker_MongoDB_Impl.java b/src/main/java/org/texttechnologylab/project/gruppe_05_1/database/domainimp/speeches/Speaker_MongoDB_Impl.java new file mode 100644 index 0000000..2a9aefb --- /dev/null +++ b/src/main/java/org/texttechnologylab/project/gruppe_05_1/database/domainimp/speeches/Speaker_MongoDB_Impl.java @@ -0,0 +1,16 @@ +package org.texttechnologylab.project.gruppe_05_1.database.domainimp.speeches; + +import org.bson.Document; +import org.texttechnologylab.project.gruppe_05_1.xml.speeches.Impls.Speaker_File_Impl; +import org.texttechnologylab.project.gruppe_05_1.xml.speeches.Interfaces.Speaker; + +public class Speaker_MongoDB_Impl extends Speaker_File_Impl implements Speaker { + public Speaker_MongoDB_Impl(Document mongoDocument) { + super( + mongoDocument.getInteger("contentId"), + mongoDocument.getInteger("speechId"), + mongoDocument.getInteger("speakerId"), + mongoDocument.getString("speakerName"), + mongoDocument.getString("fraction")); + } +} diff --git a/src/main/java/org/texttechnologylab/project/gruppe_05_1/database/domainimp/speeches/Speech_MongoDB_Impl.java b/src/main/java/org/texttechnologylab/project/gruppe_05_1/database/domainimp/speeches/Speech_MongoDB_Impl.java new file mode 100644 index 0000000..0ac537c --- /dev/null +++ b/src/main/java/org/texttechnologylab/project/gruppe_05_1/database/domainimp/speeches/Speech_MongoDB_Impl.java @@ -0,0 +1,36 @@ +package org.texttechnologylab.project.gruppe_05_1.database.domainimp.speeches; + +import org.bson.Document; +import org.texttechnologylab.project.gruppe_05_1.xml.speeches.Impls.Speech_File_Impl; +import org.texttechnologylab.project.gruppe_05_1.xml.speeches.Interfaces.Speech; + + +import java.util.List; + +public class Speech_MongoDB_Impl extends Speech_File_Impl implements Speech { + public Speech_MongoDB_Impl(Document mongoDocument) { + super( + mongoDocument.getInteger("sessionId"), + mongoDocument.getInteger("agendaItemId"), + mongoDocument.getInteger("speechId"), + mongoDocument.getInteger("speakerId"), + mongoDocument.getString("speakerName"), + mongoDocument.getString("fraction")); + + for (Document content : (List) mongoDocument.get("speechContents")) { + switch (content.getString("type")) { + case "line": + this.addContent(new Line_MongoDB_Impl(content)); + break; + case "comment": + this.addContent(new Comment_MongoDB_Impl(content)); + break; + case "speaker": + this.addContent(new Speaker_MongoDB_Impl(content)); + break; + default: + throw new IllegalArgumentException("Unknown content type: " + content.getString("type")); + } + } + } +} diff --git a/src/main/java/org/texttechnologylab/project/gruppe_05_1/util/Logger.java b/src/main/java/org/texttechnologylab/project/gruppe_05_1/util/Logger.java new file mode 100644 index 0000000..7bc58b9 --- /dev/null +++ b/src/main/java/org/texttechnologylab/project/gruppe_05_1/util/Logger.java @@ -0,0 +1,27 @@ +package org.texttechnologylab.project.gruppe_05_1.util; + +public class Logger { + private static final boolean DEBUG_LOGGING = false; + // info, warn, error with message and colors and datetime + public static void info(String message) { + System.out.println("\u001B[32m" + java.time.LocalTime.now() + " INFO: " + message + "\u001B[0m"); + } + + public static void warn(String message) { + System.out.println("\u001B[33m" + java.time.LocalTime.now() + " WARN: " + message + "\u001B[0m"); + } + + public static void error(String message) { + System.out.println("\u001B[31m" + java.time.LocalTime.now() + " ERROR: " + message + "\u001B[0m"); + } + + public static void debug(String message) { + if (DEBUG_LOGGING) { + System.out.println("\u001B[38;5;214m" + java.time.LocalTime.now() + " DEBUG: " + message + "\u001B[0m"); + } + } + + public static void pink(String message) { + System.out.println("\u001B[35m" + java.time.LocalTime.now() + " PINK: " + message + "\u001B[0m"); + } +} diff --git a/src/main/java/org/texttechnologylab/project/gruppe_05_1/util/PPRUtils.java b/src/main/java/org/texttechnologylab/project/gruppe_05_1/util/PPRUtils.java index a1b515c..002c304 100644 --- a/src/main/java/org/texttechnologylab/project/gruppe_05_1/util/PPRUtils.java +++ b/src/main/java/org/texttechnologylab/project/gruppe_05_1/util/PPRUtils.java @@ -17,7 +17,6 @@ import org.texttechnologylab.project.gruppe_05_1.domain.speaker.Membership; import org.texttechnologylab.project.gruppe_05_1.domain.speaker.Speaker; import org.texttechnologylab.project.gruppe_05_1.nlp.NlpUtils; import org.texttechnologylab.project.gruppe_05_1.xml.FileObjectFactory; -import org.w3c.dom.Document; import org.w3c.dom.Element; import org.w3c.dom.Node; import org.xml.sax.InputSource; @@ -361,7 +360,7 @@ public abstract class PPRUtils { while (hasMore) { String queryUrl = "https://www.bundestag.de/ajax/filterlist/de/services/opendata/866354-866354?limit=" + limit + "&noFilterSet=true&offset=" + offset; - System.out.println("Lade: " + queryUrl); + //System.out.println("Lade: " + queryUrl); try { Document htmlDoc = Jsoup.connect(queryUrl).get(); Elements xmlLinks = htmlDoc.select("a.bt-link-dokument"); @@ -372,10 +371,9 @@ public abstract class PPRUtils { for (org.jsoup.nodes.Element link : xmlLinks) { String xmlUrl = link.attr("href"); - System.out.println("Verarbeite XML: " + xmlUrl); + //System.out.println("Verarbeite XML: " + xmlUrl); try { org.w3c.dom.Document xmlDoc = downloadAndParseXML(xmlUrl); - String uniqueId = xmlDoc.getDocumentElement().getAttribute("sitzung-nr"); if (processedProtocols.contains(uniqueId)) { System.out.println("Protokoll bereits verarbeitet: " + uniqueId); @@ -383,7 +381,6 @@ public abstract class PPRUtils { } processedProtocols.add(uniqueId); xmlProtocols.add(xmlDoc); - //TODO verarbeitung } catch (Exception e) { System.err.println("Fehler beim Verarbeiten der XML-Datei: " + xmlUrl); e.printStackTrace(); diff --git a/src/main/java/org/texttechnologylab/project/gruppe_05_1/xml/speeches/SpeechParser.java b/src/main/java/org/texttechnologylab/project/gruppe_05_1/xml/speeches/SpeechParser.java index 82062ee..4d6e0ca 100644 --- a/src/main/java/org/texttechnologylab/project/gruppe_05_1/xml/speeches/SpeechParser.java +++ b/src/main/java/org/texttechnologylab/project/gruppe_05_1/xml/speeches/SpeechParser.java @@ -37,8 +37,8 @@ public class SpeechParser { List sessions = new ArrayList<>(); this.speeches = new ArrayList<>(); this.agendaItems = new ArrayList<>(); - //TODO Logik so machen dass aus array von xmls gelesen wird nicht aus pfad Set xmlDocuments = PPRUtils.processXML(); + System.out.println("All sessions parsed"); for (org.w3c.dom.Document xmlDoc:xmlDocuments) { try { File tempFile = convertDocumentToFile(xmlDoc); @@ -65,7 +65,6 @@ public class SpeechParser { Element root = document.getDocumentElement(); String legislativePeriod = root.getAttribute("wahlperiode"); int sessionId = Integer.parseInt(root.getAttribute("sitzung-nr")); - System.out.println("Session " + sessionId + " wurde gespeichert"); String sessionDate = root.getAttribute("sitzung-datum"); Element startTimeElement = (Element) root.getElementsByTagName("sitzungsbeginn").item(0); String startTimeString = startTimeElement != null ? startTimeElement.getAttribute("sitzung-start-uhrzeit") : null;