diff --git a/src/main/java/org/texttechnologylab/project/gruppe_05_1/Main.java b/src/main/java/org/texttechnologylab/project/gruppe_05_1/Main.java index 555ccb2..6a14729 100644 --- a/src/main/java/org/texttechnologylab/project/gruppe_05_1/Main.java +++ b/src/main/java/org/texttechnologylab/project/gruppe_05_1/Main.java @@ -2,10 +2,7 @@ package org.texttechnologylab.project.gruppe_05_1; import com.mongodb.client.MongoDatabase; -import org.texttechnologylab.project.gruppe_05_1.database.MongoDBHandler; -import org.texttechnologylab.project.gruppe_05_1.database.MongoObjectFactory; -import org.texttechnologylab.project.gruppe_05_1.database.SpeechIndex; -import org.texttechnologylab.project.gruppe_05_1.database.SpeechIndexFactoryImpl; +import org.texttechnologylab.project.gruppe_05_1.database.*; import org.texttechnologylab.project.gruppe_05_1.domain.mdb.Mdb; import org.texttechnologylab.project.gruppe_05_1.domain.mdb.MdbDocument; import org.texttechnologylab.project.gruppe_05_1.rest.RESTHandler; @@ -23,6 +20,7 @@ import java.io.IOException; import java.util.List; import java.util.Properties; +import static java.lang.Boolean.FALSE; import static java.lang.Boolean.TRUE; /* @@ -60,38 +58,38 @@ public class Main { Logger.pink("Parsing XML and inserting data into DB (Uebung 2)..."); SpeechIndexFactoryImpl speechIndexFactory = new SpeechIndexFactoryImpl(); + if (MongoPprUtils.getSpeechCollection().countDocuments() != 0) { + System.out.println("Speeches werden nicht gelesen, da sie bereits in der Datenbank stehen"); + } + else { + SpeechIndex speechIndex = speechIndexFactory + .parseLegislativePeriods(TRUE) + .builder() + .parseSessions() + .filterForCurrentMembers(FALSE) + .build(); - SpeechIndex speechIndex = speechIndexFactory - .builder() - .parseLegislativePeriods(TRUE) - .parseSessions() - .parseMembers() - .parseFractions() - .filterForCurrentMembers(TRUE) - .build(); + //speechIndex.printInfo(); - //speechIndex.printInfo(); + System.out.println("Data retrieved from DB:"); + System.out.println("SPEECHCOUNT: " + speechIndex.getSpeeches().size()); + System.out.println("SESSIONCOUNT: " + speechIndex.getSessions().size()); + System.out.println("AGENDAITEMCOUNT: " + speechIndex.getAgendaItems().size()); - System.out.println("Data retrieved from DB:"); - System.out.println("MEMBERCOUNT: " + speechIndex.getMembers().size()); - System.out.println("FRACTIONCOUNT: " + speechIndex.getFractions().size()); - System.out.println("SPEECHCOUNT: " + speechIndex.getSpeeches().size()); - System.out.println("SESSIONCOUNT: " + speechIndex.getSessions().size()); - System.out.println("AGENDAITEMCOUNT: " + speechIndex.getAgendaItems().size()); + MongoDBHandler mongoDBHandler = new MongoDBHandler(); + mongoDBHandler.deleteAllDocuments(); // Clear the DB - MongoDBHandler mongoDBHandler = new MongoDBHandler(); - mongoDBHandler.deleteAllDocuments(); // Clear the DB + Logger.pink("Adding Sessions to DB..."); + mongoDBHandler.insertSessions(speechIndex.getSessions()); - Logger.pink("Adding Sessions to DB..."); - mongoDBHandler.insertSessions(speechIndex.getSessions()); + Logger.pink("Adding Agenda Items to DB..."); + mongoDBHandler.insertAgendaItems(speechIndex.getAgendaItems()); - Logger.pink("Adding Agenda Items to DB..."); - mongoDBHandler.insertAgendaItems(speechIndex.getAgendaItems()); + Logger.pink("Adding Speeches to DB..."); + mongoDBHandler.insertSpeeches(speechIndex.getSpeeches()); - Logger.pink("Adding Speeches to DB..."); - mongoDBHandler.insertSpeeches(speechIndex.getSpeeches()); - - mongoDBHandler.close(); // Close the connection to the DB + mongoDBHandler.close(); // Close the connection to the DB + } //TEST diff --git a/src/main/java/org/texttechnologylab/project/gruppe_05_1/database/MongoDBHandler.java b/src/main/java/org/texttechnologylab/project/gruppe_05_1/database/MongoDBHandler.java index 8b684f0..8361a70 100644 --- a/src/main/java/org/texttechnologylab/project/gruppe_05_1/database/MongoDBHandler.java +++ b/src/main/java/org/texttechnologylab/project/gruppe_05_1/database/MongoDBHandler.java @@ -52,8 +52,7 @@ public class MongoDBHandler { private static String databaseName; private MongoCollection speechesCollection; - private MongoCollection membersCollection; - private MongoCollection fractionsCollection; + private MongoCollection sessionsCollection; private MongoCollection agendaItemsCollection; private MongoCollection historyCollection; @@ -90,15 +89,11 @@ public class MongoDBHandler { mongoClient = MongoClients.create(settings); database = mongoClient.getDatabase(databaseName); speechesCollection = database.getCollection("speech"); - membersCollection = database.getCollection("members"); - fractionsCollection = database.getCollection("fractions"); sessionsCollection = database.getCollection("sessions"); agendaItemsCollection = database.getCollection("agendaItems"); historyCollection = database.getCollection("history"); Logger.info("Connected to MongoDB database: " + databaseName); - // hopeless attempt of creating the fulltext search index :( - membersCollection.createIndex(new Document("collection", 1)); } /** @@ -582,49 +577,6 @@ public class MongoDBHandler { speechesCollection.insertMany(speechDocuments); } - public Speech insertSpeech(int speakerId, int sessionId, int agendaItemId) throws MemberNotFoundException, SessionNotFoundException, AgendaItemNotFoundException, ServerErrorException { - // fetch member by speakerId - MemberOfParliament member; - try { - List members = retrieveAllMembersOfParliament(Filters.eq("id", speakerId)); - if (members.isEmpty()) { - Logger.error("No member found with id " + speakerId); - throw new MemberNotFoundException(); - } else if (members.size() > 1) { - Logger.warn("Multiple members found with id " + speakerId); - throw new ServerErrorException(); - } - member = members.get(0); - } catch (IOException e) { - Logger.error("Failed to retrieve member with id " + speakerId); - throw new ServerErrorException(); - } - // check if session and agendaItem exist - List sessions = retrieveAllSessions(Filters.eq("sessionId", sessionId)); - if (sessions.isEmpty()) { - Logger.error("No session found with id " + sessionId); - throw new SessionNotFoundException(); - } - List agendaItems = retrieveAllAgendaItems(Filters.eq("id", agendaItemId)); - if (agendaItems.isEmpty()) { - Logger.error("No agendaItem found with id " + agendaItemId); - throw new AgendaItemNotFoundException(); - } - // get a new random speechId that is not already in use - int speechId = 0; - while (!retrieveAllSpeeches(Filters.eq("speechId", speechId)).isEmpty()) { - // generate random int - speechId = (int) (Math.random() * Integer.MAX_VALUE); - } - - // create speech - Speech speech = new Speech_File_Impl(sessionId, agendaItemId, speakerId, speechId, member.getName(), member.getParty()); - - // insert speech into DB - insertSpeeches(List.of(speech)); - - return speech; - } public List retrieveAllSpeeches() { List speeches = speechesCollection.find().into(new ArrayList<>()); @@ -686,34 +638,6 @@ public class MongoDBHandler { return result; } - public List retrieveAllMembersOfParliament() { - List members = membersCollection.find().into(new ArrayList<>()); - List result = new ArrayList<>(); - for (Document member : members) { - result.add(new MemberOfParliament_MongoDB_Impl(member)); - } - - return result; - } - - public List retrieveAllMembersOfParliament(Bson filter) throws IOException { - List speeches = membersCollection.find(filter).into(new ArrayList<>()); - List result = new ArrayList<>(); - for (Document speech : speeches) { - result.add(new MemberOfParliament_MongoDB_Impl(speech)); - } - return result; - } - - public List retrieveAllMembersOfParliament(Bson filter, Bson projection) throws IOException { - List speeches = membersCollection.find(filter).projection(projection).into(new ArrayList<>()); - List result = new ArrayList<>(); - for (Document speech : speeches) { - result.add(new MemberOfParliament_MongoDB_Impl(speech)); - } - - return result; - } public void deleteAllDocuments() { speechesCollection.deleteMany(new Document()); diff --git a/src/main/java/org/texttechnologylab/project/gruppe_05_1/database/SpeechIndex.java b/src/main/java/org/texttechnologylab/project/gruppe_05_1/database/SpeechIndex.java index 5c8e571..935e5da 100644 --- a/src/main/java/org/texttechnologylab/project/gruppe_05_1/database/SpeechIndex.java +++ b/src/main/java/org/texttechnologylab/project/gruppe_05_1/database/SpeechIndex.java @@ -10,14 +10,10 @@ public class SpeechIndex { private final List sessions; private final List speeches; private final List agendaItems; - private final List members; - private final List fractions; - public SpeechIndex(List sessions, List speeches, List agendaItems, List members, List fractions) { + public SpeechIndex(List sessions, List speeches, List agendaItems) { this.sessions = sessions; this.speeches = speeches; this.agendaItems = agendaItems; - this.members = members; - this.fractions = fractions; } } diff --git a/src/main/java/org/texttechnologylab/project/gruppe_05_1/database/SpeechIndexFactory.java b/src/main/java/org/texttechnologylab/project/gruppe_05_1/database/SpeechIndexFactory.java index a321f8c..919db13 100644 --- a/src/main/java/org/texttechnologylab/project/gruppe_05_1/database/SpeechIndexFactory.java +++ b/src/main/java/org/texttechnologylab/project/gruppe_05_1/database/SpeechIndexFactory.java @@ -27,21 +27,6 @@ public interface SpeechIndexFactory { */ SpeechIndexFactory parseSessions(); - /** - * Parses all members of parliament. - * @return the SpeechIndexFactory instance - * @throws IOException if an I/O error occurs - * @throws SAXException if a SAX error occurs - */ - SpeechIndexFactory parseMembers() throws IOException, SAXException; - - /** - * Parses all fractions. - * @return the SpeechIndexFactory instance - * @throws IOException if an I/O error occurs - * @throws SAXException if a SAX error occurs - */ - SpeechIndexFactory parseFractions() throws IOException, SAXException; /** * Builds a new SpeechIndex instance. diff --git a/src/main/java/org/texttechnologylab/project/gruppe_05_1/database/SpeechIndexFactoryImpl.java b/src/main/java/org/texttechnologylab/project/gruppe_05_1/database/SpeechIndexFactoryImpl.java index 6fb064d..0bf0208 100644 --- a/src/main/java/org/texttechnologylab/project/gruppe_05_1/database/SpeechIndexFactoryImpl.java +++ b/src/main/java/org/texttechnologylab/project/gruppe_05_1/database/SpeechIndexFactoryImpl.java @@ -39,19 +39,10 @@ public class SpeechIndexFactoryImpl implements SpeechIndexFactory { return this; } - @Override - public SpeechIndexFactory parseMembers() throws IOException, SAXException { - return null; - } - - @Override - public SpeechIndexFactory parseFractions() throws IOException, SAXException { - return null; - } @Override public SpeechIndex build() throws IOException, SAXException { - return null; + return new SpeechIndex(sessions, speeches, agendaItems); } diff --git a/src/main/java/org/texttechnologylab/project/gruppe_05_1/util/PPRUtils.java b/src/main/java/org/texttechnologylab/project/gruppe_05_1/util/PPRUtils.java index 002c304..0eacef9 100644 --- a/src/main/java/org/texttechnologylab/project/gruppe_05_1/util/PPRUtils.java +++ b/src/main/java/org/texttechnologylab/project/gruppe_05_1/util/PPRUtils.java @@ -356,7 +356,6 @@ public abstract class PPRUtils { int offset = 0; int limit = 10; boolean hasMore = true; - while (hasMore) { String queryUrl = "https://www.bundestag.de/ajax/filterlist/de/services/opendata/866354-866354?limit=" + limit + "&noFilterSet=true&offset=" + offset; diff --git a/src/main/java/org/texttechnologylab/project/gruppe_05_1/xml/speeches/SpeechParser.java b/src/main/java/org/texttechnologylab/project/gruppe_05_1/xml/speeches/SpeechParser.java index 4d6e0ca..fcb436d 100644 --- a/src/main/java/org/texttechnologylab/project/gruppe_05_1/xml/speeches/SpeechParser.java +++ b/src/main/java/org/texttechnologylab/project/gruppe_05_1/xml/speeches/SpeechParser.java @@ -2,6 +2,7 @@ package org.texttechnologylab.project.gruppe_05_1.xml.speeches; import lombok.Getter; import lombok.Setter; +import org.texttechnologylab.project.gruppe_05_1.database.MongoPprUtils; import org.texttechnologylab.project.gruppe_05_1.util.PPRUtils; import org.texttechnologylab.project.gruppe_05_1.xml.speeches.Impls.*; import org.texttechnologylab.project.gruppe_05_1.xml.speeches.Interfaces.AgendaItem; @@ -34,12 +35,13 @@ public class SpeechParser { private Boolean parseLegislativePeriods; public List parseAllSessions() { + List sessionsEmpty = new ArrayList<>(); List sessions = new ArrayList<>(); this.speeches = new ArrayList<>(); this.agendaItems = new ArrayList<>(); Set xmlDocuments = PPRUtils.processXML(); System.out.println("All sessions parsed"); - for (org.w3c.dom.Document xmlDoc:xmlDocuments) { + for (org.w3c.dom.Document xmlDoc : xmlDocuments) { try { File tempFile = convertDocumentToFile(xmlDoc); Session session = parseSessionFile(tempFile); @@ -50,8 +52,8 @@ public class SpeechParser { e.printStackTrace(); } } - return sessions; + } private Session parseSessionFile(File file) throws Exception { @@ -168,6 +170,7 @@ public class SpeechParser { } return null; } + /** * Konvertiert ein org.w3c.dom.Document in eine temporäre Datei. */