diff --git a/.DS_Store b/.DS_Store deleted file mode 100644 index 6799dc5..0000000 Binary files a/.DS_Store and /dev/null differ diff --git a/generated_class_diagram.puml b/generated_class_diagram.puml index 6df0c19..2b4544a 100644 --- a/generated_class_diagram.puml +++ b/generated_class_diagram.puml @@ -11,6 +11,8 @@ package org.texttechnologylab.project.gruppe_05_1 { } org.texttechnologylab.project.gruppe_05_1.Main "1" <-- "1" org.texttechnologylab.project.gruppe_05_1.domain.mdb.Mdb org.texttechnologylab.project.gruppe_05_1.Main "1" <-- "1" org.texttechnologylab.project.gruppe_05_1.domain.mdb.MdbDocument +org.texttechnologylab.project.gruppe_05_1.Main "1" <-- "1" org.texttechnologylab.project.gruppe_05_1.nlp.NlpUtils +org.texttechnologylab.project.gruppe_05_1.Main "1" <-- "1" org.texttechnologylab.project.gruppe_05_1.nlp.XmiExtractor org.texttechnologylab.project.gruppe_05_1.Main "1" <-- "1" org.texttechnologylab.project.gruppe_05_1.rest.RESTHandler org.texttechnologylab.project.gruppe_05_1.Main "1" <-- "1" org.texttechnologylab.project.gruppe_05_1.util.Logger org.texttechnologylab.project.gruppe_05_1.Main "1" <-- "1" org.texttechnologylab.project.gruppe_05_1.util.PPRUtils @@ -26,11 +28,10 @@ package org.texttechnologylab.project.gruppe_05_1.database { - agendaItems - speeches - members - - fractions - - parseLegislativePeriods + parseLegislativePeriods() + builder() + parseSessions() + + build() + filterForCurrentMembers() } } @@ -61,11 +62,14 @@ package org.texttechnologylab.project.gruppe_05_1.database { - sessionsCollection - agendaItemsCollection - historyCollection + + getDatabase() + insertSession() + insertSessions() + insertAgendaItems() + insertSpeeches() + + updateXmiData() + deleteAllDocuments() + + bulkUpdateDocuments() + close() } } @@ -82,10 +86,10 @@ package org.texttechnologylab.project.gruppe_05_1.database { + SPEECH_COLLECTION_NAME + PICTURES_COLLECTION_NAME + COMMENT_COLLECTION_NAME - - speakerCollecion - - speechCollecion - - picturesCollecion - - commentCollecion + - speakerCollection + - speechCollection + - picturesCollection + - commentCollection + createSpeakerCollection() + createSpeechCollection() + createCommentCollection() @@ -100,17 +104,12 @@ package org.texttechnologylab.project.gruppe_05_1.database { org.texttechnologylab.project.gruppe_05_1.database.MongoPprUtils "1" <-- "1" org.texttechnologylab.project.gruppe_05_1.domain.html.Parlamentarier org.texttechnologylab.project.gruppe_05_1.database.MongoPprUtils "1" <-- "1" org.texttechnologylab.project.gruppe_05_1.domain.html.ParlamentarierDetails org.texttechnologylab.project.gruppe_05_1.database.MongoPprUtils "1" <-- "1" org.texttechnologylab.project.gruppe_05_1.domain.speaker.Membership -org.texttechnologylab.project.gruppe_05_1.database.MongoPprUtils "1" <-- "1" org.texttechnologylab.project.gruppe_05_1.domain.speech.Speech -org.texttechnologylab.project.gruppe_05_1.database.MongoPprUtils "1" <-- "1" org.texttechnologylab.project.gruppe_05_1.util.PPRUtils package org.texttechnologylab.project.gruppe_05_1.database { class "SpeechIndex" as org.texttechnologylab.project.gruppe_05_1.database.SpeechIndex { - sessions - speeches - agendaItems - - getSessions() - - getSpeeches() - - getAgendaitems() } } @@ -136,7 +135,6 @@ package org.texttechnologylab.project.gruppe_05_1.database { + createList() } } -org.texttechnologylab.project.gruppe_05_1.database.MongoOperations "1" <-- "1" org.texttechnologylab.project.gruppe_05_1.xml.FileObjectFactory package org.texttechnologylab.project.gruppe_05_1.database.domainimpl.mdb { class "BiografischeAngaben_Mongo_Impl" as org.texttechnologylab.project.gruppe_05_1.database.domainimpl.mdb.BiografischeAngaben_Mongo_Impl { @@ -287,10 +285,23 @@ org.texttechnologylab.project.gruppe_05_1.rest.ParlamentarierController "1" <-- org.texttechnologylab.project.gruppe_05_1.rest.ParlamentarierController "1" <-- "1" org.texttechnologylab.project.gruppe_05_1.database.MongoPprUtils org.texttechnologylab.project.gruppe_05_1.rest.ParlamentarierController "1" <-- "1" org.texttechnologylab.project.gruppe_05_1.util.PPRUtils +package org.texttechnologylab.project.gruppe_05_1.nlp { + class "XmiExtractor" as org.texttechnologylab.project.gruppe_05_1.nlp.XmiExtractor { + - 'static' + + extractAndUploadXmiData() + - processXmiGzStream() + - extractSpeechKeyFromFilename() + - uploadToMongoDB() + + getDatabase() + } +} + package org.texttechnologylab.project.gruppe_05_1.nlp { class "NlpUtils" as org.texttechnologylab.project.gruppe_05_1.nlp.NlpUtils { - pComposer - iWorkers + - TYPE_SYSTEM_DESCRIPTOR_PATH + - MAX_FEATURE_LENGTH + createNlpData() - duuiInit() + getCas() @@ -305,7 +316,7 @@ package org.texttechnologylab.project.gruppe_05_1.nlp { package org.texttechnologylab.project.gruppe_05_1.xml { interface "XmlOperations" as org.texttechnologylab.project.gruppe_05_1.xml.XmlOperations { - + fromXmlNode() + - fromXmlNode() } } @@ -358,7 +369,6 @@ package org.texttechnologylab.project.gruppe_05_1.xml.mdb { + fromXmlNode() } } -org.texttechnologylab.project.gruppe_05_1.xml.mdb.BiografischeAngaben_File_Impl "1" <-- "1" org.texttechnologylab.project.gruppe_05_1.domain.Gender org.texttechnologylab.project.gruppe_05_1.xml.mdb.BiografischeAngaben_File_Impl "1" <-- "1" org.texttechnologylab.project.gruppe_05_1.domain.mdb.BiografischeAngaben org.texttechnologylab.project.gruppe_05_1.xml.mdb.BiografischeAngaben_File_Impl "1" <-- "1" org.texttechnologylab.project.gruppe_05_1.util.GeneralUtils org.texttechnologylab.project.gruppe_05_1.xml.mdb.BiografischeAngaben_File_Impl "1" <-- "1" org.texttechnologylab.project.gruppe_05_1.xml.XmlOperations @@ -387,16 +397,12 @@ package org.texttechnologylab.project.gruppe_05_1.xml.speeches { class "SpeechParser" as org.texttechnologylab.project.gruppe_05_1.xml.speeches.SpeechParser { - speeches - agendaItems - - parseLegislativePeriods - - getSpeeches() - - getAgendaitems() - - setParselegislativeperiods() + + setParseLegislativePeriods() - parseSessionFile() - getOptionalTextContent() - convertDocumentToFile() } } -org.texttechnologylab.project.gruppe_05_1.xml.speeches.SpeechParser "1" <-- "1" org.texttechnologylab.project.gruppe_05_1.database.MongoPprUtils org.texttechnologylab.project.gruppe_05_1.xml.speeches.SpeechParser "1" <-- "1" org.texttechnologylab.project.gruppe_05_1.util.PPRUtils org.texttechnologylab.project.gruppe_05_1.xml.speeches.SpeechParser "1" <-- "1" org.texttechnologylab.project.gruppe_05_1.xml.speeches.Interfaces.AgendaItem org.texttechnologylab.project.gruppe_05_1.xml.speeches.SpeechParser "1" <-- "1" org.texttechnologylab.project.gruppe_05_1.xml.speeches.Interfaces.Session @@ -497,10 +503,10 @@ package org.texttechnologylab.project.gruppe_05_1.xml.speeches.Impls { - startDate - endDate - constituency - - getId() - - getStartdate() - - getEnddate() - - getConstituency() + + getId() + + getStartDate() + + getEndDate() + + getConstituency() + getType() + toHTML() } @@ -515,11 +521,10 @@ package org.texttechnologylab.project.gruppe_05_1.xml.speeches.Impls { - dateTime - endTime - agendaItems - - getId() - - getLegislativeperiod() - - getDatetime() - - getEndtime() - - getAgendaitems() + + getId() + + getLegislativePeriod() + + getDateTime() + + getEndTime() + getType() + addAgendaItem() + toHTML() @@ -535,10 +540,10 @@ package org.texttechnologylab.project.gruppe_05_1.xml.speeches.Impls { - speechId - comment - commentatorName - - getContentid() - - getSpeechid() - - getComment() - - getCommentatorname() + + getContentId() + + getSpeechId() + + getComment() + + getCommentatorName() + getType() } } @@ -556,14 +561,14 @@ package org.texttechnologylab.project.gruppe_05_1.xml.speeches.Impls { - placeOfBirth - gender - religion - - getName() - - getFirstname() - - getTitle() - - getDateofbirth() - - getDateofdeath() - - getPlaceofbirth() - - getGender() - - getReligion() + + getName() + + getFirstName() + + getTitle() + + getDateOfBirth() + + getDateOfDeath() + + getPlaceOfBirth() + + getGender() + + getReligion() } } org.texttechnologylab.project.gruppe_05_1.xml.speeches.Impls.Individual_File_Impl "1" <-- "1" org.texttechnologylab.project.gruppe_05_1.xml.speeches.Interfaces.Individual @@ -573,9 +578,9 @@ package org.texttechnologylab.project.gruppe_05_1.xml.speeches.Impls { - contentId # speechId - content - - getContentid() - # getSpeechid() - - getContent() + + getContentId() + + getSpeechId() + + getContent() + getType() } } @@ -592,13 +597,14 @@ package org.texttechnologylab.project.gruppe_05_1.xml.speeches.Impls { - speakerName - fraction - speechContents - - getSessionid() - - getAgendaitemid() - - getSpeakerid() - - getSpeechid() - - getSpeakername() - - getFraction() - - getSpeechcontents() + - speechKey + + getSessionId() + + getAgendaItemId() + + getSpeakerId() + + getSpeechId() + + getSpeakerName() + + getFraction() + + getSpeechKey() + getType() + addContent() + toHTML() @@ -613,9 +619,9 @@ package org.texttechnologylab.project.gruppe_05_1.xml.speeches.Impls { - id - name - federalState - - getId() - - getName() - - getFederalstate() + + getId() + + getName() + + getFederalState() + getType() + toHTML() } @@ -630,11 +636,11 @@ package org.texttechnologylab.project.gruppe_05_1.xml.speeches.Impls { - speakerId - speakerName - fraction - - getContentid() - - getSpeechid() - - getSpeakerid() - - getSpeakername() - - getFraction() + + getContentId() + + getSpeechId() + + getSpeakerId() + + getSpeakerName() + + getFraction() + getType() } } @@ -648,10 +654,10 @@ package org.texttechnologylab.project.gruppe_05_1.xml.speeches.Impls { - content - contentType - date + + getContent() + + getContentType() + + getDate() + getAction() - - getContent() - - getContenttype() - - getDate() + getType() } } @@ -666,12 +672,11 @@ package org.texttechnologylab.project.gruppe_05_1.xml.speeches.Impls { - firstLegislativePeriodId - lastLegislativePeriodId - photo - - getId() - - getParty() - - getLegislativeperiods() - - getFirstlegislativeperiodid() - - getLastlegislativeperiodid() - - getPhoto() + + getId() + + getParty() + + getFirstLegislativePeriodId() + + getLastLegislativePeriodId() + + getPhoto() + isCurrentMember() + updateParty() + getType() @@ -688,10 +693,9 @@ package org.texttechnologylab.project.gruppe_05_1.xml.speeches.Impls { - sessionId - title - speeches - - getId() - - getSessionid() - - getTitle() - - getSpeeches() + + getId() + + getSessionId() + + getTitle() + getType() + addSpeech() + toHTML() @@ -706,9 +710,8 @@ package org.texttechnologylab.project.gruppe_05_1.xml.speeches.Impls { - name - longName - members - - getName() - - getLongname() - - getMembers() + + getName() + + getLongName() + getType() + addMember() + updateName() @@ -725,7 +728,6 @@ package org.texttechnologylab.project.gruppe_05_1.xml.speaker { } org.texttechnologylab.project.gruppe_05_1.xml.speaker.Speaker_File_Impl "1" <-- "1" org.texttechnologylab.project.gruppe_05_1.domain.speaker.Membership org.texttechnologylab.project.gruppe_05_1.xml.speaker.Speaker_File_Impl "1" <-- "1" org.texttechnologylab.project.gruppe_05_1.domain.speaker.Speaker -org.texttechnologylab.project.gruppe_05_1.xml.speaker.Speaker_File_Impl "1" <-- "1" org.texttechnologylab.project.gruppe_05_1.util.GeneralUtils org.texttechnologylab.project.gruppe_05_1.xml.speaker.Speaker_File_Impl "1" <-- "1" org.texttechnologylab.project.gruppe_05_1.util.PPRUtils org.texttechnologylab.project.gruppe_05_1.xml.speaker.Speaker_File_Impl "1" <-- "1" org.texttechnologylab.project.gruppe_05_1.util.XmlUtils org.texttechnologylab.project.gruppe_05_1.xml.speaker.Speaker_File_Impl "1" <-- "1" org.texttechnologylab.project.gruppe_05_1.xml.XmlOperations @@ -780,6 +782,7 @@ package org.texttechnologylab.project.gruppe_05_1.util { class "PPRUtils" as org.texttechnologylab.project.gruppe_05_1.util.PPRUtils { + PARTEILOS_KUERZEL - processedProtocols + + legislaturPeriode + ensureCollectionExist() + parlamentExplorerInit() + readPhotos() @@ -797,10 +800,8 @@ org.texttechnologylab.project.gruppe_05_1.util.PPRUtils "1" <-- "1" org.texttech org.texttechnologylab.project.gruppe_05_1.util.PPRUtils "1" <-- "1" org.texttechnologylab.project.gruppe_05_1.domain.html.Parlamentarier org.texttechnologylab.project.gruppe_05_1.util.PPRUtils "1" <-- "1" org.texttechnologylab.project.gruppe_05_1.domain.mdb.BiografischeAngaben org.texttechnologylab.project.gruppe_05_1.util.PPRUtils "1" <-- "1" org.texttechnologylab.project.gruppe_05_1.domain.mdb.Mdb -org.texttechnologylab.project.gruppe_05_1.util.PPRUtils "1" <-- "1" org.texttechnologylab.project.gruppe_05_1.domain.mdb.MdbDocument org.texttechnologylab.project.gruppe_05_1.util.PPRUtils "1" <-- "1" org.texttechnologylab.project.gruppe_05_1.domain.speaker.Membership org.texttechnologylab.project.gruppe_05_1.util.PPRUtils "1" <-- "1" org.texttechnologylab.project.gruppe_05_1.domain.speaker.Speaker -org.texttechnologylab.project.gruppe_05_1.util.PPRUtils "1" <-- "1" org.texttechnologylab.project.gruppe_05_1.nlp.NlpUtils org.texttechnologylab.project.gruppe_05_1.util.PPRUtils "1" <-- "1" org.texttechnologylab.project.gruppe_05_1.xml.FileObjectFactory package org.texttechnologylab.project.gruppe_05_1.domain { @@ -980,7 +981,6 @@ package org.texttechnologylab.project.gruppe_05_1.domain.mdb { + toString() } } -org.texttechnologylab.project.gruppe_05_1.domain.mdb.BiografischeAngaben "1" <-- "1" org.texttechnologylab.project.gruppe_05_1.domain.Gender package org.texttechnologylab.project.gruppe_05_1.domain.mdb { enum "Mandatsart" as org.texttechnologylab.project.gruppe_05_1.domain.mdb.Mandatsart { @@ -1028,8 +1028,8 @@ package org.texttechnologylab.project.gruppe_05_1.domain.html { + setGeschlecht() + getBeruf() + setBeruf() - + getAkademischertitel() - + setAkademischertitel() + + getAkademischerTitel() + + setAkademischerTitel() + getFamilienstand() + setFamilienstand() + getReligion() @@ -1133,10 +1133,10 @@ package org.texttechnologylab.project.gruppe_05_1.domain.speech { class "Protocol" as org.texttechnologylab.project.gruppe_05_1.domain.speech.Protocol { + getDate() + setDate() - + getStarttime() - + setStarttime() - + getEndtime() - + setEndtime() + + getStartTime() + + setStartTime() + + getEndTime() + + setEndTime() + getIndex() + setIndex() + getTitel() diff --git a/src/.DS_Store b/src/.DS_Store deleted file mode 100644 index 7cbb935..0000000 Binary files a/src/.DS_Store and /dev/null differ diff --git a/src/main/.DS_Store b/src/main/.DS_Store deleted file mode 100644 index e772586..0000000 Binary files a/src/main/.DS_Store and /dev/null differ diff --git a/src/main/java/.DS_Store b/src/main/java/.DS_Store deleted file mode 100644 index f1ac8ac..0000000 Binary files a/src/main/java/.DS_Store and /dev/null differ diff --git a/src/main/java/org/texttechnologylab/project/gruppe_05_1/Main.java b/src/main/java/org/texttechnologylab/project/gruppe_05_1/Main.java index d4a47ea..a83e9d0 100644 --- a/src/main/java/org/texttechnologylab/project/gruppe_05_1/Main.java +++ b/src/main/java/org/texttechnologylab/project/gruppe_05_1/Main.java @@ -1,26 +1,16 @@ package org.texttechnologylab.project.gruppe_05_1; -import com.mongodb.client.MongoDatabase; import org.texttechnologylab.project.gruppe_05_1.database.*; -import org.texttechnologylab.project.gruppe_05_1.domain.mdb.Mdb; -import org.texttechnologylab.project.gruppe_05_1.domain.mdb.MdbDocument; -import org.texttechnologylab.project.gruppe_05_1.nlp.NlpUtils; -import org.texttechnologylab.project.gruppe_05_1.nlp.XmiExtractor; import org.texttechnologylab.project.gruppe_05_1.rest.RESTHandler; import org.texttechnologylab.project.gruppe_05_1.util.Logger; import org.texttechnologylab.project.gruppe_05_1.util.PPRUtils; -import org.texttechnologylab.project.gruppe_05_1.util.PropertiesUtils; -import org.texttechnologylab.project.gruppe_05_1.util.XmlUtils; import org.texttechnologylab.project.gruppe_05_1.xml.FileObjectFactory; import org.texttechnologylab.project.gruppe_05_1.xml.speeches.SpeechParser; -import org.w3c.dom.Element; import org.xml.sax.SAXException; import javax.xml.parsers.ParserConfigurationException; import java.io.IOException; -import java.util.List; -import java.util.Properties; import static java.lang.Boolean.FALSE; import static java.lang.Boolean.TRUE; @@ -57,10 +47,10 @@ public class Main { public static void main(String[] args) throws Exception { //TEST - + MongoDBHandler mongoDBHandler = new MongoDBHandler(); SpeechIndexFactoryImpl speechIndexFactory = new SpeechIndexFactoryImpl(); - if (MongoPprUtils.getSpeechCollection().countDocuments() != 0) { + if (mongoDBHandler.getDatabase().getCollection(MongoPprUtils.SPEECH_COLLECTION_NAME).countDocuments() != 0) { System.out.println("Speeches werden nicht gelesen, da sie bereits in der Datenbank stehen"); } else { @@ -79,8 +69,7 @@ public class Main { System.out.println("SESSIONCOUNT: " + speechIndex.getSessions().size()); System.out.println("AGENDAITEMCOUNT: " + speechIndex.getAgendaItems().size()); - MongoDBHandler mongoDBHandler = new MongoDBHandler(); - mongoDBHandler.deleteAllDocuments(); // Clear the DB + mongoDBHandler.deleteSpeechRelatedDocuments(); // Clear speeches, sessions, agendas (history) Logger.pink("Adding Sessions to DB..."); mongoDBHandler.insertSessions(speechIndex.getSessions()); @@ -90,13 +79,8 @@ public class Main { Logger.pink("Adding Speeches to DB..."); mongoDBHandler.insertSpeeches(speechIndex.getSpeeches()); - - mongoDBHandler.close(); // Close the connection to the DB } - // Stellt fest, dass alle nötigen Datenbank-Collections existieren - PPRUtils.ensureCollectionExist(); - // Alle Informationen (Parlamentarier, Reden, Kommentare etc.) lesen und in die Mongo-DB einfügen, falls diese noch nicht vorhanden sind. PPRUtils.parlamentExplorerInit(xmlFactory, mongoFactory); diff --git a/src/main/java/org/texttechnologylab/project/gruppe_05_1/database/MongoDBHandler.java b/src/main/java/org/texttechnologylab/project/gruppe_05_1/database/MongoDBHandler.java index 503194e..e57ae45 100644 --- a/src/main/java/org/texttechnologylab/project/gruppe_05_1/database/MongoDBHandler.java +++ b/src/main/java/org/texttechnologylab/project/gruppe_05_1/database/MongoDBHandler.java @@ -9,16 +9,12 @@ import com.mongodb.client.MongoClients; import com.mongodb.client.MongoCollection; import com.mongodb.client.MongoDatabase; import com.mongodb.client.model.*; -import exceptions.AgendaItemNotFoundException; -import exceptions.MemberNotFoundException; import exceptions.ServerErrorException; import exceptions.SessionNotFoundException; import org.bson.Document; import org.bson.conversions.Bson; import org.bson.types.ObjectId; -import org.texttechnologylab.DockerUnifiedUIMAInterface.connection.mongodb.MongoDBConfig; import org.texttechnologylab.project.gruppe_05_1.database.domainimp.speeches.AgendaItem_MongoDB_Impl; -import org.texttechnologylab.project.gruppe_05_1.database.domainimp.speeches.MemberOfParliament_MongoDB_Impl; import org.texttechnologylab.project.gruppe_05_1.database.domainimp.speeches.Session_MongoDB_Impl; import org.texttechnologylab.project.gruppe_05_1.database.domainimp.speeches.Speech_MongoDB_Impl; import org.texttechnologylab.project.gruppe_05_1.util.Logger; @@ -26,7 +22,6 @@ import org.texttechnologylab.project.gruppe_05_1.util.PropertiesUtils; import org.texttechnologylab.project.gruppe_05_1.xml.speeches.Impls.*; import org.texttechnologylab.project.gruppe_05_1.xml.speeches.Interfaces.*; -import java.io.IOException; import java.util.*; import java.util.concurrent.TimeUnit; @@ -50,8 +45,8 @@ public class MongoDBHandler { private static String collection; private static String databaseName; + private MongoCollection speakerCollection; private MongoCollection speechesCollection; - private MongoCollection sessionsCollection; private MongoCollection agendaItemsCollection; private MongoCollection historyCollection; @@ -72,27 +67,41 @@ public class MongoDBHandler { collection = mongoProperties.getProperty("remote_collection"); databaseName = mongoProperties.getProperty("remote_database"); - MongoCredential credential = MongoCredential - .createCredential( - user, - databaseName, - password.toCharArray()); + // URI für lokale Datenbank oder für eine Datenbank auf dem Server + String uri; + if ( (localServer != null) && (! localServer.isBlank())) { + uri = localServer; + mongoClient = MongoClients.create(uri); - MongoClientSettings settings = MongoClientSettings.builder() - .credential(credential) - .timeout(180, TimeUnit.HOURS) // needs increased timeout for the bulk speech inserts - .applyToClusterSettings(builder -> - builder.hosts(List.of(new ServerAddress(remoteServer, Integer.parseInt(port))))) - .build(); + // Connect + database = mongoClient.getDatabase(databaseName); + } else { + MongoCredential credential = MongoCredential + .createCredential( + user, + databaseName, + password.toCharArray()); - mongoClient = MongoClients.create(settings); - database = mongoClient.getDatabase(databaseName); - speechesCollection = database.getCollection("speech"); - sessionsCollection = database.getCollection("sessions"); - agendaItemsCollection = database.getCollection("agendaItems"); - historyCollection = database.getCollection("history"); + MongoClientSettings settings = MongoClientSettings.builder() + .credential(credential) + .timeout(180, TimeUnit.HOURS) // needs increased timeout for the bulk speech inserts + .applyToClusterSettings(builder -> + builder.hosts(List.of(new ServerAddress(remoteServer, Integer.parseInt(port))))) + .build(); + + mongoClient = MongoClients.create(settings); + database = mongoClient.getDatabase(databaseName); + } + + speakerCollection = database.getCollection(MongoPprUtils.SPEAKER_COLLECTION_NAME); + speechesCollection = database.getCollection(MongoPprUtils.SPEECH_COLLECTION_NAME); + sessionsCollection = database.getCollection(MongoPprUtils.SESSION_COLLECTION_NAME); + agendaItemsCollection = database.getCollection(MongoPprUtils.AGENDA_ITEMS_COLLECTION_NAME); + historyCollection = database.getCollection(MongoPprUtils.HISTORY_COLLECTION_NAME); + + createIndicesForSpeakerCollection(); + createIndicesForSpeechCollection(); Logger.info("Connected to MongoDB database: " + databaseName); - } public MongoDatabase getDatabase() { @@ -106,18 +115,22 @@ public class MongoDBHandler { */ static public MongoDatabase getMongoDatabase() { - if (mongoDatabase == null) { - Properties mongoProperties = PropertiesUtils.readPropertiesFromResource(propertiesFileName); - // Zugangsdaten - localServer = mongoProperties.getProperty("localserver"); - remoteServer = mongoProperties.getProperty("remote_host"); - user = mongoProperties.getProperty("remote_user"); - password = mongoProperties.getProperty("remote_password"); - port = mongoProperties.getProperty("remote_port"); - collection = mongoProperties.getProperty("remote_collection"); - databaseName = mongoProperties.getProperty("remote_database"); + if (mongoDatabase != null) { + return mongoDatabase; } + + Properties mongoProperties = PropertiesUtils.readPropertiesFromResource(propertiesFileName); + // Zugangsdaten + localServer = mongoProperties.getProperty("localserver"); + remoteServer = mongoProperties.getProperty("remote_host"); + user = mongoProperties.getProperty("remote_user"); + password = mongoProperties.getProperty("remote_password"); + port = mongoProperties.getProperty("remote_port"); + collection = mongoProperties.getProperty("remote_collection"); + databaseName = mongoProperties.getProperty("remote_database"); + + // MongoDBClient erzeugen // String uri = mongoServer + "://" + mongoUser + ":" + mongoPassword + "@" + mongoNeetwork; // cluster, network, user... @@ -159,9 +172,9 @@ public class MongoDBHandler { * * @return List with the names of all collections */ - static public Set getCollectionNames() { - // return getMongoDatabase().listCollectionNames().into(new ArrayList<>()); - return getMongoDatabase().listCollectionNames().into(new HashSet<>()); + public Set getCollectionNames() { + // return getDatabase().listCollectionNames().into(new ArrayList<>()); + return getDatabase().listCollectionNames().into(new HashSet<>()); } /** @@ -169,8 +182,8 @@ public class MongoDBHandler { * @param name Name of collection to check for existance * @return does the collection exist */ - static public boolean collectionExists(String name) { - return getMongoDatabase().listCollectionNames().into(new ArrayList<>()).contains(name); + public boolean collectionExists(String name) { + return getDatabase().listCollectionNames().into(new ArrayList<>()).contains(name); } @@ -194,8 +207,8 @@ public class MongoDBHandler { } } - static public void createCollectionIfNotExist(String collectionName) { - createCollectionIfNotExist(getMongoDatabase(), collectionName); + public void createCollectionIfNotExist(String collectionName) { + createCollectionIfNotExist(getDatabase(), collectionName); } @@ -212,8 +225,8 @@ public class MongoDBHandler { } } - static public void createCollection(String collectionName) { - createCollection(getMongoDatabase(), collectionName); + public void createCollection(String collectionName) { + createCollection(getDatabase(), collectionName); } @@ -262,8 +275,23 @@ public class MongoDBHandler { } } - static public void createOrTrancateCollection(String collectionName) { - createOrTrancateCollection(getMongoDatabase(), collectionName); + public void createIndicesForSpeakerCollection() { + if (speakerCollection.listIndexes().into(new ArrayList<>()).size() == 1) { + MongoDBHandler.createIndexForCollection(speakerCollection,"name", true); + MongoDBHandler.createIndexForCollection(speakerCollection,"firstName", true); + MongoDBHandler.createIndexForCollection(speakerCollection,"party", true); + } + + } + public void createIndicesForSpeechCollection() { + if (speechesCollection.listIndexes().into(new ArrayList<>()).size() == 1) { + MongoDBHandler.createIndexForCollection(speechesCollection, "speakerId", true); + MongoDBHandler.createIndexForCollection(speechesCollection, "speechKey", true); + } + } + + public void createOrTrancateCollection(String collectionName) { + createOrTrancateCollection(database, collectionName); } /** @@ -649,7 +677,7 @@ public class MongoDBHandler { ); } - public void deleteAllDocuments() { + public void deleteSpeechRelatedDocuments() { speechesCollection.deleteMany(new Document()); sessionsCollection.deleteMany(new Document()); agendaItemsCollection.deleteMany(new Document()); diff --git a/src/main/java/org/texttechnologylab/project/gruppe_05_1/database/MongoOperations.java b/src/main/java/org/texttechnologylab/project/gruppe_05_1/database/MongoOperations.java index e6d2f7a..4fd3dc8 100644 --- a/src/main/java/org/texttechnologylab/project/gruppe_05_1/database/MongoOperations.java +++ b/src/main/java/org/texttechnologylab/project/gruppe_05_1/database/MongoOperations.java @@ -1,7 +1,6 @@ package org.texttechnologylab.project.gruppe_05_1.database; import org.bson.Document; -import org.texttechnologylab.project.gruppe_05_1.xml.FileObjectFactory; import java.util.List; diff --git a/src/main/java/org/texttechnologylab/project/gruppe_05_1/database/MongoPprUtils.java b/src/main/java/org/texttechnologylab/project/gruppe_05_1/database/MongoPprUtils.java index 38ca84e..b020bbf 100644 --- a/src/main/java/org/texttechnologylab/project/gruppe_05_1/database/MongoPprUtils.java +++ b/src/main/java/org/texttechnologylab/project/gruppe_05_1/database/MongoPprUtils.java @@ -2,25 +2,17 @@ package org.texttechnologylab.project.gruppe_05_1.database; import com.mongodb.client.MongoCollection; import com.mongodb.client.MongoCursor; -import com.mongodb.client.model.Indexes; import org.bson.Document; import org.texttechnologylab.project.gruppe_05_1.domain.html.Parlamentarier; import org.texttechnologylab.project.gruppe_05_1.domain.html.ParlamentarierDetails; import org.texttechnologylab.project.gruppe_05_1.domain.speaker.Membership; -import org.texttechnologylab.project.gruppe_05_1.domain.speech.Speech; -import org.texttechnologylab.project.gruppe_05_1.util.PPRUtils; import java.time.LocalDate; -import java.time.LocalDateTime; import java.time.ZoneId; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.Date; -import java.util.List; +import java.util.*; /** * Diese Klasse beinhaltet Mongo-Utilities, welche spezifisch für die PPR-Datenstrukturen sind. - * * Mongo-Utilities genereller Natur stehen in der Klasse MongoDBHandler. */ public class MongoPprUtils { @@ -31,71 +23,48 @@ public class MongoPprUtils { */ public static final String SPEAKER_COLLECTION_NAME = "speaker"; public static final String SPEECH_COLLECTION_NAME = "speech"; + + public static final String SESSION_COLLECTION_NAME = "sessions"; + public static final String AGENDA_ITEMS_COLLECTION_NAME = "agendaItems"; + public static final String HISTORY_COLLECTION_NAME = "history"; public static final String PICTURES_COLLECTION_NAME = "pictures"; public static final String COMMENT_COLLECTION_NAME = "comment"; - private static MongoCollection speakerCollecion = null; - private static MongoCollection speechCollecion = null; - private static MongoCollection picturesCollecion = null; - private static MongoCollection commentCollecion = null; + private static MongoCollection speakerCollection = null; + private static MongoCollection speechCollection = null; + private static MongoCollection picturesCollection = null; + private static MongoCollection commentCollection = null; public static MongoCollection getSpeakerCollection() { - if (speakerCollecion == null) speakerCollecion = MongoDBHandler.getMongoDatabase().getCollection(SPEAKER_COLLECTION_NAME); - return speakerCollecion; + if (speakerCollection == null) speakerCollection = MongoDBHandler.getMongoDatabase().getCollection(SPEAKER_COLLECTION_NAME); + return speakerCollection; } public static MongoCollection getSpeechCollection() { - if (speechCollecion == null) speechCollecion = MongoDBHandler.getMongoDatabase().getCollection(SPEECH_COLLECTION_NAME); - return speechCollecion; + if (speechCollection == null) speechCollection = MongoDBHandler.getMongoDatabase().getCollection(SPEECH_COLLECTION_NAME); + return speechCollection; } - public static MongoCollection getPicturesCollection() { - if (picturesCollecion == null) picturesCollecion = MongoDBHandler.getMongoDatabase().getCollection(PICTURES_COLLECTION_NAME); - return picturesCollecion; - } - - public static MongoCollection getCommentCollection() { - if (commentCollecion == null) commentCollecion = MongoDBHandler.getMongoDatabase().getCollection(COMMENT_COLLECTION_NAME); - return commentCollecion; - } /** * Create the Speaker Collection and useful indices for it */ - public static void createSpeakerCollection() { - MongoDBHandler.createCollection(MongoPprUtils.SPEAKER_COLLECTION_NAME); - - MongoDBHandler.createIndexForCollection(getSpeakerCollection(), Arrays.asList("name", "firstName", "party"), true); + public static void createIndexForSpeakerCollection() { + // MongoDBHandler.createIndexForCollection(getSpeakerCollection(), Arrays.asList("name", "firstName", "party"), true); + MongoDBHandler.createIndexForCollection(getSpeakerCollection(),"name", true); + MongoDBHandler.createIndexForCollection(getSpeakerCollection(),"firstName", true); + MongoDBHandler.createIndexForCollection(getSpeakerCollection(),"party", true); } /** * Create the Speech Collection and useful indices for it */ - public static void createSpeechCollection() { - MongoDBHandler.createCollection(MongoPprUtils.SPEECH_COLLECTION_NAME); - MongoDBHandler.createIndexForCollection(getSpeechCollection(), "speaker", true); + public static void createIndexForSpeechCollection() { + MongoDBHandler.createIndexForCollection(getSpeechCollection(), "speakerId", true); + MongoDBHandler.createIndexForCollection(getSpeechCollection(), "speechKey", true); } - /** - * Create the Comment Collection and useful indices for it - */ - public static void createCommentCollection() { - MongoDBHandler.createCollection(MongoPprUtils.COMMENT_COLLECTION_NAME); - MongoDBHandler.createIndexForCollection(getCommentCollection(), Arrays.asList("speaker", "speech"), true); - } - - - /** - * Create the Picture Collection and useful indices for it - */ - public static void createPictureCollection() { - MongoDBHandler.createCollection(MongoPprUtils.PICTURES_COLLECTION_NAME); - - // TODO: für welche Felder sollen Indizes gebaut werden? - // MongoDBHandler.createIndexForCollection(getPicturesCollection(), Arrays.asList("field_1", "field_2"), true); - } - /** * Truncate the Speaker Collection. * Note that it is quicker (and saves space) to drop and re-create rather than removing all documents using "remove({})" @@ -103,7 +72,7 @@ public class MongoPprUtils { public static void truncateSpeakerCollection() { getSpeakerCollection().drop(); - createSpeechCollection(); + createIndexForSpeechCollection(); } /* @@ -143,7 +112,7 @@ public class MongoPprUtils { plist.add(p); } } catch (Throwable t) { - System.err.println(t); + System.err.print(t); } finally { cursor.close(); } @@ -164,11 +133,7 @@ public class MongoPprUtils { p.setNachname((String) doc.get("name")); p.setVorname((String) doc.get("firstName")); String partei = (String) doc.get("party"); - if (partei == null) { - p.setPartei("(parteilos)"); - } else { - p.setPartei(partei); - } + p.setPartei(Objects.requireNonNullElse(partei, "(parteilos)")); return p; } @@ -186,8 +151,7 @@ public class MongoPprUtils { */ public static ParlamentarierDetails getParlamentarierDetailsByID(String id) { Document doc = MongoDBHandler.findFirstDocumentInCollection(getSpeakerCollection(), "_id", id); - ParlamentarierDetails p = readParlamentarierDetailsFromSpeaker(doc); - return p; + return readParlamentarierDetailsFromSpeaker(doc); } @@ -198,8 +162,7 @@ public class MongoPprUtils { */ public static ParlamentarierDetails getParlamentarierDetailsByID(Integer id) { Document doc = MongoDBHandler.findFirstDocumentInCollection(getSpeakerCollection(), "_id", id.toString()); - ParlamentarierDetails p = readParlamentarierDetailsFromSpeaker(doc); - return p; + return readParlamentarierDetailsFromSpeaker(doc); } /** @@ -215,16 +178,12 @@ public class MongoPprUtils { p.setNachname((String) doc.get("name")); p.setVorname((String) doc.get("firstName")); String partei = (String) doc.get("party"); - if (partei == null) { - p.setPartei("(parteilos)"); - } else { - p.setPartei(partei); - } + p.setPartei(Objects.requireNonNullElse(partei, "(parteilos)")); p.setTitle((String) doc.get("title")); p.setGeburtsort((String) doc.get("geburtsort")); p.setGeschlecht((String) doc.get("geschlecht")); p.setBeruf((String) doc.get("beruf")); - p.setAkademischertitel((String) doc.get("akademischertitel")); + p.setAkademischerTitel((String) doc.get("akademischertitel")); p.setFamilienstand((String) doc.get("familienstand")); p.setReligion((String) doc.get("religion")); p.setVita((String) doc.get("vita")); diff --git a/src/main/java/org/texttechnologylab/project/gruppe_05_1/database/SpeechIndex.java b/src/main/java/org/texttechnologylab/project/gruppe_05_1/database/SpeechIndex.java index 1696c86..7d7c378 100644 --- a/src/main/java/org/texttechnologylab/project/gruppe_05_1/database/SpeechIndex.java +++ b/src/main/java/org/texttechnologylab/project/gruppe_05_1/database/SpeechIndex.java @@ -1,6 +1,5 @@ package org.texttechnologylab.project.gruppe_05_1.database; -import lombok.Getter; import org.texttechnologylab.project.gruppe_05_1.xml.speeches.Interfaces.*; import java.util.List; diff --git a/src/main/java/org/texttechnologylab/project/gruppe_05_1/database/SpeechIndexFactoryImpl.java b/src/main/java/org/texttechnologylab/project/gruppe_05_1/database/SpeechIndexFactoryImpl.java index 0bf0208..5f9c6de 100644 --- a/src/main/java/org/texttechnologylab/project/gruppe_05_1/database/SpeechIndexFactoryImpl.java +++ b/src/main/java/org/texttechnologylab/project/gruppe_05_1/database/SpeechIndexFactoryImpl.java @@ -3,10 +3,7 @@ package org.texttechnologylab.project.gruppe_05_1.database; import org.texttechnologylab.project.gruppe_05_1.xml.speeches.Interfaces.*; import org.texttechnologylab.project.gruppe_05_1.xml.speeches.SpeechParser; -import org.xml.sax.SAXException; -import javax.xml.parsers.ParserConfigurationException; -import java.io.IOException; import java.util.List; import java.util.stream.Collectors; @@ -16,18 +13,15 @@ public class SpeechIndexFactoryImpl implements SpeechIndexFactory { private List agendaItems; private List speeches; private List members; - private List fractions; - private Boolean parseLegislativePeriods = true; @Override public SpeechIndexFactoryImpl parseLegislativePeriods(Boolean parseLegislativePeriods) { - this.parseLegislativePeriods = parseLegislativePeriods; try {this.speechParser.setParseLegislativePeriods(parseLegislativePeriods);} catch (NullPointerException ignored) {} return this; } - public SpeechIndexFactoryImpl builder() throws ParserConfigurationException { + public SpeechIndexFactoryImpl builder() { this.speechParser = new SpeechParser(); return this; } @@ -41,7 +35,7 @@ public class SpeechIndexFactoryImpl implements SpeechIndexFactory { @Override - public SpeechIndex build() throws IOException, SAXException { + public SpeechIndex build() { return new SpeechIndex(sessions, speeches, agendaItems); } diff --git a/src/main/java/org/texttechnologylab/project/gruppe_05_1/database/domainimpl/mdb/BiografischeAngaben_Mongo_Impl.java b/src/main/java/org/texttechnologylab/project/gruppe_05_1/database/domainimpl/mdb/BiografischeAngaben_Mongo_Impl.java index 5d47cb3..dce9a48 100644 --- a/src/main/java/org/texttechnologylab/project/gruppe_05_1/database/domainimpl/mdb/BiografischeAngaben_Mongo_Impl.java +++ b/src/main/java/org/texttechnologylab/project/gruppe_05_1/database/domainimpl/mdb/BiografischeAngaben_Mongo_Impl.java @@ -26,8 +26,8 @@ public class BiografischeAngaben_Mongo_Impl extends BiografischeAngaben implemen fields.put("parteiKuerzel", entity.getParteiKuerzel()); fields.put("vitaKurz", entity.getVitaKurz()); fields.put("veroeffentlichungspflichtiges", entity.getVeroeffentlichungspflichtiges()); - Document doc = MongoDBHandler.createDocument(false, fields); - return doc; + + return MongoDBHandler.createDocument(false, fields); } @Override diff --git a/src/main/java/org/texttechnologylab/project/gruppe_05_1/database/domainimpl/mdb/Institution_Mongo_Impl.java b/src/main/java/org/texttechnologylab/project/gruppe_05_1/database/domainimpl/mdb/Institution_Mongo_Impl.java index 6d5f245..a33bf13 100644 --- a/src/main/java/org/texttechnologylab/project/gruppe_05_1/database/domainimpl/mdb/Institution_Mongo_Impl.java +++ b/src/main/java/org/texttechnologylab/project/gruppe_05_1/database/domainimpl/mdb/Institution_Mongo_Impl.java @@ -22,8 +22,7 @@ public class Institution_Mongo_Impl extends Institution implements MongoOperatio fields.put("fktinsVon", entity.getFktinsVon()); fields.put("fktinsBis", entity.getFktinsBis()); - Document doc = MongoDBHandler.createDocument(false, fields); - return doc; + return MongoDBHandler.createDocument(false, fields); } @Override diff --git a/src/main/java/org/texttechnologylab/project/gruppe_05_1/database/domainimpl/mdb/MdbName_Mongo_Impl.java b/src/main/java/org/texttechnologylab/project/gruppe_05_1/database/domainimpl/mdb/MdbName_Mongo_Impl.java index 4b7e4be..fad4e0f 100644 --- a/src/main/java/org/texttechnologylab/project/gruppe_05_1/database/domainimpl/mdb/MdbName_Mongo_Impl.java +++ b/src/main/java/org/texttechnologylab/project/gruppe_05_1/database/domainimpl/mdb/MdbName_Mongo_Impl.java @@ -23,6 +23,7 @@ public class MdbName_Mongo_Impl extends MdbName implements MongoOperations { "bio", bioDoc, "wahlperioden", wpDocs ); - Document doc = MongoDBHandler.createDocument(false, fields); - return doc; + return MongoDBHandler.createDocument(false, fields); } @Override diff --git a/src/main/java/org/texttechnologylab/project/gruppe_05_1/database/domainimpl/mdb/Membership_Mongo_Impl.java b/src/main/java/org/texttechnologylab/project/gruppe_05_1/database/domainimpl/mdb/Membership_Mongo_Impl.java index 494922e..c11be39 100644 --- a/src/main/java/org/texttechnologylab/project/gruppe_05_1/database/domainimpl/mdb/Membership_Mongo_Impl.java +++ b/src/main/java/org/texttechnologylab/project/gruppe_05_1/database/domainimpl/mdb/Membership_Mongo_Impl.java @@ -25,9 +25,7 @@ public class Membership_Mongo_Impl extends Membership implements MongoOperations fields.put("member", entity.getMember()); // TODO: wahrscheinlich nicht nötig fields.put("wp", entity.getWp()); - Document doc = MongoDBHandler.createDocument(false, fields); - - return doc; + return MongoDBHandler.createDocument(false, fields); } @Override diff --git a/src/main/java/org/texttechnologylab/project/gruppe_05_1/database/domainimpl/mdb/Speaker_Mongo_Impl.java b/src/main/java/org/texttechnologylab/project/gruppe_05_1/database/domainimpl/mdb/Speaker_Mongo_Impl.java index a321599..20fc049 100644 --- a/src/main/java/org/texttechnologylab/project/gruppe_05_1/database/domainimpl/mdb/Speaker_Mongo_Impl.java +++ b/src/main/java/org/texttechnologylab/project/gruppe_05_1/database/domainimpl/mdb/Speaker_Mongo_Impl.java @@ -35,9 +35,7 @@ public class Speaker_Mongo_Impl extends Speaker implements MongoOperations { - System.out.println(sentence.getBegin() + "-" + sentence.getEnd() + ": " + sentence.getCoveredText()); + JCasUtil.select(tCas, Sentence.class).forEach(sentence -> { + System.out.println(sentence.getBegin()+"-"+sentence.getEnd()+": "+sentence.getCoveredText()); System.out.println(JCasUtil.selectCovered(org.hucompute.textimager.uima.type.Sentiment.class, sentence)); }); } private static void casInit() { - JCas jcas = null; + JCas jcas; try { jcas = JCasFactory.createJCas(); } catch (ResourceInitializationException e) { System.err.println("ResourceInitializationException: " + e.getMessage()); - System.err.println(e.getStackTrace()); + System.err.println(Arrays.toString(e.getStackTrace())); throw new RuntimeException(e); } catch (CASException e) { System.err.println("CASException: " + e.getMessage()); - System.err.println(e.getStackTrace()); + System.err.println(Arrays.toString(e.getStackTrace())); throw new RuntimeException(e); } @@ -263,8 +252,7 @@ public class NlpUtils { /** * Execution of video processing via DUUI using the RemoteDriver - * - * @throws Exception + * @throws Exception in case of an error */ public static void runVideos() throws Exception { @@ -273,6 +261,7 @@ public class NlpUtils { URL fVideo = classLoader.getResource("example.mp4"); // TODO // convertieren eines Videos in einen Base64-String + assert fVideo != null; File fFile = new File(fVideo.getPath()); byte[] bFile = FileUtils.readFileToByteArray(fFile); String encodedString = Base64.getEncoder().encodeToString(bFile); @@ -315,14 +304,10 @@ public class NlpUtils { pComposer.run(pCas); // select some data - JCasUtil.selectAll(videoCas).stream().forEach(videoAnnotation -> { - System.out.println(videoAnnotation); - }); + JCasUtil.selectAll(videoCas).forEach(System.out::println); // select some data - JCasUtil.selectAll(transcriptCas).stream().forEach(tAnnotation -> { - System.out.println(tAnnotation); - }); + JCasUtil.selectAll(transcriptCas).forEach(System.out::println); } diff --git a/src/main/java/org/texttechnologylab/project/gruppe_05_1/rest/JavalinConfig.java b/src/main/java/org/texttechnologylab/project/gruppe_05_1/rest/JavalinConfig.java index 4d37a16..f47aac4 100644 --- a/src/main/java/org/texttechnologylab/project/gruppe_05_1/rest/JavalinConfig.java +++ b/src/main/java/org/texttechnologylab/project/gruppe_05_1/rest/JavalinConfig.java @@ -23,7 +23,7 @@ public class JavalinConfig extends Properties { /** * Constructor mit Pfad zur Properties-Datei - * @param sPath + * @param sPath Pfad zur Properties-Datei */ public JavalinConfig(String sPath) { @@ -49,7 +49,7 @@ public class JavalinConfig extends Properties { /** * Liefert den Port zurück, auf dem Javalin läuft - * @return + * @return Port */ public Integer getPort() { return GeneralUtils.parseInt(getProperty("port")); diff --git a/src/main/java/org/texttechnologylab/project/gruppe_05_1/util/FileUtils.java b/src/main/java/org/texttechnologylab/project/gruppe_05_1/util/FileUtils.java index 1e2a740..b53da1a 100644 --- a/src/main/java/org/texttechnologylab/project/gruppe_05_1/util/FileUtils.java +++ b/src/main/java/org/texttechnologylab/project/gruppe_05_1/util/FileUtils.java @@ -20,17 +20,18 @@ public abstract class FileUtils { * Creates a (possibly nested) directory * @param dir (e.g. "generated" , "level1/level2/level3" etc. */ - public static void createDirectoryIFNotExists(String dir) { + public static boolean createDirectoryIFNotExists(String dir) { File directory = new File(dir); - if (! directory.exists()){ - directory.mkdirs(); + if (!directory.exists()){ + return directory.mkdirs(); } + return true; } /** * Write a list of Strings to file - * @param fileName - * @param stringsList + * @param fileName the filename + * @param stringsList the list of strings to be written to the file */ public static void writeStringsToFile(String fileName, List stringsList) { FileWriter fileWriter = null; @@ -55,8 +56,8 @@ public abstract class FileUtils { /** * - * @param fileName - * @param string + * @param fileName the filename + * @param string the string to be written to the file */ public static void writeStringToFile(String fileName, String string) { FileWriter fileWriter = null; @@ -81,9 +82,9 @@ public abstract class FileUtils { /** * - * @param fileName - * @return - * @throws IOException + * @param fileName the filename + * @return the filewriter object + * @throws IOException if the file cannot be created */ public static FileWriter createFileWriter(String fileName) throws IOException{ FileWriter fileWriter = null; @@ -94,8 +95,8 @@ public abstract class FileUtils { /** * - * @param fileWriter - * @param stringsList + * @param fileWriter the filewriter object + * @param stringsList the list of strings to be written to the file */ public static void writeStringsToFile(FileWriter fileWriter, List stringsList) { @@ -116,8 +117,8 @@ public abstract class FileUtils { /** * - * @param dir - * @return + * @param dir the directory + * @return a set of filenames in the directory */ public static Set listFilesInDirectory(String dir) { try (Stream stream = Files.list(Paths.get(dir))) { diff --git a/src/main/java/org/texttechnologylab/project/gruppe_05_1/util/GeneralUtils.java b/src/main/java/org/texttechnologylab/project/gruppe_05_1/util/GeneralUtils.java index fff29e9..0b013a3 100644 --- a/src/main/java/org/texttechnologylab/project/gruppe_05_1/util/GeneralUtils.java +++ b/src/main/java/org/texttechnologylab/project/gruppe_05_1/util/GeneralUtils.java @@ -1,28 +1,16 @@ package org.texttechnologylab.project.gruppe_05_1.util; -import java.io.IOException; -import java.nio.file.DirectoryStream; -import java.nio.file.Files; -import java.nio.file.Path; -import java.nio.file.Paths; import java.time.LocalDate; -import java.time.LocalDateTime; import java.time.LocalTime; import java.time.format.DateTimeFormatter; import java.time.format.DateTimeParseException; -import java.util.ArrayList; -import java.util.HashSet; -import java.util.List; -import java.util.Set; -import java.util.stream.Collectors; -import java.util.stream.Stream; public abstract class GeneralUtils { /** * - * @param integer - * @return + * @param integer the integer to be parsed + * @return the parsed integer or null if the integer could not be parsed */ public static Integer parseInt(String integer) { @@ -35,8 +23,8 @@ public abstract class GeneralUtils { /** * Parse a date in the format used in Germany - * @param date - * @return + * @param date the date to be parsed + * @return the parsed date or null if the date could not be parsed */ public static LocalDate parseDate(String date) { @@ -50,9 +38,9 @@ public abstract class GeneralUtils { /** * Parse tiem in a give format - * @param date - * @param timeFormat - * @return + * @param date the time to be parsed + * @param timeFormat the format of the time + * @return the parsed time or null if the time could not be parsed */ public static LocalTime parseTime(String date, String timeFormat) { @@ -66,8 +54,8 @@ public abstract class GeneralUtils { /** * Parse a Formatiere Datumsfelder wie in Deutschland üblich ist. - * @param date - * @return + * @param date the date to be formatted + * @return the formatted date or an empty string if the date is null */ public static String formatDate(LocalDate date) { @@ -77,8 +65,8 @@ public abstract class GeneralUtils { /** * Format time in the format used in Germany - * @param time - * @return + * @param time the time to be formatted + * @return the formatted time or an empty string if the time is null */ public static String formatTime(LocalTime time) { diff --git a/src/main/java/org/texttechnologylab/project/gruppe_05_1/util/PPRUtils.java b/src/main/java/org/texttechnologylab/project/gruppe_05_1/util/PPRUtils.java index 86f38f5..3da9b02 100644 --- a/src/main/java/org/texttechnologylab/project/gruppe_05_1/util/PPRUtils.java +++ b/src/main/java/org/texttechnologylab/project/gruppe_05_1/util/PPRUtils.java @@ -1,8 +1,6 @@ package org.texttechnologylab.project.gruppe_05_1.util; import com.mongodb.client.MongoCollection; -import com.mongodb.client.MongoDatabase; -import com.mongodb.client.model.Indexes; import org.jsoup.Jsoup; import org.jsoup.nodes.Document; import org.jsoup.select.Elements; @@ -12,10 +10,8 @@ import org.texttechnologylab.project.gruppe_05_1.database.MongoPprUtils; import org.texttechnologylab.project.gruppe_05_1.domain.html.Parlamentarier; import org.texttechnologylab.project.gruppe_05_1.domain.mdb.BiografischeAngaben; import org.texttechnologylab.project.gruppe_05_1.domain.mdb.Mdb; -import org.texttechnologylab.project.gruppe_05_1.domain.mdb.MdbDocument; import org.texttechnologylab.project.gruppe_05_1.domain.speaker.Membership; import org.texttechnologylab.project.gruppe_05_1.domain.speaker.Speaker; -import org.texttechnologylab.project.gruppe_05_1.nlp.NlpUtils; import org.texttechnologylab.project.gruppe_05_1.xml.FileObjectFactory; import org.w3c.dom.Element; import org.w3c.dom.Node; @@ -34,28 +30,12 @@ import java.util.zip.ZipInputStream; public abstract class PPRUtils { public static final String PARTEILOS_KUERZEL = "Parteilos"; - private static Set processedProtocols = new HashSet<>(); - private static Set xmlProtocols = new HashSet<>(); + private static final Set processedProtocols = new HashSet<>(); + private static final Set xmlProtocols = new HashSet<>(); + public static final Integer legislaturPeriode = 20; - /** - * Prüfe, ob die Collections existieren. Falls nicht: erzeuge sie und lege Indizes an - */ - public static void ensureCollectionExist() { - - Set existingCollectionNames = MongoDBHandler.getCollectionNames(); - - if (!existingCollectionNames.contains(MongoPprUtils.SPEAKER_COLLECTION_NAME)) { - MongoPprUtils.createSpeakerCollection(); - } - - if (!existingCollectionNames.contains(MongoPprUtils.SPEECH_COLLECTION_NAME)) { - MongoPprUtils.createSpeechCollection(); - } - - } - /** * Alle Informationen lesen... * - Parlamentarier @@ -63,7 +43,8 @@ public abstract class PPRUtils { * - Kommentare * - etc. * ... und in die Mongo-DB persistieren, falls noch nicht vorhanden sind. - * @param xmlFactory + * @param xmlFactory Factory für die XML-Objekte + * @param mongoFactory Factory für die MongoDB-Objekte */ public static void parlamentExplorerInit(FileObjectFactory xmlFactory, MongoObjectFactory mongoFactory) { @@ -93,7 +74,7 @@ public abstract class PPRUtils { /** * Fotos hochladen - TODO - * @param mongoFactory + * @param mongoFactory Factory für die MongoDB-Objekte */ public static void readPhotos(MongoObjectFactory mongoFactory) { @@ -101,8 +82,8 @@ public abstract class PPRUtils { /** * Reden und Kommentare einlesen - TODO - * @param xmlFactory - * @param mongoFactory + * @param xmlFactory Factory für die XML-Objekte + * @param mongoFactory Factory für die MongoDB-Objekte */ public static void readSpeechesAndComments(FileObjectFactory xmlFactory, MongoObjectFactory mongoFactory) { @@ -110,8 +91,8 @@ public abstract class PPRUtils { /** * Liest die MdBs aus der Bundestag-Seite und persistiere sie in die MongoDB - * @param mdbUrl - * @param xmlFactory + * @param mdbUrl URL der MDBs + * @param xmlFactory Factory für die XML-Objekte */ public static void readAndPersistMdbs(String mdbUrl, FileObjectFactory xmlFactory, MongoObjectFactory mongoFactory) { org.w3c.dom.Document mdbRoot = getMdbFromRemoteXmlZipfile(mdbUrl); @@ -125,7 +106,7 @@ public abstract class PPRUtils { Speaker speaker = xmlFactory.createSpeaker(mdbNode); // System.out.println("Speaker " + speaker.getId() + " (" + speaker.getFirstName() + " " + speaker.getName() + ", " + speaker.getParty() + ")"); - if (mdbActiveInWp(speaker, 20)) { + if (mdbActiveInWp(speaker, legislaturPeriode)) { org.bson.Document speakerDoc = mongoFactory.createSpeaker(speaker); MongoDBHandler.insertDocument(speakerCollection, speakerDoc); } @@ -140,19 +121,19 @@ public abstract class PPRUtils { private static boolean mdbActiveInWp(Speaker speaker, Integer legislaturPeriode) { List wps = speaker.getMemberships().stream() .map(Membership::getWp) - .collect(Collectors.toList()); + .toList(); return wps.contains(legislaturPeriode); } /** * Liest die MDB aus einer Zip-Datei der bundestag,de-Seite - * @param zipUrl - * @return + * @param zipUrl URL der ZIP-Datei + * @return the MDB Document */ private static org.w3c.dom.Document getMdbFromRemoteXmlZipfile(String zipUrl) { - URL url = null; - InputStream urlInputStream = null; + URL url; + InputStream urlInputStream; try { url = new URL(zipUrl); urlInputStream = url.openStream(); @@ -196,9 +177,9 @@ public abstract class PPRUtils { /** * Helper method to save a stream for a later use. * We use it to save the DTD and the XML files of the MDBs, which are within a zipfile on the bundestag.de site - * @param input - * @param output - * @throws IOException + * @param input the input stream + * @param output the output stream + * @throws IOException if an error occurs */ private static void copyStream(InputStream input, OutputStream output) throws IOException { @@ -212,7 +193,7 @@ public abstract class PPRUtils { /** * Eine Liste von Parlamentariern nach Namen (erst nach Nachnamen, dann nach Vornamen) sortieren (aufsteigend) - * @param mdbList + * @param mdbList Liste der Parlamentarier */ public static void sortParlamentarierByName(List mdbList) { @@ -231,8 +212,8 @@ public abstract class PPRUtils { /** * Alle Parteien (aus einer Liste der MdBs) herausfinden. * null-Einträge durch einen Platzhalter ersetzen, damit später keine null pointer exceptions auftretten - * @param mdbList - * @return + * @param mdbList Liste der MdBs + * @return Set der Parteien */ public static Set getFraktionenFromMdbList(List mdbList) { @@ -259,8 +240,8 @@ public abstract class PPRUtils { /** * Eine Zuordnung zwischen Partei und deren Mitgliedern erzeugen. Ein Mitglied ist hier die MDB-Struktur - * @param mdbList - * @return + * @param mdbList Liste der MdBs + * @return Map mit Partei als Key und Liste der MdBs als Value */ public static Map> createMdbParteiZuordnung(List mdbList) { @@ -285,9 +266,9 @@ public abstract class PPRUtils { /** * Eine Zuordnung zwischen Partei und deren Mitgliedern erzeugen. Ein Mitglied wird hier durch seine ID erfaßt - * @param parteien - * @param mdbList - * @return + * @param parteien Set der Parteien + * @param mdbList Liste der MdBs + * @return Map mit Partei als Key und Liste der MdB-IDs als Value */ public static Map> getMdbParteiZuordnung(Set parteien, List mdbList) { @@ -314,8 +295,8 @@ public abstract class PPRUtils { /** * Eine Zuordnung zwischen MdB (repräsentiert durch seine ID) und seien biographischen Daten erzeugen. - * @param mdbList - * @return + * @param mdbList Liste der MdBs + * @return Map mit MdB-ID als Key und biographischen Daten als Value */ public static Map getMdbParteiZuordnung(List mdbList) { @@ -330,7 +311,7 @@ public abstract class PPRUtils { /** * Eine Liste von MdBs nach Namen (erst nach Nachnamen, dann nach Vornamen) sortieren (aufsteigend) - * @param mdbList + * @param mdbList Liste der MdBs */ public static void sortMdbByName(List mdbList) { @@ -375,7 +356,7 @@ public abstract class PPRUtils { xmlProtocols.add(xmlDoc); } catch (Exception e) { System.err.println("Fehler beim Verarbeiten der XML-Datei: " + xmlUrl); - e.printStackTrace(); + System.err.println("Fehler: " + e.getLocalizedMessage()); } } @@ -393,7 +374,7 @@ public abstract class PPRUtils { } } catch (IOException e) { System.err.println("Fehler beim Laden der Seite: " + queryUrl); - e.printStackTrace(); + System.err.println("Fehler: " + e.getLocalizedMessage()); break; } } diff --git a/src/main/java/org/texttechnologylab/project/gruppe_05_1/util/XmlUtils.java b/src/main/java/org/texttechnologylab/project/gruppe_05_1/util/XmlUtils.java index 8ff4ae6..243cc43 100644 --- a/src/main/java/org/texttechnologylab/project/gruppe_05_1/util/XmlUtils.java +++ b/src/main/java/org/texttechnologylab/project/gruppe_05_1/util/XmlUtils.java @@ -16,7 +16,6 @@ import java.util.HashMap; import java.util.List; import java.util.Map; import java.util.zip.ZipEntry; -import java.util.zip.ZipInputStream; public abstract class XmlUtils { diff --git a/src/main/java/org/texttechnologylab/project/gruppe_05_1/xml/XmlOperations.java b/src/main/java/org/texttechnologylab/project/gruppe_05_1/xml/XmlOperations.java index ff2fc96..5696833 100644 --- a/src/main/java/org/texttechnologylab/project/gruppe_05_1/xml/XmlOperations.java +++ b/src/main/java/org/texttechnologylab/project/gruppe_05_1/xml/XmlOperations.java @@ -4,5 +4,5 @@ import org.w3c.dom.Node; public interface XmlOperations { FileObjectFactory factory = FileObjectFactory.getFactory(); - public Object fromXmlNode(Node node); + Object fromXmlNode(Node node); } diff --git a/src/main/java/org/texttechnologylab/project/gruppe_05_1/xml/mdb/BiografischeAngaben_File_Impl.java b/src/main/java/org/texttechnologylab/project/gruppe_05_1/xml/mdb/BiografischeAngaben_File_Impl.java index c49f1bd..be24918 100644 --- a/src/main/java/org/texttechnologylab/project/gruppe_05_1/xml/mdb/BiografischeAngaben_File_Impl.java +++ b/src/main/java/org/texttechnologylab/project/gruppe_05_1/xml/mdb/BiografischeAngaben_File_Impl.java @@ -1,6 +1,5 @@ package org.texttechnologylab.project.gruppe_05_1.xml.mdb; -import org.texttechnologylab.project.gruppe_05_1.domain.Gender; import org.texttechnologylab.project.gruppe_05_1.domain.mdb.BiografischeAngaben; import org.texttechnologylab.project.gruppe_05_1.util.GeneralUtils; import org.texttechnologylab.project.gruppe_05_1.xml.XmlOperations; diff --git a/src/main/java/org/texttechnologylab/project/gruppe_05_1/xml/mdb/MdbDocument_File_Impl.java b/src/main/java/org/texttechnologylab/project/gruppe_05_1/xml/mdb/MdbDocument_File_Impl.java index 0dc96cb..b60f4b8 100644 --- a/src/main/java/org/texttechnologylab/project/gruppe_05_1/xml/mdb/MdbDocument_File_Impl.java +++ b/src/main/java/org/texttechnologylab/project/gruppe_05_1/xml/mdb/MdbDocument_File_Impl.java @@ -15,6 +15,7 @@ public class MdbDocument_File_Impl extends MdbDocument implements XmlOperations MdbDocument doc = new MdbDocument_File_Impl(); Node versionNode = XmlUtils.getFirstChildByName(node, "VERSION"); + assert versionNode != null; doc.setVersion(versionNode.getFirstChild().getNodeValue()); List mdbs = new ArrayList<>(); diff --git a/src/main/java/org/texttechnologylab/project/gruppe_05_1/xml/mdb/Mdb_File_Impl.java b/src/main/java/org/texttechnologylab/project/gruppe_05_1/xml/mdb/Mdb_File_Impl.java index c651c07..a22adac 100644 --- a/src/main/java/org/texttechnologylab/project/gruppe_05_1/xml/mdb/Mdb_File_Impl.java +++ b/src/main/java/org/texttechnologylab/project/gruppe_05_1/xml/mdb/Mdb_File_Impl.java @@ -15,9 +15,11 @@ public class Mdb_File_Impl extends Mdb implements XmlOperations { public Mdb fromXmlNode(Node node) { Mdb mdb = new Mdb_File_Impl(); Node idNode = XmlUtils.getFirstChildByName(node, "ID"); + assert idNode != null; mdb.setId(idNode.getFirstChild().getNodeValue()); Node namenNode = XmlUtils.getFirstChildByName(node, "NAMEN"); + assert namenNode != null; List nameNodeList = XmlUtils.getChildrenByName(namenNode, "NAME"); List mdbNameList = new ArrayList<>(); for (Node nameNode : nameNodeList) { @@ -31,6 +33,7 @@ public class Mdb_File_Impl extends Mdb implements XmlOperations { Node wpenNode = XmlUtils.getFirstChildByName(node, "WAHLPERIODEN"); + assert wpenNode != null; List wpNodeList = XmlUtils.getChildrenByName(wpenNode, "WAHLPERIODE"); List wpList = new ArrayList<>(); for (Node wpNode : wpNodeList) { diff --git a/src/main/java/org/texttechnologylab/project/gruppe_05_1/xml/speaker/Speaker_File_Impl.java b/src/main/java/org/texttechnologylab/project/gruppe_05_1/xml/speaker/Speaker_File_Impl.java index 1e64039..468888c 100644 --- a/src/main/java/org/texttechnologylab/project/gruppe_05_1/xml/speaker/Speaker_File_Impl.java +++ b/src/main/java/org/texttechnologylab/project/gruppe_05_1/xml/speaker/Speaker_File_Impl.java @@ -3,7 +3,6 @@ package org.texttechnologylab.project.gruppe_05_1.xml.speaker; import org.texttechnologylab.project.gruppe_05_1.domain.mdb.*; import org.texttechnologylab.project.gruppe_05_1.domain.speaker.Membership; import org.texttechnologylab.project.gruppe_05_1.domain.speaker.Speaker; -import org.texttechnologylab.project.gruppe_05_1.util.GeneralUtils; import org.texttechnologylab.project.gruppe_05_1.util.PPRUtils; import org.texttechnologylab.project.gruppe_05_1.util.XmlUtils; import org.texttechnologylab.project.gruppe_05_1.xml.XmlOperations; @@ -20,10 +19,12 @@ public class Speaker_File_Impl extends Speaker implements XmlOperations { // ID Node idNode = XmlUtils.getFirstChildByName(node, "ID"); + assert idNode != null; speaker.setId(idNode.getFirstChild().getNodeValue()); // Name: alle Namen lesen, nur den letzten berücksichtigen Node namenNode = XmlUtils.getFirstChildByName(node, "NAMEN"); + assert namenNode != null; List nameNodeList = XmlUtils.getChildrenByName(namenNode, "NAME"); List mdbNameList = new ArrayList<>(); for (Node nameNode : nameNodeList) { @@ -58,6 +59,7 @@ public class Speaker_File_Impl extends Speaker implements XmlOperations { // Memberships Node wpenNode = XmlUtils.getFirstChildByName(node, "WAHLPERIODEN"); + assert wpenNode != null; List wpNodeList = XmlUtils.getChildrenByName(wpenNode, "WAHLPERIODE"); List wpList = new ArrayList<>(); for (Node wpNode : wpNodeList) { diff --git a/src/main/java/org/texttechnologylab/project/gruppe_05_1/xml/speeches/Impls/Speaker_File_Impl.java b/src/main/java/org/texttechnologylab/project/gruppe_05_1/xml/speeches/Impls/Speaker_File_Impl.java index 20d7abf..a71b8c1 100644 --- a/src/main/java/org/texttechnologylab/project/gruppe_05_1/xml/speeches/Impls/Speaker_File_Impl.java +++ b/src/main/java/org/texttechnologylab/project/gruppe_05_1/xml/speeches/Impls/Speaker_File_Impl.java @@ -1,6 +1,5 @@ package org.texttechnologylab.project.gruppe_05_1.xml.speeches.Impls; -import lombok.Getter; import org.texttechnologylab.project.gruppe_05_1.xml.speeches.Interfaces.Content; import org.texttechnologylab.project.gruppe_05_1.xml.speeches.Interfaces.Speaker; import org.texttechnologylab.project.gruppe_05_1.xml.speeches.enums.MongoDBEntryType; diff --git a/src/main/java/org/texttechnologylab/project/gruppe_05_1/xml/speeches/SpeechParser.java b/src/main/java/org/texttechnologylab/project/gruppe_05_1/xml/speeches/SpeechParser.java index 124d359..21456fc 100644 --- a/src/main/java/org/texttechnologylab/project/gruppe_05_1/xml/speeches/SpeechParser.java +++ b/src/main/java/org/texttechnologylab/project/gruppe_05_1/xml/speeches/SpeechParser.java @@ -1,8 +1,5 @@ package org.texttechnologylab.project.gruppe_05_1.xml.speeches; -import lombok.Getter; -import lombok.Setter; -import org.texttechnologylab.project.gruppe_05_1.database.MongoPprUtils; import org.texttechnologylab.project.gruppe_05_1.util.PPRUtils; import org.texttechnologylab.project.gruppe_05_1.xml.speeches.Impls.*; import org.texttechnologylab.project.gruppe_05_1.xml.speeches.Interfaces.AgendaItem; @@ -29,7 +26,6 @@ public class SpeechParser { private List speeches; private List agendaItems; - private Boolean parseLegislativePeriods; public List getSpeeches() { return speeches; @@ -40,7 +36,6 @@ public class SpeechParser { } public void setParseLegislativePeriods(Boolean parseLegislativePeriods) { - this.parseLegislativePeriods = parseLegislativePeriods; } public List parseAllSessions() { @@ -58,7 +53,7 @@ public class SpeechParser { tempFile.delete(); // Lösche die temporäre Datei nach der Verarbeitung } catch (Exception e) { System.err.println("Error parsing XML document."); - e.printStackTrace(); + System.err.println(e.getMessage()); } } return sessions; diff --git a/src/main/resources/.DS_Store b/src/main/resources/.DS_Store deleted file mode 100644 index 558c0a3..0000000 Binary files a/src/main/resources/.DS_Store and /dev/null differ