Merge branch 'main' into 'NLPSHIT'

# Conflicts:
#   src/main/java/org/texttechnologylab/project/gruppe_05_1/Main.java
#   src/main/java/org/texttechnologylab/project/gruppe_05_1/nlp/NlpUtils.java
#   src/main/java/org/texttechnologylab/project/gruppe_05_1/xml/speeches/Impls/Speech_File_Impl.java
This commit is contained in:
Henry Scharf 2025-03-10 11:33:20 +00:00
commit 81a38f0efb
41 changed files with 322 additions and 420 deletions

BIN
src/.DS_Store vendored

Binary file not shown.

BIN
src/main/.DS_Store vendored

Binary file not shown.

Binary file not shown.

View file

@ -1,26 +1,16 @@
package org.texttechnologylab.project.gruppe_05_1;
import com.mongodb.client.MongoDatabase;
import org.texttechnologylab.project.gruppe_05_1.database.*;
import org.texttechnologylab.project.gruppe_05_1.domain.mdb.Mdb;
import org.texttechnologylab.project.gruppe_05_1.domain.mdb.MdbDocument;
import org.texttechnologylab.project.gruppe_05_1.nlp.NlpUtils;
import org.texttechnologylab.project.gruppe_05_1.nlp.XmiExtractor;
import org.texttechnologylab.project.gruppe_05_1.rest.RESTHandler;
import org.texttechnologylab.project.gruppe_05_1.util.Logger;
import org.texttechnologylab.project.gruppe_05_1.util.PPRUtils;
import org.texttechnologylab.project.gruppe_05_1.util.PropertiesUtils;
import org.texttechnologylab.project.gruppe_05_1.util.XmlUtils;
import org.texttechnologylab.project.gruppe_05_1.xml.FileObjectFactory;
import org.texttechnologylab.project.gruppe_05_1.xml.speeches.SpeechParser;
import org.w3c.dom.Element;
import org.xml.sax.SAXException;
import javax.xml.parsers.ParserConfigurationException;
import java.io.IOException;
import java.util.List;
import java.util.Properties;
import static java.lang.Boolean.FALSE;
import static java.lang.Boolean.TRUE;
@ -57,10 +47,10 @@ public class Main {
public static void main(String[] args) throws Exception {
//TEST
MongoDBHandler mongoDBHandler = new MongoDBHandler();
SpeechIndexFactoryImpl speechIndexFactory = new SpeechIndexFactoryImpl();
if (MongoPprUtils.getSpeechCollection().countDocuments() != 0) {
if (mongoDBHandler.getDatabase().getCollection(MongoPprUtils.SPEECH_COLLECTION_NAME).countDocuments() != 0) {
System.out.println("Speeches werden nicht gelesen, da sie bereits in der Datenbank stehen");
}
else {
@ -79,8 +69,7 @@ public class Main {
System.out.println("SESSIONCOUNT: " + speechIndex.getSessions().size());
System.out.println("AGENDAITEMCOUNT: " + speechIndex.getAgendaItems().size());
MongoDBHandler mongoDBHandler = new MongoDBHandler();
mongoDBHandler.deleteAllDocuments(); // Clear the DB
mongoDBHandler.deleteSpeechRelatedDocuments(); // Clear speeches, sessions, agendas (history)
Logger.pink("Adding Sessions to DB...");
mongoDBHandler.insertSessions(speechIndex.getSessions());
@ -90,13 +79,8 @@ public class Main {
Logger.pink("Adding Speeches to DB...");
mongoDBHandler.insertSpeeches(speechIndex.getSpeeches());
mongoDBHandler.close(); // Close the connection to the DB
}
// Stellt fest, dass alle nötigen Datenbank-Collections existieren
PPRUtils.ensureCollectionExist();
// Alle Informationen (Parlamentarier, Reden, Kommentare etc.) lesen und in die Mongo-DB einfügen, falls diese noch nicht vorhanden sind.
PPRUtils.parlamentExplorerInit(xmlFactory, mongoFactory);

View file

@ -9,16 +9,12 @@ import com.mongodb.client.MongoClients;
import com.mongodb.client.MongoCollection;
import com.mongodb.client.MongoDatabase;
import com.mongodb.client.model.*;
import exceptions.AgendaItemNotFoundException;
import exceptions.MemberNotFoundException;
import exceptions.ServerErrorException;
import exceptions.SessionNotFoundException;
import org.bson.Document;
import org.bson.conversions.Bson;
import org.bson.types.ObjectId;
import org.texttechnologylab.DockerUnifiedUIMAInterface.connection.mongodb.MongoDBConfig;
import org.texttechnologylab.project.gruppe_05_1.database.domainimp.speeches.AgendaItem_MongoDB_Impl;
import org.texttechnologylab.project.gruppe_05_1.database.domainimp.speeches.MemberOfParliament_MongoDB_Impl;
import org.texttechnologylab.project.gruppe_05_1.database.domainimp.speeches.Session_MongoDB_Impl;
import org.texttechnologylab.project.gruppe_05_1.database.domainimp.speeches.Speech_MongoDB_Impl;
import org.texttechnologylab.project.gruppe_05_1.util.Logger;
@ -26,7 +22,6 @@ import org.texttechnologylab.project.gruppe_05_1.util.PropertiesUtils;
import org.texttechnologylab.project.gruppe_05_1.xml.speeches.Impls.*;
import org.texttechnologylab.project.gruppe_05_1.xml.speeches.Interfaces.*;
import java.io.IOException;
import java.util.*;
import java.util.concurrent.TimeUnit;
@ -50,8 +45,8 @@ public class MongoDBHandler {
private static String collection;
private static String databaseName;
private MongoCollection<Document> speakerCollection;
private MongoCollection<Document> speechesCollection;
private MongoCollection<Document> sessionsCollection;
private MongoCollection<Document> agendaItemsCollection;
private MongoCollection<Document> historyCollection;
@ -72,27 +67,41 @@ public class MongoDBHandler {
collection = mongoProperties.getProperty("remote_collection");
databaseName = mongoProperties.getProperty("remote_database");
MongoCredential credential = MongoCredential
.createCredential(
user,
databaseName,
password.toCharArray());
// URI für lokale Datenbank oder für eine Datenbank auf dem Server
String uri;
if ( (localServer != null) && (! localServer.isBlank())) {
uri = localServer;
mongoClient = MongoClients.create(uri);
MongoClientSettings settings = MongoClientSettings.builder()
.credential(credential)
.timeout(180, TimeUnit.HOURS) // needs increased timeout for the bulk speech inserts
.applyToClusterSettings(builder ->
builder.hosts(List.of(new ServerAddress(remoteServer, Integer.parseInt(port)))))
.build();
// Connect
database = mongoClient.getDatabase(databaseName);
} else {
MongoCredential credential = MongoCredential
.createCredential(
user,
databaseName,
password.toCharArray());
mongoClient = MongoClients.create(settings);
database = mongoClient.getDatabase(databaseName);
speechesCollection = database.getCollection("speech");
sessionsCollection = database.getCollection("sessions");
agendaItemsCollection = database.getCollection("agendaItems");
historyCollection = database.getCollection("history");
MongoClientSettings settings = MongoClientSettings.builder()
.credential(credential)
.timeout(180, TimeUnit.HOURS) // needs increased timeout for the bulk speech inserts
.applyToClusterSettings(builder ->
builder.hosts(List.of(new ServerAddress(remoteServer, Integer.parseInt(port)))))
.build();
mongoClient = MongoClients.create(settings);
database = mongoClient.getDatabase(databaseName);
}
speakerCollection = database.getCollection(MongoPprUtils.SPEAKER_COLLECTION_NAME);
speechesCollection = database.getCollection(MongoPprUtils.SPEECH_COLLECTION_NAME);
sessionsCollection = database.getCollection(MongoPprUtils.SESSION_COLLECTION_NAME);
agendaItemsCollection = database.getCollection(MongoPprUtils.AGENDA_ITEMS_COLLECTION_NAME);
historyCollection = database.getCollection(MongoPprUtils.HISTORY_COLLECTION_NAME);
createIndicesForSpeakerCollection();
createIndicesForSpeechCollection();
Logger.info("Connected to MongoDB database: " + databaseName);
}
public MongoDatabase getDatabase() {
@ -106,18 +115,22 @@ public class MongoDBHandler {
*/
static public MongoDatabase getMongoDatabase() {
if (mongoDatabase == null) {
Properties mongoProperties = PropertiesUtils.readPropertiesFromResource(propertiesFileName);
// Zugangsdaten
localServer = mongoProperties.getProperty("localserver");
remoteServer = mongoProperties.getProperty("remote_host");
user = mongoProperties.getProperty("remote_user");
password = mongoProperties.getProperty("remote_password");
port = mongoProperties.getProperty("remote_port");
collection = mongoProperties.getProperty("remote_collection");
databaseName = mongoProperties.getProperty("remote_database");
if (mongoDatabase != null) {
return mongoDatabase;
}
Properties mongoProperties = PropertiesUtils.readPropertiesFromResource(propertiesFileName);
// Zugangsdaten
localServer = mongoProperties.getProperty("localserver");
remoteServer = mongoProperties.getProperty("remote_host");
user = mongoProperties.getProperty("remote_user");
password = mongoProperties.getProperty("remote_password");
port = mongoProperties.getProperty("remote_port");
collection = mongoProperties.getProperty("remote_collection");
databaseName = mongoProperties.getProperty("remote_database");
// MongoDBClient erzeugen
// String uri = mongoServer + "://" + mongoUser + ":" + mongoPassword + "@" + mongoNeetwork; // cluster, network, user...
@ -159,9 +172,9 @@ public class MongoDBHandler {
*
* @return {@code Set<String>} with the names of all collections
*/
static public Set<String> getCollectionNames() {
// return getMongoDatabase().listCollectionNames().into(new ArrayList<>());
return getMongoDatabase().listCollectionNames().into(new HashSet<>());
public Set<String> getCollectionNames() {
// return getDatabase().listCollectionNames().into(new ArrayList<>());
return getDatabase().listCollectionNames().into(new HashSet<>());
}
/**
@ -169,8 +182,8 @@ public class MongoDBHandler {
* @param name Name of collection to check for existence
* @return does the collection exist
*/
static public boolean collectionExists(String name) {
return getMongoDatabase().listCollectionNames().into(new ArrayList<>()).contains(name);
public boolean collectionExists(String name) {
return getDatabase().listCollectionNames().into(new ArrayList<>()).contains(name);
}
@ -194,8 +207,8 @@ public class MongoDBHandler {
}
}
static public void createCollectionIfNotExist(String collectionName) {
createCollectionIfNotExist(getMongoDatabase(), collectionName);
public void createCollectionIfNotExist(String collectionName) {
createCollectionIfNotExist(getDatabase(), collectionName);
}
@ -212,8 +225,8 @@ public class MongoDBHandler {
}
}
static public void createCollection(String collectionName) {
createCollection(getMongoDatabase(), collectionName);
public void createCollection(String collectionName) {
createCollection(getDatabase(), collectionName);
}
@ -262,8 +275,23 @@ public class MongoDBHandler {
}
}
static public void createOrTrancateCollection(String collectionName) {
createOrTrancateCollection(getMongoDatabase(), collectionName);
/**
* Creates the indexes for the speaker collection when that collection currently reports exactly one index.
*
* Three separate calls to MongoDBHandler.createIndexForCollection are made, one each for the fields
* "name", "firstName" and "party", every time with {@code true} as the last argument.
* NOTE(review): "exactly one index" presumably means that only MongoDB's default _id index exists — confirm.
* NOTE(review): the meaning of the boolean argument is not visible here — verify against createIndexForCollection.
*/
public void createIndicesForSpeakerCollection() {
// The index listing is materialised into a list only so that the existing indexes can be counted.
if (speakerCollection.listIndexes().into(new ArrayList<>()).size() == 1) {
MongoDBHandler.createIndexForCollection(speakerCollection,"name", true);
MongoDBHandler.createIndexForCollection(speakerCollection,"firstName", true);
MongoDBHandler.createIndexForCollection(speakerCollection,"party", true);
}
}
/**
* Creates the indexes for the speech collection when that collection currently reports exactly one index.
*
* Two separate calls to MongoDBHandler.createIndexForCollection are made, one each for the fields
* "speakerId" and "speechKey", both with {@code true} as the last argument.
* NOTE(review): "exactly one index" presumably means that only MongoDB's default _id index exists — confirm.
* NOTE(review): the meaning of the boolean argument is not visible here — verify against createIndexForCollection.
*/
public void createIndicesForSpeechCollection() {
// The index listing is materialised into a list only so that the existing indexes can be counted.
if (speechesCollection.listIndexes().into(new ArrayList<>()).size() == 1) {
MongoDBHandler.createIndexForCollection(speechesCollection, "speakerId", true);
MongoDBHandler.createIndexForCollection(speechesCollection, "speechKey", true);
}
}
/**
* Convenience overload that forwards to the two-argument createOrTrancateCollection,
* supplying the {@code database} field of this class as the first argument.
* NOTE(review): the two-argument overload is not visible here; going by its name it creates the
* collection or empties an existing one — confirm.
*
* @param collectionName name of the collection, passed through unchanged
*/
public void createOrTrancateCollection(String collectionName) {
createOrTrancateCollection(database, collectionName);
}
/**
@ -649,7 +677,7 @@ public class MongoDBHandler {
);
}
public void deleteAllDocuments() {
public void deleteSpeechRelatedDocuments() {
speechesCollection.deleteMany(new Document());
sessionsCollection.deleteMany(new Document());
agendaItemsCollection.deleteMany(new Document());

View file

@ -1,7 +1,6 @@
package org.texttechnologylab.project.gruppe_05_1.database;
import org.bson.Document;
import org.texttechnologylab.project.gruppe_05_1.xml.FileObjectFactory;
import java.util.List;

View file

@ -2,25 +2,17 @@ package org.texttechnologylab.project.gruppe_05_1.database;
import com.mongodb.client.MongoCollection;
import com.mongodb.client.MongoCursor;
import com.mongodb.client.model.Indexes;
import org.bson.Document;
import org.texttechnologylab.project.gruppe_05_1.domain.html.Parlamentarier;
import org.texttechnologylab.project.gruppe_05_1.domain.html.ParlamentarierDetails;
import org.texttechnologylab.project.gruppe_05_1.domain.speaker.Membership;
import org.texttechnologylab.project.gruppe_05_1.domain.speech.Speech;
import org.texttechnologylab.project.gruppe_05_1.util.PPRUtils;
import java.time.LocalDate;
import java.time.LocalDateTime;
import java.time.ZoneId;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Date;
import java.util.List;
import java.util.*;
/**
* Diese Klasse beinhaltet Mongo-Utilities, welche spezifisch für die PPR-Datenstrukturen sind.
*
* Mongo-Utilities genereller Natur stehen in der Klasse MongoDBHandler.
*/
public class MongoPprUtils {
@ -31,71 +23,48 @@ public class MongoPprUtils {
*/
public static final String SPEAKER_COLLECTION_NAME = "speaker";
public static final String SPEECH_COLLECTION_NAME = "speech";
public static final String SESSION_COLLECTION_NAME = "sessions";
public static final String AGENDA_ITEMS_COLLECTION_NAME = "agendaItems";
public static final String HISTORY_COLLECTION_NAME = "history";
public static final String PICTURES_COLLECTION_NAME = "pictures";
public static final String COMMENT_COLLECTION_NAME = "comment";
private static MongoCollection<Document> speakerCollecion = null;
private static MongoCollection<Document> speechCollecion = null;
private static MongoCollection<Document> picturesCollecion = null;
private static MongoCollection<Document> commentCollecion = null;
private static MongoCollection<Document> speakerCollection = null;
private static MongoCollection<Document> speechCollection = null;
private static MongoCollection<Document> picturesCollection = null;
private static MongoCollection<Document> commentCollection = null;
public static MongoCollection<Document> getSpeakerCollection() {
if (speakerCollecion == null) speakerCollecion = MongoDBHandler.getMongoDatabase().getCollection(SPEAKER_COLLECTION_NAME);
return speakerCollecion;
if (speakerCollection == null) speakerCollection = MongoDBHandler.getMongoDatabase().getCollection(SPEAKER_COLLECTION_NAME);
return speakerCollection;
}
public static MongoCollection<Document> getSpeechCollection() {
if (speechCollecion == null) speechCollecion = MongoDBHandler.getMongoDatabase().getCollection(SPEECH_COLLECTION_NAME);
return speechCollecion;
if (speechCollection == null) speechCollection = MongoDBHandler.getMongoDatabase().getCollection(SPEECH_COLLECTION_NAME);
return speechCollection;
}
public static MongoCollection<Document> getPicturesCollection() {
if (picturesCollecion == null) picturesCollecion = MongoDBHandler.getMongoDatabase().getCollection(PICTURES_COLLECTION_NAME);
return picturesCollecion;
}
public static MongoCollection<Document> getCommentCollection() {
if (commentCollecion == null) commentCollecion = MongoDBHandler.getMongoDatabase().getCollection(COMMENT_COLLECTION_NAME);
return commentCollecion;
}
/**
* Create the Speaker Collection and useful indices for it
*/
public static void createSpeakerCollection() {
MongoDBHandler.createCollection(MongoPprUtils.SPEAKER_COLLECTION_NAME);
MongoDBHandler.createIndexForCollection(getSpeakerCollection(), Arrays.asList("name", "firstName", "party"), true);
public static void createIndexForSpeakerCollection() {
// One createIndexForCollection call per field ("name", "firstName", "party") on the speaker collection.
// The commented-out call below passes the same three field names as a single list instead.
// MongoDBHandler.createIndexForCollection(getSpeakerCollection(), Arrays.asList("name", "firstName", "party"), true);
MongoDBHandler.createIndexForCollection(getSpeakerCollection(),"name", true);
MongoDBHandler.createIndexForCollection(getSpeakerCollection(),"firstName", true);
MongoDBHandler.createIndexForCollection(getSpeakerCollection(),"party", true);
}
/**
* Create the Speech Collection and useful indices for it
*/
public static void createSpeechCollection() {
MongoDBHandler.createCollection(MongoPprUtils.SPEECH_COLLECTION_NAME);
MongoDBHandler.createIndexForCollection(getSpeechCollection(), "speaker", true);
public static void createIndexForSpeechCollection() {
// One createIndexForCollection call per field ("speakerId", "speechKey") on the speech collection.
MongoDBHandler.createIndexForCollection(getSpeechCollection(), "speakerId", true);
MongoDBHandler.createIndexForCollection(getSpeechCollection(), "speechKey", true);
}
/**
* Create the Comment Collection and useful indices for it
*/
public static void createCommentCollection() {
MongoDBHandler.createCollection(MongoPprUtils.COMMENT_COLLECTION_NAME);
MongoDBHandler.createIndexForCollection(getCommentCollection(), Arrays.asList("speaker", "speech"), true);
}
/**
* Create the Picture Collection and useful indices for it
*/
public static void createPictureCollection() {
MongoDBHandler.createCollection(MongoPprUtils.PICTURES_COLLECTION_NAME);
// TODO: für welche Felder sollen Indizes gebaut werden?
// MongoDBHandler.createIndexForCollection(getPicturesCollection(), Arrays.asList("field_1", "field_2"), true);
}
/**
* Truncate the Speaker Collection.
* Note that it is quicker (and saves space) to drop and re-create rather than removing all documents using "remove({})"
@ -103,7 +72,7 @@ public class MongoPprUtils {
public static void truncateSpeakerCollection() {
getSpeakerCollection().drop();
createSpeechCollection();
createIndexForSpeechCollection();
}
/*
@ -143,7 +112,7 @@ public class MongoPprUtils {
plist.add(p);
}
} catch (Throwable t) {
System.err.println(t);
System.err.print(t);
} finally {
cursor.close();
}
@ -164,11 +133,7 @@ public class MongoPprUtils {
p.setNachname((String) doc.get("name"));
p.setVorname((String) doc.get("firstName"));
String partei = (String) doc.get("party");
if (partei == null) {
p.setPartei("(parteilos)");
} else {
p.setPartei(partei);
}
p.setPartei(Objects.requireNonNullElse(partei, "(parteilos)"));
return p;
}
@ -186,8 +151,7 @@ public class MongoPprUtils {
*/
public static ParlamentarierDetails getParlamentarierDetailsByID(String id) {
Document doc = MongoDBHandler.findFirstDocumentInCollection(getSpeakerCollection(), "_id", id);
ParlamentarierDetails p = readParlamentarierDetailsFromSpeaker(doc);
return p;
return readParlamentarierDetailsFromSpeaker(doc);
}
@ -198,8 +162,7 @@ public class MongoPprUtils {
*/
public static ParlamentarierDetails getParlamentarierDetailsByID(Integer id) {
Document doc = MongoDBHandler.findFirstDocumentInCollection(getSpeakerCollection(), "_id", id.toString());
ParlamentarierDetails p = readParlamentarierDetailsFromSpeaker(doc);
return p;
return readParlamentarierDetailsFromSpeaker(doc);
}
/**
@ -215,16 +178,12 @@ public class MongoPprUtils {
p.setNachname((String) doc.get("name"));
p.setVorname((String) doc.get("firstName"));
String partei = (String) doc.get("party");
if (partei == null) {
p.setPartei("(parteilos)");
} else {
p.setPartei(partei);
}
p.setPartei(Objects.requireNonNullElse(partei, "(parteilos)"));
p.setTitle((String) doc.get("title"));
p.setGeburtsort((String) doc.get("geburtsort"));
p.setGeschlecht((String) doc.get("geschlecht"));
p.setBeruf((String) doc.get("beruf"));
p.setAkademischertitel((String) doc.get("akademischertitel"));
p.setAkademischerTitel((String) doc.get("akademischertitel"));
p.setFamilienstand((String) doc.get("familienstand"));
p.setReligion((String) doc.get("religion"));
p.setVita((String) doc.get("vita"));

View file

@ -1,6 +1,5 @@
package org.texttechnologylab.project.gruppe_05_1.database;
import lombok.Getter;
import org.texttechnologylab.project.gruppe_05_1.xml.speeches.Interfaces.*;
import java.util.List;

View file

@ -3,10 +3,7 @@ package org.texttechnologylab.project.gruppe_05_1.database;
import org.texttechnologylab.project.gruppe_05_1.xml.speeches.Interfaces.*;
import org.texttechnologylab.project.gruppe_05_1.xml.speeches.SpeechParser;
import org.xml.sax.SAXException;
import javax.xml.parsers.ParserConfigurationException;
import java.io.IOException;
import java.util.List;
import java.util.stream.Collectors;
@ -16,18 +13,15 @@ public class SpeechIndexFactoryImpl implements SpeechIndexFactory {
private List<AgendaItem> agendaItems;
private List<Speech> speeches;
private List<MemberOfParliament> members;
private List<Fraction> fractions;
private Boolean parseLegislativePeriods = true;
/**
* Stores the flag on this factory and forwards it to the speech parser.
*
* @param parseLegislativePeriods value kept in this factory and handed to the parser's setParseLegislativePeriods
* @return this factory, so that calls can be chained
*/
@Override
public SpeechIndexFactoryImpl parseLegislativePeriods(Boolean parseLegislativePeriods) {
this.parseLegislativePeriods = parseLegislativePeriods;
// speechParser is assigned in builder(), so it may still be null here; the resulting NullPointerException is ignored on purpose.
// NOTE(review): this catch also hides any NullPointerException raised inside the setter itself — an explicit null check would be narrower.
try {this.speechParser.setParseLegislativePeriods(parseLegislativePeriods);} catch (NullPointerException ignored) {}
return this;
}
public SpeechIndexFactoryImpl builder() throws ParserConfigurationException {
public SpeechIndexFactoryImpl builder() {
this.speechParser = new SpeechParser();
return this;
}
@ -41,7 +35,7 @@ public class SpeechIndexFactoryImpl implements SpeechIndexFactory {
@Override
public SpeechIndex build() throws IOException, SAXException {
public SpeechIndex build() {
return new SpeechIndex(sessions, speeches, agendaItems);
}

View file

@ -26,8 +26,8 @@ public class BiografischeAngaben_Mongo_Impl extends BiografischeAngaben implemen
fields.put("parteiKuerzel", entity.getParteiKuerzel());
fields.put("vitaKurz", entity.getVitaKurz());
fields.put("veroeffentlichungspflichtiges", entity.getVeroeffentlichungspflichtiges());
Document doc = MongoDBHandler.createDocument(false, fields);
return doc;
return MongoDBHandler.createDocument(false, fields);
}
@Override

View file

@ -22,8 +22,7 @@ public class Institution_Mongo_Impl extends Institution implements MongoOperatio
fields.put("fktinsVon", entity.getFktinsVon());
fields.put("fktinsBis", entity.getFktinsBis());
Document doc = MongoDBHandler.createDocument(false, fields);
return doc;
return MongoDBHandler.createDocument(false, fields);
}
@Override

View file

@ -23,6 +23,7 @@ public class MdbName_Mongo_Impl extends MdbName implements MongoOperations<MdbNa
fields.put("akadTitel", entity.getAkadTitel());
fields.put("historieVon", entity.getHistorieVon());
fields.put("historieBis", entity.getHistorieBis());
return MongoDBHandler.createDocument(false, fields);
}

View file

@ -34,9 +34,8 @@ public class Mdb_Mongo_Impl extends Mdb implements MongoOperations<Mdb> {
"bio", bioDoc,
"wahlperioden", wpDocs
);
Document doc = MongoDBHandler.createDocument(false, fields);
return doc;
return MongoDBHandler.createDocument(false, fields);
}
@Override

View file

@ -25,9 +25,7 @@ public class Membership_Mongo_Impl extends Membership implements MongoOperations
fields.put("member", entity.getMember()); // TODO: wahrscheinlich nicht nötig
fields.put("wp", entity.getWp());
Document doc = MongoDBHandler.createDocument(false, fields);
return doc;
return MongoDBHandler.createDocument(false, fields);
}
@Override

View file

@ -35,9 +35,7 @@ public class Speaker_Mongo_Impl extends Speaker implements MongoOperations<Speak
fields.put("party", entity.getParty());
fields.put("memberships", membershipDocs);
Document doc = MongoDBHandler.createDocument(false, fields);
return doc;
return MongoDBHandler.createDocument(false, fields);
}
@Override

View file

@ -32,8 +32,7 @@ public class Wahlperiode_Mongo_Impl extends Wahlperiode implements MongoOperatio
}
fields.put("institutionen", instDocs);
Document doc = MongoDBHandler.createDocument(false, fields);
return doc;
return MongoDBHandler.createDocument(false, fields);
}
@Override

View file

@ -13,7 +13,7 @@ public enum Gender { // TODO: Delete
return this.text;
}
private Gender(String text) {this.text = text;}
Gender(String text) {this.text = text;}
public static Gender byText(String text) {
if (null == text) return NA;

View file

@ -20,7 +20,7 @@ public class ParlamentarierDetails {
LocalDate sterbedatum;
String geschlecht;
String beruf;
String akademischertitel;
String akademischerTitel;
String familienstand;
String religion;
String vita;
@ -116,12 +116,12 @@ public class ParlamentarierDetails {
this.beruf = beruf;
}
public String getAkademischertitel() {
return akademischertitel;
public String getAkademischerTitel() {
return akademischerTitel;
}
public void setAkademischertitel(String akademischertitel) {
this.akademischertitel = akademischertitel;
public void setAkademischerTitel(String akademischerTitel) {
this.akademischerTitel = akademischerTitel;
}
public String getFamilienstand() {
@ -183,8 +183,7 @@ public class ParlamentarierDetails {
@Override
public boolean equals(Object o) {
if (this == o) return true;
if (!(o instanceof ParlamentarierDetails)) return false;
ParlamentarierDetails that = (ParlamentarierDetails) o;
if (!(o instanceof ParlamentarierDetails that)) return false;
return Objects.equals(id, that.id) ;
}
@ -206,7 +205,7 @@ public class ParlamentarierDetails {
.add("sterbedatum=" + sterbedatum)
.add("geschlecht='" + geschlecht + "'")
.add("beruf='" + beruf + "'")
.add("akademischertitel='" + akademischertitel + "'")
.add("akademischertitel='" + akademischerTitel + "'")
.add("familienstand='" + familienstand + "'")
.add("religion='" + religion + "'")
.add("vita='" + vita + "'")

View file

@ -4,8 +4,6 @@ import java.time.LocalDate;
import java.util.Objects;
import java.util.StringJoiner;
import org.texttechnologylab.project.gruppe_05_1.domain.Gender;
public abstract class BiografischeAngaben {
private LocalDate geburtsdatum;
@ -111,9 +109,8 @@ public abstract class BiografischeAngaben {
@Override
public boolean equals(Object o) {
if (this == o) return true;
if (!(o instanceof BiografischeAngaben)) return false;
BiografischeAngaben that = (BiografischeAngaben) o;
return Objects.equals(geburtsdatum, that.geburtsdatum) && Objects.equals(geburtsort, that.geburtsort) && Objects.equals(geburtsland, that.geburtsland) && Objects.equals(sterbedatum, that.sterbedatum) && gender == that.gender && familienstand == that.familienstand && religion == that.religion && Objects.equals(beruf, that.beruf) && Objects.equals(parteiKuerzel, that.parteiKuerzel) && Objects.equals(vitaKurz, that.vitaKurz) && Objects.equals(veroeffentlichungspflichtiges, that.veroeffentlichungspflichtiges);
if (!(o instanceof BiografischeAngaben that)) return false;
return Objects.equals(geburtsdatum, that.geburtsdatum) && Objects.equals(geburtsort, that.geburtsort) && Objects.equals(geburtsland, that.geburtsland) && Objects.equals(sterbedatum, that.sterbedatum) && Objects.equals(gender, that.gender) && Objects.equals(familienstand, that.familienstand) && Objects.equals(religion, that.religion) && Objects.equals(beruf, that.beruf) && Objects.equals(parteiKuerzel, that.parteiKuerzel) && Objects.equals(vitaKurz, that.vitaKurz) && Objects.equals(veroeffentlichungspflichtiges, that.veroeffentlichungspflichtiges);
}
@Override

View file

@ -10,7 +10,7 @@ public enum Mandatsart {
private final String text;
private Mandatsart(String text) {this.text = text;}
Mandatsart(String text) {this.text = text;}
public static Mandatsart byText(String text) {
if (null == text) return NA;

View file

@ -46,8 +46,7 @@ public abstract class Mdb {
@Override
public boolean equals(Object o) {
if (this == o) return true;
if (!(o instanceof Mdb)) return false;
Mdb mdb = (Mdb) o;
if (!(o instanceof Mdb mdb)) return false;
return Objects.equals(id, mdb.id) && Objects.equals(namen, mdb.namen) && Objects.equals(bio, mdb.bio) && Objects.equals(wahlperioden, mdb.wahlperioden);
}

View file

@ -28,8 +28,7 @@ public abstract class MdbDocument {
@Override
public boolean equals(Object o) {
if (this == o) return true;
if (!(o instanceof MdbDocument)) return false;
MdbDocument that = (MdbDocument) o;
if (!(o instanceof MdbDocument that)) return false;
return Objects.equals(version, that.version) && Objects.equals(mdbs, that.mdbs);
}

View file

@ -92,9 +92,8 @@ public abstract class Wahlperiode {
@Override
public boolean equals(Object o) {
if (this == o) return true;
if (!(o instanceof Wahlperiode)) return false;
Wahlperiode that = (Wahlperiode) o;
return wp == that.wp && wknNr == that.wknNr && Objects.equals(mdbWpVon, that.mdbWpVon) && Objects.equals(mdbWpBis, that.mdbWpBis) && Objects.equals(wkrName, that.wkrName) && Objects.equals(wkrLand, that.wkrLand) && Objects.equals(liste, that.liste) && mandatsart == that.mandatsart && Objects.equals(institutionen, that.institutionen);
if (!(o instanceof Wahlperiode that)) return false;
return wp == that.wp && Objects.equals(wknNr, that.wknNr) && Objects.equals(mdbWpVon, that.mdbWpVon) && Objects.equals(mdbWpBis, that.mdbWpBis) && Objects.equals(wkrName, that.wkrName) && Objects.equals(wkrLand, that.wkrLand) && Objects.equals(liste, that.liste) && mandatsart == that.mandatsart && Objects.equals(institutionen, that.institutionen);
}
@Override

View file

@ -1,7 +1,6 @@
package org.texttechnologylab.project.gruppe_05_1.domain.speaker;
import java.time.LocalDate;
import java.time.LocalDateTime;
import java.util.Objects;
import java.util.StringJoiner;

View file

@ -7,8 +7,8 @@ import java.util.StringJoiner;
public abstract class Protocol {
LocalDate date;
LocalTime starttime;
LocalTime endtime;
LocalTime startTime;
LocalTime endTime;
Integer index;
String titel;
String place;
@ -22,20 +22,20 @@ public abstract class Protocol {
this.date = date;
}
public LocalTime getStarttime() {
return starttime;
public LocalTime getStartTime() {
return startTime;
}
public void setStarttime(LocalTime starttime) {
this.starttime = starttime;
public void setStartTime(LocalTime startTime) {
this.startTime = startTime;
}
public LocalTime getEndtime() {
return endtime;
public LocalTime getEndTime() {
return endTime;
}
public void setEndtime(LocalTime endtime) {
this.endtime = endtime;
public void setEndTime(LocalTime endTime) {
this.endTime = endTime;
}
public Integer getIndex() {
@ -74,22 +74,22 @@ public abstract class Protocol {
public boolean equals(Object o) {
if (this == o) return true;
if (!(o instanceof Protocol protocol)) return false;
return Objects.equals(date, protocol.date) && Objects.equals(starttime, protocol.starttime)
&& Objects.equals(endtime, protocol.endtime) && Objects.equals(index, protocol.index)
return Objects.equals(date, protocol.date) && Objects.equals(startTime, protocol.startTime)
&& Objects.equals(endTime, protocol.endTime) && Objects.equals(index, protocol.index)
&& Objects.equals(titel, protocol.titel) && Objects.equals(place, protocol.place) && Objects.equals(wp, protocol.wp);
}
@Override
public int hashCode() {
return Objects.hash(date, starttime, endtime, index, titel, place, wp);
return Objects.hash(date, startTime, endTime, index, titel, place, wp);
}
@Override
public String toString() {
return new StringJoiner(", ", Protocol.class.getSimpleName() + "[", "]")
.add("date=" + date)
.add("starttime=" + starttime)
.add("endtime=" + endtime)
.add("starttime=" + startTime)
.add("endtime=" + endTime)
.add("index=" + index)
.add("titel='" + titel + "'")
.add("place='" + place + "'")

View file

@ -11,13 +11,7 @@ import de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Token;
import de.tudarmstadt.ukp.dkpro.core.api.syntax.type.dependency.Dependency;
import org.apache.commons.io.FileUtils;
import org.apache.uima.UIMAException;
import org.apache.uima.UIMAFramework;
import org.apache.uima.cas.CAS;
import org.apache.uima.cas.CASException;
import org.apache.uima.cas.Feature;
import org.apache.uima.cas.impl.XmiCasDeserializer;
import org.apache.uima.cas.text.AnnotationFS;
import org.apache.uima.cas.text.AnnotationIndex;
import org.apache.uima.fit.factory.JCasFactory;
import org.apache.uima.fit.util.JCasUtil;
import org.apache.uima.jcas.JCas;
@ -45,7 +39,6 @@ import org.xml.sax.SAXException;
import java.io.*;
import java.net.URISyntaxException;
import java.net.URL;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.*;
@ -55,13 +48,12 @@ import java.util.zip.ZipEntry;
import java.util.zip.ZipInputStream;
import static org.apache.uima.fit.factory.AnalysisEngineFactory.createEngineDescription;
import static org.springframework.core.io.buffer.DataBufferUtils.readInputStream;
public class NlpUtils {
// common class-attributes
private static DUUIComposer pComposer = null;
private static int iWorkers = 1;
private static final int iWorkers = 1;
private static final String TYPE_SYSTEM_DESCRIPTOR_PATH = "/speeches/TypeSystem.xml.gz";
private static final int MAX_FEATURE_LENGTH = 10000;
@ -74,7 +66,7 @@ public class NlpUtils {
runVideos();
} catch (Exception e) {
System.err.println("Exception: " + e.getMessage());
System.err.println(e.getStackTrace());
System.err.println(Arrays.toString(e.getStackTrace()));
throw new RuntimeException(e);
}
@ -87,12 +79,12 @@ public class NlpUtils {
private static void duuiInit() {
DUUILuaContext ctx = null;
DUUILuaContext ctx;
try {
ctx = new DUUILuaContext().withJsonLibrary();
} catch (IOException e) {
System.err.println("IOException: " + e.getMessage());
System.err.println(e.getStackTrace());
System.err.println(Arrays.toString(e.getStackTrace()));
throw new RuntimeException(e);
}
@ -103,7 +95,7 @@ public class NlpUtils {
.withWorkers(iWorkers); // wir geben dem Composer eine Anzahl an Threads mit.
} catch (URISyntaxException e) {
System.err.println("URISyntaxException: " + e.getMessage());
System.err.println(e.getStackTrace());
System.err.println(e.getMessage());
throw new RuntimeException(e);
}
@ -114,15 +106,15 @@ public class NlpUtils {
dockerDriver = new DUUIDockerDriver();
} catch (IOException e) {
System.err.println("IOException: " + e.getMessage());
System.err.println(e.getStackTrace());
System.err.println(Arrays.toString(e.getStackTrace()));
throw new RuntimeException(e);
} catch (UIMAException e) {
System.err.println("UIMAException: " + e.getMessage());
System.err.println(e.getStackTrace());
System.err.println(Arrays.toString(e.getStackTrace()));
throw new RuntimeException(e);
} catch (SAXException e) {
System.err.println("SAXException: " + e.getMessage());
System.err.println(e.getStackTrace());
System.err.println(Arrays.toString(e.getStackTrace()));
throw new RuntimeException(e);
}
@ -134,23 +126,20 @@ public class NlpUtils {
/**
* Initialization of a sample CAS document
*
* @return
* @throws ResourceInitializationException
* @throws CASException
* @return JCas object
*/
public static JCas getCas() {
// init a CAS with a static text.
JCas pCas = null;
JCas pCas;
try {
pCas = JCasFactory.createText("Ich finde dieses Programm läuft sehr gut. Ich überlege wie ich dieses für meine Bachelor-Arbeit nachnutzen kann.", "de");
} catch (ResourceInitializationException e) {
System.err.println("ResourceInitializationException: " + e.getMessage());
System.err.println(e.getStackTrace());
System.err.println(Arrays.toString(e.getStackTrace()));
throw new RuntimeException(e);
} catch (CASException e) {
System.err.println("CASException: " + e.getMessage());
System.err.println(e.getStackTrace());
System.err.println(Arrays.toString(e.getStackTrace()));
throw new RuntimeException(e);
}
@ -177,7 +166,7 @@ public class NlpUtils {
.build());
} catch (Exception e) {
System.err.println("Exception: " + e.getMessage());
System.err.println(e.getStackTrace());
System.err.println(Arrays.toString(e.getStackTrace()));
throw new RuntimeException(e);
}
@ -189,7 +178,7 @@ public class NlpUtils {
.build());
} catch (Exception e) {
System.err.println("Exception: " + e.getMessage());
System.err.println(e.getStackTrace());
System.err.println(Arrays.toString(e.getStackTrace()));
throw new RuntimeException(e);
}
@ -199,27 +188,27 @@ public class NlpUtils {
pComposer.run(tCas);
} catch (Exception e) {
System.err.println("Exception: " + e.getMessage());
System.err.println(e.getStackTrace());
System.err.println(Arrays.toString(e.getStackTrace()));
throw new RuntimeException(e);
}
JCasUtil.select(tCas, Sentence.class).stream().forEach(sentence -> {
System.out.println(sentence.getBegin() + "-" + sentence.getEnd() + ": " + sentence.getCoveredText());
JCasUtil.select(tCas, Sentence.class).forEach(sentence -> {
System.out.println(sentence.getBegin()+"-"+sentence.getEnd()+": "+sentence.getCoveredText());
System.out.println(JCasUtil.selectCovered(org.hucompute.textimager.uima.type.Sentiment.class, sentence));
});
}
private static void casInit() {
JCas jcas = null;
JCas jcas;
try {
jcas = JCasFactory.createJCas();
} catch (ResourceInitializationException e) {
System.err.println("ResourceInitializationException: " + e.getMessage());
System.err.println(e.getStackTrace());
System.err.println(Arrays.toString(e.getStackTrace()));
throw new RuntimeException(e);
} catch (CASException e) {
System.err.println("CASException: " + e.getMessage());
System.err.println(e.getStackTrace());
System.err.println(Arrays.toString(e.getStackTrace()));
throw new RuntimeException(e);
}
@ -263,8 +252,7 @@ public class NlpUtils {
/**
* Execution of video processing via DUUI using the RemoteDriver
*
* @throws Exception
* @throws Exception in case of an error
*/
public static void runVideos() throws Exception {
@ -273,6 +261,7 @@ public class NlpUtils {
URL fVideo = classLoader.getResource("example.mp4"); // TODO
// Konvertieren eines Videos in einen Base64-String
assert fVideo != null;
File fFile = new File(fVideo.getPath());
byte[] bFile = FileUtils.readFileToByteArray(fFile);
String encodedString = Base64.getEncoder().encodeToString(bFile);
@ -315,14 +304,10 @@ public class NlpUtils {
pComposer.run(pCas);
// select some data
JCasUtil.selectAll(videoCas).stream().forEach(videoAnnotation -> {
System.out.println(videoAnnotation);
});
JCasUtil.selectAll(videoCas).forEach(System.out::println);
// select some data
JCasUtil.selectAll(transcriptCas).stream().forEach(tAnnotation -> {
System.out.println(tAnnotation);
});
JCasUtil.selectAll(transcriptCas).forEach(System.out::println);
}

View file

@ -23,7 +23,7 @@ public class JavalinConfig extends Properties {
/**
* Constructor mit Pfad zur Properties-Datei
* @param sPath
* @param sPath Pfad zur Properties-Datei
*/
public JavalinConfig(String sPath) {
@ -49,7 +49,7 @@ public class JavalinConfig extends Properties {
/**
* Liefert den Port zurück, auf dem Javalin läuft
* @return
* @return Port
*/
public Integer getPort() {
return GeneralUtils.parseInt(getProperty("port"));

View file

@ -20,17 +20,18 @@ public abstract class FileUtils {
* Creates a (possibly nested) directory
* @param dir the directory path (e.g. "generated", "level1/level2/level3", etc.)
*/
public static void createDirectoryIFNotExists(String dir) {
public static boolean createDirectoryIFNotExists(String dir) {
File directory = new File(dir);
if (! directory.exists()){
directory.mkdirs();
if (!directory.exists()){
return directory.mkdirs();
}
return true;
}
/**
* Write a list of Strings to file
* @param fileName
* @param stringsList
* @param fileName the filename
* @param stringsList the list of strings to be written to the file
*/
public static void writeStringsToFile(String fileName, List<String> stringsList) {
FileWriter fileWriter = null;
@ -55,8 +56,8 @@ public abstract class FileUtils {
/**
*
* @param fileName
* @param string
* @param fileName the filename
* @param string the string to be written to the file
*/
public static void writeStringToFile(String fileName, String string) {
FileWriter fileWriter = null;
@ -81,9 +82,9 @@ public abstract class FileUtils {
/**
*
* @param fileName
* @return
* @throws IOException
* @param fileName the filename
* @return the filewriter object
* @throws IOException if the file cannot be created
*/
public static FileWriter createFileWriter(String fileName) throws IOException{
FileWriter fileWriter = null;
@ -94,8 +95,8 @@ public abstract class FileUtils {
/**
*
* @param fileWriter
* @param stringsList
* @param fileWriter the filewriter object
* @param stringsList the list of strings to be written to the file
*/
public static void writeStringsToFile(FileWriter fileWriter, List<String> stringsList) {
@ -116,8 +117,8 @@ public abstract class FileUtils {
/**
*
* @param dir
* @return
* @param dir the directory
* @return a set of filenames in the directory
*/
public static Set<String> listFilesInDirectory(String dir) {
try (Stream<Path> stream = Files.list(Paths.get(dir))) {

View file

@ -1,28 +1,16 @@
package org.texttechnologylab.project.gruppe_05_1.util;
import java.io.IOException;
import java.nio.file.DirectoryStream;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.time.LocalDate;
import java.time.LocalDateTime;
import java.time.LocalTime;
import java.time.format.DateTimeFormatter;
import java.time.format.DateTimeParseException;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
import java.util.stream.Collectors;
import java.util.stream.Stream;
public abstract class GeneralUtils {
/**
*
* @param integer
* @return
* @param integer the integer to be parsed
* @return the parsed integer or null if the integer could not be parsed
*/
public static Integer parseInt(String integer) {
@ -35,8 +23,8 @@ public abstract class GeneralUtils {
/**
* Parse a date in the format used in Germany
* @param date
* @return
* @param date the date to be parsed
* @return the parsed date or null if the date could not be parsed
*/
public static LocalDate parseDate(String date) {
@ -50,9 +38,9 @@ public abstract class GeneralUtils {
/**
* Parse time in a given format
* @param date
* @param timeFormat
* @return
* @param date the time to be parsed
* @param timeFormat the format of the time
* @return the parsed time or null if the time could not be parsed
*/
public static LocalTime parseTime(String date, String timeFormat) {
@ -66,8 +54,8 @@ public abstract class GeneralUtils {
/**
* Format a date in the format used in Germany
* @param date
* @return
* @param date the date to be formatted
* @return the formatted date or an empty string if the date is null
*/
public static String formatDate(LocalDate date) {
@ -77,8 +65,8 @@ public abstract class GeneralUtils {
/**
* Format time in the format used in Germany
* @param time
* @return
* @param time the time to be formatted
* @return the formatted time or an empty string if the time is null
*/
public static String formatTime(LocalTime time) {

View file

@ -1,8 +1,6 @@
package org.texttechnologylab.project.gruppe_05_1.util;
import com.mongodb.client.MongoCollection;
import com.mongodb.client.MongoDatabase;
import com.mongodb.client.model.Indexes;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.select.Elements;
@ -12,10 +10,8 @@ import org.texttechnologylab.project.gruppe_05_1.database.MongoPprUtils;
import org.texttechnologylab.project.gruppe_05_1.domain.html.Parlamentarier;
import org.texttechnologylab.project.gruppe_05_1.domain.mdb.BiografischeAngaben;
import org.texttechnologylab.project.gruppe_05_1.domain.mdb.Mdb;
import org.texttechnologylab.project.gruppe_05_1.domain.mdb.MdbDocument;
import org.texttechnologylab.project.gruppe_05_1.domain.speaker.Membership;
import org.texttechnologylab.project.gruppe_05_1.domain.speaker.Speaker;
import org.texttechnologylab.project.gruppe_05_1.nlp.NlpUtils;
import org.texttechnologylab.project.gruppe_05_1.xml.FileObjectFactory;
import org.w3c.dom.Element;
import org.w3c.dom.Node;
@ -34,28 +30,12 @@ import java.util.zip.ZipInputStream;
public abstract class PPRUtils {
public static final String PARTEILOS_KUERZEL = "Parteilos";
private static Set<String> processedProtocols = new HashSet<>();
private static Set<org.w3c.dom.Document> xmlProtocols = new HashSet<>();
private static final Set<String> processedProtocols = new HashSet<>();
private static final Set<org.w3c.dom.Document> xmlProtocols = new HashSet<>();
public static final Integer legislaturPeriode = 20;
/**
* Prüfe, ob die Collections existieren. Falls nicht: erzeuge sie und lege Indizes an
*/
public static void ensureCollectionExist() {
Set<String> existingCollectionNames = MongoDBHandler.getCollectionNames();
if (!existingCollectionNames.contains(MongoPprUtils.SPEAKER_COLLECTION_NAME)) {
MongoPprUtils.createSpeakerCollection();
}
if (!existingCollectionNames.contains(MongoPprUtils.SPEECH_COLLECTION_NAME)) {
MongoPprUtils.createSpeechCollection();
}
}
/**
* Alle Informationen lesen...
* - Parlamentarier
@ -63,7 +43,8 @@ public abstract class PPRUtils {
* - Kommentare
* - etc.
* ... und in die Mongo-DB persistieren, falls noch nicht vorhanden sind.
* @param xmlFactory
* @param xmlFactory Factory für die XML-Objekte
* @param mongoFactory Factory für die MongoDB-Objekte
*/
public static void parlamentExplorerInit(FileObjectFactory xmlFactory, MongoObjectFactory mongoFactory) {
@ -93,7 +74,7 @@ public abstract class PPRUtils {
/**
* Fotos hochladen - TODO
* @param mongoFactory
* @param mongoFactory Factory für die MongoDB-Objekte
*/
public static void readPhotos(MongoObjectFactory mongoFactory) {
@ -101,8 +82,8 @@ public abstract class PPRUtils {
/**
* Reden und Kommentare einlesen - TODO
* @param xmlFactory
* @param mongoFactory
* @param xmlFactory Factory für die XML-Objekte
* @param mongoFactory Factory für die MongoDB-Objekte
*/
public static void readSpeechesAndComments(FileObjectFactory xmlFactory, MongoObjectFactory mongoFactory) {
@ -110,8 +91,8 @@ public abstract class PPRUtils {
/**
* Liest die MdBs von der Bundestag-Seite und persistiert sie in der MongoDB
* @param mdbUrl
* @param xmlFactory
* @param mdbUrl URL der MDBs
* @param xmlFactory Factory für die XML-Objekte
*/
public static void readAndPersistMdbs(String mdbUrl, FileObjectFactory xmlFactory, MongoObjectFactory mongoFactory) {
org.w3c.dom.Document mdbRoot = getMdbFromRemoteXmlZipfile(mdbUrl);
@ -125,7 +106,7 @@ public abstract class PPRUtils {
Speaker speaker = xmlFactory.createSpeaker(mdbNode);
// System.out.println("Speaker " + speaker.getId() + " (" + speaker.getFirstName() + " " + speaker.getName() + ", " + speaker.getParty() + ")");
if (mdbActiveInWp(speaker, 20)) {
if (mdbActiveInWp(speaker, legislaturPeriode)) {
org.bson.Document speakerDoc = mongoFactory.createSpeaker(speaker);
MongoDBHandler.insertDocument(speakerCollection, speakerDoc);
}
@ -140,19 +121,19 @@ public abstract class PPRUtils {
private static boolean mdbActiveInWp(Speaker speaker, Integer legislaturPeriode) {
List<Integer> wps = speaker.getMemberships().stream()
.map(Membership::getWp)
.collect(Collectors.toList());
.toList();
return wps.contains(legislaturPeriode);
}
/**
* Liest die MDB aus einer Zip-Datei der bundestag.de-Seite
* @param zipUrl
* @return
* @param zipUrl URL der ZIP-Datei
* @return the MDB Document
*/
private static org.w3c.dom.Document getMdbFromRemoteXmlZipfile(String zipUrl) {
URL url = null;
InputStream urlInputStream = null;
URL url;
InputStream urlInputStream;
try {
url = new URL(zipUrl);
urlInputStream = url.openStream();
@ -196,9 +177,9 @@ public abstract class PPRUtils {
/**
* Helper method to save a stream for a later use.
* We use it to save the DTD and the XML files of the MDBs, which are within a zipfile on the bundestag.de site
* @param input
* @param output
* @throws IOException
* @param input the input stream
* @param output the output stream
* @throws IOException if an error occurs
*/
private static void copyStream(InputStream input, OutputStream output) throws IOException {
@ -212,7 +193,7 @@ public abstract class PPRUtils {
/**
* Eine Liste von Parlamentariern nach Namen (erst nach Nachnamen, dann nach Vornamen) sortieren (aufsteigend)
* @param mdbList
* @param mdbList Liste der Parlamentarier
*/
public static void sortParlamentarierByName(List<Parlamentarier> mdbList) {
@ -231,8 +212,8 @@ public abstract class PPRUtils {
/**
* Alle Parteien (aus einer Liste der MdBs) herausfinden.
* null-Einträge durch einen Platzhalter ersetzen, damit später keine null pointer exceptions auftreten
* @param mdbList
* @return
* @param mdbList Liste der MdBs
* @return Set der Parteien
*/
public static Set<String> getFraktionenFromMdbList(List<Mdb> mdbList) {
@ -259,8 +240,8 @@ public abstract class PPRUtils {
/**
* Eine Zuordnung zwischen Partei und deren Mitgliedern erzeugen. Ein Mitglied ist hier die MDB-Struktur
* @param mdbList
* @return
* @param mdbList Liste der MdBs
* @return Map mit Partei als Key und Liste der MdBs als Value
*/
public static Map<String, List<Mdb>> createMdbParteiZuordnung(List<Mdb> mdbList) {
@ -285,9 +266,9 @@ public abstract class PPRUtils {
/**
* Eine Zuordnung zwischen Partei und deren Mitgliedern erzeugen. Ein Mitglied wird hier durch seine ID erfaßt
* @param parteien
* @param mdbList
* @return
* @param parteien Set der Parteien
* @param mdbList Liste der MdBs
* @return Map mit Partei als Key und Liste der MdB-IDs als Value
*/
public static Map<String, List<String>> getMdbParteiZuordnung(Set<String> parteien, List<Mdb> mdbList) {
@ -314,8 +295,8 @@ public abstract class PPRUtils {
/**
* Eine Zuordnung zwischen MdB (repräsentiert durch seine ID) und seinen biographischen Daten erzeugen.
* @param mdbList
* @return
* @param mdbList Liste der MdBs
* @return Map mit MdB-ID als Key und biographischen Daten als Value
*/
public static Map<String, BiografischeAngaben> getMdbParteiZuordnung(List<Mdb> mdbList) {
@ -330,7 +311,7 @@ public abstract class PPRUtils {
/**
* Eine Liste von MdBs nach Namen (erst nach Nachnamen, dann nach Vornamen) sortieren (aufsteigend)
* @param mdbList
* @param mdbList Liste der MdBs
*/
public static void sortMdbByName(List<Mdb> mdbList) {
@ -375,7 +356,7 @@ public abstract class PPRUtils {
xmlProtocols.add(xmlDoc);
} catch (Exception e) {
System.err.println("Fehler beim Verarbeiten der XML-Datei: " + xmlUrl);
e.printStackTrace();
System.err.println("Fehler: " + e.getLocalizedMessage());
}
}
@ -393,7 +374,7 @@ public abstract class PPRUtils {
}
} catch (IOException e) {
System.err.println("Fehler beim Laden der Seite: " + queryUrl);
e.printStackTrace();
System.err.println("Fehler: " + e.getLocalizedMessage());
break;
}
}

View file

@ -16,7 +16,6 @@ import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.zip.ZipEntry;
import java.util.zip.ZipInputStream;
public abstract class XmlUtils {

View file

@ -4,5 +4,5 @@ import org.w3c.dom.Node;
public interface XmlOperations {
FileObjectFactory factory = FileObjectFactory.getFactory();
public Object fromXmlNode(Node node);
Object fromXmlNode(Node node);
}

View file

@ -1,6 +1,5 @@
package org.texttechnologylab.project.gruppe_05_1.xml.mdb;
import org.texttechnologylab.project.gruppe_05_1.domain.Gender;
import org.texttechnologylab.project.gruppe_05_1.domain.mdb.BiografischeAngaben;
import org.texttechnologylab.project.gruppe_05_1.util.GeneralUtils;
import org.texttechnologylab.project.gruppe_05_1.xml.XmlOperations;

View file

@ -15,6 +15,7 @@ public class MdbDocument_File_Impl extends MdbDocument implements XmlOperations
MdbDocument doc = new MdbDocument_File_Impl();
Node versionNode = XmlUtils.getFirstChildByName(node, "VERSION");
assert versionNode != null;
doc.setVersion(versionNode.getFirstChild().getNodeValue());
List<Mdb> mdbs = new ArrayList<>();

View file

@ -15,9 +15,11 @@ public class Mdb_File_Impl extends Mdb implements XmlOperations {
public Mdb fromXmlNode(Node node) {
Mdb mdb = new Mdb_File_Impl();
Node idNode = XmlUtils.getFirstChildByName(node, "ID");
assert idNode != null;
mdb.setId(idNode.getFirstChild().getNodeValue());
Node namenNode = XmlUtils.getFirstChildByName(node, "NAMEN");
assert namenNode != null;
List<Node> nameNodeList = XmlUtils.getChildrenByName(namenNode, "NAME");
List<MdbName> mdbNameList = new ArrayList<>();
for (Node nameNode : nameNodeList) {
@ -31,6 +33,7 @@ public class Mdb_File_Impl extends Mdb implements XmlOperations {
Node wpenNode = XmlUtils.getFirstChildByName(node, "WAHLPERIODEN");
assert wpenNode != null;
List<Node> wpNodeList = XmlUtils.getChildrenByName(wpenNode, "WAHLPERIODE");
List<Wahlperiode> wpList = new ArrayList<>();
for (Node wpNode : wpNodeList) {

View file

@ -3,7 +3,6 @@ package org.texttechnologylab.project.gruppe_05_1.xml.speaker;
import org.texttechnologylab.project.gruppe_05_1.domain.mdb.*;
import org.texttechnologylab.project.gruppe_05_1.domain.speaker.Membership;
import org.texttechnologylab.project.gruppe_05_1.domain.speaker.Speaker;
import org.texttechnologylab.project.gruppe_05_1.util.GeneralUtils;
import org.texttechnologylab.project.gruppe_05_1.util.PPRUtils;
import org.texttechnologylab.project.gruppe_05_1.util.XmlUtils;
import org.texttechnologylab.project.gruppe_05_1.xml.XmlOperations;
@ -20,10 +19,12 @@ public class Speaker_File_Impl extends Speaker implements XmlOperations {
// ID
Node idNode = XmlUtils.getFirstChildByName(node, "ID");
assert idNode != null;
speaker.setId(idNode.getFirstChild().getNodeValue());
// Name: alle Namen lesen, nur den letzten berücksichtigen
Node namenNode = XmlUtils.getFirstChildByName(node, "NAMEN");
assert namenNode != null;
List<Node> nameNodeList = XmlUtils.getChildrenByName(namenNode, "NAME");
List<MdbName> mdbNameList = new ArrayList<>();
for (Node nameNode : nameNodeList) {
@ -58,6 +59,7 @@ public class Speaker_File_Impl extends Speaker implements XmlOperations {
// Memberships
Node wpenNode = XmlUtils.getFirstChildByName(node, "WAHLPERIODEN");
assert wpenNode != null;
List<Node> wpNodeList = XmlUtils.getChildrenByName(wpenNode, "WAHLPERIODE");
List<Wahlperiode> wpList = new ArrayList<>();
for (Node wpNode : wpNodeList) {

View file

@ -1,6 +1,5 @@
package org.texttechnologylab.project.gruppe_05_1.xml.speeches.Impls;
import lombok.Getter;
import org.texttechnologylab.project.gruppe_05_1.xml.speeches.Interfaces.Content;
import org.texttechnologylab.project.gruppe_05_1.xml.speeches.Interfaces.Speaker;
import org.texttechnologylab.project.gruppe_05_1.xml.speeches.enums.MongoDBEntryType;

View file

@ -1,8 +1,5 @@
package org.texttechnologylab.project.gruppe_05_1.xml.speeches;
import lombok.Getter;
import lombok.Setter;
import org.texttechnologylab.project.gruppe_05_1.database.MongoPprUtils;
import org.texttechnologylab.project.gruppe_05_1.util.PPRUtils;
import org.texttechnologylab.project.gruppe_05_1.xml.speeches.Impls.*;
import org.texttechnologylab.project.gruppe_05_1.xml.speeches.Interfaces.AgendaItem;
@ -29,7 +26,6 @@ public class SpeechParser {
private List<Speech> speeches;
private List<AgendaItem> agendaItems;
private Boolean parseLegislativePeriods;
public List<Speech> getSpeeches() {
return speeches;
@ -40,7 +36,6 @@ public class SpeechParser {
}
public void setParseLegislativePeriods(Boolean parseLegislativePeriods) {
this.parseLegislativePeriods = parseLegislativePeriods;
}
public List<Session> parseAllSessions() {
@ -58,7 +53,7 @@ public class SpeechParser {
tempFile.delete(); // Lösche die temporäre Datei nach der Verarbeitung
} catch (Exception e) {
System.err.println("Error parsing XML document.");
e.printStackTrace();
System.err.println(e.getMessage());
}
}
return sessions;

Binary file not shown.