some code refactoring

This commit is contained in:
Jonas_Jones 2025-03-09 15:27:28 +01:00
parent d2c5e1ce2b
commit 005ab18142
34 changed files with 153 additions and 232 deletions

View file

@ -1,7 +1,6 @@
package org.texttechnologylab.project.gruppe_05_1.database;
import org.bson.Document;
import org.texttechnologylab.project.gruppe_05_1.xml.FileObjectFactory;
import java.util.List;

View file

@ -2,27 +2,21 @@ package org.texttechnologylab.project.gruppe_05_1.database;
import com.mongodb.client.MongoCollection;
import com.mongodb.client.MongoCursor;
import com.mongodb.client.model.Indexes;
import lombok.extern.slf4j.Slf4j;
import org.bson.Document;
import org.texttechnologylab.project.gruppe_05_1.domain.html.Parlamentarier;
import org.texttechnologylab.project.gruppe_05_1.domain.html.ParlamentarierDetails;
import org.texttechnologylab.project.gruppe_05_1.domain.speaker.Membership;
import org.texttechnologylab.project.gruppe_05_1.domain.speech.Speech;
import org.texttechnologylab.project.gruppe_05_1.util.PPRUtils;
import java.time.LocalDate;
import java.time.LocalDateTime;
import java.time.ZoneId;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Date;
import java.util.List;
import java.util.*;
/**
* Diese Klasse beinhaltet Mongo-Utilities, welche spezifisch für die PPR-Datenstrukturen sind.
*
* Mongo-Utilities genereller Natur stehen in der Klasse MongoDBHandler.
*/
@Slf4j
public class MongoPprUtils {
/*
@ -34,29 +28,29 @@ public class MongoPprUtils {
public static final String PICTURES_COLLECTION_NAME = "pictures";
public static final String COMMENT_COLLECTION_NAME = "comment";
private static MongoCollection<Document> speakerCollecion = null;
private static MongoCollection<Document> speechCollecion = null;
private static MongoCollection<Document> picturesCollecion = null;
private static MongoCollection<Document> commentCollecion = null;
private static MongoCollection<Document> speakerCollection = null;
private static MongoCollection<Document> speechCollection = null;
private static MongoCollection<Document> picturesCollection = null;
private static MongoCollection<Document> commentCollection = null;
public static MongoCollection<Document> getSpeakerCollection() {
if (speakerCollecion == null) speakerCollecion = MongoDBHandler.getMongoDatabase().getCollection(SPEAKER_COLLECTION_NAME);
return speakerCollecion;
if (speakerCollection == null) speakerCollection = MongoDBHandler.getMongoDatabase().getCollection(SPEAKER_COLLECTION_NAME);
return speakerCollection;
}
public static MongoCollection<Document> getSpeechCollection() {
if (speechCollecion == null) speechCollecion = MongoDBHandler.getMongoDatabase().getCollection(SPEECH_COLLECTION_NAME);
return speechCollecion;
if (speechCollection == null) speechCollection = MongoDBHandler.getMongoDatabase().getCollection(SPEECH_COLLECTION_NAME);
return speechCollection;
}
public static MongoCollection<Document> getPicturesCollection() {
if (picturesCollecion == null) picturesCollecion = MongoDBHandler.getMongoDatabase().getCollection(PICTURES_COLLECTION_NAME);
return picturesCollecion;
if (picturesCollection == null) picturesCollection = MongoDBHandler.getMongoDatabase().getCollection(PICTURES_COLLECTION_NAME);
return picturesCollection;
}
public static MongoCollection<Document> getCommentCollection() {
if (commentCollecion == null) commentCollecion = MongoDBHandler.getMongoDatabase().getCollection(COMMENT_COLLECTION_NAME);
return commentCollecion;
if (commentCollection == null) commentCollection = MongoDBHandler.getMongoDatabase().getCollection(COMMENT_COLLECTION_NAME);
return commentCollection;
}
/**
@ -143,7 +137,7 @@ public class MongoPprUtils {
plist.add(p);
}
} catch (Throwable t) {
System.err.println(t);
System.err.print(t);
} finally {
cursor.close();
}
@ -164,11 +158,7 @@ public class MongoPprUtils {
p.setNachname((String) doc.get("name"));
p.setVorname((String) doc.get("firstName"));
String partei = (String) doc.get("party");
if (partei == null) {
p.setPartei("(parteilos)");
} else {
p.setPartei(partei);
}
p.setPartei(Objects.requireNonNullElse(partei, "(parteilos)"));
return p;
}
@ -186,8 +176,7 @@ public class MongoPprUtils {
*/
public static ParlamentarierDetails getParlamentarierDetailsByID(String id) {
Document doc = MongoDBHandler.findFirstDocumentInCollection(getSpeakerCollection(), "_id", id);
ParlamentarierDetails p = readParlamentarierDetailsFromSpeaker(doc);
return p;
return readParlamentarierDetailsFromSpeaker(doc);
}
@ -198,8 +187,7 @@ public class MongoPprUtils {
*/
public static ParlamentarierDetails getParlamentarierDetailsByID(Integer id) {
Document doc = MongoDBHandler.findFirstDocumentInCollection(getSpeakerCollection(), "_id", id.toString());
ParlamentarierDetails p = readParlamentarierDetailsFromSpeaker(doc);
return p;
return readParlamentarierDetailsFromSpeaker(doc);
}
/**
@ -215,16 +203,12 @@ public class MongoPprUtils {
p.setNachname((String) doc.get("name"));
p.setVorname((String) doc.get("firstName"));
String partei = (String) doc.get("party");
if (partei == null) {
p.setPartei("(parteilos)");
} else {
p.setPartei(partei);
}
p.setPartei(Objects.requireNonNullElse(partei, "(parteilos)"));
p.setTitle((String) doc.get("title"));
p.setGeburtsort((String) doc.get("geburtsort"));
p.setGeschlecht((String) doc.get("geschlecht"));
p.setBeruf((String) doc.get("beruf"));
p.setAkademischertitel((String) doc.get("akademischertitel"));
p.setAkademischerTitel((String) doc.get("akademischertitel"));
p.setFamilienstand((String) doc.get("familienstand"));
p.setReligion((String) doc.get("religion"));
p.setVita((String) doc.get("vita"));

View file

@ -1,6 +1,5 @@
package org.texttechnologylab.project.gruppe_05_1.database;
import lombok.Getter;
import org.texttechnologylab.project.gruppe_05_1.xml.speeches.Interfaces.*;
import java.util.List;

View file

@ -3,10 +3,7 @@ package org.texttechnologylab.project.gruppe_05_1.database;
import org.texttechnologylab.project.gruppe_05_1.xml.speeches.Interfaces.*;
import org.texttechnologylab.project.gruppe_05_1.xml.speeches.SpeechParser;
import org.xml.sax.SAXException;
import javax.xml.parsers.ParserConfigurationException;
import java.io.IOException;
import java.util.List;
import java.util.stream.Collectors;
@ -16,18 +13,15 @@ public class SpeechIndexFactoryImpl implements SpeechIndexFactory {
private List<AgendaItem> agendaItems;
private List<Speech> speeches;
private List<MemberOfParliament> members;
private List<Fraction> fractions;
private Boolean parseLegislativePeriods = true;
@Override
public SpeechIndexFactoryImpl parseLegislativePeriods(Boolean parseLegislativePeriods) {
this.parseLegislativePeriods = parseLegislativePeriods;
try {this.speechParser.setParseLegislativePeriods(parseLegislativePeriods);} catch (NullPointerException ignored) {}
return this;
}
public SpeechIndexFactoryImpl builder() throws ParserConfigurationException {
public SpeechIndexFactoryImpl builder() {
this.speechParser = new SpeechParser();
return this;
}
@ -41,7 +35,7 @@ public class SpeechIndexFactoryImpl implements SpeechIndexFactory {
@Override
public SpeechIndex build() throws IOException, SAXException {
public SpeechIndex build() {
return new SpeechIndex(sessions, speeches, agendaItems);
}

View file

@ -26,8 +26,8 @@ public class BiografischeAngaben_Mongo_Impl extends BiografischeAngaben implemen
fields.put("parteiKuerzel", entity.getParteiKuerzel());
fields.put("vitaKurz", entity.getVitaKurz());
fields.put("veroeffentlichungspflichtiges", entity.getVeroeffentlichungspflichtiges());
Document doc = MongoDBHandler.createDocument(false, fields);
return doc;
return MongoDBHandler.createDocument(false, fields);
}
@Override

View file

@ -22,8 +22,7 @@ public class Institution_Mongo_Impl extends Institution implements MongoOperatio
fields.put("fktinsVon", entity.getFktinsVon());
fields.put("fktinsBis", entity.getFktinsBis());
Document doc = MongoDBHandler.createDocument(false, fields);
return doc;
return MongoDBHandler.createDocument(false, fields);
}
@Override

View file

@ -23,6 +23,7 @@ public class MdbName_Mongo_Impl extends MdbName implements MongoOperations<MdbNa
fields.put("akadTitel", entity.getAkadTitel());
fields.put("historieVon", entity.getHistorieVon());
fields.put("historieBis", entity.getHistorieBis());
return MongoDBHandler.createDocument(false, fields);
}

View file

@ -34,9 +34,8 @@ public class Mdb_Mongo_Impl extends Mdb implements MongoOperations<Mdb> {
"bio", bioDoc,
"wahlperioden", wpDocs
);
Document doc = MongoDBHandler.createDocument(false, fields);
return doc;
return MongoDBHandler.createDocument(false, fields);
}
@Override

View file

@ -25,9 +25,7 @@ public class Membership_Mongo_Impl extends Membership implements MongoOperations
fields.put("member", entity.getMember()); // TODO: wahrscheinlich nicht nötig
fields.put("wp", entity.getWp());
Document doc = MongoDBHandler.createDocument(false, fields);
return doc;
return MongoDBHandler.createDocument(false, fields);
}
@Override

View file

@ -35,9 +35,7 @@ public class Speaker_Mongo_Impl extends Speaker implements MongoOperations<Speak
fields.put("party", entity.getParty());
fields.put("memberships", membershipDocs);
Document doc = MongoDBHandler.createDocument(false, fields);
return doc;
return MongoDBHandler.createDocument(false, fields);
}
@Override

View file

@ -32,8 +32,7 @@ public class Wahlperiode_Mongo_Impl extends Wahlperiode implements MongoOperatio
}
fields.put("institutionen", instDocs);
Document doc = MongoDBHandler.createDocument(false, fields);
return doc;
return MongoDBHandler.createDocument(false, fields);
}
@Override

View file

@ -13,7 +13,7 @@ public enum Gender { // TODO: Delete
return this.text;
}
private Gender(String text) {this.text = text;}
Gender(String text) {this.text = text;}
public static Gender byText(String text) {
if (null == text) return NA;

View file

@ -20,7 +20,7 @@ public class ParlamentarierDetails {
LocalDate sterbedatum;
String geschlecht;
String beruf;
String akademischertitel;
String akademischerTitel;
String familienstand;
String religion;
String vita;
@ -116,12 +116,12 @@ public class ParlamentarierDetails {
this.beruf = beruf;
}
public String getAkademischertitel() {
return akademischertitel;
public String getAkademischerTitel() {
return akademischerTitel;
}
public void setAkademischertitel(String akademischertitel) {
this.akademischertitel = akademischertitel;
public void setAkademischerTitel(String akademischerTitel) {
this.akademischerTitel = akademischerTitel;
}
public String getFamilienstand() {
@ -183,8 +183,7 @@ public class ParlamentarierDetails {
@Override
public boolean equals(Object o) {
if (this == o) return true;
if (!(o instanceof ParlamentarierDetails)) return false;
ParlamentarierDetails that = (ParlamentarierDetails) o;
if (!(o instanceof ParlamentarierDetails that)) return false;
return Objects.equals(id, that.id) ;
}
@ -206,7 +205,7 @@ public class ParlamentarierDetails {
.add("sterbedatum=" + sterbedatum)
.add("geschlecht='" + geschlecht + "'")
.add("beruf='" + beruf + "'")
.add("akademischertitel='" + akademischertitel + "'")
.add("akademischertitel='" + akademischerTitel + "'")
.add("familienstand='" + familienstand + "'")
.add("religion='" + religion + "'")
.add("vita='" + vita + "'")

View file

@ -4,8 +4,6 @@ import java.time.LocalDate;
import java.util.Objects;
import java.util.StringJoiner;
import org.texttechnologylab.project.gruppe_05_1.domain.Gender;
public abstract class BiografischeAngaben {
private LocalDate geburtsdatum;
@ -111,9 +109,8 @@ public abstract class BiografischeAngaben {
@Override
public boolean equals(Object o) {
if (this == o) return true;
if (!(o instanceof BiografischeAngaben)) return false;
BiografischeAngaben that = (BiografischeAngaben) o;
return Objects.equals(geburtsdatum, that.geburtsdatum) && Objects.equals(geburtsort, that.geburtsort) && Objects.equals(geburtsland, that.geburtsland) && Objects.equals(sterbedatum, that.sterbedatum) && gender == that.gender && familienstand == that.familienstand && religion == that.religion && Objects.equals(beruf, that.beruf) && Objects.equals(parteiKuerzel, that.parteiKuerzel) && Objects.equals(vitaKurz, that.vitaKurz) && Objects.equals(veroeffentlichungspflichtiges, that.veroeffentlichungspflichtiges);
if (!(o instanceof BiografischeAngaben that)) return false;
return Objects.equals(geburtsdatum, that.geburtsdatum) && Objects.equals(geburtsort, that.geburtsort) && Objects.equals(geburtsland, that.geburtsland) && Objects.equals(sterbedatum, that.sterbedatum) && Objects.equals(gender, that.gender) && Objects.equals(familienstand, that.familienstand) && Objects.equals(religion, that.religion) && Objects.equals(beruf, that.beruf) && Objects.equals(parteiKuerzel, that.parteiKuerzel) && Objects.equals(vitaKurz, that.vitaKurz) && Objects.equals(veroeffentlichungspflichtiges, that.veroeffentlichungspflichtiges);
}
@Override

View file

@ -10,7 +10,7 @@ public enum Mandatsart {
private final String text;
private Mandatsart(String text) {this.text = text;}
Mandatsart(String text) {this.text = text;}
public static Mandatsart byText(String text) {
if (null == text) return NA;

View file

@ -46,8 +46,7 @@ public abstract class Mdb {
@Override
public boolean equals(Object o) {
if (this == o) return true;
if (!(o instanceof Mdb)) return false;
Mdb mdb = (Mdb) o;
if (!(o instanceof Mdb mdb)) return false;
return Objects.equals(id, mdb.id) && Objects.equals(namen, mdb.namen) && Objects.equals(bio, mdb.bio) && Objects.equals(wahlperioden, mdb.wahlperioden);
}

View file

@ -28,8 +28,7 @@ public abstract class MdbDocument {
@Override
public boolean equals(Object o) {
if (this == o) return true;
if (!(o instanceof MdbDocument)) return false;
MdbDocument that = (MdbDocument) o;
if (!(o instanceof MdbDocument that)) return false;
return Objects.equals(version, that.version) && Objects.equals(mdbs, that.mdbs);
}

View file

@ -92,9 +92,8 @@ public abstract class Wahlperiode {
@Override
public boolean equals(Object o) {
if (this == o) return true;
if (!(o instanceof Wahlperiode)) return false;
Wahlperiode that = (Wahlperiode) o;
return wp == that.wp && wknNr == that.wknNr && Objects.equals(mdbWpVon, that.mdbWpVon) && Objects.equals(mdbWpBis, that.mdbWpBis) && Objects.equals(wkrName, that.wkrName) && Objects.equals(wkrLand, that.wkrLand) && Objects.equals(liste, that.liste) && mandatsart == that.mandatsart && Objects.equals(institutionen, that.institutionen);
if (!(o instanceof Wahlperiode that)) return false;
return wp == that.wp && Objects.equals(wknNr, that.wknNr) && Objects.equals(mdbWpVon, that.mdbWpVon) && Objects.equals(mdbWpBis, that.mdbWpBis) && Objects.equals(wkrName, that.wkrName) && Objects.equals(wkrLand, that.wkrLand) && Objects.equals(liste, that.liste) && mandatsart == that.mandatsart && Objects.equals(institutionen, that.institutionen);
}
@Override

View file

@ -1,7 +1,6 @@
package org.texttechnologylab.project.gruppe_05_1.domain.speaker;
import java.time.LocalDate;
import java.time.LocalDateTime;
import java.util.Objects;
import java.util.StringJoiner;

View file

@ -7,8 +7,8 @@ import java.util.StringJoiner;
public abstract class Protocol {
LocalDate date;
LocalTime starttime;
LocalTime endtime;
LocalTime startTime;
LocalTime endTime;
Integer index;
String titel;
String place;
@ -22,20 +22,20 @@ public abstract class Protocol {
this.date = date;
}
public LocalTime getStarttime() {
return starttime;
public LocalTime getStartTime() {
return startTime;
}
public void setStarttime(LocalTime starttime) {
this.starttime = starttime;
public void setStartTime(LocalTime startTime) {
this.startTime = startTime;
}
public LocalTime getEndtime() {
return endtime;
public LocalTime getEndTime() {
return endTime;
}
public void setEndtime(LocalTime endtime) {
this.endtime = endtime;
public void setEndTime(LocalTime endTime) {
this.endTime = endTime;
}
public Integer getIndex() {
@ -74,22 +74,22 @@ public abstract class Protocol {
public boolean equals(Object o) {
if (this == o) return true;
if (!(o instanceof Protocol protocol)) return false;
return Objects.equals(date, protocol.date) && Objects.equals(starttime, protocol.starttime)
&& Objects.equals(endtime, protocol.endtime) && Objects.equals(index, protocol.index)
return Objects.equals(date, protocol.date) && Objects.equals(startTime, protocol.startTime)
&& Objects.equals(endTime, protocol.endTime) && Objects.equals(index, protocol.index)
&& Objects.equals(titel, protocol.titel) && Objects.equals(place, protocol.place) && Objects.equals(wp, protocol.wp);
}
@Override
public int hashCode() {
return Objects.hash(date, starttime, endtime, index, titel, place, wp);
return Objects.hash(date, startTime, endTime, index, titel, place, wp);
}
@Override
public String toString() {
return new StringJoiner(", ", Protocol.class.getSimpleName() + "[", "]")
.add("date=" + date)
.add("starttime=" + starttime)
.add("endtime=" + endtime)
.add("starttime=" + startTime)
.add("endtime=" + endTime)
.add("index=" + index)
.add("titel='" + titel + "'")
.add("place='" + place + "'")

View file

@ -1,29 +1,15 @@
package org.texttechnologylab.project.gruppe_05_1.nlp;
import com.mongodb.client.model.Filters;
import com.mongodb.client.model.UpdateOneModel;
import com.mongodb.client.model.Updates;
import com.mongodb.client.model.WriteModel;
import de.tudarmstadt.ukp.dkpro.core.api.metadata.type.DocumentMetaData;
import de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Sentence;
import de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Token;
import org.apache.commons.io.FileUtils;
import org.apache.uima.UIMAException;
import org.apache.uima.UIMAFramework;
import org.apache.uima.cas.CAS;
import org.apache.uima.cas.CASException;
import org.apache.uima.cas.Feature;
import org.apache.uima.cas.impl.XmiCasDeserializer;
import org.apache.uima.cas.text.AnnotationFS;
import org.apache.uima.cas.text.AnnotationIndex;
import org.apache.uima.fit.factory.JCasFactory;
import org.apache.uima.fit.util.JCasUtil;
import org.apache.uima.jcas.JCas;
import org.apache.uima.resource.ResourceInitializationException;
import org.apache.uima.resource.metadata.TypeSystemDescription;
import org.apache.uima.util.CasCreationUtils;
import org.apache.uima.util.XMLInputSource;
import org.bson.Document;
import org.dkpro.core.io.xmi.XmiWriter;
import org.texttechnologylab.DockerUnifiedUIMAInterface.DUUIComposer;
import org.texttechnologylab.DockerUnifiedUIMAInterface.driver.DUUIDockerDriver;
@ -31,29 +17,23 @@ import org.texttechnologylab.DockerUnifiedUIMAInterface.driver.DUUIRemoteDriver;
import org.texttechnologylab.DockerUnifiedUIMAInterface.driver.DUUIUIMADriver;
import org.texttechnologylab.DockerUnifiedUIMAInterface.lua.DUUILuaContext;
import org.texttechnologylab.annotation.NamedEntity;
import org.texttechnologylab.project.gruppe_05_1.database.MongoDBHandler;
import org.texttechnologylab.uima.type.Sentiment;
import org.xml.sax.SAXException;
import java.io.*;
import java.net.URISyntaxException;
import java.net.URL;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.*;
import java.util.zip.GZIPInputStream;
import java.util.zip.ZipEntry;
import java.util.zip.ZipInputStream;
import static org.apache.uima.fit.factory.AnalysisEngineFactory.createEngineDescription;
import static org.springframework.core.io.buffer.DataBufferUtils.readInputStream;
public class NlpUtils {
// common class-attributes
private static DUUIComposer pComposer = null;
private static int iWorkers = 1;
private static final int iWorkers = 1;
private static final String TYPE_SYSTEM_DESCRIPTOR_PATH = "/speeches/TypeSystem.xml.gz";
private static final int MAX_FEATURE_LENGTH = 10000;
@ -66,7 +46,7 @@ public class NlpUtils {
runVideos();
} catch (Exception e) {
System.err.println("Exception: " + e.getMessage());
System.err.println(e.getStackTrace());
System.err.println(Arrays.toString(e.getStackTrace()));
throw new RuntimeException(e);
}
@ -79,12 +59,12 @@ public class NlpUtils {
private static void duuiInit() {
DUUILuaContext ctx = null;
DUUILuaContext ctx;
try {
ctx = new DUUILuaContext().withJsonLibrary();
} catch (IOException e) {
System.err.println("IOException: " + e.getMessage());
System.err.println(e.getStackTrace());
System.err.println(Arrays.toString(e.getStackTrace()));
throw new RuntimeException(e);
}
@ -95,7 +75,7 @@ public class NlpUtils {
.withWorkers(iWorkers); // wir geben dem Composer eine Anzahl an Threads mit.
} catch (URISyntaxException e) {
System.err.println("URISyntaxException: " + e.getMessage());
System.err.println(e.getStackTrace());
System.err.println(e.getMessage());
throw new RuntimeException(e);
}
@ -106,15 +86,15 @@ public class NlpUtils {
dockerDriver = new DUUIDockerDriver();
} catch (IOException e) {
System.err.println("IOException: " + e.getMessage());
System.err.println(e.getStackTrace());
System.err.println(Arrays.toString(e.getStackTrace()));
throw new RuntimeException(e);
} catch (UIMAException e) {
System.err.println("UIMAException: " + e.getMessage());
System.err.println(e.getStackTrace());
System.err.println(Arrays.toString(e.getStackTrace()));
throw new RuntimeException(e);
} catch (SAXException e) {
System.err.println("SAXException: " + e.getMessage());
System.err.println(e.getStackTrace());
System.err.println(Arrays.toString(e.getStackTrace()));
throw new RuntimeException(e);
}
@ -126,22 +106,20 @@ public class NlpUtils {
/**
* Initialization of a sample CAS document
* @return
* @throws ResourceInitializationException
* @throws CASException
* @return JCas object
*/
public static JCas getCas() {
// init a CAS with a static text.
JCas pCas = null;
JCas pCas;
try {
pCas = JCasFactory.createText("Ich finde dieses Programm läuft sehr gut. Ich überlege wie ich dieses für meine Bachelor-Arbeit nachnutzen kann.", "de");
} catch (ResourceInitializationException e) {
System.err.println("ResourceInitializationException: " + e.getMessage());
System.err.println(e.getStackTrace());
System.err.println(Arrays.toString(e.getStackTrace()));
throw new RuntimeException(e);
} catch (CASException e) {
System.err.println("CASException: " + e.getMessage());
System.err.println(e.getStackTrace());
System.err.println(Arrays.toString(e.getStackTrace()));
throw new RuntimeException(e);
}
@ -168,7 +146,7 @@ public class NlpUtils {
.build());
} catch (Exception e) {
System.err.println("Exception: " + e.getMessage());
System.err.println(e.getStackTrace());
System.err.println(Arrays.toString(e.getStackTrace()));
throw new RuntimeException(e);
}
@ -180,7 +158,7 @@ public class NlpUtils {
.build());
} catch (Exception e) {
System.err.println("Exception: " + e.getMessage());
System.err.println(e.getStackTrace());
System.err.println(Arrays.toString(e.getStackTrace()));
throw new RuntimeException(e);
}
@ -190,27 +168,27 @@ public class NlpUtils {
pComposer.run(tCas);
} catch (Exception e) {
System.err.println("Exception: " + e.getMessage());
System.err.println(e.getStackTrace());
System.err.println(Arrays.toString(e.getStackTrace()));
throw new RuntimeException(e);
}
JCasUtil.select(tCas, Sentence.class).stream().forEach(sentence -> {
JCasUtil.select(tCas, Sentence.class).forEach(sentence -> {
System.out.println(sentence.getBegin()+"-"+sentence.getEnd()+": "+sentence.getCoveredText());
System.out.println(JCasUtil.selectCovered(org.hucompute.textimager.uima.type.Sentiment.class, sentence));
});
}
private static void casInit() {
JCas jcas = null;
JCas jcas;
try {
jcas = JCasFactory.createJCas();
} catch (ResourceInitializationException e) {
System.err.println("ResourceInitializationException: " + e.getMessage());
System.err.println(e.getStackTrace());
System.err.println(Arrays.toString(e.getStackTrace()));
throw new RuntimeException(e);
} catch (CASException e) {
System.err.println("CASException: " + e.getMessage());
System.err.println(e.getStackTrace());
System.err.println(Arrays.toString(e.getStackTrace()));
throw new RuntimeException(e);
}
@ -256,7 +234,7 @@ public class NlpUtils {
/**
* Execution of video processing via DUUI using the RemoteDriver
* @throws Exception
* @throws Exception in case of an error
*/
public static void runVideos() throws Exception {
@ -265,6 +243,7 @@ public class NlpUtils {
URL fVideo = classLoader.getResource("example.mp4"); // TODO
// Konvertieren eines Videos in einen Base64-String
assert fVideo != null;
File fFile = new File(fVideo.getPath());
byte[] bFile = FileUtils.readFileToByteArray(fFile);
String encodedString = Base64.getEncoder().encodeToString(bFile);
@ -307,14 +286,10 @@ public class NlpUtils {
pComposer.run(pCas);
// select some data
JCasUtil.selectAll(videoCas).stream().forEach(videoAnnotation->{
System.out.println(videoAnnotation);
});
JCasUtil.selectAll(videoCas).forEach(System.out::println);
// select some data
JCasUtil.selectAll(transcriptCas).stream().forEach(tAnnotation->{
System.out.println(tAnnotation);
});
JCasUtil.selectAll(transcriptCas).forEach(System.out::println);
}

View file

@ -23,7 +23,7 @@ public class JavalinConfig extends Properties {
/**
* Constructor mit Pfad zur Properties-Datei
* @param sPath
* @param sPath Pfad zur Properties-Datei
*/
public JavalinConfig(String sPath) {
@ -49,7 +49,7 @@ public class JavalinConfig extends Properties {
/**
* Liefert den Port zurück, auf dem Javalin läuft
* @return
* @return Port
*/
public Integer getPort() {
return GeneralUtils.parseInt(getProperty("port"));

View file

@ -20,17 +20,18 @@ public abstract class FileUtils {
* Creates a (possibly nested) directory
* @param dir (e.g. "generated", "level1/level2/level3" etc.)
*/
public static void createDirectoryIFNotExists(String dir) {
public static boolean createDirectoryIFNotExists(String dir) {
File directory = new File(dir);
if (! directory.exists()){
directory.mkdirs();
if (!directory.exists()){
return directory.mkdirs();
}
return true;
}
/**
* Write a list of Strings to file
* @param fileName
* @param stringsList
* @param fileName the filename
* @param stringsList the list of strings to be written to the file
*/
public static void writeStringsToFile(String fileName, List<String> stringsList) {
FileWriter fileWriter = null;
@ -55,8 +56,8 @@ public abstract class FileUtils {
/**
*
* @param fileName
* @param string
* @param fileName the filename
* @param string the string to be written to the file
*/
public static void writeStringToFile(String fileName, String string) {
FileWriter fileWriter = null;
@ -81,9 +82,9 @@ public abstract class FileUtils {
/**
*
* @param fileName
* @return
* @throws IOException
* @param fileName the filename
* @return the filewriter object
* @throws IOException if the file cannot be created
*/
public static FileWriter createFileWriter(String fileName) throws IOException{
FileWriter fileWriter = null;
@ -94,8 +95,8 @@ public abstract class FileUtils {
/**
*
* @param fileWriter
* @param stringsList
* @param fileWriter the filewriter object
* @param stringsList the list of strings to be written to the file
*/
public static void writeStringsToFile(FileWriter fileWriter, List<String> stringsList) {
@ -116,8 +117,8 @@ public abstract class FileUtils {
/**
*
* @param dir
* @return
* @param dir the directory
* @return a set of filenames in the directory
*/
public static Set<String> listFilesInDirectory(String dir) {
try (Stream<Path> stream = Files.list(Paths.get(dir))) {

View file

@ -1,28 +1,16 @@
package org.texttechnologylab.project.gruppe_05_1.util;
import java.io.IOException;
import java.nio.file.DirectoryStream;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.time.LocalDate;
import java.time.LocalDateTime;
import java.time.LocalTime;
import java.time.format.DateTimeFormatter;
import java.time.format.DateTimeParseException;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
import java.util.stream.Collectors;
import java.util.stream.Stream;
public abstract class GeneralUtils {
/**
*
* @param integer
* @return
* @param integer the integer to be parsed
* @return the parsed integer or null if the integer could not be parsed
*/
public static Integer parseInt(String integer) {
@ -35,8 +23,8 @@ public abstract class GeneralUtils {
/**
* Parse a date in the format used in Germany
* @param date
* @return
* @param date the date to be parsed
* @return the parsed date or null if the date could not be parsed
*/
public static LocalDate parseDate(String date) {
@ -50,9 +38,9 @@ public abstract class GeneralUtils {
/**
* Parse time in a given format
* @param date
* @param timeFormat
* @return
* @param date the time to be parsed
* @param timeFormat the format of the time
* @return the parsed time or null if the time could not be parsed
*/
public static LocalTime parseTime(String date, String timeFormat) {
@ -66,8 +54,8 @@ public abstract class GeneralUtils {
/**
* Format a date in the format used in Germany
* @param date
* @return
* @param date the date to be formatted
* @return the formatted date or an empty string if the date is null
*/
public static String formatDate(LocalDate date) {
@ -77,8 +65,8 @@ public abstract class GeneralUtils {
/**
* Format time in the format used in Germany
* @param time
* @return
* @param time the time to be formatted
* @return the formatted time or an empty string if the time is null
*/
public static String formatTime(LocalTime time) {

View file

@ -1,8 +1,6 @@
package org.texttechnologylab.project.gruppe_05_1.util;
import com.mongodb.client.MongoCollection;
import com.mongodb.client.MongoDatabase;
import com.mongodb.client.model.Indexes;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.select.Elements;
@ -12,10 +10,8 @@ import org.texttechnologylab.project.gruppe_05_1.database.MongoPprUtils;
import org.texttechnologylab.project.gruppe_05_1.domain.html.Parlamentarier;
import org.texttechnologylab.project.gruppe_05_1.domain.mdb.BiografischeAngaben;
import org.texttechnologylab.project.gruppe_05_1.domain.mdb.Mdb;
import org.texttechnologylab.project.gruppe_05_1.domain.mdb.MdbDocument;
import org.texttechnologylab.project.gruppe_05_1.domain.speaker.Membership;
import org.texttechnologylab.project.gruppe_05_1.domain.speaker.Speaker;
import org.texttechnologylab.project.gruppe_05_1.nlp.NlpUtils;
import org.texttechnologylab.project.gruppe_05_1.xml.FileObjectFactory;
import org.w3c.dom.Element;
import org.w3c.dom.Node;
@ -34,8 +30,9 @@ import java.util.zip.ZipInputStream;
public abstract class PPRUtils {
public static final String PARTEILOS_KUERZEL = "Parteilos";
private static Set<String> processedProtocols = new HashSet<>();
private static Set<org.w3c.dom.Document> xmlProtocols = new HashSet<>();
private static final Set<String> processedProtocols = new HashSet<>();
private static final Set<org.w3c.dom.Document> xmlProtocols = new HashSet<>();
public static final Integer legislaturPeriode = 20;
@ -63,7 +60,8 @@ public abstract class PPRUtils {
* - Kommentare
* - etc.
* ... und in die Mongo-DB persistieren, falls sie noch nicht vorhanden sind.
* @param xmlFactory
* @param xmlFactory Factory für die XML-Objekte
* @param mongoFactory Factory für die MongoDB-Objekte
*/
public static void parlamentExplorerInit(FileObjectFactory xmlFactory, MongoObjectFactory mongoFactory) {
@ -93,7 +91,7 @@ public abstract class PPRUtils {
/**
* Fotos hochladen - TODO
* @param mongoFactory
* @param mongoFactory Factory für die MongoDB-Objekte
*/
public static void readPhotos(MongoObjectFactory mongoFactory) {
@ -101,8 +99,8 @@ public abstract class PPRUtils {
/**
* Reden und Kommentare einlesen - TODO
* @param xmlFactory
* @param mongoFactory
* @param xmlFactory Factory für die XML-Objekte
* @param mongoFactory Factory für die MongoDB-Objekte
*/
public static void readSpeechesAndComments(FileObjectFactory xmlFactory, MongoObjectFactory mongoFactory) {
@ -110,8 +108,8 @@ public abstract class PPRUtils {
/**
* Liest die MdBs aus der Bundestag-Seite und persistiert sie in der MongoDB
* @param mdbUrl
* @param xmlFactory
* @param mdbUrl URL der MDBs
* @param xmlFactory Factory für die XML-Objekte
*/
public static void readAndPersistMdbs(String mdbUrl, FileObjectFactory xmlFactory, MongoObjectFactory mongoFactory) {
org.w3c.dom.Document mdbRoot = getMdbFromRemoteXmlZipfile(mdbUrl);
@ -125,7 +123,7 @@ public abstract class PPRUtils {
Speaker speaker = xmlFactory.createSpeaker(mdbNode);
// System.out.println("Speaker " + speaker.getId() + " (" + speaker.getFirstName() + " " + speaker.getName() + ", " + speaker.getParty() + ")");
if (mdbActiveInWp(speaker, 20)) {
if (mdbActiveInWp(speaker, legislaturPeriode)) {
org.bson.Document speakerDoc = mongoFactory.createSpeaker(speaker);
MongoDBHandler.insertDocument(speakerCollection, speakerDoc);
}
@ -140,19 +138,19 @@ public abstract class PPRUtils {
private static boolean mdbActiveInWp(Speaker speaker, Integer legislaturPeriode) {
List<Integer> wps = speaker.getMemberships().stream()
.map(Membership::getWp)
.collect(Collectors.toList());
.toList();
return wps.contains(legislaturPeriode);
}
/**
* Liest die MDB aus einer Zip-Datei der bundestag.de-Seite
* @param zipUrl
* @return
* @param zipUrl URL der ZIP-Datei
* @return the MDB Document
*/
private static org.w3c.dom.Document getMdbFromRemoteXmlZipfile(String zipUrl) {
URL url = null;
InputStream urlInputStream = null;
URL url;
InputStream urlInputStream;
try {
url = new URL(zipUrl);
urlInputStream = url.openStream();
@ -196,9 +194,9 @@ public abstract class PPRUtils {
/**
* Helper method to save a stream for a later use.
* We use it to save the DTD and the XML files of the MDBs, which are within a zipfile on the bundestag.de site
* @param input
* @param output
* @throws IOException
* @param input the input stream
* @param output the output stream
* @throws IOException if an error occurs
*/
private static void copyStream(InputStream input, OutputStream output) throws IOException {
@ -212,7 +210,7 @@ public abstract class PPRUtils {
/**
* Eine Liste von Parlamentariern nach Namen (erst nach Nachnamen, dann nach Vornamen) sortieren (aufsteigend)
* @param mdbList
* @param mdbList Liste der Parlamentarier
*/
public static void sortParlamentarierByName(List<Parlamentarier> mdbList) {
@ -231,8 +229,8 @@ public abstract class PPRUtils {
/**
* Alle Parteien (aus einer Liste der MdBs) herausfinden.
* null-Einträge durch einen Platzhalter ersetzen, damit später keine null pointer exceptions auftreten
* @param mdbList
* @return
* @param mdbList Liste der MdBs
* @return Set der Parteien
*/
public static Set<String> getFraktionenFromMdbList(List<Mdb> mdbList) {
@ -259,8 +257,8 @@ public abstract class PPRUtils {
/**
* Eine Zuordnung zwischen Partei und deren Mitgliedern erzeugen. Ein Mitglied ist hier die MDB-Struktur
* @param mdbList
* @return
* @param mdbList Liste der MdBs
* @return Map mit Partei als Key und Liste der MdBs als Value
*/
public static Map<String, List<Mdb>> createMdbParteiZuordnung(List<Mdb> mdbList) {
@ -285,9 +283,9 @@ public abstract class PPRUtils {
/**
* Eine Zuordnung zwischen Partei und deren Mitgliedern erzeugen. Ein Mitglied wird hier durch seine ID erfaßt
* @param parteien
* @param mdbList
* @return
* @param parteien Set der Parteien
* @param mdbList Liste der MdBs
* @return Map mit Partei als Key und Liste der MdB-IDs als Value
*/
public static Map<String, List<String>> getMdbParteiZuordnung(Set<String> parteien, List<Mdb> mdbList) {
@ -314,8 +312,8 @@ public abstract class PPRUtils {
/**
* Eine Zuordnung zwischen MdB (repräsentiert durch seine ID) und seinen biographischen Daten erzeugen.
* @param mdbList
* @return
* @param mdbList Liste der MdBs
* @return Map mit MdB-ID als Key und biographischen Daten als Value
*/
public static Map<String, BiografischeAngaben> getMdbParteiZuordnung(List<Mdb> mdbList) {
@ -330,7 +328,7 @@ public abstract class PPRUtils {
/**
* Eine Liste von MdBs nach Namen (erst nach Nachnamen, dann nach Vornamen) sortieren (aufsteigend)
* @param mdbList
* @param mdbList Liste der MdBs
*/
public static void sortMdbByName(List<Mdb> mdbList) {
@ -375,7 +373,7 @@ public abstract class PPRUtils {
xmlProtocols.add(xmlDoc);
} catch (Exception e) {
System.err.println("Fehler beim Verarbeiten der XML-Datei: " + xmlUrl);
e.printStackTrace();
System.err.println("Fehler: " + e.getLocalizedMessage());
}
}
@ -393,7 +391,7 @@ public abstract class PPRUtils {
}
} catch (IOException e) {
System.err.println("Fehler beim Laden der Seite: " + queryUrl);
e.printStackTrace();
System.err.println("Fehler: " + e.getLocalizedMessage());
break;
}
}

View file

@ -16,7 +16,6 @@ import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.zip.ZipEntry;
import java.util.zip.ZipInputStream;
public abstract class XmlUtils {

View file

@ -4,5 +4,5 @@ import org.w3c.dom.Node;
public interface XmlOperations {
FileObjectFactory factory = FileObjectFactory.getFactory();
public Object fromXmlNode(Node node);
Object fromXmlNode(Node node);
}

View file

@ -1,6 +1,5 @@
package org.texttechnologylab.project.gruppe_05_1.xml.mdb;
import org.texttechnologylab.project.gruppe_05_1.domain.Gender;
import org.texttechnologylab.project.gruppe_05_1.domain.mdb.BiografischeAngaben;
import org.texttechnologylab.project.gruppe_05_1.util.GeneralUtils;
import org.texttechnologylab.project.gruppe_05_1.xml.XmlOperations;

View file

@ -15,6 +15,7 @@ public class MdbDocument_File_Impl extends MdbDocument implements XmlOperations
MdbDocument doc = new MdbDocument_File_Impl();
Node versionNode = XmlUtils.getFirstChildByName(node, "VERSION");
assert versionNode != null;
doc.setVersion(versionNode.getFirstChild().getNodeValue());
List<Mdb> mdbs = new ArrayList<>();

View file

@ -15,9 +15,11 @@ public class Mdb_File_Impl extends Mdb implements XmlOperations {
public Mdb fromXmlNode(Node node) {
Mdb mdb = new Mdb_File_Impl();
Node idNode = XmlUtils.getFirstChildByName(node, "ID");
assert idNode != null;
mdb.setId(idNode.getFirstChild().getNodeValue());
Node namenNode = XmlUtils.getFirstChildByName(node, "NAMEN");
assert namenNode != null;
List<Node> nameNodeList = XmlUtils.getChildrenByName(namenNode, "NAME");
List<MdbName> mdbNameList = new ArrayList<>();
for (Node nameNode : nameNodeList) {
@ -31,6 +33,7 @@ public class Mdb_File_Impl extends Mdb implements XmlOperations {
Node wpenNode = XmlUtils.getFirstChildByName(node, "WAHLPERIODEN");
assert wpenNode != null;
List<Node> wpNodeList = XmlUtils.getChildrenByName(wpenNode, "WAHLPERIODE");
List<Wahlperiode> wpList = new ArrayList<>();
for (Node wpNode : wpNodeList) {

View file

@ -3,7 +3,6 @@ package org.texttechnologylab.project.gruppe_05_1.xml.speaker;
import org.texttechnologylab.project.gruppe_05_1.domain.mdb.*;
import org.texttechnologylab.project.gruppe_05_1.domain.speaker.Membership;
import org.texttechnologylab.project.gruppe_05_1.domain.speaker.Speaker;
import org.texttechnologylab.project.gruppe_05_1.util.GeneralUtils;
import org.texttechnologylab.project.gruppe_05_1.util.PPRUtils;
import org.texttechnologylab.project.gruppe_05_1.util.XmlUtils;
import org.texttechnologylab.project.gruppe_05_1.xml.XmlOperations;
@ -20,10 +19,12 @@ public class Speaker_File_Impl extends Speaker implements XmlOperations {
// ID
Node idNode = XmlUtils.getFirstChildByName(node, "ID");
assert idNode != null;
speaker.setId(idNode.getFirstChild().getNodeValue());
// Name: alle Namen lesen, nur den letzten berücksichtigen
Node namenNode = XmlUtils.getFirstChildByName(node, "NAMEN");
assert namenNode != null;
List<Node> nameNodeList = XmlUtils.getChildrenByName(namenNode, "NAME");
List<MdbName> mdbNameList = new ArrayList<>();
for (Node nameNode : nameNodeList) {
@ -58,6 +59,7 @@ public class Speaker_File_Impl extends Speaker implements XmlOperations {
// Memberships
Node wpenNode = XmlUtils.getFirstChildByName(node, "WAHLPERIODEN");
assert wpenNode != null;
List<Node> wpNodeList = XmlUtils.getChildrenByName(wpenNode, "WAHLPERIODE");
List<Wahlperiode> wpList = new ArrayList<>();
for (Node wpNode : wpNodeList) {

View file

@ -1,6 +1,5 @@
package org.texttechnologylab.project.gruppe_05_1.xml.speeches.Impls;
import lombok.Getter;
import org.texttechnologylab.project.gruppe_05_1.xml.speeches.Interfaces.Content;
import org.texttechnologylab.project.gruppe_05_1.xml.speeches.Interfaces.Speaker;
import org.texttechnologylab.project.gruppe_05_1.xml.speeches.enums.MongoDBEntryType;

View file

@ -1,6 +1,5 @@
package org.texttechnologylab.project.gruppe_05_1.xml.speeches.Impls;
import lombok.Getter;
import org.texttechnologylab.project.gruppe_05_1.xml.speeches.Interfaces.Content;
import org.texttechnologylab.project.gruppe_05_1.xml.speeches.Interfaces.Speech;
import org.texttechnologylab.project.gruppe_05_1.xml.speeches.enums.MongoDBEntryType;

View file

@ -1,8 +1,5 @@
package org.texttechnologylab.project.gruppe_05_1.xml.speeches;
import lombok.Getter;
import lombok.Setter;
import org.texttechnologylab.project.gruppe_05_1.database.MongoPprUtils;
import org.texttechnologylab.project.gruppe_05_1.util.PPRUtils;
import org.texttechnologylab.project.gruppe_05_1.xml.speeches.Impls.*;
import org.texttechnologylab.project.gruppe_05_1.xml.speeches.Interfaces.AgendaItem;
@ -29,7 +26,6 @@ public class SpeechParser {
private List<Speech> speeches;
private List<AgendaItem> agendaItems;
private Boolean parseLegislativePeriods;
public List<Speech> getSpeeches() {
return speeches;
@ -40,7 +36,6 @@ public class SpeechParser {
}
public void setParseLegislativePeriods(Boolean parseLegislativePeriods) {
this.parseLegislativePeriods = parseLegislativePeriods;
}
public List<Session> parseAllSessions() {
@ -58,7 +53,7 @@ public class SpeechParser {
tempFile.delete(); // Lösche die temporäre Datei nach der Verarbeitung
} catch (Exception e) {
System.err.println("Error parsing XML document.");
e.printStackTrace();
System.err.println(e.getMessage());
}
}
return sessions;