merge conflict resolve

This commit is contained in:
s5260822 2025-03-18 17:18:50 +01:00
parent 98d1d80fda
commit e6ef7adc6c
25 changed files with 1266 additions and 88 deletions

View file

@ -8,10 +8,18 @@ import org.texttechnologylab.project.gruppe_05_1.rest.RESTHandler;
import org.texttechnologylab.project.gruppe_05_1.util.Logger;
import org.texttechnologylab.project.gruppe_05_1.util.PPRUtils;
import org.texttechnologylab.project.gruppe_05_1.xml.FileObjectFactory;
import org.texttechnologylab.project.gruppe_05_1.xml.speeches.SpeechParser;
import org.w3c.dom.Document;
import java.util.Arrays;
import java.util.Set;
import java.util.concurrent.Executors;
import java.util.concurrent.ScheduledExecutorService;
import java.util.concurrent.TimeUnit;
import static java.lang.Boolean.FALSE;
import static java.lang.Boolean.TRUE;
import static org.texttechnologylab.project.gruppe_05_1.util.PPRUtils.checkAndProcessNewProtocols;
public class Main {
public static boolean UPLOAD_MEMBER_PHOTOS;
@ -116,13 +124,33 @@ public class Main {
Logger.pink("Uploading Member Photos to DB...");
mongoDBHandler.uploadMemberPhotos();
}
mongoDBHandler.close();
try {
NlpUtils.runRemoteDriver();
} catch (Exception e) {
Logger.error("Error while running NLP remote driver");
Logger.error(e.getMessage());
}
NlpUtils.runRemoteDriver();
/*ScheduledExecutorService scheduler = Executors.newScheduledThreadPool(1);
scheduler.scheduleAtFixedRate(() -> {
try {
NlpUtils.runRemoteDriver();
} catch (Exception e) {
Logger.error("Error while running NLP remote driver");
Logger.error(e.getMessage());
}
try {
Logger.info("Starte Aktualisierung der Protokolle...");
Set<Document> newProtocols = checkAndProcessNewProtocols(mongoDBHandler);
Logger.info("Neue Protokolle gefunden: " + newProtocols.size());
if (newProtocols.isEmpty()) {
Logger.info("Keine neuen Protokolle gefunden, Upload wird übersprungen.");
} else {
SpeechParser speechParser = new SpeechParser();
mongoDBHandler.insertSessions(speechParser.parseAllSessions(newProtocols));
mongoDBHandler.insertAgendaItems(speechParser.getAgendaItems());
mongoDBHandler.insertSpeeches(speechParser.getSpeeches());
Logger.info("Neuer Protokolle uploaded: " + newProtocols.size());
}
} catch (Exception ex) {
Logger.error("Fehler bei der Protokollaktualisierung: " + ex.getMessage());
}
}, 0, 10, TimeUnit.MINUTES);*/
RESTHandler restHandler = new RESTHandler();
restHandler.startJavalin();

View file

@ -3,6 +3,7 @@ package org.texttechnologylab.project.gruppe_05_1.database;
import com.mongodb.MongoClientSettings;
import com.mongodb.MongoCredential;
import com.mongodb.ServerAddress;
import com.mongodb.WriteConcern;
import com.mongodb.bulk.BulkWriteResult;
import com.mongodb.client.MongoClient;
import com.mongodb.client.MongoClients;
@ -691,7 +692,9 @@ public class MongoDBHandler {
public void bulkWriteNlpData(List<WriteModel<Document>> bulkOperations) {
if (!bulkOperations.isEmpty()) {
BulkWriteResult result = speechesCollection.bulkWrite(bulkOperations);
BulkWriteOptions options = new BulkWriteOptions().ordered(false);
// Optional: Setze einen weniger strengen Write Concern
BulkWriteResult result = speechesCollection.bulkWrite(bulkOperations, options);
int modifiedCount = result.getModifiedCount();
int matchedCount = result.getMatchedCount();
int upsertCount = result.getUpserts().size();
@ -764,6 +767,12 @@ public class MongoDBHandler {
}
}
/**
 * Checks whether a plenary session with the given number is already stored.
 *
 * @param sessionNumber the session number to look for
 * @return true when at least one session document carries this number
 */
public boolean sessionExists(String sessionNumber) {
    // countDocuments with an equality filter; any positive count means "exists".
    return sessionsCollection.countDocuments(new Document("sessionNumber", sessionNumber)) > 0;
}
public String getMemberPhoto(String memberId) {
Document photoDocument = memberPhotoCollection.find(eq("memberId", memberId)).first();
if (photoDocument == null) {

View file

@ -8,6 +8,7 @@ import org.texttechnologylab.project.gruppe_05_1.database.domainimp.speeches.Spe
import org.texttechnologylab.project.gruppe_05_1.domain.html.HtmlSpeech;
import org.texttechnologylab.project.gruppe_05_1.domain.html.Parlamentarier;
import org.texttechnologylab.project.gruppe_05_1.domain.html.ParlamentarierDetails;
import org.texttechnologylab.project.gruppe_05_1.domain.nlp.*;
import org.texttechnologylab.project.gruppe_05_1.domain.speaker.Membership;
import org.texttechnologylab.project.gruppe_05_1.domain.speech.SpeechMetaData;
import org.texttechnologylab.project.gruppe_05_1.util.GeneralUtils;
@ -162,6 +163,24 @@ public class MongoPprUtils {
return p;
}
// - - - - - - - - - - - - - - - - - - - - - - - - - - - -
// Parlamentarier - Picture
/**
*
* @param id : ID des Parlamentariers
* @return Das Foto (als Base64-encoded String)
*/
public static String getParlamentarierPictureByID(String id) {
    // Look up the member's photo document; unwrap the Base64 payload when present.
    Document doc = MongoDBHandler.findFirstDocumentInCollection(getPicturesCollection(), "memberId", id);
    return (doc == null) ? null : doc.getString("base64");
}
// - - - - - - - - - - - - - - - - - - - - - - - - - - - -
@ -362,7 +381,11 @@ public class MongoPprUtils {
// Sortiere nach Datum, absteigend
speechMetaDataList.sort((md1, md2) -> {
return md2.getDateTime().compareTo(md1.getDateTime());
try {
return md2.getDateTime().compareTo(md1.getDateTime());
} catch (NullPointerException e) {
return 0;
}
});
return speechMetaDataList;
@ -399,6 +422,14 @@ public class MongoPprUtils {
}
}
/**
* Liefert die Rede-Informationen für die Anzeige einer Rede:
* - die Rede-ID
* - Name und Fraktion des Redners
* - Die Inhalte der Rede
* @param key: Rede ID
* @return
*/
public static HtmlSpeech getSpeechByKey(String key) {
Document filter = new Document("speechKey", key);
Document speechDoc = getSpeechCollection().find(filter).first();

View file

@ -2,6 +2,9 @@ package org.texttechnologylab.project.gruppe_05_1.domain.html;
import org.bson.Document;
import org.texttechnologylab.project.gruppe_05_1.database.MongoDBHandler;
import org.texttechnologylab.project.gruppe_05_1.domain.nlp.NlpInfo;
import org.texttechnologylab.project.gruppe_05_1.domain.nlp.Token;
import org.texttechnologylab.project.gruppe_05_1.domain.nlp.Topic;
import java.util.ArrayList;
import java.util.List;
@ -13,6 +16,7 @@ public class HtmlSpeech {
String speakerName;
String fraction;
List<SpeechContent> content = new ArrayList<>();
NlpInfo nlp = null;
public HtmlSpeech() {
}
@ -30,6 +34,33 @@ public class HtmlSpeech {
addContent(new SpeechContent(contentDoc));
}
}
Document nlpDoc = (Document) doc.get("analysisResults");
nlp = readNlpInfo(nlpDoc);
}
/**
 * Builds an {@code NlpInfo} object from the "analysisResults" sub-document of a speech.
 * NOTE(review): work in progress — only tokens and topics are mapped onto the result so
 * far; the sentence/dependency/namedEntity/sentiment lists are read but not yet set.
 *
 * @param nlpDoc the "analysisResults" Mongo document; may be null when no NLP run exists
 * @return the populated NlpInfo, or null when no analysis results are present
 */
private NlpInfo readNlpInfo(Document nlpDoc) {
if (nlpDoc == null) return null;
NlpInfo nlp = new NlpInfo();
// TODO: map the remaining NLP categories read below onto the NlpInfo object.
List<Document> tokensDocs = nlpDoc.get("tokens", MongoDBHandler.DOC_LIST_CLASS);
nlp.setTokens(Token.readTokensFromMongo(tokensDocs));
// Read but intentionally not yet used — mapping still missing (see TODO above):
List<Document> sentencesDocs = nlpDoc.get("sentences", MongoDBHandler.DOC_LIST_CLASS);
List<Document> dependenciesDocs = nlpDoc.get("dependencies", MongoDBHandler.DOC_LIST_CLASS);
List<Document> namedEntitiesDocs = nlpDoc.get("namedEntities", MongoDBHandler.DOC_LIST_CLASS);
List<Document> sentimentsDocs = nlpDoc.get("sentiments", MongoDBHandler.DOC_LIST_CLASS);
List<Document> topicsDocs = nlpDoc.get("topics", MongoDBHandler.DOC_LIST_CLASS);
nlp.setTopics(Topic.readTopicsFromMongo(topicsDocs));
// TODO: map the video information (AudioToken / VideoInformation) as well.
return nlp;
}
public String getSpeechKey() {
@ -68,16 +99,26 @@ public class HtmlSpeech {
content.add(contentLine);
}
public NlpInfo getNlp() {
return nlp;
}
public void setNlp(NlpInfo nlp) {
this.nlp = nlp;
}
@Override
public boolean equals(Object o) {
if (this == o) return true;
if (!(o instanceof HtmlSpeech that)) return false;
return Objects.equals(speechKey, that.speechKey) && Objects.equals(speakerName, that.speakerName) && Objects.equals(fraction, that.fraction) && Objects.equals(content, that.content);
return Objects.equals(speechKey, that.speechKey) && Objects.equals(speakerName, that.speakerName)
&& Objects.equals(fraction, that.fraction) && Objects.equals(content, that.content)
&& Objects.equals(nlp, that.nlp);
}
@Override
public int hashCode() {
return Objects.hash(speechKey, speakerName, fraction, content);
return Objects.hash(speechKey, speakerName, fraction, content, nlp);
}
@Override
@ -87,6 +128,7 @@ public class HtmlSpeech {
.add("speakerName='" + speakerName + "'")
.add("fraction='" + fraction + "'")
.add("content=" + content)
.add("nlp=" + nlp)
.toString();
}
}

View file

@ -0,0 +1,87 @@
package org.texttechnologylab.project.gruppe_05_1.domain.nlp;
import java.util.Objects;
import java.util.StringJoiner;
/**
 * A token aligned with the audio track of a speech video: character offsets in the
 * transcript plus the start/end time of the spoken word.
 */
public class AudioToken {

    // Character offsets of the token within the transcript text.
    private int begin;
    private int end;
    // Position of the spoken token on the audio timeline.
    private double timeStart;
    private double timeEnd;
    // The covered surface text.
    private String value;

    /** No-arg constructor (e.g. for deserialization frameworks). */
    public AudioToken() {
    }

    /**
     * Creates a fully initialized audio token.
     *
     * @param begin     start offset in the transcript
     * @param end       end offset in the transcript
     * @param timeStart start time on the audio timeline
     * @param timeEnd   end time on the audio timeline
     * @param value     the covered text
     */
    public AudioToken(int begin, int end, double timeStart, double timeEnd, String value) {
        this.begin = begin;
        this.end = end;
        this.timeStart = timeStart;
        this.timeEnd = timeEnd;
        this.value = value;
    }

    public int getBegin() {
        return begin;
    }

    public int getEnd() {
        return end;
    }

    public double getTimeStart() {
        return timeStart;
    }

    public double getTimeEnd() {
        return timeEnd;
    }

    public String getValue() {
        return value;
    }

    public void setBegin(int begin) {
        this.begin = begin;
    }

    public void setEnd(int end) {
        this.end = end;
    }

    public void setTimeStart(double timeStart) {
        this.timeStart = timeStart;
    }

    public void setTimeEnd(double timeEnd) {
        this.timeEnd = timeEnd;
    }

    public void setValue(String value) {
        this.value = value;
    }

    @Override
    public boolean equals(Object o) {
        if (this == o) {
            return true;
        }
        if (!(o instanceof AudioToken)) {
            return false;
        }
        AudioToken other = (AudioToken) o;
        // Double.compare keeps NaN/-0.0 handling consistent with hashCode.
        return begin == other.begin
                && end == other.end
                && Double.compare(timeStart, other.timeStart) == 0
                && Double.compare(timeEnd, other.timeEnd) == 0
                && Objects.equals(value, other.value);
    }

    @Override
    public int hashCode() {
        return Objects.hash(begin, end, timeStart, timeEnd, value);
    }

    @Override
    public String toString() {
        StringBuilder sb = new StringBuilder(AudioToken.class.getSimpleName());
        sb.append("[begin=").append(begin)
                .append(", end=").append(end)
                .append(", timeStart=").append(timeStart)
                .append(", timeEnd=").append(timeEnd)
                .append(", value='").append(value).append("']");
        return sb.toString();
    }
}

View file

@ -0,0 +1,64 @@
package org.texttechnologylab.project.gruppe_05_1.domain.nlp;
import java.util.Objects;
import java.util.StringJoiner;
/**
 * A single grammatical dependency relation between two words: the relation type,
 * the governing word, and the dependent word.
 */
public class Dependency {

    String type;
    String governor;
    String dependent;

    /** No-arg constructor (e.g. for deserialization frameworks). */
    public Dependency() {
    }

    /**
     * @param type      the dependency relation type
     * @param governor  the governing word
     * @param dependent the dependent word
     */
    public Dependency(String type, String governor, String dependent) {
        this.type = type;
        this.governor = governor;
        this.dependent = dependent;
    }

    public String getType() {
        return type;
    }

    public String getGovernor() {
        return governor;
    }

    public String getDependent() {
        return dependent;
    }

    public void setType(String type) {
        this.type = type;
    }

    public void setGovernor(String governor) {
        this.governor = governor;
    }

    public void setDependent(String dependent) {
        this.dependent = dependent;
    }

    @Override
    public boolean equals(Object o) {
        if (this == o) {
            return true;
        }
        if (!(o instanceof Dependency)) {
            return false;
        }
        Dependency other = (Dependency) o;
        return Objects.equals(type, other.type)
                && Objects.equals(governor, other.governor)
                && Objects.equals(dependent, other.dependent);
    }

    @Override
    public int hashCode() {
        return Objects.hash(type, governor, dependent);
    }

    @Override
    public String toString() {
        return String.format("Dependency[type='%s', governor='%s', dependent='%s']",
                type, governor, dependent);
    }
}

View file

@ -0,0 +1,55 @@
package org.texttechnologylab.project.gruppe_05_1.domain.nlp;
import java.util.Objects;
import java.util.StringJoiner;
/**
 * A named entity found in a speech: its category (e.g. PER, LOC) and the covered text.
 */
public class NamedEntity {

    String type; // entity category, e.g. PER, LOC
    // int begin; // TODO: currently not stored in MongoDB
    // int end;   // TODO: currently not stored in MongoDB
    String text;

    /** No-arg constructor (e.g. for deserialization frameworks). */
    public NamedEntity() {
    }

    /**
     * @param type the entity category
     * @param text the covered text
     */
    public NamedEntity(String type, String text) {
        this.type = type;
        this.text = text;
    }

    public String getType() {
        return type;
    }

    public String getText() {
        return text;
    }

    public void setType(String type) {
        this.type = type;
    }

    public void setText(String text) {
        this.text = text;
    }

    @Override
    public boolean equals(Object o) {
        if (this == o) {
            return true;
        }
        if (!(o instanceof NamedEntity)) {
            return false;
        }
        NamedEntity other = (NamedEntity) o;
        return Objects.equals(type, other.type) && Objects.equals(text, other.text);
    }

    @Override
    public int hashCode() {
        return Objects.hash(type, text);
    }

    @Override
    public String toString() {
        return String.format("NamedEntity[type='%s', text='%s']", type, text);
    }
}

View file

@ -0,0 +1,123 @@
package org.texttechnologylab.project.gruppe_05_1.domain.nlp;
import java.util.List;
import java.util.Map;
import java.util.Objects;
import java.util.StringJoiner;
/**
 * Aggregates all NLP analysis results for one speech: tokens, sentences, dependencies,
 * named entities, sentiments, topics, POS tags and video/audio alignment information.
 * Plain data container; all fields may be null when the respective analysis is missing.
 */
public class NlpInfo {

    List<Token> tokens;
    List<Sentence> sentences;
    List<Dependency> dependencies;
    List<NamedEntity> namedEntities;
    Sentiment overallSentiment;   // sentiment for the whole text; may be null!
    List<Sentiment> sentiments;   // sentiments of the individual sentences (0..n elements)
    List<Topic> topics;
    List<Pos> posList;
    VideoInformation videoInformation;

    public List<Token> getTokens() {
        return tokens;
    }

    public List<Sentence> getSentences() {
        return sentences;
    }

    public List<Dependency> getDependencies() {
        return dependencies;
    }

    public List<NamedEntity> getNamedEntities() {
        return namedEntities;
    }

    public Sentiment getOverallSentiment() {
        return overallSentiment;
    }

    public List<Sentiment> getSentiments() {
        return sentiments;
    }

    public List<Topic> getTopics() {
        return topics;
    }

    public List<Pos> getPosList() {
        return posList;
    }

    public VideoInformation getVideoInformation() {
        return videoInformation;
    }

    public void setTokens(List<Token> tokens) {
        this.tokens = tokens;
    }

    public void setSentences(List<Sentence> sentences) {
        this.sentences = sentences;
    }

    public void setDependencies(List<Dependency> dependencies) {
        this.dependencies = dependencies;
    }

    public void setNamedEntities(List<NamedEntity> namedEntities) {
        this.namedEntities = namedEntities;
    }

    public void setOverallSentiment(Sentiment overallSentiment) {
        this.overallSentiment = overallSentiment;
    }

    public void setSentiments(List<Sentiment> sentiments) {
        this.sentiments = sentiments;
    }

    public void setTopics(List<Topic> topics) {
        this.topics = topics;
    }

    public void setPosList(List<Pos> posList) {
        this.posList = posList;
    }

    public void setVideoInformation(VideoInformation videoInformation) {
        this.videoInformation = videoInformation;
    }

    @Override
    public boolean equals(Object o) {
        if (this == o) {
            return true;
        }
        if (!(o instanceof NlpInfo)) {
            return false;
        }
        NlpInfo other = (NlpInfo) o;
        return Objects.equals(tokens, other.tokens)
                && Objects.equals(sentences, other.sentences)
                && Objects.equals(dependencies, other.dependencies)
                && Objects.equals(namedEntities, other.namedEntities)
                && Objects.equals(overallSentiment, other.overallSentiment)
                && Objects.equals(sentiments, other.sentiments)
                && Objects.equals(topics, other.topics)
                && Objects.equals(posList, other.posList)
                && Objects.equals(videoInformation, other.videoInformation);
    }

    @Override
    public int hashCode() {
        return Objects.hash(tokens, sentences, dependencies, namedEntities,
                overallSentiment, sentiments, topics, posList, videoInformation);
    }

    @Override
    public String toString() {
        StringBuilder sb = new StringBuilder(NlpInfo.class.getSimpleName());
        sb.append("[tokens=").append(tokens)
                .append(", sentences=").append(sentences)
                .append(", dependencies=").append(dependencies)
                .append(", namedEntities=").append(namedEntities)
                .append(", overallSentiment=").append(overallSentiment)
                .append(", sentiments=").append(sentiments)
                .append(", topics=").append(topics)
                .append(", posList=").append(posList)
                .append(", videoInformation=").append(videoInformation)
                .append(']');
        return sb.toString();
    }
}

View file

@ -0,0 +1,119 @@
package org.texttechnologylab.project.gruppe_05_1.domain.nlp;
import java.util.Objects;
import java.util.StringJoiner;
/**
 * A part-of-speech annotation: the fine-grained tag (e.g. ART, NN), the coarse universal
 * tag (e.g. DET, PROPN), the character span, and the covered text.
 * Example values are listed at the end of the class.
 */
public class Pos {

    String posValue;    // fine-grained tag: ART, NN, ...
    String coarseValue; // coarse tag: DET, PROPN, ...
    int begin;
    int end;
    String coveredText;

    /** No-arg constructor (e.g. for deserialization frameworks). */
    public Pos() {
    }

    /**
     * @param posValue    fine-grained POS tag
     * @param coarseValue coarse POS tag
     * @param begin       start offset in the text
     * @param end         end offset in the text
     * @param coveredText the covered text
     */
    public Pos(String posValue, String coarseValue, int begin, int end, String coveredText) {
        this.posValue = posValue;
        this.coarseValue = coarseValue;
        this.begin = begin;
        this.end = end;
        this.coveredText = coveredText;
    }

    public String getPosValue() {
        return posValue;
    }

    public String getCoarseValue() {
        return coarseValue;
    }

    public int getBegin() {
        return begin;
    }

    public int getEnd() {
        return end;
    }

    public String getCoveredText() {
        return coveredText;
    }

    public void setPosValue(String posValue) {
        this.posValue = posValue;
    }

    public void setCoarseValue(String coarseValue) {
        this.coarseValue = coarseValue;
    }

    public void setBegin(int begin) {
        this.begin = begin;
    }

    public void setEnd(int end) {
        this.end = end;
    }

    public void setCoveredText(String coveredText) {
        this.coveredText = coveredText;
    }

    @Override
    public boolean equals(Object o) {
        if (this == o) {
            return true;
        }
        if (!(o instanceof Pos)) {
            return false;
        }
        Pos other = (Pos) o;
        return begin == other.begin
                && end == other.end
                && Objects.equals(posValue, other.posValue)
                && Objects.equals(coarseValue, other.coarseValue)
                && Objects.equals(coveredText, other.coveredText);
    }

    @Override
    public int hashCode() {
        return Objects.hash(posValue, coarseValue, begin, end, coveredText);
    }

    @Override
    public String toString() {
        return String.format("Pos[posValue='%s', coarseValue='%s', begin=%d, end=%d, coveredText='%s']",
                posValue, coarseValue, begin, end, coveredText);
    }

    /* Example values:
    MyPos{posValue='ART',   coarseValue='DET',   begin=0,  end=3,  coveredText='Die'},
    MyPos{posValue='NN',    coarseValue='NOUN',  begin=4,  end=8,  coveredText='Idee'},
    MyPos{posValue='APPR',  coarseValue='ADP',   begin=9,  end=12, coveredText='von'},
    MyPos{posValue='NE',    coarseValue='PROPN', begin=13, end=16, coveredText='Joe'},
    MyPos{posValue='VVFIN', coarseValue='VERB',  begin=48, end=53, coveredText='finde'},
    MyPos{posValue='PPER',  coarseValue='PRON',  begin=54, end=57, coveredText='ich'},
    MyPos{posValue='ADJD',  coarseValue='ADV',   begin=58, end=61, coveredText='gut'},
    MyPos{posValue='$.',    coarseValue='PUNCT', begin=61, end=62, coveredText='.'}
    */
}

View file

@ -0,0 +1,44 @@
package org.texttechnologylab.project.gruppe_05_1.domain.nlp;
import java.util.Objects;
import java.util.StringJoiner;
/**
 * A single sentence of a speech. Currently only the text is stored.
 */
public class Sentence {

    // int begin; // TODO: currently not stored in MongoDB
    // int end;   // TODO: currently not stored in MongoDB
    String text;

    /** No-arg constructor (e.g. for deserialization frameworks). */
    public Sentence() {
    }

    /** @param text the sentence text */
    public Sentence(String text) {
        this.text = text;
    }

    public String getText() {
        return text;
    }

    public void setText(String text) {
        this.text = text;
    }

    @Override
    public boolean equals(Object o) {
        if (this == o) {
            return true;
        }
        if (!(o instanceof Sentence)) {
            return false;
        }
        Sentence other = (Sentence) o;
        return Objects.equals(text, other.text);
    }

    @Override
    public int hashCode() {
        return Objects.hash(text);
    }

    @Override
    public String toString() {
        return String.format("Sentence[text='%s']", text);
    }
}

View file

@ -0,0 +1,97 @@
package org.texttechnologylab.project.gruppe_05_1.domain.nlp;
import java.util.Objects;
import java.util.StringJoiner;
/**
 * Sentiment scores for a text span: the overall score plus the individual
 * negative/neutral/positive components, together with the character span.
 */
public class Sentiment {

    int begin;
    int end;
    double sentiment; // overall sentiment
    double negative;
    double neutral;
    double positive;

    /** No-arg constructor (e.g. for deserialization frameworks). */
    public Sentiment() {
    }

    /**
     * @param begin     start offset of the span
     * @param end       end offset of the span
     * @param sentiment overall sentiment score
     * @param negative  negative component
     * @param neutral   neutral component
     * @param positive  positive component
     */
    public Sentiment(int begin, int end, double sentiment, double negative, double neutral, double positive) {
        this.begin = begin;
        this.end = end;
        this.sentiment = sentiment;
        this.negative = negative;
        this.neutral = neutral;
        this.positive = positive;
    }

    public int getBegin() {
        return begin;
    }

    public int getEnd() {
        return end;
    }

    public double getSentiment() {
        return sentiment;
    }

    public double getNegative() {
        return negative;
    }

    public double getNeutral() {
        return neutral;
    }

    public double getPositive() {
        return positive;
    }

    public void setBegin(int begin) {
        this.begin = begin;
    }

    public void setEnd(int end) {
        this.end = end;
    }

    public void setSentiment(double sentiment) {
        this.sentiment = sentiment;
    }

    public void setNegative(double negative) {
        this.negative = negative;
    }

    public void setNeutral(double neutral) {
        this.neutral = neutral;
    }

    public void setPositive(double positive) {
        this.positive = positive;
    }

    @Override
    public boolean equals(Object o) {
        if (this == o) {
            return true;
        }
        if (!(o instanceof Sentiment)) {
            return false;
        }
        Sentiment other = (Sentiment) o;
        // Double.compare keeps NaN/-0.0 handling consistent with hashCode.
        return begin == other.begin
                && end == other.end
                && Double.compare(sentiment, other.sentiment) == 0
                && Double.compare(negative, other.negative) == 0
                && Double.compare(neutral, other.neutral) == 0
                && Double.compare(positive, other.positive) == 0;
    }

    @Override
    public int hashCode() {
        return Objects.hash(begin, end, sentiment, negative, neutral, positive);
    }

    @Override
    public String toString() {
        StringBuilder sb = new StringBuilder(Sentiment.class.getSimpleName());
        sb.append("[begin=").append(begin)
                .append(", end=").append(end)
                .append(", sentiment=").append(sentiment)
                .append(", negative=").append(negative)
                .append(", neutral=").append(neutral)
                .append(", positive=").append(positive)
                .append(']');
        return sb.toString();
    }
}

View file

@ -0,0 +1,96 @@
package org.texttechnologylab.project.gruppe_05_1.domain.nlp;
import org.bson.Document;
import java.util.*;
/**
 * A single token of a speech with its POS tag and lemma, plus helpers for reading
 * token documents from MongoDB and aggregating POS counts.
 */
public class Token {
    String text;
    String pos;
    String lemma;

    /** No-arg constructor (e.g. for deserialization frameworks). */
    public Token() {
    }

    /**
     * @param text  the token's surface text
     * @param pos   the POS tag
     * @param lemma the lemma
     */
    public Token(String text, String pos, String lemma) {
        this.text = text;
        this.pos = pos;
        this.lemma = lemma;
    }

    public String getText() {
        return text;
    }

    public void setText(String text) {
        this.text = text;
    }

    public String getPos() {
        return pos;
    }

    public void setPos(String pos) {
        this.pos = pos;
    }

    public String getLemma() {
        return lemma;
    }

    public void setLemma(String lemma) {
        this.lemma = lemma;
    }

    @Override
    public boolean equals(Object o) {
        if (this == o) return true;
        if (!(o instanceof Token token)) return false;
        return Objects.equals(text, token.text) && Objects.equals(pos, token.pos) && Objects.equals(lemma, token.lemma);
    }

    @Override
    public int hashCode() {
        return Objects.hash(text, pos, lemma);
    }

    @Override
    public String toString() {
        return new StringJoiner(", ", Token.class.getSimpleName() + "[", "]")
                .add("text='" + text + "'")
                .add("pos='" + pos + "'")
                .add("lemma='" + lemma + "'")
                .toString();
    }

    /**
     * Reads the token documents (Speech --> analysisResults --> token) from MongoDB.
     *
     * @param tokenDocs a list of Mongo documents; may be null when the "tokens" field
     *                  is absent from the analysis results
     * @return a list of tokens; empty (never null) when {@code tokenDocs} is null
     */
    public static List<Token> readTokensFromMongo(List<Document> tokenDocs) {
        // Null-safe: HtmlSpeech passes nlpDoc.get("tokens", ...) straight in, which is
        // null when the field is missing — previously this caused an NPE in the loop.
        if (tokenDocs == null) {
            return new ArrayList<>();
        }
        List<Token> tokens = new ArrayList<>(tokenDocs.size()); // presized, size is known
        for (Document doc : tokenDocs) {
            tokens.add(new Token(doc.getString("text"),
                    doc.getString("pos"),
                    doc.getString("lemma")
            ));
        }
        return tokens;
    }

    /**
     * Counts the occurrences of each distinct POS tag.
     *
     * @param tokenList the tokens to aggregate
     * @return each POS tag mapped to its number of occurrences
     */
    public static Map<String, Integer> countPOS(List<Token> tokenList) {
        Map<String, Integer> posCounts = new HashMap<>();
        for (Token token : tokenList) {
            // merge() is the idiomatic "increment or insert" form of getOrDefault+put.
            posCounts.merge(token.getPos(), 1, Integer::sum);
        }
        return posCounts;
    }
}

View file

@ -0,0 +1,107 @@
package org.texttechnologylab.project.gruppe_05_1.domain.nlp;
import org.bson.Document;
import java.util.*;
import java.util.stream.Collectors;
/**
 * A topic assigned to (part of) a speech, with its score and the covered text,
 * plus helpers for reading topics from MongoDB and condensing scores per topic.
 */
public class Topic {
    String topic;
    // Boxed on purpose: Document.getDouble("score") returns null when the field is absent.
    Double score;
    String text;

    /** No-arg constructor (e.g. for deserialization frameworks). */
    public Topic() {
    }

    /**
     * @param topic the topic label
     * @param score the topic score; may be null
     * @param text  the covered text
     */
    public Topic(String topic, Double score, String text) {
        this.topic = topic;
        this.score = score;
        this.text = text;
    }

    public String getTopic() {
        return topic;
    }

    public void setTopic(String topic) {
        this.topic = topic;
    }

    public Double getScore() {
        return score;
    }

    public void setScore(Double score) {
        this.score = score;
    }

    public String getText() {
        return text;
    }

    public void setText(String text) {
        this.text = text;
    }

    @Override
    public boolean equals(Object o) {
        if (this == o) return true;
        if (!(o instanceof Topic topic1)) return false;
        // Objects.equals instead of Double.compare: score is a boxed Double and may be
        // null — auto-unboxing it through Double.compare threw a NullPointerException.
        return Objects.equals(score, topic1.score) && Objects.equals(topic, topic1.topic) && Objects.equals(text, topic1.text);
    }

    @Override
    public int hashCode() {
        return Objects.hash(topic, score, text);
    }

    @Override
    public String toString() {
        return new StringJoiner(", ", Topic.class.getSimpleName() + "[", "]")
                .add("topic='" + topic + "'")
                .add("score=" + score)
                .add("text='" + text + "'")
                .toString();
    }

    /**
     * Reads the topic documents (Speech --> analysisResults --> topics) from MongoDB.
     *
     * @param topicsDocs a list of Mongo documents; may be null when the "topics" field
     *                   is absent from the analysis results
     * @return a list of topics; empty (never null) when {@code topicsDocs} is null
     */
    public static List<Topic> readTopicsFromMongo(List<Document> topicsDocs) {
        // Null-safe: callers pass nlpDoc.get("topics", ...) straight in, which is null
        // when the field is missing — previously this caused an NPE in the loop.
        if (topicsDocs == null) {
            return new ArrayList<>();
        }
        List<Topic> topics = new ArrayList<>(topicsDocs.size()); // presized, size is known
        for (Document doc : topicsDocs) {
            topics.add(new Topic(doc.getString("topic"),
                    doc.getDouble("score"),
                    doc.getString("text")
            ));
        }
        return topics;
    }

    /**
     * "Condenses" topic information: the input list may contain the same topic several
     * times; this sums the scores per topic so each topic maps to one total score.
     * Topics with a null score are skipped (they contribute nothing to the sum and
     * previously caused an NPE through auto-unboxing).
     *
     * @param topicsList the topics to condense
     * @return each topic label mapped to the sum of its scores
     */
    public static Map<String, Double> condenseTopicInformation(List<Topic> topicsList) {
        Map<String, Double> condensedTopicInfo = new HashMap<>();
        for (Topic t : topicsList) {
            if (t.getScore() == null) {
                continue;
            }
            // merge() is the idiomatic "add or insert" form of get+replace/put.
            condensedTopicInfo.merge(t.getTopic(), t.getScore(), Double::sum);
        }
        return condensedTopicInfo;
    }
}

View file

@ -0,0 +1,43 @@
package org.texttechnologylab.project.gruppe_05_1.domain.nlp;
import java.util.List;
import java.util.Objects;
import java.util.StringJoiner;
/**
 * Video/audio alignment information for a speech: the list of audio-aligned tokens.
 */
public class VideoInformation {

    List<AudioToken> audioTokens;

    /** No-arg constructor (e.g. for deserialization frameworks). */
    public VideoInformation() {
    }

    /** @param audioTokens the audio-aligned tokens */
    public VideoInformation(List<AudioToken> audioTokens) {
        this.audioTokens = audioTokens;
    }

    public List<AudioToken> getAudioTokens() {
        return audioTokens;
    }

    public void setAudioTokens(List<AudioToken> audioTokens) {
        this.audioTokens = audioTokens;
    }

    @Override
    public boolean equals(Object o) {
        if (this == o) {
            return true;
        }
        if (!(o instanceof VideoInformation)) {
            return false;
        }
        VideoInformation other = (VideoInformation) o;
        return Objects.equals(audioTokens, other.audioTokens);
    }

    @Override
    public int hashCode() {
        return Objects.hash(audioTokens);
    }

    @Override
    public String toString() {
        return "VideoInformation[audioTokens=" + audioTokens + "]";
    }
}

View file

@ -0,0 +1,112 @@
package org.texttechnologylab.project.gruppe_05_1.domain.nlp.html;
import java.util.Objects;
/**
* Diese Klasse ordnet das entspreche Sentiment zu einem Satz zu.
* Sie ist ein Datencontainer für die Darstellung über FreeMarker
*/
/**
 * Maps a sentiment onto one sentence. This is a data container for rendering via
 * FreeMarker: the component values double as RGBA channels for the UI.
 */
public class SentimentOfSentence {
    int begin;
    int end;
    String text;
    // RGBA values used for rendering:
    float sentiment; // overall sentiment --> used as the alpha (opacity) value --> 0..1
    int negative;    // red --> 0..255
    int neutral;     // 0..255, currently unused
    int positive;    // green --> 0..255

    /** No-arg constructor (e.g. for FreeMarker/deserialization). */
    public SentimentOfSentence() {}

    /**
     * @param begin     start offset of the sentence
     * @param end       end offset of the sentence
     * @param text      the sentence text
     * @param sentiment overall sentiment, used as alpha (0..1)
     * @param negative  red channel (0..255)
     * @param neutral   currently unused (0..255)
     * @param positive  green channel (0..255)
     */
    public SentimentOfSentence(int begin, int end, String text, float sentiment, int negative, int neutral, int positive) {
        this.begin = begin;
        this.end = end;
        this.text = text;
        this.sentiment = sentiment;
        this.negative = negative;
        this.neutral = neutral;
        this.positive = positive;
    }

    public int getBegin() {
        return begin;
    }

    public void setBegin(int begin) {
        this.begin = begin;
    }

    public int getEnd() {
        return end;
    }

    public void setEnd(int end) {
        this.end = end;
    }

    public String getText() {
        return text;
    }

    public void setText(String text) {
        this.text = text;
    }

    public float getSentiment() {
        return sentiment;
    }

    public void setSentiment(float sentiment) {
        this.sentiment = sentiment;
    }

    public int getNegative() {
        return negative;
    }

    public void setNegative(int negative) {
        this.negative = negative;
    }

    public int getNeutral() {
        return neutral;
    }

    public void setNeutral(int neutral) {
        this.neutral = neutral;
    }

    public int getPositive() {
        return positive;
    }

    public void setPositive(int positive) {
        this.positive = positive;
    }

    @Override
    public boolean equals(Object o) {
        // Fast path added for consistency with the other domain classes.
        if (this == o) return true;
        if (!(o instanceof SentimentOfSentence that)) return false;
        // int fields compared directly (Double.compare on ints was needless boxing-style
        // indirection); the float via Float.compare, which is NaN-safe.
        return begin == that.begin && end == that.end
                && Float.compare(sentiment, that.sentiment) == 0
                && negative == that.negative
                && neutral == that.neutral
                && positive == that.positive
                && Objects.equals(text, that.text);
    }

    @Override
    public int hashCode() {
        return Objects.hash(begin, end, text, sentiment, negative, neutral, positive);
    }

    @Override
    public String toString() {
        final StringBuilder sb = new StringBuilder("SentimentOfSentence{");
        sb.append("begin=").append(begin);
        sb.append(", end=").append(end);
        sb.append(", text='").append(text).append('\'');
        sb.append(", sentiment=").append(sentiment);
        sb.append(", negative=").append(negative);
        sb.append(", neutral=").append(neutral);
        sb.append(", positive=").append(positive);
        sb.append('}');
        return sb.toString();
    }
}

View file

@ -360,6 +360,8 @@ public class NlpUtils {
bulkOperations.add(new UpdateOneModel<>(updateFilter, update));
}
if (!bulkOperations.isEmpty()) {
System.out.println("Processing of " + bulkOperations.size() + " documents finished");
System.out.println("uploading...");
mongoDBHandler.bulkWriteNlpData(bulkOperations);
Logger.debug("Bulk write completed for " + bulkOperations.size() + " documents.");
mongoDBHandler.close();

View file

@ -1,15 +1,16 @@
package org.texttechnologylab.project.gruppe_05_1.nlp;
import com.mongodb.client.MongoCollection;
import com.mongodb.client.MongoDatabase;
import com.mongodb.client.model.Filters;
import com.mongodb.client.model.UpdateOneModel;
import com.mongodb.client.model.WriteModel;
import com.mongodb.client.result.UpdateResult;
import org.apache.uima.fit.util.JCasUtil;
import org.bson.Document;
import java.io.*;
import java.net.URL;
import java.nio.charset.StandardCharsets;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.Future;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.zip.*;
import java.util.*;
import java.util.stream.Collectors;
@ -18,12 +19,7 @@ import org.bson.conversions.Bson;
import org.apache.uima.fit.factory.JCasFactory;
import org.apache.uima.jcas.JCas;
import org.apache.uima.cas.impl.XmiCasDeserializer;
import de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Token;
import de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Sentence;
import de.tudarmstadt.ukp.dkpro.core.api.syntax.type.dependency.Dependency;
import de.tudarmstadt.ukp.dkpro.core.api.ner.type.NamedEntity;
import org.texttechnologylab.project.gruppe_05_1.database.MongoDBHandler;
import org.hucompute.textimager.uima.type.category.CategoryCoveredTagged;
import org.texttechnologylab.project.gruppe_05_1.util.Logger;
import static org.texttechnologylab.project.gruppe_05_1.Main.JCAS_SPEECHES_TYPESYSTEM_DIR;
@ -31,29 +27,45 @@ import static org.texttechnologylab.project.gruppe_05_1.Main.JCAS_SPEECHES_TYPES
public class XmiExtractor {
private List<WriteModel<Document>> bulkOperations;
private MongoDBHandler mongoDBHandler;
private final List<WriteModel<Document>> bulkOperations = Collections.synchronizedList(new ArrayList<>());
private final MongoDBHandler mongoDBHandler;
private static final int BATCH_SIZE = 1000;
private int processedCount = 0;
private static final AtomicInteger processedCount = new AtomicInteger(0);
public XmiExtractor() {
mongoDBHandler = new MongoDBHandler();
this.bulkOperations = new ArrayList<>();
}
public void extractAndUploadXmiData() throws IOException {
InputStream resourceStream = getClass().getClassLoader().getResourceAsStream("speeches/20.zip");
if (resourceStream == null) {
throw new IOException("20.zip nicht gefunden im Ressourcenordner /speeches");
}
ExecutorService executor = Executors.newFixedThreadPool(Runtime.getRuntime().availableProcessors());
List<Future<?>> futures = new ArrayList<>();
try (ZipInputStream zis = new ZipInputStream(resourceStream)) {
ZipEntry entry;
while ((entry = zis.getNextEntry()) != null) {
if (entry.getName().endsWith(".xmi.gz")) {
ByteArrayOutputStream baos = new ByteArrayOutputStream();
byte[] buffer = new byte[1024];
int len;
while ((len = zis.read(buffer)) > 0) {
baos.write(buffer, 0, len);
File tempFile = File.createTempFile("xmi_entry_", ".xmi.gz");
try (FileOutputStream fos = new FileOutputStream(tempFile)) {
byte[] buffer = new byte[1024];
int len;
while ((len = zis.read(buffer)) > 0) {
fos.write(buffer, 0, len);
}
}
byte[] entryData = baos.toByteArray();
processXmiGzStream(new ByteArrayInputStream(entryData), entry.getName());
ZipEntry finalEntry = entry;
Future<?> future = executor.submit(() -> {
try (FileInputStream fis = new FileInputStream(tempFile)) {
processXmiGzStream(fis, finalEntry.getName());
} catch (IOException e) {
e.printStackTrace();
} finally {
tempFile.delete();
}
});
futures.add(future);
}
zis.closeEntry();
}
@ -61,7 +73,16 @@ public class XmiExtractor {
Logger.error("Error reading XMI data from ZIP file.");
Logger.error(e.getMessage());
}
flushBatch();
for (Future<?> future : futures) {
try {
future.get();
} catch (Exception e) {
e.printStackTrace();
}
}
executor.shutdown();
flushBatch(); // Synchronously upload the remaining batch
mongoDBHandler.close();
}
private void processXmiGzStream(InputStream inputStream, String filename) {
@ -70,12 +91,10 @@ public class XmiExtractor {
jCas = JCasFactory.createJCas(JCAS_SPEECHES_TYPESYSTEM_DIR);
XmiCasDeserializer.deserialize(gis, jCas.getCas(), true);
// Build structured analysisResults Document
Document analysisResults = new Document();
// Tokens: Include POS, Lemma, etc.
List<Document> tokens = new ArrayList<>();
for (Token token : JCasUtil.select(jCas, Token.class)) {
for (de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Token token : JCasUtil.select(jCas, de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Token.class)) {
Document tokenDoc = new Document()
.append("text", token.getCoveredText())
.append("pos", token.getPos().getPosValue())
@ -84,15 +103,13 @@ public class XmiExtractor {
}
analysisResults.append("tokens", tokens);
// Sentences
List<String> sentences = JCasUtil.select(jCas, Sentence.class).stream()
.map(Sentence::getCoveredText)
List<String> sentences = JCasUtil.select(jCas, de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Sentence.class).stream()
.map(de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Sentence::getCoveredText)
.collect(Collectors.toList());
analysisResults.append("sentences", sentences);
// Dependencies
List<Document> dependencies = new ArrayList<>();
for (Dependency dep : JCasUtil.select(jCas, Dependency.class)) {
for (de.tudarmstadt.ukp.dkpro.core.api.syntax.type.dependency.Dependency dep : JCasUtil.select(jCas, de.tudarmstadt.ukp.dkpro.core.api.syntax.type.dependency.Dependency.class)) {
Document depDoc = new Document()
.append("type", dep.getDependencyType())
.append("governor", dep.getGovernor().getCoveredText())
@ -101,9 +118,8 @@ public class XmiExtractor {
}
analysisResults.append("dependencies", dependencies);
// Named Entities
List<Document> namedEntities = new ArrayList<>();
for (NamedEntity ne : JCasUtil.select(jCas, NamedEntity.class)) {
for (de.tudarmstadt.ukp.dkpro.core.api.ner.type.NamedEntity ne : JCasUtil.select(jCas, de.tudarmstadt.ukp.dkpro.core.api.ner.type.NamedEntity.class)) {
Document neDoc = new Document()
.append("text", ne.getCoveredText())
.append("type", ne.getValue());
@ -111,23 +127,16 @@ public class XmiExtractor {
}
analysisResults.append("namedEntities", namedEntities);
// Sentiment
List<Document> sentiments = new ArrayList<>();
for (org.hucompute.textimager.uima.type.Sentiment sentiment :
JCasUtil.select(jCas, org.hucompute.textimager.uima.type.Sentiment.class)) {
for (org.hucompute.textimager.uima.type.Sentiment sentiment : JCasUtil.select(jCas, org.hucompute.textimager.uima.type.Sentiment.class)) {
Document sentimentDoc = new Document()
.append("begin", sentiment.getBegin())
.append("end", sentiment.getEnd())
.append("score", sentiment.getSentiment())
.append("subjectivity", sentiment.getSubjectivity());
// Check for VaderSentiment subtype
if (sentiment instanceof org.hucompute.textimager.uima.type.VaderSentiment) {
org.hucompute.textimager.uima.type.VaderSentiment vader =
(org.hucompute.textimager.uima.type.VaderSentiment) sentiment;
sentimentDoc
.append("pos", vader.getPos())
org.hucompute.textimager.uima.type.VaderSentiment vader = (org.hucompute.textimager.uima.type.VaderSentiment) sentiment;
sentimentDoc.append("pos", vader.getPos())
.append("neu", vader.getNeu())
.append("neg", vader.getNeg());
}
@ -136,39 +145,36 @@ public class XmiExtractor {
analysisResults.append("sentiments", sentiments);
List<Document> topics = new ArrayList<>();
for (CategoryCoveredTagged topic : JCasUtil.select(jCas, CategoryCoveredTagged.class)) {
for (org.hucompute.textimager.uima.type.category.CategoryCoveredTagged topic : JCasUtil.select(jCas, org.hucompute.textimager.uima.type.category.CategoryCoveredTagged.class)) {
Document topicDoc = new Document()
.append("topic", topic.getValue())
.append("score", topic.getScore())
.append("tags", topic.getTags())
.append("text", topic.getCoveredText());
topics.add(topicDoc);
}
topics.sort((d1, d2) -> Double.compare(d2.getDouble("score"), d1.getDouble("score")));
analysisResults.append("topics", topics);
// Upload structured Document to MongoDB
String speechKey = extractSpeechKeyFromFilename(filename);
if (speechKey != null) {
Bson filter = Filters.eq("speechKey", speechKey);
Bson update = new Document("$set", new Document("analysisResults", analysisResults));
UpdateOneModel<Document> updateModel = new UpdateOneModel<>(filter, update);
bulkOperations.add(updateModel);
if (bulkOperations.size() >= BATCH_SIZE) {
flushBatch();
synchronized (bulkOperations) {
if (bulkOperations.size() >= BATCH_SIZE) {
Logger.info("BATCH_SIZE to Upload: " + bulkOperations.size());
flushBatch();
}
}
processedCount++;
if (processedCount % 5000 == 0) {
Logger.info("Processed speeches: " + processedCount);
int count = processedCount.incrementAndGet();
if (count % 1000 == 0) {
Logger.info("Processed speeches: " + count);
}
}
} catch (Exception e) {
e.printStackTrace();
}
finally {
} finally {
if (jCas != null) {
jCas.reset();
}
@ -180,23 +186,11 @@ public class XmiExtractor {
return baseName.replace("20/", "");
}
private void flushBatch() {
/**
 * Writes every queued update to MongoDB in a single bulk operation and
 * empties the queue. Synchronized so that worker threads and the final
 * flush cannot write the same batch twice.
 */
private synchronized void flushBatch() {
    if (bulkOperations.isEmpty()) {
        return; // nothing queued — avoid an empty bulk write
    }
    mongoDBHandler.bulkWriteNlpData(bulkOperations);
    bulkOperations.clear();
}
/*
public static void main(String[] args) {
try {
XmiExtractor extractor = new XmiExtractor(database);
extractor.extractAndUploadXmiData();
System.out.println("Processing complete.");
} catch (Exception e) {
e.printStackTrace();
}
} */
}

View file

@ -89,6 +89,7 @@ public class ParlamentarierController {
String id = ctx.pathParam("id");
Logger.info("getParlamentarierDetails, ID = " + id);
// Alle Details des Abgeordnetes (Vor- und Nachname, Geburts- und Sterbeort, Partei, Vita etc.)
ParlamentarierDetails pd = MongoPprUtils.getParlamentarierDetailsByID(id);
Map<String, Object> attributes = new HashMap<>();
@ -101,6 +102,10 @@ public class ParlamentarierController {
attributes.put("speechesPlaceholder", emptyList);
}
// Foto des Abgeordnetes
String picture = MongoPprUtils.getParlamentarierPictureByID(id);
attributes.put("pic", picture);
ctx.render("parlamentarierDetails.ftl", attributes);
}

View file

@ -5,12 +5,16 @@ import io.javalin.openapi.*;
import org.texttechnologylab.project.gruppe_05_1.database.MongoPprUtils;
import org.texttechnologylab.project.gruppe_05_1.domain.html.HtmlSpeech;
import org.texttechnologylab.project.gruppe_05_1.domain.html.ParlamentarierDetails;
import org.texttechnologylab.project.gruppe_05_1.domain.nlp.Token;
import org.texttechnologylab.project.gruppe_05_1.domain.nlp.Topic;
import org.texttechnologylab.project.gruppe_05_1.domain.speech.SpeechMetaData;
import org.texttechnologylab.project.gruppe_05_1.xml.speeches.Interfaces.Speech;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.stream.Collectors;
public class SpeechController {
/**
@ -68,6 +72,38 @@ public class SpeechController {
HtmlSpeech speech = MongoPprUtils.getSpeechByKey(redeId);
attributes.put("s", speech);
// NLP: Topic
if ((speech.getNlp() != null) && (speech.getNlp().getTopics() != null)) {
Map<String, Double> topics = Topic.condenseTopicInformation(speech.getNlp().getTopics()); // Daten "verdichten"...
// ... und ersetzen
speech.getNlp().setTopics(
topics.entrySet().stream()
.map(me -> new Topic(me.getKey(), me.getValue(), null))
.collect(Collectors.toList()));
}
// NLP: POS
if (speech.getNlp() != null && speech.getNlp().getTokens() != null) {
List<Token> tokens = speech.getNlp().getTokens();
Map<String, Integer> posCounts = Token.countPOS(tokens);
List<Token> posList = posCounts.entrySet().stream()
.map(entry -> new Token(entry.getKey(), String.valueOf(entry.getValue()), "")) // Lemma remains empty
.collect(Collectors.toList());
System.out.println("DEBUG: Sending POS List to NLP - " + posList);
speech.getNlp().setPosList((List) posList);
} else {
System.out.println("DEBUG: POS List is EMPTY");
speech.getNlp().setPosList((List) new ArrayList<Token>()); // Ensure it's never null
}
// TODO: Token wird momentan etwas komisch abgespeichert, da im Attribut text die POS art steht, und in pos die Anzahl dieser POS arten. Umstrukturieren damit keine Verwirrung herrscht
ctx.render("speech.ftl", attributes);
}

View file

@ -23,6 +23,8 @@ import java.io.*;
import java.net.HttpURLConnection;
import java.net.URL;
import java.util.*;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import java.util.stream.Collectors;
import java.util.zip.ZipEntry;
import java.util.zip.ZipInputStream;
@ -423,4 +425,61 @@ public abstract class PPRUtils {
return fileNames;
}
/**
 * Scans the Bundestag open-data listing for plenary protocols that are not yet
 * stored in the database, downloads each missing protocol XML and parses it.
 * Pages through the AJAX filter list using the {@code data-nextoffset}
 * attribute of the {@code div.meta-slider} element until no further page is
 * advertised or no more document links are found.
 *
 * @param mongoDBHandler handler used to check via {@code sessionExists} whether
 *                       a session is already stored
 * @return the parsed XML documents of all protocols not yet present in the
 *         database; empty if everything is up to date or the listing could not
 *         be reached
 */
public static Set<org.w3c.dom.Document> checkAndProcessNewProtocols(MongoDBHandler mongoDBHandler) {
    Set<org.w3c.dom.Document> newProtocols = new HashSet<>();
    int offset = 0;
    final int limit = 10; // page size of the Bundestag AJAX filter list
    boolean hasMore = true;
    while (hasMore) {
        String queryUrl = "https://www.bundestag.de/ajax/filterlist/de/services/opendata/866354-866354?limit="
                + limit + "&noFilterSet=true&offset=" + offset;
        try {
            org.jsoup.nodes.Document htmlDoc = Jsoup.connect(queryUrl).get();
            Elements sessionLinks = htmlDoc.select("a.bt-link-dokument");
            if (sessionLinks.isEmpty()) break;
            for (org.jsoup.nodes.Element link : sessionLinks) {
                String xmlUrl = link.attr("href");
                String fileName = xmlUrl.substring(xmlUrl.lastIndexOf('/') + 1); // e.g. "20212.xml"
                // Strip the file extension, e.g. "20212"
                String sessionNumberFull = fileName.replace(".xml", "");
                String sessionNumber;
                // File names are prefixed with the legislative period ("20");
                // drop it to obtain the bare session number used in the DB.
                if (sessionNumberFull.startsWith("20") && sessionNumberFull.length() > 2) {
                    sessionNumber = sessionNumberFull.substring(2);
                } else {
                    sessionNumber = sessionNumberFull;
                }
                if (!mongoDBHandler.sessionExists(sessionNumber)) {
                    try {
                        org.w3c.dom.Document xmlDoc = downloadAndParseXML(xmlUrl);
                        newProtocols.add(xmlDoc);
                    } catch (Exception ex) {
                        // One broken protocol must not abort the whole scan.
                        Logger.error("Error processing XML for session " + sessionNumber + ": " + ex.getMessage());
                    }
                }
            }
            // Pagination: the slider advertises the offset of the next page.
            org.jsoup.nodes.Element metaSlider = htmlDoc.selectFirst("div.meta-slider");
            if (metaSlider != null && metaSlider.hasAttr("data-nextoffset")) {
                int nextOffset = Integer.parseInt(metaSlider.attr("data-nextoffset"));
                if (nextOffset <= offset) {
                    hasMore = false; // defensive: avoid an infinite loop on a stale offset
                } else {
                    offset = nextOffset;
                }
            } else {
                hasMore = false;
            }
        } catch (IOException e) {
            Logger.error("Error loading page: " + queryUrl + " : " + e.getMessage());
            break;
        }
    }
    return newProtocols;
}
}

View file

@ -40,7 +40,6 @@ public class SpeechParser {
}
public List<Session> parseAllSessions() {
List<Session> sessionsEmpty = new ArrayList<>();
List<Session> sessions = new ArrayList<>();
this.speeches = new ArrayList<>();
this.agendaItems = new ArrayList<>();
@ -61,6 +60,26 @@ public class SpeechParser {
}
/**
 * Parses the given protocol XML documents into {@link Session} objects.
 * Each document is written to a temporary file (required by the file-based
 * parser) which is deleted again afterwards, even when parsing fails.
 * A document that cannot be parsed is logged and skipped; it does not abort
 * the remaining documents.
 *
 * @param xmlDocuments the protocol XML documents to parse
 * @return the successfully parsed sessions (possibly fewer than the input size)
 */
public List<Session> parseAllSessions(Set<Document> xmlDocuments) {
    List<Session> sessions = new ArrayList<>();
    this.speeches = new ArrayList<>();
    this.agendaItems = new ArrayList<>();
    for (org.w3c.dom.Document xmlDoc : xmlDocuments) {
        File tempFile = null;
        try {
            tempFile = convertDocumentToFile(xmlDoc);
            sessions.add(parseSessionFile(tempFile));
        } catch (Exception e) {
            Logger.error("Error parsing XML document.");
            Logger.error(e.getMessage());
        } finally {
            // Always delete the temporary file, even if parsing threw.
            if (tempFile != null) {
                tempFile.delete();
            }
        }
    }
    Logger.info("All new sessions parsed: " + sessions.size());
    return sessions;
}
private Session parseSessionFile(File file) throws Exception {
//file = removeDoctypeAnnotation(file.getAbsolutePath());

View file

@ -763,7 +763,7 @@ members = [
]
# Base URL for querying (with placeholders for last name and first name)
base_url = "https://bilddatenbank.bundestag.de/search/picture-result?query={0}%2C+{1}&filterQuery%5Bereignis%5D%5B%5D=Portr%C3%A4t%2FPortrait&sortVal=2"
base_url = "https://bilddatenbank.bundestag.de/search/picture-result?query={0}+{1}&sortVal=2"
#base_url = "https://bilddatenbank.bundestag.de/search/picture-result?filterQuery%5Bname%5D%5B%5D={0}l%2C+{1}&filterQuery%5Bereignis%5D%5B%5D=Portr%C3%A4t%2FPortrait&sortVal=2"
def fetch_image(lastname, firstname):

View file

@ -17,10 +17,9 @@
<h1>${p.vorname} ${p.nachname} (${p.partei})</h1>
</header>
<br>
<br>
<#if pic??>
<img style="max-width: 400px; height: auto;" src="data:image/jpeg;base64,${pic}" alt="Foto von ${p.vorname} ${p.nachname} (${p.partei})" />
<img style="max-width: 400px; height: auto;" src="data:image/jpeg;base64,${pic}" alt="Foto von ${p.vorname} ${p.nachname} (${p.partei})" />
<#else>
<h2>(kein Foto verfügbar)</h2>
</#if>

View file

@ -23,11 +23,19 @@
<h2>Rede ${s.speechKey} </h2>
<main>
<#list s.content as c>
<#include "speechContent.ftl">
</#list>
</main>
<#list s.content as c>
<#include "speechContent.ftl">
</#list>
<br><br>
<#if s.nlp??>
<h2>NLP Information</h2>
<#assign nlp = "${s.nlp}">
<#include "nlp.ftl">
<#else>
<h2>Keine NLP Information verfügbar für diese Rede</h2>
</#if>
<br> <br>
</body>

View file

@ -48,4 +48,3 @@
});
</script>

Before

Width:  |  Height:  |  Size: 1.8 KiB

After

Width:  |  Height:  |  Size: 1.8 KiB

Before After
Before After