From e6ef7adc6c6be19472e54dd9db79bbf6293cb3d5 Mon Sep 17 00:00:00 2001
From: s5260822 <s5260822@stud.uni-frankfurt.de>
Date: Tue, 18 Mar 2025 17:18:50 +0100
Subject: [PATCH] Resolve merge conflict

---
 .../project/gruppe_05_1/Main.java             |  42 +++++-
 .../gruppe_05_1/database/MongoDBHandler.java  |  11 +-
 .../gruppe_05_1/database/MongoPprUtils.java   |  33 ++++-
 .../gruppe_05_1/domain/html/HtmlSpeech.java   |  46 ++++++-
 .../gruppe_05_1/domain/nlp/AudioToken.java    |  87 ++++++++++++
 .../gruppe_05_1/domain/nlp/Dependency.java    |  64 +++++++++
 .../gruppe_05_1/domain/nlp/NamedEntity.java   |  55 ++++++++
 .../gruppe_05_1/domain/nlp/NlpInfo.java       | 123 +++++++++++++++++
 .../project/gruppe_05_1/domain/nlp/Pos.java   | 119 ++++++++++++++++
 .../gruppe_05_1/domain/nlp/Sentence.java      |  44 ++++++
 .../gruppe_05_1/domain/nlp/Sentiment.java     |  97 +++++++++++++
 .../project/gruppe_05_1/domain/nlp/Token.java |  96 +++++++++++++
 .../project/gruppe_05_1/domain/nlp/Topic.java | 107 +++++++++++++++
 .../domain/nlp/VideoInformation.java          |  43 ++++++
 .../domain/nlp/html/SentimentOfSentence.java  | 112 +++++++++++++++
 .../project/gruppe_05_1/nlp/NlpUtils.java     |   2 +
 .../project/gruppe_05_1/nlp/XmiExtractor.java | 128 +++++++++---------
 .../rest/ParlamentarierController.java        |   5 +
 .../gruppe_05_1/rest/SpeechController.java    |  36 +++++
 .../project/gruppe_05_1/util/PPRUtils.java    |  59 ++++++++
 .../xml/speeches/SpeechParser.java            |  21 ++-
 .../members_of_parliament_image_crawler.py    |   2 +-
 .../templates/parlamentarierDetails.ftl       |   3 +-
 src/main/resources/templates/speech.ftl       |  18 ++-
 .../resources/templates/topicsBubbleChart.ftl |   1 -
 25 files changed, 1266 insertions(+), 88 deletions(-)
 create mode 100644 src/main/java/org/texttechnologylab/project/gruppe_05_1/domain/nlp/AudioToken.java
 create mode 100644 src/main/java/org/texttechnologylab/project/gruppe_05_1/domain/nlp/Dependency.java
 create mode 100644 src/main/java/org/texttechnologylab/project/gruppe_05_1/domain/nlp/NamedEntity.java
 create mode 100644 src/main/java/org/texttechnologylab/project/gruppe_05_1/domain/nlp/NlpInfo.java
 create mode 100644 src/main/java/org/texttechnologylab/project/gruppe_05_1/domain/nlp/Pos.java
 create mode 100644 src/main/java/org/texttechnologylab/project/gruppe_05_1/domain/nlp/Sentence.java
 create mode 100644 src/main/java/org/texttechnologylab/project/gruppe_05_1/domain/nlp/Sentiment.java
 create mode 100644 src/main/java/org/texttechnologylab/project/gruppe_05_1/domain/nlp/Token.java
 create mode 100644 src/main/java/org/texttechnologylab/project/gruppe_05_1/domain/nlp/Topic.java
 create mode 100644 src/main/java/org/texttechnologylab/project/gruppe_05_1/domain/nlp/VideoInformation.java
 create mode 100644 src/main/java/org/texttechnologylab/project/gruppe_05_1/domain/nlp/html/SentimentOfSentence.java

diff --git a/src/main/java/org/texttechnologylab/project/gruppe_05_1/Main.java b/src/main/java/org/texttechnologylab/project/gruppe_05_1/Main.java
index 05eb9c7..95a20a5 100644
--- a/src/main/java/org/texttechnologylab/project/gruppe_05_1/Main.java
+++ b/src/main/java/org/texttechnologylab/project/gruppe_05_1/Main.java
@@ -8,10 +8,18 @@ import org.texttechnologylab.project.gruppe_05_1.rest.RESTHandler;
 import org.texttechnologylab.project.gruppe_05_1.util.Logger;
 import org.texttechnologylab.project.gruppe_05_1.util.PPRUtils;
 import org.texttechnologylab.project.gruppe_05_1.xml.FileObjectFactory;
+import org.texttechnologylab.project.gruppe_05_1.xml.speeches.SpeechParser;
+import org.w3c.dom.Document;
+
 import java.util.Arrays;
+import java.util.Set;
+import java.util.concurrent.Executors;
+import java.util.concurrent.ScheduledExecutorService;
+import java.util.concurrent.TimeUnit;
 
 import static java.lang.Boolean.FALSE;
 import static java.lang.Boolean.TRUE;
+import static org.texttechnologylab.project.gruppe_05_1.util.PPRUtils.checkAndProcessNewProtocols;
 
 public class Main {
     public static boolean UPLOAD_MEMBER_PHOTOS;
@@ -116,13 +124,33 @@ public class Main {
             Logger.pink("Uploading Member Photos to DB...");
             mongoDBHandler.uploadMemberPhotos();
         }
-        mongoDBHandler.close();
-        try {
-            NlpUtils.runRemoteDriver();
-        } catch (Exception e) {
-            Logger.error("Error while running NLP remote driver");
-            Logger.error(e.getMessage());
-        }
+        NlpUtils.runRemoteDriver();
+        /*ScheduledExecutorService scheduler = Executors.newScheduledThreadPool(1);
+        scheduler.scheduleAtFixedRate(() -> {
+            try {
+                NlpUtils.runRemoteDriver();
+            } catch (Exception e) {
+                Logger.error("Error while running NLP remote driver");
+                Logger.error(e.getMessage());
+            }
+            try {
+                Logger.info("Starte Aktualisierung der Protokolle...");
+                Set<Document> newProtocols = checkAndProcessNewProtocols(mongoDBHandler);
+                Logger.info("Neue Protokolle gefunden: " + newProtocols.size());
+                if (newProtocols.isEmpty()) {
+                    Logger.info("Keine neuen Protokolle gefunden, Upload wird übersprungen.");
+                } else {
+                    SpeechParser speechParser = new SpeechParser();
+                    mongoDBHandler.insertSessions(speechParser.parseAllSessions(newProtocols));
+                    mongoDBHandler.insertAgendaItems(speechParser.getAgendaItems());
+                    mongoDBHandler.insertSpeeches(speechParser.getSpeeches());
+                    Logger.info("Neuer Protokolle uploaded: " + newProtocols.size());
+                }
+            } catch (Exception ex) {
+                Logger.error("Fehler bei der Protokollaktualisierung: " + ex.getMessage());
+            }
+        }, 0, 10, TimeUnit.MINUTES);*/
+
         RESTHandler restHandler = new RESTHandler();
         restHandler.startJavalin();
 
diff --git a/src/main/java/org/texttechnologylab/project/gruppe_05_1/database/MongoDBHandler.java b/src/main/java/org/texttechnologylab/project/gruppe_05_1/database/MongoDBHandler.java
index 6ac6e28..b7de73c 100644
--- a/src/main/java/org/texttechnologylab/project/gruppe_05_1/database/MongoDBHandler.java
+++ b/src/main/java/org/texttechnologylab/project/gruppe_05_1/database/MongoDBHandler.java
@@ -3,6 +3,7 @@ package org.texttechnologylab.project.gruppe_05_1.database;
 import com.mongodb.MongoClientSettings;
 import com.mongodb.MongoCredential;
 import com.mongodb.ServerAddress;
+import com.mongodb.WriteConcern;
 import com.mongodb.bulk.BulkWriteResult;
 import com.mongodb.client.MongoClient;
 import com.mongodb.client.MongoClients;
@@ -691,7 +692,9 @@ public class MongoDBHandler {
 
     public void bulkWriteNlpData(List<WriteModel<Document>> bulkOperations) {
         if (!bulkOperations.isEmpty()) {
-            BulkWriteResult result = speechesCollection.bulkWrite(bulkOperations);
+            BulkWriteOptions options = new BulkWriteOptions().ordered(false);
+            // Unordered: remaining operations still run if one fails. No write concern is changed in this method.
+            BulkWriteResult result = speechesCollection.bulkWrite(bulkOperations, options);
             int modifiedCount = result.getModifiedCount();
             int matchedCount = result.getMatchedCount();
             int upsertCount = result.getUpserts().size();
@@ -764,6 +767,12 @@ public class MongoDBHandler {
         }
     }
 
+    public boolean sessionExists(String sessionNumber) {
+        Document filter = new Document("sessionNumber", sessionNumber);
+        long count = sessionsCollection.countDocuments(filter);
+        return count > 0;
+    }
+
     public String getMemberPhoto(String memberId) {
         Document photoDocument = memberPhotoCollection.find(eq("memberId", memberId)).first();
         if (photoDocument == null) {
diff --git a/src/main/java/org/texttechnologylab/project/gruppe_05_1/database/MongoPprUtils.java b/src/main/java/org/texttechnologylab/project/gruppe_05_1/database/MongoPprUtils.java
index 22ce309..d86c4d9 100644
--- a/src/main/java/org/texttechnologylab/project/gruppe_05_1/database/MongoPprUtils.java
+++ b/src/main/java/org/texttechnologylab/project/gruppe_05_1/database/MongoPprUtils.java
@@ -8,6 +8,7 @@ import org.texttechnologylab.project.gruppe_05_1.database.domainimp.speeches.Spe
 import org.texttechnologylab.project.gruppe_05_1.domain.html.HtmlSpeech;
 import org.texttechnologylab.project.gruppe_05_1.domain.html.Parlamentarier;
 import org.texttechnologylab.project.gruppe_05_1.domain.html.ParlamentarierDetails;
+import org.texttechnologylab.project.gruppe_05_1.domain.nlp.*;
 import org.texttechnologylab.project.gruppe_05_1.domain.speaker.Membership;
 import org.texttechnologylab.project.gruppe_05_1.domain.speech.SpeechMetaData;
 import org.texttechnologylab.project.gruppe_05_1.util.GeneralUtils;
@@ -162,6 +163,24 @@ public class MongoPprUtils {
         return p;
     }
 
+
+    // - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+
+
+    // Parlamentarier - Picture
+
+    /**
+     * Looks up a member's photo document by its "memberId" field.
+     * @param id ID of the member of parliament
+     * @return the document's "base64" string, or null if no document was found
+     */
+    public static String getParlamentarierPictureByID(String id) {
+        Document doc = MongoDBHandler.findFirstDocumentInCollection(getPicturesCollection(), "memberId", id);
+        if (doc == null) {
+            return null;
+        } else return doc.getString("base64");
+    }
+
     // - - - - - - - - - - - - - - - - - - - - - - - - - - - -
 
 
@@ -362,7 +381,11 @@ public class MongoPprUtils {
 
         // Sortiere nach Datum, absteigend
         speechMetaDataList.sort((md1, md2) -> {
-            return md2.getDateTime().compareTo(md1.getDateTime());
+            // Newest first; undated entries sort last (returning 0 for them breaks the comparator contract).
+            var d1 = md1.getDateTime();
+            var d2 = md2.getDateTime();
+            if (d1 == null || d2 == null) return Boolean.compare(d1 == null, d2 == null);
+            return d2.compareTo(d1);
         });
 
         return speechMetaDataList;
@@ -399,6 +422,14 @@ public class MongoPprUtils {
         }
     }
 
+    /**
+     * Returns the information needed to display a speech:
+     * - the speech ID
+     * - name and faction of the speaker
+     * - the contents of the speech
+     * @param key the speech key, matched against the "speechKey" field
+     * @return the matching speech as an {@link HtmlSpeech}
+     */
     public static HtmlSpeech getSpeechByKey(String key) {
         Document filter = new Document("speechKey", key);
         Document speechDoc = getSpeechCollection().find(filter).first();
diff --git a/src/main/java/org/texttechnologylab/project/gruppe_05_1/domain/html/HtmlSpeech.java b/src/main/java/org/texttechnologylab/project/gruppe_05_1/domain/html/HtmlSpeech.java
index 3b4080e..07a4fd5 100644
--- a/src/main/java/org/texttechnologylab/project/gruppe_05_1/domain/html/HtmlSpeech.java
+++ b/src/main/java/org/texttechnologylab/project/gruppe_05_1/domain/html/HtmlSpeech.java
@@ -2,6 +2,9 @@ package org.texttechnologylab.project.gruppe_05_1.domain.html;
 
 import org.bson.Document;
 import org.texttechnologylab.project.gruppe_05_1.database.MongoDBHandler;
+import org.texttechnologylab.project.gruppe_05_1.domain.nlp.NlpInfo;
+import org.texttechnologylab.project.gruppe_05_1.domain.nlp.Token;
+import org.texttechnologylab.project.gruppe_05_1.domain.nlp.Topic;
 
 import java.util.ArrayList;
 import java.util.List;
@@ -13,6 +16,7 @@ public class HtmlSpeech {
     String                  speakerName;
     String                  fraction;
     List<SpeechContent> content = new ArrayList<>();
+    NlpInfo                 nlp = null;
 
     public HtmlSpeech() {
     }
@@ -30,6 +34,33 @@ public class HtmlSpeech {
                 addContent(new SpeechContent(contentDoc));
             }
         }
+
+        Document nlpDoc = (Document) doc.get("analysisResults");
+        nlp = readNlpInfo(nlpDoc);
+    }
+
+    private NlpInfo readNlpInfo(Document nlpDoc) {
+        if (nlpDoc == null) return null;
+        NlpInfo nlp = new NlpInfo();
+
+        // TODO: only tokens and topics are mapped so far; the other lists below are read but not yet stored
+        List<Document> tokensDocs = nlpDoc.get("tokens", MongoDBHandler.DOC_LIST_CLASS);
+        nlp.setTokens(Token.readTokensFromMongo(tokensDocs));
+
+        List<Document> sentencesDocs = nlpDoc.get("sentences", MongoDBHandler.DOC_LIST_CLASS);
+
+        List<Document> dependenciesDocs = nlpDoc.get("dependencies", MongoDBHandler.DOC_LIST_CLASS);
+
+        List<Document> namedEntitiesDocs = nlpDoc.get("namedEntities", MongoDBHandler.DOC_LIST_CLASS);
+
+        List<Document> sentimentsDocs = nlpDoc.get("sentiments", MongoDBHandler.DOC_LIST_CLASS);
+
+        List<Document> topicsDocs = nlpDoc.get("topics", MongoDBHandler.DOC_LIST_CLASS);
+        nlp.setTopics(Topic.readTopicsFromMongo(topicsDocs));
+
+        // TODO: read the video information (NlpInfo.videoInformation is never set here)
+
+        return nlp;
     }
 
     public String getSpeechKey() {
@@ -68,16 +99,26 @@ public class HtmlSpeech {
         content.add(contentLine);
     }
 
+    public NlpInfo getNlp() {
+        return nlp;
+    }
+
+    public void setNlp(NlpInfo nlp) {
+        this.nlp = nlp;
+    }
+
     @Override
     public boolean equals(Object o) {
         if (this == o) return true;
         if (!(o instanceof HtmlSpeech that)) return false;
-        return Objects.equals(speechKey, that.speechKey) && Objects.equals(speakerName, that.speakerName) && Objects.equals(fraction, that.fraction) && Objects.equals(content, that.content);
+        return Objects.equals(speechKey, that.speechKey) && Objects.equals(speakerName, that.speakerName)
+                && Objects.equals(fraction, that.fraction) && Objects.equals(content, that.content)
+                && Objects.equals(nlp, that.nlp);
     }
 
     @Override
     public int hashCode() {
-        return Objects.hash(speechKey, speakerName, fraction, content);
+        return Objects.hash(speechKey, speakerName, fraction, content, nlp);
     }
 
     @Override
@@ -87,6 +128,7 @@ public class HtmlSpeech {
                 .add("speakerName='" + speakerName + "'")
                 .add("fraction='" + fraction + "'")
                 .add("content=" + content)
+                .add("nlp=" + nlp)
                 .toString();
     }
 }
diff --git a/src/main/java/org/texttechnologylab/project/gruppe_05_1/domain/nlp/AudioToken.java b/src/main/java/org/texttechnologylab/project/gruppe_05_1/domain/nlp/AudioToken.java
new file mode 100644
index 0000000..de72e1d
--- /dev/null
+++ b/src/main/java/org/texttechnologylab/project/gruppe_05_1/domain/nlp/AudioToken.java
@@ -0,0 +1,87 @@
+package org.texttechnologylab.project.gruppe_05_1.domain.nlp;
+
+import java.util.Objects;
+import java.util.StringJoiner;
+
+public class AudioToken {
+
+    private int begin;
+    private int end;
+    private double timeStart;
+    private double timeEnd;
+    private String value;
+
+    public AudioToken() {
+    }
+
+    public AudioToken(int begin, int end, double timeStart, double timeEnd, String value) {
+        this.begin = begin;
+        this.end = end;
+        this.timeStart = timeStart;
+        this.timeEnd = timeEnd;
+        this.value = value;
+    }
+
+    public int getBegin() {
+        return begin;
+    }
+
+    public void setBegin(int begin) {
+        this.begin = begin;
+    }
+
+    public int getEnd() {
+        return end;
+    }
+
+    public void setEnd(int end) {
+        this.end = end;
+    }
+
+    public double getTimeStart() {
+        return timeStart;
+    }
+
+    public void setTimeStart(double timeStart) {
+        this.timeStart = timeStart;
+    }
+
+    public double getTimeEnd() {
+        return timeEnd;
+    }
+
+    public void setTimeEnd(double timeEnd) {
+        this.timeEnd = timeEnd;
+    }
+
+    public String getValue() {
+        return value;
+    }
+
+    public void setValue(String value) {
+        this.value = value;
+    }
+
+    @Override
+    public boolean equals(Object o) {
+        if (this == o) return true;
+        if (!(o instanceof AudioToken that)) return false;
+        return begin == that.begin && end == that.end && Double.compare(timeStart, that.timeStart) == 0 && Double.compare(timeEnd, that.timeEnd) == 0 && Objects.equals(value, that.value);
+    }
+
+    @Override
+    public int hashCode() {
+        return Objects.hash(begin, end, timeStart, timeEnd, value);
+    }
+
+    @Override
+    public String toString() {
+        return new StringJoiner(", ", AudioToken.class.getSimpleName() + "[", "]")
+                .add("begin=" + begin)
+                .add("end=" + end)
+                .add("timeStart=" + timeStart)
+                .add("timeEnd=" + timeEnd)
+                .add("value='" + value + "'")
+                .toString();
+    }
+}
diff --git a/src/main/java/org/texttechnologylab/project/gruppe_05_1/domain/nlp/Dependency.java b/src/main/java/org/texttechnologylab/project/gruppe_05_1/domain/nlp/Dependency.java
new file mode 100644
index 0000000..ad5ead6
--- /dev/null
+++ b/src/main/java/org/texttechnologylab/project/gruppe_05_1/domain/nlp/Dependency.java
@@ -0,0 +1,64 @@
+package org.texttechnologylab.project.gruppe_05_1.domain.nlp;
+
+import java.util.Objects;
+import java.util.StringJoiner;
+
+public class Dependency {
+    String      type;
+    String      governor;
+    String      dependent;
+
+    public Dependency() {
+    }
+
+    public Dependency(String type, String governor, String dependent) {
+        this.type = type;
+        this.governor = governor;
+        this.dependent = dependent;
+    }
+
+    public String getType() {
+        return type;
+    }
+
+    public void setType(String type) {
+        this.type = type;
+    }
+
+    public String getGovernor() {
+        return governor;
+    }
+
+    public void setGovernor(String governor) {
+        this.governor = governor;
+    }
+
+    public String getDependent() {
+        return dependent;
+    }
+
+    public void setDependent(String dependent) {
+        this.dependent = dependent;
+    }
+
+    @Override
+    public boolean equals(Object o) {
+        if (this == o) return true;
+        if (!(o instanceof Dependency that)) return false;
+        return Objects.equals(type, that.type) && Objects.equals(governor, that.governor) && Objects.equals(dependent, that.dependent);
+    }
+
+    @Override
+    public int hashCode() {
+        return Objects.hash(type, governor, dependent);
+    }
+
+    @Override
+    public String toString() {
+        return new StringJoiner(", ", Dependency.class.getSimpleName() + "[", "]")
+                .add("type='" + type + "'")
+                .add("governor='" + governor + "'")
+                .add("dependent='" + dependent + "'")
+                .toString();
+    }
+}
diff --git a/src/main/java/org/texttechnologylab/project/gruppe_05_1/domain/nlp/NamedEntity.java b/src/main/java/org/texttechnologylab/project/gruppe_05_1/domain/nlp/NamedEntity.java
new file mode 100644
index 0000000..0f3ec5d
--- /dev/null
+++ b/src/main/java/org/texttechnologylab/project/gruppe_05_1/domain/nlp/NamedEntity.java
@@ -0,0 +1,55 @@
+package org.texttechnologylab.project.gruppe_05_1.domain.nlp;
+
+import java.util.Objects;
+import java.util.StringJoiner;
+
+public class NamedEntity {
+    String  type;      // PER, LOC etc.
+    // int     begin;      // TODO: momentan nicht in MongoDB
+    // int     end;        // TODO: momentan nicht in MongoDB
+    String  text;
+
+    public NamedEntity() {
+    }
+
+    public NamedEntity(String type, String text) {
+        this.type = type;
+        this.text = text;
+    }
+
+    public String getType() {
+        return type;
+    }
+
+    public void setType(String type) {
+        this.type = type;
+    }
+
+    public String getText() {
+        return text;
+    }
+
+    public void setText(String text) {
+        this.text = text;
+    }
+
+    @Override
+    public boolean equals(Object o) {
+        if (this == o) return true;
+        if (!(o instanceof NamedEntity that)) return false;
+        return Objects.equals(type, that.type) && Objects.equals(text, that.text);
+    }
+
+    @Override
+    public int hashCode() {
+        return Objects.hash(type, text);
+    }
+
+    @Override
+    public String toString() {
+        return new StringJoiner(", ", NamedEntity.class.getSimpleName() + "[", "]")
+                .add("type='" + type + "'")
+                .add("text='" + text + "'")
+                .toString();
+    }
+}
diff --git a/src/main/java/org/texttechnologylab/project/gruppe_05_1/domain/nlp/NlpInfo.java b/src/main/java/org/texttechnologylab/project/gruppe_05_1/domain/nlp/NlpInfo.java
new file mode 100644
index 0000000..de7ecd5
--- /dev/null
+++ b/src/main/java/org/texttechnologylab/project/gruppe_05_1/domain/nlp/NlpInfo.java
@@ -0,0 +1,123 @@
+package org.texttechnologylab.project.gruppe_05_1.domain.nlp;
+
+import java.util.List;
+import java.util.Map;
+import java.util.Objects;
+import java.util.StringJoiner;
+
+public class NlpInfo {
+    List<Token>         tokens;
+    List<Sentence>      sentences;
+    List<Dependency>    dependencies;
+    List<NamedEntity>   namedEntities;
+    Sentiment           overallSentiment;       // Sentiment for the whole text ; kann null sein!
+    List<Sentiment>     sentiments;             // sentiments for the respective sentences (eine Liste von 0..n Elementen)
+    List<Topic>         topics;
+    List<Pos>           posList;
+
+    VideoInformation    videoInformation;
+
+    public List<Token> getTokens() {
+        return tokens;
+    }
+
+    public void setTokens(List<Token> tokens) {
+        this.tokens = tokens;
+    }
+
+    public List<Sentence> getSentences() {
+        return sentences;
+    }
+
+    public void setSentences(List<Sentence> sentences) {
+        this.sentences = sentences;
+    }
+
+    public List<Dependency> getDependencies() {
+        return dependencies;
+    }
+
+    public void setDependencies(List<Dependency> dependencies) {
+        this.dependencies = dependencies;
+    }
+
+    public List<NamedEntity> getNamedEntities() {
+        return namedEntities;
+    }
+
+    public void setNamedEntities(List<NamedEntity> namedEntities) {
+        this.namedEntities = namedEntities;
+    }
+
+    public Sentiment getOverallSentiment() {
+        return overallSentiment;
+    }
+
+    public void setOverallSentiment(Sentiment overallSentiment) {
+        this.overallSentiment = overallSentiment;
+    }
+
+    public List<Sentiment> getSentiments() {
+        return sentiments;
+    }
+
+    public void setSentiments(List<Sentiment> sentiments) {
+        this.sentiments = sentiments;
+    }
+
+    public List<Topic> getTopics() {
+        return topics;
+    }
+
+    public void setTopics(List<Topic> topics) {
+        this.topics = topics;
+    }
+
+    public List<Pos> getPosList() {
+        return posList;
+    }
+
+    public void setPosList(List<Pos> posList) {
+        this.posList = posList;
+    }
+
+    public VideoInformation getVideoInformation() {
+        return videoInformation;
+    }
+
+    public void setVideoInformation(VideoInformation videoInformation) {
+        this.videoInformation = videoInformation;
+    }
+
+    @Override
+    public boolean equals(Object o) {
+        if (this == o) return true;
+        if (!(o instanceof NlpInfo nlpInfo)) return false;
+        return Objects.equals(tokens, nlpInfo.tokens) && Objects.equals(sentences, nlpInfo.sentences)
+                && Objects.equals(dependencies, nlpInfo.dependencies) && Objects.equals(namedEntities, nlpInfo.namedEntities)
+                && Objects.equals(overallSentiment, nlpInfo.overallSentiment) && Objects.equals(sentiments, nlpInfo.sentiments)
+                && Objects.equals(topics, nlpInfo.topics) && Objects.equals(posList, nlpInfo.posList)
+                && Objects.equals(videoInformation, nlpInfo.videoInformation);
+    }
+
+    @Override
+    public int hashCode() {
+        return Objects.hash(tokens, sentences, dependencies, namedEntities, overallSentiment, sentiments, topics, posList, videoInformation);
+    }
+
+    @Override
+    public String toString() {
+        return new StringJoiner(", ", NlpInfo.class.getSimpleName() + "[", "]")
+                .add("tokens=" + tokens)
+                .add("sentences=" + sentences)
+                .add("dependencies=" + dependencies)
+                .add("namedEntities=" + namedEntities)
+                .add("overallSentiment=" + overallSentiment)
+                .add("sentiments=" + sentiments)
+                .add("topics=" + topics)
+                .add("posList=" + posList)
+                .add("videoInformation=" + videoInformation)
+                .toString();
+    }
+
+}
diff --git a/src/main/java/org/texttechnologylab/project/gruppe_05_1/domain/nlp/Pos.java b/src/main/java/org/texttechnologylab/project/gruppe_05_1/domain/nlp/Pos.java
new file mode 100644
index 0000000..74f027a
--- /dev/null
+++ b/src/main/java/org/texttechnologylab/project/gruppe_05_1/domain/nlp/Pos.java
@@ -0,0 +1,119 @@
+package org.texttechnologylab.project.gruppe_05_1.domain.nlp;
+
+import java.util.Objects;
+import java.util.StringJoiner;
+
+public class Pos {
+    String  posValue;       // ART, NN...
+    String  coarseValue;    // PROPN...
+    int     begin;
+    int     end;
+    String  coveredText;
+
+    // Example values are listed in the comment at the end of this class
+
+
+    public Pos() {
+    }
+
+    public Pos(String posValue, String coarseValue, int begin, int end, String coveredText) {
+        this.posValue = posValue;
+        this.coarseValue = coarseValue;
+        this.begin = begin;
+        this.end = end;
+        this.coveredText = coveredText;
+    }
+
+    public String getPosValue() {
+        return posValue;
+    }
+
+    public void setPosValue(String posValue) {
+        this.posValue = posValue;
+    }
+
+    public String getCoarseValue() {
+        return coarseValue;
+    }
+
+    public void setCoarseValue(String coarseValue) {
+        this.coarseValue = coarseValue;
+    }
+
+    public int getBegin() {
+        return begin;
+    }
+
+    public void setBegin(int begin) {
+        this.begin = begin;
+    }
+
+    public int getEnd() {
+        return end;
+    }
+
+    public void setEnd(int end) {
+        this.end = end;
+    }
+
+    public String getCoveredText() {
+        return coveredText;
+    }
+
+    public void setCoveredText(String coveredText) {
+        this.coveredText = coveredText;
+    }
+
+    @Override
+    public boolean equals(Object o) {
+        if (this == o) return true;
+        if (!(o instanceof Pos pos)) return false;
+        return begin == pos.begin && end == pos.end && Objects.equals(posValue, pos.posValue) && Objects.equals(coarseValue, pos.coarseValue) && Objects.equals(coveredText, pos.coveredText);
+    }
+
+    @Override
+    public int hashCode() {
+        return Objects.hash(posValue, coarseValue, begin, end, coveredText);
+    }
+
+    @Override
+    public String toString() {
+        return new StringJoiner(", ", Pos.class.getSimpleName() + "[", "]")
+                .add("posValue='" + posValue + "'")
+                .add("coarseValue='" + coarseValue + "'")
+                .add("begin=" + begin)
+                .add("end=" + end)
+                .add("coveredText='" + coveredText + "'")
+                .toString();
+    }
+
+    /* Example values:
+
+    MyPos{posValue='ART', coarseValue='DET', begin=0, end=3, coveredText='Die'},
+    MyPos{posValue='NN', coarseValue='NOUN', begin=4, end=8, coveredText='Idee'},
+    MyPos{posValue='APPR', coarseValue='ADP', begin=9, end=12, coveredText='von'},
+    MyPos{posValue='NE', coarseValue='PROPN', begin=13, end=16, coveredText='Joe'},
+    MyPos{posValue='NN', coarseValue='PROPN', begin=17, end=22, coveredText='Biden'},
+    MyPos{posValue='APPR', coarseValue='ADP', begin=23, end=26, coveredText='aus'},
+    MyPos{posValue='NE', coarseValue='PROPN', begin=27, end=36, coveredText='Bucharest'},
+    MyPos{posValue='$,', coarseValue='PUNCT', begin=36, end=37, coveredText=','},
+    MyPos{posValue='NE', coarseValue='PROPN', begin=38, end=46, coveredText='Rumänien'},
+    MyPos{posValue='$,', coarseValue='PUNCT', begin=46, end=47, coveredText=','},
+    MyPos{posValue='VVFIN', coarseValue='VERB', begin=48, end=53, coveredText='finde'},
+    MyPos{posValue='PPER', coarseValue='PRON', begin=54, end=57, coveredText='ich'},
+    MyPos{posValue='ADJD', coarseValue='ADV', begin=58, end=61, coveredText='gut'},
+    MyPos{posValue='$.', coarseValue='PUNCT', begin=61, end=62, coveredText='.'},
+    MyPos{posValue='ART', coarseValue='DET', begin=63, end=66, coveredText='Den'},
+    MyPos{posValue='NN', coarseValue='NOUN', begin=67, end=76, coveredText='Vorschlag'},
+    MyPos{posValue='APPR', coarseValue='ADP', begin=77, end=80, coveredText='von'},
+    MyPos{posValue='NE', coarseValue='PROPN', begin=81, end=87, coveredText='Donald'},
+    MyPos{posValue='NE', coarseValue='PROPN', begin=88, end=93, coveredText='Trump'},
+    MyPos{posValue='APPR', coarseValue='ADP', begin=94, end=97, coveredText='aus'},
+    MyPos{posValue='NE', coarseValue='PROPN', begin=98, end=108, coveredText='Frankreich'},
+    MyPos{posValue='VVFIN', coarseValue='VERB', begin=109, end=114, coveredText='finde'},
+    MyPos{posValue='PPER', coarseValue='PRON', begin=115, end=118, coveredText='ich'},
+    MyPos{posValue='ADV', coarseValue='ADV', begin=119, end=126, coveredText='weniger'},
+    MyPos{posValue='ADJD', coarseValue='ADV', begin=127, end=130, coveredText='gut'},
+    MyPos{posValue='$.', coarseValue='PUNCT', begin=130, end=131, coveredText='.'}],
+     */
+}
diff --git a/src/main/java/org/texttechnologylab/project/gruppe_05_1/domain/nlp/Sentence.java b/src/main/java/org/texttechnologylab/project/gruppe_05_1/domain/nlp/Sentence.java
new file mode 100644
index 0000000..213f58e
--- /dev/null
+++ b/src/main/java/org/texttechnologylab/project/gruppe_05_1/domain/nlp/Sentence.java
@@ -0,0 +1,44 @@
+package org.texttechnologylab.project.gruppe_05_1.domain.nlp;
+
+import java.util.Objects;
+import java.util.StringJoiner;
+
+public class Sentence {
+    // int         begin;      // TODO: momentan nicht in MongoDB
+    // int         end;        // TODO: momentan nicht in MongoDB
+    String text;
+
+    public Sentence() {
+    }
+
+    public Sentence(String text) {
+        this.text = text;
+    }
+
+    public String getText() {
+        return text;
+    }
+
+    public void setText(String text) {
+        this.text = text;
+    }
+
+    @Override
+    public boolean equals(Object o) {
+        if (this == o) return true;
+        if (!(o instanceof Sentence sentence)) return false;
+        return Objects.equals(text, sentence.text);
+    }
+
+    @Override
+    public int hashCode() {
+        return Objects.hash(text);
+    }
+
+    @Override
+    public String toString() {
+        return new StringJoiner(", ", Sentence.class.getSimpleName() + "[", "]")
+                .add("text='" + text + "'")
+                .toString();
+    }
+}
diff --git a/src/main/java/org/texttechnologylab/project/gruppe_05_1/domain/nlp/Sentiment.java b/src/main/java/org/texttechnologylab/project/gruppe_05_1/domain/nlp/Sentiment.java
new file mode 100644
index 0000000..a2f04e3
--- /dev/null
+++ b/src/main/java/org/texttechnologylab/project/gruppe_05_1/domain/nlp/Sentiment.java
@@ -0,0 +1,120 @@
+package org.texttechnologylab.project.gruppe_05_1.domain.nlp;
+
+import java.util.Objects;
+import java.util.StringJoiner;
+
+/**
+ * Sentiment scores together with the begin/end positions they belong to.
+ */
+public class Sentiment {
+    int     begin;
+    int     end;
+    double  sentiment;      // overall sentiment
+    double  negative;
+    double  neutral;
+    double  positive;
+
+    /** Creates a sentiment with every field at its zero default. */
+    public Sentiment() {
+    }
+
+    /**
+     * @param begin     begin position
+     * @param end       end position
+     * @param sentiment overall sentiment
+     * @param negative  negative score
+     * @param neutral   neutral score
+     * @param positive  positive score
+     */
+    public Sentiment(int begin, int end, double sentiment, double negative, double neutral, double positive) {
+        this.begin = begin;
+        this.end = end;
+        this.sentiment = sentiment;
+        this.negative = negative;
+        this.neutral = neutral;
+        this.positive = positive;
+    }
+
+    public int getBegin() {
+        return begin;
+    }
+
+    public void setBegin(int begin) {
+        this.begin = begin;
+    }
+
+    public int getEnd() {
+        return end;
+    }
+
+    public void setEnd(int end) {
+        this.end = end;
+    }
+
+    public double getSentiment() {
+        return sentiment;
+    }
+
+    public void setSentiment(double sentiment) {
+        this.sentiment = sentiment;
+    }
+
+    public double getNegative() {
+        return negative;
+    }
+
+    public void setNegative(double negative) {
+        this.negative = negative;
+    }
+
+    public double getNeutral() {
+        return neutral;
+    }
+
+    public void setNeutral(double neutral) {
+        this.neutral = neutral;
+    }
+
+    public double getPositive() {
+        return positive;
+    }
+
+    public void setPositive(double positive) {
+        this.positive = positive;
+    }
+
+    /** Two sentiments are equal when all six fields match. */
+    @Override
+    public boolean equals(Object o) {
+        if (o == this) {
+            return true;
+        }
+        if (!(o instanceof Sentiment other)) {
+            return false;
+        }
+        return begin == other.begin
+                && end == other.end
+                && Double.compare(sentiment, other.sentiment) == 0
+                && Double.compare(negative, other.negative) == 0
+                && Double.compare(neutral, other.neutral) == 0
+                && Double.compare(positive, other.positive) == 0;
+    }
+
+    @Override
+    public int hashCode() {
+        return Objects.hash(begin, end, sentiment, negative, neutral, positive);
+    }
+
+    /** Renders as {@code Sentiment[begin=..., end=..., sentiment=..., negative=..., neutral=..., positive=...]}. */
+    @Override
+    public String toString() {
+        StringJoiner parts = new StringJoiner(", ", Sentiment.class.getSimpleName() + "[", "]");
+        parts.add("begin=" + begin);
+        parts.add("end=" + end);
+        parts.add("sentiment=" + sentiment);
+        parts.add("negative=" + negative);
+        parts.add("neutral=" + neutral);
+        parts.add("positive=" + positive);
+        return parts.toString();
+    }
+}
diff --git a/src/main/java/org/texttechnologylab/project/gruppe_05_1/domain/nlp/Token.java b/src/main/java/org/texttechnologylab/project/gruppe_05_1/domain/nlp/Token.java
new file mode 100644
index 0000000..09a5365
--- /dev/null
+++ b/src/main/java/org/texttechnologylab/project/gruppe_05_1/domain/nlp/Token.java
@@ -0,0 +1,112 @@
+package org.texttechnologylab.project.gruppe_05_1.domain.nlp;
+
+import org.bson.Document;
+
+import java.util.*;
+
+/**
+ * A token with its text, POS value and lemma, plus helpers to build
+ * tokens from MongoDB documents and to count POS values.
+ */
+public class Token {
+    String text;
+    String pos;
+    String lemma;
+
+    /** Creates a token with all fields {@code null}. */
+    public Token() {
+    }
+
+    /**
+     * @param text  the token text
+     * @param pos   the POS value
+     * @param lemma the lemma
+     */
+    public Token(String text, String pos, String lemma) {
+        this.text = text;
+        this.pos = pos;
+        this.lemma = lemma;
+    }
+
+    public String getText() {
+        return text;
+    }
+
+    public void setText(String text) {
+        this.text = text;
+    }
+
+    public String getPos() {
+        return pos;
+    }
+
+    public void setPos(String pos) {
+        this.pos = pos;
+    }
+
+    public String getLemma() {
+        return lemma;
+    }
+
+    public void setLemma(String lemma) {
+        this.lemma = lemma;
+    }
+
+    /** Two tokens are equal when text, pos and lemma are all equal. */
+    @Override
+    public boolean equals(Object o) {
+        if (o == this) {
+            return true;
+        }
+        if (!(o instanceof Token other)) {
+            return false;
+        }
+        return Objects.equals(text, other.text)
+                && Objects.equals(pos, other.pos)
+                && Objects.equals(lemma, other.lemma);
+    }
+
+    @Override
+    public int hashCode() {
+        return Objects.hash(text, pos, lemma);
+    }
+
+    /** Renders as {@code Token[text='...', pos='...', lemma='...']}. */
+    @Override
+    public String toString() {
+        StringJoiner parts = new StringJoiner(", ", Token.class.getSimpleName() + "[", "]");
+        parts.add("text='" + text + "'");
+        parts.add("pos='" + pos + "'");
+        parts.add("lemma='" + lemma + "'");
+        return parts.toString();
+    }
+
+    /**
+     * Converts the token documents (Speech --> analysisResults --> token) from MongoDB into tokens.
+     * @param tokenDocs a list of Mongo documents
+     * @return a list of tokens, one per document, in the same order
+     */
+    public static List<Token> readTokensFromMongo(List<Document> tokenDocs) {
+        List<Token> result = new ArrayList<>();
+        for (Document tokenDoc : tokenDocs) {
+            String text = tokenDoc.getString("text");
+            String pos = tokenDoc.getString("pos");
+            String lemma = tokenDoc.getString("lemma");
+            result.add(new Token(text, pos, lemma));
+        }
+        return result;
+    }
+
+    /**
+     * Counts how often each POS value occurs.
+     * @param tokenList the tokens to count over
+     * @return a map from each POS value to its number of occurrences
+     */
+    public static Map<String, Integer> countPOS(List<Token> tokenList) {
+        Map<String, Integer> posCounts = new HashMap<>();
+        for (Token token : tokenList) {
+            posCounts.merge(token.getPos(), 1, Integer::sum);
+        }
+        return posCounts;
+    }
+}
diff --git a/src/main/java/org/texttechnologylab/project/gruppe_05_1/domain/nlp/Topic.java b/src/main/java/org/texttechnologylab/project/gruppe_05_1/domain/nlp/Topic.java
new file mode 100644
index 0000000..474f5aa
--- /dev/null
+++ b/src/main/java/org/texttechnologylab/project/gruppe_05_1/domain/nlp/Topic.java
@@ -0,0 +1,123 @@
+package org.texttechnologylab.project.gruppe_05_1.domain.nlp;
+
+import org.bson.Document;
+
+import java.util.*;
+import java.util.stream.Collectors;
+
+/**
+ * A topic label with its score and the text it refers to, plus helpers to build
+ * topics from MongoDB documents and to sum up scores per topic.
+ */
+public class Topic {
+    String      topic;
+    Double      score;      // boxed, so it may be null
+    String      text;
+
+    /** Creates a topic with all fields {@code null}. */
+    public Topic() {
+    }
+
+    /**
+     * @param topic the topic label
+     * @param score the score of the topic, may be {@code null}
+     * @param text  the associated text
+     */
+    public Topic(String topic, Double score, String text) {
+        this.topic = topic;
+        this.score = score;
+        this.text = text;
+    }
+
+    public String getTopic() {
+        return topic;
+    }
+
+    public void setTopic(String topic) {
+        this.topic = topic;
+    }
+
+    public Double getScore() {
+        return score;
+    }
+
+    public void setScore(Double score) {
+        this.score = score;
+    }
+
+    public String getText() {
+        return text;
+    }
+
+    public void setText(String text) {
+        this.text = text;
+    }
+
+    /**
+     * Two topics are equal when topic, score and text are all equal.
+     * The score is a boxed {@code Double}, so it is compared null-safely;
+     * unboxing it for {@code Double.compare} would throw on a {@code null} score.
+     */
+    @Override
+    public boolean equals(Object o) {
+        if (this == o) return true;
+        if (!(o instanceof Topic topic1)) return false;
+        return Objects.equals(score, topic1.score)
+                && Objects.equals(topic, topic1.topic)
+                && Objects.equals(text, topic1.text);
+    }
+
+    @Override
+    public int hashCode() {
+        return Objects.hash(topic, score, text);
+    }
+
+    /** Renders as {@code Topic[topic='...', score=..., text='...']}. */
+    @Override
+    public String toString() {
+        return new StringJoiner(", ", Topic.class.getSimpleName() + "[", "]")
+                .add("topic='" + topic + "'")
+                .add("score=" + score)
+                .add("text='" + text + "'")
+                .toString();
+    }
+
+
+    /**
+     * Converts the topic documents (Speech --> analysisResults --> topics) from MongoDB into topics.
+     * @param topicsDocs a list of Mongo documents
+     * @return a list of topics, one per document, in the same order
+     */
+    public static List<Topic> readTopicsFromMongo(List<Document> topicsDocs) {
+        List<Topic> topics = new ArrayList<>();
+        for (Document doc : topicsDocs) {
+            topics.add(new Topic(doc.getString("topic"),
+                                 doc.getDouble("score"),
+                                 doc.getString("text")
+                    ));
+        }
+        return topics;
+    }
+
+
+    /**
+     * "Condenses" topic information:
+     * the input list may contain the same topic several times. The result maps every
+     * topic to the sum of the scores of all its occurrences.
+     * A {@code null} score is counted as 0.0.
+     *
+     * @param topicsList the topics to condense
+     * @return a map from topic to its summed score
+     */
+    public static Map<String, Double> condenseTopicInformation(List<Topic> topicsList) {
+        Map<String, Double> condensedTopicInfo = new HashMap<>();
+
+        for (Topic t : topicsList) {
+            // Guard against a missing score so the addition cannot fail on unboxing null.
+            double score = (t.getScore() != null) ? t.getScore() : 0.0;
+            condensedTopicInfo.merge(t.getTopic(), score, Double::sum);
+        }
+
+        return condensedTopicInfo;
+    }
+}
diff --git a/src/main/java/org/texttechnologylab/project/gruppe_05_1/domain/nlp/VideoInformation.java b/src/main/java/org/texttechnologylab/project/gruppe_05_1/domain/nlp/VideoInformation.java
new file mode 100644
index 0000000..e33db12
--- /dev/null
+++ b/src/main/java/org/texttechnologylab/project/gruppe_05_1/domain/nlp/VideoInformation.java
@@ -0,0 +1,56 @@
+package org.texttechnologylab.project.gruppe_05_1.domain.nlp;
+
+import java.util.List;
+import java.util.Objects;
+import java.util.StringJoiner;
+
+/**
+ * Container for a list of {@link AudioToken}s.
+ */
+public class VideoInformation {
+    List<AudioToken>    audioTokens;
+
+    /** Creates an instance whose token list is {@code null}. */
+    public VideoInformation() {
+    }
+
+    /**
+     * @param audioTokens the audio tokens; the list is stored as given, not copied
+     */
+    public VideoInformation(List<AudioToken> audioTokens) {
+        this.audioTokens = audioTokens;
+    }
+
+    public List<AudioToken> getAudioTokens() {
+        return audioTokens;
+    }
+
+    public void setAudioTokens(List<AudioToken> audioTokens) {
+        this.audioTokens = audioTokens;
+    }
+
+    /** Two instances are equal when their audio token lists are equal. */
+    @Override
+    public boolean equals(Object o) {
+        if (o == this) {
+            return true;
+        }
+        if (o instanceof VideoInformation other) {
+            return Objects.equals(audioTokens, other.audioTokens);
+        }
+        return false;
+    }
+
+    @Override
+    public int hashCode() {
+        return Objects.hash(audioTokens);
+    }
+
+    /** Renders as {@code VideoInformation[audioTokens=...]}. */
+    @Override
+    public String toString() {
+        StringJoiner parts = new StringJoiner(", ", VideoInformation.class.getSimpleName() + "[", "]");
+        parts.add("audioTokens=" + audioTokens);
+        return parts.toString();
+    }
+}
diff --git a/src/main/java/org/texttechnologylab/project/gruppe_05_1/domain/nlp/html/SentimentOfSentence.java b/src/main/java/org/texttechnologylab/project/gruppe_05_1/domain/nlp/html/SentimentOfSentence.java
new file mode 100644
index 0000000..d96dcf0
--- /dev/null
+++ b/src/main/java/org/texttechnologylab/project/gruppe_05_1/domain/nlp/html/SentimentOfSentence.java
@@ -0,0 +1,130 @@
+package org.texttechnologylab.project.gruppe_05_1.domain.nlp.html;
+
+import java.util.Objects;
+
+/**
+ * Associates a sentence with its sentiment.
+ * It is a data container for rendering via FreeMarker.
+ */
+public class SentimentOfSentence {
+    int         begin;
+    int         end;
+    String      text;
+    // RGBA values for rendering
+    float  sentiment;      // overall sentiment --> used for the alpha (opacity) value --> 0..1
+    int  negative;          // red --> 0..255
+    int  neutral;           // 0..255, currently unused
+    int  positive;          // green --> 0..255
+
+    /** Creates an instance with all fields at their defaults. */
+    public SentimentOfSentence() {}
+
+    /**
+     * @param begin     begin position of the sentence
+     * @param end       end position of the sentence
+     * @param text      the sentence text
+     * @param sentiment overall sentiment (alpha value)
+     * @param negative  negative value (red)
+     * @param neutral   neutral value
+     * @param positive  positive value (green)
+     */
+    public SentimentOfSentence(int begin, int end, String text, float sentiment, int negative, int neutral, int positive) {
+        this.begin = begin;
+        this.end = end;
+        this.text = text;
+        this.sentiment = sentiment;
+        this.negative = negative;
+        this.neutral = neutral;
+        this.positive = positive;
+    }
+
+    public int getBegin() {
+        return begin;
+    }
+
+    public void setBegin(int begin) {
+        this.begin = begin;
+    }
+
+    public int getEnd() {
+        return end;
+    }
+
+    public void setEnd(int end) {
+        this.end = end;
+    }
+
+    public String getText() {
+        return text;
+    }
+
+    public void setText(String text) {
+        this.text = text;
+    }
+
+    public float getSentiment() {
+        return sentiment;
+    }
+
+    public void setSentiment(float sentiment) {
+        this.sentiment = sentiment;
+    }
+
+    public int getNegative() {
+        return negative;
+    }
+
+    public void setNegative(int negative) {
+        this.negative = negative;
+    }
+
+    public int getNeutral() {
+        return neutral;
+    }
+
+    public void setNeutral(int neutral) {
+        this.neutral = neutral;
+    }
+
+    public int getPositive() {
+        return positive;
+    }
+
+    public void setPositive(int positive) {
+        this.positive = positive;
+    }
+
+    /** Two instances are equal when all seven fields match. */
+    @Override
+    public boolean equals(Object o) {
+        if (!(o instanceof SentimentOfSentence other)) {
+            return false;
+        }
+        boolean samePosition = begin == other.begin && end == other.end;
+        boolean sameScores = Float.compare(sentiment, other.sentiment) == 0
+                && negative == other.negative
+                && neutral == other.neutral
+                && positive == other.positive;
+        return samePosition && sameScores && Objects.equals(text, other.text);
+    }
+
+    @Override
+    public int hashCode() {
+        return Objects.hash(begin, end, text, sentiment, negative, neutral, positive);
+    }
+
+    /** Lists all seven fields, prefixed with the class name. */
+    @Override
+    public String toString() {
+        return "SentimentOfSentence{"
+                + "begin=" + begin
+                + ", end=" + end
+                + ", text='" + text + '\''
+                + ", sentiment=" + sentiment
+                + ", negative=" + negative
+                + ", neutral=" + neutral
+                + ", positive=" + positive
+                + '}';
+    }
+}
+
diff --git a/src/main/java/org/texttechnologylab/project/gruppe_05_1/nlp/NlpUtils.java b/src/main/java/org/texttechnologylab/project/gruppe_05_1/nlp/NlpUtils.java
index 6c4c37a..e01e647 100644
--- a/src/main/java/org/texttechnologylab/project/gruppe_05_1/nlp/NlpUtils.java
+++ b/src/main/java/org/texttechnologylab/project/gruppe_05_1/nlp/NlpUtils.java
@@ -360,6 +360,8 @@ public class NlpUtils {
                 bulkOperations.add(new UpdateOneModel<>(updateFilter, update));
             }
             if (!bulkOperations.isEmpty()) {
+                System.out.println("Processing of " + bulkOperations.size() + " documents finished");
+                System.out.println("uploading...");
                 mongoDBHandler.bulkWriteNlpData(bulkOperations);
                 Logger.debug("Bulk write completed for " + bulkOperations.size() + " documents.");
                 mongoDBHandler.close();
diff --git a/src/main/java/org/texttechnologylab/project/gruppe_05_1/nlp/XmiExtractor.java b/src/main/java/org/texttechnologylab/project/gruppe_05_1/nlp/XmiExtractor.java
index 30566ff..89fee0d 100644
--- a/src/main/java/org/texttechnologylab/project/gruppe_05_1/nlp/XmiExtractor.java
+++ b/src/main/java/org/texttechnologylab/project/gruppe_05_1/nlp/XmiExtractor.java
@@ -1,15 +1,16 @@
 package org.texttechnologylab.project.gruppe_05_1.nlp;
 
-import com.mongodb.client.MongoCollection;
-import com.mongodb.client.MongoDatabase;
 import com.mongodb.client.model.Filters;
 import com.mongodb.client.model.UpdateOneModel;
 import com.mongodb.client.model.WriteModel;
-import com.mongodb.client.result.UpdateResult;
 import org.apache.uima.fit.util.JCasUtil;
 import org.bson.Document;
 import java.io.*;
-import java.net.URL;
+import java.nio.charset.StandardCharsets;
+import java.util.concurrent.ExecutorService;
+import java.util.concurrent.Executors;
+import java.util.concurrent.Future;
+import java.util.concurrent.atomic.AtomicInteger;
 import java.util.zip.*;
 import java.util.*;
 import java.util.stream.Collectors;
@@ -18,12 +19,7 @@ import org.bson.conversions.Bson;
 import org.apache.uima.fit.factory.JCasFactory;
 import org.apache.uima.jcas.JCas;
 import org.apache.uima.cas.impl.XmiCasDeserializer;
-import de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Token;
-import de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Sentence;
-import de.tudarmstadt.ukp.dkpro.core.api.syntax.type.dependency.Dependency;
-import de.tudarmstadt.ukp.dkpro.core.api.ner.type.NamedEntity;
 import org.texttechnologylab.project.gruppe_05_1.database.MongoDBHandler;
-import org.hucompute.textimager.uima.type.category.CategoryCoveredTagged;
 import org.texttechnologylab.project.gruppe_05_1.util.Logger;
 
 import static org.texttechnologylab.project.gruppe_05_1.Main.JCAS_SPEECHES_TYPESYSTEM_DIR;
@@ -31,29 +27,45 @@ import static org.texttechnologylab.project.gruppe_05_1.Main.JCAS_SPEECHES_TYPES
 
 public class XmiExtractor {
 
-    private List<WriteModel<Document>> bulkOperations;
-    private MongoDBHandler mongoDBHandler;
+    private final List<WriteModel<Document>> bulkOperations = Collections.synchronizedList(new ArrayList<>());
+    private final MongoDBHandler mongoDBHandler;
     private static final int BATCH_SIZE = 1000;
-    private int processedCount = 0;
+    private static final AtomicInteger processedCount = new AtomicInteger(0);
+
     public XmiExtractor() {
         mongoDBHandler = new MongoDBHandler();
-        this.bulkOperations = new ArrayList<>();
     }
 
     public void extractAndUploadXmiData() throws IOException {
         InputStream resourceStream = getClass().getClassLoader().getResourceAsStream("speeches/20.zip");
+        if (resourceStream == null) {
+            throw new IOException("20.zip nicht gefunden im Ressourcenordner /speeches");
+        }
+        ExecutorService executor = Executors.newFixedThreadPool(Runtime.getRuntime().availableProcessors());
+        List<Future<?>> futures = new ArrayList<>();
         try (ZipInputStream zis = new ZipInputStream(resourceStream)) {
             ZipEntry entry;
             while ((entry = zis.getNextEntry()) != null) {
                 if (entry.getName().endsWith(".xmi.gz")) {
-                    ByteArrayOutputStream baos = new ByteArrayOutputStream();
-                    byte[] buffer = new byte[1024];
-                    int len;
-                    while ((len = zis.read(buffer)) > 0) {
-                        baos.write(buffer, 0, len);
+                    File tempFile = File.createTempFile("xmi_entry_", ".xmi.gz");
+                    try (FileOutputStream fos = new FileOutputStream(tempFile)) {
+                        byte[] buffer = new byte[1024];
+                        int len;
+                        while ((len = zis.read(buffer)) > 0) {
+                            fos.write(buffer, 0, len);
+                        }
                     }
-                    byte[] entryData = baos.toByteArray();
-                    processXmiGzStream(new ByteArrayInputStream(entryData), entry.getName());
+                    ZipEntry finalEntry = entry;
+                    Future<?> future = executor.submit(() -> {
+                        try (FileInputStream fis = new FileInputStream(tempFile)) {
+                            processXmiGzStream(fis, finalEntry.getName());
+                        } catch (IOException e) {
+                            e.printStackTrace();
+                        } finally {
+                            tempFile.delete();
+                        }
+                    });
+                    futures.add(future);
                 }
                 zis.closeEntry();
             }
@@ -61,7 +73,16 @@ public class XmiExtractor {
             Logger.error("Error reading XMI data from ZIP file.");
             Logger.error(e.getMessage());
         }
-        flushBatch();
+        for (Future<?> future : futures) {
+            try {
+                future.get();
+            } catch (Exception e) {
+                e.printStackTrace();
+            }
+        }
+        executor.shutdown();
+        flushBatch(); // Synchronously upload the remaining batch
+        mongoDBHandler.close();
     }
 
     private void processXmiGzStream(InputStream inputStream, String filename) {
@@ -70,12 +91,10 @@ public class XmiExtractor {
             jCas = JCasFactory.createJCas(JCAS_SPEECHES_TYPESYSTEM_DIR);
             XmiCasDeserializer.deserialize(gis, jCas.getCas(), true);
 
-            // Build structured analysisResults Document
             Document analysisResults = new Document();
 
-            // Tokens: Include POS, Lemma, etc.
             List<Document> tokens = new ArrayList<>();
-            for (Token token : JCasUtil.select(jCas, Token.class)) {
+            for (de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Token token : JCasUtil.select(jCas, de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Token.class)) {
                 Document tokenDoc = new Document()
                         .append("text", token.getCoveredText())
                         .append("pos", token.getPos().getPosValue())
@@ -84,15 +103,13 @@ public class XmiExtractor {
             }
             analysisResults.append("tokens", tokens);
 
-            // Sentences
-            List<String> sentences = JCasUtil.select(jCas, Sentence.class).stream()
-                    .map(Sentence::getCoveredText)
+            List<String> sentences = JCasUtil.select(jCas, de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Sentence.class).stream()
+                    .map(de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Sentence::getCoveredText)
                     .collect(Collectors.toList());
             analysisResults.append("sentences", sentences);
 
-            // Dependencies
             List<Document> dependencies = new ArrayList<>();
-            for (Dependency dep : JCasUtil.select(jCas, Dependency.class)) {
+            for (de.tudarmstadt.ukp.dkpro.core.api.syntax.type.dependency.Dependency dep : JCasUtil.select(jCas, de.tudarmstadt.ukp.dkpro.core.api.syntax.type.dependency.Dependency.class)) {
                 Document depDoc = new Document()
                         .append("type", dep.getDependencyType())
                         .append("governor", dep.getGovernor().getCoveredText())
@@ -101,9 +118,8 @@ public class XmiExtractor {
             }
             analysisResults.append("dependencies", dependencies);
 
-            // Named Entities
             List<Document> namedEntities = new ArrayList<>();
-            for (NamedEntity ne : JCasUtil.select(jCas, NamedEntity.class)) {
+            for (de.tudarmstadt.ukp.dkpro.core.api.ner.type.NamedEntity ne : JCasUtil.select(jCas, de.tudarmstadt.ukp.dkpro.core.api.ner.type.NamedEntity.class)) {
                 Document neDoc = new Document()
                         .append("text", ne.getCoveredText())
                         .append("type", ne.getValue());
@@ -111,23 +127,16 @@ public class XmiExtractor {
             }
             analysisResults.append("namedEntities", namedEntities);
 
-            // Sentiment
             List<Document> sentiments = new ArrayList<>();
-            for (org.hucompute.textimager.uima.type.Sentiment sentiment :
-                    JCasUtil.select(jCas, org.hucompute.textimager.uima.type.Sentiment.class)) {
-
+            for (org.hucompute.textimager.uima.type.Sentiment sentiment : JCasUtil.select(jCas, org.hucompute.textimager.uima.type.Sentiment.class)) {
                 Document sentimentDoc = new Document()
                         .append("begin", sentiment.getBegin())
                         .append("end", sentiment.getEnd())
                         .append("score", sentiment.getSentiment())
                         .append("subjectivity", sentiment.getSubjectivity());
-
-                // Check for VaderSentiment subtype
                 if (sentiment instanceof org.hucompute.textimager.uima.type.VaderSentiment) {
-                    org.hucompute.textimager.uima.type.VaderSentiment vader =
-                            (org.hucompute.textimager.uima.type.VaderSentiment) sentiment;
-                    sentimentDoc
-                            .append("pos", vader.getPos())
+                    org.hucompute.textimager.uima.type.VaderSentiment vader = (org.hucompute.textimager.uima.type.VaderSentiment) sentiment;
+                    sentimentDoc.append("pos", vader.getPos())
                             .append("neu", vader.getNeu())
                             .append("neg", vader.getNeg());
                 }
@@ -136,39 +145,36 @@ public class XmiExtractor {
             analysisResults.append("sentiments", sentiments);
 
             List<Document> topics = new ArrayList<>();
-            for (CategoryCoveredTagged topic : JCasUtil.select(jCas, CategoryCoveredTagged.class)) {
+            for (org.hucompute.textimager.uima.type.category.CategoryCoveredTagged topic : JCasUtil.select(jCas, org.hucompute.textimager.uima.type.category.CategoryCoveredTagged.class)) {
                 Document topicDoc = new Document()
                         .append("topic", topic.getValue())
                         .append("score", topic.getScore())
-                        .append("tags", topic.getTags())
                         .append("text", topic.getCoveredText());
                 topics.add(topicDoc);
             }
             topics.sort((d1, d2) -> Double.compare(d2.getDouble("score"), d1.getDouble("score")));
             analysisResults.append("topics", topics);
 
-
-            // Upload structured Document to MongoDB
             String speechKey = extractSpeechKeyFromFilename(filename);
             if (speechKey != null) {
                 Bson filter = Filters.eq("speechKey", speechKey);
                 Bson update = new Document("$set", new Document("analysisResults", analysisResults));
                 UpdateOneModel<Document> updateModel = new UpdateOneModel<>(filter, update);
                 bulkOperations.add(updateModel);
-                if (bulkOperations.size() >= BATCH_SIZE) {
-                    flushBatch();
+                synchronized (bulkOperations) {
+                    if (bulkOperations.size() >= BATCH_SIZE) {
+                        Logger.info("BATCH_SIZE to Upload: " + bulkOperations.size());
+                        flushBatch();
+                    }
                 }
-                processedCount++;
-                if (processedCount % 5000 == 0) {
-                    Logger.info("Processed speeches: " + processedCount);
+                int count = processedCount.incrementAndGet();
+                if (count % 1000 == 0) {
+                    Logger.info("Processed speeches: " + count);
                 }
-
             }
-
         } catch (Exception e) {
             e.printStackTrace();
-        }
-        finally {
+        } finally {
             if (jCas != null) {
                 jCas.reset();
             }
@@ -180,23 +186,11 @@ public class XmiExtractor {
         return baseName.replace("20/", "");
     }
 
-    private void flushBatch() {
+    private synchronized void flushBatch() {
         if (!bulkOperations.isEmpty()) {
             mongoDBHandler.bulkWriteNlpData(bulkOperations);
             bulkOperations.clear();
         }
     }
-
-
-    /*
-    public static void main(String[] args) {
-        try {
-            XmiExtractor extractor = new XmiExtractor(database);
-            extractor.extractAndUploadXmiData();
-            System.out.println("Processing complete.");
-        } catch (Exception e) {
-            e.printStackTrace();
-        }
-    } */
 }
 
diff --git a/src/main/java/org/texttechnologylab/project/gruppe_05_1/rest/ParlamentarierController.java b/src/main/java/org/texttechnologylab/project/gruppe_05_1/rest/ParlamentarierController.java
index 8402965..c522aa2 100644
--- a/src/main/java/org/texttechnologylab/project/gruppe_05_1/rest/ParlamentarierController.java
+++ b/src/main/java/org/texttechnologylab/project/gruppe_05_1/rest/ParlamentarierController.java
@@ -89,6 +89,7 @@ public class ParlamentarierController {
         String id = ctx.pathParam("id");
         Logger.info("getParlamentarierDetails, ID = " + id);
 
+        // Alle Details des Abgeordneten (Vor- und Nachname, Geburts- und Sterbeort, Partei, Vita etc.)
         ParlamentarierDetails pd = MongoPprUtils.getParlamentarierDetailsByID(id);
 
         Map<String, Object> attributes = new HashMap<>();
@@ -101,6 +102,10 @@ public class ParlamentarierController {
             attributes.put("speechesPlaceholder", emptyList);
         }
 
+        // Foto des Abgeordneten
+        String picture = MongoPprUtils.getParlamentarierPictureByID(id);
+        attributes.put("pic", picture);
+
         ctx.render("parlamentarierDetails.ftl", attributes);
     }
 
diff --git a/src/main/java/org/texttechnologylab/project/gruppe_05_1/rest/SpeechController.java b/src/main/java/org/texttechnologylab/project/gruppe_05_1/rest/SpeechController.java
index df6e5cb..f6bb0ad 100644
--- a/src/main/java/org/texttechnologylab/project/gruppe_05_1/rest/SpeechController.java
+++ b/src/main/java/org/texttechnologylab/project/gruppe_05_1/rest/SpeechController.java
@@ -5,12 +5,16 @@ import io.javalin.openapi.*;
 import org.texttechnologylab.project.gruppe_05_1.database.MongoPprUtils;
 import org.texttechnologylab.project.gruppe_05_1.domain.html.HtmlSpeech;
 import org.texttechnologylab.project.gruppe_05_1.domain.html.ParlamentarierDetails;
+import org.texttechnologylab.project.gruppe_05_1.domain.nlp.Token;
+import org.texttechnologylab.project.gruppe_05_1.domain.nlp.Topic;
 import org.texttechnologylab.project.gruppe_05_1.domain.speech.SpeechMetaData;
 import org.texttechnologylab.project.gruppe_05_1.xml.speeches.Interfaces.Speech;
 
+import java.util.ArrayList;
 import java.util.HashMap;
 import java.util.List;
 import java.util.Map;
+import java.util.stream.Collectors;
 
 public class SpeechController {
     /**
@@ -68,6 +72,38 @@ public class SpeechController {
         HtmlSpeech speech = MongoPprUtils.getSpeechByKey(redeId);
         attributes.put("s", speech);
 
+        // NLP: Topic
+        if ((speech.getNlp() != null) && (speech.getNlp().getTopics() != null)) {
+            Map<String, Double> topics = Topic.condenseTopicInformation(speech.getNlp().getTopics());   // Daten "verdichten"...
+            // ... und ersetzen
+            speech.getNlp().setTopics(
+                    topics.entrySet().stream()
+                            .map(me -> new Topic(me.getKey(), me.getValue(), null))
+                            .collect(Collectors.toList()));
+        }
+
+        // NLP: POS
+        if (speech.getNlp() != null && speech.getNlp().getTokens() != null) {
+            List<Token> tokens = speech.getNlp().getTokens();
+            // Count how often each POS value occurs among the tokens of this speech.
+            Map<String, Integer> posCounts = Token.countPOS(tokens);
+            // Token is reused as a carrier here: text holds the POS value, pos holds its count.
+            List<Token> posList = posCounts.entrySet().stream()
+                    .map(entry -> new Token(entry.getKey(), String.valueOf(entry.getValue()), "")) // Lemma remains empty
+                    .collect(Collectors.toList());
+            System.out.println("DEBUG: Sending POS List to NLP - " + posList);
+            speech.getNlp().setPosList((List) posList);
+        } else {
+            System.out.println("DEBUG: POS List is EMPTY");
+            // This branch is also reached when getNlp() itself is null, so guard before dereferencing it.
+            if (speech.getNlp() != null) {
+                speech.getNlp().setPosList((List) new ArrayList<Token>()); // Ensure it's never null
+            }
+        }
+
+        // TODO: Token wird momentan etwas komisch abgespeichert, da im Attribut text die POS art steht, und in pos die Anzahl dieser POS arten. Umstrukturieren damit keine Verwirrung herrscht
+
+
         ctx.render("speech.ftl", attributes);
     }
 
diff --git a/src/main/java/org/texttechnologylab/project/gruppe_05_1/util/PPRUtils.java b/src/main/java/org/texttechnologylab/project/gruppe_05_1/util/PPRUtils.java
index bd32967..36eed86 100644
--- a/src/main/java/org/texttechnologylab/project/gruppe_05_1/util/PPRUtils.java
+++ b/src/main/java/org/texttechnologylab/project/gruppe_05_1/util/PPRUtils.java
@@ -23,6 +23,8 @@ import java.io.*;
 import java.net.HttpURLConnection;
 import java.net.URL;
 import java.util.*;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
 import java.util.stream.Collectors;
 import java.util.zip.ZipEntry;
 import java.util.zip.ZipInputStream;
@@ -423,4 +425,61 @@ public abstract class PPRUtils {
         return fileNames;
     }
 
+    /**
+     * Pages through the Bundestag open-data filter list and passes every linked protocol XML whose
+     * session number {@code mongoDBHandler.sessionExists} does not report as stored to {@code downloadAndParseXML}.
+     * Paging ends on an empty page, a missing, invalid or non-advancing {@code data-nextoffset}, or an I/O error.
+     * @param mongoDBHandler handler used to check whether a session is already stored
+     * @return the parsed XML documents of the sessions not yet stored (possibly empty, never null)
+     */
+    public static Set<org.w3c.dom.Document> checkAndProcessNewProtocols(MongoDBHandler mongoDBHandler) {
+        Set<org.w3c.dom.Document> newProtocols = new HashSet<>();
+        int offset = 0;
+        int limit = 10;
+        boolean hasMore = true;
+        while (hasMore) {
+            String queryUrl = "https://www.bundestag.de/ajax/filterlist/de/services/opendata/866354-866354?limit="
+                    + limit + "&noFilterSet=true&offset=" + offset;
+            try {
+                org.jsoup.nodes.Document htmlDoc = Jsoup.connect(queryUrl).get();
+                Elements sessionLinks = htmlDoc.select("a.bt-link-dokument");
+                if (sessionLinks.isEmpty()) break;
+                for (org.jsoup.nodes.Element link : sessionLinks) {
+                    String xmlUrl = link.attr("href");
+                    String fileName = xmlUrl.substring(xmlUrl.lastIndexOf('/') + 1);  // e.g. "20212.xml"
+                    String sessionNumberFull = fileName.replace(".xml", ""); // extension stripped, e.g. "20212"
+                    String sessionNumber = (sessionNumberFull.startsWith("20") && sessionNumberFull.length() > 2)
+                            ? sessionNumberFull.substring(2) : sessionNumberFull;  // leading "20" dropped: "20212" -> "212"
+                    if (!mongoDBHandler.sessionExists(sessionNumber)) {
+                        try {
+                            org.w3c.dom.Document xmlDoc = downloadAndParseXML(xmlUrl);
+                            newProtocols.add(xmlDoc);
+                        } catch (Exception ex) {
+                            Logger.error("Error processing XML for session " + sessionNumber + ": " + ex.getMessage());
+                        }
+                    }
+                }
+                org.jsoup.nodes.Element metaSlider = htmlDoc.selectFirst("div.meta-slider");
+                if (metaSlider != null && metaSlider.hasAttr("data-nextoffset")) {
+                    int nextOffset = Integer.parseInt(metaSlider.attr("data-nextoffset"));
+                    if (nextOffset <= offset) {
+                        hasMore = false;  // offset did not advance, so stop instead of looping forever
+                    } else {
+                        offset = nextOffset;
+                    }
+                } else {
+                    hasMore = false;
+                }
+            } catch (IOException e) {
+                Logger.error("Error loading page: " + queryUrl + " : " + e.getMessage());
+                break;
+            } catch (NumberFormatException e) {
+                Logger.error("Invalid data-nextoffset on page: " + queryUrl + " : " + e.getMessage());
+                break;  // keep the protocols collected so far instead of aborting the whole run
+            }
+        }
+        return newProtocols;
+    }
+
+
 }
diff --git a/src/main/java/org/texttechnologylab/project/gruppe_05_1/xml/speeches/SpeechParser.java b/src/main/java/org/texttechnologylab/project/gruppe_05_1/xml/speeches/SpeechParser.java
index c4c1ef6..1750a36 100644
--- a/src/main/java/org/texttechnologylab/project/gruppe_05_1/xml/speeches/SpeechParser.java
+++ b/src/main/java/org/texttechnologylab/project/gruppe_05_1/xml/speeches/SpeechParser.java
@@ -40,7 +40,6 @@ public class SpeechParser {
     }
 
     public List<Session> parseAllSessions() {
-        List<Session> sessionsEmpty = new ArrayList<>();
         List<Session> sessions = new ArrayList<>();
         this.speeches = new ArrayList<>();
         this.agendaItems = new ArrayList<>();
@@ -61,6 +60,26 @@ public class SpeechParser {
 
     }
 
+    /** Parses each given XML document via a temporary file; documents that fail are logged and skipped. */
+    public List<Session> parseAllSessions(Set<Document> xmlDocuments) {
+        List<Session> sessions = new ArrayList<>();
+        this.speeches = new ArrayList<>();
+        this.agendaItems = new ArrayList<>();
+        for (org.w3c.dom.Document xmlDoc : xmlDocuments) {
+            File tempFile = null;
+            try {
+                tempFile = convertDocumentToFile(xmlDoc);
+                sessions.add(parseSessionFile(tempFile));
+            } catch (Exception e) {
+                Logger.error("Error parsing XML document.");
+                Logger.error(e.getMessage());
+            } finally {
+                if (tempFile != null) tempFile.delete();  // remove the temp file even when parsing failed
+            }
+        }
+        Logger.info("All new sessions parsed");  // logged once the loop has actually finished
+        return sessions;
+    }
     private Session parseSessionFile(File file) throws Exception {
         //file = removeDoctypeAnnotation(file.getAbsolutePath());
 
diff --git a/src/main/resources/members_of_parliament_image_crawler.py b/src/main/resources/members_of_parliament_image_crawler.py
index 9bb2340..2759e4c 100644
--- a/src/main/resources/members_of_parliament_image_crawler.py
+++ b/src/main/resources/members_of_parliament_image_crawler.py
@@ -763,7 +763,7 @@ members = [
 ]
 
 # Base URL for querying (with placeholders for last name and first name)
-base_url = "https://bilddatenbank.bundestag.de/search/picture-result?query={0}%2C+{1}&filterQuery%5Bereignis%5D%5B%5D=Portr%C3%A4t%2FPortrait&sortVal=2"
+base_url = "https://bilddatenbank.bundestag.de/search/picture-result?query={0}+{1}&sortVal=2"
 #base_url = "https://bilddatenbank.bundestag.de/search/picture-result?filterQuery%5Bname%5D%5B%5D={0}l%2C+{1}&filterQuery%5Bereignis%5D%5B%5D=Portr%C3%A4t%2FPortrait&sortVal=2"
 
 def fetch_image(lastname, firstname):
diff --git a/src/main/resources/templates/parlamentarierDetails.ftl b/src/main/resources/templates/parlamentarierDetails.ftl
index bb8df72..63adf8d 100644
--- a/src/main/resources/templates/parlamentarierDetails.ftl
+++ b/src/main/resources/templates/parlamentarierDetails.ftl
@@ -17,10 +17,9 @@
         <h1>${p.vorname}  ${p.nachname} (${p.partei})</h1>
     </header>
 
-    <br>
     <br>
     <#if pic??>
-        <img style="max-width: 400px; height: auto;" src="data:image/jpeg;base64,${pic}" alt="Foto von ${p.vorname}  ${p.nachname} (${p.partei})" />
+         <img style="max-width: 400px; height: auto;" src="data:image/jpeg;base64,${pic}" alt="Foto von ${p.vorname}  ${p.nachname} (${p.partei})" />
     <#else>
         <h2>(kein Foto verfügbar)</h2>
     </#if>
diff --git a/src/main/resources/templates/speech.ftl b/src/main/resources/templates/speech.ftl
index a0ef5a3..7a40243 100644
--- a/src/main/resources/templates/speech.ftl
+++ b/src/main/resources/templates/speech.ftl
@@ -23,11 +23,19 @@
 
     <h2>Rede ${s.speechKey} </h2>
 
-    <main>
-        <#list s.content as c>
-            <#include "speechContent.ftl">
-        </#list>
-    </main>
+    <#list s.content as c>
+        <#include "speechContent.ftl">
+    </#list>
+
+    <br><br>
+    <#if s.nlp??>
+         <h2>NLP Information</h2>
+         <#assign nlp = s.nlp><#-- pass the NLP object itself; a quoted interpolation would pass only its string form -->
+         <#include "nlp.ftl">
+    <#else>
+        <h2>Keine NLP Information verfügbar für diese Rede</h2>
+    </#if>
+    <br> <br>
 
 
 </body>
diff --git a/src/main/resources/templates/topicsBubbleChart.ftl b/src/main/resources/templates/topicsBubbleChart.ftl
index 4dba392..bef23fe 100644
--- a/src/main/resources/templates/topicsBubbleChart.ftl
+++ b/src/main/resources/templates/topicsBubbleChart.ftl
@@ -48,4 +48,3 @@
         });
 
 </script>
-