From 861e14b64db71e588db1ef816c4a447a1319abce Mon Sep 17 00:00:00 2001
From: vysitor <bac.baconator@gmail.com>
Date: Sat, 22 Mar 2025 17:20:09 +0100
Subject: [PATCH] Improved loading speech, work in progress, hotfixed
 RadarChart issue

---
 .../project/gruppe_05_1/Main.java             |   3 +
 .../gruppe_05_1/database/MongoDBHandler.java  |   6 +
 .../gruppe_05_1/database/MongoPprUtils.java   | 151 ++++++++++++++----
 .../gruppe_05_1/rest/FrontEndController.java  |   2 +-
 .../gruppe_05_1/rest/SpeechController.java    |   2 +-
 .../templates/sentimentsRadarChart.ftl        |   6 +-
 6 files changed, 136 insertions(+), 34 deletions(-)
diff --git a/src/main/java/org/texttechnologylab/project/gruppe_05_1/Main.java b/src/main/java/org/texttechnologylab/project/gruppe_05_1/Main.java
index 85aeddb..098d601 100644
--- a/src/main/java/org/texttechnologylab/project/gruppe_05_1/Main.java
+++ b/src/main/java/org/texttechnologylab/project/gruppe_05_1/Main.java
@@ -107,6 +107,9 @@ public class Main {
             Logger.pink("Adding Speeches to DB...");
             mongoDBHandler.insertSpeeches(speechIndex.getSpeeches());
 
+            Logger.pink("Building Metadata...");
+            MongoPprUtils.rebuildMetadata();
+
             // only upload member photos if database was empty by default, not when speeches are force-overwritten
             if (!FORCE_UPLOAD_SPEECHES) {
                 Logger.pink("Uploading Member Photos to DB...");
diff --git a/src/main/java/org/texttechnologylab/project/gruppe_05_1/database/MongoDBHandler.java b/src/main/java/org/texttechnologylab/project/gruppe_05_1/database/MongoDBHandler.java
index cde1b92..fc263d4 100644
--- a/src/main/java/org/texttechnologylab/project/gruppe_05_1/database/MongoDBHandler.java
+++ b/src/main/java/org/texttechnologylab/project/gruppe_05_1/database/MongoDBHandler.java
@@ -417,6 +417,12 @@ public class MongoDBHandler {
         return doc;
     }
 
+    /**
+     * Returns a field value as a Double, even if it is stored in the database as an Integer or a String.
+     * @param doc the Mongo document
+     * @param fieldName the name of the field
+     * @return the field value as a Double
+     */
     public static Double getFieldAsDouble(Document doc, String fieldName) {
         Object obj = doc.get(fieldName);
         if (obj instanceof Double) return (Double)  obj;
diff --git a/src/main/java/org/texttechnologylab/project/gruppe_05_1/database/MongoPprUtils.java b/src/main/java/org/texttechnologylab/project/gruppe_05_1/database/MongoPprUtils.java
index 73c6d90..cfd5679 100644
--- a/src/main/java/org/texttechnologylab/project/gruppe_05_1/database/MongoPprUtils.java
+++ b/src/main/java/org/texttechnologylab/project/gruppe_05_1/database/MongoPprUtils.java
@@ -1,9 +1,6 @@
 package org.texttechnologylab.project.gruppe_05_1.database;
 
-import com.mongodb.client.FindIterable;
-import com.mongodb.client.MongoCollection;
-import com.mongodb.client.MongoCursor;
-import com.mongodb.client.MongoDatabase;
+import com.mongodb.client.*;
 import com.mongodb.client.model.Accumulators;
 import com.mongodb.client.model.Aggregates;
 import com.mongodb.client.model.Filters;
@@ -648,6 +645,11 @@ public class MongoPprUtils {
         } else return doc.getString("base64");
     }
 
+    // - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+
+    // Metadata
+
+
     /**
      * Aktualisiert (or erzeugt, falls nicht bereits vorhanden) diverse Metadaten:
      * - Die Liste der Parteien/Fraktionen, wie sie im Speaker-Collection stehen
@@ -661,30 +663,6 @@ public class MongoPprUtils {
         List<String> distinctPartiesOfSpeakers = getSpeakerCollection().distinct("party", String.class).into(new java.util.ArrayList<>());
         List<String> distinctPartiesFromSpeeches = getSpeechCollection().distinct("fraction", String.class).into(new java.util.ArrayList<>());
 
-        Logger.info("Collecting Topics Information");
-        Set<String> topics = new HashSet<>();
-
-        // Aggregation pipeline
-        List<Bson> pipeline = List.of(
-                Aggregates.unwind("$analysisResults.topics"),  // Unwind the "topics" array
-                Aggregates.project(Projections.fields(Projections.include("analysisResults.topics.topic"))),  // Project only the "topic" field
-                Aggregates.group(null, Accumulators.addToSet("distinctTopics", "$analysisResults.topics.topic"))  // Group to get distinct values
-        );
-        List<String> topicsList = null;
-        List<Document> results = getSpeechCollection().aggregate(pipeline).into(new java.util.ArrayList<>());
-        // Extract and print all distinct "topic" values
-        if (!results.isEmpty()) {
-            Document result = results.get(0); // Get the first (and only) document
-            List<String> distinctTopics = result.getList("distinctTopics", String.class);
-            topicsList = distinctTopics;
-            for (String topic : distinctTopics) {
-                System.out.println(topic);
-            }
-        } else {
-            System.out.println("No topics found.");
-        }
-
-
         Logger.info("Updating Metadata Collection: begin");
 
         MongoDBHandler.createCollection(db, METADATA_COLLECTION_NAME);
@@ -701,11 +679,126 @@ public class MongoPprUtils {
         metadataCollection.replaceOne(filterPartiesOfSpeakers, partiesDocOfSpeakers, new com.mongodb.client.model.ReplaceOptions().upsert(true));
 
 
+        Logger.info("Enriching Speech Information: begin");
+        enrichSpeechDocuments();
+        Logger.info("Enriching Speech Information: end");
+
+        Logger.info("Collecting Topics Information");
+
+        Document unwindStage = new Document("$unwind", "$topics"); // Deconstruct the "topics" array
+        Document groupStage = new Document("$group", new Document("_id", "$topics")); // Group by "topics"
+        Document projectStage = new Document("$project", new Document("topic", "$_id").append("_id", 0)); // Optionally format the result
+
+        // Execute the aggregation
+        AggregateIterable<Document> result = getSpeechCollection().aggregate(Arrays.asList(unwindStage, groupStage, projectStage));
+        Set<String> topics = new HashSet<>();
+        for (Document doc : result) {
+            topics.add(doc.getString("topic"));
+        }
+
         Document filterTopics = new Document("type", "topics");
         Document topicsDoc = MongoDBHandler.createDocument(false, Map.of("type", "topics",
-                "value", topicsList));
+                "value", topics));
         metadataCollection.replaceOne(filterTopics, topicsDoc, new com.mongodb.client.model.ReplaceOptions().upsert(true));
 
         Logger.info("Updating Metadata Collection: end");
     }
+
+    /** Returns a mutable copy of the "value" list of the metadata document with type "parties_of_speakers"; empty if none. */
+    public static List<String> getAllPartiesOfSpeakers() {
+        Document doc = MongoDBHandler.findFirstDocumentInCollection(getMetadataCollection(), "type", "parties_of_speakers");
+        if (doc == null) return new ArrayList<>();
+        // The List.of() default keeps a metadata document without "value" from causing an NPE in the copy constructor.
+        return new ArrayList<>(doc.getList("value", String.class, List.of()));
+    }
+
+
+    public static final List<String> ALL_PARTIES_FROM_SPEECHES = java.util.Collections.unmodifiableList(Arrays.asList(
+            "Afd", "BSW", "GRÜNEN", "CDU/CSU", "LINKE", "FDP", // NOTE(review): "Afd" casing — confirm it matches the stored "fraction" values
+            "Fraktionslos" /* also occurs as "fraktionslos"! */,
+            "SPD",
+            "keine" /* stands for the null value */
+    )); // read-only wrapper: this shared constant is handed out directly by getAllPartiesFromSpeeches()
+
+    /**
+     * Returns the list of parties/fractions that occur in the list of speeches (the ALL_PARTIES_FROM_SPEECHES constant).
+     * This list is used to filter the speeches on the corresponding page.
+     * Because the data quality of this field is extremely poor, some workarounds are needed here:
+     * - Bündnis 90 / Die Grünen occurs in 5 different spellings
+     * - Die Linke likewise occurs in 5 different spellings
+     * - Because of the differing spellings, the frontend has to work with pattern matching
+     * - 6 speeches carry "SPDCDU/CSU"; these speeches are then not considered by the filter
+     * - 3561 of the 25387 speeches have no party/fraction recorded; that is too many to simply ignore, hence the entry "keine"
+     * - Both spellings "Fraktionslos" (166 speeches) and "fraktionslos" (311 speeches) occur
+     * @return {@code List<String>} of all parties/fractions that occur in the list of speeches
+     */
+    public static List<String> getAllPartiesFromSpeeches() {
+        return ALL_PARTIES_FROM_SPEECHES;
+    }
+
+    /**
+     * Enriches the speech documents with additional fields:
+     * - date and time of the speech (parsed and as text): dateTime, dateTimeString
+     * - agenda title: agendaTitel
+     * - the topics of the speech from the NLP analysis: topics
+     */
+    public static void enrichSpeechDocuments() {
+
+        MongoCollection<Document> collection = getSpeechCollection();
+        FindIterable<Document> documents = collection.find(); // no filter: every speech document is visited
+
+        for (Document doc : documents) {
+            // Enrich with Info from Session & Agenda, which is always available
+            if ( ! doc.containsKey("dateTime")) { // only documents that do not carry the key yet
+                int sessionId = doc.getInteger("sessionId"); // NOTE(review): unboxing throws an NPE if the field is missing
+                int agendaItemId = doc.getInteger("agendaItemId");
+                String agendaTitel = getAgendaTitle(sessionId, agendaItemId);
+                LocalDateTime dateTime = null;
+                String dateTimeString = getSessionDateTime(sessionId);
+                if (dateTimeString != null) {
+                    // Try the known formats in order; the first non-null parse result wins
+                    for (String format : Arrays.asList("dd.MM.yyyy HH:mm",
+                            "dd.MM.yyyy H:mm",
+                            "dd.MM.yyyy HH.mm",
+                            "dd.MM.yyyy H.mm")) {
+                        dateTime = GeneralUtils.parseDateTime(dateTimeString,format); // presumably null on mismatch — confirm
+                        if (dateTime != null) break;
+                    }
+                    if (dateTime == null) {Logger.error(dateTimeString + " could not be parsed");}
+                }
+
+                Document updateFieldsFromSession = new Document()
+                        .append("dateTime", dateTime) // may be null; the key is written regardless, so the check above presumably skips it next run
+                        .append("dateTimeString", dateTimeString)
+                        .append("agendaTitel", agendaTitel);
+
+                collection.updateOne(
+                        new Document("_id", doc.get("_id")),
+                        new Document("$set", updateFieldsFromSession)
+                );
+            }
+
+            // Enrich with NLP Info which is only available after running the analysis
+            if (( ! doc.containsKey("topics"))
+                    && (doc.containsKey("analysisResults"))) {
+                Document nlpDoc = (Document) doc.get("analysisResults"); // NOTE(review): a null value under this key would NPE below
+                if (nlpDoc.containsKey("topics")) {
+                    Set<String> topics = new HashSet<>(); // set: each topic name is stored once per speech
+                    List<Document> topicsDocs = nlpDoc.getList("topics", Document.class);
+                    for (Document topicDoc : topicsDocs) {
+                        topics.add(topicDoc.getString("topic"));
+                    }
+
+                    Document updateFieldsFromTopics = new Document()
+                            .append("topics", topics);
+
+                    collection.updateOne(
+                            new Document("_id", doc.get("_id")),
+                            new Document("$set", updateFieldsFromTopics)
+                    );
+                }
+
+            }
+        }
+    }
+
 }
diff --git a/src/main/java/org/texttechnologylab/project/gruppe_05_1/rest/FrontEndController.java b/src/main/java/org/texttechnologylab/project/gruppe_05_1/rest/FrontEndController.java
index 359d8ad..c5f38df 100644
--- a/src/main/java/org/texttechnologylab/project/gruppe_05_1/rest/FrontEndController.java
+++ b/src/main/java/org/texttechnologylab/project/gruppe_05_1/rest/FrontEndController.java
@@ -63,7 +63,7 @@ public class FrontEndController {
 
         Map<String, Object> attributes = new HashMap<>();
         attributes.put("parlamentarier", parlamentarier);
-        attributes.put("parties", listFractionsFromMembers(MongoPprUtils.getAllParlamentarier("")));
+        attributes.put("parties", MongoPprUtils.getAllPartiesOfSpeakers());
         ctx.render("parlamentarier.ftl", attributes);
     }
 
diff --git a/src/main/java/org/texttechnologylab/project/gruppe_05_1/rest/SpeechController.java b/src/main/java/org/texttechnologylab/project/gruppe_05_1/rest/SpeechController.java
index 85e5d19..d8bee69 100644
--- a/src/main/java/org/texttechnologylab/project/gruppe_05_1/rest/SpeechController.java
+++ b/src/main/java/org/texttechnologylab/project/gruppe_05_1/rest/SpeechController.java
@@ -227,7 +227,7 @@ public class SpeechController {
         attributes.put("filter", filter == null || filter.isBlank() ? null : filter);
 
         // Filtern nach Partei/Fraktion
-        attributes.put("parties", listFractionsFromMembers(MongoPprUtils.getAllParlamentarier("")));
+        attributes.put("parties", MongoPprUtils.getAllPartiesFromSpeeches());
 
         // Filtern nach Topics - TODO
         List<String> topics = Arrays.asList("International", "Government", "Labor", "Economy", "Public");
diff --git a/src/main/resources/templates/sentimentsRadarChart.ftl b/src/main/resources/templates/sentimentsRadarChart.ftl
index 882e3a6..db70cba 100644
--- a/src/main/resources/templates/sentimentsRadarChart.ftl
+++ b/src/main/resources/templates/sentimentsRadarChart.ftl
@@ -5,9 +5,9 @@
     <#if sentiments?? && sentiments?size gt 0>
     <#list sentiments as sentiment>
     sentimentData.push({
-        pos: ${sentiment.positive?number},
-        neu: ${sentiment.neutral?number},
-        neg: ${sentiment.negative?number}
+        pos: ${sentiment.positive?string?replace(',', '.')},
+        neu: ${sentiment.neutral?string?replace(',', '.')},
+        neg: ${sentiment.negative?string?replace(',', '.')},
     });
     </#list>
     <#else>