diff --git a/src/main/java/org/texttechnologylab/project/gruppe_05_1/Main.java b/src/main/java/org/texttechnologylab/project/gruppe_05_1/Main.java index 801e080..85aeddb 100644 --- a/src/main/java/org/texttechnologylab/project/gruppe_05_1/Main.java +++ b/src/main/java/org/texttechnologylab/project/gruppe_05_1/Main.java @@ -28,6 +28,8 @@ public class Main { public static boolean FORCE_UPLOAD_MEMBERS; public static boolean FORCE_UPLOAD_SPEECHES; public static boolean ONLY_RUN_WEB; + + public static boolean REBUILD_METADATA; public static boolean DEBUG_LOGGING; private static final FileObjectFactory xmlFactory = FileObjectFactory.getFactory(); private static final MongoObjectFactory mongoFactory = MongoObjectFactory.getFactory(); @@ -43,6 +45,7 @@ public class Main { FORCE_UPLOAD_MEMBERS = Arrays.asList(args).contains("forceUploadMembers"); FORCE_UPLOAD_SPEECHES = Arrays.asList(args).contains("forceUploadSpeeches"); ONLY_RUN_WEB = Arrays.asList(args).contains("onlyRunWeb"); + REBUILD_METADATA = Arrays.asList(args).contains("rebuildMetadata"); DEBUG_LOGGING = Arrays.asList(args).contains("debugLogging"); System.out.println("Starting Multimodal Parliament Explorer..."); @@ -52,6 +55,7 @@ public class Main { System.out.println(" - Force Upload Members: " + FORCE_UPLOAD_MEMBERS); System.out.println(" - Force Upload Speeches: " + FORCE_UPLOAD_SPEECHES); System.out.println(" - Only Run javalin Web Server: " + ONLY_RUN_WEB); + System.out.println(" - Rebuild Metadata: " + REBUILD_METADATA); System.out.println(" - Debug Logging: " + DEBUG_LOGGING); System.out.println("--------------------------------------------o"); @@ -64,6 +68,12 @@ public class Main { MongoDBHandler mongoDBHandler = new MongoDBHandler(); + if (REBUILD_METADATA) { + Logger.info("Rebuilding Metadata..."); + MongoPprUtils.rebuildMetadata(); + System.exit(0); + } + SpeechIndexFactoryImpl speechIndexFactory = new SpeechIndexFactoryImpl(); if 
((mongoDBHandler.getDatabase().getCollection(MongoPprUtils.SPEECH_COLLECTION_NAME).countDocuments() != 0) && !FORCE_UPLOAD_SPEECHES) { Logger.info("Skipping Speech parsing and DB insertion as they are already present..."); diff --git a/src/main/java/org/texttechnologylab/project/gruppe_05_1/database/MongoPprUtils.java b/src/main/java/org/texttechnologylab/project/gruppe_05_1/database/MongoPprUtils.java index 1f92857..73c6d90 100644 --- a/src/main/java/org/texttechnologylab/project/gruppe_05_1/database/MongoPprUtils.java +++ b/src/main/java/org/texttechnologylab/project/gruppe_05_1/database/MongoPprUtils.java @@ -3,6 +3,9 @@ package org.texttechnologylab.project.gruppe_05_1.database; import com.mongodb.client.FindIterable; import com.mongodb.client.MongoCollection; import com.mongodb.client.MongoCursor; +import com.mongodb.client.MongoDatabase; +import com.mongodb.client.model.Accumulators; +import com.mongodb.client.model.Aggregates; import com.mongodb.client.model.Filters; import com.mongodb.client.model.Projections; import io.javalin.http.Context; @@ -45,6 +48,7 @@ public class MongoPprUtils { public static final String HISTORY_COLLECTION_NAME = "history"; public static final String PICTURES_COLLECTION_NAME = "pictures"; public static final String COMMENT_COLLECTION_NAME = "comment"; + public static final String METADATA_COLLECTION_NAME = "metadata"; private static MongoCollection speakerCollection = null; private static MongoCollection speechCollection = null; @@ -52,6 +56,7 @@ public class MongoPprUtils { private static MongoCollection agendaItemsCollection = null; private static MongoCollection picturesCollection = null; private static MongoCollection commentCollection = null; + private static MongoCollection metadataCollection = null; public static MongoCollection getSpeakerCollection() { if (speakerCollection == null) speakerCollection = MongoDBHandler.getMongoDatabase().getCollection(SPEAKER_COLLECTION_NAME); @@ -78,6 +83,11 @@ public class MongoPprUtils { 
return picturesCollection; } + public static MongoCollection getMetadataCollection() { + if (metadataCollection == null) metadataCollection = MongoDBHandler.getMongoDatabase().getCollection(METADATA_COLLECTION_NAME); + return metadataCollection; + } + /** * Create the Speaker Collection and useful indices for it */ @@ -626,10 +636,76 @@ public class MongoPprUtils { // getMemberPhoto + /** + * Returns the photo of a member of parliament + * @param id + * @return Base64-encoded Photo + */ public static String getMemberPhoto(String id) { Document doc = MongoDBHandler.findFirstDocumentInCollection(getPicturesCollection(), "memberId", id); if (doc == null) { return null; } else return doc.getString("base64"); } + + /** + * Updates (or creates, if not already present) various metadata: + * - The list of parties/factions as stored in the speaker collection + * - The list of parties/factions as stored in the speech collection (the two lists differ considerably) + * - Topics from the NLP analysis of the speeches (stored as an empty list when none are found) + */ + public static void rebuildMetadata() { + MongoDatabase db = MongoDBHandler.getMongoDatabase(); + + Logger.info("Collecting Partei/Fraktion Information"); + List distinctPartiesOfSpeakers = getSpeakerCollection().distinct("party", String.class).into(new java.util.ArrayList<>()); + List distinctPartiesFromSpeeches = getSpeechCollection().distinct("fraction", String.class).into(new java.util.ArrayList<>()); + + Logger.info("Collecting Topics Information"); + Set topics = new HashSet<>(); + + // Aggregation pipeline + List pipeline = List.of( + Aggregates.unwind("$analysisResults.topics"), // Unwind the "topics" array + Aggregates.project(Projections.fields(Projections.include("analysisResults.topics.topic"))), // Project only the "topic" field + Aggregates.group(null, Accumulators.addToSet("distinctTopics", "$analysisResults.topics.topic")) // Group to get distinct values + ); + List topicsList = new java.util.ArrayList<>(); // never null: Map.of(...) below throws NullPointerException on a null value + List results = 
getSpeechCollection().aggregate(pipeline).into(new java.util.ArrayList<>()); + // Extract and print all distinct "topic" values + if (!results.isEmpty()) { + Document result = results.get(0); // Get the first (and only) document + List distinctTopics = result.getList("distinctTopics", String.class); + topicsList = distinctTopics; + for (String topic : distinctTopics) { + System.out.println(topic); + } + } else { + System.out.println("No topics found."); + } + + + Logger.info("Updating Metadata Collection: begin"); + + MongoDBHandler.createCollection(db, METADATA_COLLECTION_NAME); + MongoCollection metadataCollection = getMetadataCollection(); + + Document filterPartiesFromSpeeches = new Document("type", "parties_from_speeches"); + Document partiesDocFromSpeeches = MongoDBHandler.createDocument(false, Map.of("type", "parties_from_speeches", + "value", distinctPartiesFromSpeeches)); + metadataCollection.replaceOne(filterPartiesFromSpeeches, partiesDocFromSpeeches, new com.mongodb.client.model.ReplaceOptions().upsert(true)); + + Document filterPartiesOfSpeakers = new Document("type", "parties_of_speakers"); + Document partiesDocOfSpeakers = MongoDBHandler.createDocument(false, Map.of("type", "parties_of_speakers", + "value", distinctPartiesOfSpeakers)); + metadataCollection.replaceOne(filterPartiesOfSpeakers, partiesDocOfSpeakers, new com.mongodb.client.model.ReplaceOptions().upsert(true)); + + + Document filterTopics = new Document("type", "topics"); + Document topicsDoc = MongoDBHandler.createDocument(false, Map.of("type", "topics", + "value", topicsList)); + metadataCollection.replaceOne(filterTopics, topicsDoc, new com.mongodb.client.model.ReplaceOptions().upsert(true)); + + Logger.info("Updating Metadata Collection: end"); + } } diff --git a/src/main/java/org/texttechnologylab/project/gruppe_05_1/domain/nlp/Sentiment.java b/src/main/java/org/texttechnologylab/project/gruppe_05_1/domain/nlp/Sentiment.java index ab9398f..3894a72 100644 --- 
a/src/main/java/org/texttechnologylab/project/gruppe_05_1/domain/nlp/Sentiment.java +++ b/src/main/java/org/texttechnologylab/project/gruppe_05_1/domain/nlp/Sentiment.java @@ -113,9 +113,9 @@ public class Sentiment { doc.getInteger("begin"), doc.getInteger("end"), MongoDBHandler.getFieldAsDouble(doc, "score"), - MongoDBHandler.getFieldAsDouble(doc, "pos"), + MongoDBHandler.getFieldAsDouble(doc, "neg"), MongoDBHandler.getFieldAsDouble(doc, "neu"), - MongoDBHandler.getFieldAsDouble(doc, "neg") + MongoDBHandler.getFieldAsDouble(doc, "pos") )); } return sentiments; diff --git a/src/main/java/org/texttechnologylab/project/gruppe_05_1/rest/SpeechController.java b/src/main/java/org/texttechnologylab/project/gruppe_05_1/rest/SpeechController.java index f7f5f85..85e5d19 100644 --- a/src/main/java/org/texttechnologylab/project/gruppe_05_1/rest/SpeechController.java +++ b/src/main/java/org/texttechnologylab/project/gruppe_05_1/rest/SpeechController.java @@ -158,8 +158,9 @@ public class SpeechController { // Der erste Sentiment gilt der gesamten Rede. Die weitere Sentiments entsprechen die Sätze. List sentiments = speech.getNlp().getSentiments(); if ((sentiments != null) && ! sentiments.isEmpty()) { - Sentiment overallSentiment = sentiments.get(0); - attributes.put("overallSentiment", overallSentiment); + List overallSentiments = new ArrayList<>(sentiments); + attributes.put("overallSentiments", overallSentiments); + sentiments.remove(0); // Sentiment-Icon diff --git a/src/main/resources/templates/nlp.ftl b/src/main/resources/templates/nlp.ftl index 7e9bf9e..a7f9f83 100644 --- a/src/main/resources/templates/nlp.ftl +++ b/src/main/resources/templates/nlp.ftl @@ -20,8 +20,9 @@
- <#if overallSentiment??> + <#if overallSentiments??>

Sentiments Information (als Radar Chart)

+ <#assign sentiments = overallSentiments> <#include "sentimentsRadarChart.ftl"> <#else>

Keine Sentiments Information für diese Rede verfügbar

diff --git a/src/main/resources/templates/posBarChart.ftl b/src/main/resources/templates/posBarChart.ftl index 08b2972..4f0fb8f 100644 --- a/src/main/resources/templates/posBarChart.ftl +++ b/src/main/resources/templates/posBarChart.ftl @@ -1,12 +1,10 @@ + +