Improved loading speech, work in progress, hotfixed RadarChart issue
This commit is contained in:
parent
76a12e5a3d
commit
861e14b64d
6 changed files with 136 additions and 34 deletions
|
@ -107,6 +107,9 @@ public class Main {
|
|||
Logger.pink("Adding Speeches to DB...");
|
||||
mongoDBHandler.insertSpeeches(speechIndex.getSpeeches());
|
||||
|
||||
Logger.pink("Building Metadata...");
|
||||
MongoPprUtils.rebuildMetadata();
|
||||
|
||||
// only upload member photos if database was empty by default, not when speeches are force-overwritten
|
||||
if (!FORCE_UPLOAD_SPEECHES) {
|
||||
Logger.pink("Uploading Member Photos to DB...");
|
||||
|
|
|
@ -417,6 +417,12 @@ public class MongoDBHandler {
|
|||
return doc;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns a field value as a Double, even if it is stored in the database as an Integer or a String
|
||||
* @param doc Mongo-Dokument
|
||||
* @param fieldName Feldname
|
||||
* @return Double
|
||||
*/
|
||||
public static Double getFieldAsDouble(Document doc, String fieldName) {
|
||||
Object obj = doc.get(fieldName);
|
||||
if (obj instanceof Double) return (Double) obj;
|
||||
|
|
|
@ -1,9 +1,6 @@
|
|||
package org.texttechnologylab.project.gruppe_05_1.database;
|
||||
|
||||
import com.mongodb.client.FindIterable;
|
||||
import com.mongodb.client.MongoCollection;
|
||||
import com.mongodb.client.MongoCursor;
|
||||
import com.mongodb.client.MongoDatabase;
|
||||
import com.mongodb.client.*;
|
||||
import com.mongodb.client.model.Accumulators;
|
||||
import com.mongodb.client.model.Aggregates;
|
||||
import com.mongodb.client.model.Filters;
|
||||
|
@ -648,6 +645,11 @@ public class MongoPprUtils {
|
|||
} else return doc.getString("base64");
|
||||
}
|
||||
|
||||
// - - - - - - - - - - - - - - - - - - - - - - - - - - - -
|
||||
|
||||
// Metadata
|
||||
|
||||
|
||||
/**
|
||||
* Updates (or creates, if not already present) various metadata:
|
||||
* - Die Liste der Parteien/Fraktionen, wie sie im Speaker-Collection stehen
|
||||
|
@ -661,30 +663,6 @@ public class MongoPprUtils {
|
|||
List<String> distinctPartiesOfSpeakers = getSpeakerCollection().distinct("party", String.class).into(new java.util.ArrayList<>());
|
||||
List<String> distinctPartiesFromSpeeches = getSpeechCollection().distinct("fraction", String.class).into(new java.util.ArrayList<>());
|
||||
|
||||
Logger.info("Collecting Topics Information");
|
||||
Set<String> topics = new HashSet<>();
|
||||
|
||||
// Aggregation pipeline
|
||||
List<Bson> pipeline = List.of(
|
||||
Aggregates.unwind("$analysisResults.topics"), // Unwind the "topics" array
|
||||
Aggregates.project(Projections.fields(Projections.include("analysisResults.topics.topic"))), // Project only the "topic" field
|
||||
Aggregates.group(null, Accumulators.addToSet("distinctTopics", "$analysisResults.topics.topic")) // Group to get distinct values
|
||||
);
|
||||
List<String> topicsList = null;
|
||||
List<Document> results = getSpeechCollection().aggregate(pipeline).into(new java.util.ArrayList<>());
|
||||
// Extract and print all distinct "topic" values
|
||||
if (!results.isEmpty()) {
|
||||
Document result = results.get(0); // Get the first (and only) document
|
||||
List<String> distinctTopics = result.getList("distinctTopics", String.class);
|
||||
topicsList = distinctTopics;
|
||||
for (String topic : distinctTopics) {
|
||||
System.out.println(topic);
|
||||
}
|
||||
} else {
|
||||
System.out.println("No topics found.");
|
||||
}
|
||||
|
||||
|
||||
Logger.info("Updating Metadata Collection: begin");
|
||||
|
||||
MongoDBHandler.createCollection(db, METADATA_COLLECTION_NAME);
|
||||
|
@ -701,11 +679,126 @@ public class MongoPprUtils {
|
|||
metadataCollection.replaceOne(filterPartiesOfSpeakers, partiesDocOfSpeakers, new com.mongodb.client.model.ReplaceOptions().upsert(true));
|
||||
|
||||
|
||||
Logger.info("Enriching Speech Information: begin");
|
||||
enrichSpeechDocuments();
|
||||
Logger.info("Enriching Speech Information: end");
|
||||
|
||||
Logger.info("Collecting Topics Information");
|
||||
|
||||
Document unwindStage = new Document("$unwind", "$topics"); // Deconstruct the "topics" array
|
||||
Document groupStage = new Document("$group", new Document("_id", "$topics")); // Group by "topics"
|
||||
Document projectStage = new Document("$project", new Document("topic", "$_id").append("_id", 0)); // Optionally format the result
|
||||
|
||||
// Execute the aggregation
|
||||
AggregateIterable<Document> result = getSpeechCollection().aggregate(Arrays.asList(unwindStage, groupStage, projectStage));
|
||||
Set<String> topics = new HashSet<>();
|
||||
for (Document doc : result) {
|
||||
topics.add(doc.getString("topic"));
|
||||
}
|
||||
|
||||
Document filterTopics = new Document("type", "topics");
|
||||
Document topicsDoc = MongoDBHandler.createDocument(false, Map.of("type", "topics",
|
||||
"value", topicsList));
|
||||
"value", topics));
|
||||
metadataCollection.replaceOne(filterTopics, topicsDoc, new com.mongodb.client.model.ReplaceOptions().upsert(true));
|
||||
|
||||
Logger.info("Updating Metadata Collection: end");
|
||||
}
|
||||
|
||||
public static List<String> getAllPartiesOfSpeakers() {
|
||||
Document doc = MongoDBHandler.findFirstDocumentInCollection(getMetadataCollection(), "type", "parties_of_speakers");
|
||||
if (doc == null) {return new ArrayList<>();}
|
||||
else {
|
||||
return new ArrayList<>(doc.getList("value", String.class));
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
public static final List<String> ALL_PARTIES_FROM_SPEECHES = Arrays.asList(
|
||||
"Afd", "BSW", "GRÜNEN", "CDU/CSU", "LINKE", "FDP",
|
||||
"Fraktionslos" /* auch als "fraktionslos" vorhanden!*/,
|
||||
"SPD",
|
||||
"keine" /* entspricht den null Wert */
|
||||
);
|
||||
|
||||
/**
|
||||
* Liefert die Liste aller Parteien/Fraktionen, welche in der Liste der Reden stehen, zurück.
|
||||
* Diese Liste dient zur Filterung der Reden auf der entsprechenden Seite.
|
||||
* Da die Datenqualität dieses Feldes extrem schlecht ist, muss man hier etwas tricksen:
|
||||
* - Für Bündnis 90 / Die Grünen sind 5 unterschiedlichen Schreibweisen vorhanden
|
||||
* - Für Die Linke sind ebenfalls 5 unterschiedlichen Schreibweisen vorhanden
|
||||
* - Wegen der unterschiedlichen Schreibweisen muss man für das Frontend mit Pattern Matching arbeiten
|
||||
* - Bei 6 Reden steht "SPDCDU/CSU". Diese Reden werden dann bei der Filterung nicht berücksichtigt
|
||||
* - Für 3561 der 25387 Reden wurde keine Partei/Fraktion eingetragen. Diese Zahl ist zu hoch, um sie einfach zu ignorieren, daher der Eintrag "keine"
|
||||
* - Beide Schreibweise "Fraktionslos" (166 Reden) und "fraktionslos" (311 Reden) sind vorhanden
|
||||
* @return List<String> Liste aller Parteien/Fraktionen, welche in der Liste der Reden stehen
|
||||
*/
|
||||
public static List<String> getAllPartiesFromSpeeches() {
|
||||
return ALL_PARTIES_FROM_SPEECHES;
|
||||
}
|
||||
|
||||
/**
|
||||
* Reichere die Rede-Dokumente um Informationen an:
|
||||
* - Datum und Uhrzeit der Rede (als DateTime und textuell): dateTimeString , dateTime
|
||||
* - Agenda-Titel: agendaTitel
|
||||
* - Die Topics der Rede aus der NLP-Analyse
|
||||
*/
|
||||
public static void enrichSpeechDocuments() {
|
||||
|
||||
MongoCollection<Document> collection = getSpeechCollection();
|
||||
FindIterable<Document> documents = collection.find();
|
||||
|
||||
for (Document doc : documents) {
|
||||
// Enrich with Info from Session & Agenda, which is always available
|
||||
if ( ! doc.containsKey("dateTime")) {
|
||||
int sessionId = doc.getInteger("sessionId");
|
||||
int agendaItemId = doc.getInteger("agendaItemId");
|
||||
String agendaTitel = getAgendaTitle(sessionId, agendaItemId);
|
||||
LocalDateTime dateTime = null;
|
||||
String dateTimeString = getSessionDateTime(sessionId);
|
||||
if (dateTimeString != null) {
|
||||
for (String format : Arrays.asList("dd.MM.yyyy HH:mm",
|
||||
"dd.MM.yyyy H:mm",
|
||||
"dd.MM.yyyy HH.mm",
|
||||
"dd.MM.yyyy H.mm")) {
|
||||
dateTime = GeneralUtils.parseDateTime(dateTimeString,format);
|
||||
if (dateTime != null) break;
|
||||
}
|
||||
if (dateTime == null) {Logger.error(dateTimeString + " could not be parsed");}
|
||||
}
|
||||
|
||||
Document updateFieldsFromSession = new Document()
|
||||
.append("dateTime", dateTime)
|
||||
.append("dateTimeString", dateTimeString)
|
||||
.append("agendaTitel", agendaTitel);
|
||||
|
||||
collection.updateOne(
|
||||
new Document("_id", doc.get("_id")),
|
||||
new Document("$set", updateFieldsFromSession)
|
||||
);
|
||||
}
|
||||
|
||||
// Enrich with NLP Info which is only available after running the analysis
|
||||
if (( ! doc.containsKey("topics"))
|
||||
&& (doc.containsKey("analysisResults"))) {
|
||||
Document nlpDoc = (Document) doc.get("analysisResults");
|
||||
if (nlpDoc.containsKey("topics")) {
|
||||
Set<String> topics = new HashSet<>();
|
||||
List<Document> topicsDocs = nlpDoc.getList("topics", Document.class);
|
||||
for (Document topicDoc : topicsDocs) {
|
||||
topics.add(topicDoc.getString("topic"));
|
||||
}
|
||||
|
||||
Document updateFieldsFromTopics = new Document()
|
||||
.append("topics", topics);
|
||||
|
||||
collection.updateOne(
|
||||
new Document("_id", doc.get("_id")),
|
||||
new Document("$set", updateFieldsFromTopics)
|
||||
);
|
||||
}
|
||||
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
@ -63,7 +63,7 @@ public class FrontEndController {
|
|||
|
||||
Map<String, Object> attributes = new HashMap<>();
|
||||
attributes.put("parlamentarier", parlamentarier);
|
||||
attributes.put("parties", listFractionsFromMembers(MongoPprUtils.getAllParlamentarier("")));
|
||||
attributes.put("parties", MongoPprUtils.getAllPartiesOfSpeakers());
|
||||
ctx.render("parlamentarier.ftl", attributes);
|
||||
}
|
||||
|
||||
|
|
|
@ -227,7 +227,7 @@ public class SpeechController {
|
|||
attributes.put("filter", filter == null || filter.isBlank() ? null : filter);
|
||||
|
||||
// Filtern nach Partei/Fraktion
|
||||
attributes.put("parties", listFractionsFromMembers(MongoPprUtils.getAllParlamentarier("")));
|
||||
attributes.put("parties", MongoPprUtils.getAllPartiesFromSpeeches());
|
||||
|
||||
// Filtern nach Topics - TODO
|
||||
List<String> topics = Arrays.asList("International", "Government", "Labor", "Economy", "Public");
|
||||
|
|
|
@ -5,9 +5,9 @@
|
|||
<#if sentiments?? && sentiments?size gt 0>
|
||||
<#list sentiments as sentiment>
|
||||
sentimentData.push({
|
||||
pos: ${sentiment.positive?number},
|
||||
neu: ${sentiment.neutral?number},
|
||||
neg: ${sentiment.negative?number}
|
||||
pos: ${sentiment.positive?string?replace(',', '.')},
|
||||
neu: ${sentiment.neutral?string?replace(',', '.')},
|
||||
neg: ${sentiment.negative?string?replace(',', '.')},
|
||||
});
|
||||
</#list>
|
||||
<#else>
|
||||
|
|
Before Width: | Height: | Size: 3.7 KiB After Width: | Height: | Size: 3.8 KiB |
Loading…
Add table
Add a link
Reference in a new issue