From fcc064a616497bda22e2a16b665d54f865b808a3 Mon Sep 17 00:00:00 2001 From: Artorias Date: Tue, 18 Mar 2025 15:40:09 +0100 Subject: [PATCH] Working POS Bar Chart for every speech implemented --- .../gruppe_05_1/domain/html/HtmlSpeech.java | 2 + .../project/gruppe_05_1/domain/nlp/Token.java | 36 ++++++++- .../gruppe_05_1/rest/SpeechController.java | 24 ++++++ .../gruppe_05_1/website/templates/nlp.ftl | 1 - .../website/templates/posBarChart.ftl | 79 +++++++++++++++++++ 5 files changed, 139 insertions(+), 3 deletions(-) diff --git a/src/main/java/org/texttechnologylab/project/gruppe_05_1/domain/html/HtmlSpeech.java b/src/main/java/org/texttechnologylab/project/gruppe_05_1/domain/html/HtmlSpeech.java index 1e61039..07a4fd5 100644 --- a/src/main/java/org/texttechnologylab/project/gruppe_05_1/domain/html/HtmlSpeech.java +++ b/src/main/java/org/texttechnologylab/project/gruppe_05_1/domain/html/HtmlSpeech.java @@ -3,6 +3,7 @@ package org.texttechnologylab.project.gruppe_05_1.domain.html; import org.bson.Document; import org.texttechnologylab.project.gruppe_05_1.database.MongoDBHandler; import org.texttechnologylab.project.gruppe_05_1.domain.nlp.NlpInfo; +import org.texttechnologylab.project.gruppe_05_1.domain.nlp.Token; import org.texttechnologylab.project.gruppe_05_1.domain.nlp.Topic; import java.util.ArrayList; @@ -44,6 +45,7 @@ public class HtmlSpeech { // TODO: HERE List tokensDocs = nlpDoc.get("tokens", MongoDBHandler.DOC_LIST_CLASS); + nlp.setTokens(Token.readTokensFromMongo(tokensDocs)); List sentencesDocs = nlpDoc.get("sentences", MongoDBHandler.DOC_LIST_CLASS); diff --git a/src/main/java/org/texttechnologylab/project/gruppe_05_1/domain/nlp/Token.java b/src/main/java/org/texttechnologylab/project/gruppe_05_1/domain/nlp/Token.java index d751320..09a5365 100644 --- a/src/main/java/org/texttechnologylab/project/gruppe_05_1/domain/nlp/Token.java +++ b/src/main/java/org/texttechnologylab/project/gruppe_05_1/domain/nlp/Token.java @@ -1,7 +1,8 @@ package org.texttechnologylab.project.gruppe_05_1.domain.nlp; -import java.util.Objects; -import java.util.StringJoiner; +import org.bson.Document; + +import java.util.*; public class Token { String text; @@ -61,4 +62,35 @@ public class Token { .add("lemma='" + lemma + "'") .toString(); } + + /** + * Die Token-Dokumente (Speech --> analysisResults --> token) aus der MongoDB lesen + * @param tokenDocs Eine Liste von Mongo-Dokumenten + * @return Eine Liste der Token + */ + public static List readTokensFromMongo(List tokenDocs) { + List tokens = new ArrayList<>(); + for (Document doc : tokenDocs) { + tokens.add(new Token(doc.getString("text"), + doc.getString("pos"), + doc.getString("lemma") + )); + } + return tokens; + } + + /** + * Zählt alle verschiedenen POS Vorkommen auf + * @param tokenList + * @return Jede POS art mit ihrer Anzahl an Vorkommen + */ + public static Map countPOS(List tokenList) { + Map posCounts = new HashMap<>(); + + for (Token token : tokenList) { + posCounts.put(token.getPos(), posCounts.getOrDefault(token.getPos(), 0) + 1); + } + + return posCounts; + } } diff --git a/src/main/java/org/texttechnologylab/project/gruppe_05_1/rest/SpeechController.java b/src/main/java/org/texttechnologylab/project/gruppe_05_1/rest/SpeechController.java index a422988..f6bb0ad 100644 --- a/src/main/java/org/texttechnologylab/project/gruppe_05_1/rest/SpeechController.java +++ b/src/main/java/org/texttechnologylab/project/gruppe_05_1/rest/SpeechController.java @@ -5,10 +5,12 @@ import io.javalin.openapi.*; import org.texttechnologylab.project.gruppe_05_1.database.MongoPprUtils; import org.texttechnologylab.project.gruppe_05_1.domain.html.HtmlSpeech; import org.texttechnologylab.project.gruppe_05_1.domain.html.ParlamentarierDetails; +import org.texttechnologylab.project.gruppe_05_1.domain.nlp.Token; import org.texttechnologylab.project.gruppe_05_1.domain.nlp.Topic; import org.texttechnologylab.project.gruppe_05_1.domain.speech.SpeechMetaData; import org.texttechnologylab.project.gruppe_05_1.xml.speeches.Interfaces.Speech; +import java.util.ArrayList; import java.util.HashMap; import java.util.List; import java.util.Map; @@ -80,6 +82,28 @@ public class SpeechController { .collect(Collectors.toList())); } + // NLP: POS + if (speech.getNlp() != null && speech.getNlp().getTokens() != null) { + List tokens = speech.getNlp().getTokens(); + + Map posCounts = Token.countPOS(tokens); + + List posList = posCounts.entrySet().stream() + .map(entry -> new Token(entry.getKey(), String.valueOf(entry.getValue()), "")) // Lemma remains empty + .collect(Collectors.toList()); + + System.out.println("DEBUG: Sending POS List to NLP - " + posList); + + speech.getNlp().setPosList((List) posList); + + } else { + System.out.println("DEBUG: POS List is EMPTY"); + speech.getNlp().setPosList((List) new ArrayList()); // Ensure it's never null + } + + // TODO: Token wird momentan etwas komisch abgespeichert, da im Attribut text die POS art steht, und in pos die Anzahl dieser POS arten. Umstrukturieren damit keine Verwirrung herrscht + + ctx.render("speech.ftl", attributes); } diff --git a/src/main/java/org/texttechnologylab/project/gruppe_05_1/website/templates/nlp.ftl b/src/main/java/org/texttechnologylab/project/gruppe_05_1/website/templates/nlp.ftl index d5fc652..4dfeb43 100644 --- a/src/main/java/org/texttechnologylab/project/gruppe_05_1/website/templates/nlp.ftl +++ b/src/main/java/org/texttechnologylab/project/gruppe_05_1/website/templates/nlp.ftl @@ -6,7 +6,6 @@

Keine Topics Information für diese Rede verfügbar

- <#if s.nlp.posList??>

POS Information (als Bar Chart)

<#assign posList = s.nlp.posList> diff --git a/src/main/java/org/texttechnologylab/project/gruppe_05_1/website/templates/posBarChart.ftl b/src/main/java/org/texttechnologylab/project/gruppe_05_1/website/templates/posBarChart.ftl index e69de29..2e6122d 100644 --- a/src/main/java/org/texttechnologylab/project/gruppe_05_1/website/templates/posBarChart.ftl +++ b/src/main/java/org/texttechnologylab/project/gruppe_05_1/website/templates/posBarChart.ftl @@ -0,0 +1,79 @@ + + + + + + +