Working POS Bar Chart for every speech implemented

This commit is contained in:
Artorias 2025-03-18 15:40:09 +01:00
parent 22a555d8a2
commit fcc064a616
5 changed files with 139 additions and 3 deletions

View file

@ -3,6 +3,7 @@ package org.texttechnologylab.project.gruppe_05_1.domain.html;
import org.bson.Document;
import org.texttechnologylab.project.gruppe_05_1.database.MongoDBHandler;
import org.texttechnologylab.project.gruppe_05_1.domain.nlp.NlpInfo;
import org.texttechnologylab.project.gruppe_05_1.domain.nlp.Token;
import org.texttechnologylab.project.gruppe_05_1.domain.nlp.Topic;
import java.util.ArrayList;
@ -44,6 +45,7 @@ public class HtmlSpeech {
// TODO: HERE
List<Document> tokensDocs = nlpDoc.get("tokens", MongoDBHandler.DOC_LIST_CLASS);
nlp.setTokens(Token.readTokensFromMongo(tokensDocs));
List<Document> sentencesDocs = nlpDoc.get("sentences", MongoDBHandler.DOC_LIST_CLASS);

View file

@ -1,7 +1,8 @@
package org.texttechnologylab.project.gruppe_05_1.domain.nlp;
import java.util.Objects;
import java.util.StringJoiner;
import org.bson.Document;
import java.util.*;
public class Token {
String text;
@ -61,4 +62,35 @@ public class Token {
.add("lemma='" + lemma + "'")
.toString();
}
/**
 * Converts the token documents of a speech (Speech --> analysisResults --> token),
 * as read from MongoDB, into {@link Token} objects.
 *
 * @param tokenDocs list of MongoDB documents; the string fields "text", "pos" and "lemma"
 *                  are read from each one. May be {@code null} (e.g. when the speech has
 *                  no "tokens" field).
 * @return a new, mutable list with one token per non-null document, in input order;
 *         empty if {@code tokenDocs} is {@code null} or empty
 */
public static List<Token> readTokensFromMongo(List<Document> tokenDocs) {
    List<Token> tokens = new ArrayList<>();
    // A missing field yields null from the Mongo document; treat it as "no tokens"
    // instead of failing with a NullPointerException.
    if (tokenDocs == null) {
        return tokens;
    }
    for (Document doc : tokenDocs) {
        if (doc == null) {
            continue; // skip holes in the array rather than aborting the whole speech
        }
        tokens.add(new Token(doc.getString("text"),
                doc.getString("pos"),
                doc.getString("lemma")
        ));
    }
    return tokens;
}
/**
 * Counts how often each part-of-speech (POS) tag occurs in the given tokens.
 *
 * @param tokenList the tokens to tally; may be {@code null}. {@code null} elements are ignored.
 * @return a new, mutable map from POS tag (the value of {@link Token#getPos()}) to its
 *         number of occurrences; empty if {@code tokenList} is {@code null} or empty
 */
public static Map<String, Integer> countPOS(List<Token> tokenList) {
    Map<String, Integer> posCounts = new HashMap<>();
    if (tokenList == null) {
        return posCounts;
    }
    for (Token token : tokenList) {
        if (token == null) {
            continue;
        }
        // merge() inserts 1 for a new tag and adds 1 to an existing count.
        posCounts.merge(token.getPos(), 1, Integer::sum);
    }
    return posCounts;
}
}

View file

@ -5,10 +5,12 @@ import io.javalin.openapi.*;
import org.texttechnologylab.project.gruppe_05_1.database.MongoPprUtils;
import org.texttechnologylab.project.gruppe_05_1.domain.html.HtmlSpeech;
import org.texttechnologylab.project.gruppe_05_1.domain.html.ParlamentarierDetails;
import org.texttechnologylab.project.gruppe_05_1.domain.nlp.Token;
import org.texttechnologylab.project.gruppe_05_1.domain.nlp.Topic;
import org.texttechnologylab.project.gruppe_05_1.domain.speech.SpeechMetaData;
import org.texttechnologylab.project.gruppe_05_1.xml.speeches.Interfaces.Speech;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
@ -80,6 +82,28 @@ public class SpeechController {
.collect(Collectors.toList()));
}
// NLP: POS
if (speech.getNlp() != null && speech.getNlp().getTokens() != null) {
List<Token> tokens = speech.getNlp().getTokens();
Map<String, Integer> posCounts = Token.countPOS(tokens);
List<Token> posList = posCounts.entrySet().stream()
.map(entry -> new Token(entry.getKey(), String.valueOf(entry.getValue()), "")) // Lemma remains empty
.collect(Collectors.toList());
System.out.println("DEBUG: Sending POS List to NLP - " + posList);
speech.getNlp().setPosList((List) posList);
} else {
System.out.println("DEBUG: POS List is EMPTY");
speech.getNlp().setPosList((List) new ArrayList<Token>()); // Ensure it's never null
}
// TODO: Token wird momentan etwas komisch abgespeichert, da im Attribut text die POS art steht, und in pos die Anzahl dieser POS arten. Umstrukturieren damit keine Verwirrung herrscht
ctx.render("speech.ftl", attributes);
}

View file

@ -6,7 +6,6 @@
<h3>Keine Topics Information für diese Rede verfügbar</h3>
</#if>
<#if s.nlp.posList??>
<h3>POS Information (als Bar Chart)</h3>
<#assign posList = s.nlp.posList>

View file

@ -0,0 +1,79 @@
<svg id="posBarchart"></svg>
<script>
// Draws a bar chart of part-of-speech (POS) counts into the #posBarchart SVG.
// Requires the global `d3` object to be available on the page.
// Input: the FreeMarker variable `posList`; for every element, `text` is used as the
// POS tag (bar name) and `pos` as the number of occurrences of that tag.

// Chart geometry (pixels)
const barChartWidth = 1000;
const barChartHeight = 750;
const margin = { top: 20, right: 30, bottom: 50, left: 50 };

// Build the data array; fall back to a single placeholder entry when posList is missing or empty
var posData = [];
<#if posList?? && posList?size gt 0>
<#list posList as token>
// js_string escapes quotes/backslashes so model values cannot break out of the string literal;
// Number(...) || 0 keeps the script valid even if the count is not a parsable number.
posData.push({ pos: "${token.text?js_string}", count: Number("${token.pos?js_string}") || 0 });
</#list>
<#else>
posData.push({ pos: "No Data", count: 0 });
</#if>
console.log("Final POS Data being used:", posData);

var svg = d3.select("#posBarchart")
    .attr("width", barChartWidth)
    .attr("height", barChartHeight);

// Create Scales
// x: one band per POS tag
var xScale = d3.scaleBand()
    .domain(posData.map(d => d.pos))
    .range([margin.left, barChartWidth - margin.right])
    .padding(0.2);
// y: counts, inverted so larger values are drawn higher up.
// "|| 1" avoids a degenerate [0, 0] domain (all counts zero / placeholder entry), which a
// linear scale maps to the middle of its range and would draw a zero count as a tall bar.
var yScale = d3.scaleLinear()
    .domain([0, d3.max(posData, d => d.count) || 1])
    .nice()
    .range([barChartHeight - margin.bottom, margin.top]);
// One colour per POS tag
var colorScale = d3.scaleOrdinal(d3.schemeCategory10);

// Create Bars
var bars = svg.selectAll("rect")
    .data(posData)
    .enter().append("rect")
    .attr("x", d => xScale(d.pos))
    .attr("y", d => yScale(d.count))
    .attr("width", xScale.bandwidth())
    .attr("height", d => Math.max(0, barChartHeight - margin.bottom - yScale(d.count))) // Prevents negative heights
    .attr("fill", d => colorScale(d.pos));
console.log("Number of bars created:", bars.size());

// X Axis (tick labels rotated so neighbouring tags do not overlap)
svg.append("g")
    .attr("transform", "translate(0," + (barChartHeight - margin.bottom) + ")")
    .call(d3.axisBottom(xScale))
    .selectAll("text")
    .attr("transform", "rotate(-45)")
    .style("text-anchor", "end")
    .style("font-size", "12px");

// Y Axis
svg.append("g")
    .attr("transform", "translate(" + margin.left + ",0)")
    .call(d3.axisLeft(yScale));

// Labels: the count, centred just above each bar
svg.selectAll("text.label")
    .data(posData)
    .enter().append("text")
    .attr("class", "label")
    .attr("x", d => xScale(d.pos) + xScale.bandwidth() / 2)
    .attr("y", d => yScale(d.count) - 5)
    .attr("text-anchor", "middle")
    .attr("fill", "#000")
    .text(d => d.count);
</script>

Before

Width:  |  Height:  |  Size: 0 B

After

Width:  |  Height:  |  Size: 2.3 KiB

Before After
Before After