Merge branch 'speech_export_feature'

This commit is contained in:
Jonas Werner 2025-03-23 14:49:30 +01:00
commit ae126f0df3
9 changed files with 269 additions and 4 deletions

View file

@ -10,6 +10,8 @@ import org.texttechnologylab.project.gruppe_05_1.database.domainimpl.mdb.Speech_
import org.texttechnologylab.project.gruppe_05_1.domain.html.HtmlSpeech;
import org.texttechnologylab.project.gruppe_05_1.domain.html.Parlamentarier;
import org.texttechnologylab.project.gruppe_05_1.domain.html.ParlamentarierDetails;
import org.texttechnologylab.project.gruppe_05_1.domain.nlp.NamedEntity;
import org.texttechnologylab.project.gruppe_05_1.domain.nlp.Token;
import org.texttechnologylab.project.gruppe_05_1.domain.html.SpeechOverview;
import org.texttechnologylab.project.gruppe_05_1.domain.speaker.Membership;
import org.texttechnologylab.project.gruppe_05_1.domain.speech.SpeechMetaData;
@ -23,6 +25,7 @@ import java.time.LocalDate;
import java.time.LocalDateTime;
import java.time.ZoneId;
import java.util.*;
import java.util.stream.Collectors;
import static com.mongodb.client.model.Filters.eq;
@ -812,6 +815,45 @@ public class MongoPprUtils {
}
return speechIds;
}
/**
 * Counts how often each part-of-speech tag occurs among the tokens of one speech.
 *
 * The speech is loaded via {@code getHTMLSpeechByKey}; the counting itself is
 * delegated to {@code Token.countPOS}.
 *
 * @param speechId key of the speech whose tokens are counted
 * @return the map returned by {@code Token.countPOS} for the tokens of that speech
 */
public static Map<String, Integer> getPOSInformationCardinalitiesForSpeechById(String speechId) {
    List<Token> tokens = getHTMLSpeechByKey(speechId).getNlp().getTokens();
    // The former intermediate list of synthetic Token objects was never read, so it is gone.
    return Token.countPOS(tokens);
}
/**
 * Counts the named entities of one speech, grouped by entity type and then by entity text.
 *
 * NOTE: only the counts of the entity type "CARDINAL" are returned for now; the other
 * types are counted but dropped (the original author marked this as "needs fixing").
 *
 * @param speechId key of the speech whose named entities are counted
 * @return entity text mapped to its number of occurrences for type "CARDINAL";
 *         an empty map (never {@code null}) when the speech has no such entity
 */
public static Map<String, Integer> getNamedEntitiesInformationCardinalitiesForSpeechById(String speechId) {
    // entity type -> (entity text -> number of occurrences)
    Map<String, Map<String, Integer>> countsByType = new HashMap<>();

    for (NamedEntity ne : getHTMLSpeechByKey(speechId).getNlp().getNamedEntities()) {
        countsByType
                // unknown type: create its text map on first sight
                .computeIfAbsent(ne.getType(), unusedType -> new HashMap<>())
                // known text: increase the count by one, otherwise start at one
                .merge(ne.getText(), 1, Integer::sum);
    }

    // TODO: still restricted to a single entity type (needs fixing).
    // getOrDefault keeps callers from receiving null when no CARDINAL entity exists.
    return countsByType.getOrDefault("CARDINAL", new HashMap<>());
}
/**
* Liefert die Liste aller Parteien/Fraktionen, welche in der Liste der Parlamentarier stehen, zurück.

View file

@ -85,15 +85,16 @@ public class Speech_MongoDB_Impl extends Speech_File_Impl implements Speech {
public String toTeX() {
StringBuilder tex = new StringBuilder();
String party = (this.getFraction() != null ? " (" + this.getFraction() + ")" : "");
String speechTitle = "Rede " +
this.getSpeechKey() +
"/" +
getAgendaTitle(this.getSessionId(), this.getAgendaItemId()) +
" von " +
this.getSpeakerName() +
" (" +
this.getFraction() +
") vom " +
party +
" vom " +
getSessionDateTime(this.getSessionId());
tex.append("\\section*{").append(speechTitle).append("}\n");

View file

@ -1,6 +1,7 @@
package org.texttechnologylab.project.gruppe_05_1.domain.nlp;
import org.bson.Document;
import org.w3c.dom.Element;
import java.util.ArrayList;
import java.util.List;
@ -65,4 +66,11 @@ public class NamedEntity {
}
return nes;
}
/**
 * Builds the XML element for this named entity.
 *
 * The element is named "NamedEntity", carries the entity type in its "type"
 * attribute and the entity text as its text content. It is created by the
 * given document but not attached to it.
 *
 * @param doc document used as factory for the new element
 * @return the newly created element
 */
public Element toXML(org.w3c.dom.Document doc) {
    final Element entityElement = doc.createElement("NamedEntity");
    entityElement.setTextContent(this.text);
    entityElement.setAttribute("type", this.type);
    return entityElement;
}
}

View file

@ -1,5 +1,7 @@
package org.texttechnologylab.project.gruppe_05_1.domain.nlp;
import org.w3c.dom.Element;
import java.util.Objects;
import java.util.StringJoiner;
@ -116,4 +118,14 @@ public class Pos {
MyPos{posValue='ADJD', coarseValue='ADV', begin=127, end=130, coveredText='gut'},
MyPos{posValue='$.', coarseValue='PUNCT', begin=130, end=131, coveredText='.'}],
*/
/**
 * Builds the XML element for this part-of-speech annotation.
 *
 * The element is named "pos"; the attributes "posValue", "coarseValue", "begin"
 * and "end" mirror the fields of the same name, and the covered text becomes
 * the text content. The element is created by the given document but not
 * attached to it.
 *
 * @param doc document used as factory for the new element
 * @return the newly created element
 */
public Element toXML(org.w3c.dom.Document doc) {
    final Element result = doc.createElement("pos");
    result.setTextContent(this.coveredText);

    // tag values
    result.setAttribute("posValue", this.posValue);
    result.setAttribute("coarseValue", this.coarseValue);

    // span of the annotation
    result.setAttribute("begin", String.valueOf(this.begin));
    result.setAttribute("end", String.valueOf(this.end));
    return result;
}
}

View file

@ -2,6 +2,7 @@ package org.texttechnologylab.project.gruppe_05_1.domain.nlp;
import org.bson.Document;
import org.texttechnologylab.project.gruppe_05_1.database.MongoDBHandler;
import org.w3c.dom.Element;
import java.util.ArrayList;
import java.util.List;
@ -120,4 +121,15 @@ public class Sentiment {
}
return sentiments;
}
/**
 * Builds the XML element for this sentiment annotation.
 *
 * The element is named "sentiment" and has no text content; the fields begin,
 * end, sentiment, negative, neutral and positive are written as attributes of
 * the same name. The element is created by the given document but not
 * attached to it.
 *
 * @param doc document used as factory for the new element
 * @return the newly created element
 */
public org.w3c.dom.Element toXML(org.w3c.dom.Document doc) {
    final Element result = doc.createElement("sentiment");

    // span of the annotation
    result.setAttribute("begin", String.valueOf(begin));
    result.setAttribute("end", String.valueOf(end));

    // overall value followed by its three components
    result.setAttribute("sentiment", String.valueOf(sentiment));
    result.setAttribute("negative", String.valueOf(negative));
    result.setAttribute("neutral", String.valueOf(neutral));
    result.setAttribute("positive", String.valueOf(positive));

    return result;
}
}

View file

@ -1,6 +1,7 @@
package org.texttechnologylab.project.gruppe_05_1.domain.nlp;
import org.bson.Document;
import org.w3c.dom.Element;
import java.util.*;
import java.util.stream.Collectors;
@ -104,4 +105,12 @@ public class Topic {
return condensedTopicInfo;
}
public Element toXML(org.w3c.dom.Document doc) {
Element topicElement = doc.createElement("topic");
topicElement.setAttribute("topic", this.getTopic());
topicElement.setAttribute("score", this.getScore().toString());
topicElement.setTextContent(this.getText());
return topicElement;
}
}

View file

@ -1,6 +1,8 @@
package org.texttechnologylab.project.gruppe_05_1.export;
import org.texttechnologylab.project.gruppe_05_1.database.domainimpl.mdb.Speaker_MongoDB_Impl;
import org.texttechnologylab.project.gruppe_05_1.domain.nlp.Sentiment;
import org.texttechnologylab.project.gruppe_05_1.domain.nlp.Topic;
import org.texttechnologylab.project.gruppe_05_1.util.Logger;
import org.texttechnologylab.project.gruppe_05_1.xml.speeches.Interfaces.Speech;
@ -16,6 +18,7 @@ import java.time.format.DateTimeFormatter;
import java.util.Arrays;
import java.util.Base64;
import java.util.List;
import java.util.Map;
import static org.texttechnologylab.project.gruppe_05_1.Main.RESOURCES_DIR;
import static org.texttechnologylab.project.gruppe_05_1.Main.TEMP_EXPORT_DIR;
@ -49,7 +52,23 @@ public class TeXUtil {
tex.append(speech.toTeX());
return tex.toString().replace("$$SPEAKERINFO$$", speaker.toTeX());
Map<String, Double> topics = Topic.condenseTopicInformation(getHTMLSpeechByKey(speechId).getNlp().getTopics());
// loop through topics and Logger.pink them
for (Map.Entry<String, Double> entry : topics.entrySet()) {
Logger.pink(entry + " " + entry.getValue());
}
/*Map<String, Integer> pos = getPOSInformationCardinalitiesForSpeechById(speechId);
// loop through topics and Logger.pink them
for (Map.Entry<String, Integer> entry : pos.entrySet()) {
Logger.pink(entry + " " + entry.getValue());
}*/
return tex.toString().replace("$$SPEAKERINFO$$", speaker.toTeX())
.replace("$$NLPMETADATA$$",
generateChartView(generateBubbleChartLatex(topics),
generateBarChartLatex(getPOSInformationCardinalitiesForSpeechById(speechId)),
generateRadarChartLatex(getHTMLSpeechByKey(speechId).getNlp().getSentiments()), ""));
}
public static String getSpeechToTexComponent(Speech speech) {
@ -223,4 +242,114 @@ public class TeXUtil {
} catch (IOException ignored) {}
return false;
}
/**
 * Arranges the four chart snippets in LaTeX minipages.
 *
 * The bar chart comes first in a minipage of full text width, followed by the
 * bubble chart (0.4 text width), the radar chart (0.3) and the sunburst chart (0.3).
 *
 * @param bubbleChartTeX  LaTeX code of the bubble chart
 * @param barChartTeX     LaTeX code of the bar chart
 * @param radarChartTeX   LaTeX code of the radar chart
 * @param sunburstCharTeX LaTeX code of the sunburst chart
 * @return the concatenated minipage environments
 */
public static String generateChartView(String bubbleChartTeX, String barChartTeX, String radarChartTeX, String sunburstCharTeX) {
    return wrapInMinipage("1", barChartTeX)
            + wrapInMinipage("0.4", bubbleChartTeX)
            + wrapInMinipage("0.3", radarChartTeX)
            + wrapInMinipage("0.3", sunburstCharTeX);
}

/** Wraps the content in a minipage whose width is the given fraction of the text width. */
private static String wrapInMinipage(String widthFactor, String content) {
    return "\\begin{minipage}{" + widthFactor + "\\textwidth}\n"
            + content
            + "\\end{minipage}\n";
}
/**
 * Renders the topic data as a two-column LaTeX table.
 *
 * Despite its name, this method emits a plain {@code tabular} environment under
 * the heading "Topics Information", with one row per map entry in the
 * iteration order of the map.
 *
 * @param bubbleData category name mapped to its value
 * @return LaTeX code of the heading and the table
 */
public static String generateBubbleChartLatex(Map<String, Double> bubbleData) {
    final StringBuilder rows = new StringBuilder();
    bubbleData.forEach((category, value) ->
            rows.append(category).append(" & ").append(value).append(" \\\\ \\hline\n"));

    final String heading = "Topics Information\\\\\n";
    final String tableHead = "\\begin{tabular}{|c|c|}\n"
            + "\\hline\n"
            + "Category & Value \\\\ \\hline\n";
    final String tableEnd = "\\end{tabular}\n\n";

    return heading + tableHead + rows + tableEnd;
}
/**
 * Renders the given counts as a pgfplots bar chart (ybar) inside a tikzpicture.
 *
 * Every map key becomes a symbolic x coordinate and every value the height of
 * its bar, in the iteration order of the map. Dollar signs in the generated
 * coordinate list and plot data are escaped as {@code \$}. The whole picture
 * is scaled to 0.25 and the y axis is fixed to the range 0..800.
 *
 * @param barData category name (here: POS tag) mapped to its count
 * @return LaTeX code of the heading "POS Information" followed by the chart
 */
public static String generateBarChartLatex(Map<String, Integer> barData) {
    // Collect the coordinate list and the plot points in a single pass over the data.
    final StringBuilder symbolicCoords = new StringBuilder("{");
    final StringBuilder plotPoints = new StringBuilder();
    barData.forEach((category, count) -> {
        symbolicCoords.append(category).append(", ");
        plotPoints.append("\t(").append(category).append(", ").append(count).append(")\n");
    });
    symbolicCoords.append("}");

    // '$' would start math mode in LaTeX, so it is escaped in both fragments.
    final String escapedCoords = symbolicCoords.toString().replace("$", "\\$");
    final String escapedPoints = plotPoints.toString().replace("$", "\\$");

    final String heading = "POS Information\\\\\n";

    // the only way to reliably show most of the POS is by scaling it down this far
    final String pictureOpening = "\n"
            + "\\scalebox{0.25}{"
            + "\\begin{tikzpicture}\n"
            + "\n"
            + "\\begin{axis}[\n"
            + " ybar,\n"
            + " width=4\\textwidth,\n"
            + " height=0.5\\textwidth,\n";

    final String axisOptions = " symbolic x coords=" + escapedCoords + ",\n"
            + " xtick=data,\n"
            + " ylabel={Value},\n"
            + " xlabel={Category},\n"
            + " ymin=0, ymax=800\n"
            + " ]"
            + "\\addplot coordinates {\n";

    final String pictureClosing = "};\n"
            + "\\end{axis}\n"
            + "\n"
            + "\\end{tikzpicture}"
            + "}";

    return heading + pictureOpening + axisOptions + escapedPoints + pictureClosing;
}
/**
 * Placeholder for a radar chart of the sentiment values.
 *
 * The chart generation is currently disabled, so the argument is ignored and
 * an empty string is returned. The disabled draft is kept below for reference.
 *
 * @param sentimets sentiments of the speech (currently unused)
 * @return always the empty string
 */
public static String generateRadarChartLatex(List<Sentiment> sentimets) {
    /* Disabled draft of the radar chart:
    tex.append("\\begin{tikzpicture}\n" +
    " \\coordinate (origin) at (0, 0);\n" +
    "\n" +
    " % Define the axes (3 axes) with unit length (1)\n" +
    " \\foreach[count=\\i] \\dim in {Negative, Neutral, Positive}{\n" +
    " \\coordinate (\\i) at (\\i * 360 / 3: 1); % Set radius to 1 for unit length axes\n" +
    " \\node at (\\i * 360 / 3: 1.1) {\\huge\\dim}; % Axis labels (slightly outside)\n" +
    " \\draw (origin) -- (\\i); % Draw the axes\n" +
    " }");
    for (Sentiment sentiment : sentimets) {
    tex.append("\\foreach \\i/\\value in {1/")
    .append(sentiment.getNegative())
    .append(", 2/")
    .append(sentiment.getNeutral())
    .append(", 3/")
    .append(sentiment.getPositive())
    .append("}{\n")
    .append(" \\coordinate (point-\\i) at (\\i * 360 / 3: \\value);\n")
    .append(" }\n");
    }
    tex.append("\\draw [fill=blue!20, opacity=.7] (point-1) -- (point-2) -- (point-3) -- cycle;\n" +
    "\\end{tikzpicture}");*/
    return "";
}
}

View file

@ -1,6 +1,8 @@
package org.texttechnologylab.project.gruppe_05_1.export;
import org.texttechnologylab.project.gruppe_05_1.database.domainimpl.mdb.Speaker_MongoDB_Impl;
import org.texttechnologylab.project.gruppe_05_1.domain.html.HtmlSpeech;
import org.texttechnologylab.project.gruppe_05_1.domain.nlp.*;
import org.texttechnologylab.project.gruppe_05_1.xml.speeches.Interfaces.Speech;
import javax.xml.transform.Transformer;
import javax.xml.transform.TransformerFactory;
@ -49,6 +51,51 @@ public class XMLUtil {
return doc;
}
/**
 * Appends the NLP annotations of a speech to the XML document.
 *
 * An "nlp" element with the children "sentiments", "topics", "NamedEntities"
 * and "pos" is built and appended to the first child of the document. Each
 * child container is always created; it stays empty when the corresponding
 * annotation list is {@code null}.
 *
 * @param doc     document that creates the elements and receives the "nlp" element
 * @param nlpData speech whose NLP annotations are exported
 */
public static void addNlpData(Document doc, HtmlSpeech nlpData) {
    Element nlpDataElement = doc.createElement("nlp");

    Element sentimentsElement = doc.createElement("sentiments");
    nlpDataElement.appendChild(sentimentsElement);
    List<Sentiment> sentiments = nlpData.getNlp().getSentiments();
    // Guarded like the other annotation lists: a speech without sentiments must not abort the export.
    if (sentiments != null) {
        for (Sentiment sentiment : sentiments) {
            sentimentsElement.appendChild(sentiment.toXML(doc));
        }
    }

    Element topicsElement = doc.createElement("topics");
    nlpDataElement.appendChild(topicsElement);
    List<Topic> topics = nlpData.getNlp().getTopics();
    if (topics != null) {
        for (Topic topic : topics) {
            topicsElement.appendChild(topic.toXML(doc));
        }
    }

    Element namedEntitiesElement = doc.createElement("NamedEntities");
    nlpDataElement.appendChild(namedEntitiesElement);
    List<NamedEntity> namedEntities = nlpData.getNlp().getNamedEntities();
    if (namedEntities != null) {
        for (NamedEntity namedEntity : namedEntities) {
            namedEntitiesElement.appendChild(namedEntity.toXML(doc));
        }
    }

    Element posElement = doc.createElement("pos");
    nlpDataElement.appendChild(posElement);
    List<Pos> posElements = nlpData.getNlp().getPosList();
    if (posElements != null) {
        for (Pos pos : posElements) {
            posElement.appendChild(pos.toXML(doc));
        }
    }

    // The finished subtree is attached to the first child of the document.
    doc.getFirstChild().appendChild(nlpDataElement);
}
public static void addSpeechById(Document doc, String speechId) {
// get speeches element
Element speechesElement = (Element) doc.getElementsByTagName("speeches").item(0);
@ -60,6 +107,9 @@ public class XMLUtil {
Speech speech = getSpeechByKey(speechId);
Speaker_MongoDB_Impl speaker = getSpeakerById(String.valueOf(speech.getSpeakerId()));
HtmlSpeech htmlSpeech = getHTMLSpeechByKey(speechId);
addNlpData(doc, htmlSpeech);
speechElement.appendChild(speaker.toXML(doc));
speechElement.appendChild(speech.toXML(doc));
}

View file

@ -4,6 +4,8 @@
\usepackage{geometry}
\usepackage{xcolor}
\usepackage[T1]{fontenc}
\usepackage{tikz}
\usepackage{pgfplots}
\pagestyle{fancy}