diff --git a/src/main/java/org/texttechnologylab/project/gruppe_05_1/database/MongoPprUtils.java b/src/main/java/org/texttechnologylab/project/gruppe_05_1/database/MongoPprUtils.java index 0aef63d..d3753b8 100644 --- a/src/main/java/org/texttechnologylab/project/gruppe_05_1/database/MongoPprUtils.java +++ b/src/main/java/org/texttechnologylab/project/gruppe_05_1/database/MongoPprUtils.java @@ -10,6 +10,8 @@ import org.texttechnologylab.project.gruppe_05_1.database.domainimpl.mdb.Speech_ import org.texttechnologylab.project.gruppe_05_1.domain.html.HtmlSpeech; import org.texttechnologylab.project.gruppe_05_1.domain.html.Parlamentarier; import org.texttechnologylab.project.gruppe_05_1.domain.html.ParlamentarierDetails; +import org.texttechnologylab.project.gruppe_05_1.domain.nlp.NamedEntity; +import org.texttechnologylab.project.gruppe_05_1.domain.nlp.Token; import org.texttechnologylab.project.gruppe_05_1.domain.html.SpeechOverview; import org.texttechnologylab.project.gruppe_05_1.domain.speaker.Membership; import org.texttechnologylab.project.gruppe_05_1.domain.speech.SpeechMetaData; @@ -23,6 +25,7 @@ import java.time.LocalDate; import java.time.LocalDateTime; import java.time.ZoneId; import java.util.*; +import java.util.stream.Collectors; import static com.mongodb.client.model.Filters.eq; @@ -812,6 +815,45 @@ public class MongoPprUtils { } return speechIds; } + public static Map getPOSInformationCardinalitiesForSpeechById(String speechId) { + List tokens = getHTMLSpeechByKey(speechId).getNlp().getTokens(); + Map posCounts = Token.countPOS(tokens); + + List posList = posCounts.entrySet().stream() + .map(entry -> new Token(entry.getKey(), String.valueOf(entry.getValue()), "")) // Lemma remains empty + .collect(Collectors.toList()); + + return posCounts; + } + + public static Map getNamedEntitiesInformationCardinalitiesForSpeechById(String speechId) { + Map> namedEntitiesMapOfMaps = new HashMap<>(); + + for (NamedEntity ne : getHTMLSpeechByKey(speechId).getNlp().getNamedEntities()) { + String type = ne.getType(); + String text = ne.getText(); + + if (namedEntitiesMapOfMaps.containsKey(type)) { + // Named Entity Type bekannt... + Map typeAppearance = namedEntitiesMapOfMaps.get(type); + if (typeAppearance.containsKey(text)) { + // ... und der Text auch bekannt --> erhöhe die Anzahl um 1 + typeAppearance.replace( + text, + typeAppearance.get(text) + 1) ; + } else { + typeAppearance.put(text, 1); + } + } else { + // Named Entity Type unbekannt: erstelle einen neuen Eintrag für Type sowie einen Eintrag für den ihm gehörigen Text + Map firstTextAppearance = new HashMap<>(); + firstTextAppearance.put(text, 1); + namedEntitiesMapOfMaps.put(type, firstTextAppearance); + } + } + + return namedEntitiesMapOfMaps.get("CARDINAL"); // needs fixing + } /** * Liefert die Liste aller Parteien/Fraktionen, welche in der Liste der Parlamentarier stehen, zurück. diff --git a/src/main/java/org/texttechnologylab/project/gruppe_05_1/database/domainimpl/mdb/Speech_MongoDB_Impl.java b/src/main/java/org/texttechnologylab/project/gruppe_05_1/database/domainimpl/mdb/Speech_MongoDB_Impl.java index 326da2e..a625769 100644 --- a/src/main/java/org/texttechnologylab/project/gruppe_05_1/database/domainimpl/mdb/Speech_MongoDB_Impl.java +++ b/src/main/java/org/texttechnologylab/project/gruppe_05_1/database/domainimpl/mdb/Speech_MongoDB_Impl.java @@ -85,15 +85,16 @@ public class Speech_MongoDB_Impl extends Speech_File_Impl implements Speech { public String toTeX() { StringBuilder tex = new StringBuilder(); + String party = (this.getFraction() != null ? " (" + this.getFraction() + ")" : ""); + String speechTitle = "Rede " + this.getSpeechKey() + "/" + getAgendaTitle(this.getSessionId(), this.getAgendaItemId()) + " von " + this.getSpeakerName() + - " (" + - this.getFraction() + - ") vom " + + party + + " vom " + getSessionDateTime(this.getSessionId()); tex.append("\\section*{").append(speechTitle).append("}\n"); diff --git a/src/main/java/org/texttechnologylab/project/gruppe_05_1/domain/nlp/NamedEntity.java b/src/main/java/org/texttechnologylab/project/gruppe_05_1/domain/nlp/NamedEntity.java index 165df29..dadd618 100644 --- a/src/main/java/org/texttechnologylab/project/gruppe_05_1/domain/nlp/NamedEntity.java +++ b/src/main/java/org/texttechnologylab/project/gruppe_05_1/domain/nlp/NamedEntity.java @@ -1,6 +1,7 @@ package org.texttechnologylab.project.gruppe_05_1.domain.nlp; import org.bson.Document; +import org.w3c.dom.Element; import java.util.ArrayList; import java.util.List; @@ -65,4 +66,11 @@ public class NamedEntity { } return nes; } + + public Element toXML(org.w3c.dom.Document doc) { + Element ne = doc.createElement("NamedEntity"); + ne.setAttribute("type", type); + ne.setTextContent(text); + return ne; + } } diff --git a/src/main/java/org/texttechnologylab/project/gruppe_05_1/domain/nlp/Pos.java b/src/main/java/org/texttechnologylab/project/gruppe_05_1/domain/nlp/Pos.java index 74f027a..6a75ce2 100644 --- a/src/main/java/org/texttechnologylab/project/gruppe_05_1/domain/nlp/Pos.java +++ b/src/main/java/org/texttechnologylab/project/gruppe_05_1/domain/nlp/Pos.java @@ -1,5 +1,7 @@ package org.texttechnologylab.project.gruppe_05_1.domain.nlp; +import org.w3c.dom.Element; + import java.util.Objects; import java.util.StringJoiner; @@ -116,4 +118,14 @@ public class Pos { MyPos{posValue='ADJD', coarseValue='ADV', begin=127, end=130, coveredText='gut'}, MyPos{posValue='$.', coarseValue='PUNCT', begin=130, end=131, coveredText='.'}], */ + + public Element toXML(org.w3c.dom.Document doc) { + Element posElement = doc.createElement("pos"); + posElement.setAttribute("posValue", posValue); + posElement.setAttribute("coarseValue", coarseValue); + posElement.setAttribute("begin", String.valueOf(begin)); + posElement.setAttribute("end", String.valueOf(end)); + posElement.setTextContent(coveredText); + return posElement; + } } diff --git a/src/main/java/org/texttechnologylab/project/gruppe_05_1/domain/nlp/Sentiment.java b/src/main/java/org/texttechnologylab/project/gruppe_05_1/domain/nlp/Sentiment.java index 3894a72..7b8815b 100644 --- a/src/main/java/org/texttechnologylab/project/gruppe_05_1/domain/nlp/Sentiment.java +++ b/src/main/java/org/texttechnologylab/project/gruppe_05_1/domain/nlp/Sentiment.java @@ -2,6 +2,7 @@ package org.texttechnologylab.project.gruppe_05_1.domain.nlp; import org.bson.Document; import org.texttechnologylab.project.gruppe_05_1.database.MongoDBHandler; +import org.w3c.dom.Element; import java.util.ArrayList; import java.util.List; @@ -120,4 +121,15 @@ public class Sentiment { } return sentiments; } + + public org.w3c.dom.Element toXML(org.w3c.dom.Document doc) { + Element sentimentElement = doc.createElement("sentiment"); + sentimentElement.setAttribute("begin", String.valueOf(this.begin)); + sentimentElement.setAttribute("end", String.valueOf(this.end)); + sentimentElement.setAttribute("sentiment", String.valueOf(this.sentiment)); + sentimentElement.setAttribute("negative", String.valueOf(this.negative)); + sentimentElement.setAttribute("neutral", String.valueOf(this.neutral)); + sentimentElement.setAttribute("positive", String.valueOf(this.positive)); + return sentimentElement; + } } diff --git a/src/main/java/org/texttechnologylab/project/gruppe_05_1/domain/nlp/Topic.java b/src/main/java/org/texttechnologylab/project/gruppe_05_1/domain/nlp/Topic.java index 474f5aa..e4cfff6 100644 --- a/src/main/java/org/texttechnologylab/project/gruppe_05_1/domain/nlp/Topic.java +++ b/src/main/java/org/texttechnologylab/project/gruppe_05_1/domain/nlp/Topic.java @@ -1,6 +1,7 @@ package org.texttechnologylab.project.gruppe_05_1.domain.nlp; import org.bson.Document; +import org.w3c.dom.Element; import java.util.*; import java.util.stream.Collectors; @@ -104,4 +105,12 @@ public class Topic { return condensedTopicInfo; } + + public Element toXML(org.w3c.dom.Document doc) { + Element topicElement = doc.createElement("topic"); + topicElement.setAttribute("topic", this.getTopic()); + topicElement.setAttribute("score", this.getScore().toString()); + topicElement.setTextContent(this.getText()); + return topicElement; + } } diff --git a/src/main/java/org/texttechnologylab/project/gruppe_05_1/export/TeXUtil.java b/src/main/java/org/texttechnologylab/project/gruppe_05_1/export/TeXUtil.java index 16cbe83..ead65f3 100644 --- a/src/main/java/org/texttechnologylab/project/gruppe_05_1/export/TeXUtil.java +++ b/src/main/java/org/texttechnologylab/project/gruppe_05_1/export/TeXUtil.java @@ -1,6 +1,8 @@ package org.texttechnologylab.project.gruppe_05_1.export; import org.texttechnologylab.project.gruppe_05_1.database.domainimpl.mdb.Speaker_MongoDB_Impl; +import org.texttechnologylab.project.gruppe_05_1.domain.nlp.Sentiment; +import org.texttechnologylab.project.gruppe_05_1.domain.nlp.Topic; import org.texttechnologylab.project.gruppe_05_1.util.Logger; import org.texttechnologylab.project.gruppe_05_1.xml.speeches.Interfaces.Speech; @@ -16,6 +18,7 @@ import java.time.format.DateTimeFormatter; import java.util.Arrays; import java.util.Base64; import java.util.List; +import java.util.Map; import static org.texttechnologylab.project.gruppe_05_1.Main.RESOURCES_DIR; import static org.texttechnologylab.project.gruppe_05_1.Main.TEMP_EXPORT_DIR; @@ -49,7 +52,23 @@ public class TeXUtil { tex.append(speech.toTeX()); - return tex.toString().replace("$$SPEAKERINFO$$", speaker.toTeX()); + Map topics = Topic.condenseTopicInformation(getHTMLSpeechByKey(speechId).getNlp().getTopics()); + // loop through topics and Logger.pink them + for (Map.Entry entry : topics.entrySet()) { + Logger.pink(entry + " " + entry.getValue()); + } + + /*Map pos = getPOSInformationCardinalitiesForSpeechById(speechId); + // loop through topics and Logger.pink them + for (Map.Entry entry : pos.entrySet()) { + Logger.pink(entry + " " + entry.getValue()); + }*/ + + return tex.toString().replace("$$SPEAKERINFO$$", speaker.toTeX()) + .replace("$$NLPMETADATA$$", + generateChartView(generateBubbleChartLatex(topics), + generateBarChartLatex(getPOSInformationCardinalitiesForSpeechById(speechId)), + generateRadarChartLatex(getHTMLSpeechByKey(speechId).getNlp().getSentiments()), "")); } public static String getSpeechToTexComponent(Speech speech) { @@ -223,4 +242,114 @@ public class TeXUtil { } catch (IOException ignored) {} return false; } + + public static String generateChartView(String bubbleChartTeX, String barChartTeX, String radarChartTeX, String sunburstCharTeX) { + StringBuilder tex = new StringBuilder(); + // 2x2 minipage layout + tex.append("\\begin{minipage}{1\\textwidth}\n") + .append(barChartTeX) + .append("\\end{minipage}\n") + .append("\\begin{minipage}{0.4\\textwidth}\n") + .append(bubbleChartTeX) + .append("\\end{minipage}\n") + .append("\\begin{minipage}{0.3\\textwidth}\n") + .append(radarChartTeX) + .append("\\end{minipage}\n") + .append("\\begin{minipage}{0.3\\textwidth}\n") + .append(sunburstCharTeX) + .append("\\end{minipage}\n"); + + return tex.toString(); + } + + public static String generateBubbleChartLatex(Map bubbleData) { + StringBuilder tex = new StringBuilder(); + + tex.append("Topics Information\\\\\n"); + + // draw generic table with String | Double + tex.append("\\begin{tabular}{|c|c|}\n") + .append("\\hline\n") + .append("Category & Value \\\\ \\hline\n"); + + for (Map.Entry entry : bubbleData.entrySet()) { + tex.append(entry.getKey()).append(" & ").append(entry.getValue()).append(" \\\\ \\hline\n"); + } + + tex.append("\\end{tabular}\n\n"); + + return tex.toString(); + } + + public static String generateBarChartLatex(Map barData) { + StringBuilder tex = new StringBuilder(); + + tex.append("POS Information\\\\\n"); + + tex.append("\n" + + "\\scalebox{0.25}{" + // the only way to reliably show most of the POS is by scaling it down this far + "\\begin{tikzpicture}\n" + + "\n" + + "\\begin{axis}[\n" + + " ybar,\n" + + " width=4\\textwidth,\n" + + " height=0.5\\textwidth,\n"); + + StringBuilder graphData = new StringBuilder(); + StringBuilder xCords = new StringBuilder(); + xCords.append("{"); + for (Map.Entry entry : barData.entrySet()) { + xCords.append(entry.getKey()).append(", "); + graphData.append("\t(").append(entry.getKey()).append(", ").append(entry.getValue()).append(")\n"); + } + xCords.append("}"); + String xCordsString = xCords.toString().replace("$", "\\$"); + + tex.append(" symbolic x coords=").append(xCordsString).append(",\n" + + " xtick=data,\n" + + " ylabel={Value},\n" + + " xlabel={Category},\n" + + " ymin=0, ymax=800\n" + + " ]" + + "\\addplot coordinates {\n"); + tex.append(graphData.toString().replace("$", "\\$")); + tex.append("};\n" + + "\\end{axis}\n" + + "\n" + + "\\end{tikzpicture}" + + "}"); + + return tex.toString(); + } + + public static String generateRadarChartLatex(List sentimets) { + StringBuilder tex = new StringBuilder(); + + /*tex.append("\\begin{tikzpicture}\n" + + " \\coordinate (origin) at (0, 0);\n" + + "\n" + + " % Define the axes (3 axes) with unit length (1)\n" + + " \\foreach[count=\\i] \\dim in {Negative, Neutral, Positive}{\n" + + " \\coordinate (\\i) at (\\i * 360 / 3: 1); % Set radius to 1 for unit length axes\n" + + " \\node at (\\i * 360 / 3: 1.1) {\\huge\\dim}; % Axis labels (slightly outside)\n" + + " \\draw (origin) -- (\\i); % Draw the axes\n" + + " }"); + + for (Sentiment sentiment : sentimets) { + tex.append("\\foreach \\i/\\value in {1/") + .append(sentiment.getNegative()) + .append(", 2/") + .append(sentiment.getNeutral()) + .append(", 3/") + .append(sentiment.getPositive()) + .append("}{\n") + .append(" \\coordinate (point-\\i) at (\\i * 360 / 3: \\value);\n") + .append(" }\n"); + } + + tex.append("\\draw [fill=blue!20, opacity=.7] (point-1) -- (point-2) -- (point-3) -- cycle;\n" + + "\\end{tikzpicture}");*/ + + return tex.toString(); + } } diff --git a/src/main/java/org/texttechnologylab/project/gruppe_05_1/export/XMLUtil.java b/src/main/java/org/texttechnologylab/project/gruppe_05_1/export/XMLUtil.java index 908fdea..18c03b3 100644 --- a/src/main/java/org/texttechnologylab/project/gruppe_05_1/export/XMLUtil.java +++ b/src/main/java/org/texttechnologylab/project/gruppe_05_1/export/XMLUtil.java @@ -1,6 +1,8 @@ package org.texttechnologylab.project.gruppe_05_1.export; import org.texttechnologylab.project.gruppe_05_1.database.domainimpl.mdb.Speaker_MongoDB_Impl; +import org.texttechnologylab.project.gruppe_05_1.domain.html.HtmlSpeech; +import org.texttechnologylab.project.gruppe_05_1.domain.nlp.*; import org.texttechnologylab.project.gruppe_05_1.xml.speeches.Interfaces.Speech; import javax.xml.transform.Transformer; import javax.xml.transform.TransformerFactory; @@ -49,6 +51,51 @@ public class XMLUtil { return doc; } + public static void addNlpData(Document doc, HtmlSpeech nlpData) { + Element nlpDataElement = doc.createElement("nlp"); + + Element sentimentsElement = doc.createElement("sentiments"); + nlpDataElement.appendChild(sentimentsElement); + + + List sentiments = nlpData.getNlp().getSentiments(); + for (Sentiment sentiment: sentiments) { + sentimentsElement.appendChild(sentiment.toXML(doc)); + } + + Element topicsElement = doc.createElement("topics"); + nlpDataElement.appendChild(topicsElement); + + List topics = nlpData.getNlp().getTopics(); + if (topics != null) { + for (Topic topic: topics) { + topicsElement.appendChild(topic.toXML(doc)); + } + } + + Element namedEntitiesElement = doc.createElement("NamedEntities"); + nlpDataElement.appendChild(namedEntitiesElement); + + List namedEntities = nlpData.getNlp().getNamedEntities(); + if (namedEntities != null) { + for (NamedEntity namedEntity: namedEntities) { + namedEntitiesElement.appendChild(namedEntity.toXML(doc)); + } + } + + Element posElement = doc.createElement("pos"); + nlpDataElement.appendChild(posElement); + + List posElements = nlpData.getNlp().getPosList(); + if (posElements != null) { + for (Pos pos: posElements) { + posElement.appendChild(pos.toXML(doc)); + } + } + + doc.getFirstChild().appendChild(nlpDataElement); + } + public static void addSpeechById(Document doc, String speechId) { // get speeches element Element speechesElement = (Element) doc.getElementsByTagName("speeches").item(0); @@ -60,6 +107,9 @@ public class XMLUtil { Speech speech = getSpeechByKey(speechId); Speaker_MongoDB_Impl speaker = getSpeakerById(String.valueOf(speech.getSpeakerId())); + HtmlSpeech htmlSpeech = getHTMLSpeechByKey(speechId); + addNlpData(doc, htmlSpeech); + speechElement.appendChild(speaker.toXML(doc)); speechElement.appendChild(speech.toXML(doc)); } diff --git a/src/main/resources/tex/preamble.tex b/src/main/resources/tex/preamble.tex index ec2bf02..bfcfc8c 100644 --- a/src/main/resources/tex/preamble.tex +++ b/src/main/resources/tex/preamble.tex @@ -4,6 +4,8 @@ \usepackage{geometry} \usepackage{xcolor} \usepackage[T1]{fontenc} +\usepackage{tikz} +\usepackage{pgfplots} \pagestyle{fancy}