Merge branch 'speech_export_feature'
This commit is contained in:
commit
ae126f0df3
9 changed files with 269 additions and 4 deletions
|
@ -10,6 +10,8 @@ import org.texttechnologylab.project.gruppe_05_1.database.domainimpl.mdb.Speech_
|
|||
import org.texttechnologylab.project.gruppe_05_1.domain.html.HtmlSpeech;
|
||||
import org.texttechnologylab.project.gruppe_05_1.domain.html.Parlamentarier;
|
||||
import org.texttechnologylab.project.gruppe_05_1.domain.html.ParlamentarierDetails;
|
||||
import org.texttechnologylab.project.gruppe_05_1.domain.nlp.NamedEntity;
|
||||
import org.texttechnologylab.project.gruppe_05_1.domain.nlp.Token;
|
||||
import org.texttechnologylab.project.gruppe_05_1.domain.html.SpeechOverview;
|
||||
import org.texttechnologylab.project.gruppe_05_1.domain.speaker.Membership;
|
||||
import org.texttechnologylab.project.gruppe_05_1.domain.speech.SpeechMetaData;
|
||||
|
@ -23,6 +25,7 @@ import java.time.LocalDate;
|
|||
import java.time.LocalDateTime;
|
||||
import java.time.ZoneId;
|
||||
import java.util.*;
|
||||
import java.util.stream.Collectors;
|
||||
|
||||
import static com.mongodb.client.model.Filters.eq;
|
||||
|
||||
|
@ -812,6 +815,45 @@ public class MongoPprUtils {
|
|||
}
|
||||
return speechIds;
|
||||
}
|
||||
public static Map<String, Integer> getPOSInformationCardinalitiesForSpeechById(String speechId) {
|
||||
List<Token> tokens = getHTMLSpeechByKey(speechId).getNlp().getTokens();
|
||||
Map<String, Integer> posCounts = Token.countPOS(tokens);
|
||||
|
||||
List<Token> posList = posCounts.entrySet().stream()
|
||||
.map(entry -> new Token(entry.getKey(), String.valueOf(entry.getValue()), "")) // Lemma remains empty
|
||||
.collect(Collectors.toList());
|
||||
|
||||
return posCounts;
|
||||
}
|
||||
|
||||
public static Map<String, Integer> getNamedEntitiesInformationCardinalitiesForSpeechById(String speechId) {
|
||||
Map<String, Map<String, Integer>> namedEntitiesMapOfMaps = new HashMap<>();
|
||||
|
||||
for (NamedEntity ne : getHTMLSpeechByKey(speechId).getNlp().getNamedEntities()) {
|
||||
String type = ne.getType();
|
||||
String text = ne.getText();
|
||||
|
||||
if (namedEntitiesMapOfMaps.containsKey(type)) {
|
||||
// Named Entity Type bekannt...
|
||||
Map<String, Integer> typeAppearance = namedEntitiesMapOfMaps.get(type);
|
||||
if (typeAppearance.containsKey(text)) {
|
||||
// ... und der Text auch bekannt --> erhöhe die Anzahl um 1
|
||||
typeAppearance.replace(
|
||||
text,
|
||||
typeAppearance.get(text) + 1) ;
|
||||
} else {
|
||||
typeAppearance.put(text, 1);
|
||||
}
|
||||
} else {
|
||||
// Named Entity Type unbekannt: erstelle einen neuen Eintrag für Type sowie einen Eintrag für den ihm gehörigen Text
|
||||
Map<String, Integer> firstTextAppearance = new HashMap<>();
|
||||
firstTextAppearance.put(text, 1);
|
||||
namedEntitiesMapOfMaps.put(type, firstTextAppearance);
|
||||
}
|
||||
}
|
||||
|
||||
return namedEntitiesMapOfMaps.get("CARDINAL"); // needs fixing
|
||||
}
|
||||
|
||||
/**
|
||||
* Liefert die Liste aller Parteien/Fraktionen, welche in der Liste der Parlamentarier stehen, zurück.
|
||||
|
|
|
@ -85,15 +85,16 @@ public class Speech_MongoDB_Impl extends Speech_File_Impl implements Speech {
|
|||
public String toTeX() {
|
||||
StringBuilder tex = new StringBuilder();
|
||||
|
||||
String party = (this.getFraction() != null ? " (" + this.getFraction() + ")" : "");
|
||||
|
||||
String speechTitle = "Rede " +
|
||||
this.getSpeechKey() +
|
||||
"/" +
|
||||
getAgendaTitle(this.getSessionId(), this.getAgendaItemId()) +
|
||||
" von " +
|
||||
this.getSpeakerName() +
|
||||
" (" +
|
||||
this.getFraction() +
|
||||
") vom " +
|
||||
party +
|
||||
" vom " +
|
||||
getSessionDateTime(this.getSessionId());
|
||||
|
||||
tex.append("\\section*{").append(speechTitle).append("}\n");
|
||||
|
|
|
@ -1,6 +1,7 @@
|
|||
package org.texttechnologylab.project.gruppe_05_1.domain.nlp;
|
||||
|
||||
import org.bson.Document;
|
||||
import org.w3c.dom.Element;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
|
@ -65,4 +66,11 @@ public class NamedEntity {
|
|||
}
|
||||
return nes;
|
||||
}
|
||||
|
||||
public Element toXML(org.w3c.dom.Document doc) {
|
||||
Element ne = doc.createElement("NamedEntity");
|
||||
ne.setAttribute("type", type);
|
||||
ne.setTextContent(text);
|
||||
return ne;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -1,5 +1,7 @@
|
|||
package org.texttechnologylab.project.gruppe_05_1.domain.nlp;
|
||||
|
||||
import org.w3c.dom.Element;
|
||||
|
||||
import java.util.Objects;
|
||||
import java.util.StringJoiner;
|
||||
|
||||
|
@ -116,4 +118,14 @@ public class Pos {
|
|||
MyPos{posValue='ADJD', coarseValue='ADV', begin=127, end=130, coveredText='gut'},
|
||||
MyPos{posValue='$.', coarseValue='PUNCT', begin=130, end=131, coveredText='.'}],
|
||||
*/
|
||||
|
||||
public Element toXML(org.w3c.dom.Document doc) {
|
||||
Element posElement = doc.createElement("pos");
|
||||
posElement.setAttribute("posValue", posValue);
|
||||
posElement.setAttribute("coarseValue", coarseValue);
|
||||
posElement.setAttribute("begin", String.valueOf(begin));
|
||||
posElement.setAttribute("end", String.valueOf(end));
|
||||
posElement.setTextContent(coveredText);
|
||||
return posElement;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -2,6 +2,7 @@ package org.texttechnologylab.project.gruppe_05_1.domain.nlp;
|
|||
|
||||
import org.bson.Document;
|
||||
import org.texttechnologylab.project.gruppe_05_1.database.MongoDBHandler;
|
||||
import org.w3c.dom.Element;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
|
@ -120,4 +121,15 @@ public class Sentiment {
|
|||
}
|
||||
return sentiments;
|
||||
}
|
||||
|
||||
public org.w3c.dom.Element toXML(org.w3c.dom.Document doc) {
|
||||
Element sentimentElement = doc.createElement("sentiment");
|
||||
sentimentElement.setAttribute("begin", String.valueOf(this.begin));
|
||||
sentimentElement.setAttribute("end", String.valueOf(this.end));
|
||||
sentimentElement.setAttribute("sentiment", String.valueOf(this.sentiment));
|
||||
sentimentElement.setAttribute("negative", String.valueOf(this.negative));
|
||||
sentimentElement.setAttribute("neutral", String.valueOf(this.neutral));
|
||||
sentimentElement.setAttribute("positive", String.valueOf(this.positive));
|
||||
return sentimentElement;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -1,6 +1,7 @@
|
|||
package org.texttechnologylab.project.gruppe_05_1.domain.nlp;
|
||||
|
||||
import org.bson.Document;
|
||||
import org.w3c.dom.Element;
|
||||
|
||||
import java.util.*;
|
||||
import java.util.stream.Collectors;
|
||||
|
@ -104,4 +105,12 @@ public class Topic {
|
|||
|
||||
return condensedTopicInfo;
|
||||
}
|
||||
|
||||
public Element toXML(org.w3c.dom.Document doc) {
|
||||
Element topicElement = doc.createElement("topic");
|
||||
topicElement.setAttribute("topic", this.getTopic());
|
||||
topicElement.setAttribute("score", this.getScore().toString());
|
||||
topicElement.setTextContent(this.getText());
|
||||
return topicElement;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -1,6 +1,8 @@
|
|||
package org.texttechnologylab.project.gruppe_05_1.export;
|
||||
|
||||
import org.texttechnologylab.project.gruppe_05_1.database.domainimpl.mdb.Speaker_MongoDB_Impl;
|
||||
import org.texttechnologylab.project.gruppe_05_1.domain.nlp.Sentiment;
|
||||
import org.texttechnologylab.project.gruppe_05_1.domain.nlp.Topic;
|
||||
import org.texttechnologylab.project.gruppe_05_1.util.Logger;
|
||||
import org.texttechnologylab.project.gruppe_05_1.xml.speeches.Interfaces.Speech;
|
||||
|
||||
|
@ -16,6 +18,7 @@ import java.time.format.DateTimeFormatter;
|
|||
import java.util.Arrays;
|
||||
import java.util.Base64;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
|
||||
import static org.texttechnologylab.project.gruppe_05_1.Main.RESOURCES_DIR;
|
||||
import static org.texttechnologylab.project.gruppe_05_1.Main.TEMP_EXPORT_DIR;
|
||||
|
@ -49,7 +52,23 @@ public class TeXUtil {
|
|||
|
||||
tex.append(speech.toTeX());
|
||||
|
||||
return tex.toString().replace("$$SPEAKERINFO$$", speaker.toTeX());
|
||||
Map<String, Double> topics = Topic.condenseTopicInformation(getHTMLSpeechByKey(speechId).getNlp().getTopics());
|
||||
// loop through topics and Logger.pink them
|
||||
for (Map.Entry<String, Double> entry : topics.entrySet()) {
|
||||
Logger.pink(entry + " " + entry.getValue());
|
||||
}
|
||||
|
||||
/*Map<String, Integer> pos = getPOSInformationCardinalitiesForSpeechById(speechId);
|
||||
// loop through topics and Logger.pink them
|
||||
for (Map.Entry<String, Integer> entry : pos.entrySet()) {
|
||||
Logger.pink(entry + " " + entry.getValue());
|
||||
}*/
|
||||
|
||||
return tex.toString().replace("$$SPEAKERINFO$$", speaker.toTeX())
|
||||
.replace("$$NLPMETADATA$$",
|
||||
generateChartView(generateBubbleChartLatex(topics),
|
||||
generateBarChartLatex(getPOSInformationCardinalitiesForSpeechById(speechId)),
|
||||
generateRadarChartLatex(getHTMLSpeechByKey(speechId).getNlp().getSentiments()), ""));
|
||||
}
|
||||
|
||||
public static String getSpeechToTexComponent(Speech speech) {
|
||||
|
@ -223,4 +242,114 @@ public class TeXUtil {
|
|||
} catch (IOException ignored) {}
|
||||
return false;
|
||||
}
|
||||
|
||||
public static String generateChartView(String bubbleChartTeX, String barChartTeX, String radarChartTeX, String sunburstCharTeX) {
|
||||
StringBuilder tex = new StringBuilder();
|
||||
// 2x2 minipage layout
|
||||
tex.append("\\begin{minipage}{1\\textwidth}\n")
|
||||
.append(barChartTeX)
|
||||
.append("\\end{minipage}\n")
|
||||
.append("\\begin{minipage}{0.4\\textwidth}\n")
|
||||
.append(bubbleChartTeX)
|
||||
.append("\\end{minipage}\n")
|
||||
.append("\\begin{minipage}{0.3\\textwidth}\n")
|
||||
.append(radarChartTeX)
|
||||
.append("\\end{minipage}\n")
|
||||
.append("\\begin{minipage}{0.3\\textwidth}\n")
|
||||
.append(sunburstCharTeX)
|
||||
.append("\\end{minipage}\n");
|
||||
|
||||
return tex.toString();
|
||||
}
|
||||
|
||||
public static String generateBubbleChartLatex(Map<String, Double> bubbleData) {
|
||||
StringBuilder tex = new StringBuilder();
|
||||
|
||||
tex.append("Topics Information\\\\\n");
|
||||
|
||||
// draw generic table with String | Double
|
||||
tex.append("\\begin{tabular}{|c|c|}\n")
|
||||
.append("\\hline\n")
|
||||
.append("Category & Value \\\\ \\hline\n");
|
||||
|
||||
for (Map.Entry<String, Double> entry : bubbleData.entrySet()) {
|
||||
tex.append(entry.getKey()).append(" & ").append(entry.getValue()).append(" \\\\ \\hline\n");
|
||||
}
|
||||
|
||||
tex.append("\\end{tabular}\n\n");
|
||||
|
||||
return tex.toString();
|
||||
}
|
||||
|
||||
public static String generateBarChartLatex(Map<String, Integer> barData) {
|
||||
StringBuilder tex = new StringBuilder();
|
||||
|
||||
tex.append("POS Information\\\\\n");
|
||||
|
||||
tex.append("\n" +
|
||||
"\\scalebox{0.25}{" + // the only way to reliably show most of the POS is by scaling it down this far
|
||||
"\\begin{tikzpicture}\n" +
|
||||
"\n" +
|
||||
"\\begin{axis}[\n" +
|
||||
" ybar,\n" +
|
||||
" width=4\\textwidth,\n" +
|
||||
" height=0.5\\textwidth,\n");
|
||||
|
||||
StringBuilder graphData = new StringBuilder();
|
||||
StringBuilder xCords = new StringBuilder();
|
||||
xCords.append("{");
|
||||
for (Map.Entry<String, Integer> entry : barData.entrySet()) {
|
||||
xCords.append(entry.getKey()).append(", ");
|
||||
graphData.append("\t(").append(entry.getKey()).append(", ").append(entry.getValue()).append(")\n");
|
||||
}
|
||||
xCords.append("}");
|
||||
String xCordsString = xCords.toString().replace("$", "\\$");
|
||||
|
||||
tex.append(" symbolic x coords=").append(xCordsString).append(",\n" +
|
||||
" xtick=data,\n" +
|
||||
" ylabel={Value},\n" +
|
||||
" xlabel={Category},\n" +
|
||||
" ymin=0, ymax=800\n" +
|
||||
" ]" +
|
||||
"\\addplot coordinates {\n");
|
||||
tex.append(graphData.toString().replace("$", "\\$"));
|
||||
tex.append("};\n" +
|
||||
"\\end{axis}\n" +
|
||||
"\n" +
|
||||
"\\end{tikzpicture}" +
|
||||
"}");
|
||||
|
||||
return tex.toString();
|
||||
}
|
||||
|
||||
public static String generateRadarChartLatex(List<Sentiment> sentimets) {
|
||||
StringBuilder tex = new StringBuilder();
|
||||
|
||||
/*tex.append("\\begin{tikzpicture}\n" +
|
||||
" \\coordinate (origin) at (0, 0);\n" +
|
||||
"\n" +
|
||||
" % Define the axes (3 axes) with unit length (1)\n" +
|
||||
" \\foreach[count=\\i] \\dim in {Negative, Neutral, Positive}{\n" +
|
||||
" \\coordinate (\\i) at (\\i * 360 / 3: 1); % Set radius to 1 for unit length axes\n" +
|
||||
" \\node at (\\i * 360 / 3: 1.1) {\\huge\\dim}; % Axis labels (slightly outside)\n" +
|
||||
" \\draw (origin) -- (\\i); % Draw the axes\n" +
|
||||
" }");
|
||||
|
||||
for (Sentiment sentiment : sentimets) {
|
||||
tex.append("\\foreach \\i/\\value in {1/")
|
||||
.append(sentiment.getNegative())
|
||||
.append(", 2/")
|
||||
.append(sentiment.getNeutral())
|
||||
.append(", 3/")
|
||||
.append(sentiment.getPositive())
|
||||
.append("}{\n")
|
||||
.append(" \\coordinate (point-\\i) at (\\i * 360 / 3: \\value);\n")
|
||||
.append(" }\n");
|
||||
}
|
||||
|
||||
tex.append("\\draw [fill=blue!20, opacity=.7] (point-1) -- (point-2) -- (point-3) -- cycle;\n" +
|
||||
"\\end{tikzpicture}");*/
|
||||
|
||||
return tex.toString();
|
||||
}
|
||||
}
|
||||
|
|
|
@ -1,6 +1,8 @@
|
|||
package org.texttechnologylab.project.gruppe_05_1.export;
|
||||
|
||||
import org.texttechnologylab.project.gruppe_05_1.database.domainimpl.mdb.Speaker_MongoDB_Impl;
|
||||
import org.texttechnologylab.project.gruppe_05_1.domain.html.HtmlSpeech;
|
||||
import org.texttechnologylab.project.gruppe_05_1.domain.nlp.*;
|
||||
import org.texttechnologylab.project.gruppe_05_1.xml.speeches.Interfaces.Speech;
|
||||
import javax.xml.transform.Transformer;
|
||||
import javax.xml.transform.TransformerFactory;
|
||||
|
@ -49,6 +51,51 @@ public class XMLUtil {
|
|||
return doc;
|
||||
}
|
||||
|
||||
public static void addNlpData(Document doc, HtmlSpeech nlpData) {
|
||||
Element nlpDataElement = doc.createElement("nlp");
|
||||
|
||||
Element sentimentsElement = doc.createElement("sentiments");
|
||||
nlpDataElement.appendChild(sentimentsElement);
|
||||
|
||||
|
||||
List<Sentiment> sentiments = nlpData.getNlp().getSentiments();
|
||||
for (Sentiment sentiment: sentiments) {
|
||||
sentimentsElement.appendChild(sentiment.toXML(doc));
|
||||
}
|
||||
|
||||
Element topicsElement = doc.createElement("topics");
|
||||
nlpDataElement.appendChild(topicsElement);
|
||||
|
||||
List<Topic> topics = nlpData.getNlp().getTopics();
|
||||
if (topics != null) {
|
||||
for (Topic topic: topics) {
|
||||
topicsElement.appendChild(topic.toXML(doc));
|
||||
}
|
||||
}
|
||||
|
||||
Element namedEntitiesElement = doc.createElement("NamedEntities");
|
||||
nlpDataElement.appendChild(namedEntitiesElement);
|
||||
|
||||
List<NamedEntity> namedEntities = nlpData.getNlp().getNamedEntities();
|
||||
if (namedEntities != null) {
|
||||
for (NamedEntity namedEntity: namedEntities) {
|
||||
namedEntitiesElement.appendChild(namedEntity.toXML(doc));
|
||||
}
|
||||
}
|
||||
|
||||
Element posElement = doc.createElement("pos");
|
||||
nlpDataElement.appendChild(posElement);
|
||||
|
||||
List<Pos> posElements = nlpData.getNlp().getPosList();
|
||||
if (posElements != null) {
|
||||
for (Pos pos: posElements) {
|
||||
posElement.appendChild(pos.toXML(doc));
|
||||
}
|
||||
}
|
||||
|
||||
doc.getFirstChild().appendChild(nlpDataElement);
|
||||
}
|
||||
|
||||
public static void addSpeechById(Document doc, String speechId) {
|
||||
// get speeches element
|
||||
Element speechesElement = (Element) doc.getElementsByTagName("speeches").item(0);
|
||||
|
@ -60,6 +107,9 @@ public class XMLUtil {
|
|||
Speech speech = getSpeechByKey(speechId);
|
||||
Speaker_MongoDB_Impl speaker = getSpeakerById(String.valueOf(speech.getSpeakerId()));
|
||||
|
||||
HtmlSpeech htmlSpeech = getHTMLSpeechByKey(speechId);
|
||||
addNlpData(doc, htmlSpeech);
|
||||
|
||||
speechElement.appendChild(speaker.toXML(doc));
|
||||
speechElement.appendChild(speech.toXML(doc));
|
||||
}
|
||||
|
|
|
@ -4,6 +4,8 @@
|
|||
\usepackage{geometry}
|
||||
\usepackage{xcolor}
|
||||
\usepackage[T1]{fontenc}
|
||||
\usepackage{tikz}
|
||||
\usepackage{pgfplots}
|
||||
|
||||
\pagestyle{fancy}
|
||||
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue