added NLP Data to XML export

This commit is contained in:
Jonas Werner 2025-03-23 13:53:21 +01:00
parent b82597dfeb
commit e43dea4e36
5 changed files with 91 additions and 0 deletions

View file

@ -1,6 +1,7 @@
package org.texttechnologylab.project.gruppe_05_1.domain.nlp;
import org.bson.Document;
import org.w3c.dom.Element;
import java.util.ArrayList;
import java.util.List;
@ -65,4 +66,11 @@ public class NamedEntity {
}
return nes;
}
/**
 * Builds the XML representation of this named entity.
 *
 * <p>The result is a {@code NamedEntity} element whose {@code type} attribute
 * carries the entity type and whose text content is the entity text. The
 * element is only created, not attached to the document tree.
 *
 * @param doc the DOM document used as factory for the new element
 * @return the newly created, detached {@code NamedEntity} element
 */
public Element toXML(org.w3c.dom.Document doc) {
    final Element entityElement = doc.createElement("NamedEntity");
    entityElement.setTextContent(this.text);
    entityElement.setAttribute("type", this.type);
    return entityElement;
}
}

View file

@ -1,5 +1,7 @@
package org.texttechnologylab.project.gruppe_05_1.domain.nlp;
import org.w3c.dom.Element;
import java.util.Objects;
import java.util.StringJoiner;
@ -116,4 +118,14 @@ public class Pos {
MyPos{posValue='ADJD', coarseValue='ADV', begin=127, end=130, coveredText='gut'},
MyPos{posValue='$.', coarseValue='PUNCT', begin=130, end=131, coveredText='.'}],
*/
/**
 * Builds the XML representation of this part-of-speech annotation.
 *
 * <p>The result is a {@code pos} element with the attributes
 * {@code posValue}, {@code coarseValue}, {@code begin} and {@code end};
 * the covered text becomes the element's text content. The element is
 * only created, not attached to the document tree.
 *
 * @param doc the DOM document used as factory for the new element
 * @return the newly created, detached {@code pos} element
 */
public Element toXML(org.w3c.dom.Document doc) {
    final Element node = doc.createElement("pos");
    node.setTextContent(this.coveredText);

    // Attributes are written in the same order as before.
    node.setAttribute("posValue", this.posValue);
    node.setAttribute("coarseValue", this.coarseValue);
    node.setAttribute("begin", String.valueOf(this.begin));
    node.setAttribute("end", String.valueOf(this.end));
    return node;
}
}

View file

@ -2,6 +2,7 @@ package org.texttechnologylab.project.gruppe_05_1.domain.nlp;
import org.bson.Document;
import org.texttechnologylab.project.gruppe_05_1.database.MongoDBHandler;
import org.w3c.dom.Element;
import java.util.ArrayList;
import java.util.List;
@ -120,4 +121,15 @@ public class Sentiment {
}
return sentiments;
}
/**
 * Builds the XML representation of this sentiment annotation.
 *
 * <p>The result is an empty {@code sentiment} element whose attributes
 * {@code begin}, {@code end}, {@code sentiment}, {@code negative},
 * {@code neutral} and {@code positive} hold the string form of the
 * corresponding fields. The element is only created, not attached to the
 * document tree.
 *
 * @param doc the DOM document used as factory for the new element
 * @return the newly created, detached {@code sentiment} element
 */
public org.w3c.dom.Element toXML(org.w3c.dom.Document doc) {
    final Element node = doc.createElement("sentiment");

    // Span of the annotation.
    node.setAttribute("begin", String.valueOf(begin));
    node.setAttribute("end", String.valueOf(end));

    // Overall value followed by the three component values.
    node.setAttribute("sentiment", String.valueOf(sentiment));
    node.setAttribute("negative", String.valueOf(negative));
    node.setAttribute("neutral", String.valueOf(neutral));
    node.setAttribute("positive", String.valueOf(positive));

    return node;
}
}

View file

@ -1,6 +1,7 @@
package org.texttechnologylab.project.gruppe_05_1.domain.nlp;
import org.bson.Document;
import org.w3c.dom.Element;
import java.util.*;
import java.util.stream.Collectors;
@ -104,4 +105,12 @@ public class Topic {
return condensedTopicInfo;
}
/**
 * Builds the XML representation of this topic annotation.
 *
 * <p>The result is a {@code topic} element with a {@code topic} attribute
 * and, when a score is available, a {@code score} attribute; the value of
 * {@link #getText()} becomes the element's text content. The element is
 * only created, not attached to the document tree.
 *
 * @param doc the DOM document used as factory for the new element
 * @return the newly created, detached {@code topic} element
 */
public Element toXML(org.w3c.dom.Document doc) {
    Element topicElement = doc.createElement("topic");
    topicElement.setAttribute("topic", this.getTopic());

    // The score getter returns a reference type, so it may be null; calling
    // toString() on it unconditionally would abort the export with a
    // NullPointerException. A missing score simply omits the attribute.
    Object score = this.getScore();
    if (score != null) {
        topicElement.setAttribute("score", score.toString());
    }

    topicElement.setTextContent(this.getText());
    return topicElement;
}
}

View file

@ -4,6 +4,8 @@ import org.eclipse.jetty.xml.XmlParser;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Node;
import org.texttechnologylab.project.gruppe_05_1.database.domainimpl.mdb.Speaker_MongoDB_Impl;
import org.texttechnologylab.project.gruppe_05_1.domain.html.HtmlSpeech;
import org.texttechnologylab.project.gruppe_05_1.domain.nlp.*;
import org.texttechnologylab.project.gruppe_05_1.xml.speeches.Interfaces.Speech;
import javax.xml.transform.Transformer;
import javax.xml.transform.TransformerFactory;
@ -52,6 +54,51 @@ public class XMLUtil {
return doc;
}
/**
 * Appends the NLP annotations of a speech to the root element of the document.
 *
 * <p>An {@code nlp} element is created with four container children, in
 * this order: {@code sentiments}, {@code topics}, {@code NamedEntities} and
 * {@code pos}. Each container is filled with the {@code toXML} output of the
 * matching annotation list; a list that is {@code null} leaves its
 * container empty. If {@code nlpData} or its NLP payload is {@code null},
 * the document is left unchanged.
 *
 * @param doc     the document to extend; must already have a root element
 * @param nlpData the speech whose NLP annotations are exported, may be {@code null}
 */
public static void addNlpData(Document doc, HtmlSpeech nlpData) {
    // Nothing to export when no NLP payload is attached to the speech.
    if (nlpData == null || nlpData.getNlp() == null) {
        return;
    }

    Element nlpDataElement = doc.createElement("nlp");

    Element sentimentsElement = doc.createElement("sentiments");
    nlpDataElement.appendChild(sentimentsElement);
    List<Sentiment> sentiments = nlpData.getNlp().getSentiments();
    // Guarded like the other annotation lists below.
    if (sentiments != null) {
        for (Sentiment sentiment : sentiments) {
            sentimentsElement.appendChild(sentiment.toXML(doc));
        }
    }

    Element topicsElement = doc.createElement("topics");
    nlpDataElement.appendChild(topicsElement);
    List<Topic> topics = nlpData.getNlp().getTopics();
    if (topics != null) {
        for (Topic topic : topics) {
            topicsElement.appendChild(topic.toXML(doc));
        }
    }

    Element namedEntitiesElement = doc.createElement("NamedEntities");
    nlpDataElement.appendChild(namedEntitiesElement);
    List<NamedEntity> namedEntities = nlpData.getNlp().getNamedEntities();
    if (namedEntities != null) {
        for (NamedEntity namedEntity : namedEntities) {
            namedEntitiesElement.appendChild(namedEntity.toXML(doc));
        }
    }

    // Container and its children share the tag name "pos"; kept as-is so the
    // exported structure does not change.
    Element posElement = doc.createElement("pos");
    nlpDataElement.appendChild(posElement);
    List<Pos> posElements = nlpData.getNlp().getPosList();
    if (posElements != null) {
        for (Pos pos : posElements) {
            posElement.appendChild(pos.toXML(doc));
        }
    }

    // getDocumentElement() always yields the root element, whereas the first
    // child of a document may be a comment, doctype or processing instruction
    // that cannot accept element children.
    doc.getDocumentElement().appendChild(nlpDataElement);
}
public static void addSpeechById(Document doc, String speechId) {
// get speeches element
Element speechesElement = (Element) doc.getElementsByTagName("speeches").item(0);
@ -63,6 +110,9 @@ public class XMLUtil {
Speech speech = getSpeechByKey(speechId);
Speaker_MongoDB_Impl speaker = getSpeakerById(String.valueOf(speech.getSpeakerId()));
HtmlSpeech htmlSpeech = getHTMLSpeechByKey(speechId);
addNlpData(doc, htmlSpeech);
speechElement.appendChild(speaker.toXML(doc));
speechElement.appendChild(speech.toXML(doc));
}