diff --git a/src/main/java/org/texttechnologylab/project/gruppe_05_1/domain/nlp/NamedEntity.java b/src/main/java/org/texttechnologylab/project/gruppe_05_1/domain/nlp/NamedEntity.java index 165df29..dadd618 100644 --- a/src/main/java/org/texttechnologylab/project/gruppe_05_1/domain/nlp/NamedEntity.java +++ b/src/main/java/org/texttechnologylab/project/gruppe_05_1/domain/nlp/NamedEntity.java @@ -1,6 +1,7 @@ package org.texttechnologylab.project.gruppe_05_1.domain.nlp; import org.bson.Document; +import org.w3c.dom.Element; import java.util.ArrayList; import java.util.List; @@ -65,4 +66,11 @@ public class NamedEntity { } return nes; } + + /** Builds a "NamedEntity" element from this entity: the "type" attribute holds its type and the text content holds its text. The element is created through doc and returned without being appended to any parent. */ public Element toXML(org.w3c.dom.Document doc) { + Element ne = doc.createElement("NamedEntity"); + ne.setAttribute("type", type); + ne.setTextContent(text); + return ne; + } } diff --git a/src/main/java/org/texttechnologylab/project/gruppe_05_1/domain/nlp/Pos.java b/src/main/java/org/texttechnologylab/project/gruppe_05_1/domain/nlp/Pos.java index 74f027a..6a75ce2 100644 --- a/src/main/java/org/texttechnologylab/project/gruppe_05_1/domain/nlp/Pos.java +++ b/src/main/java/org/texttechnologylab/project/gruppe_05_1/domain/nlp/Pos.java @@ -1,5 +1,7 @@ package org.texttechnologylab.project.gruppe_05_1.domain.nlp; +import org.w3c.dom.Element; + import java.util.Objects; import java.util.StringJoiner; @@ -116,4 +118,14 @@ public class Pos { MyPos{posValue='ADJD', coarseValue='ADV', begin=127, end=130, coveredText='gut'}, MyPos{posValue='$.', coarseValue='PUNCT', begin=130, end=131, coveredText='.'}], */ + + /** Builds a "pos" element for this tag: posValue and coarseValue are copied into attributes of the same name, begin and end are written as strings via String.valueOf, and coveredText becomes the text content. The element is created through doc and returned without being appended to any parent. */ public Element toXML(org.w3c.dom.Document doc) { + Element posElement = doc.createElement("pos"); + posElement.setAttribute("posValue", posValue); + posElement.setAttribute("coarseValue", coarseValue); + posElement.setAttribute("begin", String.valueOf(begin)); + posElement.setAttribute("end", String.valueOf(end)); + posElement.setTextContent(coveredText); + return posElement; + } } diff --git 
a/src/main/java/org/texttechnologylab/project/gruppe_05_1/domain/nlp/Sentiment.java b/src/main/java/org/texttechnologylab/project/gruppe_05_1/domain/nlp/Sentiment.java index 3894a72..7b8815b 100644 --- a/src/main/java/org/texttechnologylab/project/gruppe_05_1/domain/nlp/Sentiment.java +++ b/src/main/java/org/texttechnologylab/project/gruppe_05_1/domain/nlp/Sentiment.java @@ -2,6 +2,7 @@ package org.texttechnologylab.project.gruppe_05_1.domain.nlp; import org.bson.Document; import org.texttechnologylab.project.gruppe_05_1.database.MongoDBHandler; +import org.w3c.dom.Element; import java.util.ArrayList; import java.util.List; @@ -120,4 +121,15 @@ public class Sentiment { } return sentiments; } + + /** Builds a "sentiment" element whose attributes begin, end, sentiment, negative, neutral and positive carry the corresponding fields, each converted with String.valueOf. No text content is set. The element is created through doc and returned without being appended to any parent. */ public org.w3c.dom.Element toXML(org.w3c.dom.Document doc) { + Element sentimentElement = doc.createElement("sentiment"); + sentimentElement.setAttribute("begin", String.valueOf(this.begin)); + sentimentElement.setAttribute("end", String.valueOf(this.end)); + sentimentElement.setAttribute("sentiment", String.valueOf(this.sentiment)); + sentimentElement.setAttribute("negative", String.valueOf(this.negative)); + sentimentElement.setAttribute("neutral", String.valueOf(this.neutral)); + sentimentElement.setAttribute("positive", String.valueOf(this.positive)); + return sentimentElement; + } } diff --git a/src/main/java/org/texttechnologylab/project/gruppe_05_1/domain/nlp/Topic.java b/src/main/java/org/texttechnologylab/project/gruppe_05_1/domain/nlp/Topic.java index 474f5aa..e4cfff6 100644 --- a/src/main/java/org/texttechnologylab/project/gruppe_05_1/domain/nlp/Topic.java +++ b/src/main/java/org/texttechnologylab/project/gruppe_05_1/domain/nlp/Topic.java @@ -1,6 +1,7 @@ package org.texttechnologylab.project.gruppe_05_1.domain.nlp; import org.bson.Document; +import org.w3c.dom.Element; import java.util.*; import java.util.stream.Collectors; @@ -104,4 +105,12 @@ public class Topic { return condensedTopicInfo; } + + /** Builds a "topic" element: getTopic() fills the "topic" attribute, getScore().toString() fills the "score" attribute, and getText() becomes the text content. NOTE(review): getScore() is dereferenced without a null check, so a null score would throw a NullPointerException - confirm a score is always present. The element is created through doc and returned without being appended to any parent. */ public Element toXML(org.w3c.dom.Document doc) { + Element 
topicElement = doc.createElement("topic"); + topicElement.setAttribute("topic", this.getTopic()); + topicElement.setAttribute("score", this.getScore().toString()); + topicElement.setTextContent(this.getText()); + return topicElement; + } } diff --git a/src/main/java/org/texttechnologylab/project/gruppe_05_1/export/XMLUtil.java b/src/main/java/org/texttechnologylab/project/gruppe_05_1/export/XMLUtil.java index 4011614..177e160 100644 --- a/src/main/java/org/texttechnologylab/project/gruppe_05_1/export/XMLUtil.java +++ b/src/main/java/org/texttechnologylab/project/gruppe_05_1/export/XMLUtil.java @@ -4,6 +4,8 @@ import org.eclipse.jetty.xml.XmlParser; import org.jsoup.Jsoup; import org.jsoup.nodes.Node; import org.texttechnologylab.project.gruppe_05_1.database.domainimpl.mdb.Speaker_MongoDB_Impl; +import org.texttechnologylab.project.gruppe_05_1.domain.html.HtmlSpeech; +import org.texttechnologylab.project.gruppe_05_1.domain.nlp.*; import org.texttechnologylab.project.gruppe_05_1.xml.speeches.Interfaces.Speech; import javax.xml.transform.Transformer; import javax.xml.transform.TransformerFactory; @@ -52,6 +54,51 @@ public class XMLUtil { return doc; } + /** Adds an "nlp" element to doc, filled from nlpData.getNlp(). The element gets four container children in this order - "sentiments", "topics", "NamedEntities" and "pos" - and each entry of the matching list is appended to its container via that entry's toXML(doc). The "topics", "NamedEntities" and "pos" containers are still written (empty) when their list is null. The finished "nlp" element is appended to doc.getFirstChild(). */ public static void addNlpData(Document doc, HtmlSpeech nlpData) { + Element nlpDataElement = doc.createElement("nlp"); + + Element sentimentsElement = doc.createElement("sentiments"); + nlpDataElement.appendChild(sentimentsElement); + + + /* NOTE(review): unlike the three lists handled further down, sentiments is iterated without a null check - confirm getSentiments() never returns null. NOTE(review): the List declarations in this method appear without type arguments in this text, which the typed for-each loops would need (e.g. a List of Sentiment) - check against the real file. */ List sentiments = nlpData.getNlp().getSentiments(); + for (Sentiment sentiment: sentiments) { + sentimentsElement.appendChild(sentiment.toXML(doc)); + } + + Element topicsElement = doc.createElement("topics"); + nlpDataElement.appendChild(topicsElement); + + List topics = nlpData.getNlp().getTopics(); + if (topics != null) { + for (Topic topic: topics) { + topicsElement.appendChild(topic.toXML(doc)); + } + } + + Element namedEntitiesElement = doc.createElement("NamedEntities"); + nlpDataElement.appendChild(namedEntitiesElement); + + List namedEntities = 
nlpData.getNlp().getNamedEntities(); + if (namedEntities != null) { + for (NamedEntity namedEntity: namedEntities) { + namedEntitiesElement.appendChild(namedEntity.toXML(doc)); + } + } + + Element posElement = doc.createElement("pos"); + nlpDataElement.appendChild(posElement); + + List posElements = nlpData.getNlp().getPosList(); + if (posElements != null) { + for (Pos pos: posElements) { + posElement.appendChild(pos.toXML(doc)); + } + } + + /* NOTE(review): the "nlp" element is attached to the document's first child rather than to a particular speech element - confirm this is intended when several speeches are added. */ doc.getFirstChild().appendChild(nlpDataElement); + } + public static void addSpeechById(Document doc, String speechId) { // get speeches element Element speechesElement = (Element) doc.getElementsByTagName("speeches").item(0); @@ -63,6 +110,9 @@ public class XMLUtil { Speech speech = getSpeechByKey(speechId); Speaker_MongoDB_Impl speaker = getSpeakerById(String.valueOf(speech.getSpeakerId())); + /* Fetch the HtmlSpeech via getHTMLSpeechByKey(speechId) and add its NLP data to the document via addNlpData. */ HtmlSpeech htmlSpeech = getHTMLSpeechByKey(speechId); + addNlpData(doc, htmlSpeech); + speechElement.appendChild(speaker.toXML(doc)); speechElement.appendChild(speech.toXML(doc)); }