Merge branch 'scrapingXMLs' into 'main'
Implementierung von XML scraping und parsing See merge request s1188354/multimodal_parliament_explorer_05_1!1
This commit is contained in:
commit
8a6548662c
30 changed files with 1442 additions and 2 deletions
|
@ -11,6 +11,7 @@ import org.texttechnologylab.project.gruppe_05_1.util.PPRUtils;
|
|||
import org.texttechnologylab.project.gruppe_05_1.util.PropertiesUtils;
|
||||
import org.texttechnologylab.project.gruppe_05_1.util.XmlUtils;
|
||||
import org.texttechnologylab.project.gruppe_05_1.xml.FileObjectFactory;
|
||||
import org.texttechnologylab.project.gruppe_05_1.xml.speeches.SpeechParser;
|
||||
import org.w3c.dom.Element;
|
||||
|
||||
import java.util.List;
|
||||
|
@ -44,9 +45,10 @@ public class Main {
|
|||
|
||||
private static final FileObjectFactory xmlFactory = FileObjectFactory.getFactory();
|
||||
private static final MongoObjectFactory mongoFactory = MongoObjectFactory.getFactory();
|
||||
|
||||
private static final SpeechParser speechParser = new SpeechParser();
|
||||
public static void main(String[] args) {
|
||||
|
||||
//TEST
|
||||
speechParser.parseAllSessions();
|
||||
// Stellt fest, dass alle nötigen Datenbank-Collections existieren
|
||||
PPRUtils.ensureCollectionExist();
|
||||
|
||||
|
|
|
@ -3,6 +3,9 @@ package org.texttechnologylab.project.gruppe_05_1.util;
|
|||
import com.mongodb.client.MongoCollection;
|
||||
import com.mongodb.client.MongoDatabase;
|
||||
import com.mongodb.client.model.Indexes;
|
||||
import org.jsoup.Jsoup;
|
||||
import org.jsoup.nodes.Document;
|
||||
import org.jsoup.select.Elements;
|
||||
import org.texttechnologylab.project.gruppe_05_1.database.MongoDBHandler;
|
||||
import org.texttechnologylab.project.gruppe_05_1.database.MongoObjectFactory;
|
||||
import org.texttechnologylab.project.gruppe_05_1.database.MongoPprUtils;
|
||||
|
@ -19,8 +22,10 @@ import org.w3c.dom.Element;
|
|||
import org.w3c.dom.Node;
|
||||
import org.xml.sax.InputSource;
|
||||
|
||||
import javax.xml.parsers.DocumentBuilder;
|
||||
import javax.xml.parsers.DocumentBuilderFactory;
|
||||
import java.io.*;
|
||||
import java.net.HttpURLConnection;
|
||||
import java.net.URL;
|
||||
import java.util.*;
|
||||
import java.util.stream.Collectors;
|
||||
|
@ -30,6 +35,9 @@ import java.util.zip.ZipInputStream;
|
|||
public abstract class PPRUtils {
|
||||
|
||||
public static final String PARTEILOS_KUERZEL = "Parteilos";
|
||||
private static Set<String> processedProtocols = new HashSet<>();
|
||||
private static Set<org.w3c.dom.Document> xmlProtocols = new HashSet<>();
|
||||
|
||||
|
||||
|
||||
/**
|
||||
|
@ -343,4 +351,92 @@ public abstract class PPRUtils {
|
|||
}
|
||||
});
|
||||
}
|
||||
|
||||
|
||||
public static Set<org.w3c.dom.Document> processXML() {
|
||||
int offset = 0;
|
||||
int limit = 10;
|
||||
boolean hasMore = true;
|
||||
|
||||
while (hasMore) {
|
||||
String queryUrl = "https://www.bundestag.de/ajax/filterlist/de/services/opendata/866354-866354?limit="
|
||||
+ limit + "&noFilterSet=true&offset=" + offset;
|
||||
System.out.println("Lade: " + queryUrl);
|
||||
try {
|
||||
Document htmlDoc = Jsoup.connect(queryUrl).get();
|
||||
Elements xmlLinks = htmlDoc.select("a.bt-link-dokument");
|
||||
if (xmlLinks.isEmpty()) {
|
||||
System.out.println("Keine weiteren Protokolle gefunden.");
|
||||
break;
|
||||
}
|
||||
|
||||
for (org.jsoup.nodes.Element link : xmlLinks) {
|
||||
String xmlUrl = link.attr("href");
|
||||
System.out.println("Verarbeite XML: " + xmlUrl);
|
||||
try {
|
||||
org.w3c.dom.Document xmlDoc = downloadAndParseXML(xmlUrl);
|
||||
|
||||
String uniqueId = xmlDoc.getDocumentElement().getAttribute("sitzung-nr");
|
||||
if (processedProtocols.contains(uniqueId)) {
|
||||
System.out.println("Protokoll bereits verarbeitet: " + uniqueId);
|
||||
continue;
|
||||
}
|
||||
processedProtocols.add(uniqueId);
|
||||
xmlProtocols.add(xmlDoc);
|
||||
//TODO verarbeitung
|
||||
} catch (Exception e) {
|
||||
System.err.println("Fehler beim Verarbeiten der XML-Datei: " + xmlUrl);
|
||||
e.printStackTrace();
|
||||
}
|
||||
}
|
||||
|
||||
// check if next
|
||||
org.jsoup.nodes.Element metaSlider = htmlDoc.selectFirst("div.meta-slider");
|
||||
if (metaSlider != null && metaSlider.hasAttr("data-nextoffset")) {
|
||||
int nextOffset = Integer.parseInt(metaSlider.attr("data-nextoffset"));
|
||||
if (nextOffset <= offset) {
|
||||
hasMore = false;
|
||||
} else {
|
||||
offset = nextOffset;
|
||||
}
|
||||
} else {
|
||||
hasMore = false;
|
||||
}
|
||||
} catch (IOException e) {
|
||||
System.err.println("Fehler beim Laden der Seite: " + queryUrl);
|
||||
e.printStackTrace();
|
||||
break;
|
||||
}
|
||||
}
|
||||
return xmlProtocols;
|
||||
}
|
||||
|
||||
/**
|
||||
* Lädt die XML-Datei von der gegebenen URL herunter und parst sie
|
||||
* mittels dbParser.
|
||||
*
|
||||
* @param xmlUrl URL der XML-Datei
|
||||
* @return Das geparste org.w3c.dom.Document
|
||||
* @throws Exception wenn ein Fehler auftritt
|
||||
*/
|
||||
public static org.w3c.dom.Document downloadAndParseXML(String xmlUrl) throws Exception {
|
||||
URL url = new URL(xmlUrl);
|
||||
HttpURLConnection connection = (HttpURLConnection) url.openConnection();
|
||||
connection.setRequestMethod("GET");
|
||||
connection.connect();
|
||||
|
||||
DocumentBuilderFactory dbFactory = DocumentBuilderFactory.newInstance();
|
||||
DocumentBuilder dBuilder = dbFactory.newDocumentBuilder();
|
||||
dBuilder.setEntityResolver((publicId, systemId) -> {
|
||||
InputStream dtdStream = PPRUtils.class.getResourceAsStream("/plenarprotokolle/dbtplenarprotokoll.dtd");
|
||||
if (dtdStream != null) {
|
||||
return new InputSource(dtdStream);
|
||||
}
|
||||
return null;
|
||||
});
|
||||
org.w3c.dom.Document doc = dBuilder.parse(connection.getInputStream());
|
||||
doc.getDocumentElement().normalize();
|
||||
return doc;
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
@ -0,0 +1,39 @@
|
|||
package org.texttechnologylab.project.gruppe_05_1.xml.speeches.Impls;
|
||||
|
||||
import lombok.Getter;
|
||||
import org.texttechnologylab.project.gruppe_05_1.xml.speeches.Interfaces.AgendaItem;
|
||||
import org.texttechnologylab.project.gruppe_05_1.xml.speeches.Interfaces.Speech;
|
||||
import org.texttechnologylab.project.gruppe_05_1.xml.speeches.enums.MongoDBEntryType;
|
||||
|
||||
|
||||
import java.util.ArrayList;
|
||||
|
||||
@Getter
|
||||
public class AgendaItem_File_Impl implements AgendaItem {
|
||||
private final int id;
|
||||
private final int sessionId;
|
||||
private final String title;
|
||||
private final ArrayList<Speech> speeches;
|
||||
|
||||
public AgendaItem_File_Impl(int id, int sessionId, String title) {
|
||||
this.id = id;
|
||||
this.sessionId = sessionId;
|
||||
this.title = title;
|
||||
this.speeches = new ArrayList<>();
|
||||
}
|
||||
|
||||
@Override
|
||||
public MongoDBEntryType getType() {
|
||||
return MongoDBEntryType.AGENDA_ITEM;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void addSpeech(Speech speech) {
|
||||
this.speeches.add(speech);
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toHTML() {
|
||||
return "";
|
||||
}
|
||||
}
|
|
@ -0,0 +1,27 @@
|
|||
package org.texttechnologylab.project.gruppe_05_1.xml.speeches.Impls;
|
||||
|
||||
import lombok.Getter;
|
||||
import org.texttechnologylab.project.gruppe_05_1.xml.speeches.Interfaces.Comment;
|
||||
import org.texttechnologylab.project.gruppe_05_1.xml.speeches.Interfaces.Content;
|
||||
import org.texttechnologylab.project.gruppe_05_1.xml.speeches.enums.MongoDBEntryType;
|
||||
|
||||
|
||||
@Getter
|
||||
public class Comment_File_Impl implements Content, Comment {
|
||||
private final int contentId;
|
||||
private final int speechId;
|
||||
private final String comment;
|
||||
private final String commentatorName;
|
||||
|
||||
public Comment_File_Impl(int contentId, int speechId, String commentatorName, String comment) {
|
||||
this.contentId = contentId;
|
||||
this.speechId = speechId;
|
||||
this.commentatorName = commentatorName;
|
||||
this.comment = comment;
|
||||
}
|
||||
|
||||
@Override
|
||||
public MongoDBEntryType getType() {
|
||||
return MongoDBEntryType.SPEECH_COMMENT;
|
||||
}
|
||||
}
|
|
@ -0,0 +1,34 @@
|
|||
package org.texttechnologylab.project.gruppe_05_1.xml.speeches.Impls;
|
||||
|
||||
import lombok.Getter;
|
||||
import org.texttechnologylab.project.gruppe_05_1.xml.speeches.Interfaces.Constituency;
|
||||
import org.texttechnologylab.project.gruppe_05_1.xml.speeches.enums.MongoDBEntryType;
|
||||
|
||||
|
||||
@Getter
|
||||
public class Constituency_File_Impl implements Constituency {
|
||||
private final int id;
|
||||
private final String name;
|
||||
private final String federalState;
|
||||
|
||||
public Constituency_File_Impl(int id, String name, String federalState) {
|
||||
this.id = id;
|
||||
this.name = name;
|
||||
this.federalState = federalState;
|
||||
}
|
||||
|
||||
@Override
|
||||
public MongoDBEntryType getType() {
|
||||
return MongoDBEntryType.CONSTIUENCY;
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toHTML() {
|
||||
return "<div class='constituency'>"
|
||||
+ "<h2>Constituency Details</h2>"
|
||||
+ "<p><strong>ID:</strong> " + id + "</p>"
|
||||
+ "<p><strong>Name:</strong> " + name + "</p>"
|
||||
+ "<p><strong>Federal State:</strong> " + federalState + "</p>"
|
||||
+ "</div>";
|
||||
}
|
||||
}
|
|
@ -0,0 +1,45 @@
|
|||
package org.texttechnologylab.project.gruppe_05_1.xml.speeches.Impls;
|
||||
|
||||
import lombok.Getter;
|
||||
import org.texttechnologylab.project.gruppe_05_1.xml.speeches.Interfaces.Fraction;
|
||||
import org.texttechnologylab.project.gruppe_05_1.xml.speeches.enums.MongoDBEntryType;
|
||||
|
||||
|
||||
import java.util.List;
|
||||
|
||||
@Getter
|
||||
public class Fraction_File_Impl implements Fraction {
|
||||
private String name;
|
||||
private final String longName;
|
||||
private final List<Integer> members;
|
||||
|
||||
public Fraction_File_Impl(String name, String longName, List<Integer> members) {
|
||||
this.name = name;
|
||||
this.longName = longName;
|
||||
this.members = members;
|
||||
}
|
||||
|
||||
@Override
|
||||
public MongoDBEntryType getType() {
|
||||
return MongoDBEntryType.FRACTION;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void addMember(int member) {
|
||||
this.members.add(member);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void updateName(String name) {
|
||||
this.name = name;
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toHTML() {
|
||||
return "<div class='fraction'>"
|
||||
+ "<h2>Fraction Details</h2>"
|
||||
+ "<p><strong>Name:</strong> " + name + "</p>"
|
||||
+ "<p><strong>Long Name:</strong> " + longName + "</p>"
|
||||
+ "</div>";
|
||||
}
|
||||
}
|
|
@ -0,0 +1,31 @@
|
|||
package org.texttechnologylab.project.gruppe_05_1.xml.speeches.Impls;
|
||||
|
||||
import lombok.Getter;
|
||||
import org.texttechnologylab.project.gruppe_05_1.xml.speeches.Interfaces.HistoryEntry;
|
||||
import org.texttechnologylab.project.gruppe_05_1.xml.speeches.enums.MongoDBEntryType;
|
||||
|
||||
|
||||
@Getter
|
||||
public class HistoryEntry_File_Impl implements HistoryEntry {
|
||||
private final String action;
|
||||
private final String content;
|
||||
private final String contentType;
|
||||
private final String date;
|
||||
|
||||
public HistoryEntry_File_Impl(String content, String contentType, String date, String action) {
|
||||
this.action = action;
|
||||
this.content = content;
|
||||
this.contentType = contentType;
|
||||
this.date = date;
|
||||
}
|
||||
|
||||
@Override
|
||||
public String getAction() {
|
||||
return this.action;
|
||||
}
|
||||
|
||||
@Override
|
||||
public MongoDBEntryType getType() {
|
||||
return MongoDBEntryType.HISTORY_ENTRY;
|
||||
}
|
||||
}
|
|
@ -0,0 +1,28 @@
|
|||
package org.texttechnologylab.project.gruppe_05_1.xml.speeches.Impls;
|
||||
|
||||
import lombok.Getter;
|
||||
import org.texttechnologylab.project.gruppe_05_1.xml.speeches.Interfaces.Individual;
|
||||
|
||||
|
||||
@Getter
|
||||
public abstract class Individual_File_Impl implements Individual {
|
||||
private final String name;
|
||||
private final String firstName;
|
||||
private final String title;
|
||||
private final String dateOfBirth;
|
||||
private final String dateOfDeath;
|
||||
private final String placeOfBirth;
|
||||
private final String gender;
|
||||
private final String religion;
|
||||
|
||||
public Individual_File_Impl(String name, String firstName, String title, String dateOfBirth, String dateOfDeath, String placeOfBirth, String gender, String religion) {
|
||||
this.name = name;
|
||||
this.firstName = firstName;
|
||||
this.title = title;
|
||||
this.dateOfBirth = dateOfBirth;
|
||||
this.dateOfDeath = dateOfDeath;
|
||||
this.placeOfBirth = placeOfBirth;
|
||||
this.gender = gender;
|
||||
this.religion = religion;
|
||||
}
|
||||
}
|
|
@ -0,0 +1,37 @@
|
|||
package org.texttechnologylab.project.gruppe_05_1.xml.speeches.Impls;
|
||||
|
||||
import lombok.Getter;
|
||||
import org.texttechnologylab.project.gruppe_05_1.xml.speeches.Interfaces.LegislativePeriod;
|
||||
import org.texttechnologylab.project.gruppe_05_1.xml.speeches.enums.MongoDBEntryType;
|
||||
|
||||
|
||||
@Getter
|
||||
public class LegislativePeriod_File_Impl implements LegislativePeriod {
|
||||
private final int id;
|
||||
private final String startDate;
|
||||
private final String endDate;
|
||||
private final String constituency;
|
||||
|
||||
public LegislativePeriod_File_Impl(int id, String startDate, String endDate, String constituency) {
|
||||
this.id = id;
|
||||
this.startDate = startDate;
|
||||
this.endDate = endDate;
|
||||
this.constituency = constituency;
|
||||
}
|
||||
|
||||
@Override
|
||||
public MongoDBEntryType getType() {
|
||||
return MongoDBEntryType.LEGISLATIVE_PERIOD;
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toHTML() {
|
||||
return "<div class='legislative-period'>"
|
||||
+ "<h2>Legislative Period Details</h2>"
|
||||
+ "<p><strong>ID:</strong> " + id + "</p>"
|
||||
+ "<p><strong>Start Date:</strong> " + startDate + "</p>"
|
||||
+ "<p><strong>End Date:</strong> " + endDate + "</p>"
|
||||
+ "<p><strong>Constituency:</strong> " + constituency + "</p>"
|
||||
+ "</div>";
|
||||
}
|
||||
}
|
|
@ -0,0 +1,24 @@
|
|||
package org.texttechnologylab.project.gruppe_05_1.xml.speeches.Impls;
|
||||
|
||||
import lombok.Getter;
|
||||
import org.texttechnologylab.project.gruppe_05_1.xml.speeches.Interfaces.Content;
|
||||
import org.texttechnologylab.project.gruppe_05_1.xml.speeches.Interfaces.Line;
|
||||
import org.texttechnologylab.project.gruppe_05_1.xml.speeches.enums.MongoDBEntryType;
|
||||
|
||||
@Getter
|
||||
public class Line_File_Impl implements Content, Line {
|
||||
private final int contentId;
|
||||
protected final int speechId;
|
||||
private final String content;
|
||||
|
||||
public Line_File_Impl(int contentId, int speechId, String content) {
|
||||
this.contentId = contentId;
|
||||
this.speechId = speechId;
|
||||
this.content = content;
|
||||
}
|
||||
|
||||
@Override
|
||||
public MongoDBEntryType getType() {
|
||||
return MongoDBEntryType.SPEECH_LINE;
|
||||
}
|
||||
}
|
|
@ -0,0 +1,94 @@
|
|||
package org.texttechnologylab.project.gruppe_05_1.xml.speeches.Impls;
|
||||
|
||||
import lombok.Getter;
|
||||
import org.texttechnologylab.project.gruppe_05_1.xml.speeches.Interfaces.LegislativePeriod;
|
||||
import org.texttechnologylab.project.gruppe_05_1.xml.speeches.Interfaces.MemberOfParliament;
|
||||
import org.texttechnologylab.project.gruppe_05_1.xml.speeches.enums.MongoDBEntryType;
|
||||
|
||||
|
||||
import java.util.List;
|
||||
|
||||
@Getter
|
||||
public class MemberOfParliament_File_Impl extends Individual_File_Impl implements MemberOfParliament {
|
||||
private final int id;
|
||||
private String party;
|
||||
private final List<LegislativePeriod> legislativePeriods;
|
||||
private final int firstLegislativePeriodId;
|
||||
private final int lastLegislativePeriodId;
|
||||
private final String photo;
|
||||
|
||||
public MemberOfParliament_File_Impl(String name, String firstName, String title, String dateOfBirth, String dateofDeath, String placeOfBirth, String gender, String religion, int id, String party, List<LegislativePeriod> legislativePeriods, int firstLegislativePeriodId, int lastLegislativePeriodId, String photo) {
|
||||
super(name, firstName, title, dateOfBirth, dateofDeath, placeOfBirth, gender, religion);
|
||||
this.id = id;
|
||||
this.party = party;
|
||||
this.legislativePeriods = legislativePeriods;
|
||||
this.firstLegislativePeriodId = firstLegislativePeriodId;
|
||||
this.lastLegislativePeriodId = lastLegislativePeriodId;
|
||||
this.photo = photo;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean isCurrentMember() {
|
||||
return this.lastLegislativePeriodId == 20;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void updateParty(String party) {
|
||||
this.party = party;
|
||||
}
|
||||
|
||||
@Override
|
||||
public MongoDBEntryType getType() {
|
||||
return MongoDBEntryType.MEMBER_OF_PARLIAMENT;
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toHTML() {
|
||||
StringBuilder html = new StringBuilder();
|
||||
|
||||
// Basic Member details
|
||||
html.append("<div class='member-of-parliament'>")
|
||||
.append("<h2>Member of Parliament Details</h2>")
|
||||
.append("<p><strong>ID:</strong> ").append(id).append("</p>")
|
||||
.append("<p><strong>Name:</strong> ").append(getName()).append(" ").append(getFirstName()).append("</p>")
|
||||
.append("<p><strong>Party:</strong> ").append(party).append("</p>");
|
||||
|
||||
// Member photo (constructed from last name and first name)
|
||||
String photoPath = "../src/memberphotos/" + getName() + "_" + getFirstName() + ".jpg";
|
||||
html.append("<p><img src='").append(photoPath).append("' alt='Photo' width='100' /></p>");
|
||||
|
||||
// Legislative Periods - First and Last Period
|
||||
html.append("<h3>Legislative Periods</h3>");
|
||||
if (legislativePeriods == null || legislativePeriods.isEmpty()) {
|
||||
if (firstLegislativePeriodId == lastLegislativePeriodId) {
|
||||
html.append("<p>Member of Parliament during the legislative period ")
|
||||
.append(firstLegislativePeriodId);
|
||||
|
||||
} else {
|
||||
html.append("<p>Candidated for the first time during the legislative period ")
|
||||
.append(firstLegislativePeriodId)
|
||||
.append(" and the last time during the legislative period ")
|
||||
.append(lastLegislativePeriodId);
|
||||
}
|
||||
if (isCurrentMember()) {
|
||||
html.append(" <p><b>(currently active)</b></p>");
|
||||
}
|
||||
html.append("</p>");
|
||||
} else {
|
||||
// First Legislative Period
|
||||
LegislativePeriod firstPeriod = legislativePeriods.get(0);
|
||||
html.append("<p><strong>First Period:</strong> ").append(firstPeriod.getStartDate())
|
||||
.append(" to ").append(firstPeriod.getEndDate()).append("</p>");
|
||||
|
||||
// Last Legislative Period
|
||||
LegislativePeriod lastPeriod = legislativePeriods.get(legislativePeriods.size() - 1);
|
||||
html.append("<p><strong>Last Period:</strong> ").append(lastPeriod.getStartDate())
|
||||
.append(" to ").append(lastPeriod.getEndDate()).append("</p>");
|
||||
}
|
||||
|
||||
html.append("</div>");
|
||||
|
||||
return html.toString();
|
||||
}
|
||||
|
||||
}
|
|
@ -0,0 +1,47 @@
|
|||
package org.texttechnologylab.project.gruppe_05_1.xml.speeches.Impls;
|
||||
|
||||
import lombok.Getter;
|
||||
import org.texttechnologylab.project.gruppe_05_1.xml.speeches.Interfaces.AgendaItem;
|
||||
import org.texttechnologylab.project.gruppe_05_1.xml.speeches.Interfaces.Session;
|
||||
import org.texttechnologylab.project.gruppe_05_1.xml.speeches.enums.MongoDBEntryType;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
|
||||
@Getter
|
||||
public class Session_File_Impl implements Session {
|
||||
private final int id;
|
||||
private final String legislativePeriod;
|
||||
private final String dateTime;
|
||||
private final String endTime;
|
||||
private final List<AgendaItem> agendaItems;
|
||||
|
||||
public Session_File_Impl(String legislativePeriod, int id, String dateTime, String endTime) {
|
||||
this.id = id;
|
||||
this.legislativePeriod = legislativePeriod;
|
||||
this.dateTime = dateTime;
|
||||
this.endTime = endTime;
|
||||
this.agendaItems = new ArrayList<>();
|
||||
}
|
||||
|
||||
@Override
|
||||
public MongoDBEntryType getType() {
|
||||
return MongoDBEntryType.SESSION;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void addAgendaItem(AgendaItem item) {
|
||||
this.agendaItems.add(item);
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toHTML() {
|
||||
return "<div class='session'>"
|
||||
+ "<h2>Session Details</h2>"
|
||||
+ "<p><strong>ID:</strong> " + id + "</p>"
|
||||
+ "<p><strong>Legislative Period:</strong> " + legislativePeriod + "</p>"
|
||||
+ "<p><strong>Start Time:</strong> " + dateTime + "</p>"
|
||||
+ "<p><strong>End Time:</strong> " + endTime + "</p>"
|
||||
+ "</div>";
|
||||
}
|
||||
}
|
|
@ -0,0 +1,29 @@
|
|||
package org.texttechnologylab.project.gruppe_05_1.xml.speeches.Impls;
|
||||
|
||||
import lombok.Getter;
|
||||
import org.texttechnologylab.project.gruppe_05_1.xml.speeches.Interfaces.Content;
|
||||
import org.texttechnologylab.project.gruppe_05_1.xml.speeches.Interfaces.Speaker;
|
||||
import org.texttechnologylab.project.gruppe_05_1.xml.speeches.enums.MongoDBEntryType;
|
||||
|
||||
|
||||
@Getter
|
||||
public class Speaker_File_Impl implements Content, Speaker {
|
||||
private final int contentId;
|
||||
private final int speechId;
|
||||
private final int speakerId;
|
||||
private final String speakerName;
|
||||
private final String fraction;
|
||||
|
||||
public Speaker_File_Impl(int contentId, int speechId, int speakerId, String speakerName, String fraction) {
|
||||
this.contentId = contentId;
|
||||
this.speechId = speechId;
|
||||
this.speakerId = speakerId;
|
||||
this.speakerName = speakerName;
|
||||
this.fraction = fraction;
|
||||
}
|
||||
|
||||
@Override
|
||||
public MongoDBEntryType getType() {
|
||||
return MongoDBEntryType.SPEECH_SPEAKER;
|
||||
}
|
||||
}
|
|
@ -0,0 +1,54 @@
|
|||
package org.texttechnologylab.project.gruppe_05_1.xml.speeches.Impls;
|
||||
|
||||
import lombok.Getter;
|
||||
import org.texttechnologylab.project.gruppe_05_1.xml.speeches.Interfaces.Content;
|
||||
import org.texttechnologylab.project.gruppe_05_1.xml.speeches.Interfaces.Speech;
|
||||
import org.texttechnologylab.project.gruppe_05_1.xml.speeches.enums.MongoDBEntryType;
|
||||
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
|
||||
@Getter
|
||||
public class Speech_File_Impl implements Speech {
|
||||
private final int sessionId;
|
||||
private final int agendaItemId;
|
||||
private final int speakerId;
|
||||
private final int speechId;
|
||||
private final String speakerName;
|
||||
private final String fraction;
|
||||
private final List<Content> speechContents;
|
||||
|
||||
public Speech_File_Impl(int sessionId, int agendaItemId, int speechId, int speakerId, String speakerName, String fraction) {
|
||||
this.speakerId = speakerId;
|
||||
this.agendaItemId = agendaItemId;
|
||||
this.speechId = speechId;
|
||||
this.speakerName = speakerName;
|
||||
this.fraction = fraction;
|
||||
this.sessionId = sessionId;
|
||||
this.speechContents = new ArrayList<>();
|
||||
}
|
||||
|
||||
@Override
|
||||
public MongoDBEntryType getType() {
|
||||
return MongoDBEntryType.SPEECH;
|
||||
}
|
||||
|
||||
public void addContent(Content content) {
|
||||
this.speechContents.add(content);
|
||||
//Logger.pink("Added paragraph to speech: " + paragraph);
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toHTML() {
|
||||
//TODO: Implement a logic that highlights the lines that the speaker is saying
|
||||
return "<div class='speech'>"
|
||||
+ "<h2>Speech Details</h2>"
|
||||
+ "<p><strong>Session ID:</strong> " + sessionId + "</p>"
|
||||
+ "<p><strong>Speaker ID:</strong> " + speakerId + "</p>"
|
||||
+ "<p><strong>Speaker Name:</strong> " + speakerName + "</p>"
|
||||
+ "<p><strong>Fraction:</strong> " + fraction + "</p>"
|
||||
+ "</div>";
|
||||
}
|
||||
|
||||
}
|
|
@ -0,0 +1,56 @@
|
|||
package org.texttechnologylab.project.gruppe_05_1.xml.speeches.Interfaces;
|
||||
|
||||
import org.texttechnologylab.project.gruppe_05_1.xml.speeches.enums.MongoDBEntryType;
|
||||
|
||||
import java.util.ArrayList;
|
||||
|
||||
public interface AgendaItem {
|
||||
/**
|
||||
* Returns the ID of the agenda item.
|
||||
*
|
||||
* @return The ID of the agenda item.
|
||||
*/
|
||||
int getId();
|
||||
|
||||
/**
|
||||
* Returns the type of the MongoDB entry.
|
||||
*
|
||||
* @return The type of the MongoDB entry.
|
||||
*/
|
||||
MongoDBEntryType getType();
|
||||
|
||||
/**
|
||||
* Returns the session ID.
|
||||
*
|
||||
* @return The session ID.
|
||||
*/
|
||||
int getSessionId();
|
||||
|
||||
/**
|
||||
* Returns the title of the agenda item.
|
||||
*
|
||||
* @return The title of the agenda item.
|
||||
*/
|
||||
String getTitle();
|
||||
|
||||
/**
|
||||
* Returns the speeches of the agenda item.
|
||||
*
|
||||
* @return The speeches of the agenda item.
|
||||
*/
|
||||
ArrayList<Speech> getSpeeches();
|
||||
|
||||
/**
|
||||
* Adds a speech to the agenda item.
|
||||
*
|
||||
* @param speech The speech to add.
|
||||
*/
|
||||
void addSpeech(Speech speech);
|
||||
|
||||
/**
|
||||
* Returns the HTML representation of the agenda item.
|
||||
*
|
||||
* @return The HTML representation of the agenda item.
|
||||
*/
|
||||
String toHTML();
|
||||
}
|
|
@ -0,0 +1,35 @@
|
|||
package org.texttechnologylab.project.gruppe_05_1.xml.speeches.Interfaces;
|
||||
|
||||
|
||||
import org.texttechnologylab.project.gruppe_05_1.xml.speeches.enums.MongoDBEntryType;
|
||||
|
||||
public interface Comment extends Content {
|
||||
/**
|
||||
* Returns the content ID.
|
||||
*
|
||||
* @return The content ID.
|
||||
*/
|
||||
int getContentId();
|
||||
|
||||
|
||||
/**
|
||||
* Returns the type of the MongoDB entry.
|
||||
*
|
||||
* @return The type of the MongoDB entry.
|
||||
*/
|
||||
MongoDBEntryType getType();
|
||||
|
||||
/**
|
||||
* Returns the comment.
|
||||
*
|
||||
* @return the comment
|
||||
*/
|
||||
String getComment();
|
||||
|
||||
/**
|
||||
* Returns the commentator name.
|
||||
*
|
||||
* @return the commentator name
|
||||
*/
|
||||
String getCommentatorName();
|
||||
}
|
|
@ -0,0 +1,38 @@
|
|||
package org.texttechnologylab.project.gruppe_05_1.xml.speeches.Interfaces;
|
||||
|
||||
import org.texttechnologylab.project.gruppe_05_1.xml.speeches.enums.MongoDBEntryType;
|
||||
|
||||
public interface Constituency {
|
||||
/**
|
||||
* Returns the id of the constituency.
|
||||
*
|
||||
* @return the id of the constituency
|
||||
*/
|
||||
int getId();
|
||||
|
||||
/**
|
||||
* Returns the type of the MongoDB entry.
|
||||
*
|
||||
* @return The type of the MongoDB entry.
|
||||
*/
|
||||
MongoDBEntryType getType();
|
||||
/**
|
||||
* Returns the name of the constituency.
|
||||
*
|
||||
* @return the name of the constituency
|
||||
*/
|
||||
String getName();
|
||||
/**
|
||||
* Returns the federal state of the constituency.
|
||||
*
|
||||
* @return the federal state of the constituency
|
||||
*/
|
||||
String getFederalState();
|
||||
|
||||
/**
|
||||
* Returns the HTML representation of the agenda item.
|
||||
*
|
||||
* @return The HTML representation of the agenda item.
|
||||
*/
|
||||
String toHTML();
|
||||
}
|
|
@ -0,0 +1,27 @@
|
|||
package org.texttechnologylab.project.gruppe_05_1.xml.speeches.Interfaces;
|
||||
|
||||
import org.texttechnologylab.project.gruppe_05_1.xml.speeches.enums.MongoDBEntryType;
|
||||
|
||||
public interface Content {
|
||||
/**
|
||||
* Returns the content ID.
|
||||
*
|
||||
* @return The content ID.
|
||||
*/
|
||||
int getContentId();
|
||||
|
||||
|
||||
/**
|
||||
* Returns the type of the MongoDB entry.
|
||||
*
|
||||
* @return The type of the MongoDB entry.
|
||||
*/
|
||||
MongoDBEntryType getType();
|
||||
|
||||
/**
|
||||
* Returns the speech ID.
|
||||
*
|
||||
* @return The speech ID.
|
||||
*/
|
||||
int getSpeechId();
|
||||
}
|
|
@ -0,0 +1,55 @@
|
|||
package org.texttechnologylab.project.gruppe_05_1.xml.speeches.Interfaces;
|
||||
|
||||
import org.texttechnologylab.project.gruppe_05_1.xml.speeches.enums.MongoDBEntryType;
|
||||
|
||||
import java.util.List;
|
||||
|
||||
public interface Fraction {
|
||||
/**
|
||||
* Returns the name of the fraction.
|
||||
*
|
||||
* @return The name of the fraction.
|
||||
*/
|
||||
String getName();
|
||||
|
||||
/**
|
||||
* Returns the type of the MongoDB entry.
|
||||
*
|
||||
* @return The type of the MongoDB entry.
|
||||
*/
|
||||
MongoDBEntryType getType();
|
||||
|
||||
/**
|
||||
* Returns the long name of the fraction.
|
||||
*
|
||||
* @return The long name of the fraction.
|
||||
*/
|
||||
String getLongName();
|
||||
|
||||
/**
|
||||
* Returns the members of the fraction.
|
||||
*
|
||||
* @return The members of the fraction.
|
||||
*/
|
||||
List<Integer> getMembers();
|
||||
/**
|
||||
* Adds a member to the fraction.
|
||||
*
|
||||
* @param member The member to add.
|
||||
*/
|
||||
void addMember(int member);
|
||||
|
||||
/**
|
||||
* Updates the name of the fraction.
|
||||
*
|
||||
* @param name The new name of the fraction.
|
||||
*/
|
||||
void updateName(String name);
|
||||
|
||||
/**
|
||||
* Returns the HTML representation of the agenda item.
|
||||
*
|
||||
* @return The HTML representation of the agenda item.
|
||||
*/
|
||||
String toHTML();
|
||||
}
|
|
@ -0,0 +1,26 @@
|
|||
package org.texttechnologylab.project.gruppe_05_1.xml.speeches.Interfaces;
|
||||
|
||||
import org.texttechnologylab.project.gruppe_05_1.xml.speeches.enums.MongoDBEntryType;
|
||||
|
||||
public interface HistoryEntry {
|
||||
/**
|
||||
* Returns the action of the history entry.
|
||||
**/
|
||||
String getAction();
|
||||
/**
|
||||
* Returns the content of the history entry.
|
||||
**/
|
||||
Object getContent();
|
||||
|
||||
/**
|
||||
* Returns the type of the MongoDB entry.
|
||||
*
|
||||
* @return The type of the MongoDB entry.
|
||||
*/
|
||||
MongoDBEntryType getType();
|
||||
|
||||
/**
|
||||
* Returns the date of the history entry.
|
||||
**/
|
||||
String getDate();
|
||||
}
|
|
@ -0,0 +1,75 @@
|
|||
package org.texttechnologylab.project.gruppe_05_1.xml.speeches.Interfaces;
|
||||
|
||||
import org.texttechnologylab.project.gruppe_05_1.xml.speeches.enums.MongoDBEntryType;
|
||||
|
||||
public interface Individual {
|
||||
/**
|
||||
* Returns the name
|
||||
*
|
||||
* @return the name
|
||||
*/
|
||||
String getName();
|
||||
|
||||
/**
|
||||
* Returns the type of the MongoDB entry.
|
||||
*
|
||||
* @return The type of the MongoDB entry.
|
||||
*/
|
||||
MongoDBEntryType getType();
|
||||
|
||||
/**
|
||||
* Returns the first name
|
||||
*
|
||||
* @return the first name
|
||||
*/
|
||||
String getFirstName();
|
||||
|
||||
/**
|
||||
* Returns the title
|
||||
*
|
||||
* @return the title
|
||||
*/
|
||||
String getTitle();
|
||||
|
||||
/**
|
||||
* Returns the date of birth
|
||||
*
|
||||
* @return the date of birth
|
||||
*/
|
||||
String getDateOfBirth();
|
||||
|
||||
/**
|
||||
* Returns the date of death
|
||||
*
|
||||
* @return the date of death
|
||||
*/
|
||||
String getDateOfDeath();
|
||||
|
||||
/**
|
||||
* Returns the place of birth
|
||||
*
|
||||
* @return the place of birth
|
||||
*/
|
||||
String getPlaceOfBirth();
|
||||
|
||||
/**
|
||||
* Returns the gender
|
||||
*
|
||||
* @return the gender
|
||||
*/
|
||||
String getGender();
|
||||
|
||||
/**
|
||||
* Returns the religion
|
||||
*
|
||||
* @return the religion
|
||||
*/
|
||||
String getReligion();
|
||||
|
||||
/**
|
||||
* Returns the HTML representation of the agenda item.
|
||||
*
|
||||
* @return The HTML representation of the agenda item.
|
||||
*/
|
||||
String toHTML();
|
||||
}
|
|
@ -0,0 +1,47 @@
|
|||
package org.texttechnologylab.project.gruppe_05_1.xml.speeches.Interfaces;
|
||||
|
||||
import org.texttechnologylab.project.gruppe_05_1.xml.speeches.enums.MongoDBEntryType;
|
||||
|
||||
public interface LegislativePeriod {
|
||||
/**
|
||||
* Returns the id of the legislative period.
|
||||
*
|
||||
* @return the id of the legislative period
|
||||
*/
|
||||
int getId();
|
||||
|
||||
/**
|
||||
* Returns the type of the MongoDB entry.
|
||||
*
|
||||
* @return The type of the MongoDB entry.
|
||||
*/
|
||||
MongoDBEntryType getType();
|
||||
|
||||
/**
|
||||
* Returns the number of the legislative period.
|
||||
*
|
||||
* @return the number of the legislative period
|
||||
*/
|
||||
String getStartDate();
|
||||
|
||||
/**
|
||||
* Returns the start date of the legislative period.
|
||||
*
|
||||
* @return the start date of the legislative period
|
||||
*/
|
||||
String getEndDate();
|
||||
|
||||
/**
|
||||
* Returns the end date of the legislative period.
|
||||
*
|
||||
* @return the end date of the legislative period
|
||||
*/
|
||||
String getConstituency();
|
||||
|
||||
/**
|
||||
* Returns the HTML representation of the agenda item.
|
||||
*
|
||||
* @return The HTML representation of the agenda item.
|
||||
*/
|
||||
String toHTML();
|
||||
}
|
|
@ -0,0 +1,27 @@
|
|||
package org.texttechnologylab.project.gruppe_05_1.xml.speeches.Interfaces;
|
||||
|
||||
import org.texttechnologylab.project.gruppe_05_1.xml.speeches.enums.MongoDBEntryType;
|
||||
|
||||
public interface Line extends Content {
|
||||
/**
|
||||
* Returns the content ID.
|
||||
*
|
||||
* @return The content ID.
|
||||
*/
|
||||
int getContentId();
|
||||
|
||||
|
||||
/**
|
||||
* Returns the type of the MongoDB entry.
|
||||
*
|
||||
* @return The type of the MongoDB entry.
|
||||
*/
|
||||
MongoDBEntryType getType();
|
||||
|
||||
/**
|
||||
* Returns the content of the line.
|
||||
*
|
||||
* @return the content of the line
|
||||
*/
|
||||
String getContent();
|
||||
}
|
|
@ -0,0 +1,77 @@
|
|||
package org.texttechnologylab.project.gruppe_05_1.xml.speeches.Interfaces;
|
||||
|
||||
import org.texttechnologylab.project.gruppe_05_1.xml.speeches.enums.MongoDBEntryType;
|
||||
|
||||
import java.util.List;
|
||||
|
||||
public interface MemberOfParliament extends Individual {
|
||||
/**
|
||||
* Returns the id of the member of parliament.
|
||||
*
|
||||
* @return the id of the member of parliament
|
||||
*/
|
||||
int getId();
|
||||
|
||||
/**
|
||||
* Returns the type of the MongoDB entry.
|
||||
*
|
||||
* @return The type of the MongoDB entry.
|
||||
*/
|
||||
MongoDBEntryType getType();
|
||||
|
||||
/**
|
||||
* Returns the party of the member of parliament.
|
||||
*
|
||||
* @return the party of the member of parliament
|
||||
*/
|
||||
String getParty();
|
||||
|
||||
/**
|
||||
* Returns the legislative periods of the member of parliament.
|
||||
*
|
||||
* @return the legislative periods of the member of parliament
|
||||
*/
|
||||
List<LegislativePeriod> getLegislativePeriods();
|
||||
|
||||
/**
|
||||
* Returns the first legislative period id of the member of parliament.
|
||||
*
|
||||
* @return the first legislative period id of the member of parliament
|
||||
*/
|
||||
int getFirstLegislativePeriodId();
|
||||
|
||||
/**
|
||||
* Returns the last legislative period id of the member of parliament.
|
||||
*
|
||||
* @return the last legislative period id of the member of parliament
|
||||
*/
|
||||
int getLastLegislativePeriodId();
|
||||
|
||||
/**
|
||||
* Returns the base64 encoded string of the photo of the member of parliament.
|
||||
*
|
||||
* @return the base64 encoded string of the photo of the member of parliament
|
||||
*/
|
||||
String getPhoto();
|
||||
|
||||
/**
|
||||
* Returns whether the member of parliament is a current member.
|
||||
*
|
||||
* @return whether the member of parliament is a current member
|
||||
*/
|
||||
boolean isCurrentMember();
|
||||
|
||||
/**
|
||||
* Updates the party of the member of parliament.
|
||||
*
|
||||
* @param party the new party of the member of parliament
|
||||
*/
|
||||
void updateParty(String party);
|
||||
|
||||
/**
|
||||
* Returns the HTML representation of the agenda item.
|
||||
*
|
||||
* @return The HTML representation of the agenda item.
|
||||
*/
|
||||
String toHTML();
|
||||
}
|
|
@ -0,0 +1,62 @@
|
|||
package org.texttechnologylab.project.gruppe_05_1.xml.speeches.Interfaces;
|
||||
|
||||
import org.texttechnologylab.project.gruppe_05_1.xml.speeches.enums.MongoDBEntryType;
|
||||
|
||||
import java.util.List;
|
||||
|
||||
public interface Session {
|
||||
/**
|
||||
* Returns the id of the session.
|
||||
*
|
||||
* @return the id of the session
|
||||
*/
|
||||
int getId();
|
||||
|
||||
/**
|
||||
* Returns the type of the MongoDB entry.
|
||||
*
|
||||
* @return The type of the MongoDB entry.
|
||||
*/
|
||||
MongoDBEntryType getType();
|
||||
|
||||
/**
|
||||
* Returns the legislative period of the session.
|
||||
*
|
||||
* @return the legislative period of the session
|
||||
*/
|
||||
String getLegislativePeriod();
|
||||
|
||||
/**
|
||||
* Returns the date and time of the session.
|
||||
*
|
||||
* @return the date and time of the session
|
||||
*/
|
||||
String getDateTime();
|
||||
|
||||
/**
|
||||
* Returns the end time of the session.
|
||||
*
|
||||
* @return the end time of the session
|
||||
*/
|
||||
String getEndTime();
|
||||
|
||||
/**
|
||||
* Returns the agenda items of the session.
|
||||
*
|
||||
* @return the agenda items of the session
|
||||
*/
|
||||
List<AgendaItem> getAgendaItems();
|
||||
|
||||
/**
|
||||
* Adds an agenda item to the session.
|
||||
* @param item The agenda item to add.
|
||||
*/
|
||||
void addAgendaItem(AgendaItem item);
|
||||
|
||||
/**
|
||||
* Returns the HTML representation of the agenda item.
|
||||
*
|
||||
* @return The HTML representation of the agenda item.
|
||||
*/
|
||||
String toHTML();
|
||||
}
|
|
@ -0,0 +1,41 @@
|
|||
package org.texttechnologylab.project.gruppe_05_1.xml.speeches.Interfaces;
|
||||
|
||||
import org.texttechnologylab.project.gruppe_05_1.xml.speeches.enums.MongoDBEntryType;
|
||||
|
||||
public interface Speaker extends Content {
|
||||
/**
|
||||
* Returns the content ID.
|
||||
*
|
||||
* @return The content ID.
|
||||
*/
|
||||
int getContentId();
|
||||
|
||||
|
||||
/**
|
||||
* Returns the type of the MongoDB entry.
|
||||
*
|
||||
* @return The type of the MongoDB entry.
|
||||
*/
|
||||
MongoDBEntryType getType();
|
||||
|
||||
/**
|
||||
* Returns the speakerId.
|
||||
*
|
||||
* @return the speakerId
|
||||
*/
|
||||
int getSpeakerId();
|
||||
|
||||
/**
|
||||
* Returns the speakerName.
|
||||
*
|
||||
* @return the speakerName
|
||||
*/
|
||||
String getSpeakerName();
|
||||
|
||||
/**
|
||||
* Returns the fraction.
|
||||
*
|
||||
* @return the fraction
|
||||
*/
|
||||
String getFraction();
|
||||
}
|
|
@ -0,0 +1,75 @@
|
|||
package org.texttechnologylab.project.gruppe_05_1.xml.speeches.Interfaces;
|
||||
|
||||
import org.texttechnologylab.project.gruppe_05_1.xml.speeches.enums.MongoDBEntryType;
|
||||
|
||||
import java.util.List;
|
||||
|
||||
public interface Speech {
|
||||
/**
|
||||
* Returns the speech ID.
|
||||
*
|
||||
* @return The speech ID.
|
||||
*/
|
||||
int getSpeechId();
|
||||
|
||||
/**
|
||||
* Returns the type of the MongoDB entry.
|
||||
*
|
||||
* @return The type of the MongoDB entry.
|
||||
*/
|
||||
MongoDBEntryType getType();
|
||||
|
||||
/**
|
||||
* Returns the agenda item ID.
|
||||
*
|
||||
* @return The agenda item ID.
|
||||
*/
|
||||
int getAgendaItemId();
|
||||
|
||||
/**
|
||||
* Returns the speaker ID.
|
||||
*
|
||||
* @return The speaker ID.
|
||||
*/
|
||||
int getSpeakerId();
|
||||
|
||||
/**
|
||||
* Returns the speaker name.
|
||||
*
|
||||
* @return The speaker name.
|
||||
*/
|
||||
String getSpeakerName();
|
||||
|
||||
/**
|
||||
* Returns the fraction.
|
||||
*
|
||||
* @return The fraction.
|
||||
*/
|
||||
String getFraction();
|
||||
|
||||
/**
|
||||
* Returns the session ID.
|
||||
*
|
||||
* @return The session ID.
|
||||
*/
|
||||
int getSessionId();
|
||||
|
||||
/**
|
||||
* Returns the speech contents.
|
||||
*
|
||||
* @return The speech contents.
|
||||
*/
|
||||
List<Content> getSpeechContents();
|
||||
/**
|
||||
* Adds a speaker to the speech.
|
||||
* @param speaker The speaker to add.
|
||||
*/
|
||||
void addContent(Content speaker);
|
||||
|
||||
/**
|
||||
* Returns the HTML representation of the agenda item.
|
||||
*
|
||||
* @return The HTML representation of the agenda item.
|
||||
*/
|
||||
String toHTML();
|
||||
}
|
|
@ -0,0 +1,188 @@
|
|||
package org.texttechnologylab.project.gruppe_05_1.xml.speeches;
|
||||
|
||||
import lombok.Getter;
|
||||
import lombok.Setter;
|
||||
import org.texttechnologylab.project.gruppe_05_1.util.PPRUtils;
|
||||
import org.texttechnologylab.project.gruppe_05_1.xml.speeches.Impls.*;
|
||||
import org.texttechnologylab.project.gruppe_05_1.xml.speeches.Interfaces.AgendaItem;
|
||||
import org.texttechnologylab.project.gruppe_05_1.xml.speeches.Interfaces.Session;
|
||||
import org.texttechnologylab.project.gruppe_05_1.xml.speeches.Interfaces.Speech;
|
||||
import org.w3c.dom.Document;
|
||||
import org.w3c.dom.Element;
|
||||
import org.w3c.dom.Node;
|
||||
import org.w3c.dom.NodeList;
|
||||
|
||||
import javax.xml.parsers.DocumentBuilder;
|
||||
import javax.xml.parsers.DocumentBuilderFactory;
|
||||
import javax.xml.transform.OutputKeys;
|
||||
import javax.xml.transform.Transformer;
|
||||
import javax.xml.transform.TransformerFactory;
|
||||
import javax.xml.transform.dom.DOMSource;
|
||||
import javax.xml.transform.stream.StreamResult;
|
||||
import java.io.File;
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
import java.util.Set;
|
||||
|
||||
public class SpeechParser {
|
||||
|
||||
@Getter
|
||||
private List<Speech> speeches;
|
||||
@Getter
|
||||
private List<AgendaItem> agendaItems;
|
||||
@Setter
|
||||
private Boolean parseLegislativePeriods;
|
||||
|
||||
public List<Session> parseAllSessions() {
|
||||
List<Session> sessions = new ArrayList<>();
|
||||
this.speeches = new ArrayList<>();
|
||||
this.agendaItems = new ArrayList<>();
|
||||
//TODO Logik so machen dass aus array von xmls gelesen wird nicht aus pfad
|
||||
Set<Document> xmlDocuments = PPRUtils.processXML();
|
||||
for (org.w3c.dom.Document xmlDoc:xmlDocuments) {
|
||||
try {
|
||||
File tempFile = convertDocumentToFile(xmlDoc);
|
||||
Session session = parseSessionFile(tempFile);
|
||||
sessions.add(session);
|
||||
tempFile.delete(); // Lösche die temporäre Datei nach der Verarbeitung
|
||||
} catch (Exception e) {
|
||||
System.err.println("Error parsing XML document.");
|
||||
e.printStackTrace();
|
||||
}
|
||||
}
|
||||
|
||||
return sessions;
|
||||
}
|
||||
|
||||
private Session parseSessionFile(File file) throws Exception {
|
||||
//file = removeDoctypeAnnotation(file.getAbsolutePath());
|
||||
|
||||
DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
|
||||
DocumentBuilder builder = factory.newDocumentBuilder();
|
||||
Document document = builder.parse(file);
|
||||
|
||||
// Extract session details
|
||||
Element root = document.getDocumentElement();
|
||||
String legislativePeriod = root.getAttribute("wahlperiode");
|
||||
int sessionId = Integer.parseInt(root.getAttribute("sitzung-nr"));
|
||||
System.out.println("Session " + sessionId + " wurde gespeichert");
|
||||
String sessionDate = root.getAttribute("sitzung-datum");
|
||||
Element startTimeElement = (Element) root.getElementsByTagName("sitzungsbeginn").item(0);
|
||||
String startTimeString = startTimeElement != null ? startTimeElement.getAttribute("sitzung-start-uhrzeit") : null;
|
||||
|
||||
String startTime = startTimeString != null ? sessionDate + " " + startTimeString : sessionDate;
|
||||
|
||||
Element sessionEndElement = (Element) root.getElementsByTagName("sitzungsende").item(0);
|
||||
String sessionEndTime = sessionEndElement != null ? sessionEndElement.getAttribute("sitzung-ende-uhrzeit") : null;
|
||||
|
||||
Session_File_Impl session = new Session_File_Impl(legislativePeriod, sessionId, startTime, sessionEndTime);
|
||||
|
||||
// Parse agenda items
|
||||
NodeList agendaNodes = document.getElementsByTagName("tagesordnungspunkt");
|
||||
for (int agendaItemId = 0; agendaItemId < agendaNodes.getLength(); agendaItemId++) {
|
||||
Element agendaElement = (Element) agendaNodes.item(agendaItemId);
|
||||
if (agendaElement == null) continue;
|
||||
|
||||
String agendaTitle = agendaElement.getAttribute("top-id");
|
||||
|
||||
AgendaItem_File_Impl agendaItemFileImpl = new AgendaItem_File_Impl(agendaItemId, sessionId, agendaTitle);
|
||||
|
||||
this.agendaItems.add(agendaItemFileImpl);
|
||||
|
||||
// Parse speeches
|
||||
NodeList speechNodes = agendaElement.getElementsByTagName("rede");
|
||||
for (int speechId = 0; speechId < speechNodes.getLength(); speechId++) {
|
||||
Element speechElement = (Element) speechNodes.item(speechId);
|
||||
if (speechElement == null) continue;
|
||||
|
||||
// Parse speaker
|
||||
Element speakerElement = (Element) speechElement.getElementsByTagName("redner").item(0);
|
||||
if (speakerElement == null) continue;
|
||||
|
||||
int speakerId = Integer.parseInt(speakerElement.getAttribute("id"));
|
||||
Element nameElement = (Element) speakerElement.getElementsByTagName("name").item(0);
|
||||
if (nameElement == null) continue;
|
||||
|
||||
String title = getOptionalTextContent(nameElement, "titel");
|
||||
String firstName = getOptionalTextContent(nameElement, "vorname");
|
||||
String lastName = getOptionalTextContent(nameElement, "nachname");
|
||||
String fraction = getOptionalTextContent(nameElement, "fraktion");
|
||||
|
||||
String speakerName = (title != null ? title + " " : "") + firstName + " " + lastName;
|
||||
Speech_File_Impl speech = new Speech_File_Impl(sessionId, agendaItemId, speechId, speakerId, speakerName, fraction);
|
||||
|
||||
// Add the speaker to speech contents
|
||||
speech.addContent(new Speaker_File_Impl(0, speechId, speakerId, speakerName, fraction));
|
||||
|
||||
// Parse content in order
|
||||
NodeList contentNodes = speechElement.getChildNodes();
|
||||
for (int k = 0; k < contentNodes.getLength(); k++) {
|
||||
Node contentNode = contentNodes.item(k);
|
||||
if (contentNode == null || contentNode.getNodeType() != Node.ELEMENT_NODE) continue;
|
||||
|
||||
Element contentElement = (Element) contentNode;
|
||||
String tagName = contentElement.getTagName();
|
||||
|
||||
switch (tagName) {
|
||||
case "p":
|
||||
String paragraphClass = contentElement.getAttribute("klasse");
|
||||
if ("redner".equals(paragraphClass)) {
|
||||
// This case can be skipped as the speaker is already added
|
||||
continue;
|
||||
} else if ("kommentar".equals(paragraphClass)) {
|
||||
String comment = contentElement.getTextContent().trim();
|
||||
String commentatorName = ""; // Extract if present
|
||||
speech.addContent(new Comment_File_Impl(k + 1, speechId, commentatorName, comment));
|
||||
} else {
|
||||
String line = contentElement.getTextContent().trim();
|
||||
speech.addContent(new Line_File_Impl(k + 1, speechId, line));
|
||||
}
|
||||
break;
|
||||
|
||||
case "kommentar":
|
||||
String comment = contentElement.getTextContent().trim();
|
||||
String commentatorName = ""; // Extract if available
|
||||
speech.addContent(new Comment_File_Impl(k + 1, speechId, commentatorName, comment));
|
||||
break;
|
||||
|
||||
default:
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
agendaItemFileImpl.addSpeech(speech);
|
||||
this.speeches.add(speech);
|
||||
}
|
||||
|
||||
session.addAgendaItem(agendaItemFileImpl);
|
||||
}
|
||||
return session;
|
||||
}
|
||||
|
||||
private static String getOptionalTextContent(Element parent, String tagName) {
|
||||
NodeList nodes = parent.getElementsByTagName(tagName);
|
||||
if (nodes.getLength() > 0) {
|
||||
Node node = nodes.item(0);
|
||||
if (node != null) {
|
||||
return node.getTextContent().trim();
|
||||
}
|
||||
}
|
||||
return null;
|
||||
}
|
||||
/**
|
||||
* Konvertiert ein org.w3c.dom.Document in eine temporäre Datei.
|
||||
*/
|
||||
private File convertDocumentToFile(org.w3c.dom.Document xmlDoc) throws Exception {
|
||||
File tempFile = File.createTempFile("session_", ".xml");
|
||||
TransformerFactory transformerFactory = TransformerFactory.newInstance();
|
||||
Transformer transformer = transformerFactory.newTransformer();
|
||||
transformer.setOutputProperty(OutputKeys.INDENT, "yes");
|
||||
|
||||
DOMSource source = new DOMSource(xmlDoc);
|
||||
StreamResult result = new StreamResult(tempFile);
|
||||
transformer.transform(source, result);
|
||||
|
||||
return tempFile;
|
||||
}
|
||||
|
||||
}
|
|
@ -0,0 +1,17 @@
|
|||
package org.texttechnologylab.project.gruppe_05_1.xml.speeches.enums;
|
||||
|
||||
/**
 * Type tag returned by the {@code getType()} methods of the MongoDB entry interfaces.
 */
public enum MongoDBEntryType {
    AGENDA_ITEM,
    // NOTE(review): spelled "CONSTIUENCY" (sic); kept as-is because renaming would break existing references
    CONSTIUENCY,
    FRACTION,
    HISTORY_ENTRY,
    INDIVIDUAL,
    LEGISLATIVE_PERIOD,
    MEMBER_OF_PARLIAMENT,
    SESSION,
    SPEECH,
    SPEECH_COMMENT,
    SPEECH_CONTENT,
    SPEECH_LINE,
    SPEECH_SPEAKER,
}
|
Loading…
Add table
Add a link
Reference in a new issue