diff --git a/pom.xml b/pom.xml index a92556e..4edb569 100644 --- a/pom.xml +++ b/pom.xml @@ -36,6 +36,13 @@ + + org.projectlombok + lombok + 1.18.36 + provided + + io.javalin javalin diff --git a/src/main/java/org/texttechnologylab/project/gruppe_05_1/Main.java b/src/main/java/org/texttechnologylab/project/gruppe_05_1/Main.java index a59ed69..33d6ef0 100644 --- a/src/main/java/org/texttechnologylab/project/gruppe_05_1/Main.java +++ b/src/main/java/org/texttechnologylab/project/gruppe_05_1/Main.java @@ -11,6 +11,7 @@ import org.texttechnologylab.project.gruppe_05_1.util.PPRUtils; import org.texttechnologylab.project.gruppe_05_1.util.PropertiesUtils; import org.texttechnologylab.project.gruppe_05_1.util.XmlUtils; import org.texttechnologylab.project.gruppe_05_1.xml.FileObjectFactory; +import org.texttechnologylab.project.gruppe_05_1.xml.speeches.SpeechParser; import org.w3c.dom.Element; import java.util.List; @@ -44,9 +45,10 @@ public class Main { private static final FileObjectFactory xmlFactory = FileObjectFactory.getFactory(); private static final MongoObjectFactory mongoFactory = MongoObjectFactory.getFactory(); - + private static final SpeechParser speechParser = new SpeechParser(); public static void main(String[] args) { - + //TEST + speechParser.parseAllSessions(); // Stellt fest, dass alle nötigen Datenbank-Collections existieren PPRUtils.ensureCollectionExist(); diff --git a/src/main/java/org/texttechnologylab/project/gruppe_05_1/util/PPRUtils.java b/src/main/java/org/texttechnologylab/project/gruppe_05_1/util/PPRUtils.java index 128ae21..a1b515c 100644 --- a/src/main/java/org/texttechnologylab/project/gruppe_05_1/util/PPRUtils.java +++ b/src/main/java/org/texttechnologylab/project/gruppe_05_1/util/PPRUtils.java @@ -3,6 +3,9 @@ package org.texttechnologylab.project.gruppe_05_1.util; import com.mongodb.client.MongoCollection; import com.mongodb.client.MongoDatabase; import com.mongodb.client.model.Indexes; +import org.jsoup.Jsoup; +import 
org.jsoup.nodes.Document; +import org.jsoup.select.Elements; import org.texttechnologylab.project.gruppe_05_1.database.MongoDBHandler; import org.texttechnologylab.project.gruppe_05_1.database.MongoObjectFactory; import org.texttechnologylab.project.gruppe_05_1.database.MongoPprUtils; @@ -19,8 +22,10 @@ import org.w3c.dom.Element; import org.w3c.dom.Node; import org.xml.sax.InputSource; +import javax.xml.parsers.DocumentBuilder; import javax.xml.parsers.DocumentBuilderFactory; import java.io.*; +import java.net.HttpURLConnection; import java.net.URL; import java.util.*; import java.util.stream.Collectors; @@ -30,6 +35,9 @@ import java.util.zip.ZipInputStream; public abstract class PPRUtils { public static final String PARTEILOS_KUERZEL = "Parteilos"; + private static Set processedProtocols = new HashSet<>(); + private static Set xmlProtocols = new HashSet<>(); + /** @@ -343,4 +351,92 @@ public abstract class PPRUtils { } }); } + + + public static Set processXML() { + int offset = 0; + int limit = 10; + boolean hasMore = true; + + while (hasMore) { + String queryUrl = "https://www.bundestag.de/ajax/filterlist/de/services/opendata/866354-866354?limit=" + + limit + "&noFilterSet=true&offset=" + offset; + System.out.println("Lade: " + queryUrl); + try { + Document htmlDoc = Jsoup.connect(queryUrl).get(); + Elements xmlLinks = htmlDoc.select("a.bt-link-dokument"); + if (xmlLinks.isEmpty()) { + System.out.println("Keine weiteren Protokolle gefunden."); + break; + } + + for (org.jsoup.nodes.Element link : xmlLinks) { + String xmlUrl = link.attr("href"); + System.out.println("Verarbeite XML: " + xmlUrl); + try { + org.w3c.dom.Document xmlDoc = downloadAndParseXML(xmlUrl); + + String uniqueId = xmlDoc.getDocumentElement().getAttribute("sitzung-nr"); + if (processedProtocols.contains(uniqueId)) { + System.out.println("Protokoll bereits verarbeitet: " + uniqueId); + continue; + } + processedProtocols.add(uniqueId); + xmlProtocols.add(xmlDoc); + //TODO verarbeitung + } catch 
(Exception e) { + System.err.println("Fehler beim Verarbeiten der XML-Datei: " + xmlUrl); + e.printStackTrace(); + } + } + + // check if next + org.jsoup.nodes.Element metaSlider = htmlDoc.selectFirst("div.meta-slider"); + if (metaSlider != null && metaSlider.hasAttr("data-nextoffset")) { + int nextOffset = Integer.parseInt(metaSlider.attr("data-nextoffset")); + if (nextOffset <= offset) { + hasMore = false; + } else { + offset = nextOffset; + } + } else { + hasMore = false; + } + } catch (IOException e) { + System.err.println("Fehler beim Laden der Seite: " + queryUrl); + e.printStackTrace(); + break; + } + } + return xmlProtocols; + } + + /** + * Lädt die XML-Datei von der gegebenen URL herunter und parst sie + * mittels dbParser. + * + * @param xmlUrl URL der XML-Datei + * @return Das geparste org.w3c.dom.Document + * @throws Exception wenn ein Fehler auftritt + */ + public static org.w3c.dom.Document downloadAndParseXML(String xmlUrl) throws Exception { + URL url = new URL(xmlUrl); + HttpURLConnection connection = (HttpURLConnection) url.openConnection(); + connection.setRequestMethod("GET"); + connection.connect(); + + DocumentBuilderFactory dbFactory = DocumentBuilderFactory.newInstance(); + DocumentBuilder dBuilder = dbFactory.newDocumentBuilder(); + dBuilder.setEntityResolver((publicId, systemId) -> { + InputStream dtdStream = PPRUtils.class.getResourceAsStream("/plenarprotokolle/dbtplenarprotokoll.dtd"); + if (dtdStream != null) { + return new InputSource(dtdStream); + } + return null; + }); + org.w3c.dom.Document doc = dBuilder.parse(connection.getInputStream()); + doc.getDocumentElement().normalize(); + return doc; + } + } diff --git a/src/main/java/org/texttechnologylab/project/gruppe_05_1/xml/speeches/Impls/AgendaItem_File_Impl.java b/src/main/java/org/texttechnologylab/project/gruppe_05_1/xml/speeches/Impls/AgendaItem_File_Impl.java new file mode 100644 index 0000000..878099c --- /dev/null +++ 
b/src/main/java/org/texttechnologylab/project/gruppe_05_1/xml/speeches/Impls/AgendaItem_File_Impl.java @@ -0,0 +1,39 @@ +package org.texttechnologylab.project.gruppe_05_1.xml.speeches.Impls; + +import lombok.Getter; +import org.texttechnologylab.project.gruppe_05_1.xml.speeches.Interfaces.AgendaItem; +import org.texttechnologylab.project.gruppe_05_1.xml.speeches.Interfaces.Speech; +import org.texttechnologylab.project.gruppe_05_1.xml.speeches.enums.MongoDBEntryType; + + +import java.util.ArrayList; + +@Getter +public class AgendaItem_File_Impl implements AgendaItem { + private final int id; + private final int sessionId; + private final String title; + private final ArrayList speeches; + + public AgendaItem_File_Impl(int id, int sessionId, String title) { + this.id = id; + this.sessionId = sessionId; + this.title = title; + this.speeches = new ArrayList<>(); + } + + @Override + public MongoDBEntryType getType() { + return MongoDBEntryType.AGENDA_ITEM; + } + + @Override + public void addSpeech(Speech speech) { + this.speeches.add(speech); + } + + @Override + public String toHTML() { + return ""; + } +} diff --git a/src/main/java/org/texttechnologylab/project/gruppe_05_1/xml/speeches/Impls/Comment_File_Impl.java b/src/main/java/org/texttechnologylab/project/gruppe_05_1/xml/speeches/Impls/Comment_File_Impl.java new file mode 100644 index 0000000..ce5c029 --- /dev/null +++ b/src/main/java/org/texttechnologylab/project/gruppe_05_1/xml/speeches/Impls/Comment_File_Impl.java @@ -0,0 +1,27 @@ +package org.texttechnologylab.project.gruppe_05_1.xml.speeches.Impls; + +import lombok.Getter; +import org.texttechnologylab.project.gruppe_05_1.xml.speeches.Interfaces.Comment; +import org.texttechnologylab.project.gruppe_05_1.xml.speeches.Interfaces.Content; +import org.texttechnologylab.project.gruppe_05_1.xml.speeches.enums.MongoDBEntryType; + + +@Getter +public class Comment_File_Impl implements Content, Comment { + private final int contentId; + private final int speechId; + 
private final String comment; + private final String commentatorName; + + public Comment_File_Impl(int contentId, int speechId, String commentatorName, String comment) { + this.contentId = contentId; + this.speechId = speechId; + this.commentatorName = commentatorName; + this.comment = comment; + } + + @Override + public MongoDBEntryType getType() { + return MongoDBEntryType.SPEECH_COMMENT; + } +} diff --git a/src/main/java/org/texttechnologylab/project/gruppe_05_1/xml/speeches/Impls/Constituency_File_Impl.java b/src/main/java/org/texttechnologylab/project/gruppe_05_1/xml/speeches/Impls/Constituency_File_Impl.java new file mode 100644 index 0000000..751b7ed --- /dev/null +++ b/src/main/java/org/texttechnologylab/project/gruppe_05_1/xml/speeches/Impls/Constituency_File_Impl.java @@ -0,0 +1,34 @@ +package org.texttechnologylab.project.gruppe_05_1.xml.speeches.Impls; + +import lombok.Getter; +import org.texttechnologylab.project.gruppe_05_1.xml.speeches.Interfaces.Constituency; +import org.texttechnologylab.project.gruppe_05_1.xml.speeches.enums.MongoDBEntryType; + + +@Getter +public class Constituency_File_Impl implements Constituency { + private final int id; + private final String name; + private final String federalState; + + public Constituency_File_Impl(int id, String name, String federalState) { + this.id = id; + this.name = name; + this.federalState = federalState; + } + + @Override + public MongoDBEntryType getType() { + return MongoDBEntryType.CONSTIUENCY; + } + + @Override + public String toHTML() { + return "
" + + "

Constituency Details

" + + "

ID: " + id + "

" + + "

Name: " + name + "

" + + "

Federal State: " + federalState + "

" + + "
"; + } +} diff --git a/src/main/java/org/texttechnologylab/project/gruppe_05_1/xml/speeches/Impls/Fraction_File_Impl.java b/src/main/java/org/texttechnologylab/project/gruppe_05_1/xml/speeches/Impls/Fraction_File_Impl.java new file mode 100644 index 0000000..0d0546a --- /dev/null +++ b/src/main/java/org/texttechnologylab/project/gruppe_05_1/xml/speeches/Impls/Fraction_File_Impl.java @@ -0,0 +1,45 @@ +package org.texttechnologylab.project.gruppe_05_1.xml.speeches.Impls; + +import lombok.Getter; +import org.texttechnologylab.project.gruppe_05_1.xml.speeches.Interfaces.Fraction; +import org.texttechnologylab.project.gruppe_05_1.xml.speeches.enums.MongoDBEntryType; + + +import java.util.List; + +@Getter +public class Fraction_File_Impl implements Fraction { + private String name; + private final String longName; + private final List members; + + public Fraction_File_Impl(String name, String longName, List members) { + this.name = name; + this.longName = longName; + this.members = members; + } + + @Override + public MongoDBEntryType getType() { + return MongoDBEntryType.FRACTION; + } + + @Override + public void addMember(int member) { + this.members.add(member); + } + + @Override + public void updateName(String name) { + this.name = name; + } + + @Override + public String toHTML() { + return "
" + + "

Fraction Details

" + + "

Name: " + name + "

" + + "

Long Name: " + longName + "

" + + "
"; + } +} diff --git a/src/main/java/org/texttechnologylab/project/gruppe_05_1/xml/speeches/Impls/HistoryEntry_File_Impl.java b/src/main/java/org/texttechnologylab/project/gruppe_05_1/xml/speeches/Impls/HistoryEntry_File_Impl.java new file mode 100644 index 0000000..9000f52 --- /dev/null +++ b/src/main/java/org/texttechnologylab/project/gruppe_05_1/xml/speeches/Impls/HistoryEntry_File_Impl.java @@ -0,0 +1,31 @@ +package org.texttechnologylab.project.gruppe_05_1.xml.speeches.Impls; + +import lombok.Getter; +import org.texttechnologylab.project.gruppe_05_1.xml.speeches.Interfaces.HistoryEntry; +import org.texttechnologylab.project.gruppe_05_1.xml.speeches.enums.MongoDBEntryType; + + +@Getter +public class HistoryEntry_File_Impl implements HistoryEntry { + private final String action; + private final String content; + private final String contentType; + private final String date; + + public HistoryEntry_File_Impl(String content, String contentType, String date, String action) { + this.action = action; + this.content = content; + this.contentType = contentType; + this.date = date; + } + + @Override + public String getAction() { + return this.action; + } + + @Override + public MongoDBEntryType getType() { + return MongoDBEntryType.HISTORY_ENTRY; + } +} diff --git a/src/main/java/org/texttechnologylab/project/gruppe_05_1/xml/speeches/Impls/Individual_File_Impl.java b/src/main/java/org/texttechnologylab/project/gruppe_05_1/xml/speeches/Impls/Individual_File_Impl.java new file mode 100644 index 0000000..3c0cc5c --- /dev/null +++ b/src/main/java/org/texttechnologylab/project/gruppe_05_1/xml/speeches/Impls/Individual_File_Impl.java @@ -0,0 +1,28 @@ +package org.texttechnologylab.project.gruppe_05_1.xml.speeches.Impls; + +import lombok.Getter; +import org.texttechnologylab.project.gruppe_05_1.xml.speeches.Interfaces.Individual; + + +@Getter +public abstract class Individual_File_Impl implements Individual { + private final String name; + private final String firstName; + 
private final String title; + private final String dateOfBirth; + private final String dateOfDeath; + private final String placeOfBirth; + private final String gender; + private final String religion; + + public Individual_File_Impl(String name, String firstName, String title, String dateOfBirth, String dateOfDeath, String placeOfBirth, String gender, String religion) { + this.name = name; + this.firstName = firstName; + this.title = title; + this.dateOfBirth = dateOfBirth; + this.dateOfDeath = dateOfDeath; + this.placeOfBirth = placeOfBirth; + this.gender = gender; + this.religion = religion; + } +} diff --git a/src/main/java/org/texttechnologylab/project/gruppe_05_1/xml/speeches/Impls/LegislativePeriod_File_Impl.java b/src/main/java/org/texttechnologylab/project/gruppe_05_1/xml/speeches/Impls/LegislativePeriod_File_Impl.java new file mode 100644 index 0000000..9c1e981 --- /dev/null +++ b/src/main/java/org/texttechnologylab/project/gruppe_05_1/xml/speeches/Impls/LegislativePeriod_File_Impl.java @@ -0,0 +1,37 @@ +package org.texttechnologylab.project.gruppe_05_1.xml.speeches.Impls; + +import lombok.Getter; +import org.texttechnologylab.project.gruppe_05_1.xml.speeches.Interfaces.LegislativePeriod; +import org.texttechnologylab.project.gruppe_05_1.xml.speeches.enums.MongoDBEntryType; + + +@Getter +public class LegislativePeriod_File_Impl implements LegislativePeriod { + private final int id; + private final String startDate; + private final String endDate; + private final String constituency; + + public LegislativePeriod_File_Impl(int id, String startDate, String endDate, String constituency) { + this.id = id; + this.startDate = startDate; + this.endDate = endDate; + this.constituency = constituency; + } + + @Override + public MongoDBEntryType getType() { + return MongoDBEntryType.LEGISLATIVE_PERIOD; + } + + @Override + public String toHTML() { + return "
" + + "

Legislative Period Details

" + + "

ID: " + id + "

" + + "

Start Date: " + startDate + "

" + + "

End Date: " + endDate + "

" + + "

Constituency: " + constituency + "

" + + "
"; + } +} diff --git a/src/main/java/org/texttechnologylab/project/gruppe_05_1/xml/speeches/Impls/Line_File_Impl.java b/src/main/java/org/texttechnologylab/project/gruppe_05_1/xml/speeches/Impls/Line_File_Impl.java new file mode 100644 index 0000000..7d951a3 --- /dev/null +++ b/src/main/java/org/texttechnologylab/project/gruppe_05_1/xml/speeches/Impls/Line_File_Impl.java @@ -0,0 +1,24 @@ +package org.texttechnologylab.project.gruppe_05_1.xml.speeches.Impls; + +import lombok.Getter; +import org.texttechnologylab.project.gruppe_05_1.xml.speeches.Interfaces.Content; +import org.texttechnologylab.project.gruppe_05_1.xml.speeches.Interfaces.Line; +import org.texttechnologylab.project.gruppe_05_1.xml.speeches.enums.MongoDBEntryType; + +@Getter +public class Line_File_Impl implements Content, Line { + private final int contentId; + protected final int speechId; + private final String content; + + public Line_File_Impl(int contentId, int speechId, String content) { + this.contentId = contentId; + this.speechId = speechId; + this.content = content; + } + + @Override + public MongoDBEntryType getType() { + return MongoDBEntryType.SPEECH_LINE; + } +} \ No newline at end of file diff --git a/src/main/java/org/texttechnologylab/project/gruppe_05_1/xml/speeches/Impls/MemberOfParliament_File_Impl.java b/src/main/java/org/texttechnologylab/project/gruppe_05_1/xml/speeches/Impls/MemberOfParliament_File_Impl.java new file mode 100644 index 0000000..82d23b8 --- /dev/null +++ b/src/main/java/org/texttechnologylab/project/gruppe_05_1/xml/speeches/Impls/MemberOfParliament_File_Impl.java @@ -0,0 +1,94 @@ +package org.texttechnologylab.project.gruppe_05_1.xml.speeches.Impls; + +import lombok.Getter; +import org.texttechnologylab.project.gruppe_05_1.xml.speeches.Interfaces.LegislativePeriod; +import org.texttechnologylab.project.gruppe_05_1.xml.speeches.Interfaces.MemberOfParliament; +import org.texttechnologylab.project.gruppe_05_1.xml.speeches.enums.MongoDBEntryType; + + +import 
java.util.List; + +@Getter +public class MemberOfParliament_File_Impl extends Individual_File_Impl implements MemberOfParliament { + private final int id; + private String party; + private final List legislativePeriods; + private final int firstLegislativePeriodId; + private final int lastLegislativePeriodId; + private final String photo; + + public MemberOfParliament_File_Impl(String name, String firstName, String title, String dateOfBirth, String dateofDeath, String placeOfBirth, String gender, String religion, int id, String party, List legislativePeriods, int firstLegislativePeriodId, int lastLegislativePeriodId, String photo) { + super(name, firstName, title, dateOfBirth, dateofDeath, placeOfBirth, gender, religion); + this.id = id; + this.party = party; + this.legislativePeriods = legislativePeriods; + this.firstLegislativePeriodId = firstLegislativePeriodId; + this.lastLegislativePeriodId = lastLegislativePeriodId; + this.photo = photo; + } + + @Override + public boolean isCurrentMember() { + return this.lastLegislativePeriodId == 20; + } + + @Override + public void updateParty(String party) { + this.party = party; + } + + @Override + public MongoDBEntryType getType() { + return MongoDBEntryType.MEMBER_OF_PARLIAMENT; + } + + @Override + public String toHTML() { + StringBuilder html = new StringBuilder(); + + // Basic Member details + html.append("
") + .append("

Member of Parliament Details

") + .append("

ID: ").append(id).append("

") + .append("

Name: ").append(getName()).append(" ").append(getFirstName()).append("

") + .append("

Party: ").append(party).append("

"); + + // Member photo (constructed from last name and first name) + String photoPath = "../src/memberphotos/" + getName() + "_" + getFirstName() + ".jpg"; + html.append("

Photo

"); + + // Legislative Periods - First and Last Period + html.append("

Legislative Periods

"); + if (legislativePeriods == null || legislativePeriods.isEmpty()) { + if (firstLegislativePeriodId == lastLegislativePeriodId) { + html.append("

Member of Parliament during the legislative period ") + .append(firstLegislativePeriodId); + + } else { + html.append("

Candidated for the first time during the legislative period ") + .append(firstLegislativePeriodId) + .append(" and the last time during the legislative period ") + .append(lastLegislativePeriodId); + } + if (isCurrentMember()) { + html.append("

(currently active)

"); + } + html.append("

"); + } else { + // First Legislative Period + LegislativePeriod firstPeriod = legislativePeriods.get(0); + html.append("

First Period: ").append(firstPeriod.getStartDate()) + .append(" to ").append(firstPeriod.getEndDate()).append("

"); + + // Last Legislative Period + LegislativePeriod lastPeriod = legislativePeriods.get(legislativePeriods.size() - 1); + html.append("

Last Period: ").append(lastPeriod.getStartDate()) + .append(" to ").append(lastPeriod.getEndDate()).append("

"); + } + + html.append("
"); + + return html.toString(); + } + +} diff --git a/src/main/java/org/texttechnologylab/project/gruppe_05_1/xml/speeches/Impls/Session_File_Impl.java b/src/main/java/org/texttechnologylab/project/gruppe_05_1/xml/speeches/Impls/Session_File_Impl.java new file mode 100644 index 0000000..27136c9 --- /dev/null +++ b/src/main/java/org/texttechnologylab/project/gruppe_05_1/xml/speeches/Impls/Session_File_Impl.java @@ -0,0 +1,47 @@ +package org.texttechnologylab.project.gruppe_05_1.xml.speeches.Impls; + +import lombok.Getter; +import org.texttechnologylab.project.gruppe_05_1.xml.speeches.Interfaces.AgendaItem; +import org.texttechnologylab.project.gruppe_05_1.xml.speeches.Interfaces.Session; +import org.texttechnologylab.project.gruppe_05_1.xml.speeches.enums.MongoDBEntryType; + +import java.util.ArrayList; +import java.util.List; + +@Getter +public class Session_File_Impl implements Session { + private final int id; + private final String legislativePeriod; + private final String dateTime; + private final String endTime; + private final List agendaItems; + + public Session_File_Impl(String legislativePeriod, int id, String dateTime, String endTime) { + this.id = id; + this.legislativePeriod = legislativePeriod; + this.dateTime = dateTime; + this.endTime = endTime; + this.agendaItems = new ArrayList<>(); + } + + @Override + public MongoDBEntryType getType() { + return MongoDBEntryType.SESSION; + } + + @Override + public void addAgendaItem(AgendaItem item) { + this.agendaItems.add(item); + } + + @Override + public String toHTML() { + return "
" + + "

Session Details

" + + "

ID: " + id + "

" + + "

Legislative Period: " + legislativePeriod + "

" + + "

Start Time: " + dateTime + "

" + + "

End Time: " + endTime + "

" + + "
"; + } +} diff --git a/src/main/java/org/texttechnologylab/project/gruppe_05_1/xml/speeches/Impls/Speaker_File_Impl.java b/src/main/java/org/texttechnologylab/project/gruppe_05_1/xml/speeches/Impls/Speaker_File_Impl.java new file mode 100644 index 0000000..c7d395e --- /dev/null +++ b/src/main/java/org/texttechnologylab/project/gruppe_05_1/xml/speeches/Impls/Speaker_File_Impl.java @@ -0,0 +1,29 @@ +package org.texttechnologylab.project.gruppe_05_1.xml.speeches.Impls; + +import lombok.Getter; +import org.texttechnologylab.project.gruppe_05_1.xml.speeches.Interfaces.Content; +import org.texttechnologylab.project.gruppe_05_1.xml.speeches.Interfaces.Speaker; +import org.texttechnologylab.project.gruppe_05_1.xml.speeches.enums.MongoDBEntryType; + + +@Getter +public class Speaker_File_Impl implements Content, Speaker { + private final int contentId; + private final int speechId; + private final int speakerId; + private final String speakerName; + private final String fraction; + + public Speaker_File_Impl(int contentId, int speechId, int speakerId, String speakerName, String fraction) { + this.contentId = contentId; + this.speechId = speechId; + this.speakerId = speakerId; + this.speakerName = speakerName; + this.fraction = fraction; + } + + @Override + public MongoDBEntryType getType() { + return MongoDBEntryType.SPEECH_SPEAKER; + } +} diff --git a/src/main/java/org/texttechnologylab/project/gruppe_05_1/xml/speeches/Impls/Speech_File_Impl.java b/src/main/java/org/texttechnologylab/project/gruppe_05_1/xml/speeches/Impls/Speech_File_Impl.java new file mode 100644 index 0000000..8e62e15 --- /dev/null +++ b/src/main/java/org/texttechnologylab/project/gruppe_05_1/xml/speeches/Impls/Speech_File_Impl.java @@ -0,0 +1,54 @@ +package org.texttechnologylab.project.gruppe_05_1.xml.speeches.Impls; + +import lombok.Getter; +import org.texttechnologylab.project.gruppe_05_1.xml.speeches.Interfaces.Content; +import org.texttechnologylab.project.gruppe_05_1.xml.speeches.Interfaces.Speech; 
+import org.texttechnologylab.project.gruppe_05_1.xml.speeches.enums.MongoDBEntryType; + + +import java.util.ArrayList; +import java.util.List; + +@Getter +public class Speech_File_Impl implements Speech { + private final int sessionId; + private final int agendaItemId; + private final int speakerId; + private final int speechId; + private final String speakerName; + private final String fraction; + private final List speechContents; + + public Speech_File_Impl(int sessionId, int agendaItemId, int speechId, int speakerId, String speakerName, String fraction) { + this.speakerId = speakerId; + this.agendaItemId = agendaItemId; + this.speechId = speechId; + this.speakerName = speakerName; + this.fraction = fraction; + this.sessionId = sessionId; + this.speechContents = new ArrayList<>(); + } + + @Override + public MongoDBEntryType getType() { + return MongoDBEntryType.SPEECH; + } + + public void addContent(Content content) { + this.speechContents.add(content); + //Logger.pink("Added paragraph to speech: " + paragraph); + } + + @Override + public String toHTML() { + //TODO: Implement a logic that highlights the lines that the speaker is saying + return "
" + + "

Speech Details

" + + "

Session ID: " + sessionId + "

" + + "

Speaker ID: " + speakerId + "

" + + "

Speaker Name: " + speakerName + "

" + + "

Fraction: " + fraction + "

" + + "
"; + } + +} diff --git a/src/main/java/org/texttechnologylab/project/gruppe_05_1/xml/speeches/Interfaces/AgendaItem.java b/src/main/java/org/texttechnologylab/project/gruppe_05_1/xml/speeches/Interfaces/AgendaItem.java new file mode 100644 index 0000000..9aba11d --- /dev/null +++ b/src/main/java/org/texttechnologylab/project/gruppe_05_1/xml/speeches/Interfaces/AgendaItem.java @@ -0,0 +1,56 @@ +package org.texttechnologylab.project.gruppe_05_1.xml.speeches.Interfaces; + +import org.texttechnologylab.project.gruppe_05_1.xml.speeches.enums.MongoDBEntryType; + +import java.util.ArrayList; + +public interface AgendaItem { + /** + * Returns the ID of the agenda item. + * + * @return The ID of the agenda item. + */ + int getId(); + + /** + * Returns the type of the MongoDB entry. + * + * @return The type of the MongoDB entry. + */ + MongoDBEntryType getType(); + + /** + * Returns the session ID. + * + * @return The session ID. + */ + int getSessionId(); + + /** + * Returns the title of the agenda item. + * + * @return The title of the agenda item. + */ + String getTitle(); + + /** + * Returns the speeches of the agenda item. + * + * @return The speeches of the agenda item. + */ + ArrayList getSpeeches(); + + /** + * Adds a speech to the agenda item. + * + * @param speech The speech to add. + */ + void addSpeech(Speech speech); + + /** + * Returns the HTML representation of the agenda item. + * + * @return The HTML representation of the agenda item. 
+ */ + String toHTML(); +} diff --git a/src/main/java/org/texttechnologylab/project/gruppe_05_1/xml/speeches/Interfaces/Comment.java b/src/main/java/org/texttechnologylab/project/gruppe_05_1/xml/speeches/Interfaces/Comment.java new file mode 100644 index 0000000..088b8e4 --- /dev/null +++ b/src/main/java/org/texttechnologylab/project/gruppe_05_1/xml/speeches/Interfaces/Comment.java @@ -0,0 +1,35 @@ +package org.texttechnologylab.project.gruppe_05_1.xml.speeches.Interfaces; + + +import org.texttechnologylab.project.gruppe_05_1.xml.speeches.enums.MongoDBEntryType; + +public interface Comment extends Content { + /** + * Returns the content ID. + * + * @return The content ID. + */ + int getContentId(); + + + /** + * Returns the type of the MongoDB entry. + * + * @return The type of the MongoDB entry. + */ + MongoDBEntryType getType(); + + /** + * Returns the comment. + * + * @return the comment + */ + String getComment(); + + /** + * Returns the commentator name. + * + * @return the commentator name + */ + String getCommentatorName(); +} diff --git a/src/main/java/org/texttechnologylab/project/gruppe_05_1/xml/speeches/Interfaces/Constituency.java b/src/main/java/org/texttechnologylab/project/gruppe_05_1/xml/speeches/Interfaces/Constituency.java new file mode 100644 index 0000000..5bd3f75 --- /dev/null +++ b/src/main/java/org/texttechnologylab/project/gruppe_05_1/xml/speeches/Interfaces/Constituency.java @@ -0,0 +1,38 @@ +package org.texttechnologylab.project.gruppe_05_1.xml.speeches.Interfaces; + +import org.texttechnologylab.project.gruppe_05_1.xml.speeches.enums.MongoDBEntryType; + +public interface Constituency { + /** + * Returns the id of the constituency. + * + * @return the id of the constituency + */ + int getId(); + + /** + * Returns the type of the MongoDB entry. + * + * @return The type of the MongoDB entry. + */ + MongoDBEntryType getType(); + /** + * Returns the name of the constituency. 
+ * + * @return the name of the constituency + */ + String getName(); + /** + * Returns the federal state of the constituency. + * + * @return the federal state of the constituency + */ + String getFederalState(); + + /** + * Returns the HTML representation of the constituency. + * + * @return The HTML representation of the constituency. + */ + String toHTML(); +} diff --git a/src/main/java/org/texttechnologylab/project/gruppe_05_1/xml/speeches/Interfaces/Content.java b/src/main/java/org/texttechnologylab/project/gruppe_05_1/xml/speeches/Interfaces/Content.java new file mode 100644 index 0000000..1138cb7 --- /dev/null +++ b/src/main/java/org/texttechnologylab/project/gruppe_05_1/xml/speeches/Interfaces/Content.java @@ -0,0 +1,27 @@ +package org.texttechnologylab.project.gruppe_05_1.xml.speeches.Interfaces; + +import org.texttechnologylab.project.gruppe_05_1.xml.speeches.enums.MongoDBEntryType; + +public interface Content { + /** + * Returns the content ID. + * + * @return The content ID. + */ + int getContentId(); + + + /** + * Returns the type of the MongoDB entry. + * + * @return The type of the MongoDB entry. + */ + MongoDBEntryType getType(); + + /** + * Returns the speech ID. + * + * @return The speech ID. + */ + int getSpeechId(); +} diff --git a/src/main/java/org/texttechnologylab/project/gruppe_05_1/xml/speeches/Interfaces/Fraction.java b/src/main/java/org/texttechnologylab/project/gruppe_05_1/xml/speeches/Interfaces/Fraction.java new file mode 100644 index 0000000..d478e80 --- /dev/null +++ b/src/main/java/org/texttechnologylab/project/gruppe_05_1/xml/speeches/Interfaces/Fraction.java @@ -0,0 +1,55 @@ +package org.texttechnologylab.project.gruppe_05_1.xml.speeches.Interfaces; + +import org.texttechnologylab.project.gruppe_05_1.xml.speeches.enums.MongoDBEntryType; + +import java.util.List; + +public interface Fraction { + /** + * Returns the name of the fraction. + * + * @return The name of the fraction. 
+ */ + String getName(); + + /** + * Returns the type of the MongoDB entry. + * + * @return The type of the MongoDB entry. + */ + MongoDBEntryType getType(); + + /** + * Returns the long name of the fraction. + * + * @return The long name of the fraction. + */ + String getLongName(); + + /** + * Returns the members of the fraction. + * + * @return The members of the fraction. + */ + List getMembers(); + /** + * Adds a member to the fraction. + * + * @param member The member to add. + */ + void addMember(int member); + + /** + * Updates the name of the fraction. + * + * @param name The new name of the fraction. + */ + void updateName(String name); + + /** + * Returns the HTML representation of the fraction. + * + * @return The HTML representation of the fraction. + */ + String toHTML(); +} diff --git a/src/main/java/org/texttechnologylab/project/gruppe_05_1/xml/speeches/Interfaces/HistoryEntry.java b/src/main/java/org/texttechnologylab/project/gruppe_05_1/xml/speeches/Interfaces/HistoryEntry.java new file mode 100644 index 0000000..2c856df --- /dev/null +++ b/src/main/java/org/texttechnologylab/project/gruppe_05_1/xml/speeches/Interfaces/HistoryEntry.java @@ -0,0 +1,26 @@ +package org.texttechnologylab.project.gruppe_05_1.xml.speeches.Interfaces; + +import org.texttechnologylab.project.gruppe_05_1.xml.speeches.enums.MongoDBEntryType; + +public interface HistoryEntry { + /** + * Returns the action of the history entry. + **/ + String getAction(); + /** + * Returns the content of the history entry. + **/ + Object getContent(); + + /** + * Returns the type of the MongoDB entry. + * + * @return The type of the MongoDB entry. + */ + MongoDBEntryType getType(); + + /** + * Returns the date of the history entry. 
+ **/ + String getDate(); +} diff --git a/src/main/java/org/texttechnologylab/project/gruppe_05_1/xml/speeches/Interfaces/Individual.java b/src/main/java/org/texttechnologylab/project/gruppe_05_1/xml/speeches/Interfaces/Individual.java new file mode 100644 index 0000000..3aaffce --- /dev/null +++ b/src/main/java/org/texttechnologylab/project/gruppe_05_1/xml/speeches/Interfaces/Individual.java @@ -0,0 +1,75 @@ +package org.texttechnologylab.project.gruppe_05_1.xml.speeches.Interfaces; + +import org.texttechnologylab.project.gruppe_05_1.xml.speeches.enums.MongoDBEntryType; + +public interface Individual { + /** + * Returns the name + * + * @return the name + */ + String getName(); + + /** + * Returns the type of the MongoDB entry. + * + * @return The type of the MongoDB entry. + */ + MongoDBEntryType getType(); + + /** + * Returns the first name + * + * @return the first name + */ + String getFirstName(); + + /** + * Returns the title + * + * @return the title + */ + String getTitle(); + + /** + * Returns the date of birth + * + * @return the date of birth + */ + String getDateOfBirth(); + + /** + * Returns the date of death + * + * @return the date of death + */ + String getDateOfDeath(); + + /** + * Returns the place of birth + * + * @return the place of birth + */ + String getPlaceOfBirth(); + + /** + * Returns the gender + * + * @return the gender + */ + String getGender(); + + /** + * Returns the religion + * + * @return the religion + */ + String getReligion(); + + /** + * Returns the HTML representation of the agenda item. + * + * @return The HTML representation of the agenda item. 
+ */ + String toHTML(); +} diff --git a/src/main/java/org/texttechnologylab/project/gruppe_05_1/xml/speeches/Interfaces/LegislativePeriod.java b/src/main/java/org/texttechnologylab/project/gruppe_05_1/xml/speeches/Interfaces/LegislativePeriod.java new file mode 100644 index 0000000..5faf0b8 --- /dev/null +++ b/src/main/java/org/texttechnologylab/project/gruppe_05_1/xml/speeches/Interfaces/LegislativePeriod.java @@ -0,0 +1,47 @@ +package org.texttechnologylab.project.gruppe_05_1.xml.speeches.Interfaces; + +import org.texttechnologylab.project.gruppe_05_1.xml.speeches.enums.MongoDBEntryType; + +public interface LegislativePeriod { + /** + * Returns the id of the legislative period. + * + * @return the id of the legislative period + */ + int getId(); + + /** + * Returns the type of the MongoDB entry. + * + * @return The type of the MongoDB entry. + */ + MongoDBEntryType getType(); + + /** + * Returns the number of the legislative period. + * + * @return the number of the legislative period + */ + String getStartDate(); + + /** + * Returns the start date of the legislative period. + * + * @return the start date of the legislative period + */ + String getEndDate(); + + /** + * Returns the end date of the legislative period. + * + * @return the end date of the legislative period + */ + String getConstituency(); + + /** + * Returns the HTML representation of the agenda item. + * + * @return The HTML representation of the agenda item. 
+ */ + String toHTML(); +} diff --git a/src/main/java/org/texttechnologylab/project/gruppe_05_1/xml/speeches/Interfaces/Line.java b/src/main/java/org/texttechnologylab/project/gruppe_05_1/xml/speeches/Interfaces/Line.java new file mode 100644 index 0000000..7348907 --- /dev/null +++ b/src/main/java/org/texttechnologylab/project/gruppe_05_1/xml/speeches/Interfaces/Line.java @@ -0,0 +1,27 @@ +package org.texttechnologylab.project.gruppe_05_1.xml.speeches.Interfaces; + +import org.texttechnologylab.project.gruppe_05_1.xml.speeches.enums.MongoDBEntryType; + +public interface Line extends Content { + /** + * Returns the content ID. + * + * @return The content ID. + */ + int getContentId(); + + + /** + * Returns the type of the MongoDB entry. + * + * @return The type of the MongoDB entry. + */ + MongoDBEntryType getType(); + + /** + * Returns the content of the line. + * + * @return the content of the line + */ + String getContent(); +} diff --git a/src/main/java/org/texttechnologylab/project/gruppe_05_1/xml/speeches/Interfaces/MemberOfParliament.java b/src/main/java/org/texttechnologylab/project/gruppe_05_1/xml/speeches/Interfaces/MemberOfParliament.java new file mode 100644 index 0000000..378b55d --- /dev/null +++ b/src/main/java/org/texttechnologylab/project/gruppe_05_1/xml/speeches/Interfaces/MemberOfParliament.java @@ -0,0 +1,77 @@ +package org.texttechnologylab.project.gruppe_05_1.xml.speeches.Interfaces; + +import org.texttechnologylab.project.gruppe_05_1.xml.speeches.enums.MongoDBEntryType; + +import java.util.List; + +public interface MemberOfParliament extends Individual { + /** + * Returns the id of the member of parliament. + * + * @return the id of the member of parliament + */ + int getId(); + + /** + * Returns the type of the MongoDB entry. + * + * @return The type of the MongoDB entry. + */ + MongoDBEntryType getType(); + + /** + * Returns the party of the member of parliament. 
+ * + * @return the party of the member of parliament + */ + String getParty(); + + /** + * Returns the legislative periods of the member of parliament. + * + * @return the legislative periods of the member of parliament + */ + List getLegislativePeriods(); + + /** + * Returns the first legislative period id of the member of parliament. + * + * @return the first legislative period id of the member of parliament + */ + int getFirstLegislativePeriodId(); + + /** + * Returns the last legislative period id of the member of parliament. + * + * @return the last legislative period id of the member of parliament + */ + int getLastLegislativePeriodId(); + + /** + * Returns the base64 encoded string of the photo of the member of parliament. + * + * @return the base64 encoded string of the photo of the member of parliament + */ + String getPhoto(); + + /** + * Returns whether the member of parliament is a current member. + * + * @return whether the member of parliament is a current member + */ + boolean isCurrentMember(); + + /** + * Updates the party of the member of parliament. + * + * @param party the new party of the member of parliament + */ + void updateParty(String party); + + /** + * Returns the HTML representation of the agenda item. + * + * @return The HTML representation of the agenda item. + */ + String toHTML(); +} diff --git a/src/main/java/org/texttechnologylab/project/gruppe_05_1/xml/speeches/Interfaces/Session.java b/src/main/java/org/texttechnologylab/project/gruppe_05_1/xml/speeches/Interfaces/Session.java new file mode 100644 index 0000000..b7717d9 --- /dev/null +++ b/src/main/java/org/texttechnologylab/project/gruppe_05_1/xml/speeches/Interfaces/Session.java @@ -0,0 +1,62 @@ +package org.texttechnologylab.project.gruppe_05_1.xml.speeches.Interfaces; + +import org.texttechnologylab.project.gruppe_05_1.xml.speeches.enums.MongoDBEntryType; + +import java.util.List; + +public interface Session { + /** + * Returns the id of the session. 
+ * + * @return the id of the session + */ + int getId(); + + /** + * Returns the type of the MongoDB entry. + * + * @return The type of the MongoDB entry. + */ + MongoDBEntryType getType(); + + /** + * Returns the legislative period of the session. + * + * @return the legislative period of the session + */ + String getLegislativePeriod(); + + /** + * Returns the date and time of the session. + * + * @return the date and time of the session + */ + String getDateTime(); + + /** + * Returns the end time of the session. + * + * @return the end time of the session + */ + String getEndTime(); + + /** + * Returns the agenda items of the session. + * + * @return the agenda items of the session + */ + List getAgendaItems(); + + /** + * Adds an agenda item to the session. + * @param item The agenda item to add. + */ + void addAgendaItem(AgendaItem item); + + /** + * Returns the HTML representation of the agenda item. + * + * @return The HTML representation of the agenda item. + */ + String toHTML(); +} diff --git a/src/main/java/org/texttechnologylab/project/gruppe_05_1/xml/speeches/Interfaces/Speaker.java b/src/main/java/org/texttechnologylab/project/gruppe_05_1/xml/speeches/Interfaces/Speaker.java new file mode 100644 index 0000000..55f4eb4 --- /dev/null +++ b/src/main/java/org/texttechnologylab/project/gruppe_05_1/xml/speeches/Interfaces/Speaker.java @@ -0,0 +1,41 @@ +package org.texttechnologylab.project.gruppe_05_1.xml.speeches.Interfaces; + +import org.texttechnologylab.project.gruppe_05_1.xml.speeches.enums.MongoDBEntryType; + +public interface Speaker extends Content { + /** + * Returns the content ID. + * + * @return The content ID. + */ + int getContentId(); + + + /** + * Returns the type of the MongoDB entry. + * + * @return The type of the MongoDB entry. + */ + MongoDBEntryType getType(); + + /** + * Returns the speakerId. + * + * @return the speakerId + */ + int getSpeakerId(); + + /** + * Returns the speakerName. 
+ * + * @return the speakerName + */ + String getSpeakerName(); + + /** + * Returns the fraction. + * + * @return the fraction + */ + String getFraction(); +} diff --git a/src/main/java/org/texttechnologylab/project/gruppe_05_1/xml/speeches/Interfaces/Speech.java b/src/main/java/org/texttechnologylab/project/gruppe_05_1/xml/speeches/Interfaces/Speech.java new file mode 100644 index 0000000..786741e --- /dev/null +++ b/src/main/java/org/texttechnologylab/project/gruppe_05_1/xml/speeches/Interfaces/Speech.java @@ -0,0 +1,75 @@ +package org.texttechnologylab.project.gruppe_05_1.xml.speeches.Interfaces; + +import org.texttechnologylab.project.gruppe_05_1.xml.speeches.enums.MongoDBEntryType; + +import java.util.List; + +public interface Speech { + /** + * Returns the speech ID. + * + * @return The speech ID. + */ + int getSpeechId(); + + /** + * Returns the type of the MongoDB entry. + * + * @return The type of the MongoDB entry. + */ + MongoDBEntryType getType(); + + /** + * Returns the agenda item ID. + * + * @return The agenda item ID. + */ + int getAgendaItemId(); + + /** + * Returns the speaker ID. + * + * @return The speaker ID. + */ + int getSpeakerId(); + + /** + * Returns the speaker name. + * + * @return The speaker name. + */ + String getSpeakerName(); + + /** + * Returns the fraction. + * + * @return The fraction. + */ + String getFraction(); + + /** + * Returns the session ID. + * + * @return The session ID. + */ + int getSessionId(); + + /** + * Returns the speech contents. + * + * @return The speech contents. + */ + List getSpeechContents(); + /** + * Adds a speaker to the speech. + * @param speaker The speaker to add. + */ + void addContent(Content speaker); + + /** + * Returns the HTML representation of the agenda item. + * + * @return The HTML representation of the agenda item. 
+ */ + String toHTML(); +} diff --git a/src/main/java/org/texttechnologylab/project/gruppe_05_1/xml/speeches/SpeechParser.java b/src/main/java/org/texttechnologylab/project/gruppe_05_1/xml/speeches/SpeechParser.java new file mode 100644 index 0000000..82062ee --- /dev/null +++ b/src/main/java/org/texttechnologylab/project/gruppe_05_1/xml/speeches/SpeechParser.java @@ -0,0 +1,188 @@ +package org.texttechnologylab.project.gruppe_05_1.xml.speeches; + +import lombok.Getter; +import lombok.Setter; +import org.texttechnologylab.project.gruppe_05_1.util.PPRUtils; +import org.texttechnologylab.project.gruppe_05_1.xml.speeches.Impls.*; +import org.texttechnologylab.project.gruppe_05_1.xml.speeches.Interfaces.AgendaItem; +import org.texttechnologylab.project.gruppe_05_1.xml.speeches.Interfaces.Session; +import org.texttechnologylab.project.gruppe_05_1.xml.speeches.Interfaces.Speech; +import org.w3c.dom.Document; +import org.w3c.dom.Element; +import org.w3c.dom.Node; +import org.w3c.dom.NodeList; + +import javax.xml.parsers.DocumentBuilder; +import javax.xml.parsers.DocumentBuilderFactory; +import javax.xml.transform.OutputKeys; +import javax.xml.transform.Transformer; +import javax.xml.transform.TransformerFactory; +import javax.xml.transform.dom.DOMSource; +import javax.xml.transform.stream.StreamResult; +import java.io.File; +import java.util.ArrayList; +import java.util.List; +import java.util.Set; + +public class SpeechParser { + + @Getter + private List speeches; + @Getter + private List agendaItems; + @Setter + private Boolean parseLegislativePeriods; + + public List parseAllSessions() { + List sessions = new ArrayList<>(); + this.speeches = new ArrayList<>(); + this.agendaItems = new ArrayList<>(); + //TODO Logik so machen dass aus array von xmls gelesen wird nicht aus pfad + Set xmlDocuments = PPRUtils.processXML(); + for (org.w3c.dom.Document xmlDoc:xmlDocuments) { + try { + File tempFile = convertDocumentToFile(xmlDoc); + Session session = 
parseSessionFile(tempFile); + sessions.add(session); + tempFile.delete(); // Lösche die temporäre Datei nach der Verarbeitung + } catch (Exception e) { + System.err.println("Error parsing XML document."); + e.printStackTrace(); + } + } + + return sessions; + } + + private Session parseSessionFile(File file) throws Exception { + //file = removeDoctypeAnnotation(file.getAbsolutePath()); + + DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance(); + DocumentBuilder builder = factory.newDocumentBuilder(); + Document document = builder.parse(file); + + // Extract session details + Element root = document.getDocumentElement(); + String legislativePeriod = root.getAttribute("wahlperiode"); + int sessionId = Integer.parseInt(root.getAttribute("sitzung-nr")); + System.out.println("Session " + sessionId + " wurde gespeichert"); + String sessionDate = root.getAttribute("sitzung-datum"); + Element startTimeElement = (Element) root.getElementsByTagName("sitzungsbeginn").item(0); + String startTimeString = startTimeElement != null ? startTimeElement.getAttribute("sitzung-start-uhrzeit") : null; + + String startTime = startTimeString != null ? sessionDate + " " + startTimeString : sessionDate; + + Element sessionEndElement = (Element) root.getElementsByTagName("sitzungsende").item(0); + String sessionEndTime = sessionEndElement != null ? 
sessionEndElement.getAttribute("sitzung-ende-uhrzeit") : null; + + Session_File_Impl session = new Session_File_Impl(legislativePeriod, sessionId, startTime, sessionEndTime); + + // Parse agenda items + NodeList agendaNodes = document.getElementsByTagName("tagesordnungspunkt"); + for (int agendaItemId = 0; agendaItemId < agendaNodes.getLength(); agendaItemId++) { + Element agendaElement = (Element) agendaNodes.item(agendaItemId); + if (agendaElement == null) continue; + + String agendaTitle = agendaElement.getAttribute("top-id"); + + AgendaItem_File_Impl agendaItemFileImpl = new AgendaItem_File_Impl(agendaItemId, sessionId, agendaTitle); + + this.agendaItems.add(agendaItemFileImpl); + + // Parse speeches + NodeList speechNodes = agendaElement.getElementsByTagName("rede"); + for (int speechId = 0; speechId < speechNodes.getLength(); speechId++) { + Element speechElement = (Element) speechNodes.item(speechId); + if (speechElement == null) continue; + + // Parse speaker + Element speakerElement = (Element) speechElement.getElementsByTagName("redner").item(0); + if (speakerElement == null) continue; + + int speakerId = Integer.parseInt(speakerElement.getAttribute("id")); + Element nameElement = (Element) speakerElement.getElementsByTagName("name").item(0); + if (nameElement == null) continue; + + String title = getOptionalTextContent(nameElement, "titel"); + String firstName = getOptionalTextContent(nameElement, "vorname"); + String lastName = getOptionalTextContent(nameElement, "nachname"); + String fraction = getOptionalTextContent(nameElement, "fraktion"); + + String speakerName = (title != null ? 
title + " " : "") + firstName + " " + lastName; + Speech_File_Impl speech = new Speech_File_Impl(sessionId, agendaItemId, speechId, speakerId, speakerName, fraction); + + // Add the speaker to speech contents + speech.addContent(new Speaker_File_Impl(0, speechId, speakerId, speakerName, fraction)); + + // Parse content in order + NodeList contentNodes = speechElement.getChildNodes(); + for (int k = 0; k < contentNodes.getLength(); k++) { + Node contentNode = contentNodes.item(k); + if (contentNode == null || contentNode.getNodeType() != Node.ELEMENT_NODE) continue; + + Element contentElement = (Element) contentNode; + String tagName = contentElement.getTagName(); + + switch (tagName) { + case "p": + String paragraphClass = contentElement.getAttribute("klasse"); + if ("redner".equals(paragraphClass)) { + // This case can be skipped as the speaker is already added + continue; + } else if ("kommentar".equals(paragraphClass)) { + String comment = contentElement.getTextContent().trim(); + String commentatorName = ""; // Extract if present + speech.addContent(new Comment_File_Impl(k + 1, speechId, commentatorName, comment)); + } else { + String line = contentElement.getTextContent().trim(); + speech.addContent(new Line_File_Impl(k + 1, speechId, line)); + } + break; + + case "kommentar": + String comment = contentElement.getTextContent().trim(); + String commentatorName = ""; // Extract if available + speech.addContent(new Comment_File_Impl(k + 1, speechId, commentatorName, comment)); + break; + + default: + break; + } + } + + agendaItemFileImpl.addSpeech(speech); + this.speeches.add(speech); + } + + session.addAgendaItem(agendaItemFileImpl); + } + return session; + } + + private static String getOptionalTextContent(Element parent, String tagName) { + NodeList nodes = parent.getElementsByTagName(tagName); + if (nodes.getLength() > 0) { + Node node = nodes.item(0); + if (node != null) { + return node.getTextContent().trim(); + } + } + return null; + } + /** + * 
Konvertiert ein org.w3c.dom.Document in eine temporäre Datei. + */ + private File convertDocumentToFile(org.w3c.dom.Document xmlDoc) throws Exception { + File tempFile = File.createTempFile("session_", ".xml"); + TransformerFactory transformerFactory = TransformerFactory.newInstance(); + Transformer transformer = transformerFactory.newTransformer(); + transformer.setOutputProperty(OutputKeys.INDENT, "yes"); + + DOMSource source = new DOMSource(xmlDoc); + StreamResult result = new StreamResult(tempFile); + transformer.transform(source, result); + + return tempFile; + } + +} diff --git a/src/main/java/org/texttechnologylab/project/gruppe_05_1/xml/speeches/enums/MongoDBEntryType.java b/src/main/java/org/texttechnologylab/project/gruppe_05_1/xml/speeches/enums/MongoDBEntryType.java new file mode 100644 index 0000000..62f0a91 --- /dev/null +++ b/src/main/java/org/texttechnologylab/project/gruppe_05_1/xml/speeches/enums/MongoDBEntryType.java @@ -0,0 +1,17 @@ +package org.texttechnologylab.project.gruppe_05_1.xml.speeches.enums; + +public enum MongoDBEntryType { + AGENDA_ITEM, + CONSTIUENCY, + FRACTION, + HISTORY_ENTRY, + INDIVIDUAL, + LEGISLATIVE_PERIOD, + MEMBER_OF_PARLIAMENT, + SESSION, + SPEECH, + SPEECH_COMMENT, + SPEECH_CONTENT, + SPEECH_LINE, + SPEECH_SPEAKER, +}