Rollback point

This commit is contained in:
Picman2000 2025-03-04 13:09:12 +01:00
parent 8a6548662c
commit ae5c3f17eb
22 changed files with 751 additions and 10 deletions

View file

@ -0,0 +1,7 @@
package exceptions;
/**
 * Unchecked exception signalling that no agenda item matched a lookup.
 */
public class AgendaItemNotFoundException extends RuntimeException {

    /** Detail message shared by every instance. */
    private static final String MESSAGE = "Agenda Item not found";

    /** Creates the exception with its fixed detail message. */
    public AgendaItemNotFoundException() {
        super(MESSAGE);
    }
}

View file

@ -0,0 +1,7 @@
package exceptions;
/**
 * Unchecked exception carrying the message "Fraction already exists".
 */
public class FractionAlreadyExistsException extends RuntimeException {

    /** Detail message shared by every instance. */
    private static final String MESSAGE = "Fraction already exists";

    /** Creates the exception with its fixed detail message. */
    public FractionAlreadyExistsException() {
        super(MESSAGE);
    }
}

View file

@ -0,0 +1,7 @@
package exceptions;
/**
 * Unchecked exception carrying the message "Fraction not found".
 */
public class FractionNotFoundException extends RuntimeException {

    /** Detail message shared by every instance. */
    private static final String MESSAGE = "Fraction not found";

    /** Creates the exception with its fixed detail message. */
    public FractionNotFoundException() {
        super(MESSAGE);
    }
}

View file

@ -0,0 +1,7 @@
package exceptions;
/**
 * Unchecked exception signalling that no member of parliament matched a lookup.
 */
public class MemberNotFoundException extends RuntimeException {

    /** Detail message shared by every instance. */
    private static final String MESSAGE = "Member not found";

    /** Creates the exception with its fixed detail message. */
    public MemberNotFoundException() {
        super(MESSAGE);
    }
}

View file

@ -0,0 +1,7 @@
package exceptions;
public class ServerErrorException extends RuntimeException {
public ServerErrorException() {
super("Server error occurred");
}
}

View file

@ -0,0 +1,7 @@
package exceptions;
/**
 * Unchecked exception signalling that no session matched a lookup.
 */
public class SessionNotFoundException extends RuntimeException {

    /** Detail message shared by every instance. */
    private static final String MESSAGE = "Session not found";

    /** Creates the exception with its fixed detail message. */
    public SessionNotFoundException() {
        super(MESSAGE);
    }
}

View file

@ -0,0 +1,7 @@
package exceptions;
/**
 * Unchecked exception carrying the message "Speech not found".
 */
public class SpeechNotFoundException extends RuntimeException {

    /** Detail message shared by every instance. */
    private static final String MESSAGE = "Speech not found";

    /** Creates the exception with its fixed detail message. */
    public SpeechNotFoundException() {
        super(MESSAGE);
    }
}

View file

@ -4,19 +4,27 @@ package org.texttechnologylab.project.gruppe_05_1;
import com.mongodb.client.MongoDatabase;
import org.texttechnologylab.project.gruppe_05_1.database.MongoDBHandler;
import org.texttechnologylab.project.gruppe_05_1.database.MongoObjectFactory;
import org.texttechnologylab.project.gruppe_05_1.database.SpeechIndex;
import org.texttechnologylab.project.gruppe_05_1.database.SpeechIndexFactoryImpl;
import org.texttechnologylab.project.gruppe_05_1.domain.mdb.Mdb;
import org.texttechnologylab.project.gruppe_05_1.domain.mdb.MdbDocument;
import org.texttechnologylab.project.gruppe_05_1.rest.RESTHandler;
import org.texttechnologylab.project.gruppe_05_1.util.Logger;
import org.texttechnologylab.project.gruppe_05_1.util.PPRUtils;
import org.texttechnologylab.project.gruppe_05_1.util.PropertiesUtils;
import org.texttechnologylab.project.gruppe_05_1.util.XmlUtils;
import org.texttechnologylab.project.gruppe_05_1.xml.FileObjectFactory;
import org.texttechnologylab.project.gruppe_05_1.xml.speeches.SpeechParser;
import org.w3c.dom.Element;
import org.xml.sax.SAXException;
import javax.xml.parsers.ParserConfigurationException;
import java.io.IOException;
import java.util.List;
import java.util.Properties;
import static java.lang.Boolean.TRUE;
/*
import com.mongodb.client.*;
import org.bson.Document;
@ -46,9 +54,49 @@ public class Main {
private static final FileObjectFactory xmlFactory = FileObjectFactory.getFactory();
private static final MongoObjectFactory mongoFactory = MongoObjectFactory.getFactory();
private static final SpeechParser speechParser = new SpeechParser();
public static void main(String[] args) {
public static void main(String[] args) throws ParserConfigurationException, IOException, SAXException {
//TEST
speechParser.parseAllSessions();
Logger.pink("Parsing XML and inserting data into DB (Uebung 2)...");
SpeechIndexFactoryImpl speechIndexFactory = new SpeechIndexFactoryImpl();
SpeechIndex speechIndex = speechIndexFactory
.builder()
.parseLegislativePeriods(TRUE)
.parseSessions()
.parseMembers()
.parseFractions()
.filterForCurrentMembers(TRUE)
.build();
//speechIndex.printInfo();
System.out.println("Data retrieved from DB:");
System.out.println("MEMBERCOUNT: " + speechIndex.getMembers().size());
System.out.println("FRACTIONCOUNT: " + speechIndex.getFractions().size());
System.out.println("SPEECHCOUNT: " + speechIndex.getSpeeches().size());
System.out.println("SESSIONCOUNT: " + speechIndex.getSessions().size());
System.out.println("AGENDAITEMCOUNT: " + speechIndex.getAgendaItems().size());
MongoDBHandler mongoDBHandler = new MongoDBHandler();
mongoDBHandler.deleteAllDocuments(); // Clear the DB
Logger.pink("Adding Sessions to DB...");
mongoDBHandler.insertSessions(speechIndex.getSessions());
Logger.pink("Adding Agenda Items to DB...");
mongoDBHandler.insertAgendaItems(speechIndex.getAgendaItems());
Logger.pink("Adding Speeches to DB...");
mongoDBHandler.insertSpeeches(speechIndex.getSpeeches());
mongoDBHandler.close(); // Close the connection to the DB
//TEST
// Stellt fest, dass alle nötigen Datenbank-Collections existieren
PPRUtils.ensureCollectionExist();

View file

@ -7,14 +7,29 @@ import com.mongodb.client.MongoClient;
import com.mongodb.client.MongoClients;
import com.mongodb.client.MongoCollection;
import com.mongodb.client.MongoDatabase;
import com.mongodb.client.model.Filters;
import com.mongodb.client.model.Indexes;
import com.mongodb.client.model.Updates;
import exceptions.AgendaItemNotFoundException;
import exceptions.MemberNotFoundException;
import exceptions.ServerErrorException;
import exceptions.SessionNotFoundException;
import org.bson.Document;
import org.bson.conversions.Bson;
import org.bson.types.ObjectId;
import org.texttechnologylab.DockerUnifiedUIMAInterface.connection.mongodb.MongoDBConfig;
import org.texttechnologylab.project.gruppe_05_1.database.domainimp.speeches.AgendaItem_MongoDB_Impl;
import org.texttechnologylab.project.gruppe_05_1.database.domainimp.speeches.MemberOfParliament_MongoDB_Impl;
import org.texttechnologylab.project.gruppe_05_1.database.domainimp.speeches.Session_MongoDB_Impl;
import org.texttechnologylab.project.gruppe_05_1.database.domainimp.speeches.Speech_MongoDB_Impl;
import org.texttechnologylab.project.gruppe_05_1.util.Logger;
import org.texttechnologylab.project.gruppe_05_1.util.PropertiesUtils;
import org.texttechnologylab.project.gruppe_05_1.xml.speeches.Impls.*;
import org.texttechnologylab.project.gruppe_05_1.xml.speeches.Interfaces.*;
import java.io.IOException;
import java.util.*;
import java.util.concurrent.TimeUnit;
import static com.mongodb.client.model.Filters.eq;
@ -22,7 +37,8 @@ public class MongoDBHandler {
public static final String propertiesFileName = "mongoDB.properties";
public static final String DEFAULT_ID_FIELD_NAME = "_id";
private final MongoClient mongoClient;
private final MongoDatabase database;
private static MongoDatabase mongoDatabase = null;
public final static Class<? extends List> DOC_LIST_CLASS = new ArrayList<Document>().getClass();
@ -35,6 +51,56 @@ public class MongoDBHandler {
private static String collection;
private static String databaseName;
private MongoCollection<Document> speechesCollection;
private MongoCollection<Document> membersCollection;
private MongoCollection<Document> fractionsCollection;
private MongoCollection<Document> sessionsCollection;
private MongoCollection<Document> agendaItemsCollection;
private MongoCollection<Document> historyCollection;
/**
 * Connects to the remote MongoDB server and binds the collection handles used by this class.
 * <p>
 * Connection settings are read from the resource named by {@code propertiesFileName}
 * (keys {@code localserver}, {@code remote_host}, {@code remote_user}, {@code remote_password},
 * {@code remote_port}, {@code remote_collection}, {@code remote_database}). The client always
 * connects to {@code remote_host}:{@code remote_port} with the configured credentials.
 * <p>
 * A missing {@code remote_password} or {@code remote_port} entry makes this constructor fail
 * ({@code toCharArray()} on {@code null}, or {@code Integer.parseInt} on {@code null}).
 */
public MongoDBHandler() {
    // Set loglevel for slf4j to avoid spam
    System.setProperty("org.slf4j.simpleLogger.defaultLogLevel", "error");

    Properties mongoProperties = PropertiesUtils.readPropertiesFromResource(propertiesFileName);

    // Credentials and endpoints
    localServer = mongoProperties.getProperty("localserver");
    remoteServer = mongoProperties.getProperty("remote_host");
    user = mongoProperties.getProperty("remote_user");
    password = mongoProperties.getProperty("remote_password");
    port = mongoProperties.getProperty("remote_port");
    collection = mongoProperties.getProperty("remote_collection");
    databaseName = mongoProperties.getProperty("remote_database");

    MongoCredential credential = MongoCredential
            .createCredential(
                    user,
                    databaseName,
                    password.toCharArray());

    MongoClientSettings settings = MongoClientSettings.builder()
            .credential(credential)
            .timeout(180, TimeUnit.HOURS) // needs increased timeout for the bulk speech inserts
            .applyToClusterSettings(builder ->
                    builder.hosts(List.of(new ServerAddress(remoteServer, Integer.parseInt(port)))))
            .build();

    mongoClient = MongoClients.create(settings);
    database = mongoClient.getDatabase(databaseName);

    speechesCollection = database.getCollection("speech");
    membersCollection = database.getCollection("members");
    fractionsCollection = database.getCollection("fractions");
    sessionsCollection = database.getCollection("sessions");
    agendaItemsCollection = database.getCollection("agendaItems");
    historyCollection = database.getCollection("history");
    Logger.info("Connected to MongoDB database: " + databaseName);

    // Creates an ascending index on the "collection" field of the members collection.
    // NOTE(review): the original author describes this as an unsuccessful attempt at a
    // full-text search index; an ascending index is not a text index.
    membersCollection.createIndex(new Document("collection", 1));
}
/**
* Get the MongoDB according to properties.
* If a local server URI is defined, use it. Otherwise, use remote server.
@ -392,4 +458,272 @@ public class MongoDBHandler {
collection.deleteOne(deleteQuery);
}
/*
* Justus Jonas operations
* =======================
*/
/**
 * Stores one session as a document in the sessions collection.
 * <p>
 * The document carries the fields {@code sessionId}, {@code dateTime}, {@code endTime}
 * and {@code legislativePeriod}, in that order.
 *
 * @param session the session to persist
 */
public void insertSession(Session session) {
    Document doc = new Document();
    doc.put("sessionId", session.getId());
    doc.put("dateTime", session.getDateTime());
    doc.put("endTime", session.getEndTime());
    doc.put("legislativePeriod", session.getLegislativePeriod());
    sessionsCollection.insertOne(doc);
}
/**
 * Creates a session with a fresh id, stores it and returns it.
 * <p>
 * Id {@code 0} is tried first; while the candidate id is already present in the sessions
 * collection a new random non-negative int is drawn.
 *
 * @param dateTime          start of the session
 * @param endTime           end of the session
 * @param legislativePeriod legislative period the session belongs to
 * @return the newly stored session
 */
public Session insertSession(String dateTime, String endTime, String legislativePeriod) {
    int candidateId = 0;
    for (;;) {
        boolean free = retrieveAllSessions(Filters.eq("sessionId", candidateId)).isEmpty();
        if (free) {
            break;
        }
        candidateId = (int) (Math.random() * Integer.MAX_VALUE);
    }

    Session created = new Session_File_Impl(legislativePeriod, candidateId, dateTime, endTime);
    insertSession(created);
    return created;
}
/**
 * Stores every session of the given list, one insert per session and in list order.
 *
 * @param sessions the sessions to persist
 */
public void insertSessions(List<Session> sessions) {
    sessions.forEach(this::insertSession);
}
/**
 * Stores the given agenda items in the agenda-items collection with a single bulk insert.
 * <p>
 * Each document carries the fields {@code id}, {@code sessionId} and {@code title}.
 * A {@code null} or empty list is a no-op: the driver's {@code insertMany} rejects an
 * empty list with an {@link IllegalArgumentException}, which would otherwise abort an
 * import run that happened to produce no agenda items.
 *
 * @param agendaItems the agenda items to persist; may be empty
 */
public void insertAgendaItems(List<AgendaItem> agendaItems) {
    if (agendaItems == null || agendaItems.isEmpty()) {
        return;
    }
    List<Document> agendaItemDocuments = new ArrayList<>(agendaItems.size());
    for (AgendaItem agendaItem : agendaItems) {
        agendaItemDocuments.add(new Document("id", agendaItem.getId())
                .append("sessionId", agendaItem.getSessionId())
                .append("title", agendaItem.getTitle()));
    }
    agendaItemsCollection.insertMany(agendaItemDocuments);
}
/**
 * Creates an agenda item with a fresh id for an existing session, stores it and returns it.
 * <p>
 * Id {@code 0} is tried first; while the candidate id is already present in the agenda-items
 * collection a new random non-negative int is drawn.
 *
 * @param sessionId id of the session the agenda item belongs to
 * @param title     title of the agenda item
 * @return the newly stored agenda item
 * @throws SessionNotFoundException if no session with {@code sessionId} is stored
 */
public AgendaItem insertAgendaItem(int sessionId, String title) throws SessionNotFoundException, ServerErrorException {
    boolean sessionMissing = retrieveAllSessions(Filters.eq("sessionId", sessionId)).isEmpty();
    if (sessionMissing) {
        Logger.error("No session found with id " + sessionId);
        throw new SessionNotFoundException();
    }

    int candidateId = 0;
    for (;;) {
        if (retrieveAllAgendaItems(Filters.eq("id", candidateId)).isEmpty()) {
            break;
        }
        candidateId = (int) (Math.random() * Integer.MAX_VALUE);
    }

    AgendaItem created = new AgendaItem_File_Impl(candidateId, sessionId, title);
    insertAgendaItems(List.of(created));
    return created;
}
/**
 * Stores the given speeches in the speech collection with a single bulk insert.
 * <p>
 * Each speech document carries {@code sessionId}, {@code agendaItemId}, {@code speechId},
 * {@code speakerId}, {@code speakerName}, {@code fraction} and a {@code speechContents}
 * list with one sub-document per content element. Content elements that are neither a
 * comment, a line nor a speaker are skipped.
 * <p>
 * A {@code null} or empty list is a no-op: the driver's {@code insertMany} rejects an
 * empty list with an {@link IllegalArgumentException}.
 *
 * @param speeches the speeches to persist; may be empty
 */
public void insertSpeeches(List<Speech> speeches) {
    if (speeches == null || speeches.isEmpty()) {
        return;
    }
    List<Document> speechDocuments = new ArrayList<>(speeches.size());
    for (Speech speech : speeches) {
        Document speechDocument = new Document("sessionId", speech.getSessionId())
                .append("agendaItemId", speech.getAgendaItemId())
                .append("speechId", speech.getSpeechId())
                .append("speakerId", speech.getSpeakerId())
                .append("speakerName", speech.getSpeakerName())
                .append("fraction", speech.getFraction());

        List<Document> contentDocuments = new ArrayList<>();
        for (Content content : speech.getSpeechContents()) {
            Document contentDocument = speechContentToDocument(content);
            if (contentDocument != null) {
                contentDocuments.add(contentDocument);
            }
        }
        speechDocument.append("speechContents", contentDocuments);
        speechDocuments.add(speechDocument);
    }
    // One round trip for the whole batch
    speechesCollection.insertMany(speechDocuments);
}

/**
 * Converts one speech content element into its MongoDB sub-document.
 *
 * @param content the content element
 * @return the sub-document, or {@code null} when the element is of an unsupported type
 */
private static Document speechContentToDocument(Content content) {
    if (content instanceof Comment_File_Impl) {
        Comment_File_Impl comment = (Comment_File_Impl) content;
        return new Document("type", "comment")
                .append("contentId", comment.getContentId())
                .append("speechId", comment.getSpeechId())
                .append("commentatorName", comment.getCommentatorName())
                .append("comment", comment.getComment());
    }
    if (content instanceof Line_File_Impl) {
        Line_File_Impl line = (Line_File_Impl) content;
        return new Document("type", "line")
                .append("contentId", line.getContentId())
                .append("speechId", line.getSpeechId())
                .append("content", line.getContent());
    }
    if (content instanceof Speaker_File_Impl) {
        Speaker_File_Impl speaker = (Speaker_File_Impl) content;
        return new Document("type", "speaker")
                .append("contentId", speaker.getContentId())
                .append("speechId", speaker.getSpeechId())
                .append("speakerId", speaker.getSpeakerId())
                .append("speakerName", speaker.getSpeakerName())
                .append("fraction", speaker.getFraction());
    }
    return null;
}
/**
 * Creates an empty speech for an existing member, session and agenda item, stores it and returns it.
 * <p>
 * The speaker name and fraction are taken from the stored member ({@code getName()} / {@code getParty()}).
 * The speech id starts at 0 and is replaced by random non-negative ints until an unused id is found.
 *
 * @param speakerId    id of the member of parliament giving the speech
 * @param sessionId    id of the session the speech belongs to
 * @param agendaItemId id of the agenda item the speech belongs to
 * @return the newly stored speech
 * @throws MemberNotFoundException     if no member with {@code speakerId} is stored
 * @throws SessionNotFoundException    if no session with {@code sessionId} is stored
 * @throws AgendaItemNotFoundException if no agenda item with {@code agendaItemId} is stored
 * @throws ServerErrorException        if several members share {@code speakerId} or the member lookup fails with an I/O error
 */
public Speech insertSpeech(int speakerId, int sessionId, int agendaItemId) throws MemberNotFoundException, SessionNotFoundException, AgendaItemNotFoundException, ServerErrorException {
// fetch member by speakerId; exactly one match is required
MemberOfParliament member;
try {
List<MemberOfParliament> members = retrieveAllMembersOfParliament(Filters.eq("id", speakerId));
if (members.isEmpty()) {
Logger.error("No member found with id " + speakerId);
throw new MemberNotFoundException();
} else if (members.size() > 1) {
Logger.warn("Multiple members found with id " + speakerId);
throw new ServerErrorException();
}
member = members.get(0);
} catch (IOException e) {
// NOTE(review): the IOException is not attached as cause, so its stack trace is lost here
Logger.error("Failed to retrieve member with id " + speakerId);
throw new ServerErrorException();
}
// check if session and agendaItem exist
List<Session> sessions = retrieveAllSessions(Filters.eq("sessionId", sessionId));
if (sessions.isEmpty()) {
Logger.error("No session found with id " + sessionId);
throw new SessionNotFoundException();
}
List<AgendaItem> agendaItems = retrieveAllAgendaItems(Filters.eq("id", agendaItemId));
if (agendaItems.isEmpty()) {
Logger.error("No agendaItem found with id " + agendaItemId);
throw new AgendaItemNotFoundException();
}
// get a new random speechId that is not already in use (0 is tried first)
int speechId = 0;
while (!retrieveAllSpeeches(Filters.eq("speechId", speechId)).isEmpty()) {
// generate random int
speechId = (int) (Math.random() * Integer.MAX_VALUE);
}
// create speech
// NOTE(review): Speech_MongoDB_Impl calls this same constructor with (sessionId, agendaItemId,
// speechId, speakerId, ...), i.e. with the third and fourth argument in the opposite order.
// Both are ints, so the compiler cannot catch a swap - confirm against Speech_File_Impl.
Speech speech = new Speech_File_Impl(sessionId, agendaItemId, speakerId, speechId, member.getName(), member.getParty());
// insert speech into DB
insertSpeeches(List.of(speech));
return speech;
}
/**
 * Loads every document of the speech collection and wraps each one as a {@link Speech}.
 *
 * @return all stored speeches, in the order the database returned them
 */
public List<Speech> retrieveAllSpeeches() {
    List<Document> fetched = speechesCollection.find().into(new ArrayList<>());
    List<Speech> speeches = new ArrayList<>();
    fetched.forEach(doc -> speeches.add(new Speech_MongoDB_Impl(doc)));
    return speeches;
}
/**
 * Loads the speech documents matching the filter and wraps each one as a {@link Speech}.
 *
 * @param filter the query filter passed to {@code find}
 * @return the matching speeches, in the order the database returned them
 */
public List<Speech> retrieveAllSpeeches(Bson filter) {
    List<Document> fetched = speechesCollection.find(filter).into(new ArrayList<>());
    List<Speech> speeches = new ArrayList<>();
    Iterator<Document> cursor = fetched.iterator();
    while (cursor.hasNext()) {
        speeches.add(new Speech_MongoDB_Impl(cursor.next()));
    }
    return speeches;
}
/**
 * Loads every document of the sessions collection and wraps each one as a {@link Session}.
 *
 * @return all stored sessions, in the order the database returned them
 */
public List<Session> retrieveAllSessions() {
    List<Document> fetched = sessionsCollection.find().into(new ArrayList<>());
    List<Session> sessions = new ArrayList<>();
    fetched.forEach(doc -> sessions.add(new Session_MongoDB_Impl(doc)));
    return sessions;
}
/**
 * Loads the session documents matching the filter and wraps each one as a {@link Session}.
 *
 * @param filter the query filter passed to {@code find}
 * @return the matching sessions, in the order the database returned them
 */
public List<Session> retrieveAllSessions(Bson filter) {
    List<Document> fetched = sessionsCollection.find(filter).into(new ArrayList<>());
    List<Session> sessions = new ArrayList<>();
    Iterator<Document> cursor = fetched.iterator();
    while (cursor.hasNext()) {
        sessions.add(new Session_MongoDB_Impl(cursor.next()));
    }
    return sessions;
}
/**
 * Loads every document of the agenda-items collection and wraps each one as an {@link AgendaItem}.
 *
 * @return all stored agenda items, in the order the database returned them
 */
public List<AgendaItem> retrieveAllAgendaItems() {
    List<Document> fetched = agendaItemsCollection.find().into(new ArrayList<>());
    List<AgendaItem> items = new ArrayList<>();
    fetched.forEach(doc -> items.add(new AgendaItem_MongoDB_Impl(doc)));
    return items;
}
/**
 * Loads the agenda-item documents matching the filter and wraps each one as an {@link AgendaItem}.
 *
 * @param filter the query filter passed to {@code find}
 * @return the matching agenda items, in the order the database returned them
 */
public List<AgendaItem> retrieveAllAgendaItems(Bson filter) {
    List<Document> fetched = agendaItemsCollection.find(filter).into(new ArrayList<>());
    List<AgendaItem> items = new ArrayList<>();
    Iterator<Document> cursor = fetched.iterator();
    while (cursor.hasNext()) {
        items.add(new AgendaItem_MongoDB_Impl(cursor.next()));
    }
    return items;
}
/**
 * Loads every document of the members collection and wraps each one as a {@link MemberOfParliament}.
 *
 * @return all stored members, in the order the database returned them
 */
public List<MemberOfParliament> retrieveAllMembersOfParliament() {
    List<Document> fetched = membersCollection.find().into(new ArrayList<>());
    List<MemberOfParliament> parliamentarians = new ArrayList<>();
    fetched.forEach(doc -> parliamentarians.add(new MemberOfParliament_MongoDB_Impl(doc)));
    return parliamentarians;
}
/**
 * Loads the member documents matching the filter and wraps each one as a {@link MemberOfParliament}.
 *
 * @param filter the query filter passed to {@code find}
 * @return the matching members, in the order the database returned them
 * @throws IOException declared by the signature; nothing in this method body raises it directly
 */
public List<MemberOfParliament> retrieveAllMembersOfParliament(Bson filter) throws IOException {
    List<Document> fetched = membersCollection.find(filter).into(new ArrayList<>());
    List<MemberOfParliament> parliamentarians = new ArrayList<>();
    Iterator<Document> cursor = fetched.iterator();
    while (cursor.hasNext()) {
        parliamentarians.add(new MemberOfParliament_MongoDB_Impl(cursor.next()));
    }
    return parliamentarians;
}
/**
 * Loads the member documents matching the filter, restricted to the projected fields,
 * and wraps each one as a {@link MemberOfParliament}.
 *
 * @param filter     the query filter passed to {@code find}
 * @param projection the projection applied to the query
 * @return the matching members, in the order the database returned them
 * @throws IOException declared by the signature; nothing in this method body raises it directly
 */
public List<MemberOfParliament> retrieveAllMembersOfParliament(Bson filter, Bson projection) throws IOException {
    List<Document> fetched = membersCollection
            .find(filter)
            .projection(projection)
            .into(new ArrayList<>());
    List<MemberOfParliament> parliamentarians = new ArrayList<>();
    fetched.forEach(doc -> parliamentarians.add(new MemberOfParliament_MongoDB_Impl(doc)));
    return parliamentarians;
}
/**
 * Removes every document from the speech, sessions and agenda-items collections.
 * The history collection is left untouched.
 */
public void deleteAllDocuments() {
    // An empty filter document matches every document of a collection
    Bson matchEverything = new Document();
    speechesCollection.deleteMany(matchEverything);
    sessionsCollection.deleteMany(matchEverything);
    agendaItemsCollection.deleteMany(matchEverything);
    //historyCollection.deleteMany(new Document());
}
/**
 * Closes the MongoDB client held by this handler.
 * Main calls this once the bulk inserts are finished.
 */
public void close() {
mongoClient.close();
}
}

View file

@ -0,0 +1,23 @@
package org.texttechnologylab.project.gruppe_05_1.database;
import lombok.Getter;
import org.texttechnologylab.project.gruppe_05_1.xml.speeches.Interfaces.*;
import java.util.List;
/**
 * Plain holder for the lists produced by a parsing run: sessions, speeches,
 * agenda items, members of parliament and fractions.
 * <p>
 * Getters are generated by Lombok. The lists are stored as passed in, without copying.
 */
@Getter
public class SpeechIndex {

    /** Sessions of the index. */
    private final List<Session> sessions;

    /** Speeches of the index. */
    private final List<Speech> speeches;

    /** Agenda items of the index. */
    private final List<AgendaItem> agendaItems;

    /** Members of parliament of the index. */
    private final List<MemberOfParliament> members;

    /** Fractions of the index. */
    private final List<Fraction> fractions;

    /**
     * Creates an index over the given lists.
     *
     * @param sessions    the sessions
     * @param speeches    the speeches
     * @param agendaItems the agenda items
     * @param members     the members of parliament
     * @param fractions   the fractions
     */
    public SpeechIndex(
            List<Session> sessions,
            List<Speech> speeches,
            List<AgendaItem> agendaItems,
            List<MemberOfParliament> members,
            List<Fraction> fractions) {
        this.fractions = fractions;
        this.members = members;
        this.agendaItems = agendaItems;
        this.speeches = speeches;
        this.sessions = sessions;
    }
}

View file

@ -0,0 +1,60 @@
package org.texttechnologylab.project.gruppe_05_1.database;
import org.xml.sax.SAXException;
import javax.xml.parsers.ParserConfigurationException;
import java.io.IOException;
/**
 * Fluent builder contract for assembling a {@link SpeechIndex}.
 * <p>
 * Every step except {@link #build()} returns a {@code SpeechIndexFactory} so that calls can be chained.
 */
public interface SpeechIndexFactory {
/**
 * Sets the parseLegislativePeriods flag.
 * @param parseLegislativePeriods whether the legislative periods should be parsed, which takes a lot longer
 * @return the SpeechIndexFactory instance
 */
SpeechIndexFactory parseLegislativePeriods(Boolean parseLegislativePeriods);
/**
 * Starts a build and returns the factory to continue the chain with.
 * @return the SpeechIndexFactory instance to chain the parse steps on
 * @throws ParserConfigurationException if the parser configuration is invalid
 */
SpeechIndexFactory builder() throws ParserConfigurationException;
/**
 * Parses all sessions, speeches and agenda items.
 * @return the SpeechIndexFactory instance
 */
SpeechIndexFactory parseSessions();
/**
 * Parses all members of parliament.
 * @return the SpeechIndexFactory instance
 * @throws IOException if an I/O error occurs
 * @throws SAXException if a SAX error occurs
 */
SpeechIndexFactory parseMembers() throws IOException, SAXException;
/**
 * Parses all fractions.
 * @return the SpeechIndexFactory instance
 * @throws IOException if an I/O error occurs
 * @throws SAXException if a SAX error occurs
 */
SpeechIndexFactory parseFractions() throws IOException, SAXException;
/**
 * Builds a new SpeechIndex instance from the data collected by the previous steps.
 * @return a new SpeechIndex instance
 * @throws IOException if an I/O error occurs
 * @throws SAXException if a SAX error occurs
 */
SpeechIndex build() throws IOException, SAXException;
/**
 * Filters the members for current members.
 * @param filterForCurrentMembers whether the members should be reduced to current members
 * @return the SpeechIndexFactory instance
 */
SpeechIndexFactory filterForCurrentMembers(boolean filterForCurrentMembers);
}

View file

@ -0,0 +1,63 @@
package org.texttechnologylab.project.gruppe_05_1.database;
import org.texttechnologylab.project.gruppe_05_1.xml.speeches.Interfaces.*;
import org.texttechnologylab.project.gruppe_05_1.xml.speeches.SpeechParser;
import org.xml.sax.SAXException;
import javax.xml.parsers.ParserConfigurationException;
import java.io.IOException;
import java.util.List;
import java.util.stream.Collectors;
/**
 * Fluent {@link SpeechIndexFactory} backed by a {@link SpeechParser}.
 * <p>
 * Typical use: {@code builder()} first, then the parse/filter steps, then {@code build()}.
 * Every step returns this factory (never {@code null}) so the chain cannot break with a
 * {@link NullPointerException}, and {@code build()} always returns an index whose lists
 * are non-null.
 */
public class SpeechIndexFactoryImpl implements SpeechIndexFactory {

    private SpeechParser speechParser;
    private List<Session> sessions;
    private List<AgendaItem> agendaItems;
    private List<Speech> speeches;
    private List<MemberOfParliament> members;
    private List<Fraction> fractions;
    private Boolean parseLegislativePeriods = true;

    /**
     * Stores the flag and forwards it to the parser if one has already been created.
     * A {@code null} flag is stored but not forwarded.
     *
     * @param parseLegislativePeriods whether legislative periods should be parsed
     * @return this factory
     */
    @Override
    public SpeechIndexFactoryImpl parseLegislativePeriods(Boolean parseLegislativePeriods) {
        this.parseLegislativePeriods = parseLegislativePeriods;
        // Explicit checks instead of catching NullPointerException as control flow
        if (this.speechParser != null && parseLegislativePeriods != null) {
            this.speechParser.setParseLegislativePeriods(parseLegislativePeriods);
        }
        return this;
    }

    /**
     * Creates a fresh {@link SpeechParser} and applies the currently stored
     * parseLegislativePeriods flag to it, so the flag takes effect regardless of whether it
     * was set before or after this call.
     *
     * @return this factory
     * @throws ParserConfigurationException declared by the {@link SpeechParser} constructor call
     */
    @Override
    public SpeechIndexFactoryImpl builder() throws ParserConfigurationException {
        this.speechParser = new SpeechParser();
        if (this.parseLegislativePeriods != null) {
            this.speechParser.setParseLegislativePeriods(this.parseLegislativePeriods);
        }
        return this;
    }

    /**
     * Parses all sessions and picks up the speeches and agenda items collected by the parser.
     *
     * @return this factory
     * @throws IllegalStateException if {@link #builder()} has not been called yet
     */
    @Override
    public SpeechIndexFactoryImpl parseSessions() {
        if (this.speechParser == null) {
            throw new IllegalStateException("builder() must be called before parseSessions()");
        }
        this.sessions = this.speechParser.parseAllSessions();
        this.speeches = this.speechParser.getSpeeches();
        this.agendaItems = this.speechParser.getAgendaItems();
        return this;
    }

    /**
     * Placeholder step: member parsing is not implemented yet, so no members are loaded.
     * Returns this factory (instead of {@code null}) to keep the fluent chain usable.
     *
     * @return this factory
     */
    @Override
    public SpeechIndexFactory parseMembers() throws IOException, SAXException {
        // TODO: load members once a member source is available
        return this;
    }

    /**
     * Placeholder step: fraction parsing is not implemented yet, so no fractions are loaded.
     * Returns this factory (instead of {@code null}) to keep the fluent chain usable.
     *
     * @return this factory
     */
    @Override
    public SpeechIndexFactory parseFractions() throws IOException, SAXException {
        // TODO: load fractions once a fraction source is available
        return this;
    }

    /**
     * Builds the index from whatever has been collected so far.
     * Lists that were never populated are represented by empty lists.
     *
     * @return a new, non-null {@link SpeechIndex}
     */
    @Override
    public SpeechIndex build() throws IOException, SAXException {
        return new SpeechIndex(
                orEmpty(this.sessions),
                orEmpty(this.speeches),
                orEmpty(this.agendaItems),
                orEmpty(this.members),
                orEmpty(this.fractions));
    }

    /**
     * Reduces the collected members to those for which {@code isCurrentMember()} is true.
     * Does nothing when the flag is {@code false} or when no members have been collected.
     *
     * @param filterForCurrentMembers whether to apply the filter
     * @return this factory
     */
    @Override
    public SpeechIndexFactoryImpl filterForCurrentMembers(boolean filterForCurrentMembers) {
        if (!filterForCurrentMembers || this.members == null) {
            return this;
        }
        this.members = this.members.stream()
                .filter(MemberOfParliament::isCurrentMember)
                .collect(Collectors.toList());
        return this;
    }

    /** Returns the given list, or an empty immutable list when it is {@code null}. */
    private static <T> List<T> orEmpty(List<T> list) {
        return list == null ? List.of() : list;
    }
}

View file

@ -0,0 +1,15 @@
package org.texttechnologylab.project.gruppe_05_1.database.domainimp.speeches;
import org.bson.Document;
import org.texttechnologylab.project.gruppe_05_1.xml.speeches.Impls.AgendaItem_File_Impl;
import org.texttechnologylab.project.gruppe_05_1.xml.speeches.Interfaces.AgendaItem;
/**
 * {@link AgendaItem} that is populated from a MongoDB document.
 */
public class AgendaItem_MongoDB_Impl extends AgendaItem_File_Impl implements AgendaItem {
/**
 * Reads the fields {@code id}, {@code sessionId} and {@code title} from the document
 * and passes them to the file-based implementation.
 *
 * @param mongoDocument the stored agenda-item document
 */
public AgendaItem_MongoDB_Impl(Document mongoDocument) {
super(
mongoDocument.getInteger("id"),
mongoDocument.getInteger("sessionId"),
mongoDocument.getString("title"));
}
}

View file

@ -0,0 +1,17 @@
package org.texttechnologylab.project.gruppe_05_1.database.domainimp.speeches;
import org.bson.Document;
import org.texttechnologylab.project.gruppe_05_1.xml.speeches.Impls.Comment_File_Impl;
import org.texttechnologylab.project.gruppe_05_1.xml.speeches.Interfaces.Comment;
/**
 * {@link Comment} that is populated from a MongoDB sub-document of a speech.
 */
public class Comment_MongoDB_Impl extends Comment_File_Impl implements Comment {
/**
 * Reads the fields {@code contentId}, {@code speechId}, {@code commentatorName} and
 * {@code comment} from the document and passes them to the file-based implementation.
 *
 * @param mongoDocument the stored comment sub-document
 */
public Comment_MongoDB_Impl(Document mongoDocument) {
super(
mongoDocument.getInteger("contentId"),
mongoDocument.getInteger("speechId"),
mongoDocument.getString("commentatorName"),
mongoDocument.getString("comment"));
}
}

View file

@ -0,0 +1,15 @@
package org.texttechnologylab.project.gruppe_05_1.database.domainimp.speeches;
import org.bson.Document;
import org.texttechnologylab.project.gruppe_05_1.xml.speeches.Impls.Line_File_Impl;
import org.texttechnologylab.project.gruppe_05_1.xml.speeches.Interfaces.Line;
/**
 * {@link Line} that is populated from a MongoDB sub-document of a speech.
 */
public class Line_MongoDB_Impl extends Line_File_Impl implements Line {
/**
 * Reads the fields {@code contentId}, {@code speechId} and {@code content} from the
 * document and passes them to the file-based implementation.
 *
 * @param mongoDocument the stored line sub-document
 */
public Line_MongoDB_Impl(Document mongoDocument) {
super(
mongoDocument.getInteger("contentId"),
mongoDocument.getInteger("speechId"),
mongoDocument.getString("content"));
}
}

View file

@ -0,0 +1,25 @@
package org.texttechnologylab.project.gruppe_05_1.database.domainimp.speeches;
import org.bson.Document;
import org.texttechnologylab.project.gruppe_05_1.xml.speeches.Impls.MemberOfParliament_File_Impl;
import org.texttechnologylab.project.gruppe_05_1.xml.speeches.Interfaces.MemberOfParliament;
/**
 * {@link MemberOfParliament} that is populated from a MongoDB document.
 */
public class MemberOfParliament_MongoDB_Impl extends MemberOfParliament_File_Impl implements MemberOfParliament {
/**
 * Reads the member fields from the document and passes them, in constructor order,
 * to the file-based implementation.
 *
 * @param mongoDocument the stored member document
 */
public MemberOfParliament_MongoDB_Impl(Document mongoDocument) {super(
mongoDocument.getString("name"),
mongoDocument.getString("firstName"),
mongoDocument.getString("title"),
mongoDocument.getString("dateOfBirth"),
mongoDocument.getString("dateOfDeath"),
mongoDocument.getString("placeOfBirth"),
mongoDocument.getString("gender"),
mongoDocument.getString("religion"),
mongoDocument.getInteger("id"),
mongoDocument.getString("party"),
// NOTE(review): this argument is not read from the document; its meaning is defined by
// the MemberOfParliament_File_Impl constructor, which is not visible here - confirm
null,
mongoDocument.getInteger("firstLegislativePeriod"),
mongoDocument.getInteger("lastLegislativePeriod"),
mongoDocument.getString("image_data"));
}
}

View file

@ -0,0 +1,17 @@
package org.texttechnologylab.project.gruppe_05_1.database.domainimp.speeches;
import org.bson.Document;
import org.texttechnologylab.project.gruppe_05_1.xml.speeches.Impls.Session_File_Impl;
import org.texttechnologylab.project.gruppe_05_1.xml.speeches.Interfaces.Session;
/**
 * {@link Session} that is populated from a MongoDB document.
 */
public class Session_MongoDB_Impl extends Session_File_Impl implements Session {
/**
 * Reads the fields {@code legislativePeriod}, {@code sessionId}, {@code dateTime} and
 * {@code endTime} from the document and passes them to the file-based implementation.
 *
 * @param mongoDocument the stored session document
 */
public Session_MongoDB_Impl(Document mongoDocument) {
super(
mongoDocument.getString("legislativePeriod"),
mongoDocument.getInteger("sessionId"),
mongoDocument.getString("dateTime"),
mongoDocument.getString("endTime"));
}
}

View file

@ -0,0 +1,16 @@
package org.texttechnologylab.project.gruppe_05_1.database.domainimp.speeches;
import org.bson.Document;
import org.texttechnologylab.project.gruppe_05_1.xml.speeches.Impls.Speaker_File_Impl;
import org.texttechnologylab.project.gruppe_05_1.xml.speeches.Interfaces.Speaker;
/**
 * {@link Speaker} content element that is populated from a MongoDB sub-document of a speech.
 */
public class Speaker_MongoDB_Impl extends Speaker_File_Impl implements Speaker {
/**
 * Reads the fields {@code contentId}, {@code speechId}, {@code speakerId},
 * {@code speakerName} and {@code fraction} from the document and passes them to the
 * file-based implementation.
 *
 * @param mongoDocument the stored speaker sub-document
 */
public Speaker_MongoDB_Impl(Document mongoDocument) {
super(
mongoDocument.getInteger("contentId"),
mongoDocument.getInteger("speechId"),
mongoDocument.getInteger("speakerId"),
mongoDocument.getString("speakerName"),
mongoDocument.getString("fraction"));
}
}

View file

@ -0,0 +1,36 @@
package org.texttechnologylab.project.gruppe_05_1.database.domainimp.speeches;
import org.bson.Document;
import org.texttechnologylab.project.gruppe_05_1.xml.speeches.Impls.Speech_File_Impl;
import org.texttechnologylab.project.gruppe_05_1.xml.speeches.Interfaces.Speech;
import java.util.List;
/**
 * {@link Speech} that is populated from a MongoDB document, including its content elements.
 */
public class Speech_MongoDB_Impl extends Speech_File_Impl implements Speech {

    /**
     * Reads the speech header fields ({@code sessionId}, {@code agendaItemId}, {@code speechId},
     * {@code speakerId}, {@code speakerName}, {@code fraction}) and then rebuilds every entry of
     * the {@code speechContents} list according to its {@code type} field
     * ({@code line}, {@code comment} or {@code speaker}).
     * <p>
     * A document without a {@code speechContents} field yields a speech without contents.
     *
     * @param mongoDocument the stored speech document
     * @throws IllegalArgumentException if a content entry has a missing or unknown {@code type}
     * @throws ClassCastException       if {@code speechContents} is not a list of documents
     */
    public Speech_MongoDB_Impl(Document mongoDocument) {
        super(
                mongoDocument.getInteger("sessionId"),
                mongoDocument.getInteger("agendaItemId"),
                mongoDocument.getInteger("speechId"),
                mongoDocument.getInteger("speakerId"),
                mongoDocument.getString("speakerName"),
                mongoDocument.getString("fraction"));

        // getList type-checks the elements and returns null when the field is absent,
        // replacing the unchecked cast of the raw value
        List<Document> contents = mongoDocument.getList("speechContents", Document.class);
        if (contents == null) {
            return;
        }
        for (Document content : contents) {
            String type = content.getString("type");
            if (type == null) {
                // switch on a null String would raise a bare NullPointerException
                throw new IllegalArgumentException("Unknown content type: " + type);
            }
            switch (type) {
                case "line":
                    this.addContent(new Line_MongoDB_Impl(content));
                    break;
                case "comment":
                    this.addContent(new Comment_MongoDB_Impl(content));
                    break;
                case "speaker":
                    this.addContent(new Speaker_MongoDB_Impl(content));
                    break;
                default:
                    throw new IllegalArgumentException("Unknown content type: " + type);
            }
        }
    }
}

View file

@ -0,0 +1,27 @@
package org.texttechnologylab.project.gruppe_05_1.util;
public class Logger {
private static final boolean DEBUG_LOGGING = false;
// info, warn, error with message and colors and datetime
public static void info(String message) {
System.out.println("\u001B[32m" + java.time.LocalTime.now() + " INFO: " + message + "\u001B[0m");
}
public static void warn(String message) {
System.out.println("\u001B[33m" + java.time.LocalTime.now() + " WARN: " + message + "\u001B[0m");
}
public static void error(String message) {
System.out.println("\u001B[31m" + java.time.LocalTime.now() + " ERROR: " + message + "\u001B[0m");
}
public static void debug(String message) {
if (DEBUG_LOGGING) {
System.out.println("\u001B[38;5;214m" + java.time.LocalTime.now() + " DEBUG: " + message + "\u001B[0m");
}
}
public static void pink(String message) {
System.out.println("\u001B[35m" + java.time.LocalTime.now() + " PINK: " + message + "\u001B[0m");
}
}

View file

@ -17,7 +17,6 @@ import org.texttechnologylab.project.gruppe_05_1.domain.speaker.Membership;
import org.texttechnologylab.project.gruppe_05_1.domain.speaker.Speaker;
import org.texttechnologylab.project.gruppe_05_1.nlp.NlpUtils;
import org.texttechnologylab.project.gruppe_05_1.xml.FileObjectFactory;
import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.w3c.dom.Node;
import org.xml.sax.InputSource;
@ -361,7 +360,7 @@ public abstract class PPRUtils {
while (hasMore) {
String queryUrl = "https://www.bundestag.de/ajax/filterlist/de/services/opendata/866354-866354?limit="
+ limit + "&noFilterSet=true&offset=" + offset;
System.out.println("Lade: " + queryUrl);
//System.out.println("Lade: " + queryUrl);
try {
Document htmlDoc = Jsoup.connect(queryUrl).get();
Elements xmlLinks = htmlDoc.select("a.bt-link-dokument");
@ -372,10 +371,9 @@ public abstract class PPRUtils {
for (org.jsoup.nodes.Element link : xmlLinks) {
String xmlUrl = link.attr("href");
System.out.println("Verarbeite XML: " + xmlUrl);
//System.out.println("Verarbeite XML: " + xmlUrl);
try {
org.w3c.dom.Document xmlDoc = downloadAndParseXML(xmlUrl);
String uniqueId = xmlDoc.getDocumentElement().getAttribute("sitzung-nr");
if (processedProtocols.contains(uniqueId)) {
System.out.println("Protokoll bereits verarbeitet: " + uniqueId);
@ -383,7 +381,6 @@ public abstract class PPRUtils {
}
processedProtocols.add(uniqueId);
xmlProtocols.add(xmlDoc);
//TODO verarbeitung
} catch (Exception e) {
System.err.println("Fehler beim Verarbeiten der XML-Datei: " + xmlUrl);
e.printStackTrace();

View file

@ -37,8 +37,8 @@ public class SpeechParser {
List<Session> sessions = new ArrayList<>();
this.speeches = new ArrayList<>();
this.agendaItems = new ArrayList<>();
//TODO Logik so machen dass aus array von xmls gelesen wird nicht aus pfad
Set<Document> xmlDocuments = PPRUtils.processXML();
System.out.println("All sessions parsed");
for (org.w3c.dom.Document xmlDoc:xmlDocuments) {
try {
File tempFile = convertDocumentToFile(xmlDoc);
@ -65,7 +65,6 @@ public class SpeechParser {
Element root = document.getDocumentElement();
String legislativePeriod = root.getAttribute("wahlperiode");
int sessionId = Integer.parseInt(root.getAttribute("sitzung-nr"));
System.out.println("Session " + sessionId + " wurde gespeichert");
String sessionDate = root.getAttribute("sitzung-datum");
Element startTimeElement = (Element) root.getElementsByTagName("sitzungsbeginn").item(0);
String startTimeString = startTimeElement != null ? startTimeElement.getAttribute("sitzung-start-uhrzeit") : null;