Rollback point
This commit is contained in:
parent
8a6548662c
commit
ae5c3f17eb
22 changed files with 751 additions and 10 deletions
|
@ -0,0 +1,7 @@
|
|||
package exceptions;
|
||||
|
||||
/**
 * Unchecked exception whose default message is {@code "Agenda Item not found"}.
 */
public class AgendaItemNotFoundException extends RuntimeException {

    /** Message used by the no-argument constructor. */
    private static final String DEFAULT_MESSAGE = "Agenda Item not found";

    /** Creates the exception with the default message. */
    public AgendaItemNotFoundException() {
        super(DEFAULT_MESSAGE);
    }

    /**
     * Creates the exception with a caller-supplied message, e.g. one naming the missing id.
     *
     * @param message the detail message
     */
    public AgendaItemNotFoundException(String message) {
        super(message);
    }

    /**
     * Creates the exception with a caller-supplied message and the underlying cause.
     *
     * @param message the detail message
     * @param cause   the exception that led to this one
     */
    public AgendaItemNotFoundException(String message, Throwable cause) {
        super(message, cause);
    }
}
|
|
@ -0,0 +1,7 @@
|
|||
package exceptions;
|
||||
|
||||
/**
 * Unchecked exception whose default message is {@code "Fraction already exists"}.
 */
public class FractionAlreadyExistsException extends RuntimeException {

    /** Message used by the no-argument constructor. */
    private static final String DEFAULT_MESSAGE = "Fraction already exists";

    /** Creates the exception with the default message. */
    public FractionAlreadyExistsException() {
        super(DEFAULT_MESSAGE);
    }

    /**
     * Creates the exception with a caller-supplied message, e.g. one naming the fraction.
     *
     * @param message the detail message
     */
    public FractionAlreadyExistsException(String message) {
        super(message);
    }

    /**
     * Creates the exception with a caller-supplied message and the underlying cause.
     *
     * @param message the detail message
     * @param cause   the exception that led to this one
     */
    public FractionAlreadyExistsException(String message, Throwable cause) {
        super(message, cause);
    }
}
|
7
src/main/java/exceptions/FractionNotFoundException.java
Normal file
7
src/main/java/exceptions/FractionNotFoundException.java
Normal file
|
@ -0,0 +1,7 @@
|
|||
package exceptions;
|
||||
|
||||
/**
 * Unchecked exception whose default message is {@code "Fraction not found"}.
 */
public class FractionNotFoundException extends RuntimeException {

    /** Message used by the no-argument constructor. */
    private static final String DEFAULT_MESSAGE = "Fraction not found";

    /** Creates the exception with the default message. */
    public FractionNotFoundException() {
        super(DEFAULT_MESSAGE);
    }

    /**
     * Creates the exception with a caller-supplied message, e.g. one naming the missing fraction.
     *
     * @param message the detail message
     */
    public FractionNotFoundException(String message) {
        super(message);
    }

    /**
     * Creates the exception with a caller-supplied message and the underlying cause.
     *
     * @param message the detail message
     * @param cause   the exception that led to this one
     */
    public FractionNotFoundException(String message, Throwable cause) {
        super(message, cause);
    }
}
|
7
src/main/java/exceptions/MemberNotFoundException.java
Normal file
7
src/main/java/exceptions/MemberNotFoundException.java
Normal file
|
@ -0,0 +1,7 @@
|
|||
package exceptions;
|
||||
|
||||
/**
 * Unchecked exception whose default message is {@code "Member not found"}.
 */
public class MemberNotFoundException extends RuntimeException {

    /** Message used by the no-argument constructor. */
    private static final String DEFAULT_MESSAGE = "Member not found";

    /** Creates the exception with the default message. */
    public MemberNotFoundException() {
        super(DEFAULT_MESSAGE);
    }

    /**
     * Creates the exception with a caller-supplied message, e.g. one naming the missing member id.
     *
     * @param message the detail message
     */
    public MemberNotFoundException(String message) {
        super(message);
    }

    /**
     * Creates the exception with a caller-supplied message and the underlying cause.
     *
     * @param message the detail message
     * @param cause   the exception that led to this one
     */
    public MemberNotFoundException(String message, Throwable cause) {
        super(message, cause);
    }
}
|
7
src/main/java/exceptions/ServerErrorException.java
Normal file
7
src/main/java/exceptions/ServerErrorException.java
Normal file
|
@ -0,0 +1,7 @@
|
|||
package exceptions;
|
||||
|
||||
/**
 * Unchecked exception whose default message is {@code "Server error occurred"}.
 *
 * <p>The cause-accepting constructors allow a caught low-level exception to be
 * preserved instead of being discarded when it is translated into this type.
 */
public class ServerErrorException extends RuntimeException {

    /** Message used when the caller supplies none. */
    private static final String DEFAULT_MESSAGE = "Server error occurred";

    /** Creates the exception with the default message. */
    public ServerErrorException() {
        super(DEFAULT_MESSAGE);
    }

    /**
     * Creates the exception with a caller-supplied message.
     *
     * @param message the detail message
     */
    public ServerErrorException(String message) {
        super(message);
    }

    /**
     * Creates the exception with the default message and the underlying cause.
     *
     * @param cause the exception that led to this one
     */
    public ServerErrorException(Throwable cause) {
        super(DEFAULT_MESSAGE, cause);
    }

    /**
     * Creates the exception with a caller-supplied message and the underlying cause.
     *
     * @param message the detail message
     * @param cause   the exception that led to this one
     */
    public ServerErrorException(String message, Throwable cause) {
        super(message, cause);
    }
}
|
7
src/main/java/exceptions/SessionNotFoundException.java
Normal file
7
src/main/java/exceptions/SessionNotFoundException.java
Normal file
|
@ -0,0 +1,7 @@
|
|||
package exceptions;
|
||||
|
||||
/**
 * Unchecked exception whose default message is {@code "Session not found"}.
 */
public class SessionNotFoundException extends RuntimeException {

    /** Message used by the no-argument constructor. */
    private static final String DEFAULT_MESSAGE = "Session not found";

    /** Creates the exception with the default message. */
    public SessionNotFoundException() {
        super(DEFAULT_MESSAGE);
    }

    /**
     * Creates the exception with a caller-supplied message, e.g. one naming the missing session id.
     *
     * @param message the detail message
     */
    public SessionNotFoundException(String message) {
        super(message);
    }

    /**
     * Creates the exception with a caller-supplied message and the underlying cause.
     *
     * @param message the detail message
     * @param cause   the exception that led to this one
     */
    public SessionNotFoundException(String message, Throwable cause) {
        super(message, cause);
    }
}
|
7
src/main/java/exceptions/SpeechNotFoundException.java
Normal file
7
src/main/java/exceptions/SpeechNotFoundException.java
Normal file
|
@ -0,0 +1,7 @@
|
|||
package exceptions;
|
||||
|
||||
/**
 * Unchecked exception whose default message is {@code "Speech not found"}.
 */
public class SpeechNotFoundException extends RuntimeException {

    /** Message used by the no-argument constructor. */
    private static final String DEFAULT_MESSAGE = "Speech not found";

    /** Creates the exception with the default message. */
    public SpeechNotFoundException() {
        super(DEFAULT_MESSAGE);
    }

    /**
     * Creates the exception with a caller-supplied message, e.g. one naming the missing speech id.
     *
     * @param message the detail message
     */
    public SpeechNotFoundException(String message) {
        super(message);
    }

    /**
     * Creates the exception with a caller-supplied message and the underlying cause.
     *
     * @param message the detail message
     * @param cause   the exception that led to this one
     */
    public SpeechNotFoundException(String message, Throwable cause) {
        super(message, cause);
    }
}
|
|
@ -4,19 +4,27 @@ package org.texttechnologylab.project.gruppe_05_1;
|
|||
import com.mongodb.client.MongoDatabase;
|
||||
import org.texttechnologylab.project.gruppe_05_1.database.MongoDBHandler;
|
||||
import org.texttechnologylab.project.gruppe_05_1.database.MongoObjectFactory;
|
||||
import org.texttechnologylab.project.gruppe_05_1.database.SpeechIndex;
|
||||
import org.texttechnologylab.project.gruppe_05_1.database.SpeechIndexFactoryImpl;
|
||||
import org.texttechnologylab.project.gruppe_05_1.domain.mdb.Mdb;
|
||||
import org.texttechnologylab.project.gruppe_05_1.domain.mdb.MdbDocument;
|
||||
import org.texttechnologylab.project.gruppe_05_1.rest.RESTHandler;
|
||||
import org.texttechnologylab.project.gruppe_05_1.util.Logger;
|
||||
import org.texttechnologylab.project.gruppe_05_1.util.PPRUtils;
|
||||
import org.texttechnologylab.project.gruppe_05_1.util.PropertiesUtils;
|
||||
import org.texttechnologylab.project.gruppe_05_1.util.XmlUtils;
|
||||
import org.texttechnologylab.project.gruppe_05_1.xml.FileObjectFactory;
|
||||
import org.texttechnologylab.project.gruppe_05_1.xml.speeches.SpeechParser;
|
||||
import org.w3c.dom.Element;
|
||||
import org.xml.sax.SAXException;
|
||||
|
||||
import javax.xml.parsers.ParserConfigurationException;
|
||||
import java.io.IOException;
|
||||
import java.util.List;
|
||||
import java.util.Properties;
|
||||
|
||||
import static java.lang.Boolean.TRUE;
|
||||
|
||||
/*
|
||||
import com.mongodb.client.*;
|
||||
import org.bson.Document;
|
||||
|
@ -46,9 +54,49 @@ public class Main {
|
|||
private static final FileObjectFactory xmlFactory = FileObjectFactory.getFactory();
|
||||
private static final MongoObjectFactory mongoFactory = MongoObjectFactory.getFactory();
|
||||
private static final SpeechParser speechParser = new SpeechParser();
|
||||
public static void main(String[] args) {
|
||||
public static void main(String[] args) throws ParserConfigurationException, IOException, SAXException {
|
||||
|
||||
//TEST
|
||||
speechParser.parseAllSessions();
|
||||
|
||||
Logger.pink("Parsing XML and inserting data into DB (Uebung 2)...");
|
||||
SpeechIndexFactoryImpl speechIndexFactory = new SpeechIndexFactoryImpl();
|
||||
|
||||
SpeechIndex speechIndex = speechIndexFactory
|
||||
.builder()
|
||||
.parseLegislativePeriods(TRUE)
|
||||
.parseSessions()
|
||||
.parseMembers()
|
||||
.parseFractions()
|
||||
.filterForCurrentMembers(TRUE)
|
||||
.build();
|
||||
|
||||
//speechIndex.printInfo();
|
||||
|
||||
System.out.println("Data retrieved from DB:");
|
||||
System.out.println("MEMBERCOUNT: " + speechIndex.getMembers().size());
|
||||
System.out.println("FRACTIONCOUNT: " + speechIndex.getFractions().size());
|
||||
System.out.println("SPEECHCOUNT: " + speechIndex.getSpeeches().size());
|
||||
System.out.println("SESSIONCOUNT: " + speechIndex.getSessions().size());
|
||||
System.out.println("AGENDAITEMCOUNT: " + speechIndex.getAgendaItems().size());
|
||||
|
||||
MongoDBHandler mongoDBHandler = new MongoDBHandler();
|
||||
mongoDBHandler.deleteAllDocuments(); // Clear the DB
|
||||
|
||||
Logger.pink("Adding Sessions to DB...");
|
||||
mongoDBHandler.insertSessions(speechIndex.getSessions());
|
||||
|
||||
Logger.pink("Adding Agenda Items to DB...");
|
||||
mongoDBHandler.insertAgendaItems(speechIndex.getAgendaItems());
|
||||
|
||||
Logger.pink("Adding Speeches to DB...");
|
||||
mongoDBHandler.insertSpeeches(speechIndex.getSpeeches());
|
||||
|
||||
mongoDBHandler.close(); // Close the connection to the DB
|
||||
|
||||
|
||||
//TEST
|
||||
|
||||
|
||||
// Stellt fest, dass alle nötigen Datenbank-Collections existieren
|
||||
PPRUtils.ensureCollectionExist();
|
||||
|
||||
|
|
|
@ -7,14 +7,29 @@ import com.mongodb.client.MongoClient;
|
|||
import com.mongodb.client.MongoClients;
|
||||
import com.mongodb.client.MongoCollection;
|
||||
import com.mongodb.client.MongoDatabase;
|
||||
import com.mongodb.client.model.Filters;
|
||||
import com.mongodb.client.model.Indexes;
|
||||
import com.mongodb.client.model.Updates;
|
||||
import exceptions.AgendaItemNotFoundException;
|
||||
import exceptions.MemberNotFoundException;
|
||||
import exceptions.ServerErrorException;
|
||||
import exceptions.SessionNotFoundException;
|
||||
import org.bson.Document;
|
||||
import org.bson.conversions.Bson;
|
||||
import org.bson.types.ObjectId;
|
||||
import org.texttechnologylab.DockerUnifiedUIMAInterface.connection.mongodb.MongoDBConfig;
|
||||
import org.texttechnologylab.project.gruppe_05_1.database.domainimp.speeches.AgendaItem_MongoDB_Impl;
|
||||
import org.texttechnologylab.project.gruppe_05_1.database.domainimp.speeches.MemberOfParliament_MongoDB_Impl;
|
||||
import org.texttechnologylab.project.gruppe_05_1.database.domainimp.speeches.Session_MongoDB_Impl;
|
||||
import org.texttechnologylab.project.gruppe_05_1.database.domainimp.speeches.Speech_MongoDB_Impl;
|
||||
import org.texttechnologylab.project.gruppe_05_1.util.Logger;
|
||||
import org.texttechnologylab.project.gruppe_05_1.util.PropertiesUtils;
|
||||
import org.texttechnologylab.project.gruppe_05_1.xml.speeches.Impls.*;
|
||||
import org.texttechnologylab.project.gruppe_05_1.xml.speeches.Interfaces.*;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.*;
|
||||
import java.util.concurrent.TimeUnit;
|
||||
|
||||
import static com.mongodb.client.model.Filters.eq;
|
||||
|
||||
|
@ -22,7 +37,8 @@ public class MongoDBHandler {
|
|||
|
||||
public static final String propertiesFileName = "mongoDB.properties";
|
||||
public static final String DEFAULT_ID_FIELD_NAME = "_id";
|
||||
|
||||
private final MongoClient mongoClient;
|
||||
private final MongoDatabase database;
|
||||
private static MongoDatabase mongoDatabase = null;
|
||||
|
||||
public final static Class<? extends List> DOC_LIST_CLASS = new ArrayList<Document>().getClass();
|
||||
|
@ -35,6 +51,56 @@ public class MongoDBHandler {
|
|||
private static String collection;
|
||||
private static String databaseName;
|
||||
|
||||
private MongoCollection<Document> speechesCollection;
|
||||
private MongoCollection<Document> membersCollection;
|
||||
private MongoCollection<Document> fractionsCollection;
|
||||
private MongoCollection<Document> sessionsCollection;
|
||||
private MongoCollection<Document> agendaItemsCollection;
|
||||
private MongoCollection<Document> historyCollection;
|
||||
|
||||
|
||||
public MongoDBHandler() {
|
||||
// Load the MongoDB configuration from the properties file
|
||||
String propertiesFilePath = "config/database.properties";
|
||||
// Set loglevel for slf4j to avoid spam
|
||||
System.setProperty("org.slf4j.simpleLogger.defaultLogLevel", "error");
|
||||
Properties mongoProperties = PropertiesUtils.readPropertiesFromResource(propertiesFileName);
|
||||
// Zugangsdaten
|
||||
localServer = mongoProperties.getProperty("localserver");
|
||||
remoteServer = mongoProperties.getProperty("remote_host");
|
||||
user = mongoProperties.getProperty("remote_user");
|
||||
password = mongoProperties.getProperty("remote_password");
|
||||
port = mongoProperties.getProperty("remote_port");
|
||||
collection = mongoProperties.getProperty("remote_collection");
|
||||
databaseName = mongoProperties.getProperty("remote_database");
|
||||
|
||||
MongoCredential credential = MongoCredential
|
||||
.createCredential(
|
||||
user,
|
||||
databaseName,
|
||||
password.toCharArray());
|
||||
|
||||
MongoClientSettings settings = MongoClientSettings.builder()
|
||||
.credential(credential)
|
||||
.timeout(180, TimeUnit.HOURS) // needs increased timeout for the bulk speech inserts
|
||||
.applyToClusterSettings(builder ->
|
||||
builder.hosts(List.of(new ServerAddress(remoteServer, Integer.parseInt(port)))))
|
||||
.build();
|
||||
|
||||
mongoClient = MongoClients.create(settings);
|
||||
database = mongoClient.getDatabase(databaseName);
|
||||
speechesCollection = database.getCollection("speech");
|
||||
membersCollection = database.getCollection("members");
|
||||
fractionsCollection = database.getCollection("fractions");
|
||||
sessionsCollection = database.getCollection("sessions");
|
||||
agendaItemsCollection = database.getCollection("agendaItems");
|
||||
historyCollection = database.getCollection("history");
|
||||
Logger.info("Connected to MongoDB database: " + databaseName);
|
||||
|
||||
// hopeless attempt of creating the fulltext search index :(
|
||||
membersCollection.createIndex(new Document("collection", 1));
|
||||
}
|
||||
|
||||
/**
|
||||
* Get the MongoDB according to properties.
|
||||
* If a local server URI is defined, use it. Otherwise, use remote server.
|
||||
|
@ -392,4 +458,272 @@ public class MongoDBHandler {
|
|||
collection.deleteOne(deleteQuery);
|
||||
}
|
||||
|
||||
/*
|
||||
* Justus Jonas operations
|
||||
* =======================
|
||||
*/
|
||||
|
||||
public void insertSession(Session session) {
|
||||
Document sessionDocument = new Document("sessionId", session.getId())
|
||||
.append("dateTime", session.getDateTime())
|
||||
.append("endTime", session.getEndTime())
|
||||
.append("legislativePeriod", session.getLegislativePeriod());
|
||||
|
||||
sessionsCollection.insertOne(sessionDocument);
|
||||
}
|
||||
|
||||
public Session insertSession(String dateTime, String endTime, String legislativePeriod) {
|
||||
// get a new random sessionId that is not already in use
|
||||
int sessionId = 0;
|
||||
while (!retrieveAllSessions(Filters.eq("sessionId", sessionId)).isEmpty()) {
|
||||
// generate random int
|
||||
sessionId = (int) (Math.random() * Integer.MAX_VALUE);
|
||||
}
|
||||
|
||||
// create session
|
||||
Session session = new Session_File_Impl(legislativePeriod, sessionId, dateTime, endTime);
|
||||
|
||||
// insert session into DB
|
||||
insertSession(session);
|
||||
|
||||
return session;
|
||||
}
|
||||
|
||||
|
||||
public void insertSessions(List<Session> sessions) {
|
||||
for (Session session : sessions) {
|
||||
insertSession(session);
|
||||
}
|
||||
}
|
||||
|
||||
public void insertAgendaItems(List<AgendaItem> agendaItems) {
|
||||
List<Document> agendaItemDocuments = new ArrayList<>();
|
||||
for (AgendaItem agendaItem : agendaItems) {
|
||||
Document agendaItemDocument = new Document("id", agendaItem.getId())
|
||||
.append("sessionId", agendaItem.getSessionId())
|
||||
.append("title", agendaItem.getTitle());
|
||||
|
||||
agendaItemDocuments.add(agendaItemDocument);
|
||||
}
|
||||
|
||||
agendaItemsCollection.insertMany(agendaItemDocuments);
|
||||
|
||||
}
|
||||
|
||||
public AgendaItem insertAgendaItem(int sessionId, String title) throws SessionNotFoundException, ServerErrorException {
|
||||
// check if session exists
|
||||
List<Session> sessions = retrieveAllSessions(Filters.eq("sessionId", sessionId));
|
||||
if (sessions.isEmpty()) {
|
||||
Logger.error("No session found with id " + sessionId);
|
||||
throw new SessionNotFoundException();
|
||||
}
|
||||
// get a new random agendaItemId that is not already in use
|
||||
int agendaItemId = 0;
|
||||
while (!retrieveAllAgendaItems(Filters.eq("id", agendaItemId)).isEmpty()) {
|
||||
// generate random int
|
||||
agendaItemId = (int) (Math.random() * Integer.MAX_VALUE);
|
||||
}
|
||||
|
||||
// create agendaItem
|
||||
AgendaItem agendaItem = new AgendaItem_File_Impl(agendaItemId, sessionId, title);
|
||||
|
||||
// insert agendaItem into DB
|
||||
insertAgendaItems(List.of(agendaItem));
|
||||
|
||||
return agendaItem;
|
||||
}
|
||||
|
||||
public void insertSpeeches(List<Speech> speeches) {
|
||||
// Convert each Speech to a Document
|
||||
List<Document> speechDocuments = new ArrayList<>();
|
||||
for (Speech speech : speeches) {
|
||||
Document speechDocument = new Document("sessionId", speech.getSessionId())
|
||||
.append("agendaItemId", speech.getAgendaItemId())
|
||||
.append("speechId", speech.getSpeechId())
|
||||
.append("speakerId", speech.getSpeakerId())
|
||||
.append("speakerName", speech.getSpeakerName())
|
||||
.append("fraction", speech.getFraction());
|
||||
|
||||
// Convert speechContents to a list of Documents
|
||||
List<Document> contentDocuments = new ArrayList<>();
|
||||
for (Content content : speech.getSpeechContents()) {
|
||||
if (content instanceof Comment_File_Impl) {
|
||||
Comment_File_Impl commentContent = (Comment_File_Impl) content;
|
||||
contentDocuments.add(new Document("type", "comment")
|
||||
.append("contentId", commentContent.getContentId())
|
||||
.append("speechId", commentContent.getSpeechId())
|
||||
.append("commentatorName", commentContent.getCommentatorName())
|
||||
.append("comment", commentContent.getComment()));
|
||||
} else if (content instanceof Line_File_Impl) {
|
||||
Line_File_Impl lineContent = (Line_File_Impl) content;
|
||||
contentDocuments.add(new Document("type", "line")
|
||||
.append("contentId", lineContent.getContentId())
|
||||
.append("speechId", lineContent.getSpeechId())
|
||||
.append("content", lineContent.getContent()));
|
||||
} else if (content instanceof Speaker_File_Impl) {
|
||||
Speaker_File_Impl speakerContent = (Speaker_File_Impl) content;
|
||||
contentDocuments.add(new Document("type", "speaker")
|
||||
.append("contentId", speakerContent.getContentId())
|
||||
.append("speechId", speakerContent.getSpeechId())
|
||||
.append("speakerId", speakerContent.getSpeakerId())
|
||||
.append("speakerName", speakerContent.getSpeakerName())
|
||||
.append("fraction", speakerContent.getFraction()));
|
||||
}
|
||||
}
|
||||
|
||||
// Add the speech contents to the speech document
|
||||
speechDocument.append("speechContents", contentDocuments);
|
||||
|
||||
// Add the speech document to the list
|
||||
speechDocuments.add(speechDocument);
|
||||
}
|
||||
|
||||
// Insert all documents at once using insertMany
|
||||
speechesCollection.insertMany(speechDocuments);
|
||||
}
|
||||
|
||||
public Speech insertSpeech(int speakerId, int sessionId, int agendaItemId) throws MemberNotFoundException, SessionNotFoundException, AgendaItemNotFoundException, ServerErrorException {
|
||||
// fetch member by speakerId
|
||||
MemberOfParliament member;
|
||||
try {
|
||||
List<MemberOfParliament> members = retrieveAllMembersOfParliament(Filters.eq("id", speakerId));
|
||||
if (members.isEmpty()) {
|
||||
Logger.error("No member found with id " + speakerId);
|
||||
throw new MemberNotFoundException();
|
||||
} else if (members.size() > 1) {
|
||||
Logger.warn("Multiple members found with id " + speakerId);
|
||||
throw new ServerErrorException();
|
||||
}
|
||||
member = members.get(0);
|
||||
} catch (IOException e) {
|
||||
Logger.error("Failed to retrieve member with id " + speakerId);
|
||||
throw new ServerErrorException();
|
||||
}
|
||||
// check if session and agendaItem exist
|
||||
List<Session> sessions = retrieveAllSessions(Filters.eq("sessionId", sessionId));
|
||||
if (sessions.isEmpty()) {
|
||||
Logger.error("No session found with id " + sessionId);
|
||||
throw new SessionNotFoundException();
|
||||
}
|
||||
List<AgendaItem> agendaItems = retrieveAllAgendaItems(Filters.eq("id", agendaItemId));
|
||||
if (agendaItems.isEmpty()) {
|
||||
Logger.error("No agendaItem found with id " + agendaItemId);
|
||||
throw new AgendaItemNotFoundException();
|
||||
}
|
||||
// get a new random speechId that is not already in use
|
||||
int speechId = 0;
|
||||
while (!retrieveAllSpeeches(Filters.eq("speechId", speechId)).isEmpty()) {
|
||||
// generate random int
|
||||
speechId = (int) (Math.random() * Integer.MAX_VALUE);
|
||||
}
|
||||
|
||||
// create speech
|
||||
Speech speech = new Speech_File_Impl(sessionId, agendaItemId, speakerId, speechId, member.getName(), member.getParty());
|
||||
|
||||
// insert speech into DB
|
||||
insertSpeeches(List.of(speech));
|
||||
|
||||
return speech;
|
||||
}
|
||||
|
||||
public List<Speech> retrieveAllSpeeches() {
|
||||
List<Document> speeches = speechesCollection.find().into(new ArrayList<>());
|
||||
List<Speech> result = new ArrayList<>();
|
||||
for (Document speech : speeches) {
|
||||
result.add(new Speech_MongoDB_Impl(speech));
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
public List<Speech> retrieveAllSpeeches(Bson filter) {
|
||||
List<Document> speeches = speechesCollection.find(filter).into(new ArrayList<>());
|
||||
List<Speech> result = new ArrayList<>();
|
||||
for (Document speech : speeches) {
|
||||
result.add(new Speech_MongoDB_Impl(speech));
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
public List<Session> retrieveAllSessions() {
|
||||
List<Document> sessions = sessionsCollection.find().into(new ArrayList<>());
|
||||
List<Session> result = new ArrayList<>();
|
||||
for (Document session : sessions) {
|
||||
result.add(new Session_MongoDB_Impl(session));
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
public List<Session> retrieveAllSessions(Bson filter) {
|
||||
List<Document> speeches = sessionsCollection.find(filter).into(new ArrayList<>());
|
||||
List<Session> result = new ArrayList<>();
|
||||
for (Document speech : speeches) {
|
||||
result.add(new Session_MongoDB_Impl(speech));
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
public List<AgendaItem> retrieveAllAgendaItems() {
|
||||
List<Document> agendaItems = agendaItemsCollection.find().into(new ArrayList<>());
|
||||
List<AgendaItem> result = new ArrayList<>();
|
||||
for (Document agendaItem : agendaItems) {
|
||||
result.add(new AgendaItem_MongoDB_Impl(agendaItem));
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
public List<AgendaItem> retrieveAllAgendaItems(Bson filter) {
|
||||
List<Document> speeches = agendaItemsCollection.find(filter).into(new ArrayList<>());
|
||||
List<AgendaItem> result = new ArrayList<>();
|
||||
for (Document speech : speeches) {
|
||||
result.add(new AgendaItem_MongoDB_Impl(speech));
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
public List<MemberOfParliament> retrieveAllMembersOfParliament() {
|
||||
List<Document> members = membersCollection.find().into(new ArrayList<>());
|
||||
List<MemberOfParliament> result = new ArrayList<>();
|
||||
for (Document member : members) {
|
||||
result.add(new MemberOfParliament_MongoDB_Impl(member));
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
public List<MemberOfParliament> retrieveAllMembersOfParliament(Bson filter) throws IOException {
|
||||
List<Document> speeches = membersCollection.find(filter).into(new ArrayList<>());
|
||||
List<MemberOfParliament> result = new ArrayList<>();
|
||||
for (Document speech : speeches) {
|
||||
result.add(new MemberOfParliament_MongoDB_Impl(speech));
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
public List<MemberOfParliament> retrieveAllMembersOfParliament(Bson filter, Bson projection) throws IOException {
|
||||
List<Document> speeches = membersCollection.find(filter).projection(projection).into(new ArrayList<>());
|
||||
List<MemberOfParliament> result = new ArrayList<>();
|
||||
for (Document speech : speeches) {
|
||||
result.add(new MemberOfParliament_MongoDB_Impl(speech));
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
public void deleteAllDocuments() {
|
||||
speechesCollection.deleteMany(new Document());
|
||||
sessionsCollection.deleteMany(new Document());
|
||||
agendaItemsCollection.deleteMany(new Document());
|
||||
//historyCollection.deleteMany(new Document());
|
||||
}
|
||||
|
||||
public void close() {
|
||||
mongoClient.close();
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
@ -0,0 +1,23 @@
|
|||
package org.texttechnologylab.project.gruppe_05_1.database;
|
||||
|
||||
import lombok.Getter;
|
||||
import org.texttechnologylab.project.gruppe_05_1.xml.speeches.Interfaces.*;
|
||||
|
||||
import java.util.List;
|
||||
|
||||
@Getter
|
||||
public class SpeechIndex {
|
||||
private final List<Session> sessions;
|
||||
private final List<Speech> speeches;
|
||||
private final List<AgendaItem> agendaItems;
|
||||
private final List<MemberOfParliament> members;
|
||||
private final List<Fraction> fractions;
|
||||
|
||||
public SpeechIndex(List<Session> sessions, List<Speech> speeches, List<AgendaItem> agendaItems, List<MemberOfParliament> members, List<Fraction> fractions) {
|
||||
this.sessions = sessions;
|
||||
this.speeches = speeches;
|
||||
this.agendaItems = agendaItems;
|
||||
this.members = members;
|
||||
this.fractions = fractions;
|
||||
}
|
||||
}
|
|
@ -0,0 +1,60 @@
|
|||
package org.texttechnologylab.project.gruppe_05_1.database;
|
||||
|
||||
|
||||
import org.xml.sax.SAXException;
|
||||
|
||||
import javax.xml.parsers.ParserConfigurationException;
|
||||
import java.io.IOException;
|
||||
|
||||
public interface SpeechIndexFactory {
|
||||
/**
|
||||
* Set the parseLegislativePeriods flag.
|
||||
* @param parseLegislativePeriods that indicates whether the legislative periods should be parsed which takes a lot longer
|
||||
* @return the SpeechIndexFactory instance
|
||||
*/
|
||||
SpeechIndexFactory parseLegislativePeriods(Boolean parseLegislativePeriods);
|
||||
|
||||
/**
|
||||
* Creates a new SpeechIndexFactory instance.
|
||||
* @return a new SpeechIndexFactory instance
|
||||
* @throws ParserConfigurationException if the parser configuration is invalid
|
||||
*/
|
||||
SpeechIndexFactory builder() throws ParserConfigurationException;
|
||||
|
||||
/**
|
||||
* Parses all sessions, speeches and agenda items.
|
||||
* @return the SpeechIndexFactory instance
|
||||
*/
|
||||
SpeechIndexFactory parseSessions();
|
||||
|
||||
/**
|
||||
* Parses all members of parliament.
|
||||
* @return the SpeechIndexFactory instance
|
||||
* @throws IOException if an I/O error occurs
|
||||
* @throws SAXException if a SAX error occurs
|
||||
*/
|
||||
SpeechIndexFactory parseMembers() throws IOException, SAXException;
|
||||
|
||||
/**
|
||||
* Parses all fractions.
|
||||
* @return the SpeechIndexFactory instance
|
||||
* @throws IOException if an I/O error occurs
|
||||
* @throws SAXException if a SAX error occurs
|
||||
*/
|
||||
SpeechIndexFactory parseFractions() throws IOException, SAXException;
|
||||
|
||||
/**
|
||||
* Builds a new SpeechIndex instance.
|
||||
* @return a new SpeechIndex instance
|
||||
* @throws IOException if an I/O error occurs
|
||||
* @throws SAXException if a SAX error occurs
|
||||
*/
|
||||
SpeechIndex build() throws IOException, SAXException;
|
||||
|
||||
/**
|
||||
* Filters the members for current members.
|
||||
* @param filterForCurrentMembers that indicates whether the members should be filtered for current members
|
||||
* @return the SpeechIndexFactory instance
|
||||
*/
|
||||
SpeechIndexFactory filterForCurrentMembers(boolean filterForCurrentMembers);
|
||||
}
|
|
@ -0,0 +1,63 @@
|
|||
package org.texttechnologylab.project.gruppe_05_1.database;
|
||||
|
||||
|
||||
import org.texttechnologylab.project.gruppe_05_1.xml.speeches.Interfaces.*;
|
||||
import org.texttechnologylab.project.gruppe_05_1.xml.speeches.SpeechParser;
|
||||
import org.xml.sax.SAXException;
|
||||
|
||||
import javax.xml.parsers.ParserConfigurationException;
|
||||
import java.io.IOException;
|
||||
import java.util.List;
|
||||
import java.util.stream.Collectors;
|
||||
|
||||
public class SpeechIndexFactoryImpl implements SpeechIndexFactory {
|
||||
private SpeechParser speechParser;
|
||||
private List<Session> sessions;
|
||||
private List<AgendaItem> agendaItems;
|
||||
private List<Speech> speeches;
|
||||
private List<MemberOfParliament> members;
|
||||
private List<Fraction> fractions;
|
||||
private Boolean parseLegislativePeriods = true;
|
||||
|
||||
|
||||
@Override
|
||||
public SpeechIndexFactoryImpl parseLegislativePeriods(Boolean parseLegislativePeriods) {
|
||||
this.parseLegislativePeriods = parseLegislativePeriods;
|
||||
try {this.speechParser.setParseLegislativePeriods(parseLegislativePeriods);} catch (NullPointerException ignored) {}
|
||||
return this;
|
||||
}
|
||||
|
||||
public SpeechIndexFactoryImpl builder() throws ParserConfigurationException {
|
||||
this.speechParser = new SpeechParser();
|
||||
return this;
|
||||
}
|
||||
|
||||
public SpeechIndexFactoryImpl parseSessions() {
|
||||
this.sessions = this.speechParser.parseAllSessions();
|
||||
this.speeches = this.speechParser.getSpeeches();
|
||||
this.agendaItems = this.speechParser.getAgendaItems();
|
||||
return this;
|
||||
}
|
||||
|
||||
@Override
|
||||
public SpeechIndexFactory parseMembers() throws IOException, SAXException {
|
||||
return null;
|
||||
}
|
||||
|
||||
@Override
|
||||
public SpeechIndexFactory parseFractions() throws IOException, SAXException {
|
||||
return null;
|
||||
}
|
||||
|
||||
@Override
|
||||
public SpeechIndex build() throws IOException, SAXException {
|
||||
return null;
|
||||
}
|
||||
|
||||
|
||||
public SpeechIndexFactoryImpl filterForCurrentMembers(boolean filterForCurrentMembers) {
|
||||
if (!filterForCurrentMembers) return this;
|
||||
this.members = this.members.stream().filter(MemberOfParliament::isCurrentMember).collect(Collectors.toList());
|
||||
return this;
|
||||
}
|
||||
}
|
|
@ -0,0 +1,15 @@
|
|||
package org.texttechnologylab.project.gruppe_05_1.database.domainimp.speeches;
|
||||
|
||||
import org.bson.Document;
|
||||
import org.texttechnologylab.project.gruppe_05_1.xml.speeches.Impls.AgendaItem_File_Impl;
|
||||
import org.texttechnologylab.project.gruppe_05_1.xml.speeches.Interfaces.AgendaItem;
|
||||
|
||||
|
||||
public class AgendaItem_MongoDB_Impl extends AgendaItem_File_Impl implements AgendaItem {
|
||||
public AgendaItem_MongoDB_Impl(Document mongoDocument) {
|
||||
super(
|
||||
mongoDocument.getInteger("id"),
|
||||
mongoDocument.getInteger("sessionId"),
|
||||
mongoDocument.getString("title"));
|
||||
}
|
||||
}
|
|
@ -0,0 +1,17 @@
|
|||
package org.texttechnologylab.project.gruppe_05_1.database.domainimp.speeches;
|
||||
|
||||
import org.bson.Document;
|
||||
import org.texttechnologylab.project.gruppe_05_1.xml.speeches.Impls.Comment_File_Impl;
|
||||
import org.texttechnologylab.project.gruppe_05_1.xml.speeches.Interfaces.Comment;
|
||||
|
||||
|
||||
public class Comment_MongoDB_Impl extends Comment_File_Impl implements Comment {
|
||||
|
||||
public Comment_MongoDB_Impl(Document mongoDocument) {
|
||||
super(
|
||||
mongoDocument.getInteger("contentId"),
|
||||
mongoDocument.getInteger("speechId"),
|
||||
mongoDocument.getString("commentatorName"),
|
||||
mongoDocument.getString("comment"));
|
||||
}
|
||||
}
|
|
@ -0,0 +1,15 @@
|
|||
package org.texttechnologylab.project.gruppe_05_1.database.domainimp.speeches;
|
||||
|
||||
import org.bson.Document;
|
||||
import org.texttechnologylab.project.gruppe_05_1.xml.speeches.Impls.Line_File_Impl;
|
||||
import org.texttechnologylab.project.gruppe_05_1.xml.speeches.Interfaces.Line;
|
||||
|
||||
|
||||
public class Line_MongoDB_Impl extends Line_File_Impl implements Line {
|
||||
public Line_MongoDB_Impl(Document mongoDocument) {
|
||||
super(
|
||||
mongoDocument.getInteger("contentId"),
|
||||
mongoDocument.getInteger("speechId"),
|
||||
mongoDocument.getString("content"));
|
||||
}
|
||||
}
|
|
@ -0,0 +1,25 @@
|
|||
package org.texttechnologylab.project.gruppe_05_1.database.domainimp.speeches;
|
||||
|
||||
import org.bson.Document;
|
||||
import org.texttechnologylab.project.gruppe_05_1.xml.speeches.Impls.MemberOfParliament_File_Impl;
|
||||
import org.texttechnologylab.project.gruppe_05_1.xml.speeches.Interfaces.MemberOfParliament;
|
||||
|
||||
|
||||
public class MemberOfParliament_MongoDB_Impl extends MemberOfParliament_File_Impl implements MemberOfParliament {
|
||||
public MemberOfParliament_MongoDB_Impl(Document mongoDocument) {super(
|
||||
mongoDocument.getString("name"),
|
||||
mongoDocument.getString("firstName"),
|
||||
mongoDocument.getString("title"),
|
||||
mongoDocument.getString("dateOfBirth"),
|
||||
mongoDocument.getString("dateOfDeath"),
|
||||
mongoDocument.getString("placeOfBirth"),
|
||||
mongoDocument.getString("gender"),
|
||||
mongoDocument.getString("religion"),
|
||||
mongoDocument.getInteger("id"),
|
||||
mongoDocument.getString("party"),
|
||||
null,
|
||||
mongoDocument.getInteger("firstLegislativePeriod"),
|
||||
mongoDocument.getInteger("lastLegislativePeriod"),
|
||||
mongoDocument.getString("image_data"));
|
||||
}
|
||||
}
|
|
@ -0,0 +1,17 @@
|
|||
package org.texttechnologylab.project.gruppe_05_1.database.domainimp.speeches;
|
||||
|
||||
import org.bson.Document;
|
||||
import org.texttechnologylab.project.gruppe_05_1.xml.speeches.Impls.Session_File_Impl;
|
||||
import org.texttechnologylab.project.gruppe_05_1.xml.speeches.Interfaces.Session;
|
||||
|
||||
|
||||
public class Session_MongoDB_Impl extends Session_File_Impl implements Session {
|
||||
|
||||
public Session_MongoDB_Impl(Document mongoDocument) {
|
||||
super(
|
||||
mongoDocument.getString("legislativePeriod"),
|
||||
mongoDocument.getInteger("sessionId"),
|
||||
mongoDocument.getString("dateTime"),
|
||||
mongoDocument.getString("endTime"));
|
||||
}
|
||||
}
|
|
@ -0,0 +1,16 @@
|
|||
package org.texttechnologylab.project.gruppe_05_1.database.domainimp.speeches;
|
||||
|
||||
import org.bson.Document;
|
||||
import org.texttechnologylab.project.gruppe_05_1.xml.speeches.Impls.Speaker_File_Impl;
|
||||
import org.texttechnologylab.project.gruppe_05_1.xml.speeches.Interfaces.Speaker;
|
||||
|
||||
public class Speaker_MongoDB_Impl extends Speaker_File_Impl implements Speaker {
|
||||
public Speaker_MongoDB_Impl(Document mongoDocument) {
|
||||
super(
|
||||
mongoDocument.getInteger("contentId"),
|
||||
mongoDocument.getInteger("speechId"),
|
||||
mongoDocument.getInteger("speakerId"),
|
||||
mongoDocument.getString("speakerName"),
|
||||
mongoDocument.getString("fraction"));
|
||||
}
|
||||
}
|
|
@ -0,0 +1,36 @@
|
|||
package org.texttechnologylab.project.gruppe_05_1.database.domainimp.speeches;
|
||||
|
||||
import org.bson.Document;
|
||||
import org.texttechnologylab.project.gruppe_05_1.xml.speeches.Impls.Speech_File_Impl;
|
||||
import org.texttechnologylab.project.gruppe_05_1.xml.speeches.Interfaces.Speech;
|
||||
|
||||
|
||||
import java.util.List;
|
||||
|
||||
public class Speech_MongoDB_Impl extends Speech_File_Impl implements Speech {
|
||||
public Speech_MongoDB_Impl(Document mongoDocument) {
|
||||
super(
|
||||
mongoDocument.getInteger("sessionId"),
|
||||
mongoDocument.getInteger("agendaItemId"),
|
||||
mongoDocument.getInteger("speechId"),
|
||||
mongoDocument.getInteger("speakerId"),
|
||||
mongoDocument.getString("speakerName"),
|
||||
mongoDocument.getString("fraction"));
|
||||
|
||||
for (Document content : (List<Document>) mongoDocument.get("speechContents")) {
|
||||
switch (content.getString("type")) {
|
||||
case "line":
|
||||
this.addContent(new Line_MongoDB_Impl(content));
|
||||
break;
|
||||
case "comment":
|
||||
this.addContent(new Comment_MongoDB_Impl(content));
|
||||
break;
|
||||
case "speaker":
|
||||
this.addContent(new Speaker_MongoDB_Impl(content));
|
||||
break;
|
||||
default:
|
||||
throw new IllegalArgumentException("Unknown content type: " + content.getString("type"));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
|
@ -0,0 +1,27 @@
|
|||
package org.texttechnologylab.project.gruppe_05_1.util;
|
||||
|
||||
/**
 * Minimal console logger: every message is written to standard output as one line,
 * wrapped in a terminal colour escape sequence and prefixed with the current local
 * time and a level tag.
 */
public class Logger {
    /** While this is {@code false}, {@link #debug(String)} prints nothing. */
    private static final boolean DEBUG_LOGGING = false;

    /** Escape sequence appended to every line to reset the terminal colour. */
    private static final String RESET = "\u001B[0m";

    /**
     * Prints a message tagged {@code INFO}.
     *
     * @param message text to print
     */
    public static void info(String message) {
        emit("\u001B[32m", "INFO", message);
    }

    /**
     * Prints a message tagged {@code WARN}.
     *
     * @param message text to print
     */
    public static void warn(String message) {
        emit("\u001B[33m", "WARN", message);
    }

    /**
     * Prints a message tagged {@code ERROR}. Like the other levels, it goes to
     * standard output.
     *
     * @param message text to print
     */
    public static void error(String message) {
        emit("\u001B[31m", "ERROR", message);
    }

    /**
     * Prints a message tagged {@code DEBUG}, but only when {@code DEBUG_LOGGING} is
     * enabled.
     *
     * @param message text to print
     */
    public static void debug(String message) {
        if (!DEBUG_LOGGING) {
            return;
        }
        emit("\u001B[38;5;214m", "DEBUG", message);
    }

    /**
     * Prints a message tagged {@code PINK}.
     *
     * @param message text to print
     */
    public static void pink(String message) {
        emit("\u001B[35m", "PINK", message);
    }

    /** Writes one line: colour code, current local time, level tag, message, reset. */
    private static void emit(String colour, String level, String message) {
        System.out.println(colour + java.time.LocalTime.now() + " " + level + ": " + message + RESET);
    }
}
|
|
@ -17,7 +17,6 @@ import org.texttechnologylab.project.gruppe_05_1.domain.speaker.Membership;
|
|||
import org.texttechnologylab.project.gruppe_05_1.domain.speaker.Speaker;
|
||||
import org.texttechnologylab.project.gruppe_05_1.nlp.NlpUtils;
|
||||
import org.texttechnologylab.project.gruppe_05_1.xml.FileObjectFactory;
|
||||
import org.w3c.dom.Document;
|
||||
import org.w3c.dom.Element;
|
||||
import org.w3c.dom.Node;
|
||||
import org.xml.sax.InputSource;
|
||||
|
@ -361,7 +360,7 @@ public abstract class PPRUtils {
|
|||
while (hasMore) {
|
||||
String queryUrl = "https://www.bundestag.de/ajax/filterlist/de/services/opendata/866354-866354?limit="
|
||||
+ limit + "&noFilterSet=true&offset=" + offset;
|
||||
System.out.println("Lade: " + queryUrl);
|
||||
//System.out.println("Lade: " + queryUrl);
|
||||
try {
|
||||
Document htmlDoc = Jsoup.connect(queryUrl).get();
|
||||
Elements xmlLinks = htmlDoc.select("a.bt-link-dokument");
|
||||
|
@ -372,10 +371,9 @@ public abstract class PPRUtils {
|
|||
|
||||
for (org.jsoup.nodes.Element link : xmlLinks) {
|
||||
String xmlUrl = link.attr("href");
|
||||
System.out.println("Verarbeite XML: " + xmlUrl);
|
||||
//System.out.println("Verarbeite XML: " + xmlUrl);
|
||||
try {
|
||||
org.w3c.dom.Document xmlDoc = downloadAndParseXML(xmlUrl);
|
||||
|
||||
String uniqueId = xmlDoc.getDocumentElement().getAttribute("sitzung-nr");
|
||||
if (processedProtocols.contains(uniqueId)) {
|
||||
System.out.println("Protokoll bereits verarbeitet: " + uniqueId);
|
||||
|
@ -383,7 +381,6 @@ public abstract class PPRUtils {
|
|||
}
|
||||
processedProtocols.add(uniqueId);
|
||||
xmlProtocols.add(xmlDoc);
|
||||
//TODO verarbeitung
|
||||
} catch (Exception e) {
|
||||
System.err.println("Fehler beim Verarbeiten der XML-Datei: " + xmlUrl);
|
||||
e.printStackTrace();
|
||||
|
|
|
@ -37,8 +37,8 @@ public class SpeechParser {
|
|||
List<Session> sessions = new ArrayList<>();
|
||||
this.speeches = new ArrayList<>();
|
||||
this.agendaItems = new ArrayList<>();
|
||||
//TODO Logik so machen dass aus array von xmls gelesen wird nicht aus pfad
|
||||
Set<Document> xmlDocuments = PPRUtils.processXML();
|
||||
System.out.println("All sessions parsed");
|
||||
for (org.w3c.dom.Document xmlDoc:xmlDocuments) {
|
||||
try {
|
||||
File tempFile = convertDocumentToFile(xmlDoc);
|
||||
|
@ -65,7 +65,6 @@ public class SpeechParser {
|
|||
Element root = document.getDocumentElement();
|
||||
String legislativePeriod = root.getAttribute("wahlperiode");
|
||||
int sessionId = Integer.parseInt(root.getAttribute("sitzung-nr"));
|
||||
System.out.println("Session " + sessionId + " wurde gespeichert");
|
||||
String sessionDate = root.getAttribute("sitzung-datum");
|
||||
Element startTimeElement = (Element) root.getElementsByTagName("sitzungsbeginn").item(0);
|
||||
String startTimeString = startTimeElement != null ? startTimeElement.getAttribute("sitzung-start-uhrzeit") : null;
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue