Rollback point
This commit is contained in:
		
							parent
							
								
									8a6548662c
								
							
						
					
					
						commit
						ae5c3f17eb
					
				
					 22 changed files with 751 additions and 10 deletions
				
			
		|  | @ -0,0 +1,7 @@ | |||
| package exceptions; | ||||
| 
 | ||||
/**
 * Unchecked exception raised when a requested agenda item cannot be found.
 */
public class AgendaItemNotFoundException extends RuntimeException {
    /** Detail message shared by every instance of this exception. */
    private static final String MESSAGE = "Agenda Item not found";

    /**
     * Creates the exception with its fixed detail message and no cause.
     */
    public AgendaItemNotFoundException() {
        super(MESSAGE);
    }
}
|  | @ -0,0 +1,7 @@ | |||
| package exceptions; | ||||
| 
 | ||||
/**
 * Unchecked exception raised when a fraction that is about to be created already exists.
 */
public class FractionAlreadyExistsException extends RuntimeException {
    /** Detail message shared by every instance of this exception. */
    private static final String MESSAGE = "Fraction already exists";

    /**
     * Creates the exception with its fixed detail message and no cause.
     */
    public FractionAlreadyExistsException() {
        super(MESSAGE);
    }
}
							
								
								
									
										7
									
								
								src/main/java/exceptions/FractionNotFoundException.java
									
										
									
									
									
										Normal file
									
								
							
							
						
						
									
										7
									
								
								src/main/java/exceptions/FractionNotFoundException.java
									
										
									
									
									
										Normal file
									
								
							|  | @ -0,0 +1,7 @@ | |||
| package exceptions; | ||||
| 
 | ||||
/**
 * Unchecked exception raised when a requested fraction cannot be found.
 */
public class FractionNotFoundException extends RuntimeException {
    /** Detail message shared by every instance of this exception. */
    private static final String MESSAGE = "Fraction not found";

    /**
     * Creates the exception with its fixed detail message and no cause.
     */
    public FractionNotFoundException() {
        super(MESSAGE);
    }
}
							
								
								
									
										7
									
								
								src/main/java/exceptions/MemberNotFoundException.java
									
										
									
									
									
										Normal file
									
								
							
							
						
						
									
										7
									
								
								src/main/java/exceptions/MemberNotFoundException.java
									
										
									
									
									
										Normal file
									
								
							|  | @ -0,0 +1,7 @@ | |||
| package exceptions; | ||||
| 
 | ||||
/**
 * Unchecked exception raised when a requested member cannot be found.
 */
public class MemberNotFoundException extends RuntimeException {
    /** Detail message shared by every instance of this exception. */
    private static final String MESSAGE = "Member not found";

    /**
     * Creates the exception with its fixed detail message and no cause.
     */
    public MemberNotFoundException() {
        super(MESSAGE);
    }
}
							
								
								
									
										7
									
								
								src/main/java/exceptions/ServerErrorException.java
									
										
									
									
									
										Normal file
									
								
							
							
						
						
									
										7
									
								
								src/main/java/exceptions/ServerErrorException.java
									
										
									
									
									
										Normal file
									
								
							|  | @ -0,0 +1,7 @@ | |||
| package exceptions; | ||||
| 
 | ||||
/**
 * Unchecked exception raised when an operation fails for an unexpected, server-side reason.
 */
public class ServerErrorException extends RuntimeException {
    /** Detail message shared by every instance of this exception. */
    private static final String MESSAGE = "Server error occurred";

    /**
     * Creates the exception with its fixed detail message and no cause.
     */
    public ServerErrorException() {
        super(MESSAGE);
    }

    /**
     * Creates the exception with its fixed detail message while keeping the original failure,
     * so that the stack trace of the underlying problem is not lost.
     *
     * @param cause the failure that triggered this exception; may be {@code null}
     */
    public ServerErrorException(Throwable cause) {
        super(MESSAGE, cause);
    }
}
							
								
								
									
										7
									
								
								src/main/java/exceptions/SessionNotFoundException.java
									
										
									
									
									
										Normal file
									
								
							
							
						
						
									
										7
									
								
								src/main/java/exceptions/SessionNotFoundException.java
									
										
									
									
									
										Normal file
									
								
							|  | @ -0,0 +1,7 @@ | |||
| package exceptions; | ||||
| 
 | ||||
/**
 * Unchecked exception raised when a requested session cannot be found.
 */
public class SessionNotFoundException extends RuntimeException {
    /** Detail message shared by every instance of this exception. */
    private static final String MESSAGE = "Session not found";

    /**
     * Creates the exception with its fixed detail message and no cause.
     */
    public SessionNotFoundException() {
        super(MESSAGE);
    }
}
							
								
								
									
										7
									
								
								src/main/java/exceptions/SpeechNotFoundException.java
									
										
									
									
									
										Normal file
									
								
							
							
						
						
									
										7
									
								
								src/main/java/exceptions/SpeechNotFoundException.java
									
										
									
									
									
										Normal file
									
								
							|  | @ -0,0 +1,7 @@ | |||
| package exceptions; | ||||
| 
 | ||||
/**
 * Unchecked exception raised when a requested speech cannot be found.
 */
public class SpeechNotFoundException extends RuntimeException {
    /** Detail message shared by every instance of this exception. */
    private static final String MESSAGE = "Speech not found";

    /**
     * Creates the exception with its fixed detail message and no cause.
     */
    public SpeechNotFoundException() {
        super(MESSAGE);
    }
}
|  | @ -4,19 +4,27 @@ package org.texttechnologylab.project.gruppe_05_1; | |||
| import com.mongodb.client.MongoDatabase; | ||||
| import org.texttechnologylab.project.gruppe_05_1.database.MongoDBHandler; | ||||
| import org.texttechnologylab.project.gruppe_05_1.database.MongoObjectFactory; | ||||
| import org.texttechnologylab.project.gruppe_05_1.database.SpeechIndex; | ||||
| import org.texttechnologylab.project.gruppe_05_1.database.SpeechIndexFactoryImpl; | ||||
| import org.texttechnologylab.project.gruppe_05_1.domain.mdb.Mdb; | ||||
| import org.texttechnologylab.project.gruppe_05_1.domain.mdb.MdbDocument; | ||||
| import org.texttechnologylab.project.gruppe_05_1.rest.RESTHandler; | ||||
| import org.texttechnologylab.project.gruppe_05_1.util.Logger; | ||||
| import org.texttechnologylab.project.gruppe_05_1.util.PPRUtils; | ||||
| import org.texttechnologylab.project.gruppe_05_1.util.PropertiesUtils; | ||||
| import org.texttechnologylab.project.gruppe_05_1.util.XmlUtils; | ||||
| import org.texttechnologylab.project.gruppe_05_1.xml.FileObjectFactory; | ||||
| import org.texttechnologylab.project.gruppe_05_1.xml.speeches.SpeechParser; | ||||
| import org.w3c.dom.Element; | ||||
| import org.xml.sax.SAXException; | ||||
| 
 | ||||
| import javax.xml.parsers.ParserConfigurationException; | ||||
| import java.io.IOException; | ||||
| import java.util.List; | ||||
| import java.util.Properties; | ||||
| 
 | ||||
| import static java.lang.Boolean.TRUE; | ||||
| 
 | ||||
| /* | ||||
| import com.mongodb.client.*; | ||||
| import org.bson.Document; | ||||
|  | @ -46,9 +54,49 @@ public class Main { | |||
|     private static final FileObjectFactory xmlFactory = FileObjectFactory.getFactory(); | ||||
|     private static final MongoObjectFactory mongoFactory = MongoObjectFactory.getFactory(); | ||||
|     private static final SpeechParser speechParser = new SpeechParser(); | ||||
|     public static void main(String[] args) { | ||||
|     public static void main(String[] args) throws ParserConfigurationException, IOException, SAXException { | ||||
| 
 | ||||
|         //TEST | ||||
|         speechParser.parseAllSessions(); | ||||
| 
 | ||||
|         Logger.pink("Parsing XML and inserting data into DB (Uebung 2)..."); | ||||
|         SpeechIndexFactoryImpl speechIndexFactory = new SpeechIndexFactoryImpl(); | ||||
| 
 | ||||
|         SpeechIndex speechIndex = speechIndexFactory | ||||
|                 .builder() | ||||
|                 .parseLegislativePeriods(TRUE) | ||||
|                 .parseSessions() | ||||
|                 .parseMembers() | ||||
|                 .parseFractions() | ||||
|                 .filterForCurrentMembers(TRUE) | ||||
|                 .build(); | ||||
| 
 | ||||
|         //speechIndex.printInfo(); | ||||
| 
 | ||||
|         System.out.println("Data retrieved from DB:"); | ||||
|         System.out.println("MEMBERCOUNT: " + speechIndex.getMembers().size()); | ||||
|         System.out.println("FRACTIONCOUNT: " + speechIndex.getFractions().size()); | ||||
|         System.out.println("SPEECHCOUNT: " + speechIndex.getSpeeches().size()); | ||||
|         System.out.println("SESSIONCOUNT: " + speechIndex.getSessions().size()); | ||||
|         System.out.println("AGENDAITEMCOUNT: " + speechIndex.getAgendaItems().size()); | ||||
| 
 | ||||
|         MongoDBHandler mongoDBHandler = new MongoDBHandler(); | ||||
|         mongoDBHandler.deleteAllDocuments(); // Clear the DB | ||||
| 
 | ||||
|         Logger.pink("Adding Sessions to DB..."); | ||||
|         mongoDBHandler.insertSessions(speechIndex.getSessions()); | ||||
| 
 | ||||
|         Logger.pink("Adding Agenda Items to DB..."); | ||||
|         mongoDBHandler.insertAgendaItems(speechIndex.getAgendaItems()); | ||||
| 
 | ||||
|         Logger.pink("Adding Speeches to DB..."); | ||||
|         mongoDBHandler.insertSpeeches(speechIndex.getSpeeches()); | ||||
| 
 | ||||
|         mongoDBHandler.close(); // Close the connection to the DB | ||||
| 
 | ||||
| 
 | ||||
|         //TEST | ||||
| 
 | ||||
| 
 | ||||
|         // Stellt fest, dass alle nötigen Datenbank-Collections existieren | ||||
|         PPRUtils.ensureCollectionExist(); | ||||
| 
 | ||||
|  |  | |||
|  | @ -7,14 +7,29 @@ import com.mongodb.client.MongoClient; | |||
| import com.mongodb.client.MongoClients; | ||||
| import com.mongodb.client.MongoCollection; | ||||
| import com.mongodb.client.MongoDatabase; | ||||
| import com.mongodb.client.model.Filters; | ||||
| import com.mongodb.client.model.Indexes; | ||||
| import com.mongodb.client.model.Updates; | ||||
| import exceptions.AgendaItemNotFoundException; | ||||
| import exceptions.MemberNotFoundException; | ||||
| import exceptions.ServerErrorException; | ||||
| import exceptions.SessionNotFoundException; | ||||
| import org.bson.Document; | ||||
| import org.bson.conversions.Bson; | ||||
| import org.bson.types.ObjectId; | ||||
| import org.texttechnologylab.DockerUnifiedUIMAInterface.connection.mongodb.MongoDBConfig; | ||||
| import org.texttechnologylab.project.gruppe_05_1.database.domainimp.speeches.AgendaItem_MongoDB_Impl; | ||||
| import org.texttechnologylab.project.gruppe_05_1.database.domainimp.speeches.MemberOfParliament_MongoDB_Impl; | ||||
| import org.texttechnologylab.project.gruppe_05_1.database.domainimp.speeches.Session_MongoDB_Impl; | ||||
| import org.texttechnologylab.project.gruppe_05_1.database.domainimp.speeches.Speech_MongoDB_Impl; | ||||
| import org.texttechnologylab.project.gruppe_05_1.util.Logger; | ||||
| import org.texttechnologylab.project.gruppe_05_1.util.PropertiesUtils; | ||||
| import org.texttechnologylab.project.gruppe_05_1.xml.speeches.Impls.*; | ||||
| import org.texttechnologylab.project.gruppe_05_1.xml.speeches.Interfaces.*; | ||||
| 
 | ||||
| import java.io.IOException; | ||||
| import java.util.*; | ||||
| import java.util.concurrent.TimeUnit; | ||||
| 
 | ||||
| import static com.mongodb.client.model.Filters.eq; | ||||
| 
 | ||||
|  | @ -22,7 +37,8 @@ public class MongoDBHandler { | |||
| 
 | ||||
|     public static final String propertiesFileName = "mongoDB.properties"; | ||||
|     public static final String DEFAULT_ID_FIELD_NAME = "_id"; | ||||
| 
 | ||||
|     private final MongoClient mongoClient; | ||||
|     private final MongoDatabase database; | ||||
|     private static MongoDatabase mongoDatabase = null; | ||||
| 
 | ||||
|     public final static Class<? extends List> DOC_LIST_CLASS = new ArrayList<Document>().getClass(); | ||||
|  | @ -35,6 +51,56 @@ public class MongoDBHandler { | |||
|     private static String collection; | ||||
|     private static String databaseName; | ||||
| 
 | ||||
|     private  MongoCollection<Document> speechesCollection; | ||||
|     private  MongoCollection<Document> membersCollection; | ||||
|     private  MongoCollection<Document> fractionsCollection; | ||||
|     private  MongoCollection<Document> sessionsCollection; | ||||
|     private  MongoCollection<Document> agendaItemsCollection; | ||||
|     private  MongoCollection<Document> historyCollection; | ||||
| 
 | ||||
| 
 | ||||
|     public MongoDBHandler() { | ||||
|         // Load the MongoDB configuration from the properties file | ||||
|         String propertiesFilePath = "config/database.properties"; | ||||
|         // Set loglevel for slf4j to avoid spam | ||||
|         System.setProperty("org.slf4j.simpleLogger.defaultLogLevel", "error"); | ||||
|         Properties mongoProperties = PropertiesUtils.readPropertiesFromResource(propertiesFileName); | ||||
|         // Zugangsdaten | ||||
|         localServer = mongoProperties.getProperty("localserver"); | ||||
|         remoteServer = mongoProperties.getProperty("remote_host"); | ||||
|         user = mongoProperties.getProperty("remote_user"); | ||||
|         password = mongoProperties.getProperty("remote_password"); | ||||
|         port = mongoProperties.getProperty("remote_port"); | ||||
|         collection = mongoProperties.getProperty("remote_collection"); | ||||
|         databaseName = mongoProperties.getProperty("remote_database"); | ||||
| 
 | ||||
|         MongoCredential credential = MongoCredential | ||||
|                 .createCredential( | ||||
|                         user, | ||||
|                         databaseName, | ||||
|                         password.toCharArray()); | ||||
| 
 | ||||
|         MongoClientSettings settings = MongoClientSettings.builder() | ||||
|                 .credential(credential) | ||||
|                 .timeout(180, TimeUnit.HOURS) // needs increased timeout for the bulk speech inserts | ||||
|                 .applyToClusterSettings(builder -> | ||||
|                         builder.hosts(List.of(new ServerAddress(remoteServer, Integer.parseInt(port))))) | ||||
|                 .build(); | ||||
| 
 | ||||
|         mongoClient = MongoClients.create(settings); | ||||
|         database = mongoClient.getDatabase(databaseName); | ||||
|         speechesCollection = database.getCollection("speech"); | ||||
|         membersCollection = database.getCollection("members"); | ||||
|         fractionsCollection = database.getCollection("fractions"); | ||||
|         sessionsCollection = database.getCollection("sessions"); | ||||
|         agendaItemsCollection = database.getCollection("agendaItems"); | ||||
|         historyCollection = database.getCollection("history"); | ||||
|         Logger.info("Connected to MongoDB database: " + databaseName); | ||||
| 
 | ||||
|         // hopeless attempt of creating the fulltext search index :( | ||||
|         membersCollection.createIndex(new Document("collection", 1)); | ||||
|     } | ||||
| 
 | ||||
|     /** | ||||
|      * Get the MongoDB according to properties. | ||||
|      * If a local server URI is defined, use it. Otherwise, use remote server. | ||||
|  | @ -392,4 +458,272 @@ public class MongoDBHandler { | |||
|         collection.deleteOne(deleteQuery); | ||||
|     } | ||||
| 
 | ||||
|     /* | ||||
|      * Justus Jonas operations | ||||
|      * ======================= | ||||
|      */ | ||||
| 
 | ||||
|     public void insertSession(Session session) { | ||||
|         Document sessionDocument = new Document("sessionId", session.getId()) | ||||
|                 .append("dateTime", session.getDateTime()) | ||||
|                 .append("endTime", session.getEndTime()) | ||||
|                 .append("legislativePeriod", session.getLegislativePeriod()); | ||||
| 
 | ||||
|         sessionsCollection.insertOne(sessionDocument); | ||||
|     } | ||||
| 
 | ||||
|     public Session insertSession(String dateTime, String endTime, String legislativePeriod) { | ||||
|         // get a new random sessionId that is not already in use | ||||
|         int sessionId = 0; | ||||
|         while (!retrieveAllSessions(Filters.eq("sessionId", sessionId)).isEmpty()) { | ||||
|             // generate random int | ||||
|             sessionId = (int) (Math.random() * Integer.MAX_VALUE); | ||||
|         } | ||||
| 
 | ||||
|         // create session | ||||
|         Session session = new Session_File_Impl(legislativePeriod, sessionId, dateTime, endTime); | ||||
| 
 | ||||
|         // insert session into DB | ||||
|         insertSession(session); | ||||
| 
 | ||||
|         return session; | ||||
|     } | ||||
| 
 | ||||
| 
 | ||||
|     public void insertSessions(List<Session> sessions) { | ||||
|         for (Session session : sessions) { | ||||
|             insertSession(session); | ||||
|         } | ||||
|     } | ||||
| 
 | ||||
|     public void insertAgendaItems(List<AgendaItem> agendaItems) { | ||||
|         List<Document> agendaItemDocuments = new ArrayList<>(); | ||||
|         for (AgendaItem agendaItem : agendaItems) { | ||||
|             Document agendaItemDocument = new Document("id", agendaItem.getId()) | ||||
|                     .append("sessionId", agendaItem.getSessionId()) | ||||
|                     .append("title", agendaItem.getTitle()); | ||||
| 
 | ||||
|             agendaItemDocuments.add(agendaItemDocument); | ||||
|         } | ||||
| 
 | ||||
|         agendaItemsCollection.insertMany(agendaItemDocuments); | ||||
| 
 | ||||
|     } | ||||
| 
 | ||||
|     public AgendaItem insertAgendaItem(int sessionId, String title) throws SessionNotFoundException, ServerErrorException { | ||||
|         // check if session exists | ||||
|         List<Session> sessions = retrieveAllSessions(Filters.eq("sessionId", sessionId)); | ||||
|         if (sessions.isEmpty()) { | ||||
|             Logger.error("No session found with id " + sessionId); | ||||
|             throw new SessionNotFoundException(); | ||||
|         } | ||||
|         // get a new random agendaItemId that is not already in use | ||||
|         int agendaItemId = 0; | ||||
|         while (!retrieveAllAgendaItems(Filters.eq("id", agendaItemId)).isEmpty()) { | ||||
|             // generate random int | ||||
|             agendaItemId = (int) (Math.random() * Integer.MAX_VALUE); | ||||
|         } | ||||
| 
 | ||||
|         // create agendaItem | ||||
|         AgendaItem agendaItem = new AgendaItem_File_Impl(agendaItemId, sessionId, title); | ||||
| 
 | ||||
|         // insert agendaItem into DB | ||||
|         insertAgendaItems(List.of(agendaItem)); | ||||
| 
 | ||||
|         return agendaItem; | ||||
|     } | ||||
| 
 | ||||
|     public void insertSpeeches(List<Speech> speeches) { | ||||
|         // Convert each Speech to a Document | ||||
|         List<Document> speechDocuments = new ArrayList<>(); | ||||
|         for (Speech speech : speeches) { | ||||
|             Document speechDocument = new Document("sessionId", speech.getSessionId()) | ||||
|                     .append("agendaItemId", speech.getAgendaItemId()) | ||||
|                     .append("speechId", speech.getSpeechId()) | ||||
|                     .append("speakerId", speech.getSpeakerId()) | ||||
|                     .append("speakerName", speech.getSpeakerName()) | ||||
|                     .append("fraction", speech.getFraction()); | ||||
| 
 | ||||
|             // Convert speechContents to a list of Documents | ||||
|             List<Document> contentDocuments = new ArrayList<>(); | ||||
|             for (Content content : speech.getSpeechContents()) { | ||||
|                 if (content instanceof Comment_File_Impl) { | ||||
|                     Comment_File_Impl commentContent = (Comment_File_Impl) content; | ||||
|                     contentDocuments.add(new Document("type", "comment") | ||||
|                             .append("contentId", commentContent.getContentId()) | ||||
|                             .append("speechId", commentContent.getSpeechId()) | ||||
|                             .append("commentatorName", commentContent.getCommentatorName()) | ||||
|                             .append("comment", commentContent.getComment())); | ||||
|                 } else if (content instanceof Line_File_Impl) { | ||||
|                     Line_File_Impl lineContent = (Line_File_Impl) content; | ||||
|                     contentDocuments.add(new Document("type", "line") | ||||
|                             .append("contentId", lineContent.getContentId()) | ||||
|                             .append("speechId", lineContent.getSpeechId()) | ||||
|                             .append("content", lineContent.getContent())); | ||||
|                 } else if (content instanceof Speaker_File_Impl) { | ||||
|                     Speaker_File_Impl speakerContent = (Speaker_File_Impl) content; | ||||
|                     contentDocuments.add(new Document("type", "speaker") | ||||
|                             .append("contentId", speakerContent.getContentId()) | ||||
|                             .append("speechId", speakerContent.getSpeechId()) | ||||
|                             .append("speakerId", speakerContent.getSpeakerId()) | ||||
|                             .append("speakerName", speakerContent.getSpeakerName()) | ||||
|                             .append("fraction", speakerContent.getFraction())); | ||||
|                 } | ||||
|             } | ||||
| 
 | ||||
|             // Add the speech contents to the speech document | ||||
|             speechDocument.append("speechContents", contentDocuments); | ||||
| 
 | ||||
|             // Add the speech document to the list | ||||
|             speechDocuments.add(speechDocument); | ||||
|         } | ||||
| 
 | ||||
|         // Insert all documents at once using insertMany | ||||
|         speechesCollection.insertMany(speechDocuments); | ||||
|     } | ||||
| 
 | ||||
|     public Speech insertSpeech(int speakerId, int sessionId, int agendaItemId) throws MemberNotFoundException, SessionNotFoundException, AgendaItemNotFoundException, ServerErrorException { | ||||
|         // fetch member by speakerId | ||||
|         MemberOfParliament member; | ||||
|         try { | ||||
|             List<MemberOfParliament> members = retrieveAllMembersOfParliament(Filters.eq("id", speakerId)); | ||||
|             if (members.isEmpty()) { | ||||
|                 Logger.error("No member found with id " + speakerId); | ||||
|                 throw new MemberNotFoundException(); | ||||
|             } else if (members.size() > 1) { | ||||
|                 Logger.warn("Multiple members found with id " + speakerId); | ||||
|                 throw new ServerErrorException(); | ||||
|             } | ||||
|             member = members.get(0); | ||||
|         } catch (IOException e) { | ||||
|             Logger.error("Failed to retrieve member with id " + speakerId); | ||||
|             throw new ServerErrorException(); | ||||
|         } | ||||
|         // check if session and agendaItem exist | ||||
|         List<Session> sessions = retrieveAllSessions(Filters.eq("sessionId", sessionId)); | ||||
|         if (sessions.isEmpty()) { | ||||
|             Logger.error("No session found with id " + sessionId); | ||||
|             throw new SessionNotFoundException(); | ||||
|         } | ||||
|         List<AgendaItem> agendaItems = retrieveAllAgendaItems(Filters.eq("id", agendaItemId)); | ||||
|         if (agendaItems.isEmpty()) { | ||||
|             Logger.error("No agendaItem found with id " + agendaItemId); | ||||
|             throw new AgendaItemNotFoundException(); | ||||
|         } | ||||
|         // get a new random speechId that is not already in use | ||||
|         int speechId = 0; | ||||
|         while (!retrieveAllSpeeches(Filters.eq("speechId", speechId)).isEmpty()) { | ||||
|             // generate random int | ||||
|             speechId = (int) (Math.random() * Integer.MAX_VALUE); | ||||
|         } | ||||
| 
 | ||||
|         // create speech | ||||
|         Speech speech = new Speech_File_Impl(sessionId, agendaItemId, speakerId, speechId, member.getName(), member.getParty()); | ||||
| 
 | ||||
|         // insert speech into DB | ||||
|         insertSpeeches(List.of(speech)); | ||||
| 
 | ||||
|         return speech; | ||||
|     } | ||||
| 
 | ||||
|     public List<Speech> retrieveAllSpeeches() { | ||||
|         List<Document> speeches = speechesCollection.find().into(new ArrayList<>()); | ||||
|         List<Speech> result = new ArrayList<>(); | ||||
|         for (Document speech : speeches) { | ||||
|             result.add(new Speech_MongoDB_Impl(speech)); | ||||
|         } | ||||
| 
 | ||||
|         return result; | ||||
|     } | ||||
| 
 | ||||
|     public List<Speech> retrieveAllSpeeches(Bson filter) { | ||||
|         List<Document> speeches = speechesCollection.find(filter).into(new ArrayList<>()); | ||||
|         List<Speech> result = new ArrayList<>(); | ||||
|         for (Document speech : speeches) { | ||||
|             result.add(new Speech_MongoDB_Impl(speech)); | ||||
|         } | ||||
| 
 | ||||
|         return result; | ||||
|     } | ||||
| 
 | ||||
|     public List<Session> retrieveAllSessions() { | ||||
|         List<Document> sessions = sessionsCollection.find().into(new ArrayList<>()); | ||||
|         List<Session> result = new ArrayList<>(); | ||||
|         for (Document session : sessions) { | ||||
|             result.add(new Session_MongoDB_Impl(session)); | ||||
|         } | ||||
| 
 | ||||
|         return result; | ||||
|     } | ||||
| 
 | ||||
|     public List<Session> retrieveAllSessions(Bson filter) { | ||||
|         List<Document> speeches = sessionsCollection.find(filter).into(new ArrayList<>()); | ||||
|         List<Session> result = new ArrayList<>(); | ||||
|         for (Document speech : speeches) { | ||||
|             result.add(new Session_MongoDB_Impl(speech)); | ||||
|         } | ||||
| 
 | ||||
|         return result; | ||||
|     } | ||||
| 
 | ||||
|     public List<AgendaItem> retrieveAllAgendaItems() { | ||||
|         List<Document> agendaItems = agendaItemsCollection.find().into(new ArrayList<>()); | ||||
|         List<AgendaItem> result = new ArrayList<>(); | ||||
|         for (Document agendaItem : agendaItems) { | ||||
|             result.add(new AgendaItem_MongoDB_Impl(agendaItem)); | ||||
|         } | ||||
| 
 | ||||
|         return result; | ||||
|     } | ||||
| 
 | ||||
|     public List<AgendaItem> retrieveAllAgendaItems(Bson filter) { | ||||
|         List<Document> speeches = agendaItemsCollection.find(filter).into(new ArrayList<>()); | ||||
|         List<AgendaItem> result = new ArrayList<>(); | ||||
|         for (Document speech : speeches) { | ||||
|             result.add(new AgendaItem_MongoDB_Impl(speech)); | ||||
|         } | ||||
| 
 | ||||
|         return result; | ||||
|     } | ||||
| 
 | ||||
|     public List<MemberOfParliament> retrieveAllMembersOfParliament() { | ||||
|         List<Document> members = membersCollection.find().into(new ArrayList<>()); | ||||
|         List<MemberOfParliament> result = new ArrayList<>(); | ||||
|         for (Document member : members) { | ||||
|             result.add(new MemberOfParliament_MongoDB_Impl(member)); | ||||
|         } | ||||
| 
 | ||||
|         return result; | ||||
|     } | ||||
| 
 | ||||
|     public List<MemberOfParliament> retrieveAllMembersOfParliament(Bson filter) throws IOException { | ||||
|         List<Document> speeches = membersCollection.find(filter).into(new ArrayList<>()); | ||||
|         List<MemberOfParliament> result = new ArrayList<>(); | ||||
|         for (Document speech : speeches) { | ||||
|             result.add(new MemberOfParliament_MongoDB_Impl(speech)); | ||||
|         } | ||||
|         return result; | ||||
|     } | ||||
| 
 | ||||
|     public List<MemberOfParliament> retrieveAllMembersOfParliament(Bson filter, Bson projection) throws IOException { | ||||
|         List<Document> speeches = membersCollection.find(filter).projection(projection).into(new ArrayList<>()); | ||||
|         List<MemberOfParliament> result = new ArrayList<>(); | ||||
|         for (Document speech : speeches) { | ||||
|             result.add(new MemberOfParliament_MongoDB_Impl(speech)); | ||||
|         } | ||||
| 
 | ||||
|         return result; | ||||
|     } | ||||
| 
 | ||||
|     public void deleteAllDocuments() { | ||||
|         speechesCollection.deleteMany(new Document()); | ||||
|         sessionsCollection.deleteMany(new Document()); | ||||
|         agendaItemsCollection.deleteMany(new Document()); | ||||
|         //historyCollection.deleteMany(new Document()); | ||||
|     } | ||||
| 
 | ||||
|     public void close() { | ||||
|         mongoClient.close(); | ||||
|     } | ||||
| 
 | ||||
| } | ||||
|  |  | |||
|  | @ -0,0 +1,23 @@ | |||
| package org.texttechnologylab.project.gruppe_05_1.database; | ||||
| 
 | ||||
| import lombok.Getter; | ||||
| import org.texttechnologylab.project.gruppe_05_1.xml.speeches.Interfaces.*; | ||||
| 
 | ||||
| import java.util.List; | ||||
| 
 | ||||
| @Getter | ||||
| public class SpeechIndex { | ||||
|     private final List<Session> sessions; | ||||
|     private final List<Speech> speeches; | ||||
|     private final List<AgendaItem> agendaItems; | ||||
|     private final List<MemberOfParliament> members; | ||||
|     private final List<Fraction> fractions; | ||||
| 
 | ||||
|     public SpeechIndex(List<Session> sessions, List<Speech> speeches, List<AgendaItem> agendaItems, List<MemberOfParliament> members, List<Fraction> fractions) { | ||||
|         this.sessions = sessions; | ||||
|         this.speeches = speeches; | ||||
|         this.agendaItems = agendaItems; | ||||
|         this.members = members; | ||||
|         this.fractions = fractions; | ||||
|     } | ||||
| } | ||||
|  | @ -0,0 +1,60 @@ | |||
| package org.texttechnologylab.project.gruppe_05_1.database; | ||||
| 
 | ||||
| 
 | ||||
| import org.xml.sax.SAXException; | ||||
| 
 | ||||
| import javax.xml.parsers.ParserConfigurationException; | ||||
| import java.io.IOException; | ||||
| 
 | ||||
| public interface SpeechIndexFactory { | ||||
|     /** | ||||
|      * Set the parseLegislativePeriods flag. | ||||
|      * @param parseLegislativePeriods that indicates whether the legislative periods should be parsed which takes a lot longer | ||||
|      * @return the SpeechIndexFactory instance | ||||
|      */ | ||||
|     SpeechIndexFactory parseLegislativePeriods(Boolean parseLegislativePeriods); | ||||
| 
 | ||||
|     /** | ||||
|      * Creates a new SpeechIndexFactory instance. | ||||
|      * @return a new SpeechIndexFactory instance | ||||
|      * @throws ParserConfigurationException if the parser configuration is invalid | ||||
|      */ | ||||
|     SpeechIndexFactory builder() throws ParserConfigurationException; | ||||
| 
 | ||||
|     /** | ||||
|      * Parses all sessions, speeches and agenda items. | ||||
|      * @return the SpeechIndexFactory instance | ||||
|      */ | ||||
|     SpeechIndexFactory parseSessions(); | ||||
| 
 | ||||
|     /** | ||||
|      * Parses all members of parliament. | ||||
|      * @return the SpeechIndexFactory instance | ||||
|      * @throws IOException if an I/O error occurs | ||||
|      * @throws SAXException if a SAX error occurs | ||||
|      */ | ||||
|     SpeechIndexFactory parseMembers() throws IOException, SAXException; | ||||
| 
 | ||||
|     /** | ||||
|      * Parses all fractions. | ||||
|      * @return the SpeechIndexFactory instance | ||||
|      * @throws IOException if an I/O error occurs | ||||
|      * @throws SAXException if a SAX error occurs | ||||
|      */ | ||||
|     SpeechIndexFactory parseFractions() throws IOException, SAXException; | ||||
| 
 | ||||
|     /** | ||||
|      * Builds a new SpeechIndex instance. | ||||
|      * @return a new SpeechIndex instance | ||||
|      * @throws IOException if an I/O error occurs | ||||
|      * @throws SAXException if a SAX error occurs | ||||
|      */ | ||||
|     SpeechIndex build() throws IOException, SAXException; | ||||
| 
 | ||||
|     /** | ||||
|      * Filters the members for current members. | ||||
|      * @param filterForCurrentMembers that indicates whether the members should be filtered for current members | ||||
|      * @return the SpeechIndexFactory instance | ||||
|      */ | ||||
|     SpeechIndexFactory filterForCurrentMembers(boolean filterForCurrentMembers); | ||||
| } | ||||
|  | @ -0,0 +1,63 @@ | |||
| package org.texttechnologylab.project.gruppe_05_1.database; | ||||
| 
 | ||||
| 
 | ||||
| import org.texttechnologylab.project.gruppe_05_1.xml.speeches.Interfaces.*; | ||||
| import org.texttechnologylab.project.gruppe_05_1.xml.speeches.SpeechParser; | ||||
| import org.xml.sax.SAXException; | ||||
| 
 | ||||
| import javax.xml.parsers.ParserConfigurationException; | ||||
| import java.io.IOException; | ||||
| import java.util.List; | ||||
| import java.util.stream.Collectors; | ||||
| 
 | ||||
| public class SpeechIndexFactoryImpl implements SpeechIndexFactory { | ||||
|     private SpeechParser speechParser; | ||||
|     private List<Session> sessions; | ||||
|     private List<AgendaItem> agendaItems; | ||||
|     private List<Speech> speeches; | ||||
|     private List<MemberOfParliament> members; | ||||
|     private List<Fraction> fractions; | ||||
|     private Boolean parseLegislativePeriods = true; | ||||
| 
 | ||||
| 
 | ||||
|     @Override | ||||
|     public SpeechIndexFactoryImpl parseLegislativePeriods(Boolean parseLegislativePeriods) { | ||||
|         this.parseLegislativePeriods = parseLegislativePeriods; | ||||
|         try {this.speechParser.setParseLegislativePeriods(parseLegislativePeriods);} catch (NullPointerException ignored) {} | ||||
|         return this; | ||||
|     } | ||||
| 
 | ||||
|     public SpeechIndexFactoryImpl builder() throws ParserConfigurationException { | ||||
|        this.speechParser = new SpeechParser(); | ||||
|        return this; | ||||
|     } | ||||
| 
 | ||||
|     public SpeechIndexFactoryImpl parseSessions() { | ||||
|         this.sessions = this.speechParser.parseAllSessions(); | ||||
|         this.speeches = this.speechParser.getSpeeches(); | ||||
|         this.agendaItems = this.speechParser.getAgendaItems(); | ||||
|         return this; | ||||
|     } | ||||
| 
 | ||||
|     @Override | ||||
|     public SpeechIndexFactory parseMembers() throws IOException, SAXException { | ||||
|         return null; | ||||
|     } | ||||
| 
 | ||||
|     @Override | ||||
|     public SpeechIndexFactory parseFractions() throws IOException, SAXException { | ||||
|         return null; | ||||
|     } | ||||
| 
 | ||||
|     @Override | ||||
|     public SpeechIndex build() throws IOException, SAXException { | ||||
|         return null; | ||||
|     } | ||||
| 
 | ||||
| 
 | ||||
|     public SpeechIndexFactoryImpl filterForCurrentMembers(boolean filterForCurrentMembers) { | ||||
|         if (!filterForCurrentMembers) return this; | ||||
|         this.members = this.members.stream().filter(MemberOfParliament::isCurrentMember).collect(Collectors.toList()); | ||||
|         return this; | ||||
|     } | ||||
| } | ||||
|  | @ -0,0 +1,15 @@ | |||
| package org.texttechnologylab.project.gruppe_05_1.database.domainimp.speeches; | ||||
| 
 | ||||
| import org.bson.Document; | ||||
| import org.texttechnologylab.project.gruppe_05_1.xml.speeches.Impls.AgendaItem_File_Impl; | ||||
| import org.texttechnologylab.project.gruppe_05_1.xml.speeches.Interfaces.AgendaItem; | ||||
| 
 | ||||
| 
 | ||||
| public class AgendaItem_MongoDB_Impl extends AgendaItem_File_Impl implements AgendaItem { | ||||
|     public AgendaItem_MongoDB_Impl(Document mongoDocument) { | ||||
|         super( | ||||
|                 mongoDocument.getInteger("id"), | ||||
|                 mongoDocument.getInteger("sessionId"), | ||||
|                 mongoDocument.getString("title")); | ||||
|     } | ||||
| } | ||||
|  | @ -0,0 +1,17 @@ | |||
| package org.texttechnologylab.project.gruppe_05_1.database.domainimp.speeches; | ||||
| 
 | ||||
| import org.bson.Document; | ||||
| import org.texttechnologylab.project.gruppe_05_1.xml.speeches.Impls.Comment_File_Impl; | ||||
| import org.texttechnologylab.project.gruppe_05_1.xml.speeches.Interfaces.Comment; | ||||
| 
 | ||||
| 
 | ||||
| public class Comment_MongoDB_Impl extends Comment_File_Impl implements Comment { | ||||
| 
 | ||||
|     public Comment_MongoDB_Impl(Document mongoDocument) { | ||||
|         super( | ||||
|                 mongoDocument.getInteger("contentId"), | ||||
|                 mongoDocument.getInteger("speechId"), | ||||
|                 mongoDocument.getString("commentatorName"), | ||||
|                 mongoDocument.getString("comment")); | ||||
|     } | ||||
| } | ||||
|  | @ -0,0 +1,15 @@ | |||
| package org.texttechnologylab.project.gruppe_05_1.database.domainimp.speeches; | ||||
| 
 | ||||
| import org.bson.Document; | ||||
| import org.texttechnologylab.project.gruppe_05_1.xml.speeches.Impls.Line_File_Impl; | ||||
| import org.texttechnologylab.project.gruppe_05_1.xml.speeches.Interfaces.Line; | ||||
| 
 | ||||
| 
 | ||||
| public class Line_MongoDB_Impl extends Line_File_Impl implements Line { | ||||
|     public Line_MongoDB_Impl(Document mongoDocument) { | ||||
|         super( | ||||
|                 mongoDocument.getInteger("contentId"), | ||||
|                 mongoDocument.getInteger("speechId"), | ||||
|                 mongoDocument.getString("content")); | ||||
|     } | ||||
| } | ||||
|  | @ -0,0 +1,25 @@ | |||
| package org.texttechnologylab.project.gruppe_05_1.database.domainimp.speeches; | ||||
| 
 | ||||
| import org.bson.Document; | ||||
| import org.texttechnologylab.project.gruppe_05_1.xml.speeches.Impls.MemberOfParliament_File_Impl; | ||||
| import org.texttechnologylab.project.gruppe_05_1.xml.speeches.Interfaces.MemberOfParliament; | ||||
| 
 | ||||
| 
 | ||||
| public class MemberOfParliament_MongoDB_Impl extends MemberOfParliament_File_Impl implements MemberOfParliament { | ||||
|     public MemberOfParliament_MongoDB_Impl(Document mongoDocument) {super( | ||||
|                 mongoDocument.getString("name"), | ||||
|                 mongoDocument.getString("firstName"), | ||||
|                 mongoDocument.getString("title"), | ||||
|                 mongoDocument.getString("dateOfBirth"), | ||||
|                 mongoDocument.getString("dateOfDeath"), | ||||
|                 mongoDocument.getString("placeOfBirth"), | ||||
|                 mongoDocument.getString("gender"), | ||||
|                 mongoDocument.getString("religion"), | ||||
|                 mongoDocument.getInteger("id"), | ||||
|                 mongoDocument.getString("party"), | ||||
|                 null, | ||||
|                 mongoDocument.getInteger("firstLegislativePeriod"), | ||||
|                 mongoDocument.getInteger("lastLegislativePeriod"), | ||||
|                 mongoDocument.getString("image_data")); | ||||
|     } | ||||
| } | ||||
|  | @ -0,0 +1,17 @@ | |||
| package org.texttechnologylab.project.gruppe_05_1.database.domainimp.speeches; | ||||
| 
 | ||||
| import org.bson.Document; | ||||
| import org.texttechnologylab.project.gruppe_05_1.xml.speeches.Impls.Session_File_Impl; | ||||
| import org.texttechnologylab.project.gruppe_05_1.xml.speeches.Interfaces.Session; | ||||
| 
 | ||||
| 
 | ||||
| public class Session_MongoDB_Impl extends Session_File_Impl implements Session { | ||||
| 
 | ||||
|     public Session_MongoDB_Impl(Document mongoDocument) { | ||||
|         super( | ||||
|                 mongoDocument.getString("legislativePeriod"), | ||||
|                 mongoDocument.getInteger("sessionId"), | ||||
|                 mongoDocument.getString("dateTime"), | ||||
|                 mongoDocument.getString("endTime")); | ||||
|     } | ||||
| } | ||||
|  | @ -0,0 +1,16 @@ | |||
| package org.texttechnologylab.project.gruppe_05_1.database.domainimp.speeches; | ||||
| 
 | ||||
| import org.bson.Document; | ||||
| import org.texttechnologylab.project.gruppe_05_1.xml.speeches.Impls.Speaker_File_Impl; | ||||
| import org.texttechnologylab.project.gruppe_05_1.xml.speeches.Interfaces.Speaker; | ||||
| 
 | ||||
| public class Speaker_MongoDB_Impl extends Speaker_File_Impl implements Speaker { | ||||
|     public Speaker_MongoDB_Impl(Document mongoDocument) { | ||||
|         super( | ||||
|                 mongoDocument.getInteger("contentId"), | ||||
|                 mongoDocument.getInteger("speechId"), | ||||
|                 mongoDocument.getInteger("speakerId"), | ||||
|                 mongoDocument.getString("speakerName"), | ||||
|                 mongoDocument.getString("fraction")); | ||||
|     } | ||||
| } | ||||
|  | @ -0,0 +1,36 @@ | |||
| package org.texttechnologylab.project.gruppe_05_1.database.domainimp.speeches; | ||||
| 
 | ||||
| import org.bson.Document; | ||||
| import org.texttechnologylab.project.gruppe_05_1.xml.speeches.Impls.Speech_File_Impl; | ||||
| import org.texttechnologylab.project.gruppe_05_1.xml.speeches.Interfaces.Speech; | ||||
| 
 | ||||
| 
 | ||||
| import java.util.List; | ||||
| 
 | ||||
| public class Speech_MongoDB_Impl  extends Speech_File_Impl implements Speech { | ||||
|     public Speech_MongoDB_Impl(Document mongoDocument) { | ||||
|         super( | ||||
|                 mongoDocument.getInteger("sessionId"), | ||||
|                 mongoDocument.getInteger("agendaItemId"), | ||||
|                 mongoDocument.getInteger("speechId"), | ||||
|                 mongoDocument.getInteger("speakerId"), | ||||
|                 mongoDocument.getString("speakerName"), | ||||
|                 mongoDocument.getString("fraction")); | ||||
| 
 | ||||
|         for (Document content : (List<Document>) mongoDocument.get("speechContents")) { | ||||
|             switch (content.getString("type")) { | ||||
|                 case "line": | ||||
|                     this.addContent(new Line_MongoDB_Impl(content)); | ||||
|                     break; | ||||
|                 case "comment": | ||||
|                     this.addContent(new Comment_MongoDB_Impl(content)); | ||||
|                     break; | ||||
|                 case "speaker": | ||||
|                     this.addContent(new Speaker_MongoDB_Impl(content)); | ||||
|                     break; | ||||
|                 default: | ||||
|                     throw new IllegalArgumentException("Unknown content type: " + content.getString("type")); | ||||
|             } | ||||
|         } | ||||
|     } | ||||
| } | ||||
|  | @ -0,0 +1,27 @@ | |||
| package org.texttechnologylab.project.gruppe_05_1.util; | ||||
| 
 | ||||
/**
 * Minimal console logger.
 *
 * <p>Every message is written to standard output as one line consisting of an ANSI color code,
 * the current local time, the level name, the message and the ANSI reset code.
 *
 * <p>Debug output is off by default and can be switched on without recompiling by setting the
 * system property {@code gruppe_05_1.logger.debug} to {@code true}
 * (for example {@code -Dgruppe_05_1.logger.debug=true}).
 */
public class Logger {
    /** Name of the system property that enables {@link #debug(String)} output. */
    private static final String DEBUG_PROPERTY = "gruppe_05_1.logger.debug";

    /** ANSI sequence that resets the terminal color after each message. */
    private static final String ANSI_RESET = "\u001B[0m";

    /**
     * Prints an informational message in green.
     *
     * @param message the text to print
     */
    public static void info(String message) {
        print("\u001B[32m", "INFO", message);
    }

    /**
     * Prints a warning in yellow.
     *
     * @param message the text to print
     */
    public static void warn(String message) {
        print("\u001B[33m", "WARN", message);
    }

    /**
     * Prints an error message in red. Like all other levels it goes to standard output.
     *
     * @param message the text to print
     */
    public static void error(String message) {
        print("\u001B[31m", "ERROR", message);
    }

    /**
     * Prints a debug message in orange, but only while the debug system property is {@code true}.
     *
     * @param message the text to print
     */
    public static void debug(String message) {
        // Read on every call so the switch also works when the property is set at runtime.
        if (Boolean.getBoolean(DEBUG_PROPERTY)) {
            print("\u001B[38;5;214m", "DEBUG", message);
        }
    }

    /**
     * Prints a message in magenta with the level name {@code PINK}.
     *
     * @param message the text to print
     */
    public static void pink(String message) {
        print("\u001B[35m", "PINK", message);
    }

    /** Writes one colored, timestamped line for the given level to standard output. */
    private static void print(String ansiColor, String level, String message) {
        System.out.println(ansiColor + java.time.LocalTime.now() + " " + level + ": " + message + ANSI_RESET);
    }
}
|  | @ -17,7 +17,6 @@ import org.texttechnologylab.project.gruppe_05_1.domain.speaker.Membership; | |||
| import org.texttechnologylab.project.gruppe_05_1.domain.speaker.Speaker; | ||||
| import org.texttechnologylab.project.gruppe_05_1.nlp.NlpUtils; | ||||
| import org.texttechnologylab.project.gruppe_05_1.xml.FileObjectFactory; | ||||
| import org.w3c.dom.Document; | ||||
| import org.w3c.dom.Element; | ||||
| import org.w3c.dom.Node; | ||||
| import org.xml.sax.InputSource; | ||||
|  | @ -361,7 +360,7 @@ public abstract class PPRUtils { | |||
|         while (hasMore) { | ||||
|             String queryUrl = "https://www.bundestag.de/ajax/filterlist/de/services/opendata/866354-866354?limit=" | ||||
|                     + limit + "&noFilterSet=true&offset=" + offset; | ||||
|             System.out.println("Lade: " + queryUrl); | ||||
|             //System.out.println("Lade: " + queryUrl); | ||||
|             try { | ||||
|                 Document htmlDoc = Jsoup.connect(queryUrl).get(); | ||||
|                 Elements xmlLinks = htmlDoc.select("a.bt-link-dokument"); | ||||
|  | @ -372,10 +371,9 @@ public abstract class PPRUtils { | |||
| 
 | ||||
|                 for (org.jsoup.nodes.Element link : xmlLinks) { | ||||
|                     String xmlUrl = link.attr("href"); | ||||
|                     System.out.println("Verarbeite XML: " + xmlUrl); | ||||
|                     //System.out.println("Verarbeite XML: " + xmlUrl); | ||||
|                     try { | ||||
|                         org.w3c.dom.Document xmlDoc = downloadAndParseXML(xmlUrl); | ||||
| 
 | ||||
|                         String uniqueId = xmlDoc.getDocumentElement().getAttribute("sitzung-nr"); | ||||
|                         if (processedProtocols.contains(uniqueId)) { | ||||
|                             System.out.println("Protokoll bereits verarbeitet: " + uniqueId); | ||||
|  | @ -383,7 +381,6 @@ public abstract class PPRUtils { | |||
|                         } | ||||
|                         processedProtocols.add(uniqueId); | ||||
|                         xmlProtocols.add(xmlDoc); | ||||
|                         //TODO verarbeitung | ||||
|                     } catch (Exception e) { | ||||
|                         System.err.println("Fehler beim Verarbeiten der XML-Datei: " + xmlUrl); | ||||
|                         e.printStackTrace(); | ||||
|  |  | |||
|  | @ -37,8 +37,8 @@ public class SpeechParser { | |||
|         List<Session> sessions = new ArrayList<>(); | ||||
|         this.speeches = new ArrayList<>(); | ||||
|         this.agendaItems = new ArrayList<>(); | ||||
|         //TODO Logik so machen dass aus array von xmls gelesen wird nicht aus pfad | ||||
|         Set<Document> xmlDocuments = PPRUtils.processXML(); | ||||
|         System.out.println("All sessions parsed"); | ||||
|         for (org.w3c.dom.Document xmlDoc:xmlDocuments) { | ||||
|             try { | ||||
|                 File tempFile = convertDocumentToFile(xmlDoc); | ||||
|  | @ -65,7 +65,6 @@ public class SpeechParser { | |||
|         Element root = document.getDocumentElement(); | ||||
|         String legislativePeriod = root.getAttribute("wahlperiode"); | ||||
|         int sessionId = Integer.parseInt(root.getAttribute("sitzung-nr")); | ||||
|         System.out.println("Session " + sessionId + " wurde gespeichert"); | ||||
|         String sessionDate = root.getAttribute("sitzung-datum"); | ||||
|         Element startTimeElement = (Element) root.getElementsByTagName("sitzungsbeginn").item(0); | ||||
|         String startTimeString = startTimeElement != null ? startTimeElement.getAttribute("sitzung-start-uhrzeit") : null; | ||||
|  |  | |||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue