implemented import of speeches to database
This commit is contained in:
parent
ae5c3f17eb
commit
631560a94e
7 changed files with 35 additions and 139 deletions
|
@ -2,10 +2,7 @@ package org.texttechnologylab.project.gruppe_05_1;
|
|||
|
||||
|
||||
import com.mongodb.client.MongoDatabase;
|
||||
import org.texttechnologylab.project.gruppe_05_1.database.MongoDBHandler;
|
||||
import org.texttechnologylab.project.gruppe_05_1.database.MongoObjectFactory;
|
||||
import org.texttechnologylab.project.gruppe_05_1.database.SpeechIndex;
|
||||
import org.texttechnologylab.project.gruppe_05_1.database.SpeechIndexFactoryImpl;
|
||||
import org.texttechnologylab.project.gruppe_05_1.database.*;
|
||||
import org.texttechnologylab.project.gruppe_05_1.domain.mdb.Mdb;
|
||||
import org.texttechnologylab.project.gruppe_05_1.domain.mdb.MdbDocument;
|
||||
import org.texttechnologylab.project.gruppe_05_1.rest.RESTHandler;
|
||||
|
@ -23,6 +20,7 @@ import java.io.IOException;
|
|||
import java.util.List;
|
||||
import java.util.Properties;
|
||||
|
||||
import static java.lang.Boolean.FALSE;
|
||||
import static java.lang.Boolean.TRUE;
|
||||
|
||||
/*
|
||||
|
@ -60,38 +58,38 @@ public class Main {
|
|||
|
||||
Logger.pink("Parsing XML and inserting data into DB (Uebung 2)...");
|
||||
SpeechIndexFactoryImpl speechIndexFactory = new SpeechIndexFactoryImpl();
|
||||
if (MongoPprUtils.getSpeechCollection().countDocuments() != 0) {
|
||||
System.out.println("Speeches werden nicht gelesen, da sie bereits in der Datenbank stehen");
|
||||
}
|
||||
else {
|
||||
SpeechIndex speechIndex = speechIndexFactory
|
||||
.parseLegislativePeriods(TRUE)
|
||||
.builder()
|
||||
.parseSessions()
|
||||
.filterForCurrentMembers(FALSE)
|
||||
.build();
|
||||
|
||||
SpeechIndex speechIndex = speechIndexFactory
|
||||
.builder()
|
||||
.parseLegislativePeriods(TRUE)
|
||||
.parseSessions()
|
||||
.parseMembers()
|
||||
.parseFractions()
|
||||
.filterForCurrentMembers(TRUE)
|
||||
.build();
|
||||
//speechIndex.printInfo();
|
||||
|
||||
//speechIndex.printInfo();
|
||||
System.out.println("Data retrieved from DB:");
|
||||
System.out.println("SPEECHCOUNT: " + speechIndex.getSpeeches().size());
|
||||
System.out.println("SESSIONCOUNT: " + speechIndex.getSessions().size());
|
||||
System.out.println("AGENDAITEMCOUNT: " + speechIndex.getAgendaItems().size());
|
||||
|
||||
System.out.println("Data retrieved from DB:");
|
||||
System.out.println("MEMBERCOUNT: " + speechIndex.getMembers().size());
|
||||
System.out.println("FRACTIONCOUNT: " + speechIndex.getFractions().size());
|
||||
System.out.println("SPEECHCOUNT: " + speechIndex.getSpeeches().size());
|
||||
System.out.println("SESSIONCOUNT: " + speechIndex.getSessions().size());
|
||||
System.out.println("AGENDAITEMCOUNT: " + speechIndex.getAgendaItems().size());
|
||||
MongoDBHandler mongoDBHandler = new MongoDBHandler();
|
||||
mongoDBHandler.deleteAllDocuments(); // Clear the DB
|
||||
|
||||
MongoDBHandler mongoDBHandler = new MongoDBHandler();
|
||||
mongoDBHandler.deleteAllDocuments(); // Clear the DB
|
||||
Logger.pink("Adding Sessions to DB...");
|
||||
mongoDBHandler.insertSessions(speechIndex.getSessions());
|
||||
|
||||
Logger.pink("Adding Sessions to DB...");
|
||||
mongoDBHandler.insertSessions(speechIndex.getSessions());
|
||||
Logger.pink("Adding Agenda Items to DB...");
|
||||
mongoDBHandler.insertAgendaItems(speechIndex.getAgendaItems());
|
||||
|
||||
Logger.pink("Adding Agenda Items to DB...");
|
||||
mongoDBHandler.insertAgendaItems(speechIndex.getAgendaItems());
|
||||
Logger.pink("Adding Speeches to DB...");
|
||||
mongoDBHandler.insertSpeeches(speechIndex.getSpeeches());
|
||||
|
||||
Logger.pink("Adding Speeches to DB...");
|
||||
mongoDBHandler.insertSpeeches(speechIndex.getSpeeches());
|
||||
|
||||
mongoDBHandler.close(); // Close the connection to the DB
|
||||
mongoDBHandler.close(); // Close the connection to the DB
|
||||
}
|
||||
|
||||
|
||||
//TEST
|
||||
|
|
|
@ -52,8 +52,7 @@ public class MongoDBHandler {
|
|||
private static String databaseName;
|
||||
|
||||
private MongoCollection<Document> speechesCollection;
|
||||
private MongoCollection<Document> membersCollection;
|
||||
private MongoCollection<Document> fractionsCollection;
|
||||
|
||||
private MongoCollection<Document> sessionsCollection;
|
||||
private MongoCollection<Document> agendaItemsCollection;
|
||||
private MongoCollection<Document> historyCollection;
|
||||
|
@ -90,15 +89,11 @@ public class MongoDBHandler {
|
|||
mongoClient = MongoClients.create(settings);
|
||||
database = mongoClient.getDatabase(databaseName);
|
||||
speechesCollection = database.getCollection("speech");
|
||||
membersCollection = database.getCollection("members");
|
||||
fractionsCollection = database.getCollection("fractions");
|
||||
sessionsCollection = database.getCollection("sessions");
|
||||
agendaItemsCollection = database.getCollection("agendaItems");
|
||||
historyCollection = database.getCollection("history");
|
||||
Logger.info("Connected to MongoDB database: " + databaseName);
|
||||
|
||||
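// Attempt at creating the full-text search index (does not work yet): this builds a plain ascending index on the "collection" field; a MongoDB text index needs the value "text" instead of 1.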
|
||||
membersCollection.createIndex(new Document("collection", 1));
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -582,49 +577,6 @@ public class MongoDBHandler {
|
|||
speechesCollection.insertMany(speechDocuments);
|
||||
}
|
||||
|
||||
public Speech insertSpeech(int speakerId, int sessionId, int agendaItemId) throws MemberNotFoundException, SessionNotFoundException, AgendaItemNotFoundException, ServerErrorException {
|
||||
// fetch member by speakerId
|
||||
MemberOfParliament member;
|
||||
try {
|
||||
List<MemberOfParliament> members = retrieveAllMembersOfParliament(Filters.eq("id", speakerId));
|
||||
if (members.isEmpty()) {
|
||||
Logger.error("No member found with id " + speakerId);
|
||||
throw new MemberNotFoundException();
|
||||
} else if (members.size() > 1) {
|
||||
Logger.warn("Multiple members found with id " + speakerId);
|
||||
throw new ServerErrorException();
|
||||
}
|
||||
member = members.get(0);
|
||||
} catch (IOException e) {
|
||||
Logger.error("Failed to retrieve member with id " + speakerId);
|
||||
throw new ServerErrorException();
|
||||
}
|
||||
// check if session and agendaItem exist
|
||||
List<Session> sessions = retrieveAllSessions(Filters.eq("sessionId", sessionId));
|
||||
if (sessions.isEmpty()) {
|
||||
Logger.error("No session found with id " + sessionId);
|
||||
throw new SessionNotFoundException();
|
||||
}
|
||||
List<AgendaItem> agendaItems = retrieveAllAgendaItems(Filters.eq("id", agendaItemId));
|
||||
if (agendaItems.isEmpty()) {
|
||||
Logger.error("No agendaItem found with id " + agendaItemId);
|
||||
throw new AgendaItemNotFoundException();
|
||||
}
|
||||
// get a new random speechId that is not already in use
|
||||
int speechId = 0;
|
||||
while (!retrieveAllSpeeches(Filters.eq("speechId", speechId)).isEmpty()) {
|
||||
// generate random int
|
||||
speechId = (int) (Math.random() * Integer.MAX_VALUE);
|
||||
}
|
||||
|
||||
// create speech
|
||||
Speech speech = new Speech_File_Impl(sessionId, agendaItemId, speakerId, speechId, member.getName(), member.getParty());
|
||||
|
||||
// insert speech into DB
|
||||
insertSpeeches(List.of(speech));
|
||||
|
||||
return speech;
|
||||
}
|
||||
|
||||
public List<Speech> retrieveAllSpeeches() {
|
||||
List<Document> speeches = speechesCollection.find().into(new ArrayList<>());
|
||||
|
@ -686,34 +638,6 @@ public class MongoDBHandler {
|
|||
return result;
|
||||
}
|
||||
|
||||
public List<MemberOfParliament> retrieveAllMembersOfParliament() {
|
||||
List<Document> members = membersCollection.find().into(new ArrayList<>());
|
||||
List<MemberOfParliament> result = new ArrayList<>();
|
||||
for (Document member : members) {
|
||||
result.add(new MemberOfParliament_MongoDB_Impl(member));
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
public List<MemberOfParliament> retrieveAllMembersOfParliament(Bson filter) throws IOException {
|
||||
List<Document> speeches = membersCollection.find(filter).into(new ArrayList<>());
|
||||
List<MemberOfParliament> result = new ArrayList<>();
|
||||
for (Document speech : speeches) {
|
||||
result.add(new MemberOfParliament_MongoDB_Impl(speech));
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
public List<MemberOfParliament> retrieveAllMembersOfParliament(Bson filter, Bson projection) throws IOException {
|
||||
List<Document> speeches = membersCollection.find(filter).projection(projection).into(new ArrayList<>());
|
||||
List<MemberOfParliament> result = new ArrayList<>();
|
||||
for (Document speech : speeches) {
|
||||
result.add(new MemberOfParliament_MongoDB_Impl(speech));
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
public void deleteAllDocuments() {
|
||||
speechesCollection.deleteMany(new Document());
|
||||
|
|
|
@ -10,14 +10,10 @@ public class SpeechIndex {
|
|||
private final List<Session> sessions;
|
||||
private final List<Speech> speeches;
|
||||
private final List<AgendaItem> agendaItems;
|
||||
private final List<MemberOfParliament> members;
|
||||
private final List<Fraction> fractions;
|
||||
|
||||
public SpeechIndex(List<Session> sessions, List<Speech> speeches, List<AgendaItem> agendaItems, List<MemberOfParliament> members, List<Fraction> fractions) {
|
||||
public SpeechIndex(List<Session> sessions, List<Speech> speeches, List<AgendaItem> agendaItems) {
|
||||
this.sessions = sessions;
|
||||
this.speeches = speeches;
|
||||
this.agendaItems = agendaItems;
|
||||
this.members = members;
|
||||
this.fractions = fractions;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -27,21 +27,6 @@ public interface SpeechIndexFactory {
|
|||
*/
|
||||
SpeechIndexFactory parseSessions();
|
||||
|
||||
/**
|
||||
* Parses all members of parliament.
|
||||
* @return the SpeechIndexFactory instance
|
||||
* @throws IOException if an I/O error occurs
|
||||
* @throws SAXException if a SAX error occurs
|
||||
*/
|
||||
SpeechIndexFactory parseMembers() throws IOException, SAXException;
|
||||
|
||||
/**
|
||||
* Parses all fractions.
|
||||
* @return the SpeechIndexFactory instance
|
||||
* @throws IOException if an I/O error occurs
|
||||
* @throws SAXException if a SAX error occurs
|
||||
*/
|
||||
SpeechIndexFactory parseFractions() throws IOException, SAXException;
|
||||
|
||||
/**
|
||||
* Builds a new SpeechIndex instance.
|
||||
|
|
|
@ -39,19 +39,10 @@ public class SpeechIndexFactoryImpl implements SpeechIndexFactory {
|
|||
return this;
|
||||
}
|
||||
|
||||
@Override
|
||||
public SpeechIndexFactory parseMembers() throws IOException, SAXException {
|
||||
return null;
|
||||
}
|
||||
|
||||
@Override
|
||||
public SpeechIndexFactory parseFractions() throws IOException, SAXException {
|
||||
return null;
|
||||
}
|
||||
|
||||
@Override
|
||||
public SpeechIndex build() throws IOException, SAXException {
|
||||
return null;
|
||||
return new SpeechIndex(sessions, speeches, agendaItems);
|
||||
}
|
||||
|
||||
|
||||
|
|
|
@ -356,7 +356,6 @@ public abstract class PPRUtils {
|
|||
int offset = 0;
|
||||
int limit = 10;
|
||||
boolean hasMore = true;
|
||||
|
||||
while (hasMore) {
|
||||
String queryUrl = "https://www.bundestag.de/ajax/filterlist/de/services/opendata/866354-866354?limit="
|
||||
+ limit + "&noFilterSet=true&offset=" + offset;
|
||||
|
|
|
@ -2,6 +2,7 @@ package org.texttechnologylab.project.gruppe_05_1.xml.speeches;
|
|||
|
||||
import lombok.Getter;
|
||||
import lombok.Setter;
|
||||
import org.texttechnologylab.project.gruppe_05_1.database.MongoPprUtils;
|
||||
import org.texttechnologylab.project.gruppe_05_1.util.PPRUtils;
|
||||
import org.texttechnologylab.project.gruppe_05_1.xml.speeches.Impls.*;
|
||||
import org.texttechnologylab.project.gruppe_05_1.xml.speeches.Interfaces.AgendaItem;
|
||||
|
@ -34,12 +35,13 @@ public class SpeechParser {
|
|||
private Boolean parseLegislativePeriods;
|
||||
|
||||
public List<Session> parseAllSessions() {
|
||||
List<Session> sessionsEmpty = new ArrayList<>();
|
||||
List<Session> sessions = new ArrayList<>();
|
||||
this.speeches = new ArrayList<>();
|
||||
this.agendaItems = new ArrayList<>();
|
||||
Set<Document> xmlDocuments = PPRUtils.processXML();
|
||||
System.out.println("All sessions parsed");
|
||||
for (org.w3c.dom.Document xmlDoc:xmlDocuments) {
|
||||
for (org.w3c.dom.Document xmlDoc : xmlDocuments) {
|
||||
try {
|
||||
File tempFile = convertDocumentToFile(xmlDoc);
|
||||
Session session = parseSessionFile(tempFile);
|
||||
|
@ -50,8 +52,8 @@ public class SpeechParser {
|
|||
e.printStackTrace();
|
||||
}
|
||||
}
|
||||
|
||||
return sessions;
|
||||
|
||||
}
|
||||
|
||||
private Session parseSessionFile(File file) throws Exception {
|
||||
|
@ -168,6 +170,7 @@ public class SpeechParser {
|
|||
}
|
||||
return null;
|
||||
}
|
||||
|
||||
/**
|
||||
* Converts an org.w3c.dom.Document into a temporary file.
|
||||
*/
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue