Compare commits

...
Sign in to create a new pull request.

178 commits

Author SHA1 Message Date
s5260822
67e117b647 fixed bad merge 2025-03-24 01:01:08 +01:00
Jonas Werner
4db18688b5 Merge branch 'main' of https://ppr.gitlab.texttechnologylab.org/s1188354/multimodal_parliament_explorer_05_1 2025-03-24 00:51:55 +01:00
Leon Kastner
a312b7bef0 Benutzerhandbuch 2025-03-23 23:40:19 +00:00
s5260822
88cef04f49 fixed about page missing 2025-03-24 00:34:47 +01:00
s5260822
5cf33608cf final touches 2025-03-24 00:28:39 +01:00
s5260822
bfb8fb65e8 final Class Diagram 2025-03-24 00:22:09 +01:00
Leon Kastner
e8272742db Mockup Uploaded 2025-03-23 22:56:00 +00:00
Picman2000
7b68a4988b updated diagrams 2025-03-23 23:53:21 +01:00
Picman2000
ac79a35bfb Merge remote-tracking branch 'origin/main' 2025-03-23 23:49:49 +01:00
Picman2000
8bd75e1f73 updated readme 2025-03-23 23:49:34 +01:00
vysitor
2a1127d68c Sped up loading of individual speech list 2025-03-23 23:37:58 +01:00
Picman2000
e7eb638e29 updated gatt chart 2025-03-23 23:30:50 +01:00
Picman2000
d0f9fa5473 Merge remote-tracking branch 'origin/main' 2025-03-23 23:17:38 +01:00
Picman2000
85ba4a4424 documentation updated 2025-03-23 23:17:29 +01:00
Artorias
520e5b0348 Merge remote-tracking branch 'origin/main' 2025-03-23 23:05:31 +01:00
Artorias
e4a684cfa8 Updated gantt chart 2025-03-23 23:04:48 +01:00
s5260822
f5cbbb8197 Merge branch 'main' of https://ppr.gitlab.texttechnologylab.org/s1188354/multimodal_parliament_explorer_05_1 2025-03-23 22:46:09 +01:00
s5260822
5650f1c63b added JavaDoc to my classes and methods 2025-03-23 22:46:04 +01:00
s5260822
8b4ff83cc6 deleted old class 2025-03-23 22:45:46 +01:00
vysitor
2d7f62d192 Added Gantt-Chart 2025-03-23 22:34:37 +01:00
Artorias
cdfa0a37f2 Updated radar chart 2025-03-23 22:01:54 +01:00
s5260822
1c36e16006 added direct photos fetching backend 2025-03-23 21:47:09 +01:00
vysitor
90a5bfead2 finished auther javadoc attribution 2025-03-23 21:44:33 +01:00
Artorias
039aad9937 Merge remote-tracking branch 'origin/main' 2025-03-23 21:40:14 +01:00
Artorias
a891e03e32 Updated radar chart and documentation 2025-03-23 21:39:26 +01:00
vysitor
1cfdcf7b52 continued auther adding in javadoc 2025-03-23 21:34:56 +01:00
Artorias
6c407db15c Updated radar chart and documentation 2025-03-23 21:32:21 +01:00
vysitor
a83bf93806 Finished Javadoc Auther-entries for rest, work in progress 2025-03-23 21:32:09 +01:00
Artorias
932c793f94 Merge remote-tracking branch 'origin/main' 2025-03-23 21:31:50 +01:00
vysitor
cd40f5a16a attempt to merge Leon change 2025-03-23 21:24:31 +01:00
Artorias
a9e251d5b7 Updated radar chart 2025-03-23 21:22:40 +01:00
vysitor
19914883b4 Finished Javadoc Auther-entries for domain, work in progress 2025-03-23 21:22:38 +01:00
vysitor
b831c34f52 Finished Javadoc Auther-entries for nlp, work in progress 2025-03-23 21:20:06 +01:00
vysitor
f70165a924 Finished Javadoc Auther-entries for database, work in progress 2025-03-23 21:13:29 +01:00
vysitor
fba66751e5 Start of Javadoc Auther-entries, work in progress 2025-03-23 21:05:00 +01:00
Picman2000
51485c3379 Merge remote-tracking branch 'origin/main' 2025-03-23 21:04:20 +01:00
Picman2000
b72ac9e213 Readme updated 2025-03-23 21:04:09 +01:00
Artorias
3f69161ebf Updated documentation 2025-03-23 20:42:41 +01:00
vysitor
c6523bf96e Added Filter to Speeches search function 2025-03-23 20:29:27 +01:00
Picman2000
f746443415 Merge remote-tracking branch 'origin/main' 2025-03-23 20:28:25 +01:00
Picman2000
6bec903301 *documentation* 2025-03-23 20:28:13 +01:00
s5260822
89ca422661 updated readme with export doc 2025-03-23 19:50:10 +01:00
s5260822
e47186f685 updated puml and added different versions of it to /doc 2025-03-23 19:49:49 +01:00
Picman2000
08c8205088 Merge remote-tracking branch 'origin/main' 2025-03-23 19:28:26 +01:00
Picman2000
98ca3f0fef Videos werden zu sitzung 187 Punkt 4 reden geladen (die ersten 9 reden, weil eine danach seeeeeeehr lang ist und das programm unnötig lang laufen lässt) 2025-03-23 19:27:50 +01:00
Artorias
995d811a93 Updated documentation 2025-03-23 18:55:54 +01:00
vysitor
62a35e30c7 small enhancement 2025-03-23 17:50:33 +01:00
Jonas Werner
846e130418 added support for speech export on /export page 2025-03-23 17:46:38 +01:00
vysitor
9796b1366c Added index for creation of speechcollection 2025-03-23 17:42:49 +01:00
Artorias
02c8de4459 Merge remote-tracking branch 'origin/main' 2025-03-23 17:33:00 +01:00
Artorias
36e2453a7e Implemented rudimentary Chart overview for all Speeches. 2025-03-23 17:31:03 +01:00
Jonas Werner
e26bb412aa added export and about page 2025-03-23 17:23:52 +01:00
vysitor
756603e8bf Added READMe 2025-03-23 17:20:20 +01:00
Jonas Werner
d2e9cf4dba export now opens in new tab 2025-03-23 17:17:06 +01:00
Jonas Werner
796b36a55e fixed speech key for XML exports 2025-03-23 17:13:43 +01:00
Jonas Werner
5472b4a75f added speech and speaker-speech export buttons 2025-03-23 17:11:34 +01:00
Jonas Werner
74c6666efa fixed formatting for latex graphs on export 2025-03-23 17:00:44 +01:00
Jonas Werner
547479c3e4 fixed nlp data not being exported with xml 2025-03-23 17:00:13 +01:00
Jonas Werner
53bade28e9 fixed Hashtag causing tex gen issues 2025-03-23 15:22:52 +01:00
Jonas Werner
11059d08f0 moved duplicate code to method 2025-03-23 14:57:54 +01:00
Jonas Werner
6ef665c38f fixed method name 2025-03-23 14:52:22 +01:00
Jonas Werner
ae126f0df3 Merge branch 'speech_export_feature' 2025-03-23 14:49:30 +01:00
Jonas Werner
a84b6f7722 removed radar chart 2025-03-23 14:43:30 +01:00
vysitor
91ca1ad007 Hotfixed Parlamentarier Image now shown again 2025-03-23 14:32:00 +01:00
Jonas Werner
f0d1fffec4 added bar chart 2025-03-23 14:28:44 +01:00
Jonas Werner
9332ceab03 fixed party not showing in section of speech 2025-03-23 14:28:26 +01:00
Jonas Werner
e43dea4e36 added NLP Data to XML export 2025-03-23 13:53:21 +01:00
vysitor
92c2d21b8f Sped up All Speeches Call 2025-03-23 13:43:19 +01:00
s5260822
b82597dfeb pushing some non-functional code 2025-03-23 02:41:33 +01:00
s5260822
c866162d31 added vscode config dir 2025-03-23 01:06:35 +01:00
s5260822
0af10f2f19 Merge branch 'speech_export_feature' 2025-03-23 01:05:43 +01:00
s5260822
ab37c00ea9 added XML exports 2025-03-23 00:57:29 +01:00
vysitor
a071ad253f Sped up loading all speeches, still slow 2025-03-23 00:32:01 +01:00
s5260822
85ea3dd5f5 moved tex export request to /pdf sub path 2025-03-22 23:01:39 +01:00
s5260822
76951476ef added warning in console if TeX sdk not installed 2025-03-22 22:55:28 +01:00
s5260822
edb91b7b72 added export for list of speeches 2025-03-22 22:48:14 +01:00
s5260822
dabac9e316 added export based on Topic 2025-03-22 22:33:27 +01:00
s5260822
320f4dbdc2 added request for all speeches exort and improved fetching speeds 2025-03-22 22:20:54 +01:00
s5260822
22ea6f047c removed unnecessary logs 2025-03-22 22:02:59 +01:00
s5260822
483f168ae9 moved to latexmk for proper toc generation 2025-03-22 22:02:38 +01:00
s5260822
ea68b3204b added method to get speechIDs from speaker 2025-03-22 22:02:08 +01:00
s5260822
25fc8b5480 fixed section in toc 2025-03-22 22:01:47 +01:00
s5260822
8d00ef14d4 added support for bulk speech export from Speaker 2025-03-22 22:01:31 +01:00
s5260822
704d71dd75 added flag to pdflatex command to fix endless loading bug 2025-03-22 21:20:20 +01:00
s5260822
116a681ca9 removed unnecessary log message 2025-03-22 21:19:52 +01:00
s5260822
5814f55969 added placeholder for NLP data 2025-03-22 21:19:39 +01:00
s5260822
86f681ebbb fixed log 2025-03-22 21:19:08 +01:00
s5260822
0ef4ea52a8 removed unnecessary log message 2025-03-22 21:18:56 +01:00
s5260822
f0e39dfb42 fixed religion 2025-03-22 21:18:48 +01:00
s5260822
fc63958640 fixed speaker title when not present 2025-03-22 21:18:30 +01:00
s5260822
f2f941f4d6 added check for no-image and add placeholder 2025-03-22 20:47:02 +01:00
s5260822
417aee9cc1 added 500 code when pdf generation goes wrong 2025-03-22 20:46:42 +01:00
s5260822
6d06aae46c Fixed logger output 2025-03-22 20:46:08 +01:00
s5260822
28ae98bd86 changed color of speaker to green 2025-03-22 20:02:41 +01:00
s5260822
41aab00db7 added section title to Speech export 2025-03-22 20:02:28 +01:00
s5260822
f3bf22f197 fixed member image not being saved on first speech export 2025-03-22 19:25:41 +01:00
Jonas Werner
359d057a9b main latex export functionality 2025-03-22 18:52:08 +01:00
Jonas Werner
eb3124e30d added latex preamble 2025-03-22 18:51:43 +01:00
vysitor
861e14b64d Improved loading speech, work in progress, hotfixed RadarChart issue 2025-03-22 17:20:09 +01:00
Jonas Werner
76a12e5a3d updated puml classdiagram 2025-03-22 13:29:31 +01:00
Jonas Werner
0a961688c2 removed unused class 2025-03-22 13:29:09 +01:00
Jonas Werner
669d7d1ab8 removed unused class 2025-03-22 13:28:54 +01:00
Jonas Werner
038cc460bc Merge branch 'main' of https://ppr.gitlab.texttechnologylab.org/s1188354/multimodal_parliament_explorer_05_1 2025-03-22 13:22:19 +01:00
Jonas Werner
12f1f67405 major domain impl class refactoring 2025-03-22 13:13:05 +01:00
Jonas Werner
2e5d43d34f removed unnecessary comments 2025-03-22 12:44:40 +01:00
vysitor
d956acc109 Started metadata work to speed up loading process 2025-03-21 22:55:49 +01:00
Artorias
05405a9329 Changed Radar Charts to illustrate every analyzed sentence in a Speech, including the whole speech. 2025-03-21 22:22:39 +01:00
Jonas Werner
8547915658 added scroll animation to header 2025-03-21 17:45:31 +01:00
vysitor
1189f0e5a8 Small Hotfix for sentiment issue with certain speeches 2025-03-21 17:00:08 +01:00
Jonas Werner
abd25d9ca6 Merge branch 'main' of https://ppr.gitlab.texttechnologylab.org/s1188354/multimodal_parliament_explorer_05_1 2025-03-21 15:38:01 +01:00
vysitor
e5a9b8fbfc Hotfixed FontAwesome Stylesheet to display icons 2025-03-21 14:43:17 +01:00
Jonas Werner
bd76d1dd05 fixed scaling (not entirely but a little) 2025-03-21 13:24:17 +01:00
vysitor
0b1c05acea Fix for corrupt data in database, Show sentiment icons 2025-03-21 13:15:44 +01:00
vysitor
3a2dce4853 Fixed all Chart issues, all 4 now displayed. FrontendController largerly absorbed into Speech-/ Parlamentariercontroller 2025-03-21 01:07:41 +01:00
vysitor
8a10ef9364 Added Sunburst Chart (named entities) 2025-03-20 22:56:06 +01:00
s5260822
381aa65734 Added indicator for party label 2025-03-20 21:54:15 +01:00
s5260822
b78a3b3686 changed Plos to parteilos 2025-03-20 21:47:35 +01:00
s5260822
b20cee59fe moved member-photo style to index.css and added border radius 2025-03-20 21:42:20 +01:00
s5260822
765ef4342b migrated and removed unnecessary logs 2025-03-20 21:39:25 +01:00
s5260822
036e360e0d rearranged charts to 2x2 grid 2025-03-20 21:37:17 +01:00
Leon Kastner
5856b87b25 Merge branch 'Sentiments-Radar-Chart' into 'main'
Sentiments radar chart

See merge request s1188354/multimodal_parliament_explorer_05_1!9
2025-03-20 19:55:32 +00:00
s5260822
3721dddadd fixed german translation 2025-03-20 20:54:48 +01:00
Leon Kastner
54ff4b1c7f Merge branch 'main' into 'Sentiments-Radar-Chart'
# Conflicts:
#   src/main/java/org/texttechnologylab/project/gruppe_05_1/domain/html/HtmlSpeech.java
#   src/main/java/org/texttechnologylab/project/gruppe_05_1/domain/nlp/Sentiment.java
#   src/main/resources/templates/nlp.ftl
2025-03-20 19:53:56 +00:00
Artorias
8ca9d286c7 Working Sentiments Radar Chart! (In Cool) 2025-03-20 20:48:29 +01:00
Artorias
229e0809fc Working Sentiments Radar Chart!!!!! 2025-03-20 20:27:24 +01:00
Artorias
a9edbe8477 Developing grounds for the Radar Chart 2025-03-20 19:47:00 +01:00
vysitor
48a2e9af19 Preparations for sunburst chart 2025-03-20 17:33:20 +01:00
vysitor
a21c19ab75 Error handling for non existing speeches 2025-03-20 17:31:39 +01:00
vysitor
e65abe88bc Work on Sentiment, Bug with empty topic list fixed 2025-03-20 15:30:43 +01:00
vysitor
b795314716 added stylesheet sentiment colors 2025-03-20 15:24:44 +01:00
Jonas Werner
a4e76dbc77 Migrated logs to my implementation 2025-03-20 15:22:32 +01:00
Jonas Werner
9306a1f715 Migrated logs to my implementation 2025-03-20 15:21:02 +01:00
Jonas Werner
7c24f570de centered image in speech view 2025-03-20 15:16:23 +01:00
Artorias
5d6cb314a1 Slight restructuring 2025-03-20 14:20:54 +01:00
s5260822
bf0a242265 removed duplicate css properties 2025-03-20 01:01:12 +01:00
s5260822
c5645e4f5d centered contents of all pages 2025-03-20 01:01:01 +01:00
s5260822
7c59a42efe fixed padding 2025-03-20 01:00:46 +01:00
s5260822
847d55e6a3 fixed and enhanced buttons 2025-03-20 01:00:27 +01:00
s5260822
c939a08a42 fixed style sheet in html head 2025-03-20 00:59:49 +01:00
s5260822
f382c58b26 little style adjustment regarding the navbar 2025-03-20 00:37:47 +01:00
s5260822
e03e6e75c9 adjusted Header and Navbar to provided mockup 2025-03-20 00:33:20 +01:00
vysitor
4f3ee4ef51 Modified Bubble Chart, Added Images to Speech, Added Speeches Tab, modified filter, loading of speeches for individual politicians is quicker 2025-03-19 22:46:58 +01:00
¨J-onasJones¨
253a6ed78b Removed Add-Member button 2025-03-19 17:51:58 +01:00
¨J-onasJones¨
c95179951f Fixed css file path in html head 2025-03-19 17:50:34 +01:00
Artorias
717ef1c7e5 Charts Work again. 2025-03-19 16:48:33 +01:00
Picman2000
6950006a3b Nachladen und verarbeiten von neuen reden funktioniert jz actually 2025-03-19 13:03:07 +01:00
s5260822
307773f54e generic front-end polish 2025-03-19 01:07:18 +01:00
s5260822
d8e36ed8d4 code cleanup 2025-03-19 01:07:00 +01:00
s5260822
9884229f8c fixed border shadow 2025-03-19 00:28:02 +01:00
s5260822
f480be79d6 enhanced search filter 2025-03-19 00:27:48 +01:00
s5260822
8a20f0e952 added member count after filter 2025-03-19 00:27:28 +01:00
s5260822
baf6b70b00 moved h from header to body 2025-03-19 00:26:49 +01:00
s5260822
9b0c46a468 fixed stylesheet import 2025-03-19 00:26:22 +01:00
s5260822
35c2eaddd9 added basic info to homepage 2025-03-19 00:25:58 +01:00
s5260822
857dcff0ff added more form styling 2025-03-19 00:25:32 +01:00
s5260822
5d89deacc6 added fraction-list method for filter form 2025-03-19 00:25:13 +01:00
s5260822
e26c2beab2 added favicon 2025-03-19 00:24:34 +01:00
s5260822
61fb607b3e added filter functions for member list 2025-03-19 00:24:24 +01:00
s5260822
332fe49a02 other merge conflict resloves 2025-03-18 20:38:50 +01:00
s5260822
37a300fa50 moved config files to dedicated dir 2025-03-18 20:38:32 +01:00
s5260822
e0dde25b00 added other freemarker templates 2025-03-18 20:36:38 +01:00
s5260822
32a3823e79 moved templates to resource directory 2025-03-18 20:36:18 +01:00
s5260822
62beaba05a migrated logs to Logger class 2025-03-18 20:34:40 +01:00
s5260822
624a4ae0d7 moved to global variables 2025-03-18 20:34:29 +01:00
s5260822
5457fd86a4 removed random lines 2025-03-18 20:31:39 +01:00
s5260822
270b53a3e4 Merge branch 'main' of https://ppr.gitlab.texttechnologylab.org/s1188354/multimodal_parliament_explorer_05_1 2025-03-18 16:37:45 +01:00
s5260822
9b44d3eebd ignoring .DS_Store 2025-03-18 16:37:33 +01:00
Artorias
fcc064a616 Working POS Bar Chart for every speech implemented 2025-03-18 15:40:09 +01:00
s5260822
22a555d8a2 fixed speech overview for some invalid date strings 2025-03-18 13:55:38 +01:00
vysitor
f5b5d7235c Bubble Chart for Topics works 2025-03-17 21:08:10 +01:00
vysitor
5046f2bbe5 Front End - NLP - work in progress 2025-03-17 20:26:18 +01:00
Picman2000
be1f993871 Merge remote-tracking branch 'origin/main' 2025-03-17 14:12:59 +01:00
Picman2000
47f5afcd54 reden import multithread 2025-03-17 14:12:38 +01:00
vysitor
4da94c660b Added NLP Entities 2025-03-17 01:48:52 +01:00
s5260822
b7ffa45fa9 Merge branch 'main' of https://ppr.gitlab.texttechnologylab.org/s1188354/multimodal_parliament_explorer_05_1 2025-03-16 23:59:32 +01:00
s5260822
da9e6c7da2 crawled new/updated images 2025-03-16 23:58:54 +01:00
s5260822
37f88f9936 fixed image crawler accurace ft. Valentin 2025-03-16 23:58:26 +01:00
vysitor
4064ca42bd Showing pictures of the MdBs 2025-03-16 21:34:04 +01:00
140 changed files with 10011 additions and 1750 deletions

BIN
.DS_Store vendored Normal file

Binary file not shown.

1
.gitignore vendored
View file

@ -1,3 +1,4 @@
.idea/
.vscode/
target/
.DS_Store

38
Benutzerhandbuch.txt Normal file
View file

@ -0,0 +1,38 @@
BENUTZERHANDBUCH MULTIMODAL PARLIAMENT EXPLORER
GRUPPE_05_01
Der Multimodal Parliament Explorer ist eine Client-Server-basierte Anwendung zum automatischen Abrufen, Analysieren, Visualisieren und Exportieren aller Reden des Bundestags der aktuellen Legislaturperiode.
Mit unserem Programm kann man Reden des Bundestags interaktiv erkunden, Statistiken zu den zugehörigen Reden einsehen und diese selbst weiterverwenden.
1. Systemanforderungen
- Java 17+
- Maven
- Einen modernen Webbrowser (Chrome, Firefox, Edge)
- Einen Internetzugang
2. Hauptfunktionen
Datenimport:
Automatischer Download der XML Protokolle inklusive Videos
NLP-Analyse:
Analyse der Reden durch einen NLP-Docker
Datenexploration:
Filterbar nach Redner, Thema und Datum
Visualisierung der analysierten Daten:
Durch verschiedene Charts werden die Analysedaten anschaulich und verdaubar wiedergegeben
Export:
Alle Reden können als PDF und XML exportiert werden.
3. Support und Weiterführende Dokumentation
Detaillierte Entwicklungs- und Nutzungsanleitungen finden Sie im Repository unter /doc oder auf unserer GitLab-Page bei https://ppr.gitlab.texttechnologylab.org/leonkastner/multimodal-parliament-explorer-docu
Bei Fragen oder Fehlern können Sie sich gerne bei uns melden.

188
README.md
View file

@ -1,12 +1,196 @@
# Multimodal Parliament Explorer (Gruppe_05_1)
## Umgebung - wichtiger Hinweis!
### Docker und NLP
Damit der Zugriff auf die Docker-Treiber funktioniert, muss man in IntelliJ folgendes tun:
Edit Run Environment --> Modify Option --> Add VM options
Dann im neu erscheinenden "VM options"-Feld folgendes schreiben:
--add-opens java.base/java.util=ALL-UNNAMED
### LaTex Export
Damit der LaTeX Export funktioniert, muss der LaTeX Compiler installiert sein.
Unter Ubuntu (Linux) kann das Paket mit dem folgenden Befehl installiert werden:
```shell
sudo apt install texlive-latex-extra
```
Unter Windows ist die Installation über MikTex möglich.
Ist kein Compiler installiert, wird beim Start des Programms eine Warnung ausgegeben:
```shell
-------------------------------------------------o
TeX SDK not installed. PDF export will not work.
-------------------------------------------------o
```
## Aufruf
Der Nutzer führt die **Main.java** Datei aus.
Folgende Command Line Argumente werden akzeptiert:
- onlyRunWeb: Fährt die WebServices hoch und wartet auf Requests. Die Einstiegsseite kann im Browser geöffnet werden: http://localhost:5876 (Port 5876 ist in der Ressourcendatei javalin.properties definiert). In der Regel würde man Main nur mit diesem Parameter aufrufen.
- uploadMemberPhotos: Um Parlamentarierfotos vom Resource-Verzeichnis in die Datenbank hochzuladen.
- forceUploadMembers: Lädt die Daten der Parlamentarier erneut in die Datenbank.
- forceUploadSpeeches: Lädt die Reden erneut in die Datenbank, führt die NLP-Analyse auf den Reden aus und speichert die Analyseergebnisse in der Datenbank.
- rebuildMetadata: Berechnet die Metadaten neu. In der Metadata-Collection stehen Daten, welche oft benötigt werden, aber nicht jedes Mal erneut berechnet werden sollen. Momentan sind es die Liste der Parteien und die Liste der möglichen NLP-Topics.
- debugLogging: Dieses Flag bestimmt, ob Debug-Ausgaben des Loggers angezeigt werden sollen.
## Dokumentation
- Use Case Diagramm
- Package/Klassen-Diagramm
- Die Planung
- Das Gantt Diagramm
stehen im "doc"-Verzeichnis.
Die Planung beinhaltet:
- Die Liste der Aufgaben, nach Bereichen gruppiert. Die Bereiche sind: Planung, Domain-Entitäten, XML, Datenbank, Rest Services, Front End, NLP-Verarbeitung, Export, Dokumentation
- Wer macht was
- Geschätzter Aufwand (niedrig / mittel / hoch)
### Classdiagram puml generator
Das Classdiagram wird durch den puml generator generiert:
## Classdiagram puml generator
```shell
python puml_generator.py
```
## Upload member Images
Die aktuelle Version des Klassen-/Package-Diagramms ist bei der Abgabe unter `generated_class_diagram.puml` zu finden. Außerdem ist das Diagramm auch als LaTeX-, PDF- und Bild-Version (png) im `/doc`-Ordner zu finden.
## Datenbank
Die wichtigsten Collections sind *"speech"* und *"speaker"*. In "pictures" stehen die Bilder der Abgeordneten. In "metadata" stehen die bereits erwähnten Metadaten.
Die Collections "agendaItems" und "sessions" werden zwar initial befüllt und kurzzeitig verwendet, um die Zeitstempel der Reden sowie die Agendapunkte zu konstruieren, werden aber danach nicht weiter verwendet.
## Erzeugung der NLP-Daten
Ressourcenbereitstellung:
- Die benötigten Annotationen werden aus dem Ressourcenordner geladen
- Es wird vorausgesetzt, dass in der "speeches"-Collection eine ZIP-Datei (20.zip) vorhanden ist, die alle erforderlichen Annotationen enthält
- Zusätzlich muss das TypeSystem in Form der Datei TypeSystem.xml vorliegen, um die korrekte Umwandlung der Annotationen in den JCas zu ermöglichen.
Abruf der Reden:
- Die Reden werden anhand der eindeutigen ID (speechKey) aus der Datenbank abgerufen.
- Dabei werden die Reden, die bereits annotiert vorliegen, zur weiteren Verarbeitung selektiert
Annotationen verarbeiten:
- Die geladenen Annotationen werden in einen JCas (Java Common Analysis Structure) umgewandelt.
- Im JCas werden die Annotationen serialisiert, sodass sie in einem einheitlichen Format vorliegen.
- Anschließend werden diese serialisierten Daten in der Datenbank dem entsprechenden Speech-Dokument zugeordnet und gespeichert
Remote-Verarbeitung:
- Reden, die nicht in der 20.zip vorhanden sind, also noch nicht lokal annotiert wurden, werden anschließend durch den RemoteDriver verarbeitet.
- Der RemoteDriver ruft dabei externe NLP-Komponenten (z.B. spaCy oder Vader) auf, um fehlende Annotationen zu erzeugen.
- Auch die Ergebnisse der Remote-Verarbeitung werden serialisiert und in der Datenbank an das zugehörige Speech-Dokument angehängt.
## Komponenten und Package Struktur
- database: die Klasse *MongoDBHandler* kümmert sich um die Herstellung der Datenbankverbindung und um generelle CRUD-Operationen. Die Klasse *MongoPprUtils* kümmert sich um das Lesen der Objekte und Datenstrukturen, die für diese Übung notwendig sind.
- domain: hier werden die Entitäten definiert. Subpackage "database" behandelt die Objekte, die aus der Datenbank kommen. Subpackage "html" behandelt die Objekte, die nicht in der Datenbank sind und lediglich angezeigt werden.
- rest: alles, was mit den WebServices zu tun hat - die 4 Controller (für Parlamentarier, Fotos, Videos und Reden), eine Configklasse und der Handler. Im Handler wird die Javalin-Konfiguration definiert sowie die Routes für die 8 Endpoints.
- export: alles, was mit dem Export (LaTex/PDF, XML) zu tun hat, findet hier Platz.
- xml: für das Einlesen der Parlamentarier- und Rede-Daten.
- util: eine kleine Sammlung von Utility-Klassen.
- nlp: Utils für die NLP-Verarbeitung
- exceptions: für die Exceptions
## Struktur des resources-Ordner
- config: config files für javalin und für XML (im letzteren steht die URL zum Herunterladen der Parlamentarier)
- plenarprotokolle: enthält die DTD-Datei zum Parsen der Abgeordnetendaten
- speeches: enthält die Datei TypeSystem.xml
- static: enthält das Stylesheet und das Favicon
- templates: enthält die FreeMarker-Templates
- tex: enthält die nötigen Resourcen für Latex, etwa die preamble.tex
## Ablauf / Workflow
Wir gehen hier vom Workflow des Endnutzers aus.
Wie man die Daten hochlädt, wurde bereits oben kurz erklärt.
1. Die Klasse Main wird ausgeführt, und zwar mit dem "onlyRunWeb"-Parameter.
2. Javalin fährt die Webservices hoch und wartet auf Requests.
3. Der User lädt die Einstiegsseite im Browser, etwa http://localhost:5876. Die Startseite wird angezeigt. Das Hauptmenü enthält im Burgermenü Links zu "Parlamentarier", "Reden", "Exportieren" sowie "Home" und "Über".
### Parlamentarier-Seite
Eine Liste der Parlamentarier wird angezeigt. Man kann die Liste filtern. Man kann auf die jeweiligen Parlamentarier klicken und gelangt dann auf die Seite eines Parlamentariers.
Die Seite eines Parlamentariers beinhaltet Name, Foto, persönliche Daten, Mitgliedschaften und einen Link zu den Reden des Parlamentariers.
### Reden-Seite
Es werden alle Reden der jetzigen Legislaturperiode angezeigt.
Man kann die Liste filtern (MOMENTAN NOCH ZU IMPLEMENTIEREN). Man kann auf die jeweilige Rede klicken und gelangt dann auf die Rede-Seite.
### Redeseite
Folgende Informationen werden angezeigt:
- Informationen zum Redner (Name, Partei, Foto).
- Informationen zur Rede (Datum, Uhrzeit, Agendapunkt).
- Redetext. Vorstellung und Kommentare werden farblich gekennzeichnet.
- Video bei Reden der Sitzung 187 Tagesordnungspunkt 4
- NLP Informationen.
Der NLP-Abschnitt beinhaltet folgende Informationen:
- Topics Information (als Bubble Chart): dabei entspricht die Größe der Bubbles der Häufigkeit/Prävalenz der Topics. Durch einen Mouse-Hover verändert sich die Schriftgröße, damit man auch die Texte der kleinen Bubbles sehen kann.
- POS Information (als Bar Chart): Auf der X-Achse werden alle verschiedenen POS-Elemente die in der Rede erfasst wurden abgebildet, auf der Y-Achse die genaue Anzahl dieser Elemente.
- Sentiments Information (als Radar Chart): Dieses Chart besitzt 3 Achsen: eine für den Wert des positiven Sentiments, eine für das negative Sentiment und eine für das neutrale Sentiment. Die Mitte des Charts steht für den Wert 0, und jede Ecke steht für den Wert 1 des zugehörigen Sentiments. Es wird jeder analysierte Satz sowie die gesamte Rede abgebildet. Ein Polygon steht hierbei für einen bestimmten analysierten Abschnitt der Rede. Rechts neben dem Chart befindet sich eine Legende aller analysierten Objekte, von denen das erste die Werte der gesamten Rede und alle folgenden die einzelnen Satzteile darstellen.
- Named Entities (als Sunburst Chart): der innere Kreis steht für den Typ (LOC, PER, ORG, MISC). Der äußere Kreis steht für die jeweiligen Named Entities. Die Größe der Bögen entsprechen den Häufigkeiten der Entitäten bzw. der Typen.
Ein "Zurück"-Button ist auf den Seiten implementiert. Er führt erwartungsgemäß zur vorherigen Seite.
### Charts-Seite
Gleiche Struktur wie die Charts für die einzelnen Reden, hier nur auf alle in der Datenbank vorhandenen Reden angewendet. Die Sammlung der Daten passiert hier dynamisch beim Abruf der Seite.
### Export-Seite
Wie auch auf den einzelnen Seiten, gibt es hier die Option Reden zu exportieren. Es kann zwischen PDF und XML gewählt werden. Die verschiedenen Export-Optionen sind:
- Export einer einzelnen Rede (mit Reden-ID)
- Export aller Reden eines Parlamentariers (mit Parlamentarier-ID)
- Export aller Reden eines NLP-Topics (mit Topic-String)
- Export aller Reden (ohne Parameter)
Achtung: Der Export kann je nach Anzahl der Reden und der gewählten Option einige Zeit in Anspruch nehmen. Meist wird der Nutzer darüber informiert, dass der Prozess länger dauern kann.
## Verschiedenes
### Nachladen neuer Reden (Thread)
Alle 10 Minuten checkt das Programm, ob auf der Bundestags.de Website neue Reden erschienen sind.
Wenn neue Reden erkannt werden, werden diese heruntergeladen, mit dem RemoteDriver verarbeitet und abschließend noch in
die Datenbank hochgeladen. Auch wenn einfach so Reden in die Datenbank hinzugefügt werden, werden diese verarbeitet.
### Upload member Images
Crawl member images (not required as already in repository)
```shell
cd src/main/resources

Binary file not shown.

Binary file not shown.

BIN
doc/PPR_Mockup.jpg Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 187 KiB

77
doc/UseCasePlantUML Normal file
View file

@ -0,0 +1,77 @@
@startuml
left to right direction
actor Admin
actor Nutzer
actor SystemScheduler as "Zeitgesteuerter Dienst"
rectangle "Multimodal Parliament Explorer" {
package "Datenimport" {
usecase "XML Protokolle herunterladen" as UC1
usecase "XML verarbeiten & speichern" as UC2
usecase "Videos herunterladen & speichern" as UC3
usecase "Fotos der Abgeordneten speichern" as UC4
usecase "NLP-Daten einlesen & verarbeiten" as UC5
}
package "NLP Verarbeitung" {
usecase "NLP Analyse mit DUUI durchführen" as UC6
usecase "NLP-Resultate serialisieren\n& in DB speichern" as UC7
}
package "Export / Serialisierung" {
usecase "Rede als PDF exportieren" as UC8
usecase "Rede als XML exportieren" as UC9
usecase "Rede als XMI exportieren" as UC10
}
package "Visualisierung & UI" {
usecase "Reden durchsuchen" as UC11
usecase "Rede anzeigen (HTML, NLP, Video)" as UC12
usecase "POS / Sentiment / NER / Topics visualisieren" as UC13
}
package "Datenbankoperationen" {
usecase "Reden / Sessions / AgendaItems\nin MongoDB speichern" as UC14
usecase "Metadaten zu Reden abfragen" as UC15
usecase "Video-Referenzen zu Reden abfragen" as UC16
}
package "Systemdienste" {
usecase "Auf neue Protokolle automatisch prüfen und herunterladen" as UC17
usecase "Neue NLP-Reden automatisch analysieren" as UC18
}
' Verbindungen
Admin--> UC1
Admin--> UC2
Admin--> UC3
Admin--> UC4
Admin--> UC5
Admin--> UC6
Admin--> UC7
Admin--> UC8
Admin--> UC9
Admin--> UC10
Nutzer--> UC8
Nutzer--> UC9
Nutzer--> UC10
Nutzer --> UC11
Nutzer --> UC12
Nutzer --> UC13
UC6 --> UC7
UC5 --> UC6
UC2 --> UC14
UC3 --> UC14
UC4 --> UC14
UC14 --> UC15
UC14 --> UC16
UC12 --> UC13
UC12 --> UC16
SystemScheduler --> UC17
SystemScheduler --> UC18
}
@enduml

BIN
doc/class_diagram.pdf Normal file

Binary file not shown.

BIN
doc/class_diagram.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 1.6 MiB

3564
doc/class_diagram.tex Normal file

File diff suppressed because it is too large Load diff

BIN
doc/usecasediagram.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 198 KiB

File diff suppressed because it is too large Load diff

BIN
src/.DS_Store vendored Normal file

Binary file not shown.

BIN
src/main/.DS_Store vendored Normal file

Binary file not shown.

BIN
src/main/java/.DS_Store vendored Normal file

Binary file not shown.

BIN
src/main/java/org/.DS_Store vendored Normal file

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

View file

@ -7,11 +7,14 @@ import org.texttechnologylab.project.gruppe_05_1.nlp.XmiExtractor;
import org.texttechnologylab.project.gruppe_05_1.rest.RESTHandler;
import org.texttechnologylab.project.gruppe_05_1.util.Logger;
import org.texttechnologylab.project.gruppe_05_1.util.PPRUtils;
import org.texttechnologylab.project.gruppe_05_1.util.SpeechVideoUpdater;
import org.texttechnologylab.project.gruppe_05_1.xml.FileObjectFactory;
import org.texttechnologylab.project.gruppe_05_1.xml.speeches.Interfaces.Session;
import org.texttechnologylab.project.gruppe_05_1.xml.speeches.SpeechParser;
import org.w3c.dom.Document;
import java.util.Arrays;
import java.util.List;
import java.util.Set;
import java.util.concurrent.Executors;
import java.util.concurrent.ScheduledExecutorService;
@ -19,6 +22,7 @@ import java.util.concurrent.TimeUnit;
import static java.lang.Boolean.FALSE;
import static java.lang.Boolean.TRUE;
import static org.texttechnologylab.project.gruppe_05_1.export.TeXUtil.isTeXSdkInstalled;
import static org.texttechnologylab.project.gruppe_05_1.util.PPRUtils.checkAndProcessNewProtocols;
public class Main {
@ -26,21 +30,33 @@ public class Main {
public static boolean FORCE_UPLOAD_MEMBERS;
public static boolean FORCE_UPLOAD_SPEECHES;
public static boolean ONLY_RUN_WEB;
public static boolean REBUILD_METADATA;
public static boolean DEBUG_LOGGING;
private static final FileObjectFactory xmlFactory = FileObjectFactory.getFactory();
private static final MongoObjectFactory mongoFactory = MongoObjectFactory.getFactory();
public static final String RESOURCES_DIR = "src/main/resources";
public static final String CONFIG_DIR = "src/main/resources/config";
public static final String JAVALIN_TEMPLATE_DIR = "src/main/resources/templates";
public static final String JAVALIN_STATIC_FILES_DIR = "src/main/resources/static";
public static final String JCAS_SPEECHES_TYPESYSTEM_DIR = "src/main/resources/speeches/TypeSystem";
public static final String MEMBER_IMAGES_DIR = "src/main/resources/membersOfParliamentImages/";
public static final String TEMP_EXPORT_DIR = "src/main/resources/tempExport/";
/**
* Main Methode zum Start des Multimodalen Parlament Explorers
* Programm-Flag Implementierung und DIR Konstanten von Jonas
*
* @param args
* @throws Exception
*/
public static void main(String[] args) throws Exception {
UPLOAD_MEMBER_PHOTOS = Arrays.asList(args).contains("uploadMemberPhotos");
FORCE_UPLOAD_MEMBERS = Arrays.asList(args).contains("forceUploadMembers");
FORCE_UPLOAD_SPEECHES = Arrays.asList(args).contains("forceUploadSpeeches");
ONLY_RUN_WEB = Arrays.asList(args).contains("onlyRunWeb");
REBUILD_METADATA = Arrays.asList(args).contains("rebuildMetadata");
DEBUG_LOGGING = Arrays.asList(args).contains("debugLogging");
System.out.println("Starting Multimodal Parliament Explorer...");
@ -50,9 +66,16 @@ public class Main {
System.out.println(" - Force Upload Members: " + FORCE_UPLOAD_MEMBERS);
System.out.println(" - Force Upload Speeches: " + FORCE_UPLOAD_SPEECHES);
System.out.println(" - Only Run javalin Web Server: " + ONLY_RUN_WEB);
System.out.println(" - Rebuild Metadata: " + REBUILD_METADATA);
System.out.println(" - Debug Logging: " + DEBUG_LOGGING);
System.out.println("--------------------------------------------o");
if (!isTeXSdkInstalled()) {
Logger.orange("-------------------------------------------------o");
Logger.orange("TeX SDK not installed. PDF export will not work.");
Logger.orange("-------------------------------------------------o");
}
if (ONLY_RUN_WEB) {
Logger.info("Starting Web Service...");
RESTHandler restHandler = new RESTHandler();
@ -62,6 +85,12 @@ public class Main {
MongoDBHandler mongoDBHandler = new MongoDBHandler();
if (REBUILD_METADATA) {
Logger.info("Rebuilding Metadata...");
MongoPprUtils.rebuildMetadata();
System.exit(0);
}
SpeechIndexFactoryImpl speechIndexFactory = new SpeechIndexFactoryImpl();
if ((mongoDBHandler.getDatabase().getCollection(MongoPprUtils.SPEECH_COLLECTION_NAME).countDocuments() != 0) && !FORCE_UPLOAD_SPEECHES) {
Logger.info("Skipping Speech parsing and DB insertion as they are already present...");
@ -95,6 +124,9 @@ public class Main {
Logger.pink("Adding Speeches to DB...");
mongoDBHandler.insertSpeeches(speechIndex.getSpeeches());
Logger.pink("Building Metadata...");
MongoPprUtils.rebuildMetadata();
// only upload member photos if database was empty by default, not when speeches are force-overwritten
if (!FORCE_UPLOAD_SPEECHES) {
Logger.pink("Uploading Member Photos to DB...");
@ -122,10 +154,10 @@ public class Main {
if (UPLOAD_MEMBER_PHOTOS) {
Logger.pink("Uploading Member Photos to DB...");
mongoDBHandler.uploadMemberPhotos();
mongoDBHandler.uploadMemberPhotosFromResourceFolder();
}
NlpUtils.runRemoteDriver();
/*ScheduledExecutorService scheduler = Executors.newScheduledThreadPool(1);
SpeechVideoUpdater.init();
ScheduledExecutorService scheduler = Executors.newScheduledThreadPool(1);
scheduler.scheduleAtFixedRate(() -> {
try {
NlpUtils.runRemoteDriver();
@ -141,7 +173,8 @@ public class Main {
Logger.info("Keine neuen Protokolle gefunden, Upload wird übersprungen.");
} else {
SpeechParser speechParser = new SpeechParser();
mongoDBHandler.insertSessions(speechParser.parseAllSessions(newProtocols));
List<Session> newSessions = speechParser.parseAllSessions(newProtocols);
mongoDBHandler.insertSessions(newSessions);
mongoDBHandler.insertAgendaItems(speechParser.getAgendaItems());
mongoDBHandler.insertSpeeches(speechParser.getSpeeches());
Logger.info("Neuer Protokolle uploaded: " + newProtocols.size());
@ -149,7 +182,7 @@ public class Main {
} catch (Exception ex) {
Logger.error("Fehler bei der Protokollaktualisierung: " + ex.getMessage());
}
}, 0, 10, TimeUnit.MINUTES);*/
}, 0, 10, TimeUnit.MINUTES);
RESTHandler restHandler = new RESTHandler();
restHandler.startJavalin();

View file

@ -3,21 +3,19 @@ package org.texttechnologylab.project.gruppe_05_1.database;
import com.mongodb.MongoClientSettings;
import com.mongodb.MongoCredential;
import com.mongodb.ServerAddress;
import com.mongodb.WriteConcern;
import com.mongodb.bulk.BulkWriteResult;
import com.mongodb.client.MongoClient;
import com.mongodb.client.MongoClients;
import com.mongodb.client.MongoCollection;
import com.mongodb.client.MongoDatabase;
import com.mongodb.client.*;
import com.mongodb.client.model.*;
import org.texttechnologylab.project.gruppe_05_1.domain.html.HtmlSpeech;
import org.texttechnologylab.project.gruppe_05_1.domain.nlp.Topic;
import org.texttechnologylab.project.gruppe_05_1.exceptions.ServerErrorException;
import org.texttechnologylab.project.gruppe_05_1.exceptions.SessionNotFoundException;
import org.bson.Document;
import org.bson.conversions.Bson;
import org.bson.types.ObjectId;
import org.texttechnologylab.project.gruppe_05_1.database.domainimp.speeches.AgendaItem_MongoDB_Impl;
import org.texttechnologylab.project.gruppe_05_1.database.domainimp.speeches.Session_MongoDB_Impl;
import org.texttechnologylab.project.gruppe_05_1.database.domainimp.speeches.Speech_MongoDB_Impl;
import org.texttechnologylab.project.gruppe_05_1.database.domainimpl.mdb.AgendaItem_MongoDB_Impl;
import org.texttechnologylab.project.gruppe_05_1.database.domainimpl.mdb.Session_MongoDB_Impl;
import org.texttechnologylab.project.gruppe_05_1.database.domainimpl.mdb.Speech_MongoDB_Impl;
import org.texttechnologylab.project.gruppe_05_1.util.Logger;
import org.texttechnologylab.project.gruppe_05_1.util.PPRUtils;
import org.texttechnologylab.project.gruppe_05_1.util.PropertiesUtils;
@ -32,6 +30,8 @@ import java.util.concurrent.TimeUnit;
import static com.mongodb.client.model.Filters.eq;
import static org.texttechnologylab.project.gruppe_05_1.Main.MEMBER_IMAGES_DIR;
import static org.texttechnologylab.project.gruppe_05_1.util.PPRUtils.fetchMemberImageBase64FromNameString;
import static org.texttechnologylab.project.gruppe_05_1.util.PPRUtils.getSessionCookies;
public class MongoDBHandler {
@ -58,7 +58,9 @@ public class MongoDBHandler {
private MongoCollection<Document> memberPhotoCollection;
private MongoCollection<Document> historyCollection;
/**
* Implementiert von Valentin
*/
public MongoDBHandler() {
// Set loglevel for slf4j to avoid spam // TODO: Fix this (optional)
System.setProperty("org.slf4j.simpleLogger.defaultLogLevel", "error");
@ -117,6 +119,7 @@ public class MongoDBHandler {
/**
* Get the MongoDB according to properties.
* If a local server URI is defined, use it. Otherwise, use remote server.
* Implementiert von Valentin
* @return MongoDatabase
*/
static public MongoDatabase getMongoDatabase() {
@ -177,6 +180,7 @@ public class MongoDBHandler {
/**
*
* @return Set<String> with the names of all collections
* Implementiert von Valentin
*/
public Set<String> getCollectionNames() {
// return getDatabase().listCollectionNames().into(new ArrayList<>());
@ -187,6 +191,7 @@ public class MongoDBHandler {
*
* @param name Name of collection to check for existence
* @return does the collection exist
* Implementiert von Valentin
*/
public boolean collectionExists(String name) {
return getDatabase().listCollectionNames().into(new ArrayList<>()).contains(name);
@ -195,6 +200,7 @@ public class MongoDBHandler {
/**
* Tries to create a collection. If the collection exists and contains documents, throw an exception
* Implementiert von Valentin
* @param database
* @param collectionName
*/
@ -220,6 +226,7 @@ public class MongoDBHandler {
/**
* Create Collection
* Implementiert von Valentin
* @param database
* @param collectionName
*/
@ -238,6 +245,7 @@ public class MongoDBHandler {
/**
* Creates a collection. If the collection exists already, delete all its document
* Implementiert von Valentin
* @param database
* @param collectionName
*/
@ -253,7 +261,12 @@ public class MongoDBHandler {
}
}
/**
* Implementiert von Valentin
* @param collection
* @param indexName
* @param isAscending
*/
static public void createIndexForCollection(MongoCollection<Document> collection, String indexName, boolean isAscending) {
// MongoDB creates automatically an index on "_id"
if (indexName.equals(DEFAULT_ID_FIELD_NAME)) {
@ -267,6 +280,12 @@ public class MongoDBHandler {
}
}
/**
* Implementiert von Valentin
* @param collection
* @param indexNames
* @param isAscending
*/
static public void createIndexForCollection(MongoCollection<Document> collection, List<String> indexNames, boolean isAscending) {
// MongoDB creates automatically an index on "_id"
if (indexNames.contains(DEFAULT_ID_FIELD_NAME)) {
@ -281,6 +300,9 @@ public class MongoDBHandler {
}
}
/**
* Implementiert von Valentin
*/
public void createIndicesForSpeakerCollection() {
if (speakerCollection.listIndexes().into(new ArrayList<>()).size() == 1) {
MongoDBHandler.createIndexForCollection(speakerCollection,"name", true);
@ -289,10 +311,15 @@ public class MongoDBHandler {
}
}
/**
 * Creates the indexes on the speech collection, but only when the collection
 * currently has exactly one index.
 * Indexes are created on "speakerId" and "speechKey" (isAscending = true) and on
 * "dateTime" (isAscending = false).
 * Implemented by Valentin.
 */
public void createIndicesForSpeechCollection() {
    int existingIndexCount = speechesCollection.listIndexes().into(new ArrayList<>()).size();
    // NOTE(review): a count of 1 presumably means only the automatic "_id" index exists — confirm.
    if (existingIndexCount != 1) {
        return;
    }

    MongoDBHandler.createIndexForCollection(speechesCollection, "speakerId", true);
    MongoDBHandler.createIndexForCollection(speechesCollection, "speechKey", true);
    MongoDBHandler.createIndexForCollection(speechesCollection, "dateTime", false);
}
@ -302,6 +329,7 @@ public class MongoDBHandler {
/**
* Does a document with a given ID (for the "_id"-field) exists in a given collection?
* Implementiert von Valentin
* @param collection
* @param id
* @return
@ -315,6 +343,7 @@ public class MongoDBHandler {
/**
* Find a document with a given ID (for the "_id"-field) in a given collection
* Implementiert von Valentin
* @param collection
* @param id
* @return the document (null if not found)
@ -326,7 +355,7 @@ public class MongoDBHandler {
}
/**
*
*Implementiert von Valentin
* @param collection
* @return count of documents in the collection
*/
@ -340,7 +369,7 @@ public class MongoDBHandler {
/**
*
*Implementiert von Valentin
* @param database
* @param collectionName
* @return count of documents in the collection
@ -354,7 +383,7 @@ public class MongoDBHandler {
}
/**
*
*Implementiert von Valentin
* @param database
* @param collectionName
*/
@ -373,6 +402,7 @@ public class MongoDBHandler {
*/
/**
* Implementiert von Valentin
* Creates a BSON document containing only simple fields according to fields given in a map
* @param attributes
* @return
@ -391,6 +421,7 @@ public class MongoDBHandler {
}
/**
* Implementiert von Valentin
* Creates a BSON document containing simple fields (attributes) as well as other (possibly nested) objects
* @param attributes the simple fields
* @param fields the (possibly nested) objects
@ -418,6 +449,22 @@ public class MongoDBHandler {
return doc;
}
/**
 * Returns a field value as a Double, even when it is stored in the database as
 * another numeric type or as a String.
 * Implemented by Valentin.
 *
 * @param doc Mongo document to read the field from
 * @param fieldName name of the field
 * @return the field value as Double; null when the document yields null for the field
 * @throws NumberFormatException if the value is a String that cannot be parsed as a double
 * @throws ClassCastException if the value is neither a number nor a String
 */
public static Double getFieldAsDouble(Document doc, String fieldName) {
    Object obj = doc.get(fieldName);
    if (obj instanceof Double) {
        return (Double) obj;
    }
    // Covers Integer as before, plus Long and every other numeric type.
    if (obj instanceof Number) {
        return ((Number) obj).doubleValue();
    }
    if (obj instanceof String) {
        return Double.valueOf((String) obj);
    }
    Logger.error("Wert " + obj + " sollte Double sein, ist aber nicht");
    // Unchanged fallback: null stays null, any other unsupported type fails with a ClassCastException.
    return (Double) obj;
}
/*
* Weitere CRUD Operations
* =======================
@ -425,7 +472,7 @@ public class MongoDBHandler {
/**
*
*Implementiert von Valentin
* @param collection
* @param doc
* @return
@ -435,7 +482,7 @@ public class MongoDBHandler {
}
/**
*
*Implementiert von Valentin
* @param collection
* @param docs
* @return
@ -445,7 +492,7 @@ public class MongoDBHandler {
}
/**
*
*Implementiert von Valentin
* @param collection
* @param fieldName
* @param fieldValue
@ -457,6 +504,7 @@ public class MongoDBHandler {
/**
* Implementiert von Valentin
* Searches a document and performs an update on it
* The document to update must be matched by name and value of a certain field
* @param collection
@ -479,7 +527,7 @@ public class MongoDBHandler {
/**
*
*Implementiert von Valentin
* @param collection
* @param searchCriteriaName search criteria: name of the field
* @param searchCriteriaValue search criteria: value of the field
@ -490,11 +538,12 @@ public class MongoDBHandler {
collection.deleteOne(deleteQuery);
}
/*
* Justus Jonas operations
* =======================
*/
/**
* Fügt eine Session in die Datenbank ein.
*
* @param session Das Session-Objekt, das eingefügt werden soll.
*/
public void insertSession(Session session) {
Document sessionDocument = new Document("sessionId", session.getId())
.append("dateTime", session.getDateTime())
@ -504,6 +553,15 @@ public class MongoDBHandler {
sessionsCollection.insertOne(sessionDocument);
}
/**
* Erstellt eine neue Session anhand der übergebenen Parameter, generiert eine eindeutige sessionId,
* fügt sie in die Datenbank ein und gibt die erstellte Session zurück.
*
* @param dateTime Startzeit der Session.
* @param endTime Endzeit der Session.
* @param legislativePeriod Die Legislaturperiode.
* @return Die neu erstellte Session.
*/
public Session insertSession(String dateTime, String endTime, String legislativePeriod) {
// get a new random sessionId that is not already in use
int sessionId = 0;
@ -521,13 +579,22 @@ public class MongoDBHandler {
return session;
}
/**
 * Inserts a list of sessions into the database, one at a time, by delegating
 * each element to {@code insertSession(Session)}.
 *
 * @param sessions the sessions to insert
 */
public void insertSessions(List<Session> sessions) {
    sessions.forEach(session -> insertSession(session));
}
/**
* Fügt eine Liste von AgendaItems in die Datenbank ein.
*
* @param agendaItems Liste der AgendaItems, die eingefügt werden sollen.
*/
public void insertAgendaItems(List<AgendaItem> agendaItems) {
List<Document> agendaItemDocuments = new ArrayList<>();
for (AgendaItem agendaItem : agendaItems) {
@ -541,7 +608,15 @@ public class MongoDBHandler {
agendaItemsCollection.insertMany(agendaItemDocuments);
}
/**
* Erstellt ein neues AgendaItem für eine bestimmte Session und einen Titel.
*
* @param sessionId Die Session-ID, zu der das AgendaItem gehört.
* @param title Der Titel des AgendaItems.
* @return Das neu erstellte AgendaItem.
* @throws SessionNotFoundException Falls keine Session mit der angegebenen sessionId existiert.
* @throws ServerErrorException Falls ein Serverfehler auftritt.
*/
public AgendaItem insertAgendaItem(int sessionId, String title) throws SessionNotFoundException, ServerErrorException {
// check if session exists
List<Session> sessions = retrieveAllSessions(Filters.eq("sessionId", sessionId));
@ -565,6 +640,11 @@ public class MongoDBHandler {
return agendaItem;
}
/**
* Fügt eine Liste von Speech-Objekten in die Datenbank ein.
*
* @param speeches Liste der Speeches, die eingefügt werden sollen.
*/
public void insertSpeeches(List<Speech> speeches) {
// Convert each Speech to a Document
List<Document> speechDocuments = new ArrayList<>();
@ -615,27 +695,43 @@ public class MongoDBHandler {
speechesCollection.insertMany(speechDocuments);
}
/**
 * Retrieves all speech documents without a filter and converts them into a list
 * of Speech objects.
 *
 * @return a list with one Speech per stored speech document
 */
public List<Speech> retrieveAllSpeeches() {
    List<Document> speechDocuments = speechesCollection.find().into(new ArrayList<>());
    List<Speech> result = new ArrayList<>(speechDocuments.size());
    for (Document speechDocument : speechDocuments) {
        // Each document is converted exactly once; adding it through both constructors
        // would put every speech into the result twice.
        // NOTE(review): the meaning of the boolean flag is defined by Speech_MongoDB_Impl — confirm.
        result.add(new Speech_MongoDB_Impl(speechDocument, true));
    }
    return result;
}
/**
 * Retrieves all speech documents matching the given filter and converts them
 * into a list of Speech objects.
 *
 * @param filter the filter as a Bson object
 * @return a list with one Speech per matching speech document
 */
public List<Speech> retrieveAllSpeeches(Bson filter) {
    List<Document> speechDocuments = speechesCollection.find(filter).into(new ArrayList<>());
    List<Speech> result = new ArrayList<>(speechDocuments.size());
    for (Document speechDocument : speechDocuments) {
        // Each document is converted exactly once; adding it through both constructors
        // would put every speech into the result twice.
        // NOTE(review): the meaning of the boolean flag is defined by Speech_MongoDB_Impl — confirm.
        result.add(new Speech_MongoDB_Impl(speechDocument, true));
    }
    return result;
}
/**
* Ruft alle Session-Dokumente ohne Filter ab und wandelt sie in eine Liste von Session-Objekten um.
*
* @return Eine Liste aller Sessions.
*/
public List<Session> retrieveAllSessions() {
List<Document> sessions = sessionsCollection.find().into(new ArrayList<>());
List<Session> result = new ArrayList<>();
@ -646,6 +742,12 @@ public class MongoDBHandler {
return result;
}
/**
* Ruft alle Session-Dokumente ab, die dem übergebenen Filter entsprechen, und wandelt sie in eine Liste von Session-Objekten um.
*
* @param filter Der Filter als Bson-Objekt.
* @return Eine Liste der Sessions, die dem Filter entsprechen.
*/
public List<Session> retrieveAllSessions(Bson filter) {
List<Document> speeches = sessionsCollection.find(filter).into(new ArrayList<>());
List<Session> result = new ArrayList<>();
@ -656,6 +758,11 @@ public class MongoDBHandler {
return result;
}
/**
* Ruft alle AgendaItem-Dokumente ohne Filter ab und wandelt sie in eine Liste von AgendaItem-Objekten um.
*
* @return Eine Liste aller AgendaItems.
*/
public List<AgendaItem> retrieveAllAgendaItems() {
List<Document> agendaItems = agendaItemsCollection.find().into(new ArrayList<>());
List<AgendaItem> result = new ArrayList<>();
@ -666,6 +773,12 @@ public class MongoDBHandler {
return result;
}
/**
* Ruft alle AgendaItem-Dokumente ab, die dem übergebenen Filter entsprechen, und wandelt sie in eine Liste von AgendaItem-Objekten um.
*
* @param filter Der Filter als Bson-Objekt.
* @return Eine Liste der AgendaItems, die dem Filter entsprechen.
*/
public List<AgendaItem> retrieveAllAgendaItems(Bson filter) {
List<Document> speeches = agendaItemsCollection.find(filter).into(new ArrayList<>());
List<AgendaItem> result = new ArrayList<>();
@ -676,6 +789,13 @@ public class MongoDBHandler {
return result;
}
/**
* Aktualisiert das Speech-Dokument, das durch den speechKey identifiziert wird,
* und setzt das Feld "xmiData" auf den übergebenen xmiContent.
*
* @param speechKey Der Schlüssel der Rede.
* @param xmiContent Der XMI-Inhalt als String.
*/
public void updateXmiData(String speechKey, String xmiContent) {
speechesCollection.updateOne(
Filters.eq("speechKey", speechKey),
@ -683,6 +803,9 @@ public class MongoDBHandler {
);
}
/**
* Löscht alle Dokumente, die mit den Reden, Sessions, AgendaItems (und History) zusammenhängen.
*/
public void deleteSpeechRelatedDocuments() {
speechesCollection.deleteMany(new Document());
sessionsCollection.deleteMany(new Document());
@ -690,6 +813,11 @@ public class MongoDBHandler {
//historyCollection.deleteMany(new Document());
}
/**
* Führt einen Bulk-Write für NLP-bezogene Updates in der Speech-Collection aus.
*
* @param bulkOperations Eine Liste von Bulk-Update-Operationen.
*/
public void bulkWriteNlpData(List<WriteModel<Document>> bulkOperations) {
if (!bulkOperations.isEmpty()) {
BulkWriteOptions options = new BulkWriteOptions().ordered(false);
@ -707,6 +835,11 @@ public class MongoDBHandler {
}
}
/**
 * Counts the speech documents that have an "analysisResults" field.
 *
 * @return the number of speech documents in which the "analysisResults" field exists
 */
public long checkAnalysisResultsField() {
    Bson hasAnalysisResults = Filters.exists("analysisResults");
    return speechesCollection.countDocuments(hasAnalysisResults);
}
@ -723,6 +856,12 @@ public class MongoDBHandler {
return loadMemberImageFromFileByName(firstName, name);
}
/**
* Lädt das Bild eines Mitglieds aus dem Ordner resources/membersOfParliamentImages anhand des Namens.
* @param firstName Der Vorname des Mitglieds.
* @return das Bild des Mitglieds als base64-String.
* Implementiert von Jonas
**/
public String loadMemberImageFromFileByName(String firstName, String name) {
// get the member photo from the resources/membersOfParliamentImages folder
File photo = new File(MEMBER_IMAGES_DIR + name + "_" + firstName + ".jpg");
@ -740,6 +879,12 @@ public class MongoDBHandler {
return image_data;
}
/**
* Lädt das Bild eines Mitglieds aus dem Ordner resources/membersOfParliamentImages anhand des Namens.
* @param memberId Der Vorname des Mitglieds.
* @param base64String Der base64-String des Bildes.
* Implementiert von Jonas
**/
public void uploadMemberPhoto(String memberId, String base64String) {
if (memberPhotoCollection.find(eq("memberId", memberId)).first() != null) {
Logger.warn("Member photo for " + memberId + " already exists in the database. Overwriting...");
@ -750,7 +895,41 @@ public class MongoDBHandler {
memberPhotoCollection.insertOne(photoDocument);
}
/**
 * Uploads a photo for every member found in the speaker collection.
 * For each speaker the image is fetched as a base64 string by full name
 * ("firstName name") and stored via {@code uploadMemberPhoto}. A failed fetch is
 * logged and the loop continues with the next speaker.
 * Implemented by Jonas.
 */
public void uploadMemberPhotos() {
    // Only the id and the two name fields are needed, so nothing else is loaded.
    Bson nameFields = Projections.include("_id", "name", "firstName");
    List<Document> speakers = speakerCollection.find().projection(nameFields).into(new ArrayList<>());

    try {
        Logger.pink(getSessionCookies());
    } catch (IOException e) {
        Logger.error("Failed to get session cookies: " + e.getMessage());
    }

    for (Document speakerDoc : speakers) {
        String memberId = speakerDoc.getString("_id");
        String name = speakerDoc.getString("name");
        String firstName = speakerDoc.getString("firstName");
        String fullName = firstName + " " + name;
        try {
            String base64String = fetchMemberImageBase64FromNameString(fullName);
            uploadMemberPhoto(memberId, base64String);
            Logger.debug("Uploaded member photo for " + fullName);
        } catch (IOException e) {
            Logger.error("Failed to fetch member image for " + fullName + ": " + e.getMessage());
        }
    }
}
/**
* Lädt die Bilder aller Mitglieder aus dem Ordner resources/membersOfParliamentImages in die Datenbank.
* Implementiert von Jonas
**/
public void uploadMemberPhotosFromResourceFolder() {
Logger.info("Found " + PPRUtils.listFilesInDirectory(MEMBER_IMAGES_DIR).size() + " member photos to upload.");
// loop over file names in the directory
// for each file name, extract the name of the member
@ -767,8 +946,14 @@ public class MongoDBHandler {
}
}
/**
 * Checks whether a session with the given session number exists in the
 * sessions collection. The number is matched as an integer against the
 * "sessionId" field.
 *
 * @param sessionNumber the session number as a String
 * @return true if at least one document with that sessionId exists, otherwise false
 * @throws NumberFormatException if sessionNumber is null or not a valid integer
 */
public boolean sessionExists(String sessionNumber) {
    // A single filter on the integer "sessionId" field; a second declaration of
    // "filter" on "sessionNumber" would not compile.
    int sessionId = Integer.parseInt(sessionNumber);
    Document filter = new Document("sessionId", sessionId);
    return sessionsCollection.countDocuments(filter) > 0;
}
@ -781,8 +966,20 @@ public class MongoDBHandler {
return photoDocument.getString("base64");
}
/**
* Closes the MongoDB connection by closing the underlying client.
*/
public void close() {
mongoClient.close();
}
/**
* Returns a single speech document that matches the given filter.
*
* @param filter the filter as a Document
* @return the first speech document matching the filter, or null if none is found
*/
public Document getSpeech(Document filter) {
return speechesCollection.find(filter).first();
}
}

View file

@ -13,7 +13,7 @@ public class MongoObjectFactory {
private static MongoObjectFactory factory = null;
/**
*
*Gesamte File implementiert von Valentin
* @return MongoObjectFactory
*/
public static MongoObjectFactory getFactory() {
@ -28,23 +28,23 @@ public class MongoObjectFactory {
* =====================
*/
/**
 * Converts biographical information into a document.
 *
 * @param entity the biographical information to convert
 * @return the document produced by BiographicalInformation_MongoDB_Impl.createEntity
 */
public Document createBiografischeAngaben(BiografischeAngaben entity) {
    return new BiographicalInformation_MongoDB_Impl().createEntity(entity);
}

/**
 * Converts an institution into a document.
 *
 * @param entity the institution to convert
 * @return the document produced by Institution_MongoDB_Impl.createEntity
 */
public Document createInstitution(Institution entity) {
    return new Institution_MongoDB_Impl().createEntity(entity);
}

/**
 * Converts an Mdb entity into a document.
 *
 * @param entity the Mdb entity to convert
 * @return the document produced by Mdb_MongoDB_Impl.createEntity
 */
public Document createMdb(Mdb entity) {
    return new Mdb_MongoDB_Impl().createEntity(entity);
}

/**
 * Converts an MdbName entity into a document.
 *
 * @param entity the MdbName entity to convert
 * @return the document produced by MdbName_MongoDB_Impl.createEntity
 */
public Document createMdbName(MdbName entity) {
    return new MdbName_MongoDB_Impl().createEntity(entity);
}

/**
 * Converts a legislative period (Wahlperiode) into a document.
 *
 * @param entity the legislative period to convert
 * @return the document produced by LegislativePeriod_MongoDB_Impl.createEntity
 */
public Document createWahlperiode(Wahlperiode entity) {
    return new LegislativePeriod_MongoDB_Impl().createEntity(entity);
}
/*
@ -52,14 +52,14 @@ public class MongoObjectFactory {
* ========================
*/
/**
 * Converts a speaker into a document.
 *
 * @param entity the speaker to convert
 * @return the document produced by Speaker_MongoDB_Impl.createEntity
 */
public Document createSpeaker(Speaker entity) {
    return new Speaker_MongoDB_Impl().createEntity(entity);
}

/**
 * Converts a membership into a document.
 *
 * @param entity the membership to convert
 * @return the document produced by Membership_MongoDB_Impl.createEntity
 */
public Document createMembership(Membership entity) {
    return new Membership_MongoDB_Impl().createEntity(entity);
}

/**
 * Converts a list of memberships into a list of documents.
 *
 * @param list the memberships to convert
 * @return the documents produced by Membership_MongoDB_Impl.createList
 */
public List<Document> createMemberships(List<Membership> list) {
    return new Membership_MongoDB_Impl().createList(list);
}
}

View file

@ -4,6 +4,10 @@ import org.bson.Document;
import java.util.List;
/**
* Implementiert von Valentin
* @param <T>
*/
public interface MongoOperations<T> {
MongoObjectFactory factory = MongoObjectFactory.getFactory();
public Document createEntity(T entity);

View file

@ -1,24 +1,33 @@
package org.texttechnologylab.project.gruppe_05_1.database;
import com.mongodb.client.FindIterable;
import com.mongodb.client.MongoCollection;
import com.mongodb.client.MongoCursor;
import com.mongodb.client.*;
import com.mongodb.client.model.*;
import io.javalin.http.Context;
import org.bson.Document;
import org.texttechnologylab.project.gruppe_05_1.database.domainimp.speeches.Speech_MongoDB_Impl;
import org.bson.conversions.Bson;
import org.texttechnologylab.project.gruppe_05_1.database.domainimpl.mdb.Speaker_MongoDB_Impl;
import org.texttechnologylab.project.gruppe_05_1.database.domainimpl.mdb.Speech_MongoDB_Impl;
import org.texttechnologylab.project.gruppe_05_1.domain.html.HtmlSpeech;
import org.texttechnologylab.project.gruppe_05_1.domain.html.Parlamentarier;
import org.texttechnologylab.project.gruppe_05_1.domain.html.ParlamentarierDetails;
import org.texttechnologylab.project.gruppe_05_1.domain.nlp.*;
import org.texttechnologylab.project.gruppe_05_1.domain.nlp.NamedEntity;
import org.texttechnologylab.project.gruppe_05_1.domain.nlp.Token;
import org.texttechnologylab.project.gruppe_05_1.domain.html.SpeechOverview;
import org.texttechnologylab.project.gruppe_05_1.domain.speaker.Membership;
import org.texttechnologylab.project.gruppe_05_1.domain.speech.SpeechMetaData;
import org.texttechnologylab.project.gruppe_05_1.util.GeneralUtils;
import org.texttechnologylab.project.gruppe_05_1.util.Logger;
import org.texttechnologylab.project.gruppe_05_1.util.PPRUtils;
import org.texttechnologylab.project.gruppe_05_1.xml.speeches.Interfaces.Speech;
import java.io.IOException;
import java.time.LocalDate;
import java.time.LocalDateTime;
import java.time.ZoneId;
import java.util.*;
import java.util.stream.Collectors;
import static com.mongodb.client.model.Filters.eq;
/**
* Diese Klasse beinhaltet Mongo-Utilities, welche spezifisch für die PPR-Datenstrukturen sind.
@ -38,6 +47,7 @@ public class MongoPprUtils {
public static final String HISTORY_COLLECTION_NAME = "history";
public static final String PICTURES_COLLECTION_NAME = "pictures";
public static final String COMMENT_COLLECTION_NAME = "comment";
public static final String METADATA_COLLECTION_NAME = "metadata";
private static MongoCollection<Document> speakerCollection = null;
private static MongoCollection<Document> speechCollection = null;
@ -45,12 +55,20 @@ public class MongoPprUtils {
private static MongoCollection<Document> agendaItemsCollection = null;
private static MongoCollection<Document> picturesCollection = null;
private static MongoCollection<Document> commentCollection = null;
private static MongoCollection<Document> metadataCollection = null;
/**
 * Returns the speaker collection. The collection is looked up in the database
 * on the first call and kept in a static field for later calls.
 * Implemented by Valentin.
 *
 * @return the speaker collection
 */
public static MongoCollection<Document> getSpeakerCollection() {
    if (speakerCollection != null) {
        return speakerCollection;
    }
    speakerCollection = MongoDBHandler.getMongoDatabase().getCollection(SPEAKER_COLLECTION_NAME);
    return speakerCollection;
}
/**
* Implementiert von Valentin
* @return
*/
public static MongoCollection<Document> getSpeechCollection() {
if (speechCollection == null) speechCollection = MongoDBHandler.getMongoDatabase().getCollection(SPEECH_COLLECTION_NAME);
return speechCollection;
@ -72,6 +90,16 @@ public class MongoPprUtils {
}
/**
 * Returns the metadata collection. The collection is looked up in the database
 * on the first call and kept in a static field for later calls.
 * Implemented by Valentin.
 *
 * @return the metadata collection
 */
public static MongoCollection<Document> getMetadataCollection() {
    if (metadataCollection != null) {
        return metadataCollection;
    }
    metadataCollection = MongoDBHandler.getMongoDatabase().getCollection(METADATA_COLLECTION_NAME);
    return metadataCollection;
}
/**
* Implementiert von Valentin
* Create the Speaker Collection and useful indices for it
*/
public static void createIndexForSpeakerCollection() {
@ -82,6 +110,7 @@ public class MongoPprUtils {
}
/**
* Implementiert von Valentin
* Create the Speech Collection and useful indices for it
*/
public static void createIndexForSpeechCollection() {
@ -91,6 +120,7 @@ public class MongoPprUtils {
/**
* Implementiert von Valentin
* Truncate the Speaker Collection.
* Note that it is quicker (and saves space) to drop and re-create rather than removing all documents using "remove({})"
*/
@ -107,6 +137,7 @@ public class MongoPprUtils {
/**
* Implementiert von Valentin
* Holt alle Parlamentarier, die einen Suchkriterium erfüllen.
* Das Suchkriterium wird auf allen Feldern angewandt: Vorname, Nachname, Partei.
* Ist das Suchkriterium leer, werden alle Parlamentarier zurückgeliefert
@ -145,8 +176,204 @@ public class MongoPprUtils {
return plist;
}
/**
 * Fetches all members of parliament that satisfy the optional query parameters
 * of the request. Every parameter that is present adds one condition; all
 * conditions are combined with AND. Without any parameter all members are returned.
 * Text parameters (name, lastName, firstName, title, placeOfBirth) are matched as
 * case-insensitive literal substrings; the remaining parameters are matched by equality.
 * An empty "party" parameter is treated as absent.
 * The result is also written to the response as JSON.
 * Implemented by Jonas.
 *
 * @param ctx session context providing the query parameters and receiving the response
 * @return the matching members; null when the request was answered with an error
 *         status (400 for a non-numeric integer parameter, 500 for an IOException)
 */
public static List<Parlamentarier> getFilteredMembers(Context ctx) {
    Integer memberId;
    Integer firstLegislativePeriodInt;
    Integer lastLegislativePeriodInt;
    try {
        memberId = parseOptionalIntParam(ctx.queryParam("memberId"));
        firstLegislativePeriodInt = parseOptionalIntParam(ctx.queryParam("firstLegislativePeriod"));
        lastLegislativePeriodInt = parseOptionalIntParam(ctx.queryParam("lastLegislativePeriod"));
    } catch (NumberFormatException e) {
        // Malformed client input is a client error, not an unhandled server failure.
        ctx.status(400);
        ctx.result("Invalid numeric query parameter");
        return null;
    }

    String name = ctx.queryParam("name");
    String lastName = ctx.queryParam("lastName");
    String firstName = ctx.queryParam("firstName");
    String title = ctx.queryParam("title");
    String dateOfBirth = ctx.queryParam("dateOfBirth");
    String dateOfDeath = ctx.queryParam("dateOfDeath");
    String placeOfBirth = ctx.queryParam("placeOfBirth");
    String gender = ctx.queryParam("gender");
    String religion = ctx.queryParam("religion");
    String partyParam = ctx.queryParam("party");
    String party = "".equals(partyParam) ? null : partyParam;

    List<Bson> filters = new ArrayList<>();
    if (memberId != null) filters.add(eq("id", memberId));
    if (name != null) filters.add(caseInsensitiveContains("name", name));
    if (lastName != null) filters.add(caseInsensitiveContains("lastName", lastName));
    if (firstName != null) filters.add(caseInsensitiveContains("firstName", firstName));
    if (title != null) filters.add(caseInsensitiveContains("title", title));
    if (dateOfBirth != null) filters.add(eq("dateOfBirth", dateOfBirth));
    if (dateOfDeath != null) filters.add(eq("dateOfDeath", dateOfDeath));
    if (placeOfBirth != null) filters.add(caseInsensitiveContains("placeOfBirth", placeOfBirth));
    if (gender != null) filters.add(eq("gender", gender));
    if (religion != null) filters.add(eq("religion", religion));
    if (party != null) filters.add(eq("party", party));
    if (firstLegislativePeriodInt != null) filters.add(eq("firstLegislativePeriod", firstLegislativePeriodInt));
    if (lastLegislativePeriodInt != null) filters.add(eq("lastLegislativePeriod", lastLegislativePeriodInt));

    // No conditions: match all documents. Otherwise combine all conditions with AND.
    Bson filter = filters.isEmpty() ? Filters.empty() : Filters.and(filters);
    Bson projection = Projections.fields(Projections.exclude("image_data"));

    try {
        List<Parlamentarier> members = retrieveAllMembersOfParliament(filter, projection);
        ctx.json(members);
        return members;
    } catch (IOException e) {
        ctx.status(500);
        ctx.result("Server error occurred");
        return null;
    }
}

/**
 * Parses an optional integer query parameter.
 *
 * @param rawValue the raw parameter value, or null when the parameter is absent
 * @return the parsed value, or null when rawValue is null
 * @throws NumberFormatException if rawValue is not a valid integer
 */
private static Integer parseOptionalIntParam(String rawValue) {
    return (rawValue != null) ? Integer.valueOf(Integer.parseInt(rawValue)) : null;
}

/**
 * Builds a case-insensitive "contains" filter. The user input is quoted so that
 * regex metacharacters in it are matched literally and cannot alter the pattern.
 *
 * @param fieldName name of the document field to match
 * @param userInput the text that must occur in the field value
 * @return the regex filter
 */
private static Bson caseInsensitiveContains(String fieldName, String userInput) {
    return Filters.regex(fieldName, ".*" + java.util.regex.Pattern.quote(userInput) + ".*", "i");
}
/**
 * Fetches all members of parliament from the speaker collection that satisfy a filter.
 * Only the fields selected by the given projection are requested from the database.
 * Implemented by Jonas.
 *
 * @param filter filter applied to the speaker collection
 * @param projection projection applied to the matching documents
 * @return one Parlamentarier per matching document, populated with id ("_id"),
 *         first name ("firstName"), last name ("name") and party ("party")
 * @throws IOException declared by the signature
 **/
public static List<Parlamentarier> retrieveAllMembersOfParliament(Bson filter, Bson projection) throws IOException {
    List<Document> speakerDocs = new ArrayList<>();
    getSpeakerCollection().find(filter).projection(projection).into(speakerDocs);

    List<Parlamentarier> members = new ArrayList<>();
    for (Document speakerDoc : speakerDocs) {
        Parlamentarier member = new Parlamentarier();
        member.setId(speakerDoc.getString("_id"));
        member.setVorname(speakerDoc.getString("firstName"));
        member.setNachname(speakerDoc.getString("name"));
        member.setPartei(speakerDoc.getString("party"));
        members.add(member);
    }
    return members;
}
/**
 * Implemented by Valentin.
 * Fetches the overview rows of all speeches that match the form parameters of the request.
 *
 * Query parameters read from the context:
 * - "name":  case-insensitive substring match on "speakerName"
 * - "party": case-insensitive substring match on "fraction" (empty string = no filter)
 * - "topic": exact match against an entry of the "topics" array (empty string = no filter)
 *
 * The name and party values are quoted before being embedded in a regular expression,
 * so regex metacharacters typed by the user are matched literally.
 *
 * @param ctx session context from which the query parameters are read
 * @return matching speeches, sorted by "dateTime" descending
 */
public static List<SpeechOverview> getFilteredSpeechesOverview(Context ctx) {
    String name = ctx.queryParam("name");
    String party = (!Objects.equals(ctx.queryParam("party"), "")) ? ctx.queryParam("party") : null;
    String topic = (!Objects.equals(ctx.queryParam("topic"), "")) ? ctx.queryParam("topic") : null;

    List<Bson> filters = new ArrayList<>();
    if (name != null) {
        filters.add(Filters.regex("speakerName", ".*" + java.util.regex.Pattern.quote(name) + ".*", "i"));
    }
    if (party != null) {
        filters.add(Filters.regex("fraction", ".*" + java.util.regex.Pattern.quote(party) + ".*", "i"));
    }
    if (topic != null) {
        // The parameter was read but never applied before. enrichSpeechDocuments() stores the
        // topics of a speech as a top-level "topics" array, so an equality filter selects the
        // speeches that contain the requested topic.
        filters.add(Filters.eq("topics", topic));
    }
    Bson filter = filters.isEmpty() ? Filters.empty() : Filters.and(filters);

    Document projection = new Document("speechKey", 1)
            .append("speakerId", 1)
            .append("dateTimeString", 1)
            .append("speakerName", 1)
            .append("fraction", 1)
            .append("agendaTitel", 1);
    List<Document> docs = getSpeechCollection().find(filter)
            .projection(projection)
            .sort(Sorts.descending("dateTime"))
            .into(new ArrayList<>());

    List<SpeechOverview> result = new ArrayList<>();
    for (Document doc : docs) {
        result.add(new SpeechOverview(
                doc.getString("speechKey"),
                doc.getInteger("speakerId"),
                doc.getString("dateTimeString"),
                doc.getString("speakerName"),
                doc.getString("fraction"),
                doc.getString("agendaTitel")
        ));
    }
    return result;
}
/**
 * Implemented by Valentin.
 * Fetches the overview rows of all speeches held by one member of parliament.
 *
 * @param speakerId value matched against the "speakerId" field of the speech documents
 * @return the speaker's speeches, sorted by "dateTime" descending
 */
public static List<SpeechOverview> getSpeechesOverviewForSpeaker(Integer speakerId) {
    Document overviewFields = new Document("speechKey", 1);
    for (String field : new String[] {"speakerId", "dateTimeString", "speakerName", "fraction", "agendaTitel"}) {
        overviewFields.append(field, 1);
    }

    List<Document> speechDocs = new ArrayList<>();
    getSpeechCollection()
            .find(Filters.eq("speakerId", speakerId))
            .projection(overviewFields)
            .sort(Sorts.descending("dateTime"))
            .into(speechDocs);

    List<SpeechOverview> overviews = new ArrayList<>();
    for (Document speechDoc : speechDocs) {
        SpeechOverview overview = new SpeechOverview(
                speechDoc.getString("speechKey"),
                speechDoc.getInteger("speakerId"),
                speechDoc.getString("dateTimeString"),
                speechDoc.getString("speakerName"),
                speechDoc.getString("fraction"),
                speechDoc.getString("agendaTitel"));
        overviews.add(overview);
    }
    return overviews;
}
/**
* Implementiert von Valentin
* Liest einen Parlamentarier von der MongoDB
* @param doc - MongoDB Dokument eines Parlamentariers
* @return Parlamentarier
@ -197,8 +424,24 @@ public class MongoPprUtils {
return readParlamentarierDetailsFromSpeaker(doc);
}
/**
 * Loads a speaker from the speaker collection.
 * Implemented by Jonas.
 *
 * @param id value matched against the "_id" field of the speaker documents
 * @return the Speaker_MongoDB_Impl created from the first matching document
 */
public static Speaker_MongoDB_Impl getSpeakerById(String id) {
    Logger.debug("ID: " + id);

    Document speakerDoc = MongoDBHandler.findFirstDocumentInCollection(getSpeakerCollection(), "_id", id);
    Logger.debug("Speaker: " + speakerDoc);

    Speaker_MongoDB_Impl factory = new Speaker_MongoDB_Impl();
    Speaker_MongoDB_Impl parsedSpeaker = factory.createSpeakerMongoDBImpl(speakerDoc);
    Logger.debug("Speaker parsed" + parsedSpeaker);

    return parsedSpeaker;
}
/**
* Implementiert von Valentin
* Holt die Details eines Parlamentariers
* @param id Parlamentarier-ID (Integer)
* @return ParlamentarierDetails
@ -306,9 +549,9 @@ public class MongoPprUtils {
// Speech
// TODO: kopiere die Speech-Sachen von Übung 4 hierher!
/**
* Implementiert von Valentin
* Aufzählen, wie viele Reden eines bestimmten Redners gespeichert sind
* @param speakerId
* @return Anzahl Reden
@ -319,6 +562,7 @@ public class MongoPprUtils {
/**
* Implementiert von Valentin
* Liefert alle Reden eines Redners zurück
* @param speakerId
* @return Alle Reden eines Redners
@ -330,13 +574,51 @@ public class MongoPprUtils {
List<Document> docs = getSpeechCollection().find(filter).into(new ArrayList<>());
for (Document doc : docs) {
speeches.add(new Speech_MongoDB_Impl(doc));
speeches.add(new Speech_MongoDB_Impl(doc, true));
}
return speeches;
}
/**
 * Implemented by Valentin.
 * Returns all speeches, optionally restricted by a textual filter.
 *
 * A non-blank filter selects the speeches whose "speakerName", "fraction" or "speechKey"
 * contains the filter text (case-insensitive). The text is quoted before it is embedded in
 * the regular expression, so characters such as "(" or "+" are matched literally instead of
 * producing an invalid or unintended pattern.
 *
 * Failures while reading the cursor are logged; the speeches read up to that point are returned.
 *
 * @param filter text to search for; null or blank selects all speeches
 * @return the matching speeches
 */
public static List<Speech> getSpeeches(String filter) {
    List<Speech> speeches = new ArrayList<>();

    MongoCursor<Document> cursor;
    if (filter == null || filter.isBlank()) {
        cursor = getSpeechCollection().find().cursor();
    } else {
        String pattern = ".*" + java.util.regex.Pattern.quote(filter) + ".*";
        Document searchDocument = new Document("$or", List.of(
                new Document("speakerName", new Document("$regex", pattern).append("$options", "i")),
                new Document("fraction", new Document("$regex", pattern).append("$options", "i")),
                new Document("speechKey", new Document("$regex", pattern).append("$options", "i"))
        ));
        cursor = getSpeechCollection().find(searchDocument).cursor();
    }

    // try-with-resources closes the cursor on every path.
    try (cursor) {
        while (cursor.hasNext()) {
            speeches.add(new Speech_MongoDB_Impl(cursor.next(), false));
        }
    } catch (Exception e) {
        // Best effort: keep what was read so far. Errors (e.g. OutOfMemoryError) are no longer swallowed.
        Logger.error(String.valueOf(e));
    }
    return speeches;
}
/**
* Implementiert von Valentin
* Liefert Metadaten (aber keine Inhalte!) für alle Reden eines Redners zurück.
* Als Metadaten zählen das Datum, Agenda-ID etc.
* @param speakerId
@ -357,18 +639,7 @@ public class MongoPprUtils {
// aus "sessions" Collection
String dateTimeString = getSessionDateTime(sessionId);
if (dateTimeString != null) {
md.setDateTimeString(dateTimeString);
LocalDateTime tmp = GeneralUtils.parseDateTime(dateTimeString, "dd.MM.yyyy HH:mm");
if (tmp == null) {
tmp = GeneralUtils.parseDateTime(dateTimeString, "dd.MM.yyyy H:mm");
if (tmp == null) {
Logger.error(dateTimeString + " could not be parsed");
}
}
md.setDateTime(tmp);
}
augmentSpeechMetaDataFromSession(sessionId, md);
// aus "agendaItems" Collection
int agendaItemId = speech.getAgendaItemId();
@ -381,11 +652,72 @@ public class MongoPprUtils {
// Sortiere nach Datum, absteigend
speechMetaDataList.sort((md1, md2) -> {
try {
return md2.getDateTime().compareTo(md1.getDateTime());
} catch (NullPointerException e) {
return 0;
if ((md2.getDateTime() == null) && (md1.getDateTime()) == null) return 0;
if (md2.getDateTime() == null) return -1;
if (md1.getDateTime() == null) return 1;
return md2.getDateTime().compareTo(md1.getDateTime());
});
return speechMetaDataList;
}
/**
* Implementiert von Valentin
* Liefert Metadaten (aber keine Inhalte!) für alle Reden zurück.
* Die Auswahl kann durch einen (textuellen) Filter eingeschränkt werden
* Als Metadaten zählen das Datum, Agenda-ID etc.
* @param filter
* @return
*/
public static List<SpeechMetaData> getSpeechesMetadata(String filter) {
List<SpeechMetaData> speechMetaDataList = new ArrayList<>();
MongoCursor<Document> cursor;
if (filter== null || filter.isBlank()) {
cursor = getSpeechCollection().find().iterator();
} else {
String pattern = ".*" + filter + ".*";
Document searchDocument = new Document("$or", List.of(
new Document("speakerName", new Document("$regex", pattern).append("$options", "i")),
new Document("fraction", new Document("$regex", pattern).append("$options", "i")),
new Document("speechKey", new Document("$regex", pattern).append("$options", "i"))
));
cursor = getSpeechCollection().find(searchDocument).cursor();
}
try {
while (cursor.hasNext()) {
Document doc = cursor.next();
SpeechMetaData smd = new SpeechMetaData();
smd.setSpeechKey(doc.getString("speechKey"));
Date dateTimeInMongo = doc.getDate("dateTime");
smd.setDateTime(LocalDateTime.ofInstant(dateTimeInMongo.toInstant(), ZoneId.systemDefault()));
smd.setDateTimeString(doc.getString("dateTimeString"));
smd.setSpeakerName(doc.getString("speakerName"));
String fraktion = (doc.getString("fraction"));
if (fraktion == null) {
smd.setFraktion(PPRUtils.PARTEILOS_KUERZEL);
} else {
smd.setFraktion(fraktion);
}
smd.setAgendaTitle(doc.getString("agendaTitel"));
smd.setSpeakerId(doc.getInteger("speakerId"));
speechMetaDataList.add(smd);
}
} catch (Throwable t) {
Logger.error(String.valueOf(t));
} finally {
cursor.close();
}
// Sortiere nach Datum, absteigend
speechMetaDataList.sort((md1, md2) -> {
if ((md2.getDateTime() == null) && (md1.getDateTime()) == null) return 0;
if (md2.getDateTime() == null) return -1;
if (md1.getDateTime() == null) return 1;
return md2.getDateTime().compareTo(md1.getDateTime());
});
return speechMetaDataList;
@ -393,6 +725,110 @@ public class MongoPprUtils {
/**
 * Implemented by Valentin.
 * Fetches, for every speech, the fields needed to render a list of speeches in HTML.
 *
 * @return all speeches as overview rows, sorted by "dateTime" descending
 */
public static List<SpeechOverview> getSpeechOverview() {
    Bson overviewFields = Projections.include(
            "speechKey", "speakerId", "dateTimeString", "speakerName", "fraction", "agendaTitel");

    List<Document> speechDocs = new ArrayList<>();
    getSpeechCollection().find()
            .projection(overviewFields)
            .sort(Sorts.descending("dateTime"))
            .into(speechDocs);

    List<SpeechOverview> overviews = new ArrayList<>();
    for (Document speechDoc : speechDocs) {
        String speechKey = speechDoc.getString("speechKey");
        Integer speakerId = speechDoc.getInteger("speakerId");
        String dateTimeString = speechDoc.getString("dateTimeString");
        String speakerName = speechDoc.getString("speakerName");
        String fraction = speechDoc.getString("fraction");
        String agendaTitel = speechDoc.getString("agendaTitel");
        overviews.add(new SpeechOverview(speechKey, speakerId, dateTimeString, speakerName, fraction, agendaTitel));
    }
    return overviews;
}
/**
 * Implemented by Valentin.
 * Fetches the fields needed to render, in HTML, the list of speeches of one member of parliament.
 *
 * @param speakerId value matched against the "speakerId" field of the speech documents
 * @return the speaker's speeches as overview rows, sorted by "dateTime" descending
 */
public static List<SpeechOverview> getSpeechOverviewBySpeaker(Integer speakerId) {
    Bson bySpeaker = Filters.eq("speakerId", speakerId);
    Bson overviewFields = Projections.include(
            "speechKey", "speakerId", "dateTimeString", "speakerName", "fraction", "agendaTitel");

    List<Document> speechDocs = getSpeechCollection()
            .find(bySpeaker)
            .projection(overviewFields)
            .sort(Sorts.descending("dateTime"))
            .into(new ArrayList<>());

    List<SpeechOverview> overviews = new ArrayList<>();
    for (Document speechDoc : speechDocs) {
        SpeechOverview row = new SpeechOverview(
                speechDoc.getString("speechKey"),
                speechDoc.getInteger("speakerId"),
                speechDoc.getString("dateTimeString"),
                speechDoc.getString("speakerName"),
                speechDoc.getString("fraction"),
                speechDoc.getString("agendaTitel"));
        overviews.add(row);
    }
    return overviews;
}
/**
 * Implemented by Valentin.
 * Adds the session-level date/time to the metadata of a speech.
 * Note: the session time is stored in several textual formats, so the known
 * formats are tried one after another until one of them parses.
 *
 * @param sessionId id of the session whose date/time string is looked up
 * @param md metadata object that receives the date/time string and, if it can be parsed, the parsed date/time
 */
public static void augmentSpeechMetaDataFromSession(int sessionId, SpeechMetaData md) {
    String rawDateTime = getSessionDateTime(sessionId);
    if (rawDateTime == null) {
        return;
    }
    md.setDateTimeString(rawDateTime);

    String[] knownFormats = {
            "dd.MM.yyyy HH:mm",
            "dd.MM.yyyy H:mm",
            "dd.MM.yyyy HH.mm",
            "dd.MM.yyyy H.mm"
    };
    for (String knownFormat : knownFormats) {
        LocalDateTime parsed = GeneralUtils.parseDateTime(rawDateTime, knownFormat);
        if (parsed != null) {
            md.setDateTime(parsed);
            return;
        }
    }
    // None of the known formats matched.
    Logger.error(rawDateTime + " could not be parsed");
}
/**
* Implementiert von Valentin
* Liefert das Datum und die Uhrzeit einer Sitzung zurück
* @param sessionId
* @return
@ -408,6 +844,7 @@ public class MongoPprUtils {
}
/**
* Implementiert von Valentin
* Liefert den Agenda-Titel zurück
* @param sessionId
* @return
@ -418,11 +855,13 @@ public class MongoPprUtils {
if ((iter == null || (iter.first() == null))) {
return "(kein Agendatitel)";
} else {
return (String) iter.first().get("title");
String agendaTitel = (String) iter.first().get("title");
return String.format("%d / %s", sessionId, agendaTitel);
}
}
/**
* Implementiert von Valentin
* Liefert die Rede-Informationen für die Anzeige einer Rede:
* - die Rede-ID
* - Name und Fraktion des Redners
@ -430,20 +869,321 @@ public class MongoPprUtils {
* @param key: Rede ID
* @return
*/
public static HtmlSpeech getSpeechByKey(String key) {
public static HtmlSpeech getHtmlSpeechByKey(String key) {
Document filter = new Document("speechKey", key);
Document speechDoc = getSpeechCollection().find(filter).first();
if (speechDoc == null) {
Logger.error("Rede " + key + " nicht gefunden");
return null;
}
return new HtmlSpeech(speechDoc);
}
public static String getMemberPhoto(String memberId) {
Document filter = new Document("memberId", memberId);
Document pictureDoc = getPicturesCollection().find(filter).first();
if (pictureDoc == null) {
/**
* Implementiert von Jonas
* Liefert die Rede-Informationen für die Anzeige einer Rede
* @param key: Rede ID
* @return Speech
*/
public static Speech getSpeechByKey(String key) {
Document filter = new Document("speechKey", key);
Document speechDoc = getSpeechCollection().find(filter).first();
if (speechDoc == null) {
Logger.error("Rede " + key + " nicht gefunden");
return null;
} else {
return (String) pictureDoc.get("base64");
}
return new Speech_MongoDB_Impl(speechDoc, true);
}
// getMemberPhoto
/**
 * Returns the picture of a member of parliament.
 *
 * @param id value matched against the "memberId" field of the pictures collection
 * @return the "base64" field of the first matching document, or null if no document matches
 */
public static String getMemberPhoto(String id) {
    Document pictureDoc = MongoDBHandler.findFirstDocumentInCollection(getPicturesCollection(), "memberId", id);
    return (pictureDoc != null) ? pictureDoc.getString("base64") : null;
}
// - - - - - - - - - - - - - - - - - - - - - - - - - - - -
// Metadata
/**
 * Implemented by Valentin.
 * Updates (or creates, if not yet present) several metadata documents:
 * - the list of parties as stored in the speaker collection ("parties_of_speakers")
 * - the list of parties/fractions as stored in the speech collection ("parties_from_speeches");
 *   the two lists differ considerably
 * - the topics produced by the NLP analysis of the speeches ("topics")
 * Each document is written with an upsert, keyed by its "type" field.
 */
public static void rebuildMetadata() {
    MongoDatabase database = MongoDBHandler.getMongoDatabase();

    Logger.info("Collecting Partei/Fraktion Information");
    List<String> speakerParties = new java.util.ArrayList<>();
    getSpeakerCollection().distinct("party", String.class).into(speakerParties);
    List<String> speechFractions = new java.util.ArrayList<>();
    getSpeechCollection().distinct("fraction", String.class).into(speechFractions);

    Logger.info("Updating Metadata Collection: begin");
    MongoDBHandler.createCollection(database, METADATA_COLLECTION_NAME);
    MongoCollection<Document> metadata = getMetadataCollection();
    com.mongodb.client.model.ReplaceOptions upsert = new com.mongodb.client.model.ReplaceOptions().upsert(true);

    metadata.replaceOne(
            new Document("type", "parties_from_speeches"),
            MongoDBHandler.createDocument(false, Map.of("type", "parties_from_speeches",
                    "value", speechFractions)),
            upsert);

    metadata.replaceOne(
            new Document("type", "parties_of_speakers"),
            MongoDBHandler.createDocument(false, Map.of("type", "parties_of_speakers",
                    "value", speakerParties)),
            upsert);

    Logger.info("Enriching Speech Information: begin");
    enrichSpeechDocuments();
    Logger.info("Enriching Speech Information: end");

    Logger.info("Collecting Topics Information");
    List<Document> pipeline = Arrays.asList(
            new Document("$unwind", "$topics"),                                        // one document per topic entry
            new Document("$group", new Document("_id", "$topics")),                    // distinct topics
            new Document("$project", new Document("topic", "$_id").append("_id", 0))); // rename _id to topic
    Set<String> topics = new HashSet<>();
    for (Document topicDoc : getSpeechCollection().aggregate(pipeline)) {
        topics.add(topicDoc.getString("topic"));
    }

    metadata.replaceOne(
            new Document("type", "topics"),
            MongoDBHandler.createDocument(false, Map.of("type", "topics",
                    "value", topics)),
            upsert);

    Logger.info("Updating Metadata Collection: end");
}
/**
 * Implemented by Valentin.
 * Loads all speeches of one speaker.
 *
 * @param speakerId speaker id as text; it is parsed with Integer.parseInt before querying "speakerId"
 * @return the speaker's speeches
 */
public static List<Speech> getSpeechesBySpeakerId(String speakerId) {
    int numericSpeakerId = Integer.parseInt(speakerId);

    List<Document> speechDocs = new ArrayList<>();
    getSpeechCollection().find(new Document("speakerId", numericSpeakerId)).into(speechDocs);

    List<Speech> speeches = new ArrayList<>();
    for (Document speechDoc : speechDocs) {
        Speech speech = new Speech_MongoDB_Impl(speechDoc, true);
        speeches.add(speech);
    }
    return speeches;
}
/**
 * Implemented by Valentin.
 * Loads every speech in the speech collection.
 *
 * @return all speeches
 */
public static List<Speech> getAllSpeeches() {
    Document matchEverything = new Document();

    List<Document> speechDocs = new ArrayList<>();
    getSpeechCollection().find(matchEverything).into(speechDocs);

    List<Speech> speeches = new ArrayList<>();
    for (Document speechDoc : speechDocs) {
        Speech speech = new Speech_MongoDB_Impl(speechDoc, true);
        speeches.add(speech);
    }
    return speeches;
}
/**
 * Implemented by Jonas.
 * Returns all speeches that carry a given topic.
 *
 * @param topic value matched against the field path "analysisResults.topics.topic"
 * @return list of matching speeches
 */
public static List<Speech> getAllSpeechesWithTopic(String topic) {
    Document byTopic = new Document("analysisResults.topics.topic", topic);

    List<Document> speechDocs = new ArrayList<>();
    getSpeechCollection().find(byTopic).into(speechDocs);

    List<Speech> speeches = new ArrayList<>();
    for (Document speechDoc : speechDocs) {
        Speech speech = new Speech_MongoDB_Impl(speechDoc, true);
        speeches.add(speech);
    }
    return speeches;
}
/**
 * Implemented by Jonas.
 * Returns the part-of-speech tags of a speech together with how often each one occurs.
 *
 * @param speechId key of the speech
 * @return map from POS tag to its count as computed by Token.countPOS;
 *         an empty map if no speech with this key exists
 */
public static Map<String, Integer> getPOSInformationCardinalitiesForSpeechById(String speechId) {
    HtmlSpeech speech = getHtmlSpeechByKey(speechId);
    if (speech == null) {
        // getHtmlSpeechByKey returns null for an unknown key; avoid dereferencing it.
        return new HashMap<>();
    }
    // NOTE(review): assumes getNlp() is non-null for a stored speech — confirm for speeches without NLP results.
    List<Token> tokens = speech.getNlp().getTokens();
    return Token.countPOS(tokens);
}
/**
 * Implemented by Jonas.
 * Counts the named entities of a speech, grouped by entity type and entity text.
 * Only the counts of the type "CARDINAL" are returned.
 *
 * @param speechId key of the speech
 * @return map from entity text to its count for the type "CARDINAL";
 *         null if the speech contains no entity of that type
 */
public static Map<String, Integer> getNamedEntitiesInformationCardinalitiesForSpeechById(String speechId) {
    // entity type -> (entity text -> number of occurrences)
    Map<String, Map<String, Integer>> countsByType = new HashMap<>();
    for (NamedEntity entity : getHtmlSpeechByKey(speechId).getNlp().getNamedEntities()) {
        String type = entity.getType();
        String text = entity.getText();
        // First sighting of a type creates its inner map; every sighting of a text adds 1 to its count.
        countsByType
                .computeIfAbsent(type, unseenType -> new HashMap<>())
                .merge(text, 1, Integer::sum);
    }
    return countsByType.get("CARDINAL"); // needs fixing
}
/**
 * Implemented by Valentin.
 * Returns the list of all parties that occur in the list of members of parliament.
 * This list is used to filter the members of parliament on the corresponding page.
 * It is read from the metadata document of type "parties_of_speakers".
 *
 * @return a new list with the stored party names; empty if the metadata document does not exist
 */
public static List<String> getAllPartiesOfSpeakers() {
    Document metadataDoc = MongoDBHandler.findFirstDocumentInCollection(getMetadataCollection(), "type", "parties_of_speakers");
    if (metadataDoc != null) {
        List<String> storedParties = metadataDoc.getList("value", String.class);
        return new ArrayList<>(storedParties);
    }
    return new ArrayList<>();
}
/**
 * Fixed list of party/fraction labels offered for filtering speeches.
 */
public static final List<String> ALL_PARTIES_FROM_SPEECHES = Arrays.asList(
        "Afd",
        "BSW",
        "GRÜNEN",
        "CDU/CSU",
        "LINKE",
        "FDP",
        "Fraktionslos", // also present in the data as "fraktionslos"
        "SPD",
        "keine"         // stands for the null value
);

/**
 * Implemented by Valentin.
 * Returns the list of all parties/fractions that occur in the list of speeches.
 * This list is used to filter the speeches on the corresponding page.
 * Because the data quality of this field is very poor, some workarounds are needed:
 * - Bündnis 90 / Die Grünen appears in 5 different spellings
 * - Die Linke also appears in 5 different spellings
 * - because of the different spellings, the frontend has to work with pattern matching
 * - 6 speeches carry "SPDCDU/CSU"; these speeches are not considered when filtering
 * - 3561 of the 25387 speeches have no party/fraction at all; that is too many to ignore,
 *   hence the entry "keine"
 * - both spellings "Fraktionslos" (166 speeches) and "fraktionslos" (311 speeches) occur
 *
 * @return the shared constant list of party/fraction labels
 */
public static List<String> getAllPartiesFromSpeeches() {
    List<String> parties = ALL_PARTIES_FROM_SPEECHES;
    return parties;
}
/**
 * Implemented by Valentin.
 * Enriches the speech documents with additional fields:
 * - date and time of the speech, parsed and as text: dateTime, dateTimeString
 * - the agenda title: agendaTitel
 * - the topics of the speech taken from the NLP analysis results: topics
 * A field group is only written for documents that do not have it yet.
 */
public static void enrichSpeechDocuments() {
    MongoCollection<Document> speechCollection = getSpeechCollection();
    String[] knownFormats = {
            "dd.MM.yyyy HH:mm",
            "dd.MM.yyyy H:mm",
            "dd.MM.yyyy HH.mm",
            "dd.MM.yyyy H.mm"
    };

    for (Document speechDoc : speechCollection.find()) {
        // Session and agenda information is always available.
        if (!speechDoc.containsKey("dateTime")) {
            int sessionId = speechDoc.getInteger("sessionId");
            int agendaItemId = speechDoc.getInteger("agendaItemId");
            String agendaTitel = getAgendaTitle(sessionId, agendaItemId);

            String dateTimeString = getSessionDateTime(sessionId);
            LocalDateTime dateTime = null;
            if (dateTimeString != null) {
                // Try the known formats in order and stop at the first one that parses.
                for (int i = 0; i < knownFormats.length && dateTime == null; i++) {
                    dateTime = GeneralUtils.parseDateTime(dateTimeString, knownFormats[i]);
                }
                if (dateTime == null) {
                    Logger.error(dateTimeString + " could not be parsed");
                }
            }

            Document sessionFields = new Document("dateTime", dateTime)
                    .append("dateTimeString", dateTimeString)
                    .append("agendaTitel", agendaTitel);
            speechCollection.updateOne(
                    new Document("_id", speechDoc.get("_id")),
                    new Document("$set", sessionFields));
        }

        // NLP information only exists after the analysis has been run.
        if (speechDoc.containsKey("topics") || !speechDoc.containsKey("analysisResults")) {
            continue;
        }
        Document nlpDoc = (Document) speechDoc.get("analysisResults");
        if (!nlpDoc.containsKey("topics")) {
            continue;
        }
        Set<String> topics = new HashSet<>();
        for (Document topicDoc : nlpDoc.getList("topics", Document.class)) {
            topics.add(topicDoc.getString("topic"));
        }
        speechCollection.updateOne(
                new Document("_id", speechDoc.get("_id")),
                new Document("$set", new Document("topics", topics)));
    }
}
/**
 * Implemented by Valentin.
 * Returns the list of all topics.
 * This list is used to filter the speeches on the corresponding page.
 * It is read from the metadata document of type "topics".
 *
 * @return a new list with the stored topics; empty if the metadata document does not exist
 */
public static List<String> getAllTopics() {
    Document metadataDoc = MongoDBHandler.findFirstDocumentInCollection(getMetadataCollection(), "type", "topics");
    if (metadataDoc != null) {
        List<String> storedTopics = metadataDoc.getList("value", String.class);
        return new ArrayList<>(storedTopics);
    }
    return new ArrayList<>();
}
}

View file

@ -1,25 +0,0 @@
package org.texttechnologylab.project.gruppe_05_1.database.domainimp.speeches;
import org.bson.Document;
import org.texttechnologylab.project.gruppe_05_1.xml.speeches.Impls.MemberOfParliament_File_Impl;
import org.texttechnologylab.project.gruppe_05_1.xml.speeches.Interfaces.MemberOfParliament;
/**
 * MemberOfParliament implementation whose values are read from a MongoDB document.
 * All state is handled by MemberOfParliament_File_Impl; this class only maps
 * document fields to the arguments of the superclass constructor.
 */
public class MemberOfParliament_MongoDB_Impl extends MemberOfParliament_File_Impl implements MemberOfParliament {
/**
 * Reads the member's fields from the given document and passes them, in the order
 * listed below, to the superclass constructor.
 *
 * @param mongoDocument document providing the string fields "name", "firstName", "title",
 *                      "dateOfBirth", "dateOfDeath", "placeOfBirth", "gender", "religion",
 *                      "party" and "image_data", and the integer fields "id",
 *                      "firstLegislativePeriod" and "lastLegislativePeriod"
 */
public MemberOfParliament_MongoDB_Impl(Document mongoDocument) {super(
mongoDocument.getString("name"),
mongoDocument.getString("firstName"),
mongoDocument.getString("title"),
mongoDocument.getString("dateOfBirth"),
mongoDocument.getString("dateOfDeath"),
mongoDocument.getString("placeOfBirth"),
mongoDocument.getString("gender"),
mongoDocument.getString("religion"),
mongoDocument.getInteger("id"),
mongoDocument.getString("party"),
// NOTE(review): this argument is always null here; its meaning is defined by the
// MemberOfParliament_File_Impl constructor — confirm that null is acceptable there.
null,
mongoDocument.getInteger("firstLegislativePeriod"),
mongoDocument.getInteger("lastLegislativePeriod"),
mongoDocument.getString("image_data"));
}
}

View file

@ -1,16 +0,0 @@
package org.texttechnologylab.project.gruppe_05_1.database.domainimp.speeches;
import org.bson.Document;
import org.texttechnologylab.project.gruppe_05_1.xml.speeches.Impls.Speaker_File_Impl;
import org.texttechnologylab.project.gruppe_05_1.xml.speeches.Interfaces.Speaker;
/**
 * Speaker implementation whose values are read from a MongoDB document.
 * All state is handled by Speaker_File_Impl; this class only maps document
 * fields to the arguments of the superclass constructor.
 */
public class Speaker_MongoDB_Impl extends Speaker_File_Impl implements Speaker {
/**
 * Reads the speaker entry from the given document and passes the values to the
 * superclass constructor.
 *
 * @param mongoDocument document providing the integer fields "contentId", "speechId" and
 *                      "speakerId" and the string fields "speakerName" and "fraction"
 */
public Speaker_MongoDB_Impl(Document mongoDocument) {
super(
mongoDocument.getInteger("contentId"),
mongoDocument.getInteger("speechId"),
mongoDocument.getInteger("speakerId"),
mongoDocument.getString("speakerName"),
mongoDocument.getString("fraction"));
}
}

View file

@ -1,78 +0,0 @@
package org.texttechnologylab.project.gruppe_05_1.database.domainimp.speeches;
import de.tudarmstadt.ukp.dkpro.core.api.metadata.type.DocumentMetaData;
import org.apache.uima.UIMAException;
import org.apache.uima.fit.factory.JCasFactory;
import org.apache.uima.jcas.JCas;
import org.bson.Document;
import org.texttechnologylab.project.gruppe_05_1.xml.speeches.Impls.Speech_File_Impl;
import org.texttechnologylab.project.gruppe_05_1.xml.speeches.Interfaces.Content;
import org.texttechnologylab.project.gruppe_05_1.xml.speeches.Interfaces.Speech;
import java.util.List;
/**
 * Speech implementation whose values and contents are read from a MongoDB document.
 */
public class Speech_MongoDB_Impl extends Speech_File_Impl implements Speech {

    /**
     * Builds the speech from a speech document and adds one content object per entry of
     * its "speechContents" list.
     *
     * @param mongoDocument document providing "sessionId", "agendaItemId", "speechId",
     *                      "speakerId", "speakerName", "fraction", "speechKey" and the list
     *                      "speechContents"; a missing list is treated as "no contents"
     * @throws IllegalArgumentException if a content entry has a missing or unknown "type"
     */
    public Speech_MongoDB_Impl(Document mongoDocument) {
        super(
                mongoDocument.getInteger("sessionId"),
                mongoDocument.getInteger("agendaItemId"),
                mongoDocument.getInteger("speechId"),
                mongoDocument.getInteger("speakerId"),
                mongoDocument.getString("speakerName"),
                mongoDocument.getString("fraction"),
                mongoDocument.getString("speechKey")
        );

        // Typed accessor instead of an unchecked cast; returns null when the field is absent.
        List<Document> contents = mongoDocument.getList("speechContents", Document.class);
        if (contents == null) {
            return;
        }
        for (Document content : contents) {
            String type = content.getString("type");
            if (type == null) {
                // switch on a null String would throw a NullPointerException; report it like any unknown type.
                throw new IllegalArgumentException("Unknown content type: " + type);
            }
            switch (type) {
                case "line":
                    this.addContent(new Line_MongoDB_Impl(content));
                    break;
                case "comment":
                    this.addContent(new Comment_MongoDB_Impl(content));
                    break;
                case "speaker":
                    this.addContent(new Speaker_MongoDB_Impl(content));
                    break;
                default:
                    throw new IllegalArgumentException("Unknown content type: " + type);
            }
        }
    }

    /**
     * Concatenates the text of all line and comment contents of the speech.
     * Each non-empty text is followed by a line break; contents of type "speaker" are ignored.
     *
     * @return the combined text with leading and trailing whitespace removed
     */
    public String getFullText() {
        StringBuilder fullText = new StringBuilder();
        // Iterate over all contents already stored in the speech.
        for (Object content : this.getSpeechContents()) {
            String text = null;
            if (content instanceof Line_MongoDB_Impl) {
                text = ((Line_MongoDB_Impl) content).getContent();
            } else if (content instanceof Comment_MongoDB_Impl) {
                text = ((Comment_MongoDB_Impl) content).getComment();
            }
            if (text != null && !text.isEmpty()) {
                fullText.append(text).append("\n");
            }
        }
        return fullText.toString().trim();
    }

    /**
     * Creates a JCas whose document text is the full text of the speech and whose
     * DocumentMetaData carries the speech key as document id.
     *
     * @return the newly created JCas
     * @throws UIMAException if the JCas cannot be created
     */
    public JCas toCas() throws UIMAException {
        JCas jCas = JCasFactory.createJCas();
        jCas.setDocumentText(this.getFullText());
        DocumentMetaData dmd = new DocumentMetaData(jCas);
        dmd.setDocumentId(this.getSpeechKey());
        dmd.addToIndexes();
        return jCas;
    }
}

View file

@ -1,10 +1,12 @@
package org.texttechnologylab.project.gruppe_05_1.database.domainimp.speeches;
package org.texttechnologylab.project.gruppe_05_1.database.domainimpl.mdb;
import org.bson.Document;
import org.texttechnologylab.project.gruppe_05_1.xml.speeches.Impls.AgendaItem_File_Impl;
import org.texttechnologylab.project.gruppe_05_1.xml.speeches.Interfaces.AgendaItem;
/**
* Datei implementiert von Henry
*/
public class AgendaItem_MongoDB_Impl extends AgendaItem_File_Impl implements AgendaItem {
public AgendaItem_MongoDB_Impl(Document mongoDocument) {
super(

View file

@ -11,7 +11,10 @@ import java.util.HashMap;
import java.util.List;
import java.util.Map;
public class BiografischeAngaben_Mongo_Impl extends BiografischeAngaben implements MongoOperations<BiografischeAngaben> {
/**
* Datei implementiert von Valentin
*/
public class BiographicalInformation_MongoDB_Impl extends BiografischeAngaben implements MongoOperations<BiografischeAngaben> {
@Override
public Document createEntity(BiografischeAngaben entity) {
Map<String, Object> fields = new HashMap<>();

View file

@ -10,7 +10,10 @@ import java.util.HashMap;
import java.util.List;
import java.util.Map;
public class Institution_Mongo_Impl extends Institution implements MongoOperations<Institution> {
/**
* Datei implementiert von Valentin
*/
public class Institution_MongoDB_Impl extends Institution implements MongoOperations<Institution> {
@Override
public Document createEntity(Institution entity) {
Map<String, Object> fields = new HashMap<>();

View file

@ -11,7 +11,10 @@ import java.util.HashMap;
import java.util.List;
import java.util.Map;
public class Wahlperiode_Mongo_Impl extends Wahlperiode implements MongoOperations<Wahlperiode> {
/**
* Datei implementiert von Valentin
*/
public class LegislativePeriod_MongoDB_Impl extends Wahlperiode implements MongoOperations<Wahlperiode> {
@Override
public Document createEntity(Wahlperiode entity) {

View file

@ -10,7 +10,10 @@ import java.util.HashMap;
import java.util.List;
import java.util.Map;
public class MdbName_Mongo_Impl extends MdbName implements MongoOperations<MdbName> {
/**
* Datei implementiert von Valentin
*/
public class MdbName_MongoDB_Impl extends MdbName implements MongoOperations<MdbName> {
@Override
public Document createEntity(MdbName entity) {
Map<String, Object> fields = new HashMap<>();

View file

@ -11,7 +11,10 @@ import java.util.ArrayList;
import java.util.List;
import java.util.Map;
public class Mdb_Mongo_Impl extends Mdb implements MongoOperations<Mdb> {
/**
* Datei implementiert von Valentin
*/
public class Mdb_MongoDB_Impl extends Mdb implements MongoOperations<Mdb> {
@Override
public Document createEntity(Mdb entity) {
Document bioDoc = factory.createBiografischeAngaben(entity.getBio());

View file

@ -3,17 +3,17 @@ package org.texttechnologylab.project.gruppe_05_1.database.domainimpl.mdb;
import org.bson.Document;
import org.texttechnologylab.project.gruppe_05_1.database.MongoDBHandler;
import org.texttechnologylab.project.gruppe_05_1.database.MongoOperations;
import org.texttechnologylab.project.gruppe_05_1.domain.mdb.MdbName;
import org.texttechnologylab.project.gruppe_05_1.domain.mdb.Wahlperiode;
import org.texttechnologylab.project.gruppe_05_1.domain.speaker.Membership;
import org.texttechnologylab.project.gruppe_05_1.domain.speaker.Speaker;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
public class Membership_Mongo_Impl extends Membership implements MongoOperations<Membership> {
/**
* Datei implementiert von Valentin
*/
public class Membership_MongoDB_Impl extends Membership implements MongoOperations<Membership> {
@Override
public Document createEntity(Membership entity) {
Map<String, Object> fields = new HashMap<>();

View file

@ -1,10 +1,12 @@
package org.texttechnologylab.project.gruppe_05_1.database.domainimp.speeches;
package org.texttechnologylab.project.gruppe_05_1.database.domainimpl.mdb;
import org.bson.Document;
import org.texttechnologylab.project.gruppe_05_1.xml.speeches.Impls.Session_File_Impl;
import org.texttechnologylab.project.gruppe_05_1.xml.speeches.Interfaces.Session;
/**
* Datei implementiert von Henry
*/
public class Session_MongoDB_Impl extends Session_File_Impl implements Session {
public Session_MongoDB_Impl(Document mongoDocument) {

View file

@ -0,0 +1,192 @@
package org.texttechnologylab.project.gruppe_05_1.database.domainimpl.mdb;
import org.bson.Document;
import org.texttechnologylab.project.gruppe_05_1.database.MongoDBHandler;
import org.texttechnologylab.project.gruppe_05_1.database.MongoOperations;
import org.texttechnologylab.project.gruppe_05_1.domain.speaker.Membership;
import org.texttechnologylab.project.gruppe_05_1.domain.speaker.Speaker;
import org.texttechnologylab.project.gruppe_05_1.util.Logger;
import org.w3c.dom.Element;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.time.LocalDate;
import java.util.*;
import static org.texttechnologylab.project.gruppe_05_1.Main.TEMP_EXPORT_DIR;
import static org.texttechnologylab.project.gruppe_05_1.database.MongoPprUtils.getMemberPhoto;
/**
* Datei implementiert von Valentin
* Datei modifiziert von Jonas
* toTeX() und toXML() hinzugefügt von Jonas
*/
public class Speaker_MongoDB_Impl extends Speaker implements MongoOperations<Speaker> {
/**
 * Populates this speaker from a MongoDB document and returns it.
 *
 * <p>Scalar fields are read by their document keys; the two date fields go
 * through {@link #parseTimestampSafely(Date)}. The membership list is rebuilt
 * from the embedded {@code memberships} documents. A document without a
 * {@code memberships} field yields an empty membership list instead of
 * failing with a {@link NullPointerException}.
 *
 * @param mongoDocument the speaker document to read from
 * @return this instance
 */
public Speaker_MongoDB_Impl createSpeakerMongoDBImpl(Document mongoDocument) {
    this.setId(mongoDocument.getString("_id"));
    this.setName(mongoDocument.getString("name"));
    this.setFirstName(mongoDocument.getString("firstName"));
    this.setTitle(mongoDocument.getString("title"));
    this.setGeburtsdatum(parseTimestampSafely(mongoDocument.getDate("geburtsdatum")));
    this.setGeburtsort(mongoDocument.getString("geburtsort"));
    this.setSterbedatum(parseTimestampSafely(mongoDocument.getDate("sterbedatum")));
    this.setGeschlecht(mongoDocument.getString("geschlecht"));
    this.setBeruf(mongoDocument.getString("beruf"));
    this.setAkademischertitel(mongoDocument.getString("akademischertitel"));
    this.setFamilienstand(mongoDocument.getString("familienstand"));
    this.setReligion(mongoDocument.getString("religion"));
    this.setVita(mongoDocument.getString("vita"));
    this.setParty(mongoDocument.getString("party"));

    // The field is written as a list of documents by createEntity; the cast mirrors that.
    @SuppressWarnings("unchecked")
    List<Document> memberships = (List<Document>) mongoDocument.get("memberships");
    List<Membership> membershipList = new ArrayList<>();
    if (memberships != null) {
        for (Document membership : memberships) {
            Membership membershipObj = new Membership();
            membershipObj.setRole(membership.getString("role"));
            membershipObj.setMember(membership.getString("member"));
            membershipObj.setBegin(parseTimestampSafely(membership.getDate("begin")));
            membershipObj.setEnd(parseTimestampSafely(membership.getDate("end")));
            membershipObj.setLabel(membership.getString("label"));
            membershipObj.setWp(membership.getInteger("wp"));
            membershipList.add(membershipObj);
        }
    }
    this.setMemberships(membershipList);
    return this;
}
/**
 * Converts a date value read from MongoDB into a {@link LocalDate}.
 *
 * <p>The previous implementation parsed {@code Date.toString()} as an ISO
 * date, which never succeeds, so every date was silently read back as
 * {@code null}. The conversion now goes through the epoch milliseconds.
 *
 * <p>NOTE(review): the calendar day is taken in UTC, which matches how the
 * MongoDB Java driver's LocalDate codec stores dates. Confirm that the
 * writer side does not convert with a different time zone.
 *
 * @param timestamp the stored date, may be {@code null}
 * @return the corresponding calendar day, or {@code null} if {@code timestamp} is {@code null}
 */
public LocalDate parseTimestampSafely(Date timestamp) {
    if (timestamp == null) {
        return null;
    }
    // getTime() works for every Date subclass, unlike toInstant() on java.sql.Date.
    return java.time.Instant.ofEpochMilli(timestamp.getTime())
            .atZone(java.time.ZoneOffset.UTC)
            .toLocalDate();
}
/**
 * Builds the MongoDB document for a speaker.
 *
 * <p>The memberships are converted through the factory first; afterwards all
 * speaker attributes are collected in a map keyed by their document field
 * names and handed to {@code MongoDBHandler.createDocument}.
 *
 * @param entity the speaker to serialize
 * @return the document created from the speaker's fields
 */
@Override
public Document createEntity(Speaker entity) {
    List<Document> membershipDocuments = factory.createMemberships(entity.getMemberships());

    Map<String, Object> values = new HashMap<>();
    values.put("_id", entity.getId());
    values.put("name", entity.getName());
    values.put("firstName", entity.getFirstName());
    values.put("title", entity.getTitle());
    values.put("geburtsdatum", entity.getGeburtsdatum());
    values.put("geburtsort", entity.getGeburtsort());
    values.put("sterbedatum", entity.getSterbedatum());
    values.put("geschlecht", entity.getGeschlecht());
    values.put("beruf", entity.getBeruf());
    values.put("akademischertitel", entity.getAkademischertitel());
    values.put("familienstand", entity.getFamilienstand());
    values.put("religion", entity.getReligion());
    values.put("vita", entity.getVita());
    values.put("party", entity.getParty());
    values.put("memberships", membershipDocuments);

    return MongoDBHandler.createDocument(false, values);
}
/**
 * Converts every speaker of the given list into its MongoDB document.
 *
 * @param list the speakers to serialize
 * @return a new list holding one document per speaker, in input order
 */
@Override
public List<Document> createList(List<Speaker> list) {
    List<Document> documents = new ArrayList<>();
    list.forEach(speaker -> documents.add(createEntity(speaker)));
    return documents;
}
/**
 * Renders this speaker's metadata as a LaTeX fragment.
 *
 * <p>The member photo is fetched as Base64, decoded and written to
 * {@code TEMP_EXPORT_DIR} as {@code speaker_photo_<id>.png}. The fragment
 * consists of two minipages: the photo (or a placeholder text) and a table
 * with name, parliamentary group, religion, birth/death dates and birthplace.
 *
 * <p>The {@code \includegraphics} command is only emitted when the photo file
 * was actually written. Previously it was also emitted for an empty image or
 * after a failed write, referencing a file that does not exist.
 *
 * @return the LaTeX source for the speaker block
 */
public String toTeX() {
    String photoFileName = "speaker_photo_" + this.getId() + ".png";
    boolean photoWritten = false;

    try {
        // Decode the Base64 string to a byte array
        byte[] imageBytes = Base64.getDecoder().decode(getMemberPhoto(this.getId()));
        if (imageBytes.length != 0) {
            File outputFile = new File(TEMP_EXPORT_DIR + photoFileName);
            try {
                try (FileOutputStream fos = new FileOutputStream(outputFile)) {
                    fos.write(imageBytes);
                }
                // Only reached when write and close both succeeded.
                photoWritten = true;
                Logger.debug("PNG file has been saved successfully.");
            } catch (IOException e) {
                Logger.error("Failed to save member PNG file.");
                Logger.error(e.getMessage());
            }
        }
    } catch (Exception e) {
        // Best effort: a missing or malformed photo must not abort the export.
        Logger.warn("Failed to decode member photo. Maybe its missing from the DB?");
        Logger.debug(Arrays.toString(e.getStackTrace()));
    }

    // Speaker metadata: image on the left, metadata table on the right
    StringBuilder tex = new StringBuilder();
    tex.append("\\begin{minipage}{0.5\\textwidth}\n");
    if (photoWritten) {
        tex.append("\\includegraphics[width=0.5\\textwidth]{").append(photoFileName).append("}\n");
    } else {
        tex.append("(No image available)\n");
    }
    tex.append("\\end{minipage}\n");
    tex.append("\\begin{minipage}{0.5\\textwidth}\n");
    tex.append("\\begin{tabular}{r l}\n");
    if (this.getTitle() != null) {
        tex.append("Name & ").append(this.getTitle()).append(" ").append(this.getFirstName()).append(" ").append(this.getName()).append("\\\\\n");
    } else {
        tex.append("Name & ").append(this.getFirstName()).append(" ").append(this.getName()).append("\\\\\n");
    }
    tex.append("Fraktion & ").append(this.getParty()).append("\\\\\n");
    if (this.getReligion() != null) {
        tex.append("Religion & ").append(this.getReligion()).append("\\\\\n"); // only show if available
    }
    if (this.getGeburtsdatum() != null) {
        tex.append("Geburtsdatum & ").append(this.getGeburtsdatum()).append("\\\\\n");
    }
    if (this.getSterbedatum() != null) {
        tex.append("Sterbedatum & ").append(this.getSterbedatum()).append("\\\\\n"); // only show if available
    }
    tex.append("Geburtsort & ").append(this.getGeburtsort()).append("\\\\\n");
    tex.append("\\end{tabular}\n");
    tex.append("\\end{minipage}\n");
    return tex.toString();
}
/**
 * Creates a {@code speaker} element with one attribute per speaker field and
 * one {@code membership} child element per membership.
 *
 * <p>Every attribute is written through {@link #setAttributeOrEmpty}, so a
 * {@code null} field becomes an empty attribute value. Previously only some
 * fields were guarded this way. A {@code null} membership list produces no
 * child elements.
 *
 * @param doc the XML document used to create the elements
 * @return the populated {@code speaker} element (not yet attached to {@code doc})
 */
public Element toXML(org.w3c.dom.Document doc) {
    Element speakerElement = doc.createElement("speaker");
    setAttributeOrEmpty(speakerElement, "id", this.getId());
    setAttributeOrEmpty(speakerElement, "name", this.getName());
    setAttributeOrEmpty(speakerElement, "firstName", this.getFirstName());
    setAttributeOrEmpty(speakerElement, "title", this.getTitle());
    setAttributeOrEmpty(speakerElement, "geburtsdatum", this.getGeburtsdatum());
    setAttributeOrEmpty(speakerElement, "geburtsort", this.getGeburtsort());
    setAttributeOrEmpty(speakerElement, "sterbedatum", this.getSterbedatum());
    setAttributeOrEmpty(speakerElement, "geschlecht", this.getGeschlecht());
    setAttributeOrEmpty(speakerElement, "beruf", this.getBeruf());
    setAttributeOrEmpty(speakerElement, "akademischertitel", this.getAkademischertitel());
    setAttributeOrEmpty(speakerElement, "familienstand", this.getFamilienstand());
    setAttributeOrEmpty(speakerElement, "religion", this.getReligion());
    setAttributeOrEmpty(speakerElement, "vita", this.getVita());
    setAttributeOrEmpty(speakerElement, "party", this.getParty());

    List<Membership> memberships = this.getMemberships();
    if (memberships != null) {
        for (Membership membership : memberships) {
            Element membershipElement = doc.createElement("membership");
            setAttributeOrEmpty(membershipElement, "role", membership.getRole());
            setAttributeOrEmpty(membershipElement, "member", membership.getMember());
            setAttributeOrEmpty(membershipElement, "begin", membership.getBegin());
            setAttributeOrEmpty(membershipElement, "end", membership.getEnd());
            setAttributeOrEmpty(membershipElement, "label", membership.getLabel());
            // Kept as String.valueOf so the written value is unchanged from before.
            membershipElement.setAttribute("wp", String.valueOf(membership.getWp()));
            speakerElement.appendChild(membershipElement);
        }
    }
    return speakerElement;
}

/** Sets the attribute to the value's string form, or to the empty string when the value is null. */
private static void setAttributeOrEmpty(Element element, String name, Object value) {
    element.setAttribute(name, value != null ? value.toString() : "");
}
}

View file

@ -1,49 +0,0 @@
package org.texttechnologylab.project.gruppe_05_1.database.domainimpl.mdb;
import org.bson.Document;
import org.texttechnologylab.project.gruppe_05_1.database.MongoDBHandler;
import org.texttechnologylab.project.gruppe_05_1.database.MongoOperations;
import org.texttechnologylab.project.gruppe_05_1.domain.speaker.Membership;
import org.texttechnologylab.project.gruppe_05_1.domain.speaker.Speaker;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
/**
 * Writes {@link Speaker} objects as MongoDB documents.
 */
public class Speaker_Mongo_Impl extends Speaker implements MongoOperations<Speaker> {

    /**
     * Builds the MongoDB document for a speaker.
     *
     * @param entity the speaker to serialize
     * @return the document created from the speaker's fields and memberships
     */
    @Override
    public Document createEntity(Speaker entity) {
        List<Document> membershipDocuments = factory.createMemberships(entity.getMemberships());

        Map<String, Object> values = new HashMap<>();
        values.put("_id", entity.getId());
        values.put("name", entity.getName());
        values.put("firstName", entity.getFirstName());
        values.put("title", entity.getTitle());
        values.put("geburtsdatum", entity.getGeburtsdatum());
        values.put("geburtsort", entity.getGeburtsort());
        values.put("sterbedatum", entity.getSterbedatum());
        values.put("geschlecht", entity.getGeschlecht());
        values.put("beruf", entity.getBeruf());
        values.put("akademischertitel", entity.getAkademischertitel());
        values.put("familienstand", entity.getFamilienstand());
        values.put("religion", entity.getReligion());
        values.put("vita", entity.getVita());
        values.put("party", entity.getParty());
        values.put("memberships", membershipDocuments);

        return MongoDBHandler.createDocument(false, values);
    }

    /**
     * Converts every speaker of the given list into its MongoDB document.
     *
     * @param list the speakers to serialize
     * @return a new list holding one document per speaker, in input order
     */
    @Override
    public List<Document> createList(List<Speaker> list) {
        List<Document> documents = new ArrayList<>();
        list.forEach(speaker -> documents.add(createEntity(speaker)));
        return documents;
    }
}

View file

@ -0,0 +1,164 @@
package org.texttechnologylab.project.gruppe_05_1.database.domainimpl.mdb;
import de.tudarmstadt.ukp.dkpro.core.api.metadata.type.DocumentMetaData;
import org.apache.uima.UIMAException;
import org.apache.uima.fit.factory.JCasFactory;
import org.apache.uima.jcas.JCas;
import org.bson.Document;
import org.texttechnologylab.project.gruppe_05_1.database.domainimpl.mdb.speechline.Comment_MongoDB_Impl;
import org.texttechnologylab.project.gruppe_05_1.database.domainimpl.mdb.speechline.Line_MongoDB_Impl;
import org.texttechnologylab.project.gruppe_05_1.database.domainimpl.mdb.speechline.Speaker_MongoDB_Impl;
import org.texttechnologylab.project.gruppe_05_1.xml.speeches.Impls.Speech_File_Impl;
import org.texttechnologylab.project.gruppe_05_1.xml.speeches.Interfaces.Content;
import org.texttechnologylab.project.gruppe_05_1.xml.speeches.Interfaces.Speech;
import org.w3c.dom.Element;
import java.util.List;
import static org.texttechnologylab.project.gruppe_05_1.database.MongoPprUtils.getAgendaTitle;
import static org.texttechnologylab.project.gruppe_05_1.database.MongoPprUtils.getSessionDateTime;
/**
* Datei implementiert von Henry
* Modifiziert von Jonas
* toTeX und toXML von Jonas
*/
public class Speech_MongoDB_Impl extends Speech_File_Impl implements Speech {
public Speech_MongoDB_Impl(Document mongoDocument, boolean includeContent) {
super(
mongoDocument.getInteger("sessionId"),
mongoDocument.getInteger("agendaItemId"),
mongoDocument.getInteger("speechId"),
mongoDocument.getInteger("speakerId"),
mongoDocument.getString("speakerName"),
mongoDocument.getString("fraction"),
mongoDocument.getString("speechKey")
);
if (includeContent) {
for (Document content : (List<Document>) mongoDocument.get("speechContents")) {
switch (content.getString("type")) {
case "line":
this.addContent(new Line_MongoDB_Impl(content));
break;
case "comment":
this.addContent(new Comment_MongoDB_Impl(content));
break;
case "speaker":
this.addContent(new Speaker_MongoDB_Impl(content));
break;
default:
throw new IllegalArgumentException("Unknown content type: " + content.getString("type"));
}
}
}
}
/**
 * Returns the full text of the speech by joining the text of all line and
 * comment contents, one per line. Contents of any other type (for example
 * speaker markers) are ignored, as are null or empty texts.
 *
 * @return the complete speech text with surrounding whitespace trimmed
 */
public String getFullText() {
    StringBuilder buffer = new StringBuilder();
    for (Object entry : this.getSpeechContents()) {
        String text;
        if (entry instanceof Line_MongoDB_Impl) {
            text = ((Line_MongoDB_Impl) entry).getContent();
        } else if (entry instanceof Comment_MongoDB_Impl) {
            text = ((Comment_MongoDB_Impl) entry).getComment();
        } else {
            continue;
        }
        if (text == null || text.isEmpty()) {
            continue;
        }
        buffer.append(text).append("\n");
    }
    return buffer.toString().trim();
}
/**
 * Converts this speech into a JCas, e.g. for further NLP processing.
 *
 * <p>The document text is the result of {@link #getFullText()}; the speech
 * key is stored as the document id of the added {@code DocumentMetaData}.
 *
 * @return a JCas holding the speech text and the speech key as metadata
 * @throws UIMAException if the JCas cannot be created
 */
public JCas toCas() throws UIMAException {
    JCas cas = JCasFactory.createJCas();
    cas.setDocumentText(getFullText());

    DocumentMetaData metaData = new DocumentMetaData(cas);
    metaData.setDocumentId(getSpeechKey());
    metaData.addToIndexes();

    return cas;
}
/**
 * Exports the speech as LaTeX, including the {@code $$SPEAKERINFO$$} and
 * {@code $$NLPMETADATA$$} placeholders for speaker and NLP information.
 *
 * <p>The section title is built from the speech key, agenda title, speaker
 * name, optional parliamentary group and the session date/time. The LaTeX of
 * every speech content follows the "Speech Content" subsection.
 *
 * @return the LaTeX representation of the speech
 */
public String toTeX() {
    String party = "";
    if (this.getFraction() != null) {
        party = " (" + this.getFraction() + ")";
    }

    String speechTitle = "Rede "
            + this.getSpeechKey()
            + "/"
            + getAgendaTitle(this.getSessionId(), this.getAgendaItemId())
            + " von "
            + this.getSpeakerName()
            + party
            + " vom "
            + getSessionDateTime(this.getSessionId());

    StringBuilder tex = new StringBuilder()
            .append("\\section*{").append(speechTitle).append("}\n")
            .append("\\addcontentsline{toc}{section}{").append(speechTitle).append("}\n")
            .append("$$SPEAKERINFO$$\n")
            .append("\\subsection*{NLP Metadata}\n")
            .append("$$NLPMETADATA$$\n")
            .append("\\subsection*{Speech Content}\n");

    // Append the content blocks of the speech in their stored order
    for (Content part : this.getSpeechContents()) {
        tex.append(part.toTeX());
    }
    return tex.toString();
}
/**
 * Converts the speech into an XML element that can be inserted into a
 * complete XML document. The speech header fields become attributes; every
 * speech content is appended as a child element.
 *
 * @param doc the owning XML document used to create elements
 * @return the XML element representing the speech
 */
public Element toXML(org.w3c.dom.Document doc) {
    Element speechElement = doc.createElement("speech");

    speechElement.setAttribute("sessionId", String.valueOf(getSessionId()));
    speechElement.setAttribute("agendaItemId", String.valueOf(getAgendaItemId()));
    speechElement.setAttribute("speechId", String.valueOf(getSpeechId()));
    speechElement.setAttribute("speakerId", String.valueOf(getSpeakerId()));
    speechElement.setAttribute("speakerName", getSpeakerName());
    speechElement.setAttribute("fraction", getFraction());
    speechElement.setAttribute("speechKey", getSpeechKey());

    for (Content part : getSpeechContents()) {
        Element child = part.toXML(doc);
        speechElement.appendChild(child);
    }
    return speechElement;
}
}

View file

@ -1,10 +1,13 @@
package org.texttechnologylab.project.gruppe_05_1.database.domainimp.speeches;
package org.texttechnologylab.project.gruppe_05_1.database.domainimpl.mdb.speechline;
import org.bson.Document;
import org.texttechnologylab.project.gruppe_05_1.xml.speeches.Impls.Comment_File_Impl;
import org.texttechnologylab.project.gruppe_05_1.xml.speeches.Interfaces.Comment;
import org.w3c.dom.Element;
/*
* Class implemented by Jonas
*/
public class Comment_MongoDB_Impl extends Comment_File_Impl implements Comment {
public Comment_MongoDB_Impl(Document mongoDocument) {
@ -14,4 +17,17 @@ public class Comment_MongoDB_Impl extends Comment_File_Impl implements Comment {
mongoDocument.getString("commentatorName"),
mongoDocument.getString("comment"));
}
/**
 * Renders the comment as one LaTeX line with a blue "Kommentar" label.
 *
 * @return the LaTeX source for this comment
 */
public String toTeX() {
    StringBuilder tex = new StringBuilder("\\textcolor{blue}{Kommentar}: ");
    tex.append(this.getComment());
    tex.append("\\\\\n");
    return tex.toString();
}

/**
 * Creates a {@code comment} element carrying the ids and commentator name as
 * attributes and the comment text as element content.
 *
 * @param doc the XML document used to create the element
 * @return the populated {@code comment} element
 */
public Element toXML(org.w3c.dom.Document doc) {
    Element commentElement = doc.createElement("comment");
    commentElement.setAttribute("contentId", String.valueOf(getContentId()));
    commentElement.setAttribute("speechId", String.valueOf(getSpeechId()));
    commentElement.setAttribute("commentatorName", getCommentatorName());
    commentElement.setTextContent(getComment());
    return commentElement;
}
}

View file

@ -1,10 +1,13 @@
package org.texttechnologylab.project.gruppe_05_1.database.domainimp.speeches;
package org.texttechnologylab.project.gruppe_05_1.database.domainimpl.mdb.speechline;
import org.bson.Document;
import org.texttechnologylab.project.gruppe_05_1.xml.speeches.Impls.Line_File_Impl;
import org.texttechnologylab.project.gruppe_05_1.xml.speeches.Interfaces.Line;
import org.w3c.dom.Element;
/*
* Class implemented by Jonas
*/
public class Line_MongoDB_Impl extends Line_File_Impl implements Line {
public Line_MongoDB_Impl(Document mongoDocument) {
super(
@ -12,4 +15,16 @@ public class Line_MongoDB_Impl extends Line_File_Impl implements Line {
mongoDocument.getInteger("speechId"),
mongoDocument.getString("content"));
}
/**
 * Renders the line's text followed by a LaTeX line break.
 *
 * @return the LaTeX source for this line
 */
public String toTeX() {
    StringBuilder tex = new StringBuilder();
    tex.append(this.getContent());
    tex.append("\\\\\n");
    return tex.toString();
}

/**
 * Creates a {@code line} element carrying the ids as attributes and the line
 * text as element content.
 *
 * @param doc the XML document used to create the element
 * @return the populated {@code line} element
 */
public Element toXML(org.w3c.dom.Document doc) {
    Element lineElement = doc.createElement("line");
    lineElement.setAttribute("contentId", String.valueOf(getContentId()));
    lineElement.setAttribute("speechId", String.valueOf(getSpeechId()));
    lineElement.setTextContent(getContent());
    return lineElement;
}
}

View file

@ -0,0 +1,34 @@
package org.texttechnologylab.project.gruppe_05_1.database.domainimpl.mdb.speechline;
import org.bson.Document;
import org.texttechnologylab.project.gruppe_05_1.xml.speeches.Impls.Speaker_File_Impl;
import org.texttechnologylab.project.gruppe_05_1.xml.speeches.Interfaces.Speaker;
import org.w3c.dom.Element;
/**
 * Speaker entry of a speech, read from a MongoDB content document.
 * Class implemented by Jonas.
 */
public class Speaker_MongoDB_Impl extends Speaker_File_Impl implements Speaker {

    /**
     * Reads the content id, speech id, speaker id, speaker name and fraction
     * from the given document.
     *
     * @param mongoDocument the content document of type "speaker"
     */
    public Speaker_MongoDB_Impl(Document mongoDocument) {
        super(
                mongoDocument.getInteger("contentId"),
                mongoDocument.getInteger("speechId"),
                mongoDocument.getInteger("speakerId"),
                mongoDocument.getString("speakerName"),
                mongoDocument.getString("fraction"));
    }

    /**
     * Renders the speaker name as one LaTeX line with a dark green label.
     *
     * @return the LaTeX source for this speaker entry
     */
    public String toTeX() {
        StringBuilder tex = new StringBuilder("\\textcolor{darkgreen}{Redner/Rednerin}: ");
        tex.append(this.getSpeakerName());
        tex.append("\\\\\n");
        return tex.toString();
    }

    /**
     * Creates a {@code speaker} element with the ids, name and fraction as attributes.
     *
     * @param doc the XML document used to create the element
     * @return the populated {@code speaker} element
     */
    public Element toXML(org.w3c.dom.Document doc) {
        Element speakerElement = doc.createElement("speaker");
        speakerElement.setAttribute("contentId", String.valueOf(getContentId()));
        speakerElement.setAttribute("speechId", String.valueOf(getSpeechId()));
        speakerElement.setAttribute("speakerId", String.valueOf(getSpeakerId()));
        speakerElement.setAttribute("speakerName", getSpeakerName());
        speakerElement.setAttribute("fraction", getFraction());
        return speakerElement;
    }
}

View file

@ -1,5 +1,7 @@
package org.texttechnologylab.project.gruppe_05_1.domain;
/**
* Datei implementiert von Valentin
*/
public enum Gender { // TODO: Delete
M("männlich"),

View file

@ -2,21 +2,28 @@ package org.texttechnologylab.project.gruppe_05_1.domain.html;
import org.bson.Document;
import org.texttechnologylab.project.gruppe_05_1.database.MongoDBHandler;
import org.texttechnologylab.project.gruppe_05_1.domain.nlp.NlpInfo;
import org.texttechnologylab.project.gruppe_05_1.domain.nlp.Token;
import org.texttechnologylab.project.gruppe_05_1.domain.nlp.Topic;
import org.texttechnologylab.project.gruppe_05_1.database.MongoPprUtils;
import org.texttechnologylab.project.gruppe_05_1.domain.nlp.*;
import org.texttechnologylab.project.gruppe_05_1.domain.speech.SpeechMetaData;
import java.util.ArrayList;
import java.util.List;
import java.util.Objects;
import java.util.StringJoiner;
/**
* Datei implementiert von Valentin
*/
public class HtmlSpeech {
String speechKey;
String speakerName;
String fraction;
String dateTimeString; // aus "sessions" Collection
String agendaTitle; // aus "agendaItems" Collection
List<SpeechContent> content = new ArrayList<>();
NlpInfo nlp = null;
String video = null;
String videoData;
public HtmlSpeech() {
}
@ -25,7 +32,7 @@ public class HtmlSpeech {
setSpeechKey(doc.getString("speechKey"));
setSpeakerName(doc.getString("speakerName"));
setFraction(doc.getString("fraction"));
setVideo(doc.getString("video"));
List<Document> contentDocList = doc.get("speechContents", MongoDBHandler.DOC_LIST_CLASS);
if (contentDocList == null) {
setContent(new ArrayList<>());
@ -35,15 +42,27 @@ public class HtmlSpeech {
}
}
// ergänzen um Datum, Uhrzeit und Agendapunkt der Rede
SpeechMetaData md = new SpeechMetaData();
int sessionId = doc.getInteger("sessionId");
md.setSessionId(sessionId);
MongoPprUtils.augmentSpeechMetaDataFromSession(sessionId, md);
dateTimeString = md.getDateTimeString();
int agendaItemId = doc.getInteger("agendaItemId");
String title = MongoPprUtils.getAgendaTitle(sessionId, agendaItemId);
agendaTitle = title;
// Ergänzung um NLP-Informationen
Document nlpDoc = (Document) doc.get("analysisResults");
nlp = readNlpInfo(nlpDoc);
}
private NlpInfo readNlpInfo(Document nlpDoc) {
if (nlpDoc == null) return null;
NlpInfo nlp = new NlpInfo();
// TODO: HERE
List<Document> tokensDocs = nlpDoc.get("tokens", MongoDBHandler.DOC_LIST_CLASS);
nlp.setTokens(Token.readTokensFromMongo(tokensDocs));
@ -52,13 +71,14 @@ public class HtmlSpeech {
List<Document> dependenciesDocs = nlpDoc.get("dependencies", MongoDBHandler.DOC_LIST_CLASS);
List<Document> namedEntitiesDocs = nlpDoc.get("namedEntities", MongoDBHandler.DOC_LIST_CLASS);
nlp.setNamedEntities(NamedEntity.readNamedEntitiesFromMongo(namedEntitiesDocs));
List<Document> sentimentsDocs = nlpDoc.get("sentiments", MongoDBHandler.DOC_LIST_CLASS);
List<Document> sentimentDocs = nlpDoc.get("sentiments", MongoDBHandler.DOC_LIST_CLASS);
nlp.setSentiments(Sentiment.readSentimentsFromMongo((sentimentDocs)));
List<Document> topicsDocs = nlpDoc.get("topics", MongoDBHandler.DOC_LIST_CLASS);
nlp.setTopics(Topic.readTopicsFromMongo(topicsDocs));
// TODO: Video
return nlp;
}
@ -86,6 +106,13 @@ public class HtmlSpeech {
public void setFraction(String fraction) {
this.fraction = fraction;
}
/** @return the date/time string of the speech's session */
public String getDateTimeString() {
    return dateTimeString;
}

/** @param dateTimeString the date/time string to store */
public void setDateTimeString(String dateTimeString) {
    this.dateTimeString = dateTimeString;
}

/** @return the title of the agenda item the speech belongs to */
public String getAgendaTitle() {
    return agendaTitle;
}

/** @param agendaTitle the agenda item title to store */
public void setAgendaTitle(String agendaTitle) {
    this.agendaTitle = agendaTitle;
}
public List<SpeechContent> getContent() {
return content;
@ -112,13 +139,14 @@ public class HtmlSpeech {
if (this == o) return true;
if (!(o instanceof HtmlSpeech that)) return false;
return Objects.equals(speechKey, that.speechKey) && Objects.equals(speakerName, that.speakerName)
&& Objects.equals(fraction, that.fraction) && Objects.equals(content, that.content)
&& Objects.equals(fraction, that.fraction) && Objects.equals(dateTimeString, that.dateTimeString)
&& Objects.equals(agendaTitle, that.agendaTitle) && Objects.equals(content, that.content)
&& Objects.equals(nlp, that.nlp);
}
@Override
public int hashCode() {
return Objects.hash(speechKey, speakerName, fraction, content, nlp);
return Objects.hash(speechKey, speakerName, fraction, dateTimeString, agendaTitle, content, nlp);
}
@Override
@ -127,8 +155,26 @@ public class HtmlSpeech {
.add("speechKey='" + speechKey + "'")
.add("speakerName='" + speakerName + "'")
.add("fraction='" + fraction + "'")
.add("dateTimeString='" + dateTimeString + "'")
.add("agendaTitle='" + agendaTitle + "'")
.add("content=" + content)
.add("nlp=" + nlp)
.toString();
}
/** @return the stored video value, or {@code null} if none was set */
public String getVideo() {
    return video;
}

/** @param video the video value to store */
public void setVideo(String video) {
    this.video = video;
}

/** @return the stored video data */
public String getVideoData() {
    return videoData;
}

/** @param videoData the video data to store */
public void setVideoData(String videoData) {
    this.videoData = videoData;
}
}

View file

@ -2,7 +2,9 @@ package org.texttechnologylab.project.gruppe_05_1.domain.html;
import java.util.Objects;
import java.util.StringJoiner;
/**
* Datei implementiert von Valentin
*/
public class Parlamentarier {
String id;
String vorname;

View file

@ -8,7 +8,9 @@ import java.time.LocalDate;
import java.util.List;
import java.util.Objects;
import java.util.StringJoiner;
/**
* Datei implementiert von Valentin
*/
public class ParlamentarierDetails {
String id;
String vorname;

View file

@ -0,0 +1,101 @@
package org.texttechnologylab.project.gruppe_05_1.domain.html;
import java.util.Objects;
import java.util.StringJoiner;
/**
* Datei implementiert von Valentin
*/
public class SpeechOverview {
String speechKey;
Integer speakerId;
String dateTimeString;
String speakerName;
String fraction;
String agendaTitel;
public SpeechOverview() {}
public SpeechOverview(String speechKey, Integer speakerId, String dateTimeString, String speakerName, String fraction, String agendaTitel) {
this.speechKey = speechKey;
this.speakerId = speakerId;
this.dateTimeString = dateTimeString;
this.speakerName = speakerName;
this.fraction = fraction;
this.agendaTitel = agendaTitel;
}
public String getSpeechKey() {
return speechKey;
}
public void setSpeechKey(String speechKey) {
this.speechKey = speechKey;
}
public Integer getSpeakerId() {
return speakerId;
}
public void setSpeakerId(Integer speakerId) {
this.speakerId = speakerId;
}
public String getDateTimeString() {
return dateTimeString;
}
public void setDateTimeString(String dateTimeString) {
this.dateTimeString = dateTimeString;
}
public String getSpeakerName() {
return speakerName;
}
public void setSpeakerName(String speakerName) {
this.speakerName = speakerName;
}
public String getFraction() {
return fraction;
}
public void setFraction(String fraction) {
this.fraction = fraction;
}
public String getAgendaTitel() {
return agendaTitel;
}
public void setAgendaTitel(String agendaTitel) {
this.agendaTitel = agendaTitel;
}
@Override
public boolean equals(Object o) {
if (this == o) return true;
if (!(o instanceof SpeechOverview that)) return false;
return Objects.equals(speechKey, that.speechKey) && Objects.equals(speakerId, that.speakerId)
&& Objects.equals(dateTimeString, that.dateTimeString) && Objects.equals(speakerName, that.speakerName)
&& Objects.equals(fraction, that.fraction) && Objects.equals(agendaTitel, that.agendaTitel);
}
@Override
public int hashCode() {
return Objects.hash(speechKey, speakerId, dateTimeString, speakerName, fraction, agendaTitel);
}
@Override
public String toString() {
return new StringJoiner(", ", SpeechOverview.class.getSimpleName() + "[", "]")
.add("speechKey='" + speechKey + "'")
.add("speakerId=" + speakerId)
.add("dateTimeString='" + dateTimeString + "'")
.add("speakerName='" + speakerName + "'")
.add("fraction='" + fraction + "'")
.add("agendaTitel='" + agendaTitel + "'")
.toString();
}
}

View file

@ -3,6 +3,9 @@ package org.texttechnologylab.project.gruppe_05_1.domain.mdb;
import java.time.LocalDate;
import java.util.Objects;
import java.util.StringJoiner;
/**
* Datei implementiert von Valentin
*/
public abstract class BiografischeAngaben {

View file

@ -3,6 +3,9 @@ package org.texttechnologylab.project.gruppe_05_1.domain.mdb;
import java.time.LocalDate;
import java.util.Objects;
import java.util.StringJoiner;
/**
* Datei implementiert von Valentin
*/
public abstract class Institution {

View file

@ -1,6 +1,9 @@
package org.texttechnologylab.project.gruppe_05_1.domain.mdb;
public enum Mandatsart {
/**
* Datei implementiert von Valentin
*/
DIREKT("Direktwahl"),
LANDESLISTE("Landesliste"),

View file

@ -3,6 +3,9 @@ package org.texttechnologylab.project.gruppe_05_1.domain.mdb;
import java.util.List;
import java.util.Objects;
import java.util.StringJoiner;
/**
* Datei implementiert von Valentin
*/
public abstract class Mdb {

View file

@ -3,6 +3,9 @@ package org.texttechnologylab.project.gruppe_05_1.domain.mdb;
import java.util.List;
import java.util.Objects;
import java.util.StringJoiner;
/**
* Datei implementiert von Valentin
*/
public abstract class MdbDocument {

View file

@ -3,6 +3,9 @@ package org.texttechnologylab.project.gruppe_05_1.domain.mdb;
import java.time.LocalDate;
import java.util.Objects;
import java.util.StringJoiner;
/**
* Datei implementiert von Valentin
*/
public abstract class MdbName {

View file

@ -4,6 +4,9 @@ import java.time.LocalDate;
import java.util.List;
import java.util.Objects;
import java.util.StringJoiner;
/**
* Datei implementiert von Valentin
*/
public abstract class Wahlperiode {

View file

@ -2,7 +2,9 @@ package org.texttechnologylab.project.gruppe_05_1.domain.nlp;
import java.util.Objects;
import java.util.StringJoiner;
/**
* Datei implementiert von Valentin
*/
public class AudioToken {
private int begin;

View file

@ -2,7 +2,9 @@ package org.texttechnologylab.project.gruppe_05_1.domain.nlp;
import java.util.Objects;
import java.util.StringJoiner;
/**
* Datei implementiert von Valentin
*/
public class Dependency {
String type;
String governor;

View file

@ -1,12 +1,18 @@
package org.texttechnologylab.project.gruppe_05_1.domain.nlp;
import org.bson.Document;
import org.w3c.dom.Element;
import java.util.ArrayList;
import java.util.List;
import java.util.Objects;
import java.util.StringJoiner;
/**
* Datei implementiert von Valentin
* toXML implementiert von Jonas
*/
public class NamedEntity {
String type; // PER, LOC etc.
// int begin; // TODO: momentan nicht in MongoDB
// int end; // TODO: momentan nicht in MongoDB
String text;
public NamedEntity() {
@ -52,4 +58,22 @@ public class NamedEntity {
.add("text='" + text + "'")
.toString();
}
/**
 * Converts named-entity documents read from MongoDB into {@link NamedEntity} objects.
 *
 * <p>Each document contributes its {@code type} and {@code text} fields. A
 * {@code null} list (for example when the analysis results contain no
 * {@code namedEntities} field) yields an empty result instead of a
 * {@link NullPointerException}.
 *
 * @param nadocs the named-entity documents, may be {@code null}
 * @return a new, possibly empty list of named entities in document order
 */
public static List<NamedEntity> readNamedEntitiesFromMongo(List<Document> nadocs) {
    List<NamedEntity> nes = new ArrayList<>();
    if (nadocs == null) {
        return nes;
    }
    for (Document doc : nadocs) {
        nes.add(new NamedEntity(
                doc.getString("type"),
                doc.getString("text")
        ));
    }
    return nes;
}
/**
 * Creates a {@code NamedEntity} element with the entity type as attribute and
 * the entity text as element content.
 *
 * @param doc the XML document used to create the element
 * @return the populated {@code NamedEntity} element
 */
public Element toXML(org.w3c.dom.Document doc) {
    Element entityElement = doc.createElement("NamedEntity");
    entityElement.setAttribute("type", this.type);
    entityElement.setTextContent(this.text);
    return entityElement;
}
}

View file

@ -4,7 +4,9 @@ import java.util.List;
import java.util.Map;
import java.util.Objects;
import java.util.StringJoiner;
/**
* Datei implementiert von Valentin
*/
public class NlpInfo {
List<Token> tokens;
List<Sentence> sentences;

View file

@ -1,8 +1,13 @@
package org.texttechnologylab.project.gruppe_05_1.domain.nlp;
import org.w3c.dom.Element;
import java.util.Objects;
import java.util.StringJoiner;
/**
* Datei implementiert von Valentin
* toXML implementiert von Jonas
*/
public class Pos {
String posValue; // ART, NN...
String coarseValue; // PROPN...
@ -116,4 +121,14 @@ public class Pos {
MyPos{posValue='ADJD', coarseValue='ADV', begin=127, end=130, coveredText='gut'},
MyPos{posValue='$.', coarseValue='PUNCT', begin=130, end=131, coveredText='.'}],
*/
/**
 * Builds the XML representation of this part-of-speech annotation.
 *
 * @param doc the DOM document used as element factory; the element is not attached to it
 * @return a "pos" element with the POS values and offsets as attributes and the
 *         covered text as content
 */
public Element toXML(org.w3c.dom.Document doc) {
    Element node = doc.createElement("pos");
    node.setTextContent(coveredText);
    // Offsets are written in their decimal string form.
    node.setAttribute("begin", String.valueOf(begin));
    node.setAttribute("end", String.valueOf(end));
    node.setAttribute("posValue", posValue);
    node.setAttribute("coarseValue", coarseValue);
    return node;
}
}

View file

@ -2,10 +2,10 @@ package org.texttechnologylab.project.gruppe_05_1.domain.nlp;
import java.util.Objects;
import java.util.StringJoiner;
/**
* Datei implementiert von Valentin
*/
public class Sentence {
// int begin; // TODO: momentan nicht in MongoDB
// int end; // TODO: momentan nicht in MongoDB
String text;
public Sentence() {

View file

@ -1,8 +1,17 @@
package org.texttechnologylab.project.gruppe_05_1.domain.nlp;
import org.bson.Document;
import org.texttechnologylab.project.gruppe_05_1.database.MongoDBHandler;
import org.w3c.dom.Element;
import java.util.ArrayList;
import java.util.List;
import java.util.Objects;
import java.util.StringJoiner;
/**
* Datei implementiert von Valentin
* toXML-Methode implementiert von Jonas
*/
public class Sentiment {
int begin;
int end;
@ -94,4 +103,36 @@ public class Sentiment {
.add("positive=" + positive)
.toString();
}
/**
 * Reads the sentiment documents (Speech --> analysisResults --> sentiment) from MongoDB.
 * The first document covers the whole speech; sentiments 1..n correspond to sentences 0..n-1.
 *
 * @param sentimentDocs the sentiment documents of one speech
 * @return a new mutable list with one {@link Sentiment} per document, in input order
 */
public static List<Sentiment> readSentimentsFromMongo(List<Document> sentimentDocs) {
    List<Sentiment> result = new ArrayList<>();
    sentimentDocs.forEach(entry -> result.add(new Sentiment(
            entry.getInteger("begin"),
            entry.getInteger("end"),
            MongoDBHandler.getFieldAsDouble(entry, "score"),
            MongoDBHandler.getFieldAsDouble(entry, "neg"),
            MongoDBHandler.getFieldAsDouble(entry, "neu"),
            MongoDBHandler.getFieldAsDouble(entry, "pos"))));
    return result;
}
/**
 * Builds the XML representation of this sentiment.
 *
 * @param doc the DOM document used as element factory; the element is not attached to it
 * @return a "sentiment" element whose attributes hold the offsets and the four score values
 */
public org.w3c.dom.Element toXML(org.w3c.dom.Document doc) {
    Element node = doc.createElement("sentiment");
    // Span of the annotated text.
    node.setAttribute("begin", String.valueOf(begin));
    node.setAttribute("end", String.valueOf(end));
    // Overall score followed by its negative / neutral / positive parts.
    node.setAttribute("sentiment", String.valueOf(sentiment));
    node.setAttribute("negative", String.valueOf(negative));
    node.setAttribute("neutral", String.valueOf(neutral));
    node.setAttribute("positive", String.valueOf(positive));
    return node;
}
}

View file

@ -3,7 +3,9 @@ package org.texttechnologylab.project.gruppe_05_1.domain.nlp;
import org.bson.Document;
import java.util.*;
/**
* Datei implementiert von Valentin
*/
public class Token {
String text;
String pos;
@ -67,6 +69,7 @@ public class Token {
* Die Token-Dokumente (Speech --> analysisResults --> token) aus der MongoDB lesen
* @param tokenDocs Eine Liste von Mongo-Dokumenten
* @return Eine Liste der Token
* Implementiert von Leon
*/
public static List<Token> readTokensFromMongo(List<Document> tokenDocs) {
List<Token> tokens = new ArrayList<>();
@ -83,6 +86,7 @@ public class Token {
* Zählt alle verschiedenen POS Vorkommen auf
* @param tokenList
* @return Jede POS art mit ihrer Anzahl an Vorkommen
* Implementiert von Leon
*/
public static Map<String, Integer> countPOS(List<Token> tokenList) {
Map<String, Integer> posCounts = new HashMap<>();

View file

@ -1,10 +1,14 @@
package org.texttechnologylab.project.gruppe_05_1.domain.nlp;
import org.bson.Document;
import org.w3c.dom.Element;
import java.util.*;
import java.util.stream.Collectors;
/**
* Datei implementiert von Valentin
* toXML-Methode implementiert von Jonas
*/
public class Topic {
String topic;
Double score;
@ -104,4 +108,12 @@ public class Topic {
return condensedTopicInfo;
}
/**
 * Builds the XML representation of this topic.
 *
 * @param doc the DOM document used as element factory; the element is not attached to it
 * @return a "topic" element with the topic name and score as attributes and the text as content
 */
public Element toXML(org.w3c.dom.Document doc) {
    Element topicElement = doc.createElement("topic");
    topicElement.setAttribute("topic", getTopic());
    // The score is a boxed Double; String.valueOf avoids a NullPointerException when it is
    // unset and yields the same text as toString() otherwise.
    topicElement.setAttribute("score", String.valueOf(getScore()));
    topicElement.setTextContent(getText());
    return topicElement;
}
}

View file

@ -3,7 +3,9 @@ package org.texttechnologylab.project.gruppe_05_1.domain.nlp;
import java.util.List;
import java.util.Objects;
import java.util.StringJoiner;
/**
* Datei implementiert von Valentin
*/
public class VideoInformation {
List<AudioToken> audioTokens;

View file

@ -3,6 +3,7 @@ package org.texttechnologylab.project.gruppe_05_1.domain.nlp.html;
import java.util.Objects;
/**
* Datei implementiert von Valentin
* Diese Klasse ordnet einem Satz das entsprechende Sentiment zu.
* Sie ist ein Datencontainer für die Darstellung über FreeMarker
*/

View file

@ -3,12 +3,14 @@ package org.texttechnologylab.project.gruppe_05_1.domain.speaker;
import java.time.LocalDate;
import java.util.Objects;
import java.util.StringJoiner;
/**
* Datei implementiert von Valentin
*/
public class Membership {
String role;
String member; // TODO: wozu? Dr. Abrami hat hier die ID des Parlamentariers gespeichert
LocalDate begin; // TODO: in Mongo eigentlich Date?
LocalDate end; // TODO: in Mongo eigentlich Date?
String member;
LocalDate begin;
LocalDate end;
String label;
Integer wp;

View file

@ -4,7 +4,9 @@ import java.time.LocalDate;
import java.util.List;
import java.util.Objects;
import java.util.StringJoiner;
/**
* Datei implementiert von Valentin
*/
public abstract class Speaker {
String id;
String name;
@ -23,9 +25,6 @@ public abstract class Speaker {
String primaryFoto;
List<Membership> memberships;
// TODO: List<Speech> speeches;
// TODO: Photos
public String getId() {
return id;

View file

@ -2,7 +2,9 @@ package org.texttechnologylab.project.gruppe_05_1.domain.speech;
import java.util.Objects;
import java.util.StringJoiner;
/**
* Datei implementiert von Valentin
*/
public abstract class Agenda {
String index;
String id;

View file

@ -4,7 +4,9 @@ import java.util.List;
import java.util.Map;
import java.util.Objects;
import java.util.StringJoiner;
/**
* Datei implementiert von Valentin
*/
public abstract class Comment {
String id;

View file

@ -4,7 +4,9 @@ import java.time.LocalDate;
import java.time.LocalTime;
import java.util.Objects;
import java.util.StringJoiner;
/**
* Datei implementiert von Valentin
*/
public abstract class Protocol {
LocalDate date;
LocalTime startTime;

View file

@ -3,7 +3,9 @@ package org.texttechnologylab.project.gruppe_05_1.domain.speech;
import java.util.List;
import java.util.Objects;
import java.util.StringJoiner;
/**
* Datei implementiert von Valentin
*/
public abstract class Speech {
String id;
String text;

View file

@ -5,14 +5,17 @@ import java.util.Objects;
import java.util.StringJoiner;
/**
* Datei implementiert von Valentin
* Diese Klasse dient der Darstellung aller Reden eines Parlamentariers.
*/
public class SpeechMetaData {
// aus "speech" Collection
String speechKey; // z.B. "ID2011400300"
int speechId; // TODO: nötig?
int speechId;
int speakerId;
int sessionId; // TODO: nötig?
String speakerName;
String fraktion;
int sessionId;
// aus "sessions" Collection
LocalDateTime dateTime;
@ -45,6 +48,14 @@ public class SpeechMetaData {
this.speakerId = speakerId;
}
/** @return the speaker's name */
public String getSpeakerName() {
    return speakerName;
}

/** @param speakerName the speaker's name */
public void setSpeakerName(String speakerName) {
    this.speakerName = speakerName;
}

/** @return the speaker's parliamentary group ("Fraktion") */
public String getFraktion() {
    return fraktion;
}

/** @param fraktion the speaker's parliamentary group ("Fraktion") */
public void setFraktion(String fraktion) {
    this.fraktion = fraktion;
}
public int getSessionId() {
return sessionId;
}
@ -81,12 +92,15 @@ public class SpeechMetaData {
public boolean equals(Object o) {
if (this == o) return true;
if (!(o instanceof SpeechMetaData that)) return false;
return speechId == that.speechId && speakerId == that.speakerId && sessionId == that.sessionId && Objects.equals(speechKey, that.speechKey) && Objects.equals(dateTime, that.dateTime) && Objects.equals(dateTimeString, that.dateTimeString) && Objects.equals(agendaTitle, that.agendaTitle);
return speechId == that.speechId && speakerId == that.speakerId && sessionId == that.sessionId
&& Objects.equals(speechKey, that.speechKey) && Objects.equals(speakerName, that.speakerName)
&& Objects.equals(fraktion, that.fraktion) && Objects.equals(dateTime, that.dateTime)
&& Objects.equals(dateTimeString, that.dateTimeString) && Objects.equals(agendaTitle, that.agendaTitle);
}
@Override
public int hashCode() {
return Objects.hash(speechKey, speechId, speakerId, sessionId, dateTime, dateTimeString, agendaTitle);
return Objects.hash(speechKey, speechId, speakerId, speakerName, fraktion, sessionId, dateTime, dateTimeString, agendaTitle);
}
@Override
@ -95,6 +109,8 @@ public class SpeechMetaData {
.add("speechKey='" + speechKey + "'")
.add("speechId=" + speechId)
.add("speakerId=" + speakerId)
.add("speakerName='" + speakerName + "'")
.add("fraktion='" + fraktion + "'")
.add("sessionId=" + sessionId)
.add("dateTime=" + dateTime)
.add("dateTimeString='" + dateTimeString + "'")

View file

@ -2,7 +2,9 @@ package org.texttechnologylab.project.gruppe_05_1.domain.speech;
import java.util.Objects;
import java.util.StringJoiner;
/**
* Datei implementiert von Valentin
*/
public abstract class TextContent {
String id;
String speakerId;

View file

@ -0,0 +1,373 @@
package org.texttechnologylab.project.gruppe_05_1.export;
import org.texttechnologylab.project.gruppe_05_1.database.domainimpl.mdb.Speaker_MongoDB_Impl;
import org.texttechnologylab.project.gruppe_05_1.domain.nlp.Sentiment;
import org.texttechnologylab.project.gruppe_05_1.domain.nlp.Topic;
import org.texttechnologylab.project.gruppe_05_1.util.Logger;
import org.texttechnologylab.project.gruppe_05_1.xml.speeches.Interfaces.Speech;
import java.io.*;
import java.nio.charset.StandardCharsets;
import java.nio.file.FileVisitResult;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.SimpleFileVisitor;
import java.nio.file.attribute.BasicFileAttributes;
import java.time.LocalDateTime;
import java.time.format.DateTimeFormatter;
import java.util.Arrays;
import java.util.Base64;
import java.util.List;
import java.util.Map;
import static org.texttechnologylab.project.gruppe_05_1.Main.RESOURCES_DIR;
import static org.texttechnologylab.project.gruppe_05_1.Main.TEMP_EXPORT_DIR;
import static org.texttechnologylab.project.gruppe_05_1.database.MongoPprUtils.*;
/**
* Utility Klasse für die Erstellung von TeX-Dateien und Konvertierung in Base64-kodierte PDF-Dateien.
* Implementiert von Jonas
*/
public class TeXUtil {
private static final String PREAMBLE = readFileContentFromTeXDir();
private static final String BEGIN_DOCUMENT = "\\begin{document}\n";
private static final String END_DOCUMENT = "\\end{document}";
private static final String TABLEOFCONTENTS = "\\tableofcontents\n\\newpage\n";
private static final String NEWPAGE = "\\newpage\n";
/**
 * Loads the LaTeX preamble template {@code tex/preamble.tex} from the resources directory.
 *
 * @return the file content, or an empty string if the file could not be read
 *         (the failure is logged)
 */
private static String readFileContentFromTeXDir() {
    File preambleFile = new File(RESOURCES_DIR, "tex/preamble.tex");
    String content;
    try {
        content = Files.readString(preambleFile.toPath());
    } catch (IOException readFailure) {
        Logger.error("Failed to read file content from tex directory.");
        Logger.error(readFailure.getMessage());
        Logger.debug(Arrays.toString(readFailure.getStackTrace()));
        content = "";
    }
    return content;
}
/**
 * Renders one speech, looked up by its key, as a LaTeX fragment.
 * <p>
 * The speech's own TeX output contains the placeholders {@code $$SPEAKERINFO$$} and
 * {@code $$NLPMETADATA$$}; they are replaced by the speaker's TeX output and by the
 * chart view built from the speech's topics, POS counts and sentiments.
 *
 * @param speechId key of the speech to render
 * @return the LaTeX fragment for the speech
 */
public static String getSpeechToTexComponent(String speechId) {
    createTempDir();
    Speech speech = getSpeechByKey(speechId);
    Speaker_MongoDB_Impl speaker = getSpeakerById(String.valueOf(speech.getSpeakerId()));
    String speechTex = String.valueOf(speech.toTeX());

    // Look the NLP record up once; it was previously fetched separately for topics and sentiments.
    var nlp = getHtmlSpeechByKey(speechId).getNlp();
    Map<String, Double> topics = Topic.condenseTopicInformation(nlp.getTopics());

    String charts = generateChartView(
            generateBubbleChartLatex(topics),
            generateBarChartLatex(getPOSInformationCardinalitiesForSpeechById(speechId)),
            generateRadarChartLatex(nlp.getSentiments()),
            "");

    return speechTex
            .replace("$$SPEAKERINFO$$", speaker.toTeX())
            .replace("$$NLPMETADATA$$", charts);
}
/**
 * Renders an already loaded speech as a LaTeX fragment.
 * <p>
 * The speech's own TeX output contains the placeholders {@code $$SPEAKERINFO$$} and
 * {@code $$NLPMETADATA$$}; they are replaced by the speaker's TeX output and by the
 * chart view. If the NLP record of the speech cannot be read, the topic and sentiment
 * charts are built from empty data instead of aborting the export.
 *
 * @param speech the speech to render
 * @return the LaTeX fragment for the speech
 */
public static String getSpeechToTexComponent(Speech speech) {
    createTempDir();
    Speaker_MongoDB_Impl speaker = getSpeakerById(String.valueOf(speech.getSpeakerId()));
    String speechTex = String.valueOf(speech.toTeX());
    var speechKey = speech.getSpeechKey();
    Logger.pink(String.valueOf(speechKey));

    Map<String, Double> topics = Map.of();
    List<Sentiment> sentiments = List.of();
    try {
        // One lookup serves both charts. The sentiments used to be fetched outside this
        // guard, so a missing NLP record still crashed despite the topics fallback.
        var nlp = getHtmlSpeechByKey(speechKey).getNlp();
        sentiments = nlp.getSentiments();
        topics = Topic.condenseTopicInformation(nlp.getTopics());
    } catch (Exception e) {
        // Best effort, as before: fall back to empty chart data.
        Logger.debug("NLP data unavailable for speech " + speechKey + ": " + e);
    }

    String charts = generateChartView(
            generateBubbleChartLatex(topics),
            generateBarChartLatex(getPOSInformationCardinalitiesForSpeechById(speechKey)),
            generateRadarChartLatex(sentiments),
            "");

    return speechTex
            .replace("$$SPEAKERINFO$$", speaker.toTeX())
            .replace("$$NLPMETADATA$$", charts);
}
/**
 * Exports a single speech as a Base64-encoded PDF.
 *
 * @param speechId key of the speech to export
 * @return the Base64 string of the generated PDF
 * @throws IOException if writing the TeX file or producing/reading the PDF fails
 * @throws InterruptedException if waiting for the LaTeX process is interrupted
 */
public static String getExportedSpeechBase64StringBySpeechId(String speechId) throws IOException, InterruptedException {
    // PREAMBLE is the template loaded from tex/preamble.tex in the resources directory.
    String header = PREAMBLE.replace("$$EXPORTCATEGORY$$", "Speech " + speechId);
    String body = getSpeechToTexComponent(speechId);
    return convertTexToBase64PDF(header + BEGIN_DOCUMENT + body + END_DOCUMENT);
}
/**
 * Exports all speeches of one speaker as a single Base64-encoded PDF.
 *
 * @param speakerId id of the speaker whose speeches are exported
 * @return the Base64 string of the generated PDF
 * @throws IOException if writing the TeX file or producing/reading the PDF fails
 * @throws InterruptedException if waiting for the LaTeX process is interrupted
 */
public static String getBulkExportedSpeechBase64StringFromSpeakerById(String speakerId) throws IOException, InterruptedException {
    return exportSpeechesAsBase64Pdf("Speaker ID" + speakerId, getSpeechesBySpeakerId(speakerId));
}

/**
 * Exports every available speech as a single Base64-encoded PDF.
 *
 * @return the Base64 string of the generated PDF
 * @throws IOException if writing the TeX file or producing/reading the PDF fails
 * @throws InterruptedException if waiting for the LaTeX process is interrupted
 */
public static String getBulkExportedAllSpeechesBase64String() throws IOException, InterruptedException {
    return exportSpeechesAsBase64Pdf("all speeches", getAllSpeeches());
}

/**
 * Exports all speeches with the given topic as a single Base64-encoded PDF.
 *
 * @param topic the topic used to select the speeches
 * @return the Base64 string of the generated PDF
 * @throws IOException if writing the TeX file or producing/reading the PDF fails
 * @throws InterruptedException if waiting for the LaTeX process is interrupted
 */
public static String getBulkExportedAllSpeechesWithTopicBase64String(String topic) throws IOException, InterruptedException {
    return exportSpeechesAsBase64Pdf("Speeches with topic " + topic, getAllSpeechesWithTopic(topic));
}

/**
 * Exports the speeches with the given keys as a single Base64-encoded PDF.
 *
 * @param speechIds keys of the speeches to export, in output order
 * @return the Base64 string of the generated PDF
 * @throws IOException if writing the TeX file or producing/reading the PDF fails
 * @throws InterruptedException if waiting for the LaTeX process is interrupted
 */
public static String getBulkExportedSpeechesBase64String(List<String> speechIds) throws IOException, InterruptedException {
    StringBuilder tex = openBulkDocument("selected speeches");
    for (String speechId : speechIds) {
        tex.append(getSpeechToTexComponent(speechId)).append(NEWPAGE);
    }
    return closeAndConvert(tex);
}

/** Renders the given speeches, one per page, into one document and converts it to a Base64 PDF. */
private static String exportSpeechesAsBase64Pdf(String exportCategory, List<Speech> speeches) throws IOException, InterruptedException {
    StringBuilder tex = openBulkDocument(exportCategory);
    for (Speech speech : speeches) {
        tex.append(getSpeechToTexComponent(speech)).append(NEWPAGE);
    }
    return closeAndConvert(tex);
}

/** Starts a bulk document: preamble with the export category filled in, document start, table of contents. */
private static StringBuilder openBulkDocument(String exportCategory) {
    return new StringBuilder()
            .append(PREAMBLE.replace("$$EXPORTCATEGORY$$", exportCategory))
            .append(BEGIN_DOCUMENT)
            .append(TABLEOFCONTENTS);
}

/** Terminates the document and converts the accumulated TeX source to a Base64 PDF. */
private static String closeAndConvert(StringBuilder tex) throws IOException, InterruptedException {
    tex.append(END_DOCUMENT);
    return convertTexToBase64PDF(tex.toString());
}
/**
 * Compiles a LaTeX source string with {@code latexmk} and returns the resulting PDF
 * as a Base64 string.
 * <p>
 * Before compiling, every {@code #} is escaped as {@code \#} and all non-ASCII
 * characters are removed from the source. The .tex file and the build output are
 * written to the temporary export directory under a timestamped name.
 *
 * @param tex the complete LaTeX document source
 * @return the Base64 encoding of the generated PDF
 * @throws IOException if the TeX file cannot be written, the process cannot be started,
 *                     or no PDF was produced
 * @throws InterruptedException if waiting for the LaTeX process is interrupted
 */
public static String convertTexToBase64PDF(String tex) throws IOException, InterruptedException {
    File tempDir = new File(TEMP_EXPORT_DIR);
    createTempDir();

    // The former getBytes("UTF-8") / new String(bytes) round trip decoded with the platform
    // charset and could corrupt text on non-UTF-8 systems; a Java String needs no re-encoding.
    tex = tex.replace("#", "\\#");                  // escape all # as \#
    tex = tex.replaceAll("[^\\x00-\\x7F]", "");     // drop all non-ASCII characters

    // NOTE(review): the timestamp has second resolution, so two exports started within the
    // same second would share file names.
    String dateTime = DateTimeFormatter.ofPattern("yyyy-MM-dd_HH-mm-ss").format(LocalDateTime.now());

    File texFile = new File(tempDir, "speech_export" + dateTime + ".tex");
    try (BufferedWriter writer = new BufferedWriter(new FileWriter(texFile, StandardCharsets.UTF_8))) {
        writer.write(tex);
    }

    // latexmk instead of plain pdflatex so the table of contents is generated properly.
    // Passing an argument list keeps paths with spaces intact, unlike a single command string.
    ProcessBuilder builder = new ProcessBuilder(
            "latexmk",
            "-pdf",
            "-interaction=nonstopmode",
            "-outdir=" + tempDir.getAbsolutePath(),
            texFile.getAbsolutePath());
    // Merge stderr into stdout so that an undrained stderr pipe cannot block the process.
    builder.redirectErrorStream(true);
    Process process = builder.start();

    Logger.debug("Standard Output:");
    try (BufferedReader output = new BufferedReader(
            new InputStreamReader(process.getInputStream(), StandardCharsets.UTF_8))) {
        String line;
        while ((line = output.readLine()) != null) {
            Logger.debug(line);
        }
    }
    Logger.debug("LaTeX Process ended with exit code " + process.waitFor());

    // The exit code is only logged; success is decided by the presence of the PDF.
    File pdfFile = new File(tempDir, "speech_export" + dateTime + ".pdf");
    if (!pdfFile.exists()) {
        throw new IOException("PDF generation failed.");
    }

    byte[] pdfBytes = Files.readAllBytes(pdfFile.toPath());
    return Base64.getEncoder().encodeToString(pdfBytes);
}
/**
 * Ensures that the temporary export directory exists, creating it together with any
 * missing parent directories. The outcome of the creation attempt is not checked.
 */
public static void createTempDir() {
    File exportDir = new File(TEMP_EXPORT_DIR);
    if (exportDir.exists()) {
        return;
    }
    exportDir.mkdirs();
}
/**
 * Recursively deletes the temporary export directory: every file and sub-directory
 * below it and finally the directory itself. Does nothing if the directory does not exist.
 *
 * @throws IOException if a file or directory cannot be visited or deleted
 */
public static void deleteTeXTempDirContents() throws IOException {
    Path root = Path.of(TEMP_EXPORT_DIR);
    // Nothing to clean up; walking a missing root would fail with NoSuchFileException.
    if (Files.notExists(root)) {
        return;
    }
    Files.walkFileTree(root, new SimpleFileVisitor<Path>() {
        @Override
        public FileVisitResult visitFile(Path file, BasicFileAttributes attrs) throws IOException {
            Files.delete(file);
            return FileVisitResult.CONTINUE;
        }

        @Override
        public FileVisitResult postVisitDirectory(Path dir, IOException exc) throws IOException {
            // Surface an error from iterating the directory instead of hiding it behind
            // the delete failure that would follow.
            if (exc != null) {
                throw exc;
            }
            // The directory is empty now, so it can be removed.
            Files.delete(dir);
            return FileVisitResult.CONTINUE;
        }
    });
}
/**
 * Best-effort variant of {@link #deleteTeXTempDirContents()}: an I/O failure is
 * logged instead of being propagated to the caller.
 */
public static void tryDeleteTeXTempDirContents() {
    try {
        deleteTeXTempDirContents();
    } catch (IOException cleanupFailure) {
        String trace = Arrays.toString(cleanupFailure.getStackTrace());
        Logger.error("Failed to delete temporary folder.");
        Logger.error(cleanupFailure.getMessage());
        Logger.debug(trace);
    }
}
/**
 * Checks whether a TeX distribution is available by running {@code pdflatex --version}
 * and looking for "pdfTeX" in its standard output.
 * <p>
 * NOTE(review): the PDF conversion in this class invokes {@code latexmk}, which this
 * probe does not cover — confirm whether it should be checked as well.
 *
 * @return {@code true} if the command ran and reported pdfTeX, {@code false} otherwise
 */
public static boolean isTeXSdkInstalled() {
    Process process;
    try {
        process = new ProcessBuilder("pdflatex", "--version")
                // stderr is not inspected; discard it so it can never fill up and block.
                .redirectError(ProcessBuilder.Redirect.DISCARD)
                .start();
    } catch (IOException ignored) {
        // The executable could not be started, i.e. it is treated as not installed.
        return false;
    }
    try (BufferedReader output = new BufferedReader(
            new InputStreamReader(process.getInputStream(), StandardCharsets.UTF_8))) {
        String line;
        while ((line = output.readLine()) != null) {
            if (line.contains("pdfTeX")) {
                return true;
            }
        }
        return false;
    } catch (IOException ignored) {
        // Unreadable output is treated like a missing installation.
        return false;
    } finally {
        // Do not leave the child process behind, in particular after the early return.
        process.destroy();
    }
}
/**
 * Arranges the four chart fragments in LaTeX minipages: the bar chart at full text
 * width first, followed by the bubble chart (0.4), the radar chart (0.3) and the
 * sunburst chart (0.3).
 *
 * @param bubbleChartTeX  TeX source of the bubble chart
 * @param barChartTeX     TeX source of the bar chart
 * @param radarChartTeX   TeX source of the radar chart
 * @param sunburstCharTeX TeX source of the sunburst chart
 * @return the concatenated minipage blocks
 */
public static String generateChartView(String bubbleChartTeX, String barChartTeX, String radarChartTeX, String sunburstCharTeX) {
    return minipage("1", barChartTeX)
            + minipage("0.4", bubbleChartTeX)
            + minipage("0.3", radarChartTeX)
            + minipage("0.3", sunburstCharTeX);
}

/** Wraps the content in a minipage whose width is the given fraction of the text width. */
private static String minipage(String widthFraction, String content) {
    return "\\begin{minipage}{" + widthFraction + "\\textwidth}\n"
            + content
            + "\\end{minipage}\n";
}
/**
 * Renders the topic data as a two-column LaTeX table (category, value) below a
 * "Topics Information" heading. Despite the method name, no chart is drawn.
 *
 * @param bubbleData topic names mapped to their values; one table row per entry,
 *                   in the map's iteration order
 * @return the TeX source of the heading and table
 */
public static String generateBubbleChartLatex(Map<String, Double> bubbleData) {
    StringBuilder rows = new StringBuilder();
    bubbleData.forEach((category, value) ->
            rows.append(category).append(" & ").append(value).append(" \\\\ \\hline\n"));

    return "Topics Information\\\\\n"
            + "\\begin{tabular}{|c|c|}\n"
            + "\\hline\n"
            + "Category & Value \\\\ \\hline\n"
            + rows
            + "\\end{tabular}\n\n";
}
/**
 * Renders the POS counts as a pgfplots bar chart below a "POS Information" heading.
 * <p>
 * Each map key becomes a symbolic x coordinate; commas and dollar signs in keys are
 * backslash-escaped. The picture is scaled to 0.25 and the y axis is fixed to 0..800.
 *
 * @param barData POS tags mapped to their number of occurrences; bars follow the
 *                map's iteration order
 * @return the TeX source of the heading and chart
 */
public static String generateBarChartLatex(Map<String, Integer> barData) {
    StringBuilder symbolicCoords = new StringBuilder("{");
    StringBuilder plotPoints = new StringBuilder();
    barData.forEach((tag, count) -> {
        // Escape the characters that would otherwise break the coordinate list.
        String safeTag = tag.replace(",", "\\,").replace("$", "\\$");
        symbolicCoords.append(safeTag).append(", ");
        plotPoints.append("\t(").append(safeTag).append(", ").append(count).append(")\n");
    });
    symbolicCoords.append("}");

    return "POS Information\\\\\n"
            + "\n"
            // the only way to reliably show most of the POS is by scaling it down this far
            + "\\scalebox{0.25}{"
            + "\\begin{tikzpicture}\n"
            + "\n"
            + "\\begin{axis}[\n"
            + " ybar,\n"
            + " width=4\\textwidth,\n"
            + " height=0.5\\textwidth,\n"
            + " symbolic x coords=" + symbolicCoords + ",\n"
            + " xtick=data,\n"
            + " ylabel={Value},\n"
            + " xlabel={Category},\n"
            + " ymin=0, ymax=800\n"
            + " ]"
            + "\\addplot coordinates {\n"
            + plotPoints
            + "};\n"
            + "\\end{axis}\n"
            + "\n"
            + "\\end{tikzpicture}"
            + "}";
}
/**
 * Placeholder for the sentiment radar chart. The TikZ drawing code is disabled
 * (kept below in a comment), so the parameter is currently unused.
 *
 * @param sentimets the sentiments of a speech (currently ignored)
 * @return always the empty string
 */
public static String generateRadarChartLatex(List<Sentiment> sentimets) {
    /* Disabled TikZ implementation, kept for reference:
    tex.append("\\begin{tikzpicture}\n" +
    " \\coordinate (origin) at (0, 0);\n" +
    "\n" +
    " % Define the axes (3 axes) with unit length (1)\n" +
    " \\foreach[count=\\i] \\dim in {Negative, Neutral, Positive}{\n" +
    " \\coordinate (\\i) at (\\i * 360 / 3: 1); % Set radius to 1 for unit length axes\n" +
    " \\node at (\\i * 360 / 3: 1.1) {\\huge\\dim}; % Axis labels (slightly outside)\n" +
    " \\draw (origin) -- (\\i); % Draw the axes\n" +
    " }");
    for (Sentiment sentiment : sentimets) {
    tex.append("\\foreach \\i/\\value in {1/")
    .append(sentiment.getNegative())
    .append(", 2/")
    .append(sentiment.getNeutral())
    .append(", 3/")
    .append(sentiment.getPositive())
    .append("}{\n")
    .append(" \\coordinate (point-\\i) at (\\i * 360 / 3: \\value);\n")
    .append(" }\n");
    }
    tex.append("\\draw [fill=blue!20, opacity=.7] (point-1) -- (point-2) -- (point-3) -- cycle;\n" +
    "\\end{tikzpicture}");*/
    return "";
}
}

View file

@ -0,0 +1,191 @@
package org.texttechnologylab.project.gruppe_05_1.export;
import org.texttechnologylab.project.gruppe_05_1.database.domainimpl.mdb.Speaker_MongoDB_Impl;
import org.texttechnologylab.project.gruppe_05_1.domain.html.HtmlSpeech;
import org.texttechnologylab.project.gruppe_05_1.domain.nlp.*;
import org.texttechnologylab.project.gruppe_05_1.xml.speeches.Interfaces.Speech;
import javax.xml.transform.Transformer;
import javax.xml.transform.TransformerFactory;
import javax.xml.transform.dom.DOMSource;
import javax.xml.transform.stream.StreamResult;
import java.io.StringWriter;
import java.util.List;
import org.w3c.dom.Document;
import org.w3c.dom.Element;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;
import static org.texttechnologylab.project.gruppe_05_1.database.MongoPprUtils.*;
/**
* Utility-Klasse für die Erstellung von XML-Dokumenten aus Reden und deren Serialisierung als String.
* Implementiert von Jonas
*/
public class XMLUtil {
/**
 * Serializes a DOM document to its XML text using a default {@link Transformer}.
 *
 * @param doc the document to serialize
 * @return the XML text, or {@code null} if the transformation failed
 *         (the stack trace is printed in that case)
 */
public static String documentToString(Document doc) {
    StringWriter xmlText = new StringWriter();
    try {
        Transformer serializer = TransformerFactory.newInstance().newTransformer();
        serializer.transform(new DOMSource(doc), new StreamResult(xmlText));
    } catch (Exception e) {
        e.printStackTrace();
        return null;
    }
    return xmlText.toString();
}
/**
 * Creates a new DOM document whose root is an empty "speeches" element.
 *
 * @return the new document
 * @throws ParserConfigurationException if no document builder can be created
 */
public static Document createXmlDocument() throws ParserConfigurationException {
    Document export = DocumentBuilderFactory.newInstance()
            .newDocumentBuilder()
            .newDocument();
    // The root element under which the export methods collect their output.
    export.appendChild(export.createElement("speeches"));
    return export;
}
/**
 * Appends an "nlp" element with the speech's NLP results to the first child of the document.
 * <p>
 * The element always contains the four containers "sentiments", "topics",
 * "NamedEntities" and "pos", in that order. A container stays empty when the
 * corresponding list, or the NLP information as a whole, is missing.
 * <p>
 * NOTE(review): the "nlp" element is attached to the document's first child (the
 * "speeches" root created by {@code createXmlDocument}), not to the individual
 * "speech" element — confirm that this is intended for multi-speech exports.
 *
 * @param doc     the export document; must already have a first child
 * @param nlpData the speech whose NLP information is exported; may be {@code null}
 */
public static void addNlpData(Document doc, HtmlSpeech nlpData) {
    Element nlpDataElement = doc.createElement("nlp");
    Element sentimentsElement = doc.createElement("sentiments");
    Element topicsElement = doc.createElement("topics");
    Element namedEntitiesElement = doc.createElement("NamedEntities");
    Element posElement = doc.createElement("pos");
    nlpDataElement.appendChild(sentimentsElement);
    nlpDataElement.appendChild(topicsElement);
    nlpDataElement.appendChild(namedEntitiesElement);
    nlpDataElement.appendChild(posElement);

    // Read the NLP information once; a speech without it yields the empty skeleton.
    var nlp = nlpData == null ? null : nlpData.getNlp();
    if (nlp != null) {
        // Sentiments are now null-checked like the other three lists.
        List<Sentiment> sentiments = nlp.getSentiments();
        if (sentiments != null) {
            for (Sentiment sentiment : sentiments) {
                sentimentsElement.appendChild(sentiment.toXML(doc));
            }
        }
        List<Topic> topics = nlp.getTopics();
        if (topics != null) {
            for (Topic topic : topics) {
                topicsElement.appendChild(topic.toXML(doc));
            }
        }
        List<NamedEntity> namedEntities = nlp.getNamedEntities();
        if (namedEntities != null) {
            for (NamedEntity namedEntity : namedEntities) {
                namedEntitiesElement.appendChild(namedEntity.toXML(doc));
            }
        }
        List<Pos> posElements = nlp.getPosList();
        if (posElements != null) {
            for (Pos pos : posElements) {
                posElement.appendChild(pos.toXML(doc));
            }
        }
    }

    doc.getFirstChild().appendChild(nlpDataElement);
}
/**
 * Looks up the speech with the given key and adds it to the export document.
 *
 * @param doc      the export document; must contain a "speeches" element
 * @param speechId key of the speech to add
 */
public static void addSpeechById(Document doc, String speechId) {
    appendSpeech(doc, getSpeechByKey(speechId), speechId);
}

/**
 * Adds an already loaded speech to the export document.
 *
 * @param doc    the export document; must contain a "speeches" element
 * @param speech the speech to add
 */
public static void addSpeechBySpeech(Document doc, Speech speech) {
    appendSpeech(doc, speech, speech.getSpeechKey());
}

/**
 * Appends a "speech" element (speaker followed by speech content) to the first
 * "speeches" element and adds the NLP data looked up under the given key.
 */
private static void appendSpeech(Document doc, Speech speech, String speechKey) {
    Element speechesElement = (Element) doc.getElementsByTagName("speeches").item(0);
    Element speechElement = doc.createElement("speech");
    speechesElement.appendChild(speechElement);

    Speaker_MongoDB_Impl speaker = getSpeakerById(String.valueOf(speech.getSpeakerId()));
    HtmlSpeech htmlSpeech = getHtmlSpeechByKey(speechKey);
    addNlpData(doc, htmlSpeech);

    speechElement.appendChild(speaker.toXML(doc));
    speechElement.appendChild(speech.toXML(doc));
}
/**
 * Exports a single speech as an XML string.
 *
 * @param speechId key of the speech to export
 * @return the XML text, or {@code null} if serialization failed
 * @throws ParserConfigurationException if the XML document cannot be created
 */
public static String getExportedSpeechById(String speechId) throws ParserConfigurationException {
    Document export = createXmlDocument();
    addSpeechById(export, speechId);
    String xml = documentToString(export);
    return xml;
}
/**
 * Exports all speeches of one speaker as an XML string.
 *
 * @param speakerId id of the speaker whose speeches are exported
 * @return the XML text, or {@code null} if serialization failed
 * @throws ParserConfigurationException if the XML document cannot be created
 */
public static String getExportedSpeechesFromSpeakerById(String speakerId) throws ParserConfigurationException {
    return exportSpeeches(getSpeechesBySpeakerId(speakerId));
}

/**
 * Exports every available speech as an XML string.
 *
 * @return the XML text, or {@code null} if serialization failed
 * @throws ParserConfigurationException if the XML document cannot be created
 */
public static String getExportedAllSpeeches() throws ParserConfigurationException {
    return exportSpeeches(getAllSpeeches());
}

/**
 * Exports all speeches with the given topic as an XML string.
 *
 * @param topic the topic used to select the speeches
 * @return the XML text, or {@code null} if serialization failed
 * @throws ParserConfigurationException if the XML document cannot be created
 */
public static String getExportedSpeechesWhithTopic(String topic) throws ParserConfigurationException {
    return exportSpeeches(getAllSpeechesWithTopic(topic));
}

/** Adds each speech, in list order, to a fresh export document and serializes it. */
private static String exportSpeeches(List<Speech> speeches) throws ParserConfigurationException {
    Document doc = createXmlDocument();
    for (Speech speech : speeches) {
        addSpeechBySpeech(doc, speech);
    }
    return documentToString(doc);
}
/**
 * Exports the speeches with the given keys as one XML string.
 *
 * @param speechIds keys of the speeches to export, in output order
 * @return the XML text, or {@code null} if serialization failed
 * @throws ParserConfigurationException if the XML document cannot be created
 */
public static String getExportedSpeechesbyIds(List<String> speechIds) throws ParserConfigurationException {
    Document export = createXmlDocument();
    speechIds.forEach(key -> addSpeechById(export, key));
    return documentToString(export);
}
}

View file

@ -2,9 +2,7 @@ package org.texttechnologylab.project.gruppe_05_1.nlp;
import com.mongodb.client.model.Filters;
import com.mongodb.client.model.UpdateOneModel;
import com.mongodb.client.model.Updates;
import com.mongodb.client.model.WriteModel;
import de.tudarmstadt.ukp.dkpro.core.api.lexmorph.type.pos.POS;
import de.tudarmstadt.ukp.dkpro.core.api.metadata.type.DocumentMetaData;
import de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Sentence;
import de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Token;
@ -16,9 +14,6 @@ import org.apache.uima.fit.factory.JCasFactory;
import org.apache.uima.fit.util.JCasUtil;
import org.apache.uima.jcas.JCas;
import org.apache.uima.resource.ResourceInitializationException;
import org.apache.uima.resource.metadata.TypeSystemDescription;
import org.apache.uima.util.CasCreationUtils;
import org.apache.uima.util.XMLInputSource;
import org.bson.Document;
import org.bson.conversions.Bson;
import org.dkpro.core.io.xmi.XmiWriter;
@ -30,9 +25,7 @@ import org.texttechnologylab.DockerUnifiedUIMAInterface.driver.DUUIUIMADriver;
import org.texttechnologylab.DockerUnifiedUIMAInterface.lua.DUUILuaContext;
import org.texttechnologylab.annotation.NamedEntity;
import org.texttechnologylab.project.gruppe_05_1.database.MongoDBHandler;
import org.texttechnologylab.project.gruppe_05_1.database.domainimp.speeches.Speech_MongoDB_Impl;
import org.texttechnologylab.project.gruppe_05_1.util.Logger;
import org.texttechnologylab.project.gruppe_05_1.xml.speeches.Interfaces.Content;
import org.texttechnologylab.project.gruppe_05_1.xml.speeches.Interfaces.Speech;
import org.texttechnologylab.uima.type.Sentiment;
import org.xml.sax.SAXException;
@ -44,9 +37,6 @@ import java.nio.file.Files;
import java.nio.file.Path;
import java.util.*;
import java.util.stream.Collectors;
import java.util.zip.GZIPInputStream;
import java.util.zip.ZipEntry;
import java.util.zip.ZipInputStream;
import static org.apache.uima.fit.factory.AnalysisEngineFactory.createEngineDescription;
@ -58,9 +48,10 @@ public class NlpUtils {
private static final String TYPE_SYSTEM_DESCRIPTOR_PATH = "/speeches/TypeSystem.xml.gz";
private static final int MAX_FEATURE_LENGTH = 10000;
/**
* implementiert von Valentin
*/
public static void createNlpData() {
// Source: Dr. Abrami - Beispiel TODO
duuiInit();
runDockerDriver();
try {
@ -78,6 +69,9 @@ public class NlpUtils {
createSentimentInfo();
}
/**
* implementiert von Valentin
*/
private static void duuiInit() {
DUUILuaContext ctx;
@ -126,6 +120,7 @@ public class NlpUtils {
}
/**
* implementiert von Valentin
* Initialization of a sample CAS document
* @return JCas object
*/
@ -153,7 +148,9 @@ public class NlpUtils {
return pCas;
}
/**
* implementiert von Valentin
*/
public static void runDockerDriver() {
// reset existing pipeline-components
@ -198,7 +195,9 @@ public class NlpUtils {
Logger.info(JCasUtil.selectCovered(org.hucompute.textimager.uima.type.Sentiment.class, sentence).toString());
});
}
/**
* implementiert von Valentin
*/
private static void casInit() {
JCas jcas;
try {
@ -252,6 +251,7 @@ public class NlpUtils {
/**
* implementiert von Valentin
* Execution of video processing via DUUI using the RemoteDriver
* @throws Exception in case of an error
*/
@ -325,6 +325,15 @@ public class NlpUtils {
}
/**
* Führt den Remote-NLP-Driver auf unprozessierten Reden aus und aktualisiert die Datenbank.
* <p>
* Es werden alle Reden ohne "analysisResults" abgerufen, mit den Remote-Komponenten (spaCy und Vader)
* verarbeitet, die NLP-Ergebnisse extrahiert und per Bulk-Update in der Datenbank gespeichert.
* </p>
*
* @throws Exception falls ein Fehler auftritt.
*/
public static void runRemoteDriver() throws Exception {
DUUILuaContext luaContext = new DUUILuaContext().withJsonLibrary();
pComposer = new DUUIComposer()
@ -360,8 +369,8 @@ public class NlpUtils {
bulkOperations.add(new UpdateOneModel<>(updateFilter, update));
}
if (!bulkOperations.isEmpty()) {
System.out.println("Processing of " + bulkOperations.size() + " documents finished");
System.out.println("uploading...");
Logger.debug("Processing of " + bulkOperations.size() + " documents finished");
Logger.debug("uploading...");
mongoDBHandler.bulkWriteNlpData(bulkOperations);
Logger.debug("Bulk write completed for " + bulkOperations.size() + " documents.");
mongoDBHandler.close();

View file

@ -6,7 +6,6 @@ import com.mongodb.client.model.WriteModel;
import org.apache.uima.fit.util.JCasUtil;
import org.bson.Document;
import java.io.*;
import java.nio.charset.StandardCharsets;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.Future;
@ -24,7 +23,12 @@ import org.texttechnologylab.project.gruppe_05_1.util.Logger;
import static org.texttechnologylab.project.gruppe_05_1.Main.JCAS_SPEECHES_TYPESYSTEM_DIR;
/**
* Extrahiert NLP-Analyseergebnisse aus komprimierten XMI-Dateien und lädt sie batchweise in MongoDB hoch.
* Liest .xmi.gz-Dateien aus einem ZIP-Archiv, erstellt für jede Rede ein "analysisResults"-Dokument und führt Bulk-Writes durch.
* Implementiert von Leon
* Modifiziert von Henry
*/
public class XmiExtractor {
private final List<WriteModel<Document>> bulkOperations = Collections.synchronizedList(new ArrayList<>());
@ -36,6 +40,13 @@ public class XmiExtractor {
mongoDBHandler = new MongoDBHandler();
}
/**
* Liest alle .xmi.gz-Dateien aus dem ZIP-Archiv "speeches/20.zip", extrahiert deren NLP-Daten
* und lädt sie in Batches von BATCH_SIZE Dokumenten in MongoDB hoch.
* @throws IOException falls die ZIP-Datei nicht gefunden wird oder ein Lese-/Schreibfehler auftritt
* Implementiert von Leon
* Modifiziert von Henry
*/
public void extractAndUploadXmiData() throws IOException {
InputStream resourceStream = getClass().getClassLoader().getResourceAsStream("speeches/20.zip");
if (resourceStream == null) {
@ -85,6 +96,13 @@ public class XmiExtractor {
mongoDBHandler.close();
}
/**
* Liest ein komprimiertes XMI ein und erstellt Bulk-Update-Operationen für MongoDB.
* @param inputStream komprimierter XMI-InputStream
* @param filename Name der Datei im ZIP-Archiv (für speechKey)
* Implementiert von Leon
* Modifiziert von Henry
*/
private void processXmiGzStream(InputStream inputStream, String filename) {
JCas jCas = null;
try (GZIPInputStream gis = new GZIPInputStream(inputStream)) {
@ -181,11 +199,21 @@ public class XmiExtractor {
}
}
/**
 * Derives the speech key from the name of an archive entry (e.g. "20/ABC123.xmi.gz" becomes "ABC123").
 * <p>
 * Every occurrence of the literal ".xmi.gz" is removed first, then every occurrence of the
 * literal "20/". A name that contains neither is returned unchanged; this method never
 * returns {@code null}.
 *
 * @param filename name of the entry inside the ZIP archive
 * @return the file name with the ".xmi.gz" and "20/" parts stripped
 * Implemented by Leon
 */
private static String extractSpeechKeyFromFilename(String filename) {
    return filename
            .replace(".xmi.gz", "")
            .replace("20/", "");
}
/**
* Führt alle gesammelten BulkWrite-Operationen in MongoDB aus und leert den Batch.
* Implementiert von Henry
*/
private synchronized void flushBatch() {
if (!bulkOperations.isEmpty()) {
mongoDBHandler.bulkWriteNlpData(bulkOperations);

View file

@ -1,23 +1,32 @@
package org.texttechnologylab.project.gruppe_05_1.rest;
import gnu.trove.impl.sync.TSynchronizedShortObjectMap;
import com.mongodb.client.MongoCollection;
import io.javalin.http.Context;
import io.javalin.openapi.*;
import org.apache.commons.collections.bag.SynchronizedSortedBag;
import org.texttechnologylab.project.gruppe_05_1.database.MongoPprUtils;
import org.texttechnologylab.project.gruppe_05_1.domain.html.HtmlSpeech;
import org.texttechnologylab.project.gruppe_05_1.domain.html.Parlamentarier;
import org.texttechnologylab.project.gruppe_05_1.domain.html.ParlamentarierDetails;
import org.texttechnologylab.project.gruppe_05_1.domain.speech.SpeechMetaData;
import org.texttechnologylab.project.gruppe_05_1.util.Logger;
import org.texttechnologylab.project.gruppe_05_1.domain.nlp.Sentiment;
import org.texttechnologylab.project.gruppe_05_1.domain.nlp.Token;
import org.texttechnologylab.project.gruppe_05_1.domain.nlp.Topic;
import org.texttechnologylab.project.gruppe_05_1.util.PPRUtils;
import org.texttechnologylab.project.gruppe_05_1.xml.speeches.Interfaces.Speech;
import com.mongodb.client.model.Aggregates;
import com.mongodb.client.model.Accumulators;
import com.mongodb.client.model.Filters;
import com.mongodb.client.model.Projections;
import org.bson.Document;
import org.bson.conversions.Bson;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.concurrent.CompletableFuture;
import java.util.stream.Collectors;
/**
* Controller für die Frontend-Routen.
* Größtenteils implementiert von Jonas
*/
public class FrontEndController {
@OpenApi(
summary = "Get the homepage.",
@ -33,6 +42,24 @@ public class FrontEndController {
ctx.render("home.ftl");
}
/**
 * Renders the "about" page from the {@code about.ftl} template.
 * <p>
 * NOTE(review): {@code getAboutPage} in this class is annotated with the same path
 * ("/about") and renders the same template — confirm which of the two handlers should stay.
 *
 * @param ctx Javalin context used to render the template
 */
@OpenApi(
summary = "Get the about page.",
description = "Get the about",
operationId = "getAbout",
path = "/about",
methods = HttpMethod.GET,
tags = {"About"},
responses = {
@OpenApiResponse(status = "200")
})
public static void getAbout(Context ctx) {
ctx.render("about.ftl");
}
/*
TODO: getAllParlamentarier gibt es hier UND im ParlamentarierController (etwas unterschiedliche Implementierungen)
--> konsolidieren!
*/
@OpenApi(
summary = "Get alle Parlamentarier. Man kann nach Vor-, Nachname oder Partei filtern.",
description = "Listet alle Parlamentarier bzw. diejenige, welche den Filter entsprechen",
@ -47,115 +74,113 @@ public class FrontEndController {
@OpenApiResponse(status = "200", content = {@OpenApiContent(from = Parlamentarier[].class)})
})
public static void getAllParlamentarier(Context ctx) {
String filter = ctx.queryParam("filter");
Logger.info("Filter: '" + filter + "'");
List<Parlamentarier> parlamentarier = MongoPprUtils.getAllParlamentarier(filter);
List<Parlamentarier> parlamentarier = MongoPprUtils.getFilteredMembers(ctx);
PPRUtils.sortParlamentarierByName(parlamentarier);
Logger.info(parlamentarier.size() + " MdBs gefunden");
Map<String, Object> attributes = new HashMap<>();
attributes.put("parlamentarier", parlamentarier);
attributes.put("filter", filter);
attributes.put("parties", MongoPprUtils.getAllPartiesOfSpeakers());
ctx.render("parlamentarier.ftl", attributes);
}
/**
 * Renders the export page from the {@code export.ftl} template.
 * No model attributes are passed to the template.
 *
 * @param ctx Javalin context used to render the template
 */
@OpenApi(
summary = "Get the export page.",
description = "Get the export page",
operationId = "getExportPage",
path = "/export",
methods = HttpMethod.GET,
tags = {"Export"},
responses = {
@OpenApiResponse(status = "200")
})
public static void getExportPage(Context ctx) {
ctx.render("export.ftl");
}
@OpenApi(
summary = "Zeigt die Details eines Parlamentariers an",
description = "Zeigt persönliche Daten, Mitgliederschaften, Fotos",
operationId = "getParlamentarierDetails",
path = "/portfolio/{id}",
summary = "Get the about page.",
description = "Get the about page",
operationId = "getAboutPage",
path = "/about",
methods = HttpMethod.GET,
tags = {"Parlamentarier"},
pathParams = {
@OpenApiParam(name = "id", description = "id des Parlamentariers", required = true),
},
tags = {"About"},
responses = {
@OpenApiResponse(status = "200", content = {@OpenApiContent(from = ParlamentarierDetails.class)})
@OpenApiResponse(status = "200")
})
public static void getParlamentarierDetails(Context ctx) {
String id = ctx.pathParam("id");
Logger.info("getParlamentarierDetails, ID = " + id);
ParlamentarierDetails pd = MongoPprUtils.getParlamentarierDetailsByID(id);
Map<String, Object> attributes = new HashMap<>();
attributes.put("p", pd);
Long speechCount = MongoPprUtils.countSpeechesOfSpeaker(pd.getId());
attributes.put("speechesCount", speechCount);
attributes.put("pic", MongoPprUtils.getMemberPhoto(pd.getId()));
if (speechCount == 0) {
attributes.put("speechesPlaceholder", null);
} else {
attributes.put("speechesPlaceholder", new ArrayList<>());
}
ctx.render("parlamentarierDetails.ftl", attributes);
public static void getAboutPage(Context ctx) {
ctx.render("about.ftl");
}
/**
* Liste alle Reden eines Parlamentariers an
* @param ctx Javalin Context
* Aggregiert für alle Reden die NLP-Ergebnisse (Topics, POS, Named Entities, erste Sentiment-Objekte)
* und liefert die zusammengefassten Daten an die Charts-Ansicht.
* @param ctx Javalin Context zum Rendern der Seite mit den aggregierten Chart-Daten
* Implementiert von Leon
*/
@OpenApi(
summary = "Liste alle Reden eines Parlamentariers an",
description = "Liste alle Reden eines Parlamentariers an",
operationId = "listSpeeches",
path = "/reden/{id}",
methods = HttpMethod.GET,
tags = {"Rede"},
pathParams = {
@OpenApiParam(name = "id", description = "id des Parlamentariers", required = true),
},
responses = {
@OpenApiResponse(status = "200", content = {@OpenApiContent(from = Speech[].class)})
})
public static void listSpeeches(Context ctx) {
String parlamentarierId = ctx.pathParam("id");
public static void getCharts(Context ctx) {
MongoCollection<Document> col = MongoPprUtils.getSpeechCollection();
ParlamentarierDetails p = MongoPprUtils.getParlamentarierDetailsByID(parlamentarierId);
List<SpeechMetaData> speechMetaDataList = MongoPprUtils.getSpeechesMetadataForSpeaker(parlamentarierId);
List<Bson> topicsPipeline = List.of(
Aggregates.match(Filters.exists("analysisResults.topics.0")),
Aggregates.unwind("$analysisResults.topics"),
Aggregates.group(
"$analysisResults.topics.topic",
Accumulators.sum("totalScore", "$analysisResults.topics.score")
)
);
List<Bson> posPipeline = List.of(
Aggregates.match(Filters.exists("analysisResults.tokens")),
Aggregates.unwind("$analysisResults.tokens"),
Aggregates.group("$analysisResults.tokens.pos", Accumulators.sum("count", 1))
);
List<Bson> nePipeline = List.of(
Aggregates.match(Filters.exists("analysisResults.namedEntities")),
Aggregates.unwind("$analysisResults.namedEntities"),
Aggregates.group("$analysisResults.namedEntities.type", Accumulators.sum("count", 1))
);
List<Bson> sentimentsPipeline = List.of(
Aggregates.match(Filters.exists("analysisResults.sentiments")),
Aggregates.project(Projections.computed("firstSentiment",
new Document("$arrayElemAt", List.of("$analysisResults.sentiments", 0)))),
Aggregates.replaceRoot("$firstSentiment")
);
Map<String, Object> attributes = new HashMap<>();
attributes.put("p", p);
attributes.put("speechesMetaDataList", speechMetaDataList);
ctx.render("showSpeechesList.ftl", attributes);
}
CompletableFuture<List<Document>> topicsF = CompletableFuture.supplyAsync(() -> col.aggregate(topicsPipeline).into(new ArrayList<>()));
CompletableFuture<List<Document>> posF = CompletableFuture.supplyAsync(() -> col.aggregate(posPipeline).into(new ArrayList<>()));
CompletableFuture<List<Document>> neF = CompletableFuture.supplyAsync(() -> col.aggregate(nePipeline).into(new ArrayList<>()));
CompletableFuture<List<Document>> sentF = CompletableFuture.supplyAsync(() -> col.aggregate(sentimentsPipeline).into(new ArrayList<>()));
/**
* Zeige eine bestimmte Rede des Parlamentariers an
* @param ctx Javalin Context
*/
@OpenApi(
summary = "Zeige eine bestimmte Rede des Parlamentariers an",
description = "Zeige eine bestimmte Rede des Parlamentariers an",
operationId = "showSpeech",
path = "/reden/{id}/{redeID}",
methods = HttpMethod.GET,
tags = {"Rede"},
pathParams = {
@OpenApiParam(name = "id", description = "id des Parlamentariers", required = true),
@OpenApiParam(name = "redeId", description = "id der Rede", required = true),
},
responses = {
@OpenApiResponse(status = "200", content = {@OpenApiContent(from = Speech.class)})
})
public static void showSpeech(Context ctx) {
String redeId = ctx.pathParam("redeId");
CompletableFuture.allOf(topicsF, posF, neF, sentF).join();
Map<String, Object> attributes = new HashMap<>();
List<Topic> aggregatedTopics = topicsF.join().stream()
.map(d -> new Topic(d.getString("_id"), d.getDouble("totalScore"), null))
.collect(Collectors.toList());
HtmlSpeech speech = MongoPprUtils.getSpeechByKey(redeId);
attributes.put("s", speech);
List<Token> aggregatedPOS = posF.join().stream()
.map(d -> new Token(d.getString("_id"), String.valueOf(d.getInteger("count")), ""))
.collect(Collectors.toList());
ctx.render("speech.ftl", attributes);
Map<String, Map<String, Integer>> aggregatedNE = new HashMap<>();
neF.join().forEach(d -> {
List<Document> entities = d.getList("entities", Document.class);
Map<String, Integer> typeMap = (entities == null)
? new HashMap<>()
: entities.stream()
.collect(Collectors.toMap(
e -> e.getString("text"),
e -> e.getInteger("count")
));
aggregatedNE.put(d.getString("_id"), typeMap);
});
List<Sentiment> aggregatedSentiments = Sentiment.readSentimentsFromMongo(sentF.join());
ctx.render("charts.ftl", Map.of(
"aggregatedTopics", aggregatedTopics,
"aggregatedPOS", aggregatedPOS,
"aggregatedNE", aggregatedNE,
"aggregatedSentiments", aggregatedSentiments
));
}
}

View file

@ -9,6 +9,7 @@ import java.io.InputStream;
import java.util.Properties;
/**
* Datei implementiert von Valentin
* Diese Klasse dient der Konfigurierung von Javalin
*/
public class JavalinConfig extends Properties {

View file

@ -15,6 +15,7 @@ import java.util.List;
import java.util.Map;
/**
* Datei implementiert von Valentin
* Dieser Controller zeigt Informationen zu den Parlamentariern:
* 1. Die Einstiegsseite: eine filterbare Auflistung der Parlamentarier.
* Angezeigt werden Vor- und Nachname, Partei sowie ID.
@ -109,30 +110,4 @@ public class ParlamentarierController {
ctx.render("parlamentarierDetails.ftl", attributes);
}
/**
 * Deletes all members of parliament and re-renders the member list.
 * <p>
 * Calls {@code MongoPprUtils.truncateSpeakerCollection()} (presumably empties the speaker
 * collection — confirm), then loads the members with an empty filter and renders
 * {@code parlamentarier.ftl} with the result.
 *
 * @param ctx Javalin context
 */
@OpenApi(
        summary = "Lösche alle Parlamentarier",
        description = "Lösche alle Parlamentarier aus der Datenbank",
        operationId = "deleteAllParlamentarier",
        path = "/deleteParlamentarier",
        methods = HttpMethod.DELETE,
        tags = {"Parlamentarier"},
        responses = {
                @OpenApiResponse(status = "204", content = {@OpenApiContent(from = Parlamentarier[].class)})
        })
public static void deleteAllParlamentarier(Context ctx) {
    MongoPprUtils.truncateSpeakerCollection();

    // Build the template model from whatever is left after the truncation.
    Map<String, Object> model = new HashMap<>();
    model.put("parlamentarier", MongoPprUtils.getAllParlamentarier(""));
    model.put("filter", "filter");

    ctx.render("parlamentarier.ftl", model);
}
}

View file

@ -15,6 +15,9 @@ import java.io.IOException;
import static org.texttechnologylab.project.gruppe_05_1.Main.JAVALIN_STATIC_FILES_DIR;
import static org.texttechnologylab.project.gruppe_05_1.Main.JAVALIN_TEMPLATE_DIR;
/**
* Datei implementiert von Valentin
*/
public class RESTHandler {
public void startJavalin() {
@ -58,11 +61,32 @@ public class RESTHandler {
// Parlamentarier
app.get("/", FrontEndController::getHomepage);
app.get("/members", FrontEndController::getAllParlamentarier);
app.get("/portfolio/{id}", FrontEndController::getParlamentarierDetails);
app.delete("/deleteParlamentarier", ParlamentarierController::deleteAllParlamentarier);
app.get("/portfolio/{id}", ParlamentarierController::getParlamentarierDetails);
app.get("/export", FrontEndController::getExportPage);
app.get("/about", FrontEndController::getAboutPage);
// Reden
app.get("/reden/{id}", FrontEndController::listSpeeches); // zeige Reden eines Parlamentariers an
app.get("/reden/{id}/{redeId}", FrontEndController::showSpeech); // zeige eine bestimmte Rede des Parlamentariers an
app.get("/reden/{id}", SpeechController::listSpeeches); // zeige Reden eines Parlamentariers an
app.get("/reden/{id}/{redeId}", SpeechController::showSpeech); // zeige eine bestimmte Rede des Parlamentariers an
app.get("/reden", SpeechController::listAllSpeeches); // zeige alle Reden an (Filtern möglich)
// Charts
app.get("/charts", FrontEndController::getCharts);
app.get("/export/pdf/speech/{id}", SpeechesLatexExportController::exportSpeech); // exportiere eine Rede als PDF
app.get("/export/pdf/speech", SpeechesLatexExportController::exportSpeech); // exportiere eine Rede als PDF
app.get("/export/pdf/speaker/{id}", SpeechesLatexExportController::exportSpeechesFromSpeaker); // exportiere alle Reden eines Parlamentariers als PDF
app.get("/export/pdf/topic/{topic}", SpeechesLatexExportController::exportSpeechesWithTopic); // exportiere alle Reden zu einem Thema als PDF
app.get("/export/pdf/all", SpeechesLatexExportController::exportAllSpeeches); // exportiere alle Reden als PDF CAUTION!!!: This will take forever but is required in the exercise
app.get("/export/pdf/speeches/{speechIds}", SpeechesLatexExportController::exportSpeeches); // exportiere eine Liste von Reden als PDF
app.get("/export/xml/speech/{id}", SpeechesXMLExportController::exportSpeech); // exportiere eine Rede als XML
app.get("/export/xml/speech", SpeechesXMLExportController::exportSpeech); // exportiere eine Rede als XML
app.get("/export/xml/speaker/{id}", SpeechesXMLExportController::exportSpeechesFromSpeaker); // exportiere alle Reden eines Parlamentariers als XML
app.get("/export/xml/topic/{topic}", SpeechesXMLExportController::exportSpeechesWithTopic); // exportiere alle Reden zu einem Thema als XML
app.get("/export/xml/all", SpeechesXMLExportController::exportAllSpeeches); // exportiere alle Reden als XML
app.get("/export/xml/speeches/{speechIds}", SpeechesXMLExportController::exportSpeeches); // exportiere eine Liste von Reden als XML
}
}

View file

@ -1,69 +0,0 @@
package org.texttechnologylab.project.gruppe_05_1.rest;
import freemarker.template.Configuration;
import freemarker.template.TemplateExceptionHandler;
import io.javalin.Javalin;
import io.javalin.http.staticfiles.Location;
import io.javalin.openapi.plugin.OpenApiPlugin;
import io.javalin.openapi.plugin.redoc.ReDocPlugin;
import io.javalin.rendering.template.JavalinFreemarker;
import org.texttechnologylab.project.gruppe_05_1.util.Logger;
import java.io.File;
import java.io.IOException;
import static org.texttechnologylab.project.gruppe_05_1.Main.JAVALIN_STATIC_FILES_DIR;
import static org.texttechnologylab.project.gruppe_05_1.Main.JAVALIN_TEMPLATE_DIR;
/**
 * Earlier variant of the REST bootstrap: configures FreeMarker and Javalin,
 * starts the server and registers the member and speech routes.
 */
public class RESTHandlerOld {

    /**
     * Starts the Javalin application on the port supplied by {@link JavalinConfig}
     * and registers all routes.
     */
    public void startJavalin() {
        // Javalin settings (e.g. the port)
        JavalinConfig javalinSettings = new JavalinConfig();
        int port = javalinSettings.getPort();

        Configuration freemarker = createFreemarkerConfiguration();

        // Create and start the Javalin app
        Javalin app = Javalin.create(config -> {
            config.staticFiles.add(JAVALIN_STATIC_FILES_DIR, Location.EXTERNAL); // currently not used
            config.fileRenderer(new JavalinFreemarker(freemarker));
            config.registerPlugin(new OpenApiPlugin(pluginConfig -> {
                // Define OpenAPI spec configuration
                pluginConfig.withDefinitionConfiguration((version, definition) -> {
                    definition.withOpenApiInfo(info -> info.setTitle("Javalin OpenAPI Documentation"));
                });
            }));
            config.registerPlugin(new ReDocPlugin());
        }).start(port);
        Logger.info("Javalin app started on http://localhost:" + port);

        // Routes: members of parliament
        app.get("/", ParlamentarierController::getAllParlamentarier);
        app.get("/portfolio/{id}", ParlamentarierController::getParlamentarierDetails);
        app.delete("/deleteParlamentarier", ParlamentarierController::deleteAllParlamentarier);

        // Routes: speeches
        app.get("/reden/{id}", SpeechController::listSpeeches); // list the speeches of one member
        app.get("/reden/{id}/{redeId}", SpeechController::showSpeech); // show one specific speech of the member
    }

    /**
     * Builds the FreeMarker configuration: UTF-8 encoding, templates loaded from
     * {@code JAVALIN_TEMPLATE_DIR}, template exceptions logged and rethrown.
     *
     * @return the configured FreeMarker {@link Configuration}
     * @throws RuntimeException wrapping the {@link IOException} raised when the template directory cannot be set
     */
    private static Configuration createFreemarkerConfiguration() {
        Configuration freemarker = new Configuration(Configuration.VERSION_2_3_33);
        freemarker.setDefaultEncoding("UTF-8");
        try {
            freemarker.setDirectoryForTemplateLoading(new File(JAVALIN_TEMPLATE_DIR));
        } catch (IOException e) {
            throw new RuntimeException(e);
        }
        freemarker.setLogTemplateExceptions(true);
        freemarker.setTemplateExceptionHandler(TemplateExceptionHandler.RETHROW_HANDLER);
        return freemarker;
    }
}

View file

@ -1,23 +1,30 @@
package org.texttechnologylab.project.gruppe_05_1.rest;
import com.mongodb.client.gridfs.GridFSBucket;
import com.mongodb.client.gridfs.GridFSBuckets;
import io.javalin.http.Context;
import io.javalin.openapi.*;
import org.bson.types.ObjectId;
import org.texttechnologylab.project.gruppe_05_1.database.MongoDBHandler;
import org.texttechnologylab.project.gruppe_05_1.database.MongoPprUtils;
import org.texttechnologylab.project.gruppe_05_1.domain.html.HtmlSpeech;
import org.texttechnologylab.project.gruppe_05_1.domain.html.ParlamentarierDetails;
import org.texttechnologylab.project.gruppe_05_1.domain.html.SpeechOverview;
import org.texttechnologylab.project.gruppe_05_1.domain.nlp.NamedEntity;
import org.texttechnologylab.project.gruppe_05_1.domain.nlp.Sentiment;
import org.texttechnologylab.project.gruppe_05_1.domain.nlp.Token;
import org.texttechnologylab.project.gruppe_05_1.domain.nlp.Topic;
import org.texttechnologylab.project.gruppe_05_1.domain.speech.SpeechMetaData;
import org.texttechnologylab.project.gruppe_05_1.util.Logger;
import org.texttechnologylab.project.gruppe_05_1.xml.speeches.Interfaces.Speech;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.io.ByteArrayOutputStream;
import java.util.*;
import java.util.stream.Collectors;
public class SpeechController {
/**
* Datei implementiert von Valentin
* Liste alle Reden eines Parlamentariers an
* @param ctx Javalin Context
*/
@ -38,8 +45,7 @@ public class SpeechController {
String parlamentarierId = ctx.pathParam("id");
ParlamentarierDetails p = MongoPprUtils.getParlamentarierDetailsByID(parlamentarierId);
List<SpeechMetaData> speechMetaDataList = MongoPprUtils.getSpeechesMetadataForSpeaker(parlamentarierId);
List<SpeechOverview> speechMetaDataList = MongoPprUtils.getSpeechesOverviewForSpeaker(Integer.parseInt(parlamentarierId));
Map<String, Object> attributes = new HashMap<>();
attributes.put("p", p);
attributes.put("speechesMetaDataList", speechMetaDataList);
@ -49,6 +55,8 @@ public class SpeechController {
/**
* Zeige eine bestimmte Rede des Parlamentariers an
* @param ctx Javalin Context
* Implementiert von Valentin
* Modifiziert von Leon
*/
@OpenApi(
summary = "Zeige eine bestimmte Rede des Parlamentariers an",
@ -65,46 +73,187 @@ public class SpeechController {
@OpenApiResponse(status = "200", content = {@OpenApiContent(from = Speech.class)})
})
public static void showSpeech(Context ctx) {
String parlamentarierId = ctx.pathParam("id");
String redeId = ctx.pathParam("redeId");
Map<String, Object> attributes = new HashMap<>();
HtmlSpeech speech = MongoPprUtils.getSpeechByKey(redeId);
HtmlSpeech speech = MongoPprUtils.getHtmlSpeechByKey(redeId);
if (speech == null) {
attributes.put("error", "Rede " + redeId + " nicht vorhanden");
ctx.render("speech.ftl", attributes);
return;
}
attributes.put("s", speech);
// NLP: Topic
if ((speech.getNlp() != null) && (speech.getNlp().getTopics() != null)) {
Map<String, Double> topics = Topic.condenseTopicInformation(speech.getNlp().getTopics()); // Daten "verdichten"...
// ... und ersetzen
speech.getNlp().setTopics(
topics.entrySet().stream()
.map(me -> new Topic(me.getKey(), me.getValue(), null))
.collect(Collectors.toList()));
if (speech.getVideo() != null && !speech.getVideo().trim().isEmpty()) {
MongoDBHandler mongoDBHandler = new MongoDBHandler();
try {
GridFSBucket gridFSBucket = GridFSBuckets.create(mongoDBHandler.getDatabase(), "videos");
ObjectId fileId = new ObjectId(speech.getVideo());
ByteArrayOutputStream baos = new ByteArrayOutputStream();
gridFSBucket.downloadToStream(fileId, baos);
byte[] videoBytes = baos.toByteArray();
String base64Video = Base64.getEncoder().encodeToString(videoBytes);
speech.setVideoData(base64Video);
} catch (Exception e) {
System.err.println("Error fetching video from GridFS: " + e.getMessage());
} finally {
mongoDBHandler.close();
}
}
// NLP: POS
if (speech.getNlp() != null && speech.getNlp().getTokens() != null) {
List<Token> tokens = speech.getNlp().getTokens();
// Foto des Abgeordnetes
String picture = MongoPprUtils.getParlamentarierPictureByID(parlamentarierId);
attributes.put("picture", picture);
Map<String, Integer> posCounts = Token.countPOS(tokens);
// NLP
if (speech.getNlp() != null) {
List<Token> posList = posCounts.entrySet().stream()
.map(entry -> new Token(entry.getKey(), String.valueOf(entry.getValue()), "")) // Lemma remains empty
.collect(Collectors.toList());
// NLP: Topic
if ((speech.getNlp().getTopics() != null) && (speech.getNlp().getTopics().size() > 0)) {
Map<String, Double> topics = Topic.condenseTopicInformation(speech.getNlp().getTopics()); // Daten "verdichten"...
// ... und ersetzen
speech.getNlp().setTopics(
topics.entrySet().stream()
.map(me -> new Topic(me.getKey(), me.getValue(), null))
.collect(Collectors.toList()));
} else {
speech.getNlp().setTopics(null);
}
System.out.println("DEBUG: Sending POS List to NLP - " + posList);
// NLP: POS
if (speech.getNlp() != null && speech.getNlp().getTokens() != null) {
List<Token> tokens = speech.getNlp().getTokens();
speech.getNlp().setPosList((List) posList);
Map<String, Integer> posCounts = Token.countPOS(tokens);
List<Token> posList = posCounts.entrySet().stream()
.map(entry -> new Token(entry.getKey(), String.valueOf(entry.getValue()), ""))
.collect(Collectors.toList());
Logger.debug("Sending POS List to NLP - " + posList);
speech.getNlp().setPosList((List) posList);
} else {
Logger.debug("POS List is EMPTY");
speech.getNlp().setPosList((List) new ArrayList<Token>());
}
// NLP: Named Entities
if ((speech.getNlp().getNamedEntities() != null)
&& (speech.getNlp().getNamedEntities().size() > 0)) {
Map<String, Map<String, Integer>> namedEntitiesMapOfMaps = new HashMap<>();
for (NamedEntity ne : speech.getNlp().getNamedEntities()) {
String type = ne.getType();
String text = ne.getText();
if (namedEntitiesMapOfMaps.containsKey(type)) {
// Named Entity Type bekannt...
Map<String, Integer> typeAppearance = namedEntitiesMapOfMaps.get(type);
if (typeAppearance.containsKey(text)) {
// ... und der Text auch bekannt --> erhöhe die Anzahl um 1
typeAppearance.replace(
text,
typeAppearance.get(text) + 1) ;
} else {
typeAppearance.put(text, 1);
}
} else {
// Named Entity Type unbekannt: erstelle einen neuen Eintrag für Type sowie einen Eintrag für den ihm gehörigen Text
Map<String, Integer> firstTextAppearance = new HashMap<>();
firstTextAppearance.put(text, 1);
namedEntitiesMapOfMaps.put(type, firstTextAppearance);
}
}
attributes.put("na_info", namedEntitiesMapOfMaps);
} else {
attributes.put("na_info", null);
}
// NLP: Sentiments
// Der erste Sentiment gilt der gesamten Rede. Die weitere Sentiments entsprechen die Sätze. overallSentiments speichert alle Analyseobjekte und sentiments nur die der einzelnen Sätze
List<Sentiment> sentiments = speech.getNlp().getSentiments();
if ((sentiments != null) && ! sentiments.isEmpty()) {
List<Sentiment> overallSentiments = new ArrayList<>(sentiments);
attributes.put("overallSentiments", overallSentiments);
sentiments.remove(0);
// Sentiment-Icon
List<String> sentimentIcons = calculateSentimentIcons(sentiments);
attributes.put("sentimentIcons", sentimentIcons);
} else {
attributes.put("overallSentiment", null);
attributes.put("sentimentIcons", null);
}
attributes.put("sentiments", sentiments);
} else {
System.out.println("DEBUG: POS List is EMPTY");
speech.getNlp().setPosList((List) new ArrayList<Token>()); // Ensure it's never null
}
// TODO: Token wird momentan etwas komisch abgespeichert, da im Attribut text die POS art steht, und in pos die Anzahl dieser POS arten. Umstrukturieren damit keine Verwirrung herrscht
ctx.render("speech.ftl", attributes);
}
// CSS class strings of the icons that calculateSentimentIcons assigns to sentences.
private static String POSITIVE_SENTIMENT= "fas fa-thumbs-up sentiment-positive"; // Alternative: fas fa-smile positive
private static String NEGATIVE_SENTIMENT= "fas fa-thumbs-down sentiment-negative"; // Alternative: fas fa-frown negative
private static String NEUTRAL_SENTIMENT = "fas fa-meh sentiment-neutral"; // Alternative: fas fa-circle neutral
/**
 * Maps each sentiment to an icon name (Font Awesome icon).
 * <p>
 * A sentiment is positive when its positive score exceeds its negative score by more
 * than 0.15, negative in the mirrored case, and neutral otherwise.
 *
 * @param sentiments the sentiments to classify, one per sentence
 * @return a new list of icon names in the same order as {@code sentiments}
 */
private static List<String> calculateSentimentIcons(List<Sentiment> sentiments) {
    final double threshold = 0.15;
    return sentiments.stream()
            .map(s -> {
                if (s.getPositive() - s.getNegative() > threshold) {
                    return POSITIVE_SENTIMENT;
                }
                if (s.getNegative() - s.getPositive() > threshold) {
                    return NEGATIVE_SENTIMENT;
                }
                return NEUTRAL_SENTIMENT;
            })
            .collect(Collectors.toCollection(ArrayList::new));
}
/**
 * Renders the list of all speeches ({@code showAllSpeechesList.ftl}).
 * <p>
 * The model holds the filtered speech overviews, the list of parties, the list of topics
 * and, when present and not blank, the {@code name} query parameter.
 * NOTE(review): the OpenAPI annotation documents a query parameter called "filter",
 * whereas this method reads "name" — confirm which one the filtering helper expects.
 *
 * @param ctx Javalin context; also handed to the filtering helper
 */
@OpenApi(
        summary = "Liste alle Reden (Filtern ist möglich)",
        description = "Liste alle Reden. Man kann nach Freitext (MdB Name, Partei/Fraktion) oder nach Thema (Topic) filtern",
        operationId = "listAllSpeeches",
        path = "/reden",
        methods = HttpMethod.GET,
        tags = {"Rede"},
        queryParams = {
                @OpenApiParam(name = "filter", description = "Full-Text-Filter. Kann Vorname, Nachname oder Partei filtern", required = false),
        },
        responses = {
                @OpenApiResponse(status = "200", content = {@OpenApiContent(from = Speech[].class)})
        })
public static void listAllSpeeches(Context ctx) {
    Map<String, Object> model = new HashMap<>();
    model.put("speechesMetaDataList", MongoPprUtils.getFilteredSpeechesOverview(ctx));

    // Echo the name filter back to the template only when one was actually supplied.
    String name = ctx.queryParam("name");
    boolean nameGiven = name != null && !name.isBlank();
    if (nameGiven) {
        model.put("name", name);
    }

    model.put("parties", MongoPprUtils.getAllPartiesFromSpeeches());
    model.put("topics", MongoPprUtils.getAllTopics());

    ctx.render("showAllSpeechesList.ftl", model);
}
}

View file

@ -0,0 +1,215 @@
package org.texttechnologylab.project.gruppe_05_1.rest;
import io.javalin.http.Context;
import io.javalin.openapi.HttpMethod;
import io.javalin.openapi.OpenApi;
import io.javalin.openapi.OpenApiResponse;
import org.texttechnologylab.project.gruppe_05_1.util.Logger;
import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.util.Arrays;
import java.util.Base64;
import java.util.List;
import static org.texttechnologylab.project.gruppe_05_1.export.TeXUtil.*;
/**
* Controller für die Endpunkte zum Export von Reden als PDF.
* Implementiert von Jonas
*/
public class SpeechesLatexExportController {
/**
 * Exports a single speech as a LaTeX-generated PDF.
 * <p>
 * The speech id is taken from the {@code id} path parameter; if reading it fails
 * (presumably because the route without {@code {id}} was matched), the {@code speechId}
 * query parameter is used instead.
 * <p>
 * On success the response is the PDF with content type {@code application/pdf}.
 * If the PDF could not be produced, the response is status 500 with the text
 * "Internal Server Error"; the PDF content type is deliberately NOT set in that case,
 * so the plain-text error body is not mislabeled as a PDF.
 *
 * @param ctx Javalin context of the request
 */
@OpenApi(
        summary = "Get a speech as a PDF",
        description = "Returns a LaTeX generated pdf of a selected speech",
        operationId = "getSpeechExport",
        path = "/export/pdf/speech/{id}",
        methods = HttpMethod.GET,
        tags = {"Export", "Speeches", "PDF"},
        responses = {
                @OpenApiResponse(status = "200")
        })
public static void exportSpeech(Context ctx) {
    String speechId;
    try {
        speechId = ctx.pathParam("id");
    } catch (Exception ignored) {
        // No usable path parameter: fall back to the query parameter.
        speechId = ctx.queryParam("speechId");
    }

    byte[] pdfBytes = new byte[0];
    try {
        pdfBytes = Base64.getDecoder().decode(getExportedSpeechBase64StringBySpeechId(speechId));
    } catch (Exception e) {
        Logger.error("Failed to generate Export of Speech with ID " + speechId);
        Logger.error(e.getMessage());
        Logger.debug(Arrays.toString(e.getStackTrace()));
    }

    if (pdfBytes.length == 0) {
        // Error path: respond before any PDF content type is set.
        // NOTE(review): as before, the TeX temp-dir cleanup only runs on the success path — confirm that is intended.
        Logger.error("PDF stream is empty.");
        ctx.status(500);
        ctx.result("Internal Server Error");
        return;
    }

    // Send the PDF as a response
    ctx.contentType("application/pdf");
    ctx.result(new ByteArrayInputStream(pdfBytes));
    tryDeleteTeXTempDirContents();
}
/**
 * Exports all speeches of one speaker as a LaTeX-generated PDF.
 * <p>
 * The speaker id is taken from the {@code id} path parameter. On success the response is
 * the PDF with content type {@code application/pdf}. If the PDF could not be produced,
 * the response is status 500 with the text "Internal Server Error"; the PDF content type
 * is deliberately NOT set in that case, so the plain-text error body is not mislabeled.
 *
 * @param ctx Javalin context of the request
 */
@OpenApi(
        summary = "Get all speeches from a speaker as a PDF",
        description = "Returns a LaTeX generated pdf of all speeches of a selected speech",
        operationId = "getSpeechesFromSpeakerExport",
        path = "/export/pdf/speaker/{id}",
        methods = HttpMethod.GET,
        tags = {"Export", "Speeches", "PDF"},
        responses = {
                @OpenApiResponse(status = "200")
        })
public static void exportSpeechesFromSpeaker(Context ctx) {
    // Read once so the error log does not have to query the context again.
    String speakerId = ctx.pathParam("id");

    byte[] pdfBytes = new byte[0];
    try {
        pdfBytes = Base64.getDecoder().decode(getBulkExportedSpeechBase64StringFromSpeakerById(speakerId));
    } catch (Exception e) {
        Logger.error("Failed to generate Export of Speeches from Speaker with ID " + speakerId);
        Logger.error(e.getMessage());
        Logger.debug(Arrays.toString(e.getStackTrace()));
    }

    if (pdfBytes.length == 0) {
        // Error path: respond before any PDF content type is set.
        Logger.error("PDF stream is empty.");
        ctx.status(500);
        ctx.result("Internal Server Error");
        return;
    }

    // Send the PDF as a response
    ctx.contentType("application/pdf");
    ctx.result(new ByteArrayInputStream(pdfBytes));
    tryDeleteTeXTempDirContents();
}
/**
 * Exports all speeches as one LaTeX-generated PDF.
 * <p>
 * On success the response is the PDF with content type {@code application/pdf}.
 * If the PDF could not be produced, the response is status 500 with the text
 * "Internal Server Error"; the PDF content type is deliberately NOT set in that case,
 * so the plain-text error body is not mislabeled as a PDF.
 *
 * @param ctx Javalin context of the request
 */
@OpenApi(
        summary = "Get all speeches as a PDF",
        description = "Returns a LaTeX generated pdf of all speeches",
        operationId = "getAllSpeeches",
        path = "/export/pdf/all",
        methods = HttpMethod.GET,
        tags = {"Export", "Speeches", "PDF"},
        responses = {
                @OpenApiResponse(status = "200")
        })
public static void exportAllSpeeches(Context ctx) {
    byte[] pdfBytes = new byte[0];
    try {
        pdfBytes = Base64.getDecoder().decode(getBulkExportedAllSpeechesBase64String());
    } catch (Exception e) {
        Logger.error("Failed to generate Export of all Speeches");
        Logger.error(e.getMessage());
        Logger.debug(Arrays.toString(e.getStackTrace()));
    }

    if (pdfBytes.length == 0) {
        // Error path: respond before any PDF content type is set.
        Logger.error("PDF stream is empty.");
        ctx.status(500);
        ctx.result("Internal Server Error");
        return;
    }

    // Send the PDF as a response
    ctx.contentType("application/pdf");
    ctx.result(new ByteArrayInputStream(pdfBytes));
    tryDeleteTeXTempDirContents();
}
/**
 * Handles {@code GET /export/pdf/topic/{topic}}: responds with a PDF of all speeches for a topic.
 *
 * <p>The PDF arrives as a Base64 string from
 * {@code getBulkExportedAllSpeechesWithTopicBase64String} and is decoded here. If generation or
 * decoding fails, or yields no bytes, the failure is logged and a 500 response is sent instead.
 *
 * @param ctx request context; the path parameter {@code topic} selects the topic
 */
@OpenApi(
        summary = "Get all speeches with specific topic as a PDF",
        description = "Returns a LaTeX generated pdf of all speeches with specific topic",
        operationId = "getAllSpeechesWithTopic",
        path = "/export/pdf/topic/{topic}",
        methods = HttpMethod.GET,
        tags = {"Export", "Speeches", "PDF"},
        responses = {
                @OpenApiResponse(status = "200")
        })
public static void exportSpeechesWithTopic(Context ctx) {
    String topic = ctx.pathParam("topic");
    byte[] pdfBytes = new byte[0];
    try {
        pdfBytes = Base64.getDecoder().decode(getBulkExportedAllSpeechesWithTopicBase64String(topic));
    } catch (Exception e) {
        Logger.error("Failed to generate Export of all Speeches with Topic " + topic);
        Logger.error(e.getMessage());
        Logger.debug(Arrays.toString(e.getStackTrace()));
    }
    if (pdfBytes.length == 0) {
        // The PDF content type is deliberately NOT set here: the error body is plain text.
        Logger.error("PDF stream is empty.");
        ctx.result("Internal Server Error");
        ctx.status(500);
        return;
    }
    // Only a successful export is labelled as PDF.
    ctx.contentType("application/pdf");
    ctx.result(new ByteArrayInputStream(pdfBytes));
    // NOTE(review): presumably clears the LaTeX working directory; only run after a successful export, as before.
    tryDeleteTeXTempDirContents();
}
/**
 * Handles {@code GET /export/pdf/speeches/{speechIds}}: responds with a PDF of the listed speeches.
 *
 * <p>The path parameter is split on commas into the list of speech IDs. The PDF arrives as a
 * Base64 string from {@code getBulkExportedSpeechesBase64String} and is decoded here. If
 * generation or decoding fails, or yields no bytes, the failure is logged and a 500 response is
 * sent instead.
 *
 * @param ctx request context; the path parameter {@code speechIds} holds comma-separated IDs
 */
@OpenApi(
        summary = "Get speeches by IDs as a PDF",
        description = "Returns a LaTeX-generated PDF of the speeches specified by their IDs",
        operationId = "getSpeechesByIds",
        path = "/export/pdf/speeches/{speechIds}", // Comma-separated IDs
        methods = HttpMethod.GET,
        tags = {"Export", "Speeches", "PDF"},
        responses = {
                @OpenApiResponse(status = "200")
        })
public static void exportSpeeches(Context ctx) {
    String speechIdsParam = ctx.pathParam("speechIds");
    byte[] pdfBytes = new byte[0];
    try {
        List<String> speechIds = Arrays.asList(speechIdsParam.split(","));
        pdfBytes = Base64.getDecoder().decode(getBulkExportedSpeechesBase64String(speechIds));
    } catch (Exception e) {
        Logger.error("Failed to generate export for speeches: " + speechIdsParam);
        Logger.error(e.getMessage());
        Logger.debug(Arrays.toString(e.getStackTrace()));
    }
    if (pdfBytes.length == 0) {
        // The PDF content type is deliberately NOT set here: the error body is plain text.
        Logger.error("PDF stream is empty.");
        ctx.result("Internal Server Error");
        ctx.status(500);
        return;
    }
    // Only a successful export is labelled as PDF.
    ctx.contentType("application/pdf");
    ctx.result(new ByteArrayInputStream(pdfBytes));
    // NOTE(review): presumably clears the LaTeX working directory; only run after a successful export, as before.
    tryDeleteTeXTempDirContents();
}
}

View file

@ -0,0 +1,192 @@
package org.texttechnologylab.project.gruppe_05_1.rest;
import io.javalin.http.Context;
import io.javalin.openapi.HttpMethod;
import io.javalin.openapi.OpenApi;
import io.javalin.openapi.OpenApiResponse;
import org.texttechnologylab.project.gruppe_05_1.util.Logger;
import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.util.Arrays;
import java.util.Base64;
import java.util.List;
import static org.texttechnologylab.project.gruppe_05_1.export.XMLUtil.*;
import static org.texttechnologylab.project.gruppe_05_1.export.TeXUtil.*;
/**
* Controller für die Endpunkte zum Export von Reden als XML.
* Implementiert von Jonas
*/
public class SpeechesXMLExportController {
@OpenApi(
summary = "Get a speech as XML",
description = "Returns an XML file of a selected speech",
operationId = "getSpeechExport",
path = "/export/xml/speech/{id}",
methods = HttpMethod.GET,
tags = {"Export", "Speeches", "XML"},
responses = {
@OpenApiResponse(status = "200")
})
public static void exportSpeech(Context ctx) {
String speechId = null;
try {
speechId = ctx.pathParam("id");
} catch (Exception e) {
// check query param
speechId = ctx.queryParam("speechId");
}
String xmlContent;
try {
xmlContent = getExportedSpeechById(speechId);
ByteArrayInputStream stream = new ByteArrayInputStream(xmlContent.getBytes());
if (stream.available() == 0) {
Logger.error("XML stream is empty.");
ctx.result("Internal Server Error");
ctx.status(500);
return;
}
ctx.contentType("application/xml");
ctx.result(stream);
} catch (Exception e) {
Logger.error("Failed to generate Export of Speech with ID " + speechId);
Logger.error(e.getMessage());
Logger.debug(Arrays.toString(e.getStackTrace()));
ctx.result("Internal Server Error");
ctx.status(500);
}
}
@OpenApi(
summary = "Get all speeches from a speaker as XML",
description = "Returns an XML file of all speeches of a selected speech",
operationId = "getSpeechesFromSpeakerExport",
path = "/export/xml/speaker/{id}",
methods = HttpMethod.GET,
tags = {"Export", "Speeches", "XML"},
responses = {
@OpenApiResponse(status = "200")
})
public static void exportSpeechesFromSpeaker(Context ctx) {
String xmlContent;
try {
xmlContent = getExportedSpeechesFromSpeakerById(ctx.pathParam("id"));
ByteArrayInputStream stream = new ByteArrayInputStream(xmlContent.getBytes());
if (stream.available() == 0) {
Logger.error("XML stream is empty.");
ctx.result("Internal Server Error");
ctx.status(500);
return;
}
ctx.contentType("application/xml");
ctx.result(stream);
} catch (Exception e) {
Logger.error("Failed to generate Export of Speeches from Speaker with ID " + ctx.pathParam("id"));
Logger.error(e.getMessage());
Logger.debug(Arrays.toString(e.getStackTrace()));
ctx.result("Internal Server Error");
ctx.status(500);
}
}
@OpenApi(
summary = "Get all speeches as XML",
description = "Returns an XML file of all speeches",
operationId = "getAllSpeeches",
path = "/export/xml/all",
methods = HttpMethod.GET,
tags = {"Export", "Speeches", "XML"},
responses = {
@OpenApiResponse(status = "200")
})
public static void exportAllSpeeches(Context ctx) {
String xmlContent;
try {
xmlContent = getExportedAllSpeeches();
ByteArrayInputStream stream = new ByteArrayInputStream(xmlContent.getBytes());
if (stream.available() == 0) {
Logger.error("XML stream is empty.");
ctx.result("Internal Server Error");
ctx.status(500);
return;
}
ctx.contentType("application/xml");
ctx.result(stream);
} catch (Exception e) {
Logger.error("Failed to generate Export of all Speeches");
Logger.error(e.getMessage());
Logger.debug(Arrays.toString(e.getStackTrace()));
ctx.result("Internal Server Error");
ctx.status(500);
}
}
@OpenApi(
summary = "Get all speeches with specific topic as XML",
description = "Returns an XML file of all speeches with specific topic",
operationId = "getAllSpeechesWithTopic",
path = "/export/xml/topic/{topic}",
methods = HttpMethod.GET,
tags = {"Export", "Speeches", "XML"},
responses = {
@OpenApiResponse(status = "200")
})
public static void exportSpeechesWithTopic(Context ctx) {
String xmlContent;
try {
xmlContent = getExportedSpeechesWhithTopic(ctx.pathParam("topic"));
ByteArrayInputStream stream = new ByteArrayInputStream(xmlContent.getBytes());
if (stream.available() == 0) {
Logger.error("XML stream is empty.");
ctx.result("Internal Server Error");
ctx.status(500);
return;
}
ctx.contentType("application/xml");
ctx.result(stream);
} catch (Exception e) {
Logger.error("Failed to generate Export of all Speeches");
Logger.error(e.getMessage());
Logger.debug(Arrays.toString(e.getStackTrace()));
ctx.result("Internal Server Error");
ctx.status(500);
}
}
@OpenApi(
summary = "Get speeches by IDs as XML",
description = "Returns an XML file of the speeches specified by their IDs",
operationId = "getSpeechesByIds",
path = "/export/xml/speeches/{speechIds}", // Comma-separated IDs
methods = HttpMethod.GET,
tags = {"Export", "Speeches", "XML"},
responses = {
@OpenApiResponse(status = "200")
})
public static void exportSpeeches(Context ctx) {
String xmlContent;
try {
String speechIdsParam = ctx.pathParam("speechIds");
List<String> speechIds = Arrays.asList(speechIdsParam.split(","));
xmlContent = getExportedSpeechesbyIds(speechIds);
ByteArrayInputStream stream = new ByteArrayInputStream(xmlContent.getBytes());
if (stream.available() == 0) {
Logger.error("XML stream is empty.");
ctx.result("Internal Server Error");
ctx.status(500);
return;
}
ctx.contentType("application/xml");
ctx.result(stream);
} catch (Exception e) {
Logger.error("Failed to generate Export of all Speeches");
Logger.error(e.getMessage());
Logger.debug(Arrays.toString(e.getStackTrace()));
ctx.result("Internal Server Error");
ctx.status(500);
}
}
}

View file

@ -17,6 +17,7 @@ public abstract class FileUtils {
/**
* Datei implementiert von Valentin
* Creates a (possibly nested) directory
* @param dir (e.g. "generated", "level1/level2/level3", etc.)
*/

View file

@ -9,7 +9,7 @@ import java.time.format.DateTimeParseException;
public abstract class GeneralUtils {
/**
*
* Datei implementiert von Valentin
* @param integer the integer to be parsed
* @return the parsed integer or null if the integer could not be parsed
*/

View file

@ -4,6 +4,10 @@ import java.time.LocalTime;
import java.time.format.DateTimeFormatter;
import static org.texttechnologylab.project.gruppe_05_1.Main.DEBUG_LOGGING;
/**
* Logger Klasse für die Ausgabe von Lognachrichten
* Implementiert von Jonas
*/
public class Logger {
private static final DateTimeFormatter DATE_TIME_FORMATTER = DateTimeFormatter.ofPattern("HH:mm:ss");
// info, warn, error with message and colors and datetime
@ -28,4 +32,8 @@ public class Logger {
/**
 * Prints a message to standard output wrapped in the ANSI colour code 35 (magenta),
 * prefixed with the current local time and the tag {@code PINK}.
 *
 * @param message text to print
 */
public static void pink(String message) {
    StringBuilder line = new StringBuilder("\u001B[35m");
    line.append(java.time.LocalTime.now())
            .append(" PINK: ")
            .append(message)
            .append("\u001B[0m");
    System.out.println(line);
}
/**
 * Prints a message to standard output wrapped in the 256-colour ANSI code 214 and followed
 * by a colour reset. No timestamp or tag is added.
 *
 * @param message text to print
 */
public static void orange(String message) {
    final String colourOn = "\u001B[38;5;214m";
    final String colourOff = "\u001B[0m";
    String line = colourOn + message + colourOff;
    System.out.println(line);
}
}

View file

@ -1,6 +1,8 @@
package org.texttechnologylab.project.gruppe_05_1.util;
import com.mongodb.client.MongoCollection;
import org.json.JSONArray;
import org.json.JSONObject;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.select.Elements;
@ -22,6 +24,7 @@ import javax.xml.parsers.DocumentBuilderFactory;
import java.io.*;
import java.net.HttpURLConnection;
import java.net.URL;
import java.net.URLEncoder;
import java.util.*;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
@ -41,6 +44,7 @@ public abstract class PPRUtils {
/**
* Von Valentin angelegt, von allen verändert
* Alle Informationen lesen...
* - Parlamentarier
* - Reden
@ -61,21 +65,16 @@ public abstract class PPRUtils {
Logger.warn("Members already in the DB. Skipping...");
}
// Reden und Kommentare einlesen und persistieren - TODO
// Reden und Kommentare einlesen und persistieren
readSpeechesAndComments(xmlFactory, mongoFactory);
// Fotos hochladen - TODO
// Fotos hochladen
readPhotos(mongoFactory);
// NLP-Analyse (Text und Video)
// TODO: Anpassung notwendig, Daten aus dem NEtz statt aus resources holen
// Achtung: läuft nicht unter Windows. Verwende Linux (nativ oder im Virtual Box)
// NlpUtils.createNlpData();
}
/**
* Fotos hochladen - TODO
* Fotos hochladen
* @param mongoFactory Factory für die MongoDB-Objekte
*/
public static void readPhotos(MongoObjectFactory mongoFactory) {
@ -83,7 +82,7 @@ public abstract class PPRUtils {
}
/**
* Reden und Kommentare einlesen - TODO
* Reden und Kommentare einlesen
* @param xmlFactory Factory für die XML-Objekte
* @param mongoFactory Factory für die MongoDB-Objekte
*/
@ -99,7 +98,6 @@ public abstract class PPRUtils {
public static void readAndPersistMdbs(String mdbUrl, FileObjectFactory xmlFactory, MongoObjectFactory mongoFactory) {
org.w3c.dom.Document mdbRoot = getMdbFromRemoteXmlZipfile(mdbUrl);
Element rootElement = mdbRoot.getDocumentElement();
// TODO: optional! persist the metadata of the <VERSION>1723434311</VERSION> element
List<Node> mdbNodes = XmlUtils.getChildrenByName(rootElement, "MDB");
@ -114,12 +112,9 @@ public abstract class PPRUtils {
}
}
// TODO: persist each speaker!
// TODO: guard: if not in the DB already
}
// TODO - HERE
private static boolean mdbActiveInWp(Speaker speaker, Integer legislaturPeriode) {
List<Integer> wps = speaker.getMemberships().stream()
.map(Membership::getWp)
@ -156,7 +151,7 @@ public abstract class PPRUtils {
zipInputStream.closeEntry();
}
if (xmlOutputStream.size() == 0 || dtdOutputStream.size() == 0) { // TODO
if (xmlOutputStream.size() == 0 || dtdOutputStream.size() == 0) {
// throw new FileNotFoundException("XML or DTD not found in the ZIP archive");
}
InputStream xmlInputStreamFinal = new ByteArrayInputStream(xmlOutputStream.toByteArray());
@ -171,7 +166,6 @@ public abstract class PPRUtils {
return doc;
} catch (IOException e) {
// TODO
throw new RuntimeException(e);
}
}
@ -209,8 +203,6 @@ public abstract class PPRUtils {
});
}
// TODO: altes Zeug, sortieren...
/**
* Alle Parteien (aus einer Liste der MdBs) herausfinden.
* null-Einträge durch einen Platzhalter ersetzen, damit später keine null pointer exceptions auftreten
@ -327,7 +319,19 @@ public abstract class PPRUtils {
});
}
/**
* Liest XML-Protokolle von der Bundestag-OpenData-URL und verarbeitet sie.
*
* <p>
* Diese Methode ruft wiederholt Seiten mit XML-Links (Protokollen) ab, basierend auf einem
* Offset und Limit. Für jeden gefundenen Link wird die XML-Datei heruntergeladen und geparst.
* Einzigartige Protokolle werden anhand des "sitzung-nr"-Attributs identifiziert. Falls ein Protokoll
* bereits verarbeitet wurde (gespeichert in {@code processedProtocols}), wird es übersprungen.
* Die verarbeiteten XML-Dokumente werden in der globalen Menge {@code xmlProtocols} gesammelt.
* </p>
*
* @return Ein Set von {@code org.w3c.dom.Document}, das die verarbeiteten XML-Protokolle enthält.
*/
public static Set<org.w3c.dom.Document> processXML() {
int offset = 0;
int limit = 10;
@ -411,6 +415,12 @@ public abstract class PPRUtils {
return doc;
}
/**
* Listet die Dateien im gegebenen Verzeichnis auf.
* Implementiert von Jonas
* @param directory Verzeichnis
* @return Liste der Dateinamen
*/
public static ArrayList<String> listFilesInDirectory(String directory) {
File folder = new File(directory);
File[] files = folder.listFiles();
@ -425,6 +435,24 @@ public abstract class PPRUtils {
return fileNames;
}
/**
* Ruft neue Protokoll-XML-Dokumente von der Bundestag OpenData-API ab und verarbeitet sie.
*
* Diese Methode verwendet eine paginierte Abfrage (über Offset und Limit) der URL
* "https://www.bundestag.de/ajax/filterlist/de/services/opendata/866354-866354".
* Für jeden gefundenen XML-Link wird anhand des Dateinamens (z.B "20212.xml") die Sitzungsnummer extrahiert.
* Protokolle mit den Dateinamen "20007" oder "20212" werden als fehlerhaft erkannt und übersprungen.
* Falls die extrahierte Sitzungsnummer (nach Entfernen eines möglichen "20"-Präfixes) noch nicht in der Datenbank existiert
* (bestimmt durch mongoDBHandler.sessionExists(sessionNumber)), wird das XML-Dokument heruntergeladen und geparst.
* Die neu verarbeiteten XML-Dokumente werden in einem Set gesammelt und zurückgegeben.
*
* Die Pagination erfolgt über das HTML-Element <div class="meta-slider"> mit dem Attribut
* data-nextoffset. Ist kein nächster Offset vorhanden, wird die Schleife beendet.
*
*
* @param mongoDBHandler der MongoDBHandler, der für die Prüfung der Existenz einer Session in der Datenbank verwendet wird
* @return ein Set von org.w3c.dom.Document, das alle neuen (noch nicht verarbeiteten) Protokoll-XML-Dokumente enthält
*/
public static Set<org.w3c.dom.Document> checkAndProcessNewProtocols(MongoDBHandler mongoDBHandler) {
Set<org.w3c.dom.Document> newProtocols = new HashSet<>();
int offset = 0;
@ -446,6 +474,10 @@ public abstract class PPRUtils {
// Entferne die Dateiendung
String sessionNumberFull = fileName.replace(".xml", ""); // z.B. "20212"
String sessionNumber;
if (sessionNumberFull.equals("20007") || sessionNumberFull.equals("20212")) {
Logger.warn("Skipping faulty protocol: " + sessionNumberFull);
continue;
}
if (sessionNumberFull.startsWith("20") && sessionNumberFull.length() > 2) {
sessionNumber = sessionNumberFull.substring(2);
} else {
@ -481,5 +513,126 @@ public abstract class PPRUtils {
return newProtocols;
}
/**
 * Lists the distinct party labels of the given members, in the order they are first seen.
 * Implemented by Jonas.
 *
 * <p>Members whose {@code getPartei()} is {@code null} are represented by a single
 * {@code PARTEILOS_KUERZEL} entry, no matter how many such members the list contains.
 *
 * @param mdbList list of members
 * @return list of distinct party labels
 */
public static ArrayList<String> listFractionsFromMembers(List<Parlamentarier> mdbList) {
    // An insertion-ordered set de-duplicates while keeping first-seen order.
    Set<String> fractions = new LinkedHashSet<>();
    for (Parlamentarier parlamentarier : mdbList) {
        String partei = parlamentarier.getPartei();
        // De-duplicate on the placeholder itself; checking for null (as before) never matched,
        // so the placeholder was appended once per party-less member.
        fractions.add(partei != null ? partei : PARTEILOS_KUERZEL);
    }
    return new ArrayList<>(fractions);
}
/**
 * Fetches a member's picture from the Bundestag picture database and returns it Base64-encoded.
 * Implemented by Jonas.
 *
 * <p>Step 1 posts the name as a search query to the {@code picture-result} endpoint, sending the
 * cookies returned by {@code getSessionCookies()}. Step 2 downloads the {@code hqBild} file of
 * the first entry in the {@code fotos} array of the JSON response.
 *
 * @param inputString name of the member, used as the search query
 * @return Base64 string of the image bytes, or the literal {@code "Error: Unable to retrieve image"}
 *         when a request does not answer with HTTP 200 or the response lists no picture
 * @throws IOException if a connection fails or times out
 */
public static String fetchMemberImageBase64FromNameString(String inputString) throws IOException {
    final String failure = "Error: Unable to retrieve image";
    // Bound both requests so an unresponsive server cannot block the caller indefinitely.
    final int timeoutMillis = 30_000;

    // Step 1: Send POST request
    String hqBild;
    HttpURLConnection conn =
            (HttpURLConnection) new URL("https://bilddatenbank.bundestag.de/ajax/picture-result").openConnection();
    try {
        conn.setConnectTimeout(timeoutMillis);
        conn.setReadTimeout(timeoutMillis);
        conn.setRequestMethod("POST");
        conn.setRequestProperty("Content-Type", "application/x-www-form-urlencoded");
        conn.setRequestProperty("Cookie", getSessionCookies());
        conn.setDoOutput(true);

        // Form data
        String postData = "query=" + URLEncoder.encode(inputString, "UTF-8") + "&sortVal=2";
        try (OutputStream os = conn.getOutputStream()) {
            os.write(postData.getBytes("UTF-8"));
        }

        if (conn.getResponseCode() != HttpURLConnection.HTTP_OK) {
            return failure;
        }

        // Read response
        StringBuilder response = new StringBuilder();
        try (BufferedReader br = new BufferedReader(new InputStreamReader(conn.getInputStream(), "UTF-8"))) {
            String line;
            while ((line = br.readLine()) != null) {
                response.append(line);
            }
        }

        // Parse JSON response
        JSONArray fotosArray = new JSONObject(response.toString()).optJSONArray("fotos");
        if (fotosArray == null || fotosArray.isEmpty()) {
            return failure;
        }
        hqBild = fotosArray.getJSONObject(0).optString("hqBild", "");
    } finally {
        conn.disconnect();
    }
    if (hqBild.isEmpty()) {
        return failure;
    }

    // Step 2: Fetch image
    HttpURLConnection imageConn =
            (HttpURLConnection) new URL("https://bilddatenbank.bundestag.de/fotos/" + hqBild).openConnection();
    try {
        imageConn.setConnectTimeout(timeoutMillis);
        imageConn.setReadTimeout(timeoutMillis);
        imageConn.setRequestMethod("GET");
        if (imageConn.getResponseCode() != HttpURLConnection.HTTP_OK) {
            return failure;
        }
        try (InputStream is = imageConn.getInputStream(); ByteArrayOutputStream baos = new ByteArrayOutputStream()) {
            byte[] buffer = new byte[8192];
            int bytesRead;
            while ((bytesRead = is.read(buffer)) != -1) {
                baos.write(buffer, 0, bytesRead);
            }
            return Base64.getEncoder().encodeToString(baos.toByteArray());
        }
    } finally {
        imageConn.disconnect();
    }
}
/**
 * Returns the session cookies needed for the picture search requests.
 * Implemented by Jonas.
 *
 * <p>Issues a GET request against the picture search page and reads the {@code PHPSESSID} and
 * {@code _csrf} cookies from the {@code Set-Cookie} response headers.
 *
 * @return a {@code Cookie} header value of the form {@code PHPSESSID=<value>; _csrf=<value>};
 *         a value is empty if the corresponding cookie was not sent
 * @throws IOException if the connection cannot be opened
 */
public static String getSessionCookies() throws IOException {
    final String sessionPrefix = "PHPSESSID=";
    // NOTE(review): assumes the CSRF cookie is named exactly "_csrf" — confirm against the live site.
    final String csrfPrefix = "_csrf=";
    final int timeoutMillis = 30_000;

    String urlString = "https://bilddatenbank.bundestag.de/search/picture-result?query=Angela+Merkel&sortVal=2";
    HttpURLConnection conn = (HttpURLConnection) new URL(urlString).openConnection();
    String phpSessId = "";
    String csrfToken = "";
    try {
        conn.setConnectTimeout(timeoutMillis);
        conn.setReadTimeout(timeoutMillis);
        conn.setRequestMethod("GET");

        for (Map.Entry<String, List<String>> entry : conn.getHeaderFields().entrySet()) {
            // Header names are case-insensitive; the status line is stored under a null key.
            if (!"Set-Cookie".equalsIgnoreCase(entry.getKey())) {
                continue;
            }
            for (String value : entry.getValue()) {
                // The first ';'-separated part is "name=value"; the rest are cookie attributes.
                String pair = value.split(";")[0].trim();
                // Keep only the value: the name is added once when the result is assembled.
                // (Previously the whole pair was kept, yielding "PHPSESSID=PHPSESSID=...".)
                if (pair.startsWith(sessionPrefix)) {
                    phpSessId = pair.substring(sessionPrefix.length());
                } else if (pair.startsWith(csrfPrefix)) {
                    csrfToken = pair.substring(csrfPrefix.length());
                }
            }
        }
    } finally {
        conn.disconnect();
    }
    return sessionPrefix + phpSessId + "; " + csrfPrefix + csrfToken;
}
}

View file

@ -3,7 +3,9 @@ package org.texttechnologylab.project.gruppe_05_1.util;
import java.io.IOException;
import java.io.InputStream;
import java.util.Properties;
/**
* Datei implementiert von Valentin
*/
public abstract class PropertiesUtils {
public static Properties readPropertiesFromResource(String propertiesFileName) {

View file

@ -0,0 +1,153 @@
package org.texttechnologylab.project.gruppe_05_1.util;
import com.mongodb.client.MongoDatabase;
import com.mongodb.client.gridfs.GridFSBucket;
import com.mongodb.client.gridfs.GridFSBuckets;
import com.mongodb.client.gridfs.model.GridFSUploadOptions;
import com.mongodb.client.model.UpdateOneModel;
import com.mongodb.client.model.WriteModel;
import org.apache.commons.logging.Log;
import org.bson.Document;
import org.bson.types.ObjectId;
import org.texttechnologylab.project.gruppe_05_1.database.MongoDBHandler;
import java.io.*;
import java.net.HttpURLConnection;
import java.net.URL;
import java.util.ArrayList;
import java.util.List;
//Implementiert von Henry
public class SpeechVideoUpdater {
private static final int BASE_VIDEO_ID = 7615435;
private static final String BASE_URL = "https://cldf-od.r53.cdn.tv1.eu/1000153copo/ondemand/app144277506/145293313/";
private static final String SUFFIX = "_h264_720_400_2000kb_baseline_de_2192.mp4?fdl=1";
/**
* Initialisiert den Video-Upload-Prozess.
* Diese Methode prüft, ob bereits ein Video für die erste Rede (speechId 0) in Session 187 und AgendaItem 4 vorhanden ist.
* Falls ein Video vorhanden ist, wird der Download übersprungen. Andernfalls wird findVideos() aufgerufen,
* um die Videos herunterzuladen und in GridFS hochzuladen sowie die Speech-Dokumente zu aktualisieren.
*/
public static void init() {
MongoDBHandler mongoDBHandler = new MongoDBHandler();
try {
Document filter = new Document("sessionId", 187)
.append("agendaItemId", 4)
.append("speechId", 0);
Document firstSpeech = mongoDBHandler.getSpeech(filter);
if (firstSpeech != null && firstSpeech.containsKey("video")
&& firstSpeech.getString("video") != null
&& !firstSpeech.getString("video").trim().isEmpty()) {
Logger.info("Videos sind bereits vorhanden. Kein Download notwendig.");
} else {
Logger.info("Keine Videos gefunden. Starte Download für die Reden der Session 187, AgendaItem 4...");
findVideos();
}
} catch (Exception e) {
System.err.println("Fehler beim Prüfen der Video-Felder: " + e.getMessage());
e.printStackTrace();
} finally {
mongoDBHandler.close();
}
}
/**
* Lädt Videos für Reden in Session 187, AgendaItem 4 herunter und lädt diese in GridFS hoch.
* Anschließend wird für jede Rede (speechId 0 bis 8) ein Bulk-Update erstellt, um das Speech-Dokument
* mit dem Videolink (GridFS-ID als Hex-String) zu aktualisieren.
*
* @throws Exception Falls beim Download oder Upload ein Fehler auftritt.
*/
public static void findVideos() throws Exception {
MongoDBHandler mongoDBHandler = new MongoDBHandler();
MongoDatabase db = mongoDBHandler.getDatabase();
GridFSBucket gridFSBucket = GridFSBuckets.create(db, "videos");
List<WriteModel<Document>> bulkOperations = new ArrayList<>();
for (int speechId = 0; speechId < 9; speechId++) {
int videoId = BASE_VIDEO_ID + speechId;
String videoUrl = BASE_URL + videoId + "/" + videoId + SUFFIX;
Logger.info("Downloading video for speech " + speechId + ": " + videoUrl);
File videoFile = downloadVideo(videoUrl, "video_" + videoId + ".mp4");
ObjectId gridFsId = uploadVideoToGridFS(gridFSBucket, videoFile, "video_" + videoId + ".mp4");
Logger.info("Uploaded video with GridFS ID: " + gridFsId);
Document filter = new Document("sessionId", 187)
.append("agendaItemId", 4)
.append("speechId", speechId);
Document update = new Document("$set", new Document("video", gridFsId.toHexString()));
bulkOperations.add(new UpdateOneModel<>(filter, update));
videoFile.delete();
}
if (!bulkOperations.isEmpty()) {
Logger.info("Uploading bulk update for " + bulkOperations.size() + " documents...");
mongoDBHandler.bulkWriteNlpData(bulkOperations);
Logger.info("Bulk update completed.");
}
mongoDBHandler.close();
}
/**
* Lädt das Video von der angegebenen URL herunter und speichert es als temporäre Datei.
*
* @param videoUrl Die URL des Videos.
* @param fileName Der gewünschte Dateiname für die temporäre Datei.
* @return Eine {@code File}-Instanz, die auf die heruntergeladene Datei verweist.
* @throws IOException Falls beim Download oder Schreiben der Datei ein Fehler auftritt.
*/
private static File downloadVideo(String videoUrl, String fileName) throws IOException {
URL url = new URL(videoUrl);
HttpURLConnection connection = (HttpURLConnection) url.openConnection();
connection.setRequestMethod("GET");
connection.setRequestProperty("User-Agent", "Mozilla/5.0");
File tempFile = new File(fileName);
try (InputStream in = connection.getInputStream();
FileOutputStream out = new FileOutputStream(tempFile)) {
byte[] buffer = new byte[4096];
int bytesRead;
while ((bytesRead = in.read(buffer)) != -1) {
out.write(buffer, 0, bytesRead);
}
}
return tempFile;
}
/**
* Lädt die übergebene Videodatei in GridFS hoch.
*
* @param gridFSBucket Der {@code GridFSBucket}, der für die Collection "videos" in der Datenbank konfiguriert ist.
* @param videoFile Die zu hochladende Videodatei.
* @param fileName Der Name, unter dem die Datei in GridFS gespeichert werden soll.
* @return Die {@code ObjectId} der in GridFS gespeicherten Datei.
* @throws IOException Falls beim Hochladen der Datei ein Fehler auftritt.
*/
private static ObjectId uploadVideoToGridFS(GridFSBucket gridFSBucket, File videoFile, String fileName) throws IOException {
try (InputStream streamToUploadFrom = new FileInputStream(videoFile)) {
GridFSUploadOptions options = new GridFSUploadOptions()
.chunkSizeBytes(358400)
.metadata(new Document("type", "video").append("fileName", fileName));
ObjectId fileId = gridFSBucket.uploadFromStream(fileName, streamToUploadFrom, options);
return fileId;
}
}
}

View file

@ -16,6 +16,9 @@ import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.zip.ZipEntry;
/**
* Datei implementiert von Valentin
*/
public abstract class XmlUtils {
@ -43,7 +46,6 @@ public abstract class XmlUtils {
try {
document = builder.parse(xmlInputStream);
// TODO: Error handling...
} catch (SAXException e) {
throw new RuntimeException(e);
} catch (IOException e) {

View file

@ -5,7 +5,9 @@ import org.texttechnologylab.project.gruppe_05_1.domain.speaker.Speaker;
import org.texttechnologylab.project.gruppe_05_1.xml.mdb.*;
import org.texttechnologylab.project.gruppe_05_1.xml.speaker.Speaker_File_Impl;
import org.w3c.dom.Node;
/**
* implementiert von Valentin
*/
public class FileObjectFactory {
private static FileObjectFactory oFactory = null;

View file

@ -1,7 +1,9 @@
package org.texttechnologylab.project.gruppe_05_1.xml;
import org.w3c.dom.Node;
/**
* implementiert von Valentin
*/
public interface XmlOperations {
FileObjectFactory factory = FileObjectFactory.getFactory();
Object fromXmlNode(Node node);

View file

@ -5,7 +5,9 @@ import org.texttechnologylab.project.gruppe_05_1.util.GeneralUtils;
import org.texttechnologylab.project.gruppe_05_1.util.Logger;
import org.texttechnologylab.project.gruppe_05_1.xml.XmlOperations;
import org.w3c.dom.Node;
/**
* implementiert von Valentin
*/
public class BiografischeAngaben_File_Impl extends BiografischeAngaben implements XmlOperations {
@Override
public BiografischeAngaben fromXmlNode(Node node) {

View file

@ -5,7 +5,9 @@ import org.texttechnologylab.project.gruppe_05_1.util.GeneralUtils;
import org.texttechnologylab.project.gruppe_05_1.util.Logger;
import org.texttechnologylab.project.gruppe_05_1.xml.XmlOperations;
import org.w3c.dom.Node;
/**
* implementiert von Valentin
*/
public class Institution_File_Impl extends Institution implements XmlOperations {
@Override
public Institution fromXmlNode(Node node) {

View file

@ -8,7 +8,9 @@ import org.w3c.dom.Node;
import java.util.ArrayList;
import java.util.List;
/**
* implementiert von Valentin
*/
public class MdbDocument_File_Impl extends MdbDocument implements XmlOperations {
@Override
public MdbDocument fromXmlNode(Node node) {

Some files were not shown because too many files have changed in this diff Show more