main latex export functionality

This commit is contained in:
Jonas Werner 2025-03-22 18:52:08 +01:00
parent eb3124e30d
commit 359d057a9b
17 changed files with 345 additions and 7 deletions

View file

@ -34,11 +34,13 @@ public class Main {
private static final FileObjectFactory xmlFactory = FileObjectFactory.getFactory();
private static final MongoObjectFactory mongoFactory = MongoObjectFactory.getFactory();
public static final String RESOURCES_DIR = "src/main/resources";
public static final String CONFIG_DIR = "src/main/resources/config";
public static final String JAVALIN_TEMPLATE_DIR = "src/main/resources/templates";
public static final String JAVALIN_STATIC_FILES_DIR = "src/main/resources/static";
public static final String JCAS_SPEECHES_TYPESYSTEM_DIR = "src/main/resources/speeches/TypeSystem";
public static final String MEMBER_IMAGES_DIR = "src/main/resources/membersOfParliamentImages/";
// Working directory of the LaTeX export: the generated .tex/.pdf files and the
// exported speaker photos are written here and removed again after an export.
public static final String TEMP_EXPORT_DIR = "src/main/resources/tempExport/";
public static void main(String[] args) throws Exception {
UPLOAD_MEMBER_PHOTOS = Arrays.asList(args).contains("uploadMemberPhotos");

View file

@ -11,6 +11,7 @@ import com.mongodb.client.model.Projections;
import io.javalin.http.Context;
import org.bson.Document;
import org.bson.conversions.Bson;
import org.texttechnologylab.project.gruppe_05_1.database.domainimpl.mdb.Speaker_MongoDB_Impl;
import org.texttechnologylab.project.gruppe_05_1.database.domainimpl.mdb.Speech_MongoDB_Impl;
import org.texttechnologylab.project.gruppe_05_1.domain.html.HtmlSpeech;
import org.texttechnologylab.project.gruppe_05_1.domain.html.Parlamentarier;
@ -297,6 +298,15 @@ public class MongoPprUtils {
return readParlamentarierDetailsFromSpeaker(doc);
}
/**
 * Loads a speaker document by its id and converts it into a
 * {@link Speaker_MongoDB_Impl}.
 *
 * @param id value of the {@code _id} field of the speaker document
 * @return the parsed speaker, or {@code null} if the lookup yields no document
 */
public static Speaker_MongoDB_Impl getSpeakerById(String id) {
    Logger.debug("ID: " + id);
    Document doc = MongoDBHandler.findFirstDocumentInCollection(getSpeakerCollection(), "_id", id);
    Logger.debug("Speaker: " + doc);
    if (doc == null) {
        // Same contract as getSpeechByKey: log and return null instead of
        // failing with a NullPointerException inside the document parser.
        Logger.error("Speaker " + id + " nicht gefunden");
        return null;
    }
    Speaker_MongoDB_Impl speaker = new Speaker_MongoDB_Impl().createSpeakerMongoDBImpl(doc);
    Logger.debug("Speaker parsed" + speaker);
    return speaker;
}
/**
* Holt die Details eines Parlamentariers
@ -623,7 +633,7 @@ public class MongoPprUtils {
* @param key: Rede ID
* @return
*/
public static HtmlSpeech getSpeechByKey(String key) {
public static HtmlSpeech getHTMLSpeechByKey(String key) {
Document filter = new Document("speechKey", key);
Document speechDoc = getSpeechCollection().find(filter).first();
if (speechDoc == null) {
@ -634,6 +644,18 @@ public class MongoPprUtils {
return new HtmlSpeech(speechDoc);
}
/**
 * Fetches the speech whose {@code speechKey} field equals the given key and
 * wraps the document in a {@link Speech_MongoDB_Impl}.
 *
 * @param key speech key to look up
 * @return the speech, or {@code null} (after logging an error) if no document matches
 */
public static Speech getSpeechByKey(String key) {
    Document speechDoc = getSpeechCollection()
            .find(new Document("speechKey", key))
            .first();
    if (speechDoc != null) {
        return new Speech_MongoDB_Impl(speechDoc, true);
    }
    Logger.error("Rede " + key + " nicht gefunden");
    return null;
}
// getMemberPhoto
/**

View file

@ -5,15 +5,60 @@ import org.texttechnologylab.project.gruppe_05_1.database.MongoDBHandler;
import org.texttechnologylab.project.gruppe_05_1.database.MongoOperations;
import org.texttechnologylab.project.gruppe_05_1.domain.speaker.Membership;
import org.texttechnologylab.project.gruppe_05_1.domain.speaker.Speaker;
import org.texttechnologylab.project.gruppe_05_1.util.Logger;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.time.LocalDate;
import java.util.*;
import static org.texttechnologylab.project.gruppe_05_1.Main.TEMP_EXPORT_DIR;
import static org.texttechnologylab.project.gruppe_05_1.database.MongoPprUtils.getMemberPhoto;
public class Speaker_MongoDB_Impl extends Speaker implements MongoOperations<Speaker> {
/**
 * Populates this speaker from a MongoDB speaker document.
 *
 * <p>Scalar fields are copied one-to-one; date fields are converted through
 * {@link #parseTimestampSafely(Date)}. A document without a
 * {@code memberships} field yields an empty membership list.
 *
 * @param mongoDocument speaker document to read from; must not be {@code null}
 * @return this instance, to allow call chaining
 */
public Speaker_MongoDB_Impl createSpeakerMongoDBImpl(Document mongoDocument) {
    this.setId(mongoDocument.getString("_id"));
    this.setName(mongoDocument.getString("name"));
    this.setFirstName(mongoDocument.getString("firstName"));
    this.setTitle(mongoDocument.getString("title"));
    Logger.debug("First part parsed succesfully");
    this.setGeburtsdatum(parseTimestampSafely(mongoDocument.getDate("geburtsdatum")));
    this.setGeburtsort(mongoDocument.getString("geburtsort"));
    this.setSterbedatum(parseTimestampSafely(mongoDocument.getDate("sterbedatum")));
    this.setGeschlecht(mongoDocument.getString("geschlecht"));
    this.setBeruf(mongoDocument.getString("beruf"));
    this.setAkademischertitel(mongoDocument.getString("akademischertitel"));
    this.setFamilienstand(mongoDocument.getString("familienstand"));
    this.setReligion(mongoDocument.getString("religion"));
    this.setVita(mongoDocument.getString("vita"));
    this.setParty(mongoDocument.getString("party"));

    // The field is read as a list of sub-documents; the cast cannot be checked at runtime.
    @SuppressWarnings("unchecked")
    List<Document> memberships = (List<Document>) mongoDocument.get("memberships");
    List<Membership> membershipList = new ArrayList<>();
    if (memberships != null) { // a missing field must not abort the whole import
        for (Document membership : memberships) {
            Membership membershipObj = new Membership();
            membershipObj.setRole(membership.getString("role"));
            membershipObj.setMember(membership.getString("member"));
            membershipObj.setBegin(parseTimestampSafely(membership.getDate("begin")));
            membershipObj.setEnd(parseTimestampSafely(membership.getDate("end")));
            membershipObj.setLabel(membership.getString("label"));
            // NOTE(review): getInteger returns null for a missing "wp"; if setWp
            // takes a primitive int this unboxes and throws — confirm the setter type.
            membershipObj.setWp(membership.getInteger("wp"));
            membershipList.add(membershipObj);
        }
    }
    this.setMemberships(membershipList);
    return this;
}
/**
 * Converts a {@link Date} into a {@link LocalDate} without ever throwing.
 *
 * <p>The previous implementation fed {@code Date.toString()} (e.g.
 * {@code "Sat Mar 22 18:52:08 CET 2025"}) into {@link LocalDate#parse}, which
 * only accepts ISO-8601 dates, so every value ended up as {@code null}. The
 * conversion now goes through the instant of the date.
 *
 * @param timestamp the date to convert; may be {@code null}
 * @return the calendar date in the system default time zone, or {@code null}
 *         if {@code timestamp} is {@code null} or cannot be converted
 */
public LocalDate parseTimestampSafely(Date timestamp) {
    Logger.debug("Parsing timestamp: " + timestamp);
    if (timestamp == null) {
        return null;
    }
    try {
        // NOTE(review): uses the JVM default zone — confirm this matches how the
        // dates were written to the database.
        return timestamp.toInstant().atZone(java.time.ZoneId.systemDefault()).toLocalDate();
    } catch (java.time.DateTimeException | UnsupportedOperationException e) {
        // Out-of-range instants and java.sql.Date (which rejects toInstant) stay "safe".
        return null;
    }
}
@Override
public Document createEntity(Speaker entity) {
@ -48,4 +93,42 @@ public class Speaker_MongoDB_Impl extends Speaker implements MongoOperations<Spe
}
return result;
}
/**
 * Renders the speaker as a LaTeX fragment: the member photo in a left
 * minipage and a table with name, party, religion, birth/death date and
 * place of birth in a right minipage.
 *
 * <p>As a side effect the photo is decoded from Base64 and written to
 * {@code TEMP_EXPORT_DIR/speaker_photo_<id>.png}. If the photo is missing,
 * is not valid Base64 or cannot be written, the error is logged and the
 * fragment is produced without {@code \includegraphics}, so the LaTeX run
 * does not fail on a non-existent image.
 *
 * @return the LaTeX source for the speaker header
 */
public String toTeX() {
    File outputFile = new File(TEMP_EXPORT_DIR + "speaker_photo_" + this.getId() + ".png");
    boolean photoWritten = false;
    try {
        var encodedPhoto = getMemberPhoto(this.getId());
        if (encodedPhoto == null) {
            Logger.error("No member photo available for speaker " + this.getId());
        } else {
            byte[] imageBytes = Base64.getDecoder().decode(encodedPhoto);
            // The export directory is removed after every export, so it may not exist here.
            File exportDir = outputFile.getParentFile();
            if (exportDir != null && !exportDir.isDirectory() && !exportDir.mkdirs()
                    && !exportDir.isDirectory()) {
                throw new IOException("Could not create export directory " + exportDir);
            }
            try (FileOutputStream fos = new FileOutputStream(outputFile)) {
                fos.write(imageBytes);
            }
            photoWritten = true;
            Logger.debug("PNG file has been saved successfully.");
        }
    } catch (IOException | IllegalArgumentException e) {
        // IllegalArgumentException: the stored photo is not valid Base64.
        Logger.error("Failed to save member PNG file.");
        Logger.error(e.getMessage());
    }

    // Speaker metadata: image on the left, metadata table on the right.
    StringBuilder tex = new StringBuilder();
    tex.append("\\begin{minipage}{0.5\\textwidth}\n");
    if (photoWritten) {
        // Only the file name is referenced; presumably the preamble's \graphicspath
        // points at the export directory — TODO confirm.
        tex.append("\\includegraphics[width=0.5\\textwidth]{").append(outputFile.getName()).append("}\n");
    } else {
        tex.append("\\mbox{}\n"); // keep the left column non-empty without an image
    }
    tex.append("\\end{minipage}\n");
    tex.append("\\begin{minipage}{0.5\\textwidth}\n");
    tex.append("\\begin{tabular}{r l}\n");
    tex.append("Name & ");
    String title = this.getTitle();
    if (title != null && !title.isBlank()) {
        tex.append(title).append(" "); // avoid printing the literal "null" for speakers without title
    }
    tex.append(this.getFirstName()).append(" ").append(this.getName()).append("\\\\\n");
    tex.append("Fraktion & ").append(this.getParty()).append("\\\\\n");
    tex.append("Religion & ").append(this.getReligion()).append("\\\\\n");
    tex.append("Geburtsdatum & ").append(this.getGeburtsdatum()).append("\\\\\n");
    if (this.getSterbedatum() != null) {
        tex.append("Sterbedatum & ").append(this.getSterbedatum()).append("\\\\\n"); // only show if available
    }
    tex.append("Geburtsort & ").append(this.getGeburtsort()).append("\\\\\n");
    tex.append("\\end{tabular}\n");
    tex.append("\\end{minipage}\n");
    return tex.toString();
}
}

View file

@ -9,6 +9,7 @@ import org.texttechnologylab.project.gruppe_05_1.database.domainimpl.mdb.speechl
import org.texttechnologylab.project.gruppe_05_1.database.domainimpl.mdb.speechline.Line_MongoDB_Impl;
import org.texttechnologylab.project.gruppe_05_1.database.domainimpl.mdb.speechline.Speaker_MongoDB_Impl;
import org.texttechnologylab.project.gruppe_05_1.xml.speeches.Impls.Speech_File_Impl;
import org.texttechnologylab.project.gruppe_05_1.xml.speeches.Interfaces.Content;
import org.texttechnologylab.project.gruppe_05_1.xml.speeches.Interfaces.Speech;
@ -80,7 +81,16 @@ public class Speech_MongoDB_Impl extends Speech_File_Impl implements Speech {
public String toTeX() {
StringBuilder tex = new StringBuilder();
tex.append("");
tex.append("\\subsection*{NLP Metadata}\n");
tex.append("[Graphs] //TODO: Implement\n");
tex.append("\\subsection*{Speech Content}\n");
// Add content block of speeches
for (Content content: this.getSpeechContents()) {
tex.append(content.toTeX());
}
return tex.toString();

View file

@ -14,4 +14,8 @@ public class Comment_MongoDB_Impl extends Comment_File_Impl implements Comment {
mongoDocument.getString("commentatorName"),
mongoDocument.getString("comment"));
}
/**
 * Renders this comment as one LaTeX line: a blue "Kommentar" label followed
 * by the comment text and a LaTeX line break.
 *
 * @return the LaTeX source for this comment
 */
public String toTeX() {
    StringBuilder line = new StringBuilder("\\textcolor{blue}{Kommentar}: ");
    line.append(this.getComment());
    line.append("\\\\\n");
    return line.toString();
}
}

View file

@ -12,4 +12,8 @@ public class Line_MongoDB_Impl extends Line_File_Impl implements Line {
mongoDocument.getInteger("speechId"),
mongoDocument.getString("content"));
}
/**
 * Renders this speech line as LaTeX: the raw line content followed by a
 * LaTeX line break.
 *
 * @return the LaTeX source for this line
 */
public String toTeX() {
    final String lineBreak = "\\\\\n";
    return new StringBuilder()
            .append(this.getContent())
            .append(lineBreak)
            .toString();
}
}

View file

@ -13,4 +13,8 @@ public class Speaker_MongoDB_Impl extends Speaker_File_Impl implements Speaker {
mongoDocument.getString("speakerName"),
mongoDocument.getString("fraction"));
}
/**
 * Renders this speaker marker as one LaTeX line: a blue "Redner/Rednerin"
 * label followed by the speaker name and a LaTeX line break.
 *
 * @return the LaTeX source for this speaker marker
 */
public String toTeX() {
    final String label = "\\textcolor{blue}{Redner/Rednerin}: ";
    StringBuilder line = new StringBuilder(label);
    line.append(this.getSpeakerName()).append("\\\\\n");
    return line.toString();
}
}

View file

@ -0,0 +1,119 @@
package org.texttechnologylab.project.gruppe_05_1.export;
import org.texttechnologylab.project.gruppe_05_1.database.domainimpl.mdb.Speaker_MongoDB_Impl;
import org.texttechnologylab.project.gruppe_05_1.util.Logger;
import org.texttechnologylab.project.gruppe_05_1.xml.speeches.Interfaces.Speech;
import java.io.*;
import java.nio.charset.StandardCharsets;
import java.nio.file.FileVisitResult;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.SimpleFileVisitor;
import java.nio.file.attribute.BasicFileAttributes;
import java.time.LocalDateTime;
import java.time.format.DateTimeFormatter;
import java.util.Arrays;
import java.util.Base64;
import static org.texttechnologylab.project.gruppe_05_1.Main.RESOURCES_DIR;
import static org.texttechnologylab.project.gruppe_05_1.Main.TEMP_EXPORT_DIR;
import static org.texttechnologylab.project.gruppe_05_1.database.MongoPprUtils.*;
public class TeXUtil {
private static String BEGIN_DOCUMENT = "\\begin{document}";
private static String END_DOCUMENT = "\\end{document}";
public static String getSpeechToTexComponent(String speechId) {
Speech speech = getSpeechByKey(speechId);
Logger.debug("Speech ID: " + speechId);
Logger.debug("Speech: " + speech);
Speaker_MongoDB_Impl speaker = getSpeakerById(String.valueOf(speech.getSpeakerId()));
Logger.debug("Speaker: " + speaker);
StringBuilder tex = new StringBuilder();
tex.append(speaker.toTeX());
tex.append(speech.toTeX());
Logger.debug("TeX: " + tex);
return tex.toString();
}
public static String getExportedSpeechBase64StringBySpeechId(String speechId) throws IOException, InterruptedException {
// Read preamble from resources directory tex/preamble.tex
String preamble = Files.readString(new File(RESOURCES_DIR, "tex/preamble.tex").toPath()).replace("$$EXPORTCATEGORY$$", "Speech " + speechId);
return convertTexToBase64PDF(preamble + BEGIN_DOCUMENT + getSpeechToTexComponent(speechId) + END_DOCUMENT);
}
public static String convertTexToBase64PDF(String tex) throws IOException, InterruptedException {
// Create a temporary directory
File tempDir = new File(TEMP_EXPORT_DIR);
if (!tempDir.exists()) {
tempDir.mkdir();
}
// Format tex string to UTF-8
tex = new String(tex.getBytes("UTF-8"));
tex = tex.replaceAll("[^\\x00-\\x7F]", ""); // Replace all non-ASCII characters
// Local datetime stamp
String dateTime = DateTimeFormatter.ofPattern("yyyy-MM-dd_HH-mm-ss").format(LocalDateTime.now());
// Write the LaTeX content to a temporary .tex file
File texFile = new File(tempDir, "speech_export" + dateTime + ".tex");
try (BufferedWriter writer = new BufferedWriter(new FileWriter(texFile, StandardCharsets.UTF_8))) {
writer.write(tex);
}
// Run pdflatex to generate the .pdf file
String command = "pdflatex -output-directory=" + tempDir.getAbsolutePath() + " " + texFile.getAbsolutePath();
Process process = Runtime.getRuntime().exec(command);
BufferedReader stdInput = new BufferedReader(new InputStreamReader(process.getInputStream()));
System.out.println("Standard Output:");
String s;
while ((s = stdInput.readLine()) != null) {
Logger.debug(s);
}
Logger.debug("LaTeX Process ended with exit code " + process.waitFor());
// Path to the generated PDF file
File pdfFile = new File(tempDir, "speech_export" + dateTime + ".pdf");
// Check if the PDF was created
if (!pdfFile.exists()) {
throw new IOException("PDF generation failed.");
}
// Read the PDF file into a byte array
byte[] pdfBytes = Files.readAllBytes(pdfFile.toPath());
// Convert the byte array to a Base64 encoded string
return Base64.getEncoder().encodeToString(pdfBytes);
}
public static void deleteTeXTempDirContents() throws IOException {
// Walk through the directory
Files.walkFileTree(Path.of(TEMP_EXPORT_DIR), new SimpleFileVisitor<Path>() {
@Override
public FileVisitResult visitFile(Path file, BasicFileAttributes attrs) throws IOException {
// Delete file
Files.delete(file);
return FileVisitResult.CONTINUE;
}
@Override
public FileVisitResult postVisitDirectory(Path dir, IOException exc) throws IOException {
// Delete the directory after its contents are deleted
Files.delete(dir);
return FileVisitResult.CONTINUE;
}
});
}
}

View file

@ -65,5 +65,9 @@ public class RESTHandler {
app.get("/reden/{id}/{redeId}", SpeechController::showSpeech); // zeige eine bestimmte Rede des Parlamentariers an
app.get("/reden", SpeechController::listAllSpeeches); // zeige alle Reden an (Filtern möglich)
app.get("/export/speech/{id}", SpeechesExportController::exportSpeech); // exportiere eine Rede als PDF
//app.get("/portfolio/{id}/export", SpeechesExportController::exportSpeechesFromParlamentarier); // exportiere alle Reden eines Parlamentariers als PDF
//app.get("/topic/{topic}/export", SpeechesExportController::exportSpeechesWithTopic); // exportiere alle Reden zu einem Thema als PDF
}
}

View file

@ -72,7 +72,7 @@ public class SpeechController {
Map<String, Object> attributes = new HashMap<>();
HtmlSpeech speech = MongoPprUtils.getSpeechByKey(redeId);
HtmlSpeech speech = MongoPprUtils.getHTMLSpeechByKey(redeId);
if (speech == null) {
attributes.put("error", "Rede " + redeId + " nicht vorhanden");
ctx.render("speech.ftl", attributes);

View file

@ -0,0 +1,44 @@
package org.texttechnologylab.project.gruppe_05_1.rest;
import io.javalin.http.Context;
import io.javalin.openapi.HttpMethod;
import io.javalin.openapi.OpenApi;
import io.javalin.openapi.OpenApiResponse;
import org.texttechnologylab.project.gruppe_05_1.util.Logger;
import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.util.Base64;
import static org.texttechnologylab.project.gruppe_05_1.export.TeXUtil.*;
public class SpeechesExportController {
@OpenApi(
summary = "Get a speech as a PDF",
description = "Returns a LaTeX generated pdf of a selected speech",
operationId = "getSpeechExport",
path = "/export/speech/{id}",
methods = HttpMethod.GET,
tags = {"Export", "Speeches", "PDF"},
responses = {
@OpenApiResponse(status = "200")
})
public static void exportSpeech(Context ctx) throws IOException {
byte[] pdfBytes = new byte[0];
try {
pdfBytes = Base64.getDecoder().decode(getExportedSpeechBase64StringBySpeechId(ctx.pathParam("id")));
} catch (Exception e) {
Logger.error("Failed to generate Export of Speech with ID " + ctx.pathParam("id"));
Logger.error(e.getMessage());
}
// Set the response content type to PDF
ctx.contentType("application/pdf");
// Send the PDF as a response
ctx.result(new ByteArrayInputStream(pdfBytes));
// delete the temporary folder
deleteTeXTempDirContents();
}
}

View file

@ -42,4 +42,9 @@ public class Comment_File_Impl implements Content, Comment {
public MongoDBEntryType getType() {
return MongoDBEntryType.SPEECH_COMMENT;
}
/**
 * Renders this comment as one LaTeX line: a blue "Kommentar" label followed
 * by the comment text and a LaTeX line break ({@code \\}).
 *
 * @return the LaTeX source for this comment
 */
@Override
public String toTeX() {
    // "\\\\" yields the two backslashes of a LaTeX line break; the former "\\\n"
    // produced a single backslash plus newline, which LaTeX reads as a control space.
    return "\\textcolor{blue}{Kommentar}: " + this.getComment() + "\\\\\n";
}
}

View file

@ -34,4 +34,9 @@ public class Line_File_Impl implements Content, Line {
public MongoDBEntryType getType() {
return MongoDBEntryType.SPEECH_LINE;
}
/**
 * Renders this speech line as LaTeX: the line content followed by a LaTeX
 * line break ({@code \\}).
 *
 * @return the LaTeX source for this line
 */
@Override
public String toTeX() {
    // "\\\\" yields the two backslashes of a LaTeX line break; the former "\\\n"
    // produced a single backslash plus newline, which LaTeX reads as a control space.
    return this.getContent() + "\\\\\n";
}
}

View file

@ -49,4 +49,9 @@ public class Speaker_File_Impl implements Content, Speaker {
public MongoDBEntryType getType() {
return MongoDBEntryType.SPEECH_SPEAKER;
}
/**
 * Renders this speaker marker as one LaTeX line: a blue "Redner/Rednerin"
 * label followed by the speaker name and a LaTeX line break ({@code \\}).
 *
 * @return the LaTeX source for this speaker marker
 */
@Override
public String toTeX() {
    // "\\\\" yields the two backslashes of a LaTeX line break; the former "\\\n"
    // produced a single backslash plus newline, which LaTeX reads as a control space.
    return "\\textcolor{blue}{Redner/Rednerin}: " + this.getSpeakerName() + "\\\\\n";
}
}

View file

@ -103,4 +103,22 @@ public class Speech_File_Impl implements Speech {
return null;
}
/**
 * Renders the speech as a LaTeX fragment: an "NLP Metadata" section (still a
 * placeholder) followed by a "Speech Content" section containing the LaTeX
 * of every content element in order.
 *
 * @return the LaTeX source for this speech
 */
@Override
public String toTeX() {
    StringBuilder tex = new StringBuilder();

    // Was "\\NLP Metadata\\\n", which emits the undefined control sequence \NLP
    // and makes the LaTeX run fail; use a proper unnumbered heading instead.
    tex.append("\\subsection*{NLP Metadata}\n");
    tex.append("[Graphs] //TODO: Implement\n");

    tex.append("\\subsection*{Speech Content}\n");
    // Add content block of speeches
    for (Content content : this.getSpeechContents()) {
        tex.append(content.toTeX());
    }

    return tex.toString();
}
}

View file

@ -24,4 +24,11 @@ public interface Content {
* @return The speech ID.
*/
int getSpeechId();
/**
 * Renders this content element as a LaTeX fragment. The fragments of all
 * content elements of a speech are concatenated in order when the speech is
 * exported, so each fragment has to bring its own line break.
 *
 * @return The LaTeX source for this content element.
 */
String toTeX();
}

View file

@ -85,4 +85,6 @@ public interface Speech {
String getFullText();
JCas toCas() throws UIMAException;
/**
 * Renders this speech as a LaTeX fragment (section headings plus the LaTeX
 * of its content elements), without preamble or document environment.
 *
 * @return The LaTeX source for this speech.
 */
String toTeX();
}