Skip to content

Commit

Permalink
Merge pull request #297 from Frooodle/allInfo
Browse files Browse the repository at this point in the history
new features
  • Loading branch information
Frooodle authored Aug 6, 2023
2 parents 882cd41 + 38ec68b commit 379791a
Show file tree
Hide file tree
Showing 41 changed files with 2,642 additions and 1,948 deletions.
5 changes: 3 additions & 2 deletions build.gradle
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ plugins {
}

group = 'stirling.software'
version = '0.11.2'
version = '0.12.0'
sourceCompatibility = '17'

repositories {
Expand Down Expand Up @@ -61,8 +61,9 @@ dependencies {
implementation 'com.itextpdf:itext7-core:7.2.5'
implementation 'org.springframework.boot:spring-boot-starter-actuator'
implementation 'io.micrometer:micrometer-core'

implementation group: 'com.google.zxing', name: 'core', version: '3.5.1'
// https://mvnrepository.com/artifact/org.commonmark/commonmark
implementation 'org.commonmark:commonmark:0.21.0'

developmentOnly("org.springframework.boot:spring-boot-devtools")

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,86 @@
package stirling.software.SPDF.controller.api;

import java.io.IOException;
import java.io.ByteArrayOutputStream;
import com.itextpdf.kernel.pdf.*;
import com.itextpdf.kernel.pdf.xobject.PdfFormXObject;
import com.itextpdf.kernel.geom.PageSize;
import com.itextpdf.kernel.geom.Rectangle;
import com.itextpdf.layout.Document;
import com.itextpdf.layout.element.Image;
import java.util.ArrayList;
import java.util.List;

import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.pdmodel.PDPage;
import org.apache.pdfbox.pdmodel.PDPageTree;
import org.apache.pdfbox.pdmodel.common.PDRectangle;
import org.apache.pdfbox.pdmodel.graphics.form.PDFormXObject;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.http.ResponseEntity;
import org.springframework.web.bind.annotation.PostMapping;
import org.springframework.web.bind.annotation.RequestPart;
import org.springframework.web.bind.annotation.RestController;
import org.springframework.web.multipart.MultipartFile;

import io.swagger.v3.oas.annotations.Operation;
import io.swagger.v3.oas.annotations.Parameter;
import io.swagger.v3.oas.annotations.tags.Tag;
import stirling.software.SPDF.utils.WebResponseUtils;
import org.apache.pdfbox.pdmodel.*;
import org.apache.pdfbox.multipdf.PDFMergerUtility;
@RestController
@Tag(name = "General", description = "General APIs")
public class ToSinglePageController {

private static final Logger logger = LoggerFactory.getLogger(ToSinglePageController.class);


@PostMapping(consumes = "multipart/form-data", value = "/pdf-to-single-page")
@Operation(
summary = "Convert a multi-page PDF into a single long page PDF",
description = "This endpoint converts a multi-page PDF document into a single paged PDF document. The width of the single page will be same as the input's width, but the height will be the sum of all the pages' heights. Input:PDF Output:PDF Type:SISO"
)
public ResponseEntity<byte[]> pdfToSinglePage(
@RequestPart(required = true, value = "fileInput")
@Parameter(description = "The input multi-page PDF file to be converted into a single page", required = true)
MultipartFile file) throws IOException {

PdfReader reader = new PdfReader(file.getInputStream());
PdfDocument sourceDocument = new PdfDocument(reader);

float totalHeight = 0;
float width = 0;

for (int i = 1; i <= sourceDocument.getNumberOfPages(); i++) {
Rectangle pageSize = sourceDocument.getPage(i).getPageSize();
totalHeight += pageSize.getHeight();
if(width < pageSize.getWidth())
width = pageSize.getWidth();
}

ByteArrayOutputStream baos = new ByteArrayOutputStream();
PdfWriter writer = new PdfWriter(baos);
PdfDocument newDocument = new PdfDocument(writer);
PageSize newPageSize = new PageSize(width, totalHeight);
newDocument.addNewPage(newPageSize);

Document layoutDoc = new Document(newDocument);
float yOffset = totalHeight;

for (int i = 1; i <= sourceDocument.getNumberOfPages(); i++) {
PdfFormXObject pageCopy = sourceDocument.getPage(i).copyAsFormXObject(newDocument);
Image copiedPage = new Image(pageCopy);
copiedPage.setFixedPosition(0, yOffset - sourceDocument.getPage(i).getPageSize().getHeight());
yOffset -= sourceDocument.getPage(i).getPageSize().getHeight();
layoutDoc.add(copiedPage);
}

layoutDoc.close();
sourceDocument.close();

byte[] result = baos.toByteArray();
return WebResponseUtils.bytesToWebResponse(result, file.getOriginalFilename().replaceFirst("[.][^.]+$", "") + "_singlePage.pdf");
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,139 @@
package stirling.software.SPDF.controller.api.converters;

import java.io.IOException;

import org.commonmark.node.Node;
import org.commonmark.parser.Parser;
import org.commonmark.renderer.html.HtmlRenderer;
import org.springframework.http.ResponseEntity;
import org.springframework.web.bind.annotation.PostMapping;
import org.springframework.web.bind.annotation.RequestPart;
import org.springframework.web.bind.annotation.RestController;
import org.springframework.web.multipart.MultipartFile;

import io.swagger.v3.oas.annotations.Hidden;
import io.swagger.v3.oas.annotations.Operation;
import io.swagger.v3.oas.annotations.tags.Tag;
import stirling.software.SPDF.utils.FileToPdf;
import stirling.software.SPDF.utils.WebResponseUtils;
import org.springframework.web.bind.annotation.PostMapping;
import org.springframework.web.bind.annotation.RequestPart;
import org.springframework.web.bind.annotation.RestController;
import org.springframework.web.multipart.MultipartFile;
import org.springframework.http.ResponseEntity;

import io.swagger.v3.oas.annotations.Operation;

import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.DocumentBuilder;
import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.w3c.dom.NodeList;
import org.xml.sax.InputSource;

import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.StringReader;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.zip.ZipEntry;
import java.util.zip.ZipInputStream;

@RestController
@Tag(name = "Convert", description = "Convert APIs")
public class ConvertEpubToPdf {
//TODO
@PostMapping(consumes = "multipart/form-data", value = "/epub-to-single-pdf")
@Hidden
@Operation(
summary = "Convert an EPUB file to a single PDF",
description = "This endpoint takes an EPUB file input and converts it to a single PDF."
)
public ResponseEntity<byte[]> epubToSinglePdf(
@RequestPart(required = true, value = "fileInput") MultipartFile fileInput)
throws Exception {

if (fileInput == null) {
throw new IllegalArgumentException("Please provide an EPUB file for conversion.");
}

String originalFilename = fileInput.getOriginalFilename();
if (originalFilename == null || !originalFilename.endsWith(".epub")) {
throw new IllegalArgumentException("File must be in .epub format.");
}

Map<String, byte[]> epubContents = extractEpubContent(fileInput);
List<String> htmlFilesOrder = getHtmlFilesOrderFromOpf(epubContents);

List<byte[]> individualPdfs = new ArrayList<>();

for (String htmlFile : htmlFilesOrder) {
byte[] htmlContent = epubContents.get(htmlFile);
byte[] pdfBytes = FileToPdf.convertHtmlToPdf(htmlContent, htmlFile.replace(".html", ".pdf"));
individualPdfs.add(pdfBytes);
}

// Pseudo-code to merge individual PDFs into one.
byte[] mergedPdfBytes = mergeMultiplePdfsIntoOne(individualPdfs);

return WebResponseUtils.bytesToWebResponse(mergedPdfBytes, originalFilename.replace(".epub", ".pdf"));
}

// Assuming a pseudo-code function that merges multiple PDFs into one.
private byte[] mergeMultiplePdfsIntoOne(List<byte[]> individualPdfs) {
// You can use a library such as iText or PDFBox to perform the merging here.
// Return the byte[] of the merged PDF.
return null;
}

private Map<String, byte[]> extractEpubContent(MultipartFile fileInput) throws IOException {
Map<String, byte[]> contentMap = new HashMap<>();

try (ZipInputStream zis = new ZipInputStream(fileInput.getInputStream())) {
ZipEntry zipEntry = zis.getNextEntry();
while (zipEntry != null) {
ByteArrayOutputStream baos = new ByteArrayOutputStream();
byte[] buffer = new byte[1024];
int read = 0;
while ((read = zis.read(buffer)) != -1) {
baos.write(buffer, 0, read);
}
contentMap.put(zipEntry.getName(), baos.toByteArray());
zipEntry = zis.getNextEntry();
}
}

return contentMap;
}

private List<String> getHtmlFilesOrderFromOpf(Map<String, byte[]> epubContents) throws Exception {
String opfContent = new String(epubContents.get("OEBPS/content.opf")); // Adjusting for given path
DocumentBuilderFactory dbFactory = DocumentBuilderFactory.newInstance();
DocumentBuilder dBuilder = dbFactory.newDocumentBuilder();
InputSource is = new InputSource(new StringReader(opfContent));
Document doc = dBuilder.parse(is);

NodeList itemRefs = doc.getElementsByTagName("itemref");
List<String> htmlFilesOrder = new ArrayList<>();

for (int i = 0; i < itemRefs.getLength(); i++) {
Element itemRef = (Element) itemRefs.item(i);
String idref = itemRef.getAttribute("idref");

NodeList items = doc.getElementsByTagName("item");
for (int j = 0; j < items.getLength(); j++) {
Element item = (Element) items.item(j);
if (idref.equals(item.getAttribute("id"))) {
htmlFilesOrder.add(item.getAttribute("href")); // Fetching the actual href
break;
}
}
}

return htmlFilesOrder;
}


}
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@

import io.swagger.v3.oas.annotations.Operation;
import io.swagger.v3.oas.annotations.tags.Tag;
import stirling.software.SPDF.utils.FileToPdf;
import stirling.software.SPDF.utils.GeneralUtils;
import stirling.software.SPDF.utils.ProcessExecutor;
import stirling.software.SPDF.utils.ProcessExecutor.ProcessExecutorResult;
Expand All @@ -44,87 +45,17 @@ public ResponseEntity<byte[]> HtmlToPdf(
String originalFilename = fileInput.getOriginalFilename();
if (originalFilename == null || (!originalFilename.endsWith(".html") && !originalFilename.endsWith(".zip"))) {
throw new IllegalArgumentException("File must be either .html or .zip format.");
}
Path tempOutputFile = Files.createTempFile("output_", ".pdf");
Path tempInputFile = null;
byte[] pdfBytes;
try {
if (originalFilename.endsWith(".html")) {
tempInputFile = Files.createTempFile("input_", ".html");
Files.write(tempInputFile, fileInput.getBytes());
} else {
tempInputFile = unzipAndGetMainHtml(fileInput);
}

List<String> command = new ArrayList<>();
command.add("weasyprint");
command.add(tempInputFile.toString());
command.add(tempOutputFile.toString());
ProcessExecutorResult returnCode;
if (originalFilename.endsWith(".zip")) {
returnCode = ProcessExecutor.getInstance(ProcessExecutor.Processes.WEASYPRINT)
.runCommandWithOutputHandling(command, tempInputFile.getParent().toFile());
} else {

returnCode = ProcessExecutor.getInstance(ProcessExecutor.Processes.WEASYPRINT)
.runCommandWithOutputHandling(command);
}

pdfBytes = Files.readAllBytes(tempOutputFile);
} finally {
// Clean up temporary files
Files.delete(tempOutputFile);
Files.delete(tempInputFile);

if (originalFilename.endsWith(".zip")) {
GeneralUtils.deleteDirectory(tempInputFile.getParent());
}
}
}byte[] pdfBytes = FileToPdf.convertHtmlToPdf( fileInput.getBytes(), originalFilename);

String outputFilename = originalFilename.replaceFirst("[.][^.]+$", "") + ".pdf"; // Remove file extension and append .pdf

return WebResponseUtils.bytesToWebResponse(pdfBytes, outputFilename);
}



private Path unzipAndGetMainHtml(MultipartFile zipFile) throws IOException {
Path tempDirectory = Files.createTempDirectory("unzipped_");
try (ZipInputStream zipIn = new ZipInputStream(new ByteArrayInputStream(zipFile.getBytes()))) {
ZipEntry entry = zipIn.getNextEntry();
while (entry != null) {
Path filePath = tempDirectory.resolve(entry.getName());
if (entry.isDirectory()) {
Files.createDirectories(filePath); // Explicitly create the directory structure
} else {
Files.createDirectories(filePath.getParent()); // Create parent directories if they don't exist
Files.copy(zipIn, filePath);
}
zipIn.closeEntry();
entry = zipIn.getNextEntry();
}
}

//search for the main HTML file.
try (Stream<Path> walk = Files.walk(tempDirectory)) {
List<Path> htmlFiles = walk.filter(file -> file.toString().endsWith(".html"))
.collect(Collectors.toList());

if (htmlFiles.isEmpty()) {
throw new IOException("No HTML files found in the unzipped directory.");
}

// Prioritize 'index.html' if it exists, otherwise use the first .html file
for (Path htmlFile : htmlFiles) {
if (htmlFile.getFileName().toString().equals("index.html")) {
return htmlFile;
}
}

return htmlFiles.get(0);
}
}






}
Loading

0 comments on commit 379791a

Please sign in to comment.