diff --git a/engine/src/main/java/pro/verron/officestamper/core/RunUtil.java b/engine/src/main/java/pro/verron/officestamper/core/RunUtil.java index bda437a6..2e3d851c 100644 --- a/engine/src/main/java/pro/verron/officestamper/core/RunUtil.java +++ b/engine/src/main/java/pro/verron/officestamper/core/RunUtil.java @@ -11,7 +11,8 @@ import java.util.Objects; import static java.util.stream.Collectors.joining; -import static pro.verron.officestamper.utils.WmlFactory.*; +import static pro.verron.officestamper.utils.WmlFactory.newRun; +import static pro.verron.officestamper.utils.WmlFactory.newText; /** * Utility class to handle runs. @@ -68,6 +69,8 @@ public static CharSequence getText(Object content) { case R.AnnotationRef ignored -> ""; case R.CommentReference ignored -> ""; case Drawing ignored -> ""; + case CTFtnEdnRef ref -> ref.getId() + .toString(); case R.Sym sym -> "".formatted(sym.getFont(), sym.getChar()); default -> { log.debug("Unhandled object type: {}", content.getClass()); diff --git a/engine/src/main/java/pro/verron/officestamper/preset/OfficeStamperConfigurations.java b/engine/src/main/java/pro/verron/officestamper/preset/OfficeStamperConfigurations.java index 0c2d9bfd..e3a3785f 100644 --- a/engine/src/main/java/pro/verron/officestamper/preset/OfficeStamperConfigurations.java +++ b/engine/src/main/java/pro/verron/officestamper/preset/OfficeStamperConfigurations.java @@ -43,6 +43,8 @@ public static OfficeStamperConfiguration standardWithPreprocessing() { configuration.addPreprocessor(Preprocessors.removeLanguageProof()); configuration.addPreprocessor(Preprocessors.removeLanguageInfo()); configuration.addPreprocessor(Preprocessors.mergeSimilarRuns()); + configuration.addPostprocessor(Postprocessors.removeOrphanedFootnotes()); + configuration.addPostprocessor(Postprocessors.removeOrphanedEndnotes()); return configuration; } diff --git a/engine/src/main/java/pro/verron/officestamper/preset/Postprocessors.java 
b/engine/src/main/java/pro/verron/officestamper/preset/Postprocessors.java
new file mode 100644
index 00000000..0dc52c14
--- /dev/null
+++ b/engine/src/main/java/pro/verron/officestamper/preset/Postprocessors.java
@@ -0,0 +1,33 @@
+package pro.verron.officestamper.preset;
+
+import pro.verron.officestamper.api.OfficeStamperException;
+import pro.verron.officestamper.api.PostProcessor;
+import pro.verron.officestamper.preset.postprocessors.cleanendnotes.RemoveOrphanedEndnotesProcessor;
+import pro.verron.officestamper.preset.postprocessors.cleanfootnotes.RemoveOrphanedFootnotesProcessor;
+
+/**
+ * Static factories for the {@link PostProcessor} implementations of the {@code preset} package.
+ */
+public final class Postprocessors {
+    private Postprocessors() {
+        throw new OfficeStamperException("This is a utility class and cannot be instantiated");
+    }
+
+    /**
+     * Builds the post-processor that drops footnotes left without any reference.
+     *
+     * @return a new {@link RemoveOrphanedFootnotesProcessor}
+     */
+    public static PostProcessor removeOrphanedFootnotes() {
+        return new RemoveOrphanedFootnotesProcessor();
+    }
+
+    /**
+     * Builds the post-processor that drops endnotes left without any reference.
+     *
+     * @return a new {@link RemoveOrphanedEndnotesProcessor}
+     */
+    public static PostProcessor removeOrphanedEndnotes() {
+        return new RemoveOrphanedEndnotesProcessor();
+    }
+}
diff --git a/engine/src/main/java/pro/verron/officestamper/preset/postprocessors/NoteRefsVisitor.java b/engine/src/main/java/pro/verron/officestamper/preset/postprocessors/NoteRefsVisitor.java
new file mode 100644
index 00000000..1e1d615e
--- /dev/null
+++ b/engine/src/main/java/pro/verron/officestamper/preset/postprocessors/NoteRefsVisitor.java
@@ -0,0 +1,43 @@
+package pro.verron.officestamper.preset.postprocessors;
+
+import org.docx4j.utils.TraversalUtilVisitor;
+import org.docx4j.wml.CTFtnEdnRef;
+
+import java.math.BigInteger;
+import java.util.SortedSet;
+import java.util.TreeSet;
+
+/**
+ * Traversal visitor collecting the id of every {@link CTFtnEdnRef} it is applied to.
+ * <p>
+ * NOTE(review): the orphaned-footnote and orphaned-endnote processors both read this single
+ * id pool, and nothing here tells a footnote reference from an endnote reference; confirm
+ * that a reference of one kind sharing an id with a note of the other kind is acceptable.
+ */
+public class NoteRefsVisitor
+        extends TraversalUtilVisitor<CTFtnEdnRef> {
+    private final SortedSet<BigInteger> ids = new TreeSet<>();
+
+    /**
+     * Records the id of the visited reference.
+     *
+     * @param element the note reference met during traversal
+     */
+    @Override
+    public void apply(CTFtnEdnRef element) {
+        BigInteger id = element.getId();
+        // A TreeSet rejects null elements, and a reference without id designates no note.
+        if (id != null) {
+            ids.add(id);
+        }
+    }
+
+    /**
+     * Gives access to the ids gathered so far.
+     *
+     * @return the collected ids, sorted in ascending order
+     */
+    public SortedSet<BigInteger> referencedNoteIds() {
+        return ids;
+    }
+}
diff --git a/engine/src/main/java/pro/verron/officestamper/preset/postprocessors/cleanendnotes/RemoveOrphanedEndnotesProcessor.java
b/engine/src/main/java/pro/verron/officestamper/preset/postprocessors/cleanendnotes/RemoveOrphanedEndnotesProcessor.java
new file mode 100644
index 00000000..b21a5f6c
--- /dev/null
+++ b/engine/src/main/java/pro/verron/officestamper/preset/postprocessors/cleanendnotes/RemoveOrphanedEndnotesProcessor.java
@@ -0,0 +1,62 @@
+package pro.verron.officestamper.preset.postprocessors.cleanendnotes;
+
+import org.docx4j.openpackaging.packages.WordprocessingMLPackage;
+import org.docx4j.openpackaging.parts.WordprocessingML.EndnotesPart;
+import org.docx4j.wml.CTEndnotes;
+import org.docx4j.wml.CTFtnEdn;
+import pro.verron.officestamper.api.PostProcessor;
+import pro.verron.officestamper.preset.postprocessors.NoteRefsVisitor;
+import pro.verron.officestamper.utils.WmlUtils;
+
+import java.util.Collection;
+import java.util.Optional;
+
+import static org.docx4j.wml.STFtnEdn.NORMAL;
+import static pro.verron.officestamper.api.OfficeStamperException.throwing;
+import static pro.verron.officestamper.core.DocumentUtil.visitDocument;
+
+/**
+ * Post-processor deleting the endnotes whose id is not used by any note reference of the document.
+ * <p>
+ * Only notes typed {@code NORMAL}, or carrying no type at all, are eligible for deletion.
+ */
+public class RemoveOrphanedEndnotesProcessor
+        implements PostProcessor {
+    /**
+     * Tells whether a note is a regular one.
+     *
+     * @param note the note to inspect
+     * @return {@code true} when the note type is absent or equal to {@code NORMAL}
+     */
+    private static boolean normalNotes(CTFtnEdn note) {
+        var declaredType = note.getType();
+        return declaredType == null || declaredType.equals(NORMAL);
+    }
+
+    /**
+     * Gathers the referenced note ids, then removes every regular endnote whose id is not among them.
+     * Nothing is removed when the main document part has no endnotes part.
+     *
+     * @param document the package to clean up
+     */
+    @Override
+    public void process(WordprocessingMLPackage document) {
+        var refCollector = new NoteRefsVisitor();
+        visitDocument(document, refCollector);
+        var liveIds = refCollector.referencedNoteIds();
+
+        var endnotesPart = document.getMainDocumentPart()
+                                   .getEndNotesPart();
+        var orphans = Optional.ofNullable(endnotesPart)
+                              .stream()
+                              .map(throwing(EndnotesPart::getContents))
+                              .map(CTEndnotes::getEndnote)
+                              .flatMap(Collection::stream)
+                              .filter(note -> normalNotes(note) && !liveIds.contains(note.getId()))
+                              .toList();
+        // Removal runs on the snapshot taken above rather than inside the stream pipeline.
+        for (var orphan : orphans) {
+            WmlUtils.remove(orphan);
+        }
+    }
+}
diff --git
a/engine/src/main/java/pro/verron/officestamper/preset/postprocessors/cleanfootnotes/RemoveOrphanedFootnotesProcessor.java b/engine/src/main/java/pro/verron/officestamper/preset/postprocessors/cleanfootnotes/RemoveOrphanedFootnotesProcessor.java
new file mode 100644
index 00000000..673d0ede
--- /dev/null
+++ b/engine/src/main/java/pro/verron/officestamper/preset/postprocessors/cleanfootnotes/RemoveOrphanedFootnotesProcessor.java
@@ -0,0 +1,62 @@
+package pro.verron.officestamper.preset.postprocessors.cleanfootnotes;
+
+import org.docx4j.openpackaging.packages.WordprocessingMLPackage;
+import org.docx4j.openpackaging.parts.WordprocessingML.FootnotesPart;
+import org.docx4j.wml.CTFootnotes;
+import org.docx4j.wml.CTFtnEdn;
+import pro.verron.officestamper.api.PostProcessor;
+import pro.verron.officestamper.preset.postprocessors.NoteRefsVisitor;
+import pro.verron.officestamper.utils.WmlUtils;
+
+import java.util.Collection;
+import java.util.Optional;
+
+import static org.docx4j.wml.STFtnEdn.NORMAL;
+import static pro.verron.officestamper.api.OfficeStamperException.throwing;
+import static pro.verron.officestamper.core.DocumentUtil.visitDocument;
+
+/**
+ * Post-processor deleting the footnotes whose id is not used by any note reference of the document.
+ * <p>
+ * Only notes typed {@code NORMAL}, or carrying no type at all, are eligible for deletion.
+ */
+public class RemoveOrphanedFootnotesProcessor
+        implements PostProcessor {
+    /**
+     * Tells whether a note is a regular one.
+     *
+     * @param note the note to inspect
+     * @return {@code true} when the note type is absent or equal to {@code NORMAL}
+     */
+    private static boolean normalNotes(CTFtnEdn note) {
+        var declaredType = note.getType();
+        return declaredType == null || declaredType.equals(NORMAL);
+    }
+
+    /**
+     * Gathers the referenced note ids, then removes every regular footnote whose id is not among them.
+     * Nothing is removed when the main document part has no footnotes part.
+     *
+     * @param document the package to clean up
+     */
+    @Override
+    public void process(WordprocessingMLPackage document) {
+        var refCollector = new NoteRefsVisitor();
+        visitDocument(document, refCollector);
+        var liveIds = refCollector.referencedNoteIds();
+
+        var footnotesPart = document.getMainDocumentPart()
+                                    .getFootnotesPart();
+        var orphans = Optional.ofNullable(footnotesPart)
+                              .stream()
+                              .map(throwing(FootnotesPart::getContents))
+                              .map(CTFootnotes::getFootnote)
+                              .flatMap(Collection::stream)
+                              .filter(note -> normalNotes(note) && !liveIds.contains(note.getId()))
+                              .toList();
+        // Removal runs on the snapshot taken above rather than inside the stream pipeline.
+        for (var orphan : orphans) {
+            WmlUtils.remove(orphan);
+        }
+    }
+}
diff --git a/engine/src/main/java/pro/verron/officestamper/utils/WmlUtils.java b/engine/src/main/java/pro/verron/officestamper/utils/WmlUtils.java
index fc7dee3f..39657e9c 100644
--- a/engine/src/main/java/pro/verron/officestamper/utils/WmlUtils.java
+++ b/engine/src/main/java/pro/verron/officestamper/utils/WmlUtils.java
@@ -8,9 +8,7 @@
 import org.docx4j.openpackaging.packages.WordprocessingMLPackage;
 import org.docx4j.openpackaging.parts.PartName;
 import org.docx4j.openpackaging.parts.WordprocessingML.CommentsPart;
-import org.docx4j.wml.Comments;
-import org.docx4j.wml.ContentAccessor;
-import org.docx4j.wml.Tc;
+import org.docx4j.wml.*;
 import org.jvnet.jaxb2_commons.ppp.Child;
 import pro.verron.officestamper.api.OfficeStamperException;
 import pro.verron.officestamper.core.TableCellUtil;
@@ -85,13 +83,43 @@ private static Predicate idEqual(BigInteger id) {
     }
 
+    /**
+     * Removes the given element from its parent.
+     * <p>
+     * The parent may be a {@link ContentAccessor}, a {@link CTFootnotes} or a {@link CTEndnotes}.
+     * When it is a {@link Tc} for which {@code TableCellUtil.hasNoParagraphOrTable} holds after
+     * the removal, {@code TableCellUtil.addEmptyParagraph} is applied to it.
+     *
+     * @param child the element to detach
+     * @throws OfficeStamperException when the parent is of any other type
+     */
     public static void remove(Child child) {
-        var parent = (ContentAccessor) child.getParent();
-        remove(parent, child);
+        // Resolve the parent once, before detaching the child, so that the table-cell check
+        // below works on that same reference instead of asking the detached child again.
+        var parent = child.getParent();
+        switch (parent) {
+            case ContentAccessor accessor -> remove(accessor, child);
+            case CTFootnotes footnotes -> remove(footnotes, child);
+            case CTEndnotes endnotes -> remove(endnotes, child);
+            default -> throw new OfficeStamperException("Unexpected value: " + parent);
+        }
         if (parent instanceof Tc cell && TableCellUtil.hasNoParagraphOrTable(cell)) {
             TableCellUtil.addEmptyParagraph(cell);
         }
     }
 
+    /** Removes {@code child} from the footnote list held by {@code parent}. */
+    @SuppressWarnings("SuspiciousMethodCalls")
+    private static void remove(CTFootnotes parent, Child child) {
+        parent.getFootnote()
+              .remove(child);
+    }
+
+    /** Removes {@code child} from the endnote list held by {@code parent}. */
+    @SuppressWarnings("SuspiciousMethodCalls")
+    private static void remove(CTEndnotes parent, Child child) {
+        parent.getEndnote()
+              .remove(child);
+    }
+
     private static void remove(ContentAccessor parent, Child child) {
         var siblings = parent.getContent();
         var iterator = siblings.listIterator();
diff --git
a/engine/src/test/java/pro/verron/officestamper/test/ConditionalDisplayTest.java b/engine/src/test/java/pro/verron/officestamper/test/ConditionalDisplayTest.java index a7b868f1..79bea338 100644 --- a/engine/src/test/java/pro/verron/officestamper/test/ConditionalDisplayTest.java +++ b/engine/src/test/java/pro/verron/officestamper/test/ConditionalDisplayTest.java @@ -221,22 +221,14 @@ void conditionalDisplayOfFootnotes(ContextFactory factory) { [Quote] "Springfield, USA is a town like no other, brought to life through the antics of the Simpson family. Here, in the heart of Springfield, every day is an adventure." == Homer Simpson's Favorite Pastimes - Homer Simpson, the patriarch of the Simpson family, is well-known for his love of donuts and Duff beer❬[1]❘{rStyle=Appelnotedebasdep}❭. He spends most of his time at the Springfield Nuclear Power Plant, though he often finds himself in various predicaments❬[2]❘{rStyle=Appelnotedebasdep}❭. == Marge Simpson: The Heart of the Family - Marge Simpson, with her iconic blue hair, is the moral center of the family. She manages the household with grace and patience❬[3]❘{rStyle=Appelnotedebasdep}❭. Despite the chaos around her, Marge always finds a way to keep the family together. - == Bart Simpson: The Troublemaker - - Bart Simpson, the eldest child, is notorious for his mischievous behavior. His prankster ways often land him in trouble, yet his cleverness sometimes helps solve the family's problems❬[4]❘{rStyle=Appelnotedebasdep}❭. + Marge Simpson, with her iconic blue hair, is the moral center of the family. She manages the household with the chaos around her, Marge always finds a way to keep the family together. |=== |Character |Role |Fun Fact - |Homer Simpson - |Patriarch - |"D'oh!" is Homer's trademark exclamation❬[5]❘{rStyle=Appelnotedebasdep}❭. - |Marge Simpson |Matriarch |Her hair once hid an entire toolbox❬[6]❘{rStyle=Appelnotedebasdep}❭. 
@@ -260,16 +252,68 @@ void conditionalDisplayOfFootnotes(ContextFactory factory) { [Quote] "From the simplicity of everyday life to the extraordinary events in Springfield, The Simpsons continue to entertain audiences with their unique charm and wit." [footnotes] --- - [1] Donuts, preferably with pink frosting and sprinkles, are Homer's favorite treat. + [6] Marge's hairdo was designed to hide various items, a nod to cartoon logic. + + [7] Bart's rebellious attitude is encapsulated in this catchphrase. + + [8] Lisa's musical talent often shines through her saxophone solos. + + [9] Despite her silence, Maggie has saved her family on multiple occasions. + + --- + """; + + var config = standardWithPreprocessing(); + var stamper = new TestDocxStamper<>(config); + var actual = stamper.stampAndLoadAndExtract(template, context); + assertEquals(expected, actual); + } + + @DisplayName("Display endnotes elements") + @ParameterizedTest + @MethodSource("factories") + void conditionalDisplayOfEndnotes(ContextFactory factory) { + var context = factory.name("Bart"); + var template = getResource(Path.of("endnotes.docx")); + var expected = """ + = Springfield Chronicles: The Simpsons Edition + + == Introduction - [2] Homer’s adventures range from becoming an astronaut to leading a vigilante group. + [Quote] "Springfield, USA is a town like no other, brought to life through the antics of the Simpson family. Here, in the heart of Springfield, every day is an adventure." + == Homer Simpson's Favorite Pastimes - [3] Marge once served as a police officer and even ran for mayor of Springfield. + == Marge Simpson: The Heart of the Family - [4] Bart once saved Springfield from a dam break with his skateboarding skills. + Marge Simpson, with her iconic blue hair, is the moral center of the family. She manages the household with the chaos around her, Marge always finds a way to keep the family together. 
+ |=== + |Character + |Role + |Fun Fact + + |Marge Simpson + |Matriarch + |Her hair once hid an entire toolbox❬[6]❘{rStyle=Appeldenotedefin}❭. - [5] "D'oh!" was first added to the Oxford English Dictionary in 2001. + |Bart Simpson + |Eldest Child + |Bart's famous catchphrase is "Eat my shorts!"❬[7]❘{rStyle=Appeldenotedefin}❭. + |Lisa Simpson + |Middle Child + |Lisa is a talented saxophonist❬[8]❘{rStyle=Appeldenotedefin}❭. + + |Maggie Simpson + |Youngest Child + |Maggie is known for her pacifier and silent wisdom❬[9]❘{rStyle=Appeldenotedefin}❭. + + + |=== + == Conclusion + + [Quote] "From the simplicity of everyday life to the extraordinary events in Springfield, The Simpsons continue to entertain audiences with their unique charm and wit." + [endnotes] + --- [6] Marge's hairdo was designed to hide various items, a nod to cartoon logic. [7] Bart's rebellious attitude is encapsulated in this catchphrase. diff --git a/engine/src/test/java/pro/verron/officestamper/test/Stringifier.java b/engine/src/test/java/pro/verron/officestamper/test/Stringifier.java index 11ff8485..010990df 100644 --- a/engine/src/test/java/pro/verron/officestamper/test/Stringifier.java +++ b/engine/src/test/java/pro/verron/officestamper/test/Stringifier.java @@ -570,8 +570,7 @@ private Function decorateWithStyle(String value) { case "heading 5" -> "====== %s\n"::formatted; case "heading 6" -> "======= %s\n"::formatted; case "caption" -> ".%s"::formatted; - case "annotation text" -> string -> string; - case "footnote text" -> string -> string; + case "annotation text", "footnote text", "endnote text" -> string -> string; default -> "[%s] %%s".formatted(value)::formatted; }; } diff --git a/test/sources/endnotes.docx b/test/sources/endnotes.docx new file mode 100644 index 00000000..523086a2 Binary files /dev/null and b/test/sources/endnotes.docx differ