Skip to content

Commit

Permalink
feat: add post-processors to remove orphaned footnotes and endnotes
Browse files Browse the repository at this point in the history
Introduce post-processors that clean up Word documents by removing footnotes and endnotes that are no longer referenced. Integrate these processors into the default configuration to ensure document consistency after stamping.
  • Loading branch information
caring-coder committed Dec 14, 2024
1 parent 6b19a2a commit 480eecb
Show file tree
Hide file tree
Showing 10 changed files with 215 additions and 23 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,8 @@
import java.util.Objects;

import static java.util.stream.Collectors.joining;
import static pro.verron.officestamper.utils.WmlFactory.*;
import static pro.verron.officestamper.utils.WmlFactory.newRun;
import static pro.verron.officestamper.utils.WmlFactory.newText;

/**
* Utility class to handle runs.
Expand Down Expand Up @@ -68,6 +69,8 @@ public static CharSequence getText(Object content) {
case R.AnnotationRef ignored -> "";
case R.CommentReference ignored -> "";
case Drawing ignored -> "";
case CTFtnEdnRef ref -> ref.getId()
.toString();
case R.Sym sym -> "<sym(%s, %s)>".formatted(sym.getFont(), sym.getChar());
default -> {
log.debug("Unhandled object type: {}", content.getClass());
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,8 @@ public static OfficeStamperConfiguration standardWithPreprocessing() {
configuration.addPreprocessor(Preprocessors.removeLanguageProof());
configuration.addPreprocessor(Preprocessors.removeLanguageInfo());
configuration.addPreprocessor(Preprocessors.mergeSimilarRuns());
configuration.addPostprocessor(Postprocessors.removeOrphanedFootnotes());
configuration.addPostprocessor(Postprocessors.removeOrphanedEndnotes());
return configuration;
}

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
package pro.verron.officestamper.preset;

import pro.verron.officestamper.api.OfficeStamperException;
import pro.verron.officestamper.api.PostProcessor;
import pro.verron.officestamper.preset.postprocessors.cleanendnotes.RemoveOrphanedEndnotesProcessor;
import pro.verron.officestamper.preset.postprocessors.cleanfootnotes.RemoveOrphanedFootnotesProcessor;

/**
 * Static factories for the {@link PostProcessor} implementations provided by this package.
 * <p>
 * The class only hosts static methods; its constructor always throws.
 */
public class Postprocessors {

    /**
     * Always fails: this class is a holder for static factories only.
     *
     * @throws OfficeStamperException on every invocation
     */
    private Postprocessors() {
        throw new OfficeStamperException("This is a utility class and cannot be instantiated");
    }

    /**
     * Creates a post-processor backed by {@link RemoveOrphanedEndnotesProcessor}.
     *
     * @return a new {@link RemoveOrphanedEndnotesProcessor} instance
     */
    public static PostProcessor removeOrphanedEndnotes() {
        return new RemoveOrphanedEndnotesProcessor();
    }

    /**
     * Creates a post-processor backed by {@link RemoveOrphanedFootnotesProcessor}.
     *
     * @return a new {@link RemoveOrphanedFootnotesProcessor} instance
     */
    public static PostProcessor removeOrphanedFootnotes() {
        return new RemoveOrphanedFootnotesProcessor();
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
package pro.verron.officestamper.preset.postprocessors;

import org.docx4j.utils.TraversalUtilVisitor;
import org.docx4j.wml.CTFtnEdnRef;

import java.math.BigInteger;
import java.util.SortedSet;
import java.util.TreeSet;

/**
 * Traversal visitor that records the id of every {@link CTFtnEdnRef} it is applied to.
 * <p>
 * NOTE(review): all ids end up in a single set. If {@code CTFtnEdnRef} backs both footnote and endnote
 * references, ids of the two kinds are indistinguishable to callers — confirm whether that matters.
 */
public class NoteRefsVisitor
        extends TraversalUtilVisitor<CTFtnEdnRef> {
    // Naturally ordered TreeSet: it does not accept null elements.
    private final SortedSet<BigInteger> ids = new TreeSet<>();

    /**
     * Records the id carried by the given reference.
     * <p>
     * A reference without an id is ignored: it cannot designate a note, and adding {@code null} to the
     * naturally ordered set would throw a {@link NullPointerException}.
     *
     * @param element the note reference being visited
     */
    @Override
    public void apply(CTFtnEdnRef element) {
        var id = element.getId();
        if (id != null) {
            ids.add(id);
        }
    }

    /**
     * Gives access to the ids collected so far.
     *
     * @return the visitor's own sorted set of ids (the live instance, not a copy)
     */
    public SortedSet<BigInteger> referencedNoteIds() {
        return ids;
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
package pro.verron.officestamper.preset.postprocessors.cleanendnotes;

import org.docx4j.openpackaging.packages.WordprocessingMLPackage;
import org.docx4j.openpackaging.parts.WordprocessingML.EndnotesPart;
import org.docx4j.wml.CTEndnotes;
import org.docx4j.wml.CTFtnEdn;
import pro.verron.officestamper.api.PostProcessor;
import pro.verron.officestamper.preset.postprocessors.NoteRefsVisitor;
import pro.verron.officestamper.utils.WmlUtils;

import java.util.Collection;
import java.util.Optional;

import static org.docx4j.wml.STFtnEdn.NORMAL;
import static pro.verron.officestamper.api.OfficeStamperException.throwing;
import static pro.verron.officestamper.core.DocumentUtil.visitDocument;

/**
 * Post-processor that removes endnotes whose id is not among the note references found in the document.
 * <p>
 * Only notes whose type is absent or {@code NORMAL} are candidates for removal; notes of any other type
 * are left untouched.
 * <p>
 * NOTE(review): the referenced ids come from {@link NoteRefsVisitor}, which gathers every note reference
 * it visits into one set — confirm that footnote reference ids cannot shadow endnote ids here.
 */
public class RemoveOrphanedEndnotesProcessor
        implements PostProcessor {

    /**
     * Tells whether a note is a regular one.
     *
     * @param note the note to inspect
     * @return {@code true} when the note has no type or its type is {@code NORMAL}
     */
    private static boolean normalNotes(CTFtnEdn note) {
        var type = note.getType();
        return type == null || NORMAL.equals(type);
    }

    /**
     * Collects the referenced note ids, then removes every regular endnote whose id is not in that set.
     * Does nothing beyond the collection step when the document has no endnotes part.
     *
     * @param document the document to clean up
     */
    @Override
    public void process(WordprocessingMLPackage document) {
        var refCollector = new NoteRefsVisitor();
        visitDocument(document, refCollector);
        var usedIds = refCollector.referencedNoteIds();

        var endnotesPart = document.getMainDocumentPart()
                                   .getEndNotesPart();
        // Snapshot the orphans first so that removal never runs while the note list is being streamed.
        var orphans = Optional.ofNullable(endnotesPart)
                              .stream()
                              .map(throwing(EndnotesPart::getContents))
                              .map(CTEndnotes::getEndnote)
                              .flatMap(Collection::stream)
                              .filter(note -> normalNotes(note) && !usedIds.contains(note.getId()))
                              .toList();
        for (var orphan : orphans) {
            WmlUtils.remove(orphan);
        }
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
package pro.verron.officestamper.preset.postprocessors.cleanfootnotes;

import org.docx4j.openpackaging.packages.WordprocessingMLPackage;
import org.docx4j.openpackaging.parts.WordprocessingML.FootnotesPart;
import org.docx4j.wml.CTFootnotes;
import org.docx4j.wml.CTFtnEdn;
import pro.verron.officestamper.api.PostProcessor;
import pro.verron.officestamper.preset.postprocessors.NoteRefsVisitor;
import pro.verron.officestamper.utils.WmlUtils;

import java.util.Collection;
import java.util.Optional;

import static org.docx4j.wml.STFtnEdn.NORMAL;
import static pro.verron.officestamper.api.OfficeStamperException.throwing;
import static pro.verron.officestamper.core.DocumentUtil.visitDocument;

/**
 * Post-processor that removes footnotes whose id is not among the note references found in the document.
 * <p>
 * Only notes whose type is absent or {@code NORMAL} are candidates for removal; notes of any other type
 * are left untouched.
 * <p>
 * NOTE(review): the referenced ids come from {@link NoteRefsVisitor}, which gathers every note reference
 * it visits into one set — confirm that endnote reference ids cannot shadow footnote ids here.
 */
public class RemoveOrphanedFootnotesProcessor
        implements PostProcessor {

    /**
     * Tells whether a note is a regular one.
     *
     * @param note the note to inspect
     * @return {@code true} when the note has no type or its type is {@code NORMAL}
     */
    private static boolean normalNotes(CTFtnEdn note) {
        var type = note.getType();
        return type == null || NORMAL.equals(type);
    }

    /**
     * Collects the referenced note ids, then removes every regular footnote whose id is not in that set.
     * Does nothing beyond the collection step when the document has no footnotes part.
     *
     * @param document the document to clean up
     */
    @Override
    public void process(WordprocessingMLPackage document) {
        var refCollector = new NoteRefsVisitor();
        visitDocument(document, refCollector);
        var usedIds = refCollector.referencedNoteIds();

        var footnotesPart = document.getMainDocumentPart()
                                    .getFootnotesPart();
        // Snapshot the orphans first so that removal never runs while the note list is being streamed.
        var orphans = Optional.ofNullable(footnotesPart)
                              .stream()
                              .map(throwing(FootnotesPart::getContents))
                              .map(CTFootnotes::getFootnote)
                              .flatMap(Collection::stream)
                              .filter(note -> normalNotes(note) && !usedIds.contains(note.getId()))
                              .toList();
        for (var orphan : orphans) {
            WmlUtils.remove(orphan);
        }
    }
}
26 changes: 20 additions & 6 deletions engine/src/main/java/pro/verron/officestamper/utils/WmlUtils.java
Original file line number Diff line number Diff line change
Expand Up @@ -8,9 +8,7 @@
import org.docx4j.openpackaging.packages.WordprocessingMLPackage;
import org.docx4j.openpackaging.parts.PartName;
import org.docx4j.openpackaging.parts.WordprocessingML.CommentsPart;
import org.docx4j.wml.Comments;
import org.docx4j.wml.ContentAccessor;
import org.docx4j.wml.Tc;
import org.docx4j.wml.*;
import org.jvnet.jaxb2_commons.ppp.Child;
import pro.verron.officestamper.api.OfficeStamperException;
import pro.verron.officestamper.core.TableCellUtil;
Expand Down Expand Up @@ -85,13 +83,29 @@ private static Predicate<Comments.Comment> idEqual(BigInteger id) {
}

public static void remove(Child child) {
var parent = (ContentAccessor) child.getParent();
remove(parent, child);
if (parent instanceof Tc cell && TableCellUtil.hasNoParagraphOrTable(cell)) {
switch (child.getParent()) {
case ContentAccessor parent -> remove(parent, child);
case CTFootnotes parent -> remove(parent, child);
case CTEndnotes parent -> remove(parent, child);
default -> throw new OfficeStamperException("Unexpected value: " + child.getParent());
}
if (child.getParent() instanceof Tc cell && TableCellUtil.hasNoParagraphOrTable(cell)) {
TableCellUtil.addEmptyParagraph(cell);
}
}

/**
 * Removes the given element from the footnote list of the given container.
 *
 * @param parent the footnotes container holding the element
 * @param child  the element to remove
 */
@SuppressWarnings("SuspiciousMethodCalls")
private static void remove(CTFootnotes parent, Child child) {
    var footnotes = parent.getFootnote();
    footnotes.remove(child);
}

/**
 * Removes the given element from the endnote list of the given container.
 *
 * @param parent the endnotes container holding the element
 * @param child  the element to remove
 */
@SuppressWarnings("SuspiciousMethodCalls")
private static void remove(CTEndnotes parent, Child child) {
    var endnotes = parent.getEndnote();
    endnotes.remove(child);
}

private static void remove(ContentAccessor parent, Child child) {
var siblings = parent.getContent();
var iterator = siblings.listIterator();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -221,22 +221,14 @@ void conditionalDisplayOfFootnotes(ContextFactory factory) {
[Quote] "Springfield, USA is a town like no other, brought to life through the antics of the Simpson family. Here, in the heart of Springfield, every day is an adventure."
== Homer Simpson's Favorite Pastimes
Homer Simpson, the patriarch of the Simpson family, is well-known for his love of donuts and Duff beer❬[1]❘{rStyle=Appelnotedebasdep}❭. He spends most of his time at the Springfield Nuclear Power Plant, though he often finds himself in various predicaments❬[2]❘{rStyle=Appelnotedebasdep}❭.
== Marge Simpson: The Heart of the Family
Marge Simpson, with her iconic blue hair, is the moral center of the family. She manages the household with grace and patience❬[3]❘{rStyle=Appelnotedebasdep}❭. Despite the chaos around her, Marge always finds a way to keep the family together.
== Bart Simpson: The Troublemaker
Bart Simpson, the eldest child, is notorious for his mischievous behavior. His prankster ways often land him in trouble, yet his cleverness sometimes helps solve the family's problems❬[4]❘{rStyle=Appelnotedebasdep}❭.
Marge Simpson, with her iconic blue hair, is the moral center of the family. She manages the household with the chaos around her, Marge always finds a way to keep the family together.
|===
|Character
|Role<cnfStyle=100000000000>
|Fun Fact<cnfStyle=100000000000>
|Homer Simpson
|Patriarch<cnfStyle=000000100000>
|"D'oh!" is Homer's trademark exclamation❬[5]❘{rStyle=Appelnotedebasdep}❭.<cnfStyle=000000100000>
|Marge Simpson
|Matriarch<cnfStyle=000000000000>
|Her hair once hid an entire toolbox❬[6]❘{rStyle=Appelnotedebasdep}❭.<cnfStyle=000000000000>
Expand All @@ -260,16 +252,68 @@ void conditionalDisplayOfFootnotes(ContextFactory factory) {
[Quote] "From the simplicity of everyday life to the extraordinary events in Springfield, The Simpsons continue to entertain audiences with their unique charm and wit."
[footnotes]
---
[1] Donuts, preferably with pink frosting and sprinkles, are Homer's favorite treat.
[6] Marge's hairdo was designed to hide various items, a nod to cartoon logic.
[7] Bart's rebellious attitude is encapsulated in this catchphrase.
[8] Lisa's musical talent often shines through her saxophone solos.
[9] Despite her silence, Maggie has saved her family on multiple occasions.
---
""";

var config = standardWithPreprocessing();
var stamper = new TestDocxStamper<>(config);
var actual = stamper.stampAndLoadAndExtract(template, context);
assertEquals(expected, actual);
}

@DisplayName("Display endnotes elements")
@ParameterizedTest
@MethodSource("factories")
void conditionalDisplayOfEndnotes(ContextFactory factory) {
var context = factory.name("Bart");
var template = getResource(Path.of("endnotes.docx"));
var expected = """
= Springfield Chronicles: The Simpsons Edition
== Introduction
[2] Homer’s adventures range from becoming an astronaut to leading a vigilante group.
[Quote] "Springfield, USA is a town like no other, brought to life through the antics of the Simpson family. Here, in the heart of Springfield, every day is an adventure."
== Homer Simpson's Favorite Pastimes
[3] Marge once served as a police officer and even ran for mayor of Springfield.
== Marge Simpson: The Heart of the Family
[4] Bart once saved Springfield from a dam break with his skateboarding skills.
Marge Simpson, with her iconic blue hair, is the moral center of the family. She manages the household with the chaos around her, Marge always finds a way to keep the family together.
|===
|Character
|Role<cnfStyle=100000000000>
|Fun Fact<cnfStyle=100000000000>
|Marge Simpson
|Matriarch<cnfStyle=000000000000>
|Her hair once hid an entire toolbox❬[6]❘{rStyle=Appeldenotedefin}❭.<cnfStyle=000000000000>
[5] "D'oh!" was first added to the Oxford English Dictionary in 2001.
|Bart Simpson
|Eldest Child<cnfStyle=000000100000>
|Bart's famous catchphrase is "Eat my shorts!"❬[7]❘{rStyle=Appeldenotedefin}❭.<cnfStyle=000000100000>
|Lisa Simpson
|Middle Child<cnfStyle=000000000000>
|Lisa is a talented saxophonist❬[8]❘{rStyle=Appeldenotedefin}❭.<cnfStyle=000000000000>
|Maggie Simpson
|Youngest Child<cnfStyle=000000100000>
|Maggie is known for her pacifier and silent wisdom❬[9]❘{rStyle=Appeldenotedefin}❭.<cnfStyle=000000100000>
|===
== Conclusion
[Quote] "From the simplicity of everyday life to the extraordinary events in Springfield, The Simpsons continue to entertain audiences with their unique charm and wit."
[endnotes]
---
[6] Marge's hairdo was designed to hide various items, a nod to cartoon logic.
[7] Bart's rebellious attitude is encapsulated in this catchphrase.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -570,8 +570,7 @@ private Function<? super String, String> decorateWithStyle(String value) {
case "heading 5" -> "====== %s\n"::formatted;
case "heading 6" -> "======= %s\n"::formatted;
case "caption" -> ".%s"::formatted;
case "annotation text" -> string -> string;
case "footnote text" -> string -> string;
case "annotation text", "footnote text", "endnote text" -> string -> string;
default -> "[%s] %%s".formatted(value)::formatted;
};
}
Expand Down
Binary file added test/sources/endnotes.docx
Binary file not shown.

0 comments on commit 480eecb

Please sign in to comment.