From 8fd23654cd1647ea656842352fa297de4bbc6c9e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Dominik=20Fuch=C3=9F?= Date: Mon, 19 Feb 2024 13:51:22 +0100 Subject: [PATCH] Add tracelink types (LiSSA) to Common --- .../DiagramGoldStandardTraceLink.java | 91 ++++++++++++++ .../diagrams/DiagramTextTraceLink.java | 92 ++++++++++++++ .../diagrams/DiagramWordTraceLink.java | 114 ++++++++++++++++++ .../models/tracelinks/diagrams/TraceType.java | 31 +++++ 4 files changed, 328 insertions(+) create mode 100644 framework/common/src/main/java/edu/kit/kastel/mcse/ardoco/core/api/models/tracelinks/diagrams/DiagramGoldStandardTraceLink.java create mode 100644 framework/common/src/main/java/edu/kit/kastel/mcse/ardoco/core/api/models/tracelinks/diagrams/DiagramTextTraceLink.java create mode 100644 framework/common/src/main/java/edu/kit/kastel/mcse/ardoco/core/api/models/tracelinks/diagrams/DiagramWordTraceLink.java create mode 100644 framework/common/src/main/java/edu/kit/kastel/mcse/ardoco/core/api/models/tracelinks/diagrams/TraceType.java diff --git a/framework/common/src/main/java/edu/kit/kastel/mcse/ardoco/core/api/models/tracelinks/diagrams/DiagramGoldStandardTraceLink.java b/framework/common/src/main/java/edu/kit/kastel/mcse/ardoco/core/api/models/tracelinks/diagrams/DiagramGoldStandardTraceLink.java new file mode 100644 index 000000000..b5e83bd38 --- /dev/null +++ b/framework/common/src/main/java/edu/kit/kastel/mcse/ardoco/core/api/models/tracelinks/diagrams/DiagramGoldStandardTraceLink.java @@ -0,0 +1,91 @@ +/* Licensed under MIT 2023-2024. 
*/
+package edu.kit.kastel.mcse.ardoco.core.api.models.tracelinks.diagrams;
+
+import java.util.Comparator;
+import java.util.Objects;
+
+import edu.kit.kastel.mcse.ardoco.core.api.diagramrecognition.DiagramElement;
+import edu.kit.kastel.mcse.ardoco.core.api.text.Sentence;
+
+/**
+ * A {@link DiagramTextTraceLink} that additionally stores the path of the gold standard file it belongs to and a {@link TraceType}.
+ */
+public class DiagramGoldStandardTraceLink extends DiagramTextTraceLink {
+    private final String goldStandard;
+    private final TraceType traceType;
+
+    /**
+     * Creates a gold standard tracelink of type {@link TraceType#ENTITY} between a diagram element and a sentence.
+     *
+     * @param diagramElement diagram element
+     * @param sentence       sentence
+     * @param projectName    project name
+     * @param goldStandard   path to the textual gold standard file
+     */
+    public DiagramGoldStandardTraceLink(DiagramElement diagramElement, Sentence sentence, String projectName, String goldStandard) {
+        this(diagramElement, sentence, projectName, goldStandard, TraceType.ENTITY);
+    }
+
+    /**
+     * Creates a gold standard tracelink of the given type between a diagram element and a sentence.
+     *
+     * @param diagramElement diagram element
+     * @param sentence       sentence
+     * @param projectName    project name
+     * @param goldStandard   path to the textual gold standard file
+     * @param traceType      type of the trace
+     */
+    public DiagramGoldStandardTraceLink(DiagramElement diagramElement, Sentence sentence, String projectName, String goldStandard, TraceType traceType) {
+        super(diagramElement, sentence, projectName);
+        this.traceType = traceType;
+        this.goldStandard = goldStandard;
+    }
+
+    /**
+     * {@return the path to the gold standard text file}
+     */
+    public String getGoldStandard() {
+        return this.goldStandard;
+    }
+
+    /**
+     * {@return the trace type of this link}
+     */
+    public TraceType getTraceType() {
+        return this.traceType;
+    }
+
+    /**
+     * Compares gold standard path, diagram element, sentence number and trace type if the other object is a gold standard tracelink. Any other
+     * object is compared using {@link DiagramTextTraceLink#equals(Object)}.
+     */
+    @Override
+    public boolean equals(Object obj) {
+        if (obj == this) {
+            return true;
+        }
+        if (!(obj instanceof DiagramGoldStandardTraceLink that)) {
+            return super.equals(obj);
+        }
+        // Single short-circuiting chain: the order of the checks is part of the behaviour
+        return Objects.equals(getGoldStandard(), that.getGoldStandard()) //
+                && Objects.equals(getDiagramElement(), that.getDiagramElement()) //
+                && getSentenceNo() == that.getSentenceNo() //
+                && Objects.equals(getTraceType(), that.getTraceType());
+    }
+
+    @Override
+    public int hashCode() {
+        return Objects.hash(getSentenceNo(), getDiagramElement(), getGoldStandard(), getTraceType());
+    }
+
+    /**
+     * A link that is equal to this one compares as 0; otherwise this link is ordered before every {@link DiagramWordTraceLink}. Two gold standard
+     * tracelinks are ordered by gold standard path, then diagram element, then sentence number, then trace type. Any other link is compared by
+     * {@link DiagramTextTraceLink#compareTo(DiagramTextTraceLink)}.
+     */
+    @Override
+    public int compareTo(DiagramTextTraceLink o) {
+        if (equals(o)) {
+            return 0;
+        }
+        if (o instanceof DiagramWordTraceLink) {
+            return -1;
+        }
+        if (!(o instanceof DiagramGoldStandardTraceLink that)) {
+            return super.compareTo(o);
+        }
+        Comparator<DiagramGoldStandardTraceLink> byGoldStandard = Comparator.comparing(DiagramGoldStandardTraceLink::getGoldStandard);
+        return byGoldStandard.thenComparing(DiagramGoldStandardTraceLink::getDiagramElement)
+                .thenComparingInt(DiagramGoldStandardTraceLink::getSentenceNo)
+                .thenComparing(DiagramGoldStandardTraceLink::getTraceType)
+                .compare(this, that);
+    }
+
+    /**
+     * {@return the textual form of the superclass followed by the name of the trace type in square brackets}
+     */
+    @Override
+    public String toString() {
+        return super.toString() + "-[" + getTraceType().name() + "]";
+    }
+}
diff --git a/framework/common/src/main/java/edu/kit/kastel/mcse/ardoco/core/api/models/tracelinks/diagrams/DiagramTextTraceLink.java b/framework/common/src/main/java/edu/kit/kastel/mcse/ardoco/core/api/models/tracelinks/diagrams/DiagramTextTraceLink.java
new file mode 100644
index 000000000..ad8359120
--- /dev/null
+++ b/framework/common/src/main/java/edu/kit/kastel/mcse/ardoco/core/api/models/tracelinks/diagrams/DiagramTextTraceLink.java
@@ -0,0 +1,92 @@
+/* Licensed under MIT 2023-2024. 
*/
+package edu.kit.kastel.mcse.ardoco.core.api.models.tracelinks.diagrams;
+
+import java.io.Serializable;
+import java.text.MessageFormat;
+import java.util.Objects;
+
+import edu.kit.kastel.mcse.ardoco.core.api.diagramrecognition.DiagramElement;
+import edu.kit.kastel.mcse.ardoco.core.api.text.Sentence;
+import edu.kit.kastel.mcse.ardoco.core.common.util.SimilarityComparable;
+import edu.kit.kastel.mcse.ardoco.core.data.GlobalConfiguration;
+
+/**
+ * Represents a tracelink between a {@link DiagramElement} and a {@link Sentence}. Equality, hashing and ordering are based on the diagram element
+ * and the sentence number; the project name is stored but takes no part in them.
+ */
+public class DiagramTextTraceLink implements SimilarityComparable<DiagramTextTraceLink>, Comparable<DiagramTextTraceLink>, Serializable {
+    protected final DiagramElement diagramElement;
+    protected final Sentence sentence;
+    protected final String projectName;
+
+    /**
+     * Creates a tracelink between a diagram element and a sentence
+     *
+     * @param diagramElement diagram element
+     * @param sentence       sentence
+     * @param projectName    project name
+     */
+    public DiagramTextTraceLink(DiagramElement diagramElement, Sentence sentence, String projectName) {
+        this.diagramElement = diagramElement;
+        this.sentence = sentence;
+        this.projectName = projectName;
+    }
+
+    /**
+     * {@return the diagram element of this tracelink}
+     */
+    public DiagramElement getDiagramElement() {
+        return diagramElement;
+    }
+
+    /**
+     * Gets the sentence number, indexing starts at 1.
+     *
+     * @return sentence number
+     */
+    public int getSentenceNo() {
+        return sentence.getSentenceNumberForOutput();
+    }
+
+    /**
+     * {@return the textual form of this tracelink including the text of the sentence}
+     */
+    @Override
+    public String toString() {
+        return toString(true);
+    }
+
+    /**
+     * Renders this tracelink as {@code [diagram element]-[sentence number]}, optionally followed by {@code -[sentence text]}.
+     *
+     * @param withSentence whether the text of the sentence is appended
+     * @return the textual form of this tracelink
+     */
+    public String toString(boolean withSentence) {
+        // Pass the number as text: MessageFormat formats numeric arguments with the locale's number format (e.g. grouping separators)
+        var sentenceNo = String.valueOf(getSentence().getSentenceNumberForOutput());
+        if (withSentence) {
+            return MessageFormat.format("[{0}]-[{1}]-[{2}]", getDiagramElement(), sentenceNo, getSentence().getText());
+        }
+        return MessageFormat.format("[{0}]-[{1}]", getDiagramElement(), sentenceNo);
+    }
+
+    /**
+     * {@return the sentence of this tracelink}
+     */
+    public Sentence getSentence() {
+        return this.sentence;
+    }
+
+    /**
+     * Two tracelinks are equal if their sentence numbers match and their diagram elements are equal. The check accepts every
+     * {@link DiagramTextTraceLink}, including instances of subclasses.
+     */
+    @Override
+    public boolean equals(Object obj) {
+        if (this == obj) {
+            return true;
+        }
+        if (obj instanceof DiagramTextTraceLink other) {
+            return getSentenceNo() == other.getSentenceNo() && Objects.equals(getDiagramElement(), other.getDiagramElement());
+        }
+        return false;
+    }
+
+    @Override
+    public int hashCode() {
+        return Objects.hash(getSentenceNo(), getDiagramElement());
+    }
+
+    /**
+     * Equal links compare as 0. Otherwise links are ordered by their diagram elements; ties are broken by the sentence number.
+     */
+    @Override
+    public int compareTo(DiagramTextTraceLink o) {
+        if (equals(o)) {
+            return 0;
+        }
+        var comp = getDiagramElement().compareTo(o.getDiagramElement());
+        if (comp == 0) {
+            // Integer.compare instead of a subtraction, which can overflow and flip the sign
+            return Integer.compare(getSentenceNo(), o.getSentenceNo());
+        }
+        return comp;
+    }
+
+    /**
+     * Equal links are similar. Otherwise two links are similar if the bounding boxes of their diagram elements are similar under the given
+     * configuration and their sentence numbers match.
+     *
+     * @param globalConfiguration configuration handed to the similarity check of the bounding boxes
+     * @param obj                 the tracelink to compare with
+     * @return whether both tracelinks are similar
+     */
+    @Override
+    public boolean similar(GlobalConfiguration globalConfiguration, DiagramTextTraceLink obj) {
+        if (equals(obj)) {
+            return true;
+        }
+        return getDiagramElement().getBoundingBox().similar(globalConfiguration, obj.getDiagramElement().getBoundingBox()) //
+                && getSentenceNo() == obj.getSentenceNo();
+    }
+}
diff --git a/framework/common/src/main/java/edu/kit/kastel/mcse/ardoco/core/api/models/tracelinks/diagrams/DiagramWordTraceLink.java b/framework/common/src/main/java/edu/kit/kastel/mcse/ardoco/core/api/models/tracelinks/diagrams/DiagramWordTraceLink.java
new file mode 100644
index 000000000..52d3f15b1
--- /dev/null
+++ 
b/framework/common/src/main/java/edu/kit/kastel/mcse/ardoco/core/api/models/tracelinks/diagrams/DiagramWordTraceLink.java
@@ -0,0 +1,152 @@
+/* Licensed under MIT 2023-2024. */
+package edu.kit.kastel.mcse.ardoco.core.api.models.tracelinks.diagrams;
+
+import java.io.Serializable;
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.Comparator;
+import java.util.Objects;
+import java.util.TreeSet;
+import java.util.stream.Collectors;
+
+import edu.kit.kastel.mcse.ardoco.core.api.diagramrecognition.DiagramElement;
+import edu.kit.kastel.mcse.ardoco.core.api.text.Word;
+import edu.kit.kastel.mcse.ardoco.core.pipeline.agent.Claimant;
+
+/**
+ * Represents a tracelink between a {@link DiagramElement} and a {@link Word}.
+ */
+public class DiagramWordTraceLink extends DiagramTextTraceLink {
+    /** Orders word tracelinks by ascending confidence. */
+    public static final Comparator<DiagramWordTraceLink> CONFIDENCE_COMPARATOR = Comparator.comparingDouble(DiagramWordTraceLink::getConfidence);
+
+    private final Word word;
+    private final double confidence;
+    private final Serializable origin;
+    private final TreeSet<DiagramWordTraceLink> relatedWordLinks = new TreeSet<>();
+    private final TreeSet<DiagramGoldStandardTraceLink> relatedGSLinks = new TreeSet<>();
+
+    /**
+     * Creates a tracelink between a diagram element and a word. The sentence of the word is used as the sentence of the link.
+     *
+     * @param diagramElement diagram element
+     * @param word           word
+     * @param projectName    project name
+     * @param confidence     confidence
+     * @param origin         claimant this link was derived from
+     */
+    public DiagramWordTraceLink(DiagramElement diagramElement, Word word, String projectName, double confidence, Claimant origin) {
+        super(diagramElement, word.getSentence(), projectName);
+
+        this.word = word;
+        this.confidence = confidence;
+        this.origin = origin;
+    }
+
+    /**
+     * {@return the word this tracelink points to}
+     */
+    public Word getWord() {
+        return this.word;
+    }
+
+    /**
+     * {@return the confidence of this tracelink}
+     */
+    public double getConfidence() {
+        return this.confidence;
+    }
+
+    /**
+     * Word tracelinks are equal if their diagram elements are equal and both the positions of their words and their confidences match. Any
+     * other object is compared using {@link DiagramTextTraceLink#equals(Object)}.
+     */
+    @Override
+    public boolean equals(Object obj) {
+        if (this == obj) {
+            return true;
+        }
+        if (obj instanceof DiagramWordTraceLink other) {
+            return Objects.equals(getDiagramElement(), other.getDiagramElement()) //
+                    && getWord().getPosition() == other.getWord().getPosition() //
+                    && Double.compare(getConfidence(), other.getConfidence()) == 0;
+        }
+        return super.equals(obj);
+    }
+
+    @Override
+    public int hashCode() {
+        // Hash exactly what equals() compares: the position of the word instead of the Word object itself
+        return Objects.hash(getDiagramElement(), getWord().getPosition(), getConfidence());
+    }
+
+    /**
+     * Equal links compare as 0 and every {@link DiagramGoldStandardTraceLink} is ordered before this link. Other links are ordered by
+     * {@link DiagramTextTraceLink#compareTo(DiagramTextTraceLink)}; word tracelinks that tie there are ordered by word position and then by
+     * confidence.
+     */
+    @Override
+    public int compareTo(DiagramTextTraceLink o) {
+        if (equals(o)) {
+            return 0;
+        }
+        // DiagramGoldStandardTraceLink#compareTo sorts gold standard links before word links; answer with the opposite sign to stay antisymmetric
+        if (o instanceof DiagramGoldStandardTraceLink) {
+            return 1;
+        }
+        var supComp = super.compareTo(o);
+        if (o instanceof DiagramWordTraceLink other && supComp == 0) {
+            var comp = Integer.compare(getWord().getPosition(), other.getWord().getPosition());
+            if (comp == 0) {
+                return Double.compare(getConfidence(), other.getConfidence());
+            }
+            return comp;
+        }
+        return supComp;
+    }
+
+    /**
+     * Registers the given links as related to this link. Word tracelinks and gold standard tracelinks are collected in separate sets; this
+     * link itself and links of any other type are skipped.
+     *
+     * @param related the links to register
+     */
+    public void addRelated(Collection<? extends DiagramTextTraceLink> related) {
+        // Iterate over a copy; presumably guards against the caller passing a live view that changes while adding - TODO confirm
+        var list = new ArrayList<DiagramTextTraceLink>(related);
+        for (var link : list) {
+            if (link == this) {
+                continue;
+            }
+            if (link instanceof DiagramWordTraceLink wLink) {
+                relatedWordLinks.add(wLink);
+            } else if (link instanceof DiagramGoldStandardTraceLink gsLink) {
+                relatedGSLinks.add(gsLink);
+            }
+        }
+    }
+
+    /**
+     * {@return the related gold standard tracelinks; this is the internal set, not a copy}
+     */
+    public TreeSet<DiagramGoldStandardTraceLink> getRelatedGSLinks() {
+        return relatedGSLinks;
+    }
+
+    /**
+     * {@return the related word tracelinks; this is the internal set, not a copy}
+     */
+    public TreeSet<DiagramWordTraceLink> getRelatedWordLinks() {
+        return relatedWordLinks;
+    }
+
+    @Override
+    public String toString() {
+        var relatedTypes = "";
+        if (!relatedGSLinks.isEmpty()) {
+            relatedTypes = relatedGSLinks.stream().map(g -> "[" + g.getTraceType().name() + "]").collect(Collectors.joining("-")) + "-";
+        }
+        return String.format("%s-[%s]-[%s]-[%.3f]-%s", super.toString(false), getWord().getText(), getWord().getPhrase().getText(), getConfidence(),
+                relatedTypes);
+    }
+}
diff --git a/framework/common/src/main/java/edu/kit/kastel/mcse/ardoco/core/api/models/tracelinks/diagrams/TraceType.java b/framework/common/src/main/java/edu/kit/kastel/mcse/ardoco/core/api/models/tracelinks/diagrams/TraceType.java
new file mode 100644
index 000000000..e9e9fe439
--- /dev/null
+++ 
b/framework/common/src/main/java/edu/kit/kastel/mcse/ardoco/core/api/models/tracelinks/diagrams/TraceType.java
@@ -0,0 +1,38 @@
+/* Licensed under MIT 2023-2024. */
+package edu.kit.kastel.mcse.ardoco.core.api.models.tracelinks.diagrams;
+
+import java.io.Serializable;
+
+/**
+ * Classifies a trace link. Trace links in the gold standard can be annotated with a {@link TraceType} to attach additional information to them.
+ * The types {@link #COMMON_NOUN}, {@link #SHARED_STEM} and {@link #OTHER_ENTITY} can be used to mark actual negatives. The gold standard is not
+ * complete for actual negatives, however the provided information can be used to determine potential causes of false positives.
+ */
+public enum TraceType implements Serializable {
+    /** Both endpoints point to the same entity. */
+    ENTITY(true),
+    /** Created due to a common noun usage (e.g. Entity "test" and word "test" in the text). */
+    COMMON_NOUN(false),
+    /** Created due to a shared word stem (e.g. Entity "testing" and word "testing" in the text). */
+    SHARED_STEM(false),
+    /** Both endpoints point to the same entity, but the textual endpoint is a coreference. */
+    ENTITY_COREFERENCE(true),
+    /** Created due to another (similarly-named) entity. */
+    OTHER_ENTITY(false),
+    /** Marker for discussion. */
+    UNCERTAIN(false);
+
+    /** Whether trace links of this type count as actual positives. */
+    private final boolean actualPositive;
+
+    TraceType(boolean isPositive) {
+        actualPositive = isPositive;
+    }
+
+    /**
+     * {@return whether a trace link of this type is an actual positive}
+     */
+    public boolean isActualPositive() {
+        return actualPositive;
+    }
+}