diff --git a/cli/src/main/java/de/jplag/cli/JPlagOptionsBuilder.java b/cli/src/main/java/de/jplag/cli/JPlagOptionsBuilder.java index 079a4dbdf8..26fae2d6ff 100644 --- a/cli/src/main/java/de/jplag/cli/JPlagOptionsBuilder.java +++ b/cli/src/main/java/de/jplag/cli/JPlagOptionsBuilder.java @@ -99,6 +99,6 @@ private ClusteringOptions getClusteringOptions() { private MergingOptions getMergingOptions() { return new MergingOptions(this.cliOptions.merging.enabled, this.cliOptions.merging.minimumNeighborLength, - this.cliOptions.merging.maximumGapSize); + this.cliOptions.merging.maximumGapSize, this.cliOptions.merging.minimumRequiredMerges); } } diff --git a/cli/src/main/java/de/jplag/cli/options/CliOptions.java b/cli/src/main/java/de/jplag/cli/options/CliOptions.java index bc88561389..5adea3d477 100644 --- a/cli/src/main/java/de/jplag/cli/options/CliOptions.java +++ b/cli/src/main/java/de/jplag/cli/options/CliOptions.java @@ -139,6 +139,10 @@ public static class Merging { "--gap-size"}, description = "Maximal gap between neighboring matches to be merged (between 1 and minTokenMatch, default: ${DEFAULT-VALUE}).") public int maximumGapSize = MergingOptions.DEFAULT_GAP_SIZE; + @Option(names = { + "--required-merges"}, description = "Minimal required merges for the merging to be applied (between 1 and 50, default: ${DEFAULT-VALUE}).") + public int minimumRequiredMerges = MergingOptions.DEFAULT_REQUIRED_MERGES; + } @Option(names = {"--cluster-spectral-bandwidth"}, hidden = true) diff --git a/cli/src/test/java/de/jplag/cli/MergingOptionsTest.java b/cli/src/test/java/de/jplag/cli/MergingOptionsTest.java index 595c420b91..47eeb1cf31 100644 --- a/cli/src/test/java/de/jplag/cli/MergingOptionsTest.java +++ b/cli/src/test/java/de/jplag/cli/MergingOptionsTest.java @@ -1,5 +1,9 @@ package de.jplag.cli; +import static de.jplag.cli.test.CliArgument.GAP_SIZE; +import static de.jplag.cli.test.CliArgument.MERGING_ENABLED; +import static de.jplag.cli.test.CliArgument.NEIGHBOR_LENGTH; +import static de.jplag.cli.test.CliArgument.REQUIRED_MERGES; import static org.junit.jupiter.api.Assertions.assertEquals; import static org.junit.jupiter.api.Assertions.assertNotNull; @@ -27,5 +31,22 @@ void testMergingDefault() throws ExitException, IOException { assertEquals(MergingOptions.DEFAULT_ENABLED, options.mergingOptions().enabled()); assertEquals(MergingOptions.DEFAULT_NEIGHBOR_LENGTH, options.mergingOptions().minimumNeighborLength()); assertEquals(MergingOptions.DEFAULT_GAP_SIZE, options.mergingOptions().maximumGapSize()); + assertEquals(MergingOptions.DEFAULT_REQUIRED_MERGES, options.mergingOptions().minimumRequiredMerges()); } + + @Test + @DisplayName("Test if custom values are correctly propagated when creating merging options from CLI") + void testMergingCustom() throws ExitException, IOException { + int customNumber = 7; + + JPlagOptions options = runCliForOptions(args -> args.with(MERGING_ENABLED).with(NEIGHBOR_LENGTH, customNumber).with(GAP_SIZE, customNumber) + .with(REQUIRED_MERGES, customNumber)); + + assertNotNull(options.mergingOptions()); + assertEquals(true, options.mergingOptions().enabled()); + assertEquals(customNumber, options.mergingOptions().minimumNeighborLength()); + assertEquals(customNumber, options.mergingOptions().maximumGapSize()); + assertEquals(customNumber, options.mergingOptions().minimumRequiredMerges()); + } + } diff --git a/cli/src/test/java/de/jplag/cli/test/CliArgument.java b/cli/src/test/java/de/jplag/cli/test/CliArgument.java index 168be61d96..6731b5a3c3 100644 --- a/cli/src/test/java/de/jplag/cli/test/CliArgument.java +++ b/cli/src/test/java/de/jplag/cli/test/CliArgument.java @@ -36,4 +36,9 @@ public record CliArgument(String name, boolean isPositional) { public static CliArgument EXCLUDE_FILES = new CliArgument<>("x", false); public static CliArgument MODE = new CliArgument<>("mode", false); + + public static CliArgument MERGING_ENABLED = new CliArgument<>("match-merging", false); + public static CliArgument NEIGHBOR_LENGTH = new CliArgument<>("neighbor-length", false); + public static CliArgument GAP_SIZE = new CliArgument<>("gap-size", false); + public static CliArgument REQUIRED_MERGES = new CliArgument<>("required-merges", false); } diff --git a/core/src/main/java/de/jplag/merging/MatchMerging.java b/core/src/main/java/de/jplag/merging/MatchMerging.java index bc31e269c5..ae9f0156ec 100644 --- a/core/src/main/java/de/jplag/merging/MatchMerging.java +++ b/core/src/main/java/de/jplag/merging/MatchMerging.java @@ -25,6 +25,7 @@ */ public class MatchMerging { private final JPlagOptions options; + private int numberOfMerges; /** * Instantiates the match merging algorithm for a comparison result and a set of specific options. @@ -47,13 +48,18 @@ public JPlagResult mergeMatchesOf(JPlagResult result) { List comparisonsMerged = new ArrayList<>(); ProgressBarLogger.iterate(ProgressBarType.MATCH_MERGING, comparisons, comparison -> { + numberOfMerges = 0; Submission leftSubmission = comparison.firstSubmission().copy(); Submission rightSubmission = comparison.secondSubmission().copy(); List globalMatches = new ArrayList<>(comparison.matches()); globalMatches.addAll(comparison.ignoredMatches()); globalMatches = mergeNeighbors(globalMatches, leftSubmission, rightSubmission); globalMatches = globalMatches.stream().filter(it -> it.length() >= options.minimumTokenMatch()).toList(); - comparisonsMerged.add(new JPlagComparison(leftSubmission, rightSubmission, globalMatches, new ArrayList<>())); + if (numberOfMerges >= options.mergingOptions().minimumRequiredMerges()) { + comparisonsMerged.add(new JPlagComparison(leftSubmission, rightSubmission, globalMatches, new ArrayList<>())); + } else { + comparisonsMerged.add(comparison); + } }); long durationInMillis = System.currentTimeMillis() - timeBeforeStartInMillis; @@ -111,6 +117,7 @@ private List mergeNeighbors(List globalMatches, Submission leftSub globalMatches = removeToken(globalMatches, leftSubmission, rightSubmission, upperNeighbor, tokenBetweenLeft, tokensBetweenRight); neighbors = computeNeighbors(globalMatches); i = 0; + numberOfMerges++; } else { i++; } diff --git a/core/src/main/java/de/jplag/merging/MergingOptions.java b/core/src/main/java/de/jplag/merging/MergingOptions.java index 7d77f7f31b..864f1d2d28 100644 --- a/core/src/main/java/de/jplag/merging/MergingOptions.java +++ b/core/src/main/java/de/jplag/merging/MergingOptions.java @@ -8,44 +8,59 @@ * @param maximumGapSize describes how many tokens can be between to neighboring matches (Defaults to 6). */ public record MergingOptions(@JsonProperty("enabled") boolean enabled, @JsonProperty("min_neighbour_length") int minimumNeighborLength, - @JsonProperty("max_gap_size") int maximumGapSize) { + @JsonProperty("max_gap_size") int maximumGapSize, @JsonProperty("min_required_merges") int minimumRequiredMerges) { public static final boolean DEFAULT_ENABLED = false; public static final int DEFAULT_NEIGHBOR_LENGTH = 2; public static final int DEFAULT_GAP_SIZE = 6; + public static final int DEFAULT_REQUIRED_MERGES = 6; /** - * The default values of MergingOptions are false for the enable-switch, which deactivate MatchMerging, while - * minimumNeighborLength and maximumGapSize default to (2,6), which in testing yielded the best results. + * Creates merging options with default parameters. + * @see MergingOptions#DEFAULT_ENABLED + * @see MergingOptions#DEFAULT_NEIGHBOR_LENGTH + * @see MergingOptions#DEFAULT_GAP_SIZE + * @see MergingOptions#DEFAULT_REQUIRED_MERGES */ public MergingOptions() { - this(DEFAULT_ENABLED, DEFAULT_NEIGHBOR_LENGTH, DEFAULT_GAP_SIZE); + this(DEFAULT_ENABLED, DEFAULT_NEIGHBOR_LENGTH, DEFAULT_GAP_SIZE, DEFAULT_REQUIRED_MERGES); } /** - * Builder pattern method for setting enabled - * @param enabled containing the new value - * @return MergingOptions with specified enabled + * Builder pattern method for enabling and disabling the subsequence match merging mechanism. + * @param enabled specifying if merging is enabled or not. + * @return the options with the specified configuration. */ public MergingOptions withEnabled(boolean enabled) { - return new MergingOptions(enabled, minimumNeighborLength, maximumGapSize); + return new MergingOptions(enabled, minimumNeighborLength, maximumGapSize, minimumRequiredMerges); } /** - * Builder pattern method for setting minimumNeighborLength - * @param minimumNeighborLength containing the new value - * @return MergingOptions with specified minimumNeighborLength + * Builder pattern method for setting minimum length (in tokens) for a pair of neighboring matches to be considered for + * merging. + * @param minimumNeighborLength containing the new value. + * @return the options with the specified configuration. */ public MergingOptions withMinimumNeighborLength(int minimumNeighborLength) { - return new MergingOptions(enabled, minimumNeighborLength, maximumGapSize); + return new MergingOptions(enabled, minimumNeighborLength, maximumGapSize, minimumRequiredMerges); } /** - * Builder pattern method for setting maximumGapSize - * @param maximumGapSize containing the new value - * @return MergingOptions with specified maximumGapSize + * Builder pattern method for setting maximum gap (in tokens) between a pair of matches to be considered for merging. + * @param maximumGapSize containing the new value. + * @return the options with the specified configuration. */ public MergingOptions withMaximumGapSize(int maximumGapSize) { - return new MergingOptions(enabled, minimumNeighborLength, maximumGapSize); + return new MergingOptions(enabled, minimumNeighborLength, maximumGapSize, minimumRequiredMerges); + } + + /** + * Builder pattern method for setting the minimal number of required merges before subsequence match merging has an + * effect. + * @param minimumRequiredMerges containing the new value. + * @return the options with the specified configuration. + */ + public MergingOptions withMinimumRequiredMerges(int minimumRequiredMerges) { + return new MergingOptions(enabled, minimumNeighborLength, maximumGapSize, minimumRequiredMerges); } } diff --git a/core/src/test/java/de/jplag/merging/MergingTest.java b/core/src/test/java/de/jplag/merging/MergingTest.java index e2a606e160..755d3b1065 100644 --- a/core/src/test/java/de/jplag/merging/MergingTest.java +++ b/core/src/test/java/de/jplag/merging/MergingTest.java @@ -41,9 +41,11 @@ class MergingTest extends TestBase { private final SubmissionSet submissionSet; private static final int MINIMUM_NEIGHBOR_LENGTH = 1; private static final int MAXIMUM_GAP_SIZE = 10; + private static final int MINIMUM_REQUIRED_MERGES = 0; MergingTest() throws ExitException { - options = getDefaultOptions("merging").withMergingOptions(new MergingOptions(true, MINIMUM_NEIGHBOR_LENGTH, MAXIMUM_GAP_SIZE)); + options = getDefaultOptions("merging") + .withMergingOptions(new MergingOptions(true, MINIMUM_NEIGHBOR_LENGTH, MAXIMUM_GAP_SIZE, MINIMUM_REQUIRED_MERGES)); GreedyStringTiling coreAlgorithm = new GreedyStringTiling(options); comparisonStrategy = new ParallelComparisonStrategy(options, coreAlgorithm); @@ -201,31 +203,27 @@ void testCorrectMerges() { @DisplayName("Sanity check for match merging") void testSanity() { - List matchesBefore = new ArrayList<>(); - List matchesAfter = new ArrayList<>(); + List matchesBefore = findComparison(comparisonsBefore, "sanityA.java", "sanityB.java").ignoredMatches(); + List matchesAfter = findComparison(comparisonsAfter, "sanityA.java", "sanityB.java").matches(); - for (JPlagComparison comparison : comparisonsBefore) { - if (comparison.toString().equals("sanityA.java <-> sanityB.java")) { - matchesBefore = comparison.ignoredMatches(); - } - } - for (JPlagComparison comparison : comparisonsAfter) { - if (comparison.toString().equals("sanityA.java <-> sanityB.java")) { - matchesAfter = comparison.matches(); - } - } - - List expectedBefore = new ArrayList<>(); - expectedBefore.add(new Match(5, 3, 6)); - expectedBefore.add(new Match(11, 12, 6)); - expectedBefore.add(new Match(0, 0, 3)); - expectedBefore.add(new Match(3, 18, 2)); - expectedBefore.add(new Match(17, 20, 2)); + List expectedBefore = List.of( // + new Match(5, 3, 6), // + new Match(11, 12, 6), // + new Match(0, 0, 3), // + new Match(3, 18, 2), // + new Match(17, 20, 2) // + ); - List expectedAfter = new ArrayList<>(); - expectedAfter.add(new Match(5, 3, 12)); + List expectedAfter = List.of(new Match(5, 3, 12)); assertEquals(expectedBefore, matchesBefore); + assertEquals(expectedAfter, matchesAfter); } + + private static JPlagComparison findComparison(List comparisons, String firstName, String secondName) { + return comparisons.stream() + .filter(it -> firstName.equals(it.firstSubmission().getName()) && secondName.equals(it.secondSubmission().getName())).findAny() + .orElseThrow(); + } } \ No newline at end of file diff --git a/report-viewer/src/model/CliOptions.ts b/report-viewer/src/model/CliOptions.ts index 266d693106..af668affce 100644 --- a/report-viewer/src/model/CliOptions.ts +++ b/report-viewer/src/model/CliOptions.ts @@ -37,4 +37,5 @@ export interface CliMergingOptions { enabled: boolean minNeighborLength: number maxGapSize: number + minimumRequiredMerges: number } diff --git a/report-viewer/src/model/factories/OptionsFactory.ts b/report-viewer/src/model/factories/OptionsFactory.ts index 3cd5a30bd5..0b4241e7b3 100644 --- a/report-viewer/src/model/factories/OptionsFactory.ts +++ b/report-viewer/src/model/factories/OptionsFactory.ts @@ -48,7 +48,8 @@ export class OptionsFactory extends BaseFactory { return { enabled: json['enabled'] as boolean, minNeighborLength: json['min_neighbour_length'] as number, - maxGapSize: json['max_gap_size'] as number + maxGapSize: json['max_gap_size'] as number, + minimumRequiredMerges: json['min_required_merges'] as number } } diff --git a/report-viewer/src/views/InformationView.vue b/report-viewer/src/views/InformationView.vue index a41f91b394..8c6431559d 100644 --- a/report-viewer/src/views/InformationView.vue +++ b/report-viewer/src/views/InformationView.vue @@ -94,6 +94,9 @@ {{ options.mergingOptions.maxGapSize }} }} + {{ options.mergingOptions.minimumRequiredMerges }} }} diff --git a/report-viewer/tests/unit/model/factories/OptionsFactory.test.ts b/report-viewer/tests/unit/model/factories/OptionsFactory.test.ts index 5cbc82bee9..52a39eed27 100644 --- a/report-viewer/tests/unit/model/factories/OptionsFactory.test.ts +++ b/report-viewer/tests/unit/model/factories/OptionsFactory.test.ts @@ -46,7 +46,8 @@ describe('Test JSON to Options', async () => { mergingOptions: { enabled: false, minNeighborLength: 0, - maxGapSize: 0 + maxGapSize: 0, + minimumRequiredMerges: 3 } }) }) diff --git a/report-viewer/tests/unit/model/factories/ValidOptions.json b/report-viewer/tests/unit/model/factories/ValidOptions.json index 1c4fbb595d..dd3afa9f83 100644 --- a/report-viewer/tests/unit/model/factories/ValidOptions.json +++ b/report-viewer/tests/unit/model/factories/ValidOptions.json @@ -33,6 +33,7 @@ "merging": { "enabled": false, "min_neighbour_length": 0, - "max_gap_size": 0 + "max_gap_size": 0, + "min_required_merges": 3 } } \ No newline at end of file