Skip to content

Commit

Permalink
Merge pull request #2188 from jplag/feature/match-merging-improvement
Browse files Browse the repository at this point in the history
Enforce minimum merges when using match merging.
  • Loading branch information
tsaglam authored Feb 19, 2025
2 parents 2f45711 + f074b37 commit cd83953
Show file tree
Hide file tree
Showing 12 changed files with 100 additions and 43 deletions.
2 changes: 1 addition & 1 deletion cli/src/main/java/de/jplag/cli/JPlagOptionsBuilder.java
Original file line number Diff line number Diff line change
Expand Up @@ -99,6 +99,6 @@ private ClusteringOptions getClusteringOptions() {

private MergingOptions getMergingOptions() {
return new MergingOptions(this.cliOptions.merging.enabled, this.cliOptions.merging.minimumNeighborLength,
this.cliOptions.merging.maximumGapSize);
this.cliOptions.merging.maximumGapSize, this.cliOptions.merging.minimumRequiredMerges);
}
}
4 changes: 4 additions & 0 deletions cli/src/main/java/de/jplag/cli/options/CliOptions.java
Original file line number Diff line number Diff line change
Expand Up @@ -139,6 +139,10 @@ public static class Merging {
"--gap-size"}, description = "Maximal gap between neighboring matches to be merged (between 1 and minTokenMatch, default: ${DEFAULT-VALUE}).")
public int maximumGapSize = MergingOptions.DEFAULT_GAP_SIZE;

@Option(names = {
"--required-merges"}, description = "Minimal required merges for the merging to be applied (between 1 and 50, default: ${DEFAULT-VALUE}).")
public int minimumRequiredMerges = MergingOptions.DEFAULT_REQUIRED_MERGES;

}

@Option(names = {"--cluster-spectral-bandwidth"}, hidden = true)
Expand Down
21 changes: 21 additions & 0 deletions cli/src/test/java/de/jplag/cli/MergingOptionsTest.java
Original file line number Diff line number Diff line change
@@ -1,5 +1,9 @@
package de.jplag.cli;

import static de.jplag.cli.test.CliArgument.GAP_SIZE;
import static de.jplag.cli.test.CliArgument.MERGING_ENABLED;
import static de.jplag.cli.test.CliArgument.NEIGHBOR_LENGTH;
import static de.jplag.cli.test.CliArgument.REQUIRED_MERGES;
import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertNotNull;

Expand Down Expand Up @@ -27,5 +31,22 @@ void testMergingDefault() throws ExitException, IOException {
assertEquals(MergingOptions.DEFAULT_ENABLED, options.mergingOptions().enabled());
assertEquals(MergingOptions.DEFAULT_NEIGHBOR_LENGTH, options.mergingOptions().minimumNeighborLength());
assertEquals(MergingOptions.DEFAULT_GAP_SIZE, options.mergingOptions().maximumGapSize());
assertEquals(MergingOptions.DEFAULT_REQUIRED_MERGES, options.mergingOptions().minimumRequiredMerges());
}

@Test
@DisplayName("Test if custom values are correctly propagated when creating merging options from CLI")
void testMergingCustom() throws ExitException, IOException {
int customNumber = 7;

JPlagOptions options = runCliForOptions(args -> args.with(MERGING_ENABLED).with(NEIGHBOR_LENGTH, customNumber).with(GAP_SIZE, customNumber)
.with(REQUIRED_MERGES, customNumber));

assertNotNull(options.mergingOptions());
assertEquals(true, options.mergingOptions().enabled());
assertEquals(customNumber, options.mergingOptions().minimumNeighborLength());
assertEquals(customNumber, options.mergingOptions().maximumGapSize());
assertEquals(customNumber, options.mergingOptions().minimumRequiredMerges());
}

}
5 changes: 5 additions & 0 deletions cli/src/test/java/de/jplag/cli/test/CliArgument.java
Original file line number Diff line number Diff line change
Expand Up @@ -36,4 +36,9 @@ public record CliArgument<T>(String name, boolean isPositional) {
public static CliArgument<String> EXCLUDE_FILES = new CliArgument<>("x", false);

public static CliArgument<String> MODE = new CliArgument<>("mode", false);

public static CliArgument<Boolean> MERGING_ENABLED = new CliArgument<>("match-merging", false);
public static CliArgument<Integer> NEIGHBOR_LENGTH = new CliArgument<>("neighbor-length", false);
public static CliArgument<Integer> GAP_SIZE = new CliArgument<>("gap-size", false);
public static CliArgument<Integer> REQUIRED_MERGES = new CliArgument<>("required-merges", false);
}
9 changes: 8 additions & 1 deletion core/src/main/java/de/jplag/merging/MatchMerging.java
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@
*/
public class MatchMerging {
private final JPlagOptions options;
private int numberOfMerges;

/**
* Instantiates the match merging algorithm for a comparison result and a set of specific options.
Expand All @@ -47,13 +48,18 @@ public JPlagResult mergeMatchesOf(JPlagResult result) {
List<JPlagComparison> comparisonsMerged = new ArrayList<>();

ProgressBarLogger.iterate(ProgressBarType.MATCH_MERGING, comparisons, comparison -> {
numberOfMerges = 0;
Submission leftSubmission = comparison.firstSubmission().copy();
Submission rightSubmission = comparison.secondSubmission().copy();
List<Match> globalMatches = new ArrayList<>(comparison.matches());
globalMatches.addAll(comparison.ignoredMatches());
globalMatches = mergeNeighbors(globalMatches, leftSubmission, rightSubmission);
globalMatches = globalMatches.stream().filter(it -> it.length() >= options.minimumTokenMatch()).toList();
comparisonsMerged.add(new JPlagComparison(leftSubmission, rightSubmission, globalMatches, new ArrayList<>()));
if (numberOfMerges >= options.mergingOptions().minimumRequiredMerges()) {
comparisonsMerged.add(new JPlagComparison(leftSubmission, rightSubmission, globalMatches, new ArrayList<>()));
} else {
comparisonsMerged.add(comparison);
}
});

long durationInMillis = System.currentTimeMillis() - timeBeforeStartInMillis;
Expand Down Expand Up @@ -111,6 +117,7 @@ private List<Match> mergeNeighbors(List<Match> globalMatches, Submission leftSub
globalMatches = removeToken(globalMatches, leftSubmission, rightSubmission, upperNeighbor, tokenBetweenLeft, tokensBetweenRight);
neighbors = computeNeighbors(globalMatches);
i = 0;
numberOfMerges++;
} else {
i++;
}
Expand Down
47 changes: 31 additions & 16 deletions core/src/main/java/de/jplag/merging/MergingOptions.java
Original file line number Diff line number Diff line change
Expand Up @@ -8,44 +8,59 @@
* @param maximumGapSize describes how many tokens can be between to neighboring matches (Defaults to 6).
*/
public record MergingOptions(@JsonProperty("enabled") boolean enabled, @JsonProperty("min_neighbour_length") int minimumNeighborLength,
@JsonProperty("max_gap_size") int maximumGapSize) {
@JsonProperty("max_gap_size") int maximumGapSize, @JsonProperty("min_required_merges") int minimumRequiredMerges) {

public static final boolean DEFAULT_ENABLED = false;
public static final int DEFAULT_NEIGHBOR_LENGTH = 2;
public static final int DEFAULT_GAP_SIZE = 6;
public static final int DEFAULT_REQUIRED_MERGES = 6;

/**
* The default values of MergingOptions are false for the enable-switch, which deactivate MatchMerging, while
* minimumNeighborLength and maximumGapSize default to (2,6), which in testing yielded the best results.
* Creates merging options with default parameters.
* @see MergingOptions#DEFAULT_ENABLED
* @see MergingOptions#DEFAULT_NEIGHBOR_LENGTH
* @see MergingOptions#DEFAULT_GAP_SIZE
* @see MergingOptions#DEFAULT_REQUIRED_MERGES
*/
public MergingOptions() {
this(DEFAULT_ENABLED, DEFAULT_NEIGHBOR_LENGTH, DEFAULT_GAP_SIZE);
this(DEFAULT_ENABLED, DEFAULT_NEIGHBOR_LENGTH, DEFAULT_GAP_SIZE, DEFAULT_REQUIRED_MERGES);
}

/**
* Builder pattern method for setting enabled
* @param enabled containing the new value
* @return MergingOptions with specified enabled
* Builder pattern method for enabling and disabling the subsequence match merging mechanism.
* @param enabled specifying if merging is enabled or not.
* @return the options with the specified configuration.
*/
public MergingOptions withEnabled(boolean enabled) {
return new MergingOptions(enabled, minimumNeighborLength, maximumGapSize);
return new MergingOptions(enabled, minimumNeighborLength, maximumGapSize, minimumRequiredMerges);
}

/**
* Builder pattern method for setting minimumNeighborLength
* @param minimumNeighborLength containing the new value
* @return MergingOptions with specified minimumNeighborLength
* Builder pattern method for setting minimum length (in tokens) for a pair of neighboring matches to be considered for
* merging.
* @param minimumNeighborLength containing the new value.
* @return the options with the specified configuration.
*/
public MergingOptions withMinimumNeighborLength(int minimumNeighborLength) {
return new MergingOptions(enabled, minimumNeighborLength, maximumGapSize);
return new MergingOptions(enabled, minimumNeighborLength, maximumGapSize, minimumRequiredMerges);
}

/**
* Builder pattern method for setting maximumGapSize
* @param maximumGapSize containing the new value
* @return MergingOptions with specified maximumGapSize
* Builder pattern method for setting maximum gap (in tokens) between a pair of matches to be considered for merging.
* @param maximumGapSize containing the new value.
* @return the options with the specified configuration.
*/
public MergingOptions withMaximumGapSize(int maximumGapSize) {
return new MergingOptions(enabled, minimumNeighborLength, maximumGapSize);
return new MergingOptions(enabled, minimumNeighborLength, maximumGapSize, minimumRequiredMerges);
}

/**
* Builder pattern method for setting the minimal number of required merges before subsequence match merging has an
* effect.
* @param minimumRequiredMerges containing the new value.
* @return the options with the specified configuration.
*/
public MergingOptions withMinimumRequiredMerges(int minimumRequiredMerges) {
return new MergingOptions(enabled, minimumNeighborLength, maximumGapSize, minimumRequiredMerges);
}
}
42 changes: 20 additions & 22 deletions core/src/test/java/de/jplag/merging/MergingTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -41,9 +41,11 @@ class MergingTest extends TestBase {
private final SubmissionSet submissionSet;
private static final int MINIMUM_NEIGHBOR_LENGTH = 1;
private static final int MAXIMUM_GAP_SIZE = 10;
private static final int MINIMUM_REQUIRED_MERGES = 0;

MergingTest() throws ExitException {
options = getDefaultOptions("merging").withMergingOptions(new MergingOptions(true, MINIMUM_NEIGHBOR_LENGTH, MAXIMUM_GAP_SIZE));
options = getDefaultOptions("merging")
.withMergingOptions(new MergingOptions(true, MINIMUM_NEIGHBOR_LENGTH, MAXIMUM_GAP_SIZE, MINIMUM_REQUIRED_MERGES));

GreedyStringTiling coreAlgorithm = new GreedyStringTiling(options);
comparisonStrategy = new ParallelComparisonStrategy(options, coreAlgorithm);
Expand Down Expand Up @@ -201,31 +203,27 @@ void testCorrectMerges() {
@DisplayName("Sanity check for match merging")
void testSanity() {

List<Match> matchesBefore = new ArrayList<>();
List<Match> matchesAfter = new ArrayList<>();
List<Match> matchesBefore = findComparison(comparisonsBefore, "sanityA.java", "sanityB.java").ignoredMatches();
List<Match> matchesAfter = findComparison(comparisonsAfter, "sanityA.java", "sanityB.java").matches();

for (JPlagComparison comparison : comparisonsBefore) {
if (comparison.toString().equals("sanityA.java <-> sanityB.java")) {
matchesBefore = comparison.ignoredMatches();
}
}
for (JPlagComparison comparison : comparisonsAfter) {
if (comparison.toString().equals("sanityA.java <-> sanityB.java")) {
matchesAfter = comparison.matches();
}
}

List<Match> expectedBefore = new ArrayList<>();
expectedBefore.add(new Match(5, 3, 6));
expectedBefore.add(new Match(11, 12, 6));
expectedBefore.add(new Match(0, 0, 3));
expectedBefore.add(new Match(3, 18, 2));
expectedBefore.add(new Match(17, 20, 2));
List<Match> expectedBefore = List.of( //
new Match(5, 3, 6), //
new Match(11, 12, 6), //
new Match(0, 0, 3), //
new Match(3, 18, 2), //
new Match(17, 20, 2) //
);

List<Match> expectedAfter = new ArrayList<>();
expectedAfter.add(new Match(5, 3, 12));
List<Match> expectedAfter = List.of(new Match(5, 3, 12));

assertEquals(expectedBefore, matchesBefore);

assertEquals(expectedAfter, matchesAfter);
}

private static JPlagComparison findComparison(List<JPlagComparison> comparisons, String firstName, String secondName) {
return comparisons.stream()
.filter(it -> firstName.equals(it.firstSubmission().getName()) && secondName.equals(it.secondSubmission().getName())).findAny()
.orElseThrow();
}
}
1 change: 1 addition & 0 deletions report-viewer/src/model/CliOptions.ts
Original file line number Diff line number Diff line change
Expand Up @@ -37,4 +37,5 @@ export interface CliMergingOptions {
enabled: boolean
minNeighborLength: number
maxGapSize: number
minimumRequiredMerges: number
}
3 changes: 2 additions & 1 deletion report-viewer/src/model/factories/OptionsFactory.ts
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,8 @@ export class OptionsFactory extends BaseFactory {
return {
enabled: json['enabled'] as boolean,
minNeighborLength: json['min_neighbour_length'] as number,
maxGapSize: json['max_gap_size'] as number
maxGapSize: json['max_gap_size'] as number,
minimumRequiredMerges: json['min_required_merges'] as number
}
}

Expand Down
3 changes: 3 additions & 0 deletions report-viewer/src/views/InformationView.vue
Original file line number Diff line number Diff line change
Expand Up @@ -94,6 +94,9 @@
<TextInformation label="Max Gap Size"
>{{ options.mergingOptions.maxGapSize }} }}</TextInformation
>
<TextInformation label="Min Required Merges"
>{{ options.mergingOptions.minimumRequiredMerges }} }}</TextInformation
>
</div>
</div>
</ScrollableComponent>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,8 @@ describe('Test JSON to Options', async () => {
mergingOptions: {
enabled: false,
minNeighborLength: 0,
maxGapSize: 0
maxGapSize: 0,
minimumRequiredMerges: 3
}
})
})
Expand Down
3 changes: 2 additions & 1 deletion report-viewer/tests/unit/model/factories/ValidOptions.json
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@
"merging": {
"enabled": false,
"min_neighbour_length": 0,
"max_gap_size": 0
"max_gap_size": 0,
"min_required_merges": 3
}
}

0 comments on commit cd83953

Please sign in to comment.