Skip to content

Commit

Permalink
Add option and CLI parameter for the minimum merges and adapt tests.
Browse files Browse the repository at this point in the history
  • Loading branch information
tsaglam committed Feb 17, 2025
1 parent 20b86ec commit fa3badc
Show file tree
Hide file tree
Showing 5 changed files with 57 additions and 41 deletions.
2 changes: 1 addition & 1 deletion cli/src/main/java/de/jplag/cli/JPlagOptionsBuilder.java
Original file line number Diff line number Diff line change
Expand Up @@ -99,6 +99,6 @@ private ClusteringOptions getClusteringOptions() {

private MergingOptions getMergingOptions() {
return new MergingOptions(this.cliOptions.merging.enabled, this.cliOptions.merging.minimumNeighborLength,
this.cliOptions.merging.maximumGapSize);
this.cliOptions.merging.maximumGapSize, this.cliOptions.merging.minimumRequiredMerges);
}
}
4 changes: 4 additions & 0 deletions cli/src/main/java/de/jplag/cli/options/CliOptions.java
Original file line number Diff line number Diff line change
Expand Up @@ -139,6 +139,10 @@ public static class Merging {
"--gap-size"}, description = "Maximal gap between neighboring matches to be merged (between 1 and minTokenMatch, default: ${DEFAULT-VALUE}).")
public int maximumGapSize = MergingOptions.DEFAULT_GAP_SIZE;

@Option(names = {
"--required-merges"}, description = "Minimal required merges for the merging to be applied (between 1 and 50, default: ${DEFAULT-VALUE}).")
public int minimumRequiredMerges = MergingOptions.DEFAULT_REQUIRED_MERGES;

}

@Option(names = {"--cluster-spectral-bandwidth"}, hidden = true)
Expand Down
3 changes: 1 addition & 2 deletions core/src/main/java/de/jplag/merging/MatchMerging.java
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,6 @@
*/
public class MatchMerging {
private final JPlagOptions options;
private final static int MINIMUM_REQUIRED_MERGES = 3;
private int numberOfMerges;

/**
Expand Down Expand Up @@ -56,7 +55,7 @@ public JPlagResult mergeMatchesOf(JPlagResult result) {
globalMatches.addAll(comparison.ignoredMatches());
globalMatches = mergeNeighbors(globalMatches, leftSubmission, rightSubmission);
globalMatches = globalMatches.stream().filter(it -> it.length() >= options.minimumTokenMatch()).toList();
if (numberOfMerges >= MINIMUM_REQUIRED_MERGES) {
if (numberOfMerges >= options.mergingOptions().minimumRequiredMerges()) {
comparisonsMerged.add(new JPlagComparison(leftSubmission, rightSubmission, globalMatches, new ArrayList<>()));
} else {
comparisonsMerged.add(comparison);
Expand Down
47 changes: 31 additions & 16 deletions core/src/main/java/de/jplag/merging/MergingOptions.java
Original file line number Diff line number Diff line change
Expand Up @@ -8,44 +8,59 @@
* @param maximumGapSize describes how many tokens can be between two neighboring matches (Defaults to 6).
*/
public record MergingOptions(@JsonProperty("enabled") boolean enabled, @JsonProperty("min_neighbour_length") int minimumNeighborLength,
@JsonProperty("max_gap_size") int maximumGapSize) {
@JsonProperty("max_gap_size") int maximumGapSize, @JsonProperty("min_required_merges") int minimumRequiredMerges) {

public static final boolean DEFAULT_ENABLED = false;
public static final int DEFAULT_NEIGHBOR_LENGTH = 2;
public static final int DEFAULT_GAP_SIZE = 6;
public static final int DEFAULT_REQUIRED_MERGES = 3;

/**
* The default values of MergingOptions are false for the enable-switch, which deactivates MatchMerging, while
* minimumNeighborLength and maximumGapSize default to (2,6), which in testing yielded the best results.
* Creates merging options with default parameters.
* @see MergingOptions#DEFAULT_ENABLED
* @see MergingOptions#DEFAULT_NEIGHBOR_LENGTH
* @see MergingOptions#DEFAULT_GAP_SIZE
* @see MergingOptions#DEFAULT_REQUIRED_MERGES
*/
public MergingOptions() {
this(DEFAULT_ENABLED, DEFAULT_NEIGHBOR_LENGTH, DEFAULT_GAP_SIZE);
this(DEFAULT_ENABLED, DEFAULT_NEIGHBOR_LENGTH, DEFAULT_GAP_SIZE, DEFAULT_REQUIRED_MERGES);
}

/**
* Builder pattern method for setting enabled
* @param enabled containing the new value
* @return MergingOptions with specified enabled
* Builder pattern method for enabling and disabling the subsequence match merging mechanism.
* @param enabled specifying if merging is enabled or not.
* @return the options with the specified configuration.
*/
public MergingOptions withEnabled(boolean enabled) {
return new MergingOptions(enabled, minimumNeighborLength, maximumGapSize);
return new MergingOptions(enabled, minimumNeighborLength, maximumGapSize, minimumRequiredMerges);
}

/**
* Builder pattern method for setting minimumNeighborLength
* @param minimumNeighborLength containing the new value
* @return MergingOptions with specified minimumNeighborLength
* Builder pattern method for setting minimum length (in tokens) for a pair of neighboring matches to be considered for
* merging.
* @param minimumNeighborLength containing the new value.
* @return the options with the specified configuration.
*/
public MergingOptions withMinimumNeighborLength(int minimumNeighborLength) {
return new MergingOptions(enabled, minimumNeighborLength, maximumGapSize);
return new MergingOptions(enabled, minimumNeighborLength, maximumGapSize, minimumRequiredMerges);
}

/**
* Builder pattern method for setting maximumGapSize
* @param maximumGapSize containing the new value
* @return MergingOptions with specified maximumGapSize
* Builder pattern method for setting maximum gap (in tokens) between a pair of matches to be considered for merging.
* @param maximumGapSize containing the new value.
* @return the options with the specified configuration.
*/
public MergingOptions withMaximumGapSize(int maximumGapSize) {
return new MergingOptions(enabled, minimumNeighborLength, maximumGapSize);
return new MergingOptions(enabled, minimumNeighborLength, maximumGapSize, minimumRequiredMerges);
}

/**
 * Builder pattern method for configuring how many merges are required at minimum before subsequence match merging
 * has an effect.
 * @param minimumRequiredMerges is the new minimal number of required merges.
 * @return a new options instance that carries the given value and keeps all other settings.
 */
public MergingOptions withMinimumRequiredMerges(int minimumRequiredMerges) {
    final MergingOptions updatedOptions = new MergingOptions(enabled, minimumNeighborLength, maximumGapSize, minimumRequiredMerges);
    return updatedOptions;
}
}
42 changes: 20 additions & 22 deletions core/src/test/java/de/jplag/merging/MergingTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -41,9 +41,11 @@ class MergingTest extends TestBase {
private final SubmissionSet submissionSet;
private static final int MINIMUM_NEIGHBOR_LENGTH = 1;
private static final int MAXIMUM_GAP_SIZE = 10;
private static final int MINIMUM_REQUIRED_MERGES = 0;

MergingTest() throws ExitException {
options = getDefaultOptions("merging").withMergingOptions(new MergingOptions(true, MINIMUM_NEIGHBOR_LENGTH, MAXIMUM_GAP_SIZE));
options = getDefaultOptions("merging")
.withMergingOptions(new MergingOptions(true, MINIMUM_NEIGHBOR_LENGTH, MAXIMUM_GAP_SIZE, MINIMUM_REQUIRED_MERGES));

GreedyStringTiling coreAlgorithm = new GreedyStringTiling(options);
comparisonStrategy = new ParallelComparisonStrategy(options, coreAlgorithm);
Expand Down Expand Up @@ -201,31 +203,27 @@ void testCorrectMerges() {
@DisplayName("Sanity check for match merging")
void testSanity() {

List<Match> matchesBefore = new ArrayList<>();
List<Match> matchesAfter = new ArrayList<>();
List<Match> matchesBefore = findComparison(comparisonsBefore, "sanityA.java", "sanityB.java").ignoredMatches();
List<Match> matchesAfter = findComparison(comparisonsAfter, "sanityA.java", "sanityB.java").matches();

for (JPlagComparison comparison : comparisonsBefore) {
if (comparison.toString().equals("sanityA.java <-> sanityB.java")) {
matchesBefore = comparison.ignoredMatches();
}
}
for (JPlagComparison comparison : comparisonsAfter) {
if (comparison.toString().equals("sanityA.java <-> sanityB.java")) {
matchesAfter = comparison.matches();
}
}

List<Match> expectedBefore = new ArrayList<>();
expectedBefore.add(new Match(5, 3, 6));
expectedBefore.add(new Match(11, 12, 6));
expectedBefore.add(new Match(0, 0, 3));
expectedBefore.add(new Match(3, 18, 2));
expectedBefore.add(new Match(17, 20, 2));
List<Match> expectedBefore = List.of( //
new Match(5, 3, 6), //
new Match(11, 12, 6), //
new Match(0, 0, 3), //
new Match(3, 18, 2), //
new Match(17, 20, 2) //
);

List<Match> expectedAfter = new ArrayList<>();
expectedAfter.add(new Match(5, 3, 12));
List<Match> expectedAfter = List.of(new Match(5, 3, 12));

assertEquals(expectedBefore, matchesBefore);

assertEquals(expectedAfter, matchesAfter);
}

/**
 * Looks up the comparison between the two submissions with the given names.
 * @param comparisons is the list of comparisons to search.
 * @param firstName is the name of the first submission of the wanted comparison.
 * @param secondName is the name of the second submission of the wanted comparison.
 * @return a comparison whose first and second submission carry exactly the given names.
 * @throws java.util.NoSuchElementException if the list contains no such comparison.
 */
private static JPlagComparison findComparison(List<JPlagComparison> comparisons, String firstName, String secondName) {
    return comparisons.stream()
            .filter(it -> firstName.equals(it.firstSubmission().getName()) && secondName.equals(it.secondSubmission().getName()))
            .findAny()
            // Name the missing pair so that a failing test points directly at the absent comparison
            // instead of reporting a bare "No value present".
            .orElseThrow(() -> new java.util.NoSuchElementException("No comparison found for " + firstName + " <-> " + secondName));
}
}

0 comments on commit fa3badc

Please sign in to comment.