Skip to content

Commit

Permalink
Improve performance of the neighbor calculation and match filtering during match merging.
Browse files Browse the repository at this point in the history
  • Loading branch information
tsaglam committed Dec 15, 2023
1 parent ca9dd7c commit 16ccc33
Show file tree
Hide file tree
Showing 2 changed files with 26 additions and 27 deletions.
6 changes: 2 additions & 4 deletions core/src/main/java/de/jplag/Match.java
Original file line number Diff line number Diff line change
Expand Up @@ -16,10 +16,8 @@ public boolean overlaps(Match other) {
if ((other.startOfFirst - startOfFirst) < length) {
return true;
}
} else {
if ((startOfFirst - other.startOfFirst) < other.length) {
return true;
}
} else if ((startOfFirst - other.startOfFirst) < other.length) {
return true;
}

if (startOfSecond < other.startOfSecond) {
Expand Down
47 changes: 24 additions & 23 deletions core/src/main/java/de/jplag/merging/MatchMerging.java
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,9 @@

import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

import de.jplag.JPlagComparison;
import de.jplag.JPlagResult;
Expand Down Expand Up @@ -49,7 +51,8 @@ public JPlagResult mergeMatchesOf(JPlagResult result) {
Submission rightSubmission = comparison.secondSubmission().copy();
List<Match> globalMatches = new ArrayList<>(comparison.matches());
globalMatches.addAll(comparison.ignoredMatches());
globalMatches = removeTooShortMatches(mergeNeighbors(globalMatches, leftSubmission, rightSubmission));
globalMatches = mergeNeighbors(globalMatches, leftSubmission, rightSubmission);
globalMatches = globalMatches.stream().filter(it -> it.length() >= options.minimumTokenMatch()).toList();
comparisonsMerged.add(new JPlagComparison(leftSubmission, rightSubmission, globalMatches, new ArrayList<>()));
}

Expand All @@ -65,15 +68,28 @@ public JPlagResult mergeMatchesOf(JPlagResult result) {
*/
private List<Neighbor> computeNeighbors(List<Match> globalMatches) {
List<Neighbor> neighbors = new ArrayList<>();
List<Match> sortedByLeft = new ArrayList<>(globalMatches);
Collections.sort(sortedByLeft, (match1, match2) -> match1.startOfFirst() - match2.startOfFirst());
List<Match> sortedByRight = new ArrayList<>(globalMatches);
Collections.sort(sortedByRight, (match1, match2) -> match1.startOfSecond() - match2.startOfSecond());
for (int i = 0; i < sortedByLeft.size() - 1; i++) {
if (sortedByRight.indexOf(sortedByLeft.get(i)) == (sortedByRight.indexOf(sortedByLeft.get(i + 1)) - 1)) {
neighbors.add(new Neighbor(sortedByLeft.get(i), sortedByLeft.get(i + 1)));

Map<Integer, List<Match>> matchesByLeft = new HashMap<>();
Map<Integer, List<Match>> matchesByRight = new HashMap<>();

// Group matches by their left and right positions
for (Match match : globalMatches) {
matchesByLeft.computeIfAbsent(match.startOfFirst(), key -> new ArrayList<>()).add(match);
matchesByRight.computeIfAbsent(match.startOfSecond(), key -> new ArrayList<>()).add(match);
}

// Iterate through the matches and find neighbors
for (List<Match> matches : matchesByLeft.values()) {
for (Match match : matches) {
List<Match> rightMatches = matchesByRight.getOrDefault(match.startOfSecond(), Collections.emptyList());
for (Match rightMatch : rightMatches) {
if (rightMatch != match) {
neighbors.add(new Neighbor(match, rightMatch));
}
}
}
}

return neighbors;
}

Expand Down Expand Up @@ -185,19 +201,4 @@ private List<Match> removeToken(List<Match> globalMatches, Submission leftSubmis

return shiftedMatches;
}

/**
 * This method marks the end of the merging pipeline and removes the remaining too short matches from the given
 * list, i.e. every match whose length is below {@code options.minimumTokenMatch()}.
 * @param globalMatches the matches to filter; the list is modified in place and must therefore be mutable.
 * @return the same list instance that was passed in, without the too short matches.
 */
private List<Match> removeTooShortMatches(List<Match> globalMatches) {
    // removeIf filters in a single pass; no temporary list and no quadratic removeAll needed.
    globalMatches.removeIf(match -> match.length() < options.minimumTokenMatch());
    return globalMatches;
}
}

0 comments on commit 16ccc33

Please sign in to comment.