Skip to content

Commit

Permalink
Added code to strip non common tokens
Browse files Browse the repository at this point in the history
  • Loading branch information
TwoOfTwelve committed Jan 15, 2025
1 parent e4025ac commit d1253fd
Show file tree
Hide file tree
Showing 2 changed files with 20 additions and 1 deletion.
18 changes: 17 additions & 1 deletion core/src/main/java/de/jplag/JPlag.java
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
import java.util.List;
import java.util.ResourceBundle;
import java.util.Set;
import java.util.stream.Collectors;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
Expand Down Expand Up @@ -70,7 +71,7 @@ public static JPlagResult run(JPlagOptions options) throws ExitException {
ComparisonStrategy comparisonStrategy = new ParallelComparisonStrategy(options, coreAlgorithm);
// Parse and validate submissions.
SubmissionSetBuilder builder = new SubmissionSetBuilder(options);
SubmissionSet submissionSet = builder.buildSubmissionSet();
SubmissionSet submissionSet = JPlag.stripNonCommonTokens(builder.buildSubmissionSet());
if (options.normalize() && options.language().supportsNormalization() && options.language().requiresCoreNormalization()) {
submissionSet.normalizeSubmissions();
}
Expand All @@ -97,6 +98,21 @@ public static JPlagResult run(JPlagOptions options) throws ExitException {
return result;
}

/**
 * Creates a new submission set in which every submission is replaced by a copy whose token list only contains tokens
 * with an allowed context. The allowed contexts are gathered by asking the language of every token of every submission
 * for its token contexts. The base code submission and the options of the given set are handed to the new set as they
 * are returned by the given set.
 * <p>
 * NOTE(review): the set of allowed contexts is the union over all tokens, although the method name speaks of "common"
 * tokens - confirm that a union (and not an intersection) is intended. The base code submission is not filtered here.
 * @param submissionSet is the submission set whose submissions are copied and filtered.
 * @return a new submission set built from the filtered copies.
 * @throws ExitException declared for the construction of the new submission set.
 */
private static SubmissionSet stripNonCommonTokens(SubmissionSet submissionSet) throws ExitException {
    List<Submission> originals = submissionSet.getSubmissions();

    // Every context that the language of at least one token reports.
    Set<Class<?>> allowedContexts = originals.stream()
            .flatMap(submission -> submission.getTokenList().stream())
            .map(token -> token.getLanguage().getTokenContexts())
            .flatMap(contexts -> contexts.stream())
            .collect(Collectors.toSet());

    List<Submission> filteredCopies = originals.stream()
            .map(original -> copyRetainingContexts(original, allowedContexts))
            .toList();

    return new SubmissionSet(filteredCopies, submissionSet.getBaseCode(), submissionSet.getOptions());
}

/**
 * Copies a submission and gives the copy a token list that only holds the tokens whose type context is allowed.
 * @param original is the submission to copy; its own token list is only read.
 * @param allowedContexts are the contexts a token type must belong to for the token to be kept.
 * @return the copy carrying the filtered token list.
 */
private static Submission copyRetainingContexts(Submission original, Set<Class<?>> allowedContexts) {
    Submission filteredCopy = original.copy();
    filteredCopy.setTokenList(original.getTokenList().stream()
            .filter(token -> allowedContexts.contains(token.getType().getContext()))
            .toList());
    return filteredCopy;
}

private static void logSkippedSubmissions(SubmissionSet submissionSet, JPlagOptions options) {
List<Submission> skippedSubmissions = submissionSet.getInvalidSubmissions();
if (!skippedSubmissions.isEmpty()) {
Expand Down
3 changes: 3 additions & 0 deletions core/src/main/java/de/jplag/SubmissionSet.java
Original file line number Diff line number Diff line change
Expand Up @@ -175,4 +175,7 @@ private void parseSubmissions(List<Submission> submissions) throws LanguageExcep
logger.debug("{} parser error{}!", errors, (errors != 1 ? "s" : ""));
}

/**
 * @return the options stored in this submission set.
 */
public JPlagOptions getOptions() {
    return this.options;
}
}

0 comments on commit d1253fd

Please sign in to comment.