From 25257054258ef1bd0f56e2bef328e6c43de098eb Mon Sep 17 00:00:00 2001 From: Alexander Milster Date: Wed, 29 Jan 2025 20:39:15 +0100 Subject: [PATCH 01/10] Added automatic mode selection --- cli/src/main/java/de/jplag/cli/CLI.java | 29 +++++++++++++++++++ .../java/de/jplag/cli/options/CliOptions.java | 5 ++-- .../java/de/jplag/cli/options/JPlagMode.java | 6 +++- 3 files changed, 37 insertions(+), 3 deletions(-) diff --git a/cli/src/main/java/de/jplag/cli/CLI.java b/cli/src/main/java/de/jplag/cli/CLI.java index 263a9021ed..a0b462f372 100644 --- a/cli/src/main/java/de/jplag/cli/CLI.java +++ b/cli/src/main/java/de/jplag/cli/CLI.java @@ -3,6 +3,8 @@ import java.io.File; import java.io.FileNotFoundException; import java.io.IOException; +import java.util.ArrayList; +import java.util.List; import org.slf4j.ILoggerFactory; import org.slf4j.Logger; @@ -32,6 +34,8 @@ public final class CLI { private static final String OUTPUT_FILE_EXISTS = "The output file (also with suffixes e.g. results(1).zip) already exists. 
You can use --overwrite to overwrite the file."; private static final String OUTPUT_FILE_NOT_WRITABLE = "The output file (%s) cannot be written to."; + private static final String ZIP_FILE_ENDING = ".zip"; + private final CliInputHandler inputHandler; /** @@ -59,6 +63,7 @@ public void executeCli() throws ExitException, IOException { case RUN -> runJPlag(); case VIEW -> runViewer(null); case RUN_AND_VIEW -> runViewer(runJPlag()); + case AUTO -> selectModeAutomatically(); } } } @@ -115,6 +120,30 @@ public void runViewer(File zipFile) throws IOException { JPlagRunner.runInternalServer(zipFile, this.inputHandler.getCliOptions().advanced.port); } + private void selectModeAutomatically() throws IOException, ExitException { + List inputs = this.getAllInputs(); + + if (inputs.isEmpty()) { + this.runViewer(null); + return; + } + + if (inputs.size() == 1 && inputs.getFirst().getName().endsWith(ZIP_FILE_ENDING)) { + this.runViewer(inputs.getFirst()); + return; + } + + this.runViewer(this.runJPlag()); + } + + private List getAllInputs() { + List inputs = new ArrayList<>(); + inputs.addAll(List.of(this.inputHandler.getCliOptions().newDirectories)); + inputs.addAll(List.of(this.inputHandler.getCliOptions().oldDirectories)); + inputs.addAll(List.of(this.inputHandler.getCliOptions().rootDirectory)); + return inputs; + } + private void finalizeLogger() { ILoggerFactory factory = LoggerFactory.getILoggerFactory(); if (!(factory instanceof CollectedLoggerFactory collectedLoggerFactory)) { diff --git a/cli/src/main/java/de/jplag/cli/options/CliOptions.java b/cli/src/main/java/de/jplag/cli/options/CliOptions.java index 748a8f6c8b..7a363cd7df 100644 --- a/cli/src/main/java/de/jplag/cli/options/CliOptions.java +++ b/cli/src/main/java/de/jplag/cli/options/CliOptions.java @@ -53,8 +53,9 @@ public class CliOptions implements Runnable { "--result-file"}, description = "Name of the file in which the comparison results will be stored (default: ${DEFAULT-VALUE}). 
Missing .zip endings will be automatically added.") public String resultFile = "results"; - @Option(names = {"-M", "--mode"}, description = "The mode of JPlag. One of: ${COMPLETION-CANDIDATES} (default: ${DEFAULT_VALUE})") - public JPlagMode mode = JPlagMode.RUN_AND_VIEW; + @Option(names = {"-M", + "--mode"}, description = "The mode of JPlag. By default JPlag will automatically select the mode based on your input files. If none are selected the viewer will open on the file select screen. If a single result zip is selected it will be opened in the viewer directly. Otherwise JPlag will run on the submissions in the input files and show the result in the viewer. One of: ${COMPLETION-CANDIDATES} (default: ${DEFAULT_VALUE})") + public JPlagMode mode = JPlagMode.AUTO; @Option(names = {"--normalize"}, description = "Activate the normalization of tokens. Supported for languages: Java, C++.") public boolean normalize = false; diff --git a/cli/src/main/java/de/jplag/cli/options/JPlagMode.java b/cli/src/main/java/de/jplag/cli/options/JPlagMode.java index 8d1607d46b..c9fe1f9918 100644 --- a/cli/src/main/java/de/jplag/cli/options/JPlagMode.java +++ b/cli/src/main/java/de/jplag/cli/options/JPlagMode.java @@ -15,5 +15,9 @@ public enum JPlagMode { /** * Run JPlag and open the result in report viewer */ - RUN_AND_VIEW + RUN_AND_VIEW, + /** + * Choose the mode automatically from the given input files + */ + AUTO, } From be5511fc73c22c341a39427ea40f5d94cd0f48f3 Mon Sep 17 00:00:00 2001 From: TwoOfTwelve Date: Thu, 30 Jan 2025 13:36:37 +0100 Subject: [PATCH 02/10] Update cli/src/main/java/de/jplag/cli/options/CliOptions.java Co-authored-by: Alex | Kronox <39801116+Kr0nox@users.noreply.github.com> --- cli/src/main/java/de/jplag/cli/options/CliOptions.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cli/src/main/java/de/jplag/cli/options/CliOptions.java b/cli/src/main/java/de/jplag/cli/options/CliOptions.java index 7a363cd7df..43e5dc0e9b 100644 --- 
a/cli/src/main/java/de/jplag/cli/options/CliOptions.java +++ b/cli/src/main/java/de/jplag/cli/options/CliOptions.java @@ -54,7 +54,7 @@ public class CliOptions implements Runnable { public String resultFile = "results"; @Option(names = {"-M", - "--mode"}, description = "The mode of JPlag. By default JPlag will automatically select the mode based on your input files. If none are selected the viewer will open on the file select screen. If a single result zip is selected it will be opened in the viewer directly. Otherwise JPlag will run on the submissions in the input files and show the result in the viewer. One of: ${COMPLETION-CANDIDATES} (default: ${DEFAULT_VALUE})") + "--mode"}, description = "The mode of JPlag. By default JPlag will automatically select the mode based on the given input files. If none are given the report viewer will open on the file upload page. If a single result zip is given it will be opened in the report viewer directly. Otherwise JPlag will run on the submissions with the input files and show the result in the report viewer. One of: ${COMPLETION-CANDIDATES} (default: ${DEFAULT_VALUE})") public JPlagMode mode = JPlagMode.AUTO; @Option(names = {"--normalize"}, description = "Activate the normalization of tokens. 
Supported for languages: Java, C++.") From 9592d664950311360e234ff99c9a125897076c81 Mon Sep 17 00:00:00 2001 From: TwoOfTwelve Date: Thu, 30 Jan 2025 13:38:07 +0100 Subject: [PATCH 03/10] Update CliOptions.java --- cli/src/main/java/de/jplag/cli/options/CliOptions.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cli/src/main/java/de/jplag/cli/options/CliOptions.java b/cli/src/main/java/de/jplag/cli/options/CliOptions.java index 43e5dc0e9b..0b42e23612 100644 --- a/cli/src/main/java/de/jplag/cli/options/CliOptions.java +++ b/cli/src/main/java/de/jplag/cli/options/CliOptions.java @@ -54,7 +54,7 @@ public class CliOptions implements Runnable { public String resultFile = "results"; @Option(names = {"-M", - "--mode"}, description = "The mode of JPlag. By default JPlag will automatically select the mode based on the given input files. If none are given the report viewer will open on the file upload page. If a single result zip is given it will be opened in the report viewer directly. Otherwise JPlag will run on the submissions with the input files and show the result in the report viewer. One of: ${COMPLETION-CANDIDATES} (default: ${DEFAULT_VALUE})") + "--mode"}, description = "The mode of JPlag. By default JPlag will automatically select the mode based on the given input files. If none are given the report viewer will open on the file upload page. If a single result zip is given it will be opened in the report viewer directly. Otherwise, JPlag will check the given submissions and show the result in the report viewer. One of: ${COMPLETION-CANDIDATES} (default: ${DEFAULT_VALUE})") public JPlagMode mode = JPlagMode.AUTO; @Option(names = {"--normalize"}, description = "Activate the normalization of tokens. 
Supported for languages: Java, C++.") From ea69c5b4ea8ea25412476a1f37dd0e144a6633f1 Mon Sep 17 00:00:00 2001 From: Alexander Milster Date: Wed, 5 Feb 2025 12:54:32 +0100 Subject: [PATCH 04/10] Improved code style --- cli/src/main/java/de/jplag/cli/CLI.java | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/cli/src/main/java/de/jplag/cli/CLI.java b/cli/src/main/java/de/jplag/cli/CLI.java index a0b462f372..69d8e52fdd 100644 --- a/cli/src/main/java/de/jplag/cli/CLI.java +++ b/cli/src/main/java/de/jplag/cli/CLI.java @@ -62,7 +62,7 @@ public void executeCli() throws ExitException, IOException { switch (this.inputHandler.getCliOptions().mode) { case RUN -> runJPlag(); case VIEW -> runViewer(null); - case RUN_AND_VIEW -> runViewer(runJPlag()); + case RUN_AND_VIEW -> runAndView(); case AUTO -> selectModeAutomatically(); } } @@ -110,6 +110,15 @@ public File runJPlag() throws ExitException, FileNotFoundException { return target; } + /** + * Runs JPlag and shows the result in the report viewer + * @throws IOException If something went wrong with the internal server + * @throws ExitException If JPlag threw an exception + */ + public void runAndView() throws IOException, ExitException { + runViewer(runJPlag()); + } + /** * Runs the report viewer using the given file as the default result.zip. * @param zipFile The zip file to pass to the viewer. 
Can be null, if no result should be opened by default @@ -133,14 +142,14 @@ private void selectModeAutomatically() throws IOException, ExitException { return; } - this.runViewer(this.runJPlag()); + this.runAndView(); } private List getAllInputs() { List inputs = new ArrayList<>(); + inputs.addAll(List.of(this.inputHandler.getCliOptions().rootDirectory)); inputs.addAll(List.of(this.inputHandler.getCliOptions().newDirectories)); inputs.addAll(List.of(this.inputHandler.getCliOptions().oldDirectories)); - inputs.addAll(List.of(this.inputHandler.getCliOptions().rootDirectory)); return inputs; } From b94f334f99491c2e208a2514de9a272e3113eab6 Mon Sep 17 00:00:00 2001 From: Alexander Milster Date: Wed, 5 Feb 2025 13:01:25 +0100 Subject: [PATCH 05/10] Updated documentation --- README.md | 93 +++++++++++++++++++++++----------- docs/1.-How-to-Use-JPlag.md | 99 +++++++++++++++++++++++++------------ 2 files changed, 131 insertions(+), 61 deletions(-) diff --git a/README.md b/README.md index d819572ea4..2878029151 100644 --- a/README.md +++ b/README.md @@ -85,52 +85,84 @@ Parameter descriptions: [root-dirs[,root-dirs...]...] Root-directory with submissions to check for plagiarism. -bc, --bc, --base-code= - Path to the base code directory (common framework used in all submissions). - -l, --language= - Select the language of the submissions (default: java). See subcommands below. - -M, --mode=<{RUN, VIEW, RUN_AND_VIEW}> - The mode of JPlag: either only run analysis, only open the viewer, or do both (default: null) - -n, --shown-comparisons= - The maximum number of comparisons that will be shown in the generated report, if set to -1 all comparisons will be shown (default: 500) + Path to the base code directory (common framework used + in all submissions). + -l, --language= + Select the language of the submissions (default: java). + See subcommands below. + -M, --mode=<{RUN, VIEW, RUN_AND_VIEW, AUTO}> + The mode of JPlag. 
By default JPlag will automatically + select the mode based on the given input files. If + none are given the report viewer will open on the + file upload page. If a single result zip is given it + will be opened in the report viewer directly. + Otherwise, JPlag will check the given submissions and + show the result in the report viewer. One of: RUN, + VIEW, RUN_AND_VIEW, AUTO (default: null) + -n, --shown-comparisons= + The maximum number of comparisons that will be shown in + the generated report, if set to -1 all comparisons + will be shown (default: 2500) -new, --new=[,...] - Root-directories with submissions to check for plagiarism (same as root). - --normalize Activate the normalization of tokens. Supported for languages: Java, C++. + Root-directories with submissions to check for + plagiarism (same as root). + --normalize Activate the normalization of tokens. Supported for + languages: Java, C++. -old, --old=[,...] - Root-directories with prior submissions to compare against. - -r, --result-file= - Name of the file in which the comparison results will be stored (default: results). Missing .zip endings will be automatically added. - -t, --min-tokens= - Tunes the comparison sensitivity by adjusting the minimum token required to be counted as a matching section. A smaller value increases the sensitivity but might lead to more - false-positives. + Root-directories with prior submissions to compare + against. + -r, --result-file= + Name of the file in which the comparison results will + be stored (default: results). Missing .zip endings + will be automatically added. + -t, --min-tokens= + Tunes the comparison sensitivity by adjusting the + minimum token required to be counted as a matching + section. A smaller value increases the sensitivity + but might lead to more false-positives. Advanced --csv-export Export pairwise similarity values as a CSV file. - -d, --debug Store on-parsable files in error folder. 
- -m, --similarity-threshold= - Comparison similarity threshold [0.0-1.0]: All comparisons above this threshold will be saved (default: 0.0). + -d, --debug Store on-parsable files in error folder. + --log-level=<{ERROR, WARN, INFO, DEBUG, TRACE}> + Set the log level for the cli. + -m, --similarity-threshold= + Comparison similarity threshold [0.0-1.0]: All + comparisons above this threshold will be saved + (default: 0.0). --overwrite Existing result files will be overwritten. - -p, --suffixes=[,...] - comma-separated list of all filename suffixes that are included. - -P, --port= The port used for the internal report viewer (default: 1996). - -s, --subdirectory= + -p, --suffixes=[,...] + comma-separated list of all filename suffixes that are + included. + -P, --port= The port used for the internal report viewer (default: + 1996). + -s, --subdirectory= Look in directories /*/ for programs. - -x, --exclusion-file= - All files named in this file will be ignored in the comparison (line-separated list). + -x, --exclusion-file= + All files named in this file will be ignored in the + comparison (line-separated list). Clustering --cluster-alg, --cluster-algorithm=<{AGGLOMERATIVE, SPECTRAL}> - Specifies the clustering algorithm (default: spectral). + Specifies the clustering algorithm. Available + algorithms: agglomerative, spectral (default: + spectral). --cluster-metric=<{AVG, MIN, MAX, INTERSECTION}> - The similarity metric used for clustering (default: average similarity). + The similarity metric used for clustering. Available + metrics: average similarity, minimum similarity, + maximal similarity, matched tokens (default: average + similarity). --cluster-skip Skips the cluster calculation. Subsequence Match Merging --gap-size= - Maximal gap between neighboring matches to be merged (between 1 and minTokenMatch, default: 6). - --match-merging Enables merging of neighboring matches to counteract obfuscation attempts. 
+ Maximal gap between neighboring matches to be merged + (between 1 and minTokenMatch, default: 6). + --match-merging Enables merging of neighboring matches to counteract + obfuscation attempts. --neighbor-length= - Minimal length of neighboring matches to be merged (between 1 and minTokenMatch, default: 2). - + Minimal length of neighboring matches to be merged + (between 1 and minTokenMatch, default: 2). Languages: c cpp @@ -142,6 +174,7 @@ Languages: javascript kotlin llvmir + multi python3 rlang rust diff --git a/docs/1.-How-to-Use-JPlag.md b/docs/1.-How-to-Use-JPlag.md index c940816d7e..a0ba5ee38c 100644 --- a/docs/1.-How-to-Use-JPlag.md +++ b/docs/1.-How-to-Use-JPlag.md @@ -9,59 +9,95 @@ The language can either be set with the -l parameter or as a subcommand. If both When using the subcommand, language-specific arguments can be set. A list of language-specific options can be obtained by requesting the help page of a subcommand (e.g., "jplag java -h"). +To open an existing report run: `java -jar jplag.jar ` + +To open the report viewer without any file selected run: `java -jar jplag.jar` + The following arguments can be used to control JPlag: ``` Parameter descriptions: [root-dirs[,root-dirs...]...] Root-directory with submissions to check for plagiarism. -bc, --bc, --base-code= - Path to the base code directory (common framework used in all submissions). - -l, --language= - Select the language of the submissions (default: java). See subcommands below. - -M, --mode=<{RUN, VIEW, RUN_AND_VIEW}> - The mode of JPlag: either only run analysis, only open the viewer, or do both (default: null) - -n, --shown-comparisons= - The maximum number of comparisons that will be shown in the generated report, if set to -1 all comparisons will be shown (default: 500) + Path to the base code directory (common framework used + in all submissions). + -l, --language= + Select the language of the submissions (default: java). + See subcommands below. 
+ -M, --mode=<{RUN, VIEW, RUN_AND_VIEW, AUTO}> + The mode of JPlag. By default JPlag will automatically + select the mode based on the given input files. If + none are given the report viewer will open on the + file upload page. If a single result zip is given it + will be opened in the report viewer directly. + Otherwise, JPlag will check the given submissions and + show the result in the report viewer. One of: RUN, + VIEW, RUN_AND_VIEW, AUTO (default: null) + -n, --shown-comparisons= + The maximum number of comparisons that will be shown in + the generated report, if set to -1 all comparisons + will be shown (default: 2500) -new, --new=[,...] - Root-directories with submissions to check for plagiarism (same as root). - --normalize Activate the normalization of tokens. Supported for languages: Java, C++. + Root-directories with submissions to check for + plagiarism (same as root). + --normalize Activate the normalization of tokens. Supported for + languages: Java, C++. -old, --old=[,...] - Root-directories with prior submissions to compare against. - -r, --result-file= - Name of the file in which the comparison results will be stored (default: results). Missing .zip endings will be automatically added. - -t, --min-tokens= - Tunes the comparison sensitivity by adjusting the minimum token required to be counted as a matching section. A smaller value increases the sensitivity but might lead to more - false-positives. + Root-directories with prior submissions to compare + against. + -r, --result-file= + Name of the file in which the comparison results will + be stored (default: results). Missing .zip endings + will be automatically added. + -t, --min-tokens= + Tunes the comparison sensitivity by adjusting the + minimum token required to be counted as a matching + section. A smaller value increases the sensitivity + but might lead to more false-positives. Advanced --csv-export Export pairwise similarity values as a CSV file. 
- -d, --debug Store on-parsable files in error folder. - -m, --similarity-threshold= - Comparison similarity threshold [0.0-1.0]: All comparisons above this threshold will be saved (default: 0.0). + -d, --debug Store on-parsable files in error folder. + --log-level=<{ERROR, WARN, INFO, DEBUG, TRACE}> + Set the log level for the cli. + -m, --similarity-threshold= + Comparison similarity threshold [0.0-1.0]: All + comparisons above this threshold will be saved + (default: 0.0). --overwrite Existing result files will be overwritten. - -p, --suffixes=[,...] - comma-separated list of all filename suffixes that are included. - -P, --port= The port used for the internal report viewer (default: 1996). - -s, --subdirectory= + -p, --suffixes=[,...] + comma-separated list of all filename suffixes that are + included. + -P, --port= The port used for the internal report viewer (default: + 1996). + -s, --subdirectory= Look in directories /*/ for programs. - -x, --exclusion-file= - All files named in this file will be ignored in the comparison (line-separated list). + -x, --exclusion-file= + All files named in this file will be ignored in the + comparison (line-separated list). Clustering --cluster-alg, --cluster-algorithm=<{AGGLOMERATIVE, SPECTRAL}> - Specifies the clustering algorithm (default: spectral). + Specifies the clustering algorithm. Available + algorithms: agglomerative, spectral (default: + spectral). --cluster-metric=<{AVG, MIN, MAX, INTERSECTION}> - The similarity metric used for clustering (default: average similarity). + The similarity metric used for clustering. Available + metrics: average similarity, minimum similarity, + maximal similarity, matched tokens (default: average + similarity). --cluster-skip Skips the cluster calculation. Subsequence Match Merging --gap-size= - Maximal gap between neighboring matches to be merged (between 1 and minTokenMatch, default: 6). - --match-merging Enables merging of neighboring matches to counteract obfuscation attempts. 
+ Maximal gap between neighboring matches to be merged + (between 1 and minTokenMatch, default: 6). + --match-merging Enables merging of neighboring matches to counteract + obfuscation attempts. --neighbor-length= - Minimal length of neighboring matches to be merged (between 1 and minTokenMatch, default: 2). - -Subcommands (supported languages): + Minimal length of neighboring matches to be merged + (between 1 and minTokenMatch, default: 2). +Languages: c cpp csharp @@ -72,6 +108,7 @@ Subcommands (supported languages): javascript kotlin llvmir + multi python3 rlang rust From 8ed5b548775e505110b03802abe82c925ba02443 Mon Sep 17 00:00:00 2001 From: Alexander Milster Date: Tue, 18 Feb 2025 18:47:19 +0100 Subject: [PATCH 06/10] Updated documentation --- README.md | 94 ++++++---------- cli/src/main/java/de/jplag/cli/CLI.java | 5 +- .../java/de/jplag/cli/options/CliOptions.java | 4 +- docs/1.-How-to-Use-JPlag.md | 106 +++++++----------- 4 files changed, 80 insertions(+), 129 deletions(-) diff --git a/README.md b/README.md index ddc6f09b50..704c684621 100644 --- a/README.md +++ b/README.md @@ -80,86 +80,59 @@ The language can either be set with the -l parameter or as a subcommand (`jplag Language-specific arguments can be set when using the subcommand. A list of language-specific options can be obtained by requesting the help page of a subcommand (e.g., `jplag java —h`). ``` -Parameter descriptions: +Parameter descriptions: [root-dirs[,root-dirs...]...] - Root-directory with submissions to check for - plagiarism. If mode is set to VIEW, this parameter - can be used to specify a file to open. In that case - only a single file may be specified. + Root-directory with submissions to check for plagiarism. If mode is set to VIEW, this parameter can be used to specify a report file to open. In that + case only a single file may be specified. -bc, --bc, --base-code= - Path to the base code directory (common framework used - in all submissions). 
- -l, --language= - Select the language of the submissions (default: java). - See subcommands below. - -M, --mode=<{RUN, VIEW, RUN_AND_VIEW}> - The mode of JPlag. If VIEW is chosen, you can specify a - result file to display. One of: RUN, VIEW, - RUN_AND_VIEW (default: null) - -n, --shown-comparisons= - The maximum number of comparisons that will be shown in - the generated report, if set to -1 all comparisons - will be shown (default: 2500) + Path to the base code directory (common framework used in all submissions). + -l, --language= + Select the language of the submissions (default: java). See subcommands below. + -M, --mode=<{RUN, VIEW, RUN_AND_VIEW, AUTO}> + The mode of JPlag. One of: RUN, VIEW, RUN_AND_VIEW, AUTO (default: null). If VIEW is chosen, you can optionally specify a path to an existing report. + -n, --shown-comparisons= + The maximum number of comparisons that will be shown in the generated report, if set to -1 all comparisons will be shown (default: 2500) -new, --new=[,...] - Root-directories with submissions to check for - plagiarism (same as root). - --normalize Activate the normalization of tokens. Supported for - languages: Java, C++. + Root-directories with submissions to check for plagiarism (same as root). + --normalize Activate the normalization of tokens. Supported for languages: Java, C++. -old, --old=[,...] - Root-directories with prior submissions to compare - against. - -r, --result-file= - Name of the file in which the comparison results will - be stored (default: results). Missing .zip endings - will be automatically added. - -t, --min-tokens= - Tunes the comparison sensitivity by adjusting the - minimum token required to be counted as a matching - section. A smaller value increases the sensitivity - but might lead to more false-positives. + Root-directories with prior submissions to compare against. + -r, --result-file= + Name of the file in which the comparison results will be stored (default: results). 
Missing .zip endings will be automatically added. + -t, --min-tokens= + Tunes the comparison sensitivity by adjusting the minimum token required to be counted as a matching section. A smaller value increases the + sensitivity but might lead to more false-positives. Advanced --csv-export Export pairwise similarity values as a CSV file. - -d, --debug Store on-parsable files in error folder. + -d, --debug Store on-parsable files in error folder. --log-level=<{ERROR, WARN, INFO, DEBUG, TRACE}> Set the log level for the cli. - -m, --similarity-threshold= - Comparison similarity threshold [0.0-1.0]: All - comparisons above this threshold will be saved - (default: 0.0). + -m, --similarity-threshold= + Comparison similarity threshold [0.0-1.0]: All comparisons above this threshold will be saved (default: 0.0). --overwrite Existing result files will be overwritten. - -p, --suffixes=[,...] - comma-separated list of all filename suffixes that are - included. - -P, --port= The port used for the internal report viewer (default: - 1996). - -s, --subdirectory= + -p, --suffixes=[,...] + comma-separated list of all filename suffixes that are included. + -P, --port= The port used for the internal report viewer (default: 1996). + -s, --subdirectory= Look in directories /*/ for programs. - -x, --exclusion-file= - All files named in this file will be ignored in the - comparison (line-separated list). + -x, --exclusion-file= + All files named in this file will be ignored in the comparison (line-separated list). Clustering --cluster-alg, --cluster-algorithm=<{AGGLOMERATIVE, SPECTRAL}> - Specifies the clustering algorithm. Available - algorithms: agglomerative, spectral (default: - spectral). + Specifies the clustering algorithm. Available algorithms: agglomerative, spectral (default: spectral). --cluster-metric=<{AVG, MIN, MAX, INTERSECTION}> - The similarity metric used for clustering. 
Available - metrics: average similarity, minimum similarity, - maximal similarity, matched tokens (default: average - similarity). + The similarity metric used for clustering. Available metrics: average similarity, minimum similarity, maximal similarity, matched tokens (default: + average similarity). --cluster-skip Skips the cluster calculation. Subsequence Match Merging --gap-size= - Maximal gap between neighboring matches to be merged - (between 1 and minTokenMatch, default: 6). - --match-merging Enables merging of neighboring matches to counteract - obfuscation attempts. + Maximal gap between neighboring matches to be merged (between 1 and minTokenMatch, default: 6). + --match-merging Enables merging of neighboring matches to counteract obfuscation attempts. --neighbor-length= - Minimal length of neighboring matches to be merged - (between 1 and minTokenMatch, default: 2). + Minimal length of neighboring matches to be merged (between 1 and minTokenMatch, default: 2). Languages: c cpp @@ -171,6 +144,7 @@ Languages: javascript kotlin llvmir + multi python3 rlang rust diff --git a/cli/src/main/java/de/jplag/cli/CLI.java b/cli/src/main/java/de/jplag/cli/CLI.java index e872979325..9d73729b51 100644 --- a/cli/src/main/java/de/jplag/cli/CLI.java +++ b/cli/src/main/java/de/jplag/cli/CLI.java @@ -34,7 +34,7 @@ public final class CLI { private static final String OUTPUT_FILE_EXISTS = "The output file (also with suffixes e.g. results(1).zip) already exists. 
You can use --overwrite to overwrite the file."; private static final String OUTPUT_FILE_NOT_WRITABLE = "The output file (%s) cannot be written to."; - private static final String ZIP_FILE_ENDING = ".zip"; + private static final String ZIP_FILE_EXTENSION = ".zip"; private final CliInputHandler inputHandler; @@ -137,7 +137,8 @@ private void selectModeAutomatically() throws IOException, ExitException { return; } - if (inputs.size() == 1 && inputs.getFirst().getName().endsWith(ZIP_FILE_ENDING)) { + // if the selected mode is auto and there is exactly one zip file selected as an input it is opened in the report viewer + if (inputs.size() == 1 && inputs.getFirst().getName().endsWith(ZIP_FILE_EXTENSION)) { this.runViewer(inputs.getFirst()); return; } diff --git a/cli/src/main/java/de/jplag/cli/options/CliOptions.java b/cli/src/main/java/de/jplag/cli/options/CliOptions.java index 48c216bfa8..bc88561389 100644 --- a/cli/src/main/java/de/jplag/cli/options/CliOptions.java +++ b/cli/src/main/java/de/jplag/cli/options/CliOptions.java @@ -54,8 +54,8 @@ public class CliOptions implements Runnable { public String resultFile = "results"; @Option(names = {"-M", - "--mode"}, description = "The mode of JPlag. By default JPlag will automatically select the mode based on the given input files. If none are given the report viewer will open on the file upload page. If a single result zip is given it will be opened in the report viewer directly. Otherwise, JPlag will check the given submissions and show the result in the report viewer. One of: ${COMPLETION-CANDIDATES} (default: ${DEFAULT_VALUE})") - public JPlagMode mode = JPlagMode.AUTO; + "--mode"}, description = "The mode of JPlag. One of: ${COMPLETION-CANDIDATES} (default: ${DEFAULT_VALUE}). If VIEW is chosen, you can optionally specify a path to an existing report.") + public JPlagMode mode = JPlagMode.RUN_AND_VIEW; @Option(names = {"--normalize"}, description = "Activate the normalization of tokens. 
Supported for languages: Java, C++.") public boolean normalize = false; diff --git a/docs/1.-How-to-Use-JPlag.md b/docs/1.-How-to-Use-JPlag.md index a7fce2e00c..d2858f63df 100644 --- a/docs/1.-How-to-Use-JPlag.md +++ b/docs/1.-How-to-Use-JPlag.md @@ -11,86 +11,59 @@ A list of language-specific options can be obtained by requesting the help page The following arguments can be used to control JPlag: ``` -Parameter descriptions: +Parameter descriptions: [root-dirs[,root-dirs...]...] - Root-directory with submissions to check for - plagiarism. If mode is set to VIEW, this parameter - can be used to specify a file to open. In that case - only a single file may be specified. + Root-directory with submissions to check for plagiarism. If mode is set to VIEW, this parameter can be used to specify a report file to open. In that + case only a single file may be specified. -bc, --bc, --base-code= - Path to the base code directory (common framework used - in all submissions). - -l, --language= - Select the language of the submissions (default: java). - See subcommands below. - -M, --mode=<{RUN, VIEW, RUN_AND_VIEW}> - The mode of JPlag. If VIEW is chosen, you can specify a - result file to display. One of: RUN, VIEW, - RUN_AND_VIEW (default: null) - -n, --shown-comparisons= - The maximum number of comparisons that will be shown in - the generated report, if set to -1 all comparisons - will be shown (default: 2500) + Path to the base code directory (common framework used in all submissions). + -l, --language= + Select the language of the submissions (default: java). See subcommands below. + -M, --mode=<{RUN, VIEW, RUN_AND_VIEW, AUTO}> + The mode of JPlag. One of: RUN, VIEW, RUN_AND_VIEW, AUTO (default: null). If VIEW is chosen, you can optionally specify a path to an existing report. + -n, --shown-comparisons= + The maximum number of comparisons that will be shown in the generated report, if set to -1 all comparisons will be shown (default: 2500) -new, --new=[,...] 
- Root-directories with submissions to check for - plagiarism (same as root). - --normalize Activate the normalization of tokens. Supported for - languages: Java, C++. + Root-directories with submissions to check for plagiarism (same as root). + --normalize Activate the normalization of tokens. Supported for languages: Java, C++. -old, --old=[,...] - Root-directories with prior submissions to compare - against. - -r, --result-file= - Name of the file in which the comparison results will - be stored (default: results). Missing .zip endings - will be automatically added. - -t, --min-tokens= - Tunes the comparison sensitivity by adjusting the - minimum token required to be counted as a matching - section. A smaller value increases the sensitivity - but might lead to more false-positives. + Root-directories with prior submissions to compare against. + -r, --result-file= + Name of the file in which the comparison results will be stored (default: results). Missing .zip endings will be automatically added. + -t, --min-tokens= + Tunes the comparison sensitivity by adjusting the minimum token required to be counted as a matching section. A smaller value increases the + sensitivity but might lead to more false-positives. Advanced --csv-export Export pairwise similarity values as a CSV file. - -d, --debug Store on-parsable files in error folder. + -d, --debug Store on-parsable files in error folder. --log-level=<{ERROR, WARN, INFO, DEBUG, TRACE}> Set the log level for the cli. - -m, --similarity-threshold= - Comparison similarity threshold [0.0-1.0]: All - comparisons above this threshold will be saved - (default: 0.0). + -m, --similarity-threshold= + Comparison similarity threshold [0.0-1.0]: All comparisons above this threshold will be saved (default: 0.0). --overwrite Existing result files will be overwritten. - -p, --suffixes=[,...] - comma-separated list of all filename suffixes that are - included. 
- -P, --port= The port used for the internal report viewer (default: - 1996). - -s, --subdirectory= + -p, --suffixes=[,...] + comma-separated list of all filename suffixes that are included. + -P, --port= The port used for the internal report viewer (default: 1996). + -s, --subdirectory= Look in directories /*/ for programs. - -x, --exclusion-file= - All files named in this file will be ignored in the - comparison (line-separated list). + -x, --exclusion-file= + All files named in this file will be ignored in the comparison (line-separated list). Clustering --cluster-alg, --cluster-algorithm=<{AGGLOMERATIVE, SPECTRAL}> - Specifies the clustering algorithm. Available - algorithms: agglomerative, spectral (default: - spectral). + Specifies the clustering algorithm. Available algorithms: agglomerative, spectral (default: spectral). --cluster-metric=<{AVG, MIN, MAX, INTERSECTION}> - The similarity metric used for clustering. Available - metrics: average similarity, minimum similarity, - maximal similarity, matched tokens (default: average - similarity). + The similarity metric used for clustering. Available metrics: average similarity, minimum similarity, maximal similarity, matched tokens (default: + average similarity). --cluster-skip Skips the cluster calculation. Subsequence Match Merging --gap-size= - Maximal gap between neighboring matches to be merged - (between 1 and minTokenMatch, default: 6). - --match-merging Enables merging of neighboring matches to counteract - obfuscation attempts. + Maximal gap between neighboring matches to be merged (between 1 and minTokenMatch, default: 6). + --match-merging Enables merging of neighboring matches to counteract obfuscation attempts. --neighbor-length= - Minimal length of neighboring matches to be merged - (between 1 and minTokenMatch, default: 2). + Minimal length of neighboring matches to be merged (between 1 and minTokenMatch, default: 2). 
Languages: c cpp @@ -102,6 +75,7 @@ Languages: javascript kotlin llvmir + multi python3 rlang rust @@ -152,13 +126,15 @@ The report will always be zipped unless there is an error during the zipping pro Starting with version v6.0.0, the report viewer is bundled with JPlag and will be launched automatically. The `--mode` option controls this behavior. By default, JPlag will process the input files and produce a zipped result file. After that, the report viewer is launched (on localhost), and the report will be shown in your browser. -The option `--mode show` will only open the report viewer. -This allows you to view existing reports. -You can optionally provide the path to a report file to immediately display it in the viewer; otherwise, the viewer will require you to select a report, just like the online version. -By specifying `--mode run`, JPlag will run but generate the zipped report but will not open the report viewer. +To open the report viewer normally run JPlag without additional parameters: `java -jar jplag.jar`. This will open the report viewer in the file selection screen. +An old report can be opened by passing the report zip as a parameter to JPlag: `java -jar jplag.jar `. This will skip the file selection and immediately open the report. -An online version of the viewer is still hosted at https://jplag.github.io/JPlag/ in order to view pre-v6.0.0 reports. Your submissions will neither be uploaded to a server nor stored permanently. They are stored as long as you view them. Once you refresh the page, all information will be erased. +When running JPlag normally with a set of submissions, the report viewer will be opened with the generated report selected automatically: `java -jar jplag.jar <root-dirs> java` +This can be prevented by passing `--mode run` (`java -jar jplag.jar --mode run <root-dirs> java`). 
+ +Additional information can be found [here](7.-Report-Viewer.md) +An online version of the viewer is still hosted at https://jplag.github.io/JPlag/ in order to view pre-v6.0.0 reports. Your submissions will neither be uploaded to a server nor stored permanently. They are stored as long as you view them. Once you refresh the page, all information will be erased. ## Basic Concepts From 75a8575ff818edbbb3b014d6b5a5fb9c93f45f97 Mon Sep 17 00:00:00 2001 From: Alexander Milster Date: Tue, 18 Feb 2025 19:03:33 +0100 Subject: [PATCH 07/10] Added some simple tests for automatic mode selection --- cli/src/test/java/de/jplag/cli/ModeTest.java | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/cli/src/test/java/de/jplag/cli/ModeTest.java b/cli/src/test/java/de/jplag/cli/ModeTest.java index f9f33f711c..ef248a3e89 100644 --- a/cli/src/test/java/de/jplag/cli/ModeTest.java +++ b/cli/src/test/java/de/jplag/cli/ModeTest.java @@ -1,6 +1,7 @@ package de.jplag.cli; import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertNull; import static org.junit.jupiter.api.Assertions.assertThrowsExactly; import java.io.File; @@ -53,6 +54,18 @@ void testViewWithMultipleFiles() { }); } + @Test + void testImplicitView() throws IOException, ExitException { + CliInputHandler inputHandler = this.runCli(args -> args.with(CliArgument.RESULT_FILE, "result.zip")).inputHandler(); + assertEquals(new File("result.zip"), inputHandler.getFileForViewMode()); + } + + @Test + void testImplicitReportViewer() throws IOException, ExitException { + CliInputHandler inputHandler = this.runCli().inputHandler(); + assertNull(inputHandler.getFileForViewMode()); + } + @Override public void addDefaultParameters() { // prevents the submission directory from being added to the parameters automatically From 035093032b6afabcef44453b8a001426c8c73724 Mon Sep 17 00:00:00 2001 From: Alexander Milster Date: Tue, 18 Feb 2025 19:16:17 +0100 Subject: [PATCH 08/10] 
Changed default mode flag to auto again --- cli/src/main/java/de/jplag/cli/options/CliOptions.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cli/src/main/java/de/jplag/cli/options/CliOptions.java b/cli/src/main/java/de/jplag/cli/options/CliOptions.java index bc88561389..442060c2f6 100644 --- a/cli/src/main/java/de/jplag/cli/options/CliOptions.java +++ b/cli/src/main/java/de/jplag/cli/options/CliOptions.java @@ -55,7 +55,7 @@ public class CliOptions implements Runnable { @Option(names = {"-M", "--mode"}, description = "The mode of JPlag. One of: ${COMPLETION-CANDIDATES} (default: ${DEFAULT_VALUE}). If VIEW is chosen, you can optionally specify a path to an existing report.") - public JPlagMode mode = JPlagMode.RUN_AND_VIEW; + public JPlagMode mode = JPlagMode.AUTO; @Option(names = {"--normalize"}, description = "Activate the normalization of tokens. Supported for languages: Java, C++.") public boolean normalize = false; From ef5d148a15bcc893899944c4f7dc13c11f649bb2 Mon Sep 17 00:00:00 2001 From: Alexander Milster Date: Wed, 19 Feb 2025 16:41:10 +0100 Subject: [PATCH 09/10] Updated documentation for implicit view mode --- docs/1.-How-to-Use-JPlag.md | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/docs/1.-How-to-Use-JPlag.md b/docs/1.-How-to-Use-JPlag.md index d2858f63df..4fb12956a7 100644 --- a/docs/1.-How-to-Use-JPlag.md +++ b/docs/1.-How-to-Use-JPlag.md @@ -9,7 +9,14 @@ The language can either be set with the -l parameter or as a subcommand. If both When using the subcommand, language-specific arguments can be set. A list of language-specific options can be obtained by requesting the help page of a subcommand (e.g., "jplag java -h"). 
-The following arguments can be used to control JPlag: +To run JPlag normally on a set of submissions, run: `java -jar jplag.jar <root-dirs> <language>` +If the language is Java, it can be omitted: `java -jar jplag.jar <root-dirs>` + +To open an existing report, run: `java -jar jplag.jar <report.zip>` + +To open the report viewer without any file selected, run: `java -jar jplag.jar` + +This is the list of all options available to configure JPlag: ``` Parameter descriptions: [root-dirs[,root-dirs...]...] From ffc90c95c7075f917f0f81addf22557e011b7295 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Timur=20Sa=C4=9Flam?= Date: Thu, 20 Feb 2025 09:43:56 +0100 Subject: [PATCH 10/10] Update docs/1.-How-to-Use-JPlag.md Co-authored-by: Alex | Kronox <39801116+Kr0nox@users.noreply.github.com> --- docs/1.-How-to-Use-JPlag.md | 1 - 1 file changed, 1 deletion(-) diff --git a/docs/1.-How-to-Use-JPlag.md b/docs/1.-How-to-Use-JPlag.md index 4fb12956a7..d25d6fdafd 100644 --- a/docs/1.-How-to-Use-JPlag.md +++ b/docs/1.-How-to-Use-JPlag.md @@ -131,7 +131,6 @@ The report will always be zipped unless there is an error during the zipping pro ## Viewing Reports Starting with version v6.0.0, the report viewer is bundled with JPlag and will be launched automatically. The `--mode` option controls this behavior. -By default, JPlag will process the input files and produce a zipped result file. After that, the report viewer is launched (on localhost), and the report will be shown in your browser. To open the report viewer normally run JPlag without additional parameters: `java -jar jplag.jar`. This will open the report viewer in the file selection screen. An old report can be opened by passing the report zip as a parameter to JPlag: `java -jar jplag.jar `. This will skip the file selection and immediately open the report.