diff --git a/R/git.R b/R/git.R index e9e8029a..9d0474f1 100644 --- a/R/git.R +++ b/R/git.R @@ -23,10 +23,19 @@ parse_gitlog <- function(perceval_path,git_repo_path,save_path=NA,perl_regex=NA) git_uri <- git_repo_path save_path <- ifelse(!is.na(save_path),path.expand(save_path),NA) + # DEBUG + print(paste("Perceval path:", perceval_path)) + print(paste("Git repo path:", git_repo_path)) + print(paste("Save path:", save_path)) + print(paste("Perl regex:", perl_regex)) + # Use percerval to parse .git --json line is required to be parsed by jsonlite::fromJSON. # The log will be saved to the /tmp/ folder gitlog_path <- "/tmp/gitlog.log" + # DEBUG + print(paste("Gitlog path:", gitlog_path)) + # Perceval suggested flags perceval_flags <- c( @@ -62,18 +71,30 @@ parse_gitlog <- function(perceval_path,git_repo_path,save_path=NA,perl_regex=NA) } } + # DEBUG + print("Git log call message:") + print(gitlog_call_message) + # Parsed JSON output. perceval_output <- system2(perceval_path, args = c('git', '--git-log',gitlog_path,git_uri,'--json-line'), stdout = TRUE, stderr = FALSE) + # DEBUG + print("Perceval Output:") + cat(perceval_output, sep = "\n") + perceval_parsed <- data.table(jsonlite::stream_in(textConnection(perceval_output),verbose = FALSE)) if(nrow(perceval_parsed) == 0){ stop("The repository specified has no commits.") } + # DEBUG + print("Parsed data structure:") + print(str(perceval_parsed)) + # APR very first commit is a weird single case of commit without files. We filter them here. is_commit_with_files <- !!sapply(perceval_parsed$data.files,length) perceval_parsed <- perceval_parsed[is_commit_with_files] diff --git a/R/mail.R b/R/mail.R index 9496002e..8e8b7620 100644 --- a/R/mail.R +++ b/R/mail.R @@ -589,17 +589,54 @@ parse_mbox <- function(perceval_path, mbox_file_path) { mbox_dir <- dirname(mbox_file_path) # Extract directory path mbox_uri <- mbox_file_path # URI points to the mbox file + + + # Debugging + print(paste("Perceval path:", perceval_path)) + print(paste("Mbox file path:", mbox_file_path)) + print(paste("Mbox directory path:", mbox_dir)) + # Use Perceval to parse the mbox file - perceval_output <- system2(perceval_path, - args = c('mbox', mbox_uri, mbox_dir, '--json-line'), - stdout = TRUE, - stderr = TRUE) + perceval_output <- tryCatch({ + system2(perceval_path, + args = c('mbox', mbox_uri, mbox_dir, '--json-line'), + stdout = TRUE, + stderr = TRUE) + }, error = function(e) { + print("Error running Perceval:") + print(e$message) + stop("Perceval execution failed.") + }) + + # Debugging Perceval output + print("Perceval Output:") + cat(perceval_output, sep = "\n") + + + # Filter JSON lines from Perceval output json_lines <- perceval_output[grepl("^\\{", perceval_output)] # Escape the `{` character + + if (length(json_lines) == 0) { + stop("No valid JSON lines found in Perceval output. Check the mbox file or Perceval configuration.") + } + + # Parse JSON output as a data.table - perceval_parsed <- data.table(jsonlite::stream_in(textConnection(json_lines), verbose = FALSE)) + perceval_parsed <- tryCatch({ + data.table(jsonlite::stream_in(textConnection(json_lines), verbose = FALSE)) + }, error = function(e) { + print("Error parsing JSON lines:") + print(e$message) + stop("JSON parsing failed.") + }) + + # Debugging parsed data + print("Parsed data structure:") + print(str(perceval_parsed)) + columns_of_interest <- c("data.Message.ID", "data.In.Reply.To", "data.Date", "data.From", "data.To", "data.Cc", "data.Subject", "data.body.plain", "data.body") columns_rename <- c("reply_id", "in_reply_to_id", "reply_datetimetz", "reply_from", "reply_to", "reply_cc", "reply_subject", "reply_body", "reply_body") @@ -613,6 +650,11 @@ parse_mbox <- function(perceval_path, mbox_file_path) { old = colnames(perceval_parsed), new = columns_rename[is_available_column]) + # Debugging final parsed data + print("Final parsed data:") + print(perceval_parsed) + + return(perceval_parsed) } diff --git a/man/parse_gitlog.Rd b/man/parse_gitlog.Rd index d4370808..7d65786f 100644 --- a/man/parse_gitlog.Rd +++ b/man/parse_gitlog.Rd @@ -23,16 +23,16 @@ Other parsers: \code{\link{parse_bugzilla_perceval_rest_issue_comments}()}, \code{\link{parse_bugzilla_perceval_traditional_issue_comments}()}, \code{\link{parse_bugzilla_rest_comments}()}, -\code{\link{parse_bugzilla_rest_issues_comments}()}, \code{\link{parse_bugzilla_rest_issues}()}, +\code{\link{parse_bugzilla_rest_issues_comments}()}, \code{\link{parse_commit_message_id}()}, \code{\link{parse_dependencies}()}, \code{\link{parse_dv8_clusters}()}, +\code{\link{parse_jira}()}, \code{\link{parse_jira_latest_date}()}, \code{\link{parse_jira_rss_xml}()}, -\code{\link{parse_jira}()}, -\code{\link{parse_mbox_latest_date}()}, \code{\link{parse_mbox}()}, +\code{\link{parse_mbox_latest_date}()}, \code{\link{parse_nvdfeed}()} } \concept{parsers} diff --git a/man/parse_mbox.Rd b/man/parse_mbox.Rd index 9b128dd8..349d009c 100644 --- a/man/parse_mbox.Rd +++ b/man/parse_mbox.Rd @@ -19,6 +19,22 @@ data used. This function only ensures if columns of interest are available, then consistently renamed for clarity. } \seealso{ +Other parsers: +\code{\link{parse_bugzilla_perceval_rest_issue_comments}()}, +\code{\link{parse_bugzilla_perceval_traditional_issue_comments}()}, +\code{\link{parse_bugzilla_rest_comments}()}, +\code{\link{parse_bugzilla_rest_issues}()}, +\code{\link{parse_bugzilla_rest_issues_comments}()}, +\code{\link{parse_commit_message_id}()}, +\code{\link{parse_dependencies}()}, +\code{\link{parse_dv8_clusters}()}, +\code{\link{parse_gitlog}()}, +\code{\link{parse_jira}()}, +\code{\link{parse_jira_latest_date}()}, +\code{\link{parse_jira_rss_xml}()}, +\code{\link{parse_mbox_latest_date}()}, +\code{\link{parse_nvdfeed}()} + Other parsers: \code{\link{parse_bugzilla_perceval_rest_issue_comments}()}, \code{\link{parse_bugzilla_perceval_traditional_issue_comments}()}, diff --git a/tests/testthat/test-git.R b/tests/testthat/test-git.R index 3659d6dd..740bb6ad 100644 --- a/tests/testthat/test-git.R +++ b/tests/testthat/test-git.R @@ -30,9 +30,25 @@ test_that("Calling parse_gitlog with correct perceval and correct git log path r tools_path <- file.path(tools_path) tool <- yaml::read_yaml(tools_path) perceval_path <- tool[["perceval"]] + + # Debugging output + print("Debugging parse_gitlog:") + print(paste("Tools path:", tools_path)) + print(paste("Perceval path:", perceval_path)) + git_repo_path <- suppressWarnings(git_create_sample_log()) + + # Debugging output + print(paste("Generated Git repo path:", git_repo_path)) + result <- parse_gitlog(perceval_path, git_repo_path) + + # Debugging output + print("Result of parse_gitlog:") + print(head(result)) + expect_is(result, "data.table") + suppressWarnings(git_delete_sample_log(git_repo_path)) }) diff --git a/tests/testthat/test-mail.R b/tests/testthat/test-mail.R index b7426917..bf190130 100644 --- a/tests/testthat/test-mail.R +++ b/tests/testthat/test-mail.R @@ -20,16 +20,30 @@ test_that("Calling parse_mbox with correct perceval and mbox path returns a data tools_path <- file.path(tools_path) tool <- yaml::read_yaml(tools_path) perceval_path <- tool[["perceval"]] - mbox_path <- example_mailing_list_two_threads(folder_path = "/tmp", - folder_name="example_two_threads_mailing_list", - file_name = "two_thread_mailing_list") + + # Debugging output + print("Debugging parse_mbox:") + print(paste("Tools path:", tools_path)) + print(paste("Perceval path:", perceval_path)) + + mbox_path <- example_mailing_list_two_threads( + folder_path = "/tmp", + folder_name = "example_two_threads_mailing_list", + file_name = "two_thread_mailing_list" + ) + + # Debugging output + print(paste("Generated Mbox path:", mbox_path)) + result <- parse_mbox(perceval_path, mbox_path) - io_delete_folder(folder_path="/tmp", folder_name="example_two_threads_mailing_list") + # Debugging output + print("Result of parse_mbox:") + print(head(result)) + io_delete_folder(folder_path = "/tmp", folder_name = "example_two_threads_mailing_list") expect_equal(result[reply_from == "John Doe "]$reply_subject, "Subject 1") expect_equal(result[reply_subject == "Re: Subject 1"]$reply_from, "Smithsonian Doe ") - })