From 12ed2d7acae2380fa0117a9bc866120e4174b7ec Mon Sep 17 00:00:00 2001 From: RavenMarQ Date: Thu, 12 Sep 2024 10:28:26 -1000 Subject: [PATCH 01/19] Test This is to test Committing and PR --- .idea/.gitignore | 3 +++ .idea/kaiaulu.iml | 9 +++++++++ .idea/misc.xml | 6 ++++++ .idea/modules.xml | 8 ++++++++ .idea/vcs.xml | 6 ++++++ DESCRIPTION | 2 +- 6 files changed, 33 insertions(+), 1 deletion(-) create mode 100644 .idea/.gitignore create mode 100644 .idea/kaiaulu.iml create mode 100644 .idea/misc.xml create mode 100644 .idea/modules.xml create mode 100644 .idea/vcs.xml diff --git a/.idea/.gitignore b/.idea/.gitignore new file mode 100644 index 00000000..26d33521 --- /dev/null +++ b/.idea/.gitignore @@ -0,0 +1,3 @@ +# Default ignored files +/shelf/ +/workspace.xml diff --git a/.idea/kaiaulu.iml b/.idea/kaiaulu.iml new file mode 100644 index 00000000..d6ebd480 --- /dev/null +++ b/.idea/kaiaulu.iml @@ -0,0 +1,9 @@ + + + + + + + + + \ No newline at end of file diff --git a/.idea/misc.xml b/.idea/misc.xml new file mode 100644 index 00000000..639900d1 --- /dev/null +++ b/.idea/misc.xml @@ -0,0 +1,6 @@ + + + + + + \ No newline at end of file diff --git a/.idea/modules.xml b/.idea/modules.xml new file mode 100644 index 00000000..59f59099 --- /dev/null +++ b/.idea/modules.xml @@ -0,0 +1,8 @@ + + + + + + + + \ No newline at end of file diff --git a/.idea/vcs.xml b/.idea/vcs.xml new file mode 100644 index 00000000..35eb1ddf --- /dev/null +++ b/.idea/vcs.xml @@ -0,0 +1,6 @@ + + + + + + \ No newline at end of file diff --git a/DESCRIPTION b/DESCRIPTION index 128ee129..745f98c3 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -2,7 +2,7 @@ Package: kaiaulu Type: Package Title: Kaiaulu Version: 0.0.0.9700 -Description: Kaiaulu is an R package and common interface that helps with understanding evolving software development communities, and the artifacts (gitlog, mailing list, files, etc.) which developers collaborate and communicate about. See Paradis et al., (2012) . 
+Description: Kaiaulu is an R package and common interface that helps with understanding evolving software development communities, and the artifacts (GitLog, mailing list, files, etc.) which developers collaborate and communicate about. See Paradis et al., (2012) . Authors@R: c( person('Carlos', 'Paradis', role = c('aut', 'cre'), email = 'cvas@hawaii.edu', From 334bac39e52f96c3ebaffaad665e405402b60db5 Mon Sep 17 00:00:00 2001 From: RavenMarQ Date: Wed, 25 Sep 2024 15:33:26 -1000 Subject: [PATCH 02/19] A untested prototype of understand_parse_dependencies Modeled after parse_dependencies with a similar output, this is for review to see if it is in-line with specification --- R/src.R | 51 +++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 51 insertions(+) diff --git a/R/src.R b/R/src.R index da5b9d9a..4ec79b20 100644 --- a/R/src.R +++ b/R/src.R @@ -7,6 +7,57 @@ ############## Parsers ############## +#' Parse dependencies from Scitool's Understand +#' +#' @param understand_path path to the und folder +#' @param project_path path to the project folder to analyze +#' @param language the language of the .git repo (accepts cpp, java, ruby, python, pom) +#' @param output_dir path to output directory (formatted output_path/) +#' @export +#' @family parsers +understand_parse_dependencies <- function(project_path,language,output_dir="/tmp/"){ + # Use Understand to parse the code folder. 
+ # Create the variables used in command lines + db_dir <- paste0(output_dir, "/Understand.und") + xml_dir <- paste0(db_dir, "/Dependencies.xml") + + # Generate the XML file + system2("und", "create", "-db", db_dir, "languages", language) + system2("und", "-db", db_dir, "add", project_path) + system2("und", "analyze", db_dir) + system2("und", "export", "-dependencies", "file", "cytoscape", xml_dir, db_dir) + + # Parse the XML file + xml_data <- xmlParse(xml_dir) + + # Extract nodes + nodes <- xpathSApply(xml_data, "//node", xmlToList) + + # Create the data table with id and label + node_list <- lapply(nodes, function(node) { + id <- xmlGetAttr(node, "id") + label <- xpathSApply(node, ".//att[@name='node.label']", xmlGetAttr, "value") + data.table(id = id, label = label) + }) + + # Extract edges + edges <- xpathSApply(xml_data, "//edge", xmlToList) + + # Create the data table with id_from, id_to, and dependency_kind + edge_list <- lapply(nodes, function(edge) { + id_from <- xmlGetAttr(edge, "source") + id_to <- xmlGetAttr(edge, "target") + dependency_kind <- xpathSApply(edge, ".//att[@name='node.label']", xmlGetAttr, "value") + data.table(id_from = id_from, id_to = id_to, dependency_kind = dependency_kind) + }) + + # Combine the lists into a single data frame + edge_list <- rbindlist(edge_list) + node_list <- rbindlist(node_list) + graph <- list(edge_list = edge_list, node_list = node_list) + return(graph) +} + #' Parse dependencies from Depends #' #' @param depends_jar_path path to depends jar From 33a293b32a0acde5ee8008ebd0b511a14fc4a2c8 Mon Sep 17 00:00:00 2001 From: RavenMarQ Date: Tue, 1 Oct 2024 16:31:46 -1000 Subject: [PATCH 03/19] Completed Functions Per the specifications in Issue 308, the files are functional and ready to be put into a notebook. 
--- R/src.R | 140 +++++++++++++++++++++++++++++++++++++++++++------------- 1 file changed, 109 insertions(+), 31 deletions(-) diff --git a/R/src.R b/R/src.R index 4ec79b20..4b021066 100644 --- a/R/src.R +++ b/R/src.R @@ -6,55 +6,96 @@ ############## Parsers ############## - -#' Parse dependencies from Scitool's Understand +#' Parse file dependencies from Scitool's Understand #' -#' @param understand_path path to the und folder #' @param project_path path to the project folder to analyze -#' @param language the language of the .git repo (accepts cpp, java, ruby, python, pom) +#' @param language the language of the project (language must be supported by Understand) #' @param output_dir path to output directory (formatted output_path/) +#' @param parse_type Type of dependencies to generate into xml (either "file" or "class") #' @export #' @family parsers -understand_parse_dependencies <- function(project_path,language,output_dir="/tmp/"){ +understand_parse_dependencies <- function(project_path,language,output_dir="../tmp/", parse_type){ + # Before running, check if parse_type is correct + validInput <- function(input) { + is.character(input) && length(input) == 1 && input %in% c("file", "class") + } + if (!validInput(parse_type)) { + stop("Error: Invalid parse_type provided. Please input either \"file\" or \"class\"") + } + # Use Understand to parse the code folder. 
# Create the variables used in command lines + project_path <- paste0("\"", project_path, "\"") db_dir <- paste0(output_dir, "/Understand.und") - xml_dir <- paste0(db_dir, "/Dependencies.xml") + xml_dir <- paste0(db_dir, "/", parse_type, "Dependencies.xml") + command <- "und" + args <- c("create", "-db", db_dir, "-languages", language) # Generate the XML file - system2("und", "create", "-db", db_dir, "languages", language) - system2("und", "-db", db_dir, "add", project_path) - system2("und", "analyze", db_dir) - system2("und", "export", "-dependencies", "file", "cytoscape", xml_dir, db_dir) + system2(command, args) + args <- c("-db", db_dir, "add", project_path) + system2(command, args) + args <- c("analyze", db_dir) + system2(command, args) + args <- c("export", "-dependencies", "file", "cytoscape", xml_dir, db_dir) + system2(command, args) # Parse the XML file xml_data <- xmlParse(xml_dir) + xml_nodes <- xmlRoot(xml_data) + xml_nodes <- xmlChildren(xml_nodes) + + # Helper function to search for an attribute + findAtt <- function(search_nodes, att_name) { + found_att <- NA + for (att in search_nodes) { + if (xmlGetAttr(att, "name") == att_name) { + found_att <- xmlGetAttr(att, "value") + break + } + } + return(found_att) + } - # Extract nodes - nodes <- xpathSApply(xml_data, "//node", xmlToList) - - # Create the data table with id and label - node_list <- lapply(nodes, function(node) { - id <- xmlGetAttr(node, "id") - label <- xpathSApply(node, ".//att[@name='node.label']", xmlGetAttr, "value") - data.table(id = id, label = label) + # From child nodes- filter for those with name "node" + node_elements <- lapply(xml_nodes, function(child) { + if (xmlName(child) == "node") { + # Extract the id + id <- xmlGetAttr(child, "id") + # Find the node.label attribute + att_nodes <- xmlChildren(child) + node_label <- findAtt(att_nodes, "node.label") + long_name <- findAtt(att_nodes, "longName") + return(data.table(node_label = node_label, id = id, long_name = long_name)) + } 
else { + return(NULL) + } }) - # Extract edges - edges <- xpathSApply(xml_data, "//edge", xmlToList) - - # Create the data table with id_from, id_to, and dependency_kind - edge_list <- lapply(nodes, function(edge) { - id_from <- xmlGetAttr(edge, "source") - id_to <- xmlGetAttr(edge, "target") - dependency_kind <- xpathSApply(edge, ".//att[@name='node.label']", xmlGetAttr, "value") - data.table(id_from = id_from, id_to = id_to, dependency_kind = dependency_kind) + # Remove NULLs and combine the results into a data frame + node_list <- do.call(rbind, node_elements[!sapply(node_elements, is.null)]) + + # From child nodes- filter for those with name "edge" + edge_elements <- lapply(xml_nodes, function(child) { + if (xmlName(child) == "edge") { + # Extract the id_from and id_to + id_from <- xmlGetAttr(child, "source") + id_to <- xmlGetAttr(child, "target") + # Find the dependency kind attribute + att_nodes <- xmlChildren(child) + dependency_kind <- findAtt(att_nodes, "dependency kind") + dependency_kind <- unlist(strsplit(dependency_kind, ",\\s*")) + return(data.table(id_from = id_from, id_to = id_to, dependency_kind = dependency_kind)) + } else { + return(NULL) + } }) - # Combine the lists into a single data frame - edge_list <- rbindlist(edge_list) - node_list <- rbindlist(node_list) - graph <- list(edge_list = edge_list, node_list = node_list) + # Remove NULLs and combine the results into a data frame + edge_list <- do.call(rbind, edge_elements[!sapply(edge_elements, is.null)]) + + # Create a list to return + graph <- list(node_list = node_list, edge_list = edge_list) return(graph) } @@ -266,6 +307,43 @@ parse_r_dependencies <- function(folder_path){ ############## Network Transform ############## +#' Transform parsed dependencies into a network +#' +#' @param depends_parsed Parsed data from understand_parse_class_dependencies. +#' @param weight_types The weight types as defined in Depends. 
+#' Accepts single string and vector input +#' +#' @export +#' @family edgelists +transform_und_class_dependencies_to_network <- function(parsed, weight_types) { + + nodes <- parsed[["node_list"]] + edges <- parsed[["edge_list"]] + + # Merge edges with nodes to get label_from + edges <- merge(edges, nodes[, .(id, node_label)], by.x = "id_from", by.y = "id", all.x = TRUE) + setnames(edges, "node_label", "label_from") + + # Merge again to get label_to + edges <- merge(edges, nodes[, .(id, node_label)], by.x = "id_to", by.y = "id", all.x = TRUE) + setnames(edges, "node_label", "label_to") + + # Reorder columns to have label_from and label_to on the left + edges <- edges[, .(label_from, label_to, id_from, id_to, dependency_kind)] + + # Filter out by weights + edges <- edges[dependency_kind %in% weight_types] + + # If filter removed all edges: + if (nrow(edges) == 0) { + stop("Error: No edges found under weight_types.") + } + + # Create a list to return + graph <- list(node_list = nodes, edge_list = edges) + return(graph) +} + #' Transform parsed dependencies into a network #' #' @param depends_parsed A parsed mbox by \code{\link{parse_dependencies}}. 
From b5bf0076f1c5fa7c6854d156e88034f9d090adf5 Mon Sep 17 00:00:00 2001 From: RavenMarQ Date: Tue, 1 Oct 2024 16:56:36 -1000 Subject: [PATCH 04/19] Update src.R Quick fix --- R/src.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/src.R b/R/src.R index 4b021066..2b4ffdab 100644 --- a/R/src.R +++ b/R/src.R @@ -37,7 +37,7 @@ understand_parse_dependencies <- function(project_path,language,output_dir="../t system2(command, args) args <- c("analyze", db_dir) system2(command, args) - args <- c("export", "-dependencies", "file", "cytoscape", xml_dir, db_dir) + args <- c("export", "-dependencies", parse_type, "cytoscape", xml_dir, db_dir) system2(command, args) # Parse the XML file From b17d2860e80e59a0b8c0f2423e8cb47aebf1902a Mon Sep 17 00:00:00 2001 From: RavenMarQ Date: Thu, 3 Oct 2024 03:11:23 -1000 Subject: [PATCH 05/19] Working On Creating Notebook Updating NAMESPACE to export the new functions and creating the Rmd, are the primary notes. The folder holding the sample project also is included locally, but uses the calculator project provided in Issue from Carlos --- NAMESPACE | 2 + R/src.R | 2 +- vignettes/understand_showcase.Rmd | 65 +++++++++++++++++++++++++++++++ 3 files changed, 68 insertions(+), 1 deletion(-) create mode 100644 vignettes/understand_showcase.Rmd diff --git a/NAMESPACE b/NAMESPACE index 2e3b17bf..3af220f6 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -157,6 +157,8 @@ export(transform_gitlog_to_temporal_network) export(transform_r_dependencies_to_network) export(transform_reply_to_bipartite_network) export(transform_temporal_gitlog_to_adsmj) +export(transform_und_dependencies_to_network) +export(understand_parse_dependencies) export(weight_scheme_count_deleted_nodes) export(weight_scheme_cum_temporal) export(weight_scheme_pairwise_cum_temporal) diff --git a/R/src.R b/R/src.R index 2b4ffdab..aee650dd 100644 --- a/R/src.R +++ b/R/src.R @@ -315,7 +315,7 @@ parse_r_dependencies <- function(folder_path){ #' #' @export #' @family edgelists 
-transform_und_class_dependencies_to_network <- function(parsed, weight_types) { +transform_und_dependencies_to_network <- function(parsed, weight_types) { nodes <- parsed[["node_list"]] edges <- parsed[["edge_list"]] diff --git a/vignettes/understand_showcase.Rmd b/vignettes/understand_showcase.Rmd new file mode 100644 index 00000000..568da759 --- /dev/null +++ b/vignettes/understand_showcase.Rmd @@ -0,0 +1,65 @@ +--- +title: "Understand Showcase" +output: + html_document: + toc: true + number_sections: true +vignette: > + %\VignetteEngine{knitr::rmarkdown} + %\VignetteIndexEntry{Understand Showcase} + %\VignetteEncoding{UTF-8} +--- + + +```{r warning = FALSE, message = FALSE} +rm(list = ls()) +library(kaiaulu) +library(visNetwork) +library(igraph) +library(data.table) +``` + + +# Parse a sample project folder + +```{r} +folder_path <- "../tests/sample_project" +``` + + +# File Dependencies + +```{r} +file_dependencies <- understand_parse_dependencies(project_path = folder_path, language = "java", parse_type = "file") +head(file_dependencies) +``` + + +# Class Dependencies + +```{r} +class_dependencies <- understand_parse_dependencies(project_path = folder_path, language = "java", parse_type = "class") +head(class_dependencies) +``` + +## File + +```{r} +file_graph <- transform_und_dependencies_to_network(parsed = file_dependencies, weight_types = c("", "")) +project_function_network <- igraph::graph_from_data_frame(d=function_graph[["edgelist"]], + directed = TRUE, + vertices = function_graph[["nodes"]]) +visIgraph(project_function_network,randomSeed = 1) +``` + + +## Class + +```{r} +class_graph <- transform_und_dependencies_to_network(parsed = class_dependencies, weight_types = c("", "")) +project_function_network <- igraph::graph_from_data_frame(d=file_graph[["edgelist"]], + directed = TRUE, + vertices = file_graph[["nodes"]]) +visIgraph(project_function_network,randomSeed = 1) +``` + From 57150b4ced06b0103d90cc6830efc5da971d3c35 Mon Sep 17 00:00:00 2001 From: 
RavenMarQ Date: Thu, 3 Oct 2024 12:42:13 -1000 Subject: [PATCH 06/19] Addressing the Code Review Most of the code review notes have been resolved, except for changing the file.path and descriptions. The notebook is currently being updated, but a preview on the proposed format is provided. --- DESCRIPTION | 5 +- NAMESPACE | 3 +- NEWS.md | 2 +- R/src.R | 85 ++++++++++++++++--------------- vignettes/understand_showcase.Rmd | 28 ++++++++-- 5 files changed, 73 insertions(+), 50 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 745f98c3..7d5ac0d8 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -2,7 +2,7 @@ Package: kaiaulu Type: Package Title: Kaiaulu Version: 0.0.0.9700 -Description: Kaiaulu is an R package and common interface that helps with understanding evolving software development communities, and the artifacts (GitLog, mailing list, files, etc.) which developers collaborate and communicate about. See Paradis et al., (2012) . +Description: Kaiaulu is an R package and common interface that helps with understanding evolving software development communities, and the artifacts (gitlog, mailing list, files, etc.) which developers collaborate and communicate about. See Paradis et al., (2012) . 
Authors@R: c( person('Carlos', 'Paradis', role = c('aut', 'cre'), email = 'cvas@hawaii.edu', @@ -21,6 +21,7 @@ Authors@R: c( person('Anthony', 'Lau', role = c('ctb')), person('Sean', 'Sunoo', role = c('ctb')), person('Ian Jaymes', 'Iwata', role= c('ctb')) + person('Raven', 'Quiddaoen', role= c('ctb')) ) Maintainer: Carlos Paradis License: MPL-2.0 | file LICENSE @@ -49,4 +50,4 @@ Imports: VignetteBuilder: knitr URL: https://github.com/sailuh/kaiaulu BugReports: https://github.com/sailuh/kaiaulu/issues -RoxygenNote: 7.2.3 +RoxygenNote: 7.3.2 diff --git a/NAMESPACE b/NAMESPACE index 3af220f6..7f6951eb 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -3,6 +3,7 @@ export(annotate_src_text) export(assign_exact_identity) export(bipartite_graph_projection) +export(build_understand_project) export(commit_message_id_coverage) export(community_oslom) export(convert_pipermail_to_mbox) @@ -132,6 +133,7 @@ export(parse_r_dependencies) export(parse_r_function_definition) export(parse_r_function_dependencies) export(parse_rfile_ast) +export(parse_understand_dependencies) export(query_src_text) export(query_src_text_class_names) export(query_src_text_namespace) @@ -158,7 +160,6 @@ export(transform_r_dependencies_to_network) export(transform_reply_to_bipartite_network) export(transform_temporal_gitlog_to_adsmj) export(transform_und_dependencies_to_network) -export(understand_parse_dependencies) export(weight_scheme_count_deleted_nodes) export(weight_scheme_cum_temporal) export(weight_scheme_pairwise_cum_temporal) diff --git a/NEWS.md b/NEWS.md index cf2de75b..b4afc2ae 100644 --- a/NEWS.md +++ b/NEWS.md @@ -2,7 +2,7 @@ __kaiaulu 0.0.0.9700 (in development)__ ========================= ### NEW FEATURES - + * `build_understand_project (project_path , language, output_dir)`, `parse_understand_dependencies(output_dir, parse_type)`, and `transform_understand_dependencies_to_network(parsed, weights)` have been added. 
These functions handle creating tables from xml data generated from Scitool's Understand. [#308](https://github.com/sailuh/kaiaulu/issues/308) * `refresh_jira_issues()` had been added. It is a wrapper function for the previous downloader and downloads only issues greater than the greatest key already downloaded. * `download_jira_issues()`, `download_jira_issues_by_issue_key()`, and `download_jira_issues_by_date()` has been added. This allows for downloading of Jira issues without the use of JirAgileR [#275](https://github.com/sailuh/kaiaulu/issues/275) and specification of issue Id and created ranges. It also interacts with `parse_jira_latest_date` to implement a refresh capability. * `make_jira_issue()` and `make_jira_issue_tracker()` no longer create fake issues following JirAgileR format, but instead the raw data obtained from JIRA API. This is compatible with the new parser function for JIRA. [#277](https://github.com/sailuh/kaiaulu/issues/277) diff --git a/R/src.R b/R/src.R index aee650dd..aa8ba8b6 100644 --- a/R/src.R +++ b/R/src.R @@ -4,68 +4,69 @@ # License, v. 2.0. If a copy of the MPL was not distributed with this # file, You can obtain one at https://mozilla.org/MPL/2.0/. 
-############## Parsers ############## +############## Understand Project Builder ############## -#' Parse file dependencies from Scitool's Understand +#' Build the Scitool's Understand project folder for analysis of a project +#' This function creates the data file for Understand #' #' @param project_path path to the project folder to analyze -#' @param language the language of the project (language must be supported by Understand) +#' @param language the primary language of the project (language must be supported by Understand) #' @param output_dir path to output directory (formatted output_path/) -#' @param parse_type Type of dependencies to generate into xml (either "file" or "class") #' @export #' @family parsers -understand_parse_dependencies <- function(project_path,language,output_dir="../tmp/", parse_type){ - # Before running, check if parse_type is correct - validInput <- function(input) { - is.character(input) && length(input) == 1 && input %in% c("file", "class") - } - if (!validInput(parse_type)) { - stop("Error: Invalid parse_type provided. Please input either \"file\" or \"class\"") - } +build_understand_project <- function(project_path, language, output_dir = "../tmp/"){ - # Use Understand to parse the code folder. 
- # Create the variables used in command lines + # Create variables for command line + command <- "und" project_path <- paste0("\"", project_path, "\"") db_dir <- paste0(output_dir, "/Understand.und") - xml_dir <- paste0(db_dir, "/", parse_type, "Dependencies.xml") - command <- "und" args <- c("create", "-db", db_dir, "-languages", language) - # Generate the XML file + # Build the Understand project system2(command, args) args <- c("-db", db_dir, "add", project_path) system2(command, args) args <- c("analyze", db_dir) system2(command, args) + +} + +############## Parsers ############## + +#' Parse dependencies from Scitool's Understand +#' +#' +#' @param understand_dir path to the built Understand project folder (same used in build_understand_project) +#' @param parse_type Type of dependencies to generate into xml (either "file" or "class") +#' @export +#' @family parsers +parse_understand_dependencies <- function(understand_dir="../tmp/", parse_type = C("file", "class")){ + # Before running, check if parse_type is correct + parse_type <- match.arg(parse_type) + + # Use Understand to parse the code folder. 
+ # Create the variables used in command lines + db_dir <- paste0(understand_dir, "/Understand.und") + xml_dir <- paste0(db_dir, "/", parse_type, "Dependencies.xml") + + # Generate the XML file args <- c("export", "-dependencies", parse_type, "cytoscape", xml_dir, db_dir) - system2(command, args) + system2("und", args) # Parse the XML file xml_data <- xmlParse(xml_dir) - xml_nodes <- xmlRoot(xml_data) - xml_nodes <- xmlChildren(xml_nodes) - - # Helper function to search for an attribute - findAtt <- function(search_nodes, att_name) { - found_att <- NA - for (att in search_nodes) { - if (xmlGetAttr(att, "name") == att_name) { - found_att <- xmlGetAttr(att, "value") - break - } - } - return(found_att) - } + xml_nodes <- xmlRoot(xml_data) # The head of the xml + xml_nodes <- xmlChildren(xml_nodes) # Retrieve all the subnodes of the head (the data) # From child nodes- filter for those with name "node" node_elements <- lapply(xml_nodes, function(child) { if (xmlName(child) == "node") { # Extract the id id <- xmlGetAttr(child, "id") - # Find the node.label attribute + # Extract the necessary attributes from the attribute list att_nodes <- xmlChildren(child) - node_label <- findAtt(att_nodes, "node.label") - long_name <- findAtt(att_nodes, "longName") + node_label <- xmlGetAttr(att_nodes[[3]], "value"); + long_name <- xmlGetAttr(att_nodes[[4]], "value"); return(data.table(node_label = node_label, id = id, long_name = long_name)) } else { return(NULL) @@ -73,7 +74,7 @@ understand_parse_dependencies <- function(project_path,language,output_dir="../t }) # Remove NULLs and combine the results into a data frame - node_list <- do.call(rbind, node_elements[!sapply(node_elements, is.null)]) + node_list <- rbindlist(node_elements[!sapply(edge_elements, is.null)], use.names = TRUE, fill = TRUE) # From child nodes- filter for those with name "edge" edge_elements <- lapply(xml_nodes, function(child) { @@ -81,10 +82,10 @@ understand_parse_dependencies <- 
function(project_path,language,output_dir="../t # Extract the id_from and id_to id_from <- xmlGetAttr(child, "source") id_to <- xmlGetAttr(child, "target") - # Find the dependency kind attribute + # Extract the necessary attributes from the attribute list att_nodes <- xmlChildren(child) - dependency_kind <- findAtt(att_nodes, "dependency kind") - dependency_kind <- unlist(strsplit(dependency_kind, ",\\s*")) + dependency_kind <- xmlGetAttr(att_nodes[[5]], "value"); + dependency_kind <- unlist(stri_split(dependency_kind, regex = ",\\s*")) return(data.table(id_from = id_from, id_to = id_to, dependency_kind = dependency_kind)) } else { return(NULL) @@ -92,7 +93,7 @@ understand_parse_dependencies <- function(project_path,language,output_dir="../t }) # Remove NULLs and combine the results into a data frame - edge_list <- do.call(rbind, edge_elements[!sapply(edge_elements, is.null)]) + edge_list <- rbindlist(edge_elements[!sapply(edge_elements, is.null)], use.names = TRUE, fill = TRUE) # Create a list to return graph <- list(node_list = node_list, edge_list = edge_list) @@ -309,13 +310,13 @@ parse_r_dependencies <- function(folder_path){ #' Transform parsed dependencies into a network #' -#' @param depends_parsed Parsed data from understand_parse_class_dependencies. +#' @param depends_parsed Parsed data from parse_understand_dependencies #' @param weight_types The weight types as defined in Depends. 
#' Accepts single string and vector input #' #' @export #' @family edgelists -transform_und_dependencies_to_network <- function(parsed, weight_types) { +transform_understand_dependencies_to_network <- function(parsed, weight_types) { nodes <- parsed[["node_list"]] edges <- parsed[["edge_list"]] diff --git a/vignettes/understand_showcase.Rmd b/vignettes/understand_showcase.Rmd index 568da759..5bc81f68 100644 --- a/vignettes/understand_showcase.Rmd +++ b/vignettes/understand_showcase.Rmd @@ -11,17 +11,29 @@ vignette: > --- +# Introduction + +Within a project, we might want to see the dependencies between files and classes. parse_r_dependencies and parse_dependencies uses in-house or Depends software, respectively, to analyze projects. parse_dependencies only provides file dependencies, while parse_r_dependencies provides file and function dependencies for a set of R files. Having Understand installed, we can analyze projects for both file and class dependencies in multiple languages like Java, PHP, HTML, C/C++, Python, Assembly, Ada, etc. + +Here is more information on [Scitool's Understand](https://scitools.com)). + +This notebook demonstrates a sample use case of the two functions that generates tables from the dependency data Understand outputs without opening Understand. + + ```{r warning = FALSE, message = FALSE} rm(list = ls()) -library(kaiaulu) -library(visNetwork) -library(igraph) -library(data.table) +require(kaiaulu) +require(visNetwork) +require(igraph) +require(data.table) ``` # Parse a sample project folder +For sample purposes, we will use the project from [Houari Zegai's Calculator](https://github.com/HouariZegai/Calculator)) saved into a folder called +sample_project. 
+ ```{r} folder_path <- "../tests/sample_project" ``` @@ -29,6 +41,10 @@ folder_path <- "../tests/sample_project" # File Dependencies +To generate a table containing the file dependencies of the project, provide the project_path, the primary project language, which in this case is Java, and the parse type we want: file. There is a fourth parameter named output_dir which allows us to redirect to an output folder so that one can see all the .xml and file data Understand generates (by default, will go to '../tmp'). + +Note the format of the generated table after running the below code. + ```{r} file_dependencies <- understand_parse_dependencies(project_path = folder_path, language = "java", parse_type = "file") head(file_dependencies) @@ -37,6 +53,10 @@ head(file_dependencies) # Class Dependencies +Near-identical to parsing for file dependencies, class dependencies only requires us change the parse_type to class. In the output folder, this .xml will be separate from the one generated for file dependencies: fileDependencies.xml and classDependencies.xml respectively. + +The generated data is in the same format, however note the different types of + ```{r} class_dependencies <- understand_parse_dependencies(project_path = folder_path, language = "java", parse_type = "class") head(class_dependencies) From fbb63203c41f64deb3a3d89529384a25f0652501 Mon Sep 17 00:00:00 2001 From: RavenMarQ Date: Thu, 3 Oct 2024 14:30:04 -1000 Subject: [PATCH 07/19] About-Completed Milestone Having addressing most things, especially using file.path, the functions are functional and the notebook works. 
--- DESCRIPTION | 2 +- NAMESPACE | 2 +- R/src.R | 40 +++++++++++---------- vignettes/understand_showcase.Rmd | 60 +++++++++++++++++++++++-------- 4 files changed, 69 insertions(+), 35 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 7d5ac0d8..8c78bc1f 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -20,7 +20,7 @@ Authors@R: c( person('Nicole', 'Hoess', role = c('ctb')), person('Anthony', 'Lau', role = c('ctb')), person('Sean', 'Sunoo', role = c('ctb')), - person('Ian Jaymes', 'Iwata', role= c('ctb')) + person('Ian Jaymes', 'Iwata', role= c('ctb')), person('Raven', 'Quiddaoen', role= c('ctb')) ) Maintainer: Carlos Paradis diff --git a/NAMESPACE b/NAMESPACE index 7f6951eb..cc0f689e 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -159,7 +159,7 @@ export(transform_gitlog_to_temporal_network) export(transform_r_dependencies_to_network) export(transform_reply_to_bipartite_network) export(transform_temporal_gitlog_to_adsmj) -export(transform_und_dependencies_to_network) +export(transform_understand_dependencies_to_network) export(weight_scheme_count_deleted_nodes) export(weight_scheme_cum_temporal) export(weight_scheme_pairwise_cum_temporal) diff --git a/R/src.R b/R/src.R index aa8ba8b6..9dfa5d34 100644 --- a/R/src.R +++ b/R/src.R @@ -6,8 +6,7 @@ ############## Understand Project Builder ############## -#' Build the Scitool's Understand project folder for analysis of a project -#' This function creates the data file for Understand +#' @description This function builds the data files for Understand from the provided project folder, reading from files that are written in the target language into a folder #' #' @param project_path path to the project folder to analyze #' @param language the primary language of the project (language must be supported by Understand) @@ -33,14 +32,13 @@ build_understand_project <- function(project_path, language, output_dir = "../tm ############## Parsers ############## -#' Parse dependencies from Scitool's Understand -#' +#' @description This 
function parses the data in the Understand build folder to find the parse_type dependencies into a list of two data.tables #' #' @param understand_dir path to the built Understand project folder (same used in build_understand_project) #' @param parse_type Type of dependencies to generate into xml (either "file" or "class") #' @export #' @family parsers -parse_understand_dependencies <- function(understand_dir="../tmp/", parse_type = C("file", "class")){ +parse_understand_dependencies <- function(understand_dir="../tmp/", parse_type = c("file", "class")) { # Before running, check if parse_type is correct parse_type <- match.arg(parse_type) @@ -55,18 +53,18 @@ parse_understand_dependencies <- function(understand_dir="../tmp/", parse_type = # Parse the XML file xml_data <- xmlParse(xml_dir) - xml_nodes <- xmlRoot(xml_data) # The head of the xml - xml_nodes <- xmlChildren(xml_nodes) # Retrieve all the subnodes of the head (the data) + xml_nodes <- xmlRoot(xml_data) + xml_nodes <- xmlChildren(xml_nodes) # From child nodes- filter for those with name "node" node_elements <- lapply(xml_nodes, function(child) { if (xmlName(child) == "node") { # Extract the id id <- xmlGetAttr(child, "id") - # Extract the necessary attributes from the attribute list + # Find the node.label attribute att_nodes <- xmlChildren(child) - node_label <- xmlGetAttr(att_nodes[[3]], "value"); - long_name <- xmlGetAttr(att_nodes[[4]], "value"); + node_label <- xmlGetAttr(att_nodes[[3]], "value") + long_name <- xmlGetAttr(att_nodes[[4]], "value") return(data.table(node_label = node_label, id = id, long_name = long_name)) } else { return(NULL) @@ -74,7 +72,7 @@ parse_understand_dependencies <- function(understand_dir="../tmp/", parse_type = }) # Remove NULLs and combine the results into a data frame - node_list <- rbindlist(node_elements[!sapply(edge_elements, is.null)], use.names = TRUE, fill = TRUE) + node_list <- rbindlist(node_elements[!sapply(node_elements, is.null)], use.names = TRUE, fill = TRUE) 
# From child nodes- filter for those with name "edge" edge_elements <- lapply(xml_nodes, function(child) { @@ -82,11 +80,15 @@ parse_understand_dependencies <- function(understand_dir="../tmp/", parse_type = # Extract the id_from and id_to id_from <- xmlGetAttr(child, "source") id_to <- xmlGetAttr(child, "target") - # Extract the necessary attributes from the attribute list + # Find the dependency kind attribute att_nodes <- xmlChildren(child) - dependency_kind <- xmlGetAttr(att_nodes[[5]], "value"); - dependency_kind <- unlist(stri_split(dependency_kind, regex = ",\\s*")) - return(data.table(id_from = id_from, id_to = id_to, dependency_kind = dependency_kind)) + dependency_kind <- xmlGetAttr(att_nodes[[5]], "value") + if (!is.null(dependency_kind) && dependency_kind != "") { + dependency_kind <- unlist(stri_split(dependency_kind, regex = ",\\s*")) + return(data.table(id_from = id_from, id_to = id_to, dependency_kind = dependency_kind)) + } else { + return(NULL) + } } else { return(NULL) } @@ -308,7 +310,7 @@ parse_r_dependencies <- function(folder_path){ ############## Network Transform ############## -#' Transform parsed dependencies into a network +#' @description This function transforms parsed Understand data.tables into networks by appending columns to edge_tables and filtering for weight_types. #' #' @param depends_parsed Parsed data from parse_understand_dependencies #' @param weight_types The weight types as defined in Depends. 
@@ -332,8 +334,10 @@ transform_understand_dependencies_to_network <- function(parsed, weight_types) { # Reorder columns to have label_from and label_to on the left edges <- edges[, .(label_from, label_to, id_from, id_to, dependency_kind)] - # Filter out by weights - edges <- edges[dependency_kind %in% weight_types] + # Filter out by weights if vector provided + if (length(weight_types) > 0) { + edges <- edges[dependency_kind %in% weight_types] + } # If filter removed all edges: if (nrow(edges) == 0) { diff --git a/vignettes/understand_showcase.Rmd b/vignettes/understand_showcase.Rmd index 5bc81f68..68835904 100644 --- a/vignettes/understand_showcase.Rmd +++ b/vignettes/understand_showcase.Rmd @@ -20,10 +20,13 @@ Here is more information on [Scitool's Understand](https://scitools.com)). This notebook demonstrates a sample use case of the two functions that generates tables from the dependency data Understand outputs without opening Understand. +# Required libraries + ```{r warning = FALSE, message = FALSE} rm(list = ls()) require(kaiaulu) require(visNetwork) +require(stringi) require(igraph) require(data.table) ``` @@ -32,54 +35,81 @@ require(data.table) # Parse a sample project folder For sample purposes, we will use the project from [Houari Zegai's Calculator](https://github.com/HouariZegai/Calculator)) saved into a folder called -sample_project. +sample_project. This folder contains all the .java files that the project uses and the ones we will be analyzing. ```{r} folder_path <- "../tests/sample_project" ``` +# Build the Understand project + +Before asking for the generation of data or parsing through it, we must first build the Understand project data. Firstly, provide the project_path and the primary project language, which in this case is Java. There is a third parameter named output_dir where the project will be built at (by default, will go to '../tmp'). 
After building once, you do not need to call this function again unless you have changed files within the project that would need to be re-analyzed. + +Please remember/save where the folder is generated as it is necessary for understand_parse_dependencies. + +```{r} +build_understand_project(project_path = folder_path, language = "java") +``` + + # File Dependencies -To generate a table containing the file dependencies of the project, provide the project_path, the primary project language, which in this case is Java, and the parse type we want: file. There is a fourth parameter named output_dir which allows us to redirect to an output folder so that one can see all the .xml and file data Understand generates (by default, will go to '../tmp'). +To generate a list containing the node and edge data.tables containing the file dependencies of the project, we will provide it the file path to the built Understand project folder and the parse_type. In this case, we will tell Understand to build the "file" dependencies for us. We will not provide a folder as the default folder to search for is '../tmp' (the same as found in build_understand_project). -Note the format of the generated table after running the below code. +Note the format of the generated data.tables after running the below code. ```{r} -file_dependencies <- understand_parse_dependencies(project_path = folder_path, language = "java", parse_type = "file") +file_dependencies <- parse_understand_dependencies(parse_type = "file") head(file_dependencies) ``` # Class Dependencies -Near-identical to parsing for file dependencies, class dependencies only requires us change the parse_type to class. In the output folder, this .xml will be separate from the one generated for file dependencies: fileDependencies.xml and classDependencies.xml respectively. +Near-identical to parsing for file dependencies, class dependencies only requires us change the parse_type to "class". 
In the output folder, this .xml will be separate from the one generated for file dependencies: fileDependencies.xml and classDependencies.xml respectively. -The generated data is in the same format, however note the different types of +The generated data is in the same format, however note the different types of dependency types in the edge_table. ```{r} -class_dependencies <- understand_parse_dependencies(project_path = folder_path, language = "java", parse_type = "class") +class_dependencies <- parse_understand_dependencies(parse_type = "class") head(class_dependencies) ``` -## File + +## Transforming to Network (File) + +To filter out edges by their dependency_kind and prepare the generated table from understand_parse_dependencies by appending label_from and label_to columns to the edge_table, we can call transform_und_dependencies_to_network. All we need to do is provide the generated data into parsed and the vector containing the filter weights into weight_types. + +Some valid weight_types that are included for files are: +- Import +- Call +- Create +- Use +- Type GenericArgument + +Apart from having a filtered edge_table, this data can also be used for inputting into graph_from_data_frame and visIgraph to visualize the table as a graph, as shown below where we filter by Call and Type GenericArgument. 
```{r} -file_graph <- transform_und_dependencies_to_network(parsed = file_dependencies, weight_types = c("", "")) -project_function_network <- igraph::graph_from_data_frame(d=function_graph[["edgelist"]], +file_graph <- transform_understand_dependencies_to_network(parsed = file_dependencies, weight_types = c("Call", "Type GenericArgument")) +head(file_graph) +project_function_network <- igraph::graph_from_data_frame(d=file_graph[["edge_list"]], directed = TRUE, - vertices = function_graph[["nodes"]]) + vertices = file_graph[["node_list"]]) visIgraph(project_function_network,randomSeed = 1) ``` -## Class +## Transforming to Network (Class) + +The same applies to outputting our class_dependencies, but we can visualize what our class dependency data is using the same parameters sans the parsed data table. ```{r} -class_graph <- transform_und_dependencies_to_network(parsed = class_dependencies, weight_types = c("", "")) -project_function_network <- igraph::graph_from_data_frame(d=file_graph[["edgelist"]], +class_graph <- transform_understand_dependencies_to_network(parsed = class_dependencies, weight_types = c("Call", "Type GenericArgument")) +head(class_graph) +project_function_network <- igraph::graph_from_data_frame(d=class_graph[["edge_list"]], directed = TRUE, - vertices = file_graph[["nodes"]]) + vertices = class_graph[["node_list"]]) visIgraph(project_function_network,randomSeed = 1) ``` From 654cac7ef689be7041ac96ca031e1116b123b92d Mon Sep 17 00:00:00 2001 From: RavenMarQ Date: Tue, 8 Oct 2024 17:23:07 -1000 Subject: [PATCH 08/19] Final Touches Resolving comments received on the notebook. 
--- DESCRIPTION | 2 +- R/src.R | 125 ++++++++++++++++++------------ vignettes/understand_showcase.Rmd | 35 +++++---- 3 files changed, 95 insertions(+), 67 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 8c78bc1f..41bc3299 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -50,4 +50,4 @@ Imports: VignetteBuilder: knitr URL: https://github.com/sailuh/kaiaulu BugReports: https://github.com/sailuh/kaiaulu/issues -RoxygenNote: 7.3.2 +RoxygenNote: 7.2.3 diff --git a/R/src.R b/R/src.R index 9dfa5d34..80cd9794 100644 --- a/R/src.R +++ b/R/src.R @@ -6,22 +6,22 @@ ############## Understand Project Builder ############## -#' @description This function builds the data files for Understand from the provided project folder, reading from files that are written in the target language into a folder +#' @description This function builds the data files for Understand from the project_path folder, reading from files that are written in the target language into output_dir #' #' @param project_path path to the project folder to analyze #' @param language the primary language of the project (language must be supported by Understand) #' @param output_dir path to output directory (formatted output_path/) #' @export #' @family parsers -build_understand_project <- function(project_path, language, output_dir = "../tmp/"){ - +build_understand_project <- function(project_path, language, output_dir){ # Create variables for command line command <- "und" - project_path <- paste0("\"", project_path, "\"") - db_dir <- paste0(output_dir, "/Understand.und") + project_path <- shQuote(project_path) # Quoting the project path + db_dir <- file.path(output_dir, "Understand.und") args <- c("create", "-db", db_dir, "-languages", language) - # Build the Understand project + # Build the Understand project by parsing through using Understand's und command + # Derived from pg. 352 in https://documentation.scitools.com/pdf/understand.pdf Sept. 
2024 Edition system2(command, args) args <- c("-db", db_dir, "add", project_path) system2(command, args) @@ -32,72 +32,108 @@ build_understand_project <- function(project_path, language, output_dir = "../tm ############## Parsers ############## -#' @description This function parses the data in the Understand build folder to find the parse_type dependencies into a list of two data.tables +#' @description This function parses the data in the Understand build folder to export the parse_type dependencies into a network #' -#' @param understand_dir path to the built Understand project folder (same used in build_understand_project) +#' @param understand_dir path to the built Understand project folder used in \code{\link{build_understand_project}} #' @param parse_type Type of dependencies to generate into xml (either "file" or "class") #' @export #' @family parsers -parse_understand_dependencies <- function(understand_dir="../tmp/", parse_type = c("file", "class")) { +parse_understand_dependencies <- function(understand_dir, parse_type = c("file", "class")) { # Before running, check if parse_type is correct parse_type <- match.arg(parse_type) - # Use Understand to parse the code folder. # Create the variables used in command lines - db_dir <- paste0(understand_dir, "/Understand.und") - xml_dir <- paste0(db_dir, "/", parse_type, "Dependencies.xml") + db_dir <- file.path(understand_dir, "Understand.und") + file_name <- paste0(parse_type, "Dependencies.xml") + xml_dir <- file.path(db_dir, file_name) # Generate the XML file + # Derived from pg. 352 in https://documentation.scitools.com/pdf/understand.pdf Sept. 2024 Edition args <- c("export", "-dependencies", parse_type, "cytoscape", xml_dir, db_dir) system2("und", args) + # Generated XML file is assumed to be in this approximate format (regardless of parse_type) using Understand Build 1202 + # + # ... [Irrelevant graph attributes and rdf grandchildren] + # + # + # + # + # + # + # + # + # ... 
[Other nodes sharing the format] + # + # + # + # + # + # + # + # ... [Other edges sharing the format] + + # Parse the XML file - xml_data <- xmlParse(xml_dir) - xml_nodes <- xmlRoot(xml_data) + xml_data <- xmlParse(xml_dir) # Creates pointer to file + xml_nodes <- xmlRoot(xml_data) # Finds the head: graph xml_nodes <- xmlChildren(xml_nodes) + # xml_nodes now contains the nodes and edges (which were children of graph) and also graph's atts # From child nodes- filter for those with name "node" + # Create a list by iterating through all the children in xml_nodes node_elements <- lapply(xml_nodes, function(child) { - if (xmlName(child) == "node") { - # Extract the id - id <- xmlGetAttr(child, "id") - # Find the node.label attribute - att_nodes <- xmlChildren(child) - node_label <- xmlGetAttr(att_nodes[[3]], "value") - long_name <- xmlGetAttr(att_nodes[[4]], "value") - return(data.table(node_label = node_label, id = id, long_name = long_name)) + if (xmlName(child) == "node") { # We're searching for nodes, not att or edges + id <- xmlGetAttr(child, "id") # Extract the id from the node line + att_nodes <- xmlChildren(child) # To access the atts of the node + node_label <- xmlGetAttr(att_nodes[[3]], "value") # Relevant att is the 3rd line + long_name <- xmlGetAttr(att_nodes[[4]], "value") # Relevant att is the 4th line + return(data.table(node_label = node_label, id = id, long_name = long_name)) # Returns the table containing the filtered node data } else { - return(NULL) + return(NULL) # Return NULL for the entry to be filtered out later } }) - # Remove NULLs and combine the results into a data frame + # Remove NULLs and combine the results from the node_elements list node_list <- rbindlist(node_elements[!sapply(node_elements, is.null)], use.names = TRUE, fill = TRUE) # From child nodes- filter for those with name "edge" + # Create a list by iterating through all the children in xml_nodes edge_elements <- lapply(xml_nodes, function(child) { - if (xmlName(child) == "edge") 
{ - # Extract the id_from and id_to + if (xmlName(child) == "edge") { # We're searching for edges, not att or nodes + # Extract the id_from and id_to from the edge line id_from <- xmlGetAttr(child, "source") id_to <- xmlGetAttr(child, "target") - # Find the dependency kind attribute - att_nodes <- xmlChildren(child) - dependency_kind <- xmlGetAttr(att_nodes[[5]], "value") + att_nodes <- xmlChildren(child) # To access the atts of the edge + dependency_kind <- xmlGetAttr(att_nodes[[5]], "value") # Relevant att is the 5th line + # Error handling for empty and NULL dependency_kind (this is necessary as errors do occur even in the formatted style) + # Code correctly handles all the edges, however produces error if error handling is not included... so... if (!is.null(dependency_kind) && dependency_kind != "") { - dependency_kind <- unlist(stri_split(dependency_kind, regex = ",\\s*")) - return(data.table(id_from = id_from, id_to = id_to, dependency_kind = dependency_kind)) + dependency_kind <- unlist(stri_split(dependency_kind, regex = ",\\s*")) # Separates the string into a vector + return(data.table(id_from = id_from, id_to = id_to, dependency_kind = dependency_kind)) # Returns the table containing the filtered node data } else { - return(NULL) + return(NULL) # Return NULL for the entry to be filtered out later } } else { - return(NULL) + return(NULL) # Return NULL for the entry to be filtered out later } }) - # Remove NULLs and combine the results into a data frame + # Remove NULLs and combine the results from the edge_elements list edge_list <- rbindlist(edge_elements[!sapply(edge_elements, is.null)], use.names = TRUE, fill = TRUE) - # Create a list to return + # Merge edges with nodes to get label_from + edge_list <- merge(edge_list, node_list[, .(id, node_label)], by.x = "id_from", by.y = "id", all.x = TRUE) + setnames(edge_list, "node_label", "label_from") + + # Merge again to get label_to + edge_list <- merge(edge_list, node_list[, .(id, node_label)], by.x = 
"id_to", by.y = "id", all.x = TRUE) + setnames(edge_list, "node_label", "label_to") + + # Reorder columns to have label_from and label_to on the left + edge_list <- edge_list[, .(label_from, label_to, id_from, id_to, dependency_kind)] + + # Create a list of the network to return graph <- list(node_list = node_list, edge_list = edge_list) return(graph) } @@ -310,12 +346,10 @@ parse_r_dependencies <- function(folder_path){ ############## Network Transform ############## -#' @description This function transforms parsed Understand data.tables into networks by appending columns to edge_tables and filtering for weight_types. -#' -#' @param depends_parsed Parsed data from parse_understand_dependencies -#' @param weight_types The weight types as defined in Depends. -#' Accepts single string and vector input +#' @description This function subsets a parsed table from parse_understand_dependencies #' +#' @param depends_parsed Parsed table from \code{\link{parse_understand_dependencies}} +#' @param weight_types The weight types as defined in Depends. 
Accepts single string and vector input #' @export #' @family edgelists transform_understand_dependencies_to_network <- function(parsed, weight_types) { @@ -323,17 +357,6 @@ transform_understand_dependencies_to_network <- function(parsed, weight_types) { nodes <- parsed[["node_list"]] edges <- parsed[["edge_list"]] - # Merge edges with nodes to get label_from - edges <- merge(edges, nodes[, .(id, node_label)], by.x = "id_from", by.y = "id", all.x = TRUE) - setnames(edges, "node_label", "label_from") - - # Merge again to get label_to - edges <- merge(edges, nodes[, .(id, node_label)], by.x = "id_to", by.y = "id", all.x = TRUE) - setnames(edges, "node_label", "label_to") - - # Reorder columns to have label_from and label_to on the left - edges <- edges[, .(label_from, label_to, id_from, id_to, dependency_kind)] - # Filter out by weights if vector provided if (length(weight_types) > 0) { edges <- edges[dependency_kind %in% weight_types] diff --git a/vignettes/understand_showcase.Rmd b/vignettes/understand_showcase.Rmd index 68835904..836a3901 100644 --- a/vignettes/understand_showcase.Rmd +++ b/vignettes/understand_showcase.Rmd @@ -13,7 +13,7 @@ vignette: > # Introduction -Within a project, we might want to see the dependencies between files and classes. parse_r_dependencies and parse_dependencies uses in-house or Depends software, respectively, to analyze projects. parse_dependencies only provides file dependencies, while parse_r_dependencies provides file and function dependencies for a set of R files. Having Understand installed, we can analyze projects for both file and class dependencies in multiple languages like Java, PHP, HTML, C/C++, Python, Assembly, Ada, etc. +Within a project, we might want to see the dependencies of files and classes between themselves respectively. parse_r_dependencies and parse_dependencies uses in-house or Depends software respectively to analyze projects. 
parse_dependencies only provides file dependencies, while parse_r_dependencies provides file and function dependencies for a set of R files. Having Understand installed, we can analyze projects for both file and class dependencies in multiple languages like Java, PHP, HTML, C/C++, Python, Assembly, Ada, etc. Here is more information on [Scitool's Understand](https://scitools.com)). @@ -26,6 +26,7 @@ This notebook demonstrates a sample use case of the two functions that generates rm(list = ls()) require(kaiaulu) require(visNetwork) +require(XML) require(stringi) require(igraph) require(data.table) @@ -34,50 +35,54 @@ require(data.table) # Parse a sample project folder -For sample purposes, we will use the project from [Houari Zegai's Calculator](https://github.com/HouariZegai/Calculator)) saved into a folder called -sample_project. This folder contains all the .java files that the project uses and the ones we will be analyzing. +For sample purposes, we will use the project from [Houari Zegai's Calculator](https://github.com/HouariZegai/Calculator) saved into a folder called sample_project. This folder contains all the .java files that the project uses and the ones we will be analyzing. The directory where we will build the understand analysis base will be saved in a folder called understand ```{r} -folder_path <- "../tests/sample_project" +project_path <- "tests/sample_project" +understand_folder <- "understand" ``` # Build the Understand project -Before asking for the generation of data or parsing through it, we must first build the Understand project data. Firstly, provide the project_path and the primary project language, which in this case is Java. There is a third parameter named output_dir where the project will be built at (by default, will go to '../tmp'). After building once, you do not need to call this function again unless you have changed files within the project that would need to be re-analyzed. 
+Before asking for the generation of data or parsing through it, we must first build the Understand project data. Firstly, provide the project_path and the primary project language, which in this case is Java. There is a third parameter named output_dir where the project will be built at, so we will give it understand_folder. After building once, you do not need to call this function again unless you have changed files within the project that would need to be re-analyzed. Please remember/save where the folder is generated as it is necessary for understand_parse_dependencies. ```{r} -build_understand_project(project_path = folder_path, language = "java") +build_understand_project(project_path = project_path, language = "java", output_dir = understand_folder) ``` -# File Dependencies +# Generate Dependencies Network +## For Files -To generate a list containing the node and edge data.tables containing the file dependencies of the project, we will provide it the file path to the built Understand project folder and the parse_type. In this case, we will tell Understand to build the "file" dependencies for us. We will not provide a folder as the default folder to search for is '../tmp' (the same as found in build_understand_project). +To generate a list containing the node and edge data.tables containing the file dependencies of the project, we will provide it the file path to the built Understand project folder and the parse_type. In this case, we will tell Understand to build the "file" dependencies for us. Once again, providing understand_folder as the value for understand_dir. Note the format of the generated data.tables after running the below code. 
```{r} -file_dependencies <- parse_understand_dependencies(parse_type = "file") -head(file_dependencies) +file_dependencies <- parse_understand_dependencies(understand_dir = understand_folder, parse_type = "file") +head(file_dependencies[["node_list"]]) +head(file_dependencies[["edge_list"]]) ``` -# Class Dependencies +## For Classes Near-identical to parsing for file dependencies, class dependencies only requires us change the parse_type to "class". In the output folder, this .xml will be separate from the one generated for file dependencies: fileDependencies.xml and classDependencies.xml respectively. The generated data is in the same format, however note the different types of dependency types in the edge_table. ```{r} -class_dependencies <- parse_understand_dependencies(parse_type = "class") -head(class_dependencies) +class_dependencies <- parse_understand_dependencies(understand_dir = understand_folder, parse_type = "class") +head(class_dependencies[["node_list"]]) +head(class_dependencies[["edge_list"]]) ``` -## Transforming to Network (File) +# Transforming to Network +## For Files To filter out edges by their dependency_kind and prepare the generated table from understand_parse_dependencies by appending label_from and label_to columns to the edge_table, we can call transform_und_dependencies_to_network. All we need to do is provide the generated data into parsed and the vector containing the filter weights into weight_types. @@ -100,7 +105,7 @@ visIgraph(project_function_network,randomSeed = 1) ``` -## Transforming to Network (Class) +## For Classes The same applies to outputting our class_dependencies, but we can visualize what our class dependency data is using the same parameters sans the parsed data table. 
From 3f3e425cd78ee8557480637beeb4484a8afda807 Mon Sep 17 00:00:00 2001 From: Nicholas Beydler Date: Wed, 9 Oct 2024 19:52:29 -1000 Subject: [PATCH 09/19] i #308 Refactored understand_showcase.Rmd - Refactored the understand_showcase.Rmd notebook to expect the use of the getters from R/config.R (i #230 contains the getter functions in R/config.R). --- vignettes/understand_showcase.Rmd | 21 ++++++++++++--------- 1 file changed, 12 insertions(+), 9 deletions(-) diff --git a/vignettes/understand_showcase.Rmd b/vignettes/understand_showcase.Rmd index 836a3901..3ce85ef0 100644 --- a/vignettes/understand_showcase.Rmd +++ b/vignettes/understand_showcase.Rmd @@ -32,16 +32,19 @@ require(igraph) require(data.table) ``` - -# Parse a sample project folder - -For sample purposes, we will use the project from [Houari Zegai's Calculator](https://github.com/HouariZegai/Calculator) saved into a folder called sample_project. This folder contains all the .java files that the project uses and the ones we will be analyzing. The directory where we will build the understand analysis base will be saved in a folder called understand +# Project Configuration File ```{r} -project_path <- "tests/sample_project" -understand_folder <- "understand" +conf <- parse_config("conf/kaiaulu.yml") +keep_dependencies_type <- get_understand_keep_dependencies_type(conf) +project_path <- get_understand_project_path(conf) +understand_folder <- get_understand_output_path(conf) +code_language <- get_understand_code_language(conf) ``` +# Parse a sample project folder + +For sample purposes, we will use the project from [Houari Zegai's Calculator](https://github.com/HouariZegai/Calculator) saved into a folder called sample_project. This folder contains all the .java files that the project uses and the ones we will be analyzing. 
The directory where we will build the understand analysis base will be saved in a folder called understand # Build the Understand project @@ -50,7 +53,7 @@ Before asking for the generation of data or parsing through it, we must first bu Please remember/save where the folder is generated as it is necessary for understand_parse_dependencies. ```{r} -build_understand_project(project_path = project_path, language = "java", output_dir = understand_folder) +build_understand_project(project_path = project_path, language = code_language, output_dir = understand_folder) ``` @@ -96,7 +99,7 @@ Some valid weight_types that are included for files are: Apart from having a filtered edge_table, this data can also be used for inputting into graph_from_data_frame and visIgraph to visualize the table as a graph, as shown below where we filter by Call and Type GenericArgument. ```{r} -file_graph <- transform_understand_dependencies_to_network(parsed = file_dependencies, weight_types = c("Call", "Type GenericArgument")) +file_graph <- transform_understand_dependencies_to_network(parsed = file_dependencies, weight_types = c(keep_dependencies_type[2], keep_dependencies_type[5])) head(file_graph) project_function_network <- igraph::graph_from_data_frame(d=file_graph[["edge_list"]], directed = TRUE, @@ -110,7 +113,7 @@ visIgraph(project_function_network,randomSeed = 1) The same applies to outputting our class_dependencies, but we can visualize what our class dependency data is using the same parameters sans the parsed data table. 
```{r} -class_graph <- transform_understand_dependencies_to_network(parsed = class_dependencies, weight_types = c("Call", "Type GenericArgument")) +class_graph <- transform_understand_dependencies_to_network(parsed = class_dependencies, weight_types = c(keep_dependencies_type[2], keep_dependencies_type[5])) head(class_graph) project_function_network <- igraph::graph_from_data_frame(d=class_graph[["edge_list"]], directed = TRUE, From 857d3c4eaeecb405700291dcb343c3fab96c561c Mon Sep 17 00:00:00 2001 From: RavenMarQ Date: Thu, 10 Oct 2024 15:11:29 -1000 Subject: [PATCH 10/19] After Merge with master Branch --- NEWS.md | 2 +- vignettes/understand_showcase.Rmd | 28 ++++++++++++++++++---------- 2 files changed, 19 insertions(+), 11 deletions(-) diff --git a/NEWS.md b/NEWS.md index 5da1b52c..8e1e9a31 100644 --- a/NEWS.md +++ b/NEWS.md @@ -2,7 +2,7 @@ __kaiaulu 0.0.0.9700 (in development)__ ========================= ### NEW FEATURES - + * `build_understand_project (project_path , language, output_dir)`, `parse_understand_dependencies(output_dir, parse_type)`, and `transform_understand_dependencies_to_network(parsed, weights)` have been added. These functions handle creating tables from xml data generated from Scitool's Understand. [#308](https://github.com/sailuh/kaiaulu/issues/308) * `refresh_jira_issues()` had been added. It is a wrapper function for the previous downloader and downloads only issues greater than the greatest key already downloaded. [#275](https://github.com/sailuh/kaiaulu/issues/275) * `download_jira_issues()`, `download_jira_issues_by_issue_key()`, and `download_jira_issues_by_date()` has been added. This allows for downloading of Jira issues without the use of JirAgileR and specification of issue Id and created ranges. It also interacts with `parse_jira_latest_date()` to implement a refresh capability. 
[#275](https://github.com/sailuh/kaiaulu/issues/275) * `make_jira_issue()` and `make_jira_issue_tracker()` no longer create fake issues following JirAgileR format, but instead the raw data obtained from JIRA API. This is compatible with the new parser function for JIRA. [#277](https://github.com/sailuh/kaiaulu/issues/277) diff --git a/vignettes/understand_showcase.Rmd b/vignettes/understand_showcase.Rmd index 3ce85ef0..f6a4b769 100644 --- a/vignettes/understand_showcase.Rmd +++ b/vignettes/understand_showcase.Rmd @@ -33,6 +33,10 @@ require(data.table) ``` # Project Configuration File +For our variables we will be grabbing them from our configuration file kaiaulu.yml. We will provide a short insight into what we're doing throughout the notebook, but to summarize: +- Our folder and path will be saved into the rawdata and analysis folders respectively +- Within the configuration file, keep_dependencies_type is a list of dependency types that Understand finds for analysis +- And our code_language. Although self-explanatory, Understand supports [many languages](https://support.scitools.com/support/solutions/articles/70000582794-supported-languages)- but for today we will be analyzing a Java project. ```{r} conf <- parse_config("conf/kaiaulu.yml") @@ -42,15 +46,15 @@ understand_folder <- get_understand_output_path(conf) code_language <- get_understand_code_language(conf) ``` -# Parse a sample project folder +## Parse a sample project folder -For sample purposes, we will use the project from [Houari Zegai's Calculator](https://github.com/HouariZegai/Calculator) saved into a folder called sample_project. This folder contains all the .java files that the project uses and the ones we will be analyzing. 
The directory where we will build the understand analysis base will be saved in a folder called understand +For sample purposes, we will use the project from [Houari Zegai's Calculator](https://github.com/HouariZegai/Calculator) saved into the folder ../../rawdata/kaiaulu/git_repo/understand/. This folder contains all the .java files that the project uses and the ones we will be analyzing. The directory where we will build the understand analysis base will be saved in a folder called understand # Build the Understand project Before asking for the generation of data or parsing through it, we must first build the Understand project data. Firstly, provide the project_path and the primary project language, which in this case is Java. There is a third parameter named output_dir where the project will be built at, so we will give it understand_folder. After building once, you do not need to call this function again unless you have changed files within the project that would need to be re-analyzed. -Please remember/save where the folder is generated as it is necessary for understand_parse_dependencies. +Please remember/save where the folder is generated as it is necessary for understand_parse_dependencies, but for the purpose of this notebook- we will save this output at ../../analysis/kaiaulu/understand ```{r} build_understand_project(project_path = project_path, language = code_language, output_dir = understand_folder) @@ -87,15 +91,19 @@ head(class_dependencies[["edge_list"]]) # Transforming to Network ## For Files -To filter out edges by their dependency_kind and prepare the generated table from understand_parse_dependencies by appending label_from and label_to columns to the edge_table, we can call transform_und_dependencies_to_network. All we need to do is provide the generated data into parsed and the vector containing the filter weights into weight_types. +To filter out edges by their dependency_kind we can call transform_understand_dependencies_to_network.
Although this function performs as a simple sub-setter (filters for the provided dependency types), as the parsed data is a valid network, this function is kept for consistency across our parser functions. -Some valid weight_types that are included for files are: -- Import -- Call -- Create -- Use -- Type GenericArgument +Back to the function, all we need to do is provide the generated data into parsed and the vector containing the filter weights into weight_types. +Some valid weight_types that are included are: +- Import (1) +- Call (2) +- Create (3) +- Use (4) +- Type GenericArgument (5) + +NOTE: These weight_types are listed in the same order saved into our keep_dependencies_type variable pulled from kaiaulu.yml, and are a list (note the index numbers provided next to each itemized list). Here is a [good place to start](https://documentation.scitools.com/pdf/understand.pdf) for reading on dependencies generated from Understand. + Apart from having a filtered edge_table, this data can also be used for inputting into graph_from_data_frame and visIgraph to visualize the table as a graph, as shown below where we filter by Call and Type GenericArgument. ```{r} From 814b5a4265b439bf4cc4955c9b2b8a2828b905d1 Mon Sep 17 00:00:00 2001 From: RavenMarQ Date: Thu, 10 Oct 2024 15:51:24 -1000 Subject: [PATCH 11/19] Documentation for the three functions --- R/src.R | 9 +++-- man/build_understand_project.Rd | 37 +++++++++++++++++++ man/parse_understand_dependencies.Rd | 35 ++++++++++++++++++ ...form_understand_dependencies_to_network.Rd | 31 ++++++++++++++++ 4 files changed, 109 insertions(+), 3 deletions(-) create mode 100644 man/build_understand_project.Rd create mode 100644 man/parse_understand_dependencies.Rd create mode 100644 man/transform_understand_dependencies_to_network.Rd diff --git a/R/src.R b/R/src.R index 80cd9794..dd8e0e3a 100644 --- a/R/src.R +++ b/R/src.R @@ -5,7 +5,8 @@ # file, You can obtain one at https://mozilla.org/MPL/2.0/. 
############## Understand Project Builder ############## - +#' Build Understand Analysis Folder +#' #' @description This function builds the data files for Understand from the project_path folder, reading from files that are written in the target language into output_dir #' #' @param project_path path to the project folder to analyze @@ -31,7 +32,8 @@ build_understand_project <- function(project_path, language, output_dir){ } ############## Parsers ############## - +#' Parse Built Folder to Network +#' #' @description This function parses the data in the Understand build folder to export the parse_type dependencies into a network #' #' @param understand_dir path to the built Understand project folder used in \code{\link{build_understand_project}} @@ -345,7 +347,8 @@ parse_r_dependencies <- function(folder_path){ } ############## Network Transform ############## - +#' Transform Understand Dependencies +#' #' @description This function subsets a parsed table from parse_understand_dependencies #' #' @param depends_parsed Parsed table from \code{\link{parse_understand_dependencies}} diff --git a/man/build_understand_project.Rd b/man/build_understand_project.Rd new file mode 100644 index 00000000..351bbe58 --- /dev/null +++ b/man/build_understand_project.Rd @@ -0,0 +1,37 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/src.R +\name{build_understand_project} +\alias{build_understand_project} +\title{Build Understand Analysis Folder} +\usage{ +build_understand_project(project_path, language, output_dir) +} +\arguments{ +\item{project_path}{path to the project folder to analyze} + +\item{language}{the primary language of the project (language must be supported by Understand)} + +\item{output_dir}{path to output directory (formatted output_path/)} +} +\description{ +This function builds the data files for Understand from the project_path folder, reading from files that are written in the target language into output_dir +} +\seealso{ +Other 
parsers: +\code{\link{parse_bugzilla_perceval_rest_issue_comments}()}, +\code{\link{parse_bugzilla_perceval_traditional_issue_comments}()}, +\code{\link{parse_bugzilla_rest_comments}()}, +\code{\link{parse_bugzilla_rest_issues}()}, +\code{\link{parse_bugzilla_rest_issues_comments}()}, +\code{\link{parse_commit_message_id}()}, +\code{\link{parse_dependencies}()}, +\code{\link{parse_dv8_clusters}()}, +\code{\link{parse_gitlog}()}, +\code{\link{parse_jira}()}, +\code{\link{parse_jira_latest_date}()}, +\code{\link{parse_jira_rss_xml}()}, +\code{\link{parse_mbox}()}, +\code{\link{parse_nvdfeed}()}, +\code{\link{parse_understand_dependencies}()} +} +\concept{parsers} diff --git a/man/parse_understand_dependencies.Rd b/man/parse_understand_dependencies.Rd new file mode 100644 index 00000000..55eb1b46 --- /dev/null +++ b/man/parse_understand_dependencies.Rd @@ -0,0 +1,35 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/src.R +\name{parse_understand_dependencies} +\alias{parse_understand_dependencies} +\title{Parse Built Folder to Network} +\usage{ +parse_understand_dependencies(understand_dir, parse_type = c("file", "class")) +} +\arguments{ +\item{understand_dir}{path to the built Understand project folder used in \code{\link{build_understand_project}}} + +\item{parse_type}{Type of dependencies to generate into xml (either "file" or "class")} +} +\description{ +This function parses the data in the Understand build folder to export the parse_type dependencies into a network +} +\seealso{ +Other parsers: +\code{\link{build_understand_project}()}, +\code{\link{parse_bugzilla_perceval_rest_issue_comments}()}, +\code{\link{parse_bugzilla_perceval_traditional_issue_comments}()}, +\code{\link{parse_bugzilla_rest_comments}()}, +\code{\link{parse_bugzilla_rest_issues}()}, +\code{\link{parse_bugzilla_rest_issues_comments}()}, +\code{\link{parse_commit_message_id}()}, +\code{\link{parse_dependencies}()}, +\code{\link{parse_dv8_clusters}()}, 
+\code{\link{parse_gitlog}()}, +\code{\link{parse_jira}()}, +\code{\link{parse_jira_latest_date}()}, +\code{\link{parse_jira_rss_xml}()}, +\code{\link{parse_mbox}()}, +\code{\link{parse_nvdfeed}()} +} +\concept{parsers} diff --git a/man/transform_understand_dependencies_to_network.Rd b/man/transform_understand_dependencies_to_network.Rd new file mode 100644 index 00000000..c6b4ff35 --- /dev/null +++ b/man/transform_understand_dependencies_to_network.Rd @@ -0,0 +1,31 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/src.R +\name{transform_understand_dependencies_to_network} +\alias{transform_understand_dependencies_to_network} +\title{Transform Understand Dependencies} +\usage{ +transform_understand_dependencies_to_network(parsed, weight_types) +} +\arguments{ +\item{weight_types}{The weight types as defined in Depends. Accepts single string and vector input} + +\item{depends_parsed}{Parsed table from \code{\link{parse_understand_dependencies}}} +} +\description{ +This function subsets a parsed table from parse_understand_dependencies +} +\seealso{ +Other edgelists: +\code{\link{transform_commit_message_id_to_network}()}, +\code{\link{transform_cve_cwe_file_to_network}()}, +\code{\link{transform_dependencies_to_network}()}, +\code{\link{transform_dependencies_to_sdsmj}()}, +\code{\link{transform_gitlog_to_bipartite_network}()}, +\code{\link{transform_gitlog_to_entity_bipartite_network}()}, +\code{\link{transform_gitlog_to_entity_temporal_network}()}, +\code{\link{transform_gitlog_to_hdsmj}()}, +\code{\link{transform_gitlog_to_temporal_network}()}, +\code{\link{transform_reply_to_bipartite_network}()}, +\code{\link{transform_temporal_gitlog_to_adsmj}()} +} +\concept{edgelists} From ac36de200cc85b4ba592e5c103d2a9e9d183cb94 Mon Sep 17 00:00:00 2001 From: RavenMarQ Date: Thu, 10 Oct 2024 16:03:46 -1000 Subject: [PATCH 12/19] Fixing mismatched name in Documentation --- R/src.R | 2 +- man/transform_understand_dependencies_to_network.Rd | 4 
++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/R/src.R b/R/src.R index dd8e0e3a..34bb420d 100644 --- a/R/src.R +++ b/R/src.R @@ -351,7 +351,7 @@ parse_r_dependencies <- function(folder_path){ #' #' @description This function subsets a parsed table from parse_understand_dependencies #' -#' @param depends_parsed Parsed table from \code{\link{parse_understand_dependencies}} +#' @param parsed Parsed table from \code{\link{parse_understand_dependencies}} #' @param weight_types The weight types as defined in Depends. Accepts single string and vector input #' @export #' @family edgelists diff --git a/man/transform_understand_dependencies_to_network.Rd b/man/transform_understand_dependencies_to_network.Rd index c6b4ff35..b09eb9a3 100644 --- a/man/transform_understand_dependencies_to_network.Rd +++ b/man/transform_understand_dependencies_to_network.Rd @@ -7,9 +7,9 @@ transform_understand_dependencies_to_network(parsed, weight_types) } \arguments{ -\item{weight_types}{The weight types as defined in Depends. Accepts single string and vector input} +\item{parsed}{Parsed table from \code{\link{parse_understand_dependencies}}} -\item{depends_parsed}{Parsed table from \code{\link{parse_understand_dependencies}}} +\item{weight_types}{The weight types as defined in Depends. Accepts single string and vector input} } \description{ This function subsets a parsed table from parse_understand_dependencies From 9f999c13d47ced1c42f1951d0dc405033228e295 Mon Sep 17 00:00:00 2001 From: Nicholas Beydler Date: Thu, 17 Oct 2024 18:12:04 -1000 Subject: [PATCH 13/19] i #308 Fixed Relative Paths in a Notebook - The project configuration sections of a notebook was incorrectly using the project directory (kaiaulu/) as its working directory rather than the directory that it resides in (/vignettes/) as its working directory. 
--- vignettes/understand_showcase.Rmd | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vignettes/understand_showcase.Rmd b/vignettes/understand_showcase.Rmd index f6a4b769..fb6cc476 100644 --- a/vignettes/understand_showcase.Rmd +++ b/vignettes/understand_showcase.Rmd @@ -39,7 +39,7 @@ For our variables we will be grabbing them from our configuration file kaiaulu.y - And our code_language. Although self-explanatory, Understand supports [many languages](https://support.scitools.com/support/solutions/articles/70000582794-supported-languages)- but for today we will be analyzing a Java project. ```{r} -conf <- parse_config("conf/kaiaulu.yml") +conf <- parse_config("../conf/kaiaulu.yml") keep_dependencies_type <- get_understand_keep_dependencies_type(conf) project_path <- get_understand_project_path(conf) understand_folder <- get_understand_output_path(conf) From 222e3bae8b03882f8d53f5257fddd105a7a24237 Mon Sep 17 00:00:00 2001 From: Carlos Paradis Date: Sun, 8 Dec 2024 02:41:52 -0800 Subject: [PATCH 14/19] Remove unrelated files Signed-off-by: Carlos Paradis --- .idea/.gitignore | 3 --- .idea/kaiaulu.iml | 9 --------- .idea/misc.xml | 6 ------ .idea/modules.xml | 8 -------- .idea/vcs.xml | 6 ------ 5 files changed, 32 deletions(-) delete mode 100644 .idea/.gitignore delete mode 100644 .idea/kaiaulu.iml delete mode 100644 .idea/misc.xml delete mode 100644 .idea/modules.xml delete mode 100644 .idea/vcs.xml diff --git a/.idea/.gitignore b/.idea/.gitignore deleted file mode 100644 index 26d33521..00000000 --- a/.idea/.gitignore +++ /dev/null @@ -1,3 +0,0 @@ -# Default ignored files -/shelf/ -/workspace.xml diff --git a/.idea/kaiaulu.iml b/.idea/kaiaulu.iml deleted file mode 100644 index d6ebd480..00000000 --- a/.idea/kaiaulu.iml +++ /dev/null @@ -1,9 +0,0 @@ - - - - - - - - - \ No newline at end of file diff --git a/.idea/misc.xml b/.idea/misc.xml deleted file mode 100644 index 639900d1..00000000 --- a/.idea/misc.xml +++ /dev/null @@ -1,6 +0,0 @@ - - - - - 
- \ No newline at end of file diff --git a/.idea/modules.xml b/.idea/modules.xml deleted file mode 100644 index 59f59099..00000000 --- a/.idea/modules.xml +++ /dev/null @@ -1,8 +0,0 @@ - - - - - - - - \ No newline at end of file diff --git a/.idea/vcs.xml b/.idea/vcs.xml deleted file mode 100644 index 35eb1ddf..00000000 --- a/.idea/vcs.xml +++ /dev/null @@ -1,6 +0,0 @@ - - - - - - \ No newline at end of file From a683b4c3ff6cb360619f27fbd748464e3713ac4e Mon Sep 17 00:00:00 2001 From: Carlos Paradis Date: Sun, 8 Dec 2024 03:20:01 -0800 Subject: [PATCH 15/19] Add scitools to tools.yml change conf used Changed conf used to helix, the same used as depends_showcase notebook Added scitools understand to tools.yml as not every computer will have und available. Signed-off-by: Carlos Paradis --- R/src.R | 41 +++++++++++++++++++++---------- conf/helix.yml | 15 ++++++++++- conf/kaiaulu.yml | 15 ----------- tools.yml | 2 ++ vignettes/understand_showcase.Rmd | 33 ++++++++++++++++++------- 5 files changed, 68 insertions(+), 38 deletions(-) diff --git a/R/src.R b/R/src.R index 34bb420d..81adc4f2 100644 --- a/R/src.R +++ b/R/src.R @@ -5,42 +5,56 @@ # file, You can obtain one at https://mozilla.org/MPL/2.0/. 
############## Understand Project Builder ############## -#' Build Understand Analysis Folder + +#' Build Understand DB #' -#' @description This function builds the data files for Understand from the project_path folder, reading from files that are written in the target language into output_dir +#' This function builds the data files for Understand from the project_path folder, +#' reading from files that are written in the target language into output_dir #' +#' @param scitools_path path to the scitools binary `und` #' @param project_path path to the project folder to analyze #' @param language the primary language of the project (language must be supported by Understand) #' @param output_dir path to output directory (formatted output_path/) +#' +#' @return The output directory where the db will be created, i.e. output_dir parameter. +#' @references See pg. 352 in https://documentation.scitools.com/pdf/understand.pdf Sept. 2024 Edition #' @export #' @family parsers -build_understand_project <- function(project_path, language, output_dir){ +build_understand_project <- function(scitools_path, project_path, language, output_dir){ + + scitools_path <- path.expand(scitools_path) + # Create variables for command line - command <- "und" + command <- scitools_path project_path <- shQuote(project_path) # Quoting the project path db_dir <- file.path(output_dir, "Understand.und") args <- c("create", "-db", db_dir, "-languages", language) # Build the Understand project by parsing through using Understand's und command - # Derived from pg. 352 in https://documentation.scitools.com/pdf/understand.pdf Sept. 
2024 Edition - system2(command, args) + build_output <- system2(command, args) args <- c("-db", db_dir, "add", project_path) - system2(command, args) - args <- c("analyze", db_dir) - system2(command, args) + db_output <- system2(command, args) + analyze_output <- args <- c("analyze", db_dir) + output <- system2(command, args) + + return(output_dir) } ############## Parsers ############## -#' Parse Built Folder to Network + +#' Parse XML from Understand DB #' -#' @description This function parses the data in the Understand build folder to export the parse_type dependencies into a network +#' This function parses the data in the Understand build folder to export the parse_type dependencies into a network #' +#' @param scitools_path path to the scitools binary `und` #' @param understand_dir path to the built Understand project folder used in \code{\link{build_understand_project}} #' @param parse_type Type of dependencies to generate into xml (either "file" or "class") #' @export #' @family parsers -parse_understand_dependencies <- function(understand_dir, parse_type = c("file", "class")) { +parse_understand_dependencies <- function(scitools_path, understand_dir, parse_type = c("file", "class")) { + scitools_path <- path.expand(scitools_path) + # Before running, check if parse_type is correct parse_type <- match.arg(parse_type) @@ -52,7 +66,7 @@ parse_understand_dependencies <- function(understand_dir, parse_type = c("file", # Generate the XML file # Derived from pg. 352 in https://documentation.scitools.com/pdf/understand.pdf Sept. 
2024 Edition args <- c("export", "-dependencies", parse_type, "cytoscape", xml_dir, db_dir) - system2("und", args) + output <- system2(scitools_path, args) # Generated XML file is assumed to be in this approximate format (regardless of parse_type) using Understand Build 1202 # @@ -347,6 +361,7 @@ parse_r_dependencies <- function(folder_path){ } ############## Network Transform ############## + #' Transform Understand Dependencies #' #' @description This function subsets a parsed table from parse_understand_dependencies diff --git a/conf/helix.yml b/conf/helix.yml index 9acd7804..aedf0a58 100644 --- a/conf/helix.yml +++ b/conf/helix.yml @@ -219,7 +219,20 @@ tool: # project_path: ../../rawdata/kaiaulu/git_repo/understand/ # # Where the output for the understands analysis is stored # output_path: ../../analysis/kaiaulu/understand/ - + understand: + # Accepts one language at a time: ada, assembly, c/c++, c#, fortran, java, jovial, delphi/pascal, python, vhdl, basic, javascript + code_language: java + # Specify which types of Dependencies to keep + keep_dependencies_type: + - Import + - Call + - Create + - Use + - Type GenericArgument + # Where the files to analyze should be stored + project_path: ../../rawdata/helix/git_repo/helix/ + # Where the output for the understands analysis is stored + output_path: ../../analysis/helix/understand/ # Analysis Configuration # analysis: # You can specify the intervals in 2 ways: window, or enumeration diff --git a/conf/kaiaulu.yml b/conf/kaiaulu.yml index 14cc2ed9..800e4b44 100644 --- a/conf/kaiaulu.yml +++ b/conf/kaiaulu.yml @@ -208,21 +208,6 @@ tool: # 3. Use sudo ./gradlew build # 4. 
After building, locate the engine class files and specify as the class_folder_path: # in this case they are in: /path/to/junit5/analysis/junit-platform-engine/build/classes/java/main/org/junit/platform/engine/ - understand: - # Accepts one language at a time: ada, assembly, c/c++, c#, fortran, java, jovial, delphi/pascal, python, vhdl, basic, javascript - code_language: java - # Specify which types of Dependencies to keep - keep_dependencies_type: - - Import - - Call - - Create - - Use - - Type GenericArgument - # Where the files to analyze should be stored - project_path: ../../rawdata/kaiaulu/git_repo/understand/ - # Where the output for the understands analysis is stored - output_path: ../../analysis/kaiaulu/understand/ - # Analysis Configuration # analysis: diff --git a/tools.yml b/tools.yml index 27951fe6..df8bc87d 100644 --- a/tools.yml +++ b/tools.yml @@ -17,3 +17,5 @@ oslom_undir: ~/OSLOM2/oslom_undir srcml: /usr/local/bin/srcml # pattern4: https://users.encs.concordia.ca/~nikolaos/pattern_detection.html pattern4: ~/Desktop/kaiaulu/tools/pattern4.jar +# Scitools Understand: https://scitools.com/ +scitools: /Applications/Understand.app/Contents/MacOS/und diff --git a/vignettes/understand_showcase.Rmd b/vignettes/understand_showcase.Rmd index fb6cc476..4e6bb2e7 100644 --- a/vignettes/understand_showcase.Rmd +++ b/vignettes/understand_showcase.Rmd @@ -30,6 +30,7 @@ require(XML) require(stringi) require(igraph) require(data.table) +require(gt) ``` # Project Configuration File @@ -39,7 +40,10 @@ For our variables we will be grabbing them from our configuration file kaiaulu.y - And our code_language. Although self-explanatory, Understand supports [many languages](https://support.scitools.com/support/solutions/articles/70000582794-supported-languages)- but for today we will be analyzing a Java project. 
```{r} -conf <- parse_config("../conf/kaiaulu.yml") +tool <- parse_config("../tools.yml") +scitools_path <- get_tool_project("scitools", tool) + +conf <- parse_config("../conf/helix.yml") keep_dependencies_type <- get_understand_keep_dependencies_type(conf) project_path <- get_understand_project_path(conf) understand_folder <- get_understand_output_path(conf) @@ -56,8 +60,8 @@ Before asking for the generation of data or parsing through it, we must first bu Please remember/save where the folder is generated as it is necessary for understand_parse_dependencies, but for the purpose of this notebook- we will save this output at ../../analysis/kaiaulu/understand -```{r} -build_understand_project(project_path = project_path, language = code_language, output_dir = understand_folder) +```{r eval = FALSE} +build_understand_project(scitools_path = scitools_path, project_path = project_path, language = code_language, output_dir = understand_folder) ``` @@ -69,9 +73,14 @@ To generate a list containing the node and edge data.tables containing the file Note the format of the generated data.tables after running the below code. ```{r} -file_dependencies <- parse_understand_dependencies(understand_dir = understand_folder, parse_type = "file") -head(file_dependencies[["node_list"]]) -head(file_dependencies[["edge_list"]]) +file_dependencies <- parse_understand_dependencies(scitools_path = scitools_path, understand_dir = understand_folder, parse_type = "file") + +head(file_dependencies[["node_list"]]) %>% + gt(auto_align = FALSE) +``` +```{r} +head(file_dependencies[["edge_list"]]) %>% + gt(auto_align = FALSE) ``` @@ -82,9 +91,15 @@ Near-identical to parsing for file dependencies, class dependencies only require The generated data is in the same format, however note the different types of dependency types in the edge_table. 
```{r} -class_dependencies <- parse_understand_dependencies(understand_dir = understand_folder, parse_type = "class") -head(class_dependencies[["node_list"]]) -head(class_dependencies[["edge_list"]]) +class_dependencies <- parse_understand_dependencies(scitools_path = scitools_path, understand_dir = understand_folder, parse_type = "class") + +head(class_dependencies[["node_list"]]) %>% + gt(auto_align = FALSE) +``` + +```{r} +head(class_dependencies[["edge_list"]]) %>% + gt(auto_align = FALSE) ``` From a38e6177e2a2d18fe8b40a5b4407d7959b03c4f8 Mon Sep 17 00:00:00 2001 From: Carlos Paradis Date: Sun, 8 Dec 2024 03:44:30 -0800 Subject: [PATCH 16/19] Makes raw .xml files to be saved on folder This modifies the XML files so they are saved locally. This adds an additional function step, removes the scitools and dependency from parser. The independent parser function means the raw data is not locked behind the db, should a new version make it incompatible to extract in the future. It also facilitates inspection of the raw data should the parser ever break in the future. 
Signed-off-by: Carlos Paradis --- NAMESPACE | 1 + R/src.R | 47 ++++++++++++----- man/build_understand_project.Rd | 20 +++++-- man/export_understand_dependencies.Rd | 52 +++++++++++++++++++ ...e_bugzilla_perceval_rest_issue_comments.Rd | 5 +- ...lla_perceval_traditional_issue_comments.Rd | 5 +- man/parse_bugzilla_rest_comments.Rd | 5 +- man/parse_bugzilla_rest_issues.Rd | 5 +- man/parse_bugzilla_rest_issues_comments.Rd | 5 +- man/parse_commit_message_id.Rd | 5 +- man/parse_dependencies.Rd | 5 +- man/parse_dv8_clusters.Rd | 5 +- man/parse_gitlog.Rd | 5 +- man/parse_jira.Rd | 5 +- man/parse_jira_latest_date.Rd | 5 +- man/parse_jira_rss_xml.Rd | 5 +- man/parse_mbox.Rd | 5 +- man/parse_nvdfeed.Rd | 5 +- man/parse_understand_dependencies.Rd | 13 ++--- man/transform_commit_message_id_to_network.Rd | 3 +- man/transform_cve_cwe_file_to_network.Rd | 3 +- man/transform_dependencies_to_network.Rd | 3 +- man/transform_dependencies_to_sdsmj.Rd | 3 +- man/transform_gitlog_to_bipartite_network.Rd | 3 +- ...form_gitlog_to_entity_bipartite_network.Rd | 3 +- ...sform_gitlog_to_entity_temporal_network.Rd | 3 +- man/transform_gitlog_to_hdsmj.Rd | 3 +- man/transform_gitlog_to_temporal_network.Rd | 3 +- man/transform_reply_to_bipartite_network.Rd | 3 +- man/transform_temporal_gitlog_to_adsmj.Rd | 3 +- vignettes/understand_showcase.Rmd | 23 ++++++-- 31 files changed, 205 insertions(+), 54 deletions(-) create mode 100644 man/export_understand_dependencies.Rd diff --git a/NAMESPACE b/NAMESPACE index 66ca9927..7321af5b 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -43,6 +43,7 @@ export(example_notebook_alternating_function_in_files) export(example_notebook_function_in_code_blocks) export(example_renamed_file) export(example_test_example_src_repo) +export(export_understand_dependencies) export(filter_by_commit_interval) export(filter_by_commit_size) export(filter_by_file_extension) diff --git a/R/src.R b/R/src.R index 81adc4f2..f864591b 100644 --- a/R/src.R +++ b/R/src.R @@ -16,7 +16,7 @@ #' 
@param language the primary language of the project (language must be supported by Understand) #' @param output_dir path to output directory (formatted output_path/) #' -#' @return The output directory where the db will be created, i.e. output_dir parameter. +#' @return The created Scitools Understand DB path #' @references See pg. 352 in https://documentation.scitools.com/pdf/understand.pdf Sept. 2024 Edition #' @export #' @family parsers @@ -37,37 +37,44 @@ build_understand_project <- function(scitools_path, project_path, language, outp analyze_output <- args <- c("analyze", db_dir) output <- system2(command, args) - return(output_dir) + return(db_dir) } -############## Parsers ############## - -#' Parse XML from Understand DB +#' Extract Understand Dependencies #' -#' This function parses the data in the Understand build folder to export the parse_type dependencies into a network +#' Extract XML dependency files for either class or file granularity from +#' an understand DB. #' #' @param scitools_path path to the scitools binary `und` -#' @param understand_dir path to the built Understand project folder used in \code{\link{build_understand_project}} +#' @param db_filepath path to the scitools DB (see \code{\link{build_understand_project}}) #' @param parse_type Type of dependencies to generate into xml (either "file" or "class") +#' @param output_filepath path to output XML filepath of dependencies +#' +#' @return The path to the exported XML dependencies file, i.e. the output_filepath parameter. +#' @references See pg. 352 in https://documentation.scitools.com/pdf/understand.pdf Sept.
2024 Edition #' @export #' @family parsers -parse_understand_dependencies <- function(scitools_path, understand_dir, parse_type = c("file", "class")) { +export_understand_dependencies <- function(scitools_path, db_filepath, parse_type = c("file", "class"), output_filepath){ + scitools_path <- path.expand(scitools_path) # Before running, check if parse_type is correct parse_type <- match.arg(parse_type) # Create the variables used in command lines - db_dir <- file.path(understand_dir, "Understand.und") - file_name <- paste0(parse_type, "Dependencies.xml") - xml_dir <- file.path(db_dir, file_name) + #db_dir <- file.path(understand_dir, "Understand.und") + + #file_name <- paste0(parse_type, "Dependencies.xml") + #xml_dir <- file.path(db_dir, file_name) # Generate the XML file # Derived from pg. 352 in https://documentation.scitools.com/pdf/understand.pdf Sept. 2024 Edition - args <- c("export", "-dependencies", parse_type, "cytoscape", xml_dir, db_dir) + args <- c("export", "-dependencies", parse_type, "cytoscape", output_filepath, db_filepath) output <- system2(scitools_path, args) + return(output_filepath) + # Generated XML file is assumed to be in this approximate format (regardless of parse_type) using Understand Build 1202 # # ... [Irrelevant graph attributes and rdf grandchildren] @@ -90,8 +97,22 @@ parse_understand_dependencies <- function(scitools_path, understand_dir, parse_t # ... [Other edges sharing the format] +} + +############## Parsers ############## + +#' Parse XML from Understand DB +#' +#' This function parses the data in the Understand build folder +#' to export the parse_type dependencies into a network +#' +#' @param dependencies_path path to the exported Understand dependencies file (see \code{\link{export_understand_dependencies}}). 
+#' @export +#' @family parsers +parse_understand_dependencies <- function(dependencies_path) { + # Parse the XML file - xml_data <- xmlParse(xml_dir) # Creates pointer to file + xml_data <- xmlParse(dependencies_path) # Creates pointer to file xml_nodes <- xmlRoot(xml_data) # Finds the head: graph xml_nodes <- xmlChildren(xml_nodes) # xml_nodes now contains the nodes and edges (which were children of graph) and also graph's atts diff --git a/man/build_understand_project.Rd b/man/build_understand_project.Rd index 351bbe58..c7325dbd 100644 --- a/man/build_understand_project.Rd +++ b/man/build_understand_project.Rd @@ -2,34 +2,44 @@ % Please edit documentation in R/src.R \name{build_understand_project} \alias{build_understand_project} -\title{Build Understand Analysis Folder} +\title{Build Understand DB} \usage{ -build_understand_project(project_path, language, output_dir) +build_understand_project(scitools_path, project_path, language, output_dir) } \arguments{ +\item{scitools_path}{path to the scitools binary `und`} + \item{project_path}{path to the project folder to analyze} \item{language}{the primary language of the project (language must be supported by Understand)} \item{output_dir}{path to output directory (formatted output_path/)} } +\value{ +The created Scitools Understand DB path +} \description{ -This function builds the data files for Understand from the project_path folder, reading from files that are written in the target language into output_dir +This function builds the data files for Understand from the project_path folder, +reading from files that are written in the target language into output_dir +} +\references{ +See pg. 352 in https://documentation.scitools.com/pdf/understand.pdf Sept. 
2024 Edition } \seealso{ Other parsers: +\code{\link{export_understand_dependencies}()}, \code{\link{parse_bugzilla_perceval_rest_issue_comments}()}, \code{\link{parse_bugzilla_perceval_traditional_issue_comments}()}, \code{\link{parse_bugzilla_rest_comments}()}, -\code{\link{parse_bugzilla_rest_issues}()}, \code{\link{parse_bugzilla_rest_issues_comments}()}, +\code{\link{parse_bugzilla_rest_issues}()}, \code{\link{parse_commit_message_id}()}, \code{\link{parse_dependencies}()}, \code{\link{parse_dv8_clusters}()}, \code{\link{parse_gitlog}()}, -\code{\link{parse_jira}()}, \code{\link{parse_jira_latest_date}()}, \code{\link{parse_jira_rss_xml}()}, +\code{\link{parse_jira}()}, \code{\link{parse_mbox}()}, \code{\link{parse_nvdfeed}()}, \code{\link{parse_understand_dependencies}()} diff --git a/man/export_understand_dependencies.Rd b/man/export_understand_dependencies.Rd new file mode 100644 index 00000000..f6dc830e --- /dev/null +++ b/man/export_understand_dependencies.Rd @@ -0,0 +1,52 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/src.R +\name{export_understand_dependencies} +\alias{export_understand_dependencies} +\title{Extract Understand Dependencies} +\usage{ +export_understand_dependencies( + scitools_path, + db_filepath, + parse_type = c("file", "class"), + output_filepath +) +} +\arguments{ +\item{scitools_path}{path to the scitools binary `und`} + +\item{parse_type}{Type of dependencies to generate into xml (either "file" or "class")} + +\item{output_filepath}{path to output XML filepath of dependencies} + +\item{db_path}{path to the scitools DB (see \code{\link{build_understand_project}})} +} +\value{ +The output directory where the db will be created, i.e. output_dir parameter. +} +\description{ +Extract XML dependency files for either class or file granularity from +an understand DB. +} +\references{ +See pg. 352 in https://documentation.scitools.com/pdf/understand.pdf Sept. 
2024 Edition +} +\seealso{ +Other parsers: +\code{\link{build_understand_project}()}, +\code{\link{parse_bugzilla_perceval_rest_issue_comments}()}, +\code{\link{parse_bugzilla_perceval_traditional_issue_comments}()}, +\code{\link{parse_bugzilla_rest_comments}()}, +\code{\link{parse_bugzilla_rest_issues_comments}()}, +\code{\link{parse_bugzilla_rest_issues}()}, +\code{\link{parse_commit_message_id}()}, +\code{\link{parse_dependencies}()}, +\code{\link{parse_dv8_clusters}()}, +\code{\link{parse_gitlog}()}, +\code{\link{parse_jira_latest_date}()}, +\code{\link{parse_jira_rss_xml}()}, +\code{\link{parse_jira}()}, +\code{\link{parse_mbox}()}, +\code{\link{parse_nvdfeed}()}, +\code{\link{parse_understand_dependencies}()} +} +\concept{parsers} diff --git a/man/parse_bugzilla_perceval_rest_issue_comments.Rd b/man/parse_bugzilla_perceval_rest_issue_comments.Rd index d8788d60..2616e16b 100644 --- a/man/parse_bugzilla_perceval_rest_issue_comments.Rd +++ b/man/parse_bugzilla_perceval_rest_issue_comments.Rd @@ -24,6 +24,8 @@ Parse Bugzilla data obtained from Perceval REST API Bugzilla backend \code{\link{download_bugzilla_perceval_rest_issue_comments}} a donwoloader function download bugzilla data with perceval Other parsers: +\code{\link{build_understand_project}()}, +\code{\link{export_understand_dependencies}()}, \code{\link{parse_bugzilla_perceval_traditional_issue_comments}()}, \code{\link{parse_bugzilla_rest_comments}()}, \code{\link{parse_bugzilla_rest_issues_comments}()}, @@ -36,6 +38,7 @@ Other parsers: \code{\link{parse_jira_rss_xml}()}, \code{\link{parse_jira}()}, \code{\link{parse_mbox}()}, -\code{\link{parse_nvdfeed}()} +\code{\link{parse_nvdfeed}()}, +\code{\link{parse_understand_dependencies}()} } \concept{parsers} diff --git a/man/parse_bugzilla_perceval_traditional_issue_comments.Rd b/man/parse_bugzilla_perceval_traditional_issue_comments.Rd index 06f9397d..8d22b68c 100644 --- a/man/parse_bugzilla_perceval_traditional_issue_comments.Rd +++ 
b/man/parse_bugzilla_perceval_traditional_issue_comments.Rd @@ -24,6 +24,8 @@ Parse Bugzilla data obtained from Perceval traditional Bugzilla backend \code{\link{download_bugzilla_perceval_traditional_issue_comments}} a downloader function to download bugzilla data with perceval Other parsers: +\code{\link{build_understand_project}()}, +\code{\link{export_understand_dependencies}()}, \code{\link{parse_bugzilla_perceval_rest_issue_comments}()}, \code{\link{parse_bugzilla_rest_comments}()}, \code{\link{parse_bugzilla_rest_issues_comments}()}, @@ -36,6 +38,7 @@ Other parsers: \code{\link{parse_jira_rss_xml}()}, \code{\link{parse_jira}()}, \code{\link{parse_mbox}()}, -\code{\link{parse_nvdfeed}()} +\code{\link{parse_nvdfeed}()}, +\code{\link{parse_understand_dependencies}()} } \concept{parsers} diff --git a/man/parse_bugzilla_rest_comments.Rd b/man/parse_bugzilla_rest_comments.Rd index 8121d873..4c23b9c8 100644 --- a/man/parse_bugzilla_rest_comments.Rd +++ b/man/parse_bugzilla_rest_comments.Rd @@ -17,6 +17,8 @@ Parse Bugzilla comments data obtained from json files from Bugzilla crawler \cod } \seealso{ Other parsers: +\code{\link{build_understand_project}()}, +\code{\link{export_understand_dependencies}()}, \code{\link{parse_bugzilla_perceval_rest_issue_comments}()}, \code{\link{parse_bugzilla_perceval_traditional_issue_comments}()}, \code{\link{parse_bugzilla_rest_issues_comments}()}, @@ -29,6 +31,7 @@ Other parsers: \code{\link{parse_jira_rss_xml}()}, \code{\link{parse_jira}()}, \code{\link{parse_mbox}()}, -\code{\link{parse_nvdfeed}()} +\code{\link{parse_nvdfeed}()}, +\code{\link{parse_understand_dependencies}()} } \concept{parsers} diff --git a/man/parse_bugzilla_rest_issues.Rd b/man/parse_bugzilla_rest_issues.Rd index 69d55e6b..6d1a7086 100644 --- a/man/parse_bugzilla_rest_issues.Rd +++ b/man/parse_bugzilla_rest_issues.Rd @@ -19,6 +19,8 @@ Parse Bugzilla issues data obtained from json files from Bugzilla crawler \code{\link{download_bugzilla_rest_issues_comments}} 
a downloader function to parse Bugzilla issues and comments data Other parsers: +\code{\link{build_understand_project}()}, +\code{\link{export_understand_dependencies}()}, \code{\link{parse_bugzilla_perceval_rest_issue_comments}()}, \code{\link{parse_bugzilla_perceval_traditional_issue_comments}()}, \code{\link{parse_bugzilla_rest_comments}()}, @@ -31,6 +33,7 @@ Other parsers: \code{\link{parse_jira_rss_xml}()}, \code{\link{parse_jira}()}, \code{\link{parse_mbox}()}, -\code{\link{parse_nvdfeed}()} +\code{\link{parse_nvdfeed}()}, +\code{\link{parse_understand_dependencies}()} } \concept{parsers} diff --git a/man/parse_bugzilla_rest_issues_comments.Rd b/man/parse_bugzilla_rest_issues_comments.Rd index 68939e2c..cda90b6c 100644 --- a/man/parse_bugzilla_rest_issues_comments.Rd +++ b/man/parse_bugzilla_rest_issues_comments.Rd @@ -21,6 +21,8 @@ Parse Bugzilla issues and comments data table \code{\link{download_bugzilla_rest_issues_comments}} a downloader function to parse Bugzilla issues and comments data Other parsers: +\code{\link{build_understand_project}()}, +\code{\link{export_understand_dependencies}()}, \code{\link{parse_bugzilla_perceval_rest_issue_comments}()}, \code{\link{parse_bugzilla_perceval_traditional_issue_comments}()}, \code{\link{parse_bugzilla_rest_comments}()}, @@ -33,6 +35,7 @@ Other parsers: \code{\link{parse_jira_rss_xml}()}, \code{\link{parse_jira}()}, \code{\link{parse_mbox}()}, -\code{\link{parse_nvdfeed}()} +\code{\link{parse_nvdfeed}()}, +\code{\link{parse_understand_dependencies}()} } \concept{parsers} diff --git a/man/parse_commit_message_id.Rd b/man/parse_commit_message_id.Rd index 1fe5fd3f..caef84ed 100644 --- a/man/parse_commit_message_id.Rd +++ b/man/parse_commit_message_id.Rd @@ -16,6 +16,8 @@ Adds a column commit_message_id containing the parsed commit message } \seealso{ Other parsers: +\code{\link{build_understand_project}()}, +\code{\link{export_understand_dependencies}()}, 
\code{\link{parse_bugzilla_perceval_rest_issue_comments}()}, \code{\link{parse_bugzilla_perceval_traditional_issue_comments}()}, \code{\link{parse_bugzilla_rest_comments}()}, @@ -28,6 +30,7 @@ Other parsers: \code{\link{parse_jira_rss_xml}()}, \code{\link{parse_jira}()}, \code{\link{parse_mbox}()}, -\code{\link{parse_nvdfeed}()} +\code{\link{parse_nvdfeed}()}, +\code{\link{parse_understand_dependencies}()} } \concept{parsers} diff --git a/man/parse_dependencies.Rd b/man/parse_dependencies.Rd index 9cd9d487..062bbf5b 100644 --- a/man/parse_dependencies.Rd +++ b/man/parse_dependencies.Rd @@ -25,6 +25,8 @@ Parse dependencies from Depends } \seealso{ Other parsers: +\code{\link{build_understand_project}()}, +\code{\link{export_understand_dependencies}()}, \code{\link{parse_bugzilla_perceval_rest_issue_comments}()}, \code{\link{parse_bugzilla_perceval_traditional_issue_comments}()}, \code{\link{parse_bugzilla_rest_comments}()}, @@ -37,6 +39,7 @@ Other parsers: \code{\link{parse_jira_rss_xml}()}, \code{\link{parse_jira}()}, \code{\link{parse_mbox}()}, -\code{\link{parse_nvdfeed}()} +\code{\link{parse_nvdfeed}()}, +\code{\link{parse_understand_dependencies}()} } \concept{parsers} diff --git a/man/parse_dv8_clusters.Rd b/man/parse_dv8_clusters.Rd index 474205be..524e4d40 100644 --- a/man/parse_dv8_clusters.Rd +++ b/man/parse_dv8_clusters.Rd @@ -14,6 +14,8 @@ Parses a cluster *-clsx.json into a data table. 
} \seealso{ Other parsers: +\code{\link{build_understand_project}()}, +\code{\link{export_understand_dependencies}()}, \code{\link{parse_bugzilla_perceval_rest_issue_comments}()}, \code{\link{parse_bugzilla_perceval_traditional_issue_comments}()}, \code{\link{parse_bugzilla_rest_comments}()}, @@ -26,7 +28,8 @@ Other parsers: \code{\link{parse_jira_rss_xml}()}, \code{\link{parse_jira}()}, \code{\link{parse_mbox}()}, -\code{\link{parse_nvdfeed}()} +\code{\link{parse_nvdfeed}()}, +\code{\link{parse_understand_dependencies}()} Other dv8: \code{\link{dependencies_to_sdsmj}()}, diff --git a/man/parse_gitlog.Rd b/man/parse_gitlog.Rd index 5552e83c..5a0c8cad 100644 --- a/man/parse_gitlog.Rd +++ b/man/parse_gitlog.Rd @@ -20,6 +20,8 @@ Parses the `.git` file in a github repository using the Perceval library. } \seealso{ Other parsers: +\code{\link{build_understand_project}()}, +\code{\link{export_understand_dependencies}()}, \code{\link{parse_bugzilla_perceval_rest_issue_comments}()}, \code{\link{parse_bugzilla_perceval_traditional_issue_comments}()}, \code{\link{parse_bugzilla_rest_comments}()}, @@ -32,6 +34,7 @@ Other parsers: \code{\link{parse_jira_rss_xml}()}, \code{\link{parse_jira}()}, \code{\link{parse_mbox}()}, -\code{\link{parse_nvdfeed}()} +\code{\link{parse_nvdfeed}()}, +\code{\link{parse_understand_dependencies}()} } \concept{parsers} diff --git a/man/parse_jira.Rd b/man/parse_jira.Rd index 26b2da1f..b0363181 100644 --- a/man/parse_jira.Rd +++ b/man/parse_jira.Rd @@ -30,6 +30,8 @@ If a field is not present in an issue, then its value will be NA. 
} \seealso{ Other parsers: +\code{\link{build_understand_project}()}, +\code{\link{export_understand_dependencies}()}, \code{\link{parse_bugzilla_perceval_rest_issue_comments}()}, \code{\link{parse_bugzilla_perceval_traditional_issue_comments}()}, \code{\link{parse_bugzilla_rest_comments}()}, @@ -42,6 +44,7 @@ Other parsers: \code{\link{parse_jira_latest_date}()}, \code{\link{parse_jira_rss_xml}()}, \code{\link{parse_mbox}()}, -\code{\link{parse_nvdfeed}()} +\code{\link{parse_nvdfeed}()}, +\code{\link{parse_understand_dependencies}()} } \concept{parsers} diff --git a/man/parse_jira_latest_date.Rd b/man/parse_jira_latest_date.Rd index f5b8b18f..e92216bd 100644 --- a/man/parse_jira_latest_date.Rd +++ b/man/parse_jira_latest_date.Rd @@ -22,6 +22,8 @@ For example: "SAILUH_1231234_2312413.json". This nomenclature is defined by \cod } \seealso{ Other parsers: +\code{\link{build_understand_project}()}, +\code{\link{export_understand_dependencies}()}, \code{\link{parse_bugzilla_perceval_rest_issue_comments}()}, \code{\link{parse_bugzilla_perceval_traditional_issue_comments}()}, \code{\link{parse_bugzilla_rest_comments}()}, @@ -34,6 +36,7 @@ Other parsers: \code{\link{parse_jira_rss_xml}()}, \code{\link{parse_jira}()}, \code{\link{parse_mbox}()}, -\code{\link{parse_nvdfeed}()} +\code{\link{parse_nvdfeed}()}, +\code{\link{parse_understand_dependencies}()} } \concept{parsers} diff --git a/man/parse_jira_rss_xml.Rd b/man/parse_jira_rss_xml.Rd index 38bb6948..1043f976 100644 --- a/man/parse_jira_rss_xml.Rd +++ b/man/parse_jira_rss_xml.Rd @@ -25,6 +25,8 @@ in IEEE Transactions on Software Engineering, vol. 48, no. 8, pp. 
3159-3184, } \seealso{ Other parsers: +\code{\link{build_understand_project}()}, +\code{\link{export_understand_dependencies}()}, \code{\link{parse_bugzilla_perceval_rest_issue_comments}()}, \code{\link{parse_bugzilla_perceval_traditional_issue_comments}()}, \code{\link{parse_bugzilla_rest_comments}()}, @@ -37,6 +39,7 @@ Other parsers: \code{\link{parse_jira_latest_date}()}, \code{\link{parse_jira}()}, \code{\link{parse_mbox}()}, -\code{\link{parse_nvdfeed}()} +\code{\link{parse_nvdfeed}()}, +\code{\link{parse_understand_dependencies}()} } \concept{parsers} diff --git a/man/parse_mbox.Rd b/man/parse_mbox.Rd index f048bd48..8b8ad909 100644 --- a/man/parse_mbox.Rd +++ b/man/parse_mbox.Rd @@ -20,6 +20,8 @@ consistently renamed for clarity. } \seealso{ Other parsers: +\code{\link{build_understand_project}()}, +\code{\link{export_understand_dependencies}()}, \code{\link{parse_bugzilla_perceval_rest_issue_comments}()}, \code{\link{parse_bugzilla_perceval_traditional_issue_comments}()}, \code{\link{parse_bugzilla_rest_comments}()}, @@ -32,6 +34,7 @@ Other parsers: \code{\link{parse_jira_latest_date}()}, \code{\link{parse_jira_rss_xml}()}, \code{\link{parse_jira}()}, -\code{\link{parse_nvdfeed}()} +\code{\link{parse_nvdfeed}()}, +\code{\link{parse_understand_dependencies}()} } \concept{parsers} diff --git a/man/parse_nvdfeed.Rd b/man/parse_nvdfeed.Rd index e861f2a3..7b49c51c 100644 --- a/man/parse_nvdfeed.Rd +++ b/man/parse_nvdfeed.Rd @@ -15,6 +15,8 @@ Parse NVD Feed CVEs, descriptions and CWE ids } \seealso{ Other parsers: +\code{\link{build_understand_project}()}, +\code{\link{export_understand_dependencies}()}, \code{\link{parse_bugzilla_perceval_rest_issue_comments}()}, \code{\link{parse_bugzilla_perceval_traditional_issue_comments}()}, \code{\link{parse_bugzilla_rest_comments}()}, @@ -27,6 +29,7 @@ Other parsers: \code{\link{parse_jira_latest_date}()}, \code{\link{parse_jira_rss_xml}()}, \code{\link{parse_jira}()}, -\code{\link{parse_mbox}()} 
+\code{\link{parse_mbox}()}, +\code{\link{parse_understand_dependencies}()} } \concept{parsers} diff --git a/man/parse_understand_dependencies.Rd b/man/parse_understand_dependencies.Rd index 55eb1b46..881fecce 100644 --- a/man/parse_understand_dependencies.Rd +++ b/man/parse_understand_dependencies.Rd @@ -2,14 +2,14 @@ % Please edit documentation in R/src.R \name{parse_understand_dependencies} \alias{parse_understand_dependencies} -\title{Parse Built Folder to Network} +\title{Parse XML from Understand DB} \usage{ -parse_understand_dependencies(understand_dir, parse_type = c("file", "class")) +parse_understand_dependencies(scitools_path, understand_dir) } \arguments{ -\item{understand_dir}{path to the built Understand project folder used in \code{\link{build_understand_project}}} +\item{scitools_path}{path to the scitools binary `und`} -\item{parse_type}{Type of dependencies to generate into xml (either "file" or "class")} +\item{understand_dir}{path to the built Understand project folder used in \code{\link{build_understand_project}}} } \description{ This function parses the data in the Understand build folder to export the parse_type dependencies into a network @@ -17,18 +17,19 @@ This function parses the data in the Understand build folder to export the parse \seealso{ Other parsers: \code{\link{build_understand_project}()}, +\code{\link{export_understand_dependencies}()}, \code{\link{parse_bugzilla_perceval_rest_issue_comments}()}, \code{\link{parse_bugzilla_perceval_traditional_issue_comments}()}, \code{\link{parse_bugzilla_rest_comments}()}, -\code{\link{parse_bugzilla_rest_issues}()}, \code{\link{parse_bugzilla_rest_issues_comments}()}, +\code{\link{parse_bugzilla_rest_issues}()}, \code{\link{parse_commit_message_id}()}, \code{\link{parse_dependencies}()}, \code{\link{parse_dv8_clusters}()}, \code{\link{parse_gitlog}()}, -\code{\link{parse_jira}()}, \code{\link{parse_jira_latest_date}()}, \code{\link{parse_jira_rss_xml}()}, +\code{\link{parse_jira}()}, 
\code{\link{parse_mbox}()}, \code{\link{parse_nvdfeed}()} } diff --git a/man/transform_commit_message_id_to_network.Rd b/man/transform_commit_message_id_to_network.Rd index 04af4987..e66752bd 100644 --- a/man/transform_commit_message_id_to_network.Rd +++ b/man/transform_commit_message_id_to_network.Rd @@ -25,6 +25,7 @@ Other edgelists: \code{\link{transform_gitlog_to_hdsmj}()}, \code{\link{transform_gitlog_to_temporal_network}()}, \code{\link{transform_reply_to_bipartite_network}()}, -\code{\link{transform_temporal_gitlog_to_adsmj}()} +\code{\link{transform_temporal_gitlog_to_adsmj}()}, +\code{\link{transform_understand_dependencies_to_network}()} } \concept{edgelists} diff --git a/man/transform_cve_cwe_file_to_network.Rd b/man/transform_cve_cwe_file_to_network.Rd index cea7aac3..9cbc0b46 100644 --- a/man/transform_cve_cwe_file_to_network.Rd +++ b/man/transform_cve_cwe_file_to_network.Rd @@ -25,6 +25,7 @@ Other edgelists: \code{\link{transform_gitlog_to_hdsmj}()}, \code{\link{transform_gitlog_to_temporal_network}()}, \code{\link{transform_reply_to_bipartite_network}()}, -\code{\link{transform_temporal_gitlog_to_adsmj}()} +\code{\link{transform_temporal_gitlog_to_adsmj}()}, +\code{\link{transform_understand_dependencies_to_network}()} } \concept{edgelists} diff --git a/man/transform_dependencies_to_network.Rd b/man/transform_dependencies_to_network.Rd index 7d051f76..79f10ca4 100644 --- a/man/transform_dependencies_to_network.Rd +++ b/man/transform_dependencies_to_network.Rd @@ -25,6 +25,7 @@ Other edgelists: \code{\link{transform_gitlog_to_hdsmj}()}, \code{\link{transform_gitlog_to_temporal_network}()}, \code{\link{transform_reply_to_bipartite_network}()}, -\code{\link{transform_temporal_gitlog_to_adsmj}()} +\code{\link{transform_temporal_gitlog_to_adsmj}()}, +\code{\link{transform_understand_dependencies_to_network}()} } \concept{edgelists} diff --git a/man/transform_dependencies_to_sdsmj.Rd b/man/transform_dependencies_to_sdsmj.Rd index a9eeb0fb..6d477988 100644 
--- a/man/transform_dependencies_to_sdsmj.Rd +++ b/man/transform_dependencies_to_sdsmj.Rd @@ -39,7 +39,8 @@ Other edgelists: \code{\link{transform_gitlog_to_hdsmj}()}, \code{\link{transform_gitlog_to_temporal_network}()}, \code{\link{transform_reply_to_bipartite_network}()}, -\code{\link{transform_temporal_gitlog_to_adsmj}()} +\code{\link{transform_temporal_gitlog_to_adsmj}()}, +\code{\link{transform_understand_dependencies_to_network}()} Other dv8: \code{\link{dependencies_to_sdsmj}()}, diff --git a/man/transform_gitlog_to_bipartite_network.Rd b/man/transform_gitlog_to_bipartite_network.Rd index 052de1be..1bc147b7 100644 --- a/man/transform_gitlog_to_bipartite_network.Rd +++ b/man/transform_gitlog_to_bipartite_network.Rd @@ -28,6 +28,7 @@ Other edgelists: \code{\link{transform_gitlog_to_hdsmj}()}, \code{\link{transform_gitlog_to_temporal_network}()}, \code{\link{transform_reply_to_bipartite_network}()}, -\code{\link{transform_temporal_gitlog_to_adsmj}()} +\code{\link{transform_temporal_gitlog_to_adsmj}()}, +\code{\link{transform_understand_dependencies_to_network}()} } \concept{edgelists} diff --git a/man/transform_gitlog_to_entity_bipartite_network.Rd b/man/transform_gitlog_to_entity_bipartite_network.Rd index f95ea70b..9079a048 100644 --- a/man/transform_gitlog_to_entity_bipartite_network.Rd +++ b/man/transform_gitlog_to_entity_bipartite_network.Rd @@ -28,6 +28,7 @@ Other edgelists: \code{\link{transform_gitlog_to_hdsmj}()}, \code{\link{transform_gitlog_to_temporal_network}()}, \code{\link{transform_reply_to_bipartite_network}()}, -\code{\link{transform_temporal_gitlog_to_adsmj}()} +\code{\link{transform_temporal_gitlog_to_adsmj}()}, +\code{\link{transform_understand_dependencies_to_network}()} } \concept{edgelists} diff --git a/man/transform_gitlog_to_entity_temporal_network.Rd b/man/transform_gitlog_to_entity_temporal_network.Rd index 9e02d4d5..5267c903 100644 --- a/man/transform_gitlog_to_entity_temporal_network.Rd +++ 
b/man/transform_gitlog_to_entity_temporal_network.Rd @@ -45,6 +45,7 @@ Other edgelists: \code{\link{transform_gitlog_to_hdsmj}()}, \code{\link{transform_gitlog_to_temporal_network}()}, \code{\link{transform_reply_to_bipartite_network}()}, -\code{\link{transform_temporal_gitlog_to_adsmj}()} +\code{\link{transform_temporal_gitlog_to_adsmj}()}, +\code{\link{transform_understand_dependencies_to_network}()} } \concept{edgelists} diff --git a/man/transform_gitlog_to_hdsmj.Rd b/man/transform_gitlog_to_hdsmj.Rd index 4c766638..4072dbde 100644 --- a/man/transform_gitlog_to_hdsmj.Rd +++ b/man/transform_gitlog_to_hdsmj.Rd @@ -37,7 +37,8 @@ Other edgelists: \code{\link{transform_gitlog_to_entity_temporal_network}()}, \code{\link{transform_gitlog_to_temporal_network}()}, \code{\link{transform_reply_to_bipartite_network}()}, -\code{\link{transform_temporal_gitlog_to_adsmj}()} +\code{\link{transform_temporal_gitlog_to_adsmj}()}, +\code{\link{transform_understand_dependencies_to_network}()} Other dv8: \code{\link{dependencies_to_sdsmj}()}, diff --git a/man/transform_gitlog_to_temporal_network.Rd b/man/transform_gitlog_to_temporal_network.Rd index 67219e27..109c7e38 100644 --- a/man/transform_gitlog_to_temporal_network.Rd +++ b/man/transform_gitlog_to_temporal_network.Rd @@ -47,6 +47,7 @@ Other edgelists: \code{\link{transform_gitlog_to_entity_temporal_network}()}, \code{\link{transform_gitlog_to_hdsmj}()}, \code{\link{transform_reply_to_bipartite_network}()}, -\code{\link{transform_temporal_gitlog_to_adsmj}()} +\code{\link{transform_temporal_gitlog_to_adsmj}()}, +\code{\link{transform_understand_dependencies_to_network}()} } \concept{edgelists} diff --git a/man/transform_reply_to_bipartite_network.Rd b/man/transform_reply_to_bipartite_network.Rd index 84168bdf..a6011f65 100644 --- a/man/transform_reply_to_bipartite_network.Rd +++ b/man/transform_reply_to_bipartite_network.Rd @@ -23,6 +23,7 @@ Other edgelists: \code{\link{transform_gitlog_to_entity_temporal_network}()}, 
\code{\link{transform_gitlog_to_hdsmj}()}, \code{\link{transform_gitlog_to_temporal_network}()}, -\code{\link{transform_temporal_gitlog_to_adsmj}()} +\code{\link{transform_temporal_gitlog_to_adsmj}()}, +\code{\link{transform_understand_dependencies_to_network}()} } \concept{edgelists} diff --git a/man/transform_temporal_gitlog_to_adsmj.Rd b/man/transform_temporal_gitlog_to_adsmj.Rd index 3f968ac4..27bd6aad 100644 --- a/man/transform_temporal_gitlog_to_adsmj.Rd +++ b/man/transform_temporal_gitlog_to_adsmj.Rd @@ -35,7 +35,8 @@ Other edgelists: \code{\link{transform_gitlog_to_entity_temporal_network}()}, \code{\link{transform_gitlog_to_hdsmj}()}, \code{\link{transform_gitlog_to_temporal_network}()}, -\code{\link{transform_reply_to_bipartite_network}()} +\code{\link{transform_reply_to_bipartite_network}()}, +\code{\link{transform_understand_dependencies_to_network}()} Other dv8: \code{\link{dependencies_to_sdsmj}()}, diff --git a/vignettes/understand_showcase.Rmd b/vignettes/understand_showcase.Rmd index 4e6bb2e7..3ecc31df 100644 --- a/vignettes/understand_showcase.Rmd +++ b/vignettes/understand_showcase.Rmd @@ -46,8 +46,15 @@ scitools_path <- get_tool_project("scitools", tool) conf <- parse_config("../conf/helix.yml") keep_dependencies_type <- get_understand_keep_dependencies_type(conf) project_path <- get_understand_project_path(conf) + +# Scitools understand_folder <- get_understand_output_path(conf) code_language <- get_understand_code_language(conf) + +db_path <- stringi::stri_c(understand_folder,"Understand.und") + +file_dependencies_path <- stringi::stri_c(understand_folder,"file_dependencies.xml") +class_dependencies_path <- stringi::stri_c(understand_folder,"class_dependencies.xml") ``` ## Parse a sample project folder @@ -61,23 +68,26 @@ Before asking for the generation of data or parsing through it, we must first bu Please remember/save where the folder is generated as it is necessary for understand_parse_dependencies, but for the purpose of this notebook- 
we will save this output at ../../analysis/kaiaulu/understand ```{r eval = FALSE} -build_understand_project(scitools_path = scitools_path, project_path = project_path, language = code_language, output_dir = understand_folder) +db_path <- build_understand_project(scitools_path = scitools_path, project_path = project_path, language = code_language, output_dir = understand_folder) ``` - # Generate Dependencies Network -## For Files + +## Files To generate a list containing the node and edge data.tables containing the file dependencies of the project, we will provide it the file path to the built Understand project folder and the parse_type. In this case, we will tell Understand to build the "file" dependencies for us. Once again, providing understand_folder as the value for understand_dir. Note the format of the generated data.tables after running the below code. ```{r} -file_dependencies <- parse_understand_dependencies(scitools_path = scitools_path, understand_dir = understand_folder, parse_type = "file") +file_dependencies_path <- export_understand_dependencies(scitools_path = scitools_path, db_filepath = db_path, parse_type = "file", output_filepath = file_dependencies_path) + +file_dependencies <- parse_understand_dependencies(dependencies_path = file_dependencies_path) head(file_dependencies[["node_list"]]) %>% gt(auto_align = FALSE) ``` + ```{r} head(file_dependencies[["edge_list"]]) %>% gt(auto_align = FALSE) @@ -91,8 +101,11 @@ Near-identical to parsing for file dependencies, class dependencies only require The generated data is in the same format, however note the different types of dependency types in the edge_table. 
```{r} -class_dependencies <- parse_understand_dependencies(scitools_path = scitools_path, understand_dir = understand_folder, parse_type = "class") +class_dependencies_path <- export_understand_dependencies(scitools_path = scitools_path, db_filepath = db_path, parse_type = "file", output_filepath = class_dependencies_path) + +class_dependencies <- parse_understand_dependencies(dependencies_path = class_dependencies_path) + head(class_dependencies[["node_list"]]) %>% gt(auto_align = FALSE) ``` From 2b1e6500261420498c9b974d63d05c1b9a91d84a Mon Sep 17 00:00:00 2001 From: Carlos Paradis Date: Sun, 8 Dec 2024 04:09:09 -0800 Subject: [PATCH 17/19] Fix network display duplicate error Concatenate labels for node and edges Signed-off-by: Carlos Paradis --- R/src.R | 8 ++++++++ vignettes/understand_showcase.Rmd | 16 +++++++++++----- 2 files changed, 19 insertions(+), 5 deletions(-) diff --git a/R/src.R b/R/src.R index f864591b..c9ee08cd 100644 --- a/R/src.R +++ b/R/src.R @@ -396,6 +396,14 @@ transform_understand_dependencies_to_network <- function(parsed, weight_types) { nodes <- parsed[["node_list"]] edges <- parsed[["edge_list"]] + # Append each node's ID to its label, as the same file name may occur + # again in other parts of the code, so that labels stay unique. + + nodes$node_label <- stringi::stri_c(nodes$node_label,"|",nodes$id) + + edges$label_from <- stringi::stri_c(edges$label_from,"|",edges$id_from) + edges$label_to <- stringi::stri_c(edges$label_to,"|",edges$id_to) + # Filter out by weights if vector provided if (length(weight_types) > 0) { edges <- edges[dependency_kind %in% weight_types] diff --git a/vignettes/understand_showcase.Rmd b/vignettes/understand_showcase.Rmd index 3ecc31df..6a0a1b49 100644 --- a/vignettes/understand_showcase.Rmd +++ b/vignettes/understand_showcase.Rmd @@ -79,9 +79,12 @@ To generate a list containing the node and edge data.tables containing the file Note the format of the generated data.tables after running the below code.
-```{r} +```{r eval = FALSE} file_dependencies_path <- export_understand_dependencies(scitools_path = scitools_path, db_filepath = db_path, parse_type = "file", output_filepath = file_dependencies_path) +``` + +```{r} file_dependencies <- parse_understand_dependencies(dependencies_path = file_dependencies_path) head(file_dependencies[["node_list"]]) %>% @@ -100,10 +103,12 @@ Near-identical to parsing for file dependencies, class dependencies only require The generated data is in the same format, however note the different types of dependency types in the edge_table. -```{r} - +```{r eval = FALSE} class_dependencies_path <- export_understand_dependencies(scitools_path = scitools_path, db_filepath = db_path, parse_type = "file", output_filepath = class_dependencies_path) +``` + +```{r} class_dependencies <- parse_understand_dependencies(dependencies_path = class_dependencies_path) head(class_dependencies[["node_list"]]) %>% @@ -136,7 +141,8 @@ Apart from having a filtered edge_table, this data can also be used for inputtin ```{r} file_graph <- transform_understand_dependencies_to_network(parsed = file_dependencies, weight_types = c(keep_dependencies_type[2], keep_dependencies_type[5])) -head(file_graph) + + project_function_network <- igraph::graph_from_data_frame(d=file_graph[["edge_list"]], directed = TRUE, vertices = file_graph[["node_list"]]) @@ -150,7 +156,7 @@ The same applies to outputting our class_dependencies, but we can visualize what ```{r} class_graph <- transform_understand_dependencies_to_network(parsed = class_dependencies, weight_types = c(keep_dependencies_type[2], keep_dependencies_type[5])) -head(class_graph) + project_function_network <- igraph::graph_from_data_frame(d=class_graph[["edge_list"]], directed = TRUE, vertices = class_graph[["node_list"]]) From 94933198d05fd210e9ded67a8ae4bb2290ce0a3a Mon Sep 17 00:00:00 2001 From: Carlos Paradis Date: Sun, 8 Dec 2024 04:52:41 -0800 Subject: [PATCH 18/19] Passes checks Passes unit tests, notebooks, 
etc. Signed-off-by: Carlos Paradis --- R/src.R | 14 +++---- _pkgdown.yml | 4 ++ man/build_understand_project.Rd | 5 +-- man/export_understand_dependencies.Rd | 4 +- man/parse_understand_dependencies.Rd | 10 ++--- vignettes/understand_showcase.Rmd | 60 +++++++-------------------- 6 files changed, 34 insertions(+), 63 deletions(-) diff --git a/R/src.R b/R/src.R index c9ee08cd..ea3b3465 100644 --- a/R/src.R +++ b/R/src.R @@ -8,11 +8,10 @@ #' Build Understand DB #' -#' This function builds the data files for Understand from the project_path folder, -#' reading from files that are written in the target language into output_dir +#' Uses Scitools Understand to create a source code project Und Database. #' #' @param scitools_path path to the scitools binary `und` -#' @param project_path path to the project folder to analyze +#' @param project_path path to the project source code folder to create the Understand DB. #' @param language the primary language of the project (language must be supported by Understand) #' @param output_dir path to output directory (formatted output_path/) #' @@ -43,13 +42,13 @@ build_understand_project <- function(scitools_path, project_path, language, outp #' Extract Understand Dependencies #' -#' Extract XML dependency files for either class or file granularity from +#' Extract the XML dependency file for either class or file granularity from #' an understand DB. #' #' @param scitools_path path to the scitools binary `und` #' @param db_path path to the scitools DB (see \code{\link{build_understand_project}}) #' @param parse_type Type of dependencies to generate into xml (either "file" or "class") -#' @param output_filepath path to output XML filepath of dependencies +#' @param output_filepath path to the output XML filepath of dependencies #' #' @return The output directory where the db will be created, i.e. output_dir parameter. #' @references See pg. 352 in https://documentation.scitools.com/pdf/understand.pdf Sept. 
2024 Edition @@ -101,10 +100,9 @@ export_understand_dependencies <- function(scitools_path, db_filepath, parse_typ ############## Parsers ############## -#' Parse XML from Understand DB +#' Parse Scitools Understand Dependencies XML #' -#' This function parses the data in the Understand build folder -#' to export the parse_type dependencies into a network +#' Parses either a file or class scitools understand dependency XML to table. #' #' @param dependencies_path path to the exported Understand dependencies file (see \code{\link{export_understand_dependencies}}). #' @export diff --git a/_pkgdown.yml b/_pkgdown.yml index 5df12100..aede9d0c 100644 --- a/_pkgdown.yml +++ b/_pkgdown.yml @@ -25,8 +25,12 @@ reference: Notebooks for examples. - contents: - parse_dependencies + - build_understand_project + - export_understand_dependencies + - parse_understand_dependencies - parse_r_dependencies - transform_dependencies_to_network + - transform_understand_dependencies_to_network - transform_r_dependencies_to_network - subtitle: __Gang of Four Patterns__ desc: > diff --git a/man/build_understand_project.Rd b/man/build_understand_project.Rd index c7325dbd..adb43563 100644 --- a/man/build_understand_project.Rd +++ b/man/build_understand_project.Rd @@ -9,7 +9,7 @@ build_understand_project(scitools_path, project_path, language, output_dir) \arguments{ \item{scitools_path}{path to the scitools binary `und`} -\item{project_path}{path to the project folder to analyze} +\item{project_path}{path to the project source code folder to create the Understand DB.} \item{language}{the primary language of the project (language must be supported by Understand)} @@ -19,8 +19,7 @@ build_understand_project(scitools_path, project_path, language, output_dir) The created Scitools Understand DB path } \description{ -This function builds the data files for Understand from the project_path folder, -reading from files that are written in the target language into output_dir +Uses Scitools Understand to 
create a source code project Und Database. } \references{ See pg. 352 in https://documentation.scitools.com/pdf/understand.pdf Sept. 2024 Edition diff --git a/man/export_understand_dependencies.Rd b/man/export_understand_dependencies.Rd index f6dc830e..a14503df 100644 --- a/man/export_understand_dependencies.Rd +++ b/man/export_understand_dependencies.Rd @@ -16,7 +16,7 @@ export_understand_dependencies( \item{parse_type}{Type of dependencies to generate into xml (either "file" or "class")} -\item{output_filepath}{path to output XML filepath of dependencies} +\item{output_filepath}{path to the output XML filepath of dependencies} \item{db_path}{path to the scitools DB (see \code{\link{build_understand_project}})} } @@ -24,7 +24,7 @@ export_understand_dependencies( The output directory where the db will be created, i.e. output_dir parameter. } \description{ -Extract XML dependency files for either class or file granularity from +Extract the XML dependency file for either class or file granularity from an understand DB. 
} \references{ diff --git a/man/parse_understand_dependencies.Rd b/man/parse_understand_dependencies.Rd index 881fecce..4411d5dd 100644 --- a/man/parse_understand_dependencies.Rd +++ b/man/parse_understand_dependencies.Rd @@ -2,17 +2,15 @@ % Please edit documentation in R/src.R \name{parse_understand_dependencies} \alias{parse_understand_dependencies} -\title{Parse XML from Understand DB} +\title{Parse Scitools Understand Dependencies XML} \usage{ -parse_understand_dependencies(scitools_path, understand_dir) +parse_understand_dependencies(dependencies_path) } \arguments{ -\item{scitools_path}{path to the scitools binary `und`} - -\item{understand_dir}{path to the built Understand project folder used in \code{\link{build_understand_project}}} +\item{dependencies_path}{path to the exported Understand dependencies file (see \code{\link{export_understand_dependencies}}).} } \description{ -This function parses the data in the Understand build folder to export the parse_type dependencies into a network +Parses either a file or class scitools understand dependency XML to table. } \seealso{ Other parsers: diff --git a/vignettes/understand_showcase.Rmd b/vignettes/understand_showcase.Rmd index 6a0a1b49..f8f05314 100644 --- a/vignettes/understand_showcase.Rmd +++ b/vignettes/understand_showcase.Rmd @@ -13,14 +13,8 @@ vignette: > # Introduction -Within a project, we might want to see the dependencies of files and classes between themselves respectively. parse_r_dependencies and parse_dependencies uses in-house or Depends software respectively to analyze projects. parse_dependencies only provides file dependencies, while parse_r_dependencies provides file and function dependencies for a set of R files. Having Understand installed, we can analyze projects for both file and class dependencies in multiple languages like Java, PHP, HTML, C/C++, Python, Assembly, Ada, etc. +Within a project, we might want to see the in-between file or in-between class dependencies. 
This notebook showcases how to obtain either kind of dependency using [Scitool's Understand](https://scitools.com). Note you will need a license from Scitools to use this Notebook. -Here is more information on [Scitool's Understand](https://scitools.com)). - -This notebook demonstrates a sample use case of the two functions that generates tables from the dependency data Understand outputs without opening Understand. - - -# Required libraries ```{r warning = FALSE, message = FALSE} rm(list = ls()) @@ -34,10 +28,8 @@ require(gt) ``` # Project Configuration File -For our variables we will be grabbing them from our configuration file kaiaulu.yml. We will provide a short insight into what we're doing throughout the notebook, but to summarize: -- Our folder and path will be saved into the rawdata and analysis folders respectively -- Within the configuration file, keep_dependencies_type is a list of dependency types that Understand finds for analysis -- And our code_language. Although self-explanatory, Understand supports [many languages](https://support.scitools.com/support/solutions/articles/70000582794-supported-languages)- but for today we will be analyzing a Java project. + +For this notebook, we will use Apache Helix as an example. Refer to the Helix project configuration file under the conf/ folder, in particular the `understand` section. For supported languages, see [scitools documentation](https://support.scitools.com/support/solutions/articles/70000582794-supported-languages).
```{r} tool <- parse_config("../tools.yml") @@ -57,32 +49,26 @@ file_dependencies_path <- stringi::stri_c(understand_folder,"file_dependencies.x class_dependencies_path <- stringi::stri_c(understand_folder,"class_dependencies.xml") ``` -## Parse a sample project folder - -For sample purposes, we will use the project from [Houari Zegai's Calculator](https://github.com/HouariZegai/Calculator) saved into the folder ../../rawdata/kaiaulu/git_rep/understand/ This folder contains all the .java files that the project uses and the ones we will be analyzing. The directory where we will build the understand analysis base will be saved in a folder called understand -# Build the Understand project +# Build Understand DB -Before asking for the generation of data or parsing through it, we must first build the Understand project data. Firstly, provide the project_path and the primary project language, which in this case is Java. There is a third parameter named output_dir where the project will be built at, so we will give it understand_folder. After building once, you do not need to call this function again unless you have changed files within the project that would need to be re-analyzed. - -Please remember/save where the folder is generated as it is necessary for understand_parse_dependencies, but for the purpose of this notebook- we will save this output at ../../analysis/kaiaulu/understand +To export dependencies, scitools require a `und` database to be created: ```{r eval = FALSE} db_path <- build_understand_project(scitools_path = scitools_path, project_path = project_path, language = code_language, output_dir = understand_folder) ``` -# Generate Dependencies Network - -## Files +# Export Dependencies -To generate a list containing the node and edge data.tables containing the file dependencies of the project, we will provide it the file path to the built Understand project folder and the parse_type. 
In this case, we will tell Understand to build the "file" dependencies for us. Once again, providing understand_folder as the value for understand_dir. +Subsequently, we can export our dependencies out of the database. We can export either file or class dependencies as XML. -Note the format of the generated data.tables after running the below code. +## File Dependencies ```{r eval = FALSE} file_dependencies_path <- export_understand_dependencies(scitools_path = scitools_path, db_filepath = db_path, parse_type = "file", output_filepath = file_dependencies_path) ``` +The XML files can then be parsed into tables for manipulation in Kaiaulu: ```{r} file_dependencies <- parse_understand_dependencies(dependencies_path = file_dependencies_path) @@ -97,14 +83,12 @@ head(file_dependencies[["edge_list"]]) %>% ``` -## For Classes - -Near-identical to parsing for file dependencies, class dependencies only requires us change the parse_type to "class". In the output folder, this .xml will be separate from the one generated for file dependencies: fileDependencies.xml and classDependencies.xml respectively. +## Class Dependencies -The generated data is in the same format, however note the different types of dependency types in the edge_table. +The process to export and parse class dependencies is identical, save for specifying the `parse_type` to "class": ```{r eval = FALSE} -class_dependencies_path <- export_understand_dependencies(scitools_path = scitools_path, db_filepath = db_path, parse_type = "file", output_filepath = class_dependencies_path) +class_dependencies_path <- export_understand_dependencies(scitools_path = scitools_path, db_filepath = db_path, parse_type = "class", output_filepath = class_dependencies_path) ``` @@ -121,23 +105,11 @@ head(class_dependencies[["edge_list"]]) %>% ``` -# Transforming to Network -## For Files - -To filter out edges by their dependency_kind we can call transform_und_dependencies_to_network.
Although this function performs as a simple sub-setter (filters for the provided dependency types), as the parsed data is a valid network, this function is kept for consistency across our parser functions. - -Back to the function, all we need to do is provide the generated data into parsed and the vector containing the filter weights into weight_types. +# Network Visualization -Some valid weight_types that are included are: -- Import (1) -- Call (2) -- Create (3) -- Use (4) -- Type GenericArgument (5) +We can display the parsed dependencies as a network. You can use the `weight_types` parameter to subset the dependencies by kind. Refer to [Kaiaulu wiki](https://github.com/sailuh/kaiaulu/wiki/Scitools) for the types of dependencies Scitools supports. -NOTE: These weight_types are listed in the same order saved into our keep_dependencies_type variable pulled from kaiaulu.yml, and are a list (note the index numbers provided next to each itemized list). Here is a [good place to start](https://documentation.scitools.com/pdf/understand.pdf) for reading on dependencies generated from Understand. - -Apart from having a filtered edge_table, this data can also be used for inputting into graph_from_data_frame and visIgraph to visualize the table as a graph, as shown below where we filter by Call and Type GenericArgument. +## File Network ```{r} file_graph <- transform_understand_dependencies_to_network(parsed = file_dependencies, weight_types = c(keep_dependencies_type[2], keep_dependencies_type[5])) @@ -150,7 +122,7 @@ visIgraph(project_function_network,randomSeed = 1) ``` -## For Classes +## Class Network The same applies to outputting our class_dependencies, but we can visualize what our class dependency data is using the same parameters sans the parsed data table.
From 5f677d88dbc4d68a74409e132344b52854ff254a Mon Sep 17 00:00:00 2001 From: Carlos Paradis Date: Sun, 8 Dec 2024 04:58:31 -0800 Subject: [PATCH 19/19] Edit news Signed-off-by: Carlos Paradis --- NEWS.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/NEWS.md b/NEWS.md index 0605ee0e..c9c66cfd 100644 --- a/NEWS.md +++ b/NEWS.md @@ -3,7 +3,7 @@ __kaiaulu 0.0.0.9700 (in development)__ ### NEW FEATURES - * `build_understand_project (project_path , language, output_dir)`, `parse_understand_dependencies(output_dir, parse_type)`, and `transform_understand_dependencies_to_network(parsed, weights)` have been added. These functions handle creating tables from xml data generated from Scitool's Understand. [#308](https://github.com/sailuh/kaiaulu/issues/308) + * `build`, `export`, `parse`, and `transform` functions for Scitools Understand have been added. [#308](https://github.com/sailuh/kaiaulu/issues/308) * The GitHUB API has been expanded to use refresh, along with other functions. `github_api_project_issue_search` has been added that makes the search/issues endpoint API calls. `github_api_project_issue_or_pr_comments_by_date` and `github_api_project_issue_by_date` have been added to download issue data and comments by date ranges. `github_parse_search_issues_refresh` has been added that parses the issue data downloaded from the search endpoint in the refresh_issues folder. `github_api_project_issue_refresh` and `github_api_project_issue_or_pr_comment_refresh` were added to download issue data or comments respectively that have not already been downloaded. `format_created_at_from_file` was added to retrieve the greatest date from a JSON file. See the Reference Docs on GitHub section for more details. [#282](https://github.com/sailuh/kaiaulu/issues/282) * `config.R` now contains a set of getter functions used to centralize the gathering of configuration data and these getter functions are used to refactor configuration file information gathering.
For example, loading configuration file information with variable assignment is as follows `git_repo_path <- config_file[["version_control"]][["log"]]` but refactoring with a config.R getter function becomes `git_repo_path <- get_git_repo_path(config_file)`. [#230](https://github.com/sailuh/kaiaulu/issues/230) * `refresh_jira_issues()` had been added. It is a wrapper function for the previous downloader and downloads only issues greater than the greatest key already downloaded. [#275](https://github.com/sailuh/kaiaulu/issues/275)