Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Scitools Understand Parser #309

Merged
merged 21 commits into from
Dec 8, 2024
Merged
Show file tree
Hide file tree
Changes from 5 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions .idea/.gitignore

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

9 changes: 9 additions & 0 deletions .idea/kaiaulu.iml

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

6 changes: 6 additions & 0 deletions .idea/misc.xml

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

8 changes: 8 additions & 0 deletions .idea/modules.xml

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

6 changes: 6 additions & 0 deletions .idea/vcs.xml

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ Package: kaiaulu
Type: Package
Title: Kaiaulu
Version: 0.0.0.9700
Description: Kaiaulu is an R package and common interface that helps with understanding evolving software development communities, and the artifacts (gitlog, mailing list, files, etc.) which developers collaborate and communicate about. See Paradis et al., (2012) <doi:10.1007/978-3-031-15116-3_6>.
Description: Kaiaulu is an R package and common interface that helps with understanding evolving software development communities, and the artifacts (GitLog, mailing list, files, etc.) which developers collaborate and communicate about. See Paradis et al. (2022) <doi:10.1007/978-3-031-15116-3_6>.
Authors@R: c(
person('Carlos', 'Paradis', role = c('aut', 'cre'),
email = 'cvas@hawaii.edu',
Expand Down
2 changes: 2 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -157,6 +157,8 @@ export(transform_gitlog_to_temporal_network)
export(transform_r_dependencies_to_network)
export(transform_reply_to_bipartite_network)
export(transform_temporal_gitlog_to_adsmj)
export(transform_und_dependencies_to_network)
export(understand_parse_dependencies)
export(weight_scheme_count_deleted_nodes)
export(weight_scheme_cum_temporal)
export(weight_scheme_pairwise_cum_temporal)
Expand Down
129 changes: 129 additions & 0 deletions R/src.R
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,98 @@

############## Parsers ##############

#' Parse file or class dependencies using SciTools Understand
#'
#' Creates an Understand database inside \code{output_dir}, adds and analyzes
#' the project, exports its dependencies as a Cytoscape XML file, and parses
#' that file into a node table and an edge table. The Understand command line
#' tool \code{und} is invoked through \code{system2()}.
#'
#' @param project_path path to the project folder to analyze
#' @param language the language of the project (language must be supported by Understand)
#' @param output_dir path to output directory (formatted output_path/)
#' @param parse_type Type of dependencies to generate into xml (either "file" or "class")
#' @return A named list with two tables: \code{node_list} (columns
#' \code{node_label}, \code{id}, \code{long_name}) and \code{edge_list}
#' (columns \code{id_from}, \code{id_to}, \code{dependency_kind}, with one row
#' per dependency kind of each edge).
#' @export
#' @family parsers
understand_parse_dependencies <- function(project_path, language, output_dir = "../tmp/", parse_type) {
  # Before running, check if parse_type is correct
  is_valid_parse_type <- is.character(parse_type) &&
    length(parse_type) == 1 &&
    parse_type %in% c("file", "class")
  if (!is_valid_parse_type) {
    stop("Error: Invalid parse_type provided. Please input either \"file\" or \"class\"")
  }

  # Build the paths once, unquoted, so they can be used both by R and by und.
  # Trailing separators are dropped so the default "../tmp/" does not turn
  # into "../tmp//Understand.und".
  output_dir <- sub("[/\\\\]+$", "", output_dir)
  db_dir <- file.path(output_dir, "Understand.und")
  xml_path <- file.path(db_dir, paste0(parse_type, "Dependencies.xml"))

  # Run one und sub-command. A non-zero exit status is reported as a warning
  # rather than an error, so a step that und rejects (for instance because
  # the database was already created by a previous call) does not abort the
  # remaining steps.
  run_und <- function(args) {
    status <- system2("und", args)
    if (!identical(as.integer(status), 0L)) {
      warning("`und ", paste(args, collapse = " "),
              "` exited with status ", status, call. = FALSE)
    }
    invisible(status)
  }

  # Generate the XML file. Every path is shell-quoted so that folders
  # containing spaces survive the command line.
  run_und(c("create", "-db", shQuote(db_dir), "-languages", language))
  run_und(c("-db", shQuote(db_dir), "add", shQuote(project_path)))
  run_und(c("analyze", shQuote(db_dir)))
  run_und(c("export", "-dependencies", parse_type, "cytoscape",
            shQuote(xml_path), shQuote(db_dir)))

  if (!file.exists(xml_path)) {
    stop("Understand did not produce '", xml_path,
         "'. Check that `und` is installed and that the project could be analyzed.",
         call. = FALSE)
  }

  # Parse the XML file
  xml_data <- xmlParse(xml_path)
  xml_nodes <- xmlChildren(xmlRoot(xml_data))

  # Helper function to search the children of a node for the <att> whose
  # "name" attribute equals att_name, returning its "value" attribute.
  # Returns NA_character_ when nothing matches, so callers can always treat
  # the result as a string.
  find_att <- function(search_nodes, att_name) {
    for (att in search_nodes) {
      # Children without a "name" attribute give NULL here; isTRUE() turns
      # the resulting zero-length comparison into a plain non-match.
      if (isTRUE(xmlGetAttr(att, "name") == att_name)) {
        return(xmlGetAttr(att, "value", default = NA_character_))
      }
    }
    NA_character_
  }

  # From child nodes- filter for those with name "node"
  node_elements <- lapply(xml_nodes, function(child) {
    if (xmlName(child) != "node") {
      return(NULL)
    }
    att_nodes <- xmlChildren(child)
    data.table(
      node_label = find_att(att_nodes, "node.label"),
      # A missing attribute becomes NA instead of a silently dropped column
      id = xmlGetAttr(child, "id", default = NA_character_),
      long_name = find_att(att_nodes, "longName")
    )
  })

  # Remove NULLs and combine the results into a single table
  node_list <- do.call(rbind, Filter(Negate(is.null), node_elements))

  # From child nodes- filter for those with name "edge"
  edge_elements <- lapply(xml_nodes, function(child) {
    if (xmlName(child) != "edge") {
      return(NULL)
    }
    att_nodes <- xmlChildren(child)
    # An edge may list several comma separated kinds: emit one row per kind
    dependency_kind <- find_att(att_nodes, "dependency kind")
    dependency_kind <- unlist(strsplit(dependency_kind, ",\\s*"))
    if (length(dependency_kind) == 0) {
      # An empty value would otherwise yield a zero-length column
      dependency_kind <- NA_character_
    }
    data.table(
      id_from = xmlGetAttr(child, "source", default = NA_character_),
      id_to = xmlGetAttr(child, "target", default = NA_character_),
      dependency_kind = dependency_kind
    )
  })

  # Remove NULLs and combine the results into a single table
  edge_list <- do.call(rbind, Filter(Negate(is.null), edge_elements))

  # Create a list to return
  graph <- list(node_list = node_list, edge_list = edge_list)
  return(graph)
}

#' Parse dependencies from Depends
#'
Expand Down Expand Up @@ -215,6 +307,43 @@ parse_r_dependencies <- function(folder_path){

############## Network Transform ##############

#' Transform parsed Understand dependencies into a network
#'
#' Attaches the source and target node labels to every edge and keeps only
#' the edges whose dependency kind is listed in \code{weight_types}.
#'
#' @param parsed Parsed data from \code{\link{understand_parse_dependencies}}:
#' a list holding a \code{node_list} and an \code{edge_list} table.
#' @param weight_types The dependency kinds to keep, matched against the
#' \code{dependency_kind} column of the edge list.
#' Accepts single string and vector input
#' @return A list with \code{node_list} (the nodes of \code{parsed},
#' unchanged) and \code{edge_list} (columns \code{label_from},
#' \code{label_to}, \code{id_from}, \code{id_to}, \code{dependency_kind}).
#' An error is raised when no edge matches \code{weight_types}.
#'
#' @export
#' @family edgelists
transform_und_dependencies_to_network <- function(parsed, weight_types) {

  nodes <- parsed[["node_list"]]
  edges <- parsed[["edge_list"]]

  # Fail early with a clear message instead of a cryptic subsetting error
  if (is.null(nodes) || is.null(edges)) {
    stop("`parsed` must contain both a node_list and an edge_list.", call. = FALSE)
  }

  node_labels <- nodes[, .(id, node_label)]

  # Merge edges with nodes to get label_from
  edges <- merge(edges, node_labels, by.x = "id_from", by.y = "id", all.x = TRUE)
  setnames(edges, "node_label", "label_from")

  # Merge again to get label_to
  edges <- merge(edges, node_labels, by.x = "id_to", by.y = "id", all.x = TRUE)
  setnames(edges, "node_label", "label_to")

  # Reorder columns to have label_from and label_to on the left
  edges <- edges[, .(label_from, label_to, id_from, id_to, dependency_kind)]

  # Filter out by weights
  edges <- edges[dependency_kind %in% weight_types]

  # If filter removed all edges:
  if (nrow(edges) == 0) {
    stop("Error: No edges found under weight_types.")
  }

  # Create a list to return
  graph <- list(node_list = nodes, edge_list = edges)
  return(graph)
}

#' Transform parsed dependencies into a network
#'
#' @param depends_parsed A parsed mbox by \code{\link{parse_dependencies}}.
Expand Down
65 changes: 65 additions & 0 deletions vignettes/understand_showcase.Rmd
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
---
title: "Understand Showcase"
output:
html_document:
toc: true
number_sections: true
vignette: >
%\VignetteEngine{knitr::rmarkdown}
%\VignetteIndexEntry{Understand Showcase}
%\VignetteEncoding{UTF-8}
---


```{r warning = FALSE, message = FALSE}
# The workspace is deliberately not cleared here: rm(list = ls()) would wipe
# the caller's objects when this document is rendered in the current session.
library(kaiaulu)
library(visNetwork)
library(igraph)
library(data.table)
```


# Parse a sample project folder

```{r}
# Project folder to analyze; passed as `project_path` to
# understand_parse_dependencies() in the chunks below.
folder_path <- "../tests/sample_project"
```


# File Dependencies

```{r}
file_dependencies <- understand_parse_dependencies(project_path = folder_path, language = "java", parse_type = "file")
# The parser returns a list of two tables, so head() is applied to each table:
# head() on the list itself would print both tables in full.
head(file_dependencies[["node_list"]])
head(file_dependencies[["edge_list"]])
```


# Class Dependencies

```{r}
class_dependencies <- understand_parse_dependencies(project_path = folder_path, language = "java", parse_type = "class")
# The parser returns a list of two tables, so head() is applied to each table:
# head() on the list itself would print both tables in full.
head(class_dependencies[["node_list"]])
head(class_dependencies[["edge_list"]])
```

## File

```{r}
# Keep every dependency kind Understand reported for this project; pass a
# subset of these values instead to restrict the network to specific kinds.
file_weight_types <- unique(file_dependencies[["edge_list"]][["dependency_kind"]])
file_graph <- transform_und_dependencies_to_network(parsed = file_dependencies, weight_types = file_weight_types)
# graph_from_data_frame() reads the first two edge columns (label_from,
# label_to) as endpoints and the first vertex column (node_label) as names.
project_file_network <- igraph::graph_from_data_frame(d = file_graph[["edge_list"]],
                                                      directed = TRUE,
                                                      vertices = file_graph[["node_list"]])
visIgraph(project_file_network, randomSeed = 1)
```


## Class

```{r}
# Keep every dependency kind Understand reported for this project; pass a
# subset of these values instead to restrict the network to specific kinds.
class_weight_types <- unique(class_dependencies[["edge_list"]][["dependency_kind"]])
class_graph <- transform_und_dependencies_to_network(parsed = class_dependencies, weight_types = class_weight_types)
# graph_from_data_frame() reads the first two edge columns (label_from,
# label_to) as endpoints and the first vertex column (node_label) as names.
project_class_network <- igraph::graph_from_data_frame(d = class_graph[["edge_list"]],
                                                       directed = TRUE,
                                                       vertices = class_graph[["node_list"]])
visIgraph(project_class_network, randomSeed = 1)
```

Loading