Skip to content

Commit

Permalink
i #317 Added More Extractable Information
Browse files Browse the repository at this point in the history
- config.R: added primary_language to openhub_parse_portfolio_projects, added html_url and mailing_list to openhub_parse_projects, added min_month and code_languages to openhub_parse_analyses.
- openhub_project_search.Rmd: incorporated the config.R changes (primary_language, html_url, mailing_list, min_month, and code_languages) under each respective section.
  • Loading branch information
beydlern committed Nov 6, 2024
1 parent 5fbd82d commit 314d195
Show file tree
Hide file tree
Showing 2 changed files with 37 additions and 4 deletions.
31 changes: 29 additions & 2 deletions R/config.R
Original file line number Diff line number Diff line change
Expand Up @@ -182,9 +182,9 @@ openhub_parse_portfolio_projects <- function(api_responses, openhub_api_paramete
parsed_response <- list()
if (status == "success") {
for (i in 1:itemsReturned) {
if (XML::xmlValue(returnItems[[1]][[i]][[3]]) == language) {
if (stringi::stri_detect_regex(XML::xmlValue(returnItems[[1]][[i]][[3]]), language, case_insensitive = TRUE)) {
parsed_response[["name"]] <- append(parsed_response[["name"]], XML::xmlValue(returnItems[[1]][[i]][[1]])) # means <result><portfolio_projects><project><name>
parsed_response[["language"]] <- append(parsed_response[["language"]], XML::xmlValue(returnItems[[1]][[i]][[3]])) # means <result><portfolio_projects><project><primary_language>
parsed_response[["primary_language"]] <- append(parsed_response[["primary_language"]], XML::xmlValue(returnItems[[1]][[i]][[3]])) # means <result><portfolio_projects><project><primary_language>
parsed_response[["activity"]] <- append(parsed_response[["activity"]], XML::xmlValue(returnItems[[1]][[i]][[2]])) # means <result><portfolio_projects><project><activity>
}
}
Expand Down Expand Up @@ -228,6 +228,22 @@ openhub_parse_projects <- function(api_responses, openhub_api_parameters) {
if (XML::xmlValue(returnItems[[i]][[2]]) == project_name) {
parsed_response[["name"]] <- append(parsed_response[["name"]], XML::xmlValue(returnItems[[i]][[2]])) # means <result><project><name>
parsed_response[["id"]] <- append(parsed_response[["id"]], XML::xmlValue(returnItems[[i]][[1]])) # means <result><project><id>
parsed_response[["html_url"]] <- append(parsed_response[["html_url"]], XML::xmlValue(returnItems[[i]][[4]])) # means <result><project><html_url>
links_tag <- returnItems[[i]][[23]] # <links> tag (sometimes present in a project's api response)
mailing_list <- "N/A"
if (!is.null(links_tag)) {
links <- XML::xmlChildren(links_tag)
for (i in seq_along(links)) {
link <- links[[i]] # i-th <link> tag in <links>
link_title <- stringi::stri_detect_regex(XML::xmlValue(link[[1]]), "Mailing List", case_insensitive = TRUE) # checks <title> in specific the <link> to see if "Mailing List" is contained, case insensitive
link_category <- stringi::stri_detect_regex(XML::xmlValue(link[[3]]), "Mailing List", case_insensitive = TRUE) # checks <category> in specific the <link> to see if "Mailing List" is contained, case insensitive
if (link_title || link_category) {
mailing_list <- XML::xmlValue(link[[2]]) # <url> in the specific <link> tag
break
}
}
}
parsed_response[["mailing_list"]] <- append(parsed_response[["mailing_list"]], mailing_list) # means <result><project><links><link><url> specific link that has a mailing list or not found (N/A)
break
}
}
Expand Down Expand Up @@ -262,11 +278,22 @@ openhub_parse_analyses <- function(api_responses) {
parsed_response <- list()
if (status == "success") {
parsed_response[["id"]] <- append(parsed_response[["id"]], XML::xmlValue(returnItems[[1]][[3]])) # primary key link to other data tables that possess the "id" key
parsed_response[["min_month"]] <- append(parsed_response[["min_month"]], stri_replace_all_regex(XML::xmlValue(returnItems[[1]][[6]]), "-\\d{2}$", "")) # means <result><analysis><min_month> truncated day because it is meaningless (always 01), only YYYY-MM is relevant
parsed_response[["twelve_month_contributor_count"]] <- append(parsed_response[["twelve_month_contributor_count"]], XML::xmlValue(returnItems[[1]][[8]])) # means <result><analysis><twelve_month_contributor_count>
parsed_response[["total_contributor_count"]] <- append(parsed_response[["total_contributor_count"]], XML::xmlValue(returnItems[[1]][[9]])) # means <result><analysis><total_contributor_count>
parsed_response[["twelve_month_commit_count"]] <- append(parsed_response[["twelve_month_commit_count"]], XML::xmlValue(returnItems[[1]][[10]])) # means <result><analysis><twelve_month_commit_count>
parsed_response[["total_commit_count"]] <- append(parsed_response[["total_commit_count"]], XML::xmlValue(returnItems[[1]][[11]])) # means <result><analysis><total_commit_count>
parsed_response[["total_code_lines"]] <- append(parsed_response[["total_code_lines"]], XML::xmlValue(returnItems[[1]][[12]])) # means <result><analysis><total_code_lines>
languages <- XML::xmlChildren(returnItems[[1]][[14]]) # <languages> children tags
code_languages_data_text <- list()
for (i in seq_along(languages)) {
language <- languages[[i]]
code_language_percentage <- paste0(XML::xmlGetAttr(language, "percentage"), "%") # adds a percentage symbol to the end of the percentage value for the code language
code_language <- stringi::stri_trim_both(stringi::stri_replace_all_fixed(XML::xmlValue(language), "\n", "")) # grabs code language text, then removes spaces and new line characters
code_languages_data_text[[i]] <- paste(code_language_percentage, code_language)
}
code_languages_data_text <- paste(code_languages_data_text, collapse = ", ")
parsed_response[["code_languages"]] <- append(parsed_response[["code_languages"]], code_languages_data_text)
} else {
warning(status) # prints the status warning message
}
Expand Down
10 changes: 8 additions & 2 deletions vignettes/openhub_project_search.Rmd
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ Please ensure the following R packages are installed on your computer.
```{r warning = FALSE, message = FALSE}
rm(list = ls())
require(kaiaulu)
require(stringi)
require(data.table)
require(knitr)
require(httr)
Expand All @@ -53,7 +54,7 @@ Explanation:

* openhub_api_parameters: A list containing key-value pairs of parameters to pass into the `openhub_*` functions. The following key names are valid and are required for proper execution (the documentation of each `openhub_*` function details the key-value pairs it requires): organization_name, language, portfolio_project_site, project_name, project_id
* organization_name: The name of the organization (case sensitive).
* language: The code language to filter for projects only containing the specified code language (case sensitive).
* language: The code language to filter for projects only containing the specified code language (case insensitive).
* token: The file named "openhub_token" containing the OpenHub API Token.

# Collecting and Parsing Data via Ohloh API
Expand Down Expand Up @@ -98,7 +99,7 @@ portfolio_projects_api_requests <- openhub_api_iterate_pages(token, openhub_api_
We ensure that `openhub_api_parameters` possesses the "language" key-value pair and pass the portfolio_projects API requests into its corresponding parser function to acquire a data table with columns representing the tags for each portfolio project listed:

* name: The name of the portfolio project.
* language: The primary code language used by the portfolio project.
* primary_language: The primary code language used by the portfolio project.
* activity: The portfolio project's activity level (Very Low, Low, Moderate, High, and Very High).

```{r, eval = FALSE}
Expand All @@ -123,6 +124,9 @@ With the list of project API requests, we perform another for loop to parse thes

* name: The name of the project.
* id: The project's unique ID.
* html_url: The URL of the project's details page on OpenHub.
* mailing_list: The URL of the project's mailing list (if "N/A", check the links section of the project's OpenHub page (html_url) to confirm that the project really has no mailing list).


```{r, eval = FALSE}
openhub_projects <- list()
Expand Down Expand Up @@ -158,11 +162,13 @@ for (i in 1:length(openhub_combined_projects[["name"]])) {
With the list of analysis API requests, we perform another for loop to parse these responses to acquire a data table with columns representing the tags for each analysis listed:

* id: The project's unique ID.
* min_month: OpenHub's first recorded year and month of the project's data (typically the date of the project's first commit, YYYY-MM format).
* twelve_month_contributor_count: The number of contributors who made at least one commit to the project source code in the past twelve months.
* total_contributor_count: The total number of contributors who made at least one commit to the project source code since the project's inception.
* twelve_month_commit_count: The total number of commits to the project source code in the past twelve months.
* total_commit_count: The total number of commits to the project source code since the project's inception.
* total_code_lines: The most recent total count of all source code lines.
* code_languages: A language breakdown with percentages for each substantial contributing language in the project's source code (as determined by OpenHub; lesser-contributing languages are grouped together and reported as "Other").

```{r, eval = FALSE}
openhub_analyses <- list()
Expand Down

0 comments on commit 314d195

Please sign in to comment.