Skip to content

Commit

Permalink
i #282 comment downloader works and commit naming convention works
Browse files Browse the repository at this point in the history
commit refresher not yet working
  • Loading branch information
Ssunoo2 committed Mar 22, 2024
1 parent c36f970 commit c0377c3
Show file tree
Hide file tree
Showing 10 changed files with 221 additions and 20 deletions.
2 changes: 1 addition & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -46,4 +46,4 @@ Imports:
VignetteBuilder: knitr
URL: https://github.com/sailuh/kaiaulu
BugReports: https://github.com/sailuh/kaiaulu/issues
RoxygenNote: 7.3.0
RoxygenNote: 7.2.3
2 changes: 2 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -62,10 +62,12 @@ export(github_api_page_first)
export(github_api_page_last)
export(github_api_page_next)
export(github_api_page_prev)
export(github_api_project_commit_refresh)
export(github_api_project_commits)
export(github_api_project_contributors)
export(github_api_project_issue)
export(github_api_project_issue_events)
export(github_api_project_issue_or_pr_comment_refresh)
export(github_api_project_issue_or_pr_comments)
export(github_api_project_issue_refresh)
export(github_api_project_pull_request)
Expand Down
129 changes: 118 additions & 11 deletions R/github.R
Original file line number Diff line number Diff line change
Expand Up @@ -464,6 +464,7 @@ github_api_page_last <- function(gh_response){
github_api_iterate_pages <- function(token,gh_response,save_folder_path,prefix=NA,max_pages=NA){
page_number <- 1

data_exists = TRUE
# Set the max_pages to your api limit unless specified
if(is.na(max_pages)){
max_pages <- github_api_rate_limit(token)$remaining
Expand All @@ -473,7 +474,7 @@ github_api_iterate_pages <- function(token,gh_response,save_folder_path,prefix=N
while(!is.null(gh_response) & page_number < max_pages){
# Find the unixtime of created at at the first index.
# This will be the most recent by default in the file
if(length(gh_response) > 0 && !is.null(gh_response[[1]]$created_at)) {
if(length(gh_response) > 0 && !is.null(gh_response[[1]]$created_at) && (prefix != "commit")) {
# Get 'created_at' for the first issue in the response
most_recent_created_at <- as.POSIXct(gh_response[[1]]$created_at, format = "%Y-%m-%dT%H:%M:%OS", tz = "UTC")
# Convert to Unix timestamp
Expand All @@ -483,22 +484,49 @@ github_api_iterate_pages <- function(token,gh_response,save_folder_path,prefix=N
least_recently_created <- as.POSIXct(gh_response[[1]]$created_at, tz = "UTC")
# Convert to Unix timestamp
least_recently_created_at_unix <- as.numeric(least_recently_created)
message("extracted dates")
} else {
most_recent_created_at_unix <- "no_data"
least_recently_created_at_unix <- "no_data"
if (prefix != "commit"){
most_recent_created_at_unix <- paste0("_", page_number)
least_recently_created_at_unix <- "no_data"
data_exists = FALSE
message("Nothing to download")
}
}
message("extracted dates")

# Extract dates for commit
if (prefix == "commit"){
if(length(gh_response) > 0 && !is.null(gh_response[[1]]$commit$author$date)){
commit_date = gh_response[[1]]$commit$author$date
most_recent_created_at <- as.POSIXct(commit_date, format = "%Y-%m-%dT%H:%M:%OS", tz = "UTC")
# Convert to Unix timestamp
most_recent_created_at_unix <- as.numeric(most_recent_created_at)

least_recently_created <- as.POSIXct(commit_date, tz = "UTC")
# Convert to Unix timestamp
least_recently_created_at_unix <- as.numeric(least_recently_created)
message("extracted dates")
} else {
most_recent_created_at_unix <- paste0("_", page_number)
least_recently_created_at_unix <- "no_data"
data_exists = FALSE
message("Nothing to download")
} }




# Save the pages to file
write_json(gh_response,paste0(save_folder_path,
if (data_exists == TRUE){write_json(gh_response,paste0(save_folder_path,
#put startunixtime_endunixtime
owner,"_",repo,"_",prefix,"_",
least_recently_created_at_unix, "_",
most_recent_created_at_unix,
".json"),
pretty=TRUE,auto_unbox=TRUE)
message("written to file")
}

page_number <- page_number + 1
res <- try(
{
Expand All @@ -519,6 +547,7 @@ github_api_iterate_pages <- function(token,gh_response,save_folder_path,prefix=N
#' @param repo GitHub's repository name (e.g. kaiaulu)
#' @param created Github's created at date
#' @param token Your GitHub API token
#' @param created Only issues created after this date are fetched
#' @export
#' @references For details, see \url{https://docs.github.com/en/rest/reference/issues#list-repository-issues}.
github_api_project_issue_refresh <- function(owner,repo,token,created){
Expand All @@ -538,17 +567,82 @@ github_api_project_issue_refresh <- function(owner,repo,token,created){
return(items_only)
}

#' Download Project Pull Requests after a date
#' Download Project issues or pr comments after certain date
#'
#' Download Issues from "GET /repos/{owner}/{repo}/issues" endpoint.
#'
#' @param owner GitHub's repository owner (e.g. sailuh)
#' @param repo GitHub's repository name (e.g. kaiaulu)
#' @param created Github's created at date
#' @param token Your GitHub API token
#' @param created Only issues created after this date are fetched
#' @export
#' @references For details, see \url{https://docs.github.com/en/rest/reference/issues#list-repository-issues}.
github_api_project_issue_or_pr_comment_refresh <- function(owner,repo,token,created){
# Construct the search query
# Ensure 'created' is in the format "YYYY-MM-DD"
# For more precise filtering, including time, use "YYYY-MM-DDTHH:MM:SSZ"
# query <- sprintf("repo:%s/%s is:issue created:>%s", owner, repo, created)
#
# # Use the Search API endpoint to search for issues
# issues <- gh::gh("/search/issues",
# q = query,
# .token = token,
# .limit = 100) # Adjust .limit as needed, though GitHub API has its own paging mechanisms
#
# items_only <- issues$items
# #issues_json <- jsonlite::toJSON(items_only, auto_unbox = TRUE, pretty = TRUE)
# return(items_only)


gh::gh("GET /repos/{owner}/{repo}/issues/comments",
owner=owner,
repo=repo,
since=created, # Pass the `since` parameter in the API request
page=1,
per_page=100,
.token=token)
}

#' Download Project Pull Requests after a date
#'
#' Download Issues from "GET /repos/{owner}/{repo}/search/issues" endpoint.
#'
#' @param owner GitHub's repository owner (e.g. sailuh)
#' @param repo GitHub's repository name (e.g. kaiaulu)
#' @param created Github's created at date
#' @param token Your GitHub API token
#' @param created Only issues created after this date are fetched
#' @export
#' @references For details, see \url{https://docs.github.com/en/rest/reference/issues#list-repository-issues}.
github_api_project_pull_request_refresh <- function(owner,repo,token,created){

query <- sprintf('repo:%s/%s+type:issue+in:comments+created:>%s', owner, repo, since)

# Perform the API call to search issues/comments
comments <- gh::gh("/search/issues",
q=query,
page=1,
per_page=100,
.token=token)


#items_only <- pull_requests$items
return(items_only)
}

#' Download Project Pull Requests after a date
#'
#' Download Issues from "GET /repos/{owner}/{repo}/search/issues" endpoint.
#'
#' @param owner GitHub's repository owner (e.g. sailuh)
#' @param repo GitHub's repository name (e.g. kaiaulu)
#' @param created Github's created at date
#' @param token Your GitHub API token
#' @param created Only issues created after this date are fetched
#' @export
#' @references For details, see \url{https://docs.github.com/en/rest/reference/issues#list-repository-issues}.
github_api_project_commit_refresh <- function(owner,repo,token,created){
# Construct the search query for pull requests
# Ensure 'created' is in the format "YYYY-MM-DD"
# For more precise filtering, including time, use "YYYY-MM-DDTHH:MM:SSZ"
Expand All @@ -559,14 +653,27 @@ github_api_project_pull_request_refresh <- function(owner,repo,token,created){
q = query,
.token = token,
.limit = 100) # Adjust .limit as needed, though GitHub API has its own paging mechanisms
items_only <- issues$items
items_only <- pull_requests$items
return(items_only)
# Call the GitHub API to get issue comments created after a certain date
#comments <-
gh::gh("GET /repos/{owner}/{repo}/issues/comments",
owner=owner,
repo=repo,
since=created, # Pass the `since` parameter in the API request
page=1,
per_page=100,
.token=token)

#return(comments)
}

#' parse latest date
#'
#' Download Issues from "GET /repos/{owner}/{repo}/issues" endpoint.
#' Takes a filepath and returns a filename of the .json file that contains the
#' most recent 'created_at' value
#'
#' @param json_path the path
#' @param json_path the path with folders to read
#' @export
parse_jira_latest_date <- function(json_path){
file_list <- list.files(json_path)
Expand All @@ -592,11 +699,11 @@ parse_jira_latest_date <- function(json_path){

#' get the created_at field from a filename
#'
#' Download Issues from "GET /repos/{owner}/{repo}/issues" endpoint.
#' Function to read a JSON file along a path and return the 'created_at'
#' date of the first item
#'
#' @param filename the path and the filename
#' @export
# Function to read a JSON file and format the 'created_at' date of the first item
format_created_at_from_file <- function(filename) {
# Read the JSON file
data <- jsonlite::fromJSON(txt= filename, simplifyVector = FALSE)
Expand Down
3 changes: 2 additions & 1 deletion man/format_created_at_from_file.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

23 changes: 23 additions & 0 deletions man/github_api_project_commit_refresh.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

23 changes: 23 additions & 0 deletions man/github_api_project_issue_or_pr_comment_refresh.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion man/github_api_project_issue_refresh.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 2 additions & 2 deletions man/github_api_project_pull_request_refresh.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

5 changes: 3 additions & 2 deletions man/parse_jira_latest_date.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

48 changes: 46 additions & 2 deletions vignettes/download_github_comments.Rmd
Original file line number Diff line number Diff line change
Expand Up @@ -136,11 +136,11 @@ print(pr_most_recent_created_date)
```{r Collect all issues, eval = FALSE}
#gh call but with date
#created <- paste0('>',most_recent_created_date)
gh_response_pr <- github_api_project_issue_refresh(owner,repo,token,pr_most_recent_created_date)
gh_response_pr <- github_api_project_pull_request_refresh(owner,repo,token,pr_most_recent_created_date)
dir.create(save_path_pull_request)
github_api_iterate_pages(token,gh_response_pr,
save_path_pull_request,
prefix="pull_request")
prefix="pr")
```
## Issues and Pull Requests Comments

Expand All @@ -158,6 +158,28 @@ github_api_iterate_pages(token,gh_response,
save_path_issue_or_pr_comments,
prefix="issue_or_pr_comment")
```
#REFRESH ISSUE OR PR COMMENT
```{r}
# get the filename
latest_date_issue_or_pr_comment <- paste0(save_path_issue_or_pr_comments, parse_jira_latest_date(save_path_issue_or_pr_comments))
message(latest_date_issue_or_pr_comment)
# get the created_at
issue_or_pr_comment_most_recent_created_date <- format_created_at_from_file(latest_date_issue_or_pr_comment)
print(issue_or_pr_comment_most_recent_created_date)
```
```{r Collect all issues, eval = FALSE}
#gh call but with date
# get the data
gh_response_issue_or_pr_comment <- github_api_project_issue_or_pr_comment_refresh(owner,repo,token,issue_or_pr_comment_most_recent_created_date)
# create direcetory and iterate over data
dir.create(save_path_issue_or_pr_comments)
github_api_iterate_pages(token,gh_response_issue_or_pr_comment,
save_path_issue_or_pr_comments,
prefix="issue_or_pr_comment")
```

## Obtaining author's name and e-mail

Expand All @@ -175,6 +197,28 @@ dir.create(save_path_commit)
github_api_iterate_pages(token,gh_response,
save_path_commit,
prefix="commit")
```
#REFRESH Commit data
```{r}
# get the filename
latest_date_commit <- paste0(save_path_commit, parse_jira_latest_date(save_path_commit))
message(latest_date_commit)
# get the created_at
commit_most_recent_created_date <- format_created_at_from_file(latest_date_commit)
print(commit_most_recent_created_date)
```

```{r Collect all issues, eval = FALSE}
#gh call but with date
#created <- paste0('>',most_recent_created_date)
gh_response_issue_or_pr_comment <- github_api_project_commit_refresh(owner,repo,token,issue_or_pr_comment_most_recent_created_date)
dir.create(save_path_commit)
github_api_iterate_pages(token,gh_response_issue_or_pr_comment,
save_path_commit,
prefix="commit")
```

# Parsing Raw Data to Csv
Expand Down

0 comments on commit c0377c3

Please sign in to comment.