-
Notifications
You must be signed in to change notification settings - Fork 2
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
1 parent
6d2036c
commit ee85fd1
Showing
6 changed files
with
104 additions
and
18 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,63 @@ | ||
#' Translate long text using Google Translate | ||
#' | ||
#' This function translates long text from one language to another using Google Translate. | ||
#' It splits the text into smaller chunks if necessary to handle large inputs. | ||
#' | ||
#' @param text The long text to translate. Should be a single string. | ||
#' @param target_language The language to translate the text into. Default is "en" for English. | ||
#' @param source_language The language of the input text. Default is "auto" for automatic detection. | ||
#' @param chunk_size The maximum number of characters to send in a single translation request. Default is 1000. | ||
#' | ||
#' @return A single string containing the translated text. | ||
#' @export | ||
#' | ||
#' @examples | ||
#' \donttest{ | ||
#' long_text <- paste(rep("This is a long text to translate.", 100), collapse = " ") | ||
#' google_translate_long_text( | ||
#' long_text, target_language = "de", | ||
#' source_language = "en", | ||
#' chunk_size = 500) | ||
#' } | ||
google_translate_long_text <- function(text, target_language = "en", source_language = "auto", chunk_size = 1000) { | ||
if (!google_is_valid_language_code(target_language)) { | ||
stop("Invalid target language code.") | ||
} | ||
if (!google_is_valid_language_code(source_language)) { | ||
stop("Invalid source language code.") | ||
} | ||
|
||
# Function to split text into chunks | ||
split_text <- function(text, chunk_size) { | ||
split_indices <- seq(1, nchar(text), by = chunk_size) | ||
sapply(split_indices, function(i) substr(text, i, i + chunk_size - 1)) | ||
} | ||
|
||
# Split text into chunks if it's too long | ||
if (nchar(text) > chunk_size) { | ||
text_chunks <- split_text(text, chunk_size) | ||
} else { | ||
text_chunks <- list(text) | ||
} | ||
|
||
# Translate each chunk | ||
translations <- sapply(text_chunks, function(chunk) { | ||
formatted_text <- urltools::url_encode(chunk) | ||
formatted_link <- paste0( | ||
"https://translate.google.com/m?tl=", | ||
target_language, "&sl=", source_language, | ||
"&q=", formatted_text | ||
) | ||
|
||
response <- httr::GET(formatted_link) | ||
translation <- httr::content(response) %>% | ||
rvest::html_nodes("div.result-container") %>% | ||
rvest::html_text() | ||
|
||
translation <- urltools::url_decode(translation) | ||
gsub("\n", "", translation) | ||
}) | ||
|
||
# Combine translated chunks | ||
paste(translations, collapse = " ") | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters